-rw-r--r-- Documentation/cgroups/cgroups.txt | 9
-rw-r--r-- Documentation/controllers/memcg_test.txt | 342
-rw-r--r-- Documentation/controllers/memory.txt | 135
-rw-r--r-- Documentation/hwmon/abituguru-datasheet | 4
-rw-r--r-- Documentation/kernel-parameters.txt | 3
-rw-r--r-- Documentation/powerpc/dts-bindings/fsl/board.txt | 32
-rw-r--r-- Documentation/scsi/scsi_fc_transport.txt | 4
-rw-r--r-- Documentation/w1/masters/00-INDEX | 2
-rw-r--r-- Documentation/w1/masters/mxc-w1 | 11
-rw-r--r-- Documentation/w1/w1.netlink | 164
-rw-r--r-- MAINTAINERS | 11
-rw-r--r-- arch/arm/configs/clps7500_defconfig | 801
-rw-r--r-- arch/arm/kernel/isa.c | 1
-rw-r--r-- arch/arm/mach-at91/at91cap9.c | 1
-rw-r--r-- arch/arm/mach-at91/at91rm9200.c | 1
-rw-r--r-- arch/arm/mach-at91/at91sam9260.c | 1
-rw-r--r-- arch/arm/mach-at91/at91sam9261.c | 1
-rw-r--r-- arch/arm/mach-at91/at91sam9263.c | 1
-rw-r--r-- arch/arm/mach-at91/at91sam9rl.c | 1
-rw-r--r-- arch/arm/mach-at91/board-sam9rlek.c | 1
-rw-r--r-- arch/arm/mach-clps711x/edb7211-mm.c | 1
-rw-r--r-- arch/arm/mach-clps711x/fortunet.c | 1
-rw-r--r-- arch/arm/mach-davinci/devices.c | 1
-rw-r--r-- arch/arm/mach-davinci/include/mach/gpio.h | 1
-rw-r--r-- arch/arm/mach-footbridge/common.c | 9
-rw-r--r-- arch/arm/mach-footbridge/common.h | 1
-rw-r--r-- arch/arm/mach-footbridge/dc21285.c | 23
-rw-r--r-- arch/arm/mach-footbridge/isa-irq.c | 2
-rw-r--r-- arch/arm/mach-h720x/h7202-eval.c | 1
-rw-r--r-- arch/arm/mach-kirkwood/common.c | 1
-rw-r--r-- arch/arm/mach-kirkwood/pcie.c | 1
-rw-r--r-- arch/arm/mach-ks8695/devices.c | 1
-rw-r--r-- arch/arm/mach-msm/devices.c | 1
-rw-r--r-- arch/arm/mach-mv78xx0/pcie.c | 1
-rw-r--r-- arch/arm/mach-mx2/devices.c | 1
-rw-r--r-- arch/arm/mach-mx3/devices.c | 1
-rw-r--r-- arch/arm/mach-netx/fb.c | 2
-rw-r--r-- arch/arm/mach-netx/time.c | 2
-rw-r--r-- arch/arm/mach-netx/xc.c | 1
-rw-r--r-- arch/arm/mach-omap1/mcbsp.c | 1
-rw-r--r-- arch/arm/mach-omap2/mcbsp.c | 1
-rw-r--r-- arch/arm/mach-orion5x/pci.c | 1
-rw-r--r-- arch/arm/mach-pnx4008/gpio.c | 1
-rw-r--r-- arch/arm/mach-pnx4008/i2c.c | 1
-rw-r--r-- arch/arm/mach-pxa/e350.c | 1
-rw-r--r-- arch/arm/mach-pxa/e400.c | 1
-rw-r--r-- arch/arm/mach-pxa/e740.c | 1
-rw-r--r-- arch/arm/mach-pxa/e750.c | 53
-rw-r--r-- arch/arm/mach-pxa/e800.c | 1
-rw-r--r-- arch/arm/mach-pxa/include/mach/pxa3xx-regs.h | 2
-rw-r--r-- arch/arm/mach-realview/platsmp.c | 1
-rw-r--r-- arch/arm/mach-s3c2410/include/mach/gpio.h | 1
-rw-r--r-- arch/arm/mach-s3c2410/include/mach/irqs.h | 4
-rw-r--r-- arch/arm/mach-s3c2440/mach-at2440evb.c | 2
-rw-r--r-- arch/arm/mach-s3c6400/include/mach/irqs.h | 4
-rw-r--r-- arch/arm/plat-omap/i2c.c | 1
-rw-r--r-- arch/arm/plat-s3c/dev-fb.c | 1
-rw-r--r-- arch/arm/plat-s3c/dev-i2c0.c | 1
-rw-r--r-- arch/arm/plat-s3c/dev-i2c1.c | 1
-rw-r--r-- arch/arm/plat-s3c24xx/gpiolib.c | 18
-rw-r--r-- arch/arm/plat-s3c24xx/pwm.c | 2
-rw-r--r-- arch/arm/plat-s3c64xx/include/plat/irqs.h | 2
-rw-r--r-- arch/powerpc/Kconfig | 5
-rw-r--r-- arch/powerpc/boot/Makefile | 2
-rw-r--r-- arch/powerpc/boot/dts/mpc836x_mds.dts | 43
-rw-r--r-- arch/powerpc/boot/dts/mpc836x_rdk.dts | 19
-rw-r--r-- arch/powerpc/boot/dts/mpc8641_hpcn.dts | 56
-rw-r--r-- arch/powerpc/boot/install.sh | 14
-rw-r--r-- arch/powerpc/configs/85xx/mpc8572_ds_defconfig | 43
-rw-r--r-- arch/powerpc/include/asm/ioctls.h | 2
-rw-r--r-- arch/powerpc/include/asm/kexec.h | 55
-rw-r--r-- arch/powerpc/include/asm/ps3.h | 2
-rw-r--r-- arch/powerpc/include/asm/qe.h | 37
-rw-r--r-- arch/powerpc/include/asm/qe_ic.h | 21
-rw-r--r-- arch/powerpc/include/asm/spu.h | 2
-rw-r--r-- arch/powerpc/kernel/Makefile | 2
-rw-r--r-- arch/powerpc/kernel/cacheinfo.c | 837
-rw-r--r-- arch/powerpc/kernel/cacheinfo.h | 8
-rw-r--r-- arch/powerpc/kernel/pci-common.c | 71
-rw-r--r-- arch/powerpc/kernel/pci_64.c | 9
-rw-r--r-- arch/powerpc/kernel/ppc_ksyms.c | 1
-rw-r--r-- arch/powerpc/kernel/prom.c | 14
-rw-r--r-- arch/powerpc/kernel/prom_init.c | 2
-rw-r--r-- arch/powerpc/kernel/sysfs.c | 300
-rw-r--r-- arch/powerpc/mm/mmu_decl.h | 6
-rw-r--r-- arch/powerpc/mm/numa.c | 62
-rw-r--r-- arch/powerpc/mm/pgtable_32.c | 3
-rw-r--r-- arch/powerpc/mm/tlb_nohash.c | 3
-rw-r--r-- arch/powerpc/oprofile/cell/pr_util.h | 2
-rw-r--r-- arch/powerpc/platforms/52xx/mpc52xx_common.c | 2
-rw-r--r-- arch/powerpc/platforms/83xx/mpc831x_rdb.c | 2
-rw-r--r-- arch/powerpc/platforms/83xx/mpc832x_mds.c | 9
-rw-r--r-- arch/powerpc/platforms/83xx/mpc832x_rdb.c | 5
-rw-r--r-- arch/powerpc/platforms/83xx/mpc836x_mds.c | 81
-rw-r--r-- arch/powerpc/platforms/83xx/mpc836x_rdk.c | 6
-rw-r--r-- arch/powerpc/platforms/83xx/mpc837x_mds.c | 1
-rw-r--r-- arch/powerpc/platforms/83xx/mpc837x_rdb.c | 2
-rw-r--r-- arch/powerpc/platforms/83xx/mpc83xx.h | 1
-rw-r--r-- arch/powerpc/platforms/85xx/mpc85xx_ds.c | 7
-rw-r--r-- arch/powerpc/platforms/85xx/smp.c | 1
-rw-r--r-- arch/powerpc/platforms/Kconfig | 11
-rw-r--r-- arch/powerpc/platforms/Kconfig.cputype | 2
-rw-r--r-- arch/powerpc/platforms/cell/beat_htab.c | 21
-rw-r--r-- arch/powerpc/platforms/cell/beat_udbg.c | 4
-rw-r--r-- arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c | 2
-rw-r--r-- arch/powerpc/platforms/cell/interrupt.c | 2
-rw-r--r-- arch/powerpc/platforms/cell/io-workarounds.c | 4
-rw-r--r-- arch/powerpc/platforms/cell/iommu.c | 4
-rw-r--r-- arch/powerpc/platforms/cell/spufs/spufs.h | 2
-rw-r--r-- arch/powerpc/platforms/iseries/Kconfig | 5
-rw-r--r-- arch/powerpc/platforms/iseries/setup.c | 11
-rw-r--r-- arch/powerpc/platforms/pasemi/cpufreq.c | 2
-rw-r--r-- arch/powerpc/platforms/pasemi/dma_lib.c | 2
-rw-r--r-- arch/powerpc/platforms/powermac/pci.c | 2
-rw-r--r-- arch/powerpc/platforms/powermac/time.c | 11
-rw-r--r-- arch/powerpc/platforms/ps3/device-init.c | 37
-rw-r--r-- arch/powerpc/sysdev/Makefile | 1
-rw-r--r-- arch/powerpc/sysdev/fsl_pci.c | 7
-rw-r--r-- arch/powerpc/sysdev/fsl_soc.h | 5
-rw-r--r-- arch/powerpc/sysdev/qe_lib/Kconfig | 3
-rw-r--r-- arch/powerpc/sysdev/qe_lib/gpio.c | 195
-rw-r--r-- arch/powerpc/sysdev/simple_gpio.c | 155
-rw-r--r-- arch/powerpc/sysdev/simple_gpio.h | 12
-rw-r--r-- arch/sparc/kernel/sun4m_smp.c | 5
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2
-rw-r--r-- drivers/amba/bus.c | 3
-rw-r--r-- drivers/ata/ahci.c | 10
-rw-r--r-- drivers/ata/ata_piix.c | 51
-rw-r--r-- drivers/ata/libata-core.c | 5
-rw-r--r-- drivers/ata/libata-sff.c | 185
-rw-r--r-- drivers/ata/pata_ali.c | 107
-rw-r--r-- drivers/ata/pata_amd.c | 4
-rw-r--r-- drivers/ata/pata_hpt366.c | 109
-rw-r--r-- drivers/ata/pata_hpt3x3.c | 49
-rw-r--r-- drivers/ata/pata_mpiix.c | 3
-rw-r--r-- drivers/ata/pata_platform.c | 2
-rw-r--r-- drivers/ata/pata_sil680.c | 4
-rw-r--r-- drivers/ata/sata_sil24.c | 7
-rw-r--r-- drivers/atm/iphase.c | 6
-rw-r--r-- drivers/char/Kconfig | 1
-rw-r--r-- drivers/char/hvc_beat.c | 4
-rw-r--r-- drivers/char/pty.c | 2
-rw-r--r-- drivers/char/tpm/tpm_nsc.c | 35
-rw-r--r-- drivers/char/vt.c | 3
-rw-r--r-- drivers/firmware/dcdbas.c | 9
-rw-r--r-- drivers/firmware/dcdbas.h | 2
-rw-r--r-- drivers/firmware/memmap.c | 6
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.c | 2
-rw-r--r-- drivers/input/mouse/pxa930_trkball.c | 4
-rw-r--r-- drivers/isdn/hardware/eicon/debuglib.h | 2
-rw-r--r-- drivers/isdn/hardware/eicon/os_4bri.c | 2
-rw-r--r-- drivers/isdn/hardware/eicon/os_bri.c | 2
-rw-r--r-- drivers/isdn/hardware/eicon/os_pri.c | 2
-rw-r--r-- drivers/md/bitmap.c | 11
-rw-r--r-- drivers/md/faulty.c | 3
-rw-r--r-- drivers/md/linear.c | 3
-rw-r--r-- drivers/md/md.c | 416
-rw-r--r-- drivers/md/multipath.c | 3
-rw-r--r-- drivers/md/raid0.c | 178
-rw-r--r-- drivers/md/raid1.c | 11
-rw-r--r-- drivers/md/raid10.c | 3
-rw-r--r-- drivers/md/raid5.c | 8
-rw-r--r-- drivers/misc/Kconfig | 12
-rw-r--r-- drivers/misc/Makefile | 1
-rw-r--r-- drivers/misc/dell-laptop.c | 436
-rw-r--r-- drivers/mtd/devices/Kconfig | 7
-rw-r--r-- drivers/mtd/devices/Makefile | 1
-rw-r--r-- drivers/mtd/devices/ps3vram.c | 768
-rw-r--r-- drivers/mtd/ubi/kapi.c | 2
-rw-r--r-- drivers/net/wireless/ath5k/dma.c | 2
-rw-r--r-- drivers/net/wireless/zd1211rw/zd_mac.c | 2
-rw-r--r-- drivers/rtc/rtc-ds1307.c | 154
-rw-r--r-- drivers/s390/block/dasd_3990_erp.c | 2
-rw-r--r-- drivers/s390/block/dasd_int.h | 2
-rw-r--r-- drivers/s390/char/tape_3590.c | 2
-rw-r--r-- drivers/s390/cio/cio.c | 2
-rw-r--r-- drivers/s390/cio/qdio_main.c | 2
-rw-r--r-- drivers/scsi/Kconfig | 1
-rw-r--r-- drivers/serial/Kconfig | 21
-rw-r--r-- drivers/serial/Makefile | 1
-rw-r--r-- drivers/serial/nwpserial.c | 475
-rw-r--r-- drivers/serial/of_serial.c | 19
-rw-r--r-- drivers/video/amba-clcd.c | 1
-rw-r--r-- drivers/w1/masters/Kconfig | 6
-rw-r--r-- drivers/w1/masters/Makefile | 2
-rw-r--r-- drivers/w1/masters/mxc_w1.c | 211
-rw-r--r-- drivers/w1/w1.h | 1
-rw-r--r-- drivers/w1/w1_io.c | 26
-rw-r--r-- drivers/w1/w1_netlink.c | 261
-rw-r--r-- drivers/w1/w1_netlink.h | 16
-rw-r--r-- drivers/xen/Kconfig | 24
-rw-r--r-- drivers/xen/Makefile | 2
-rw-r--r-- drivers/xen/xenbus/xenbus_client.c | 3
-rw-r--r-- drivers/xen/xenbus/xenbus_probe.c | 28
-rw-r--r-- drivers/xen/xenbus/xenbus_xs.c | 1
-rw-r--r-- drivers/xen/xenfs/Makefile | 3
-rw-r--r-- drivers/xen/xenfs/super.c | 64
-rw-r--r-- drivers/xen/xenfs/xenbus.c | 593
-rw-r--r-- drivers/xen/xenfs/xenfs.h | 6
-rw-r--r-- fs/binfmt_elf.c | 12
-rw-r--r-- fs/block_dev.c | 14
-rw-r--r-- fs/coda/sysctl.c | 5
-rw-r--r-- fs/dcache.c | 14
-rw-r--r-- fs/dquot.c | 2
-rw-r--r-- fs/ext2/ialloc.c | 8
-rw-r--r-- fs/ext2/inode.c | 2
-rw-r--r-- fs/ext2/ioctl.c | 3
-rw-r--r-- fs/ext2/super.c | 10
-rw-r--r-- fs/ext3/ialloc.c | 8
-rw-r--r-- fs/ext3/ioctl.c | 3
-rw-r--r-- fs/ext3/namei.c | 4
-rw-r--r-- fs/ext3/super.c | 10
-rw-r--r-- fs/ext4/extents.c | 2
-rw-r--r-- fs/ext4/namei.c | 4
-rw-r--r-- fs/jbd/commit.c | 15
-rw-r--r-- fs/jbd/transaction.c | 39
-rw-r--r-- fs/ocfs2/alloc.c | 2
-rw-r--r-- fs/ocfs2/dlmglue.c | 4
-rw-r--r-- fs/ocfs2/file.c | 2
-rw-r--r-- fs/proc/vmcore.c | 2
-rw-r--r-- fs/romfs/inode.c | 12
-rw-r--r-- fs/splice.c | 1
-rw-r--r-- fs/super.c | 2
-rw-r--r-- include/linux/auxvec.h | 6
-rw-r--r-- include/linux/cgroup.h | 61
-rw-r--r-- include/linux/cpuset.h | 10
-rw-r--r-- include/linux/ext2_fs.h | 24
-rw-r--r-- include/linux/ext2_fs_sb.h | 4
-rw-r--r-- include/linux/ext3_fs.h | 24
-rw-r--r-- include/linux/ext3_fs_sb.h | 4
-rw-r--r-- include/linux/jbd.h | 15
-rw-r--r-- include/linux/kernel.h | 6
-rw-r--r-- include/linux/libata.h | 3
-rw-r--r-- include/linux/magic.h | 1
-rw-r--r-- include/linux/memcontrol.h | 154
-rw-r--r-- include/linux/mm_inline.h | 22
-rw-r--r-- include/linux/mmzone.h | 24
-rw-r--r-- include/linux/nwpserial.h | 18
-rw-r--r-- include/linux/page_cgroup.h | 52
-rw-r--r-- include/linux/pid.h | 18
-rw-r--r-- include/linux/pid_namespace.h | 6
-rw-r--r-- include/linux/raid/md_k.h | 20
-rw-r--r-- include/linux/raid/md_p.h | 2
-rw-r--r-- include/linux/raid/raid0.h | 10
-rw-r--r-- include/linux/res_counter.h | 8
-rw-r--r-- include/linux/serial_core.h | 3
-rw-r--r-- include/linux/swap.h | 25
-rw-r--r-- include/xen/xenbus.h | 2
-rw-r--r-- init/Kconfig | 142
-rw-r--r-- ipc/mqueue.c | 3
-rw-r--r-- kernel/async.c | 4
-rw-r--r-- kernel/cgroup.c | 276
-rw-r--r-- kernel/cpuset.c | 251
-rw-r--r-- kernel/fork.c | 4
-rw-r--r-- kernel/ns_cgroup.c | 2
-rw-r--r-- kernel/pid.c | 6
-rw-r--r-- kernel/res_counter.c | 44
-rw-r--r-- kernel/sched_fair.c | 2
-rw-r--r-- lib/sort.c | 30
-rw-r--r-- mm/filemap.c | 2
-rw-r--r-- mm/memcontrol.c | 1846
-rw-r--r-- mm/memory.c | 28
-rw-r--r-- mm/migrate.c | 42
-rw-r--r-- mm/oom_kill.c | 10
-rw-r--r-- mm/page_alloc.c | 8
-rw-r--r-- mm/page_cgroup.c | 207
-rw-r--r-- mm/shmem.c | 20
-rw-r--r-- mm/swap.c | 33
-rw-r--r-- mm/swap_state.c | 4
-rw-r--r-- mm/swapfile.c | 24
-rw-r--r-- mm/vmscan.c | 197
-rw-r--r-- net/ipv6/route.c | 2
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c | 2
-rw-r--r-- net/sched/sch_sfq.c | 2
-rw-r--r-- net/sctp/auth.c | 2
-rw-r--r-- security/device_cgroup.c | 5
-rw-r--r-- security/smack/smackfs.c | 2
-rw-r--r-- sound/soc/au1x/dbdma2.c | 2
-rw-r--r-- sound/soc/davinci/davinci-pcm.c | 2
279 files changed, 9244 insertions, 3004 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index d9014aa0eb68..e33ee74eee77 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -227,7 +227,6 @@ Each cgroup is represented by a directory in the cgroup file system
 containing the following files describing that cgroup:

  - tasks: list of tasks (by pid) attached to that cgroup
- - releasable flag: cgroup currently removeable?
  - notify_on_release flag: run the release agent on exit?
  - release_agent: the path to use for release notifications (this file
    exists in the top cgroup only)
@@ -360,7 +359,7 @@ Now you want to do something with this cgroup.

 In this directory you can find several files:
 # ls
-notify_on_release releasable tasks
+notify_on_release tasks
 (plus whatever files added by the attached subsystems)

 Now attach your shell to this cgroup:
@@ -479,7 +478,6 @@ newly-created cgroup if an error occurs after this subsystem's
 create() method has been called for the new cgroup).

 void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
-(cgroup_mutex held by caller)

 Called before checking the reference count on each subsystem. This may
 be useful for subsystems which have some extra references even if
@@ -498,6 +496,7 @@ remain valid while the caller holds cgroup_mutex.

 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	    struct cgroup *old_cgrp, struct task_struct *task)
+(cgroup_mutex held by caller)

 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
@@ -511,6 +510,7 @@ void exit(struct cgroup_subsys *ss, struct task_struct *task)
 Called during task exit.

 int populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+(cgroup_mutex held by caller)

 Called after creation of a cgroup to allow a subsystem to populate
 the cgroup directory with file entries. The subsystem should make
@@ -520,6 +520,7 @@ method can return an error code, the error code is currently not
 always handled well.

 void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
+(cgroup_mutex held by caller)

 Called at the end of cgroup_clone() to do any parameter
 initialization which might be required before a task could attach. For
@@ -527,7 +528,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set
 up.

 void bind(struct cgroup_subsys *ss, struct cgroup *root)
-(cgroup_mutex held by caller)
+(cgroup_mutex and ss->hierarchy_mutex held by caller)

 Called when a cgroup subsystem is rebound to a different hierarchy
 and root cgroup. Currently this will only involve movement between
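As a quick illustration of the callbacks whose locking rules are annotated
above, here is a minimal sketch of a subsystem that implements two of them.
It is a sketch only: the field names follow the 2.6.28-era struct
cgroup_subsys, the subsystem itself is hypothetical, and a real subsystem
would also need create()/destroy() and a registered subsys_id.

	#include <linux/cgroup.h>

	/* cgroup_mutex is held by the caller, per the documentation above */
	static void demo_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
				struct cgroup *old_cgrp,
				struct task_struct *task)
	{
		/* post-attachment work; may allocate memory or block */
	}

	/* cgroup_mutex is held by the caller here as well */
	static int demo_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
	{
		/* create this subsystem's files in the cgroup directory */
		return 0;
	}

	struct cgroup_subsys demo_subsys = {
		.name		= "demo",
		.attach		= demo_attach,
		.populate	= demo_populate,
	};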
diff --git a/Documentation/controllers/memcg_test.txt b/Documentation/controllers/memcg_test.txt
new file mode 100644
index 000000000000..08d4d3ea0d79
--- /dev/null
+++ b/Documentation/controllers/memcg_test.txt
@@ -0,0 +1,342 @@
+Memory Resource Controller(Memcg) Implementation Memo.
+Last Updated: 2008/12/15
+Base Kernel Version: based on 2.6.28-rc8-mm.
+
+Because the VM is getting complex (one of the reasons is memcg...), memcg's
+behavior is complex too. This is a document for memcg's internal behavior.
+Please note that implementation details can be changed.
+
+(*) Topics on the API should be in Documentation/controllers/memory.txt
+
+0. How to record usage?
+   Two objects are used.
+
+   page_cgroup ....an object per page.
+	Allocated at boot or memory hotplug. Freed at memory hot removal.
+
+   swap_cgroup ... an entry per swp_entry.
+	Allocated at swapon(). Freed at swapoff().
+
+   The page_cgroup has a USED bit, and double counting against a page_cgroup
+   never occurs. swap_cgroup is used only when a charged page is swapped-out.
+
+1. Charge
+
+   a page/swp_entry may be charged (usage += PAGE_SIZE) at
+
+	mem_cgroup_newpage_charge()
+	  Called at new page fault and Copy-On-Write.
+
+	mem_cgroup_try_charge_swapin()
+	  Called at do_swap_page() (page fault on swap entry) and swapoff.
+	  Followed by charge-commit-cancel protocol. (With swap accounting)
+	  At commit, a charge recorded in swap_cgroup is removed.
+
+	mem_cgroup_cache_charge()
+	  Called at add_to_page_cache()
+
+	mem_cgroup_cache_charge_swapin()
+	  Called at shmem's swapin.
+
+	mem_cgroup_prepare_migration()
+	  Called before migration. "extra" charge is done and followed by
+	  charge-commit-cancel protocol.
+	  At commit, charge against oldpage or newpage will be committed.
+
+2. Uncharge
+   a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
+
+	mem_cgroup_uncharge_page()
+	  Called when an anonymous page is fully unmapped. I.e., mapcount goes
+	  to 0. If the page is SwapCache, uncharge is delayed until
+	  mem_cgroup_uncharge_swapcache().
+
+	mem_cgroup_uncharge_cache_page()
+	  Called when a page-cache is deleted from radix-tree. If the page is
+	  SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
+
+	mem_cgroup_uncharge_swapcache()
+	  Called when SwapCache is removed from radix-tree. The charge itself
+	  is moved to swap_cgroup. (If mem+swap controller is disabled, no
+	  charge to swap occurs.)
+
+	mem_cgroup_uncharge_swap()
+	  Called when swp_entry's refcnt goes down to 0. A charge against swap
+	  disappears.
+
+	mem_cgroup_end_migration(old, new)
+	  At success of migration old is uncharged (if necessary), a charge
+	  to new page is committed. At failure, charge to old page is committed.
+
+3. charge-commit-cancel
+   In some cases, we can't know whether a "charge" is valid or not at
+   charging (because of races).
+   To handle such cases, there are charge-commit-cancel functions.
+	mem_cgroup_try_charge_XXX
+	mem_cgroup_commit_charge_XXX
+	mem_cgroup_cancel_charge_XXX
+   these are used in swap-in and migration.
+
+   At try_charge(), there are no flags to say "this page is charged".
+   at this point, usage += PAGE_SIZE.
+
+   At commit(), the function checks whether the page should be charged or
+   not and sets flags or avoids charging. (usage -= PAGE_SIZE)
+
+   At cancel(), simply usage -= PAGE_SIZE.
+
+In the explanation below, we assume CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y.
+
+4. Anonymous
+   An anonymous page is newly allocated at
+		- page fault into MAP_ANONYMOUS mapping.
+		- Copy-On-Write.
+   It is charged right after it's allocated, before doing any page table
+   related operations. Of course, it's uncharged when another page is used
+   for the fault address.
+
+   At freeing an anonymous page (by exit() or munmap()), zap_pte() is called
+   and pages for ptes are freed one by one. (see mm/memory.c) Uncharges
+   are done at page_remove_rmap() when page_mapcount() goes down to 0.
+
+   Another kind of page freeing is by page-reclaim (vmscan.c), where
+   anonymous pages are swapped out. In this case, the page is marked as
+   PageSwapCache(). The uncharge() routine doesn't uncharge a page marked
+   as SwapCache. It's delayed until __delete_from_swap_cache().
+
+   4.1 Swap-in.
+   At swap-in, the page is taken from swap-cache. There are 2 cases.
+
+   (a) If the SwapCache is newly allocated and read, it has no charges.
+   (b) If the SwapCache has been mapped by processes, it has been
+       charged already.
+
+   This swap-in is one of the most complicated cases. In do_swap_page(),
+   the following events occur when the pte is unchanged.
+
+   (1) the page (SwapCache) is looked up.
+   (2) lock_page()
+   (3) try_charge_swapin()
+   (4) reuse_swap_page() (may call delete_swap_cache())
+   (5) commit_charge_swapin()
+   (6) swap_free().
+
+   Consider the following situations, for example.
+
+   (A) The page has not been charged before (2) and reuse_swap_page()
+       doesn't call delete_from_swap_cache().
+   (B) The page has not been charged before (2) and reuse_swap_page()
+       calls delete_from_swap_cache().
+   (C) The page has been charged before (2) and reuse_swap_page() doesn't
+       call delete_from_swap_cache().
+   (D) The page has been charged before (2) and reuse_swap_page() calls
+       delete_from_swap_cache().
+
+   memory.usage/memsw.usage changes for this page/swp_entry will be:
+	Case          (A)      (B)      (C)      (D)
+	Event
+	Before (2)    0/ 1     0/ 1     1/ 1     1/ 1
+	===========================================
+	   (3)       +1/+1    +1/+1    +1/+1    +1/+1
+	   (4)        -        0/ 0     -       -1/ 0
+	   (5)        0/-1     0/ 0    -1/-1     0/ 0
+	   (6)        -        0/-1     -        0/-1
+	===========================================
+	Result        1/ 1     1/ 1     1/ 1     1/ 1
+
+   In any case, charges to this page should be 1/ 1.
+
+   4.2 Swap-out.
+   At swap-out, the typical state transition is below.
+
+   (a) add to swap cache. (marked as SwapCache)
+       swp_entry's refcnt += 1.
+   (b) fully unmapped.
+       swp_entry's refcnt += # of ptes.
+   (c) write back to swap.
+   (d) delete from swap cache. (remove from SwapCache)
+       swp_entry's refcnt -= 1.
+
+
+   At (b), the page is marked as SwapCache and not uncharged.
+   At (d), the page is removed from SwapCache and a charge in page_cgroup
+   is moved to swap_cgroup.
+
+   Finally, at task exit,
+   (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
+       Here, a charge in swap_cgroup disappears.
+
+5. Page Cache
+   Page Cache is charged at
+	- add_to_page_cache_locked().
+
+   and uncharged at
+	- __remove_from_page_cache().
+
+   The logic is very clear. (About migration, see below)
+   Note: __remove_from_page_cache() is called by remove_from_page_cache()
+   and __remove_mapping().
+
+6. Shmem(tmpfs) Page Cache
+   Memcg's charge/uncharge have special handlers for shmem. The best way
+   to understand shmem's page state transition is to read mm/shmem.c.
+   But a brief explanation of the behavior of memcg around shmem will be
+   helpful to understand the logic.
+
+   Shmem's page (just leaf page, not direct/indirect block) can be on
+	- radix-tree of shmem's inode.
+	- SwapCache.
+	- Both on radix-tree and SwapCache. This happens at swap-in
+	  and swap-out.
+
+   It's charged when...
+	- A new page is added to shmem's radix-tree.
+	- A swapped page is read. (move a charge from swap_cgroup to
+	  page_cgroup)
+   It's uncharged when...
+	- A page is removed from radix-tree and not SwapCache.
+	- When SwapCache is removed, a charge is moved to swap_cgroup.
+	- When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
+	  disappears.
+
+7. Page Migration
+   One of the most complicated functions is the page-migration-handler.
+   Memcg has 2 routines. Assume that we are migrating a page's contents
+   from OLDPAGE to NEWPAGE.
+
+   The usual migration logic is..
+	(a) remove the page from LRU.
+	(b) allocate NEWPAGE (migration target)
+	(c) lock by lock_page().
+	(d) unmap all mappings.
+	(e-1) If necessary, replace entry in radix-tree.
+	(e-2) move contents of a page.
+	(f) map all mappings again.
+	(g) pushback the page to LRU.
+	(-) OLDPAGE will be freed.
+
+   Before (g), memcg should complete all necessary charge/uncharge to
+   NEWPAGE/OLDPAGE.
+
+   The point is....
+	- If OLDPAGE is anonymous, all charges will be dropped at (d) because
+	  try_to_unmap() drops all mapcount and the page will not be
+	  SwapCache.
+
+	- If OLDPAGE is SwapCache, charges will be kept at (g) because
+	  __delete_from_swap_cache() isn't called at (e-1)
+
+	- If OLDPAGE is page-cache, charges will be kept at (g) because
+	  remove_from_swap_cache() isn't called at (e-1)
+
+   memcg provides the following hooks.
+
+	- mem_cgroup_prepare_migration(OLDPAGE)
+	  Called after (b) to account a charge (usage += PAGE_SIZE) against
+	  the memcg which OLDPAGE belongs to.
+
+	- mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
+	  Called after (f) before (g).
+	  If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
+	  charged, a charge by prepare_migration() is automatically canceled.
+	  If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
+
+	  But zap_pte() (by exit or munmap) can be called during migration,
+	  so we have to check if OLDPAGE/NEWPAGE is a valid page after
+	  commit().
+
+8. LRU
+   Each memcg has its own private LRU. Now, its handling is under global
+   VM's control (which means that it's handled under the global
+   zone->lru_lock). Almost all routines around memcg's LRU are called by
+   the global LRU's list management functions under zone->lru_lock.
+
+   A special function is mem_cgroup_isolate_pages(). This scans
+   memcg's private LRU and calls __isolate_lru_page() to extract a page
+   from the LRU.
+   (By __isolate_lru_page(), the page is removed from both the global and
+   the private LRU.)
+
+
+9. Typical Tests.
+
+   Tests for racy cases.
+
+   9.1 Small limit to memcg.
+	When testing racy cases, it is a good idea to set memcg's limit
+	very small rather than in GB. Many races were found in tests under
+	limits of a few KB or MB.
+	(Memory behavior under GB limits and under MB limits shows very
+	 different situations.)
+
+   9.2 Shmem
+	Historically, memcg's shmem handling was poor and we saw some amount
+	of trouble here. This is because shmem is page-cache but can be
+	SwapCache. Testing with shmem/tmpfs is always a good test.
+
+   9.3 Migration
+	For NUMA, migration is another special case. For an easy test,
+	cpuset is useful. The following is a sample script to do migration.
+
+	mount -t cgroup -o cpuset none /opt/cpuset
+
+	mkdir /opt/cpuset/01
+	echo 1 > /opt/cpuset/01/cpuset.cpus
+	echo 0 > /opt/cpuset/01/cpuset.mems
+	echo 1 > /opt/cpuset/01/cpuset.memory_migrate
+	mkdir /opt/cpuset/02
+	echo 1 > /opt/cpuset/02/cpuset.cpus
+	echo 1 > /opt/cpuset/02/cpuset.mems
+	echo 1 > /opt/cpuset/02/cpuset.memory_migrate
+
+	In the above setup, when you move a task from 01 to 02, page
+	migration from node 0 to node 1 will occur. The following is a
+	script to migrate all tasks under a cpuset.
+	--
+	move_task()
+	{
+	for pid in $1
+	do
+		/bin/echo $pid >$2/tasks 2>/dev/null
+		echo -n $pid
+		echo -n " "
+	done
+	echo END
+	}
+
+	G1_TASK=`cat ${G1}/tasks`
+	G2_TASK=`cat ${G2}/tasks`
+	move_task "${G1_TASK}" ${G2} &
+	--
+   9.4 Memory hotplug.
+	Memory hotplug testing is another good test.
+	To offline memory, do the following.
+	# echo offline > /sys/devices/system/memory/memoryXXX/state
+	(XXX is the place of memory)
+	This is an easy way to test page migration, too.
+
+   9.5 mkdir/rmdir
+	When using hierarchy, the mkdir/rmdir test should be done.
+	Use tests like the following.
+
+	echo 1 >/opt/cgroup/01/memory/use_hierarchy
+	mkdir /opt/cgroup/01/child_a
+	mkdir /opt/cgroup/01/child_b
+
+	set limit to 01.
+	add limit to 01/child_b
+	run jobs under child_a and child_b
+
+	create/delete the following groups at random while jobs are running.
+	/opt/cgroup/01/child_a/child_aa
+	/opt/cgroup/01/child_b/child_bb
+	/opt/cgroup/01/child_c
+
+	Running new jobs in a new group is also good.
+
+   9.6 Mount with other subsystems.
+	Mounting with other subsystems is a good test because there is a
+	race and lock dependency with other cgroup subsystems.
+
+	example)
+	# mount -t cgroup -o cpuset,memory,cpu,devices none /cgroup
+
+	and do task move, mkdir, rmdir etc...under this.
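To make the charge-commit-cancel protocol of section 3 (and the swap-in
steps (1)-(6) of section 4.1) concrete, here is a compressed sketch of the
calling pattern. It is illustrative only, condensed from the shape of the
2.6.28-era do_swap_page(); signatures are simplified and error paths are
omitted, so treat it as a sketch rather than the literal kernel code.

	/* sketch of the swap-in charge protocol, not the literal code */
	struct mem_cgroup *mem = NULL;
	struct page *page;

	page = lookup_swap_cache(entry);	/* (1) find SwapCache page  */
	lock_page(page);			/* (2)                      */
	if (mem_cgroup_try_charge_swapin(mm, page,
					 GFP_KERNEL, &mem))
		goto out;			/* (3) usage += PAGE_SIZE   */
	reuse_swap_page(page);			/* (4) may drop SwapCache   */
	mem_cgroup_commit_charge_swapin(page, mem);
						/* (5) keep or undo charge  */
	swap_free(entry);			/* (6) may uncharge swap    */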
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 1c07547d3f81..e1501964df1e 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -137,7 +137,32 @@ behind this approach is that a cgroup that aggressively uses a shared
 page will eventually get charged for it (once it is uncharged from
 the cgroup that brought it in -- this will happen on memory pressure).

-2.4 Reclaim
+Exception: If CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not used.
+When you do swapoff and force swapped-out pages of shmem(tmpfs) back
+into memory, charges for the pages are accounted against the
+caller of swapoff rather than the users of shmem.
+
+
+2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
+The Swap Extension allows you to record charges for swap. A swapped-in page
+is charged back to the original page allocator if possible.
+
+When swap is accounted, the following files are added.
+ - memory.memsw.usage_in_bytes.
+ - memory.memsw.limit_in_bytes.
+
+Usage of mem+swap is limited by memsw.limit_in_bytes.
+
+Note: why 'mem+swap' rather than swap.
+The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
+to move an account from memory to swap...there is no change in the usage
+of mem+swap.
+
+In other words, when we want to limit the usage of swap without affecting
+the global LRU, the mem+swap limit is better than just limiting swap from
+the OS point of view.
+
+2.5 Reclaim

 Each cgroup maintains a per cgroup LRU that consists of an active
 and inactive list. When a cgroup goes over its limit, we first try
@@ -207,12 +232,6 @@ exceeded.
 The memory.stat file gives accounting information. Now, the number of
 caches, RSS and Active pages/Inactive pages are shown.

-The memory.force_empty gives an interface to drop *all* charges by force.
-
-# echo 1 > memory.force_empty
-
-will drop all charges in cgroup. Currently, this is maintained for test.
-
 4. Testing

 Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11].
@@ -242,10 +261,106 @@ reclaimed.

 A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
 cgroup might have some charge associated with it, even though all
-tasks have migrated away from it. Such charges are automatically dropped at
-rmdir() if there are no tasks.
+tasks have migrated away from it.
+Such charges are freed (by default) or moved to the parent. When moved,
+both RSS and CACHES are moved to the parent.
+If both are busy, rmdir() returns -EBUSY. See 5.1 also.
+
+Charges recorded in swap information are not updated at removal of a cgroup.
+The recorded information is discarded, and a cgroup which uses swap
+(swapcache) will be charged as its new owner.
+
+
+5. Misc. interfaces.
+
+5.1 force_empty
+  The memory.force_empty interface is provided to make a cgroup's memory
+  usage empty. You can use this interface only when the cgroup has no tasks.
+  When you write anything to this file, e.g.
+
+  # echo 0 > memory.force_empty
+
+  almost all pages tracked by this memcg will be unmapped and freed. Some
+  pages cannot be freed because they are locked or in use. Such pages are
+  moved to the parent and this cgroup will be empty. But this may return
+  -EBUSY if things are too busy.
+
+  The typical use case for this interface is calling it before rmdir().
+  Because rmdir() moves all pages to the parent, some out-of-use page caches
+  can be moved to the parent. If you want to avoid that, force_empty will be
+  useful.
+
+5.2 stat file
+  The memory.stat file includes the following statistics (for now):
+	cache		- # of pages from page-cache and shmem.
+	rss		- # of pages from anonymous memory.
+	pgpgin		- # of charging events.
+	pgpgout		- # of uncharging events.
+	active_anon	- # of pages on the active lru of anon, shmem.
+	inactive_anon	- # of pages on the inactive lru of anon, shmem.
+	active_file	- # of pages on the active lru of file-cache.
+	inactive_file	- # of pages on the inactive lru of file cache.
+	unevictable	- # of pages which cannot be reclaimed. (mlocked etc.)
+
+  The following depend on CONFIG_DEBUG_VM:
+	inactive_ratio		- VM internal parameter. (see mm/page_alloc.c)
+	recent_rotated_anon	- VM internal parameter. (see mm/vmscan.c)
+	recent_rotated_file	- VM internal parameter. (see mm/vmscan.c)
+	recent_scanned_anon	- VM internal parameter. (see mm/vmscan.c)
+	recent_scanned_file	- VM internal parameter. (see mm/vmscan.c)
+
+  Memo:
+	recent_rotated means the recent frequency of lru rotation.
+	recent_scanned means the recent # of scans of the lru.
+	These are shown for easier debugging; please see the code for their
+	exact meanings.
+
+
+5.3 swappiness
+  Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups
+  only.
+
+  The following cgroups' swappiness can't be changed:
+  - the root cgroup (uses /proc/sys/vm/swappiness).
+  - a cgroup which uses hierarchy and has a child cgroup.
+  - a cgroup which uses hierarchy and is not the root of the hierarchy.
+
+
+6. Hierarchy support
+
+The memory controller supports a deep hierarchy and hierarchical accounting.
+The hierarchy is created by creating the appropriate cgroups in the
+cgroup filesystem. Consider, for example, the following cgroup filesystem
+hierarchy
+
+		root
+	      /  |   \
+	     /   |    \
+	    a    b     c
+	               | \
+	               |  \
+	               d   e
+
+In the diagram above, with hierarchical accounting enabled, all memory
+usage of e is accounted to its ancestors up until the root (i.e., c and
+root) that have memory.use_hierarchy enabled. If one of the ancestors goes
+over its limit, the reclaim algorithm reclaims from the tasks in the
+ancestor and the children of the ancestor.
+
+6.1 Enabling hierarchical accounting and reclaim
+
+The memory controller by default disables the hierarchy feature. Support
+can be enabled by writing 1 to the memory.use_hierarchy file of the root
+cgroup
+
+# echo 1 > memory.use_hierarchy
+
+The feature can be disabled by
+
+# echo 0 > memory.use_hierarchy
+
+NOTE1: Enabling/disabling will fail if the cgroup already has other
+cgroups created below it.
+
+NOTE2: This feature can be enabled/disabled per subtree.

-5. TODO
+7. TODO

 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
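For completeness, a small userspace sketch of the swap-extension and limit
interfaces described in sections 2.4 and 3 above. The mount point
/cgroups/0 is hypothetical (it depends on where the memory controller is
mounted and which group you created), and error handling is minimal.

	#include <stdio.h>

	/* hypothetical path of an already-created memory cgroup */
	#define CG "/cgroups/0/"

	static void cg_write(const char *file, const char *val)
	{
		char path[256];
		FILE *f;

		snprintf(path, sizeof(path), CG "%s", file);
		f = fopen(path, "w");
		if (f) {
			fprintf(f, "%s\n", val);
			fclose(f);
		}
	}

	int main(void)
	{
		char buf[64];
		FILE *f;

		cg_write("memory.limit_in_bytes", "4M");	/* memory alone */
		cg_write("memory.memsw.limit_in_bytes", "8M");	/* mem+swap */

		f = fopen(CG "memory.memsw.usage_in_bytes", "r");
		if (f) {
			if (fgets(buf, sizeof(buf), f))
				printf("mem+swap usage: %s", buf);
			fclose(f);
		}
		return 0;
	}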
diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet
index 4d184f2db0ea..d9251efdcec7 100644
--- a/Documentation/hwmon/abituguru-datasheet
+++ b/Documentation/hwmon/abituguru-datasheet
@@ -121,7 +121,7 @@ Once all bytes have been read data will hold 0x09, but there is no reason to
 test for this. Notice that the number of bytes is bank address dependent see
 above and below.

-After completing a successfull read it is advised to put the uGuru back in
+After completing a successful read it is advised to put the uGuru back in
 ready mode, so that it is ready for the next read / write cycle. This way
 if your program / driver is unloaded and later loaded again the detection
 algorithm described above will still work.
@@ -141,7 +141,7 @@ don't ask why this is the way it is.

 Once DATA holds 0x01 read CMD it should hold 0xAC now.

-After completing a successfull write it is advised to put the uGuru back in
+After completing a successful write it is advised to put the uGuru back in
 ready mode, so that it is ready for the next read / write cycle. This way
 if your program / driver is unloaded and later loaded again the detection
 algorithm described above will still work.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 532eacbbed62..fb849020aea9 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1562,6 +1562,9 @@ and is between 256 and 4096 characters. It is defined in the file

 	nosoftlockup	[KNL] Disable the soft-lockup detector.

+	noswapaccount	[KNL] Disable accounting of swap in memory resource
+			controller. (See Documentation/controllers/memory.txt)
+
 	nosync		[HW,M68K] Disables sync negotiation for all devices.

 	notsc		[BUGS=X86-32] Disable Time Stamp Counter
diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt
index 81a917ef96e9..6c974d28eeb4 100644
--- a/Documentation/powerpc/dts-bindings/fsl/board.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/board.txt
@@ -18,7 +18,7 @@ This is the memory-mapped registers for on board FPGA.

 Required properties:
 - compatible : should be "fsl,fpga-pixis".
-- reg : should contain the address and the lenght of the FPPGA register
+- reg : should contain the address and the length of the FPGA register
   set.

 Example (MPC8610HPCD):
@@ -27,3 +27,33 @@ Example (MPC8610HPCD):
 	compatible = "fsl,fpga-pixis";
 	reg = <0xe8000000 32>;
 };
+
+* Freescale BCSR GPIO banks
+
+Some BCSR registers act as simple GPIO controllers; each such
+register can be represented by a gpio-controller node.
+
+Required properties:
+- compatible : Should be "fsl,<board>-bcsr-gpio".
+- reg : Should contain the address and the length of the GPIO bank
+  register.
+- #gpio-cells : Should be two. The first cell is the pin number and the
+  second cell is used to specify optional parameters (currently unused).
+- gpio-controller : Marks the port as a GPIO controller.
+
+Example:
+
+	bcsr@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8360mds-bcsr";
+		reg = <1 0 0x8000>;
+		ranges = <0 1 0 0x8000>;
+
+		bcsr13: gpio-controller@d {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8360mds-bcsr-gpio";
+			reg = <0xd 1>;
+			gpio-controller;
+		};
+	};
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt
index 38d324d62b25..e5b071d46619 100644
--- a/Documentation/scsi/scsi_fc_transport.txt
+++ b/Documentation/scsi/scsi_fc_transport.txt
@@ -191,7 +191,7 @@ Vport States:
 	This is equivalent to a driver "attach" on an adapter, which is
 	independent of the adapter's link state.
 - Instantiation of the vport on the FC link via ELS traffic, etc.
-	This is equivalent to a "link up" and successfull link initialization.
+	This is equivalent to a "link up" and successful link initialization.
 Further information can be found in the interfaces section below for
 Vport Creation.

@@ -320,7 +320,7 @@ Vport Creation:
 	This is equivalent to a driver "attach" on an adapter, which is
 	independent of the adapter's link state.
 - Instantiation of the vport on the FC link via ELS traffic, etc.
-	This is equivalent to a "link up" and successfull link initialization.
+	This is equivalent to a "link up" and successful link initialization.

 The LLDD's vport_create() function will not synchronously wait for both
 parts to be fully completed before returning. It must validate that the
diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX
index 7b0ceaaad7af..d63fa024ac05 100644
--- a/Documentation/w1/masters/00-INDEX
+++ b/Documentation/w1/masters/00-INDEX
@@ -4,5 +4,7 @@ ds2482
 	- The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses.
 ds2490
 	- The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges.
+mxc_w1
+	- W1 master controller driver found on Freescale MX2/MX3 SoCs
 w1-gpio
 	- GPIO 1-wire bus master driver.
diff --git a/Documentation/w1/masters/mxc-w1 b/Documentation/w1/masters/mxc-w1
new file mode 100644
index 000000000000..97f6199a7f39
--- /dev/null
+++ b/Documentation/w1/masters/mxc-w1
@@ -0,0 +1,11 @@
+Kernel driver mxc_w1
+====================
+
+Supported chips:
+  * Freescale MX27, MX31 and probably other i.MX SoCs
+    Datasheets:
+	http://www.freescale.com/files/32bit/doc/data_sheet/MCIMX31.pdf?fpsp=1
+	http://www.freescale.com/files/dsp/MCIMX27.pdf?fpsp=1
+
+Author: Originally based on Freescale code, prepared for mainline by
+	Sascha Hauer <s.hauer@pengutronix.de>
diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink
index 3640c7c87d45..804445f745ed 100644
--- a/Documentation/w1/w1.netlink
+++ b/Documentation/w1/w1.netlink
@@ -5,69 +5,157 @@ Message types.
 =============

 There are three types of messages between w1 core and userspace:
-1. Events. They are generated each time new master or slave device found
-   either due to automatic or requested search.
-2. Userspace commands. Includes read/write and search/alarm search comamnds.
+1. Events. They are generated each time a new master or slave device is
+   found, either due to automatic or requested search.
+2. Userspace commands.
 3. Replies to userspace commands.


 Protocol.
 ========

-[struct cn_msg] - connector header. It's length field is equal to size of the attached data.
+[struct cn_msg] - connector header.
+	Its length field is equal to the size of the attached data.
 [struct w1_netlink_msg] - w1 netlink header.
 	__u8 type	- message type.
-			W1_SLAVE_ADD/W1_SLAVE_REMOVE - slave add/remove events.
-			W1_MASTER_ADD/W1_MASTER_REMOVE - master add/remove events.
-			W1_MASTER_CMD - userspace command for bus master device (search/alarm search).
-			W1_SLAVE_CMD - userspace command for slave device (read/write/ search/alarm search
-				for bus master device where given slave device found).
+			W1_LIST_MASTERS
+				list current bus masters
+			W1_SLAVE_ADD/W1_SLAVE_REMOVE
+				slave add/remove events
+			W1_MASTER_ADD/W1_MASTER_REMOVE
+				master add/remove events
+			W1_MASTER_CMD
+				userspace command for bus master
+				device (search/alarm search)
+			W1_SLAVE_CMD
+				userspace command for slave device
+				(read/write/touch)
 	__u8 res	- reserved
-	__u16 len	- size of attached to this header data.
+	__u16 len	- size of the data attached to this header
 	union {
-		__u8 id;			- slave unique device id
+		__u8 id[8];			- slave unique device id
 		struct w1_mst {
-			__u32		id;	- master's id.
+			__u32		id;	- master's id
 			__u32		res;	- reserved
 		} mst;
 	} id;

-[strucrt w1_netlink_cmd] - command for gived master or slave device.
+[struct w1_netlink_cmd] - command for given master or slave device.
 	__u8 cmd	- command opcode.
-			W1_CMD_READ	- read command.
-			W1_CMD_WRITE	- write command.
-			W1_CMD_SEARCH	- search command.
-			W1_CMD_ALARM_SEARCH - alarm search command.
+			W1_CMD_READ	- read command
+			W1_CMD_WRITE	- write command
+			W1_CMD_TOUCH	- touch command
+				(write and sample data back to userspace)
+			W1_CMD_SEARCH	- search command
+			W1_CMD_ALARM_SEARCH - alarm search command
 	__u8 res	- reserved
-	__u16 len	- length of data for this command.
-		For read command data must be allocated like for write command.
-	__u8 data[0]	- data for this command.
+	__u16 len	- length of data for this command
+		For read command data must be allocated like for write command
+	__u8 data[0]	- data for this command


-Each connector message can include one or more w1_netlink_msg with zero of more attached w1_netlink_cmd messages.
+Each connector message can include one or more w1_netlink_msg with
+zero or more attached w1_netlink_cmd messages.

-For event messages there are no w1_netlink_cmd embedded structures, only connector header
-and w1_netlink_msg strucutre with "len" field being zero and filled type (one of event types)
-and id - either 8 bytes of slave unique id in host order, or master's id, which is assigned
-to bus master device when it is added to w1 core.
+For event messages there are no w1_netlink_cmd embedded structures,
+only the connector header and a w1_netlink_msg structure with the "len"
+field being zero and the type filled in (one of the event types) and id:
+either 8 bytes of slave unique id in host order,
+or master's id, which is assigned to the bus master device
+when it is added to the w1 core.
+
+Currently replies to userspace commands are only generated for read
+command requests. One reply is generated for exactly one w1_netlink_cmd
+read request. Replies are not combined when sent - i.e. typical reply
+messages look like the following:

-Currently replies to userspace commands are only generated for read command request.
-One reply is generated exactly for one w1_netlink_cmd read request.
-Replies are not combined when sent - i.e. typical reply messages looks like the following:
 [cn_msg][w1_netlink_msg][w1_netlink_cmd]
-cn_msg.len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd) + cmd->len;
+cn_msg.len = sizeof(struct w1_netlink_msg) +
+	     sizeof(struct w1_netlink_cmd) +
+	     cmd->len;
 w1_netlink_msg.len = sizeof(struct w1_netlink_cmd) + cmd->len;
 w1_netlink_cmd.len = cmd->len;

+Replies to W1_LIST_MASTERS should send a message back to userspace
+which will contain a list of all registered master ids in the following
+format:
+
+	cn_msg (CN_W1_IDX.CN_W1_VAL as id, len is equal to sizeof(struct
+	w1_netlink_msg) plus number of masters multiplied by 4)
+	w1_netlink_msg (type: W1_LIST_MASTERS, len is equal to
+		number of masters multiplied by 4 (u32 size))
+	id0 ... idN
+
+	Each message is at most 4k in size, so if the number of master
+	devices exceeds this, it will be split into several messages;
+	cn.seq will be increased for each one.
+
+W1 search and alarm search commands.
+request:
+[cn_msg]
+  [w1_netlink_msg type = W1_MASTER_CMD
+	id is equal to the bus master id to use for searching]
+  [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH]
+
+reply:
+  [cn_msg, ack = 1 and increasing, 0 means the last message,
+	seq is equal to the request seq]
+  [w1_netlink_msg type = W1_MASTER_CMD]
+  [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH
+	len is equal to number of IDs multiplied by 8]
+  [64bit-id0 ... 64bit-idN]
+Length in each header corresponds to the size of the data behind it, so
+w1_netlink_cmd->len = N * 8; where N is the number of IDs in this message.
+	Can be zero.
+w1_netlink_msg->len = sizeof(struct w1_netlink_cmd) + N * 8;
+cn_msg->len = sizeof(struct w1_netlink_msg) +
+	      sizeof(struct w1_netlink_cmd) +
+	      N*8;
+
+W1 reset command.
+[cn_msg]
+  [w1_netlink_msg type = W1_MASTER_CMD
+	id is equal to the bus master id to use for searching]
+  [w1_netlink_cmd cmd = W1_CMD_RESET]
+
+
+Command status replies.
+======================
+
+Each command (either root, master or slave, with or without a
+w1_netlink_cmd structure) will be 'acked' by the w1 core. The format of the
+reply is the same as the request message, except that length parameters do
+not account for data requested by the user, i.e. read/write/touch IO
+requests will not contain data, so w1_netlink_cmd.len will be 0,
+w1_netlink_msg.len will be the size of the w1_netlink_cmd structure and
+cn_msg.len will be equal to the sum of sizeof(struct w1_netlink_msg) and
+sizeof(struct w1_netlink_cmd).
+If a reply is generated for a master or root command (which do not have a
+w1_netlink_cmd attached), the reply will contain only cn_msg and
+w1_netlink_msg structures.
+
+The w1_netlink_msg.status field will carry a positive error value
+(EINVAL for example) or zero in case of success.
+
+All other fields in every structure will mirror the same parameters in the
+request message (except lengths as described above).
+
+A status reply is generated for every w1_netlink_cmd embedded in the
+w1_netlink_msg; if there are no w1_netlink_cmd structures,
+a reply will be generated for the w1_netlink_msg.
+
+All w1_netlink_cmd command structures are handled in every w1_netlink_msg,
+even if there were errors; only a length mismatch interrupts message
+processing.
+

 Operation steps in w1 core when new command is received.
 =======================================================

-When new message (w1_netlink_msg) is received w1 core detects if it is master of slave request,
-according to w1_netlink_msg.type field.
+When a new message (w1_netlink_msg) is received, the w1 core detects if it
+is a master or slave request, according to the w1_netlink_msg.type field.
 Then master or slave device is searched for.
-When found, master device (requested or those one on where slave device is found) is locked.
-If slave command is requested, then reset/select procedure is started to select given device.
+When found, the master device (requested or the one where the slave device
+is found) is locked. If a slave command is requested, then the reset/select
+procedure is started to select the given device.

 Then all operations requested in the w1_netlink_msg are performed one by one.
 If a command requires a reply (like the read command) it is sent on command completion.
@@ -82,8 +170,8 @@ Connector [1] specific documentation.
 Each connector message includes two u32 fields as "address".
 w1 uses CN_W1_IDX and CN_W1_VAL defined in include/linux/connector.h header.
 Each message also includes sequence and acknowledge numbers.
-Sequence number for event messages is appropriate bus master sequence number increased with
-each event message sent "through" this master.
+Sequence number for event messages is appropriate bus master sequence number
+increased with each event message sent "through" this master.
 Sequence number for userspace requests is set by userspace application.
 Sequence number for reply is the same as was in request, and
 acknowledge number is set to seq+1.
@@ -93,6 +181,6 @@ Additional documentation, source code examples.
 ============================================

 1. Documentation/connector
-2. http://tservice.net.ru/~s0mbre/archive/w1
-This archive includes userspace application w1d.c which
-uses read/write/search commands for all master/slave devices found on the bus.
+2. http://www.ioremap.net/archive/w1
+This archive includes userspace application w1d.c which uses
+read/write/search commands for all master/slave devices found on the bus.
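To tie the message layout and the length arithmetic above together, here is
a minimal userspace sketch that assembles a W1_SLAVE_CMD/W1_CMD_READ
request. The struct layouts are transcribed from the description above (the
authoritative definitions live in include/linux/connector.h and
drivers/w1/w1_netlink.h); the numeric values of the two constants are taken
from w1_netlink.h of this era and should be verified against your tree.
Sending the buffer over a NETLINK_CONNECTOR socket is left out.

	#include <string.h>
	#include <stddef.h>
	#include <linux/types.h>

	/* layouts as described above */
	struct cb_id { __u32 idx, val; };
	struct cn_msg {
		struct cb_id id;
		__u32 seq, ack;
		__u16 len, flags;
		__u8 data[0];
	};
	struct w1_netlink_msg {
		__u8 type;
		__u8 status;
		__u16 len;
		union {
			__u8 id[8];
			struct w1_mst { __u32 id, res; } mst;
		} id;
		__u8 data[0];
	};
	struct w1_netlink_cmd {
		__u8 cmd;
		__u8 res;
		__u16 len;
		__u8 data[0];
	};

	#define W1_SLAVE_CMD	5	/* from drivers/w1/w1_netlink.h */
	#define W1_CMD_READ	0	/* verify against your tree     */

	/* build a read request for one slave; returns total buffer size;
	 * cn->id must still be set to CN_W1_IDX/CN_W1_VAL before sending */
	static size_t w1_build_read(void *buf, const __u8 slave_id[8],
				    __u16 bytes)
	{
		struct cn_msg *cn = buf;
		struct w1_netlink_msg *msg = (void *)cn->data;
		struct w1_netlink_cmd *cmd = (void *)msg->data;

		memset(buf, 0,
		       sizeof(*cn) + sizeof(*msg) + sizeof(*cmd) + bytes);
		/* lengths nest exactly as documented above */
		cn->len   = sizeof(*msg) + sizeof(*cmd) + bytes;
		msg->type = W1_SLAVE_CMD;
		msg->len  = sizeof(*cmd) + bytes;
		memcpy(msg->id.id, slave_id, 8);
		cmd->cmd  = W1_CMD_READ;
		cmd->len  = bytes; /* read buffer allocated like for write */
		return sizeof(*cn) + cn->len;
	}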
diff --git a/MAINTAINERS b/MAINTAINERS
index a01884407fe4..57e0309243cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1360,6 +1360,11 @@ P: Maciej W. Rozycki
 M:	macro@linux-mips.org
 S:	Maintained

+DELL LAPTOP DRIVER
+P:	Matthew Garrett
+M:	mjg59@srcf.ucam.org
+S:	Maintained
+
 DELL LAPTOP SMM DRIVER
 P:	Massimo Dal Zotto
 M:	dz@debian.org
@@ -3484,6 +3489,12 @@ L: linuxppc-dev@ozlabs.org
 L:	cbe-oss-dev@ozlabs.org
 S:	Supported

+PS3VRAM DRIVER
+P:	Jim Paris
+M:	jim@jtan.com
+L:	cbe-oss-dev@ozlabs.org
+S:	Maintained
+
 PVRUSB2 VIDEO4LINUX DRIVER
 P:	Mike Isely
 M:	isely@pobox.com
diff --git a/arch/arm/configs/clps7500_defconfig b/arch/arm/configs/clps7500_defconfig
deleted file mode 100644
index 49e9f9d8b3d1..000000000000
--- a/arch/arm/configs/clps7500_defconfig
+++ /dev/null
@@ -1,801 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc1-bk2
-# Sun Mar 27 17:20:48 2005
-#
-CONFIG_ARM=y
-CONFIG_MMU=y
-CONFIG_UID16=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_IOMAP=y
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
-# CONFIG_MODULES is not set
-
-#
-# System Type
-#
-CONFIG_ARCH_CLPS7500=y
-# CONFIG_ARCH_CLPS711X is not set
-# CONFIG_ARCH_CO285 is not set
-# CONFIG_ARCH_EBSA110 is not set
-# CONFIG_ARCH_FOOTBRIDGE is not set
-# CONFIG_ARCH_INTEGRATOR is not set
-# CONFIG_ARCH_IOP3XX is not set
-# CONFIG_ARCH_IXP4XX is not set
-# CONFIG_ARCH_IXP2000 is not set
-# CONFIG_ARCH_L7200 is not set
-# CONFIG_ARCH_PXA is not set
-# CONFIG_ARCH_RPC is not set
-# CONFIG_ARCH_SA1100 is not set
-# CONFIG_ARCH_S3C2410 is not set
-# CONFIG_ARCH_SHARK is not set
-# CONFIG_ARCH_LH7A40X is not set
-# CONFIG_ARCH_OMAP is not set
-# CONFIG_ARCH_VERSATILE is not set
-# CONFIG_ARCH_IMX is not set
-# CONFIG_ARCH_H720X is not set
-
-#
-# Processor Type
-#
-CONFIG_CPU_32=y
-CONFIG_CPU_ARM710=y
-CONFIG_CPU_32v3=y
-CONFIG_CPU_CACHE_V3=y
-CONFIG_CPU_CACHE_VIVT=y
-CONFIG_CPU_COPY_V3=y
-CONFIG_CPU_TLB_V3=y
-
-#
-# Processor Features
-#
-CONFIG_TIMER_ACORN=y
-
-#
-# Bus support
-#
-CONFIG_ISA=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Kernel Features
-#
-# CONFIG_PREEMPT is not set
-CONFIG_ALIGNMENT_TRAP=y
-
-#
-# Boot options
-#
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="mem=16M root=nfs"
-# CONFIG_XIP_KERNEL is not set
-
-#
-# Floating point emulation
-#
-
-#
-# At least one emulation must be selected
-#
-# CONFIG_FPE_NWFPE is not set
-
-#
-# Userspace binary formats
-#
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_AOUT is not set
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_ARTHUR is not set
-
-#
-# Power management options
-#
-# CONFIG_PM is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-# CONFIG_MTD_BLOCK is not set
-# CONFIG_MTD_BLOCK_RO is not set
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-
-#
-# Self-contained MTD device drivers
-#
-# CONFIG_MTD_SLRAM is not set
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-# CONFIG_MTD_BLOCK2MTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-CONFIG_PARPORT=y
-CONFIG_PARPORT_PC=y
-CONFIG_PARPORT_PC_FIFO=y
-# CONFIG_PARPORT_PC_SUPERIO is not set
-# CONFIG_PARPORT_ARC is not set
-# CONFIG_PARPORT_GSC is not set
-CONFIG_PARPORT_1284=y
-
-#
-# Plug and Play support
-#
-# CONFIG_PNP is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_XD is not set
-# CONFIG_PARIDE is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-CONFIG_BLK_DEV_NBD=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-# CONFIG_BLK_DEV_INITRD is not set
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_PACKET is not set
-# CONFIG_NETLINK_DEV is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_PNP=y
-# CONFIG_IP_PNP_DHCP is not set
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IP_PNP_RARP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-# CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-# CONFIG_MII is not set
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_LANCE is not set
-# CONFIG_NET_VENDOR_SMC is not set
-# CONFIG_SMC91X is not set
-# CONFIG_NET_VENDOR_RACAL is not set
-# CONFIG_AT1700 is not set
-# CONFIG_DEPCA is not set
-# CONFIG_HP100 is not set
-# CONFIG_NET_ISA is not set
-CONFIG_NET_PCI=y
-# CONFIG_AC3200 is not set
-# CONFIG_APRICOT is not set
-CONFIG_CS89x0=y
-# CONFIG_NET_POCKET is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_PLIP is not set
-CONFIG_PPP=y
-# CONFIG_PPP_MULTILINK is not set
-# CONFIG_PPP_FILTER is not set
-# CONFIG_PPP_ASYNC is not set
-# CONFIG_PPP_SYNC_TTY is not set
-# CONFIG_PPP_DEFLATE is not set
-# CONFIG_PPP_BSDCOMP is not set
-# CONFIG_PPPOE is not set
-CONFIG_SLIP=y
-CONFIG_SLIP_COMPRESSED=y
-# CONFIG_SLIP_SMART is not set
-# CONFIG_SLIP_MODE_SLIP6 is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
-# CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
-# CONFIG_KEYBOARD_NEWTON is not set
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=y
-# CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_INPORT is not set
-# CONFIG_MOUSE_LOGIBM is not set
-# CONFIG_MOUSE_PC110PAD is not set
-# CONFIG_MOUSE_VSXXXAA is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_PARKBD is not set
-CONFIG_SERIO_RPCKBD=y
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=4
-# CONFIG_SERIAL_8250_EXTENDED is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-CONFIG_PRINTER=y
-# CONFIG_LP_CONSOLE is not set
-# CONFIG_PPDEV is not set
-# CONFIG_TIPAR is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_NVRAM is not set
-# CONFIG_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_DRM is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-
-#
-# I2C support
-#
-CONFIG_I2C=y
-# CONFIG_I2C_CHARDEV is not set
-
-#
-# I2C Algorithms
-#
-CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ELEKTOR is not set
-# CONFIG_I2C_PARPORT is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Hardware Sensors Chip support
-#
-# CONFIG_I2C_SENSOR is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-
-#
-# Other I2C Chip support
-#
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-CONFIG_FB=y
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-CONFIG_FB_SOFT_CURSOR=y
-# CONFIG_FB_MODE_HELPERS is not set
-# CONFIG_FB_TILEBLITTING is not set
-CONFIG_FB_ACORN=y
-# CONFIG_FB_VIRTUAL is not set
-
-#
-# Console display driver support
-#
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_MDA_CONSOLE is not set
-CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-# CONFIG_FONT_6x11 is not set
-# CONFIG_FONT_PEARL_8x8 is not set
-# CONFIG_FONT_ACORN_8x8 is not set
-# CONFIG_FONT_MINI_4x6 is not set
-# CONFIG_FONT_SUN8x16 is not set
-# CONFIG_FONT_SUN12x22 is not set
-
-#
-# Logo configuration
-#
-CONFIG_LOGO=y
-CONFIG_LOGO_LINUX_MONO=y
-CONFIG_LOGO_LINUX_VGA16=y
-CONFIG_LOGO_LINUX_CLUT224=y
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-CONFIG_MINIX_FS=y
-# CONFIG_ROMFS_FS is not set
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
-# CONFIG_DEVPTS_FS_XATTR is not set
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-# CONFIG_NFS_V3 is not set
-# CONFIG_NFS_V4 is not set
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_SUNRPC=y
-# CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_FRAME_POINTER=y
-# CONFIG_DEBUG_USER is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c
index 50a30bc91872..8ac9b8424007 100644
--- a/arch/arm/kernel/isa.c
+++ b/arch/arm/kernel/isa.c
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/sysctl.h>
 #include <linux/init.h>
+#include <linux/io.h>
 
 static unsigned int isa_membase, isa_portbase, isa_portshift;
 
diff --git a/arch/arm/mach-at91/at91cap9.c b/arch/arm/mach-at91/at91cap9.c
index 0a38c69fdbc4..73376170fb91 100644
--- a/arch/arm/mach-at91/at91cap9.c
+++ b/arch/arm/mach-at91/at91cap9.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c
index 28594fcc88e3..2e9ecad97f3d 100644
--- a/arch/arm/mach-at91/at91rm9200.c
+++ b/arch/arm/mach-at91/at91rm9200.c
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/at91rm9200.h>
diff --git a/arch/arm/mach-at91/at91sam9260.c b/arch/arm/mach-at91/at91sam9260.c
index accb69ec478e..0894f1077be7 100644
--- a/arch/arm/mach-at91/at91sam9260.c
+++ b/arch/arm/mach-at91/at91sam9260.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/cpu.h>
diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c
index 7b51a59ae8b3..3acd7d7e6a42 100644
--- a/arch/arm/mach-at91/at91sam9261.c
+++ b/arch/arm/mach-at91/at91sam9261.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/at91sam9261.h>
diff --git a/arch/arm/mach-at91/at91sam9263.c b/arch/arm/mach-at91/at91sam9263.c
index ada4b6769107..942792d630d8 100644
--- a/arch/arm/mach-at91/at91sam9263.c
+++ b/arch/arm/mach-at91/at91sam9263.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/at91sam9263.h>
diff --git a/arch/arm/mach-at91/at91sam9rl.c b/arch/arm/mach-at91/at91sam9rl.c
index 252e954b49fd..211c5c14a1e6 100644
--- a/arch/arm/mach-at91/at91sam9rl.c
+++ b/arch/arm/mach-at91/at91sam9rl.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 
+#include <asm/irq.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <mach/cpu.h>
diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c
index 9b937ee4815a..35e12a49d1a6 100644
--- a/arch/arm/mach-at91/board-sam9rlek.c
+++ b/arch/arm/mach-at91/board-sam9rlek.c
@@ -29,6 +29,7 @@
 #include <mach/hardware.h>
 #include <mach/board.h>
 #include <mach/gpio.h>
+#include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 
 #include "sam9_smc.h"
diff --git a/arch/arm/mach-clps711x/edb7211-mm.c b/arch/arm/mach-clps711x/edb7211-mm.c
index c58e32ec4c5d..0bea1454ae03 100644
--- a/arch/arm/mach-clps711x/edb7211-mm.c
+++ b/arch/arm/mach-clps711x/edb7211-mm.c
@@ -24,7 +24,6 @@
 
 #include <mach/hardware.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/sizes.h>
 
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-clps711x/fortunet.c b/arch/arm/mach-clps711x/fortunet.c
index 7122b3d21043..7430e4049d87 100644
--- a/arch/arm/mach-clps711x/fortunet.c
+++ b/arch/arm/mach-clps711x/fortunet.c
@@ -24,7 +24,6 @@
 #include <linux/initrd.h>
 
 #include <mach/hardware.h>
-#include <asm/irq.h>
 #include <asm/setup.h>
 #include <asm/mach-types.h>
 
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 3d4b1de8f898..808633f9f03c 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -20,6 +20,7 @@
 
 #include <mach/hardware.h>
 #include <mach/i2c.h>
+#include <mach/irqs.h>
 
 static struct resource i2c_resources[] = {
 	{
diff --git a/arch/arm/mach-davinci/include/mach/gpio.h b/arch/arm/mach-davinci/include/mach/gpio.h
index b3a2961f0f46..b456f079f43f 100644
--- a/arch/arm/mach-davinci/include/mach/gpio.h
+++ b/arch/arm/mach-davinci/include/mach/gpio.h
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <asm-generic/gpio.h>
 #include <mach/hardware.h>
+#include <mach/irqs.h>
 
 /*
  * basic gpio routines
diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c
index 36ff06d4df15..b97f529e58e8 100644
--- a/arch/arm/mach-footbridge/common.c
+++ b/arch/arm/mach-footbridge/common.c
@@ -28,12 +28,17 @@
 
 #include "common.h"
 
-extern void __init isa_init_irq(unsigned int irq);
-
 unsigned int mem_fclk_21285 = 50000000;
 
 EXPORT_SYMBOL(mem_fclk_21285);
 
+static void __init early_fclk(char **arg)
+{
+	mem_fclk_21285 = simple_strtoul(*arg, arg, 0);
+}
+
+__early_param("mem_fclk_21285=", early_fclk);
+
 static int __init parse_tag_memclk(const struct tag *tag)
 {
 	mem_fclk_21285 = tag->u.memclk.fmemclk;
diff --git a/arch/arm/mach-footbridge/common.h b/arch/arm/mach-footbridge/common.h
index 580e31bbc711..b05e662d21ad 100644
--- a/arch/arm/mach-footbridge/common.h
+++ b/arch/arm/mach-footbridge/common.h
@@ -7,3 +7,4 @@ extern void isa_rtc_init(void);
 extern void footbridge_map_io(void);
 extern void footbridge_init_irq(void);
 
+extern void isa_init_irq(unsigned int irq);
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index 133086019e3e..3ffa54841ec5 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -287,6 +287,9 @@ struct pci_bus * __init dc21285_scan_bus(int nr, struct pci_sys_data *sys)
 	return pci_scan_bus(0, &dc21285_ops, sys);
 }
 
+#define dc21285_request_irq(_a, _b, _c, _d, _e) \
+	WARN_ON(request_irq(_a, _b, _c, _d, _e) < 0)
+
 void __init dc21285_preinit(void)
 {
 	unsigned int mem_size, mem_mask;
@@ -335,16 +338,16 @@ void __init dc21285_preinit(void)
 	/*
 	 * We don't care if these fail.
 	 */
-	request_irq(IRQ_PCI_SERR, dc21285_serr_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_SERR, dc21285_serr_irq, IRQF_DISABLED,
 		"PCI system error", &serr_timer);
-	request_irq(IRQ_PCI_PERR, dc21285_parity_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_PERR, dc21285_parity_irq, IRQF_DISABLED,
 		"PCI parity error", &perr_timer);
-	request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, IRQF_DISABLED,
 		"PCI abort", NULL);
-	request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, IRQF_DISABLED,
 		"Discard timer", NULL);
-	request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, IRQF_DISABLED,
+	dc21285_request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, IRQF_DISABLED,
 		"PCI data parity", NULL);
 
 	if (cfn_mode) {
 		static struct resource csrio;
diff --git a/arch/arm/mach-footbridge/isa-irq.c b/arch/arm/mach-footbridge/isa-irq.c
index 9ee80a211d3c..8bfd06aeb64d 100644
--- a/arch/arm/mach-footbridge/isa-irq.c
+++ b/arch/arm/mach-footbridge/isa-irq.c
@@ -28,6 +28,8 @@
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 
+#include "common.h"
+
 static void isa_mask_pic_lo_irq(unsigned int irq)
 {
 	unsigned int mask = 1 << (irq & 7);
diff --git a/arch/arm/mach-h720x/h7202-eval.c b/arch/arm/mach-h720x/h7202-eval.c
index 56161d55cf47..8c0ba99d683f 100644
--- a/arch/arm/mach-h720x/h7202-eval.c
+++ b/arch/arm/mach-h720x/h7202-eval.c
@@ -25,6 +25,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/mach/arch.h>
+#include <mach/irqs.h>
 #include <mach/hardware.h>
 #include "common.h"
 
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 7b8ef97fb501..b3404b7775b3 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -698,6 +698,7 @@ void __init kirkwood_init(void)
 	printk(KERN_INFO "Kirkwood: %s, TCLK=%d.\n",
 	       kirkwood_id(), kirkwood_tclk);
 	kirkwood_ge00_shared_data.t_clk = kirkwood_tclk;
+	kirkwood_ge01_shared_data.t_clk = kirkwood_tclk;
 	kirkwood_spi_plat_data.tclk = kirkwood_tclk;
 	kirkwood_uart0_data[0].uartclk = kirkwood_tclk;
 	kirkwood_uart1_data[0].uartclk = kirkwood_tclk;
diff --git a/arch/arm/mach-kirkwood/pcie.c b/arch/arm/mach-kirkwood/pcie.c
index f6b08f207c89..73fccacd1a73 100644
--- a/arch/arm/mach-kirkwood/pcie.c
+++ b/arch/arm/mach-kirkwood/pcie.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/mbus.h>
+#include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
 #include "common.h"
diff --git a/arch/arm/mach-ks8695/devices.c b/arch/arm/mach-ks8695/devices.c
index 36ab0fd3d9b6..b89fb6d46ccc 100644
--- a/arch/arm/mach-ks8695/devices.c
+++ b/arch/arm/mach-ks8695/devices.c
@@ -22,6 +22,7 @@
 
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/regs-wan.h>
 #include <mach/regs-lan.h>
 #include <mach/regs-hpna.h>
diff --git a/arch/arm/mach-msm/devices.c b/arch/arm/mach-msm/devices.c
index f2a74b92a97f..31b6b30e98bf 100644
--- a/arch/arm/mach-msm/devices.c
+++ b/arch/arm/mach-msm/devices.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/msm_iomap.h>
 #include "devices.h"
 
diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c
index 430ea84d587d..aad3a7a2f830 100644
--- a/arch/arm/mach-mv78xx0/pcie.c
+++ b/arch/arm/mach-mv78xx0/pcie.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/mbus.h>
+#include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
 #include "common.h"
diff --git a/arch/arm/mach-mx2/devices.c b/arch/arm/mach-mx2/devices.c
index af121f5ab710..2f9240be1c76 100644
--- a/arch/arm/mach-mx2/devices.c
+++ b/arch/arm/mach-mx2/devices.c
@@ -32,6 +32,7 @@
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 
+#include <mach/irqs.h>
 #include <mach/hardware.h>
 
 /*
diff --git a/arch/arm/mach-mx3/devices.c b/arch/arm/mach-mx3/devices.c
index 1d46cb4adf96..f8428800f286 100644
--- a/arch/arm/mach-mx3/devices.c
+++ b/arch/arm/mach-mx3/devices.c
@@ -22,6 +22,7 @@
 #include <linux/serial.h>
 #include <linux/gpio.h>
 #include <mach/hardware.h>
+#include <mach/irqs.h>
 #include <mach/imx-uart.h>
 
 static struct resource uart0[] = {
diff --git a/arch/arm/mach-netx/fb.c b/arch/arm/mach-netx/fb.c
index ea8fa8898fe8..1d844e228ea9 100644
--- a/arch/arm/mach-netx/fb.c
+++ b/arch/arm/mach-netx/fb.c
@@ -24,6 +24,8 @@
 #include <linux/amba/clcd.h>
 #include <linux/err.h>
 
+#include <asm/irq.h>
+
 #include <mach/netx-regs.h>
 #include <mach/hardware.h>
 
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c
index d51d627ce7cf..f201fddb594f 100644
--- a/arch/arm/mach-netx/time.c
+++ b/arch/arm/mach-netx/time.c
@@ -163,7 +163,7 @@ static void __init netx_timer_init(void)
 	 * Adding some safety ... */
 	netx_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xa00, &netx_clockevent);
-	netx_clockevent.cpumask = cpumask_of_cpu(0);
+	netx_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&netx_clockevent);
 }
 
diff --git a/arch/arm/mach-netx/xc.c b/arch/arm/mach-netx/xc.c
index 8fc6205dc3a5..181a78ba8165 100644
--- a/arch/arm/mach-netx/xc.c
+++ b/arch/arm/mach-netx/xc.c
@@ -24,6 +24,7 @@
 #include <linux/io.h>
 
 #include <mach/hardware.h>
+#include <mach/irqs.h>
 #include <mach/netx-regs.h>
 
 #include <mach/xc.h>
diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c
index 7de7c6915584..4474da7bc88a 100644
--- a/arch/arm/mach-omap1/mcbsp.c
+++ b/arch/arm/mach-omap1/mcbsp.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 
 #include <mach/dma.h>
+#include <mach/irqs.h>
 #include <mach/mux.h>
 #include <mach/cpu.h>
 #include <mach/mcbsp.h>
diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c
index cae3ebe249b3..acdc709901cd 100644
--- a/arch/arm/mach-omap2/mcbsp.c
+++ b/arch/arm/mach-omap2/mcbsp.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 
 #include <mach/dma.h>
+#include <mach/irqs.h>
 #include <mach/mux.h>
 #include <mach/cpu.h>
 #include <mach/mcbsp.h>
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
index a7b7d77b1b09..d0a785a3b880 100644
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/mbus.h>
+#include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
 #include "common.h"
diff --git a/arch/arm/mach-pnx4008/gpio.c b/arch/arm/mach-pnx4008/gpio.c
index 015cc21d5f55..f219914f5b29 100644
--- a/arch/arm/mach-pnx4008/gpio.c
+++ b/arch/arm/mach-pnx4008/gpio.c
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <mach/hardware.h>
 #include <mach/platform.h>
 #include <mach/gpio.h>
 
diff --git a/arch/arm/mach-pnx4008/i2c.c b/arch/arm/mach-pnx4008/i2c.c
index 87c093286ff9..f3fea29c00d3 100644
--- a/arch/arm/mach-pnx4008/i2c.c
+++ b/arch/arm/mach-pnx4008/i2c.c
@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/err.h>
 #include <mach/platform.h>
+#include <mach/irqs.h>
 #include <mach/i2c.h>
 
 static int set_clock_run(struct platform_device *pdev)
diff --git a/arch/arm/mach-pxa/e350.c b/arch/arm/mach-pxa/e350.c
index 251129391d7d..edcd9d5ce545 100644
--- a/arch/arm/mach-pxa/e350.c
+++ b/arch/arm/mach-pxa/e350.c
@@ -20,6 +20,7 @@
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
 
+#include <mach/irqs.h>
 #include <mach/mfp-pxa25x.h>
 #include <mach/pxa-regs.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-pxa/e400.c b/arch/arm/mach-pxa/e400.c
index bed0336aca3d..77bb8e2c48c0 100644
--- a/arch/arm/mach-pxa/e400.c
+++ b/arch/arm/mach-pxa/e400.c
@@ -28,6 +28,7 @@
 #include <mach/eseries-gpio.h>
 #include <mach/pxafb.h>
 #include <mach/udc.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
diff --git a/arch/arm/mach-pxa/e740.c b/arch/arm/mach-pxa/e740.c
index b00d670b2ea6..6d48e00f4f0b 100644
--- a/arch/arm/mach-pxa/e740.c
+++ b/arch/arm/mach-pxa/e740.c
@@ -30,6 +30,7 @@
 #include <mach/eseries-gpio.h>
 #include <mach/udc.h>
 #include <mach/irda.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
diff --git a/arch/arm/mach-pxa/e750.c b/arch/arm/mach-pxa/e750.c
index 84d7c1aac58d..be1ab8edb973 100644
--- a/arch/arm/mach-pxa/e750.c
+++ b/arch/arm/mach-pxa/e750.c
@@ -29,6 +29,7 @@
 #include <mach/eseries-gpio.h>
 #include <mach/udc.h>
 #include <mach/irda.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
@@ -105,6 +106,57 @@ static struct platform_device e750_fb_device = {
 	.resource = e750_fb_resources,
 };
 
+/* -------------------- e750 MFP parameters -------------------- */
+
+static unsigned long e750_pin_config[] __initdata = {
+	/* Chip selects */
+	GPIO15_nCS_1,   /* CS1 - Flash */
+	GPIO79_nCS_3,   /* CS3 - IMAGEON */
+	GPIO80_nCS_4,   /* CS4 - TMIO */
+
+	/* Clocks */
+	GPIO11_3_6MHz,
+
+	/* BTUART */
+	GPIO42_BTUART_RXD,
+	GPIO43_BTUART_TXD,
+	GPIO44_BTUART_CTS,
+
+	/* TMIO controller */
+	GPIO19_GPIO, /* t7l66xb #PCLR */
+	GPIO45_GPIO, /* t7l66xb #SUSPEND (NOT BTUART!) */
+
+	/* UDC */
+	GPIO13_GPIO,
+	GPIO3_GPIO,
+
+	/* IrDA */
+	GPIO38_GPIO | MFP_LPM_DRIVE_HIGH,
+
+	/* PC Card */
+	GPIO8_GPIO,   /* CD0 */
+	GPIO44_GPIO,  /* CD1 */
+	GPIO11_GPIO,  /* IRQ0 */
+	GPIO6_GPIO,   /* IRQ1 */
+	GPIO27_GPIO,  /* RST0 */
+	GPIO24_GPIO,  /* RST1 */
+	GPIO20_GPIO,  /* PWR0 */
+	GPIO23_GPIO,  /* PWR1 */
+	GPIO48_nPOE,
+	GPIO49_nPWE,
+	GPIO50_nPIOR,
+	GPIO51_nPIOW,
+	GPIO52_nPCE_1,
+	GPIO53_nPCE_2,
+	GPIO54_nPSKTSEL,
+	GPIO55_nPREG,
+	GPIO56_nPWAIT,
+	GPIO57_nIOIS16,
+
+	/* wakeup */
+	GPIO0_GPIO | WAKEUP_ON_EDGE_RISE,
+};
+
 /* ----------------- e750 tc6393xb parameters ------------------ */
 
 static struct tc6393xb_platform_data e750_tc6393xb_info = {
@@ -137,6 +189,7 @@ static struct platform_device *devices[] __initdata = {
 
 static void __init e750_init(void)
 {
+	pxa2xx_mfp_config(ARRAY_AND_SIZE(e750_pin_config));
 	clk_add_alias("CLK_CK3P6MI", &e750_tc6393xb_device.dev,
 			"GPIO11_CLK", NULL),
 	eseries_get_tmio_gpios();
diff --git a/arch/arm/mach-pxa/e800.c b/arch/arm/mach-pxa/e800.c
index 9a86a426f924..cc9b1293e866 100644
--- a/arch/arm/mach-pxa/e800.c
+++ b/arch/arm/mach-pxa/e800.c
@@ -28,6 +28,7 @@
 #include <mach/hardware.h>
 #include <mach/eseries-gpio.h>
 #include <mach/udc.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 #include "eseries.h"
diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
index b1fcd10ab6c6..bcf3fb2c4b3a 100644
--- a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
+++ b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
@@ -193,10 +193,8 @@
 #define CKEN_MINI_IM	48	/* < Mini-IM */
 #define CKEN_MINI_LCD	49	/* < Mini LCD */
 
-#if defined(CONFIG_CPU_PXA310)
 #define CKEN_MMC3	5	/* < MMC3 Clock Enable */
 #define CKEN_MVED	43	/* < MVED clock enable */
-#endif
 
 /* Note: GCU clock enable bit differs on PXA300/PXA310 and PXA320 */
 #define PXA300_CKEN_GRAPHICS	42	/* Graphics controller clock enable */
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index 8fce85f33033..ea3c75595fa9 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -12,6 +12,7 @@
 #include <linux/errno.h>
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/jiffies.h>
 #include <linux/smp.h>
 #include <linux/io.h>
 
diff --git a/arch/arm/mach-s3c2410/include/mach/gpio.h b/arch/arm/mach-s3c2410/include/mach/gpio.h
index e0349af8a483..00476a573bbe 100644
--- a/arch/arm/mach-s3c2410/include/mach/gpio.h
+++ b/arch/arm/mach-s3c2410/include/mach/gpio.h
@@ -14,6 +14,7 @@
 #define gpio_get_value	__gpio_get_value
 #define gpio_set_value	__gpio_set_value
 #define gpio_cansleep	__gpio_cansleep
+#define gpio_to_irq	__gpio_to_irq
 
 /* some boards require extra gpio capacity to support external
  * devices that need GPIO.
diff --git a/arch/arm/mach-s3c2410/include/mach/irqs.h b/arch/arm/mach-s3c2410/include/mach/irqs.h
index 9565903d490b..49efce8cd4a7 100644
--- a/arch/arm/mach-s3c2410/include/mach/irqs.h
+++ b/arch/arm/mach-s3c2410/include/mach/irqs.h
@@ -12,10 +12,6 @@
 #ifndef __ASM_ARCH_IRQS_H
 #define __ASM_ARCH_IRQS_H __FILE__
 
-#ifndef __ASM_ARM_IRQ_H
-#error "Do not include this directly, instead #include <asm/irq.h>"
-#endif
-
 /* we keep the first set of CPU IRQs out of the range of
  * the ISA space, so that the PC104 has them to itself
  * and we don't end up having to do horrible things to the
diff --git a/arch/arm/mach-s3c2440/mach-at2440evb.c b/arch/arm/mach-s3c2440/mach-at2440evb.c
index 0a6d0a5d961b..315c42e31278 100644
--- a/arch/arm/mach-s3c2440/mach-at2440evb.c
+++ b/arch/arm/mach-s3c2440/mach-at2440evb.c
@@ -47,7 +47,7 @@
 #include <plat/clock.h>
 #include <plat/devs.h>
 #include <plat/cpu.h>
-#include <asm/plat-s3c24xx/mci.h>
+#include <plat/mci.h>
 
 static struct map_desc at2440evb_iodesc[] __initdata = {
 	/* Nothing here */
diff --git a/arch/arm/mach-s3c6400/include/mach/irqs.h b/arch/arm/mach-s3c6400/include/mach/irqs.h
index b38c47cffc28..4c97f9a4370b 100644
--- a/arch/arm/mach-s3c6400/include/mach/irqs.h
+++ b/arch/arm/mach-s3c6400/include/mach/irqs.h
@@ -11,10 +11,6 @@
 #ifndef __ASM_ARCH_IRQS_H
 #define __ASM_ARCH_IRQS_H __FILE__
 
-#ifndef __ASM_ARM_IRQ_H
-#error "Do not include this directly, instead #include <asm/irq.h>"
-#endif
-
 #include <plat/irqs.h>
 
 #endif /* __ASM_ARCH_IRQ_H */
diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c
index 89a6ab0b7db8..467531edefd3 100644
--- a/arch/arm/plat-omap/i2c.c
+++ b/arch/arm/plat-omap/i2c.c
@@ -26,6 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <mach/irqs.h>
 #include <mach/mux.h>
 
 #define OMAP_I2C_SIZE	0x3f
diff --git a/arch/arm/plat-s3c/dev-fb.c b/arch/arm/plat-s3c/dev-fb.c
index 0454b8ec02e2..a90198fc4b0f 100644
--- a/arch/arm/plat-s3c/dev-fb.c
+++ b/arch/arm/plat-s3c/dev-fb.c
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/fb.h>
 
+#include <mach/irqs.h>
 #include <mach/map.h>
 #include <mach/regs-fb.h>
 
diff --git a/arch/arm/plat-s3c/dev-i2c0.c b/arch/arm/plat-s3c/dev-i2c0.c
index 2c0128c77c6e..fe327074037e 100644
--- a/arch/arm/plat-s3c/dev-i2c0.c
+++ b/arch/arm/plat-s3c/dev-i2c0.c
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/map.h>
 
 #include <plat/regs-iic.h>
diff --git a/arch/arm/plat-s3c/dev-i2c1.c b/arch/arm/plat-s3c/dev-i2c1.c
index 9658fb0aec95..2387fbf57af6 100644
--- a/arch/arm/plat-s3c/dev-i2c1.c
+++ b/arch/arm/plat-s3c/dev-i2c1.c
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/platform_device.h>
 
+#include <mach/irqs.h>
 #include <mach/map.h>
 
 #include <plat/regs-iic.h>
diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c
index f95c6c9d9f1a..94a341aaa4e4 100644
--- a/arch/arm/plat-s3c24xx/gpiolib.c
+++ b/arch/arm/plat-s3c24xx/gpiolib.c
@@ -59,6 +59,22 @@ static int s3c24xx_gpiolib_banka_output(struct gpio_chip *chip,
 	return 0;
 }
 
+static int s3c24xx_gpiolib_bankf_toirq(struct gpio_chip *chip, unsigned offset)
+{
+	if (offset < 4)
+		return IRQ_EINT0 + offset;
+
+	if (offset < 8)
+		return IRQ_EINT4 + offset - 4;
+
+	return -EINVAL;
+}
+
+static int s3c24xx_gpiolib_bankg_toirq(struct gpio_chip *chip, unsigned offset)
+{
+	return IRQ_EINT8 + offset;
+}
+
 struct s3c_gpio_chip s3c24xx_gpios[] = {
 	[0] = {
 		.base = S3C24XX_GPIO_BASE(S3C2410_GPA0),
@@ -114,6 +130,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 			.owner = THIS_MODULE,
 			.label = "GPIOF",
 			.ngpio = 8,
+			.to_irq = s3c24xx_gpiolib_bankf_toirq,
 		},
 	},
 	[6] = {
@@ -123,6 +140,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 			.owner = THIS_MODULE,
 			.label = "GPIOG",
 			.ngpio = 10,
+			.to_irq = s3c24xx_gpiolib_bankg_toirq,
 		},
 	},
 };
diff --git a/arch/arm/plat-s3c24xx/pwm.c b/arch/arm/plat-s3c24xx/pwm.c
index ec56b88866c4..0120b760315b 100644
--- a/arch/arm/plat-s3c24xx/pwm.c
+++ b/arch/arm/plat-s3c24xx/pwm.c
@@ -19,6 +19,8 @@
 #include <linux/io.h>
 #include <linux/pwm.h>
 
+#include <mach/irqs.h>
+
 #include <plat/devs.h>
 #include <plat/regs-timer.h>
 
diff --git a/arch/arm/plat-s3c64xx/include/plat/irqs.h b/arch/arm/plat-s3c64xx/include/plat/irqs.h
index 02e8dd4c97d5..2846f550b727 100644
--- a/arch/arm/plat-s3c64xx/include/plat/irqs.h
+++ b/arch/arm/plat-s3c64xx/include/plat/irqs.h
@@ -191,7 +191,7 @@
 #define IRQ_EINT_GROUP8_BASE	(IRQ_EINT_GROUP7_BASE + IRQ_EINT_GROUP7_NR)
 #define IRQ_EINT_GROUP9_BASE	(IRQ_EINT_GROUP8_BASE + IRQ_EINT_GROUP8_NR)
 
-#define IRQ_EINT_GROUP(group, no)	(IRQ_EINT_GROUP##group##__BASE + (x))
+#define IRQ_EINT_GROUP(group, no)	(IRQ_EINT_GROUP##group##_BASE + (no))
 
 /* Set the default NR_IRQS */
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 79f25cef32df..84b861316ce7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -108,6 +108,8 @@ config ARCH_NO_VIRT_TO_BUS
 config PPC
 	bool
 	default y
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_IDE
@@ -326,7 +328,8 @@ config KEXEC
 
 config CRASH_DUMP
 	bool "Build a kdump crash kernel"
-	depends on (PPC64 && RELOCATABLE) || 6xx
+	depends on PPC64 || 6xx
+	select RELOCATABLE if PPC64
 	help
 	  Build a kernel suitable for use as a kdump capture kernel.
 	  The same kernel binary can be used as production kernel and dump
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index ab6dda372438..e84df338ea29 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -356,7 +356,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y))
 	@rm -f $@; ln $< $@
 
 install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
-	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $<
+	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^
 
 # anything not in $(targets)
 clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts
index 14534d04e4db..6e34f170fa62 100644
--- a/arch/powerpc/boot/dts/mpc836x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc836x_mds.dts
@@ -69,8 +69,18 @@
 		};
 
 		bcsr@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
 			compatible = "fsl,mpc8360mds-bcsr";
 			reg = <1 0 0x8000>;
+			ranges = <0 1 0 0x8000>;
+
+			bcsr13: gpio-controller@d {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8360mds-bcsr-gpio";
+				reg = <0xd 1>;
+				gpio-controller;
+			};
 		};
 	};
 
@@ -195,10 +205,21 @@
 	};
 
 	par_io@1400 {
+		#address-cells = <1>;
+		#size-cells = <1>;
 		reg = <0x1400 0x100>;
+		ranges = <0 0x1400 0x100>;
 		device_type = "par_io";
 		num-ports = <7>;
 
+		qe_pio_b: gpio-controller@18 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8360-qe-pario-bank",
+				     "fsl,mpc8323-qe-pario-bank";
+			reg = <0x18 0x18>;
+			gpio-controller;
+		};
+
 		pio1: ucc_pin@01 {
 			pio-map = <
 	/* port  pin  dir  open_drain  assignment  has_irq */
@@ -282,6 +303,15 @@
 		};
 	};
 
+	timer@440 {
+		compatible = "fsl,mpc8360-qe-gtm",
+			     "fsl,qe-gtm", "fsl,gtm";
+		reg = <0x440 0x40>;
+		clock-frequency = <132000000>;
+		interrupts = <12 13 14 15>;
+		interrupt-parent = <&qeic>;
+	};
+
 	spi@4c0 {
 		cell-index = <0>;
 		compatible = "fsl,spi";
@@ -301,11 +331,20 @@
 	};
 
 	usb@6c0 {
-		compatible = "qe_udc";
+		compatible = "fsl,mpc8360-qe-usb",
+			     "fsl,mpc8323-qe-usb";
 		reg = <0x6c0 0x40 0x8b00 0x100>;
 		interrupts = <11>;
 		interrupt-parent = <&qeic>;
-		mode = "slave";
+		fsl,fullspeed-clock = "clk21";
+		fsl,lowspeed-clock = "brg9";
+		gpios = <&qe_pio_b 2 0 /* USBOE */
+			 &qe_pio_b 3 0 /* USBTP */
+			 &qe_pio_b 8 0 /* USBTN */
+			 &qe_pio_b 9 0 /* USBRP */
+			 &qe_pio_b 11 0 /* USBRN */
+			 &bcsr13 5 0 /* SPEED */
+			 &bcsr13 4 1>; /* POWER */
 	};
 
 	enet0: ucc@2000 {
diff --git a/arch/powerpc/boot/dts/mpc836x_rdk.dts b/arch/powerpc/boot/dts/mpc836x_rdk.dts
index decadf3d9e98..37b789510d68 100644
--- a/arch/powerpc/boot/dts/mpc836x_rdk.dts
+++ b/arch/powerpc/boot/dts/mpc836x_rdk.dts
@@ -218,8 +218,23 @@
 		reg = <0x440 0x40>;
 		interrupts = <12 13 14 15>;
 		interrupt-parent = <&qeic>;
-		/* filled by u-boot */
-		clock-frequency = <0>;
+		clock-frequency = <166666666>;
+	};
+
+	usb@6c0 {
+		compatible = "fsl,mpc8360-qe-usb",
+			     "fsl,mpc8323-qe-usb";
+		reg = <0x6c0 0x40 0x8b00 0x100>;
+		interrupts = <11>;
+		interrupt-parent = <&qeic>;
+		fsl,fullspeed-clock = "clk21";
+		gpios = <&qe_pio_b 2 0 /* USBOE */
+			 &qe_pio_b 3 0 /* USBTP */
+			 &qe_pio_b 8 0 /* USBTN */
+			 &qe_pio_b 9 0 /* USBRP */
+			 &qe_pio_b 11 0 /* USBRN */
+			 &qe_pio_e 20 0 /* SPEED */
+			 &qe_pio_e 21 1 /* POWER */>;
 	};
 
 	spi@4c0 {
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
index 35d5e248ccd7..4481532cbe77 100644
--- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts
+++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
@@ -26,7 +26,13 @@
 		serial1 = &serial1;
 		pci0 = &pci0;
 		pci1 = &pci1;
-		rapidio0 = &rapidio0;
+/*
+ * Only one of Rapid IO or PCI can be present due to HW limitations and
+ * due to the fact that the 2 now share address space in the new memory
+ * map.  The most likely case is that we have PCI, so comment out the
+ * rapidio node.  Leave it here for reference.
+ */
+		/* rapidio0 = &rapidio0; */
 	};
 
 	cpus {
@@ -62,18 +68,17 @@
 		reg = <0x00000000 0x40000000>;	// 1G at 0x0
 	};
 
-	localbus@f8005000 {
+	localbus@ffe05000 {
 		#address-cells = <2>;
 		#size-cells = <1>;
 		compatible = "fsl,mpc8641-localbus", "simple-bus";
-		reg = <0xf8005000 0x1000>;
+		reg = <0xffe05000 0x1000>;
 		interrupts = <19 2>;
 		interrupt-parent = <&mpic>;
 
-		ranges = <0 0 0xff800000 0x00800000
-			  1 0 0xfe000000 0x01000000
-			  2 0 0xf8200000 0x00100000
-			  3 0 0xf8100000 0x00100000>;
+		ranges = <0 0 0xef800000 0x00800000
+			  2 0 0xffdf8000 0x00008000
+			  3 0 0xffdf0000 0x00008000>;
 
 		flash@0,0 {
 			compatible = "cfi-flash";
@@ -103,13 +108,13 @@
 		};
 	};
 
-	soc8641@f8000000 {
+	soc8641@ffe00000 {
 		#address-cells = <1>;
 		#size-cells = <1>;
 		device_type = "soc";
 		compatible = "simple-bus";
-		ranges = <0x00000000 0xf8000000 0x00100000>;
-		reg = <0xf8000000 0x00001000>;	// CCSRBAR
+		ranges = <0x00000000 0xffe00000 0x00100000>;
+		reg = <0xffe00000 0x00001000>;	// CCSRBAR
 		bus-frequency = <0>;
 
 		i2c@3000 {
@@ -340,17 +345,17 @@
 		};
 	};
 
-	pci0: pcie@f8008000 {
+	pci0: pcie@ffe08000 {
 		cell-index = <0>;
 		compatible = "fsl,mpc8641-pcie";
 		device_type = "pci";
 		#interrupt-cells = <1>;
 		#size-cells = <2>;
 		#address-cells = <3>;
-		reg = <0xf8008000 0x1000>;
+		reg = <0xffe08000 0x1000>;
 		bus-range = <0x0 0xff>;
 		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000
-			  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
+			  0x01000000 0x0 0x00000000 0xffc00000 0x0 0x00010000>;
 		clock-frequency = <33333333>;
 		interrupt-parent = <&mpic>;
 		interrupts = <24 2>;
@@ -481,7 +486,7 @@
 
 			  0x01000000 0x0 0x00000000
 			  0x01000000 0x0 0x00000000
-			  0x0 0x00100000>;
+			  0x0 0x00010000>;
 			uli1575@0 {
 				reg = <0 0 0 0 0>;
 				#size-cells = <2>;
@@ -491,7 +496,7 @@
 					  0x0 0x20000000
 					  0x01000000 0x0 0x00000000
 					  0x01000000 0x0 0x00000000
-					  0x0 0x00100000>;
+					  0x0 0x00010000>;
 				isa@1e {
 					device_type = "isa";
 					#interrupt-cells = <2>;
@@ -549,17 +554,17 @@
 
 	};
 
-	pci1: pcie@f8009000 {
+	pci1: pcie@ffe09000 {
 		cell-index = <1>;
 		compatible = "fsl,mpc8641-pcie";
 		device_type = "pci";
 		#interrupt-cells = <1>;
 		#size-cells = <2>;
 		#address-cells = <3>;
-		reg = <0xf8009000 0x1000>;
+		reg = <0xffe09000 0x1000>;
 		bus-range = <0 0xff>;
 		ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
-			  0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
+			  0x01000000 0x0 0x00000000 0xffc10000 0x0 0x00010000>;
 		clock-frequency = <33333333>;
 		interrupt-parent = <&mpic>;
 		interrupts = <25 2>;
@@ -582,18 +587,21 @@
582 587
583 0x01000000 0x0 0x00000000 588 0x01000000 0x0 0x00000000
584 0x01000000 0x0 0x00000000 589 0x01000000 0x0 0x00000000
585 0x0 0x00100000>; 590 0x0 0x00010000>;
586 }; 591 };
587 }; 592 };
588 rapidio0: rapidio@f80c0000 { 593/*
594 rapidio0: rapidio@ffec0000 {
589 #address-cells = <2>; 595 #address-cells = <2>;
590 #size-cells = <2>; 596 #size-cells = <2>;
591 compatible = "fsl,rapidio-delta"; 597 compatible = "fsl,rapidio-delta";
592 reg = <0xf80c0000 0x20000>; 598 reg = <0xffec0000 0x20000>;
593 ranges = <0 0 0xc0000000 0 0x20000000>; 599 ranges = <0 0 0x80000000 0 0x20000000>;
594 interrupt-parent = <&mpic>; 600 interrupt-parent = <&mpic>;
595 /* err_irq bell_outb_irq bell_inb_irq 601 // err_irq bell_outb_irq bell_inb_irq
596 msg1_tx_irq msg1_rx_irq msg2_tx_irq msg2_rx_irq */ 602 // msg1_tx_irq msg1_rx_irq msg2_tx_irq msg2_rx_irq
597 interrupts = <48 2 49 2 50 2 53 2 54 2 55 2 56 2>; 603 interrupts = <48 2 49 2 50 2 53 2 54 2 55 2 56 2>;
598 }; 604 };
605*/
606
599}; 607};
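
The ranges rewrites above move the board's localbus windows and CCSR space up to the 0xffe00000-based map. Each 4-cell localbus ranges entry translates a (chip-select, offset) child address to a parent physical address by simple base-plus-offset arithmetic. A minimal standalone sketch of that translation, using the new values from the hunk above (the struct and helper are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* One localbus "ranges" entry: <cs offset parent-base size>.
 * Mirrors the 4-cell entries in the DTS hunk above. */
struct lb_range {
	uint32_t cs;      /* chip select (child address, cell 0) */
	uint32_t offset;  /* offset within chip select (cell 1) */
	uint32_t parent;  /* parent (CPU physical) base (cell 2) */
	uint32_t size;    /* window size (cell 3) */
};

static const struct lb_range ranges[] = {
	{ 0, 0, 0xef800000, 0x00800000 },  /* boot flash */
	{ 2, 0, 0xffdf8000, 0x00008000 },
	{ 3, 0, 0xffdf0000, 0x00008000 },
};

/* Translate a child (cs, off) address to a parent physical address;
 * returns 0 on success, -1 if no window covers the address. */
static int lb_translate(uint32_t cs, uint32_t off, uint32_t *phys)
{
	size_t i;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
		if (ranges[i].cs == cs && off < ranges[i].size) {
			*phys = ranges[i].parent + off;
			return 0;
		}
	}
	return -1;
}

int main(void)
{
	uint32_t phys;

	if (!lb_translate(0, 0x100, &phys))
		printf("CS0+0x100 -> 0x%08x\n", phys); /* 0xef800100 */
	return 0;
}
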
diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh
index b002bfd56786..51b2387bdba0 100644
--- a/arch/powerpc/boot/install.sh
+++ b/arch/powerpc/boot/install.sh
@@ -15,7 +15,7 @@
15# $2 - kernel image file 15# $2 - kernel image file
16# $3 - kernel map file 16# $3 - kernel map file
17# $4 - default install path (blank if root directory) 17# $4 - default install path (blank if root directory)
18# $5 - kernel boot file, the zImage 18# $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc.
19# 19#
20 20
21# User may have a custom install script 21# User may have a custom install script
@@ -38,3 +38,15 @@ fi
38 38
39cat $2 > $4/$image_name 39cat $2 > $4/$image_name
40cp $3 $4/System.map 40cp $3 $4/System.map
41
42# Copy all the bootable image files
43path=$4
44shift 4
45while [ $# -ne 0 ]; do
46 image_name=`basename $1`
47 if [ -f $path/$image_name ]; then
48 mv $path/$image_name $path/$image_name.old
49 fi
50 cat $1 > $path/$image_name
51 shift
52done;
diff --git a/arch/powerpc/configs/85xx/mpc8572_ds_defconfig b/arch/powerpc/configs/85xx/mpc8572_ds_defconfig
index 635588319e0d..32aeb79216f7 100644
--- a/arch/powerpc/configs/85xx/mpc8572_ds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8572_ds_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.28-rc3 3# Linux kernel version: 2.6.28-rc8
4# Sat Nov 8 12:40:13 2008 4# Tue Dec 30 11:17:46 2008
5# 5#
6# CONFIG_PPC64 is not set 6# CONFIG_PPC64 is not set
7 7
@@ -21,7 +21,10 @@ CONFIG_FSL_BOOKE=y
21CONFIG_FSL_EMB_PERFMON=y 21CONFIG_FSL_EMB_PERFMON=y
22# CONFIG_PHYS_64BIT is not set 22# CONFIG_PHYS_64BIT is not set
23CONFIG_SPE=y 23CONFIG_SPE=y
24CONFIG_PPC_MMU_NOHASH=y
24# CONFIG_PPC_MM_SLICES is not set 25# CONFIG_PPC_MM_SLICES is not set
26CONFIG_SMP=y
27CONFIG_NR_CPUS=2
25CONFIG_PPC32=y 28CONFIG_PPC32=y
26CONFIG_WORD_SIZE=32 29CONFIG_WORD_SIZE=32
27# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set 30# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set
@@ -50,7 +53,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y
50CONFIG_PPC_OF=y 53CONFIG_PPC_OF=y
51CONFIG_OF=y 54CONFIG_OF=y
52CONFIG_PPC_UDBG_16550=y 55CONFIG_PPC_UDBG_16550=y
53# CONFIG_GENERIC_TBSYNC is not set 56CONFIG_GENERIC_TBSYNC=y
54CONFIG_AUDIT_ARCH=y 57CONFIG_AUDIT_ARCH=y
55CONFIG_GENERIC_BUG=y 58CONFIG_GENERIC_BUG=y
56CONFIG_DEFAULT_UIMAGE=y 59CONFIG_DEFAULT_UIMAGE=y
@@ -62,7 +65,7 @@ CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
62# General setup 65# General setup
63# 66#
64CONFIG_EXPERIMENTAL=y 67CONFIG_EXPERIMENTAL=y
65CONFIG_BROKEN_ON_SMP=y 68CONFIG_LOCK_KERNEL=y
66CONFIG_INIT_ENV_ARG_LIMIT=32 69CONFIG_INIT_ENV_ARG_LIMIT=32
67CONFIG_LOCALVERSION="" 70CONFIG_LOCALVERSION=""
68CONFIG_LOCALVERSION_AUTO=y 71CONFIG_LOCALVERSION_AUTO=y
@@ -126,6 +129,7 @@ CONFIG_HAVE_IOREMAP_PROT=y
126CONFIG_HAVE_KPROBES=y 129CONFIG_HAVE_KPROBES=y
127CONFIG_HAVE_KRETPROBES=y 130CONFIG_HAVE_KRETPROBES=y
128CONFIG_HAVE_ARCH_TRACEHOOK=y 131CONFIG_HAVE_ARCH_TRACEHOOK=y
132CONFIG_USE_GENERIC_SMP_HELPERS=y
129# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set 133# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
130CONFIG_SLABINFO=y 134CONFIG_SLABINFO=y
131CONFIG_RT_MUTEXES=y 135CONFIG_RT_MUTEXES=y
@@ -138,6 +142,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
138CONFIG_MODVERSIONS=y 142CONFIG_MODVERSIONS=y
139# CONFIG_MODULE_SRCVERSION_ALL is not set 143# CONFIG_MODULE_SRCVERSION_ALL is not set
140CONFIG_KMOD=y 144CONFIG_KMOD=y
145CONFIG_STOP_MACHINE=y
141CONFIG_BLOCK=y 146CONFIG_BLOCK=y
142CONFIG_LBD=y 147CONFIG_LBD=y
143# CONFIG_BLK_DEV_IO_TRACE is not set 148# CONFIG_BLK_DEV_IO_TRACE is not set
@@ -197,6 +202,7 @@ CONFIG_PPC_I8259=y
197# CONFIG_CPM2 is not set 202# CONFIG_CPM2 is not set
198CONFIG_FSL_ULI1575=y 203CONFIG_FSL_ULI1575=y
199# CONFIG_MPC8xxx_GPIO is not set 204# CONFIG_MPC8xxx_GPIO is not set
205# CONFIG_SIMPLE_GPIO is not set
200 206
201# 207#
202# Kernel options 208# Kernel options
@@ -224,6 +230,7 @@ CONFIG_MATH_EMULATION=y
224CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y 230CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
225CONFIG_ARCH_HAS_WALK_MEMORY=y 231CONFIG_ARCH_HAS_WALK_MEMORY=y
226CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y 232CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
233# CONFIG_IRQ_ALL_CPUS is not set
227CONFIG_ARCH_FLATMEM_ENABLE=y 234CONFIG_ARCH_FLATMEM_ENABLE=y
228CONFIG_ARCH_POPULATES_NODE_MAP=y 235CONFIG_ARCH_POPULATES_NODE_MAP=y
229CONFIG_SELECT_MEMORY_MODEL=y 236CONFIG_SELECT_MEMORY_MODEL=y
@@ -241,6 +248,9 @@ CONFIG_ZONE_DMA_FLAG=1
241CONFIG_BOUNCE=y 248CONFIG_BOUNCE=y
242CONFIG_VIRT_TO_BUS=y 249CONFIG_VIRT_TO_BUS=y
243CONFIG_UNEVICTABLE_LRU=y 250CONFIG_UNEVICTABLE_LRU=y
251CONFIG_PPC_4K_PAGES=y
252# CONFIG_PPC_16K_PAGES is not set
253# CONFIG_PPC_64K_PAGES is not set
244CONFIG_FORCE_MAX_ZONEORDER=11 254CONFIG_FORCE_MAX_ZONEORDER=11
245CONFIG_PROC_DEVICETREE=y 255CONFIG_PROC_DEVICETREE=y
246# CONFIG_CMDLINE_BOOL is not set 256# CONFIG_CMDLINE_BOOL is not set
@@ -443,8 +453,10 @@ CONFIG_MISC_DEVICES=y
443# CONFIG_EEPROM_93CX6 is not set 453# CONFIG_EEPROM_93CX6 is not set
444# CONFIG_SGI_IOC4 is not set 454# CONFIG_SGI_IOC4 is not set
445# CONFIG_TIFM_CORE is not set 455# CONFIG_TIFM_CORE is not set
456# CONFIG_ICS932S401 is not set
446# CONFIG_ENCLOSURE_SERVICES is not set 457# CONFIG_ENCLOSURE_SERVICES is not set
447# CONFIG_HP_ILO is not set 458# CONFIG_HP_ILO is not set
459# CONFIG_C2PORT is not set
448CONFIG_HAVE_IDE=y 460CONFIG_HAVE_IDE=y
449# CONFIG_IDE is not set 461# CONFIG_IDE is not set
450 462
@@ -784,6 +796,7 @@ CONFIG_SERIAL_CORE_CONSOLE=y
784CONFIG_UNIX98_PTYS=y 796CONFIG_UNIX98_PTYS=y
785CONFIG_LEGACY_PTYS=y 797CONFIG_LEGACY_PTYS=y
786CONFIG_LEGACY_PTY_COUNT=256 798CONFIG_LEGACY_PTY_COUNT=256
799# CONFIG_HVC_UDBG is not set
787# CONFIG_IPMI_HANDLER is not set 800# CONFIG_IPMI_HANDLER is not set
788CONFIG_HW_RANDOM=y 801CONFIG_HW_RANDOM=y
789CONFIG_NVRAM=y 802CONFIG_NVRAM=y
@@ -869,11 +882,11 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
869# CONFIG_THERMAL is not set 882# CONFIG_THERMAL is not set
870# CONFIG_THERMAL_HWMON is not set 883# CONFIG_THERMAL_HWMON is not set
871# CONFIG_WATCHDOG is not set 884# CONFIG_WATCHDOG is not set
885CONFIG_SSB_POSSIBLE=y
872 886
873# 887#
874# Sonics Silicon Backplane 888# Sonics Silicon Backplane
875# 889#
876CONFIG_SSB_POSSIBLE=y
877# CONFIG_SSB is not set 890# CONFIG_SSB is not set
878 891
879# 892#
@@ -886,14 +899,7 @@ CONFIG_SSB_POSSIBLE=y
886# CONFIG_PMIC_DA903X is not set 899# CONFIG_PMIC_DA903X is not set
887# CONFIG_MFD_WM8400 is not set 900# CONFIG_MFD_WM8400 is not set
888# CONFIG_MFD_WM8350_I2C is not set 901# CONFIG_MFD_WM8350_I2C is not set
889
890#
891# Voltage and Current regulators
892#
893# CONFIG_REGULATOR is not set 902# CONFIG_REGULATOR is not set
894# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
895# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
896# CONFIG_REGULATOR_BQ24022 is not set
897 903
898# 904#
899# Multimedia devices 905# Multimedia devices
@@ -1252,11 +1258,11 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
1252# CONFIG_USB_TMC is not set 1258# CONFIG_USB_TMC is not set
1253 1259
1254# 1260#
1255# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' 1261# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
1256# 1262#
1257 1263
1258# 1264#
1259# may also be needed; see USB_STORAGE Help for more information 1265# see USB_STORAGE Help for more information
1260# 1266#
1261CONFIG_USB_STORAGE=y 1267CONFIG_USB_STORAGE=y
1262# CONFIG_USB_STORAGE_DEBUG is not set 1268# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1348,6 +1354,7 @@ CONFIG_RTC_INTF_DEV=y
1348# CONFIG_RTC_DRV_M41T80 is not set 1354# CONFIG_RTC_DRV_M41T80 is not set
1349# CONFIG_RTC_DRV_S35390A is not set 1355# CONFIG_RTC_DRV_S35390A is not set
1350# CONFIG_RTC_DRV_FM3130 is not set 1356# CONFIG_RTC_DRV_FM3130 is not set
1357# CONFIG_RTC_DRV_RX8581 is not set
1351 1358
1352# 1359#
1353# SPI RTC drivers 1360# SPI RTC drivers
@@ -1624,6 +1631,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
1624# CONFIG_SAMPLES is not set 1631# CONFIG_SAMPLES is not set
1625CONFIG_HAVE_ARCH_KGDB=y 1632CONFIG_HAVE_ARCH_KGDB=y
1626# CONFIG_KGDB is not set 1633# CONFIG_KGDB is not set
1634CONFIG_PRINT_STACK_DEPTH=64
1627# CONFIG_DEBUG_STACKOVERFLOW is not set 1635# CONFIG_DEBUG_STACKOVERFLOW is not set
1628# CONFIG_DEBUG_STACK_USAGE is not set 1636# CONFIG_DEBUG_STACK_USAGE is not set
1629# CONFIG_DEBUG_PAGEALLOC is not set 1637# CONFIG_DEBUG_PAGEALLOC is not set
@@ -1649,11 +1657,16 @@ CONFIG_CRYPTO=y
1649# 1657#
1650# CONFIG_CRYPTO_FIPS is not set 1658# CONFIG_CRYPTO_FIPS is not set
1651CONFIG_CRYPTO_ALGAPI=y 1659CONFIG_CRYPTO_ALGAPI=y
1660CONFIG_CRYPTO_ALGAPI2=y
1652CONFIG_CRYPTO_AEAD=y 1661CONFIG_CRYPTO_AEAD=y
1662CONFIG_CRYPTO_AEAD2=y
1653CONFIG_CRYPTO_BLKCIPHER=y 1663CONFIG_CRYPTO_BLKCIPHER=y
1664CONFIG_CRYPTO_BLKCIPHER2=y
1654CONFIG_CRYPTO_HASH=y 1665CONFIG_CRYPTO_HASH=y
1655CONFIG_CRYPTO_RNG=y 1666CONFIG_CRYPTO_HASH2=y
1667CONFIG_CRYPTO_RNG2=y
1656CONFIG_CRYPTO_MANAGER=y 1668CONFIG_CRYPTO_MANAGER=y
1669CONFIG_CRYPTO_MANAGER2=y
1657# CONFIG_CRYPTO_GF128MUL is not set 1670# CONFIG_CRYPTO_GF128MUL is not set
1658# CONFIG_CRYPTO_NULL is not set 1671# CONFIG_CRYPTO_NULL is not set
1659# CONFIG_CRYPTO_CRYPTD is not set 1672# CONFIG_CRYPTO_CRYPTD is not set
diff --git a/arch/powerpc/include/asm/ioctls.h b/arch/powerpc/include/asm/ioctls.h
index 279a6229584b..1842186d872c 100644
--- a/arch/powerpc/include/asm/ioctls.h
+++ b/arch/powerpc/include/asm/ioctls.h
@@ -89,6 +89,8 @@
89#define TIOCSBRK 0x5427 /* BSD compatibility */ 89#define TIOCSBRK 0x5427 /* BSD compatibility */
90#define TIOCCBRK 0x5428 /* BSD compatibility */ 90#define TIOCCBRK 0x5428 /* BSD compatibility */
91#define TIOCGSID 0x5429 /* Return the session ID of FD */ 91#define TIOCGSID 0x5429 /* Return the session ID of FD */
92#define TIOCGRS485 0x542e
93#define TIOCSRS485 0x542f
92#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ 94#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
93#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ 95#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
94 96
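
The two new ioctl numbers give powerpc the same RS-485 control interface other architectures already expose; both operate on a struct serial_rs485 from <linux/serial.h>. A hedged user-space sketch, assuming a port whose driver actually implements the RS-485 ioctls (the device path is an assumption):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/serial.h>
#include <unistd.h>

int main(void)
{
	struct serial_rs485 rs485;
	int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY); /* assumed port */

	if (fd < 0 || ioctl(fd, TIOCGRS485, &rs485) < 0) {
		perror("TIOCGRS485");
		return 1;
	}
	rs485.flags |= SER_RS485_ENABLED; /* drive RTS as TX direction */
	if (ioctl(fd, TIOCSRS485, &rs485) < 0)
		perror("TIOCSRS485");
	close(fd);
	return 0;
}
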
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 6dbffc981702..7e06b43720d3 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -48,63 +48,8 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
48{ 48{
49 if (oldregs) 49 if (oldregs)
50 memcpy(newregs, oldregs, sizeof(*newregs)); 50 memcpy(newregs, oldregs, sizeof(*newregs));
51#ifdef __powerpc64__
52 else {
53 /* FIXME Merge this with xmon_save_regs ?? */
54 unsigned long tmp1, tmp2;
55 __asm__ __volatile__ (
56 "std 0,0(%2)\n"
57 "std 1,8(%2)\n"
58 "std 2,16(%2)\n"
59 "std 3,24(%2)\n"
60 "std 4,32(%2)\n"
61 "std 5,40(%2)\n"
62 "std 6,48(%2)\n"
63 "std 7,56(%2)\n"
64 "std 8,64(%2)\n"
65 "std 9,72(%2)\n"
66 "std 10,80(%2)\n"
67 "std 11,88(%2)\n"
68 "std 12,96(%2)\n"
69 "std 13,104(%2)\n"
70 "std 14,112(%2)\n"
71 "std 15,120(%2)\n"
72 "std 16,128(%2)\n"
73 "std 17,136(%2)\n"
74 "std 18,144(%2)\n"
75 "std 19,152(%2)\n"
76 "std 20,160(%2)\n"
77 "std 21,168(%2)\n"
78 "std 22,176(%2)\n"
79 "std 23,184(%2)\n"
80 "std 24,192(%2)\n"
81 "std 25,200(%2)\n"
82 "std 26,208(%2)\n"
83 "std 27,216(%2)\n"
84 "std 28,224(%2)\n"
85 "std 29,232(%2)\n"
86 "std 30,240(%2)\n"
87 "std 31,248(%2)\n"
88 "mfmsr %0\n"
89 "std %0, 264(%2)\n"
90 "mfctr %0\n"
91 "std %0, 280(%2)\n"
92 "mflr %0\n"
93 "std %0, 288(%2)\n"
94 "bl 1f\n"
95 "1: mflr %1\n"
96 "std %1, 256(%2)\n"
97 "mtlr %0\n"
98 "mfxer %0\n"
99 "std %0, 296(%2)\n"
100 : "=&r" (tmp1), "=&r" (tmp2)
101 : "b" (newregs)
102 : "memory");
103 }
104#else
105 else 51 else
106 ppc_save_regs(newregs); 52 ppc_save_regs(newregs);
107#endif /* __powerpc64__ */
108} 53}
109 54
110extern void kexec_smp_wait(void); /* get and clear naca physid, wait for 55extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
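
With the 64-bit inline assembly gone, crash_setup_regs() is one short function on both word sizes: copy the caller's regs if given, otherwise snapshot the current ones via ppc_save_regs(). A kernel-context sketch of the calling convention (the wrapper function is illustrative, not kernel source):

#include <asm/kexec.h>   /* crash_setup_regs() */
#include <asm/ptrace.h>  /* struct pt_regs */

/* Illustrative caller: with oldregs == NULL the unified path has
 * ppc_save_regs() capture the current register state, replacing the
 * 64-bit-only asm block this hunk removes. */
static void example_capture(struct pt_regs *oldregs)
{
	struct pt_regs newregs;

	crash_setup_regs(&newregs, oldregs);
	/* ... hand newregs to the kdump machinery ... */
}
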
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index cff30c0ef1ff..eead5c67197a 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -320,6 +320,7 @@ enum ps3_match_id {
320 320
321enum ps3_match_sub_id { 321enum ps3_match_sub_id {
322 PS3_MATCH_SUB_ID_GPU_FB = 1, 322 PS3_MATCH_SUB_ID_GPU_FB = 1,
323 PS3_MATCH_SUB_ID_GPU_RAMDISK = 2,
323}; 324};
324 325
325#define PS3_MODULE_ALIAS_EHCI "ps3:1:0" 326#define PS3_MODULE_ALIAS_EHCI "ps3:1:0"
@@ -332,6 +333,7 @@ enum ps3_match_sub_id {
332#define PS3_MODULE_ALIAS_STOR_FLASH "ps3:8:0" 333#define PS3_MODULE_ALIAS_STOR_FLASH "ps3:8:0"
333#define PS3_MODULE_ALIAS_SOUND "ps3:9:0" 334#define PS3_MODULE_ALIAS_SOUND "ps3:9:0"
334#define PS3_MODULE_ALIAS_GPU_FB "ps3:10:1" 335#define PS3_MODULE_ALIAS_GPU_FB "ps3:10:1"
336#define PS3_MODULE_ALIAS_GPU_RAMDISK "ps3:10:2"
335#define PS3_MODULE_ALIAS_LPM "ps3:11:0" 337#define PS3_MODULE_ALIAS_LPM "ps3:11:0"
336 338
337enum ps3_system_bus_device_type { 339enum ps3_system_bus_device_type {
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index edee15d269ea..a0a15311d0d8 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -17,6 +17,8 @@
17#ifdef __KERNEL__ 17#ifdef __KERNEL__
18 18
19#include <linux/spinlock.h> 19#include <linux/spinlock.h>
20#include <linux/errno.h>
21#include <linux/err.h>
20#include <asm/cpm.h> 22#include <asm/cpm.h>
21#include <asm/immap_qe.h> 23#include <asm/immap_qe.h>
22 24
@@ -84,7 +86,11 @@ static inline bool qe_clock_is_brg(enum qe_clock clk)
84extern spinlock_t cmxgcr_lock; 86extern spinlock_t cmxgcr_lock;
85 87
86/* Export QE common operations */ 88/* Export QE common operations */
89#ifdef CONFIG_QUICC_ENGINE
87extern void __init qe_reset(void); 90extern void __init qe_reset(void);
91#else
92static inline void qe_reset(void) {}
93#endif
88 94
89/* QE PIO */ 95/* QE PIO */
90#define QE_PIO_PINS 32 96#define QE_PIO_PINS 32
@@ -101,16 +107,43 @@ struct qe_pio_regs {
101#endif 107#endif
102}; 108};
103 109
104extern int par_io_init(struct device_node *np);
105extern int par_io_of_config(struct device_node *np);
106#define QE_PIO_DIR_IN 2 110#define QE_PIO_DIR_IN 2
107#define QE_PIO_DIR_OUT 1 111#define QE_PIO_DIR_OUT 1
108extern void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin, 112extern void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin,
109 int dir, int open_drain, int assignment, 113 int dir, int open_drain, int assignment,
110 int has_irq); 114 int has_irq);
115#ifdef CONFIG_QUICC_ENGINE
116extern int par_io_init(struct device_node *np);
117extern int par_io_of_config(struct device_node *np);
111extern int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain, 118extern int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
112 int assignment, int has_irq); 119 int assignment, int has_irq);
113extern int par_io_data_set(u8 port, u8 pin, u8 val); 120extern int par_io_data_set(u8 port, u8 pin, u8 val);
121#else
122static inline int par_io_init(struct device_node *np) { return -ENOSYS; }
123static inline int par_io_of_config(struct device_node *np) { return -ENOSYS; }
124static inline int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
125 int assignment, int has_irq) { return -ENOSYS; }
126static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; }
127#endif /* CONFIG_QUICC_ENGINE */
128
129/*
130 * Pin multiplexing functions.
131 */
132struct qe_pin;
133#ifdef CONFIG_QE_GPIO
134extern struct qe_pin *qe_pin_request(struct device_node *np, int index);
135extern void qe_pin_free(struct qe_pin *qe_pin);
136extern void qe_pin_set_gpio(struct qe_pin *qe_pin);
137extern void qe_pin_set_dedicated(struct qe_pin *pin);
138#else
139static inline struct qe_pin *qe_pin_request(struct device_node *np, int index)
140{
141 return ERR_PTR(-ENOSYS);
142}
143static inline void qe_pin_free(struct qe_pin *qe_pin) {}
144static inline void qe_pin_set_gpio(struct qe_pin *qe_pin) {}
145static inline void qe_pin_set_dedicated(struct qe_pin *pin) {}
146#endif /* CONFIG_QE_GPIO */
114 147
115/* QE internal API */ 148/* QE internal API */
116int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input); 149int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input);
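
The par_io_* and qe_pin_* stubs above let callers compile without CONFIG_QUICC_ENGINE or CONFIG_QE_GPIO: the pin functions return ERR_PTR(-ENOSYS) instead of failing at link time. A kernel-context sketch of the intended call pattern (the function and the node handed to it are illustrative):

#include <linux/err.h>
#include <linux/of.h>
#include <asm/qe.h>

/* Illustrative only: request pin 0 of an assumed QE par_io node and
 * switch it to GPIO mode.  With CONFIG_QE_GPIO disabled,
 * qe_pin_request() is the inline stub above and we just bail out. */
static struct qe_pin *example_claim_pin(struct device_node *np)
{
	struct qe_pin *pin = qe_pin_request(np, 0);

	if (IS_ERR(pin))
		return NULL;      /* -ENOSYS when QE GPIO is not built */

	qe_pin_set_gpio(pin);     /* mux the pin as a GPIO */
	return pin;
}
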
diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h
index 56a7745ca343..cf519663a791 100644
--- a/arch/powerpc/include/asm/qe_ic.h
+++ b/arch/powerpc/include/asm/qe_ic.h
@@ -17,6 +17,9 @@
17 17
18#include <linux/irq.h> 18#include <linux/irq.h>
19 19
20struct device_node;
21struct qe_ic;
22
20#define NUM_OF_QE_IC_GROUPS 6 23#define NUM_OF_QE_IC_GROUPS 6
21 24
22/* Flags when we init the QE IC */ 25/* Flags when we init the QE IC */
@@ -54,17 +57,27 @@ enum qe_ic_grp_id {
54 QE_IC_GRP_RISCB /* QE interrupt controller RISC group B */ 57 QE_IC_GRP_RISCB /* QE interrupt controller RISC group B */
55}; 58};
56 59
60#ifdef CONFIG_QUICC_ENGINE
57void qe_ic_init(struct device_node *node, unsigned int flags, 61void qe_ic_init(struct device_node *node, unsigned int flags,
58 void (*low_handler)(unsigned int irq, struct irq_desc *desc), 62 void (*low_handler)(unsigned int irq, struct irq_desc *desc),
59 void (*high_handler)(unsigned int irq, struct irq_desc *desc)); 63 void (*high_handler)(unsigned int irq, struct irq_desc *desc));
64unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
65unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
66#else
67static inline void qe_ic_init(struct device_node *node, unsigned int flags,
68 void (*low_handler)(unsigned int irq, struct irq_desc *desc),
69 void (*high_handler)(unsigned int irq, struct irq_desc *desc))
70{}
71static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic)
72{ return 0; }
73static inline unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic)
74{ return 0; }
75#endif /* CONFIG_QUICC_ENGINE */
76
60void qe_ic_set_highest_priority(unsigned int virq, int high); 77void qe_ic_set_highest_priority(unsigned int virq, int high);
61int qe_ic_set_priority(unsigned int virq, unsigned int priority); 78int qe_ic_set_priority(unsigned int virq, unsigned int priority);
62int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high); 79int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high);
63 80
64struct qe_ic;
65unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
66unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
67
68static inline void qe_ic_cascade_low_ipic(unsigned int irq, 81static inline void qe_ic_cascade_low_ipic(unsigned int irq,
69 struct irq_desc *desc) 82 struct irq_desc *desc)
70{ 83{
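
The same stubbing applies to the QE interrupt controller, so platform setup code can probe for and initialize the QE IC unconditionally. A minimal kernel-context sketch, assuming a platform init hook and the usual "fsl,qe-ic" compatible string:

#include <linux/of.h>
#include <linux/irq.h>
#include <asm/qe_ic.h>

/* Illustrative platform hook: find the QE IC node and initialize it
 * with the IPIC cascade handlers.  When CONFIG_QUICC_ENGINE is off,
 * qe_ic_init() is the empty stub above and this becomes a no-op. */
static void __init example_init_qe_ic(void)
{
	struct device_node *np;

	np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
	if (!np)
		return;

	qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic);
	of_node_put(np);
}
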
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 8b2eb044270a..0ab8d869e3d6 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -128,7 +128,7 @@ struct spu {
128 int number; 128 int number;
129 unsigned int irqs[3]; 129 unsigned int irqs[3];
130 u32 node; 130 u32 node;
131 u64 flags; 131 unsigned long flags;
132 u64 class_0_pending; 132 u64 class_0_pending;
133 u64 class_0_dar; 133 u64 class_0_dar;
134 u64 class_1_dar; 134 u64 class_1_dar;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1308a86e9070..8d1a419df35d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -29,7 +29,7 @@ endif
29obj-y := cputable.o ptrace.o syscalls.o \ 29obj-y := cputable.o ptrace.o syscalls.o \
30 irq.o align.o signal_32.o pmc.o vdso.o \ 30 irq.o align.o signal_32.o pmc.o vdso.o \
31 init_task.o process.o systbl.o idle.o \ 31 init_task.o process.o systbl.o idle.o \
32 signal.o sysfs.o 32 signal.o sysfs.o cacheinfo.o
33obj-y += vdso32/ 33obj-y += vdso32/
34obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ 34obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
35 signal_64.o ptrace32.o \ 35 signal_64.o ptrace32.o \
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
new file mode 100644
index 000000000000..b33f0417a4bf
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -0,0 +1,837 @@
1/*
2 * Processor cache information made available to userspace via sysfs;
3 * intended to be compatible with x86 intel_cacheinfo implementation.
4 *
5 * Copyright 2008 IBM Corporation
6 * Author: Nathan Lynch
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 */
12
13#include <linux/cpu.h>
14#include <linux/cpumask.h>
15#include <linux/init.h>
16#include <linux/kernel.h>
17#include <linux/kobject.h>
18#include <linux/list.h>
19#include <linux/notifier.h>
20#include <linux/of.h>
21#include <linux/percpu.h>
22#include <asm/prom.h>
23
24#include "cacheinfo.h"
25
26/* per-cpu object for tracking:
27 * - a "cache" kobject for the top-level directory
28 * - a list of "index" objects representing the cpu's local cache hierarchy
29 */
30struct cache_dir {
31 struct kobject *kobj; /* bare (not embedded) kobject for cache
32 * directory */
33 struct cache_index_dir *index; /* list of index objects */
34};
35
36/* "index" object: each cpu's cache directory has an index
37 * subdirectory corresponding to a cache object associated with the
38 * cpu. This object's lifetime is managed via the embedded kobject.
39 */
40struct cache_index_dir {
41 struct kobject kobj;
42 struct cache_index_dir *next; /* next index in parent directory */
43 struct cache *cache;
44};
45
46/* Template for determining which OF properties to query for a given
47 * cache type */
48struct cache_type_info {
49 const char *name;
50 const char *size_prop;
51
52 /* Allow for both [di]-cache-line-size and
53 * [di]-cache-block-size properties. According to the PowerPC
54 * Processor binding, -line-size should be provided if it
55 * differs from the cache block size (that which is operated
56 * on by cache instructions), so we look for -line-size first.
57 * See cache_get_line_size(). */
58
59 const char *line_size_props[2];
60 const char *nr_sets_prop;
61};
62
63/* These are used to index the cache_type_info array. */
64#define CACHE_TYPE_UNIFIED 0
65#define CACHE_TYPE_INSTRUCTION 1
66#define CACHE_TYPE_DATA 2
67
68static const struct cache_type_info cache_type_info[] = {
69 {
70 /* PowerPC Processor binding says the [di]-cache-*
71 * must be equal on unified caches, so just use
72 * d-cache properties. */
73 .name = "Unified",
74 .size_prop = "d-cache-size",
75 .line_size_props = { "d-cache-line-size",
76 "d-cache-block-size", },
77 .nr_sets_prop = "d-cache-sets",
78 },
79 {
80 .name = "Instruction",
81 .size_prop = "i-cache-size",
82 .line_size_props = { "i-cache-line-size",
83 "i-cache-block-size", },
84 .nr_sets_prop = "i-cache-sets",
85 },
86 {
87 .name = "Data",
88 .size_prop = "d-cache-size",
89 .line_size_props = { "d-cache-line-size",
90 "d-cache-block-size", },
91 .nr_sets_prop = "d-cache-sets",
92 },
93};
94
95/* Cache object: each instance of this corresponds to a distinct cache
96 * in the system. There are separate objects for Harvard caches: one
97 * each for instruction and data, and each refers to the same OF node.
98 * The refcount of the OF node is elevated for the lifetime of the
99 * cache object. A cache object is released when its shared_cpu_map
100 * is cleared (see cache_cpu_clear).
101 *
102 * A cache object is on two lists: an unsorted global list
103 * (cache_list) of cache objects; and a singly-linked list
104 * representing the local cache hierarchy, which is ordered by level
105 * (e.g. L1d -> L1i -> L2 -> L3).
106 */
107struct cache {
108 struct device_node *ofnode; /* OF node for this cache, may be cpu */
109 struct cpumask shared_cpu_map; /* online CPUs using this cache */
110 int type; /* split cache disambiguation */
111 int level; /* level not explicit in device tree */
112 struct list_head list; /* global list of cache objects */
113 struct cache *next_local; /* next cache of >= level */
114};
115
116static DEFINE_PER_CPU(struct cache_dir *, cache_dir);
117
118/* traversal/modification of this list occurs only at cpu hotplug time;
119 * access is serialized by cpu hotplug locking
120 */
121static LIST_HEAD(cache_list);
122
123static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k)
124{
125 return container_of(k, struct cache_index_dir, kobj);
126}
127
128static const char *cache_type_string(const struct cache *cache)
129{
130 return cache_type_info[cache->type].name;
131}
132
133static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode)
134{
135 cache->type = type;
136 cache->level = level;
137 cache->ofnode = of_node_get(ofnode);
138 INIT_LIST_HEAD(&cache->list);
139 list_add(&cache->list, &cache_list);
140}
141
142static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode)
143{
144 struct cache *cache;
145
146 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
147 if (cache)
148 cache_init(cache, type, level, ofnode);
149
150 return cache;
151}
152
153static void release_cache_debugcheck(struct cache *cache)
154{
155 struct cache *iter;
156
157 list_for_each_entry(iter, &cache_list, list)
158 WARN_ONCE(iter->next_local == cache,
159 "cache for %s(%s) refers to cache for %s(%s)\n",
160 iter->ofnode->full_name,
161 cache_type_string(iter),
162 cache->ofnode->full_name,
163 cache_type_string(cache));
164}
165
166static void release_cache(struct cache *cache)
167{
168 if (!cache)
169 return;
170
171 pr_debug("freeing L%d %s cache for %s\n", cache->level,
172 cache_type_string(cache), cache->ofnode->full_name);
173
174 release_cache_debugcheck(cache);
175 list_del(&cache->list);
176 of_node_put(cache->ofnode);
177 kfree(cache);
178}
179
180static void cache_cpu_set(struct cache *cache, int cpu)
181{
182 struct cache *next = cache;
183
184 while (next) {
185 WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
186 "CPU %i already accounted in %s(%s)\n",
187 cpu, next->ofnode->full_name,
188 cache_type_string(next));
189 cpumask_set_cpu(cpu, &next->shared_cpu_map);
190 next = next->next_local;
191 }
192}
193
194static int cache_size(const struct cache *cache, unsigned int *ret)
195{
196 const char *propname;
197 const u32 *cache_size;
198
199 propname = cache_type_info[cache->type].size_prop;
200
201 cache_size = of_get_property(cache->ofnode, propname, NULL);
202 if (!cache_size)
203 return -ENODEV;
204
205 *ret = *cache_size;
206 return 0;
207}
208
209static int cache_size_kb(const struct cache *cache, unsigned int *ret)
210{
211 unsigned int size;
212
213 if (cache_size(cache, &size))
214 return -ENODEV;
215
216 *ret = size / 1024;
217 return 0;
218}
219
220/* not cache_line_size() because that's a macro in include/linux/cache.h */
221static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
222{
223 const u32 *line_size;
224 int i, lim;
225
226 lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props);
227
228 for (i = 0; i < lim; i++) {
229 const char *propname;
230
231 propname = cache_type_info[cache->type].line_size_props[i];
232 line_size = of_get_property(cache->ofnode, propname, NULL);
233 if (line_size)
234 break;
235 }
236
237 if (!line_size)
238 return -ENODEV;
239
240 *ret = *line_size;
241 return 0;
242}
243
244static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
245{
246 const char *propname;
247 const u32 *nr_sets;
248
249 propname = cache_type_info[cache->type].nr_sets_prop;
250
251 nr_sets = of_get_property(cache->ofnode, propname, NULL);
252 if (!nr_sets)
253 return -ENODEV;
254
255 *ret = *nr_sets;
256 return 0;
257}
258
259static int cache_associativity(const struct cache *cache, unsigned int *ret)
260{
261 unsigned int line_size;
262 unsigned int nr_sets;
263 unsigned int size;
264
265 if (cache_nr_sets(cache, &nr_sets))
266 goto err;
267
268 /* If the cache is fully associative, there is no need to
269 * check the other properties.
270 */
271 if (nr_sets == 1) {
272 *ret = 0;
273 return 0;
274 }
275
276 if (cache_get_line_size(cache, &line_size))
277 goto err;
278 if (cache_size(cache, &size))
279 goto err;
280
281 if (!(nr_sets > 0 && size > 0 && line_size > 0))
282 goto err;
283
284 *ret = (size / nr_sets) / line_size;
285 return 0;
286err:
287 return -ENODEV;
288}
289
290/* helper for dealing with split caches */
291static struct cache *cache_find_first_sibling(struct cache *cache)
292{
293 struct cache *iter;
294
295 if (cache->type == CACHE_TYPE_UNIFIED)
296 return cache;
297
298 list_for_each_entry(iter, &cache_list, list)
299 if (iter->ofnode == cache->ofnode && iter->next_local == cache)
300 return iter;
301
302 return cache;
303}
304
305/* return the first cache on a local list matching node */
306static struct cache *cache_lookup_by_node(const struct device_node *node)
307{
308 struct cache *cache = NULL;
309 struct cache *iter;
310
311 list_for_each_entry(iter, &cache_list, list) {
312 if (iter->ofnode != node)
313 continue;
314 cache = cache_find_first_sibling(iter);
315 break;
316 }
317
318 return cache;
319}
320
321static bool cache_node_is_unified(const struct device_node *np)
322{
323 return of_get_property(np, "cache-unified", NULL);
324}
325
326static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level)
327{
328 struct cache *cache;
329
330 pr_debug("creating L%d ucache for %s\n", level, node->full_name);
331
332 cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
333
334 return cache;
335}
336
337static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level)
338{
339 struct cache *dcache, *icache;
340
341 pr_debug("creating L%d dcache and icache for %s\n", level,
342 node->full_name);
343
344 dcache = new_cache(CACHE_TYPE_DATA, level, node);
345 icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
346
347 if (!dcache || !icache)
348 goto err;
349
350 dcache->next_local = icache;
351
352 return dcache;
353err:
354 release_cache(dcache);
355 release_cache(icache);
356 return NULL;
357}
358
359static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level)
360{
361 struct cache *cache;
362
363 if (cache_node_is_unified(node))
364 cache = cache_do_one_devnode_unified(node, level);
365 else
366 cache = cache_do_one_devnode_split(node, level);
367
368 return cache;
369}
370
371static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level)
372{
373 struct cache *cache;
374
375 cache = cache_lookup_by_node(node);
376
377 WARN_ONCE(cache && cache->level != level,
378 "cache level mismatch on lookup (got %d, expected %d)\n",
379 cache->level, level);
380
381 if (!cache)
382 cache = cache_do_one_devnode(node, level);
383
384 return cache;
385}
386
387static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger)
388{
389 while (smaller->next_local) {
390 if (smaller->next_local == bigger)
391 return; /* already linked */
392 smaller = smaller->next_local;
393 }
394
395 smaller->next_local = bigger;
396}
397
398static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache)
399{
400 WARN_ON_ONCE(cache->level != 1);
401 WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
402}
403
404static void __cpuinit do_subsidiary_caches(struct cache *cache)
405{
406 struct device_node *subcache_node;
407 int level = cache->level;
408
409 do_subsidiary_caches_debugcheck(cache);
410
411 while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
412 struct cache *subcache;
413
414 level++;
415 subcache = cache_lookup_or_instantiate(subcache_node, level);
416 of_node_put(subcache_node);
417 if (!subcache)
418 break;
419
420 link_cache_lists(cache, subcache);
421 cache = subcache;
422 }
423}
424
425static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id)
426{
427 struct device_node *cpu_node;
428 struct cache *cpu_cache = NULL;
429
430 pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
431
432 cpu_node = of_get_cpu_node(cpu_id, NULL);
433 WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
434 if (!cpu_node)
435 goto out;
436
437 cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
438 if (!cpu_cache)
439 goto out;
440
441 do_subsidiary_caches(cpu_cache);
442
443 cache_cpu_set(cpu_cache, cpu_id);
444out:
445 of_node_put(cpu_node);
446
447 return cpu_cache;
448}
449
450static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
451{
452 struct cache_dir *cache_dir;
453 struct sys_device *sysdev;
454 struct kobject *kobj = NULL;
455
456 sysdev = get_cpu_sysdev(cpu_id);
457 WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id);
458 if (!sysdev)
459 goto err;
460
461 kobj = kobject_create_and_add("cache", &sysdev->kobj);
462 if (!kobj)
463 goto err;
464
465 cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
466 if (!cache_dir)
467 goto err;
468
469 cache_dir->kobj = kobj;
470
471 WARN_ON_ONCE(per_cpu(cache_dir, cpu_id) != NULL);
472
473 per_cpu(cache_dir, cpu_id) = cache_dir;
474
475 return cache_dir;
476err:
477 kobject_put(kobj);
478 return NULL;
479}
480
481static void cache_index_release(struct kobject *kobj)
482{
483 struct cache_index_dir *index;
484
485 index = kobj_to_cache_index_dir(kobj);
486
487 pr_debug("freeing index directory for L%d %s cache\n",
488 index->cache->level, cache_type_string(index->cache));
489
490 kfree(index);
491}
492
493static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf)
494{
495 struct kobj_attribute *kobj_attr;
496
497 kobj_attr = container_of(attr, struct kobj_attribute, attr);
498
499 return kobj_attr->show(k, kobj_attr, buf);
500}
501
502static struct cache *index_kobj_to_cache(struct kobject *k)
503{
504 struct cache_index_dir *index;
505
506 index = kobj_to_cache_index_dir(k);
507
508 return index->cache;
509}
510
511static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
512{
513 unsigned int size_kb;
514 struct cache *cache;
515
516 cache = index_kobj_to_cache(k);
517
518 if (cache_size_kb(cache, &size_kb))
519 return -ENODEV;
520
521 return sprintf(buf, "%uK\n", size_kb);
522}
523
524static struct kobj_attribute cache_size_attr =
525 __ATTR(size, 0444, size_show, NULL);
526
527
528static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
529{
530 unsigned int line_size;
531 struct cache *cache;
532
533 cache = index_kobj_to_cache(k);
534
535 if (cache_get_line_size(cache, &line_size))
536 return -ENODEV;
537
538 return sprintf(buf, "%u\n", line_size);
539}
540
541static struct kobj_attribute cache_line_size_attr =
542 __ATTR(coherency_line_size, 0444, line_size_show, NULL);
543
544static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
545{
546 unsigned int nr_sets;
547 struct cache *cache;
548
549 cache = index_kobj_to_cache(k);
550
551 if (cache_nr_sets(cache, &nr_sets))
552 return -ENODEV;
553
554 return sprintf(buf, "%u\n", nr_sets);
555}
556
557static struct kobj_attribute cache_nr_sets_attr =
558 __ATTR(number_of_sets, 0444, nr_sets_show, NULL);
559
560static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
561{
562 unsigned int associativity;
563 struct cache *cache;
564
565 cache = index_kobj_to_cache(k);
566
567 if (cache_associativity(cache, &associativity))
568 return -ENODEV;
569
570 return sprintf(buf, "%u\n", associativity);
571}
572
573static struct kobj_attribute cache_assoc_attr =
574 __ATTR(ways_of_associativity, 0444, associativity_show, NULL);
575
576static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
577{
578 struct cache *cache;
579
580 cache = index_kobj_to_cache(k);
581
582 return sprintf(buf, "%s\n", cache_type_string(cache));
583}
584
585static struct kobj_attribute cache_type_attr =
586 __ATTR(type, 0444, type_show, NULL);
587
588static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
589{
590 struct cache_index_dir *index;
591 struct cache *cache;
592
593 index = kobj_to_cache_index_dir(k);
594 cache = index->cache;
595
596 return sprintf(buf, "%d\n", cache->level);
597}
598
599static struct kobj_attribute cache_level_attr =
600 __ATTR(level, 0444, level_show, NULL);
601
602static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
603{
604 struct cache_index_dir *index;
605 struct cache *cache;
606 int len;
607 int n = 0;
608
609 index = kobj_to_cache_index_dir(k);
610 cache = index->cache;
611 len = PAGE_SIZE - 2;
612
613 if (len > 1) {
614 n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map);
615 buf[n++] = '\n';
616 buf[n] = '\0';
617 }
618 return n;
619}
620
621static struct kobj_attribute cache_shared_cpu_map_attr =
622 __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
623
624/* Attributes which should always be created -- the kobject/sysfs core
625 * does this automatically via kobj_type->default_attrs. This is the
626 * minimum data required to uniquely identify a cache.
627 */
628static struct attribute *cache_index_default_attrs[] = {
629 &cache_type_attr.attr,
630 &cache_level_attr.attr,
631 &cache_shared_cpu_map_attr.attr,
632 NULL,
633};
634
635/* Attributes which should be created if the cache device node has the
636 * right properties -- see cacheinfo_create_index_opt_attrs
637 */
638static struct kobj_attribute *cache_index_opt_attrs[] = {
639 &cache_size_attr,
640 &cache_line_size_attr,
641 &cache_nr_sets_attr,
642 &cache_assoc_attr,
643};
644
645static struct sysfs_ops cache_index_ops = {
646 .show = cache_index_show,
647};
648
649static struct kobj_type cache_index_type = {
650 .release = cache_index_release,
651 .sysfs_ops = &cache_index_ops,
652 .default_attrs = cache_index_default_attrs,
653};
654
655static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
656{
657 const char *cache_name;
658 const char *cache_type;
659 struct cache *cache;
660 char *buf;
661 int i;
662
663 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
664 if (!buf)
665 return;
666
667 cache = dir->cache;
668 cache_name = cache->ofnode->full_name;
669 cache_type = cache_type_string(cache);
670
671 /* We don't want to create an attribute that can't provide a
672 * meaningful value. Check the return value of each optional
673 * attribute's ->show method before registering the
674 * attribute.
675 */
676 for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) {
677 struct kobj_attribute *attr;
678 ssize_t rc;
679
680 attr = cache_index_opt_attrs[i];
681
682 rc = attr->show(&dir->kobj, attr, buf);
683 if (rc <= 0) {
684 pr_debug("not creating %s attribute for "
685 "%s(%s) (rc = %zd)\n",
686 attr->attr.name, cache_name,
687 cache_type, rc);
688 continue;
689 }
690 if (sysfs_create_file(&dir->kobj, &attr->attr))
691 pr_debug("could not create %s attribute for %s(%s)\n",
692 attr->attr.name, cache_name, cache_type);
693 }
694
695 kfree(buf);
696}
697
698static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir)
699{
700 struct cache_index_dir *index_dir;
701 int rc;
702
703 index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
704 if (!index_dir)
705 goto err;
706
707 index_dir->cache = cache;
708
709 rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
710 cache_dir->kobj, "index%d", index);
711 if (rc)
712 goto err;
713
714 index_dir->next = cache_dir->index;
715 cache_dir->index = index_dir;
716
717 cacheinfo_create_index_opt_attrs(index_dir);
718
719 return;
720err:
721 kfree(index_dir);
722}
723
724static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list)
725{
726 struct cache_dir *cache_dir;
727 struct cache *cache;
728 int index = 0;
729
730 cache_dir = cacheinfo_create_cache_dir(cpu_id);
731 if (!cache_dir)
732 return;
733
734 cache = cache_list;
735 while (cache) {
736 cacheinfo_create_index_dir(cache, index, cache_dir);
737 index++;
738 cache = cache->next_local;
739 }
740}
741
742void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id)
743{
744 struct cache *cache;
745
746 cache = cache_chain_instantiate(cpu_id);
747 if (!cache)
748 return;
749
750 cacheinfo_sysfs_populate(cpu_id, cache);
751}
752
753#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */
754
755static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
756{
757 struct device_node *cpu_node;
758 struct cache *cache;
759
760 cpu_node = of_get_cpu_node(cpu_id, NULL);
761 WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
762 if (!cpu_node)
763 return NULL;
764
765 cache = cache_lookup_by_node(cpu_node);
766 of_node_put(cpu_node);
767
768 return cache;
769}
770
771static void remove_index_dirs(struct cache_dir *cache_dir)
772{
773 struct cache_index_dir *index;
774
775 index = cache_dir->index;
776
777 while (index) {
778 struct cache_index_dir *next;
779
780 next = index->next;
781 kobject_put(&index->kobj);
782 index = next;
783 }
784}
785
786static void remove_cache_dir(struct cache_dir *cache_dir)
787{
788 remove_index_dirs(cache_dir);
789
790 kobject_put(cache_dir->kobj);
791
792 kfree(cache_dir);
793}
794
795static void cache_cpu_clear(struct cache *cache, int cpu)
796{
797 while (cache) {
798 struct cache *next = cache->next_local;
799
800 WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
801 "CPU %i not accounted in %s(%s)\n",
802 cpu, cache->ofnode->full_name,
803 cache_type_string(cache));
804
805 cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
806
807 /* Release the cache object if all the cpus using it
808 * are offline */
809 if (cpumask_empty(&cache->shared_cpu_map))
810 release_cache(cache);
811
812 cache = next;
813 }
814}
815
816void cacheinfo_cpu_offline(unsigned int cpu_id)
817{
818 struct cache_dir *cache_dir;
819 struct cache *cache;
820
821 /* Prevent userspace from seeing inconsistent state - remove
822 * the sysfs hierarchy first */
823 cache_dir = per_cpu(cache_dir, cpu_id);
824
825 /* careful, sysfs population may have failed */
826 if (cache_dir)
827 remove_cache_dir(cache_dir);
828
829 per_cpu(cache_dir, cpu_id) = NULL;
830
831 /* clear the CPU's bit in its cache chain, possibly freeing
832 * cache objects */
833 cache = cache_lookup_by_cpu(cpu_id);
834 if (cache)
835 cache_cpu_clear(cache, cpu_id);
836}
837#endif /* CONFIG_HOTPLUG_CPU */
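
The end result of cacheinfo.c is the x86-compatible /sys/devices/system/cpu/cpuN/cache/indexM hierarchy. A small user-space sketch that walks cpu0's first index (paths assume the layout this file creates; optional attributes are simply skipped when the device tree lacks the matching property):

#include <stdio.h>

int main(void)
{
	static const char *attrs[] = {
		"type", "level", "size", "coherency_line_size",
		"number_of_sets", "ways_of_associativity",
		"shared_cpu_map",
	};
	char path[128], buf[128];
	size_t i;

	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cache/index0/%s",
			 attrs[i]);
		f = fopen(path, "r");
		if (!f)
			continue;  /* optional attribute not created */
		if (fgets(buf, sizeof(buf), f))
			printf("%s: %s", attrs[i], buf);
		fclose(f);
	}
	return 0;
}
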
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
new file mode 100644
index 000000000000..a7b74d36acd7
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -0,0 +1,8 @@
1#ifndef _PPC_CACHEINFO_H
2#define _PPC_CACHEINFO_H
3
4/* These are just hooks for sysfs.c to use. */
5extern void cacheinfo_cpu_online(unsigned int cpu_id);
6extern void cacheinfo_cpu_offline(unsigned int cpu_id);
7
8#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 2538030954d8..da5a3855a0c4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -16,7 +16,7 @@
16 * 2 of the License, or (at your option) any later version. 16 * 2 of the License, or (at your option) any later version.
17 */ 17 */
18 18
19#undef DEBUG 19#define DEBUG
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/pci.h> 22#include <linux/pci.h>
@@ -1356,6 +1356,63 @@ static void __init pcibios_allocate_resources(int pass)
1356 } 1356 }
1357} 1357}
1358 1358
1359static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
1360{
1361 struct pci_controller *hose = pci_bus_to_host(bus);
1362 resource_size_t offset;
1363 struct resource *res, *pres;
1364 int i;
1365
1366 pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
1367
1368 /* Check for IO */
1369 if (!(hose->io_resource.flags & IORESOURCE_IO))
1370 goto no_io;
1371 offset = (unsigned long)hose->io_base_virt - _IO_BASE;
1372 res = kzalloc(sizeof(struct resource), GFP_KERNEL);
1373 BUG_ON(res == NULL);
1374 res->name = "Legacy IO";
1375 res->flags = IORESOURCE_IO;
1376 res->start = offset;
1377 res->end = (offset + 0xfff) & 0xfffffffful;
1378 pr_debug("Candidate legacy IO: %pR\n", res);
1379 if (request_resource(&hose->io_resource, res)) {
1380 printk(KERN_DEBUG
1381 "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
1382 pci_domain_nr(bus), bus->number, res);
1383 kfree(res);
1384 }
1385
1386 no_io:
1387 /* Check for memory */
1388 offset = hose->pci_mem_offset;
1389 pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset);
1390 for (i = 0; i < 3; i++) {
1391 pres = &hose->mem_resources[i];
1392 if (!(pres->flags & IORESOURCE_MEM))
1393 continue;
1394 pr_debug("hose mem res: %pR\n", pres);
1395 if ((pres->start - offset) <= 0xa0000 &&
1396 (pres->end - offset) >= 0xbffff)
1397 break;
1398 }
1399 if (i >= 3)
1400 return;
1401 res = kzalloc(sizeof(struct resource), GFP_KERNEL);
1402 BUG_ON(res == NULL);
1403 res->name = "Legacy VGA memory";
1404 res->flags = IORESOURCE_MEM;
1405 res->start = 0xa0000 + offset;
1406 res->end = 0xbffff + offset;
1407 pr_debug("Candidate VGA memory: %pR\n", res);
1408 if (request_resource(pres, res)) {
1409 printk(KERN_DEBUG
1410 "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
1411 pci_domain_nr(bus), bus->number, res);
1412 kfree(res);
1413 }
1414}
1415
1359void __init pcibios_resource_survey(void) 1416void __init pcibios_resource_survey(void)
1360{ 1417{
1361 struct pci_bus *b; 1418 struct pci_bus *b;
@@ -1371,6 +1428,18 @@ void __init pcibios_resource_survey(void)
1371 pcibios_allocate_resources(1); 1428 pcibios_allocate_resources(1);
1372 } 1429 }
1373 1430
 1431 /* Before we start assigning unassigned resources, we try to reserve
1432 * the low IO area and the VGA memory area if they intersect the
1433 * bus available resources to avoid allocating things on top of them
1434 */
1435 if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
1436 list_for_each_entry(b, &pci_root_buses, node)
1437 pcibios_reserve_legacy_regions(b);
1438 }
1439
1440 /* Now, if the platform didn't decide to blindly trust the firmware,
1441 * we proceed to assigning things that were left unassigned
1442 */
1374 if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) { 1443 if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
 1375 pr_debug("PCI: Assigning unassigned resources...\n"); 1444 pr_debug("PCI: Assigning unassigned resources...\n");
1376 pci_assign_unassigned_resources(); 1445 pci_assign_unassigned_resources();
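
pcibios_reserve_legacy_regions() claims the VGA window only when one of the host bridge's memory resources, viewed in bus addresses (pci_mem_offset subtracted), fully covers 0xa0000..0xbffff. A standalone sketch of just that containment test (names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Does a bridge window [start, end] (bus addresses, i.e. with the
 * hose's pci_mem_offset already subtracted) cover legacy VGA memory
 * 0xa0000..0xbffff?  Mirrors the check in the hunk above. */
static int covers_vga(uint64_t start, uint64_t end)
{
	return start <= 0xa0000 && end >= 0xbffff;
}

int main(void)
{
	printf("%d\n", covers_vga(0x00000000, 0x1fffffff)); /* 1 */
	printf("%d\n", covers_vga(0xc0000000, 0xdfffffff)); /* 0 */
	return 0;
}
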
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 39fadc6e1492..586962f65c2a 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -560,9 +560,14 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
560 * G5 machines... So when something asks for bus 0 io base 560 * G5 machines... So when something asks for bus 0 io base
561 * (bus 0 is HT root), we return the AGP one instead. 561 * (bus 0 is HT root), we return the AGP one instead.
562 */ 562 */
563 if (machine_is_compatible("MacRISC4")) 563 if (in_bus == 0 && machine_is_compatible("MacRISC4")) {
564 if (in_bus == 0) 564 struct device_node *agp;
565
566 agp = of_find_compatible_node(NULL, NULL, "u3-agp");
567 if (agp)
565 in_bus = 0xf0; 568 in_bus = 0xf0;
569 of_node_put(agp);
570 }
566 571
567 /* That syscall isn't quite compatible with PCI domains, but it's 572 /* That syscall isn't quite compatible with PCI domains, but it's
568 * used on pre-domains setup. We return the first match 573 * used on pre-domains setup. We return the first match
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index dcec1325d340..c8b27bb4dbde 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -165,6 +165,7 @@ EXPORT_SYMBOL(timer_interrupt);
165EXPORT_SYMBOL(irq_desc); 165EXPORT_SYMBOL(irq_desc);
166EXPORT_SYMBOL(tb_ticks_per_jiffy); 166EXPORT_SYMBOL(tb_ticks_per_jiffy);
167EXPORT_SYMBOL(cacheable_memcpy); 167EXPORT_SYMBOL(cacheable_memcpy);
168EXPORT_SYMBOL(cacheable_memzero);
168#endif 169#endif
169 170
170#ifdef CONFIG_PPC32 171#ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6f73c739f1e2..c09cffafb6ee 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -824,11 +824,11 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
824#endif 824#endif
825 825
826#ifdef CONFIG_KEXEC 826#ifdef CONFIG_KEXEC
827 lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); 827 lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
828 if (lprop) 828 if (lprop)
829 crashk_res.start = *lprop; 829 crashk_res.start = *lprop;
830 830
831 lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL); 831 lprop = of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
832 if (lprop) 832 if (lprop)
833 crashk_res.end = crashk_res.start + *lprop - 1; 833 crashk_res.end = crashk_res.start + *lprop - 1;
834#endif 834#endif
@@ -893,12 +893,12 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
893 u64 base, size, lmb_size; 893 u64 base, size, lmb_size;
894 unsigned int is_kexec_kdump = 0, rngs; 894 unsigned int is_kexec_kdump = 0, rngs;
895 895
896 ls = (cell_t *)of_get_flat_dt_prop(node, "ibm,lmb-size", &l); 896 ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
897 if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t)) 897 if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t))
898 return 0; 898 return 0;
899 lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls); 899 lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls);
900 900
901 dm = (cell_t *)of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); 901 dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
902 if (dm == NULL || l < sizeof(cell_t)) 902 if (dm == NULL || l < sizeof(cell_t))
903 return 0; 903 return 0;
904 904
@@ -907,7 +907,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
907 return 0; 907 return 0;
908 908
909 /* check if this is a kexec/kdump kernel. */ 909 /* check if this is a kexec/kdump kernel. */
910 usm = (cell_t *)of_get_flat_dt_prop(node, "linux,drconf-usable-memory", 910 usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
911 &l); 911 &l);
912 if (usm != NULL) 912 if (usm != NULL)
913 is_kexec_kdump = 1; 913 is_kexec_kdump = 1;
@@ -981,9 +981,9 @@ static int __init early_init_dt_scan_memory(unsigned long node,
981 } else if (strcmp(type, "memory") != 0) 981 } else if (strcmp(type, "memory") != 0)
982 return 0; 982 return 0;
983 983
984 reg = (cell_t *)of_get_flat_dt_prop(node, "linux,usable-memory", &l); 984 reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
985 if (reg == NULL) 985 if (reg == NULL)
986 reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); 986 reg = of_get_flat_dt_prop(node, "reg", &l);
987 if (reg == NULL) 987 if (reg == NULL)
988 return 0; 988 return 0;
989 989
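
These hunks can drop the casts because of_get_flat_dt_prop() returns void *, and C (unlike C++) converts void * implicitly to any object pointer type, so the (u64 *) and (cell_t *) casts were redundant. A tiny standalone illustration of the rule:

#include <stdint.h>

/* void * converts implicitly to any object pointer type in C. */
static void *give_void(void)
{
	static uint64_t v = 42;
	return &v;
}

int main(void)
{
	uint64_t *p = give_void(); /* no cast needed, unlike C++ */
	return *p == 42 ? 0 : 1;
}
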
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 2445945d3761..7f1b33d5e30d 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1210,7 +1210,7 @@ static void __init prom_initialize_tce_table(void)
1210 /* Initialize the table to have a one-to-one mapping 1210 /* Initialize the table to have a one-to-one mapping
1211 * over the allocated size. 1211 * over the allocated size.
1212 */ 1212 */
1213 tce_entryp = (unsigned long *)base; 1213 tce_entryp = (u64 *)base;
1214 for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) { 1214 for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
1215 tce_entry = (i << PAGE_SHIFT); 1215 tce_entry = (i << PAGE_SHIFT);
1216 tce_entry |= 0x3; 1216 tce_entry |= 0x3;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0c64f10087b9..4a2ee08af6a7 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -18,6 +18,8 @@
18#include <asm/machdep.h> 18#include <asm/machdep.h>
19#include <asm/smp.h> 19#include <asm/smp.h>
20 20
21#include "cacheinfo.h"
22
21#ifdef CONFIG_PPC64 23#ifdef CONFIG_PPC64
22#include <asm/paca.h> 24#include <asm/paca.h>
23#include <asm/lppaca.h> 25#include <asm/lppaca.h>
@@ -25,8 +27,6 @@
25 27
26static DEFINE_PER_CPU(struct cpu, cpu_devices); 28static DEFINE_PER_CPU(struct cpu, cpu_devices);
27 29
28static DEFINE_PER_CPU(struct kobject *, cache_toplevel);
29
30/* 30/*
31 * SMT snooze delay stuff, 64-bit only for now 31 * SMT snooze delay stuff, 64-bit only for now
32 */ 32 */
@@ -343,283 +343,6 @@ static struct sysdev_attribute pa6t_attrs[] = {
343#endif /* HAS_PPC_PMC_PA6T */ 343#endif /* HAS_PPC_PMC_PA6T */
344#endif /* HAS_PPC_PMC_CLASSIC */ 344#endif /* HAS_PPC_PMC_CLASSIC */
345 345
346struct cache_desc {
347 struct kobject kobj;
348 struct cache_desc *next;
349 const char *type; /* Instruction, Data, or Unified */
350 u32 size; /* total cache size in KB */
351 u32 line_size; /* in bytes */
352 u32 nr_sets; /* number of sets */
353 u32 level; /* e.g. 1, 2, 3... */
354 u32 associativity; /* e.g. 8-way... 0 is fully associative */
355};
356
357DEFINE_PER_CPU(struct cache_desc *, cache_desc);
358
359static struct cache_desc *kobj_to_cache_desc(struct kobject *k)
360{
361 return container_of(k, struct cache_desc, kobj);
362}
363
364static void cache_desc_release(struct kobject *k)
365{
366 struct cache_desc *desc = kobj_to_cache_desc(k);
367
368 pr_debug("%s: releasing %s\n", __func__, kobject_name(k));
369
370 if (desc->next)
371 kobject_put(&desc->next->kobj);
372
373 kfree(kobj_to_cache_desc(k));
374}
375
376static ssize_t cache_desc_show(struct kobject *k, struct attribute *attr, char *buf)
377{
378 struct kobj_attribute *kobj_attr;
379
380 kobj_attr = container_of(attr, struct kobj_attribute, attr);
381
382 return kobj_attr->show(k, kobj_attr, buf);
383}
384
385static struct sysfs_ops cache_desc_sysfs_ops = {
386 .show = cache_desc_show,
387};
388
389static struct kobj_type cache_desc_type = {
390 .release = cache_desc_release,
391 .sysfs_ops = &cache_desc_sysfs_ops,
392};
393
394static ssize_t cache_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
395{
396 struct cache_desc *cache = kobj_to_cache_desc(k);
397
398 return sprintf(buf, "%uK\n", cache->size);
399}
400
401static struct kobj_attribute cache_size_attr =
402 __ATTR(size, 0444, cache_size_show, NULL);
403
404static ssize_t cache_line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
405{
406 struct cache_desc *cache = kobj_to_cache_desc(k);
407
408 return sprintf(buf, "%u\n", cache->line_size);
409}
410
411static struct kobj_attribute cache_line_size_attr =
412 __ATTR(coherency_line_size, 0444, cache_line_size_show, NULL);
413
414static ssize_t cache_nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
415{
416 struct cache_desc *cache = kobj_to_cache_desc(k);
417
418 return sprintf(buf, "%u\n", cache->nr_sets);
419}
420
421static struct kobj_attribute cache_nr_sets_attr =
422 __ATTR(number_of_sets, 0444, cache_nr_sets_show, NULL);
423
424static ssize_t cache_type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
425{
426 struct cache_desc *cache = kobj_to_cache_desc(k);
427
428 return sprintf(buf, "%s\n", cache->type);
429}
430
431static struct kobj_attribute cache_type_attr =
432 __ATTR(type, 0444, cache_type_show, NULL);
433
434static ssize_t cache_level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
435{
436 struct cache_desc *cache = kobj_to_cache_desc(k);
437
438 return sprintf(buf, "%u\n", cache->level);
439}
440
441static struct kobj_attribute cache_level_attr =
442 __ATTR(level, 0444, cache_level_show, NULL);
443
444static ssize_t cache_assoc_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
445{
446 struct cache_desc *cache = kobj_to_cache_desc(k);
447
448 return sprintf(buf, "%u\n", cache->associativity);
449}
450
451static struct kobj_attribute cache_assoc_attr =
452 __ATTR(ways_of_associativity, 0444, cache_assoc_show, NULL);
453
454struct cache_desc_info {
455 const char *type;
456 const char *size_prop;
457 const char *line_size_prop;
458 const char *nr_sets_prop;
459};
460
461/* PowerPC Processor binding says the [di]-cache-* must be equal on
462 * unified caches, so just use d-cache properties. */
463static struct cache_desc_info ucache_info = {
464 .type = "Unified",
465 .size_prop = "d-cache-size",
466 .line_size_prop = "d-cache-line-size",
467 .nr_sets_prop = "d-cache-sets",
468};
469
470static struct cache_desc_info dcache_info = {
471 .type = "Data",
472 .size_prop = "d-cache-size",
473 .line_size_prop = "d-cache-line-size",
474 .nr_sets_prop = "d-cache-sets",
475};
476
477static struct cache_desc_info icache_info = {
478 .type = "Instruction",
479 .size_prop = "i-cache-size",
480 .line_size_prop = "i-cache-line-size",
481 .nr_sets_prop = "i-cache-sets",
482};
483
484static struct cache_desc * __cpuinit create_cache_desc(struct device_node *np, struct kobject *parent, int index, int level, struct cache_desc_info *info)
485{
486 const u32 *cache_line_size;
487 struct cache_desc *new;
488 const u32 *cache_size;
489 const u32 *nr_sets;
490 int rc;
491
492 new = kzalloc(sizeof(*new), GFP_KERNEL);
493 if (!new)
494 return NULL;
495
496 rc = kobject_init_and_add(&new->kobj, &cache_desc_type, parent,
497 "index%d", index);
498 if (rc)
499 goto err;
500
501 /* type */
502 new->type = info->type;
503 rc = sysfs_create_file(&new->kobj, &cache_type_attr.attr);
504 WARN_ON(rc);
505
506 /* level */
507 new->level = level;
508 rc = sysfs_create_file(&new->kobj, &cache_level_attr.attr);
509 WARN_ON(rc);
510
511 /* size */
512 cache_size = of_get_property(np, info->size_prop, NULL);
513 if (cache_size) {
514 new->size = *cache_size / 1024;
515 rc = sysfs_create_file(&new->kobj,
516 &cache_size_attr.attr);
517 WARN_ON(rc);
518 }
519
520 /* coherency_line_size */
521 cache_line_size = of_get_property(np, info->line_size_prop, NULL);
522 if (cache_line_size) {
523 new->line_size = *cache_line_size;
524 rc = sysfs_create_file(&new->kobj,
525 &cache_line_size_attr.attr);
526 WARN_ON(rc);
527 }
528
529 /* number_of_sets */
530 nr_sets = of_get_property(np, info->nr_sets_prop, NULL);
531 if (nr_sets) {
532 new->nr_sets = *nr_sets;
533 rc = sysfs_create_file(&new->kobj,
534 &cache_nr_sets_attr.attr);
535 WARN_ON(rc);
536 }
537
538 /* ways_of_associativity */
539 if (new->nr_sets == 1) {
540 /* fully associative */
541 new->associativity = 0;
542 goto create_assoc;
543 }
544
545 if (new->nr_sets && new->size && new->line_size) {
546 /* If we have values for all of these we can derive
547 * the associativity. */
548 new->associativity =
549 ((new->size * 1024) / new->nr_sets) / new->line_size;
550create_assoc:
551 rc = sysfs_create_file(&new->kobj,
552 &cache_assoc_attr.attr);
553 WARN_ON(rc);
554 }
555
556 return new;
557err:
558 kfree(new);
559 return NULL;
560}
561
562static bool cache_is_unified(struct device_node *np)
563{
564 return of_get_property(np, "cache-unified", NULL);
565}
566
567static struct cache_desc * __cpuinit create_cache_index_info(struct device_node *np, struct kobject *parent, int index, int level)
568{
569 struct device_node *next_cache;
570 struct cache_desc *new, **end;
571
572 pr_debug("%s(node = %s, index = %d)\n", __func__, np->full_name, index);
573
574 if (cache_is_unified(np)) {
575 new = create_cache_desc(np, parent, index, level,
576 &ucache_info);
577 } else {
578 new = create_cache_desc(np, parent, index, level,
579 &dcache_info);
580 if (new) {
581 index++;
582 new->next = create_cache_desc(np, parent, index, level,
583 &icache_info);
584 }
585 }
586 if (!new)
587 return NULL;
588
589 end = &new->next;
590 while (*end)
591 end = &(*end)->next;
592
593 next_cache = of_find_next_cache_node(np);
594 if (!next_cache)
595 goto out;
596
597 *end = create_cache_index_info(next_cache, parent, ++index, ++level);
598
599 of_node_put(next_cache);
600out:
601 return new;
602}
603
604static void __cpuinit create_cache_info(struct sys_device *sysdev)
605{
606 struct kobject *cache_toplevel;
607 struct device_node *np = NULL;
608 int cpu = sysdev->id;
609
610 cache_toplevel = kobject_create_and_add("cache", &sysdev->kobj);
611 if (!cache_toplevel)
612 return;
613 per_cpu(cache_toplevel, cpu) = cache_toplevel;
614 np = of_get_cpu_node(cpu, NULL);
615 if (np != NULL) {
616 per_cpu(cache_desc, cpu) =
617 create_cache_index_info(np, cache_toplevel, 0, 1);
618 of_node_put(np);
619 }
620 return;
621}
622
623static void __cpuinit register_cpu_online(unsigned int cpu) 346static void __cpuinit register_cpu_online(unsigned int cpu)
624{ 347{
625 struct cpu *c = &per_cpu(cpu_devices, cpu); 348 struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -684,25 +407,10 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
684 sysdev_create_file(s, &attr_dscr); 407 sysdev_create_file(s, &attr_dscr);
685#endif /* CONFIG_PPC64 */ 408#endif /* CONFIG_PPC64 */
686 409
687 create_cache_info(s); 410 cacheinfo_cpu_online(cpu);
688} 411}
689 412
690#ifdef CONFIG_HOTPLUG_CPU 413#ifdef CONFIG_HOTPLUG_CPU
691static void remove_cache_info(struct sys_device *sysdev)
692{
693 struct kobject *cache_toplevel;
694 struct cache_desc *cache_desc;
695 int cpu = sysdev->id;
696
697 cache_desc = per_cpu(cache_desc, cpu);
698 if (cache_desc != NULL)
699 kobject_put(&cache_desc->kobj);
700
701 cache_toplevel = per_cpu(cache_toplevel, cpu);
702 if (cache_toplevel != NULL)
703 kobject_put(cache_toplevel);
704}
705
706static void unregister_cpu_online(unsigned int cpu) 414static void unregister_cpu_online(unsigned int cpu)
707{ 415{
708 struct cpu *c = &per_cpu(cpu_devices, cpu); 416 struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -769,7 +477,7 @@ static void unregister_cpu_online(unsigned int cpu)
769 sysdev_remove_file(s, &attr_dscr); 477 sysdev_remove_file(s, &attr_dscr);
770#endif /* CONFIG_PPC64 */ 478#endif /* CONFIG_PPC64 */
771 479
772 remove_cache_info(s); 480 cacheinfo_cpu_offline(cpu);
773} 481}
774#endif /* CONFIG_HOTPLUG_CPU */ 482#endif /* CONFIG_HOTPLUG_CPU */
775 483
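
The ~280 lines of open-coded cache sysfs support removed above are replaced by two hooks, cacheinfo_cpu_online() and cacheinfo_cpu_offline(), provided by the new arch/powerpc/kernel/cacheinfo.c. A minimal sketch of how the pair is meant to bracket CPU hotplug, assuming a notifier in the era's style (the skeleton below is illustrative, not code from this patch):

#include <linux/cpu.h>
#include <linux/notifier.h>

extern void cacheinfo_cpu_online(unsigned int cpu);
extern void cacheinfo_cpu_offline(unsigned int cpu);

static int __cpuinit cache_sysfs_notify(struct notifier_block *nb,
                                        unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;

        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                cacheinfo_cpu_online(cpu);      /* build cache/index* kobjects */
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                cacheinfo_cpu_offline(cpu);     /* tear the hierarchy down */
                break;
#endif
        }
        return NOTIFY_OK;
}
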
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 4314b39b6faf..ad123bced404 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -30,11 +30,11 @@
30#if defined(CONFIG_40x) || defined(CONFIG_8xx) 30#if defined(CONFIG_40x) || defined(CONFIG_8xx)
31static inline void _tlbil_all(void) 31static inline void _tlbil_all(void)
32{ 32{
33 asm volatile ("sync; tlbia; isync" : : : "memory") 33 asm volatile ("sync; tlbia; isync" : : : "memory");
34} 34}
35static inline void _tlbil_pid(unsigned int pid) 35static inline void _tlbil_pid(unsigned int pid)
36{ 36{
37 asm volatile ("sync; tlbia; isync" : : : "memory") 37 asm volatile ("sync; tlbia; isync" : : : "memory");
38} 38}
39#else /* CONFIG_40x || CONFIG_8xx */ 39#else /* CONFIG_40x || CONFIG_8xx */
40extern void _tlbil_all(void); 40extern void _tlbil_all(void);
@@ -47,7 +47,7 @@ extern void _tlbil_pid(unsigned int pid);
47#ifdef CONFIG_8xx 47#ifdef CONFIG_8xx
48static inline void _tlbil_va(unsigned long address, unsigned int pid) 48static inline void _tlbil_va(unsigned long address, unsigned int pid)
49{ 49{
50 asm volatile ("tlbie %0; sync" : : "r" (address) : "memory") 50 asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
51} 51}
52#else /* CONFIG_8xx */ 52#else /* CONFIG_8xx */
53extern void _tlbil_va(unsigned long address, unsigned int pid); 53extern void _tlbil_va(unsigned long address, unsigned int pid);
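
The only change in this hunk is the missing statement terminators: these static inlines could not have compiled for a CONFIG_40x/CONFIG_8xx build, which suggests those paths had not been build-tested. For reference, the corrected idiom, with the "memory" clobber acting as a compiler barrier so cached page-table values are not reused across the flush (stand-alone illustration, not the file's code):

static inline void tlbil_va_example(unsigned long address)
{
        /* %0 is a register input operand holding the effective address;
         * the "memory" clobber forces the compiler to treat the asm
         * statement as a barrier for memory accesses around it. */
        asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
}
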
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf81049e1e51..7393bd76d698 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -822,42 +822,50 @@ static void __init dump_numa_memory_topology(void)
822 * required. nid is the preferred node and end is the physical address of 822 * required. nid is the preferred node and end is the physical address of
823 * the highest address in the node. 823 * the highest address in the node.
824 * 824 *
825 * Returns the physical address of the memory. 825 * Returns the virtual address of the memory.
826 */ 826 */
827static void __init *careful_allocation(int nid, unsigned long size, 827static void __init *careful_zallocation(int nid, unsigned long size,
828 unsigned long align, 828 unsigned long align,
829 unsigned long end_pfn) 829 unsigned long end_pfn)
830{ 830{
831 void *ret;
831 int new_nid; 832 int new_nid;
832 unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); 833 unsigned long ret_paddr;
834
835 ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
833 836
834 /* retry over all memory */ 837 /* retry over all memory */
835 if (!ret) 838 if (!ret_paddr)
836 ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); 839 ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
837 840
838 if (!ret) 841 if (!ret_paddr)
839 panic("numa.c: cannot allocate %lu bytes on node %d", 842 panic("numa.c: cannot allocate %lu bytes for node %d",
840 size, nid); 843 size, nid);
841 844
845 ret = __va(ret_paddr);
846
842 /* 847 /*
843 * If the memory came from a previously allocated node, we must 848 * We initialize the nodes in numeric order: 0, 1, 2...
844 * retry with the bootmem allocator. 849 * and hand over control from the LMB allocator to the
850 * bootmem allocator. If this function is called for
851 * node 5, then we know that all nodes <5 are using the
852 * bootmem allocator instead of the LMB allocator.
853 *
854 * So, check the nid from which this allocation came
855 * and double check to see if we need to use bootmem
856 * instead of the LMB. We don't free the LMB memory
857 * since it would be useless.
845 */ 858 */
846 new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT); 859 new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
847 if (new_nid < nid) { 860 if (new_nid < nid) {
848 ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid), 861 ret = __alloc_bootmem_node(NODE_DATA(new_nid),
849 size, align, 0); 862 size, align, 0);
850 863
851 if (!ret) 864 dbg("alloc_bootmem %p %lx\n", ret, size);
852 panic("numa.c: cannot allocate %lu bytes on node %d",
853 size, new_nid);
854
855 ret = __pa(ret);
856
857 dbg("alloc_bootmem %lx %lx\n", ret, size);
858 } 865 }
859 866
860 return (void *)ret; 867 memset(ret, 0, size);
868 return ret;
861} 869}
862 870
863static struct notifier_block __cpuinitdata ppc64_numa_nb = { 871static struct notifier_block __cpuinitdata ppc64_numa_nb = {
@@ -952,7 +960,7 @@ void __init do_init_bootmem(void)
952 960
953 for_each_online_node(nid) { 961 for_each_online_node(nid) {
954 unsigned long start_pfn, end_pfn; 962 unsigned long start_pfn, end_pfn;
955 unsigned long bootmem_paddr; 963 void *bootmem_vaddr;
956 unsigned long bootmap_pages; 964 unsigned long bootmap_pages;
957 965
958 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 966 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
@@ -964,11 +972,9 @@ void __init do_init_bootmem(void)
964 * previous nodes' bootmem to be initialized and have 972 * previous nodes' bootmem to be initialized and have
965 * all reserved areas marked. 973 * all reserved areas marked.
966 */ 974 */
967 NODE_DATA(nid) = careful_allocation(nid, 975 NODE_DATA(nid) = careful_zallocation(nid,
968 sizeof(struct pglist_data), 976 sizeof(struct pglist_data),
969 SMP_CACHE_BYTES, end_pfn); 977 SMP_CACHE_BYTES, end_pfn);
970 NODE_DATA(nid) = __va(NODE_DATA(nid));
971 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
972 978
973 dbg("node %d\n", nid); 979 dbg("node %d\n", nid);
974 dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); 980 dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
@@ -984,20 +990,20 @@ void __init do_init_bootmem(void)
984 dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); 990 dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
985 991
986 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 992 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
987 bootmem_paddr = (unsigned long)careful_allocation(nid, 993 bootmem_vaddr = careful_zallocation(nid,
988 bootmap_pages << PAGE_SHIFT, 994 bootmap_pages << PAGE_SHIFT,
989 PAGE_SIZE, end_pfn); 995 PAGE_SIZE, end_pfn);
990 memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
991 996
992 dbg("bootmap_paddr = %lx\n", bootmem_paddr); 997 dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
993 998
994 init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, 999 init_bootmem_node(NODE_DATA(nid),
1000 __pa(bootmem_vaddr) >> PAGE_SHIFT,
995 start_pfn, end_pfn); 1001 start_pfn, end_pfn);
996 1002
997 free_bootmem_with_active_regions(nid, end_pfn); 1003 free_bootmem_with_active_regions(nid, end_pfn);
998 /* 1004 /*
999 * Be very careful about moving this around. Future 1005 * Be very careful about moving this around. Future
1000 * calls to careful_allocation() depend on this getting 1006 * calls to careful_zallocation() depend on this getting
1001 * done correctly. 1007 * done correctly.
1002 */ 1008 */
1003 mark_reserved_regions_for_nid(nid); 1009 mark_reserved_regions_for_nid(nid);
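
The rename to careful_zallocation() captures the new contract: the helper now returns a zeroed virtual address, so both call sites below can drop their own __va()/memset() fixups. A condensed paraphrase of the flow (names taken from the patch; body abbreviated, allocation-failure panics elided):

/* The caller always receives a zeroed *virtual* address, whichever
 * allocator ended up satisfying the request. */
static void __init *careful_zallocation_sketch(int nid, unsigned long size,
                                               unsigned long align,
                                               unsigned long end_pfn)
{
        unsigned long paddr;
        void *ret;
        int new_nid;

        /* first choice: LMB memory below the node's end */
        paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
        if (!paddr)     /* retry anywhere in DRAM */
                paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

        ret = __va(paddr);

        /* Nodes come up in numeric order, so if the memory landed on a
         * lower-numbered node, that node has already switched over to
         * bootmem: reallocate there instead of using the LMB block. */
        new_nid = early_pfn_to_nid(paddr >> PAGE_SHIFT);
        if (new_nid < nid)
                ret = __alloc_bootmem_node(NODE_DATA(new_nid),
                                           size, align, 0);

        memset(ret, 0, size);   /* the 'z' in zallocation */
        return ret;
}
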
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 38ff35f2142a..22972cd83cc9 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -266,7 +266,8 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
266 /* The PTE should never be already set nor present in the 266 /* The PTE should never be already set nor present in the
267 * hash table 267 * hash table
268 */ 268 */
269 BUG_ON(pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)); 269 BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
270 flags);
270 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, 271 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
271 __pgprot(flags))); 272 __pgprot(flags)));
272 } 273 }
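
The widened BUG_ON() relaxes the assertion: a map_page() call with flags == 0 is now tolerated even when the PTE is already present or hashed, and the kernel only BUGs when a real, non-empty mapping would overwrite a live PTE. As an illustrative predicate only (not code from the file):

/* Fires only when a non-empty mapping would clobber a live PTE. */
static int would_bug(unsigned long pte_bits, int flags)
{
        return (pte_bits & (_PAGE_PRESENT | _PAGE_HASHPTE)) && flags;
}
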
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 803a64c02b06..39ac22b13c73 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -189,8 +189,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
189 smp_call_function(do_flush_tlb_mm_ipi, NULL, 1); 189 smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
190 _tlbil_pid(0); 190 _tlbil_pid(0);
191 preempt_enable(); 191 preempt_enable();
192#endif 192#else
193 _tlbil_pid(0); 193 _tlbil_pid(0);
194#endif
194} 195}
195EXPORT_SYMBOL(flush_tlb_kernel_range); 196EXPORT_SYMBOL(flush_tlb_kernel_range);
196 197
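
Before this fix, the SMP build of flush_tlb_kernel_range() fell through the #endif and issued a second, redundant _tlbil_pid(0) after the IPI path had already flushed; converting the #endif into #else/#endif gives each configuration exactly one flush. The corrected shape:

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_SMP
        preempt_disable();
        smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);  /* remote CPUs */
        _tlbil_pid(0);                                    /* this CPU */
        preempt_enable();
#else
        _tlbil_pid(0);          /* UP: a single local flush suffices */
#endif
}
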
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index 628009c01958..dfdbffa06818 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -79,7 +79,7 @@ struct spu_buffer {
79 * the vma-to-fileoffset map. 79 * the vma-to-fileoffset map.
80 */ 80 */
81struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu, 81struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
82 u64 objectid); 82 unsigned long objectid);
83unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map, 83unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
84 unsigned int vma, const struct spu *aSpu, 84 unsigned int vma, const struct spu *aSpu,
85 int *grd_val); 85 int *grd_val);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
index ae7c34f37e1c..98367a0255f3 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_common.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -42,7 +42,7 @@ static struct of_device_id mpc52xx_bus_ids[] __initdata = {
42 * from interrupt context while node mapping (which calls ioremap()) 42 * from interrupt context while node mapping (which calls ioremap())
43 * cannot be used at such point. 43 * cannot be used at such point.
44 */ 44 */
45static spinlock_t mpc52xx_lock = SPIN_LOCK_UNLOCKED; 45static DEFINE_SPINLOCK(mpc52xx_lock);
46static struct mpc52xx_gpt __iomem *mpc52xx_wdt; 46static struct mpc52xx_gpt __iomem *mpc52xx_wdt;
47static struct mpc52xx_cdm __iomem *mpc52xx_cdm; 47static struct mpc52xx_cdm __iomem *mpc52xx_cdm;
48 48
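
SPIN_LOCK_UNLOCKED was deprecated because a shared static initializer cannot give each lock its own lockdep class key; DEFINE_SPINLOCK() declares and initializes the lock in one definition, so lockdep can tell locks apart. A minimal sketch of the preferred pattern:

#include <linux/spinlock.h>

/* Preferred: declaration and initialization in one go, with a unique
 * lockdep class key per lock definition. */
static DEFINE_SPINLOCK(example_lock);

static void example_use(void)
{
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        /* ... touch the shared state ... */
        spin_unlock_irqrestore(&example_lock, flags);
}
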
diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
index a428f8d1ac80..5177bdd2c62a 100644
--- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
@@ -42,7 +42,7 @@ static void __init mpc831x_rdb_setup_arch(void)
42 mpc831x_usb_cfg(); 42 mpc831x_usb_cfg();
43} 43}
44 44
45void __init mpc831x_rdb_init_IRQ(void) 45static void __init mpc831x_rdb_init_IRQ(void)
46{ 46{
47 struct device_node *np; 47 struct device_node *np;
48 48
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index ec43477caa63..ec0b401bc9cf 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -49,8 +49,6 @@
49#define DBG(fmt...) 49#define DBG(fmt...)
50#endif 50#endif
51 51
52static u8 *bcsr_regs = NULL;
53
54/* ************************************************************************ 52/* ************************************************************************
55 * 53 *
56 * Setup the architecture 54 * Setup the architecture
@@ -59,13 +57,14 @@ static u8 *bcsr_regs = NULL;
59static void __init mpc832x_sys_setup_arch(void) 57static void __init mpc832x_sys_setup_arch(void)
60{ 58{
61 struct device_node *np; 59 struct device_node *np;
60 u8 __iomem *bcsr_regs = NULL;
62 61
63 if (ppc_md.progress) 62 if (ppc_md.progress)
64 ppc_md.progress("mpc832x_sys_setup_arch()", 0); 63 ppc_md.progress("mpc832x_sys_setup_arch()", 0);
65 64
66 /* Map BCSR area */ 65 /* Map BCSR area */
67 np = of_find_node_by_name(NULL, "bcsr"); 66 np = of_find_node_by_name(NULL, "bcsr");
68 if (np != 0) { 67 if (np) {
69 struct resource res; 68 struct resource res;
70 69
71 of_address_to_resource(np, 0, &res); 70 of_address_to_resource(np, 0, &res);
@@ -93,9 +92,9 @@ static void __init mpc832x_sys_setup_arch(void)
93 != NULL){ 92 != NULL){
94 /* Reset the Ethernet PHYs */ 93 /* Reset the Ethernet PHYs */
95#define BCSR8_FETH_RST 0x50 94#define BCSR8_FETH_RST 0x50
96 bcsr_regs[8] &= ~BCSR8_FETH_RST; 95 clrbits8(&bcsr_regs[8], BCSR8_FETH_RST);
97 udelay(1000); 96 udelay(1000);
98 bcsr_regs[8] |= BCSR8_FETH_RST; 97 setbits8(&bcsr_regs[8], BCSR8_FETH_RST);
99 iounmap(bcsr_regs); 98 iounmap(bcsr_regs);
100 of_node_put(np); 99 of_node_put(np);
101 } 100 }
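
Beyond scoping bcsr_regs into the function, the hunk stops dereferencing an __iomem pointer directly (bcsr_regs[8] &= ...) and goes through the powerpc clrbits8()/setbits8() helpers, which perform the read-modify-write via the in_8()/out_8() accessors. A sketch of the accessor-based PHY reset pulse, assuming a mapped byte-wide BCSR block:

#include <linux/delay.h>
#include <asm/io.h>

#define BCSR8_FETH_RST  0x50

static void phy_reset_pulse(u8 __iomem *bcsr)
{
        clrbits8(&bcsr[8], BCSR8_FETH_RST);     /* assert reset */
        udelay(1000);
        setbits8(&bcsr[8], BCSR8_FETH_RST);     /* release it */
}
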
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 0300268ce5b8..2a1295f19832 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -38,6 +38,7 @@
38#define DBG(fmt...) 38#define DBG(fmt...)
39#endif 39#endif
40 40
41#ifdef CONFIG_QUICC_ENGINE
41static void mpc83xx_spi_activate_cs(u8 cs, u8 polarity) 42static void mpc83xx_spi_activate_cs(u8 cs, u8 polarity)
42{ 43{
43 pr_debug("%s %d %d\n", __func__, cs, polarity); 44 pr_debug("%s %d %d\n", __func__, cs, polarity);
@@ -77,8 +78,8 @@ static int __init mpc832x_spi_init(void)
77 mpc83xx_spi_activate_cs, 78 mpc83xx_spi_activate_cs,
78 mpc83xx_spi_deactivate_cs); 79 mpc83xx_spi_deactivate_cs);
79} 80}
80
81machine_device_initcall(mpc832x_rdb, mpc832x_spi_init); 81machine_device_initcall(mpc832x_rdb, mpc832x_spi_init);
82#endif /* CONFIG_QUICC_ENGINE */
82 83
83/* ************************************************************************ 84/* ************************************************************************
84 * 85 *
@@ -130,7 +131,7 @@ static int __init mpc832x_declare_of_platform_devices(void)
130} 131}
131machine_device_initcall(mpc832x_rdb, mpc832x_declare_of_platform_devices); 132machine_device_initcall(mpc832x_rdb, mpc832x_declare_of_platform_devices);
132 133
133void __init mpc832x_rdb_init_IRQ(void) 134static void __init mpc832x_rdb_init_IRQ(void)
134{ 135{
135 136
136 struct device_node *np; 137 struct device_node *np;
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 9d46e5bdd101..09e9d6fb7411 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/stddef.h> 19#include <linux/stddef.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21#include <linux/compiler.h>
21#include <linux/init.h> 22#include <linux/init.h>
22#include <linux/errno.h> 23#include <linux/errno.h>
23#include <linux/reboot.h> 24#include <linux/reboot.h>
@@ -43,6 +44,7 @@
43#include <asm/udbg.h> 44#include <asm/udbg.h>
44#include <sysdev/fsl_soc.h> 45#include <sysdev/fsl_soc.h>
45#include <sysdev/fsl_pci.h> 46#include <sysdev/fsl_pci.h>
47#include <sysdev/simple_gpio.h>
46#include <asm/qe.h> 48#include <asm/qe.h>
47#include <asm/qe_ic.h> 49#include <asm/qe_ic.h>
48 50
@@ -55,8 +57,6 @@
55#define DBG(fmt...) 57#define DBG(fmt...)
56#endif 58#endif
57 59
58static u8 *bcsr_regs = NULL;
59
60/* ************************************************************************ 60/* ************************************************************************
61 * 61 *
62 * Setup the architecture 62 * Setup the architecture
@@ -65,13 +65,14 @@ static u8 *bcsr_regs = NULL;
65static void __init mpc836x_mds_setup_arch(void) 65static void __init mpc836x_mds_setup_arch(void)
66{ 66{
67 struct device_node *np; 67 struct device_node *np;
68 u8 __iomem *bcsr_regs = NULL;
68 69
69 if (ppc_md.progress) 70 if (ppc_md.progress)
70 ppc_md.progress("mpc836x_mds_setup_arch()", 0); 71 ppc_md.progress("mpc836x_mds_setup_arch()", 0);
71 72
72 /* Map BCSR area */ 73 /* Map BCSR area */
73 np = of_find_node_by_name(NULL, "bcsr"); 74 np = of_find_node_by_name(NULL, "bcsr");
74 if (np != 0) { 75 if (np) {
75 struct resource res; 76 struct resource res;
76 77
77 of_address_to_resource(np, 0, &res); 78 of_address_to_resource(np, 0, &res);
@@ -93,6 +94,16 @@ static void __init mpc836x_mds_setup_arch(void)
93 94
94 for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) 95 for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
95 par_io_of_config(np); 96 par_io_of_config(np);
97#ifdef CONFIG_QE_USB
98 /* Must fixup Par IO before QE GPIO chips are registered. */
99 par_io_config_pin(1, 2, 1, 0, 3, 0); /* USBOE */
100 par_io_config_pin(1, 3, 1, 0, 3, 0); /* USBTP */
101 par_io_config_pin(1, 8, 1, 0, 1, 0); /* USBTN */
102 par_io_config_pin(1, 10, 2, 0, 3, 0); /* USBRXD */
103 par_io_config_pin(1, 9, 2, 1, 3, 0); /* USBRP */
104 par_io_config_pin(1, 11, 2, 1, 3, 0); /* USBRN */
105 par_io_config_pin(2, 20, 2, 0, 1, 0); /* CLK21 */
106#endif /* CONFIG_QE_USB */
96 } 107 }
97 108
98 if ((np = of_find_compatible_node(NULL, "network", "ucc_geth")) 109 if ((np = of_find_compatible_node(NULL, "network", "ucc_geth"))
@@ -151,6 +162,70 @@ static int __init mpc836x_declare_of_platform_devices(void)
151} 162}
152machine_device_initcall(mpc836x_mds, mpc836x_declare_of_platform_devices); 163machine_device_initcall(mpc836x_mds, mpc836x_declare_of_platform_devices);
153 164
165#ifdef CONFIG_QE_USB
166static int __init mpc836x_usb_cfg(void)
167{
168 u8 __iomem *bcsr;
169 struct device_node *np;
170 const char *mode;
171 int ret = 0;
172
173 np = of_find_compatible_node(NULL, NULL, "fsl,mpc8360mds-bcsr");
174 if (!np)
175 return -ENODEV;
176
177 bcsr = of_iomap(np, 0);
178 of_node_put(np);
179 if (!bcsr)
180 return -ENOMEM;
181
182 np = of_find_compatible_node(NULL, NULL, "fsl,mpc8323-qe-usb");
183 if (!np) {
184 ret = -ENODEV;
185 goto err;
186 }
187
188#define BCSR8_TSEC1M_MASK (0x3 << 6)
189#define BCSR8_TSEC1M_RGMII (0x0 << 6)
190#define BCSR8_TSEC2M_MASK (0x3 << 4)
191#define BCSR8_TSEC2M_RGMII (0x0 << 4)
192 /*
193 * Default is GMII (2), but we should set it to RGMII (0) if we use
194 * USB (Eth PHY is in RGMII mode anyway).
195 */
196 clrsetbits_8(&bcsr[8], BCSR8_TSEC1M_MASK | BCSR8_TSEC2M_MASK,
197 BCSR8_TSEC1M_RGMII | BCSR8_TSEC2M_RGMII);
198
199#define BCSR13_USBMASK 0x0f
200#define BCSR13_nUSBEN 0x08 /* 1 - Disable, 0 - Enable */
201#define BCSR13_USBSPEED 0x04 /* 1 - Full, 0 - Low */
202#define BCSR13_USBMODE 0x02 /* 1 - Host, 0 - Function */
203#define BCSR13_nUSBVCC 0x01 /* 1 - gets VBUS, 0 - supplies VBUS */
204
205 clrsetbits_8(&bcsr[13], BCSR13_USBMASK, BCSR13_USBSPEED);
206
207 mode = of_get_property(np, "mode", NULL);
208 if (mode && !strcmp(mode, "peripheral")) {
209 setbits8(&bcsr[13], BCSR13_nUSBVCC);
210 qe_usb_clock_set(QE_CLK21, 48000000);
211 } else {
212 setbits8(&bcsr[13], BCSR13_USBMODE);
213 /*
214 * The BCSR GPIOs are used to control power and
215 * speed of the USB transceiver. This is needed for
216 * the USB Host only.
217 */
218 simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio");
219 }
220
221 of_node_put(np);
222err:
223 iounmap(bcsr);
224 return ret;
225}
226machine_arch_initcall(mpc836x_mds, mpc836x_usb_cfg);
227#endif /* CONFIG_QE_USB */
228
154static void __init mpc836x_mds_init_IRQ(void) 229static void __init mpc836x_mds_init_IRQ(void)
155{ 230{
156 struct device_node *np; 231 struct device_node *np;
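
Note the resource discipline in the mpc836x_usb_cfg() addition above: the BCSR node reference is dropped with of_node_put() as soon as of_iomap() has produced a mapping, and iounmap() runs on every exit path. The skeleton of that lookup/map/cleanup pattern (labels unwind in reverse order of acquisition; names hypothetical):

static int __init usb_cfg_skeleton(void)
{
        struct device_node *np;
        u8 __iomem *bcsr;
        int ret = 0;

        np = of_find_compatible_node(NULL, NULL, "fsl,mpc8360mds-bcsr");
        if (!np)
                return -ENODEV;

        bcsr = of_iomap(np, 0);
        of_node_put(np);        /* the mapping outlives the node ref */
        if (!bcsr)
                return -ENOMEM;

        /* ... poke BCSR bits here; set ret on failure ... */

        iounmap(bcsr);
        return ret;
}
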
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index a5273bb28e1b..b0090aac9642 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -51,8 +51,9 @@ static void __init mpc836x_rdk_setup_arch(void)
51 for_each_compatible_node(np, "pci", "fsl,mpc8349-pci") 51 for_each_compatible_node(np, "pci", "fsl,mpc8349-pci")
52 mpc83xx_add_bridge(np); 52 mpc83xx_add_bridge(np);
53#endif 53#endif
54 54#ifdef CONFIG_QUICC_ENGINE
55 qe_reset(); 55 qe_reset();
56#endif
56} 57}
57 58
58static void __init mpc836x_rdk_init_IRQ(void) 59static void __init mpc836x_rdk_init_IRQ(void)
@@ -71,13 +72,14 @@ static void __init mpc836x_rdk_init_IRQ(void)
71 */ 72 */
72 ipic_set_default_priority(); 73 ipic_set_default_priority();
73 of_node_put(np); 74 of_node_put(np);
74 75#ifdef CONFIG_QUICC_ENGINE
75 np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic"); 76 np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
76 if (!np) 77 if (!np)
77 return; 78 return;
78 79
79 qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic); 80 qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic);
80 of_node_put(np); 81 of_node_put(np);
82#endif
81} 83}
82 84
83/* 85/*
diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c
index 8bb13c807142..530ef990ca7c 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c
@@ -26,7 +26,6 @@
26#define BCSR12_USB_SER_MASK 0x8a 26#define BCSR12_USB_SER_MASK 0x8a
27#define BCSR12_USB_SER_PIN 0x80 27#define BCSR12_USB_SER_PIN 0x80
28#define BCSR12_USB_SER_DEVICE 0x02 28#define BCSR12_USB_SER_DEVICE 0x02
29extern int mpc837x_usb_cfg(void);
30 29
31static int mpc837xmds_usb_cfg(void) 30static int mpc837xmds_usb_cfg(void)
32{ 31{
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index da030afa2e2c..1d096545322b 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -21,8 +21,6 @@
21 21
22#include "mpc83xx.h" 22#include "mpc83xx.h"
23 23
24extern int mpc837x_usb_cfg(void);
25
26/* ************************************************************************ 24/* ************************************************************************
27 * 25 *
28 * Setup the architecture 26 * Setup the architecture
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index 2a7cbabb410a..83cfe51526ec 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -61,6 +61,7 @@
61 61
62extern void mpc83xx_restart(char *cmd); 62extern void mpc83xx_restart(char *cmd);
63extern long mpc83xx_time_init(void); 63extern long mpc83xx_time_init(void);
64extern int mpc837x_usb_cfg(void);
64extern int mpc834x_usb_cfg(void); 65extern int mpc834x_usb_cfg(void);
65extern int mpc831x_usb_cfg(void); 66extern int mpc831x_usb_cfg(void);
66 67
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index a8301c8ad537..7326d904202c 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -148,6 +148,9 @@ static int mpc85xx_exclude_device(struct pci_controller *hose,
148/* 148/*
149 * Setup the architecture 149 * Setup the architecture
150 */ 150 */
151#ifdef CONFIG_SMP
152extern void __init mpc85xx_smp_init(void);
153#endif
151static void __init mpc85xx_ds_setup_arch(void) 154static void __init mpc85xx_ds_setup_arch(void)
152{ 155{
153#ifdef CONFIG_PCI 156#ifdef CONFIG_PCI
@@ -173,6 +176,10 @@ static void __init mpc85xx_ds_setup_arch(void)
173 ppc_md.pci_exclude_device = mpc85xx_exclude_device; 176 ppc_md.pci_exclude_device = mpc85xx_exclude_device;
174#endif 177#endif
175 178
179#ifdef CONFIG_SMP
180 mpc85xx_smp_init();
181#endif
182
176 printk("MPC85xx DS board from Freescale Semiconductor\n"); 183 printk("MPC85xx DS board from Freescale Semiconductor\n");
177} 184}
178 185
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index d652c713f496..79a0df17078b 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -58,6 +58,7 @@ smp_85xx_kick_cpu(int nr)
58 58
59 if (cpu_rel_addr == NULL) { 59 if (cpu_rel_addr == NULL) {
60 printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr); 60 printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
61 local_irq_restore(flags);
61 return; 62 return;
62 } 63 }
63 64
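
The one-line smp_85xx_kick_cpu() fix is the classic error-path rule: every return after local_irq_save() must restore the saved flags, or the CPU is left with interrupts disabled. A condensed sketch of the corrected shape (details of the kick elided):

static void kick_cpu_sketch(int nr, const u64 *cpu_rel_addr)
{
        unsigned long flags;

        local_irq_save(flags);

        if (cpu_rel_addr == NULL) {
                printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
                local_irq_restore(flags);  /* the fix: no early return
                                            * with irqs still off */
                return;
        }

        /* ... write the release address, spin the secondary up ... */

        local_irq_restore(flags);
}
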
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 47e956c871fe..47fe2bea9865 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -312,4 +312,15 @@ config MPC8xxx_GPIO
312 Say Y here if you're going to use hardware that connects to the 312 Say Y here if you're going to use hardware that connects to the
313 MPC831x/834x/837x/8572/8610 GPIOs. 313 MPC831x/834x/837x/8572/8610 GPIOs.
314 314
315config SIMPLE_GPIO
316 bool "Support for simple, memory-mapped GPIO controllers"
317 depends on PPC
318 select GENERIC_GPIO
319 select ARCH_REQUIRE_GPIOLIB
320 help
321 Say Y here to support simple, memory-mapped GPIO controllers.
322 These are usually BCSRs used to control board's switches, LEDs,
 322 These are usually BCSRs used to control the board's switches, LEDs,
 323 chip-selects, Ethernet/USB PHY power and various other small
 324 on-board peripherals.
325
315endmenu 326endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 3d0c776f888d..e868b5c50723 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -231,7 +231,7 @@ config VIRT_CPU_ACCOUNTING
231 If in doubt, say Y here. 231 If in doubt, say Y here.
232 232
233config SMP 233config SMP
234 depends on PPC_STD_MMU 234 depends on PPC_STD_MMU || FSL_BOOKE
235 bool "Symmetric multi-processing support" 235 bool "Symmetric multi-processing support"
236 ---help--- 236 ---help---
237 This enables support for systems with more than one CPU. If you have 237 This enables support for systems with more than one CPU. If you have
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 2e67bd840e01..35b1ec492715 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -44,8 +44,8 @@ static DEFINE_SPINLOCK(beat_htab_lock);
44 44
45static inline unsigned int beat_read_mask(unsigned hpte_group) 45static inline unsigned int beat_read_mask(unsigned hpte_group)
46{ 46{
47 unsigned long hpte_v[5];
48 unsigned long rmask = 0; 47 unsigned long rmask = 0;
48 u64 hpte_v[5];
49 49
50 beat_read_htab_entries(0, hpte_group + 0, hpte_v); 50 beat_read_htab_entries(0, hpte_group + 0, hpte_v);
51 if (!(hpte_v[0] & HPTE_V_BOLTED)) 51 if (!(hpte_v[0] & HPTE_V_BOLTED))
@@ -93,8 +93,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
93 int psize, int ssize) 93 int psize, int ssize)
94{ 94{
95 unsigned long lpar_rc; 95 unsigned long lpar_rc;
96 unsigned long slot; 96 u64 hpte_v, hpte_r, slot;
97 unsigned long hpte_v, hpte_r;
98 97
99 /* same as iseries */ 98 /* same as iseries */
100 if (vflags & HPTE_V_SECONDARY) 99 if (vflags & HPTE_V_SECONDARY)
@@ -153,8 +152,9 @@ static long beat_lpar_hpte_remove(unsigned long hpte_group)
153 152
154static unsigned long beat_lpar_hpte_getword0(unsigned long slot) 153static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
155{ 154{
156 unsigned long dword0, dword[5]; 155 unsigned long dword0;
157 unsigned long lpar_rc; 156 unsigned long lpar_rc;
157 u64 dword[5];
158 158
159 lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword); 159 lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
160 160
@@ -170,7 +170,7 @@ static void beat_lpar_hptab_clear(void)
170 unsigned long size_bytes = 1UL << ppc64_pft_size; 170 unsigned long size_bytes = 1UL << ppc64_pft_size;
171 unsigned long hpte_count = size_bytes >> 4; 171 unsigned long hpte_count = size_bytes >> 4;
172 int i; 172 int i;
173 unsigned long dummy0, dummy1; 173 u64 dummy0, dummy1;
174 174
175 /* TODO: Use bulk call */ 175 /* TODO: Use bulk call */
176 for (i = 0; i < hpte_count; i++) 176 for (i = 0; i < hpte_count; i++)
@@ -189,7 +189,8 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
189 int psize, int ssize, int local) 189 int psize, int ssize, int local)
190{ 190{
191 unsigned long lpar_rc; 191 unsigned long lpar_rc;
192 unsigned long dummy0, dummy1, want_v; 192 u64 dummy0, dummy1;
193 unsigned long want_v;
193 194
194 want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); 195 want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
195 196
@@ -255,7 +256,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
255 unsigned long ea, 256 unsigned long ea,
256 int psize, int ssize) 257 int psize, int ssize)
257{ 258{
258 unsigned long lpar_rc, slot, vsid, va, dummy0, dummy1; 259 unsigned long lpar_rc, slot, vsid, va;
260 u64 dummy0, dummy1;
259 261
260 vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); 262 vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
261 va = (vsid << 28) | (ea & 0x0fffffff); 263 va = (vsid << 28) | (ea & 0x0fffffff);
@@ -276,7 +278,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
276{ 278{
277 unsigned long want_v; 279 unsigned long want_v;
278 unsigned long lpar_rc; 280 unsigned long lpar_rc;
279 unsigned long dummy1, dummy2; 281 u64 dummy1, dummy2;
280 unsigned long flags; 282 unsigned long flags;
281 283
282 DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", 284 DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
@@ -315,8 +317,7 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
315 int psize, int ssize) 317 int psize, int ssize)
316{ 318{
317 unsigned long lpar_rc; 319 unsigned long lpar_rc;
318 unsigned long slot; 320 u64 hpte_v, hpte_r, slot;
319 unsigned long hpte_v, hpte_r;
320 321
321 /* same as iseries */ 322 /* same as iseries */
322 if (vflags & HPTE_V_SECONDARY) 323 if (vflags & HPTE_V_SECONDARY)
diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c
index 6b418f6b6175..350735bc8888 100644
--- a/arch/powerpc/platforms/cell/beat_udbg.c
+++ b/arch/powerpc/platforms/cell/beat_udbg.c
@@ -40,8 +40,8 @@ static void udbg_putc_beat(char c)
40} 40}
41 41
42/* Buffered chars getc */ 42/* Buffered chars getc */
43static long inbuflen; 43static u64 inbuflen;
44static long inbuf[2]; /* must be 2 longs */ 44static u64 inbuf[2]; /* must be 2 u64s */
45 45
46static int udbg_getc_poll_beat(void) 46static int udbg_getc_poll_beat(void)
47{ 47{
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
index 70fa7aef5edd..20472e487b6f 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
@@ -54,7 +54,7 @@ int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
54{ 54{
55 struct cbe_pmd_regs __iomem *pmd_regs; 55 struct cbe_pmd_regs __iomem *pmd_regs;
56 struct cbe_mic_tm_regs __iomem *mic_tm_regs; 56 struct cbe_mic_tm_regs __iomem *mic_tm_regs;
57 u64 flags; 57 unsigned long flags;
58 u64 value; 58 u64 value;
59#ifdef DEBUG 59#ifdef DEBUG
60 long time; 60 long time;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 2d5bb22d6c09..28c04dab2633 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void)
148 148
149 iic = &__get_cpu_var(iic); 149 iic = &__get_cpu_var(iic);
150 *(unsigned long *) &pending = 150 *(unsigned long *) &pending =
151 in_be64((unsigned long __iomem *) &iic->regs->pending_destr); 151 in_be64((u64 __iomem *) &iic->regs->pending_destr);
152 if (!(pending.flags & CBE_IIC_IRQ_VALID)) 152 if (!(pending.flags & CBE_IIC_IRQ_VALID))
153 return NO_IRQ; 153 return NO_IRQ;
154 virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending)); 154 virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
index b5f84e8f0899..059cad6c3f69 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/platforms/cell/io-workarounds.c
@@ -130,14 +130,14 @@ static const struct ppc_pci_io __devinitconst iowa_pci_io = {
130 130
131}; 131};
132 132
133static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size, 133static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
134 unsigned long flags) 134 unsigned long flags)
135{ 135{
136 struct iowa_bus *bus; 136 struct iowa_bus *bus;
137 void __iomem *res = __ioremap(addr, size, flags); 137 void __iomem *res = __ioremap(addr, size, flags);
138 int busno; 138 int busno;
139 139
140 bus = iowa_pci_find(0, addr); 140 bus = iowa_pci_find(0, (unsigned long)addr);
141 if (bus != NULL) { 141 if (bus != NULL) {
142 busno = bus - iowa_busses; 142 busno = bus - iowa_busses;
143 PCI_SET_ADDR_TOKEN(res, busno + 1); 143 PCI_SET_ADDR_TOKEN(res, busno + 1);
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 86db4dd170a0..88d94b59a7cb 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -150,8 +150,8 @@ static int cbe_nr_iommus;
150static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, 150static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
151 long n_ptes) 151 long n_ptes)
152{ 152{
153 unsigned long __iomem *reg; 153 u64 __iomem *reg;
154 unsigned long val; 154 u64 val;
155 long n; 155 long n;
156 156
157 reg = iommu->xlate_regs + IOC_IOPT_CacheInvd; 157 reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 15c62d3ca129..3bf908e2873a 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -314,7 +314,7 @@ extern char *isolated_loader;
314 * we need to call spu_release(ctx) before sleeping, and 314 * we need to call spu_release(ctx) before sleeping, and
315 * then spu_acquire(ctx) when awoken. 315 * then spu_acquire(ctx) when awoken.
316 * 316 *
317 * Returns with state_mutex re-acquired when successfull or 317 * Returns with state_mutex re-acquired when successful or
318 * with -ERESTARTSYS and the state_mutex dropped when interrupted. 318 * with -ERESTARTSYS and the state_mutex dropped when interrupted.
319 */ 319 */
320 320
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index ed3753d8c109..7ddd0a2c8027 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -10,18 +10,21 @@ menu "iSeries device drivers"
10config VIODASD 10config VIODASD
11 tristate "iSeries Virtual I/O disk support" 11 tristate "iSeries Virtual I/O disk support"
12 depends on BLOCK 12 depends on BLOCK
13 select VIOPATH
13 help 14 help
14 If you are running on an iSeries system and you want to use 15 If you are running on an iSeries system and you want to use
15 virtual disks created and managed by OS/400, say Y. 16 virtual disks created and managed by OS/400, say Y.
16 17
17config VIOCD 18config VIOCD
18 tristate "iSeries Virtual I/O CD support" 19 tristate "iSeries Virtual I/O CD support"
20 select VIOPATH
19 help 21 help
20 If you are running Linux on an IBM iSeries system and you want to 22 If you are running Linux on an IBM iSeries system and you want to
21 read a CD drive owned by OS/400, say Y here. 23 read a CD drive owned by OS/400, say Y here.
22 24
23config VIOTAPE 25config VIOTAPE
24 tristate "iSeries Virtual Tape Support" 26 tristate "iSeries Virtual Tape Support"
27 select VIOPATH
25 help 28 help
26 If you are running Linux on an iSeries system and you want Linux 29 If you are running Linux on an iSeries system and you want Linux
27 to read and/or write a tape drive owned by OS/400, say Y here. 30 to read and/or write a tape drive owned by OS/400, say Y here.
@@ -30,5 +33,3 @@ endmenu
30 33
31config VIOPATH 34config VIOPATH
32 bool 35 bool
33 depends on VIODASD || VIOCD || VIOTAPE || ISERIES_VETH
34 default y
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 70b688c1aefb..24519b96d6ad 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -23,6 +23,7 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/seq_file.h> 24#include <linux/seq_file.h>
25#include <linux/kdev_t.h> 25#include <linux/kdev_t.h>
26#include <linux/kexec.h>
26#include <linux/major.h> 27#include <linux/major.h>
27#include <linux/root_dev.h> 28#include <linux/root_dev.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
@@ -638,6 +639,13 @@ static int __init iseries_probe(void)
638 return 1; 639 return 1;
639} 640}
640 641
642#ifdef CONFIG_KEXEC
643static int iseries_kexec_prepare(struct kimage *image)
644{
645 return -ENOSYS;
646}
647#endif
648
641define_machine(iseries) { 649define_machine(iseries) {
642 .name = "iSeries", 650 .name = "iSeries",
643 .setup_arch = iSeries_setup_arch, 651 .setup_arch = iSeries_setup_arch,
@@ -658,6 +666,9 @@ define_machine(iseries) {
658 .probe = iseries_probe, 666 .probe = iseries_probe,
659 .ioremap = iseries_ioremap, 667 .ioremap = iseries_ioremap,
660 .iounmap = iseries_iounmap, 668 .iounmap = iseries_iounmap,
669#ifdef CONFIG_KEXEC
670 .machine_kexec_prepare = iseries_kexec_prepare,
671#endif
661 /* XXX Implement enable_pmcs for iSeries */ 672 /* XXX Implement enable_pmcs for iSeries */
662}; 673};
663 674
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c
index 58556b028a4c..86db47c1b665 100644
--- a/arch/powerpc/platforms/pasemi/cpufreq.c
+++ b/arch/powerpc/platforms/pasemi/cpufreq.c
@@ -112,7 +112,7 @@ static int get_gizmo_latency(void)
112 112
113static void set_astate(int cpu, unsigned int astate) 113static void set_astate(int cpu, unsigned int astate)
114{ 114{
115 u64 flags; 115 unsigned long flags;
116 116
117 /* Return if called before init has run */ 117 /* Return if called before init has run */
118 if (unlikely(!sdcasr_mapbase)) 118 if (unlikely(!sdcasr_mapbase))
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index 217af321b0ca..a6152d922243 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -509,7 +509,7 @@ fallback:
509 */ 509 */
510int pasemi_dma_init(void) 510int pasemi_dma_init(void)
511{ 511{
512 static spinlock_t init_lock = SPIN_LOCK_UNLOCKED; 512 static DEFINE_SPINLOCK(init_lock);
513 struct pci_dev *iob_pdev; 513 struct pci_dev *iob_pdev;
514 struct pci_dev *pdev; 514 struct pci_dev *pdev;
515 struct resource res; 515 struct resource res;
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 54b7b76ed4f0..04cdd32624d4 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -661,6 +661,7 @@ static void __init init_second_ohare(void)
661 pci_find_hose_for_OF_device(np); 661 pci_find_hose_for_OF_device(np);
662 if (!hose) { 662 if (!hose) {
663 printk(KERN_ERR "Can't find PCI hose for OHare2 !\n"); 663 printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
664 of_node_put(np);
664 return; 665 return;
665 } 666 }
666 early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd); 667 early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
@@ -669,6 +670,7 @@ static void __init init_second_ohare(void)
669 early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd); 670 early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
670 } 671 }
671 has_second_ohare = 1; 672 has_second_ohare = 1;
673 of_node_put(np);
672} 674}
673 675
674/* 676/*
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 59eb840d8ce2..1810e4226e56 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -265,12 +265,15 @@ int __init via_calibrate_decr(void)
265 struct resource rsrc; 265 struct resource rsrc;
266 266
267 vias = of_find_node_by_name(NULL, "via-cuda"); 267 vias = of_find_node_by_name(NULL, "via-cuda");
268 if (vias == 0) 268 if (vias == NULL)
269 vias = of_find_node_by_name(NULL, "via-pmu"); 269 vias = of_find_node_by_name(NULL, "via-pmu");
270 if (vias == 0) 270 if (vias == NULL)
271 vias = of_find_node_by_name(NULL, "via"); 271 vias = of_find_node_by_name(NULL, "via");
272 if (vias == 0 || of_address_to_resource(vias, 0, &rsrc)) 272 if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) {
273 of_node_put(vias);
273 return 0; 274 return 0;
275 }
276 of_node_put(vias);
274 via = ioremap(rsrc.start, rsrc.end - rsrc.start + 1); 277 via = ioremap(rsrc.start, rsrc.end - rsrc.start + 1);
275 if (via == NULL) { 278 if (via == NULL) {
276 printk(KERN_ERR "Failed to map VIA for timer calibration !\n"); 279 printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
@@ -297,7 +300,7 @@ int __init via_calibrate_decr(void)
297 ppc_tb_freq = (dstart - dend) * 100 / 6; 300 ppc_tb_freq = (dstart - dend) * 100 / 6;
298 301
299 iounmap(via); 302 iounmap(via);
300 303
301 return 1; 304 return 1;
302} 305}
303#endif 306#endif
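
The via_calibrate_decr() hunk fixes a device-node reference leak: of_find_node_by_name() returns the node with its refcount raised, so both the error return and the success path must drop it once the resource has been extracted. The generic shape of the fix, as a sketch:

/* Drop the node reference on *every* path once the resource is copied
 * out; of_node_put(NULL) is a safe no-op, so the error path needs no
 * NULL check of its own. */
static void __iomem *map_via_sketch(struct resource *rsrc)
{
        struct device_node *vias;

        vias = of_find_node_by_name(NULL, "via-cuda");
        if (vias == NULL || of_address_to_resource(vias, 0, rsrc)) {
                of_node_put(vias);
                return NULL;
        }
        of_node_put(vias);      /* rsrc holds what we need */

        return ioremap(rsrc->start, rsrc->end - rsrc->start + 1);
}
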
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index dbc124e05646..ca71a12b764c 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -518,6 +518,41 @@ fail_device_register:
518 return result; 518 return result;
519} 519}
520 520
521static int __init ps3_register_ramdisk_device(void)
522{
523 int result;
524 struct layout {
525 struct ps3_system_bus_device dev;
526 } *p;
527
528 pr_debug(" -> %s:%d\n", __func__, __LINE__);
529
530 p = kzalloc(sizeof(struct layout), GFP_KERNEL);
531
532 if (!p)
533 return -ENOMEM;
534
535 p->dev.match_id = PS3_MATCH_ID_GPU;
536 p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK;
537 p->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
538
539 result = ps3_system_bus_device_register(&p->dev);
540
541 if (result) {
542 pr_debug("%s:%d ps3_system_bus_device_register failed\n",
543 __func__, __LINE__);
544 goto fail_device_register;
545 }
546
547 pr_debug(" <- %s:%d\n", __func__, __LINE__);
548 return 0;
549
550fail_device_register:
551 kfree(p);
552 pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
553 return result;
554}
555
521/** 556/**
522 * ps3_setup_dynamic_device - Setup a dynamic device from the repository 557 * ps3_setup_dynamic_device - Setup a dynamic device from the repository
523 */ 558 */
@@ -946,6 +981,8 @@ static int __init ps3_register_devices(void)
946 981
947 ps3_register_lpm_devices(); 982 ps3_register_lpm_devices();
948 983
984 ps3_register_ramdisk_device();
985
949 pr_debug(" <- %s:%d\n", __func__, __LINE__); 986 pr_debug(" <- %s:%d\n", __func__, __LINE__);
950 return 0; 987 return 0;
951} 988}
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 5afce115ab1f..b33b28a6fe12 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_FSL_PCI) += fsl_pci.o $(fsl-msi-obj-y)
17obj-$(CONFIG_FSL_LBC) += fsl_lbc.o 17obj-$(CONFIG_FSL_LBC) += fsl_lbc.o
18obj-$(CONFIG_FSL_GTM) += fsl_gtm.o 18obj-$(CONFIG_FSL_GTM) += fsl_gtm.o
19obj-$(CONFIG_MPC8xxx_GPIO) += mpc8xxx_gpio.o 19obj-$(CONFIG_MPC8xxx_GPIO) += mpc8xxx_gpio.o
20obj-$(CONFIG_SIMPLE_GPIO) += simple_gpio.o
20obj-$(CONFIG_RAPIDIO) += fsl_rio.o 21obj-$(CONFIG_RAPIDIO) += fsl_rio.o
21obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o 22obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o
22obj-$(CONFIG_QUICC_ENGINE) += qe_lib/ 23obj-$(CONFIG_QUICC_ENGINE) += qe_lib/
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index d5f9ae0f1b75..f611d0369cc8 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -29,7 +29,8 @@
29 29
30#if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx) 30#if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx)
31/* atmu setup for fsl pci/pcie controller */ 31/* atmu setup for fsl pci/pcie controller */
32void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc) 32static void __init setup_pci_atmu(struct pci_controller *hose,
33 struct resource *rsrc)
33{ 34{
34 struct ccsr_pci __iomem *pci; 35 struct ccsr_pci __iomem *pci;
35 int i; 36 int i;
@@ -86,7 +87,7 @@ void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc)
86 out_be32(&pci->piw[2].piwar, PIWAR_2G); 87 out_be32(&pci->piw[2].piwar, PIWAR_2G);
87} 88}
88 89
89void __init setup_pci_cmd(struct pci_controller *hose) 90static void __init setup_pci_cmd(struct pci_controller *hose)
90{ 91{
91 u16 cmd; 92 u16 cmd;
92 int cap_x; 93 int cap_x;
@@ -130,7 +131,7 @@ static void __init quirk_fsl_pcie_header(struct pci_dev *dev)
130 return ; 131 return ;
131} 132}
132 133
133int __init fsl_pcie_check_link(struct pci_controller *hose) 134static int __init fsl_pcie_check_link(struct pci_controller *hose)
134{ 135{
135 u32 val; 136 u32 val;
136 early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &val); 137 early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &val);
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index 60f7f227327c..9c744e4285a0 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -5,8 +5,13 @@
5#include <asm/mmu.h> 5#include <asm/mmu.h>
6 6
7extern phys_addr_t get_immrbase(void); 7extern phys_addr_t get_immrbase(void);
8#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx)
8extern u32 get_brgfreq(void); 9extern u32 get_brgfreq(void);
9extern u32 get_baudrate(void); 10extern u32 get_baudrate(void);
11#else
12static inline u32 get_brgfreq(void) { return -1; }
13static inline u32 get_baudrate(void) { return -1; }
14#endif
10extern u32 fsl_get_sys_freq(void); 15extern u32 fsl_get_sys_freq(void);
11 16
12struct spi_board_info; 17struct spi_board_info;
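
The fsl_soc.h change applies the standard "compiled-out stub" pattern: real prototypes when the supporting code is built, no-op static inlines otherwise, so call sites stay free of preprocessor guards. The general pattern, with a hypothetical config symbol:

#ifdef CONFIG_EXAMPLE_FEATURE
extern u32 example_get_freq(void);      /* real implementation linked in */
#else
static inline u32 example_get_freq(void) { return -1; } /* harmless stub */
#endif
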
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
index 76ffbc48d4b9..41ac3dfac98e 100644
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -22,5 +22,6 @@ config UCC
22 22
23config QE_USB 23config QE_USB
24 bool 24 bool
25 default y if USB_GADGET_FSL_QE
25 help 26 help
26 QE USB Host Controller support 27 QE USB Controller support
diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c
index 8e5a0bc36d0b..3485288dce31 100644
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ b/arch/powerpc/sysdev/qe_lib/gpio.c
@@ -14,6 +14,7 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/err.h>
17#include <linux/io.h> 18#include <linux/io.h>
18#include <linux/of.h> 19#include <linux/of.h>
19#include <linux/of_gpio.h> 20#include <linux/of_gpio.h>
@@ -24,8 +25,14 @@ struct qe_gpio_chip {
24 struct of_mm_gpio_chip mm_gc; 25 struct of_mm_gpio_chip mm_gc;
25 spinlock_t lock; 26 spinlock_t lock;
26 27
28 unsigned long pin_flags[QE_PIO_PINS];
29#define QE_PIN_REQUESTED 0
30
27 /* shadowed data register to clear/set bits safely */ 31 /* shadowed data register to clear/set bits safely */
28 u32 cpdata; 32 u32 cpdata;
33
34 /* saved_regs used to restore dedicated functions */
35 struct qe_pio_regs saved_regs;
29}; 36};
30 37
31static inline struct qe_gpio_chip * 38static inline struct qe_gpio_chip *
@@ -40,6 +47,12 @@ static void qe_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
40 struct qe_pio_regs __iomem *regs = mm_gc->regs; 47 struct qe_pio_regs __iomem *regs = mm_gc->regs;
41 48
42 qe_gc->cpdata = in_be32(&regs->cpdata); 49 qe_gc->cpdata = in_be32(&regs->cpdata);
50 qe_gc->saved_regs.cpdata = qe_gc->cpdata;
51 qe_gc->saved_regs.cpdir1 = in_be32(&regs->cpdir1);
52 qe_gc->saved_regs.cpdir2 = in_be32(&regs->cpdir2);
53 qe_gc->saved_regs.cppar1 = in_be32(&regs->cppar1);
54 qe_gc->saved_regs.cppar2 = in_be32(&regs->cppar2);
55 qe_gc->saved_regs.cpodr = in_be32(&regs->cpodr);
43} 56}
44 57
45static int qe_gpio_get(struct gpio_chip *gc, unsigned int gpio) 58static int qe_gpio_get(struct gpio_chip *gc, unsigned int gpio)
@@ -103,6 +116,188 @@ static int qe_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
103 return 0; 116 return 0;
104} 117}
105 118
119struct qe_pin {
120 /*
121 * The qe_gpio_chip name is unfortunate, we should change that to
122 * something like qe_pio_controller. Someday.
123 */
124 struct qe_gpio_chip *controller;
125 int num;
126};
127
128/**
129 * qe_pin_request - Request a QE pin
130 * @np: device node to get a pin from
131 * @index: index of a pin in the device tree
132 * Context: non-atomic
133 *
 134 * This function returns a qe_pin so that you can use it with the rest of
 135 * the QE Pin Multiplexing API.
136 */
137struct qe_pin *qe_pin_request(struct device_node *np, int index)
138{
139 struct qe_pin *qe_pin;
140 struct device_node *gc;
141 struct of_gpio_chip *of_gc = NULL;
142 struct of_mm_gpio_chip *mm_gc;
143 struct qe_gpio_chip *qe_gc;
144 int err;
145 int size;
146 const void *gpio_spec;
147 const u32 *gpio_cells;
148 unsigned long flags;
149
150 qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL);
151 if (!qe_pin) {
152 pr_debug("%s: can't allocate memory\n", __func__);
153 return ERR_PTR(-ENOMEM);
154 }
155
156 err = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", index,
157 &gc, &gpio_spec);
158 if (err) {
159 pr_debug("%s: can't parse gpios property\n", __func__);
160 goto err0;
161 }
162
163 if (!of_device_is_compatible(gc, "fsl,mpc8323-qe-pario-bank")) {
164 pr_debug("%s: tried to get a non-qe pin\n", __func__);
165 err = -EINVAL;
166 goto err1;
167 }
168
169 of_gc = gc->data;
170 if (!of_gc) {
171 pr_debug("%s: gpio controller %s isn't registered\n",
172 np->full_name, gc->full_name);
173 err = -ENODEV;
174 goto err1;
175 }
176
177 gpio_cells = of_get_property(gc, "#gpio-cells", &size);
178 if (!gpio_cells || size != sizeof(*gpio_cells) ||
179 *gpio_cells != of_gc->gpio_cells) {
180 pr_debug("%s: wrong #gpio-cells for %s\n",
181 np->full_name, gc->full_name);
182 err = -EINVAL;
183 goto err1;
184 }
185
186 err = of_gc->xlate(of_gc, np, gpio_spec, NULL);
187 if (err < 0)
188 goto err1;
189
190 mm_gc = to_of_mm_gpio_chip(&of_gc->gc);
191 qe_gc = to_qe_gpio_chip(mm_gc);
192
193 spin_lock_irqsave(&qe_gc->lock, flags);
194
195 if (test_and_set_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[err]) == 0) {
196 qe_pin->controller = qe_gc;
197 qe_pin->num = err;
198 err = 0;
199 } else {
200 err = -EBUSY;
201 }
202
203 spin_unlock_irqrestore(&qe_gc->lock, flags);
204
205 if (!err)
206 return qe_pin;
207err1:
208 of_node_put(gc);
209err0:
210 kfree(qe_pin);
211 pr_debug("%s failed with status %d\n", __func__, err);
212 return ERR_PTR(err);
213}
214EXPORT_SYMBOL(qe_pin_request);
215
216/**
217 * qe_pin_free - Free a pin
218 * @qe_pin: pointer to the qe_pin structure
219 * Context: any
220 *
221 * This function frees the qe_pin structure and makes a pin available
222 * for further qe_pin_request() calls.
223 */
224void qe_pin_free(struct qe_pin *qe_pin)
225{
226 struct qe_gpio_chip *qe_gc = qe_pin->controller;
227 unsigned long flags;
228 const int pin = qe_pin->num;
229
230 spin_lock_irqsave(&qe_gc->lock, flags);
231 test_and_clear_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[pin]);
232 spin_unlock_irqrestore(&qe_gc->lock, flags);
233
234 kfree(qe_pin);
235}
236EXPORT_SYMBOL(qe_pin_free);
237
238/**
239 * qe_pin_set_dedicated - Revert a pin to a dedicated peripheral function mode
240 * @qe_pin: pointer to the qe_pin structure
241 * Context: any
242 *
243 * This function resets a pin to a dedicated peripheral function that
244 * has been set up by the firmware.
245 */
246void qe_pin_set_dedicated(struct qe_pin *qe_pin)
247{
248 struct qe_gpio_chip *qe_gc = qe_pin->controller;
249 struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
250 struct qe_pio_regs *sregs = &qe_gc->saved_regs;
251 int pin = qe_pin->num;
252 u32 mask1 = 1 << (QE_PIO_PINS - (pin + 1));
253 u32 mask2 = 0x3 << (QE_PIO_PINS - (pin % (QE_PIO_PINS / 2) + 1) * 2);
254 bool second_reg = pin > (QE_PIO_PINS / 2) - 1;
255 unsigned long flags;
256
257 spin_lock_irqsave(&qe_gc->lock, flags);
258
259 if (second_reg) {
260 clrsetbits_be32(&regs->cpdir2, mask2, sregs->cpdir2 & mask2);
261 clrsetbits_be32(&regs->cppar2, mask2, sregs->cppar2 & mask2);
262 } else {
263 clrsetbits_be32(&regs->cpdir1, mask2, sregs->cpdir1 & mask2);
264 clrsetbits_be32(&regs->cppar1, mask2, sregs->cppar1 & mask2);
265 }
266
267 if (sregs->cpdata & mask1)
268 qe_gc->cpdata |= mask1;
269 else
270 qe_gc->cpdata &= ~mask1;
271
272 out_be32(&regs->cpdata, qe_gc->cpdata);
273 clrsetbits_be32(&regs->cpodr, mask1, sregs->cpodr & mask1);
274
275 spin_unlock_irqrestore(&qe_gc->lock, flags);
276}
277EXPORT_SYMBOL(qe_pin_set_dedicated);
278
279/**
280 * qe_pin_set_gpio - Set a pin to the GPIO mode
281 * @qe_pin: pointer to the qe_pin structure
282 * Context: any
283 *
284 * This function sets a pin to the GPIO mode.
285 */
286void qe_pin_set_gpio(struct qe_pin *qe_pin)
287{
288 struct qe_gpio_chip *qe_gc = qe_pin->controller;
289 struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
290 unsigned long flags;
291
292 spin_lock_irqsave(&qe_gc->lock, flags);
293
294 /* Let's make it input by default, GPIO API is able to change that. */
295 __par_io_config_pin(regs, qe_pin->num, QE_PIO_DIR_IN, 0, 0, 0);
296
297 spin_unlock_irqrestore(&qe_gc->lock, flags);
298}
299EXPORT_SYMBOL(qe_pin_set_gpio);
300
106static int __init qe_add_gpiochips(void) 301static int __init qe_add_gpiochips(void)
107{ 302{
108 struct device_node *np; 303 struct device_node *np;
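The four functions added above are the entire QE Pin Multiplexing API: request a pin by its index in a node's "gpios" property, switch it between GPIO and dedicated-peripheral mode, and release it. A minimal consumer sketch follows; the function name, the index, and the surrounding driver are hypothetical, and only the qe_pin_* calls come from this patch.

/* Hypothetical consumer: borrow pin 0 of our node as a GPIO,
 * then hand it back to its firmware-configured function. */
static int foo_borrow_pin(struct device_node *np)
{
	struct qe_pin *pin;

	pin = qe_pin_request(np, 0);	/* index 0 in "gpios" */
	if (IS_ERR(pin))
		return PTR_ERR(pin);

	qe_pin_set_gpio(pin);		/* pin acts as a plain GPIO now */
	/* ... drive it through the generic GPIO API ... */
	qe_pin_set_dedicated(pin);	/* restore dedicated function */

	qe_pin_free(pin);
	return 0;
}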
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
new file mode 100644
index 000000000000..43c4569e24b7
--- /dev/null
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -0,0 +1,155 @@
1/*
2 * Simple Memory-Mapped GPIOs
3 *
4 * Copyright (c) MontaVista Software, Inc. 2008.
5 *
6 * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18#include <linux/types.h>
19#include <linux/ioport.h>
20#include <linux/io.h>
21#include <linux/of.h>
22#include <linux/of_gpio.h>
23#include <linux/gpio.h>
24#include <asm/prom.h>
25#include "simple_gpio.h"
26
27struct u8_gpio_chip {
28 struct of_mm_gpio_chip mm_gc;
29 spinlock_t lock;
30
31 /* shadowed data register to clear/set bits safely */
32 u8 data;
33};
34
35static struct u8_gpio_chip *to_u8_gpio_chip(struct of_mm_gpio_chip *mm_gc)
36{
37 return container_of(mm_gc, struct u8_gpio_chip, mm_gc);
38}
39
40static u8 u8_pin2mask(unsigned int pin)
41{
42 return 1 << (8 - 1 - pin);
43}
44
45static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
46{
47 struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
48
49 return in_8(mm_gc->regs) & u8_pin2mask(gpio);
50}
51
52static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
53{
54 struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
55 struct u8_gpio_chip *u8_gc = to_u8_gpio_chip(mm_gc);
56 unsigned long flags;
57
58 spin_lock_irqsave(&u8_gc->lock, flags);
59
60 if (val)
61 u8_gc->data |= u8_pin2mask(gpio);
62 else
63 u8_gc->data &= ~u8_pin2mask(gpio);
64
65 out_8(mm_gc->regs, u8_gc->data);
66
67 spin_unlock_irqrestore(&u8_gc->lock, flags);
68}
69
70static int u8_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
71{
72 return 0;
73}
74
75static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
76{
77 u8_gpio_set(gc, gpio, val);
78 return 0;
79}
80
81static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
82{
83 struct u8_gpio_chip *u8_gc = to_u8_gpio_chip(mm_gc);
84
85 u8_gc->data = in_8(mm_gc->regs);
86}
87
88static int __init u8_simple_gpiochip_add(struct device_node *np)
89{
90 int ret;
91 struct u8_gpio_chip *u8_gc;
92 struct of_mm_gpio_chip *mm_gc;
93 struct of_gpio_chip *of_gc;
94 struct gpio_chip *gc;
95
96 u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL);
97 if (!u8_gc)
98 return -ENOMEM;
99
100 spin_lock_init(&u8_gc->lock);
101
102 mm_gc = &u8_gc->mm_gc;
103 of_gc = &mm_gc->of_gc;
104 gc = &of_gc->gc;
105
106 mm_gc->save_regs = u8_gpio_save_regs;
107 of_gc->gpio_cells = 2;
108 gc->ngpio = 8;
109 gc->direction_input = u8_gpio_dir_in;
110 gc->direction_output = u8_gpio_dir_out;
111 gc->get = u8_gpio_get;
112 gc->set = u8_gpio_set;
113
114 ret = of_mm_gpiochip_add(np, mm_gc);
115 if (ret)
116 goto err;
117 return 0;
118err:
119 kfree(u8_gc);
120 return ret;
121}
122
123void __init simple_gpiochip_init(const char *compatible)
124{
125 struct device_node *np;
126
127 for_each_compatible_node(np, NULL, compatible) {
128 int ret;
129 struct resource r;
130
131 ret = of_address_to_resource(np, 0, &r);
132 if (ret)
133 goto err;
134
135 switch (resource_size(&r)) {
136 case 1:
137 ret = u8_simple_gpiochip_add(np);
138 if (ret)
139 goto err;
140 break;
141 default:
142 /*
 143			 * When support for GPIO bank widths > 1 is needed,
 144			 * turn the u8_ code into macros and generate the
 145			 * required uX_ variants from them.
146 */
147 ret = -ENOSYS;
148 goto err;
149 }
150 continue;
151err:
152 pr_err("%s: registration failed, status %d\n",
153 np->full_name, ret);
154 }
155}
diff --git a/arch/powerpc/sysdev/simple_gpio.h b/arch/powerpc/sysdev/simple_gpio.h
new file mode 100644
index 000000000000..3a7b0c513c76
--- /dev/null
+++ b/arch/powerpc/sysdev/simple_gpio.h
@@ -0,0 +1,12 @@
1#ifndef __SYSDEV_SIMPLE_GPIO_H
2#define __SYSDEV_SIMPLE_GPIO_H
3
4#include <linux/errno.h>
5
6#ifdef CONFIG_SIMPLE_GPIO
7extern void simple_gpiochip_init(const char *compatible);
8#else
9static inline void simple_gpiochip_init(const char *compatible) {}
10#endif /* CONFIG_SIMPLE_GPIO */
11
12#endif /* __SYSDEV_SIMPLE_GPIO_H */
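simple_gpiochip_init() is intended to be called from platform setup code, once per compatible string; as the switch in the new file shows, only 1-byte-wide banks are supported and anything wider fails with -ENOSYS. A sketch of the expected call site follows; the board function and the compatible string are made up for illustration.

/* Hypothetical board setup: register every 8-bit GPIO bank the device
 * tree describes under a vendor-specific compatible. */
static void __init foo_setup_arch(void)
{
	simple_gpiochip_init("acme,board-control-gpio");
}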
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 4f8d60586b07..8040376c4890 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -54,7 +54,8 @@ extern int __smp4m_processor_id(void);
54#define SMP_PRINTK(x) 54#define SMP_PRINTK(x)
55#endif 55#endif
56 56
57static inline unsigned long swap(volatile unsigned long *ptr, unsigned long val) 57static inline unsigned long
58swap_ulong(volatile unsigned long *ptr, unsigned long val)
58{ 59{
59 __asm__ __volatile__("swap [%1], %0\n\t" : 60 __asm__ __volatile__("swap [%1], %0\n\t" :
60 "=&r" (val), "=&r" (ptr) : 61 "=&r" (val), "=&r" (ptr) :
@@ -90,7 +91,7 @@ void __cpuinit smp4m_callin(void)
90 * to call the scheduler code. 91 * to call the scheduler code.
91 */ 92 */
92 /* Allow master to continue. */ 93 /* Allow master to continue. */
93 swap(&cpu_callin_map[cpuid], 1); 94 swap_ulong(&cpu_callin_map[cpuid], 1);
94 95
95 /* XXX: What's up with all the flushes? */ 96 /* XXX: What's up with all the flushes? */
96 local_flush_cache_all(); 97 local_flush_cache_all();
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index b0461856acfb..a4cff5d6e380 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -982,7 +982,7 @@ static int __init longhaul_init(void)
982 case 10: 982 case 10:
983 printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); 983 printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
984 default: 984 default:
985 ;; 985 ;
986 } 986 }
987 987
988 return -ENODEV; 988 return -ENODEV;
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 6b94fb7be5f2..00c46e0b40e4 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -12,9 +12,10 @@
12#include <linux/device.h> 12#include <linux/device.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/io.h>
15#include <linux/amba/bus.h> 16#include <linux/amba/bus.h>
16 17
17#include <asm/io.h> 18#include <asm/irq.h>
18#include <asm/sizes.h> 19#include <asm/sizes.h>
19 20
20#define to_amba_device(d) container_of(d, struct amba_device, dev) 21#define to_amba_device(d) container_of(d, struct amba_device, dev)
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 656448c7fef9..7f701cbe14ab 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -105,7 +105,7 @@ enum {
105 board_ahci_ign_iferr = 2, 105 board_ahci_ign_iferr = 2,
106 board_ahci_sb600 = 3, 106 board_ahci_sb600 = 3,
107 board_ahci_mv = 4, 107 board_ahci_mv = 4,
108 board_ahci_sb700 = 5, 108 board_ahci_sb700 = 5, /* for SB700 and SB800 */
109 board_ahci_mcp65 = 6, 109 board_ahci_mcp65 = 6,
110 board_ahci_nopmp = 7, 110 board_ahci_nopmp = 7,
111 111
@@ -439,7 +439,7 @@ static const struct ata_port_info ahci_port_info[] = {
439 .udma_mask = ATA_UDMA6, 439 .udma_mask = ATA_UDMA6,
440 .port_ops = &ahci_ops, 440 .port_ops = &ahci_ops,
441 }, 441 },
442 /* board_ahci_sb700 */ 442 /* board_ahci_sb700, for SB700 and SB800 */
443 { 443 {
444 AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL), 444 AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL),
445 .flags = AHCI_FLAG_COMMON, 445 .flags = AHCI_FLAG_COMMON,
@@ -2446,6 +2446,8 @@ static void ahci_print_info(struct ata_host *host)
2446 speed_s = "1.5"; 2446 speed_s = "1.5";
2447 else if (speed == 2) 2447 else if (speed == 2)
2448 speed_s = "3"; 2448 speed_s = "3";
2449 else if (speed == 3)
2450 speed_s = "6";
2449 else 2451 else
2450 speed_s = "?"; 2452 speed_s = "?";
2451 2453
@@ -2610,6 +2612,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2610 (pdev->revision == 0xa1 || pdev->revision == 0xa2)) 2612 (pdev->revision == 0xa1 || pdev->revision == 0xa2))
2611 hpriv->flags |= AHCI_HFLAG_NO_MSI; 2613 hpriv->flags |= AHCI_HFLAG_NO_MSI;
2612 2614
2615 /* SB800 does NOT need the workaround to ignore SERR_INTERNAL */
2616 if (board_id == board_ahci_sb700 && pdev->revision >= 0x40)
2617 hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL;
2618
2613 if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev)) 2619 if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev))
2614 pci_intx(pdev, 1); 2620 pci_intx(pdev, 1);
2615 2621
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 5fdf1678d0cc..887d8f46a287 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -154,11 +154,13 @@ struct piix_map_db {
154 154
155struct piix_host_priv { 155struct piix_host_priv {
156 const int *map; 156 const int *map;
157 u32 saved_iocfg;
157 void __iomem *sidpr; 158 void __iomem *sidpr;
158}; 159};
159 160
160static int piix_init_one(struct pci_dev *pdev, 161static int piix_init_one(struct pci_dev *pdev,
161 const struct pci_device_id *ent); 162 const struct pci_device_id *ent);
163static void piix_remove_one(struct pci_dev *pdev);
162static int piix_pata_prereset(struct ata_link *link, unsigned long deadline); 164static int piix_pata_prereset(struct ata_link *link, unsigned long deadline);
163static void piix_set_piomode(struct ata_port *ap, struct ata_device *adev); 165static void piix_set_piomode(struct ata_port *ap, struct ata_device *adev);
164static void piix_set_dmamode(struct ata_port *ap, struct ata_device *adev); 166static void piix_set_dmamode(struct ata_port *ap, struct ata_device *adev);
@@ -296,7 +298,7 @@ static struct pci_driver piix_pci_driver = {
296 .name = DRV_NAME, 298 .name = DRV_NAME,
297 .id_table = piix_pci_tbl, 299 .id_table = piix_pci_tbl,
298 .probe = piix_init_one, 300 .probe = piix_init_one,
299 .remove = ata_pci_remove_one, 301 .remove = piix_remove_one,
300#ifdef CONFIG_PM 302#ifdef CONFIG_PM
301 .suspend = piix_pci_device_suspend, 303 .suspend = piix_pci_device_suspend,
302 .resume = piix_pci_device_resume, 304 .resume = piix_pci_device_resume,
@@ -308,7 +310,7 @@ static struct scsi_host_template piix_sht = {
308}; 310};
309 311
310static struct ata_port_operations piix_pata_ops = { 312static struct ata_port_operations piix_pata_ops = {
311 .inherits = &ata_bmdma_port_ops, 313 .inherits = &ata_bmdma32_port_ops,
312 .cable_detect = ata_cable_40wire, 314 .cable_detect = ata_cable_40wire,
313 .set_piomode = piix_set_piomode, 315 .set_piomode = piix_set_piomode,
314 .set_dmamode = piix_set_dmamode, 316 .set_dmamode = piix_set_dmamode,
@@ -610,8 +612,9 @@ static const struct ich_laptop ich_laptop[] = {
610static int ich_pata_cable_detect(struct ata_port *ap) 612static int ich_pata_cable_detect(struct ata_port *ap)
611{ 613{
612 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 614 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
615 struct piix_host_priv *hpriv = ap->host->private_data;
613 const struct ich_laptop *lap = &ich_laptop[0]; 616 const struct ich_laptop *lap = &ich_laptop[0];
614 u8 tmp, mask; 617 u8 mask;
615 618
616 /* Check for specials - Acer Aspire 5602WLMi */ 619 /* Check for specials - Acer Aspire 5602WLMi */
617 while (lap->device) { 620 while (lap->device) {
@@ -625,8 +628,7 @@ static int ich_pata_cable_detect(struct ata_port *ap)
625 628
626 /* check BIOS cable detect results */ 629 /* check BIOS cable detect results */
627 mask = ap->port_no == 0 ? PIIX_80C_PRI : PIIX_80C_SEC; 630 mask = ap->port_no == 0 ? PIIX_80C_PRI : PIIX_80C_SEC;
628 pci_read_config_byte(pdev, PIIX_IOCFG, &tmp); 631 if ((hpriv->saved_iocfg & mask) == 0)
629 if ((tmp & mask) == 0)
630 return ATA_CBL_PATA40; 632 return ATA_CBL_PATA40;
631 return ATA_CBL_PATA80; 633 return ATA_CBL_PATA80;
632} 634}
@@ -1350,7 +1352,7 @@ static int __devinit piix_init_sidpr(struct ata_host *host)
1350 return 0; 1352 return 0;
1351} 1353}
1352 1354
1353static void piix_iocfg_bit18_quirk(struct pci_dev *pdev) 1355static void piix_iocfg_bit18_quirk(struct ata_host *host)
1354{ 1356{
1355 static const struct dmi_system_id sysids[] = { 1357 static const struct dmi_system_id sysids[] = {
1356 { 1358 {
@@ -1367,7 +1369,8 @@ static void piix_iocfg_bit18_quirk(struct pci_dev *pdev)
1367 1369
1368 { } /* terminate list */ 1370 { } /* terminate list */
1369 }; 1371 };
1370 u32 iocfg; 1372 struct pci_dev *pdev = to_pci_dev(host->dev);
1373 struct piix_host_priv *hpriv = host->private_data;
1371 1374
1372 if (!dmi_check_system(sysids)) 1375 if (!dmi_check_system(sysids))
1373 return; 1376 return;
@@ -1376,12 +1379,11 @@ static void piix_iocfg_bit18_quirk(struct pci_dev *pdev)
1376 * seem to use it to disable a channel. Clear the bit on the 1379 * seem to use it to disable a channel. Clear the bit on the
1377 * affected systems. 1380 * affected systems.
1378 */ 1381 */
1379 pci_read_config_dword(pdev, PIIX_IOCFG, &iocfg); 1382 if (hpriv->saved_iocfg & (1 << 18)) {
1380 if (iocfg & (1 << 18)) {
1381 dev_printk(KERN_INFO, &pdev->dev, 1383 dev_printk(KERN_INFO, &pdev->dev,
1382 "applying IOCFG bit18 quirk\n"); 1384 "applying IOCFG bit18 quirk\n");
1383 iocfg &= ~(1 << 18); 1385 pci_write_config_dword(pdev, PIIX_IOCFG,
1384 pci_write_config_dword(pdev, PIIX_IOCFG, iocfg); 1386 hpriv->saved_iocfg & ~(1 << 18));
1385 } 1387 }
1386} 1388}
1387 1389
@@ -1430,6 +1432,17 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
1430 if (rc) 1432 if (rc)
1431 return rc; 1433 return rc;
1432 1434
1435 hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
1436 if (!hpriv)
1437 return -ENOMEM;
1438
 1439	/* Save IOCFG; this will be used for cable detection, quirk
 1440	 * detection, and restoration on detach. This is necessary
 1441	 * because some ACPI implementations mess up cable-related
 1442	 * bits on _STM. Reported in kernel bugzilla #11879.
1443 */
1444 pci_read_config_dword(pdev, PIIX_IOCFG, &hpriv->saved_iocfg);
1445
1433 /* ICH6R may be driven by either ata_piix or ahci driver 1446 /* ICH6R may be driven by either ata_piix or ahci driver
1434 * regardless of BIOS configuration. Make sure AHCI mode is 1447 * regardless of BIOS configuration. Make sure AHCI mode is
1435 * off. 1448 * off.
@@ -1441,10 +1454,6 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
1441 } 1454 }
1442 1455
1443 /* SATA map init can change port_info, do it before prepping host */ 1456 /* SATA map init can change port_info, do it before prepping host */
1444 hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
1445 if (!hpriv)
1446 return -ENOMEM;
1447
1448 if (port_flags & ATA_FLAG_SATA) 1457 if (port_flags & ATA_FLAG_SATA)
1449 hpriv->map = piix_init_sata_map(pdev, port_info, 1458 hpriv->map = piix_init_sata_map(pdev, port_info,
1450 piix_map_db_table[ent->driver_data]); 1459 piix_map_db_table[ent->driver_data]);
@@ -1463,7 +1472,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
1463 } 1472 }
1464 1473
1465 /* apply IOCFG bit18 quirk */ 1474 /* apply IOCFG bit18 quirk */
1466 piix_iocfg_bit18_quirk(pdev); 1475 piix_iocfg_bit18_quirk(host);
1467 1476
1468 /* On ICH5, some BIOSen disable the interrupt using the 1477 /* On ICH5, some BIOSen disable the interrupt using the
1469 * PCI_COMMAND_INTX_DISABLE bit added in PCI 2.3. 1478 * PCI_COMMAND_INTX_DISABLE bit added in PCI 2.3.
@@ -1488,6 +1497,16 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
1488 return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht); 1497 return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht);
1489} 1498}
1490 1499
1500static void piix_remove_one(struct pci_dev *pdev)
1501{
1502 struct ata_host *host = dev_get_drvdata(&pdev->dev);
1503 struct piix_host_priv *hpriv = host->private_data;
1504
1505 pci_write_config_dword(pdev, PIIX_IOCFG, hpriv->saved_iocfg);
1506
1507 ata_pci_remove_one(pdev);
1508}
1509
1491static int __init piix_init(void) 1510static int __init piix_init(void)
1492{ 1511{
1493 int rc; 1512 int rc;
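Taken together, the ata_piix hunks implement one save/restore pattern: read IOCFG a single time at probe, consult the cached value everywhere (cable detection, the bit18 quirk), and write the original value back in the new piix_remove_one(). Below is a condensed sketch of that shape with hypothetical names and register offset; drvdata wiring is omitted.

#define FOO_IOCFG	0x54		/* hypothetical config-space offset */

struct foo_priv {
	u32 saved_iocfg;
};

static int foo_probe(struct pci_dev *pdev)
{
	struct foo_priv *priv;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	/* Cache the register once; later code must use this copy, since
	 * firmware (e.g. ACPI _STM) may clobber the live register. */
	pci_read_config_dword(pdev, FOO_IOCFG, &priv->saved_iocfg);
	return 0;
}

static void foo_remove(struct pci_dev *pdev)
{
	struct foo_priv *priv = pci_get_drvdata(pdev);

	/* Hand the hardware back exactly as firmware left it. */
	pci_write_config_dword(pdev, FOO_IOCFG, priv->saved_iocfg);
}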
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index f178a450ec08..175df54eb664 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1007,6 +1007,7 @@ static const char *sata_spd_string(unsigned int spd)
1007 static const char * const spd_str[] = { 1007 static const char * const spd_str[] = {
1008 "1.5 Gbps", 1008 "1.5 Gbps",
1009 "3.0 Gbps", 1009 "3.0 Gbps",
1010 "6.0 Gbps",
1010 }; 1011 };
1011 1012
1012 if (spd == 0 || (spd - 1) >= ARRAY_SIZE(spd_str)) 1013 if (spd == 0 || (spd - 1) >= ARRAY_SIZE(spd_str))
@@ -2000,6 +2001,10 @@ unsigned int ata_pio_need_iordy(const struct ata_device *adev)
2000 as the caller should know this */ 2001 as the caller should know this */
2001 if (adev->link->ap->flags & ATA_FLAG_NO_IORDY) 2002 if (adev->link->ap->flags & ATA_FLAG_NO_IORDY)
2002 return 0; 2003 return 0;
2004 /* CF spec. r4.1 Table 22 says no iordy on PIO5 and PIO6. */
2005 if (ata_id_is_cfa(adev->id)
2006 && (adev->pio_mode == XFER_PIO_5 || adev->pio_mode == XFER_PIO_6))
2007 return 0;
2003 /* PIO3 and higher it is mandatory */ 2008 /* PIO3 and higher it is mandatory */
2004 if (adev->pio_mode > XFER_PIO_2) 2009 if (adev->pio_mode > XFER_PIO_2)
2005 return 1; 2010 return 1;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 9033d164c4ec..c59ad76c84b1 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -66,6 +66,7 @@ const struct ata_port_operations ata_sff_port_ops = {
66 66
67 .port_start = ata_sff_port_start, 67 .port_start = ata_sff_port_start,
68}; 68};
69EXPORT_SYMBOL_GPL(ata_sff_port_ops);
69 70
70const struct ata_port_operations ata_bmdma_port_ops = { 71const struct ata_port_operations ata_bmdma_port_ops = {
71 .inherits = &ata_sff_port_ops, 72 .inherits = &ata_sff_port_ops,
@@ -77,6 +78,14 @@ const struct ata_port_operations ata_bmdma_port_ops = {
77 .bmdma_stop = ata_bmdma_stop, 78 .bmdma_stop = ata_bmdma_stop,
78 .bmdma_status = ata_bmdma_status, 79 .bmdma_status = ata_bmdma_status,
79}; 80};
81EXPORT_SYMBOL_GPL(ata_bmdma_port_ops);
82
83const struct ata_port_operations ata_bmdma32_port_ops = {
84 .inherits = &ata_bmdma_port_ops,
85
86 .sff_data_xfer = ata_sff_data_xfer32,
87};
88EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
80 89
81/** 90/**
82 * ata_fill_sg - Fill PCI IDE PRD table 91 * ata_fill_sg - Fill PCI IDE PRD table
@@ -166,8 +175,9 @@ static void ata_fill_sg_dumb(struct ata_queued_cmd *qc)
166 blen = len & 0xffff; 175 blen = len & 0xffff;
167 ap->prd[pi].addr = cpu_to_le32(addr); 176 ap->prd[pi].addr = cpu_to_le32(addr);
168 if (blen == 0) { 177 if (blen == 0) {
169 /* Some PATA chipsets like the CS5530 can't 178 /* Some PATA chipsets like the CS5530 can't
170 cope with 0x0000 meaning 64K as the spec says */ 179 cope with 0x0000 meaning 64K as the spec
180 says */
171 ap->prd[pi].flags_len = cpu_to_le32(0x8000); 181 ap->prd[pi].flags_len = cpu_to_le32(0x8000);
172 blen = 0x8000; 182 blen = 0x8000;
173 ap->prd[++pi].addr = cpu_to_le32(addr + 0x8000); 183 ap->prd[++pi].addr = cpu_to_le32(addr + 0x8000);
@@ -200,6 +210,7 @@ void ata_sff_qc_prep(struct ata_queued_cmd *qc)
200 210
201 ata_fill_sg(qc); 211 ata_fill_sg(qc);
202} 212}
213EXPORT_SYMBOL_GPL(ata_sff_qc_prep);
203 214
204/** 215/**
205 * ata_sff_dumb_qc_prep - Prepare taskfile for submission 216 * ata_sff_dumb_qc_prep - Prepare taskfile for submission
@@ -217,6 +228,7 @@ void ata_sff_dumb_qc_prep(struct ata_queued_cmd *qc)
217 228
218 ata_fill_sg_dumb(qc); 229 ata_fill_sg_dumb(qc);
219} 230}
231EXPORT_SYMBOL_GPL(ata_sff_dumb_qc_prep);
220 232
221/** 233/**
222 * ata_sff_check_status - Read device status reg & clear interrupt 234 * ata_sff_check_status - Read device status reg & clear interrupt
@@ -233,6 +245,7 @@ u8 ata_sff_check_status(struct ata_port *ap)
233{ 245{
234 return ioread8(ap->ioaddr.status_addr); 246 return ioread8(ap->ioaddr.status_addr);
235} 247}
248EXPORT_SYMBOL_GPL(ata_sff_check_status);
236 249
237/** 250/**
238 * ata_sff_altstatus - Read device alternate status reg 251 * ata_sff_altstatus - Read device alternate status reg
@@ -275,7 +288,7 @@ static u8 ata_sff_irq_status(struct ata_port *ap)
275 status = ata_sff_altstatus(ap); 288 status = ata_sff_altstatus(ap);
276 /* Not us: We are busy */ 289 /* Not us: We are busy */
277 if (status & ATA_BUSY) 290 if (status & ATA_BUSY)
278 return status; 291 return status;
279 } 292 }
280 /* Clear INTRQ latch */ 293 /* Clear INTRQ latch */
281 status = ap->ops->sff_check_status(ap); 294 status = ap->ops->sff_check_status(ap);
@@ -319,6 +332,7 @@ void ata_sff_pause(struct ata_port *ap)
319 ata_sff_sync(ap); 332 ata_sff_sync(ap);
320 ndelay(400); 333 ndelay(400);
321} 334}
335EXPORT_SYMBOL_GPL(ata_sff_pause);
322 336
323/** 337/**
324 * ata_sff_dma_pause - Pause before commencing DMA 338 * ata_sff_dma_pause - Pause before commencing DMA
@@ -327,7 +341,7 @@ void ata_sff_pause(struct ata_port *ap)
327 * Perform I/O fencing and ensure sufficient cycle delays occur 341 * Perform I/O fencing and ensure sufficient cycle delays occur
328 * for the HDMA1:0 transition 342 * for the HDMA1:0 transition
329 */ 343 */
330 344
331void ata_sff_dma_pause(struct ata_port *ap) 345void ata_sff_dma_pause(struct ata_port *ap)
332{ 346{
333 if (ap->ops->sff_check_altstatus || ap->ioaddr.altstatus_addr) { 347 if (ap->ops->sff_check_altstatus || ap->ioaddr.altstatus_addr) {
@@ -341,6 +355,7 @@ void ata_sff_dma_pause(struct ata_port *ap)
341 corruption. */ 355 corruption. */
342 BUG(); 356 BUG();
343} 357}
358EXPORT_SYMBOL_GPL(ata_sff_dma_pause);
344 359
345/** 360/**
346 * ata_sff_busy_sleep - sleep until BSY clears, or timeout 361 * ata_sff_busy_sleep - sleep until BSY clears, or timeout
@@ -396,6 +411,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
396 411
397 return 0; 412 return 0;
398} 413}
414EXPORT_SYMBOL_GPL(ata_sff_busy_sleep);
399 415
400static int ata_sff_check_ready(struct ata_link *link) 416static int ata_sff_check_ready(struct ata_link *link)
401{ 417{
@@ -422,6 +438,7 @@ int ata_sff_wait_ready(struct ata_link *link, unsigned long deadline)
422{ 438{
423 return ata_wait_ready(link, deadline, ata_sff_check_ready); 439 return ata_wait_ready(link, deadline, ata_sff_check_ready);
424} 440}
441EXPORT_SYMBOL_GPL(ata_sff_wait_ready);
425 442
426/** 443/**
427 * ata_sff_dev_select - Select device 0/1 on ATA bus 444 * ata_sff_dev_select - Select device 0/1 on ATA bus
@@ -449,6 +466,7 @@ void ata_sff_dev_select(struct ata_port *ap, unsigned int device)
449 iowrite8(tmp, ap->ioaddr.device_addr); 466 iowrite8(tmp, ap->ioaddr.device_addr);
450 ata_sff_pause(ap); /* needed; also flushes, for mmio */ 467 ata_sff_pause(ap); /* needed; also flushes, for mmio */
451} 468}
469EXPORT_SYMBOL_GPL(ata_sff_dev_select);
452 470
453/** 471/**
454 * ata_dev_select - Select device 0/1 on ATA bus 472 * ata_dev_select - Select device 0/1 on ATA bus
@@ -513,6 +531,7 @@ u8 ata_sff_irq_on(struct ata_port *ap)
513 531
514 return tmp; 532 return tmp;
515} 533}
534EXPORT_SYMBOL_GPL(ata_sff_irq_on);
516 535
517/** 536/**
518 * ata_sff_irq_clear - Clear PCI IDE BMDMA interrupt. 537 * ata_sff_irq_clear - Clear PCI IDE BMDMA interrupt.
@@ -534,6 +553,7 @@ void ata_sff_irq_clear(struct ata_port *ap)
534 553
535 iowrite8(ioread8(mmio + ATA_DMA_STATUS), mmio + ATA_DMA_STATUS); 554 iowrite8(ioread8(mmio + ATA_DMA_STATUS), mmio + ATA_DMA_STATUS);
536} 555}
556EXPORT_SYMBOL_GPL(ata_sff_irq_clear);
537 557
538/** 558/**
539 * ata_sff_tf_load - send taskfile registers to host controller 559 * ata_sff_tf_load - send taskfile registers to host controller
@@ -593,6 +613,7 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
593 613
594 ata_wait_idle(ap); 614 ata_wait_idle(ap);
595} 615}
616EXPORT_SYMBOL_GPL(ata_sff_tf_load);
596 617
597/** 618/**
598 * ata_sff_tf_read - input device's ATA taskfile shadow registers 619 * ata_sff_tf_read - input device's ATA taskfile shadow registers
@@ -633,6 +654,7 @@ void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
633 WARN_ON(1); 654 WARN_ON(1);
634 } 655 }
635} 656}
657EXPORT_SYMBOL_GPL(ata_sff_tf_read);
636 658
637/** 659/**
638 * ata_sff_exec_command - issue ATA command to host controller 660 * ata_sff_exec_command - issue ATA command to host controller
@@ -652,6 +674,7 @@ void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf)
652 iowrite8(tf->command, ap->ioaddr.command_addr); 674 iowrite8(tf->command, ap->ioaddr.command_addr);
653 ata_sff_pause(ap); 675 ata_sff_pause(ap);
654} 676}
677EXPORT_SYMBOL_GPL(ata_sff_exec_command);
655 678
656/** 679/**
657 * ata_tf_to_host - issue ATA taskfile to host controller 680 * ata_tf_to_host - issue ATA taskfile to host controller
@@ -717,6 +740,53 @@ unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf,
717 740
718 return words << 1; 741 return words << 1;
719} 742}
743EXPORT_SYMBOL_GPL(ata_sff_data_xfer);
744
745/**
746 * ata_sff_data_xfer32 - Transfer data by PIO
747 * @dev: device to target
748 * @buf: data buffer
749 * @buflen: buffer length
750 * @rw: read/write
751 *
752 * Transfer data from/to the device data register by PIO using 32bit
753 * I/O operations.
754 *
755 * LOCKING:
756 * Inherited from caller.
757 *
758 * RETURNS:
759 * Bytes consumed.
760 */
761
762unsigned int ata_sff_data_xfer32(struct ata_device *dev, unsigned char *buf,
763 unsigned int buflen, int rw)
764{
765 struct ata_port *ap = dev->link->ap;
766 void __iomem *data_addr = ap->ioaddr.data_addr;
767 unsigned int words = buflen >> 2;
768 int slop = buflen & 3;
769
770 /* Transfer multiple of 4 bytes */
771 if (rw == READ)
772 ioread32_rep(data_addr, buf, words);
773 else
774 iowrite32_rep(data_addr, buf, words);
775
776 if (unlikely(slop)) {
777 __le32 pad;
778 if (rw == READ) {
779 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
780 memcpy(buf + buflen - slop, &pad, slop);
781 } else {
782 memcpy(&pad, buf + buflen - slop, slop);
783 iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr);
784 }
785 words++;
786 }
787 return words << 2;
788}
789EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
720 790
721/** 791/**
722 * ata_sff_data_xfer_noirq - Transfer data by PIO 792 * ata_sff_data_xfer_noirq - Transfer data by PIO
@@ -746,6 +816,7 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf,
746 816
747 return consumed; 817 return consumed;
748} 818}
819EXPORT_SYMBOL_GPL(ata_sff_data_xfer_noirq);
749 820
750/** 821/**
751 * ata_pio_sector - Transfer a sector of data. 822 * ata_pio_sector - Transfer a sector of data.
@@ -922,13 +993,15 @@ next_sg:
922 buf = kmap_atomic(page, KM_IRQ0); 993 buf = kmap_atomic(page, KM_IRQ0);
923 994
924 /* do the actual data transfer */ 995 /* do the actual data transfer */
925 consumed = ap->ops->sff_data_xfer(dev, buf + offset, count, rw); 996 consumed = ap->ops->sff_data_xfer(dev, buf + offset,
997 count, rw);
926 998
927 kunmap_atomic(buf, KM_IRQ0); 999 kunmap_atomic(buf, KM_IRQ0);
928 local_irq_restore(flags); 1000 local_irq_restore(flags);
929 } else { 1001 } else {
930 buf = page_address(page); 1002 buf = page_address(page);
931 consumed = ap->ops->sff_data_xfer(dev, buf + offset, count, rw); 1003 consumed = ap->ops->sff_data_xfer(dev, buf + offset,
1004 count, rw);
932 } 1005 }
933 1006
934 bytes -= min(bytes, consumed); 1007 bytes -= min(bytes, consumed);
@@ -1013,18 +1086,19 @@ static void atapi_pio_bytes(struct ata_queued_cmd *qc)
1013 * RETURNS: 1086 * RETURNS:
1014 * 1 if ok in workqueue, 0 otherwise. 1087 * 1 if ok in workqueue, 0 otherwise.
1015 */ 1088 */
1016static inline int ata_hsm_ok_in_wq(struct ata_port *ap, struct ata_queued_cmd *qc) 1089static inline int ata_hsm_ok_in_wq(struct ata_port *ap,
1090 struct ata_queued_cmd *qc)
1017{ 1091{
1018 if (qc->tf.flags & ATA_TFLAG_POLLING) 1092 if (qc->tf.flags & ATA_TFLAG_POLLING)
1019 return 1; 1093 return 1;
1020 1094
1021 if (ap->hsm_task_state == HSM_ST_FIRST) { 1095 if (ap->hsm_task_state == HSM_ST_FIRST) {
1022 if (qc->tf.protocol == ATA_PROT_PIO && 1096 if (qc->tf.protocol == ATA_PROT_PIO &&
1023 (qc->tf.flags & ATA_TFLAG_WRITE)) 1097 (qc->tf.flags & ATA_TFLAG_WRITE))
1024 return 1; 1098 return 1;
1025 1099
1026 if (ata_is_atapi(qc->tf.protocol) && 1100 if (ata_is_atapi(qc->tf.protocol) &&
1027 !(qc->dev->flags & ATA_DFLAG_CDB_INTR)) 1101 !(qc->dev->flags & ATA_DFLAG_CDB_INTR))
1028 return 1; 1102 return 1;
1029 } 1103 }
1030 1104
@@ -1338,6 +1412,7 @@ fsm_start:
1338 1412
1339 return poll_next; 1413 return poll_next;
1340} 1414}
1415EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
1341 1416
1342void ata_pio_task(struct work_struct *work) 1417void ata_pio_task(struct work_struct *work)
1343{ 1418{
@@ -1507,6 +1582,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
1507 1582
1508 return 0; 1583 return 0;
1509} 1584}
1585EXPORT_SYMBOL_GPL(ata_sff_qc_issue);
1510 1586
1511/** 1587/**
1512 * ata_sff_qc_fill_rtf - fill result TF using ->sff_tf_read 1588 * ata_sff_qc_fill_rtf - fill result TF using ->sff_tf_read
@@ -1526,6 +1602,7 @@ bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc)
1526 qc->ap->ops->sff_tf_read(qc->ap, &qc->result_tf); 1602 qc->ap->ops->sff_tf_read(qc->ap, &qc->result_tf);
1527 return true; 1603 return true;
1528} 1604}
1605EXPORT_SYMBOL_GPL(ata_sff_qc_fill_rtf);
1529 1606
1530/** 1607/**
1531 * ata_sff_host_intr - Handle host interrupt for given (port, task) 1608 * ata_sff_host_intr - Handle host interrupt for given (port, task)
@@ -1623,6 +1700,7 @@ idle_irq:
1623#endif 1700#endif
1624 return 0; /* irq not handled */ 1701 return 0; /* irq not handled */
1625} 1702}
1703EXPORT_SYMBOL_GPL(ata_sff_host_intr);
1626 1704
1627/** 1705/**
1628 * ata_sff_interrupt - Default ATA host interrupt handler 1706 * ata_sff_interrupt - Default ATA host interrupt handler
@@ -1667,6 +1745,7 @@ irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
1667 1745
1668 return IRQ_RETVAL(handled); 1746 return IRQ_RETVAL(handled);
1669} 1747}
1748EXPORT_SYMBOL_GPL(ata_sff_interrupt);
1670 1749
1671/** 1750/**
1672 * ata_sff_freeze - Freeze SFF controller port 1751 * ata_sff_freeze - Freeze SFF controller port
@@ -1695,6 +1774,7 @@ void ata_sff_freeze(struct ata_port *ap)
1695 1774
1696 ap->ops->sff_irq_clear(ap); 1775 ap->ops->sff_irq_clear(ap);
1697} 1776}
1777EXPORT_SYMBOL_GPL(ata_sff_freeze);
1698 1778
1699/** 1779/**
1700 * ata_sff_thaw - Thaw SFF controller port 1780 * ata_sff_thaw - Thaw SFF controller port
@@ -1712,6 +1792,7 @@ void ata_sff_thaw(struct ata_port *ap)
1712 ap->ops->sff_irq_clear(ap); 1792 ap->ops->sff_irq_clear(ap);
1713 ap->ops->sff_irq_on(ap); 1793 ap->ops->sff_irq_on(ap);
1714} 1794}
1795EXPORT_SYMBOL_GPL(ata_sff_thaw);
1715 1796
1716/** 1797/**
1717 * ata_sff_prereset - prepare SFF link for reset 1798 * ata_sff_prereset - prepare SFF link for reset
@@ -1753,6 +1834,7 @@ int ata_sff_prereset(struct ata_link *link, unsigned long deadline)
1753 1834
1754 return 0; 1835 return 0;
1755} 1836}
1837EXPORT_SYMBOL_GPL(ata_sff_prereset);
1756 1838
1757/** 1839/**
1758 * ata_devchk - PATA device presence detection 1840 * ata_devchk - PATA device presence detection
@@ -1865,6 +1947,7 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
1865 1947
1866 return class; 1948 return class;
1867} 1949}
1950EXPORT_SYMBOL_GPL(ata_sff_dev_classify);
1868 1951
1869/** 1952/**
1870 * ata_sff_wait_after_reset - wait for devices to become ready after reset 1953 * ata_sff_wait_after_reset - wait for devices to become ready after reset
@@ -1941,6 +2024,7 @@ int ata_sff_wait_after_reset(struct ata_link *link, unsigned int devmask,
1941 2024
1942 return ret; 2025 return ret;
1943} 2026}
2027EXPORT_SYMBOL_GPL(ata_sff_wait_after_reset);
1944 2028
1945static int ata_bus_softreset(struct ata_port *ap, unsigned int devmask, 2029static int ata_bus_softreset(struct ata_port *ap, unsigned int devmask,
1946 unsigned long deadline) 2030 unsigned long deadline)
@@ -2013,6 +2097,7 @@ int ata_sff_softreset(struct ata_link *link, unsigned int *classes,
2013 DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]); 2097 DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]);
2014 return 0; 2098 return 0;
2015} 2099}
2100EXPORT_SYMBOL_GPL(ata_sff_softreset);
2016 2101
2017/** 2102/**
2018 * sata_sff_hardreset - reset host port via SATA phy reset 2103 * sata_sff_hardreset - reset host port via SATA phy reset
@@ -2045,6 +2130,7 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class,
2045 DPRINTK("EXIT, class=%u\n", *class); 2130 DPRINTK("EXIT, class=%u\n", *class);
2046 return rc; 2131 return rc;
2047} 2132}
2133EXPORT_SYMBOL_GPL(sata_sff_hardreset);
2048 2134
2049/** 2135/**
2050 * ata_sff_postreset - SFF postreset callback 2136 * ata_sff_postreset - SFF postreset callback
@@ -2080,6 +2166,7 @@ void ata_sff_postreset(struct ata_link *link, unsigned int *classes)
2080 if (ap->ioaddr.ctl_addr) 2166 if (ap->ioaddr.ctl_addr)
2081 iowrite8(ap->ctl, ap->ioaddr.ctl_addr); 2167 iowrite8(ap->ctl, ap->ioaddr.ctl_addr);
2082} 2168}
2169EXPORT_SYMBOL_GPL(ata_sff_postreset);
2083 2170
2084/** 2171/**
2085 * ata_sff_error_handler - Stock error handler for BMDMA controller 2172 * ata_sff_error_handler - Stock error handler for BMDMA controller
@@ -2152,6 +2239,7 @@ void ata_sff_error_handler(struct ata_port *ap)
2152 ata_do_eh(ap, ap->ops->prereset, softreset, hardreset, 2239 ata_do_eh(ap, ap->ops->prereset, softreset, hardreset,
2153 ap->ops->postreset); 2240 ap->ops->postreset);
2154} 2241}
2242EXPORT_SYMBOL_GPL(ata_sff_error_handler);
2155 2243
2156/** 2244/**
2157 * ata_sff_post_internal_cmd - Stock post_internal_cmd for SFF controller 2245 * ata_sff_post_internal_cmd - Stock post_internal_cmd for SFF controller
@@ -2174,6 +2262,7 @@ void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc)
2174 2262
2175 spin_unlock_irqrestore(ap->lock, flags); 2263 spin_unlock_irqrestore(ap->lock, flags);
2176} 2264}
2265EXPORT_SYMBOL_GPL(ata_sff_post_internal_cmd);
2177 2266
2178/** 2267/**
2179 * ata_sff_port_start - Set port up for dma. 2268 * ata_sff_port_start - Set port up for dma.
@@ -2194,6 +2283,7 @@ int ata_sff_port_start(struct ata_port *ap)
2194 return ata_port_start(ap); 2283 return ata_port_start(ap);
2195 return 0; 2284 return 0;
2196} 2285}
2286EXPORT_SYMBOL_GPL(ata_sff_port_start);
2197 2287
2198/** 2288/**
2199 * ata_sff_std_ports - initialize ioaddr with standard port offsets. 2289 * ata_sff_std_ports - initialize ioaddr with standard port offsets.
@@ -2219,6 +2309,7 @@ void ata_sff_std_ports(struct ata_ioports *ioaddr)
2219 ioaddr->status_addr = ioaddr->cmd_addr + ATA_REG_STATUS; 2309 ioaddr->status_addr = ioaddr->cmd_addr + ATA_REG_STATUS;
2220 ioaddr->command_addr = ioaddr->cmd_addr + ATA_REG_CMD; 2310 ioaddr->command_addr = ioaddr->cmd_addr + ATA_REG_CMD;
2221} 2311}
2312EXPORT_SYMBOL_GPL(ata_sff_std_ports);
2222 2313
2223unsigned long ata_bmdma_mode_filter(struct ata_device *adev, 2314unsigned long ata_bmdma_mode_filter(struct ata_device *adev,
2224 unsigned long xfer_mask) 2315 unsigned long xfer_mask)
@@ -2230,6 +2321,7 @@ unsigned long ata_bmdma_mode_filter(struct ata_device *adev,
2230 xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA); 2321 xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
2231 return xfer_mask; 2322 return xfer_mask;
2232} 2323}
2324EXPORT_SYMBOL_GPL(ata_bmdma_mode_filter);
2233 2325
2234/** 2326/**
2235 * ata_bmdma_setup - Set up PCI IDE BMDMA transaction 2327 * ata_bmdma_setup - Set up PCI IDE BMDMA transaction
@@ -2258,6 +2350,7 @@ void ata_bmdma_setup(struct ata_queued_cmd *qc)
2258 /* issue r/w command */ 2350 /* issue r/w command */
2259 ap->ops->sff_exec_command(ap, &qc->tf); 2351 ap->ops->sff_exec_command(ap, &qc->tf);
2260} 2352}
2353EXPORT_SYMBOL_GPL(ata_bmdma_setup);
2261 2354
2262/** 2355/**
2263 * ata_bmdma_start - Start a PCI IDE BMDMA transaction 2356 * ata_bmdma_start - Start a PCI IDE BMDMA transaction
@@ -2290,6 +2383,7 @@ void ata_bmdma_start(struct ata_queued_cmd *qc)
 2290 * unnecessarily delayed for MMIO 2383
2291 */ 2384 */
2292} 2385}
2386EXPORT_SYMBOL_GPL(ata_bmdma_start);
2293 2387
2294/** 2388/**
2295 * ata_bmdma_stop - Stop PCI IDE BMDMA transfer 2389 * ata_bmdma_stop - Stop PCI IDE BMDMA transfer
@@ -2314,6 +2408,7 @@ void ata_bmdma_stop(struct ata_queued_cmd *qc)
2314 /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */ 2408 /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
2315 ata_sff_dma_pause(ap); 2409 ata_sff_dma_pause(ap);
2316} 2410}
2411EXPORT_SYMBOL_GPL(ata_bmdma_stop);
2317 2412
2318/** 2413/**
2319 * ata_bmdma_status - Read PCI IDE BMDMA status 2414 * ata_bmdma_status - Read PCI IDE BMDMA status
@@ -2330,6 +2425,7 @@ u8 ata_bmdma_status(struct ata_port *ap)
2330{ 2425{
2331 return ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS); 2426 return ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
2332} 2427}
2428EXPORT_SYMBOL_GPL(ata_bmdma_status);
2333 2429
2334/** 2430/**
2335 * ata_bus_reset - reset host port and associated ATA channel 2431 * ata_bus_reset - reset host port and associated ATA channel
@@ -2422,6 +2518,7 @@ err_out:
2422 2518
2423 DPRINTK("EXIT\n"); 2519 DPRINTK("EXIT\n");
2424} 2520}
2521EXPORT_SYMBOL_GPL(ata_bus_reset);
2425 2522
2426#ifdef CONFIG_PCI 2523#ifdef CONFIG_PCI
2427 2524
@@ -2449,6 +2546,7 @@ int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev)
2449 return -EOPNOTSUPP; 2546 return -EOPNOTSUPP;
2450 return 0; 2547 return 0;
2451} 2548}
2549EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
2452 2550
2453/** 2551/**
2454 * ata_pci_bmdma_init - acquire PCI BMDMA resources and init ATA host 2552 * ata_pci_bmdma_init - acquire PCI BMDMA resources and init ATA host
@@ -2501,11 +2599,12 @@ int ata_pci_bmdma_init(struct ata_host *host)
2501 host->flags |= ATA_HOST_SIMPLEX; 2599 host->flags |= ATA_HOST_SIMPLEX;
2502 2600
2503 ata_port_desc(ap, "bmdma 0x%llx", 2601 ata_port_desc(ap, "bmdma 0x%llx",
2504 (unsigned long long)pci_resource_start(pdev, 4) + 8 * i); 2602 (unsigned long long)pci_resource_start(pdev, 4) + 8 * i);
2505 } 2603 }
2506 2604
2507 return 0; 2605 return 0;
2508} 2606}
2607EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
2509 2608
2510static int ata_resources_present(struct pci_dev *pdev, int port) 2609static int ata_resources_present(struct pci_dev *pdev, int port)
2511{ 2610{
@@ -2513,7 +2612,7 @@ static int ata_resources_present(struct pci_dev *pdev, int port)
2513 2612
2514 /* Check the PCI resources for this channel are enabled */ 2613 /* Check the PCI resources for this channel are enabled */
2515 port = port * 2; 2614 port = port * 2;
2516 for (i = 0; i < 2; i ++) { 2615 for (i = 0; i < 2; i++) {
2517 if (pci_resource_start(pdev, port + i) == 0 || 2616 if (pci_resource_start(pdev, port + i) == 0 ||
2518 pci_resource_len(pdev, port + i) == 0) 2617 pci_resource_len(pdev, port + i) == 0)
2519 return 0; 2618 return 0;
@@ -2598,6 +2697,7 @@ int ata_pci_sff_init_host(struct ata_host *host)
2598 2697
2599 return 0; 2698 return 0;
2600} 2699}
2700EXPORT_SYMBOL_GPL(ata_pci_sff_init_host);
2601 2701
2602/** 2702/**
2603 * ata_pci_sff_prepare_host - helper to prepare native PCI ATA host 2703 * ata_pci_sff_prepare_host - helper to prepare native PCI ATA host
@@ -2615,7 +2715,7 @@ int ata_pci_sff_init_host(struct ata_host *host)
2615 * 0 on success, -errno otherwise. 2715 * 0 on success, -errno otherwise.
2616 */ 2716 */
2617int ata_pci_sff_prepare_host(struct pci_dev *pdev, 2717int ata_pci_sff_prepare_host(struct pci_dev *pdev,
2618 const struct ata_port_info * const * ppi, 2718 const struct ata_port_info * const *ppi,
2619 struct ata_host **r_host) 2719 struct ata_host **r_host)
2620{ 2720{
2621 struct ata_host *host; 2721 struct ata_host *host;
@@ -2645,17 +2745,18 @@ int ata_pci_sff_prepare_host(struct pci_dev *pdev,
2645 *r_host = host; 2745 *r_host = host;
2646 return 0; 2746 return 0;
2647 2747
2648 err_bmdma: 2748err_bmdma:
2649 /* This is necessary because PCI and iomap resources are 2749 /* This is necessary because PCI and iomap resources are
2650 * merged and releasing the top group won't release the 2750 * merged and releasing the top group won't release the
2651 * acquired resources if some of those have been acquired 2751 * acquired resources if some of those have been acquired
2652 * before entering this function. 2752 * before entering this function.
2653 */ 2753 */
2654 pcim_iounmap_regions(pdev, 0xf); 2754 pcim_iounmap_regions(pdev, 0xf);
2655 err_out: 2755err_out:
2656 devres_release_group(&pdev->dev, NULL); 2756 devres_release_group(&pdev->dev, NULL);
2657 return rc; 2757 return rc;
2658} 2758}
2759EXPORT_SYMBOL_GPL(ata_pci_sff_prepare_host);
2659 2760
2660/** 2761/**
2661 * ata_pci_sff_activate_host - start SFF host, request IRQ and register it 2762 * ata_pci_sff_activate_host - start SFF host, request IRQ and register it
@@ -2741,7 +2842,7 @@ int ata_pci_sff_activate_host(struct ata_host *host,
2741 } 2842 }
2742 2843
2743 rc = ata_host_register(host, sht); 2844 rc = ata_host_register(host, sht);
2744 out: 2845out:
2745 if (rc == 0) 2846 if (rc == 0)
2746 devres_remove_group(dev, NULL); 2847 devres_remove_group(dev, NULL);
2747 else 2848 else
@@ -2749,6 +2850,7 @@ int ata_pci_sff_activate_host(struct ata_host *host,
2749 2850
2750 return rc; 2851 return rc;
2751} 2852}
2853EXPORT_SYMBOL_GPL(ata_pci_sff_activate_host);
2752 2854
2753/** 2855/**
2754 * ata_pci_sff_init_one - Initialize/register PCI IDE host controller 2856 * ata_pci_sff_init_one - Initialize/register PCI IDE host controller
@@ -2776,7 +2878,7 @@ int ata_pci_sff_activate_host(struct ata_host *host,
2776 * Zero on success, negative on errno-based value on error. 2878 * Zero on success, negative on errno-based value on error.
2777 */ 2879 */
2778int ata_pci_sff_init_one(struct pci_dev *pdev, 2880int ata_pci_sff_init_one(struct pci_dev *pdev,
2779 const struct ata_port_info * const * ppi, 2881 const struct ata_port_info * const *ppi,
2780 struct scsi_host_template *sht, void *host_priv) 2882 struct scsi_host_template *sht, void *host_priv)
2781{ 2883{
2782 struct device *dev = &pdev->dev; 2884 struct device *dev = &pdev->dev;
@@ -2815,7 +2917,7 @@ int ata_pci_sff_init_one(struct pci_dev *pdev,
2815 2917
2816 pci_set_master(pdev); 2918 pci_set_master(pdev);
2817 rc = ata_pci_sff_activate_host(host, ata_sff_interrupt, sht); 2919 rc = ata_pci_sff_activate_host(host, ata_sff_interrupt, sht);
2818 out: 2920out:
2819 if (rc == 0) 2921 if (rc == 0)
2820 devres_remove_group(&pdev->dev, NULL); 2922 devres_remove_group(&pdev->dev, NULL);
2821 else 2923 else
@@ -2823,54 +2925,7 @@ int ata_pci_sff_init_one(struct pci_dev *pdev,
2823 2925
2824 return rc; 2926 return rc;
2825} 2927}
2928EXPORT_SYMBOL_GPL(ata_pci_sff_init_one);
2826 2929
2827#endif /* CONFIG_PCI */ 2930#endif /* CONFIG_PCI */
2828 2931
2829EXPORT_SYMBOL_GPL(ata_sff_port_ops);
2830EXPORT_SYMBOL_GPL(ata_bmdma_port_ops);
2831EXPORT_SYMBOL_GPL(ata_sff_qc_prep);
2832EXPORT_SYMBOL_GPL(ata_sff_dumb_qc_prep);
2833EXPORT_SYMBOL_GPL(ata_sff_dev_select);
2834EXPORT_SYMBOL_GPL(ata_sff_check_status);
2835EXPORT_SYMBOL_GPL(ata_sff_dma_pause);
2836EXPORT_SYMBOL_GPL(ata_sff_pause);
2837EXPORT_SYMBOL_GPL(ata_sff_busy_sleep);
2838EXPORT_SYMBOL_GPL(ata_sff_wait_ready);
2839EXPORT_SYMBOL_GPL(ata_sff_tf_load);
2840EXPORT_SYMBOL_GPL(ata_sff_tf_read);
2841EXPORT_SYMBOL_GPL(ata_sff_exec_command);
2842EXPORT_SYMBOL_GPL(ata_sff_data_xfer);
2843EXPORT_SYMBOL_GPL(ata_sff_data_xfer_noirq);
2844EXPORT_SYMBOL_GPL(ata_sff_irq_on);
2845EXPORT_SYMBOL_GPL(ata_sff_irq_clear);
2846EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
2847EXPORT_SYMBOL_GPL(ata_sff_qc_issue);
2848EXPORT_SYMBOL_GPL(ata_sff_qc_fill_rtf);
2849EXPORT_SYMBOL_GPL(ata_sff_host_intr);
2850EXPORT_SYMBOL_GPL(ata_sff_interrupt);
2851EXPORT_SYMBOL_GPL(ata_sff_freeze);
2852EXPORT_SYMBOL_GPL(ata_sff_thaw);
2853EXPORT_SYMBOL_GPL(ata_sff_prereset);
2854EXPORT_SYMBOL_GPL(ata_sff_dev_classify);
2855EXPORT_SYMBOL_GPL(ata_sff_wait_after_reset);
2856EXPORT_SYMBOL_GPL(ata_sff_softreset);
2857EXPORT_SYMBOL_GPL(sata_sff_hardreset);
2858EXPORT_SYMBOL_GPL(ata_sff_postreset);
2859EXPORT_SYMBOL_GPL(ata_sff_error_handler);
2860EXPORT_SYMBOL_GPL(ata_sff_post_internal_cmd);
2861EXPORT_SYMBOL_GPL(ata_sff_port_start);
2862EXPORT_SYMBOL_GPL(ata_sff_std_ports);
2863EXPORT_SYMBOL_GPL(ata_bmdma_mode_filter);
2864EXPORT_SYMBOL_GPL(ata_bmdma_setup);
2865EXPORT_SYMBOL_GPL(ata_bmdma_start);
2866EXPORT_SYMBOL_GPL(ata_bmdma_stop);
2867EXPORT_SYMBOL_GPL(ata_bmdma_status);
2868EXPORT_SYMBOL_GPL(ata_bus_reset);
2869#ifdef CONFIG_PCI
2870EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
2871EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
2872EXPORT_SYMBOL_GPL(ata_pci_sff_init_host);
2873EXPORT_SYMBOL_GPL(ata_pci_sff_prepare_host);
2874EXPORT_SYMBOL_GPL(ata_pci_sff_activate_host);
2875EXPORT_SYMBOL_GPL(ata_pci_sff_init_one);
2876#endif /* CONFIG_PCI */
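Besides moving every EXPORT_SYMBOL_GPL() next to the symbol it exports, the libata-sff change introduces ata_bmdma32_port_ops, which differs from ata_bmdma_port_ops only in routing PIO through ata_sff_data_xfer32(). A driver opts in simply by inheriting from it, as pata_ali and pata_amd do below; a minimal sketch with a hypothetical driver name:

/* Hypothetical SFF driver electing 32-bit PIO data transfers. */
static struct ata_port_operations foo_port_ops = {
	.inherits	= &ata_bmdma32_port_ops,	/* 32-bit sff_data_xfer */
	.cable_detect	= ata_cable_40wire,
};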
diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index 73c466e452ca..a7999c19f0c9 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -19,7 +19,9 @@
19 * 19 *
20 * TODO/CHECK 20 * TODO/CHECK
21 * Cannot have ATAPI on both master & slave for rev < c2 (???) but 21 * Cannot have ATAPI on both master & slave for rev < c2 (???) but
 22 * otherwise should do atapi DMA. 22 * otherwise should do atapi DMA (for now, on old revisions we do
 23 * PIO only for ATAPI).
24 * Review Sunblade workaround.
23 */ 25 */
24 26
25#include <linux/kernel.h> 27#include <linux/kernel.h>
@@ -33,12 +35,14 @@
33#include <linux/dmi.h> 35#include <linux/dmi.h>
34 36
35#define DRV_NAME "pata_ali" 37#define DRV_NAME "pata_ali"
36#define DRV_VERSION "0.7.5" 38#define DRV_VERSION "0.7.8"
37 39
38static int ali_atapi_dma = 0; 40static int ali_atapi_dma = 0;
39module_param_named(atapi_dma, ali_atapi_dma, int, 0644); 41module_param_named(atapi_dma, ali_atapi_dma, int, 0644);
40MODULE_PARM_DESC(atapi_dma, "Enable ATAPI DMA (0=disable, 1=enable)"); 42MODULE_PARM_DESC(atapi_dma, "Enable ATAPI DMA (0=disable, 1=enable)");
41 43
44static struct pci_dev *isa_bridge;
45
42/* 46/*
43 * Cable special cases 47 * Cable special cases
44 */ 48 */
@@ -147,8 +151,7 @@ static void ali_fifo_control(struct ata_port *ap, struct ata_device *adev, int o
147 151
148 pci_read_config_byte(pdev, pio_fifo, &fifo); 152 pci_read_config_byte(pdev, pio_fifo, &fifo);
149 fifo &= ~(0x0F << shift); 153 fifo &= ~(0x0F << shift);
150 if (on) 154 fifo |= (on << shift);
151 fifo |= (on << shift);
152 pci_write_config_byte(pdev, pio_fifo, fifo); 155 pci_write_config_byte(pdev, pio_fifo, fifo);
153} 156}
154 157
@@ -337,6 +340,23 @@ static int ali_check_atapi_dma(struct ata_queued_cmd *qc)
337 return 0; 340 return 0;
338} 341}
339 342
343static void ali_c2_c3_postreset(struct ata_link *link, unsigned int *classes)
344{
345 u8 r;
346 int port_bit = 4 << link->ap->port_no;
347
348 /* If our bridge is an ALI 1533 then do the extra work */
349 if (isa_bridge) {
350 /* Tristate and re-enable the bus signals */
351 pci_read_config_byte(isa_bridge, 0x58, &r);
352 r &= ~port_bit;
353 pci_write_config_byte(isa_bridge, 0x58, r);
354 r |= port_bit;
355 pci_write_config_byte(isa_bridge, 0x58, r);
356 }
357 ata_sff_postreset(link, classes);
358}
359
340static struct scsi_host_template ali_sht = { 360static struct scsi_host_template ali_sht = {
341 ATA_BMDMA_SHT(DRV_NAME), 361 ATA_BMDMA_SHT(DRV_NAME),
342}; 362};
@@ -349,10 +369,11 @@ static struct ata_port_operations ali_early_port_ops = {
349 .inherits = &ata_sff_port_ops, 369 .inherits = &ata_sff_port_ops,
350 .cable_detect = ata_cable_40wire, 370 .cable_detect = ata_cable_40wire,
351 .set_piomode = ali_set_piomode, 371 .set_piomode = ali_set_piomode,
372 .sff_data_xfer = ata_sff_data_xfer32,
352}; 373};
353 374
354static const struct ata_port_operations ali_dma_base_ops = { 375static const struct ata_port_operations ali_dma_base_ops = {
355 .inherits = &ata_bmdma_port_ops, 376 .inherits = &ata_bmdma32_port_ops,
356 .set_piomode = ali_set_piomode, 377 .set_piomode = ali_set_piomode,
357 .set_dmamode = ali_set_dmamode, 378 .set_dmamode = ali_set_dmamode,
358}; 379};
@@ -377,6 +398,17 @@ static struct ata_port_operations ali_c2_port_ops = {
377 .check_atapi_dma = ali_check_atapi_dma, 398 .check_atapi_dma = ali_check_atapi_dma,
378 .cable_detect = ali_c2_cable_detect, 399 .cable_detect = ali_c2_cable_detect,
379 .dev_config = ali_lock_sectors, 400 .dev_config = ali_lock_sectors,
401 .postreset = ali_c2_c3_postreset,
402};
403
404/*
405 * Port operations for DMA capable ALi with cable detect
406 */
407static struct ata_port_operations ali_c4_port_ops = {
408 .inherits = &ali_dma_base_ops,
409 .check_atapi_dma = ali_check_atapi_dma,
410 .cable_detect = ali_c2_cable_detect,
411 .dev_config = ali_lock_sectors,
380}; 412};
381 413
382/* 414/*
@@ -401,52 +433,49 @@ static struct ata_port_operations ali_c5_port_ops = {
401static void ali_init_chipset(struct pci_dev *pdev) 433static void ali_init_chipset(struct pci_dev *pdev)
402{ 434{
403 u8 tmp; 435 u8 tmp;
404 struct pci_dev *north, *isa_bridge; 436 struct pci_dev *north;
405 437
406 /* 438 /*
407 * The chipset revision selects the driver operations and 439 * The chipset revision selects the driver operations and
408 * mode data. 440 * mode data.
409 */ 441 */
410 442
411 if (pdev->revision >= 0x20 && pdev->revision < 0xC2) { 443 if (pdev->revision <= 0x20) {
412 /* 1543-E/F, 1543C-C, 1543C-D, 1543C-E */ 444 pci_read_config_byte(pdev, 0x53, &tmp);
413 pci_read_config_byte(pdev, 0x4B, &tmp); 445 tmp |= 0x03;
414 /* Clear CD-ROM DMA write bit */ 446 pci_write_config_byte(pdev, 0x53, tmp);
415 tmp &= 0x7F; 447 } else {
416 pci_write_config_byte(pdev, 0x4B, tmp); 448 pci_read_config_byte(pdev, 0x4a, &tmp);
417 } else if (pdev->revision >= 0xC2) { 449 pci_write_config_byte(pdev, 0x4a, tmp | 0x20);
418 /* Enable cable detection logic */
419 pci_read_config_byte(pdev, 0x4B, &tmp); 450 pci_read_config_byte(pdev, 0x4B, &tmp);
420 pci_write_config_byte(pdev, 0x4B, tmp | 0x08); 451 if (pdev->revision < 0xC2)
421 } 452 /* 1543-E/F, 1543C-C, 1543C-D, 1543C-E */
422 north = pci_get_bus_and_slot(0, PCI_DEVFN(0,0)); 453 /* Clear CD-ROM DMA write bit */
423 isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); 454 tmp &= 0x7F;
424 455 /* Cable and UDMA */
425 if (north && north->vendor == PCI_VENDOR_ID_AL && isa_bridge) { 456 pci_write_config_byte(pdev, 0x4B, tmp | 0x09);
426 /* Configure the ALi bridge logic. For non ALi rely on BIOS.
427 Set the south bridge enable bit */
428 pci_read_config_byte(isa_bridge, 0x79, &tmp);
429 if (pdev->revision == 0xC2)
430 pci_write_config_byte(isa_bridge, 0x79, tmp | 0x04);
431 else if (pdev->revision > 0xC2 && pdev->revision < 0xC5)
432 pci_write_config_byte(isa_bridge, 0x79, tmp | 0x02);
433 }
434 if (pdev->revision >= 0x20) {
435 /* 457 /*
436 * CD_ROM DMA on (0x53 bit 0). Enable this even if we want 458 * CD_ROM DMA on (0x53 bit 0). Enable this even if we want
437 * to use PIO. 0x53 bit 1 (rev 20 only) - enable FIFO control 459 * to use PIO. 0x53 bit 1 (rev 20 only) - enable FIFO control
438 * via 0x54/55. 460 * via 0x54/55.
439 */ 461 */
440 pci_read_config_byte(pdev, 0x53, &tmp); 462 pci_read_config_byte(pdev, 0x53, &tmp);
441 if (pdev->revision <= 0x20)
442 tmp &= ~0x02;
443 if (pdev->revision >= 0xc7) 463 if (pdev->revision >= 0xc7)
444 tmp |= 0x03; 464 tmp |= 0x03;
445 else 465 else
446 tmp |= 0x01; /* CD_ROM enable for DMA */ 466 tmp |= 0x01; /* CD_ROM enable for DMA */
447 pci_write_config_byte(pdev, 0x53, tmp); 467 pci_write_config_byte(pdev, 0x53, tmp);
448 } 468 }
449 pci_dev_put(isa_bridge); 469 north = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
470 if (north && north->vendor == PCI_VENDOR_ID_AL && isa_bridge) {
471 /* Configure the ALi bridge logic. For non ALi rely on BIOS.
472 Set the south bridge enable bit */
473 pci_read_config_byte(isa_bridge, 0x79, &tmp);
474 if (pdev->revision == 0xC2)
475 pci_write_config_byte(isa_bridge, 0x79, tmp | 0x04);
476 else if (pdev->revision > 0xC2 && pdev->revision < 0xC5)
477 pci_write_config_byte(isa_bridge, 0x79, tmp | 0x02);
478 }
450 pci_dev_put(north); 479 pci_dev_put(north);
451 ata_pci_bmdma_clear_simplex(pdev); 480 ata_pci_bmdma_clear_simplex(pdev);
452} 481}
@@ -503,7 +532,7 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
503 .pio_mask = 0x1f, 532 .pio_mask = 0x1f,
504 .mwdma_mask = 0x07, 533 .mwdma_mask = 0x07,
505 .udma_mask = ATA_UDMA5, 534 .udma_mask = ATA_UDMA5,
506 .port_ops = &ali_c2_port_ops 535 .port_ops = &ali_c4_port_ops
507 }; 536 };
508 /* Revision 0xC5 is UDMA133 with LBA48 DMA */ 537 /* Revision 0xC5 is UDMA133 with LBA48 DMA */
509 static const struct ata_port_info info_c5 = { 538 static const struct ata_port_info info_c5 = {
@@ -516,7 +545,6 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
516 545
517 const struct ata_port_info *ppi[] = { NULL, NULL }; 546 const struct ata_port_info *ppi[] = { NULL, NULL };
518 u8 tmp; 547 u8 tmp;
519 struct pci_dev *isa_bridge;
520 int rc; 548 int rc;
521 549
522 rc = pcim_enable_device(pdev); 550 rc = pcim_enable_device(pdev);
@@ -543,14 +571,12 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
543 571
544 ali_init_chipset(pdev); 572 ali_init_chipset(pdev);
545 573
546 isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
547 if (isa_bridge && pdev->revision >= 0x20 && pdev->revision < 0xC2) { 574 if (isa_bridge && pdev->revision >= 0x20 && pdev->revision < 0xC2) {
548 /* Are we paired with a UDMA capable chip */ 575 /* Are we paired with a UDMA capable chip */
549 pci_read_config_byte(isa_bridge, 0x5E, &tmp); 576 pci_read_config_byte(isa_bridge, 0x5E, &tmp);
550 if ((tmp & 0x1E) == 0x12) 577 if ((tmp & 0x1E) == 0x12)
551 ppi[0] = &info_20_udma; 578 ppi[0] = &info_20_udma;
552 } 579 }
553 pci_dev_put(isa_bridge);
554 580
555 return ata_pci_sff_init_one(pdev, ppi, &ali_sht, NULL); 581 return ata_pci_sff_init_one(pdev, ppi, &ali_sht, NULL);
556} 582}
@@ -590,13 +616,20 @@ static struct pci_driver ali_pci_driver = {
590 616
591static int __init ali_init(void) 617static int __init ali_init(void)
592{ 618{
593 return pci_register_driver(&ali_pci_driver); 619 int ret;
620 isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
621
622 ret = pci_register_driver(&ali_pci_driver);
623 if (ret < 0)
624 pci_dev_put(isa_bridge);
625 return ret;
594} 626}
595 627
596 628
597static void __exit ali_exit(void) 629static void __exit ali_exit(void)
598{ 630{
599 pci_unregister_driver(&ali_pci_driver); 631 pci_unregister_driver(&ali_pci_driver);
632 pci_dev_put(isa_bridge);
600} 633}
601 634
602 635
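Promoting isa_bridge from per-call locals to a module-scope pointer changes its lifetime: the reference taken by pci_get_device() at module init is now dropped by pci_dev_put() either when driver registration fails or in the exit path, rather than inside each function. The general shape is sketched below with a placeholder driver name; pci_dev_put() tolerates NULL, so a missing bridge is harmless.

/* Module-lifetime reference to a companion device; mirrors the
 * pata_ali change above. foo_pci_driver is hypothetical. */
static struct pci_dev *bridge;

static int __init foo_init(void)
{
	int ret;

	bridge = pci_get_device(PCI_VENDOR_ID_AL,
				PCI_DEVICE_ID_AL_M1533, NULL);
	ret = pci_register_driver(&foo_pci_driver);
	if (ret < 0)
		pci_dev_put(bridge);	/* balance the reference on failure */
	return ret;
}

static void __exit foo_exit(void)
{
	pci_unregister_driver(&foo_pci_driver);
	pci_dev_put(bridge);		/* drop module-lifetime reference */
}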
diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c
index 0ec9c7d9fe9d..63719ab9ea44 100644
--- a/drivers/ata/pata_amd.c
+++ b/drivers/ata/pata_amd.c
@@ -24,7 +24,7 @@
24#include <linux/libata.h> 24#include <linux/libata.h>
25 25
26#define DRV_NAME "pata_amd" 26#define DRV_NAME "pata_amd"
27#define DRV_VERSION "0.3.10" 27#define DRV_VERSION "0.3.11"
28 28
29/** 29/**
30 * timing_setup - shared timing computation and load 30 * timing_setup - shared timing computation and load
@@ -345,7 +345,7 @@ static struct scsi_host_template amd_sht = {
345}; 345};
346 346
347static const struct ata_port_operations amd_base_port_ops = { 347static const struct ata_port_operations amd_base_port_ops = {
348 .inherits = &ata_bmdma_port_ops, 348 .inherits = &ata_bmdma32_port_ops,
349 .prereset = amd_pre_reset, 349 .prereset = amd_pre_reset,
350}; 350};
351 351
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index e0c4f05d7d57..65c28e5a6cd7 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -30,7 +30,7 @@
30#define DRV_VERSION "0.6.2" 30#define DRV_VERSION "0.6.2"
31 31
32struct hpt_clock { 32struct hpt_clock {
33 u8 xfer_speed; 33 u8 xfer_mode;
34 u32 timing; 34 u32 timing;
35}; 35};
36 36
@@ -189,28 +189,6 @@ static unsigned long hpt366_filter(struct ata_device *adev, unsigned long mask)
189 return ata_bmdma_mode_filter(adev, mask); 189 return ata_bmdma_mode_filter(adev, mask);
190} 190}
191 191
192/**
193 * hpt36x_find_mode - reset the hpt36x bus
194 * @ap: ATA port
195 * @speed: transfer mode
196 *
197 * Return the 32bit register programming information for this channel
198 * that matches the speed provided.
199 */
200
201static u32 hpt36x_find_mode(struct ata_port *ap, int speed)
202{
203 struct hpt_clock *clocks = ap->host->private_data;
204
205 while(clocks->xfer_speed) {
206 if (clocks->xfer_speed == speed)
207 return clocks->timing;
208 clocks++;
209 }
210 BUG();
211 return 0xffffffffU; /* silence compiler warning */
212}
213
214static int hpt36x_cable_detect(struct ata_port *ap) 192static int hpt36x_cable_detect(struct ata_port *ap)
215{ 193{
216 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 194 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
@@ -226,25 +204,16 @@ static int hpt36x_cable_detect(struct ata_port *ap)
226 return ATA_CBL_PATA80; 204 return ATA_CBL_PATA80;
227} 205}
228 206
229/** 207static void hpt366_set_mode(struct ata_port *ap, struct ata_device *adev,
230 * hpt366_set_piomode - PIO setup 208 u8 mode)
231 * @ap: ATA interface
232 * @adev: device on the interface
233 *
234 * Perform PIO mode setup.
235 */
236
237static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
238{ 209{
210 struct hpt_clock *clocks = ap->host->private_data;
239 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 211 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
240 u32 addr1, addr2; 212 u32 addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
241 u32 reg; 213 u32 addr2 = 0x51 + 4 * ap->port_no;
242 u32 mode; 214 u32 mask, reg;
243 u8 fast; 215 u8 fast;
244 216
245 addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
246 addr2 = 0x51 + 4 * ap->port_no;
247
248 /* Fast interrupt prediction disable, hold off interrupt disable */ 217 /* Fast interrupt prediction disable, hold off interrupt disable */
249 pci_read_config_byte(pdev, addr2, &fast); 218 pci_read_config_byte(pdev, addr2, &fast);
250 if (fast & 0x80) { 219 if (fast & 0x80) {
@@ -252,12 +221,43 @@ static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
252 pci_write_config_byte(pdev, addr2, fast); 221 pci_write_config_byte(pdev, addr2, fast);
253 } 222 }
254 223
224 /* determine timing mask and find matching clock entry */
225 if (mode < XFER_MW_DMA_0)
226 mask = 0xc1f8ffff;
227 else if (mode < XFER_UDMA_0)
228 mask = 0x303800ff;
229 else
230 mask = 0x30070000;
231
232 while (clocks->xfer_mode) {
233 if (clocks->xfer_mode == mode)
234 break;
235 clocks++;
236 }
237 if (!clocks->xfer_mode)
238 BUG();
239
240 /*
241 * Combine new mode bits with old config bits and disable
242 * on-chip PIO FIFO/buffer (and PIO MST mode as well) to avoid
243 * problems handling I/O errors later.
244 */
255 pci_read_config_dword(pdev, addr1, &reg); 245 pci_read_config_dword(pdev, addr1, &reg);
256 mode = hpt36x_find_mode(ap, adev->pio_mode); 246 reg = ((reg & ~mask) | (clocks->timing & mask)) & ~0xc0000000;
257 mode &= ~0x8000000; /* No FIFO in PIO */ 247 pci_write_config_dword(pdev, addr1, reg);
258 mode &= ~0x30070000; /* Leave config bits alone */ 248}
259 reg &= 0x30070000; /* Strip timing bits */ 249
260 pci_write_config_dword(pdev, addr1, reg | mode); 250/**
251 * hpt366_set_piomode - PIO setup
252 * @ap: ATA interface
253 * @adev: device on the interface
254 *
255 * Perform PIO mode setup.
256 */
257
258static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
259{
260 hpt366_set_mode(ap, adev, adev->pio_mode);
261} 261}
262 262
263/** 263/**
@@ -271,28 +271,7 @@ static void hpt366_set_piomode(struct ata_port *ap, struct ata_device *adev)
271 271
272static void hpt366_set_dmamode(struct ata_port *ap, struct ata_device *adev) 272static void hpt366_set_dmamode(struct ata_port *ap, struct ata_device *adev)
273{ 273{
274 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 274 hpt366_set_mode(ap, adev, adev->dma_mode);
275 u32 addr1, addr2;
276 u32 reg;
277 u32 mode;
278 u8 fast;
279
280 addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
281 addr2 = 0x51 + 4 * ap->port_no;
282
283 /* Fast interrupt prediction disable, hold off interrupt disable */
284 pci_read_config_byte(pdev, addr2, &fast);
285 if (fast & 0x80) {
286 fast &= ~0x80;
287 pci_write_config_byte(pdev, addr2, fast);
288 }
289
290 pci_read_config_dword(pdev, addr1, &reg);
291 mode = hpt36x_find_mode(ap, adev->dma_mode);
292 mode |= 0x8000000; /* FIFO in MWDMA or UDMA */
293 mode &= ~0xC0000000; /* Leave config bits alone */
294 reg &= 0xC0000000; /* Strip timing bits */
295 pci_write_config_dword(pdev, addr1, reg | mode);
296} 275}
297 276
298static struct scsi_host_template hpt36x_sht = { 277static struct scsi_host_template hpt36x_sht = {
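The pata_hpt366 rework above folds the PIO and DMA paths into one hpt366_set_mode() that picks a bit mask by transfer class, walks the clock table for the matching entry, and read-modify-writes the config dword while forcing the FIFO bits (top two bits) off. A small standalone model of just that merge step; the register and timing values are made up for the demo:

#include <stdint.h>
#include <stdio.h>

/* Keep the bits outside `mask`, take the clock-table bits inside it,
 * then clear bits 31:30 regardless, as the hunk above does. */
static uint32_t merge_timing(uint32_t reg, uint32_t timing, uint32_t mask)
{
	return ((reg & ~mask) | (timing & mask)) & ~0xc0000000u;
}

int main(void)
{
	uint32_t reg = 0x12345678;	/* current config dword (made up) */
	uint32_t timing = 0xdeadbeef;	/* clock-table entry (made up) */
	uint32_t pio_mask = 0xc1f8ffff;	/* mask used for modes below MWDMA */

	printf("0x%08x\n", (unsigned)merge_timing(reg, timing, pio_mask));
	return 0;
}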
diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c
index f11a320337c0..f19cc645881a 100644
--- a/drivers/ata/pata_hpt3x3.c
+++ b/drivers/ata/pata_hpt3x3.c
@@ -23,7 +23,7 @@
23#include <linux/libata.h> 23#include <linux/libata.h>
24 24
25#define DRV_NAME "pata_hpt3x3" 25#define DRV_NAME "pata_hpt3x3"
26#define DRV_VERSION "0.5.3" 26#define DRV_VERSION "0.6.1"
27 27
28/** 28/**
29 * hpt3x3_set_piomode - PIO setup 29 * hpt3x3_set_piomode - PIO setup
@@ -80,14 +80,48 @@ static void hpt3x3_set_dmamode(struct ata_port *ap, struct ata_device *adev)
80 r2 &= ~(0x11 << dn); /* Clear MWDMA and UDMA bits */ 80 r2 &= ~(0x11 << dn); /* Clear MWDMA and UDMA bits */
81 81
82 if (adev->dma_mode >= XFER_UDMA_0) 82 if (adev->dma_mode >= XFER_UDMA_0)
83 r2 |= (0x10 << dn); /* Ultra mode */ 83 r2 |= (0x01 << dn); /* Ultra mode */
84 else 84 else
85 r2 |= (0x01 << dn); /* MWDMA */ 85 r2 |= (0x10 << dn); /* MWDMA */
86 86
87 pci_write_config_dword(pdev, 0x44, r1); 87 pci_write_config_dword(pdev, 0x44, r1);
88 pci_write_config_dword(pdev, 0x48, r2); 88 pci_write_config_dword(pdev, 0x48, r2);
89} 89}
90#endif /* CONFIG_PATA_HPT3X3_DMA */ 90
91/**
92 * hpt3x3_freeze - DMA workaround
93 * @ap: port to freeze
94 *
95 * When freezing an HPT3x3 we must stop any pending DMA before
96 * writing to the control register or the chip will hang
97 */
98
99static void hpt3x3_freeze(struct ata_port *ap)
100{
101 void __iomem *mmio = ap->ioaddr.bmdma_addr;
102
103 iowrite8(ioread8(mmio + ATA_DMA_CMD) & ~ ATA_DMA_START,
104 mmio + ATA_DMA_CMD);
105 ata_sff_dma_pause(ap);
106 ata_sff_freeze(ap);
107}
108
109/**
110 * hpt3x3_bmdma_setup - DMA workaround
111 * @qc: Queued command
112 *
113 * When issuing BMDMA we must clean up the error/active bits in
114 * software on this device
115 */
116
117static void hpt3x3_bmdma_setup(struct ata_queued_cmd *qc)
118{
119 struct ata_port *ap = qc->ap;
120 u8 r = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
121 r |= ATA_DMA_INTR | ATA_DMA_ERR;
122 iowrite8(r, ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
123 return ata_bmdma_setup(qc);
124}
91 125
92/** 126/**
93 * hpt3x3_atapi_dma - ATAPI DMA check 127 * hpt3x3_atapi_dma - ATAPI DMA check
@@ -101,18 +135,23 @@ static int hpt3x3_atapi_dma(struct ata_queued_cmd *qc)
101 return 1; 135 return 1;
102} 136}
103 137
138#endif /* CONFIG_PATA_HPT3X3_DMA */
139
104static struct scsi_host_template hpt3x3_sht = { 140static struct scsi_host_template hpt3x3_sht = {
105 ATA_BMDMA_SHT(DRV_NAME), 141 ATA_BMDMA_SHT(DRV_NAME),
106}; 142};
107 143
108static struct ata_port_operations hpt3x3_port_ops = { 144static struct ata_port_operations hpt3x3_port_ops = {
109 .inherits = &ata_bmdma_port_ops, 145 .inherits = &ata_bmdma_port_ops,
110 .check_atapi_dma= hpt3x3_atapi_dma,
111 .cable_detect = ata_cable_40wire, 146 .cable_detect = ata_cable_40wire,
112 .set_piomode = hpt3x3_set_piomode, 147 .set_piomode = hpt3x3_set_piomode,
113#if defined(CONFIG_PATA_HPT3X3_DMA) 148#if defined(CONFIG_PATA_HPT3X3_DMA)
114 .set_dmamode = hpt3x3_set_dmamode, 149 .set_dmamode = hpt3x3_set_dmamode,
150 .bmdma_setup = hpt3x3_bmdma_setup,
151 .check_atapi_dma= hpt3x3_atapi_dma,
152 .freeze = hpt3x3_freeze,
115#endif 153#endif
154
116}; 155};
117 156
118/** 157/**
diff --git a/drivers/ata/pata_mpiix.c b/drivers/ata/pata_mpiix.c
index 7c8faa48b5f3..aa576cac4d17 100644
--- a/drivers/ata/pata_mpiix.c
+++ b/drivers/ata/pata_mpiix.c
@@ -35,7 +35,7 @@
35#include <linux/libata.h> 35#include <linux/libata.h>
36 36
37#define DRV_NAME "pata_mpiix" 37#define DRV_NAME "pata_mpiix"
38#define DRV_VERSION "0.7.6" 38#define DRV_VERSION "0.7.7"
39 39
40enum { 40enum {
41 IDETIM = 0x6C, /* IDE control register */ 41 IDETIM = 0x6C, /* IDE control register */
@@ -146,6 +146,7 @@ static struct ata_port_operations mpiix_port_ops = {
146 .cable_detect = ata_cable_40wire, 146 .cable_detect = ata_cable_40wire,
147 .set_piomode = mpiix_set_piomode, 147 .set_piomode = mpiix_set_piomode,
148 .prereset = mpiix_pre_reset, 148 .prereset = mpiix_pre_reset,
149 .sff_data_xfer = ata_sff_data_xfer32,
149}; 150};
150 151
151static int mpiix_init_one(struct pci_dev *dev, const struct pci_device_id *id) 152static int mpiix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 6afa07a37648..d8d743af3225 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -186,7 +186,7 @@ EXPORT_SYMBOL_GPL(__pata_platform_probe);
186 * A platform bus ATA device has been unplugged. Perform the needed 186 * A platform bus ATA device has been unplugged. Perform the needed
187 * cleanup. Also called on module unload for any active devices. 187 * cleanup. Also called on module unload for any active devices.
188 */ 188 */
189int __devexit __pata_platform_remove(struct device *dev) 189int __pata_platform_remove(struct device *dev)
190{ 190{
191 struct ata_host *host = dev_get_drvdata(dev); 191 struct ata_host *host = dev_get_drvdata(dev);
192 192
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index 83580a59db58..9e764e5747e6 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -32,7 +32,7 @@
32#include <linux/libata.h> 32#include <linux/libata.h>
33 33
34#define DRV_NAME "pata_sil680" 34#define DRV_NAME "pata_sil680"
35#define DRV_VERSION "0.4.8" 35#define DRV_VERSION "0.4.9"
36 36
37#define SIL680_MMIO_BAR 5 37#define SIL680_MMIO_BAR 5
38 38
@@ -195,7 +195,7 @@ static struct scsi_host_template sil680_sht = {
195}; 195};
196 196
197static struct ata_port_operations sil680_port_ops = { 197static struct ata_port_operations sil680_port_ops = {
198 .inherits = &ata_bmdma_port_ops, 198 .inherits = &ata_bmdma32_port_ops,
199 .cable_detect = sil680_cable_detect, 199 .cable_detect = sil680_cable_detect,
200 .set_piomode = sil680_set_piomode, 200 .set_piomode = sil680_set_piomode,
201 .set_dmamode = sil680_set_dmamode, 201 .set_dmamode = sil680_set_dmamode,
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index ccee930f1e12..2590c2279fa7 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -51,13 +51,6 @@ struct sil24_sge {
51 __le32 flags; 51 __le32 flags;
52}; 52};
53 53
54/*
55 * Port multiplier
56 */
57struct sil24_port_multiplier {
58 __le32 diag;
59 __le32 sactive;
60};
61 54
62enum { 55enum {
63 SIL24_HOST_BAR = 0, 56 SIL24_HOST_BAR = 0,
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 088885ed51b9..e1c7611e9144 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -64,7 +64,7 @@
64#include <linux/jiffies.h> 64#include <linux/jiffies.h>
65#include "iphase.h" 65#include "iphase.h"
66#include "suni.h" 66#include "suni.h"
67#define swap(x) (((x & 0xff) << 8) | ((x & 0xff00) >> 8)) 67#define swap_byte_order(x) (((x & 0xff) << 8) | ((x & 0xff00) >> 8))
68 68
69#define PRIV(dev) ((struct suni_priv *) dev->phy_data) 69#define PRIV(dev) ((struct suni_priv *) dev->phy_data)
70 70
@@ -1306,7 +1306,7 @@ static void rx_dle_intr(struct atm_dev *dev)
1306 // get real pkt length pwang_test 1306 // get real pkt length pwang_test
1307 trailer = (struct cpcs_trailer*)((u_char *)skb->data + 1307 trailer = (struct cpcs_trailer*)((u_char *)skb->data +
1308 skb->len - sizeof(*trailer)); 1308 skb->len - sizeof(*trailer));
1309 length = swap(trailer->length); 1309 length = swap_byte_order(trailer->length);
1310 if ((length > iadev->rx_buf_sz) || (length > 1310 if ((length > iadev->rx_buf_sz) || (length >
1311 (skb->len - sizeof(struct cpcs_trailer)))) 1311 (skb->len - sizeof(struct cpcs_trailer))))
1312 { 1312 {
@@ -2995,7 +2995,7 @@ static int ia_pkt_tx (struct atm_vcc *vcc, struct sk_buff *skb) {
2995 skb->len, PCI_DMA_TODEVICE); 2995 skb->len, PCI_DMA_TODEVICE);
2996 wr_ptr->local_pkt_addr = (buf_desc_ptr->buf_start_hi << 16) | 2996 wr_ptr->local_pkt_addr = (buf_desc_ptr->buf_start_hi << 16) |
2997 buf_desc_ptr->buf_start_lo; 2997 buf_desc_ptr->buf_start_lo;
2998 /* wr_ptr->bytes = swap(total_len); didn't seem to affect ?? */ 2998 /* wr_ptr->bytes = swap_byte_order(total_len); didn't seem to affect?? */
2999 wr_ptr->bytes = skb->len; 2999 wr_ptr->bytes = skb->len;
3000 3000
3001 /* hw bug - DLEs of 0x2d, 0x2e, 0x2f cause DMA lockup */ 3001 /* hw bug - DLEs of 0x2d, 0x2e, 0x2f cause DMA lockup */
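The iphase rename above avoids a clash with the generic swap() helper; the macro itself is just a 16-bit byte swap. A trivial standalone demo of what it computes:

#include <stdio.h>

/* Same definition as the renamed macro above; note the argument is
 * used unparenthesized, so pass a plain variable, not an expression. */
#define swap_byte_order(x) (((x & 0xff) << 8) | ((x & 0xff00) >> 8))

int main(void)
{
	unsigned int len = 0x1234;

	printf("0x%04x -> 0x%04x\n", len, (unsigned int)swap_byte_order(len));
	/* prints: 0x1234 -> 0x3412 */
	return 0;
}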
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 35914b6e1d2a..f5be8081cd81 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -616,6 +616,7 @@ config HVC_ISERIES
616 default y 616 default y
617 select HVC_DRIVER 617 select HVC_DRIVER
618 select HVC_IRQ 618 select HVC_IRQ
619 select VIOPATH
619 help 620 help
620 iSeries machines support a hypervisor virtual console. 621 iSeries machines support a hypervisor virtual console.
621 622
diff --git a/drivers/char/hvc_beat.c b/drivers/char/hvc_beat.c
index 91cdb35a9204..0afc8b82212e 100644
--- a/drivers/char/hvc_beat.c
+++ b/drivers/char/hvc_beat.c
@@ -44,7 +44,7 @@ static int hvc_beat_get_chars(uint32_t vtermno, char *buf, int cnt)
44 static unsigned char q[sizeof(unsigned long) * 2] 44 static unsigned char q[sizeof(unsigned long) * 2]
45 __attribute__((aligned(sizeof(unsigned long)))); 45 __attribute__((aligned(sizeof(unsigned long))));
46 static int qlen = 0; 46 static int qlen = 0;
47 unsigned long got; 47 u64 got;
48 48
49again: 49again:
50 if (qlen) { 50 if (qlen) {
@@ -63,7 +63,7 @@ again:
63 } 63 }
64 } 64 }
65 if (beat_get_term_char(vtermno, &got, 65 if (beat_get_term_char(vtermno, &got,
66 ((unsigned long *)q), ((unsigned long *)q) + 1) == 0) { 66 ((u64 *)q), ((u64 *)q) + 1) == 0) {
67 qlen = got; 67 qlen = got;
68 goto again; 68 goto again;
69 } 69 }
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 112a6ba9a96f..146c97613da0 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -32,7 +32,7 @@
32 32
33/* These are global because they are accessed in tty_io.c */ 33/* These are global because they are accessed in tty_io.c */
34#ifdef CONFIG_UNIX98_PTYS 34#ifdef CONFIG_UNIX98_PTYS
35struct tty_driver *ptm_driver; 35static struct tty_driver *ptm_driver;
36static struct tty_driver *pts_driver; 36static struct tty_driver *pts_driver;
37#endif 37#endif
38 38
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index ab18c1e7b115..70efba2ee053 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -273,12 +273,23 @@ static void tpm_nsc_remove(struct device *dev)
273 } 273 }
274} 274}
275 275
276static struct device_driver nsc_drv = { 276static int tpm_nsc_suspend(struct platform_device *dev, pm_message_t msg)
277 .name = "tpm_nsc", 277{
278 .bus = &platform_bus_type, 278 return tpm_pm_suspend(&dev->dev, msg);
279 .owner = THIS_MODULE, 279}
280 .suspend = tpm_pm_suspend, 280
281 .resume = tpm_pm_resume, 281static int tpm_nsc_resume(struct platform_device *dev)
282{
283 return tpm_pm_resume(&dev->dev);
284}
285
286static struct platform_driver nsc_drv = {
287 .suspend = tpm_nsc_suspend,
288 .resume = tpm_nsc_resume,
289 .driver = {
290 .name = "tpm_nsc",
291 .owner = THIS_MODULE,
292 },
282}; 293};
283 294
284static int __init init_nsc(void) 295static int __init init_nsc(void)
@@ -297,7 +308,7 @@ static int __init init_nsc(void)
297 return -ENODEV; 308 return -ENODEV;
298 } 309 }
299 310
300 err = driver_register(&nsc_drv); 311 err = platform_driver_register(&nsc_drv);
301 if (err) 312 if (err)
302 return err; 313 return err;
303 314
@@ -308,17 +319,15 @@ static int __init init_nsc(void)
308 /* enable the DPM module */ 319 /* enable the DPM module */
309 tpm_write_index(nscAddrBase, NSC_LDC_INDEX, 0x01); 320 tpm_write_index(nscAddrBase, NSC_LDC_INDEX, 0x01);
310 321
311 pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); 322 pdev = platform_device_alloc("tpm_nscl0", -1);
312 if (!pdev) { 323 if (!pdev) {
313 rc = -ENOMEM; 324 rc = -ENOMEM;
314 goto err_unreg_drv; 325 goto err_unreg_drv;
315 } 326 }
316 327
317 pdev->name = "tpm_nscl0";
318 pdev->id = -1;
319 pdev->num_resources = 0; 328 pdev->num_resources = 0;
329 pdev->dev.driver = &nsc_drv.driver;
320 pdev->dev.release = tpm_nsc_remove; 330 pdev->dev.release = tpm_nsc_remove;
321 pdev->dev.driver = &nsc_drv;
322 331
323 if ((rc = platform_device_register(pdev)) < 0) 332 if ((rc = platform_device_register(pdev)) < 0)
324 goto err_free_dev; 333 goto err_free_dev;
@@ -377,7 +386,7 @@ err_unreg_dev:
377err_free_dev: 386err_free_dev:
378 kfree(pdev); 387 kfree(pdev);
379err_unreg_drv: 388err_unreg_drv:
380 driver_unregister(&nsc_drv); 389 platform_driver_unregister(&nsc_drv);
381 return rc; 390 return rc;
382} 391}
383 392
@@ -390,7 +399,7 @@ static void __exit cleanup_nsc(void)
390 pdev = NULL; 399 pdev = NULL;
391 } 400 }
392 401
393 driver_unregister(&nsc_drv); 402 platform_driver_unregister(&nsc_drv);
394} 403}
395 404
396module_init(init_nsc); 405module_init(init_nsc);
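The tpm_nsc conversion above replaces a bare struct device_driver with a proper struct platform_driver, adding thin wrappers because the platform bus passes a struct platform_device while tpm_pm_suspend()/tpm_pm_resume() want the embedded struct device. A compile-oriented sketch of that wrapper shape in the same legacy (pre-dev_pm_ops) style; the core_* stubs stand in for the TPM helpers:

#include <linux/module.h>
#include <linux/platform_device.h>

/* Stand-ins for tpm_pm_suspend()/tpm_pm_resume(), which operate on
 * the embedded struct device rather than the platform device. */
static int core_suspend(struct device *dev, pm_message_t msg)
{
	return 0;
}

static int core_resume(struct device *dev)
{
	return 0;
}

static int example_suspend(struct platform_device *pdev, pm_message_t msg)
{
	return core_suspend(&pdev->dev, msg);
}

static int example_resume(struct platform_device *pdev)
{
	return core_resume(&pdev->dev);
}

static struct platform_driver example_drv = {
	.suspend = example_suspend,
	.resume  = example_resume,
	.driver  = {
		.name  = "example",
		.owner = THIS_MODULE,
	},
};

static int __init example_init(void)
{
	return platform_driver_register(&example_drv);
}

static void __exit example_exit(void)
{
	platform_driver_unregister(&example_drv);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");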
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 80014213fb53..7900bd63b36d 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -969,8 +969,7 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
969 * Takes the console sem and the called methods then take the tty 969 * Takes the console sem and the called methods then take the tty
970 * termios_mutex and the tty ctrl_lock in that order. 970 * termios_mutex and the tty ctrl_lock in that order.
971 */ 971 */
972 972static int vt_resize(struct tty_struct *tty, struct winsize *ws)
973int vt_resize(struct tty_struct *tty, struct winsize *ws)
974{ 973{
975 struct vc_data *vc = tty->driver_data; 974 struct vc_data *vc = tty->driver_data;
976 int ret; 975 int ret;
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 50a071f1c945..777fba48d2d3 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -238,11 +238,11 @@ static ssize_t host_control_on_shutdown_store(struct device *dev,
238} 238}
239 239
240/** 240/**
241 * smi_request: generate SMI request 241 * dcdbas_smi_request: generate SMI request
242 * 242 *
243 * Called with smi_data_lock. 243 * Called with smi_data_lock.
244 */ 244 */
245static int smi_request(struct smi_cmd *smi_cmd) 245int dcdbas_smi_request(struct smi_cmd *smi_cmd)
246{ 246{
247 cpumask_t old_mask; 247 cpumask_t old_mask;
248 int ret = 0; 248 int ret = 0;
@@ -309,14 +309,14 @@ static ssize_t smi_request_store(struct device *dev,
309 switch (val) { 309 switch (val) {
310 case 2: 310 case 2:
311 /* Raw SMI */ 311 /* Raw SMI */
312 ret = smi_request(smi_cmd); 312 ret = dcdbas_smi_request(smi_cmd);
313 if (!ret) 313 if (!ret)
314 ret = count; 314 ret = count;
315 break; 315 break;
316 case 1: 316 case 1:
317 /* Calling Interface SMI */ 317 /* Calling Interface SMI */
318 smi_cmd->ebx = (u32) virt_to_phys(smi_cmd->command_buffer); 318 smi_cmd->ebx = (u32) virt_to_phys(smi_cmd->command_buffer);
319 ret = smi_request(smi_cmd); 319 ret = dcdbas_smi_request(smi_cmd);
320 if (!ret) 320 if (!ret)
321 ret = count; 321 ret = count;
322 break; 322 break;
@@ -333,6 +333,7 @@ out:
333 mutex_unlock(&smi_data_lock); 333 mutex_unlock(&smi_data_lock);
334 return ret; 334 return ret;
335} 335}
336EXPORT_SYMBOL(dcdbas_smi_request);
336 337
337/** 338/**
338 * host_control_smi: generate host control SMI 339 * host_control_smi: generate host control SMI
diff --git a/drivers/firmware/dcdbas.h b/drivers/firmware/dcdbas.h
index 87bc3417de27..ca3cb0a54ab6 100644
--- a/drivers/firmware/dcdbas.h
+++ b/drivers/firmware/dcdbas.h
@@ -101,5 +101,7 @@ struct apm_cmd {
101 } __attribute__ ((packed)) parameters; 101 } __attribute__ ((packed)) parameters;
102} __attribute__ ((packed)); 102} __attribute__ ((packed));
103 103
104int dcdbas_smi_request(struct smi_cmd *smi_cmd);
105
104#endif /* _DCDBAS_H_ */ 106#endif /* _DCDBAS_H_ */
105 107
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 3bf8ee120d42..261b9aa3f248 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -56,9 +56,9 @@ struct memmap_attribute {
56 ssize_t (*show)(struct firmware_map_entry *entry, char *buf); 56 ssize_t (*show)(struct firmware_map_entry *entry, char *buf);
57}; 57};
58 58
59struct memmap_attribute memmap_start_attr = __ATTR_RO(start); 59static struct memmap_attribute memmap_start_attr = __ATTR_RO(start);
60struct memmap_attribute memmap_end_attr = __ATTR_RO(end); 60static struct memmap_attribute memmap_end_attr = __ATTR_RO(end);
61struct memmap_attribute memmap_type_attr = __ATTR_RO(type); 61static struct memmap_attribute memmap_type_attr = __ATTR_RO(type);
62 62
63/* 63/*
64 * These are default attributes that are added for every memmap entry. 64 * These are default attributes that are added for every memmap entry.
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index a812db243477..6ba57e91d7ab 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2705,7 +2705,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2705 sizeof(struct ietf_mpa_frame)); 2705 sizeof(struct ietf_mpa_frame));
2706 2706
2707 2707
2708 /* notify OF layer that accept event was successfull */ 2708 /* notify OF layer that accept event was successful */
2709 cm_id->add_ref(cm_id); 2709 cm_id->add_ref(cm_id);
2710 2710
2711 cm_event.event = IW_CM_EVENT_ESTABLISHED; 2711 cm_event.event = IW_CM_EVENT_ESTABLISHED;
diff --git a/drivers/input/mouse/pxa930_trkball.c b/drivers/input/mouse/pxa930_trkball.c
index a0f45c4fc198..d297accf9a7f 100644
--- a/drivers/input/mouse/pxa930_trkball.c
+++ b/drivers/input/mouse/pxa930_trkball.c
@@ -186,7 +186,7 @@ static int __devinit pxa930_trkball_probe(struct platform_device *pdev)
186 error = request_irq(irq, pxa930_trkball_interrupt, IRQF_DISABLED, 186 error = request_irq(irq, pxa930_trkball_interrupt, IRQF_DISABLED,
187 pdev->name, trkball); 187 pdev->name, trkball);
188 if (error) { 188 if (error) {
189 dev_err(&pdev->dev, "failed to request irq: %d\n", ret); 189 dev_err(&pdev->dev, "failed to request irq: %d\n", error);
190 goto failed_free_io; 190 goto failed_free_io;
191 } 191 }
192 192
@@ -227,7 +227,7 @@ failed_free_io:
227 iounmap(trkball->mmio_base); 227 iounmap(trkball->mmio_base);
228failed: 228failed:
229 kfree(trkball); 229 kfree(trkball);
230 return ret; 230 return error;
231} 231}
232 232
233static int __devexit pxa930_trkball_remove(struct platform_device *pdev) 233static int __devexit pxa930_trkball_remove(struct platform_device *pdev)
diff --git a/drivers/isdn/hardware/eicon/debuglib.h b/drivers/isdn/hardware/eicon/debuglib.h
index 016410cf2273..8ea587783e14 100644
--- a/drivers/isdn/hardware/eicon/debuglib.h
+++ b/drivers/isdn/hardware/eicon/debuglib.h
@@ -235,7 +235,7 @@ typedef void ( * DbgOld) (unsigned short, char *, va_list) ;
235typedef void ( * DbgEv) (unsigned short, unsigned long, va_list) ; 235typedef void ( * DbgEv) (unsigned short, unsigned long, va_list) ;
236typedef void ( * DbgIrq) (unsigned short, int, char *, va_list) ; 236typedef void ( * DbgIrq) (unsigned short, int, char *, va_list) ;
237typedef struct _DbgHandle_ 237typedef struct _DbgHandle_
238{ char Registered ; /* driver successfull registered */ 238{ char Registered ; /* driver successfully registered */
239#define DBG_HANDLE_REG_NEW 0x01 /* this (new) structure */ 239#define DBG_HANDLE_REG_NEW 0x01 /* this (new) structure */
240#define DBG_HANDLE_REG_OLD 0x7f /* old structure (see below) */ 240#define DBG_HANDLE_REG_OLD 0x7f /* old structure (see below) */
241 char Version; /* version of this structure */ 241 char Version; /* version of this structure */
diff --git a/drivers/isdn/hardware/eicon/os_4bri.c b/drivers/isdn/hardware/eicon/os_4bri.c
index 7b4ec3f60dbf..c964b8d91ada 100644
--- a/drivers/isdn/hardware/eicon/os_4bri.c
+++ b/drivers/isdn/hardware/eicon/os_4bri.c
@@ -997,7 +997,7 @@ diva_4bri_start_adapter(PISDN_ADAPTER IoAdapter,
997 diva_xdi_display_adapter_features(IoAdapter->ANum); 997 diva_xdi_display_adapter_features(IoAdapter->ANum);
998 998
999 for (i = 0; i < IoAdapter->tasks; i++) { 999 for (i = 0; i < IoAdapter->tasks; i++) {
1000 DBG_LOG(("A(%d) %s adapter successfull started", 1000 DBG_LOG(("A(%d) %s adapter successfully started",
1001 IoAdapter->QuadroList->QuadroAdapter[i]->ANum, 1001 IoAdapter->QuadroList->QuadroAdapter[i]->ANum,
1002 (IoAdapter->tasks == 1) ? "BRI 2.0" : "4BRI")) 1002 (IoAdapter->tasks == 1) ? "BRI 2.0" : "4BRI"))
1003 diva_xdi_didd_register_adapter(IoAdapter->QuadroList->QuadroAdapter[i]->ANum); 1003 diva_xdi_didd_register_adapter(IoAdapter->QuadroList->QuadroAdapter[i]->ANum);
diff --git a/drivers/isdn/hardware/eicon/os_bri.c b/drivers/isdn/hardware/eicon/os_bri.c
index f31bba5b16ff..08f01993f46b 100644
--- a/drivers/isdn/hardware/eicon/os_bri.c
+++ b/drivers/isdn/hardware/eicon/os_bri.c
@@ -736,7 +736,7 @@ diva_bri_start_adapter(PISDN_ADAPTER IoAdapter,
736 736
737 IoAdapter->Properties.Features = (word) features; 737 IoAdapter->Properties.Features = (word) features;
738 diva_xdi_display_adapter_features(IoAdapter->ANum); 738 diva_xdi_display_adapter_features(IoAdapter->ANum);
739 DBG_LOG(("A(%d) BRI adapter successfull started", IoAdapter->ANum)) 739 DBG_LOG(("A(%d) BRI adapter successfully started", IoAdapter->ANum))
740 /* 740 /*
741 Register with DIDD 741 Register with DIDD
742 */ 742 */
diff --git a/drivers/isdn/hardware/eicon/os_pri.c b/drivers/isdn/hardware/eicon/os_pri.c
index 903356547b79..5d65405c75f4 100644
--- a/drivers/isdn/hardware/eicon/os_pri.c
+++ b/drivers/isdn/hardware/eicon/os_pri.c
@@ -513,7 +513,7 @@ diva_pri_start_adapter(PISDN_ADAPTER IoAdapter,
513 513
514 diva_xdi_display_adapter_features(IoAdapter->ANum); 514 diva_xdi_display_adapter_features(IoAdapter->ANum);
515 515
516 DBG_LOG(("A(%d) PRI adapter successfull started", IoAdapter->ANum)) 516 DBG_LOG(("A(%d) PRI adapter successfully started", IoAdapter->ANum))
517 /* 517 /*
518 Register with DIDD 518 Register with DIDD
519 */ 519 */
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index ab7c8e4a61f9..719943763391 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -215,7 +215,6 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
215 /* choose a good rdev and read the page from there */ 215 /* choose a good rdev and read the page from there */
216 216
217 mdk_rdev_t *rdev; 217 mdk_rdev_t *rdev;
218 struct list_head *tmp;
219 sector_t target; 218 sector_t target;
220 219
221 if (!page) 220 if (!page)
@@ -223,7 +222,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
223 if (!page) 222 if (!page)
224 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
225 224
226 rdev_for_each(rdev, tmp, mddev) { 225 list_for_each_entry(rdev, &mddev->disks, same_set) {
227 if (! test_bit(In_sync, &rdev->flags) 226 if (! test_bit(In_sync, &rdev->flags)
228 || test_bit(Faulty, &rdev->flags)) 227 || test_bit(Faulty, &rdev->flags))
229 continue; 228 continue;
@@ -964,9 +963,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
964 */ 963 */
965 page = bitmap->sb_page; 964 page = bitmap->sb_page;
966 offset = sizeof(bitmap_super_t); 965 offset = sizeof(bitmap_super_t);
967 read_sb_page(bitmap->mddev, bitmap->offset, 966 if (!file)
968 page, 967 read_sb_page(bitmap->mddev,
969 index, count); 968 bitmap->offset,
969 page,
970 index, count);
970 } else if (file) { 971 } else if (file) {
971 page = read_page(file, index, bitmap, count); 972 page = read_page(file, index, bitmap, count);
972 offset = 0; 973 offset = 0;
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index f26c1f9a475b..86d9adf90e79 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -283,7 +283,6 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
283static int run(mddev_t *mddev) 283static int run(mddev_t *mddev)
284{ 284{
285 mdk_rdev_t *rdev; 285 mdk_rdev_t *rdev;
286 struct list_head *tmp;
287 int i; 286 int i;
288 287
289 conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); 288 conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL);
@@ -296,7 +295,7 @@ static int run(mddev_t *mddev)
296 } 295 }
297 conf->nfaults = 0; 296 conf->nfaults = 0;
298 297
299 rdev_for_each(rdev, tmp, mddev) 298 list_for_each_entry(rdev, &mddev->disks, same_set)
300 conf->rdev = rdev; 299 conf->rdev = rdev;
301 300
302 mddev->array_sectors = mddev->size * 2; 301 mddev->array_sectors = mddev->size * 2;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 3b90c5c924ec..1e3aea9eecf1 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -105,7 +105,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
105 int i, nb_zone, cnt; 105 int i, nb_zone, cnt;
106 sector_t min_sectors; 106 sector_t min_sectors;
107 sector_t curr_sector; 107 sector_t curr_sector;
108 struct list_head *tmp;
109 108
110 conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), 109 conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
111 GFP_KERNEL); 110 GFP_KERNEL);
@@ -115,7 +114,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
115 cnt = 0; 114 cnt = 0;
116 conf->array_sectors = 0; 115 conf->array_sectors = 0;
117 116
118 rdev_for_each(rdev, tmp, mddev) { 117 list_for_each_entry(rdev, &mddev->disks, same_set) {
119 int j = rdev->raid_disk; 118 int j = rdev->raid_disk;
120 dev_info_t *disk = conf->disks + j; 119 dev_info_t *disk = conf->disks + j;
121 120
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1b1d32694f6f..41e2509bf896 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -214,20 +214,33 @@ static inline mddev_t *mddev_get(mddev_t *mddev)
214 return mddev; 214 return mddev;
215} 215}
216 216
217static void mddev_delayed_delete(struct work_struct *ws)
218{
219 mddev_t *mddev = container_of(ws, mddev_t, del_work);
220 kobject_del(&mddev->kobj);
221 kobject_put(&mddev->kobj);
222}
223
217static void mddev_put(mddev_t *mddev) 224static void mddev_put(mddev_t *mddev)
218{ 225{
219 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) 226 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
220 return; 227 return;
221 if (!mddev->raid_disks && list_empty(&mddev->disks)) { 228 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
229 !mddev->hold_active) {
222 list_del(&mddev->all_mddevs); 230 list_del(&mddev->all_mddevs);
223 spin_unlock(&all_mddevs_lock); 231 if (mddev->gendisk) {
 224 blk_cleanup_queue(mddev->queue); 232 /* we did a probe, so we need to clean up.
225 if (mddev->sysfs_state) 233 * Call schedule_work inside the spinlock
226 sysfs_put(mddev->sysfs_state); 234 * so that flush_scheduled_work() after
227 mddev->sysfs_state = NULL; 235 * mddev_find will succeed in waiting for the
228 kobject_put(&mddev->kobj); 236 * work to be done.
229 } else 237 */
230 spin_unlock(&all_mddevs_lock); 238 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
239 schedule_work(&mddev->del_work);
240 } else
241 kfree(mddev);
242 }
243 spin_unlock(&all_mddevs_lock);
231} 244}
232 245
233static mddev_t * mddev_find(dev_t unit) 246static mddev_t * mddev_find(dev_t unit)
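In the mddev_put() hunk above, the last reference can be dropped while holding all_mddevs_lock, where kobject teardown (which may sleep) is not allowed; scheduling the work item while still under the lock also guarantees a later flush_scheduled_work() waits for it. A stripped-down sketch of the deferred-delete pattern, with an illustrative struct layout:

#include <linux/kobject.h>
#include <linux/workqueue.h>

struct obj {
	struct kobject kobj;
	struct work_struct del_work;
};

static void obj_delayed_delete(struct work_struct *ws)
{
	struct obj *o = container_of(ws, struct obj, del_work);

	kobject_del(&o->kobj);	/* may sleep -- safe in work context */
	kobject_put(&o->kobj);	/* final put runs the ktype release */
}

/* Called by the last reference holder; a spinlock is held in the
 * real code, which is exactly why teardown is deferred. */
static void obj_drop_last_ref(struct obj *o)
{
	INIT_WORK(&o->del_work, obj_delayed_delete);
	schedule_work(&o->del_work);
}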
@@ -236,15 +249,50 @@ static mddev_t * mddev_find(dev_t unit)
236 249
237 retry: 250 retry:
238 spin_lock(&all_mddevs_lock); 251 spin_lock(&all_mddevs_lock);
239 list_for_each_entry(mddev, &all_mddevs, all_mddevs) 252
240 if (mddev->unit == unit) { 253 if (unit) {
241 mddev_get(mddev); 254 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
255 if (mddev->unit == unit) {
256 mddev_get(mddev);
257 spin_unlock(&all_mddevs_lock);
258 kfree(new);
259 return mddev;
260 }
261
262 if (new) {
263 list_add(&new->all_mddevs, &all_mddevs);
242 spin_unlock(&all_mddevs_lock); 264 spin_unlock(&all_mddevs_lock);
243 kfree(new); 265 new->hold_active = UNTIL_IOCTL;
244 return mddev; 266 return new;
245 } 267 }
246 268 } else if (new) {
247 if (new) { 269 /* find an unused unit number */
270 static int next_minor = 512;
271 int start = next_minor;
272 int is_free = 0;
273 int dev = 0;
274 while (!is_free) {
275 dev = MKDEV(MD_MAJOR, next_minor);
276 next_minor++;
277 if (next_minor > MINORMASK)
278 next_minor = 0;
279 if (next_minor == start) {
280 /* Oh dear, all in use. */
281 spin_unlock(&all_mddevs_lock);
282 kfree(new);
283 return NULL;
284 }
285
286 is_free = 1;
287 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
288 if (mddev->unit == dev) {
289 is_free = 0;
290 break;
291 }
292 }
293 new->unit = dev;
294 new->md_minor = MINOR(dev);
295 new->hold_active = UNTIL_STOP;
248 list_add(&new->all_mddevs, &all_mddevs); 296 list_add(&new->all_mddevs, &all_mddevs);
249 spin_unlock(&all_mddevs_lock); 297 spin_unlock(&all_mddevs_lock);
250 return new; 298 return new;
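When mddev_find() is called with unit == 0, the new code above hands out an unused minor by a round-robin scan starting at 512, wrapping at MINORMASK and failing only after a full cycle. A self-contained userspace model of that scan, with in_use() standing in for the all_mddevs walk:

#include <stdbool.h>
#include <stdio.h>

#define MINORMASK 0xfffff	/* matches the kernel's 20-bit minor space */

static bool in_use(int minor)
{
	return minor == 512 || minor == 513;	/* pretend two are taken */
}

static int find_free_minor(void)
{
	static int next_minor = 512;	/* resume where the last scan ended */
	int start = next_minor;

	do {
		int dev = next_minor++;

		if (next_minor > MINORMASK)
			next_minor = 0;
		if (!in_use(dev))
			return dev;
	} while (next_minor != start);

	return -1;	/* every minor in use */
}

int main(void)
{
	printf("%d\n", find_free_minor());	/* prints 514 */
	return 0;
}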
@@ -275,16 +323,6 @@ static mddev_t * mddev_find(dev_t unit)
275 new->resync_max = MaxSector; 323 new->resync_max = MaxSector;
276 new->level = LEVEL_NONE; 324 new->level = LEVEL_NONE;
277 325
278 new->queue = blk_alloc_queue(GFP_KERNEL);
279 if (!new->queue) {
280 kfree(new);
281 return NULL;
282 }
283 /* Can be unlocked because the queue is new: no concurrency */
284 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
285
286 blk_queue_make_request(new->queue, md_fail_request);
287
288 goto retry; 326 goto retry;
289} 327}
290 328
@@ -307,25 +345,23 @@ static inline void mddev_unlock(mddev_t * mddev)
307 345
308static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) 346static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
309{ 347{
310 mdk_rdev_t * rdev; 348 mdk_rdev_t *rdev;
311 struct list_head *tmp;
312 349
313 rdev_for_each(rdev, tmp, mddev) { 350 list_for_each_entry(rdev, &mddev->disks, same_set)
314 if (rdev->desc_nr == nr) 351 if (rdev->desc_nr == nr)
315 return rdev; 352 return rdev;
316 } 353
317 return NULL; 354 return NULL;
318} 355}
319 356
320static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) 357static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
321{ 358{
322 struct list_head *tmp;
323 mdk_rdev_t *rdev; 359 mdk_rdev_t *rdev;
324 360
325 rdev_for_each(rdev, tmp, mddev) { 361 list_for_each_entry(rdev, &mddev->disks, same_set)
326 if (rdev->bdev->bd_dev == dev) 362 if (rdev->bdev->bd_dev == dev)
327 return rdev; 363 return rdev;
328 } 364
329 return NULL; 365 return NULL;
330} 366}
331 367
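A recurring change throughout this commit: rdev_for_each(rdev, tmp, mddev), which forced callers to declare a spare struct list_head cursor, gives way to plain list_for_each_entry() over mddev->disks, so the tmp variables disappear. The shape of the converted helpers, reduced to a generic sketch (the struct is illustrative; same_set mirrors the rdev linkage field):

#include <linux/kernel.h>
#include <linux/list.h>

struct rdev {
	struct list_head same_set;	/* linkage on the disks list */
	int desc_nr;
};

/* list_for_each_entry() supplies its own typed cursor, so no
 * separate struct list_head is needed. */
static struct rdev *find_rdev_nr(struct list_head *disks, int nr)
{
	struct rdev *rdev;

	list_for_each_entry(rdev, disks, same_set)
		if (rdev->desc_nr == nr)
			return rdev;
	return NULL;
}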
@@ -861,7 +897,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
861static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) 897static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
862{ 898{
863 mdp_super_t *sb; 899 mdp_super_t *sb;
864 struct list_head *tmp;
865 mdk_rdev_t *rdev2; 900 mdk_rdev_t *rdev2;
866 int next_spare = mddev->raid_disks; 901 int next_spare = mddev->raid_disks;
867 902
@@ -933,7 +968,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
933 sb->state |= (1<<MD_SB_BITMAP_PRESENT); 968 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
934 969
935 sb->disks[0].state = (1<<MD_DISK_REMOVED); 970 sb->disks[0].state = (1<<MD_DISK_REMOVED);
936 rdev_for_each(rdev2, tmp, mddev) { 971 list_for_each_entry(rdev2, &mddev->disks, same_set) {
937 mdp_disk_t *d; 972 mdp_disk_t *d;
938 int desc_nr; 973 int desc_nr;
939 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) 974 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
@@ -1259,7 +1294,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1259static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) 1294static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1260{ 1295{
1261 struct mdp_superblock_1 *sb; 1296 struct mdp_superblock_1 *sb;
1262 struct list_head *tmp;
1263 mdk_rdev_t *rdev2; 1297 mdk_rdev_t *rdev2;
1264 int max_dev, i; 1298 int max_dev, i;
1265 /* make rdev->sb match mddev and rdev data. */ 1299 /* make rdev->sb match mddev and rdev data. */
@@ -1307,7 +1341,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1307 } 1341 }
1308 1342
1309 max_dev = 0; 1343 max_dev = 0;
1310 rdev_for_each(rdev2, tmp, mddev) 1344 list_for_each_entry(rdev2, &mddev->disks, same_set)
1311 if (rdev2->desc_nr+1 > max_dev) 1345 if (rdev2->desc_nr+1 > max_dev)
1312 max_dev = rdev2->desc_nr+1; 1346 max_dev = rdev2->desc_nr+1;
1313 1347
@@ -1316,7 +1350,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1316 for (i=0; i<max_dev;i++) 1350 for (i=0; i<max_dev;i++)
1317 sb->dev_roles[i] = cpu_to_le16(0xfffe); 1351 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1318 1352
1319 rdev_for_each(rdev2, tmp, mddev) { 1353 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1320 i = rdev2->desc_nr; 1354 i = rdev2->desc_nr;
1321 if (test_bit(Faulty, &rdev2->flags)) 1355 if (test_bit(Faulty, &rdev2->flags))
1322 sb->dev_roles[i] = cpu_to_le16(0xfffe); 1356 sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1466,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1466 1500
1467 list_add_rcu(&rdev->same_set, &mddev->disks); 1501 list_add_rcu(&rdev->same_set, &mddev->disks);
1468 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); 1502 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1503
1504 /* May as well allow recovery to be retried once */
1505 mddev->recovery_disabled = 0;
1469 return 0; 1506 return 0;
1470 1507
1471 fail: 1508 fail:
@@ -1571,8 +1608,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev)
1571 1608
1572static void export_array(mddev_t *mddev) 1609static void export_array(mddev_t *mddev)
1573{ 1610{
1574 struct list_head *tmp; 1611 mdk_rdev_t *rdev, *tmp;
1575 mdk_rdev_t *rdev;
1576 1612
1577 rdev_for_each(rdev, tmp, mddev) { 1613 rdev_for_each(rdev, tmp, mddev) {
1578 if (!rdev->mddev) { 1614 if (!rdev->mddev) {
@@ -1593,7 +1629,7 @@ static void print_desc(mdp_disk_t *desc)
1593 desc->major,desc->minor,desc->raid_disk,desc->state); 1629 desc->major,desc->minor,desc->raid_disk,desc->state);
1594} 1630}
1595 1631
1596static void print_sb(mdp_super_t *sb) 1632static void print_sb_90(mdp_super_t *sb)
1597{ 1633{
1598 int i; 1634 int i;
1599 1635
@@ -1624,10 +1660,57 @@ static void print_sb(mdp_super_t *sb)
1624 } 1660 }
1625 printk(KERN_INFO "md: THIS: "); 1661 printk(KERN_INFO "md: THIS: ");
1626 print_desc(&sb->this_disk); 1662 print_desc(&sb->this_disk);
1627
1628} 1663}
1629 1664
1630static void print_rdev(mdk_rdev_t *rdev) 1665static void print_sb_1(struct mdp_superblock_1 *sb)
1666{
1667 __u8 *uuid;
1668
1669 uuid = sb->set_uuid;
1670 printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
1671 ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
1672 KERN_INFO "md: Name: \"%s\" CT:%llu\n",
1673 le32_to_cpu(sb->major_version),
1674 le32_to_cpu(sb->feature_map),
1675 uuid[0], uuid[1], uuid[2], uuid[3],
1676 uuid[4], uuid[5], uuid[6], uuid[7],
1677 uuid[8], uuid[9], uuid[10], uuid[11],
1678 uuid[12], uuid[13], uuid[14], uuid[15],
1679 sb->set_name,
1680 (unsigned long long)le64_to_cpu(sb->ctime)
1681 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
1682
1683 uuid = sb->device_uuid;
1684 printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
1685 " RO:%llu\n"
1686 KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
1687 ":%02x%02x%02x%02x%02x%02x\n"
1688 KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
1689 KERN_INFO "md: (MaxDev:%u) \n",
1690 le32_to_cpu(sb->level),
1691 (unsigned long long)le64_to_cpu(sb->size),
1692 le32_to_cpu(sb->raid_disks),
1693 le32_to_cpu(sb->layout),
1694 le32_to_cpu(sb->chunksize),
1695 (unsigned long long)le64_to_cpu(sb->data_offset),
1696 (unsigned long long)le64_to_cpu(sb->data_size),
1697 (unsigned long long)le64_to_cpu(sb->super_offset),
1698 (unsigned long long)le64_to_cpu(sb->recovery_offset),
1699 le32_to_cpu(sb->dev_number),
1700 uuid[0], uuid[1], uuid[2], uuid[3],
1701 uuid[4], uuid[5], uuid[6], uuid[7],
1702 uuid[8], uuid[9], uuid[10], uuid[11],
1703 uuid[12], uuid[13], uuid[14], uuid[15],
1704 sb->devflags,
1705 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
1706 (unsigned long long)le64_to_cpu(sb->events),
1707 (unsigned long long)le64_to_cpu(sb->resync_offset),
1708 le32_to_cpu(sb->sb_csum),
1709 le32_to_cpu(sb->max_dev)
1710 );
1711}
1712
1713static void print_rdev(mdk_rdev_t *rdev, int major_version)
1631{ 1714{
1632 char b[BDEVNAME_SIZE]; 1715 char b[BDEVNAME_SIZE];
1633 printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", 1716 printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
@@ -1635,15 +1718,22 @@ static void print_rdev(mdk_rdev_t *rdev)
1635 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), 1718 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
1636 rdev->desc_nr); 1719 rdev->desc_nr);
1637 if (rdev->sb_loaded) { 1720 if (rdev->sb_loaded) {
1638 printk(KERN_INFO "md: rdev superblock:\n"); 1721 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
1639 print_sb((mdp_super_t*)page_address(rdev->sb_page)); 1722 switch (major_version) {
1723 case 0:
1724 print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
1725 break;
1726 case 1:
1727 print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
1728 break;
1729 }
1640 } else 1730 } else
1641 printk(KERN_INFO "md: no rdev superblock!\n"); 1731 printk(KERN_INFO "md: no rdev superblock!\n");
1642} 1732}
1643 1733
1644static void md_print_devices(void) 1734static void md_print_devices(void)
1645{ 1735{
1646 struct list_head *tmp, *tmp2; 1736 struct list_head *tmp;
1647 mdk_rdev_t *rdev; 1737 mdk_rdev_t *rdev;
1648 mddev_t *mddev; 1738 mddev_t *mddev;
1649 char b[BDEVNAME_SIZE]; 1739 char b[BDEVNAME_SIZE];
@@ -1658,12 +1748,12 @@ static void md_print_devices(void)
1658 bitmap_print_sb(mddev->bitmap); 1748 bitmap_print_sb(mddev->bitmap);
1659 else 1749 else
1660 printk("%s: ", mdname(mddev)); 1750 printk("%s: ", mdname(mddev));
1661 rdev_for_each(rdev, tmp2, mddev) 1751 list_for_each_entry(rdev, &mddev->disks, same_set)
1662 printk("<%s>", bdevname(rdev->bdev,b)); 1752 printk("<%s>", bdevname(rdev->bdev,b));
1663 printk("\n"); 1753 printk("\n");
1664 1754
1665 rdev_for_each(rdev, tmp2, mddev) 1755 list_for_each_entry(rdev, &mddev->disks, same_set)
1666 print_rdev(rdev); 1756 print_rdev(rdev, mddev->major_version);
1667 } 1757 }
1668 printk("md: **********************************\n"); 1758 printk("md: **********************************\n");
1669 printk("\n"); 1759 printk("\n");
@@ -1679,9 +1769,8 @@ static void sync_sbs(mddev_t * mddev, int nospares)
1679 * with the rest of the array) 1769 * with the rest of the array)
1680 */ 1770 */
1681 mdk_rdev_t *rdev; 1771 mdk_rdev_t *rdev;
1682 struct list_head *tmp;
1683 1772
1684 rdev_for_each(rdev, tmp, mddev) { 1773 list_for_each_entry(rdev, &mddev->disks, same_set) {
1685 if (rdev->sb_events == mddev->events || 1774 if (rdev->sb_events == mddev->events ||
1686 (nospares && 1775 (nospares &&
1687 rdev->raid_disk < 0 && 1776 rdev->raid_disk < 0 &&
@@ -1699,7 +1788,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
1699 1788
1700static void md_update_sb(mddev_t * mddev, int force_change) 1789static void md_update_sb(mddev_t * mddev, int force_change)
1701{ 1790{
1702 struct list_head *tmp;
1703 mdk_rdev_t *rdev; 1791 mdk_rdev_t *rdev;
1704 int sync_req; 1792 int sync_req;
1705 int nospares = 0; 1793 int nospares = 0;
@@ -1790,7 +1878,7 @@ repeat:
1790 mdname(mddev),mddev->in_sync); 1878 mdname(mddev),mddev->in_sync);
1791 1879
1792 bitmap_update_sb(mddev->bitmap); 1880 bitmap_update_sb(mddev->bitmap);
1793 rdev_for_each(rdev, tmp, mddev) { 1881 list_for_each_entry(rdev, &mddev->disks, same_set) {
1794 char b[BDEVNAME_SIZE]; 1882 char b[BDEVNAME_SIZE];
1795 dprintk(KERN_INFO "md: "); 1883 dprintk(KERN_INFO "md: ");
1796 if (rdev->sb_loaded != 1) 1884 if (rdev->sb_loaded != 1)
@@ -1999,7 +2087,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1999 md_wakeup_thread(rdev->mddev->thread); 2087 md_wakeup_thread(rdev->mddev->thread);
2000 } else if (rdev->mddev->pers) { 2088 } else if (rdev->mddev->pers) {
2001 mdk_rdev_t *rdev2; 2089 mdk_rdev_t *rdev2;
2002 struct list_head *tmp;
2003 /* Activating a spare .. or possibly reactivating 2090 /* Activating a spare .. or possibly reactivating
2004 * if we ever get bitmaps working here. 2091
2005 */ 2092 */
@@ -2010,7 +2097,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2010 if (rdev->mddev->pers->hot_add_disk == NULL) 2097 if (rdev->mddev->pers->hot_add_disk == NULL)
2011 return -EINVAL; 2098 return -EINVAL;
2012 2099
2013 rdev_for_each(rdev2, tmp, rdev->mddev) 2100 list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
2014 if (rdev2->raid_disk == slot) 2101 if (rdev2->raid_disk == slot)
2015 return -EEXIST; 2102 return -EEXIST;
2016 2103
@@ -2125,14 +2212,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2125 */ 2212 */
2126 mddev_t *mddev; 2213 mddev_t *mddev;
2127 int overlap = 0; 2214 int overlap = 0;
2128 struct list_head *tmp, *tmp2; 2215 struct list_head *tmp;
2129 2216
2130 mddev_unlock(my_mddev); 2217 mddev_unlock(my_mddev);
2131 for_each_mddev(mddev, tmp) { 2218 for_each_mddev(mddev, tmp) {
2132 mdk_rdev_t *rdev2; 2219 mdk_rdev_t *rdev2;
2133 2220
2134 mddev_lock(mddev); 2221 mddev_lock(mddev);
2135 rdev_for_each(rdev2, tmp2, mddev) 2222 list_for_each_entry(rdev2, &mddev->disks, same_set)
2136 if (test_bit(AllReserved, &rdev2->flags) || 2223 if (test_bit(AllReserved, &rdev2->flags) ||
2137 (rdev->bdev == rdev2->bdev && 2224 (rdev->bdev == rdev2->bdev &&
2138 rdev != rdev2 && 2225 rdev != rdev2 &&
@@ -2328,8 +2415,7 @@ abort_free:
2328static void analyze_sbs(mddev_t * mddev) 2415static void analyze_sbs(mddev_t * mddev)
2329{ 2416{
2330 int i; 2417 int i;
2331 struct list_head *tmp; 2418 mdk_rdev_t *rdev, *freshest, *tmp;
2332 mdk_rdev_t *rdev, *freshest;
2333 char b[BDEVNAME_SIZE]; 2419 char b[BDEVNAME_SIZE];
2334 2420
2335 freshest = NULL; 2421 freshest = NULL;
@@ -3046,7 +3132,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
3046 } 3132 }
3047 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3133 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3048 md_wakeup_thread(mddev->thread); 3134 md_wakeup_thread(mddev->thread);
3049 sysfs_notify(&mddev->kobj, NULL, "sync_action"); 3135 sysfs_notify_dirent(mddev->sysfs_action);
3050 return len; 3136 return len;
3051} 3137}
3052 3138
@@ -3404,6 +3490,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
3404 if (!capable(CAP_SYS_ADMIN)) 3490 if (!capable(CAP_SYS_ADMIN))
3405 return -EACCES; 3491 return -EACCES;
3406 rv = mddev_lock(mddev); 3492 rv = mddev_lock(mddev);
3493 if (mddev->hold_active == UNTIL_IOCTL)
3494 mddev->hold_active = 0;
3407 if (!rv) { 3495 if (!rv) {
3408 rv = entry->store(mddev, page, length); 3496 rv = entry->store(mddev, page, length);
3409 mddev_unlock(mddev); 3497 mddev_unlock(mddev);
@@ -3414,6 +3502,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
3414static void md_free(struct kobject *ko) 3502static void md_free(struct kobject *ko)
3415{ 3503{
3416 mddev_t *mddev = container_of(ko, mddev_t, kobj); 3504 mddev_t *mddev = container_of(ko, mddev_t, kobj);
3505
3506 if (mddev->sysfs_state)
3507 sysfs_put(mddev->sysfs_state);
3508
3509 if (mddev->gendisk) {
3510 del_gendisk(mddev->gendisk);
3511 put_disk(mddev->gendisk);
3512 }
3513 if (mddev->queue)
3514 blk_cleanup_queue(mddev->queue);
3515
3417 kfree(mddev); 3516 kfree(mddev);
3418} 3517}
3419 3518
@@ -3429,34 +3528,74 @@ static struct kobj_type md_ktype = {
3429 3528
3430int mdp_major = 0; 3529int mdp_major = 0;
3431 3530
3432static struct kobject *md_probe(dev_t dev, int *part, void *data) 3531static int md_alloc(dev_t dev, char *name)
3433{ 3532{
3434 static DEFINE_MUTEX(disks_mutex); 3533 static DEFINE_MUTEX(disks_mutex);
3435 mddev_t *mddev = mddev_find(dev); 3534 mddev_t *mddev = mddev_find(dev);
3436 struct gendisk *disk; 3535 struct gendisk *disk;
3437 int partitioned = (MAJOR(dev) != MD_MAJOR); 3536 int partitioned;
3438 int shift = partitioned ? MdpMinorShift : 0; 3537 int shift;
3439 int unit = MINOR(dev) >> shift; 3538 int unit;
3440 int error; 3539 int error;
3441 3540
3442 if (!mddev) 3541 if (!mddev)
3443 return NULL; 3542 return -ENODEV;
3543
3544 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
3545 shift = partitioned ? MdpMinorShift : 0;
3546 unit = MINOR(mddev->unit) >> shift;
3547
3548 /* wait for any previous instance of this device
3549 * to be completely removed (mddev_delayed_delete).
3550 */
3551 flush_scheduled_work();
3444 3552
3445 mutex_lock(&disks_mutex); 3553 mutex_lock(&disks_mutex);
3446 if (mddev->gendisk) { 3554 if (mddev->gendisk) {
3447 mutex_unlock(&disks_mutex); 3555 mutex_unlock(&disks_mutex);
3448 mddev_put(mddev); 3556 mddev_put(mddev);
3449 return NULL; 3557 return -EEXIST;
3558 }
3559
3560 if (name) {
3561 /* Need to ensure that 'name' is not a duplicate.
3562 */
3563 mddev_t *mddev2;
3564 spin_lock(&all_mddevs_lock);
3565
3566 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
3567 if (mddev2->gendisk &&
3568 strcmp(mddev2->gendisk->disk_name, name) == 0) {
3569 spin_unlock(&all_mddevs_lock);
3570 return -EEXIST;
3571 }
3572 spin_unlock(&all_mddevs_lock);
3573 }
3574
3575 mddev->queue = blk_alloc_queue(GFP_KERNEL);
3576 if (!mddev->queue) {
3577 mutex_unlock(&disks_mutex);
3578 mddev_put(mddev);
3579 return -ENOMEM;
3450 } 3580 }
3581 /* Can be unlocked because the queue is new: no concurrency */
3582 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
3583
3584 blk_queue_make_request(mddev->queue, md_fail_request);
3585
3451 disk = alloc_disk(1 << shift); 3586 disk = alloc_disk(1 << shift);
3452 if (!disk) { 3587 if (!disk) {
3453 mutex_unlock(&disks_mutex); 3588 mutex_unlock(&disks_mutex);
3589 blk_cleanup_queue(mddev->queue);
3590 mddev->queue = NULL;
3454 mddev_put(mddev); 3591 mddev_put(mddev);
3455 return NULL; 3592 return -ENOMEM;
3456 } 3593 }
3457 disk->major = MAJOR(dev); 3594 disk->major = MAJOR(mddev->unit);
3458 disk->first_minor = unit << shift; 3595 disk->first_minor = unit << shift;
3459 if (partitioned) 3596 if (name)
3597 strcpy(disk->disk_name, name);
3598 else if (partitioned)
3460 sprintf(disk->disk_name, "md_d%d", unit); 3599 sprintf(disk->disk_name, "md_d%d", unit);
3461 else 3600 else
3462 sprintf(disk->disk_name, "md%d", unit); 3601 sprintf(disk->disk_name, "md%d", unit);
@@ -3464,7 +3603,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
3464 disk->private_data = mddev; 3603 disk->private_data = mddev;
3465 disk->queue = mddev->queue; 3604 disk->queue = mddev->queue;
3466 /* Allow extended partitions. This makes the 3605 /* Allow extended partitions. This makes the
3467 * 'mdp' device redundant, but we can really 3606 * 'mdp' device redundant, but we can't really
3468 * remove it now. 3607 * remove it now.
3469 */ 3608 */
3470 disk->flags |= GENHD_FL_EXT_DEVT; 3609 disk->flags |= GENHD_FL_EXT_DEVT;
@@ -3480,9 +3619,35 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
3480 kobject_uevent(&mddev->kobj, KOBJ_ADD); 3619 kobject_uevent(&mddev->kobj, KOBJ_ADD);
3481 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); 3620 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
3482 } 3621 }
3622 mddev_put(mddev);
3623 return 0;
3624}
3625
3626static struct kobject *md_probe(dev_t dev, int *part, void *data)
3627{
3628 md_alloc(dev, NULL);
3483 return NULL; 3629 return NULL;
3484} 3630}
3485 3631
3632static int add_named_array(const char *val, struct kernel_param *kp)
3633{
3634 /* val must be "md_*" where * is not all digits.
3635 * We allocate an array with a large free minor number, and
3636 * set the name to val. val must not already be an active name.
3637 */
3638 int len = strlen(val);
3639 char buf[DISK_NAME_LEN];
3640
3641 while (len && val[len-1] == '\n')
3642 len--;
3643 if (len >= DISK_NAME_LEN)
3644 return -E2BIG;
3645 strlcpy(buf, val, len+1);
3646 if (strncmp(buf, "md_", 3) != 0)
3647 return -EINVAL;
3648 return md_alloc(0, buf);
3649}
3650
3486static void md_safemode_timeout(unsigned long data) 3651static void md_safemode_timeout(unsigned long data)
3487{ 3652{
3488 mddev_t *mddev = (mddev_t *) data; 3653 mddev_t *mddev = (mddev_t *) data;
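The add_named_array() helper added above accepts values like "md_home": it trims trailing newlines, bounds the length against DISK_NAME_LEN, and requires the "md_" prefix before handing the buffer to md_alloc(0, buf). A userspace model of just that validation, with snprintf substituting for the kernel's strlcpy:

#include <stdio.h>
#include <string.h>

#define DISK_NAME_LEN 32	/* matches the kernel constant */

static int validate_name(const char *val, char *buf)
{
	size_t len = strlen(val);

	while (len && val[len - 1] == '\n')
		len--;			/* strip trailing newlines */
	if (len >= DISK_NAME_LEN)
		return -1;		/* -E2BIG in the kernel */
	snprintf(buf, len + 1, "%s", val);
	if (strncmp(buf, "md_", 3) != 0)
		return -2;		/* -EINVAL */
	return 0;
}

int main(void)
{
	char buf[DISK_NAME_LEN];

	printf("%d\n", validate_name("md_home\n", buf));	/* 0 */
	printf("%d\n", validate_name("raid0\n", buf));		/* -2 */
	return 0;
}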
@@ -3501,7 +3666,6 @@ static int do_md_run(mddev_t * mddev)
3501{ 3666{
3502 int err; 3667 int err;
3503 int chunk_size; 3668 int chunk_size;
3504 struct list_head *tmp;
3505 mdk_rdev_t *rdev; 3669 mdk_rdev_t *rdev;
3506 struct gendisk *disk; 3670 struct gendisk *disk;
3507 struct mdk_personality *pers; 3671 struct mdk_personality *pers;
@@ -3540,7 +3704,7 @@ static int do_md_run(mddev_t * mddev)
3540 } 3704 }
3541 3705
3542 /* devices must have minimum size of one chunk */ 3706 /* devices must have minimum size of one chunk */
3543 rdev_for_each(rdev, tmp, mddev) { 3707 list_for_each_entry(rdev, &mddev->disks, same_set) {
3544 if (test_bit(Faulty, &rdev->flags)) 3708 if (test_bit(Faulty, &rdev->flags))
3545 continue; 3709 continue;
3546 if (rdev->size < chunk_size / 1024) { 3710 if (rdev->size < chunk_size / 1024) {
@@ -3565,7 +3729,7 @@ static int do_md_run(mddev_t * mddev)
3565 * the only valid external interface is through the md 3729 * the only valid external interface is through the md
3566 * device. 3730 * device.
3567 */ 3731 */
3568 rdev_for_each(rdev, tmp, mddev) { 3732 list_for_each_entry(rdev, &mddev->disks, same_set) {
3569 if (test_bit(Faulty, &rdev->flags)) 3733 if (test_bit(Faulty, &rdev->flags))
3570 continue; 3734 continue;
3571 sync_blockdev(rdev->bdev); 3735 sync_blockdev(rdev->bdev);
@@ -3630,10 +3794,10 @@ static int do_md_run(mddev_t * mddev)
3630 */ 3794 */
3631 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; 3795 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
3632 mdk_rdev_t *rdev2; 3796 mdk_rdev_t *rdev2;
3633 struct list_head *tmp2;
3634 int warned = 0; 3797 int warned = 0;
3635 rdev_for_each(rdev, tmp, mddev) { 3798
3636 rdev_for_each(rdev2, tmp2, mddev) { 3799 list_for_each_entry(rdev, &mddev->disks, same_set)
3800 list_for_each_entry(rdev2, &mddev->disks, same_set) {
3637 if (rdev < rdev2 && 3801 if (rdev < rdev2 &&
3638 rdev->bdev->bd_contains == 3802 rdev->bdev->bd_contains ==
3639 rdev2->bdev->bd_contains) { 3803 rdev2->bdev->bd_contains) {
@@ -3647,7 +3811,7 @@ static int do_md_run(mddev_t * mddev)
3647 warned = 1; 3811 warned = 1;
3648 } 3812 }
3649 } 3813 }
3650 } 3814
3651 if (warned) 3815 if (warned)
3652 printk(KERN_WARNING 3816 printk(KERN_WARNING
3653 "True protection against single-disk" 3817 "True protection against single-disk"
@@ -3684,6 +3848,7 @@ static int do_md_run(mddev_t * mddev)
3684 printk(KERN_WARNING 3848 printk(KERN_WARNING
3685 "md: cannot register extra attributes for %s\n", 3849 "md: cannot register extra attributes for %s\n",
3686 mdname(mddev)); 3850 mdname(mddev));
3851 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3687 } else if (mddev->ro == 2) /* auto-readonly not meaningful */ 3852 } else if (mddev->ro == 2) /* auto-readonly not meaningful */
3688 mddev->ro = 0; 3853 mddev->ro = 0;
3689 3854
@@ -3694,7 +3859,7 @@ static int do_md_run(mddev_t * mddev)
3694 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ 3859 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
3695 mddev->in_sync = 1; 3860 mddev->in_sync = 1;
3696 3861
3697 rdev_for_each(rdev, tmp, mddev) 3862 list_for_each_entry(rdev, &mddev->disks, same_set)
3698 if (rdev->raid_disk >= 0) { 3863 if (rdev->raid_disk >= 0) {
3699 char nm[20]; 3864 char nm[20];
3700 sprintf(nm, "rd%d", rdev->raid_disk); 3865 sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3725,9 +3890,8 @@ static int do_md_run(mddev_t * mddev)
3725 * it will remove the drives and not do the right thing 3890 * it will remove the drives and not do the right thing
3726 */ 3891 */
3727 if (mddev->degraded && !mddev->sync_thread) { 3892 if (mddev->degraded && !mddev->sync_thread) {
3728 struct list_head *rtmp;
3729 int spares = 0; 3893 int spares = 0;
3730 rdev_for_each(rdev, rtmp, mddev) 3894 list_for_each_entry(rdev, &mddev->disks, same_set)
3731 if (rdev->raid_disk >= 0 && 3895 if (rdev->raid_disk >= 0 &&
3732 !test_bit(In_sync, &rdev->flags) && 3896 !test_bit(In_sync, &rdev->flags) &&
3733 !test_bit(Faulty, &rdev->flags)) 3897 !test_bit(Faulty, &rdev->flags))
@@ -3754,7 +3918,8 @@ static int do_md_run(mddev_t * mddev)
3754 mddev->changed = 1; 3918 mddev->changed = 1;
3755 md_new_event(mddev); 3919 md_new_event(mddev);
3756 sysfs_notify_dirent(mddev->sysfs_state); 3920 sysfs_notify_dirent(mddev->sysfs_state);
3757 sysfs_notify(&mddev->kobj, NULL, "sync_action"); 3921 if (mddev->sysfs_action)
3922 sysfs_notify_dirent(mddev->sysfs_action);
3758 sysfs_notify(&mddev->kobj, NULL, "degraded"); 3923 sysfs_notify(&mddev->kobj, NULL, "degraded");
3759 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 3924 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
3760 return 0; 3925 return 0;
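This hunk completes a pattern used throughout the patch: do_md_run() caches the sync_action dirent with sysfs_get_dirent(), and later events go through sysfs_notify_dirent(), which skips the per-event name lookup that sysfs_notify() pays; the NULL guard matters because personalities without a sync_request method never register the attribute. A loose user-space analogy, resolving a handle once and reusing it; get_dirent/notify_dirent here are invented stand-ins for the sysfs calls:

#include <stdio.h>
#include <stdlib.h>

struct dirent_handle { char name[32]; };

/* stand-in for sysfs_get_dirent(): resolve by name once, at setup */
static struct dirent_handle *get_dirent(const char *name)
{
        struct dirent_handle *h = malloc(sizeof(*h));

        if (h)
                snprintf(h->name, sizeof(h->name), "%s", name);
        return h;
}

/* stand-in for sysfs_notify_dirent(): no lookup on the hot path */
static void notify_dirent(struct dirent_handle *h)
{
        printf("notify %s\n", h->name);
}

int main(void)
{
        struct dirent_handle *sysfs_action = get_dirent("sync_action");

        if (sysfs_action)       /* may be NULL: no sync_request method */
                notify_dirent(sysfs_action);
        free(sysfs_action);     /* kernel: sysfs_put() in do_md_stop() */
        return 0;
}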
@@ -3854,9 +4019,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
3854 mddev->queue->merge_bvec_fn = NULL; 4019 mddev->queue->merge_bvec_fn = NULL;
3855 mddev->queue->unplug_fn = NULL; 4020 mddev->queue->unplug_fn = NULL;
3856 mddev->queue->backing_dev_info.congested_fn = NULL; 4021 mddev->queue->backing_dev_info.congested_fn = NULL;
3857 if (mddev->pers->sync_request) 4022 if (mddev->pers->sync_request) {
3858 sysfs_remove_group(&mddev->kobj, &md_redundancy_group); 4023 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
3859 4024 if (mddev->sysfs_action)
4025 sysfs_put(mddev->sysfs_action);
4026 mddev->sysfs_action = NULL;
4027 }
3860 module_put(mddev->pers->owner); 4028 module_put(mddev->pers->owner);
3861 mddev->pers = NULL; 4029 mddev->pers = NULL;
3862 /* tell userspace to handle 'inactive' */ 4030 /* tell userspace to handle 'inactive' */
@@ -3883,7 +4051,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
3883 */ 4051 */
3884 if (mode == 0) { 4052 if (mode == 0) {
3885 mdk_rdev_t *rdev; 4053 mdk_rdev_t *rdev;
3886 struct list_head *tmp;
3887 4054
3888 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); 4055 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
3889 4056
@@ -3895,7 +4062,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
3895 } 4062 }
3896 mddev->bitmap_offset = 0; 4063 mddev->bitmap_offset = 0;
3897 4064
3898 rdev_for_each(rdev, tmp, mddev) 4065 list_for_each_entry(rdev, &mddev->disks, same_set)
3899 if (rdev->raid_disk >= 0) { 4066 if (rdev->raid_disk >= 0) {
3900 char nm[20]; 4067 char nm[20];
3901 sprintf(nm, "rd%d", rdev->raid_disk); 4068 sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3941,6 +4108,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
3941 mddev->barriers_work = 0; 4108 mddev->barriers_work = 0;
3942 mddev->safemode = 0; 4109 mddev->safemode = 0;
3943 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4110 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4111 if (mddev->hold_active == UNTIL_STOP)
4112 mddev->hold_active = 0;
3944 4113
3945 } else if (mddev->pers) 4114 } else if (mddev->pers)
3946 printk(KERN_INFO "md: %s switched to read-only mode.\n", 4115 printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -3956,7 +4125,6 @@ out:
3956static void autorun_array(mddev_t *mddev) 4125static void autorun_array(mddev_t *mddev)
3957{ 4126{
3958 mdk_rdev_t *rdev; 4127 mdk_rdev_t *rdev;
3959 struct list_head *tmp;
3960 int err; 4128 int err;
3961 4129
3962 if (list_empty(&mddev->disks)) 4130 if (list_empty(&mddev->disks))
@@ -3964,7 +4132,7 @@ static void autorun_array(mddev_t *mddev)
3964 4132
3965 printk(KERN_INFO "md: running: "); 4133 printk(KERN_INFO "md: running: ");
3966 4134
3967 rdev_for_each(rdev, tmp, mddev) { 4135 list_for_each_entry(rdev, &mddev->disks, same_set) {
3968 char b[BDEVNAME_SIZE]; 4136 char b[BDEVNAME_SIZE];
3969 printk("<%s>", bdevname(rdev->bdev,b)); 4137 printk("<%s>", bdevname(rdev->bdev,b));
3970 } 4138 }
@@ -3991,8 +4159,7 @@ static void autorun_array(mddev_t *mddev)
3991 */ 4159 */
3992static void autorun_devices(int part) 4160static void autorun_devices(int part)
3993{ 4161{
3994 struct list_head *tmp; 4162 mdk_rdev_t *rdev0, *rdev, *tmp;
3995 mdk_rdev_t *rdev0, *rdev;
3996 mddev_t *mddev; 4163 mddev_t *mddev;
3997 char b[BDEVNAME_SIZE]; 4164 char b[BDEVNAME_SIZE];
3998 4165
@@ -4007,7 +4174,7 @@ static void autorun_devices(int part)
4007 printk(KERN_INFO "md: considering %s ...\n", 4174 printk(KERN_INFO "md: considering %s ...\n",
4008 bdevname(rdev0->bdev,b)); 4175 bdevname(rdev0->bdev,b));
4009 INIT_LIST_HEAD(&candidates); 4176 INIT_LIST_HEAD(&candidates);
4010 rdev_for_each_list(rdev, tmp, pending_raid_disks) 4177 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
4011 if (super_90_load(rdev, rdev0, 0) >= 0) { 4178 if (super_90_load(rdev, rdev0, 0) >= 0) {
4012 printk(KERN_INFO "md: adding %s ...\n", 4179 printk(KERN_INFO "md: adding %s ...\n",
4013 bdevname(rdev->bdev,b)); 4180 bdevname(rdev->bdev,b));
@@ -4053,7 +4220,7 @@ static void autorun_devices(int part)
4053 } else { 4220 } else {
4054 printk(KERN_INFO "md: created %s\n", mdname(mddev)); 4221 printk(KERN_INFO "md: created %s\n", mdname(mddev));
4055 mddev->persistent = 1; 4222 mddev->persistent = 1;
4056 rdev_for_each_list(rdev, tmp, candidates) { 4223 rdev_for_each_list(rdev, tmp, &candidates) {
4057 list_del_init(&rdev->same_set); 4224 list_del_init(&rdev->same_set);
4058 if (bind_rdev_to_array(rdev, mddev)) 4225 if (bind_rdev_to_array(rdev, mddev))
4059 export_rdev(rdev); 4226 export_rdev(rdev);
@@ -4064,7 +4231,7 @@ static void autorun_devices(int part)
4064 /* on success, candidates will be empty, on error 4231 /* on success, candidates will be empty, on error
4065 * it won't... 4232 * it won't...
4066 */ 4233 */
4067 rdev_for_each_list(rdev, tmp, candidates) { 4234 rdev_for_each_list(rdev, tmp, &candidates) {
4068 list_del_init(&rdev->same_set); 4235 list_del_init(&rdev->same_set);
4069 export_rdev(rdev); 4236 export_rdev(rdev);
4070 } 4237 }
@@ -4093,10 +4260,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
4093 mdu_array_info_t info; 4260 mdu_array_info_t info;
4094 int nr,working,active,failed,spare; 4261 int nr,working,active,failed,spare;
4095 mdk_rdev_t *rdev; 4262 mdk_rdev_t *rdev;
4096 struct list_head *tmp;
4097 4263
4098 nr=working=active=failed=spare=0; 4264 nr=working=active=failed=spare=0;
4099 rdev_for_each(rdev, tmp, mddev) { 4265 list_for_each_entry(rdev, &mddev->disks, same_set) {
4100 nr++; 4266 nr++;
4101 if (test_bit(Faulty, &rdev->flags)) 4267 if (test_bit(Faulty, &rdev->flags))
4102 failed++; 4268 failed++;
@@ -4614,9 +4780,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
4614 4780
4615static int update_size(mddev_t *mddev, sector_t num_sectors) 4781static int update_size(mddev_t *mddev, sector_t num_sectors)
4616{ 4782{
4617 mdk_rdev_t * rdev; 4783 mdk_rdev_t *rdev;
4618 int rv; 4784 int rv;
4619 struct list_head *tmp;
4620 int fit = (num_sectors == 0); 4785 int fit = (num_sectors == 0);
4621 4786
4622 if (mddev->pers->resize == NULL) 4787 if (mddev->pers->resize == NULL)
@@ -4638,7 +4803,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
4638 * grow, and re-add. 4803 * grow, and re-add.
4639 */ 4804 */
4640 return -EBUSY; 4805 return -EBUSY;
4641 rdev_for_each(rdev, tmp, mddev) { 4806 list_for_each_entry(rdev, &mddev->disks, same_set) {
4642 sector_t avail; 4807 sector_t avail;
4643 avail = rdev->size * 2; 4808 avail = rdev->size * 2;
4644 4809
@@ -5000,6 +5165,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
5000 5165
5001done_unlock: 5166done_unlock:
5002abort_unlock: 5167abort_unlock:
5168 if (mddev->hold_active == UNTIL_IOCTL &&
5169 err != -EINVAL)
5170 mddev->hold_active = 0;
5003 mddev_unlock(mddev); 5171 mddev_unlock(mddev);
5004 5172
5005 return err; 5173 return err;
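hold_active is new state in this series: an mddev created by name (UNTIL_STOP) or by a probing open (UNTIL_IOCTL) stays allocated with zero openers until it is really used, and the hunk above clears the pin once any ioctl other than an -EINVAL probe touches the device. A simplified model of that lifecycle; only the two UNTIL_* names come from the patch, the helpers are illustrative:

#include <errno.h>
#include <stdio.h>

enum hold { HOLD_NONE = 0, UNTIL_IOCTL, UNTIL_STOP };

struct array_state { enum hold hold_active; };

/* any ioctl that was not an -EINVAL probe unpins an UNTIL_IOCTL array */
static void after_ioctl(struct array_state *a, int err)
{
        if (a->hold_active == UNTIL_IOCTL && err != -EINVAL)
                a->hold_active = HOLD_NONE;
}

/* a full stop (mode 0) unpins a named, UNTIL_STOP array */
static void after_stop(struct array_state *a)
{
        if (a->hold_active == UNTIL_STOP)
                a->hold_active = HOLD_NONE;
}

int main(void)
{
        struct array_state a = { UNTIL_IOCTL };

        after_ioctl(&a, 0);             /* e.g. a successful GET_ARRAY_INFO */
        printf("still pinned: %s\n", a.hold_active ? "yes" : "no");
        after_stop(&a);
        return 0;
}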
@@ -5016,14 +5184,25 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5016 * Succeed if we can lock the mddev, which confirms that 5184 * Succeed if we can lock the mddev, which confirms that
5017 * it isn't being stopped right now. 5185 * it isn't being stopped right now.
5018 */ 5186 */
5019 mddev_t *mddev = bdev->bd_disk->private_data; 5187 mddev_t *mddev = mddev_find(bdev->bd_dev);
5020 int err; 5188 int err;
5021 5189
5190 if (mddev->gendisk != bdev->bd_disk) {
5191 /* we are racing with mddev_put which is discarding this
5192 * bd_disk.
5193 */
5194 mddev_put(mddev);
5195 /* Wait until bdev->bd_disk is definitely gone */
5196 flush_scheduled_work();
5197 /* Then retry the open from the top */
5198 return -ERESTARTSYS;
5199 }
5200 BUG_ON(mddev != bdev->bd_disk->private_data);
5201
5022 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5202 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
5023 goto out; 5203 goto out;
5024 5204
5025 err = 0; 5205 err = 0;
5026 mddev_get(mddev);
5027 atomic_inc(&mddev->openers); 5206 atomic_inc(&mddev->openers);
5028 mddev_unlock(mddev); 5207 mddev_unlock(mddev);
5029 5208
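md_open() now resolves the mddev from the device number rather than trusting bd_disk->private_data, and when it detects that it raced with mddev_put() discarding the gendisk it waits out the deferred teardown and returns -ERESTARTSYS so the VFS restarts the whole open. A user-space model of that retry shape, with a fake race that clears after the first attempt:

#include <stdio.h>

#define RETRY_OPEN 512          /* plays the role of -ERESTARTSYS */

static int attempts;

/* model: the first lookup races with teardown and sees a stale gendisk */
static int try_open(void)
{
        attempts++;
        if (attempts == 1)
                return -RETRY_OPEN;     /* drop the stale ref and retry */
        return 0;                       /* gendisk matches: proceed */
}

int main(void)
{
        int err;

        while ((err = try_open()) == -RETRY_OPEN)
                ;       /* kernel: flush_scheduled_work(), VFS reissues open */
        printf("open succeeded after %d attempt(s)\n", attempts);
        return err;
}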
@@ -5187,11 +5366,10 @@ static void status_unused(struct seq_file *seq)
5187{ 5366{
5188 int i = 0; 5367 int i = 0;
5189 mdk_rdev_t *rdev; 5368 mdk_rdev_t *rdev;
5190 struct list_head *tmp;
5191 5369
5192 seq_printf(seq, "unused devices: "); 5370 seq_printf(seq, "unused devices: ");
5193 5371
5194 rdev_for_each_list(rdev, tmp, pending_raid_disks) { 5372 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
5195 char b[BDEVNAME_SIZE]; 5373 char b[BDEVNAME_SIZE];
5196 i++; 5374 i++;
5197 seq_printf(seq, "%s ", 5375 seq_printf(seq, "%s ",
@@ -5350,7 +5528,6 @@ static int md_seq_show(struct seq_file *seq, void *v)
5350{ 5528{
5351 mddev_t *mddev = v; 5529 mddev_t *mddev = v;
5352 sector_t size; 5530 sector_t size;
5353 struct list_head *tmp2;
5354 mdk_rdev_t *rdev; 5531 mdk_rdev_t *rdev;
5355 struct mdstat_info *mi = seq->private; 5532 struct mdstat_info *mi = seq->private;
5356 struct bitmap *bitmap; 5533 struct bitmap *bitmap;
@@ -5387,7 +5564,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
5387 } 5564 }
5388 5565
5389 size = 0; 5566 size = 0;
5390 rdev_for_each(rdev, tmp2, mddev) { 5567 list_for_each_entry(rdev, &mddev->disks, same_set) {
5391 char b[BDEVNAME_SIZE]; 5568 char b[BDEVNAME_SIZE];
5392 seq_printf(seq, " %s[%d]", 5569 seq_printf(seq, " %s[%d]",
5393 bdevname(rdev->bdev,b), rdev->desc_nr); 5570 bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -5694,7 +5871,6 @@ void md_do_sync(mddev_t *mddev)
5694 struct list_head *tmp; 5871 struct list_head *tmp;
5695 sector_t last_check; 5872 sector_t last_check;
5696 int skipped = 0; 5873 int skipped = 0;
5697 struct list_head *rtmp;
5698 mdk_rdev_t *rdev; 5874 mdk_rdev_t *rdev;
5699 char *desc; 5875 char *desc;
5700 5876
@@ -5799,7 +5975,7 @@ void md_do_sync(mddev_t *mddev)
5799 /* recovery follows the physical size of devices */ 5975 /* recovery follows the physical size of devices */
5800 max_sectors = mddev->size << 1; 5976 max_sectors = mddev->size << 1;
5801 j = MaxSector; 5977 j = MaxSector;
5802 rdev_for_each(rdev, rtmp, mddev) 5978 list_for_each_entry(rdev, &mddev->disks, same_set)
5803 if (rdev->raid_disk >= 0 && 5979 if (rdev->raid_disk >= 0 &&
5804 !test_bit(Faulty, &rdev->flags) && 5980 !test_bit(Faulty, &rdev->flags) &&
5805 !test_bit(In_sync, &rdev->flags) && 5981 !test_bit(In_sync, &rdev->flags) &&
@@ -5949,7 +6125,7 @@ void md_do_sync(mddev_t *mddev)
5949 } else { 6125 } else {
5950 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 6126 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
5951 mddev->curr_resync = MaxSector; 6127 mddev->curr_resync = MaxSector;
5952 rdev_for_each(rdev, rtmp, mddev) 6128 list_for_each_entry(rdev, &mddev->disks, same_set)
5953 if (rdev->raid_disk >= 0 && 6129 if (rdev->raid_disk >= 0 &&
5954 !test_bit(Faulty, &rdev->flags) && 6130 !test_bit(Faulty, &rdev->flags) &&
5955 !test_bit(In_sync, &rdev->flags) && 6131 !test_bit(In_sync, &rdev->flags) &&
@@ -5985,10 +6161,9 @@ EXPORT_SYMBOL_GPL(md_do_sync);
5985static int remove_and_add_spares(mddev_t *mddev) 6161static int remove_and_add_spares(mddev_t *mddev)
5986{ 6162{
5987 mdk_rdev_t *rdev; 6163 mdk_rdev_t *rdev;
5988 struct list_head *rtmp;
5989 int spares = 0; 6164 int spares = 0;
5990 6165
5991 rdev_for_each(rdev, rtmp, mddev) 6166 list_for_each_entry(rdev, &mddev->disks, same_set)
5992 if (rdev->raid_disk >= 0 && 6167 if (rdev->raid_disk >= 0 &&
5993 !test_bit(Blocked, &rdev->flags) && 6168 !test_bit(Blocked, &rdev->flags) &&
5994 (test_bit(Faulty, &rdev->flags) || 6169 (test_bit(Faulty, &rdev->flags) ||
@@ -6003,8 +6178,8 @@ static int remove_and_add_spares(mddev_t *mddev)
6003 } 6178 }
6004 } 6179 }
6005 6180
6006 if (mddev->degraded && ! mddev->ro) { 6181 if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
6007 rdev_for_each(rdev, rtmp, mddev) { 6182 list_for_each_entry(rdev, &mddev->disks, same_set) {
6008 if (rdev->raid_disk >= 0 && 6183 if (rdev->raid_disk >= 0 &&
6009 !test_bit(In_sync, &rdev->flags) && 6184 !test_bit(In_sync, &rdev->flags) &&
6010 !test_bit(Blocked, &rdev->flags)) 6185 !test_bit(Blocked, &rdev->flags))
@@ -6056,7 +6231,6 @@ static int remove_and_add_spares(mddev_t *mddev)
6056void md_check_recovery(mddev_t *mddev) 6231void md_check_recovery(mddev_t *mddev)
6057{ 6232{
6058 mdk_rdev_t *rdev; 6233 mdk_rdev_t *rdev;
6059 struct list_head *rtmp;
6060 6234
6061 6235
6062 if (mddev->bitmap) 6236 if (mddev->bitmap)
@@ -6120,7 +6294,7 @@ void md_check_recovery(mddev_t *mddev)
6120 if (mddev->flags) 6294 if (mddev->flags)
6121 md_update_sb(mddev, 0); 6295 md_update_sb(mddev, 0);
6122 6296
6123 rdev_for_each(rdev, rtmp, mddev) 6297 list_for_each_entry(rdev, &mddev->disks, same_set)
6124 if (test_and_clear_bit(StateChanged, &rdev->flags)) 6298 if (test_and_clear_bit(StateChanged, &rdev->flags))
6125 sysfs_notify_dirent(rdev->sysfs_state); 6299 sysfs_notify_dirent(rdev->sysfs_state);
6126 6300
@@ -6149,13 +6323,13 @@ void md_check_recovery(mddev_t *mddev)
6149 * information must be scrapped 6323 * information must be scrapped
6150 */ 6324 */
6151 if (!mddev->degraded) 6325 if (!mddev->degraded)
6152 rdev_for_each(rdev, rtmp, mddev) 6326 list_for_each_entry(rdev, &mddev->disks, same_set)
6153 rdev->saved_raid_disk = -1; 6327 rdev->saved_raid_disk = -1;
6154 6328
6155 mddev->recovery = 0; 6329 mddev->recovery = 0;
6156 /* flag recovery needed just to double check */ 6330 /* flag recovery needed just to double check */
6157 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 6331 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6158 sysfs_notify(&mddev->kobj, NULL, "sync_action"); 6332 sysfs_notify_dirent(mddev->sysfs_action);
6159 md_new_event(mddev); 6333 md_new_event(mddev);
6160 goto unlock; 6334 goto unlock;
6161 } 6335 }
@@ -6216,7 +6390,7 @@ void md_check_recovery(mddev_t *mddev)
6216 mddev->recovery = 0; 6390 mddev->recovery = 0;
6217 } else 6391 } else
6218 md_wakeup_thread(mddev->sync_thread); 6392 md_wakeup_thread(mddev->sync_thread);
6219 sysfs_notify(&mddev->kobj, NULL, "sync_action"); 6393 sysfs_notify_dirent(mddev->sysfs_action);
6220 md_new_event(mddev); 6394 md_new_event(mddev);
6221 } 6395 }
6222 unlock: 6396 unlock:
@@ -6224,7 +6398,8 @@ void md_check_recovery(mddev_t *mddev)
6224 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 6398 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
6225 if (test_and_clear_bit(MD_RECOVERY_RECOVER, 6399 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
6226 &mddev->recovery)) 6400 &mddev->recovery))
6227 sysfs_notify(&mddev->kobj, NULL, "sync_action"); 6401 if (mddev->sysfs_action)
6402 sysfs_notify_dirent(mddev->sysfs_action);
6228 } 6403 }
6229 mddev_unlock(mddev); 6404 mddev_unlock(mddev);
6230 } 6405 }
@@ -6386,14 +6561,8 @@ static __exit void md_exit(void)
6386 unregister_sysctl_table(raid_table_header); 6561 unregister_sysctl_table(raid_table_header);
6387 remove_proc_entry("mdstat", NULL); 6562 remove_proc_entry("mdstat", NULL);
6388 for_each_mddev(mddev, tmp) { 6563 for_each_mddev(mddev, tmp) {
6389 struct gendisk *disk = mddev->gendisk;
6390 if (!disk)
6391 continue;
6392 export_array(mddev); 6564 export_array(mddev);
6393 del_gendisk(disk); 6565 mddev->hold_active = 0;
6394 put_disk(disk);
6395 mddev->gendisk = NULL;
6396 mddev_put(mddev);
6397 } 6566 }
6398} 6567}
6399 6568
@@ -6418,6 +6587,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
6418module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); 6587module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
6419module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); 6588module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
6420 6589
6590module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
6421 6591
6422EXPORT_SYMBOL(register_md_personality); 6592EXPORT_SYMBOL(register_md_personality);
6423EXPORT_SYMBOL(unregister_md_personality); 6593EXPORT_SYMBOL(unregister_md_personality);
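The conversion repeated across md.c above drops the rdev_for_each() wrapper and its spare struct list_head *tmp cursor in favour of a plain list_for_each_entry() walk of mddev->disks through each rdev's same_set node. This is the kernel's intrusive-list idiom: the link lives inside the object and container_of() recovers the object from the link. A self-contained user-space rendition with minimal list machinery (GNU C typeof, as in the kernel headers):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* kernel-style iteration; relies on GCC/Clang typeof */
#define list_for_each_entry(pos, head, member)                          \
        for (pos = container_of((head)->next, typeof(*pos), member);    \
             &pos->member != (head);                                    \
             pos = container_of(pos->member.next, typeof(*pos), member))

struct rdev { int raid_disk; struct list_head same_set; };

static void list_add_tail(struct list_head *n, struct list_head *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

int main(void)
{
        struct list_head disks = { &disks, &disks };
        struct rdev r0 = { .raid_disk = 0 }, r1 = { .raid_disk = 1 };
        struct rdev *rdev;

        list_add_tail(&r0.same_set, &disks);
        list_add_tail(&r1.same_set, &disks);

        list_for_each_entry(rdev, &disks, same_set)
                printf("rd%d\n", rdev->raid_disk);
        return 0;
}

The non-_safe form is only valid because these loops no longer delete entries while walking; where deletion still happens, as in autorun_devices(), the patch keeps a tmp cursor via rdev_for_each_list().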
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index d4ac47d11279..f6d08f241671 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -408,7 +408,6 @@ static int multipath_run (mddev_t *mddev)
408 int disk_idx; 408 int disk_idx;
409 struct multipath_info *disk; 409 struct multipath_info *disk;
410 mdk_rdev_t *rdev; 410 mdk_rdev_t *rdev;
411 struct list_head *tmp;
412 411
413 if (mddev->level != LEVEL_MULTIPATH) { 412 if (mddev->level != LEVEL_MULTIPATH) {
414 printk("multipath: %s: raid level not set to multipath IO (%d)\n", 413 printk("multipath: %s: raid level not set to multipath IO (%d)\n",
@@ -441,7 +440,7 @@ static int multipath_run (mddev_t *mddev)
441 } 440 }
442 441
443 conf->working_disks = 0; 442 conf->working_disks = 0;
444 rdev_for_each(rdev, tmp, mddev) { 443 list_for_each_entry(rdev, &mddev->disks, same_set) {
445 disk_idx = rdev->raid_disk; 444 disk_idx = rdev->raid_disk;
446 if (disk_idx < 0 || 445 if (disk_idx < 0 ||
447 disk_idx >= mddev->raid_disks) 446 disk_idx >= mddev->raid_disks)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 8ac6488ad0dc..c605ba805586 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -53,11 +53,10 @@ static int raid0_congested(void *data, int bits)
53static int create_strip_zones (mddev_t *mddev) 53static int create_strip_zones (mddev_t *mddev)
54{ 54{
55 int i, c, j; 55 int i, c, j;
56 sector_t current_offset, curr_zone_offset; 56 sector_t current_start, curr_zone_start;
57 sector_t min_spacing; 57 sector_t min_spacing;
58 raid0_conf_t *conf = mddev_to_conf(mddev); 58 raid0_conf_t *conf = mddev_to_conf(mddev);
59 mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; 59 mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
60 struct list_head *tmp1, *tmp2;
61 struct strip_zone *zone; 60 struct strip_zone *zone;
62 int cnt; 61 int cnt;
63 char b[BDEVNAME_SIZE]; 62 char b[BDEVNAME_SIZE];
@@ -67,19 +66,19 @@ static int create_strip_zones (mddev_t *mddev)
67 */ 66 */
68 conf->nr_strip_zones = 0; 67 conf->nr_strip_zones = 0;
69 68
70 rdev_for_each(rdev1, tmp1, mddev) { 69 list_for_each_entry(rdev1, &mddev->disks, same_set) {
71 printk("raid0: looking at %s\n", 70 printk(KERN_INFO "raid0: looking at %s\n",
72 bdevname(rdev1->bdev,b)); 71 bdevname(rdev1->bdev,b));
73 c = 0; 72 c = 0;
74 rdev_for_each(rdev2, tmp2, mddev) { 73 list_for_each_entry(rdev2, &mddev->disks, same_set) {
75 printk("raid0: comparing %s(%llu)", 74 printk(KERN_INFO "raid0: comparing %s(%llu)",
76 bdevname(rdev1->bdev,b), 75 bdevname(rdev1->bdev,b),
77 (unsigned long long)rdev1->size); 76 (unsigned long long)rdev1->size);
78 printk(" with %s(%llu)\n", 77 printk(KERN_INFO " with %s(%llu)\n",
79 bdevname(rdev2->bdev,b), 78 bdevname(rdev2->bdev,b),
80 (unsigned long long)rdev2->size); 79 (unsigned long long)rdev2->size);
81 if (rdev2 == rdev1) { 80 if (rdev2 == rdev1) {
82 printk("raid0: END\n"); 81 printk(KERN_INFO "raid0: END\n");
83 break; 82 break;
84 } 83 }
85 if (rdev2->size == rdev1->size) 84 if (rdev2->size == rdev1->size)
@@ -88,19 +87,20 @@ static int create_strip_zones (mddev_t *mddev)
88 * Not unique, don't count it as a new 87 * Not unique, don't count it as a new
89 * group 88 * group
90 */ 89 */
91 printk("raid0: EQUAL\n"); 90 printk(KERN_INFO "raid0: EQUAL\n");
92 c = 1; 91 c = 1;
93 break; 92 break;
94 } 93 }
95 printk("raid0: NOT EQUAL\n"); 94 printk(KERN_INFO "raid0: NOT EQUAL\n");
96 } 95 }
97 if (!c) { 96 if (!c) {
98 printk("raid0: ==> UNIQUE\n"); 97 printk(KERN_INFO "raid0: ==> UNIQUE\n");
99 conf->nr_strip_zones++; 98 conf->nr_strip_zones++;
100 printk("raid0: %d zones\n", conf->nr_strip_zones); 99 printk(KERN_INFO "raid0: %d zones\n",
100 conf->nr_strip_zones);
101 } 101 }
102 } 102 }
103 printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); 103 printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
104 104
105 conf->strip_zone = kzalloc(sizeof(struct strip_zone)* 105 conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
106 conf->nr_strip_zones, GFP_KERNEL); 106 conf->nr_strip_zones, GFP_KERNEL);
@@ -119,16 +119,17 @@ static int create_strip_zones (mddev_t *mddev)
119 cnt = 0; 119 cnt = 0;
120 smallest = NULL; 120 smallest = NULL;
121 zone->dev = conf->devlist; 121 zone->dev = conf->devlist;
122 rdev_for_each(rdev1, tmp1, mddev) { 122 list_for_each_entry(rdev1, &mddev->disks, same_set) {
123 int j = rdev1->raid_disk; 123 int j = rdev1->raid_disk;
124 124
125 if (j < 0 || j >= mddev->raid_disks) { 125 if (j < 0 || j >= mddev->raid_disks) {
126 printk("raid0: bad disk number %d - aborting!\n", j); 126 printk(KERN_ERR "raid0: bad disk number %d - "
127 "aborting!\n", j);
127 goto abort; 128 goto abort;
128 } 129 }
129 if (zone->dev[j]) { 130 if (zone->dev[j]) {
130 printk("raid0: multiple devices for %d - aborting!\n", 131 printk(KERN_ERR "raid0: multiple devices for %d - "
131 j); 132 "aborting!\n", j);
132 goto abort; 133 goto abort;
133 } 134 }
134 zone->dev[j] = rdev1; 135 zone->dev[j] = rdev1;
@@ -149,16 +150,16 @@ static int create_strip_zones (mddev_t *mddev)
149 cnt++; 150 cnt++;
150 } 151 }
151 if (cnt != mddev->raid_disks) { 152 if (cnt != mddev->raid_disks) {
152 printk("raid0: too few disks (%d of %d) - aborting!\n", 153 printk(KERN_ERR "raid0: too few disks (%d of %d) - "
153 cnt, mddev->raid_disks); 154 "aborting!\n", cnt, mddev->raid_disks);
154 goto abort; 155 goto abort;
155 } 156 }
156 zone->nb_dev = cnt; 157 zone->nb_dev = cnt;
157 zone->size = smallest->size * cnt; 158 zone->sectors = smallest->size * cnt * 2;
158 zone->zone_offset = 0; 159 zone->zone_start = 0;
159 160
160 current_offset = smallest->size; 161 current_start = smallest->size * 2;
161 curr_zone_offset = zone->size; 162 curr_zone_start = zone->sectors;
162 163
163 /* now do the other zones */ 164 /* now do the other zones */
164 for (i = 1; i < conf->nr_strip_zones; i++) 165 for (i = 1; i < conf->nr_strip_zones; i++)
@@ -166,40 +167,41 @@ static int create_strip_zones (mddev_t *mddev)
166 zone = conf->strip_zone + i; 167 zone = conf->strip_zone + i;
167 zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; 168 zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks;
168 169
169 printk("raid0: zone %d\n", i); 170 printk(KERN_INFO "raid0: zone %d\n", i);
170 zone->dev_offset = current_offset; 171 zone->dev_start = current_start;
171 smallest = NULL; 172 smallest = NULL;
172 c = 0; 173 c = 0;
173 174
174 for (j=0; j<cnt; j++) { 175 for (j=0; j<cnt; j++) {
175 char b[BDEVNAME_SIZE]; 176 char b[BDEVNAME_SIZE];
176 rdev = conf->strip_zone[0].dev[j]; 177 rdev = conf->strip_zone[0].dev[j];
177 printk("raid0: checking %s ...", bdevname(rdev->bdev,b)); 178 printk(KERN_INFO "raid0: checking %s ...",
178 if (rdev->size > current_offset) 179 bdevname(rdev->bdev, b));
179 { 180 if (rdev->size > current_start / 2) {
180 printk(" contained as device %d\n", c); 181 printk(KERN_INFO " contained as device %d\n",
182 c);
181 zone->dev[c] = rdev; 183 zone->dev[c] = rdev;
182 c++; 184 c++;
183 if (!smallest || (rdev->size <smallest->size)) { 185 if (!smallest || (rdev->size <smallest->size)) {
184 smallest = rdev; 186 smallest = rdev;
185 printk(" (%llu) is smallest!.\n", 187 printk(KERN_INFO " (%llu) is smallest!.\n",
186 (unsigned long long)rdev->size); 188 (unsigned long long)rdev->size);
187 } 189 }
188 } else 190 } else
189 printk(" nope.\n"); 191 printk(KERN_INFO " nope.\n");
190 } 192 }
191 193
192 zone->nb_dev = c; 194 zone->nb_dev = c;
193 zone->size = (smallest->size - current_offset) * c; 195 zone->sectors = (smallest->size * 2 - current_start) * c;
194 printk("raid0: zone->nb_dev: %d, size: %llu\n", 196 printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
195 zone->nb_dev, (unsigned long long)zone->size); 197 zone->nb_dev, (unsigned long long)zone->sectors);
196 198
197 zone->zone_offset = curr_zone_offset; 199 zone->zone_start = curr_zone_start;
198 curr_zone_offset += zone->size; 200 curr_zone_start += zone->sectors;
199 201
200 current_offset = smallest->size; 202 current_start = smallest->size * 2;
201 printk("raid0: current zone offset: %llu\n", 203 printk(KERN_INFO "raid0: current zone start: %llu\n",
202 (unsigned long long)current_offset); 204 (unsigned long long)current_start);
203 } 205 }
204 206
205 /* Now find appropriate hash spacing. 207 /* Now find appropriate hash spacing.
@@ -210,16 +212,16 @@ static int create_strip_zones (mddev_t *mddev)
210 * strip though as it's size has no bearing on the efficacy of the hash 212 * strip though as it's size has no bearing on the efficacy of the hash
211 * table. 213 * table.
212 */ 214 */
213 conf->hash_spacing = curr_zone_offset; 215 conf->spacing = curr_zone_start;
214 min_spacing = curr_zone_offset; 216 min_spacing = curr_zone_start;
215 sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); 217 sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*));
216 for (i=0; i < conf->nr_strip_zones-1; i++) { 218 for (i=0; i < conf->nr_strip_zones-1; i++) {
217 sector_t sz = 0; 219 sector_t s = 0;
218 for (j=i; j<conf->nr_strip_zones-1 && 220 for (j = i; j < conf->nr_strip_zones - 1 &&
219 sz < min_spacing ; j++) 221 s < min_spacing; j++)
220 sz += conf->strip_zone[j].size; 222 s += conf->strip_zone[j].sectors;
221 if (sz >= min_spacing && sz < conf->hash_spacing) 223 if (s >= min_spacing && s < conf->spacing)
222 conf->hash_spacing = sz; 224 conf->spacing = s;
223 } 225 }
224 226
225 mddev->queue->unplug_fn = raid0_unplug; 227 mddev->queue->unplug_fn = raid0_unplug;
@@ -227,7 +229,7 @@ static int create_strip_zones (mddev_t *mddev)
227 mddev->queue->backing_dev_info.congested_fn = raid0_congested; 229 mddev->queue->backing_dev_info.congested_fn = raid0_congested;
228 mddev->queue->backing_dev_info.congested_data = mddev; 230 mddev->queue->backing_dev_info.congested_data = mddev;
229 231
230 printk("raid0: done.\n"); 232 printk(KERN_INFO "raid0: done.\n");
231 return 0; 233 return 0;
232 abort: 234 abort:
233 return 1; 235 return 1;
@@ -262,10 +264,9 @@ static int raid0_mergeable_bvec(struct request_queue *q,
262static int raid0_run (mddev_t *mddev) 264static int raid0_run (mddev_t *mddev)
263{ 265{
264 unsigned cur=0, i=0, nb_zone; 266 unsigned cur=0, i=0, nb_zone;
265 s64 size; 267 s64 sectors;
266 raid0_conf_t *conf; 268 raid0_conf_t *conf;
267 mdk_rdev_t *rdev; 269 mdk_rdev_t *rdev;
268 struct list_head *tmp;
269 270
270 if (mddev->chunk_size == 0) { 271 if (mddev->chunk_size == 0) {
271 printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); 272 printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
@@ -291,54 +292,54 @@ static int raid0_run (mddev_t *mddev)
291 292
292 /* calculate array device size */ 293 /* calculate array device size */
293 mddev->array_sectors = 0; 294 mddev->array_sectors = 0;
294 rdev_for_each(rdev, tmp, mddev) 295 list_for_each_entry(rdev, &mddev->disks, same_set)
295 mddev->array_sectors += rdev->size * 2; 296 mddev->array_sectors += rdev->size * 2;
296 297
297 printk("raid0 : md_size is %llu blocks.\n", 298 printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
298 (unsigned long long)mddev->array_sectors / 2); 299 (unsigned long long)mddev->array_sectors);
299 printk("raid0 : conf->hash_spacing is %llu blocks.\n", 300 printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
300 (unsigned long long)conf->hash_spacing); 301 (unsigned long long)conf->spacing);
301 { 302 {
302 sector_t s = mddev->array_sectors / 2; 303 sector_t s = mddev->array_sectors;
303 sector_t space = conf->hash_spacing; 304 sector_t space = conf->spacing;
304 int round; 305 int round;
305 conf->preshift = 0; 306 conf->sector_shift = 0;
306 if (sizeof(sector_t) > sizeof(u32)) { 307 if (sizeof(sector_t) > sizeof(u32)) {
307 /*shift down space and s so that sector_div will work */ 308 /*shift down space and s so that sector_div will work */
308 while (space > (sector_t) (~(u32)0)) { 309 while (space > (sector_t) (~(u32)0)) {
309 s >>= 1; 310 s >>= 1;
310 space >>= 1; 311 space >>= 1;
311 s += 1; /* force round-up */ 312 s += 1; /* force round-up */
312 conf->preshift++; 313 conf->sector_shift++;
313 } 314 }
314 } 315 }
315 round = sector_div(s, (u32)space) ? 1 : 0; 316 round = sector_div(s, (u32)space) ? 1 : 0;
316 nb_zone = s + round; 317 nb_zone = s + round;
317 } 318 }
318 printk("raid0 : nb_zone is %d.\n", nb_zone); 319 printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone);
319 320
320 printk("raid0 : Allocating %Zd bytes for hash.\n", 321 printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n",
321 nb_zone*sizeof(struct strip_zone*)); 322 nb_zone*sizeof(struct strip_zone*));
322 conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); 323 conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL);
323 if (!conf->hash_table) 324 if (!conf->hash_table)
324 goto out_free_conf; 325 goto out_free_conf;
325 size = conf->strip_zone[cur].size; 326 sectors = conf->strip_zone[cur].sectors;
326 327
327 conf->hash_table[0] = conf->strip_zone + cur; 328 conf->hash_table[0] = conf->strip_zone + cur;
328 for (i=1; i< nb_zone; i++) { 329 for (i=1; i< nb_zone; i++) {
329 while (size <= conf->hash_spacing) { 330 while (sectors <= conf->spacing) {
330 cur++; 331 cur++;
331 size += conf->strip_zone[cur].size; 332 sectors += conf->strip_zone[cur].sectors;
332 } 333 }
333 size -= conf->hash_spacing; 334 sectors -= conf->spacing;
334 conf->hash_table[i] = conf->strip_zone + cur; 335 conf->hash_table[i] = conf->strip_zone + cur;
335 } 336 }
336 if (conf->preshift) { 337 if (conf->sector_shift) {
337 conf->hash_spacing >>= conf->preshift; 338 conf->spacing >>= conf->sector_shift;
338 /* round hash_spacing up so when we divide by it, we 339 /* round spacing up so when we divide by it, we
339 * err on the side of too-low, which is safest 340 * err on the side of too-low, which is safest
340 */ 341 */
341 conf->hash_spacing++; 342 conf->spacing++;
342 } 343 }
343 344
344 /* calculate the max read-ahead size. 345 /* calculate the max read-ahead size.
@@ -387,12 +388,12 @@ static int raid0_stop (mddev_t *mddev)
387static int raid0_make_request (struct request_queue *q, struct bio *bio) 388static int raid0_make_request (struct request_queue *q, struct bio *bio)
388{ 389{
389 mddev_t *mddev = q->queuedata; 390 mddev_t *mddev = q->queuedata;
390 unsigned int sect_in_chunk, chunksize_bits, chunk_size, chunk_sects; 391 unsigned int sect_in_chunk, chunksect_bits, chunk_sects;
391 raid0_conf_t *conf = mddev_to_conf(mddev); 392 raid0_conf_t *conf = mddev_to_conf(mddev);
392 struct strip_zone *zone; 393 struct strip_zone *zone;
393 mdk_rdev_t *tmp_dev; 394 mdk_rdev_t *tmp_dev;
394 sector_t chunk; 395 sector_t chunk;
395 sector_t block, rsect; 396 sector_t sector, rsect;
396 const int rw = bio_data_dir(bio); 397 const int rw = bio_data_dir(bio);
397 int cpu; 398 int cpu;
398 399
@@ -407,11 +408,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
407 bio_sectors(bio)); 408 bio_sectors(bio));
408 part_stat_unlock(); 409 part_stat_unlock();
409 410
410 chunk_size = mddev->chunk_size >> 10;
411 chunk_sects = mddev->chunk_size >> 9; 411 chunk_sects = mddev->chunk_size >> 9;
412 chunksize_bits = ffz(~chunk_size); 412 chunksect_bits = ffz(~chunk_sects);
413 block = bio->bi_sector >> 1; 413 sector = bio->bi_sector;
414
415 414
416 if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { 415 if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
417 struct bio_pair *bp; 416 struct bio_pair *bp;
@@ -434,28 +433,27 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
434 433
435 434
436 { 435 {
437 sector_t x = block >> conf->preshift; 436 sector_t x = sector >> conf->sector_shift;
438 sector_div(x, (u32)conf->hash_spacing); 437 sector_div(x, (u32)conf->spacing);
439 zone = conf->hash_table[x]; 438 zone = conf->hash_table[x];
440 } 439 }
441 440
442 while (block >= (zone->zone_offset + zone->size)) 441 while (sector >= zone->zone_start + zone->sectors)
443 zone++; 442 zone++;
444 443
445 sect_in_chunk = bio->bi_sector & ((chunk_size<<1) -1); 444 sect_in_chunk = bio->bi_sector & (chunk_sects - 1);
446 445
447 446
448 { 447 {
449 sector_t x = (block - zone->zone_offset) >> chunksize_bits; 448 sector_t x = (sector - zone->zone_start) >> chunksect_bits;
450 449
451 sector_div(x, zone->nb_dev); 450 sector_div(x, zone->nb_dev);
452 chunk = x; 451 chunk = x;
453 452
454 x = block >> chunksize_bits; 453 x = sector >> chunksect_bits;
455 tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; 454 tmp_dev = zone->dev[sector_div(x, zone->nb_dev)];
456 } 455 }
457 rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1) 456 rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk;
458 + sect_in_chunk;
459 457
460 bio->bi_bdev = tmp_dev->bdev; 458 bio->bi_bdev = tmp_dev->bdev;
461 bio->bi_sector = rsect + tmp_dev->data_offset; 459 bio->bi_sector = rsect + tmp_dev->data_offset;
@@ -467,7 +465,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
467 465
468bad_map: 466bad_map:
469 printk("raid0_make_request bug: can't convert block across chunks" 467 printk("raid0_make_request bug: can't convert block across chunks"
470 " or bigger than %dk %llu %d\n", chunk_size, 468 " or bigger than %dk %llu %d\n", chunk_sects / 2,
471 (unsigned long long)bio->bi_sector, bio->bi_size >> 10); 469 (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
472 470
473 bio_io_error(bio); 471 bio_io_error(bio);
@@ -492,10 +490,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev)
492 seq_printf(seq, "%s/", bdevname( 490 seq_printf(seq, "%s/", bdevname(
493 conf->strip_zone[j].dev[k]->bdev,b)); 491 conf->strip_zone[j].dev[k]->bdev,b));
494 492
495 seq_printf(seq, "] zo=%d do=%d s=%d\n", 493 seq_printf(seq, "] zs=%d ds=%d s=%d\n",
496 conf->strip_zone[j].zone_offset, 494 conf->strip_zone[j].zone_start,
497 conf->strip_zone[j].dev_offset, 495 conf->strip_zone[j].dev_start,
498 conf->strip_zone[j].size); 496 conf->strip_zone[j].sectors);
499 } 497 }
500#endif 498#endif
501 seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); 499 seq_printf(seq, " %dk chunks", mddev->chunk_size/1024);
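The raid0 rework above moves every address calculation to 512-byte sectors: chunk_sects and chunksect_bits replace the 1 KiB chunk_size/chunksize_bits pair, the block-based zone_offset/dev_offset/size fields become zone_start/dev_start/sectors, and the target reduces to rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk. A runnable sketch of that mapping for a single zone; the geometry (4 devices, 128-sector chunks) is made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* illustrative single-zone geometry, not from the patch */
        const uint64_t zone_start = 0;          /* zone->zone_start */
        const uint64_t dev_start = 0;           /* zone->dev_start */
        const unsigned nb_dev = 4;
        const unsigned chunk_sects = 128;       /* 64 KiB chunks */
        const unsigned bits = 7;                /* ffz(~chunk_sects) */

        uint64_t sector = 1000;                 /* bio->bi_sector */

        unsigned sect_in_chunk = sector & (chunk_sects - 1);
        /* chunk index is zone-relative... */
        uint64_t x = (sector - zone_start) >> bits;
        uint64_t chunk = x / nb_dev;            /* stripe row on the device */
        /* ...but the device index uses the absolute sector, as in the code */
        unsigned dev = (unsigned)((sector >> bits) % nb_dev);
        uint64_t rsect = (chunk << bits) + dev_start + sect_in_chunk;

        printf("sector %llu -> device %u, device sector %llu\n",
               (unsigned long long)sector, dev, (unsigned long long)rsect);
        return 0;       /* prints: sector 1000 -> device 3, device sector 232 */
}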
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9c788e2489b1..7b4f5f7155d8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1016 * else mark the drive as failed 1016 * else mark the drive as failed
1017 */ 1017 */
1018 if (test_bit(In_sync, &rdev->flags) 1018 if (test_bit(In_sync, &rdev->flags)
1019 && (conf->raid_disks - mddev->degraded) == 1) 1019 && (conf->raid_disks - mddev->degraded) == 1) {
1020 /* 1020 /*
1021 * Don't fail the drive, act as though we were just a 1021 * Don't fail the drive, act as though we were just a
1022 * normal single drive 1022 * normal single drive.
1023 * However don't try a recovery from this drive as
1024 * it is very likely to fail.
1023 */ 1025 */
1026 mddev->recovery_disabled = 1;
1024 return; 1027 return;
1028 }
1025 if (test_and_clear_bit(In_sync, &rdev->flags)) { 1029 if (test_and_clear_bit(In_sync, &rdev->flags)) {
1026 unsigned long flags; 1030 unsigned long flags;
1027 spin_lock_irqsave(&conf->device_lock, flags); 1031 spin_lock_irqsave(&conf->device_lock, flags);
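recovery_disabled ties this raid1 hunk to the remove_and_add_spares() change earlier in the diff: when the failing device is the last working mirror, error() keeps it in service but sets the flag, and the recovery path then refuses to pull in spares that would be rebuilt from the same suspect drive. A compact model of the two checks, with a simplified array struct:

#include <stdio.h>

struct array {
        int raid_disks, degraded, ro, recovery_disabled;
};

/* raid1 error(): the last working mirror is kept, recovery is disabled */
static int would_disable_recovery(const struct array *a, int dev_in_sync)
{
        return dev_in_sync && (a->raid_disks - a->degraded) == 1;
}

/* remove_and_add_spares(): only rebuild when recovery is still allowed */
static int may_add_spares(const struct array *a)
{
        return a->degraded && !a->ro && !a->recovery_disabled;
}

int main(void)
{
        struct array a = { .raid_disks = 2, .degraded = 1 };

        if (would_disable_recovery(&a, 1))
                a.recovery_disabled = 1;
        printf("spares may be added: %s\n",
               may_add_spares(&a) ? "yes" : "no");
        return 0;
}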
@@ -1919,7 +1923,6 @@ static int run(mddev_t *mddev)
1919 int i, j, disk_idx; 1923 int i, j, disk_idx;
1920 mirror_info_t *disk; 1924 mirror_info_t *disk;
1921 mdk_rdev_t *rdev; 1925 mdk_rdev_t *rdev;
1922 struct list_head *tmp;
1923 1926
1924 if (mddev->level != 1) { 1927 if (mddev->level != 1) {
1925 printk("raid1: %s: raid level not set to mirroring (%d)\n", 1928 printk("raid1: %s: raid level not set to mirroring (%d)\n",
@@ -1964,7 +1967,7 @@ static int run(mddev_t *mddev)
1964 spin_lock_init(&conf->device_lock); 1967 spin_lock_init(&conf->device_lock);
1965 mddev->queue->queue_lock = &conf->device_lock; 1968 mddev->queue->queue_lock = &conf->device_lock;
1966 1969
1967 rdev_for_each(rdev, tmp, mddev) { 1970 list_for_each_entry(rdev, &mddev->disks, same_set) {
1968 disk_idx = rdev->raid_disk; 1971 disk_idx = rdev->raid_disk;
1969 if (disk_idx >= mddev->raid_disks 1972 if (disk_idx >= mddev->raid_disks
1970 || disk_idx < 0) 1973 || disk_idx < 0)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 970a96ef9b18..6736d6dff981 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2025,7 +2025,6 @@ static int run(mddev_t *mddev)
2025 int i, disk_idx; 2025 int i, disk_idx;
2026 mirror_info_t *disk; 2026 mirror_info_t *disk;
2027 mdk_rdev_t *rdev; 2027 mdk_rdev_t *rdev;
2028 struct list_head *tmp;
2029 int nc, fc, fo; 2028 int nc, fc, fo;
2030 sector_t stride, size; 2029 sector_t stride, size;
2031 2030
@@ -2108,7 +2107,7 @@ static int run(mddev_t *mddev)
2108 spin_lock_init(&conf->device_lock); 2107 spin_lock_init(&conf->device_lock);
2109 mddev->queue->queue_lock = &conf->device_lock; 2108 mddev->queue->queue_lock = &conf->device_lock;
2110 2109
2111 rdev_for_each(rdev, tmp, mddev) { 2110 list_for_each_entry(rdev, &mddev->disks, same_set) {
2112 disk_idx = rdev->raid_disk; 2111 disk_idx = rdev->raid_disk;
2113 if (disk_idx >= mddev->raid_disks 2112 if (disk_idx >= mddev->raid_disks
2114 || disk_idx < 0) 2113 || disk_idx < 0)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a36a7435edf5..a5ba080d303b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3998,7 +3998,6 @@ static int run(mddev_t *mddev)
3998 int raid_disk, memory; 3998 int raid_disk, memory;
3999 mdk_rdev_t *rdev; 3999 mdk_rdev_t *rdev;
4000 struct disk_info *disk; 4000 struct disk_info *disk;
4001 struct list_head *tmp;
4002 int working_disks = 0; 4001 int working_disks = 0;
4003 4002
4004 if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { 4003 if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
@@ -4108,7 +4107,7 @@ static int run(mddev_t *mddev)
4108 4107
4109 pr_debug("raid5: run(%s) called.\n", mdname(mddev)); 4108 pr_debug("raid5: run(%s) called.\n", mdname(mddev));
4110 4109
4111 rdev_for_each(rdev, tmp, mddev) { 4110 list_for_each_entry(rdev, &mddev->disks, same_set) {
4112 raid_disk = rdev->raid_disk; 4111 raid_disk = rdev->raid_disk;
4113 if (raid_disk >= conf->raid_disks 4112 if (raid_disk >= conf->raid_disks
4114 || raid_disk < 0) 4113 || raid_disk < 0)
@@ -4533,7 +4532,6 @@ static int raid5_start_reshape(mddev_t *mddev)
4533{ 4532{
4534 raid5_conf_t *conf = mddev_to_conf(mddev); 4533 raid5_conf_t *conf = mddev_to_conf(mddev);
4535 mdk_rdev_t *rdev; 4534 mdk_rdev_t *rdev;
4536 struct list_head *rtmp;
4537 int spares = 0; 4535 int spares = 0;
4538 int added_devices = 0; 4536 int added_devices = 0;
4539 unsigned long flags; 4537 unsigned long flags;
@@ -4541,7 +4539,7 @@ static int raid5_start_reshape(mddev_t *mddev)
4541 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 4539 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4542 return -EBUSY; 4540 return -EBUSY;
4543 4541
4544 rdev_for_each(rdev, rtmp, mddev) 4542 list_for_each_entry(rdev, &mddev->disks, same_set)
4545 if (rdev->raid_disk < 0 && 4543 if (rdev->raid_disk < 0 &&
4546 !test_bit(Faulty, &rdev->flags)) 4544 !test_bit(Faulty, &rdev->flags))
4547 spares++; 4545 spares++;
@@ -4563,7 +4561,7 @@ static int raid5_start_reshape(mddev_t *mddev)
4563 /* Add some new drives, as many as will fit. 4561 /* Add some new drives, as many as will fit.
4564 * We know there are enough to make the newly sized array work. 4562 * We know there are enough to make the newly sized array work.
4565 */ 4563 */
4566 rdev_for_each(rdev, rtmp, mddev) 4564 list_for_each_entry(rdev, &mddev->disks, same_set)
4567 if (rdev->raid_disk < 0 && 4565 if (rdev->raid_disk < 0 &&
4568 !test_bit(Faulty, &rdev->flags)) { 4566 !test_bit(Faulty, &rdev->flags)) {
4569 if (raid5_add_disk(mddev, rdev) == 0) { 4567 if (raid5_add_disk(mddev, rdev) == 0) {
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index fee7304102af..3949a1c73451 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -498,6 +498,18 @@ config SGI_GRU_DEBUG
498 This option enables addition debugging code for the SGI GRU driver. If 498 This option enables addition debugging code for the SGI GRU driver. If
499 you are unsure, say N. 499 you are unsure, say N.
500 500
501config DELL_LAPTOP
502 tristate "Dell Laptop Extras (EXPERIMENTAL)"
503 depends on X86
504 depends on DCDBAS
505 depends on EXPERIMENTAL
506 depends on BACKLIGHT_CLASS_DEVICE
507 depends on RFKILL
508 default n
509 ---help---
510 This driver adds support for rfkill and backlight control to Dell
511 laptops.
512
501source "drivers/misc/c2port/Kconfig" 513source "drivers/misc/c2port/Kconfig"
502 514
503endif # MISC_DEVICES 515endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 817f7f5ab3bd..5de863a0e395 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_ICS932S401) += ics932s401.o
18obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o 18obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
19obj-$(CONFIG_LKDTM) += lkdtm.o 19obj-$(CONFIG_LKDTM) += lkdtm.o
20obj-$(CONFIG_TIFM_CORE) += tifm_core.o 20obj-$(CONFIG_TIFM_CORE) += tifm_core.o
21obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o
21obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o 22obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o
22obj-$(CONFIG_PHANTOM) += phantom.o 23obj-$(CONFIG_PHANTOM) += phantom.o
23obj-$(CONFIG_SGI_IOC4) += ioc4.o 24obj-$(CONFIG_SGI_IOC4) += ioc4.o
diff --git a/drivers/misc/dell-laptop.c b/drivers/misc/dell-laptop.c
new file mode 100644
index 000000000000..4d33a2068b7a
--- /dev/null
+++ b/drivers/misc/dell-laptop.c
@@ -0,0 +1,436 @@
1/*
2 * Driver for Dell laptop extras
3 *
4 * Copyright (c) Red Hat <mjg@redhat.com>
5 *
6 * Based on documentation in the libsmbios package, Copyright (C) 2005 Dell
7 * Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/module.h>
15#include <linux/kernel.h>
16#include <linux/init.h>
17#include <linux/platform_device.h>
18#include <linux/backlight.h>
19#include <linux/err.h>
20#include <linux/dmi.h>
21#include <linux/io.h>
22#include <linux/rfkill.h>
23#include <linux/power_supply.h>
24#include <linux/acpi.h>
25#include "../firmware/dcdbas.h"
26
27#define BRIGHTNESS_TOKEN 0x7d
28
29/* This structure will be modified by the firmware when we enter
30 * system management mode, hence the volatiles */
31
32struct calling_interface_buffer {
33 u16 class;
34 u16 select;
35 volatile u32 input[4];
36 volatile u32 output[4];
37} __packed;
38
39struct calling_interface_token {
40 u16 tokenID;
41 u16 location;
42 union {
43 u16 value;
44 u16 stringlength;
45 };
46};
47
48struct calling_interface_structure {
49 struct dmi_header header;
50 u16 cmdIOAddress;
51 u8 cmdIOCode;
52 u32 supportedCmds;
53 struct calling_interface_token tokens[];
54} __packed;
55
56static int da_command_address;
57static int da_command_code;
58static int da_num_tokens;
59static struct calling_interface_token *da_tokens;
60
61static struct backlight_device *dell_backlight_device;
62static struct rfkill *wifi_rfkill;
63static struct rfkill *bluetooth_rfkill;
64static struct rfkill *wwan_rfkill;
65
66static const struct dmi_system_id __initdata dell_device_table[] = {
67 {
68 .ident = "Dell laptop",
69 .matches = {
70 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
71 DMI_MATCH(DMI_CHASSIS_TYPE, "8"),
72 },
73 },
74 { }
75};
76
77static void parse_da_table(const struct dmi_header *dm)
78{
79 /* Final token is a terminator, so we don't want to copy it */
80 int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1;
81 struct calling_interface_structure *table =
82 container_of(dm, struct calling_interface_structure, header);
83
84 /* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
85 6 bytes of entry */
86
87 if (dm->length < 17)
88 return;
89
90 da_command_address = table->cmdIOAddress;
91 da_command_code = table->cmdIOCode;
92
93 da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) *
94 sizeof(struct calling_interface_token),
95 GFP_KERNEL);
96
97 if (!da_tokens)
98 return;
99
100 memcpy(da_tokens+da_num_tokens, table->tokens,
101 sizeof(struct calling_interface_token) * tokens);
102
103 da_num_tokens += tokens;
104}
105
106static void find_tokens(const struct dmi_header *dm)
107{
108 switch (dm->type) {
109 case 0xd4: /* Indexed IO */
110 break;
111 case 0xd5: /* Protected Area Type 1 */
112 break;
113 case 0xd6: /* Protected Area Type 2 */
114 break;
115 case 0xda: /* Calling interface */
116 parse_da_table(dm);
117 break;
118 }
119}
120
121static int find_token_location(int tokenid)
122{
123 int i;
124 for (i = 0; i < da_num_tokens; i++) {
125 if (da_tokens[i].tokenID == tokenid)
126 return da_tokens[i].location;
127 }
128
129 return -1;
130}
131
132static struct calling_interface_buffer *
133dell_send_request(struct calling_interface_buffer *buffer, int class,
134 int select)
135{
136 struct smi_cmd command;
137
138 command.magic = SMI_CMD_MAGIC;
139 command.command_address = da_command_address;
140 command.command_code = da_command_code;
141 command.ebx = virt_to_phys(buffer);
142 command.ecx = 0x42534931;
143
144 buffer->class = class;
145 buffer->select = select;
146
147 dcdbas_smi_request(&command);
148
149 return buffer;
150}
151
152/* Derived from information in DellWirelessCtl.cpp:
153 Class 17, select 11 is radio control. It returns an array of 32-bit values.
154
155 result[0]: return code
156 result[1]:
157 Bit 0: Hardware switch supported
158 Bit 1: Wifi locator supported
159 Bit 2: Wifi is supported
160 Bit 3: Bluetooth is supported
161 Bit 4: WWAN is supported
162 Bit 5: Wireless keyboard supported
163 Bits 6-7: Reserved
164 Bit 8: Wifi is installed
165 Bit 9: Bluetooth is installed
166 Bit 10: WWAN is installed
167 Bits 11-15: Reserved
168 Bit 16: Hardware switch is on
169 Bit 17: Wifi is blocked
170 Bit 18: Bluetooth is blocked
171 Bit 19: WWAN is blocked
172 Bits 20-31: Reserved
173 result[2]: NVRAM size in bytes
174 result[3]: NVRAM format version number
175*/
176
177static int dell_rfkill_set(int radio, enum rfkill_state state)
178{
179 struct calling_interface_buffer buffer;
180 int disable = (state == RFKILL_STATE_UNBLOCKED) ? 0 : 1;
181
182 memset(&buffer, 0, sizeof(struct calling_interface_buffer));
183 buffer.input[0] = (1 | (radio<<8) | (disable << 16));
184 dell_send_request(&buffer, 17, 11);
185
186 return 0;
187}
188
189static int dell_wifi_set(void *data, enum rfkill_state state)
190{
191 return dell_rfkill_set(1, state);
192}
193
194static int dell_bluetooth_set(void *data, enum rfkill_state state)
195{
196 return dell_rfkill_set(2, state);
197}
198
199static int dell_wwan_set(void *data, enum rfkill_state state)
200{
201 return dell_rfkill_set(3, state);
202}
203
204static int dell_rfkill_get(int bit, enum rfkill_state *state)
205{
206 struct calling_interface_buffer buffer;
207 int status;
208 int new_state = RFKILL_STATE_HARD_BLOCKED;
209
210 memset(&buffer, 0, sizeof(struct calling_interface_buffer));
211 dell_send_request(&buffer, 17, 11);
212 status = buffer.output[1];
213
214 if (status & (1<<16))
215 new_state = RFKILL_STATE_SOFT_BLOCKED;
216
217 if (status & (1<<bit))
218 *state = new_state;
219 else
220 *state = RFKILL_STATE_UNBLOCKED;
221
222 return 0;
223}
224
225static int dell_wifi_get(void *data, enum rfkill_state *state)
226{
227 return dell_rfkill_get(17, state);
228}
229
230static int dell_bluetooth_get(void *data, enum rfkill_state *state)
231{
232 return dell_rfkill_get(18, state);
233}
234
235static int dell_wwan_get(void *data, enum rfkill_state *state)
236{
237 return dell_rfkill_get(19, state);
238}
239
240static int dell_setup_rfkill(void)
241{
242 struct calling_interface_buffer buffer;
243 int status;
244 int ret = -ENOMEM;
245
246 memset(&buffer, 0, sizeof(struct calling_interface_buffer));
247 dell_send_request(&buffer, 17, 11);
248 status = buffer.output[1];
249
250 if ((status & (1<<2|1<<8)) == (1<<2|1<<8)) {
251 wifi_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WLAN);
252 if (!wifi_rfkill)
253 goto err_wifi;
254 wifi_rfkill->name = "dell-wifi";
255 wifi_rfkill->toggle_radio = dell_wifi_set;
256 wifi_rfkill->get_state = dell_wifi_get;
257 ret = rfkill_register(wifi_rfkill);
258 if (ret)
259 goto err_wifi;
260 }
261
262 if ((status & (1<<3|1<<9)) == (1<<3|1<<9)) {
263 bluetooth_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_BLUETOOTH);
264 if (!bluetooth_rfkill)
265 goto err_bluetooth;
266 bluetooth_rfkill->name = "dell-bluetooth";
267 bluetooth_rfkill->toggle_radio = dell_bluetooth_set;
268 bluetooth_rfkill->get_state = dell_bluetooth_get;
269 ret = rfkill_register(bluetooth_rfkill);
270 if (ret)
271 goto err_bluetooth;
272 }
273
274 if ((status & (1<<4|1<<10)) == (1<<4|1<<10)) {
275 wwan_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WWAN);
276 if (!wwan_rfkill)
277 goto err_wwan;
278 wwan_rfkill->name = "dell-wwan";
279 wwan_rfkill->toggle_radio = dell_wwan_set;
280 wwan_rfkill->get_state = dell_wwan_get;
281 ret = rfkill_register(wwan_rfkill);
282 if (ret)
283 goto err_wwan;
284 }
285
286 return 0;
287err_wwan:
288 if (wwan_rfkill)
289 rfkill_free(wwan_rfkill);
290 if (bluetooth_rfkill) {
291 rfkill_unregister(bluetooth_rfkill);
292 bluetooth_rfkill = NULL;
293 }
294err_bluetooth:
295 if (bluetooth_rfkill)
296 rfkill_free(bluetooth_rfkill);
297 if (wifi_rfkill) {
298 rfkill_unregister(wifi_rfkill);
299 wifi_rfkill = NULL;
300 }
301err_wifi:
302 if (wifi_rfkill)
303 rfkill_free(wifi_rfkill);
304
305 return ret;
306}
307
308static int dell_send_intensity(struct backlight_device *bd)
309{
310 struct calling_interface_buffer buffer;
311
312 memset(&buffer, 0, sizeof(struct calling_interface_buffer));
313 buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN);
314 buffer.input[1] = bd->props.brightness;
315
316 if (buffer.input[0] == -1)
317 return -ENODEV;
318
319 if (power_supply_is_system_supplied() > 0)
320 dell_send_request(&buffer, 1, 2);
321 else
322 dell_send_request(&buffer, 1, 1);
323
324 return 0;
325}
326
327static int dell_get_intensity(struct backlight_device *bd)
328{
329 struct calling_interface_buffer buffer;
330
331 memset(&buffer, 0, sizeof(struct calling_interface_buffer));
332 buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN);
333
334 if (buffer.input[0] == -1)
335 return -ENODEV;
336
337 if (power_supply_is_system_supplied() > 0)
338 dell_send_request(&buffer, 0, 2);
339 else
340 dell_send_request(&buffer, 0, 1);
341
342 return buffer.output[1];
343}
344
345static struct backlight_ops dell_ops = {
346 .get_brightness = dell_get_intensity,
347 .update_status = dell_send_intensity,
348};
349
350static int __init dell_init(void)
351{
352 struct calling_interface_buffer buffer;
353 int max_intensity = 0;
354 int ret;
355
356 if (!dmi_check_system(dell_device_table))
357 return -ENODEV;
358
359 dmi_walk(find_tokens);
360
361 if (!da_tokens) {
362 printk(KERN_INFO "dell-laptop: Unable to find dmi tokens\n");
363 return -ENODEV;
364 }
365
366 ret = dell_setup_rfkill();
367
368 if (ret) {
369 printk(KERN_WARNING "dell-laptop: Unable to setup rfkill\n");
370 goto out;
371 }
372
373#ifdef CONFIG_ACPI
374 /* In the event of an ACPI backlight being available, don't
375 * register the platform controller.
376 */
377 if (acpi_video_backlight_support())
378 return 0;
379#endif
380
381 memset(&buffer, 0, sizeof(struct calling_interface_buffer));
382 buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN);
383
384 if (buffer.input[0] != -1) {
385 dell_send_request(&buffer, 0, 2);
386 max_intensity = buffer.output[3];
387 }
388
389 if (max_intensity) {
390 dell_backlight_device = backlight_device_register(
391 "dell_backlight",
392 NULL, NULL,
393 &dell_ops);
394
395 if (IS_ERR(dell_backlight_device)) {
396 ret = PTR_ERR(dell_backlight_device);
397 dell_backlight_device = NULL;
398 goto out;
399 }
400
401 dell_backlight_device->props.max_brightness = max_intensity;
402 dell_backlight_device->props.brightness =
403 dell_get_intensity(dell_backlight_device);
404 backlight_update_status(dell_backlight_device);
405 }
406
407 return 0;
408out:
409 if (wifi_rfkill)
410 rfkill_unregister(wifi_rfkill);
411 if (bluetooth_rfkill)
412 rfkill_unregister(bluetooth_rfkill);
413 if (wwan_rfkill)
414 rfkill_unregister(wwan_rfkill);
415 kfree(da_tokens);
416 return ret;
417}
418
419static void __exit dell_exit(void)
420{
421 backlight_device_unregister(dell_backlight_device);
422 if (wifi_rfkill)
423 rfkill_unregister(wifi_rfkill);
424 if (bluetooth_rfkill)
425 rfkill_unregister(bluetooth_rfkill);
426 if (wwan_rfkill)
427 rfkill_unregister(wwan_rfkill);
428}
429
430module_init(dell_init);
431module_exit(dell_exit);
432
433MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
434MODULE_DESCRIPTION("Dell laptop driver");
435MODULE_LICENSE("GPL");
436MODULE_ALIAS("dmi:*svnDellInc.:*:ct8:*");
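All of the rfkill callbacks above funnel through the class 17, select 11 SMI call and test single bits of result[1] as documented in the comment block: bits 2/3/4 report supported radios, 8/9/10 installed ones, 16 the hardware switch, and 17/18/19 the blocked states. A stand-alone decoder for such a status word; the sample value is invented:

#include <stdio.h>
#include <stdint.h>

static void decode_radio(uint32_t status)
{
        static const struct { const char *name; int sup, inst, blk; } r[] = {
                { "wifi",      2, 8,  17 },
                { "bluetooth", 3, 9,  18 },
                { "wwan",      4, 10, 19 },
        };

        printf("hardware switch: %s\n",
               (status & (1u << 16)) ? "on" : "off");
        for (int i = 0; i < 3; i++) {
                if (!((status >> r[i].sup) & 1))
                        continue;       /* radio not supported */
                printf("%s: %s, %s\n", r[i].name,
                       ((status >> r[i].inst) & 1) ? "installed" : "absent",
                       ((status >> r[i].blk) & 1) ? "blocked" : "unblocked");
        }
}

int main(void)
{
        decode_radio(0x00010104);       /* invented sample status word */
        return 0;
}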
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 6fde0a2e3567..bc33200535fc 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -120,6 +120,13 @@ config MTD_PHRAM
120 doesn't have access to, memory beyond the mem=xxx limit, nvram, 120 doesn't have access to, memory beyond the mem=xxx limit, nvram,
121 memory on the video card, etc... 121 memory on the video card, etc...
122 122
123config MTD_PS3VRAM
124 tristate "PS3 video RAM"
125 depends on FB_PS3
126 help
127 This driver allows you to use excess PS3 video RAM as volatile
128 storage or system swap.
129
123config MTD_LART 130config MTD_LART
124 tristate "28F160xx flash driver for LART" 131 tristate "28F160xx flash driver for LART"
125 depends on SA1100_LART 132 depends on SA1100_LART
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 0993d5cf3923..e51521df4e40 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_MTD_LART) += lart.o
16obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o 16obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o
17obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o 17obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o
18obj-$(CONFIG_MTD_M25P80) += m25p80.o 18obj-$(CONFIG_MTD_M25P80) += m25p80.o
19obj-$(CONFIG_MTD_PS3VRAM) += ps3vram.o
diff --git a/drivers/mtd/devices/ps3vram.c b/drivers/mtd/devices/ps3vram.c
new file mode 100644
index 000000000000..d21e9beb7ed2
--- /dev/null
+++ b/drivers/mtd/devices/ps3vram.c
@@ -0,0 +1,768 @@
1/**
2 * ps3vram - Use extra PS3 video ram as MTD block device.
3 *
4 * Copyright (c) 2007-2008 Jim Paris <jim@jtan.com>
5 * Added RSX DMA support: Vivien Chappelier <vivien.chappelier@free.fr>
6 */
7
8#include <linux/io.h>
9#include <linux/mm.h>
10#include <linux/init.h>
11#include <linux/kernel.h>
12#include <linux/list.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/slab.h>
16#include <linux/version.h>
17#include <linux/gfp.h>
18#include <linux/delay.h>
19#include <linux/mtd/mtd.h>
20
21#include <asm/lv1call.h>
22#include <asm/ps3.h>
23
24#define DEVICE_NAME "ps3vram"
25
26#define XDR_BUF_SIZE (2 * 1024 * 1024) /* XDR buffer (must be 1MiB aligned) */
27#define XDR_IOIF 0x0c000000
28
29#define FIFO_BASE XDR_IOIF
30#define FIFO_SIZE (64 * 1024)
31
32#define DMA_PAGE_SIZE (4 * 1024)
33
34#define CACHE_PAGE_SIZE (256 * 1024)
35#define CACHE_PAGE_COUNT ((XDR_BUF_SIZE - FIFO_SIZE) / CACHE_PAGE_SIZE)
36
37#define CACHE_OFFSET CACHE_PAGE_SIZE
38#define FIFO_OFFSET 0
39
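/*
 * Worked example (editorial note, derived from the constants above):
 * CACHE_PAGE_COUNT = (2 MiB - 64 KiB) / 256 KiB = 7 cache pages. The
 * FIFO occupies offsets 0..64 KiB of the XDR buffer and the cache pages
 * start at CACHE_OFFSET = 256 KiB, so the 7 pages cover 256 KiB..2 MiB.
 */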
40#define CTRL_PUT 0x10
41#define CTRL_GET 0x11
42#define CTRL_TOP 0x15
43
44#define UPLOAD_SUBCH 1
45#define DOWNLOAD_SUBCH 2
46
47#define NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
48#define NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
49
50#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT 0x601
51
52struct mtd_info ps3vram_mtd;
53
54#define CACHE_PAGE_PRESENT 1
55#define CACHE_PAGE_DIRTY 2
56
57struct ps3vram_tag {
58 unsigned int address;
59 unsigned int flags;
60};
61
62struct ps3vram_cache {
63 unsigned int page_count;
64 unsigned int page_size;
65 struct ps3vram_tag *tags;
66};
67
68struct ps3vram_priv {
69 u64 memory_handle;
70 u64 context_handle;
71 u32 *ctrl;
72 u32 *reports;
73 u8 __iomem *ddr_base;
74 u8 *xdr_buf;
75
76 u32 *fifo_base;
77 u32 *fifo_ptr;
78
79 struct device *dev;
80 struct ps3vram_cache cache;
81
82 /* Used to serialize cache/DMA operations */
83 struct mutex lock;
84};
85
86#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
87#define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */
88#define DMA_NOTIFIER_SIZE 0x40
89#define NOTIFIER 7 /* notifier used for completion report */
90
91/* A trailing '-' means to subtract off ps3fb_videomemory.size */
92char *size = "256M-";
93module_param(size, charp, 0);
94MODULE_PARM_DESC(size, "memory size");
95
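/*
 * Example (editorial): size=512M- asks for 512 MiB minus whatever ps3fb
 * has reserved -- e.g. with a hypothetical 18 MiB framebuffer, 494 MiB.
 * ps3vram_probe() below parses this with memparse() and aligns the
 * result to a 1 MiB boundary.
 */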
96static u32 *ps3vram_get_notifier(u32 *reports, int notifier)
97{
98 return (void *) reports +
99 DMA_NOTIFIER_OFFSET_BASE +
100 DMA_NOTIFIER_SIZE * notifier;
101}
102
103static void ps3vram_notifier_reset(struct mtd_info *mtd)
104{
105 int i;
106
107 struct ps3vram_priv *priv = mtd->priv;
108 u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
109 for (i = 0; i < 4; i++)
110 notify[i] = 0xffffffff;
111}
112
113static int ps3vram_notifier_wait(struct mtd_info *mtd, unsigned int timeout_ms)
114{
115 struct ps3vram_priv *priv = mtd->priv;
116 u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
117 unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
118
119 do {
120 if (!notify[3])
121 return 0;
122 msleep(1);
123 } while (time_before(jiffies, timeout));
124
125 return -ETIMEDOUT;
126}
127
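/*
 * Editorial note: this is the stock jiffies deadline poll -- compute
 * the deadline once with msecs_to_jiffies(), re-test the condition,
 * msleep(1), and bail out when time_before() turns false. The same
 * shape recurs in ps3vram_wait_ring() below.
 */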
128static void ps3vram_init_ring(struct mtd_info *mtd)
129{
130 struct ps3vram_priv *priv = mtd->priv;
131
132 priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
133 priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
134}
135
136static int ps3vram_wait_ring(struct mtd_info *mtd, unsigned int timeout_ms)
137{
138 struct ps3vram_priv *priv = mtd->priv;
139 unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
140
141 do {
142 if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
143 return 0;
144 msleep(1);
145 } while (time_before(jiffies, timeout));
146
147 dev_dbg(priv->dev, "%s:%d: FIFO timeout (%08x/%08x/%08x)\n", __func__,
148 __LINE__, priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
149 priv->ctrl[CTRL_TOP]);
150
151 return -ETIMEDOUT;
152}
153
154static void ps3vram_out_ring(struct ps3vram_priv *priv, u32 data)
155{
156 *(priv->fifo_ptr)++ = data;
157}
158
159static void ps3vram_begin_ring(struct ps3vram_priv *priv, u32 chan,
160 u32 tag, u32 size)
161{
162 ps3vram_out_ring(priv, (size << 18) | (chan << 13) | tag);
163}
164
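/*
 * Editorial sketch: the word emitted above appears to follow the
 * familiar RSX/NV FIFO method-header layout implied by the shifts --
 * argument count in bits 31..18, subchannel in bits 17..13, method
 * offset in bits 12..0. E.g. ps3vram_begin_ring(priv, UPLOAD_SUBCH,
 * 0x180, 3) writes (3 << 18) | (1 << 13) | 0x180 == 0x000c2180:
 * three data words follow for method 0x180 on subchannel 1.
 */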
165static void ps3vram_rewind_ring(struct mtd_info *mtd)
166{
167 struct ps3vram_priv *priv = mtd->priv;
168 u64 status;
169
170 ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
171
172 priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
173
174 /* asking the HV for a blit will kick the fifo */
175 status = lv1_gpu_context_attribute(priv->context_handle,
176 L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT,
177 0, 0, 0, 0);
178 if (status)
179 dev_err(priv->dev, "%s:%d: lv1_gpu_context_attribute failed\n",
180 __func__, __LINE__);
181
182 priv->fifo_ptr = priv->fifo_base;
183}
184
185static void ps3vram_fire_ring(struct mtd_info *mtd)
186{
187 struct ps3vram_priv *priv = mtd->priv;
188 u64 status;
189
190 mutex_lock(&ps3_gpu_mutex);
191
192 priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
193 (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
194
195 /* asking the HV for a blit will kick the fifo */
196 status = lv1_gpu_context_attribute(priv->context_handle,
197 L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT,
198 0, 0, 0, 0);
199 if (status)
200 dev_err(priv->dev, "%s:%d: lv1_gpu_context_attribute failed\n",
201 __func__, __LINE__);
202
203 if ((priv->fifo_ptr - priv->fifo_base) * sizeof(u32) >
204 FIFO_SIZE - 1024) {
205 dev_dbg(priv->dev, "%s:%d: fifo full, rewinding\n", __func__,
206 __LINE__);
207 ps3vram_wait_ring(mtd, 200);
208 ps3vram_rewind_ring(mtd);
209 }
210
211 mutex_unlock(&ps3_gpu_mutex);
212}
213
214static void ps3vram_bind(struct mtd_info *mtd)
215{
216 struct ps3vram_priv *priv = mtd->priv;
217
218 ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0, 1);
219 ps3vram_out_ring(priv, 0x31337303);
220 ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x180, 3);
221 ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
222 ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
223 ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
224
225 ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0, 1);
226 ps3vram_out_ring(priv, 0x3137c0de);
227 ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x180, 3);
228 ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
229 ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
230 ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
231
232 ps3vram_fire_ring(mtd);
233}
234
235static int ps3vram_upload(struct mtd_info *mtd, unsigned int src_offset,
236 unsigned int dst_offset, int len, int count)
237{
238 struct ps3vram_priv *priv = mtd->priv;
239
240 ps3vram_begin_ring(priv, UPLOAD_SUBCH,
241 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
242 ps3vram_out_ring(priv, XDR_IOIF + src_offset);
243 ps3vram_out_ring(priv, dst_offset);
244 ps3vram_out_ring(priv, len);
245 ps3vram_out_ring(priv, len);
246 ps3vram_out_ring(priv, len);
247 ps3vram_out_ring(priv, count);
248 ps3vram_out_ring(priv, (1 << 8) | 1);
249 ps3vram_out_ring(priv, 0);
250
251 ps3vram_notifier_reset(mtd);
252 ps3vram_begin_ring(priv, UPLOAD_SUBCH,
253 NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
254 ps3vram_out_ring(priv, 0);
255 ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x100, 1);
256 ps3vram_out_ring(priv, 0);
257 ps3vram_fire_ring(mtd);
258 if (ps3vram_notifier_wait(mtd, 200) < 0) {
259 dev_dbg(priv->dev, "%s:%d: notifier timeout\n", __func__,
260 __LINE__);
261 return -1;
262 }
263
264 return 0;
265}
266
267static int ps3vram_download(struct mtd_info *mtd, unsigned int src_offset,
268 unsigned int dst_offset, int len, int count)
269{
270 struct ps3vram_priv *priv = mtd->priv;
271
272 ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
273 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
274 ps3vram_out_ring(priv, src_offset);
275 ps3vram_out_ring(priv, XDR_IOIF + dst_offset);
276 ps3vram_out_ring(priv, len);
277 ps3vram_out_ring(priv, len);
278 ps3vram_out_ring(priv, len);
279 ps3vram_out_ring(priv, count);
280 ps3vram_out_ring(priv, (1 << 8) | 1);
281 ps3vram_out_ring(priv, 0);
282
283 ps3vram_notifier_reset(mtd);
284 ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
285 NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
286 ps3vram_out_ring(priv, 0);
287 ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x100, 1);
288 ps3vram_out_ring(priv, 0);
289 ps3vram_fire_ring(mtd);
290 if (ps3vram_notifier_wait(mtd, 200) < 0) {
291 dev_dbg(priv->dev, "%s:%d: notifier timeout\n", __func__,
292 __LINE__);
293 return -1;
294 }
295
296 return 0;
297}
298
299static void ps3vram_cache_evict(struct mtd_info *mtd, int entry)
300{
301 struct ps3vram_priv *priv = mtd->priv;
302 struct ps3vram_cache *cache = &priv->cache;
303
304 if (cache->tags[entry].flags & CACHE_PAGE_DIRTY) {
305 dev_dbg(priv->dev, "%s:%d: flushing %d : 0x%08x\n", __func__,
306 __LINE__, entry, cache->tags[entry].address);
307 if (ps3vram_upload(mtd,
308 CACHE_OFFSET + entry * cache->page_size,
309 cache->tags[entry].address,
310 DMA_PAGE_SIZE,
311 cache->page_size / DMA_PAGE_SIZE) < 0) {
312 dev_dbg(priv->dev, "%s:%d: failed to upload from "
313 "0x%x to 0x%x size 0x%x\n", __func__, __LINE__,
314 entry * cache->page_size,
315 cache->tags[entry].address, cache->page_size);
316 }
317 cache->tags[entry].flags &= ~CACHE_PAGE_DIRTY;
318 }
319}
320
321static void ps3vram_cache_load(struct mtd_info *mtd, int entry,
322 unsigned int address)
323{
324 struct ps3vram_priv *priv = mtd->priv;
325 struct ps3vram_cache *cache = &priv->cache;
326
327 dev_dbg(priv->dev, "%s:%d: fetching %d : 0x%08x\n", __func__, __LINE__,
328 entry, address);
329 if (ps3vram_download(mtd,
330 address,
331 CACHE_OFFSET + entry * cache->page_size,
332 DMA_PAGE_SIZE,
333 cache->page_size / DMA_PAGE_SIZE) < 0) {
334 dev_err(priv->dev, "%s:%d: failed to download from "
335 "0x%x to 0x%x size 0x%x\n", __func__, __LINE__, address,
336 entry * cache->page_size, cache->page_size);
337 }
338
339 cache->tags[entry].address = address;
340 cache->tags[entry].flags |= CACHE_PAGE_PRESENT;
341}
342
343
344static void ps3vram_cache_flush(struct mtd_info *mtd)
345{
346 struct ps3vram_priv *priv = mtd->priv;
347 struct ps3vram_cache *cache = &priv->cache;
348 int i;
349
350 dev_dbg(priv->dev, "%s:%d: FLUSH\n", __func__, __LINE__);
351 for (i = 0; i < cache->page_count; i++) {
352 ps3vram_cache_evict(mtd, i);
353 cache->tags[i].flags = 0;
354 }
355}
356
357static unsigned int ps3vram_cache_match(struct mtd_info *mtd, loff_t address)
358{
359 struct ps3vram_priv *priv = mtd->priv;
360 struct ps3vram_cache *cache = &priv->cache;
361 unsigned int base;
362 unsigned int offset;
363 int i;
364 static int counter;
365
366 offset = (unsigned int) (address & (cache->page_size - 1));
367 base = (unsigned int) (address - offset);
368
369 /* fully associative check */
370 for (i = 0; i < cache->page_count; i++) {
371 if ((cache->tags[i].flags & CACHE_PAGE_PRESENT) &&
372 cache->tags[i].address == base) {
373 dev_dbg(priv->dev, "%s:%d: found entry %d : 0x%08x\n",
374 __func__, __LINE__, i, cache->tags[i].address);
375 return i;
376 }
377 }
378
379 /* choose a random entry */
380 i = (jiffies + (counter++)) % cache->page_count;
381 dev_dbg(priv->dev, "%s:%d: using entry %d\n", __func__, __LINE__, i);
382
383 ps3vram_cache_evict(mtd, i);
384 ps3vram_cache_load(mtd, i, base);
385
386 return i;
387}
388
389static int ps3vram_cache_init(struct mtd_info *mtd)
390{
391 struct ps3vram_priv *priv = mtd->priv;
392
393 priv->cache.page_count = CACHE_PAGE_COUNT;
394 priv->cache.page_size = CACHE_PAGE_SIZE;
395 priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
396 CACHE_PAGE_COUNT, GFP_KERNEL);
397 if (priv->cache.tags == NULL) {
398 dev_err(priv->dev, "%s:%d: could not allocate cache tags\n",
399 __func__, __LINE__);
400 return -ENOMEM;
401 }
402
403 dev_info(priv->dev, "created ram cache: %d entries, %d KiB each\n",
404 CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
405
406 return 0;
407}
408
409static void ps3vram_cache_cleanup(struct mtd_info *mtd)
410{
411 struct ps3vram_priv *priv = mtd->priv;
412
413 ps3vram_cache_flush(mtd);
414 kfree(priv->cache.tags);
415}
416
417static int ps3vram_erase(struct mtd_info *mtd, struct erase_info *instr)
418{
419 struct ps3vram_priv *priv = mtd->priv;
420
421 if (instr->addr + instr->len > mtd->size)
422 return -EINVAL;
423
424 mutex_lock(&priv->lock);
425
426 ps3vram_cache_flush(mtd);
427
428 /* Set bytes to 0xFF */
429 memset_io(priv->ddr_base + instr->addr, 0xFF, instr->len);
430
431 mutex_unlock(&priv->lock);
432
433 instr->state = MTD_ERASE_DONE;
434 mtd_erase_callback(instr);
435
436 return 0;
437}
438
439static int ps3vram_read(struct mtd_info *mtd, loff_t from, size_t len,
440 size_t *retlen, u_char *buf)
441{
442 struct ps3vram_priv *priv = mtd->priv;
443 unsigned int cached, count;
444
445 dev_dbg(priv->dev, "%s:%d: from=0x%08x len=0x%zx\n", __func__, __LINE__,
446 (unsigned int)from, len);
447
448 if (from >= mtd->size)
449 return -EINVAL;
450
451 if (len > mtd->size - from)
452 len = mtd->size - from;
453
454 /* Copy from vram to buf */
455 count = len;
456 while (count) {
457 unsigned int offset, avail;
458 unsigned int entry;
459
460 offset = (unsigned int) (from & (priv->cache.page_size - 1));
461 avail = priv->cache.page_size - offset;
462
463 mutex_lock(&priv->lock);
464
465 entry = ps3vram_cache_match(mtd, from);
466 cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
467
468 dev_dbg(priv->dev, "%s:%d: from=%08x cached=%08x offset=%08x "
469 "avail=%08x count=%08x\n", __func__, __LINE__,
470 (unsigned int)from, cached, offset, avail, count);
471
472 if (avail > count)
473 avail = count;
474 memcpy(buf, priv->xdr_buf + cached, avail);
475
476 mutex_unlock(&priv->lock);
477
478 buf += avail;
479 count -= avail;
480 from += avail;
481 }
482
483 *retlen = len;
484 return 0;
485}
486
487static int ps3vram_write(struct mtd_info *mtd, loff_t to, size_t len,
488 size_t *retlen, const u_char *buf)
489{
490 struct ps3vram_priv *priv = mtd->priv;
491 unsigned int cached, count;
492
493 if (to >= mtd->size)
494 return -EINVAL;
495
496 if (len > mtd->size - to)
497 len = mtd->size - to;
498
499 /* Copy from buf to vram */
500 count = len;
501 while (count) {
502 unsigned int offset, avail;
503 unsigned int entry;
504
505 offset = (unsigned int) (to & (priv->cache.page_size - 1));
506 avail = priv->cache.page_size - offset;
507
508 mutex_lock(&priv->lock);
509
510 entry = ps3vram_cache_match(mtd, to);
511 cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
512
513 dev_dbg(priv->dev, "%s:%d: to=%08x cached=%08x offset=%08x "
514 "avail=%08x count=%08x\n", __func__, __LINE__,
515 (unsigned int)to, cached, offset, avail, count);
516
517 if (avail > count)
518 avail = count;
519 memcpy(priv->xdr_buf + cached, buf, avail);
520
521 priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
522
523 mutex_unlock(&priv->lock);
524
525 buf += avail;
526 count -= avail;
527 to += avail;
528 }
529
530 *retlen = len;
531 return 0;
532}
533
534static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
535{
536 struct ps3vram_priv *priv;
537 int status;
538 u64 ddr_lpar;
539 u64 ctrl_lpar;
540 u64 info_lpar;
541 u64 reports_lpar;
542 u64 ddr_size;
543 u64 reports_size;
544 int ret = -ENOMEM;
545 char *rest;
546
547 ret = -EIO;
548 ps3vram_mtd.priv = kzalloc(sizeof(struct ps3vram_priv), GFP_KERNEL);
549 if (!ps3vram_mtd.priv)
550 goto out;
551 priv = ps3vram_mtd.priv;
552
553 mutex_init(&priv->lock);
554 priv->dev = &dev->core;
555
556 /* Allocate XDR buffer (1MiB aligned) */
557 priv->xdr_buf = (void *)__get_free_pages(GFP_KERNEL,
558 get_order(XDR_BUF_SIZE));
559 if (priv->xdr_buf == NULL) {
560 dev_dbg(&dev->core, "%s:%d: could not allocate XDR buffer\n",
561 __func__, __LINE__);
562 ret = -ENOMEM;
563 goto out_free_priv;
564 }
565
566 /* Put FIFO at beginning of XDR buffer */
567 priv->fifo_base = (u32 *) (priv->xdr_buf + FIFO_OFFSET);
568 priv->fifo_ptr = priv->fifo_base;
569
570 /* XXX: Need to open GPU, in case ps3fb or snd_ps3 aren't loaded */
571 if (ps3_open_hv_device(dev)) {
572 dev_err(&dev->core, "%s:%d: ps3_open_hv_device failed\n",
573 __func__, __LINE__);
574 ret = -EAGAIN;
575 goto out_close_gpu;
576 }
577
578 /* Request memory */
579 status = -1;
580 ddr_size = memparse(size, &rest);
581 if (*rest == '-')
582 ddr_size -= ps3fb_videomemory.size;
583 ddr_size = ALIGN(ddr_size, 1024*1024);
584 if (ddr_size <= 0) {
585 dev_err(&dev->core, "%s:%d: specified size is too small\n",
586 __func__, __LINE__);
587 ret = -EINVAL;
588 goto out_close_gpu;
589 }
590
591 while (ddr_size > 0) {
592 status = lv1_gpu_memory_allocate(ddr_size, 0, 0, 0, 0,
593 &priv->memory_handle,
594 &ddr_lpar);
595 if (!status)
596 break;
597 ddr_size -= 1024*1024;
598 }
599 if (status || ddr_size <= 0) {
600 dev_err(&dev->core, "%s:%d: lv1_gpu_memory_allocate failed\n",
601 __func__, __LINE__);
602 ret = -ENOMEM;
603 goto out_free_xdr_buf;
604 }
605
606 /* Request context */
607 status = lv1_gpu_context_allocate(priv->memory_handle,
608 0,
609 &priv->context_handle,
610 &ctrl_lpar,
611 &info_lpar,
612 &reports_lpar,
613 &reports_size);
614 if (status) {
615 dev_err(&dev->core, "%s:%d: lv1_gpu_context_allocate failed\n",
616 __func__, __LINE__);
617 ret = -ENOMEM;
618 goto out_free_memory;
619 }
620
621 /* Map XDR buffer to RSX */
622 status = lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
623 ps3_mm_phys_to_lpar(__pa(priv->xdr_buf)),
624 XDR_BUF_SIZE, 0);
625 if (status) {
626 dev_err(&dev->core, "%s:%d: lv1_gpu_context_iomap failed\n",
627 __func__, __LINE__);
628 ret = -ENOMEM;
629 goto out_free_context;
630 }
631
632 priv->ddr_base = ioremap_flags(ddr_lpar, ddr_size, _PAGE_NO_CACHE);
633
634 if (!priv->ddr_base) {
635 dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
636 __LINE__);
637 ret = -ENOMEM;
638 goto out_free_context;
639 }
640
641 priv->ctrl = ioremap(ctrl_lpar, 64 * 1024);
642 if (!priv->ctrl) {
643 dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
644 __LINE__);
645 ret = -ENOMEM;
646 goto out_unmap_vram;
647 }
648
649 priv->reports = ioremap(reports_lpar, reports_size);
650 if (!priv->reports) {
651 dev_err(&dev->core, "%s:%d: ioremap failed\n", __func__,
652 __LINE__);
653 ret = -ENOMEM;
654 goto out_unmap_ctrl;
655 }
656
657 mutex_lock(&ps3_gpu_mutex);
658 ps3vram_init_ring(&ps3vram_mtd);
659 mutex_unlock(&ps3_gpu_mutex);
660
661 ps3vram_mtd.name = "ps3vram";
662 ps3vram_mtd.size = ddr_size;
663 ps3vram_mtd.flags = MTD_CAP_RAM;
664 ps3vram_mtd.erase = ps3vram_erase;
665 ps3vram_mtd.point = NULL;
666 ps3vram_mtd.unpoint = NULL;
667 ps3vram_mtd.read = ps3vram_read;
668 ps3vram_mtd.write = ps3vram_write;
669 ps3vram_mtd.owner = THIS_MODULE;
670 ps3vram_mtd.type = MTD_RAM;
671 ps3vram_mtd.erasesize = CACHE_PAGE_SIZE;
672 ps3vram_mtd.writesize = 1;
673
674 ps3vram_bind(&ps3vram_mtd);
675
676 mutex_lock(&ps3_gpu_mutex);
677 ret = ps3vram_wait_ring(&ps3vram_mtd, 100);
678 mutex_unlock(&ps3_gpu_mutex);
679 if (ret < 0) {
680 dev_err(&dev->core, "%s:%d: failed to initialize channels\n",
681 __func__, __LINE__);
682 ret = -ETIMEDOUT;
683 goto out_unmap_reports;
684 }
685
686 ps3vram_cache_init(&ps3vram_mtd);
687
688 if (add_mtd_device(&ps3vram_mtd)) {
689 dev_err(&dev->core, "%s:%d: add_mtd_device failed\n",
690 __func__, __LINE__);
691 ret = -EAGAIN;
692 goto out_cache_cleanup;
693 }
694
695 dev_info(&dev->core, "reserved %u MiB of gpu memory\n",
696 (unsigned int)(ddr_size / 1024 / 1024));
697
698 return 0;
699
700out_cache_cleanup:
701 ps3vram_cache_cleanup(&ps3vram_mtd);
702out_unmap_reports:
703 iounmap(priv->reports);
704out_unmap_ctrl:
705 iounmap(priv->ctrl);
706out_unmap_vram:
707 iounmap(priv->ddr_base);
708out_free_context:
709 lv1_gpu_context_free(priv->context_handle);
710out_free_memory:
711 lv1_gpu_memory_free(priv->memory_handle);
712out_close_gpu:
713 ps3_close_hv_device(dev);
714out_free_xdr_buf:
715 free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
716out_free_priv:
717 kfree(ps3vram_mtd.priv);
718 ps3vram_mtd.priv = NULL;
719out:
720 return ret;
721}
722
723static int ps3vram_shutdown(struct ps3_system_bus_device *dev)
724{
725 struct ps3vram_priv *priv;
726
727 priv = ps3vram_mtd.priv;
728
729 del_mtd_device(&ps3vram_mtd);
730 ps3vram_cache_cleanup(&ps3vram_mtd);
731 iounmap(priv->reports);
732 iounmap(priv->ctrl);
733 iounmap(priv->ddr_base);
734 lv1_gpu_context_free(priv->context_handle);
735 lv1_gpu_memory_free(priv->memory_handle);
736 ps3_close_hv_device(dev);
737 free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
738 kfree(priv);
739 return 0;
740}
741
742static struct ps3_system_bus_driver ps3vram_driver = {
743 .match_id = PS3_MATCH_ID_GPU,
744 .match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK,
745 .core.name = DEVICE_NAME,
746 .core.owner = THIS_MODULE,
747 .probe = ps3vram_probe,
748 .remove = ps3vram_shutdown,
749 .shutdown = ps3vram_shutdown,
750};
751
752static int __init ps3vram_init(void)
753{
754 return ps3_system_bus_driver_register(&ps3vram_driver);
755}
756
757static void __exit ps3vram_exit(void)
758{
759 ps3_system_bus_driver_unregister(&ps3vram_driver);
760}
761
762module_init(ps3vram_init);
763module_exit(ps3vram_exit);
764
765MODULE_LICENSE("GPL");
766MODULE_AUTHOR("Jim Paris <jim@jtan.com>");
767MODULE_DESCRIPTION("MTD driver for PS3 video RAM");
768MODULE_ALIAS(PS3_MODULE_ALIAS_GPU_RAMDISK);
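
The ps3vram_read()/ps3vram_write() loops above split every request at cache-page boundaries: an initial partial chunk up to the next 256 KiB boundary, then page-sized chunks. A self-contained userspace sketch of just that chunking, with hypothetical names (walk, PAGE_SZ) rather than the driver's:

#include <stdio.h>

#define PAGE_SZ (256 * 1024)		/* mirrors CACHE_PAGE_SIZE */

/* Print the chunks a request [from, from+len) would be served in,
 * using the same offset/avail arithmetic as the driver. */
static void walk(unsigned long from, unsigned long len)
{
	while (len) {
		unsigned long offset = from & (PAGE_SZ - 1);
		unsigned long avail = PAGE_SZ - offset;

		if (avail > len)
			avail = len;
		printf("page 0x%08lx offset 0x%05lx copy 0x%05lx\n",
		       from - offset, offset, avail);
		from += avail;
		len -= avail;
	}
}

int main(void)
{
	walk(0x3f000, 0x42000);	/* spans three cache pages */
	return 0;
}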
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 5d9bcf109c13..4abbe573fa40 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -564,7 +564,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_unmap);
564 * @dtype: expected data type 564 * @dtype: expected data type
565 * 565 *
566 * This function maps an un-mapped logical eraseblock @lnum to a physical 566 * This function maps an un-mapped logical eraseblock @lnum to a physical
567 * eraseblock. This means, that after a successfull invocation of this 567 * eraseblock. This means, that after a successful invocation of this
568 * function the logical eraseblock @lnum will be empty (contain only %0xFF 568 * function the logical eraseblock @lnum will be empty (contain only %0xFF
569 * bytes) and be mapped to a physical eraseblock, even if an unclean reboot 569 * bytes) and be mapped to a physical eraseblock, even if an unclean reboot
570 * happens. 570 * happens.
diff --git a/drivers/net/wireless/ath5k/dma.c b/drivers/net/wireless/ath5k/dma.c
index 7e2b1a67e5da..b65b4feb2d28 100644
--- a/drivers/net/wireless/ath5k/dma.c
+++ b/drivers/net/wireless/ath5k/dma.c
@@ -594,7 +594,7 @@ int ath5k_hw_get_isr(struct ath5k_hw *ah, enum ath5k_int *interrupt_mask)
594 * XXX: BMISS interrupts may occur after association. 594 * XXX: BMISS interrupts may occur after association.
595 * I found this on 5210 code but it needs testing. If this is 595 * I found this on 5210 code but it needs testing. If this is
596 * true we should disable them before assoc and re-enable them 596 * true we should disable them before assoc and re-enable them
597 * after a successfull assoc + some jiffies. 597 * after a successful assoc + some jiffies.
598 interrupt_mask &= ~AR5K_INT_BMISS; 598 interrupt_mask &= ~AR5K_INT_BMISS;
599 */ 599 */
600 } 600 }
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 9caa96a13586..a611ad857983 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -287,7 +287,7 @@ static void zd_op_stop(struct ieee80211_hw *hw)
287 * @skb - a sk-buffer 287 * @skb - a sk-buffer
288 * @flags: extra flags to set in the TX status info 288 * @flags: extra flags to set in the TX status info
289 * @ackssi: ACK signal strength 289 * @ackssi: ACK signal strength
290 * @success - True for successfull transmission of the frame 290 * @success - True for successful transmission of the frame
291 * 291 *
292 * This information calls ieee80211_tx_status_irqsafe() if required by the 292 * This information calls ieee80211_tx_status_irqsafe() if required by the
293 * control information. It copies the control information into the status 293 * control information. It copies the control information into the status
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 162330b9d1dc..7e5155e88ac7 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -86,13 +86,11 @@ enum ds_type {
86 86
87 87
88struct ds1307 { 88struct ds1307 {
89 u8 reg_addr;
90 u8 regs[11]; 89 u8 regs[11];
91 enum ds_type type; 90 enum ds_type type;
92 unsigned long flags; 91 unsigned long flags;
93#define HAS_NVRAM 0 /* bit 0 == sysfs file active */ 92#define HAS_NVRAM 0 /* bit 0 == sysfs file active */
94#define HAS_ALARM 1 /* bit 1 == irq claimed */ 93#define HAS_ALARM 1 /* bit 1 == irq claimed */
95 struct i2c_msg msg[2];
96 struct i2c_client *client; 94 struct i2c_client *client;
97 struct rtc_device *rtc; 95 struct rtc_device *rtc;
98 struct work_struct work; 96 struct work_struct work;
@@ -204,13 +202,9 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
204 int tmp; 202 int tmp;
205 203
206 /* read the RTC date and time registers all at once */ 204 /* read the RTC date and time registers all at once */
207 ds1307->reg_addr = 0; 205 tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
208 ds1307->msg[1].flags = I2C_M_RD; 206 DS1307_REG_SECS, 7, ds1307->regs);
209 ds1307->msg[1].len = 7; 207 if (tmp != 7) {
210
211 tmp = i2c_transfer(to_i2c_adapter(ds1307->client->dev.parent),
212 ds1307->msg, 2);
213 if (tmp != 2) {
214 dev_err(dev, "%s error %d\n", "read", tmp); 208 dev_err(dev, "%s error %d\n", "read", tmp);
215 return -EIO; 209 return -EIO;
216 } 210 }
@@ -257,7 +251,6 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
257 t->tm_hour, t->tm_mday, 251 t->tm_hour, t->tm_mday,
258 t->tm_mon, t->tm_year, t->tm_wday); 252 t->tm_mon, t->tm_year, t->tm_wday);
259 253
260 *buf++ = 0; /* first register addr */
261 buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec); 254 buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
262 buf[DS1307_REG_MIN] = bin2bcd(t->tm_min); 255 buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
263 buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour); 256 buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
@@ -282,23 +275,19 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
282 break; 275 break;
283 } 276 }
284 277
285 ds1307->msg[1].flags = 0;
286 ds1307->msg[1].len = 8;
287
288 dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x\n", 278 dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x\n",
289 "write", buf[0], buf[1], buf[2], buf[3], 279 "write", buf[0], buf[1], buf[2], buf[3],
290 buf[4], buf[5], buf[6]); 280 buf[4], buf[5], buf[6]);
291 281
292 result = i2c_transfer(to_i2c_adapter(ds1307->client->dev.parent), 282 result = i2c_smbus_write_i2c_block_data(ds1307->client, 0, 7, buf);
293 &ds1307->msg[1], 1); 283 if (result < 0) {
294 if (result != 1) { 284 dev_err(dev, "%s error %d\n", "write", result);
295 dev_err(dev, "%s error %d\n", "write", tmp); 285 return result;
296 return -EIO;
297 } 286 }
298 return 0; 287 return 0;
299} 288}
300 289
301static int ds1307_read_alarm(struct device *dev, struct rtc_wkalrm *t) 290static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t)
302{ 291{
303 struct i2c_client *client = to_i2c_client(dev); 292 struct i2c_client *client = to_i2c_client(dev);
304 struct ds1307 *ds1307 = i2c_get_clientdata(client); 293 struct ds1307 *ds1307 = i2c_get_clientdata(client);
@@ -308,13 +297,9 @@ static int ds1307_read_alarm(struct device *dev, struct rtc_wkalrm *t)
308 return -EINVAL; 297 return -EINVAL;
309 298
310 /* read all ALARM1, ALARM2, and status registers at once */ 299 /* read all ALARM1, ALARM2, and status registers at once */
311 ds1307->reg_addr = DS1339_REG_ALARM1_SECS; 300 ret = i2c_smbus_read_i2c_block_data(client,
312 ds1307->msg[1].flags = I2C_M_RD; 301 DS1339_REG_ALARM1_SECS, 9, ds1307->regs);
313 ds1307->msg[1].len = 9; 302 if (ret != 9) {
314
315 ret = i2c_transfer(to_i2c_adapter(client->dev.parent),
316 ds1307->msg, 2);
317 if (ret != 2) {
318 dev_err(dev, "%s error %d\n", "alarm read", ret); 303 dev_err(dev, "%s error %d\n", "alarm read", ret);
319 return -EIO; 304 return -EIO;
320 } 305 }
@@ -353,7 +338,7 @@ static int ds1307_read_alarm(struct device *dev, struct rtc_wkalrm *t)
353 return 0; 338 return 0;
354} 339}
355 340
356static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t) 341static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
357{ 342{
358 struct i2c_client *client = to_i2c_client(dev); 343 struct i2c_client *client = to_i2c_client(dev);
359 struct ds1307 *ds1307 = i2c_get_clientdata(client); 344 struct ds1307 *ds1307 = i2c_get_clientdata(client);
@@ -371,13 +356,9 @@ static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t)
371 t->enabled, t->pending); 356 t->enabled, t->pending);
372 357
373 /* read current status of both alarms and the chip */ 358 /* read current status of both alarms and the chip */
374 ds1307->reg_addr = DS1339_REG_ALARM1_SECS; 359 ret = i2c_smbus_read_i2c_block_data(client,
375 ds1307->msg[1].flags = I2C_M_RD; 360 DS1339_REG_ALARM1_SECS, 9, buf);
376 ds1307->msg[1].len = 9; 361 if (ret != 9) {
377
378 ret = i2c_transfer(to_i2c_adapter(client->dev.parent),
379 ds1307->msg, 2);
380 if (ret != 2) {
381 dev_err(dev, "%s error %d\n", "alarm write", ret); 362 dev_err(dev, "%s error %d\n", "alarm write", ret);
382 return -EIO; 363 return -EIO;
383 } 364 }
@@ -392,7 +373,6 @@ static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t)
392 ds1307->regs[6], control, status); 373 ds1307->regs[6], control, status);
393 374
394 /* set ALARM1, using 24 hour and day-of-month modes */ 375 /* set ALARM1, using 24 hour and day-of-month modes */
395 *buf++ = DS1339_REG_ALARM1_SECS; /* first register addr */
396 buf[0] = bin2bcd(t->time.tm_sec); 376 buf[0] = bin2bcd(t->time.tm_sec);
397 buf[1] = bin2bcd(t->time.tm_min); 377 buf[1] = bin2bcd(t->time.tm_min);
398 buf[2] = bin2bcd(t->time.tm_hour); 378 buf[2] = bin2bcd(t->time.tm_hour);
@@ -411,14 +391,11 @@ static int ds1307_set_alarm(struct device *dev, struct rtc_wkalrm *t)
411 } 391 }
412 buf[8] = status & ~(DS1337_BIT_A1I | DS1337_BIT_A2I); 392 buf[8] = status & ~(DS1337_BIT_A1I | DS1337_BIT_A2I);
413 393
414 ds1307->msg[1].flags = 0; 394 ret = i2c_smbus_write_i2c_block_data(client,
415 ds1307->msg[1].len = 10; 395 DS1339_REG_ALARM1_SECS, 9, buf);
416 396 if (ret < 0) {
417 ret = i2c_transfer(to_i2c_adapter(client->dev.parent),
418 &ds1307->msg[1], 1);
419 if (ret != 1) {
420 dev_err(dev, "can't set alarm time\n"); 397 dev_err(dev, "can't set alarm time\n");
421 return -EIO; 398 return ret;
422 } 399 }
423 400
424 return 0; 401 return 0;
@@ -475,8 +452,8 @@ static int ds1307_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
475static const struct rtc_class_ops ds13xx_rtc_ops = { 452static const struct rtc_class_ops ds13xx_rtc_ops = {
476 .read_time = ds1307_get_time, 453 .read_time = ds1307_get_time,
477 .set_time = ds1307_set_time, 454 .set_time = ds1307_set_time,
478 .read_alarm = ds1307_read_alarm, 455 .read_alarm = ds1337_read_alarm,
479 .set_alarm = ds1307_set_alarm, 456 .set_alarm = ds1337_set_alarm,
480 .ioctl = ds1307_ioctl, 457 .ioctl = ds1307_ioctl,
481}; 458};
482 459
@@ -490,7 +467,6 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
490{ 467{
491 struct i2c_client *client; 468 struct i2c_client *client;
492 struct ds1307 *ds1307; 469 struct ds1307 *ds1307;
493 struct i2c_msg msg[2];
494 int result; 470 int result;
495 471
496 client = kobj_to_i2c_client(kobj); 472 client = kobj_to_i2c_client(kobj);
@@ -503,24 +479,10 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
503 if (unlikely(!count)) 479 if (unlikely(!count))
504 return count; 480 return count;
505 481
506 msg[0].addr = client->addr; 482 result = i2c_smbus_read_i2c_block_data(client, 8 + off, count, buf);
507 msg[0].flags = 0; 483 if (result < 0)
508 msg[0].len = 1;
509 msg[0].buf = buf;
510
511 buf[0] = 8 + off;
512
513 msg[1].addr = client->addr;
514 msg[1].flags = I2C_M_RD;
515 msg[1].len = count;
516 msg[1].buf = buf;
517
518 result = i2c_transfer(to_i2c_adapter(client->dev.parent), msg, 2);
519 if (result != 2) {
520 dev_err(&client->dev, "%s error %d\n", "nvram read", result); 484 dev_err(&client->dev, "%s error %d\n", "nvram read", result);
521 return -EIO; 485 return result;
522 }
523 return count;
524} 486}
525 487
526static ssize_t 488static ssize_t
@@ -528,8 +490,7 @@ ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
528 char *buf, loff_t off, size_t count) 490 char *buf, loff_t off, size_t count)
529{ 491{
530 struct i2c_client *client; 492 struct i2c_client *client;
531 u8 buffer[NVRAM_SIZE + 1]; 493 int result;
532 int ret;
533 494
534 client = kobj_to_i2c_client(kobj); 495 client = kobj_to_i2c_client(kobj);
535 496
@@ -540,11 +501,12 @@ ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
540 if (unlikely(!count)) 501 if (unlikely(!count))
541 return count; 502 return count;
542 503
543 buffer[0] = 8 + off; 504 result = i2c_smbus_write_i2c_block_data(client, 8 + off, count, buf);
544 memcpy(buffer + 1, buf, count); 505 if (result < 0) {
545 506 dev_err(&client->dev, "%s error %d\n", "nvram write", result);
546 ret = i2c_master_send(client, buffer, count + 1); 507 return result;
547 return (ret < 0) ? ret : (ret - 1); 508 }
509 return count;
548} 510}
549 511
550static struct bin_attribute nvram = { 512static struct bin_attribute nvram = {
@@ -571,9 +533,11 @@ static int __devinit ds1307_probe(struct i2c_client *client,
571 const struct chip_desc *chip = &chips[id->driver_data]; 533 const struct chip_desc *chip = &chips[id->driver_data];
572 struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); 534 struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
573 int want_irq = false; 535 int want_irq = false;
536 unsigned char *buf;
574 537
575 if (!i2c_check_functionality(adapter, 538 if (!i2c_check_functionality(adapter,
576 I2C_FUNC_I2C | I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) 539 I2C_FUNC_SMBUS_WRITE_BYTE_DATA |
540 I2C_FUNC_SMBUS_I2C_BLOCK))
577 return -EIO; 541 return -EIO;
578 542
579 if (!(ds1307 = kzalloc(sizeof(struct ds1307), GFP_KERNEL))) 543 if (!(ds1307 = kzalloc(sizeof(struct ds1307), GFP_KERNEL)))
@@ -581,18 +545,8 @@ static int __devinit ds1307_probe(struct i2c_client *client,
581 545
582 ds1307->client = client; 546 ds1307->client = client;
583 i2c_set_clientdata(client, ds1307); 547 i2c_set_clientdata(client, ds1307);
584
585 ds1307->msg[0].addr = client->addr;
586 ds1307->msg[0].flags = 0;
587 ds1307->msg[0].len = 1;
588 ds1307->msg[0].buf = &ds1307->reg_addr;
589
590 ds1307->msg[1].addr = client->addr;
591 ds1307->msg[1].flags = I2C_M_RD;
592 ds1307->msg[1].len = sizeof(ds1307->regs);
593 ds1307->msg[1].buf = ds1307->regs;
594
595 ds1307->type = id->driver_data; 548 ds1307->type = id->driver_data;
549 buf = ds1307->regs;
596 550
597 switch (ds1307->type) { 551 switch (ds1307->type) {
598 case ds_1337: 552 case ds_1337:
@@ -602,21 +556,15 @@ static int __devinit ds1307_probe(struct i2c_client *client,
602 INIT_WORK(&ds1307->work, ds1307_work); 556 INIT_WORK(&ds1307->work, ds1307_work);
603 want_irq = true; 557 want_irq = true;
604 } 558 }
605
606 ds1307->reg_addr = DS1337_REG_CONTROL;
607 ds1307->msg[1].len = 2;
608
609 /* get registers that the "rtc" read below won't read... */ 559 /* get registers that the "rtc" read below won't read... */
610 tmp = i2c_transfer(adapter, ds1307->msg, 2); 560 tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
561 DS1337_REG_CONTROL, 2, buf);
611 if (tmp != 2) { 562 if (tmp != 2) {
612 pr_debug("read error %d\n", tmp); 563 pr_debug("read error %d\n", tmp);
613 err = -EIO; 564 err = -EIO;
614 goto exit_free; 565 goto exit_free;
615 } 566 }
616 567
617 ds1307->reg_addr = 0;
618 ds1307->msg[1].len = sizeof(ds1307->regs);
619
620 /* oscillator off? turn it on, so clock can tick. */ 568 /* oscillator off? turn it on, so clock can tick. */
621 if (ds1307->regs[0] & DS1337_BIT_nEOSC) 569 if (ds1307->regs[0] & DS1337_BIT_nEOSC)
622 ds1307->regs[0] &= ~DS1337_BIT_nEOSC; 570 ds1307->regs[0] &= ~DS1337_BIT_nEOSC;
@@ -647,9 +595,8 @@ static int __devinit ds1307_probe(struct i2c_client *client,
647 595
648read_rtc: 596read_rtc:
649 /* read RTC registers */ 597 /* read RTC registers */
650 598 tmp = i2c_smbus_read_i2c_block_data(ds1307->client, 0, 8, buf);
651 tmp = i2c_transfer(adapter, ds1307->msg, 2); 599 if (tmp != 8) {
652 if (tmp != 2) {
653 pr_debug("read error %d\n", tmp); 600 pr_debug("read error %d\n", tmp);
654 err = -EIO; 601 err = -EIO;
655 goto exit_free; 602 goto exit_free;
@@ -707,22 +654,6 @@ read_rtc:
707 break; 654 break;
708 } 655 }
709 656
710 tmp = ds1307->regs[DS1307_REG_SECS];
711 tmp = bcd2bin(tmp & 0x7f);
712 if (tmp > 60)
713 goto exit_bad;
714 tmp = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
715 if (tmp > 60)
716 goto exit_bad;
717
718 tmp = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
719 if (tmp == 0 || tmp > 31)
720 goto exit_bad;
721
722 tmp = bcd2bin(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
723 if (tmp == 0 || tmp > 12)
724 goto exit_bad;
725
726 tmp = ds1307->regs[DS1307_REG_HOUR]; 657 tmp = ds1307->regs[DS1307_REG_HOUR];
727 switch (ds1307->type) { 658 switch (ds1307->type) {
728 case ds_1340: 659 case ds_1340:
@@ -779,13 +710,6 @@ read_rtc:
779 710
780 return 0; 711 return 0;
781 712
782exit_bad:
783 dev_dbg(&client->dev, "%s: %02x %02x %02x %02x %02x %02x %02x\n",
784 "bogus register",
785 ds1307->regs[0], ds1307->regs[1],
786 ds1307->regs[2], ds1307->regs[3],
787 ds1307->regs[4], ds1307->regs[5],
788 ds1307->regs[6]);
789exit_irq: 713exit_irq:
790 if (ds1307->rtc) 714 if (ds1307->rtc)
791 rtc_device_unregister(ds1307->rtc); 715 rtc_device_unregister(ds1307->rtc);
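
The rtc-ds1307 conversion above replaces hand-rolled i2c_msg address/read pairs with the SMBus I2C-block helpers. A hedged sketch of the two equivalent idioms for reading the seven time registers (illustrative only; assumes a bound i2c_client):

#include <linux/i2c.h>

/* Old style: write the register address, then read back 7 bytes. */
static int read_time_regs_old(struct i2c_client *client, u8 *regs)
{
	u8 addr = 0;
	struct i2c_msg msg[2] = {
		{ .addr = client->addr, .flags = 0, .len = 1, .buf = &addr },
		{ .addr = client->addr, .flags = I2C_M_RD, .len = 7, .buf = regs },
	};

	return i2c_transfer(client->adapter, msg, 2) == 2 ? 0 : -EIO;
}

/* New style, as the patch adopts: one SMBus I2C-block read. */
static int read_time_regs_new(struct i2c_client *client, u8 *regs)
{
	return i2c_smbus_read_i2c_block_data(client, 0, 7, regs) == 7 ?
		0 : -EIO;
}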
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index b8f9c00633f3..d82aad5224f0 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -2621,7 +2621,7 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
2621 } 2621 }
2622 } 2622 }
2623 2623
2624 /* double-check if current erp/cqr was successfull */ 2624 /* double-check if current erp/cqr was successful */
2625 if ((cqr->irb.scsw.cmd.cstat == 0x00) && 2625 if ((cqr->irb.scsw.cmd.cstat == 0x00) &&
2626 (cqr->irb.scsw.cmd.dstat == 2626 (cqr->irb.scsw.cmd.dstat ==
2627 (DEV_STAT_CHN_END | DEV_STAT_DEV_END))) { 2627 (DEV_STAT_CHN_END | DEV_STAT_DEV_END))) {
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 05a14536c369..4a39084d9c95 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -199,7 +199,7 @@ struct dasd_ccw_req {
199#define DASD_CQR_ERROR 0x82 /* request is completed with error */ 199#define DASD_CQR_ERROR 0x82 /* request is completed with error */
200#define DASD_CQR_CLEAR_PENDING 0x83 /* request is clear pending */ 200#define DASD_CQR_CLEAR_PENDING 0x83 /* request is clear pending */
201#define DASD_CQR_CLEARED 0x84 /* request was cleared */ 201#define DASD_CQR_CLEARED 0x84 /* request was cleared */
202#define DASD_CQR_SUCCESS 0x85 /* request was successfull */ 202#define DASD_CQR_SUCCESS 0x85 /* request was successful */
203 203
204 204
205/* per dasd_ccw_req flags */ 205/* per dasd_ccw_req flags */
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index 4005c44a404c..71605a179d65 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -801,7 +801,7 @@ tape_3590_done(struct tape_device *device, struct tape_request *request)
801static inline int 801static inline int
802tape_3590_erp_succeded(struct tape_device *device, struct tape_request *request) 802tape_3590_erp_succeded(struct tape_device *device, struct tape_request *request)
803{ 803{
804 DBF_EVENT(3, "Error Recovery successfull for %s\n", 804 DBF_EVENT(3, "Error Recovery successful for %s\n",
805 tape_op_verbose[request->op]); 805 tape_op_verbose[request->op]);
806 return tape_3590_done(device, request); 806 return tape_3590_done(device, request);
807} 807}
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 06b71823f399..659f8a791656 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -379,7 +379,7 @@ int cio_commit_config(struct subchannel *sch)
379 if (ccode < 0) /* -EIO if msch gets a program check. */ 379 if (ccode < 0) /* -EIO if msch gets a program check. */
380 return ccode; 380 return ccode;
381 switch (ccode) { 381 switch (ccode) {
382 case 0: /* successfull */ 382 case 0: /* successful */
383 if (stsch(sch->schid, &schib) || 383 if (stsch(sch->schid, &schib) ||
384 !css_sch_is_valid(&schib)) 384 !css_sch_is_valid(&schib))
385 return -ENODEV; 385 return -ENODEV;
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 744f928a59ea..10cb0f8726e5 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -114,7 +114,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
114 * @count: count of buffers to examine 114 * @count: count of buffers to examine
115 * @auto_ack: automatically acknowledge buffers 115 * @auto_ack: automatically acknowledge buffers
116 * 116 *
117 * Returns the number of successfull extracted equal buffer states. 117 * Returns the number of successfully extracted equal buffer states.
118 * Stops processing if a state is different from the last buffers state. 118 * Stops processing if a state is different from the last buffers state.
119 */ 119 */
120static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, 120static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index b7322976d2b7..256c7bec7bd7 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -884,6 +884,7 @@ config SCSI_IBMVSCSI
884 tristate "IBM Virtual SCSI support" 884 tristate "IBM Virtual SCSI support"
885 depends on PPC_PSERIES || PPC_ISERIES 885 depends on PPC_PSERIES || PPC_ISERIES
886 select SCSI_SRP_ATTRS 886 select SCSI_SRP_ATTRS
887 select VIOPATH if PPC_ISERIES
887 help 888 help
888 This is the IBM POWER Virtual SCSI Client 889 This is the IBM POWER Virtual SCSI Client
889 890
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index b695ab3142d8..3e525e38a5d9 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -457,7 +457,7 @@ config SERIAL_SAMSUNG
457 457
458config SERIAL_SAMSUNG_UARTS 458config SERIAL_SAMSUNG_UARTS
459 int 459 int
460 depends on SERIAL_SAMSUNG 460 depends on ARM && PLAT_S3C
461 default 2 if ARCH_S3C2400 461 default 2 if ARCH_S3C2400
462 default 4 if ARCH_S3C64XX || CPU_S3C2443 462 default 4 if ARCH_S3C64XX || CPU_S3C2443
463 default 3 463 default 3
@@ -1320,13 +1320,30 @@ config SERIAL_NETX_CONSOLE
1320config SERIAL_OF_PLATFORM 1320config SERIAL_OF_PLATFORM
1321 tristate "Serial port on Open Firmware platform bus" 1321 tristate "Serial port on Open Firmware platform bus"
1322 depends on PPC_OF 1322 depends on PPC_OF
1323 depends on SERIAL_8250 1323 depends on SERIAL_8250 || SERIAL_OF_PLATFORM_NWPSERIAL
1324 help 1324 help
1325 If you have a PowerPC based system that has serial ports 1325 If you have a PowerPC based system that has serial ports
1326 on a platform specific bus, you should enable this option. 1326 on a platform specific bus, you should enable this option.
1327 Currently, only 8250 compatible ports are supported, but 1327 Currently, only 8250 compatible ports are supported, but
1328 others can easily be added. 1328 others can easily be added.
1329 1329
1330config SERIAL_OF_PLATFORM_NWPSERIAL
1331 tristate "NWP serial port driver"
1332 depends on PPC_OF && PPC_DCR
1333 select SERIAL_OF_PLATFORM
1334 select SERIAL_CORE_CONSOLE
1335 select SERIAL_CORE
1336 help
1337 This driver supports the Cell network processor NWP serial
1338 device.
1339
1340config SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE
1341 bool "Console on NWP serial port"
1342 depends on SERIAL_OF_PLATFORM_NWPSERIAL=y
1343 select SERIAL_CORE_CONSOLE
1344 help
1345 Support for console output on the NWP serial ports.
1346
1330config SERIAL_QE 1347config SERIAL_QE
1331 tristate "Freescale QUICC Engine serial port support" 1348 tristate "Freescale QUICC Engine serial port support"
1332 depends on QUICC_ENGINE 1349 depends on QUICC_ENGINE
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index dfe775ac45b2..8844c0a03929 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_SERIAL_ATMEL) += atmel_serial.o
72obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o 72obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o
73obj-$(CONFIG_SERIAL_NETX) += netx-serial.o 73obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
74obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o 74obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o
75obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o
75obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o 76obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
76obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o 77obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
77obj-$(CONFIG_SERIAL_QE) += ucc_uart.o 78obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
diff --git a/drivers/serial/nwpserial.c b/drivers/serial/nwpserial.c
new file mode 100644
index 000000000000..32f3eaf0d262
--- /dev/null
+++ b/drivers/serial/nwpserial.c
@@ -0,0 +1,475 @@
1/*
2 * Serial port driver for an NWP UART device
3 *
4 * Copyright (C) 2008 IBM Corp., Benjamin Krill <ben@codiert.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12#include <linux/init.h>
13#include <linux/console.h>
14#include <linux/serial.h>
15#include <linux/serial_reg.h>
16#include <linux/serial_core.h>
17#include <linux/tty.h>
18#include <linux/irqreturn.h>
19#include <linux/mutex.h>
20#include <linux/of_platform.h>
21#include <linux/of_device.h>
22#include <linux/nwpserial.h>
23#include <asm/prom.h>
24#include <asm/dcr.h>
25
26#define NWPSERIAL_NR 2
27
28#define NWPSERIAL_STATUS_RXVALID 0x1
29#define NWPSERIAL_STATUS_TXFULL 0x2
30
31struct nwpserial_port {
32 struct uart_port port;
33 dcr_host_t dcr_host;
34 unsigned int ier;
35 unsigned int mcr;
36};
37
38static DEFINE_MUTEX(nwpserial_mutex);
39static struct nwpserial_port nwpserial_ports[NWPSERIAL_NR];
40
41static void wait_for_bits(struct nwpserial_port *up, int bits)
42{
43 unsigned int status, tmout = 10000;
44
45 /* Wait up to 10ms for the character(s) to be sent. */
46 do {
47 status = dcr_read(up->dcr_host, UART_LSR);
48
49 if (--tmout == 0)
50 break;
51 udelay(1);
52 } while ((status & bits) != bits);
53}
54
55#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE
56static void nwpserial_console_putchar(struct uart_port *port, int c)
57{
58 struct nwpserial_port *up;
59 up = container_of(port, struct nwpserial_port, port);
60 /* check if tx buffer is full */
61 wait_for_bits(up, UART_LSR_THRE);
62 dcr_write(up->dcr_host, UART_TX, c);
63 up->port.icount.tx++;
64}
65
66static void
67nwpserial_console_write(struct console *co, const char *s, unsigned int count)
68{
69 struct nwpserial_port *up = &nwpserial_ports[co->index];
70 unsigned long flags;
71 int locked = 1;
72
73 if (oops_in_progress)
74 locked = spin_trylock_irqsave(&up->port.lock, flags);
75 else
76 spin_lock_irqsave(&up->port.lock, flags);
77
78 /* save and disable interrupt */
79 up->ier = dcr_read(up->dcr_host, UART_IER);
80 dcr_write(up->dcr_host, UART_IER, up->ier & ~UART_IER_RDI);
81
82 uart_console_write(&up->port, s, count, nwpserial_console_putchar);
83
84 /* wait for transmitter to become empty */
85 while ((dcr_read(up->dcr_host, UART_LSR) & UART_LSR_THRE) == 0)
86 cpu_relax();
87
88 /* restore interrupt state */
89 dcr_write(up->dcr_host, UART_IER, up->ier);
90
91 if (locked)
92 spin_unlock_irqrestore(&up->port.lock, flags);
93}
94
95static struct uart_driver nwpserial_reg;
96static struct console nwpserial_console = {
97 .name = "ttySQ",
98 .write = nwpserial_console_write,
99 .device = uart_console_device,
100 .flags = CON_PRINTBUFFER,
101 .index = -1,
102 .data = &nwpserial_reg,
103};
104#define NWPSERIAL_CONSOLE (&nwpserial_console)
105#else
106#define NWPSERIAL_CONSOLE NULL
107#endif /* CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE */
108
109/**************************************************************************/
110
111static int nwpserial_request_port(struct uart_port *port)
112{
113 return 0;
114}
115
116static void nwpserial_release_port(struct uart_port *port)
117{
118 /* N/A */
119}
120
121static void nwpserial_config_port(struct uart_port *port, int flags)
122{
123 port->type = PORT_NWPSERIAL;
124}
125
126static irqreturn_t nwpserial_interrupt(int irq, void *dev_id)
127{
128 struct nwpserial_port *up = dev_id;
129 struct tty_struct *tty = up->port.info->port.tty;
130 irqreturn_t ret;
131 unsigned int iir;
132 unsigned char ch;
133
134 spin_lock(&up->port.lock);
135
136 /* check if the uart was the interrupt source. */
137 iir = dcr_read(up->dcr_host, UART_IIR);
138 if (!iir) {
139 ret = IRQ_NONE;
140 goto out;
141 }
142
143 do {
144 up->port.icount.rx++;
145 ch = dcr_read(up->dcr_host, UART_RX);
146 if (up->port.ignore_status_mask != NWPSERIAL_STATUS_RXVALID)
147 tty_insert_flip_char(tty, ch, TTY_NORMAL);
148 } while (dcr_read(up->dcr_host, UART_RX) & UART_LSR_DR);
149
150 tty_flip_buffer_push(tty);
151 ret = IRQ_HANDLED;
152
153out:
154 spin_unlock(&up->port.lock);
155 return ret;
156}
157
158static int nwpserial_startup(struct uart_port *port)
159{
160 struct nwpserial_port *up;
161 int err;
162
163 up = container_of(port, struct nwpserial_port, port);
164
165 /* disable flow control by default */
166 up->mcr = dcr_read(up->dcr_host, UART_MCR) & ~UART_MCR_AFE;
167 dcr_write(up->dcr_host, UART_MCR, up->mcr);
168
169 /* register interrupt handler */
170 err = request_irq(up->port.irq, nwpserial_interrupt,
171 IRQF_SHARED, "nwpserial", up);
172 if (err)
173 return err;
174
175 /* enable interrupts */
176 up->ier = UART_IER_RDI;
177 dcr_write(up->dcr_host, UART_IER, up->ier);
178
179 /* enable receiving */
180 up->port.ignore_status_mask &= ~NWPSERIAL_STATUS_RXVALID;
181
182 return 0;
183}
184
185static void nwpserial_shutdown(struct uart_port *port)
186{
187 struct nwpserial_port *up;
188 up = container_of(port, struct nwpserial_port, port);
189
190 /* disable receiving */
191 up->port.ignore_status_mask |= NWPSERIAL_STATUS_RXVALID;
192
193 /* disable interrupts from this port */
194 up->ier = 0;
195 dcr_write(up->dcr_host, UART_IER, up->ier);
196
197 /* free irq */
198 free_irq(up->port.irq, port);
199}
200
201static int nwpserial_verify_port(struct uart_port *port,
202 struct serial_struct *ser)
203{
204 return -EINVAL;
205}
206
207static const char *nwpserial_type(struct uart_port *port)
208{
209 return port->type == PORT_NWPSERIAL ? "nwpserial" : NULL;
210}
211
212static void nwpserial_set_termios(struct uart_port *port,
213 struct ktermios *termios, struct ktermios *old)
214{
215 struct nwpserial_port *up;
216 up = container_of(port, struct nwpserial_port, port);
217
218 up->port.read_status_mask = NWPSERIAL_STATUS_RXVALID
219 | NWPSERIAL_STATUS_TXFULL;
220
221 up->port.ignore_status_mask = 0;
222 /* ignore all characters if CREAD is not set */
223 if ((termios->c_cflag & CREAD) == 0)
224 up->port.ignore_status_mask |= NWPSERIAL_STATUS_RXVALID;
225
226 /* Copy back the old hardware settings */
227 if (old)
228 tty_termios_copy_hw(termios, old);
229}
230
231static void nwpserial_break_ctl(struct uart_port *port, int ctl)
232{
233 /* N/A */
234}
235
236static void nwpserial_enable_ms(struct uart_port *port)
237{
238 /* N/A */
239}
240
241static void nwpserial_stop_rx(struct uart_port *port)
242{
243 struct nwpserial_port *up;
244 up = container_of(port, struct nwpserial_port, port);
245 /* don't forward any more data (like !CREAD) */
246 up->port.ignore_status_mask = NWPSERIAL_STATUS_RXVALID;
247}
248
249static void nwpserial_putchar(struct nwpserial_port *up, unsigned char c)
250{
251 /* check if tx buffer is full */
252 wait_for_bits(up, UART_LSR_THRE);
253 dcr_write(up->dcr_host, UART_TX, c);
254 up->port.icount.tx++;
255}
256
257static void nwpserial_start_tx(struct uart_port *port)
258{
259 struct nwpserial_port *up;
260 struct circ_buf *xmit;
261 up = container_of(port, struct nwpserial_port, port);
262 xmit = &up->port.info->xmit;
263
264 if (port->x_char) {
265 nwpserial_putchar(up, up->port.x_char);
266 port->x_char = 0;
267 }
268
269 while (!(uart_circ_empty(xmit) || uart_tx_stopped(&up->port))) {
270 nwpserial_putchar(up, xmit->buf[xmit->tail]);
271 xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE-1);
272 }
273}
274
275static unsigned int nwpserial_get_mctrl(struct uart_port *port)
276{
277 return 0;
278}
279
280static void nwpserial_set_mctrl(struct uart_port *port, unsigned int mctrl)
281{
282 /* N/A */
283}
284
285static void nwpserial_stop_tx(struct uart_port *port)
286{
287 /* N/A */
288}
289
290static unsigned int nwpserial_tx_empty(struct uart_port *port)
291{
292 struct nwpserial_port *up;
293 unsigned long flags;
294 int ret;
295 up = container_of(port, struct nwpserial_port, port);
296
297 spin_lock_irqsave(&up->port.lock, flags);
298 ret = dcr_read(up->dcr_host, UART_LSR);
299 spin_unlock_irqrestore(&up->port.lock, flags);
300
301 return ret & UART_LSR_TEMT ? TIOCSER_TEMT : 0;
302}
303
304static struct uart_ops nwpserial_pops = {
305 .tx_empty = nwpserial_tx_empty,
306 .set_mctrl = nwpserial_set_mctrl,
307 .get_mctrl = nwpserial_get_mctrl,
308 .stop_tx = nwpserial_stop_tx,
309 .start_tx = nwpserial_start_tx,
310 .stop_rx = nwpserial_stop_rx,
311 .enable_ms = nwpserial_enable_ms,
312 .break_ctl = nwpserial_break_ctl,
313 .startup = nwpserial_startup,
314 .shutdown = nwpserial_shutdown,
315 .set_termios = nwpserial_set_termios,
316 .type = nwpserial_type,
317 .release_port = nwpserial_release_port,
318 .request_port = nwpserial_request_port,
319 .config_port = nwpserial_config_port,
320 .verify_port = nwpserial_verify_port,
321};
322
323static struct uart_driver nwpserial_reg = {
324 .owner = THIS_MODULE,
325 .driver_name = "nwpserial",
326 .dev_name = "ttySQ",
327 .major = TTY_MAJOR,
328 .minor = 68,
329 .nr = NWPSERIAL_NR,
330 .cons = NWPSERIAL_CONSOLE,
331};
332
333int nwpserial_register_port(struct uart_port *port)
334{
335 struct nwpserial_port *up = NULL;
336 int ret = -1;
337 int i;
338 static int first = 1;
339 int dcr_len;
340 int dcr_base;
341 struct device_node *dn;
342
343 mutex_lock(&nwpserial_mutex);
344
345 dn = to_of_device(port->dev)->node;
346 if (dn == NULL)
347 goto out;
348
349 /* get dcr base. */
350 dcr_base = dcr_resource_start(dn, 0);
351
352 /* find matching entry */
353 for (i = 0; i < NWPSERIAL_NR; i++)
354 if (nwpserial_ports[i].port.iobase == dcr_base) {
355 up = &nwpserial_ports[i];
356 break;
357 }
358
359 /* we didn't find a matching entry, search for a free port */
360 if (up == NULL)
361 for (i = 0; i < NWPSERIAL_NR; i++)
362 if (nwpserial_ports[i].port.type == PORT_UNKNOWN &&
363 nwpserial_ports[i].port.iobase == 0) {
364 up = &nwpserial_ports[i];
365 break;
366 }
367
368 if (up == NULL) {
369 ret = -EBUSY;
370 goto out;
371 }
372
373 if (first)
374 uart_register_driver(&nwpserial_reg);
375 first = 0;
376
377 up->port.membase = port->membase;
378 up->port.irq = port->irq;
379 up->port.uartclk = port->uartclk;
380 up->port.fifosize = port->fifosize;
381 up->port.regshift = port->regshift;
382 up->port.iotype = port->iotype;
383 up->port.flags = port->flags;
384 up->port.mapbase = port->mapbase;
385 up->port.private_data = port->private_data;
386
387 if (port->dev)
388 up->port.dev = port->dev;
389
390 if (up->port.iobase != dcr_base) {
391 up->port.ops = &nwpserial_pops;
392 up->port.fifosize = 16;
393
394 spin_lock_init(&up->port.lock);
395
396 up->port.iobase = dcr_base;
397 dcr_len = dcr_resource_len(dn, 0);
398
399 up->dcr_host = dcr_map(dn, dcr_base, dcr_len);
400 if (!DCR_MAP_OK(up->dcr_host)) {
401 printk(KERN_ERR "Cannot map DCR resources for NWPSERIAL\n");
402 goto out;
403 }
404 }
405
406 ret = uart_add_one_port(&nwpserial_reg, &up->port);
407 if (ret == 0)
408 ret = up->port.line;
409
410out:
411 mutex_unlock(&nwpserial_mutex);
412
413 return ret;
414}
415EXPORT_SYMBOL(nwpserial_register_port);
416
417void nwpserial_unregister_port(int line)
418{
419 struct nwpserial_port *up = &nwpserial_ports[line];
420 mutex_lock(&nwpserial_mutex);
421 uart_remove_one_port(&nwpserial_reg, &up->port);
422
423 up->port.type = PORT_UNKNOWN;
424
425 mutex_unlock(&nwpserial_mutex);
426}
427EXPORT_SYMBOL(nwpserial_unregister_port);
428
429#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE
430static int __init nwpserial_console_init(void)
431{
432 struct nwpserial_port *up = NULL;
433 struct device_node *dn;
434 const char *name;
435 int dcr_base;
436 int dcr_len;
437 int i;
438
439 /* search for a free port */
440 for (i = 0; i < NWPSERIAL_NR; i++)
441 if (nwpserial_ports[i].port.type == PORT_UNKNOWN) {
442 up = &nwpserial_ports[i];
443 break;
444 }
445
446 if (up == NULL)
447 return -1;
448
449 name = of_get_property(of_chosen, "linux,stdout-path", NULL);
450 if (name == NULL)
451 return -1;
452
453 dn = of_find_node_by_path(name);
454 if (!dn)
455 return -1;
456
457 spin_lock_init(&up->port.lock);
458 up->port.ops = &nwpserial_pops;
459 up->port.type = PORT_NWPSERIAL;
460 up->port.fifosize = 16;
461
462 dcr_base = dcr_resource_start(dn, 0);
463 dcr_len = dcr_resource_len(dn, 0);
464 up->port.iobase = dcr_base;
465
466 up->dcr_host = dcr_map(dn, dcr_base, dcr_len);
467 if (!DCR_MAP_OK(up->dcr_host)) {
468 printk("Cannot map DCR resources for SERIAL");
469 return -1;
470 }
471 register_console(&nwpserial_console);
472 return 0;
473}
474console_initcall(nwpserial_console_init);
475#endif /* CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE */
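
For context, a caller hands nwpserial a partially filled uart_port, and the code above resolves the DCR base from the port's OF device node. A minimal sketch of such a caller follows — hypothetical code, not part of this patch; the probe name and IRQ lookup are illustrative:

/* Hypothetical OF probe forwarding a port to nwpserial (sketch). */
static int my_nwp_probe(struct of_device *ofdev,
			const struct of_device_id *id)
{
	struct uart_port port = { };
	int line;

	port.dev = &ofdev->dev;	/* nwpserial_register_port() reads the
				 * DCR base from this device's OF node */
	port.irq = irq_of_parse_and_map(ofdev->node, 0);

	line = nwpserial_register_port(&port);
	return line < 0 ? line : 0;	/* >= 0 is the allocated line */
}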
diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c
index 8fa0ff561e9f..a821e3a3d664 100644
--- a/drivers/serial/of_serial.c
+++ b/drivers/serial/of_serial.c
@@ -14,6 +14,7 @@
14#include <linux/serial_core.h> 14#include <linux/serial_core.h>
15#include <linux/serial_8250.h> 15#include <linux/serial_8250.h>
16#include <linux/of_platform.h> 16#include <linux/of_platform.h>
17#include <linux/nwpserial.h>
17 18
18#include <asm/prom.h> 19#include <asm/prom.h>
19 20
@@ -99,9 +100,16 @@ static int __devinit of_platform_serial_probe(struct of_device *ofdev,
99 goto out; 100 goto out;
100 101
101 switch (port_type) { 102 switch (port_type) {
103#ifdef CONFIG_SERIAL_8250
102 case PORT_8250 ... PORT_MAX_8250: 104 case PORT_8250 ... PORT_MAX_8250:
103 ret = serial8250_register_port(&port); 105 ret = serial8250_register_port(&port);
104 break; 106 break;
107#endif
108#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL
109 case PORT_NWPSERIAL:
110 ret = nwpserial_register_port(&port);
111 break;
112#endif
105 default: 113 default:
106 /* need to add code for these */ 114 /* need to add code for these */
107 case PORT_UNKNOWN: 115 case PORT_UNKNOWN:
@@ -129,9 +137,16 @@ static int of_platform_serial_remove(struct of_device *ofdev)
129{ 137{
130 struct of_serial_info *info = ofdev->dev.driver_data; 138 struct of_serial_info *info = ofdev->dev.driver_data;
131 switch (info->type) { 139 switch (info->type) {
140#ifdef CONFIG_SERIAL_8250
132 case PORT_8250 ... PORT_MAX_8250: 141 case PORT_8250 ... PORT_MAX_8250:
133 serial8250_unregister_port(info->line); 142 serial8250_unregister_port(info->line);
134 break; 143 break;
144#endif
145#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL
146 case PORT_NWPSERIAL:
147 nwpserial_unregister_port(info->line);
148 break;
149#endif
135 default: 150 default:
136 /* need to add code for these */ 151 /* need to add code for these */
137 break; 152 break;
@@ -148,6 +163,10 @@ static struct of_device_id __devinitdata of_platform_serial_table[] = {
148 { .type = "serial", .compatible = "ns16450", .data = (void *)PORT_16450, }, 163 { .type = "serial", .compatible = "ns16450", .data = (void *)PORT_16450, },
149 { .type = "serial", .compatible = "ns16550", .data = (void *)PORT_16550, }, 164 { .type = "serial", .compatible = "ns16550", .data = (void *)PORT_16550, },
150 { .type = "serial", .compatible = "ns16750", .data = (void *)PORT_16750, }, 165 { .type = "serial", .compatible = "ns16750", .data = (void *)PORT_16750, },
166#ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL
167 { .type = "serial", .compatible = "ibm,qpace-nwp-serial",
168 .data = (void *)PORT_NWPSERIAL, },
169#endif
151 { .type = "serial", .data = (void *)PORT_UNKNOWN, }, 170 { .type = "serial", .data = (void *)PORT_UNKNOWN, },
152 { /* end of list */ }, 171 { /* end of list */ },
153}; 172};
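
The PORT_* constant reaches the probe through the matched entry's .data pointer, cast via unsigned long to stay pointer-sized. A trimmed sketch of the dispatch pattern — hypothetical names; the real probe above also fills a struct uart_port from the node's resources first:

static int my_probe(struct of_device *ofdev, const struct of_device_id *id)
{
	int port_type = (int)(unsigned long)id->data;

	switch (port_type) {
	case PORT_NWPSERIAL:
		return 0;	/* would call nwpserial_register_port() */
	default:
		return -ENODEV;
	}
}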
diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index 2ac52fd8cc11..4e046fed1380 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -24,6 +24,7 @@
24#include <linux/amba/bus.h> 24#include <linux/amba/bus.h>
25#include <linux/amba/clcd.h> 25#include <linux/amba/clcd.h>
26#include <linux/clk.h> 26#include <linux/clk.h>
27#include <linux/hardirq.h>
27 28
28#include <asm/sizes.h> 29#include <asm/sizes.h>
29 30
diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig
index 90616822cd20..96d2f8e4c275 100644
--- a/drivers/w1/masters/Kconfig
+++ b/drivers/w1/masters/Kconfig
@@ -34,6 +34,12 @@ config W1_MASTER_DS2482
34 This driver can also be built as a module. If so, the module 34 This driver can also be built as a module. If so, the module
35 will be called ds2482. 35 will be called ds2482.
36 36
37config W1_MASTER_MXC
38 tristate "Freescale MXC 1-wire busmaster"
39 depends on W1 && ARCH_MXC
40 help
41 Say Y here to enable the MXC 1-wire host.
42
37config W1_MASTER_DS1WM 43config W1_MASTER_DS1WM
38 tristate "Maxim DS1WM 1-wire busmaster" 44 tristate "Maxim DS1WM 1-wire busmaster"
39 depends on W1 && ARM && HAVE_CLK 45 depends on W1 && ARM && HAVE_CLK
diff --git a/drivers/w1/masters/Makefile b/drivers/w1/masters/Makefile
index bc4714a75f3a..c5a3e96fcbab 100644
--- a/drivers/w1/masters/Makefile
+++ b/drivers/w1/masters/Makefile
@@ -5,6 +5,8 @@
5obj-$(CONFIG_W1_MASTER_MATROX) += matrox_w1.o 5obj-$(CONFIG_W1_MASTER_MATROX) += matrox_w1.o
6obj-$(CONFIG_W1_MASTER_DS2490) += ds2490.o 6obj-$(CONFIG_W1_MASTER_DS2490) += ds2490.o
7obj-$(CONFIG_W1_MASTER_DS2482) += ds2482.o 7obj-$(CONFIG_W1_MASTER_DS2482) += ds2482.o
8obj-$(CONFIG_W1_MASTER_MXC) += mxc_w1.o
9
8obj-$(CONFIG_W1_MASTER_DS1WM) += ds1wm.o 10obj-$(CONFIG_W1_MASTER_DS1WM) += ds1wm.o
9obj-$(CONFIG_W1_MASTER_GPIO) += w1-gpio.o 11obj-$(CONFIG_W1_MASTER_GPIO) += w1-gpio.o
10obj-$(CONFIG_HDQ_MASTER_OMAP) += omap_hdq.o 12obj-$(CONFIG_HDQ_MASTER_OMAP) += omap_hdq.o
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
new file mode 100644
index 000000000000..b9d74d0b353e
--- /dev/null
+++ b/drivers/w1/masters/mxc_w1.c
@@ -0,0 +1,211 @@
1/*
2 * Copyright 2005-2008 Freescale Semiconductor, Inc. All Rights Reserved.
3 * Copyright 2008 Luotao Fu, kernel@pengutronix.de
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 */
19
20#include <linux/module.h>
21#include <linux/interrupt.h>
22#include <linux/platform_device.h>
23#include <linux/clk.h>
24#include <linux/delay.h>
25#include <linux/io.h>
26
27#include "../w1.h"
28#include "../w1_int.h"
29#include "../w1_log.h"
30
31/* According to the mx27 datasheet the reset procedure should take up to about
32 * 1350us. We set the timeout to 500*100us = 50ms to be safe. */
33#define MXC_W1_RESET_TIMEOUT 500
34
35/*
36 * MXC W1 Register offsets
37 */
38#define MXC_W1_CONTROL 0x00
39#define MXC_W1_TIME_DIVIDER 0x02
40#define MXC_W1_RESET 0x04
41#define MXC_W1_COMMAND 0x06
42#define MXC_W1_TXRX 0x08
43#define MXC_W1_INTERRUPT 0x0A
44#define MXC_W1_INTERRUPT_EN 0x0C
45
46struct mxc_w1_device {
47 void __iomem *regs;
48 unsigned int clkdiv;
49 struct clk *clk;
50 struct w1_bus_master bus_master;
51};
52
53/*
54 * this is the low level routine to
55 * reset the device on the One Wire interface
56 * on the hardware
57 */
58static u8 mxc_w1_ds2_reset_bus(void *data)
59{
60 u8 reg_val;
61 unsigned int timeout_cnt = 0;
62 struct mxc_w1_device *dev = data;
63
64 __raw_writeb(0x80, (dev->regs + MXC_W1_CONTROL));
65
66 while (1) {
67 reg_val = __raw_readb(dev->regs + MXC_W1_CONTROL);
68
69 if (((reg_val >> 7) & 0x1) == 0 ||
70 timeout_cnt > MXC_W1_RESET_TIMEOUT)
71 break;
72 else
73 timeout_cnt++;
74
75 udelay(100);
76 }
77 return (reg_val >> 7) & 0x1;
78}
79
80/*
81 * this is the low level routine to read/write a bit on the One Wire
82 * interface on the hardware. It writes a 0 if the bit parameter is
83 * 0; otherwise it performs a write-1/read slot.
84 */
85static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit)
86{
87 struct mxc_w1_device *mdev = data;
88 void __iomem *ctrl_addr = mdev->regs + MXC_W1_CONTROL;
89 unsigned int timeout_cnt = 400; /* Takes max. 120us according to
90 * datasheet.
91 */
92
93 __raw_writeb((1 << (5 - bit)), ctrl_addr);
94
95 while (timeout_cnt--) {
96 if (!((__raw_readb(ctrl_addr) >> (5 - bit)) & 0x1))
97 break;
98
99 udelay(1);
100 }
101
102 return ((__raw_readb(ctrl_addr)) >> 3) & 0x1;
103}
104
105static int __init mxc_w1_probe(struct platform_device *pdev)
106{
107 struct mxc_w1_device *mdev;
108 struct resource *res;
109 int err = 0;
110
111 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
112 if (!res)
113 return -ENODEV;
114
115 mdev = kzalloc(sizeof(struct mxc_w1_device), GFP_KERNEL);
116 if (!mdev)
117 return -ENOMEM;
118
119 mdev->clk = clk_get(&pdev->dev, "owire_clk");
120 if (!mdev->clk) {
121 err = -ENODEV;
122 goto failed_clk;
123 }
124
125 mdev->clkdiv = (clk_get_rate(mdev->clk) / 1000000) - 1;
126
127 res = request_mem_region(res->start, resource_size(res),
128 "mxc_w1");
129 if (!res) {
130 err = -EBUSY;
131 goto failed_req;
132 }
133
134 mdev->regs = ioremap(res->start, resource_size(res));
135 if (!mdev->regs) {
136 printk(KERN_ERR "Cannot map mxc_w1 registers\n");
137 goto failed_ioremap;
138 }
139
140 clk_enable(mdev->clk);
141 __raw_writeb(mdev->clkdiv, mdev->regs + MXC_W1_TIME_DIVIDER);
142
143 mdev->bus_master.data = mdev;
144 mdev->bus_master.reset_bus = mxc_w1_ds2_reset_bus;
145 mdev->bus_master.touch_bit = mxc_w1_ds2_touch_bit;
146
147 err = w1_add_master_device(&mdev->bus_master);
148
149 if (err)
150 goto failed_add;
151
152 platform_set_drvdata(pdev, mdev);
153 return 0;
154
155failed_add:
156 iounmap(mdev->regs);
157failed_ioremap:
158 release_mem_region(res->start, resource_size(res));
159failed_req:
160 clk_put(mdev->clk);
161failed_clk:
162 kfree(mdev);
163 return err;
164}
165
166/*
167 * disassociate the w1 device from the driver
168 */
169static int mxc_w1_remove(struct platform_device *pdev)
170{
171 struct mxc_w1_device *mdev = platform_get_drvdata(pdev);
172 struct resource *res;
173
174 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
175
176 w1_remove_master_device(&mdev->bus_master);
177
178 iounmap(mdev->regs);
179 release_mem_region(res->start, resource_size(res));
180 clk_disable(mdev->clk);
181 clk_put(mdev->clk);
182
183 platform_set_drvdata(pdev, NULL);
184
185 return 0;
186}
187
188static struct platform_driver mxc_w1_driver = {
189 .driver = {
190 .name = "mxc_w1",
191 },
192 .probe = mxc_w1_probe,
193 .remove = mxc_w1_remove,
194};
195
196static int __init mxc_w1_init(void)
197{
198 return platform_driver_register(&mxc_w1_driver);
199}
200
201static void mxc_w1_exit(void)
202{
203 platform_driver_unregister(&mxc_w1_driver);
204}
205
206module_init(mxc_w1_init);
207module_exit(mxc_w1_exit);
208
209MODULE_LICENSE("GPL");
210MODULE_AUTHOR("Freescale Semiconductors Inc");
211MODULE_DESCRIPTION("Driver for One-Wire on MXC");
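
For orientation, the w1 core composes all byte I/O from the touch_bit callback registered above: reading a byte is eight write-1/read slots, LSB first. A sketch of what the core's w1_read_8() boils down to (not code from this driver):

/* Sketch: read one byte via eight write-1/read slots, LSB first. */
static u8 read_byte(struct w1_bus_master *bm)
{
	u8 res = 0;
	int i;

	for (i = 0; i < 8; i++)
		res |= bm->touch_bit(bm->data, 1) << i;
	return res;
}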
diff --git a/drivers/w1/w1.h b/drivers/w1/w1.h
index 97304bd83ec9..d8a9709f3449 100644
--- a/drivers/w1/w1.h
+++ b/drivers/w1/w1.h
@@ -210,6 +210,7 @@ u8 w1_read_8(struct w1_master *);
210int w1_reset_bus(struct w1_master *); 210int w1_reset_bus(struct w1_master *);
211u8 w1_calc_crc8(u8 *, int); 211u8 w1_calc_crc8(u8 *, int);
212void w1_write_block(struct w1_master *, const u8 *, int); 212void w1_write_block(struct w1_master *, const u8 *, int);
213void w1_touch_block(struct w1_master *, u8 *, int);
213u8 w1_read_block(struct w1_master *, u8 *, int); 214u8 w1_read_block(struct w1_master *, u8 *, int);
214int w1_reset_select_slave(struct w1_slave *sl); 215int w1_reset_select_slave(struct w1_slave *sl);
215void w1_next_pullup(struct w1_master *, int); 216void w1_next_pullup(struct w1_master *, int);
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c
index 5139c25ca962..442bd8bbd4a5 100644
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -238,7 +238,6 @@ EXPORT_SYMBOL_GPL(w1_read_8);
238 * @param dev the master device 238 * @param dev the master device
239 * @param buf pointer to the data to write 239 * @param buf pointer to the data to write
240 * @param len the number of bytes to write 240 * @param len the number of bytes to write
241 * @return the byte read
242 */ 241 */
243void w1_write_block(struct w1_master *dev, const u8 *buf, int len) 242void w1_write_block(struct w1_master *dev, const u8 *buf, int len)
244{ 243{
@@ -256,6 +255,31 @@ void w1_write_block(struct w1_master *dev, const u8 *buf, int len)
256EXPORT_SYMBOL_GPL(w1_write_block); 255EXPORT_SYMBOL_GPL(w1_write_block);
257 256
258/** 257/**
258 * Touches a series of bytes.
259 *
260 * @param dev the master device
261 * @param buf pointer to the data to write
262 * @param len the number of bytes to write
263 */
264void w1_touch_block(struct w1_master *dev, u8 *buf, int len)
265{
266 int i, j;
267 u8 tmp;
268
269 for (i = 0; i < len; ++i) {
270 tmp = 0;
271 for (j = 0; j < 8; ++j) {
272 if (j == 7)
273 w1_pre_write(dev);
274 tmp |= w1_touch_bit(dev, (buf[i] >> j) & 0x1) << j;
275 }
276
277 buf[i] = tmp;
278 }
279}
280EXPORT_SYMBOL_GPL(w1_touch_block);
281
282/**
259 * Reads a series of bytes. 283 * Reads a series of bytes.
260 * 284 *
261 * @param dev the master device 285 * @param dev the master device
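
w1_touch_block() suits transactions that interleave transmit and receive over one buffer: pre-fill the bytes to send, pad with 0xff where the slave is expected to drive the bus, and the reply lands in place. A usage sketch — the command byte and lengths are illustrative, not taken from any particular slave driver:

	u8 buf[10];

	buf[0] = 0xbe;			/* hypothetical "read data" command */
	memset(buf + 1, 0xff, 9);	/* 0xff slots let the slave answer */
	w1_touch_block(dev, buf, 10);	/* buf[1..9] now holds the reply */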
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 65c5ebd0787e..fdf72851c574 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -47,21 +47,56 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg)
47 cn_netlink_send(m, 0, GFP_KERNEL); 47 cn_netlink_send(m, 0, GFP_KERNEL);
48} 48}
49 49
50static int w1_process_command_master(struct w1_master *dev, struct cn_msg *msg, 50static void w1_send_slave(struct w1_master *dev, u64 rn)
51 struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd) 51{
52 struct cn_msg *msg = dev->priv;
53 struct w1_netlink_msg *hdr = (struct w1_netlink_msg *)(msg + 1);
54 struct w1_netlink_cmd *cmd = (struct w1_netlink_cmd *)(hdr + 1);
55 int avail;
56
57 avail = dev->priv_size - cmd->len;
58
59 if (avail > 8) {
60 u64 *data = (void *)(cmd + 1) + cmd->len;
61
62 *data = rn;
63 cmd->len += 8;
64 hdr->len += 8;
65 msg->len += 8;
66 return;
67 }
68
69 msg->ack++;
70 cn_netlink_send(msg, 0, GFP_KERNEL);
71
72 msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd);
73 hdr->len = sizeof(struct w1_netlink_cmd);
74 cmd->len = 0;
75}
76
77static int w1_process_search_command(struct w1_master *dev, struct cn_msg *msg,
78 unsigned int avail)
52{ 79{
53 dev_dbg(&dev->dev, "%s: %s: cmd=%02x, len=%u.\n", 80 struct w1_netlink_msg *hdr = (struct w1_netlink_msg *)(msg + 1);
54 __func__, dev->name, cmd->cmd, cmd->len); 81 struct w1_netlink_cmd *cmd = (struct w1_netlink_cmd *)(hdr + 1);
82 int search_type = (cmd->cmd == W1_CMD_ALARM_SEARCH)?W1_ALARM_SEARCH:W1_SEARCH;
55 83
56 if (cmd->cmd != W1_CMD_SEARCH && cmd->cmd != W1_CMD_ALARM_SEARCH) 84 dev->priv = msg;
57 return -EINVAL; 85 dev->priv_size = avail;
86
87 w1_search_devices(dev, search_type, w1_send_slave);
88
89 msg->ack = 0;
90 cn_netlink_send(msg, 0, GFP_KERNEL);
91
92 dev->priv = NULL;
93 dev->priv_size = 0;
58 94
59 w1_search_process(dev, (cmd->cmd == W1_CMD_ALARM_SEARCH)?W1_ALARM_SEARCH:W1_SEARCH);
60 return 0; 95 return 0;
61} 96}
62 97
63static int w1_send_read_reply(struct w1_slave *sl, struct cn_msg *msg, 98static int w1_send_read_reply(struct cn_msg *msg, struct w1_netlink_msg *hdr,
64 struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd) 99 struct w1_netlink_cmd *cmd)
65{ 100{
66 void *data; 101 void *data;
67 struct w1_netlink_msg *h; 102 struct w1_netlink_msg *h;
@@ -85,7 +120,8 @@ static int w1_send_read_reply(struct w1_slave *sl, struct cn_msg *msg,
85 memcpy(c, cmd, sizeof(struct w1_netlink_cmd)); 120 memcpy(c, cmd, sizeof(struct w1_netlink_cmd));
86 121
87 cm->ack = msg->seq+1; 122 cm->ack = msg->seq+1;
88 cm->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd) + cmd->len; 123 cm->len = sizeof(struct w1_netlink_msg) +
124 sizeof(struct w1_netlink_cmd) + cmd->len;
89 125
90 h->len = sizeof(struct w1_netlink_cmd) + cmd->len; 126 h->len = sizeof(struct w1_netlink_cmd) + cmd->len;
91 127
@@ -98,36 +134,178 @@ static int w1_send_read_reply(struct w1_slave *sl, struct cn_msg *msg,
98 return err; 134 return err;
99} 135}
100 136
101static int w1_process_command_slave(struct w1_slave *sl, struct cn_msg *msg, 137static int w1_process_command_io(struct w1_master *dev, struct cn_msg *msg,
102 struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd) 138 struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd)
103{ 139{
104 int err = 0; 140 int err = 0;
105 141
106 dev_dbg(&sl->master->dev, "%s: %02x.%012llx.%02x: cmd=%02x, len=%u.\n", 142 switch (cmd->cmd) {
107 __func__, sl->reg_num.family, (unsigned long long)sl->reg_num.id, sl->reg_num.crc, 143 case W1_CMD_TOUCH:
108 cmd->cmd, cmd->len); 144 w1_touch_block(dev, cmd->data, cmd->len);
145 w1_send_read_reply(msg, hdr, cmd);
146 break;
147 case W1_CMD_READ:
148 w1_read_block(dev, cmd->data, cmd->len);
149 w1_send_read_reply(msg, hdr, cmd);
150 break;
151 case W1_CMD_WRITE:
152 w1_write_block(dev, cmd->data, cmd->len);
153 break;
154 default:
155 err = -EINVAL;
156 break;
157 }
158
159 return err;
160}
161
162static int w1_process_command_master(struct w1_master *dev, struct cn_msg *req_msg,
163 struct w1_netlink_msg *req_hdr, struct w1_netlink_cmd *req_cmd)
164{
165 int err = -EINVAL;
166 struct cn_msg *msg;
167 struct w1_netlink_msg *hdr;
168 struct w1_netlink_cmd *cmd;
169
170 msg = kzalloc(PAGE_SIZE, GFP_KERNEL);
171 if (!msg)
172 return -ENOMEM;
173
174 msg->id = req_msg->id;
175 msg->seq = req_msg->seq;
176 msg->ack = 0;
177 msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd);
178
179 hdr = (struct w1_netlink_msg *)(msg + 1);
180 cmd = (struct w1_netlink_cmd *)(hdr + 1);
181
182 hdr->type = W1_MASTER_CMD;
183 hdr->id = req_hdr->id;
184 hdr->len = sizeof(struct w1_netlink_cmd);
185
186 cmd->cmd = req_cmd->cmd;
187 cmd->len = 0;
109 188
110 switch (cmd->cmd) { 189 switch (cmd->cmd) {
111 case W1_CMD_READ: 190 case W1_CMD_SEARCH:
112 w1_read_block(sl->master, cmd->data, cmd->len); 191 case W1_CMD_ALARM_SEARCH:
113 w1_send_read_reply(sl, msg, hdr, cmd); 192 err = w1_process_search_command(dev, msg,
114 break; 193 PAGE_SIZE - msg->len - sizeof(struct cn_msg));
115 case W1_CMD_WRITE: 194 break;
116 w1_write_block(sl->master, cmd->data, cmd->len); 195 case W1_CMD_READ:
117 break; 196 case W1_CMD_WRITE:
118 case W1_CMD_SEARCH: 197 case W1_CMD_TOUCH:
119 case W1_CMD_ALARM_SEARCH: 198 err = w1_process_command_io(dev, req_msg, req_hdr, req_cmd);
120 w1_search_process(sl->master, 199 break;
121 (cmd->cmd == W1_CMD_ALARM_SEARCH)?W1_ALARM_SEARCH:W1_SEARCH); 200 case W1_CMD_RESET:
122 break; 201 err = w1_reset_bus(dev);
123 default: 202 break;
124 err = -1; 203 default:
125 break; 204 err = -EINVAL;
205 break;
126 } 206 }
127 207
208 kfree(msg);
128 return err; 209 return err;
129} 210}
130 211
212static int w1_process_command_slave(struct w1_slave *sl, struct cn_msg *msg,
213 struct w1_netlink_msg *hdr, struct w1_netlink_cmd *cmd)
214{
215 dev_dbg(&sl->master->dev, "%s: %02x.%012llx.%02x: cmd=%02x, len=%u.\n",
216 __func__, sl->reg_num.family, (unsigned long long)sl->reg_num.id,
217 sl->reg_num.crc, cmd->cmd, cmd->len);
218
219 return w1_process_command_io(sl->master, msg, hdr, cmd);
220}
221
222static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mcmd)
223{
224 struct w1_master *m;
225 struct cn_msg *cn;
226 struct w1_netlink_msg *w;
227 u32 *id;
228
229 if (mcmd->type != W1_LIST_MASTERS) {
230 printk(KERN_NOTICE "%s: msg: %x.%x, wrong type: %u, len: %u.\n",
231 __func__, msg->id.idx, msg->id.val, mcmd->type, mcmd->len);
232 return -EPROTO;
233 }
234
235 cn = kmalloc(PAGE_SIZE, GFP_KERNEL);
236 if (!cn)
237 return -ENOMEM;
238
239 cn->id.idx = CN_W1_IDX;
240 cn->id.val = CN_W1_VAL;
241
242 cn->seq = msg->seq;
243 cn->ack = 1;
244 cn->len = sizeof(struct w1_netlink_msg);
245 w = (struct w1_netlink_msg *)(cn + 1);
246
247 w->type = W1_LIST_MASTERS;
248 w->status = 0;
249 w->len = 0;
250 id = (u32 *)(w + 1);
251
252 mutex_lock(&w1_mlock);
253 list_for_each_entry(m, &w1_masters, w1_master_entry) {
254 if (cn->len + sizeof(*id) > PAGE_SIZE - sizeof(struct cn_msg)) {
255 cn_netlink_send(cn, 0, GFP_KERNEL);
256 cn->ack++;
257 cn->len = sizeof(struct w1_netlink_msg);
258 w->len = 0;
259 id = (u32 *)(w + 1);
260 }
261
262 *id = m->id;
263 w->len += sizeof(*id);
264 cn->len += sizeof(*id);
265 id++;
266 }
267 cn->ack = 0;
268 cn_netlink_send(cn, 0, GFP_KERNEL);
269 mutex_unlock(&w1_mlock);
270
271 kfree(cn);
272 return 0;
273}
274
275static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rmsg,
276 struct w1_netlink_cmd *rcmd, int error)
277{
278 struct cn_msg *cmsg;
279 struct w1_netlink_msg *msg;
280 struct w1_netlink_cmd *cmd;
281
282 cmsg = kzalloc(sizeof(*msg) + sizeof(*cmd) + sizeof(*cmsg), GFP_KERNEL);
283 if (!cmsg)
284 return -ENOMEM;
285
286 msg = (struct w1_netlink_msg *)(cmsg + 1);
287 cmd = (struct w1_netlink_cmd *)(msg + 1);
288
289 memcpy(cmsg, rcmsg, sizeof(*cmsg));
290 cmsg->len = sizeof(*msg);
291
292 memcpy(msg, rmsg, sizeof(*msg));
293 msg->len = 0;
294 msg->status = (short)-error;
295
296 if (rcmd) {
297 memcpy(cmd, rcmd, sizeof(*cmd));
298 cmd->len = 0;
299 msg->len += sizeof(*cmd);
300 cmsg->len += sizeof(*cmd);
301 }
302
303 error = cn_netlink_send(cmsg, 0, GFP_KERNEL);
304 kfree(cmsg);
305
306 return error;
307}
308
131static void w1_cn_callback(void *data) 309static void w1_cn_callback(void *data)
132{ 310{
133 struct cn_msg *msg = data; 311 struct cn_msg *msg = data;
@@ -144,6 +322,7 @@ static void w1_cn_callback(void *data)
144 322
145 dev = NULL; 323 dev = NULL;
146 sl = NULL; 324 sl = NULL;
325 cmd = NULL;
147 326
148 memcpy(&id, m->id.id, sizeof(id)); 327 memcpy(&id, m->id.id, sizeof(id));
149#if 0 328#if 0
@@ -155,15 +334,15 @@ static void w1_cn_callback(void *data)
155 break; 334 break;
156 } 335 }
157 336
158 if (!mlen)
159 goto out_cont;
160
161 if (m->type == W1_MASTER_CMD) { 337 if (m->type == W1_MASTER_CMD) {
162 dev = w1_search_master_id(m->id.mst.id); 338 dev = w1_search_master_id(m->id.mst.id);
163 } else if (m->type == W1_SLAVE_CMD) { 339 } else if (m->type == W1_SLAVE_CMD) {
164 sl = w1_search_slave(&id); 340 sl = w1_search_slave(&id);
165 if (sl) 341 if (sl)
166 dev = sl->master; 342 dev = sl->master;
343 } else {
344 err = w1_process_command_root(msg, m);
345 goto out_cont;
167 } 346 }
168 347
169 if (!dev) { 348 if (!dev) {
@@ -171,6 +350,10 @@ static void w1_cn_callback(void *data)
171 goto out_cont; 350 goto out_cont;
172 } 351 }
173 352
353 err = 0;
354 if (!mlen)
355 goto out_cont;
356
174 mutex_lock(&dev->mutex); 357 mutex_lock(&dev->mutex);
175 358
176 if (sl && w1_reset_select_slave(sl)) { 359 if (sl && w1_reset_select_slave(sl)) {
@@ -187,9 +370,12 @@ static void w1_cn_callback(void *data)
187 } 370 }
188 371
189 if (sl) 372 if (sl)
190 w1_process_command_slave(sl, msg, m, cmd); 373 err = w1_process_command_slave(sl, msg, m, cmd);
191 else 374 else
192 w1_process_command_master(dev, msg, m, cmd); 375 err = w1_process_command_master(dev, msg, m, cmd);
376
377 w1_netlink_send_error(msg, m, cmd, err);
378 err = 0;
193 379
194 cmd_data += cmd->len + sizeof(struct w1_netlink_cmd); 380 cmd_data += cmd->len + sizeof(struct w1_netlink_cmd);
195 mlen -= cmd->len + sizeof(struct w1_netlink_cmd); 381 mlen -= cmd->len + sizeof(struct w1_netlink_cmd);
@@ -200,6 +386,8 @@ out_up:
200 atomic_dec(&sl->refcnt); 386 atomic_dec(&sl->refcnt);
201 mutex_unlock(&dev->mutex); 387 mutex_unlock(&dev->mutex);
202out_cont: 388out_cont:
389 if (!cmd || err)
390 w1_netlink_send_error(msg, m, cmd, err);
203 msg->len -= sizeof(struct w1_netlink_msg) + m->len; 391 msg->len -= sizeof(struct w1_netlink_msg) + m->len;
204 m = (struct w1_netlink_msg *)(((u8 *)m) + sizeof(struct w1_netlink_msg) + m->len); 392 m = (struct w1_netlink_msg *)(((u8 *)m) + sizeof(struct w1_netlink_msg) + m->len);
205 393
@@ -209,11 +397,6 @@ out_cont:
209 if (err == -ENODEV) 397 if (err == -ENODEV)
210 err = 0; 398 err = 0;
211 } 399 }
212#if 0
213 if (err) {
214 printk("%s: malformed message. Dropping.\n", __func__);
215 }
216#endif
217} 400}
218 401
219int w1_init_netlink(void) 402int w1_init_netlink(void)
diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h
index 56122b9e9294..27e950f935b1 100644
--- a/drivers/w1/w1_netlink.h
+++ b/drivers/w1/w1_netlink.h
@@ -34,12 +34,13 @@ enum w1_netlink_message_types {
34 W1_MASTER_REMOVE, 34 W1_MASTER_REMOVE,
35 W1_MASTER_CMD, 35 W1_MASTER_CMD,
36 W1_SLAVE_CMD, 36 W1_SLAVE_CMD,
37 W1_LIST_MASTERS,
37}; 38};
38 39
39struct w1_netlink_msg 40struct w1_netlink_msg
40{ 41{
41 __u8 type; 42 __u8 type;
42 __u8 reserved; 43 __u8 status;
43 __u16 len; 44 __u16 len;
44 union { 45 union {
45 __u8 id[8]; 46 __u8 id[8];
@@ -51,10 +52,15 @@ struct w1_netlink_msg
51 __u8 data[0]; 52 __u8 data[0];
52}; 53};
53 54
54#define W1_CMD_READ 0x0 55enum w1_commands {
55#define W1_CMD_WRITE 0x1 56 W1_CMD_READ = 0,
56#define W1_CMD_SEARCH 0x2 57 W1_CMD_WRITE,
57#define W1_CMD_ALARM_SEARCH 0x3 58 W1_CMD_SEARCH,
59 W1_CMD_ALARM_SEARCH,
60 W1_CMD_TOUCH,
61 W1_CMD_RESET,
62 W1_CMD_MAX,
63};
58 64
59struct w1_netlink_cmd 65struct w1_netlink_cmd
60{ 66{
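
A userspace sketch of driving the new W1_LIST_MASTERS command over the connector socket; it assumes the w1_netlink.h definitions above are available to the build, takes an already bound NETLINK_CONNECTOR socket, and omits the reply path and error handling:

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/connector.h>
#include "w1_netlink.h"

static int w1_list_masters(int sock)
{
	char buf[NLMSG_SPACE(sizeof(struct cn_msg) +
			     sizeof(struct w1_netlink_msg))];
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
	struct cn_msg *cn = NLMSG_DATA(nlh);
	struct w1_netlink_msg *m = (struct w1_netlink_msg *)(cn + 1);

	memset(buf, 0, sizeof(buf));
	cn->id.idx = CN_W1_IDX;
	cn->id.val = CN_W1_VAL;
	cn->len = sizeof(*m);
	m->type = W1_LIST_MASTERS;	/* replies carry u32 master ids */

	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*cn) + cn->len);
	nlh->nlmsg_type = NLMSG_DONE;
	return send(sock, nlh, nlh->nlmsg_len, 0);
}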
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 4b75a16de009..526187c8a12d 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -17,3 +17,27 @@ config XEN_SCRUB_PAGES
17 is not accidentally visible to other domains. It is more 17 is not accidentally visible to other domains. It is more
18 secure, but slightly less efficient. 18 secure, but slightly less efficient.
19 If in doubt, say yes. 19 If in doubt, say yes.
20
21config XENFS
22 tristate "Xen filesystem"
23 depends on XEN
24 default y
25 help
26 The xen filesystem provides a way for domains to share
27 information with each other and with the hypervisor.
28 For example, by reading and writing the "xenbus" file, guests
29 may pass arbitrary information to the initial domain.
30 If in doubt, say yes.
31
32config XEN_COMPAT_XENFS
33 bool "Create compatibility mount point /proc/xen"
34 depends on XENFS
35 default y
36 help
37 The old xenstore userspace tools expect to find "xenbus"
38 under /proc/xen, but "xenbus" is now found at the root of the
39 xenfs filesystem. Selecting this causes the kernel to create
40 the compatibility mount point /proc/xen if it is running on
41 a xen platform.
42 If in doubt, say yes.
43
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index d2a8fdf0e191..ff8accc9e103 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,5 +1,7 @@
1obj-y += grant-table.o features.o events.o manage.o 1obj-y += grant-table.o features.o events.o manage.o
2obj-y += xenbus/ 2obj-y += xenbus/
3
3obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 4obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
4obj-$(CONFIG_XEN_XENCOMM) += xencomm.o 5obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
5obj-$(CONFIG_XEN_BALLOON) += balloon.o 6obj-$(CONFIG_XEN_BALLOON) += balloon.o
7obj-$(CONFIG_XENFS) += xenfs/
\ No newline at end of file
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 9678b3e98c63..92a1ef80a288 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -136,7 +136,6 @@ EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
136/** 136/**
137 * xenbus_switch_state 137 * xenbus_switch_state
138 * @dev: xenbus device 138 * @dev: xenbus device
139 * @xbt: transaction handle
140 * @state: new state 139 * @state: new state
141 * 140 *
142 * Advertise in the store a change of the given driver to the given new_state. 141 * Advertise in the store a change of the given driver to the given new_state.
@@ -267,7 +266,7 @@ EXPORT_SYMBOL_GPL(xenbus_dev_error);
267 * @fmt: error message format 266 * @fmt: error message format
268 * 267 *
269 * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by 268 * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
270 * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly 269 * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
271 * closedown of this driver and its peer. 270 * closedown of this driver and its peer.
272 */ 271 */
273 272
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index b2a03184a246..773d1cf23283 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -40,6 +40,7 @@
40#include <linux/ctype.h> 40#include <linux/ctype.h>
41#include <linux/fcntl.h> 41#include <linux/fcntl.h>
42#include <linux/mm.h> 42#include <linux/mm.h>
43#include <linux/proc_fs.h>
43#include <linux/notifier.h> 44#include <linux/notifier.h>
44#include <linux/kthread.h> 45#include <linux/kthread.h>
45#include <linux/mutex.h> 46#include <linux/mutex.h>
@@ -55,7 +56,10 @@
55#include "xenbus_comms.h" 56#include "xenbus_comms.h"
56#include "xenbus_probe.h" 57#include "xenbus_probe.h"
57 58
59
58int xen_store_evtchn; 60int xen_store_evtchn;
61EXPORT_SYMBOL(xen_store_evtchn);
62
59struct xenstore_domain_interface *xen_store_interface; 63struct xenstore_domain_interface *xen_store_interface;
60static unsigned long xen_store_mfn; 64static unsigned long xen_store_mfn;
61 65
@@ -166,6 +170,9 @@ static int read_backend_details(struct xenbus_device *xendev)
166 return read_otherend_details(xendev, "backend-id", "backend"); 170 return read_otherend_details(xendev, "backend-id", "backend");
167} 171}
168 172
173static struct device_attribute xenbus_dev_attrs[] = {
174 __ATTR_NULL
175};
169 176
170/* Bus type for frontend drivers. */ 177/* Bus type for frontend drivers. */
171static struct xen_bus_type xenbus_frontend = { 178static struct xen_bus_type xenbus_frontend = {
@@ -174,12 +181,13 @@ static struct xen_bus_type xenbus_frontend = {
174 .get_bus_id = frontend_bus_id, 181 .get_bus_id = frontend_bus_id,
175 .probe = xenbus_probe_frontend, 182 .probe = xenbus_probe_frontend,
176 .bus = { 183 .bus = {
177 .name = "xen", 184 .name = "xen",
178 .match = xenbus_match, 185 .match = xenbus_match,
179 .uevent = xenbus_uevent, 186 .uevent = xenbus_uevent,
180 .probe = xenbus_dev_probe, 187 .probe = xenbus_dev_probe,
181 .remove = xenbus_dev_remove, 188 .remove = xenbus_dev_remove,
182 .shutdown = xenbus_dev_shutdown, 189 .shutdown = xenbus_dev_shutdown,
190 .dev_attrs = xenbus_dev_attrs,
183 }, 191 },
184}; 192};
185 193
@@ -852,6 +860,14 @@ static int __init xenbus_probe_init(void)
852 if (!xen_initial_domain()) 860 if (!xen_initial_domain())
853 xenbus_probe(NULL); 861 xenbus_probe(NULL);
854 862
863#ifdef CONFIG_XEN_COMPAT_XENFS
864 /*
865 * Create xenfs mountpoint in /proc for compatibility with
866 * utilities that expect to find "xenbus" under "/proc/xen".
867 */
868 proc_mkdir("xen", NULL);
869#endif
870
855 return 0; 871 return 0;
856 872
857 out_unreg_back: 873 out_unreg_back:
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 7f2f91c0e11d..e325eab4724d 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -184,6 +184,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
184 184
185 return ret; 185 return ret;
186} 186}
187EXPORT_SYMBOL(xenbus_dev_request_and_reply);
187 188
188/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ 189/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
189static void *xs_talkv(struct xenbus_transaction t, 190static void *xs_talkv(struct xenbus_transaction t,
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
new file mode 100644
index 000000000000..25275c3bbdff
--- /dev/null
+++ b/drivers/xen/xenfs/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_XENFS) += xenfs.o
2
3xenfs-objs = super.o xenbus.o
\ No newline at end of file
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
new file mode 100644
index 000000000000..515741a8e6b8
--- /dev/null
+++ b/drivers/xen/xenfs/super.c
@@ -0,0 +1,64 @@
1/*
2 * xenfs.c - a filesystem for passing info between a domain and
3 * the hypervisor.
4 *
5 * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem
6 * and /proc/xen compatibility mount point.
7 * Turned xenfs into a loadable module.
8 */
9
10#include <linux/kernel.h>
11#include <linux/errno.h>
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/magic.h>
15
16#include "xenfs.h"
17
18#include <asm/xen/hypervisor.h>
19
20MODULE_DESCRIPTION("Xen filesystem");
21MODULE_LICENSE("GPL");
22
23static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
24{
25 static struct tree_descr xenfs_files[] = {
26 [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR},
27 {""},
28 };
29
30 return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
31}
32
33static int xenfs_get_sb(struct file_system_type *fs_type,
34 int flags, const char *dev_name,
35 void *data, struct vfsmount *mnt)
36{
37 return get_sb_single(fs_type, flags, data, xenfs_fill_super, mnt);
38}
39
40static struct file_system_type xenfs_type = {
41 .owner = THIS_MODULE,
42 .name = "xenfs",
43 .get_sb = xenfs_get_sb,
44 .kill_sb = kill_litter_super,
45};
46
47static int __init xenfs_init(void)
48{
49 if (xen_pv_domain())
50 return register_filesystem(&xenfs_type);
51
52 printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n");
53 return 0;
54}
55
56static void __exit xenfs_exit(void)
57{
58 if (xen_pv_domain())
59 unregister_filesystem(&xenfs_type);
60}
61
62module_init(xenfs_init);
63module_exit(xenfs_exit);
64
diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c
new file mode 100644
index 000000000000..875a4c59c594
--- /dev/null
+++ b/drivers/xen/xenfs/xenbus.c
@@ -0,0 +1,593 @@
1/*
2 * Driver giving user-space access to the kernel's xenbus connection
3 * to xenstore.
4 *
5 * Copyright (c) 2005, Christian Limpach
6 * Copyright (c) 2005, Rusty Russell, IBM Corporation
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 *
32 * Changes:
33 * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem
34 * and /proc/xen compatibility mount point.
35 * Turned xenfs into a loadable module.
36 */
37
38#include <linux/kernel.h>
39#include <linux/errno.h>
40#include <linux/uio.h>
41#include <linux/notifier.h>
42#include <linux/wait.h>
43#include <linux/fs.h>
44#include <linux/poll.h>
45#include <linux/mutex.h>
46#include <linux/spinlock.h>
47#include <linux/mount.h>
48#include <linux/pagemap.h>
49#include <linux/uaccess.h>
50#include <linux/init.h>
51#include <linux/namei.h>
52#include <linux/string.h>
53
54#include "xenfs.h"
55#include "../xenbus/xenbus_comms.h"
56
57#include <xen/xenbus.h>
58#include <asm/xen/hypervisor.h>
59
60/*
61 * An element of a list of outstanding transactions, for which we're
62 * still waiting for a reply.
63 */
64struct xenbus_transaction_holder {
65 struct list_head list;
66 struct xenbus_transaction handle;
67};
68
69/*
70 * A buffer of data on the queue.
71 */
72struct read_buffer {
73 struct list_head list;
74 unsigned int cons;
75 unsigned int len;
76 char msg[];
77};
78
79struct xenbus_file_priv {
80 /*
81 * msgbuffer_mutex is held while partial requests are built up
82 * and complete requests are acted on. It therefore protects
83 * the "transactions" and "watches" lists, and the partial
84 * request length and buffer.
85 *
86 * reply_mutex protects the reply being built up to return to
87 * usermode. It nests inside msgbuffer_mutex but may be held
88 * alone during a watch callback.
89 */
90 struct mutex msgbuffer_mutex;
91
92 /* In-progress transactions */
93 struct list_head transactions;
94
95 /* Active watches. */
96 struct list_head watches;
97
98 /* Partial request. */
99 unsigned int len;
100 union {
101 struct xsd_sockmsg msg;
102 char buffer[PAGE_SIZE];
103 } u;
104
105 /* Response queue. */
106 struct mutex reply_mutex;
107 struct list_head read_buffers;
108 wait_queue_head_t read_waitq;
109
110};
111
112/* Read out any raw xenbus messages queued up. */
113static ssize_t xenbus_file_read(struct file *filp,
114 char __user *ubuf,
115 size_t len, loff_t *ppos)
116{
117 struct xenbus_file_priv *u = filp->private_data;
118 struct read_buffer *rb;
119 unsigned i;
120 int ret;
121
122 mutex_lock(&u->reply_mutex);
123 while (list_empty(&u->read_buffers)) {
124 mutex_unlock(&u->reply_mutex);
125 ret = wait_event_interruptible(u->read_waitq,
126 !list_empty(&u->read_buffers));
127 if (ret)
128 return ret;
129 mutex_lock(&u->reply_mutex);
130 }
131
132 rb = list_entry(u->read_buffers.next, struct read_buffer, list);
133 i = 0;
134 while (i < len) {
135 unsigned sz = min((unsigned)len - i, rb->len - rb->cons);
136
137 ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
138
139 i += sz - ret;
140 rb->cons += sz - ret;
141
142 if (ret != sz) {
143 if (i == 0)
144 i = -EFAULT;
145 goto out;
146 }
147
148 /* Clear out buffer if it has been consumed */
149 if (rb->cons == rb->len) {
150 list_del(&rb->list);
151 kfree(rb);
152 if (list_empty(&u->read_buffers))
153 break;
154 rb = list_entry(u->read_buffers.next,
155 struct read_buffer, list);
156 }
157 }
158
159out:
160 mutex_unlock(&u->reply_mutex);
161 return i;
162}
163
164/*
165 * Add a buffer to the queue. Caller must hold the appropriate lock
166 * if the queue is not local. (Commonly the caller will build up
167 * multiple queued buffers on a temporary local list, and then add it
168 * to the appropriate list under lock once all the buffers have been
169 * successfully allocated.)
170 */
171static int queue_reply(struct list_head *queue, const void *data, size_t len)
172{
173 struct read_buffer *rb;
174
175 if (len == 0)
176 return 0;
177
178 rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
179 if (rb == NULL)
180 return -ENOMEM;
181
182 rb->cons = 0;
183 rb->len = len;
184
185 memcpy(rb->msg, data, len);
186
187 list_add_tail(&rb->list, queue);
188 return 0;
189}
190
191/*
192 * Free all the read_buffers on a list.
193 * Caller must have sole reference to list.
194 */
195static void queue_cleanup(struct list_head *list)
196{
197 struct read_buffer *rb;
198
199 while (!list_empty(list)) {
200 rb = list_entry(list->next, struct read_buffer, list);
201 list_del(list->next);
202 kfree(rb);
203 }
204}
205
206struct watch_adapter {
207 struct list_head list;
208 struct xenbus_watch watch;
209 struct xenbus_file_priv *dev_data;
210 char *token;
211};
212
213static void free_watch_adapter(struct watch_adapter *watch)
214{
215 kfree(watch->watch.node);
216 kfree(watch->token);
217 kfree(watch);
218}
219
220static struct watch_adapter *alloc_watch_adapter(const char *path,
221 const char *token)
222{
223 struct watch_adapter *watch;
224
225 watch = kzalloc(sizeof(*watch), GFP_KERNEL);
226 if (watch == NULL)
227 goto out_fail;
228
229 watch->watch.node = kstrdup(path, GFP_KERNEL);
230 if (watch->watch.node == NULL)
231 goto out_free;
232
233 watch->token = kstrdup(token, GFP_KERNEL);
234 if (watch->token == NULL)
235 goto out_free;
236
237 return watch;
238
239out_free:
240 free_watch_adapter(watch);
241
242out_fail:
243 return NULL;
244}
245
246static void watch_fired(struct xenbus_watch *watch,
247 const char **vec,
248 unsigned int len)
249{
250 struct watch_adapter *adap;
251 struct xsd_sockmsg hdr;
252 const char *path, *token;
253 int path_len, tok_len, body_len, data_len = 0;
254 int ret;
255 LIST_HEAD(staging_q);
256
257 adap = container_of(watch, struct watch_adapter, watch);
258
259 path = vec[XS_WATCH_PATH];
260 token = adap->token;
261
262 path_len = strlen(path) + 1;
263 tok_len = strlen(token) + 1;
264 if (len > 2)
265 data_len = vec[len] - vec[2] + 1;
266 body_len = path_len + tok_len + data_len;
267
268 hdr.type = XS_WATCH_EVENT;
269 hdr.len = body_len;
270
271 mutex_lock(&adap->dev_data->reply_mutex);
272
273 ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
274 if (!ret)
275 ret = queue_reply(&staging_q, path, path_len);
276 if (!ret)
277 ret = queue_reply(&staging_q, token, tok_len);
278 if (!ret && len > 2)
279 ret = queue_reply(&staging_q, vec[2], data_len);
280
281 if (!ret) {
282 /* success: pass reply list onto watcher */
283 list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
284 wake_up(&adap->dev_data->read_waitq);
285 } else
286 queue_cleanup(&staging_q);
287
288 mutex_unlock(&adap->dev_data->reply_mutex);
289}
290
291static int xenbus_write_transaction(unsigned msg_type,
292 struct xenbus_file_priv *u)
293{
294 int rc, ret;
295 void *reply;
296 struct xenbus_transaction_holder *trans = NULL;
297 LIST_HEAD(staging_q);
298
299 if (msg_type == XS_TRANSACTION_START) {
300 trans = kmalloc(sizeof(*trans), GFP_KERNEL);
301 if (!trans) {
302 rc = -ENOMEM;
303 goto out;
304 }
305 }
306
307 reply = xenbus_dev_request_and_reply(&u->u.msg);
308 if (IS_ERR(reply)) {
309 kfree(trans);
310 rc = PTR_ERR(reply);
311 goto out;
312 }
313
314 if (msg_type == XS_TRANSACTION_START) {
315 trans->handle.id = simple_strtoul(reply, NULL, 0);
316
317 list_add(&trans->list, &u->transactions);
318 } else if (msg_type == XS_TRANSACTION_END) {
319 list_for_each_entry(trans, &u->transactions, list)
320 if (trans->handle.id == u->u.msg.tx_id)
321 break;
322 BUG_ON(&trans->list == &u->transactions);
323 list_del(&trans->list);
324
325 kfree(trans);
326 }
327
328 mutex_lock(&u->reply_mutex);
329 ret = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
330 if (!ret)
331 ret = queue_reply(&staging_q, reply, u->u.msg.len);
332 if (!ret) {
333 list_splice_tail(&staging_q, &u->read_buffers);
334 wake_up(&u->read_waitq);
335 } else {
336 queue_cleanup(&staging_q);
337 rc = ret;
338 }
339 mutex_unlock(&u->reply_mutex);
340
341 kfree(reply);
342
343out:
344 return rc;
345}
346
347static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
348{
349 struct watch_adapter *watch, *tmp_watch;
350 char *path, *token;
351 int err, rc;
352 LIST_HEAD(staging_q);
353
354 path = u->u.buffer + sizeof(u->u.msg);
355 token = memchr(path, 0, u->u.msg.len);
356 if (token == NULL) {
357 rc = -EILSEQ;
358 goto out;
359 }
360 token++;
361
362 if (msg_type == XS_WATCH) {
363 watch = alloc_watch_adapter(path, token);
364 if (watch == NULL) {
365 rc = -ENOMEM;
366 goto out;
367 }
368
369 watch->watch.callback = watch_fired;
370 watch->dev_data = u;
371
372 err = register_xenbus_watch(&watch->watch);
373 if (err) {
374 free_watch_adapter(watch);
375 rc = err;
376 goto out;
377 }
378 list_add(&watch->list, &u->watches);
379 } else {
380 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
381 if (!strcmp(watch->token, token) &&
382 !strcmp(watch->watch.node, path)) {
383 unregister_xenbus_watch(&watch->watch);
384 list_del(&watch->list);
385 free_watch_adapter(watch);
386 break;
387 }
388 }
389 }
390
391 /* Success. Synthesize a reply to say all is OK. */
392 {
393 struct {
394 struct xsd_sockmsg hdr;
395 char body[3];
396 } __packed reply = {
397 {
398 .type = msg_type,
399 .len = sizeof(reply.body)
400 },
401 "OK"
402 };
403
404 mutex_lock(&u->reply_mutex);
405 rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
406 mutex_unlock(&u->reply_mutex);
407 }
408
409out:
410 return rc;
411}
412
413static ssize_t xenbus_file_write(struct file *filp,
414 const char __user *ubuf,
415 size_t len, loff_t *ppos)
416{
417 struct xenbus_file_priv *u = filp->private_data;
418 uint32_t msg_type;
419 int rc = len;
420 int ret;
421 LIST_HEAD(staging_q);
422
423 /*
424 * We're expecting usermode to be writing properly formed
425 * xenbus messages. If they write an incomplete message we
426 * buffer it up. Once it is complete, we act on it.
427 */
428
429 /*
430 * Make sure concurrent writers can't stomp all over each
431 * other's messages and make a mess of our partial message
432 * buffer. We don't make any attempt to stop multiple
433 * writers from making a mess of each other's incomplete
434 * messages; we're just trying to guarantee our own internal
435 * consistency and make sure that single writes are handled
436 * atomically.
437 */
438 mutex_lock(&u->msgbuffer_mutex);
439
440 /* Get this out of the way early to avoid confusion */
441 if (len == 0)
442 goto out;
443
444 /* Can't write a xenbus message larger than we can buffer */
445 if ((len + u->len) > sizeof(u->u.buffer)) {
446 /* On error, dump existing buffer */
447 u->len = 0;
448 rc = -EINVAL;
449 goto out;
450 }
451
452 ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
453
454 if (ret == len) {
455 rc = -EFAULT;
456 goto out;
457 }
458
459 /* Deal with a partial copy. */
460 len -= ret;
461 rc = len;
462
463 u->len += len;
464
465 /* Return if we haven't got a full message yet */
466 if (u->len < sizeof(u->u.msg))
467 goto out; /* not even the header yet */
468
469 /* If we're expecting a message that's larger than we can
470 possibly send, dump what we have and return an error. */
471 if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
472 rc = -E2BIG;
473 u->len = 0;
474 goto out;
475 }
476
477 if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
478 goto out; /* incomplete data portion */
479
480 /*
481 * OK, now we have a complete message. Do something with it.
482 */
483
484 msg_type = u->u.msg.type;
485
486 switch (msg_type) {
487 case XS_TRANSACTION_START:
488 case XS_TRANSACTION_END:
489 case XS_DIRECTORY:
490 case XS_READ:
491 case XS_GET_PERMS:
492 case XS_RELEASE:
493 case XS_GET_DOMAIN_PATH:
494 case XS_WRITE:
495 case XS_MKDIR:
496 case XS_RM:
497 case XS_SET_PERMS:
498 /* Send out a transaction */
499 ret = xenbus_write_transaction(msg_type, u);
500 break;
501
502 case XS_WATCH:
503 case XS_UNWATCH:
504 /* (Un)Ask for some path to be watched for changes */
505 ret = xenbus_write_watch(msg_type, u);
506 break;
507
508 default:
509 ret = -EINVAL;
510 break;
511 }
512 if (ret != 0)
513 rc = ret;
514
515 /* Buffered message consumed */
516 u->len = 0;
517
518 out:
519 mutex_unlock(&u->msgbuffer_mutex);
520 return rc;
521}
522
523static int xenbus_file_open(struct inode *inode, struct file *filp)
524{
525 struct xenbus_file_priv *u;
526
527 if (xen_store_evtchn == 0)
528 return -ENOENT;
529
530 nonseekable_open(inode, filp);
531
532 u = kzalloc(sizeof(*u), GFP_KERNEL);
533 if (u == NULL)
534 return -ENOMEM;
535
536 INIT_LIST_HEAD(&u->transactions);
537 INIT_LIST_HEAD(&u->watches);
538 INIT_LIST_HEAD(&u->read_buffers);
539 init_waitqueue_head(&u->read_waitq);
540
541 mutex_init(&u->reply_mutex);
542 mutex_init(&u->msgbuffer_mutex);
543
544 filp->private_data = u;
545
546 return 0;
547}
548
549static int xenbus_file_release(struct inode *inode, struct file *filp)
550{
551 struct xenbus_file_priv *u = filp->private_data;
552 struct xenbus_transaction_holder *trans, *tmp;
553 struct watch_adapter *watch, *tmp_watch;
554
555 /*
556 * No need for locking here because there are no other users,
557 * by definition.
558 */
559
560 list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
561 xenbus_transaction_end(trans->handle, 1);
562 list_del(&trans->list);
563 kfree(trans);
564 }
565
566 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
567 unregister_xenbus_watch(&watch->watch);
568 list_del(&watch->list);
569 free_watch_adapter(watch);
570 }
571
572 kfree(u);
573
574 return 0;
575}
576
577static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
578{
579 struct xenbus_file_priv *u = file->private_data;
580
581 poll_wait(file, &u->read_waitq, wait);
582 if (!list_empty(&u->read_buffers))
583 return POLLIN | POLLRDNORM;
584 return 0;
585}
586
587const struct file_operations xenbus_file_ops = {
588 .read = xenbus_file_read,
589 .write = xenbus_file_write,
590 .open = xenbus_file_open,
591 .release = xenbus_file_release,
592 .poll = xenbus_file_poll,
593};
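
Since xenbus_file_write() above only acts on complete xsd_sockmsg-framed requests, a userspace client writes whole frames and reads framed replies back. A rough sketch of an XS_READ against the compatibility mount point; the include path, buffer sizes and (absent) error handling are illustrative:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <xen/io/xs_wire.h>	/* struct xsd_sockmsg, XS_READ */

static int xs_read_key(const char *key, char *val, size_t vallen)
{
	char req[sizeof(struct xsd_sockmsg) + 128];
	struct xsd_sockmsg *hdr = (struct xsd_sockmsg *)req;
	int fd = open("/proc/xen/xenbus", O_RDWR);

	if (fd < 0)
		return -1;

	hdr->type = XS_READ;
	hdr->req_id = 0;
	hdr->tx_id = 0;		/* 0 = no transaction */
	hdr->len = strlen(key) + 1;
	memcpy(req + sizeof(*hdr), key, hdr->len);
	write(fd, req, sizeof(*hdr) + hdr->len);

	/* The reply is another xsd_sockmsg followed by the value. */
	read(fd, hdr, sizeof(*hdr));
	read(fd, val, hdr->len < vallen ? hdr->len : vallen);
	close(fd);
	return 0;
}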
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
new file mode 100644
index 000000000000..51f08b2d0bf1
--- /dev/null
+++ b/drivers/xen/xenfs/xenfs.h
@@ -0,0 +1,6 @@
1#ifndef _XENFS_XENBUS_H
2#define _XENFS_XENBUS_H
3
4extern const struct file_operations xenbus_file_ops;
5
6#endif /* _XENFS_XENBUS_H */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c41fa2af7677..e3ff2b9e602f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -152,8 +152,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152 elf_addr_t __user *sp; 152 elf_addr_t __user *sp;
153 elf_addr_t __user *u_platform; 153 elf_addr_t __user *u_platform;
154 elf_addr_t __user *u_base_platform; 154 elf_addr_t __user *u_base_platform;
155 elf_addr_t __user *u_rand_bytes;
155 const char *k_platform = ELF_PLATFORM; 156 const char *k_platform = ELF_PLATFORM;
156 const char *k_base_platform = ELF_BASE_PLATFORM; 157 const char *k_base_platform = ELF_BASE_PLATFORM;
158 unsigned char k_rand_bytes[16];
157 int items; 159 int items;
158 elf_addr_t *elf_info; 160 elf_addr_t *elf_info;
159 int ei_index = 0; 161 int ei_index = 0;
@@ -196,6 +198,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
196 return -EFAULT; 198 return -EFAULT;
197 } 199 }
198 200
201 /*
202 * Generate 16 random bytes for userspace PRNG seeding.
203 */
204 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
205 u_rand_bytes = (elf_addr_t __user *)
206 STACK_ALLOC(p, sizeof(k_rand_bytes));
207 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
208 return -EFAULT;
209
199 /* Create the ELF interpreter info */ 210 /* Create the ELF interpreter info */
200 elf_info = (elf_addr_t *)current->mm->saved_auxv; 211 elf_info = (elf_addr_t *)current->mm->saved_auxv;
201 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ 212 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -228,6 +239,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
228 NEW_AUX_ENT(AT_GID, cred->gid); 239 NEW_AUX_ENT(AT_GID, cred->gid);
229 NEW_AUX_ENT(AT_EGID, cred->egid); 240 NEW_AUX_ENT(AT_EGID, cred->egid);
230 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); 241 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
242 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
231 NEW_AUX_ENT(AT_EXECFN, bprm->exec); 243 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
232 if (k_platform) { 244 if (k_platform) {
233 NEW_AUX_ENT(AT_PLATFORM, 245 NEW_AUX_ENT(AT_PLATFORM,
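
Userspace can then pull the 16 bytes straight from the aux vector. With a libc that exposes getauxval() — an assumption about the toolchain, since glibc grew that call well after this patch — a consumer looks like:

#include <stdio.h>
#include <sys/auxv.h>	/* getauxval(), AT_RANDOM */

int main(void)
{
	const unsigned char *seed = (const unsigned char *)getauxval(AT_RANDOM);
	int i;

	if (!seed)
		return 1;
	for (i = 0; i < 16; i++)	/* AT_RANDOM points at 16 bytes */
		printf("%02x", seed[i]);
	printf("\n");
	return 0;
}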
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b957717e25ab..8ebbfdf708c2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1005,6 +1005,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1005 } 1005 }
1006 1006
1007 lock_kernel(); 1007 lock_kernel();
1008 restart:
1008 1009
1009 ret = -ENXIO; 1010 ret = -ENXIO;
1010 disk = get_gendisk(bdev->bd_dev, &partno); 1011 disk = get_gendisk(bdev->bd_dev, &partno);
@@ -1025,6 +1026,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1025 1026
1026 if (disk->fops->open) { 1027 if (disk->fops->open) {
1027 ret = disk->fops->open(bdev, mode); 1028 ret = disk->fops->open(bdev, mode);
1029 if (ret == -ERESTARTSYS) {
1030 /* Lost a race with 'disk' being
1031 * deleted, try again.
1032 * See md.c
1033 */
1034 disk_put_part(bdev->bd_part);
1035 bdev->bd_part = NULL;
1036 module_put(disk->fops->owner);
1037 put_disk(disk);
1038 bdev->bd_disk = NULL;
1039 mutex_unlock(&bdev->bd_mutex);
1040 goto restart;
1041 }
1028 if (ret) 1042 if (ret)
1029 goto out_clear; 1043 goto out_clear;
1030 } 1044 }
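
The -ERESTARTSYS convention pairs with a driver .open that notices it raced against device teardown, as md does; schematically (a paraphrase of that idea with hypothetical helpers, not verbatim md.c code):

/* Sketch: a driver open asking __blkdev_get() to retry from the top. */
static int my_open(struct block_device *bdev, fmode_t mode)
{
	struct my_dev *dev = my_find(bdev->bd_dev);	/* hypothetical lookup */

	if (dev->gendisk != bdev->bd_disk) {
		/* raced with delete; the caller drops refs and restarts */
		my_put(dev);
		return -ERESTARTSYS;
	}
	return 0;
}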
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 81b7771c6465..43c96ce29614 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -11,7 +11,9 @@
11 11
12#include "coda_int.h" 12#include "coda_int.h"
13 13
14#ifdef CONFIG_SYSCTL
14static struct ctl_table_header *fs_table_header; 15static struct ctl_table_header *fs_table_header;
16#endif
15 17
16static ctl_table coda_table[] = { 18static ctl_table coda_table[] = {
17 { 19 {
@@ -41,6 +43,7 @@ static ctl_table coda_table[] = {
41 {} 43 {}
42}; 44};
43 45
46#ifdef CONFIG_SYSCTL
44static ctl_table fs_table[] = { 47static ctl_table fs_table[] = {
45 { 48 {
46 .ctl_name = CTL_UNNUMBERED, 49 .ctl_name = CTL_UNNUMBERED,
@@ -50,7 +53,7 @@ static ctl_table fs_table[] = {
50 }, 53 },
51 {} 54 {}
52}; 55};
53 56#endif
54 57
55void coda_sysctl_init(void) 58void coda_sysctl_init(void)
56{ 59{
diff --git a/fs/dcache.c b/fs/dcache.c
index e88c23b85a32..4547f66884a0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1567,10 +1567,6 @@ void d_rehash(struct dentry * entry)
1567 spin_unlock(&dcache_lock); 1567 spin_unlock(&dcache_lock);
1568} 1568}
1569 1569
1570#define do_switch(x,y) do { \
1571 __typeof__ (x) __tmp = x; \
1572 x = y; y = __tmp; } while (0)
1573
1574/* 1570/*
1575 * When switching names, the actual string doesn't strictly have to 1571 * When switching names, the actual string doesn't strictly have to
1576 * be preserved in the target - because we're dropping the target 1572 * be preserved in the target - because we're dropping the target
@@ -1589,7 +1585,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1589 /* 1585 /*
1590 * Both external: swap the pointers 1586 * Both external: swap the pointers
1591 */ 1587 */
1592 do_switch(target->d_name.name, dentry->d_name.name); 1588 swap(target->d_name.name, dentry->d_name.name);
1593 } else { 1589 } else {
1594 /* 1590 /*
1595 * dentry:internal, target:external. Steal target's 1591 * dentry:internal, target:external. Steal target's
@@ -1620,7 +1616,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1620 return; 1616 return;
1621 } 1617 }
1622 } 1618 }
1623 do_switch(dentry->d_name.len, target->d_name.len); 1619 swap(dentry->d_name.len, target->d_name.len);
1624} 1620}
1625 1621
1626/* 1622/*
@@ -1680,7 +1676,7 @@ already_unhashed:
1680 1676
1681 /* Switch the names.. */ 1677 /* Switch the names.. */
1682 switch_names(dentry, target); 1678 switch_names(dentry, target);
1683 do_switch(dentry->d_name.hash, target->d_name.hash); 1679 swap(dentry->d_name.hash, target->d_name.hash);
1684 1680
1685 /* ... and switch the parents */ 1681 /* ... and switch the parents */
1686 if (IS_ROOT(dentry)) { 1682 if (IS_ROOT(dentry)) {
@@ -1688,7 +1684,7 @@ already_unhashed:
1688 target->d_parent = target; 1684 target->d_parent = target;
1689 INIT_LIST_HEAD(&target->d_u.d_child); 1685 INIT_LIST_HEAD(&target->d_u.d_child);
1690 } else { 1686 } else {
1691 do_switch(dentry->d_parent, target->d_parent); 1687 swap(dentry->d_parent, target->d_parent);
1692 1688
1693 /* And add them back to the (new) parent lists */ 1689 /* And add them back to the (new) parent lists */
1694 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); 1690 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
@@ -1789,7 +1785,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1789 struct dentry *dparent, *aparent; 1785 struct dentry *dparent, *aparent;
1790 1786
1791 switch_names(dentry, anon); 1787 switch_names(dentry, anon);
1792 do_switch(dentry->d_name.hash, anon->d_name.hash); 1788 swap(dentry->d_name.hash, anon->d_name.hash);
1793 1789
1794 dparent = dentry->d_parent; 1790 dparent = dentry->d_parent;
1795 aparent = anon->d_parent; 1791 aparent = anon->d_parent;
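
The removed do_switch() was a local duplicate of the generic swap() helper in <linux/kernel.h>, which is essentially:

#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)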
diff --git a/fs/dquot.c b/fs/dquot.c
index 61bfff64e5af..48c0571f831d 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -2090,10 +2090,12 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
2090 } 2090 }
2091 if (di->dqb_valid & QIF_BTIME) { 2091 if (di->dqb_valid & QIF_BTIME) {
2092 dm->dqb_btime = di->dqb_btime; 2092 dm->dqb_btime = di->dqb_btime;
2093 check_blim = 1;
2093 __set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); 2094 __set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
2094 } 2095 }
2095 if (di->dqb_valid & QIF_ITIME) { 2096 if (di->dqb_valid & QIF_ITIME) {
2096 dm->dqb_itime = di->dqb_itime; 2097 dm->dqb_itime = di->dqb_itime;
2098 check_ilim = 1;
2097 __set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); 2099 __set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
2098 } 2100 }
2099 2101
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index c454d5db28a5..66321a877e74 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -565,12 +565,8 @@ got:
565 inode->i_blocks = 0; 565 inode->i_blocks = 0;
566 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 566 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
567 memset(ei->i_data, 0, sizeof(ei->i_data)); 567 memset(ei->i_data, 0, sizeof(ei->i_data));
568 ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; 568 ei->i_flags =
569 if (S_ISLNK(mode)) 569 ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
570 ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
571 /* dirsync is only applied to directories */
572 if (!S_ISDIR(mode))
573 ei->i_flags &= ~EXT2_DIRSYNC_FL;
574 ei->i_faddr = 0; 570 ei->i_faddr = 0;
575 ei->i_frag_no = 0; 571 ei->i_frag_no = 0;
576 ei->i_frag_size = 0; 572 ei->i_frag_size = 0;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 02b39a5deb74..23fff2f87783 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -498,8 +498,6 @@ static int ext2_alloc_branch(struct inode *inode,
498 * ext2_splice_branch - splice the allocated branch onto inode. 498 * ext2_splice_branch - splice the allocated branch onto inode.
499 * @inode: owner 499 * @inode: owner
500 * @block: (logical) number of block we are adding 500 * @block: (logical) number of block we are adding
501 * @chain: chain of indirect blocks (with a missing link - see
502 * ext2_alloc_branch)
503 * @where: location of missing link 501 * @where: location of missing link
504 * @num: number of indirect blocks we are adding 502 * @num: number of indirect blocks we are adding
505 * @blks: number of direct blocks we are adding 503 * @blks: number of direct blocks we are adding
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index de876fa793e1..7cb4badef927 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -50,8 +50,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
50 goto setflags_out; 50 goto setflags_out;
51 } 51 }
52 52
53 if (!S_ISDIR(inode->i_mode)) 53 flags = ext2_mask_flags(inode->i_mode, flags);
54 flags &= ~EXT2_DIRSYNC_FL;
55 54
56 mutex_lock(&inode->i_mutex); 55 mutex_lock(&inode->i_mutex);
57 /* Is it quota file? Do not allow user to mess with it */ 56 /* Is it quota file? Do not allow user to mess with it */
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 647cd888ac87..da8bdeaa2e6d 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -132,6 +132,7 @@ static void ext2_put_super (struct super_block * sb)
132 percpu_counter_destroy(&sbi->s_dirs_counter); 132 percpu_counter_destroy(&sbi->s_dirs_counter);
133 brelse (sbi->s_sbh); 133 brelse (sbi->s_sbh);
134 sb->s_fs_info = NULL; 134 sb->s_fs_info = NULL;
135 kfree(sbi->s_blockgroup_lock);
135 kfree(sbi); 136 kfree(sbi);
136 137
137 return; 138 return;
@@ -756,6 +757,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
756 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 757 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
757 if (!sbi) 758 if (!sbi)
758 return -ENOMEM; 759 return -ENOMEM;
760
761 sbi->s_blockgroup_lock =
762 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
763 if (!sbi->s_blockgroup_lock) {
764 kfree(sbi);
765 return -ENOMEM;
766 }
759 sb->s_fs_info = sbi; 767 sb->s_fs_info = sbi;
760 sbi->s_sb_block = sb_block; 768 sbi->s_sb_block = sb_block;
761 769
@@ -983,7 +991,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
983 printk ("EXT2-fs: not enough memory\n"); 991 printk ("EXT2-fs: not enough memory\n");
984 goto failed_mount; 992 goto failed_mount;
985 } 993 }
986 bgl_lock_init(&sbi->s_blockgroup_lock); 994 bgl_lock_init(sbi->s_blockgroup_lock);
987 sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL); 995 sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL);
988 if (!sbi->s_debts) { 996 if (!sbi->s_debts) {
989 printk ("EXT2-fs: not enough memory\n"); 997 printk ("EXT2-fs: not enough memory\n");
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 5655fbcbd11f..8de6c720e510 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -559,12 +559,8 @@ got:
559 ei->i_dir_start_lookup = 0; 559 ei->i_dir_start_lookup = 0;
560 ei->i_disksize = 0; 560 ei->i_disksize = 0;
561 561
562 ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; 562 ei->i_flags =
563 if (S_ISLNK(mode)) 563 ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED);
564 ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
565 /* dirsync only applies to directories */
566 if (!S_ISDIR(mode))
567 ei->i_flags &= ~EXT3_DIRSYNC_FL;
568#ifdef EXT3_FRAGMENTS 564#ifdef EXT3_FRAGMENTS
569 ei->i_faddr = 0; 565 ei->i_faddr = 0;
570 ei->i_frag_no = 0; 566 ei->i_frag_no = 0;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index b7394d05ee8e..5e86ce9a86e0 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -53,8 +53,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
53 goto flags_out; 53 goto flags_out;
54 } 54 }
55 55
56 if (!S_ISDIR(inode->i_mode)) 56 flags = ext3_mask_flags(inode->i_mode, flags);
57 flags &= ~EXT3_DIRSYNC_FL;
58 57
59 mutex_lock(&inode->i_mutex); 58 mutex_lock(&inode->i_mutex);
60 /* Is it quota file? Do not allow user to mess with it */ 59 /* Is it quota file? Do not allow user to mess with it */
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1dd2abe6313e..8d6f965e502c 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -74,10 +74,6 @@ static struct buffer_head *ext3_append(handle_t *handle,
74#define assert(test) J_ASSERT(test) 74#define assert(test) J_ASSERT(test)
75#endif 75#endif
76 76
77#ifndef swap
78#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
79#endif
80
81#ifdef DX_DEBUG 77#ifdef DX_DEBUG
82#define dxtrace(command) command 78#define dxtrace(command) command
83#else 79#else
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c22d01467bd1..01c235bc2054 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -439,6 +439,7 @@ static void ext3_put_super (struct super_block * sb)
439 ext3_blkdev_remove(sbi); 439 ext3_blkdev_remove(sbi);
440 } 440 }
441 sb->s_fs_info = NULL; 441 sb->s_fs_info = NULL;
442 kfree(sbi->s_blockgroup_lock);
442 kfree(sbi); 443 kfree(sbi);
443 return; 444 return;
444} 445}
@@ -1546,6 +1547,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1546 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1547 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1547 if (!sbi) 1548 if (!sbi)
1548 return -ENOMEM; 1549 return -ENOMEM;
1550
1551 sbi->s_blockgroup_lock =
1552 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
1553 if (!sbi->s_blockgroup_lock) {
1554 kfree(sbi);
1555 return -ENOMEM;
1556 }
1549 sb->s_fs_info = sbi; 1557 sb->s_fs_info = sbi;
1550 sbi->s_mount_opt = 0; 1558 sbi->s_mount_opt = 0;
1551 sbi->s_resuid = EXT3_DEF_RESUID; 1559 sbi->s_resuid = EXT3_DEF_RESUID;
@@ -1786,7 +1794,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1786 goto failed_mount; 1794 goto failed_mount;
1787 } 1795 }
1788 1796
1789 bgl_lock_init(&sbi->s_blockgroup_lock); 1797 bgl_lock_init(sbi->s_blockgroup_lock);
1790 1798
1791 for (i = 0; i < db_count; i++) { 1799 for (i = 0; i < db_count; i++) {
1792 block = descriptor_loc(sb, logic_sb_block, i); 1800 block = descriptor_loc(sb, logic_sb_block, i);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ea2ce3c0ae66..3f54db31cdc2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2536,7 +2536,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2536 */ 2536 */
2537 newdepth = ext_depth(inode); 2537 newdepth = ext_depth(inode);
2538 /* 2538 /*
2539 * update the extent length after successfull insert of the 2539 * update the extent length after successful insert of the
2540 * split extent 2540 * split extent
2541 */ 2541 */
2542 orig_ex.ee_len = cpu_to_le16(ee_len - 2542 orig_ex.ee_len = cpu_to_le16(ee_len -
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 9fd2a5e1be4d..4b8d431d7dff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -74,10 +74,6 @@ static struct buffer_head *ext4_append(handle_t *handle,
74#define assert(test) J_ASSERT(test) 74#define assert(test) J_ASSERT(test)
75#endif 75#endif
76 76
77#ifndef swap
78#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
79#endif
80
81#ifdef DX_DEBUG 77#ifdef DX_DEBUG
82#define dxtrace(command) command 78#define dxtrace(command) command
83#else 79#else
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 25719d902c51..3fbffb1ea714 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -306,6 +306,8 @@ void journal_commit_transaction(journal_t *journal)
306 int flags; 306 int flags;
307 int err; 307 int err;
308 unsigned long blocknr; 308 unsigned long blocknr;
309 ktime_t start_time;
310 u64 commit_time;
309 char *tagp = NULL; 311 char *tagp = NULL;
310 journal_header_t *header; 312 journal_header_t *header;
311 journal_block_tag_t *tag = NULL; 313 journal_block_tag_t *tag = NULL;
@@ -418,6 +420,7 @@ void journal_commit_transaction(journal_t *journal)
418 commit_transaction->t_state = T_FLUSH; 420 commit_transaction->t_state = T_FLUSH;
419 journal->j_committing_transaction = commit_transaction; 421 journal->j_committing_transaction = commit_transaction;
420 journal->j_running_transaction = NULL; 422 journal->j_running_transaction = NULL;
423 start_time = ktime_get();
421 commit_transaction->t_log_start = journal->j_head; 424 commit_transaction->t_log_start = journal->j_head;
422 wake_up(&journal->j_wait_transaction_locked); 425 wake_up(&journal->j_wait_transaction_locked);
423 spin_unlock(&journal->j_state_lock); 426 spin_unlock(&journal->j_state_lock);
@@ -913,6 +916,18 @@ restart_loop:
913 J_ASSERT(commit_transaction == journal->j_committing_transaction); 916 J_ASSERT(commit_transaction == journal->j_committing_transaction);
914 journal->j_commit_sequence = commit_transaction->t_tid; 917 journal->j_commit_sequence = commit_transaction->t_tid;
915 journal->j_committing_transaction = NULL; 918 journal->j_committing_transaction = NULL;
919 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
920
921 /*
922 * weight the commit time higher than the average time so we don't
923 * react too strongly to vast changes in commit time
924 */
925 if (likely(journal->j_average_commit_time))
926 journal->j_average_commit_time = (commit_time*3 +
927 journal->j_average_commit_time) / 4;
928 else
929 journal->j_average_commit_time = commit_time;
930
916 spin_unlock(&journal->j_state_lock); 931 spin_unlock(&journal->j_state_lock);
917 932
918 if (commit_transaction->t_checkpoint_list == NULL && 933 if (commit_transaction->t_checkpoint_list == NULL &&
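The update above is a 3:1 weighted moving average: each commit pulls the estimate three quarters of the way toward the new sample, so the average tracks the disk quickly while one outlier cannot replace it outright. A sketch with assumed numbers:

    /* illustrative values only (nanoseconds) */
    u64 avg = 8000000;                  /* previous j_average_commit_time: 8 ms */
    u64 commit_time = 4000000;          /* this commit took 4 ms */

    avg = (commit_time * 3 + avg) / 4;  /* new average: 5 ms */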
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 60d4c32c8808..e6a117431277 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -25,6 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h>
28 29
29static void __journal_temp_unlink_buffer(struct journal_head *jh); 30static void __journal_temp_unlink_buffer(struct journal_head *jh);
30 31
@@ -49,6 +50,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
49{ 50{
50 transaction->t_journal = journal; 51 transaction->t_journal = journal;
51 transaction->t_state = T_RUNNING; 52 transaction->t_state = T_RUNNING;
53 transaction->t_start_time = ktime_get();
52 transaction->t_tid = journal->j_transaction_sequence++; 54 transaction->t_tid = journal->j_transaction_sequence++;
53 transaction->t_expires = jiffies + journal->j_commit_interval; 55 transaction->t_expires = jiffies + journal->j_commit_interval;
54 spin_lock_init(&transaction->t_handle_lock); 56 spin_lock_init(&transaction->t_handle_lock);
@@ -752,7 +754,6 @@ out:
752 * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. 754 * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
753 * @handle: transaction to add buffer modifications to 755 * @handle: transaction to add buffer modifications to
754 * @bh: bh to be used for metadata writes 756 * @bh: bh to be used for metadata writes
755 * @credits: variable that will receive credits for the buffer
756 * 757 *
757 * Returns an error code or 0 on success. 758 * Returns an error code or 0 on success.
758 * 759 *
@@ -1370,7 +1371,7 @@ int journal_stop(handle_t *handle)
1370{ 1371{
1371 transaction_t *transaction = handle->h_transaction; 1372 transaction_t *transaction = handle->h_transaction;
1372 journal_t *journal = transaction->t_journal; 1373 journal_t *journal = transaction->t_journal;
1373 int old_handle_count, err; 1374 int err;
1374 pid_t pid; 1375 pid_t pid;
1375 1376
1376 J_ASSERT(journal_current_handle() == handle); 1377 J_ASSERT(journal_current_handle() == handle);
@@ -1399,6 +1400,17 @@ int journal_stop(handle_t *handle)
1399 * on IO anyway. Speeds up many-threaded, many-dir operations 1400 * on IO anyway. Speeds up many-threaded, many-dir operations
1400 * by 30x or more... 1401 * by 30x or more...
1401 * 1402 *
 1403 * We try to optimize the sleep time against what the underlying disk
 1404 * can do, instead of having a static sleep time. This is useful for
 1405 * the case where our storage is so fast that it is better to go
 1406 * ahead, force a flush and wait for the transaction to be committed
 1407 * than it is to wait an arbitrary amount of time for new writers to
 1408 * join the transaction. We achieve this by measuring how long it takes
 1409 * to commit a transaction, comparing that with how long this
 1410 * transaction has been running; if run time < commit time then we
 1411 * sleep for the delta and commit. This greatly helps super fast disks
 1412 * that would otherwise see slowdowns as more threads start doing fsyncs.
1413 *
1402 * But don't do this if this process was the most recent one to 1414 * But don't do this if this process was the most recent one to
1403 * perform a synchronous write. We do this to detect the case where a 1415 * perform a synchronous write. We do this to detect the case where a
1404 * single process is doing a stream of sync writes. No point in waiting 1416 * single process is doing a stream of sync writes. No point in waiting
@@ -1406,11 +1418,26 @@ int journal_stop(handle_t *handle)
1406 */ 1418 */
1407 pid = current->pid; 1419 pid = current->pid;
1408 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1420 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1421 u64 commit_time, trans_time;
1422
1409 journal->j_last_sync_writer = pid; 1423 journal->j_last_sync_writer = pid;
1410 do { 1424
1411 old_handle_count = transaction->t_handle_count; 1425 spin_lock(&journal->j_state_lock);
1412 schedule_timeout_uninterruptible(1); 1426 commit_time = journal->j_average_commit_time;
1413 } while (old_handle_count != transaction->t_handle_count); 1427 spin_unlock(&journal->j_state_lock);
1428
1429 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1430 transaction->t_start_time));
1431
1432 commit_time = min_t(u64, commit_time,
1433 1000*jiffies_to_usecs(1));
1434
1435 if (trans_time < commit_time) {
1436 ktime_t expires = ktime_add_ns(ktime_get(),
1437 commit_time);
1438 set_current_state(TASK_UNINTERRUPTIBLE);
1439 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1440 }
1414 } 1441 }
1415 1442
1416 current->journal_info = NULL; 1443 current->journal_info = NULL;
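Note the cap in the hunk above: 1000 * jiffies_to_usecs(1) is one jiffy expressed in nanoseconds, so the adaptive wait never exceeds the single-jiffy sleep it replaces. A sketch of the decision with assumed numbers (HZ = 1000, making the cap 1 ms):

    /* assumed: j_average_commit_time = 500 us, transaction ran 200 us */
    u64 commit_time = min_t(u64, 500000ULL, 1000 * jiffies_to_usecs(1));
    u64 trans_time = 200000;

    if (trans_time < commit_time) {
            /* sleep ~500 us so more writers can join before the commit */
            ktime_t expires = ktime_add_ns(ktime_get(), commit_time);

            set_current_state(TASK_UNINTERRUPTIBLE);
            schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
    }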
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 54ff4c77aaa3..d861096c9d81 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3868,7 +3868,7 @@ static void ocfs2_split_record(struct inode *inode,
3868 struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; 3868 struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
3869 struct ocfs2_extent_rec *rec, *tmprec; 3869 struct ocfs2_extent_rec *rec, *tmprec;
3870 3870
3871 right_el = path_leaf_el(right_path);; 3871 right_el = path_leaf_el(right_path);
3872 if (left_path) 3872 if (left_path)
3873 left_el = path_leaf_el(left_path); 3873 left_el = path_leaf_el(left_path);
3874 3874
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f731ab491795..b0c4cadd4c45 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1324,7 +1324,7 @@ again:
1324 goto out; 1324 goto out;
1325 } 1325 }
1326 1326
1327 mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", 1327 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1328 lockres->l_name); 1328 lockres->l_name);
1329 1329
1330 /* At this point we've gone inside the dlm and need to 1330 /* At this point we've gone inside the dlm and need to
@@ -2951,7 +2951,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2951 ocfs2_dlm_dump_lksb(&lockres->l_lksb); 2951 ocfs2_dlm_dump_lksb(&lockres->l_lksb);
2952 BUG(); 2952 BUG();
2953 } 2953 }
2954 mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", 2954 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
2955 lockres->l_name); 2955 lockres->l_name);
2956 2956
2957 ocfs2_wait_on_busy_lock(lockres); 2957 ocfs2_wait_on_busy_lock(lockres);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e8f795f978aa..a5887df2cd8a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1605,7 +1605,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1605 struct ocfs2_space_resv *sr) 1605 struct ocfs2_space_resv *sr)
1606{ 1606{
1607 struct inode *inode = file->f_path.dentry->d_inode; 1607 struct inode *inode = file->f_path.dentry->d_inode;
1608 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);; 1608 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1609 1609
1610 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && 1610 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
1611 !ocfs2_writes_unwritten_extents(osb)) 1611 !ocfs2_writes_unwritten_extents(osb))
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 03ec59504906..5edcc3f92ba7 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -47,8 +47,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
47 47
48 offset = (unsigned long)(*ppos % PAGE_SIZE); 48 offset = (unsigned long)(*ppos % PAGE_SIZE);
49 pfn = (unsigned long)(*ppos / PAGE_SIZE); 49 pfn = (unsigned long)(*ppos / PAGE_SIZE);
50 if (pfn > saved_max_pfn)
51 return -EINVAL;
52 50
53 do { 51 do {
54 if (count > (PAGE_SIZE - offset)) 52 if (count > (PAGE_SIZE - offset))
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c97d4c931715..98a232f7196b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -490,7 +490,7 @@ static mode_t romfs_modemap[] =
490static struct inode * 490static struct inode *
491romfs_iget(struct super_block *sb, unsigned long ino) 491romfs_iget(struct super_block *sb, unsigned long ino)
492{ 492{
493 int nextfh; 493 int nextfh, ret;
494 struct romfs_inode ri; 494 struct romfs_inode ri;
495 struct inode *i; 495 struct inode *i;
496 496
@@ -526,11 +526,11 @@ romfs_iget(struct super_block *sb, unsigned long ino)
526 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; 526 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
527 527
528 /* Precalculate the data offset */ 528 /* Precalculate the data offset */
529 ino = romfs_strnlen(i, ino+ROMFH_SIZE, ROMFS_MAXFN); 529 ret = romfs_strnlen(i, ino + ROMFH_SIZE, ROMFS_MAXFN);
530 if (ino >= 0) 530 if (ret >= 0)
531 ino = ((ROMFH_SIZE+ino+1+ROMFH_PAD)&ROMFH_MASK); 531 ino = (ROMFH_SIZE + ret + 1 + ROMFH_PAD) & ROMFH_MASK;
532 else 532 else
533 ino = 0; 533 ino = 0;
534 534
535 ROMFS_I(i)->i_metasize = ino; 535 ROMFS_I(i)->i_metasize = ino;
536 ROMFS_I(i)->i_dataoffset = ino+(i->i_ino&ROMFH_MASK); 536 ROMFS_I(i)->i_dataoffset = ino+(i->i_ino&ROMFH_MASK);
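The romfs change above is a signedness fix: romfs_strnlen() returns an int that can be negative on error, but the result was stored in the unsigned long 'ino', so the 'ino >= 0' test was always true. A minimal illustration of the pitfall:

    unsigned long ino;
    int ret;

    ino = -1;           /* a negative result stored in an unsigned type */
    if (ino >= 0)       /* always true -- the old error check never fired */
            /* ... */;

    ret = -1;
    if (ret >= 0)       /* the fix: test the value while it is still signed */
            /* ... */;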
diff --git a/fs/splice.c b/fs/splice.c
index 1abab5cee4ba..a54b3e3f10a7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -21,6 +21,7 @@
21#include <linux/file.h> 21#include <linux/file.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/splice.h> 23#include <linux/splice.h>
24#include <linux/memcontrol.h>
24#include <linux/mm_inline.h> 25#include <linux/mm_inline.h>
25#include <linux/swap.h> 26#include <linux/swap.h>
26#include <linux/writeback.h> 27#include <linux/writeback.h>
diff --git a/fs/super.c b/fs/super.c
index cb20744ec789..7d67387496cb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -458,7 +458,6 @@ void sync_filesystems(int wait)
458 if (sb->s_flags & MS_RDONLY) 458 if (sb->s_flags & MS_RDONLY)
459 continue; 459 continue;
460 sb->s_need_sync_fs = 1; 460 sb->s_need_sync_fs = 1;
461 async_synchronize_full_special(&sb->s_async_list);
462 } 461 }
463 462
464restart: 463restart:
@@ -471,6 +470,7 @@ restart:
471 sb->s_count++; 470 sb->s_count++;
472 spin_unlock(&sb_lock); 471 spin_unlock(&sb_lock);
473 down_read(&sb->s_umount); 472 down_read(&sb->s_umount);
473 async_synchronize_full_special(&sb->s_async_list);
474 if (sb->s_root && (wait || sb->s_dirt)) 474 if (sb->s_root && (wait || sb->s_dirt))
475 sb->s_op->sync_fs(sb, wait); 475 sb->s_op->sync_fs(sb, wait);
476 up_read(&sb->s_umount); 476 up_read(&sb->s_umount);
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index d7afa9dd6635..f3b5d4e3a2ac 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -23,16 +23,16 @@
23#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ 23#define AT_PLATFORM 15 /* string identifying CPU for optimizations */
24#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ 24#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
25#define AT_CLKTCK 17 /* frequency at which times() increments */ 25#define AT_CLKTCK 17 /* frequency at which times() increments */
26 26/* AT_* values 18 through 22 are reserved */
27#define AT_SECURE 23 /* secure mode boolean */ 27#define AT_SECURE 23 /* secure mode boolean */
28
29#define AT_BASE_PLATFORM 24 /* string identifying real platform, may 28#define AT_BASE_PLATFORM 24 /* string identifying real platform, may
30 * differ from AT_PLATFORM. */ 29 * differ from AT_PLATFORM. */
30#define AT_RANDOM 25 /* address of 16 random bytes */
31 31
32#define AT_EXECFN 31 /* filename of program */ 32#define AT_EXECFN 31 /* filename of program */
33 33
34#ifdef __KERNEL__ 34#ifdef __KERNEL__
35#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */ 35#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */
36 /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ 36 /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
37#endif 37#endif
38 38
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 08b78c09b09a..e267e62827bb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -52,9 +52,9 @@ struct cgroup_subsys_state {
52 * hierarchy structure */ 52 * hierarchy structure */
53 struct cgroup *cgroup; 53 struct cgroup *cgroup;
54 54
55 /* State maintained by the cgroup system to allow 55 /* State maintained by the cgroup system to allow subsystems
56 * subsystems to be "busy". Should be accessed via css_get() 56 * to be "busy". Should be accessed via css_get(),
 57 * and css_put() */ 57 * css_tryget() and css_put(). */
58 58
59 atomic_t refcnt; 59 atomic_t refcnt;
60 60
@@ -64,11 +64,14 @@ struct cgroup_subsys_state {
64/* bits in struct cgroup_subsys_state flags field */ 64/* bits in struct cgroup_subsys_state flags field */
65enum { 65enum {
66 CSS_ROOT, /* This CSS is the root of the subsystem */ 66 CSS_ROOT, /* This CSS is the root of the subsystem */
67 CSS_REMOVED, /* This CSS is dead */
67}; 68};
68 69
69/* 70/*
70 * Call css_get() to hold a reference on the cgroup; 71 * Call css_get() to hold a reference on the css; it can be used
71 * 72 * for a reference obtained via:
73 * - an existing ref-counted reference to the css
74 * - task->cgroups for a locked task
72 */ 75 */
73 76
74static inline void css_get(struct cgroup_subsys_state *css) 77static inline void css_get(struct cgroup_subsys_state *css)
@@ -77,9 +80,32 @@ static inline void css_get(struct cgroup_subsys_state *css)
77 if (!test_bit(CSS_ROOT, &css->flags)) 80 if (!test_bit(CSS_ROOT, &css->flags))
78 atomic_inc(&css->refcnt); 81 atomic_inc(&css->refcnt);
79} 82}
83
84static inline bool css_is_removed(struct cgroup_subsys_state *css)
85{
86 return test_bit(CSS_REMOVED, &css->flags);
87}
88
89/*
90 * Call css_tryget() to take a reference on a css if your existing
91 * (known-valid) reference isn't already ref-counted. Returns false if
92 * the css has been destroyed.
93 */
94
95static inline bool css_tryget(struct cgroup_subsys_state *css)
96{
97 if (test_bit(CSS_ROOT, &css->flags))
98 return true;
99 while (!atomic_inc_not_zero(&css->refcnt)) {
100 if (test_bit(CSS_REMOVED, &css->flags))
101 return false;
102 }
103 return true;
104}
105
80/* 106/*
81 * css_put() should be called to release a reference taken by 107 * css_put() should be called to release a reference taken by
82 * css_get() 108 * css_get() or css_tryget()
83 */ 109 */
84 110
85extern void __css_put(struct cgroup_subsys_state *css); 111extern void __css_put(struct cgroup_subsys_state *css);
@@ -116,7 +142,7 @@ struct cgroup {
116 struct list_head children; /* my children */ 142 struct list_head children; /* my children */
117 143
118 struct cgroup *parent; /* my parent */ 144 struct cgroup *parent; /* my parent */
119 struct dentry *dentry; /* cgroup fs entry */ 145 struct dentry *dentry; /* cgroup fs entry, RCU protected */
120 146
121 /* Private pointers for each registered subsystem */ 147 /* Private pointers for each registered subsystem */
122 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 148 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
@@ -145,6 +171,9 @@ struct cgroup {
145 int pids_use_count; 171 int pids_use_count;
146 /* Length of the current tasks_pids array */ 172 /* Length of the current tasks_pids array */
147 int pids_length; 173 int pids_length;
174
175 /* For RCU-protected deletion */
176 struct rcu_head rcu_head;
148}; 177};
149 178
150/* A css_set is a structure holding pointers to a set of 179/* A css_set is a structure holding pointers to a set of
@@ -337,9 +366,23 @@ struct cgroup_subsys {
337#define MAX_CGROUP_TYPE_NAMELEN 32 366#define MAX_CGROUP_TYPE_NAMELEN 32
338 const char *name; 367 const char *name;
339 368
340 /* Protected by RCU */ 369 /*
341 struct cgroupfs_root *root; 370 * Protects sibling/children links of cgroups in this
371 * hierarchy, plus protects which hierarchy (or none) the
372 * subsystem is a part of (i.e. root/sibling). To avoid
373 * potential deadlocks, the following operations should not be
374 * undertaken while holding any hierarchy_mutex:
375 *
376 * - allocating memory
377 * - initiating hotplug events
378 */
379 struct mutex hierarchy_mutex;
342 380
381 /*
382 * Link to parent, and list entry in parent's children.
383 * Protected by this->hierarchy_mutex and cgroup_lock()
384 */
385 struct cgroupfs_root *root;
343 struct list_head sibling; 386 struct list_head sibling;
344}; 387};
345 388
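css_tryget() above covers the case where a css is visible (for instance under rcu_read_lock() via task->cgroups) but no counted reference is held yet; it fails once CSS_REMOVED is set. An illustrative calling pattern -- the lookup helper and subsystem id are placeholders:

    struct cgroup_subsys_state *css;

    rcu_read_lock();
    css = task_subsys_state(tsk, my_subsys_id);  /* placeholder lookup */
    if (!css_tryget(css))
            css = NULL;                          /* cgroup being destroyed */
    rcu_read_unlock();

    if (css) {
            /* ... css is safe to use here ... */
            css_put(css);  /* drop the reference taken by css_tryget() */
    }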
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 51ea2bdea0f9..90c6074a36ca 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -20,8 +20,9 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
20extern int cpuset_init_early(void); 20extern int cpuset_init_early(void);
21extern int cpuset_init(void); 21extern int cpuset_init(void);
22extern void cpuset_init_smp(void); 22extern void cpuset_init_smp(void);
23extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask); 23extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
24extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask); 24extern void cpuset_cpus_allowed_locked(struct task_struct *p,
25 struct cpumask *mask);
25extern nodemask_t cpuset_mems_allowed(struct task_struct *p); 26extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
26#define cpuset_current_mems_allowed (current->mems_allowed) 27#define cpuset_current_mems_allowed (current->mems_allowed)
27void cpuset_init_current_mems_allowed(void); 28void cpuset_init_current_mems_allowed(void);
@@ -86,12 +87,13 @@ static inline int cpuset_init_early(void) { return 0; }
86static inline int cpuset_init(void) { return 0; } 87static inline int cpuset_init(void) { return 0; }
87static inline void cpuset_init_smp(void) {} 88static inline void cpuset_init_smp(void) {}
88 89
89static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask) 90static inline void cpuset_cpus_allowed(struct task_struct *p,
91 struct cpumask *mask)
90{ 92{
91 *mask = cpu_possible_map; 93 *mask = cpu_possible_map;
92} 94}
93static inline void cpuset_cpus_allowed_locked(struct task_struct *p, 95static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
94 cpumask_t *mask) 96 struct cpumask *mask)
95{ 97{
96 *mask = cpu_possible_map; 98 *mask = cpu_possible_map;
97} 99}
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 78c775a83f7c..121720d74e15 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -194,6 +194,30 @@ struct ext2_group_desc
194#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ 194#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
195#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ 195#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
196 196
197/* Flags that should be inherited by new inodes from their parent. */
198#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
199 EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\
200 EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\
201 EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
202 EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
203
204/* Flags that are appropriate for regular files (all but dir-specific ones). */
205#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL))
206
207/* Flags that are appropriate for non-directories/regular files. */
208#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL)
209
210/* Mask out flags that are inappropriate for the given type of inode. */
211static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags)
212{
213 if (S_ISDIR(mode))
214 return flags;
215 else if (S_ISREG(mode))
216 return flags & EXT2_REG_FLMASK;
217 else
218 return flags & EXT2_OTHER_FLMASK;
219}
220
197/* 221/*
198 * ioctl commands 222 * ioctl commands
199 */ 223 */
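ext2_mask_flags() above centralizes the checks the ialloc.c and ioctl.c hunks removed: directories keep every flag, regular files lose the directory-only DIRSYNC/TOPDIR flags, and other inode types (symlinks, device nodes) keep only NODUMP and NOATIME -- which also drops IMMUTABLE and APPEND, exactly what the old S_ISLNK() special case did. A small sketch of the behavior:

    /* sketch: flag inheritance for a new symlink */
    __u32 dirflags = EXT2_IMMUTABLE_FL | EXT2_NOATIME_FL | EXT2_DIRSYNC_FL;
    __u32 flags = ext2_mask_flags(S_IFLNK, dirflags & EXT2_FL_INHERITED);

    /* result: EXT2_NOATIME_FL only -- EXT2_OTHER_FLMASK filtered the rest */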
diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
index dc541f3653d1..1cdb66367c98 100644
--- a/include/linux/ext2_fs_sb.h
+++ b/include/linux/ext2_fs_sb.h
@@ -101,7 +101,7 @@ struct ext2_sb_info {
101 struct percpu_counter s_freeblocks_counter; 101 struct percpu_counter s_freeblocks_counter;
102 struct percpu_counter s_freeinodes_counter; 102 struct percpu_counter s_freeinodes_counter;
103 struct percpu_counter s_dirs_counter; 103 struct percpu_counter s_dirs_counter;
104 struct blockgroup_lock s_blockgroup_lock; 104 struct blockgroup_lock *s_blockgroup_lock;
105 /* root of the per fs reservation window tree */ 105 /* root of the per fs reservation window tree */
106 spinlock_t s_rsv_window_lock; 106 spinlock_t s_rsv_window_lock;
107 struct rb_root s_rsv_window_root; 107 struct rb_root s_rsv_window_root;
@@ -111,7 +111,7 @@ struct ext2_sb_info {
111static inline spinlock_t * 111static inline spinlock_t *
112sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group) 112sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group)
113{ 113{
114 return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); 114 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
115} 115}
116 116
117#endif /* _LINUX_EXT2_FS_SB */ 117#endif /* _LINUX_EXT2_FS_SB */
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index d14f02918483..d76800f6ecf0 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -178,6 +178,30 @@ struct ext3_group_desc
178#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ 178#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
179#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ 179#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
180 180
181/* Flags that should be inherited by new inodes from their parent. */
182#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
183 EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
184 EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
185 EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
186 EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
187
188/* Flags that are appropriate for regular files (all but dir-specific ones). */
189#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
190
191/* Flags that are appropriate for non-directories/regular files. */
192#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
193
194/* Mask out flags that are inappropriate for the given type of inode. */
195static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
196{
197 if (S_ISDIR(mode))
198 return flags;
199 else if (S_ISREG(mode))
200 return flags & EXT3_REG_FLMASK;
201 else
202 return flags & EXT3_OTHER_FLMASK;
203}
204
181/* 205/*
182 * Inode dynamic state flags 206 * Inode dynamic state flags
183 */ 207 */
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index e024e38248ff..76fdc0f4b028 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -60,7 +60,7 @@ struct ext3_sb_info {
60 struct percpu_counter s_freeblocks_counter; 60 struct percpu_counter s_freeblocks_counter;
61 struct percpu_counter s_freeinodes_counter; 61 struct percpu_counter s_freeinodes_counter;
62 struct percpu_counter s_dirs_counter; 62 struct percpu_counter s_dirs_counter;
63 struct blockgroup_lock s_blockgroup_lock; 63 struct blockgroup_lock *s_blockgroup_lock;
64 64
65 /* root of the per fs reservation window tree */ 65 /* root of the per fs reservation window tree */
66 spinlock_t s_rsv_window_lock; 66 spinlock_t s_rsv_window_lock;
@@ -86,7 +86,7 @@ struct ext3_sb_info {
86static inline spinlock_t * 86static inline spinlock_t *
87sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group) 87sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
88{ 88{
89 return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); 89 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
90} 90}
91 91
92#endif /* _LINUX_EXT3_FS_SB */ 92#endif /* _LINUX_EXT3_FS_SB */
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 346e2b80be7d..6384b19efe64 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -543,6 +543,11 @@ struct transaction_s
543 unsigned long t_expires; 543 unsigned long t_expires;
544 544
545 /* 545 /*
546 * When this transaction started, in nanoseconds [no locking]
547 */
548 ktime_t t_start_time;
549
550 /*
546 * How many handles used this transaction? [t_handle_lock] 551 * How many handles used this transaction? [t_handle_lock]
547 */ 552 */
548 int t_handle_count; 553 int t_handle_count;
@@ -798,9 +803,19 @@ struct journal_s
798 struct buffer_head **j_wbuf; 803 struct buffer_head **j_wbuf;
799 int j_wbufsize; 804 int j_wbufsize;
800 805
806 /*
807 * this is the pid of the last person to run a synchronous operation
808 * through the journal.
809 */
801 pid_t j_last_sync_writer; 810 pid_t j_last_sync_writer;
802 811
803 /* 812 /*
813 * the average amount of time in nanoseconds it takes to commit a
814 * transaction to the disk. [j_state_lock]
815 */
816 u64 j_average_commit_time;
817
818 /*
804 * An opaque pointer to fs-private information. ext3 puts its 819 * An opaque pointer to fs-private information. ext3 puts its
805 * superblock pointer here 820 * superblock pointer here
806 */ 821 */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 6b8e2027165e..343df9ef2412 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -476,6 +476,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
476 __val = __val < __min ? __min: __val; \ 476 __val = __val < __min ? __min: __val; \
477 __val > __max ? __max: __val; }) 477 __val > __max ? __max: __val; })
478 478
479
480/*
481 * swap - swap value of @a and @b
482 */
483#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; })
484
479/** 485/**
480 * container_of - cast a member of a structure out to the containing structure 486 * container_of - cast a member of a structure out to the containing structure
481 * @ptr: the pointer to the member. 487 * @ptr: the pointer to the member.
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3449de597eff..4f7c8fb4d3fe 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1518,6 +1518,7 @@ extern void sata_pmp_error_handler(struct ata_port *ap);
1518 1518
1519extern const struct ata_port_operations ata_sff_port_ops; 1519extern const struct ata_port_operations ata_sff_port_ops;
1520extern const struct ata_port_operations ata_bmdma_port_ops; 1520extern const struct ata_port_operations ata_bmdma_port_ops;
1521extern const struct ata_port_operations ata_bmdma32_port_ops;
1521 1522
1522/* PIO only, sg_tablesize and dma_boundary limits can be removed */ 1523/* PIO only, sg_tablesize and dma_boundary limits can be removed */
1523#define ATA_PIO_SHT(drv_name) \ 1524#define ATA_PIO_SHT(drv_name) \
@@ -1545,6 +1546,8 @@ extern void ata_sff_exec_command(struct ata_port *ap,
1545 const struct ata_taskfile *tf); 1546 const struct ata_taskfile *tf);
1546extern unsigned int ata_sff_data_xfer(struct ata_device *dev, 1547extern unsigned int ata_sff_data_xfer(struct ata_device *dev,
1547 unsigned char *buf, unsigned int buflen, int rw); 1548 unsigned char *buf, unsigned int buflen, int rw);
1549extern unsigned int ata_sff_data_xfer32(struct ata_device *dev,
1550 unsigned char *buf, unsigned int buflen, int rw);
1548extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, 1551extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev,
1549 unsigned char *buf, unsigned int buflen, int rw); 1552 unsigned char *buf, unsigned int buflen, int rw);
1550extern u8 ata_sff_irq_on(struct ata_port *ap); 1553extern u8 ata_sff_irq_on(struct ata_port *ap);
diff --git a/include/linux/magic.h b/include/linux/magic.h
index f7f3fdddbef0..439f6f3cb0c4 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -13,6 +13,7 @@
13#define EFS_SUPER_MAGIC 0x414A53 13#define EFS_SUPER_MAGIC 0x414A53
14#define EXT2_SUPER_MAGIC 0xEF53 14#define EXT2_SUPER_MAGIC 0xEF53
15#define EXT3_SUPER_MAGIC 0xEF53 15#define EXT3_SUPER_MAGIC 0xEF53
16#define XENFS_SUPER_MAGIC 0xabba1974
16#define EXT4_SUPER_MAGIC 0xEF53 17#define EXT4_SUPER_MAGIC 0xEF53
17#define HPFS_SUPER_MAGIC 0xf995e849 18#define HPFS_SUPER_MAGIC 0xf995e849
18#define ISOFS_SUPER_MAGIC 0x9660 19#define ISOFS_SUPER_MAGIC 0x9660
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1fbe14d39521..326f45c86530 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -19,22 +19,45 @@
19 19
20#ifndef _LINUX_MEMCONTROL_H 20#ifndef _LINUX_MEMCONTROL_H
21#define _LINUX_MEMCONTROL_H 21#define _LINUX_MEMCONTROL_H
22 22#include <linux/cgroup.h>
23struct mem_cgroup; 23struct mem_cgroup;
24struct page_cgroup; 24struct page_cgroup;
25struct page; 25struct page;
26struct mm_struct; 26struct mm_struct;
27 27
28#ifdef CONFIG_CGROUP_MEM_RES_CTLR 28#ifdef CONFIG_CGROUP_MEM_RES_CTLR
29/*
30 * All "charge" functions with gfp_mask should use GFP_KERNEL or
 31 * (gfp_mask & GFP_RECLAIM_MASK). In the current implementation, memcg
 32 * doesn't allocate memory but reclaims memory from all available zones. So
 33 * the "where I want memory from" bits of gfp_mask have no meaning; any bits
 34 * of that field would work, but having a rule is better. A charge function's
 35 * gfp_mask should be GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK to avoid
 36 * ambiguous callers.
 37 * (Of course, if memcg allocates memory in the future, GFP_KERNEL is sane.)
38 */
29 39
30extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, 40extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
31 gfp_t gfp_mask); 41 gfp_t gfp_mask);
42/* for swap handling */
43extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
44 struct page *page, gfp_t mask, struct mem_cgroup **ptr);
45extern void mem_cgroup_commit_charge_swapin(struct page *page,
46 struct mem_cgroup *ptr);
47extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
48
32extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 49extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
33 gfp_t gfp_mask); 50 gfp_t gfp_mask);
34extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); 51extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
52extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
53extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
54extern void mem_cgroup_del_lru(struct page *page);
55extern void mem_cgroup_move_lists(struct page *page,
56 enum lru_list from, enum lru_list to);
35extern void mem_cgroup_uncharge_page(struct page *page); 57extern void mem_cgroup_uncharge_page(struct page *page);
36extern void mem_cgroup_uncharge_cache_page(struct page *page); 58extern void mem_cgroup_uncharge_cache_page(struct page *page);
37extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); 59extern int mem_cgroup_shrink_usage(struct page *page,
60 struct mm_struct *mm, gfp_t gfp_mask);
38 61
39extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 62extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
40 struct list_head *dst, 63 struct list_head *dst,
@@ -47,12 +70,20 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
47 70
48extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); 71extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
49 72
50#define mm_match_cgroup(mm, cgroup) \ 73static inline
51 ((cgroup) == mem_cgroup_from_task((mm)->owner)) 74int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
75{
76 struct mem_cgroup *mem;
77 rcu_read_lock();
78 mem = mem_cgroup_from_task((mm)->owner);
79 rcu_read_unlock();
80 return cgroup == mem;
81}
52 82
53extern int 83extern int
54mem_cgroup_prepare_migration(struct page *page, struct page *newpage); 84mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
55extern void mem_cgroup_end_migration(struct page *page); 85extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
86 struct page *oldpage, struct page *newpage);
56 87
57/* 88/*
58 * For memory reclaim. 89 * For memory reclaim.
@@ -65,13 +96,32 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
65 int priority); 96 int priority);
66extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, 97extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
67 int priority); 98 int priority);
99int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
100unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
101 struct zone *zone,
102 enum lru_list lru);
103struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
104 struct zone *zone);
105struct zone_reclaim_stat*
106mem_cgroup_get_reclaim_stat_from_page(struct page *page);
68 107
69extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, 108#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
70 int priority, enum lru_list lru); 109extern int do_swap_account;
110#endif
71 111
112static inline bool mem_cgroup_disabled(void)
113{
114 if (mem_cgroup_subsys.disabled)
115 return true;
116 return false;
117}
118
119extern bool mem_cgroup_oom_called(struct task_struct *task);
72 120
73#else /* CONFIG_CGROUP_MEM_RES_CTLR */ 121#else /* CONFIG_CGROUP_MEM_RES_CTLR */
74static inline int mem_cgroup_charge(struct page *page, 122struct mem_cgroup;
123
124static inline int mem_cgroup_newpage_charge(struct page *page,
75 struct mm_struct *mm, gfp_t gfp_mask) 125 struct mm_struct *mm, gfp_t gfp_mask)
76{ 126{
77 return 0; 127 return 0;
@@ -83,6 +133,21 @@ static inline int mem_cgroup_cache_charge(struct page *page,
83 return 0; 133 return 0;
84} 134}
85 135
136static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
137 struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
138{
139 return 0;
140}
141
142static inline void mem_cgroup_commit_charge_swapin(struct page *page,
143 struct mem_cgroup *ptr)
144{
145}
146
147static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
148{
149}
150
86static inline void mem_cgroup_uncharge_page(struct page *page) 151static inline void mem_cgroup_uncharge_page(struct page *page)
87{ 152{
88} 153}
@@ -91,12 +156,33 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
91{ 156{
92} 157}
93 158
94static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) 159static inline int mem_cgroup_shrink_usage(struct page *page,
160 struct mm_struct *mm, gfp_t gfp_mask)
95{ 161{
96 return 0; 162 return 0;
97} 163}
98 164
99static inline void mem_cgroup_move_lists(struct page *page, bool active) 165static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
166{
167}
168
169static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
170{
 171 return;
172}
173
174static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
175{
 176 return;
177}
178
179static inline void mem_cgroup_del_lru(struct page *page)
180{
 181 return;
182}
183
184static inline void
185mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
100{ 186{
101} 187}
102 188
@@ -112,12 +198,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
112} 198}
113 199
114static inline int 200static inline int
115mem_cgroup_prepare_migration(struct page *page, struct page *newpage) 201mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
116{ 202{
117 return 0; 203 return 0;
118} 204}
119 205
120static inline void mem_cgroup_end_migration(struct page *page) 206static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
207 struct page *oldpage,
208 struct page *newpage)
121{ 209{
122} 210}
123 211
@@ -146,12 +234,42 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
146{ 234{
147} 235}
148 236
149static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, 237static inline bool mem_cgroup_disabled(void)
150 struct zone *zone, int priority, 238{
151 enum lru_list lru) 239 return true;
240}
241
242static inline bool mem_cgroup_oom_called(struct task_struct *task)
243{
244 return false;
245}
246
247static inline int
248mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
249{
250 return 1;
251}
252
253static inline unsigned long
254mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
255 enum lru_list lru)
152{ 256{
153 return 0; 257 return 0;
154} 258}
259
260
261static inline struct zone_reclaim_stat*
262mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
263{
264 return NULL;
265}
266
267static inline struct zone_reclaim_stat*
268mem_cgroup_get_reclaim_stat_from_page(struct page *page)
269{
270 return NULL;
271}
272
155#endif /* CONFIG_CGROUP_MEM_CONT */ 273#endif /* CONFIG_CGROUP_MEM_CONT */
156 274
157#endif /* _LINUX_MEMCONTROL_H */ 275#endif /* _LINUX_MEMCONTROL_H */
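Following the gfp_mask rule in the comment at the top of this header, callers either pass GFP_KERNEL or filter their mask down to the reclaim-behavior bits. An illustrative call site, assuming GFP_RECLAIM_MASK (from mm/internal.h) is visible; the wrapper name is hypothetical:

    /* sketch: charging a freshly faulted anonymous page */
    static int charge_fault_page(struct page *page, struct mm_struct *mm,
                                 gfp_t gfp_mask)
    {
            /* only the reclaim-related bits mean anything to memcg */
            return mem_cgroup_newpage_charge(page, mm,
                                             gfp_mask & GFP_RECLAIM_MASK);
    }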
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c948350c378e..7fbb97267556 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
28{ 28{
29 list_add(&page->lru, &zone->lru[l].list); 29 list_add(&page->lru, &zone->lru[l].list);
30 __inc_zone_state(zone, NR_LRU_BASE + l); 30 __inc_zone_state(zone, NR_LRU_BASE + l);
31 mem_cgroup_add_lru_list(page, l);
31} 32}
32 33
33static inline void 34static inline void
@@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
35{ 36{
36 list_del(&page->lru); 37 list_del(&page->lru);
37 __dec_zone_state(zone, NR_LRU_BASE + l); 38 __dec_zone_state(zone, NR_LRU_BASE + l);
39 mem_cgroup_del_lru_list(page, l);
38} 40}
39 41
40static inline void 42static inline void
@@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
54 l += page_is_file_cache(page); 56 l += page_is_file_cache(page);
55 } 57 }
56 __dec_zone_state(zone, NR_LRU_BASE + l); 58 __dec_zone_state(zone, NR_LRU_BASE + l);
59 mem_cgroup_del_lru_list(page, l);
57} 60}
58 61
59/** 62/**
@@ -78,23 +81,4 @@ static inline enum lru_list page_lru(struct page *page)
78 return lru; 81 return lru;
79} 82}
80 83
81/**
82 * inactive_anon_is_low - check if anonymous pages need to be deactivated
83 * @zone: zone to check
84 *
85 * Returns true if the zone does not have enough inactive anon pages,
86 * meaning some active anon pages need to be deactivated.
87 */
88static inline int inactive_anon_is_low(struct zone *zone)
89{
90 unsigned long active, inactive;
91
92 active = zone_page_state(zone, NR_ACTIVE_ANON);
93 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
94
95 if (inactive * zone->inactive_ratio < active)
96 return 1;
97
98 return 0;
99}
100#endif 84#endif
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 35a7b5e19465..09c14e213b63 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -263,6 +263,19 @@ enum zone_type {
263#error ZONES_SHIFT -- too many zones configured adjust calculation 263#error ZONES_SHIFT -- too many zones configured adjust calculation
264#endif 264#endif
265 265
266struct zone_reclaim_stat {
267 /*
268 * The pageout code in vmscan.c keeps track of how many of the
 269 * mem/swap backed and file backed pages are referenced.
270 * The higher the rotated/scanned ratio, the more valuable
271 * that cache is.
272 *
273 * The anon LRU stats live in [0], file LRU stats in [1]
274 */
275 unsigned long recent_rotated[2];
276 unsigned long recent_scanned[2];
277};
278
266struct zone { 279struct zone {
267 /* Fields commonly accessed by the page allocator */ 280 /* Fields commonly accessed by the page allocator */
268 unsigned long pages_min, pages_low, pages_high; 281 unsigned long pages_min, pages_low, pages_high;
@@ -315,16 +328,7 @@ struct zone {
315 unsigned long nr_scan; 328 unsigned long nr_scan;
316 } lru[NR_LRU_LISTS]; 329 } lru[NR_LRU_LISTS];
317 330
318 /* 331 struct zone_reclaim_stat reclaim_stat;
319 * The pageout code in vmscan.c keeps track of how many of the
 320 * mem/swap backed and file backed pages are referenced.
321 * The higher the rotated/scanned ratio, the more valuable
322 * that cache is.
323 *
324 * The anon LRU stats live in [0], file LRU stats in [1]
325 */
326 unsigned long recent_rotated[2];
327 unsigned long recent_scanned[2];
328 332
329 unsigned long pages_scanned; /* since last reclaim */ 333 unsigned long pages_scanned; /* since last reclaim */
330 unsigned long flags; /* zone flags, see below */ 334 unsigned long flags; /* zone flags, see below */
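Pulling these counters into struct zone_reclaim_stat lets the same rotated/scanned bookkeeping live both in struct zone and in the per-zone memcg state returned by mem_cgroup_get_reclaim_stat(). A sketch of the ratio the comment describes (helper name hypothetical):

    /* higher rotated/scanned => that LRU's cache is judged more valuable */
    static unsigned long anon_rotate_pct(struct zone_reclaim_stat *rs)
    {
            if (!rs->recent_scanned[0])     /* [0] = anon, [1] = file */
                    return 0;
            return rs->recent_rotated[0] * 100 / rs->recent_scanned[0];
    }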
diff --git a/include/linux/nwpserial.h b/include/linux/nwpserial.h
new file mode 100644
index 000000000000..9acb21572eaf
--- /dev/null
+++ b/include/linux/nwpserial.h
@@ -0,0 +1,18 @@
1/*
2 * Serial Port driver for a NWP uart device
3 *
4 * Copyright (C) 2008 IBM Corp., Benjamin Krill <ben@codiert.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12#ifndef _NWPSERIAL_H
13#define _NWPSERIAL_H
14
15int nwpserial_register_port(struct uart_port *port);
16void nwpserial_unregister_port(int line);
17
18#endif /* _NWPSERIAL_H */
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 1e6d34bfa094..602cc1fdee90 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -26,10 +26,6 @@ enum {
26 PCG_LOCK, /* page cgroup is locked */ 26 PCG_LOCK, /* page cgroup is locked */
27 PCG_CACHE, /* charged as cache */ 27 PCG_CACHE, /* charged as cache */
28 PCG_USED, /* this object is in use. */ 28 PCG_USED, /* this object is in use. */
29 /* flags for LRU placement */
30 PCG_ACTIVE, /* page is active in this cgroup */
31 PCG_FILE, /* page is file system backed */
32 PCG_UNEVICTABLE, /* page is unevictableable */
33}; 29};
34 30
35#define TESTPCGFLAG(uname, lname) \ 31#define TESTPCGFLAG(uname, lname) \
@@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE)
50TESTPCGFLAG(Used, USED) 46TESTPCGFLAG(Used, USED)
51CLEARPCGFLAG(Used, USED) 47CLEARPCGFLAG(Used, USED)
52 48
53/* LRU management flags (from global-lru definition) */
54TESTPCGFLAG(File, FILE)
55SETPCGFLAG(File, FILE)
56CLEARPCGFLAG(File, FILE)
57
58TESTPCGFLAG(Active, ACTIVE)
59SETPCGFLAG(Active, ACTIVE)
60CLEARPCGFLAG(Active, ACTIVE)
61
62TESTPCGFLAG(Unevictable, UNEVICTABLE)
63SETPCGFLAG(Unevictable, UNEVICTABLE)
64CLEARPCGFLAG(Unevictable, UNEVICTABLE)
65
66static inline int page_cgroup_nid(struct page_cgroup *pc) 49static inline int page_cgroup_nid(struct page_cgroup *pc)
67{ 50{
68 return page_to_nid(pc->page); 51 return page_to_nid(pc->page);
@@ -105,4 +88,39 @@ static inline void page_cgroup_init(void)
105} 88}
106 89
107#endif 90#endif
91
92#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
93#include <linux/swap.h>
94extern struct mem_cgroup *
95swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
96extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
97extern int swap_cgroup_swapon(int type, unsigned long max_pages);
98extern void swap_cgroup_swapoff(int type);
99#else
100#include <linux/swap.h>
101
102static inline
103struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
104{
105 return NULL;
106}
107
108static inline
109struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
110{
111 return NULL;
112}
113
114static inline int
115swap_cgroup_swapon(int type, unsigned long max_pages)
116{
117 return 0;
118}
119
120static inline void swap_cgroup_swapoff(int type)
121{
122 return;
123}
124
125#endif
108#endif 126#endif
diff --git a/include/linux/pid.h b/include/linux/pid.h
index bb206c56d1f0..49f1c2f66e95 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -123,6 +123,24 @@ extern struct pid *alloc_pid(struct pid_namespace *ns);
123extern void free_pid(struct pid *pid); 123extern void free_pid(struct pid *pid);
124 124
125/* 125/*
126 * ns_of_pid() returns the pid namespace in which the specified pid was
127 * allocated.
128 *
129 * NOTE:
130 * ns_of_pid() is expected to be called for a process (task) that has
 131 * an attached 'struct pid' (see attach_pid(), detach_pid()), i.e. @pid
132 * is expected to be non-NULL. If @pid is NULL, caller should handle
133 * the resulting NULL pid-ns.
134 */
135static inline struct pid_namespace *ns_of_pid(struct pid *pid)
136{
137 struct pid_namespace *ns = NULL;
138 if (pid)
139 ns = pid->numbers[pid->level].ns;
140 return ns;
141}
142
143/*
126 * the helpers to get the pid's id seen from different namespaces 144 * the helpers to get the pid's id seen from different namespaces
127 * 145 *
128 * pid_nr() : global id, i.e. the id seen from the init namespace; 146 * pid_nr() : global id, i.e. the id seen from the init namespace;
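ns_of_pid() above reads the allocating namespace out of the top level of the pid's number array. A minimal usage sketch (wrapper name hypothetical; task_pid() is the usual accessor):

    /* sketch: which pid namespace was this task's pid allocated in? */
    static struct pid_namespace *pid_ns_of_task(struct task_struct *tsk)
    {
            /* NULL when the task has no attached struct pid, per the note */
            return ns_of_pid(task_pid(tsk));
    }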
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index d82fe825d62f..38d10326246a 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -79,11 +79,7 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns)
79} 79}
80#endif /* CONFIG_PID_NS */ 80#endif /* CONFIG_PID_NS */
81 81
82static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) 82extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
83{
84 return tsk->nsproxy->pid_ns;
85}
86
87void pidhash_init(void); 83void pidhash_init(void);
88void pidmap_init(void); 84void pidmap_init(void);
89 85
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 8fc909ef6787..9743e4dbc918 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -137,6 +137,9 @@ struct mddev_s
137 struct gendisk *gendisk; 137 struct gendisk *gendisk;
138 138
139 struct kobject kobj; 139 struct kobject kobj;
140 int hold_active;
141#define UNTIL_IOCTL 1
142#define UNTIL_STOP 2
140 143
141 /* Superblock information */ 144 /* Superblock information */
142 int major_version, 145 int major_version,
@@ -215,6 +218,9 @@ struct mddev_s
215#define MD_RECOVERY_FROZEN 9 218#define MD_RECOVERY_FROZEN 9
216 219
217 unsigned long recovery; 220 unsigned long recovery;
221 int recovery_disabled; /* if we detect that recovery
222 * will always fail, set this
223 * so we don't loop trying */
218 224
219 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
220 struct mutex reconfig_mutex; 226 struct mutex reconfig_mutex;
@@ -244,6 +250,9 @@ struct mddev_s
244 struct sysfs_dirent *sysfs_state; /* handle for 'array_state' 250 struct sysfs_dirent *sysfs_state; /* handle for 'array_state'
245 * file in sysfs. 251 * file in sysfs.
246 */ 252 */
253 struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */
254
255 struct work_struct del_work; /* used for delayed sysfs removal */
247 256
248 spinlock_t write_lock; 257 spinlock_t write_lock;
249 wait_queue_head_t sb_wait; /* for waiting on superblock updates */ 258 wait_queue_head_t sb_wait; /* for waiting on superblock updates */
@@ -334,17 +343,14 @@ static inline char * mdname (mddev_t * mddev)
334 * iterates through some rdev ringlist. It's safe to remove the 343 * iterates through some rdev ringlist. It's safe to remove the
335 * current 'rdev'. Don't touch 'tmp' though. 344 * current 'rdev'. Don't touch 'tmp' though.
336 */ 345 */
337#define rdev_for_each_list(rdev, tmp, list) \ 346#define rdev_for_each_list(rdev, tmp, head) \
338 \ 347 list_for_each_entry_safe(rdev, tmp, head, same_set)
339 for ((tmp) = (list).next; \ 348
340 (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \
341 (tmp) = (tmp)->next, (tmp)->prev != &(list) \
342 ; )
343/* 349/*
344 * iterates through the 'same array disks' ringlist 350 * iterates through the 'same array disks' ringlist
345 */ 351 */
346#define rdev_for_each(rdev, tmp, mddev) \ 352#define rdev_for_each(rdev, tmp, mddev) \
347 rdev_for_each_list(rdev, tmp, (mddev)->disks) 353 list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)
348 354
349#define rdev_for_each_rcu(rdev, mddev) \ 355#define rdev_for_each_rcu(rdev, mddev) \
350 list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) 356 list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
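
The hand-rolled ring walk becomes list_for_each_entry_safe(), which keeps the stated contract: the current 'rdev' may be unlinked, 'tmp' must be left alone. An illustrative caller (the Faulty flag name is assumed from md's rdev flags):

	static void sketch_prune_faulty(mddev_t *mddev)
	{
		mdk_rdev_t *rdev, *tmp;

		rdev_for_each(rdev, tmp, mddev) {
			if (test_bit(Faulty, &rdev->flags))	/* flag name assumed */
				list_del_init(&rdev->same_set);	/* safe mid-iteration */
		}
	}
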
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 8b4de4a41ff1..9491026afe66 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) {
194 return (ev<<32)| sb->events_lo; 194 return (ev<<32)| sb->events_lo;
195} 195}
196 196
197#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1)
198
197/* 199/*
198 * The version-1 superblock : 200 * The version-1 superblock :
199 * All numeric fields are little-endian. 201 * All numeric fields are little-endian.
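
A sketch of the decoding the new mask implies, assuming the version-1 superblock's ctime keeps seconds in the low 40 bits with sub-second data above them:

	static inline __u64 sketch_sb1_ctime_secs(const struct mdp_superblock_1 *sb)
	{
		/* low 40 bits: seconds; upper bits: sub-second data (assumption) */
		return le64_to_cpu(sb->ctime) & MD_SUPERBLOCK_1_TIME_SEC_MASK;
	}
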
diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h
index 1b2dda035f8e..fd42aa87c391 100644
--- a/include/linux/raid/raid0.h
+++ b/include/linux/raid/raid0.h
@@ -5,9 +5,9 @@
5 5
6struct strip_zone 6struct strip_zone
7{ 7{
8 sector_t zone_offset; /* Zone offset in md_dev */ 8 sector_t zone_start; /* Zone offset in md_dev (in sectors) */
9 sector_t dev_offset; /* Zone offset in real dev */ 9 sector_t dev_start; /* Zone offset in real dev (in sectors) */
10 sector_t size; /* Zone size */ 10 sector_t sectors; /* Zone size in sectors */
11 int nb_dev; /* # of devices attached to the zone */ 11 int nb_dev; /* # of devices attached to the zone */
12 mdk_rdev_t **dev; /* Devices attached to the zone */ 12 mdk_rdev_t **dev; /* Devices attached to the zone */
13}; 13};
@@ -19,8 +19,8 @@ struct raid0_private_data
19 mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ 19 mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
20 int nr_strip_zones; 20 int nr_strip_zones;
21 21
22 sector_t hash_spacing; 22 sector_t spacing;
23 int preshift; /* shift this before divide by hash_spacing */ 23 int sector_shift; /* shift this before divide by spacing */
24}; 24};
25 25
26typedef struct raid0_private_data raid0_conf_t; 26typedef struct raid0_private_data raid0_conf_t;
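
The renames make the units explicit: zone geometry is tracked in sectors, and 'sector_shift'/'spacing' describe a shift-then-divide lookup. A heavily hedged sketch of what that lookup might look like; the strip_zone array access is an assumption about the driver side:

	static struct strip_zone *sketch_find_zone(raid0_conf_t *conf, sector_t sector)
	{
		sector_t x = sector >> conf->sector_shift;	/* pre-shift, per comment */

		sector_div(x, (u32)conf->spacing);		/* index the zone table */
		return &conf->strip_zone[x];			/* field name assumed */
	}
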
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 271c1c2c9f6f..dede0a2cfc45 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -43,6 +43,10 @@ struct res_counter {
43 * the routines below consider this to be IRQ-safe 43 * the routines below consider this to be IRQ-safe
44 */ 44 */
45 spinlock_t lock; 45 spinlock_t lock;
46 /*
47 * Parent counter, used for hierarchical resource accounting
48 */
49 struct res_counter *parent;
46}; 50};
47 51
48/** 52/**
@@ -87,7 +91,7 @@ enum {
87 * helpers for accounting 91 * helpers for accounting
88 */ 92 */
89 93
90void res_counter_init(struct res_counter *counter); 94void res_counter_init(struct res_counter *counter, struct res_counter *parent);
91 95
92/* 96/*
93 * charge - try to consume more resource. 97 * charge - try to consume more resource.
@@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter);
103int __must_check res_counter_charge_locked(struct res_counter *counter, 107int __must_check res_counter_charge_locked(struct res_counter *counter,
104 unsigned long val); 108 unsigned long val);
105int __must_check res_counter_charge(struct res_counter *counter, 109int __must_check res_counter_charge(struct res_counter *counter,
106 unsigned long val); 110 unsigned long val, struct res_counter **limit_fail_at);
107 111
108/* 112/*
109 * uncharge - tell that some portion of the resource is released 113 * uncharge - tell that some portion of the resource is released
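
With a parent pointer and the *limit_fail_at out-parameter, the charge path presumably walks up the hierarchy and unwinds on failure. A sketch of that shape, as an assumption about the matching res_counter.c change (locking elided):

	int sketch_res_counter_charge(struct res_counter *counter, unsigned long val,
				      struct res_counter **limit_fail_at)
	{
		struct res_counter *c, *u;
		int ret = 0;

		*limit_fail_at = NULL;
		for (c = counter; c != NULL; c = c->parent) {
			ret = res_counter_charge_locked(c, val);
			if (ret < 0) {
				*limit_fail_at = c;	/* report who hit the limit */
				goto undo;
			}
		}
		return 0;
	undo:
		for (u = counter; u != c; u = u->parent)
			res_counter_uncharge_locked(u, val);	/* roll back partial charges */
		return ret;
	}
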
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index b4199841f1fc..90bbbf0b1161 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -161,6 +161,9 @@
161 161
162#define PORT_S3C6400 84 162#define PORT_S3C6400 84
163 163
164/* NWPSERIAL */
165#define PORT_NWPSERIAL 85
166
164#ifdef __KERNEL__ 167#ifdef __KERNEL__
165 168
166#include <linux/compiler.h> 169#include <linux/compiler.h>
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 91dee50fe260..d30215578877 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page)
214extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, 214extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
215 gfp_t gfp_mask); 215 gfp_t gfp_mask);
216extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 216extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
217 gfp_t gfp_mask); 217 gfp_t gfp_mask, bool noswap,
218 unsigned int swappiness);
218extern int __isolate_lru_page(struct page *page, int mode, int file); 219extern int __isolate_lru_page(struct page *page, int mode, int file);
219extern unsigned long shrink_all_memory(unsigned long nr_pages); 220extern unsigned long shrink_all_memory(unsigned long nr_pages);
220extern int vm_swappiness; 221extern int vm_swappiness;
@@ -333,6 +334,22 @@ static inline void disable_swap_token(void)
333 put_swap_token(swap_token_mm); 334 put_swap_token(swap_token_mm);
334} 335}
335 336
337#ifdef CONFIG_CGROUP_MEM_RES_CTLR
338extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent);
339#else
340static inline void
341mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
342{
343}
344#endif
345#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
346extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
347#else
348static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
349{
350}
351#endif
352
336#else /* CONFIG_SWAP */ 353#else /* CONFIG_SWAP */
337 354
338#define nr_swap_pages 0L 355#define nr_swap_pages 0L
@@ -409,6 +426,12 @@ static inline swp_entry_t get_swap_page(void)
409#define has_swap_token(x) 0 426#define has_swap_token(x) 0
410#define disable_swap_token() do { } while(0) 427#define disable_swap_token() do { } while(0)
411 428
429static inline int mem_cgroup_cache_charge_swapin(struct page *page,
430 struct mm_struct *mm, gfp_t mask, bool locked)
431{
432 return 0;
433}
434
412#endif /* CONFIG_SWAP */ 435#endif /* CONFIG_SWAP */
413#endif /* __KERNEL__*/ 436#endif /* __KERNEL__*/
414#endif /* _LINUX_SWAP_H */ 437#endif /* _LINUX_SWAP_H */
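
The #else stubs keep call sites unconditional: with memcg compiled out they collapse to nothing. An illustrative (hypothetical) teardown path:

	static void sketch_delete_from_swap_cache(struct page *page, swp_entry_t ent)
	{
		/* a no-op stub when CONFIG_CGROUP_MEM_RES_CTLR=n */
		mem_cgroup_uncharge_swapcache(page, ent);
	}
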
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 6369d89c25d5..f87f9614844d 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -136,8 +136,6 @@ struct xenbus_transaction
136/* Nil transaction ID. */ 136/* Nil transaction ID. */
137#define XBT_NIL ((struct xenbus_transaction) { 0 }) 137#define XBT_NIL ((struct xenbus_transaction) { 0 })
138 138
139int __init xenbus_dev_init(void);
140
141char **xenbus_directory(struct xenbus_transaction t, 139char **xenbus_directory(struct xenbus_transaction t,
142 const char *dir, const char *node, unsigned int *num); 140 const char *dir, const char *node, unsigned int *num);
143void *xenbus_read(struct xenbus_transaction t, 141void *xenbus_read(struct xenbus_transaction t,
diff --git a/init/Kconfig b/init/Kconfig
index e7893b1d3e42..a724a149bf3f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -271,59 +271,6 @@ config LOG_BUF_SHIFT
271 13 => 8 KB 271 13 => 8 KB
272 12 => 4 KB 272 12 => 4 KB
273 273
274config CGROUPS
275 bool "Control Group support"
276 help
277 This option will let you use process cgroup subsystems
278 such as Cpusets
279
280 Say N if unsure.
281
282config CGROUP_DEBUG
283 bool "Example debug cgroup subsystem"
284 depends on CGROUPS
285 default n
286 help
287 This option enables a simple cgroup subsystem that
288 exports useful debugging information about the cgroups
289 framework
290
291 Say N if unsure
292
293config CGROUP_NS
294 bool "Namespace cgroup subsystem"
295 depends on CGROUPS
296 help
297 Provides a simple namespace cgroup subsystem to
298 provide hierarchical naming of sets of namespaces,
299 for instance virtual servers and checkpoint/restart
300 jobs.
301
302config CGROUP_FREEZER
303 bool "control group freezer subsystem"
304 depends on CGROUPS
305 help
306 Provides a way to freeze and unfreeze all tasks in a
307 cgroup.
308
309config CGROUP_DEVICE
310 bool "Device controller for cgroups"
311 depends on CGROUPS && EXPERIMENTAL
312 help
313 Provides a cgroup implementing whitelists for devices which
314 a process in the cgroup can mknod or open.
315
316config CPUSETS
317 bool "Cpuset support"
318 depends on SMP && CGROUPS
319 help
320 This option will let you create and manage CPUSETs which
321 allow dynamically partitioning a system into sets of CPUs and
322 Memory Nodes and assigning tasks to run only within those sets.
323 This is primarily useful on large SMP or NUMA systems.
324
325 Say N if unsure.
326
327# 274#
328# Architectures with an unreliable sched_clock() should select this: 275# Architectures with an unreliable sched_clock() should select this:
329# 276#
@@ -337,6 +284,8 @@ config GROUP_SCHED
337 help 284 help
338 This feature lets CPU scheduler recognize task groups and control CPU 285 This feature lets CPU scheduler recognize task groups and control CPU
339 bandwidth allocation to such task groups. 286 bandwidth allocation to such task groups.
287 In order to create a group from arbitrary set of processes, use
288 CONFIG_CGROUPS. (See Control Group support.)
340 289
341config FAIR_GROUP_SCHED 290config FAIR_GROUP_SCHED
342 bool "Group scheduling for SCHED_OTHER" 291 bool "Group scheduling for SCHED_OTHER"
@@ -379,6 +328,66 @@ config CGROUP_SCHED
379 328
380endchoice 329endchoice
381 330
331menu "Control Group support"
332config CGROUPS
333 bool "Control Group support"
334 help
335 This option adds support for grouping sets of processes together, for
336 use with process control subsystems such as Cpusets, CFS, memory
337 controls or device isolation.
338 See
339 - Documentation/cpusets.txt (Cpusets)
340 - Documentation/scheduler/sched-design-CFS.txt (CFS)
341 - Documentation/cgroups/ (features for grouping, isolation)
342 - Documentation/controllers/ (features for resource control)
343
344 Say N if unsure.
345
346config CGROUP_DEBUG
347 bool "Example debug cgroup subsystem"
348 depends on CGROUPS
349 default n
350 help
351 This option enables a simple cgroup subsystem that
352 exports useful debugging information about the cgroups
353 framework
354
355 Say N if unsure
356
357config CGROUP_NS
358 bool "Namespace cgroup subsystem"
359 depends on CGROUPS
360 help
361 Provides a simple namespace cgroup subsystem to
362 provide hierarchical naming of sets of namespaces,
363 for instance virtual servers and checkpoint/restart
364 jobs.
365
366config CGROUP_FREEZER
367 bool "control group freezer subsystem"
368 depends on CGROUPS
369 help
370 Provides a way to freeze and unfreeze all tasks in a
371 cgroup.
372
373config CGROUP_DEVICE
374 bool "Device controller for cgroups"
375 depends on CGROUPS && EXPERIMENTAL
376 help
377 Provides a cgroup implementing whitelists for devices which
378 a process in the cgroup can mknod or open.
379
380config CPUSETS
381 bool "Cpuset support"
382 depends on SMP && CGROUPS
383 help
384 This option will let you create and manage CPUSETs which
385 allow dynamically partitioning a system into sets of CPUs and
386 Memory Nodes and assigning tasks to run only within those sets.
387 This is primarily useful on large SMP or NUMA systems.
388
389 Say N if unsure.
390
382config CGROUP_CPUACCT 391config CGROUP_CPUACCT
383 bool "Simple CPU accounting cgroup subsystem" 392 bool "Simple CPU accounting cgroup subsystem"
384 depends on CGROUPS 393 depends on CGROUPS
@@ -393,9 +402,6 @@ config RESOURCE_COUNTERS
393 infrastructure that works with cgroups 402 infrastructure that works with cgroups
394 depends on CGROUPS 403 depends on CGROUPS
395 404
396config MM_OWNER
397 bool
398
399config CGROUP_MEM_RES_CTLR 405config CGROUP_MEM_RES_CTLR
400 bool "Memory Resource Controller for Control Groups" 406 bool "Memory Resource Controller for Control Groups"
401 depends on CGROUPS && RESOURCE_COUNTERS 407 depends on CGROUPS && RESOURCE_COUNTERS
@@ -414,11 +420,33 @@ config CGROUP_MEM_RES_CTLR
414 sure you need the memory resource controller. Even when you enable 420 sure you need the memory resource controller. Even when you enable
415 this, you can set "cgroup_disable=memory" at your boot option to 421 this, you can set "cgroup_disable=memory" at your boot option to
416 disable memory resource controller and you can avoid overheads. 422 disable memory resource controller and you can avoid overheads.
417 (and lose benefits of memory resource contoller) 423 (and lose benefits of memory resource controller)
418 424
419 This config option also selects MM_OWNER config option, which 425 This config option also selects MM_OWNER config option, which
420 could in turn add some fork/exit overhead. 426 could in turn add some fork/exit overhead.
421 427
428config MM_OWNER
429 bool
430
431config CGROUP_MEM_RES_CTLR_SWAP
432 bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
433 depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
434 help
435 Add swap management to the memory resource controller. When you
436 enable this, you can limit mem+swap usage per cgroup. In other
437 words, when this is disabled, the memory resource controller does
438 not account for swap usage, so a process can exhaust all of the
439 swap. This extension is useful when you want to avoid swap
440 exhaustion, but it adds overhead and consumes memory to track the
441 extra state. Be especially careful about enabling this on 32-bit
442 or small-memory systems. When the memory resource controller is
443 disabled by boot option, this is automatically disabled as well
444 and adds no overhead. Even when this config is set to y, swap is
445 not accounted if the "noswapaccount" boot option is set.
446
447
448endmenu
449
422config SYSFS_DEPRECATED 450config SYSFS_DEPRECATED
423 bool 451 bool
424 452
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index eddb6247a553..23fdb8492b8e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -505,7 +505,8 @@ static void __do_notify(struct mqueue_inode_info *info)
505 sig_i.si_errno = 0; 505 sig_i.si_errno = 0;
506 sig_i.si_code = SI_MESGQ; 506 sig_i.si_code = SI_MESGQ;
507 sig_i.si_value = info->notify.sigev_value; 507 sig_i.si_value = info->notify.sigev_value;
508 sig_i.si_pid = task_tgid_vnr(current); 508 sig_i.si_pid = task_tgid_nr_ns(current,
509 ns_of_pid(info->notify_owner));
509 sig_i.si_uid = current_uid(); 510 sig_i.si_uid = current_uid();
510 511
511 kill_pid_info(info->notify.sigev_signo, 512 kill_pid_info(info->notify.sigev_signo,
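
Distilled from the hunk above: the sender's tgid is reported as seen from the namespace of whoever registered the notification, not the sender's own. As a standalone sketch:

	/* Illustrative: report 'current' as seen by the watcher's namespace. */
	static pid_t sketch_si_pid_for(struct pid *notify_owner)
	{
		return task_tgid_nr_ns(current, ns_of_pid(notify_owner));
	}
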
diff --git a/kernel/async.c b/kernel/async.c
index 97373380c9e7..64cc916299a5 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -206,7 +206,9 @@ EXPORT_SYMBOL_GPL(async_schedule_special);
206 206
207void async_synchronize_full(void) 207void async_synchronize_full(void)
208{ 208{
209 async_synchronize_cookie(next_cookie); 209 do {
210 async_synchronize_cookie(next_cookie);
211 } while (!list_empty(&async_running) || !list_empty(&async_pending));
210} 212}
211EXPORT_SYMBOL_GPL(async_synchronize_full); 213EXPORT_SYMBOL_GPL(async_synchronize_full);
212 214
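
The loop closes a race: an entry that is still running can queue new work after next_cookie is sampled, so the wait repeats until both lists are observed empty. A usage sketch of what that guarantees a caller:

	/* Module exit must not return while async work from this module may
	 * still be running or newly queued.
	 */
	static void __exit sketch_module_exit(void)
	{
		async_synchronize_full();	/* loops until running+pending drain */
	}
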
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f221446aa02d..c29831076e7a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -84,7 +84,7 @@ struct cgroupfs_root {
84 /* Tracks how many cgroups are currently defined in hierarchy.*/ 84 /* Tracks how many cgroups are currently defined in hierarchy.*/
85 int number_of_cgroups; 85 int number_of_cgroups;
86 86
87 /* A list running through the mounted hierarchies */ 87 /* A list running through the active hierarchies */
88 struct list_head root_list; 88 struct list_head root_list;
89 89
90 /* Hierarchy-specific flags */ 90 /* Hierarchy-specific flags */
@@ -148,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp)
148#define for_each_subsys(_root, _ss) \ 148#define for_each_subsys(_root, _ss) \
149list_for_each_entry(_ss, &_root->subsys_list, sibling) 149list_for_each_entry(_ss, &_root->subsys_list, sibling)
150 150
151/* for_each_root() allows you to iterate across the active hierarchies */ 151/* for_each_active_root() allows you to iterate across the active hierarchies */
152#define for_each_root(_root) \ 152#define for_each_active_root(_root) \
153list_for_each_entry(_root, &roots, root_list) 153list_for_each_entry(_root, &roots, root_list)
154 154
155/* the list of cgroups eligible for automatic release. Protected by 155/* the list of cgroups eligible for automatic release. Protected by
@@ -271,7 +271,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
271 271
272 rcu_read_lock(); 272 rcu_read_lock();
273 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 273 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
274 struct cgroup *cgrp = cg->subsys[i]->cgroup; 274 struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
275 if (atomic_dec_and_test(&cgrp->count) && 275 if (atomic_dec_and_test(&cgrp->count) &&
276 notify_on_release(cgrp)) { 276 notify_on_release(cgrp)) {
277 if (taskexit) 277 if (taskexit)
@@ -384,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp)
384 return 0; 384 return 0;
385} 385}
386 386
387/**
388 * link_css_set - a helper function to link a css_set to a cgroup
389 * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
390 * @cg: the css_set to be linked
391 * @cgrp: the destination cgroup
392 */
393static void link_css_set(struct list_head *tmp_cg_links,
394 struct css_set *cg, struct cgroup *cgrp)
395{
396 struct cg_cgroup_link *link;
397
398 BUG_ON(list_empty(tmp_cg_links));
399 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
400 cgrp_link_list);
401 link->cg = cg;
402 list_move(&link->cgrp_link_list, &cgrp->css_sets);
403 list_add(&link->cg_link_list, &cg->cg_links);
404}
405
387/* 406/*
388 * find_css_set() takes an existing cgroup group and a 407 * find_css_set() takes an existing cgroup group and a
389 * cgroup object, and returns a css_set object that's 408 * cgroup object, and returns a css_set object that's
@@ -399,7 +418,6 @@ static struct css_set *find_css_set(
399 int i; 418 int i;
400 419
401 struct list_head tmp_cg_links; 420 struct list_head tmp_cg_links;
402 struct cg_cgroup_link *link;
403 421
404 struct hlist_head *hhead; 422 struct hlist_head *hhead;
405 423
@@ -444,26 +462,11 @@ static struct css_set *find_css_set(
444 * only do it for the first subsystem in each 462 * only do it for the first subsystem in each
445 * hierarchy 463 * hierarchy
446 */ 464 */
447 if (ss->root->subsys_list.next == &ss->sibling) { 465 if (ss->root->subsys_list.next == &ss->sibling)
448 BUG_ON(list_empty(&tmp_cg_links)); 466 link_css_set(&tmp_cg_links, res, cgrp);
449 link = list_entry(tmp_cg_links.next,
450 struct cg_cgroup_link,
451 cgrp_link_list);
452 list_del(&link->cgrp_link_list);
453 list_add(&link->cgrp_link_list, &cgrp->css_sets);
454 link->cg = res;
455 list_add(&link->cg_link_list, &res->cg_links);
456 }
457 }
458 if (list_empty(&rootnode.subsys_list)) {
459 link = list_entry(tmp_cg_links.next,
460 struct cg_cgroup_link,
461 cgrp_link_list);
462 list_del(&link->cgrp_link_list);
463 list_add(&link->cgrp_link_list, &dummytop->css_sets);
464 link->cg = res;
465 list_add(&link->cg_link_list, &res->cg_links);
466 } 467 }
468 if (list_empty(&rootnode.subsys_list))
469 link_css_set(&tmp_cg_links, res, dummytop);
467 470
468 BUG_ON(!list_empty(&tmp_cg_links)); 471 BUG_ON(!list_empty(&tmp_cg_links));
469 472
@@ -586,11 +589,18 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
586{ 589{
587 struct cgroup_subsys *ss; 590 struct cgroup_subsys *ss;
588 for_each_subsys(cgrp->root, ss) 591 for_each_subsys(cgrp->root, ss)
589 if (ss->pre_destroy && cgrp->subsys[ss->subsys_id]) 592 if (ss->pre_destroy)
590 ss->pre_destroy(ss, cgrp); 593 ss->pre_destroy(ss, cgrp);
591 return; 594 return;
592} 595}
593 596
597static void free_cgroup_rcu(struct rcu_head *obj)
598{
599 struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
600
601 kfree(cgrp);
602}
603
594static void cgroup_diput(struct dentry *dentry, struct inode *inode) 604static void cgroup_diput(struct dentry *dentry, struct inode *inode)
595{ 605{
596 /* is dentry a directory ? if so, kfree() associated cgroup */ 606 /* is dentry a directory ? if so, kfree() associated cgroup */
@@ -610,19 +620,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
610 /* 620 /*
611 * Release the subsystem state objects. 621 * Release the subsystem state objects.
612 */ 622 */
613 for_each_subsys(cgrp->root, ss) { 623 for_each_subsys(cgrp->root, ss)
614 if (cgrp->subsys[ss->subsys_id]) 624 ss->destroy(ss, cgrp);
615 ss->destroy(ss, cgrp);
616 }
617 625
618 cgrp->root->number_of_cgroups--; 626 cgrp->root->number_of_cgroups--;
619 mutex_unlock(&cgroup_mutex); 627 mutex_unlock(&cgroup_mutex);
620 628
621 /* Drop the active superblock reference that we took when we 629 /*
622 * created the cgroup */ 630 * Drop the active superblock reference that we took when we
631 * created the cgroup
632 */
623 deactivate_super(cgrp->root->sb); 633 deactivate_super(cgrp->root->sb);
624 634
625 kfree(cgrp); 635 call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
626 } 636 }
627 iput(inode); 637 iput(inode);
628} 638}
@@ -712,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
712 BUG_ON(cgrp->subsys[i]); 722 BUG_ON(cgrp->subsys[i]);
713 BUG_ON(!dummytop->subsys[i]); 723 BUG_ON(!dummytop->subsys[i]);
714 BUG_ON(dummytop->subsys[i]->cgroup != dummytop); 724 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
725 mutex_lock(&ss->hierarchy_mutex);
715 cgrp->subsys[i] = dummytop->subsys[i]; 726 cgrp->subsys[i] = dummytop->subsys[i];
716 cgrp->subsys[i]->cgroup = cgrp; 727 cgrp->subsys[i]->cgroup = cgrp;
717 list_add(&ss->sibling, &root->subsys_list); 728 list_move(&ss->sibling, &root->subsys_list);
718 rcu_assign_pointer(ss->root, root); 729 ss->root = root;
719 if (ss->bind) 730 if (ss->bind)
720 ss->bind(ss, cgrp); 731 ss->bind(ss, cgrp);
721 732 mutex_unlock(&ss->hierarchy_mutex);
722 } else if (bit & removed_bits) { 733 } else if (bit & removed_bits) {
723 /* We're removing this subsystem */ 734 /* We're removing this subsystem */
724 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); 735 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
725 BUG_ON(cgrp->subsys[i]->cgroup != cgrp); 736 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
737 mutex_lock(&ss->hierarchy_mutex);
726 if (ss->bind) 738 if (ss->bind)
727 ss->bind(ss, dummytop); 739 ss->bind(ss, dummytop);
728 dummytop->subsys[i]->cgroup = dummytop; 740 dummytop->subsys[i]->cgroup = dummytop;
729 cgrp->subsys[i] = NULL; 741 cgrp->subsys[i] = NULL;
730 rcu_assign_pointer(subsys[i]->root, &rootnode); 742 subsys[i]->root = &rootnode;
731 list_del(&ss->sibling); 743 list_move(&ss->sibling, &rootnode.subsys_list);
744 mutex_unlock(&ss->hierarchy_mutex);
732 } else if (bit & final_bits) { 745 } else if (bit & final_bits) {
733 /* Subsystem state should already exist */ 746 /* Subsystem state should already exist */
734 BUG_ON(!cgrp->subsys[i]); 747 BUG_ON(!cgrp->subsys[i]);
@@ -990,7 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
990 root = NULL; 1003 root = NULL;
991 } else { 1004 } else {
992 /* New superblock */ 1005 /* New superblock */
993 struct cgroup *cgrp = &root->top_cgroup; 1006 struct cgroup *root_cgrp = &root->top_cgroup;
994 struct inode *inode; 1007 struct inode *inode;
995 int i; 1008 int i;
996 1009
@@ -1031,7 +1044,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1031 list_add(&root->root_list, &roots); 1044 list_add(&root->root_list, &roots);
1032 root_count++; 1045 root_count++;
1033 1046
1034 sb->s_root->d_fsdata = &root->top_cgroup; 1047 sb->s_root->d_fsdata = root_cgrp;
1035 root->top_cgroup.dentry = sb->s_root; 1048 root->top_cgroup.dentry = sb->s_root;
1036 1049
1037 /* Link the top cgroup in this hierarchy into all 1050 /* Link the top cgroup in this hierarchy into all
@@ -1042,29 +1055,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1042 struct hlist_node *node; 1055 struct hlist_node *node;
1043 struct css_set *cg; 1056 struct css_set *cg;
1044 1057
1045 hlist_for_each_entry(cg, node, hhead, hlist) { 1058 hlist_for_each_entry(cg, node, hhead, hlist)
1046 struct cg_cgroup_link *link; 1059 link_css_set(&tmp_cg_links, cg, root_cgrp);
1047
1048 BUG_ON(list_empty(&tmp_cg_links));
1049 link = list_entry(tmp_cg_links.next,
1050 struct cg_cgroup_link,
1051 cgrp_link_list);
1052 list_del(&link->cgrp_link_list);
1053 link->cg = cg;
1054 list_add(&link->cgrp_link_list,
1055 &root->top_cgroup.css_sets);
1056 list_add(&link->cg_link_list, &cg->cg_links);
1057 }
1058 } 1060 }
1059 write_unlock(&css_set_lock); 1061 write_unlock(&css_set_lock);
1060 1062
1061 free_cg_links(&tmp_cg_links); 1063 free_cg_links(&tmp_cg_links);
1062 1064
1063 BUG_ON(!list_empty(&cgrp->sibling)); 1065 BUG_ON(!list_empty(&root_cgrp->sibling));
1064 BUG_ON(!list_empty(&cgrp->children)); 1066 BUG_ON(!list_empty(&root_cgrp->children));
1065 BUG_ON(root->number_of_cgroups != 1); 1067 BUG_ON(root->number_of_cgroups != 1);
1066 1068
1067 cgroup_populate_dir(cgrp); 1069 cgroup_populate_dir(root_cgrp);
1068 mutex_unlock(&inode->i_mutex); 1070 mutex_unlock(&inode->i_mutex);
1069 mutex_unlock(&cgroup_mutex); 1071 mutex_unlock(&cgroup_mutex);
1070 } 1072 }
@@ -1113,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
1113 } 1115 }
1114 write_unlock(&css_set_lock); 1116 write_unlock(&css_set_lock);
1115 1117
1116 if (!list_empty(&root->root_list)) { 1118 list_del(&root->root_list);
1117 list_del(&root->root_list); 1119 root_count--;
1118 root_count--; 1120
1119 }
1120 mutex_unlock(&cgroup_mutex); 1121 mutex_unlock(&cgroup_mutex);
1121 1122
1122 kfree(root); 1123 kfree(root);
@@ -1145,14 +1146,16 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
1145 * @buf: the buffer to write the path into 1146 * @buf: the buffer to write the path into
1146 * @buflen: the length of the buffer 1147 * @buflen: the length of the buffer
1147 * 1148 *
1148 * Called with cgroup_mutex held. Writes path of cgroup into buf. 1149 * Called with cgroup_mutex held or else with an RCU-protected cgroup
1149 * Returns 0 on success, -errno on error. 1150 * reference. Writes path of cgroup into buf. Returns 0 on success,
1151 * -errno on error.
1150 */ 1152 */
1151int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) 1153int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1152{ 1154{
1153 char *start; 1155 char *start;
1156 struct dentry *dentry = rcu_dereference(cgrp->dentry);
1154 1157
1155 if (cgrp == dummytop) { 1158 if (!dentry || cgrp == dummytop) {
1156 /* 1159 /*
1157 * Inactive subsystems have no dentry for their root 1160 * Inactive subsystems have no dentry for their root
1158 * cgroup 1161 * cgroup
@@ -1165,13 +1168,14 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1165 1168
1166 *--start = '\0'; 1169 *--start = '\0';
1167 for (;;) { 1170 for (;;) {
1168 int len = cgrp->dentry->d_name.len; 1171 int len = dentry->d_name.len;
1169 if ((start -= len) < buf) 1172 if ((start -= len) < buf)
1170 return -ENAMETOOLONG; 1173 return -ENAMETOOLONG;
1171 memcpy(start, cgrp->dentry->d_name.name, len); 1174 memcpy(start, cgrp->dentry->d_name.name, len);
1172 cgrp = cgrp->parent; 1175 cgrp = cgrp->parent;
1173 if (!cgrp) 1176 if (!cgrp)
1174 break; 1177 break;
1178 dentry = rcu_dereference(cgrp->dentry);
1175 if (!cgrp->parent) 1179 if (!cgrp->parent)
1176 continue; 1180 continue;
1177 if (--start < buf) 1181 if (--start < buf)
@@ -1216,7 +1220,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1216 int retval = 0; 1220 int retval = 0;
1217 struct cgroup_subsys *ss; 1221 struct cgroup_subsys *ss;
1218 struct cgroup *oldcgrp; 1222 struct cgroup *oldcgrp;
1219 struct css_set *cg = tsk->cgroups; 1223 struct css_set *cg;
1220 struct css_set *newcg; 1224 struct css_set *newcg;
1221 struct cgroupfs_root *root = cgrp->root; 1225 struct cgroupfs_root *root = cgrp->root;
1222 int subsys_id; 1226 int subsys_id;
@@ -1236,11 +1240,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1236 } 1240 }
1237 } 1241 }
1238 1242
1243 task_lock(tsk);
1244 cg = tsk->cgroups;
1245 get_css_set(cg);
1246 task_unlock(tsk);
1239 /* 1247 /*
1240 * Locate or allocate a new css_set for this task, 1248 * Locate or allocate a new css_set for this task,
1241 * based on its final set of cgroups 1249 * based on its final set of cgroups
1242 */ 1250 */
1243 newcg = find_css_set(cg, cgrp); 1251 newcg = find_css_set(cg, cgrp);
1252 put_css_set(cg);
1244 if (!newcg) 1253 if (!newcg)
1245 return -ENOMEM; 1254 return -ENOMEM;
1246 1255
@@ -1445,7 +1454,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1445 struct cftype *cft = __d_cft(file->f_dentry); 1454 struct cftype *cft = __d_cft(file->f_dentry);
1446 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 1455 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1447 1456
1448 if (!cft || cgroup_is_removed(cgrp)) 1457 if (cgroup_is_removed(cgrp))
1449 return -ENODEV; 1458 return -ENODEV;
1450 if (cft->write) 1459 if (cft->write)
1451 return cft->write(cgrp, cft, file, buf, nbytes, ppos); 1460 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
@@ -1490,7 +1499,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1490 struct cftype *cft = __d_cft(file->f_dentry); 1499 struct cftype *cft = __d_cft(file->f_dentry);
1491 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 1500 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1492 1501
1493 if (!cft || cgroup_is_removed(cgrp)) 1502 if (cgroup_is_removed(cgrp))
1494 return -ENODEV; 1503 return -ENODEV;
1495 1504
1496 if (cft->read) 1505 if (cft->read)
@@ -1554,10 +1563,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
1554 err = generic_file_open(inode, file); 1563 err = generic_file_open(inode, file);
1555 if (err) 1564 if (err)
1556 return err; 1565 return err;
1557
1558 cft = __d_cft(file->f_dentry); 1566 cft = __d_cft(file->f_dentry);
1559 if (!cft) 1567
1560 return -ENODEV;
1561 if (cft->read_map || cft->read_seq_string) { 1568 if (cft->read_map || cft->read_seq_string) {
1562 struct cgroup_seqfile_state *state = 1569 struct cgroup_seqfile_state *state =
1563 kzalloc(sizeof(*state), GFP_USER); 1570 kzalloc(sizeof(*state), GFP_USER);
@@ -1671,7 +1678,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
1671 if (!error) { 1678 if (!error) {
1672 dentry->d_fsdata = cgrp; 1679 dentry->d_fsdata = cgrp;
1673 inc_nlink(parent->d_inode); 1680 inc_nlink(parent->d_inode);
1674 cgrp->dentry = dentry; 1681 rcu_assign_pointer(cgrp->dentry, dentry);
1675 dget(dentry); 1682 dget(dentry);
1676 } 1683 }
1677 dput(dentry); 1684 dput(dentry);
@@ -1812,6 +1819,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
1812{ 1819{
1813 struct task_struct *res; 1820 struct task_struct *res;
1814 struct list_head *l = it->task; 1821 struct list_head *l = it->task;
1822 struct cg_cgroup_link *link;
1815 1823
1816 /* If the iterator cg is NULL, we have no tasks */ 1824 /* If the iterator cg is NULL, we have no tasks */
1817 if (!it->cg_link) 1825 if (!it->cg_link)
@@ -1819,7 +1827,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
1819 res = list_entry(l, struct task_struct, cg_list); 1827 res = list_entry(l, struct task_struct, cg_list);
1820 /* Advance iterator to find next entry */ 1828 /* Advance iterator to find next entry */
1821 l = l->next; 1829 l = l->next;
1822 if (l == &res->cgroups->tasks) { 1830 link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
1831 if (l == &link->cg->tasks) {
1823 /* We reached the end of this task list - move on to 1832 /* We reached the end of this task list - move on to
1824 * the next cg_cgroup_link */ 1833 * the next cg_cgroup_link */
1825 cgroup_advance_iter(cgrp, it); 1834 cgroup_advance_iter(cgrp, it);
@@ -2013,14 +2022,16 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
2013 */ 2022 */
2014static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) 2023static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
2015{ 2024{
2016 int n = 0; 2025 int n = 0, pid;
2017 struct cgroup_iter it; 2026 struct cgroup_iter it;
2018 struct task_struct *tsk; 2027 struct task_struct *tsk;
2019 cgroup_iter_start(cgrp, &it); 2028 cgroup_iter_start(cgrp, &it);
2020 while ((tsk = cgroup_iter_next(cgrp, &it))) { 2029 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2021 if (unlikely(n == npids)) 2030 if (unlikely(n == npids))
2022 break; 2031 break;
2023 pidarray[n++] = task_pid_vnr(tsk); 2032 pid = task_pid_vnr(tsk);
2033 if (pid > 0)
2034 pidarray[n++] = pid;
2024 } 2035 }
2025 cgroup_iter_end(cgrp, &it); 2036 cgroup_iter_end(cgrp, &it);
2026 return n; 2037 return n;
@@ -2052,7 +2063,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2052 2063
2053 ret = 0; 2064 ret = 0;
2054 cgrp = dentry->d_fsdata; 2065 cgrp = dentry->d_fsdata;
2055 rcu_read_lock();
2056 2066
2057 cgroup_iter_start(cgrp, &it); 2067 cgroup_iter_start(cgrp, &it);
2058 while ((tsk = cgroup_iter_next(cgrp, &it))) { 2068 while ((tsk = cgroup_iter_next(cgrp, &it))) {
@@ -2077,7 +2087,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2077 } 2087 }
2078 cgroup_iter_end(cgrp, &it); 2088 cgroup_iter_end(cgrp, &it);
2079 2089
2080 rcu_read_unlock();
2081err: 2090err:
2082 return ret; 2091 return ret;
2083} 2092}
@@ -2324,7 +2333,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
2324 struct cgroup *cgrp) 2333 struct cgroup *cgrp)
2325{ 2334{
2326 css->cgroup = cgrp; 2335 css->cgroup = cgrp;
2327 atomic_set(&css->refcnt, 0); 2336 atomic_set(&css->refcnt, 1);
2328 css->flags = 0; 2337 css->flags = 0;
2329 if (cgrp == dummytop) 2338 if (cgrp == dummytop)
2330 set_bit(CSS_ROOT, &css->flags); 2339 set_bit(CSS_ROOT, &css->flags);
@@ -2332,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
2332 cgrp->subsys[ss->subsys_id] = css; 2341 cgrp->subsys[ss->subsys_id] = css;
2333} 2342}
2334 2343
2344static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2345{
2346 /* We need to take each hierarchy_mutex in a consistent order */
2347 int i;
2348
2349 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2350 struct cgroup_subsys *ss = subsys[i];
2351 if (ss->root == root)
2352 mutex_lock_nested(&ss->hierarchy_mutex, i);
2353 }
2354}
2355
2356static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2357{
2358 int i;
2359
2360 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2361 struct cgroup_subsys *ss = subsys[i];
2362 if (ss->root == root)
2363 mutex_unlock(&ss->hierarchy_mutex);
2364 }
2365}
2366
2335/* 2367/*
2336 * cgroup_create - create a cgroup 2368 * cgroup_create - create a cgroup
2337 * @parent: cgroup that will be parent of the new cgroup 2369 * @parent: cgroup that will be parent of the new cgroup
@@ -2380,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2380 init_cgroup_css(css, ss, cgrp); 2412 init_cgroup_css(css, ss, cgrp);
2381 } 2413 }
2382 2414
2415 cgroup_lock_hierarchy(root);
2383 list_add(&cgrp->sibling, &cgrp->parent->children); 2416 list_add(&cgrp->sibling, &cgrp->parent->children);
2417 cgroup_unlock_hierarchy(root);
2384 root->number_of_cgroups++; 2418 root->number_of_cgroups++;
2385 2419
2386 err = cgroup_create_dir(cgrp, dentry, mode); 2420 err = cgroup_create_dir(cgrp, dentry, mode);
@@ -2431,7 +2465,7 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
2431{ 2465{
2432 /* Check the reference count on each subsystem. Since we 2466 /* Check the reference count on each subsystem. Since we
2433 * already established that there are no tasks in the 2467 * already established that there are no tasks in the
2434 * cgroup, if the css refcount is also 0, then there should 2468 * cgroup, if the css refcount is also 1, then there should
2435 * be no outstanding references, so the subsystem is safe to 2469 * be no outstanding references, so the subsystem is safe to
2436 * destroy. We scan across all subsystems rather than using 2470 * destroy. We scan across all subsystems rather than using
2437 * the per-hierarchy linked list of mounted subsystems since 2471 * the per-hierarchy linked list of mounted subsystems since
@@ -2452,19 +2486,67 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
2452 * matter, since it can only happen if the cgroup 2486 * matter, since it can only happen if the cgroup
2453 * has been deleted and hence no longer needs the 2487 * has been deleted and hence no longer needs the
2454 * release agent to be called anyway. */ 2488 * release agent to be called anyway. */
2455 if (css && atomic_read(&css->refcnt)) 2489 if (css && (atomic_read(&css->refcnt) > 1))
2456 return 1; 2490 return 1;
2457 } 2491 }
2458 return 0; 2492 return 0;
2459} 2493}
2460 2494
2495/*
2496 * Atomically mark all (or else none) of the cgroup's CSS objects as
2497 * CSS_REMOVED. Return true on success, or false if the cgroup has
2498 * busy subsystems. Call with cgroup_mutex held.
2499 */
2500
2501static int cgroup_clear_css_refs(struct cgroup *cgrp)
2502{
2503 struct cgroup_subsys *ss;
2504 unsigned long flags;
2505 bool failed = false;
2506 local_irq_save(flags);
2507 for_each_subsys(cgrp->root, ss) {
2508 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
2509 int refcnt;
2510 do {
2511 /* We can only remove a CSS with a refcnt==1 */
2512 refcnt = atomic_read(&css->refcnt);
2513 if (refcnt > 1) {
2514 failed = true;
2515 goto done;
2516 }
2517 BUG_ON(!refcnt);
2518 /*
2519 * Drop the refcnt to 0 while we check other
2520 * subsystems. This will cause any racing
2521 * css_tryget() to spin until we set the
2522 * CSS_REMOVED bits or abort
2523 */
2524 } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
2525 }
2526 done:
2527 for_each_subsys(cgrp->root, ss) {
2528 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
2529 if (failed) {
2530 /*
2531 * Restore old refcnt if we previously managed
2532 * to clear it from 1 to 0
2533 */
2534 if (!atomic_read(&css->refcnt))
2535 atomic_set(&css->refcnt, 1);
2536 } else {
2537 /* Commit the fact that the CSS is removed */
2538 set_bit(CSS_REMOVED, &css->flags);
2539 }
2540 }
2541 local_irq_restore(flags);
2542 return !failed;
2543}
2544
2461static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) 2545static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2462{ 2546{
2463 struct cgroup *cgrp = dentry->d_fsdata; 2547 struct cgroup *cgrp = dentry->d_fsdata;
2464 struct dentry *d; 2548 struct dentry *d;
2465 struct cgroup *parent; 2549 struct cgroup *parent;
2466 struct super_block *sb;
2467 struct cgroupfs_root *root;
2468 2550
2469 /* the vfs holds both inode->i_mutex already */ 2551 /* the vfs holds both inode->i_mutex already */
2470 2552
@@ -2487,12 +2569,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2487 2569
2488 mutex_lock(&cgroup_mutex); 2570 mutex_lock(&cgroup_mutex);
2489 parent = cgrp->parent; 2571 parent = cgrp->parent;
2490 root = cgrp->root;
2491 sb = root->sb;
2492 2572
2493 if (atomic_read(&cgrp->count) 2573 if (atomic_read(&cgrp->count)
2494 || !list_empty(&cgrp->children) 2574 || !list_empty(&cgrp->children)
2495 || cgroup_has_css_refs(cgrp)) { 2575 || !cgroup_clear_css_refs(cgrp)) {
2496 mutex_unlock(&cgroup_mutex); 2576 mutex_unlock(&cgroup_mutex);
2497 return -EBUSY; 2577 return -EBUSY;
2498 } 2578 }
@@ -2502,8 +2582,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2502 if (!list_empty(&cgrp->release_list)) 2582 if (!list_empty(&cgrp->release_list))
2503 list_del(&cgrp->release_list); 2583 list_del(&cgrp->release_list);
2504 spin_unlock(&release_list_lock); 2584 spin_unlock(&release_list_lock);
2505 /* delete my sibling from parent->children */ 2585
2586 cgroup_lock_hierarchy(cgrp->root);
2587 /* delete this cgroup from parent->children */
2506 list_del(&cgrp->sibling); 2588 list_del(&cgrp->sibling);
2589 cgroup_unlock_hierarchy(cgrp->root);
2590
2507 spin_lock(&cgrp->dentry->d_lock); 2591 spin_lock(&cgrp->dentry->d_lock);
2508 d = dget(cgrp->dentry); 2592 d = dget(cgrp->dentry);
2509 spin_unlock(&d->d_lock); 2593 spin_unlock(&d->d_lock);
@@ -2525,6 +2609,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2525 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); 2609 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
2526 2610
2527 /* Create the top cgroup state for this subsystem */ 2611 /* Create the top cgroup state for this subsystem */
2612 list_add(&ss->sibling, &rootnode.subsys_list);
2528 ss->root = &rootnode; 2613 ss->root = &rootnode;
2529 css = ss->create(ss, dummytop); 2614 css = ss->create(ss, dummytop);
2530 /* We don't handle early failures gracefully */ 2615 /* We don't handle early failures gracefully */
@@ -2544,6 +2629,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2544 * need to invoke fork callbacks here. */ 2629 * need to invoke fork callbacks here. */
2545 BUG_ON(!list_empty(&init_task.tasks)); 2630 BUG_ON(!list_empty(&init_task.tasks));
2546 2631
2632 mutex_init(&ss->hierarchy_mutex);
2547 ss->active = 1; 2633 ss->active = 1;
2548} 2634}
2549 2635
@@ -2562,7 +2648,6 @@ int __init cgroup_init_early(void)
2562 INIT_HLIST_NODE(&init_css_set.hlist); 2648 INIT_HLIST_NODE(&init_css_set.hlist);
2563 css_set_count = 1; 2649 css_set_count = 1;
2564 init_cgroup_root(&rootnode); 2650 init_cgroup_root(&rootnode);
2565 list_add(&rootnode.root_list, &roots);
2566 root_count = 1; 2651 root_count = 1;
2567 init_task.cgroups = &init_css_set; 2652 init_task.cgroups = &init_css_set;
2568 2653
@@ -2669,15 +2754,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
2669 2754
2670 mutex_lock(&cgroup_mutex); 2755 mutex_lock(&cgroup_mutex);
2671 2756
2672 for_each_root(root) { 2757 for_each_active_root(root) {
2673 struct cgroup_subsys *ss; 2758 struct cgroup_subsys *ss;
2674 struct cgroup *cgrp; 2759 struct cgroup *cgrp;
2675 int subsys_id; 2760 int subsys_id;
2676 int count = 0; 2761 int count = 0;
2677 2762
2678 /* Skip this hierarchy if it has no active subsystems */
2679 if (!root->actual_subsys_bits)
2680 continue;
2681 seq_printf(m, "%lu:", root->subsys_bits); 2763 seq_printf(m, "%lu:", root->subsys_bits);
2682 for_each_subsys(root, ss) 2764 for_each_subsys(root, ss)
2683 seq_printf(m, "%s%s", count++ ? "," : "", ss->name); 2765 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
@@ -2800,8 +2882,10 @@ void cgroup_post_fork(struct task_struct *child)
2800{ 2882{
2801 if (use_task_css_set_links) { 2883 if (use_task_css_set_links) {
2802 write_lock(&css_set_lock); 2884 write_lock(&css_set_lock);
2885 task_lock(child);
2803 if (list_empty(&child->cg_list)) 2886 if (list_empty(&child->cg_list))
2804 list_add(&child->cg_list, &child->cgroups->tasks); 2887 list_add(&child->cg_list, &child->cgroups->tasks);
2888 task_unlock(child);
2805 write_unlock(&css_set_lock); 2889 write_unlock(&css_set_lock);
2806 } 2890 }
2807} 2891}
@@ -2907,6 +2991,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2907 mutex_unlock(&cgroup_mutex); 2991 mutex_unlock(&cgroup_mutex);
2908 return 0; 2992 return 0;
2909 } 2993 }
2994 task_lock(tsk);
2910 cg = tsk->cgroups; 2995 cg = tsk->cgroups;
2911 parent = task_cgroup(tsk, subsys->subsys_id); 2996 parent = task_cgroup(tsk, subsys->subsys_id);
2912 2997
@@ -2919,6 +3004,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2919 3004
2920 /* Keep the cgroup alive */ 3005 /* Keep the cgroup alive */
2921 get_css_set(cg); 3006 get_css_set(cg);
3007 task_unlock(tsk);
2922 mutex_unlock(&cgroup_mutex); 3008 mutex_unlock(&cgroup_mutex);
2923 3009
2924 /* Now do the VFS work to create a cgroup */ 3010 /* Now do the VFS work to create a cgroup */
@@ -2937,7 +3023,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2937 } 3023 }
2938 3024
2939 /* Create the cgroup directory, which also creates the cgroup */ 3025 /* Create the cgroup directory, which also creates the cgroup */
2940 ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755); 3026 ret = vfs_mkdir(inode, dentry, 0755);
2941 child = __d_cgrp(dentry); 3027 child = __d_cgrp(dentry);
2942 dput(dentry); 3028 dput(dentry);
2943 if (ret) { 3029 if (ret) {
@@ -2947,13 +3033,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2947 goto out_release; 3033 goto out_release;
2948 } 3034 }
2949 3035
2950 if (!child) {
2951 printk(KERN_INFO
2952 "Couldn't find new cgroup %s\n", nodename);
2953 ret = -ENOMEM;
2954 goto out_release;
2955 }
2956
2957 /* The cgroup now exists. Retake cgroup_mutex and check 3036 /* The cgroup now exists. Retake cgroup_mutex and check
2958 * that we're still in the same state that we thought we 3037 * that we're still in the same state that we thought we
2959 * were. */ 3038 * were. */
@@ -3049,7 +3128,8 @@ void __css_put(struct cgroup_subsys_state *css)
3049{ 3128{
3050 struct cgroup *cgrp = css->cgroup; 3129 struct cgroup *cgrp = css->cgroup;
3051 rcu_read_lock(); 3130 rcu_read_lock();
3052 if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) { 3131 if ((atomic_dec_return(&css->refcnt) == 1) &&
3132 notify_on_release(cgrp)) {
3053 set_bit(CGRP_RELEASABLE, &cgrp->flags); 3133 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3054 check_for_release(cgrp); 3134 check_for_release(cgrp);
3055 } 3135 }
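
The refcount is now biased: a live CSS holds a base count of 1, cgroup_clear_css_refs() swings 1 -> 0 across all subsystems and then either commits CSS_REMOVED or rolls back, and __css_put() treats a drop to 1 as the last user leaving. A hedged sketch of the tryget that spins through the transient zero (the real helper presumably lives in cgroup.h):

	static bool sketch_css_tryget(struct cgroup_subsys_state *css)
	{
		while (!atomic_inc_not_zero(&css->refcnt)) {
			if (test_bit(CSS_REMOVED, &css->flags))
				return false;	/* removal has committed */
			cpu_relax();		/* refcnt is 0 only transiently */
		}
		return true;
	}
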
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 345ace5117de..647c77a88fcb 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -84,7 +84,7 @@ struct cpuset {
84 struct cgroup_subsys_state css; 84 struct cgroup_subsys_state css;
85 85
86 unsigned long flags; /* "unsigned long" so bitops work */ 86 unsigned long flags; /* "unsigned long" so bitops work */
87 cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ 87 cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
88 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ 88 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
89 89
90 struct cpuset *parent; /* my parent */ 90 struct cpuset *parent; /* my parent */
@@ -195,8 +195,6 @@ static int cpuset_mems_generation;
195 195
196static struct cpuset top_cpuset = { 196static struct cpuset top_cpuset = {
197 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), 197 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
198 .cpus_allowed = CPU_MASK_ALL,
199 .mems_allowed = NODE_MASK_ALL,
200}; 198};
201 199
202/* 200/*
@@ -278,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
278}; 276};
279 277
280/* 278/*
281 * Return in *pmask the portion of a cpuset's cpus_allowed that 279 * Return in pmask the portion of a cpuset's cpus_allowed that
282 * are online. If none are online, walk up the cpuset hierarchy 280 * are online. If none are online, walk up the cpuset hierarchy
283 * until we find one that does have some online cpus. If we get 281 * until we find one that does have some online cpus. If we get
284 * all the way to the top and still haven't found any online cpus, 282 * all the way to the top and still haven't found any online cpus,
@@ -291,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
291 * Call with callback_mutex held. 289 * Call with callback_mutex held.
292 */ 290 */
293 291
294static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) 292static void guarantee_online_cpus(const struct cpuset *cs,
293 struct cpumask *pmask)
295{ 294{
296 while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map)) 295 while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
297 cs = cs->parent; 296 cs = cs->parent;
298 if (cs) 297 if (cs)
299 cpus_and(*pmask, cs->cpus_allowed, cpu_online_map); 298 cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
300 else 299 else
301 *pmask = cpu_online_map; 300 cpumask_copy(pmask, cpu_online_mask);
302 BUG_ON(!cpus_intersects(*pmask, cpu_online_map)); 301 BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
303} 302}
304 303
305/* 304/*
@@ -375,14 +374,9 @@ void cpuset_update_task_memory_state(void)
375 struct task_struct *tsk = current; 374 struct task_struct *tsk = current;
376 struct cpuset *cs; 375 struct cpuset *cs;
377 376
378 if (task_cs(tsk) == &top_cpuset) { 377 rcu_read_lock();
379 /* Don't need rcu for top_cpuset. It's never freed. */ 378 my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
380 my_cpusets_mem_gen = top_cpuset.mems_generation; 379 rcu_read_unlock();
381 } else {
382 rcu_read_lock();
383 my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
384 rcu_read_unlock();
385 }
386 380
387 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { 381 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
388 mutex_lock(&callback_mutex); 382 mutex_lock(&callback_mutex);
@@ -414,12 +408,43 @@ void cpuset_update_task_memory_state(void)
414 408
415static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) 409static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
416{ 410{
417 return cpus_subset(p->cpus_allowed, q->cpus_allowed) && 411 return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
418 nodes_subset(p->mems_allowed, q->mems_allowed) && 412 nodes_subset(p->mems_allowed, q->mems_allowed) &&
419 is_cpu_exclusive(p) <= is_cpu_exclusive(q) && 413 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
420 is_mem_exclusive(p) <= is_mem_exclusive(q); 414 is_mem_exclusive(p) <= is_mem_exclusive(q);
421} 415}
422 416
417/**
418 * alloc_trial_cpuset - allocate a trial cpuset
419 * @cs: the cpuset that the trial cpuset duplicates
420 */
421static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
422{
423 struct cpuset *trial;
424
425 trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
426 if (!trial)
427 return NULL;
428
429 if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
430 kfree(trial);
431 return NULL;
432 }
433 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
434
435 return trial;
436}
437
438/**
439 * free_trial_cpuset - free the trial cpuset
440 * @trial: the trial cpuset to be freed
441 */
442static void free_trial_cpuset(struct cpuset *trial)
443{
444 free_cpumask_var(trial->cpus_allowed);
445 kfree(trial);
446}
447
423/* 448/*
424 * validate_change() - Used to validate that any proposed cpuset change 449 * validate_change() - Used to validate that any proposed cpuset change
425 * follows the structural rules for cpusets. 450 * follows the structural rules for cpusets.
@@ -469,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
469 c = cgroup_cs(cont); 494 c = cgroup_cs(cont);
470 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && 495 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
471 c != cur && 496 c != cur &&
472 cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) 497 cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
473 return -EINVAL; 498 return -EINVAL;
474 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && 499 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
475 c != cur && 500 c != cur &&
@@ -479,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
479 504
480 /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ 505 /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
481 if (cgroup_task_count(cur->css.cgroup)) { 506 if (cgroup_task_count(cur->css.cgroup)) {
482 if (cpus_empty(trial->cpus_allowed) || 507 if (cpumask_empty(trial->cpus_allowed) ||
483 nodes_empty(trial->mems_allowed)) { 508 nodes_empty(trial->mems_allowed)) {
484 return -ENOSPC; 509 return -ENOSPC;
485 } 510 }
@@ -494,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
494 */ 519 */
495static int cpusets_overlap(struct cpuset *a, struct cpuset *b) 520static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
496{ 521{
497 return cpus_intersects(a->cpus_allowed, b->cpus_allowed); 522 return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
498} 523}
499 524
500static void 525static void
@@ -519,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
519 cp = list_first_entry(&q, struct cpuset, stack_list); 544 cp = list_first_entry(&q, struct cpuset, stack_list);
520 list_del(q.next); 545 list_del(q.next);
521 546
522 if (cpus_empty(cp->cpus_allowed)) 547 if (cpumask_empty(cp->cpus_allowed))
523 continue; 548 continue;
524 549
525 if (is_sched_load_balance(cp)) 550 if (is_sched_load_balance(cp))
@@ -586,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
586 * element of the partition (one sched domain) to be passed to 611 * element of the partition (one sched domain) to be passed to
587 * partition_sched_domains(). 612 * partition_sched_domains().
588 */ 613 */
589static int generate_sched_domains(cpumask_t **domains, 614/* FIXME: see the FIXME in partition_sched_domains() */
615static int generate_sched_domains(struct cpumask **domains,
590 struct sched_domain_attr **attributes) 616 struct sched_domain_attr **attributes)
591{ 617{
592 LIST_HEAD(q); /* queue of cpusets to be scanned */ 618 LIST_HEAD(q); /* queue of cpusets to be scanned */
@@ -594,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains,
594 struct cpuset **csa; /* array of all cpuset ptrs */ 620 struct cpuset **csa; /* array of all cpuset ptrs */
595 int csn; /* how many cpuset ptrs in csa so far */ 621 int csn; /* how many cpuset ptrs in csa so far */
596 int i, j, k; /* indices for partition finding loops */ 622 int i, j, k; /* indices for partition finding loops */
597 cpumask_t *doms; /* resulting partition; i.e. sched domains */ 623 struct cpumask *doms; /* resulting partition; i.e. sched domains */
598 struct sched_domain_attr *dattr; /* attributes for custom domains */ 624 struct sched_domain_attr *dattr; /* attributes for custom domains */
599 int ndoms = 0; /* number of sched domains in result */ 625 int ndoms = 0; /* number of sched domains in result */
600 int nslot; /* next empty doms[] cpumask_t slot */ 626 int nslot; /* next empty doms[] struct cpumask slot */
601 627
602 doms = NULL; 628 doms = NULL;
603 dattr = NULL; 629 dattr = NULL;
@@ -605,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains,
605 631
606 /* Special case for the 99% of systems with one, full, sched domain */ 632 /* Special case for the 99% of systems with one, full, sched domain */
607 if (is_sched_load_balance(&top_cpuset)) { 633 if (is_sched_load_balance(&top_cpuset)) {
608 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 634 doms = kmalloc(cpumask_size(), GFP_KERNEL);
609 if (!doms) 635 if (!doms)
610 goto done; 636 goto done;
611 637
@@ -614,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
614 *dattr = SD_ATTR_INIT; 640 *dattr = SD_ATTR_INIT;
615 update_domain_attr_tree(dattr, &top_cpuset); 641 update_domain_attr_tree(dattr, &top_cpuset);
616 } 642 }
617 *doms = top_cpuset.cpus_allowed; 643 cpumask_copy(doms, top_cpuset.cpus_allowed);
618 644
619 ndoms = 1; 645 ndoms = 1;
620 goto done; 646 goto done;
@@ -633,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
633 cp = list_first_entry(&q, struct cpuset, stack_list); 659 cp = list_first_entry(&q, struct cpuset, stack_list);
634 list_del(q.next); 660 list_del(q.next);
635 661
636 if (cpus_empty(cp->cpus_allowed)) 662 if (cpumask_empty(cp->cpus_allowed))
637 continue; 663 continue;
638 664
639 /* 665 /*
@@ -684,7 +710,7 @@ restart:
684 * Now we know how many domains to create. 710 * Now we know how many domains to create.
685 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 711 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
686 */ 712 */
687 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); 713 doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
688 if (!doms) 714 if (!doms)
689 goto done; 715 goto done;
690 716
@@ -696,7 +722,7 @@ restart:
696 722
697 for (nslot = 0, i = 0; i < csn; i++) { 723 for (nslot = 0, i = 0; i < csn; i++) {
698 struct cpuset *a = csa[i]; 724 struct cpuset *a = csa[i];
699 cpumask_t *dp; 725 struct cpumask *dp;
700 int apn = a->pn; 726 int apn = a->pn;
701 727
702 if (apn < 0) { 728 if (apn < 0) {
@@ -719,14 +745,14 @@ restart:
719 continue; 745 continue;
720 } 746 }
721 747
722 cpus_clear(*dp); 748 cpumask_clear(dp);
723 if (dattr) 749 if (dattr)
724 *(dattr + nslot) = SD_ATTR_INIT; 750 *(dattr + nslot) = SD_ATTR_INIT;
725 for (j = i; j < csn; j++) { 751 for (j = i; j < csn; j++) {
726 struct cpuset *b = csa[j]; 752 struct cpuset *b = csa[j];
727 753
728 if (apn == b->pn) { 754 if (apn == b->pn) {
729 cpus_or(*dp, *dp, b->cpus_allowed); 755 cpumask_or(dp, dp, b->cpus_allowed);
730 if (dattr) 756 if (dattr)
731 update_domain_attr_tree(dattr + nslot, b); 757 update_domain_attr_tree(dattr + nslot, b);
732 758
@@ -766,7 +792,7 @@ done:
766static void do_rebuild_sched_domains(struct work_struct *unused) 792static void do_rebuild_sched_domains(struct work_struct *unused)
767{ 793{
768 struct sched_domain_attr *attr; 794 struct sched_domain_attr *attr;
769 cpumask_t *doms; 795 struct cpumask *doms;
770 int ndoms; 796 int ndoms;
771 797
772 get_online_cpus(); 798 get_online_cpus();
@@ -835,7 +861,7 @@ void rebuild_sched_domains(void)
835static int cpuset_test_cpumask(struct task_struct *tsk, 861static int cpuset_test_cpumask(struct task_struct *tsk,
836 struct cgroup_scanner *scan) 862 struct cgroup_scanner *scan)
837{ 863{
838 return !cpus_equal(tsk->cpus_allowed, 864 return !cpumask_equal(&tsk->cpus_allowed,
839 (cgroup_cs(scan->cg))->cpus_allowed); 865 (cgroup_cs(scan->cg))->cpus_allowed);
840} 866}
841 867
@@ -853,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
853static void cpuset_change_cpumask(struct task_struct *tsk, 879static void cpuset_change_cpumask(struct task_struct *tsk,
854 struct cgroup_scanner *scan) 880 struct cgroup_scanner *scan)
855{ 881{
856 set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); 882 set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
857} 883}
858 884
859/** 885/**
@@ -885,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
885 * @cs: the cpuset to consider 911 * @cs: the cpuset to consider
886 * @buf: buffer of cpu numbers written to this cpuset 912 * @buf: buffer of cpu numbers written to this cpuset
887 */ 913 */
888static int update_cpumask(struct cpuset *cs, const char *buf) 914static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
915 const char *buf)
889{ 916{
890 struct ptr_heap heap; 917 struct ptr_heap heap;
891 struct cpuset trialcs;
892 int retval; 918 int retval;
893 int is_load_balanced; 919 int is_load_balanced;
894 920
@@ -896,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
896 if (cs == &top_cpuset) 922 if (cs == &top_cpuset)
897 return -EACCES; 923 return -EACCES;
898 924
899 trialcs = *cs;
900
901 /* 925 /*
902 * An empty cpus_allowed is ok only if the cpuset has no tasks. 926 * An empty cpus_allowed is ok only if the cpuset has no tasks.
903 * Since cpulist_parse() fails on an empty mask, we special case 927 * Since cpulist_parse() fails on an empty mask, we special case
@@ -905,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
905 * with tasks have cpus. 929 * with tasks have cpus.
906 */ 930 */
907 if (!*buf) { 931 if (!*buf) {
908 cpus_clear(trialcs.cpus_allowed); 932 cpumask_clear(trialcs->cpus_allowed);
909 } else { 933 } else {
910 retval = cpulist_parse(buf, &trialcs.cpus_allowed); 934 retval = cpulist_parse(buf, trialcs->cpus_allowed);
911 if (retval < 0) 935 if (retval < 0)
912 return retval; 936 return retval;
913 937
914 if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) 938 if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
915 return -EINVAL; 939 return -EINVAL;
916 } 940 }
917 retval = validate_change(cs, &trialcs); 941 retval = validate_change(cs, trialcs);
918 if (retval < 0) 942 if (retval < 0)
919 return retval; 943 return retval;
920 944
921 /* Nothing to do if the cpus didn't change */ 945 /* Nothing to do if the cpus didn't change */
922 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) 946 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
923 return 0; 947 return 0;
924 948
925 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); 949 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
926 if (retval) 950 if (retval)
927 return retval; 951 return retval;
928 952
929 is_load_balanced = is_sched_load_balance(&trialcs); 953 is_load_balanced = is_sched_load_balance(trialcs);
930 954
931 mutex_lock(&callback_mutex); 955 mutex_lock(&callback_mutex);
932 cs->cpus_allowed = trialcs.cpus_allowed; 956 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
933 mutex_unlock(&callback_mutex); 957 mutex_unlock(&callback_mutex);
934 958
935 /* 959 /*
@@ -1017,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
1017 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ 1041 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
1018 1042
1019 fudge = 10; /* spare mmarray[] slots */ 1043 fudge = 10; /* spare mmarray[] slots */
1020 fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ 1044 fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
1021 retval = -ENOMEM; 1045 retval = -ENOMEM;
1022 1046
1023 /* 1047 /*
@@ -1104,9 +1128,9 @@ done:
1104 * lock each such tasks mm->mmap_sem, scan its vma's and rebind 1128 * lock each such tasks mm->mmap_sem, scan its vma's and rebind
1105 * their mempolicies to the cpusets new mems_allowed. 1129 * their mempolicies to the cpusets new mems_allowed.
1106 */ 1130 */
1107static int update_nodemask(struct cpuset *cs, const char *buf) 1131static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1132 const char *buf)
1108{ 1133{
1109 struct cpuset trialcs;
1110 nodemask_t oldmem; 1134 nodemask_t oldmem;
1111 int retval; 1135 int retval;
1112 1136
@@ -1117,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
1117 if (cs == &top_cpuset) 1141 if (cs == &top_cpuset)
1118 return -EACCES; 1142 return -EACCES;
1119 1143
1120 trialcs = *cs;
1121
1122 /* 1144 /*
1123 * An empty mems_allowed is ok iff there are no tasks in the cpuset. 1145 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
1124 * Since nodelist_parse() fails on an empty mask, we special case 1146 * Since nodelist_parse() fails on an empty mask, we special case
@@ -1126,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
1126 * with tasks have memory. 1148 * with tasks have memory.
1127 */ 1149 */
1128 if (!*buf) { 1150 if (!*buf) {
1129 nodes_clear(trialcs.mems_allowed); 1151 nodes_clear(trialcs->mems_allowed);
1130 } else { 1152 } else {
1131 retval = nodelist_parse(buf, trialcs.mems_allowed); 1153 retval = nodelist_parse(buf, trialcs->mems_allowed);
1132 if (retval < 0) 1154 if (retval < 0)
1133 goto done; 1155 goto done;
1134 1156
1135 if (!nodes_subset(trialcs.mems_allowed, 1157 if (!nodes_subset(trialcs->mems_allowed,
1136 node_states[N_HIGH_MEMORY])) 1158 node_states[N_HIGH_MEMORY]))
1137 return -EINVAL; 1159 return -EINVAL;
1138 } 1160 }
1139 oldmem = cs->mems_allowed; 1161 oldmem = cs->mems_allowed;
1140 if (nodes_equal(oldmem, trialcs.mems_allowed)) { 1162 if (nodes_equal(oldmem, trialcs->mems_allowed)) {
1141 retval = 0; /* Too easy - nothing to do */ 1163 retval = 0; /* Too easy - nothing to do */
1142 goto done; 1164 goto done;
1143 } 1165 }
1144 retval = validate_change(cs, &trialcs); 1166 retval = validate_change(cs, trialcs);
1145 if (retval < 0) 1167 if (retval < 0)
1146 goto done; 1168 goto done;
1147 1169
1148 mutex_lock(&callback_mutex); 1170 mutex_lock(&callback_mutex);
1149 cs->mems_allowed = trialcs.mems_allowed; 1171 cs->mems_allowed = trialcs->mems_allowed;
1150 cs->mems_generation = cpuset_mems_generation++; 1172 cs->mems_generation = cpuset_mems_generation++;
1151 mutex_unlock(&callback_mutex); 1173 mutex_unlock(&callback_mutex);
1152 1174
@@ -1167,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
1167 1189
1168 if (val != cs->relax_domain_level) { 1190 if (val != cs->relax_domain_level) {
1169 cs->relax_domain_level = val; 1191 cs->relax_domain_level = val;
1170 if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) 1192 if (!cpumask_empty(cs->cpus_allowed) &&
1193 is_sched_load_balance(cs))
1171 async_rebuild_sched_domains(); 1194 async_rebuild_sched_domains();
1172 } 1195 }
1173 1196
@@ -1186,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
1186static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, 1209static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1187 int turning_on) 1210 int turning_on)
1188{ 1211{
1189 struct cpuset trialcs; 1212 struct cpuset *trialcs;
1190 int err; 1213 int err;
1191 int balance_flag_changed; 1214 int balance_flag_changed;
1192 1215
1193 trialcs = *cs; 1216 trialcs = alloc_trial_cpuset(cs);
1217 if (!trialcs)
1218 return -ENOMEM;
1219
1194 if (turning_on) 1220 if (turning_on)
1195 set_bit(bit, &trialcs.flags); 1221 set_bit(bit, &trialcs->flags);
1196 else 1222 else
1197 clear_bit(bit, &trialcs.flags); 1223 clear_bit(bit, &trialcs->flags);
1198 1224
1199 err = validate_change(cs, &trialcs); 1225 err = validate_change(cs, trialcs);
1200 if (err < 0) 1226 if (err < 0)
1201 return err; 1227 goto out;
1202 1228
1203 balance_flag_changed = (is_sched_load_balance(cs) != 1229 balance_flag_changed = (is_sched_load_balance(cs) !=
1204 is_sched_load_balance(&trialcs)); 1230 is_sched_load_balance(trialcs));
1205 1231
1206 mutex_lock(&callback_mutex); 1232 mutex_lock(&callback_mutex);
1207 cs->flags = trialcs.flags; 1233 cs->flags = trialcs->flags;
1208 mutex_unlock(&callback_mutex); 1234 mutex_unlock(&callback_mutex);
1209 1235
1210 if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed) 1236 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
1211 async_rebuild_sched_domains(); 1237 async_rebuild_sched_domains();
1212 1238
1213 return 0; 1239out:
1240 free_trial_cpuset(trialcs);
1241 return err;
1214} 1242}
1215 1243
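Note: update_flag() now heap-allocates the trial cpuset instead of copying it onto the stack; a struct cpuset with an embedded cpumask gets large once NR_CPUS grows. alloc_trial_cpuset()/free_trial_cpuset() are helpers added earlier in this patch and not visible in this hunk; a minimal sketch of the shape assumed here:

	static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
	{
		struct cpuset *trial;

		trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
		if (!trial)
			return NULL;
		/* give the copy its own cpumask storage */
		if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
			kfree(trial);
			return NULL;
		}
		cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
		return trial;
	}

	static void free_trial_cpuset(struct cpuset *trial)
	{
		free_cpumask_var(trial->cpus_allowed);
		kfree(trial);
	}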
1216/* 1244/*
@@ -1311,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
1311 return val; 1339 return val;
1312} 1340}
1313 1341
1342/* Protected by cgroup_lock */
1343static cpumask_var_t cpus_attach;
1344
1314/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ 1345/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1315static int cpuset_can_attach(struct cgroup_subsys *ss, 1346static int cpuset_can_attach(struct cgroup_subsys *ss,
1316 struct cgroup *cont, struct task_struct *tsk) 1347 struct cgroup *cont, struct task_struct *tsk)
1317{ 1348{
1318 struct cpuset *cs = cgroup_cs(cont); 1349 struct cpuset *cs = cgroup_cs(cont);
1350 int ret = 0;
1319 1351
1320 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1352 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1321 return -ENOSPC; 1353 return -ENOSPC;
1322 if (tsk->flags & PF_THREAD_BOUND) {
1323 cpumask_t mask;
1324 1354
1355 if (tsk->flags & PF_THREAD_BOUND) {
1325 mutex_lock(&callback_mutex); 1356 mutex_lock(&callback_mutex);
1326 mask = cs->cpus_allowed; 1357 if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
1358 ret = -EINVAL;
1327 mutex_unlock(&callback_mutex); 1359 mutex_unlock(&callback_mutex);
1328 if (!cpus_equal(tsk->cpus_allowed, mask))
1329 return -EINVAL;
1330 } 1360 }
1331 1361
1332 return security_task_setscheduler(tsk, 0, NULL); 1362 return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
1333} 1363}
1334 1364
1335static void cpuset_attach(struct cgroup_subsys *ss, 1365static void cpuset_attach(struct cgroup_subsys *ss,
1336 struct cgroup *cont, struct cgroup *oldcont, 1366 struct cgroup *cont, struct cgroup *oldcont,
1337 struct task_struct *tsk) 1367 struct task_struct *tsk)
1338{ 1368{
1339 cpumask_t cpus;
1340 nodemask_t from, to; 1369 nodemask_t from, to;
1341 struct mm_struct *mm; 1370 struct mm_struct *mm;
1342 struct cpuset *cs = cgroup_cs(cont); 1371 struct cpuset *cs = cgroup_cs(cont);
1343 struct cpuset *oldcs = cgroup_cs(oldcont); 1372 struct cpuset *oldcs = cgroup_cs(oldcont);
1344 int err; 1373 int err;
1345 1374
1346 mutex_lock(&callback_mutex); 1375 if (cs == &top_cpuset) {
1347 guarantee_online_cpus(cs, &cpus); 1376 cpumask_copy(cpus_attach, cpu_possible_mask);
1348 err = set_cpus_allowed_ptr(tsk, &cpus); 1377 } else {
1349 mutex_unlock(&callback_mutex); 1378 mutex_lock(&callback_mutex);
1379 guarantee_online_cpus(cs, cpus_attach);
1380 mutex_unlock(&callback_mutex);
1381 }
1382 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1350 if (err) 1383 if (err)
1351 return; 1384 return;
1352 1385
@@ -1359,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
1359 cpuset_migrate_mm(mm, &from, &to); 1392 cpuset_migrate_mm(mm, &from, &to);
1360 mmput(mm); 1393 mmput(mm);
1361 } 1394 }
1362
1363} 1395}
1364 1396
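Note: cpuset_attach() no longer builds the target mask in an on-stack cpumask_t; it fills the preallocated cpus_attach, which is safe because cgroup_lock serializes all attaches. The same pattern in isolation, a sketch with scratch_mask and its serializing lock as illustrative stand-ins:

	static cpumask_var_t scratch_mask;	/* protected by an outer mutex */

	static int apply_mask(struct task_struct *tsk, const struct cpumask *src)
	{
		cpumask_copy(scratch_mask, src);	/* no NR_CPUS bytes on the stack */
		return set_cpus_allowed_ptr(tsk, scratch_mask);
	}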
1365/* The various types of files and directories in a cpuset file system */ 1397/* The various types of files and directories in a cpuset file system */
@@ -1454,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1454 const char *buf) 1486 const char *buf)
1455{ 1487{
1456 int retval = 0; 1488 int retval = 0;
1489 struct cpuset *cs = cgroup_cs(cgrp);
1490 struct cpuset *trialcs;
1457 1491
1458 if (!cgroup_lock_live_group(cgrp)) 1492 if (!cgroup_lock_live_group(cgrp))
1459 return -ENODEV; 1493 return -ENODEV;
1460 1494
1495 trialcs = alloc_trial_cpuset(cs);
1496 if (!trialcs)
1497 return -ENOMEM;
1498
1461 switch (cft->private) { 1499 switch (cft->private) {
1462 case FILE_CPULIST: 1500 case FILE_CPULIST:
1463 retval = update_cpumask(cgroup_cs(cgrp), buf); 1501 retval = update_cpumask(cs, trialcs, buf);
1464 break; 1502 break;
1465 case FILE_MEMLIST: 1503 case FILE_MEMLIST:
1466 retval = update_nodemask(cgroup_cs(cgrp), buf); 1504 retval = update_nodemask(cs, trialcs, buf);
1467 break; 1505 break;
1468 default: 1506 default:
1469 retval = -EINVAL; 1507 retval = -EINVAL;
1470 break; 1508 break;
1471 } 1509 }
1510
1511 free_trial_cpuset(trialcs);
1472 cgroup_unlock(); 1512 cgroup_unlock();
1473 return retval; 1513 return retval;
1474} 1514}
@@ -1487,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1487 1527
1488static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) 1528static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1489{ 1529{
1490 cpumask_t mask; 1530 int ret;
1491 1531
1492 mutex_lock(&callback_mutex); 1532 mutex_lock(&callback_mutex);
1493 mask = cs->cpus_allowed; 1533 ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
1494 mutex_unlock(&callback_mutex); 1534 mutex_unlock(&callback_mutex);
1495 1535
1496 return cpulist_scnprintf(page, PAGE_SIZE, &mask); 1536 return ret;
1497} 1537}
1498 1538
1499static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) 1539static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1729,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
1729 parent_cs = cgroup_cs(parent); 1769 parent_cs = cgroup_cs(parent);
1730 1770
1731 cs->mems_allowed = parent_cs->mems_allowed; 1771 cs->mems_allowed = parent_cs->mems_allowed;
1732 cs->cpus_allowed = parent_cs->cpus_allowed; 1772 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
1733 return; 1773 return;
1734} 1774}
1735 1775
@@ -1755,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
1755 cs = kmalloc(sizeof(*cs), GFP_KERNEL); 1795 cs = kmalloc(sizeof(*cs), GFP_KERNEL);
1756 if (!cs) 1796 if (!cs)
1757 return ERR_PTR(-ENOMEM); 1797 return ERR_PTR(-ENOMEM);
1798 if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
1799 kfree(cs);
1800 return ERR_PTR(-ENOMEM);
1801 }
1758 1802
1759 cpuset_update_task_memory_state(); 1803 cpuset_update_task_memory_state();
1760 cs->flags = 0; 1804 cs->flags = 0;
@@ -1763,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
1763 if (is_spread_slab(parent)) 1807 if (is_spread_slab(parent))
1764 set_bit(CS_SPREAD_SLAB, &cs->flags); 1808 set_bit(CS_SPREAD_SLAB, &cs->flags);
1765 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); 1809 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
1766 cpus_clear(cs->cpus_allowed); 1810 cpumask_clear(cs->cpus_allowed);
1767 nodes_clear(cs->mems_allowed); 1811 nodes_clear(cs->mems_allowed);
1768 cs->mems_generation = cpuset_mems_generation++; 1812 cs->mems_generation = cpuset_mems_generation++;
1769 fmeter_init(&cs->fmeter); 1813 fmeter_init(&cs->fmeter);
@@ -1790,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
1790 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); 1834 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
1791 1835
1792 number_of_cpusets--; 1836 number_of_cpusets--;
1837 free_cpumask_var(cs->cpus_allowed);
1793 kfree(cs); 1838 kfree(cs);
1794} 1839}
1795 1840
@@ -1813,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
1813 1858
1814int __init cpuset_init_early(void) 1859int __init cpuset_init_early(void)
1815{ 1860{
1861 alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
1862
1816 top_cpuset.mems_generation = cpuset_mems_generation++; 1863 top_cpuset.mems_generation = cpuset_mems_generation++;
1817 return 0; 1864 return 0;
1818} 1865}
@@ -1828,7 +1875,7 @@ int __init cpuset_init(void)
1828{ 1875{
1829 int err = 0; 1876 int err = 0;
1830 1877
1831 cpus_setall(top_cpuset.cpus_allowed); 1878 cpumask_setall(top_cpuset.cpus_allowed);
1832 nodes_setall(top_cpuset.mems_allowed); 1879 nodes_setall(top_cpuset.mems_allowed);
1833 1880
1834 fmeter_init(&top_cpuset.fmeter); 1881 fmeter_init(&top_cpuset.fmeter);
@@ -1840,6 +1887,9 @@ int __init cpuset_init(void)
1840 if (err < 0) 1887 if (err < 0)
1841 return err; 1888 return err;
1842 1889
1890 if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
1891 BUG();
1892
1843 number_of_cpusets = 1; 1893 number_of_cpusets = 1;
1844 return 0; 1894 return 0;
1845} 1895}
@@ -1914,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
1914 * has online cpus, so can't be empty). 1964 * has online cpus, so can't be empty).
1915 */ 1965 */
1916 parent = cs->parent; 1966 parent = cs->parent;
1917 while (cpus_empty(parent->cpus_allowed) || 1967 while (cpumask_empty(parent->cpus_allowed) ||
1918 nodes_empty(parent->mems_allowed)) 1968 nodes_empty(parent->mems_allowed))
1919 parent = parent->parent; 1969 parent = parent->parent;
1920 1970
@@ -1955,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
1955 } 2005 }
1956 2006
1957 /* Continue past cpusets with all cpus, mems online */ 2007 /* Continue past cpusets with all cpus, mems online */
1958 if (cpus_subset(cp->cpus_allowed, cpu_online_map) && 2008 if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
1959 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2009 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
1960 continue; 2010 continue;
1961 2011
@@ -1963,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root)
1963 2013
1964 /* Remove offline cpus and mems from this cpuset. */ 2014 /* Remove offline cpus and mems from this cpuset. */
1965 mutex_lock(&callback_mutex); 2015 mutex_lock(&callback_mutex);
1966 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); 2016 cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
2017 cpu_online_mask);
1967 nodes_and(cp->mems_allowed, cp->mems_allowed, 2018 nodes_and(cp->mems_allowed, cp->mems_allowed,
1968 node_states[N_HIGH_MEMORY]); 2019 node_states[N_HIGH_MEMORY]);
1969 mutex_unlock(&callback_mutex); 2020 mutex_unlock(&callback_mutex);
1970 2021
1971 /* Move tasks from the empty cpuset to a parent */ 2022 /* Move tasks from the empty cpuset to a parent */
1972 if (cpus_empty(cp->cpus_allowed) || 2023 if (cpumask_empty(cp->cpus_allowed) ||
1973 nodes_empty(cp->mems_allowed)) 2024 nodes_empty(cp->mems_allowed))
1974 remove_tasks_in_empty_cpuset(cp); 2025 remove_tasks_in_empty_cpuset(cp);
1975 else { 2026 else {
@@ -1995,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
1995 unsigned long phase, void *unused_cpu) 2046 unsigned long phase, void *unused_cpu)
1996{ 2047{
1997 struct sched_domain_attr *attr; 2048 struct sched_domain_attr *attr;
1998 cpumask_t *doms; 2049 struct cpumask *doms;
1999 int ndoms; 2050 int ndoms;
2000 2051
2001 switch (phase) { 2052 switch (phase) {
@@ -2010,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2010 } 2061 }
2011 2062
2012 cgroup_lock(); 2063 cgroup_lock();
2013 top_cpuset.cpus_allowed = cpu_online_map; 2064 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
2014 scan_for_empty_cpusets(&top_cpuset); 2065 scan_for_empty_cpusets(&top_cpuset);
2015 ndoms = generate_sched_domains(&doms, &attr); 2066 ndoms = generate_sched_domains(&doms, &attr);
2016 cgroup_unlock(); 2067 cgroup_unlock();
@@ -2055,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
2055 2106
2056void __init cpuset_init_smp(void) 2107void __init cpuset_init_smp(void)
2057{ 2108{
2058 top_cpuset.cpus_allowed = cpu_online_map; 2109 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
2059 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2110 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2060 2111
2061 hotcpu_notifier(cpuset_track_online_cpus, 0); 2112 hotcpu_notifier(cpuset_track_online_cpus, 0);
@@ -2065,15 +2116,15 @@ void __init cpuset_init_smp(void)
2065/** 2116/**
2066 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. 2117 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
2067 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. 2118 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
2068 * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. 2119 * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
2069 * 2120 *
2070 * Description: Returns the cpumask_t cpus_allowed of the cpuset 2121 * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
2071 * attached to the specified @tsk. Guaranteed to return some non-empty 2122 * attached to the specified @tsk. Guaranteed to return some non-empty
2072 * subset of cpu_online_map, even if this means going outside the 2123 * subset of cpu_online_map, even if this means going outside the
2073 * tasks cpuset. 2124 * tasks cpuset.
2074 **/ 2125 **/
2075 2126
2076void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) 2127void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
2077{ 2128{
2078 mutex_lock(&callback_mutex); 2129 mutex_lock(&callback_mutex);
2079 cpuset_cpus_allowed_locked(tsk, pmask); 2130 cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2084,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
2084 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. 2135 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
2085 * Must be called with callback_mutex held. 2136 * Must be called with callback_mutex held.
2086 **/ 2137 **/
2087void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask) 2138void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
2088{ 2139{
2089 task_lock(tsk); 2140 task_lock(tsk);
2090 guarantee_online_cpus(task_cs(tsk), pmask); 2141 guarantee_online_cpus(task_cs(tsk), pmask);
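Note: the whole cpuset.c conversion above follows one recipe: struct cpumask pointers in function signatures, cpumask_* operators instead of the old cpus_* ones, and cpumask_var_t with explicit alloc/free for storage. Condensed into one illustrative example (not from the patch):

	#include <linux/cpumask.h>
	#include <linux/slab.h>

	static int count_online_subset(const struct cpumask *allowed)
	{
		cpumask_var_t tmp;
		int n;

		/* off-stack when CONFIG_CPUMASK_OFFSTACK=y, on-stack otherwise */
		if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
			return -ENOMEM;
		cpumask_and(tmp, allowed, cpu_online_mask);
		n = cpumask_weight(tmp);
		free_cpumask_var(tmp);
		return n;
	}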
diff --git a/kernel/fork.c b/kernel/fork.c
index 7b8f2a78be3d..4018308048cf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1126,12 +1126,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1126 1126
1127 if (pid != &init_struct_pid) { 1127 if (pid != &init_struct_pid) {
1128 retval = -ENOMEM; 1128 retval = -ENOMEM;
1129 pid = alloc_pid(task_active_pid_ns(p)); 1129 pid = alloc_pid(p->nsproxy->pid_ns);
1130 if (!pid) 1130 if (!pid)
1131 goto bad_fork_cleanup_io; 1131 goto bad_fork_cleanup_io;
1132 1132
1133 if (clone_flags & CLONE_NEWPID) { 1133 if (clone_flags & CLONE_NEWPID) {
1134 retval = pid_ns_prepare_proc(task_active_pid_ns(p)); 1134 retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
1135 if (retval < 0) 1135 if (retval < 0)
1136 goto bad_fork_free_pid; 1136 goto bad_fork_free_pid;
1137 } 1137 }
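Note the direction of this hunk: copy_process() stops using task_active_pid_ns() and reads p->nsproxy->pid_ns directly. At this point in fork the child has no struct pid attached yet, so the pid-based helper introduced in kernel/pid.c below would resolve to NULL. An illustrative sketch of the distinction:

	static struct pid_namespace *ns_for_new_pid(struct task_struct *p)
	{
		/*
		 * Before attach_pid(), task_active_pid_ns(p), i.e.
		 * ns_of_pid(task_pid(p)), is NULL; the nsproxy is the
		 * only source of truth for where the new pid will live.
		 */
		return p->nsproxy->pid_ns;
	}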
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 43c2111cd54d..78bc3fdac0d2 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -13,7 +13,6 @@
13 13
14struct ns_cgroup { 14struct ns_cgroup {
15 struct cgroup_subsys_state css; 15 struct cgroup_subsys_state css;
16 spinlock_t lock;
17}; 16};
18 17
19struct cgroup_subsys ns_subsys; 18struct cgroup_subsys ns_subsys;
@@ -84,7 +83,6 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
84 ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); 83 ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
85 if (!ns_cgroup) 84 if (!ns_cgroup)
86 return ERR_PTR(-ENOMEM); 85 return ERR_PTR(-ENOMEM);
87 spin_lock_init(&ns_cgroup->lock);
88 return &ns_cgroup->css; 86 return &ns_cgroup->css;
89} 87}
90 88
diff --git a/kernel/pid.c b/kernel/pid.c
index af9224cdd6c0..1b3586fe753a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -474,6 +474,12 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
474} 474}
475EXPORT_SYMBOL(task_session_nr_ns); 475EXPORT_SYMBOL(task_session_nr_ns);
476 476
477struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
478{
479 return ns_of_pid(task_pid(tsk));
480}
481EXPORT_SYMBOL_GPL(task_active_pid_ns);
482
477/* 483/*
478 * Used by proc to find the first pid that is greater than or equal to nr. 484 * Used by proc to find the first pid that is greater than or equal to nr.
479 * 485 *
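Note: the new out-of-line, exported task_active_pid_ns() gives code outside kernel/pid.c a sanctioned way to ask which pid namespace a task lives in. A hypothetical caller (names are illustrative; the caller must hold rcu_read_lock()):

	static struct task_struct *find_task_by_nr(pid_t nr)
	{
		struct pid_namespace *ns = task_active_pid_ns(current);

		/* resolve nr in the caller's own namespace */
		return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
	}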
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index f275c8eca772..bf8e7534c803 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -15,10 +15,11 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17 17
18void res_counter_init(struct res_counter *counter) 18void res_counter_init(struct res_counter *counter, struct res_counter *parent)
19{ 19{
20 spin_lock_init(&counter->lock); 20 spin_lock_init(&counter->lock);
21 counter->limit = (unsigned long long)LLONG_MAX; 21 counter->limit = (unsigned long long)LLONG_MAX;
22 counter->parent = parent;
22} 23}
23 24
24int res_counter_charge_locked(struct res_counter *counter, unsigned long val) 25int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
34 return 0; 35 return 0;
35} 36}
36 37
37int res_counter_charge(struct res_counter *counter, unsigned long val) 38int res_counter_charge(struct res_counter *counter, unsigned long val,
39 struct res_counter **limit_fail_at)
38{ 40{
39 int ret; 41 int ret;
40 unsigned long flags; 42 unsigned long flags;
41 43 struct res_counter *c, *u;
42 spin_lock_irqsave(&counter->lock, flags); 44
43 ret = res_counter_charge_locked(counter, val); 45 *limit_fail_at = NULL;
44 spin_unlock_irqrestore(&counter->lock, flags); 46 local_irq_save(flags);
47 for (c = counter; c != NULL; c = c->parent) {
48 spin_lock(&c->lock);
49 ret = res_counter_charge_locked(c, val);
50 spin_unlock(&c->lock);
51 if (ret < 0) {
52 *limit_fail_at = c;
53 goto undo;
54 }
55 }
56 ret = 0;
57 goto done;
58undo:
59 for (u = counter; u != c; u = u->parent) {
60 spin_lock(&u->lock);
61 res_counter_uncharge_locked(u, val);
62 spin_unlock(&u->lock);
63 }
64done:
65 local_irq_restore(flags);
45 return ret; 66 return ret;
46} 67}
47 68
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
56void res_counter_uncharge(struct res_counter *counter, unsigned long val) 77void res_counter_uncharge(struct res_counter *counter, unsigned long val)
57{ 78{
58 unsigned long flags; 79 unsigned long flags;
80 struct res_counter *c;
59 81
60 spin_lock_irqsave(&counter->lock, flags); 82 local_irq_save(flags);
61 res_counter_uncharge_locked(counter, val); 83 for (c = counter; c != NULL; c = c->parent) {
62 spin_unlock_irqrestore(&counter->lock, flags); 84 spin_lock(&c->lock);
85 res_counter_uncharge_locked(c, val);
86 spin_unlock(&c->lock);
87 }
88 local_irq_restore(flags);
63} 89}
64 90
65 91
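Note: res_counter_charge() is now hierarchical. It walks the counter->parent links charging each level, and on failure it rolls the partial charges back and reports the counter that hit its limit through *limit_fail_at. A sketch of a caller under the new contract (names are illustrative):

	static int try_account(struct res_counter *rc, unsigned long bytes)
	{
		struct res_counter *fail_at;

		if (res_counter_charge(rc, bytes, &fail_at)) {
			/*
			 * fail_at is the ancestor whose limit was hit;
			 * charges below it have already been undone.
			 */
			return -ENOMEM;
		}
		return 0;
	}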
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e0c0b4bc3f08..8e1352c75557 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1617,8 +1617,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
1617 } 1617 }
1618} 1618}
1619 1619
1620#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
1621
1622/* 1620/*
1623 * Share the fairness runtime between parent and child, thus the 1621 * Share the fairness runtime between parent and child, thus the
1624 * total amount of pressure for CPU stays equal - new tasks 1622 * total amount of pressure for CPU stays equal - new tasks
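Note: the private swap() define in sched_fair.c goes away because an equivalent typeof-based macro now lives in linux/kernel.h, so any file can use it directly. For instance (illustrative):

	#include <linux/kernel.h>	/* generic swap(a, b) */

	static void order_pair(u64 *lo, u64 *hi)
	{
		if (*lo > *hi)
			swap(*lo, *hi);	/* expands via typeof; no temp at the call site */
	}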
diff --git a/lib/sort.c b/lib/sort.c
index 6abbaf3d5858..926d00429ed2 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -32,11 +32,11 @@ static void generic_swap(void *a, void *b, int size)
32 * @base: pointer to data to sort 32 * @base: pointer to data to sort
33 * @num: number of elements 33 * @num: number of elements
34 * @size: size of each element 34 * @size: size of each element
35 * @cmp: pointer to comparison function 35 * @cmp_func: pointer to comparison function
36 * @swap: pointer to swap function or NULL 36 * @swap_func: pointer to swap function or NULL
37 * 37 *
38 * This function does a heapsort on the given array. You may provide a 38 * This function does a heapsort on the given array. You may provide a
39 * swap function optimized to your element type. 39 * swap_func function optimized to your element type.
40 * 40 *
41 * Sorting time is O(n log n) both on average and worst-case. While 41 * Sorting time is O(n log n) both on average and worst-case. While
42 * qsort is about 20% faster on average, it suffers from exploitable 42 * qsort is about 20% faster on average, it suffers from exploitable
@@ -45,37 +45,39 @@ static void generic_swap(void *a, void *b, int size)
45 */ 45 */
46 46
47void sort(void *base, size_t num, size_t size, 47void sort(void *base, size_t num, size_t size,
48 int (*cmp)(const void *, const void *), 48 int (*cmp_func)(const void *, const void *),
49 void (*swap)(void *, void *, int size)) 49 void (*swap_func)(void *, void *, int size))
50{ 50{
51 /* pre-scale counters for performance */ 51 /* pre-scale counters for performance */
52 int i = (num/2 - 1) * size, n = num * size, c, r; 52 int i = (num/2 - 1) * size, n = num * size, c, r;
53 53
54 if (!swap) 54 if (!swap_func)
55 swap = (size == 4 ? u32_swap : generic_swap); 55 swap_func = (size == 4 ? u32_swap : generic_swap);
56 56
57 /* heapify */ 57 /* heapify */
58 for ( ; i >= 0; i -= size) { 58 for ( ; i >= 0; i -= size) {
59 for (r = i; r * 2 + size < n; r = c) { 59 for (r = i; r * 2 + size < n; r = c) {
60 c = r * 2 + size; 60 c = r * 2 + size;
61 if (c < n - size && cmp(base + c, base + c + size) < 0) 61 if (c < n - size &&
62 cmp_func(base + c, base + c + size) < 0)
62 c += size; 63 c += size;
63 if (cmp(base + r, base + c) >= 0) 64 if (cmp_func(base + r, base + c) >= 0)
64 break; 65 break;
65 swap(base + r, base + c, size); 66 swap_func(base + r, base + c, size);
66 } 67 }
67 } 68 }
68 69
69 /* sort */ 70 /* sort */
70 for (i = n - size; i > 0; i -= size) { 71 for (i = n - size; i > 0; i -= size) {
71 swap(base, base + i, size); 72 swap_func(base, base + i, size);
72 for (r = 0; r * 2 + size < i; r = c) { 73 for (r = 0; r * 2 + size < i; r = c) {
73 c = r * 2 + size; 74 c = r * 2 + size;
74 if (c < i - size && cmp(base + c, base + c + size) < 0) 75 if (c < i - size &&
76 cmp_func(base + c, base + c + size) < 0)
75 c += size; 77 c += size;
76 if (cmp(base + r, base + c) >= 0) 78 if (cmp_func(base + r, base + c) >= 0)
77 break; 79 break;
78 swap(base + r, base + c, size); 80 swap_func(base + r, base + c, size);
79 } 81 }
80 } 82 }
81} 83}
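Note: with the cmp/swap parameters renamed, sort()'s arguments no longer shadow the new swap() macro from linux/kernel.h. Typical usage is unchanged, e.g.:

	#include <linux/kernel.h>	/* ARRAY_SIZE() */
	#include <linux/sort.h>

	static int cmp_int(const void *a, const void *b)
	{
		return *(const int *)a - *(const int *)b;
	}

	static void sort_example(void)
	{
		int v[] = { 3, 1, 4, 1, 5 };

		/* swap_func == NULL: sort() picks u32_swap since size == 4 */
		sort(v, ARRAY_SIZE(v), sizeof(v[0]), cmp_int, NULL);
	}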
diff --git a/mm/filemap.c b/mm/filemap.c
index 2f55a1e2baf7..ceba0bd03662 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -460,7 +460,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
460 VM_BUG_ON(!PageLocked(page)); 460 VM_BUG_ON(!PageLocked(page));
461 461
462 error = mem_cgroup_cache_charge(page, current->mm, 462 error = mem_cgroup_cache_charge(page, current->mm,
463 gfp_mask & ~__GFP_HIGHMEM); 463 gfp_mask & GFP_RECLAIM_MASK);
464 if (error) 464 if (error)
465 goto out; 465 goto out;
466 466
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 51ee96545579..e2996b80601f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -21,11 +21,13 @@
21#include <linux/memcontrol.h> 21#include <linux/memcontrol.h>
22#include <linux/cgroup.h> 22#include <linux/cgroup.h>
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/pagemap.h>
24#include <linux/smp.h> 25#include <linux/smp.h>
25#include <linux/page-flags.h> 26#include <linux/page-flags.h>
26#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
27#include <linux/bit_spinlock.h> 28#include <linux/bit_spinlock.h>
28#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30#include <linux/mutex.h>
29#include <linux/slab.h> 31#include <linux/slab.h>
30#include <linux/swap.h> 32#include <linux/swap.h>
31#include <linux/spinlock.h> 33#include <linux/spinlock.h>
@@ -34,12 +36,23 @@
34#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
35#include <linux/mm_inline.h> 37#include <linux/mm_inline.h>
36#include <linux/page_cgroup.h> 38#include <linux/page_cgroup.h>
39#include "internal.h"
37 40
38#include <asm/uaccess.h> 41#include <asm/uaccess.h>
39 42
40struct cgroup_subsys mem_cgroup_subsys __read_mostly; 43struct cgroup_subsys mem_cgroup_subsys __read_mostly;
41#define MEM_CGROUP_RECLAIM_RETRIES 5 44#define MEM_CGROUP_RECLAIM_RETRIES 5
42 45
46#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
47/* Turned on only when memory cgroup is enabled && really_do_swap_account = 0 */
48int do_swap_account __read_mostly;
49static int really_do_swap_account __initdata = 1; /* for remember boot option*/
50#else
51#define do_swap_account (0)
52#endif
53
54static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
55
43/* 56/*
44 * Statistics for memory cgroup. 57 * Statistics for memory cgroup.
45 */ 58 */
@@ -60,7 +73,7 @@ struct mem_cgroup_stat_cpu {
60} ____cacheline_aligned_in_smp; 73} ____cacheline_aligned_in_smp;
61 74
62struct mem_cgroup_stat { 75struct mem_cgroup_stat {
63 struct mem_cgroup_stat_cpu cpustat[NR_CPUS]; 76 struct mem_cgroup_stat_cpu cpustat[0];
64}; 77};
65 78
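Note: shrinking cpustat[] from NR_CPUS entries to a zero-length array means the flexible tail must now be sized explicitly at allocation time using nr_cpu_ids, which is also why the comment below says the stats must sit at the end of the struct. The allocation site lives elsewhere in this patch; roughly (a sketch):

	static struct mem_cgroup *mem_cgroup_alloc_sketch(void)
	{
		struct mem_cgroup *mem;
		size_t size = sizeof(*mem) +
			      nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu);

		if (size < PAGE_SIZE)
			mem = kmalloc(size, GFP_KERNEL);
		else
			mem = vmalloc(size);

		if (mem)
			memset(mem, 0, size);
		return mem;
	}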
66/* 79/*
@@ -89,9 +102,10 @@ struct mem_cgroup_per_zone {
89 /* 102 /*
90 * spin_lock to protect the per cgroup LRU 103 * spin_lock to protect the per cgroup LRU
91 */ 104 */
92 spinlock_t lru_lock;
93 struct list_head lists[NR_LRU_LISTS]; 105 struct list_head lists[NR_LRU_LISTS];
94 unsigned long count[NR_LRU_LISTS]; 106 unsigned long count[NR_LRU_LISTS];
107
108 struct zone_reclaim_stat reclaim_stat;
95}; 109};
96/* Macro for accessing counter */ 110/* Macro for accessing counter */
97#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) 111#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
@@ -122,44 +136,73 @@ struct mem_cgroup {
122 */ 136 */
123 struct res_counter res; 137 struct res_counter res;
124 /* 138 /*
139 * the counter to account for mem+swap usage.
140 */
141 struct res_counter memsw;
142 /*
125 * Per cgroup active and inactive list, similar to the 143 * Per cgroup active and inactive list, similar to the
126 * per zone LRU lists. 144 * per zone LRU lists.
127 */ 145 */
128 struct mem_cgroup_lru_info info; 146 struct mem_cgroup_lru_info info;
129 147
148 /*
 149 * protects reclaim-related members.
150 */
151 spinlock_t reclaim_param_lock;
152
130 int prev_priority; /* for recording reclaim priority */ 153 int prev_priority; /* for recording reclaim priority */
154
155 /*
 156 * While reclaiming in a hierarchy, we cache the last child we
157 * reclaimed from. Protected by hierarchy_mutex
158 */
159 struct mem_cgroup *last_scanned_child;
131 /* 160 /*
132 * statistics. 161 * Should the accounting and control be hierarchical, per subtree?
162 */
163 bool use_hierarchy;
164 unsigned long last_oom_jiffies;
165 atomic_t refcnt;
166
167 unsigned int swappiness;
168
169 /*
170 * statistics. This must be placed at the end of memcg.
133 */ 171 */
134 struct mem_cgroup_stat stat; 172 struct mem_cgroup_stat stat;
135}; 173};
136static struct mem_cgroup init_mem_cgroup;
137 174
138enum charge_type { 175enum charge_type {
139 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 176 MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
140 MEM_CGROUP_CHARGE_TYPE_MAPPED, 177 MEM_CGROUP_CHARGE_TYPE_MAPPED,
141 MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */ 178 MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */
142 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ 179 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
180 MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */
143 NR_CHARGE_TYPE, 181 NR_CHARGE_TYPE,
144}; 182};
145 183
146/* only for here (for easy reading.) */ 184/* only for here (for easy reading.) */
147#define PCGF_CACHE (1UL << PCG_CACHE) 185#define PCGF_CACHE (1UL << PCG_CACHE)
148#define PCGF_USED (1UL << PCG_USED) 186#define PCGF_USED (1UL << PCG_USED)
149#define PCGF_ACTIVE (1UL << PCG_ACTIVE)
150#define PCGF_LOCK (1UL << PCG_LOCK) 187#define PCGF_LOCK (1UL << PCG_LOCK)
151#define PCGF_FILE (1UL << PCG_FILE)
152static const unsigned long 188static const unsigned long
153pcg_default_flags[NR_CHARGE_TYPE] = { 189pcg_default_flags[NR_CHARGE_TYPE] = {
154 PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */ 190 PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
155 PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */ 191 PCGF_USED | PCGF_LOCK, /* Anon */
156 PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */ 192 PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
157 0, /* FORCE */ 193 0, /* FORCE */
158}; 194};
159 195
160/* 196/* for encoding cft->private value on file */
161 * Always modified under lru lock. Then, not necessary to preempt_disable() 197#define _MEM (0)
162 */ 198#define _MEMSWAP (1)
199#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
200#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
201#define MEMFILE_ATTR(val) ((val) & 0xffff)
202
203static void mem_cgroup_get(struct mem_cgroup *mem);
204static void mem_cgroup_put(struct mem_cgroup *mem);
205
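Note: MEMFILE_PRIVATE() packs two small values into cft->private so one set of cgroup file handlers can serve both the plain memory counter (_MEM) and the new mem+swap counter (_MEMSWAP). Decoding is symmetric; an illustrative helper (RES_LIMIT and friends come from res_counter.h):

	static struct res_counter *counter_from_private(struct mem_cgroup *mem,
							int priv)
	{
		/* e.g. priv == MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT) */
		if (MEMFILE_TYPE(priv) == _MEMSWAP)
			return &mem->memsw;
		/* MEMFILE_ATTR(priv) then names the operation: RES_USAGE, RES_LIMIT, ... */
		return &mem->res;
	}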
163static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, 206static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
164 struct page_cgroup *pc, 207 struct page_cgroup *pc,
165 bool charge) 208 bool charge)
@@ -167,10 +210,9 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
167 int val = (charge)? 1 : -1; 210 int val = (charge)? 1 : -1;
168 struct mem_cgroup_stat *stat = &mem->stat; 211 struct mem_cgroup_stat *stat = &mem->stat;
169 struct mem_cgroup_stat_cpu *cpustat; 212 struct mem_cgroup_stat_cpu *cpustat;
213 int cpu = get_cpu();
170 214
171 VM_BUG_ON(!irqs_disabled()); 215 cpustat = &stat->cpustat[cpu];
172
173 cpustat = &stat->cpustat[smp_processor_id()];
174 if (PageCgroupCache(pc)) 216 if (PageCgroupCache(pc))
175 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val); 217 __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
176 else 218 else
@@ -182,6 +224,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
182 else 224 else
183 __mem_cgroup_stat_add_safe(cpustat, 225 __mem_cgroup_stat_add_safe(cpustat,
184 MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); 226 MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
227 put_cpu();
185} 228}
186 229
187static struct mem_cgroup_per_zone * 230static struct mem_cgroup_per_zone *
@@ -197,6 +240,9 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
197 int nid = page_cgroup_nid(pc); 240 int nid = page_cgroup_nid(pc);
198 int zid = page_cgroup_zid(pc); 241 int zid = page_cgroup_zid(pc);
199 242
243 if (!mem)
244 return NULL;
245
200 return mem_cgroup_zoneinfo(mem, nid, zid); 246 return mem_cgroup_zoneinfo(mem, nid, zid);
201} 247}
202 248
@@ -236,77 +282,152 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
236 struct mem_cgroup, css); 282 struct mem_cgroup, css);
237} 283}
238 284
239static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, 285static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
240 struct page_cgroup *pc)
241{ 286{
242 int lru = LRU_BASE; 287 struct mem_cgroup *mem = NULL;
288 /*
289 * Because we have no locks, mm->owner's may be being moved to other
290 * cgroup. We use css_tryget() here even if this looks
291 * pessimistic (rather than adding locks here).
292 */
293 rcu_read_lock();
294 do {
295 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
296 if (unlikely(!mem))
297 break;
298 } while (!css_tryget(&mem->css));
299 rcu_read_unlock();
300 return mem;
301}
243 302
244 if (PageCgroupUnevictable(pc)) 303static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
245 lru = LRU_UNEVICTABLE; 304{
246 else { 305 if (!mem)
247 if (PageCgroupActive(pc)) 306 return true;
248 lru += LRU_ACTIVE; 307 return css_is_removed(&mem->css);
249 if (PageCgroupFile(pc)) 308}
250 lru += LRU_FILE;
251 }
252 309
253 MEM_CGROUP_ZSTAT(mz, lru) -= 1; 310/*
311 * Following LRU functions are allowed to be used without PCG_LOCK.
312 * Operations are called by routine of global LRU independently from memcg.
313 * What we have to take care of here is validness of pc->mem_cgroup.
314 *
315 * Changes to pc->mem_cgroup happens when
316 * 1. charge
317 * 2. moving account
318 * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
319 * It is added to LRU before charge.
320 * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
321 * When moving account, the page is not on LRU. It's isolated.
322 */
254 323
255 mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false); 324void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
256 list_del(&pc->lru); 325{
326 struct page_cgroup *pc;
327 struct mem_cgroup *mem;
328 struct mem_cgroup_per_zone *mz;
329
330 if (mem_cgroup_disabled())
331 return;
332 pc = lookup_page_cgroup(page);
333 /* can happen while we handle swapcache. */
334 if (list_empty(&pc->lru) || !pc->mem_cgroup)
335 return;
336 /*
337 * We don't check PCG_USED bit. It's cleared when the "page" is finally
338 * removed from global LRU.
339 */
340 mz = page_cgroup_zoneinfo(pc);
341 mem = pc->mem_cgroup;
342 MEM_CGROUP_ZSTAT(mz, lru) -= 1;
343 list_del_init(&pc->lru);
344 return;
257} 345}
258 346
259static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, 347void mem_cgroup_del_lru(struct page *page)
260 struct page_cgroup *pc)
261{ 348{
262 int lru = LRU_BASE; 349 mem_cgroup_del_lru_list(page, page_lru(page));
350}
263 351
264 if (PageCgroupUnevictable(pc)) 352void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
265 lru = LRU_UNEVICTABLE; 353{
266 else { 354 struct mem_cgroup_per_zone *mz;
267 if (PageCgroupActive(pc)) 355 struct page_cgroup *pc;
268 lru += LRU_ACTIVE;
269 if (PageCgroupFile(pc))
270 lru += LRU_FILE;
271 }
272 356
273 MEM_CGROUP_ZSTAT(mz, lru) += 1; 357 if (mem_cgroup_disabled())
274 list_add(&pc->lru, &mz->lists[lru]); 358 return;
275 359
276 mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true); 360 pc = lookup_page_cgroup(page);
361 smp_rmb();
362 /* unused page is not rotated. */
363 if (!PageCgroupUsed(pc))
364 return;
365 mz = page_cgroup_zoneinfo(pc);
366 list_move(&pc->lru, &mz->lists[lru]);
277} 367}
278 368
279static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru) 369void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
280{ 370{
281 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); 371 struct page_cgroup *pc;
282 int active = PageCgroupActive(pc); 372 struct mem_cgroup_per_zone *mz;
283 int file = PageCgroupFile(pc);
284 int unevictable = PageCgroupUnevictable(pc);
285 enum lru_list from = unevictable ? LRU_UNEVICTABLE :
286 (LRU_FILE * !!file + !!active);
287 373
288 if (lru == from) 374 if (mem_cgroup_disabled())
375 return;
376 pc = lookup_page_cgroup(page);
377 /* barrier to sync with "charge" */
378 smp_rmb();
379 if (!PageCgroupUsed(pc))
289 return; 380 return;
290 381
291 MEM_CGROUP_ZSTAT(mz, from) -= 1; 382 mz = page_cgroup_zoneinfo(pc);
383 MEM_CGROUP_ZSTAT(mz, lru) += 1;
384 list_add(&pc->lru, &mz->lists[lru]);
385}
386
387/*
388 * At handling SwapCache, pc->mem_cgroup may be changed while it's linked to
 389 * lru because the page may be reused after it's fully uncharged (because of
 390 * SwapCache behavior). To handle that, unlink page_cgroup from LRU when charge
391 * it again. This function is only used to charge SwapCache. It's done under
392 * lock_page and expected that zone->lru_lock is never held.
393 */
394static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page)
395{
396 unsigned long flags;
397 struct zone *zone = page_zone(page);
398 struct page_cgroup *pc = lookup_page_cgroup(page);
399
400 spin_lock_irqsave(&zone->lru_lock, flags);
292 /* 401 /*
293 * However this is done under mz->lru_lock, another flags, which 402 * Forget old LRU when this page_cgroup is *not* used. This Used bit
294 * are not related to LRU, will be modified from out-of-lock. 403 * is guarded by lock_page() because the page is SwapCache.
295 * We have to use atomic set/clear flags.
296 */ 404 */
297 if (is_unevictable_lru(lru)) { 405 if (!PageCgroupUsed(pc))
298 ClearPageCgroupActive(pc); 406 mem_cgroup_del_lru_list(page, page_lru(page));
299 SetPageCgroupUnevictable(pc); 407 spin_unlock_irqrestore(&zone->lru_lock, flags);
300 } else { 408}
301 if (is_active_lru(lru))
302 SetPageCgroupActive(pc);
303 else
304 ClearPageCgroupActive(pc);
305 ClearPageCgroupUnevictable(pc);
306 }
307 409
308 MEM_CGROUP_ZSTAT(mz, lru) += 1; 410static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
309 list_move(&pc->lru, &mz->lists[lru]); 411{
412 unsigned long flags;
413 struct zone *zone = page_zone(page);
414 struct page_cgroup *pc = lookup_page_cgroup(page);
415
416 spin_lock_irqsave(&zone->lru_lock, flags);
417 /* link when the page is linked to LRU but page_cgroup isn't */
418 if (PageLRU(page) && list_empty(&pc->lru))
419 mem_cgroup_add_lru_list(page, page_lru(page));
420 spin_unlock_irqrestore(&zone->lru_lock, flags);
421}
422
423
424void mem_cgroup_move_lists(struct page *page,
425 enum lru_list from, enum lru_list to)
426{
427 if (mem_cgroup_disabled())
428 return;
429 mem_cgroup_del_lru_list(page, from);
430 mem_cgroup_add_lru_list(page, to);
310} 431}
311 432
312int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) 433int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
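Note: the per-memcg LRU is now maintained from these global-LRU hooks rather than under its own lru_lock. The expected pairing on the page-reclaim side looks roughly like this (an assumed caller under zone->lru_lock, not part of this patch):

	static void __move_between_lrus(struct zone *zone, struct page *page,
					enum lru_list from, enum lru_list to)
	{
		/* caller holds zone->lru_lock */
		list_move(&page->lru, &zone->lru[to].list);
		mem_cgroup_move_lists(page, from, to);	/* mirror in the memcg LRU */
	}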
@@ -320,37 +441,6 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
320} 441}
321 442
322/* 443/*
323 * This routine assumes that the appropriate zone's lru lock is already held
324 */
325void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
326{
327 struct page_cgroup *pc;
328 struct mem_cgroup_per_zone *mz;
329 unsigned long flags;
330
331 if (mem_cgroup_subsys.disabled)
332 return;
333
334 /*
335 * We cannot lock_page_cgroup while holding zone's lru_lock,
336 * because other holders of lock_page_cgroup can be interrupted
337 * with an attempt to rotate_reclaimable_page. But we cannot
338 * safely get to page_cgroup without it, so just try_lock it:
339 * mem_cgroup_isolate_pages allows for page left on wrong list.
340 */
341 pc = lookup_page_cgroup(page);
342 if (!trylock_page_cgroup(pc))
343 return;
344 if (pc && PageCgroupUsed(pc)) {
345 mz = page_cgroup_zoneinfo(pc);
346 spin_lock_irqsave(&mz->lru_lock, flags);
347 __mem_cgroup_move_lists(pc, lru);
348 spin_unlock_irqrestore(&mz->lru_lock, flags);
349 }
350 unlock_page_cgroup(pc);
351}
352
353/*
354 * Calculate mapped_ratio under memory controller. This will be used in 444 * Calculate mapped_ratio under memory controller. This will be used in
 355 * vmscan.c for determining whether we have to reclaim mapped pages. 445 * vmscan.c for determining whether we have to reclaim mapped pages.
356 */ 446 */
@@ -372,39 +462,108 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
372 */ 462 */
373int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) 463int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
374{ 464{
375 return mem->prev_priority; 465 int prev_priority;
466
467 spin_lock(&mem->reclaim_param_lock);
468 prev_priority = mem->prev_priority;
469 spin_unlock(&mem->reclaim_param_lock);
470
471 return prev_priority;
376} 472}
377 473
378void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority) 474void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority)
379{ 475{
476 spin_lock(&mem->reclaim_param_lock);
380 if (priority < mem->prev_priority) 477 if (priority < mem->prev_priority)
381 mem->prev_priority = priority; 478 mem->prev_priority = priority;
479 spin_unlock(&mem->reclaim_param_lock);
382} 480}
383 481
384void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority) 482void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
385{ 483{
484 spin_lock(&mem->reclaim_param_lock);
386 mem->prev_priority = priority; 485 mem->prev_priority = priority;
486 spin_unlock(&mem->reclaim_param_lock);
387} 487}
388 488
389/* 489static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
390 * Calculate # of pages to be scanned in this priority/zone. 490{
391 * See also vmscan.c 491 unsigned long active;
392 * 492 unsigned long inactive;
393 * priority starts from "DEF_PRIORITY" and decremented in each loop. 493 unsigned long gb;
394 * (see include/linux/mmzone.h) 494 unsigned long inactive_ratio;
395 */ 495
496 inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON);
497 active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON);
498
499 gb = (inactive + active) >> (30 - PAGE_SHIFT);
500 if (gb)
501 inactive_ratio = int_sqrt(10 * gb);
502 else
503 inactive_ratio = 1;
504
505 if (present_pages) {
506 present_pages[0] = inactive;
507 present_pages[1] = active;
508 }
509
510 return inactive_ratio;
511}
512
513int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
514{
515 unsigned long active;
516 unsigned long inactive;
517 unsigned long present_pages[2];
518 unsigned long inactive_ratio;
396 519
397long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, 520 inactive_ratio = calc_inactive_ratio(memcg, present_pages);
398 int priority, enum lru_list lru) 521
522 inactive = present_pages[0];
523 active = present_pages[1];
524
525 if (inactive * inactive_ratio < active)
526 return 1;
527
528 return 0;
529}
530
531unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
532 struct zone *zone,
533 enum lru_list lru)
399{ 534{
400 long nr_pages;
401 int nid = zone->zone_pgdat->node_id; 535 int nid = zone->zone_pgdat->node_id;
402 int zid = zone_idx(zone); 536 int zid = zone_idx(zone);
403 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); 537 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
404 538
405 nr_pages = MEM_CGROUP_ZSTAT(mz, lru); 539 return MEM_CGROUP_ZSTAT(mz, lru);
540}
406 541
407 return (nr_pages >> priority); 542struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
543 struct zone *zone)
544{
545 int nid = zone->zone_pgdat->node_id;
546 int zid = zone_idx(zone);
547 struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
548
549 return &mz->reclaim_stat;
550}
551
552struct zone_reclaim_stat *
553mem_cgroup_get_reclaim_stat_from_page(struct page *page)
554{
555 struct page_cgroup *pc;
556 struct mem_cgroup_per_zone *mz;
557
558 if (mem_cgroup_disabled())
559 return NULL;
560
561 pc = lookup_page_cgroup(page);
562 mz = page_cgroup_zoneinfo(pc);
563 if (!mz)
564 return NULL;
565
566 return &mz->reclaim_stat;
408} 567}
409 568
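Note: calc_inactive_ratio() above applies the same square-root heuristic the global VM uses, with ratio = int_sqrt(10 * gb). A quick worked instance (illustrative numbers):

	/*
	 * 4 GiB of anon pages: gb = 4, ratio = int_sqrt(40) = 6. The
	 * inactive anon list is "low" once inactive * 6 < active, i.e.
	 * once less than roughly 1/7 of anon memory is inactive.
	 */
	static unsigned long example_ratio(unsigned long anon_pages)
	{
		unsigned long gb = anon_pages >> (30 - PAGE_SHIFT);

		return gb ? int_sqrt(10 * gb) : 1;
	}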
410unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 569unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -429,95 +588,281 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
429 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); 588 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
430 src = &mz->lists[lru]; 589 src = &mz->lists[lru];
431 590
432 spin_lock(&mz->lru_lock);
433 scan = 0; 591 scan = 0;
434 list_for_each_entry_safe_reverse(pc, tmp, src, lru) { 592 list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
435 if (scan >= nr_to_scan) 593 if (scan >= nr_to_scan)
436 break; 594 break;
595
596 page = pc->page;
437 if (unlikely(!PageCgroupUsed(pc))) 597 if (unlikely(!PageCgroupUsed(pc)))
438 continue; 598 continue;
439 page = pc->page;
440
441 if (unlikely(!PageLRU(page))) 599 if (unlikely(!PageLRU(page)))
442 continue; 600 continue;
443 601
444 /*
445 * TODO: play better with lumpy reclaim, grabbing anything.
446 */
447 if (PageUnevictable(page) ||
448 (PageActive(page) && !active) ||
449 (!PageActive(page) && active)) {
450 __mem_cgroup_move_lists(pc, page_lru(page));
451 continue;
452 }
453
454 scan++; 602 scan++;
455 list_move(&pc->lru, &pc_list);
456
457 if (__isolate_lru_page(page, mode, file) == 0) { 603 if (__isolate_lru_page(page, mode, file) == 0) {
458 list_move(&page->lru, dst); 604 list_move(&page->lru, dst);
459 nr_taken++; 605 nr_taken++;
460 } 606 }
461 } 607 }
462 608
463 list_splice(&pc_list, src);
464 spin_unlock(&mz->lru_lock);
465
466 *scanned = scan; 609 *scanned = scan;
467 return nr_taken; 610 return nr_taken;
468} 611}
469 612
613#define mem_cgroup_from_res_counter(counter, member) \
614 container_of(counter, struct mem_cgroup, member)
615
470/* 616/*
471 * Charge the memory controller for page usage. 617 * This routine finds the DFS walk successor. This routine should be
472 * Return 618 * called with hierarchy_mutex held
473 * 0 if the charge was successful
474 * < 0 if the cgroup is over its limit
475 */ 619 */
476static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, 620static struct mem_cgroup *
477 gfp_t gfp_mask, enum charge_type ctype, 621mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
478 struct mem_cgroup *memcg)
479{ 622{
623 struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
624
625 curr_cgroup = curr->css.cgroup;
626 root_cgroup = root_mem->css.cgroup;
627
628 if (!list_empty(&curr_cgroup->children)) {
629 /*
630 * Walk down to children
631 */
632 mem_cgroup_put(curr);
633 cgroup = list_entry(curr_cgroup->children.next,
634 struct cgroup, sibling);
635 curr = mem_cgroup_from_cont(cgroup);
636 mem_cgroup_get(curr);
637 goto done;
638 }
639
640visit_parent:
641 if (curr_cgroup == root_cgroup) {
642 mem_cgroup_put(curr);
643 curr = root_mem;
644 mem_cgroup_get(curr);
645 goto done;
646 }
647
648 /*
649 * Goto next sibling
650 */
651 if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
652 mem_cgroup_put(curr);
653 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
654 sibling);
655 curr = mem_cgroup_from_cont(cgroup);
656 mem_cgroup_get(curr);
657 goto done;
658 }
659
660 /*
661 * Go up to next parent and next parent's sibling if need be
662 */
663 curr_cgroup = curr_cgroup->parent;
664 goto visit_parent;
665
666done:
667 root_mem->last_scanned_child = curr;
668 return curr;
669}
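The walk above is a plain preorder (DFS) successor: descend to the first child, else advance to the next sibling, else climb parents until a sibling is found, wrapping back to the root. A minimal user-space sketch of the same shape, with a toy node type standing in for struct cgroup (all names here are illustrative, not kernel API):

#include <stdio.h>
#include <stddef.h>

struct node {
        const char *name;
        struct node *parent;
        struct node *first_child;
        struct node *next_sibling;
};

/* DFS successor of 'curr' within the subtree rooted at 'root', wrapping
 * to 'root' when exhausted, like mem_cgroup_get_next_node() */
static struct node *dfs_next(struct node *curr, struct node *root)
{
        if (curr->first_child)          /* walk down to children */
                return curr->first_child;

        while (curr != root) {
                if (curr->next_sibling) /* go to next sibling */
                        return curr->next_sibling;
                curr = curr->parent;    /* up to parent, retry sibling */
        }
        return root;                    /* wrapped around */
}

int main(void)
{
        struct node root = { "root", NULL, NULL, NULL };
        struct node a = { "a", &root, NULL, NULL };
        struct node b = { "b", &root, NULL, NULL };
        struct node a1 = { "a1", &a, NULL, NULL };
        struct node *n = &root;

        root.first_child = &a;
        a.next_sibling = &b;
        a.first_child = &a1;

        for (int i = 0; i < 6; i++) {
                n = dfs_next(n, &root);
                printf("%s\n", n->name);        /* a a1 b root a a1 */
        }
        return 0;
}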
670
671/*
672 * Visit the first child (need not be the first child as per the ordering
673 * of the cgroup list, since we track last_scanned_child) of @mem and use
674 * that to reclaim free pages from.
675 */
676static struct mem_cgroup *
677mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
678{
679 struct cgroup *cgroup;
680 struct mem_cgroup *ret;
681 bool obsolete;
682
683 obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
684
685 /*
686 * Scan all children under the mem_cgroup mem
687 */
688 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
689 if (list_empty(&root_mem->css.cgroup->children)) {
690 ret = root_mem;
691 goto done;
692 }
693
694 if (!root_mem->last_scanned_child || obsolete) {
695
696 if (obsolete && root_mem->last_scanned_child)
697 mem_cgroup_put(root_mem->last_scanned_child);
698
699 cgroup = list_first_entry(&root_mem->css.cgroup->children,
700 struct cgroup, sibling);
701 ret = mem_cgroup_from_cont(cgroup);
702 mem_cgroup_get(ret);
703 } else
704 ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
705 root_mem);
706
707done:
708 root_mem->last_scanned_child = ret;
709 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
710 return ret;
711}
712
713static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
714{
715 if (do_swap_account) {
716 if (res_counter_check_under_limit(&mem->res) &&
717 res_counter_check_under_limit(&mem->memsw))
718 return true;
719 } else
720 if (res_counter_check_under_limit(&mem->res))
721 return true;
722 return false;
723}
724
725static unsigned int get_swappiness(struct mem_cgroup *memcg)
726{
727 struct cgroup *cgrp = memcg->css.cgroup;
728 unsigned int swappiness;
729
730 /* root ? */
731 if (cgrp->parent == NULL)
732 return vm_swappiness;
733
734 spin_lock(&memcg->reclaim_param_lock);
735 swappiness = memcg->swappiness;
736 spin_unlock(&memcg->reclaim_param_lock);
737
738 return swappiness;
739}
740
741/*
742 * Dance down the hierarchy if needed to reclaim memory. We remember the
743 * last child we reclaimed from, so that we don't end up penalizing
744 * one child extensively based on its position in the children list.
745 *
746 * root_mem is the original ancestor that we've been reclaiming from.
747 */
748static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
749 gfp_t gfp_mask, bool noswap)
750{
751 struct mem_cgroup *next_mem;
752 int ret = 0;
753
754 /*
755 * Reclaim unconditionally and don't check the return value.
756 * We need to reclaim in the current group and down the tree.
757 * One might think about checking for children before reclaiming,
758 * but there might be leftover accounting, even after the children
759 * have left.
760 */
761 ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
762 get_swappiness(root_mem));
763 if (mem_cgroup_check_under_limit(root_mem))
764 return 0;
765 if (!root_mem->use_hierarchy)
766 return ret;
767
768 next_mem = mem_cgroup_get_first_node(root_mem);
769
770 while (next_mem != root_mem) {
771 if (mem_cgroup_is_obsolete(next_mem)) {
772 mem_cgroup_put(next_mem);
773 next_mem = mem_cgroup_get_first_node(root_mem);
774 continue;
775 }
776 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
777 get_swappiness(next_mem));
778 if (mem_cgroup_check_under_limit(root_mem))
779 return 0;
780 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
781 next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
782 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
783 }
784 return ret;
785}
786
787bool mem_cgroup_oom_called(struct task_struct *task)
788{
789 bool ret = false;
480 struct mem_cgroup *mem; 790 struct mem_cgroup *mem;
481 struct page_cgroup *pc; 791 struct mm_struct *mm;
482 unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
483 struct mem_cgroup_per_zone *mz;
484 unsigned long flags;
485 792
486 pc = lookup_page_cgroup(page); 793 rcu_read_lock();
487 /* can happen at boot */ 794 mm = task->mm;
488 if (unlikely(!pc)) 795 if (!mm)
796 mm = &init_mm;
797 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
798 if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
799 ret = true;
800 rcu_read_unlock();
801 return ret;
802}
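mem_cgroup_oom_called() reports true only inside a short window (HZ/10) after the last OOM kill, and the jiffies comparison stays correct across counter wraparound because time_before() compares a signed difference. A user-space sketch of that comparison; the macro body follows include/linux/jiffies.h, but treat the exact form here as an assumption:

#include <stdio.h>
#include <limits.h>

/* wraparound-safe "a is before b": the unsigned difference is cast to a
 * signed type, so values that wrapped past ULONG_MAX still compare sanely */
#define time_before(a, b)       ((long)((a) - (b)) < 0)

int main(void)
{
        unsigned long last_oom = ULONG_MAX - 5; /* jiffies just before wrap */
        unsigned long now = last_oom + 10;      /* wraps around to 4 */

        /* still inside the window even though now < last_oom numerically */
        printf("%d\n", time_before(now, last_oom + 20));        /* 1 */
        printf("%d\n", time_before(last_oom + 30, now));        /* 0 */
        return 0;
}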
803/*
804 * Unlike exported interface, "oom" parameter is added. if oom==true,
805 * oom-killer can be invoked.
806 */
807static int __mem_cgroup_try_charge(struct mm_struct *mm,
808 gfp_t gfp_mask, struct mem_cgroup **memcg,
809 bool oom)
810{
811 struct mem_cgroup *mem, *mem_over_limit;
812 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
813 struct res_counter *fail_res;
814
815 if (unlikely(test_thread_flag(TIF_MEMDIE))) {
816 /* Don't account this! */
817 *memcg = NULL;
489 return 0; 818 return 0;
490 prefetchw(pc); 819 }
820
491 /* 821 /*
492 * We always charge the cgroup the mm_struct belongs to. 822 * We always charge the cgroup the mm_struct belongs to.
493 * The mm_struct's mem_cgroup changes on task migration if the 823 * The mm_struct's mem_cgroup changes on task migration if the
494 * thread group leader migrates. It's possible that mm is not 824 * thread group leader migrates. It's possible that mm is not
495 * set, if so charge the init_mm (happens for pagecache usage). 825 * set, if so charge the init_mm (happens for pagecache usage).
496 */ 826 */
497 827 mem = *memcg;
498 if (likely(!memcg)) { 828 if (likely(!mem)) {
499 rcu_read_lock(); 829 mem = try_get_mem_cgroup_from_mm(mm);
500 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 830 *memcg = mem;
501 if (unlikely(!mem)) {
502 rcu_read_unlock();
503 return 0;
504 }
505 /*
506 * For every charge from the cgroup, increment reference count
507 */
508 css_get(&mem->css);
509 rcu_read_unlock();
510 } else { 831 } else {
511 mem = memcg; 832 css_get(&mem->css);
512 css_get(&memcg->css);
513 } 833 }
834 if (unlikely(!mem))
835 return 0;
836
837 VM_BUG_ON(mem_cgroup_is_obsolete(mem));
838
839 while (1) {
840 int ret;
841 bool noswap = false;
842
843 ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
844 if (likely(!ret)) {
845 if (!do_swap_account)
846 break;
847 ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
848 &fail_res);
849 if (likely(!ret))
850 break;
851 /* mem+swap counter fails */
852 res_counter_uncharge(&mem->res, PAGE_SIZE);
853 noswap = true;
854 mem_over_limit = mem_cgroup_from_res_counter(fail_res,
855 memsw);
856 } else
857 /* mem counter fails */
858 mem_over_limit = mem_cgroup_from_res_counter(fail_res,
859 res);
514 860
515 while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
516 if (!(gfp_mask & __GFP_WAIT)) 861 if (!(gfp_mask & __GFP_WAIT))
517 goto out; 862 goto nomem;
518 863
519 if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) 864 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
520 continue; 865 noswap);
521 866
522 /* 867 /*
523 * try_to_free_mem_cgroup_pages() might not give us a full 868 * try_to_free_mem_cgroup_pages() might not give us a full
@@ -525,49 +870,214 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
525 * moved to swap cache or just unmapped from the cgroup. 870 * moved to swap cache or just unmapped from the cgroup.
526 * Check the limit again to see if the reclaim reduced the 871 * Check the limit again to see if the reclaim reduced the
527 * current usage of the cgroup before giving up 872 * current usage of the cgroup before giving up
873 *
528 */ 874 */
529 if (res_counter_check_under_limit(&mem->res)) 875 if (mem_cgroup_check_under_limit(mem_over_limit))
530 continue; 876 continue;
531 877
532 if (!nr_retries--) { 878 if (!nr_retries--) {
533 mem_cgroup_out_of_memory(mem, gfp_mask); 879 if (oom) {
534 goto out; 880 mutex_lock(&memcg_tasklist);
881 mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
882 mutex_unlock(&memcg_tasklist);
883 mem_over_limit->last_oom_jiffies = jiffies;
884 }
885 goto nomem;
535 } 886 }
536 } 887 }
888 return 0;
889nomem:
890 css_put(&mem->css);
891 return -ENOMEM;
892}
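The loop above charges two res_counters in order (res, then memsw) and rolls the first back if the second fails, reporting which counter was hit so reclaim can target it. A self-contained sketch of that ordering, where struct counter is a stand-in for the kernel's res_counter:

#include <stdbool.h>
#include <stdio.h>

struct counter { long usage, limit; };

static bool counter_charge(struct counter *c, long n, struct counter **fail)
{
        if (c->usage + n > c->limit) {
                *fail = c;              /* remember who hit the limit */
                return false;
        }
        c->usage += n;
        return true;
}

static void counter_uncharge(struct counter *c, long n)
{
        c->usage -= n;
}

/* charge res first, then memsw; on memsw failure undo the res charge,
 * mirroring the mem+swap rollback in __mem_cgroup_try_charge() */
static bool try_charge(struct counter *res, struct counter *memsw, long n,
                       struct counter **over_limit)
{
        if (!counter_charge(res, n, over_limit))
                return false;
        if (!counter_charge(memsw, n, over_limit)) {
                counter_uncharge(res, n);
                return false;
        }
        return true;
}

int main(void)
{
        struct counter res = { 0, 2 }, memsw = { 0, 1 }, *over = NULL;

        try_charge(&res, &memsw, 1, &over);       /* succeeds */
        if (!try_charge(&res, &memsw, 1, &over))  /* memsw limit hit */
                printf("over limit, res rolled back: usage=%ld\n", res.usage);
        return 0;
}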
537 893
894static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
895{
896 struct mem_cgroup *mem;
897 swp_entry_t ent;
898
899 if (!PageSwapCache(page))
900 return NULL;
901
902 ent.val = page_private(page);
903 mem = lookup_swap_cgroup(ent);
904 if (!mem)
905 return NULL;
906 if (!css_tryget(&mem->css))
907 return NULL;
908 return mem;
909}
910
911/*
912 * Commit a charge obtained by __mem_cgroup_try_charge() and mark the
913 * page_cgroup USED. If it is already USED, uncharge and return.
914 */
915
916static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
917 struct page_cgroup *pc,
918 enum charge_type ctype)
919{
920 /* try_charge() can return with *memcg == NULL, so handle that here. */
921 if (!mem)
922 return;
538 923
539 lock_page_cgroup(pc); 924 lock_page_cgroup(pc);
540 if (unlikely(PageCgroupUsed(pc))) { 925 if (unlikely(PageCgroupUsed(pc))) {
541 unlock_page_cgroup(pc); 926 unlock_page_cgroup(pc);
542 res_counter_uncharge(&mem->res, PAGE_SIZE); 927 res_counter_uncharge(&mem->res, PAGE_SIZE);
928 if (do_swap_account)
929 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
543 css_put(&mem->css); 930 css_put(&mem->css);
544 931 return;
545 goto done;
546 } 932 }
547 pc->mem_cgroup = mem; 933 pc->mem_cgroup = mem;
548 /* 934 smp_wmb();
549 * If a page is accounted as a page cache, insert to inactive list.
550 * If anon, insert to active list.
551 */
552 pc->flags = pcg_default_flags[ctype]; 935 pc->flags = pcg_default_flags[ctype];
553 936
554 mz = page_cgroup_zoneinfo(pc); 937 mem_cgroup_charge_statistics(mem, pc, true);
555 938
556 spin_lock_irqsave(&mz->lru_lock, flags);
557 __mem_cgroup_add_list(mz, pc);
558 spin_unlock_irqrestore(&mz->lru_lock, flags);
559 unlock_page_cgroup(pc); 939 unlock_page_cgroup(pc);
940}
560 941
561done: 942/**
562 return 0; 943 * mem_cgroup_move_account - move account of the page
944 * @pc: page_cgroup of the page.
945 * @from: mem_cgroup which the page is moved from.
946 * @to: mem_cgroup which the page is moved to. @from != @to.
947 *
948 * The caller must ensure the following:
949 * - page is not on LRU (isolate_page() is useful.)
950 *
951 * returns 0 on success,
952 * returns -EBUSY when the lock is busy or "pc" is unstable.
953 *
954 * This function does the "uncharge" from the old cgroup but doesn't do the
955 * "charge" to the new cgroup; that must be done by the caller.
956 */
957
958static int mem_cgroup_move_account(struct page_cgroup *pc,
959 struct mem_cgroup *from, struct mem_cgroup *to)
960{
961 struct mem_cgroup_per_zone *from_mz, *to_mz;
962 int nid, zid;
963 int ret = -EBUSY;
964
965 VM_BUG_ON(from == to);
966 VM_BUG_ON(PageLRU(pc->page));
967
968 nid = page_cgroup_nid(pc);
969 zid = page_cgroup_zid(pc);
970 from_mz = mem_cgroup_zoneinfo(from, nid, zid);
971 to_mz = mem_cgroup_zoneinfo(to, nid, zid);
972
973 if (!trylock_page_cgroup(pc))
974 return ret;
975
976 if (!PageCgroupUsed(pc))
977 goto out;
978
979 if (pc->mem_cgroup != from)
980 goto out;
981
982 css_put(&from->css);
983 res_counter_uncharge(&from->res, PAGE_SIZE);
984 mem_cgroup_charge_statistics(from, pc, false);
985 if (do_swap_account)
986 res_counter_uncharge(&from->memsw, PAGE_SIZE);
987 pc->mem_cgroup = to;
988 mem_cgroup_charge_statistics(to, pc, true);
989 css_get(&to->css);
990 ret = 0;
563out: 991out:
564 css_put(&mem->css); 992 unlock_page_cgroup(pc);
565 return -ENOMEM; 993 return ret;
994}
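mem_cgroup_move_account() is a classic trylock-then-revalidate pattern: take the page_cgroup lock opportunistically, and only then confirm the pc is still used and still owned by "from", returning -EBUSY otherwise so the caller can retry. A small pthread-based sketch of the same pattern (compile with -pthread; the types and names are stand-ins, not kernel structures):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct account {
        pthread_mutex_t lock;
        int owner;              /* which group currently owns the charge */
        long *usage[2];         /* per-group usage counters */
};

/* move one charge from group 'from' to group 'to'; fail with -EBUSY if the
 * lock is contended or ownership changed before we got the lock */
static int move_account(struct account *a, int from, int to)
{
        if (pthread_mutex_trylock(&a->lock))
                return -EBUSY;                  /* lock is busy */
        if (a->owner != from) {                 /* revalidate under the lock */
                pthread_mutex_unlock(&a->lock);
                return -EBUSY;
        }
        (*a->usage[from])--;
        (*a->usage[to])++;
        a->owner = to;
        pthread_mutex_unlock(&a->lock);
        return 0;
}

int main(void)
{
        long child = 1, parent = 0;
        struct account a = { PTHREAD_MUTEX_INITIALIZER, 0,
                             { &child, &parent } };
        int ret = move_account(&a, 0, 1);

        printf("ret=%d child=%ld parent=%ld\n", ret, child, parent);
        return 0;       /* prints: ret=0 child=0 parent=1 */
}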
995
996/*
997 * Move charges to the parent cgroup.
998 */
999
1000static int mem_cgroup_move_parent(struct page_cgroup *pc,
1001 struct mem_cgroup *child,
1002 gfp_t gfp_mask)
1003{
1004 struct page *page = pc->page;
1005 struct cgroup *cg = child->css.cgroup;
1006 struct cgroup *pcg = cg->parent;
1007 struct mem_cgroup *parent;
1008 int ret;
1009
1010 /* Is ROOT ? */
1011 if (!pcg)
1012 return -EINVAL;
1013
1014
1015 parent = mem_cgroup_from_cont(pcg);
1016
1017
1018 ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
1019 if (ret || !parent)
1020 return ret;
1021
1022 if (!get_page_unless_zero(page))
1023 return -EBUSY;
1024
1025 ret = isolate_lru_page(page);
1026
1027 if (ret)
1028 goto cancel;
1029
1030 ret = mem_cgroup_move_account(pc, child, parent);
1031
1032 /* drop the extra refcnt from try_charge() (move_account took one) */
1033 css_put(&parent->css);
1034 putback_lru_page(page);
1035 if (!ret) {
1036 put_page(page);
1037 return 0;
1038 }
1039 /* uncharge if move fails */
1040cancel:
1041 res_counter_uncharge(&parent->res, PAGE_SIZE);
1042 if (do_swap_account)
1043 res_counter_uncharge(&parent->memsw, PAGE_SIZE);
1044 put_page(page);
1045 return ret;
1046}
1047
1048/*
1049 * Charge the memory controller for page usage.
1050 * Return
1051 * 0 if the charge was successful
1052 * < 0 if the cgroup is over its limit
1053 */
1054static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
1055 gfp_t gfp_mask, enum charge_type ctype,
1056 struct mem_cgroup *memcg)
1057{
1058 struct mem_cgroup *mem;
1059 struct page_cgroup *pc;
1060 int ret;
1061
1062 pc = lookup_page_cgroup(page);
1063 /* can happen at boot */
1064 if (unlikely(!pc))
1065 return 0;
1066 prefetchw(pc);
1067
1068 mem = memcg;
1069 ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
1070 if (ret || !mem)
1071 return ret;
1072
1073 __mem_cgroup_commit_charge(mem, pc, ctype);
1074 return 0;
566} 1075}
567 1076
568int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) 1077int mem_cgroup_newpage_charge(struct page *page,
1078 struct mm_struct *mm, gfp_t gfp_mask)
569{ 1079{
570 if (mem_cgroup_subsys.disabled) 1080 if (mem_cgroup_disabled())
571 return 0; 1081 return 0;
572 if (PageCompound(page)) 1082 if (PageCompound(page))
573 return 0; 1083 return 0;
@@ -589,7 +1099,10 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
589int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 1099int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
590 gfp_t gfp_mask) 1100 gfp_t gfp_mask)
591{ 1101{
592 if (mem_cgroup_subsys.disabled) 1102 struct mem_cgroup *mem = NULL;
1103 int ret;
1104
1105 if (mem_cgroup_disabled())
593 return 0; 1106 return 0;
594 if (PageCompound(page)) 1107 if (PageCompound(page))
595 return 0; 1108 return 0;
@@ -601,6 +1114,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
601 * For GFP_NOWAIT case, the page may be pre-charged before calling 1114 * For GFP_NOWAIT case, the page may be pre-charged before calling
602 * add_to_page_cache(). (See shmem.c) check it here and avoid to call 1115 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
603 * charge twice. (It works but has to pay a bit larger cost.) 1116 * charge twice. (It works but has to pay a bit larger cost.)
1117 * When the page is SwapCache, swap information must also be taken
1118 * into account. We are under lock_page() here.
604 */ 1119 */
605 if (!(gfp_mask & __GFP_WAIT)) { 1120 if (!(gfp_mask & __GFP_WAIT)) {
606 struct page_cgroup *pc; 1121 struct page_cgroup *pc;
@@ -617,58 +1132,198 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
617 unlock_page_cgroup(pc); 1132 unlock_page_cgroup(pc);
618 } 1133 }
619 1134
620 if (unlikely(!mm)) 1135 if (do_swap_account && PageSwapCache(page)) {
1136 mem = try_get_mem_cgroup_from_swapcache(page);
1137 if (mem)
1138 mm = NULL;
1139 else
1140 mem = NULL;
1141 /* SwapCache may be still linked to LRU now. */
1142 mem_cgroup_lru_del_before_commit_swapcache(page);
1143 }
1144
1145 if (unlikely(!mm && !mem))
621 mm = &init_mm; 1146 mm = &init_mm;
622 1147
623 if (page_is_file_cache(page)) 1148 if (page_is_file_cache(page))
624 return mem_cgroup_charge_common(page, mm, gfp_mask, 1149 return mem_cgroup_charge_common(page, mm, gfp_mask,
625 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); 1150 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
626 else 1151
627 return mem_cgroup_charge_common(page, mm, gfp_mask, 1152 ret = mem_cgroup_charge_common(page, mm, gfp_mask,
628 MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); 1153 MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
1154 if (mem)
1155 css_put(&mem->css);
1156 if (PageSwapCache(page))
1157 mem_cgroup_lru_add_after_commit_swapcache(page);
1158
1159 if (do_swap_account && !ret && PageSwapCache(page)) {
1160 swp_entry_t ent = {.val = page_private(page)};
1161 /* avoid double counting */
1162 mem = swap_cgroup_record(ent, NULL);
1163 if (mem) {
1164 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1165 mem_cgroup_put(mem);
1166 }
1167 }
1168 return ret;
1169}
1170
1171/*
1172 * During swap-in, the sequence is try_charge -> commit or cancel; the
1173 * page is locked throughout. When try_charge() successfully returns, one
1174 * refcnt to the memcg, without a struct page_cgroup, is acquired. This
1175 * refcnt will be consumed by "commit()" or dropped by "cancel()".
1176 */
1177int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
1178 struct page *page,
1179 gfp_t mask, struct mem_cgroup **ptr)
1180{
1181 struct mem_cgroup *mem;
1182 int ret;
1183
1184 if (mem_cgroup_disabled())
1185 return 0;
1186
1187 if (!do_swap_account)
1188 goto charge_cur_mm;
1189 /*
1190 * A racing thread's fault, or swapoff, may have already updated
1191 * the pte, and even removed page from swap cache: return success
1192 * to go on to do_swap_page()'s pte_same() test, which should fail.
1193 */
1194 if (!PageSwapCache(page))
1195 return 0;
1196 mem = try_get_mem_cgroup_from_swapcache(page);
1197 if (!mem)
1198 goto charge_cur_mm;
1199 *ptr = mem;
1200 ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
1201 /* drop extra refcnt from tryget */
1202 css_put(&mem->css);
1203 return ret;
1204charge_cur_mm:
1205 if (unlikely(!mm))
1206 mm = &init_mm;
1207 return __mem_cgroup_try_charge(mm, mask, ptr, true);
1208}
1209
1210void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
1211{
1212 struct page_cgroup *pc;
1213
1214 if (mem_cgroup_disabled())
1215 return;
1216 if (!ptr)
1217 return;
1218 pc = lookup_page_cgroup(page);
1219 mem_cgroup_lru_del_before_commit_swapcache(page);
1220 __mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
1221 mem_cgroup_lru_add_after_commit_swapcache(page);
1222 /*
1223 * Now the swap entry is in memory. This means this page may be
1224 * counted both as mem and swap, i.e. double counted.
1225 * Fix it by uncharging from memsw. This SwapCache is stable
1226 * under lock_page(), but in do_swap_page() (mm/memory.c) reuse_swap_page()
1227 * may call delete_from_swap_cache() before we reach here.
1228 */
1229 if (do_swap_account && PageSwapCache(page)) {
1230 swp_entry_t ent = {.val = page_private(page)};
1231 struct mem_cgroup *memcg;
1232 memcg = swap_cgroup_record(ent, NULL);
1233 if (memcg) {
1234 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1235 mem_cgroup_put(memcg);
1236 }
1237
1238 }
1239 /* add this page(page_cgroup) to the LRU we want. */
1240
629} 1241}
630 1242
1243void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
1244{
1245 if (mem_cgroup_disabled())
1246 return;
1247 if (!mem)
1248 return;
1249 res_counter_uncharge(&mem->res, PAGE_SIZE);
1250 if (do_swap_account)
1251 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1252 css_put(&mem->css);
1253}
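Together these three functions form a two-phase protocol: try_charge reserves against the limits, and the caller later either commits the reservation onto the page_cgroup or cancels it when the fault falls through (e.g. the pte changed under us). A toy user-space model of that caller contract; the memcg here is just a counter, not the kernel object:

#include <stdbool.h>
#include <stdio.h>

struct memcg { long usage, limit; };

static int try_charge(struct memcg *mem)        /* phase 1: reserve */
{
        if (mem->usage + 1 > mem->limit)
                return -1;                      /* would exceed the limit */
        mem->usage++;                           /* provisional charge */
        return 0;
}

static void commit_charge(struct memcg *mem)    /* phase 2a: keep it */
{
        (void)mem;      /* the provisional charge simply becomes final */
}

static void cancel_charge(struct memcg *mem)    /* phase 2b: undo it */
{
        mem->usage--;
}

int main(void)
{
        struct memcg mem = { 0, 8 };
        bool pte_changed = true;        /* pretend the race was lost */

        if (try_charge(&mem) == 0) {
                if (pte_changed)
                        cancel_charge(&mem);
                else
                        commit_charge(&mem);
        }
        printf("usage=%ld\n", mem.usage);       /* 0: charge was cancelled */
        return 0;
}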
1254
1255
631/* 1256/*
632 * uncharge if !page_mapped(page) 1257 * uncharge if !page_mapped(page)
633 */ 1258 */
634static void 1259static struct mem_cgroup *
635__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) 1260__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
636{ 1261{
637 struct page_cgroup *pc; 1262 struct page_cgroup *pc;
638 struct mem_cgroup *mem; 1263 struct mem_cgroup *mem = NULL;
639 struct mem_cgroup_per_zone *mz; 1264 struct mem_cgroup_per_zone *mz;
640 unsigned long flags;
641 1265
642 if (mem_cgroup_subsys.disabled) 1266 if (mem_cgroup_disabled())
643 return; 1267 return NULL;
1268
1269 if (PageSwapCache(page))
1270 return NULL;
644 1271
645 /* 1272 /*
646 * Check if our page_cgroup is valid 1273 * Check if our page_cgroup is valid
647 */ 1274 */
648 pc = lookup_page_cgroup(page); 1275 pc = lookup_page_cgroup(page);
649 if (unlikely(!pc || !PageCgroupUsed(pc))) 1276 if (unlikely(!pc || !PageCgroupUsed(pc)))
650 return; 1277 return NULL;
651 1278
652 lock_page_cgroup(pc); 1279 lock_page_cgroup(pc);
653 if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page)) 1280
654 || !PageCgroupUsed(pc)) { 1281 mem = pc->mem_cgroup;
655 /* This happens at race in zap_pte_range() and do_swap_page()*/ 1282
656 unlock_page_cgroup(pc); 1283 if (!PageCgroupUsed(pc))
657 return; 1284 goto unlock_out;
1285
1286 switch (ctype) {
1287 case MEM_CGROUP_CHARGE_TYPE_MAPPED:
1288 if (page_mapped(page))
1289 goto unlock_out;
1290 break;
1291 case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
1292 if (!PageAnon(page)) { /* Shared memory */
1293 if (page->mapping && !page_is_file_cache(page))
1294 goto unlock_out;
1295 } else if (page_mapped(page)) /* Anon */
1296 goto unlock_out;
1297 break;
1298 default:
1299 break;
658 } 1300 }
1301
1302 res_counter_uncharge(&mem->res, PAGE_SIZE);
1303 if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
1304 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1305
1306 mem_cgroup_charge_statistics(mem, pc, false);
659 ClearPageCgroupUsed(pc); 1307 ClearPageCgroupUsed(pc);
660 mem = pc->mem_cgroup; 1308 /*
1309 * pc->mem_cgroup is not cleared here. It will be accessed when it's
1310 * freed from LRU. This is safe because uncharged page is expected not
1311 * to be reused (freed soon). Exception is SwapCache, it's handled by
1312 * special functions.
1313 */
661 1314
662 mz = page_cgroup_zoneinfo(pc); 1315 mz = page_cgroup_zoneinfo(pc);
663 spin_lock_irqsave(&mz->lru_lock, flags);
664 __mem_cgroup_remove_list(mz, pc);
665 spin_unlock_irqrestore(&mz->lru_lock, flags);
666 unlock_page_cgroup(pc); 1316 unlock_page_cgroup(pc);
667 1317
668 res_counter_uncharge(&mem->res, PAGE_SIZE); 1318 /* at swapout, this memcg will be accessed to record to swap */
669 css_put(&mem->css); 1319 if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
1320 css_put(&mem->css);
670 1321
671 return; 1322 return mem;
1323
1324unlock_out:
1325 unlock_page_cgroup(pc);
1326 return NULL;
672} 1327}
673 1328
674void mem_cgroup_uncharge_page(struct page *page) 1329void mem_cgroup_uncharge_page(struct page *page)
@@ -689,16 +1344,55 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
689} 1344}
690 1345
691/* 1346/*
692 * Before starting migration, account against new page. 1347 * Called from __delete_from_swap_cache(); drops the "page" account.
1348 * The memcg information is recorded in the swap_cgroup of "ent".
1349 */
1350void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
1351{
1352 struct mem_cgroup *memcg;
1353
1354 memcg = __mem_cgroup_uncharge_common(page,
1355 MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
1356 /* record memcg information */
1357 if (do_swap_account && memcg) {
1358 swap_cgroup_record(ent, memcg);
1359 mem_cgroup_get(memcg);
1360 }
1361 if (memcg)
1362 css_put(&memcg->css);
1363}
1364
1365#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
1366/*
1367 * Called from swap_entry_free(); removes the record in swap_cgroup and
1368 * uncharges the "memsw" account.
693 */ 1369 */
694int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) 1370void mem_cgroup_uncharge_swap(swp_entry_t ent)
1371{
1372 struct mem_cgroup *memcg;
1373
1374 if (!do_swap_account)
1375 return;
1376
1377 memcg = swap_cgroup_record(ent, NULL);
1378 if (memcg) {
1379 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1380 mem_cgroup_put(memcg);
1381 }
1382}
1383#endif
1384
1385/*
1386 * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
1387 * page belongs to.
1388 */
1389int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
695{ 1390{
696 struct page_cgroup *pc; 1391 struct page_cgroup *pc;
697 struct mem_cgroup *mem = NULL; 1392 struct mem_cgroup *mem = NULL;
698 enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
699 int ret = 0; 1393 int ret = 0;
700 1394
701 if (mem_cgroup_subsys.disabled) 1395 if (mem_cgroup_disabled())
702 return 0; 1396 return 0;
703 1397
704 pc = lookup_page_cgroup(page); 1398 pc = lookup_page_cgroup(page);
@@ -706,41 +1400,67 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
706 if (PageCgroupUsed(pc)) { 1400 if (PageCgroupUsed(pc)) {
707 mem = pc->mem_cgroup; 1401 mem = pc->mem_cgroup;
708 css_get(&mem->css); 1402 css_get(&mem->css);
709 if (PageCgroupCache(pc)) {
710 if (page_is_file_cache(page))
711 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
712 else
713 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
714 }
715 } 1403 }
716 unlock_page_cgroup(pc); 1404 unlock_page_cgroup(pc);
1405
717 if (mem) { 1406 if (mem) {
718 ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL, 1407 ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
719 ctype, mem);
720 css_put(&mem->css); 1408 css_put(&mem->css);
721 } 1409 }
1410 *ptr = mem;
722 return ret; 1411 return ret;
723} 1412}
724 1413
725/* remove redundant charge if migration failed */ 1414/* remove redundant charge if migration failed */
726void mem_cgroup_end_migration(struct page *newpage) 1415void mem_cgroup_end_migration(struct mem_cgroup *mem,
1416 struct page *oldpage, struct page *newpage)
727{ 1417{
1418 struct page *target, *unused;
1419 struct page_cgroup *pc;
1420 enum charge_type ctype;
1421
1422 if (!mem)
1423 return;
1424
1425 /* at migration success, oldpage->mapping is NULL. */
1426 if (oldpage->mapping) {
1427 target = oldpage;
1428 unused = NULL;
1429 } else {
1430 target = newpage;
1431 unused = oldpage;
1432 }
1433
1434 if (PageAnon(target))
1435 ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
1436 else if (page_is_file_cache(target))
1437 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
1438 else
1439 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
1440
1441 /* unused page is not on radix-tree now. */
1442 if (unused)
1443 __mem_cgroup_uncharge_common(unused, ctype);
1444
1445 pc = lookup_page_cgroup(target);
728 /* 1446 /*
729 * At success, page->mapping is not NULL. 1447 * __mem_cgroup_commit_charge() checks the PCG_USED bit of page_cgroup.
730 * special rollback care is necessary when 1448 * So, double-counting is effectively avoided.
731 * 1. at migration failure. (newpage->mapping is cleared in this case)
732 * 2. the newpage was moved but not remapped again because the task
733 * exits and the newpage is obsolete. In this case, the new page
734 * may be a swapcache. So, we just call mem_cgroup_uncharge_page()
735 * always for avoiding mess. The page_cgroup will be removed if
736 * unnecessary. File cache pages is still on radix-tree. Don't
737 * care it.
738 */ 1449 */
739 if (!newpage->mapping) 1450 __mem_cgroup_commit_charge(mem, pc, ctype);
740 __mem_cgroup_uncharge_common(newpage, 1451
741 MEM_CGROUP_CHARGE_TYPE_FORCE); 1452 /*
742 else if (PageAnon(newpage)) 1453 * Both of oldpage and newpage are still under lock_page().
743 mem_cgroup_uncharge_page(newpage); 1454 * Then, we don't have to care about race in radix-tree.
1455 * But we have to be careful that this page is unmapped or not.
1456 *
1457 * There is a case for !page_mapped(). At the start of
1458 * migration, oldpage was mapped. But now, it's zapped.
1459 * But we know *target* page is not freed/reused under us.
1460 * mem_cgroup_uncharge_page() does all necessary checks.
1461 */
1462 if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
1463 mem_cgroup_uncharge_page(target);
744} 1464}
745 1465
746/* 1466/*
@@ -748,29 +1468,26 @@ void mem_cgroup_end_migration(struct page *newpage)
748 * This is typically used for page reclaiming for shmem for reducing side 1468 * This is typically used for page reclaiming for shmem for reducing side
749 * effect of page allocation from shmem, which is used by some mem_cgroup. 1469 * effect of page allocation from shmem, which is used by some mem_cgroup.
750 */ 1470 */
751int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) 1471int mem_cgroup_shrink_usage(struct page *page,
1472 struct mm_struct *mm,
1473 gfp_t gfp_mask)
752{ 1474{
753 struct mem_cgroup *mem; 1475 struct mem_cgroup *mem = NULL;
754 int progress = 0; 1476 int progress = 0;
755 int retry = MEM_CGROUP_RECLAIM_RETRIES; 1477 int retry = MEM_CGROUP_RECLAIM_RETRIES;
756 1478
757 if (mem_cgroup_subsys.disabled) 1479 if (mem_cgroup_disabled())
758 return 0; 1480 return 0;
759 if (!mm) 1481 if (page)
1482 mem = try_get_mem_cgroup_from_swapcache(page);
1483 if (!mem && mm)
1484 mem = try_get_mem_cgroup_from_mm(mm);
1485 if (unlikely(!mem))
760 return 0; 1486 return 0;
761 1487
762 rcu_read_lock();
763 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
764 if (unlikely(!mem)) {
765 rcu_read_unlock();
766 return 0;
767 }
768 css_get(&mem->css);
769 rcu_read_unlock();
770
771 do { 1488 do {
772 progress = try_to_free_mem_cgroup_pages(mem, gfp_mask); 1489 progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
773 progress += res_counter_check_under_limit(&mem->res); 1490 progress += mem_cgroup_check_under_limit(mem);
774 } while (!progress && --retry); 1491 } while (!progress && --retry);
775 1492
776 css_put(&mem->css); 1493 css_put(&mem->css);
@@ -779,117 +1496,295 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
779 return 0; 1496 return 0;
780} 1497}
781 1498
1499static DEFINE_MUTEX(set_limit_mutex);
1500
782static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, 1501static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
783 unsigned long long val) 1502 unsigned long long val)
784{ 1503{
785 1504
786 int retry_count = MEM_CGROUP_RECLAIM_RETRIES; 1505 int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
787 int progress; 1506 int progress;
1507 u64 memswlimit;
788 int ret = 0; 1508 int ret = 0;
789 1509
790 while (res_counter_set_limit(&memcg->res, val)) { 1510 while (retry_count) {
791 if (signal_pending(current)) { 1511 if (signal_pending(current)) {
792 ret = -EINTR; 1512 ret = -EINTR;
793 break; 1513 break;
794 } 1514 }
795 if (!retry_count) { 1515 /*
796 ret = -EBUSY; 1516 * Rather than hiding all of this in some function, do it in an
1517 * open-coded manner so you can see what really happens.
1518 * We have to guarantee mem->res.limit <= mem->memsw.limit.
1519 */
1520 mutex_lock(&set_limit_mutex);
1521 memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1522 if (memswlimit < val) {
1523 ret = -EINVAL;
1524 mutex_unlock(&set_limit_mutex);
797 break; 1525 break;
798 } 1526 }
799 progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL); 1527 ret = res_counter_set_limit(&memcg->res, val);
800 if (!progress) 1528 mutex_unlock(&set_limit_mutex);
801 retry_count--; 1529
1530 if (!ret)
1531 break;
1532
1533 progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
1534 false);
1535 if (!progress)
        retry_count--;
802 } 1536 }
1537
803 return ret; 1538 return ret;
804} 1539}
805 1540
1541int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
1542 unsigned long long val)
1543{
1544 int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
1545 u64 memlimit, oldusage, curusage;
1546 int ret;
1547
1548 if (!do_swap_account)
1549 return -EINVAL;
1550
1551 while (retry_count) {
1552 if (signal_pending(current)) {
1553 ret = -EINTR;
1554 break;
1555 }
1556 /*
1557 * Rather than hiding all of this in some function, do it in an
1558 * open-coded manner so you can see what really happens.
1559 * We have to guarantee mem->res.limit <= mem->memsw.limit.
1560 */
1561 mutex_lock(&set_limit_mutex);
1562 memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
1563 if (memlimit > val) {
1564 ret = -EINVAL;
1565 mutex_unlock(&set_limit_mutex);
1566 break;
1567 }
1568 ret = res_counter_set_limit(&memcg->memsw, val);
1569 mutex_unlock(&set_limit_mutex);
1570
1571 if (!ret)
1572 break;
1573
1574 oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
1575 mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true);
1576 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
1577 if (curusage >= oldusage)
1578 retry_count--;
1579 }
1580 return ret;
1581}
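Both resize paths take the same set_limit_mutex and check the opposite counter before writing, which is what preserves mem->res.limit <= mem->memsw.limit no matter which limit is being changed. A compact model of the two setters (compile with -pthread; plain variables stand in for the res_counters):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t set_limit_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long mem_limit = 100, memsw_limit = 100;

static int set_mem_limit(unsigned long long val)
{
        int ret = 0;

        pthread_mutex_lock(&set_limit_mutex);
        if (memsw_limit < val)          /* would break mem <= memsw */
                ret = -1;               /* -EINVAL in the kernel */
        else
                mem_limit = val;
        pthread_mutex_unlock(&set_limit_mutex);
        return ret;
}

static int set_memsw_limit(unsigned long long val)
{
        int ret = 0;

        pthread_mutex_lock(&set_limit_mutex);
        if (mem_limit > val)            /* would break mem <= memsw */
                ret = -1;
        else
                memsw_limit = val;
        pthread_mutex_unlock(&set_limit_mutex);
        return ret;
}

int main(void)
{
        printf("%d\n", set_memsw_limit(50));    /* -1: mem_limit is 100 */
        printf("%d\n", set_mem_limit(40));      /*  0 */
        printf("%d\n", set_memsw_limit(50));    /*  0: 40 <= 50 holds */
        return 0;
}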
806 1582
807/* 1583/*
808 * This routine traverses page_cgroups in the given list and drops them all. 1584 * This routine traverses page_cgroups in the given list and drops them all.
809 * *And* this routine doesn't reclaim the page itself, just removes the page_cgroup. 1585 * *And* this routine doesn't reclaim the page itself, just removes the page_cgroup.
810 */ 1586 */
811#define FORCE_UNCHARGE_BATCH (128) 1587static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
812static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, 1588 int node, int zid, enum lru_list lru)
813 struct mem_cgroup_per_zone *mz,
814 enum lru_list lru)
815{ 1589{
816 struct page_cgroup *pc; 1590 struct zone *zone;
817 struct page *page; 1591 struct mem_cgroup_per_zone *mz;
818 int count = FORCE_UNCHARGE_BATCH; 1592 struct page_cgroup *pc, *busy;
819 unsigned long flags; 1593 unsigned long flags, loop;
820 struct list_head *list; 1594 struct list_head *list;
1595 int ret = 0;
821 1596
1597 zone = &NODE_DATA(node)->node_zones[zid];
1598 mz = mem_cgroup_zoneinfo(mem, node, zid);
822 list = &mz->lists[lru]; 1599 list = &mz->lists[lru];
823 1600
824 spin_lock_irqsave(&mz->lru_lock, flags); 1601 loop = MEM_CGROUP_ZSTAT(mz, lru);
825 while (!list_empty(list)) { 1602 /* give some margin against EBUSY etc...*/
826 pc = list_entry(list->prev, struct page_cgroup, lru); 1603 loop += 256;
827 page = pc->page; 1604 busy = NULL;
828 if (!PageCgroupUsed(pc)) 1605 while (loop--) {
829 break; 1606 ret = 0;
830 get_page(page); 1607 spin_lock_irqsave(&zone->lru_lock, flags);
831 spin_unlock_irqrestore(&mz->lru_lock, flags); 1608 if (list_empty(list)) {
832 /* 1609 spin_unlock_irqrestore(&zone->lru_lock, flags);
833 * Check if this page is on LRU. !LRU page can be found
834 * if it's under page migration.
835 */
836 if (PageLRU(page)) {
837 __mem_cgroup_uncharge_common(page,
838 MEM_CGROUP_CHARGE_TYPE_FORCE);
839 put_page(page);
840 if (--count <= 0) {
841 count = FORCE_UNCHARGE_BATCH;
842 cond_resched();
843 }
844 } else {
845 spin_lock_irqsave(&mz->lru_lock, flags);
846 break; 1610 break;
847 } 1611 }
848 spin_lock_irqsave(&mz->lru_lock, flags); 1612 pc = list_entry(list->prev, struct page_cgroup, lru);
1613 if (busy == pc) {
1614 list_move(&pc->lru, list);
1615 busy = NULL;
1616 spin_unlock_irqrestore(&zone->lru_lock, flags);
1617 continue;
1618 }
1619 spin_unlock_irqrestore(&zone->lru_lock, flags);
1620
1621 ret = mem_cgroup_move_parent(pc, mem, GFP_KERNEL);
1622 if (ret == -ENOMEM)
1623 break;
1624
1625 if (ret == -EBUSY || ret == -EINVAL) {
1626 /* found lock contention or "pc" is obsolete. */
1627 busy = pc;
1628 cond_resched();
1629 } else
1630 busy = NULL;
849 } 1631 }
850 spin_unlock_irqrestore(&mz->lru_lock, flags); 1632
1633 if (!ret && !list_empty(list))
1634 return -EBUSY;
1635 return ret;
851} 1636}
852 1637
853/* 1638/*
854 * make mem_cgroup's charge to be 0 if there is no task. 1639 * make mem_cgroup's charge to be 0 if there is no task.
855 * This enables deleting this mem_cgroup. 1640 * This enables deleting this mem_cgroup.
856 */ 1641 */
857static int mem_cgroup_force_empty(struct mem_cgroup *mem) 1642static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
858{ 1643{
859 int ret = -EBUSY; 1644 int ret;
860 int node, zid; 1645 int node, zid, shrink;
1646 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
1647 struct cgroup *cgrp = mem->css.cgroup;
861 1648
862 css_get(&mem->css); 1649 css_get(&mem->css);
863 /* 1650
864 * page reclaim code (kswapd etc..) will move pages between 1651 shrink = 0;
865 * active_list <-> inactive_list while we don't take a lock. 1652 /* should free all ? */
866 * So, we have to do loop here until all lists are empty. 1653 if (free_all)
867 */ 1654 goto try_to_free;
1655move_account:
868 while (mem->res.usage > 0) { 1656 while (mem->res.usage > 0) {
869 if (atomic_read(&mem->css.cgroup->count) > 0) 1657 ret = -EBUSY;
1658 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
1659 goto out;
1660 ret = -EINTR;
1661 if (signal_pending(current))
870 goto out; 1662 goto out;
871 /* This is for making all *used* pages to be on LRU. */ 1663 /* This is for making all *used* pages to be on LRU. */
872 lru_add_drain_all(); 1664 lru_add_drain_all();
873 for_each_node_state(node, N_POSSIBLE) 1665 ret = 0;
874 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 1666 for_each_node_state(node, N_POSSIBLE) {
875 struct mem_cgroup_per_zone *mz; 1667 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
876 enum lru_list l; 1668 enum lru_list l;
877 mz = mem_cgroup_zoneinfo(mem, node, zid); 1669 for_each_lru(l) {
878 for_each_lru(l) 1670 ret = mem_cgroup_force_empty_list(mem,
879 mem_cgroup_force_empty_list(mem, mz, l); 1671 node, zid, l);
1672 if (ret)
1673 break;
1674 }
880 } 1675 }
1676 if (ret)
1677 break;
1678 }
1679 /* it seems parent cgroup doesn't have enough mem */
1680 if (ret == -ENOMEM)
1681 goto try_to_free;
881 cond_resched(); 1682 cond_resched();
882 } 1683 }
883 ret = 0; 1684 ret = 0;
884out: 1685out:
885 css_put(&mem->css); 1686 css_put(&mem->css);
886 return ret; 1687 return ret;
1688
1689try_to_free:
1690 /* returns -EBUSY if there is a task or if we come here twice. */
1691 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
1692 ret = -EBUSY;
1693 goto out;
1694 }
1695 /* we call try-to-free pages to make this cgroup empty */
1696 lru_add_drain_all();
1697 /* try to free all pages in this cgroup */
1698 shrink = 1;
1699 while (nr_retries && mem->res.usage > 0) {
1700 int progress;
1701
1702 if (signal_pending(current)) {
1703 ret = -EINTR;
1704 goto out;
1705 }
1706 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
1707 false, get_swappiness(mem));
1708 if (!progress) {
1709 nr_retries--;
1710 /* maybe some writeback is necessary */
1711 congestion_wait(WRITE, HZ/10);
1712 }
1713
1714 }
1715 lru_add_drain();
1716 /* try move_account...there may be some *locked* pages. */
1717 if (mem->res.usage)
1718 goto move_account;
1719 ret = 0;
1720 goto out;
1721}
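mem_cgroup_force_empty() alternates between two strategies until usage hits zero: move charges to the parent and, when that cannot make progress (or free_all was requested), reclaim pages directly, retrying the move afterwards for pages that were locked. A toy model of that alternation, with made-up thresholds standing in for -ENOMEM and reclaim progress:

#include <stdio.h>

static long usage = 5;

static int move_to_parent(void)
{
        if (usage > 2) {        /* pretend the parent can absorb 3 charges */
                usage--;
                return 0;
        }
        return -1;              /* models -ENOMEM from the move path */
}

static int try_to_free(void)
{
        if (usage > 0) {
                usage--;
                return 1;       /* made progress */
        }
        return 0;
}

int main(void)
{
        while (usage > 0) {
                if (move_to_parent() == 0)
                        continue;               /* keep moving charges */
                if (!try_to_free())             /* fall back to reclaim */
                        break;                  /* no progress: give up */
        }
        printf("usage=%ld\n", usage);           /* 0 */
        return 0;
}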
1722
1723int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
1724{
1725 return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
1726}
1727
1728
1729static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
1730{
1731 return mem_cgroup_from_cont(cont)->use_hierarchy;
1732}
1733
1734static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
1735 u64 val)
1736{
1737 int retval = 0;
1738 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
1739 struct cgroup *parent = cont->parent;
1740 struct mem_cgroup *parent_mem = NULL;
1741
1742 if (parent)
1743 parent_mem = mem_cgroup_from_cont(parent);
1744
1745 cgroup_lock();
1746 /*
1747 * If the parent's use_hierarchy is set, we can't make any modifications
1748 * in the child subtrees. If it is unset, then the change can
1749 * occur, provided the current cgroup has no children.
1750 *
1751 * For the root cgroup, parent_mem is NULL, we allow value to be
1752 * set if there are no children.
1753 */
1754 if ((!parent_mem || !parent_mem->use_hierarchy) &&
1755 (val == 1 || val == 0)) {
1756 if (list_empty(&cont->children))
1757 mem->use_hierarchy = val;
1758 else
1759 retval = -EBUSY;
1760 } else
1761 retval = -EINVAL;
1762 cgroup_unlock();
1763
1764 return retval;
887} 1765}
888 1766
889static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 1767static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
890{ 1768{
891 return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, 1769 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
892 cft->private); 1770 u64 val = 0;
1771 int type, name;
1772
1773 type = MEMFILE_TYPE(cft->private);
1774 name = MEMFILE_ATTR(cft->private);
1775 switch (type) {
1776 case _MEM:
1777 val = res_counter_read_u64(&mem->res, name);
1778 break;
1779 case _MEMSWAP:
1780 if (do_swap_account)
1781 val = res_counter_read_u64(&mem->memsw, name);
1782 break;
1783 default:
1784 BUG();
1785 break;
1786 }
1787 return val;
893} 1788}
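mem_cgroup_read()/write() multiplex one "private" integer into a counter type (_MEM vs _MEMSWAP) and a res_counter attribute. The MEMFILE_* helpers are defined earlier in memcontrol.c, outside this hunk; a plausible shape, packing the type into the high 16 bits (treat the exact encoding as an assumption):

#include <stdio.h>

enum { _MEM, _MEMSWAP };
enum { RES_USAGE, RES_LIMIT, RES_MAX_USAGE, RES_FAILCNT };

#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_TYPE(val)       (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)       ((val) & 0xffff)

int main(void)
{
        int priv = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT);

        printf("type=%d attr=%d\n", MEMFILE_TYPE(priv), MEMFILE_ATTR(priv));
        return 0;       /* prints: type=1 attr=1 */
}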
894/* 1789/*
895 * The user of this function is... 1790 * The user of this function is...
@@ -899,15 +1794,22 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
899 const char *buffer) 1794 const char *buffer)
900{ 1795{
901 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 1796 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
1797 int type, name;
902 unsigned long long val; 1798 unsigned long long val;
903 int ret; 1799 int ret;
904 1800
905 switch (cft->private) { 1801 type = MEMFILE_TYPE(cft->private);
1802 name = MEMFILE_ATTR(cft->private);
1803 switch (name) {
906 case RES_LIMIT: 1804 case RES_LIMIT:
907 /* This function does all necessary parse...reuse it */ 1805 /* This function does all necessary parse...reuse it */
908 ret = res_counter_memparse_write_strategy(buffer, &val); 1806 ret = res_counter_memparse_write_strategy(buffer, &val);
909 if (!ret) 1807 if (ret)
1808 break;
1809 if (type == _MEM)
910 ret = mem_cgroup_resize_limit(memcg, val); 1810 ret = mem_cgroup_resize_limit(memcg, val);
1811 else
1812 ret = mem_cgroup_resize_memsw_limit(memcg, val);
911 break; 1813 break;
912 default: 1814 default:
913 ret = -EINVAL; /* should be BUG() ? */ 1815 ret = -EINVAL; /* should be BUG() ? */
@@ -916,27 +1818,59 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
916 return ret; 1818 return ret;
917} 1819}
918 1820
1821static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
1822 unsigned long long *mem_limit, unsigned long long *memsw_limit)
1823{
1824 struct cgroup *cgroup;
1825 unsigned long long min_limit, min_memsw_limit, tmp;
1826
1827 min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
1828 min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1829 cgroup = memcg->css.cgroup;
1830 if (!memcg->use_hierarchy)
1831 goto out;
1832
1833 while (cgroup->parent) {
1834 cgroup = cgroup->parent;
1835 memcg = mem_cgroup_from_cont(cgroup);
1836 if (!memcg->use_hierarchy)
1837 break;
1838 tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
1839 min_limit = min(min_limit, tmp);
1840 tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1841 min_memsw_limit = min(min_memsw_limit, tmp);
1842 }
1843out:
1844 *mem_limit = min_limit;
1845 *memsw_limit = min_memsw_limit;
1846 return;
1847}
1848
919static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) 1849static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
920{ 1850{
921 struct mem_cgroup *mem; 1851 struct mem_cgroup *mem;
1852 int type, name;
922 1853
923 mem = mem_cgroup_from_cont(cont); 1854 mem = mem_cgroup_from_cont(cont);
924 switch (event) { 1855 type = MEMFILE_TYPE(event);
1856 name = MEMFILE_ATTR(event);
1857 switch (name) {
925 case RES_MAX_USAGE: 1858 case RES_MAX_USAGE:
926 res_counter_reset_max(&mem->res); 1859 if (type == _MEM)
1860 res_counter_reset_max(&mem->res);
1861 else
1862 res_counter_reset_max(&mem->memsw);
927 break; 1863 break;
928 case RES_FAILCNT: 1864 case RES_FAILCNT:
929 res_counter_reset_failcnt(&mem->res); 1865 if (type == _MEM)
1866 res_counter_reset_failcnt(&mem->res);
1867 else
1868 res_counter_reset_failcnt(&mem->memsw);
930 break; 1869 break;
931 } 1870 }
932 return 0; 1871 return 0;
933} 1872}
934 1873
935static int mem_force_empty_write(struct cgroup *cont, unsigned int event)
936{
937 return mem_cgroup_force_empty(mem_cgroup_from_cont(cont));
938}
939
940static const struct mem_cgroup_stat_desc { 1874static const struct mem_cgroup_stat_desc {
941 const char *msg; 1875 const char *msg;
942 u64 unit; 1876 u64 unit;
@@ -985,43 +1919,163 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
985 cb->fill(cb, "unevictable", unevictable * PAGE_SIZE); 1919 cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
986 1920
987 } 1921 }
1922 {
1923 unsigned long long limit, memsw_limit;
1924 memcg_get_hierarchical_limit(mem_cont, &limit, &memsw_limit);
1925 cb->fill(cb, "hierarchical_memory_limit", limit);
1926 if (do_swap_account)
1927 cb->fill(cb, "hierarchical_memsw_limit", memsw_limit);
1928 }
1929
1930#ifdef CONFIG_DEBUG_VM
1931 cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
1932
1933 {
1934 int nid, zid;
1935 struct mem_cgroup_per_zone *mz;
1936 unsigned long recent_rotated[2] = {0, 0};
1937 unsigned long recent_scanned[2] = {0, 0};
1938
1939 for_each_online_node(nid)
1940 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1941 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
1942
1943 recent_rotated[0] +=
1944 mz->reclaim_stat.recent_rotated[0];
1945 recent_rotated[1] +=
1946 mz->reclaim_stat.recent_rotated[1];
1947 recent_scanned[0] +=
1948 mz->reclaim_stat.recent_scanned[0];
1949 recent_scanned[1] +=
1950 mz->reclaim_stat.recent_scanned[1];
1951 }
1952 cb->fill(cb, "recent_rotated_anon", recent_rotated[0]);
1953 cb->fill(cb, "recent_rotated_file", recent_rotated[1]);
1954 cb->fill(cb, "recent_scanned_anon", recent_scanned[0]);
1955 cb->fill(cb, "recent_scanned_file", recent_scanned[1]);
1956 }
1957#endif
1958
1959 return 0;
1960}
1961
1962static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
1963{
1964 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
1965
1966 return get_swappiness(memcg);
1967}
1968
1969static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
1970 u64 val)
1971{
1972 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
1973 struct mem_cgroup *parent;
1974 if (val > 100)
1975 return -EINVAL;
1976
1977 if (cgrp->parent == NULL)
1978 return -EINVAL;
1979
1980 parent = mem_cgroup_from_cont(cgrp->parent);
1981 /* If under hierarchy, only empty-root can set this value */
1982 if ((parent->use_hierarchy) ||
1983 (memcg->use_hierarchy && !list_empty(&cgrp->children)))
1984 return -EINVAL;
1985
1986 spin_lock(&memcg->reclaim_param_lock);
1987 memcg->swappiness = val;
1988 spin_unlock(&memcg->reclaim_param_lock);
1989
988 return 0; 1990 return 0;
989} 1991}
990 1992
1993
991static struct cftype mem_cgroup_files[] = { 1994static struct cftype mem_cgroup_files[] = {
992 { 1995 {
993 .name = "usage_in_bytes", 1996 .name = "usage_in_bytes",
994 .private = RES_USAGE, 1997 .private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
995 .read_u64 = mem_cgroup_read, 1998 .read_u64 = mem_cgroup_read,
996 }, 1999 },
997 { 2000 {
998 .name = "max_usage_in_bytes", 2001 .name = "max_usage_in_bytes",
999 .private = RES_MAX_USAGE, 2002 .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
1000 .trigger = mem_cgroup_reset, 2003 .trigger = mem_cgroup_reset,
1001 .read_u64 = mem_cgroup_read, 2004 .read_u64 = mem_cgroup_read,
1002 }, 2005 },
1003 { 2006 {
1004 .name = "limit_in_bytes", 2007 .name = "limit_in_bytes",
1005 .private = RES_LIMIT, 2008 .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
1006 .write_string = mem_cgroup_write, 2009 .write_string = mem_cgroup_write,
1007 .read_u64 = mem_cgroup_read, 2010 .read_u64 = mem_cgroup_read,
1008 }, 2011 },
1009 { 2012 {
1010 .name = "failcnt", 2013 .name = "failcnt",
1011 .private = RES_FAILCNT, 2014 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
1012 .trigger = mem_cgroup_reset, 2015 .trigger = mem_cgroup_reset,
1013 .read_u64 = mem_cgroup_read, 2016 .read_u64 = mem_cgroup_read,
1014 }, 2017 },
1015 { 2018 {
2019 .name = "stat",
2020 .read_map = mem_control_stat_show,
2021 },
2022 {
1016 .name = "force_empty", 2023 .name = "force_empty",
1017 .trigger = mem_force_empty_write, 2024 .trigger = mem_cgroup_force_empty_write,
1018 }, 2025 },
1019 { 2026 {
1020 .name = "stat", 2027 .name = "use_hierarchy",
1021 .read_map = mem_control_stat_show, 2028 .write_u64 = mem_cgroup_hierarchy_write,
2029 .read_u64 = mem_cgroup_hierarchy_read,
2030 },
2031 {
2032 .name = "swappiness",
2033 .read_u64 = mem_cgroup_swappiness_read,
2034 .write_u64 = mem_cgroup_swappiness_write,
1022 }, 2035 },
1023}; 2036};
1024 2037
2038#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
2039static struct cftype memsw_cgroup_files[] = {
2040 {
2041 .name = "memsw.usage_in_bytes",
2042 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
2043 .read_u64 = mem_cgroup_read,
2044 },
2045 {
2046 .name = "memsw.max_usage_in_bytes",
2047 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
2048 .trigger = mem_cgroup_reset,
2049 .read_u64 = mem_cgroup_read,
2050 },
2051 {
2052 .name = "memsw.limit_in_bytes",
2053 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
2054 .write_string = mem_cgroup_write,
2055 .read_u64 = mem_cgroup_read,
2056 },
2057 {
2058 .name = "memsw.failcnt",
2059 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
2060 .trigger = mem_cgroup_reset,
2061 .read_u64 = mem_cgroup_read,
2062 },
2063};
2064
2065static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
2066{
2067 if (!do_swap_account)
2068 return 0;
2069 return cgroup_add_files(cont, ss, memsw_cgroup_files,
2070 ARRAY_SIZE(memsw_cgroup_files));
2071};
2072#else
2073static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
2074{
2075 return 0;
2076}
2077#endif
2078
1025static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) 2079static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
1026{ 2080{
1027 struct mem_cgroup_per_node *pn; 2081 struct mem_cgroup_per_node *pn;
@@ -1047,7 +2101,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
1047 2101
1048 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 2102 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
1049 mz = &pn->zoneinfo[zone]; 2103 mz = &pn->zoneinfo[zone];
1050 spin_lock_init(&mz->lru_lock);
1051 for_each_lru(l) 2104 for_each_lru(l)
1052 INIT_LIST_HEAD(&mz->lists[l]); 2105 INIT_LIST_HEAD(&mz->lists[l]);
1053 } 2106 }
@@ -1059,55 +2112,113 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
1059 kfree(mem->info.nodeinfo[node]); 2112 kfree(mem->info.nodeinfo[node]);
1060} 2113}
1061 2114
2115static int mem_cgroup_size(void)
2116{
2117 int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu);
2118 return sizeof(struct mem_cgroup) + cpustat_size;
2119}
2120
1062static struct mem_cgroup *mem_cgroup_alloc(void) 2121static struct mem_cgroup *mem_cgroup_alloc(void)
1063{ 2122{
1064 struct mem_cgroup *mem; 2123 struct mem_cgroup *mem;
2124 int size = mem_cgroup_size();
1065 2125
1066 if (sizeof(*mem) < PAGE_SIZE) 2126 if (size < PAGE_SIZE)
1067 mem = kmalloc(sizeof(*mem), GFP_KERNEL); 2127 mem = kmalloc(size, GFP_KERNEL);
1068 else 2128 else
1069 mem = vmalloc(sizeof(*mem)); 2129 mem = vmalloc(size);
1070 2130
1071 if (mem) 2131 if (mem)
1072 memset(mem, 0, sizeof(*mem)); 2132 memset(mem, 0, size);
1073 return mem; 2133 return mem;
1074} 2134}
1075 2135
1076static void mem_cgroup_free(struct mem_cgroup *mem) 2136/*
2137 * At destroying mem_cgroup, references from swap_cgroup can remain.
2138 * (scanning all at force_empty is too costly...)
2139 *
2140 * Instead of clearing all references at force_empty, we remember
2141 * the number of reference from swap_cgroup and free mem_cgroup when
2142 * it goes down to 0.
2143 *
2144 * Removal of cgroup itself succeeds regardless of refs from swap.
2145 */
2146
2147static void __mem_cgroup_free(struct mem_cgroup *mem)
1077{ 2148{
1078 if (sizeof(*mem) < PAGE_SIZE) 2149 int node;
2150
2151 for_each_node_state(node, N_POSSIBLE)
2152 free_mem_cgroup_per_zone_info(mem, node);
2153
2154 if (mem_cgroup_size() < PAGE_SIZE)
1079 kfree(mem); 2155 kfree(mem);
1080 else 2156 else
1081 vfree(mem); 2157 vfree(mem);
1082} 2158}
1083 2159
2160static void mem_cgroup_get(struct mem_cgroup *mem)
2161{
2162 atomic_inc(&mem->refcnt);
2163}
2164
2165static void mem_cgroup_put(struct mem_cgroup *mem)
2166{
2167 if (atomic_dec_and_test(&mem->refcnt))
2168 __mem_cgroup_free(mem);
2169}
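mem_cgroup_get()/mem_cgroup_put() are the usual free-on-last-put refcount, which is what lets lingering swap_cgroup references outlive cgroup removal. The same pattern in portable C11 (obj is a stand-in type, not the kernel object):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj { atomic_int refcnt; };

static struct obj *obj_alloc(void)
{
        struct obj *o = calloc(1, sizeof(*o));

        if (!o)
                abort();
        atomic_store(&o->refcnt, 1);    /* creator holds the first ref */
        return o;
}

static void obj_get(struct obj *o)
{
        atomic_fetch_add(&o->refcnt, 1);
}

static void obj_put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refcnt, 1) == 1) {     /* last ref gone */
                printf("freeing\n");
                free(o);
        }
}

int main(void)
{
        struct obj *o = obj_alloc();

        obj_get(o);     /* e.g. a swap record takes a reference */
        obj_put(o);     /* cgroup removal drops its ref: object survives */
        obj_put(o);     /* swap record released: now it is freed */
        return 0;
}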
2170
2171
2172#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
2173static void __init enable_swap_cgroup(void)
2174{
2175 if (!mem_cgroup_disabled() && really_do_swap_account)
2176 do_swap_account = 1;
2177}
2178#else
2179static void __init enable_swap_cgroup(void)
2180{
2181}
2182#endif
1084 2183
1085static struct cgroup_subsys_state * 2184static struct cgroup_subsys_state *
1086mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 2185mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1087{ 2186{
1088 struct mem_cgroup *mem; 2187 struct mem_cgroup *mem, *parent;
1089 int node; 2188 int node;
1090 2189
1091 if (unlikely((cont->parent) == NULL)) { 2190 mem = mem_cgroup_alloc();
1092 mem = &init_mem_cgroup; 2191 if (!mem)
1093 } else { 2192 return ERR_PTR(-ENOMEM);
1094 mem = mem_cgroup_alloc();
1095 if (!mem)
1096 return ERR_PTR(-ENOMEM);
1097 }
1098
1099 res_counter_init(&mem->res);
1100 2193
1101 for_each_node_state(node, N_POSSIBLE) 2194 for_each_node_state(node, N_POSSIBLE)
1102 if (alloc_mem_cgroup_per_zone_info(mem, node)) 2195 if (alloc_mem_cgroup_per_zone_info(mem, node))
1103 goto free_out; 2196 goto free_out;
2197 /* root ? */
2198 if (cont->parent == NULL) {
2199 enable_swap_cgroup();
2200 parent = NULL;
2201 } else {
2202 parent = mem_cgroup_from_cont(cont->parent);
2203 mem->use_hierarchy = parent->use_hierarchy;
2204 }
1104 2205
2206 if (parent && parent->use_hierarchy) {
2207 res_counter_init(&mem->res, &parent->res);
2208 res_counter_init(&mem->memsw, &parent->memsw);
2209 } else {
2210 res_counter_init(&mem->res, NULL);
2211 res_counter_init(&mem->memsw, NULL);
2212 }
2213 mem->last_scanned_child = NULL;
2214 spin_lock_init(&mem->reclaim_param_lock);
2215
2216 if (parent)
2217 mem->swappiness = get_swappiness(parent);
2218 atomic_set(&mem->refcnt, 1);
1105 return &mem->css; 2219 return &mem->css;
1106free_out: 2220free_out:
1107 for_each_node_state(node, N_POSSIBLE) 2221 __mem_cgroup_free(mem);
1108 free_mem_cgroup_per_zone_info(mem, node);
1109 if (cont->parent != NULL)
1110 mem_cgroup_free(mem);
1111 return ERR_PTR(-ENOMEM); 2222 return ERR_PTR(-ENOMEM);
1112} 2223}
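When use_hierarchy is set, res_counter_init() links each child counter to its parent, so a single charge must succeed at every ancestor or be unwound. A sketch of that propagate-or-rollback walk; struct counter models the parent-linked res_counter, and the limits are made up:

#include <stdio.h>

struct counter {
        long usage, limit;
        struct counter *parent;
};

/* charge every counter from c up to the root, undoing the partial walk
 * if any ancestor would exceed its limit */
static int charge(struct counter *c, long n)
{
        struct counter *i, *j;

        for (i = c; i; i = i->parent) {
                if (i->usage + n > i->limit)
                        goto undo;
                i->usage += n;
        }
        return 0;
undo:
        for (j = c; j != i; j = j->parent)
                j->usage -= n;
        return -1;
}

int main(void)
{
        struct counter root = { 0, 2, NULL };
        struct counter child = { 0, 10, &root };
        int a = charge(&child, 1);
        int b = charge(&child, 1);
        int c = charge(&child, 1);      /* root limit hit, child unwound */

        printf("%d %d %d\n", a, b, c);                          /* 0 0 -1 */
        printf("child=%ld root=%ld\n", child.usage, root.usage); /* 2 2 */
        return 0;
}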
1113 2224
@@ -1115,26 +2226,26 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
1115 struct cgroup *cont) 2226 struct cgroup *cont)
1116{ 2227{
1117 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 2228 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
1118 mem_cgroup_force_empty(mem); 2229 mem_cgroup_force_empty(mem, false);
1119} 2230}
1120 2231
1121static void mem_cgroup_destroy(struct cgroup_subsys *ss, 2232static void mem_cgroup_destroy(struct cgroup_subsys *ss,
1122 struct cgroup *cont) 2233 struct cgroup *cont)
1123{ 2234{
1124 int node; 2235 mem_cgroup_put(mem_cgroup_from_cont(cont));
1125 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
1126
1127 for_each_node_state(node, N_POSSIBLE)
1128 free_mem_cgroup_per_zone_info(mem, node);
1129
1130 mem_cgroup_free(mem_cgroup_from_cont(cont));
1131} 2236}
1132 2237
1133static int mem_cgroup_populate(struct cgroup_subsys *ss, 2238static int mem_cgroup_populate(struct cgroup_subsys *ss,
1134 struct cgroup *cont) 2239 struct cgroup *cont)
1135{ 2240{
1136 return cgroup_add_files(cont, ss, mem_cgroup_files, 2241 int ret;
1137 ARRAY_SIZE(mem_cgroup_files)); 2242
2243 ret = cgroup_add_files(cont, ss, mem_cgroup_files,
2244 ARRAY_SIZE(mem_cgroup_files));
2245
2246 if (!ret)
2247 ret = register_memsw_files(cont, ss);
2248 return ret;
1138} 2249}
1139 2250
1140static void mem_cgroup_move_task(struct cgroup_subsys *ss, 2251static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -1142,25 +2253,12 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
1142 struct cgroup *old_cont, 2253 struct cgroup *old_cont,
1143 struct task_struct *p) 2254 struct task_struct *p)
1144{ 2255{
1145 struct mm_struct *mm; 2256 mutex_lock(&memcg_tasklist);
1146 struct mem_cgroup *mem, *old_mem;
1147
1148 mm = get_task_mm(p);
1149 if (mm == NULL)
1150 return;
1151
1152 mem = mem_cgroup_from_cont(cont);
1153 old_mem = mem_cgroup_from_cont(old_cont);
1154
1155 /* 2257 /*
1156 * Only thread group leaders are allowed to migrate, the mm_struct is 2258 * FIXME: It's better to move charges of this process from old
1157 * in effect owned by the leader 2259 * memcg to new memcg. But it's just on the TODO list for now.
1158 */ 2260 */
1159 if (!thread_group_leader(p)) 2261 mutex_unlock(&memcg_tasklist);
1160 goto out;
1161
1162out:
1163 mmput(mm);
1164} 2262}
1165 2263
1166struct cgroup_subsys mem_cgroup_subsys = { 2264struct cgroup_subsys mem_cgroup_subsys = {
@@ -1173,3 +2271,13 @@ struct cgroup_subsys mem_cgroup_subsys = {
1173 .attach = mem_cgroup_move_task, 2271 .attach = mem_cgroup_move_task,
1174 .early_init = 0, 2272 .early_init = 0,
1175}; 2273};
2274
2275#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
2276
2277static int __init disable_swap_account(char *s)
2278{
2279 really_do_swap_account = 0;
2280 return 1;
2281}
2282__setup("noswapaccount", disable_swap_account);
2283#endif
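
A note for readers tracing the css-creation hunk above: with use_hierarchy set, res_counter_init(&mem->res, &parent->res) chains the child's counter to its parent's, so a charge has to fit at every ancestor or be rolled back. A minimal userspace sketch of that behaviour, with made-up types (this is not the kernel's res_counter API):

#include <stdio.h>
#include <stdbool.h>

struct counter {
	long usage, limit;
	struct counter *parent;	/* NULL for root, as in res_counter_init */
};

static bool charge(struct counter *c, long bytes)
{
	struct counter *cur, *undo;

	for (cur = c; cur; cur = cur->parent) {
		if (cur->usage + bytes > cur->limit)
			goto fail;
		cur->usage += bytes;
	}
	return true;
fail:
	/* Roll back the levels already charged, stopping at the failure. */
	for (undo = c; undo != cur; undo = undo->parent)
		undo->usage -= bytes;
	return false;
}

int main(void)
{
	struct counter root  = { 0, 100, NULL };
	struct counter child = { 0, 80, &root };

	printf("charge 60: %d\n", charge(&child, 60));	/* 1: fits everywhere */
	printf("charge 30: %d\n", charge(&child, 30));	/* 0: child would hit 90 > 80 */
	printf("root usage: %ld\n", root.usage);	/* still 60 after rollback */
	return 0;
}
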
diff --git a/mm/memory.c b/mm/memory.c
index 3f8fa06b963b..e009ce870859 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2000,7 +2000,7 @@ gotten:
2000 cow_user_page(new_page, old_page, address, vma); 2000 cow_user_page(new_page, old_page, address, vma);
2001 __SetPageUptodate(new_page); 2001 __SetPageUptodate(new_page);
2002 2002
2003 if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) 2003 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
2004 goto oom_free_new; 2004 goto oom_free_new;
2005 2005
2006 /* 2006 /*
@@ -2392,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2392 struct page *page; 2392 struct page *page;
2393 swp_entry_t entry; 2393 swp_entry_t entry;
2394 pte_t pte; 2394 pte_t pte;
2395 struct mem_cgroup *ptr = NULL;
2395 int ret = 0; 2396 int ret = 0;
2396 2397
2397 if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) 2398 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
@@ -2430,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2430 lock_page(page); 2431 lock_page(page);
2431 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2432 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2432 2433
2433 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2434 if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
2434 ret = VM_FAULT_OOM; 2435 ret = VM_FAULT_OOM;
2435 unlock_page(page); 2436 unlock_page(page);
2436 goto out; 2437 goto out;
@@ -2448,7 +2449,19 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2448 goto out_nomap; 2449 goto out_nomap;
2449 } 2450 }
2450 2451
2451 /* The page isn't present yet, go ahead with the fault. */ 2452 /*
2453 * The page isn't present yet, go ahead with the fault.
2454 *
2455 * Be careful about the sequence of operations here.
2456 * To get its accounting right, reuse_swap_page() must be called
2457 * while the page is counted on swap but not yet in mapcount i.e.
2458 * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
2459 * must be called after the swap_free(), or it will never succeed.
 2460 * Because delete_from_swap_cache() may be called by reuse_swap_page(),
2461 * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
2462 * in page->private. In this case, a record in swap_cgroup is silently
2463 * discarded at swap_free().
2464 */
2452 2465
2453 inc_mm_counter(mm, anon_rss); 2466 inc_mm_counter(mm, anon_rss);
2454 pte = mk_pte(page, vma->vm_page_prot); 2467 pte = mk_pte(page, vma->vm_page_prot);
@@ -2456,10 +2469,11 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2456 pte = maybe_mkwrite(pte_mkdirty(pte), vma); 2469 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
2457 write_access = 0; 2470 write_access = 0;
2458 } 2471 }
2459
2460 flush_icache_page(vma, page); 2472 flush_icache_page(vma, page);
2461 set_pte_at(mm, address, page_table, pte); 2473 set_pte_at(mm, address, page_table, pte);
2462 page_add_anon_rmap(page, vma, address); 2474 page_add_anon_rmap(page, vma, address);
2475 /* It's better to call commit-charge after rmap is established */
2476 mem_cgroup_commit_charge_swapin(page, ptr);
2463 2477
2464 swap_free(entry); 2478 swap_free(entry);
2465 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) 2479 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2480,7 +2494,7 @@ unlock:
2480out: 2494out:
2481 return ret; 2495 return ret;
2482out_nomap: 2496out_nomap:
2483 mem_cgroup_uncharge_page(page); 2497 mem_cgroup_cancel_charge_swapin(ptr);
2484 pte_unmap_unlock(page_table, ptl); 2498 pte_unmap_unlock(page_table, ptl);
2485 unlock_page(page); 2499 unlock_page(page);
2486 page_cache_release(page); 2500 page_cache_release(page);
@@ -2510,7 +2524,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2510 goto oom; 2524 goto oom;
2511 __SetPageUptodate(page); 2525 __SetPageUptodate(page);
2512 2526
2513 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) 2527 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
2514 goto oom_free_page; 2528 goto oom_free_page;
2515 2529
2516 entry = mk_pte(page, vma->vm_page_prot); 2530 entry = mk_pte(page, vma->vm_page_prot);
@@ -2601,7 +2615,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2601 ret = VM_FAULT_OOM; 2615 ret = VM_FAULT_OOM;
2602 goto out; 2616 goto out;
2603 } 2617 }
2604 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2618 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
2605 ret = VM_FAULT_OOM; 2619 ret = VM_FAULT_OOM;
2606 page_cache_release(page); 2620 page_cache_release(page);
2607 goto out; 2621 goto out;
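
The memory.c conversions above replace the one-shot mem_cgroup_charge() with a three-phase protocol: reserve the charge before touching the page tables (try), finalize it after the rmap is established (commit), and roll it back when the pte_same() recheck fails (cancel). A minimal userspace sketch of the shape of that protocol; the function bodies below are hypothetical stand-ins, not the memcg API:

#include <stdio.h>
#include <stdbool.h>

struct group { long usage, limit; };

/* try: reserve a page's worth of charge up front, before any page
 * table state exists; failure maps to VM_FAULT_OOM in do_swap_page(). */
static bool try_charge(struct group *g)
{
	if (g->usage + 1 > g->limit)
		return false;
	g->usage++;
	return true;
}

/* commit: nothing to add; the reservation simply becomes permanent
 * once the pte and rmap are in place (hence "after rmap" above). */
static void commit_charge(struct group *g)
{
	(void)g;
}

/* cancel: the out_nomap path; drop the reservation again. */
static void cancel_charge(struct group *g)
{
	g->usage--;
}

int main(void)
{
	struct group g = { 0, 1 };
	bool pte_still_same = true;	/* stand-in for the pte_same() recheck */

	if (!try_charge(&g))
		return 1;	/* VM_FAULT_OOM */
	if (pte_still_same)
		commit_charge(&g);
	else
		cancel_charge(&g);
	printf("usage=%ld\n", g.usage);
	return 0;
}
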
diff --git a/mm/migrate.c b/mm/migrate.c
index 55373983c9c6..a30ea5fcf9f1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
121 if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) 121 if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
122 goto out; 122 goto out;
123 123
124 /*
125 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
126 * Failure is not an option here: we're now expected to remove every
127 * migration pte, and will cause crashes otherwise. Normally this
128 * is not an issue: mem_cgroup_prepare_migration bumped up the old
129 * page_cgroup count for safety, that's now attached to the new page,
130 * so this charge should just be another incrementation of the count,
131 * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
132 * there's been a force_empty, those reference counts may no longer
133 * be reliable, and this charge can actually fail: oh well, we don't
134 * make the situation any worse by proceeding as if it had succeeded.
135 */
136 mem_cgroup_charge(new, mm, GFP_ATOMIC);
137
138 get_page(new); 124 get_page(new);
139 pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); 125 pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
140 if (is_write_migration_entry(entry)) 126 if (is_write_migration_entry(entry))
@@ -378,9 +364,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
378 anon = PageAnon(page); 364 anon = PageAnon(page);
379 page->mapping = NULL; 365 page->mapping = NULL;
380 366
381 if (!anon) /* This page was removed from radix-tree. */
382 mem_cgroup_uncharge_cache_page(page);
383
384 /* 367 /*
385 * If any waiters have accumulated on the new page then 368 * If any waiters have accumulated on the new page then
386 * wake them up. 369 * wake them up.
@@ -614,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
614 struct page *newpage = get_new_page(page, private, &result); 597 struct page *newpage = get_new_page(page, private, &result);
615 int rcu_locked = 0; 598 int rcu_locked = 0;
616 int charge = 0; 599 int charge = 0;
600 struct mem_cgroup *mem;
617 601
618 if (!newpage) 602 if (!newpage)
619 return -ENOMEM; 603 return -ENOMEM;
@@ -623,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
623 goto move_newpage; 607 goto move_newpage;
624 } 608 }
625 609
626 charge = mem_cgroup_prepare_migration(page, newpage);
627 if (charge == -ENOMEM) {
628 rc = -ENOMEM;
629 goto move_newpage;
630 }
631 /* prepare cgroup just returns 0 or -ENOMEM */ 610 /* prepare cgroup just returns 0 or -ENOMEM */
632 BUG_ON(charge);
633
634 rc = -EAGAIN; 611 rc = -EAGAIN;
612
635 if (!trylock_page(page)) { 613 if (!trylock_page(page)) {
636 if (!force) 614 if (!force)
637 goto move_newpage; 615 goto move_newpage;
638 lock_page(page); 616 lock_page(page);
639 } 617 }
640 618
619 /* charge against new page */
620 charge = mem_cgroup_prepare_migration(page, &mem);
621 if (charge == -ENOMEM) {
622 rc = -ENOMEM;
623 goto unlock;
624 }
625 BUG_ON(charge);
626
641 if (PageWriteback(page)) { 627 if (PageWriteback(page)) {
642 if (!force) 628 if (!force)
643 goto unlock; 629 goto uncharge;
644 wait_on_page_writeback(page); 630 wait_on_page_writeback(page);
645 } 631 }
646 /* 632 /*
@@ -693,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
693rcu_unlock: 679rcu_unlock:
694 if (rcu_locked) 680 if (rcu_locked)
695 rcu_read_unlock(); 681 rcu_read_unlock();
696 682uncharge:
683 if (!charge)
684 mem_cgroup_end_migration(mem, page, newpage);
697unlock: 685unlock:
698 unlock_page(page); 686 unlock_page(page);
699 687
@@ -709,8 +697,6 @@ unlock:
709 } 697 }
710 698
711move_newpage: 699move_newpage:
712 if (!charge)
713 mem_cgroup_end_migration(newpage);
714 700
715 /* 701 /*
716 * Move the new page to the LRU. If migration was not successful 702 * Move the new page to the LRU. If migration was not successful
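
One detail worth spelling out in the migrate.c rework: because the charge is now taken after the page lock, the error labels must unwind in reverse order, so the new uncharge label sits between rcu_unlock and unlock, and even the success path passes through it (mem_cgroup_end_migration() runs whenever the earlier prepare succeeded). A small illustrative sketch of that goto-unwind ordering, not the real unmap_and_move():

#include <stdio.h>

/* Each argument simulates one step that can fail; 0 means failure. */
static int move_one(int locked, int charged, int copied)
{
	int rc = -1;

	if (!locked)
		goto out;
	if (!charged)
		goto unlock;		/* nothing charged yet: skip uncharge */
	if (!copied)
		goto uncharge;
	rc = 0;				/* success also falls through below */
uncharge:
	printf("mem_cgroup_end_migration\n");
unlock:
	printf("unlock_page\n");
out:
	return rc;
}

int main(void)
{
	move_one(1, 1, 0);	/* charged but copy failed: uncharge, then unlock */
	return 0;
}
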
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6b9e758c98a5..40ba05061a4f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -429,7 +429,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
429 unsigned long points = 0; 429 unsigned long points = 0;
430 struct task_struct *p; 430 struct task_struct *p;
431 431
432 cgroup_lock();
433 read_lock(&tasklist_lock); 432 read_lock(&tasklist_lock);
434retry: 433retry:
435 p = select_bad_process(&points, mem); 434 p = select_bad_process(&points, mem);
@@ -444,7 +443,6 @@ retry:
444 goto retry; 443 goto retry;
445out: 444out:
446 read_unlock(&tasklist_lock); 445 read_unlock(&tasklist_lock);
447 cgroup_unlock();
448} 446}
449#endif 447#endif
450 448
@@ -560,6 +558,13 @@ void pagefault_out_of_memory(void)
560 /* Got some memory back in the last second. */ 558 /* Got some memory back in the last second. */
561 return; 559 return;
562 560
561 /*
 562 * If this is from memcg, the oom-killer has already been invoked,
 563 * so it is not worth going system-wide OOM.
564 */
565 if (mem_cgroup_oom_called(current))
566 goto rest_and_return;
567
563 if (sysctl_panic_on_oom) 568 if (sysctl_panic_on_oom)
564 panic("out of memory from page fault. panic_on_oom is selected.\n"); 569 panic("out of memory from page fault. panic_on_oom is selected.\n");
565 570
@@ -571,6 +576,7 @@ void pagefault_out_of_memory(void)
571 * Give "p" a good chance of killing itself before we 576 * Give "p" a good chance of killing itself before we
572 * retry to allocate memory. 577 * retry to allocate memory.
573 */ 578 */
579rest_and_return:
574 if (!test_thread_flag(TIF_MEMDIE)) 580 if (!test_thread_flag(TIF_MEMDIE))
575 schedule_timeout_uninterruptible(1); 581 schedule_timeout_uninterruptible(1);
576} 582}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7bf22e045318..5675b3073854 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3523,10 +3523,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3523 INIT_LIST_HEAD(&zone->lru[l].list); 3523 INIT_LIST_HEAD(&zone->lru[l].list);
3524 zone->lru[l].nr_scan = 0; 3524 zone->lru[l].nr_scan = 0;
3525 } 3525 }
3526 zone->recent_rotated[0] = 0; 3526 zone->reclaim_stat.recent_rotated[0] = 0;
3527 zone->recent_rotated[1] = 0; 3527 zone->reclaim_stat.recent_rotated[1] = 0;
3528 zone->recent_scanned[0] = 0; 3528 zone->reclaim_stat.recent_scanned[0] = 0;
3529 zone->recent_scanned[1] = 0; 3529 zone->reclaim_stat.recent_scanned[1] = 0;
3530 zap_zone_vm_stats(zone); 3530 zap_zone_vm_stats(zone);
3531 zone->flags = 0; 3531 zone->flags = 0;
3532 if (!size) 3532 if (!size)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index d6507a660ed6..7006a11350c8 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -8,6 +8,7 @@
8#include <linux/memory.h> 8#include <linux/memory.h>
9#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
10#include <linux/cgroup.h> 10#include <linux/cgroup.h>
11#include <linux/swapops.h>
11 12
12static void __meminit 13static void __meminit
13__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) 14__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -15,6 +16,7 @@ __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
15 pc->flags = 0; 16 pc->flags = 0;
16 pc->mem_cgroup = NULL; 17 pc->mem_cgroup = NULL;
17 pc->page = pfn_to_page(pfn); 18 pc->page = pfn_to_page(pfn);
19 INIT_LIST_HEAD(&pc->lru);
18} 20}
19static unsigned long total_usage; 21static unsigned long total_usage;
20 22
@@ -72,7 +74,7 @@ void __init page_cgroup_init(void)
72 74
73 int nid, fail; 75 int nid, fail;
74 76
75 if (mem_cgroup_subsys.disabled) 77 if (mem_cgroup_disabled())
76 return; 78 return;
77 79
78 for_each_online_node(nid) { 80 for_each_online_node(nid) {
@@ -103,13 +105,11 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
103/* __alloc_bootmem...() is protected by !slab_available() */ 105/* __alloc_bootmem...() is protected by !slab_available() */
104static int __init_refok init_section_page_cgroup(unsigned long pfn) 106static int __init_refok init_section_page_cgroup(unsigned long pfn)
105{ 107{
106 struct mem_section *section; 108 struct mem_section *section = __pfn_to_section(pfn);
107 struct page_cgroup *base, *pc; 109 struct page_cgroup *base, *pc;
108 unsigned long table_size; 110 unsigned long table_size;
109 int nid, index; 111 int nid, index;
110 112
111 section = __pfn_to_section(pfn);
112
113 if (!section->page_cgroup) { 113 if (!section->page_cgroup) {
114 nid = page_to_nid(pfn_to_page(pfn)); 114 nid = page_to_nid(pfn_to_page(pfn));
115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
@@ -145,7 +145,6 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
145 __init_page_cgroup(pc, pfn + index); 145 __init_page_cgroup(pc, pfn + index);
146 } 146 }
147 147
148 section = __pfn_to_section(pfn);
149 section->page_cgroup = base - pfn; 148 section->page_cgroup = base - pfn;
150 total_usage += table_size; 149 total_usage += table_size;
151 return 0; 150 return 0;
@@ -248,7 +247,7 @@ void __init page_cgroup_init(void)
248 unsigned long pfn; 247 unsigned long pfn;
249 int fail = 0; 248 int fail = 0;
250 249
251 if (mem_cgroup_subsys.disabled) 250 if (mem_cgroup_disabled())
252 return; 251 return;
253 252
254 for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { 253 for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
@@ -273,3 +272,199 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
273} 272}
274 273
275#endif 274#endif
275
276
277#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
278
279static DEFINE_MUTEX(swap_cgroup_mutex);
280struct swap_cgroup_ctrl {
281 struct page **map;
282 unsigned long length;
283};
284
285struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
286
287/*
288 * These 8 bytes per entry seem big... maybe we can reduce this once an
289 * "id" can be used for the cgroup rather than a pointer.
290 */
291struct swap_cgroup {
292 struct mem_cgroup *val;
293};
294#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
295#define SC_POS_MASK (SC_PER_PAGE - 1)
296
297/*
298 * SwapCgroup implements "lookup" and "exchange" operations.
299 * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
300 * against SwapCache. At swap_free(), this is accessed directly from swap.
301 *
302 * This means,
303 * - there is no race in "exchange" when accessed via SwapCache, because
304 * the SwapCache (and its swp_entry) is under lock.
305 * - when called via swap_free(), there is no user of this entry and no race.
306 * So we don't need a lock around "exchange".
307 *
308 * TODO: we can push these buffers out to HIGHMEM.
309 */
310
311/*
312 * allocate buffer for swap_cgroup.
313 */
314static int swap_cgroup_prepare(int type)
315{
316 struct page *page;
317 struct swap_cgroup_ctrl *ctrl;
318 unsigned long idx, max;
319
320 if (!do_swap_account)
321 return 0;
322 ctrl = &swap_cgroup_ctrl[type];
323
324 for (idx = 0; idx < ctrl->length; idx++) {
325 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
326 if (!page)
327 goto not_enough_page;
328 ctrl->map[idx] = page;
329 }
330 return 0;
331not_enough_page:
332 max = idx;
333 for (idx = 0; idx < max; idx++)
334 __free_page(ctrl->map[idx]);
335
336 return -ENOMEM;
337}
338
339/**
340 * swap_cgroup_record - record mem_cgroup for this swp_entry.
341 * @ent: swap entry to be recorded into
342 * @mem: mem_cgroup to be recorded
343 *
344 * Returns the old value on success, NULL on failure.
345 * (Of course, the old value may itself be NULL.)
346 */
347struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
348{
349 int type = swp_type(ent);
350 unsigned long offset = swp_offset(ent);
351 unsigned long idx = offset / SC_PER_PAGE;
352 unsigned long pos = offset & SC_POS_MASK;
353 struct swap_cgroup_ctrl *ctrl;
354 struct page *mappage;
355 struct swap_cgroup *sc;
356 struct mem_cgroup *old;
357
358 if (!do_swap_account)
359 return NULL;
360
361 ctrl = &swap_cgroup_ctrl[type];
362
363 mappage = ctrl->map[idx];
364 sc = page_address(mappage);
365 sc += pos;
366 old = sc->val;
367 sc->val = mem;
368
369 return old;
370}
371
372/**
373 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
374 * @ent: swap entry to be looked up.
375 *
376 * Returns a pointer to the mem_cgroup on success, NULL on failure.
377 */
378struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
379{
380 int type = swp_type(ent);
381 unsigned long offset = swp_offset(ent);
382 unsigned long idx = offset / SC_PER_PAGE;
383 unsigned long pos = offset & SC_POS_MASK;
384 struct swap_cgroup_ctrl *ctrl;
385 struct page *mappage;
386 struct swap_cgroup *sc;
387 struct mem_cgroup *ret;
388
389 if (!do_swap_account)
390 return NULL;
391
392 ctrl = &swap_cgroup_ctrl[type];
393 mappage = ctrl->map[idx];
394 sc = page_address(mappage);
395 sc += pos;
396 ret = sc->val;
397 return ret;
398}
399
400int swap_cgroup_swapon(int type, unsigned long max_pages)
401{
402 void *array;
403 unsigned long array_size;
404 unsigned long length;
405 struct swap_cgroup_ctrl *ctrl;
406
407 if (!do_swap_account)
408 return 0;
409
410 length = ((max_pages/SC_PER_PAGE) + 1);
411 array_size = length * sizeof(void *);
412
413 array = vmalloc(array_size);
414 if (!array)
415 goto nomem;
416
417 memset(array, 0, array_size);
418 ctrl = &swap_cgroup_ctrl[type];
419 mutex_lock(&swap_cgroup_mutex);
420 ctrl->length = length;
421 ctrl->map = array;
422 if (swap_cgroup_prepare(type)) {
423 /* memory shortage */
424 ctrl->map = NULL;
425 ctrl->length = 0;
426 vfree(array);
427 mutex_unlock(&swap_cgroup_mutex);
428 goto nomem;
429 }
430 mutex_unlock(&swap_cgroup_mutex);
431
432 printk(KERN_INFO
433 "swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
434 " and %ld bytes to hold mem_cgroup pointers on swap\n",
435 array_size, length * PAGE_SIZE);
436 printk(KERN_INFO
437 "swap_cgroup can be disabled by the noswapaccount boot option.\n");
438
439 return 0;
440nomem:
441 printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
442 printk(KERN_INFO
443 "swap_cgroup can be disabled by the noswapaccount boot option.\n");
444 return -ENOMEM;
445}
446
447void swap_cgroup_swapoff(int type)
448{
449 int i;
450 struct swap_cgroup_ctrl *ctrl;
451
452 if (!do_swap_account)
453 return;
454
455 mutex_lock(&swap_cgroup_mutex);
456 ctrl = &swap_cgroup_ctrl[type];
457 if (ctrl->map) {
458 for (i = 0; i < ctrl->length; i++) {
459 struct page *page = ctrl->map[i];
460 if (page)
461 __free_page(page);
462 }
463 vfree(ctrl->map);
464 ctrl->map = NULL;
465 ctrl->length = 0;
466 }
467 mutex_unlock(&swap_cgroup_mutex);
468}
469
470#endif
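
To make the SC_PER_PAGE/SC_POS_MASK arithmetic concrete: assuming 4096-byte pages and the 8-byte struct swap_cgroup above, each map page holds 512 records, so a swap offset splits into a page index and a slot within that page. A runnable check with one hypothetical offset:

#include <stdio.h>

#define PAGE_SZ		4096UL
#define RECORD_SZ	8UL				/* sizeof(struct swap_cgroup) */
#define SC_PER_PAGE	(PAGE_SZ / RECORD_SZ)		/* 512 records per page */
#define SC_POS_MASK	(SC_PER_PAGE - 1)

int main(void)
{
	unsigned long offset = 1300;	/* hypothetical swp_offset(ent) */

	printf("idx=%lu pos=%lu\n",
	       offset / SC_PER_PAGE,	/* 2: third page in ctrl->map */
	       offset & SC_POS_MASK);	/* 276: slot within that page */
	return 0;
}
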
diff --git a/mm/shmem.c b/mm/shmem.c
index 5941f9801363..5d0de96c9789 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -928,7 +928,11 @@ found:
928 error = 1; 928 error = 1;
929 if (!inode) 929 if (!inode)
930 goto out; 930 goto out;
931 /* Precharge page using GFP_KERNEL while we can wait */ 931 /*
932 * Charge page using GFP_KERNEL while we can wait.
933 * Charged back to the user(not to caller) when swap account is used.
934 * add_to_page_cache() will be called with GFP_NOWAIT.
935 */
932 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); 936 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
933 if (error) 937 if (error)
934 goto out; 938 goto out;
@@ -1320,15 +1324,19 @@ repeat:
1320 } else { 1324 } else {
1321 shmem_swp_unmap(entry); 1325 shmem_swp_unmap(entry);
1322 spin_unlock(&info->lock); 1326 spin_unlock(&info->lock);
1323 unlock_page(swappage);
1324 page_cache_release(swappage);
1325 if (error == -ENOMEM) { 1327 if (error == -ENOMEM) {
1326 /* allow reclaim from this memory cgroup */ 1328 /* allow reclaim from this memory cgroup */
1327 error = mem_cgroup_shrink_usage(current->mm, 1329 error = mem_cgroup_shrink_usage(swappage,
1330 current->mm,
1328 gfp); 1331 gfp);
1329 if (error) 1332 if (error) {
1333 unlock_page(swappage);
1334 page_cache_release(swappage);
1330 goto failed; 1335 goto failed;
1336 }
1331 } 1337 }
1338 unlock_page(swappage);
1339 page_cache_release(swappage);
1332 goto repeat; 1340 goto repeat;
1333 } 1341 }
1334 } else if (sgp == SGP_READ && !filepage) { 1342 } else if (sgp == SGP_READ && !filepage) {
@@ -1379,7 +1387,7 @@ repeat:
1379 1387
1380 /* Precharge page while we can wait, compensate after */ 1388 /* Precharge page while we can wait, compensate after */
1381 error = mem_cgroup_cache_charge(filepage, current->mm, 1389 error = mem_cgroup_cache_charge(filepage, current->mm,
1382 gfp & ~__GFP_HIGHMEM); 1390 GFP_KERNEL);
1383 if (error) { 1391 if (error) {
1384 page_cache_release(filepage); 1392 page_cache_release(filepage);
1385 shmem_unacct_blocks(info->flags, 1); 1393 shmem_unacct_blocks(info->flags, 1);
diff --git a/mm/swap.c b/mm/swap.c
index ba2c0e8b8b54..8adb9feb61e1 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -151,6 +151,26 @@ void rotate_reclaimable_page(struct page *page)
151 } 151 }
152} 152}
153 153
154static void update_page_reclaim_stat(struct zone *zone, struct page *page,
155 int file, int rotated)
156{
157 struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
158 struct zone_reclaim_stat *memcg_reclaim_stat;
159
160 memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
161
162 reclaim_stat->recent_scanned[file]++;
163 if (rotated)
164 reclaim_stat->recent_rotated[file]++;
165
166 if (!memcg_reclaim_stat)
167 return;
168
169 memcg_reclaim_stat->recent_scanned[file]++;
170 if (rotated)
171 memcg_reclaim_stat->recent_rotated[file]++;
172}
173
154/* 174/*
155 * FIXME: speed this up? 175 * FIXME: speed this up?
156 */ 176 */
@@ -168,10 +188,8 @@ void activate_page(struct page *page)
168 lru += LRU_ACTIVE; 188 lru += LRU_ACTIVE;
169 add_page_to_lru_list(zone, page, lru); 189 add_page_to_lru_list(zone, page, lru);
170 __count_vm_event(PGACTIVATE); 190 __count_vm_event(PGACTIVATE);
171 mem_cgroup_move_lists(page, lru);
172 191
173 zone->recent_rotated[!!file]++; 192 update_page_reclaim_stat(zone, page, !!file, 1);
174 zone->recent_scanned[!!file]++;
175 } 193 }
176 spin_unlock_irq(&zone->lru_lock); 194 spin_unlock_irq(&zone->lru_lock);
177} 195}
@@ -386,12 +404,14 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
386{ 404{
387 int i; 405 int i;
388 struct zone *zone = NULL; 406 struct zone *zone = NULL;
407
389 VM_BUG_ON(is_unevictable_lru(lru)); 408 VM_BUG_ON(is_unevictable_lru(lru));
390 409
391 for (i = 0; i < pagevec_count(pvec); i++) { 410 for (i = 0; i < pagevec_count(pvec); i++) {
392 struct page *page = pvec->pages[i]; 411 struct page *page = pvec->pages[i];
393 struct zone *pagezone = page_zone(page); 412 struct zone *pagezone = page_zone(page);
394 int file; 413 int file;
414 int active;
395 415
396 if (pagezone != zone) { 416 if (pagezone != zone) {
397 if (zone) 417 if (zone)
@@ -403,12 +423,11 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
403 VM_BUG_ON(PageUnevictable(page)); 423 VM_BUG_ON(PageUnevictable(page));
404 VM_BUG_ON(PageLRU(page)); 424 VM_BUG_ON(PageLRU(page));
405 SetPageLRU(page); 425 SetPageLRU(page);
426 active = is_active_lru(lru);
406 file = is_file_lru(lru); 427 file = is_file_lru(lru);
407 zone->recent_scanned[file]++; 428 if (active)
408 if (is_active_lru(lru)) {
409 SetPageActive(page); 429 SetPageActive(page);
410 zone->recent_rotated[file]++; 430 update_page_reclaim_stat(zone, page, file, active);
411 }
412 add_page_to_lru_list(zone, page, lru); 431 add_page_to_lru_list(zone, page, lru);
413 } 432 }
414 if (zone) 433 if (zone)
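
The swap.c hunks fold the old open-coded zone->recent_* updates into update_page_reclaim_stat(), which always bumps the global zone statistics and bumps the per-memcg statistics only when the page is charged to a cgroup. A plain-C sketch of that dual bookkeeping; the names are hypothetical and only the shape matches the kernel helper:

#include <stdio.h>
#include <stddef.h>

struct reclaim_stat { unsigned long scanned[2], rotated[2]; };

/* file: 0 = anon, 1 = file-backed; rotated: page went back to the
 * active list.  memcg_stat may be NULL, as for an uncharged page. */
static void update_stat(struct reclaim_stat *zone_stat,
			struct reclaim_stat *memcg_stat,
			int file, int rotated)
{
	zone_stat->scanned[file]++;
	if (rotated)
		zone_stat->rotated[file]++;

	if (!memcg_stat)
		return;
	memcg_stat->scanned[file]++;
	if (rotated)
		memcg_stat->rotated[file]++;
}

int main(void)
{
	struct reclaim_stat zone = { {0, 0}, {0, 0} };
	struct reclaim_stat memcg = { {0, 0}, {0, 0} };

	update_stat(&zone, &memcg, 1, 1);	/* charged file page, rotated */
	update_stat(&zone, NULL, 0, 0);		/* uncharged anon page */
	printf("zone scanned: anon=%lu file=%lu, memcg rotated file=%lu\n",
	       zone.scanned[0], zone.scanned[1], memcg.rotated[1]);
	return 0;
}
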
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 81c825f67a7f..3ecea98ecb45 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,6 +17,7 @@
17#include <linux/backing-dev.h> 17#include <linux/backing-dev.h>
18#include <linux/pagevec.h> 18#include <linux/pagevec.h>
19#include <linux/migrate.h> 19#include <linux/migrate.h>
20#include <linux/page_cgroup.h>
20 21
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
22 23
@@ -108,6 +109,8 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
108 */ 109 */
109void __delete_from_swap_cache(struct page *page) 110void __delete_from_swap_cache(struct page *page)
110{ 111{
112 swp_entry_t ent = {.val = page_private(page)};
113
111 VM_BUG_ON(!PageLocked(page)); 114 VM_BUG_ON(!PageLocked(page));
112 VM_BUG_ON(!PageSwapCache(page)); 115 VM_BUG_ON(!PageSwapCache(page));
113 VM_BUG_ON(PageWriteback(page)); 116 VM_BUG_ON(PageWriteback(page));
@@ -118,6 +121,7 @@ void __delete_from_swap_cache(struct page *page)
118 total_swapcache_pages--; 121 total_swapcache_pages--;
119 __dec_zone_page_state(page, NR_FILE_PAGES); 122 __dec_zone_page_state(page, NR_FILE_PAGES);
120 INC_CACHE_INFO(del_total); 123 INC_CACHE_INFO(del_total);
124 mem_cgroup_uncharge_swapcache(page, ent);
121} 125}
122 126
123/** 127/**
diff --git a/mm/swapfile.c b/mm/swapfile.c
index eec5ca758a23..da422c47e2ee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -33,6 +33,7 @@
33#include <asm/pgtable.h> 33#include <asm/pgtable.h>
34#include <asm/tlbflush.h> 34#include <asm/tlbflush.h>
35#include <linux/swapops.h> 35#include <linux/swapops.h>
36#include <linux/page_cgroup.h>
36 37
37static DEFINE_SPINLOCK(swap_lock); 38static DEFINE_SPINLOCK(swap_lock);
38static unsigned int nr_swapfiles; 39static unsigned int nr_swapfiles;
@@ -470,8 +471,9 @@ out:
470 return NULL; 471 return NULL;
471} 472}
472 473
473static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) 474static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
474{ 475{
476 unsigned long offset = swp_offset(ent);
475 int count = p->swap_map[offset]; 477 int count = p->swap_map[offset];
476 478
477 if (count < SWAP_MAP_MAX) { 479 if (count < SWAP_MAP_MAX) {
@@ -486,6 +488,7 @@ static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
486 swap_list.next = p - swap_info; 488 swap_list.next = p - swap_info;
487 nr_swap_pages++; 489 nr_swap_pages++;
488 p->inuse_pages--; 490 p->inuse_pages--;
491 mem_cgroup_uncharge_swap(ent);
489 } 492 }
490 } 493 }
491 return count; 494 return count;
@@ -501,7 +504,7 @@ void swap_free(swp_entry_t entry)
501 504
502 p = swap_info_get(entry); 505 p = swap_info_get(entry);
503 if (p) { 506 if (p) {
504 swap_entry_free(p, swp_offset(entry)); 507 swap_entry_free(p, entry);
505 spin_unlock(&swap_lock); 508 spin_unlock(&swap_lock);
506 } 509 }
507} 510}
@@ -581,7 +584,7 @@ int free_swap_and_cache(swp_entry_t entry)
581 584
582 p = swap_info_get(entry); 585 p = swap_info_get(entry);
583 if (p) { 586 if (p) {
584 if (swap_entry_free(p, swp_offset(entry)) == 1) { 587 if (swap_entry_free(p, entry) == 1) {
585 page = find_get_page(&swapper_space, entry.val); 588 page = find_get_page(&swapper_space, entry.val);
586 if (page && !trylock_page(page)) { 589 if (page && !trylock_page(page)) {
587 page_cache_release(page); 590 page_cache_release(page);
@@ -690,17 +693,18 @@ unsigned int count_swap_pages(int type, int free)
690static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, 693static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
691 unsigned long addr, swp_entry_t entry, struct page *page) 694 unsigned long addr, swp_entry_t entry, struct page *page)
692{ 695{
696 struct mem_cgroup *ptr = NULL;
693 spinlock_t *ptl; 697 spinlock_t *ptl;
694 pte_t *pte; 698 pte_t *pte;
695 int ret = 1; 699 int ret = 1;
696 700
697 if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) 701 if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr))
698 ret = -ENOMEM; 702 ret = -ENOMEM;
699 703
700 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 704 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
701 if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { 705 if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
702 if (ret > 0) 706 if (ret > 0)
703 mem_cgroup_uncharge_page(page); 707 mem_cgroup_cancel_charge_swapin(ptr);
704 ret = 0; 708 ret = 0;
705 goto out; 709 goto out;
706 } 710 }
@@ -710,6 +714,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
710 set_pte_at(vma->vm_mm, addr, pte, 714 set_pte_at(vma->vm_mm, addr, pte,
711 pte_mkold(mk_pte(page, vma->vm_page_prot))); 715 pte_mkold(mk_pte(page, vma->vm_page_prot)));
712 page_add_anon_rmap(page, vma, addr); 716 page_add_anon_rmap(page, vma, addr);
717 mem_cgroup_commit_charge_swapin(page, ptr);
713 swap_free(entry); 718 swap_free(entry);
714 /* 719 /*
715 * Move the page to the active list so it is not 720 * Move the page to the active list so it is not
@@ -1492,6 +1497,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
1492 spin_unlock(&swap_lock); 1497 spin_unlock(&swap_lock);
1493 mutex_unlock(&swapon_mutex); 1498 mutex_unlock(&swapon_mutex);
1494 vfree(swap_map); 1499 vfree(swap_map);
 1500 /* Destroy swap account information */
1501 swap_cgroup_swapoff(type);
1502
1495 inode = mapping->host; 1503 inode = mapping->host;
1496 if (S_ISBLK(inode->i_mode)) { 1504 if (S_ISBLK(inode->i_mode)) {
1497 struct block_device *bdev = I_BDEV(inode); 1505 struct block_device *bdev = I_BDEV(inode);
@@ -1809,6 +1817,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1809 } 1817 }
1810 swap_map[page_nr] = SWAP_MAP_BAD; 1818 swap_map[page_nr] = SWAP_MAP_BAD;
1811 } 1819 }
1820
1821 error = swap_cgroup_swapon(type, maxpages);
1822 if (error)
1823 goto bad_swap;
1824
1812 nr_good_pages = swap_header->info.last_page - 1825 nr_good_pages = swap_header->info.last_page -
1813 swap_header->info.nr_badpages - 1826 swap_header->info.nr_badpages -
1814 1 /* header page */; 1827 1 /* header page */;
@@ -1880,6 +1893,7 @@ bad_swap:
1880 bd_release(bdev); 1893 bd_release(bdev);
1881 } 1894 }
1882 destroy_swap_extents(p); 1895 destroy_swap_extents(p);
1896 swap_cgroup_swapoff(type);
1883bad_swap_2: 1897bad_swap_2:
1884 spin_lock(&swap_lock); 1898 spin_lock(&swap_lock);
1885 p->swap_file = NULL; 1899 p->swap_file = NULL;
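
swap_cgroup_swapon(), hooked into sys_swapon() above, performs a two-level allocation: a vmalloc'ed pointer array sized from max_pages, then one zeroed page per slot via swap_cgroup_prepare(), with a full rollback on failure. A userspace sketch of that pattern, with calloc standing in for vmalloc and alloc_page:

#include <stdio.h>
#include <stdlib.h>

struct ctrl { void **map; unsigned long length; };

/* Second level: one zeroed "page" per slot, rolled back on failure,
 * mirroring swap_cgroup_prepare(). */
static int prepare(struct ctrl *c, unsigned long pagesz)
{
	unsigned long idx, done;

	for (idx = 0; idx < c->length; idx++) {
		c->map[idx] = calloc(1, pagesz);
		if (!c->map[idx])
			goto rollback;
	}
	return 0;
rollback:
	for (done = 0; done < idx; done++)
		free(c->map[done]);
	return -1;
}

int main(void)
{
	struct ctrl c;
	unsigned long max_pages = 100000;	/* hypothetical swap size */

	/* First level: pointer array, length = max_pages/SC_PER_PAGE + 1. */
	c.length = max_pages / 512 + 1;
	c.map = calloc(c.length, sizeof(void *));
	if (!c.map || prepare(&c, 4096)) {
		fprintf(stderr, "couldn't allocate enough memory for swap_cgroup\n");
		return 1;
	}
	printf("allocated %lu record pages\n", c.length);
	return 0;
}
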
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b07c48b09a93..9a27c44aa327 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -125,11 +125,30 @@ static LIST_HEAD(shrinker_list);
125static DECLARE_RWSEM(shrinker_rwsem); 125static DECLARE_RWSEM(shrinker_rwsem);
126 126
127#ifdef CONFIG_CGROUP_MEM_RES_CTLR 127#ifdef CONFIG_CGROUP_MEM_RES_CTLR
128#define scan_global_lru(sc) (!(sc)->mem_cgroup) 128#define scanning_global_lru(sc) (!(sc)->mem_cgroup)
129#else 129#else
130#define scan_global_lru(sc) (1) 130#define scanning_global_lru(sc) (1)
131#endif 131#endif
132 132
133static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
134 struct scan_control *sc)
135{
136 if (!scanning_global_lru(sc))
137 return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
138
139 return &zone->reclaim_stat;
140}
141
142static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
143 enum lru_list lru)
144{
145 if (!scanning_global_lru(sc))
146 return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
147
148 return zone_page_state(zone, NR_LRU_BASE + lru);
149}
150
151
133/* 152/*
134 * Add a shrinker callback to be called from the vm 153 * Add a shrinker callback to be called from the vm
135 */ 154 */
@@ -512,7 +531,6 @@ redo:
512 lru = LRU_UNEVICTABLE; 531 lru = LRU_UNEVICTABLE;
513 add_page_to_unevictable_list(page); 532 add_page_to_unevictable_list(page);
514 } 533 }
515 mem_cgroup_move_lists(page, lru);
516 534
517 /* 535 /*
518 * page's status can change while we move it among lru. If an evictable 536 * page's status can change while we move it among lru. If an evictable
@@ -547,7 +565,6 @@ void putback_lru_page(struct page *page)
547 565
548 lru = !!TestClearPageActive(page) + page_is_file_cache(page); 566 lru = !!TestClearPageActive(page) + page_is_file_cache(page);
549 lru_cache_add_lru(page, lru); 567 lru_cache_add_lru(page, lru);
550 mem_cgroup_move_lists(page, lru);
551 put_page(page); 568 put_page(page);
552} 569}
553#endif /* CONFIG_UNEVICTABLE_LRU */ 570#endif /* CONFIG_UNEVICTABLE_LRU */
@@ -813,6 +830,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
813 return ret; 830 return ret;
814 831
815 ret = -EBUSY; 832 ret = -EBUSY;
833
816 if (likely(get_page_unless_zero(page))) { 834 if (likely(get_page_unless_zero(page))) {
817 /* 835 /*
818 * Be careful not to clear PageLRU until after we're 836 * Be careful not to clear PageLRU until after we're
@@ -821,6 +839,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
821 */ 839 */
822 ClearPageLRU(page); 840 ClearPageLRU(page);
823 ret = 0; 841 ret = 0;
842 mem_cgroup_del_lru(page);
824 } 843 }
825 844
826 return ret; 845 return ret;
@@ -1029,6 +1048,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1029 struct pagevec pvec; 1048 struct pagevec pvec;
1030 unsigned long nr_scanned = 0; 1049 unsigned long nr_scanned = 0;
1031 unsigned long nr_reclaimed = 0; 1050 unsigned long nr_reclaimed = 0;
1051 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1032 1052
1033 pagevec_init(&pvec, 1); 1053 pagevec_init(&pvec, 1);
1034 1054
@@ -1070,13 +1090,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1070 __mod_zone_page_state(zone, NR_INACTIVE_ANON, 1090 __mod_zone_page_state(zone, NR_INACTIVE_ANON,
1071 -count[LRU_INACTIVE_ANON]); 1091 -count[LRU_INACTIVE_ANON]);
1072 1092
1073 if (scan_global_lru(sc)) { 1093 if (scanning_global_lru(sc))
1074 zone->pages_scanned += nr_scan; 1094 zone->pages_scanned += nr_scan;
1075 zone->recent_scanned[0] += count[LRU_INACTIVE_ANON]; 1095
1076 zone->recent_scanned[0] += count[LRU_ACTIVE_ANON]; 1096 reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
1077 zone->recent_scanned[1] += count[LRU_INACTIVE_FILE]; 1097 reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
1078 zone->recent_scanned[1] += count[LRU_ACTIVE_FILE]; 1098 reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE];
1079 } 1099 reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE];
1100
1080 spin_unlock_irq(&zone->lru_lock); 1101 spin_unlock_irq(&zone->lru_lock);
1081 1102
1082 nr_scanned += nr_scan; 1103 nr_scanned += nr_scan;
@@ -1108,7 +1129,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1108 if (current_is_kswapd()) { 1129 if (current_is_kswapd()) {
1109 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan); 1130 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
1110 __count_vm_events(KSWAPD_STEAL, nr_freed); 1131 __count_vm_events(KSWAPD_STEAL, nr_freed);
1111 } else if (scan_global_lru(sc)) 1132 } else if (scanning_global_lru(sc))
1112 __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan); 1133 __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
1113 1134
1114 __count_zone_vm_events(PGSTEAL, zone, nr_freed); 1135 __count_zone_vm_events(PGSTEAL, zone, nr_freed);
@@ -1134,10 +1155,9 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1134 SetPageLRU(page); 1155 SetPageLRU(page);
1135 lru = page_lru(page); 1156 lru = page_lru(page);
1136 add_page_to_lru_list(zone, page, lru); 1157 add_page_to_lru_list(zone, page, lru);
1137 mem_cgroup_move_lists(page, lru); 1158 if (PageActive(page)) {
1138 if (PageActive(page) && scan_global_lru(sc)) {
1139 int file = !!page_is_file_cache(page); 1159 int file = !!page_is_file_cache(page);
1140 zone->recent_rotated[file]++; 1160 reclaim_stat->recent_rotated[file]++;
1141 } 1161 }
1142 if (!pagevec_add(&pvec, page)) { 1162 if (!pagevec_add(&pvec, page)) {
1143 spin_unlock_irq(&zone->lru_lock); 1163 spin_unlock_irq(&zone->lru_lock);
@@ -1197,6 +1217,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1197 struct page *page; 1217 struct page *page;
1198 struct pagevec pvec; 1218 struct pagevec pvec;
1199 enum lru_list lru; 1219 enum lru_list lru;
1220 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1200 1221
1201 lru_add_drain(); 1222 lru_add_drain();
1202 spin_lock_irq(&zone->lru_lock); 1223 spin_lock_irq(&zone->lru_lock);
@@ -1207,10 +1228,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 1207 * zone->pages_scanned is used to detect a zone's oom 1228 * zone->pages_scanned is used to detect a zone's oom
1208 * mem_cgroup remembers nr_scan by itself. 1229 * mem_cgroup remembers nr_scan by itself.
1209 */ 1230 */
1210 if (scan_global_lru(sc)) { 1231 if (scanning_global_lru(sc)) {
1211 zone->pages_scanned += pgscanned; 1232 zone->pages_scanned += pgscanned;
1212 zone->recent_scanned[!!file] += pgmoved;
1213 } 1233 }
1234 reclaim_stat->recent_scanned[!!file] += pgmoved;
1214 1235
1215 if (file) 1236 if (file)
1216 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved); 1237 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
@@ -1251,8 +1272,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1251 * This helps balance scan pressure between file and anonymous 1272 * This helps balance scan pressure between file and anonymous
1252 * pages in get_scan_ratio. 1273 * pages in get_scan_ratio.
1253 */ 1274 */
1254 if (scan_global_lru(sc)) 1275 reclaim_stat->recent_rotated[!!file] += pgmoved;
1255 zone->recent_rotated[!!file] += pgmoved;
1256 1276
1257 while (!list_empty(&l_inactive)) { 1277 while (!list_empty(&l_inactive)) {
1258 page = lru_to_page(&l_inactive); 1278 page = lru_to_page(&l_inactive);
@@ -1263,7 +1283,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1263 ClearPageActive(page); 1283 ClearPageActive(page);
1264 1284
1265 list_move(&page->lru, &zone->lru[lru].list); 1285 list_move(&page->lru, &zone->lru[lru].list);
1266 mem_cgroup_move_lists(page, lru); 1286 mem_cgroup_add_lru_list(page, lru);
1267 pgmoved++; 1287 pgmoved++;
1268 if (!pagevec_add(&pvec, page)) { 1288 if (!pagevec_add(&pvec, page)) {
1269 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); 1289 __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1292,6 +1312,38 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1292 pagevec_release(&pvec); 1312 pagevec_release(&pvec);
1293} 1313}
1294 1314
1315static int inactive_anon_is_low_global(struct zone *zone)
1316{
1317 unsigned long active, inactive;
1318
1319 active = zone_page_state(zone, NR_ACTIVE_ANON);
1320 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1321
1322 if (inactive * zone->inactive_ratio < active)
1323 return 1;
1324
1325 return 0;
1326}
1327
1328/**
1329 * inactive_anon_is_low - check if anonymous pages need to be deactivated
1330 * @zone: zone to check
1331 * @sc: scan control of this context
1332 *
1333 * Returns true if the zone does not have enough inactive anon pages,
1334 * meaning some active anon pages need to be deactivated.
1335 */
1336static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
1337{
1338 int low;
1339
1340 if (scanning_global_lru(sc))
1341 low = inactive_anon_is_low_global(zone);
1342 else
1343 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
1344 return low;
1345}
1346
1295static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, 1347static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1296 struct zone *zone, struct scan_control *sc, int priority) 1348 struct zone *zone, struct scan_control *sc, int priority)
1297{ 1349{
@@ -1302,8 +1354,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1302 return 0; 1354 return 0;
1303 } 1355 }
1304 1356
1305 if (lru == LRU_ACTIVE_ANON && 1357 if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
1306 (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
1307 shrink_active_list(nr_to_scan, zone, sc, priority, file); 1358 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1308 return 0; 1359 return 0;
1309 } 1360 }
@@ -1325,6 +1376,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1325 unsigned long anon, file, free; 1376 unsigned long anon, file, free;
1326 unsigned long anon_prio, file_prio; 1377 unsigned long anon_prio, file_prio;
1327 unsigned long ap, fp; 1378 unsigned long ap, fp;
1379 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1328 1380
1329 /* If we have no swap space, do not bother scanning anon pages. */ 1381 /* If we have no swap space, do not bother scanning anon pages. */
1330 if (nr_swap_pages <= 0) { 1382 if (nr_swap_pages <= 0) {
@@ -1333,17 +1385,20 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1333 return; 1385 return;
1334 } 1386 }
1335 1387
1336 anon = zone_page_state(zone, NR_ACTIVE_ANON) + 1388 anon = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
1337 zone_page_state(zone, NR_INACTIVE_ANON); 1389 zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
1338 file = zone_page_state(zone, NR_ACTIVE_FILE) + 1390 file = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
1339 zone_page_state(zone, NR_INACTIVE_FILE); 1391 zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
1340 free = zone_page_state(zone, NR_FREE_PAGES); 1392
1341 1393 if (scanning_global_lru(sc)) {
1342 /* If we have very few page cache pages, force-scan anon pages. */ 1394 free = zone_page_state(zone, NR_FREE_PAGES);
1343 if (unlikely(file + free <= zone->pages_high)) { 1395 /* If we have very few page cache pages,
1344 percent[0] = 100; 1396 force-scan anon pages. */
1345 percent[1] = 0; 1397 if (unlikely(file + free <= zone->pages_high)) {
1346 return; 1398 percent[0] = 100;
1399 percent[1] = 0;
1400 return;
1401 }
1347 } 1402 }
1348 1403
1349 /* 1404 /*
@@ -1357,17 +1412,17 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1357 * 1412 *
1358 * anon in [0], file in [1] 1413 * anon in [0], file in [1]
1359 */ 1414 */
1360 if (unlikely(zone->recent_scanned[0] > anon / 4)) { 1415 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
1361 spin_lock_irq(&zone->lru_lock); 1416 spin_lock_irq(&zone->lru_lock);
1362 zone->recent_scanned[0] /= 2; 1417 reclaim_stat->recent_scanned[0] /= 2;
1363 zone->recent_rotated[0] /= 2; 1418 reclaim_stat->recent_rotated[0] /= 2;
1364 spin_unlock_irq(&zone->lru_lock); 1419 spin_unlock_irq(&zone->lru_lock);
1365 } 1420 }
1366 1421
1367 if (unlikely(zone->recent_scanned[1] > file / 4)) { 1422 if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
1368 spin_lock_irq(&zone->lru_lock); 1423 spin_lock_irq(&zone->lru_lock);
1369 zone->recent_scanned[1] /= 2; 1424 reclaim_stat->recent_scanned[1] /= 2;
1370 zone->recent_rotated[1] /= 2; 1425 reclaim_stat->recent_rotated[1] /= 2;
1371 spin_unlock_irq(&zone->lru_lock); 1426 spin_unlock_irq(&zone->lru_lock);
1372 } 1427 }
1373 1428
@@ -1383,11 +1438,11 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1383 * proportional to the fraction of recently scanned pages on 1438 * proportional to the fraction of recently scanned pages on
1384 * each list that were recently referenced and in active use. 1439 * each list that were recently referenced and in active use.
1385 */ 1440 */
1386 ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); 1441 ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
1387 ap /= zone->recent_rotated[0] + 1; 1442 ap /= reclaim_stat->recent_rotated[0] + 1;
1388 1443
1389 fp = (file_prio + 1) * (zone->recent_scanned[1] + 1); 1444 fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
1390 fp /= zone->recent_rotated[1] + 1; 1445 fp /= reclaim_stat->recent_rotated[1] + 1;
1391 1446
1392 /* Normalize to percentages */ 1447 /* Normalize to percentages */
1393 percent[0] = 100 * ap / (ap + fp + 1); 1448 percent[0] = 100 * ap / (ap + fp + 1);
@@ -1411,30 +1466,23 @@ static void shrink_zone(int priority, struct zone *zone,
1411 get_scan_ratio(zone, sc, percent); 1466 get_scan_ratio(zone, sc, percent);
1412 1467
1413 for_each_evictable_lru(l) { 1468 for_each_evictable_lru(l) {
1414 if (scan_global_lru(sc)) { 1469 int file = is_file_lru(l);
1415 int file = is_file_lru(l); 1470 int scan;
1416 int scan; 1471
1417 1472 scan = zone_page_state(zone, NR_LRU_BASE + l);
1418 scan = zone_page_state(zone, NR_LRU_BASE + l); 1473 if (priority) {
1419 if (priority) { 1474 scan >>= priority;
1420 scan >>= priority; 1475 scan = (scan * percent[file]) / 100;
1421 scan = (scan * percent[file]) / 100; 1476 }
1422 } 1477 if (scanning_global_lru(sc)) {
1423 zone->lru[l].nr_scan += scan; 1478 zone->lru[l].nr_scan += scan;
1424 nr[l] = zone->lru[l].nr_scan; 1479 nr[l] = zone->lru[l].nr_scan;
1425 if (nr[l] >= swap_cluster_max) 1480 if (nr[l] >= swap_cluster_max)
1426 zone->lru[l].nr_scan = 0; 1481 zone->lru[l].nr_scan = 0;
1427 else 1482 else
1428 nr[l] = 0; 1483 nr[l] = 0;
1429 } else { 1484 } else
1430 /* 1485 nr[l] = scan;
1431 * This reclaim occurs not because zone memory shortage
1432 * but because memory controller hits its limit.
1433 * Don't modify zone reclaim related data.
1434 */
1435 nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
1436 priority, l);
1437 }
1438 } 1486 }
1439 1487
1440 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || 1488 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1467,9 +1515,7 @@ static void shrink_zone(int priority, struct zone *zone,
1467 * Even if we did not try to evict anon pages at all, we want to 1515 * Even if we did not try to evict anon pages at all, we want to
1468 * rebalance the anon lru active/inactive ratio. 1516 * rebalance the anon lru active/inactive ratio.
1469 */ 1517 */
1470 if (!scan_global_lru(sc) || inactive_anon_is_low(zone)) 1518 if (inactive_anon_is_low(zone, sc))
1471 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1472 else if (!scan_global_lru(sc))
1473 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); 1519 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1474 1520
1475 throttle_vm_writeout(sc->gfp_mask); 1521 throttle_vm_writeout(sc->gfp_mask);
@@ -1504,7 +1550,7 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
 1504 * Take care that memory controller reclaim has only a small 1550 * Take care that memory controller reclaim has only a small
 1505 * influence on the global LRU. 1551 * influence on the global LRU.
1506 */ 1552 */
1507 if (scan_global_lru(sc)) { 1553 if (scanning_global_lru(sc)) {
1508 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1554 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1509 continue; 1555 continue;
1510 note_zone_scanning_priority(zone, priority); 1556 note_zone_scanning_priority(zone, priority);
@@ -1557,12 +1603,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1557 1603
1558 delayacct_freepages_start(); 1604 delayacct_freepages_start();
1559 1605
1560 if (scan_global_lru(sc)) 1606 if (scanning_global_lru(sc))
1561 count_vm_event(ALLOCSTALL); 1607 count_vm_event(ALLOCSTALL);
1562 /* 1608 /*
1563 * mem_cgroup will not do shrink_slab. 1609 * mem_cgroup will not do shrink_slab.
1564 */ 1610 */
1565 if (scan_global_lru(sc)) { 1611 if (scanning_global_lru(sc)) {
1566 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1612 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1567 1613
1568 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1614 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1581,7 +1627,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1581 * Don't shrink slabs when reclaiming memory from 1627 * Don't shrink slabs when reclaiming memory from
1582 * over limit cgroups 1628 * over limit cgroups
1583 */ 1629 */
1584 if (scan_global_lru(sc)) { 1630 if (scanning_global_lru(sc)) {
1585 shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages); 1631 shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
1586 if (reclaim_state) { 1632 if (reclaim_state) {
1587 sc->nr_reclaimed += reclaim_state->reclaimed_slab; 1633 sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -1612,7 +1658,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1612 congestion_wait(WRITE, HZ/10); 1658 congestion_wait(WRITE, HZ/10);
1613 } 1659 }
1614 /* top priority shrink_zones still had more to do? don't OOM, then */ 1660 /* top priority shrink_zones still had more to do? don't OOM, then */
1615 if (!sc->all_unreclaimable && scan_global_lru(sc)) 1661 if (!sc->all_unreclaimable && scanning_global_lru(sc))
1616 ret = sc->nr_reclaimed; 1662 ret = sc->nr_reclaimed;
1617out: 1663out:
1618 /* 1664 /*
@@ -1625,7 +1671,7 @@ out:
1625 if (priority < 0) 1671 if (priority < 0)
1626 priority = 0; 1672 priority = 0;
1627 1673
1628 if (scan_global_lru(sc)) { 1674 if (scanning_global_lru(sc)) {
1629 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1675 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1630 1676
1631 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1677 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1661,19 +1707,24 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
1661#ifdef CONFIG_CGROUP_MEM_RES_CTLR 1707#ifdef CONFIG_CGROUP_MEM_RES_CTLR
1662 1708
1663unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, 1709unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1664 gfp_t gfp_mask) 1710 gfp_t gfp_mask,
1711 bool noswap,
1712 unsigned int swappiness)
1665{ 1713{
1666 struct scan_control sc = { 1714 struct scan_control sc = {
1667 .may_writepage = !laptop_mode, 1715 .may_writepage = !laptop_mode,
1668 .may_swap = 1, 1716 .may_swap = 1,
1669 .swap_cluster_max = SWAP_CLUSTER_MAX, 1717 .swap_cluster_max = SWAP_CLUSTER_MAX,
1670 .swappiness = vm_swappiness, 1718 .swappiness = swappiness,
1671 .order = 0, 1719 .order = 0,
1672 .mem_cgroup = mem_cont, 1720 .mem_cgroup = mem_cont,
1673 .isolate_pages = mem_cgroup_isolate_pages, 1721 .isolate_pages = mem_cgroup_isolate_pages,
1674 }; 1722 };
1675 struct zonelist *zonelist; 1723 struct zonelist *zonelist;
1676 1724
1725 if (noswap)
1726 sc.may_swap = 0;
1727
1677 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 1728 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
1678 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 1729 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
1679 zonelist = NODE_DATA(numa_node_id())->node_zonelists; 1730 zonelist = NODE_DATA(numa_node_id())->node_zonelists;
@@ -1761,7 +1812,7 @@ loop_again:
1761 * Do some background aging of the anon list, to give 1812 * Do some background aging of the anon list, to give
1762 * pages a chance to be referenced before reclaiming. 1813 * pages a chance to be referenced before reclaiming.
1763 */ 1814 */
1764 if (inactive_anon_is_low(zone)) 1815 if (inactive_anon_is_low(zone, &sc))
1765 shrink_active_list(SWAP_CLUSTER_MAX, zone, 1816 shrink_active_list(SWAP_CLUSTER_MAX, zone,
1766 &sc, priority, 0); 1817 &sc, priority, 0);
1767 1818
@@ -2404,6 +2455,7 @@ retry:
2404 2455
2405 __dec_zone_state(zone, NR_UNEVICTABLE); 2456 __dec_zone_state(zone, NR_UNEVICTABLE);
2406 list_move(&page->lru, &zone->lru[l].list); 2457 list_move(&page->lru, &zone->lru[l].list);
2458 mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
2407 __inc_zone_state(zone, NR_INACTIVE_ANON + l); 2459 __inc_zone_state(zone, NR_INACTIVE_ANON + l);
2408 __count_vm_event(UNEVICTABLE_PGRESCUED); 2460 __count_vm_event(UNEVICTABLE_PGRESCUED);
2409 } else { 2461 } else {
@@ -2412,6 +2464,7 @@ retry:
2412 */ 2464 */
2413 SetPageUnevictable(page); 2465 SetPageUnevictable(page);
2414 list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list); 2466 list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
2467 mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
2415 if (page_evictable(page, NULL)) 2468 if (page_evictable(page, NULL))
2416 goto retry; 2469 goto retry;
2417 } 2470 }
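
For the get_scan_ratio() hunks that re-point recent_scanned/recent_rotated at reclaim_stat, the anon/file balance formula itself is unchanged: ap = (anon_prio + 1) * (scanned + 1) / (rotated + 1), likewise fp, then both are normalized to percentages. A worked example with made-up counters, assuming the default swappiness of 60 (anon_prio = 60, file_prio = 200 - 60 = 140):

#include <stdio.h>

int main(void)
{
	unsigned long anon_prio = 60, file_prio = 140;	/* swappiness 60 */
	unsigned long anon_scan = 1000, anon_rot = 900;	/* hot anon pages */
	unsigned long file_scan = 1000, file_rot = 100;	/* cold file cache */
	unsigned long ap, fp;

	ap = (anon_prio + 1) * (anon_scan + 1) / (anon_rot + 1);
	fp = (file_prio + 1) * (file_scan + 1) / (file_rot + 1);

	/* Normalize to percentages, as get_scan_ratio() does: the cold
	 * file list ends up taking almost all of the reclaim pressure. */
	printf("anon %lu%%  file %lu%%\n",
	       100 * ap / (ap + fp + 1),	/* 4 */
	       100 * fp / (ap + fp + 1));	/* 95 */
	return 0;
}
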
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 76f06b94ab9f..c4a59824ac2c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2752,7 +2752,7 @@ int __init ip6_route_init(void)
2752 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2752 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2753 SLAB_HWCACHE_ALIGN, NULL); 2753 SLAB_HWCACHE_ALIGN, NULL);
2754 if (!ip6_dst_ops_template.kmem_cachep) 2754 if (!ip6_dst_ops_template.kmem_cachep)
2755 goto out;; 2755 goto out;
2756 2756
2757 ret = register_pernet_subsys(&ip6_route_net_ops); 2757 ret = register_pernet_subsys(&ip6_route_net_ops);
2758 if (ret) 2758 if (ret)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 9048fe7e7ea7..a031034720b4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -128,7 +128,7 @@ static struct ctl_table_header *ip6_header;
128 128
129int ipv6_sysctl_register(void) 129int ipv6_sysctl_register(void)
130{ 130{
131 int err = -ENOMEM;; 131 int err = -ENOMEM;
132 132
133 ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table); 133 ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table);
134 if (ip6_header == NULL) 134 if (ip6_header == NULL)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f3965df00559..33133d27b539 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -435,7 +435,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
435 int i; 435 int i;
436 436
437 q->perturb_timer.function = sfq_perturbation; 437 q->perturb_timer.function = sfq_perturbation;
438 q->perturb_timer.data = (unsigned long)sch;; 438 q->perturb_timer.data = (unsigned long)sch;
439 init_timer_deferrable(&q->perturb_timer); 439 init_timer_deferrable(&q->perturb_timer);
440 440
441 for (i = 0; i < SFQ_HASH_DIVISOR; i++) 441 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 20c576f530fa..56935bbc1496 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -489,7 +489,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
489 return 0; 489 return 0;
490 490
491out_err: 491out_err:
492 /* Clean up any successfull allocations */ 492 /* Clean up any successful allocations */
493 sctp_auth_destroy_hmacs(ep->auth_hmacs); 493 sctp_auth_destroy_hmacs(ep->auth_hmacs);
494 return -ENOMEM; 494 return -ENOMEM;
495} 495}
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 5ba78701adc3..3aacd0fe7179 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -513,11 +513,14 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
513 struct dev_cgroup *dev_cgroup; 513 struct dev_cgroup *dev_cgroup;
514 struct dev_whitelist_item *wh; 514 struct dev_whitelist_item *wh;
515 515
516 if (!S_ISBLK(mode) && !S_ISCHR(mode))
517 return 0;
518
516 rcu_read_lock(); 519 rcu_read_lock();
517 520
518 dev_cgroup = task_devcgroup(current); 521 dev_cgroup = task_devcgroup(current);
519 522
520 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 523 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
521 if (wh->type & DEV_ALL) 524 if (wh->type & DEV_ALL)
522 goto acc_check; 525 goto acc_check;
523 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 526 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode))
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index bf107a389ac1..71e2b914363e 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -569,7 +569,7 @@ static ssize_t smk_write_cipso(struct file *file, const char __user *buf,
569 if (skp == NULL) 569 if (skp == NULL)
570 goto out; 570 goto out;
571 571
572 rule += SMK_LABELLEN;; 572 rule += SMK_LABELLEN;
573 ret = sscanf(rule, "%d", &maplevel); 573 ret = sscanf(rule, "%d", &maplevel);
574 if (ret != 1 || maplevel > SMACK_CIPSO_MAXLEVEL) 574 if (ret != 1 || maplevel > SMACK_CIPSO_MAXLEVEL)
575 goto out; 575 goto out;
diff --git a/sound/soc/au1x/dbdma2.c b/sound/soc/au1x/dbdma2.c
index 74c823d60f91..bc8d654576c0 100644
--- a/sound/soc/au1x/dbdma2.c
+++ b/sound/soc/au1x/dbdma2.c
@@ -187,7 +187,7 @@ static int au1x_pcm_dbdma_realloc(struct au1xpsc_audio_dmadata *pcd,
187 au1x_pcm_dmatx_cb, (void *)pcd); 187 au1x_pcm_dmatx_cb, (void *)pcd);
188 188
189 if (!pcd->ddma_chan) 189 if (!pcd->ddma_chan)
190 return -ENOMEM;; 190 return -ENOMEM;
191 191
192 au1xxx_dbdma_set_devwidth(pcd->ddma_chan, msbits); 192 au1xxx_dbdma_set_devwidth(pcd->ddma_chan, msbits);
193 au1xxx_dbdma_ring_alloc(pcd->ddma_chan, 2); 193 au1xxx_dbdma_ring_alloc(pcd->ddma_chan, 2);
diff --git a/sound/soc/davinci/davinci-pcm.c b/sound/soc/davinci/davinci-pcm.c
index 74abc9b4f1cc..366049d8578c 100644
--- a/sound/soc/davinci/davinci-pcm.c
+++ b/sound/soc/davinci/davinci-pcm.c
@@ -212,7 +212,7 @@ davinci_pcm_pointer(struct snd_pcm_substream *substream)
212 if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) 212 if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
213 count = src - runtime->dma_addr; 213 count = src - runtime->dma_addr;
214 else 214 else
215 count = dst - runtime->dma_addr;; 215 count = dst - runtime->dma_addr;
216 216
217 spin_unlock(&prtd->lock); 217 spin_unlock(&prtd->lock);
218 218