aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Garzik <jeff@garzik.org>2006-09-26 13:13:19 -0400
committerJeff Garzik <jeff@garzik.org>2006-09-26 13:13:19 -0400
commitc226951b93f7cd7c3a10b17384535b617bd43fd0 (patch)
tree07b8796a5c99fbbf587b8d0dbcbc173cfe5e381e
parentb0df3bd1e553e901ec7297267611a5db88240b38 (diff)
parente8216dee838c09776680a6f1a2e54d81f3cdfa14 (diff)
Merge branch 'master' into upstream
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--Documentation/networking/dccp.txt8
-rw-r--r--Documentation/power/interface.txt15
-rw-r--r--Documentation/sysctl/vm.txt27
-rw-r--r--MAINTAINERS24
-rw-r--r--arch/alpha/Kconfig2
-rw-r--r--arch/alpha/mm/init.c2
-rw-r--r--arch/arm/mm/ioremap.c2
-rw-r--r--arch/avr32/Kconfig196
-rw-r--r--arch/avr32/Kconfig.debug19
-rw-r--r--arch/avr32/Makefile84
-rw-r--r--arch/avr32/boards/atstk1000/Makefile2
-rw-r--r--arch/avr32/boards/atstk1000/atstk1002.c37
-rw-r--r--arch/avr32/boards/atstk1000/flash.c95
-rw-r--r--arch/avr32/boards/atstk1000/setup.c59
-rw-r--r--arch/avr32/boards/atstk1000/spi.c27
-rw-r--r--arch/avr32/boot/images/Makefile62
-rw-r--r--arch/avr32/boot/u-boot/Makefile3
-rw-r--r--arch/avr32/boot/u-boot/empty.S1
-rw-r--r--arch/avr32/boot/u-boot/head.S60
-rw-r--r--arch/avr32/configs/atstk1002_defconfig754
-rw-r--r--arch/avr32/kernel/Makefile18
-rw-r--r--arch/avr32/kernel/asm-offsets.c25
-rw-r--r--arch/avr32/kernel/avr32_ksyms.c55
-rw-r--r--arch/avr32/kernel/cpu.c327
-rw-r--r--arch/avr32/kernel/entry-avr32b.S678
-rw-r--r--arch/avr32/kernel/head.S45
-rw-r--r--arch/avr32/kernel/init_task.c38
-rw-r--r--arch/avr32/kernel/irq.c71
-rw-r--r--arch/avr32/kernel/kprobes.c270
-rw-r--r--arch/avr32/kernel/module.c324
-rw-r--r--arch/avr32/kernel/process.c276
-rw-r--r--arch/avr32/kernel/ptrace.c371
-rw-r--r--arch/avr32/kernel/semaphore.c148
-rw-r--r--arch/avr32/kernel/setup.c335
-rw-r--r--arch/avr32/kernel/signal.c328
-rw-r--r--arch/avr32/kernel/switch_to.S35
-rw-r--r--arch/avr32/kernel/sys_avr32.c51
-rw-r--r--arch/avr32/kernel/syscall-stubs.S102
-rw-r--r--arch/avr32/kernel/syscall_table.S289
-rw-r--r--arch/avr32/kernel/time.c238
-rw-r--r--arch/avr32/kernel/traps.c425
-rw-r--r--arch/avr32/kernel/vmlinux.lds.c139
-rw-r--r--arch/avr32/lib/Makefile10
-rw-r--r--arch/avr32/lib/__avr32_asr64.S31
-rw-r--r--arch/avr32/lib/__avr32_lsl64.S31
-rw-r--r--arch/avr32/lib/__avr32_lsr64.S31
-rw-r--r--arch/avr32/lib/clear_user.S76
-rw-r--r--arch/avr32/lib/copy_user.S119
-rw-r--r--arch/avr32/lib/csum_partial.S47
-rw-r--r--arch/avr32/lib/csum_partial_copy_generic.S99
-rw-r--r--arch/avr32/lib/delay.c55
-rw-r--r--arch/avr32/lib/findbit.S154
-rw-r--r--arch/avr32/lib/io-readsl.S24
-rw-r--r--arch/avr32/lib/io-readsw.S43
-rw-r--r--arch/avr32/lib/io-writesl.S20
-rw-r--r--arch/avr32/lib/io-writesw.S38
-rw-r--r--arch/avr32/lib/libgcc.h33
-rw-r--r--arch/avr32/lib/longlong.h98
-rw-r--r--arch/avr32/lib/memcpy.S62
-rw-r--r--arch/avr32/lib/memset.S72
-rw-r--r--arch/avr32/lib/strncpy_from_user.S60
-rw-r--r--arch/avr32/lib/strnlen_user.S67
-rw-r--r--arch/avr32/mach-at32ap/Makefile2
-rw-r--r--arch/avr32/mach-at32ap/at32ap.c90
-rw-r--r--arch/avr32/mach-at32ap/at32ap7000.c876
-rw-r--r--arch/avr32/mach-at32ap/clock.c148
-rw-r--r--arch/avr32/mach-at32ap/clock.h30
-rw-r--r--arch/avr32/mach-at32ap/extint.c171
-rw-r--r--arch/avr32/mach-at32ap/hsmc.c164
-rw-r--r--arch/avr32/mach-at32ap/hsmc.h127
-rw-r--r--arch/avr32/mach-at32ap/intc.c133
-rw-r--r--arch/avr32/mach-at32ap/intc.h327
-rw-r--r--arch/avr32/mach-at32ap/pio.c118
-rw-r--r--arch/avr32/mach-at32ap/pio.h178
-rw-r--r--arch/avr32/mach-at32ap/sm.c289
-rw-r--r--arch/avr32/mach-at32ap/sm.h240
-rw-r--r--arch/avr32/mm/Makefile6
-rw-r--r--arch/avr32/mm/cache.c150
-rw-r--r--arch/avr32/mm/clear_page.S25
-rw-r--r--arch/avr32/mm/copy_page.S28
-rw-r--r--arch/avr32/mm/dma-coherent.c139
-rw-r--r--arch/avr32/mm/fault.c315
-rw-r--r--arch/avr32/mm/init.c480
-rw-r--r--arch/avr32/mm/ioremap.c197
-rw-r--r--arch/avr32/mm/tlb.c378
-rw-r--r--arch/frv/Kconfig12
-rw-r--r--arch/frv/kernel/Makefile5
-rw-r--r--arch/frv/kernel/irq-mb93091.c157
-rw-r--r--arch/frv/kernel/irq-mb93093.c115
-rw-r--r--arch/frv/kernel/irq-mb93493.c160
-rw-r--r--arch/frv/kernel/irq-routing.c291
-rw-r--r--arch/frv/kernel/irq.c741
-rw-r--r--arch/frv/kernel/setup.c1
-rw-r--r--arch/frv/kernel/time.c1
-rw-r--r--arch/frv/mb93090-mb00/pci-irq.c1
-rw-r--r--arch/frv/mm/init.c2
-rw-r--r--arch/h8300/mm/init.c2
-rw-r--r--arch/i386/Kconfig4
-rw-r--r--arch/i386/kernel/apm.c26
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c4
-rw-r--r--arch/i386/kernel/efi_stub.S1
-rw-r--r--arch/i386/kernel/reboot.c12
-rw-r--r--arch/i386/kernel/setup.c23
-rw-r--r--arch/i386/kernel/smp.c66
-rw-r--r--arch/i386/kernel/smpboot.c6
-rw-r--r--arch/i386/kernel/srat.c5
-rw-r--r--arch/i386/kernel/time.c50
-rw-r--r--arch/i386/kernel/time_hpet.c37
-rw-r--r--arch/i386/kernel/traps.c11
-rw-r--r--arch/i386/kernel/vmlinux.lds.S12
-rw-r--r--arch/i386/mach-voyager/voyager_thread.c1
-rw-r--r--arch/i386/mm/boot_ioremap.c7
-rw-r--r--arch/i386/mm/discontig.c33
-rw-r--r--arch/i386/mm/init.c44
-rw-r--r--arch/i386/mm/pgtable.c30
-rw-r--r--arch/i386/power/swsusp.S2
-rw-r--r--arch/ia64/Kconfig9
-rw-r--r--arch/ia64/kernel/acpi.c13
-rw-r--r--arch/ia64/kernel/numa.c34
-rw-r--r--arch/ia64/kernel/perfmon.c1
-rw-r--r--arch/ia64/kernel/topology.c6
-rw-r--r--arch/ia64/kernel/uncached.c2
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c3
-rw-r--r--arch/m32r/mm/init.c2
-rw-r--r--arch/m68knommu/mm/init.c2
-rw-r--r--arch/mips/au1000/common/dbdma.c10
-rw-r--r--arch/mips/mm/init.c4
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c3
-rw-r--r--arch/parisc/mm/init.c4
-rw-r--r--arch/powerpc/kernel/swsusp_32.S4
-rw-r--r--arch/s390/appldata/appldata_mem.c3
-rw-r--r--arch/s390/mm/cmm.c205
-rw-r--r--arch/sh/mm/cache-sh7705.c2
-rw-r--r--arch/sh64/mm/init.c2
-rw-r--r--arch/sparc/mm/srmmu.c2
-rw-r--r--arch/sparc/mm/sun4c.c2
-rw-r--r--arch/sparc64/solaris/misc.c20
-rw-r--r--arch/sparc64/solaris/socksys.c6
-rw-r--r--arch/um/drivers/chan_kern.c2
-rw-r--r--arch/um/drivers/mconsole_kern.c2
-rw-r--r--arch/um/drivers/mconsole_user.c7
-rw-r--r--arch/um/drivers/net_kern.c14
-rw-r--r--arch/um/drivers/pcap_kern.c2
-rw-r--r--arch/um/include/kern_util.h1
-rw-r--r--arch/um/include/longjmp.h5
-rw-r--r--arch/um/include/net_user.h1
-rw-r--r--arch/um/include/os.h5
-rw-r--r--arch/um/include/registers.h3
-rw-r--r--arch/um/include/sysdep-i386/archsetjmp.h19
-rw-r--r--arch/um/include/sysdep-i386/signal.h27
-rw-r--r--arch/um/include/sysdep-x86_64/archsetjmp.h21
-rw-r--r--arch/um/include/sysdep-x86_64/signal.h29
-rw-r--r--arch/um/kernel/exec.c4
-rw-r--r--arch/um/kernel/irq.c34
-rw-r--r--arch/um/kernel/mem.c7
-rw-r--r--arch/um/kernel/process_kern.c3
-rw-r--r--arch/um/kernel/reboot.c13
-rw-r--r--arch/um/kernel/skas/mmu.c2
-rw-r--r--arch/um/kernel/time.c12
-rw-r--r--arch/um/kernel/tlb.c370
-rw-r--r--arch/um/kernel/trap.c11
-rw-r--r--arch/um/os-Linux/helper.c22
-rw-r--r--arch/um/os-Linux/irq.c2
-rw-r--r--arch/um/os-Linux/main.c34
-rw-r--r--arch/um/os-Linux/mem.c6
-rw-r--r--arch/um/os-Linux/process.c13
-rw-r--r--arch/um/os-Linux/sigio.c103
-rw-r--r--arch/um/os-Linux/signal.c38
-rw-r--r--arch/um/os-Linux/skas/process.c26
-rw-r--r--arch/um/os-Linux/start_up.c1
-rw-r--r--arch/um/os-Linux/sys-i386/Makefile2
-rw-r--r--arch/um/os-Linux/sys-i386/registers.c17
-rw-r--r--arch/um/os-Linux/sys-i386/signal.c15
-rw-r--r--arch/um/os-Linux/sys-x86_64/Makefile2
-rw-r--r--arch/um/os-Linux/sys-x86_64/registers.c17
-rw-r--r--arch/um/os-Linux/sys-x86_64/signal.c16
-rw-r--r--arch/um/os-Linux/time.c22
-rw-r--r--arch/um/os-Linux/trap.c1
-rw-r--r--arch/um/os-Linux/uaccess.c3
-rw-r--r--arch/um/os-Linux/util.c5
-rw-r--r--arch/um/sys-i386/Makefile2
-rw-r--r--arch/um/sys-i386/bugs.c9
-rw-r--r--arch/um/sys-i386/ldt.c3
-rw-r--r--arch/um/sys-i386/ptrace_user.c5
-rw-r--r--arch/um/sys-i386/setjmp.S58
-rw-r--r--arch/um/sys-x86_64/Makefile4
-rw-r--r--arch/um/sys-x86_64/setjmp.S54
-rw-r--r--arch/x86_64/Kconfig4
-rw-r--r--arch/x86_64/kernel/e820.c48
-rw-r--r--arch/x86_64/kernel/setup.c1
-rw-r--r--arch/x86_64/kernel/smpboot.c3
-rw-r--r--arch/x86_64/kernel/suspend_asm.S2
-rw-r--r--arch/x86_64/kernel/time.c37
-rw-r--r--arch/x86_64/mm/fault.c6
-rw-r--r--arch/x86_64/mm/init.c2
-rw-r--r--drivers/ata/ata_piix.c2
-rw-r--r--drivers/ata/sata_nv.c6
-rw-r--r--drivers/ata/sata_sis.c6
-rw-r--r--drivers/ata/sata_uli.c6
-rw-r--r--drivers/ata/sata_via.c7
-rw-r--r--drivers/atm/he.c16
-rw-r--r--drivers/base/node.c13
-rw-r--r--drivers/char/rtc.c5
-rw-r--r--drivers/ide/mips/au1xxx-ide.c4
-rw-r--r--drivers/media/video/videodev.c2
-rw-r--r--drivers/mmc/au1xmmc.c2
-rw-r--r--drivers/net/sunlance.c10
-rw-r--r--drivers/serial/serial_core.c14
-rw-r--r--drivers/video/fbsysfs.c12
-rw-r--r--fs/autofs4/expire.c6
-rw-r--r--fs/binfmt_elf.c10
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/jbd/commit.c182
-rw-r--r--fs/proc/proc_misc.c11
-rw-r--r--include/asm-alpha/mmzone.h1
-rw-r--r--include/asm-alpha/pgtable.h9
-rw-r--r--include/asm-arm/pgtable.h8
-rw-r--r--include/asm-arm26/pgtable.h8
-rw-r--r--include/asm-avr32/Kbuild3
-rw-r--r--include/asm-avr32/a.out.h26
-rw-r--r--include/asm-avr32/addrspace.h43
-rw-r--r--include/asm-avr32/arch-at32ap/at91rm9200_pdc.h36
-rw-r--r--include/asm-avr32/arch-at32ap/at91rm9200_usart.h123
-rw-r--r--include/asm-avr32/arch-at32ap/board.h35
-rw-r--r--include/asm-avr32/arch-at32ap/init.h21
-rw-r--r--include/asm-avr32/arch-at32ap/portmux.h16
-rw-r--r--include/asm-avr32/arch-at32ap/sm.h27
-rw-r--r--include/asm-avr32/arch-at32ap/smc.h60
-rw-r--r--include/asm-avr32/asm.h102
-rw-r--r--include/asm-avr32/atomic.h201
-rw-r--r--include/asm-avr32/auxvec.h4
-rw-r--r--include/asm-avr32/bitops.h296
-rw-r--r--include/asm-avr32/bug.h47
-rw-r--r--include/asm-avr32/bugs.h15
-rw-r--r--include/asm-avr32/byteorder.h25
-rw-r--r--include/asm-avr32/cache.h29
-rw-r--r--include/asm-avr32/cachectl.h11
-rw-r--r--include/asm-avr32/cacheflush.h129
-rw-r--r--include/asm-avr32/checksum.h156
-rw-r--r--include/asm-avr32/cputime.h6
-rw-r--r--include/asm-avr32/current.h15
-rw-r--r--include/asm-avr32/delay.h26
-rw-r--r--include/asm-avr32/div64.h6
-rw-r--r--include/asm-avr32/dma-mapping.h320
-rw-r--r--include/asm-avr32/dma.h8
-rw-r--r--include/asm-avr32/elf.h110
-rw-r--r--include/asm-avr32/emergency-restart.h6
-rw-r--r--include/asm-avr32/errno.h6
-rw-r--r--include/asm-avr32/fcntl.h6
-rw-r--r--include/asm-avr32/futex.h6
-rw-r--r--include/asm-avr32/hardirq.h34
-rw-r--r--include/asm-avr32/hw_irq.h9
-rw-r--r--include/asm-avr32/intc.h128
-rw-r--r--include/asm-avr32/io.h253
-rw-r--r--include/asm-avr32/ioctl.h6
-rw-r--r--include/asm-avr32/ioctls.h83
-rw-r--r--include/asm-avr32/ipcbuf.h29
-rw-r--r--include/asm-avr32/irq.h10
-rw-r--r--include/asm-avr32/irqflags.h68
-rw-r--r--include/asm-avr32/kdebug.h38
-rw-r--r--include/asm-avr32/kmap_types.h30
-rw-r--r--include/asm-avr32/kprobes.h34
-rw-r--r--include/asm-avr32/linkage.h7
-rw-r--r--include/asm-avr32/local.h6
-rw-r--r--include/asm-avr32/mach/serial_at91.h33
-rw-r--r--include/asm-avr32/mman.h17
-rw-r--r--include/asm-avr32/mmu.h10
-rw-r--r--include/asm-avr32/mmu_context.h148
-rw-r--r--include/asm-avr32/module.h28
-rw-r--r--include/asm-avr32/msgbuf.h31
-rw-r--r--include/asm-avr32/mutex.h9
-rw-r--r--include/asm-avr32/namei.h7
-rw-r--r--include/asm-avr32/numnodes.h7
-rw-r--r--include/asm-avr32/ocd.h78
-rw-r--r--include/asm-avr32/page.h112
-rw-r--r--include/asm-avr32/param.h23
-rw-r--r--include/asm-avr32/pci.h8
-rw-r--r--include/asm-avr32/percpu.h6
-rw-r--r--include/asm-avr32/pgalloc.h96
-rw-r--r--include/asm-avr32/pgtable-2level.h47
-rw-r--r--include/asm-avr32/pgtable.h408
-rw-r--r--include/asm-avr32/poll.h27
-rw-r--r--include/asm-avr32/posix_types.h129
-rw-r--r--include/asm-avr32/processor.h147
-rw-r--r--include/asm-avr32/ptrace.h154
-rw-r--r--include/asm-avr32/resource.h6
-rw-r--r--include/asm-avr32/scatterlist.h21
-rw-r--r--include/asm-avr32/sections.h6
-rw-r--r--include/asm-avr32/semaphore.h109
-rw-r--r--include/asm-avr32/sembuf.h25
-rw-r--r--include/asm-avr32/setup.h141
-rw-r--r--include/asm-avr32/shmbuf.h42
-rw-r--r--include/asm-avr32/shmparam.h6
-rw-r--r--include/asm-avr32/sigcontext.h34
-rw-r--r--include/asm-avr32/siginfo.h6
-rw-r--r--include/asm-avr32/signal.h168
-rw-r--r--include/asm-avr32/socket.h53
-rw-r--r--include/asm-avr32/sockios.h12
-rw-r--r--include/asm-avr32/stat.h79
-rw-r--r--include/asm-avr32/statfs.h6
-rw-r--r--include/asm-avr32/string.h17
-rw-r--r--include/asm-avr32/sysreg.h332
-rw-r--r--include/asm-avr32/system.h155
-rw-r--r--include/asm-avr32/termbits.h173
-rw-r--r--include/asm-avr32/termios.h80
-rw-r--r--include/asm-avr32/thread_info.h106
-rw-r--r--include/asm-avr32/timex.h40
-rw-r--r--include/asm-avr32/tlb.h32
-rw-r--r--include/asm-avr32/tlbflush.h40
-rw-r--r--include/asm-avr32/topology.h6
-rw-r--r--include/asm-avr32/traps.h23
-rw-r--r--include/asm-avr32/types.h70
-rw-r--r--include/asm-avr32/uaccess.h335
-rw-r--r--include/asm-avr32/ucontext.h12
-rw-r--r--include/asm-avr32/unaligned.h25
-rw-r--r--include/asm-avr32/unistd.h387
-rw-r--r--include/asm-avr32/user.h65
-rw-r--r--include/asm-cris/pgtable.h4
-rw-r--r--include/asm-frv/bitops.h96
-rw-r--r--include/asm-frv/cpu-irqs.h54
-rw-r--r--include/asm-frv/hardirq.h5
-rw-r--r--include/asm-frv/irq-routing.h70
-rw-r--r--include/asm-frv/irq.h26
-rw-r--r--include/asm-frv/mb93091-fpga-irqs.h6
-rw-r--r--include/asm-frv/mb93093-fpga-irqs.h6
-rw-r--r--include/asm-frv/mb93493-irqs.h6
-rw-r--r--include/asm-frv/mb93493-regs.h2
-rw-r--r--include/asm-frv/pgtable.h8
-rw-r--r--include/asm-generic/4level-fixup.h4
-rw-r--r--include/asm-generic/percpu.h4
-rw-r--r--include/asm-generic/pgtable-nopmd.h2
-rw-r--r--include/asm-generic/pgtable-nopud.h2
-rw-r--r--include/asm-generic/pgtable.h3
-rw-r--r--include/asm-generic/vmlinux.lds.h3
-rw-r--r--include/asm-i386/Kbuild1
-rw-r--r--include/asm-i386/dma-mapping.h9
-rw-r--r--include/asm-i386/fixmap.h7
-rw-r--r--include/asm-i386/mmzone.h6
-rw-r--r--include/asm-i386/pgtable-2level.h3
-rw-r--r--include/asm-i386/pgtable-3level.h4
-rw-r--r--include/asm-i386/pgtable.h45
-rw-r--r--include/asm-i386/processor.h60
-rw-r--r--include/asm-i386/ptrace-abi.h39
-rw-r--r--include/asm-i386/ptrace.h35
-rw-r--r--include/asm-i386/sync_bitops.h156
-rw-r--r--include/asm-i386/system.h36
-rw-r--r--include/asm-ia64/numa.h4
-rw-r--r--include/asm-ia64/pgtable.h14
-rw-r--r--include/asm-ia64/smp.h2
-rw-r--r--include/asm-m32r/pgtable-2level.h6
-rw-r--r--include/asm-m32r/pgtable.h4
-rw-r--r--include/asm-m68k/motorola_pgtable.h1
-rw-r--r--include/asm-mips/mach-au1x00/au1xxx_dbdma.h6
-rw-r--r--include/asm-mips/pgtable-32.h4
-rw-r--r--include/asm-mips/pgtable-64.h10
-rw-r--r--include/asm-mips/pgtable.h2
-rw-r--r--include/asm-parisc/pgtable.h9
-rw-r--r--include/asm-powerpc/pgtable-4k.h5
-rw-r--r--include/asm-powerpc/pgtable.h11
-rw-r--r--include/asm-ppc/pgtable.h8
-rw-r--r--include/asm-s390/percpu.h20
-rw-r--r--include/asm-s390/pgtable.h10
-rw-r--r--include/asm-s390/processor.h2
-rw-r--r--include/asm-sh/pgtable-2level.h5
-rw-r--r--include/asm-sh/pgtable.h4
-rw-r--r--include/asm-sh64/pgtable.h6
-rw-r--r--include/asm-sparc/pgtable.h4
-rw-r--r--include/asm-sparc64/pgtable.h5
-rw-r--r--include/asm-um/pgtable-2level.h2
-rw-r--r--include/asm-um/pgtable-3level.h5
-rw-r--r--include/asm-um/pgtable.h4
-rw-r--r--include/asm-um/processor-generic.h4
-rw-r--r--include/asm-um/ptrace-generic.h14
-rw-r--r--include/asm-um/ptrace-x86_64.h4
-rw-r--r--include/asm-x86_64/Kbuild1
-rw-r--r--include/asm-x86_64/e820.h1
-rw-r--r--include/asm-x86_64/percpu.h12
-rw-r--r--include/asm-x86_64/pgtable.h16
-rw-r--r--include/asm-x86_64/ptrace-abi.h51
-rw-r--r--include/asm-x86_64/ptrace.h59
-rw-r--r--include/asm-x86_64/smp.h2
-rw-r--r--include/asm-xtensa/pgtable.h4
-rw-r--r--include/linux/bootmem.h100
-rw-r--r--include/linux/console.h5
-rw-r--r--include/linux/cpu.h8
-rw-r--r--include/linux/dccp.h14
-rw-r--r--include/linux/elf-em.h1
-rw-r--r--include/linux/elfnote.h90
-rw-r--r--include/linux/gfp.h36
-rw-r--r--include/linux/highmem.h5
-rw-r--r--include/linux/irq.h6
-rw-r--r--include/linux/kernel.h1
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--include/linux/mm.h128
-rw-r--r--include/linux/mmzone.h120
-rw-r--r--include/linux/netfilter/Kbuild2
-rw-r--r--include/linux/page-flags.h35
-rw-r--r--include/linux/pagemap.h15
-rw-r--r--include/linux/percpu.h89
-rw-r--r--include/linux/resume-trace.h24
-rw-r--r--include/linux/rmap.h14
-rw-r--r--include/linux/selinux.h29
-rw-r--r--include/linux/slab.h29
-rw-r--r--include/linux/smp.h3
-rw-r--r--include/linux/suspend.h32
-rw-r--r--include/linux/swap.h12
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--include/linux/vmstat.h18
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/net/cipso_ipv4.h23
-rw-r--r--include/net/netlabel.h57
-rw-r--r--include/net/netlink.h20
-rw-r--r--kernel/audit.c14
-rw-r--r--kernel/auditfilter.c2
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/cpu.c138
-rw-r--r--kernel/cpuset.c6
-rw-r--r--kernel/irq/handle.c2
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/power/Kconfig11
-rw-r--r--kernel/power/Makefile2
-rw-r--r--kernel/power/disk.c7
-rw-r--r--kernel/power/main.c40
-rw-r--r--kernel/power/power.h59
-rw-r--r--kernel/power/smp.c62
-rw-r--r--kernel/power/snapshot.c1155
-rw-r--r--kernel/power/swap.c270
-rw-r--r--kernel/power/swsusp.c5
-rw-r--r--kernel/power/user.c15
-rw-r--r--kernel/printk.c3
-rw-r--r--kernel/profile.c16
-rw-r--r--kernel/sched.c54
-rw-r--r--kernel/sysctl.c11
-rw-r--r--lib/Kconfig.debug4
-rw-r--r--mm/Makefile2
-rw-r--r--mm/allocpercpu.c129
-rw-r--r--mm/bootmem.c202
-rw-r--r--mm/filemap.c25
-rw-r--r--mm/fremap.c4
-rw-r--r--mm/highmem.c13
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/internal.h4
-rw-r--r--mm/memory.c77
-rw-r--r--mm/mempolicy.c19
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mmap.c12
-rw-r--r--mm/mprotect.c51
-rw-r--r--mm/msync.c196
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/oom_kill.c97
-rw-r--r--mm/page-writeback.c29
-rw-r--r--mm/page_alloc.c233
-rw-r--r--mm/page_io.c48
-rw-r--r--mm/rmap.c65
-rw-r--r--mm/shmem.c1
-rw-r--r--mm/slab.c310
-rw-r--r--mm/slob.c49
-rw-r--r--mm/swap.c49
-rw-r--r--mm/vmalloc.c8
-rw-r--r--mm/vmscan.c110
-rw-r--r--mm/vmstat.c49
-rw-r--r--net/dccp/Kconfig16
-rw-r--r--net/dccp/Makefile2
-rw-r--r--net/dccp/ccids/ccid2.c2
-rw-r--r--net/dccp/ccids/ccid3.c2
-rw-r--r--net/dccp/ipv4.c3
-rw-r--r--net/dccp/probe.c198
-rw-r--r--net/dccp/proto.c11
-rw-r--r--net/ipv4/Kconfig53
-rw-r--r--net/ipv4/cipso_ipv4.c267
-rw-r--r--net/ipv4/sysctl_net_ipv4.c6
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/netlabel/Kconfig5
-rw-r--r--net/netlabel/netlabel_cipso_v4.c628
-rw-r--r--net/netlabel/netlabel_cipso_v4.h225
-rw-r--r--net/netlabel/netlabel_domainhash.c183
-rw-r--r--net/netlabel/netlabel_domainhash.h6
-rw-r--r--net/netlabel/netlabel_kapi.c23
-rw-r--r--net/netlabel/netlabel_mgmt.c541
-rw-r--r--net/netlabel/netlabel_mgmt.h211
-rw-r--r--net/netlabel/netlabel_unlabeled.c79
-rw-r--r--net/netlabel/netlabel_unlabeled.h41
-rw-r--r--net/netlabel/netlabel_user.c82
-rw-r--r--net/netlabel/netlabel_user.h141
-rw-r--r--security/selinux/Kconfig37
-rw-r--r--security/selinux/exports.c13
-rw-r--r--security/selinux/hooks.c69
-rw-r--r--security/selinux/include/objsec.h4
-rw-r--r--security/selinux/include/security.h7
-rw-r--r--security/selinux/ss/mls.c21
-rw-r--r--security/selinux/ss/policydb.c27
-rw-r--r--security/selinux/ss/policydb.h7
-rw-r--r--security/selinux/ss/services.c30
-rw-r--r--sound/oss/au1550_ac97.c6
-rw-r--r--sound/sparc/amd7930.c20
497 files changed, 25494 insertions, 5470 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 71d05f481727..766abdab94e7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1363,6 +1363,11 @@ running once the system is up.
1363 1363
1364 reserve= [KNL,BUGS] Force the kernel to ignore some iomem area 1364 reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
1365 1365
1366 reservetop= [IA-32]
1367 Format: nn[KMG]
1368 Reserves a hole at the top of the kernel virtual
1369 address space.
1370
1366 resume= [SWSUSP] 1371 resume= [SWSUSP]
1367 Specify the partition device for software suspend 1372 Specify the partition device for software suspend
1368 1373
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index c45daabd3bfe..74563b38ffd9 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -1,7 +1,6 @@
1DCCP protocol 1DCCP protocol
2============ 2============
3 3
4Last updated: 10 November 2005
5 4
6Contents 5Contents
7======== 6========
@@ -42,8 +41,11 @@ Socket options
42DCCP_SOCKOPT_PACKET_SIZE is used for CCID3 to set default packet size for 41DCCP_SOCKOPT_PACKET_SIZE is used for CCID3 to set default packet size for
43calculations. 42calculations.
44 43
45DCCP_SOCKOPT_SERVICE sets the service. This is compulsory as per the 44DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
46specification. If you don't set it you will get EPROTO. 45service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
46the socket will fall back to 0 (which means that no meaningful service code
47is present). Connecting sockets set at most one service option; for
48listening sockets, multiple service codes can be specified.
47 49
48Notes 50Notes
49===== 51=====
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index 4117802af0f8..a66bec222b16 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -52,3 +52,18 @@ suspend image will be as small as possible.
52 52
53Reading from this file will display the current image size limit, which 53Reading from this file will display the current image size limit, which
54is set to 500 MB by default. 54is set to 500 MB by default.
55
56/sys/power/pm_trace controls the code which saves the last PM event point in
57the RTC across reboots, so that you can debug a machine that just hangs
58during suspend (or more commonly, during resume). Namely, the RTC is only
59used to save the last PM event point if this file contains '1'. Initially it
60contains '0' which may be changed to '1' by writing a string representing a
61nonzero integer into it.
62
63To use this debugging feature you should attempt to suspend the machine, then
64reboot it and run
65
66 dmesg -s 1000000 | grep 'hash matches'
67
68CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
69set to a random invalid time after a resume.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 7cee90223d3a..20d0d797f539 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm:
29- drop-caches 29- drop-caches
30- zone_reclaim_mode 30- zone_reclaim_mode
31- min_unmapped_ratio 31- min_unmapped_ratio
32- min_slab_ratio
32- panic_on_oom 33- panic_on_oom
33 34
34============================================================== 35==============================================================
@@ -138,7 +139,6 @@ This is value ORed together of
1381 = Zone reclaim on 1391 = Zone reclaim on
1392 = Zone reclaim writes dirty pages out 1402 = Zone reclaim writes dirty pages out
1404 = Zone reclaim swaps pages 1414 = Zone reclaim swaps pages
1418 = Also do a global slab reclaim pass
142 142
143zone_reclaim_mode is set during bootup to 1 if it is determined that pages 143zone_reclaim_mode is set during bootup to 1 if it is determined that pages
144from remote zones will cause a measurable performance reduction. The 144from remote zones will cause a measurable performance reduction. The
@@ -162,18 +162,13 @@ Allowing regular swap effectively restricts allocations to the local
162node unless explicitly overridden by memory policies or cpuset 162node unless explicitly overridden by memory policies or cpuset
163configurations. 163configurations.
164 164
165It may be advisable to allow slab reclaim if the system makes heavy
166use of files and builds up large slab caches. However, the slab
167shrink operation is global, may take a long time and free slabs
168in all nodes of the system.
169
170============================================================= 165=============================================================
171 166
172min_unmapped_ratio: 167min_unmapped_ratio:
173 168
174This is available only on NUMA kernels. 169This is available only on NUMA kernels.
175 170
176A percentage of the file backed pages in each zone. Zone reclaim will only 171A percentage of the total pages in each zone. Zone reclaim will only
177occur if more than this percentage of pages are file backed and unmapped. 172occur if more than this percentage of pages are file backed and unmapped.
178This is to ensure that a minimal amount of local pages is still available for 173This is to ensure that a minimal amount of local pages is still available for
179file I/O even if the node is overallocated. 174file I/O even if the node is overallocated.
@@ -182,6 +177,24 @@ The default is 1 percent.
182 177
183============================================================= 178=============================================================
184 179
180min_slab_ratio:
181
182This is available only on NUMA kernels.
183
184A percentage of the total pages in each zone. On zone reclaim
185(that is, when fallback from the local zone occurs), slabs will be reclaimed if more
186than this percentage of pages in a zone are reclaimable slab pages.
187This ensures that the slab growth stays under control even in NUMA
188systems that rarely perform global reclaim.
189
190The default is 5 percent.
191
192Note that slab reclaim is triggered in a per zone / node fashion.
193The process of reclaiming slab memory is currently not node specific
194and may not be fast.
195
196=============================================================
197
185panic_on_oom 198panic_on_oom
186 199
187This enables or disables panic on out-of-memory feature. If this is set to 1, 200This enables or disables panic on out-of-memory feature. If this is set to 1,
diff --git a/MAINTAINERS b/MAINTAINERS
index bd446e251d5b..63673e6513b7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -443,6 +443,23 @@ W: http://people.redhat.com/sgrubb/audit/
443T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git 443T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git
444S: Maintained 444S: Maintained
445 445
446AVR32 ARCHITECTURE
447P: Atmel AVR32 Support Team
448M: avr32@atmel.com
449P: Haavard Skinnemoen
450M: hskinnemoen@atmel.com
451W: http://www.atmel.com/products/AVR32/
452W: http://avr32linux.org/
453W: http://avrfreaks.net/
454S: Supported
455
456AVR32/AT32AP MACHINE SUPPORT
457P: Atmel AVR32 Support Team
458M: avr32@atmel.com
459P: Haavard Skinnemoen
460M: hskinnemoen@atmel.com
461S: Supported
462
446AX.25 NETWORK LAYER 463AX.25 NETWORK LAYER
447P: Ralf Baechle 464P: Ralf Baechle
448M: ralf@linux-mips.org 465M: ralf@linux-mips.org
@@ -2031,6 +2048,13 @@ L: netfilter@lists.netfilter.org
2031L: netfilter-devel@lists.netfilter.org 2048L: netfilter-devel@lists.netfilter.org
2032S: Supported 2049S: Supported
2033 2050
2051NETLABEL
2052P: Paul Moore
2053M: paul.moore@hp.com
2054W: http://netlabel.sf.net
2055L: netdev@vger.kernel.org
2056S: Supported
2057
2034NETROM NETWORK LAYER 2058NETROM NETWORK LAYER
2035P: Ralf Baechle 2059P: Ralf Baechle
2036M: ralf@linux-mips.org 2060M: ralf@linux-mips.org
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 213c7850d5fb..2b36afd8e969 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -381,7 +381,7 @@ config ALPHA_EV56
381 381
382config ALPHA_EV56 382config ALPHA_EV56
383 prompt "EV56 CPU (speed >= 333MHz)?" 383 prompt "EV56 CPU (speed >= 333MHz)?"
384 depends on ALPHA_NORITAKE && ALPHA_PRIMO 384 depends on ALPHA_NORITAKE || ALPHA_PRIMO
385 385
386config ALPHA_EV56 386config ALPHA_EV56
387 prompt "EV56 CPU (speed >= 400MHz)?" 387 prompt "EV56 CPU (speed >= 400MHz)?"
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 917dad1b74c8..550f4907d613 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -270,7 +270,7 @@ callback_init(void * kernel_end)
270void 270void
271paging_init(void) 271paging_init(void)
272{ 272{
273 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 273 unsigned long zones_size[MAX_NR_ZONES] = {0, };
274 unsigned long dma_pfn, high_pfn; 274 unsigned long dma_pfn, high_pfn;
275 275
276 dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; 276 dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 88a999df0ab3..591fc3187c7f 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -177,7 +177,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
177 * Free the page table, if there was one. 177 * Free the page table, if there was one.
178 */ 178 */
179 if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) 179 if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE)
180 pte_free_kernel(pmd_page_kernel(pmd)); 180 pte_free_kernel(pmd_page_vaddr(pmd));
181 } 181 }
182 182
183 addr += PGDIR_SIZE; 183 addr += PGDIR_SIZE;
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
new file mode 100644
index 000000000000..5f1694eea842
--- /dev/null
+++ b/arch/avr32/Kconfig
@@ -0,0 +1,196 @@
1#
2# For a description of the syntax of this configuration file,
3# see Documentation/kbuild/kconfig-language.txt.
4#
5
6mainmenu "Linux Kernel Configuration"
7
8config AVR32
9 bool
10 default y
11 # With EMBEDDED=n, we get lots of stuff automatically selected
12 # that we usually don't need on AVR32.
13 select EMBEDDED
14 help
15 AVR32 is a high-performance 32-bit RISC microprocessor core,
16 designed for cost-sensitive embedded applications, with particular
17 emphasis on low power consumption and high code density.
18
19 There is an AVR32 Linux project with a web page at
20 http://avr32linux.org/.
21
22config UID16
23 bool
24
25config GENERIC_HARDIRQS
26 bool
27 default y
28
29config HARDIRQS_SW_RESEND
30 bool
31 default y
32
33config GENERIC_IRQ_PROBE
34 bool
35 default y
36
37config RWSEM_GENERIC_SPINLOCK
38 bool
39 default y
40
41config GENERIC_TIME
42 bool
43 default y
44
45config RWSEM_XCHGADD_ALGORITHM
46 bool
47
48config GENERIC_BUST_SPINLOCK
49 bool
50
51config GENERIC_HWEIGHT
52 bool
53 default y
54
55config GENERIC_CALIBRATE_DELAY
56 bool
57 default y
58
59source "init/Kconfig"
60
61menu "System Type and features"
62
63config SUBARCH_AVR32B
64 bool
65config MMU
66 bool
67config PERFORMANCE_COUNTERS
68 bool
69
70config PLATFORM_AT32AP
71 bool
72 select SUBARCH_AVR32B
73 select MMU
74 select PERFORMANCE_COUNTERS
75
76choice
77 prompt "AVR32 CPU type"
78 default CPU_AT32AP7000
79
80config CPU_AT32AP7000
81 bool "AT32AP7000"
82 select PLATFORM_AT32AP
83endchoice
84
85#
86# CPU Daughterboards for ATSTK1000
87config BOARD_ATSTK1002
88 bool
89
90choice
91 prompt "AVR32 board type"
92 default BOARD_ATSTK1000
93
94config BOARD_ATSTK1000
95 bool "ATSTK1000 evaluation board"
96 select BOARD_ATSTK1002 if CPU_AT32AP7000
97endchoice
98
99choice
100 prompt "Boot loader type"
101 default LOADER_U_BOOT
102
103config LOADER_U_BOOT
104 bool "U-Boot (or similar) bootloader"
105endchoice
106
107config LOAD_ADDRESS
108 hex
109 default 0x10000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y
110
111config ENTRY_ADDRESS
112 hex
113 default 0x90000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y
114
115config PHYS_OFFSET
116 hex
117 default 0x10000000 if CPU_AT32AP7000=y
118
119source "kernel/Kconfig.preempt"
120
121config HAVE_ARCH_BOOTMEM_NODE
122 bool
123 default n
124
125config ARCH_HAVE_MEMORY_PRESENT
126 bool
127 default n
128
129config NEED_NODE_MEMMAP_SIZE
130 bool
131 default n
132
133config ARCH_FLATMEM_ENABLE
134 bool
135 default y
136
137config ARCH_DISCONTIGMEM_ENABLE
138 bool
139 default n
140
141config ARCH_SPARSEMEM_ENABLE
142 bool
143 default n
144
145source "mm/Kconfig"
146
147config OWNERSHIP_TRACE
148 bool "Ownership trace support"
149 default y
150 help
151 Say Y to generate an Ownership Trace message on every context switch,
152 enabling Nexus-compliant debuggers to keep track of the PID of the
153 currently executing task.
154
155# FPU emulation goes here
156
157source "kernel/Kconfig.hz"
158
159config CMDLINE
160 string "Default kernel command line"
161 default ""
162 help
163 If you don't have a boot loader capable of passing a command line string
164 to the kernel, you may specify one here. As a minimum, you should specify
165 the memory size and the root device (e.g., mem=8M, root=/dev/nfs).
166
167endmenu
168
169menu "Bus options"
170
171config PCI
172 bool
173
174source "drivers/pci/Kconfig"
175
176source "drivers/pcmcia/Kconfig"
177
178endmenu
179
180menu "Executable file formats"
181source "fs/Kconfig.binfmt"
182endmenu
183
184source "net/Kconfig"
185
186source "drivers/Kconfig"
187
188source "fs/Kconfig"
189
190source "arch/avr32/Kconfig.debug"
191
192source "security/Kconfig"
193
194source "crypto/Kconfig"
195
196source "lib/Kconfig"
diff --git a/arch/avr32/Kconfig.debug b/arch/avr32/Kconfig.debug
new file mode 100644
index 000000000000..64ace00fe6cb
--- /dev/null
+++ b/arch/avr32/Kconfig.debug
@@ -0,0 +1,19 @@
1menu "Kernel hacking"
2
3config TRACE_IRQFLAGS_SUPPORT
4 bool
5 default y
6
7source "lib/Kconfig.debug"
8
9config KPROBES
10 bool "Kprobes"
11 depends on DEBUG_KERNEL
12 help
13 Kprobes allows you to trap at almost any kernel address and
14 execute a callback function. register_kprobe() establishes
15 a probepoint and specifies the callback. Kprobes is useful
16 for kernel debugging, non-intrusive instrumentation and testing.
17 If in doubt, say "N".
18
19endmenu
diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile
new file mode 100644
index 000000000000..cefc95a73980
--- /dev/null
+++ b/arch/avr32/Makefile
@@ -0,0 +1,84 @@
1#
2# This file is subject to the terms and conditions of the GNU General Public
3# License. See the file "COPYING" in the main directory of this archive
4# for more details.
5#
6# Copyright (C) 2004-2006 Atmel Corporation.
7
8# Default target when executing plain make
9.PHONY: all
10all: uImage vmlinux.elf linux.lst
11
12KBUILD_DEFCONFIG := atstk1002_defconfig
13
14CFLAGS += -pipe -fno-builtin -mno-pic
15AFLAGS += -mrelax -mno-pic
16CFLAGS_MODULE += -mno-relax
17LDFLAGS_vmlinux += --relax
18
# CPU-specific compiler/assembler flags.
# The Kconfig symbol for this CPU is CPU_AT32AP7000 (see arch/avr32/Kconfig
# and atstk1002_defconfig), so the make variable must be keyed on
# CONFIG_CPU_AT32AP7000; with any other name cpuflags-y stays empty and
# -mcpu=ap7000 is silently dropped.
cpuflags-$(CONFIG_CPU_AT32AP7000)	+= -mcpu=ap7000

CFLAGS		+= $(cpuflags-y)
AFLAGS		+= $(cpuflags-y)
23
24CHECKFLAGS += -D__avr32__
25
26LIBGCC := $(shell $(CC) $(CFLAGS) -print-libgcc-file-name)
27
28head-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/head.o
29head-y += arch/avr32/kernel/head.o
30core-$(CONFIG_PLATFORM_AT32AP) += arch/avr32/mach-at32ap/
31core-$(CONFIG_BOARD_ATSTK1000) += arch/avr32/boards/atstk1000/
32core-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/
33core-y += arch/avr32/kernel/
34core-y += arch/avr32/mm/
35libs-y += arch/avr32/lib/ #$(LIBGCC)
36
37archincdir-$(CONFIG_PLATFORM_AT32AP) := arch-at32ap
38
39include/asm-avr32/.arch: $(wildcard include/config/platform/*.h) include/config/auto.conf
40 @echo ' SYMLINK include/asm-avr32/arch -> include/asm-avr32/$(archincdir-y)'
41ifneq ($(KBUILD_SRC),)
42 $(Q)mkdir -p include/asm-avr32
43 $(Q)ln -fsn $(srctree)/include/asm-avr32/$(archincdir-y) include/asm-avr32/arch
44else
45 $(Q)ln -fsn $(archincdir-y) include/asm-avr32/arch
46endif
47 @touch $@
48
49archprepare: include/asm-avr32/.arch
50
51BOOT_TARGETS := vmlinux.elf vmlinux.bin uImage uImage.srec
52
53.PHONY: $(BOOT_TARGETS) install
54
55boot := arch/$(ARCH)/boot/images
56
57 KBUILD_IMAGE := $(boot)/uImage
58vmlinux.elf: KBUILD_IMAGE := $(boot)/vmlinux.elf
59vmlinux.cso: KBUILD_IMAGE := $(boot)/vmlinux.cso
60uImage.srec: KBUILD_IMAGE := $(boot)/uImage.srec
61uImage: KBUILD_IMAGE := $(boot)/uImage
62
# Commands for generating a mixed source/assembly listing (linux.lst) and a
# plain disassembly (linux.s) of vmlinux.  $(OBJDUMP) is used so the tool
# follows CROSS_COMPILE like the rest of the toolchain instead of assuming
# an "avr32-linux-" prefix.
quiet_cmd_listing = LST $@
      cmd_listing = $(OBJDUMP) $(OBJDUMPFLAGS) -lS $< > $@
quiet_cmd_disasm = DIS $@
      cmd_disasm = $(OBJDUMP) $(OBJDUMPFLAGS) -d $< > $@
67
68vmlinux.elf vmlinux.bin uImage.srec uImage vmlinux.cso: vmlinux
69 $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
70
71install: vmlinux
72 $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
73
74linux.s: vmlinux
75 $(call if_changed,disasm)
76
77linux.lst: vmlinux
78 $(call if_changed,listing)
79
80define archhelp
81 @echo '* vmlinux.elf - ELF image with load address 0'
82 @echo ' vmlinux.cso - PathFinder CSO image'
83 @echo ' uImage - Create a bootable image for U-Boot'
84endef
diff --git a/arch/avr32/boards/atstk1000/Makefile b/arch/avr32/boards/atstk1000/Makefile
new file mode 100644
index 000000000000..df9499480530
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/Makefile
@@ -0,0 +1,2 @@
1obj-y += setup.o spi.o flash.o
2obj-$(CONFIG_BOARD_ATSTK1002) += atstk1002.o
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
new file mode 100644
index 000000000000..49164e9aadd6
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -0,0 +1,37 @@
1/*
2 * ATSTK1002 daughterboard-specific init code
3 *
4 * Copyright (C) 2005-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/init.h>
11
12#include <asm/arch/board.h>
13
14struct eth_platform_data __initdata eth0_data = {
15 .valid = 1,
16 .mii_phy_addr = 0x10,
17 .is_rmii = 0,
18 .hw_addr = { 0x6a, 0x87, 0x71, 0x14, 0xcd, 0xcb },
19};
20
21extern struct lcdc_platform_data atstk1000_fb0_data;
22
23static int __init atstk1002_init(void)
24{
25 at32_add_system_devices();
26
27 at32_add_device_usart(1); /* /dev/ttyS0 */
28 at32_add_device_usart(2); /* /dev/ttyS1 */
29 at32_add_device_usart(3); /* /dev/ttyS2 */
30
31 at32_add_device_eth(0, &eth0_data);
32 at32_add_device_spi(0);
33 at32_add_device_lcdc(0, &atstk1000_fb0_data);
34
35 return 0;
36}
37postcore_initcall(atstk1002_init);
diff --git a/arch/avr32/boards/atstk1000/flash.c b/arch/avr32/boards/atstk1000/flash.c
new file mode 100644
index 000000000000..aac4300cca12
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/flash.c
@@ -0,0 +1,95 @@
1/*
2 * ATSTK1000 board-specific flash initialization
3 *
4 * Copyright (C) 2005-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/init.h>
11#include <linux/platform_device.h>
12#include <linux/mtd/mtd.h>
13#include <linux/mtd/partitions.h>
14#include <linux/mtd/physmap.h>
15
16#include <asm/arch/smc.h>
17
18static struct smc_config flash_config __initdata = {
19 .ncs_read_setup = 0,
20 .nrd_setup = 40,
21 .ncs_write_setup = 0,
22 .nwe_setup = 10,
23
24 .ncs_read_pulse = 80,
25 .nrd_pulse = 40,
26 .ncs_write_pulse = 65,
27 .nwe_pulse = 55,
28
29 .read_cycle = 120,
30 .write_cycle = 120,
31
32 .bus_width = 2,
33 .nrd_controlled = 1,
34 .nwe_controlled = 1,
35 .byte_write = 1,
36};
37
38static struct mtd_partition flash_parts[] = {
39 {
40 .name = "u-boot",
41 .offset = 0x00000000,
42 .size = 0x00020000, /* 128 KiB */
43 .mask_flags = MTD_WRITEABLE,
44 },
45 {
46 .name = "root",
47 .offset = 0x00020000,
48 .size = 0x007d0000,
49 },
50 {
51 .name = "env",
52 .offset = 0x007f0000,
53 .size = 0x00010000,
54 .mask_flags = MTD_WRITEABLE,
55 },
56};
57
58static struct physmap_flash_data flash_data = {
59 .width = 2,
60 .nr_parts = ARRAY_SIZE(flash_parts),
61 .parts = flash_parts,
62};
63
64static struct resource flash_resource = {
65 .start = 0x00000000,
66 .end = 0x007fffff,
67 .flags = IORESOURCE_MEM,
68};
69
70static struct platform_device flash_device = {
71 .name = "physmap-flash",
72 .id = 0,
73 .resource = &flash_resource,
74 .num_resources = 1,
75 .dev = {
76 .platform_data = &flash_data,
77 },
78};
79
80/* This needs to be called after the SMC has been initialized */
81static int __init atstk1000_flash_init(void)
82{
83 int ret;
84
85 ret = smc_set_configuration(0, &flash_config);
86 if (ret < 0) {
87 printk(KERN_ERR "atstk1000: failed to set NOR flash timing\n");
88 return ret;
89 }
90
91 platform_device_register(&flash_device);
92
93 return 0;
94}
95device_initcall(atstk1000_flash_init);
diff --git a/arch/avr32/boards/atstk1000/setup.c b/arch/avr32/boards/atstk1000/setup.c
new file mode 100644
index 000000000000..191ab85de9a3
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/setup.c
@@ -0,0 +1,59 @@
1/*
2 * ATSTK1000 board-specific setup code.
3 *
4 * Copyright (C) 2005-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/bootmem.h>
11#include <linux/init.h>
12#include <linux/types.h>
13#include <linux/linkage.h>
14
15#include <asm/setup.h>
16
17#include <asm/arch/board.h>
18
19/* Initialized by bootloader-specific startup code. */
20struct tag *bootloader_tags __initdata;
21
22struct lcdc_platform_data __initdata atstk1000_fb0_data;
23
24asmlinkage void __init board_early_init(void)
25{
26 extern void sdram_init(void);
27
28#ifdef CONFIG_LOADER_STANDALONE
29 sdram_init();
30#endif
31}
32
33void __init board_setup_fbmem(unsigned long fbmem_start,
34 unsigned long fbmem_size)
35{
36 if (!fbmem_size)
37 return;
38
39 if (!fbmem_start) {
40 void *fbmem;
41
42 fbmem = alloc_bootmem_low_pages(fbmem_size);
43 fbmem_start = __pa(fbmem);
44 } else {
45 pg_data_t *pgdat;
46
47 for_each_online_pgdat(pgdat) {
48 if (fbmem_start >= pgdat->bdata->node_boot_start
49 && fbmem_start <= pgdat->bdata->node_low_pfn)
50 reserve_bootmem_node(pgdat, fbmem_start,
51 fbmem_size);
52 }
53 }
54
55 printk("%luKiB framebuffer memory at address 0x%08lx\n",
56 fbmem_size >> 10, fbmem_start);
57 atstk1000_fb0_data.fbmem_start = fbmem_start;
58 atstk1000_fb0_data.fbmem_size = fbmem_size;
59}
diff --git a/arch/avr32/boards/atstk1000/spi.c b/arch/avr32/boards/atstk1000/spi.c
new file mode 100644
index 000000000000..567726c82c6e
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/spi.c
@@ -0,0 +1,27 @@
1/*
2 * ATSTK1000 SPI devices
3 *
4 * Copyright (C) 2005 Atmel Norway
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/device.h>
11#include <linux/spi/spi.h>
12
13static struct spi_board_info spi_board_info[] __initdata = {
14 {
15 .modalias = "ltv350qv",
16 .max_speed_hz = 16000000,
17 .bus_num = 0,
18 .chip_select = 1,
19 },
20};
21
22static int board_init_spi(void)
23{
24 spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
25 return 0;
26}
27arch_initcall(board_init_spi);
diff --git a/arch/avr32/boot/images/Makefile b/arch/avr32/boot/images/Makefile
new file mode 100644
index 000000000000..ccd74eeecec3
--- /dev/null
+++ b/arch/avr32/boot/images/Makefile
@@ -0,0 +1,62 @@
1#
2# Copyright (C) 2004-2006 Atmel Corporation
3#
4# This file is subject to the terms and conditions of the GNU General Public
5# License. See the file "COPYING" in the main directory of this archive
6# for more details.
7#
8
9MKIMAGE := $(srctree)/scripts/mkuboot.sh
10
11extra-y := vmlinux.bin vmlinux.gz
12
13OBJCOPYFLAGS_vmlinux.bin := -O binary
14$(obj)/vmlinux.bin: vmlinux FORCE
15 $(call if_changed,objcopy)
16
17$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
18 $(call if_changed,gzip)
19
20quiet_cmd_uimage = UIMAGE $@
21 cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A avr32 -O linux -T kernel \
22 -C gzip -a $(CONFIG_LOAD_ADDRESS) -e $(CONFIG_ENTRY_ADDRESS) \
23 -n 'Linux-$(KERNELRELEASE)' -d $< $@
24
targets += uImage uImage.srec

# Rules using if_changed need the FORCE prerequisite so the recipe is always
# evaluated and if_changed can compare the saved command line; $< still
# expands to the first (real) prerequisite.
$(obj)/uImage: $(obj)/vmlinux.gz FORCE
	$(call if_changed,uimage)
	@echo ' Image $@ is ready'

OBJCOPYFLAGS_uImage.srec := -I binary -O srec
$(obj)/uImage.srec: $(obj)/uImage FORCE
	$(call if_changed,objcopy)
33
34OBJCOPYFLAGS_vmlinux.elf := --change-section-lma .text-0x80000000 \
35 --change-section-lma __ex_table-0x80000000 \
36 --change-section-lma .rodata-0x80000000 \
37 --change-section-lma .data-0x80000000 \
38 --change-section-lma .init-0x80000000 \
39 --change-section-lma .bss-0x80000000 \
40 --change-section-lma .initrd-0x80000000 \
41 --change-section-lma __param-0x80000000 \
42 --change-section-lma __ksymtab-0x80000000 \
43 --change-section-lma __ksymtab_gpl-0x80000000 \
44 --change-section-lma __kcrctab-0x80000000 \
45 --change-section-lma __kcrctab_gpl-0x80000000 \
46 --change-section-lma __ksymtab_strings-0x80000000 \
47 --change-section-lma .got-0x80000000 \
48 --set-start 0xa0000000
49$(obj)/vmlinux.elf: vmlinux FORCE
50 $(call if_changed,objcopy)
51
52quiet_cmd_sfdwarf = SFDWARF $@
53 cmd_sfdwarf = sfdwarf $< TO $@ GNUAVR IW $(SFDWARF_FLAGS) > $(obj)/sfdwarf.log
54
55$(obj)/vmlinux.cso: $(obj)/vmlinux.elf FORCE
56 $(call if_changed,sfdwarf)
57
58install: $(BOOTIMAGE)
59 sh $(srctree)/install-kernel.sh $<
60
61# Generated files to be removed upon make clean
62clean-files := vmlinux* uImage uImage.srec
diff --git a/arch/avr32/boot/u-boot/Makefile b/arch/avr32/boot/u-boot/Makefile
new file mode 100644
index 000000000000..125ddc96c275
--- /dev/null
+++ b/arch/avr32/boot/u-boot/Makefile
@@ -0,0 +1,3 @@
1extra-y := head.o
2
3obj-y := empty.o
diff --git a/arch/avr32/boot/u-boot/empty.S b/arch/avr32/boot/u-boot/empty.S
new file mode 100644
index 000000000000..8ac91a5f12f0
--- /dev/null
+++ b/arch/avr32/boot/u-boot/empty.S
@@ -0,0 +1 @@
/* Empty file */
diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S
new file mode 100644
index 000000000000..4488fa27fe94
--- /dev/null
+++ b/arch/avr32/boot/u-boot/head.S
@@ -0,0 +1,60 @@
1/*
2 * Startup code for use with the u-boot bootloader.
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <asm/setup.h>
11
12 /*
13 * The kernel is loaded where we want it to be and all caches
14 * have just been flushed. We get two parameters from u-boot:
15 *
16 * r12 contains a magic number (ATAG_MAGIC)
17 * r11 points to a tag table providing information about
18 * the system.
19 */
20 .section .init.text,"ax"
21 .global _start
22_start:
23 /* Check if the boot loader actually provided a tag table */
24 lddpc r0, magic_number
25 cp.w r12, r0
26 brne no_tag_table
27
28 /* Initialize .bss */
29 lddpc r2, bss_start_addr
30 lddpc r3, end_addr
31 mov r0, 0
32 mov r1, 0
331: st.d r2++, r0
34 cp r2, r3
35 brlo 1b
36
37 /*
38 * Save the tag table address for later use. This must be done
39 * _after_ .bss has been initialized...
40 */
41 lddpc r0, tag_table_addr
42 st.w r0[0], r11
43
44 /* Jump to loader-independent setup code */
45 rjmp kernel_entry
46
47 .align 2
48magic_number:
49 .long ATAG_MAGIC
50tag_table_addr:
51 .long bootloader_tags
52bss_start_addr:
53 .long __bss_start
54end_addr:
55 .long _end
56
57no_tag_table:
58 sub r12, pc, (. - 2f)
59 bral panic
602: .asciz "Boot loader didn't provide correct magic number\n"
diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig
new file mode 100644
index 000000000000..1d22255009fd
--- /dev/null
+++ b/arch/avr32/configs/atstk1002_defconfig
@@ -0,0 +1,754 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.18-rc1
4# Tue Jul 11 12:41:36 2006
5#
6CONFIG_AVR32=y
7CONFIG_GENERIC_HARDIRQS=y
8CONFIG_HARDIRQS_SW_RESEND=y
9CONFIG_GENERIC_IRQ_PROBE=y
10CONFIG_RWSEM_GENERIC_SPINLOCK=y
11CONFIG_GENERIC_HWEIGHT=y
12CONFIG_GENERIC_CALIBRATE_DELAY=y
13CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
14
15#
16# Code maturity level options
17#
18CONFIG_EXPERIMENTAL=y
19CONFIG_BROKEN_ON_SMP=y
20CONFIG_INIT_ENV_ARG_LIMIT=32
21
22#
23# General setup
24#
25CONFIG_LOCALVERSION=""
26# CONFIG_LOCALVERSION_AUTO is not set
27CONFIG_SWAP=y
28# CONFIG_SYSVIPC is not set
29# CONFIG_POSIX_MQUEUE is not set
30# CONFIG_BSD_PROCESS_ACCT is not set
31CONFIG_SYSCTL=y
32# CONFIG_AUDIT is not set
33# CONFIG_IKCONFIG is not set
34# CONFIG_RELAY is not set
35CONFIG_INITRAMFS_SOURCE=""
36CONFIG_CC_OPTIMIZE_FOR_SIZE=y
37CONFIG_EMBEDDED=y
38CONFIG_KALLSYMS=y
39# CONFIG_KALLSYMS_ALL is not set
40# CONFIG_KALLSYMS_EXTRA_PASS is not set
41CONFIG_HOTPLUG=y
42CONFIG_PRINTK=y
43CONFIG_BUG=y
44CONFIG_ELF_CORE=y
45# CONFIG_BASE_FULL is not set
46# CONFIG_FUTEX is not set
47# CONFIG_EPOLL is not set
48CONFIG_SHMEM=y
49# CONFIG_SLAB is not set
50# CONFIG_VM_EVENT_COUNTERS is not set
51# CONFIG_TINY_SHMEM is not set
52CONFIG_BASE_SMALL=1
53CONFIG_SLOB=y
54
55#
56# Loadable module support
57#
58CONFIG_MODULES=y
59CONFIG_MODULE_UNLOAD=y
60# CONFIG_MODULE_FORCE_UNLOAD is not set
61# CONFIG_MODVERSIONS is not set
62# CONFIG_MODULE_SRCVERSION_ALL is not set
63# CONFIG_KMOD is not set
64
65#
66# Block layer
67#
68# CONFIG_BLK_DEV_IO_TRACE is not set
69
70#
71# IO Schedulers
72#
73CONFIG_IOSCHED_NOOP=y
74# CONFIG_IOSCHED_AS is not set
75# CONFIG_IOSCHED_DEADLINE is not set
76# CONFIG_IOSCHED_CFQ is not set
77# CONFIG_DEFAULT_AS is not set
78# CONFIG_DEFAULT_DEADLINE is not set
79# CONFIG_DEFAULT_CFQ is not set
80CONFIG_DEFAULT_NOOP=y
81CONFIG_DEFAULT_IOSCHED="noop"
82
83#
84# System Type and features
85#
86CONFIG_SUBARCH_AVR32B=y
87CONFIG_MMU=y
88CONFIG_PERFORMANCE_COUNTERS=y
89CONFIG_PLATFORM_AT32AP=y
90CONFIG_CPU_AT32AP7000=y
91CONFIG_BOARD_ATSTK1002=y
92CONFIG_BOARD_ATSTK1000=y
93CONFIG_LOADER_U_BOOT=y
94CONFIG_LOAD_ADDRESS=0x10000000
95CONFIG_ENTRY_ADDRESS=0x90000000
96CONFIG_PHYS_OFFSET=0x10000000
97CONFIG_PREEMPT_NONE=y
98# CONFIG_PREEMPT_VOLUNTARY is not set
99# CONFIG_PREEMPT is not set
100# CONFIG_HAVE_ARCH_BOOTMEM_NODE is not set
101# CONFIG_ARCH_HAVE_MEMORY_PRESENT is not set
102# CONFIG_NEED_NODE_MEMMAP_SIZE is not set
103CONFIG_ARCH_FLATMEM_ENABLE=y
104# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
105# CONFIG_ARCH_SPARSEMEM_ENABLE is not set
106CONFIG_SELECT_MEMORY_MODEL=y
107CONFIG_FLATMEM_MANUAL=y
108# CONFIG_DISCONTIGMEM_MANUAL is not set
109# CONFIG_SPARSEMEM_MANUAL is not set
110CONFIG_FLATMEM=y
111CONFIG_FLAT_NODE_MEM_MAP=y
112# CONFIG_SPARSEMEM_STATIC is not set
113CONFIG_SPLIT_PTLOCK_CPUS=4
114# CONFIG_RESOURCES_64BIT is not set
115# CONFIG_OWNERSHIP_TRACE is not set
116# CONFIG_HZ_100 is not set
117CONFIG_HZ_250=y
118# CONFIG_HZ_1000 is not set
119CONFIG_HZ=250
120CONFIG_CMDLINE=""
121
122#
123# Bus options
124#
125
126#
127# PCCARD (PCMCIA/CardBus) support
128#
129# CONFIG_PCCARD is not set
130
131#
132# Executable file formats
133#
134CONFIG_BINFMT_ELF=y
135# CONFIG_BINFMT_MISC is not set
136
137#
138# Networking
139#
140CONFIG_NET=y
141
142#
143# Networking options
144#
145# CONFIG_NETDEBUG is not set
146CONFIG_PACKET=y
147CONFIG_PACKET_MMAP=y
148CONFIG_UNIX=y
149# CONFIG_NET_KEY is not set
150CONFIG_INET=y
151# CONFIG_IP_MULTICAST is not set
152# CONFIG_IP_ADVANCED_ROUTER is not set
153CONFIG_IP_FIB_HASH=y
154CONFIG_IP_PNP=y
155CONFIG_IP_PNP_DHCP=y
156# CONFIG_IP_PNP_BOOTP is not set
157# CONFIG_IP_PNP_RARP is not set
158# CONFIG_NET_IPIP is not set
159# CONFIG_NET_IPGRE is not set
160# CONFIG_ARPD is not set
161# CONFIG_SYN_COOKIES is not set
162# CONFIG_INET_AH is not set
163# CONFIG_INET_ESP is not set
164# CONFIG_INET_IPCOMP is not set
165# CONFIG_INET_XFRM_TUNNEL is not set
166# CONFIG_INET_TUNNEL is not set
167# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
168# CONFIG_INET_XFRM_MODE_TUNNEL is not set
169CONFIG_INET_DIAG=y
170CONFIG_INET_TCP_DIAG=y
171# CONFIG_TCP_CONG_ADVANCED is not set
172CONFIG_TCP_CONG_BIC=y
173# CONFIG_IPV6 is not set
174# CONFIG_INET6_XFRM_TUNNEL is not set
175# CONFIG_INET6_TUNNEL is not set
176# CONFIG_NETWORK_SECMARK is not set
177# CONFIG_NETFILTER is not set
178
179#
180# DCCP Configuration (EXPERIMENTAL)
181#
182# CONFIG_IP_DCCP is not set
183
184#
185# SCTP Configuration (EXPERIMENTAL)
186#
187# CONFIG_IP_SCTP is not set
188
189#
190# TIPC Configuration (EXPERIMENTAL)
191#
192# CONFIG_TIPC is not set
193# CONFIG_ATM is not set
194# CONFIG_BRIDGE is not set
195# CONFIG_VLAN_8021Q is not set
196# CONFIG_DECNET is not set
197# CONFIG_LLC2 is not set
198# CONFIG_IPX is not set
199# CONFIG_ATALK is not set
200# CONFIG_X25 is not set
201# CONFIG_LAPB is not set
202# CONFIG_NET_DIVERT is not set
203# CONFIG_ECONET is not set
204# CONFIG_WAN_ROUTER is not set
205
206#
207# QoS and/or fair queueing
208#
209# CONFIG_NET_SCHED is not set
210
211#
212# Network testing
213#
214# CONFIG_NET_PKTGEN is not set
215# CONFIG_NET_TCPPROBE is not set
216# CONFIG_HAMRADIO is not set
217# CONFIG_IRDA is not set
218# CONFIG_BT is not set
219# CONFIG_IEEE80211 is not set
220
221#
222# Device Drivers
223#
224
225#
226# Generic Driver Options
227#
228CONFIG_STANDALONE=y
229# CONFIG_PREVENT_FIRMWARE_BUILD is not set
230# CONFIG_FW_LOADER is not set
231# CONFIG_DEBUG_DRIVER is not set
232# CONFIG_SYS_HYPERVISOR is not set
233
234#
235# Connector - unified userspace <-> kernelspace linker
236#
237# CONFIG_CONNECTOR is not set
238
239#
240# Memory Technology Devices (MTD)
241#
242# CONFIG_MTD is not set
243
244#
245# Parallel port support
246#
247# CONFIG_PARPORT is not set
248
249#
250# Plug and Play support
251#
252
253#
254# Block devices
255#
256# CONFIG_BLK_DEV_COW_COMMON is not set
257CONFIG_BLK_DEV_LOOP=m
258# CONFIG_BLK_DEV_CRYPTOLOOP is not set
259CONFIG_BLK_DEV_NBD=m
260CONFIG_BLK_DEV_RAM=m
261CONFIG_BLK_DEV_RAM_COUNT=16
262CONFIG_BLK_DEV_RAM_SIZE=4096
263CONFIG_BLK_DEV_INITRD=y
264# CONFIG_CDROM_PKTCDVD is not set
265# CONFIG_ATA_OVER_ETH is not set
266
267#
268# ATA/ATAPI/MFM/RLL support
269#
270# CONFIG_IDE is not set
271
272#
273# SCSI device support
274#
275# CONFIG_RAID_ATTRS is not set
276# CONFIG_SCSI is not set
277
278#
279# Multi-device support (RAID and LVM)
280#
281# CONFIG_MD is not set
282
283#
284# Fusion MPT device support
285#
286# CONFIG_FUSION is not set
287
288#
289# IEEE 1394 (FireWire) support
290#
291
292#
293# I2O device support
294#
295
296#
297# Network device support
298#
299CONFIG_NETDEVICES=y
300CONFIG_DUMMY=y
301# CONFIG_BONDING is not set
302# CONFIG_EQUALIZER is not set
303CONFIG_TUN=m
304
305#
306# PHY device support
307#
308# CONFIG_PHYLIB is not set
309
310#
311# Ethernet (10 or 100Mbit)
312#
313CONFIG_NET_ETHERNET=y
314CONFIG_MII=y
315CONFIG_MACB=y
316
317#
318# Ethernet (1000 Mbit)
319#
320
321#
322# Ethernet (10000 Mbit)
323#
324
325#
326# Token Ring devices
327#
328
329#
330# Wireless LAN (non-hamradio)
331#
332# CONFIG_NET_RADIO is not set
333
334#
335# Wan interfaces
336#
337# CONFIG_WAN is not set
338CONFIG_PPP=m
339# CONFIG_PPP_MULTILINK is not set
340# CONFIG_PPP_FILTER is not set
341CONFIG_PPP_ASYNC=m
342# CONFIG_PPP_SYNC_TTY is not set
343CONFIG_PPP_DEFLATE=m
344# CONFIG_PPP_BSDCOMP is not set
345# CONFIG_PPP_MPPE is not set
346# CONFIG_PPPOE is not set
347# CONFIG_SLIP is not set
348# CONFIG_SHAPER is not set
349# CONFIG_NETCONSOLE is not set
350# CONFIG_NETPOLL is not set
351# CONFIG_NET_POLL_CONTROLLER is not set
352
353#
354# ISDN subsystem
355#
356# CONFIG_ISDN is not set
357
358#
359# Telephony Support
360#
361# CONFIG_PHONE is not set
362
363#
364# Input device support
365#
366# CONFIG_INPUT is not set
367
368#
369# Hardware I/O ports
370#
371# CONFIG_SERIO is not set
372# CONFIG_GAMEPORT is not set
373
374#
375# Character devices
376#
377# CONFIG_VT is not set
378# CONFIG_SERIAL_NONSTANDARD is not set
379
380#
381# Serial drivers
382#
383# CONFIG_SERIAL_8250 is not set
384
385#
386# Non-8250 serial port support
387#
388CONFIG_SERIAL_AT91=y
389CONFIG_SERIAL_AT91_CONSOLE=y
390# CONFIG_SERIAL_AT91_TTYAT is not set
391CONFIG_SERIAL_CORE=y
392CONFIG_SERIAL_CORE_CONSOLE=y
393CONFIG_UNIX98_PTYS=y
394# CONFIG_LEGACY_PTYS is not set
395
396#
397# IPMI
398#
399# CONFIG_IPMI_HANDLER is not set
400
401#
402# Watchdog Cards
403#
404# CONFIG_WATCHDOG is not set
405# CONFIG_HW_RANDOM is not set
406# CONFIG_RTC is not set
407# CONFIG_GEN_RTC is not set
408# CONFIG_DTLK is not set
409# CONFIG_R3964 is not set
410
411#
412# Ftape, the floppy tape device driver
413#
414# CONFIG_RAW_DRIVER is not set
415
416#
417# TPM devices
418#
419# CONFIG_TCG_TPM is not set
420# CONFIG_TELCLOCK is not set
421
422#
423# I2C support
424#
425# CONFIG_I2C is not set
426
427#
428# SPI support
429#
430CONFIG_SPI=y
431# CONFIG_SPI_DEBUG is not set
432CONFIG_SPI_MASTER=y
433
434#
435# SPI Master Controller Drivers
436#
437CONFIG_SPI_ATMEL=m
438# CONFIG_SPI_BITBANG is not set
439
440#
441# SPI Protocol Masters
442#
443
444#
445# Dallas's 1-wire bus
446#
447
448#
449# Hardware Monitoring support
450#
451# CONFIG_HWMON is not set
452# CONFIG_HWMON_VID is not set
453
454#
455# Misc devices
456#
457
458#
459# Multimedia devices
460#
461# CONFIG_VIDEO_DEV is not set
462CONFIG_VIDEO_V4L2=y
463
464#
465# Digital Video Broadcasting Devices
466#
467# CONFIG_DVB is not set
468
469#
470# Graphics support
471#
472# CONFIG_FIRMWARE_EDID is not set
473CONFIG_FB=m
474CONFIG_FB_CFB_FILLRECT=m
475CONFIG_FB_CFB_COPYAREA=m
476CONFIG_FB_CFB_IMAGEBLIT=m
477# CONFIG_FB_MACMODES is not set
478# CONFIG_FB_BACKLIGHT is not set
479# CONFIG_FB_MODE_HELPERS is not set
480# CONFIG_FB_TILEBLITTING is not set
481CONFIG_FB_SIDSA=m
482CONFIG_FB_SIDSA_DEFAULT_BPP=24
483# CONFIG_FB_S1D13XXX is not set
484# CONFIG_FB_VIRTUAL is not set
485
486#
487# Logo configuration
488#
489# CONFIG_LOGO is not set
490CONFIG_BACKLIGHT_LCD_SUPPORT=y
491# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
492CONFIG_LCD_CLASS_DEVICE=m
493CONFIG_LCD_DEVICE=y
494CONFIG_LCD_LTV350QV=m
495
496#
497# Sound
498#
499# CONFIG_SOUND is not set
500
501#
502# USB support
503#
504# CONFIG_USB_ARCH_HAS_HCD is not set
505# CONFIG_USB_ARCH_HAS_OHCI is not set
506# CONFIG_USB_ARCH_HAS_EHCI is not set
507
508#
509# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
510#
511
512#
513# USB Gadget Support
514#
515# CONFIG_USB_GADGET is not set
516
517#
518# MMC/SD Card support
519#
520# CONFIG_MMC is not set
521
522#
523# LED devices
524#
525# CONFIG_NEW_LEDS is not set
526
527#
528# LED drivers
529#
530
531#
532# LED Triggers
533#
534
535#
536# InfiniBand support
537#
538
539#
540# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
541#
542
543#
544# Real Time Clock
545#
546# CONFIG_RTC_CLASS is not set
547
548#
549# DMA Engine support
550#
551# CONFIG_DMA_ENGINE is not set
552
553#
554# DMA Clients
555#
556
557#
558# DMA Devices
559#
560
561#
562# File systems
563#
564CONFIG_EXT2_FS=y
565# CONFIG_EXT2_FS_XATTR is not set
566# CONFIG_EXT2_FS_XIP is not set
567# CONFIG_EXT3_FS is not set
568# CONFIG_REISERFS_FS is not set
569# CONFIG_JFS_FS is not set
570# CONFIG_FS_POSIX_ACL is not set
571# CONFIG_XFS_FS is not set
572# CONFIG_OCFS2_FS is not set
573CONFIG_MINIX_FS=m
574CONFIG_ROMFS_FS=m
575# CONFIG_INOTIFY is not set
576# CONFIG_QUOTA is not set
577# CONFIG_DNOTIFY is not set
578# CONFIG_AUTOFS_FS is not set
579# CONFIG_AUTOFS4_FS is not set
580# CONFIG_FUSE_FS is not set
581
582#
583# CD-ROM/DVD Filesystems
584#
585# CONFIG_ISO9660_FS is not set
586# CONFIG_UDF_FS is not set
587
588#
589# DOS/FAT/NT Filesystems
590#
591CONFIG_FAT_FS=m
592CONFIG_MSDOS_FS=m
593CONFIG_VFAT_FS=m
594CONFIG_FAT_DEFAULT_CODEPAGE=437
595CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
596# CONFIG_NTFS_FS is not set
597
598#
599# Pseudo filesystems
600#
601CONFIG_PROC_FS=y
602CONFIG_PROC_KCORE=y
603CONFIG_SYSFS=y
604CONFIG_TMPFS=y
605# CONFIG_HUGETLB_PAGE is not set
606CONFIG_RAMFS=y
607CONFIG_CONFIGFS_FS=m
608
609#
610# Miscellaneous filesystems
611#
612# CONFIG_ADFS_FS is not set
613# CONFIG_AFFS_FS is not set
614# CONFIG_HFS_FS is not set
615# CONFIG_HFSPLUS_FS is not set
616# CONFIG_BEFS_FS is not set
617# CONFIG_BFS_FS is not set
618# CONFIG_EFS_FS is not set
619# CONFIG_CRAMFS is not set
620# CONFIG_VXFS_FS is not set
621# CONFIG_HPFS_FS is not set
622# CONFIG_QNX4FS_FS is not set
623# CONFIG_SYSV_FS is not set
624# CONFIG_UFS_FS is not set
625
626#
627# Network File Systems
628#
629CONFIG_NFS_FS=y
630CONFIG_NFS_V3=y
631# CONFIG_NFS_V3_ACL is not set
632# CONFIG_NFS_V4 is not set
633# CONFIG_NFS_DIRECTIO is not set
634# CONFIG_NFSD is not set
635CONFIG_ROOT_NFS=y
636CONFIG_LOCKD=y
637CONFIG_LOCKD_V4=y
638CONFIG_NFS_COMMON=y
639CONFIG_SUNRPC=y
640# CONFIG_RPCSEC_GSS_KRB5 is not set
641# CONFIG_RPCSEC_GSS_SPKM3 is not set
642# CONFIG_SMB_FS is not set
643CONFIG_CIFS=m
644# CONFIG_CIFS_STATS is not set
645# CONFIG_CIFS_WEAK_PW_HASH is not set
646# CONFIG_CIFS_XATTR is not set
647# CONFIG_CIFS_DEBUG2 is not set
648# CONFIG_CIFS_EXPERIMENTAL is not set
649# CONFIG_NCP_FS is not set
650# CONFIG_CODA_FS is not set
651# CONFIG_AFS_FS is not set
652# CONFIG_9P_FS is not set
653
654#
655# Partition Types
656#
657# CONFIG_PARTITION_ADVANCED is not set
658CONFIG_MSDOS_PARTITION=y
659
660#
661# Native Language Support
662#
663CONFIG_NLS=m
664CONFIG_NLS_DEFAULT="iso8859-1"
665CONFIG_NLS_CODEPAGE_437=m
666# CONFIG_NLS_CODEPAGE_737 is not set
667# CONFIG_NLS_CODEPAGE_775 is not set
668CONFIG_NLS_CODEPAGE_850=m
669# CONFIG_NLS_CODEPAGE_852 is not set
670# CONFIG_NLS_CODEPAGE_855 is not set
671# CONFIG_NLS_CODEPAGE_857 is not set
672# CONFIG_NLS_CODEPAGE_860 is not set
673# CONFIG_NLS_CODEPAGE_861 is not set
674# CONFIG_NLS_CODEPAGE_862 is not set
675# CONFIG_NLS_CODEPAGE_863 is not set
676# CONFIG_NLS_CODEPAGE_864 is not set
677# CONFIG_NLS_CODEPAGE_865 is not set
678# CONFIG_NLS_CODEPAGE_866 is not set
679# CONFIG_NLS_CODEPAGE_869 is not set
680# CONFIG_NLS_CODEPAGE_936 is not set
681# CONFIG_NLS_CODEPAGE_950 is not set
682# CONFIG_NLS_CODEPAGE_932 is not set
683# CONFIG_NLS_CODEPAGE_949 is not set
684# CONFIG_NLS_CODEPAGE_874 is not set
685# CONFIG_NLS_ISO8859_8 is not set
686# CONFIG_NLS_CODEPAGE_1250 is not set
687# CONFIG_NLS_CODEPAGE_1251 is not set
688# CONFIG_NLS_ASCII is not set
689CONFIG_NLS_ISO8859_1=m
690# CONFIG_NLS_ISO8859_2 is not set
691# CONFIG_NLS_ISO8859_3 is not set
692# CONFIG_NLS_ISO8859_4 is not set
693# CONFIG_NLS_ISO8859_5 is not set
694# CONFIG_NLS_ISO8859_6 is not set
695# CONFIG_NLS_ISO8859_7 is not set
696# CONFIG_NLS_ISO8859_9 is not set
697# CONFIG_NLS_ISO8859_13 is not set
698# CONFIG_NLS_ISO8859_14 is not set
699# CONFIG_NLS_ISO8859_15 is not set
700# CONFIG_NLS_KOI8_R is not set
701# CONFIG_NLS_KOI8_U is not set
702CONFIG_NLS_UTF8=m
703
704#
705# Kernel hacking
706#
707CONFIG_TRACE_IRQFLAGS_SUPPORT=y
708CONFIG_PRINTK_TIME=y
709CONFIG_MAGIC_SYSRQ=y
710# CONFIG_UNUSED_SYMBOLS is not set
711CONFIG_DEBUG_KERNEL=y
712CONFIG_LOG_BUF_SHIFT=14
713CONFIG_DETECT_SOFTLOCKUP=y
714# CONFIG_SCHEDSTATS is not set
715# CONFIG_DEBUG_SPINLOCK is not set
716# CONFIG_DEBUG_MUTEXES is not set
717# CONFIG_DEBUG_RWSEMS is not set
718# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
719# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
720# CONFIG_DEBUG_KOBJECT is not set
721CONFIG_DEBUG_BUGVERBOSE=y
722# CONFIG_DEBUG_INFO is not set
723CONFIG_DEBUG_FS=y
724# CONFIG_DEBUG_VM is not set
725CONFIG_FRAME_POINTER=y
726# CONFIG_UNWIND_INFO is not set
727CONFIG_FORCED_INLINING=y
728# CONFIG_RCU_TORTURE_TEST is not set
729CONFIG_KPROBES=y
730
731#
732# Security options
733#
734# CONFIG_KEYS is not set
735# CONFIG_SECURITY is not set
736
737#
738# Cryptographic options
739#
740# CONFIG_CRYPTO is not set
741
742#
743# Hardware crypto devices
744#
745
746#
747# Library routines
748#
749CONFIG_CRC_CCITT=m
750# CONFIG_CRC16 is not set
751CONFIG_CRC32=m
752# CONFIG_LIBCRC32C is not set
753CONFIG_ZLIB_INFLATE=m
754CONFIG_ZLIB_DEFLATE=m
diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile
new file mode 100644
index 000000000000..90e5afff54a2
--- /dev/null
+++ b/arch/avr32/kernel/Makefile
@@ -0,0 +1,18 @@
1#
2# Makefile for the Linux/AVR32 kernel.
3#
4
5extra-y := head.o vmlinux.lds
6
7obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o
8obj-y += syscall_table.o syscall-stubs.o irq.o
9obj-y += setup.o traps.o semaphore.o ptrace.o
10obj-y += signal.o sys_avr32.o process.o time.o
11obj-y += init_task.o switch_to.o cpu.o
12obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o
13obj-$(CONFIG_KPROBES) += kprobes.o
14
15USE_STANDARD_AS_RULE := true
16
17%.lds: %.lds.c FORCE
18 $(call if_changed_dep,cpp_lds_S)
diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c
new file mode 100644
index 000000000000..97d865865667
--- /dev/null
+++ b/arch/avr32/kernel/asm-offsets.c
@@ -0,0 +1,25 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 */
6
7#include <linux/thread_info.h>
8
9#define DEFINE(sym, val) \
10 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
11
12#define BLANK() asm volatile("\n->" : : )
13
14#define OFFSET(sym, str, mem) \
15 DEFINE(sym, offsetof(struct str, mem));
16
17void foo(void)
18{
19 OFFSET(TI_task, thread_info, task);
20 OFFSET(TI_exec_domain, thread_info, exec_domain);
21 OFFSET(TI_flags, thread_info, flags);
22 OFFSET(TI_cpu, thread_info, cpu);
23 OFFSET(TI_preempt_count, thread_info, preempt_count);
24 OFFSET(TI_restart_block, thread_info, restart_block);
25}
diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c
new file mode 100644
index 000000000000..04f767a272b7
--- /dev/null
+++ b/arch/avr32/kernel/avr32_ksyms.c
@@ -0,0 +1,55 @@
1/*
2 * Export AVR32-specific functions for loadable modules.
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/module.h>
11
12#include <asm/checksum.h>
13#include <asm/uaccess.h>
14#include <asm/delay.h>
15
16/*
17 * GCC functions
18 */
/*
 * Prototypes are declared here because no header included by this file is
 * relied on to provide them. NOTE(review): judging by the names these are
 * the compiler's 64-bit shift helpers (left, logical right, arithmetic
 * right) -- confirm against the compiler runtime.
 */
19extern unsigned long long __avr32_lsl64(unsigned long long u, unsigned long b);
20extern unsigned long long __avr32_lsr64(unsigned long long u, unsigned long b);
21extern unsigned long long __avr32_asr64(unsigned long long u, unsigned long b);
22EXPORT_SYMBOL(__avr32_lsl64);
23EXPORT_SYMBOL(__avr32_lsr64);
24EXPORT_SYMBOL(__avr32_asr64);
25
26/*
27 * String functions
28 */
29EXPORT_SYMBOL(memset);
30EXPORT_SYMBOL(memcpy);
31
32/*
33 * Userspace access stuff.
34 */
35EXPORT_SYMBOL(copy_from_user);
36EXPORT_SYMBOL(copy_to_user);
37EXPORT_SYMBOL(__copy_user);
38EXPORT_SYMBOL(strncpy_from_user);
39EXPORT_SYMBOL(__strncpy_from_user);
40EXPORT_SYMBOL(clear_user);
41EXPORT_SYMBOL(__clear_user);
42EXPORT_SYMBOL(csum_partial);
43EXPORT_SYMBOL(csum_partial_copy_generic);
44
45/* Delay loops (lib/delay.S) */
46EXPORT_SYMBOL(__ndelay);
47EXPORT_SYMBOL(__udelay);
48EXPORT_SYMBOL(__const_udelay);
49
50/* Bit operations (lib/findbit.S) */
51EXPORT_SYMBOL(find_first_zero_bit);
52EXPORT_SYMBOL(find_next_zero_bit);
53EXPORT_SYMBOL(find_first_bit);
54EXPORT_SYMBOL(find_next_bit);
55EXPORT_SYMBOL(generic_find_next_zero_le_bit);
diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c
new file mode 100644
index 000000000000..342452ba2049
--- /dev/null
+++ b/arch/avr32/kernel/cpu.c
@@ -0,0 +1,327 @@
1/*
2 * Copyright (C) 2005-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/init.h>
9#include <linux/sysdev.h>
10#include <linux/seq_file.h>
11#include <linux/cpu.h>
12#include <linux/percpu.h>
13#include <linux/param.h>
14#include <linux/errno.h>
15
16#include <asm/setup.h>
17#include <asm/sysreg.h>
18
19static DEFINE_PER_CPU(struct cpu, cpu_devices);
20
21#ifdef CONFIG_PERFORMANCE_COUNTERS
22
23/*
24 * XXX: If/when a SMP-capable implementation of AVR32 will ever be
25 * made, we must make sure that the code executes on the correct CPU.
26 */
27static ssize_t show_pc0event(struct sys_device *dev, char *buf)
28{
29 unsigned long pccr;
30
31 pccr = sysreg_read(PCCR);
32 return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f);
33}
34static ssize_t store_pc0event(struct sys_device *dev, const char *buf,
35 size_t count)
36{
37 unsigned long val;
38 char *endp;
39
40 val = simple_strtoul(buf, &endp, 0);
41 if (endp == buf || val > 0x3f)
42 return -EINVAL;
43 val = (val << 12) | (sysreg_read(PCCR) & 0xfffc0fff);
44 sysreg_write(PCCR, val);
45 return count;
46}
47static ssize_t show_pc0count(struct sys_device *dev, char *buf)
48{
49 unsigned long pcnt0;
50
51 pcnt0 = sysreg_read(PCNT0);
52 return sprintf(buf, "%lu\n", pcnt0);
53}
54static ssize_t store_pc0count(struct sys_device *dev, const char *buf,
55 size_t count)
56{
57 unsigned long val;
58 char *endp;
59
60 val = simple_strtoul(buf, &endp, 0);
61 if (endp == buf)
62 return -EINVAL;
63 sysreg_write(PCNT0, val);
64
65 return count;
66}
67
68static ssize_t show_pc1event(struct sys_device *dev, char *buf)
69{
70 unsigned long pccr;
71
72 pccr = sysreg_read(PCCR);
73 return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f);
74}
75static ssize_t store_pc1event(struct sys_device *dev, const char *buf,
76 size_t count)
77{
78 unsigned long val;
79 char *endp;
80
81 val = simple_strtoul(buf, &endp, 0);
82 if (endp == buf || val > 0x3f)
83 return -EINVAL;
84 val = (val << 18) | (sysreg_read(PCCR) & 0xff03ffff);
85 sysreg_write(PCCR, val);
86 return count;
87}
88static ssize_t show_pc1count(struct sys_device *dev, char *buf)
89{
90 unsigned long pcnt1;
91
92 pcnt1 = sysreg_read(PCNT1);
93 return sprintf(buf, "%lu\n", pcnt1);
94}
95static ssize_t store_pc1count(struct sys_device *dev, const char *buf,
96 size_t count)
97{
98 unsigned long val;
99 char *endp;
100
101 val = simple_strtoul(buf, &endp, 0);
102 if (endp == buf)
103 return -EINVAL;
104 sysreg_write(PCNT1, val);
105
106 return count;
107}
108
109static ssize_t show_pccycles(struct sys_device *dev, char *buf)
110{
111 unsigned long pccnt;
112
113 pccnt = sysreg_read(PCCNT);
114 return sprintf(buf, "%lu\n", pccnt);
115}
116static ssize_t store_pccycles(struct sys_device *dev, const char *buf,
117 size_t count)
118{
119 unsigned long val;
120 char *endp;
121
122 val = simple_strtoul(buf, &endp, 0);
123 if (endp == buf)
124 return -EINVAL;
125 sysreg_write(PCCNT, val);
126
127 return count;
128}
129
130static ssize_t show_pcenable(struct sys_device *dev, char *buf)
131{
132 unsigned long pccr;
133
134 pccr = sysreg_read(PCCR);
135 return sprintf(buf, "%c\n", (pccr & 1)?'1':'0');
136}
137static ssize_t store_pcenable(struct sys_device *dev, const char *buf,
138 size_t count)
139{
140 unsigned long pccr, val;
141 char *endp;
142
143 val = simple_strtoul(buf, &endp, 0);
144 if (endp == buf)
145 return -EINVAL;
146 if (val)
147 val = 1;
148
149 pccr = sysreg_read(PCCR);
150 pccr = (pccr & ~1UL) | val;
151 sysreg_write(PCCR, pccr);
152
153 return count;
154}
155
/*
 * One sysdev attribute per performance-counter control, each with mode
 * 0600 and wired to the show_ and store_ handlers of the same name.
 */
156static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event);
157static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count);
158static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event);
159static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count);
160static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles);
161static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable);
162
163#endif /* CONFIG_PERFORMANCE_COUNTERS */
164
165static int __init topology_init(void)
166{
167 int cpu;
168
169 for_each_possible_cpu(cpu) {
170 struct cpu *c = &per_cpu(cpu_devices, cpu);
171
172 register_cpu(c, cpu);
173
174#ifdef CONFIG_PERFORMANCE_COUNTERS
175 sysdev_create_file(&c->sysdev, &attr_pc0event);
176 sysdev_create_file(&c->sysdev, &attr_pc0count);
177 sysdev_create_file(&c->sysdev, &attr_pc1event);
178 sysdev_create_file(&c->sysdev, &attr_pc1count);
179 sysdev_create_file(&c->sysdev, &attr_pccycles);
180 sysdev_create_file(&c->sysdev, &attr_pcenable);
181#endif
182 }
183
184 return 0;
185}
186
187subsys_initcall(topology_init);
188
/* Printable CPU names, indexed by the CPU id taken from CONFIG0 bits 31:24. */
189static const char *cpu_names[] = {
190 "Morgan",
191 "AP7000",
192};
193#define NR_CPU_NAMES ARRAY_SIZE(cpu_names)
194
/* Printable architecture names, indexed by the id from CONFIG0 bits 15:13. */
195static const char *arch_names[] = {
196 "AVR32A",
197 "AVR32B",
198};
199#define NR_ARCH_NAMES ARRAY_SIZE(arch_names)
200
/*
 * Printable MMU descriptions, indexed by the 2-bit value from CONFIG0
 * bits 8:7; all four possible values have an entry.
 */
201static const char *mmu_types[] = {
202 "No MMU",
203 "ITLB and DTLB",
204 "Shared TLB",
205 "MPU"
206};
207
208void __init setup_processor(void)
209{
210 unsigned long config0, config1;
211 unsigned cpu_id, cpu_rev, arch_id, arch_rev, mmu_type;
212 unsigned tmp;
213
214 config0 = sysreg_read(CONFIG0); /* 0x0000013e; */
215 config1 = sysreg_read(CONFIG1); /* 0x01f689a2; */
216 cpu_id = config0 >> 24;
217 cpu_rev = (config0 >> 16) & 0xff;
218 arch_id = (config0 >> 13) & 0x07;
219 arch_rev = (config0 >> 10) & 0x07;
220 mmu_type = (config0 >> 7) & 0x03;
221
222 boot_cpu_data.arch_type = arch_id;
223 boot_cpu_data.cpu_type = cpu_id;
224 boot_cpu_data.arch_revision = arch_rev;
225 boot_cpu_data.cpu_revision = cpu_rev;
226 boot_cpu_data.tlb_config = mmu_type;
227
228 tmp = (config1 >> 13) & 0x07;
229 if (tmp) {
230 boot_cpu_data.icache.ways = 1 << ((config1 >> 10) & 0x07);
231 boot_cpu_data.icache.sets = 1 << ((config1 >> 16) & 0x0f);
232 boot_cpu_data.icache.linesz = 1 << (tmp + 1);
233 }
234 tmp = (config1 >> 3) & 0x07;
235 if (tmp) {
236 boot_cpu_data.dcache.ways = 1 << (config1 & 0x07);
237 boot_cpu_data.dcache.sets = 1 << ((config1 >> 6) & 0x0f);
238 boot_cpu_data.dcache.linesz = 1 << (tmp + 1);
239 }
240
241 if ((cpu_id >= NR_CPU_NAMES) || (arch_id >= NR_ARCH_NAMES)) {
242 printk ("Unknown CPU configuration (ID %02x, arch %02x), "
243 "continuing anyway...\n",
244 cpu_id, arch_id);
245 return;
246 }
247
248 printk ("CPU: %s [%02x] revision %d (%s revision %d)\n",
249 cpu_names[cpu_id], cpu_id, cpu_rev,
250 arch_names[arch_id], arch_rev);
251 printk ("CPU: MMU configuration: %s\n", mmu_types[mmu_type]);
252 printk ("CPU: features:");
253 if (config0 & (1 << 6))
254 printk(" fpu");
255 if (config0 & (1 << 5))
256 printk(" java");
257 if (config0 & (1 << 4))
258 printk(" perfctr");
259 if (config0 & (1 << 3))
260 printk(" ocd");
261 printk("\n");
262}
263
264#ifdef CONFIG_PROC_FS
265static int c_show(struct seq_file *m, void *v)
266{
267 unsigned int icache_size, dcache_size;
268 unsigned int cpu = smp_processor_id();
269
270 icache_size = boot_cpu_data.icache.ways *
271 boot_cpu_data.icache.sets *
272 boot_cpu_data.icache.linesz;
273 dcache_size = boot_cpu_data.dcache.ways *
274 boot_cpu_data.dcache.sets *
275 boot_cpu_data.dcache.linesz;
276
277 seq_printf(m, "processor\t: %d\n", cpu);
278
279 if (boot_cpu_data.arch_type < NR_ARCH_NAMES)
280 seq_printf(m, "cpu family\t: %s revision %d\n",
281 arch_names[boot_cpu_data.arch_type],
282 boot_cpu_data.arch_revision);
283 if (boot_cpu_data.cpu_type < NR_CPU_NAMES)
284 seq_printf(m, "cpu type\t: %s revision %d\n",
285 cpu_names[boot_cpu_data.cpu_type],
286 boot_cpu_data.cpu_revision);
287
288 seq_printf(m, "i-cache\t\t: %dK (%u ways x %u sets x %u)\n",
289 icache_size >> 10,
290 boot_cpu_data.icache.ways,
291 boot_cpu_data.icache.sets,
292 boot_cpu_data.icache.linesz);
293 seq_printf(m, "d-cache\t\t: %dK (%u ways x %u sets x %u)\n",
294 dcache_size >> 10,
295 boot_cpu_data.dcache.ways,
296 boot_cpu_data.dcache.sets,
297 boot_cpu_data.dcache.linesz);
298 seq_printf(m, "bogomips\t: %lu.%02lu\n",
299 boot_cpu_data.loops_per_jiffy / (500000/HZ),
300 (boot_cpu_data.loops_per_jiffy / (5000/HZ)) % 100);
301
302 return 0;
303}
304
305static void *c_start(struct seq_file *m, loff_t *pos)
306{
307 return *pos < 1 ? (void *)1 : NULL;
308}
309
310static void *c_next(struct seq_file *m, void *v, loff_t *pos)
311{
312 ++*pos;
313 return NULL;
314}
315
/* seq_file stop callback: intentionally empty, the body does nothing. */
316static void c_stop(struct seq_file *m, void *v)
317{
318
319}
320
/*
 * seq_file operations built from the four callbacks above. The object is
 * non-static; only compiled when CONFIG_PROC_FS is set.
 */
321struct seq_operations cpuinfo_op = {
322 .start = c_start,
323 .next = c_next,
324 .stop = c_stop,
325 .show = c_show
326};
327#endif /* CONFIG_PROC_FS */
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
new file mode 100644
index 000000000000..eeb66792bc37
--- /dev/null
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -0,0 +1,678 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9/*
10 * This file contains the low-level entry-points into the kernel, that is,
11 * exception handlers, debug trap handlers, interrupt handlers and the
12 * system call handler.
13 */
14#include <linux/errno.h>
15
16#include <asm/asm.h>
17#include <asm/hardirq.h>
18#include <asm/irq.h>
19#include <asm/ocd.h>
20#include <asm/page.h>
21#include <asm/pgtable.h>
22#include <asm/ptrace.h>
23#include <asm/sysreg.h>
24#include <asm/thread_info.h>
25#include <asm/unistd.h>
26
/*
 * preempt_stop expands to mask_interrupts when CONFIG_PREEMPT is set and
 * to nothing otherwise. Without CONFIG_PREEMPT the preprocessor also
 * rewrites every later use of the name fault_resume_kernel in this file
 * to fault_restore_all.
 */
27#ifdef CONFIG_PREEMPT
28# define preempt_stop mask_interrupts
29#else
30# define preempt_stop
31# define fault_resume_kernel fault_restore_all
32#endif
33
/*
 * __MASK(x) is a value with the low x bits set. IRQ_MASK combines the
 * SOFTIRQ_BITS-wide field at SOFTIRQ_SHIFT with the HARDIRQ_BITS-wide
 * field at HARDIRQ_SHIFT.
 */
34#define __MASK(x) ((1 << (x)) - 1)
35#define IRQ_MASK ((__MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | \
36 (__MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT))
37
/*
 * Exception vector table, placed in section .ex.text. Every slot is a
 * single `bral` to a low-level handler, with an `.align 2` between slots.
 * NOTE(review): which hardware cause each slot position corresponds to is
 * not stated in this file -- presumably fixed by the architecture; confirm
 * against the AVR32 manual before reordering anything.
 */
38 .section .ex.text,"ax",@progbits
39 .align 2
40exception_vectors:
41 bral handle_critical
42 .align 2
43 bral handle_critical
44 .align 2
45 bral do_bus_error_write
46 .align 2
47 bral do_bus_error_read
48 .align 2
49 bral do_nmi_ll
50 .align 2
51 bral handle_address_fault
52 .align 2
53 bral handle_protection_fault
54 .align 2
55 bral handle_debug
56 .align 2
57 bral do_illegal_opcode_ll
58 .align 2
59 bral do_illegal_opcode_ll
60 .align 2
61 bral do_illegal_opcode_ll
62 .align 2
63 bral do_fpe_ll
64 .align 2
65 bral do_illegal_opcode_ll
66 .align 2
67 bral handle_address_fault
68 .align 2
69 bral handle_address_fault
70 .align 2
71 bral handle_protection_fault
72 .align 2
73 bral handle_protection_fault
74 .align 2
75 bral do_dtlb_modified
76
/*
 * TLB miss entry points. Each one lives in its own section (.tlbx.ex.text,
 * .tlbr.ex.text, .tlbw.ex.text), saves r0-r3 with tlbmiss_save and
 * continues in tlb_miss_common. itlb_miss and dtlb_miss_read jump there;
 * dtlb_miss_write has no jump and falls through, because tlb_miss_common
 * follows it directly in the same section.
 */
77 /*
78 * r0 : PGD/PT/PTE
79 * r1 : Offending address
80 * r2 : Scratch register
81 * r3 : Cause (5, 12 or 13)
82 */
83#define tlbmiss_save pushm r0-r3
84#define tlbmiss_restore popm r0-r3
85
86 .section .tlbx.ex.text,"ax",@progbits
87 .global itlb_miss
88itlb_miss:
89 tlbmiss_save
90 rjmp tlb_miss_common
91
92 .section .tlbr.ex.text,"ax",@progbits
93dtlb_miss_read:
94 tlbmiss_save
95 rjmp tlb_miss_common
96
97 .section .tlbw.ex.text,"ax",@progbits
98dtlb_miss_write:
99 tlbmiss_save
100
/*
 * Fast path shared by all three TLB miss entry points; r0-r3 have already
 * been pushed by the caller.
 *
 * Loads the page table base (PTBR) into r0 and the faulting address
 * (TLBEAR) into r1. Addresses with bit 31 set are looked up in
 * swapper_pg_dir via handle_vmalloc_miss, which re-enters at pgtbl_lookup.
 * The two-level walk bails out to page_table_not_present or
 * page_not_present when the present bit is clear at either level.
 * Otherwise the PTE is marked accessed in memory, its hardware bits are
 * written to TLBELO, a replacement index is placed in MMUCR and the entry
 * is written with tlbw before returning with rete.
 */
101 .global tlb_miss_common
102tlb_miss_common:
103 mfsr r0, SYSREG_PTBR
104 mfsr r1, SYSREG_TLBEAR
105
106 /* Is it the vmalloc space? */
107 bld r1, 31
108 brcs handle_vmalloc_miss
109
110 /* First level lookup */
111pgtbl_lookup:
112 lsr r2, r1, PGDIR_SHIFT
113 ld.w r0, r0[r2 << 2]
114 bld r0, _PAGE_BIT_PRESENT
115 brcc page_table_not_present
116
117 /* TODO: Check access rights on page table if necessary */
118
119 /* Translate to virtual address in P1. */
120 andl r0, 0xf000
121 sbr r0, 31
122
123 /* Second level lookup */
/* The shift pair isolates the page-table index bits of the address. */
124 lsl r1, (32 - PGDIR_SHIFT)
125 lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT
126 add r2, r0, r1 << 2
127 ld.w r1, r2[0]
128 bld r1, _PAGE_BIT_PRESENT
129 brcc page_not_present
130
131 /* Mark the page as accessed */
132 sbr r1, _PAGE_BIT_ACCESSED
133 st.w r2[0], r1
134
135 /* Drop software flags */
136 andl r1, _PAGE_FLAGS_HARDWARE_MASK & 0xffff
137 mtsr SYSREG_TLBELO, r1
138
139 /* Figure out which entry we want to replace */
140 mfsr r0, SYSREG_TLBARLO
141 clz r2, r0
142 brcc 1f
143 mov r1, -1 /* All entries have been accessed, */
144 mtsr SYSREG_TLBARLO, r1 /* so reset TLBAR */
145 mov r2, 0 /* and start at 0 */
/* Merge the chosen index, shifted up by 14, into MMUCR. */
1461: mfsr r1, SYSREG_MMUCR
147 lsl r2, 14
148 andl r1, 0x3fff, COH
149 or r1, r2
150 mtsr SYSREG_MMUCR, r1
151
152 tlbw
153
154 tlbmiss_restore
155 rete
156
/*
 * Reached from tlb_miss_common when bit 31 of the faulting address is set:
 * replace the page table base in r0 with the address of swapper_pg_dir
 * (loaded in two halves) and redo the lookup from pgtbl_lookup.
 */
157handle_vmalloc_miss:
158 /* Simply do the lookup in init's page table */
159 mov r0, lo(swapper_pg_dir)
160 orh r0, hi(swapper_pg_dir)
161 rjmp pgtbl_lookup
162
163
164 /* --- System Call --- */
165
/*
 * System call entry, in section .scall.text.
 *
 * Builds the register frame on the stack: r12 is pushed first as the
 * r12_orig slot, then r0-lr, then the return address and status taken
 * from RAR_SUP and RSR_SUP. If TIF_SYSCALL_TRACE is set in the thread
 * flags, syscall_trace_enter runs first and comes back to
 * syscall_trace_cont. The syscall number is expected in r8; numbers at or
 * above NR_syscalls go to syscall_badsys. Otherwise the handler address is
 * fetched from sys_call_table and called, and execution continues in
 * syscall_return, which follows immediately.
 */
166 .section .scall.text,"ax",@progbits
167system_call:
168 pushm r12 /* r12_orig */
169 stmts --sp, r0-lr
170 zero_fp
171 mfsr r0, SYSREG_RAR_SUP
172 mfsr r1, SYSREG_RSR_SUP
173 stm --sp, r0-r1
174
175 /* check for syscall tracing */
176 get_thread_info r0
177 ld.w r1, r0[TI_flags]
178 bld r1, TIF_SYSCALL_TRACE
179 brcs syscall_trace_enter
180
181syscall_trace_cont:
182 cp.w r8, NR_syscalls
183 brhs syscall_badsys
184
185 lddpc lr, syscall_table_addr
186 ld.w lr, lr[r8 << 2]
187 mov r8, r5 /* 5th argument (6th is pushed by stub) */
188 icall lr
189
/*
 * Common system call exit.
 *
 * syscall_return: with interrupts masked, store the return value (r12)
 * into the saved frame and branch to syscall_exit_work if any
 * _TIF_ALLWORK_MASK flag is set.
 * syscall_exit_cont: pop the saved return address and status back into
 * RAR_SUP and RSR_SUP, restore r0-lr, skip the r12_orig slot and return
 * with rets.
 * syscall_table_addr: literal holding the address of sys_call_table,
 * loaded PC-relative by system_call.
 * syscall_badsys: out-of-range syscall number; returns -ENOSYS through
 * the normal exit path.
 */
190 .global syscall_return
191syscall_return:
192 get_thread_info r0
193 mask_interrupts /* make sure we don't miss an interrupt
194 setting need_resched or sigpending
195 between sampling and the rets */
196
197 /* Store the return value so that the correct value is loaded below */
198 stdsp sp[REG_R12], r12
199
200 ld.w r1, r0[TI_flags]
201 andl r1, _TIF_ALLWORK_MASK, COH
202 brne syscall_exit_work
203
204syscall_exit_cont:
205 popm r8-r9
206 mtsr SYSREG_RAR_SUP, r8
207 mtsr SYSREG_RSR_SUP, r9
208 ldmts sp++, r0-lr
209 sub sp, -4 /* r12_orig */
210 rets
211
212 .align 2
213syscall_table_addr:
214 .long sys_call_table
215
216syscall_badsys:
217 mov r12, -ENOSYS
218 rjmp syscall_return
219
/*
 * Calls schedule_tail, then joins the system call exit path: through
 * syscall_exit_work if any _TIF_ALLWORK_MASK flag is set, otherwise
 * straight to syscall_exit_cont. Unlike syscall_return it does not store
 * r12 into the frame first.
 */
220 .global ret_from_fork
221ret_from_fork:
222 rcall schedule_tail
223
224 /* check for syscall tracing */
225 get_thread_info r0
226 ld.w r1, r0[TI_flags]
227 andl r1, _TIF_ALLWORK_MASK, COH
228 brne syscall_exit_work
229 rjmp syscall_exit_cont
230
/*
 * Entered from system_call when TIF_SYSCALL_TRACE is set. Preserves
 * r8-r12 around the call to syscall_trace, then resumes normal dispatch
 * at syscall_trace_cont.
 */
231syscall_trace_enter:
232 pushm r8-r12
233 rcall syscall_trace
234 popm r8-r12
235 rjmp syscall_trace_cont
236
/*
 * Slow path of system call exit. Entered with the thread flags in r1 and
 * the thread_info pointer in r0. Handles the pending work in order:
 *   - TIF_SYSCALL_TRACE: call syscall_trace with interrupts enabled
 *   - 1: TIF_NEED_RESCHED: call schedule, reload the flags, re-check
 *   - 2: signal pending / restore sigmask: call do_notify_resume with the
 *        frame pointer in r12 and thread_info in r11, reload, re-check
 *   - 3: TIF_BREAKPOINT: program the debug registers BWA2A (saved PC) and
 *        BWC2A (value built from the low byte of TLBEHI)
 * Interrupts are masked again before each reload of the flags. Every path
 * ends in syscall_exit_cont.
 */
237syscall_exit_work:
238 bld r1, TIF_SYSCALL_TRACE
239 brcc 1f
240 unmask_interrupts
241 rcall syscall_trace
242 mask_interrupts
243 ld.w r1, r0[TI_flags]
244
2451: bld r1, TIF_NEED_RESCHED
246 brcc 2f
247 unmask_interrupts
248 rcall schedule
249 mask_interrupts
250 ld.w r1, r0[TI_flags]
251 rjmp 1b
252
2532: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
254 tst r1, r2
255 breq 3f
256 unmask_interrupts
257 mov r12, sp
258 mov r11, r0
259 rcall do_notify_resume
260 mask_interrupts
261 ld.w r1, r0[TI_flags]
262 rjmp 1b
263
2643: bld r1, TIF_BREAKPOINT
265 brcc syscall_exit_cont
266 mfsr r3, SYSREG_TLBEHI
267 lddsp r2, sp[REG_PC]
268 andl r3, 0xff, COH
269 lsl r3, 1
270 sbr r3, 30
271 sbr r3, 0
272 mtdr DBGREG_BWA2A, r2
273 mtdr DBGREG_BWC2A, r3
274 rjmp syscall_exit_cont
275
276
/*
 * Both labels share one body. Undo the r0-r3 save done by the TLB miss
 * entry, build a full register frame (r12_orig slot, r0-lr, then PC and SR
 * via save_full_context_ex) and call do_page_fault with the exception
 * cause (ECR) in r12 and the frame pointer in r11.
 */
277 /* The slow path of the TLB miss handler */
278page_table_not_present:
279page_not_present:
280 tlbmiss_restore
281 sub sp, 4
282 stmts --sp, r0-lr
283 rcall save_full_context_ex
284 mfsr r12, SYSREG_ECR
285 mov r11, sp
286 rcall do_page_fault
287 rjmp ret_from_exception
288
/*
 * Completes the register frame started by an exception handler: pushes
 * the offending PC (RAR_EX) and the saved status (RSR_EX), unmasks
 * exceptions and returns to the caller.
 *
 * When the mode bits of the saved status are non-zero (label 2), the SP
 * slot already stored in the frame is first replaced with a value computed
 * from the current sp and the frame layout constants.
 * NOTE(review): presumably non-zero mode bits mean the exception came from
 * a privileged mode -- confirm.
 */
289 /* This function expects to find offending PC in SYSREG_RAR_EX */
290save_full_context_ex:
291 mfsr r8, SYSREG_RSR_EX
292 mov r12, r8
293 andh r8, (MODE_MASK >> 16), COH
294 mfsr r11, SYSREG_RAR_EX
295 brne 2f
296
2971: pushm r11, r12 /* PC and SR */
298 unmask_exceptions
299 ret r12
300
3012: sub r10, sp, -(FRAME_SIZE_FULL - REG_LR)
302 stdsp sp[4], r10 /* replace saved SP */
303 rjmp 1b
304
/*
 * handle_critical: build a frame and call do_critical_exception with the
 * cause (ECR) in r12 and the frame pointer in r11. Unlike the other
 * handlers in this file it pushes r12 and then r0-r12 with pushm instead
 * of using `sub sp, 4` / `stmts --sp, r0-lr`.
 *
 * bad_return: reached if do_critical_exception returns (and by jump from
 * do_nmi_ll). Loads the address of the message string at label 1 into r12
 * and branches to panic.
 */
305 /* Low-level exception handlers */
306handle_critical:
307 pushm r12
308 pushm r0-r12
309 rcall save_full_context_ex
310 mfsr r12, SYSREG_ECR
311 mov r11, sp
312 rcall do_critical_exception
313
314 /* We should never get here... */
315bad_return:
316 sub r12, pc, (. - 1f)
317 bral panic
318 .align 2
3191: .asciz "Return from critical exception!"
320
/*
 * Bus error handlers. Both build a full frame and then share the tail at
 * label 1, which calls do_bus_error with the bus error address (BEAR) in
 * r12, a write flag in r11 (1 for do_bus_error_write, 0 for
 * do_bus_error_read) and the frame pointer in r10.
 */
321 .align 1
322do_bus_error_write:
323 sub sp, 4
324 stmts --sp, r0-lr
325 rcall save_full_context_ex
326 mov r11, 1
327 rjmp 1f
328
329do_bus_error_read:
330 sub sp, 4
331 stmts --sp, r0-lr
332 rcall save_full_context_ex
333 mov r11, 0
3341: mfsr r12, SYSREG_BEAR
335 mov r10, sp
336 rcall do_bus_error
337 rjmp ret_from_exception
338
/*
 * NMI handler: build a full frame and call do_nmi with the cause (ECR) in
 * r12 and the frame pointer in r11. A return from do_nmi is not resumed;
 * it jumps to bad_return, which ends in panic. As the FIXME notes, the
 * frame is filled from the *_EX registers by save_full_context_ex.
 */
339 .align 1
340do_nmi_ll:
341 sub sp, 4
342 stmts --sp, r0-lr
343 /* FIXME: Make sure RAR_NMI and RSR_NMI are pushed instead of *_EX */
344 rcall save_full_context_ex
345 mfsr r12, SYSREG_ECR
346 mov r11, sp
347 rcall do_nmi
348 rjmp bad_return
349
/*
 * Build a full frame and call do_address_exception with the cause (ECR)
 * in r12 and the frame pointer in r11, then leave via ret_from_exception.
 */
350handle_address_fault:
351 sub sp, 4
352 stmts --sp, r0-lr
353 rcall save_full_context_ex
354 mfsr r12, SYSREG_ECR
355 mov r11, sp
356 rcall do_address_exception
357 rjmp ret_from_exception
358
/*
 * Build a full frame and call do_page_fault with the cause (ECR) in r12
 * and the frame pointer in r11 -- the same C handler the TLB miss slow
 * path uses -- then leave via ret_from_exception.
 */
359handle_protection_fault:
360 sub sp, 4
361 stmts --sp, r0-lr
362 rcall save_full_context_ex
363 mfsr r12, SYSREG_ECR
364 mov r11, sp
365 rcall do_page_fault
366 rjmp ret_from_exception
367
/*
 * Build a full frame and call do_illegal_opcode with the cause (ECR) in
 * r12 and the frame pointer in r11, then leave via ret_from_exception.
 * Four slots of the vector table branch here.
 */
368 .align 1
369do_illegal_opcode_ll:
370 sub sp, 4
371 stmts --sp, r0-lr
372 rcall save_full_context_ex
373 mfsr r12, SYSREG_ECR
374 mov r11, sp
375 rcall do_illegal_opcode
376 rjmp ret_from_exception
377
/*
 * Handled entirely in assembly, without building a full frame: only r0-r3
 * are saved. Walks the two-level page table for the faulting address
 * (TLBEAR) starting from PTBR, sets _PAGE_BIT_DIRTY in the PTE in memory,
 * writes the PTE's hardware bits to TLBELO and updates the TLB entry with
 * tlbw. Unlike tlb_miss_common, no present bits are checked and the
 * vmalloc case is not special-cased.
 */
378do_dtlb_modified:
379 pushm r0-r3
380 mfsr r1, SYSREG_TLBEAR
381 mfsr r0, SYSREG_PTBR
382 lsr r2, r1, PGDIR_SHIFT
383 ld.w r0, r0[r2 << 2]
384 lsl r1, (32 - PGDIR_SHIFT)
385 lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT
386
387 /* Translate to virtual address in P1 */
388 andl r0, 0xf000
389 sbr r0, 31
390 add r2, r0, r1 << 2
391 ld.w r3, r2[0]
392 sbr r3, _PAGE_BIT_DIRTY
393 mov r0, r3
394 st.w r2[0], r3
395
396 /* The page table is up-to-date. Update the TLB entry as well */
397 andl r0, lo(_PAGE_FLAGS_HARDWARE_MASK)
398 mtsr SYSREG_TLBELO, r0
399
400 /* MMUCR[DRP] is updated automatically, so let's go... */
401 tlbw
402
403 popm r0-r3
404 rete
405
/*
 * Build a full frame, enable interrupts and call do_fpe with the constant
 * 26 in r12 (not the ECR value the other handlers pass) and the frame
 * pointer in r11, then leave via ret_from_exception.
 */
406do_fpe_ll:
407 sub sp, 4
408 stmts --sp, r0-lr
409 rcall save_full_context_ex
410 unmask_interrupts
411 mov r12, 26
412 mov r11, sp
413 rcall do_fpe
414 rjmp ret_from_exception
415
/*
 * Common exit for the exception handlers above. With interrupts masked,
 * inspect the mode bits of the saved SR: non-zero goes to
 * fault_resume_kernel. Otherwise, if any _TIF_WORK_MASK flag is set, go to
 * fault_exit_work; if none is set, fall into fault_resume_user.
 *
 * fault_resume_user: pop the saved PC and SR into RAR_EX and RSR_EX (with
 * exceptions masked), restore r0-lr, skip the r12_orig slot and return
 * with rete.
 */
416ret_from_exception:
417 mask_interrupts
418 lddsp r4, sp[REG_SR]
419 andh r4, (MODE_MASK >> 16), COH
420 brne fault_resume_kernel
421
422 get_thread_info r0
423 ld.w r1, r0[TI_flags]
424 andl r1, _TIF_WORK_MASK, COH
425 brne fault_exit_work
426
427fault_resume_user:
428 popm r8-r9
429 mask_exceptions
430 mtsr SYSREG_RAR_EX, r8
431 mtsr SYSREG_RSR_EX, r9
432 ldmts sp++, r0-lr
433 sub sp, -4
434 rete
435
/*
 * Exception return when the saved SR has non-zero mode bits.
 *
 * With CONFIG_PREEMPT, preempt_schedule_irq is called first, but only when
 * the preempt count is zero, TIF_NEED_RESCHED is set and the GM bit of the
 * saved SR is clear. Then the frame is unwound: PC and SR go back into
 * RAR_EX and RSR_EX, lr is popped, the saved SP slot is skipped, r0-r12
 * are popped, the r12_orig slot is skipped, and rete returns.
 *
 * Without CONFIG_PREEMPT the preprocessor renames this label to
 * fault_restore_all (see the #define near the top of the file).
 */
436fault_resume_kernel:
437#ifdef CONFIG_PREEMPT
438 get_thread_info r0
439 ld.w r2, r0[TI_preempt_count]
440 cp.w r2, 0
441 brne 1f
442 ld.w r1, r0[TI_flags]
443 bld r1, TIF_NEED_RESCHED
444 brcc 1f
445 lddsp r4, sp[REG_SR]
446 bld r4, SYSREG_GM_OFFSET
447 brcs 1f
448 rcall preempt_schedule_irq
4491:
450#endif
451
452 popm r8-r9
453 mask_exceptions
454 mfsr r1, SYSREG_SR
455 mtsr SYSREG_RAR_EX, r8
456 mtsr SYSREG_RSR_EX, r9
457 popm lr
458 sub sp, -4 /* ignore SP */
459 popm r0-r12
460 sub sp, -4 /* ignore r12_orig */
461 rete
462
/*
 * irq_exit_work: entered from the IRQ_LEVEL handlers when work is pending.
 * Rewrites the mode bits of SR (clears M0, sets M1 and M2) so the rest can
 * be shared with the exception path, reloads thread_info and the flags,
 * and falls into fault_exit_work.
 *
 * fault_exit_work: expects thread_info in r0 and the flags in r1. In
 * order: reschedule while TIF_NEED_RESCHED is set; call do_notify_resume
 * (frame pointer in r12, thread_info in r11) while a signal or sigmask
 * restore is pending; finally, if TIF_BREAKPOINT is set, program the debug
 * registers BWA2A and BWC2A. Interrupts are masked again before each
 * reload of the flags. Every path ends in fault_resume_user.
 */
463irq_exit_work:
464 /* Switch to exception mode so that we can share the same code. */
465 mfsr r8, SYSREG_SR
466 cbr r8, SYSREG_M0_OFFSET
467 orh r8, hi(SYSREG_BIT(M1) | SYSREG_BIT(M2))
468 mtsr SYSREG_SR, r8
469 sub pc, -2
470 get_thread_info r0
471 ld.w r1, r0[TI_flags]
472
473fault_exit_work:
474 bld r1, TIF_NEED_RESCHED
475 brcc 1f
476 unmask_interrupts
477 rcall schedule
478 mask_interrupts
479 ld.w r1, r0[TI_flags]
480 rjmp fault_exit_work
481
4821: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
483 tst r1, r2
484 breq 2f
485 unmask_interrupts
486 mov r12, sp
487 mov r11, r0
488 rcall do_notify_resume
489 mask_interrupts
490 ld.w r1, r0[TI_flags]
491 rjmp fault_exit_work
492
4932: bld r1, TIF_BREAKPOINT
494 brcc fault_resume_user
495 mfsr r3, SYSREG_TLBEHI
496 lddsp r2, sp[REG_PC]
497 andl r3, 0xff, COH
498 lsl r3, 1
499 sbr r3, 30
500 sbr r3, 0
501 mtdr DBGREG_BWA2A, r2
502 mtdr DBGREG_BWC2A, r3
503 rjmp fault_resume_user
504
/*
 * Debug trap taken while the saved status had non-zero mode bits. Entered
 * from handle_debug with the frame already built and the masked mode bits
 * in r11.
 *
 * Temporarily writes those mode bits into SR so that `lr` names the
 * interrupted mode's link register, stores it into the frame, restores
 * SR, and stores a recomputed stack pointer (sp + FRAME_SIZE_FULL) into
 * the frame's SP slot before calling do_debug_priv with the frame pointer
 * in r12. Afterwards the same mode switch is used to pop lr, the saved SP
 * slot is skipped, r0-r12 are popped and retd returns.
 */
505 /* If we get a debug trap from privileged context we end up here */
506handle_debug_priv:
507 /* Fix up LR and SP in regs. r11 contains the mode we came from */
508 mfsr r8, SYSREG_SR
509 mov r9, r8
510 andh r8, hi(~MODE_MASK)
511 or r8, r11
512 mtsr SYSREG_SR, r8
513 sub pc, -2
514 stdsp sp[REG_LR], lr
515 mtsr SYSREG_SR, r9
516 sub pc, -2
517 sub r10, sp, -FRAME_SIZE_FULL
518 stdsp sp[REG_SP], r10
519 mov r12, sp
520 rcall do_debug_priv
521
522 /* Now, put everything back */
523 ssrf SR_EM_BIT
524 popm r10, r11
525 mtsr SYSREG_RAR_DBG, r10
526 mtsr SYSREG_RSR_DBG, r11
527 mfsr r8, SYSREG_SR
528 mov r9, r8
529 andh r8, hi(~MODE_MASK)
530 andh r11, hi(MODE_MASK)
531 or r8, r11
532 mtsr SYSREG_SR, r8
533 sub pc, -2
534 popm lr
535 mtsr SYSREG_SR, r9
536 sub pc, -2
537 sub sp, -4 /* skip SP */
538 popm r0-r12
539 sub sp, -4
540 retd
541
/*
 * Debug trap entry and exit.
 *
 * handle_debug: build the frame (r12_orig slot, r0-lr, then RAR_DBG and
 * RSR_DBG). Non-zero mode bits in the saved status divert to
 * handle_debug_priv; otherwise do_debug is called with the frame pointer
 * in r12. After it returns, zero mode bits in the saved SR lead to
 * debug_resume_user, anything else falls into debug_restore_all.
 * debug_restore_all: reload RSR_DBG and RAR_DBG from the frame, restore
 * r0-lr, skip the r12_orig slot and return with retd.
 * debug_resume_user: if any _TIF_DBGWORK_MASK flag is set, reschedule and
 * deliver signals as needed (labels 1 and 2), then at label 3 set the
 * DC_SS_BIT in the DC debug register when TIF_SINGLE_STEP is set. All
 * paths end in debug_restore_all.
 */
542 /*
543 * At this point, everything is masked, that is, interrupts,
544 * exceptions and debugging traps. We might get called from
545 * interrupt or exception context in some rare cases, but this
546 * will be taken care of by do_debug(), so we're not going to
547 * do a 100% correct context save here.
548 */
549handle_debug:
550 sub sp, 4 /* r12_orig */
551 stmts --sp, r0-lr
552 mfsr r10, SYSREG_RAR_DBG
553 mfsr r11, SYSREG_RSR_DBG
554 unmask_exceptions
555 pushm r10,r11
556 andh r11, (MODE_MASK >> 16), COH
557 brne handle_debug_priv
558
559 mov r12, sp
560 rcall do_debug
561
562 lddsp r10, sp[REG_SR]
563 andh r10, (MODE_MASK >> 16), COH
564 breq debug_resume_user
565
566debug_restore_all:
567 popm r10,r11
568 mask_exceptions
569 mtsr SYSREG_RSR_DBG, r11
570 mtsr SYSREG_RAR_DBG, r10
571 ldmts sp++, r0-lr
572 sub sp, -4
573 retd
574
575debug_resume_user:
576 get_thread_info r0
577 mask_interrupts
578
579 ld.w r1, r0[TI_flags]
580 andl r1, _TIF_DBGWORK_MASK, COH
581 breq debug_restore_all
582
5831: bld r1, TIF_NEED_RESCHED
584 brcc 2f
585 unmask_interrupts
586 rcall schedule
587 mask_interrupts
588 ld.w r1, r0[TI_flags]
589 rjmp 1b
590
5912: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
592 tst r1, r2
593 breq 3f
594 unmask_interrupts
595 mov r12, sp
596 mov r11, r0
597 rcall do_notify_resume
598 mask_interrupts
599 ld.w r1, r0[TI_flags]
600 rjmp 1b
601
6023: bld r1, TIF_SINGLE_STEP
603 brcc debug_restore_all
604 mfdr r2, DBGREG_DC
605 sbr r2, DC_SS_BIT
606 mtdr DBGREG_DC, r2
607 rjmp debug_restore_all
608
/*
 * Interrupt entry points, one per priority level.
 *
 * The .set aliases give the per-level return status/address registers
 * lower-case names so the macro can paste the level number onto them.
 *
 * IRQ_LEVEL level: defines irq_level<level>. It builds the register frame
 * (r12_orig slot, r0-lr, then RAR_INT<level> and RSR_INT<level>) and calls
 * do_IRQ with the level in r12 and the frame pointer in r11. On return:
 *   - zero mode bits in the saved SR: go to irq_exit_work if any
 *     _TIF_WORK_MASK flag is set, otherwise restore at label 1
 *   - non-zero mode bits: restore at label 1, or with CONFIG_PREEMPT first
 *     run the check at label 2, which calls preempt_schedule_irq only when
 *     the preempt count is zero, TIF_NEED_RESCHED is set and the GM bit of
 *     the saved SR is clear
 * Label 1 reloads the return registers, restores r0-lr, skips r12_orig and
 * returns with rete.
 *
 * The macro is instantiated for levels 0-3 in section .irq.text, and the
 * four entry points are exported with .global.
 */
609 .set rsr_int0, SYSREG_RSR_INT0
610 .set rsr_int1, SYSREG_RSR_INT1
611 .set rsr_int2, SYSREG_RSR_INT2
612 .set rsr_int3, SYSREG_RSR_INT3
613 .set rar_int0, SYSREG_RAR_INT0
614 .set rar_int1, SYSREG_RAR_INT1
615 .set rar_int2, SYSREG_RAR_INT2
616 .set rar_int3, SYSREG_RAR_INT3
617
618 .macro IRQ_LEVEL level
619 .type irq_level\level, @function
620irq_level\level:
621 sub sp, 4 /* r12_orig */
622 stmts --sp,r0-lr
623 mfsr r8, rar_int\level
624 mfsr r9, rsr_int\level
625 pushm r8-r9
626
627 mov r11, sp
628 mov r12, \level
629
630 rcall do_IRQ
631
632 lddsp r4, sp[REG_SR]
633 andh r4, (MODE_MASK >> 16), COH
634#ifdef CONFIG_PREEMPT
635 brne 2f
636#else
637 brne 1f
638#endif
639
640 get_thread_info r0
641 ld.w r1, r0[TI_flags]
642 andl r1, _TIF_WORK_MASK, COH
643 brne irq_exit_work
644
6451: popm r8-r9
646 mtsr rar_int\level, r8
647 mtsr rsr_int\level, r9
648 ldmts sp++,r0-lr
649 sub sp, -4 /* ignore r12_orig */
650 rete
651
652#ifdef CONFIG_PREEMPT
6532:
654 get_thread_info r0
655 ld.w r2, r0[TI_preempt_count]
656 cp.w r2, 0
657 brne 1b
658 ld.w r1, r0[TI_flags]
659 bld r1, TIF_NEED_RESCHED
660 brcc 1b
661 lddsp r4, sp[REG_SR]
662 bld r4, SYSREG_GM_OFFSET
663 brcs 1b
664 rcall preempt_schedule_irq
665 rjmp 1b
666#endif
667 .endm
668
669 .section .irq.text,"ax",@progbits
670
671 .global irq_level0
672 .global irq_level1
673 .global irq_level2
674 .global irq_level3
675 IRQ_LEVEL 0
676 IRQ_LEVEL 1
677 IRQ_LEVEL 2
678 IRQ_LEVEL 3
diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S
new file mode 100644
index 000000000000..773b7ad87be9
--- /dev/null
+++ b/arch/avr32/kernel/head.S
@@ -0,0 +1,45 @@
1/*
2 * Non-board-specific low-level startup code
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/linkage.h>
11
12#include <asm/page.h>
13#include <asm/thread_info.h>
14#include <asm/sysreg.h>
15
/*
 * kernel_entry: first kernel code to run, in section .init.text.
 *
 * Loads the status register from init_sr, points sp at the end of
 * init_thread_union (its address plus THREAD_SIZE), optionally zeroes lr
 * and r7 to terminate the frame chain, calls board_early_init and finally
 * jumps to start_kernel by loading pc. The three .long literals after the
 * code are the constants fetched PC-relative by lddpc.
 */
16 .section .init.text,"ax"
17 .global kernel_entry
18kernel_entry:
19 /* Initialize status register */
20 lddpc r0, init_sr
21 mtsr SYSREG_SR, r0
22
23 /* Set initial stack pointer */
24 lddpc sp, stack_addr
25 sub sp, -THREAD_SIZE
26
27#ifdef CONFIG_FRAME_POINTER
28 /* Mark last stack frame */
29 mov lr, 0
30 mov r7, 0
31#endif
32
33 /* Set up the PIO, SDRAM controller, early printk, etc. */
34 rcall board_early_init
35
36 /* Start the show */
37 lddpc pc, kernel_start_addr
38
39 .align 2
40init_sr:
41 .long 0x007f0000 /* Supervisor mode, everything masked */
42stack_addr:
43 .long init_thread_union
44kernel_start_addr:
45 .long start_kernel
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
new file mode 100644
index 000000000000..effcacf9d1a2
--- /dev/null
+++ b/arch/avr32/kernel/init_task.c
@@ -0,0 +1,38 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/sched.h>
11#include <linux/init_task.h>
12#include <linux/mqueue.h>
13
14#include <asm/pgtable.h>
15
/*
 * Statically initialised objects for the initial task, each built with
 * the matching INIT_* macro. init_fs, init_files, init_signals and
 * init_sighand are file-local; init_mm is global and exported.
 */
16static struct fs_struct init_fs = INIT_FS;
17static struct files_struct init_files = INIT_FILES;
18static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
19static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
20struct mm_struct init_mm = INIT_MM(init_mm);
21
22EXPORT_SYMBOL(init_mm);
23
24/*
25 * Initial thread structure. Must be aligned on an 8192-byte boundary.
26 */
/*
 * The union is placed in its own section, .data.init_task.
 * NOTE(review): the alignment itself is not requested here -- presumably
 * the linker script aligns that section; confirm.
 */
27union thread_union init_thread_union
28 __attribute__((__section__(".data.init_task"))) =
29 { INIT_THREAD_INFO(init_task) };
30
31/*
32 * Initial task structure.
33 *
34 * All other task structs will be allocated on slabs in fork.c
35 */
36struct task_struct init_task = INIT_TASK(init_task);
37
38EXPORT_SYMBOL(init_task);
diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c
new file mode 100644
index 000000000000..856f3548e664
--- /dev/null
+++ b/arch/avr32/kernel/irq.c
@@ -0,0 +1,71 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * Based on arch/i386/kernel/irq.c
5 * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This file contains the code used by various IRQ handling routines:
12 * asking for different IRQ's should be done through these routines
13 * instead of just grabbing them. Thus setups with different IRQ numbers
14 * shouldn't result in any weird surprises, and installing new handlers
15 * should be easier.
16 *
17 * IRQ's are in fact implemented a bit like signal handlers for the kernel.
18 * Naturally it's not a 1:1 relation, but there are similarities.
19 */
20
21#include <linux/interrupt.h>
22#include <linux/irq.h>
23#include <linux/kernel_stat.h>
24#include <linux/proc_fs.h>
25#include <linux/seq_file.h>
26#include <linux/sysdev.h>
27
28/*
29 * 'what should we do if we get a hw irq event on an illegal vector'.
30 * each architecture has to answer this themselves.
31 */
/* Report an unexpected interrupt number; nothing else is done about it. */
32void ack_bad_irq(unsigned int irq)
33{
34 printk("unexpected IRQ %u\n", irq);
35}
36
37#ifdef CONFIG_PROC_FS
38int show_interrupts(struct seq_file *p, void *v)
39{
40 int i = *(loff_t *)v, cpu;
41 struct irqaction *action;
42 unsigned long flags;
43
44 if (i == 0) {
45 seq_puts(p, " ");
46 for_each_online_cpu(cpu)
47 seq_printf(p, "CPU%d ", cpu);
48 seq_putc(p, '\n');
49 }
50
51 if (i < NR_IRQS) {
52 spin_lock_irqsave(&irq_desc[i].lock, flags);
53 action = irq_desc[i].action;
54 if (!action)
55 goto unlock;
56
57 seq_printf(p, "%3d: ", i);
58 for_each_online_cpu(cpu)
59 seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
60 seq_printf(p, " %s", action->name);
61 for (action = action->next; action; action = action->next)
62 seq_printf(p, ", %s", action->name);
63
64 seq_putc(p, '\n');
65 unlock:
66 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
67 }
68
69 return 0;
70}
71#endif
diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c
new file mode 100644
index 000000000000..6caf9e8d8080
--- /dev/null
+++ b/arch/avr32/kernel/kprobes.c
@@ -0,0 +1,270 @@
1/*
2 * Kernel Probes (KProbes)
3 *
4 * Copyright (C) 2005-2006 Atmel Corporation
5 *
6 * Based on arch/ppc64/kernel/kprobes.c
7 * Copyright (C) IBM Corporation, 2002, 2004
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/kprobes.h>
15#include <linux/ptrace.h>
16
17#include <asm/cacheflush.h>
18#include <asm/kdebug.h>
19#include <asm/ocd.h>
20
21DEFINE_PER_CPU(struct kprobe *, current_kprobe);
22static unsigned long kprobe_status;
23static struct pt_regs jprobe_saved_regs;
24
25int __kprobes arch_prepare_kprobe(struct kprobe *p)
26{
27 int ret = 0;
28
29 if ((unsigned long)p->addr & 0x01) {
30 printk("Attempt to register kprobe at an unaligned address\n");
31 ret = -EINVAL;
32 }
33
34 /* XXX: Might be a good idea to check if p->addr is a valid
35 * kernel address as well... */
36
37 if (!ret) {
38 pr_debug("copy kprobe at %p\n", p->addr);
39 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
40 p->opcode = *p->addr;
41 }
42
43 return ret;
44}
45
46void __kprobes arch_arm_kprobe(struct kprobe *p)
47{
48 pr_debug("arming kprobe at %p\n", p->addr);
49 *p->addr = BREAKPOINT_INSTRUCTION;
50 flush_icache_range((unsigned long)p->addr,
51 (unsigned long)p->addr + sizeof(kprobe_opcode_t));
52}
53
54void __kprobes arch_disarm_kprobe(struct kprobe *p)
55{
56 pr_debug("disarming kprobe at %p\n", p->addr);
57 *p->addr = p->opcode;
58 flush_icache_range((unsigned long)p->addr,
59 (unsigned long)p->addr + sizeof(kprobe_opcode_t));
60}
61
62static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
63{
64 unsigned long dc;
65
66 pr_debug("preparing to singlestep over %p (PC=%08lx)\n",
67 p->addr, regs->pc);
68
69 BUG_ON(!(sysreg_read(SR) & SYSREG_BIT(SR_D)));
70
71 dc = __mfdr(DBGREG_DC);
72 dc |= DC_SS;
73 __mtdr(DBGREG_DC, dc);
74
75 /*
76 * We must run the instruction from its original location
77 * since it may actually reference PC.
78 *
79 * TODO: Do the instruction replacement directly in icache.
80 */
81 *p->addr = p->opcode;
82 flush_icache_range((unsigned long)p->addr,
83 (unsigned long)p->addr + sizeof(kprobe_opcode_t));
84}
85
86static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
87{
88 unsigned long dc;
89
90 pr_debug("resuming execution at PC=%08lx\n", regs->pc);
91
92 dc = __mfdr(DBGREG_DC);
93 dc &= ~DC_SS;
94 __mtdr(DBGREG_DC, dc);
95
96 *p->addr = BREAKPOINT_INSTRUCTION;
97 flush_icache_range((unsigned long)p->addr,
98 (unsigned long)p->addr + sizeof(kprobe_opcode_t));
99}
100
101static void __kprobes set_current_kprobe(struct kprobe *p)
102{
103 __get_cpu_var(current_kprobe) = p;
104}
105
106static int __kprobes kprobe_handler(struct pt_regs *regs)
107{
108 struct kprobe *p;
109 void *addr = (void *)regs->pc;
110 int ret = 0;
111
112 pr_debug("kprobe_handler: kprobe_running=%d\n",
113 kprobe_running());
114
115 /*
116 * We don't want to be preempted for the entire
117 * duration of kprobe processing
118 */
119 preempt_disable();
120
121 /* Check that we're not recursing */
122 if (kprobe_running()) {
123 p = get_kprobe(addr);
124 if (p) {
125 if (kprobe_status == KPROBE_HIT_SS) {
126 printk("FIXME: kprobe hit while single-stepping!\n");
127 goto no_kprobe;
128 }
129
130 printk("FIXME: kprobe hit while handling another kprobe\n");
131 goto no_kprobe;
132 } else {
133 p = kprobe_running();
134 if (p->break_handler && p->break_handler(p, regs))
135 goto ss_probe;
136 }
137 /* If it's not ours, can't be delete race, (we hold lock). */
138 goto no_kprobe;
139 }
140
141 p = get_kprobe(addr);
142 if (!p)
143 goto no_kprobe;
144
145 kprobe_status = KPROBE_HIT_ACTIVE;
146 set_current_kprobe(p);
147 if (p->pre_handler && p->pre_handler(p, regs))
148 /* handler has already set things up, so skip ss setup */
149 return 1;
150
151ss_probe:
152 prepare_singlestep(p, regs);
153 kprobe_status = KPROBE_HIT_SS;
154 return 1;
155
156no_kprobe:
157 return ret;
158}
159
160static int __kprobes post_kprobe_handler(struct pt_regs *regs)
161{
162 struct kprobe *cur = kprobe_running();
163
164 pr_debug("post_kprobe_handler, cur=%p\n", cur);
165
166 if (!cur)
167 return 0;
168
169 if (cur->post_handler) {
170 kprobe_status = KPROBE_HIT_SSDONE;
171 cur->post_handler(cur, regs, 0);
172 }
173
174 resume_execution(cur, regs);
175 reset_current_kprobe();
176 preempt_enable_no_resched();
177
178 return 1;
179}
180
181static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
182{
183 struct kprobe *cur = kprobe_running();
184
185 pr_debug("kprobe_fault_handler: trapnr=%d\n", trapnr);
186
187 if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
188 return 1;
189
190 if (kprobe_status & KPROBE_HIT_SS) {
191 resume_execution(cur, regs);
192 preempt_enable_no_resched();
193 }
194 return 0;
195}
196
197/*
198 * Wrapper routine to for handling exceptions.
199 */
200int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
201 unsigned long val, void *data)
202{
203 struct die_args *args = (struct die_args *)data;
204 int ret = NOTIFY_DONE;
205
206 pr_debug("kprobe_exceptions_notify: val=%lu, data=%p\n",
207 val, data);
208
209 switch (val) {
210 case DIE_BREAKPOINT:
211 if (kprobe_handler(args->regs))
212 ret = NOTIFY_STOP;
213 break;
214 case DIE_SSTEP:
215 if (post_kprobe_handler(args->regs))
216 ret = NOTIFY_STOP;
217 break;
218 case DIE_FAULT:
219 if (kprobe_running()
220 && kprobe_fault_handler(args->regs, args->trapnr))
221 ret = NOTIFY_STOP;
222 break;
223 default:
224 break;
225 }
226
227 return ret;
228}
229
230int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
231{
232 struct jprobe *jp = container_of(p, struct jprobe, kp);
233
234 memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs));
235
236 /*
237 * TODO: We should probably save some of the stack here as
238 * well, since gcc may pass arguments on the stack for certain
239 * functions (lots of arguments, large aggregates, varargs)
240 */
241
242 /* setup return addr to the jprobe handler routine */
243 regs->pc = (unsigned long)jp->entry;
244 return 1;
245}
246
247void __kprobes jprobe_return(void)
248{
249 asm volatile("breakpoint" ::: "memory");
250}
251
252int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
253{
254 /*
255 * FIXME - we should ideally be validating that we got here 'cos
256 * of the "trap" in jprobe_return() above, before restoring the
257 * saved regs...
258 */
259 memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs));
260 return 1;
261}
262
263int __init arch_init_kprobes(void)
264{
265 printk("KPROBES: Enabling monitor mode (MM|DBE)...\n");
266 __mtdr(DBGREG_DC, DC_MM | DC_DBE);
267
268 /* TODO: Register kretprobe trampoline */
269 return 0;
270}
diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c
new file mode 100644
index 000000000000..dfc32f2817b6
--- /dev/null
+++ b/arch/avr32/kernel/module.c
@@ -0,0 +1,324 @@
1/*
2 * AVR32-specific kernel module loader
3 *
4 * Copyright (C) 2005-2006 Atmel Corporation
5 *
6 * GOT initialization parts are based on the s390 version
7 * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH,
8 * IBM Corporation
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/moduleloader.h>
16#include <linux/module.h>
17#include <linux/kernel.h>
18#include <linux/elf.h>
19#include <linux/vmalloc.h>
20
21void *module_alloc(unsigned long size)
22{
23 if (size == 0)
24 return NULL;
25 return vmalloc(size);
26}
27
28void module_free(struct module *mod, void *module_region)
29{
30 vfree(mod->arch.syminfo);
31 mod->arch.syminfo = NULL;
32
33 vfree(module_region);
34 /* FIXME: if module_region == mod->init_region, trim exception
35 * table entries. */
36}
37
38static inline int check_rela(Elf32_Rela *rela, struct module *module,
39 char *strings, Elf32_Sym *symbols)
40{
41 struct mod_arch_syminfo *info;
42
43 info = module->arch.syminfo + ELF32_R_SYM(rela->r_info);
44 switch (ELF32_R_TYPE(rela->r_info)) {
45 case R_AVR32_GOT32:
46 case R_AVR32_GOT16:
47 case R_AVR32_GOT8:
48 case R_AVR32_GOT21S:
49 case R_AVR32_GOT18SW: /* mcall */
50 case R_AVR32_GOT16S: /* ld.w */
51 if (rela->r_addend != 0) {
52 printk(KERN_ERR
53 "GOT relocation against %s at offset %u with addend\n",
54 strings + symbols[ELF32_R_SYM(rela->r_info)].st_name,
55 rela->r_offset);
56 return -ENOEXEC;
57 }
58 if (info->got_offset == -1UL) {
59 info->got_offset = module->arch.got_size;
60 module->arch.got_size += sizeof(void *);
61 }
62 pr_debug("GOT[%3lu] %s\n", info->got_offset,
63 strings + symbols[ELF32_R_SYM(rela->r_info)].st_name);
64 break;
65 }
66
67 return 0;
68}
69
70int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
71 char *secstrings, struct module *module)
72{
73 Elf32_Shdr *symtab;
74 Elf32_Sym *symbols;
75 Elf32_Rela *rela;
76 char *strings;
77 int nrela, i, j;
78 int ret;
79
80 /* Find the symbol table */
81 symtab = NULL;
82 for (i = 0; i < hdr->e_shnum; i++)
83 switch (sechdrs[i].sh_type) {
84 case SHT_SYMTAB:
85 symtab = &sechdrs[i];
86 break;
87 }
88 if (!symtab) {
89 printk(KERN_ERR "module %s: no symbol table\n", module->name);
90 return -ENOEXEC;
91 }
92
93 /* Allocate room for one syminfo structure per symbol. */
94 module->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
95 module->arch.syminfo = vmalloc(module->arch.nsyms
96 * sizeof(struct mod_arch_syminfo));
97 if (!module->arch.syminfo)
98 return -ENOMEM;
99
100 symbols = (void *)hdr + symtab->sh_offset;
101 strings = (void *)hdr + sechdrs[symtab->sh_link].sh_offset;
102 for (i = 0; i < module->arch.nsyms; i++) {
103 if (symbols[i].st_shndx == SHN_UNDEF &&
104 strcmp(strings + symbols[i].st_name,
105 "_GLOBAL_OFFSET_TABLE_") == 0)
106 /* "Define" it as absolute. */
107 symbols[i].st_shndx = SHN_ABS;
108 module->arch.syminfo[i].got_offset = -1UL;
109 module->arch.syminfo[i].got_initialized = 0;
110 }
111
112 /* Allocate GOT entries for symbols that need it. */
113 module->arch.got_size = 0;
114 for (i = 0; i < hdr->e_shnum; i++) {
115 if (sechdrs[i].sh_type != SHT_RELA)
116 continue;
117 nrela = sechdrs[i].sh_size / sizeof(Elf32_Rela);
118 rela = (void *)hdr + sechdrs[i].sh_offset;
119 for (j = 0; j < nrela; j++) {
120 ret = check_rela(rela + j, module,
121 strings, symbols);
122 if (ret)
123 goto out_free_syminfo;
124 }
125 }
126
127 /*
128 * Increase core size to make room for GOT and set start
129 * offset for GOT.
130 */
131 module->core_size = ALIGN(module->core_size, 4);
132 module->arch.got_offset = module->core_size;
133 module->core_size += module->arch.got_size;
134
135 return 0;
136
137out_free_syminfo:
138 vfree(module->arch.syminfo);
139 module->arch.syminfo = NULL;
140
141 return ret;
142}
143
144static inline int reloc_overflow(struct module *module, const char *reloc_name,
145 Elf32_Addr relocation)
146{
147 printk(KERN_ERR "module %s: Value %lx does not fit relocation %s\n",
148 module->name, (unsigned long)relocation, reloc_name);
149 return -ENOEXEC;
150}
151
/*
 * Read / write a 16-bit value through an arbitrary pointer.  The
 * arguments are parenthesized so that an expression such as "p + 2"
 * is evaluated before the cast is applied, not after.
 */
#define get_u16(loc)		(*((uint16_t *)(loc)))
#define put_u16(loc, val)	(*((uint16_t *)(loc)) = (val))
154
155int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
156 unsigned int symindex, unsigned int relindex,
157 struct module *module)
158{
159 Elf32_Shdr *symsec = sechdrs + symindex;
160 Elf32_Shdr *relsec = sechdrs + relindex;
161 Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
162 Elf32_Rela *rel = (void *)relsec->sh_addr;
163 unsigned int i;
164 int ret = 0;
165
166 for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rela); i++, rel++) {
167 struct mod_arch_syminfo *info;
168 Elf32_Sym *sym;
169 Elf32_Addr relocation;
170 uint32_t *location;
171 uint32_t value;
172
173 location = (void *)dstsec->sh_addr + rel->r_offset;
174 sym = (Elf32_Sym *)symsec->sh_addr + ELF32_R_SYM(rel->r_info);
175 relocation = sym->st_value + rel->r_addend;
176
177 info = module->arch.syminfo + ELF32_R_SYM(rel->r_info);
178
179 /* Initialize GOT entry if necessary */
180 switch (ELF32_R_TYPE(rel->r_info)) {
181 case R_AVR32_GOT32:
182 case R_AVR32_GOT16:
183 case R_AVR32_GOT8:
184 case R_AVR32_GOT21S:
185 case R_AVR32_GOT18SW:
186 case R_AVR32_GOT16S:
187 if (!info->got_initialized) {
188 Elf32_Addr *gotent;
189
190 gotent = (module->module_core
191 + module->arch.got_offset
192 + info->got_offset);
193 *gotent = relocation;
194 info->got_initialized = 1;
195 }
196
197 relocation = info->got_offset;
198 break;
199 }
200
201 switch (ELF32_R_TYPE(rel->r_info)) {
202 case R_AVR32_32:
203 case R_AVR32_32_CPENT:
204 *location = relocation;
205 break;
206 case R_AVR32_22H_PCREL:
207 relocation -= (Elf32_Addr)location;
208 if ((relocation & 0xffe00001) != 0
209 && (relocation & 0xffc00001) != 0xffc00000)
210 return reloc_overflow(module,
211 "R_AVR32_22H_PCREL",
212 relocation);
213 relocation >>= 1;
214
215 value = *location;
216 value = ((value & 0xe1ef0000)
217 | (relocation & 0xffff)
218 | ((relocation & 0x10000) << 4)
219 | ((relocation & 0x1e0000) << 8));
220 *location = value;
221 break;
222 case R_AVR32_11H_PCREL:
223 relocation -= (Elf32_Addr)location;
224 if ((relocation & 0xfffffc01) != 0
225 && (relocation & 0xfffff801) != 0xfffff800)
226 return reloc_overflow(module,
227 "R_AVR32_11H_PCREL",
228 relocation);
229 value = get_u16(location);
230 value = ((value & 0xf00c)
231 | ((relocation & 0x1fe) << 3)
232 | ((relocation & 0x600) >> 9));
233 put_u16(location, value);
234 break;
235 case R_AVR32_9H_PCREL:
236 relocation -= (Elf32_Addr)location;
237 if ((relocation & 0xffffff01) != 0
238 && (relocation & 0xfffffe01) != 0xfffffe00)
239 return reloc_overflow(module,
240 "R_AVR32_9H_PCREL",
241 relocation);
242 value = get_u16(location);
243 value = ((value & 0xf00f)
244 | ((relocation & 0x1fe) << 3));
245 put_u16(location, value);
246 break;
247 case R_AVR32_9UW_PCREL:
248 relocation -= ((Elf32_Addr)location) & 0xfffffffc;
249 if ((relocation & 0xfffffc03) != 0)
250 return reloc_overflow(module,
251 "R_AVR32_9UW_PCREL",
252 relocation);
253 value = get_u16(location);
254 value = ((value & 0xf80f)
255 | ((relocation & 0x1fc) << 2));
256 put_u16(location, value);
257 break;
258 case R_AVR32_GOTPC:
259 /*
260 * R6 = PC - (PC - GOT)
261 *
262 * At this point, relocation contains the
263 * value of PC. Just subtract the value of
264 * GOT, and we're done.
265 */
266 pr_debug("GOTPC: PC=0x%lx, got_offset=0x%lx, core=0x%p\n",
267 relocation, module->arch.got_offset,
268 module->module_core);
269 relocation -= ((unsigned long)module->module_core
270 + module->arch.got_offset);
271 *location = relocation;
272 break;
273 case R_AVR32_GOT18SW:
274 if ((relocation & 0xfffe0003) != 0
275 && (relocation & 0xfffc0003) != 0xffff0000)
276 return reloc_overflow(module, "R_AVR32_GOT18SW",
277 relocation);
278 relocation >>= 2;
279 /* fall through */
280 case R_AVR32_GOT16S:
281 if ((relocation & 0xffff8000) != 0
282 && (relocation & 0xffff0000) != 0xffff0000)
283 return reloc_overflow(module, "R_AVR32_GOT16S",
284 relocation);
285 pr_debug("GOT reloc @ 0x%lx -> %lu\n",
286 rel->r_offset, relocation);
287 value = *location;
288 value = ((value & 0xffff0000)
289 | (relocation & 0xffff));
290 *location = value;
291 break;
292
293 default:
294 printk(KERN_ERR "module %s: Unknown relocation: %u\n",
295 module->name, ELF32_R_TYPE(rel->r_info));
296 return -ENOEXEC;
297 }
298 }
299
300 return ret;
301}
302
303int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab,
304 unsigned int symindex, unsigned int relindex,
305 struct module *module)
306{
307 printk(KERN_ERR "module %s: REL relocations are not supported\n",
308 module->name);
309 return -ENOEXEC;
310}
311
312int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
313 struct module *module)
314{
315 vfree(module->arch.syminfo);
316 module->arch.syminfo = NULL;
317
318 return 0;
319}
320
321void module_arch_cleanup(struct module *module)
322{
323
324}
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
new file mode 100644
index 000000000000..317dc50945f2
--- /dev/null
+++ b/arch/avr32/kernel/process.c
@@ -0,0 +1,276 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/sched.h>
9#include <linux/module.h>
10#include <linux/kallsyms.h>
11#include <linux/fs.h>
12#include <linux/ptrace.h>
13#include <linux/reboot.h>
14#include <linux/unistd.h>
15
16#include <asm/sysreg.h>
17#include <asm/ocd.h>
18
19void (*pm_power_off)(void) = NULL;
20EXPORT_SYMBOL(pm_power_off);
21
22/*
23 * This file handles the architecture-dependent parts of process handling..
24 */
25
/* The idle loop: spin until a reschedule is needed, then call schedule(). */
void cpu_idle(void)
{
	/* endless idle loop with no priority at all */
	for (;;) {
		/* TODO: Enter sleep mode */
		while (!need_resched())
			cpu_relax();

		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}
38
/* Halting is not implemented: this does nothing and returns. */
void machine_halt(void)
{
}

/* Powering off is not implemented: this does nothing and returns. */
void machine_power_off(void)
{
}
46
47void machine_restart(char *cmd)
48{
49 __mtdr(DBGREG_DC, DC_DBE);
50 __mtdr(DBGREG_DC, DC_RES);
51 while (1) ;
52}
53
54/*
55 * PC is actually discarded when returning from a system call -- the
56 * return address must be stored in LR. This function will make sure
57 * LR points to do_exit before starting the thread.
58 *
59 * Also, when returning from fork(), r12 is 0, so we must copy the
60 * argument as well.
61 *
62 * r0 : The argument to the main thread function
63 * r1 : The address of the main thread function
64 * r2 : The address of do_exit
65 */
66asmlinkage extern void kernel_thread_helper(void);
67__asm__(" .type kernel_thread_helper, @function\n"
68 "kernel_thread_helper:\n"
69 " mov r12, r0\n"
70 " mov lr, r2\n"
71 " mov pc, r1\n"
72 " .size kernel_thread_helper, . - kernel_thread_helper");
73
74int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
75{
76 struct pt_regs regs;
77
78 memset(&regs, 0, sizeof(regs));
79
80 regs.r0 = (unsigned long)arg;
81 regs.r1 = (unsigned long)fn;
82 regs.r2 = (unsigned long)do_exit;
83 regs.lr = (unsigned long)kernel_thread_helper;
84 regs.pc = (unsigned long)kernel_thread_helper;
85 regs.sr = MODE_SUPERVISOR;
86
87 return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
88 0, &regs, 0, NULL, NULL);
89}
90EXPORT_SYMBOL(kernel_thread);
91
92/*
93 * Free current thread data structures etc
94 */
/* Intentionally empty: nothing is done when a thread exits. */
void exit_thread(void)
{
}

/* Intentionally empty: nothing is done on flush. */
void flush_thread(void)
{
}

/* Intentionally empty: nothing is done for @dead_task. */
void release_thread(struct task_struct *dead_task)
{
}
109
/*
 * Human-readable names for the eight values of the status register's
 * mode field; indexed by that field in show_regs().  Both the strings
 * and the table itself are read-only.
 */
static const char *const cpu_modes[] = {
	"Application",
	"Supervisor",
	"Interrupt level 0",
	"Interrupt level 1",
	"Interrupt level 2",
	"Interrupt level 3",
	"Exception",
	"NMI",
};
114
115void show_regs(struct pt_regs *regs)
116{
117 unsigned long sp = regs->sp;
118 unsigned long lr = regs->lr;
119 unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT;
120
121 if (!user_mode(regs))
122 sp = (unsigned long)regs + FRAME_SIZE_FULL;
123
124 print_symbol("PC is at %s\n", instruction_pointer(regs));
125 print_symbol("LR is at %s\n", lr);
126 printk("pc : [<%08lx>] lr : [<%08lx>] %s\n"
127 "sp : %08lx r12: %08lx r11: %08lx\n",
128 instruction_pointer(regs),
129 lr, print_tainted(), sp, regs->r12, regs->r11);
130 printk("r10: %08lx r9 : %08lx r8 : %08lx\n",
131 regs->r10, regs->r9, regs->r8);
132 printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n",
133 regs->r7, regs->r6, regs->r5, regs->r4);
134 printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n",
135 regs->r3, regs->r2, regs->r1, regs->r0);
136 printk("Flags: %c%c%c%c%c\n",
137 regs->sr & SR_Q ? 'Q' : 'q',
138 regs->sr & SR_V ? 'V' : 'v',
139 regs->sr & SR_N ? 'N' : 'n',
140 regs->sr & SR_Z ? 'Z' : 'z',
141 regs->sr & SR_C ? 'C' : 'c');
142 printk("Mode bits: %c%c%c%c%c%c%c%c%c\n",
143 regs->sr & SR_H ? 'H' : 'h',
144 regs->sr & SR_R ? 'R' : 'r',
145 regs->sr & SR_J ? 'J' : 'j',
146 regs->sr & SR_EM ? 'E' : 'e',
147 regs->sr & SR_I3M ? '3' : '.',
148 regs->sr & SR_I2M ? '2' : '.',
149 regs->sr & SR_I1M ? '1' : '.',
150 regs->sr & SR_I0M ? '0' : '.',
151 regs->sr & SR_GM ? 'G' : 'g');
152 printk("CPU Mode: %s\n", cpu_modes[mode]);
153
154 show_trace(NULL, (unsigned long *)sp, regs);
155}
156EXPORT_SYMBOL(show_regs);
157
158/* Fill in the fpu structure for a core dump. This is easy -- we don't have any */
159int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
160{
161 /* Not valid */
162 return 0;
163}
164
165asmlinkage void ret_from_fork(void);
166
167int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
168 unsigned long unused,
169 struct task_struct *p, struct pt_regs *regs)
170{
171 struct pt_regs *childregs;
172
173 childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1;
174 *childregs = *regs;
175
176 if (user_mode(regs))
177 childregs->sp = usp;
178 else
179 childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE;
180
181 childregs->r12 = 0; /* Set return value for child */
182
183 p->thread.cpu_context.sr = MODE_SUPERVISOR | SR_GM;
184 p->thread.cpu_context.ksp = (unsigned long)childregs;
185 p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
186
187 return 0;
188}
189
190/* r12-r8 are dummy parameters to force the compiler to use the stack */
191asmlinkage int sys_fork(struct pt_regs *regs)
192{
193 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
194}
195
196asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
197 unsigned long parent_tidptr,
198 unsigned long child_tidptr, struct pt_regs *regs)
199{
200 if (!newsp)
201 newsp = regs->sp;
202 return do_fork(clone_flags, newsp, regs, 0,
203 (int __user *)parent_tidptr,
204 (int __user *)child_tidptr);
205}
206
207asmlinkage int sys_vfork(struct pt_regs *regs)
208{
209 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs,
210 0, NULL, NULL);
211}
212
213asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
214 char __user *__user *uenvp, struct pt_regs *regs)
215{
216 int error;
217 char *filename;
218
219 filename = getname(ufilename);
220 error = PTR_ERR(filename);
221 if (IS_ERR(filename))
222 goto out;
223
224 error = do_execve(filename, uargv, uenvp, regs);
225 if (error == 0)
226 current->ptrace &= ~PT_DTRACE;
227 putname(filename);
228
229out:
230 return error;
231}
232
233
234/*
235 * This function is supposed to answer the question "who called
236 * schedule()?"
237 */
238unsigned long get_wchan(struct task_struct *p)
239{
240 unsigned long pc;
241 unsigned long stack_page;
242
243 if (!p || p == current || p->state == TASK_RUNNING)
244 return 0;
245
246 stack_page = (unsigned long)p->thread_info;
247 BUG_ON(!stack_page);
248
249 /*
250 * The stored value of PC is either the address right after
251 * the call to __switch_to() or ret_from_fork.
252 */
253 pc = thread_saved_pc(p);
254 if (in_sched_functions(pc)) {
255#ifdef CONFIG_FRAME_POINTER
256 unsigned long fp = p->thread.cpu_context.r7;
257 BUG_ON(fp < stack_page || fp > (THREAD_SIZE + stack_page));
258 pc = *(unsigned long *)fp;
259#else
260 /*
261 * We depend on the frame size of schedule here, which
262 * is actually quite ugly. It might be possible to
263 * determine the frame size automatically at build
264 * time by doing this:
265 * - compile sched.c
266 * - disassemble the resulting sched.o
267 * - look for 'sub sp,??' shortly after '<schedule>:'
268 */
269 unsigned long sp = p->thread.cpu_context.ksp + 16;
270 BUG_ON(sp < stack_page || sp > (THREAD_SIZE + stack_page));
271 pc = *(unsigned long *)sp;
272#endif
273 }
274
275 return pc;
276}
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
new file mode 100644
index 000000000000..3c89e59029ab
--- /dev/null
+++ b/arch/avr32/kernel/ptrace.c
@@ -0,0 +1,371 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#undef DEBUG
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/mm.h>
12#include <linux/smp_lock.h>
13#include <linux/ptrace.h>
14#include <linux/errno.h>
15#include <linux/user.h>
16#include <linux/security.h>
17#include <linux/unistd.h>
18#include <linux/notifier.h>
19
20#include <asm/traps.h>
21#include <asm/uaccess.h>
22#include <asm/ocd.h>
23#include <asm/mmu_context.h>
24#include <asm/kdebug.h>
25
26static struct pt_regs *get_user_regs(struct task_struct *tsk)
27{
28 return (struct pt_regs *)((unsigned long) tsk->thread_info +
29 THREAD_SIZE - sizeof(struct pt_regs));
30}
31
32static void ptrace_single_step(struct task_struct *tsk)
33{
34 pr_debug("ptrace_single_step: pid=%u, SR=0x%08lx\n",
35 tsk->pid, tsk->thread.cpu_context.sr);
36 if (!(tsk->thread.cpu_context.sr & SR_D)) {
37 /*
38 * Set a breakpoint at the current pc to force the
39 * process into debug mode. The syscall/exception
40 * exit code will set a breakpoint at the return
41 * address when this flag is set.
42 */
43 pr_debug("ptrace_single_step: Setting TIF_BREAKPOINT\n");
44 set_tsk_thread_flag(tsk, TIF_BREAKPOINT);
45 }
46
47 /* The monitor code will do the actual step for us */
48 set_tsk_thread_flag(tsk, TIF_SINGLE_STEP);
49}
50
51/*
52 * Called by kernel/ptrace.c when detaching
53 *
54 * Make sure any single step bits, etc. are not set
55 */
56void ptrace_disable(struct task_struct *child)
57{
58 clear_tsk_thread_flag(child, TIF_SINGLE_STEP);
59}
60
61/*
62 * Handle hitting a breakpoint
63 */
64static void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
65{
66 siginfo_t info;
67
68 info.si_signo = SIGTRAP;
69 info.si_errno = 0;
70 info.si_code = TRAP_BRKPT;
71 info.si_addr = (void __user *)instruction_pointer(regs);
72
73 pr_debug("ptrace_break: Sending SIGTRAP to PID %u (pc = 0x%p)\n",
74 tsk->pid, info.si_addr);
75 force_sig_info(SIGTRAP, &info, tsk);
76}
77
78/*
79 * Read the word at offset "offset" into the task's "struct user". We
80 * actually access the pt_regs struct stored on the kernel stack.
81 */
82static int ptrace_read_user(struct task_struct *tsk, unsigned long offset,
83 unsigned long __user *data)
84{
85 unsigned long *regs;
86 unsigned long value;
87
88 pr_debug("ptrace_read_user(%p, %#lx, %p)\n",
89 tsk, offset, data);
90
91 if (offset & 3 || offset >= sizeof(struct user)) {
92 printk("ptrace_read_user: invalid offset 0x%08lx\n", offset);
93 return -EIO;
94 }
95
96 regs = (unsigned long *)get_user_regs(tsk);
97
98 value = 0;
99 if (offset < sizeof(struct pt_regs))
100 value = regs[offset / sizeof(regs[0])];
101
102 return put_user(value, data);
103}
104
105/*
106 * Write the word "value" to offset "offset" into the task's "struct
107 * user". We actually access the pt_regs struct stored on the kernel
108 * stack.
109 */
110static int ptrace_write_user(struct task_struct *tsk, unsigned long offset,
111 unsigned long value)
112{
113 unsigned long *regs;
114
115 if (offset & 3 || offset >= sizeof(struct user)) {
116 printk("ptrace_write_user: invalid offset 0x%08lx\n", offset);
117 return -EIO;
118 }
119
120 if (offset >= sizeof(struct pt_regs))
121 return 0;
122
123 regs = (unsigned long *)get_user_regs(tsk);
124 regs[offset / sizeof(regs[0])] = value;
125
126 return 0;
127}
128
129static int ptrace_getregs(struct task_struct *tsk, void __user *uregs)
130{
131 struct pt_regs *regs = get_user_regs(tsk);
132
133 return copy_to_user(uregs, regs, sizeof(*regs)) ? -EFAULT : 0;
134}
135
136static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs)
137{
138 struct pt_regs newregs;
139 int ret;
140
141 ret = -EFAULT;
142 if (copy_from_user(&newregs, uregs, sizeof(newregs)) == 0) {
143 struct pt_regs *regs = get_user_regs(tsk);
144
145 ret = -EINVAL;
146 if (valid_user_regs(&newregs)) {
147 *regs = newregs;
148 ret = 0;
149 }
150 }
151
152 return ret;
153}
154
155long arch_ptrace(struct task_struct *child, long request, long addr, long data)
156{
157 unsigned long tmp;
158 int ret;
159
160 pr_debug("arch_ptrace(%ld, %ld, %#lx, %#lx)\n",
161 request, child->pid, addr, data);
162
163 pr_debug("ptrace: Enabling monitor mode...\n");
164 __mtdr(DBGREG_DC, __mfdr(DBGREG_DC) | DC_MM | DC_DBE);
165
166 switch (request) {
167 /* Read the word at location addr in the child process */
168 case PTRACE_PEEKTEXT:
169 case PTRACE_PEEKDATA:
170 ret = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
171 if (ret == sizeof(tmp))
172 ret = put_user(tmp, (unsigned long __user *)data);
173 else
174 ret = -EIO;
175 break;
176
177 case PTRACE_PEEKUSR:
178 ret = ptrace_read_user(child, addr,
179 (unsigned long __user *)data);
180 break;
181
182 /* Write the word in data at location addr */
183 case PTRACE_POKETEXT:
184 case PTRACE_POKEDATA:
185 ret = access_process_vm(child, addr, &data, sizeof(data), 1);
186 if (ret == sizeof(data))
187 ret = 0;
188 else
189 ret = -EIO;
190 break;
191
192 case PTRACE_POKEUSR:
193 ret = ptrace_write_user(child, addr, data);
194 break;
195
196 /* continue and stop at next (return from) syscall */
197 case PTRACE_SYSCALL:
198 /* restart after signal */
199 case PTRACE_CONT:
200 ret = -EIO;
201 if (!valid_signal(data))
202 break;
203 if (request == PTRACE_SYSCALL)
204 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
205 else
206 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
207 child->exit_code = data;
208 /* XXX: Are we sure no breakpoints are active here? */
209 wake_up_process(child);
210 ret = 0;
211 break;
212
213 /*
214 * Make the child exit. Best I can do is send it a
215 * SIGKILL. Perhaps it should be put in the status that it
216 * wants to exit.
217 */
218 case PTRACE_KILL:
219 ret = 0;
220 if (child->exit_state == EXIT_ZOMBIE)
221 break;
222 child->exit_code = SIGKILL;
223 wake_up_process(child);
224 break;
225
226 /*
227 * execute single instruction.
228 */
229 case PTRACE_SINGLESTEP:
230 ret = -EIO;
231 if (!valid_signal(data))
232 break;
233 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
234 ptrace_single_step(child);
235 child->exit_code = data;
236 wake_up_process(child);
237 ret = 0;
238 break;
239
240 /* Detach a process that was attached */
241 case PTRACE_DETACH:
242 ret = ptrace_detach(child, data);
243 break;
244
245 case PTRACE_GETREGS:
246 ret = ptrace_getregs(child, (void __user *)data);
247 break;
248
249 case PTRACE_SETREGS:
250 ret = ptrace_setregs(child, (const void __user *)data);
251 break;
252
253 default:
254 ret = ptrace_request(child, request, addr, data);
255 break;
256 }
257
258 pr_debug("sys_ptrace returning %d (DC = 0x%08lx)\n", ret, __mfdr(DBGREG_DC));
259 return ret;
260}
261
262asmlinkage void syscall_trace(void)
263{
264 pr_debug("syscall_trace called\n");
265 if (!test_thread_flag(TIF_SYSCALL_TRACE))
266 return;
267 if (!(current->ptrace & PT_PTRACED))
268 return;
269
270 pr_debug("syscall_trace: notifying parent\n");
271 /* The 0x80 provides a way for the tracing parent to
272 * distinguish between a syscall stop and SIGTRAP delivery */
273 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
274 ? 0x80 : 0));
275
276 /*
277 * this isn't the same as continuing with a signal, but it
278 * will do for normal use. strace only continues with a
279 * signal if the stopping signal is not SIGTRAP. -brl
280 */
281 if (current->exit_code) {
282 pr_debug("syscall_trace: sending signal %d to PID %u\n",
283 current->exit_code, current->pid);
284 send_sig(current->exit_code, current, 1);
285 current->exit_code = 0;
286 }
287}
288
289asmlinkage void do_debug_priv(struct pt_regs *regs)
290{
291 unsigned long dc, ds;
292 unsigned long die_val;
293
294 ds = __mfdr(DBGREG_DS);
295
296 pr_debug("do_debug_priv: pc = %08lx, ds = %08lx\n", regs->pc, ds);
297
298 if (ds & DS_SSS)
299 die_val = DIE_SSTEP;
300 else
301 die_val = DIE_BREAKPOINT;
302
303 if (notify_die(die_val, regs, 0, SIGTRAP) == NOTIFY_STOP)
304 return;
305
306 if (likely(ds & DS_SSS)) {
307 extern void itlb_miss(void);
308 extern void tlb_miss_common(void);
309 struct thread_info *ti;
310
311 dc = __mfdr(DBGREG_DC);
312 dc &= ~DC_SS;
313 __mtdr(DBGREG_DC, dc);
314
315 ti = current_thread_info();
316 ti->flags |= _TIF_BREAKPOINT;
317
318 /* The TLB miss handlers don't check thread flags */
319 if ((regs->pc >= (unsigned long)&itlb_miss)
320 && (regs->pc <= (unsigned long)&tlb_miss_common)) {
321 __mtdr(DBGREG_BWA2A, sysreg_read(RAR_EX));
322 __mtdr(DBGREG_BWC2A, 0x40000001 | (get_asid() << 1));
323 }
324
325 /*
326 * If we're running in supervisor mode, the breakpoint
327 * will take us where we want directly, no need to
328 * single step.
329 */
330 if ((regs->sr & MODE_MASK) != MODE_SUPERVISOR)
331 ti->flags |= TIF_SINGLE_STEP;
332 } else {
333 panic("Unable to handle debug trap at pc = %08lx\n",
334 regs->pc);
335 }
336}
337
338/*
339 * Handle breakpoints, single steps and other debuggy things. To keep
340 * things simple initially, we run with interrupts and exceptions
341 * disabled all the time.
342 */
343asmlinkage void do_debug(struct pt_regs *regs)
344{
345 unsigned long dc, ds;
346
347 ds = __mfdr(DBGREG_DS);
348 pr_debug("do_debug: pc = %08lx, ds = %08lx\n", regs->pc, ds);
349
350 if (test_thread_flag(TIF_BREAKPOINT)) {
351 pr_debug("TIF_BREAKPOINT set\n");
352 /* We're taking care of it */
353 clear_thread_flag(TIF_BREAKPOINT);
354 __mtdr(DBGREG_BWC2A, 0);
355 }
356
357 if (test_thread_flag(TIF_SINGLE_STEP)) {
358 pr_debug("TIF_SINGLE_STEP set, ds = 0x%08lx\n", ds);
359 if (ds & DS_SSS) {
360 dc = __mfdr(DBGREG_DC);
361 dc &= ~DC_SS;
362 __mtdr(DBGREG_DC, dc);
363
364 clear_thread_flag(TIF_SINGLE_STEP);
365 ptrace_break(current, regs);
366 }
367 } else {
368 /* regular breakpoint */
369 ptrace_break(current, regs);
370 }
371}
diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c
new file mode 100644
index 000000000000..1e2705a05016
--- /dev/null
+++ b/arch/avr32/kernel/semaphore.c
@@ -0,0 +1,148 @@
1/*
2 * AVR32 semaphore implementation.
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * Based on linux/arch/i386/kernel/semaphore.c
7 * Copyright (C) 1999 Linus Torvalds
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/sched.h>
15#include <linux/errno.h>
16#include <linux/module.h>
17
18#include <asm/semaphore.h>
19#include <asm/atomic.h>
20
21/*
22 * Semaphores are implemented using a two-way counter:
23 * The "count" variable is decremented for each process
24 * that tries to acquire the semaphore, while the "sleeping"
25 * variable is a count of such acquires.
26 *
27 * Notably, the inline "up()" and "down()" functions can
28 * efficiently test if they need to do any extra work (up
29 * needs to do something only if count was negative before
30 * the increment operation).
31 *
32 * "sleeping" and the contention routine ordering is protected
33 * by the spinlock in the semaphore's waitqueue head.
34 *
35 * Note that these functions are only called when there is
36 * contention on the lock, and as such all this is the
37 * "non-critical" part of the whole semaphore business. The
38 * critical part is the inline stuff in <asm/semaphore.h>
39 * where we want to avoid any extra jumps and calls.
40 */
41
42/*
43 * Logic:
44 * - only on a boundary condition do we need to care. When we go
45 * from a negative count to a non-negative, we wake people up.
46 * - when we go from a non-negative count to a negative do we
47 * (a) synchronize with the "sleeper" count and (b) make sure
48 * that we're on the wakeup list before we synchronize so that
49 * we cannot lose wakeup events.
50 */
51
/*
 * Out-of-line part of up(), used when there is contention: wake up a
 * task sleeping on the semaphore's wait queue.
 */
void __up(struct semaphore *sem)
{
	wake_up(&sem->wait);
}
EXPORT_SYMBOL(__up);
57
/*
 * Out-of-line part of down(), used when there is contention: sleep in
 * TASK_UNINTERRUPTIBLE state until the semaphore has been acquired.
 *
 * sem->sleepers and the wait queue are only touched with
 * sem->wait.lock held; the lock is dropped around schedule() only.
 */
void __sched __down(struct semaphore *sem)
{
	struct task_struct *tsk = current;
	DECLARE_WAITQUEUE(wait, tsk);
	unsigned long flags;

	tsk->state = TASK_UNINTERRUPTIBLE;
	spin_lock_irqsave(&sem->wait.lock, flags);
	add_wait_queue_exclusive_locked(&sem->wait, &wait);

	sem->sleepers++;
	for (;;) {
		int sleepers = sem->sleepers;

		/*
		 * Add "everybody else" into it. They aren't
		 * playing, because we own the spinlock in
		 * the wait_queue_head.
		 */
		if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
			/* Count is non-negative again: we own the semaphore */
			sem->sleepers = 0;
			break;
		}
		sem->sleepers = 1; /* us - see -1 above */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		schedule();

		/* Retake the lock and re-arm the sleep state before retrying */
		spin_lock_irqsave(&sem->wait.lock, flags);
		tsk->state = TASK_UNINTERRUPTIBLE;
	}
	remove_wait_queue_locked(&sem->wait, &wait);
	/* Give any other queued waiter a chance to re-check the count */
	wake_up_locked(&sem->wait);
	spin_unlock_irqrestore(&sem->wait.lock, flags);
	tsk->state = TASK_RUNNING;
}
EXPORT_SYMBOL(__down);
95
/*
 * Out-of-line part of down_interruptible(), used when there is
 * contention: sleep in TASK_INTERRUPTIBLE state until the semaphore has
 * been acquired or a signal is pending.
 *
 * Returns 0 when the semaphore was acquired, -EINTR when the wait was
 * abandoned because of a pending signal.
 */
int __sched __down_interruptible(struct semaphore *sem)
{
	int retval = 0;
	struct task_struct *tsk = current;
	DECLARE_WAITQUEUE(wait, tsk);
	unsigned long flags;

	tsk->state = TASK_INTERRUPTIBLE;
	spin_lock_irqsave(&sem->wait.lock, flags);
	add_wait_queue_exclusive_locked(&sem->wait, &wait);

	sem->sleepers++;
	for (;;) {
		int sleepers = sem->sleepers;

		/*
		 * With signals pending, this turns into the trylock
		 * failure case - we won't be sleeping, and we can't
		 * get the lock as it has contention. Just correct the
		 * count and exit.
		 */
		if (signal_pending(current)) {
			retval = -EINTR;
			sem->sleepers = 0;
			atomic_add(sleepers, &sem->count);
			break;
		}

		/*
		 * Add "everybody else" into it. They aren't
		 * playing, because we own the spinlock in
		 * the wait_queue_head.
		 */
		if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
			/* Count is non-negative again: we own the semaphore */
			sem->sleepers = 0;
			break;
		}
		sem->sleepers = 1; /* us - see -1 above */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		schedule();

		/* Retake the lock and re-arm the sleep state before retrying */
		spin_lock_irqsave(&sem->wait.lock, flags);
		tsk->state = TASK_INTERRUPTIBLE;
	}
	remove_wait_queue_locked(&sem->wait, &wait);
	/* Give any other queued waiter a chance to re-check the count */
	wake_up_locked(&sem->wait);
	spin_unlock_irqrestore(&sem->wait.lock, flags);

	tsk->state = TASK_RUNNING;
	return retval;
}
EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
new file mode 100644
index 000000000000..5d68f3c6990b
--- /dev/null
+++ b/arch/avr32/kernel/setup.c
@@ -0,0 +1,335 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/clk.h>
10#include <linux/init.h>
11#include <linux/sched.h>
12#include <linux/console.h>
13#include <linux/ioport.h>
14#include <linux/bootmem.h>
15#include <linux/fs.h>
16#include <linux/module.h>
17#include <linux/root_dev.h>
18#include <linux/cpu.h>
19
20#include <asm/sections.h>
21#include <asm/processor.h>
22#include <asm/pgtable.h>
23#include <asm/setup.h>
24#include <asm/sysreg.h>
25
26#include <asm/arch/board.h>
27#include <asm/arch/init.h>
28
29extern int root_mountflags;
30
/*
 * Bootloader-provided information about physical memory.
 *
 * Each pointer is the head of a singly linked list that
 * parse_tag_mem_range() appends to: mem_phys for ATAG_MEM tags,
 * mem_reserved for ATAG_RSVD_MEM tags and mem_ramdisk for ATAG_RDIMG
 * tags.
 */
struct tag_mem_range *mem_phys;
struct tag_mem_range *mem_reserved;
struct tag_mem_range *mem_ramdisk;

/*
 * Initialize loops_per_jiffy as 5000000 (500MIPS).
 * Better make it too large than too small...
 *
 * setup_arch() overwrites this default once the CPU clock is known.
 */
struct avr32_cpuinfo boot_cpu_data = {
	.loops_per_jiffy = 5000000
};
EXPORT_SYMBOL(boot_cpu_data);

/* Copy of saved_command_line that setup_arch() returns via *cmdline_p */
static char command_line[COMMAND_LINE_SIZE];

/*
 * Backing storage for the entries of the memory range lists above;
 * parse_tag_mem_range() panics when it runs out of slots.
 *
 * Should be more than enough, but if you have a _really_ complex
 * setup, you might need to increase the size of this...
 */
static struct tag_mem_range __initdata mem_range_cache[32];
/* Index of the next unused slot in mem_range_cache[] */
static unsigned mem_range_next_free;

/*
 * Standard memory resources. The start/end values are filled in by
 * resource_init().
 */
static struct resource mem_res[] = {
	{
		.name = "Kernel code",
		.start = 0,
		.end = 0,
		.flags = IORESOURCE_MEM
	},
	{
		.name = "Kernel data",
		.start = 0,
		.end = 0,
		.flags = IORESOURCE_MEM,
	},
};

/* Convenience names for the two mem_res[] entries */
#define kernel_code mem_res[0]
#define kernel_data mem_res[1]

/*
 * Early framebuffer allocation. Works as follows:
 *   - If fbmem_size is zero, nothing will be allocated or reserved.
 *   - If fbmem_start is zero when setup_bootmem() is called,
 *     fbmem_size bytes will be allocated from the bootmem allocator.
 *   - If fbmem_start is nonzero, an area of size fbmem_size will be
 *     reserved at the physical address fbmem_start if necessary. If
 *     the area isn't in a memory region known to the kernel, it will
 *     be left alone.
 *
 * Board-specific code may use these variables to set up platform data
 * for the framebuffer driver if fbmem_size is nonzero.
 */
static unsigned long __initdata fbmem_start;
static unsigned long __initdata fbmem_size;
92
93/*
94 * "fbmem=xxx[kKmM]" allocates the specified amount of boot memory for
95 * use as framebuffer.
96 *
97 * "fbmem=xxx[kKmM]@yyy[kKmM]" defines a memory region of size xxx and
98 * starting at yyy to be reserved for use as framebuffer.
99 *
100 * The kernel won't verify that the memory region starting at yyy
101 * actually contains usable RAM.
102 */
103static int __init early_parse_fbmem(char *p)
104{
105 fbmem_size = memparse(p, &p);
106 if (*p == '@')
107 fbmem_start = memparse(p, &p);
108 return 0;
109}
110early_param("fbmem", early_parse_fbmem);
111
112static inline void __init resource_init(void)
113{
114 struct tag_mem_range *region;
115
116 kernel_code.start = __pa(init_mm.start_code);
117 kernel_code.end = __pa(init_mm.end_code - 1);
118 kernel_data.start = __pa(init_mm.end_code);
119 kernel_data.end = __pa(init_mm.brk - 1);
120
121 for (region = mem_phys; region; region = region->next) {
122 struct resource *res;
123 unsigned long phys_start, phys_end;
124
125 if (region->size == 0)
126 continue;
127
128 phys_start = region->addr;
129 phys_end = phys_start + region->size - 1;
130
131 res = alloc_bootmem_low(sizeof(*res));
132 res->name = "System RAM";
133 res->start = phys_start;
134 res->end = phys_end;
135 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
136
137 request_resource (&iomem_resource, res);
138
139 if (kernel_code.start >= res->start &&
140 kernel_code.end <= res->end)
141 request_resource (res, &kernel_code);
142 if (kernel_data.start >= res->start &&
143 kernel_data.end <= res->end)
144 request_resource (res, &kernel_data);
145 }
146}
147
148static int __init parse_tag_core(struct tag *tag)
149{
150 if (tag->hdr.size > 2) {
151 if ((tag->u.core.flags & 1) == 0)
152 root_mountflags &= ~MS_RDONLY;
153 ROOT_DEV = new_decode_dev(tag->u.core.rootdev);
154 }
155 return 0;
156}
157__tagtable(ATAG_CORE, parse_tag_core);
158
159static int __init parse_tag_mem_range(struct tag *tag,
160 struct tag_mem_range **root)
161{
162 struct tag_mem_range *cur, **pprev;
163 struct tag_mem_range *new;
164
165 /*
166 * Ignore zero-sized entries. If we're running standalone, the
167 * SDRAM code may emit such entries if something goes
168 * wrong...
169 */
170 if (tag->u.mem_range.size == 0)
171 return 0;
172
173 /*
174 * Copy the data so the bootmem init code doesn't need to care
175 * about it.
176 */
177 if (mem_range_next_free >=
178 (sizeof(mem_range_cache) / sizeof(mem_range_cache[0])))
179 panic("Physical memory map too complex!\n");
180
181 new = &mem_range_cache[mem_range_next_free++];
182 *new = tag->u.mem_range;
183
184 pprev = root;
185 cur = *root;
186 while (cur) {
187 pprev = &cur->next;
188 cur = cur->next;
189 }
190
191 *pprev = new;
192 new->next = NULL;
193
194 return 0;
195}
196
/* ATAG_MEM: record a physical memory range on the mem_phys list */
static int __init parse_tag_mem(struct tag *tag)
{
	return parse_tag_mem_range(tag, &mem_phys);
}
__tagtable(ATAG_MEM, parse_tag_mem);
202
/*
 * ATAG_CMDLINE: copy the bootloader's command line into
 * saved_command_line, truncated to COMMAND_LINE_SIZE by strlcpy().
 */
static int __init parse_tag_cmdline(struct tag *tag)
{
	strlcpy(saved_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE);
	return 0;
}
__tagtable(ATAG_CMDLINE, parse_tag_cmdline);
209
/* ATAG_RDIMG: record a ramdisk image range on the mem_ramdisk list */
static int __init parse_tag_rdimg(struct tag *tag)
{
	return parse_tag_mem_range(tag, &mem_ramdisk);
}
__tagtable(ATAG_RDIMG, parse_tag_rdimg);
215
/*
 * ATAG_CLOCK: accepted but ignored. Registering a handler keeps
 * parse_tags() from warning about an unrecognised tag.
 */
static int __init parse_tag_clock(struct tag *tag)
{
	/*
	 * We'll figure out the clocks by peeking at the system
	 * manager regs directly.
	 */
	return 0;
}
__tagtable(ATAG_CLOCK, parse_tag_clock);
225
/* ATAG_RSVD_MEM: record a reserved memory range on the mem_reserved list */
static int __init parse_tag_rsvd_mem(struct tag *tag)
{
	return parse_tag_mem_range(tag, &mem_reserved);
}
__tagtable(ATAG_RSVD_MEM, parse_tag_rsvd_mem);
231
/*
 * ATAG_ETHERNET: currently a no-op. The code that would copy the PHY
 * address and MAC address into the "macb" platform data is compiled
 * out with #if 0, so the tag is accepted and ignored.
 */
static int __init parse_tag_ethernet(struct tag *tag)
{
#if 0
	const struct platform_device *pdev;

	/*
	 * We really need a bus type that supports "classes"...this
	 * will do for now (until we must handle other kinds of
	 * ethernet controllers)
	 */
	pdev = platform_get_device("macb", tag->u.ethernet.mac_index);
	if (pdev && pdev->dev.platform_data) {
		struct eth_platform_data *data = pdev->dev.platform_data;

		data->valid = 1;
		data->mii_phy_addr = tag->u.ethernet.mii_phy_addr;
		memcpy(data->hw_addr, tag->u.ethernet.hw_address,
		       sizeof(data->hw_addr));
	}
#endif
	return 0;
}
__tagtable(ATAG_ETHERNET, parse_tag_ethernet);
255
256/*
257 * Scan the tag table for this tag, and call its parse function. The
258 * tag table is built by the linker from all the __tagtable
259 * declarations.
260 */
261static int __init parse_tag(struct tag *tag)
262{
263 extern struct tagtable __tagtable_begin, __tagtable_end;
264 struct tagtable *t;
265
266 for (t = &__tagtable_begin; t < &__tagtable_end; t++)
267 if (tag->hdr.tag == t->tag) {
268 t->parse(tag);
269 break;
270 }
271
272 return t < &__tagtable_end;
273}
274
275/*
276 * Parse all tags in the list we got from the boot loader
277 */
278static void __init parse_tags(struct tag *t)
279{
280 for (; t->hdr.tag != ATAG_NONE; t = tag_next(t))
281 if (!parse_tag(t))
282 printk(KERN_WARNING
283 "Ignoring unrecognised tag 0x%08x\n",
284 t->hdr.tag);
285}
286
/*
 * Architecture-specific boot-time setup.
 *
 * In order: parses the bootloader tags, sets up processor and platform,
 * looks up the "cpu" clock, records the kernel image layout in init_mm,
 * hands the command line back through @cmdline_p, runs the early
 * parameter handlers, and then initialises bootmem, the framebuffer
 * memory, paging and the resource tree.
 */
void __init setup_arch (char **cmdline_p)
{
	struct clk *cpu_clk;

	parse_tags(bootloader_tags);

	setup_processor();
	setup_platform();

	cpu_clk = clk_get(NULL, "cpu");
	if (IS_ERR(cpu_clk)) {
		/* Not fatal: boot_cpu_data keeps its static default */
		printk(KERN_WARNING "Warning: Unable to get CPU clock\n");
	} else {
		unsigned long cpu_hz = clk_get_rate(cpu_clk);

		/*
		 * Well, duh, but it's probably a good idea to
		 * increment the use count.
		 */
		clk_enable(cpu_clk);

		boot_cpu_data.clk = cpu_clk;
		boot_cpu_data.loops_per_jiffy = cpu_hz * 4;
		/* Print the rate rounded to the nearest kHz, as MHz.kHz */
		printk("CPU: Running at %lu.%03lu MHz\n",
		       ((cpu_hz + 500) / 1000) / 1000,
		       ((cpu_hz + 500) / 1000) % 1000);
	}

	/* Kernel image boundaries, taken from the linker-provided symbols */
	init_mm.start_code = (unsigned long) &_text;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

	strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
	*cmdline_p = command_line;
	/* Runs the early_param() handlers, e.g. "fbmem=" in this file */
	parse_early_param();

	setup_bootmem();

	board_setup_fbmem(fbmem_start, fbmem_size);

#ifdef CONFIG_VT
	conswitchp = &dummy_con;
#endif

	paging_init();

	/* Uses alloc_bootmem_low(), so it must run after setup_bootmem() */
	resource_init();
}
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
new file mode 100644
index 000000000000..33096651c24f
--- /dev/null
+++ b/arch/avr32/kernel/signal.c
@@ -0,0 +1,328 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * Based on linux/arch/sh/kernel/signal.c
5 * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima
6 * Copyright (C) 1991, 1992 Linus Torvalds
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/sched.h>
14#include <linux/mm.h>
15#include <linux/errno.h>
16#include <linux/ptrace.h>
17#include <linux/unistd.h>
18#include <linux/suspend.h>
19
20#include <asm/uaccess.h>
21#include <asm/ucontext.h>
22
23#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
24
/*
 * sigaltstack() system call: forwards to do_sigaltstack() together with
 * the stack pointer saved in @regs.
 */
asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
			       struct pt_regs *regs)
{
	return do_sigaltstack(uss, uoss, regs->sp);
}
30
/*
 * Signal frame that setup_rt_frame() writes to the user stack and
 * sys_rt_sigreturn() reads back.
 */
struct rt_sigframe
{
	struct siginfo info;	/* signal information passed to the handler */
	struct ucontext uc;	/* saved register context and signal mask */
	unsigned long retcode;	/* return trampoline used without SA_RESTORER */
};
37
38static int
39restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
40{
41 int err = 0;
42
43#define COPY(x) err |= __get_user(regs->x, &sc->x)
44 COPY(sr);
45 COPY(pc);
46 COPY(lr);
47 COPY(sp);
48 COPY(r12);
49 COPY(r11);
50 COPY(r10);
51 COPY(r9);
52 COPY(r8);
53 COPY(r7);
54 COPY(r6);
55 COPY(r5);
56 COPY(r4);
57 COPY(r3);
58 COPY(r2);
59 COPY(r1);
60 COPY(r0);
61#undef COPY
62
63 /*
64 * Don't allow anyone to pretend they're running in supervisor
65 * mode or something...
66 */
67 err |= !valid_user_regs(regs);
68
69 return err;
70}
71
72
73asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
74{
75 struct rt_sigframe __user *frame;
76 sigset_t set;
77
78 frame = (struct rt_sigframe __user *)regs->sp;
79 pr_debug("SIG return: frame = %p\n", frame);
80
81 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
82 goto badframe;
83
84 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
85 goto badframe;
86
87 sigdelsetmask(&set, ~_BLOCKABLE);
88 spin_lock_irq(&current->sighand->siglock);
89 current->blocked = set;
90 recalc_sigpending();
91 spin_unlock_irq(&current->sighand->siglock);
92
93 if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
94 goto badframe;
95
96 pr_debug("Context restored: pc = %08lx, lr = %08lx, sp = %08lx\n",
97 regs->pc, regs->lr, regs->sp);
98
99 return regs->r12;
100
101badframe:
102 force_sig(SIGSEGV, current);
103 return 0;
104}
105
106static int
107setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
108{
109 int err = 0;
110
111#define COPY(x) err |= __put_user(regs->x, &sc->x)
112 COPY(sr);
113 COPY(pc);
114 COPY(lr);
115 COPY(sp);
116 COPY(r12);
117 COPY(r11);
118 COPY(r10);
119 COPY(r9);
120 COPY(r8);
121 COPY(r7);
122 COPY(r6);
123 COPY(r5);
124 COPY(r4);
125 COPY(r3);
126 COPY(r2);
127 COPY(r1);
128 COPY(r0);
129#undef COPY
130
131 return err;
132}
133
134static inline void __user *
135get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize)
136{
137 unsigned long sp = regs->sp;
138
139 if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp))
140 sp = current->sas_ss_sp + current->sas_ss_size;
141
142 return (void __user *)((sp - framesize) & ~3);
143}
144
145static int
146setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
147 sigset_t *set, struct pt_regs *regs)
148{
149 struct rt_sigframe __user *frame;
150 int err = 0;
151
152 frame = get_sigframe(ka, regs, sizeof(*frame));
153 err = -EFAULT;
154 if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
155 goto out;
156
157 /*
158 * Set up the return code:
159 *
160 * mov r8, __NR_rt_sigreturn
161 * scall
162 *
163 * Note: This will blow up since we're using a non-executable
164 * stack. Better use SA_RESTORER.
165 */
166#if __NR_rt_sigreturn > 127
167# error __NR_rt_sigreturn must be < 127 to fit in a short mov
168#endif
169 err = __put_user(0x3008d733 | (__NR_rt_sigreturn << 20),
170 &frame->retcode);
171
172 err |= copy_siginfo_to_user(&frame->info, info);
173
174 /* Set up the ucontext */
175 err |= __put_user(0, &frame->uc.uc_flags);
176 err |= __put_user(NULL, &frame->uc.uc_link);
177 err |= __put_user((void __user *)current->sas_ss_sp,
178 &frame->uc.uc_stack.ss_sp);
179 err |= __put_user(sas_ss_flags(regs->sp),
180 &frame->uc.uc_stack.ss_flags);
181 err |= __put_user(current->sas_ss_size,
182 &frame->uc.uc_stack.ss_size);
183 err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
184 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
185
186 if (err)
187 goto out;
188
189 regs->r12 = sig;
190 regs->r11 = (unsigned long) &frame->info;
191 regs->r10 = (unsigned long) &frame->uc;
192 regs->sp = (unsigned long) frame;
193 if (ka->sa.sa_flags & SA_RESTORER)
194 regs->lr = (unsigned long)ka->sa.sa_restorer;
195 else {
196 printk(KERN_NOTICE "[%s:%d] did not set SA_RESTORER\n",
197 current->comm, current->pid);
198 regs->lr = (unsigned long) &frame->retcode;
199 }
200
201 pr_debug("SIG deliver [%s:%d]: sig=%d sp=0x%lx pc=0x%lx->0x%p lr=0x%lx\n",
202 current->comm, current->pid, sig, regs->sp,
203 regs->pc, ka->sa.sa_handler, regs->lr);
204
205 regs->pc = (unsigned long) ka->sa.sa_handler;
206
207out:
208 return err;
209}
210
211static inline void restart_syscall(struct pt_regs *regs)
212{
213 if (regs->r12 == -ERESTART_RESTARTBLOCK)
214 regs->r8 = __NR_restart_syscall;
215 else
216 regs->r12 = regs->r12_orig;
217 regs->pc -= 2;
218}
219
220static inline void
221handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
222 sigset_t *oldset, struct pt_regs *regs, int syscall)
223{
224 int ret;
225
226 /*
227 * Set up the stack frame
228 */
229 ret = setup_rt_frame(sig, ka, info, oldset, regs);
230
231 /*
232 * Check that the resulting registers are sane
233 */
234 ret |= !valid_user_regs(regs);
235
236 /*
237 * Block the signal if we were unsuccessful.
238 */
239 if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
240 spin_lock_irq(&current->sighand->siglock);
241 sigorsets(&current->blocked, &current->blocked,
242 &ka->sa.sa_mask);
243 sigaddset(&current->blocked, sig);
244 recalc_sigpending();
245 spin_unlock_irq(&current->sighand->siglock);
246 }
247
248 if (ret == 0)
249 return;
250
251 force_sigsegv(sig, current);
252}
253
/*
 * Fetch the next pending signal for the current task, sort out system
 * call restarting and hand the signal to handle_signal().
 *
 * @regs:    user register state
 * @oldset:  signal mask to save in the signal frame; replaced by
 *           current->saved_sigmask when TIF_RESTORE_SIGMASK is set, and
 *           by current->blocked when NULL
 * @syscall: nonzero if a system call was interrupted
 *
 * Returns 1 if handle_signal() was called, 0 otherwise.
 *
 * Note that 'init' is a special process: it doesn't get signals it
 * doesn't want to handle. Thus you cannot kill init even with a
 * SIGKILL even by mistake.
 */
int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
{
	siginfo_t info;
	int signr;
	struct k_sigaction ka;

	/*
	 * We want the common case to go fast, which is why we may in
	 * certain cases get here from kernel mode. Just return
	 * without doing anything if so.
	 */
	if (!user_mode(regs))
		return 0;

	if (try_to_freeze()) {
		/*
		 * signr = 0 keeps the restart logic below from reading
		 * ka, which is still uninitialised on this path.
		 */
		signr = 0;
		if (!signal_pending(current))
			goto no_signal;
	}

	if (test_thread_flag(TIF_RESTORE_SIGMASK))
		oldset = &current->saved_sigmask;
	else if (!oldset)
		oldset = &current->blocked;

	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
no_signal:
	if (syscall) {
		/* r12 holds the system call's return value */
		switch (regs->r12) {
		case -ERESTART_RESTARTBLOCK:
		case -ERESTARTNOHAND:
			/* Restart only when no signal is being delivered */
			if (signr > 0) {
				regs->r12 = -EINTR;
				break;
			}
			/* fall through */
		case -ERESTARTSYS:
			/* With a signal, restart only if SA_RESTART is set */
			if (signr > 0 && !(ka.sa.sa_flags & SA_RESTART)) {
				regs->r12 = -EINTR;
				break;
			}
			/* fall through */
		case -ERESTARTNOINTR:
			restart_syscall(regs);
		}
	}

	if (signr == 0) {
		/* No signal to deliver -- put the saved sigmask back */
		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
			clear_thread_flag(TIF_RESTORE_SIGMASK);
			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
		}
		return 0;
	}

	handle_signal(signr, &ka, &info, oldset, regs, syscall);
	return 1;
}
318
319asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
320{
321 int syscall = 0;
322
323 if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR)
324 syscall = 1;
325
326 if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
327 do_signal(regs, &current->blocked, syscall);
328}
diff --git a/arch/avr32/kernel/switch_to.S b/arch/avr32/kernel/switch_to.S
new file mode 100644
index 000000000000..a48d046723c5
--- /dev/null
+++ b/arch/avr32/kernel/switch_to.S
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <asm/sysreg.h>
10
	.text
	.global	__switch_to
	.type	__switch_to, @function

	/* Switch thread context from "prev" to "next", returning "last"
	 *  r12 :	prev
	 *  r11 :	&prev->thread + 1
	 *  r10 :	&next->thread
	 */
__switch_to:
	/* Store r0-r7, sp and lr into prev's context, working down from r11 */
	stm	--r11, r0,r1,r2,r3,r4,r5,r6,r7,sp,lr
	/* ...followed by the current status register */
	mfsr	r9, SYSREG_SR
	st.w	--r11, r9
	/* First word of next's context: the status register value to install */
	ld.w	r8, r10++
	/*
	 * schedule() may have been called from a mode with a different
	 * set of registers. Make sure we don't lose anything here.
	 */
	pushm	r10,r12
	mtsr	SYSREG_SR, r8
	frs			/* flush the return stack */
	sub	pc, -2		/* flush the pipeline */
	popm	r10,r12
	/* Reload next's r0-r7 and sp; loading pc resumes next */
	ldm	r10++, r0,r1,r2,r3,r4,r5,r6,r7,sp,pc
	.size	__switch_to, . - __switch_to
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
new file mode 100644
index 000000000000..6ec5693da448
--- /dev/null
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/errno.h>
9#include <linux/fs.h>
10#include <linux/file.h>
11#include <linux/mm.h>
12#include <linux/unistd.h>
13
14#include <asm/mman.h>
15#include <asm/uaccess.h>
16
17asmlinkage int sys_pipe(unsigned long __user *filedes)
18{
19 int fd[2];
20 int error;
21
22 error = do_pipe(fd);
23 if (!error) {
24 if (copy_to_user(filedes, fd, sizeof(fd)))
25 error = -EFAULT;
26 }
27 return error;
28}
29
30asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
31 unsigned long prot, unsigned long flags,
32 unsigned long fd, off_t offset)
33{
34 int error = -EBADF;
35 struct file *file = NULL;
36
37 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
38 if (!(flags & MAP_ANONYMOUS)) {
39 file = fget(fd);
40 if (!file)
41 return error;
42 }
43
44 down_write(&current->mm->mmap_sem);
45 error = do_mmap_pgoff(file, addr, len, prot, flags, offset);
46 up_write(&current->mm->mmap_sem);
47
48 if (file)
49 fput(file);
50 return error;
51}
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
new file mode 100644
index 000000000000..7589a9b426cb
--- /dev/null
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -0,0 +1,102 @@
1/*
2 * Copyright (C) 2005-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
/*
 * Stubs for syscalls that require access to pt_regs or that take more
 * than five parameters.
 *
 * The stubs in this first group copy the stack pointer into one
 * register and then jump to the C implementation.
 * NOTE(review): the register chosen presumably matches the position of
 * the handler's pt_regs parameter (r12 first, r11 second, r10 third,
 * ...) -- confirm against the AVR32 calling convention.
 */

/* Register holding the sixth syscall argument on entry */
#define ARG6	r3

	.text
	.global	__sys_rt_sigsuspend
	.type	__sys_rt_sigsuspend,@function
__sys_rt_sigsuspend:
	mov	r10, sp
	rjmp	sys_rt_sigsuspend

	.global	__sys_sigaltstack
	.type	__sys_sigaltstack,@function
__sys_sigaltstack:
	mov	r10, sp		/* sys_sigaltstack() takes regs as 3rd parameter */
	rjmp	sys_sigaltstack

	.global	__sys_rt_sigreturn
	.type	__sys_rt_sigreturn,@function
__sys_rt_sigreturn:
	mov	r12, sp		/* sys_rt_sigreturn() takes regs as only parameter */
	rjmp	sys_rt_sigreturn

	.global	__sys_fork
	.type	__sys_fork,@function
__sys_fork:
	mov	r12, sp
	rjmp	sys_fork

	.global	__sys_clone
	.type	__sys_clone,@function
__sys_clone:
	mov	r8, sp
	rjmp	sys_clone

	.global	__sys_vfork
	.type	__sys_vfork,@function
__sys_vfork:
	mov	r12, sp
	rjmp	sys_vfork

	.global	__sys_execve
	.type	__sys_execve,@function
__sys_execve:
	mov	r9, sp
	rjmp	sys_execve
58
	/*
	 * Six-argument stubs. Each one saves lr, stores ARG6 on the
	 * stack, calls the C implementation, drops the stored word again
	 * ("sub sp, -4") and returns by popping the saved lr into pc.
	 */
	.global	__sys_mmap2
	.type	__sys_mmap2,@function
__sys_mmap2:
	pushm	lr
	st.w	--sp, ARG6
	rcall	sys_mmap2
	sub	sp, -4
	popm	pc

	.global	__sys_sendto
	.type	__sys_sendto,@function
__sys_sendto:
	pushm	lr
	st.w	--sp, ARG6
	rcall	sys_sendto
	sub	sp, -4
	popm	pc

	.global	__sys_recvfrom
	.type	__sys_recvfrom,@function
__sys_recvfrom:
	pushm	lr
	st.w	--sp, ARG6
	rcall	sys_recvfrom
	sub	sp, -4
	popm	pc

	.global	__sys_pselect6
	.type	__sys_pselect6,@function
__sys_pselect6:
	pushm	lr
	st.w	--sp, ARG6
	rcall	sys_pselect6
	sub	sp, -4
	popm	pc

	.global	__sys_splice
	.type	__sys_splice,@function
__sys_splice:
	pushm	lr
	st.w	--sp, ARG6
	rcall	sys_splice
	sub	sp, -4
	popm	pc
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
new file mode 100644
index 000000000000..63b206965d05
--- /dev/null
+++ b/arch/avr32/kernel/syscall_table.S
@@ -0,0 +1,289 @@
1/*
2 * AVR32 system call table
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
/*
 * Point optional system calls at sys_ni_syscall when the feature that
 * implements them is not configured, so the table below still links.
 */
#if !defined(CONFIG_NFSD) && !defined(CONFIG_NFSD_MODULE)
#define sys_nfsservctl sys_ni_syscall
#endif

#if !defined(CONFIG_SYSV_IPC)
# define sys_ipc sys_ni_syscall
#endif
18
19 .section .rodata,"a",@progbits
20 .type sys_call_table,@object
21 .global sys_call_table
22 .align 2
23sys_call_table:
24 .long sys_restart_syscall
25 .long sys_exit
26 .long __sys_fork
27 .long sys_read
28 .long sys_write
29 .long sys_open /* 5 */
30 .long sys_close
31 .long sys_umask
32 .long sys_creat
33 .long sys_link
34 .long sys_unlink /* 10 */
35 .long __sys_execve
36 .long sys_chdir
37 .long sys_time
38 .long sys_mknod
39 .long sys_chmod /* 15 */
40 .long sys_chown
41 .long sys_lchown
42 .long sys_lseek
43 .long sys_llseek
44 .long sys_getpid /* 20 */
45 .long sys_mount
46 .long sys_umount
47 .long sys_setuid
48 .long sys_getuid
49 .long sys_stime /* 25 */
50 .long sys_ptrace
51 .long sys_alarm
52 .long sys_pause
53 .long sys_utime
54 .long sys_newstat /* 30 */
55 .long sys_newfstat
56 .long sys_newlstat
57 .long sys_access
58 .long sys_chroot
59 .long sys_sync /* 35 */
60 .long sys_fsync
61 .long sys_kill
62 .long sys_rename
63 .long sys_mkdir
64 .long sys_rmdir /* 40 */
65 .long sys_dup
66 .long sys_pipe
67 .long sys_times
68 .long __sys_clone
69 .long sys_brk /* 45 */
70 .long sys_setgid
71 .long sys_getgid
72 .long sys_getcwd
73 .long sys_geteuid
74 .long sys_getegid /* 50 */
75 .long sys_acct
76 .long sys_setfsuid
77 .long sys_setfsgid
78 .long sys_ioctl
79 .long sys_fcntl /* 55 */
80 .long sys_setpgid
81 .long sys_mremap
82 .long sys_setresuid
83 .long sys_getresuid
84 .long sys_setreuid /* 60 */
85 .long sys_setregid
86 .long sys_ustat
87 .long sys_dup2
88 .long sys_getppid
89 .long sys_getpgrp /* 65 */
90 .long sys_setsid
91 .long sys_rt_sigaction
92 .long __sys_rt_sigreturn
93 .long sys_rt_sigprocmask
94 .long sys_rt_sigpending /* 70 */
95 .long sys_rt_sigtimedwait
96 .long sys_rt_sigqueueinfo
97 .long __sys_rt_sigsuspend
98 .long sys_sethostname
99 .long sys_setrlimit /* 75 */
100 .long sys_getrlimit
101 .long sys_getrusage
102 .long sys_gettimeofday
103 .long sys_settimeofday
104 .long sys_getgroups /* 80 */
105 .long sys_setgroups
106 .long sys_select
107 .long sys_symlink
108 .long sys_fchdir
109 .long sys_readlink /* 85 */
110 .long sys_pread64
111 .long sys_pwrite64
112 .long sys_swapon
113 .long sys_reboot
114 .long __sys_mmap2 /* 90 */
115 .long sys_munmap
116 .long sys_truncate
117 .long sys_ftruncate
118 .long sys_fchmod
119 .long sys_fchown /* 95 */
120 .long sys_getpriority
121 .long sys_setpriority
122 .long sys_wait4
123 .long sys_statfs
124 .long sys_fstatfs /* 100 */
125 .long sys_vhangup
126 .long __sys_sigaltstack
127 .long sys_syslog
128 .long sys_setitimer
129 .long sys_getitimer /* 105 */
130 .long sys_swapoff
131 .long sys_sysinfo
132 .long sys_ipc
133 .long sys_sendfile
134 .long sys_setdomainname /* 110 */
135 .long sys_newuname
136 .long sys_adjtimex
137 .long sys_mprotect
138 .long __sys_vfork
139 .long sys_init_module /* 115 */
140 .long sys_delete_module
141 .long sys_quotactl
142 .long sys_getpgid
143 .long sys_bdflush
144 .long sys_sysfs /* 120 */
145 .long sys_personality
146 .long sys_ni_syscall /* reserved for afs_syscall */
147 .long sys_getdents
148 .long sys_flock
149 .long sys_msync /* 125 */
150 .long sys_readv
151 .long sys_writev
152 .long sys_getsid
153 .long sys_fdatasync
154 .long sys_sysctl /* 130 */
155 .long sys_mlock
156 .long sys_munlock
157 .long sys_mlockall
158 .long sys_munlockall
159 .long sys_sched_setparam /* 135 */
160 .long sys_sched_getparam
161 .long sys_sched_setscheduler
162 .long sys_sched_getscheduler
163 .long sys_sched_yield
164 .long sys_sched_get_priority_max /* 140 */
165 .long sys_sched_get_priority_min
166 .long sys_sched_rr_get_interval
167 .long sys_nanosleep
168 .long sys_poll
169 .long sys_nfsservctl /* 145 */
170 .long sys_setresgid
171 .long sys_getresgid
172 .long sys_prctl
173 .long sys_socket
174 .long sys_bind /* 150 */
175 .long sys_connect
176 .long sys_listen
177 .long sys_accept
178 .long sys_getsockname
179 .long sys_getpeername /* 155 */
180 .long sys_socketpair
181 .long sys_send
182 .long sys_recv
183 .long __sys_sendto
184 .long __sys_recvfrom /* 160 */
185 .long sys_shutdown
186 .long sys_setsockopt
187 .long sys_getsockopt
188 .long sys_sendmsg
189 .long sys_recvmsg /* 165 */
190 .long sys_truncate64
191 .long sys_ftruncate64
192 .long sys_stat64
193 .long sys_lstat64
194 .long sys_fstat64 /* 170 */
195 .long sys_pivot_root
196 .long sys_mincore
197 .long sys_madvise
198 .long sys_getdents64
199 .long sys_fcntl64 /* 175 */
200 .long sys_gettid
201 .long sys_readahead
202 .long sys_setxattr
203 .long sys_lsetxattr
204 .long sys_fsetxattr /* 180 */
205 .long sys_getxattr
206 .long sys_lgetxattr
207 .long sys_fgetxattr
208 .long sys_listxattr
209 .long sys_llistxattr /* 185 */
210 .long sys_flistxattr
211 .long sys_removexattr
212 .long sys_lremovexattr
213 .long sys_fremovexattr
214 .long sys_tkill /* 190 */
215 .long sys_sendfile64
216 .long sys_futex
217 .long sys_sched_setaffinity
218 .long sys_sched_getaffinity
219 .long sys_capget /* 195 */
220 .long sys_capset
221 .long sys_io_setup
222 .long sys_io_destroy
223 .long sys_io_getevents
224 .long sys_io_submit /* 200 */
225 .long sys_io_cancel
226 .long sys_fadvise64
227 .long sys_exit_group
228 .long sys_lookup_dcookie
229 .long sys_epoll_create /* 205 */
230 .long sys_epoll_ctl
231 .long sys_epoll_wait
232 .long sys_remap_file_pages
233 .long sys_set_tid_address
234 .long sys_timer_create /* 210 */
235 .long sys_timer_settime
236 .long sys_timer_gettime
237 .long sys_timer_getoverrun
238 .long sys_timer_delete
239 .long sys_clock_settime /* 215 */
240 .long sys_clock_gettime
241 .long sys_clock_getres
242 .long sys_clock_nanosleep
243 .long sys_statfs64
244 .long sys_fstatfs64 /* 220 */
245 .long sys_tgkill
246 .long sys_ni_syscall /* reserved for TUX */
247 .long sys_utimes
248 .long sys_fadvise64_64
249 .long sys_cacheflush /* 225 */
250 .long sys_ni_syscall /* sys_vserver */
251 .long sys_mq_open
252 .long sys_mq_unlink
253 .long sys_mq_timedsend
254 .long sys_mq_timedreceive /* 230 */
255 .long sys_mq_notify
256 .long sys_mq_getsetattr
257 .long sys_kexec_load
258 .long sys_waitid
259 .long sys_add_key /* 235 */
260 .long sys_request_key
261 .long sys_keyctl
262 .long sys_ioprio_set
263 .long sys_ioprio_get
264 .long sys_inotify_init /* 240 */
265 .long sys_inotify_add_watch
266 .long sys_inotify_rm_watch
267 .long sys_openat
268 .long sys_mkdirat
269 .long sys_mknodat /* 245 */
270 .long sys_fchownat
271 .long sys_futimesat
272 .long sys_fstatat64
273 .long sys_unlinkat
274 .long sys_renameat /* 250 */
275 .long sys_linkat
276 .long sys_symlinkat
277 .long sys_readlinkat
278 .long sys_fchmodat
279 .long sys_faccessat /* 255 */
280 .long __sys_pselect6
281 .long sys_ppoll
282 .long sys_unshare
283 .long sys_set_robust_list
284 .long sys_get_robust_list /* 260 */
285 .long __sys_splice
286 .long sys_sync_file_range
287 .long sys_tee
288 .long sys_vmsplice
289 .long sys_ni_syscall /* r8 is saturated at nr_syscalls */
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
new file mode 100644
index 000000000000..b0e6b5855a38
--- /dev/null
+++ b/arch/avr32/kernel/time.c
@@ -0,0 +1,238 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * Based on MIPS implementation arch/mips/kernel/time.c
5 * Copyright 2001 MontaVista Software Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/clk.h>
13#include <linux/clocksource.h>
14#include <linux/time.h>
15#include <linux/module.h>
16#include <linux/interrupt.h>
17#include <linux/irq.h>
18#include <linux/kernel_stat.h>
19#include <linux/errno.h>
20#include <linux/init.h>
21#include <linux/profile.h>
22#include <linux/sysdev.h>
23
24#include <asm/div64.h>
25#include <asm/sysreg.h>
26#include <asm/io.h>
27#include <asm/sections.h>
28
29static cycle_t read_cycle_count(void)
30{
31 return (cycle_t)sysreg_read(COUNT);
32}
33
34static struct clocksource clocksource_avr32 = {
35 .name = "avr32",
36 .rating = 350,
37 .read = read_cycle_count,
38 .mask = CLOCKSOURCE_MASK(32),
39 .shift = 16,
40 .is_continuous = 1,
41};
42
43/*
44 * By default we provide the null RTC ops
45 */
/* Placeholder RTC read hook: always reports 2004-01-01 00:00:00. */
static unsigned long null_rtc_get_time(void)
{
	unsigned long fixed_time = mktime(2004, 1, 1, 0, 0, 0);

	return fixed_time;
}
50
/* Placeholder RTC write hook: ignores @sec and reports success (0). */
static int null_rtc_set_time(unsigned long sec)
{
	const int status = 0;

	return status;
}
55
56static unsigned long (*rtc_get_time)(void) = null_rtc_get_time;
57static int (*rtc_set_time)(unsigned long) = null_rtc_set_time;
58
59/* how many counter cycles in a jiffy? */
60static unsigned long cycles_per_jiffy;
61
62/* cycle counter value at the previous timer interrupt */
63static unsigned int timerhi, timerlo;
64
65/* the count value for the next timer interrupt */
66static unsigned int expirelo;
67
68static void avr32_timer_ack(void)
69{
70 unsigned int count;
71
72 /* Ack this timer interrupt and set the next one */
73 expirelo += cycles_per_jiffy;
74 if (expirelo == 0) {
75 printk(KERN_DEBUG "expirelo == 0\n");
76 sysreg_write(COMPARE, expirelo + 1);
77 } else {
78 sysreg_write(COMPARE, expirelo);
79 }
80
81 /* Check to see if we have missed any timer interrupts */
82 count = sysreg_read(COUNT);
83 if ((count - expirelo) < 0x7fffffff) {
84 expirelo = count + cycles_per_jiffy;
85 sysreg_write(COMPARE, expirelo);
86 }
87}
88
89static unsigned int avr32_hpt_read(void)
90{
91 return sysreg_read(COUNT);
92}
93
94/*
95 * Taken from MIPS c0_hpt_timer_init().
96 *
97 * Why is it so complicated, and what is "count"? My assumption is
98 * that `count' specifies the "reference cycle", i.e. the cycle since
99 * reset that should mean "zero". The reason COUNT is written twice is
100 * probably to make sure we don't get any timer interrupts while we
101 * are messing with the counter.
102 */
103static void avr32_hpt_init(unsigned int count)
104{
105 count = sysreg_read(COUNT) - count;
106 expirelo = (count / cycles_per_jiffy + 1) * cycles_per_jiffy;
107 sysreg_write(COUNT, expirelo - cycles_per_jiffy);
108 sysreg_write(COMPARE, expirelo);
109 sysreg_write(COUNT, count);
110}
111
112/*
113 * Scheduler clock - returns current time in nanosec units.
114 */
115unsigned long long sched_clock(void)
116{
117 /* There must be better ways...? */
118 return (unsigned long long)jiffies * (1000000000 / HZ);
119}
120
121/*
122 * local_timer_interrupt() does profiling and process accounting on a
123 * per-CPU basis.
124 *
125 * In UP mode, it is invoked from the (global) timer_interrupt.
126 */
127static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
128{
129 if (current->pid)
130 profile_tick(CPU_PROFILING, regs);
131 update_process_times(user_mode(regs));
132}
133
134static irqreturn_t
135timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
136{
137 unsigned int count;
138
139 /* ack timer interrupt and try to set next interrupt */
140 count = avr32_hpt_read();
141 avr32_timer_ack();
142
143 /* Update timerhi/timerlo for intra-jiffy calibration */
144 timerhi += count < timerlo; /* Wrap around */
145 timerlo = count;
146
147 /*
148 * Call the generic timer interrupt handler
149 */
150 write_seqlock(&xtime_lock);
151 do_timer(regs);
152 write_sequnlock(&xtime_lock);
153
154 /*
155 * In UP mode, we call local_timer_interrupt() to do profiling
156 * and process accounting.
157 *
158 * SMP is not supported yet.
159 */
160 local_timer_interrupt(irq, dev_id, regs);
161
162 return IRQ_HANDLED;
163}
164
165static struct irqaction timer_irqaction = {
166 .handler = timer_interrupt,
167 .flags = IRQF_DISABLED,
168 .name = "timer",
169};
170
171void __init time_init(void)
172{
173 unsigned long mult, shift, count_hz;
174 int ret;
175
176 xtime.tv_sec = rtc_get_time();
177 xtime.tv_nsec = 0;
178
179 set_normalized_timespec(&wall_to_monotonic,
180 -xtime.tv_sec, -xtime.tv_nsec);
181
182 printk("Before time_init: count=%08lx, compare=%08lx\n",
183 (unsigned long)sysreg_read(COUNT),
184 (unsigned long)sysreg_read(COMPARE));
185
186 count_hz = clk_get_rate(boot_cpu_data.clk);
187 shift = clocksource_avr32.shift;
188 mult = clocksource_hz2mult(count_hz, shift);
189 clocksource_avr32.mult = mult;
190
191 printk("Cycle counter: mult=%lu, shift=%lu\n", mult, shift);
192
193 {
194 u64 tmp;
195
196 tmp = TICK_NSEC;
197 tmp <<= shift;
198 tmp += mult / 2;
199 do_div(tmp, mult);
200
201 cycles_per_jiffy = tmp;
202 }
203
204 /* This sets up the high precision timer for the first interrupt. */
205 avr32_hpt_init(avr32_hpt_read());
206
207 printk("After time_init: count=%08lx, compare=%08lx\n",
208 (unsigned long)sysreg_read(COUNT),
209 (unsigned long)sysreg_read(COMPARE));
210
211 ret = clocksource_register(&clocksource_avr32);
212 if (ret)
213 printk(KERN_ERR
214 "timer: could not register clocksource: %d\n", ret);
215
216 ret = setup_irq(0, &timer_irqaction);
217 if (ret)
218 printk("timer: could not request IRQ 0: %d\n", ret);
219}
220
221static struct sysdev_class timer_class = {
222 set_kset_name("timer"),
223};
224
225static struct sys_device timer_device = {
226 .id = 0,
227 .cls = &timer_class,
228};
229
230static int __init init_timer_sysfs(void)
231{
232 int err = sysdev_class_register(&timer_class);
233 if (!err)
234 err = sysdev_register(&timer_device);
235 return err;
236}
237
238device_initcall(init_timer_sysfs);
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
new file mode 100644
index 000000000000..7e803f4d7a12
--- /dev/null
+++ b/arch/avr32/kernel/traps.c
@@ -0,0 +1,425 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#undef DEBUG
9#include <linux/sched.h>
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/kallsyms.h>
13#include <linux/notifier.h>
14
15#include <asm/traps.h>
16#include <asm/sysreg.h>
17#include <asm/addrspace.h>
18#include <asm/ocd.h>
19#include <asm/mmu_context.h>
20#include <asm/uaccess.h>
21
22static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
23{
24 unsigned long p;
25 int i;
26
27 printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top);
28
29 for (p = bottom & ~31; p < top; ) {
30 printk("%04lx: ", p & 0xffff);
31
32 for (i = 0; i < 8; i++, p += 4) {
33 unsigned int val;
34
35 if (p < bottom || p >= top)
36 printk(" ");
37 else {
38 if (__get_user(val, (unsigned int __user *)p)) {
39 printk("\n");
40 goto out;
41 }
42 printk("%08x ", val);
43 }
44 }
45 printk("\n");
46 }
47
48out:
49 return;
50}
51
52#ifdef CONFIG_FRAME_POINTER
53static inline void __show_trace(struct task_struct *tsk, unsigned long *sp,
54 struct pt_regs *regs)
55{
56 unsigned long __user *fp;
57 unsigned long __user *last_fp = NULL;
58
59 if (regs) {
60 fp = (unsigned long __user *)regs->r7;
61 } else if (tsk == current) {
62 register unsigned long __user *real_fp __asm__("r7");
63 fp = real_fp;
64 } else {
65 fp = (unsigned long __user *)tsk->thread.cpu_context.r7;
66 }
67
68 /*
69 * Walk the stack until (a) we get an exception, (b) the frame
70 * pointer becomes zero, or (c) the frame pointer gets stuck
71 * at the same value.
72 */
73 while (fp && fp != last_fp) {
74 unsigned long lr, new_fp = 0;
75
76 last_fp = fp;
77 if (__get_user(lr, fp))
78 break;
79 if (fp && __get_user(new_fp, fp + 1))
80 break;
81 fp = (unsigned long __user *)new_fp;
82
83 printk(" [<%08lx>] ", lr);
84 print_symbol("%s\n", lr);
85 }
86 printk("\n");
87}
88#else
/*
 * Backtrace without frame pointers: scan every word from @sp to the
 * end of the kernel stack and print those that look like kernel text
 * addresses. @tsk and @regs are not used by this variant.
 */
static inline void __show_trace(struct task_struct *tsk, unsigned long *sp,
				struct pt_regs *regs)
{
	for (; !kstack_end(sp); sp++) {
		unsigned long word = *sp;

		if (!kernel_text_address(word))
			continue;

		printk(" [<%08lx>] ", word);
		print_symbol("%s\n", word);
	}
}
102#endif
103
104void show_trace(struct task_struct *tsk, unsigned long *sp,
105 struct pt_regs *regs)
106{
107 if (regs &&
108 (((regs->sr & MODE_MASK) == MODE_EXCEPTION) ||
109 ((regs->sr & MODE_MASK) == MODE_USER)))
110 return;
111
112 printk ("Call trace:");
113#ifdef CONFIG_KALLSYMS
114 printk("\n");
115#endif
116
117 __show_trace(tsk, sp, regs);
118 printk("\n");
119}
120
121void show_stack(struct task_struct *tsk, unsigned long *sp)
122{
123 unsigned long stack;
124
125 if (!tsk)
126 tsk = current;
127 if (sp == 0) {
128 if (tsk == current) {
129 register unsigned long *real_sp __asm__("sp");
130 sp = real_sp;
131 } else {
132 sp = (unsigned long *)tsk->thread.cpu_context.ksp;
133 }
134 }
135
136 stack = (unsigned long)sp;
137 dump_mem("Stack: ", stack,
138 THREAD_SIZE + (unsigned long)tsk->thread_info);
139 show_trace(tsk, sp, NULL);
140}
141
142void dump_stack(void)
143{
144 show_stack(NULL, NULL);
145}
146EXPORT_SYMBOL(dump_stack);
147
148ATOMIC_NOTIFIER_HEAD(avr32_die_chain);
149
150int register_die_notifier(struct notifier_block *nb)
151{
152 pr_debug("register_die_notifier: %p\n", nb);
153
154 return atomic_notifier_chain_register(&avr32_die_chain, nb);
155}
156EXPORT_SYMBOL(register_die_notifier);
157
158int unregister_die_notifier(struct notifier_block *nb)
159{
160 return atomic_notifier_chain_unregister(&avr32_die_chain, nb);
161}
162EXPORT_SYMBOL(unregister_die_notifier);
163
164static DEFINE_SPINLOCK(die_lock);
165
166void __die(const char *str, struct pt_regs *regs, unsigned long err,
167 const char *file, const char *func, unsigned long line)
168{
169 struct task_struct *tsk = current;
170 static int die_counter;
171
172 console_verbose();
173 spin_lock_irq(&die_lock);
174 bust_spinlocks(1);
175
176 printk(KERN_ALERT "%s", str);
177 if (file && func)
178 printk(" in %s:%s, line %ld", file, func, line);
179 printk("[#%d]:\n", ++die_counter);
180 print_modules();
181 show_regs(regs);
182 printk("Process %s (pid: %d, stack limit = 0x%p)\n",
183 tsk->comm, tsk->pid, tsk->thread_info + 1);
184
185 if (!user_mode(regs) || in_interrupt()) {
186 dump_mem("Stack: ", regs->sp,
187 THREAD_SIZE + (unsigned long)tsk->thread_info);
188 }
189
190 bust_spinlocks(0);
191 spin_unlock_irq(&die_lock);
192 do_exit(SIGSEGV);
193}
194
/* Call __die() with the same arguments unless @regs is from user mode. */
void __die_if_kernel(const char *str, struct pt_regs *regs, unsigned long err,
		     const char *file, const char *func, unsigned long line)
{
	if (user_mode(regs))
		return;

	__die(str, regs, err, file, func, line);
}
201
202asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs)
203{
204#ifdef CONFIG_SUBARCH_AVR32B
205 /*
206 * The exception entry always saves RSR_EX. For NMI, this is
207 * wrong; it should be RSR_NMI
208 */
209 regs->sr = sysreg_read(RSR_NMI);
210#endif
211
212 printk("NMI taken!!!!\n");
213 die("NMI", regs, ecr);
214 BUG();
215}
216
217asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs)
218{
219 printk("Unable to handle critical exception %lu at pc = %08lx!\n",
220 ecr, regs->pc);
221 die("Oops", regs, ecr);
222 BUG();
223}
224
225asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs)
226{
227 siginfo_t info;
228
229 die_if_kernel("Oops: Address exception in kernel mode", regs, ecr);
230
231#ifdef DEBUG
232 if (ecr == ECR_ADDR_ALIGN_X)
233 pr_debug("Instruction Address Exception at pc = %08lx\n",
234 regs->pc);
235 else if (ecr == ECR_ADDR_ALIGN_R)
236 pr_debug("Data Address Exception (Read) at pc = %08lx\n",
237 regs->pc);
238 else if (ecr == ECR_ADDR_ALIGN_W)
239 pr_debug("Data Address Exception (Write) at pc = %08lx\n",
240 regs->pc);
241 else
242 BUG();
243
244 show_regs(regs);
245#endif
246
247 info.si_signo = SIGBUS;
248 info.si_errno = 0;
249 info.si_code = BUS_ADRALN;
250 info.si_addr = (void __user *)regs->pc;
251
252 force_sig_info(SIGBUS, &info, current);
253}
254
/*
 * This way of handling undefined instructions is stolen from ARM.
 *
 * undef_hook is the list of registered handlers; undef_lock protects
 * it. The lock is declared with DEFINE_SPINLOCK, like die_lock above,
 * instead of the deprecated SPIN_LOCK_UNLOCKED initializer.
 */
static LIST_HEAD(undef_hook);
static DEFINE_SPINLOCK(undef_lock);
258
259void register_undef_hook(struct undef_hook *hook)
260{
261 spin_lock_irq(&undef_lock);
262 list_add(&hook->node, &undef_hook);
263 spin_unlock_irq(&undef_lock);
264}
265
266void unregister_undef_hook(struct undef_hook *hook)
267{
268 spin_lock_irq(&undef_lock);
269 list_del(&hook->node);
270 spin_unlock_irq(&undef_lock);
271}
272
273static int do_cop_absent(u32 insn)
274{
275 int cop_nr;
276 u32 cpucr;
277 if ( (insn & 0xfdf00000) == 0xf1900000 )
278 /* LDC0 */
279 cop_nr = 0;
280 else
281 cop_nr = (insn >> 13) & 0x7;
282
283 /* Try enabling the coprocessor */
284 cpucr = sysreg_read(CPUCR);
285 cpucr |= (1 << (24 + cop_nr));
286 sysreg_write(CPUCR, cpucr);
287
288 cpucr = sysreg_read(CPUCR);
289 if ( !(cpucr & (1 << (24 + cop_nr))) ){
290 printk("Coprocessor #%i not found!\n", cop_nr);
291 return -1;
292 }
293
294 return 0;
295}
296
297#ifdef CONFIG_BUG
298#ifdef CONFIG_DEBUG_BUGVERBOSE
299static inline void do_bug_verbose(struct pt_regs *regs, u32 insn)
300{
301 char *file;
302 u16 line;
303 char c;
304
305 if (__get_user(line, (u16 __user *)(regs->pc + 2)))
306 return;
307 if (__get_user(file, (char * __user *)(regs->pc + 4))
308 || (unsigned long)file < PAGE_OFFSET
309 || __get_user(c, file))
310 file = "<bad filename>";
311
312 printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
313}
314#else
315static inline void do_bug_verbose(struct pt_regs *regs, u32 insn)
316{
317
318}
319#endif
320#endif
321
322asmlinkage void do_illegal_opcode(unsigned long ecr, struct pt_regs *regs)
323{
324 u32 insn;
325 struct undef_hook *hook;
326 siginfo_t info;
327 void __user *pc;
328
329 if (!user_mode(regs))
330 goto kernel_trap;
331
332 local_irq_enable();
333
334 pc = (void __user *)instruction_pointer(regs);
335 if (__get_user(insn, (u32 __user *)pc))
336 goto invalid_area;
337
338 if (ecr == ECR_COPROC_ABSENT) {
339 if (do_cop_absent(insn) == 0)
340 return;
341 }
342
343 spin_lock_irq(&undef_lock);
344 list_for_each_entry(hook, &undef_hook, node) {
345 if ((insn & hook->insn_mask) == hook->insn_val) {
346 if (hook->fn(regs, insn) == 0) {
347 spin_unlock_irq(&undef_lock);
348 return;
349 }
350 }
351 }
352 spin_unlock_irq(&undef_lock);
353
354invalid_area:
355
356#ifdef DEBUG
357 printk("Illegal instruction at pc = %08lx\n", regs->pc);
358 if (regs->pc < TASK_SIZE) {
359 unsigned long ptbr, pgd, pte, *p;
360
361 ptbr = sysreg_read(PTBR);
362 p = (unsigned long *)ptbr;
363 pgd = p[regs->pc >> 22];
364 p = (unsigned long *)((pgd & 0x1ffff000) | 0x80000000);
365 pte = p[(regs->pc >> 12) & 0x3ff];
366 printk("page table: 0x%08lx -> 0x%08lx -> 0x%08lx\n", ptbr, pgd, pte);
367 }
368#endif
369
370 info.si_signo = SIGILL;
371 info.si_errno = 0;
372 info.si_addr = (void __user *)regs->pc;
373 switch (ecr) {
374 case ECR_ILLEGAL_OPCODE:
375 case ECR_UNIMPL_INSTRUCTION:
376 info.si_code = ILL_ILLOPC;
377 break;
378 case ECR_PRIVILEGE_VIOLATION:
379 info.si_code = ILL_PRVOPC;
380 break;
381 case ECR_COPROC_ABSENT:
382 info.si_code = ILL_COPROC;
383 break;
384 default:
385 BUG();
386 }
387
388 force_sig_info(SIGILL, &info, current);
389 return;
390
391kernel_trap:
392#ifdef CONFIG_BUG
393 if (__kernel_text_address(instruction_pointer(regs))) {
394 insn = *(u16 *)instruction_pointer(regs);
395 if (insn == AVR32_BUG_OPCODE) {
396 do_bug_verbose(regs, insn);
397 die("Kernel BUG", regs, 0);
398 return;
399 }
400 }
401#endif
402
403 die("Oops: Illegal instruction in kernel code", regs, ecr);
404}
405
406asmlinkage void do_fpe(unsigned long ecr, struct pt_regs *regs)
407{
408 siginfo_t info;
409
410 printk("Floating-point exception at pc = %08lx\n", regs->pc);
411
412 /* We have no FPU... */
413 info.si_signo = SIGILL;
414 info.si_errno = 0;
415 info.si_addr = (void __user *)regs->pc;
416 info.si_code = ILL_COPROC;
417
418 force_sig_info(SIGILL, &info, current);
419}
420
421
422void __init trap_init(void)
423{
424
425}
diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c
new file mode 100644
index 000000000000..cdd627c6b7dc
--- /dev/null
+++ b/arch/avr32/kernel/vmlinux.lds.c
@@ -0,0 +1,139 @@
1/*
2 * AVR32 linker script for the Linux kernel
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#define LOAD_OFFSET 0x00000000
11#include <asm-generic/vmlinux.lds.h>
12
13OUTPUT_FORMAT("elf32-avr32", "elf32-avr32", "elf32-avr32")
14OUTPUT_ARCH(avr32)
15ENTRY(_start)
16
17/* Big endian */
18jiffies = jiffies_64 + 4;
19
20SECTIONS
21{
22 . = CONFIG_ENTRY_ADDRESS;
23 .init : AT(ADDR(.init) - LOAD_OFFSET) {
24 _stext = .;
25 __init_begin = .;
26 _sinittext = .;
27 *(.text.reset)
28 *(.init.text)
29 _einittext = .;
30 . = ALIGN(4);
31 __tagtable_begin = .;
32 *(.taglist)
33 __tagtable_end = .;
34 *(.init.data)
35 . = ALIGN(16);
36 __setup_start = .;
37 *(.init.setup)
38 __setup_end = .;
39 . = ALIGN(4);
40 __initcall_start = .;
41 *(.initcall1.init)
42 *(.initcall2.init)
43 *(.initcall3.init)
44 *(.initcall4.init)
45 *(.initcall5.init)
46 *(.initcall6.init)
47 *(.initcall7.init)
48 __initcall_end = .;
49 __con_initcall_start = .;
50 *(.con_initcall.init)
51 __con_initcall_end = .;
52 __security_initcall_start = .;
53 *(.security_initcall.init)
54 __security_initcall_end = .;
55 . = ALIGN(32);
56 __initramfs_start = .;
57 *(.init.ramfs)
58 __initramfs_end = .;
59 . = ALIGN(4096);
60 __init_end = .;
61 }
62
63 . = ALIGN(8192);
64 .text : AT(ADDR(.text) - LOAD_OFFSET) {
65 _evba = .;
66 _text = .;
67 *(.ex.text)
68 . = 0x50;
69 *(.tlbx.ex.text)
70 . = 0x60;
71 *(.tlbr.ex.text)
72 . = 0x70;
73 *(.tlbw.ex.text)
74 . = 0x100;
75 *(.scall.text)
76 *(.irq.text)
77 *(.text)
78 SCHED_TEXT
79 LOCK_TEXT
80 KPROBES_TEXT
81 *(.fixup)
82 *(.gnu.warning)
83 _etext = .;
84 } = 0xd703d703
85
86 . = ALIGN(4);
87 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
88 __start___ex_table = .;
89 *(__ex_table)
90 __stop___ex_table = .;
91 }
92
93 RODATA
94
95 . = ALIGN(8192);
96
97 .data : AT(ADDR(.data) - LOAD_OFFSET) {
98 _data = .;
99 _sdata = .;
100 /*
101 * First, the init task union, aligned to an 8K boundary.
102 */
103 *(.data.init_task)
104
105 /* Then, the cacheline aligned data */
106 . = ALIGN(32);
107 *(.data.cacheline_aligned)
108
109 /* And the rest... */
110 *(.data.rel*)
111 *(.data)
112 CONSTRUCTORS
113
114 _edata = .;
115 }
116
117
118 . = ALIGN(8);
119 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
120 __bss_start = .;
121 *(.bss)
122 *(COMMON)
123 . = ALIGN(8);
124 __bss_stop = .;
125 _end = .;
126 }
127
128 /* When something in the kernel is NOT compiled as a module, the module
129 * cleanup code and data are put into these segments. Both can then be
130 * thrown away, as cleanup code is never called unless it's a module.
131 */
132 /DISCARD/ : {
133 *(.exit.text)
134 *(.exit.data)
135 *(.exitcall.exit)
136 }
137
138 DWARF_DEBUG
139}
diff --git a/arch/avr32/lib/Makefile b/arch/avr32/lib/Makefile
new file mode 100644
index 000000000000..09ac43e40522
--- /dev/null
+++ b/arch/avr32/lib/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile for AVR32-specific library files
3#
4
5lib-y := copy_user.o clear_user.o
6lib-y += strncpy_from_user.o strnlen_user.o
7lib-y += delay.o memset.o memcpy.o findbit.o
8lib-y += csum_partial.o csum_partial_copy_generic.o
9lib-y += io-readsw.o io-readsl.o io-writesw.o io-writesl.o
10lib-y += __avr32_lsl64.o __avr32_lsr64.o __avr32_asr64.o
diff --git a/arch/avr32/lib/__avr32_asr64.S b/arch/avr32/lib/__avr32_asr64.S
new file mode 100644
index 000000000000..368b6bca4c76
--- /dev/null
+++ b/arch/avr32/lib/__avr32_asr64.S
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) 2005-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9 /*
10 * DWtype __avr32_asr64(DWtype u, word_type b)
11 */
12 .text
13 .global __avr32_asr64
14 .type __avr32_asr64,@function
15__avr32_asr64: /* 64-bit arithmetic shift right; as used below r11 is the upper word, r10 the lower word and r12 the shift count */
16 cp.w r12, 0
17 reteq r12 /* shift count of zero: return with r10/r11 untouched */
18
19 rsub r9, r12, 32 /* r9 = 32 - count */
20 brle 1f /* count >= 32 is handled at 1: */
21
22 lsl r8, r11, r9 /* bits of the upper word that move into the lower word */
23 lsr r10, r10, r12
24 asr r11, r11, r12 /* sign-propagating shift of the upper word */
25 or r10, r8
26 retal r12
27
281: neg r9 /* r9 = count - 32 */
29 asr r10, r11, r9 /* lower word is taken entirely from the upper word */
30 asr r11, 31 /* upper word becomes copies of the sign bit */
31 retal r12
diff --git a/arch/avr32/lib/__avr32_lsl64.S b/arch/avr32/lib/__avr32_lsl64.S
new file mode 100644
index 000000000000..f1dbc2b36257
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsl64.S
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) 2005-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9 /*
10 * DWtype __avr32_lsl64(DWtype u, word_type b)
11 */
12 .text
13 .global __avr32_lsl64
14 .type __avr32_lsl64,@function
15__avr32_lsl64: /* 64-bit shift left; as used below r11 is the upper word, r10 the lower word and r12 the shift count */
16 cp.w r12, 0
17 reteq r12 /* shift count of zero: return with r10/r11 untouched */
18
19 rsub r9, r12, 32 /* r9 = 32 - count */
20 brle 1f /* count >= 32 is handled at 1: */
21
22 lsr r8, r10, r9 /* bits of the lower word that move into the upper word */
23 lsl r10, r10, r12
24 lsl r11, r11, r12
25 or r11, r8
26 retal r12
27
281: neg r9 /* r9 = count - 32 */
29 lsl r11, r10, r9 /* upper word is taken entirely from the lower word */
30 mov r10, 0 /* lower word is shifted out completely */
31 retal r12
diff --git a/arch/avr32/lib/__avr32_lsr64.S b/arch/avr32/lib/__avr32_lsr64.S
new file mode 100644
index 000000000000..e65bb7f0d24c
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsr64.S
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) 2005-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9 /*
10 * DWtype __avr32_lsr64(DWtype u, word_type b)
11 */
12 .text
13 .global __avr32_lsr64
14 .type __avr32_lsr64,@function
15__avr32_lsr64: /* 64-bit logical shift right; as used below r11 is the upper word, r10 the lower word and r12 the shift count */
16 cp.w r12, 0
17 reteq r12 /* shift count of zero: return with r10/r11 untouched */
18
19 rsub r9, r12, 32 /* r9 = 32 - count */
20 brle 1f /* count >= 32 is handled at 1: */
21
22 lsl r8, r11, r9 /* bits of the upper word that move into the lower word */
23 lsr r11, r11, r12
24 lsr r10, r10, r12
25 or r10, r8
26 retal r12
27
281: neg r9 /* r9 = count - 32 */
29 lsr r10, r11, r9 /* lower word is taken entirely from the upper word */
30 mov r11, 0 /* upper word is shifted out completely */
31 retal r12
diff --git a/arch/avr32/lib/clear_user.S b/arch/avr32/lib/clear_user.S
new file mode 100644
index 000000000000..d8991b6f8eb7
--- /dev/null
+++ b/arch/avr32/lib/clear_user.S
@@ -0,0 +1,76 @@
1/*
2 * Copyright 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <asm/page.h>
9#include <asm/thread_info.h>
10#include <asm/asm.h>
11
12 .text
13 .align 1
14 .global clear_user
15 .type clear_user, "function"
16clear_user:
17 branch_if_kernel r8, __clear_user
18 ret_if_privileged r8, r12, r11, r11
19
20 .global __clear_user
21 .type __clear_user, "function"
22__clear_user:
23 mov r9, r12
24 mov r8, 0
25 andl r9, 3, COH
26 brne 5f
27
281: sub r11, 4
29 brlt 2f
30
3110: st.w r12++, r8
32 sub r11, 4
33 brge 10b
34
352: sub r11, -4
36 reteq 0
37
38 /* Unaligned count or address */
39 bld r11, 1
40 brcc 12f
4111: st.h r12++, r8
42 sub r11, 2
43 reteq 0
4412: st.b r12++, r8
45 retal 0
46
47 /* Unaligned address */
485: cp.w r11, 4
49 brlt 2b
50
51 lsl r9, 2
52 add pc, pc, r9
5313: st.b r12++, r8
54 sub r11, 1
5514: st.b r12++, r8
56 sub r11, 1
5715: st.b r12++, r8
58 sub r11, 1
59 rjmp 1b
60
61 .size clear_user, . - clear_user
62 .size __clear_user, . - __clear_user
63
64 .section .fixup, "ax"
65 .align 1
6618: sub r11, -4
6719: retal r11
68
69 .section __ex_table, "a"
70 .align 2
71 .long 10b, 18b
72 .long 11b, 19b
73 .long 12b, 19b
74 .long 13b, 19b
75 .long 14b, 19b
76 .long 15b, 19b
diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S
new file mode 100644
index 000000000000..ea59c04b07de
--- /dev/null
+++ b/arch/avr32/lib/copy_user.S
@@ -0,0 +1,119 @@
1/*
2 * Copy to/from userspace with optional address space checking.
3 *
4 * Copyright 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <asm/page.h>
11#include <asm/thread_info.h>
12#include <asm/asm.h>
13
14 /*
15 * __kernel_size_t
16 * __copy_user(void *to, const void *from, __kernel_size_t n)
17 *
18 * Returns the number of bytes not copied. Might be off by
19 * max 3 bytes if we get a fault in the main loop.
20 *
21 * The address-space checking functions simply fall through to
22 * the non-checking version.
23 */
24 .text
25 .align 1
26 .global copy_from_user
27 .type copy_from_user, @function
28copy_from_user:
29 branch_if_kernel r8, __copy_user
30 ret_if_privileged r8, r11, r10, r10
31 rjmp __copy_user
32 .size copy_from_user, . - copy_from_user
33
34 .global copy_to_user
35 .type copy_to_user, @function
36copy_to_user:
37 branch_if_kernel r8, __copy_user
38 ret_if_privileged r8, r12, r10, r10
39 .size copy_to_user, . - copy_to_user
40
41 .global __copy_user
42 .type __copy_user, @function
43__copy_user:
44 mov r9, r11
45 andl r9, 3, COH
46 brne 6f
47
48 /* At this point, from is word-aligned */
491: sub r10, 4
50 brlt 3f
51
522:
5310: ld.w r8, r11++
5411: st.w r12++, r8
55 sub r10, 4
56 brge 2b
57
583: sub r10, -4
59 reteq 0
60
61 /*
62 * Handle unaligned count. Need to be careful with r10 here so
63 * that we return the correct value even if we get a fault
64 */
654:
6620: ld.ub r8, r11++
6721: st.b r12++, r8
68 sub r10, 1
69 reteq 0
7022: ld.ub r8, r11++
7123: st.b r12++, r8
72 sub r10, 1
73 reteq 0
7424: ld.ub r8, r11++
7525: st.b r12++, r8
76 retal 0
77
78 /* Handle unaligned from-pointer */
796: cp.w r10, 4
80 brlt 4b
81 rsub r9, r9, 4
82
8330: ld.ub r8, r11++
8431: st.b r12++, r8
85 sub r10, 1
86 sub r9, 1
87 breq 1b
8832: ld.ub r8, r11++
8933: st.b r12++, r8
90 sub r10, 1
91 sub r9, 1
92 breq 1b
9334: ld.ub r8, r11++
9435: st.b r12++, r8
95 sub r10, 1
96 rjmp 1b
97 .size __copy_user, . - __copy_user
98
99 .section .fixup,"ax"
100 .align 1
10119: sub r10, -4
10229: retal r10
103
104 .section __ex_table,"a"
105 .align 2
106 .long 10b, 19b
107 .long 11b, 19b
108 .long 20b, 29b
109 .long 21b, 29b
110 .long 22b, 29b
111 .long 23b, 29b
112 .long 24b, 29b
113 .long 25b, 29b
114 .long 30b, 29b
115 .long 31b, 29b
116 .long 32b, 29b
117 .long 33b, 29b
118 .long 34b, 29b
119 .long 35b, 29b
diff --git a/arch/avr32/lib/csum_partial.S b/arch/avr32/lib/csum_partial.S
new file mode 100644
index 000000000000..6a262b528eb7
--- /dev/null
+++ b/arch/avr32/lib/csum_partial.S
@@ -0,0 +1,47 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9 /*
10 * unsigned int csum_partial(const unsigned char *buff,
11 * int len, unsigned int sum)
12 */
13 .text
14 .global csum_partial
15 .type csum_partial,"function"
16 .align 1
17csum_partial: /* as used below: r12 = buff, r11 = len, r10 = running sum, which is also the return value */
18 /* checksum complete words, aligned or not */
193: sub r11, 4
20 brlt 5f /* fewer than 4 bytes left */
214: ld.w r9, r12++
22 add r10, r9
23 acr r10 /* add the carry from the addition back into the sum */
24 sub r11, 4
25 brge 4b
26
27 /* return if we had a whole number of words */
285: sub r11, -4 /* undo the last subtraction: r11 = 0..3 bytes left */
29 reteq r10
30
31 /* checksum any remaining bytes at the end */
32 mov r9, 0
33 mov r8, 0
34 cp r11, 2
35 brlt 6f /* only one byte left */
36 ld.uh r9, r12++
37 sub r11, 2
38 breq 7f /* exactly two bytes: the halfword is added as loaded */
39 lsl r9, 16 /* three bytes: the halfword moves to the upper half */
406: ld.ub r8, r12++
41 lsl r8, 8 /* the last byte goes into bits 15..8 */
427: or r9, r8
43 add r10, r9
44 acr r10 /* add the carry back in, as in the word loop */
45
46 retal r10
47 .size csum_partial, . - csum_partial
diff --git a/arch/avr32/lib/csum_partial_copy_generic.S b/arch/avr32/lib/csum_partial_copy_generic.S
new file mode 100644
index 000000000000..a3a0f9b8929c
--- /dev/null
+++ b/arch/avr32/lib/csum_partial_copy_generic.S
@@ -0,0 +1,99 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <asm/errno.h>
9#include <asm/asm.h>
10
11 /*
12 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len
13 * int sum, int *src_err_ptr,
14 * int *dst_err_ptr)
15 *
16 * Copy src to dst while checksumming, otherwise like csum_partial.
17 */
18
19 .macro ld_src size, reg, ptr /* load from the source; a fault is redirected to fixup_ld_src */
209999: ld.\size \reg, \ptr
21 .section __ex_table, "a"
22 .long 9999b, fixup_ld_src
23 .previous
24 .endm
25
26 .macro st_dst size, ptr, reg /* store to the destination; a fault is redirected to fixup_st_dst */
279999: st.\size \ptr, \reg
28 .section __ex_table, "a"
29 .long 9999b, fixup_st_dst
30 .previous
31 .endm
32
33 .text
34 .global csum_partial_copy_generic
35 .type csum_partial_copy_generic,"function"
36 .align 1
37csum_partial_copy_generic: /* as used below: r12 = src, r11 = dst, r10 = len, r9 = running sum, r8 = src_err_ptr */
38 pushm r4-r7,lr /* five registers pushed; fixup_st_dst relies on this when it reads sp[20] */
39
40 /* The inner loop */
411: sub r10, 4
42 brlt 5f /* fewer than 4 bytes left */
432: ld_src w, r5, r12++
44 st_dst w, r11++, r5
45 add r9, r5
46 acr r9 /* add the carry back into the sum */
47 sub r10, 4
48 brge 2b
49
50 /* return if we had a whole number of words */
515: sub r10, -4 /* undo the last subtraction: r10 = 0..3 bytes left */
52 brne 7f
53
546: mov r12, r9 /* common exit: the sum is returned in r12 */
55 popm r4-r7,pc
56
57 /* handle additional bytes at the tail */
587: mov r5, 0 /* r5 collects the tail bytes */
59 mov r4, 32 /* r4 = 32 - 8 * (bytes collected so far) */
608: ld_src ub, r6, r12++
61 st_dst b, r11++, r6
62 lsl r5, 8
63 sub r4, 8
64 bfins r5, r6, 0, 8 /* append the new byte in the low 8 bits */
65 sub r10, 1
66 brne 8b
67
68 lsl r5, r5, r4 /* move the collected bytes to the top of the word */
69 add r9, r5
70 acr r9
71 rjmp 6b
72
73 /* Exception handler */
74 .section .fixup,"ax"
75 .align 1
76fixup_ld_src: /* source fault: store -EFAULT through r8 if it is non-NULL, then return a sum of 0 */
77 mov r9, -EFAULT
78 cp.w r8, 0
79 breq 1f
80 st.w r8[0], r9
81
821: /*
83 * TODO: zero the complete destination - computing the rest
84 * is too much work
85 */
86
87 mov r9, 0
88 rjmp 6b
89
90fixup_st_dst: /* destination fault: same as above, but the error pointer is fetched from the stack */
91 mov r9, -EFAULT
92 lddsp r8, sp[20] /* presumably dst_err_ptr, just above the five saved registers -- TODO confirm against the ABI */
93 cp.w r8, 0
94 breq 1f
95 st.w r8[0], r9
961: mov r9, 0
97 rjmp 6b
98
99 .previous
diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c
new file mode 100644
index 000000000000..462c8307b680
--- /dev/null
+++ b/arch/avr32/lib/delay.c
@@ -0,0 +1,55 @@
1/*
2 * Precise Delay Loops for avr32
3 *
4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 * Copyright (C) 2005-2006 Atmel Corporation
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/delay.h>
14#include <linux/module.h>
15#include <linux/types.h>
16
17#include <asm/delay.h>
18#include <asm/processor.h>
19#include <asm/sysreg.h>
20
21int read_current_timer(unsigned long *timer_value)
22{
23 *timer_value = sysreg_read(COUNT);
24 return 0;
25}
26
27void __delay(unsigned long loops)
28{
29 unsigned bclock, now;
30
31 bclock = sysreg_read(COUNT);
32 do {
33 now = sysreg_read(COUNT);
34 } while ((now - bclock) < loops);
35}
36
37inline void __const_udelay(unsigned long xloops)
38{
39 unsigned long long loops;
40
41 asm("mulu.d %0, %1, %2"
42 : "=r"(loops)
43 : "r"(current_cpu_data.loops_per_jiffy * HZ), "r"(xloops));
44 __delay(loops >> 32);
45}
46
/* Delay for @usecs microseconds. */
void __udelay(unsigned long usecs)
{
	/* 2**32 / 1000000 (rounded up) */
	const unsigned long xloops_per_usec = 0x000010c7;

	__const_udelay(usecs * xloops_per_usec);
}
51
/* Delay for @nsecs nanoseconds. */
void __ndelay(unsigned long nsecs)
{
	/* 2**32 / 1000000000 (rounded up) */
	const unsigned long xloops_per_nsec = 0x00005;

	__const_udelay(nsecs * xloops_per_nsec);
}
diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S
new file mode 100644
index 000000000000..2b4856f4bf7c
--- /dev/null
+++ b/arch/avr32/lib/findbit.S
@@ -0,0 +1,154 @@
1/*
2 * Copyright (C) 2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/linkage.h>
9
10 .text
11 /*
12 * unsigned long find_first_zero_bit(const unsigned long *addr,
13 * unsigned long size)
14 */
 /* In: r12 = addr, r11 = size. Scans one 32-bit word per iteration. */
15ENTRY(find_first_zero_bit)
16 cp.w r11, 0
17 reteq r11 /* size == 0: nothing to search, return size */
18 mov r9, r11 /* r9 = number of bits still to examine */
191: ld.w r8, r12[0]
20 com r8 /* invert, so zero bits become set bits */
21 brne .L_found /* at least one bit of this word was zero */
22 sub r12, -4 /* advance addr to the next word */
23 sub r9, 32
24 brgt 1b
25 retal r11 /* no zero bit found: return size */
26
27 /*
28 * unsigned long find_next_zero_bit(const unsigned long *addr,
29 * unsigned long size,
30 * unsigned long offset)
31 */
 /* In: r12 = addr, r11 = size, r10 = offset. Returns size if nothing is found. */
32ENTRY(find_next_zero_bit)
33 lsr r8, r10, 5 /* r8 = index of the word containing offset */
34 sub r9, r11, r10 /* r9 = size - offset = bits left to examine */
35 retle r11 /* offset at or past size: return size */
36
37 lsl r8, 2 /* word index -> byte offset */
38 add r12, r8
39 andl r10, 31, COH /* r10 = bit position of offset inside its word */
40 breq 1f
41
42 /* offset is not word-aligned. Handle the first (32 - r10) bits */
43 ld.w r8, r12[0]
44 com r8 /* invert, so zero bits become set bits */
45 sub r12, -4
46 lsr r8, r8, r10 /* drop the bits below offset */
47 brne .L_found
48
49 /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
50 add r9, r10
51 sub r9, 32
52 retle r11
53
54 /* Main loop. offset must be word-aligned */
551: ld.w r8, r12[0]
56 com r8
57 brne .L_found
58 sub r12, -4
59 sub r9, 32
60 brgt 1b
61 retal r11
62
63 /* Common return path for when a bit is actually found. */
 /*
  * Shared tail of all the find_*_bit routines in this file.
  * In: r8 = non-zero word whose lowest set bit is the hit,
  *     r9 = bits that were left when that word was reached, r11 = size.
  */
64.L_found:
65 brev r8 /* reverse the bits so that clz yields the index of the lowest set bit */
66 clz r10, r8
67 rsub r9, r11 /* NOTE(review): presumably r9 = r11 - r9, the bit index at which r8 started - confirm */
68 add r10, r9
69
70 /* XXX: If we don't have to return exactly "size" when the bit
71 is not found, we may drop this "min" thing */
72 min r12, r11, r10 /* clamp hits in the padding past size down to size */
73 retal r12
74
75 /*
76 * unsigned long find_first_bit(const unsigned long *addr,
77 * unsigned long size)
78 */
 /* In: r12 = addr, r11 = size. Same loop as find_first_zero_bit, without the inversion. */
79ENTRY(find_first_bit)
80 cp.w r11, 0
81 reteq r11 /* size == 0: nothing to search, return size */
82 mov r9, r11 /* r9 = number of bits still to examine */
831: ld.w r8, r12[0]
84 cp.w r8, 0
85 brne .L_found /* this word has at least one set bit */
86 sub r12, -4 /* advance addr to the next word */
87 sub r9, 32
88 brgt 1b
89 retal r11 /* no set bit found: return size */
90
91 /*
92 * unsigned long find_next_bit(const unsigned long *addr,
93 * unsigned long size,
94 * unsigned long offset)
95 */
 /* In: r12 = addr, r11 = size, r10 = offset. Returns size if nothing is found. */
96ENTRY(find_next_bit)
97 lsr r8, r10, 5 /* r8 = index of the word containing offset */
98 sub r9, r11, r10 /* r9 = size - offset = bits left to examine */
99 retle r11 /* offset at or past size: return size */
100
101 lsl r8, 2 /* word index -> byte offset */
102 add r12, r8
103 andl r10, 31, COH /* r10 = bit position of offset inside its word */
104 breq 1f
105
106 /* offset is not word-aligned. Handle the first (32 - r10) bits */
107 ld.w r8, r12[0]
108 sub r12, -4
109 lsr r8, r8, r10 /* drop the bits below offset */
110 brne .L_found
111
112 /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
113 add r9, r10
114 sub r9, 32
115 retle r11
116
117 /* Main loop. offset must be word-aligned */
1181: ld.w r8, r12[0]
119 cp.w r8, 0
120 brne .L_found
121 sub r12, -4
122 sub r9, 32
123 brgt 1b
124 retal r11
125
/*
 * Find the next ZERO bit in a bitmap whose words are loaded byte-swapped
 * (ldswp.w).
 *
 * In:  r12 = addr, r11 = size, r10 = offset
 * Out: index of the next zero bit at or after offset, or size if none.
 *
 * Each loaded word is complemented so that zero bits become set bits,
 * which is what the shared .L_found tail looks for. Without the "com"
 * this routine would report the next set bit instead.
 */
ENTRY(generic_find_next_zero_le_bit)
	lsr	r8, r10, 5		/* r8 = index of the word containing offset */
	sub	r9, r11, r10		/* r9 = size - offset = bits left */
	retle	r11

	lsl	r8, 2			/* word index -> byte offset */
	add	r12, r8
	andl	r10, 31, COH		/* r10 = bit position inside the word */
	breq	1f

	/* offset is not word-aligned. Handle the first (32 - r10) bits */
	ldswp.w	r8, r12[0]
	com	r8			/* looking for zero bits */
	sub	r12, -4
	lsr	r8, r8, r10		/* drop the bits below offset */
	brne	.L_found

	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
	add	r9, r10
	sub	r9, 32
	retle	r11

	/* Main loop. offset must be word-aligned */
1:	ldswp.w	r8, r12[0]
	com	r8			/* looking for zero bits; also sets Z for brne */
	brne	.L_found
	sub	r12, -4
	sub	r9, 32
	brgt	1b
	retal	r11
diff --git a/arch/avr32/lib/io-readsl.S b/arch/avr32/lib/io-readsl.S
new file mode 100644
index 000000000000..b103511ed6c4
--- /dev/null
+++ b/arch/avr32/lib/io-readsl.S
@@ -0,0 +1,24 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
 /*
  * Read r10 32-bit words from the fixed address in r12 and store them
  * to consecutive locations starting at r11. r12 is never advanced.
  */
9 .global __raw_readsl
10 .type __raw_readsl,@function
11__raw_readsl:
12 cp.w r10, 0
13 reteq r12 /* count == 0: nothing to do */
14
15 /*
16 * If r11 isn't properly aligned, we might get an exception on
17 * some implementations. But there's not much we can do about it.
18 */
191: ld.w r8, r12[0]
20 sub r10, 1
21 st.w r11++, r8
22 brne 1b /* NOTE(review): relies on st.w leaving the flags from "sub" intact - confirm */
23
24 retal r12
diff --git a/arch/avr32/lib/io-readsw.S b/arch/avr32/lib/io-readsw.S
new file mode 100644
index 000000000000..456be9909027
--- /dev/null
+++ b/arch/avr32/lib/io-readsw.S
@@ -0,0 +1,43 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
 /*
  * __raw_readsw: read r10 16-bit values from the fixed address in r12
  * and store them to consecutive locations starting at r11.
  *
  * .Lnot_word_aligned sits in front of the entry point: it transfers a
  * single halfword and then falls through into __raw_readsw, which
  * re-checks the count and the alignment of r11.
  */
9.Lnot_word_aligned:
10 /*
11 * Bad alignment will cause a hardware exception, which is as
12 * good as anything. No need for us to check for proper alignment.
13 */
14 ld.uh r8, r12[0]
15 sub r10, 1
16 st.h r11++, r8
17
18 /* fall through */
19
20 .global __raw_readsw
21 .type __raw_readsw,@function
22__raw_readsw:
23 cp.w r10, 0
24 reteq r12 /* nothing (left) to transfer */
25 mov r9, 3
26 tst r11, r9 /* is the destination word-aligned? */
27 brne .Lnot_word_aligned
28
29 sub r10, 2
30 brlt 2f /* fewer than two halfwords left */
31
 /* Two reads from the address are packed into one word store. */
321: ldins.h r8:t, r12[0]
33 ldins.h r8:b, r12[0]
34 st.w r11++, r8
35 sub r10, 2
36 brge 1b
37
382: sub r10, -2 /* undo the bias: r10 = halfwords left (0 or 1) */
39 reteq r12
40
41 ld.uh r8, r12[0]
42 st.h r11++, r8
43 retal r12
diff --git a/arch/avr32/lib/io-writesl.S b/arch/avr32/lib/io-writesl.S
new file mode 100644
index 000000000000..22138b3a16e5
--- /dev/null
+++ b/arch/avr32/lib/io-writesl.S
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
 /*
  * Load r10 32-bit words from consecutive locations starting at r11 and
  * store each of them to the fixed address in r12. r12 is never advanced.
  */
9 .global __raw_writesl
10 .type __raw_writesl,@function
11__raw_writesl:
12 cp.w r10, 0
13 reteq r12 /* count == 0: nothing to do */
14
151: ld.w r8, r11++
16 sub r10, 1
17 st.w r12[0], r8
18 brne 1b /* NOTE(review): relies on st.w leaving the flags from "sub" intact - confirm */
19
20 retal r12
diff --git a/arch/avr32/lib/io-writesw.S b/arch/avr32/lib/io-writesw.S
new file mode 100644
index 000000000000..8c4a53f1c52a
--- /dev/null
+++ b/arch/avr32/lib/io-writesw.S
@@ -0,0 +1,38 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
 /*
  * __raw_writesw: load r10 16-bit values from consecutive locations
  * starting at r11 and store each of them to the fixed address in r12.
  *
  * .Lnot_word_aligned sits in front of the entry point: it transfers a
  * single halfword and then falls through into __raw_writesw, which
  * re-checks the count and the alignment of r11.
  */
9.Lnot_word_aligned:
10 ld.uh r8, r11++
11 sub r10, 1
12 st.h r12[0], r8
13
14 .global __raw_writesw
15 .type __raw_writesw,@function
16__raw_writesw:
17 cp.w r10, 0
18 mov r9, 3
19 reteq r12 /* nothing (left) to transfer */
20 tst r11, r9 /* is the source buffer word-aligned? */
21 brne .Lnot_word_aligned
22
23 sub r10, 2
24 brlt 2f /* fewer than two halfwords left */
25
 /* One word load feeds two halfword stores: upper half first, then lower. */
261: ld.w r8, r11++
27 bfextu r9, r8, 16, 16
28 st.h r12[0], r9
29 st.h r12[0], r8
30 sub r10, 2
31 brge 1b
32
332: sub r10, -2 /* undo the bias: r10 = halfwords left (0 or 1) */
34 reteq r12
35
36 ld.uh r8, r11++
37 st.h r12[0], r8
38 retal r12
diff --git a/arch/avr32/lib/libgcc.h b/arch/avr32/lib/libgcc.h
new file mode 100644
index 000000000000..5a091b5e3618
--- /dev/null
+++ b/arch/avr32/lib/libgcc.h
@@ -0,0 +1,33 @@
1/* Definitions for various functions 'borrowed' from gcc-3.4.3 */
2
3#define BITS_PER_UNIT 8
4
5typedef int QItype __attribute__ ((mode (QI)));
6typedef unsigned int UQItype __attribute__ ((mode (QI)));
7typedef int HItype __attribute__ ((mode (HI)));
8typedef unsigned int UHItype __attribute__ ((mode (HI)));
9typedef int SItype __attribute__ ((mode (SI)));
10typedef unsigned int USItype __attribute__ ((mode (SI)));
11typedef int DItype __attribute__ ((mode (DI)));
12typedef unsigned int UDItype __attribute__ ((mode (DI)));
13typedef float SFtype __attribute__ ((mode (SF)));
14typedef float DFtype __attribute__ ((mode (DF)));
15typedef int word_type __attribute__ ((mode (__word__)));
16
17#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
18#define Wtype SItype
19#define UWtype USItype
20#define HWtype SItype
21#define UHWtype USItype
22#define DWtype DItype
23#define UDWtype UDItype
24#define __NW(a,b) __ ## a ## si ## b
25#define __NDW(a,b) __ ## a ## di ## b
26
/* The two word-sized halves of a double word; "high" is the first member. */
27struct DWstruct {Wtype high, low;};
28
/* Overlay giving access to a double word either as a whole or by halves. */
29typedef union
30{
31 struct DWstruct s;
32 DWtype ll;
33} DWunion;
diff --git a/arch/avr32/lib/longlong.h b/arch/avr32/lib/longlong.h
new file mode 100644
index 000000000000..cd5e369ac437
--- /dev/null
+++ b/arch/avr32/lib/longlong.h
@@ -0,0 +1,98 @@
1/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2 Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
4
5 This definition file is free software; you can redistribute it
6 and/or modify it under the terms of the GNU General Public
7 License as published by the Free Software Foundation; either
8 version 2, or (at your option) any later version.
9
10 This definition file is distributed in the hope that it will be
11 useful, but WITHOUT ANY WARRANTY; without even the implied
12 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 See the GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20/* Borrowed from gcc-3.4.3 */
21
22#define __BITS4 (W_TYPE_SIZE / 4)
23#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
24#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
25#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
26
27#define count_leading_zeros(count, x) ((count) = __builtin_clz(x))
28
/*
 * Divide the two-word value (n1, n0) by the one-word divisor d, storing
 * the quotient in q and the remainder in r. The division is done in two
 * half-word steps: each step divides by the high half of d (__d1), then
 * corrects the quotient digit by at most two using the low half (__d0).
 *
 * NOTE(review): as in gcc's longlong.h, this presumably requires n1 < d
 * and a normalized d (most significant bit set) - confirm against callers.
 * The arguments are evaluated more than once.
 */
29#define __udiv_qrnnd_c(q, r, n1, n0, d) \
30 do { \
31 UWtype __d1, __d0, __q1, __q0; \
32 UWtype __r1, __r0, __m; \
33 __d1 = __ll_highpart (d); \
34 __d0 = __ll_lowpart (d); \
35 \
36 __r1 = (n1) % __d1; \
37 __q1 = (n1) / __d1; \
38 __m = (UWtype) __q1 * __d0; \
39 __r1 = __r1 * __ll_B | __ll_highpart (n0); \
40 if (__r1 < __m) \
41 { \
42 __q1--, __r1 += (d); \
43 if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
44 if (__r1 < __m) \
45 __q1--, __r1 += (d); \
46 } \
47 __r1 -= __m; \
48 \
49 __r0 = __r1 % __d1; \
50 __q0 = __r1 / __d1; \
51 __m = (UWtype) __q0 * __d0; \
52 __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
53 if (__r0 < __m) \
54 { \
55 __q0--, __r0 += (d); \
56 if (__r0 >= (d)) \
57 if (__r0 < __m) \
58 __q0--, __r0 += (d); \
59 } \
60 __r0 -= __m; \
61 \
62 (q) = (UWtype) __q1 * __ll_B | __q0; \
63 (r) = __r0; \
64 } while (0)
65
66#define udiv_qrnnd __udiv_qrnnd_c
67
/*
 * Two-word subtraction: (sh, sl) = (ah, al) - (bh, bl).
 * The low words are subtracted first; a borrow is detected by the low
 * difference being larger than al and is taken out of the high word.
 * al is evaluated twice.
 */
68#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
69 do { \
70 UWtype __x; \
71 __x = (al) - (bl); \
72 (sh) = (ah) - (bh) - (__x > (al)); \
73 (sl) = __x; \
74 } while (0)
75
/*
 * Word x word -> two-word multiply: (w1, w0) = u * v, with w1 receiving
 * the high word and w0 the low word. Both operands are split into
 * half-words and the four partial products are summed, propagating the
 * single possible carry out of the middle terms into the high word.
 * u and v are each evaluated twice.
 */
76#define umul_ppmm(w1, w0, u, v) \
77 do { \
78 UWtype __x0, __x1, __x2, __x3; \
79 UHWtype __ul, __vl, __uh, __vh; \
80 \
81 __ul = __ll_lowpart (u); \
82 __uh = __ll_highpart (u); \
83 __vl = __ll_lowpart (v); \
84 __vh = __ll_highpart (v); \
85 \
86 __x0 = (UWtype) __ul * __vl; \
87 __x1 = (UWtype) __ul * __vh; \
88 __x2 = (UWtype) __uh * __vl; \
89 __x3 = (UWtype) __uh * __vh; \
90 \
91 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
92 __x1 += __x2; /* but this indeed can */ \
93 if (__x1 < __x2) /* did we get it? */ \
94 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
95 \
96 (w1) = __x3 + __ll_highpart (__x1); \
97 (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
98 } while (0)
diff --git a/arch/avr32/lib/memcpy.S b/arch/avr32/lib/memcpy.S
new file mode 100644
index 000000000000..0abb26142b64
--- /dev/null
+++ b/arch/avr32/lib/memcpy.S
@@ -0,0 +1,62 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9 /*
10 * void *memcpy(void *to, const void *from, unsigned long n)
11 *
12 * This implementation does word-aligned loads in the main loop,
13 * possibly sacrificing alignment of stores.
14 *
15 * Hopefully, in most cases, both "to" and "from" will be
16 * word-aligned to begin with.
17 */
18 .text
19 .global memcpy
20 .type memcpy, @function
memcpy:
	/*
	 * In:  r12 = to, r11 = from, r10 = n
	 * Out: the original value of "to"
	 *
	 * r9 carries the return value from label 5 onwards, so every path
	 * that reaches label 4 or 5 must have loaded the ORIGINAL "to"
	 * into r9 first.
	 */
	mov	r9, r11
	andl	r9, 3, COH		/* r9 = from & 3 */
	brne	1f

	/* At this point, "from" is word-aligned */
2:	mov	r9, r12			/* return value: "to" is still untouched */
5:	sub	r10, 4
	brlt	4f

3:	ld.w	r8, r11++
	sub	r10, 4
	st.w	r12++, r8
	brge	3b

4:	neg	r10			/* r10 = 4 - (bytes left), bytes left in 0..3 */
	reteq	r9

	/* Handle unaligned count: jump over the byte copies not needed */
	lsl	r10, 2
	add	pc, pc, r10
	ld.ub	r8, r11++
	st.b	r12++, r8
	ld.ub	r8, r11++
	st.b	r12++, r8
	ld.ub	r8, r11++
	st.b	r12++, r8
	retal	r9

	/* Handle unaligned "from" pointer */
1:	sub	r10, 4
	movlt	r9, r12			/* n < 4: r9 held (from & 3), return "to" instead */
	brlt	4b
	add	r10, r9			/* n -= 4 - (from & 3), the bytes copied below */
	lsl	r9, 2
	add	pc, pc, r9
	ld.ub	r8, r11++
	st.b	r12++, r8
	ld.ub	r8, r11++
	st.b	r12++, r8
	ld.ub	r8, r11++
	st.b	r12++, r8

	/*
	 * "to" has been advanced by 4 - (from & 3) bytes. Undo that in r9
	 * so that the original pointer is returned: r9 is still
	 * 4 * (from & 3) here.
	 */
	lsr	r9, 2			/* r9 = from & 3 */
	sub	r9, 4			/* r9 = -(bytes copied above) */
	add	r9, r12			/* r9 = original "to" */
	rjmp	5b
diff --git a/arch/avr32/lib/memset.S b/arch/avr32/lib/memset.S
new file mode 100644
index 000000000000..40da32c0480c
--- /dev/null
+++ b/arch/avr32/lib/memset.S
@@ -0,0 +1,72 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * Based on linux/arch/arm/lib/memset.S
5 * Copyright (C) 1995-2000 Russell King
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * ASM optimised string functions
12 */
13#include <asm/asm.h>
14
15 /*
16 * r12: void *b
17 * r11: int c
18 * r10: size_t len
19 *
20 * Returns b in r12
21 */
22 .text
23 .global memset
24 .type memset, @function
25 .align 5
26memset:
 /*
  * r8 is the running store pointer; r12 is left untouched so that it can
  * be returned as is. r9 = b & 3 selects the unaligned-pointer path.
  * NOTE(review): the fill pattern is built by OR-ing shifted copies of
  * r11, which assumes c fits in 8 bits - confirm callers never pass more.
  */
27 mov r9, r12
28 mov r8, r12
29 or r11, r11, r11 << 8 /* replicate the byte into the low halfword */
30 andl r9, 3, COH
31 brne 1f
32
332: or r11, r11, r11 << 16 /* replicate the halfword into the full word */
34 sub r10, 4
35 brlt 5f
36
37 /* Let's do some real work */
384: st.w r8++, r11
39 sub r10, 4
40 brge 4b
41
42 /*
43 * When we get here, we've got less than 4 bytes to set. r10
44 * might be negative.
45 */
465: sub r10, -4 /* undo the bias: r10 = bytes left (0..3) */
47 reteq r12
48
49 /* Fastpath ends here, exactly 32 bytes from memset */
50
51 /* Handle unaligned count or pointer */
52 bld r10, 1 /* bit 1 of the remainder set: two or three bytes left */
53 brcc 6f
54 st.b r8++, r11
55 st.b r8++, r11
56 bld r10, 0 /* bit 0 set as well: one more byte */
57 retcc r12
586: st.b r8++, r11
59 retal r12
60
61 /* Handle unaligned pointer */
621: sub r10, 4
63 brlt 5b /* fewer than 4 bytes in total: plain byte stores */
64 add r10, r9 /* len -= 4 - (b & 3), the bytes stored below */
65 lsl r9, 1 /* NOTE(review): jump offset assumes each st.b below is 2 bytes long - confirm */
66 add pc, r9
67 st.b r8++, r11
68 st.b r8++, r11
69 st.b r8++, r11
70 rjmp 2b
71
72 .size memset, . - memset
diff --git a/arch/avr32/lib/strncpy_from_user.S b/arch/avr32/lib/strncpy_from_user.S
new file mode 100644
index 000000000000..72bd50599ec6
--- /dev/null
+++ b/arch/avr32/lib/strncpy_from_user.S
@@ -0,0 +1,60 @@
1/*
2 * Copy a NUL-terminated string from userspace with optional address space checking.
3 *
4 * Copyright 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/errno.h>
11
12#include <asm/page.h>
13#include <asm/thread_info.h>
14#include <asm/asm.h>
15
16 /*
17 * long strncpy_from_user(char *dst, const char *src, long count)
18 *
19 * On success, returns the length of the string, not including
20 * the terminating NUL.
21 *
22 * If the string is longer than count, returns count
23 *
24 * If userspace access fails, returns -EFAULT
25 */
26 .text
27 .align 1
28 .global strncpy_from_user
29 .type strncpy_from_user, "function"
 /*
  * In: r12 = dst, r11 = src, r10 = count.
  * strncpy_from_user falls through into __strncpy_from_user after the
  * address space checks; __strncpy_from_user does the copy unchecked.
  * NOTE(review): branch_if_kernel / ret_if_privileged come from asm/asm.h;
  * presumably they skip the check for kernel callers and return r9
  * (-EFAULT) for a source range outside user space - confirm.
  */
30strncpy_from_user:
31 mov r9, -EFAULT
32 branch_if_kernel r8, __strncpy_from_user
33 ret_if_privileged r8, r11, r10, r9
34
35 .global __strncpy_from_user
36 .type __strncpy_from_user, "function"
37__strncpy_from_user:
38 cp.w r10, 0
39 reteq 0 /* count == 0: nothing copied */
40
41 mov r9, r10 /* r9 = bytes that may still be copied */
42
 /* The load at "1:" is the instruction covered by the exception table entry. */
431: ld.ub r8, r11++
44 st.b r12++, r8 /* the terminating NUL is stored too */
45 cp.w r8, 0
46 breq 2f
47 sub r9, 1
48 brne 1b
49
 /* count - remaining = characters copied, not counting the NUL */
502: sub r10, r9
51 retal r10
52
53 .section .fixup, "ax"
54 .align 1
553: mov r12, -EFAULT
56 retal r12
57
58 .section __ex_table, "a"
59 .align 2
60 .long 1b, 3b
diff --git a/arch/avr32/lib/strnlen_user.S b/arch/avr32/lib/strnlen_user.S
new file mode 100644
index 000000000000..65ce11afa66a
--- /dev/null
+++ b/arch/avr32/lib/strnlen_user.S
@@ -0,0 +1,67 @@
1/*
2 * Get the length of a userspace string with optional address space checking.
3 *
4 * Copyright 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <asm/page.h>
11#include <asm/thread_info.h>
12#include <asm/processor.h>
13#include <asm/asm.h>
14
15 .text
16 .align 1
17 .global strnlen_user
18 .type strnlen_user, "function"
 /*
  * In: r12 = string address, r11 = maximum number of bytes to look at.
  * Out: length including the terminating NUL, or a value larger than the
  * limit if no NUL was found within it; 0 on a bad address.
  * strnlen_user falls through into the unchecked __strnlen_user.
  */
19strnlen_user:
20 branch_if_kernel r8, __strnlen_user
21 sub r8, r11, 1
22 add r8, r12 /* r8 = address of the last byte that may be read */
23 retcs 0 /* the range wraps around the address space */
24 brmi adjust_length /* do a closer inspection */
25
26 .global __strnlen_user
27 .type __strnlen_user, "function"
28__strnlen_user:
29 mov r10, r12 /* remember the start of the string */
30
 /* The load at "10:" is the instruction covered by the exception table entry. */
3110: ld.ub r8, r12++
32 cp.w r8, 0
33 breq 2f
34 sub r11, 1
35 brne 10b
36
37 sub r12, -1 /* limit exhausted without a NUL: make the result limit + 1 */
382: sub r12, r10
39 retal r12
40
41
42 .type adjust_length, "function"
 /*
  * Reached from strnlen_user when the end of the requested range has its
  * top bit set. Retries the scan with the limit cut down to
  * TASK_SIZE - addr, and returns 0 if the scan ran past that limit.
  */
43adjust_length:
44 cp.w r12, 0 /* addr must always be < TASK_SIZE */
45 retmi 0
46
47 pushm lr
48 lddpc lr, _task_size
49 sub r11, lr, r12 /* new limit = TASK_SIZE - addr */
50 mov r9, r11 /* keep a copy: __strnlen_user consumes r11 */
51 rcall __strnlen_user
52 cp.w r12, r9
53 brgt 1f /* no NUL before TASK_SIZE */
54 popm pc
551: popm pc, r12=0
56
57 .align 2
58_task_size:
59 .long TASK_SIZE
60
61 .section .fixup, "ax"
62 .align 1
6319: retal 0
64
65 .section __ex_table, "a"
66 .align 2
67 .long 10b, 19b
diff --git a/arch/avr32/mach-at32ap/Makefile b/arch/avr32/mach-at32ap/Makefile
new file mode 100644
index 000000000000..f62eb6915510
--- /dev/null
+++ b/arch/avr32/mach-at32ap/Makefile
@@ -0,0 +1,2 @@
1obj-y += at32ap.o clock.o pio.o intc.o extint.o hsmc.o
2obj-$(CONFIG_CPU_AT32AP7000) += at32ap7000.o
diff --git a/arch/avr32/mach-at32ap/at32ap.c b/arch/avr32/mach-at32ap/at32ap.c
new file mode 100644
index 000000000000..f7cedf5aabea
--- /dev/null
+++ b/arch/avr32/mach-at32ap/at32ap.c
@@ -0,0 +1,90 @@
1/*
2 * Copyright (C) 2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/clk.h>
10#include <linux/err.h>
11#include <linux/init.h>
12#include <linux/platform_device.h>
13
14#include <asm/io.h>
15
16#include <asm/arch/init.h>
17#include <asm/arch/sm.h>
18
19struct at32_sm system_manager;
20
21static int __init at32_sm_init(void)
22{
23 struct resource *regs;
24 struct at32_sm *sm = &system_manager;
25 int ret = -ENXIO;
26
27 regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0);
28 if (!regs)
29 goto fail;
30
31 spin_lock_init(&sm->lock);
32 sm->pdev = &at32_sm_device;
33
34 ret = -ENOMEM;
35 sm->regs = ioremap(regs->start, regs->end - regs->start + 1);
36 if (!sm->regs)
37 goto fail;
38
39 return 0;
40
41fail:
42 printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret);
43 return ret;
44}
45
/*
 * Platform setup entry point: calls at32_sm_init(), at32_clock_init() and
 * at32_portmux_init() in that order, then sets up the serial console with
 * argument 1. The return value of at32_sm_init() is not checked.
 */
46void __init setup_platform(void)
47{
48 at32_sm_init();
49 at32_clock_init();
50 at32_portmux_init();
51
52 /* FIXME: This doesn't belong here */
53 at32_setup_serial_console(1);
54}
55
56static int __init pdc_probe(struct platform_device *pdev)
57{
58 struct clk *pclk, *hclk;
59
60 pclk = clk_get(&pdev->dev, "pclk");
61 if (IS_ERR(pclk)) {
62 dev_err(&pdev->dev, "no pclk defined\n");
63 return PTR_ERR(pclk);
64 }
65 hclk = clk_get(&pdev->dev, "hclk");
66 if (IS_ERR(hclk)) {
67 dev_err(&pdev->dev, "no hclk defined\n");
68 clk_put(pclk);
69 return PTR_ERR(hclk);
70 }
71
72 clk_enable(pclk);
73 clk_enable(hclk);
74
75 dev_info(&pdev->dev, "Atmel Peripheral DMA Controller enabled\n");
76 return 0;
77}
78
79static struct platform_driver pdc_driver = {
80 .probe = pdc_probe,
81 .driver = {
82 .name = "pdc",
83 },
84};
85
86static int __init pdc_init(void)
87{
88 return platform_driver_register(&pdc_driver);
89}
90arch_initcall(pdc_init);
diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c
new file mode 100644
index 000000000000..37982b60398e
--- /dev/null
+++ b/arch/avr32/mach-at32ap/at32ap7000.c
@@ -0,0 +1,876 @@
1/*
2 * Copyright (C) 2005-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/clk.h>
9#include <linux/init.h>
10#include <linux/platform_device.h>
11
12#include <asm/io.h>
13
14#include <asm/arch/board.h>
15#include <asm/arch/portmux.h>
16#include <asm/arch/sm.h>
17
18#include "clock.h"
19#include "pio.h"
20#include "sm.h"
21
/*
 * Initializers for struct resource entries.
 *
 * PBMEM(base):          0x400-byte memory region starting at base.
 * IRQ(num):             a single interrupt line.
 * NAMED_IRQ(num, name): a single interrupt line with a resource name.
 *
 * All arguments are parenthesized so that compound expressions (e.g. a
 * conditional or an OR of flags) expand with the intended precedence.
 */
#define PBMEM(base)					\
	{						\
		.start		= (base),		\
		.end		= (base) + 0x3ff,	\
		.flags		= IORESOURCE_MEM,	\
	}
#define IRQ(num)					\
	{						\
		.start		= (num),		\
		.end		= (num),		\
		.flags		= IORESOURCE_IRQ,	\
	}
#define NAMED_IRQ(num, _name)				\
	{						\
		.start		= (num),		\
		.end		= (num),		\
		.name		= (_name),		\
		.flags		= IORESOURCE_IRQ,	\
	}
41
/*
 * DEFINE_DEV(_name, _id): define the static platform_device
 * _name##_id##_device, named #_name with id _id, using the resource array
 * _name##_id##_resource, which must already be defined.
 */
42#define DEFINE_DEV(_name, _id) \
43static struct platform_device _name##_id##_device = { \
44 .name = #_name, \
45 .id = _id, \
46 .resource = _name##_id##_resource, \
47 .num_resources = ARRAY_SIZE(_name##_id##_resource), \
48}
/*
 * DEFINE_DEV_DATA(_name, _id): like DEFINE_DEV, but also points
 * dev.platform_data at _name##_id##_data, which must already be defined.
 */
49#define DEFINE_DEV_DATA(_name, _id) \
50static struct platform_device _name##_id##_device = { \
51 .name = #_name, \
52 .id = _id, \
53 .dev = { \
54 .platform_data = &_name##_id##_data, \
55 }, \
56 .resource = _name##_id##_resource, \
57 .num_resources = ARRAY_SIZE(_name##_id##_resource), \
58}
59
/*
 * DEV_CLK(_name, devname, bus, _index): define the static struct clk
 * devname##_##_name, named #_name and bound to devname##_device. Its
 * parent is bus##_clk, and it uses bus##_clk_mode / bus##_clk_get_rate
 * with the given index.
 */
60#define DEV_CLK(_name, devname, bus, _index) \
61static struct clk devname##_##_name = { \
62 .name = #_name, \
63 .dev = &devname##_device.dev, \
64 .parent = &bus##_clk, \
65 .mode = bus##_clk_mode, \
66 .get_rate = bus##_clk_get_rate, \
67 .index = _index, \
68}
69
70enum {
71 PIOA,
72 PIOB,
73 PIOC,
74 PIOD,
75};
76
77enum {
78 FUNC_A,
79 FUNC_B,
80};
81
82unsigned long at32ap7000_osc_rates[3] = {
83 [0] = 32768,
84 /* FIXME: these are ATSTK1002-specific */
85 [1] = 20000000,
86 [2] = 12000000,
87};
88
/*
 * Return the rate of an oscillator clock by looking up clk->index in
 * at32ap7000_osc_rates[]. The index is not range-checked here.
 */
89static unsigned long osc_get_rate(struct clk *clk)
90{
91 return at32ap7000_osc_rates[clk->index];
92}
93
94static unsigned long pll_get_rate(struct clk *clk, unsigned long control)
95{
96 unsigned long div, mul, rate;
97
98 if (!(control & SM_BIT(PLLEN)))
99 return 0;
100
101 div = SM_BFEXT(PLLDIV, control) + 1;
102 mul = SM_BFEXT(PLLMUL, control) + 1;
103
104 rate = clk->parent->get_rate(clk->parent);
105 rate = (rate + div / 2) / div;
106 rate *= mul;
107
108 return rate;
109}
110
111static unsigned long pll0_get_rate(struct clk *clk)
112{
113 u32 control;
114
115 control = sm_readl(&system_manager, PM_PLL0);
116
117 return pll_get_rate(clk, control);
118}
119
120static unsigned long pll1_get_rate(struct clk *clk)
121{
122 u32 control;
123
124 control = sm_readl(&system_manager, PM_PLL1);
125
126 return pll_get_rate(clk, control);
127}
128
129/*
130 * The AT32AP7000 has five primary clock sources: One 32kHz
131 * oscillator, two crystal oscillators and two PLLs.
132 */
133static struct clk osc32k = {
134 .name = "osc32k",
135 .get_rate = osc_get_rate,
136 .users = 1,
137 .index = 0,
138};
139static struct clk osc0 = {
140 .name = "osc0",
141 .get_rate = osc_get_rate,
142 .users = 1,
143 .index = 1,
144};
145static struct clk osc1 = {
146 .name = "osc1",
147 .get_rate = osc_get_rate,
148 .index = 2,
149};
150static struct clk pll0 = {
151 .name = "pll0",
152 .get_rate = pll0_get_rate,
153 .parent = &osc0,
154};
155static struct clk pll1 = {
156 .name = "pll1",
157 .get_rate = pll1_get_rate,
158 .parent = &osc0,
159};
160
161/*
162 * The main clock can be either osc0 or pll0. The boot loader may
163 * have chosen one for us, so we don't really know which one until we
164 * have a look at the SM.
165 */
166static struct clk *main_clock;
167
168/*
169 * Synchronous clocks are generated from the main clock. The clocks
170 * must satisfy the constraint
171 * fCPU >= fHSB >= fPB
172 * i.e. each clock must not be faster than its parent.
173 */
174static unsigned long bus_clk_get_rate(struct clk *clk, unsigned int shift)
175{
176 return main_clock->get_rate(main_clock) >> shift;
177};
178
179static void cpu_clk_mode(struct clk *clk, int enabled)
180{
181 struct at32_sm *sm = &system_manager;
182 unsigned long flags;
183 u32 mask;
184
185 spin_lock_irqsave(&sm->lock, flags);
186 mask = sm_readl(sm, PM_CPU_MASK);
187 if (enabled)
188 mask |= 1 << clk->index;
189 else
190 mask &= ~(1 << clk->index);
191 sm_writel(sm, PM_CPU_MASK, mask);
192 spin_unlock_irqrestore(&sm->lock, flags);
193}
194
195static unsigned long cpu_clk_get_rate(struct clk *clk)
196{
197 unsigned long cksel, shift = 0;
198
199 cksel = sm_readl(&system_manager, PM_CKSEL);
200 if (cksel & SM_BIT(CPUDIV))
201 shift = SM_BFEXT(CPUSEL, cksel) + 1;
202
203 return bus_clk_get_rate(clk, shift);
204}
205
206static void hsb_clk_mode(struct clk *clk, int enabled)
207{
208 struct at32_sm *sm = &system_manager;
209 unsigned long flags;
210 u32 mask;
211
212 spin_lock_irqsave(&sm->lock, flags);
213 mask = sm_readl(sm, PM_HSB_MASK);
214 if (enabled)
215 mask |= 1 << clk->index;
216 else
217 mask &= ~(1 << clk->index);
218 sm_writel(sm, PM_HSB_MASK, mask);
219 spin_unlock_irqrestore(&sm->lock, flags);
220}
221
222static unsigned long hsb_clk_get_rate(struct clk *clk)
223{
224 unsigned long cksel, shift = 0;
225
226 cksel = sm_readl(&system_manager, PM_CKSEL);
227 if (cksel & SM_BIT(HSBDIV))
228 shift = SM_BFEXT(HSBSEL, cksel) + 1;
229
230 return bus_clk_get_rate(clk, shift);
231}
232
233static void pba_clk_mode(struct clk *clk, int enabled)
234{
235 struct at32_sm *sm = &system_manager;
236 unsigned long flags;
237 u32 mask;
238
239 spin_lock_irqsave(&sm->lock, flags);
240 mask = sm_readl(sm, PM_PBA_MASK);
241 if (enabled)
242 mask |= 1 << clk->index;
243 else
244 mask &= ~(1 << clk->index);
245 sm_writel(sm, PM_PBA_MASK, mask);
246 spin_unlock_irqrestore(&sm->lock, flags);
247}
248
249static unsigned long pba_clk_get_rate(struct clk *clk)
250{
251 unsigned long cksel, shift = 0;
252
253 cksel = sm_readl(&system_manager, PM_CKSEL);
254 if (cksel & SM_BIT(PBADIV))
255 shift = SM_BFEXT(PBASEL, cksel) + 1;
256
257 return bus_clk_get_rate(clk, shift);
258}
259
260static void pbb_clk_mode(struct clk *clk, int enabled)
261{
262 struct at32_sm *sm = &system_manager;
263 unsigned long flags;
264 u32 mask;
265
266 spin_lock_irqsave(&sm->lock, flags);
267 mask = sm_readl(sm, PM_PBB_MASK);
268 if (enabled)
269 mask |= 1 << clk->index;
270 else
271 mask &= ~(1 << clk->index);
272 sm_writel(sm, PM_PBB_MASK, mask);
273 spin_unlock_irqrestore(&sm->lock, flags);
274}
275
276static unsigned long pbb_clk_get_rate(struct clk *clk)
277{
278 unsigned long cksel, shift = 0;
279
280 cksel = sm_readl(&system_manager, PM_CKSEL);
281 if (cksel & SM_BIT(PBBDIV))
282 shift = SM_BFEXT(PBBSEL, cksel) + 1;
283
284 return bus_clk_get_rate(clk, shift);
285}
286
287static struct clk cpu_clk = {
288 .name = "cpu",
289 .get_rate = cpu_clk_get_rate,
290 .users = 1,
291};
292static struct clk hsb_clk = {
293 .name = "hsb",
294 .parent = &cpu_clk,
295 .get_rate = hsb_clk_get_rate,
296};
297static struct clk pba_clk = {
298 .name = "pba",
299 .parent = &hsb_clk,
300 .mode = hsb_clk_mode,
301 .get_rate = pba_clk_get_rate,
302 .index = 1,
303};
304static struct clk pbb_clk = {
305 .name = "pbb",
306 .parent = &hsb_clk,
307 .mode = hsb_clk_mode,
308 .get_rate = pbb_clk_get_rate,
309 .users = 1,
310 .index = 2,
311};
312
313/* --------------------------------------------------------------------
314 * Generic Clock operations
315 * -------------------------------------------------------------------- */
316
317static void genclk_mode(struct clk *clk, int enabled)
318{
319 u32 control;
320
321 BUG_ON(clk->index > 7);
322
323 control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
324 if (enabled)
325 control |= SM_BIT(CEN);
326 else
327 control &= ~SM_BIT(CEN);
328 sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control);
329}
330
331static unsigned long genclk_get_rate(struct clk *clk)
332{
333 u32 control;
334 unsigned long div = 1;
335
336 BUG_ON(clk->index > 7);
337
338 if (!clk->parent)
339 return 0;
340
341 control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
342 if (control & SM_BIT(DIVEN))
343 div = 2 * (SM_BFEXT(DIV, control) + 1);
344
345 return clk->parent->get_rate(clk->parent) / div;
346}
347
348static long genclk_set_rate(struct clk *clk, unsigned long rate, int apply)
349{
350 u32 control;
351 unsigned long parent_rate, actual_rate, div;
352
353 BUG_ON(clk->index > 7);
354
355 if (!clk->parent)
356 return 0;
357
358 parent_rate = clk->parent->get_rate(clk->parent);
359 control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
360
361 if (rate > 3 * parent_rate / 4) {
362 actual_rate = parent_rate;
363 control &= ~SM_BIT(DIVEN);
364 } else {
365 div = (parent_rate + rate) / (2 * rate) - 1;
366 control = SM_BFINS(DIV, div, control) | SM_BIT(DIVEN);
367 actual_rate = parent_rate / (2 * (div + 1));
368 }
369
370 printk("clk %s: new rate %lu (actual rate %lu)\n",
371 clk->name, rate, actual_rate);
372
373 if (apply)
374 sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index,
375 control);
376
377 return actual_rate;
378}
379
380int genclk_set_parent(struct clk *clk, struct clk *parent)
381{
382 u32 control;
383
384 BUG_ON(clk->index > 7);
385
386 printk("clk %s: new parent %s (was %s)\n",
387 clk->name, parent->name,
388 clk->parent ? clk->parent->name : "(null)");
389
390 control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
391
392 if (parent == &osc1 || parent == &pll1)
393 control |= SM_BIT(OSCSEL);
394 else if (parent == &osc0 || parent == &pll0)
395 control &= ~SM_BIT(OSCSEL);
396 else
397 return -EINVAL;
398
399 if (parent == &pll0 || parent == &pll1)
400 control |= SM_BIT(PLLSEL);
401 else
402 control &= ~SM_BIT(PLLSEL);
403
404 sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control);
405 clk->parent = parent;
406
407 return 0;
408}
409
410/* --------------------------------------------------------------------
411 * System peripherals
412 * -------------------------------------------------------------------- */
413static struct resource sm_resource[] = {
414 PBMEM(0xfff00000),
415 NAMED_IRQ(19, "eim"),
416 NAMED_IRQ(20, "pm"),
417 NAMED_IRQ(21, "rtc"),
418};
419struct platform_device at32_sm_device = {
420 .name = "sm",
421 .id = 0,
422 .resource = sm_resource,
423 .num_resources = ARRAY_SIZE(sm_resource),
424};
425DEV_CLK(pclk, at32_sm, pbb, 0);
426
427static struct resource intc0_resource[] = {
428 PBMEM(0xfff00400),
429};
430struct platform_device at32_intc0_device = {
431 .name = "intc",
432 .id = 0,
433 .resource = intc0_resource,
434 .num_resources = ARRAY_SIZE(intc0_resource),
435};
436DEV_CLK(pclk, at32_intc0, pbb, 1);
437
438static struct clk ebi_clk = {
439 .name = "ebi",
440 .parent = &hsb_clk,
441 .mode = hsb_clk_mode,
442 .get_rate = hsb_clk_get_rate,
443 .users = 1,
444};
445static struct clk hramc_clk = {
446 .name = "hramc",
447 .parent = &hsb_clk,
448 .mode = hsb_clk_mode,
449 .get_rate = hsb_clk_get_rate,
450 .users = 1,
451};
452
453static struct resource smc0_resource[] = {
454 PBMEM(0xfff03400),
455};
456DEFINE_DEV(smc, 0);
457DEV_CLK(pclk, smc0, pbb, 13);
458DEV_CLK(mck, smc0, hsb, 0);
459
460static struct platform_device pdc_device = {
461 .name = "pdc",
462 .id = 0,
463};
464DEV_CLK(hclk, pdc, hsb, 4);
465DEV_CLK(pclk, pdc, pba, 16);
466
467static struct clk pico_clk = {
468 .name = "pico",
469 .parent = &cpu_clk,
470 .mode = cpu_clk_mode,
471 .get_rate = cpu_clk_get_rate,
472 .users = 1,
473};
474
475/* --------------------------------------------------------------------
476 * PIO
477 * -------------------------------------------------------------------- */
478
479static struct resource pio0_resource[] = {
480 PBMEM(0xffe02800),
481 IRQ(13),
482};
483DEFINE_DEV(pio, 0);
484DEV_CLK(mck, pio0, pba, 10);
485
486static struct resource pio1_resource[] = {
487 PBMEM(0xffe02c00),
488 IRQ(14),
489};
490DEFINE_DEV(pio, 1);
491DEV_CLK(mck, pio1, pba, 11);
492
493static struct resource pio2_resource[] = {
494 PBMEM(0xffe03000),
495 IRQ(15),
496};
497DEFINE_DEV(pio, 2);
498DEV_CLK(mck, pio2, pba, 12);
499
500static struct resource pio3_resource[] = {
501 PBMEM(0xffe03400),
502 IRQ(16),
503};
504DEFINE_DEV(pio, 3);
505DEV_CLK(mck, pio3, pba, 13);
506
507void __init at32_add_system_devices(void)
508{
509 system_manager.eim_first_irq = NR_INTERNAL_IRQS;
510
511 platform_device_register(&at32_sm_device);
512 platform_device_register(&at32_intc0_device);
513 platform_device_register(&smc0_device);
514 platform_device_register(&pdc_device);
515
516 platform_device_register(&pio0_device);
517 platform_device_register(&pio1_device);
518 platform_device_register(&pio2_device);
519 platform_device_register(&pio3_device);
520}
521
522/* --------------------------------------------------------------------
523 * USART
524 * -------------------------------------------------------------------- */
525
526static struct resource usart0_resource[] = {
527 PBMEM(0xffe00c00),
528 IRQ(7),
529};
530DEFINE_DEV(usart, 0);
531DEV_CLK(usart, usart0, pba, 4);
532
/*
 * USART1-3: one register window and one interrupt line each (IRQ 7-9),
 * plus a "usart" clock on pba (index 4-6).  The usartN_device objects
 * returned by setup_usart() and the usartN_usart clocks listed in
 * at32_clock_list[] presumably come from these DEFINE_DEV()/DEV_CLK()
 * invocations (macros defined outside this excerpt).
 */
533static struct resource usart1_resource[] = {
534 PBMEM(0xffe01000),
535 IRQ(7),
536};
537DEFINE_DEV(usart, 1);
538DEV_CLK(usart, usart1, pba, 4);
539
540static struct resource usart2_resource[] = {
541 PBMEM(0xffe01400),
542 IRQ(8),
543};
544DEFINE_DEV(usart, 2);
545DEV_CLK(usart, usart2, pba, 5);
546
547static struct resource usart3_resource[] = {
548 PBMEM(0xffe01800),
549 IRQ(9),
550};
551DEFINE_DEV(usart, 3);
552DEV_CLK(usart, usart3, pba, 6);
553
554static inline void configure_usart0_pins(void)
555{
556 portmux_set_func(PIOA, 8, FUNC_B); /* RXD */
557 portmux_set_func(PIOA, 9, FUNC_B); /* TXD */
558}
559
560static inline void configure_usart1_pins(void)
561{
562 portmux_set_func(PIOA, 17, FUNC_A); /* RXD */
563 portmux_set_func(PIOA, 18, FUNC_A); /* TXD */
564}
565
566static inline void configure_usart2_pins(void)
567{
568 portmux_set_func(PIOB, 26, FUNC_B); /* RXD */
569 portmux_set_func(PIOB, 27, FUNC_B); /* TXD */
570}
571
572static inline void configure_usart3_pins(void)
573{
574 portmux_set_func(PIOB, 18, FUNC_B); /* RXD */
575 portmux_set_func(PIOB, 17, FUNC_B); /* TXD */
576}
577
578static struct platform_device *setup_usart(unsigned int id)
579{
580 struct platform_device *pdev;
581
582 switch (id) {
583 case 0:
584 pdev = &usart0_device;
585 configure_usart0_pins();
586 break;
587 case 1:
588 pdev = &usart1_device;
589 configure_usart1_pins();
590 break;
591 case 2:
592 pdev = &usart2_device;
593 configure_usart2_pins();
594 break;
595 case 3:
596 pdev = &usart3_device;
597 configure_usart3_pins();
598 break;
599 default:
600 pdev = NULL;
601 break;
602 }
603
604 return pdev;
605}
606
607struct platform_device *__init at32_add_device_usart(unsigned int id)
608{
609 struct platform_device *pdev;
610
611 pdev = setup_usart(id);
612 if (pdev)
613 platform_device_register(pdev);
614
615 return pdev;
616}
617
618struct platform_device *at91_default_console_device;
619
620void __init at32_setup_serial_console(unsigned int usart_id)
621{
622 at91_default_console_device = setup_usart(usart_id);
623}
624
625/* --------------------------------------------------------------------
626 * Ethernet
627 * -------------------------------------------------------------------- */
628
/*
 * Ethernet MAC 0 ("macb"): backing storage for its platform data, one
 * register window, IRQ 25, and two clocks ("hclk" on hsb, "pclk" on pbb).
 * at32_add_device_eth() copies the board's settings through
 * macb0_device.dev.platform_data; presumably DEFINE_DEV_DATA() (defined
 * outside this excerpt) points that at macb0_data.
 */
629static struct eth_platform_data macb0_data;
630static struct resource macb0_resource[] = {
631 PBMEM(0xfff01800),
632 IRQ(25),
633};
634DEFINE_DEV_DATA(macb, 0);
635DEV_CLK(hclk, macb0, hsb, 8);
636DEV_CLK(pclk, macb0, pbb, 6);
637
638struct platform_device *__init
639at32_add_device_eth(unsigned int id, struct eth_platform_data *data)
640{
641 struct platform_device *pdev;
642
643 switch (id) {
644 case 0:
645 pdev = &macb0_device;
646
647 portmux_set_func(PIOC, 3, FUNC_A); /* TXD0 */
648 portmux_set_func(PIOC, 4, FUNC_A); /* TXD1 */
649 portmux_set_func(PIOC, 7, FUNC_A); /* TXEN */
650 portmux_set_func(PIOC, 8, FUNC_A); /* TXCK */
651 portmux_set_func(PIOC, 9, FUNC_A); /* RXD0 */
652 portmux_set_func(PIOC, 10, FUNC_A); /* RXD1 */
653 portmux_set_func(PIOC, 13, FUNC_A); /* RXER */
654 portmux_set_func(PIOC, 15, FUNC_A); /* RXDV */
655 portmux_set_func(PIOC, 16, FUNC_A); /* MDC */
656 portmux_set_func(PIOC, 17, FUNC_A); /* MDIO */
657
658 if (!data->is_rmii) {
659 portmux_set_func(PIOC, 0, FUNC_A); /* COL */
660 portmux_set_func(PIOC, 1, FUNC_A); /* CRS */
661 portmux_set_func(PIOC, 2, FUNC_A); /* TXER */
662 portmux_set_func(PIOC, 5, FUNC_A); /* TXD2 */
663 portmux_set_func(PIOC, 6, FUNC_A); /* TXD3 */
664 portmux_set_func(PIOC, 11, FUNC_A); /* RXD2 */
665 portmux_set_func(PIOC, 12, FUNC_A); /* RXD3 */
666 portmux_set_func(PIOC, 14, FUNC_A); /* RXCK */
667 portmux_set_func(PIOC, 18, FUNC_A); /* SPD */
668 }
669 break;
670
671 default:
672 return NULL;
673 }
674
675 memcpy(pdev->dev.platform_data, data, sizeof(struct eth_platform_data));
676 platform_device_register(pdev);
677
678 return pdev;
679}
680
681/* --------------------------------------------------------------------
682 * SPI
683 * -------------------------------------------------------------------- */
/*
 * SPI controller 0: one register window at 0xffe00000, IRQ 3, and an
 * "mck" clock on pba (index 0).  spi0_device, used by
 * at32_add_device_spi(), presumably comes from DEFINE_DEV() (defined
 * outside this excerpt).
 */
684static struct resource spi0_resource[] = {
685 PBMEM(0xffe00000),
686 IRQ(3),
687};
688DEFINE_DEV(spi, 0);
689DEV_CLK(mck, spi0, pba, 0);
690
691struct platform_device *__init at32_add_device_spi(unsigned int id)
692{
693 struct platform_device *pdev;
694
695 switch (id) {
696 case 0:
697 pdev = &spi0_device;
698 portmux_set_func(PIOA, 0, FUNC_A); /* MISO */
699 portmux_set_func(PIOA, 1, FUNC_A); /* MOSI */
700 portmux_set_func(PIOA, 2, FUNC_A); /* SCK */
701 portmux_set_func(PIOA, 3, FUNC_A); /* NPCS0 */
702 portmux_set_func(PIOA, 4, FUNC_A); /* NPCS1 */
703 portmux_set_func(PIOA, 5, FUNC_A); /* NPCS2 */
704 break;
705
706 default:
707 return NULL;
708 }
709
710 platform_device_register(pdev);
711 return pdev;
712}
713
714/* --------------------------------------------------------------------
715 * LCDC
716 * -------------------------------------------------------------------- */
/*
 * LCD controller 0: backing storage for its platform data, a 4 KiB
 * register window at 0xff000000 (spelled out rather than using PBMEM),
 * IRQ 1, and an "hclk" clock on hsb (index 7).
 */
717static struct lcdc_platform_data lcdc0_data;
718static struct resource lcdc0_resource[] = {
719 {
720 .start = 0xff000000,
721 .end = 0xff000fff,
722 .flags = IORESOURCE_MEM,
723 },
724 IRQ(1),
725};
726DEFINE_DEV_DATA(lcdc, 0);
727DEV_CLK(hclk, lcdc0, hsb, 7);
/*
 * Pixel clock of LCDC0, driven through the genclk callbacks with sibling
 * index 7.  Unlike the DEV_CLK() clocks it has set_rate/set_parent hooks,
 * which at32_add_device_lcdc() uses to run it from pll0.
 */
728static struct clk lcdc0_pixclk = {
729 .name = "pixclk",
730 .dev = &lcdc0_device.dev,
731 .mode = genclk_mode,
732 .get_rate = genclk_get_rate,
733 .set_rate = genclk_set_rate,
734 .set_parent = genclk_set_parent,
735 .index = 7,
736};
737
738struct platform_device *__init
739at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data)
740{
741 struct platform_device *pdev;
742
743 switch (id) {
744 case 0:
745 pdev = &lcdc0_device;
746 portmux_set_func(PIOC, 19, FUNC_A); /* CC */
747 portmux_set_func(PIOC, 20, FUNC_A); /* HSYNC */
748 portmux_set_func(PIOC, 21, FUNC_A); /* PCLK */
749 portmux_set_func(PIOC, 22, FUNC_A); /* VSYNC */
750 portmux_set_func(PIOC, 23, FUNC_A); /* DVAL */
751 portmux_set_func(PIOC, 24, FUNC_A); /* MODE */
752 portmux_set_func(PIOC, 25, FUNC_A); /* PWR */
753 portmux_set_func(PIOC, 26, FUNC_A); /* DATA0 */
754 portmux_set_func(PIOC, 27, FUNC_A); /* DATA1 */
755 portmux_set_func(PIOC, 28, FUNC_A); /* DATA2 */
756 portmux_set_func(PIOC, 29, FUNC_A); /* DATA3 */
757 portmux_set_func(PIOC, 30, FUNC_A); /* DATA4 */
758 portmux_set_func(PIOC, 31, FUNC_A); /* DATA5 */
759 portmux_set_func(PIOD, 0, FUNC_A); /* DATA6 */
760 portmux_set_func(PIOD, 1, FUNC_A); /* DATA7 */
761 portmux_set_func(PIOD, 2, FUNC_A); /* DATA8 */
762 portmux_set_func(PIOD, 3, FUNC_A); /* DATA9 */
763 portmux_set_func(PIOD, 4, FUNC_A); /* DATA10 */
764 portmux_set_func(PIOD, 5, FUNC_A); /* DATA11 */
765 portmux_set_func(PIOD, 6, FUNC_A); /* DATA12 */
766 portmux_set_func(PIOD, 7, FUNC_A); /* DATA13 */
767 portmux_set_func(PIOD, 8, FUNC_A); /* DATA14 */
768 portmux_set_func(PIOD, 9, FUNC_A); /* DATA15 */
769 portmux_set_func(PIOD, 10, FUNC_A); /* DATA16 */
770 portmux_set_func(PIOD, 11, FUNC_A); /* DATA17 */
771 portmux_set_func(PIOD, 12, FUNC_A); /* DATA18 */
772 portmux_set_func(PIOD, 13, FUNC_A); /* DATA19 */
773 portmux_set_func(PIOD, 14, FUNC_A); /* DATA20 */
774 portmux_set_func(PIOD, 15, FUNC_A); /* DATA21 */
775 portmux_set_func(PIOD, 16, FUNC_A); /* DATA22 */
776 portmux_set_func(PIOD, 17, FUNC_A); /* DATA23 */
777
778 clk_set_parent(&lcdc0_pixclk, &pll0);
779 clk_set_rate(&lcdc0_pixclk, clk_get_rate(&pll0));
780 break;
781
782 default:
783 return NULL;
784 }
785
786 memcpy(pdev->dev.platform_data, data,
787 sizeof(struct lcdc_platform_data));
788
789 platform_device_register(pdev);
790 return pdev;
791}
792
/*
 * Every clock known on this chip.  clk_get() in clock.c searches this
 * table linearly by (device, name), and at32_clock_init() walks it to
 * build the initial clock masks, so a clock that is missing here can be
 * neither looked up nor accounted for at boot.
 */
793struct clk *at32_clock_list[] = {
794 &osc32k,
795 &osc0,
796 &osc1,
797 &pll0,
798 &pll1,
799 &cpu_clk,
800 &hsb_clk,
801 &pba_clk,
802 &pbb_clk,
803 &at32_sm_pclk,
804 &at32_intc0_pclk,
805 &ebi_clk,
806 &hramc_clk,
807 &smc0_pclk,
808 &smc0_mck,
809 &pdc_hclk,
810 &pdc_pclk,
811 &pico_clk,
812 &pio0_mck,
813 &pio1_mck,
814 &pio2_mck,
815 &pio3_mck,
816 &usart0_usart,
817 &usart1_usart,
818 &usart2_usart,
819 &usart3_usart,
820 &macb0_hclk,
821 &macb0_pclk,
822 &spi0_mck,
823 &lcdc0_hclk,
824 &lcdc0_pixclk,
825};
/* Number of entries in at32_clock_list[]; the loop bound used by clk_get(). */
826unsigned int at32_nr_clocks = ARRAY_SIZE(at32_clock_list);
827
828void __init at32_portmux_init(void)
829{
830 at32_init_pio(&pio0_device);
831 at32_init_pio(&pio1_device);
832 at32_init_pio(&pio2_device);
833 at32_init_pio(&pio3_device);
834}
835
836void __init at32_clock_init(void)
837{
838 struct at32_sm *sm = &system_manager;
839 u32 cpu_mask = 0, hsb_mask = 0, pba_mask = 0, pbb_mask = 0;
840 int i;
841
842 if (sm_readl(sm, PM_MCCTRL) & SM_BIT(PLLSEL))
843 main_clock = &pll0;
844 else
845 main_clock = &osc0;
846
847 if (sm_readl(sm, PM_PLL0) & SM_BIT(PLLOSC))
848 pll0.parent = &osc1;
849 if (sm_readl(sm, PM_PLL1) & SM_BIT(PLLOSC))
850 pll1.parent = &osc1;
851
852 /*
853 * Turn on all clocks that have at least one user already, and
854 * turn off everything else. We only do this for module
855 * clocks, and even though it isn't particularly pretty to
856 * check the address of the mode function, it should do the
857 * trick...
858 */
859 for (i = 0; i < ARRAY_SIZE(at32_clock_list); i++) {
860 struct clk *clk = at32_clock_list[i];
861
862 if (clk->mode == &cpu_clk_mode)
863 cpu_mask |= 1 << clk->index;
864 else if (clk->mode == &hsb_clk_mode)
865 hsb_mask |= 1 << clk->index;
866 else if (clk->mode == &pba_clk_mode)
867 pba_mask |= 1 << clk->index;
868 else if (clk->mode == &pbb_clk_mode)
869 pbb_mask |= 1 << clk->index;
870 }
871
872 sm_writel(sm, PM_CPU_MASK, cpu_mask);
873 sm_writel(sm, PM_HSB_MASK, hsb_mask);
874 sm_writel(sm, PM_PBA_MASK, pba_mask);
875 sm_writel(sm, PM_PBB_MASK, pbb_mask);
876}
diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c
new file mode 100644
index 000000000000..3d0d1097389f
--- /dev/null
+++ b/arch/avr32/mach-at32ap/clock.c
@@ -0,0 +1,148 @@
1/*
2 * Clock management for AT32AP CPUs
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * Based on arch/arm/mach-at91rm9200/clock.c
7 * Copyright (C) 2005 David Brownell
8 * Copyright (C) 2005 Ivan Kokshaysky
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14#include <linux/clk.h>
15#include <linux/err.h>
16#include <linux/device.h>
17#include <linux/string.h>
18
19#include "clock.h"
20
/*
 * Serialises every change to clock state (user counts, rates, parents).
 * Declared with DEFINE_SPINLOCK() rather than the deprecated
 * SPIN_LOCK_UNLOCKED initialiser so the lock gets its own identity for
 * lock debugging.
 */
static DEFINE_SPINLOCK(clk_lock);
22
23struct clk *clk_get(struct device *dev, const char *id)
24{
25 int i;
26
27 for (i = 0; i < at32_nr_clocks; i++) {
28 struct clk *clk = at32_clock_list[i];
29
30 if (clk->dev == dev && strcmp(id, clk->name) == 0)
31 return clk;
32 }
33
34 return ERR_PTR(-ENOENT);
35}
36EXPORT_SYMBOL(clk_get);
37
/*
 * Counterpart of clk_get().  Every clock is a statically allocated
 * object, so there is nothing to release and this is a no-op.
 */
void clk_put(struct clk *clk)
{
}
EXPORT_SYMBOL(clk_put);
43
44static void __clk_enable(struct clk *clk)
45{
46 if (clk->parent)
47 __clk_enable(clk->parent);
48 if (clk->users++ == 0 && clk->mode)
49 clk->mode(clk, 1);
50}
51
52int clk_enable(struct clk *clk)
53{
54 unsigned long flags;
55
56 spin_lock_irqsave(&clk_lock, flags);
57 __clk_enable(clk);
58 spin_unlock_irqrestore(&clk_lock, flags);
59
60 return 0;
61}
62EXPORT_SYMBOL(clk_enable);
63
64static void __clk_disable(struct clk *clk)
65{
66 BUG_ON(clk->users == 0);
67
68 if (--clk->users == 0 && clk->mode)
69 clk->mode(clk, 0);
70 if (clk->parent)
71 __clk_disable(clk->parent);
72}
73
74void clk_disable(struct clk *clk)
75{
76 unsigned long flags;
77
78 spin_lock_irqsave(&clk_lock, flags);
79 __clk_disable(clk);
80 spin_unlock_irqrestore(&clk_lock, flags);
81}
82EXPORT_SYMBOL(clk_disable);
83
84unsigned long clk_get_rate(struct clk *clk)
85{
86 unsigned long flags;
87 unsigned long rate;
88
89 spin_lock_irqsave(&clk_lock, flags);
90 rate = clk->get_rate(clk);
91 spin_unlock_irqrestore(&clk_lock, flags);
92
93 return rate;
94}
95EXPORT_SYMBOL(clk_get_rate);
96
97long clk_round_rate(struct clk *clk, unsigned long rate)
98{
99 unsigned long flags, actual_rate;
100
101 if (!clk->set_rate)
102 return -ENOSYS;
103
104 spin_lock_irqsave(&clk_lock, flags);
105 actual_rate = clk->set_rate(clk, rate, 0);
106 spin_unlock_irqrestore(&clk_lock, flags);
107
108 return actual_rate;
109}
110EXPORT_SYMBOL(clk_round_rate);
111
112int clk_set_rate(struct clk *clk, unsigned long rate)
113{
114 unsigned long flags;
115 long ret;
116
117 if (!clk->set_rate)
118 return -ENOSYS;
119
120 spin_lock_irqsave(&clk_lock, flags);
121 ret = clk->set_rate(clk, rate, 1);
122 spin_unlock_irqrestore(&clk_lock, flags);
123
124 return (ret < 0) ? ret : 0;
125}
126EXPORT_SYMBOL(clk_set_rate);
127
128int clk_set_parent(struct clk *clk, struct clk *parent)
129{
130 unsigned long flags;
131 int ret;
132
133 if (!clk->set_parent)
134 return -ENOSYS;
135
136 spin_lock_irqsave(&clk_lock, flags);
137 ret = clk->set_parent(clk, parent);
138 spin_unlock_irqrestore(&clk_lock, flags);
139
140 return ret;
141}
142EXPORT_SYMBOL(clk_set_parent);
143
144struct clk *clk_get_parent(struct clk *clk)
145{
146 return clk->parent;
147}
148EXPORT_SYMBOL(clk_get_parent);
diff --git a/arch/avr32/mach-at32ap/clock.h b/arch/avr32/mach-at32ap/clock.h
new file mode 100644
index 000000000000..f953f044ba4d
--- /dev/null
+++ b/arch/avr32/mach-at32ap/clock.h
@@ -0,0 +1,30 @@
1/*
2 * Clock management for AT32AP CPUs
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * Based on arch/arm/mach-at91rm9200/clock.c
7 * Copyright (C) 2005 David Brownell
8 * Copyright (C) 2005 Ivan Kokshaysky
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14#include <linux/clk.h>
15
/*
 * One node of the clock tree.
 *
 * All hooks are optional as far as clock.c is concerned except get_rate,
 * which clk_get_rate() calls without a NULL check.  set_rate() is used for
 * both rounding (apply == 0) and programming (apply != 0); clk_set_rate()
 * treats a negative return as an error code.  users is a plain reference
 * count maintained by clk_enable()/clk_disable() under the clock lock.
 */
16struct clk {
17 const char *name; /* Clock name/function */
18 struct device *dev; /* Device the clock is used by */
19 struct clk *parent; /* Parent clock, if any */
20 void (*mode)(struct clk *clk, int enabled);
21 unsigned long (*get_rate)(struct clk *clk);
22 long (*set_rate)(struct clk *clk, unsigned long rate,
23 int apply);
24 int (*set_parent)(struct clk *clk, struct clk *parent);
25 u16 users; /* Enabled if non-zero */
26 u16 index; /* Sibling index */
27};
28
/* Chip-specific table of all clocks and its length; searched by clk_get(). */
29extern struct clk *at32_clock_list[];
30extern unsigned int at32_nr_clocks;
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
new file mode 100644
index 000000000000..7da9c5f7a0eb
--- /dev/null
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -0,0 +1,171 @@
1/*
2 * External interrupt handling for AT32AP CPUs
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/errno.h>
12#include <linux/init.h>
13#include <linux/interrupt.h>
14#include <linux/irq.h>
15#include <linux/platform_device.h>
16#include <linux/random.h>
17
18#include <asm/io.h>
19
20#include <asm/arch/sm.h>
21
22#include "sm.h"
23
24static void eim_ack_irq(unsigned int irq)
25{
26 struct at32_sm *sm = get_irq_chip_data(irq);
27 sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq));
28}
29
30static void eim_mask_irq(unsigned int irq)
31{
32 struct at32_sm *sm = get_irq_chip_data(irq);
33 sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq));
34}
35
36static void eim_mask_ack_irq(unsigned int irq)
37{
38 struct at32_sm *sm = get_irq_chip_data(irq);
39 sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq));
40 sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq));
41}
42
43static void eim_unmask_irq(unsigned int irq)
44{
45 struct at32_sm *sm = get_irq_chip_data(irq);
46 sm_writel(sm, EIM_IER, 1 << (irq - sm->eim_first_irq));
47}
48
49static int eim_set_irq_type(unsigned int irq, unsigned int flow_type)
50{
51 struct at32_sm *sm = get_irq_chip_data(irq);
52 unsigned int i = irq - sm->eim_first_irq;
53 u32 mode, edge, level;
54 unsigned long flags;
55 int ret = 0;
56
57 flow_type &= IRQ_TYPE_SENSE_MASK;
58
59 spin_lock_irqsave(&sm->lock, flags);
60
61 mode = sm_readl(sm, EIM_MODE);
62 edge = sm_readl(sm, EIM_EDGE);
63 level = sm_readl(sm, EIM_LEVEL);
64
65 switch (flow_type) {
66 case IRQ_TYPE_LEVEL_LOW:
67 mode |= 1 << i;
68 level &= ~(1 << i);
69 break;
70 case IRQ_TYPE_LEVEL_HIGH:
71 mode |= 1 << i;
72 level |= 1 << i;
73 break;
74 case IRQ_TYPE_EDGE_RISING:
75 mode &= ~(1 << i);
76 edge |= 1 << i;
77 break;
78 case IRQ_TYPE_EDGE_FALLING:
79 mode &= ~(1 << i);
80 edge &= ~(1 << i);
81 break;
82 default:
83 ret = -EINVAL;
84 break;
85 }
86
87 sm_writel(sm, EIM_MODE, mode);
88 sm_writel(sm, EIM_EDGE, edge);
89 sm_writel(sm, EIM_LEVEL, level);
90
91 spin_unlock_irqrestore(&sm->lock, flags);
92
93 return ret;
94}
95
/*
 * irq_chip operations for the external interrupt lines.  eim_init()
 * installs this chip on every implemented external IRQ and also stores a
 * pointer to it in the system manager state.
 */
96struct irq_chip eim_chip = {
97 .name = "eim",
98 .ack = eim_ack_irq,
99 .mask = eim_mask_irq,
100 .mask_ack = eim_mask_ack_irq,
101 .unmask = eim_unmask_irq,
102 .set_type = eim_set_irq_type,
103};
104
105static void demux_eim_irq(unsigned int irq, struct irq_desc *desc,
106 struct pt_regs *regs)
107{
108 struct at32_sm *sm = desc->handler_data;
109 struct irq_desc *ext_desc;
110 unsigned long status, pending;
111 unsigned int i, ext_irq;
112
113 spin_lock(&sm->lock);
114
115 status = sm_readl(sm, EIM_ISR);
116 pending = status & sm_readl(sm, EIM_IMR);
117
118 while (pending) {
119 i = fls(pending) - 1;
120 pending &= ~(1 << i);
121
122 ext_irq = i + sm->eim_first_irq;
123 ext_desc = irq_desc + ext_irq;
124 ext_desc->handle_irq(ext_irq, ext_desc, regs);
125 }
126
127 spin_unlock(&sm->lock);
128}
129
130static int __init eim_init(void)
131{
132 struct at32_sm *sm = &system_manager;
133 unsigned int i;
134 unsigned int nr_irqs;
135 unsigned int int_irq;
136 u32 pattern;
137
138 /*
139 * The EIM is really the same module as SM, so register
140 * mapping, etc. has been taken care of already.
141 */
142
143 /*
144 * Find out how many interrupt lines that are actually
145 * implemented in hardware.
146 */
147 sm_writel(sm, EIM_IDR, ~0UL);
148 sm_writel(sm, EIM_MODE, ~0UL);
149 pattern = sm_readl(sm, EIM_MODE);
150 nr_irqs = fls(pattern);
151
152 sm->eim_chip = &eim_chip;
153
154 for (i = 0; i < nr_irqs; i++) {
155 set_irq_chip(sm->eim_first_irq + i, &eim_chip);
156 set_irq_chip_data(sm->eim_first_irq + i, sm);
157 }
158
159 int_irq = platform_get_irq_byname(sm->pdev, "eim");
160
161 set_irq_chained_handler(int_irq, demux_eim_irq);
162 set_irq_data(int_irq, sm);
163
164 printk("EIM: External Interrupt Module at 0x%p, IRQ %u\n",
165 sm->regs, int_irq);
166 printk("EIM: Handling %u external IRQs, starting with IRQ %u\n",
167 nr_irqs, sm->eim_first_irq);
168
169 return 0;
170}
171arch_initcall(eim_init);
diff --git a/arch/avr32/mach-at32ap/hsmc.c b/arch/avr32/mach-at32ap/hsmc.c
new file mode 100644
index 000000000000..7691721928a7
--- /dev/null
+++ b/arch/avr32/mach-at32ap/hsmc.c
@@ -0,0 +1,164 @@
1/*
2 * Static Memory Controller for AT32 chips
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#define DEBUG
11#include <linux/clk.h>
12#include <linux/err.h>
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/platform_device.h>
16
17#include <asm/io.h>
18#include <asm/arch/smc.h>
19
20#include "hsmc.h"
21
/* Chip selects accepted by smc_set_configuration(): valid values are 0..5. */
22#define NR_CHIP_SELECTS 6
23
/* Driver state for the static memory controller instance. */
24struct hsmc {
25 void __iomem *regs;
26 struct clk *pclk;
27 struct clk *mck;
28};
29
/*
 * The one controller instance: set by hsmc_probe(), NULL until then.
 * smc_set_configuration() returns -ENODEV while it is NULL, and a second
 * probe is refused with -EBUSY.
 */
30static struct hsmc *hsmc;
31
32int smc_set_configuration(int cs, const struct smc_config *config)
33{
34 unsigned long mul;
35 unsigned long offset;
36 u32 setup, pulse, cycle, mode;
37
38 if (!hsmc)
39 return -ENODEV;
40 if (cs >= NR_CHIP_SELECTS)
41 return -EINVAL;
42
43 /*
44 * cycles = x / T = x * f
45 * = ((x * 1000000000) * ((f * 65536) / 1000000000)) / 65536
46 * = ((x * 1000000000) * (((f / 10000) * 65536) / 100000)) / 65536
47 */
48 mul = (clk_get_rate(hsmc->mck) / 10000) << 16;
49 mul /= 100000;
50
51#define ns2cyc(x) ((((x) * mul) + 65535) >> 16)
52
53 setup = (HSMC_BF(NWE_SETUP, ns2cyc(config->nwe_setup))
54 | HSMC_BF(NCS_WR_SETUP, ns2cyc(config->ncs_write_setup))
55 | HSMC_BF(NRD_SETUP, ns2cyc(config->nrd_setup))
56 | HSMC_BF(NCS_RD_SETUP, ns2cyc(config->ncs_read_setup)));
57 pulse = (HSMC_BF(NWE_PULSE, ns2cyc(config->nwe_pulse))
58 | HSMC_BF(NCS_WR_PULSE, ns2cyc(config->ncs_write_pulse))
59 | HSMC_BF(NRD_PULSE, ns2cyc(config->nrd_pulse))
60 | HSMC_BF(NCS_RD_PULSE, ns2cyc(config->ncs_read_pulse)));
61 cycle = (HSMC_BF(NWE_CYCLE, ns2cyc(config->write_cycle))
62 | HSMC_BF(NRD_CYCLE, ns2cyc(config->read_cycle)));
63
64 switch (config->bus_width) {
65 case 1:
66 mode = HSMC_BF(DBW, HSMC_DBW_8_BITS);
67 break;
68 case 2:
69 mode = HSMC_BF(DBW, HSMC_DBW_16_BITS);
70 break;
71 case 4:
72 mode = HSMC_BF(DBW, HSMC_DBW_32_BITS);
73 break;
74 default:
75 return -EINVAL;
76 }
77
78 if (config->nrd_controlled)
79 mode |= HSMC_BIT(READ_MODE);
80 if (config->nwe_controlled)
81 mode |= HSMC_BIT(WRITE_MODE);
82 if (config->byte_write)
83 mode |= HSMC_BIT(BAT);
84
85 pr_debug("smc cs%d: setup/%08x pulse/%08x cycle/%08x mode/%08x\n",
86 cs, setup, pulse, cycle, mode);
87
88 offset = cs * 0x10;
89 hsmc_writel(hsmc, SETUP0 + offset, setup);
90 hsmc_writel(hsmc, PULSE0 + offset, pulse);
91 hsmc_writel(hsmc, CYCLE0 + offset, cycle);
92 hsmc_writel(hsmc, MODE0 + offset, mode);
93 hsmc_readl(hsmc, MODE0); /* I/O barrier */
94
95 return 0;
96}
97EXPORT_SYMBOL(smc_set_configuration);
98
99static int hsmc_probe(struct platform_device *pdev)
100{
101 struct resource *regs;
102 struct clk *pclk, *mck;
103 int ret;
104
105 if (hsmc)
106 return -EBUSY;
107
108 regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
109 if (!regs)
110 return -ENXIO;
111 pclk = clk_get(&pdev->dev, "pclk");
112 if (IS_ERR(pclk))
113 return PTR_ERR(pclk);
114 mck = clk_get(&pdev->dev, "mck");
115 if (IS_ERR(mck)) {
116 ret = PTR_ERR(mck);
117 goto out_put_pclk;
118 }
119
120 ret = -ENOMEM;
121 hsmc = kzalloc(sizeof(struct hsmc), GFP_KERNEL);
122 if (!hsmc)
123 goto out_put_clocks;
124
125 clk_enable(pclk);
126 clk_enable(mck);
127
128 hsmc->pclk = pclk;
129 hsmc->mck = mck;
130 hsmc->regs = ioremap(regs->start, regs->end - regs->start + 1);
131 if (!hsmc->regs)
132 goto out_disable_clocks;
133
134 dev_info(&pdev->dev, "Atmel Static Memory Controller at 0x%08lx\n",
135 (unsigned long)regs->start);
136
137 platform_set_drvdata(pdev, hsmc);
138
139 return 0;
140
141out_disable_clocks:
142 clk_disable(mck);
143 clk_disable(pclk);
144 kfree(hsmc);
145out_put_clocks:
146 clk_put(mck);
147out_put_pclk:
148 clk_put(pclk);
149 hsmc = NULL;
150 return ret;
151}
152
153static struct platform_driver hsmc_driver = {
154 .probe = hsmc_probe,
155 .driver = {
156 .name = "smc",
157 },
158};
159
160static int __init hsmc_init(void)
161{
162 return platform_driver_register(&hsmc_driver);
163}
164arch_initcall(hsmc_init);
diff --git a/arch/avr32/mach-at32ap/hsmc.h b/arch/avr32/mach-at32ap/hsmc.h
new file mode 100644
index 000000000000..5681276fafdb
--- /dev/null
+++ b/arch/avr32/mach-at32ap/hsmc.h
@@ -0,0 +1,127 @@
1/*
2 * Register definitions for Atmel Static Memory Controller (SMC)
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_AVR32_HSMC_H__
11#define __ASM_AVR32_HSMC_H__
12
13/* HSMC register offsets, in bytes from the start of the register block.
 * Each chip select n (0-5) owns four consecutive registers -- SETUPn,
 * PULSEn, CYCLEn, MODEn -- in a bank that starts at n * 0x10. */
14#define HSMC_SETUP0 0x0000
15#define HSMC_PULSE0 0x0004
16#define HSMC_CYCLE0 0x0008
17#define HSMC_MODE0 0x000c
18#define HSMC_SETUP1 0x0010
19#define HSMC_PULSE1 0x0014
20#define HSMC_CYCLE1 0x0018
21#define HSMC_MODE1 0x001c
22#define HSMC_SETUP2 0x0020
23#define HSMC_PULSE2 0x0024
24#define HSMC_CYCLE2 0x0028
25#define HSMC_MODE2 0x002c
26#define HSMC_SETUP3 0x0030
27#define HSMC_PULSE3 0x0034
28#define HSMC_CYCLE3 0x0038
29#define HSMC_MODE3 0x003c
30#define HSMC_SETUP4 0x0040
31#define HSMC_PULSE4 0x0044
32#define HSMC_CYCLE4 0x0048
33#define HSMC_MODE4 0x004c
34#define HSMC_SETUP5 0x0050
35#define HSMC_PULSE5 0x0054
36#define HSMC_CYCLE5 0x0058
37#define HSMC_MODE5 0x005c
38
/*
 * Field layouts.  Every field is described by a pair of macros,
 * HSMC_<field>_OFFSET (position of the least significant bit) and
 * HSMC_<field>_SIZE (width in bits); the HSMC_BIT/BF/BFEXT/BFINS helpers
 * paste these names together.  The layouts are named after the bank-0
 * registers; smc_set_configuration() applies them to every chip select.
 */
39/* Bitfields in SETUP0 */
40#define HSMC_NWE_SETUP_OFFSET 0
41#define HSMC_NWE_SETUP_SIZE 6
42#define HSMC_NCS_WR_SETUP_OFFSET 8
43#define HSMC_NCS_WR_SETUP_SIZE 6
44#define HSMC_NRD_SETUP_OFFSET 16
45#define HSMC_NRD_SETUP_SIZE 6
46#define HSMC_NCS_RD_SETUP_OFFSET 24
47#define HSMC_NCS_RD_SETUP_SIZE 6
48
49/* Bitfields in PULSE0 */
50#define HSMC_NWE_PULSE_OFFSET 0
51#define HSMC_NWE_PULSE_SIZE 7
52#define HSMC_NCS_WR_PULSE_OFFSET 8
53#define HSMC_NCS_WR_PULSE_SIZE 7
54#define HSMC_NRD_PULSE_OFFSET 16
55#define HSMC_NRD_PULSE_SIZE 7
56#define HSMC_NCS_RD_PULSE_OFFSET 24
57#define HSMC_NCS_RD_PULSE_SIZE 7
58
59/* Bitfields in CYCLE0 */
60#define HSMC_NWE_CYCLE_OFFSET 0
61#define HSMC_NWE_CYCLE_SIZE 9
62#define HSMC_NRD_CYCLE_OFFSET 16
63#define HSMC_NRD_CYCLE_SIZE 9
64
65/* Bitfields in MODE0 */
66#define HSMC_READ_MODE_OFFSET 0
67#define HSMC_READ_MODE_SIZE 1
68#define HSMC_WRITE_MODE_OFFSET 1
69#define HSMC_WRITE_MODE_SIZE 1
70#define HSMC_EXNW_MODE_OFFSET 4
71#define HSMC_EXNW_MODE_SIZE 2
72#define HSMC_BAT_OFFSET 8
73#define HSMC_BAT_SIZE 1
74#define HSMC_DBW_OFFSET 12
75#define HSMC_DBW_SIZE 2
76#define HSMC_TDF_CYCLES_OFFSET 16
77#define HSMC_TDF_CYCLES_SIZE 4
78#define HSMC_TDF_MODE_OFFSET 20
79#define HSMC_TDF_MODE_SIZE 1
80#define HSMC_PMEN_OFFSET 24
81#define HSMC_PMEN_SIZE 1
82#define HSMC_PS_OFFSET 28
83#define HSMC_PS_SIZE 2
84
/*
 * Values for the MODE register fields above.  These are raw field values,
 * not pre-shifted: pass them through HSMC_BF() to position them.
 */
85/* Constants for READ_MODE */
86#define HSMC_READ_MODE_NCS_CONTROLLED 0
87#define HSMC_READ_MODE_NRD_CONTROLLED 1
88
89/* Constants for WRITE_MODE */
90#define HSMC_WRITE_MODE_NCS_CONTROLLED 0
91#define HSMC_WRITE_MODE_NWE_CONTROLLED 1
92
93/* Constants for EXNW_MODE */
94#define HSMC_EXNW_MODE_DISABLED 0
95#define HSMC_EXNW_MODE_RESERVED 1
96#define HSMC_EXNW_MODE_FROZEN 2
97#define HSMC_EXNW_MODE_READY 3
98
99/* Constants for BAT */
100#define HSMC_BAT_BYTE_SELECT 0
101#define HSMC_BAT_BYTE_WRITE 1
102
103/* Constants for DBW */
104#define HSMC_DBW_8_BITS 0
105#define HSMC_DBW_16_BITS 1
106#define HSMC_DBW_32_BITS 2
107
108/* Bit manipulation macros
 *
 * `name` is a field name without the HSMC_ prefix (e.g. DBW, NWE_SETUP).
 *   HSMC_BIT(name)            mask with only the field's lowest bit set
 *   HSMC_BF(name, value)      value truncated to the field width and
 *                             shifted into position
 *   HSMC_BFEXT(name, value)   field extracted from register value `value`
 *   HSMC_BFINS(name, v, old)  `old` with the field replaced by `v`
 */
109#define HSMC_BIT(name) \
110 (1 << HSMC_##name##_OFFSET)
111#define HSMC_BF(name,value) \
112 (((value) & ((1 << HSMC_##name##_SIZE) - 1)) \
113 << HSMC_##name##_OFFSET)
114#define HSMC_BFEXT(name,value) \
115 (((value) >> HSMC_##name##_OFFSET) \
116 & ((1 << HSMC_##name##_SIZE) - 1))
117#define HSMC_BFINS(name,value,old) \
118 (((old) & ~(((1 << HSMC_##name##_SIZE) - 1) \
119 << HSMC_##name##_OFFSET)) | HSMC_BF(name,value))
120
121/* Register access macros
 *
 * `port` must have a `regs` member holding the mapped register base and
 * `reg` is a register name without the HSMC_ prefix.  Because of the
 * token pasting, `reg` may be followed by an offset expression, as in
 * hsmc_writel(hsmc, SETUP0 + offset, setup).
 */
122#define hsmc_readl(port,reg) \
123 readl((port)->regs + HSMC_##reg)
124#define hsmc_writel(port,reg,value) \
125 writel((value), (port)->regs + HSMC_##reg)
126
127#endif /* __ASM_AVR32_HSMC_H__ */
diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c
new file mode 100644
index 000000000000..74f8c9f2f03d
--- /dev/null
+++ b/arch/avr32/mach-at32ap/intc.c
@@ -0,0 +1,133 @@
1/*
2 * Copyright (C) 2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/clk.h>
10#include <linux/err.h>
11#include <linux/init.h>
12#include <linux/interrupt.h>
13#include <linux/irq.h>
14#include <linux/platform_device.h>
15
16#include <asm/io.h>
17
18#include "intc.h"
19
/* State of one internal interrupt controller: mapped registers and chip. */
20struct intc {
21 void __iomem *regs;
22 struct irq_chip chip;
23};
24
/* Declared here by hand; init_IRQ() reads its MMIO resource and clock. */
25extern struct platform_device at32_intc0_device;
26
/*
 * The mask and unmask hooks of the internal controller are deliberately
 * empty for now.
 *
 * TODO: We may be able to implement mask/unmask by setting IxM flags
 * in the status register.
 */
static void intc_mask_irq(unsigned int irq)
{
}

static void intc_unmask_irq(unsigned int irq)
{
}
40
/*
 * The one internal interrupt controller.  .regs stays NULL until
 * init_IRQ() maps the register block; the chip's mask/unmask hooks are
 * the empty stubs defined in this file.
 */
41static struct intc intc0 = {
42 .chip = {
43 .name = "intc",
44 .mask = intc_mask_irq,
45 .unmask = intc_unmask_irq,
46 },
47};
48
49/*
50 * All interrupts go via intc at some point.
51 */
52asmlinkage void do_IRQ(int level, struct pt_regs *regs)
53{
54 struct irq_desc *desc;
55 unsigned int irq;
56 unsigned long status_reg;
57
58 local_irq_disable();
59
60 irq_enter();
61
62 irq = intc_readl(&intc0, INTCAUSE0 - 4 * level);
63 desc = irq_desc + irq;
64 desc->handle_irq(irq, desc, regs);
65
66 /*
67 * Clear all interrupt level masks so that we may handle
68 * interrupts during softirq processing. If this is a nested
69 * interrupt, interrupts must stay globally disabled until we
70 * return.
71 */
72 status_reg = sysreg_read(SR);
73 status_reg &= ~(SYSREG_BIT(I0M) | SYSREG_BIT(I1M)
74 | SYSREG_BIT(I2M) | SYSREG_BIT(I3M));
75 sysreg_write(SR, status_reg);
76
77 irq_exit();
78}
79
80void __init init_IRQ(void)
81{
82 extern void _evba(void);
83 extern void irq_level0(void);
84 struct resource *regs;
85 struct clk *pclk;
86 unsigned int i;
87 u32 offset, readback;
88
89 regs = platform_get_resource(&at32_intc0_device, IORESOURCE_MEM, 0);
90 if (!regs) {
91 printk(KERN_EMERG "intc: no mmio resource defined\n");
92 goto fail;
93 }
94 pclk = clk_get(&at32_intc0_device.dev, "pclk");
95 if (IS_ERR(pclk)) {
96 printk(KERN_EMERG "intc: no clock defined\n");
97 goto fail;
98 }
99
100 clk_enable(pclk);
101
102 intc0.regs = ioremap(regs->start, regs->end - regs->start + 1);
103 if (!intc0.regs) {
104 printk(KERN_EMERG "intc: failed to map registers (0x%08lx)\n",
105 (unsigned long)regs->start);
106 goto fail;
107 }
108
109 /*
110 * Initialize all interrupts to level 0 (lowest priority). The
111 * priority level may be changed by calling
112 * irq_set_priority().
113 *
114 */
115 offset = (unsigned long)&irq_level0 - (unsigned long)&_evba;
116 for (i = 0; i < NR_INTERNAL_IRQS; i++) {
117 intc_writel(&intc0, INTPR0 + 4 * i, offset);
118 readback = intc_readl(&intc0, INTPR0 + 4 * i);
119 if (readback == offset)
120 set_irq_chip_and_handler(i, &intc0.chip,
121 handle_simple_irq);
122 }
123
124 /* Unmask all interrupt levels */
125 sysreg_write(SR, (sysreg_read(SR)
126 & ~(SR_I3M | SR_I2M | SR_I1M | SR_I0M)));
127
128 return;
129
130fail:
131 panic("Interrupt controller initialization failed!\n");
132}
133
diff --git a/arch/avr32/mach-at32ap/intc.h b/arch/avr32/mach-at32ap/intc.h
new file mode 100644
index 000000000000..d289ca2fff13
--- /dev/null
+++ b/arch/avr32/mach-at32ap/intc.h
@@ -0,0 +1,327 @@
1/*
2 * Automatically generated by gen-header.xsl
3 */
4#ifndef __ASM_AVR32_PERIHP_INTC_H__
5#define __ASM_AVR32_PERIHP_INTC_H__
6
7#define INTC_NUM_INT_GRPS 33
8
9#define INTC_INTPR0 0x0
10# define INTC_INTPR0_INTLEV_OFFSET 30
11# define INTC_INTPR0_INTLEV_SIZE 2
12# define INTC_INTPR0_OFFSET_OFFSET 0
13# define INTC_INTPR0_OFFSET_SIZE 24
14#define INTC_INTREQ0 0x100
15# define INTC_INTREQ0_IREQUEST0_OFFSET 0
16# define INTC_INTREQ0_IREQUEST0_SIZE 1
17# define INTC_INTREQ0_IREQUEST1_OFFSET 1
18# define INTC_INTREQ0_IREQUEST1_SIZE 1
19#define INTC_INTPR1 0x4
20# define INTC_INTPR1_INTLEV_OFFSET 30
21# define INTC_INTPR1_INTLEV_SIZE 2
22# define INTC_INTPR1_OFFSET_OFFSET 0
23# define INTC_INTPR1_OFFSET_SIZE 24
24#define INTC_INTREQ1 0x104
25# define INTC_INTREQ1_IREQUEST32_OFFSET 0
26# define INTC_INTREQ1_IREQUEST32_SIZE 1
27# define INTC_INTREQ1_IREQUEST33_OFFSET 1
28# define INTC_INTREQ1_IREQUEST33_SIZE 1
29# define INTC_INTREQ1_IREQUEST34_OFFSET 2
30# define INTC_INTREQ1_IREQUEST34_SIZE 1
31# define INTC_INTREQ1_IREQUEST35_OFFSET 3
32# define INTC_INTREQ1_IREQUEST35_SIZE 1
33# define INTC_INTREQ1_IREQUEST36_OFFSET 4
34# define INTC_INTREQ1_IREQUEST36_SIZE 1
35# define INTC_INTREQ1_IREQUEST37_OFFSET 5
36# define INTC_INTREQ1_IREQUEST37_SIZE 1
37#define INTC_INTPR2 0x8
38# define INTC_INTPR2_INTLEV_OFFSET 30
39# define INTC_INTPR2_INTLEV_SIZE 2
40# define INTC_INTPR2_OFFSET_OFFSET 0
41# define INTC_INTPR2_OFFSET_SIZE 24
42#define INTC_INTREQ2 0x108
43# define INTC_INTREQ2_IREQUEST64_OFFSET 0
44# define INTC_INTREQ2_IREQUEST64_SIZE 1
45# define INTC_INTREQ2_IREQUEST65_OFFSET 1
46# define INTC_INTREQ2_IREQUEST65_SIZE 1
47# define INTC_INTREQ2_IREQUEST66_OFFSET 2
48# define INTC_INTREQ2_IREQUEST66_SIZE 1
49# define INTC_INTREQ2_IREQUEST67_OFFSET 3
50# define INTC_INTREQ2_IREQUEST67_SIZE 1
51# define INTC_INTREQ2_IREQUEST68_OFFSET 4
52# define INTC_INTREQ2_IREQUEST68_SIZE 1
53#define INTC_INTPR3 0xc
54# define INTC_INTPR3_INTLEV_OFFSET 30
55# define INTC_INTPR3_INTLEV_SIZE 2
56# define INTC_INTPR3_OFFSET_OFFSET 0
57# define INTC_INTPR3_OFFSET_SIZE 24
58#define INTC_INTREQ3 0x10c
59# define INTC_INTREQ3_IREQUEST96_OFFSET 0
60# define INTC_INTREQ3_IREQUEST96_SIZE 1
61#define INTC_INTPR4 0x10
62# define INTC_INTPR4_INTLEV_OFFSET 30
63# define INTC_INTPR4_INTLEV_SIZE 2
64# define INTC_INTPR4_OFFSET_OFFSET 0
65# define INTC_INTPR4_OFFSET_SIZE 24
66#define INTC_INTREQ4 0x110
67# define INTC_INTREQ4_IREQUEST128_OFFSET 0
68# define INTC_INTREQ4_IREQUEST128_SIZE 1
69#define INTC_INTPR5 0x14
70# define INTC_INTPR5_INTLEV_OFFSET 30
71# define INTC_INTPR5_INTLEV_SIZE 2
72# define INTC_INTPR5_OFFSET_OFFSET 0
73# define INTC_INTPR5_OFFSET_SIZE 24
74#define INTC_INTREQ5 0x114
75# define INTC_INTREQ5_IREQUEST160_OFFSET 0
76# define INTC_INTREQ5_IREQUEST160_SIZE 1
77#define INTC_INTPR6 0x18
78# define INTC_INTPR6_INTLEV_OFFSET 30
79# define INTC_INTPR6_INTLEV_SIZE 2
80# define INTC_INTPR6_OFFSET_OFFSET 0
81# define INTC_INTPR6_OFFSET_SIZE 24
82#define INTC_INTREQ6 0x118
83# define INTC_INTREQ6_IREQUEST192_OFFSET 0
84# define INTC_INTREQ6_IREQUEST192_SIZE 1
85#define INTC_INTPR7 0x1c
86# define INTC_INTPR7_INTLEV_OFFSET 30
87# define INTC_INTPR7_INTLEV_SIZE 2
88# define INTC_INTPR7_OFFSET_OFFSET 0
89# define INTC_INTPR7_OFFSET_SIZE 24
90#define INTC_INTREQ7 0x11c
91# define INTC_INTREQ7_IREQUEST224_OFFSET 0
92# define INTC_INTREQ7_IREQUEST224_SIZE 1
93#define INTC_INTPR8 0x20
94# define INTC_INTPR8_INTLEV_OFFSET 30
95# define INTC_INTPR8_INTLEV_SIZE 2
96# define INTC_INTPR8_OFFSET_OFFSET 0
97# define INTC_INTPR8_OFFSET_SIZE 24
98#define INTC_INTREQ8 0x120
99# define INTC_INTREQ8_IREQUEST256_OFFSET 0
100# define INTC_INTREQ8_IREQUEST256_SIZE 1
101#define INTC_INTPR9 0x24
102# define INTC_INTPR9_INTLEV_OFFSET 30
103# define INTC_INTPR9_INTLEV_SIZE 2
104# define INTC_INTPR9_OFFSET_OFFSET 0
105# define INTC_INTPR9_OFFSET_SIZE 24
106#define INTC_INTREQ9 0x124
107# define INTC_INTREQ9_IREQUEST288_OFFSET 0
108# define INTC_INTREQ9_IREQUEST288_SIZE 1
109#define INTC_INTPR10 0x28
110# define INTC_INTPR10_INTLEV_OFFSET 30
111# define INTC_INTPR10_INTLEV_SIZE 2
112# define INTC_INTPR10_OFFSET_OFFSET 0
113# define INTC_INTPR10_OFFSET_SIZE 24
114#define INTC_INTREQ10 0x128
115# define INTC_INTREQ10_IREQUEST320_OFFSET 0
116# define INTC_INTREQ10_IREQUEST320_SIZE 1
117#define INTC_INTPR11 0x2c
118# define INTC_INTPR11_INTLEV_OFFSET 30
119# define INTC_INTPR11_INTLEV_SIZE 2
120# define INTC_INTPR11_OFFSET_OFFSET 0
121# define INTC_INTPR11_OFFSET_SIZE 24
122#define INTC_INTREQ11 0x12c
123# define INTC_INTREQ11_IREQUEST352_OFFSET 0
124# define INTC_INTREQ11_IREQUEST352_SIZE 1
125#define INTC_INTPR12 0x30
126# define INTC_INTPR12_INTLEV_OFFSET 30
127# define INTC_INTPR12_INTLEV_SIZE 2
128# define INTC_INTPR12_OFFSET_OFFSET 0
129# define INTC_INTPR12_OFFSET_SIZE 24
130#define INTC_INTREQ12 0x130
131# define INTC_INTREQ12_IREQUEST384_OFFSET 0
132# define INTC_INTREQ12_IREQUEST384_SIZE 1
133#define INTC_INTPR13 0x34
134# define INTC_INTPR13_INTLEV_OFFSET 30
135# define INTC_INTPR13_INTLEV_SIZE 2
136# define INTC_INTPR13_OFFSET_OFFSET 0
137# define INTC_INTPR13_OFFSET_SIZE 24
138#define INTC_INTREQ13 0x134
139# define INTC_INTREQ13_IREQUEST416_OFFSET 0
140# define INTC_INTREQ13_IREQUEST416_SIZE 1
141#define INTC_INTPR14 0x38
142# define INTC_INTPR14_INTLEV_OFFSET 30
143# define INTC_INTPR14_INTLEV_SIZE 2
144# define INTC_INTPR14_OFFSET_OFFSET 0
145# define INTC_INTPR14_OFFSET_SIZE 24
146#define INTC_INTREQ14 0x138
147# define INTC_INTREQ14_IREQUEST448_OFFSET 0
148# define INTC_INTREQ14_IREQUEST448_SIZE 1
149#define INTC_INTPR15 0x3c
150# define INTC_INTPR15_INTLEV_OFFSET 30
151# define INTC_INTPR15_INTLEV_SIZE 2
152# define INTC_INTPR15_OFFSET_OFFSET 0
153# define INTC_INTPR15_OFFSET_SIZE 24
154#define INTC_INTREQ15 0x13c
155# define INTC_INTREQ15_IREQUEST480_OFFSET 0
156# define INTC_INTREQ15_IREQUEST480_SIZE 1
157#define INTC_INTPR16 0x40
158# define INTC_INTPR16_INTLEV_OFFSET 30
159# define INTC_INTPR16_INTLEV_SIZE 2
160# define INTC_INTPR16_OFFSET_OFFSET 0
161# define INTC_INTPR16_OFFSET_SIZE 24
162#define INTC_INTREQ16 0x140
163# define INTC_INTREQ16_IREQUEST512_OFFSET 0
164# define INTC_INTREQ16_IREQUEST512_SIZE 1
165#define INTC_INTPR17 0x44
166# define INTC_INTPR17_INTLEV_OFFSET 30
167# define INTC_INTPR17_INTLEV_SIZE 2
168# define INTC_INTPR17_OFFSET_OFFSET 0
169# define INTC_INTPR17_OFFSET_SIZE 24
170#define INTC_INTREQ17 0x144
171# define INTC_INTREQ17_IREQUEST544_OFFSET 0
172# define INTC_INTREQ17_IREQUEST544_SIZE 1
173#define INTC_INTPR18 0x48
174# define INTC_INTPR18_INTLEV_OFFSET 30
175# define INTC_INTPR18_INTLEV_SIZE 2
176# define INTC_INTPR18_OFFSET_OFFSET 0
177# define INTC_INTPR18_OFFSET_SIZE 24
178#define INTC_INTREQ18 0x148
179# define INTC_INTREQ18_IREQUEST576_OFFSET 0
180# define INTC_INTREQ18_IREQUEST576_SIZE 1
181#define INTC_INTPR19 0x4c
182# define INTC_INTPR19_INTLEV_OFFSET 30
183# define INTC_INTPR19_INTLEV_SIZE 2
184# define INTC_INTPR19_OFFSET_OFFSET 0
185# define INTC_INTPR19_OFFSET_SIZE 24
186#define INTC_INTREQ19 0x14c
187# define INTC_INTREQ19_IREQUEST608_OFFSET 0
188# define INTC_INTREQ19_IREQUEST608_SIZE 1
189# define INTC_INTREQ19_IREQUEST609_OFFSET 1
190# define INTC_INTREQ19_IREQUEST609_SIZE 1
191# define INTC_INTREQ19_IREQUEST610_OFFSET 2
192# define INTC_INTREQ19_IREQUEST610_SIZE 1
193# define INTC_INTREQ19_IREQUEST611_OFFSET 3
194# define INTC_INTREQ19_IREQUEST611_SIZE 1
195#define INTC_INTPR20 0x50
196# define INTC_INTPR20_INTLEV_OFFSET 30
197# define INTC_INTPR20_INTLEV_SIZE 2
198# define INTC_INTPR20_OFFSET_OFFSET 0
199# define INTC_INTPR20_OFFSET_SIZE 24
200#define INTC_INTREQ20 0x150
201# define INTC_INTREQ20_IREQUEST640_OFFSET 0
202# define INTC_INTREQ20_IREQUEST640_SIZE 1
203#define INTC_INTPR21 0x54
204# define INTC_INTPR21_INTLEV_OFFSET 30
205# define INTC_INTPR21_INTLEV_SIZE 2
206# define INTC_INTPR21_OFFSET_OFFSET 0
207# define INTC_INTPR21_OFFSET_SIZE 24
208#define INTC_INTREQ21 0x154
209# define INTC_INTREQ21_IREQUEST672_OFFSET 0
210# define INTC_INTREQ21_IREQUEST672_SIZE 1
211#define INTC_INTPR22 0x58
212# define INTC_INTPR22_INTLEV_OFFSET 30
213# define INTC_INTPR22_INTLEV_SIZE 2
214# define INTC_INTPR22_OFFSET_OFFSET 0
215# define INTC_INTPR22_OFFSET_SIZE 24
216#define INTC_INTREQ22 0x158
217# define INTC_INTREQ22_IREQUEST704_OFFSET 0
218# define INTC_INTREQ22_IREQUEST704_SIZE 1
219# define INTC_INTREQ22_IREQUEST705_OFFSET 1
220# define INTC_INTREQ22_IREQUEST705_SIZE 1
221# define INTC_INTREQ22_IREQUEST706_OFFSET 2
222# define INTC_INTREQ22_IREQUEST706_SIZE 1
223#define INTC_INTPR23 0x5c
224# define INTC_INTPR23_INTLEV_OFFSET 30
225# define INTC_INTPR23_INTLEV_SIZE 2
226# define INTC_INTPR23_OFFSET_OFFSET 0
227# define INTC_INTPR23_OFFSET_SIZE 24
228#define INTC_INTREQ23 0x15c
229# define INTC_INTREQ23_IREQUEST736_OFFSET 0
230# define INTC_INTREQ23_IREQUEST736_SIZE 1
231# define INTC_INTREQ23_IREQUEST737_OFFSET 1
232# define INTC_INTREQ23_IREQUEST737_SIZE 1
233# define INTC_INTREQ23_IREQUEST738_OFFSET 2
234# define INTC_INTREQ23_IREQUEST738_SIZE 1
235#define INTC_INTPR24 0x60
236# define INTC_INTPR24_INTLEV_OFFSET 30
237# define INTC_INTPR24_INTLEV_SIZE 2
238# define INTC_INTPR24_OFFSET_OFFSET 0
239# define INTC_INTPR24_OFFSET_SIZE 24
240#define INTC_INTREQ24 0x160
241# define INTC_INTREQ24_IREQUEST768_OFFSET 0
242# define INTC_INTREQ24_IREQUEST768_SIZE 1
243#define INTC_INTPR25 0x64
244# define INTC_INTPR25_INTLEV_OFFSET 30
245# define INTC_INTPR25_INTLEV_SIZE 2
246# define INTC_INTPR25_OFFSET_OFFSET 0
247# define INTC_INTPR25_OFFSET_SIZE 24
248#define INTC_INTREQ25 0x164
249# define INTC_INTREQ25_IREQUEST800_OFFSET 0
250# define INTC_INTREQ25_IREQUEST800_SIZE 1
251#define INTC_INTPR26 0x68
252# define INTC_INTPR26_INTLEV_OFFSET 30
253# define INTC_INTPR26_INTLEV_SIZE 2
254# define INTC_INTPR26_OFFSET_OFFSET 0
255# define INTC_INTPR26_OFFSET_SIZE 24
256#define INTC_INTREQ26 0x168
257# define INTC_INTREQ26_IREQUEST832_OFFSET 0
258# define INTC_INTREQ26_IREQUEST832_SIZE 1
259#define INTC_INTPR27 0x6c
260# define INTC_INTPR27_INTLEV_OFFSET 30
261# define INTC_INTPR27_INTLEV_SIZE 2
262# define INTC_INTPR27_OFFSET_OFFSET 0
263# define INTC_INTPR27_OFFSET_SIZE 24
264#define INTC_INTREQ27 0x16c
265# define INTC_INTREQ27_IREQUEST864_OFFSET 0
266# define INTC_INTREQ27_IREQUEST864_SIZE 1
267#define INTC_INTPR28 0x70
268# define INTC_INTPR28_INTLEV_OFFSET 30
269# define INTC_INTPR28_INTLEV_SIZE 2
270# define INTC_INTPR28_OFFSET_OFFSET 0
271# define INTC_INTPR28_OFFSET_SIZE 24
272#define INTC_INTREQ28 0x170
273# define INTC_INTREQ28_IREQUEST896_OFFSET 0
274# define INTC_INTREQ28_IREQUEST896_SIZE 1
275#define INTC_INTPR29 0x74
276# define INTC_INTPR29_INTLEV_OFFSET 30
277# define INTC_INTPR29_INTLEV_SIZE 2
278# define INTC_INTPR29_OFFSET_OFFSET 0
279# define INTC_INTPR29_OFFSET_SIZE 24
280#define INTC_INTREQ29 0x174
281# define INTC_INTREQ29_IREQUEST928_OFFSET 0
282# define INTC_INTREQ29_IREQUEST928_SIZE 1
283#define INTC_INTPR30 0x78
284# define INTC_INTPR30_INTLEV_OFFSET 30
285# define INTC_INTPR30_INTLEV_SIZE 2
286# define INTC_INTPR30_OFFSET_OFFSET 0
287# define INTC_INTPR30_OFFSET_SIZE 24
288#define INTC_INTREQ30 0x178
289# define INTC_INTREQ30_IREQUEST960_OFFSET 0
290# define INTC_INTREQ30_IREQUEST960_SIZE 1
291#define INTC_INTPR31 0x7c
292# define INTC_INTPR31_INTLEV_OFFSET 30
293# define INTC_INTPR31_INTLEV_SIZE 2
294# define INTC_INTPR31_OFFSET_OFFSET 0
295# define INTC_INTPR31_OFFSET_SIZE 24
296#define INTC_INTREQ31 0x17c
297# define INTC_INTREQ31_IREQUEST992_OFFSET 0
298# define INTC_INTREQ31_IREQUEST992_SIZE 1
299#define INTC_INTPR32 0x80
300# define INTC_INTPR32_INTLEV_OFFSET 30
301# define INTC_INTPR32_INTLEV_SIZE 2
302# define INTC_INTPR32_OFFSET_OFFSET 0
303# define INTC_INTPR32_OFFSET_SIZE 24
304#define INTC_INTREQ32 0x180
305# define INTC_INTREQ32_IREQUEST1024_OFFSET 0
306# define INTC_INTREQ32_IREQUEST1024_SIZE 1
307#define INTC_INTCAUSE0 0x20c
308# define INTC_INTCAUSE0_CAUSEGRP_OFFSET 0
309# define INTC_INTCAUSE0_CAUSEGRP_SIZE 6
310#define INTC_INTCAUSE1 0x208
311# define INTC_INTCAUSE1_CAUSEGRP_OFFSET 0
312# define INTC_INTCAUSE1_CAUSEGRP_SIZE 6
313#define INTC_INTCAUSE2 0x204
314# define INTC_INTCAUSE2_CAUSEGRP_OFFSET 0
315# define INTC_INTCAUSE2_CAUSEGRP_SIZE 6
316#define INTC_INTCAUSE3 0x200
317# define INTC_INTCAUSE3_CAUSEGRP_OFFSET 0
318# define INTC_INTCAUSE3_CAUSEGRP_SIZE 6
319
/*
 * Bitfield helpers.  The shifted constant is unsigned so that fields
 * reaching bit 31 (e.g. INTLEV at offset 30, size 2) never shift a one
 * into the sign bit of a signed int, which is undefined in C.
 */
#define INTC_BIT(name) (1U << INTC_##name##_OFFSET)
#define INTC_MKBF(name, value) (((value) & ((1U << INTC_##name##_SIZE) - 1)) << INTC_##name##_OFFSET)
#define INTC_GETBF(name, value) (((value) >> INTC_##name##_OFFSET) & ((1U << INTC_##name##_SIZE) - 1))
323
324#define intc_readl(port,reg) readl((port)->regs + INTC_##reg)
325#define intc_writel(port,reg,value) writel((value), (port)->regs + INTC_##reg)
326
327#endif /* __ASM_AVR32_PERIHP_INTC_H__ */
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
new file mode 100644
index 000000000000..d3aabfca8598
--- /dev/null
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -0,0 +1,118 @@
1/*
2 * Atmel PIO2 Port Multiplexer support
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/clk.h>
12#include <linux/debugfs.h>
13#include <linux/fs.h>
14#include <linux/platform_device.h>
15
16#include <asm/io.h>
17
18#include <asm/arch/portmux.h>
19
20#include "pio.h"
21
22#define MAX_NR_PIO_DEVICES 8
23
24struct pio_device {
25 void __iomem *regs;
26 const struct platform_device *pdev;
27 struct clk *clk;
28 u32 alloc_mask;
29 char name[32];
30};
31
32static struct pio_device pio_dev[MAX_NR_PIO_DEVICES];
33
34void portmux_set_func(unsigned int portmux_id, unsigned int pin_id,
35 unsigned int function_id)
36{
37 struct pio_device *pio;
38 u32 mask = 1 << pin_id;
39
40 BUG_ON(portmux_id >= MAX_NR_PIO_DEVICES);
41
42 pio = &pio_dev[portmux_id];
43
44 if (function_id)
45 pio_writel(pio, BSR, mask);
46 else
47 pio_writel(pio, ASR, mask);
48 pio_writel(pio, PDR, mask);
49}
50
51static int __init pio_probe(struct platform_device *pdev)
52{
53 struct pio_device *pio = NULL;
54
55 BUG_ON(pdev->id >= MAX_NR_PIO_DEVICES);
56 pio = &pio_dev[pdev->id];
57 BUG_ON(!pio->regs);
58
59 /* TODO: Interrupts */
60
61 platform_set_drvdata(pdev, pio);
62
63 printk(KERN_INFO "%s: Atmel Port Multiplexer at 0x%p (irq %d)\n",
64 pio->name, pio->regs, platform_get_irq(pdev, 0));
65
66 return 0;
67}
68
69static struct platform_driver pio_driver = {
70 .probe = pio_probe,
71 .driver = {
72 .name = "pio",
73 },
74};
75
76static int __init pio_init(void)
77{
78 return platform_driver_register(&pio_driver);
79}
80subsys_initcall(pio_init);
81
82void __init at32_init_pio(struct platform_device *pdev)
83{
84 struct resource *regs;
85 struct pio_device *pio;
86
87 if (pdev->id > MAX_NR_PIO_DEVICES) {
88 dev_err(&pdev->dev, "only %d PIO devices supported\n",
89 MAX_NR_PIO_DEVICES);
90 return;
91 }
92
93 pio = &pio_dev[pdev->id];
94 snprintf(pio->name, sizeof(pio->name), "pio%d", pdev->id);
95
96 regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
97 if (!regs) {
98 dev_err(&pdev->dev, "no mmio resource defined\n");
99 return;
100 }
101
102 pio->clk = clk_get(&pdev->dev, "mck");
103 if (IS_ERR(pio->clk))
104 /*
105 * This is a fatal error, but if we continue we might
106 * be so lucky that we manage to initialize the
107 * console and display this message...
108 */
109 dev_err(&pdev->dev, "no mck clock defined\n");
110 else
111 clk_enable(pio->clk);
112
113 pio->pdev = pdev;
114 pio->regs = ioremap(regs->start, regs->end - regs->start + 1);
115
116 pio_writel(pio, ODR, ~0UL);
117 pio_writel(pio, PER, ~0UL);
118}
diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h
new file mode 100644
index 000000000000..cfea12351599
--- /dev/null
+++ b/arch/avr32/mach-at32ap/pio.h
@@ -0,0 +1,178 @@
1/*
2 * Atmel PIO2 Port Multiplexer support
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ARCH_AVR32_AT32AP_PIO_H__
11#define __ARCH_AVR32_AT32AP_PIO_H__
12
13/* PIO register offsets */
14#define PIO_PER 0x0000
15#define PIO_PDR 0x0004
16#define PIO_PSR 0x0008
17#define PIO_OER 0x0010
18#define PIO_ODR 0x0014
19#define PIO_OSR 0x0018
20#define PIO_IFER 0x0020
21#define PIO_IFDR 0x0024
22#define PIO_ISFR 0x0028
23#define PIO_SODR 0x0030
24#define PIO_CODR 0x0034
25#define PIO_ODSR 0x0038
26#define PIO_PDSR 0x003c
27#define PIO_IER 0x0040
28#define PIO_IDR 0x0044
29#define PIO_IMR 0x0048
30#define PIO_ISR 0x004c
31#define PIO_MDER 0x0050
32#define PIO_MDDR 0x0054
33#define PIO_MDSR 0x0058
34#define PIO_PUDR 0x0060
35#define PIO_PUER 0x0064
36#define PIO_PUSR 0x0068
37#define PIO_ASR 0x0070
38#define PIO_BSR 0x0074
39#define PIO_ABSR 0x0078
40#define PIO_OWER 0x00a0
41#define PIO_OWDR 0x00a4
42#define PIO_OWSR 0x00a8
43
44/* Bitfields in PER */
45
46/* Bitfields in PDR */
47
48/* Bitfields in PSR */
49
50/* Bitfields in OER */
51
52/* Bitfields in ODR */
53
54/* Bitfields in OSR */
55
56/* Bitfields in IFER */
57
58/* Bitfields in IFDR */
59
60/* Bitfields in ISFR */
61
62/* Bitfields in SODR */
63
64/* Bitfields in CODR */
65
66/* Bitfields in ODSR */
67
68/* Bitfields in PDSR */
69
70/* Bitfields in IER */
71
72/* Bitfields in IDR */
73
74/* Bitfields in IMR */
75
76/* Bitfields in ISR */
77
78/* Bitfields in MDER */
79
80/* Bitfields in MDDR */
81
82/* Bitfields in MDSR */
83
84/* Bitfields in PUDR */
85
86/* Bitfields in PUER */
87
88/* Bitfields in PUSR */
89
90/* Bitfields in ASR */
91
92/* Bitfields in BSR */
93
94/* Bitfields in ABSR */
95#define PIO_P0_OFFSET 0
96#define PIO_P0_SIZE 1
97#define PIO_P1_OFFSET 1
98#define PIO_P1_SIZE 1
99#define PIO_P2_OFFSET 2
100#define PIO_P2_SIZE 1
101#define PIO_P3_OFFSET 3
102#define PIO_P3_SIZE 1
103#define PIO_P4_OFFSET 4
104#define PIO_P4_SIZE 1
105#define PIO_P5_OFFSET 5
106#define PIO_P5_SIZE 1
107#define PIO_P6_OFFSET 6
108#define PIO_P6_SIZE 1
109#define PIO_P7_OFFSET 7
110#define PIO_P7_SIZE 1
111#define PIO_P8_OFFSET 8
112#define PIO_P8_SIZE 1
113#define PIO_P9_OFFSET 9
114#define PIO_P9_SIZE 1
115#define PIO_P10_OFFSET 10
116#define PIO_P10_SIZE 1
117#define PIO_P11_OFFSET 11
118#define PIO_P11_SIZE 1
119#define PIO_P12_OFFSET 12
120#define PIO_P12_SIZE 1
121#define PIO_P13_OFFSET 13
122#define PIO_P13_SIZE 1
123#define PIO_P14_OFFSET 14
124#define PIO_P14_SIZE 1
125#define PIO_P15_OFFSET 15
126#define PIO_P15_SIZE 1
127#define PIO_P16_OFFSET 16
128#define PIO_P16_SIZE 1
129#define PIO_P17_OFFSET 17
130#define PIO_P17_SIZE 1
131#define PIO_P18_OFFSET 18
132#define PIO_P18_SIZE 1
133#define PIO_P19_OFFSET 19
134#define PIO_P19_SIZE 1
135#define PIO_P20_OFFSET 20
136#define PIO_P20_SIZE 1
137#define PIO_P21_OFFSET 21
138#define PIO_P21_SIZE 1
139#define PIO_P22_OFFSET 22
140#define PIO_P22_SIZE 1
141#define PIO_P23_OFFSET 23
142#define PIO_P23_SIZE 1
143#define PIO_P24_OFFSET 24
144#define PIO_P24_SIZE 1
145#define PIO_P25_OFFSET 25
146#define PIO_P25_SIZE 1
147#define PIO_P26_OFFSET 26
148#define PIO_P26_SIZE 1
149#define PIO_P27_OFFSET 27
150#define PIO_P27_SIZE 1
151#define PIO_P28_OFFSET 28
152#define PIO_P28_SIZE 1
153#define PIO_P29_OFFSET 29
154#define PIO_P29_SIZE 1
155#define PIO_P30_OFFSET 30
156#define PIO_P30_SIZE 1
157#define PIO_P31_OFFSET 31
158#define PIO_P31_SIZE 1
159
160/* Bitfields in OWER */
161
162/* Bitfields in OWDR */
163
164/* Bitfields in OWSR */
165
166/* Bit manipulation macros */
/*
 * The shifted constants are unsigned: the pin fields go up to bit 31
 * (P31), and shifting a signed 1 into the sign bit is undefined in C.
 */
#define PIO_BIT(name) (1U << PIO_##name##_OFFSET)
#define PIO_BF(name,value) (((value) & ((1U << PIO_##name##_SIZE) - 1)) << PIO_##name##_OFFSET)
#define PIO_BFEXT(name,value) (((value) >> PIO_##name##_OFFSET) & ((1U << PIO_##name##_SIZE) - 1))
#define PIO_BFINS(name,value,old) (((old) & ~(((1U << PIO_##name##_SIZE) - 1) << PIO_##name##_OFFSET)) | PIO_BF(name,value))
171
172/* Register access macros */
173#define pio_readl(port,reg) readl((port)->regs + PIO_##reg)
174#define pio_writel(port,reg,value) writel((value), (port)->regs + PIO_##reg)
175
176void at32_init_pio(struct platform_device *pdev);
177
178#endif /* __ARCH_AVR32_AT32AP_PIO_H__ */
diff --git a/arch/avr32/mach-at32ap/sm.c b/arch/avr32/mach-at32ap/sm.c
new file mode 100644
index 000000000000..03306eb0345e
--- /dev/null
+++ b/arch/avr32/mach-at32ap/sm.c
@@ -0,0 +1,289 @@
1/*
2 * System Manager driver for AT32AP CPUs
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/errno.h>
12#include <linux/init.h>
13#include <linux/interrupt.h>
14#include <linux/kernel.h>
15#include <linux/platform_device.h>
16#include <linux/random.h>
17#include <linux/spinlock.h>
18
19#include <asm/intc.h>
20#include <asm/io.h>
21#include <asm/irq.h>
22
23#include <asm/arch/sm.h>
24
25#include "sm.h"
26
27#define SM_EIM_IRQ_RESOURCE 1
28#define SM_PM_IRQ_RESOURCE 2
29#define SM_RTC_IRQ_RESOURCE 3
30
/*
 * Get the at32_sm that embeds the given irq_controller.  The macro
 * parameter must not share the member's name: otherwise the member
 * token is replaced by the argument too, and the macro only works for
 * arguments spelled exactly "irqc".
 */
#define to_eim(ptr) container_of(ptr, struct at32_sm, irqc)
32
33struct at32_sm system_manager;
34
35int __init at32_sm_init(void)
36{
37 struct resource *regs;
38 struct at32_sm *sm = &system_manager;
39 int ret = -ENXIO;
40
41 regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0);
42 if (!regs)
43 goto fail;
44
45 spin_lock_init(&sm->lock);
46 sm->pdev = &at32_sm_device;
47
48 ret = -ENOMEM;
49 sm->regs = ioremap(regs->start, regs->end - regs->start + 1);
50 if (!sm->regs)
51 goto fail;
52
53 return 0;
54
55fail:
56 printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret);
57 return ret;
58}
59
60/*
61 * External Interrupt Module (EIM).
62 *
63 * EIM gets level- or edge-triggered interrupts of either polarity
64 * from the outside and converts it to active-high level-triggered
65 * interrupts that the internal interrupt controller can handle. EIM
66 * also provides masking/unmasking of interrupts, as well as
67 * acknowledging of edge-triggered interrupts.
68 */
69
70static irqreturn_t spurious_eim_interrupt(int irq, void *dev_id,
71 struct pt_regs *regs)
72{
73 printk(KERN_WARNING "Spurious EIM interrupt %d\n", irq);
74 disable_irq(irq);
75 return IRQ_NONE;
76}
77
78static struct irqaction eim_spurious_action = {
79 .handler = spurious_eim_interrupt,
80};
81
82static irqreturn_t eim_handle_irq(int irq, void *dev_id, struct pt_regs *regs)
83{
84 struct irq_controller * irqc = dev_id;
85 struct at32_sm *sm = to_eim(irqc);
86 unsigned long pending;
87
88 /*
89 * No need to disable interrupts globally. The interrupt
90 * level relevant to this group must be masked all the time,
91 * so we know that this particular EIM instance will not be
92 * re-entered.
93 */
94 spin_lock(&sm->lock);
95
96 pending = intc_get_pending(sm->irqc.irq_group);
97 if (unlikely(!pending)) {
98 printk(KERN_ERR "EIM (group %u): No interrupts pending!\n",
99 sm->irqc.irq_group);
100 goto unlock;
101 }
102
103 do {
104 struct irqaction *action;
105 unsigned int i;
106
107 i = fls(pending) - 1;
108 pending &= ~(1 << i);
109 action = sm->action[i];
110
111 /* Acknowledge the interrupt */
112 sm_writel(sm, EIM_ICR, 1 << i);
113
114 spin_unlock(&sm->lock);
115
116 if (action->flags & SA_INTERRUPT)
117 local_irq_disable();
118 action->handler(sm->irqc.first_irq + i, action->dev_id, regs);
119 local_irq_enable();
120 spin_lock(&sm->lock);
121 if (action->flags & SA_SAMPLE_RANDOM)
122 add_interrupt_randomness(sm->irqc.first_irq + i);
123 } while (pending);
124
125unlock:
126 spin_unlock(&sm->lock);
127 return IRQ_HANDLED;
128}
129
130static void eim_mask(struct irq_controller *irqc, unsigned int irq)
131{
132 struct at32_sm *sm = to_eim(irqc);
133 unsigned int i;
134
135 i = irq - sm->irqc.first_irq;
136 sm_writel(sm, EIM_IDR, 1 << i);
137}
138
139static void eim_unmask(struct irq_controller *irqc, unsigned int irq)
140{
141 struct at32_sm *sm = to_eim(irqc);
142 unsigned int i;
143
144 i = irq - sm->irqc.first_irq;
145 sm_writel(sm, EIM_IER, 1 << i);
146}
147
148static int eim_setup(struct irq_controller *irqc, unsigned int irq,
149 struct irqaction *action)
150{
151 struct at32_sm *sm = to_eim(irqc);
152 sm->action[irq - sm->irqc.first_irq] = action;
153 /* Acknowledge earlier interrupts */
154 sm_writel(sm, EIM_ICR, (1<<(irq - sm->irqc.first_irq)));
155 eim_unmask(irqc, irq);
156 return 0;
157}
158
159static void eim_free(struct irq_controller *irqc, unsigned int irq,
160 void *dev)
161{
162 struct at32_sm *sm = to_eim(irqc);
163 eim_mask(irqc, irq);
164 sm->action[irq - sm->irqc.first_irq] = &eim_spurious_action;
165}
166
167static int eim_set_type(struct irq_controller *irqc, unsigned int irq,
168 unsigned int type)
169{
170 struct at32_sm *sm = to_eim(irqc);
171 unsigned long flags;
172 u32 value, pattern;
173
174 spin_lock_irqsave(&sm->lock, flags);
175
176 pattern = 1 << (irq - sm->irqc.first_irq);
177
178 value = sm_readl(sm, EIM_MODE);
179 if (type & IRQ_TYPE_LEVEL)
180 value |= pattern;
181 else
182 value &= ~pattern;
183 sm_writel(sm, EIM_MODE, value);
184 value = sm_readl(sm, EIM_EDGE);
185 if (type & IRQ_EDGE_RISING)
186 value |= pattern;
187 else
188 value &= ~pattern;
189 sm_writel(sm, EIM_EDGE, value);
190 value = sm_readl(sm, EIM_LEVEL);
191 if (type & IRQ_LEVEL_HIGH)
192 value |= pattern;
193 else
194 value &= ~pattern;
195 sm_writel(sm, EIM_LEVEL, value);
196
197 spin_unlock_irqrestore(&sm->lock, flags);
198
199 return 0;
200}
201
202static unsigned int eim_get_type(struct irq_controller *irqc,
203 unsigned int irq)
204{
205 struct at32_sm *sm = to_eim(irqc);
206 unsigned long flags;
207 unsigned int type = 0;
208 u32 mode, edge, level, pattern;
209
210 pattern = 1 << (irq - sm->irqc.first_irq);
211
212 spin_lock_irqsave(&sm->lock, flags);
213 mode = sm_readl(sm, EIM_MODE);
214 edge = sm_readl(sm, EIM_EDGE);
215 level = sm_readl(sm, EIM_LEVEL);
216 spin_unlock_irqrestore(&sm->lock, flags);
217
218 if (mode & pattern)
219 type |= IRQ_TYPE_LEVEL;
220 if (edge & pattern)
221 type |= IRQ_EDGE_RISING;
222 if (level & pattern)
223 type |= IRQ_LEVEL_HIGH;
224
225 return type;
226}
227
228static struct irq_controller_class eim_irq_class = {
229 .typename = "EIM",
230 .handle = eim_handle_irq,
231 .setup = eim_setup,
232 .free = eim_free,
233 .mask = eim_mask,
234 .unmask = eim_unmask,
235 .set_type = eim_set_type,
236 .get_type = eim_get_type,
237};
238
239static int __init eim_init(void)
240{
241 struct at32_sm *sm = &system_manager;
242 unsigned int i;
243 u32 pattern;
244 int ret;
245
246 /*
247 * The EIM is really the same module as SM, so register
248 * mapping, etc. has been taken care of already.
249 */
250
251 /*
252 * Find out how many interrupt lines that are actually
253 * implemented in hardware.
254 */
255 sm_writel(sm, EIM_IDR, ~0UL);
256 sm_writel(sm, EIM_MODE, ~0UL);
257 pattern = sm_readl(sm, EIM_MODE);
258 sm->irqc.nr_irqs = fls(pattern);
259
260 ret = -ENOMEM;
261 sm->action = kmalloc(sizeof(*sm->action) * sm->irqc.nr_irqs,
262 GFP_KERNEL);
263 if (!sm->action)
264 goto out;
265
266 for (i = 0; i < sm->irqc.nr_irqs; i++)
267 sm->action[i] = &eim_spurious_action;
268
269 spin_lock_init(&sm->lock);
270 sm->irqc.irq_group = sm->pdev->resource[SM_EIM_IRQ_RESOURCE].start;
271 sm->irqc.class = &eim_irq_class;
272
273 ret = intc_register_controller(&sm->irqc);
274 if (ret < 0)
275 goto out_free_actions;
276
277 printk("EIM: External Interrupt Module at 0x%p, IRQ group %u\n",
278 sm->regs, sm->irqc.irq_group);
279 printk("EIM: Handling %u external IRQs, starting with IRQ%u\n",
280 sm->irqc.nr_irqs, sm->irqc.first_irq);
281
282 return 0;
283
284out_free_actions:
285 kfree(sm->action);
286out:
287 return ret;
288}
289arch_initcall(eim_init);
diff --git a/arch/avr32/mach-at32ap/sm.h b/arch/avr32/mach-at32ap/sm.h
new file mode 100644
index 000000000000..27565822ae2a
--- /dev/null
+++ b/arch/avr32/mach-at32ap/sm.h
@@ -0,0 +1,240 @@
1/*
2 * Register definitions for SM
3 *
4 * System Manager
5 */
6#ifndef __ASM_AVR32_SM_H__
7#define __ASM_AVR32_SM_H__
8
9/* SM register offsets */
10#define SM_PM_MCCTRL 0x0000
11#define SM_PM_CKSEL 0x0004
12#define SM_PM_CPU_MASK 0x0008
13#define SM_PM_HSB_MASK 0x000c
14#define SM_PM_PBA_MASK 0x0010
15#define SM_PM_PBB_MASK 0x0014
16#define SM_PM_PLL0 0x0020
17#define SM_PM_PLL1 0x0024
18#define SM_PM_VCTRL 0x0030
19#define SM_PM_VMREF 0x0034
20#define SM_PM_VMV 0x0038
21#define SM_PM_IER 0x0040
22#define SM_PM_IDR 0x0044
23#define SM_PM_IMR 0x0048
24#define SM_PM_ISR 0x004c
25#define SM_PM_ICR 0x0050
26#define SM_PM_GCCTRL 0x0060
27#define SM_RTC_CTRL 0x0080
28#define SM_RTC_VAL 0x0084
29#define SM_RTC_TOP 0x0088
30#define SM_RTC_IER 0x0090
31#define SM_RTC_IDR 0x0094
32#define SM_RTC_IMR 0x0098
33#define SM_RTC_ISR 0x009c
34#define SM_RTC_ICR 0x00a0
35#define SM_WDT_CTRL 0x00b0
36#define SM_WDT_CLR 0x00b4
37#define SM_WDT_EXT 0x00b8
38#define SM_RC_RCAUSE 0x00c0
39#define SM_EIM_IER 0x0100
40#define SM_EIM_IDR 0x0104
41#define SM_EIM_IMR 0x0108
42#define SM_EIM_ISR 0x010c
43#define SM_EIM_ICR 0x0110
44#define SM_EIM_MODE 0x0114
45#define SM_EIM_EDGE 0x0118
46#define SM_EIM_LEVEL 0x011c
47#define SM_EIM_TEST 0x0120
48#define SM_EIM_NMIC 0x0124
49
50/* Bitfields in PM_MCCTRL */
51
52/* Bitfields in PM_CKSEL */
53#define SM_CPUSEL_OFFSET 0
54#define SM_CPUSEL_SIZE 3
55#define SM_CPUDIV_OFFSET 7
56#define SM_CPUDIV_SIZE 1
57#define SM_HSBSEL_OFFSET 8
58#define SM_HSBSEL_SIZE 3
59#define SM_HSBDIV_OFFSET 15
60#define SM_HSBDIV_SIZE 1
61#define SM_PBASEL_OFFSET 16
62#define SM_PBASEL_SIZE 3
63#define SM_PBADIV_OFFSET 23
64#define SM_PBADIV_SIZE 1
65#define SM_PBBSEL_OFFSET 24
66#define SM_PBBSEL_SIZE 3
67#define SM_PBBDIV_OFFSET 31
68#define SM_PBBDIV_SIZE 1
69
70/* Bitfields in PM_CPU_MASK */
71
72/* Bitfields in PM_HSB_MASK */
73
74/* Bitfields in PM_PBA_MASK */
75
76/* Bitfields in PM_PBB_MASK */
77
78/* Bitfields in PM_PLL0 */
79#define SM_PLLEN_OFFSET 0
80#define SM_PLLEN_SIZE 1
81#define SM_PLLOSC_OFFSET 1
82#define SM_PLLOSC_SIZE 1
83#define SM_PLLOPT_OFFSET 2
84#define SM_PLLOPT_SIZE 3
85#define SM_PLLDIV_OFFSET 8
86#define SM_PLLDIV_SIZE 8
87#define SM_PLLMUL_OFFSET 16
88#define SM_PLLMUL_SIZE 8
89#define SM_PLLCOUNT_OFFSET 24
90#define SM_PLLCOUNT_SIZE 6
91#define SM_PLLTEST_OFFSET 31
92#define SM_PLLTEST_SIZE 1
93
94/* Bitfields in PM_PLL1 */
95
96/* Bitfields in PM_VCTRL */
97#define SM_VAUTO_OFFSET 0
98#define SM_VAUTO_SIZE 1
99#define SM_PM_VCTRL_VAL_OFFSET 8
100#define SM_PM_VCTRL_VAL_SIZE 7
101
102/* Bitfields in PM_VMREF */
103#define SM_REFSEL_OFFSET 0
104#define SM_REFSEL_SIZE 4
105
106/* Bitfields in PM_VMV */
107#define SM_PM_VMV_VAL_OFFSET 0
108#define SM_PM_VMV_VAL_SIZE 8
109
110/* Bitfields in PM_IER */
111
112/* Bitfields in PM_IDR */
113
114/* Bitfields in PM_IMR */
115
116/* Bitfields in PM_ISR */
117
118/* Bitfields in PM_ICR */
119#define SM_LOCK0_OFFSET 0
120#define SM_LOCK0_SIZE 1
121#define SM_LOCK1_OFFSET 1
122#define SM_LOCK1_SIZE 1
123#define SM_WAKE_OFFSET 2
124#define SM_WAKE_SIZE 1
125#define SM_VOK_OFFSET 3
126#define SM_VOK_SIZE 1
127#define SM_VMRDY_OFFSET 4
128#define SM_VMRDY_SIZE 1
129#define SM_CKRDY_OFFSET 5
130#define SM_CKRDY_SIZE 1
131
132/* Bitfields in PM_GCCTRL */
133#define SM_OSCSEL_OFFSET 0
134#define SM_OSCSEL_SIZE 1
135#define SM_PLLSEL_OFFSET 1
136#define SM_PLLSEL_SIZE 1
137#define SM_CEN_OFFSET 2
138#define SM_CEN_SIZE 1
139#define SM_CPC_OFFSET 3
140#define SM_CPC_SIZE 1
141#define SM_DIVEN_OFFSET 4
142#define SM_DIVEN_SIZE 1
143#define SM_DIV_OFFSET 8
144#define SM_DIV_SIZE 8
145
146/* Bitfields in RTC_CTRL */
147#define SM_PCLR_OFFSET 1
148#define SM_PCLR_SIZE 1
149#define SM_TOPEN_OFFSET 2
150#define SM_TOPEN_SIZE 1
151#define SM_CLKEN_OFFSET 3
152#define SM_CLKEN_SIZE 1
153#define SM_PSEL_OFFSET 8
154#define SM_PSEL_SIZE 16
155
156/* Bitfields in RTC_VAL */
157#define SM_RTC_VAL_VAL_OFFSET 0
158#define SM_RTC_VAL_VAL_SIZE 31
159
160/* Bitfields in RTC_TOP */
161#define SM_RTC_TOP_VAL_OFFSET 0
162#define SM_RTC_TOP_VAL_SIZE 32
163
164/* Bitfields in RTC_IER */
165
166/* Bitfields in RTC_IDR */
167
168/* Bitfields in RTC_IMR */
169
170/* Bitfields in RTC_ISR */
171
172/* Bitfields in RTC_ICR */
173#define SM_TOPI_OFFSET 0
174#define SM_TOPI_SIZE 1
175
176/* Bitfields in WDT_CTRL */
177#define SM_KEY_OFFSET 24
178#define SM_KEY_SIZE 8
179
180/* Bitfields in WDT_CLR */
181
182/* Bitfields in WDT_EXT */
183
184/* Bitfields in RC_RCAUSE */
185#define SM_POR_OFFSET 0
186#define SM_POR_SIZE 1
187#define SM_BOD_OFFSET 1
188#define SM_BOD_SIZE 1
189#define SM_EXT_OFFSET 2
190#define SM_EXT_SIZE 1
191#define SM_WDT_OFFSET 3
192#define SM_WDT_SIZE 1
193#define SM_NTAE_OFFSET 4
194#define SM_NTAE_SIZE 1
195#define SM_SERP_OFFSET 5
196#define SM_SERP_SIZE 1
197
198/* Bitfields in EIM_IER */
199
200/* Bitfields in EIM_IDR */
201
202/* Bitfields in EIM_IMR */
203
204/* Bitfields in EIM_ISR */
205
206/* Bitfields in EIM_ICR */
207
208/* Bitfields in EIM_MODE */
209
210/* Bitfields in EIM_EDGE */
211#define SM_INT0_OFFSET 0
212#define SM_INT0_SIZE 1
213#define SM_INT1_OFFSET 1
214#define SM_INT1_SIZE 1
215#define SM_INT2_OFFSET 2
216#define SM_INT2_SIZE 1
217#define SM_INT3_OFFSET 3
218#define SM_INT3_SIZE 1
219
220/* Bitfields in EIM_LEVEL */
221
222/* Bitfields in EIM_TEST */
223#define SM_TESTEN_OFFSET 31
224#define SM_TESTEN_SIZE 1
225
226/* Bitfields in EIM_NMIC */
227#define SM_EN_OFFSET 0
228#define SM_EN_SIZE 1
229
230/* Bit manipulation macros */
/*
 * All constants are unsigned 32-bit values.  The field mask is built by
 * shifting an all-ones word right, which stays defined for a SIZE of 32
 * (RTC_TOP_VAL), where (1 << SIZE) would shift by the full type width.
 * Fields at bit 31 (PBBDIV, PLLTEST, TESTEN) likewise never shift into
 * a sign bit.  SIZE must be in the range 1..32.
 */
#define SM_BIT(name) (1U << SM_##name##_OFFSET)
#define SM_BF(name,value) (((value) & (0xffffffffU >> (32 - SM_##name##_SIZE))) << SM_##name##_OFFSET)
#define SM_BFEXT(name,value) (((value) >> SM_##name##_OFFSET) & (0xffffffffU >> (32 - SM_##name##_SIZE)))
#define SM_BFINS(name,value,old) (((old) & ~((0xffffffffU >> (32 - SM_##name##_SIZE)) << SM_##name##_OFFSET)) | SM_BF(name,value))
235
236/* Register access macros */
237#define sm_readl(port,reg) readl((port)->regs + SM_##reg)
238#define sm_writel(port,reg,value) writel((value), (port)->regs + SM_##reg)
239
240#endif /* __ASM_AVR32_SM_H__ */
diff --git a/arch/avr32/mm/Makefile b/arch/avr32/mm/Makefile
new file mode 100644
index 000000000000..0066491f90d4
--- /dev/null
+++ b/arch/avr32/mm/Makefile
@@ -0,0 +1,6 @@
1#
2# Makefile for the Linux/AVR32 kernel.
3#
4
5obj-y += init.o clear_page.o copy_page.o dma-coherent.o
6obj-y += ioremap.o cache.o fault.o tlb.o
diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c
new file mode 100644
index 000000000000..450515b245a0
--- /dev/null
+++ b/arch/avr32/mm/cache.c
@@ -0,0 +1,150 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/highmem.h>
10#include <linux/unistd.h>
11
12#include <asm/cacheflush.h>
13#include <asm/cachectl.h>
14#include <asm/processor.h>
15#include <asm/uaccess.h>
16
17/*
18 * If you attempt to flush anything more than this, you need superuser
19 * privileges. The value is completely arbitrary.
20 */
21#define CACHEFLUSH_MAX_LEN 1024
22
23void invalidate_dcache_region(void *start, size_t size)
24{
25 unsigned long v, begin, end, linesz;
26
27 linesz = boot_cpu_data.dcache.linesz;
28
29 //printk("invalidate dcache: %p + %u\n", start, size);
30
31 /* You asked for it, you got it */
32 begin = (unsigned long)start & ~(linesz - 1);
33 end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
34
35 for (v = begin; v < end; v += linesz)
36 invalidate_dcache_line((void *)v);
37}
38
39void clean_dcache_region(void *start, size_t size)
40{
41 unsigned long v, begin, end, linesz;
42
43 linesz = boot_cpu_data.dcache.linesz;
44 begin = (unsigned long)start & ~(linesz - 1);
45 end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
46
47 for (v = begin; v < end; v += linesz)
48 clean_dcache_line((void *)v);
49 flush_write_buffer();
50}
51
52void flush_dcache_region(void *start, size_t size)
53{
54 unsigned long v, begin, end, linesz;
55
56 linesz = boot_cpu_data.dcache.linesz;
57 begin = (unsigned long)start & ~(linesz - 1);
58 end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
59
60 for (v = begin; v < end; v += linesz)
61 flush_dcache_line((void *)v);
62 flush_write_buffer();
63}
64
65void invalidate_icache_region(void *start, size_t size)
66{
67 unsigned long v, begin, end, linesz;
68
69 linesz = boot_cpu_data.icache.linesz;
70 begin = (unsigned long)start & ~(linesz - 1);
71 end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
72
73 for (v = begin; v < end; v += linesz)
74 invalidate_icache_line((void *)v);
75}
76
77static inline void __flush_icache_range(unsigned long start, unsigned long end)
78{
79 unsigned long v, linesz;
80
81 linesz = boot_cpu_data.dcache.linesz;
82 for (v = start; v < end; v += linesz) {
83 clean_dcache_line((void *)v);
84 invalidate_icache_line((void *)v);
85 }
86
87 flush_write_buffer();
88}
89
90/*
91 * This one is called after a module has been loaded.
92 */
93void flush_icache_range(unsigned long start, unsigned long end)
94{
95 unsigned long linesz;
96
97 linesz = boot_cpu_data.dcache.linesz;
98 __flush_icache_range(start & ~(linesz - 1),
99 (end + linesz - 1) & ~(linesz - 1));
100}
101
102/*
103 * This one is called from do_no_page(), do_swap_page() and install_page().
104 */
105void flush_icache_page(struct vm_area_struct *vma, struct page *page)
106{
107 if (vma->vm_flags & VM_EXEC) {
108 void *v = kmap(page);
109 __flush_icache_range((unsigned long)v, (unsigned long)v + PAGE_SIZE);
110 kunmap(v);
111 }
112}
113
114/*
115 * This one is used by copy_to_user_page()
116 */
117void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
118 unsigned long addr, int len)
119{
120 if (vma->vm_flags & VM_EXEC)
121 flush_icache_range(addr, addr + len);
122}
123
124asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len)
125{
126 int ret;
127
128 if (len > CACHEFLUSH_MAX_LEN) {
129 ret = -EPERM;
130 if (!capable(CAP_SYS_ADMIN))
131 goto out;
132 }
133
134 ret = -EFAULT;
135 if (!access_ok(VERIFY_WRITE, addr, len))
136 goto out;
137
138 switch (operation) {
139 case CACHE_IFLUSH:
140 flush_icache_range((unsigned long)addr,
141 (unsigned long)addr + len);
142 ret = 0;
143 break;
144 default:
145 ret = -EINVAL;
146 }
147
148out:
149 return ret;
150}
diff --git a/arch/avr32/mm/clear_page.S b/arch/avr32/mm/clear_page.S
new file mode 100644
index 000000000000..5d70dca00699
--- /dev/null
+++ b/arch/avr32/mm/clear_page.S
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/linkage.h>
10#include <asm/page.h>
11
12/*
13 * clear_page: fill one page (PAGE_SIZE bytes) starting at r12 with zeroes
14 * r12: P1 address (to); r9-r11 are overwritten
15 */
16 .text
17 .global clear_page
18clear_page:
19 sub r9, r12, -PAGE_SIZE /* r9 = r12 + PAGE_SIZE: end-of-page marker */
20 mov r10, 0
21 mov r11, 0
220: st.d r12++, r10 /* store a zero doubleword (presumably the r10/r11 pair) and advance r12 */
23 cp r12, r9
24 brne 0b /* loop until r12 reaches the end marker */
25 mov pc, lr /* return to caller */
diff --git a/arch/avr32/mm/copy_page.S b/arch/avr32/mm/copy_page.S
new file mode 100644
index 000000000000..c2b3752946b8
--- /dev/null
+++ b/arch/avr32/mm/copy_page.S
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/linkage.h>
9#include <asm/page.h>
10
11/*
12 * copy_page: copy one page (1 << PAGE_SHIFT bytes) a doubleword at a time
13 *
14 * r12 to (P1 address)
15 * r11 from (P1 address)
16 * r8-r10 scratch
17 */
18 .text
19 .global copy_page
20copy_page:
21 sub r10, r11, -(1 << PAGE_SHIFT) /* r10 = source + page size: end marker */
22 /* pref r11[0] */
231: /* pref r11[8] */
24 ld.d r8, r11++ /* load a doubleword from the source, advance r11 */
25 st.d r12++, r8 /* store it to the destination, advance r12 */
26 cp r11, r10
27 brlo 1b /* loop while the source pointer is below the end marker */
28 mov pc, lr /* return to caller */
diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c
new file mode 100644
index 000000000000..44ab8a7bdae2
--- /dev/null
+++ b/arch/avr32/mm/dma-coherent.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/dma-mapping.h>
10
11#include <asm/addrspace.h>
12#include <asm/cacheflush.h>
13
14void dma_cache_sync(void *vaddr, size_t size, int direction)
15{
16 /*
17 * No need to sync an uncached area
18 */
19 if (PXSEG(vaddr) == P2SEG)
20 return;
21
22 switch (direction) {
23 case DMA_FROM_DEVICE: /* invalidate only */
24 dma_cache_inv(vaddr, size);
25 break;
26 case DMA_TO_DEVICE: /* writeback only */
27 dma_cache_wback(vaddr, size);
28 break;
29 case DMA_BIDIRECTIONAL: /* writeback and invalidate */
30 dma_cache_wback_inv(vaddr, size);
31 break;
32 default:
33 BUG();
34 }
35}
36EXPORT_SYMBOL(dma_cache_sync);
37
38static struct page *__dma_alloc(struct device *dev, size_t size,
39 dma_addr_t *handle, gfp_t gfp)
40{
41 struct page *page, *free, *end;
42 int order;
43
44 size = PAGE_ALIGN(size);
45 order = get_order(size);
46
47 page = alloc_pages(gfp, order);
48 if (!page)
49 return NULL;
50 split_page(page, order);
51
52 /*
53 * When accessing physical memory with valid cache data, we
54 * get a cache hit even if the virtual memory region is marked
55 * as uncached.
56 *
57 * Since the memory is newly allocated, there is no point in
58 * doing a writeback. If the previous owner cares, he should
59 * have flushed the cache before releasing the memory.
60 */
61 invalidate_dcache_region(phys_to_virt(page_to_phys(page)), size);
62
63 *handle = page_to_bus(page);
64 free = page + (size >> PAGE_SHIFT);
65 end = page + (1 << order);
66
67 /*
68 * Free any unused pages
69 */
70 while (free < end) {
71 __free_page(free);
72 free++;
73 }
74
75 return page;
76}
77
78static void __dma_free(struct device *dev, size_t size,
79 struct page *page, dma_addr_t handle)
80{
81 struct page *end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT);
82
83 while (page < end)
84 __free_page(page++);
85}
86
87void *dma_alloc_coherent(struct device *dev, size_t size,
88 dma_addr_t *handle, gfp_t gfp)
89{
90 struct page *page;
91 void *ret = NULL;
92
93 page = __dma_alloc(dev, size, handle, gfp);
94 if (page)
95 ret = phys_to_uncached(page_to_phys(page));
96
97 return ret;
98}
99EXPORT_SYMBOL(dma_alloc_coherent);
100
101void dma_free_coherent(struct device *dev, size_t size,
102 void *cpu_addr, dma_addr_t handle)
103{
104 void *addr = phys_to_cached(uncached_to_phys(cpu_addr));
105 struct page *page;
106
107 pr_debug("dma_free_coherent addr %p (phys %08lx) size %u\n",
108 cpu_addr, (unsigned long)handle, (unsigned)size);
109 BUG_ON(!virt_addr_valid(addr));
110 page = virt_to_page(addr);
111 __dma_free(dev, size, page, handle);
112}
113EXPORT_SYMBOL(dma_free_coherent);
114
115#if 0
116void *dma_alloc_writecombine(struct device *dev, size_t size,
117 dma_addr_t *handle, gfp_t gfp)
118{
119 struct page *page;
120
121 page = __dma_alloc(dev, size, handle, gfp);
122
123 /* Now, map the page into P3 with write-combining turned on */
124 return __ioremap(page_to_phys(page), size, _PAGE_BUFFER);
125}
126EXPORT_SYMBOL(dma_alloc_writecombine);
127
128void dma_free_writecombine(struct device *dev, size_t size,
129 void *cpu_addr, dma_addr_t handle)
130{
131 struct page *page;
132
133 iounmap(cpu_addr);
134
135 page = bus_to_page(handle);
136 __dma_free(dev, size, page, handle);
137}
138EXPORT_SYMBOL(dma_free_writecombine);
139#endif
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
new file mode 100644
index 000000000000..678557260a35
--- /dev/null
+++ b/arch/avr32/mm/fault.c
@@ -0,0 +1,315 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * Based on linux/arch/sh/mm/fault.c:
5 * Copyright (C) 1999 Niibe Yutaka
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/pagemap.h>
15
16#include <asm/kdebug.h>
17#include <asm/mmu_context.h>
18#include <asm/sysreg.h>
19#include <asm/uaccess.h>
20#include <asm/tlb.h>
21
22#ifdef DEBUG
23static void dump_code(unsigned long pc)
24{
25 char *p = (char *)pc;
26 char val;
27 int i;
28
29
30 printk(KERN_DEBUG "Code:");
31 for (i = 0; i < 16; i++) {
32 if (__get_user(val, p + i))
33 break;
34 printk(" %02x", val);
35 }
36 printk("\n");
37}
38#endif
39
40#ifdef CONFIG_KPROBES
41ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
42
43/* Hook to register for page fault notifications */
44int register_page_fault_notifier(struct notifier_block *nb)
45{
46 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
47}
48
49int unregister_page_fault_notifier(struct notifier_block *nb)
50{
51 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
52}
53
54static inline int notify_page_fault(enum die_val val, struct pt_regs *regs,
55 int trap, int sig)
56{
57 struct die_args args = {
58 .regs = regs,
59 .trapnr = trap,
60 };
61 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
62}
63#else
64static inline int notify_page_fault(enum die_val val, struct pt_regs *regs,
65 int trap, int sig)
66{
67 return NOTIFY_DONE;
68}
69#endif
70
71/*
72 * This routine handles page faults. It determines the address and the
73 * problem, and then passes it off to one of the appropriate routines.
74 * regs is the saved register state; regs->pc is rewritten when an exception-table fixup applies.
75 * ecr is the Exception Cause Register. Possible values are:
76 * 5: Page not found (instruction access)
77 * 6: Protection fault (instruction access)
78 * 12: Page not found (read access)
79 * 13: Page not found (write access)
80 * 14: Protection fault (read access)
81 * 15: Protection fault (write access)
82 */
83asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
84{
85 struct task_struct *tsk;
86 struct mm_struct *mm;
87 struct vm_area_struct *vma;
88 const struct exception_table_entry *fixup;
89 unsigned long address;
90 unsigned long page;
91 int writeaccess = 0;
92
93 if (notify_page_fault(DIE_PAGE_FAULT, regs,
94 ecr, SIGSEGV) == NOTIFY_STOP)
95 return; /* a registered notifier claimed the fault */
96
97 address = sysreg_read(TLBEAR); /* used below as the faulting virtual address */
98
99 tsk = current;
100 mm = tsk->mm;
101
102 /*
103 * If we're in an interrupt or have no user context, we must
104 * not take the fault...
105 */
106 if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
107 goto no_context;
108
109 local_irq_enable();
110
111 down_read(&mm->mmap_sem); /* dropped again on every path below */
112
113 vma = find_vma(mm, address);
114 if (!vma)
115 goto bad_area;
116 if (vma->vm_start <= address)
117 goto good_area;
118 if (!(vma->vm_flags & VM_GROWSDOWN))
119 goto bad_area;
120 if (expand_stack(vma, address)) /* non-zero: the stack could not be grown */
121 goto bad_area;
122
123 /*
124 * Ok, we have a good vm_area for this memory access, so we
125 * can handle it...
126 */
127good_area:
128 //pr_debug("good area: vm_flags = 0x%lx\n", vma->vm_flags);
129 switch (ecr) {
130 case ECR_PROTECTION_X:
131 case ECR_TLB_MISS_X:
132 if (!(vma->vm_flags & VM_EXEC))
133 goto bad_area;
134 break;
135 case ECR_PROTECTION_R:
136 case ECR_TLB_MISS_R:
137 if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
138 goto bad_area;
139 break;
140 case ECR_PROTECTION_W:
141 case ECR_TLB_MISS_W:
142 if (!(vma->vm_flags & VM_WRITE))
143 goto bad_area;
144 writeaccess = 1; /* passed on to handle_mm_fault() */
145 break;
146 default:
147 panic("Unhandled case %lu in do_page_fault!", ecr);
148 }
149
150 /*
151 * If for any reason at all we couldn't handle the fault, make
152 * sure we exit gracefully rather than endlessly redo the
153 * fault.
154 */
155survive:
156 switch (handle_mm_fault(mm, vma, address, writeaccess)) {
157 case VM_FAULT_MINOR:
158 tsk->min_flt++;
159 break;
160 case VM_FAULT_MAJOR:
161 tsk->maj_flt++;
162 break;
163 case VM_FAULT_SIGBUS:
164 goto do_sigbus;
165 case VM_FAULT_OOM:
166 goto out_of_memory;
167 default:
168 BUG();
169 }
170
171 up_read(&mm->mmap_sem);
172 return;
173
174 /*
175 * Something tried to access memory that isn't in our memory
176 * map. Fix it, but check if it's kernel or user first...
177 */
178bad_area:
179 pr_debug("Bad area [%s:%u]: addr %08lx, ecr %lu\n",
180 tsk->comm, tsk->pid, address, ecr);
181
182 up_read(&mm->mmap_sem);
183
184 if (user_mode(regs)) {
185 /* Hmm...we have to pass address and ecr somehow... */
186 /* tsk->thread.address = address;
187 tsk->thread.error_code = ecr; */
188#ifdef DEBUG
189 show_regs(regs);
190 dump_code(regs->pc);
191
192 page = sysreg_read(PTBR);
193 printk("ptbr = %08lx", page);
194 if (page) {
195 page = ((unsigned long *)page)[address >> 22];
196 printk(" pgd = %08lx", page);
197 if (page & _PAGE_PRESENT) {
198 page &= PAGE_MASK;
199 address &= 0x003ff000;
200 page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
201 printk(" pte = %08lx\n", page);
202 }
203 }
204#endif
205 pr_debug("Sending SIGSEGV to PID %d...\n",
206 tsk->pid);
207 force_sig(SIGSEGV, tsk);
208 return;
209 }
210
211no_context:
212 pr_debug("No context\n");
213
214 /* Are we prepared to handle this kernel fault? */
215 fixup = search_exception_tables(regs->pc);
216 if (fixup) {
217 regs->pc = fixup->fixup; /* resume at the fixup handler instead */
218 pr_debug("Found fixup at %08lx\n", fixup->fixup);
219 return;
220 }
221
222 /*
223 * Oops. The kernel tried to access some bad page. We'll have
224 * to terminate things with extreme prejudice.
225 */
226 if (address < PAGE_SIZE)
227 printk(KERN_ALERT
228 "Unable to handle kernel NULL pointer dereference");
229 else
230 printk(KERN_ALERT
231 "Unable to handle kernel paging request");
232 printk(" at virtual address %08lx\n", address);
233 printk(KERN_ALERT "pc = %08lx\n", regs->pc);
234
235 page = sysreg_read(PTBR);
236 printk(KERN_ALERT "ptbr = %08lx", page);
237 if (page) {
238 page = ((unsigned long *)page)[address >> 22];
239 printk(" pgd = %08lx", page);
240 if (page & _PAGE_PRESENT) {
241 page &= PAGE_MASK;
242 address &= 0x003ff000;
243 page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
244 printk(" pte = %08lx\n", page);
245 }
246 }
247 die("\nOops", regs, ecr);
248 do_exit(SIGKILL);
249
250 /*
251 * We ran out of memory, or some other thing happened to us
252 * that made us unable to handle the page fault gracefully.
253 */
254out_of_memory:
255 printk("Out of memory\n");
256 up_read(&mm->mmap_sem);
257 if (current->pid == 1) { /* pid 1 is not killed: yield and retry the fault */
258 yield();
259 down_read(&mm->mmap_sem);
260 goto survive;
261 }
262 printk("VM: Killing process %s\n", tsk->comm);
263 if (user_mode(regs))
264 do_exit(SIGKILL);
265 goto no_context; /* kernel-mode fault: try a fixup, otherwise oops */
266
267do_sigbus:
268 up_read(&mm->mmap_sem);
269
270 /*
271 * Send a sigbus, regardless of whether we were in kernel or
272 * user mode.
273 */
274 /* address, error_code, trap_no, ... */
275#ifdef DEBUG
276 show_regs(regs);
277 dump_code(regs->pc);
278#endif
279 pr_debug("Sending SIGBUS to PID %d...\n", tsk->pid);
280 force_sig(SIGBUS, tsk);
281
282 /* Kernel mode? Handle exceptions or die */
283 if (!user_mode(regs))
284 goto no_context;
285}
286
287asmlinkage void do_bus_error(unsigned long addr, int write_access,
288 struct pt_regs *regs)
289{
290 printk(KERN_ALERT
291 "Bus error at physical address 0x%08lx (%s access)\n",
292 addr, write_access ? "write" : "read");
293 printk(KERN_INFO "DTLB dump:\n");
294 dump_dtlb();
295 die("Bus Error", regs, write_access);
296 do_exit(SIGKILL);
297}
298
299/*
300 * This functionality is currently not possible to implement because
301 * we're using segmentation to ensure a fixed mapping of the kernel
302 * virtual address space.
303 *
304 * It would be possible to implement this, but it would require us to
305 * disable segmentation at startup and load the kernel mappings into
306 * the TLB like any other pages. There will be lots of trickery to
307 * avoid recursive invocation of the TLB miss handler, though...
308 */
309#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	/* Intentionally empty: all three arguments are ignored. */
}
EXPORT_SYMBOL(kernel_map_pages);
315#endif
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
new file mode 100644
index 000000000000..3e6c41039808
--- /dev/null
+++ b/arch/avr32/mm/init.c
@@ -0,0 +1,480 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/kernel.h>
10#include <linux/mm.h>
11#include <linux/swap.h>
12#include <linux/init.h>
13#include <linux/initrd.h>
14#include <linux/mmzone.h>
15#include <linux/bootmem.h>
16#include <linux/pagemap.h>
17#include <linux/pfn.h>
18#include <linux/nodemask.h>
19
20#include <asm/page.h>
21#include <asm/mmu_context.h>
22#include <asm/tlb.h>
23#include <asm/io.h>
24#include <asm/dma.h>
25#include <asm/setup.h>
26#include <asm/sections.h>
27
28DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
29
30pgd_t swapper_pg_dir[PTRS_PER_PGD];
31
32struct page *empty_zero_page;
33
34/*
35 * Cache of MMU context last used.
36 */
37unsigned long mmu_context_cache = NO_CONTEXT;
38
39#define START_PFN (NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT)
40#define MAX_LOW_PFN (NODE_DATA(0)->bdata->node_low_pfn)
41
42void show_mem(void)
43{
44 int total = 0, reserved = 0, cached = 0;
45 int slab = 0, free = 0, shared = 0;
46 pg_data_t *pgdat;
47
48 printk("Mem-info:\n");
49 show_free_areas();
50
51 for_each_online_pgdat(pgdat) {
52 struct page *page, *end;
53
54 page = pgdat->node_mem_map;
55 end = page + pgdat->node_spanned_pages;
56
57 do {
58 total++;
59 if (PageReserved(page))
60 reserved++;
61 else if (PageSwapCache(page))
62 cached++;
63 else if (PageSlab(page))
64 slab++;
65 else if (!page_count(page))
66 free++;
67 else
68 shared += page_count(page) - 1;
69 page++;
70 } while (page < end);
71 }
72
73 printk ("%d pages of RAM\n", total);
74 printk ("%d free pages\n", free);
75 printk ("%d reserved pages\n", reserved);
76 printk ("%d slab pages\n", slab);
77 printk ("%d pages shared\n", shared);
78 printk ("%d pages swap cached\n", cached);
79}
80
81static void __init print_memory_map(const char *what,
82 struct tag_mem_range *mem)
83{
84 printk ("%s:\n", what);
85 for (; mem; mem = mem->next) {
86 printk (" %08lx - %08lx\n",
87 (unsigned long)mem->addr,
88 (unsigned long)(mem->addr + mem->size));
89 }
90}
91
92#define MAX_LOWMEM HIGHMEM_START
93#define MAX_LOWMEM_PFN PFN_DOWN(MAX_LOWMEM)
94
95/*
96 * Sort a list of memory regions in-place by ascending address.
97 *
98 * We're using bubble sort because we only have singly linked lists
99 * with few elements.
100 */
101static void __init sort_mem_list(struct tag_mem_range **pmem)
102{
103 int done;
104 struct tag_mem_range **a, **b;
105
106 if (!*pmem)
107 return;
108
109 do {
110 done = 1;
111 a = pmem, b = &(*pmem)->next;
112 while (*b) {
113 if ((*a)->addr > (*b)->addr) {
114 struct tag_mem_range *tmp;
115 tmp = (*b)->next;
116 (*b)->next = *a;
117 *a = *b;
118 *b = tmp;
119 done = 0;
120 }
121 a = &(*a)->next;
122 b = &(*a)->next;
123 }
124 } while (!done);
125}
126
127/*
128 * Find a free memory region large enough for storing the
129 * bootmem bitmap.
130 */
131static unsigned long __init
132find_bootmap_pfn(const struct tag_mem_range *mem)
133{
134 unsigned long bootmap_pages, bootmap_len;
135 unsigned long node_pages = PFN_UP(mem->size);
136 unsigned long bootmap_addr = mem->addr;
137 struct tag_mem_range *reserved = mem_reserved;
138 struct tag_mem_range *ramdisk = mem_ramdisk;
139 unsigned long kern_start = virt_to_phys(_stext);
140 unsigned long kern_end = virt_to_phys(_end);
141
142 bootmap_pages = bootmem_bootmap_pages(node_pages);
143 bootmap_len = bootmap_pages << PAGE_SHIFT;
144
145 /*
146 * Find a large enough region without reserved pages for
147 * storing the bootmem bitmap. We can take advantage of the
148 * fact that all lists have been sorted.
149 *
150 * We have to check explicitly reserved regions as well as the
151 * kernel image and any RAMDISK images...
152 *
153 * Oh, and we have to make sure we don't overwrite the taglist
154 * since we're going to use it until the bootmem allocator is
155 * fully up and running.
156 */
157 while (1) {
158 if ((bootmap_addr < kern_end) &&
159 ((bootmap_addr + bootmap_len) > kern_start))
160 bootmap_addr = kern_end;
161
162 while (reserved &&
163 (bootmap_addr >= (reserved->addr + reserved->size)))
164 reserved = reserved->next;
165
166 if (reserved &&
167 ((bootmap_addr + bootmap_len) >= reserved->addr)) {
168 bootmap_addr = reserved->addr + reserved->size;
169 continue;
170 }
171
172 while (ramdisk &&
173 (bootmap_addr >= (ramdisk->addr + ramdisk->size)))
174 ramdisk = ramdisk->next;
175
176 if (!ramdisk ||
177 ((bootmap_addr + bootmap_len) < ramdisk->addr))
178 break;
179
180 bootmap_addr = ramdisk->addr + ramdisk->size;
181 }
182
183 if ((PFN_UP(bootmap_addr) + bootmap_len) >= (mem->addr + mem->size))
184 return ~0UL;
185
186 return PFN_UP(bootmap_addr);
187}
188
189void __init setup_bootmem(void)
190{
191 unsigned bootmap_size;
192 unsigned long first_pfn, bootmap_pfn, pages;
193 unsigned long max_pfn, max_low_pfn;
194 unsigned long kern_start = virt_to_phys(_stext);
195 unsigned long kern_end = virt_to_phys(_end);
196 unsigned node = 0;
197 struct tag_mem_range *bank, *res;
198
199 sort_mem_list(&mem_phys);
200 sort_mem_list(&mem_reserved);
201
202 print_memory_map("Physical memory", mem_phys);
203 print_memory_map("Reserved memory", mem_reserved);
204
205 nodes_clear(node_online_map);
206
207 if (mem_ramdisk) {
208#ifdef CONFIG_BLK_DEV_INITRD
209 initrd_start = __va(mem_ramdisk->addr);
210 initrd_end = initrd_start + mem_ramdisk->size;
211
212 print_memory_map("RAMDISK images", mem_ramdisk);
213 if (mem_ramdisk->next)
214 printk(KERN_WARNING
215 "Warning: Only the first RAMDISK image "
216 "will be used\n");
217 sort_mem_list(&mem_ramdisk);
218#else
219 printk(KERN_WARNING "RAM disk image present, but "
220 "no initrd support in kernel!\n");
221#endif
222 }
223
224 if (mem_phys->next)
225 printk(KERN_WARNING "Only using first memory bank\n");
226
227 for (bank = mem_phys; bank; bank = NULL) {
228 first_pfn = PFN_UP(bank->addr);
229 max_low_pfn = max_pfn = PFN_DOWN(bank->addr + bank->size);
230 bootmap_pfn = find_bootmap_pfn(bank);
231 if (bootmap_pfn > max_pfn)
232 panic("No space for bootmem bitmap!\n");
233
234 if (max_low_pfn > MAX_LOWMEM_PFN) {
235 max_low_pfn = MAX_LOWMEM_PFN;
236#ifndef CONFIG_HIGHMEM
237 /*
238 * Lowmem is memory that can be addressed
239 * directly through P1/P2
240 */
241 printk(KERN_WARNING
242 "Node %u: Only %ld MiB of memory will be used.\n",
243 node, MAX_LOWMEM >> 20);
244 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
245#else
246#error HIGHMEM is not supported by AVR32 yet
247#endif
248 }
249
250 /* Initialize the boot-time allocator with low memory only. */
251 bootmap_size = init_bootmem_node(NODE_DATA(node), bootmap_pfn,
252 first_pfn, max_low_pfn);
253
254 printk("Node %u: bdata = %p, bdata->node_bootmem_map = %p\n",
255 node, NODE_DATA(node)->bdata,
256 NODE_DATA(node)->bdata->node_bootmem_map);
257
258 /*
259 * Register fully available RAM pages with the bootmem
260 * allocator.
261 */
262 pages = max_low_pfn - first_pfn;
263 free_bootmem_node (NODE_DATA(node), PFN_PHYS(first_pfn),
264 PFN_PHYS(pages));
265
266 /*
267 * Reserve space for the kernel image (if present in
268 * this node)...
269 */
270 if ((kern_start >= PFN_PHYS(first_pfn)) &&
271 (kern_start < PFN_PHYS(max_pfn))) {
272 printk("Node %u: Kernel image %08lx - %08lx\n",
273 node, kern_start, kern_end);
274 reserve_bootmem_node(NODE_DATA(node), kern_start,
275 kern_end - kern_start);
276 }
277
278 /* ...the bootmem bitmap... */
279 reserve_bootmem_node(NODE_DATA(node),
280 PFN_PHYS(bootmap_pfn),
281 bootmap_size);
282
283 /* ...any RAMDISK images... */
284 for (res = mem_ramdisk; res; res = res->next) {
285 if (res->addr > PFN_PHYS(max_pfn))
286 break;
287
288 if (res->addr >= PFN_PHYS(first_pfn)) {
289 printk("Node %u: RAMDISK %08lx - %08lx\n",
290 node,
291 (unsigned long)res->addr,
292 (unsigned long)(res->addr + res->size));
293 reserve_bootmem_node(NODE_DATA(node),
294 res->addr, res->size);
295 }
296 }
297
298 /* ...and any other reserved regions. */
299 for (res = mem_reserved; res; res = res->next) {
300 if (res->addr > PFN_PHYS(max_pfn))
301 break;
302
303 if (res->addr >= PFN_PHYS(first_pfn)) {
304 printk("Node %u: Reserved %08lx - %08lx\n",
305 node,
306 (unsigned long)res->addr,
307 (unsigned long)(res->addr + res->size));
308 reserve_bootmem_node(NODE_DATA(node),
309 res->addr, res->size);
310 }
311 }
312
313 node_set_online(node);
314 }
315}
316
317/*
318 * paging_init() sets up the page tables
319 *
320 * This routine also unmaps the page at virtual kernel address 0, so
321 * that we can trap those pesky NULL-reference errors in the kernel.
322 */
323void __init paging_init(void)
324{
325 extern unsigned long _evba;
326 void *zero_page;
327 int nid;
328
329 /*
330 * Make sure we can handle exceptions before enabling
331 * paging. Not that we should ever _get_ any exceptions this
332 * early, but you never know...
333 */
334 printk("Exception vectors start at %p\n", &_evba);
335 sysreg_write(EVBA, (unsigned long)&_evba);
336
337 /*
338 * Since we are ready to handle exceptions now, we should let
339 * the CPU generate them...
340 */
341 __asm__ __volatile__ ("csrf %0" : : "i"(SR_EM_BIT));
342
343 /*
344 * Allocate the zero page. The allocator will panic if it
345 * can't satisfy the request, so no need to check.
346 */
347 zero_page = alloc_bootmem_low_pages_node(NODE_DATA(0),
348 PAGE_SIZE);
349
350 {
351 pgd_t *pg_dir;
352 int i;
353
354 pg_dir = swapper_pg_dir;
355 sysreg_write(PTBR, (unsigned long)pg_dir);
356
357 for (i = 0; i < PTRS_PER_PGD; i++)
358 pgd_val(pg_dir[i]) = 0;
359
360 enable_mmu();
361 printk ("CPU: Paging enabled\n");
362 }
363
364 for_each_online_node(nid) {
365 pg_data_t *pgdat = NODE_DATA(nid);
366 unsigned long zones_size[MAX_NR_ZONES];
367 unsigned long low, start_pfn;
368
369 start_pfn = pgdat->bdata->node_boot_start;
370 start_pfn >>= PAGE_SHIFT;
371 low = pgdat->bdata->node_low_pfn;
372
373 memset(zones_size, 0, sizeof(zones_size));
374 zones_size[ZONE_NORMAL] = low - start_pfn;
375
376 printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
377 nid, start_pfn, low);
378
379 free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL);
380
381 printk("Node %u: mem_map starts at %p\n",
382 pgdat->node_id, pgdat->node_mem_map);
383 }
384
385 mem_map = NODE_DATA(0)->node_mem_map;
386
387 memset(zero_page, 0, PAGE_SIZE);
388 empty_zero_page = virt_to_page(zero_page);
389 flush_dcache_page(empty_zero_page);
390}
391
392void __init mem_init(void)
393{
394 int codesize, reservedpages, datasize, initsize;
395 int nid, i;
396
397 reservedpages = 0;
398 high_memory = NULL;
399
400 /* this will put all low memory onto the freelists */
401 for_each_online_node(nid) {
402 pg_data_t *pgdat = NODE_DATA(nid);
403 unsigned long node_pages = 0;
404 void *node_high_memory;
405
406 num_physpages += pgdat->node_present_pages;
407
408 if (pgdat->node_spanned_pages != 0)
409 node_pages = free_all_bootmem_node(pgdat);
410
411 totalram_pages += node_pages;
412
413 for (i = 0; i < node_pages; i++)
414 if (PageReserved(pgdat->node_mem_map + i))
415 reservedpages++;
416
417 node_high_memory = (void *)((pgdat->node_start_pfn
418 + pgdat->node_spanned_pages)
419 << PAGE_SHIFT);
420 if (node_high_memory > high_memory)
421 high_memory = node_high_memory;
422 }
423
424 max_mapnr = MAP_NR(high_memory);
425
426 codesize = (unsigned long)_etext - (unsigned long)_text;
427 datasize = (unsigned long)_edata - (unsigned long)_data;
428 initsize = (unsigned long)__init_end - (unsigned long)__init_begin;
429
430 printk ("Memory: %luk/%luk available (%dk kernel code, "
431 "%dk reserved, %dk data, %dk init)\n",
432 (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
433 totalram_pages << (PAGE_SHIFT - 10),
434 codesize >> 10,
435 reservedpages << (PAGE_SHIFT - 10),
436 datasize >> 10,
437 initsize >> 10);
438}
439
440static inline void free_area(unsigned long addr, unsigned long end, char *s)
441{
442 unsigned int size = (end - addr) >> 10;
443
444 for (; addr < end; addr += PAGE_SIZE) {
445 struct page *page = virt_to_page(addr);
446 ClearPageReserved(page);
447 init_page_count(page);
448 free_page(addr);
449 totalram_pages++;
450 }
451
452 if (size && s)
453 printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n",
454 s, size, end - (size << 10), end);
455}
456
457void free_initmem(void)
458{
459 free_area((unsigned long)__init_begin, (unsigned long)__init_end,
460 "init");
461}
462
463#ifdef CONFIG_BLK_DEV_INITRD
464
465static int keep_initrd;
466
467void free_initrd_mem(unsigned long start, unsigned long end)
468{
469 if (!keep_initrd)
470 free_area(start, end, "initrd");
471}
472
473static int __init keepinitrd_setup(char *__unused)
474{
475 keep_initrd = 1;
476 return 1;
477}
478
479__setup("keepinitrd", keepinitrd_setup);
480#endif
diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c
new file mode 100644
index 000000000000..536021877df6
--- /dev/null
+++ b/arch/avr32/mm/ioremap.c
@@ -0,0 +1,197 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/vmalloc.h>
9#include <linux/module.h>
10
11#include <asm/io.h>
12#include <asm/pgtable.h>
13#include <asm/cacheflush.h>
14#include <asm/tlbflush.h>
15#include <asm/addrspace.h>
16
17static inline int remap_area_pte(pte_t *pte, unsigned long address,
18 unsigned long end, unsigned long phys_addr,
19 pgprot_t prot)
20{
21 unsigned long pfn;
22
23 pfn = phys_addr >> PAGE_SHIFT;
24 do {
25 WARN_ON(!pte_none(*pte));
26
27 set_pte(pte, pfn_pte(pfn, prot));
28 address += PAGE_SIZE;
29 pfn++;
30 pte++;
31 } while (address && (address < end));
32
33 return 0;
34}
35
36static inline int remap_area_pmd(pmd_t *pmd, unsigned long address,
37 unsigned long end, unsigned long phys_addr,
38 pgprot_t prot)
39{
40 unsigned long next;
41
42 phys_addr -= address;
43
44 do {
45 pte_t *pte = pte_alloc_kernel(pmd, address);
46 if (!pte)
47 return -ENOMEM;
48
49 next = (address + PMD_SIZE) & PMD_MASK;
50 if (remap_area_pte(pte, address, next,
51 address + phys_addr, prot))
52 return -ENOMEM;
53
54 address = next;
55 pmd++;
56 } while (address && (address < end));
57 return 0;
58}
59
60static int remap_area_pud(pud_t *pud, unsigned long address,
61 unsigned long end, unsigned long phys_addr,
62 pgprot_t prot)
63{
64 unsigned long next;
65
66 phys_addr -= address;
67
68 do {
69 pmd_t *pmd = pmd_alloc(&init_mm, pud, address);
70 if (!pmd)
71 return -ENOMEM;
72 next = (address + PUD_SIZE) & PUD_MASK;
73 if (remap_area_pmd(pmd, address, next,
74 phys_addr + address, prot))
75 return -ENOMEM;
76
77 address = next;
78 pud++;
79 } while (address && address < end);
80
81 return 0;
82}
83
84static int remap_area_pages(unsigned long address, unsigned long phys_addr,
85 size_t size, pgprot_t prot)
86{
87 unsigned long end = address + size;
88 unsigned long next;
89 pgd_t *pgd;
90 int err = 0;
91
92 phys_addr -= address;
93
94 pgd = pgd_offset_k(address);
95 flush_cache_all();
96 BUG_ON(address >= end);
97
98 spin_lock(&init_mm.page_table_lock);
99 do {
100 pud_t *pud = pud_alloc(&init_mm, pgd, address);
101
102 err = -ENOMEM;
103 if (!pud)
104 break;
105
106 next = (address + PGDIR_SIZE) & PGDIR_MASK;
107 if (next < address || next > end)
108 next = end;
109 err = remap_area_pud(pud, address, next,
110 phys_addr + address, prot);
111 if (err)
112 break;
113
114 address = next;
115 pgd++;
116 } while (address && (address < end));
117
118 spin_unlock(&init_mm.page_table_lock);
119 flush_tlb_all();
120 return err;
121}
122
123/*
124 * Re-map an arbitrary physical address space into the kernel virtual
125 * address space. Needed when the kernel wants to access physical
126 * memory directly.
127 */
128void __iomem *__ioremap(unsigned long phys_addr, size_t size,
129 unsigned long flags)
130{
131 void *addr;
132 struct vm_struct *area;
133 unsigned long offset, last_addr;
134 pgprot_t prot;
135
136 /*
137 * Check if we can simply use the P4 segment. This area is
138 * uncacheable, so if caching/buffering is requested, we can't
139 * use it.
140 */
141 if ((phys_addr >= P4SEG) && (flags == 0))
142 return (void __iomem *)phys_addr;
143
144 /* Don't allow wraparound or zero size */
145 last_addr = phys_addr + size - 1;
146 if (!size || last_addr < phys_addr)
147 return NULL;
148
149 /*
150 * XXX: When mapping regular RAM, we'd better make damn sure
151 * it's never used for anything else. But this is really the
152 * caller's responsibility...
153 */
154 if (PHYSADDR(P2SEGADDR(phys_addr)) == phys_addr)
155 return (void __iomem *)P2SEGADDR(phys_addr);
156
157 /* Mappings have to be page-aligned */
158 offset = phys_addr & ~PAGE_MASK;
159 phys_addr &= PAGE_MASK;
160 size = PAGE_ALIGN(last_addr + 1) - phys_addr;
161
162 prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY
163 | _PAGE_ACCESSED | _PAGE_TYPE_SMALL | flags);
164
165 /*
166 * Ok, go for it..
167 */
168 area = get_vm_area(size, VM_IOREMAP);
169 if (!area)
170 return NULL;
171 area->phys_addr = phys_addr;
172 addr = area->addr;
173 if (remap_area_pages((unsigned long)addr, phys_addr, size, prot)) {
174 vunmap(addr);
175 return NULL;
176 }
177
178 return (void __iomem *)(offset + (char *)addr);
179}
180EXPORT_SYMBOL(__ioremap);
181
182void __iounmap(void __iomem *addr)
183{
184 struct vm_struct *p;
185
186 if ((unsigned long)addr >= P4SEG)
187 return;
188
189 p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
190 if (unlikely(!p)) {
191 printk (KERN_ERR "iounmap: bad address %p\n", addr);
192 return;
193 }
194
195 kfree (p);
196}
197EXPORT_SYMBOL(__iounmap);
diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c
new file mode 100644
index 000000000000..5d0523bbe298
--- /dev/null
+++ b/arch/avr32/mm/tlb.c
@@ -0,0 +1,378 @@
1/*
2 * AVR32 TLB operations
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/mm.h>
11
12#include <asm/mmu_context.h>
13
14#define _TLBEHI_I 0x100
15
16void show_dtlb_entry(unsigned int index)
17{
18 unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
19
20 local_irq_save(flags);
21 mmucr_save = sysreg_read(MMUCR);
22 tlbehi_save = sysreg_read(TLBEHI);
23 mmucr = mmucr_save & 0x13;
24 mmucr |= index << 14;
25 sysreg_write(MMUCR, mmucr);
26
27 asm volatile("tlbr" : : : "memory");
28 cpu_sync_pipeline();
29
30 tlbehi = sysreg_read(TLBEHI);
31 tlbelo = sysreg_read(TLBELO);
32
33 printk("%2u: %c %c %02x %05x %05x %o %o %c %c %c %c\n",
34 index,
35 (tlbehi & 0x200)?'1':'0',
36 (tlbelo & 0x100)?'1':'0',
37 (tlbehi & 0xff),
38 (tlbehi >> 12), (tlbelo >> 12),
39 (tlbelo >> 4) & 7, (tlbelo >> 2) & 3,
40 (tlbelo & 0x200)?'1':'0',
41 (tlbelo & 0x080)?'1':'0',
42 (tlbelo & 0x001)?'1':'0',
43 (tlbelo & 0x002)?'1':'0');
44
45 sysreg_write(MMUCR, mmucr_save);
46 sysreg_write(TLBEHI, tlbehi_save);
47 cpu_sync_pipeline();
48 local_irq_restore(flags);
49}
50
/* Log a column header followed by data TLB entries 0 through 31. */
void dump_dtlb(void)
{
	unsigned int entry = 0;

	printk("ID V G ASID VPN PFN AP SZ C B W D\n");

	while (entry < 32) {
		show_dtlb_entry(entry);
		entry++;
	}
}
59
60static unsigned long last_mmucr;
61
62static inline void set_replacement_pointer(unsigned shift)
63{
64 unsigned long mmucr, mmucr_save;
65
66 mmucr = mmucr_save = sysreg_read(MMUCR);
67
68 /* Does this mapping already exist? */
69 __asm__ __volatile__(
70 " tlbs\n"
71 " mfsr %0, %1"
72 : "=r"(mmucr)
73 : "i"(SYSREG_MMUCR));
74
75 if (mmucr & SYSREG_BIT(MMUCR_N)) {
76 /* Not found -- pick a not-recently-accessed entry */
77 unsigned long rp;
78 unsigned long tlbar = sysreg_read(TLBARLO);
79
80 rp = 32 - fls(tlbar);
81 if (rp == 32) {
82 rp = 0;
83 sysreg_write(TLBARLO, -1L);
84 }
85
86 mmucr &= 0x13;
87 mmucr |= (rp << shift);
88
89 sysreg_write(MMUCR, mmucr);
90 }
91
92 last_mmucr = mmucr;
93}
94
95static void update_dtlb(unsigned long address, pte_t pte, unsigned long asid)
96{
97 unsigned long vpn;
98
99 vpn = (address & MMU_VPN_MASK) | _TLBEHI_VALID | asid;
100 sysreg_write(TLBEHI, vpn);
101 cpu_sync_pipeline();
102
103 set_replacement_pointer(14);
104
105 sysreg_write(TLBELO, pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK);
106
107 /* Let's go */
108 asm volatile("nop\n\ttlbw" : : : "memory");
109 cpu_sync_pipeline();
110}
111
112void update_mmu_cache(struct vm_area_struct *vma,
113 unsigned long address, pte_t pte)
114{
115 unsigned long flags;
116
117 /* ptrace may call this routine */
118 if (vma && current->active_mm != vma->vm_mm)
119 return;
120
121 local_irq_save(flags);
122 update_dtlb(address, pte, get_asid());
123 local_irq_restore(flags);
124}
125
126void __flush_tlb_page(unsigned long asid, unsigned long page)
127{
128 unsigned long mmucr, tlbehi;
129
130 page |= asid;
131 sysreg_write(TLBEHI, page);
132 cpu_sync_pipeline();
133 asm volatile("tlbs");
134 mmucr = sysreg_read(MMUCR);
135
136 if (!(mmucr & SYSREG_BIT(MMUCR_N))) {
137 unsigned long tlbarlo;
138 unsigned long entry;
139
140 /* Clear the "valid" bit */
141 tlbehi = sysreg_read(TLBEHI);
142 tlbehi &= ~_TLBEHI_VALID;
143 sysreg_write(TLBEHI, tlbehi);
144 cpu_sync_pipeline();
145
146 /* mark the entry as "not accessed" */
147 entry = (mmucr >> 14) & 0x3f;
148 tlbarlo = sysreg_read(TLBARLO);
149 tlbarlo |= (0x80000000 >> entry);
150 sysreg_write(TLBARLO, tlbarlo);
151
152 /* update the entry with valid bit clear */
153 asm volatile("tlbw");
154 cpu_sync_pipeline();
155 }
156}
157
158void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
159{
160 if (vma->vm_mm && vma->vm_mm->context != NO_CONTEXT) {
161 unsigned long flags, asid;
162 unsigned long saved_asid = MMU_NO_ASID;
163
164 asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK;
165 page &= PAGE_MASK;
166
167 local_irq_save(flags);
168 if (vma->vm_mm != current->mm) {
169 saved_asid = get_asid();
170 set_asid(asid);
171 }
172
173 __flush_tlb_page(asid, page);
174
175 if (saved_asid != MMU_NO_ASID)
176 set_asid(saved_asid);
177 local_irq_restore(flags);
178 }
179}
180
181void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
182 unsigned long end)
183{
184 struct mm_struct *mm = vma->vm_mm;
185
186 if (mm->context != NO_CONTEXT) {
187 unsigned long flags;
188 int size;
189
190 local_irq_save(flags);
191 size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
192 if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */
193 mm->context = NO_CONTEXT;
194 if (mm == current->mm)
195 activate_context(mm);
196 } else {
197 unsigned long asid = mm->context & MMU_CONTEXT_ASID_MASK;
198 unsigned long saved_asid = MMU_NO_ASID;
199
200 start &= PAGE_MASK;
201 end += (PAGE_SIZE - 1);
202 end &= PAGE_MASK;
203 if (mm != current->mm) {
204 saved_asid = get_asid();
205 set_asid(asid);
206 }
207
208 while (start < end) {
209 __flush_tlb_page(asid, start);
210 start += PAGE_SIZE;
211 }
212 if (saved_asid != MMU_NO_ASID)
213 set_asid(saved_asid);
214 }
215 local_irq_restore(flags);
216 }
217}
218
219/*
220 * TODO: If this is only called for addresses > TASK_SIZE, we can probably
221 * skip the ASID stuff and just use the Global bit...
222 */
223void flush_tlb_kernel_range(unsigned long start, unsigned long end)
224{
225 unsigned long flags;
226 int size;
227
228 local_irq_save(flags);
229 size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
230 if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */
231 flush_tlb_all();
232 } else {
233 unsigned long asid = init_mm.context & MMU_CONTEXT_ASID_MASK;
234 unsigned long saved_asid = get_asid();
235
236 start &= PAGE_MASK;
237 end += (PAGE_SIZE - 1);
238 end &= PAGE_MASK;
239 set_asid(asid);
240 while (start < end) {
241 __flush_tlb_page(asid, start);
242 start += PAGE_SIZE;
243 }
244 set_asid(saved_asid);
245 }
246 local_irq_restore(flags);
247}
248
249void flush_tlb_mm(struct mm_struct *mm)
250{
251 /* Invalidate all TLB entries of this process by getting a new ASID */
252 if (mm->context != NO_CONTEXT) {
253 unsigned long flags;
254
255 local_irq_save(flags);
256 mm->context = NO_CONTEXT;
257 if (mm == current->mm)
258 activate_context(mm);
259 local_irq_restore(flags);
260 }
261}
262
263void flush_tlb_all(void)
264{
265 unsigned long flags;
266
267 local_irq_save(flags);
268 sysreg_write(MMUCR, sysreg_read(MMUCR) | SYSREG_BIT(MMUCR_I));
269 local_irq_restore(flags);
270}
271
272#ifdef CONFIG_PROC_FS
273
274#include <linux/seq_file.h>
275#include <linux/proc_fs.h>
276#include <linux/init.h>
277
278static void *tlb_start(struct seq_file *tlb, loff_t *pos)
279{
280 static unsigned long tlb_index;
281
282 if (*pos >= 32)
283 return NULL;
284
285 tlb_index = 0;
286 return &tlb_index;
287}
288
289static void *tlb_next(struct seq_file *tlb, void *v, loff_t *pos)
290{
291 unsigned long *index = v;
292
293 if (*index >= 31)
294 return NULL;
295
296 ++*pos;
297 ++*index;
298 return index;
299}
300
/* seq_file ->stop: the iterator holds no resources, so nothing to do. */
static void tlb_stop(struct seq_file *tlb, void *v)
{
}
305
306static int tlb_show(struct seq_file *tlb, void *v)
307{
308 unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
309 unsigned long *index = v;
310
311 if (*index == 0)
312 seq_puts(tlb, "ID V G ASID VPN PFN AP SZ C B W D\n");
313
314 BUG_ON(*index >= 32);
315
316 local_irq_save(flags);
317 mmucr_save = sysreg_read(MMUCR);
318 tlbehi_save = sysreg_read(TLBEHI);
319 mmucr = mmucr_save & 0x13;
320 mmucr |= *index << 14;
321 sysreg_write(MMUCR, mmucr);
322
323 asm volatile("tlbr" : : : "memory");
324 cpu_sync_pipeline();
325
326 tlbehi = sysreg_read(TLBEHI);
327 tlbelo = sysreg_read(TLBELO);
328
329 sysreg_write(MMUCR, mmucr_save);
330 sysreg_write(TLBEHI, tlbehi_save);
331 cpu_sync_pipeline();
332 local_irq_restore(flags);
333
334 seq_printf(tlb, "%2lu: %c %c %02x %05x %05x %o %o %c %c %c %c\n",
335 *index,
336 (tlbehi & 0x200)?'1':'0',
337 (tlbelo & 0x100)?'1':'0',
338 (tlbehi & 0xff),
339 (tlbehi >> 12), (tlbelo >> 12),
340 (tlbelo >> 4) & 7, (tlbelo >> 2) & 3,
341 (tlbelo & 0x200)?'1':'0',
342 (tlbelo & 0x080)?'1':'0',
343 (tlbelo & 0x001)?'1':'0',
344 (tlbelo & 0x002)?'1':'0');
345
346 return 0;
347}
348
349static struct seq_operations tlb_ops = {
350 .start = tlb_start,
351 .next = tlb_next,
352 .stop = tlb_stop,
353 .show = tlb_show,
354};
355
356static int tlb_open(struct inode *inode, struct file *file)
357{
358 return seq_open(file, &tlb_ops);
359}
360
361static struct file_operations proc_tlb_operations = {
362 .open = tlb_open,
363 .read = seq_read,
364 .llseek = seq_lseek,
365 .release = seq_release,
366};
367
368static int __init proctlb_init(void)
369{
370 struct proc_dir_entry *entry;
371
372 entry = create_proc_entry("tlb", 0, NULL);
373 if (entry)
374 entry->proc_fops = &proc_tlb_operations;
375 return 0;
376}
377late_initcall(proctlb_init);
378#endif /* CONFIG_PROC_FS */
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index a601a17cf568..f7b171b92ea2 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -27,7 +27,11 @@ config GENERIC_CALIBRATE_DELAY
27 27
28config GENERIC_HARDIRQS 28config GENERIC_HARDIRQS
29 bool 29 bool
30 default n 30 default y
31
32config GENERIC_HARDIRQS_NO__DO_IRQ
33 bool
34 default y
31 35
32config GENERIC_TIME 36config GENERIC_TIME
33 bool 37 bool
@@ -251,6 +255,12 @@ config MB93091_NO_MB
251endchoice 255endchoice
252endif 256endif
253 257
258config FUJITSU_MB93493
259 bool "MB93493 Multimedia chip"
260 help
261 Select this option if the MB93493 multimedia chip is going to be
262 used.
263
254choice 264choice
255 prompt "GP-Relative data support" 265 prompt "GP-Relative data support"
256 default GPREL_DATA_8 266 default GPREL_DATA_8
diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile
index 5a827b349b5e..32db3499c461 100644
--- a/arch/frv/kernel/Makefile
+++ b/arch/frv/kernel/Makefile
@@ -10,15 +10,14 @@ extra-y:= head.o init_task.o vmlinux.lds
10obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \ 10obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
11 process.o traps.o ptrace.o signal.o dma.o \ 11 process.o traps.o ptrace.o signal.o dma.o \
12 sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \ 12 sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
13 debug-stub.o irq.o irq-routing.o sleep.o uaccess.o 13 debug-stub.o irq.o sleep.o uaccess.o
14 14
15obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o 15obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o
16 16
17obj-$(CONFIG_MB93091_VDK) += irq-mb93091.o 17obj-$(CONFIG_MB93091_VDK) += irq-mb93091.o
18obj-$(CONFIG_MB93093_PDK) += irq-mb93093.o
19obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o
20obj-$(CONFIG_PM) += pm.o cmode.o 18obj-$(CONFIG_PM) += pm.o cmode.o
21obj-$(CONFIG_MB93093_PDK) += pm-mb93093.o 19obj-$(CONFIG_MB93093_PDK) += pm-mb93093.o
20obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o
22obj-$(CONFIG_SYSCTL) += sysctl.o 21obj-$(CONFIG_SYSCTL) += sysctl.o
23obj-$(CONFIG_FUTEX) += futex.o 22obj-$(CONFIG_FUTEX) += futex.o
24obj-$(CONFIG_MODULES) += module.o 23obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/frv/kernel/irq-mb93091.c b/arch/frv/kernel/irq-mb93091.c
index 1381abcd5cc9..369bc0a7443d 100644
--- a/arch/frv/kernel/irq-mb93091.c
+++ b/arch/frv/kernel/irq-mb93091.c
@@ -24,7 +24,6 @@
24#include <asm/delay.h> 24#include <asm/delay.h>
25#include <asm/irq.h> 25#include <asm/irq.h>
26#include <asm/irc-regs.h> 26#include <asm/irc-regs.h>
27#include <asm/irq-routing.h>
28 27
29#define __reg16(ADDR) (*(volatile unsigned short *)(ADDR)) 28#define __reg16(ADDR) (*(volatile unsigned short *)(ADDR))
30 29
@@ -33,83 +32,131 @@
33#define __get_IFR() ({ __reg16(0xffc0000c); }) 32#define __get_IFR() ({ __reg16(0xffc0000c); })
34#define __clr_IFR(M) do { __reg16(0xffc0000c) = ~(M); wmb(); } while(0) 33#define __clr_IFR(M) do { __reg16(0xffc0000c) = ~(M); wmb(); } while(0)
35 34
36static void frv_fpga_doirq(struct irq_source *source);
37static void frv_fpga_control(struct irq_group *group, int irq, int on);
38 35
39/*****************************************************************************/
40/* 36/*
41 * FPGA IRQ multiplexor 37 * on-motherboard FPGA PIC operations
42 */ 38 */
43static struct irq_source frv_fpga[4] = { 39static void frv_fpga_mask(unsigned int irq)
44#define __FPGA(X, M) \ 40{
45 [X] = { \ 41 uint16_t imr = __get_IMR();
46 .muxname = "fpga."#X, \
47 .irqmask = M, \
48 .doirq = frv_fpga_doirq, \
49 }
50 42
51 __FPGA(0, 0x0028), 43 imr |= 1 << (irq - IRQ_BASE_FPGA);
52 __FPGA(1, 0x0050),
53 __FPGA(2, 0x1c00),
54 __FPGA(3, 0x6386),
55};
56 44
57static struct irq_group frv_fpga_irqs = { 45 __set_IMR(imr);
58 .first_irq = IRQ_BASE_FPGA, 46}
59 .control = frv_fpga_control,
60 .sources = {
61 [ 1] = &frv_fpga[3],
62 [ 2] = &frv_fpga[3],
63 [ 3] = &frv_fpga[0],
64 [ 4] = &frv_fpga[1],
65 [ 5] = &frv_fpga[0],
66 [ 6] = &frv_fpga[1],
67 [ 7] = &frv_fpga[3],
68 [ 8] = &frv_fpga[3],
69 [ 9] = &frv_fpga[3],
70 [10] = &frv_fpga[2],
71 [11] = &frv_fpga[2],
72 [12] = &frv_fpga[2],
73 [13] = &frv_fpga[3],
74 [14] = &frv_fpga[3],
75 },
76};
77 47
48static void frv_fpga_ack(unsigned int irq)
49{
50 __clr_IFR(1 << (irq - IRQ_BASE_FPGA));
51}
78 52
79static void frv_fpga_control(struct irq_group *group, int index, int on) 53static void frv_fpga_mask_ack(unsigned int irq)
80{ 54{
81 uint16_t imr = __get_IMR(); 55 uint16_t imr = __get_IMR();
82 56
83 if (on) 57 imr |= 1 << (irq - IRQ_BASE_FPGA);
84 imr &= ~(1 << index); 58 __set_IMR(imr);
85 else 59
86 imr |= 1 << index; 60 __clr_IFR(1 << (irq - IRQ_BASE_FPGA));
61}
62
63static void frv_fpga_unmask(unsigned int irq)
64{
65 uint16_t imr = __get_IMR();
66
67 imr &= ~(1 << (irq - IRQ_BASE_FPGA));
87 68
88 __set_IMR(imr); 69 __set_IMR(imr);
89} 70}
90 71
91static void frv_fpga_doirq(struct irq_source *source) 72static struct irq_chip frv_fpga_pic = {
73 .name = "mb93091",
74 .ack = frv_fpga_ack,
75 .mask = frv_fpga_mask,
76 .mask_ack = frv_fpga_mask_ack,
77 .unmask = frv_fpga_unmask,
78};
79
80/*
81 * FPGA PIC interrupt handler
82 */
83static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs)
92{ 84{
93 uint16_t mask, imr; 85 uint16_t imr, mask = (unsigned long) _mask;
94 86
95 imr = __get_IMR(); 87 imr = __get_IMR();
96 mask = source->irqmask & ~imr & __get_IFR(); 88 mask = mask & ~imr & __get_IFR();
97 if (mask) { 89
98 __set_IMR(imr | mask); 90 /* poll all the triggered IRQs */
99 __clr_IFR(mask); 91 while (mask) {
100 distribute_irqs(&frv_fpga_irqs, mask); 92 int irq;
101 __set_IMR(imr); 93
94 asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask));
95 irq = 31 - irq;
96 mask &= ~(1 << irq);
97
98 generic_handle_irq(IRQ_BASE_FPGA + irq, regs);
102 } 99 }
100
101 return IRQ_HANDLED;
103} 102}
104 103
104/*
105 * define an interrupt action for each FPGA PIC output
106 * - use dev_id to indicate the FPGA PIC input to output mappings
107 */
108static struct irqaction fpga_irq[4] = {
109 [0] = {
110 .handler = fpga_interrupt,
111 .flags = IRQF_DISABLED | IRQF_SHARED,
112 .mask = CPU_MASK_NONE,
113 .name = "fpga.0",
114 .dev_id = (void *) 0x0028UL,
115 },
116 [1] = {
117 .handler = fpga_interrupt,
118 .flags = IRQF_DISABLED | IRQF_SHARED,
119 .mask = CPU_MASK_NONE,
120 .name = "fpga.1",
121 .dev_id = (void *) 0x0050UL,
122 },
123 [2] = {
124 .handler = fpga_interrupt,
125 .flags = IRQF_DISABLED | IRQF_SHARED,
126 .mask = CPU_MASK_NONE,
127 .name = "fpga.2",
128 .dev_id = (void *) 0x1c00UL,
129 },
130 [3] = {
131 .handler = fpga_interrupt,
132 .flags = IRQF_DISABLED | IRQF_SHARED,
133 .mask = CPU_MASK_NONE,
134 .name = "fpga.3",
135 .dev_id = (void *) 0x6386UL,
136 }
137};
138
139/*
140 * initialise the motherboard FPGA's PIC
141 */
105void __init fpga_init(void) 142void __init fpga_init(void)
106{ 143{
144 int irq;
145
146 /* all PIC inputs are all set to be low-level driven, apart from the
147 * NMI button (15) which is fixed at falling-edge
148 */
107 __set_IMR(0x7ffe); 149 __set_IMR(0x7ffe);
108 __clr_IFR(0x0000); 150 __clr_IFR(0x0000);
109 151
110 frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL0); 152 for (irq = IRQ_BASE_FPGA + 1; irq <= IRQ_BASE_FPGA + 14; irq++)
111 frv_irq_route_external(&frv_fpga[1], IRQ_CPU_EXTERNAL1); 153 set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_level_irq);
112 frv_irq_route_external(&frv_fpga[2], IRQ_CPU_EXTERNAL2); 154
113 frv_irq_route_external(&frv_fpga[3], IRQ_CPU_EXTERNAL3); 155 set_irq_chip_and_handler(IRQ_FPGA_NMI, &frv_fpga_pic, handle_edge_irq);
114 frv_irq_set_group(&frv_fpga_irqs); 156
157 /* the FPGA drives the first four external IRQ inputs on the CPU PIC */
158 setup_irq(IRQ_CPU_EXTERNAL0, &fpga_irq[0]);
159 setup_irq(IRQ_CPU_EXTERNAL1, &fpga_irq[1]);
160 setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[2]);
161 setup_irq(IRQ_CPU_EXTERNAL3, &fpga_irq[3]);
115} 162}
diff --git a/arch/frv/kernel/irq-mb93093.c b/arch/frv/kernel/irq-mb93093.c
index 48b2a6420888..a43a22158956 100644
--- a/arch/frv/kernel/irq-mb93093.c
+++ b/arch/frv/kernel/irq-mb93093.c
@@ -1,6 +1,6 @@
1/* irq-mb93093.c: MB93093 FPGA interrupt handling 1/* irq-mb93093.c: MB93093 FPGA interrupt handling
2 * 2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
24#include <asm/delay.h> 24#include <asm/delay.h>
25#include <asm/irq.h> 25#include <asm/irq.h>
26#include <asm/irc-regs.h> 26#include <asm/irc-regs.h>
27#include <asm/irq-routing.h>
28 27
29#define __reg16(ADDR) (*(volatile unsigned short *)(__region_CS2 + (ADDR))) 28#define __reg16(ADDR) (*(volatile unsigned short *)(__region_CS2 + (ADDR)))
30 29
@@ -33,66 +32,102 @@
33#define __get_IFR() ({ __reg16(0x02); }) 32#define __get_IFR() ({ __reg16(0x02); })
34#define __clr_IFR(M) do { __reg16(0x02) = ~(M); wmb(); } while(0) 33#define __clr_IFR(M) do { __reg16(0x02) = ~(M); wmb(); } while(0)
35 34
36static void frv_fpga_doirq(struct irq_source *source);
37static void frv_fpga_control(struct irq_group *group, int irq, int on);
38
39/*****************************************************************************/
40/* 35/*
41 * FPGA IRQ multiplexor 36 * off-CPU FPGA PIC operations
42 */ 37 */
43static struct irq_source frv_fpga[4] = { 38static void frv_fpga_mask(unsigned int irq)
44#define __FPGA(X, M) \ 39{
45 [X] = { \ 40 uint16_t imr = __get_IMR();
46 .muxname = "fpga."#X, \
47 .irqmask = M, \
48 .doirq = frv_fpga_doirq, \
49 }
50 41
51 __FPGA(0, 0x0700), 42 imr |= 1 << (irq - IRQ_BASE_FPGA);
52}; 43 __set_IMR(imr);
44}
53 45
54static struct irq_group frv_fpga_irqs = { 46static void frv_fpga_ack(unsigned int irq)
55 .first_irq = IRQ_BASE_FPGA, 47{
56 .control = frv_fpga_control, 48 __clr_IFR(1 << (irq - IRQ_BASE_FPGA));
57 .sources = { 49}
58 [ 8] = &frv_fpga[0], 50
59 [ 9] = &frv_fpga[0], 51static void frv_fpga_mask_ack(unsigned int irq)
60 [10] = &frv_fpga[0], 52{
61 }, 53 uint16_t imr = __get_IMR();
62};
63 54
55 imr |= 1 << (irq - IRQ_BASE_FPGA);
56 __set_IMR(imr);
57
58 __clr_IFR(1 << (irq - IRQ_BASE_FPGA));
59}
64 60
65static void frv_fpga_control(struct irq_group *group, int index, int on) 61static void frv_fpga_unmask(unsigned int irq)
66{ 62{
67 uint16_t imr = __get_IMR(); 63 uint16_t imr = __get_IMR();
68 64
69 if (on) 65 imr &= ~(1 << (irq - IRQ_BASE_FPGA));
70 imr &= ~(1 << index);
71 else
72 imr |= 1 << index;
73 66
74 __set_IMR(imr); 67 __set_IMR(imr);
75} 68}
76 69
77static void frv_fpga_doirq(struct irq_source *source) 70static struct irq_chip frv_fpga_pic = {
71 .name = "mb93093",
72 .ack = frv_fpga_ack,
73 .mask = frv_fpga_mask,
74 .mask_ack = frv_fpga_mask_ack,
75 .unmask = frv_fpga_unmask,
76 .end = frv_fpga_end,
77};
78
79/*
80 * FPGA PIC interrupt handler
81 */
82static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs)
78{ 83{
79 uint16_t mask, imr; 84 uint16_t imr, mask = (unsigned long) _mask;
80 85
81 imr = __get_IMR(); 86 imr = __get_IMR();
82 mask = source->irqmask & ~imr & __get_IFR(); 87 mask = mask & ~imr & __get_IFR();
83 if (mask) { 88
84 __set_IMR(imr | mask); 89 /* poll all the triggered IRQs */
85 __clr_IFR(mask); 90 while (mask) {
86 distribute_irqs(&frv_fpga_irqs, mask); 91 int irq;
87 __set_IMR(imr); 92
93 asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask));
94 irq = 31 - irq;
95 mask &= ~(1 << irq);
96
97 generic_irq_handle(IRQ_BASE_FPGA + irq, regs);
88 } 98 }
99
100 return IRQ_HANDLED;
89} 101}
90 102
103/*
104 * define an interrupt action for each FPGA PIC output
105 * - use dev_id to indicate the FPGA PIC input to output mappings
106 */
107static struct irqaction fpga_irq[1] = {
108 [0] = {
109 .handler = fpga_interrupt,
110 .flags = IRQF_DISABLED,
111 .mask = CPU_MASK_NONE,
112 .name = "fpga.0",
113 .dev_id = (void *) 0x0700UL,
114 }
115};
116
117/*
118 * initialise the motherboard FPGA's PIC
119 */
91void __init fpga_init(void) 120void __init fpga_init(void)
92{ 121{
122 int irq;
123
124 /* all PIC inputs are all set to be edge triggered */
93 __set_IMR(0x0700); 125 __set_IMR(0x0700);
94 __clr_IFR(0x0000); 126 __clr_IFR(0x0000);
95 127
96 frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL2); 128 for (irq = IRQ_BASE_FPGA + 8; irq <= IRQ_BASE_FPGA + 10; irq++)
97 frv_irq_set_group(&frv_fpga_irqs); 129 set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_edge_irq);
130
131 /* the FPGA drives external IRQ input #2 on the CPU PIC */
132 setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[0]);
98} 133}
diff --git a/arch/frv/kernel/irq-mb93493.c b/arch/frv/kernel/irq-mb93493.c
index 988d035640e1..39c0188a3498 100644
--- a/arch/frv/kernel/irq-mb93493.c
+++ b/arch/frv/kernel/irq-mb93493.c
@@ -1,6 +1,6 @@
1/* irq-mb93493.c: MB93493 companion chip interrupt handler 1/* irq-mb93493.c: MB93493 companion chip interrupt handler
2 * 2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -24,84 +24,126 @@
24#include <asm/delay.h> 24#include <asm/delay.h>
25#include <asm/irq.h> 25#include <asm/irq.h>
26#include <asm/irc-regs.h> 26#include <asm/irc-regs.h>
27#include <asm/irq-routing.h>
28#include <asm/mb93493-irqs.h> 27#include <asm/mb93493-irqs.h>
28#include <asm/mb93493-regs.h>
29 29
30static void frv_mb93493_doirq(struct irq_source *source); 30#define IRQ_ROUTE_ONE(X) (X##_ROUTE << (X - IRQ_BASE_MB93493))
31
32#define IRQ_ROUTING \
33 (IRQ_ROUTE_ONE(IRQ_MB93493_VDC) | \
34 IRQ_ROUTE_ONE(IRQ_MB93493_VCC) | \
35 IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_OUT) | \
36 IRQ_ROUTE_ONE(IRQ_MB93493_I2C_0) | \
37 IRQ_ROUTE_ONE(IRQ_MB93493_I2C_1) | \
38 IRQ_ROUTE_ONE(IRQ_MB93493_USB) | \
39 IRQ_ROUTE_ONE(IRQ_MB93493_LOCAL_BUS) | \
40 IRQ_ROUTE_ONE(IRQ_MB93493_PCMCIA) | \
41 IRQ_ROUTE_ONE(IRQ_MB93493_GPIO) | \
42 IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_IN))
31 43
32/*****************************************************************************/
33/* 44/*
34 * MB93493 companion chip IRQ multiplexor 45 * daughter board PIC operations
46 * - there is no way to ACK interrupts in the MB93493 chip
35 */ 47 */
36static struct irq_source frv_mb93493[2] = { 48static void frv_mb93493_mask(unsigned int irq)
37 [0] = {
38 .muxname = "mb93493.0",
39 .muxdata = __region_CS3 + 0x3d0,
40 .doirq = frv_mb93493_doirq,
41 .irqmask = 0x0000,
42 },
43 [1] = {
44 .muxname = "mb93493.1",
45 .muxdata = __region_CS3 + 0x3d4,
46 .doirq = frv_mb93493_doirq,
47 .irqmask = 0x0000,
48 },
49};
50
51static void frv_mb93493_control(struct irq_group *group, int index, int on)
52{ 49{
53 struct irq_source *source;
54 uint32_t iqsr; 50 uint32_t iqsr;
51 volatile void *piqsr;
55 52
56 if ((frv_mb93493[0].irqmask & (1 << index))) 53 if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493)))
57 source = &frv_mb93493[0]; 54 piqsr = __addr_MB93493_IQSR(1);
58 else 55 else
59 source = &frv_mb93493[1]; 56 piqsr = __addr_MB93493_IQSR(0);
57
58 iqsr = readl(piqsr);
59 iqsr &= ~(1 << (irq - IRQ_BASE_MB93493 + 16));
60 writel(iqsr, piqsr);
61}
60 62
61 iqsr = readl(source->muxdata); 63static void frv_mb93493_ack(unsigned int irq)
62 if (on) 64{
63 iqsr |= 1 << (index + 16); 65}
66
67static void frv_mb93493_unmask(unsigned int irq)
68{
69 uint32_t iqsr;
70 volatile void *piqsr;
71
72 if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493)))
73 piqsr = __addr_MB93493_IQSR(1);
64 else 74 else
65 iqsr &= ~(1 << (index + 16)); 75 piqsr = __addr_MB93493_IQSR(0);
66 76
67 writel(iqsr, source->muxdata); 77 iqsr = readl(piqsr);
78 iqsr |= 1 << (irq - IRQ_BASE_MB93493 + 16);
79 writel(iqsr, piqsr);
68} 80}
69 81
70static struct irq_group frv_mb93493_irqs = { 82static struct irq_chip frv_mb93493_pic = {
71 .first_irq = IRQ_BASE_MB93493, 83 .name = "mb93093",
72 .control = frv_mb93493_control, 84 .ack = frv_mb93493_ack,
85 .mask = frv_mb93493_mask,
86 .mask_ack = frv_mb93493_mask,
87 .unmask = frv_mb93493_unmask,
73}; 88};
74 89
75static void frv_mb93493_doirq(struct irq_source *source) 90/*
91 * MB93493 PIC interrupt handler
92 */
93static irqreturn_t mb93493_interrupt(int irq, void *_piqsr, struct pt_regs *regs)
76{ 94{
77 uint32_t mask = readl(source->muxdata); 95 volatile void *piqsr = _piqsr;
78 mask = mask & (mask >> 16) & 0xffff; 96 uint32_t iqsr;
79 97
80 if (mask) 98 iqsr = readl(piqsr);
81 distribute_irqs(&frv_mb93493_irqs, mask); 99 iqsr = iqsr & (iqsr >> 16) & 0xffff;
82}
83 100
84static void __init mb93493_irq_route(int irq, int source) 101 /* poll all the triggered IRQs */
85{ 102 while (iqsr) {
86 frv_mb93493[source].irqmask |= 1 << (irq - IRQ_BASE_MB93493); 103 int irq;
87 frv_mb93493_irqs.sources[irq - IRQ_BASE_MB93493] = &frv_mb93493[source]; 104
105 asm("scan %1,gr0,%0" : "=r"(irq) : "r"(iqsr));
106 irq = 31 - irq;
107 iqsr &= ~(1 << irq);
108
109 generic_handle_irq(IRQ_BASE_MB93493 + irq, regs);
110 }
111
112 return IRQ_HANDLED;
88} 113}
89 114
90void __init route_mb93493_irqs(void) 115/*
116 * define an interrupt action for each MB93493 PIC output
117 * - use dev_id to indicate the MB93493 PIC input to output mappings
118 */
119static struct irqaction mb93493_irq[2] = {
120 [0] = {
121 .handler = mb93493_interrupt,
122 .flags = IRQF_DISABLED | IRQF_SHARED,
123 .mask = CPU_MASK_NONE,
124 .name = "mb93493.0",
125 .dev_id = (void *) __addr_MB93493_IQSR(0),
126 },
127 [1] = {
128 .handler = mb93493_interrupt,
129 .flags = IRQF_DISABLED | IRQF_SHARED,
130 .mask = CPU_MASK_NONE,
131 .name = "mb93493.1",
132 .dev_id = (void *) __addr_MB93493_IQSR(1),
133 }
134};
135
136/*
137 * initialise the motherboard MB93493's PIC
138 */
139void __init mb93493_init(void)
91{ 140{
92 frv_irq_route_external(&frv_mb93493[0], IRQ_CPU_MB93493_0); 141 int irq;
93 frv_irq_route_external(&frv_mb93493[1], IRQ_CPU_MB93493_1); 142
94 143 for (irq = IRQ_BASE_MB93493 + 0; irq <= IRQ_BASE_MB93493 + 10; irq++)
95 frv_irq_set_group(&frv_mb93493_irqs); 144 set_irq_chip_and_handler(irq, &frv_mb93493_pic, handle_edge_irq);
96 145
97 mb93493_irq_route(IRQ_MB93493_VDC, IRQ_MB93493_VDC_ROUTE); 146 /* the MB93493 drives external IRQ inputs on the CPU PIC */
98 mb93493_irq_route(IRQ_MB93493_VCC, IRQ_MB93493_VCC_ROUTE); 147 setup_irq(IRQ_CPU_MB93493_0, &mb93493_irq[0]);
99 mb93493_irq_route(IRQ_MB93493_AUDIO_IN, IRQ_MB93493_AUDIO_IN_ROUTE); 148 setup_irq(IRQ_CPU_MB93493_1, &mb93493_irq[1]);
100 mb93493_irq_route(IRQ_MB93493_I2C_0, IRQ_MB93493_I2C_0_ROUTE);
101 mb93493_irq_route(IRQ_MB93493_I2C_1, IRQ_MB93493_I2C_1_ROUTE);
102 mb93493_irq_route(IRQ_MB93493_USB, IRQ_MB93493_USB_ROUTE);
103 mb93493_irq_route(IRQ_MB93493_LOCAL_BUS, IRQ_MB93493_LOCAL_BUS_ROUTE);
104 mb93493_irq_route(IRQ_MB93493_PCMCIA, IRQ_MB93493_PCMCIA_ROUTE);
105 mb93493_irq_route(IRQ_MB93493_GPIO, IRQ_MB93493_GPIO_ROUTE);
106 mb93493_irq_route(IRQ_MB93493_AUDIO_OUT, IRQ_MB93493_AUDIO_OUT_ROUTE);
107} 149}
diff --git a/arch/frv/kernel/irq-routing.c b/arch/frv/kernel/irq-routing.c
deleted file mode 100644
index 53886adf47de..000000000000
--- a/arch/frv/kernel/irq-routing.c
+++ /dev/null
@@ -1,291 +0,0 @@
1/* irq-routing.c: IRQ routing
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/random.h>
14#include <linux/init.h>
15#include <linux/serial_reg.h>
16#include <asm/io.h>
17#include <asm/irq-routing.h>
18#include <asm/irc-regs.h>
19#include <asm/serial-regs.h>
20#include <asm/dma.h>
21
22struct irq_level frv_irq_levels[16] = {
23 [0 ... 15] = {
24 .lock = SPIN_LOCK_UNLOCKED,
25 }
26};
27
28struct irq_group *irq_groups[NR_IRQ_GROUPS];
29
30extern struct irq_group frv_cpu_irqs;
31
32void __init frv_irq_route(struct irq_source *source, int irqlevel)
33{
34 source->level = &frv_irq_levels[irqlevel];
35 source->next = frv_irq_levels[irqlevel].sources;
36 frv_irq_levels[irqlevel].sources = source;
37}
38
39void __init frv_irq_route_external(struct irq_source *source, int irq)
40{
41 int irqlevel = 0;
42
43 switch (irq) {
44 case IRQ_CPU_EXTERNAL0: irqlevel = IRQ_XIRQ0_LEVEL; break;
45 case IRQ_CPU_EXTERNAL1: irqlevel = IRQ_XIRQ1_LEVEL; break;
46 case IRQ_CPU_EXTERNAL2: irqlevel = IRQ_XIRQ2_LEVEL; break;
47 case IRQ_CPU_EXTERNAL3: irqlevel = IRQ_XIRQ3_LEVEL; break;
48 case IRQ_CPU_EXTERNAL4: irqlevel = IRQ_XIRQ4_LEVEL; break;
49 case IRQ_CPU_EXTERNAL5: irqlevel = IRQ_XIRQ5_LEVEL; break;
50 case IRQ_CPU_EXTERNAL6: irqlevel = IRQ_XIRQ6_LEVEL; break;
51 case IRQ_CPU_EXTERNAL7: irqlevel = IRQ_XIRQ7_LEVEL; break;
52 default: BUG();
53 }
54
55 source->level = &frv_irq_levels[irqlevel];
56 source->next = frv_irq_levels[irqlevel].sources;
57 frv_irq_levels[irqlevel].sources = source;
58}
59
60void __init frv_irq_set_group(struct irq_group *group)
61{
62 irq_groups[group->first_irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP] = group;
63}
64
65void distribute_irqs(struct irq_group *group, unsigned long irqmask)
66{
67 struct irqaction *action;
68 int irq;
69
70 while (irqmask) {
71 asm("scan %1,gr0,%0" : "=r"(irq) : "r"(irqmask));
72 if (irq < 0 || irq > 31)
73 asm volatile("break");
74 irq = 31 - irq;
75
76 irqmask &= ~(1 << irq);
77 action = group->actions[irq];
78
79 irq += group->first_irq;
80
81 if (action) {
82 int status = 0;
83
84// if (!(action->flags & IRQF_DISABLED))
85// local_irq_enable();
86
87 do {
88 status |= action->flags;
89 action->handler(irq, action->dev_id, __frame);
90 action = action->next;
91 } while (action);
92
93 if (status & IRQF_SAMPLE_RANDOM)
94 add_interrupt_randomness(irq);
95 local_irq_disable();
96 }
97 }
98}
99
100/*****************************************************************************/
101/*
102 * CPU UART interrupts
103 */
104static void frv_cpuuart_doirq(struct irq_source *source)
105{
106// uint8_t iir = readb(source->muxdata + UART_IIR * 8);
107// if ((iir & 0x0f) != UART_IIR_NO_INT)
108 distribute_irqs(&frv_cpu_irqs, source->irqmask);
109}
110
111struct irq_source frv_cpuuart[2] = {
112#define __CPUUART(X, A) \
113 [X] = { \
114 .muxname = "uart", \
115 .muxdata = (volatile void __iomem *)(unsigned long)A,\
116 .irqmask = 1 << IRQ_CPU_UART##X, \
117 .doirq = frv_cpuuart_doirq, \
118 }
119
120 __CPUUART(0, UART0_BASE),
121 __CPUUART(1, UART1_BASE),
122};
123
124/*****************************************************************************/
125/*
126 * CPU DMA interrupts
127 */
128static void frv_cpudma_doirq(struct irq_source *source)
129{
130 uint32_t cstr = readl(source->muxdata + DMAC_CSTRx);
131 if (cstr & DMAC_CSTRx_INT)
132 distribute_irqs(&frv_cpu_irqs, source->irqmask);
133}
134
135struct irq_source frv_cpudma[8] = {
136#define __CPUDMA(X, A) \
137 [X] = { \
138 .muxname = "dma", \
139 .muxdata = (volatile void __iomem *)(unsigned long)A,\
140 .irqmask = 1 << IRQ_CPU_DMA##X, \
141 .doirq = frv_cpudma_doirq, \
142 }
143
144 __CPUDMA(0, 0xfe000900),
145 __CPUDMA(1, 0xfe000980),
146 __CPUDMA(2, 0xfe000a00),
147 __CPUDMA(3, 0xfe000a80),
148 __CPUDMA(4, 0xfe001000),
149 __CPUDMA(5, 0xfe001080),
150 __CPUDMA(6, 0xfe001100),
151 __CPUDMA(7, 0xfe001180),
152};
153
154/*****************************************************************************/
155/*
156 * CPU timer interrupts - can't tell whether they've generated an interrupt or not
157 */
158static void frv_cputimer_doirq(struct irq_source *source)
159{
160 distribute_irqs(&frv_cpu_irqs, source->irqmask);
161}
162
163struct irq_source frv_cputimer[3] = {
164#define __CPUTIMER(X) \
165 [X] = { \
166 .muxname = "timer", \
167 .muxdata = NULL, \
168 .irqmask = 1 << IRQ_CPU_TIMER##X, \
169 .doirq = frv_cputimer_doirq, \
170 }
171
172 __CPUTIMER(0),
173 __CPUTIMER(1),
174 __CPUTIMER(2),
175};
176
177/*****************************************************************************/
178/*
179 * external CPU interrupts - can't tell directly whether they've generated an interrupt or not
180 */
181static void frv_cpuexternal_doirq(struct irq_source *source)
182{
183 distribute_irqs(&frv_cpu_irqs, source->irqmask);
184}
185
186struct irq_source frv_cpuexternal[8] = {
187#define __CPUEXTERNAL(X) \
188 [X] = { \
189 .muxname = "ext", \
190 .muxdata = NULL, \
191 .irqmask = 1 << IRQ_CPU_EXTERNAL##X, \
192 .doirq = frv_cpuexternal_doirq, \
193 }
194
195 __CPUEXTERNAL(0),
196 __CPUEXTERNAL(1),
197 __CPUEXTERNAL(2),
198 __CPUEXTERNAL(3),
199 __CPUEXTERNAL(4),
200 __CPUEXTERNAL(5),
201 __CPUEXTERNAL(6),
202 __CPUEXTERNAL(7),
203};
204
/*
 * write routing register N with four 4-bit level fields
 * - A lands in bits 31-28, B in 27-24, C in 23-20 and D in 19-16
 * - every argument is parenthesised so that expressions can be passed safely
 * - the shifts are done on unsigned long: a level of 8 or more shifted left
 *   by 28 would overflow a signed int
 */
#define set_IRR(N, A, B, C, D)						\
	__set_IRR((N), ((unsigned long)(A) << 28) |			\
		       ((unsigned long)(B) << 24) |			\
		       ((unsigned long)(C) << 20) |			\
		       ((unsigned long)(D) << 16))
206
207struct irq_group frv_cpu_irqs = {
208 .sources = {
209 [IRQ_CPU_UART0] = &frv_cpuuart[0],
210 [IRQ_CPU_UART1] = &frv_cpuuart[1],
211 [IRQ_CPU_TIMER0] = &frv_cputimer[0],
212 [IRQ_CPU_TIMER1] = &frv_cputimer[1],
213 [IRQ_CPU_TIMER2] = &frv_cputimer[2],
214 [IRQ_CPU_DMA0] = &frv_cpudma[0],
215 [IRQ_CPU_DMA1] = &frv_cpudma[1],
216 [IRQ_CPU_DMA2] = &frv_cpudma[2],
217 [IRQ_CPU_DMA3] = &frv_cpudma[3],
218 [IRQ_CPU_DMA4] = &frv_cpudma[4],
219 [IRQ_CPU_DMA5] = &frv_cpudma[5],
220 [IRQ_CPU_DMA6] = &frv_cpudma[6],
221 [IRQ_CPU_DMA7] = &frv_cpudma[7],
222 [IRQ_CPU_EXTERNAL0] = &frv_cpuexternal[0],
223 [IRQ_CPU_EXTERNAL1] = &frv_cpuexternal[1],
224 [IRQ_CPU_EXTERNAL2] = &frv_cpuexternal[2],
225 [IRQ_CPU_EXTERNAL3] = &frv_cpuexternal[3],
226 [IRQ_CPU_EXTERNAL4] = &frv_cpuexternal[4],
227 [IRQ_CPU_EXTERNAL5] = &frv_cpuexternal[5],
228 [IRQ_CPU_EXTERNAL6] = &frv_cpuexternal[6],
229 [IRQ_CPU_EXTERNAL7] = &frv_cpuexternal[7],
230 },
231};
232
233/*****************************************************************************/
234/*
235 * route the CPU's interrupt sources
236 */
237void __init route_cpu_irqs(void)
238{
239 frv_irq_set_group(&frv_cpu_irqs);
240
241 __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 IRQ detect levels */
242 __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 IRQ detect levels */
243
244 /* route UART and error interrupts */
245 frv_irq_route(&frv_cpuuart[0], IRQ_UART0_LEVEL);
246 frv_irq_route(&frv_cpuuart[1], IRQ_UART1_LEVEL);
247
248 set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL, IRQ_UART1_LEVEL, IRQ_UART0_LEVEL);
249
250 /* route DMA channel interrupts */
251 frv_irq_route(&frv_cpudma[0], IRQ_DMA0_LEVEL);
252 frv_irq_route(&frv_cpudma[1], IRQ_DMA1_LEVEL);
253 frv_irq_route(&frv_cpudma[2], IRQ_DMA2_LEVEL);
254 frv_irq_route(&frv_cpudma[3], IRQ_DMA3_LEVEL);
255 frv_irq_route(&frv_cpudma[4], IRQ_DMA4_LEVEL);
256 frv_irq_route(&frv_cpudma[5], IRQ_DMA5_LEVEL);
257 frv_irq_route(&frv_cpudma[6], IRQ_DMA6_LEVEL);
258 frv_irq_route(&frv_cpudma[7], IRQ_DMA7_LEVEL);
259
260 set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL, IRQ_DMA0_LEVEL);
261 set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL, IRQ_DMA4_LEVEL);
262
263 /* route timer interrupts */
264 frv_irq_route(&frv_cputimer[0], IRQ_TIMER0_LEVEL);
265 frv_irq_route(&frv_cputimer[1], IRQ_TIMER1_LEVEL);
266 frv_irq_route(&frv_cputimer[2], IRQ_TIMER2_LEVEL);
267
268 set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL);
269
270 /* route external interrupts */
271 frv_irq_route(&frv_cpuexternal[0], IRQ_XIRQ0_LEVEL);
272 frv_irq_route(&frv_cpuexternal[1], IRQ_XIRQ1_LEVEL);
273 frv_irq_route(&frv_cpuexternal[2], IRQ_XIRQ2_LEVEL);
274 frv_irq_route(&frv_cpuexternal[3], IRQ_XIRQ3_LEVEL);
275 frv_irq_route(&frv_cpuexternal[4], IRQ_XIRQ4_LEVEL);
276 frv_irq_route(&frv_cpuexternal[5], IRQ_XIRQ5_LEVEL);
277 frv_irq_route(&frv_cpuexternal[6], IRQ_XIRQ6_LEVEL);
278 frv_irq_route(&frv_cpuexternal[7], IRQ_XIRQ7_LEVEL);
279
280 set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL, IRQ_XIRQ4_LEVEL);
281 set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL, IRQ_XIRQ0_LEVEL);
282
283#if defined(CONFIG_MB93091_VDK)
284 __set_TM1(0x55550000); /* XIRQ7-0 all active low */
285#elif defined(CONFIG_MB93093_PDK)
286 __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */
287#else
288#error dont know external IRQ trigger levels for this setup
289#endif
290
291} /* end route_cpu_irqs() */
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index 08967010be04..5ac041c7c0a4 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -1,6 +1,6 @@
1/* irq.c: FRV IRQ handling 1/* irq.c: FRV IRQ handling
2 * 2 *
3 * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -9,13 +9,6 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12/*
13 * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
14 *
15 * IRQs are in fact implemented a bit like signal handlers for the kernel.
16 * Naturally it's not a 1:1 relation, but there are similarities.
17 */
18
19#include <linux/ptrace.h> 12#include <linux/ptrace.h>
20#include <linux/errno.h> 13#include <linux/errno.h>
21#include <linux/signal.h> 14#include <linux/signal.h>
@@ -43,19 +36,16 @@
43#include <asm/delay.h> 36#include <asm/delay.h>
44#include <asm/irq.h> 37#include <asm/irq.h>
45#include <asm/irc-regs.h> 38#include <asm/irc-regs.h>
46#include <asm/irq-routing.h>
47#include <asm/gdb-stub.h> 39#include <asm/gdb-stub.h>
48 40
49extern void __init fpga_init(void); 41#define set_IRR(N,A,B,C,D) __set_IRR(N, (A << 28) | (B << 24) | (C << 20) | (D << 16))
50extern void __init route_mb93493_irqs(void);
51
52static void register_irq_proc (unsigned int irq);
53 42
54/* 43extern void __init fpga_init(void);
55 * Special irq handlers. 44#ifdef CONFIG_FUJITSU_MB93493
56 */ 45extern void __init mb93493_init(void);
46#endif
57 47
58irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) { return IRQ_HANDLED; } 48#define __reg16(ADDR) (*(volatile unsigned short *)(ADDR))
59 49
60atomic_t irq_err_count; 50atomic_t irq_err_count;
61 51
@@ -64,215 +54,86 @@ atomic_t irq_err_count;
64 */ 54 */
65int show_interrupts(struct seq_file *p, void *v) 55int show_interrupts(struct seq_file *p, void *v)
66{ 56{
67 struct irqaction *action; 57 int i = *(loff_t *) v, cpu;
68 struct irq_group *group; 58 struct irqaction * action;
69 unsigned long flags; 59 unsigned long flags;
70 int level, grp, ix, i, j;
71
72 i = *(loff_t *) v;
73
74 switch (i) {
75 case 0:
76 seq_printf(p, " ");
77 for_each_online_cpu(j)
78 seq_printf(p, "CPU%d ",j);
79
80 seq_putc(p, '\n');
81 break;
82 60
83 case 1 ... NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP: 61 if (i == 0) {
84 local_irq_save(flags); 62 char cpuname[12];
85
86 grp = (i - 1) / NR_IRQ_ACTIONS_PER_GROUP;
87 group = irq_groups[grp];
88 if (!group)
89 goto skip;
90
91 ix = (i - 1) % NR_IRQ_ACTIONS_PER_GROUP;
92 action = group->actions[ix];
93 if (!action)
94 goto skip;
95
96 seq_printf(p, "%3d: ", i - 1);
97
98#ifndef CONFIG_SMP
99 seq_printf(p, "%10u ", kstat_irqs(i));
100#else
101 for_each_online_cpu(j)
102 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
103#endif
104
105 level = group->sources[ix]->level - frv_irq_levels;
106
107 seq_printf(p, " %12s@%x", group->sources[ix]->muxname, level);
108 seq_printf(p, " %s", action->name);
109
110 for (action = action->next; action; action = action->next)
111 seq_printf(p, ", %s", action->name);
112 63
64 seq_printf(p, " ");
65 for_each_present_cpu(cpu) {
66 sprintf(cpuname, "CPU%d", cpu);
67 seq_printf(p, " %10s", cpuname);
68 }
113 seq_putc(p, '\n'); 69 seq_putc(p, '\n');
114skip: 70 }
115 local_irq_restore(flags);
116 break;
117 71
118 case NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP + 1: 72 if (i < NR_IRQS) {
119 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); 73 spin_lock_irqsave(&irq_desc[i].lock, flags);
120 break; 74 action = irq_desc[i].action;
75 if (action) {
76 seq_printf(p, "%3d: ", i);
77 for_each_present_cpu(cpu)
78 seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
79 seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-");
80 seq_printf(p, " %s", action->name);
81 for (action = action->next;
82 action;
83 action = action->next)
84 seq_printf(p, ", %s", action->name);
85
86 seq_putc(p, '\n');
87 }
121 88
122 default: 89 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
123 break; 90 } else if (i == NR_IRQS) {
91 seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count));
124 } 92 }
125 93
126 return 0; 94 return 0;
127} 95}
128 96
129
130/* 97/*
131 * Generic enable/disable code: this just calls 98 * on-CPU PIC operations
132 * down into the PIC-specific version for the actual
133 * hardware disable after having gotten the irq
134 * controller lock.
135 */ 99 */
136 100static void frv_cpupic_ack(unsigned int irqlevel)
137/**
138 * disable_irq_nosync - disable an irq without waiting
139 * @irq: Interrupt to disable
140 *
141 * Disable the selected interrupt line. Disables and Enables are
142 * nested.
143 * Unlike disable_irq(), this function does not ensure existing
144 * instances of the IRQ handler have completed before returning.
145 *
146 * This function may be called from IRQ context.
147 */
148
149void disable_irq_nosync(unsigned int irq)
150{ 101{
151 struct irq_source *source; 102 __clr_RC(irqlevel);
152 struct irq_group *group; 103 __clr_IRL();
153 struct irq_level *level;
154 unsigned long flags;
155 int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1);
156
157 group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
158 if (!group)
159 BUG();
160
161 source = group->sources[idx];
162 if (!source)
163 BUG();
164
165 level = source->level;
166
167 spin_lock_irqsave(&level->lock, flags);
168
169 if (group->control) {
170 if (!group->disable_cnt[idx]++)
171 group->control(group, idx, 0);
172 } else if (!level->disable_count++) {
173 __set_MASK(level - frv_irq_levels);
174 }
175
176 spin_unlock_irqrestore(&level->lock, flags);
177} 104}
178 105
179EXPORT_SYMBOL(disable_irq_nosync); 106static void frv_cpupic_mask(unsigned int irqlevel)
180
181/**
182 * disable_irq - disable an irq and wait for completion
183 * @irq: Interrupt to disable
184 *
185 * Disable the selected interrupt line. Enables and Disables are
186 * nested.
187 * This function waits for any pending IRQ handlers for this interrupt
188 * to complete before returning. If you use this function while
189 * holding a resource the IRQ handler may need you will deadlock.
190 *
191 * This function may be called - with care - from IRQ context.
192 */
193
194void disable_irq(unsigned int irq)
195{ 107{
196 disable_irq_nosync(irq); 108 __set_MASK(irqlevel);
197
198#ifdef CONFIG_SMP
199 if (!local_irq_count(smp_processor_id())) {
200 do {
201 barrier();
202 } while (irq_desc[irq].status & IRQ_INPROGRESS);
203 }
204#endif
205} 109}
206 110
207EXPORT_SYMBOL(disable_irq); 111static void frv_cpupic_mask_ack(unsigned int irqlevel)
208
209/**
210 * enable_irq - enable handling of an irq
211 * @irq: Interrupt to enable
212 *
213 * Undoes the effect of one call to disable_irq(). If this
214 * matches the last disable, processing of interrupts on this
215 * IRQ line is re-enabled.
216 *
217 * This function may be called from IRQ context.
218 */
219
220void enable_irq(unsigned int irq)
221{ 112{
222 struct irq_source *source; 113 __set_MASK(irqlevel);
223 struct irq_group *group; 114 __clr_RC(irqlevel);
224 struct irq_level *level; 115 __clr_IRL();
225 unsigned long flags; 116}
226 int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1);
227 int count;
228
229 group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
230 if (!group)
231 BUG();
232
233 source = group->sources[idx];
234 if (!source)
235 BUG();
236
237 level = source->level;
238
239 spin_lock_irqsave(&level->lock, flags);
240
241 if (group->control)
242 count = group->disable_cnt[idx];
243 else
244 count = level->disable_count;
245
246 switch (count) {
247 case 1:
248 if (group->control) {
249 if (group->actions[idx])
250 group->control(group, idx, 1);
251 } else {
252 if (level->usage)
253 __clr_MASK(level - frv_irq_levels);
254 }
255 /* fall-through */
256
257 default:
258 count--;
259 break;
260
261 case 0:
262 printk("enable_irq(%u) unbalanced from %p\n", irq, __builtin_return_address(0));
263 }
264 117
265 if (group->control) 118static void frv_cpupic_unmask(unsigned int irqlevel)
266 group->disable_cnt[idx] = count; 119{
267 else 120 __clr_MASK(irqlevel);
268 level->disable_count = count; 121}
269 122
270 spin_unlock_irqrestore(&level->lock, flags); 123static void frv_cpupic_end(unsigned int irqlevel)
124{
125 __clr_MASK(irqlevel);
271} 126}
272 127
273EXPORT_SYMBOL(enable_irq); 128static struct irq_chip frv_cpu_pic = {
129 .name = "cpu",
130 .ack = frv_cpupic_ack,
131 .mask = frv_cpupic_mask,
132 .mask_ack = frv_cpupic_mask_ack,
133 .unmask = frv_cpupic_unmask,
134 .end = frv_cpupic_end,
135};
274 136
275/*****************************************************************************/
276/* 137/*
277 * handles all normal device IRQ's 138 * handles all normal device IRQ's
278 * - registers are referred to by the __frame variable (GR28) 139 * - registers are referred to by the __frame variable (GR28)
@@ -281,463 +142,65 @@ EXPORT_SYMBOL(enable_irq);
281 */ 142 */
282asmlinkage void do_IRQ(void) 143asmlinkage void do_IRQ(void)
283{ 144{
284 struct irq_source *source;
285 int level, cpu;
286
287 irq_enter(); 145 irq_enter();
288 146 generic_handle_irq(__get_IRL(), __frame);
289 level = (__frame->tbr >> 4) & 0xf;
290 cpu = smp_processor_id();
291
292 if ((unsigned long) __frame - (unsigned long) (current + 1) < 512)
293 BUG();
294
295 __set_MASK(level);
296 __clr_RC(level);
297 __clr_IRL();
298
299 kstat_this_cpu.irqs[level]++;
300
301 for (source = frv_irq_levels[level].sources; source; source = source->next)
302 source->doirq(source);
303
304 __clr_MASK(level);
305
306 irq_exit(); 147 irq_exit();
148}
307 149
308} /* end do_IRQ() */
309
310/*****************************************************************************/
311/* 150/*
312 * handles all NMIs when not co-opted by the debugger 151 * handles all NMIs when not co-opted by the debugger
313 * - registers are referred to by the __frame variable (GR28) 152 * - registers are referred to by the __frame variable (GR28)
314 */ 153 */
315asmlinkage void do_NMI(void) 154asmlinkage void do_NMI(void)
316{ 155{
317} /* end do_NMI() */
318
319/*****************************************************************************/
320/**
321 * request_irq - allocate an interrupt line
322 * @irq: Interrupt line to allocate
323 * @handler: Function to be called when the IRQ occurs
324 * @irqflags: Interrupt type flags
325 * @devname: An ascii name for the claiming device
326 * @dev_id: A cookie passed back to the handler function
327 *
328 * This call allocates interrupt resources and enables the
329 * interrupt line and IRQ handling. From the point this
330 * call is made your handler function may be invoked. Since
331 * your handler function must clear any interrupt the board
332 * raises, you must take care both to initialise your hardware
333 * and to set up the interrupt handler in the right order.
334 *
335 * Dev_id must be globally unique. Normally the address of the
336 * device data structure is used as the cookie. Since the handler
337 * receives this value it makes sense to use it.
338 *
339 * If your interrupt is shared you must pass a non NULL dev_id
340 * as this is required when freeing the interrupt.
341 *
342 * Flags:
343 *
344 * IRQF_SHARED Interrupt is shared
345 *
346 * IRQF_DISABLED Disable local interrupts while processing
347 *
348 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
349 *
350 */
351
352int request_irq(unsigned int irq,
353 irqreturn_t (*handler)(int, void *, struct pt_regs *),
354 unsigned long irqflags,
355 const char * devname,
356 void *dev_id)
357{
358 int retval;
359 struct irqaction *action;
360
361#if 1
362 /*
363 * Sanity-check: shared interrupts should REALLY pass in
364 * a real dev-ID, otherwise we'll have trouble later trying
365 * to figure out which interrupt is which (messes up the
366 * interrupt freeing logic etc).
367 */
368 if (irqflags & IRQF_SHARED) {
369 if (!dev_id)
370 printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n",
371 devname, (&irq)[-1]);
372 }
373#endif
374
375 if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS)
376 return -EINVAL;
377 if (!handler)
378 return -EINVAL;
379
380 action = (struct irqaction *) kmalloc(sizeof(struct irqaction), GFP_KERNEL);
381 if (!action)
382 return -ENOMEM;
383
384 action->handler = handler;
385 action->flags = irqflags;
386 action->mask = CPU_MASK_NONE;
387 action->name = devname;
388 action->next = NULL;
389 action->dev_id = dev_id;
390
391 retval = setup_irq(irq, action);
392 if (retval)
393 kfree(action);
394 return retval;
395}
396
397EXPORT_SYMBOL(request_irq);
398
399/**
400 * free_irq - free an interrupt
401 * @irq: Interrupt line to free
402 * @dev_id: Device identity to free
403 *
404 * Remove an interrupt handler. The handler is removed and if the
405 * interrupt line is no longer in use by any driver it is disabled.
406 * On a shared IRQ the caller must ensure the interrupt is disabled
407 * on the card it drives before calling this function. The function
408 * does not return until any executing interrupts for this IRQ
409 * have completed.
410 *
411 * This function may be called from interrupt context.
412 *
413 * Bugs: Attempting to free an irq in a handler for the same irq hangs
414 * the machine.
415 */
416
417void free_irq(unsigned int irq, void *dev_id)
418{
419 struct irq_source *source;
420 struct irq_group *group;
421 struct irq_level *level;
422 struct irqaction **p, **pp;
423 unsigned long flags;
424
425 if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS)
426 return;
427
428 group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
429 if (!group)
430 BUG();
431
432 source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
433 if (!source)
434 BUG();
435
436 level = source->level;
437 p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
438
439 spin_lock_irqsave(&level->lock, flags);
440
441 for (pp = p; *pp; pp = &(*pp)->next) {
442 struct irqaction *action = *pp;
443
444 if (action->dev_id != dev_id)
445 continue;
446
447 /* found it - remove from the list of entries */
448 *pp = action->next;
449
450 level->usage--;
451
452 if (p == pp && group->control)
453 group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 0);
454
455 if (level->usage == 0)
456 __set_MASK(level - frv_irq_levels);
457
458 spin_unlock_irqrestore(&level->lock,flags);
459
460#ifdef CONFIG_SMP
461 /* Wait to make sure it's not being used on another CPU */
462 while (desc->status & IRQ_INPROGRESS)
463 barrier();
464#endif
465 kfree(action);
466 return;
467 }
468}
469
470EXPORT_SYMBOL(free_irq);
471
472/*
473 * IRQ autodetection code..
474 *
475 * This depends on the fact that any interrupt that comes in on to an
476 * unassigned IRQ will cause GxICR_DETECT to be set
477 */
478
479static DECLARE_MUTEX(probe_sem);
480
481/**
482 * probe_irq_on - begin an interrupt autodetect
483 *
484 * Commence probing for an interrupt. The interrupts are scanned
485 * and a mask of potential interrupt lines is returned.
486 *
487 */
488
489unsigned long probe_irq_on(void)
490{
491 down(&probe_sem);
492 return 0;
493} 156}
494 157
495EXPORT_SYMBOL(probe_irq_on);
496
497/* 158/*
498 * Return a mask of triggered interrupts (this 159 * initialise the interrupt system
499 * can handle only legacy ISA interrupts).
500 */
501
502/**
503 * probe_irq_mask - scan a bitmap of interrupt lines
504 * @val: mask of interrupts to consider
505 *
506 * Scan the ISA bus interrupt lines and return a bitmap of
507 * active interrupts. The interrupt probe logic state is then
508 * returned to its previous value.
509 *
510 * Note: we need to scan all the irq's even though we will
511 * only return ISA irq numbers - just so that we reset them
512 * all to a known state.
513 */
514unsigned int probe_irq_mask(unsigned long xmask)
515{
516 up(&probe_sem);
517 return 0;
518}
519
520EXPORT_SYMBOL(probe_irq_mask);
521
522/*
523 * Return the one interrupt that triggered (this can
524 * handle any interrupt source).
525 */
526
527/**
528 * probe_irq_off - end an interrupt autodetect
529 * @xmask: mask of potential interrupts (unused)
530 *
531 * Scans the unused interrupt lines and returns the line which
532 * appears to have triggered the interrupt. If no interrupt was
533 * found then zero is returned. If more than one interrupt is
534 * found then minus the first candidate is returned to indicate
535 * their is doubt.
536 *
537 * The interrupt probe logic state is returned to its previous
538 * value.
539 *
540 * BUGS: When used in a module (which arguably shouldnt happen)
541 * nothing prevents two IRQ probe callers from overlapping. The
542 * results of this are non-optimal.
543 */ 160 */
544 161void __init init_IRQ(void)
545int probe_irq_off(unsigned long xmask)
546{
547 up(&probe_sem);
548 return -1;
549}
550
551EXPORT_SYMBOL(probe_irq_off);
552
553/* this was setup_x86_irq but it seems pretty generic */
554int setup_irq(unsigned int irq, struct irqaction *new)
555{
556 struct irq_source *source;
557 struct irq_group *group;
558 struct irq_level *level;
559 struct irqaction **p, **pp;
560 unsigned long flags;
561
562 group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP];
563 if (!group)
564 BUG();
565
566 source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
567 if (!source)
568 BUG();
569
570 level = source->level;
571
572 p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)];
573
574 /*
575 * Some drivers like serial.c use request_irq() heavily,
576 * so we have to be careful not to interfere with a
577 * running system.
578 */
579 if (new->flags & IRQF_SAMPLE_RANDOM) {
580 /*
581 * This function might sleep, we want to call it first,
582 * outside of the atomic block.
583 * Yes, this might clear the entropy pool if the wrong
584 * driver is attempted to be loaded, without actually
585 * installing a new handler, but is this really a problem,
586 * only the sysadmin is able to do this.
587 */
588 rand_initialize_irq(irq);
589 }
590
591 /* must juggle the interrupt processing stuff with interrupts disabled */
592 spin_lock_irqsave(&level->lock, flags);
593
594 /* can't share interrupts unless all parties agree to */
595 if (level->usage != 0 && !(level->flags & new->flags & IRQF_SHARED)) {
596 spin_unlock_irqrestore(&level->lock,flags);
597 return -EBUSY;
598 }
599
600 /* add new interrupt at end of irq queue */
601 pp = p;
602 while (*pp)
603 pp = &(*pp)->next;
604
605 *pp = new;
606
607 level->usage++;
608 level->flags = new->flags;
609
610 /* turn the interrupts on */
611 if (level->usage == 1)
612 __clr_MASK(level - frv_irq_levels);
613
614 if (p == pp && group->control)
615 group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 1);
616
617 spin_unlock_irqrestore(&level->lock, flags);
618 register_irq_proc(irq);
619 return 0;
620}
621
622static struct proc_dir_entry * root_irq_dir;
623static struct proc_dir_entry * irq_dir [NR_IRQS];
624
625#define HEX_DIGITS 8
626
627static unsigned int parse_hex_value (const char __user *buffer,
628 unsigned long count, unsigned long *ret)
629{
630 unsigned char hexnum [HEX_DIGITS];
631 unsigned long value;
632 int i;
633
634 if (!count)
635 return -EINVAL;
636 if (count > HEX_DIGITS)
637 count = HEX_DIGITS;
638 if (copy_from_user(hexnum, buffer, count))
639 return -EFAULT;
640
641 /*
642 * Parse the first 8 characters as a hex string, any non-hex char
643 * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
644 */
645 value = 0;
646
647 for (i = 0; i < count; i++) {
648 unsigned int c = hexnum[i];
649
650 switch (c) {
651 case '0' ... '9': c -= '0'; break;
652 case 'a' ... 'f': c -= 'a'-10; break;
653 case 'A' ... 'F': c -= 'A'-10; break;
654 default:
655 goto out;
656 }
657 value = (value << 4) | c;
658 }
659out:
660 *ret = value;
661 return 0;
662}
663
664
665static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
666 int count, int *eof, void *data)
667{
668 unsigned long *mask = (unsigned long *) data;
669 if (count < HEX_DIGITS+1)
670 return -EINVAL;
671 return sprintf (page, "%08lx\n", *mask);
672}
673
674static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
675 unsigned long count, void *data)
676{
677 unsigned long *mask = (unsigned long *) data, full_count = count, err;
678 unsigned long new_value;
679
680 show_state();
681 err = parse_hex_value(buffer, count, &new_value);
682 if (err)
683 return err;
684
685 *mask = new_value;
686 return full_count;
687}
688
689#define MAX_NAMELEN 10
690
691static void register_irq_proc (unsigned int irq)
692{
693 char name [MAX_NAMELEN];
694
695 if (!root_irq_dir || irq_dir[irq])
696 return;
697
698 memset(name, 0, MAX_NAMELEN);
699 sprintf(name, "%d", irq);
700
701 /* create /proc/irq/1234 */
702 irq_dir[irq] = proc_mkdir(name, root_irq_dir);
703}
704
705unsigned long prof_cpu_mask = -1;
706
707void init_irq_proc (void)
708{ 162{
709 struct proc_dir_entry *entry; 163 int level;
710 int i;
711 164
712 /* create /proc/irq */ 165 for (level = 1; level <= 14; level++)
713 root_irq_dir = proc_mkdir("irq", NULL); 166 set_irq_chip_and_handler(level, &frv_cpu_pic,
167 handle_level_irq);
714 168
715 /* create /proc/irq/prof_cpu_mask */ 169 set_irq_handler(IRQ_CPU_TIMER0, handle_edge_irq);
716 entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
717 if (!entry)
718 return;
719 170
720 entry->nlink = 1; 171 /* set the trigger levels for internal interrupt sources
721 entry->data = (void *)&prof_cpu_mask; 172 * - timers all falling-edge
722 entry->read_proc = prof_cpu_mask_read_proc; 173 * - ERR0 is rising-edge
723 entry->write_proc = prof_cpu_mask_write_proc; 174 * - all others are high-level
724
725 /*
726 * Create entries for all existing IRQs.
727 */ 175 */
728 for (i = 0; i < NR_IRQS; i++) 176 __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 */
729 register_irq_proc(i); 177 __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 */
730} 178
179 /* route internal interrupts */
180 set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL,
181 IRQ_DMA0_LEVEL);
182 set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL);
183 set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL,
184 IRQ_UART1_LEVEL, IRQ_UART0_LEVEL);
185 set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL,
186 IRQ_DMA4_LEVEL);
187
188 /* route external interrupts */
189 set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL,
190 IRQ_XIRQ4_LEVEL);
191 set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL,
192 IRQ_XIRQ0_LEVEL);
193
194#if defined(CONFIG_MB93091_VDK)
195 __set_TM1(0x55550000); /* XIRQ7-0 all active low */
196#elif defined(CONFIG_MB93093_PDK)
197 __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */
198#else
199#error dont know external IRQ trigger levels for this setup
200#endif
731 201
732/*****************************************************************************/
733/*
734 * initialise the interrupt system
735 */
736void __init init_IRQ(void)
737{
738 route_cpu_irqs();
739 fpga_init(); 202 fpga_init();
740#ifdef CONFIG_FUJITSU_MB93493 203#ifdef CONFIG_FUJITSU_MB93493
741 route_mb93493_irqs(); 204 mb93493_init();
742#endif 205#endif
743} /* end init_IRQ() */ 206}
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index af08ccd4ed6e..d96a57e5f030 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -43,7 +43,6 @@
43#include <asm/mb-regs.h> 43#include <asm/mb-regs.h>
44#include <asm/mb93493-regs.h> 44#include <asm/mb93493-regs.h>
45#include <asm/gdb-stub.h> 45#include <asm/gdb-stub.h>
46#include <asm/irq-routing.h>
47#include <asm/io.h> 46#include <asm/io.h>
48 47
49#ifdef CONFIG_BLK_DEV_INITRD 48#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 68a77fe3bb40..3d0284bccb94 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -26,7 +26,6 @@
26#include <asm/timer-regs.h> 26#include <asm/timer-regs.h>
27#include <asm/mb-regs.h> 27#include <asm/mb-regs.h>
28#include <asm/mb86943a.h> 28#include <asm/mb86943a.h>
29#include <asm/irq-routing.h>
30 29
31#include <linux/timex.h> 30#include <linux/timex.h>
32 31
diff --git a/arch/frv/mb93090-mb00/pci-irq.c b/arch/frv/mb93090-mb00/pci-irq.c
index 2278c80bd88c..ba587523c015 100644
--- a/arch/frv/mb93090-mb00/pci-irq.c
+++ b/arch/frv/mb93090-mb00/pci-irq.c
@@ -15,7 +15,6 @@
15 15
16#include <asm/io.h> 16#include <asm/io.h>
17#include <asm/smp.h> 17#include <asm/smp.h>
18#include <asm/irq-routing.h>
19 18
20#include "pci-frv.h" 19#include "pci-frv.h"
21 20
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index b5b4286f9dd4..3f3a0ed3539b 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -98,7 +98,7 @@ void show_mem(void)
98 */ 98 */
99void __init paging_init(void) 99void __init paging_init(void)
100{ 100{
101 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 101 unsigned long zones_size[MAX_NR_ZONES] = {0, };
102 102
103 /* allocate some pages for kernel housekeeping tasks */ 103 /* allocate some pages for kernel housekeeping tasks */
104 empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); 104 empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index d3d40bdc2d6a..e4f4199f97ab 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -138,7 +138,7 @@ void paging_init(void)
138#endif 138#endif
139 139
140 { 140 {
141 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 141 unsigned long zones_size[MAX_NR_ZONES] = {0, };
142 142
143 zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; 143 zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
144 zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; 144 zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b2751eadbc56..6189b0c28d6f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -494,7 +494,7 @@ config HIGHMEM64G
494endchoice 494endchoice
495 495
496choice 496choice
497 depends on EXPERIMENTAL && !X86_PAE 497 depends on EXPERIMENTAL
498 prompt "Memory split" if EMBEDDED 498 prompt "Memory split" if EMBEDDED
499 default VMSPLIT_3G 499 default VMSPLIT_3G
500 help 500 help
@@ -516,6 +516,7 @@ choice
516 config VMSPLIT_3G 516 config VMSPLIT_3G
517 bool "3G/1G user/kernel split" 517 bool "3G/1G user/kernel split"
518 config VMSPLIT_3G_OPT 518 config VMSPLIT_3G_OPT
519 depends on !HIGHMEM
519 bool "3G/1G user/kernel split (for full 1G low memory)" 520 bool "3G/1G user/kernel split (for full 1G low memory)"
520 config VMSPLIT_2G 521 config VMSPLIT_2G
521 bool "2G/2G user/kernel split" 522 bool "2G/2G user/kernel split"
@@ -794,6 +795,7 @@ config HOTPLUG_CPU
794config COMPAT_VDSO 795config COMPAT_VDSO
795 bool "Compat VDSO support" 796 bool "Compat VDSO support"
796 default y 797 default y
798 depends on !PARAVIRT
797 help 799 help
798 Map the VDSO to the predictable old-style address too. 800 Map the VDSO to the predictable old-style address too.
799 ---help--- 801 ---help---
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 8591f2fa920c..ff9ce4b5eaa8 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -1154,9 +1154,11 @@ out:
1154 1154
1155static void set_time(void) 1155static void set_time(void)
1156{ 1156{
1157 struct timespec ts;
1157 if (got_clock_diff) { /* Must know time zone in order to set clock */ 1158 if (got_clock_diff) { /* Must know time zone in order to set clock */
1158 xtime.tv_sec = get_cmos_time() + clock_cmos_diff; 1159 ts.tv_sec = get_cmos_time() + clock_cmos_diff;
1159 xtime.tv_nsec = 0; 1160 ts.tv_nsec = 0;
1161 do_settimeofday(&ts);
1160 } 1162 }
1161} 1163}
1162 1164
@@ -1232,13 +1234,8 @@ static int suspend(int vetoable)
1232 restore_processor_state(); 1234 restore_processor_state();
1233 1235
1234 local_irq_disable(); 1236 local_irq_disable();
1235 write_seqlock(&xtime_lock);
1236 spin_lock(&i8253_lock);
1237 reinit_timer();
1238 set_time(); 1237 set_time();
1239 1238 reinit_timer();
1240 spin_unlock(&i8253_lock);
1241 write_sequnlock(&xtime_lock);
1242 1239
1243 if (err == APM_NO_ERROR) 1240 if (err == APM_NO_ERROR)
1244 err = APM_SUCCESS; 1241 err = APM_SUCCESS;
@@ -1365,9 +1362,7 @@ static void check_events(void)
1365 ignore_bounce = 1; 1362 ignore_bounce = 1;
1366 if ((event != APM_NORMAL_RESUME) 1363 if ((event != APM_NORMAL_RESUME)
1367 || (ignore_normal_resume == 0)) { 1364 || (ignore_normal_resume == 0)) {
1368 write_seqlock_irq(&xtime_lock);
1369 set_time(); 1365 set_time();
1370 write_sequnlock_irq(&xtime_lock);
1371 device_resume(); 1366 device_resume();
1372 pm_send_all(PM_RESUME, (void *)0); 1367 pm_send_all(PM_RESUME, (void *)0);
1373 queue_event(event, NULL); 1368 queue_event(event, NULL);
@@ -1383,9 +1378,7 @@ static void check_events(void)
1383 break; 1378 break;
1384 1379
1385 case APM_UPDATE_TIME: 1380 case APM_UPDATE_TIME:
1386 write_seqlock_irq(&xtime_lock);
1387 set_time(); 1381 set_time();
1388 write_sequnlock_irq(&xtime_lock);
1389 break; 1382 break;
1390 1383
1391 case APM_CRITICAL_SUSPEND: 1384 case APM_CRITICAL_SUSPEND:
@@ -2339,6 +2332,7 @@ static int __init apm_init(void)
2339 ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD); 2332 ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
2340 if (ret < 0) { 2333 if (ret < 0) {
2341 printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n"); 2334 printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
2335 remove_proc_entry("apm", NULL);
2342 return -ENOMEM; 2336 return -ENOMEM;
2343 } 2337 }
2344 2338
@@ -2348,7 +2342,13 @@ static int __init apm_init(void)
2348 return 0; 2342 return 0;
2349 } 2343 }
2350 2344
2351 misc_register(&apm_device); 2345 /*
2346 * Note we don't actually care if the misc_device cannot be registered.
2347 * this driver can do its job without it, even if userspace can't
2348 * control it. just log the error
2349 */
2350 if (misc_register(&apm_device))
2351 printk(KERN_WARNING "apm: Could not register misc device.\n");
2352 2352
2353 if (HZ != 100) 2353 if (HZ != 100)
2354 idle_period = (idle_period * HZ) / 100; 2354 idle_period = (idle_period * HZ) / 100;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index 169ac8e0db68..0b61eed8bbd8 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -243,7 +243,7 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
243 * has been called. 243 * has been called.
244 */ 244 */
245 245
246static void prepare_set(void) 246static void prepare_set(void) __acquires(set_atomicity_lock)
247{ 247{
248 unsigned long cr0; 248 unsigned long cr0;
249 249
@@ -274,7 +274,7 @@ static void prepare_set(void)
274 mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); 274 mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
275} 275}
276 276
277static void post_set(void) 277static void post_set(void) __releases(set_atomicity_lock)
278{ 278{
279 /* Flush TLBs (no need to flush caches - they are disabled) */ 279 /* Flush TLBs (no need to flush caches - they are disabled) */
280 __flush_tlb(); 280 __flush_tlb();
diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S
index d3ee73a3eee3..ef00bb77d7e4 100644
--- a/arch/i386/kernel/efi_stub.S
+++ b/arch/i386/kernel/efi_stub.S
@@ -7,7 +7,6 @@
7 7
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/page.h> 9#include <asm/page.h>
10#include <asm/pgtable.h>
11 10
12/* 11/*
13 * efi_call_phys(void *, ...) is a function with variable parameters. 12 * efi_call_phys(void *, ...) is a function with variable parameters.
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 54cfeabbc5e4..84278e0093a2 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -145,14 +145,10 @@ real_mode_gdt_entries [3] =
145 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ 145 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
146}; 146};
147 147
148static struct 148static struct Xgt_desc_struct
149{ 149real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
150 unsigned short size __attribute__ ((packed)); 150real_mode_idt = { 0x3ff, 0 },
151 unsigned long long * base __attribute__ ((packed)); 151no_idt = { 0, 0 };
152}
153real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
154real_mode_idt = { 0x3ff, NULL },
155no_idt = { 0, NULL };
156 152
157 153
158/* This is 16-bit protected mode code to disable paging and the cache, 154/* This is 16-bit protected mode code to disable paging and the cache,
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index f1682206d304..16d99444cf66 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -53,6 +53,7 @@
53#include <asm/apic.h> 53#include <asm/apic.h>
54#include <asm/e820.h> 54#include <asm/e820.h>
55#include <asm/mpspec.h> 55#include <asm/mpspec.h>
56#include <asm/mmzone.h>
56#include <asm/setup.h> 57#include <asm/setup.h>
57#include <asm/arch_hooks.h> 58#include <asm/arch_hooks.h>
58#include <asm/sections.h> 59#include <asm/sections.h>
@@ -934,6 +935,24 @@ static void __init parse_cmdline_early (char ** cmdline_p)
934} 935}
935 936
936/* 937/*
938 * reservetop=size reserves a hole at the top of the kernel address space which
939 * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
940 * so relocating the fixmap can be done before paging initialization.
941 */
942static int __init parse_reservetop(char *arg)
943{
944 unsigned long address;
945
946 if (!arg)
947 return -EINVAL;
948
949 address = memparse(arg, &arg);
950 reserve_top_address(address);
951 return 0;
952}
953early_param("reservetop", parse_reservetop);
954
955/*
937 * Callback for efi_memory_walk. 956 * Callback for efi_memory_walk.
938 */ 957 */
939static int __init 958static int __init
@@ -1181,7 +1200,7 @@ static unsigned long __init setup_memory(void)
1181 1200
1182void __init zone_sizes_init(void) 1201void __init zone_sizes_init(void)
1183{ 1202{
1184 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 1203 unsigned long zones_size[MAX_NR_ZONES] = { 0, };
1185 unsigned int max_dma, low; 1204 unsigned int max_dma, low;
1186 1205
1187 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; 1206 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
@@ -1258,7 +1277,7 @@ void __init setup_bootmem_allocator(void)
1258 */ 1277 */
1259 find_smp_config(); 1278 find_smp_config();
1260#endif 1279#endif
1261 1280 numa_kva_reserve();
1262#ifdef CONFIG_BLK_DEV_INITRD 1281#ifdef CONFIG_BLK_DEV_INITRD
1263 if (LOADER_TYPE && INITRD_START) { 1282 if (LOADER_TYPE && INITRD_START) {
1264 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { 1283 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index c10789d7a9d3..465188e2d701 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -634,3 +634,69 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
634 } 634 }
635} 635}
636 636
637/*
638 * this function sends a 'generic call function' IPI to one other CPU
639 * in the system.
640 *
641 * cpu is a standard Linux logical CPU number.
642 */
643static void
644__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
645 int nonatomic, int wait)
646{
647 struct call_data_struct data;
648 int cpus = 1;
649
650 data.func = func;
651 data.info = info;
652 atomic_set(&data.started, 0);
653 data.wait = wait;
654 if (wait)
655 atomic_set(&data.finished, 0);
656
657 call_data = &data;
658 wmb();
659 /* Send a message to all other CPUs and wait for them to respond */
660 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
661
662 /* Wait for response */
663 while (atomic_read(&data.started) != cpus)
664 cpu_relax();
665
666 if (!wait)
667 return;
668
669 while (atomic_read(&data.finished) != cpus)
670 cpu_relax();
671}
672
673/*
674 * smp_call_function_single - Run a function on another CPU
675 * @func: The function to run. This must be fast and non-blocking.
676 * @info: An arbitrary pointer to pass to the function.
677 * @nonatomic: Currently unused.
678 * @wait: If true, wait until function has completed on other CPUs.
679 *
680 * Retrurns 0 on success, else a negative status code.
681 *
682 * Does not return until the remote CPU is nearly ready to execute <func>
683 * or is or has executed.
684 */
685
686int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
687 int nonatomic, int wait)
688{
689 /* prevent preemption and reschedule on another processor */
690 int me = get_cpu();
691 if (cpu == me) {
692 WARN_ON(1);
693 put_cpu();
694 return -EBUSY;
695 }
696 spin_lock_bh(&call_lock);
697 __smp_call_function_single(cpu, func, info, nonatomic, wait);
698 spin_unlock_bh(&call_lock);
699 put_cpu();
700 return 0;
701}
702EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index f948419c888a..efe07990e7fc 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -642,9 +642,13 @@ static void map_cpu_to_logical_apicid(void)
642{ 642{
643 int cpu = smp_processor_id(); 643 int cpu = smp_processor_id();
644 int apicid = logical_smp_processor_id(); 644 int apicid = logical_smp_processor_id();
645 int node = apicid_to_node(apicid);
646
647 if (!node_online(node))
648 node = first_online_node;
645 649
646 cpu_2_logical_apicid[cpu] = apicid; 650 cpu_2_logical_apicid[cpu] = apicid;
647 map_cpu_to_node(cpu, apicid_to_node(apicid)); 651 map_cpu_to_node(cpu, node);
648} 652}
649 653
650static void unmap_cpu_to_logical_apicid(int cpu) 654static void unmap_cpu_to_logical_apicid(int cpu)
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index b1809c9a0899..83db411b3aa7 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -42,7 +42,7 @@
42#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 42#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
43static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ 43static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
44 44
45#define MAX_CHUNKS_PER_NODE 4 45#define MAX_CHUNKS_PER_NODE 3
46#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) 46#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
47struct node_memory_chunk_s { 47struct node_memory_chunk_s {
48 unsigned long start_pfn; 48 unsigned long start_pfn;
@@ -135,9 +135,6 @@ static void __init parse_memory_affinity_structure (char *sratp)
135 "enabled and removable" : "enabled" ) ); 135 "enabled and removable" : "enabled" ) );
136} 136}
137 137
138#if MAX_NR_ZONES != 4
139#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
140#endif
141/* Take a chunk of pages from page frame cstart to cend and count the number 138/* Take a chunk of pages from page frame cstart to cend and count the number
142 * of pages in each zone, returned via zones[]. 139 * of pages in each zone, returned via zones[].
143 */ 140 */
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index edd00f6cee37..1302e4ab3c4f 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -270,16 +270,19 @@ void notify_arch_cmos_timer(void)
270 mod_timer(&sync_cmos_timer, jiffies + 1); 270 mod_timer(&sync_cmos_timer, jiffies + 1);
271} 271}
272 272
273static long clock_cmos_diff, sleep_start; 273static long clock_cmos_diff;
274static unsigned long sleep_start;
274 275
275static int timer_suspend(struct sys_device *dev, pm_message_t state) 276static int timer_suspend(struct sys_device *dev, pm_message_t state)
276{ 277{
277 /* 278 /*
278 * Estimate time zone so that set_time can update the clock 279 * Estimate time zone so that set_time can update the clock
279 */ 280 */
280 clock_cmos_diff = -get_cmos_time(); 281 unsigned long ctime = get_cmos_time();
282
283 clock_cmos_diff = -ctime;
281 clock_cmos_diff += get_seconds(); 284 clock_cmos_diff += get_seconds();
282 sleep_start = get_cmos_time(); 285 sleep_start = ctime;
283 return 0; 286 return 0;
284} 287}
285 288
@@ -287,18 +290,29 @@ static int timer_resume(struct sys_device *dev)
287{ 290{
288 unsigned long flags; 291 unsigned long flags;
289 unsigned long sec; 292 unsigned long sec;
290 unsigned long sleep_length; 293 unsigned long ctime = get_cmos_time();
291 294 long sleep_length = (ctime - sleep_start) * HZ;
295 struct timespec ts;
296
297 if (sleep_length < 0) {
298 printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
299 /* The time after the resume must not be earlier than the time
300 * before the suspend or some nasty things will happen
301 */
302 sleep_length = 0;
303 ctime = sleep_start;
304 }
292#ifdef CONFIG_HPET_TIMER 305#ifdef CONFIG_HPET_TIMER
293 if (is_hpet_enabled()) 306 if (is_hpet_enabled())
294 hpet_reenable(); 307 hpet_reenable();
295#endif 308#endif
296 setup_pit_timer(); 309 setup_pit_timer();
297 sec = get_cmos_time() + clock_cmos_diff; 310
298 sleep_length = (get_cmos_time() - sleep_start) * HZ; 311 sec = ctime + clock_cmos_diff;
312 ts.tv_sec = sec;
313 ts.tv_nsec = 0;
314 do_settimeofday(&ts);
299 write_seqlock_irqsave(&xtime_lock, flags); 315 write_seqlock_irqsave(&xtime_lock, flags);
300 xtime.tv_sec = sec;
301 xtime.tv_nsec = 0;
302 jiffies_64 += sleep_length; 316 jiffies_64 += sleep_length;
303 wall_jiffies += sleep_length; 317 wall_jiffies += sleep_length;
304 write_sequnlock_irqrestore(&xtime_lock, flags); 318 write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -334,10 +348,11 @@ extern void (*late_time_init)(void);
334/* Duplicate of time_init() below, with hpet_enable part added */ 348/* Duplicate of time_init() below, with hpet_enable part added */
335static void __init hpet_time_init(void) 349static void __init hpet_time_init(void)
336{ 350{
337 xtime.tv_sec = get_cmos_time(); 351 struct timespec ts;
338 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); 352 ts.tv_sec = get_cmos_time();
339 set_normalized_timespec(&wall_to_monotonic, 353 ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
340 -xtime.tv_sec, -xtime.tv_nsec); 354
355 do_settimeofday(&ts);
341 356
342 if ((hpet_enable() >= 0) && hpet_use_timer) { 357 if ((hpet_enable() >= 0) && hpet_use_timer) {
343 printk("Using HPET for base-timer\n"); 358 printk("Using HPET for base-timer\n");
@@ -349,6 +364,7 @@ static void __init hpet_time_init(void)
349 364
350void __init time_init(void) 365void __init time_init(void)
351{ 366{
367 struct timespec ts;
352#ifdef CONFIG_HPET_TIMER 368#ifdef CONFIG_HPET_TIMER
353 if (is_hpet_capable()) { 369 if (is_hpet_capable()) {
354 /* 370 /*
@@ -359,10 +375,10 @@ void __init time_init(void)
359 return; 375 return;
360 } 376 }
361#endif 377#endif
362 xtime.tv_sec = get_cmos_time(); 378 ts.tv_sec = get_cmos_time();
363 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); 379 ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
364 set_normalized_timespec(&wall_to_monotonic, 380
365 -xtime.tv_sec, -xtime.tv_nsec); 381 do_settimeofday(&ts);
366 382
367 time_init_hook(); 383 time_init_hook();
368} 384}
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 14a1376fedd1..6bf14a4e995e 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -301,23 +301,25 @@ int hpet_rtc_timer_init(void)
301 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; 301 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
302 302
303 local_irq_save(flags); 303 local_irq_save(flags);
304
304 cnt = hpet_readl(HPET_COUNTER); 305 cnt = hpet_readl(HPET_COUNTER);
305 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); 306 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
306 hpet_writel(cnt, HPET_T1_CMP); 307 hpet_writel(cnt, HPET_T1_CMP);
307 hpet_t1_cmp = cnt; 308 hpet_t1_cmp = cnt;
308 local_irq_restore(flags);
309 309
310 cfg = hpet_readl(HPET_T1_CFG); 310 cfg = hpet_readl(HPET_T1_CFG);
311 cfg &= ~HPET_TN_PERIODIC; 311 cfg &= ~HPET_TN_PERIODIC;
312 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; 312 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
313 hpet_writel(cfg, HPET_T1_CFG); 313 hpet_writel(cfg, HPET_T1_CFG);
314 314
315 local_irq_restore(flags);
316
315 return 1; 317 return 1;
316} 318}
317 319
318static void hpet_rtc_timer_reinit(void) 320static void hpet_rtc_timer_reinit(void)
319{ 321{
320 unsigned int cfg, cnt; 322 unsigned int cfg, cnt, ticks_per_int, lost_ints;
321 323
322 if (unlikely(!(PIE_on | AIE_on | UIE_on))) { 324 if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
323 cfg = hpet_readl(HPET_T1_CFG); 325 cfg = hpet_readl(HPET_T1_CFG);
@@ -332,10 +334,33 @@ static void hpet_rtc_timer_reinit(void)
332 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; 334 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
333 335
334 /* It is more accurate to use the comparator value than current count.*/ 336 /* It is more accurate to use the comparator value than current count.*/
335 cnt = hpet_t1_cmp; 337 ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
336 cnt += hpet_tick*HZ/hpet_rtc_int_freq; 338 hpet_t1_cmp += ticks_per_int;
337 hpet_writel(cnt, HPET_T1_CMP); 339 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
338 hpet_t1_cmp = cnt; 340
341 /*
342 * If the interrupt handler was delayed too long, the write above tries
343 * to schedule the next interrupt in the past and the hardware would
344 * not interrupt until the counter had wrapped around.
345 * So we have to check that the comparator wasn't set to a past time.
346 */
347 cnt = hpet_readl(HPET_COUNTER);
348 if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
349 lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
350 /* Make sure that, even with the time needed to execute
351 * this code, the next scheduled interrupt has been moved
352 * back to the future: */
353 lost_ints++;
354
355 hpet_t1_cmp += lost_ints * ticks_per_int;
356 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
357
358 if (PIE_on)
359 PIE_count += lost_ints;
360
361 printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
362 hpet_rtc_int_freq);
363 }
339} 364}
340 365
341/* 366/*
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 7e9edafffd8a..4fcc6690be99 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -313,6 +313,8 @@ void show_registers(struct pt_regs *regs)
313 */ 313 */
314 if (in_kernel) { 314 if (in_kernel) {
315 u8 __user *eip; 315 u8 __user *eip;
316 int code_bytes = 64;
317 unsigned char c;
316 318
317 printk("\n" KERN_EMERG "Stack: "); 319 printk("\n" KERN_EMERG "Stack: ");
318 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); 320 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
@@ -320,9 +322,12 @@ void show_registers(struct pt_regs *regs)
320 printk(KERN_EMERG "Code: "); 322 printk(KERN_EMERG "Code: ");
321 323
322 eip = (u8 __user *)regs->eip - 43; 324 eip = (u8 __user *)regs->eip - 43;
323 for (i = 0; i < 64; i++, eip++) { 325 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
324 unsigned char c; 326 /* try starting at EIP */
325 327 eip = (u8 __user *)regs->eip;
328 code_bytes = 32;
329 }
330 for (i = 0; i < code_bytes; i++, eip++) {
326 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { 331 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
327 printk(" Bad EIP value."); 332 printk(" Bad EIP value.");
328 break; 333 break;
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 2d4f1386e2b1..1e7ac1c44ddc 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
13OUTPUT_ARCH(i386) 13OUTPUT_ARCH(i386)
14ENTRY(phys_startup_32) 14ENTRY(phys_startup_32)
15jiffies = jiffies_64; 15jiffies = jiffies_64;
16
17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */
20 note PT_NOTE FLAGS(4); /* R__ */
21}
16SECTIONS 22SECTIONS
17{ 23{
18 . = __KERNEL_START; 24 . = __KERNEL_START;
@@ -26,7 +32,7 @@ SECTIONS
26 KPROBES_TEXT 32 KPROBES_TEXT
27 *(.fixup) 33 *(.fixup)
28 *(.gnu.warning) 34 *(.gnu.warning)
29 } = 0x9090 35 } :text = 0x9090
30 36
31 _etext = .; /* End of text section */ 37 _etext = .; /* End of text section */
32 38
@@ -48,7 +54,7 @@ SECTIONS
48 .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ 54 .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
49 *(.data) 55 *(.data)
50 CONSTRUCTORS 56 CONSTRUCTORS
51 } 57 } :data
52 58
53 . = ALIGN(4096); 59 . = ALIGN(4096);
54 __nosave_begin = .; 60 __nosave_begin = .;
@@ -184,4 +190,6 @@ SECTIONS
184 STABS_DEBUG 190 STABS_DEBUG
185 191
186 DWARF_DEBUG 192 DWARF_DEBUG
193
194 NOTES
187} 195}
diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c
index 50f6de6ff64d..f39887359e8e 100644
--- a/arch/i386/mach-voyager/voyager_thread.c
+++ b/arch/i386/mach-voyager/voyager_thread.c
@@ -130,7 +130,6 @@ thread(void *unused)
130 init_timer(&wakeup_timer); 130 init_timer(&wakeup_timer);
131 131
132 sigfillset(&current->blocked); 132 sigfillset(&current->blocked);
133 current->signal->tty = NULL;
134 133
135 printk(KERN_NOTICE "Voyager starting monitor thread\n"); 134 printk(KERN_NOTICE "Voyager starting monitor thread\n");
136 135
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
index 5d44f4f5ff59..4de11f508c3a 100644
--- a/arch/i386/mm/boot_ioremap.c
+++ b/arch/i386/mm/boot_ioremap.c
@@ -29,8 +29,11 @@
29 */ 29 */
30 30
31#define BOOT_PTE_PTRS (PTRS_PER_PTE*2) 31#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
32#define boot_pte_index(address) \ 32
33 (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1)) 33static unsigned long boot_pte_index(unsigned long vaddr)
34{
35 return __pa(vaddr) >> PAGE_SHIFT;
36}
34 37
35static inline boot_pte_t* boot_vaddr_to_pte(void *address) 38static inline boot_pte_t* boot_vaddr_to_pte(void *address)
36{ 39{
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 7c392dc553b8..fb5d8b747de4 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
117 117
118void *node_remap_end_vaddr[MAX_NUMNODES]; 118void *node_remap_end_vaddr[MAX_NUMNODES];
119void *node_remap_alloc_vaddr[MAX_NUMNODES]; 119void *node_remap_alloc_vaddr[MAX_NUMNODES];
120 120static unsigned long kva_start_pfn;
121static unsigned long kva_pages;
121/* 122/*
122 * FLAT - support for basic PC memory model with discontig enabled, essentially 123 * FLAT - support for basic PC memory model with discontig enabled, essentially
123 * a single node with all available processors in it with a flat 124 * a single node with all available processors in it with a flat
@@ -286,7 +287,6 @@ unsigned long __init setup_memory(void)
286{ 287{
287 int nid; 288 int nid;
288 unsigned long system_start_pfn, system_max_low_pfn; 289 unsigned long system_start_pfn, system_max_low_pfn;
289 unsigned long reserve_pages;
290 290
291 /* 291 /*
292 * When mapping a NUMA machine we allocate the node_mem_map arrays 292 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -298,14 +298,23 @@ unsigned long __init setup_memory(void)
298 find_max_pfn(); 298 find_max_pfn();
299 get_memcfg_numa(); 299 get_memcfg_numa();
300 300
301 reserve_pages = calculate_numa_remap_pages(); 301 kva_pages = calculate_numa_remap_pages();
302 302
303 /* partially used pages are not usable - thus round upwards */ 303 /* partially used pages are not usable - thus round upwards */
304 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); 304 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
305 305
306 system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; 306 kva_start_pfn = find_max_low_pfn() - kva_pages;
307 printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", 307
308 reserve_pages, max_low_pfn + reserve_pages); 308#ifdef CONFIG_BLK_DEV_INITRD
309 /* Numa kva area is below the initrd */
310 if (LOADER_TYPE && INITRD_START)
311 kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages;
312#endif
313 kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
314
315 system_max_low_pfn = max_low_pfn = find_max_low_pfn();
316 printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
317 kva_start_pfn, max_low_pfn);
309 printk("max_pfn = %ld\n", max_pfn); 318 printk("max_pfn = %ld\n", max_pfn);
310#ifdef CONFIG_HIGHMEM 319#ifdef CONFIG_HIGHMEM
311 highstart_pfn = highend_pfn = max_pfn; 320 highstart_pfn = highend_pfn = max_pfn;
@@ -323,7 +332,7 @@ unsigned long __init setup_memory(void)
323 (ulong) pfn_to_kaddr(max_low_pfn)); 332 (ulong) pfn_to_kaddr(max_low_pfn));
324 for_each_online_node(nid) { 333 for_each_online_node(nid) {
325 node_remap_start_vaddr[nid] = pfn_to_kaddr( 334 node_remap_start_vaddr[nid] = pfn_to_kaddr(
326 highstart_pfn + node_remap_offset[nid]); 335 kva_start_pfn + node_remap_offset[nid]);
327 /* Init the node remap allocator */ 336 /* Init the node remap allocator */
328 node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + 337 node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
329 (node_remap_size[nid] * PAGE_SIZE); 338 (node_remap_size[nid] * PAGE_SIZE);
@@ -338,7 +347,6 @@ unsigned long __init setup_memory(void)
338 } 347 }
339 printk("High memory starts at vaddr %08lx\n", 348 printk("High memory starts at vaddr %08lx\n",
340 (ulong) pfn_to_kaddr(highstart_pfn)); 349 (ulong) pfn_to_kaddr(highstart_pfn));
341 vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
342 for_each_online_node(nid) 350 for_each_online_node(nid)
343 find_max_pfn_node(nid); 351 find_max_pfn_node(nid);
344 352
@@ -348,13 +356,18 @@ unsigned long __init setup_memory(void)
348 return max_low_pfn; 356 return max_low_pfn;
349} 357}
350 358
359void __init numa_kva_reserve(void)
360{
361 reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages));
362}
363
351void __init zone_sizes_init(void) 364void __init zone_sizes_init(void)
352{ 365{
353 int nid; 366 int nid;
354 367
355 368
356 for_each_online_node(nid) { 369 for_each_online_node(nid) {
357 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 370 unsigned long zones_size[MAX_NR_ZONES] = {0, };
358 unsigned long *zholes_size; 371 unsigned long *zholes_size;
359 unsigned int max_dma; 372 unsigned int max_dma;
360 373
@@ -409,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro)
409 zone_end_pfn = zone_start_pfn + zone->spanned_pages; 422 zone_end_pfn = zone_start_pfn + zone->spanned_pages;
410 423
411 printk("Initializing %s for node %d (%08lx:%08lx)\n", 424 printk("Initializing %s for node %d (%08lx:%08lx)\n",
412 zone->name, zone->zone_pgdat->node_id, 425 zone->name, zone_to_nid(zone),
413 zone_start_pfn, zone_end_pfn); 426 zone_start_pfn, zone_end_pfn);
414 427
415 for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { 428 for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 89e8486aac34..efd0bcdac65d 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -629,6 +629,48 @@ void __init mem_init(void)
629 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) 629 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
630 ); 630 );
631 631
632#if 1 /* double-sanity-check paranoia */
633 printk("virtual kernel memory layout:\n"
634 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
635#ifdef CONFIG_HIGHMEM
636 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
637#endif
638 " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
639 " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
640 " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
641 " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
642 " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
643 FIXADDR_START, FIXADDR_TOP,
644 (FIXADDR_TOP - FIXADDR_START) >> 10,
645
646#ifdef CONFIG_HIGHMEM
647 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
648 (LAST_PKMAP*PAGE_SIZE) >> 10,
649#endif
650
651 VMALLOC_START, VMALLOC_END,
652 (VMALLOC_END - VMALLOC_START) >> 20,
653
654 (unsigned long)__va(0), (unsigned long)high_memory,
655 ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
656
657 (unsigned long)&__init_begin, (unsigned long)&__init_end,
658 ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
659
660 (unsigned long)&_etext, (unsigned long)&_edata,
661 ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
662
663 (unsigned long)&_text, (unsigned long)&_etext,
664 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
665
666#ifdef CONFIG_HIGHMEM
667 BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
668 BUG_ON(VMALLOC_END > PKMAP_BASE);
669#endif
670 BUG_ON(VMALLOC_START > VMALLOC_END);
671 BUG_ON((unsigned long)high_memory > VMALLOC_START);
672#endif /* double-sanity-check paranoia */
673
632#ifdef CONFIG_X86_PAE 674#ifdef CONFIG_X86_PAE
633 if (!cpu_has_pae) 675 if (!cpu_has_pae)
634 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); 676 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
@@ -657,7 +699,7 @@ void __init mem_init(void)
657int arch_add_memory(int nid, u64 start, u64 size) 699int arch_add_memory(int nid, u64 start, u64 size)
658{ 700{
659 struct pglist_data *pgdata = &contig_page_data; 701 struct pglist_data *pgdata = &contig_page_data;
660 struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; 702 struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
661 unsigned long start_pfn = start >> PAGE_SHIFT; 703 unsigned long start_pfn = start >> PAGE_SHIFT;
662 unsigned long nr_pages = size >> PAGE_SHIFT; 704 unsigned long nr_pages = size >> PAGE_SHIFT;
663 705
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index bd98768d8764..10126e3f8174 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -12,6 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/module.h>
15 16
16#include <asm/system.h> 17#include <asm/system.h>
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
@@ -60,7 +61,9 @@ void show_mem(void)
60 printk(KERN_INFO "%lu pages writeback\n", 61 printk(KERN_INFO "%lu pages writeback\n",
61 global_page_state(NR_WRITEBACK)); 62 global_page_state(NR_WRITEBACK));
62 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); 63 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
63 printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB)); 64 printk(KERN_INFO "%lu pages slab\n",
65 global_page_state(NR_SLAB_RECLAIMABLE) +
66 global_page_state(NR_SLAB_UNRECLAIMABLE));
64 printk(KERN_INFO "%lu pages pagetables\n", 67 printk(KERN_INFO "%lu pages pagetables\n",
65 global_page_state(NR_PAGETABLE)); 68 global_page_state(NR_PAGETABLE));
66} 69}
@@ -137,6 +140,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
137 __flush_tlb_one(vaddr); 140 __flush_tlb_one(vaddr);
138} 141}
139 142
143static int fixmaps;
144#ifndef CONFIG_COMPAT_VDSO
145unsigned long __FIXADDR_TOP = 0xfffff000;
146EXPORT_SYMBOL(__FIXADDR_TOP);
147#endif
148
140void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) 149void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
141{ 150{
142 unsigned long address = __fix_to_virt(idx); 151 unsigned long address = __fix_to_virt(idx);
@@ -146,6 +155,25 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
146 return; 155 return;
147 } 156 }
148 set_pte_pfn(address, phys >> PAGE_SHIFT, flags); 157 set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
158 fixmaps++;
159}
160
161/**
162 * reserve_top_address - reserves a hole in the top of kernel address space
163 * @reserve - size of hole to reserve
164 *
165 * Can be used to relocate the fixmap area and poke a hole in the top
166 * of kernel address space to make room for a hypervisor.
167 */
168void reserve_top_address(unsigned long reserve)
169{
170 BUG_ON(fixmaps > 0);
171#ifdef CONFIG_COMPAT_VDSO
172 BUG_ON(reserve != 0);
173#else
174 __FIXADDR_TOP = -reserve - PAGE_SIZE;
175 __VMALLOC_RESERVE += reserve;
176#endif
149} 177}
150 178
151pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 179pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S
index c893b897217f..8a2b50a0aaad 100644
--- a/arch/i386/power/swsusp.S
+++ b/arch/i386/power/swsusp.S
@@ -32,7 +32,7 @@ ENTRY(swsusp_arch_resume)
32 movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx 32 movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx
33 movl %ecx, %cr3 33 movl %ecx, %cr3
34 34
35 movl pagedir_nosave, %edx 35 movl restore_pblist, %edx
36 .p2align 4,,7 36 .p2align 4,,7
37 37
38copy_loop: 38copy_loop:
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index db274da7dba1..f521f2f60a78 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR
66 bool 66 bool
67 select GENERIC_ALLOCATOR 67 select GENERIC_ALLOCATOR
68 68
69config DMA_IS_DMA32
70 bool
71 default y
72
73config DMA_IS_NORMAL
74 bool
75 depends on IA64_SGI_SN2
76 default y
77
78config AUDIT_ARCH 69config AUDIT_ARCH
79 bool 70 bool
80 default y 71 default y
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 0176556aeecc..32c3abededc6 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -771,16 +771,19 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
771{ 771{
772#ifdef CONFIG_ACPI_NUMA 772#ifdef CONFIG_ACPI_NUMA
773 int pxm_id; 773 int pxm_id;
774 int nid;
774 775
775 pxm_id = acpi_get_pxm(handle); 776 pxm_id = acpi_get_pxm(handle);
776
777 /* 777 /*
778 * Assuming that the container driver would have set the proximity 778 * We don't have cpu-only-node hotadd. But if the system equips
779 * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag 779 * SRAT table, pxm is already found and node is ready.
780 * So, just pxm_to_nid(pxm) is OK.
781 * This code here is for the system which doesn't have full SRAT
782 * table for possible cpus.
780 */ 783 */
781 node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); 784 nid = acpi_map_pxm_to_node(pxm_id);
782
783 node_cpuid[cpu].phys_id = physid; 785 node_cpuid[cpu].phys_id = physid;
786 node_cpuid[cpu].nid = nid;
784#endif 787#endif
785 return (0); 788 return (0);
786} 789}
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 1cc360c83e7a..20340631179f 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -29,6 +29,36 @@ EXPORT_SYMBOL(cpu_to_node_map);
29 29
30cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; 30cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
31 31
32void __cpuinit map_cpu_to_node(int cpu, int nid)
33{
34 int oldnid;
35 if (nid < 0) { /* just initialize by zero */
36 cpu_to_node_map[cpu] = 0;
37 return;
38 }
39 /* sanity check first */
40 oldnid = cpu_to_node_map[cpu];
41 if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
42 return; /* nothing to do */
43 }
44 /* we don't have cpu-driven node hot add yet...
45 In usual case, node is created from SRAT at boot time. */
46 if (!node_online(nid))
47 nid = first_online_node;
48 cpu_to_node_map[cpu] = nid;
49 cpu_set(cpu, node_to_cpu_mask[nid]);
50 return;
51}
52
53void __cpuinit unmap_cpu_from_node(int cpu, int nid)
54{
55 WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
56 WARN_ON(cpu_to_node_map[cpu] != nid);
57 cpu_to_node_map[cpu] = 0;
58 cpu_clear(cpu, node_to_cpu_mask[nid]);
59}
60
61
32/** 62/**
33 * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays 63 * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
34 * 64 *
@@ -49,8 +79,6 @@ void __init build_cpu_to_node_map(void)
49 node = node_cpuid[i].nid; 79 node = node_cpuid[i].nid;
50 break; 80 break;
51 } 81 }
52 cpu_to_node_map[cpu] = (node >= 0) ? node : 0; 82 map_cpu_to_node(cpu, node);
53 if (node >= 0)
54 cpu_set(cpu, node_to_cpu_mask[node]);
55 } 83 }
56} 84}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 84a7e52f56f6..7bb7696e4ce2 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -34,6 +34,7 @@
34#include <linux/file.h> 34#include <linux/file.h>
35#include <linux/poll.h> 35#include <linux/poll.h>
36#include <linux/vfs.h> 36#include <linux/vfs.h>
37#include <linux/smp.h>
37#include <linux/pagemap.h> 38#include <linux/pagemap.h>
38#include <linux/mount.h> 39#include <linux/mount.h>
39#include <linux/bitops.h> 40#include <linux/bitops.h>
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index f648c610b10c..05bdf7affb43 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,6 +36,9 @@ int arch_register_cpu(int num)
36 */ 36 */
37 if (!can_cpei_retarget() && is_cpu_cpei_target(num)) 37 if (!can_cpei_retarget() && is_cpu_cpei_target(num))
38 sysfs_cpus[num].cpu.no_control = 1; 38 sysfs_cpus[num].cpu.no_control = 1;
39#ifdef CONFIG_NUMA
40 map_cpu_to_node(num, node_cpuid[num].nid);
41#endif
39#endif 42#endif
40 43
41 return register_cpu(&sysfs_cpus[num].cpu, num); 44 return register_cpu(&sysfs_cpus[num].cpu, num);
@@ -45,7 +48,8 @@ int arch_register_cpu(int num)
45 48
46void arch_unregister_cpu(int num) 49void arch_unregister_cpu(int num)
47{ 50{
48 return unregister_cpu(&sysfs_cpus[num].cpu); 51 unregister_cpu(&sysfs_cpus[num].cpu);
52 unmap_cpu_from_node(num, cpu_to_node(num));
49} 53}
50EXPORT_SYMBOL(arch_register_cpu); 54EXPORT_SYMBOL(arch_register_cpu);
51EXPORT_SYMBOL(arch_unregister_cpu); 55EXPORT_SYMBOL(arch_unregister_cpu);
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 4c73a6763669..c58e933694d5 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
98 98
99 /* attempt to allocate a granule's worth of cached memory pages */ 99 /* attempt to allocate a granule's worth of cached memory pages */
100 100
101 page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, 101 page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
102 IA64_GRANULE_SHIFT-PAGE_SHIFT); 102 IA64_GRANULE_SHIFT-PAGE_SHIFT);
103 if (!page) { 103 if (!page) {
104 mutex_unlock(&uc_pool->add_chunk_mutex); 104 mutex_unlock(&uc_pool->add_chunk_mutex);
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 9a8a29339d2d..b632b9c1e3b3 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -32,9 +32,10 @@
32#include <linux/cpumask.h> 32#include <linux/cpumask.h>
33#include <linux/smp_lock.h> 33#include <linux/smp_lock.h>
34#include <linux/nodemask.h> 34#include <linux/nodemask.h>
35#include <linux/smp.h>
36
35#include <asm/processor.h> 37#include <asm/processor.h>
36#include <asm/topology.h> 38#include <asm/topology.h>
37#include <asm/smp.h>
38#include <asm/semaphore.h> 39#include <asm/semaphore.h>
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40#include <asm/sal.h> 41#include <asm/sal.h>
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index b71348fec1f4..bbd97c85bc5d 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -100,7 +100,7 @@ void free_initrd_mem(unsigned long, unsigned long);
100#ifndef CONFIG_DISCONTIGMEM 100#ifndef CONFIG_DISCONTIGMEM
101unsigned long __init zone_sizes_init(void) 101unsigned long __init zone_sizes_init(void)
102{ 102{
103 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 103 unsigned long zones_size[MAX_NR_ZONES] = {0, };
104 unsigned long max_dma; 104 unsigned long max_dma;
105 unsigned long low; 105 unsigned long low;
106 unsigned long start_pfn; 106 unsigned long start_pfn;
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index e4c233eef195..06e538d1be3a 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -136,7 +136,7 @@ void paging_init(void)
136#endif 136#endif
137 137
138 { 138 {
139 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 139 unsigned long zones_size[MAX_NR_ZONES] = {0, };
140 140
141 zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; 141 zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
142 zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; 142 zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
diff --git a/arch/mips/au1000/common/dbdma.c b/arch/mips/au1000/common/dbdma.c
index 98244d51c154..c4fae8ff4671 100644
--- a/arch/mips/au1000/common/dbdma.c
+++ b/arch/mips/au1000/common/dbdma.c
@@ -230,7 +230,7 @@ EXPORT_SYMBOL(au1xxx_ddma_add_device);
230*/ 230*/
231u32 231u32
232au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, 232au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
233 void (*callback)(int, void *, struct pt_regs *), void *callparam) 233 void (*callback)(int, void *), void *callparam)
234{ 234{
235 unsigned long flags; 235 unsigned long flags;
236 u32 used, chan, rv; 236 u32 used, chan, rv;
@@ -248,8 +248,10 @@ au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
248 au1xxx_dbdma_init(); 248 au1xxx_dbdma_init();
249 dbdma_initialized = 1; 249 dbdma_initialized = 1;
250 250
251 if ((stp = find_dbdev_id(srcid)) == NULL) return 0; 251 if ((stp = find_dbdev_id(srcid)) == NULL)
252 if ((dtp = find_dbdev_id(destid)) == NULL) return 0; 252 return 0;
253 if ((dtp = find_dbdev_id(destid)) == NULL)
254 return 0;
253 255
254 used = 0; 256 used = 0;
255 rv = 0; 257 rv = 0;
@@ -869,7 +871,7 @@ dbdma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
869 au_sync(); 871 au_sync();
870 872
871 if (ctp->chan_callback) 873 if (ctp->chan_callback)
872 (ctp->chan_callback)(irq, ctp->chan_callparam, regs); 874 (ctp->chan_callback)(irq, ctp->chan_callparam);
873 875
874 ctp->cur_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr)); 876 ctp->cur_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr));
875 return IRQ_RETVAL(1); 877 return IRQ_RETVAL(1);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index c52497bb102a..5b06349af2d5 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -163,10 +163,10 @@ static int __init page_is_ram(unsigned long pagenr)
163 163
164void __init paging_init(void) 164void __init paging_init(void)
165{ 165{
166 unsigned long zones_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; 166 unsigned long zones_size[] = { 0, };
167 unsigned long max_dma, high, low; 167 unsigned long max_dma, high, low;
168#ifndef CONFIG_FLATMEM 168#ifndef CONFIG_FLATMEM
169 unsigned long zholes_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; 169 unsigned long zholes_size[] = { 0, };
170 unsigned long i, j, pfn; 170 unsigned long i, j, pfn;
171#endif 171#endif
172 172
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index efe6971fc800..16e5682b01f1 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -19,6 +19,7 @@
19#include <linux/swap.h> 19#include <linux/swap.h>
20#include <linux/bootmem.h> 20#include <linux/bootmem.h>
21#include <linux/pfn.h> 21#include <linux/pfn.h>
22#include <linux/highmem.h>
22#include <asm/page.h> 23#include <asm/page.h>
23#include <asm/sections.h> 24#include <asm/sections.h>
24 25
@@ -508,7 +509,7 @@ extern unsigned long setup_zero_pages(void);
508 509
509void __init paging_init(void) 510void __init paging_init(void)
510{ 511{
511 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 512 unsigned long zones_size[MAX_NR_ZONES] = {0, };
512 unsigned node; 513 unsigned node;
513 514
514 pagetable_init(); 515 pagetable_init();
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f2b96f1e0da7..25ad28d63e88 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -551,7 +551,7 @@ void show_mem(void)
551 551
552 printk("Zone list for zone %d on node %d: ", j, i); 552 printk("Zone list for zone %d on node %d: ", j, i);
553 for (k = 0; zl->zones[k] != NULL; k++) 553 for (k = 0; zl->zones[k] != NULL; k++)
554 printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name); 554 printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
555 printk("\n"); 555 printk("\n");
556 } 556 }
557 } 557 }
@@ -809,7 +809,7 @@ void __init paging_init(void)
809 flush_tlb_all_local(NULL); 809 flush_tlb_all_local(NULL);
810 810
811 for (i = 0; i < npmem_ranges; i++) { 811 for (i = 0; i < npmem_ranges; i++) {
812 unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0 }; 812 unsigned long zones_size[MAX_NR_ZONES] = { 0, };
813 813
814 /* We have an IOMMU, so all memory can go into a single 814 /* We have an IOMMU, so all memory can go into a single
815 ZONE_DMA zone. */ 815 ZONE_DMA zone. */
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index 7369f9a6ad25..69e8f86aa4f8 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -159,8 +159,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
159 isync 159 isync
160 160
161 /* Load ptr the list of pages to copy in r3 */ 161 /* Load ptr the list of pages to copy in r3 */
162 lis r11,(pagedir_nosave - KERNELBASE)@h 162 lis r11,(restore_pblist - KERNELBASE)@h
163 ori r11,r11,pagedir_nosave@l 163 ori r11,r11,restore_pblist@l
164 lwz r10,0(r11) 164 lwz r10,0(r11)
165 165
166 /* Copy the pages. This is a very basic implementation, to 166 /* Copy the pages. This is a very basic implementation, to
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index ab3b0765a64e..8aea3698a77b 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -117,8 +117,7 @@ static void appldata_get_mem_data(void *data)
117 mem_data->pgpgout = ev[PGPGOUT] >> 1; 117 mem_data->pgpgout = ev[PGPGOUT] >> 1;
118 mem_data->pswpin = ev[PSWPIN]; 118 mem_data->pswpin = ev[PSWPIN];
119 mem_data->pswpout = ev[PSWPOUT]; 119 mem_data->pswpout = ev[PSWPOUT];
120 mem_data->pgalloc = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] + 120 mem_data->pgalloc = ev[PGALLOC_NORMAL] + ev[PGALLOC_DMA];
121 ev[PGALLOC_DMA];
122 mem_data->pgfault = ev[PGFAULT]; 121 mem_data->pgfault = ev[PGFAULT];
123 mem_data->pgmajfault = ev[PGMAJFAULT]; 122 mem_data->pgmajfault = ev[PGMAJFAULT];
124 123
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 786a44dba5bf..607f50ead1fd 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -15,6 +15,8 @@
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/sysctl.h> 16#include <linux/sysctl.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/swap.h>
19#include <linux/kthread.h>
18 20
19#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
20#include <asm/uaccess.h> 22#include <asm/uaccess.h>
@@ -34,18 +36,18 @@ struct cmm_page_array {
34 unsigned long pages[CMM_NR_PAGES]; 36 unsigned long pages[CMM_NR_PAGES];
35}; 37};
36 38
37static long cmm_pages = 0; 39static long cmm_pages;
38static long cmm_timed_pages = 0; 40static long cmm_timed_pages;
39static volatile long cmm_pages_target = 0; 41static volatile long cmm_pages_target;
40static volatile long cmm_timed_pages_target = 0; 42static volatile long cmm_timed_pages_target;
41static long cmm_timeout_pages = 0; 43static long cmm_timeout_pages;
42static long cmm_timeout_seconds = 0; 44static long cmm_timeout_seconds;
43 45
44static struct cmm_page_array *cmm_page_list = NULL; 46static struct cmm_page_array *cmm_page_list;
45static struct cmm_page_array *cmm_timed_page_list = NULL; 47static struct cmm_page_array *cmm_timed_page_list;
48static DEFINE_SPINLOCK(cmm_lock);
46 49
47static unsigned long cmm_thread_active = 0; 50static struct task_struct *cmm_thread_ptr;
48static struct work_struct cmm_thread_starter;
49static wait_queue_head_t cmm_thread_wait; 51static wait_queue_head_t cmm_thread_wait;
50static struct timer_list cmm_timer; 52static struct timer_list cmm_timer;
51 53
@@ -53,71 +55,100 @@ static void cmm_timer_fn(unsigned long);
53static void cmm_set_timer(void); 55static void cmm_set_timer(void);
54 56
55static long 57static long
56cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) 58cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list)
57{ 59{
58 struct cmm_page_array *pa; 60 struct cmm_page_array *pa, *npa;
59 unsigned long page; 61 unsigned long addr;
60 62
61 pa = *list; 63 while (nr) {
62 while (pages) { 64 addr = __get_free_page(GFP_NOIO);
63 page = __get_free_page(GFP_NOIO); 65 if (!addr)
64 if (!page)
65 break; 66 break;
67 spin_lock(&cmm_lock);
68 pa = *list;
66 if (!pa || pa->index >= CMM_NR_PAGES) { 69 if (!pa || pa->index >= CMM_NR_PAGES) {
67 /* Need a new page for the page list. */ 70 /* Need a new page for the page list. */
68 pa = (struct cmm_page_array *) 71 spin_unlock(&cmm_lock);
72 npa = (struct cmm_page_array *)
69 __get_free_page(GFP_NOIO); 73 __get_free_page(GFP_NOIO);
70 if (!pa) { 74 if (!npa) {
71 free_page(page); 75 free_page(addr);
72 break; 76 break;
73 } 77 }
74 pa->next = *list; 78 spin_lock(&cmm_lock);
75 pa->index = 0; 79 pa = *list;
76 *list = pa; 80 if (!pa || pa->index >= CMM_NR_PAGES) {
81 npa->next = pa;
82 npa->index = 0;
83 pa = npa;
84 *list = pa;
85 } else
86 free_page((unsigned long) npa);
77 } 87 }
78 diag10(page); 88 diag10(addr);
79 pa->pages[pa->index++] = page; 89 pa->pages[pa->index++] = addr;
80 (*counter)++; 90 (*counter)++;
81 pages--; 91 spin_unlock(&cmm_lock);
92 nr--;
82 } 93 }
83 return pages; 94 return nr;
84} 95}
85 96
86static void 97static long
87cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) 98cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
88{ 99{
89 struct cmm_page_array *pa; 100 struct cmm_page_array *pa;
90 unsigned long page; 101 unsigned long addr;
91 102
103 spin_lock(&cmm_lock);
92 pa = *list; 104 pa = *list;
93 while (pages) { 105 while (nr) {
94 if (!pa || pa->index <= 0) 106 if (!pa || pa->index <= 0)
95 break; 107 break;
96 page = pa->pages[--pa->index]; 108 addr = pa->pages[--pa->index];
97 if (pa->index == 0) { 109 if (pa->index == 0) {
98 pa = pa->next; 110 pa = pa->next;
99 free_page((unsigned long) *list); 111 free_page((unsigned long) *list);
100 *list = pa; 112 *list = pa;
101 } 113 }
102 free_page(page); 114 free_page(addr);
103 (*counter)--; 115 (*counter)--;
104 pages--; 116 nr--;
105 } 117 }
118 spin_unlock(&cmm_lock);
119 return nr;
106} 120}
107 121
122static int cmm_oom_notify(struct notifier_block *self,
123 unsigned long dummy, void *parm)
124{
125 unsigned long *freed = parm;
126 long nr = 256;
127
128 nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list);
129 if (nr > 0)
130 nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list);
131 cmm_pages_target = cmm_pages;
132 cmm_timed_pages_target = cmm_timed_pages;
133 *freed += 256 - nr;
134 return NOTIFY_OK;
135}
136
137static struct notifier_block cmm_oom_nb = {
138 .notifier_call = cmm_oom_notify
139};
140
108static int 141static int
109cmm_thread(void *dummy) 142cmm_thread(void *dummy)
110{ 143{
111 int rc; 144 int rc;
112 145
113 daemonize("cmmthread");
114 while (1) { 146 while (1) {
115 rc = wait_event_interruptible(cmm_thread_wait, 147 rc = wait_event_interruptible(cmm_thread_wait,
116 (cmm_pages != cmm_pages_target || 148 (cmm_pages != cmm_pages_target ||
117 cmm_timed_pages != cmm_timed_pages_target)); 149 cmm_timed_pages != cmm_timed_pages_target ||
118 if (rc == -ERESTARTSYS) { 150 kthread_should_stop()));
119 /* Got kill signal. End thread. */ 151 if (kthread_should_stop() || rc == -ERESTARTSYS) {
120 clear_bit(0, &cmm_thread_active);
121 cmm_pages_target = cmm_pages; 152 cmm_pages_target = cmm_pages;
122 cmm_timed_pages_target = cmm_timed_pages; 153 cmm_timed_pages_target = cmm_timed_pages;
123 break; 154 break;
@@ -143,16 +174,8 @@ cmm_thread(void *dummy)
143} 174}
144 175
145static void 176static void
146cmm_start_thread(void)
147{
148 kernel_thread(cmm_thread, NULL, 0);
149}
150
151static void
152cmm_kick_thread(void) 177cmm_kick_thread(void)
153{ 178{
154 if (!test_and_set_bit(0, &cmm_thread_active))
155 schedule_work(&cmm_thread_starter);
156 wake_up(&cmm_thread_wait); 179 wake_up(&cmm_thread_wait);
157} 180}
158 181
@@ -177,21 +200,21 @@ cmm_set_timer(void)
177static void 200static void
178cmm_timer_fn(unsigned long ignored) 201cmm_timer_fn(unsigned long ignored)
179{ 202{
180 long pages; 203 long nr;
181 204
182 pages = cmm_timed_pages_target - cmm_timeout_pages; 205 nr = cmm_timed_pages_target - cmm_timeout_pages;
183 if (pages < 0) 206 if (nr < 0)
184 cmm_timed_pages_target = 0; 207 cmm_timed_pages_target = 0;
185 else 208 else
186 cmm_timed_pages_target = pages; 209 cmm_timed_pages_target = nr;
187 cmm_kick_thread(); 210 cmm_kick_thread();
188 cmm_set_timer(); 211 cmm_set_timer();
189} 212}
190 213
191void 214void
192cmm_set_pages(long pages) 215cmm_set_pages(long nr)
193{ 216{
194 cmm_pages_target = pages; 217 cmm_pages_target = nr;
195 cmm_kick_thread(); 218 cmm_kick_thread();
196} 219}
197 220
@@ -202,9 +225,9 @@ cmm_get_pages(void)
202} 225}
203 226
204void 227void
205cmm_add_timed_pages(long pages) 228cmm_add_timed_pages(long nr)
206{ 229{
207 cmm_timed_pages_target += pages; 230 cmm_timed_pages_target += nr;
208 cmm_kick_thread(); 231 cmm_kick_thread();
209} 232}
210 233
@@ -215,9 +238,9 @@ cmm_get_timed_pages(void)
215} 238}
216 239
217void 240void
218cmm_set_timeout(long pages, long seconds) 241cmm_set_timeout(long nr, long seconds)
219{ 242{
220 cmm_timeout_pages = pages; 243 cmm_timeout_pages = nr;
221 cmm_timeout_seconds = seconds; 244 cmm_timeout_seconds = seconds;
222 cmm_set_timer(); 245 cmm_set_timer();
223} 246}
@@ -245,7 +268,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
245 void __user *buffer, size_t *lenp, loff_t *ppos) 268 void __user *buffer, size_t *lenp, loff_t *ppos)
246{ 269{
247 char buf[16], *p; 270 char buf[16], *p;
248 long pages; 271 long nr;
249 int len; 272 int len;
250 273
251 if (!*lenp || (*ppos && !write)) { 274 if (!*lenp || (*ppos && !write)) {
@@ -260,17 +283,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
260 return -EFAULT; 283 return -EFAULT;
261 buf[sizeof(buf) - 1] = '\0'; 284 buf[sizeof(buf) - 1] = '\0';
262 cmm_skip_blanks(buf, &p); 285 cmm_skip_blanks(buf, &p);
263 pages = simple_strtoul(p, &p, 0); 286 nr = simple_strtoul(p, &p, 0);
264 if (ctl == &cmm_table[0]) 287 if (ctl == &cmm_table[0])
265 cmm_set_pages(pages); 288 cmm_set_pages(nr);
266 else 289 else
267 cmm_add_timed_pages(pages); 290 cmm_add_timed_pages(nr);
268 } else { 291 } else {
269 if (ctl == &cmm_table[0]) 292 if (ctl == &cmm_table[0])
270 pages = cmm_get_pages(); 293 nr = cmm_get_pages();
271 else 294 else
272 pages = cmm_get_timed_pages(); 295 nr = cmm_get_timed_pages();
273 len = sprintf(buf, "%ld\n", pages); 296 len = sprintf(buf, "%ld\n", nr);
274 if (len > *lenp) 297 if (len > *lenp)
275 len = *lenp; 298 len = *lenp;
276 if (copy_to_user(buffer, buf, len)) 299 if (copy_to_user(buffer, buf, len))
@@ -286,7 +309,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
286 void __user *buffer, size_t *lenp, loff_t *ppos) 309 void __user *buffer, size_t *lenp, loff_t *ppos)
287{ 310{
288 char buf[64], *p; 311 char buf[64], *p;
289 long pages, seconds; 312 long nr, seconds;
290 int len; 313 int len;
291 314
292 if (!*lenp || (*ppos && !write)) { 315 if (!*lenp || (*ppos && !write)) {
@@ -301,10 +324,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
301 return -EFAULT; 324 return -EFAULT;
302 buf[sizeof(buf) - 1] = '\0'; 325 buf[sizeof(buf) - 1] = '\0';
303 cmm_skip_blanks(buf, &p); 326 cmm_skip_blanks(buf, &p);
304 pages = simple_strtoul(p, &p, 0); 327 nr = simple_strtoul(p, &p, 0);
305 cmm_skip_blanks(p, &p); 328 cmm_skip_blanks(p, &p);
306 seconds = simple_strtoul(p, &p, 0); 329 seconds = simple_strtoul(p, &p, 0);
307 cmm_set_timeout(pages, seconds); 330 cmm_set_timeout(nr, seconds);
308 } else { 331 } else {
309 len = sprintf(buf, "%ld %ld\n", 332 len = sprintf(buf, "%ld %ld\n",
310 cmm_timeout_pages, cmm_timeout_seconds); 333 cmm_timeout_pages, cmm_timeout_seconds);
@@ -357,7 +380,7 @@ static struct ctl_table cmm_dir_table[] = {
357static void 380static void
358cmm_smsg_target(char *from, char *msg) 381cmm_smsg_target(char *from, char *msg)
359{ 382{
360 long pages, seconds; 383 long nr, seconds;
361 384
362 if (strlen(sender) > 0 && strcmp(from, sender) != 0) 385 if (strlen(sender) > 0 && strcmp(from, sender) != 0)
363 return; 386 return;
@@ -366,27 +389,27 @@ cmm_smsg_target(char *from, char *msg)
366 if (strncmp(msg, "SHRINK", 6) == 0) { 389 if (strncmp(msg, "SHRINK", 6) == 0) {
367 if (!cmm_skip_blanks(msg + 6, &msg)) 390 if (!cmm_skip_blanks(msg + 6, &msg))
368 return; 391 return;
369 pages = simple_strtoul(msg, &msg, 0); 392 nr = simple_strtoul(msg, &msg, 0);
370 cmm_skip_blanks(msg, &msg); 393 cmm_skip_blanks(msg, &msg);
371 if (*msg == '\0') 394 if (*msg == '\0')
372 cmm_set_pages(pages); 395 cmm_set_pages(nr);
373 } else if (strncmp(msg, "RELEASE", 7) == 0) { 396 } else if (strncmp(msg, "RELEASE", 7) == 0) {
374 if (!cmm_skip_blanks(msg + 7, &msg)) 397 if (!cmm_skip_blanks(msg + 7, &msg))
375 return; 398 return;
376 pages = simple_strtoul(msg, &msg, 0); 399 nr = simple_strtoul(msg, &msg, 0);
377 cmm_skip_blanks(msg, &msg); 400 cmm_skip_blanks(msg, &msg);
378 if (*msg == '\0') 401 if (*msg == '\0')
379 cmm_add_timed_pages(pages); 402 cmm_add_timed_pages(nr);
380 } else if (strncmp(msg, "REUSE", 5) == 0) { 403 } else if (strncmp(msg, "REUSE", 5) == 0) {
381 if (!cmm_skip_blanks(msg + 5, &msg)) 404 if (!cmm_skip_blanks(msg + 5, &msg))
382 return; 405 return;
383 pages = simple_strtoul(msg, &msg, 0); 406 nr = simple_strtoul(msg, &msg, 0);
384 if (!cmm_skip_blanks(msg, &msg)) 407 if (!cmm_skip_blanks(msg, &msg))
385 return; 408 return;
386 seconds = simple_strtoul(msg, &msg, 0); 409 seconds = simple_strtoul(msg, &msg, 0);
387 cmm_skip_blanks(msg, &msg); 410 cmm_skip_blanks(msg, &msg);
388 if (*msg == '\0') 411 if (*msg == '\0')
389 cmm_set_timeout(pages, seconds); 412 cmm_set_timeout(nr, seconds);
390 } 413 }
391} 414}
392#endif 415#endif
@@ -396,21 +419,49 @@ struct ctl_table_header *cmm_sysctl_header;
396static int 419static int
397cmm_init (void) 420cmm_init (void)
398{ 421{
422 int rc = -ENOMEM;
423
399#ifdef CONFIG_CMM_PROC 424#ifdef CONFIG_CMM_PROC
400 cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1); 425 cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1);
426 if (!cmm_sysctl_header)
427 goto out;
401#endif 428#endif
402#ifdef CONFIG_CMM_IUCV 429#ifdef CONFIG_CMM_IUCV
403 smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); 430 rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
431 if (rc < 0)
432 goto out_smsg;
404#endif 433#endif
405 INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL); 434 rc = register_oom_notifier(&cmm_oom_nb);
435 if (rc < 0)
436 goto out_oom_notify;
406 init_waitqueue_head(&cmm_thread_wait); 437 init_waitqueue_head(&cmm_thread_wait);
407 init_timer(&cmm_timer); 438 init_timer(&cmm_timer);
408 return 0; 439 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
440 rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0;
441 if (!rc)
442 goto out;
443 /*
444 * kthread_create failed. undo all the stuff from above again.
445 */
446 unregister_oom_notifier(&cmm_oom_nb);
447
448out_oom_notify:
449#ifdef CONFIG_CMM_IUCV
450 smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
451out_smsg:
452#endif
453#ifdef CONFIG_CMM_PROC
454 unregister_sysctl_table(cmm_sysctl_header);
455#endif
456out:
457 return rc;
409} 458}
410 459
411static void 460static void
412cmm_exit(void) 461cmm_exit(void)
413{ 462{
463 kthread_stop(cmm_thread_ptr);
464 unregister_oom_notifier(&cmm_oom_nb);
414 cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); 465 cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
415 cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); 466 cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
416#ifdef CONFIG_CMM_PROC 467#ifdef CONFIG_CMM_PROC
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index ad8ed7d41e16..bf94eedb0a8e 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -30,7 +30,7 @@
30 30
31#define __pte_offset(address) \ 31#define __pte_offset(address) \
32 ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 32 ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
33#define pte_offset(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ 33#define pte_offset(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
34 __pte_offset(address)) 34 __pte_offset(address))
35 35
36static inline void cache_wback_all(void) 36static inline void cache_wback_all(void)
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c
index 1169757fb38b..83295bd21aa7 100644
--- a/arch/sh64/mm/init.c
+++ b/arch/sh64/mm/init.c
@@ -110,7 +110,7 @@ void show_mem(void)
110 */ 110 */
111void __init paging_init(void) 111void __init paging_init(void)
112{ 112{
113 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 113 unsigned long zones_size[MAX_NR_ZONES] = {0, };
114 114
115 pgd_init((unsigned long)swapper_pg_dir); 115 pgd_init((unsigned long)swapper_pg_dir);
116 pgd_init((unsigned long)swapper_pg_dir + 116 pgd_init((unsigned long)swapper_pg_dir +
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 16e13f663ab0..b27a506309ee 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -2175,7 +2175,7 @@ void __init ld_mmu_srmmu(void)
2175 2175
2176 BTFIXUPSET_CALL(pte_pfn, srmmu_pte_pfn, BTFIXUPCALL_NORM); 2176 BTFIXUPSET_CALL(pte_pfn, srmmu_pte_pfn, BTFIXUPCALL_NORM);
2177 BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM); 2177 BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM);
2178 BTFIXUPSET_CALL(pgd_page, srmmu_pgd_page, BTFIXUPCALL_NORM); 2178 BTFIXUPSET_CALL(pgd_page_vaddr, srmmu_pgd_page, BTFIXUPCALL_NORM);
2179 2179
2180 BTFIXUPSET_SETHI(none_mask, 0xF0000000); 2180 BTFIXUPSET_SETHI(none_mask, 0xF0000000);
2181 2181
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 7fdddf3c7e16..436021ceb2e7 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -2280,5 +2280,5 @@ void __init ld_mmu_sun4c(void)
2280 2280
2281 /* These should _never_ get called with two level tables. */ 2281 /* These should _never_ get called with two level tables. */
2282 BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP); 2282 BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP);
2283 BTFIXUPSET_CALL(pgd_page, sun4c_pgd_page, BTFIXUPCALL_RETO0); 2283 BTFIXUPSET_CALL(pgd_page_vaddr, sun4c_pgd_page, BTFIXUPCALL_RETO0);
2284} 2284}
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 8135ec322c9c..642541769a17 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -736,20 +736,15 @@ struct exec_domain solaris_exec_domain = {
736 736
737extern int init_socksys(void); 737extern int init_socksys(void);
738 738
739#ifdef MODULE
740
741MODULE_AUTHOR("Jakub Jelinek (jj@ultra.linux.cz), Patrik Rak (prak3264@ss1000.ms.mff.cuni.cz)"); 739MODULE_AUTHOR("Jakub Jelinek (jj@ultra.linux.cz), Patrik Rak (prak3264@ss1000.ms.mff.cuni.cz)");
742MODULE_DESCRIPTION("Solaris binary emulation module"); 740MODULE_DESCRIPTION("Solaris binary emulation module");
743MODULE_LICENSE("GPL"); 741MODULE_LICENSE("GPL");
744 742
745#ifdef __sparc_v9__
746extern u32 tl0_solaris[8]; 743extern u32 tl0_solaris[8];
747#define update_ttable(x) \ 744#define update_ttable(x) \
748 tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \ 745 tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \
749 wmb(); \ 746 wmb(); \
750 __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3])) 747 __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3]))
751#else
752#endif
753 748
754extern u32 solaris_sparc_syscall[]; 749extern u32 solaris_sparc_syscall[];
755extern u32 solaris_syscall[]; 750extern u32 solaris_syscall[];
@@ -757,7 +752,7 @@ extern void cleanup_socksys(void);
757 752
758extern u32 entry64_personality_patch; 753extern u32 entry64_personality_patch;
759 754
760int init_module(void) 755static int __init solaris_init(void)
761{ 756{
762 int ret; 757 int ret;
763 758
@@ -777,19 +772,12 @@ int init_module(void)
777 return 0; 772 return 0;
778} 773}
779 774
780void cleanup_module(void) 775static void __exit solaris_exit(void)
781{ 776{
782 update_ttable(solaris_syscall); 777 update_ttable(solaris_syscall);
783 cleanup_socksys(); 778 cleanup_socksys();
784 unregister_exec_domain(&solaris_exec_domain); 779 unregister_exec_domain(&solaris_exec_domain);
785} 780}
786 781
787#else 782module_init(solaris_init);
788int init_solaris_emul(void) 783module_exit(solaris_exit);
789{
790 register_exec_domain(&solaris_exec_domain);
791 init_socksys();
792 return 0;
793}
794#endif
795
diff --git a/arch/sparc64/solaris/socksys.c b/arch/sparc64/solaris/socksys.c
index bc3df95bc057..7c90e41fd3be 100644
--- a/arch/sparc64/solaris/socksys.c
+++ b/arch/sparc64/solaris/socksys.c
@@ -168,8 +168,7 @@ static struct file_operations socksys_fops = {
168 .release = socksys_release, 168 .release = socksys_release,
169}; 169};
170 170
171int __init 171int __init init_socksys(void)
172init_socksys(void)
173{ 172{
174 int ret; 173 int ret;
175 struct file * file; 174 struct file * file;
@@ -199,8 +198,7 @@ init_socksys(void)
199 return 0; 198 return 0;
200} 199}
201 200
202void 201void __exit cleanup_socksys(void)
203cleanup_socksys(void)
204{ 202{
205 if (unregister_chrdev(30, "socksys")) 203 if (unregister_chrdev(30, "socksys"))
206 printk ("Couldn't unregister socksys character device\n"); 204 printk ("Couldn't unregister socksys character device\n");
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 7218c754505b..e82764f75e7f 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -544,7 +544,7 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
544 544
545 ops = NULL; 545 ops = NULL;
546 data = NULL; 546 data = NULL;
547 for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){ 547 for(i = 0; i < ARRAY_SIZE(chan_table); i++){
548 entry = &chan_table[i]; 548 entry = &chan_table[i];
549 if(!strncmp(str, entry->key, strlen(entry->key))){ 549 if(!strncmp(str, entry->key, strlen(entry->key))){
550 ops = entry->ops; 550 ops = entry->ops;
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index b414522f7686..79610b5ce67e 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -497,7 +497,7 @@ static void mconsole_get_config(int (*get_config)(char *, char *, int,
497 } 497 }
498 498
499 error = NULL; 499 error = NULL;
500 size = sizeof(default_buf)/sizeof(default_buf[0]); 500 size = ARRAY_SIZE(default_buf);
501 buf = default_buf; 501 buf = default_buf;
502 502
503 while(1){ 503 while(1){
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index 9bfd405c3bd8..5b2f5fe9e426 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -16,6 +16,7 @@
16#include "user.h" 16#include "user.h"
17#include "mconsole.h" 17#include "mconsole.h"
18#include "umid.h" 18#include "umid.h"
19#include "user_util.h"
19 20
20static struct mconsole_command commands[] = { 21static struct mconsole_command commands[] = {
21 /* With uts namespaces, uts information becomes process-specific, so 22 /* With uts namespaces, uts information becomes process-specific, so
@@ -65,14 +66,14 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req)
65 struct mconsole_command *cmd; 66 struct mconsole_command *cmd;
66 int i; 67 int i;
67 68
68 for(i=0;i<sizeof(commands)/sizeof(commands[0]);i++){ 69 for(i = 0; i < ARRAY_SIZE(commands); i++){
69 cmd = &commands[i]; 70 cmd = &commands[i];
70 if(!strncmp(req->request.data, cmd->command, 71 if(!strncmp(req->request.data, cmd->command,
71 strlen(cmd->command))){ 72 strlen(cmd->command))){
72 return(cmd); 73 return cmd;
73 } 74 }
74 } 75 }
75 return(NULL); 76 return NULL;
76} 77}
77 78
78#define MIN(a,b) ((a)<(b) ? (a):(b)) 79#define MIN(a,b) ((a)<(b) ? (a):(b))
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 501f95675d89..4a7966b21931 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -31,6 +31,11 @@
31#include "irq_user.h" 31#include "irq_user.h"
32#include "irq_kern.h" 32#include "irq_kern.h"
33 33
34static inline void set_ether_mac(struct net_device *dev, unsigned char *addr)
35{
36 memcpy(dev->dev_addr, addr, ETH_ALEN);
37}
38
34#define DRIVER_NAME "uml-netdev" 39#define DRIVER_NAME "uml-netdev"
35 40
36static DEFINE_SPINLOCK(opened_lock); 41static DEFINE_SPINLOCK(opened_lock);
@@ -242,7 +247,7 @@ static int uml_net_set_mac(struct net_device *dev, void *addr)
242 struct sockaddr *hwaddr = addr; 247 struct sockaddr *hwaddr = addr;
243 248
244 spin_lock(&lp->lock); 249 spin_lock(&lp->lock);
245 memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN); 250 set_ether_mac(dev, hwaddr->sa_data);
246 spin_unlock(&lp->lock); 251 spin_unlock(&lp->lock);
247 252
248 return(0); 253 return(0);
@@ -790,13 +795,6 @@ void dev_ip_addr(void *d, unsigned char *bin_buf)
790 memcpy(bin_buf, &in->ifa_address, sizeof(in->ifa_address)); 795 memcpy(bin_buf, &in->ifa_address, sizeof(in->ifa_address));
791} 796}
792 797
793void set_ether_mac(void *d, unsigned char *addr)
794{
795 struct net_device *dev = d;
796
797 memcpy(dev->dev_addr, addr, ETH_ALEN);
798}
799
800struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra) 798struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra)
801{ 799{
802 if((skb != NULL) && (skb_tailroom(skb) < extra)){ 800 if((skb != NULL) && (skb_tailroom(skb) < extra)){
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index 466ff2c2f918..4c767c7adb96 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -76,7 +76,7 @@ int pcap_setup(char *str, char **mac_out, void *data)
76 if(host_if != NULL) 76 if(host_if != NULL)
77 init->host_if = host_if; 77 init->host_if = host_if;
78 78
79 for(i = 0; i < sizeof(options)/sizeof(options[0]); i++){ 79 for(i = 0; i < ARRAY_SIZE(options); i++){
80 if(options[i] == NULL) 80 if(options[i] == NULL)
81 continue; 81 continue;
82 if(!strcmp(options[i], "promisc")) 82 if(!strcmp(options[i], "promisc"))
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
index b98bdd8e052a..89e1dc835a5b 100644
--- a/arch/um/include/kern_util.h
+++ b/arch/um/include/kern_util.h
@@ -27,7 +27,6 @@ extern int ncpus;
27extern char *linux_prog; 27extern char *linux_prog;
28extern char *gdb_init; 28extern char *gdb_init;
29extern int kmalloc_ok; 29extern int kmalloc_ok;
30extern int timer_irq_inited;
31extern int jail; 30extern int jail;
32extern int nsyscalls; 31extern int nsyscalls;
33 32
diff --git a/arch/um/include/longjmp.h b/arch/um/include/longjmp.h
index 1b5c0131a12e..e93c6d3e893b 100644
--- a/arch/um/include/longjmp.h
+++ b/arch/um/include/longjmp.h
@@ -1,9 +1,12 @@
1#ifndef __UML_LONGJMP_H 1#ifndef __UML_LONGJMP_H
2#define __UML_LONGJMP_H 2#define __UML_LONGJMP_H
3 3
4#include <setjmp.h> 4#include "sysdep/archsetjmp.h"
5#include "os.h" 5#include "os.h"
6 6
7extern int setjmp(jmp_buf);
8extern void longjmp(jmp_buf, int);
9
7#define UML_LONGJMP(buf, val) do { \ 10#define UML_LONGJMP(buf, val) do { \
8 longjmp(*buf, val); \ 11 longjmp(*buf, val); \
9} while(0) 12} while(0)
diff --git a/arch/um/include/net_user.h b/arch/um/include/net_user.h
index 800c403920bc..47ef7cb49a8e 100644
--- a/arch/um/include/net_user.h
+++ b/arch/um/include/net_user.h
@@ -26,7 +26,6 @@ struct net_user_info {
26 26
27extern void ether_user_init(void *data, void *dev); 27extern void ether_user_init(void *data, void *dev);
28extern void dev_ip_addr(void *d, unsigned char *bin_buf); 28extern void dev_ip_addr(void *d, unsigned char *bin_buf);
29extern void set_ether_mac(void *d, unsigned char *addr);
30extern void iter_addresses(void *d, void (*cb)(unsigned char *, 29extern void iter_addresses(void *d, void (*cb)(unsigned char *,
31 unsigned char *, void *), 30 unsigned char *, void *),
32 void *arg); 31 void *arg);
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 5316e8a4a4fd..24fb6d8680e1 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -276,9 +276,11 @@ extern int setjmp_wrapper(void (*proc)(void *, void *), ...);
276 276
277extern void switch_timers(int to_real); 277extern void switch_timers(int to_real);
278extern void idle_sleep(int secs); 278extern void idle_sleep(int secs);
279extern int set_interval(int is_virtual);
280#ifdef CONFIG_MODE_TT
279extern void enable_timer(void); 281extern void enable_timer(void);
282#endif
280extern void disable_timer(void); 283extern void disable_timer(void);
281extern void user_time_init(void);
282extern void uml_idle_timer(void); 284extern void uml_idle_timer(void);
283extern unsigned long long os_nsecs(void); 285extern unsigned long long os_nsecs(void);
284 286
@@ -329,6 +331,7 @@ extern void os_set_ioignore(void);
329extern void init_irq_signals(int on_sigstack); 331extern void init_irq_signals(int on_sigstack);
330 332
331/* sigio.c */ 333/* sigio.c */
334extern int add_sigio_fd(int fd);
332extern int ignore_sigio_fd(int fd); 335extern int ignore_sigio_fd(int fd);
333extern void maybe_sigio_broken(int fd, int read); 336extern void maybe_sigio_broken(int fd, int read);
334 337
diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h
index 83b688ca198f..f845b3629a6d 100644
--- a/arch/um/include/registers.h
+++ b/arch/um/include/registers.h
@@ -7,6 +7,7 @@
7#define __REGISTERS_H 7#define __REGISTERS_H
8 8
9#include "sysdep/ptrace.h" 9#include "sysdep/ptrace.h"
10#include "sysdep/archsetjmp.h"
10 11
11extern void init_thread_registers(union uml_pt_regs *to); 12extern void init_thread_registers(union uml_pt_regs *to);
12extern int save_fp_registers(int pid, unsigned long *fp_regs); 13extern int save_fp_registers(int pid, unsigned long *fp_regs);
@@ -15,6 +16,6 @@ extern void save_registers(int pid, union uml_pt_regs *regs);
15extern void restore_registers(int pid, union uml_pt_regs *regs); 16extern void restore_registers(int pid, union uml_pt_regs *regs);
16extern void init_registers(int pid); 17extern void init_registers(int pid);
17extern void get_safe_registers(unsigned long * regs, unsigned long * fp_regs); 18extern void get_safe_registers(unsigned long * regs, unsigned long * fp_regs);
18extern void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer); 19extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
19 20
20#endif 21#endif
diff --git a/arch/um/include/sysdep-i386/archsetjmp.h b/arch/um/include/sysdep-i386/archsetjmp.h
new file mode 100644
index 000000000000..ea1ba3d42aee
--- /dev/null
+++ b/arch/um/include/sysdep-i386/archsetjmp.h
@@ -0,0 +1,19 @@
1/*
2 * arch/i386/include/klibc/archsetjmp.h
3 */
4
5#ifndef _KLIBC_ARCHSETJMP_H
6#define _KLIBC_ARCHSETJMP_H
7
8struct __jmp_buf {
9 unsigned int __ebx;
10 unsigned int __esp;
11 unsigned int __ebp;
12 unsigned int __esi;
13 unsigned int __edi;
14 unsigned int __eip;
15};
16
17typedef struct __jmp_buf jmp_buf[1];
18
19#endif /* _SETJMP_H */
diff --git a/arch/um/include/sysdep-i386/signal.h b/arch/um/include/sysdep-i386/signal.h
deleted file mode 100644
index 07518b162136..000000000000
--- a/arch/um/include/sysdep-i386/signal.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright (C) 2004 PathScale, Inc
3 * Licensed under the GPL
4 */
5
6#ifndef __I386_SIGNAL_H_
7#define __I386_SIGNAL_H_
8
9#include <signal.h>
10
11#define ARCH_SIGHDLR_PARAM int sig
12
13#define ARCH_GET_SIGCONTEXT(sc, sig) \
14 do sc = (struct sigcontext *) (&sig + 1); while(0)
15
16#endif
17
18/*
19 * Overrides for Emacs so that we follow Linus's tabbing style.
20 * Emacs will notice this stuff at the end of the file and automatically
21 * adjust the settings for this buffer only. This must remain at the end
22 * of the file.
23 * ---------------------------------------------------------------------------
24 * Local variables:
25 * c-file-style: "linux"
26 * End:
27 */
diff --git a/arch/um/include/sysdep-x86_64/archsetjmp.h b/arch/um/include/sysdep-x86_64/archsetjmp.h
new file mode 100644
index 000000000000..454fc60aff6d
--- /dev/null
+++ b/arch/um/include/sysdep-x86_64/archsetjmp.h
@@ -0,0 +1,21 @@
1/*
2 * arch/x86_64/include/klibc/archsetjmp.h
3 */
4
5#ifndef _KLIBC_ARCHSETJMP_H
6#define _KLIBC_ARCHSETJMP_H
7
8struct __jmp_buf {
9 unsigned long __rbx;
10 unsigned long __rsp;
11 unsigned long __rbp;
12 unsigned long __r12;
13 unsigned long __r13;
14 unsigned long __r14;
15 unsigned long __r15;
16 unsigned long __rip;
17};
18
19typedef struct __jmp_buf jmp_buf[1];
20
21#endif /* _SETJMP_H */
diff --git a/arch/um/include/sysdep-x86_64/signal.h b/arch/um/include/sysdep-x86_64/signal.h
deleted file mode 100644
index 6142897af3d1..000000000000
--- a/arch/um/include/sysdep-x86_64/signal.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (C) 2004 PathScale, Inc
3 * Licensed under the GPL
4 */
5
6#ifndef __X86_64_SIGNAL_H_
7#define __X86_64_SIGNAL_H_
8
9#define ARCH_SIGHDLR_PARAM int sig
10
11#define ARCH_GET_SIGCONTEXT(sc, sig_addr) \
12 do { \
13 struct ucontext *__uc; \
14 asm("movq %%rdx, %0" : "=r" (__uc)); \
15 sc = (struct sigcontext *) &__uc->uc_mcontext; \
16 } while(0)
17
18#endif
19
20/*
21 * Overrides for Emacs so that we follow Linus's tabbing style.
22 * Emacs will notice this stuff at the end of the file and automatically
23 * adjust the settings for this buffer only. This must remain at the end
24 * of the file.
25 * ---------------------------------------------------------------------------
26 * Local variables:
27 * c-file-style: "linux"
28 * End:
29 */
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index fc38a6d5906d..0561c43b4685 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -41,9 +41,11 @@ static long execve1(char *file, char __user * __user *argv,
41 long error; 41 long error;
42 42
43#ifdef CONFIG_TTY_LOG 43#ifdef CONFIG_TTY_LOG
44 task_lock(current); 44 mutex_lock(&tty_mutex);
45 task_lock(current); /* FIXME: is this needed ? */
45 log_exec(argv, current->signal->tty); 46 log_exec(argv, current->signal->tty);
46 task_unlock(current); 47 task_unlock(current);
48 mutex_unlock(&tty_mutex);
47#endif 49#endif
48 error = do_execve(file, argv, env, &current->thread.regs); 50 error = do_execve(file, argv, env, &current->thread.regs);
49 if (error == 0){ 51 if (error == 0){
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 589c69a75043..ce7f233fc490 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -142,19 +142,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
142 .events = events, 142 .events = events,
143 .current_events = 0 } ); 143 .current_events = 0 } );
144 144
145 /* Critical section - locked by a spinlock because this stuff can
146 * be changed from interrupt handlers. The stuff above is done
147 * outside the lock because it allocates memory.
148 */
149
150 /* Actually, it only looks like it can be called from interrupt
151 * context. The culprit is reactivate_fd, which calls
152 * maybe_sigio_broken, which calls write_sigio_workaround,
153 * which calls activate_fd. However, write_sigio_workaround should
154 * only be called once, at boot time. That would make it clear that
155 * this is called only from process context, and can be locked with
156 * a semaphore.
157 */
158 spin_lock_irqsave(&irq_lock, flags); 145 spin_lock_irqsave(&irq_lock, flags);
159 for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { 146 for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
160 if ((irq_fd->fd == fd) && (irq_fd->type == type)) { 147 if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
@@ -165,7 +152,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
165 } 152 }
166 } 153 }
167 154
168 /*-------------*/
169 if (type == IRQ_WRITE) 155 if (type == IRQ_WRITE)
170 fd = -1; 156 fd = -1;
171 157
@@ -198,7 +184,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
198 184
199 spin_lock_irqsave(&irq_lock, flags); 185 spin_lock_irqsave(&irq_lock, flags);
200 } 186 }
201 /*-------------*/
202 187
203 *last_irq_ptr = new_fd; 188 *last_irq_ptr = new_fd;
204 last_irq_ptr = &new_fd->next; 189 last_irq_ptr = &new_fd->next;
@@ -210,14 +195,14 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
210 */ 195 */
211 maybe_sigio_broken(fd, (type == IRQ_READ)); 196 maybe_sigio_broken(fd, (type == IRQ_READ));
212 197
213 return(0); 198 return 0;
214 199
215 out_unlock: 200 out_unlock:
216 spin_unlock_irqrestore(&irq_lock, flags); 201 spin_unlock_irqrestore(&irq_lock, flags);
217 out_kfree: 202 out_kfree:
218 kfree(new_fd); 203 kfree(new_fd);
219 out: 204 out:
220 return(err); 205 return err;
221} 206}
222 207
223static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) 208static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
@@ -302,10 +287,7 @@ void reactivate_fd(int fd, int irqnum)
302 os_set_pollfd(i, irq->fd); 287 os_set_pollfd(i, irq->fd);
303 spin_unlock_irqrestore(&irq_lock, flags); 288 spin_unlock_irqrestore(&irq_lock, flags);
304 289
305 /* This calls activate_fd, so it has to be outside the critical 290 add_sigio_fd(fd);
306 * section.
307 */
308 maybe_sigio_broken(fd, (irq->type == IRQ_READ));
309} 291}
310 292
311void deactivate_fd(int fd, int irqnum) 293void deactivate_fd(int fd, int irqnum)
@@ -316,11 +298,15 @@ void deactivate_fd(int fd, int irqnum)
316 298
317 spin_lock_irqsave(&irq_lock, flags); 299 spin_lock_irqsave(&irq_lock, flags);
318 irq = find_irq_by_fd(fd, irqnum, &i); 300 irq = find_irq_by_fd(fd, irqnum, &i);
319 if (irq == NULL) 301 if(irq == NULL){
320 goto out; 302 spin_unlock_irqrestore(&irq_lock, flags);
303 return;
304 }
305
321 os_set_pollfd(i, -1); 306 os_set_pollfd(i, -1);
322 out:
323 spin_unlock_irqrestore(&irq_lock, flags); 307 spin_unlock_irqrestore(&irq_lock, flags);
308
309 ignore_sigio_fd(fd);
324} 310}
325 311
326int deactivate_all_fds(void) 312int deactivate_all_fds(void)
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 61280167c560..93121c6d26e5 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -79,8 +79,10 @@ void mem_init(void)
79 79
80 /* this will put all low memory onto the freelists */ 80 /* this will put all low memory onto the freelists */
81 totalram_pages = free_all_bootmem(); 81 totalram_pages = free_all_bootmem();
82#ifdef CONFIG_HIGHMEM
82 totalhigh_pages = highmem >> PAGE_SHIFT; 83 totalhigh_pages = highmem >> PAGE_SHIFT;
83 totalram_pages += totalhigh_pages; 84 totalram_pages += totalhigh_pages;
85#endif
84 num_physpages = totalram_pages; 86 num_physpages = totalram_pages;
85 max_pfn = totalram_pages; 87 max_pfn = totalram_pages;
86 printk(KERN_INFO "Memory: %luk available\n", 88 printk(KERN_INFO "Memory: %luk available\n",
@@ -221,10 +223,13 @@ void paging_init(void)
221 223
222 empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); 224 empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
223 empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); 225 empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
224 for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) 226 for(i = 0; i < ARRAY_SIZE(zones_size); i++)
225 zones_size[i] = 0; 227 zones_size[i] = 0;
228
226 zones_size[ZONE_DMA] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); 229 zones_size[ZONE_DMA] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT);
230#ifdef CONFIG_HIGHMEM
227 zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT; 231 zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT;
232#endif
228 free_area_init(zones_size); 233 free_area_init(zones_size);
229 234
230 /* 235 /*
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index f6a5a502120b..537895d68ad1 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -23,6 +23,7 @@
23#include "linux/proc_fs.h" 23#include "linux/proc_fs.h"
24#include "linux/ptrace.h" 24#include "linux/ptrace.h"
25#include "linux/random.h" 25#include "linux/random.h"
26#include "linux/personality.h"
26#include "asm/unistd.h" 27#include "asm/unistd.h"
27#include "asm/mman.h" 28#include "asm/mman.h"
28#include "asm/segment.h" 29#include "asm/segment.h"
@@ -476,7 +477,7 @@ int singlestepping(void * t)
476#ifndef arch_align_stack 477#ifndef arch_align_stack
477unsigned long arch_align_stack(unsigned long sp) 478unsigned long arch_align_stack(unsigned long sp)
478{ 479{
479 if (randomize_va_space) 480 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
480 sp -= get_random_int() % 8192; 481 sp -= get_random_int() % 8192;
481 return sp & ~0xf; 482 return sp & ~0xf;
482} 483}
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 3ef73bf2e781..f602623644aa 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -22,7 +22,7 @@ static void kill_idlers(int me)
22 struct task_struct *p; 22 struct task_struct *p;
23 int i; 23 int i;
24 24
25 for(i = 0; i < sizeof(idle_threads)/sizeof(idle_threads[0]); i++){ 25 for(i = 0; i < ARRAY_SIZE(idle_threads); i++){
26 p = idle_threads[i]; 26 p = idle_threads[i];
27 if((p != NULL) && (p->thread.mode.tt.extern_pid != me)) 27 if((p != NULL) && (p->thread.mode.tt.extern_pid != me))
28 os_kill_process(p->thread.mode.tt.extern_pid, 0); 28 os_kill_process(p->thread.mode.tt.extern_pid, 0);
@@ -62,14 +62,3 @@ void machine_halt(void)
62{ 62{
63 machine_power_off(); 63 machine_power_off();
64} 64}
65
66/*
67 * Overrides for Emacs so that we follow Linus's tabbing style.
68 * Emacs will notice this stuff at the end of the file and automatically
69 * adjust the settings for this buffer only. This must remain at the end
70 * of the file.
71 * ---------------------------------------------------------------------------
72 * Local variables:
73 * c-file-style: "linux"
74 * End:
75 */
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 624ca238d1fd..79c22707a637 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -55,7 +55,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
55 * destroy_context_skas. 55 * destroy_context_skas.
56 */ 56 */
57 57
58 mm->context.skas.last_page_table = pmd_page_kernel(*pmd); 58 mm->context.skas.last_page_table = pmd_page_vaddr(*pmd);
59#ifdef CONFIG_3_LEVEL_PGTABLES 59#ifdef CONFIG_3_LEVEL_PGTABLES
60 mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud)); 60 mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud));
61#endif 61#endif
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 552ca1cb9847..2454bbd9555d 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -35,9 +35,6 @@ unsigned long long sched_clock(void)
35 return (unsigned long long)jiffies_64 * (1000000000 / HZ); 35 return (unsigned long long)jiffies_64 * (1000000000 / HZ);
36} 36}
37 37
38/* Changed at early boot */
39int timer_irq_inited = 0;
40
41static unsigned long long prev_nsecs; 38static unsigned long long prev_nsecs;
42#ifdef CONFIG_UML_REAL_TIME_CLOCK 39#ifdef CONFIG_UML_REAL_TIME_CLOCK
43static long long delta; /* Deviation per interval */ 40static long long delta; /* Deviation per interval */
@@ -113,12 +110,13 @@ static void register_timer(void)
113 110
114 err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL); 111 err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL);
115 if(err != 0) 112 if(err != 0)
116 printk(KERN_ERR "timer_init : request_irq failed - " 113 printk(KERN_ERR "register_timer : request_irq failed - "
117 "errno = %d\n", -err); 114 "errno = %d\n", -err);
118 115
119 timer_irq_inited = 1; 116 err = set_interval(1);
120 117 if(err != 0)
121 user_time_init(); 118 printk(KERN_ERR "register_timer : set_interval failed - "
119 "errno = %d\n", -err);
122} 120}
123 121
124extern void (*late_time_init)(void); 122extern void (*late_time_init)(void);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index f5b0636f9ad7..54a5ff25645a 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -1,4 +1,4 @@
1/* 1/*
2 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) 2 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
@@ -16,12 +16,12 @@
16#include "os.h" 16#include "os.h"
17 17
18static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, 18static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
19 int r, int w, int x, struct host_vm_op *ops, int *index, 19 int r, int w, int x, struct host_vm_op *ops, int *index,
20 int last_filled, union mm_context *mmu, void **flush, 20 int last_filled, union mm_context *mmu, void **flush,
21 int (*do_ops)(union mm_context *, struct host_vm_op *, 21 int (*do_ops)(union mm_context *, struct host_vm_op *,
22 int, int, void **)) 22 int, int, void **))
23{ 23{
24 __u64 offset; 24 __u64 offset;
25 struct host_vm_op *last; 25 struct host_vm_op *last;
26 int fd, ret = 0; 26 int fd, ret = 0;
27 27
@@ -89,7 +89,7 @@ static int add_munmap(unsigned long addr, unsigned long len,
89static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, 89static int add_mprotect(unsigned long addr, unsigned long len, int r, int w,
90 int x, struct host_vm_op *ops, int *index, 90 int x, struct host_vm_op *ops, int *index,
91 int last_filled, union mm_context *mmu, void **flush, 91 int last_filled, union mm_context *mmu, void **flush,
92 int (*do_ops)(union mm_context *, struct host_vm_op *, 92 int (*do_ops)(union mm_context *, struct host_vm_op *,
93 int, int, void **)) 93 int, int, void **))
94{ 94{
95 struct host_vm_op *last; 95 struct host_vm_op *last;
@@ -124,105 +124,105 @@ static int add_mprotect(unsigned long addr, unsigned long len, int r, int w,
124#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) 124#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
125 125
126void fix_range_common(struct mm_struct *mm, unsigned long start_addr, 126void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
127 unsigned long end_addr, int force, 127 unsigned long end_addr, int force,
128 int (*do_ops)(union mm_context *, struct host_vm_op *, 128 int (*do_ops)(union mm_context *, struct host_vm_op *,
129 int, int, void **)) 129 int, int, void **))
130{ 130{
131 pgd_t *npgd; 131 pgd_t *npgd;
132 pud_t *npud; 132 pud_t *npud;
133 pmd_t *npmd; 133 pmd_t *npmd;
134 pte_t *npte; 134 pte_t *npte;
135 union mm_context *mmu = &mm->context; 135 union mm_context *mmu = &mm->context;
136 unsigned long addr, end; 136 unsigned long addr, end;
137 int r, w, x; 137 int r, w, x;
138 struct host_vm_op ops[1]; 138 struct host_vm_op ops[1];
139 void *flush = NULL; 139 void *flush = NULL;
140 int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; 140 int op_index = -1, last_op = ARRAY_SIZE(ops) - 1;
141 int ret = 0; 141 int ret = 0;
142 142
143 if(mm == NULL) return; 143 if(mm == NULL)
144 144 return;
145 ops[0].type = NONE; 145
146 for(addr = start_addr; addr < end_addr && !ret;){ 146 ops[0].type = NONE;
147 npgd = pgd_offset(mm, addr); 147 for(addr = start_addr; addr < end_addr && !ret;){
148 if(!pgd_present(*npgd)){ 148 npgd = pgd_offset(mm, addr);
149 end = ADD_ROUND(addr, PGDIR_SIZE); 149 if(!pgd_present(*npgd)){
150 if(end > end_addr) 150 end = ADD_ROUND(addr, PGDIR_SIZE);
151 end = end_addr; 151 if(end > end_addr)
152 if(force || pgd_newpage(*npgd)){ 152 end = end_addr;
153 ret = add_munmap(addr, end - addr, ops, 153 if(force || pgd_newpage(*npgd)){
154 &op_index, last_op, mmu, 154 ret = add_munmap(addr, end - addr, ops,
155 &flush, do_ops); 155 &op_index, last_op, mmu,
156 pgd_mkuptodate(*npgd); 156 &flush, do_ops);
157 } 157 pgd_mkuptodate(*npgd);
158 addr = end; 158 }
159 continue; 159 addr = end;
160 } 160 continue;
161 161 }
162 npud = pud_offset(npgd, addr); 162
163 if(!pud_present(*npud)){ 163 npud = pud_offset(npgd, addr);
164 end = ADD_ROUND(addr, PUD_SIZE); 164 if(!pud_present(*npud)){
165 if(end > end_addr) 165 end = ADD_ROUND(addr, PUD_SIZE);
166 end = end_addr; 166 if(end > end_addr)
167 if(force || pud_newpage(*npud)){ 167 end = end_addr;
168 ret = add_munmap(addr, end - addr, ops, 168 if(force || pud_newpage(*npud)){
169 &op_index, last_op, mmu, 169 ret = add_munmap(addr, end - addr, ops,
170 &flush, do_ops); 170 &op_index, last_op, mmu,
171 pud_mkuptodate(*npud); 171 &flush, do_ops);
172 } 172 pud_mkuptodate(*npud);
173 addr = end; 173 }
174 continue; 174 addr = end;
175 } 175 continue;
176 176 }
177 npmd = pmd_offset(npud, addr); 177
178 if(!pmd_present(*npmd)){ 178 npmd = pmd_offset(npud, addr);
179 end = ADD_ROUND(addr, PMD_SIZE); 179 if(!pmd_present(*npmd)){
180 if(end > end_addr) 180 end = ADD_ROUND(addr, PMD_SIZE);
181 end = end_addr; 181 if(end > end_addr)
182 if(force || pmd_newpage(*npmd)){ 182 end = end_addr;
183 ret = add_munmap(addr, end - addr, ops, 183 if(force || pmd_newpage(*npmd)){
184 &op_index, last_op, mmu, 184 ret = add_munmap(addr, end - addr, ops,
185 &flush, do_ops); 185 &op_index, last_op, mmu,
186 pmd_mkuptodate(*npmd); 186 &flush, do_ops);
187 } 187 pmd_mkuptodate(*npmd);
188 addr = end; 188 }
189 continue; 189 addr = end;
190 } 190 continue;
191 191 }
192 npte = pte_offset_kernel(npmd, addr); 192
193 r = pte_read(*npte); 193 npte = pte_offset_kernel(npmd, addr);
194 w = pte_write(*npte); 194 r = pte_read(*npte);
195 x = pte_exec(*npte); 195 w = pte_write(*npte);
196 x = pte_exec(*npte);
196 if (!pte_young(*npte)) { 197 if (!pte_young(*npte)) {
197 r = 0; 198 r = 0;
198 w = 0; 199 w = 0;
199 } else if (!pte_dirty(*npte)) { 200 } else if (!pte_dirty(*npte)) {
200 w = 0; 201 w = 0;
201 } 202 }
202 if(force || pte_newpage(*npte)){ 203 if(force || pte_newpage(*npte)){
203 if(pte_present(*npte)) 204 if(pte_present(*npte))
204 ret = add_mmap(addr, 205 ret = add_mmap(addr,
205 pte_val(*npte) & PAGE_MASK, 206 pte_val(*npte) & PAGE_MASK,
206 PAGE_SIZE, r, w, x, ops, 207 PAGE_SIZE, r, w, x, ops,
207 &op_index, last_op, mmu, 208 &op_index, last_op, mmu,
208 &flush, do_ops); 209 &flush, do_ops);
209 else ret = add_munmap(addr, PAGE_SIZE, ops, 210 else ret = add_munmap(addr, PAGE_SIZE, ops,
210 &op_index, last_op, mmu, 211 &op_index, last_op, mmu,
211 &flush, do_ops); 212 &flush, do_ops);
212 } 213 }
213 else if(pte_newprot(*npte)) 214 else if(pte_newprot(*npte))
214 ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, 215 ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops,
215 &op_index, last_op, mmu, 216 &op_index, last_op, mmu,
216 &flush, do_ops); 217 &flush, do_ops);
217 218
218 *npte = pte_mkuptodate(*npte); 219 *npte = pte_mkuptodate(*npte);
219 addr += PAGE_SIZE; 220 addr += PAGE_SIZE;
220 } 221 }
221
222 if(!ret) 222 if(!ret)
223 ret = (*do_ops)(mmu, ops, op_index, 1, &flush); 223 ret = (*do_ops)(mmu, ops, op_index, 1, &flush);
224 224
225 /* This is not an else because ret is modified above */ 225/* This is not an else because ret is modified above */
226 if(ret) { 226 if(ret) {
227 printk("fix_range_common: failed, killing current process\n"); 227 printk("fix_range_common: failed, killing current process\n");
228 force_sig(SIGKILL, current); 228 force_sig(SIGKILL, current);
@@ -231,160 +231,160 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
231 231
232int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) 232int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
233{ 233{
234 struct mm_struct *mm; 234 struct mm_struct *mm;
235 pgd_t *pgd; 235 pgd_t *pgd;
236 pud_t *pud; 236 pud_t *pud;
237 pmd_t *pmd; 237 pmd_t *pmd;
238 pte_t *pte; 238 pte_t *pte;
239 unsigned long addr, last; 239 unsigned long addr, last;
240 int updated = 0, err; 240 int updated = 0, err;
241 241
242 mm = &init_mm; 242 mm = &init_mm;
243 for(addr = start; addr < end;){ 243 for(addr = start; addr < end;){
244 pgd = pgd_offset(mm, addr); 244 pgd = pgd_offset(mm, addr);
245 if(!pgd_present(*pgd)){ 245 if(!pgd_present(*pgd)){
246 last = ADD_ROUND(addr, PGDIR_SIZE); 246 last = ADD_ROUND(addr, PGDIR_SIZE);
247 if(last > end) 247 if(last > end)
248 last = end; 248 last = end;
249 if(pgd_newpage(*pgd)){ 249 if(pgd_newpage(*pgd)){
250 updated = 1; 250 updated = 1;
251 err = os_unmap_memory((void *) addr, 251 err = os_unmap_memory((void *) addr,
252 last - addr); 252 last - addr);
253 if(err < 0) 253 if(err < 0)
254 panic("munmap failed, errno = %d\n", 254 panic("munmap failed, errno = %d\n",
255 -err); 255 -err);
256 } 256 }
257 addr = last; 257 addr = last;
258 continue; 258 continue;
259 } 259 }
260 260
261 pud = pud_offset(pgd, addr); 261 pud = pud_offset(pgd, addr);
262 if(!pud_present(*pud)){ 262 if(!pud_present(*pud)){
263 last = ADD_ROUND(addr, PUD_SIZE); 263 last = ADD_ROUND(addr, PUD_SIZE);
264 if(last > end) 264 if(last > end)
265 last = end; 265 last = end;
266 if(pud_newpage(*pud)){ 266 if(pud_newpage(*pud)){
267 updated = 1; 267 updated = 1;
268 err = os_unmap_memory((void *) addr, 268 err = os_unmap_memory((void *) addr,
269 last - addr); 269 last - addr);
270 if(err < 0) 270 if(err < 0)
271 panic("munmap failed, errno = %d\n", 271 panic("munmap failed, errno = %d\n",
272 -err); 272 -err);
273 } 273 }
274 addr = last; 274 addr = last;
275 continue; 275 continue;
276 } 276 }
277 277
278 pmd = pmd_offset(pud, addr); 278 pmd = pmd_offset(pud, addr);
279 if(!pmd_present(*pmd)){ 279 if(!pmd_present(*pmd)){
280 last = ADD_ROUND(addr, PMD_SIZE); 280 last = ADD_ROUND(addr, PMD_SIZE);
281 if(last > end) 281 if(last > end)
282 last = end; 282 last = end;
283 if(pmd_newpage(*pmd)){ 283 if(pmd_newpage(*pmd)){
284 updated = 1; 284 updated = 1;
285 err = os_unmap_memory((void *) addr, 285 err = os_unmap_memory((void *) addr,
286 last - addr); 286 last - addr);
287 if(err < 0) 287 if(err < 0)
288 panic("munmap failed, errno = %d\n", 288 panic("munmap failed, errno = %d\n",
289 -err); 289 -err);
290 } 290 }
291 addr = last; 291 addr = last;
292 continue; 292 continue;
293 } 293 }
294 294
295 pte = pte_offset_kernel(pmd, addr); 295 pte = pte_offset_kernel(pmd, addr);
296 if(!pte_present(*pte) || pte_newpage(*pte)){ 296 if(!pte_present(*pte) || pte_newpage(*pte)){
297 updated = 1; 297 updated = 1;
298 err = os_unmap_memory((void *) addr, 298 err = os_unmap_memory((void *) addr,
299 PAGE_SIZE); 299 PAGE_SIZE);
300 if(err < 0) 300 if(err < 0)
301 panic("munmap failed, errno = %d\n", 301 panic("munmap failed, errno = %d\n",
302 -err); 302 -err);
303 if(pte_present(*pte)) 303 if(pte_present(*pte))
304 map_memory(addr, 304 map_memory(addr,
305 pte_val(*pte) & PAGE_MASK, 305 pte_val(*pte) & PAGE_MASK,
306 PAGE_SIZE, 1, 1, 1); 306 PAGE_SIZE, 1, 1, 1);
307 } 307 }
308 else if(pte_newprot(*pte)){ 308 else if(pte_newprot(*pte)){
309 updated = 1; 309 updated = 1;
310 os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1); 310 os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
311 } 311 }
312 addr += PAGE_SIZE; 312 addr += PAGE_SIZE;
313 } 313 }
314 return(updated); 314 return(updated);
315} 315}
316 316
317pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) 317pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
318{ 318{
319 return(pgd_offset(mm, address)); 319 return(pgd_offset(mm, address));
320} 320}
321 321
322pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address) 322pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
323{ 323{
324 return(pud_offset(pgd, address)); 324 return(pud_offset(pgd, address));
325} 325}
326 326
327pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address) 327pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
328{ 328{
329 return(pmd_offset(pud, address)); 329 return(pmd_offset(pud, address));
330} 330}
331 331
332pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) 332pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
333{ 333{
334 return(pte_offset_kernel(pmd, address)); 334 return(pte_offset_kernel(pmd, address));
335} 335}
336 336
337pte_t *addr_pte(struct task_struct *task, unsigned long addr) 337pte_t *addr_pte(struct task_struct *task, unsigned long addr)
338{ 338{
339 pgd_t *pgd = pgd_offset(task->mm, addr); 339 pgd_t *pgd = pgd_offset(task->mm, addr);
340 pud_t *pud = pud_offset(pgd, addr); 340 pud_t *pud = pud_offset(pgd, addr);
341 pmd_t *pmd = pmd_offset(pud, addr); 341 pmd_t *pmd = pmd_offset(pud, addr);
342 342
343 return(pte_offset_map(pmd, addr)); 343 return(pte_offset_map(pmd, addr));
344} 344}
345 345
346void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) 346void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
347{ 347{
348 address &= PAGE_MASK; 348 address &= PAGE_MASK;
349 flush_tlb_range(vma, address, address + PAGE_SIZE); 349 flush_tlb_range(vma, address, address + PAGE_SIZE);
350} 350}
351 351
352void flush_tlb_all(void) 352void flush_tlb_all(void)
353{ 353{
354 flush_tlb_mm(current->mm); 354 flush_tlb_mm(current->mm);
355} 355}
356 356
357void flush_tlb_kernel_range(unsigned long start, unsigned long end) 357void flush_tlb_kernel_range(unsigned long start, unsigned long end)
358{ 358{
359 CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, 359 CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt,
360 flush_tlb_kernel_range_common, start, end); 360 flush_tlb_kernel_range_common, start, end);
361} 361}
362 362
363void flush_tlb_kernel_vm(void) 363void flush_tlb_kernel_vm(void)
364{ 364{
365 CHOOSE_MODE(flush_tlb_kernel_vm_tt(), 365 CHOOSE_MODE(flush_tlb_kernel_vm_tt(),
366 flush_tlb_kernel_range_common(start_vm, end_vm)); 366 flush_tlb_kernel_range_common(start_vm, end_vm));
367} 367}
368 368
369void __flush_tlb_one(unsigned long addr) 369void __flush_tlb_one(unsigned long addr)
370{ 370{
371 CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); 371 CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr);
372} 372}
373 373
374void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 374void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
375 unsigned long end) 375 unsigned long end)
376{ 376{
377 CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, 377 CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start,
378 end); 378 end);
379} 379}
380 380
381void flush_tlb_mm(struct mm_struct *mm) 381void flush_tlb_mm(struct mm_struct *mm)
382{ 382{
383 CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); 383 CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm);
384} 384}
385 385
386void force_flush_all(void) 386void force_flush_all(void)
387{ 387{
388 CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); 388 CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas());
389} 389}
390 390
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index ac70fa5a2e2a..e5eeaf2b6af1 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -227,9 +227,16 @@ void bad_segv(struct faultinfo fi, unsigned long ip)
227 227
228void relay_signal(int sig, union uml_pt_regs *regs) 228void relay_signal(int sig, union uml_pt_regs *regs)
229{ 229{
230 if(arch_handle_signal(sig, regs)) return; 230 if(arch_handle_signal(sig, regs))
231 if(!UPT_IS_USER(regs)) 231 return;
232
233 if(!UPT_IS_USER(regs)){
234 if(sig == SIGBUS)
235 printk("Bus error - the /dev/shm or /tmp mount likely "
236 "just ran out of space\n");
232 panic("Kernel mode signal %d", sig); 237 panic("Kernel mode signal %d", sig);
238 }
239
233 current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); 240 current->thread.arch.faultinfo = *UPT_FAULTINFO(regs);
234 force_sig(sig, current); 241 force_sig(sig, current);
235} 242}
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index 6987d1d247a2..cd15b9df5b5c 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -42,7 +42,7 @@ static int helper_child(void *arg)
42 if(data->pre_exec != NULL) 42 if(data->pre_exec != NULL)
43 (*data->pre_exec)(data->pre_data); 43 (*data->pre_exec)(data->pre_data);
44 execvp(argv[0], argv); 44 execvp(argv[0], argv);
45 errval = errno; 45 errval = -errno;
46 printk("helper_child - execve of '%s' failed - errno = %d\n", argv[0], errno); 46 printk("helper_child - execve of '%s' failed - errno = %d\n", argv[0], errno);
47 os_write_file(data->fd, &errval, sizeof(errval)); 47 os_write_file(data->fd, &errval, sizeof(errval));
48 kill(os_getpid(), SIGKILL); 48 kill(os_getpid(), SIGKILL);
@@ -62,7 +62,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv,
62 stack = *stack_out; 62 stack = *stack_out;
63 else stack = alloc_stack(0, __cant_sleep()); 63 else stack = alloc_stack(0, __cant_sleep());
64 if(stack == 0) 64 if(stack == 0)
65 return(-ENOMEM); 65 return -ENOMEM;
66 66
67 ret = os_pipe(fds, 1, 0); 67 ret = os_pipe(fds, 1, 0);
68 if(ret < 0){ 68 if(ret < 0){
@@ -95,16 +95,16 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv,
95 /* Read the errno value from the child, if the exec failed, or get 0 if 95 /* Read the errno value from the child, if the exec failed, or get 0 if
96 * the exec succeeded because the pipe fd was set as close-on-exec. */ 96 * the exec succeeded because the pipe fd was set as close-on-exec. */
97 n = os_read_file(fds[0], &ret, sizeof(ret)); 97 n = os_read_file(fds[0], &ret, sizeof(ret));
98 if (n < 0) { 98 if(n == 0)
99 printk("run_helper : read on pipe failed, ret = %d\n", -n);
100 ret = n;
101 kill(pid, SIGKILL);
102 CATCH_EINTR(waitpid(pid, NULL, 0));
103 } else if(n != 0){
104 CATCH_EINTR(n = waitpid(pid, NULL, 0));
105 ret = -errno;
106 } else {
107 ret = pid; 99 ret = pid;
100 else {
101 if(n < 0){
102 printk("run_helper : read on pipe failed, ret = %d\n",
103 -n);
104 ret = n;
105 kill(pid, SIGKILL);
106 }
107 CATCH_EINTR(waitpid(pid, NULL, 0));
108 } 108 }
109 109
110out_close: 110out_close:
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index 7555bf9c33d9..a97206df5b52 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -132,7 +132,7 @@ void os_set_pollfd(int i, int fd)
132 132
133void os_set_ioignore(void) 133void os_set_ioignore(void)
134{ 134{
135 set_handler(SIGIO, SIG_IGN, 0, -1); 135 signal(SIGIO, SIG_IGN);
136} 136}
137 137
138void init_irq_signals(int on_sigstack) 138void init_irq_signals(int on_sigstack)
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 90912aaca7aa..d1c5670787dc 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -67,13 +67,32 @@ static __init void do_uml_initcalls(void)
67 67
68static void last_ditch_exit(int sig) 68static void last_ditch_exit(int sig)
69{ 69{
70 signal(SIGINT, SIG_DFL);
71 signal(SIGTERM, SIG_DFL);
72 signal(SIGHUP, SIG_DFL);
73 uml_cleanup(); 70 uml_cleanup();
74 exit(1); 71 exit(1);
75} 72}
76 73
74static void install_fatal_handler(int sig)
75{
76 struct sigaction action;
77
78 /* All signals are enabled in this handler ... */
79 sigemptyset(&action.sa_mask);
80
81 /* ... including the signal being handled, plus we want the
82 * handler reset to the default behavior, so that if an exit
83 * handler is hanging for some reason, the UML will just die
84 * after this signal is sent a second time.
85 */
86 action.sa_flags = SA_RESETHAND | SA_NODEFER;
87 action.sa_restorer = NULL;
88 action.sa_handler = last_ditch_exit;
89 if(sigaction(sig, &action, NULL) < 0){
90 printf("failed to install handler for signal %d - errno = %d\n",
91 errno);
92 exit(1);
93 }
94}
95
77#define UML_LIB_PATH ":/usr/lib/uml" 96#define UML_LIB_PATH ":/usr/lib/uml"
78 97
79static void setup_env_path(void) 98static void setup_env_path(void)
@@ -158,9 +177,12 @@ int main(int argc, char **argv, char **envp)
158 } 177 }
159 new_argv[argc] = NULL; 178 new_argv[argc] = NULL;
160 179
161 set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); 180 /* Allow these signals to bring down a UML if all other
162 set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); 181 * methods of control fail.
163 set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); 182 */
183 install_fatal_handler(SIGINT);
184 install_fatal_handler(SIGTERM);
185 install_fatal_handler(SIGHUP);
164 186
165 scan_elf_aux( envp); 187 scan_elf_aux( envp);
166 188
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 560c8063c77c..b170b4704dc4 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -114,14 +114,14 @@ static void which_tmpdir(void)
114 } 114 }
115 115
116 while(1){ 116 while(1){
117 found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' '); 117 found = next(fd, buf, ARRAY_SIZE(buf), ' ');
118 if(found != 1) 118 if(found != 1)
119 break; 119 break;
120 120
121 if(!strncmp(buf, "/dev/shm", strlen("/dev/shm"))) 121 if(!strncmp(buf, "/dev/shm", strlen("/dev/shm")))
122 goto found; 122 goto found;
123 123
124 found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), '\n'); 124 found = next(fd, buf, ARRAY_SIZE(buf), '\n');
125 if(found != 1) 125 if(found != 1)
126 break; 126 break;
127 } 127 }
@@ -135,7 +135,7 @@ err:
135 return; 135 return;
136 136
137found: 137found:
138 found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' '); 138 found = next(fd, buf, ARRAY_SIZE(buf), ' ');
139 if(found != 1) 139 if(found != 1)
140 goto err; 140 goto err;
141 141
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index b98d3ca2cd1b..ff203625a4bd 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -7,7 +7,6 @@
7#include <stdio.h> 7#include <stdio.h>
8#include <errno.h> 8#include <errno.h>
9#include <signal.h> 9#include <signal.h>
10#include <setjmp.h>
11#include <linux/unistd.h> 10#include <linux/unistd.h>
12#include <sys/mman.h> 11#include <sys/mman.h>
13#include <sys/wait.h> 12#include <sys/wait.h>
@@ -247,7 +246,17 @@ void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
247 set_sigstack(sig_stack, pages * page_size()); 246 set_sigstack(sig_stack, pages * page_size());
248 flags = SA_ONSTACK; 247 flags = SA_ONSTACK;
249 } 248 }
250 if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); 249 if(usr1_handler){
250 struct sigaction sa;
251
252 sa.sa_handler = usr1_handler;
253 sigemptyset(&sa.sa_mask);
254 sa.sa_flags = flags;
255 sa.sa_restorer = NULL;
256 if(sigaction(SIGUSR1, &sa, NULL) < 0)
257 panic("init_new_thread_stack - sigaction failed - "
258 "errno = %d\n", errno);
259 }
251} 260}
252 261
253void init_new_thread_signals(void) 262void init_new_thread_signals(void)
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 0ecac563c7b3..f6457765b17d 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -43,17 +43,9 @@ struct pollfds {
43/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread 43/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread
44 * synchronizes with it. 44 * synchronizes with it.
45 */ 45 */
46static struct pollfds current_poll = { 46static struct pollfds current_poll;
47 .poll = NULL, 47static struct pollfds next_poll;
48 .size = 0, 48static struct pollfds all_sigio_fds;
49 .used = 0
50};
51
52static struct pollfds next_poll = {
53 .poll = NULL,
54 .size = 0,
55 .used = 0
56};
57 49
58static int write_sigio_thread(void *unused) 50static int write_sigio_thread(void *unused)
59{ 51{
@@ -78,7 +70,8 @@ static int write_sigio_thread(void *unused)
78 n = os_read_file(sigio_private[1], &c, sizeof(c)); 70 n = os_read_file(sigio_private[1], &c, sizeof(c));
79 if(n != sizeof(c)) 71 if(n != sizeof(c))
80 printk("write_sigio_thread : " 72 printk("write_sigio_thread : "
81 "read failed, err = %d\n", -n); 73 "read on socket failed, "
74 "err = %d\n", -n);
82 tmp = current_poll; 75 tmp = current_poll;
83 current_poll = next_poll; 76 current_poll = next_poll;
84 next_poll = tmp; 77 next_poll = tmp;
@@ -93,35 +86,36 @@ static int write_sigio_thread(void *unused)
93 86
94 n = os_write_file(respond_fd, &c, sizeof(c)); 87 n = os_write_file(respond_fd, &c, sizeof(c));
95 if(n != sizeof(c)) 88 if(n != sizeof(c))
96 printk("write_sigio_thread : write failed, " 89 printk("write_sigio_thread : write on socket "
97 "err = %d\n", -n); 90 "failed, err = %d\n", -n);
98 } 91 }
99 } 92 }
100 93
101 return 0; 94 return 0;
102} 95}
103 96
104static int need_poll(int n) 97static int need_poll(struct pollfds *polls, int n)
105{ 98{
106 if(n <= next_poll.size){ 99 if(n <= polls->size){
107 next_poll.used = n; 100 polls->used = n;
108 return(0); 101 return 0;
109 } 102 }
110 kfree(next_poll.poll); 103 kfree(polls->poll);
111 next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); 104 polls->poll = um_kmalloc_atomic(n * sizeof(struct pollfd));
112 if(next_poll.poll == NULL){ 105 if(polls->poll == NULL){
113 printk("need_poll : failed to allocate new pollfds\n"); 106 printk("need_poll : failed to allocate new pollfds\n");
114 next_poll.size = 0; 107 polls->size = 0;
115 next_poll.used = 0; 108 polls->used = 0;
116 return(-1); 109 return -ENOMEM;
117 } 110 }
118 next_poll.size = n; 111 polls->size = n;
119 next_poll.used = n; 112 polls->used = n;
120 return(0); 113 return 0;
121} 114}
122 115
123/* Must be called with sigio_lock held, because it's needed by the marked 116/* Must be called with sigio_lock held, because it's needed by the marked
124 * critical section. */ 117 * critical section.
118 */
125static void update_thread(void) 119static void update_thread(void)
126{ 120{
127 unsigned long flags; 121 unsigned long flags;
@@ -156,34 +150,39 @@ static void update_thread(void)
156 set_signals(flags); 150 set_signals(flags);
157} 151}
158 152
159static int add_sigio_fd(int fd, int read) 153int add_sigio_fd(int fd)
160{ 154{
161 int err = 0, i, n, events; 155 struct pollfd *p;
156 int err = 0, i, n;
162 157
163 sigio_lock(); 158 sigio_lock();
159 for(i = 0; i < all_sigio_fds.used; i++){
160 if(all_sigio_fds.poll[i].fd == fd)
161 break;
162 }
163 if(i == all_sigio_fds.used)
164 goto out;
165
166 p = &all_sigio_fds.poll[i];
167
164 for(i = 0; i < current_poll.used; i++){ 168 for(i = 0; i < current_poll.used; i++){
165 if(current_poll.poll[i].fd == fd) 169 if(current_poll.poll[i].fd == fd)
166 goto out; 170 goto out;
167 } 171 }
168 172
169 n = current_poll.used + 1; 173 n = current_poll.used + 1;
170 err = need_poll(n); 174 err = need_poll(&next_poll, n);
171 if(err) 175 if(err)
172 goto out; 176 goto out;
173 177
174 for(i = 0; i < current_poll.used; i++) 178 for(i = 0; i < current_poll.used; i++)
175 next_poll.poll[i] = current_poll.poll[i]; 179 next_poll.poll[i] = current_poll.poll[i];
176 180
177 if(read) events = POLLIN; 181 next_poll.poll[n - 1] = *p;
178 else events = POLLOUT;
179
180 next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd,
181 .events = events,
182 .revents = 0 });
183 update_thread(); 182 update_thread();
184 out: 183 out:
185 sigio_unlock(); 184 sigio_unlock();
186 return(err); 185 return err;
187} 186}
188 187
189int ignore_sigio_fd(int fd) 188int ignore_sigio_fd(int fd)
@@ -205,18 +204,14 @@ int ignore_sigio_fd(int fd)
205 if(i == current_poll.used) 204 if(i == current_poll.used)
206 goto out; 205 goto out;
207 206
208 err = need_poll(current_poll.used - 1); 207 err = need_poll(&next_poll, current_poll.used - 1);
209 if(err) 208 if(err)
210 goto out; 209 goto out;
211 210
212 for(i = 0; i < current_poll.used; i++){ 211 for(i = 0; i < current_poll.used; i++){
213 p = &current_poll.poll[i]; 212 p = &current_poll.poll[i];
214 if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; 213 if(p->fd != fd)
215 } 214 next_poll.poll[n++] = *p;
216 if(n == i){
217 printk("ignore_sigio_fd : fd %d not found\n", fd);
218 err = -1;
219 goto out;
220 } 215 }
221 216
222 update_thread(); 217 update_thread();
@@ -234,7 +229,7 @@ static struct pollfd *setup_initial_poll(int fd)
234 printk("setup_initial_poll : failed to allocate poll\n"); 229 printk("setup_initial_poll : failed to allocate poll\n");
235 return NULL; 230 return NULL;
236 } 231 }
237 *p = ((struct pollfd) { .fd = fd, 232 *p = ((struct pollfd) { .fd = fd,
238 .events = POLLIN, 233 .events = POLLIN,
239 .revents = 0 }); 234 .revents = 0 });
240 return p; 235 return p;
@@ -323,6 +318,8 @@ out_close1:
323 318
324void maybe_sigio_broken(int fd, int read) 319void maybe_sigio_broken(int fd, int read)
325{ 320{
321 int err;
322
326 if(!isatty(fd)) 323 if(!isatty(fd))
327 return; 324 return;
328 325
@@ -330,7 +327,19 @@ void maybe_sigio_broken(int fd, int read)
330 return; 327 return;
331 328
332 write_sigio_workaround(); 329 write_sigio_workaround();
333 add_sigio_fd(fd, read); 330
331 sigio_lock();
332 err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1);
333 if(err){
334 printk("maybe_sigio_broken - failed to add pollfd\n");
335 goto out;
336 }
337 all_sigio_fds.poll[all_sigio_fds.used++] =
338 ((struct pollfd) { .fd = fd,
339 .events = read ? POLLIN : POLLOUT,
340 .revents = 0 });
341out:
342 sigio_unlock();
334} 343}
335 344
336static void sigio_cleanup(void) 345static void sigio_cleanup(void)
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 60e4faedf254..6b81739279d1 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -15,7 +15,6 @@
15#include "user.h" 15#include "user.h"
16#include "signal_kern.h" 16#include "signal_kern.h"
17#include "sysdep/sigcontext.h" 17#include "sysdep/sigcontext.h"
18#include "sysdep/signal.h"
19#include "sigcontext.h" 18#include "sigcontext.h"
20#include "mode.h" 19#include "mode.h"
21#include "os.h" 20#include "os.h"
@@ -38,18 +37,10 @@
38static int signals_enabled = 1; 37static int signals_enabled = 1;
39static int pending = 0; 38static int pending = 0;
40 39
41void sig_handler(ARCH_SIGHDLR_PARAM) 40void sig_handler(int sig, struct sigcontext *sc)
42{ 41{
43 struct sigcontext *sc;
44 int enabled; 42 int enabled;
45 43
46 /* Must be the first thing that this handler does - x86_64 stores
47 * the sigcontext in %rdx, and we need to save it before it has a
48 * chance to get trashed.
49 */
50
51 ARCH_GET_SIGCONTEXT(sc, sig);
52
53 enabled = signals_enabled; 44 enabled = signals_enabled;
54 if(!enabled && (sig == SIGIO)){ 45 if(!enabled && (sig == SIGIO)){
55 pending |= SIGIO_MASK; 46 pending |= SIGIO_MASK;
@@ -64,15 +55,8 @@ void sig_handler(ARCH_SIGHDLR_PARAM)
64 set_signals(enabled); 55 set_signals(enabled);
65} 56}
66 57
67extern int timer_irq_inited;
68
69static void real_alarm_handler(int sig, struct sigcontext *sc) 58static void real_alarm_handler(int sig, struct sigcontext *sc)
70{ 59{
71 if(!timer_irq_inited){
72 signals_enabled = 1;
73 return;
74 }
75
76 if(sig == SIGALRM) 60 if(sig == SIGALRM)
77 switch_timers(0); 61 switch_timers(0);
78 62
@@ -84,13 +68,10 @@ static void real_alarm_handler(int sig, struct sigcontext *sc)
84 68
85} 69}
86 70
87void alarm_handler(ARCH_SIGHDLR_PARAM) 71void alarm_handler(int sig, struct sigcontext *sc)
88{ 72{
89 struct sigcontext *sc;
90 int enabled; 73 int enabled;
91 74
92 ARCH_GET_SIGCONTEXT(sc, sig);
93
94 enabled = signals_enabled; 75 enabled = signals_enabled;
95 if(!signals_enabled){ 76 if(!signals_enabled){
96 if(sig == SIGVTALRM) 77 if(sig == SIGVTALRM)
@@ -126,6 +107,10 @@ void remove_sigstack(void)
126 panic("disabling signal stack failed, errno = %d\n", errno); 107 panic("disabling signal stack failed, errno = %d\n", errno);
127} 108}
128 109
110void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
111
112extern void hard_handler(int sig);
113
129void set_handler(int sig, void (*handler)(int), int flags, ...) 114void set_handler(int sig, void (*handler)(int), int flags, ...)
130{ 115{
131 struct sigaction action; 116 struct sigaction action;
@@ -133,13 +118,16 @@ void set_handler(int sig, void (*handler)(int), int flags, ...)
133 sigset_t sig_mask; 118 sigset_t sig_mask;
134 int mask; 119 int mask;
135 120
136 va_start(ap, flags); 121 handlers[sig] = (void (*)(int, struct sigcontext *)) handler;
137 action.sa_handler = handler; 122 action.sa_handler = hard_handler;
123
138 sigemptyset(&action.sa_mask); 124 sigemptyset(&action.sa_mask);
139 while((mask = va_arg(ap, int)) != -1){ 125
126 va_start(ap, flags);
127 while((mask = va_arg(ap, int)) != -1)
140 sigaddset(&action.sa_mask, mask); 128 sigaddset(&action.sa_mask, mask);
141 }
142 va_end(ap); 129 va_end(ap);
130
143 action.sa_flags = flags; 131 action.sa_flags = flags;
144 action.sa_restorer = NULL; 132 action.sa_restorer = NULL;
145 if(sigaction(sig, &action, NULL) < 0) 133 if(sigaction(sig, &action, NULL) < 0)
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 7baf90fda58b..42e3d1ed802c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -8,7 +8,6 @@
8#include <unistd.h> 8#include <unistd.h>
9#include <errno.h> 9#include <errno.h>
10#include <signal.h> 10#include <signal.h>
11#include <setjmp.h>
12#include <sched.h> 11#include <sched.h>
13#include "ptrace_user.h" 12#include "ptrace_user.h"
14#include <sys/wait.h> 13#include <sys/wait.h>
@@ -156,11 +155,15 @@ extern int __syscall_stub_start;
156static int userspace_tramp(void *stack) 155static int userspace_tramp(void *stack)
157{ 156{
158 void *addr; 157 void *addr;
158 int err;
159 159
160 ptrace(PTRACE_TRACEME, 0, 0, 0); 160 ptrace(PTRACE_TRACEME, 0, 0, 0);
161 161
162 init_new_thread_signals(); 162 init_new_thread_signals();
163 enable_timer(); 163 err = set_interval(1);
164 if(err)
165 panic("userspace_tramp - setting timer failed, errno = %d\n",
166 err);
164 167
165 if(!proc_mm){ 168 if(!proc_mm){
166 /* This has a pte, but it can't be mapped in with the usual 169 /* This has a pte, but it can't be mapped in with the usual
@@ -190,14 +193,25 @@ static int userspace_tramp(void *stack)
190 } 193 }
191 } 194 }
192 if(!ptrace_faultinfo && (stack != NULL)){ 195 if(!ptrace_faultinfo && (stack != NULL)){
196 struct sigaction sa;
197
193 unsigned long v = UML_CONFIG_STUB_CODE + 198 unsigned long v = UML_CONFIG_STUB_CODE +
194 (unsigned long) stub_segv_handler - 199 (unsigned long) stub_segv_handler -
195 (unsigned long) &__syscall_stub_start; 200 (unsigned long) &__syscall_stub_start;
196 201
197 set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); 202 set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size());
198 set_handler(SIGSEGV, (void *) v, SA_ONSTACK, 203 sigemptyset(&sa.sa_mask);
199 SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, 204 sigaddset(&sa.sa_mask, SIGIO);
200 SIGUSR1, -1); 205 sigaddset(&sa.sa_mask, SIGWINCH);
206 sigaddset(&sa.sa_mask, SIGALRM);
207 sigaddset(&sa.sa_mask, SIGVTALRM);
208 sigaddset(&sa.sa_mask, SIGUSR1);
209 sa.sa_flags = SA_ONSTACK;
210 sa.sa_handler = (void *) v;
211 sa.sa_restorer = NULL;
212 if(sigaction(SIGSEGV, &sa, NULL) < 0)
213 panic("userspace_tramp - setting SIGSEGV handler "
214 "failed - errno = %d\n", errno);
201 } 215 }
202 216
203 os_stop_process(os_getpid()); 217 os_stop_process(os_getpid());
@@ -470,7 +484,7 @@ void thread_wait(void *sw, void *fb)
470 *switch_buf = &buf; 484 *switch_buf = &buf;
471 fork_buf = fb; 485 fork_buf = fb;
472 if(UML_SETJMP(&buf) == 0) 486 if(UML_SETJMP(&buf) == 0)
473 siglongjmp(*fork_buf, INIT_JMP_REMOVE_SIGSTACK); 487 UML_LONGJMP(fork_buf, INIT_JMP_REMOVE_SIGSTACK);
474} 488}
475 489
476void switch_threads(void *me, void *next) 490void switch_threads(void *me, void *next)
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 503148504009..7fe92680c7dd 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -14,7 +14,6 @@
14#include <sched.h> 14#include <sched.h>
15#include <fcntl.h> 15#include <fcntl.h>
16#include <errno.h> 16#include <errno.h>
17#include <setjmp.h>
18#include <sys/time.h> 17#include <sys/time.h>
19#include <sys/wait.h> 18#include <sys/wait.h>
20#include <sys/mman.h> 19#include <sys/mman.h>
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
index b3213613c41c..37806621b25d 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/um/os-Linux/sys-i386/Makefile
@@ -3,7 +3,7 @@
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6obj-$(CONFIG_MODE_SKAS) = registers.o tls.o 6obj-$(CONFIG_MODE_SKAS) = registers.o signal.o tls.o
7 7
8USER_OBJS := $(obj-y) 8USER_OBJS := $(obj-y)
9 9
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c
index 516f66dd87e3..7cd0369e02b3 100644
--- a/arch/um/os-Linux/sys-i386/registers.c
+++ b/arch/um/os-Linux/sys-i386/registers.c
@@ -5,12 +5,12 @@
5 5
6#include <errno.h> 6#include <errno.h>
7#include <string.h> 7#include <string.h>
8#include <setjmp.h>
9#include "sysdep/ptrace_user.h" 8#include "sysdep/ptrace_user.h"
10#include "sysdep/ptrace.h" 9#include "sysdep/ptrace.h"
11#include "uml-config.h" 10#include "uml-config.h"
12#include "skas_ptregs.h" 11#include "skas_ptregs.h"
13#include "registers.h" 12#include "registers.h"
13#include "longjmp.h"
14#include "user.h" 14#include "user.h"
15 15
16/* These are set once at boot time and not changed thereafter */ 16/* These are set once at boot time and not changed thereafter */
@@ -130,11 +130,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
130 HOST_FP_SIZE * sizeof(unsigned long)); 130 HOST_FP_SIZE * sizeof(unsigned long));
131} 131}
132 132
133void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) 133unsigned long get_thread_reg(int reg, jmp_buf *buf)
134{ 134{
135 struct __jmp_buf_tag *jmpbuf = buffer; 135 switch(reg){
136 136 case EIP: return buf[0]->__eip;
137 UPT_SET(uml_regs, EIP, jmpbuf->__jmpbuf[JB_PC]); 137 case UESP: return buf[0]->__esp;
138 UPT_SET(uml_regs, UESP, jmpbuf->__jmpbuf[JB_SP]); 138 case EBP: return buf[0]->__ebp;
139 UPT_SET(uml_regs, EBP, jmpbuf->__jmpbuf[JB_BP]); 139 default:
140 printk("get_thread_regs - unknown register %d\n", reg);
141 return 0;
142 }
140} 143}
diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c
new file mode 100644
index 000000000000..0d3eae518352
--- /dev/null
+++ b/arch/um/os-Linux/sys-i386/signal.c
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <signal.h>
7
8extern void (*handlers[])(int sig, struct sigcontext *sc);
9
10void hard_handler(int sig)
11{
12 struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
13
14 (*handlers[sig])(sig, sc);
15}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
index 340ef26f5944..f67842a7735b 100644
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ b/arch/um/os-Linux/sys-x86_64/Makefile
@@ -3,7 +3,7 @@
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6obj-$(CONFIG_MODE_SKAS) = registers.o 6obj-$(CONFIG_MODE_SKAS) = registers.o signal.o
7 7
8USER_OBJS := $(obj-y) 8USER_OBJS := $(obj-y)
9 9
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
index becd898d9398..cb8e8a263280 100644
--- a/arch/um/os-Linux/sys-x86_64/registers.c
+++ b/arch/um/os-Linux/sys-x86_64/registers.c
@@ -5,11 +5,11 @@
5 5
6#include <errno.h> 6#include <errno.h>
7#include <string.h> 7#include <string.h>
8#include <setjmp.h>
9#include "ptrace_user.h" 8#include "ptrace_user.h"
10#include "uml-config.h" 9#include "uml-config.h"
11#include "skas_ptregs.h" 10#include "skas_ptregs.h"
12#include "registers.h" 11#include "registers.h"
12#include "longjmp.h"
13#include "user.h" 13#include "user.h"
14 14
15/* These are set once at boot time and not changed thereafter */ 15/* These are set once at boot time and not changed thereafter */
@@ -78,11 +78,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
78 HOST_FP_SIZE * sizeof(unsigned long)); 78 HOST_FP_SIZE * sizeof(unsigned long));
79} 79}
80 80
81void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) 81unsigned long get_thread_reg(int reg, jmp_buf *buf)
82{ 82{
83 struct __jmp_buf_tag *jmpbuf = buffer; 83 switch(reg){
84 84 case RIP: return buf[0]->__rip;
85 UPT_SET(uml_regs, RIP, jmpbuf->__jmpbuf[JB_PC]); 85 case RSP: return buf[0]->__rsp;
86 UPT_SET(uml_regs, RSP, jmpbuf->__jmpbuf[JB_RSP]); 86 case RBP: return buf[0]->__rbp;
87 UPT_SET(uml_regs, RBP, jmpbuf->__jmpbuf[JB_RBP]); 87 default:
88 printk("get_thread_regs - unknown register %d\n", reg);
89 return 0;
90 }
88} 91}
diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c
new file mode 100644
index 000000000000..3f369e5f976b
--- /dev/null
+++ b/arch/um/os-Linux/sys-x86_64/signal.c
@@ -0,0 +1,16 @@
1/*
2 * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <signal.h>
7
8extern void (*handlers[])(int sig, struct sigcontext *sc);
9
10void hard_handler(int sig)
11{
12 struct ucontext *uc;
13 asm("movq %%rdx, %0" : "=r" (uc));
14
15 (*handlers[sig])(sig, (struct sigcontext *) &uc->uc_mcontext);
16}
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 4ae73c0e5485..38be096e750f 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -17,20 +17,25 @@
17#include "kern_constants.h" 17#include "kern_constants.h"
18#include "os.h" 18#include "os.h"
19 19
20static void set_interval(int timer_type) 20int set_interval(int is_virtual)
21{ 21{
22 int usec = 1000000/hz(); 22 int usec = 1000000/hz();
23 int timer_type = is_virtual ? ITIMER_VIRTUAL : ITIMER_REAL;
23 struct itimerval interval = ((struct itimerval) { { 0, usec }, 24 struct itimerval interval = ((struct itimerval) { { 0, usec },
24 { 0, usec } }); 25 { 0, usec } });
25 26
26 if(setitimer(timer_type, &interval, NULL) == -1) 27 if(setitimer(timer_type, &interval, NULL) == -1)
27 panic("setitimer failed - errno = %d\n", errno); 28 return -errno;
29
30 return 0;
28} 31}
29 32
33#ifdef CONFIG_MODE_TT
30void enable_timer(void) 34void enable_timer(void)
31{ 35{
32 set_interval(ITIMER_VIRTUAL); 36 set_interval(1);
33} 37}
38#endif
34 39
35void disable_timer(void) 40void disable_timer(void)
36{ 41{
@@ -40,8 +45,8 @@ void disable_timer(void)
40 printk("disnable_timer - setitimer failed, errno = %d\n", 45 printk("disnable_timer - setitimer failed, errno = %d\n",
41 errno); 46 errno);
42 /* If there are signals already queued, after unblocking ignore them */ 47 /* If there are signals already queued, after unblocking ignore them */
43 set_handler(SIGALRM, SIG_IGN, 0, -1); 48 signal(SIGALRM, SIG_IGN);
44 set_handler(SIGVTALRM, SIG_IGN, 0, -1); 49 signal(SIGVTALRM, SIG_IGN);
45} 50}
46 51
47void switch_timers(int to_real) 52void switch_timers(int to_real)
@@ -74,7 +79,7 @@ void uml_idle_timer(void)
74 79
75 set_handler(SIGALRM, (__sighandler_t) alarm_handler, 80 set_handler(SIGALRM, (__sighandler_t) alarm_handler,
76 SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); 81 SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
77 set_interval(ITIMER_REAL); 82 set_interval(0);
78} 83}
79#endif 84#endif
80 85
@@ -94,8 +99,3 @@ void idle_sleep(int secs)
94 ts.tv_nsec = 0; 99 ts.tv_nsec = 0;
95 nanosleep(&ts, NULL); 100 nanosleep(&ts, NULL);
96} 101}
97
98void user_time_init(void)
99{
100 set_interval(ITIMER_VIRTUAL);
101}
diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
index 90b29ae9af46..1df231a26244 100644
--- a/arch/um/os-Linux/trap.c
+++ b/arch/um/os-Linux/trap.c
@@ -5,7 +5,6 @@
5 5
6#include <stdlib.h> 6#include <stdlib.h>
7#include <signal.h> 7#include <signal.h>
8#include <setjmp.h>
9#include "kern_util.h" 8#include "kern_util.h"
10#include "user_util.h" 9#include "user_util.h"
11#include "os.h" 10#include "os.h"
diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c
index 865f6a6a2590..bbb73a650370 100644
--- a/arch/um/os-Linux/uaccess.c
+++ b/arch/um/os-Linux/uaccess.c
@@ -4,8 +4,7 @@
4 * Licensed under the GPL 4 * Licensed under the GPL
5 */ 5 */
6 6
7#include <setjmp.h> 7#include <stddef.h>
8#include <string.h>
9#include "longjmp.h" 8#include "longjmp.h"
10 9
11unsigned long __do_user_copy(void *to, const void *from, int n, 10unsigned long __do_user_copy(void *to, const void *from, int n,
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index c47a2a7ce70e..3f5b1514e8a7 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -7,7 +7,6 @@
7#include <stdlib.h> 7#include <stdlib.h>
8#include <unistd.h> 8#include <unistd.h>
9#include <limits.h> 9#include <limits.h>
10#include <setjmp.h>
11#include <sys/mman.h> 10#include <sys/mman.h>
12#include <sys/stat.h> 11#include <sys/stat.h>
13#include <sys/utsname.h> 12#include <sys/utsname.h>
@@ -107,11 +106,11 @@ int setjmp_wrapper(void (*proc)(void *, void *), ...)
107 jmp_buf buf; 106 jmp_buf buf;
108 int n; 107 int n;
109 108
110 n = sigsetjmp(buf, 1); 109 n = UML_SETJMP(&buf);
111 if(n == 0){ 110 if(n == 0){
112 va_start(args, proc); 111 va_start(args, proc);
113 (*proc)(&buf, &args); 112 (*proc)(&buf, &args);
114 } 113 }
115 va_end(args); 114 va_end(args);
116 return(n); 115 return n;
117} 116}
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 374d61a19439..59cc70275754 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -1,5 +1,5 @@
1obj-y = bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ 1obj-y = bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
2 ptrace_user.o signal.o sigcontext.o syscalls.o sysrq.o \ 2 ptrace_user.o setjmp.o signal.o sigcontext.o syscalls.o sysrq.o \
3 sys_call_table.o tls.o 3 sys_call_table.o tls.o
4 4
5obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o 5obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o
diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
index 41b0ab2fe830..f1bcd399ac90 100644
--- a/arch/um/sys-i386/bugs.c
+++ b/arch/um/sys-i386/bugs.c
@@ -13,6 +13,7 @@
13#include "sysdep/ptrace.h" 13#include "sysdep/ptrace.h"
14#include "task.h" 14#include "task.h"
15#include "os.h" 15#include "os.h"
16#include "user_util.h"
16 17
17#define MAXTOKEN 64 18#define MAXTOKEN 64
18 19
@@ -104,17 +105,17 @@ int cpu_feature(char *what, char *buf, int len)
104static int check_cpu_flag(char *feature, int *have_it) 105static int check_cpu_flag(char *feature, int *have_it)
105{ 106{
106 char buf[MAXTOKEN], c; 107 char buf[MAXTOKEN], c;
107 int fd, len = sizeof(buf)/sizeof(buf[0]); 108 int fd, len = ARRAY_SIZE(buf);
108 109
109 printk("Checking for host processor %s support...", feature); 110 printk("Checking for host processor %s support...", feature);
110 fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); 111 fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0);
111 if(fd < 0){ 112 if(fd < 0){
112 printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); 113 printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd);
113 return(0); 114 return 0;
114 } 115 }
115 116
116 *have_it = 0; 117 *have_it = 0;
117 if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) 118 if(!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf)))
118 goto out; 119 goto out;
119 120
120 c = token(fd, buf, len - 1, ' '); 121 c = token(fd, buf, len - 1, ' ');
@@ -138,7 +139,7 @@ static int check_cpu_flag(char *feature, int *have_it)
138 if(*have_it == 0) printk("No\n"); 139 if(*have_it == 0) printk("No\n");
139 else if(*have_it == 1) printk("Yes\n"); 140 else if(*have_it == 1) printk("Yes\n");
140 os_close_file(fd); 141 os_close_file(fd);
141 return(1); 142 return 1;
142} 143}
143 144
144#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems 145#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index fe0877b3509c..69971b78beaf 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -424,9 +424,8 @@ void ldt_get_host_info(void)
424 size++; 424 size++;
425 } 425 }
426 426
427 if(size < sizeof(dummy_list)/sizeof(dummy_list[0])) { 427 if(size < ARRAY_SIZE(dummy_list))
428 host_ldt_entries = dummy_list; 428 host_ldt_entries = dummy_list;
429 }
430 else { 429 else {
431 size = (size + 1) * sizeof(dummy_list[0]); 430 size = (size + 1) * sizeof(dummy_list[0]);
432 host_ldt_entries = (short *)kmalloc(size, GFP_KERNEL); 431 host_ldt_entries = (short *)kmalloc(size, GFP_KERNEL);
diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c
index 40aa88531446..5f3cc6685820 100644
--- a/arch/um/sys-i386/ptrace_user.c
+++ b/arch/um/sys-i386/ptrace_user.c
@@ -15,6 +15,7 @@
15#include "user.h" 15#include "user.h"
16#include "os.h" 16#include "os.h"
17#include "uml-config.h" 17#include "uml-config.h"
18#include "user_util.h"
18 19
19int ptrace_getregs(long pid, unsigned long *regs_out) 20int ptrace_getregs(long pid, unsigned long *regs_out)
20{ 21{
@@ -51,7 +52,7 @@ static void write_debugregs(int pid, unsigned long *regs)
51 int nregs, i; 52 int nregs, i;
52 53
53 dummy = NULL; 54 dummy = NULL;
54 nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); 55 nregs = ARRAY_SIZE(dummy->u_debugreg);
55 for(i = 0; i < nregs; i++){ 56 for(i = 0; i < nregs; i++){
56 if((i == 4) || (i == 5)) continue; 57 if((i == 4) || (i == 5)) continue;
57 if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], 58 if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i],
@@ -68,7 +69,7 @@ static void read_debugregs(int pid, unsigned long *regs)
68 int nregs, i; 69 int nregs, i;
69 70
70 dummy = NULL; 71 dummy = NULL;
71 nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); 72 nregs = ARRAY_SIZE(dummy->u_debugreg);
72 for(i = 0; i < nregs; i++){ 73 for(i = 0; i < nregs; i++){
73 regs[i] = ptrace(PTRACE_PEEKUSR, pid, 74 regs[i] = ptrace(PTRACE_PEEKUSR, pid,
74 &dummy->u_debugreg[i], 0); 75 &dummy->u_debugreg[i], 0);
diff --git a/arch/um/sys-i386/setjmp.S b/arch/um/sys-i386/setjmp.S
new file mode 100644
index 000000000000..b766792c9933
--- /dev/null
+++ b/arch/um/sys-i386/setjmp.S
@@ -0,0 +1,58 @@
1#
2# arch/i386/setjmp.S
3#
4# setjmp/longjmp for the i386 architecture
5#
6
7#
8# The jmp_buf is assumed to contain the following, in order:
9# %ebx
10# %esp
11# %ebp
12# %esi
13# %edi
14# <return address>
15#
16
17 .text
18 .align 4
19 .globl setjmp
20 .type setjmp, @function
21setjmp:
22#ifdef _REGPARM
23 movl %eax,%edx
24#else
25 movl 4(%esp),%edx
26#endif
27 popl %ecx # Return address, and adjust the stack
28 xorl %eax,%eax # Return value
29 movl %ebx,(%edx)
30 movl %esp,4(%edx) # Post-return %esp!
31 pushl %ecx # Make the call/return stack happy
32 movl %ebp,8(%edx)
33 movl %esi,12(%edx)
34 movl %edi,16(%edx)
35 movl %ecx,20(%edx) # Return address
36 ret
37
38 .size setjmp,.-setjmp
39
40 .text
41 .align 4
42 .globl longjmp
43 .type longjmp, @function
44longjmp:
45#ifdef _REGPARM
46 xchgl %eax,%edx
47#else
48 movl 4(%esp),%edx # jmp_ptr address
49 movl 8(%esp),%eax # Return value
50#endif
51 movl (%edx),%ebx
52 movl 4(%edx),%esp
53 movl 8(%edx),%ebp
54 movl 12(%edx),%esi
55 movl 16(%edx),%edi
56 jmp *20(%edx)
57
58 .size longjmp,.-longjmp
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index c19794d435d6..f41768b8e25e 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -5,8 +5,8 @@
5# 5#
6 6
7obj-y = bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ 7obj-y = bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
8 sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o ksyms.o \ 8 setjmp.o sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o \
9 tls.o 9 ksyms.o tls.o
10 10
11obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o 11obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o
12obj-$(CONFIG_MODULES) += um_module.o 12obj-$(CONFIG_MODULES) += um_module.o
diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/um/sys-x86_64/setjmp.S
new file mode 100644
index 000000000000..45f547b4043e
--- /dev/null
+++ b/arch/um/sys-x86_64/setjmp.S
@@ -0,0 +1,54 @@
1#
2# arch/x86_64/setjmp.S
3#
4# setjmp/longjmp for the x86-64 architecture
5#
6
7#
8# The jmp_buf is assumed to contain the following, in order:
9# %rbx
10# %rsp (post-return)
11# %rbp
12# %r12
13# %r13
14# %r14
15# %r15
16# <return address>
17#
18
19 .text
20 .align 4
21 .globl setjmp
22 .type setjmp, @function
23setjmp:
24 pop %rsi # Return address, and adjust the stack
25 xorl %eax,%eax # Return value
26 movq %rbx,(%rdi)
27 movq %rsp,8(%rdi) # Post-return %rsp!
28 push %rsi # Make the call/return stack happy
29 movq %rbp,16(%rdi)
30 movq %r12,24(%rdi)
31 movq %r13,32(%rdi)
32 movq %r14,40(%rdi)
33 movq %r15,48(%rdi)
34 movq %rsi,56(%rdi) # Return address
35 ret
36
37 .size setjmp,.-setjmp
38
39 .text
40 .align 4
41 .globl longjmp
42 .type longjmp, @function
43longjmp:
44 movl %esi,%eax # Return value (int)
45 movq (%rdi),%rbx
46 movq 8(%rdi),%rsp
47 movq 16(%rdi),%rbp
48 movq 24(%rdi),%r12
49 movq 32(%rdi),%r13
50 movq 40(%rdi),%r14
51 movq 48(%rdi),%r15
52 jmp *56(%rdi)
53
54 .size longjmp,.-longjmp
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 6cd4878625f1..581ce9af0ec8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,10 @@ config X86
24 bool 24 bool
25 default y 25 default y
26 26
27config ZONE_DMA32
28 bool
29 default y
30
27config LOCKDEP_SUPPORT 31config LOCKDEP_SUPPORT
28 bool 32 bool
29 default y 33 default y
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index d6d7f731f6f0..708a3cd9a27e 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -16,6 +16,7 @@
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/kexec.h> 17#include <linux/kexec.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/mm.h>
19 20
20#include <asm/pgtable.h> 21#include <asm/pgtable.h>
21#include <asm/page.h> 22#include <asm/page.h>
@@ -297,6 +298,53 @@ void __init e820_reserve_resources(void)
297 } 298 }
298} 299}
299 300
301/* Mark pages corresponding to given address range as nosave */
302static void __init
303e820_mark_nosave_range(unsigned long start, unsigned long end)
304{
305 unsigned long pfn, max_pfn;
306
307 if (start >= end)
308 return;
309
310 printk("Nosave address range: %016lx - %016lx\n", start, end);
311 max_pfn = end >> PAGE_SHIFT;
312 for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
313 if (pfn_valid(pfn))
314 SetPageNosave(pfn_to_page(pfn));
315}
316
317/*
318 * Find the ranges of physical addresses that do not correspond to
319 * e820 RAM areas and mark the corresponding pages as nosave for software
320 * suspend and suspend to RAM.
321 *
322 * This function requires the e820 map to be sorted and without any
323 * overlapping entries and assumes the first e820 area to be RAM.
324 */
325void __init e820_mark_nosave_regions(void)
326{
327 int i;
328 unsigned long paddr;
329
330 paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
331 for (i = 1; i < e820.nr_map; i++) {
332 struct e820entry *ei = &e820.map[i];
333
334 if (paddr < ei->addr)
335 e820_mark_nosave_range(paddr,
336 round_up(ei->addr, PAGE_SIZE));
337
338 paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
339 if (ei->type != E820_RAM)
340 e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
341 paddr);
342
343 if (paddr >= (end_pfn << PAGE_SHIFT))
344 break;
345 }
346}
347
300/* 348/*
301 * Add a memory region to the kernel e820 map. 349 * Add a memory region to the kernel e820 map.
302 */ 350 */
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 34afad704824..4b39f0da17f3 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -689,6 +689,7 @@ void __init setup_arch(char **cmdline_p)
689 */ 689 */
690 probe_roms(); 690 probe_roms();
691 e820_reserve_resources(); 691 e820_reserve_resources();
692 e820_mark_nosave_regions();
692 693
693 request_resource(&iomem_resource, &video_ram_resource); 694 request_resource(&iomem_resource, &video_ram_resource);
694 695
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 975380207b46..3ae9ffddddc0 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -46,9 +46,10 @@
46#include <linux/bootmem.h> 46#include <linux/bootmem.h>
47#include <linux/thread_info.h> 47#include <linux/thread_info.h>
48#include <linux/module.h> 48#include <linux/module.h>
49
50#include <linux/delay.h> 49#include <linux/delay.h>
51#include <linux/mc146818rtc.h> 50#include <linux/mc146818rtc.h>
51#include <linux/smp.h>
52
52#include <asm/mtrr.h> 53#include <asm/mtrr.h>
53#include <asm/pgalloc.h> 54#include <asm/pgalloc.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
index 320b6fb00cca..bfbe00763c68 100644
--- a/arch/x86_64/kernel/suspend_asm.S
+++ b/arch/x86_64/kernel/suspend_asm.S
@@ -54,7 +54,7 @@ ENTRY(restore_image)
54 movq %rcx, %cr3; 54 movq %rcx, %cr3;
55 movq %rax, %cr4; # turn PGE back on 55 movq %rax, %cr4; # turn PGE back on
56 56
57 movq pagedir_nosave(%rip), %rdx 57 movq restore_pblist(%rip), %rdx
58loop: 58loop:
59 testq %rdx, %rdx 59 testq %rdx, %rdx
60 jz done 60 jz done
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7a9b18224182..7700e6cd2bd9 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -1148,23 +1148,25 @@ int hpet_rtc_timer_init(void)
1148 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; 1148 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
1149 1149
1150 local_irq_save(flags); 1150 local_irq_save(flags);
1151
1151 cnt = hpet_readl(HPET_COUNTER); 1152 cnt = hpet_readl(HPET_COUNTER);
1152 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); 1153 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
1153 hpet_writel(cnt, HPET_T1_CMP); 1154 hpet_writel(cnt, HPET_T1_CMP);
1154 hpet_t1_cmp = cnt; 1155 hpet_t1_cmp = cnt;
1155 local_irq_restore(flags);
1156 1156
1157 cfg = hpet_readl(HPET_T1_CFG); 1157 cfg = hpet_readl(HPET_T1_CFG);
1158 cfg &= ~HPET_TN_PERIODIC; 1158 cfg &= ~HPET_TN_PERIODIC;
1159 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; 1159 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
1160 hpet_writel(cfg, HPET_T1_CFG); 1160 hpet_writel(cfg, HPET_T1_CFG);
1161 1161
1162 local_irq_restore(flags);
1163
1162 return 1; 1164 return 1;
1163} 1165}
1164 1166
1165static void hpet_rtc_timer_reinit(void) 1167static void hpet_rtc_timer_reinit(void)
1166{ 1168{
1167 unsigned int cfg, cnt; 1169 unsigned int cfg, cnt, ticks_per_int, lost_ints;
1168 1170
1169 if (unlikely(!(PIE_on | AIE_on | UIE_on))) { 1171 if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
1170 cfg = hpet_readl(HPET_T1_CFG); 1172 cfg = hpet_readl(HPET_T1_CFG);
@@ -1179,10 +1181,33 @@ static void hpet_rtc_timer_reinit(void)
1179 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; 1181 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
1180 1182
1181 /* It is more accurate to use the comparator value than current count.*/ 1183 /* It is more accurate to use the comparator value than current count.*/
1182 cnt = hpet_t1_cmp; 1184 ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
1183 cnt += hpet_tick*HZ/hpet_rtc_int_freq; 1185 hpet_t1_cmp += ticks_per_int;
1184 hpet_writel(cnt, HPET_T1_CMP); 1186 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
1185 hpet_t1_cmp = cnt; 1187
1188 /*
1189 * If the interrupt handler was delayed too long, the write above tries
1190 * to schedule the next interrupt in the past and the hardware would
1191 * not interrupt until the counter had wrapped around.
1192 * So we have to check that the comparator wasn't set to a past time.
1193 */
1194 cnt = hpet_readl(HPET_COUNTER);
1195 if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
1196 lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
1197 /* Make sure that, even with the time needed to execute
1198 * this code, the next scheduled interrupt has been moved
1199 * back to the future: */
1200 lost_ints++;
1201
1202 hpet_t1_cmp += lost_ints * ticks_per_int;
1203 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
1204
1205 if (PIE_on)
1206 PIE_count += lost_ints;
1207
1208 printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
1209 hpet_rtc_int_freq);
1210 }
1186} 1211}
1187 1212
1188/* 1213/*
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index ac8ea66ccb94..4198798e1469 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -299,7 +299,7 @@ static int vmalloc_fault(unsigned long address)
299 if (pgd_none(*pgd)) 299 if (pgd_none(*pgd))
300 set_pgd(pgd, *pgd_ref); 300 set_pgd(pgd, *pgd_ref);
301 else 301 else
302 BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); 302 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
303 303
304 /* Below here mismatches are bugs because these lower tables 304 /* Below here mismatches are bugs because these lower tables
305 are shared */ 305 are shared */
@@ -308,7 +308,7 @@ static int vmalloc_fault(unsigned long address)
308 pud_ref = pud_offset(pgd_ref, address); 308 pud_ref = pud_offset(pgd_ref, address);
309 if (pud_none(*pud_ref)) 309 if (pud_none(*pud_ref))
310 return -1; 310 return -1;
311 if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref)) 311 if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
312 BUG(); 312 BUG();
313 pmd = pmd_offset(pud, address); 313 pmd = pmd_offset(pud, address);
314 pmd_ref = pmd_offset(pud_ref, address); 314 pmd_ref = pmd_offset(pud_ref, address);
@@ -641,7 +641,7 @@ void vmalloc_sync_all(void)
641 if (pgd_none(*pgd)) 641 if (pgd_none(*pgd))
642 set_pgd(pgd, *pgd_ref); 642 set_pgd(pgd, *pgd_ref);
643 else 643 else
644 BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); 644 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
645 } 645 }
646 spin_unlock(&pgd_lock); 646 spin_unlock(&pgd_lock);
647 set_bit(pgd_index(address), insync); 647 set_bit(pgd_index(address), insync);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index d14fb2dfbfc4..52fd42c40c86 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -536,7 +536,7 @@ int memory_add_physaddr_to_nid(u64 start)
536int arch_add_memory(int nid, u64 start, u64 size) 536int arch_add_memory(int nid, u64 start, u64 size)
537{ 537{
538 struct pglist_data *pgdat = NODE_DATA(nid); 538 struct pglist_data *pgdat = NODE_DATA(nid);
539 struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; 539 struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
540 unsigned long start_pfn = start >> PAGE_SHIFT; 540 unsigned long start_pfn = start >> PAGE_SHIFT;
541 unsigned long nr_pages = size >> PAGE_SHIFT; 541 unsigned long nr_pages = size >> PAGE_SHIFT;
542 int ret; 542 int ret;
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index ab2ecccf7798..ffa111eea9da 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -851,7 +851,7 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev)
851 * @ap: Port whose timings we are configuring 851 * @ap: Port whose timings we are configuring
852 * @adev: Drive in question 852 * @adev: Drive in question
853 * @udma: udma mode, 0 - 6 853 * @udma: udma mode, 0 - 6
854 * @is_ich: set if the chip is an ICH device 854 * @isich: set if the chip is an ICH device
855 * 855 *
856 * Set UDMA mode for device, in host controller PCI config space. 856 * Set UDMA mode for device, in host controller PCI config space.
857 * 857 *
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 27c22feebf30..8cd730fe5dd3 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -484,7 +484,7 @@ static void nv_error_handler(struct ata_port *ap)
484static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) 484static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
485{ 485{
486 static int printed_version = 0; 486 static int printed_version = 0;
487 struct ata_port_info *ppi; 487 struct ata_port_info *ppi[2];
488 struct ata_probe_ent *probe_ent; 488 struct ata_probe_ent *probe_ent;
489 int pci_dev_busy = 0; 489 int pci_dev_busy = 0;
490 int rc; 490 int rc;
@@ -520,8 +520,8 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
520 520
521 rc = -ENOMEM; 521 rc = -ENOMEM;
522 522
523 ppi = &nv_port_info[ent->driver_data]; 523 ppi[0] = ppi[1] = &nv_port_info[ent->driver_data];
524 probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); 524 probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
525 if (!probe_ent) 525 if (!probe_ent)
526 goto err_out_regions; 526 goto err_out_regions;
527 527
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index 9b17375d8056..18d49fff8dc4 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -240,7 +240,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
240 struct ata_probe_ent *probe_ent = NULL; 240 struct ata_probe_ent *probe_ent = NULL;
241 int rc; 241 int rc;
242 u32 genctl; 242 u32 genctl;
243 struct ata_port_info *ppi; 243 struct ata_port_info *ppi[2];
244 int pci_dev_busy = 0; 244 int pci_dev_busy = 0;
245 u8 pmr; 245 u8 pmr;
246 u8 port2_start; 246 u8 port2_start;
@@ -265,8 +265,8 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
265 if (rc) 265 if (rc)
266 goto err_out_regions; 266 goto err_out_regions;
267 267
268 ppi = &sis_port_info; 268 ppi[0] = ppi[1] = &sis_port_info;
269 probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); 269 probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
270 if (!probe_ent) { 270 if (!probe_ent) {
271 rc = -ENOMEM; 271 rc = -ENOMEM;
272 goto err_out_regions; 272 goto err_out_regions;
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index 8fc6e800011a..dd76f37be182 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -185,7 +185,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
185{ 185{
186 static int printed_version; 186 static int printed_version;
187 struct ata_probe_ent *probe_ent; 187 struct ata_probe_ent *probe_ent;
188 struct ata_port_info *ppi; 188 struct ata_port_info *ppi[2];
189 int rc; 189 int rc;
190 unsigned int board_idx = (unsigned int) ent->driver_data; 190 unsigned int board_idx = (unsigned int) ent->driver_data;
191 int pci_dev_busy = 0; 191 int pci_dev_busy = 0;
@@ -211,8 +211,8 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
211 if (rc) 211 if (rc)
212 goto err_out_regions; 212 goto err_out_regions;
213 213
214 ppi = &uli_port_info; 214 ppi[0] = ppi[1] = &uli_port_info;
215 probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); 215 probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
216 if (!probe_ent) { 216 if (!probe_ent) {
217 rc = -ENOMEM; 217 rc = -ENOMEM;
218 goto err_out_regions; 218 goto err_out_regions;
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 7f087aef99de..a72a2389a11c 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -318,9 +318,10 @@ static void vt6421_init_addrs(struct ata_probe_ent *probe_ent,
318static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev) 318static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev)
319{ 319{
320 struct ata_probe_ent *probe_ent; 320 struct ata_probe_ent *probe_ent;
321 struct ata_port_info *ppi = &vt6420_port_info; 321 struct ata_port_info *ppi[2];
322 322
323 probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); 323 ppi[0] = ppi[1] = &vt6420_port_info;
324 probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
324 if (!probe_ent) 325 if (!probe_ent)
325 return NULL; 326 return NULL;
326 327
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 41e052fecd7f..f2511b42dba2 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -454,7 +454,7 @@ rate_to_atmf(unsigned rate) /* cps to atm forum format */
454 return (NONZERO | (exp << 9) | (rate & 0x1ff)); 454 return (NONZERO | (exp << 9) | (rate & 0x1ff));
455} 455}
456 456
457static void __init 457static void __devinit
458he_init_rx_lbfp0(struct he_dev *he_dev) 458he_init_rx_lbfp0(struct he_dev *he_dev)
459{ 459{
460 unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; 460 unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count;
@@ -485,7 +485,7 @@ he_init_rx_lbfp0(struct he_dev *he_dev)
485 he_writel(he_dev, he_dev->r0_numbuffs, RLBF0_C); 485 he_writel(he_dev, he_dev->r0_numbuffs, RLBF0_C);
486} 486}
487 487
488static void __init 488static void __devinit
489he_init_rx_lbfp1(struct he_dev *he_dev) 489he_init_rx_lbfp1(struct he_dev *he_dev)
490{ 490{
491 unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; 491 unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count;
@@ -516,7 +516,7 @@ he_init_rx_lbfp1(struct he_dev *he_dev)
516 he_writel(he_dev, he_dev->r1_numbuffs, RLBF1_C); 516 he_writel(he_dev, he_dev->r1_numbuffs, RLBF1_C);
517} 517}
518 518
519static void __init 519static void __devinit
520he_init_tx_lbfp(struct he_dev *he_dev) 520he_init_tx_lbfp(struct he_dev *he_dev)
521{ 521{
522 unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; 522 unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count;
@@ -546,7 +546,7 @@ he_init_tx_lbfp(struct he_dev *he_dev)
546 he_writel(he_dev, lbufd_index - 1, TLBF_T); 546 he_writel(he_dev, lbufd_index - 1, TLBF_T);
547} 547}
548 548
549static int __init 549static int __devinit
550he_init_tpdrq(struct he_dev *he_dev) 550he_init_tpdrq(struct he_dev *he_dev)
551{ 551{
552 he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev, 552 he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev,
@@ -568,7 +568,7 @@ he_init_tpdrq(struct he_dev *he_dev)
568 return 0; 568 return 0;
569} 569}
570 570
571static void __init 571static void __devinit
572he_init_cs_block(struct he_dev *he_dev) 572he_init_cs_block(struct he_dev *he_dev)
573{ 573{
574 unsigned clock, rate, delta; 574 unsigned clock, rate, delta;
@@ -664,7 +664,7 @@ he_init_cs_block(struct he_dev *he_dev)
664 664
665} 665}
666 666
667static int __init 667static int __devinit
668he_init_cs_block_rcm(struct he_dev *he_dev) 668he_init_cs_block_rcm(struct he_dev *he_dev)
669{ 669{
670 unsigned (*rategrid)[16][16]; 670 unsigned (*rategrid)[16][16];
@@ -785,7 +785,7 @@ he_init_cs_block_rcm(struct he_dev *he_dev)
785 return 0; 785 return 0;
786} 786}
787 787
788static int __init 788static int __devinit
789he_init_group(struct he_dev *he_dev, int group) 789he_init_group(struct he_dev *he_dev, int group)
790{ 790{
791 int i; 791 int i;
@@ -955,7 +955,7 @@ he_init_group(struct he_dev *he_dev, int group)
955 return 0; 955 return 0;
956} 956}
957 957
958static int __init 958static int __devinit
959he_init_irq(struct he_dev *he_dev) 959he_init_irq(struct he_dev *he_dev)
960{ 960{
961 int i; 961 int i;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index e9b0957f15d1..001e6f6b9c1b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -54,10 +54,12 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
54 "Node %d MemUsed: %8lu kB\n" 54 "Node %d MemUsed: %8lu kB\n"
55 "Node %d Active: %8lu kB\n" 55 "Node %d Active: %8lu kB\n"
56 "Node %d Inactive: %8lu kB\n" 56 "Node %d Inactive: %8lu kB\n"
57#ifdef CONFIG_HIGHMEM
57 "Node %d HighTotal: %8lu kB\n" 58 "Node %d HighTotal: %8lu kB\n"
58 "Node %d HighFree: %8lu kB\n" 59 "Node %d HighFree: %8lu kB\n"
59 "Node %d LowTotal: %8lu kB\n" 60 "Node %d LowTotal: %8lu kB\n"
60 "Node %d LowFree: %8lu kB\n" 61 "Node %d LowFree: %8lu kB\n"
62#endif
61 "Node %d Dirty: %8lu kB\n" 63 "Node %d Dirty: %8lu kB\n"
62 "Node %d Writeback: %8lu kB\n" 64 "Node %d Writeback: %8lu kB\n"
63 "Node %d FilePages: %8lu kB\n" 65 "Node %d FilePages: %8lu kB\n"
@@ -66,16 +68,20 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
66 "Node %d PageTables: %8lu kB\n" 68 "Node %d PageTables: %8lu kB\n"
67 "Node %d NFS_Unstable: %8lu kB\n" 69 "Node %d NFS_Unstable: %8lu kB\n"
68 "Node %d Bounce: %8lu kB\n" 70 "Node %d Bounce: %8lu kB\n"
69 "Node %d Slab: %8lu kB\n", 71 "Node %d Slab: %8lu kB\n"
72 "Node %d SReclaimable: %8lu kB\n"
73 "Node %d SUnreclaim: %8lu kB\n",
70 nid, K(i.totalram), 74 nid, K(i.totalram),
71 nid, K(i.freeram), 75 nid, K(i.freeram),
72 nid, K(i.totalram - i.freeram), 76 nid, K(i.totalram - i.freeram),
73 nid, K(active), 77 nid, K(active),
74 nid, K(inactive), 78 nid, K(inactive),
79#ifdef CONFIG_HIGHMEM
75 nid, K(i.totalhigh), 80 nid, K(i.totalhigh),
76 nid, K(i.freehigh), 81 nid, K(i.freehigh),
77 nid, K(i.totalram - i.totalhigh), 82 nid, K(i.totalram - i.totalhigh),
78 nid, K(i.freeram - i.freehigh), 83 nid, K(i.freeram - i.freehigh),
84#endif
79 nid, K(node_page_state(nid, NR_FILE_DIRTY)), 85 nid, K(node_page_state(nid, NR_FILE_DIRTY)),
80 nid, K(node_page_state(nid, NR_WRITEBACK)), 86 nid, K(node_page_state(nid, NR_WRITEBACK)),
81 nid, K(node_page_state(nid, NR_FILE_PAGES)), 87 nid, K(node_page_state(nid, NR_FILE_PAGES)),
@@ -84,7 +90,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
84 nid, K(node_page_state(nid, NR_PAGETABLE)), 90 nid, K(node_page_state(nid, NR_PAGETABLE)),
85 nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), 91 nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
86 nid, K(node_page_state(nid, NR_BOUNCE)), 92 nid, K(node_page_state(nid, NR_BOUNCE)),
87 nid, K(node_page_state(nid, NR_SLAB))); 93 nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
94 node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
95 nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
96 nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
88 n += hugetlb_report_node_meminfo(nid, buf + n); 97 n += hugetlb_report_node_meminfo(nid, buf + n);
89 return n; 98 return n;
90} 99}
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 6e6a7c7a7eff..ab6429b4a84e 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -209,11 +209,12 @@ static const unsigned char days_in_mo[] =
209 */ 209 */
210static inline unsigned char rtc_is_updating(void) 210static inline unsigned char rtc_is_updating(void)
211{ 211{
212 unsigned long flags;
212 unsigned char uip; 213 unsigned char uip;
213 214
214 spin_lock_irq(&rtc_lock); 215 spin_lock_irqsave(&rtc_lock, flags);
215 uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); 216 uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
216 spin_unlock_irq(&rtc_lock); 217 spin_unlock_irqrestore(&rtc_lock, flags);
217 return uip; 218 return uip;
218} 219}
219 220
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index 71f27e955d87..c7854ea57b52 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -476,13 +476,13 @@ static int auide_dma_lostirq(ide_drive_t *drive)
476 return 0; 476 return 0;
477} 477}
478 478
479static void auide_ddma_tx_callback(int irq, void *param, struct pt_regs *regs) 479static void auide_ddma_tx_callback(int irq, void *param)
480{ 480{
481 _auide_hwif *ahwif = (_auide_hwif*)param; 481 _auide_hwif *ahwif = (_auide_hwif*)param;
482 ahwif->drive->waiting_for_dma = 0; 482 ahwif->drive->waiting_for_dma = 0;
483} 483}
484 484
485static void auide_ddma_rx_callback(int irq, void *param, struct pt_regs *regs) 485static void auide_ddma_rx_callback(int irq, void *param)
486{ 486{
487 _auide_hwif *ahwif = (_auide_hwif*)param; 487 _auide_hwif *ahwif = (_auide_hwif*)param;
488 ahwif->drive->waiting_for_dma = 0; 488 ahwif->drive->waiting_for_dma = 0;
diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c
index 88bf2af2a0e7..edd7b83c3464 100644
--- a/drivers/media/video/videodev.c
+++ b/drivers/media/video/videodev.c
@@ -836,7 +836,7 @@ static int __video_do_ioctl(struct inode *inode, struct file *file,
836 break; 836 break;
837 } 837 }
838 838
839 if (index<=0 || index >= vfd->tvnormsize) { 839 if (index < 0 || index >= vfd->tvnormsize) {
840 ret=-EINVAL; 840 ret=-EINVAL;
841 break; 841 break;
842 } 842 }
diff --git a/drivers/mmc/au1xmmc.c b/drivers/mmc/au1xmmc.c
index fb606165af3b..61268da13957 100644
--- a/drivers/mmc/au1xmmc.c
+++ b/drivers/mmc/au1xmmc.c
@@ -731,7 +731,7 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios)
731 } 731 }
732} 732}
733 733
734static void au1xmmc_dma_callback(int irq, void *dev_id, struct pt_regs *regs) 734static void au1xmmc_dma_callback(int irq, void *dev_id)
735{ 735{
736 struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id; 736 struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id;
737 737
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index 77670741e101..feb42db10ee1 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -1323,9 +1323,9 @@ static const struct ethtool_ops sparc_lance_ethtool_ops = {
1323 .get_link = sparc_lance_get_link, 1323 .get_link = sparc_lance_get_link,
1324}; 1324};
1325 1325
1326static int __init sparc_lance_probe_one(struct sbus_dev *sdev, 1326static int __devinit sparc_lance_probe_one(struct sbus_dev *sdev,
1327 struct sbus_dma *ledma, 1327 struct sbus_dma *ledma,
1328 struct sbus_dev *lebuffer) 1328 struct sbus_dev *lebuffer)
1329{ 1329{
1330 static unsigned version_printed; 1330 static unsigned version_printed;
1331 struct net_device *dev; 1331 struct net_device *dev;
@@ -1515,7 +1515,7 @@ fail:
1515} 1515}
1516 1516
1517/* On 4m, find the associated dma for the lance chip */ 1517/* On 4m, find the associated dma for the lance chip */
1518static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev) 1518static struct sbus_dma * __devinit find_ledma(struct sbus_dev *sdev)
1519{ 1519{
1520 struct sbus_dma *p; 1520 struct sbus_dma *p;
1521 1521
@@ -1533,7 +1533,7 @@ static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev)
1533 1533
1534/* Find all the lance cards on the system and initialize them */ 1534/* Find all the lance cards on the system and initialize them */
1535static struct sbus_dev sun4_sdev; 1535static struct sbus_dev sun4_sdev;
1536static int __init sparc_lance_init(void) 1536static int __devinit sparc_lance_init(void)
1537{ 1537{
1538 if ((idprom->id_machtype == (SM_SUN4|SM_4_330)) || 1538 if ((idprom->id_machtype == (SM_SUN4|SM_4_330)) ||
1539 (idprom->id_machtype == (SM_SUN4|SM_4_470))) { 1539 (idprom->id_machtype == (SM_SUN4|SM_4_470))) {
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 372e47f7d596..5f7ba1adb309 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -1929,6 +1929,13 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
1929 1929
1930 mutex_lock(&state->mutex); 1930 mutex_lock(&state->mutex);
1931 1931
1932#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND
1933 if (uart_console(port)) {
1934 mutex_unlock(&state->mutex);
1935 return 0;
1936 }
1937#endif
1938
1932 if (state->info && state->info->flags & UIF_INITIALIZED) { 1939 if (state->info && state->info->flags & UIF_INITIALIZED) {
1933 const struct uart_ops *ops = port->ops; 1940 const struct uart_ops *ops = port->ops;
1934 1941
@@ -1967,6 +1974,13 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
1967 1974
1968 mutex_lock(&state->mutex); 1975 mutex_lock(&state->mutex);
1969 1976
1977#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND
1978 if (uart_console(port)) {
1979 mutex_unlock(&state->mutex);
1980 return 0;
1981 }
1982#endif
1983
1970 uart_change_pm(state, 0); 1984 uart_change_pm(state, 0);
1971 1985
1972 /* 1986 /*
diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c
index 4f78f234473d..c151dcf68786 100644
--- a/drivers/video/fbsysfs.c
+++ b/drivers/video/fbsysfs.c
@@ -397,6 +397,12 @@ static ssize_t store_bl_curve(struct class_device *class_device,
397 u8 tmp_curve[FB_BACKLIGHT_LEVELS]; 397 u8 tmp_curve[FB_BACKLIGHT_LEVELS];
398 unsigned int i; 398 unsigned int i;
399 399
400 /* Some drivers don't use framebuffer_alloc(), but those also
401 * don't have backlights.
402 */
403 if (!fb_info || !fb_info->bl_dev)
404 return -ENODEV;
405
400 if (count != (FB_BACKLIGHT_LEVELS / 8 * 24)) 406 if (count != (FB_BACKLIGHT_LEVELS / 8 * 24))
401 return -EINVAL; 407 return -EINVAL;
402 408
@@ -430,6 +436,12 @@ static ssize_t show_bl_curve(struct class_device *class_device, char *buf)
430 ssize_t len = 0; 436 ssize_t len = 0;
431 unsigned int i; 437 unsigned int i;
432 438
439 /* Some drivers don't use framebuffer_alloc(), but those also
440 * don't have backlights.
441 */
442 if (!fb_info || !fb_info->bl_dev)
443 return -ENODEV;
444
433 mutex_lock(&fb_info->bl_mutex); 445 mutex_lock(&fb_info->bl_mutex);
434 for (i = 0; i < FB_BACKLIGHT_LEVELS; i += 8) 446 for (i = 0; i < FB_BACKLIGHT_LEVELS; i += 8)
435 len += snprintf(&buf[len], PAGE_SIZE, 447 len += snprintf(&buf[len], PAGE_SIZE,
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 8dbd44f10e9d..d96e5c14a9ca 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -32,7 +32,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
32 32
33 if (!do_now) { 33 if (!do_now) {
34 /* Too young to die */ 34 /* Too young to die */
35 if (time_after(ino->last_used + timeout, now)) 35 if (!timeout || time_after(ino->last_used + timeout, now))
36 return 0; 36 return 0;
37 37
38 /* update last_used here :- 38 /* update last_used here :-
@@ -253,7 +253,7 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
253 struct dentry *root = dget(sb->s_root); 253 struct dentry *root = dget(sb->s_root);
254 int do_now = how & AUTOFS_EXP_IMMEDIATE; 254 int do_now = how & AUTOFS_EXP_IMMEDIATE;
255 255
256 if (!sbi->exp_timeout || !root) 256 if (!root)
257 return NULL; 257 return NULL;
258 258
259 now = jiffies; 259 now = jiffies;
@@ -293,7 +293,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
293 int do_now = how & AUTOFS_EXP_IMMEDIATE; 293 int do_now = how & AUTOFS_EXP_IMMEDIATE;
294 int exp_leaves = how & AUTOFS_EXP_LEAVES; 294 int exp_leaves = how & AUTOFS_EXP_LEAVES;
295 295
296 if ( !sbi->exp_timeout || !root ) 296 if (!root)
297 return NULL; 297 return NULL;
298 298
299 now = jiffies; 299 now = jiffies;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 672a3b90bc55..64802aabd1ac 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1262,7 +1262,7 @@ static void fill_elf_header(struct elfhdr *elf, int segs)
1262 return; 1262 return;
1263} 1263}
1264 1264
1265static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) 1265static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1266{ 1266{
1267 phdr->p_type = PT_NOTE; 1267 phdr->p_type = PT_NOTE;
1268 phdr->p_offset = offset; 1268 phdr->p_offset = offset;
@@ -1428,7 +1428,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1428 int i; 1428 int i;
1429 struct vm_area_struct *vma; 1429 struct vm_area_struct *vma;
1430 struct elfhdr *elf = NULL; 1430 struct elfhdr *elf = NULL;
1431 off_t offset = 0, dataoff; 1431 loff_t offset = 0, dataoff;
1432 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; 1432 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1433 int numnote; 1433 int numnote;
1434 struct memelfnote *notes = NULL; 1434 struct memelfnote *notes = NULL;
@@ -1661,11 +1661,11 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1661 ELF_CORE_WRITE_EXTRA_DATA; 1661 ELF_CORE_WRITE_EXTRA_DATA;
1662#endif 1662#endif
1663 1663
1664 if ((off_t)file->f_pos != offset) { 1664 if (file->f_pos != offset) {
1665 /* Sanity check */ 1665 /* Sanity check */
1666 printk(KERN_WARNING 1666 printk(KERN_WARNING
1667 "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", 1667 "elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n",
1668 (off_t)file->f_pos, offset); 1668 file->f_pos, offset);
1669 } 1669 }
1670 1670
1671end_coredump: 1671end_coredump:
diff --git a/fs/buffer.c b/fs/buffer.c
index 71649ef9b658..3b6d701073e7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page)
2987 2987
2988 spin_lock(&mapping->private_lock); 2988 spin_lock(&mapping->private_lock);
2989 ret = drop_buffers(page, &buffers_to_free); 2989 ret = drop_buffers(page, &buffers_to_free);
2990 spin_unlock(&mapping->private_lock);
2990 if (ret) { 2991 if (ret) {
2991 /* 2992 /*
2992 * If the filesystem writes its buffers by hand (eg ext3) 2993 * If the filesystem writes its buffers by hand (eg ext3)
@@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page)
2998 */ 2999 */
2999 clear_page_dirty(page); 3000 clear_page_dirty(page);
3000 } 3001 }
3001 spin_unlock(&mapping->private_lock);
3002out: 3002out:
3003 if (buffers_to_free) { 3003 if (buffers_to_free) {
3004 struct buffer_head *bh = buffers_to_free; 3004 struct buffer_head *bh = buffers_to_free;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 42da60784311..32a8caf0c41e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal,
160 return (ret == -EIO); 160 return (ret == -EIO);
161} 161}
162 162
163static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
164{
165 int i;
166
167 for (i = 0; i < bufs; i++) {
168 wbuf[i]->b_end_io = end_buffer_write_sync;
169 /* We use-up our safety reference in submit_bh() */
170 submit_bh(WRITE, wbuf[i]);
171 }
172}
173
174/*
175 * Submit all the data buffers to disk
176 */
177static void journal_submit_data_buffers(journal_t *journal,
178 transaction_t *commit_transaction)
179{
180 struct journal_head *jh;
181 struct buffer_head *bh;
182 int locked;
183 int bufs = 0;
184 struct buffer_head **wbuf = journal->j_wbuf;
185
186 /*
187 * Whenever we unlock the journal and sleep, things can get added
188 * onto ->t_sync_datalist, so we have to keep looping back to
189 * write_out_data until we *know* that the list is empty.
190 *
191 * Cleanup any flushed data buffers from the data list. Even in
192 * abort mode, we want to flush this out as soon as possible.
193 */
194write_out_data:
195 cond_resched();
196 spin_lock(&journal->j_list_lock);
197
198 while (commit_transaction->t_sync_datalist) {
199 jh = commit_transaction->t_sync_datalist;
200 bh = jh2bh(jh);
201 locked = 0;
202
203 /* Get reference just to make sure buffer does not disappear
204 * when we are forced to drop various locks */
205 get_bh(bh);
206 /* If the buffer is dirty, we need to submit IO and hence
207 * we need the buffer lock. We try to lock the buffer without
208 * blocking. If we fail, we need to drop j_list_lock and do
209 * blocking lock_buffer().
210 */
211 if (buffer_dirty(bh)) {
212 if (test_set_buffer_locked(bh)) {
213 BUFFER_TRACE(bh, "needs blocking lock");
214 spin_unlock(&journal->j_list_lock);
215 /* Write out all data to prevent deadlocks */
216 journal_do_submit_data(wbuf, bufs);
217 bufs = 0;
218 lock_buffer(bh);
219 spin_lock(&journal->j_list_lock);
220 }
221 locked = 1;
222 }
223 /* We have to get bh_state lock. Again out of order, sigh. */
224 if (!inverted_lock(journal, bh)) {
225 jbd_lock_bh_state(bh);
226 spin_lock(&journal->j_list_lock);
227 }
228 /* Someone already cleaned up the buffer? */
229 if (!buffer_jbd(bh)
230 || jh->b_transaction != commit_transaction
231 || jh->b_jlist != BJ_SyncData) {
232 jbd_unlock_bh_state(bh);
233 if (locked)
234 unlock_buffer(bh);
235 BUFFER_TRACE(bh, "already cleaned up");
236 put_bh(bh);
237 continue;
238 }
239 if (locked && test_clear_buffer_dirty(bh)) {
240 BUFFER_TRACE(bh, "needs writeout, adding to array");
241 wbuf[bufs++] = bh;
242 __journal_file_buffer(jh, commit_transaction,
243 BJ_Locked);
244 jbd_unlock_bh_state(bh);
245 if (bufs == journal->j_wbufsize) {
246 spin_unlock(&journal->j_list_lock);
247 journal_do_submit_data(wbuf, bufs);
248 bufs = 0;
249 goto write_out_data;
250 }
251 }
252 else {
253 BUFFER_TRACE(bh, "writeout complete: unfile");
254 __journal_unfile_buffer(jh);
255 jbd_unlock_bh_state(bh);
256 if (locked)
257 unlock_buffer(bh);
258 journal_remove_journal_head(bh);
259 /* Once for our safety reference, once for
260 * journal_remove_journal_head() */
261 put_bh(bh);
262 put_bh(bh);
263 }
264
265 if (lock_need_resched(&journal->j_list_lock)) {
266 spin_unlock(&journal->j_list_lock);
267 goto write_out_data;
268 }
269 }
270 spin_unlock(&journal->j_list_lock);
271 journal_do_submit_data(wbuf, bufs);
272}
273
163/* 274/*
164 * journal_commit_transaction 275 * journal_commit_transaction
165 * 276 *
@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal)
313 * Now start flushing things to disk, in the order they appear 424 * Now start flushing things to disk, in the order they appear
314 * on the transaction lists. Data blocks go first. 425 * on the transaction lists. Data blocks go first.
315 */ 426 */
316
317 err = 0; 427 err = 0;
318 /* 428 journal_submit_data_buffers(journal, commit_transaction);
319 * Whenever we unlock the journal and sleep, things can get added
320 * onto ->t_sync_datalist, so we have to keep looping back to
321 * write_out_data until we *know* that the list is empty.
322 */
323 bufs = 0;
324 /*
325 * Cleanup any flushed data buffers from the data list. Even in
326 * abort mode, we want to flush this out as soon as possible.
327 */
328write_out_data:
329 cond_resched();
330 spin_lock(&journal->j_list_lock);
331
332 while (commit_transaction->t_sync_datalist) {
333 struct buffer_head *bh;
334
335 jh = commit_transaction->t_sync_datalist;
336 commit_transaction->t_sync_datalist = jh->b_tnext;
337 bh = jh2bh(jh);
338 if (buffer_locked(bh)) {
339 BUFFER_TRACE(bh, "locked");
340 if (!inverted_lock(journal, bh))
341 goto write_out_data;
342 __journal_temp_unlink_buffer(jh);
343 __journal_file_buffer(jh, commit_transaction,
344 BJ_Locked);
345 jbd_unlock_bh_state(bh);
346 if (lock_need_resched(&journal->j_list_lock)) {
347 spin_unlock(&journal->j_list_lock);
348 goto write_out_data;
349 }
350 } else {
351 if (buffer_dirty(bh)) {
352 BUFFER_TRACE(bh, "start journal writeout");
353 get_bh(bh);
354 wbuf[bufs++] = bh;
355 if (bufs == journal->j_wbufsize) {
356 jbd_debug(2, "submit %d writes\n",
357 bufs);
358 spin_unlock(&journal->j_list_lock);
359 ll_rw_block(SWRITE, bufs, wbuf);
360 journal_brelse_array(wbuf, bufs);
361 bufs = 0;
362 goto write_out_data;
363 }
364 } else {
365 BUFFER_TRACE(bh, "writeout complete: unfile");
366 if (!inverted_lock(journal, bh))
367 goto write_out_data;
368 __journal_unfile_buffer(jh);
369 jbd_unlock_bh_state(bh);
370 journal_remove_journal_head(bh);
371 put_bh(bh);
372 if (lock_need_resched(&journal->j_list_lock)) {
373 spin_unlock(&journal->j_list_lock);
374 goto write_out_data;
375 }
376 }
377 }
378 }
379
380 if (bufs) {
381 spin_unlock(&journal->j_list_lock);
382 ll_rw_block(SWRITE, bufs, wbuf);
383 journal_brelse_array(wbuf, bufs);
384 spin_lock(&journal->j_list_lock);
385 }
386 429
387 /* 430 /*
388 * Wait for all previously submitted IO to complete. 431 * Wait for all previously submitted IO to complete.
389 */ 432 */
433 spin_lock(&journal->j_list_lock);
390 while (commit_transaction->t_locked_list) { 434 while (commit_transaction->t_locked_list) {
391 struct buffer_head *bh; 435 struct buffer_head *bh;
392 436
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 942156225447..5bbd60896050 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -157,10 +157,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
157 "SwapCached: %8lu kB\n" 157 "SwapCached: %8lu kB\n"
158 "Active: %8lu kB\n" 158 "Active: %8lu kB\n"
159 "Inactive: %8lu kB\n" 159 "Inactive: %8lu kB\n"
160#ifdef CONFIG_HIGHMEM
160 "HighTotal: %8lu kB\n" 161 "HighTotal: %8lu kB\n"
161 "HighFree: %8lu kB\n" 162 "HighFree: %8lu kB\n"
162 "LowTotal: %8lu kB\n" 163 "LowTotal: %8lu kB\n"
163 "LowFree: %8lu kB\n" 164 "LowFree: %8lu kB\n"
165#endif
164 "SwapTotal: %8lu kB\n" 166 "SwapTotal: %8lu kB\n"
165 "SwapFree: %8lu kB\n" 167 "SwapFree: %8lu kB\n"
166 "Dirty: %8lu kB\n" 168 "Dirty: %8lu kB\n"
@@ -168,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
168 "AnonPages: %8lu kB\n" 170 "AnonPages: %8lu kB\n"
169 "Mapped: %8lu kB\n" 171 "Mapped: %8lu kB\n"
170 "Slab: %8lu kB\n" 172 "Slab: %8lu kB\n"
173 "SReclaimable: %8lu kB\n"
174 "SUnreclaim: %8lu kB\n"
171 "PageTables: %8lu kB\n" 175 "PageTables: %8lu kB\n"
172 "NFS_Unstable: %8lu kB\n" 176 "NFS_Unstable: %8lu kB\n"
173 "Bounce: %8lu kB\n" 177 "Bounce: %8lu kB\n"
@@ -183,17 +187,22 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
183 K(total_swapcache_pages), 187 K(total_swapcache_pages),
184 K(active), 188 K(active),
185 K(inactive), 189 K(inactive),
190#ifdef CONFIG_HIGHMEM
186 K(i.totalhigh), 191 K(i.totalhigh),
187 K(i.freehigh), 192 K(i.freehigh),
188 K(i.totalram-i.totalhigh), 193 K(i.totalram-i.totalhigh),
189 K(i.freeram-i.freehigh), 194 K(i.freeram-i.freehigh),
195#endif
190 K(i.totalswap), 196 K(i.totalswap),
191 K(i.freeswap), 197 K(i.freeswap),
192 K(global_page_state(NR_FILE_DIRTY)), 198 K(global_page_state(NR_FILE_DIRTY)),
193 K(global_page_state(NR_WRITEBACK)), 199 K(global_page_state(NR_WRITEBACK)),
194 K(global_page_state(NR_ANON_PAGES)), 200 K(global_page_state(NR_ANON_PAGES)),
195 K(global_page_state(NR_FILE_MAPPED)), 201 K(global_page_state(NR_FILE_MAPPED)),
196 K(global_page_state(NR_SLAB)), 202 K(global_page_state(NR_SLAB_RECLAIMABLE) +
203 global_page_state(NR_SLAB_UNRECLAIMABLE)),
204 K(global_page_state(NR_SLAB_RECLAIMABLE)),
205 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
197 K(global_page_state(NR_PAGETABLE)), 206 K(global_page_state(NR_PAGETABLE)),
198 K(global_page_state(NR_UNSTABLE_NFS)), 207 K(global_page_state(NR_UNSTABLE_NFS)),
199 K(global_page_state(NR_BOUNCE)), 208 K(global_page_state(NR_BOUNCE)),
diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h
index 64d0ab98fcd8..8af56ce346ad 100644
--- a/include/asm-alpha/mmzone.h
+++ b/include/asm-alpha/mmzone.h
@@ -75,6 +75,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
75#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) 75#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr)
76 76
77#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) 77#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32))
78#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32))
78#define pte_pfn(pte) (pte_val(pte) >> 32) 79#define pte_pfn(pte) (pte_val(pte) >> 32)
79 80
80#define mk_pte(page, pgprot) \ 81#define mk_pte(page, pgprot) \
diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h
index 93eaa58b7961..49ac9bee7ced 100644
--- a/include/asm-alpha/pgtable.h
+++ b/include/asm-alpha/pgtable.h
@@ -230,16 +230,17 @@ extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
230 230
231 231
232extern inline unsigned long 232extern inline unsigned long
233pmd_page_kernel(pmd_t pmd) 233pmd_page_vaddr(pmd_t pmd)
234{ 234{
235 return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET; 235 return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET;
236} 236}
237 237
238#ifndef CONFIG_DISCONTIGMEM 238#ifndef CONFIG_DISCONTIGMEM
239#define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32)) 239#define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32))
240#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32))
240#endif 241#endif
241 242
242extern inline unsigned long pgd_page(pgd_t pgd) 243extern inline unsigned long pgd_page_vaddr(pgd_t pgd)
243{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } 244{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
244 245
245extern inline int pte_none(pte_t pte) { return !pte_val(pte); } 246extern inline int pte_none(pte_t pte) { return !pte_val(pte); }
@@ -293,13 +294,13 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu
293/* Find an entry in the second-level page table.. */ 294/* Find an entry in the second-level page table.. */
294extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) 295extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
295{ 296{
296 return (pmd_t *) pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); 297 return (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
297} 298}
298 299
299/* Find an entry in the third-level page table.. */ 300/* Find an entry in the third-level page table.. */
300extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) 301extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
301{ 302{
302 return (pte_t *) pmd_page_kernel(*dir) 303 return (pte_t *) pmd_page_vaddr(*dir)
303 + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); 304 + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1));
304} 305}
305 306
diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h
index 8d3919c6458c..4d10d319fa34 100644
--- a/include/asm-arm/pgtable.h
+++ b/include/asm-arm/pgtable.h
@@ -224,9 +224,9 @@ extern struct page *empty_zero_page;
224#define pte_none(pte) (!pte_val(pte)) 224#define pte_none(pte) (!pte_val(pte))
225#define pte_clear(mm,addr,ptep) set_pte_at((mm),(addr),(ptep), __pte(0)) 225#define pte_clear(mm,addr,ptep) set_pte_at((mm),(addr),(ptep), __pte(0))
226#define pte_page(pte) (pfn_to_page(pte_pfn(pte))) 226#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
227#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) 227#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
228#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) 228#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
229#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) 229#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
230#define pte_unmap(pte) do { } while (0) 230#define pte_unmap(pte) do { } while (0)
231#define pte_unmap_nested(pte) do { } while (0) 231#define pte_unmap_nested(pte) do { } while (0)
232 232
@@ -291,7 +291,7 @@ PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG);
291 clean_pmd_entry(pmdp); \ 291 clean_pmd_entry(pmdp); \
292 } while (0) 292 } while (0)
293 293
294static inline pte_t *pmd_page_kernel(pmd_t pmd) 294static inline pte_t *pmd_page_vaddr(pmd_t pmd)
295{ 295{
296 unsigned long ptr; 296 unsigned long ptr;
297 297
diff --git a/include/asm-arm26/pgtable.h b/include/asm-arm26/pgtable.h
index 19ac9101a6bb..63a8881fae13 100644
--- a/include/asm-arm26/pgtable.h
+++ b/include/asm-arm26/pgtable.h
@@ -186,12 +186,12 @@ extern struct page *empty_zero_page;
186 * return a pointer to memory (no special alignment) 186 * return a pointer to memory (no special alignment)
187 */ 187 */
188#define pmd_page(pmd) ((struct page *)(pmd_val((pmd)) & ~_PMD_PRESENT)) 188#define pmd_page(pmd) ((struct page *)(pmd_val((pmd)) & ~_PMD_PRESENT))
189#define pmd_page_kernel(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT)) 189#define pmd_page_vaddr(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT))
190 190
191#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) 191#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
192 192
193#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) 193#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
194#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) 194#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr))
195#define pte_unmap(pte) do { } while (0) 195#define pte_unmap(pte) do { } while (0)
196#define pte_unmap_nested(pte) do { } while (0) 196#define pte_unmap_nested(pte) do { } while (0)
197 197
diff --git a/include/asm-avr32/Kbuild b/include/asm-avr32/Kbuild
new file mode 100644
index 000000000000..8770e73ce938
--- /dev/null
+++ b/include/asm-avr32/Kbuild
@@ -0,0 +1,3 @@
1include include/asm-generic/Kbuild.asm
2
3headers-y += cachectl.h
diff --git a/include/asm-avr32/a.out.h b/include/asm-avr32/a.out.h
new file mode 100644
index 000000000000..50bf6e31a143
--- /dev/null
+++ b/include/asm-avr32/a.out.h
@@ -0,0 +1,26 @@
1#ifndef __ASM_AVR32_A_OUT_H
2#define __ASM_AVR32_A_OUT_H
3
4struct exec
5{
6 unsigned long a_info; /* Use macros N_MAGIC, etc for access */
7 unsigned a_text; /* length of text, in bytes */
8 unsigned a_data; /* length of data, in bytes */
9 unsigned a_bss; /* length of uninitialized data area for file, in bytes */
10 unsigned a_syms; /* length of symbol table data in file, in bytes */
11 unsigned a_entry; /* start address */
12 unsigned a_trsize; /* length of relocation info for text, in bytes */
13 unsigned a_drsize; /* length of relocation info for data, in bytes */
14};
15
16#define N_TRSIZE(a) ((a).a_trsize)
17#define N_DRSIZE(a) ((a).a_drsize)
18#define N_SYMSIZE(a) ((a).a_syms)
19
20#ifdef __KERNEL__
21
22#define STACK_TOP TASK_SIZE
23
24#endif
25
26#endif /* __ASM_AVR32_A_OUT_H */
diff --git a/include/asm-avr32/addrspace.h b/include/asm-avr32/addrspace.h
new file mode 100644
index 000000000000..366794858ec7
--- /dev/null
+++ b/include/asm-avr32/addrspace.h
@@ -0,0 +1,43 @@
1/*
2 * Definitions for the address spaces of the AVR32 CPUs. Heavily based on
3 * include/asm-sh/addrspace.h
4 *
5 * Copyright (C) 2004-2006 Atmel Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef __ASM_AVR32_ADDRSPACE_H
12#define __ASM_AVR32_ADDRSPACE_H
13
14#ifdef CONFIG_MMU
15
16/* Memory segments when segmentation is enabled */
17#define P0SEG 0x00000000
18#define P1SEG 0x80000000
19#define P2SEG 0xa0000000
20#define P3SEG 0xc0000000
21#define P4SEG 0xe0000000
22
23/* Returns the privileged segment base of a given address */
24#define PXSEG(a) (((unsigned long)(a)) & 0xe0000000)
25
26/* Returns the physical address of a PnSEG (n=1,2) address */
27#define PHYSADDR(a) (((unsigned long)(a)) & 0x1fffffff)
28
29/*
30 * Map an address to a certain privileged segment
31 */
32#define P1SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
33 | P1SEG))
34#define P2SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
35 | P2SEG))
36#define P3SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
37 | P3SEG))
38#define P4SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
39 | P4SEG))
40
41#endif /* CONFIG_MMU */
42
43#endif /* __ASM_AVR32_ADDRSPACE_H */
diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h
new file mode 100644
index 000000000000..ce1150d4438d
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h
@@ -0,0 +1,36 @@
1/*
2 * include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h
3 *
4 * Copyright (C) 2005 Ivan Kokshaysky
5 * Copyright (C) SAN People
6 *
7 * Peripheral Data Controller (PDC) registers.
8 * Based on AT91RM9200 datasheet revision E.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 */
15
16#ifndef AT91RM9200_PDC_H
17#define AT91RM9200_PDC_H
18
19#define AT91_PDC_RPR 0x100 /* Receive Pointer Register */
20#define AT91_PDC_RCR 0x104 /* Receive Counter Register */
21#define AT91_PDC_TPR 0x108 /* Transmit Pointer Register */
22#define AT91_PDC_TCR 0x10c /* Transmit Counter Register */
23#define AT91_PDC_RNPR 0x110 /* Receive Next Pointer Register */
24#define AT91_PDC_RNCR 0x114 /* Receive Next Counter Register */
25#define AT91_PDC_TNPR 0x118 /* Transmit Next Pointer Register */
26#define AT91_PDC_TNCR 0x11c /* Transmit Next Counter Register */
27
28#define AT91_PDC_PTCR 0x120 /* Transfer Control Register */
29#define AT91_PDC_RXTEN (1 << 0) /* Receiver Transfer Enable */
30#define AT91_PDC_RXTDIS (1 << 1) /* Receiver Transfer Disable */
31#define AT91_PDC_TXTEN (1 << 8) /* Transmitter Transfer Enable */
32#define AT91_PDC_TXTDIS (1 << 9) /* Transmitter Transfer Disable */
33
34#define AT91_PDC_PTSR 0x124 /* Transfer Status Register */
35
36#endif
diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_usart.h b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h
new file mode 100644
index 000000000000..79f851e31b9c
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h
@@ -0,0 +1,123 @@
1/*
2 * include/asm-arm/arch-at91rm9200/at91rm9200_usart.h
3 *
4 * Copyright (C) 2005 Ivan Kokshaysky
5 * Copyright (C) SAN People
6 *
7 * USART registers.
8 * Based on AT91RM9200 datasheet revision E.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 */
15
16#ifndef AT91RM9200_USART_H
17#define AT91RM9200_USART_H
18
19#define AT91_US_CR 0x00 /* Control Register */
20#define AT91_US_RSTRX (1 << 2) /* Reset Receiver */
21#define AT91_US_RSTTX (1 << 3) /* Reset Transmitter */
22#define AT91_US_RXEN (1 << 4) /* Receiver Enable */
23#define AT91_US_RXDIS (1 << 5) /* Receiver Disable */
24#define AT91_US_TXEN (1 << 6) /* Transmitter Enable */
25#define AT91_US_TXDIS (1 << 7) /* Transmitter Disable */
26#define AT91_US_RSTSTA (1 << 8) /* Reset Status Bits */
27#define AT91_US_STTBRK (1 << 9) /* Start Break */
28#define AT91_US_STPBRK (1 << 10) /* Stop Break */
29#define AT91_US_STTTO (1 << 11) /* Start Time-out */
30#define AT91_US_SENDA (1 << 12) /* Send Address */
31#define AT91_US_RSTIT (1 << 13) /* Reset Iterations */
32#define AT91_US_RSTNACK (1 << 14) /* Reset Non Acknowledge */
33#define AT91_US_RETTO (1 << 15) /* Rearm Time-out */
34#define AT91_US_DTREN (1 << 16) /* Data Terminal Ready Enable */
35#define AT91_US_DTRDIS (1 << 17) /* Data Terminal Ready Disable */
36#define AT91_US_RTSEN (1 << 18) /* Request To Send Enable */
37#define AT91_US_RTSDIS (1 << 19) /* Request To Send Disable */
38
39#define AT91_US_MR 0x04 /* Mode Register */
40#define AT91_US_USMODE (0xf << 0) /* Mode of the USART */
41#define AT91_US_USMODE_NORMAL 0
42#define AT91_US_USMODE_RS485 1
43#define AT91_US_USMODE_HWHS 2
44#define AT91_US_USMODE_MODEM 3
45#define AT91_US_USMODE_ISO7816_T0 4
46#define AT91_US_USMODE_ISO7816_T1 6
47#define AT91_US_USMODE_IRDA 8
48#define AT91_US_USCLKS (3 << 4) /* Clock Selection */
49#define AT91_US_CHRL (3 << 6) /* Character Length */
50#define AT91_US_CHRL_5 (0 << 6)
51#define AT91_US_CHRL_6 (1 << 6)
52#define AT91_US_CHRL_7 (2 << 6)
53#define AT91_US_CHRL_8 (3 << 6)
54#define AT91_US_SYNC (1 << 8) /* Synchronous Mode Select */
55#define AT91_US_PAR (7 << 9) /* Parity Type */
56#define AT91_US_PAR_EVEN (0 << 9)
57#define AT91_US_PAR_ODD (1 << 9)
58#define AT91_US_PAR_SPACE (2 << 9)
59#define AT91_US_PAR_MARK (3 << 9)
60#define AT91_US_PAR_NONE (4 << 9)
61#define AT91_US_PAR_MULTI_DROP (6 << 9)
62#define AT91_US_NBSTOP (3 << 12) /* Number of Stop Bits */
63#define AT91_US_NBSTOP_1 (0 << 12)
64#define AT91_US_NBSTOP_1_5 (1 << 12)
65#define AT91_US_NBSTOP_2 (2 << 12)
66#define AT91_US_CHMODE (3 << 14) /* Channel Mode */
67#define AT91_US_CHMODE_NORMAL (0 << 14)
68#define AT91_US_CHMODE_ECHO (1 << 14)
69#define AT91_US_CHMODE_LOC_LOOP (2 << 14)
70#define AT91_US_CHMODE_REM_LOOP (3 << 14)
71#define AT91_US_MSBF (1 << 16) /* Bit Order */
72#define AT91_US_MODE9 (1 << 17) /* 9-bit Character Length */
73#define AT91_US_CLKO (1 << 18) /* Clock Output Select */
74#define AT91_US_OVER (1 << 19) /* Oversampling Mode */
75#define AT91_US_INACK (1 << 20) /* Inhibit Non Acknowledge */
76#define AT91_US_DSNACK (1 << 21) /* Disable Successive NACK */
77#define AT91_US_MAX_ITER (7 << 24) /* Max Iterations */
78#define AT91_US_FILTER (1 << 28) /* Infrared Receive Line Filter */
79
80#define AT91_US_IER 0x08 /* Interrupt Enable Register */
81#define AT91_US_RXRDY (1 << 0) /* Receiver Ready */
82#define AT91_US_TXRDY (1 << 1) /* Transmitter Ready */
83#define AT91_US_RXBRK (1 << 2) /* Break Received / End of Break */
84#define AT91_US_ENDRX (1 << 3) /* End of Receiver Transfer */
85#define AT91_US_ENDTX (1 << 4) /* End of Transmitter Transfer */
86#define AT91_US_OVRE (1 << 5) /* Overrun Error */
87#define AT91_US_FRAME (1 << 6) /* Framing Error */
88#define AT91_US_PARE (1 << 7) /* Parity Error */
89#define AT91_US_TIMEOUT (1 << 8) /* Receiver Time-out */
90#define AT91_US_TXEMPTY (1 << 9) /* Transmitter Empty */
91#define AT91_US_ITERATION (1 << 10) /* Max number of Repetitions Reached */
92#define AT91_US_TXBUFE (1 << 11) /* Transmission Buffer Empty */
93#define AT91_US_RXBUFF (1 << 12) /* Reception Buffer Full */
94#define AT91_US_NACK (1 << 13) /* Non Acknowledge */
95#define AT91_US_RIIC (1 << 16) /* Ring Indicator Input Change */
96#define AT91_US_DSRIC (1 << 17) /* Data Set Ready Input Change */
97#define AT91_US_DCDIC (1 << 18) /* Data Carrier Detect Input Change */
98#define AT91_US_CTSIC (1 << 19) /* Clear to Send Input Change */
99#define AT91_US_RI (1 << 20) /* RI */
100#define AT91_US_DSR (1 << 21) /* DSR */
101#define AT91_US_DCD (1 << 22) /* DCD */
102#define AT91_US_CTS (1 << 23) /* CTS */
103
104#define AT91_US_IDR 0x0c /* Interrupt Disable Register */
105#define AT91_US_IMR 0x10 /* Interrupt Mask Register */
106#define AT91_US_CSR 0x14 /* Channel Status Register */
107#define AT91_US_RHR 0x18 /* Receiver Holding Register */
108#define AT91_US_THR 0x1c /* Transmitter Holding Register */
109
110#define AT91_US_BRGR 0x20 /* Baud Rate Generator Register */
111#define AT91_US_CD (0xffff << 0) /* Clock Divider */
112
113#define AT91_US_RTOR 0x24 /* Receiver Time-out Register */
114#define AT91_US_TO (0xffff << 0) /* Time-out Value */
115
116#define AT91_US_TTGR 0x28 /* Transmitter Timeguard Register */
117#define AT91_US_TG (0xff << 0) /* Timeguard Value */
118
119#define AT91_US_FIDI 0x40 /* FI DI Ratio Register */
120#define AT91_US_NER 0x44 /* Number of Errors Register */
121#define AT91_US_IF 0x4c /* IrDA Filter Register */
122
123#endif
diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h
new file mode 100644
index 000000000000..39368e18ab20
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/board.h
@@ -0,0 +1,35 @@
1/*
2 * Platform data definitions.
3 */
4#ifndef __ASM_ARCH_BOARD_H
5#define __ASM_ARCH_BOARD_H
6
7#include <linux/types.h>
8
9/* Add basic devices: system manager, interrupt controller, portmuxes, etc. */
10void at32_add_system_devices(void);
11
12#define AT91_NR_UART 4
13extern struct platform_device *at91_default_console_device;
14
15struct platform_device *at32_add_device_usart(unsigned int id);
16
17struct eth_platform_data {
18 u8 valid;
19 u8 mii_phy_addr;
20 u8 is_rmii;
21 u8 hw_addr[6];
22};
23struct platform_device *
24at32_add_device_eth(unsigned int id, struct eth_platform_data *data);
25
26struct platform_device *at32_add_device_spi(unsigned int id);
27
28struct lcdc_platform_data {
29 unsigned long fbmem_start;
30 unsigned long fbmem_size;
31};
32struct platform_device *
33at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data);
34
35#endif /* __ASM_ARCH_BOARD_H */
diff --git a/include/asm-avr32/arch-at32ap/init.h b/include/asm-avr32/arch-at32ap/init.h
new file mode 100644
index 000000000000..43722634e069
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/init.h
@@ -0,0 +1,21 @@
1/*
2 * AT32AP platform initialization calls.
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_AVR32_AT32AP_INIT_H__
11#define __ASM_AVR32_AT32AP_INIT_H__
12
13void setup_platform(void);
14
15/* Called by setup_platform */
16void at32_clock_init(void);
17void at32_portmux_init(void);
18
19void at32_setup_serial_console(unsigned int usart_id);
20
21#endif /* __ASM_AVR32_AT32AP_INIT_H__ */
diff --git a/include/asm-avr32/arch-at32ap/portmux.h b/include/asm-avr32/arch-at32ap/portmux.h
new file mode 100644
index 000000000000..4d50421262a1
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/portmux.h
@@ -0,0 +1,16 @@
1/*
2 * AT32 portmux interface.
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_AVR32_AT32_PORTMUX_H__
11#define __ASM_AVR32_AT32_PORTMUX_H__
12
13void portmux_set_func(unsigned int portmux_id, unsigned int pin_id,
14 unsigned int function_id);
15
16#endif /* __ASM_AVR32_AT32_PORTMUX_H__ */
diff --git a/include/asm-avr32/arch-at32ap/sm.h b/include/asm-avr32/arch-at32ap/sm.h
new file mode 100644
index 000000000000..265a9ead20bf
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/sm.h
@@ -0,0 +1,27 @@
1/*
2 * AT32 System Manager interface.
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_AVR32_AT32_SM_H__
11#define __ASM_AVR32_AT32_SM_H__
12
13struct irq_chip;
14struct platform_device;
15
16struct at32_sm {
17 spinlock_t lock;
18 void __iomem *regs;
19 struct irq_chip *eim_chip;
20 unsigned int eim_first_irq;
21 struct platform_device *pdev;
22};
23
24extern struct platform_device at32_sm_device;
25extern struct at32_sm system_manager;
26
27#endif /* __ASM_AVR32_AT32_SM_H__ */
diff --git a/include/asm-avr32/arch-at32ap/smc.h b/include/asm-avr32/arch-at32ap/smc.h
new file mode 100644
index 000000000000..3732b328303d
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/smc.h
@@ -0,0 +1,60 @@
1/*
2 * Static Memory Controller for AT32 chips
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * Inspired by the OMAP2 General-Purpose Memory Controller interface
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#ifndef __ARCH_AT32AP_SMC_H
13#define __ARCH_AT32AP_SMC_H
14
15/*
16 * All timing parameters are in nanoseconds.
17 */
18struct smc_config {
19 /* Delay from address valid to assertion of given strobe */
20 u16 ncs_read_setup;
21 u16 nrd_setup;
22 u16 ncs_write_setup;
23 u16 nwe_setup;
24
25 /* Pulse length of given strobe */
26 u16 ncs_read_pulse;
27 u16 nrd_pulse;
28 u16 ncs_write_pulse;
29 u16 nwe_pulse;
30
31 /* Total cycle length of given operation */
32 u16 read_cycle;
33 u16 write_cycle;
34
35 /* Bus width in bytes */
36 u8 bus_width;
37
38 /*
39 * 0: Data is sampled on rising edge of NCS
40 * 1: Data is sampled on rising edge of NRD
41 */
42 unsigned int nrd_controlled:1;
43
44 /*
45 * 0: Data is driven on falling edge of NCS
46 * 1: Data is driven on falling edge of NWE
47 */
48 unsigned int nwe_controlled:1;
49
50 /*
51 * 0: Byte select access type
52 * 1: Byte write access type
53 */
54 unsigned int byte_write:1;
55};
56
57extern int smc_set_configuration(int cs, const struct smc_config *config);
58extern struct smc_config *smc_get_configuration(int cs);
59
60#endif /* __ARCH_AT32AP_SMC_H */
diff --git a/include/asm-avr32/asm.h b/include/asm-avr32/asm.h
new file mode 100644
index 000000000000..515c7618952b
--- /dev/null
+++ b/include/asm-avr32/asm.h
@@ -0,0 +1,102 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_ASM_H__
9#define __ASM_AVR32_ASM_H__
10
11#include <asm/sysreg.h>
12#include <asm/asm-offsets.h>
13#include <asm/thread_info.h>
14
15#define mask_interrupts ssrf SR_GM_BIT
16#define mask_exceptions ssrf SR_EM_BIT
17#define unmask_interrupts csrf SR_GM_BIT
18#define unmask_exceptions csrf SR_EM_BIT
19
20#ifdef CONFIG_FRAME_POINTER
21 .macro save_fp
22 st.w --sp, r7
23 .endm
24 .macro restore_fp
25 ld.w r7, sp++
26 .endm
27 .macro zero_fp
28 mov r7, 0
29 .endm
30#else
31 .macro save_fp
32 .endm
33 .macro restore_fp
34 .endm
35 .macro zero_fp
36 .endm
37#endif
38 .macro get_thread_info reg
39 mov \reg, sp
40 andl \reg, ~(THREAD_SIZE - 1) & 0xffff
41 .endm
42
43 /* Save and restore registers */
44 .macro save_min sr, tmp=lr
45 pushm lr
46 mfsr \tmp, \sr
47 zero_fp
48 st.w --sp, \tmp
49 .endm
50
51 .macro restore_min sr, tmp=lr
52 ld.w \tmp, sp++
53 mtsr \sr, \tmp
54 popm lr
55 .endm
56
57 .macro save_half sr, tmp=lr
58 save_fp
59 pushm r8-r9,r10,r11,r12,lr
60 zero_fp
61 mfsr \tmp, \sr
62 st.w --sp, \tmp
63 .endm
64
65 .macro restore_half sr, tmp=lr
66 ld.w \tmp, sp++
67 mtsr \sr, \tmp
68 popm r8-r9,r10,r11,r12,lr
69 restore_fp
70 .endm
71
72 .macro save_full_user sr, tmp=lr
73 stmts --sp, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr
74 st.w --sp, lr
75 zero_fp
76 mfsr \tmp, \sr
77 st.w --sp, \tmp
78 .endm
79
80 .macro restore_full_user sr, tmp=lr
81 ld.w \tmp, sp++
82 mtsr \sr, \tmp
83 ld.w lr, sp++
84 ldmts sp++, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr
85 .endm
86
87 /* uaccess macros */
88 .macro branch_if_kernel scratch, label
89 get_thread_info \scratch
90 ld.w \scratch, \scratch[TI_flags]
91 bld \scratch, TIF_USERSPACE
92 brcc \label
93 .endm
94
95 .macro ret_if_privileged scratch, addr, size, ret
96 sub \scratch, \size, 1
97 add \scratch, \addr
98 retcs \ret
99 retmi \ret
100 .endm
101
102#endif /* __ASM_AVR32_ASM_H__ */
diff --git a/include/asm-avr32/atomic.h b/include/asm-avr32/atomic.h
new file mode 100644
index 000000000000..e0b9c44c126c
--- /dev/null
+++ b/include/asm-avr32/atomic.h
@@ -0,0 +1,201 @@
1/*
2 * Atomic operations that C can't guarantee us. Useful for
3 * resource counting etc.
4 *
5 * But use these as seldom as possible since they are slower than
6 * regular operations.
7 *
8 * Copyright (C) 2004-2006 Atmel Corporation
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14#ifndef __ASM_AVR32_ATOMIC_H
15#define __ASM_AVR32_ATOMIC_H
16
17#include <asm/system.h>
18
19typedef struct { volatile int counter; } atomic_t;
20#define ATOMIC_INIT(i) { (i) }
21
22#define atomic_read(v) ((v)->counter)
23#define atomic_set(v, i) (((v)->counter) = i)
24
25/*
26 * atomic_sub_return - subtract integer from atomic variable
27 * @i: integer value to subtract
28 * @v: pointer of type atomic_t
29 *
30 * Atomically subtracts @i from @v. Returns the resulting value.
31 */
32static inline int atomic_sub_return(int i, atomic_t *v)
33{
34 int result;
35
36 asm volatile(
37 "/* atomic_sub_return */\n"
38 "1: ssrf 5\n"
39 " ld.w %0, %2\n"
40 " sub %0, %3\n"
41 " stcond %1, %0\n"
42 " brne 1b"
43 : "=&r"(result), "=o"(v->counter)
44 : "m"(v->counter), "ir"(i)
45 : "cc");
46
47 return result;
48}
49
50/*
51 * atomic_add_return - add integer to atomic variable
52 * @i: integer value to add
53 * @v: pointer of type atomic_t
54 *
55 * Atomically adds @i to @v. Returns the resulting value.
56 */
57static inline int atomic_add_return(int i, atomic_t *v)
58{
59 int result;
60
61 if (__builtin_constant_p(i))
62 result = atomic_sub_return(-i, v);
63 else
64 asm volatile(
65 "/* atomic_add_return */\n"
66 "1: ssrf 5\n"
67 " ld.w %0, %1\n"
68 " add %0, %3\n"
69 " stcond %2, %0\n"
70 " brne 1b"
71 : "=&r"(result), "=o"(v->counter)
72 : "m"(v->counter), "r"(i)
73 : "cc", "memory");
74
75 return result;
76}
77
78/*
79 * atomic_sub_unless - sub unless the number is a given value
80 * @v: pointer of type atomic_t
81 * @a: the amount to subtract from v...
82 * @u: ...unless v is equal to u.
83 *
84 * If the atomic value v is not equal to u, this function subtracts a
85 * from v, and returns non zero. If v is equal to u then it returns
86 * zero. This is done as an atomic operation.
87*/
88static inline int atomic_sub_unless(atomic_t *v, int a, int u)
89{
90 int tmp, result = 0;
91
92 asm volatile(
93 "/* atomic_sub_unless */\n"
94 "1: ssrf 5\n"
95 " ld.w %0, %3\n"
96 " cp.w %0, %5\n"
97 " breq 1f\n"
98 " sub %0, %4\n"
99 " stcond %2, %0\n"
100 " brne 1b\n"
101 " mov %1, 1\n"
102 "1:"
103 : "=&r"(tmp), "=&r"(result), "=o"(v->counter)
104 : "m"(v->counter), "ir"(a), "ir"(u)
105 : "cc", "memory");
106
107 return result;
108}
109
110/*
111 * atomic_add_unless - add unless the number is a given value
112 * @v: pointer of type atomic_t
113 * @a: the amount to add to v...
114 * @u: ...unless v is equal to u.
115 *
116 * If the atomic value v is not equal to u, this function adds a to v,
117 * and returns non zero. If v is equal to u then it returns zero. This
118 * is done as an atomic operation.
119*/
120static inline int atomic_add_unless(atomic_t *v, int a, int u)
121{
122 int tmp, result;
123
124 if (__builtin_constant_p(a))
125 result = atomic_sub_unless(v, -a, u);
126 else {
127 result = 0;
128 asm volatile(
129 "/* atomic_add_unless */\n"
130 "1: ssrf 5\n"
131 " ld.w %0, %3\n"
132 " cp.w %0, %5\n"
133 " breq 1f\n"
134 " add %0, %4\n"
135 " stcond %2, %0\n"
136 " brne 1b\n"
137 " mov %1, 1\n"
138 "1:"
139 : "=&r"(tmp), "=&r"(result), "=o"(v->counter)
140 : "m"(v->counter), "r"(a), "ir"(u)
141 : "cc", "memory");
142 }
143
144 return result;
145}
146
147/*
148 * atomic_sub_if_positive - conditionally subtract integer from atomic variable
149 * @i: integer value to subtract
150 * @v: pointer of type atomic_t
151 *
152 * Atomically test @v and subtract @i if @v is greater than or equal to @i.
153 * The function returns the old value of @v minus @i.
154 */
155static inline int atomic_sub_if_positive(int i, atomic_t *v)
156{
157 int result;
158
159 asm volatile(
160 "/* atomic_sub_if_positive */\n"
161 "1: ssrf 5\n"
162 " ld.w %0, %2\n"
163 " sub %0, %3\n"
164 " brlt 1f\n"
165 " stcond %1, %0\n"
166 " brne 1b\n"
167 "1:"
168 : "=&r"(result), "=o"(v->counter)
169 : "m"(v->counter), "ir"(i)
170 : "cc", "memory");
171
172 return result;
173}
174
175#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
176#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
177
178#define atomic_sub(i, v) (void)atomic_sub_return(i, v)
179#define atomic_add(i, v) (void)atomic_add_return(i, v)
180#define atomic_dec(v) atomic_sub(1, (v))
181#define atomic_inc(v) atomic_add(1, (v))
182
183#define atomic_dec_return(v) atomic_sub_return(1, v)
184#define atomic_inc_return(v) atomic_add_return(1, v)
185
186#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
187#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
188#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
189#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
190
191#define atomic_inc_not_zero(v) atomic_add_unless(v, 1, 0)
192#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v)
193
194#define smp_mb__before_atomic_dec() barrier()
195#define smp_mb__after_atomic_dec() barrier()
196#define smp_mb__before_atomic_inc() barrier()
197#define smp_mb__after_atomic_inc() barrier()
198
199#include <asm-generic/atomic.h>
200
201#endif /* __ASM_AVR32_ATOMIC_H */
diff --git a/include/asm-avr32/auxvec.h b/include/asm-avr32/auxvec.h
new file mode 100644
index 000000000000..d5dd435bf8f4
--- /dev/null
+++ b/include/asm-avr32/auxvec.h
@@ -0,0 +1,4 @@
1#ifndef __ASM_AVR32_AUXVEC_H
2#define __ASM_AVR32_AUXVEC_H
3
4#endif /* __ASM_AVR32_AUXVEC_H */
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
new file mode 100644
index 000000000000..5299f8c8e11d
--- /dev/null
+++ b/include/asm-avr32/bitops.h
@@ -0,0 +1,296 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_BITOPS_H
9#define __ASM_AVR32_BITOPS_H
10
11#include <asm/byteorder.h>
12#include <asm/system.h>
13
14/*
15 * clear_bit() doesn't provide any barrier for the compiler
16 */
17#define smp_mb__before_clear_bit() barrier()
18#define smp_mb__after_clear_bit() barrier()
19
20/*
21 * set_bit - Atomically set a bit in memory
22 * @nr: the bit to set
23 * @addr: the address to start counting from
24 *
25 * This function is atomic and may not be reordered. See __set_bit()
26 * if you do not require the atomic guarantees.
27 *
28 * Note that @nr may be almost arbitrarily large; this function is not
29 * restricted to acting on a single-word quantity.
30 */
31static inline void set_bit(int nr, volatile void * addr)
32{
33 unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
34 unsigned long tmp;
35
36 if (__builtin_constant_p(nr)) {
37 asm volatile(
38 "1: ssrf 5\n"
39 " ld.w %0, %2\n"
40 " sbr %0, %3\n"
41 " stcond %1, %0\n"
42 " brne 1b"
43 : "=&r"(tmp), "=o"(*p)
44 : "m"(*p), "i"(nr)
45 : "cc");
46 } else {
47 unsigned long mask = 1UL << (nr % BITS_PER_LONG);
48 asm volatile(
49 "1: ssrf 5\n"
50 " ld.w %0, %2\n"
51 " or %0, %3\n"
52 " stcond %1, %0\n"
53 " brne 1b"
54 : "=&r"(tmp), "=o"(*p)
55 : "m"(*p), "r"(mask)
56 : "cc");
57 }
58}
59
60/*
61 * clear_bit - Clears a bit in memory
62 * @nr: Bit to clear
63 * @addr: Address to start counting from
64 *
65 * clear_bit() is atomic and may not be reordered. However, it does
66 * not contain a memory barrier, so if it is used for locking purposes,
67 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
68 * in order to ensure changes are visible on other processors.
69 */
70static inline void clear_bit(int nr, volatile void * addr)
71{
72 unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
73 unsigned long tmp;
74
75 if (__builtin_constant_p(nr)) {
76 asm volatile(
77 "1: ssrf 5\n"
78 " ld.w %0, %2\n"
79 " cbr %0, %3\n"
80 " stcond %1, %0\n"
81 " brne 1b"
82 : "=&r"(tmp), "=o"(*p)
83 : "m"(*p), "i"(nr)
84 : "cc");
85 } else {
86 unsigned long mask = 1UL << (nr % BITS_PER_LONG);
87 asm volatile(
88 "1: ssrf 5\n"
89 " ld.w %0, %2\n"
90 " andn %0, %3\n"
91 " stcond %1, %0\n"
92 " brne 1b"
93 : "=&r"(tmp), "=o"(*p)
94 : "m"(*p), "r"(mask)
95 : "cc");
96 }
97}
98
99/*
100 * change_bit - Toggle a bit in memory
101 * @nr: Bit to change
102 * @addr: Address to start counting from
103 *
104 * change_bit() is atomic and may not be reordered.
105 * Note that @nr may be almost arbitrarily large; this function is not
106 * restricted to acting on a single-word quantity.
107 */
108static inline void change_bit(int nr, volatile void * addr)
109{
110 unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
111 unsigned long mask = 1UL << (nr % BITS_PER_LONG);
112 unsigned long tmp;
113
114 asm volatile(
115 "1: ssrf 5\n"
116 " ld.w %0, %2\n"
117 " eor %0, %3\n"
118 " stcond %1, %0\n"
119 " brne 1b"
120 : "=&r"(tmp), "=o"(*p)
121 : "m"(*p), "r"(mask)
122 : "cc");
123}
124
125/*
126 * test_and_set_bit - Set a bit and return its old value
127 * @nr: Bit to set
128 * @addr: Address to count from
129 *
130 * This operation is atomic and cannot be reordered.
131 * It also implies a memory barrier.
132 */
133static inline int test_and_set_bit(int nr, volatile void * addr)
134{
135 unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
136 unsigned long mask = 1UL << (nr % BITS_PER_LONG);
137 unsigned long tmp, old;
138
139 if (__builtin_constant_p(nr)) {
140 asm volatile(
141 "1: ssrf 5\n"
142 " ld.w %0, %3\n"
143 " mov %2, %0\n"
144 " sbr %0, %4\n"
145 " stcond %1, %0\n"
146 " brne 1b"
147 : "=&r"(tmp), "=o"(*p), "=&r"(old)
148 : "m"(*p), "i"(nr)
149 : "memory", "cc");
150 } else {
151 asm volatile(
152 "1: ssrf 5\n"
153 " ld.w %2, %3\n"
154 " or %0, %2, %4\n"
155 " stcond %1, %0\n"
156 " brne 1b"
157 : "=&r"(tmp), "=o"(*p), "=&r"(old)
158 : "m"(*p), "r"(mask)
159 : "memory", "cc");
160 }
161
162 return (old & mask) != 0;
163}
164
165/*
166 * test_and_clear_bit - Clear a bit and return its old value
167 * @nr: Bit to clear
168 * @addr: Address to count from
169 *
170 * This operation is atomic and cannot be reordered.
171 * It also implies a memory barrier.
172 */
173static inline int test_and_clear_bit(int nr, volatile void * addr)
174{
175 unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
176 unsigned long mask = 1UL << (nr % BITS_PER_LONG);
177 unsigned long tmp, old;
178
179 if (__builtin_constant_p(nr)) {
180 asm volatile(
181 "1: ssrf 5\n"
182 " ld.w %0, %3\n"
183 " mov %2, %0\n"
184 " cbr %0, %4\n"
185 " stcond %1, %0\n"
186 " brne 1b"
187 : "=&r"(tmp), "=o"(*p), "=&r"(old)
188 : "m"(*p), "i"(nr)
189 : "memory", "cc");
190 } else {
191 asm volatile(
192 "1: ssrf 5\n"
193 " ld.w %0, %3\n"
194 " mov %2, %0\n"
195 " andn %0, %4\n"
196 " stcond %1, %0\n"
197 " brne 1b"
198 : "=&r"(tmp), "=o"(*p), "=&r"(old)
199 : "m"(*p), "r"(mask)
200 : "memory", "cc");
201 }
202
203 return (old & mask) != 0;
204}
205
206/*
207 * test_and_change_bit - Change a bit and return its old value
208 * @nr: Bit to change
209 * @addr: Address to count from
210 *
211 * This operation is atomic and cannot be reordered.
212 * It also implies a memory barrier.
213 */
214static inline int test_and_change_bit(int nr, volatile void * addr)
215{
216 unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
217 unsigned long mask = 1UL << (nr % BITS_PER_LONG);
218 unsigned long tmp, old;
219
220 asm volatile(
221 "1: ssrf 5\n"
222 " ld.w %2, %3\n"
223 " eor %0, %2, %4\n"
224 " stcond %1, %0\n"
225 " brne 1b"
226 : "=&r"(tmp), "=o"(*p), "=&r"(old)
227 : "m"(*p), "r"(mask)
228 : "memory", "cc");
229
230 return (old & mask) != 0;
231}
232
233#include <asm-generic/bitops/non-atomic.h>
234
235/* Find First bit Set */
236static inline unsigned long __ffs(unsigned long word)
237{
238 unsigned long result;
239
240 asm("brev %1\n\t"
241 "clz %0,%1"
242 : "=r"(result), "=&r"(word)
243 : "1"(word));
244 return result;
245}
246
247/* Find First Zero */
248static inline unsigned long ffz(unsigned long word)
249{
250 return __ffs(~word);
251}
252
253/* Find Last bit Set */
254static inline int fls(unsigned long word)
255{
256 unsigned long result;
257
258 asm("clz %0,%1" : "=r"(result) : "r"(word));
259 return 32 - result;
260}
261
262unsigned long find_first_zero_bit(const unsigned long *addr,
263 unsigned long size);
264unsigned long find_next_zero_bit(const unsigned long *addr,
265 unsigned long size,
266 unsigned long offset);
267unsigned long find_first_bit(const unsigned long *addr,
268 unsigned long size);
269unsigned long find_next_bit(const unsigned long *addr,
270 unsigned long size,
271 unsigned long offset);
272
273/*
274 * ffs: find first bit set. This is defined the same way as
275 * the libc and compiler builtin ffs routines, therefore
276 * differs in spirit from the above ffz (man ffs).
277 *
278 * The difference is that bit numbering starts at 1, and if no bit is set,
279 * the function returns 0.
280 */
281static inline int ffs(unsigned long word)
282{
283 if(word == 0)
284 return 0;
285 return __ffs(word) + 1;
286}
287
288#include <asm-generic/bitops/fls64.h>
289#include <asm-generic/bitops/sched.h>
290#include <asm-generic/bitops/hweight.h>
291
292#include <asm-generic/bitops/ext2-non-atomic.h>
293#include <asm-generic/bitops/ext2-atomic.h>
294#include <asm-generic/bitops/minix-le.h>
295
296#endif /* __ASM_AVR32_BITOPS_H */
diff --git a/include/asm-avr32/bug.h b/include/asm-avr32/bug.h
new file mode 100644
index 000000000000..521766bc9366
--- /dev/null
+++ b/include/asm-avr32/bug.h
@@ -0,0 +1,47 @@
1/*
2 * Copyright (C) 2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_BUG_H
9#define __ASM_AVR32_BUG_H
10
11#ifdef CONFIG_BUG
12
13/*
14 * According to our Chief Architect, this compact opcode is very
15 * unlikely to ever be implemented.
16 */
17#define AVR32_BUG_OPCODE 0x5df0
18
19#ifdef CONFIG_DEBUG_BUGVERBOSE
20
21#define BUG() \
22 do { \
23 asm volatile(".hword %0\n\t" \
24 ".hword %1\n\t" \
25 ".long %2" \
26 : \
27 : "n"(AVR32_BUG_OPCODE), \
28 "i"(__LINE__), "X"(__FILE__)); \
29 } while (0)
30
31#else
32
33#define BUG() \
34 do { \
35 asm volatile(".hword %0\n\t" \
36 : : "n"(AVR32_BUG_OPCODE)); \
37 } while (0)
38
39#endif /* CONFIG_DEBUG_BUGVERBOSE */
40
41#define HAVE_ARCH_BUG
42
43#endif /* CONFIG_BUG */
44
45#include <asm-generic/bug.h>
46
47#endif /* __ASM_AVR32_BUG_H */
diff --git a/include/asm-avr32/bugs.h b/include/asm-avr32/bugs.h
new file mode 100644
index 000000000000..7635e770622e
--- /dev/null
+++ b/include/asm-avr32/bugs.h
@@ -0,0 +1,15 @@
1/*
2 * This is included by init/main.c to check for architecture-dependent bugs.
3 *
4 * Needs:
5 * void check_bugs(void);
6 */
7#ifndef __ASM_AVR32_BUGS_H
8#define __ASM_AVR32_BUGS_H
9
10static void __init check_bugs(void)
11{
/* Copy the global loops_per_jiffy value into the cpu_data record. */
12 cpu_data->loops_per_jiffy = loops_per_jiffy;
13}
14
15#endif /* __ASM_AVR32_BUGS_H */
diff --git a/include/asm-avr32/byteorder.h b/include/asm-avr32/byteorder.h
new file mode 100644
index 000000000000..402ff4125cdc
--- /dev/null
+++ b/include/asm-avr32/byteorder.h
@@ -0,0 +1,25 @@
1/*
2 * AVR32 endian-conversion functions.
3 */
4#ifndef __ASM_AVR32_BYTEORDER_H
5#define __ASM_AVR32_BYTEORDER_H
6
7#include <asm/types.h>
8#include <linux/compiler.h>
9
10#ifdef __CHECKER__
11extern unsigned long __builtin_bswap_32(unsigned long x);
12extern unsigned short __builtin_bswap_16(unsigned short x);
13#endif
14
15#define __arch__swab32(x) __builtin_bswap_32(x)
16#define __arch__swab16(x) __builtin_bswap_16(x)
17
18#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
19# define __BYTEORDER_HAS_U64__
20# define __SWAB_64_THRU_32__
21#endif
22
23#include <linux/byteorder/big_endian.h>
24
25#endif /* __ASM_AVR32_BYTEORDER_H */
diff --git a/include/asm-avr32/cache.h b/include/asm-avr32/cache.h
new file mode 100644
index 000000000000..dabb955f3c00
--- /dev/null
+++ b/include/asm-avr32/cache.h
@@ -0,0 +1,29 @@
1#ifndef __ASM_AVR32_CACHE_H
2#define __ASM_AVR32_CACHE_H
3
4#define L1_CACHE_SHIFT 5
5#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
6
7#ifndef __ASSEMBLER__
8struct cache_info {
9 unsigned int ways;
10 unsigned int sets;
11 unsigned int linesz;
12};
13#endif /* __ASSEMBLER__ */
14
15/* Cache operation constants */
16#define ICACHE_FLUSH 0x00
17#define ICACHE_INVALIDATE 0x01
18#define ICACHE_LOCK 0x02
19#define ICACHE_UNLOCK 0x03
20#define ICACHE_PREFETCH 0x04
21
22#define DCACHE_FLUSH 0x08
23#define DCACHE_LOCK 0x09
24#define DCACHE_UNLOCK 0x0a
25#define DCACHE_INVALIDATE 0x0b
26#define DCACHE_CLEAN 0x0c
27#define DCACHE_CLEAN_INVAL 0x0d
28
29#endif /* __ASM_AVR32_CACHE_H */
diff --git a/include/asm-avr32/cachectl.h b/include/asm-avr32/cachectl.h
new file mode 100644
index 000000000000..4faf1ce60061
--- /dev/null
+++ b/include/asm-avr32/cachectl.h
@@ -0,0 +1,11 @@
1#ifndef __ASM_AVR32_CACHECTL_H
2#define __ASM_AVR32_CACHECTL_H
3
4/*
5 * Operations that can be performed through the cacheflush system call
6 */
7
8/* Clean the data cache, then invalidate the icache */
9#define CACHE_IFLUSH 0
10
11#endif /* __ASM_AVR32_CACHECTL_H */
diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h
new file mode 100644
index 000000000000..f1bf1708980e
--- /dev/null
+++ b/include/asm-avr32/cacheflush.h
@@ -0,0 +1,129 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_CACHEFLUSH_H
9#define __ASM_AVR32_CACHEFLUSH_H
10
11/* Keep includes the same across arches. */
12#include <linux/mm.h>
13
14#define CACHE_OP_ICACHE_INVALIDATE 0x01
15#define CACHE_OP_DCACHE_INVALIDATE 0x0b
16#define CACHE_OP_DCACHE_CLEAN 0x0c
17#define CACHE_OP_DCACHE_CLEAN_INVAL 0x0d
18
19/*
20 * Invalidate any cacheline containing virtual address vaddr without
21 * writing anything back to memory.
22 *
23 * Note that this function may corrupt unrelated data structures when
24 * applied on buffers that are not cacheline aligned in both ends.
25 */
26static inline void invalidate_dcache_line(void *vaddr)
27{
28 asm volatile("cache %0[0], %1"
29 :
30 : "r"(vaddr), "n"(CACHE_OP_DCACHE_INVALIDATE)
31 : "memory");
32}
33
34/*
35 * Make sure any cacheline containing virtual address vaddr is written
36 * to memory.
37 */
38static inline void clean_dcache_line(void *vaddr)
39{
40 asm volatile("cache %0[0], %1"
41 :
42 : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN)
43 : "memory");
44}
45
46/*
47 * Make sure any cacheline containing virtual address vaddr is written
48 * to memory and then invalidate it.
49 */
50static inline void flush_dcache_line(void *vaddr)
51{
52 asm volatile("cache %0[0], %1"
53 :
54 : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN_INVAL)
55 : "memory");
56}
57
58/*
59 * Invalidate any instruction cacheline containing virtual address
60 * vaddr.
61 */
62static inline void invalidate_icache_line(void *vaddr)
63{
64 asm volatile("cache %0[0], %1"
65 :
66 : "r"(vaddr), "n"(CACHE_OP_ICACHE_INVALIDATE)
67 : "memory");
68}
69
70/*
71 * Applies the above functions on all lines that are touched by the
72 * specified virtual address range.
73 */
74void invalidate_dcache_region(void *start, size_t len);
75void clean_dcache_region(void *start, size_t len);
76void flush_dcache_region(void *start, size_t len);
77void invalidate_icache_region(void *start, size_t len);
78
79/*
80 * Make sure any pending writes are completed before continuing.
81 */
82#define flush_write_buffer() asm volatile("sync 0" : : : "memory")
83
84/*
85 * The following functions are called when a virtual mapping changes.
86 * We do not need to flush anything in this case.
87 */
88#define flush_cache_all() do { } while (0)
89#define flush_cache_mm(mm) do { } while (0)
90#define flush_cache_range(vma, start, end) do { } while (0)
91#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
92#define flush_cache_vmap(start, end) do { } while (0)
93#define flush_cache_vunmap(start, end) do { } while (0)
94
95/*
96 * I think we need to implement this one to be able to reliably
97 * execute pages from RAMDISK. However, if we implement the
98 * flush_dcache_*() functions, it might not be needed anymore.
99 *
100 * #define flush_icache_page(vma, page) do { } while (0)
101 */
102extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
103
104/*
105 * These are (I think) related to D-cache aliasing. We might need to
106 * do something here, but only for certain configurations. No such
107 * configurations exist at this time.
108 */
109#define flush_dcache_page(page) do { } while (0)
110#define flush_dcache_mmap_lock(page) do { } while (0)
111#define flush_dcache_mmap_unlock(page) do { } while (0)
112
113/*
114 * These are for I/D cache coherency. In this case, we do need to
115 * flush with all configurations.
116 */
117extern void flush_icache_range(unsigned long start, unsigned long end);
118extern void flush_icache_user_range(struct vm_area_struct *vma,
119 struct page *page,
120 unsigned long addr, int len);
121
122#define copy_to_user_page(vma, page, vaddr, dst, src, len) do { \
123 memcpy(dst, src, len); \
124 flush_icache_user_range(vma, page, vaddr, len); \
125} while(0)
126#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
127 memcpy(dst, src, len)
128
129#endif /* __ASM_AVR32_CACHEFLUSH_H */
diff --git a/include/asm-avr32/checksum.h b/include/asm-avr32/checksum.h
new file mode 100644
index 000000000000..41b7af09edc4
--- /dev/null
+++ b/include/asm-avr32/checksum.h
@@ -0,0 +1,156 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_CHECKSUM_H
9#define __ASM_AVR32_CHECKSUM_H
10
11/*
12 * computes the checksum of a memory block at buff, length len,
13 * and adds in "sum" (32-bit)
14 *
15 * returns a 32-bit number suitable for feeding into itself
16 * or csum_tcpudp_magic
17 *
18 * this function must be called with even lengths, except
19 * for the last fragment, which may be odd
20 *
21 * it's best to have buff aligned on a 32-bit boundary
22 */
23unsigned int csum_partial(const unsigned char * buff, int len,
24 unsigned int sum);
25
26/*
27 * the same as csum_partial, but copies from src while it
28 * checksums, and handles user-space pointer exceptions correctly, when needed.
29 *
30 * here even more important to align src and dst on a 32-bit (or even
31 * better 64-bit) boundary
32 */
33unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
34 int sum, int *src_err_ptr,
35 int *dst_err_ptr);
36
37/*
38 * Note: when you get a NULL pointer exception here this means someone
39 * passed in an incorrect kernel address to one of these functions.
40 *
41 * If you use these functions directly please don't forget the
42 * verify_area().
43 */
/* Copy-and-checksum between kernel buffers; no fault reporting is requested. */
static inline
unsigned int csum_partial_copy_nocheck(const char *src, char *dst,
				       int len, int sum)
{
	int *const no_err_ptr = NULL;

	return csum_partial_copy_generic(src, dst, len, sum,
					 no_err_ptr, no_err_ptr);
}
50
51static inline
52unsigned int csum_partial_copy_from_user (const char __user *src, char *dst,
53 int len, int sum, int *err_ptr)
54{
55 return csum_partial_copy_generic((const char __force *)src, dst, len,
56 sum, err_ptr, NULL);
57}
58
59/*
60 * This is a version of ip_compute_csum() optimized for IP headers,
61 * which always checksum on 4 octet boundaries.
62 */
63static inline unsigned short ip_fast_csum(unsigned char *iph,
64 unsigned int ihl)
65{
66 unsigned int sum, tmp;
67
68 __asm__ __volatile__(
69 " ld.w %0, %1++\n"
70 " ld.w %3, %1++\n"
71 " sub %2, 4\n"
72 " add %0, %3\n"
73 " ld.w %3, %1++\n"
74 " adc %0, %0, %3\n"
75 " ld.w %3, %1++\n"
76 " adc %0, %0, %3\n"
77 " acr %0\n"
78 "1: ld.w %3, %1++\n"
79 " add %0, %3\n"
80 " acr %0\n"
81 " sub %2, 1\n"
82 " brne 1b\n"
83 " lsl %3, %0, 16\n"
84 " andl %0, 0\n"
85 " mov %2, 0xffff\n"
86 " add %0, %3\n"
87 " adc %0, %0, %2\n"
88 " com %0\n"
89 " lsr %0, 16\n"
90 : "=r"(sum), "=r"(iph), "=r"(ihl), "=r"(tmp)
91 : "1"(iph), "2"(ihl)
92 : "memory", "cc");
93 return sum;
94}
95
/*
 * Fold a 32-bit partial checksum into 16 bits and complement it.
 *
 * @sum: 32-bit one's-complement accumulator, e.g. from csum_partial()
 *
 * Returns the complemented 16-bit checksum. The upper 16 bits of the
 * return value are always zero, so the result is usable both by callers
 * that truncate it to 16 bits and by callers that test or store the full
 * unsigned int.
 */
static inline unsigned int csum_fold(unsigned int sum)
{
	/* Add the two halves; the result may carry into bit 16. */
	sum = (sum & 0xffff) + (sum >> 16);
	/* Fold that carry back in; the value now fits in 16 bits. */
	sum = (sum & 0xffff) + (sum >> 16);

	/* Complement, keeping only the 16 checksum bits. */
	return ~sum & 0xffff;
}
114
115static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
116 unsigned long daddr,
117 unsigned short len,
118 unsigned short proto,
119 unsigned int sum)
120{
121 asm(" add %0, %1\n"
122 " adc %0, %0, %2\n"
123 " adc %0, %0, %3\n"
124 " acr %0"
125 : "=r"(sum)
126 : "r"(daddr), "r"(saddr), "r"(ntohs(len) | (proto << 16)),
127 "0"(sum)
128 : "cc");
129
130 return sum;
131}
132
/*
 * Compute the checksum of the TCP/UDP pseudo-header.
 *
 * The pseudo-header fields are summed by csum_tcpudp_nofold() and the
 * result is folded by csum_fold(); the value returned is a 16-bit
 * checksum that is already complemented.
 */
static inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
						   unsigned long daddr,
						   unsigned short len,
						   unsigned short proto,
						   unsigned int sum)
{
	unsigned long unfolded;

	unfolded = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);

	return csum_fold(unfolded);
}
145
/*
 * Checksum @len bytes at @buff, starting from a zero sum, and fold the
 * result. Used for miscellaneous IP-like checksums, mainly in icmp.c.
 */
static inline unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned int partial = csum_partial(buff, len, 0);

	return csum_fold(partial);
}
155
156#endif /* __ASM_AVR32_CHECKSUM_H */
diff --git a/include/asm-avr32/cputime.h b/include/asm-avr32/cputime.h
new file mode 100644
index 000000000000..e87e0f81cbeb
--- /dev/null
+++ b/include/asm-avr32/cputime.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_CPUTIME_H
2#define __ASM_AVR32_CPUTIME_H
3
4#include <asm-generic/cputime.h>
5
6#endif /* __ASM_AVR32_CPUTIME_H */
diff --git a/include/asm-avr32/current.h b/include/asm-avr32/current.h
new file mode 100644
index 000000000000..c7b0549eab8a
--- /dev/null
+++ b/include/asm-avr32/current.h
@@ -0,0 +1,15 @@
1#ifndef __ASM_AVR32_CURRENT_H
2#define __ASM_AVR32_CURRENT_H
3
4#include <linux/thread_info.h>
5
6struct task_struct;
7
8inline static struct task_struct * get_current(void)
9{
10 return current_thread_info()->task;
11}
12
13#define current get_current()
14
15#endif /* __ASM_AVR32_CURRENT_H */
diff --git a/include/asm-avr32/delay.h b/include/asm-avr32/delay.h
new file mode 100644
index 000000000000..cc3b2e3343b3
--- /dev/null
+++ b/include/asm-avr32/delay.h
@@ -0,0 +1,26 @@
1#ifndef __ASM_AVR32_DELAY_H
2#define __ASM_AVR32_DELAY_H
3
4/*
5 * Copyright (C) 1993 Linus Torvalds
6 *
7 * Delay routines calling functions in arch/avr32/lib/delay.c
8 */
9
10extern void __bad_udelay(void);
11extern void __bad_ndelay(void);
12
13extern void __udelay(unsigned long usecs);
14extern void __ndelay(unsigned long nsecs);
15extern void __const_udelay(unsigned long usecs);
16extern void __delay(unsigned long loops);
17
18#define udelay(n) (__builtin_constant_p(n) ? \
19 ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
20 __udelay(n))
21
22#define ndelay(n) (__builtin_constant_p(n) ? \
23 ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
24 __ndelay(n))
25
26#endif /* __ASM_AVR32_DELAY_H */
diff --git a/include/asm-avr32/div64.h b/include/asm-avr32/div64.h
new file mode 100644
index 000000000000..d7ddd4fdeca6
--- /dev/null
+++ b/include/asm-avr32/div64.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_DIV64_H
2#define __ASM_AVR32_DIV64_H
3
4#include <asm-generic/div64.h>
5
6#endif /* __ASM_AVR32_DIV64_H */
diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h
new file mode 100644
index 000000000000..4c40cb41cdf8
--- /dev/null
+++ b/include/asm-avr32/dma-mapping.h
@@ -0,0 +1,320 @@
1#ifndef __ASM_AVR32_DMA_MAPPING_H
2#define __ASM_AVR32_DMA_MAPPING_H
3
4#include <linux/mm.h>
5#include <linux/device.h>
6#include <asm/scatterlist.h>
7#include <asm/processor.h>
8#include <asm/cacheflush.h>
9#include <asm/io.h>
10
11extern void dma_cache_sync(void *vaddr, size_t size, int direction);
12
13/*
14 * Return whether the given device DMA address mask can be supported
15 * properly. For example, if your device can only drive the low 24-bits
16 * during bus mastering, then you would pass 0x00ffffff as the mask
17 * to this function.
18 */
19static inline int dma_supported(struct device *dev, u64 mask)
20{
21 /* Fix when needed. I really don't know of any limitations */
22 return 1;
23}
24
25static inline int dma_set_mask(struct device *dev, u64 dma_mask)
26{
27 if (!dev->dma_mask || !dma_supported(dev, dma_mask))
28 return -EIO;
29
30 *dev->dma_mask = dma_mask;
31 return 0;
32}
33
34/**
35 * dma_alloc_coherent - allocate consistent memory for DMA
36 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
37 * @size: required memory size
38 * @handle: bus-specific DMA address
39 *
40 * Allocate some uncached, unbuffered memory for a device for
41 * performing DMA. This function allocates pages, and will
42 * return the CPU-viewed address, and sets @handle to be the
43 * device-viewed address.
44 */
45extern void *dma_alloc_coherent(struct device *dev, size_t size,
46 dma_addr_t *handle, gfp_t gfp);
47
48/**
49 * dma_free_coherent - free memory allocated by dma_alloc_coherent
50 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
51 * @size: size of memory originally requested in dma_alloc_coherent
52 * @cpu_addr: CPU-view address returned from dma_alloc_coherent
53 * @handle: device-view address returned from dma_alloc_coherent
54 *
55 * Free (and unmap) a DMA buffer previously allocated by
56 * dma_alloc_coherent().
57 *
58 * References to memory and mappings associated with cpu_addr/handle
59 * during and after this call executing are illegal.
60 */
61extern void dma_free_coherent(struct device *dev, size_t size,
62 void *cpu_addr, dma_addr_t handle);
63
64/**
65 * dma_alloc_writecombine - allocate write-combining memory for DMA
66 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
67 * @size: required memory size
68 * @handle: bus-specific DMA address
69 *
70 * Allocate some uncached, buffered memory for a device for
71 * performing DMA. This function allocates pages, and will
72 * return the CPU-viewed address, and sets @handle to be the
73 * device-viewed address.
74 */
75extern void *dma_alloc_writecombine(struct device *dev, size_t size,
76 dma_addr_t *handle, gfp_t gfp);
77
78/**
79 * dma_free_writecombine - free memory allocated by dma_alloc_writecombine
80 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
81 * @size: size of memory originally requested in dma_alloc_writecombine
82 * @cpu_addr: CPU-view address returned from dma_alloc_writecombine
83 * @handle: device-view address returned from dma_alloc_writecombine
84 *
85 * Free (and unmap) a DMA buffer previously allocated by
86 * dma_alloc_writecombine().
87 *
88 * References to memory and mappings associated with cpu_addr/handle
89 * during and after this call executing are illegal.
90 */
91extern void dma_free_writecombine(struct device *dev, size_t size,
92 void *cpu_addr, dma_addr_t handle);
93
94/**
95 * dma_map_single - map a single buffer for streaming DMA
96 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
97 * @cpu_addr: CPU direct mapped address of buffer
98 * @size: size of buffer to map
99 * @dir: DMA transfer direction
100 *
101 * Ensure that any data held in the cache is appropriately discarded
102 * or written back.
103 *
104 * The device owns this memory once this call has completed. The CPU
105 * can regain ownership by calling dma_unmap_single() or dma_sync_single().
106 */
107static inline dma_addr_t
108dma_map_single(struct device *dev, void *cpu_addr, size_t size,
109 enum dma_data_direction direction)
110{
111 dma_cache_sync(cpu_addr, size, direction);
112 return virt_to_bus(cpu_addr);
113}
114
115/**
116 * dma_unmap_single - unmap a single buffer previously mapped
117 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
118 * @handle: DMA address of buffer
119 * @size: size of buffer to map
120 * @dir: DMA transfer direction
121 *
122 * Unmap a single streaming mode DMA translation. The handle and size
123 * must match what was provided in the previous dma_map_single() call.
124 * All other usages are undefined.
125 *
126 * After this call, reads by the CPU to the buffer are guaranteed to see
127 * whatever the device wrote there.
128 */
129static inline void
130dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
131 enum dma_data_direction direction)
132{
133
134}
135
136/**
137 * dma_map_page - map a portion of a page for streaming DMA
138 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
139 * @page: page that buffer resides in
140 * @offset: offset into page for start of buffer
141 * @size: size of buffer to map
142 * @dir: DMA transfer direction
143 *
144 * Ensure that any data held in the cache is appropriately discarded
145 * or written back.
146 *
147 * The device owns this memory once this call has completed. The CPU
148 * can regain ownership by calling dma_unmap_page() or dma_sync_single().
149 */
150static inline dma_addr_t
151dma_map_page(struct device *dev, struct page *page,
152 unsigned long offset, size_t size,
153 enum dma_data_direction direction)
154{
155 return dma_map_single(dev, page_address(page) + offset,
156 size, direction);
157}
158
159/**
160 * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
161 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
162 * @handle: DMA address of buffer
163 * @size: size of buffer to map
164 * @dir: DMA transfer direction
165 *
166 * Unmap a single streaming mode DMA translation. The handle and size
167 * must match what was provided in the previous dma_map_single() call.
168 * All other usages are undefined.
169 *
170 * After this call, reads by the CPU to the buffer are guaranteed to see
171 * whatever the device wrote there.
172 */
173static inline void
174dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
175 enum dma_data_direction direction)
176{
177 dma_unmap_single(dev, dma_address, size, direction);
178}
179
180/**
181 * dma_map_sg - map a set of SG buffers for streaming mode DMA
182 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
183 * @sg: list of buffers
184 * @nents: number of buffers to map
185 * @dir: DMA transfer direction
186 *
187 * Map a set of buffers described by scatterlist in streaming
188 * mode for DMA. This is the scatter-gather version of the
189 * above dma_map_single interface. Here the scatter gather list
190 * elements are each tagged with the appropriate dma address
191 * and length. They are obtained via sg_dma_{address,length}(SG).
192 *
193 * NOTE: An implementation may be able to use a smaller number of
194 * DMA address/length pairs than there are SG table elements.
195 * (for example via virtual mapping capabilities)
196 * The routine returns the number of addr/length pairs actually
197 * used, at most nents.
198 *
199 * Device ownership issues as mentioned above for dma_map_single are
200 * the same here.
201 */
202static inline int
203dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
204 enum dma_data_direction direction)
205{
206 int i;
207
208 for (i = 0; i < nents; i++) {
209 char *virt;
210
211 sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
212 virt = page_address(sg[i].page) + sg[i].offset;
213 dma_cache_sync(virt, sg[i].length, direction);
214 }
215
216 return nents;
217}
218
219/**
220 * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
221 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
222 * @sg: list of buffers
223 * @nents: number of buffers to map
224 * @dir: DMA transfer direction
225 *
226 * Unmap a set of streaming mode DMA translations.
227 * Again, CPU read rules concerning calls here are the same as for
228 * dma_unmap_single() above.
229 */
230static inline void
231dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
232 enum dma_data_direction direction)
233{
234
235}
236
237/**
238 * dma_sync_single_for_cpu
239 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
240 * @handle: DMA address of buffer
241 * @size: size of buffer to map
242 * @dir: DMA transfer direction
243 *
244 * Make physical memory consistent for a single streaming mode DMA
245 * translation after a transfer.
246 *
247 * If you perform a dma_map_single() but wish to interrogate the
248 * buffer using the cpu, yet do not wish to teardown the DMA mapping,
249 * you must call this function before doing so. At the next point you
250 * give the DMA address back to the card, you must first perform a
251 * dma_sync_single_for_device, and then the device again owns the
252 * buffer.
253 */
254static inline void
255dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
256 size_t size, enum dma_data_direction direction)
257{
258 dma_cache_sync(bus_to_virt(dma_handle), size, direction);
259}
260
261static inline void
262dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
263 size_t size, enum dma_data_direction direction)
264{
265 dma_cache_sync(bus_to_virt(dma_handle), size, direction);
266}
267
268/**
269 * dma_sync_sg_for_cpu
270 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
271 * @sg: list of buffers
272 * @nents: number of buffers to map
273 * @dir: DMA transfer direction
274 *
275 * Make physical memory consistent for a set of streaming
276 * mode DMA translations after a transfer.
277 *
278 * The same as dma_sync_single_for_* but for a scatter-gather list,
279 * same rules and usage.
280 */
281static inline void
282dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
283 int nents, enum dma_data_direction direction)
284{
285 int i;
286
287 for (i = 0; i < nents; i++) {
288 dma_cache_sync(page_address(sg[i].page) + sg[i].offset,
289 sg[i].length, direction);
290 }
291}
292
293static inline void
294dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
295 int nents, enum dma_data_direction direction)
296{
297 int i;
298
299 for (i = 0; i < nents; i++) {
300 dma_cache_sync(page_address(sg[i].page) + sg[i].offset,
301 sg[i].length, direction);
302 }
303}
304
305/* Now for the API extensions over the pci_ one */
306
307#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
308#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
309
310static inline int dma_is_consistent(dma_addr_t dma_addr)
311{
312 return 1;
313}
314
315static inline int dma_get_cache_alignment(void)
316{
317 return boot_cpu_data.dcache.linesz;
318}
319
320#endif /* __ASM_AVR32_DMA_MAPPING_H */
diff --git a/include/asm-avr32/dma.h b/include/asm-avr32/dma.h
new file mode 100644
index 000000000000..9e91205590ac
--- /dev/null
+++ b/include/asm-avr32/dma.h
@@ -0,0 +1,8 @@
1#ifndef __ASM_AVR32_DMA_H
2#define __ASM_AVR32_DMA_H
3
4/* The maximum address that we can perform a DMA transfer to on this platform.
5 * Not really applicable to AVR32, but some functions need it. */
6#define MAX_DMA_ADDRESS 0xffffffff
7
8#endif /* __ASM_AVR32_DMA_H */
diff --git a/include/asm-avr32/elf.h b/include/asm-avr32/elf.h
new file mode 100644
index 000000000000..d334b4994d2d
--- /dev/null
+++ b/include/asm-avr32/elf.h
@@ -0,0 +1,110 @@
1#ifndef __ASM_AVR32_ELF_H
2#define __ASM_AVR32_ELF_H
3
4/* AVR32 relocation numbers */
5#define R_AVR32_NONE 0
6#define R_AVR32_32 1
7#define R_AVR32_16 2
8#define R_AVR32_8 3
9#define R_AVR32_32_PCREL 4
10#define R_AVR32_16_PCREL 5
11#define R_AVR32_8_PCREL 6
12#define R_AVR32_DIFF32 7
13#define R_AVR32_DIFF16 8
14#define R_AVR32_DIFF8 9
15#define R_AVR32_GOT32 10
16#define R_AVR32_GOT16 11
17#define R_AVR32_GOT8 12
18#define R_AVR32_21S 13
19#define R_AVR32_16U 14
20#define R_AVR32_16S 15
21#define R_AVR32_8S 16
22#define R_AVR32_8S_EXT 17
23#define R_AVR32_22H_PCREL 18
24#define R_AVR32_18W_PCREL 19
25#define R_AVR32_16B_PCREL 20
26#define R_AVR32_16N_PCREL 21
27#define R_AVR32_14UW_PCREL 22
28#define R_AVR32_11H_PCREL 23
29#define R_AVR32_10UW_PCREL 24
30#define R_AVR32_9H_PCREL 25
31#define R_AVR32_9UW_PCREL 26
32#define R_AVR32_HI16 27
33#define R_AVR32_LO16 28
34#define R_AVR32_GOTPC 29
35#define R_AVR32_GOTCALL 30
36#define R_AVR32_LDA_GOT 31
37#define R_AVR32_GOT21S 32
38#define R_AVR32_GOT18SW 33
39#define R_AVR32_GOT16S 34
40#define R_AVR32_GOT7UW 35
41#define R_AVR32_32_CPENT 36
42#define R_AVR32_CPCALL 37
43#define R_AVR32_16_CP 38
44#define R_AVR32_9W_CP 39
45#define R_AVR32_RELATIVE 40
46#define R_AVR32_GLOB_DAT 41
47#define R_AVR32_JMP_SLOT 42
48#define R_AVR32_ALIGN 43
49
50/*
51 * ELF register definitions..
52 */
53
54#include <asm/ptrace.h>
55#include <asm/user.h>
56
57typedef unsigned long elf_greg_t;
58
59#define ELF_NGREG (sizeof (struct pt_regs) / sizeof (elf_greg_t))
60typedef elf_greg_t elf_gregset_t[ELF_NGREG];
61
62typedef struct user_fpu_struct elf_fpregset_t;
63
64/*
65 * This is used to ensure we don't load something for the wrong architecture.
66 */
67#define elf_check_arch(x) ( (x)->e_machine == EM_AVR32 )
68
69/*
70 * These are used to set parameters in the core dumps.
71 */
72#define ELF_CLASS ELFCLASS32
73#ifdef __LITTLE_ENDIAN__
74#define ELF_DATA ELFDATA2LSB
75#else
76#define ELF_DATA ELFDATA2MSB
77#endif
78#define ELF_ARCH EM_AVR32
79
80#define USE_ELF_CORE_DUMP
81#define ELF_EXEC_PAGESIZE 4096
82
83/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
84 use of this is to invoke "./ld.so someprog" to test out a new version of
85 the loader. We need to make sure that it is out of the way of the program
86 that it will "exec", and that there is sufficient room for the brk. */
87
88#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
89
90
91/* This yields a mask that user programs can use to figure out what
92 instruction set this CPU supports. This could be done in user space,
93 but it's not easy, and we've already done it here. */
94
95#define ELF_HWCAP (0)
96
97/* This yields a string that ld.so will use to load implementation
98 specific libraries for optimization. This is more specific in
99 intent than poking at uname or /proc/cpuinfo.
100
101 For the moment, we have only optimizations for the Intel generations,
102 but that could change... */
103
104#define ELF_PLATFORM (NULL)
105
106#ifdef __KERNEL__
107#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT)
108#endif
109
110#endif /* __ASM_AVR32_ELF_H */
diff --git a/include/asm-avr32/emergency-restart.h b/include/asm-avr32/emergency-restart.h
new file mode 100644
index 000000000000..3e7e014776ba
--- /dev/null
+++ b/include/asm-avr32/emergency-restart.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_EMERGENCY_RESTART_H
2#define __ASM_AVR32_EMERGENCY_RESTART_H
3
4#include <asm-generic/emergency-restart.h>
5
6#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */
diff --git a/include/asm-avr32/errno.h b/include/asm-avr32/errno.h
new file mode 100644
index 000000000000..558a7249f06d
--- /dev/null
+++ b/include/asm-avr32/errno.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_ERRNO_H
2#define __ASM_AVR32_ERRNO_H
3
4#include <asm-generic/errno.h>
5
6#endif /* __ASM_AVR32_ERRNO_H */
diff --git a/include/asm-avr32/fcntl.h b/include/asm-avr32/fcntl.h
new file mode 100644
index 000000000000..14c0c4402b11
--- /dev/null
+++ b/include/asm-avr32/fcntl.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_FCNTL_H
2#define __ASM_AVR32_FCNTL_H
3
4#include <asm-generic/fcntl.h>
5
6#endif /* __ASM_AVR32_FCNTL_H */
diff --git a/include/asm-avr32/futex.h b/include/asm-avr32/futex.h
new file mode 100644
index 000000000000..10419f14a68a
--- /dev/null
+++ b/include/asm-avr32/futex.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_FUTEX_H
2#define __ASM_AVR32_FUTEX_H
3
4#include <asm-generic/futex.h>
5
6#endif /* __ASM_AVR32_FUTEX_H */
diff --git a/include/asm-avr32/hardirq.h b/include/asm-avr32/hardirq.h
new file mode 100644
index 000000000000..267354356f60
--- /dev/null
+++ b/include/asm-avr32/hardirq.h
@@ -0,0 +1,34 @@
1#ifndef __ASM_AVR32_HARDIRQ_H
2#define __ASM_AVR32_HARDIRQ_H
3
4#include <linux/threads.h>
5#include <asm/irq.h>
6
7#ifndef __ASSEMBLY__
8
9#include <linux/cache.h>
10
11/* entry.S is sensitive to the offsets of these fields */
12typedef struct {
13 unsigned int __softirq_pending;
14} ____cacheline_aligned irq_cpustat_t;
15
16void ack_bad_irq(unsigned int irq);
17
18/* Standard mappings for irq_cpustat_t above */
19#include <linux/irq_cpustat.h>
20
21#endif /* __ASSEMBLY__ */
22
23#define HARDIRQ_BITS 12
24
25/*
26 * The hardirq mask has to be large enough to have
27 * space for potentially all IRQ sources in the system
28 * nesting on a single CPU:
29 */
30#if (1 << HARDIRQ_BITS) < NR_IRQS
31# error HARDIRQ_BITS is too low!
32#endif
33
34#endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/include/asm-avr32/hw_irq.h b/include/asm-avr32/hw_irq.h
new file mode 100644
index 000000000000..218b0a6bfd1b
--- /dev/null
+++ b/include/asm-avr32/hw_irq.h
@@ -0,0 +1,9 @@
1#ifndef __ASM_AVR32_HW_IRQ_H
2#define __ASM_AVR32_HW_IRQ_H
3
/*
 * Only a pointer to the descriptor is taken here, so a forward
 * declaration is enough and keeps this header independent of the
 * order in which it is included.
 */
struct hw_interrupt_type;

/*
 * hw_resend_irq - resend hook; a no-op in this implementation
 * @h: interrupt type descriptor (unused)
 * @i: IRQ number (unused)
 */
static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
{
	/* Nothing to do */
}
8
9#endif /* __ASM_AVR32_HW_IRQ_H */
diff --git a/include/asm-avr32/intc.h b/include/asm-avr32/intc.h
new file mode 100644
index 000000000000..1ac9ca75e8fd
--- /dev/null
+++ b/include/asm-avr32/intc.h
@@ -0,0 +1,128 @@
1#ifndef __ASM_AVR32_INTC_H
2#define __ASM_AVR32_INTC_H
3
4#include <linux/sysdev.h>
5#include <linux/interrupt.h>
6
7struct irq_controller;
8struct irqaction;
9struct pt_regs;
10
11struct platform_device;
12
13/* Information about the internal interrupt controller */
14struct intc_device {
15 /* ioremapped address of configuration block */
16 void __iomem *regs;
17
18 /* the physical device */
19 struct platform_device *pdev;
20
21 /* Number of interrupt lines per group. */
22 unsigned int irqs_per_group;
23
24 /* The highest group ID + 1 */
25 unsigned int nr_groups;
26
27 /*
28 * Bitfield indicating which groups are actually in use. The
29 * size of the array is
30 * ceil(nr_groups / (8 * sizeof(unsigned int))).
31 */
32 unsigned int group_mask[];
33};
34
35struct irq_controller_class {
36 /*
37 * A short name identifying this kind of controller.
38 */
39 const char *typename;
40 /*
41 * Handle the IRQ. Must do any necessary acking and masking.
42 */
43 irqreturn_t (*handle)(int irq, void *dev_id, struct pt_regs *regs);
44 /*
45 * Register a new IRQ handler.
46 */
47 int (*setup)(struct irq_controller *ctrl, unsigned int irq,
48 struct irqaction *action);
49 /*
50 * Unregister a IRQ handler.
51 */
52 void (*free)(struct irq_controller *ctrl, unsigned int irq,
53 void *dev_id);
54 /*
55 * Mask the IRQ in the interrupt controller.
56 */
57 void (*mask)(struct irq_controller *ctrl, unsigned int irq);
58 /*
59 * Unmask the IRQ in the interrupt controller.
60 */
61 void (*unmask)(struct irq_controller *ctrl, unsigned int irq);
62 /*
63 * Set the type of the IRQ. See below for possible types.
64 * Return -EINVAL if a given type is not supported
65 */
66 int (*set_type)(struct irq_controller *ctrl, unsigned int irq,
67 unsigned int type);
68 /*
69 * Return the IRQ type currently set
70 */
71 unsigned int (*get_type)(struct irq_controller *ctrl, unsigned int irq);
72};
73
74struct irq_controller {
75 struct irq_controller_class *class;
76 unsigned int irq_group;
77 unsigned int first_irq;
78 unsigned int nr_irqs;
79 struct list_head list;
80};
81
82struct intc_group_desc {
83 struct irq_controller *ctrl;
84 irqreturn_t (*handle)(int, void *, struct pt_regs *);
85 unsigned long flags;
86 void *dev_id;
87 const char *devname;
88};
89
90/*
91 * The internal interrupt controller. Defined in board/part-specific
92 * devices.c.
93 * TODO: Should probably be defined per-cpu.
94 */
95extern struct intc_device intc;
96
97extern int request_internal_irq(unsigned int irq,
98 irqreturn_t (*handler)(int, void *, struct pt_regs *),
99 unsigned long irqflags,
100 const char *devname, void *dev_id);
101extern void free_internal_irq(unsigned int irq);
102
103/* Only used by time_init() */
104extern int setup_internal_irq(unsigned int irq, struct intc_group_desc *desc);
105
106/*
107 * Set interrupt priority for a given group. `group' can be found by
108 * using irq_to_group(irq). Priority can be from 0 (lowest) to 3
109 * (highest). Higher-priority interrupts will preempt lower-priority
110 * interrupts (unless interrupts are masked globally).
111 *
112 * This function does not check for conflicts within a group.
113 */
114extern int intc_set_priority(unsigned int group,
115 unsigned int priority);
116
117/*
118 * Returns a bitmask of pending interrupts in a group.
119 */
120extern unsigned long intc_get_pending(unsigned int group);
121
122/*
123 * Register a new external interrupt controller. Returns the first
124 * external IRQ number that is assigned to the new controller.
125 */
126extern int intc_register_controller(struct irq_controller *ctrl);
127
128#endif /* __ASM_AVR32_INTC_H */
diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h
new file mode 100644
index 000000000000..2fc8f111dce9
--- /dev/null
+++ b/include/asm-avr32/io.h
@@ -0,0 +1,253 @@
1#ifndef __ASM_AVR32_IO_H
2#define __ASM_AVR32_IO_H
3
4#include <linux/string.h>
5
6#ifdef __KERNEL__
7
8#include <asm/addrspace.h>
9#include <asm/byteorder.h>
10
/*
 * Translate a kernel virtual address to a physical address using
 * PHYSADDR(). This will only work when address is in P1 or P2.
 */
static inline unsigned long virt_to_phys(volatile void *address)
{
	unsigned long phys = PHYSADDR(address);

	return phys;
}

/*
 * Translate a physical address to a virtual address using P1SEGADDR().
 */
static inline void *phys_to_virt(unsigned long address)
{
	void *virt = (void *)P1SEGADDR(address);

	return virt;
}
21
22#define cached_to_phys(addr) ((unsigned long)PHYSADDR(addr))
23#define uncached_to_phys(addr) ((unsigned long)PHYSADDR(addr))
24#define phys_to_cached(addr) ((void *)P1SEGADDR(addr))
25#define phys_to_uncached(addr) ((void *)P2SEGADDR(addr))
26
27/*
28 * Generic IO read/write. These perform native-endian accesses. Note
29 * that some architectures will want to re-define __raw_{read,write}w.
30 */
31extern void __raw_writesb(unsigned int addr, const void *data, int bytelen);
32extern void __raw_writesw(unsigned int addr, const void *data, int wordlen);
33extern void __raw_writesl(unsigned int addr, const void *data, int longlen);
34
35extern void __raw_readsb(unsigned int addr, void *data, int bytelen);
36extern void __raw_readsw(unsigned int addr, void *data, int wordlen);
37extern void __raw_readsl(unsigned int addr, void *data, int longlen);
38
39static inline void writeb(unsigned char b, volatile void __iomem *addr)
40{
41 *(volatile unsigned char __force *)addr = b;
42}
43static inline void writew(unsigned short b, volatile void __iomem *addr)
44{
45 *(volatile unsigned short __force *)addr = b;
46}
47static inline void writel(unsigned int b, volatile void __iomem *addr)
48{
49 *(volatile unsigned int __force *)addr = b;
50}
51#define __raw_writeb writeb
52#define __raw_writew writew
53#define __raw_writel writel
54
55static inline unsigned char readb(const volatile void __iomem *addr)
56{
57 return *(const volatile unsigned char __force *)addr;
58}
59static inline unsigned short readw(const volatile void __iomem *addr)
60{
61 return *(const volatile unsigned short __force *)addr;
62}
63static inline unsigned int readl(const volatile void __iomem *addr)
64{
65 return *(const volatile unsigned int __force *)addr;
66}
67#define __raw_readb readb
68#define __raw_readw readw
69#define __raw_readl readl
70
/*
 * String accessors: forward to the out-of-line __raw_{write,read}s[bwl]()
 * routines, which take the target address as an unsigned int.
 *
 * Every macro argument is parenthesized. In particular the address must
 * be parenthesized before the cast is applied: without that, an argument
 * such as `base + off' would expand to `(unsigned int)base + off', casting
 * only the first operand.
 */
#define writesb(p, d, l)	__raw_writesb((unsigned int)(p), (d), (l))
#define writesw(p, d, l)	__raw_writesw((unsigned int)(p), (d), (l))
#define writesl(p, d, l)	__raw_writesl((unsigned int)(p), (d), (l))

#define readsb(p, d, l)		__raw_readsb((unsigned int)(p), (d), (l))
#define readsw(p, d, l)		__raw_readsw((unsigned int)(p), (d), (l))
#define readsl(p, d, l)		__raw_readsl((unsigned int)(p), (d), (l))
78
79/*
80 * These two are only here because ALSA _thinks_ it needs them...
81 */
82static inline void memcpy_fromio(void * to, const volatile void __iomem *from,
83 unsigned long count)
84{
85 char *p = to;
86 while (count) {
87 count--;
88 *p = readb(from);
89 p++;
90 from++;
91 }
92}
93
94static inline void memcpy_toio(volatile void __iomem *to, const void * from,
95 unsigned long count)
96{
97 const char *p = from;
98 while (count) {
99 count--;
100 writeb(*p, to);
101 p++;
102 to++;
103 }
104}
105
106static inline void memset_io(volatile void __iomem *addr, unsigned char val,
107 unsigned long count)
108{
109 memset((void __force *)addr, val, count);
110}
111
112/*
113 * Bad read/write accesses...
114 */
115extern void __readwrite_bug(const char *fn);
116
117#define IO_SPACE_LIMIT 0xffffffff
118
119/* Convert I/O port address to virtual address */
120#define __io(p) ((void __iomem *)phys_to_uncached(p))
121
122/*
123 * IO port access primitives
124 * -------------------------
125 *
126 * The AVR32 doesn't have special IO access instructions; all IO is memory
127 * mapped. Note that these are defined to perform little endian accesses
128 * only. Their primary purpose is to access PCI and ISA peripherals.
129 *
130 * Note that for a big endian machine, this implies that the following
131 * big endian mode connectivity is in place.
132 *
133 * The machine specific io.h include defines __io to translate an "IO"
134 * address to a memory address.
135 *
136 * Note that we prevent GCC re-ordering or caching values in expressions
137 * by introducing sequence points into the in*() definitions. Note that
138 * __raw_* do not guarantee this behaviour.
139 *
140 * The {in,out}[bwl] macros are for emulating x86-style PCI/ISA IO space.
141 */
142#define outb(v, p) __raw_writeb(v, __io(p))
143#define outw(v, p) __raw_writew(cpu_to_le16(v), __io(p))
144#define outl(v, p) __raw_writel(cpu_to_le32(v), __io(p))
145
146#define inb(p) __raw_readb(__io(p))
147#define inw(p) le16_to_cpu(__raw_readw(__io(p)))
148#define inl(p) le32_to_cpu(__raw_readl(__io(p)))
149
150static inline void __outsb(unsigned long port, void *addr, unsigned int count)
151{
152 while (count--) {
153 outb(*(u8 *)addr, port);
154 addr++;
155 }
156}
157
158static inline void __insb(unsigned long port, void *addr, unsigned int count)
159{
160 while (count--) {
161 *(u8 *)addr = inb(port);
162 addr++;
163 }
164}
165
166static inline void __outsw(unsigned long port, void *addr, unsigned int count)
167{
168 while (count--) {
169 outw(*(u16 *)addr, port);
170 addr += 2;
171 }
172}
173
174static inline void __insw(unsigned long port, void *addr, unsigned int count)
175{
176 while (count--) {
177 *(u16 *)addr = inw(port);
178 addr += 2;
179 }
180}
181
182static inline void __outsl(unsigned long port, void *addr, unsigned int count)
183{
184 while (count--) {
185 outl(*(u32 *)addr, port);
186 addr += 4;
187 }
188}
189
190static inline void __insl(unsigned long port, void *addr, unsigned int count)
191{
192 while (count--) {
193 *(u32 *)addr = inl(port);
194 addr += 4;
195 }
196}
197
198#define outsb(port, addr, count) __outsb(port, addr, count)
199#define insb(port, addr, count) __insb(port, addr, count)
200#define outsw(port, addr, count) __outsw(port, addr, count)
201#define insw(port, addr, count) __insw(port, addr, count)
202#define outsl(port, addr, count) __outsl(port, addr, count)
203#define insl(port, addr, count) __insl(port, addr, count)
204
205extern void __iomem *__ioremap(unsigned long offset, size_t size,
206 unsigned long flags);
207extern void __iounmap(void __iomem *addr);
208
209/*
210 * ioremap - map bus memory into CPU space
211 * @offset bus address of the memory
212 * @size size of the resource to map
213 *
214 * ioremap performs a platform specific sequence of operations to make
215 * bus memory CPU accessible via the readb/.../writel functions and
216 * the other mmio helpers. The returned address is not guaranteed to
217 * be usable directly as a virtual address.
218 */
219#define ioremap(offset, size) \
220 __ioremap((offset), (size), 0)
221
222#define iounmap(addr) \
223 __iounmap(addr)
224
225#define cached(addr) P1SEGADDR(addr)
226#define uncached(addr) P2SEGADDR(addr)
227
228#define virt_to_bus virt_to_phys
229#define bus_to_virt phys_to_virt
230#define page_to_bus page_to_phys
231#define bus_to_page phys_to_page
232
233#define dma_cache_wback_inv(_start, _size) \
234 flush_dcache_region(_start, _size)
235#define dma_cache_inv(_start, _size) \
236 invalidate_dcache_region(_start, _size)
237#define dma_cache_wback(_start, _size) \
238 clean_dcache_region(_start, _size)
239
/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
#define xlate_dev_mem_ptr(p)	__va(p)

/*
 * Convert a virtual cached pointer to an uncached pointer
 *
 * This is currently the identity mapping. The argument is parenthesized
 * so that an expression argument keeps its meaning when the result is
 * used inside a larger expression (e.g. dereferenced).
 */
#define xlate_dev_kmem_ptr(p)	(p)
250
251#endif /* __KERNEL__ */
252
253#endif /* __ASM_AVR32_IO_H */
diff --git a/include/asm-avr32/ioctl.h b/include/asm-avr32/ioctl.h
new file mode 100644
index 000000000000..c8472c1398ef
--- /dev/null
+++ b/include/asm-avr32/ioctl.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_IOCTL_H
2#define __ASM_AVR32_IOCTL_H
3
4#include <asm-generic/ioctl.h>
5
6#endif /* __ASM_AVR32_IOCTL_H */
diff --git a/include/asm-avr32/ioctls.h b/include/asm-avr32/ioctls.h
new file mode 100644
index 000000000000..0500426b7186
--- /dev/null
+++ b/include/asm-avr32/ioctls.h
@@ -0,0 +1,83 @@
1#ifndef __ASM_AVR32_IOCTLS_H
2#define __ASM_AVR32_IOCTLS_H
3
4#include <asm/ioctl.h>
5
6/* 0x54 is just a magic number to make these relatively unique ('T') */
7
8#define TCGETS 0x5401
9#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
10#define TCSETSW 0x5403
11#define TCSETSF 0x5404
12#define TCGETA 0x5405
13#define TCSETA 0x5406
14#define TCSETAW 0x5407
15#define TCSETAF 0x5408
16#define TCSBRK 0x5409
17#define TCXONC 0x540A
18#define TCFLSH 0x540B
19#define TIOCEXCL 0x540C
20#define TIOCNXCL 0x540D
21#define TIOCSCTTY 0x540E
22#define TIOCGPGRP 0x540F
23#define TIOCSPGRP 0x5410
24#define TIOCOUTQ 0x5411
25#define TIOCSTI 0x5412
26#define TIOCGWINSZ 0x5413
27#define TIOCSWINSZ 0x5414
28#define TIOCMGET 0x5415
29#define TIOCMBIS 0x5416
30#define TIOCMBIC 0x5417
31#define TIOCMSET 0x5418
32#define TIOCGSOFTCAR 0x5419
33#define TIOCSSOFTCAR 0x541A
34#define FIONREAD 0x541B
35#define TIOCINQ FIONREAD
36#define TIOCLINUX 0x541C
37#define TIOCCONS 0x541D
38#define TIOCGSERIAL 0x541E
39#define TIOCSSERIAL 0x541F
40#define TIOCPKT 0x5420
41#define FIONBIO 0x5421
42#define TIOCNOTTY 0x5422
43#define TIOCSETD 0x5423
44#define TIOCGETD 0x5424
45#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
46/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */
47#define TIOCSBRK 0x5427 /* BSD compatibility */
48#define TIOCCBRK 0x5428 /* BSD compatibility */
49#define TIOCGSID 0x5429 /* Return the session ID of FD */
50#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
51#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
52
53#define FIONCLEX 0x5450
54#define FIOCLEX 0x5451
55#define FIOASYNC 0x5452
56#define TIOCSERCONFIG 0x5453
57#define TIOCSERGWILD 0x5454
58#define TIOCSERSWILD 0x5455
59#define TIOCGLCKTRMIOS 0x5456
60#define TIOCSLCKTRMIOS 0x5457
61#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
62#define TIOCSERGETLSR 0x5459 /* Get line status register */
63#define TIOCSERGETMULTI 0x545A /* Get multiport config */
64#define TIOCSERSETMULTI 0x545B /* Set multiport config */
65
66#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
67#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
68#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */
69#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */
70#define FIOQSIZE 0x5460
71
72/* Used for packet mode */
73#define TIOCPKT_DATA 0
74#define TIOCPKT_FLUSHREAD 1
75#define TIOCPKT_FLUSHWRITE 2
76#define TIOCPKT_STOP 4
77#define TIOCPKT_START 8
78#define TIOCPKT_NOSTOP 16
79#define TIOCPKT_DOSTOP 32
80
81#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
82
83#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/include/asm-avr32/ipcbuf.h b/include/asm-avr32/ipcbuf.h
new file mode 100644
index 000000000000..1552c9698f5e
--- /dev/null
+++ b/include/asm-avr32/ipcbuf.h
@@ -0,0 +1,29 @@
1#ifndef __ASM_AVR32_IPCBUF_H
2#define __ASM_AVR32_IPCBUF_H
3
4/*
5* The ipc64_perm structure for AVR32 architecture.
6* Note extra padding because this structure is passed back and forth
7* between kernel and user space.
8*
9* Pad space is left for:
10* - 32-bit mode_t and seq
11* - 2 miscellaneous 32-bit values
12*/
13
14struct ipc64_perm
15{
16 __kernel_key_t key;
17 __kernel_uid32_t uid;
18 __kernel_gid32_t gid;
19 __kernel_uid32_t cuid;
20 __kernel_gid32_t cgid;
21 __kernel_mode_t mode;
22 unsigned short __pad1;
23 unsigned short seq;
24 unsigned short __pad2;
25 unsigned long __unused1;
26 unsigned long __unused2;
27};
28
29#endif /* __ASM_AVR32_IPCBUF_H */
diff --git a/include/asm-avr32/irq.h b/include/asm-avr32/irq.h
new file mode 100644
index 000000000000..f7e725707dd7
--- /dev/null
+++ b/include/asm-avr32/irq.h
@@ -0,0 +1,10 @@
1#ifndef __ASM_AVR32_IRQ_H
2#define __ASM_AVR32_IRQ_H
3
4#define NR_INTERNAL_IRQS 64
5#define NR_EXTERNAL_IRQS 64
6#define NR_IRQS (NR_INTERNAL_IRQS + NR_EXTERNAL_IRQS)
7
8#define irq_canonicalize(i) (i)
9
10#endif /* __ASM_AVR32_IRQ_H */
diff --git a/include/asm-avr32/irqflags.h b/include/asm-avr32/irqflags.h
new file mode 100644
index 000000000000..93570daac38a
--- /dev/null
+++ b/include/asm-avr32/irqflags.h
@@ -0,0 +1,68 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_IRQFLAGS_H
9#define __ASM_AVR32_IRQFLAGS_H
10
11#include <asm/sysreg.h>
12
/*
 * Return the current value of the status register (SR), as read via
 * sysreg_read().
 */
13static inline unsigned long __raw_local_save_flags(void)
14{
15 return sysreg_read(SR);
16}
17
/* Statement form: store the current status register value into x. */
18#define raw_local_save_flags(x) \
19 do { (x) = __raw_local_save_flags(); } while (0)
20
21/*
22 * This will restore ALL status register flags, not only the interrupt
23 * mask flag.
24 *
25 * The empty asm statement informs the compiler of this fact while
26 * also serving as a barrier.
27 */
28static inline void raw_local_irq_restore(unsigned long flags)
29{
30 sysreg_write(SR, flags);
31 asm volatile("" : : : "memory", "cc");
32}
33
/*
 * Issue "ssrf" on the GM bit of the status register. A set GM bit is
 * what raw_irqs_disabled_flags() reports as "interrupts disabled".
 * The "memory" clobber makes the statement a compiler barrier.
 */
34static inline void raw_local_irq_disable(void)
35{
36 asm volatile("ssrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory");
37}
38
/*
 * Issue "csrf" on the GM bit of the status register, the counterpart
 * of raw_local_irq_disable(). Also a compiler barrier ("memory" clobber).
 */
39static inline void raw_local_irq_enable(void)
40{
41 asm volatile("csrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory");
42}
43
44static inline int raw_irqs_disabled_flags(unsigned long flags)
45{
46 return (flags & SYSREG_BIT(GM)) != 0;
47}
48
49static inline int raw_irqs_disabled(void)
50{
51 unsigned long flags = __raw_local_save_flags();
52
53 return raw_irqs_disabled_flags(flags);
54}
55
56static inline unsigned long __raw_local_irq_save(void)
57{
58 unsigned long flags = __raw_local_save_flags();
59
60 raw_local_irq_disable();
61
62 return flags;
63}
64
65#define raw_local_irq_save(flags) \
66 do { (flags) = __raw_local_irq_save(); } while (0)
67
68#endif /* __ASM_AVR32_IRQFLAGS_H */
diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h
new file mode 100644
index 000000000000..f583b643ffb2
--- /dev/null
+++ b/include/asm-avr32/kdebug.h
@@ -0,0 +1,38 @@
1#ifndef __ASM_AVR32_KDEBUG_H
2#define __ASM_AVR32_KDEBUG_H
3
4#include <linux/notifier.h>
5
6struct pt_regs;
7
8struct die_args {
9 struct pt_regs *regs;
10 int trapnr;
11};
12
13int register_die_notifier(struct notifier_block *nb);
14int unregister_die_notifier(struct notifier_block *nb);
15int register_page_fault_notifier(struct notifier_block *nb);
16int unregister_page_fault_notifier(struct notifier_block *nb);
17extern struct atomic_notifier_head avr32_die_chain;
18
19/* Grossly misnamed. */
20enum die_val {
21 DIE_FAULT,
22 DIE_BREAKPOINT,
23 DIE_SSTEP,
24 DIE_PAGE_FAULT,
25};
26
27static inline int notify_die(enum die_val val, struct pt_regs *regs,
28 int trap, int sig)
29{
30 struct die_args args = {
31 .regs = regs,
32 .trapnr = trap,
33 };
34
35 return atomic_notifier_call_chain(&avr32_die_chain, val, &args);
36}
37
38#endif /* __ASM_AVR32_KDEBUG_H */
diff --git a/include/asm-avr32/kmap_types.h b/include/asm-avr32/kmap_types.h
new file mode 100644
index 000000000000..b7f5c6870107
--- /dev/null
+++ b/include/asm-avr32/kmap_types.h
@@ -0,0 +1,30 @@
1#ifndef __ASM_AVR32_KMAP_TYPES_H
2#define __ASM_AVR32_KMAP_TYPES_H
3
4#ifdef CONFIG_DEBUG_HIGHMEM
5# define D(n) __KM_FENCE_##n ,
6#else
7# define D(n)
8#endif
9
10enum km_type {
11D(0) KM_BOUNCE_READ,
12D(1) KM_SKB_SUNRPC_DATA,
13D(2) KM_SKB_DATA_SOFTIRQ,
14D(3) KM_USER0,
15D(4) KM_USER1,
16D(5) KM_BIO_SRC_IRQ,
17D(6) KM_BIO_DST_IRQ,
18D(7) KM_PTE0,
19D(8) KM_PTE1,
20D(9) KM_PTE2,
21D(10) KM_IRQ0,
22D(11) KM_IRQ1,
23D(12) KM_SOFTIRQ0,
24D(13) KM_SOFTIRQ1,
25D(14) KM_TYPE_NR
26};
27
28#undef D
29
30#endif /* __ASM_AVR32_KMAP_TYPES_H */
diff --git a/include/asm-avr32/kprobes.h b/include/asm-avr32/kprobes.h
new file mode 100644
index 000000000000..09a5cbe2f896
--- /dev/null
+++ b/include/asm-avr32/kprobes.h
@@ -0,0 +1,34 @@
1/*
2 * Kernel Probes (KProbes)
3 *
4 * Copyright (C) 2005-2006 Atmel Corporation
5 * Copyright (C) IBM Corporation, 2002, 2004
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef __ASM_AVR32_KPROBES_H
12#define __ASM_AVR32_KPROBES_H
13
14#include <linux/types.h>
15
16typedef u16 kprobe_opcode_t;
17#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */
18#define MAX_INSN_SIZE 2
19
20#define ARCH_INACTIVE_KPROBE_COUNT 1
21
22#define arch_remove_kprobe(p) do { } while (0)
23
24/* Architecture specific copy of original instruction */
25struct arch_specific_insn {
26 kprobe_opcode_t insn[MAX_INSN_SIZE];
27};
28
29extern int kprobe_exceptions_notify(struct notifier_block *self,
30 unsigned long val, void *data);
31
32#define flush_insn_slot(p) do { } while (0)
33
34#endif /* __ASM_AVR32_KPROBES_H */
diff --git a/include/asm-avr32/linkage.h b/include/asm-avr32/linkage.h
new file mode 100644
index 000000000000..f7b285e910d4
--- /dev/null
+++ b/include/asm-avr32/linkage.h
@@ -0,0 +1,7 @@
1#ifndef __ASM_LINKAGE_H
2#define __ASM_LINKAGE_H
3
4#define __ALIGN .balign 2
5#define __ALIGN_STR ".balign 2"
6
7#endif /* __ASM_LINKAGE_H */
diff --git a/include/asm-avr32/local.h b/include/asm-avr32/local.h
new file mode 100644
index 000000000000..1c1619694da3
--- /dev/null
+++ b/include/asm-avr32/local.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_LOCAL_H
2#define __ASM_AVR32_LOCAL_H
3
4#include <asm-generic/local.h>
5
6#endif /* __ASM_AVR32_LOCAL_H */
diff --git a/include/asm-avr32/mach/serial_at91.h b/include/asm-avr32/mach/serial_at91.h
new file mode 100644
index 000000000000..1290bb32802d
--- /dev/null
+++ b/include/asm-avr32/mach/serial_at91.h
@@ -0,0 +1,33 @@
1/*
2 * linux/include/asm-arm/mach/serial_at91.h
3 *
4 * Based on serial_sa1100.h by Nicolas Pitre
5 *
6 * Copyright (C) 2002 ATMEL Rousset
7 *
8 * Low level machine dependent UART functions.
9 */
10
11struct uart_port;
12
13/*
14 * This is a temporary structure for registering these
15 * functions; it is intended to be discarded after boot.
16 */
17struct at91_port_fns {
18 void (*set_mctrl)(struct uart_port *, u_int);
19 u_int (*get_mctrl)(struct uart_port *);
20 void (*enable_ms)(struct uart_port *);
21 void (*pm)(struct uart_port *, u_int, u_int);
22 int (*set_wake)(struct uart_port *, u_int);
23 int (*open)(struct uart_port *);
24 void (*close)(struct uart_port *);
25};
26
27#if defined(CONFIG_SERIAL_AT91)
28void at91_register_uart_fns(struct at91_port_fns *fns);
29#else
30#define at91_register_uart_fns(fns) do { } while (0)
31#endif
32
33
diff --git a/include/asm-avr32/mman.h b/include/asm-avr32/mman.h
new file mode 100644
index 000000000000..648f91e7187a
--- /dev/null
+++ b/include/asm-avr32/mman.h
@@ -0,0 +1,17 @@
1#ifndef __ASM_AVR32_MMAN_H__
2#define __ASM_AVR32_MMAN_H__
3
4#include <asm-generic/mman.h>
5
6#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
7#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
8#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
9#define MAP_LOCKED 0x2000 /* pages are locked */
10#define MAP_NORESERVE 0x4000 /* don't check for reservations */
11#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */
12#define MAP_NONBLOCK 0x10000 /* do not block on IO */
13
14#define MCL_CURRENT 1 /* lock all current mappings */
15#define MCL_FUTURE 2 /* lock all future mappings */
16
17#endif /* __ASM_AVR32_MMAN_H__ */
diff --git a/include/asm-avr32/mmu.h b/include/asm-avr32/mmu.h
new file mode 100644
index 000000000000..60c2d2650d32
--- /dev/null
+++ b/include/asm-avr32/mmu.h
@@ -0,0 +1,10 @@
1#ifndef __ASM_AVR32_MMU_H
2#define __ASM_AVR32_MMU_H
3
4/* Default "unsigned long" context */
5typedef unsigned long mm_context_t;
6
7#define MMU_ITLB_ENTRIES 64
8#define MMU_DTLB_ENTRIES 64
9
10#endif /* __ASM_AVR32_MMU_H */
diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h
new file mode 100644
index 000000000000..31add1ae8089
--- /dev/null
+++ b/include/asm-avr32/mmu_context.h
@@ -0,0 +1,148 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * ASID handling taken from SH implementation.
5 * Copyright (C) 1999 Niibe Yutaka
6 * Copyright (C) 2003 Paul Mundt
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#ifndef __ASM_AVR32_MMU_CONTEXT_H
13#define __ASM_AVR32_MMU_CONTEXT_H
14
15#include <asm/tlbflush.h>
16#include <asm/pgalloc.h>
17#include <asm/sysreg.h>
18
19/*
20 * The MMU "context" consists of two things:
21 * (a) TLB cache version
22 * (b) ASID (Address Space IDentifier)
23 */
24#define MMU_CONTEXT_ASID_MASK 0x000000ff
25#define MMU_CONTEXT_VERSION_MASK 0xffffff00
26#define MMU_CONTEXT_FIRST_VERSION 0x00000100
27#define NO_CONTEXT 0
28
29#define MMU_NO_ASID 0x100
30
31/* Virtual Page Number mask */
32#define MMU_VPN_MASK 0xfffff000
33
34/* Cache of MMU context last used */
35extern unsigned long mmu_context_cache;
36
37/*
38 * Get MMU context if needed
39 */
40static inline void
41get_mmu_context(struct mm_struct *mm)
42{
43 unsigned long mc = mmu_context_cache;
44
45 if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0)
46 /* It's up to date, do nothing */
47 return;
48
49 /* It's old, we need to get new context with new version */
50 mc = ++mmu_context_cache;
51 if (!(mc & MMU_CONTEXT_ASID_MASK)) {
52 /*
53 * We have exhausted all ASIDs of this version.
54 * Flush the TLB and start new cycle.
55 */
56 flush_tlb_all();
57 /*
58 * Fix version. Note that we avoid version #0
59 * to distinguish NO_CONTEXT.
60 */
61 if (!mc)
62 mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION;
63 }
64 mm->context = mc;
65}
66
67/*
68 * Initialize the context related info for a new mm_struct
69 * instance.
70 */
71static inline int init_new_context(struct task_struct *tsk,
72 struct mm_struct *mm)
73{
74 mm->context = NO_CONTEXT;
75 return 0;
76}
77
78/*
79 * Destroy context related info for an mm_struct that is about
80 * to be put to rest.
81 */
82static inline void destroy_context(struct mm_struct *mm)
83{
84 /* Do nothing */
85}
86
87static inline void set_asid(unsigned long asid)
88{
89 /* XXX: We're destroying TLBEHI[8:31] */
90 sysreg_write(TLBEHI, asid & MMU_CONTEXT_ASID_MASK);
91 cpu_sync_pipeline();
92}
93
94static inline unsigned long get_asid(void)
95{
96 unsigned long asid;
97
98 asid = sysreg_read(TLBEHI);
99 return asid & MMU_CONTEXT_ASID_MASK;
100}
101
102static inline void activate_context(struct mm_struct *mm)
103{
104 get_mmu_context(mm);
105 set_asid(mm->context & MMU_CONTEXT_ASID_MASK);
106}
107
108static inline void switch_mm(struct mm_struct *prev,
109 struct mm_struct *next,
110 struct task_struct *tsk)
111{
112 if (likely(prev != next)) {
113 unsigned long __pgdir = (unsigned long)next->pgd;
114
115 sysreg_write(PTBR, __pgdir);
116 activate_context(next);
117 }
118}
119
120#define deactivate_mm(tsk,mm) do { } while(0)
121
122#define activate_mm(prev, next) switch_mm((prev), (next), NULL)
123
124static inline void
125enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
126{
127}
128
129
/*
 * Write MMUCR with the MMUCR_S, E and MMUCR_I bits set, execute eight
 * nops, then make sure mmu_context_cache holds a usable value and load
 * its ASID with set_asid().
 *
 * NOTE(review): the nop sequence presumably gives the MMUCR write time
 * to take effect before continuing -- confirm against the architecture
 * manual.
 */
130static inline void enable_mmu(void)
131{
132 sysreg_write(MMUCR, (SYSREG_BIT(MMUCR_S)
133 | SYSREG_BIT(E)
134 | SYSREG_BIT(MMUCR_I)));
135 nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
136
/* First use: start from the first valid version instead of NO_CONTEXT */
137 if (mmu_context_cache == NO_CONTEXT)
138 mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
139
140 set_asid(mmu_context_cache & MMU_CONTEXT_ASID_MASK);
141}
142
/*
 * Write MMUCR with only the MMUCR_S bit set, i.e. without the E bit
 * that enable_mmu() sets.
 */
143static inline void disable_mmu(void)
144{
145 sysreg_write(MMUCR, SYSREG_BIT(MMUCR_S));
146}
147
148#endif /* __ASM_AVR32_MMU_CONTEXT_H */
diff --git a/include/asm-avr32/module.h b/include/asm-avr32/module.h
new file mode 100644
index 000000000000..451444538a1b
--- /dev/null
+++ b/include/asm-avr32/module.h
@@ -0,0 +1,28 @@
1#ifndef __ASM_AVR32_MODULE_H
2#define __ASM_AVR32_MODULE_H
3
4struct mod_arch_syminfo {
5 unsigned long got_offset;
6 int got_initialized;
7};
8
9struct mod_arch_specific {
10 /* Starting offset of got in the module core memory. */
11 unsigned long got_offset;
12 /* Size of the got. */
13 unsigned long got_size;
14 /* Number of symbols in syminfo. */
15 int nsyms;
16 /* Additional symbol information (got offsets). */
17 struct mod_arch_syminfo *syminfo;
18};
19
20#define Elf_Shdr Elf32_Shdr
21#define Elf_Sym Elf32_Sym
22#define Elf_Ehdr Elf32_Ehdr
23
24#define MODULE_PROC_FAMILY "AVR32v1"
25
26#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
27
28#endif /* __ASM_AVR32_MODULE_H */
diff --git a/include/asm-avr32/msgbuf.h b/include/asm-avr32/msgbuf.h
new file mode 100644
index 000000000000..ac18bc4da7f7
--- /dev/null
+++ b/include/asm-avr32/msgbuf.h
@@ -0,0 +1,31 @@
1#ifndef __ASM_AVR32_MSGBUF_H
2#define __ASM_AVR32_MSGBUF_H
3
4/*
5 * The msqid64_ds structure for AVR32 architecture.
6 * Note extra padding because this structure is passed back and forth
7 * between kernel and user space.
8 *
9 * Pad space is left for:
10 * - 64-bit time_t to solve y2038 problem
11 * - 2 miscellaneous 32-bit values
12 */
13
14struct msqid64_ds {
15 struct ipc64_perm msg_perm;
16 __kernel_time_t msg_stime; /* last msgsnd time */
17 unsigned long __unused1;
18 __kernel_time_t msg_rtime; /* last msgrcv time */
19 unsigned long __unused2;
20 __kernel_time_t msg_ctime; /* last change time */
21 unsigned long __unused3;
22 unsigned long msg_cbytes; /* current number of bytes on queue */
23 unsigned long msg_qnum; /* number of messages in queue */
24 unsigned long msg_qbytes; /* max number of bytes on queue */
25 __kernel_pid_t msg_lspid; /* pid of last msgsnd */
26 __kernel_pid_t msg_lrpid; /* last receive pid */
27 unsigned long __unused4;
28 unsigned long __unused5;
29};
30
31#endif /* __ASM_AVR32_MSGBUF_H */
diff --git a/include/asm-avr32/mutex.h b/include/asm-avr32/mutex.h
new file mode 100644
index 000000000000..458c1f7fbc18
--- /dev/null
+++ b/include/asm-avr32/mutex.h
@@ -0,0 +1,9 @@
1/*
2 * Pull in the generic implementation for the mutex fastpath.
3 *
4 * TODO: implement optimized primitives instead, or leave the generic
5 * implementation in place, or pick the atomic_xchg() based generic
6 * implementation. (see asm-generic/mutex-xchg.h for details)
7 */
8
9#include <asm-generic/mutex-dec.h>
diff --git a/include/asm-avr32/namei.h b/include/asm-avr32/namei.h
new file mode 100644
index 000000000000..f0a26de06cab
--- /dev/null
+++ b/include/asm-avr32/namei.h
@@ -0,0 +1,7 @@
1#ifndef __ASM_AVR32_NAMEI_H
2#define __ASM_AVR32_NAMEI_H
3
4/* This dummy routine may be changed to something useful */
5#define __emul_prefix() NULL
6
7#endif /* __ASM_AVR32_NAMEI_H */
diff --git a/include/asm-avr32/numnodes.h b/include/asm-avr32/numnodes.h
new file mode 100644
index 000000000000..0b864d7ce330
--- /dev/null
+++ b/include/asm-avr32/numnodes.h
@@ -0,0 +1,7 @@
1#ifndef __ASM_AVR32_NUMNODES_H
2#define __ASM_AVR32_NUMNODES_H
3
4/* Max 4 nodes */
5#define NODES_SHIFT 2
6
7#endif /* __ASM_AVR32_NUMNODES_H */
diff --git a/include/asm-avr32/ocd.h b/include/asm-avr32/ocd.h
new file mode 100644
index 000000000000..46f73180a127
--- /dev/null
+++ b/include/asm-avr32/ocd.h
@@ -0,0 +1,78 @@
1/*
2 * AVR32 OCD Registers
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_AVR32_OCD_H
11#define __ASM_AVR32_OCD_H
12
/*
 * Debug Registers
 *
 * These values are used as the immediate operand of the mfdr/mtdr
 * instructions (see __mfdr() and __mtdr() in this file). Every value is
 * a multiple of four, and the A/B halves of each breakpoint/watchpoint
 * pair are four apart.
 */
#define DBGREG_DID		  0
#define DBGREG_DC		  8
#define DBGREG_DS		 16
#define DBGREG_RWCS		 28
#define DBGREG_RWA		 36
#define DBGREG_RWD		 40
#define DBGREG_WT		 44
#define DBGREG_DTC		 52
#define DBGREG_DTSA0		 56
#define DBGREG_DTSA1		 60
#define DBGREG_DTEA0		 72
#define DBGREG_DTEA1		 76
#define DBGREG_BWC0A		 88
#define DBGREG_BWC0B		 92
#define DBGREG_BWC1A		 96
#define DBGREG_BWC1B		100
#define DBGREG_BWC2A		104
#define DBGREG_BWC2B		108
#define DBGREG_BWC3A		112
#define DBGREG_BWC3B		116
#define DBGREG_BWA0A		120
#define DBGREG_BWA0B		124
#define DBGREG_BWA1A		128
#define DBGREG_BWA1B		132
#define DBGREG_BWA2A		136
#define DBGREG_BWA2B		140
#define DBGREG_BWA3A		144
#define DBGREG_BWA3B		148
/* Was 153: the only misaligned entry, and not 4 below DBGREG_BWD3B. */
#define DBGREG_BWD3A		152
#define DBGREG_BWD3B		156
44
45#define DBGREG_PID 284
46
47#define SABAH_OCD 0x01
48#define SABAH_ICACHE 0x02
49#define SABAH_MEM_CACHED 0x04
50#define SABAH_MEM_UNCACHED 0x05
51
52/* Fields in the Development Control register */
53#define DC_SS_BIT 8
54
55#define DC_SS (1 << DC_SS_BIT)
56#define DC_DBE (1 << 13)
57#define DC_RID (1 << 27)
58#define DC_ORP (1 << 28)
59#define DC_MM (1 << 29)
60#define DC_RES (1 << 30)
61
62/* Fields in the Development Status register */
63#define DS_SSS (1 << 0)
64#define DS_SWB (1 << 1)
65#define DS_HWB (1 << 2)
66#define DS_BP_SHIFT 8
67#define DS_BP_MASK (0xff << DS_BP_SHIFT)
68
69#define __mfdr(addr) \
70({ \
71 register unsigned long value; \
72 asm volatile("mfdr %0, %1" : "=r"(value) : "i"(addr)); \
73 value; \
74})
75#define __mtdr(addr, value) \
76 asm volatile("mtdr %0, %1" : : "i"(addr), "r"(value))
77
78#endif /* __ASM_AVR32_OCD_H */
diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h
new file mode 100644
index 000000000000..0f630b3e9932
--- /dev/null
+++ b/include/asm-avr32/page.h
@@ -0,0 +1,112 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_PAGE_H
9#define __ASM_AVR32_PAGE_H
10
11#ifdef __KERNEL__
12
13/* PAGE_SHIFT determines the page size */
14#define PAGE_SHIFT 12
15#ifdef __ASSEMBLY__
16#define PAGE_SIZE (1 << PAGE_SHIFT)
17#else
18#define PAGE_SIZE (1UL << PAGE_SHIFT)
19#endif
20#define PAGE_MASK (~(PAGE_SIZE-1))
21#define PTE_MASK PAGE_MASK
22
23#ifndef __ASSEMBLY__
24
25#include <asm/addrspace.h>
26
27extern void clear_page(void *to);
28extern void copy_page(void *to, void *from);
29
30#define clear_user_page(page, vaddr, pg) clear_page(page)
31#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
32
33/*
34 * These are used to make use of C type-checking..
35 */
36typedef struct { unsigned long pte; } pte_t;
37typedef struct { unsigned long pgd; } pgd_t;
38typedef struct { unsigned long pgprot; } pgprot_t;
39
40#define pte_val(x) ((x).pte)
41#define pgd_val(x) ((x).pgd)
42#define pgprot_val(x) ((x).pgprot)
43
44#define __pte(x) ((pte_t) { (x) })
45#define __pgd(x) ((pgd_t) { (x) })
46#define __pgprot(x) ((pgprot_t) { (x) })
47
48/* FIXME: These should be removed soon */
49extern unsigned long memory_start, memory_end;
50
51/* Pure 2^n version of get_order */
52static inline int get_order(unsigned long size)
53{
54 unsigned lz;
55
56 size = (size - 1) >> PAGE_SHIFT;
57 asm("clz %0, %1" : "=r"(lz) : "r"(size));
58 return 32 - lz;
59}
60
61#endif /* !__ASSEMBLY__ */
62
63/* Align the pointer to the (next) page boundary */
64#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
65
66/*
67 * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff
68 * permanently to the physical addresses 0x00000000 -> 0x1fffffff when
69 * segmentation is enabled. We want to make use of this in order to
70 * minimize TLB pressure.
71 */
72#define PAGE_OFFSET (0x80000000UL)
73
74/*
75 * ALSA uses virt_to_page() on DMA pages, which I'm not entirely sure
76 * is a good idea. Anyway, we can't simply subtract PAGE_OFFSET here
77 * in that case, so we'll have to mask out the three most significant
78 * bits of the address instead...
79 *
80 * What's the difference between __pa() and virt_to_phys() anyway?
81 */
82#define __pa(x) PHYSADDR(x)
83#define __va(x) ((void *)(P1SEGADDR(x)))
84
85#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT)
86
/*
 * Convert between a physical address and its struct page.
 *
 * The phys argument must be parenthesized: callers pass expressions such
 * as "pte_val(x) & PTE_PHYS_MASK", and since '>>' binds tighter than '&'
 * an unparenthesized expansion would shift the mask instead of the
 * masked address.
 */
#define phys_to_page(phys)	(pfn_to_page((phys) >> PAGE_SHIFT))
#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
89
90#ifndef CONFIG_NEED_MULTIPLE_NODES
91
92#define PHYS_PFN_OFFSET (CONFIG_PHYS_OFFSET >> PAGE_SHIFT)
93
94#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET))
95#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET)
96#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
97#endif /* CONFIG_NEED_MULTIPLE_NODES */
98
99#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
100#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
101
102#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
103 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
104
105/*
106 * Memory above this physical address will be considered highmem.
107 */
108#define HIGHMEM_START 0x20000000UL
109
110#endif /* __KERNEL__ */
111
112#endif /* __ASM_AVR32_PAGE_H */
diff --git a/include/asm-avr32/param.h b/include/asm-avr32/param.h
new file mode 100644
index 000000000000..34bc8d4c3b29
--- /dev/null
+++ b/include/asm-avr32/param.h
@@ -0,0 +1,23 @@
1#ifndef __ASM_AVR32_PARAM_H
2#define __ASM_AVR32_PARAM_H
3
4#ifdef __KERNEL__
5# define HZ CONFIG_HZ
6# define USER_HZ 100 /* User interfaces are in "ticks" */
7# define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */
8#endif
9
10#ifndef HZ
11# define HZ 100
12#endif
13
14/* TODO: Should be configurable */
15#define EXEC_PAGESIZE 4096
16
17#ifndef NOGROUP
18# define NOGROUP (-1)
19#endif
20
21#define MAXHOSTNAMELEN 64
22
23#endif /* __ASM_AVR32_PARAM_H */
diff --git a/include/asm-avr32/pci.h b/include/asm-avr32/pci.h
new file mode 100644
index 000000000000..0f5f134b896a
--- /dev/null
+++ b/include/asm-avr32/pci.h
@@ -0,0 +1,8 @@
1#ifndef __ASM_AVR32_PCI_H__
2#define __ASM_AVR32_PCI_H__
3
4/* We don't support PCI yet, but some drivers require this file anyway */
5
6#define PCI_DMA_BUS_IS_PHYS (1)
7
8#endif /* __ASM_AVR32_PCI_H__ */
diff --git a/include/asm-avr32/percpu.h b/include/asm-avr32/percpu.h
new file mode 100644
index 000000000000..69227b4cd0d4
--- /dev/null
+++ b/include/asm-avr32/percpu.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_PERCPU_H
2#define __ASM_AVR32_PERCPU_H
3
4#include <asm-generic/percpu.h>
5
6#endif /* __ASM_AVR32_PERCPU_H */
diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h
new file mode 100644
index 000000000000..7492cfb92ced
--- /dev/null
+++ b/include/asm-avr32/pgalloc.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_PGALLOC_H
9#define __ASM_AVR32_PGALLOC_H
10
11#include <asm/processor.h>
12#include <linux/threads.h>
13#include <linux/slab.h>
14#include <linux/mm.h>
15
16#define pmd_populate_kernel(mm, pmd, pte) \
17 set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
18
19static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
20 struct page *pte)
21{
22 set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
23}
24
25/*
26 * Allocate and free page tables
27 */
28static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm)
29{
30 unsigned int pgd_size = (USER_PTRS_PER_PGD * sizeof(pgd_t));
31 pgd_t *pgd = (pgd_t *)kmalloc(pgd_size, GFP_KERNEL);
32
33 if (pgd)
34 memset(pgd, 0, pgd_size);
35
36 return pgd;
37}
38
39static inline void pgd_free(pgd_t *pgd)
40{
41 kfree(pgd);
42}
43
44static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
45 unsigned long address)
46{
47 int count = 0;
48 pte_t *pte;
49
50 do {
51 pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT);
52 if (pte)
53 clear_page(pte);
54 else {
55 current->state = TASK_UNINTERRUPTIBLE;
56 schedule_timeout(HZ);
57 }
58 } while (!pte && (count++ < 10));
59
60 return pte;
61}
62
63static inline struct page *pte_alloc_one(struct mm_struct *mm,
64 unsigned long address)
65{
66 int count = 0;
67 struct page *pte;
68
69 do {
70 pte = alloc_pages(GFP_KERNEL, 0);
71 if (pte)
72 clear_page(page_address(pte));
73 else {
74 current->state = TASK_UNINTERRUPTIBLE;
75 schedule_timeout(HZ);
76 }
77 } while (!pte && (count++ < 10));
78
79 return pte;
80}
81
82static inline void pte_free_kernel(pte_t *pte)
83{
84 free_page((unsigned long)pte);
85}
86
87static inline void pte_free(struct page *pte)
88{
89 __free_page(pte);
90}
91
92#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
93
94#define check_pgt_cache() do { } while(0)
95
96#endif /* __ASM_AVR32_PGALLOC_H */
diff --git a/include/asm-avr32/pgtable-2level.h b/include/asm-avr32/pgtable-2level.h
new file mode 100644
index 000000000000..425dd567b5b9
--- /dev/null
+++ b/include/asm-avr32/pgtable-2level.h
@@ -0,0 +1,47 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_PGTABLE_2LEVEL_H
9#define __ASM_AVR32_PGTABLE_2LEVEL_H
10
11#include <asm-generic/pgtable-nopmd.h>
12
13/*
14 * Traditional 2-level paging structure
15 */
16#define PGDIR_SHIFT 22
17#define PTRS_PER_PGD 1024
18
19#define PTRS_PER_PTE 1024
20
21#ifndef __ASSEMBLY__
22#define pte_ERROR(e) \
23 printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
24#define pgd_ERROR(e) \
25 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
26
27/*
28 * Certain architectures need to do special things when PTEs
29 * within a page table are directly modified. Thus, the following
30 * hook is made available.
31 */
32#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
33#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep, pteval)
34
35/*
36 * (pmds are folded into pgds so this doesn't get actually called,
37 * but the define is needed for a generic inline function.)
38 */
39#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
40
41#define pte_pfn(x) ((unsigned long)(((x).pte >> PAGE_SHIFT)))
42#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
43#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
44
45#endif /* !__ASSEMBLY__ */
46
47#endif /* __ASM_AVR32_PGTABLE_2LEVEL_H */
diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h
new file mode 100644
index 000000000000..6b8ca9db2bd5
--- /dev/null
+++ b/include/asm-avr32/pgtable.h
@@ -0,0 +1,408 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_PGTABLE_H
9#define __ASM_AVR32_PGTABLE_H
10
11#include <asm/addrspace.h>
12
13#ifndef __ASSEMBLY__
14#include <linux/sched.h>
15
16#endif /* !__ASSEMBLY__ */
17
18/*
19 * Use two-level page tables just as the i386 (without PAE)
20 */
21#include <asm/pgtable-2level.h>
22
23/*
24 * The following code might need some cleanup when the values are
25 * final...
26 */
27#define PMD_SIZE (1UL << PMD_SHIFT)
28#define PMD_MASK (~(PMD_SIZE-1))
29#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
30#define PGDIR_MASK (~(PGDIR_SIZE-1))
31
32#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
33#define FIRST_USER_ADDRESS 0
34
35#define PTE_PHYS_MASK 0x1ffff000
36
37#ifndef __ASSEMBLY__
38extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
39extern void paging_init(void);
40
41/*
42 * ZERO_PAGE is a global shared page that is always zero: used for
43 * zero-mapped memory areas etc.
44 */
45extern struct page *empty_zero_page;
46#define ZERO_PAGE(vaddr) (empty_zero_page)
47
48/*
49 * Just any arbitrary offset to the start of the vmalloc VM area: the
50 * current 8 MiB value just means that there will be a 8 MiB "hole"
51 * after the uncached physical memory (P2 segment) until the vmalloc
52 * area starts. That means that any out-of-bounds memory accesses will
53 * hopefully be caught; we don't know if the end of the P1/P2 segments
54 * are actually used for anything, but it is anyway safer to let the
55 * MMU catch these kinds of errors than to rely on the memory bus.
56 *
57 * A "hole" of the same size is added to the end of the P3 segment as
58 * well. It might seem wasteful to use 16 MiB of virtual address space
59 * on this, but we do have 512 MiB of it...
60 *
61 * The vmalloc() routines leave a hole of 4 KiB between each vmalloced
62 * area for the same reason.
63 */
64#define VMALLOC_OFFSET (8 * 1024 * 1024)
65#define VMALLOC_START (P3SEG + VMALLOC_OFFSET)
66#define VMALLOC_END (P4SEG - VMALLOC_OFFSET)
67#endif /* !__ASSEMBLY__ */
68
69/*
70 * Page flags. Some of these flags are not directly supported by
71 * hardware, so we have to emulate them.
72 */
73#define _TLBEHI_BIT_VALID 9
74#define _TLBEHI_VALID (1 << _TLBEHI_BIT_VALID)
75
76#define _PAGE_BIT_WT 0 /* W-bit : write-through */
77#define _PAGE_BIT_DIRTY 1 /* D-bit : page changed */
78#define _PAGE_BIT_SZ0 2 /* SZ0-bit : Size of page */
79#define _PAGE_BIT_SZ1 3 /* SZ1-bit : Size of page */
80#define _PAGE_BIT_EXECUTE 4 /* X-bit : execute access allowed */
81#define _PAGE_BIT_RW 5 /* AP0-bit : write access allowed */
82#define _PAGE_BIT_USER 6 /* AP1-bit : user space access allowed */
83#define _PAGE_BIT_BUFFER 7 /* B-bit : bufferable */
84#define _PAGE_BIT_GLOBAL 8 /* G-bit : global (ignore ASID) */
85#define _PAGE_BIT_CACHABLE 9 /* C-bit : cachable */
86
87/* If we drop support for 1K pages, we get two extra bits */
88#define _PAGE_BIT_PRESENT 10
89#define _PAGE_BIT_ACCESSED 11 /* software: page was accessed */
90
91/* The following flags are only valid when !PRESENT */
92#define _PAGE_BIT_FILE 0 /* software: pagecache or swap? */
93
94#define _PAGE_WT (1 << _PAGE_BIT_WT)
95#define _PAGE_DIRTY (1 << _PAGE_BIT_DIRTY)
96#define _PAGE_EXECUTE (1 << _PAGE_BIT_EXECUTE)
97#define _PAGE_RW (1 << _PAGE_BIT_RW)
98#define _PAGE_USER (1 << _PAGE_BIT_USER)
99#define _PAGE_BUFFER (1 << _PAGE_BIT_BUFFER)
100#define _PAGE_GLOBAL (1 << _PAGE_BIT_GLOBAL)
101#define _PAGE_CACHABLE (1 << _PAGE_BIT_CACHABLE)
102
103/* Software flags */
104#define _PAGE_ACCESSED (1 << _PAGE_BIT_ACCESSED)
105#define _PAGE_PRESENT (1 << _PAGE_BIT_PRESENT)
106#define _PAGE_FILE (1 << _PAGE_BIT_FILE)
107
108/*
109 * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is
110 * usually called _PAGE_PROTNONE on other architectures.
111 *
112 * XXX: Find out if _PAGE_PROTNONE is equivalent with !_PAGE_USER. If
113 * so, we can encode all possible page sizes (although we can't really
114 * support 1K pages anyway due to the _PAGE_PRESENT and _PAGE_ACCESSED
115 * bits)
116 *
117 */
118#define _PAGE_TYPE_MASK ((1 << _PAGE_BIT_SZ0) | (1 << _PAGE_BIT_SZ1))
119#define _PAGE_TYPE_NONE (0 << _PAGE_BIT_SZ0)
120#define _PAGE_TYPE_SMALL (1 << _PAGE_BIT_SZ0)
121#define _PAGE_TYPE_MEDIUM (2 << _PAGE_BIT_SZ0)
122#define _PAGE_TYPE_LARGE (3 << _PAGE_BIT_SZ0)
123
124/*
125 * Mask which drop software flags. We currently can't handle more than
126 * 512 MiB of physical memory, so we can use bits 29-31 for other
127 * stuff. With a fixed 4K page size, we can use bits 10-11 as well as
128 * bits 2-3 (SZ)
129 */
130#define _PAGE_FLAGS_HARDWARE_MASK 0xfffff3ff
131
132#define _PAGE_FLAGS_CACHE_MASK (_PAGE_CACHABLE | _PAGE_BUFFER | _PAGE_WT)
133
134/* TODO: Check for saneness */
135/* User-mode page table flags (to be set in a pgd or pmd entry) */
136#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \
137 | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
138/* Kernel-mode page table flags */
139#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \
140 | _PAGE_ACCESSED | _PAGE_DIRTY)
141/* Flags that may be modified by software */
142#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY \
143 | _PAGE_FLAGS_CACHE_MASK)
144
145#define _PAGE_FLAGS_READ (_PAGE_CACHABLE | _PAGE_BUFFER)
146#define _PAGE_FLAGS_WRITE (_PAGE_FLAGS_READ | _PAGE_RW | _PAGE_DIRTY)
147
148#define _PAGE_NORMAL(x) __pgprot((x) | _PAGE_PRESENT | _PAGE_TYPE_SMALL \
149 | _PAGE_ACCESSED)
150
151#define PAGE_NONE (_PAGE_ACCESSED | _PAGE_TYPE_NONE)
152#define PAGE_READ (_PAGE_FLAGS_READ | _PAGE_USER)
153#define PAGE_EXEC (_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_USER)
154#define PAGE_WRITE (_PAGE_FLAGS_WRITE | _PAGE_USER)
155#define PAGE_KERNEL _PAGE_NORMAL(_PAGE_FLAGS_WRITE | _PAGE_EXECUTE | _PAGE_GLOBAL)
156#define PAGE_KERNEL_RO _PAGE_NORMAL(_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_GLOBAL)
157
158#define _PAGE_P(x) _PAGE_NORMAL((x) & ~(_PAGE_RW | _PAGE_DIRTY))
159#define _PAGE_S(x) _PAGE_NORMAL(x)
160
161#define PAGE_COPY _PAGE_P(PAGE_WRITE | PAGE_READ)
162
163#ifndef __ASSEMBLY__
164/*
165 * The hardware supports flags for write- and execute access. Read is
166 * always allowed if the page is loaded into the TLB, so the "-w-",
167 * "--x" and "-wx" mappings are implemented as "rw-", "r-x" and "rwx",
168 * respectively.
169 *
170 * The "---" case is handled by software; the page will simply not be
171 * loaded into the TLB if the page type is _PAGE_TYPE_NONE.
172 */
173
174#define __P000 __pgprot(PAGE_NONE)
175#define __P001 _PAGE_P(PAGE_READ)
176#define __P010 _PAGE_P(PAGE_WRITE)
177#define __P011 _PAGE_P(PAGE_WRITE | PAGE_READ)
178#define __P100 _PAGE_P(PAGE_EXEC)
179#define __P101 _PAGE_P(PAGE_EXEC | PAGE_READ)
180#define __P110 _PAGE_P(PAGE_EXEC | PAGE_WRITE)
181#define __P111 _PAGE_P(PAGE_EXEC | PAGE_WRITE | PAGE_READ)
182
183#define __S000 __pgprot(PAGE_NONE)
184#define __S001 _PAGE_S(PAGE_READ)
185#define __S010 _PAGE_S(PAGE_WRITE)
186#define __S011 _PAGE_S(PAGE_WRITE | PAGE_READ)
187#define __S100 _PAGE_S(PAGE_EXEC)
188#define __S101 _PAGE_S(PAGE_EXEC | PAGE_READ)
189#define __S110 _PAGE_S(PAGE_EXEC | PAGE_WRITE)
190#define __S111 _PAGE_S(PAGE_EXEC | PAGE_WRITE | PAGE_READ)
191
192#define pte_none(x) (!pte_val(x))
193#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
194
195#define pte_clear(mm,addr,xp) \
196 do { \
197 set_pte_at(mm, addr, xp, __pte(0)); \
198 } while (0)
199
200/*
201 * The following only work if pte_present() is true.
202 * Undefined behaviour if not..
203 */
204static inline int pte_read(pte_t pte)
205{
206 return pte_val(pte) & _PAGE_USER;
207}
208static inline int pte_write(pte_t pte)
209{
210 return pte_val(pte) & _PAGE_RW;
211}
212static inline int pte_exec(pte_t pte)
213{
214 return pte_val(pte) & _PAGE_EXECUTE;
215}
216static inline int pte_dirty(pte_t pte)
217{
218 return pte_val(pte) & _PAGE_DIRTY;
219}
220static inline int pte_young(pte_t pte)
221{
222 return pte_val(pte) & _PAGE_ACCESSED;
223}
224
225/*
226 * The following only work if pte_present() is not true.
227 */
228static inline int pte_file(pte_t pte)
229{
230 return pte_val(pte) & _PAGE_FILE;
231}
232
233/* Mutator functions for PTE bits */
234static inline pte_t pte_rdprotect(pte_t pte)
235{
236 set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER));
237 return pte;
238}
239static inline pte_t pte_wrprotect(pte_t pte)
240{
241 set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW));
242 return pte;
243}
244static inline pte_t pte_exprotect(pte_t pte)
245{
246 set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_EXECUTE));
247 return pte;
248}
249static inline pte_t pte_mkclean(pte_t pte)
250{
251 set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY));
252 return pte;
253}
254static inline pte_t pte_mkold(pte_t pte)
255{
256 set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED));
257 return pte;
258}
259static inline pte_t pte_mkread(pte_t pte)
260{
261 set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER));
262 return pte;
263}
264static inline pte_t pte_mkwrite(pte_t pte)
265{
266 set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW));
267 return pte;
268}
269static inline pte_t pte_mkexec(pte_t pte)
270{
271 set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE));
272 return pte;
273}
274static inline pte_t pte_mkdirty(pte_t pte)
275{
276 set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY));
277 return pte;
278}
279static inline pte_t pte_mkyoung(pte_t pte)
280{
281 set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED));
282 return pte;
283}
284
285#define pmd_none(x) (!pmd_val(x))
286#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
287#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
288#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) \
289 != _KERNPG_TABLE)
290
291/*
292 * Permanent address of a page. We don't support highmem, so this is
293 * trivial.
294 */
295#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
296#define pte_page(x) phys_to_page(pte_val(x) & PTE_PHYS_MASK)
297
298/*
299 * Mark the prot value as uncacheable and unbufferable
300 */
301#define pgprot_noncached(prot) \
302 __pgprot(pgprot_val(prot) & ~(_PAGE_BUFFER | _PAGE_CACHABLE))
303
304/*
305 * Mark the prot value as uncacheable but bufferable
306 */
307#define pgprot_writecombine(prot) \
308 __pgprot((pgprot_val(prot) & ~_PAGE_CACHABLE) | _PAGE_BUFFER)
309
310/*
311 * Conversion functions: convert a page and protection to a page entry,
312 * and a page entry and page directory to the page they refer to.
313 *
314 * extern pte_t mk_pte(struct page *page, pgprot_t pgprot)
315 */
316#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
317
318static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
319{
320 set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK)
321 | pgprot_val(newprot)));
322 return pte;
323}
324
325#define page_pte(page) page_pte_prot(page, __pgprot(0))
326
327#define pmd_page_vaddr(pmd) \
328 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
329
330#define pmd_page(pmd) (phys_to_page(pmd_val(pmd)))
331
332/* to find an entry in a page-table-directory. */
333#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
334#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
335#define pgd_offset_current(address) \
336 ((pgd_t *)__mfsr(SYSREG_PTBR) + pgd_index(address))
337
338/* to find an entry in a kernel page-table-directory */
339#define pgd_offset_k(address) pgd_offset(&init_mm, address)
340
/*
 * Find an entry in the second-level (PTE) page table: the index is the
 * page number of the address modulo PTRS_PER_PTE. The argument is
 * parenthesized so that expressions using low-precedence operators
 * (e.g. "a | b") are shifted as a whole.
 */
#define pte_index(address)				\
	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
344#define pte_offset(dir, address) \
345 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
346#define pte_offset_kernel(dir, address) \
347 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
348#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
349#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address)
350#define pte_unmap(pte) do { } while (0)
351#define pte_unmap_nested(pte) do { } while (0)
352
353struct vm_area_struct;
354extern void update_mmu_cache(struct vm_area_struct * vma,
355 unsigned long address, pte_t pte);
356
357/*
358 * Encode and decode a swap entry
359 *
360 * Constraints:
361 * _PAGE_FILE at bit 0
362 * _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE)
363 * _PAGE_PRESENT at bit 10
364 *
365 * We encode the type into bits 4-9 and offset into bits 11-31. This
366 * gives us a 21 bits offset, or 2**21 * 4K = 8G usable swap space per
367 * device, and 64 possible types.
368 *
369 * NOTE: We should set ZEROs at the position of _PAGE_PRESENT
370 * and _PAGE_PROTNONE bits
371 */
372#define __swp_type(x) (((x).val >> 4) & 0x3f)
373#define __swp_offset(x) ((x).val >> 11)
374#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 11) })
375#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
376#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
377
378/*
379 * Encode and decode a nonlinear file mapping entry. We have to
380 * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't
381 * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?)
382 */
383#define PTE_FILE_MAX_BITS 30
384#define pte_to_pgoff(pte) (((pte_val(pte) >> 1) & 0x1ff) \
385 | ((pte_val(pte) >> 11) << 9))
386#define pgoff_to_pte(off) ((pte_t) { ((((off) & 0x1ff) << 1) \
387 | (((off) >> 9) << 11) \
388 | _PAGE_FILE) })
389
390typedef pte_t *pte_addr_t;
391
392#define kern_addr_valid(addr) (1)
393
394#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
395 remap_pfn_range(vma, vaddr, pfn, size, prot)
396
397#define MK_IOSPACE_PFN(space, pfn) (pfn)
398#define GET_IOSPACE(pfn) 0
399#define GET_PFN(pfn) (pfn)
400
401/* No page table caches to initialize (?) */
402#define pgtable_cache_init() do { } while(0)
403
404#include <asm-generic/pgtable.h>
405
406#endif /* !__ASSEMBLY__ */
407
408#endif /* __ASM_AVR32_PGTABLE_H */
diff --git a/include/asm-avr32/poll.h b/include/asm-avr32/poll.h
new file mode 100644
index 000000000000..736e29755dfc
--- /dev/null
+++ b/include/asm-avr32/poll.h
@@ -0,0 +1,27 @@
1#ifndef __ASM_AVR32_POLL_H
2#define __ASM_AVR32_POLL_H
3
4/* These are specified by iBCS2 */
5#define POLLIN 0x0001
6#define POLLPRI 0x0002
7#define POLLOUT 0x0004
8#define POLLERR 0x0008
9#define POLLHUP 0x0010
10#define POLLNVAL 0x0020
11
12/* The rest seem to be more-or-less nonstandard. Check them! */
13#define POLLRDNORM 0x0040
14#define POLLRDBAND 0x0080
15#define POLLWRNORM 0x0100
16#define POLLWRBAND 0x0200
17#define POLLMSG 0x0400
18#define POLLREMOVE 0x1000
19#define POLLRDHUP 0x2000
20
21struct pollfd {
22 int fd;
23 short events;
24 short revents;
25};
26
27#endif /* __ASM_AVR32_POLL_H */
diff --git a/include/asm-avr32/posix_types.h b/include/asm-avr32/posix_types.h
new file mode 100644
index 000000000000..2831b039b349
--- /dev/null
+++ b/include/asm-avr32/posix_types.h
@@ -0,0 +1,129 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_POSIX_TYPES_H
9#define __ASM_AVR32_POSIX_TYPES_H
10
11/*
12 * This file is generally used by user-level software, so you need to
13 * be a little careful about namespace pollution etc. Also, we cannot
14 * assume GCC is being used.
15 */
16
17typedef unsigned long __kernel_ino_t;
18typedef unsigned short __kernel_mode_t;
19typedef unsigned short __kernel_nlink_t;
20typedef long __kernel_off_t;
21typedef int __kernel_pid_t;
22typedef unsigned short __kernel_ipc_pid_t;
23typedef unsigned int __kernel_uid_t;
24typedef unsigned int __kernel_gid_t;
25typedef unsigned long __kernel_size_t;
26typedef int __kernel_ssize_t;
27typedef int __kernel_ptrdiff_t;
28typedef long __kernel_time_t;
29typedef long __kernel_suseconds_t;
30typedef long __kernel_clock_t;
31typedef int __kernel_timer_t;
32typedef int __kernel_clockid_t;
33typedef int __kernel_daddr_t;
34typedef char * __kernel_caddr_t;
35typedef unsigned short __kernel_uid16_t;
36typedef unsigned short __kernel_gid16_t;
37typedef unsigned int __kernel_uid32_t;
38typedef unsigned int __kernel_gid32_t;
39
40typedef unsigned short __kernel_old_uid_t;
41typedef unsigned short __kernel_old_gid_t;
42typedef unsigned short __kernel_old_dev_t;
43
44#ifdef __GNUC__
45typedef long long __kernel_loff_t;
46#endif
47
48typedef struct {
49#if defined(__KERNEL__) || defined(__USE_ALL)
50 int val[2];
51#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */
52 int __val[2];
53#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */
54} __kernel_fsid_t;
55
56#if defined(__KERNEL__)
57
58#undef __FD_SET
59static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
60{
61 unsigned long __tmp = __fd / __NFDBITS;
62 unsigned long __rem = __fd % __NFDBITS;
63 __fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
64}
65
66#undef __FD_CLR
67static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
68{
69 unsigned long __tmp = __fd / __NFDBITS;
70 unsigned long __rem = __fd % __NFDBITS;
71 __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
72}
73
74
75#undef __FD_ISSET
76static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
77{
78 unsigned long __tmp = __fd / __NFDBITS;
79 unsigned long __rem = __fd % __NFDBITS;
80 return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
81}
82
83/*
84 * This will unroll the loop for the normal constant case (8 ints,
85 * for a 256-bit fd_set)
86 */
87#undef __FD_ZERO
88static __inline__ void __FD_ZERO(__kernel_fd_set *__p)
89{
90 unsigned long *__tmp = __p->fds_bits;
91 int __i;
92
93 if (__builtin_constant_p(__FDSET_LONGS)) {
94 switch (__FDSET_LONGS) {
95 case 16:
96 __tmp[ 0] = 0; __tmp[ 1] = 0;
97 __tmp[ 2] = 0; __tmp[ 3] = 0;
98 __tmp[ 4] = 0; __tmp[ 5] = 0;
99 __tmp[ 6] = 0; __tmp[ 7] = 0;
100 __tmp[ 8] = 0; __tmp[ 9] = 0;
101 __tmp[10] = 0; __tmp[11] = 0;
102 __tmp[12] = 0; __tmp[13] = 0;
103 __tmp[14] = 0; __tmp[15] = 0;
104 return;
105
106 case 8:
107 __tmp[ 0] = 0; __tmp[ 1] = 0;
108 __tmp[ 2] = 0; __tmp[ 3] = 0;
109 __tmp[ 4] = 0; __tmp[ 5] = 0;
110 __tmp[ 6] = 0; __tmp[ 7] = 0;
111 return;
112
113 case 4:
114 __tmp[ 0] = 0; __tmp[ 1] = 0;
115 __tmp[ 2] = 0; __tmp[ 3] = 0;
116 return;
117 }
118 }
119 __i = __FDSET_LONGS;
120 while (__i) {
121 __i--;
122 *__tmp = 0;
123 __tmp++;
124 }
125}
126
127#endif /* defined(__KERNEL__) */
128
129#endif /* __ASM_AVR32_POSIX_TYPES_H */
diff --git a/include/asm-avr32/processor.h b/include/asm-avr32/processor.h
new file mode 100644
index 000000000000..f6913778a45f
--- /dev/null
+++ b/include/asm-avr32/processor.h
@@ -0,0 +1,147 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_PROCESSOR_H
9#define __ASM_AVR32_PROCESSOR_H
10
11#include <asm/page.h>
12#include <asm/cache.h>
13
14#define TASK_SIZE 0x80000000
15
16#ifndef __ASSEMBLY__
17
18static inline void *current_text_addr(void)
19{
20 register void *pc asm("pc");
21 return pc;
22}
23
24enum arch_type {
25 ARCH_AVR32A,
26 ARCH_AVR32B,
27 ARCH_MAX
28};
29
30enum cpu_type {
31 CPU_MORGAN,
32 CPU_AT32AP,
33 CPU_MAX
34};
35
36enum tlb_config {
37 TLB_NONE,
38 TLB_SPLIT,
39 TLB_UNIFIED,
40 TLB_INVALID
41};
42
43struct avr32_cpuinfo {
44 struct clk *clk;
45 unsigned long loops_per_jiffy;
46 enum arch_type arch_type;
47 enum cpu_type cpu_type;
48 unsigned short arch_revision;
49 unsigned short cpu_revision;
50 enum tlb_config tlb_config;
51
52 struct cache_info icache;
53 struct cache_info dcache;
54};
55
56extern struct avr32_cpuinfo boot_cpu_data;
57
58#ifdef CONFIG_SMP
59extern struct avr32_cpuinfo cpu_data[];
60#define current_cpu_data cpu_data[smp_processor_id()]
61#else
62#define cpu_data (&boot_cpu_data)
63#define current_cpu_data boot_cpu_data
64#endif
65
66/* This decides where the kernel will search for a free chunk of vm
67 * space during mmap's
68 */
69#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
70
71#define cpu_relax() barrier()
72#define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory")
73
74struct cpu_context {
75 unsigned long sr;
76 unsigned long pc;
77 unsigned long ksp; /* Kernel stack pointer */
78 unsigned long r7;
79 unsigned long r6;
80 unsigned long r5;
81 unsigned long r4;
82 unsigned long r3;
83 unsigned long r2;
84 unsigned long r1;
85 unsigned long r0;
86};
87
88/* This struct contains the CPU context as stored by switch_to() */
89struct thread_struct {
90 struct cpu_context cpu_context;
91 unsigned long single_step_addr;
92 u16 single_step_insn;
93};
94
95#define INIT_THREAD { \
96 .cpu_context = { \
97 .ksp = sizeof(init_stack) + (long)&init_stack, \
98 }, \
99}
100
/*
 * Do necessary setup to start up a newly executed thread.
 *
 * Calls set_fs(USER_DS), zeroes *regs, then sets the status register to
 * MODE_USER, the pc to new_pc with bit 0 cleared, and the sp to new_sp.
 *
 * All arguments are parenthesized so that arbitrary expressions (for
 * example "&my_regs" or a conditional pc value) expand correctly. Note
 * that regs is evaluated more than once.
 */
#define start_thread(regs, new_pc, new_sp)		\
	do {						\
		set_fs(USER_DS);			\
		memset((regs), 0, sizeof(*(regs)));	\
		(regs)->sr = MODE_USER;			\
		(regs)->pc = (new_pc) & ~1;		\
		(regs)->sp = (new_sp);			\
	} while(0)
112
113struct task_struct;
114
115/* Free all resources held by a thread */
116extern void release_thread(struct task_struct *);
117
118/* Create a kernel thread without removing it from tasklists */
119extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
120
121/* Prepare to copy thread state - unlazy all lazy status */
122#define prepare_to_copy(tsk) do { } while(0)
123
124/* Return saved PC of a blocked thread */
125#define thread_saved_pc(tsk) ((tsk)->thread.cpu_context.pc)
126
127struct pt_regs;
128void show_trace(struct task_struct *task, unsigned long *stack,
129 struct pt_regs *regs);
130
131extern unsigned long get_wchan(struct task_struct *p);
132
133#define KSTK_EIP(tsk) ((tsk)->thread.cpu_context.pc)
134#define KSTK_ESP(tsk) ((tsk)->thread.cpu_context.ksp)
135
136#define ARCH_HAS_PREFETCH
137
138static inline void prefetch(const void *x)
139{
140 const char *c = x;
141 asm volatile("pref %0" : : "r"(c));
142}
143#define PREFETCH_STRIDE L1_CACHE_BYTES
144
145#endif /* __ASSEMBLY__ */
146
147#endif /* __ASM_AVR32_PROCESSOR_H */
diff --git a/include/asm-avr32/ptrace.h b/include/asm-avr32/ptrace.h
new file mode 100644
index 000000000000..60f0f19a81f1
--- /dev/null
+++ b/include/asm-avr32/ptrace.h
@@ -0,0 +1,154 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_PTRACE_H
9#define __ASM_AVR32_PTRACE_H
10
11#define PTRACE_GETREGS 12
12#define PTRACE_SETREGS 13
13
14/*
15 * Status Register bits
16 */
17#define SR_H 0x40000000
18#define SR_R 0x20000000
19#define SR_J 0x10000000
20#define SR_DM 0x08000000
21#define SR_D 0x04000000
22#define MODE_NMI 0x01c00000
23#define MODE_EXCEPTION 0x01800000
24#define MODE_INT3 0x01400000
25#define MODE_INT2 0x01000000
26#define MODE_INT1 0x00c00000
27#define MODE_INT0 0x00800000
28#define MODE_SUPERVISOR 0x00400000
29#define MODE_USER 0x00000000
30#define MODE_MASK 0x01c00000
31#define SR_EM 0x00200000
32#define SR_I3M 0x00100000
33#define SR_I2M 0x00080000
34#define SR_I1M 0x00040000
35#define SR_I0M 0x00020000
36#define SR_GM 0x00010000
37
38#define SR_H_BIT 30
39#define SR_R_BIT 29
40#define SR_J_BIT 28
41#define SR_DM_BIT 27
42#define SR_D_BIT 26
43#define MODE_SHIFT 22
44#define SR_EM_BIT 21
45#define SR_I3M_BIT 20
46#define SR_I2M_BIT 19
47#define SR_I1M_BIT 18
48#define SR_I0M_BIT 17
49#define SR_GM_BIT 16
50
51/* The user-visible part */
52#define SR_L 0x00000020
53#define SR_Q 0x00000010
54#define SR_V 0x00000008
55#define SR_N 0x00000004
56#define SR_Z 0x00000002
57#define SR_C 0x00000001
58
59#define SR_L_BIT 5
60#define SR_Q_BIT 4
61#define SR_V_BIT 3
62#define SR_N_BIT 2
63#define SR_Z_BIT 1
64#define SR_C_BIT 0
65
66/*
67 * The order is defined by the stmts instruction. r0 is stored first,
68 * so it gets the highest address.
69 *
70 * Registers 0-12 are general-purpose registers (r12 is normally used for
71 * the function return value).
72 * Register 13 is the stack pointer
73 * Register 14 is the link register
74 * Register 15 is the program counter (retrieved from the RAR sysreg)
75 */
76#define FRAME_SIZE_FULL 72
77#define REG_R12_ORIG 68
78#define REG_R0 64
79#define REG_R1 60
80#define REG_R2 56
81#define REG_R3 52
82#define REG_R4 48
83#define REG_R5 44
84#define REG_R6 40
85#define REG_R7 36
86#define REG_R8 32
87#define REG_R9 28
88#define REG_R10 24
89#define REG_R11 20
90#define REG_R12 16
91#define REG_SP 12
92#define REG_LR 8
93
94#define FRAME_SIZE_MIN 8
95#define REG_PC 4
96#define REG_SR 0
97
98#ifndef __ASSEMBLY__
99struct pt_regs {
100 /* These are always saved */
101 unsigned long sr;
102 unsigned long pc;
103
104 /* These are sometimes saved */
105 unsigned long lr;
106 unsigned long sp;
107 unsigned long r12;
108 unsigned long r11;
109 unsigned long r10;
110 unsigned long r9;
111 unsigned long r8;
112 unsigned long r7;
113 unsigned long r6;
114 unsigned long r5;
115 unsigned long r4;
116 unsigned long r3;
117 unsigned long r2;
118 unsigned long r1;
119 unsigned long r0;
120
121 /* Only saved on system call */
122 unsigned long r12_orig;
123};
124
125#ifdef __KERNEL__
126# define user_mode(regs) (((regs)->sr & MODE_MASK) == MODE_USER)
127extern void show_regs (struct pt_regs *);
128
129static __inline__ int valid_user_regs(struct pt_regs *regs)
130{
131 /*
132 * Some of the Java bits might be acceptable if/when we
133 * implement some support for that stuff...
134 */
135 if ((regs->sr & 0xffff0000) == 0)
136 return 1;
137
138 /*
139 * Force status register flags to be sane and report this
140 * illegal behaviour...
141 */
142 regs->sr &= 0x0000ffff;
143 return 0;
144}
145
146#define instruction_pointer(regs) ((regs)->pc)
147
148#define profile_pc(regs) instruction_pointer(regs)
149
150#endif /* __KERNEL__ */
151
152#endif /* ! __ASSEMBLY__ */
153
154#endif /* __ASM_AVR32_PTRACE_H */
diff --git a/include/asm-avr32/resource.h b/include/asm-avr32/resource.h
new file mode 100644
index 000000000000..c6dd101472b1
--- /dev/null
+++ b/include/asm-avr32/resource.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_RESOURCE_H
2#define __ASM_AVR32_RESOURCE_H
3
4#include <asm-generic/resource.h>
5
6#endif /* __ASM_AVR32_RESOURCE_H */
diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h
new file mode 100644
index 000000000000..bfe7d753423c
--- /dev/null
+++ b/include/asm-avr32/scatterlist.h
@@ -0,0 +1,21 @@
1#ifndef __ASM_AVR32_SCATTERLIST_H
2#define __ASM_AVR32_SCATTERLIST_H
3
4struct scatterlist {
5 struct page *page;
6 unsigned int offset;
7 dma_addr_t dma_address;
8 unsigned int length;
9};
10
11/* These macros should be used after a pci_map_sg call has been done
12 * to get bus addresses of each of the SG entries and their lengths.
13 * You should only work with the number of sg entries pci_map_sg
14 * returns.
15 */
16#define sg_dma_address(sg) ((sg)->dma_address)
17#define sg_dma_len(sg) ((sg)->length)
18
19#define ISA_DMA_THRESHOLD (0xffffffff)
20
21#endif /* __ASM_AVR32_SCATTERLIST_H */
diff --git a/include/asm-avr32/sections.h b/include/asm-avr32/sections.h
new file mode 100644
index 000000000000..aa14252e4181
--- /dev/null
+++ b/include/asm-avr32/sections.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_SECTIONS_H
2#define __ASM_AVR32_SECTIONS_H
3
4#include <asm-generic/sections.h>
5
6#endif /* __ASM_AVR32_SECTIONS_H */
diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h
new file mode 100644
index 000000000000..ef99ddccc10c
--- /dev/null
+++ b/include/asm-avr32/semaphore.h
@@ -0,0 +1,109 @@
1/*
2 * SMP- and interrupt-safe semaphores.
3 *
4 * Copyright (C) 2006 Atmel Corporation
5 *
6 * Based on include/asm-i386/semaphore.h
7 * Copyright (C) 1996 Linus Torvalds
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13#ifndef __ASM_AVR32_SEMAPHORE_H
14#define __ASM_AVR32_SEMAPHORE_H
15
16#include <linux/linkage.h>
17
18#include <asm/system.h>
19#include <asm/atomic.h>
20#include <linux/wait.h>
21#include <linux/rwsem.h>
22
23struct semaphore {
24 atomic_t count;
25 int sleepers;
26 wait_queue_head_t wait;
27};
28
29#define __SEMAPHORE_INITIALIZER(name, n) \
30{ \
31 .count = ATOMIC_INIT(n), \
32 .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \
33}
34
35#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
36 struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
37
38#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
39#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
40
41static inline void sema_init (struct semaphore *sem, int val)
42{
43 atomic_set(&sem->count, val);
44 sem->sleepers = 0;
45 init_waitqueue_head(&sem->wait);
46}
47
48static inline void init_MUTEX (struct semaphore *sem)
49{
50 sema_init(sem, 1);
51}
52
53static inline void init_MUTEX_LOCKED (struct semaphore *sem)
54{
55 sema_init(sem, 0);
56}
57
58void __down(struct semaphore * sem);
59int __down_interruptible(struct semaphore * sem);
60void __up(struct semaphore * sem);
61
62/*
63 * This is ugly, but we want the default case to fall through.
64 * "__down_failed" is a special asm handler that calls the C
65 * routine that actually waits. See arch/i386/kernel/semaphore.c
66 */
67static inline void down(struct semaphore * sem)
68{
69 might_sleep();
70 if (unlikely(atomic_dec_return (&sem->count) < 0))
71 __down (sem);
72}
73
74/*
75 * Interruptible try to acquire a semaphore. If we obtained
76 * it, return zero. If we were interrupted, returns -EINTR
77 */
78static inline int down_interruptible(struct semaphore * sem)
79{
80 int ret = 0;
81
82 might_sleep();
83 if (unlikely(atomic_dec_return (&sem->count) < 0))
84 ret = __down_interruptible (sem);
85 return ret;
86}
87
88/*
89 * Non-blockingly attempt to down() a semaphore.
90 * Returns zero if we acquired it
91 */
92static inline int down_trylock(struct semaphore * sem)
93{
94 return atomic_dec_if_positive(&sem->count) < 0;
95}
96
97/*
98 * Note! This is subtle. We jump to wake people up only if
99 * the semaphore was negative (== somebody was waiting on it).
100 * The default case (no contention) will result in NO
101 * jumps for both down() and up().
102 */
103static inline void up(struct semaphore * sem)
104{
105 if (unlikely(atomic_inc_return (&sem->count) <= 0))
106 __up (sem);
107}
108
109#endif /*__ASM_AVR32_SEMAPHORE_H */
diff --git a/include/asm-avr32/sembuf.h b/include/asm-avr32/sembuf.h
new file mode 100644
index 000000000000..e472216e0c97
--- /dev/null
+++ b/include/asm-avr32/sembuf.h
@@ -0,0 +1,25 @@
1#ifndef __ASM_AVR32_SEMBUF_H
2#define __ASM_AVR32_SEMBUF_H
3
4/*
5 * The semid64_ds structure for AVR32 architecture.
6 * Note extra padding because this structure is passed back and forth
7 * between kernel and user space.
8 *
9 * Pad space is left for:
10 * - 64-bit time_t to solve y2038 problem
11 * - 2 miscellaneous 32-bit values
12 */
13
14struct semid64_ds {
15 struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
16 __kernel_time_t sem_otime; /* last semop time */
17 unsigned long __unused1;
18 __kernel_time_t sem_ctime; /* last change time */
19 unsigned long __unused2;
20 unsigned long sem_nsems; /* no. of semaphores in array */
21 unsigned long __unused3;
22 unsigned long __unused4;
23};
24
25#endif /* __ASM_AVR32_SEMBUF_H */
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h
new file mode 100644
index 000000000000..10193da4113b
--- /dev/null
+++ b/include/asm-avr32/setup.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * Based on linux/include/asm-arm/setup.h
5 * Copyright (C) 1997-1999 Russell King
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef __ASM_AVR32_SETUP_H__
12#define __ASM_AVR32_SETUP_H__
13
14#define COMMAND_LINE_SIZE 256
15
16/* Magic number indicating that a tag table is present */
17#define ATAG_MAGIC 0xa2a25441
18
19#ifndef __ASSEMBLY__
20
21/*
22 * Generic memory range, used by several tags.
23 *
24 * addr is always physical.
25 * size is measured in bytes.
26 * next is for use by the OS, e.g. for grouping regions into
27 * linked lists.
28 */
29struct tag_mem_range {
30 u32 addr;
31 u32 size;
32 struct tag_mem_range * next;
33};
34
35/* The list ends with an ATAG_NONE node. */
36#define ATAG_NONE 0x00000000
37
38struct tag_header {
39 u32 size;
40 u32 tag;
41};
42
43/* The list must start with an ATAG_CORE node */
44#define ATAG_CORE 0x54410001
45
46struct tag_core {
47 u32 flags;
48 u32 pagesize;
49 u32 rootdev;
50};
51
52/* it is allowed to have multiple ATAG_MEM nodes */
53#define ATAG_MEM 0x54410002
54/* ATAG_MEM uses tag_mem_range */
55
56/* command line: \0 terminated string */
57#define ATAG_CMDLINE 0x54410003
58
59struct tag_cmdline {
60 char cmdline[1]; /* this is the minimum size */
61};
62
63/* Ramdisk image (may be compressed) */
64#define ATAG_RDIMG 0x54410004
65/* ATAG_RDIMG uses tag_mem_range */
66
67/* Information about various clocks present in the system */
68#define ATAG_CLOCK 0x54410005
69
70struct tag_clock {
71 u32 clock_id; /* Which clock are we talking about? */
72 u32 clock_flags; /* Special features */
73 u64 clock_hz; /* Clock speed in Hz */
74};
75
76/* The clock types we know about */
77#define CLOCK_BOOTCPU 0
78
79/* Memory reserved for the system (e.g. the bootloader) */
80#define ATAG_RSVD_MEM 0x54410006
81/* ATAG_RSVD_MEM uses tag_mem_range */
82
83/* Ethernet information */
84
85#define ATAG_ETHERNET 0x54410007
86
87struct tag_ethernet {
88 u8 mac_index;
89 u8 mii_phy_addr;
90 u8 hw_address[6];
91};
92
93#define ETH_INVALID_PHY 0xff
94
95struct tag {
96 struct tag_header hdr;
97 union {
98 struct tag_core core;
99 struct tag_mem_range mem_range;
100 struct tag_cmdline cmdline;
101 struct tag_clock clock;
102 struct tag_ethernet ethernet;
103 } u;
104};
105
106struct tagtable {
107 u32 tag;
108 int (*parse)(struct tag *);
109};
110
111#define __tag __attribute_used__ __attribute__((__section__(".taglist")))
112#define __tagtable(tag, fn) \
113 static struct tagtable __tagtable_##fn __tag = { tag, fn }
114
115#define tag_member_present(tag,member) \
116 ((unsigned long)(&((struct tag *)0L)->member + 1) \
117 <= (tag)->hdr.size * 4)
118
119#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size))
120#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2)
121
122#define for_each_tag(t,base) \
123 for (t = base; t->hdr.size; t = tag_next(t))
124
125extern struct tag_mem_range *mem_phys;
126extern struct tag_mem_range *mem_reserved;
127extern struct tag_mem_range *mem_ramdisk;
128
129extern struct tag *bootloader_tags;
130
131extern void setup_bootmem(void);
132extern void setup_processor(void);
133extern void board_setup_fbmem(unsigned long fbmem_start,
134 unsigned long fbmem_size);
135
136/* Chip-specific hook to enable the use of SDRAM */
137void chip_enable_sdram(void);
138
139#endif /* !__ASSEMBLY__ */
140
141#endif /* __ASM_AVR32_SETUP_H__ */
diff --git a/include/asm-avr32/shmbuf.h b/include/asm-avr32/shmbuf.h
new file mode 100644
index 000000000000..c62fba41739a
--- /dev/null
+++ b/include/asm-avr32/shmbuf.h
@@ -0,0 +1,42 @@
1#ifndef __ASM_AVR32_SHMBUF_H
2#define __ASM_AVR32_SHMBUF_H
3
4/*
5 * The shmid64_ds structure for AVR32 architecture.
6 * Note extra padding because this structure is passed back and forth
7 * between kernel and user space.
8 *
9 * Pad space is left for:
10 * - 64-bit time_t to solve y2038 problem
11 * - 2 miscellaneous 32-bit values
12 */
13
14struct shmid64_ds {
15 struct ipc64_perm shm_perm; /* operation perms */
16 size_t shm_segsz; /* size of segment (bytes) */
17 __kernel_time_t shm_atime; /* last attach time */
18 unsigned long __unused1;
19 __kernel_time_t shm_dtime; /* last detach time */
20 unsigned long __unused2;
21 __kernel_time_t shm_ctime; /* last change time */
22 unsigned long __unused3;
23 __kernel_pid_t shm_cpid; /* pid of creator */
24 __kernel_pid_t shm_lpid; /* pid of last operator */
25 unsigned long shm_nattch; /* no. of current attaches */
26 unsigned long __unused4;
27 unsigned long __unused5;
28};
29
30struct shminfo64 {
31 unsigned long shmmax;
32 unsigned long shmmin;
33 unsigned long shmmni;
34 unsigned long shmseg;
35 unsigned long shmall;
36 unsigned long __unused1;
37 unsigned long __unused2;
38 unsigned long __unused3;
39 unsigned long __unused4;
40};
41
42#endif /* __ASM_AVR32_SHMBUF_H */
diff --git a/include/asm-avr32/shmparam.h b/include/asm-avr32/shmparam.h
new file mode 100644
index 000000000000..3681266c77f7
--- /dev/null
+++ b/include/asm-avr32/shmparam.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_SHMPARAM_H
2#define __ASM_AVR32_SHMPARAM_H
3
4#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
5
6#endif /* __ASM_AVR32_SHMPARAM_H */
diff --git a/include/asm-avr32/sigcontext.h b/include/asm-avr32/sigcontext.h
new file mode 100644
index 000000000000..e04062b5f39f
--- /dev/null
+++ b/include/asm-avr32/sigcontext.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_SIGCONTEXT_H
9#define __ASM_AVR32_SIGCONTEXT_H
10
11struct sigcontext {
12 unsigned long oldmask;
13
14 /* CPU registers */
15 unsigned long sr;
16 unsigned long pc;
17 unsigned long lr;
18 unsigned long sp;
19 unsigned long r12;
20 unsigned long r11;
21 unsigned long r10;
22 unsigned long r9;
23 unsigned long r8;
24 unsigned long r7;
25 unsigned long r6;
26 unsigned long r5;
27 unsigned long r4;
28 unsigned long r3;
29 unsigned long r2;
30 unsigned long r1;
31 unsigned long r0;
32};
33
34#endif /* __ASM_AVR32_SIGCONTEXT_H */
diff --git a/include/asm-avr32/siginfo.h b/include/asm-avr32/siginfo.h
new file mode 100644
index 000000000000..5ee93f40a8a8
--- /dev/null
+++ b/include/asm-avr32/siginfo.h
@@ -0,0 +1,6 @@
1#ifndef _AVR32_SIGINFO_H
2#define _AVR32_SIGINFO_H
3
4#include <asm-generic/siginfo.h>
5
6#endif
diff --git a/include/asm-avr32/signal.h b/include/asm-avr32/signal.h
new file mode 100644
index 000000000000..caffefeeba1f
--- /dev/null
+++ b/include/asm-avr32/signal.h
@@ -0,0 +1,168 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_SIGNAL_H
9#define __ASM_AVR32_SIGNAL_H
10
11#include <linux/types.h>
12
13/* Avoid too many header ordering problems. */
14struct siginfo;
15
16#ifdef __KERNEL__
17/* Most things should be clean enough to redefine this at will, if care
18 is taken to make libc match. */
19
20#define _NSIG 64
21#define _NSIG_BPW 32
22#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
23
24typedef unsigned long old_sigset_t; /* at least 32 bits */
25
26typedef struct {
27 unsigned long sig[_NSIG_WORDS];
28} sigset_t;
29
30#else
31/* Here we must cater to libcs that poke about in kernel headers. */
32
33#define NSIG 32
34typedef unsigned long sigset_t;
35
36#endif /* __KERNEL__ */
37
38#define SIGHUP 1
39#define SIGINT 2
40#define SIGQUIT 3
41#define SIGILL 4
42#define SIGTRAP 5
43#define SIGABRT 6
44#define SIGIOT 6
45#define SIGBUS 7
46#define SIGFPE 8
47#define SIGKILL 9
48#define SIGUSR1 10
49#define SIGSEGV 11
50#define SIGUSR2 12
51#define SIGPIPE 13
52#define SIGALRM 14
53#define SIGTERM 15
54#define SIGSTKFLT 16
55#define SIGCHLD 17
56#define SIGCONT 18
57#define SIGSTOP 19
58#define SIGTSTP 20
59#define SIGTTIN 21
60#define SIGTTOU 22
61#define SIGURG 23
62#define SIGXCPU 24
63#define SIGXFSZ 25
64#define SIGVTALRM 26
65#define SIGPROF 27
66#define SIGWINCH 28
67#define SIGIO 29
68#define SIGPOLL SIGIO
69/*
70#define SIGLOST 29
71*/
72#define SIGPWR 30
73#define SIGSYS 31
74#define SIGUNUSED 31
75
76/* These should not be considered constants from userland. */
77#define SIGRTMIN 32
78#define SIGRTMAX (_NSIG-1)
79
80/*
81 * SA_FLAGS values:
82 *
83 * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
84 * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
85 * SA_SIGINFO deliver the signal with SIGINFO structs
86 * SA_ONSTACK indicates that a registered stack_t will be used.
87 * SA_RESTART flag to get restarting signals (which were the default long ago)
88 * SA_NODEFER prevents the current signal from being masked in the handler.
89 * SA_RESETHAND clears the handler when the signal is delivered.
90 *
91 * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
92 * Unix names RESETHAND and NODEFER respectively.
93 */
94#define SA_NOCLDSTOP 0x00000001
95#define SA_NOCLDWAIT 0x00000002
96#define SA_SIGINFO 0x00000004
97#define SA_RESTORER 0x04000000
98#define SA_ONSTACK 0x08000000
99#define SA_RESTART 0x10000000
100#define SA_NODEFER 0x40000000
101#define SA_RESETHAND 0x80000000
102
103#define SA_NOMASK SA_NODEFER
104#define SA_ONESHOT SA_RESETHAND
105
106/*
107 * sigaltstack controls
108 */
109#define SS_ONSTACK 1
110#define SS_DISABLE 2
111
112#define MINSIGSTKSZ 2048
113#define SIGSTKSZ 8192
114
115#include <asm-generic/signal.h>
116
117#ifdef __KERNEL__
118struct old_sigaction {
119 __sighandler_t sa_handler;
120 old_sigset_t sa_mask;
121 unsigned long sa_flags;
122 __sigrestore_t sa_restorer;
123};
124
125struct sigaction {
126 __sighandler_t sa_handler;
127 unsigned long sa_flags;
128 __sigrestore_t sa_restorer;
129 sigset_t sa_mask; /* mask last for extensibility */
130};
131
132struct k_sigaction {
133 struct sigaction sa;
134};
135#else
136/* Here we must cater to libcs that poke about in kernel headers. */
137
138struct sigaction {
139 union {
140 __sighandler_t _sa_handler;
141 void (*_sa_sigaction)(int, struct siginfo *, void *);
142 } _u;
143 sigset_t sa_mask;
144 unsigned long sa_flags;
145 void (*sa_restorer)(void);
146};
147
148#define sa_handler _u._sa_handler
149#define sa_sigaction _u._sa_sigaction
150
151#endif /* __KERNEL__ */
152
153typedef struct sigaltstack {
154 void __user *ss_sp;
155 int ss_flags;
156 size_t ss_size;
157} stack_t;
158
159#ifdef __KERNEL__
160
161#include <asm/sigcontext.h>
162#undef __HAVE_ARCH_SIG_BITOPS
163
164#define ptrace_signal_deliver(regs, cookie) do { } while (0)
165
166#endif /* __KERNEL__ */
167
168#endif
diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h
new file mode 100644
index 000000000000..543229de8173
--- /dev/null
+++ b/include/asm-avr32/socket.h
@@ -0,0 +1,53 @@
1#ifndef __ASM_AVR32_SOCKET_H
2#define __ASM_AVR32_SOCKET_H
3
4#include <asm/sockios.h>
5
6/* For setsockopt(2) */
7#define SOL_SOCKET 1
8
9#define SO_DEBUG 1
10#define SO_REUSEADDR 2
11#define SO_TYPE 3
12#define SO_ERROR 4
13#define SO_DONTROUTE 5
14#define SO_BROADCAST 6
15#define SO_SNDBUF 7
16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
19#define SO_KEEPALIVE 9
20#define SO_OOBINLINE 10
21#define SO_NO_CHECK 11
22#define SO_PRIORITY 12
23#define SO_LINGER 13
24#define SO_BSDCOMPAT 14
25/* To add :#define SO_REUSEPORT 15 */
26#define SO_PASSCRED 16
27#define SO_PEERCRED 17
28#define SO_RCVLOWAT 18
29#define SO_SNDLOWAT 19
30#define SO_RCVTIMEO 20
31#define SO_SNDTIMEO 21
32
33/* Security levels - as per NRL IPv6 - don't actually do anything */
34#define SO_SECURITY_AUTHENTICATION 22
35#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
36#define SO_SECURITY_ENCRYPTION_NETWORK 24
37
38#define SO_BINDTODEVICE 25
39
40/* Socket filtering */
41#define SO_ATTACH_FILTER 26
42#define SO_DETACH_FILTER 27
43
44#define SO_PEERNAME 28
45#define SO_TIMESTAMP 29
46#define SCM_TIMESTAMP SO_TIMESTAMP
47
48#define SO_ACCEPTCONN 30
49
50#define SO_PEERSEC 31
51#define SO_PASSSEC 34
52
53#endif /* __ASM_AVR32_SOCKET_H */
diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h
new file mode 100644
index 000000000000..84f3d65b3b3b
--- /dev/null
+++ b/include/asm-avr32/sockios.h
@@ -0,0 +1,12 @@
1#ifndef __ASM_AVR32_SOCKIOS_H
2#define __ASM_AVR32_SOCKIOS_H
3
4/* Socket-level I/O control calls. */
5#define FIOSETOWN 0x8901
6#define SIOCSPGRP 0x8902
7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */
11
12#endif /* __ASM_AVR32_SOCKIOS_H */
diff --git a/include/asm-avr32/stat.h b/include/asm-avr32/stat.h
new file mode 100644
index 000000000000..e72881e10230
--- /dev/null
+++ b/include/asm-avr32/stat.h
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_STAT_H
9#define __ASM_AVR32_STAT_H
10
11struct __old_kernel_stat {
12 unsigned short st_dev;
13 unsigned short st_ino;
14 unsigned short st_mode;
15 unsigned short st_nlink;
16 unsigned short st_uid;
17 unsigned short st_gid;
18 unsigned short st_rdev;
19 unsigned long st_size;
20 unsigned long st_atime;
21 unsigned long st_mtime;
22 unsigned long st_ctime;
23};
24
25struct stat {
26 unsigned long st_dev;
27 unsigned long st_ino;
28 unsigned short st_mode;
29 unsigned short st_nlink;
30 unsigned short st_uid;
31 unsigned short st_gid;
32 unsigned long st_rdev;
33 unsigned long st_size;
34 unsigned long st_blksize;
35 unsigned long st_blocks;
36 unsigned long st_atime;
37 unsigned long st_atime_nsec;
38 unsigned long st_mtime;
39 unsigned long st_mtime_nsec;
40 unsigned long st_ctime;
41 unsigned long st_ctime_nsec;
42 unsigned long __unused4;
43 unsigned long __unused5;
44};
45
46#define STAT_HAVE_NSEC 1
47
48struct stat64 {
49 unsigned long long st_dev;
50
51 unsigned long long st_ino;
52 unsigned int st_mode;
53 unsigned int st_nlink;
54
55 unsigned long st_uid;
56 unsigned long st_gid;
57
58 unsigned long long st_rdev;
59
60 long long st_size;
61 unsigned long __pad1; /* align 64-bit st_blocks */
62 unsigned long st_blksize;
63
64 unsigned long long st_blocks; /* Number 512-byte blocks allocated. */
65
66 unsigned long st_atime;
67 unsigned long st_atime_nsec;
68
69 unsigned long st_mtime;
70 unsigned long st_mtime_nsec;
71
72 unsigned long st_ctime;
73 unsigned long st_ctime_nsec;
74
75 unsigned long __unused1;
76 unsigned long __unused2;
77};
78
79#endif /* __ASM_AVR32_STAT_H */
diff --git a/include/asm-avr32/statfs.h b/include/asm-avr32/statfs.h
new file mode 100644
index 000000000000..2961bd18c50e
--- /dev/null
+++ b/include/asm-avr32/statfs.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_STATFS_H
2#define __ASM_AVR32_STATFS_H
3
4#include <asm-generic/statfs.h>
5
6#endif /* __ASM_AVR32_STATFS_H */
diff --git a/include/asm-avr32/string.h b/include/asm-avr32/string.h
new file mode 100644
index 000000000000..c91a623cd585
--- /dev/null
+++ b/include/asm-avr32/string.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_STRING_H
9#define __ASM_AVR32_STRING_H
10
11#define __HAVE_ARCH_MEMSET
12extern void *memset(void *b, int c, size_t len);
13
14#define __HAVE_ARCH_MEMCPY
15extern void *memcpy(void *to, const void *from, size_t len);
16
17#endif /* __ASM_AVR32_STRING_H */
diff --git a/include/asm-avr32/sysreg.h b/include/asm-avr32/sysreg.h
new file mode 100644
index 000000000000..f91975f330f6
--- /dev/null
+++ b/include/asm-avr32/sysreg.h
@@ -0,0 +1,332 @@
1/*
2 * AVR32 System Registers
3 *
4 * Copyright (C) 2004-2006 Atmel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_AVR32_SYSREG_H__
11#define __ASM_AVR32_SYSREG_H__
12
13/* sysreg register offsets */
14#define SYSREG_SR 0x0000
15#define SYSREG_EVBA 0x0004
16#define SYSREG_ACBA 0x0008
17#define SYSREG_CPUCR 0x000c
18#define SYSREG_ECR 0x0010
19#define SYSREG_RSR_SUP 0x0014
20#define SYSREG_RSR_INT0 0x0018
21#define SYSREG_RSR_INT1 0x001c
22#define SYSREG_RSR_INT2 0x0020
23#define SYSREG_RSR_INT3 0x0024
24#define SYSREG_RSR_EX 0x0028
25#define SYSREG_RSR_NMI 0x002c
26#define SYSREG_RSR_DBG 0x0030
27#define SYSREG_RAR_SUP 0x0034
28#define SYSREG_RAR_INT0 0x0038
29#define SYSREG_RAR_INT1 0x003c
30#define SYSREG_RAR_INT2 0x0040
31#define SYSREG_RAR_INT3 0x0044
32#define SYSREG_RAR_EX 0x0048
33#define SYSREG_RAR_NMI 0x004c
34#define SYSREG_RAR_DBG 0x0050
35#define SYSREG_JECR 0x0054
36#define SYSREG_JOSP 0x0058
37#define SYSREG_JAVA_LV0 0x005c
38#define SYSREG_JAVA_LV1 0x0060
39#define SYSREG_JAVA_LV2 0x0064
40#define SYSREG_JAVA_LV3 0x0068
41#define SYSREG_JAVA_LV4 0x006c
42#define SYSREG_JAVA_LV5 0x0070
43#define SYSREG_JAVA_LV6 0x0074
44#define SYSREG_JAVA_LV7 0x0078
45#define SYSREG_JTBA 0x007c
46#define SYSREG_JBCR 0x0080
47#define SYSREG_CONFIG0 0x0100
48#define SYSREG_CONFIG1 0x0104
49#define SYSREG_COUNT 0x0108
50#define SYSREG_COMPARE 0x010c
51#define SYSREG_TLBEHI 0x0110
52#define SYSREG_TLBELO 0x0114
53#define SYSREG_PTBR 0x0118
54#define SYSREG_TLBEAR 0x011c
55#define SYSREG_MMUCR 0x0120
56#define SYSREG_TLBARLO 0x0124
57#define SYSREG_TLBARHI 0x0128
58#define SYSREG_PCCNT 0x012c
59#define SYSREG_PCNT0 0x0130
60#define SYSREG_PCNT1 0x0134
61#define SYSREG_PCCR 0x0138
62#define SYSREG_BEAR 0x013c
63
64/* Bitfields in SR */
65#define SYSREG_SR_C_OFFSET 0
66#define SYSREG_SR_C_SIZE 1
67#define SYSREG_Z_OFFSET 1
68#define SYSREG_Z_SIZE 1
69#define SYSREG_SR_N_OFFSET 2
70#define SYSREG_SR_N_SIZE 1
71#define SYSREG_SR_V_OFFSET 3
72#define SYSREG_SR_V_SIZE 1
73#define SYSREG_Q_OFFSET 4
74#define SYSREG_Q_SIZE 1
75#define SYSREG_GM_OFFSET 16
76#define SYSREG_GM_SIZE 1
77#define SYSREG_I0M_OFFSET 17
78#define SYSREG_I0M_SIZE 1
79#define SYSREG_I1M_OFFSET 18
80#define SYSREG_I1M_SIZE 1
81#define SYSREG_I2M_OFFSET 19
82#define SYSREG_I2M_SIZE 1
83#define SYSREG_I3M_OFFSET 20
84#define SYSREG_I3M_SIZE 1
85#define SYSREG_EM_OFFSET 21
86#define SYSREG_EM_SIZE 1
87#define SYSREG_M0_OFFSET 22
88#define SYSREG_M0_SIZE 1
89#define SYSREG_M1_OFFSET 23
90#define SYSREG_M1_SIZE 1
91#define SYSREG_M2_OFFSET 24
92#define SYSREG_M2_SIZE 1
93#define SYSREG_SR_D_OFFSET 26
94#define SYSREG_SR_D_SIZE 1
95#define SYSREG_DM_OFFSET 27
96#define SYSREG_DM_SIZE 1
97#define SYSREG_SR_J_OFFSET 28
98#define SYSREG_SR_J_SIZE 1
99#define SYSREG_R_OFFSET 29
100#define SYSREG_R_SIZE 1
101#define SYSREG_H_OFFSET 30
102#define SYSREG_H_SIZE 1
103
104/* Bitfields in EVBA */
105
106/* Bitfields in ACBA */
107
108/* Bitfields in CPUCR */
109#define SYSREG_BI_OFFSET 0
110#define SYSREG_BI_SIZE 1
111#define SYSREG_BE_OFFSET 1
112#define SYSREG_BE_SIZE 1
113#define SYSREG_FE_OFFSET 2
114#define SYSREG_FE_SIZE 1
115#define SYSREG_RE_OFFSET 3
116#define SYSREG_RE_SIZE 1
117#define SYSREG_IBE_OFFSET 4
118#define SYSREG_IBE_SIZE 1
119#define SYSREG_IEE_OFFSET 5
120#define SYSREG_IEE_SIZE 1
121
122/* Bitfields in ECR */
123#define SYSREG_ECR_OFFSET 0
124#define SYSREG_ECR_SIZE 32
125
126/* Bitfields in RSR_SUP */
127
128/* Bitfields in RSR_INT0 */
129
130/* Bitfields in RSR_INT1 */
131
132/* Bitfields in RSR_INT2 */
133
134/* Bitfields in RSR_INT3 */
135
136/* Bitfields in RSR_EX */
137
138/* Bitfields in RSR_NMI */
139
140/* Bitfields in RSR_DBG */
141
142/* Bitfields in RAR_SUP */
143
144/* Bitfields in RAR_INT0 */
145
146/* Bitfields in RAR_INT1 */
147
148/* Bitfields in RAR_INT2 */
149
150/* Bitfields in RAR_INT3 */
151
152/* Bitfields in RAR_EX */
153
154/* Bitfields in RAR_NMI */
155
156/* Bitfields in RAR_DBG */
157
158/* Bitfields in JECR */
159
160/* Bitfields in JOSP */
161
162/* Bitfields in JAVA_LV0 */
163
164/* Bitfields in JAVA_LV1 */
165
166/* Bitfields in JAVA_LV2 */
167
168/* Bitfields in JAVA_LV3 */
169
170/* Bitfields in JAVA_LV4 */
171
172/* Bitfields in JAVA_LV5 */
173
174/* Bitfields in JAVA_LV6 */
175
176/* Bitfields in JAVA_LV7 */
177
178/* Bitfields in JTBA */
179
180/* Bitfields in JBCR */
181
182/* Bitfields in CONFIG0 */
183#define SYSREG_CONFIG0_D_OFFSET 1
184#define SYSREG_CONFIG0_D_SIZE 1
185#define SYSREG_CONFIG0_S_OFFSET 2
186#define SYSREG_CONFIG0_S_SIZE 1
187#define SYSREG_O_OFFSET 3
188#define SYSREG_O_SIZE 1
189#define SYSREG_P_OFFSET 4
190#define SYSREG_P_SIZE 1
191#define SYSREG_CONFIG0_J_OFFSET 5
192#define SYSREG_CONFIG0_J_SIZE 1
193#define SYSREG_F_OFFSET 6
194#define SYSREG_F_SIZE 1
195#define SYSREG_MMUT_OFFSET 7
196#define SYSREG_MMUT_SIZE 3
197#define SYSREG_AR_OFFSET 10
198#define SYSREG_AR_SIZE 3
199#define SYSREG_AT_OFFSET 13
200#define SYSREG_AT_SIZE 3
201#define SYSREG_PROCESSORREVISION_OFFSET 16
202#define SYSREG_PROCESSORREVISION_SIZE 8
203#define SYSREG_PROCESSORID_OFFSET 24
204#define SYSREG_PROCESSORID_SIZE 8
205
/*
 * Bitfields in CONFIG1
 *
 * The fields tile the 32-bit register without gaps.  IASS spans bits
 * 12:10 (three bits, like its data-cache counterpart DASS), ending
 * right below ILSZ at bit 13.
 */
#define SYSREG_DASS_OFFSET			0
#define SYSREG_DASS_SIZE			3
#define SYSREG_DLSZ_OFFSET			3
#define SYSREG_DLSZ_SIZE			3
#define SYSREG_DSET_OFFSET			6
#define SYSREG_DSET_SIZE			4
#define SYSREG_IASS_OFFSET			10
#define SYSREG_IASS_SIZE			3
#define SYSREG_ILSZ_OFFSET			13
#define SYSREG_ILSZ_SIZE			3
#define SYSREG_ISET_OFFSET			16
#define SYSREG_ISET_SIZE			4
#define SYSREG_DMMUSZ_OFFSET			20
#define SYSREG_DMMUSZ_SIZE			6
#define SYSREG_IMMUSZ_OFFSET			26
#define SYSREG_IMMUSZ_SIZE			6

223
224/* Bitfields in COUNT */
225
226/* Bitfields in COMPARE */
227
228/* Bitfields in TLBEHI */
229#define SYSREG_ASID_OFFSET 0
230#define SYSREG_ASID_SIZE 8
231#define SYSREG_TLBEHI_I_OFFSET 8
232#define SYSREG_TLBEHI_I_SIZE 1
233#define SYSREG_TLBEHI_V_OFFSET 9
234#define SYSREG_TLBEHI_V_SIZE 1
235#define SYSREG_VPN_OFFSET 10
236#define SYSREG_VPN_SIZE 22
237
238/* Bitfields in TLBELO */
239#define SYSREG_W_OFFSET 0
240#define SYSREG_W_SIZE 1
241#define SYSREG_TLBELO_D_OFFSET 1
242#define SYSREG_TLBELO_D_SIZE 1
243#define SYSREG_SZ_OFFSET 2
244#define SYSREG_SZ_SIZE 2
245#define SYSREG_AP_OFFSET 4
246#define SYSREG_AP_SIZE 3
247#define SYSREG_B_OFFSET 7
248#define SYSREG_B_SIZE 1
249#define SYSREG_G_OFFSET 8
250#define SYSREG_G_SIZE 1
251#define SYSREG_TLBELO_C_OFFSET 9
252#define SYSREG_TLBELO_C_SIZE 1
253#define SYSREG_PFN_OFFSET 10
254#define SYSREG_PFN_SIZE 22
255
256/* Bitfields in PTBR */
257
258/* Bitfields in TLBEAR */
259
260/* Bitfields in MMUCR */
261#define SYSREG_E_OFFSET 0
262#define SYSREG_E_SIZE 1
263#define SYSREG_M_OFFSET 1
264#define SYSREG_M_SIZE 1
265#define SYSREG_MMUCR_I_OFFSET 2
266#define SYSREG_MMUCR_I_SIZE 1
267#define SYSREG_MMUCR_N_OFFSET 3
268#define SYSREG_MMUCR_N_SIZE 1
269#define SYSREG_MMUCR_S_OFFSET 4
270#define SYSREG_MMUCR_S_SIZE 1
271#define SYSREG_DLA_OFFSET 8
272#define SYSREG_DLA_SIZE 6
273#define SYSREG_DRP_OFFSET 14
274#define SYSREG_DRP_SIZE 6
275#define SYSREG_ILA_OFFSET 20
276#define SYSREG_ILA_SIZE 6
277#define SYSREG_IRP_OFFSET 26
278#define SYSREG_IRP_SIZE 6
279
280/* Bitfields in TLBARLO */
281
282/* Bitfields in TLBARHI */
283
284/* Bitfields in PCCNT */
285
286/* Bitfields in PCNT0 */
287
288/* Bitfields in PCNT1 */
289
290/* Bitfields in PCCR */
291
292/* Bitfields in BEAR */
293
294/* Constants for ECR */
295#define ECR_UNRECOVERABLE 0
296#define ECR_TLB_MULTIPLE 1
297#define ECR_BUS_ERROR_WRITE 2
298#define ECR_BUS_ERROR_READ 3
299#define ECR_NMI 4
300#define ECR_ADDR_ALIGN_X 5
301#define ECR_PROTECTION_X 6
302#define ECR_DEBUG 7
303#define ECR_ILLEGAL_OPCODE 8
304#define ECR_UNIMPL_INSTRUCTION 9
305#define ECR_PRIVILEGE_VIOLATION 10
306#define ECR_FPE 11
307#define ECR_COPROC_ABSENT 12
308#define ECR_ADDR_ALIGN_R 13
309#define ECR_ADDR_ALIGN_W 14
310#define ECR_PROTECTION_R 15
311#define ECR_PROTECTION_W 16
312#define ECR_DTLB_MODIFIED 17
313#define ECR_TLB_MISS_X 20
314#define ECR_TLB_MISS_R 24
315#define ECR_TLB_MISS_W 28
316
/*
 * Bit manipulation macros
 *
 * SYSREG_BIT(name)             - mask with only the bit at name's offset set
 * SYSREG_BF(name,value)        - value truncated to the width of field
 *                                name and shifted into position
 * SYSREG_BFEXT(name,value)     - field name extracted from register value
 * SYSREG_BFINS(name,value,old) - old with field name replaced by value
 *
 * All masks are built in unsigned long.  Shifting a signed 1 by the
 * field size is undefined for a 32-bit wide field (ECR) and overflows
 * for fields that reach bit 31 (e.g. PROCESSORID), so the field mask is
 * derived by shifting an all-ones 32-bit constant right instead.
 */
#define __SYSREG_MASK(name)		(0xffffffffUL >> (32 - SYSREG_##name##_SIZE))
#define SYSREG_BIT(name)		(1UL << SYSREG_##name##_OFFSET)
#define SYSREG_BF(name,value)		(((value) & __SYSREG_MASK(name)) << SYSREG_##name##_OFFSET)
#define SYSREG_BFEXT(name,value)	(((value) >> SYSREG_##name##_OFFSET) & __SYSREG_MASK(name))
#define SYSREG_BFINS(name,value,old)	(((old) & ~(__SYSREG_MASK(name) << SYSREG_##name##_OFFSET)) | SYSREG_BF(name,value))

322
323#ifdef __CHECKER__
324extern unsigned long __builtin_mfsr(unsigned long reg);
325extern void __builtin_mtsr(unsigned long reg, unsigned long value);
326#endif
327
328/* Register access macros */
329#define sysreg_read(reg) __builtin_mfsr(SYSREG_##reg)
330#define sysreg_write(reg, value) __builtin_mtsr(SYSREG_##reg, value)
331
332#endif /* __ASM_AVR32_SYSREG_H__ */
diff --git a/include/asm-avr32/system.h b/include/asm-avr32/system.h
new file mode 100644
index 000000000000..ac596058697d
--- /dev/null
+++ b/include/asm-avr32/system.h
@@ -0,0 +1,155 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_SYSTEM_H
9#define __ASM_AVR32_SYSTEM_H
10
11#include <linux/compiler.h>
12#include <linux/types.h>
13
14#include <asm/ptrace.h>
15#include <asm/sysreg.h>
16
17#define xchg(ptr,x) \
18 ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
19
20#define nop() asm volatile("nop")
21
22#define mb() asm volatile("" : : : "memory")
23#define rmb() mb()
24#define wmb() asm volatile("sync 0" : : : "memory")
25#define read_barrier_depends() do { } while(0)
26#define set_mb(var, value) do { var = value; mb(); } while(0)
27
28/*
29 * Help PathFinder and other Nexus-compliant debuggers keep track of
30 * the current PID by emitting an Ownership Trace Message each time we
31 * switch task.
32 */
33#ifdef CONFIG_OWNERSHIP_TRACE
34#include <asm/ocd.h>
/*
 * Write the outgoing task's PID and then the current task's PID to
 * DBGREG_PID via __mtdr().  `prev' is parenthesized so that any pointer
 * expression may be passed.
 */
#define finish_arch_switch(prev)			\
	do {						\
		__mtdr(DBGREG_PID, (prev)->pid);	\
		__mtdr(DBGREG_PID, current->pid);	\
	} while (0)
40#endif
41
42/*
43 * switch_to(prev, next, last) should switch from task `prev' to task
44 * `next'. `prev' will never be the same as `next'.
45 *
46 * We just delegate everything to the __switch_to assembly function,
47 * which is implemented in arch/avr32/kernel/switch_to.S
48 *
49 * mb() tells GCC not to cache `current' across this call.
50 */
struct cpu_context;
struct task_struct;
extern struct task_struct *__switch_to(struct task_struct *,
				       struct cpu_context *,
				       struct cpu_context *);

/*
 * Pass `prev', the address one past prev's saved cpu_context and the
 * address of next's saved cpu_context to __switch_to(), and store the
 * task it returns in `last'.  All macro arguments are parenthesized so
 * that arbitrary pointer expressions can be used.
 */
#define switch_to(prev, next, last)					\
	do {								\
		(last) = __switch_to((prev),				\
				     &(prev)->thread.cpu_context + 1,	\
				     &(next)->thread.cpu_context);	\
	} while (0)
61
62#ifdef CONFIG_SMP
63# error "The AVR32 port does not support SMP"
64#else
65# define smp_mb() barrier()
66# define smp_rmb() barrier()
67# define smp_wmb() barrier()
68# define smp_read_barrier_depends() do { } while(0)
69#endif
70
71#include <linux/irqflags.h>
72
73extern void __xchg_called_with_bad_pointer(void);
74
75#ifdef __CHECKER__
76extern unsigned long __builtin_xchg(void *ptr, unsigned long x);
77#endif
78
/* Exchange the word at @m with @val through the compiler builtin. */
#define xchg_u32(val, m)	__builtin_xchg((void *)(m), (val))

/*
 * Back end of xchg(): exchange @x with the @size-byte object at @ptr and
 * return whatever __builtin_xchg() hands back.  Only 4-byte objects are
 * handled; any other size calls __xchg_called_with_bad_pointer() and
 * returns @x unchanged.
 */
static inline unsigned long __xchg(unsigned long x,
				   volatile void *ptr,
				   int size)
{
	switch (size) {
	case 4:
		return xchg_u32(x, ptr);
	default:
		__xchg_called_with_bad_pointer();
		return x;
	}
}
93
94static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
95 unsigned long new)
96{
97 __u32 ret;
98
99 asm volatile(
100 "1: ssrf 5\n"
101 " ld.w %[ret], %[m]\n"
102 " cp.w %[ret], %[old]\n"
103 " brne 2f\n"
104 " stcond %[m], %[new]\n"
105 " brne 1b\n"
106 "2:\n"
107 : [ret] "=&r"(ret), [m] "=m"(*m)
108 : "m"(m), [old] "ir"(old), [new] "r"(new)
109 : "memory", "cc");
110 return ret;
111}
112
113extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
114 volatile int * m, unsigned long old, unsigned long new);
115#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels
116
117/* This function doesn't exist, so you'll get a linker error
118 if something tries to do an invalid cmpxchg(). */
119extern void __cmpxchg_called_with_bad_pointer(void);
120
121#define __HAVE_ARCH_CMPXCHG 1
122
/*
 * Size dispatcher behind cmpxchg(): 4-byte objects go to __cmpxchg_u32(),
 * 8-byte objects to __cmpxchg_u64().  Any other size calls
 * __cmpxchg_called_with_bad_pointer() and returns @old.
 */
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
				      unsigned long new, int size)
{
	if (size == 4)
		return __cmpxchg_u32(ptr, old, new);
	if (size == 8)
		return __cmpxchg_u64(ptr, old, new);

	__cmpxchg_called_with_bad_pointer();
	return old;
}
136
137#define cmpxchg(ptr, old, new) \
138 ((typeof(*(ptr)))__cmpxchg((ptr), (unsigned long)(old), \
139 (unsigned long)(new), \
140 sizeof(*(ptr))))
141
142struct pt_regs;
143extern void __die(const char *, struct pt_regs *, unsigned long,
144 const char *, const char *, unsigned long);
145extern void __die_if_kernel(const char *, struct pt_regs *, unsigned long,
146 const char *, const char *, unsigned long);
147
148#define die(msg, regs, err) \
149 __die(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__)
150#define die_if_kernel(msg, regs, err) \
151 __die_if_kernel(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__)
152
153#define arch_align_stack(x) (x)
154
155#endif /* __ASM_AVR32_SYSTEM_H */
diff --git a/include/asm-avr32/termbits.h b/include/asm-avr32/termbits.h
new file mode 100644
index 000000000000..9dc6eacafa33
--- /dev/null
+++ b/include/asm-avr32/termbits.h
@@ -0,0 +1,173 @@
1#ifndef __ASM_AVR32_TERMBITS_H
2#define __ASM_AVR32_TERMBITS_H
3
4#include <linux/posix_types.h>
5
6typedef unsigned char cc_t;
7typedef unsigned int speed_t;
8typedef unsigned int tcflag_t;
9
10#define NCCS 19
11struct termios {
12 tcflag_t c_iflag; /* input mode flags */
13 tcflag_t c_oflag; /* output mode flags */
14 tcflag_t c_cflag; /* control mode flags */
15 tcflag_t c_lflag; /* local mode flags */
16 cc_t c_line; /* line discipline */
17 cc_t c_cc[NCCS]; /* control characters */
18};
19
20/* c_cc characters */
21#define VINTR 0
22#define VQUIT 1
23#define VERASE 2
24#define VKILL 3
25#define VEOF 4
26#define VTIME 5
27#define VMIN 6
28#define VSWTC 7
29#define VSTART 8
30#define VSTOP 9
31#define VSUSP 10
32#define VEOL 11
33#define VREPRINT 12
34#define VDISCARD 13
35#define VWERASE 14
36#define VLNEXT 15
37#define VEOL2 16
38
39/* c_iflag bits */
40#define IGNBRK 0000001
41#define BRKINT 0000002
42#define IGNPAR 0000004
43#define PARMRK 0000010
44#define INPCK 0000020
45#define ISTRIP 0000040
46#define INLCR 0000100
47#define IGNCR 0000200
48#define ICRNL 0000400
49#define IUCLC 0001000
50#define IXON 0002000
51#define IXANY 0004000
52#define IXOFF 0010000
53#define IMAXBEL 0020000
54#define IUTF8 0040000
55
56/* c_oflag bits */
57#define OPOST 0000001
58#define OLCUC 0000002
59#define ONLCR 0000004
60#define OCRNL 0000010
61#define ONOCR 0000020
62#define ONLRET 0000040
63#define OFILL 0000100
64#define OFDEL 0000200
65#define NLDLY 0000400
66#define NL0 0000000
67#define NL1 0000400
68#define CRDLY 0003000
69#define CR0 0000000
70#define CR1 0001000
71#define CR2 0002000
72#define CR3 0003000
73#define TABDLY 0014000
74#define TAB0 0000000
75#define TAB1 0004000
76#define TAB2 0010000
77#define TAB3 0014000
78#define XTABS 0014000
79#define BSDLY 0020000
80#define BS0 0000000
81#define BS1 0020000
82#define VTDLY 0040000
83#define VT0 0000000
84#define VT1 0040000
85#define FFDLY 0100000
86#define FF0 0000000
87#define FF1 0100000
88
89/* c_cflag bit meaning */
90#define CBAUD 0010017
91#define B0 0000000 /* hang up */
92#define B50 0000001
93#define B75 0000002
94#define B110 0000003
95#define B134 0000004
96#define B150 0000005
97#define B200 0000006
98#define B300 0000007
99#define B600 0000010
100#define B1200 0000011
101#define B1800 0000012
102#define B2400 0000013
103#define B4800 0000014
104#define B9600 0000015
105#define B19200 0000016
106#define B38400 0000017
107#define EXTA B19200
108#define EXTB B38400
109#define CSIZE 0000060
110#define CS5 0000000
111#define CS6 0000020
112#define CS7 0000040
113#define CS8 0000060
114#define CSTOPB 0000100
115#define CREAD 0000200
116#define PARENB 0000400
117#define PARODD 0001000
118#define HUPCL 0002000
119#define CLOCAL 0004000
120#define CBAUDEX 0010000
121#define B57600 0010001
122#define B115200 0010002
123#define B230400 0010003
124#define B460800 0010004
125#define B500000 0010005
126#define B576000 0010006
127#define B921600 0010007
128#define B1000000 0010010
129#define B1152000 0010011
130#define B1500000 0010012
131#define B2000000 0010013
132#define B2500000 0010014
133#define B3000000 0010015
134#define B3500000 0010016
135#define B4000000 0010017
136#define CIBAUD 002003600000 /* input baud rate (not used) */
137#define CMSPAR 010000000000 /* mark or space (stick) parity */
138#define CRTSCTS 020000000000 /* flow control */
139
140/* c_lflag bits */
141#define ISIG 0000001
142#define ICANON 0000002
143#define XCASE 0000004
144#define ECHO 0000010
145#define ECHOE 0000020
146#define ECHOK 0000040
147#define ECHONL 0000100
148#define NOFLSH 0000200
149#define TOSTOP 0000400
150#define ECHOCTL 0001000
151#define ECHOPRT 0002000
152#define ECHOKE 0004000
153#define FLUSHO 0010000
154#define PENDIN 0040000
155#define IEXTEN 0100000
156
157/* tcflow() and TCXONC use these */
158#define TCOOFF 0
159#define TCOON 1
160#define TCIOFF 2
161#define TCION 3
162
163/* tcflush() and TCFLSH use these */
164#define TCIFLUSH 0
165#define TCOFLUSH 1
166#define TCIOFLUSH 2
167
168/* tcsetattr uses these */
169#define TCSANOW 0
170#define TCSADRAIN 1
171#define TCSAFLUSH 2
172
173#endif /* __ASM_AVR32_TERMBITS_H */
diff --git a/include/asm-avr32/termios.h b/include/asm-avr32/termios.h
new file mode 100644
index 000000000000..615bc0639e5c
--- /dev/null
+++ b/include/asm-avr32/termios.h
@@ -0,0 +1,80 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_TERMIOS_H
9#define __ASM_AVR32_TERMIOS_H
10
11#include <asm/termbits.h>
12#include <asm/ioctls.h>
13
14struct winsize {
15 unsigned short ws_row;
16 unsigned short ws_col;
17 unsigned short ws_xpixel;
18 unsigned short ws_ypixel;
19};
20
21#define NCC 8
22struct termio {
23 unsigned short c_iflag; /* input mode flags */
24 unsigned short c_oflag; /* output mode flags */
25 unsigned short c_cflag; /* control mode flags */
26 unsigned short c_lflag; /* local mode flags */
27 unsigned char c_line; /* line discipline */
28 unsigned char c_cc[NCC]; /* control characters */
29};
30
31/* modem lines */
32#define TIOCM_LE 0x001
33#define TIOCM_DTR 0x002
34#define TIOCM_RTS 0x004
35#define TIOCM_ST 0x008
36#define TIOCM_SR 0x010
37#define TIOCM_CTS 0x020
38#define TIOCM_CAR 0x040
39#define TIOCM_RNG 0x080
40#define TIOCM_DSR 0x100
41#define TIOCM_CD TIOCM_CAR
42#define TIOCM_RI TIOCM_RNG
43#define TIOCM_OUT1 0x2000
44#define TIOCM_OUT2 0x4000
45#define TIOCM_LOOP 0x8000
46
47/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
48
49/* line disciplines */
50#define N_TTY 0
51#define N_SLIP 1
52#define N_MOUSE 2
53#define N_PPP 3
54#define N_STRIP 4
55#define N_AX25 5
56#define N_X25 6 /* X.25 async */
57#define N_6PACK 7
58#define N_MASC 8 /* Reserved for Mobitex module <kaz@cafe.net> */
59#define N_R3964 9 /* Reserved for Simatic R3964 module */
60#define N_PROFIBUS_FDL 10 /* Reserved for Profibus <Dave@mvhi.com> */
61#define N_IRDA 11 /* Linux IR - http://irda.sourceforge.net/ */
62#define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data cards about SMS messages */
63#define N_HDLC 13 /* synchronous HDLC */
64#define N_SYNC_PPP 14 /* synchronous PPP */
65#define N_HCI 15 /* Bluetooth HCI UART */
66
67#ifdef __KERNEL__
68/* intr=^C quit=^\ erase=del kill=^U
69 eof=^D vtime=\0 vmin=\1 swtc=\0
70 start=^Q stop=^S susp=^Z eol=\0
71 reprint=^R discard=^O werase=^W lnext=^V
72 eol2=\0
73*/
74#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
75
76#include <asm-generic/termios.h>
77
78#endif /* __KERNEL__ */
79
80#endif /* __ASM_AVR32_TERMIOS_H */
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h
new file mode 100644
index 000000000000..d1f5b35ebd54
--- /dev/null
+++ b/include/asm-avr32/thread_info.h
@@ -0,0 +1,106 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_THREAD_INFO_H
9#define __ASM_AVR32_THREAD_INFO_H
10
11#include <asm/page.h>
12
13#define THREAD_SIZE_ORDER 1
14#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
15
16#ifndef __ASSEMBLY__
17#include <asm/types.h>
18
19struct task_struct;
20struct exec_domain;
21
22struct thread_info {
23 struct task_struct *task; /* main task structure */
24 struct exec_domain *exec_domain; /* execution domain */
25 unsigned long flags; /* low level flags */
26 __u32 cpu;
27 __s32 preempt_count; /* 0 => preemptable, <0 => BUG */
28 struct restart_block restart_block;
29 __u8 supervisor_stack[0];
30};
31
32#define INIT_THREAD_INFO(tsk) \
33{ \
34 .task = &tsk, \
35 .exec_domain = &default_exec_domain, \
36 .flags = 0, \
37 .cpu = 0, \
38 .preempt_count = 1, \
39 .restart_block = { \
40 .fn = do_no_restart_syscall \
41 } \
42}
43
44#define init_thread_info (init_thread_union.thread_info)
45#define init_stack (init_thread_union.stack)
46
47/*
48 * Get the thread information struct from C.
49 * We do the usual trick and use the lower end of the stack for this
50 */
51static inline struct thread_info *current_thread_info(void)
52{
53 unsigned long addr = ~(THREAD_SIZE - 1);
54
55 asm("and %0, sp" : "=r"(addr) : "0"(addr));
56 return (struct thread_info *)addr;
57}
58
59/* thread information allocation */
#define alloc_thread_info(ti) \
	((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
/* Free with the same page order that alloc_thread_info() requested. */
#define free_thread_info(ti) \
	free_pages((unsigned long)(ti), THREAD_SIZE_ORDER)
63#define get_thread_info(ti) get_task_struct((ti)->task)
64#define put_thread_info(ti) put_task_struct((ti)->task)
65
66#endif /* !__ASSEMBLY__ */
67
68#define PREEMPT_ACTIVE 0x40000000
69
70/*
71 * Thread information flags
72 * - these are process state flags that various assembly files may need to access
73 * - pending work-to-be-done flags are in LSW
74 * - other flags in MSW
75 */
76#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
77#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */
78#define TIF_SIGPENDING 2 /* signal pending */
79#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
80#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling
81 TIF_NEED_RESCHED */
82#define TIF_BREAKPOINT 5 /* true if we should break after return */
83#define TIF_SINGLE_STEP 6 /* single step after next break */
84#define TIF_MEMDIE 7
85#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal */
86#define TIF_USERSPACE 31 /* true if FS sets userspace */
87
88#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
89#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
90#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
91#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
92#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
93#define _TIF_BREAKPOINT (1 << TIF_BREAKPOINT)
94#define _TIF_SINGLE_STEP (1 << TIF_SINGLE_STEP)
95#define _TIF_MEMDIE (1 << TIF_MEMDIE)
96#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
97
98/* XXX: These two masks must never span more than 16 bits! */
99/* work to do on interrupt/exception return */
100#define _TIF_WORK_MASK 0x0000013e
101/* work to do on any return to userspace */
102#define _TIF_ALLWORK_MASK 0x0000013f
103/* work to do on return from debug mode */
104#define _TIF_DBGWORK_MASK 0x0000017e
105
106#endif /* __ASM_AVR32_THREAD_INFO_H */
diff --git a/include/asm-avr32/timex.h b/include/asm-avr32/timex.h
new file mode 100644
index 000000000000..5e44ecb3ce0c
--- /dev/null
+++ b/include/asm-avr32/timex.h
@@ -0,0 +1,40 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_TIMEX_H
9#define __ASM_AVR32_TIMEX_H
10
11/*
12 * This is the frequency of the timer used for Linux's timer interrupt.
13 * The value should be defined as accurate as possible or under certain
14 * circumstances Linux timekeeping might become inaccurate or fail.
15 *
16 * For many systems the exact clockrate of the timer isn't known but due to
17 * the way this value is used we can get away with a wrong value as long
18 * as this value is:
19 *
20 * - a multiple of HZ
21 * - a divisor of the actual rate
22 *
23 * 500000 is a good such cheat value.
24 *
25 * The obscure number 1193182 is the same as used by the original i8254
26 * timer in legacy PC hardware; the chip is never found in AVR32 systems.
27 */
28#define CLOCK_TICK_RATE 500000 /* Underlying HZ */
29
typedef unsigned long cycles_t;

/* No cycle counter is read here: the reported cycle count is always 0. */
static inline cycles_t get_cycles(void)
{
	const cycles_t no_cycles = 0;

	return no_cycles;
}
36
37extern int read_current_timer(unsigned long *timer_value);
38#define ARCH_HAS_READ_CURRENT_TIMER 1
39
40#endif /* __ASM_AVR32_TIMEX_H */
diff --git a/include/asm-avr32/tlb.h b/include/asm-avr32/tlb.h
new file mode 100644
index 000000000000..5c55f9ce7c7d
--- /dev/null
+++ b/include/asm-avr32/tlb.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_TLB_H
9#define __ASM_AVR32_TLB_H
10
/* Call flush_cache_range() on the [vm_start, vm_end) span of @vma. */
#define tlb_start_vma(tlb, vma) \
	flush_cache_range((vma), (vma)->vm_start, (vma)->vm_end)

/* Call flush_tlb_range() on the [vm_start, vm_end) span of @vma. */
#define tlb_end_vma(tlb, vma) \
	flush_tlb_range((vma), (vma)->vm_start, (vma)->vm_end)
16
17#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while(0)
18
19/*
20 * Flush whole TLB for MM
21 */
22#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
23
24#include <asm-generic/tlb.h>
25
26/*
27 * For debugging purposes
28 */
29extern void show_dtlb_entry(unsigned int index);
30extern void dump_dtlb(void);
31
32#endif /* __ASM_AVR32_TLB_H */
diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h
new file mode 100644
index 000000000000..730e268f81f3
--- /dev/null
+++ b/include/asm-avr32/tlbflush.h
@@ -0,0 +1,40 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_TLBFLUSH_H
9#define __ASM_AVR32_TLBFLUSH_H
10
11#include <asm/mmu.h>
12
13/*
14 * TLB flushing:
15 *
16 * - flush_tlb() flushes the current mm struct TLBs
17 * - flush_tlb_all() flushes all processes' TLB entries
18 * - flush_tlb_mm(mm) flushes the specified mm context TLBs
19 * - flush_tlb_page(vma, vmaddr) flushes one page
20 * - flush_tlb_range(vma, start, end) flushes a range of pages
21 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
22 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
23 */
24extern void flush_tlb(void);
25extern void flush_tlb_all(void);
26extern void flush_tlb_mm(struct mm_struct *mm);
27extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
28 unsigned long end);
29extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
30extern void __flush_tlb_page(unsigned long asid, unsigned long page);
31
/*
 * flush_tlb_pgtables() hook for a range of page tables: intentionally
 * empty, there is nothing to do here.
 */
static inline void flush_tlb_pgtables(struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
}
37
38extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
39
40#endif /* __ASM_AVR32_TLBFLUSH_H */
diff --git a/include/asm-avr32/topology.h b/include/asm-avr32/topology.h
new file mode 100644
index 000000000000..5b766cbb4806
--- /dev/null
+++ b/include/asm-avr32/topology.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_AVR32_TOPOLOGY_H
2#define __ASM_AVR32_TOPOLOGY_H
3
4#include <asm-generic/topology.h>
5
6#endif /* __ASM_AVR32_TOPOLOGY_H */
diff --git a/include/asm-avr32/traps.h b/include/asm-avr32/traps.h
new file mode 100644
index 000000000000..6a8fb944f414
--- /dev/null
+++ b/include/asm-avr32/traps.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_TRAPS_H
9#define __ASM_AVR32_TRAPS_H
10
11#include <linux/list.h>
12
13struct undef_hook {
14 struct list_head node;
15 u32 insn_mask;
16 u32 insn_val;
17 int (*fn)(struct pt_regs *regs, u32 insn);
18};
19
20void register_undef_hook(struct undef_hook *hook);
21void unregister_undef_hook(struct undef_hook *hook);
22
23#endif /* __ASM_AVR32_TRAPS_H */
diff --git a/include/asm-avr32/types.h b/include/asm-avr32/types.h
new file mode 100644
index 000000000000..3f47db9675af
--- /dev/null
+++ b/include/asm-avr32/types.h
@@ -0,0 +1,70 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_TYPES_H
9#define __ASM_AVR32_TYPES_H
10
11#ifndef __ASSEMBLY__
12
13typedef unsigned short umode_t;
14
15/*
16 * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
17 * header files exported to user space
18 */
19typedef __signed__ char __s8;
20typedef unsigned char __u8;
21
22typedef __signed__ short __s16;
23typedef unsigned short __u16;
24
25typedef __signed__ int __s32;
26typedef unsigned int __u32;
27
28#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
29typedef __signed__ long long __s64;
30typedef unsigned long long __u64;
31#endif
32
33#endif /* __ASSEMBLY__ */
34
35/*
36 * These aren't exported outside the kernel to avoid name space clashes
37 */
38#ifdef __KERNEL__
39
40#define BITS_PER_LONG 32
41
42#ifndef __ASSEMBLY__
43
44typedef signed char s8;
45typedef unsigned char u8;
46
47typedef signed short s16;
48typedef unsigned short u16;
49
50typedef signed int s32;
51typedef unsigned int u32;
52
53typedef signed long long s64;
54typedef unsigned long long u64;
55
56/* Dma addresses are 32-bits wide. */
57
58typedef u32 dma_addr_t;
59
60#ifdef CONFIG_LBD
61typedef u64 sector_t;
62#define HAVE_SECTOR_T
63#endif
64
65#endif /* __ASSEMBLY__ */
66
67#endif /* __KERNEL__ */
68
69
70#endif /* __ASM_AVR32_TYPES_H */
diff --git a/include/asm-avr32/uaccess.h b/include/asm-avr32/uaccess.h
new file mode 100644
index 000000000000..821deb5a9d28
--- /dev/null
+++ b/include/asm-avr32/uaccess.h
@@ -0,0 +1,335 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_UACCESS_H
9#define __ASM_AVR32_UACCESS_H
10
11#include <linux/errno.h>
12#include <linux/sched.h>
13
14#define VERIFY_READ 0
15#define VERIFY_WRITE 1
16
17typedef struct {
18 unsigned int is_user_space;
19} mm_segment_t;
20
21/*
22 * The fs value determines whether argument validity checking should be
23 * performed or not. If get_fs() == USER_DS, checking is performed, with
24 * get_fs() == KERNEL_DS, checking is bypassed.
25 *
26 * For historical reasons (Data Segment Register?), these macros are misnamed.
27 */
28#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
29#define segment_eq(a,b) ((a).is_user_space == (b).is_user_space)
30
31#define USER_ADDR_LIMIT 0x80000000
32
33#define KERNEL_DS MAKE_MM_SEG(0)
34#define USER_DS MAKE_MM_SEG(1)
35
36#define get_ds() (KERNEL_DS)
37
38static inline mm_segment_t get_fs(void)
39{
40 return MAKE_MM_SEG(test_thread_flag(TIF_USERSPACE));
41}
42
43static inline void set_fs(mm_segment_t s)
44{
45 if (s.is_user_space)
46 set_thread_flag(TIF_USERSPACE);
47 else
48 clear_thread_flag(TIF_USERSPACE);
49}
50
/*
 * Test whether a block of memory is a valid user space address.
 * Returns 0 if the range is valid, nonzero otherwise.
 *
 * We do the following checks:
 *   1. Is the access from kernel space (TIF_USERSPACE clear)?
 *   2. Is addr at or above USER_ADDR_LIMIT?
 *   3. Is size larger than USER_ADDR_LIMIT?
 *   4. Does (addr + size) end up above USER_ADDR_LIMIT?
 *
 * If yes on the first check, access is granted.
 * If yes on any of the others, access is denied.
 */
#define __range_ok(addr, size)						\
	(test_thread_flag(TIF_USERSPACE)				\
	 && (((unsigned long)(addr) >= USER_ADDR_LIMIT)			\
	     || ((unsigned long)(size) > USER_ADDR_LIMIT)		\
	     || (((unsigned long)(addr) + (unsigned long)(size))	\
		 > USER_ADDR_LIMIT)))

/* Nonzero when the range passes __range_ok(); @type is not examined. */
#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0))
70
71static inline int
72verify_area(int type, const void __user *addr, unsigned long size)
73{
74 return access_ok(type, addr, size) ? 0 : -EFAULT;
75}
76
77/* Generic arbitrary sized copy. Return the number of bytes NOT copied */
78extern __kernel_size_t __copy_user(void *to, const void *from,
79 __kernel_size_t n);
80
81extern __kernel_size_t copy_to_user(void __user *to, const void *from,
82 __kernel_size_t n);
83extern __kernel_size_t copy_from_user(void *to, const void __user *from,
84 __kernel_size_t n);
85
86static inline __kernel_size_t __copy_to_user(void __user *to, const void *from,
87 __kernel_size_t n)
88{
89 return __copy_user((void __force *)to, from, n);
90}
91static inline __kernel_size_t __copy_from_user(void *to,
92 const void __user *from,
93 __kernel_size_t n)
94{
95 return __copy_user(to, (const void __force *)from, n);
96}
97
98#define __copy_to_user_inatomic __copy_to_user
99#define __copy_from_user_inatomic __copy_from_user
100
101/*
102 * put_user: - Write a simple value into user space.
103 * @x: Value to copy to user space.
104 * @ptr: Destination address, in user space.
105 *
106 * Context: User context only. This function may sleep.
107 *
108 * This macro copies a single simple value from kernel space to user
109 * space. It supports simple types like char and int, but not larger
110 * data types like structures or arrays.
111 *
112 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
113 * to the result of dereferencing @ptr.
114 *
115 * Returns zero on success, or -EFAULT on error.
116 */
117#define put_user(x,ptr) \
118 __put_user_check((x),(ptr),sizeof(*(ptr)))
119
120/*
121 * get_user: - Get a simple variable from user space.
122 * @x: Variable to store result.
123 * @ptr: Source address, in user space.
124 *
125 * Context: User context only. This function may sleep.
126 *
127 * This macro copies a single simple variable from user space to kernel
128 * space. It supports simple types like char and int, but not larger
129 * data types like structures or arrays.
130 *
131 * @ptr must have pointer-to-simple-variable type, and the result of
132 * dereferencing @ptr must be assignable to @x without a cast.
133 *
134 * Returns zero on success, or -EFAULT on error.
135 * On error, the variable @x is set to zero.
136 */
137#define get_user(x,ptr) \
138 __get_user_check((x),(ptr),sizeof(*(ptr)))
139
140/*
141 * __put_user: - Write a simple value into user space, with less checking.
142 * @x: Value to copy to user space.
143 * @ptr: Destination address, in user space.
144 *
145 * Context: User context only. This function may sleep.
146 *
147 * This macro copies a single simple value from kernel space to user
148 * space. It supports simple types like char and int, but not larger
149 * data types like structures or arrays.
150 *
151 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
152 * to the result of dereferencing @ptr.
153 *
154 * Caller must check the pointer with access_ok() before calling this
155 * function.
156 *
157 * Returns zero on success, or -EFAULT on error.
158 */
159#define __put_user(x,ptr) \
160 __put_user_nocheck((x),(ptr),sizeof(*(ptr)))
161
162/*
163 * __get_user: - Get a simple variable from user space, with less checking.
164 * @x: Variable to store result.
165 * @ptr: Source address, in user space.
166 *
167 * Context: User context only. This function may sleep.
168 *
169 * This macro copies a single simple variable from user space to kernel
170 * space. It supports simple types like char and int, but not larger
171 * data types like structures or arrays.
172 *
173 * @ptr must have pointer-to-simple-variable type, and the result of
174 * dereferencing @ptr must be assignable to @x without a cast.
175 *
176 * Caller must check the pointer with access_ok() before calling this
177 * function.
178 *
179 * Returns zero on success, or -EFAULT on error.
180 * On error, the variable @x is set to zero.
181 */
182#define __get_user(x,ptr) \
183 __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
184
185extern int __get_user_bad(void);
186extern int __put_user_bad(void);
187
188#define __get_user_nocheck(x, ptr, size) \
189({ \
190 typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \
191 int __gu_err = 0; \
192 \
193 switch (size) { \
194 case 1: __get_user_asm("ub", __gu_val, ptr, __gu_err); break; \
195 case 2: __get_user_asm("uh", __gu_val, ptr, __gu_err); break; \
196 case 4: __get_user_asm("w", __gu_val, ptr, __gu_err); break; \
197 case 8: __get_user_asm("d", __gu_val, ptr, __gu_err); break; \
198 default: __gu_err = __get_user_bad(); break; \
199 } \
200 \
201 x = __gu_val; \
202 __gu_err; \
203})
204
205#define __get_user_check(x, ptr, size) \
206({ \
207 typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \
208 const typeof(*(ptr)) __user * __gu_addr = (ptr); \
209 int __gu_err = 0; \
210 \
211 if (access_ok(VERIFY_READ, __gu_addr, size)) { \
212 switch (size) { \
213 case 1: \
214 __get_user_asm("ub", __gu_val, __gu_addr, \
215 __gu_err); \
216 break; \
217 case 2: \
218 __get_user_asm("uh", __gu_val, __gu_addr, \
219 __gu_err); \
220 break; \
221 case 4: \
222 __get_user_asm("w", __gu_val, __gu_addr, \
223 __gu_err); \
224 break; \
225 case 8: \
226 __get_user_asm("d", __gu_val, __gu_addr, \
227 __gu_err); \
228 break; \
229 default: \
230 __gu_err = __get_user_bad(); \
231 break; \
232 } \
233 } else { \
234 __gu_err = -EFAULT; \
235 } \
236 x = __gu_val; \
237 __gu_err; \
238})
239
240#define __get_user_asm(suffix, __gu_val, ptr, __gu_err) \
241 asm volatile( \
242 "1: ld." suffix " %1, %3 \n" \
243 "2: \n" \
244 " .section .fixup, \"ax\" \n" \
245 "3: mov %0, %4 \n" \
246 " rjmp 2b \n" \
247 " .previous \n" \
248 " .section __ex_table, \"a\" \n" \
249 " .long 1b, 3b \n" \
250 " .previous \n" \
251 : "=r"(__gu_err), "=r"(__gu_val) \
252 : "0"(__gu_err), "m"(*(ptr)), "i"(-EFAULT))
253
254#define __put_user_nocheck(x, ptr, size) \
255({ \
256 typeof(*(ptr)) __pu_val; \
257 int __pu_err = 0; \
258 \
259 __pu_val = (x); \
260 switch (size) { \
261 case 1: __put_user_asm("b", ptr, __pu_val, __pu_err); break; \
262 case 2: __put_user_asm("h", ptr, __pu_val, __pu_err); break; \
263 case 4: __put_user_asm("w", ptr, __pu_val, __pu_err); break; \
264 case 8: __put_user_asm("d", ptr, __pu_val, __pu_err); break; \
265 default: __pu_err = __put_user_bad(); break; \
266 } \
267 __pu_err; \
268})
269
270#define __put_user_check(x, ptr, size) \
271({ \
272 typeof(*(ptr)) __pu_val; \
273 typeof(*(ptr)) __user *__pu_addr = (ptr); \
274 int __pu_err = 0; \
275 \
276 __pu_val = (x); \
277 if (access_ok(VERIFY_WRITE, __pu_addr, size)) { \
278 switch (size) { \
279 case 1: \
280 __put_user_asm("b", __pu_addr, __pu_val, \
281 __pu_err); \
282 break; \
283 case 2: \
284 __put_user_asm("h", __pu_addr, __pu_val, \
285 __pu_err); \
286 break; \
287 case 4: \
288 __put_user_asm("w", __pu_addr, __pu_val, \
289 __pu_err); \
290 break; \
291 case 8: \
292 __put_user_asm("d", __pu_addr, __pu_val, \
293 __pu_err); \
294 break; \
295 default: \
296 __pu_err = __put_user_bad(); \
297 break; \
298 } \
299 } else { \
300 __pu_err = -EFAULT; \
301 } \
302 __pu_err; \
303})
304
305#define __put_user_asm(suffix, ptr, __pu_val, __gu_err) \
306 asm volatile( \
307 "1: st." suffix " %1, %3 \n" \
308 "2: \n" \
309 " .section .fixup, \"ax\" \n" \
310 "3: mov %0, %4 \n" \
311 " rjmp 2b \n" \
312 " .previous \n" \
313 " .section __ex_table, \"a\" \n" \
314 " .long 1b, 3b \n" \
315 " .previous \n" \
316 : "=r"(__gu_err), "=m"(*(ptr)) \
317 : "0"(__gu_err), "r"(__pu_val), "i"(-EFAULT))
318
319extern __kernel_size_t clear_user(void __user *addr, __kernel_size_t size);
320extern __kernel_size_t __clear_user(void __user *addr, __kernel_size_t size);
321
322extern long strncpy_from_user(char *dst, const char __user *src, long count);
323extern long __strncpy_from_user(char *dst, const char __user *src, long count);
324
325extern long strnlen_user(const char __user *__s, long __n);
326extern long __strnlen_user(const char __user *__s, long __n);
327
328#define strlen_user(s) strnlen_user(s, ~0UL >> 1)
329
330struct exception_table_entry
331{
332 unsigned long insn, fixup;
333};
334
335#endif /* __ASM_AVR32_UACCESS_H */
diff --git a/include/asm-avr32/ucontext.h b/include/asm-avr32/ucontext.h
new file mode 100644
index 000000000000..ac7259c2a799
--- /dev/null
+++ b/include/asm-avr32/ucontext.h
@@ -0,0 +1,12 @@
1#ifndef __ASM_AVR32_UCONTEXT_H
2#define __ASM_AVR32_UCONTEXT_H
3
4struct ucontext {
5 unsigned long uc_flags;
6 struct ucontext * uc_link;
7 stack_t uc_stack;
8 struct sigcontext uc_mcontext;
9 sigset_t uc_sigmask;
10};
11
12#endif /* __ASM_AVR32_UCONTEXT_H */
diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h
new file mode 100644
index 000000000000..3042723fcbfd
--- /dev/null
+++ b/include/asm-avr32/unaligned.h
@@ -0,0 +1,25 @@
1#ifndef __ASM_AVR32_UNALIGNED_H
2#define __ASM_AVR32_UNALIGNED_H
3
4/*
5 * AVR32 can handle some unaligned accesses, depending on the
6 * implementation. The AVR32 AP implementation can handle unaligned
7 * words, but halfwords must be halfword-aligned, and doublewords must
8 * be word-aligned.
9 *
10 * TODO: Make all this CPU-specific and optimize.
11 */
12
13#include <linux/string.h>
14
/*
 * Read the object ptr points at without assuming any alignment: the bytes
 * are copied into a correctly typed temporary, which becomes the value of
 * the expression.
 *
 * Use memmove here, so gcc does not insert a __builtin_memcpy.
 */
#define get_unaligned(ptr) \
({ \
	__typeof__(*(ptr)) __tmp; \
 \
	memmove(&__tmp, (ptr), sizeof(__tmp)); \
	__tmp; \
})
19
/*
 * Store val into the object ptr points at without assuming any alignment.
 * val is first converted to the pointed-to type, then copied out byte-wise
 * with memmove (see get_unaligned for why memmove is used).  The
 * expression has no value.
 */
#define put_unaligned(val, ptr) \
({ \
	__typeof__(*(ptr)) __tmp = (val); \
 \
	memmove((ptr), &__tmp, sizeof(__tmp)); \
	(void)0; \
})
24
25#endif /* __ASM_AVR32_UNALIGNED_H */
diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h
new file mode 100644
index 000000000000..1f528f92690d
--- /dev/null
+++ b/include/asm-avr32/unistd.h
@@ -0,0 +1,387 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef __ASM_AVR32_UNISTD_H
9#define __ASM_AVR32_UNISTD_H
10
11/*
12 * This file contains the system call numbers.
13 */
14
15#define __NR_restart_syscall 0
16#define __NR_exit 1
17#define __NR_fork 2
18#define __NR_read 3
19#define __NR_write 4
20#define __NR_open 5
21#define __NR_close 6
22#define __NR_umask 7
23#define __NR_creat 8
24#define __NR_link 9
25#define __NR_unlink 10
26#define __NR_execve 11
27#define __NR_chdir 12
28#define __NR_time 13
29#define __NR_mknod 14
30#define __NR_chmod 15
31#define __NR_chown 16
32#define __NR_lchown 17
33#define __NR_lseek 18
34#define __NR__llseek 19
35#define __NR_getpid 20
36#define __NR_mount 21
37#define __NR_umount2 22
38#define __NR_setuid 23
39#define __NR_getuid 24
40#define __NR_stime 25
41#define __NR_ptrace 26
42#define __NR_alarm 27
43#define __NR_pause 28
44#define __NR_utime 29
45#define __NR_stat 30
46#define __NR_fstat 31
47#define __NR_lstat 32
48#define __NR_access 33
49#define __NR_chroot 34
50#define __NR_sync 35
51#define __NR_fsync 36
52#define __NR_kill 37
53#define __NR_rename 38
54#define __NR_mkdir 39
55#define __NR_rmdir 40
56#define __NR_dup 41
57#define __NR_pipe 42
58#define __NR_times 43
59#define __NR_clone 44
60#define __NR_brk 45
61#define __NR_setgid 46
62#define __NR_getgid 47
63#define __NR_getcwd 48
64#define __NR_geteuid 49
65#define __NR_getegid 50
66#define __NR_acct 51
67#define __NR_setfsuid 52
68#define __NR_setfsgid 53
69#define __NR_ioctl 54
70#define __NR_fcntl 55
71#define __NR_setpgid 56
72#define __NR_mremap 57
73#define __NR_setresuid 58
74#define __NR_getresuid 59
75#define __NR_setreuid 60
76#define __NR_setregid 61
77#define __NR_ustat 62
78#define __NR_dup2 63
79#define __NR_getppid 64
80#define __NR_getpgrp 65
81#define __NR_setsid 66
82#define __NR_rt_sigaction 67
83#define __NR_rt_sigreturn 68
84#define __NR_rt_sigprocmask 69
85#define __NR_rt_sigpending 70
86#define __NR_rt_sigtimedwait 71
87#define __NR_rt_sigqueueinfo 72
88#define __NR_rt_sigsuspend 73
89#define __NR_sethostname 74
90#define __NR_setrlimit 75
91#define __NR_getrlimit 76 /* SuS compliant getrlimit */
92#define __NR_getrusage 77
93#define __NR_gettimeofday 78
94#define __NR_settimeofday 79
95#define __NR_getgroups 80
96#define __NR_setgroups 81
97#define __NR_select 82
98#define __NR_symlink 83
99#define __NR_fchdir 84
100#define __NR_readlink 85
101#define __NR_pread 86
102#define __NR_pwrite 87
103#define __NR_swapon 88
104#define __NR_reboot 89
105#define __NR_mmap2 90
106#define __NR_munmap 91
107#define __NR_truncate 92
108#define __NR_ftruncate 93
109#define __NR_fchmod 94
110#define __NR_fchown 95
111#define __NR_getpriority 96
112#define __NR_setpriority 97
113#define __NR_wait4 98
114#define __NR_statfs 99
115#define __NR_fstatfs 100
116#define __NR_vhangup 101
117#define __NR_sigaltstack 102
118#define __NR_syslog 103
119#define __NR_setitimer 104
120#define __NR_getitimer 105
121#define __NR_swapoff 106
122#define __NR_sysinfo 107
123#define __NR_ipc 108
124#define __NR_sendfile 109
125#define __NR_setdomainname 110
126#define __NR_uname 111
127#define __NR_adjtimex 112
128#define __NR_mprotect 113
129#define __NR_vfork 114
130#define __NR_init_module 115
131#define __NR_delete_module 116
132#define __NR_quotactl 117
133#define __NR_getpgid 118
134#define __NR_bdflush 119
135#define __NR_sysfs 120
136#define __NR_personality 121
137#define __NR_afs_syscall 122 /* Syscall for Andrew File System */
138#define __NR_getdents 123
139#define __NR_flock 124
140#define __NR_msync 125
141#define __NR_readv 126
142#define __NR_writev 127
143#define __NR_getsid 128
144#define __NR_fdatasync 129
145#define __NR__sysctl 130
146#define __NR_mlock 131
147#define __NR_munlock 132
148#define __NR_mlockall 133
149#define __NR_munlockall 134
150#define __NR_sched_setparam 135
151#define __NR_sched_getparam 136
152#define __NR_sched_setscheduler 137
153#define __NR_sched_getscheduler 138
154#define __NR_sched_yield 139
155#define __NR_sched_get_priority_max 140
156#define __NR_sched_get_priority_min 141
157#define __NR_sched_rr_get_interval 142
158#define __NR_nanosleep 143
159#define __NR_poll 144
160#define __NR_nfsservctl 145
161#define __NR_setresgid 146
162#define __NR_getresgid 147
163#define __NR_prctl 148
164#define __NR_socket 149
165#define __NR_bind 150
166#define __NR_connect 151
167#define __NR_listen 152
168#define __NR_accept 153
169#define __NR_getsockname 154
170#define __NR_getpeername 155
171#define __NR_socketpair 156
172#define __NR_send 157
173#define __NR_recv 158
174#define __NR_sendto 159
175#define __NR_recvfrom 160
176#define __NR_shutdown 161
177#define __NR_setsockopt 162
178#define __NR_getsockopt 163
179#define __NR_sendmsg 164
180#define __NR_recvmsg 165
181#define __NR_truncate64 166
182#define __NR_ftruncate64 167
183#define __NR_stat64 168
184#define __NR_lstat64 169
185#define __NR_fstat64 170
186#define __NR_pivot_root 171
187#define __NR_mincore 172
188#define __NR_madvise 173
189#define __NR_getdents64 174
190#define __NR_fcntl64 175
191#define __NR_gettid 176
192#define __NR_readahead 177
193#define __NR_setxattr 178
194#define __NR_lsetxattr 179
195#define __NR_fsetxattr 180
196#define __NR_getxattr 181
197#define __NR_lgetxattr 182
198#define __NR_fgetxattr 183
199#define __NR_listxattr 184
200#define __NR_llistxattr 185
201#define __NR_flistxattr 186
202#define __NR_removexattr 187
203#define __NR_lremovexattr 188
204#define __NR_fremovexattr 189
205#define __NR_tkill 190
206#define __NR_sendfile64 191
207#define __NR_futex 192
208#define __NR_sched_setaffinity 193
209#define __NR_sched_getaffinity 194
210#define __NR_capget 195
211#define __NR_capset 196
212#define __NR_io_setup 197
213#define __NR_io_destroy 198
214#define __NR_io_getevents 199
215#define __NR_io_submit 200
216#define __NR_io_cancel 201
217#define __NR_fadvise64 202
218#define __NR_exit_group 203
219#define __NR_lookup_dcookie 204
220#define __NR_epoll_create 205
221#define __NR_epoll_ctl 206
222#define __NR_epoll_wait 207
223#define __NR_remap_file_pages 208
224#define __NR_set_tid_address 209
225
226#define __NR_timer_create 210
227#define __NR_timer_settime 211
228#define __NR_timer_gettime 212
229#define __NR_timer_getoverrun 213
230#define __NR_timer_delete 214
231#define __NR_clock_settime 215
232#define __NR_clock_gettime 216
233#define __NR_clock_getres 217
234#define __NR_clock_nanosleep 218
235#define __NR_statfs64 219
236#define __NR_fstatfs64 220
237#define __NR_tgkill 221
238 /* 222 reserved for tux */
239#define __NR_utimes 223
240#define __NR_fadvise64_64 224
241
242#define __NR_cacheflush 225
243
244#define __NR_vserver 226
245#define __NR_mq_open 227
246#define __NR_mq_unlink 228
247#define __NR_mq_timedsend 229
248#define __NR_mq_timedreceive 230
249#define __NR_mq_notify 231
250#define __NR_mq_getsetattr 232
251#define __NR_kexec_load 233
252#define __NR_waitid 234
253#define __NR_add_key 235
254#define __NR_request_key 236
255#define __NR_keyctl 237
256#define __NR_ioprio_set 238
257#define __NR_ioprio_get 239
258#define __NR_inotify_init 240
259#define __NR_inotify_add_watch 241
260#define __NR_inotify_rm_watch 242
261#define __NR_openat 243
262#define __NR_mkdirat 244
263#define __NR_mknodat 245
264#define __NR_fchownat 246
265#define __NR_futimesat 247
266#define __NR_fstatat64 248
267#define __NR_unlinkat 249
268#define __NR_renameat 250
269#define __NR_linkat 251
270#define __NR_symlinkat 252
271#define __NR_readlinkat 253
272#define __NR_fchmodat 254
273#define __NR_faccessat 255
274#define __NR_pselect6 256
275#define __NR_ppoll 257
276#define __NR_unshare 258
277#define __NR_set_robust_list 259
278#define __NR_get_robust_list 260
279#define __NR_splice 261
280#define __NR_sync_file_range 262
281#define __NR_tee 263
282#define __NR_vmsplice 264
283
284#define NR_syscalls 265
285
286
287/*
288 * AVR32 calling convention for system calls:
289 * - System call number in r8
290 * - Parameters in r12 and downwards to r9 as well as r6 and r5.
291 * - Return value in r12
292 */
293
/*
 * A system call reports failure by returning a negated errno value.
 * Every result in the range -1 .. -4095 is treated as an error: errno
 * receives the positive code and the function using this macro returns -1.
 * Any other result is returned unchanged, cast to type.
 *
 * 4095 is the kernel-wide upper bound for error codes (MAX_ERRNO in
 * <linux/err.h>).  The earlier cut-off of 125 missed codes that
 * <asm-generic/errno.h> defines above it, such as the key-management
 * errors returned by add_key/request_key/keyctl.
 *
 * res must be a modifiable lvalue; it is evaluated more than once.
 */
#define __syscall_return(type, res) do { \
	if ((unsigned long)(res) >= (unsigned long)(-4095)) { \
		errno = -(res); \
		res = -1; \
	} \
	return (type) (res); \
} while (0)
306
#ifdef __KERNEL__
/*
 * Feature-selection macros, visible to kernel code only.  Each name is
 * listed exactly once (__ARCH_WANT_SYS_GETPGRP used to appear twice).
 */
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
#define __ARCH_WANT_SYS_WAITPID
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_RT_SIGACTION
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
#endif
323
324#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__)
325
326#include <linux/types.h>
327#include <linux/linkage.h>
328#include <asm/signal.h>
329
330struct pt_regs;
331
332/*
333 * we need this inline - forking from kernel space will result
334 * in NO COPY ON WRITE (!!!), until an execve is executed. This
335 * is no problem, but for the stack. This is handled by not letting
336 * main() use the stack at all after fork(). Thus, no function
337 * calls - which means inline code for fork too, as otherwise we
338 * would use the stack upon exit from 'fork()'.
339 *
340 * Actually only pause and fork are needed inline, so that there
341 * won't be any messing with the stack from main(), but we define
342 * some others too.
343 */
344static inline int execve(const char *file, char **argv, char **envp)
345{
346 register long scno asm("r8") = __NR_execve;
347 register long sc1 asm("r12") = (long)file;
348 register long sc2 asm("r11") = (long)argv;
349 register long sc3 asm("r10") = (long)envp;
350 int res;
351
352 asm volatile("scall"
353 : "=r"(sc1)
354 : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
355 : "lr", "memory");
356 res = sc1;
357 __syscall_return(int, res);
358}
359
360asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
361asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
362 struct pt_regs *regs);
363asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
364asmlinkage int sys_pipe(unsigned long __user *filedes);
365asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
366 unsigned long prot, unsigned long flags,
367 unsigned long fd, off_t offset);
368asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len);
369asmlinkage int sys_fork(struct pt_regs *regs);
370asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
371 unsigned long parent_tidptr,
372 unsigned long child_tidptr, struct pt_regs *regs);
373asmlinkage int sys_vfork(struct pt_regs *regs);
374asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
375 char __user *__user *uenvp, struct pt_regs *regs);
376
377#endif
378
/*
 * "Conditional" syscalls
 *
 * cond_syscall(x) declares x as a weak symbol and sets it to
 * sys_ni_syscall.  __attribute__((weak,alias("sys_ni_syscall"))) would
 * express the same thing, but it doesn't work on all toolchains, so the
 * assembler directives are written out by hand.
 */
#define cond_syscall(x) \
	asm(".weak\t" #x "\n\t" ".set\t" #x ",sys_ni_syscall");
386
387#endif /* __ASM_AVR32_UNISTD_H */
diff --git a/include/asm-avr32/user.h b/include/asm-avr32/user.h
new file mode 100644
index 000000000000..060fb3acee49
--- /dev/null
+++ b/include/asm-avr32/user.h
@@ -0,0 +1,65 @@
1/*
2 * Copyright (C) 2004-2006 Atmel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Note: We may not need these definitions for AVR32, as we don't
9 * support a.out.
10 */
11#ifndef __ASM_AVR32_USER_H
12#define __ASM_AVR32_USER_H
13
14#include <linux/types.h>
15#include <asm/ptrace.h>
16#include <asm/page.h>
17
18/*
19 * Core file format: The core file is written in such a way that gdb
20 * can understand it and provide useful information to the user (under
21 * linux we use the `trad-core' bfd). The file contents are as follows:
22 *
23 * upage: 1 page consisting of a user struct that tells gdb
24 * what is present in the file. Directly after this is a
25 * copy of the task_struct, which is currently not used by gdb,
26 * but it may come in handy at some point. All of the registers
27 * are stored as part of the upage. The upage should always be
28 * only one page long.
29 * data: The data segment follows next. We use current->end_text to
30 * current->brk to pick up all of the user variables, plus any memory
31 * that may have been sbrk'ed. No attempt is made to determine if a
32 * page is demand-zero or if a page is totally unused, we just cover
33 * the entire range. All of the addresses are rounded in such a way
34 * that an integral number of pages is written.
35 * stack: We need the stack information in order to get a meaningful
36 * backtrace. We need to write the data from usp to
37 * current->start_stack, so we round each of these in order to be able
38 * to write an integer number of pages.
39 */
40
41struct user_fpu_struct {
42 /* We have no FPU (yet) */
43};
44
/*
 * Contents of the "upage" described in the core file format comment above:
 * the saved registers followed by segment sizes, segment start addresses
 * and bookkeeping fields for the debugger.
 */
45struct user {
46 struct pt_regs regs; /* entire machine state */
47 size_t u_tsize; /* text size (pages) */
48 size_t u_dsize; /* data size (pages) */
49 size_t u_ssize; /* stack size (pages) */
50 unsigned long start_code; /* text starting address */
51 unsigned long start_data; /* data starting address */
52 unsigned long start_stack; /* stack starting address */
53 long int signal; /* signal causing core dump */
/*
 * NOTE(review): u_ar0 points to "struct regs", a tag this header does not
 * declare, while the register block above is "struct pt_regs" - confirm
 * which type is intended.
 */
54 struct regs * u_ar0; /* help gdb find registers */
55 unsigned long magic; /* identifies a core file */
56 char u_comm[32]; /* user command name */
57};
58
59#define NBPG PAGE_SIZE
60#define UPAGES 1
61#define HOST_TEXT_START_ADDR (u.start_code)
62#define HOST_DATA_START_ADDR (u.start_data)
63#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
64
65#endif /* __ASM_AVR32_USER_H */
diff --git a/include/asm-cris/pgtable.h b/include/asm-cris/pgtable.h
index 5d76c1c0d6c9..c94a7107019c 100644
--- a/include/asm-cris/pgtable.h
+++ b/include/asm-cris/pgtable.h
@@ -253,7 +253,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
253{ pmd_val(*pmdp) = _PAGE_TABLE | (unsigned long) ptep; } 253{ pmd_val(*pmdp) = _PAGE_TABLE | (unsigned long) ptep; }
254 254
255#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) 255#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
256#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 256#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
257 257
258/* to find an entry in a page-table-directory. */ 258/* to find an entry in a page-table-directory. */
259#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) 259#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -271,7 +271,7 @@ static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
271#define __pte_offset(address) \ 271#define __pte_offset(address) \
272 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 272 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
273#define pte_offset_kernel(dir, address) \ 273#define pte_offset_kernel(dir, address) \
274 ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) 274 ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address))
275#define pte_offset_map(dir, address) \ 275#define pte_offset_map(dir, address) \
276 ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) 276 ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
277#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) 277#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index 980ae1b0cd28..1f70d47148bd 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -157,23 +157,105 @@ static inline int __test_bit(int nr, const volatile void * addr)
157 __constant_test_bit((nr),(addr)) : \ 157 __constant_test_bit((nr),(addr)) : \
158 __test_bit((nr),(addr))) 158 __test_bit((nr),(addr)))
159 159
160#include <asm-generic/bitops/ffs.h>
161#include <asm-generic/bitops/__ffs.h>
162#include <asm-generic/bitops/find.h> 160#include <asm-generic/bitops/find.h>
163 161
164/* 162/**
165 * fls: find last bit set. 163 * fls - find last bit set
164 * @x: the word to search
165 *
166 * This is defined the same way as ffs:
167 * - return 32..1 to indicate bit 31..0 most significant bit set
168 * - return 0 to indicate no bits set
166 */ 169 */
167#define fls(x) \ 170#define fls(x) \
168({ \ 171({ \
169 int bit; \ 172 int bit; \
170 \ 173 \
171 asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x)); \ 174 asm(" subcc %1,gr0,gr0,icc0 \n" \
175 " ckne icc0,cc4 \n" \
176 " cscan.p %1,gr0,%0 ,cc4,#1 \n" \
177 " csub %0,%0,%0 ,cc4,#0 \n" \
178 " csub %2,%0,%0 ,cc4,#1 \n" \
179 : "=&r"(bit) \
180 : "r"(x), "r"(32) \
181 : "icc0", "cc4" \
182 ); \
172 \ 183 \
173 bit ? 33 - bit : bit; \ 184 bit; \
174}) 185})
175 186
176#include <asm-generic/bitops/fls64.h> 187/**
188 * fls64 - find last bit set in a 64-bit value
189 * @n: the value to search
190 *
191 * This is defined the same way as ffs:
192 * - return 64..1 to indicate bit 63..0 most significant bit set
193 * - return 0 to indicate no bits set
194 */
195static inline __attribute__((const))
196int fls64(u64 n)
197{
/*
 * Overlay the 64-bit argument with two 32-bit words so that each half can
 * be handed to the asm as its own register operand.
 */
198 union {
199 u64 ll;
200 struct { u32 h, l; };
201 } _;
/* bit receives the result; x and y are scratch outputs of the asm */
202 int bit, x, y;
203
204 _.ll = n;
205
/*
 * Operands: %0 = bit (tied to _.h on input), %1 = x, %2 = y, %3 = _.h,
 * %4 = _.l.  The asm loads 64 into x and 32 into y (setlos) and uses them
 * in the closing conditional subtractions.
 */
206 asm(" subcc.p %3,gr0,gr0,icc0 \n"
207 " subcc %4,gr0,gr0,icc1 \n"
208 " ckne icc0,cc4 \n"
209 " ckne icc1,cc5 \n"
210 " norcr cc4,cc5,cc6 \n"
211 " csub.p %0,%0,%0 ,cc6,1 \n"
212 " orcr cc5,cc4,cc4 \n"
213 " andcr cc4,cc5,cc4 \n"
214 " cscan.p %3,gr0,%0 ,cc4,0 \n"
215 " setlos #64,%1 \n"
216 " cscan.p %4,gr0,%0 ,cc4,1 \n"
217 " setlos #32,%2 \n"
218 " csub.p %1,%0,%0 ,cc4,0 \n"
219 " csub %2,%0,%0 ,cc4,1 \n"
220 : "=&r"(bit), "=r"(x), "=r"(y)
221 : "0r"(_.h), "r"(_.l)
222 : "icc0", "icc1", "cc4", "cc5", "cc6"
223 );
224 return bit;
225
226}
227
228/**
229 * ffs - find first bit set
230 * @x: the word to search
231 *
232 * - return 32..1 to indicate that bit 31..0 is the least significant bit set
233 * - return 0 to indicate no bits set
234 */
235static inline __attribute__((const))
236int ffs(int x)
237{
238 /* Note: (x & -x) gives us a mask that is the least significant
239 * (rightmost) 1-bit of the value in x.
240 */
241 return fls(x & -x);
242}
243
244/**
245 * __ffs - find first bit set
246 * @x: the word to search
247 *
248 * - return 31..0 to indicate that bit 31..0 is the least significant bit set
249 * - if no bits are set in x, the result is undefined
250 */
251static inline __attribute__((const))
252int __ffs(unsigned long x)
253{
254 int bit;
/* (x & -x) keeps only the lowest set bit, so the scan sees a single bit */
255 asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x & -x));
256 return 31 - bit;
257}
258
177#include <asm-generic/bitops/sched.h> 259#include <asm-generic/bitops/sched.h>
178#include <asm-generic/bitops/hweight.h> 260#include <asm-generic/bitops/hweight.h>
179 261
diff --git a/include/asm-frv/cpu-irqs.h b/include/asm-frv/cpu-irqs.h
index 5cd691e1f8c4..478f3498fcfe 100644
--- a/include/asm-frv/cpu-irqs.h
+++ b/include/asm-frv/cpu-irqs.h
@@ -14,36 +14,6 @@
14 14
15#ifndef __ASSEMBLY__ 15#ifndef __ASSEMBLY__
16 16
17#include <asm/irq-routing.h>
18
19#define IRQ_BASE_CPU (NR_IRQ_ACTIONS_PER_GROUP * 0)
20
21/* IRQ IDs presented to drivers */
22enum {
23 IRQ_CPU__UNUSED = IRQ_BASE_CPU,
24 IRQ_CPU_UART0,
25 IRQ_CPU_UART1,
26 IRQ_CPU_TIMER0,
27 IRQ_CPU_TIMER1,
28 IRQ_CPU_TIMER2,
29 IRQ_CPU_DMA0,
30 IRQ_CPU_DMA1,
31 IRQ_CPU_DMA2,
32 IRQ_CPU_DMA3,
33 IRQ_CPU_DMA4,
34 IRQ_CPU_DMA5,
35 IRQ_CPU_DMA6,
36 IRQ_CPU_DMA7,
37 IRQ_CPU_EXTERNAL0,
38 IRQ_CPU_EXTERNAL1,
39 IRQ_CPU_EXTERNAL2,
40 IRQ_CPU_EXTERNAL3,
41 IRQ_CPU_EXTERNAL4,
42 IRQ_CPU_EXTERNAL5,
43 IRQ_CPU_EXTERNAL6,
44 IRQ_CPU_EXTERNAL7,
45};
46
47/* IRQ to level mappings */ 17/* IRQ to level mappings */
48#define IRQ_GDBSTUB_LEVEL 15 18#define IRQ_GDBSTUB_LEVEL 15
49#define IRQ_UART_LEVEL 13 19#define IRQ_UART_LEVEL 13
@@ -82,6 +52,30 @@ enum {
82#define IRQ_XIRQ6_LEVEL 7 52#define IRQ_XIRQ6_LEVEL 7
83#define IRQ_XIRQ7_LEVEL 8 53#define IRQ_XIRQ7_LEVEL 8
84 54
55/* IRQ IDs presented to drivers */
56#define IRQ_CPU__UNUSED IRQ_BASE_CPU
57#define IRQ_CPU_UART0 (IRQ_BASE_CPU + IRQ_UART0_LEVEL)
58#define IRQ_CPU_UART1 (IRQ_BASE_CPU + IRQ_UART1_LEVEL)
59#define IRQ_CPU_TIMER0 (IRQ_BASE_CPU + IRQ_TIMER0_LEVEL)
60#define IRQ_CPU_TIMER1 (IRQ_BASE_CPU + IRQ_TIMER1_LEVEL)
61#define IRQ_CPU_TIMER2 (IRQ_BASE_CPU + IRQ_TIMER2_LEVEL)
62#define IRQ_CPU_DMA0 (IRQ_BASE_CPU + IRQ_DMA0_LEVEL)
63#define IRQ_CPU_DMA1 (IRQ_BASE_CPU + IRQ_DMA1_LEVEL)
64#define IRQ_CPU_DMA2 (IRQ_BASE_CPU + IRQ_DMA2_LEVEL)
65#define IRQ_CPU_DMA3 (IRQ_BASE_CPU + IRQ_DMA3_LEVEL)
66#define IRQ_CPU_DMA4 (IRQ_BASE_CPU + IRQ_DMA4_LEVEL)
67#define IRQ_CPU_DMA5 (IRQ_BASE_CPU + IRQ_DMA5_LEVEL)
68#define IRQ_CPU_DMA6 (IRQ_BASE_CPU + IRQ_DMA6_LEVEL)
69#define IRQ_CPU_DMA7 (IRQ_BASE_CPU + IRQ_DMA7_LEVEL)
70#define IRQ_CPU_EXTERNAL0 (IRQ_BASE_CPU + IRQ_XIRQ0_LEVEL)
71#define IRQ_CPU_EXTERNAL1 (IRQ_BASE_CPU + IRQ_XIRQ1_LEVEL)
72#define IRQ_CPU_EXTERNAL2 (IRQ_BASE_CPU + IRQ_XIRQ2_LEVEL)
73#define IRQ_CPU_EXTERNAL3 (IRQ_BASE_CPU + IRQ_XIRQ3_LEVEL)
74#define IRQ_CPU_EXTERNAL4 (IRQ_BASE_CPU + IRQ_XIRQ4_LEVEL)
75#define IRQ_CPU_EXTERNAL5 (IRQ_BASE_CPU + IRQ_XIRQ5_LEVEL)
76#define IRQ_CPU_EXTERNAL6 (IRQ_BASE_CPU + IRQ_XIRQ6_LEVEL)
77#define IRQ_CPU_EXTERNAL7 (IRQ_BASE_CPU + IRQ_XIRQ7_LEVEL)
78
85#endif /* !__ASSEMBLY__ */ 79#endif /* !__ASSEMBLY__ */
86 80
87#endif /* _ASM_CPU_IRQS_H */ 81#endif /* _ASM_CPU_IRQS_H */
diff --git a/include/asm-frv/hardirq.h b/include/asm-frv/hardirq.h
index 7581b5a7559a..fc47515822a2 100644
--- a/include/asm-frv/hardirq.h
+++ b/include/asm-frv/hardirq.h
@@ -26,5 +26,10 @@ typedef struct {
26#error SMP not available on FR-V 26#error SMP not available on FR-V
27#endif /* CONFIG_SMP */ 27#endif /* CONFIG_SMP */
28 28
29extern atomic_t irq_err_count;
/*
 * Account for an unexpected interrupt by incrementing irq_err_count.
 * The irq argument itself is not used.
 */
30static inline void ack_bad_irq(int irq)
31{
32 atomic_inc(&irq_err_count);
33}
29 34
30#endif 35#endif
diff --git a/include/asm-frv/irq-routing.h b/include/asm-frv/irq-routing.h
deleted file mode 100644
index ac3ab900a1dc..000000000000
--- a/include/asm-frv/irq-routing.h
+++ /dev/null
@@ -1,70 +0,0 @@
1/* irq-routing.h: multiplexed IRQ routing
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _ASM_IRQ_ROUTING_H
13#define _ASM_IRQ_ROUTING_H
14
15#ifndef __ASSEMBLY__
16
17#include <linux/spinlock.h>
18#include <asm/irq.h>
19
20struct irq_source;
21struct irq_level;
22
23/*
24 * IRQ action distribution sets
25 */
26struct irq_group {
27 int first_irq; /* first IRQ distributed here */
28 void (*control)(struct irq_group *group, int index, int on);
29
30 struct irqaction *actions[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ action chains */
31 struct irq_source *sources[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ sources */
32 int disable_cnt[NR_IRQ_ACTIONS_PER_GROUP]; /* disable counts */
33};
34
35/*
36 * IRQ source manager
37 */
38struct irq_source {
39 struct irq_source *next;
40 struct irq_level *level;
41 const char *muxname;
42 volatile void __iomem *muxdata;
43 unsigned long irqmask;
44
45 void (*doirq)(struct irq_source *source);
46};
47
48/*
49 * IRQ level management (per CPU IRQ priority / entry vector)
50 */
51struct irq_level {
52 int usage;
53 int disable_count;
54 unsigned long flags; /* current IRQF_DISABLED and IRQF_SHARED settings */
55 spinlock_t lock;
56 struct irq_source *sources;
57};
58
59extern struct irq_level frv_irq_levels[16];
60extern struct irq_group *irq_groups[NR_IRQ_GROUPS];
61
62extern void frv_irq_route(struct irq_source *source, int irqlevel);
63extern void frv_irq_route_external(struct irq_source *source, int irq);
64extern void frv_irq_set_group(struct irq_group *group);
65extern void distribute_irqs(struct irq_group *group, unsigned long irqmask);
66extern void route_cpu_irqs(void);
67
68#endif /* !__ASSEMBLY__ */
69
70#endif /* _ASM_IRQ_ROUTING_H */
diff --git a/include/asm-frv/irq.h b/include/asm-frv/irq.h
index 58b619215a50..8fefd6b827aa 100644
--- a/include/asm-frv/irq.h
+++ b/include/asm-frv/irq.h
@@ -1,6 +1,6 @@
1/* irq.h: FRV IRQ definitions 1/* irq.h: FRV IRQ definitions
2 * 2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -12,32 +12,22 @@
12#ifndef _ASM_IRQ_H_ 12#ifndef _ASM_IRQ_H_
13#define _ASM_IRQ_H_ 13#define _ASM_IRQ_H_
14 14
15
16/*
17 * the system has an on-CPU PIC and another PIC on the FPGA and other PICs on other peripherals,
18 * so we do some routing in irq-routing.[ch] to reduce the number of false-positives seen by
19 * drivers
20 */
21
22/* this number is used when no interrupt has been assigned */ 15/* this number is used when no interrupt has been assigned */
23#define NO_IRQ (-1) 16#define NO_IRQ (-1)
24 17
25#define NR_IRQ_LOG2_ACTIONS_PER_GROUP 5 18#define NR_IRQS 48
26#define NR_IRQ_ACTIONS_PER_GROUP (1 << NR_IRQ_LOG2_ACTIONS_PER_GROUP) 19#define IRQ_BASE_CPU (0 * 16)
27#define NR_IRQ_GROUPS 4 20#define IRQ_BASE_FPGA (1 * 16)
28#define NR_IRQS (NR_IRQ_ACTIONS_PER_GROUP * NR_IRQ_GROUPS) 21#define IRQ_BASE_MB93493 (2 * 16)
29 22
30/* probe returns a 32-bit IRQ mask:-/ */ 23/* probe returns a 32-bit IRQ mask:-/ */
31#define MIN_PROBE_IRQ (NR_IRQS - 32) 24#define MIN_PROBE_IRQ (NR_IRQS - 32)
32 25
26#ifndef __ASSEMBLY__
33static inline int irq_canonicalize(int irq) 27static inline int irq_canonicalize(int irq)
34{ 28{
35 return irq; 29 return irq;
36} 30}
37 31#endif
38extern void disable_irq_nosync(unsigned int irq);
39extern void disable_irq(unsigned int irq);
40extern void enable_irq(unsigned int irq);
41
42 32
43#endif /* _ASM_IRQ_H_ */ 33#endif /* _ASM_IRQ_H_ */
diff --git a/include/asm-frv/mb93091-fpga-irqs.h b/include/asm-frv/mb93091-fpga-irqs.h
index 341bfc52a0eb..19778c5ba9d6 100644
--- a/include/asm-frv/mb93091-fpga-irqs.h
+++ b/include/asm-frv/mb93091-fpga-irqs.h
@@ -12,11 +12,9 @@
12#ifndef _ASM_MB93091_FPGA_IRQS_H 12#ifndef _ASM_MB93091_FPGA_IRQS_H
13#define _ASM_MB93091_FPGA_IRQS_H 13#define _ASM_MB93091_FPGA_IRQS_H
14 14
15#ifndef __ASSEMBLY__ 15#include <asm/irq.h>
16
17#include <asm/irq-routing.h>
18 16
19#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1) 17#ifndef __ASSEMBLY__
20 18
21/* IRQ IDs presented to drivers */ 19/* IRQ IDs presented to drivers */
22enum { 20enum {
diff --git a/include/asm-frv/mb93093-fpga-irqs.h b/include/asm-frv/mb93093-fpga-irqs.h
index 1e0f11c2fcdb..590266b1a6d3 100644
--- a/include/asm-frv/mb93093-fpga-irqs.h
+++ b/include/asm-frv/mb93093-fpga-irqs.h
@@ -12,11 +12,9 @@
12#ifndef _ASM_MB93093_FPGA_IRQS_H 12#ifndef _ASM_MB93093_FPGA_IRQS_H
13#define _ASM_MB93093_FPGA_IRQS_H 13#define _ASM_MB93093_FPGA_IRQS_H
14 14
15#ifndef __ASSEMBLY__ 15#include <asm/irq.h>
16
17#include <asm/irq-routing.h>
18 16
19#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1) 17#ifndef __ASSEMBLY__
20 18
21/* IRQ IDs presented to drivers */ 19/* IRQ IDs presented to drivers */
22enum { 20enum {
diff --git a/include/asm-frv/mb93493-irqs.h b/include/asm-frv/mb93493-irqs.h
index 15096e731325..82c7aeddd333 100644
--- a/include/asm-frv/mb93493-irqs.h
+++ b/include/asm-frv/mb93493-irqs.h
@@ -12,11 +12,9 @@
12#ifndef _ASM_MB93493_IRQS_H 12#ifndef _ASM_MB93493_IRQS_H
13#define _ASM_MB93493_IRQS_H 13#define _ASM_MB93493_IRQS_H
14 14
15#ifndef __ASSEMBLY__ 15#include <asm/irq.h>
16
17#include <asm/irq-routing.h>
18 16
19#define IRQ_BASE_MB93493 (NR_IRQ_ACTIONS_PER_GROUP * 2) 17#ifndef __ASSEMBLY__
20 18
21/* IRQ IDs presented to drivers */ 19/* IRQ IDs presented to drivers */
22enum { 20enum {
diff --git a/include/asm-frv/mb93493-regs.h b/include/asm-frv/mb93493-regs.h
index c54aa9d14468..8a1f6aac8cf1 100644
--- a/include/asm-frv/mb93493-regs.h
+++ b/include/asm-frv/mb93493-regs.h
@@ -15,6 +15,7 @@
15#include <asm/mb-regs.h> 15#include <asm/mb-regs.h>
16#include <asm/mb93493-irqs.h> 16#include <asm/mb93493-irqs.h>
17 17
18#define __addr_MB93493(X) ((volatile unsigned long *)(__region_CS3 + (X)))
18#define __get_MB93493(X) ({ *(volatile unsigned long *)(__region_CS3 + (X)); }) 19#define __get_MB93493(X) ({ *(volatile unsigned long *)(__region_CS3 + (X)); })
19 20
20#define __set_MB93493(X,V) \ 21#define __set_MB93493(X,V) \
@@ -26,6 +27,7 @@ do { \
26#define __set_MB93493_STSR(X,V) __set_MB93493(0x3c0 + (X) * 4, (V)) 27#define __set_MB93493_STSR(X,V) __set_MB93493(0x3c0 + (X) * 4, (V))
27#define MB93493_STSR_EN 28#define MB93493_STSR_EN
28 29
30#define __addr_MB93493_IQSR(X) __addr_MB93493(0x3d0 + (X) * 4)
29#define __get_MB93493_IQSR(X) __get_MB93493(0x3d0 + (X) * 4) 31#define __get_MB93493_IQSR(X) __get_MB93493(0x3d0 + (X) * 4)
30#define __set_MB93493_IQSR(X,V) __set_MB93493(0x3d0 + (X) * 4, (V)) 32#define __set_MB93493_IQSR(X,V) __set_MB93493(0x3d0 + (X) * 4, (V))
31 33
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 7af7485e889e..2fb3c6f05e03 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -217,7 +217,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
217} 217}
218 218
219#define pgd_page(pgd) (pud_page((pud_t){ pgd })) 219#define pgd_page(pgd) (pud_page((pud_t){ pgd }))
220#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd })) 220#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd }))
221 221
222/* 222/*
223 * allocating and freeing a pud is trivial: the 1-entry pud is 223 * allocating and freeing a pud is trivial: the 1-entry pud is
@@ -246,7 +246,7 @@ static inline void pud_clear(pud_t *pud) { }
246#define set_pud(pudptr, pudval) set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval }) 246#define set_pud(pudptr, pudval) set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval })
247 247
248#define pud_page(pud) (pmd_page((pmd_t){ pud })) 248#define pud_page(pud) (pmd_page((pmd_t){ pud }))
249#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud })) 249#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud }))
250 250
251/* 251/*
252 * (pmds are folded into pgds so this doesn't get actually called, 252 * (pmds are folded into pgds so this doesn't get actually called,
@@ -362,7 +362,7 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)
362#define pmd_bad(x) (pmd_val(x) & xAMPRx_SS) 362#define pmd_bad(x) (pmd_val(x) & xAMPRx_SS)
363#define pmd_clear(xp) do { __set_pmd(xp, 0); } while(0) 363#define pmd_clear(xp) do { __set_pmd(xp, 0); } while(0)
364 364
365#define pmd_page_kernel(pmd) \ 365#define pmd_page_vaddr(pmd) \
366 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 366 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
367 367
368#ifndef CONFIG_DISCONTIGMEM 368#ifndef CONFIG_DISCONTIGMEM
@@ -458,7 +458,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
458#define pte_index(address) \ 458#define pte_index(address) \
459 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 459 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
460#define pte_offset_kernel(dir, address) \ 460#define pte_offset_kernel(dir, address) \
461 ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) 461 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
462 462
463#if defined(CONFIG_HIGHPTE) 463#if defined(CONFIG_HIGHPTE)
464#define pte_offset_map(dir, address) \ 464#define pte_offset_map(dir, address) \
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index 68c6fea994d9..7b88d3931e34 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -21,6 +21,10 @@
21#define pud_present(pud) 1 21#define pud_present(pud) 1
22#define pud_ERROR(pud) do { } while (0) 22#define pud_ERROR(pud) do { } while (0)
23#define pud_clear(pud) pgd_clear(pud) 23#define pud_clear(pud) pgd_clear(pud)
24#define pud_val(pud) pgd_val(pud)
25#define pud_populate(mm, pud, pmd) pgd_populate(mm, pud, pmd)
26#define pud_page(pud) pgd_page(pud)
27#define pud_page_vaddr(pud) pgd_page_vaddr(pud)
24 28
25#undef pud_free_tlb 29#undef pud_free_tlb
26#define pud_free_tlb(tlb, x) do { } while (0) 30#define pud_free_tlb(tlb, x) do { } while (0)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index e160e04290fb..6d45ee5472af 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -14,7 +14,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
14 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name 14 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
15 15
16/* var is in discarded region: offset to particular copy we want */ 16/* var is in discarded region: offset to particular copy we want */
17#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu])) 17#define per_cpu(var, cpu) (*({ \
18 extern int simple_indentifier_##var(void); \
19 RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
18#define __get_cpu_var(var) per_cpu(var, smp_processor_id()) 20#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
19#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id()) 21#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id())
20 22
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index c8d53ba20e19..29ff5d84d8c3 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -47,7 +47,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
47#define __pmd(x) ((pmd_t) { __pud(x) } ) 47#define __pmd(x) ((pmd_t) { __pud(x) } )
48 48
49#define pud_page(pud) (pmd_page((pmd_t){ pud })) 49#define pud_page(pud) (pmd_page((pmd_t){ pud }))
50#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud })) 50#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud }))
51 51
52/* 52/*
53 * allocating and freeing a pmd is trivial: the 1-entry pmd is 53 * allocating and freeing a pmd is trivial: the 1-entry pmd is
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index 82e29f0ce467..566464500558 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -44,7 +44,7 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
44#define __pud(x) ((pud_t) { __pgd(x) } ) 44#define __pud(x) ((pud_t) { __pgd(x) } )
45 45
46#define pgd_page(pgd) (pud_page((pud_t){ pgd })) 46#define pgd_page(pgd) (pud_page((pud_t){ pgd }))
47#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd })) 47#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd }))
48 48
49/* 49/*
50 * allocating and freeing a pud is trivial: the 1-entry pud is 50 * allocating and freeing a pud is trivial: the 1-entry pud is
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index c2059a3a0621..349260cd86ed 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_GENERIC_PGTABLE_H 1#ifndef _ASM_GENERIC_PGTABLE_H
2#define _ASM_GENERIC_PGTABLE_H 2#define _ASM_GENERIC_PGTABLE_H
3 3
4#ifndef __ASSEMBLY__
5
4#ifndef __HAVE_ARCH_PTEP_ESTABLISH 6#ifndef __HAVE_ARCH_PTEP_ESTABLISH
5/* 7/*
6 * Establish a new mapping: 8 * Establish a new mapping:
@@ -188,7 +190,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
188}) 190})
189#endif 191#endif
190 192
191#ifndef __ASSEMBLY__
192/* 193/*
193 * When walking page tables, we usually want to skip any p?d_none entries; 194 * When walking page tables, we usually want to skip any p?d_none entries;
194 * and any p?d_bad entries - reporting the error before resetting to none. 195 * and any p?d_bad entries - reporting the error before resetting to none.
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db5a3732f106..253ae1328271 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -194,3 +194,6 @@
194 .stab.index 0 : { *(.stab.index) } \ 194 .stab.index 0 : { *(.stab.index) } \
195 .stab.indexstr 0 : { *(.stab.indexstr) } \ 195 .stab.indexstr 0 : { *(.stab.indexstr) } \
196 .comment 0 : { *(.comment) } 196 .comment 0 : { *(.comment) }
197
198#define NOTES \
199 .notes : { *(.note.*) } :note
diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild
index b75a348d0c1c..147e4ac1ebf0 100644
--- a/include/asm-i386/Kbuild
+++ b/include/asm-i386/Kbuild
@@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm
3header-y += boot.h 3header-y += boot.h
4header-y += debugreg.h 4header-y += debugreg.h
5header-y += ldt.h 5header-y += ldt.h
6header-y += ptrace-abi.h
6header-y += ucontext.h 7header-y += ucontext.h
7 8
8unifdef-y += mtrr.h 9unifdef-y += mtrr.h
diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h
index 9cf20cacf76e..576ae01d71c8 100644
--- a/include/asm-i386/dma-mapping.h
+++ b/include/asm-i386/dma-mapping.h
@@ -21,8 +21,7 @@ static inline dma_addr_t
21dma_map_single(struct device *dev, void *ptr, size_t size, 21dma_map_single(struct device *dev, void *ptr, size_t size,
22 enum dma_data_direction direction) 22 enum dma_data_direction direction)
23{ 23{
24 if (direction == DMA_NONE) 24 BUG_ON(direction == DMA_NONE);
25 BUG();
26 WARN_ON(size == 0); 25 WARN_ON(size == 0);
27 flush_write_buffers(); 26 flush_write_buffers();
28 return virt_to_phys(ptr); 27 return virt_to_phys(ptr);
@@ -32,8 +31,7 @@ static inline void
32dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, 31dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
33 enum dma_data_direction direction) 32 enum dma_data_direction direction)
34{ 33{
35 if (direction == DMA_NONE) 34 BUG_ON(direction == DMA_NONE);
36 BUG();
37} 35}
38 36
39static inline int 37static inline int
@@ -42,8 +40,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
42{ 40{
43 int i; 41 int i;
44 42
45 if (direction == DMA_NONE) 43 BUG_ON(direction == DMA_NONE);
46 BUG();
47 WARN_ON(nents == 0 || sg[0].length == 0); 44 WARN_ON(nents == 0 || sg[0].length == 0);
48 45
49 for (i = 0; i < nents; i++ ) { 46 for (i = 0; i < nents; i++ ) {
diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
index a48cc3f7ccc6..02428cb36621 100644
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -19,7 +19,11 @@
19 * Leave one empty page between vmalloc'ed areas and 19 * Leave one empty page between vmalloc'ed areas and
20 * the start of the fixmap. 20 * the start of the fixmap.
21 */ 21 */
22#define __FIXADDR_TOP 0xfffff000 22#ifndef CONFIG_COMPAT_VDSO
23extern unsigned long __FIXADDR_TOP;
24#else
25#define __FIXADDR_TOP 0xfffff000
26#endif
23 27
24#ifndef __ASSEMBLY__ 28#ifndef __ASSEMBLY__
25#include <linux/kernel.h> 29#include <linux/kernel.h>
@@ -93,6 +97,7 @@ enum fixed_addresses {
93 97
94extern void __set_fixmap (enum fixed_addresses idx, 98extern void __set_fixmap (enum fixed_addresses idx,
95 unsigned long phys, pgprot_t flags); 99 unsigned long phys, pgprot_t flags);
100extern void reserve_top_address(unsigned long reserve);
96 101
97#define set_fixmap(idx, phys) \ 102#define set_fixmap(idx, phys) \
98 __set_fixmap(idx, phys, PAGE_KERNEL) 103 __set_fixmap(idx, phys, PAGE_KERNEL)
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 22cb07cc8f32..61b073322006 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -38,10 +38,16 @@ static inline void get_memcfg_numa(void)
38} 38}
39 39
40extern int early_pfn_to_nid(unsigned long pfn); 40extern int early_pfn_to_nid(unsigned long pfn);
41extern void numa_kva_reserve(void);
41 42
42#else /* !CONFIG_NUMA */ 43#else /* !CONFIG_NUMA */
44
43#define get_memcfg_numa get_memcfg_numa_flat 45#define get_memcfg_numa get_memcfg_numa_flat
44#define get_zholes_size(n) (0) 46#define get_zholes_size(n) (0)
47
48static inline void numa_kva_reserve(void)
49{
50}
45#endif /* CONFIG_NUMA */ 51#endif /* CONFIG_NUMA */
46 52
47#ifdef CONFIG_DISCONTIGMEM 53#ifdef CONFIG_DISCONTIGMEM
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 2756d4b04c27..201c86a6711e 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -21,8 +21,9 @@
21#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) 21#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
22#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) 22#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
23 23
24#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
24#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) 25#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
25#define pte_same(a, b) ((a).pte_low == (b).pte_low) 26
26#define pte_page(x) pfn_to_page(pte_pfn(x)) 27#define pte_page(x) pfn_to_page(pte_pfn(x))
27#define pte_none(x) (!(x).pte_low) 28#define pte_none(x) (!(x).pte_low)
28#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) 29#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index dccb1b3337ad..0d899173232e 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -77,7 +77,7 @@ static inline void pud_clear (pud_t * pud) { }
77#define pud_page(pud) \ 77#define pud_page(pud) \
78((struct page *) __va(pud_val(pud) & PAGE_MASK)) 78((struct page *) __va(pud_val(pud) & PAGE_MASK))
79 79
80#define pud_page_kernel(pud) \ 80#define pud_page_vaddr(pud) \
81((unsigned long) __va(pud_val(pud) & PAGE_MASK)) 81((unsigned long) __va(pud_val(pud) & PAGE_MASK))
82 82
83 83
@@ -105,6 +105,7 @@ static inline void pmd_clear(pmd_t *pmd)
105 *(tmp + 1) = 0; 105 *(tmp + 1) = 0;
106} 106}
107 107
108#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
108static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 109static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
109{ 110{
110 pte_t res; 111 pte_t res;
@@ -117,6 +118,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
117 return res; 118 return res;
118} 119}
119 120
121#define __HAVE_ARCH_PTE_SAME
120static inline int pte_same(pte_t a, pte_t b) 122static inline int pte_same(pte_t a, pte_t b)
121{ 123{
122 return a.pte_low == b.pte_low && a.pte_high == b.pte_high; 124 return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 09697fec3d2b..0dc051a8078b 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -246,6 +246,23 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p
246# include <asm/pgtable-2level.h> 246# include <asm/pgtable-2level.h>
247#endif 247#endif
248 248
249/*
250 * We only update the dirty/accessed state if we set
251 * the dirty bit by hand in the kernel, since the hardware
252 * will do the accessed bit for us, and we don't want to
253 * race with other CPU's that might be updating the dirty
254 * bit at the same time.
255 */
256#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
257#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
258do { \
259 if (dirty) { \
260 (ptep)->pte_low = (entry).pte_low; \
261 flush_tlb_page(vma, address); \
262 } \
263} while (0)
264
265#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
249static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) 266static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
250{ 267{
251 if (!pte_dirty(*ptep)) 268 if (!pte_dirty(*ptep))
@@ -253,6 +270,7 @@ static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned
253 return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); 270 return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
254} 271}
255 272
273#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
256static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) 274static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
257{ 275{
258 if (!pte_young(*ptep)) 276 if (!pte_young(*ptep))
@@ -260,6 +278,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
260 return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); 278 return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
261} 279}
262 280
281#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
263static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) 282static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
264{ 283{
265 pte_t pte; 284 pte_t pte;
@@ -272,6 +291,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
272 return pte; 291 return pte;
273} 292}
274 293
294#define __HAVE_ARCH_PTEP_SET_WRPROTECT
275static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 295static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
276{ 296{
277 clear_bit(_PAGE_BIT_RW, &ptep->pte_low); 297 clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
@@ -364,11 +384,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
364#define pte_index(address) \ 384#define pte_index(address) \
365 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 385 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
366#define pte_offset_kernel(dir, address) \ 386#define pte_offset_kernel(dir, address) \
367 ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) 387 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
368 388
369#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) 389#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
370 390
371#define pmd_page_kernel(pmd) \ 391#define pmd_page_vaddr(pmd) \
372 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 392 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
373 393
374/* 394/*
@@ -411,23 +431,8 @@ extern void noexec_setup(const char *str);
411/* 431/*
412 * The i386 doesn't have any external MMU info: the kernel page 432 * The i386 doesn't have any external MMU info: the kernel page
413 * tables contain all the necessary information. 433 * tables contain all the necessary information.
414 *
415 * Also, we only update the dirty/accessed state if we set
416 * the dirty bit by hand in the kernel, since the hardware
417 * will do the accessed bit for us, and we don't want to
418 * race with other CPU's that might be updating the dirty
419 * bit at the same time.
420 */ 434 */
421#define update_mmu_cache(vma,address,pte) do { } while (0) 435#define update_mmu_cache(vma,address,pte) do { } while (0)
422#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
423#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
424 do { \
425 if (__dirty) { \
426 (__ptep)->pte_low = (__entry).pte_low; \
427 flush_tlb_page(__vma, __address); \
428 } \
429 } while (0)
430
431#endif /* !__ASSEMBLY__ */ 436#endif /* !__ASSEMBLY__ */
432 437
433#ifdef CONFIG_FLATMEM 438#ifdef CONFIG_FLATMEM
@@ -441,12 +446,6 @@ extern void noexec_setup(const char *str);
441#define GET_IOSPACE(pfn) 0 446#define GET_IOSPACE(pfn) 0
442#define GET_PFN(pfn) (pfn) 447#define GET_PFN(pfn) (pfn)
443 448
444#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
445#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
446#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
447#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
448#define __HAVE_ARCH_PTEP_SET_WRPROTECT
449#define __HAVE_ARCH_PTE_SAME
450#include <asm-generic/pgtable.h> 449#include <asm-generic/pgtable.h>
451 450
452#endif /* _I386_PGTABLE_H */ 451#endif /* _I386_PGTABLE_H */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index b32346d62e10..2277127696d2 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -143,6 +143,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {}
143#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ 143#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
144#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ 144#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
145 145
146static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
147 unsigned int *ecx, unsigned int *edx)
148{
149 /* ecx is often an input as well as an output. */
150 __asm__("cpuid"
151 : "=a" (*eax),
152 "=b" (*ebx),
153 "=c" (*ecx),
154 "=d" (*edx)
155 : "0" (*eax), "2" (*ecx));
156}
157
146/* 158/*
147 * Generic CPUID function 159 * Generic CPUID function
148 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx 160 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
@@ -150,24 +162,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {}
150 */ 162 */
151static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) 163static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
152{ 164{
153 __asm__("cpuid" 165 *eax = op;
154 : "=a" (*eax), 166 *ecx = 0;
155 "=b" (*ebx), 167 __cpuid(eax, ebx, ecx, edx);
156 "=c" (*ecx),
157 "=d" (*edx)
158 : "0" (op), "c"(0));
159} 168}
160 169
161/* Some CPUID calls want 'count' to be placed in ecx */ 170/* Some CPUID calls want 'count' to be placed in ecx */
162static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, 171static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
163 int *edx) 172 int *edx)
164{ 173{
165 __asm__("cpuid" 174 *eax = op;
166 : "=a" (*eax), 175 *ecx = count;
167 "=b" (*ebx), 176 __cpuid(eax, ebx, ecx, edx);
168 "=c" (*ecx),
169 "=d" (*edx)
170 : "0" (op), "c" (count));
171} 177}
172 178
173/* 179/*
@@ -175,42 +181,30 @@ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
175 */ 181 */
176static inline unsigned int cpuid_eax(unsigned int op) 182static inline unsigned int cpuid_eax(unsigned int op)
177{ 183{
178 unsigned int eax; 184 unsigned int eax, ebx, ecx, edx;
179 185
180 __asm__("cpuid" 186 cpuid(op, &eax, &ebx, &ecx, &edx);
181 : "=a" (eax)
182 : "0" (op)
183 : "bx", "cx", "dx");
184 return eax; 187 return eax;
185} 188}
186static inline unsigned int cpuid_ebx(unsigned int op) 189static inline unsigned int cpuid_ebx(unsigned int op)
187{ 190{
188 unsigned int eax, ebx; 191 unsigned int eax, ebx, ecx, edx;
189 192
190 __asm__("cpuid" 193 cpuid(op, &eax, &ebx, &ecx, &edx);
191 : "=a" (eax), "=b" (ebx)
192 : "0" (op)
193 : "cx", "dx" );
194 return ebx; 194 return ebx;
195} 195}
196static inline unsigned int cpuid_ecx(unsigned int op) 196static inline unsigned int cpuid_ecx(unsigned int op)
197{ 197{
198 unsigned int eax, ecx; 198 unsigned int eax, ebx, ecx, edx;
199 199
200 __asm__("cpuid" 200 cpuid(op, &eax, &ebx, &ecx, &edx);
201 : "=a" (eax), "=c" (ecx)
202 : "0" (op)
203 : "bx", "dx" );
204 return ecx; 201 return ecx;
205} 202}
206static inline unsigned int cpuid_edx(unsigned int op) 203static inline unsigned int cpuid_edx(unsigned int op)
207{ 204{
208 unsigned int eax, edx; 205 unsigned int eax, ebx, ecx, edx;
209 206
210 __asm__("cpuid" 207 cpuid(op, &eax, &ebx, &ecx, &edx);
211 : "=a" (eax), "=d" (edx)
212 : "0" (op)
213 : "bx", "cx");
214 return edx; 208 return edx;
215} 209}
216 210
diff --git a/include/asm-i386/ptrace-abi.h b/include/asm-i386/ptrace-abi.h
new file mode 100644
index 000000000000..a44901817a26
--- /dev/null
+++ b/include/asm-i386/ptrace-abi.h
@@ -0,0 +1,39 @@
1#ifndef I386_PTRACE_ABI_H
2#define I386_PTRACE_ABI_H
3
4#define EBX 0
5#define ECX 1
6#define EDX 2
7#define ESI 3
8#define EDI 4
9#define EBP 5
10#define EAX 6
11#define DS 7
12#define ES 8
13#define FS 9
14#define GS 10
15#define ORIG_EAX 11
16#define EIP 12
17#define CS 13
18#define EFL 14
19#define UESP 15
20#define SS 16
21#define FRAME_SIZE 17
22
23/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
24#define PTRACE_GETREGS 12
25#define PTRACE_SETREGS 13
26#define PTRACE_GETFPREGS 14
27#define PTRACE_SETFPREGS 15
28#define PTRACE_GETFPXREGS 18
29#define PTRACE_SETFPXREGS 19
30
31#define PTRACE_OLDSETOPTIONS 21
32
33#define PTRACE_GET_THREAD_AREA 25
34#define PTRACE_SET_THREAD_AREA 26
35
36#define PTRACE_SYSEMU 31
37#define PTRACE_SYSEMU_SINGLESTEP 32
38
39#endif
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index f324c53b6f9a..1910880fcd40 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -1,24 +1,7 @@
1#ifndef _I386_PTRACE_H 1#ifndef _I386_PTRACE_H
2#define _I386_PTRACE_H 2#define _I386_PTRACE_H
3 3
4#define EBX 0 4#include <asm/ptrace-abi.h>
5#define ECX 1
6#define EDX 2
7#define ESI 3
8#define EDI 4
9#define EBP 5
10#define EAX 6
11#define DS 7
12#define ES 8
13#define FS 9
14#define GS 10
15#define ORIG_EAX 11
16#define EIP 12
17#define CS 13
18#define EFL 14
19#define UESP 15
20#define SS 16
21#define FRAME_SIZE 17
22 5
23/* this struct defines the way the registers are stored on the 6/* this struct defines the way the registers are stored on the
24 stack during a system call. */ 7 stack during a system call. */
@@ -41,22 +24,6 @@ struct pt_regs {
41 int xss; 24 int xss;
42}; 25};
43 26
44/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
45#define PTRACE_GETREGS 12
46#define PTRACE_SETREGS 13
47#define PTRACE_GETFPREGS 14
48#define PTRACE_SETFPREGS 15
49#define PTRACE_GETFPXREGS 18
50#define PTRACE_SETFPXREGS 19
51
52#define PTRACE_OLDSETOPTIONS 21
53
54#define PTRACE_GET_THREAD_AREA 25
55#define PTRACE_SET_THREAD_AREA 26
56
57#define PTRACE_SYSEMU 31
58#define PTRACE_SYSEMU_SINGLESTEP 32
59
60#ifdef __KERNEL__ 27#ifdef __KERNEL__
61 28
62#include <asm/vm86.h> 29#include <asm/vm86.h>
diff --git a/include/asm-i386/sync_bitops.h b/include/asm-i386/sync_bitops.h
new file mode 100644
index 000000000000..c94d51c993ee
--- /dev/null
+++ b/include/asm-i386/sync_bitops.h
@@ -0,0 +1,156 @@
1#ifndef _I386_SYNC_BITOPS_H
2#define _I386_SYNC_BITOPS_H
3
4/*
5 * Copyright 1992, Linus Torvalds.
6 */
7
8/*
9 * These have to be done with inline assembly: that way the bit-setting
10 * is guaranteed to be atomic. All bit operations return 0 if the bit
11 * was cleared before the operation and != 0 if it was not.
12 *
13 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
14 */
15
16#define ADDR (*(volatile long *) addr)
17
18/**
19 * sync_set_bit - Atomically set a bit in memory
20 * @nr: the bit to set
21 * @addr: the address to start counting from
22 *
23 * This function is atomic and may not be reordered. See __set_bit()
24 * if you do not require the atomic guarantees.
25 *
26 * Note: there are no guarantees that this function will not be reordered
27 * on non x86 architectures, so if you are writing portable code,
28 * make sure not to rely on its reordering guarantees.
29 *
30 * Note that @nr may be almost arbitrarily large; this function is not
31 * restricted to acting on a single-word quantity.
32 */
33static inline void sync_set_bit(int nr, volatile unsigned long * addr)
34{
35 __asm__ __volatile__("lock; btsl %1,%0"
36 :"+m" (ADDR)
37 :"Ir" (nr)
38 : "memory");
39}
40
41/**
42 * sync_clear_bit - Clears a bit in memory
43 * @nr: Bit to clear
44 * @addr: Address to start counting from
45 *
46 * sync_clear_bit() is atomic and may not be reordered. However, it does
47 * not contain a memory barrier, so if it is used for locking purposes,
48 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
49 * in order to ensure changes are visible on other processors.
50 */
51static inline void sync_clear_bit(int nr, volatile unsigned long * addr)
52{
53 __asm__ __volatile__("lock; btrl %1,%0"
54 :"+m" (ADDR)
55 :"Ir" (nr)
56 : "memory");
57}
58
59/**
60 * sync_change_bit - Toggle a bit in memory
61 * @nr: Bit to change
62 * @addr: Address to start counting from
63 *
64 * sync_change_bit() is atomic and may not be reordered. It may be
65 * reordered on other architectures than x86.
66 * Note that @nr may be almost arbitrarily large; this function is not
67 * restricted to acting on a single-word quantity.
68 */
69static inline void sync_change_bit(int nr, volatile unsigned long * addr)
70{
71 __asm__ __volatile__("lock; btcl %1,%0"
72 :"+m" (ADDR)
73 :"Ir" (nr)
74 : "memory");
75}
76
77/**
78 * sync_test_and_set_bit - Set a bit and return its old value
79 * @nr: Bit to set
80 * @addr: Address to count from
81 *
82 * This operation is atomic and cannot be reordered.
83 * It may be reordered on other architectures than x86.
84 * It also implies a memory barrier.
85 */
86static inline int sync_test_and_set_bit(int nr, volatile unsigned long * addr)
87{
88 int oldbit;
89
90 __asm__ __volatile__("lock; btsl %2,%1\n\tsbbl %0,%0"
91 :"=r" (oldbit),"+m" (ADDR)
92 :"Ir" (nr) : "memory");
93 return oldbit;
94}
95
96/**
97 * sync_test_and_clear_bit - Clear a bit and return its old value
98 * @nr: Bit to clear
99 * @addr: Address to count from
100 *
101 * This operation is atomic and cannot be reordered.
102 * It can be reordered on architectures other than x86.
103 * It also implies a memory barrier.
104 */
105static inline int sync_test_and_clear_bit(int nr, volatile unsigned long * addr)
106{
107 int oldbit;
108
109 __asm__ __volatile__("lock; btrl %2,%1\n\tsbbl %0,%0"
110 :"=r" (oldbit),"+m" (ADDR)
111 :"Ir" (nr) : "memory");
112 return oldbit;
113}
114
115/**
116 * sync_test_and_change_bit - Change a bit and return its old value
117 * @nr: Bit to change
118 * @addr: Address to count from
119 *
120 * This operation is atomic and cannot be reordered.
121 * It also implies a memory barrier.
122 */
123static inline int sync_test_and_change_bit(int nr, volatile unsigned long* addr)
124{
125 int oldbit;
126
127 __asm__ __volatile__("lock; btcl %2,%1\n\tsbbl %0,%0"
128 :"=r" (oldbit),"+m" (ADDR)
129 :"Ir" (nr) : "memory");
130 return oldbit;
131}
132
133static __always_inline int sync_const_test_bit(int nr, const volatile unsigned long *addr)
134{
135 return ((1UL << (nr & 31)) &
136 (((const volatile unsigned int *)addr)[nr >> 5])) != 0;
137}
138
139static inline int sync_var_test_bit(int nr, const volatile unsigned long * addr)
140{
141 int oldbit;
142
143 __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
144 :"=r" (oldbit)
145 :"m" (ADDR),"Ir" (nr));
146 return oldbit;
147}
148
149#define sync_test_bit(nr,addr) \
150 (__builtin_constant_p(nr) ? \
151 sync_constant_test_bit((nr),(addr)) : \
152 sync_var_test_bit((nr),(addr)))
153
154#undef ADDR
155
156#endif /* _I386_SYNC_BITOPS_H */
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index 098bcee94e38..a6dabbcd6e6a 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
267#define cmpxchg(ptr,o,n)\ 267#define cmpxchg(ptr,o,n)\
268 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ 268 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
269 (unsigned long)(n),sizeof(*(ptr)))) 269 (unsigned long)(n),sizeof(*(ptr))))
270#define sync_cmpxchg(ptr,o,n)\
271 ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
272 (unsigned long)(n),sizeof(*(ptr))))
270#endif 273#endif
271 274
272static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, 275static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
296 return old; 299 return old;
297} 300}
298 301
302/*
303 * Always use locked operations when touching memory shared with a
304 * hypervisor, since the system may be SMP even if the guest kernel
305 * isn't.
306 */
307static inline unsigned long __sync_cmpxchg(volatile void *ptr,
308 unsigned long old,
309 unsigned long new, int size)
310{
311 unsigned long prev;
312 switch (size) {
313 case 1:
314 __asm__ __volatile__("lock; cmpxchgb %b1,%2"
315 : "=a"(prev)
316 : "q"(new), "m"(*__xg(ptr)), "0"(old)
317 : "memory");
318 return prev;
319 case 2:
320 __asm__ __volatile__("lock; cmpxchgw %w1,%2"
321 : "=a"(prev)
322 : "r"(new), "m"(*__xg(ptr)), "0"(old)
323 : "memory");
324 return prev;
325 case 4:
326 __asm__ __volatile__("lock; cmpxchgl %1,%2"
327 : "=a"(prev)
328 : "r"(new), "m"(*__xg(ptr)), "0"(old)
329 : "memory");
330 return prev;
331 }
332 return old;
333}
334
299#ifndef CONFIG_X86_CMPXCHG 335#ifndef CONFIG_X86_CMPXCHG
300/* 336/*
301 * Building a kernel capable running on 80386. It may be necessary to 337 * Building a kernel capable running on 80386. It may be necessary to
diff --git a/include/asm-ia64/numa.h b/include/asm-ia64/numa.h
index e5a8260593a5..e0a1d173e42d 100644
--- a/include/asm-ia64/numa.h
+++ b/include/asm-ia64/numa.h
@@ -64,6 +64,10 @@ extern int paddr_to_nid(unsigned long paddr);
64 64
65#define local_nodeid (cpu_to_node_map[smp_processor_id()]) 65#define local_nodeid (cpu_to_node_map[smp_processor_id()])
66 66
67extern void map_cpu_to_node(int cpu, int nid);
68extern void unmap_cpu_from_node(int cpu, int nid);
69
70
67#else /* !CONFIG_NUMA */ 71#else /* !CONFIG_NUMA */
68 72
69#define paddr_to_nid(addr) 0 73#define paddr_to_nid(addr) 0
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index 228981cadf8f..553182747722 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -275,21 +275,23 @@ ia64_phys_addr_valid (unsigned long addr)
275#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) 275#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd)))
276#define pmd_present(pmd) (pmd_val(pmd) != 0UL) 276#define pmd_present(pmd) (pmd_val(pmd) != 0UL)
277#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) 277#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
278#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) 278#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
279#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) 279#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
280 280
281#define pud_none(pud) (!pud_val(pud)) 281#define pud_none(pud) (!pud_val(pud))
282#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) 282#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud)))
283#define pud_present(pud) (pud_val(pud) != 0UL) 283#define pud_present(pud) (pud_val(pud) != 0UL)
284#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) 284#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
285#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) 285#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
286#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET))
286 287
287#ifdef CONFIG_PGTABLE_4 288#ifdef CONFIG_PGTABLE_4
288#define pgd_none(pgd) (!pgd_val(pgd)) 289#define pgd_none(pgd) (!pgd_val(pgd))
289#define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd))) 290#define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd)))
290#define pgd_present(pgd) (pgd_val(pgd) != 0UL) 291#define pgd_present(pgd) (pgd_val(pgd) != 0UL)
291#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) 292#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
292#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) 293#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
294#define pgd_page(pgd) virt_to_page((pgd_val(pgd) + PAGE_OFFSET))
293#endif 295#endif
294 296
295/* 297/*
@@ -360,19 +362,19 @@ pgd_offset (struct mm_struct *mm, unsigned long address)
360#ifdef CONFIG_PGTABLE_4 362#ifdef CONFIG_PGTABLE_4
361/* Find an entry in the second-level page table.. */ 363/* Find an entry in the second-level page table.. */
362#define pud_offset(dir,addr) \ 364#define pud_offset(dir,addr) \
363 ((pud_t *) pgd_page(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) 365 ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
364#endif 366#endif
365 367
366/* Find an entry in the third-level page table.. */ 368/* Find an entry in the third-level page table.. */
367#define pmd_offset(dir,addr) \ 369#define pmd_offset(dir,addr) \
368 ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) 370 ((pmd_t *) pud_page_vaddr(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
369 371
370/* 372/*
371 * Find an entry in the third-level page table. This looks more complicated than it 373 * Find an entry in the third-level page table. This looks more complicated than it
372 * should be because some platforms place page tables in high memory. 374 * should be because some platforms place page tables in high memory.
373 */ 375 */
374#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 376#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
375#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) 377#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
376#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr) 378#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr)
377#define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr) 379#define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr)
378#define pte_unmap(pte) do { } while (0) 380#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 719ff309ce09..74bde1c2bb1a 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -122,8 +122,6 @@ extern void __init smp_build_cpu_map(void);
122extern void __init init_smp_config (void); 122extern void __init init_smp_config (void);
123extern void smp_do_timer (struct pt_regs *regs); 123extern void smp_do_timer (struct pt_regs *regs);
124 124
125extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info,
126 int retry, int wait);
127extern void smp_send_reschedule (int cpu); 125extern void smp_send_reschedule (int cpu);
128extern void lock_ipi_calllock(void); 126extern void lock_ipi_calllock(void);
129extern void unlock_ipi_calllock(void); 127extern void unlock_ipi_calllock(void);
diff --git a/include/asm-m32r/pgtable-2level.h b/include/asm-m32r/pgtable-2level.h
index be0f167e344a..6a674e3d37a2 100644
--- a/include/asm-m32r/pgtable-2level.h
+++ b/include/asm-m32r/pgtable-2level.h
@@ -52,9 +52,13 @@ static inline int pgd_present(pgd_t pgd) { return 1; }
52#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) 52#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
53#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) 53#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval)
54 54
55#define pgd_page(pgd) \ 55#define pgd_page_vaddr(pgd) \
56((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) 56((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
57 57
58#ifndef CONFIG_DISCONTIGMEM
59#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) >> PAGE_SHIFT) - PFN_BASE))
60#endif /* !CONFIG_DISCONTIGMEM */
61
58static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address) 62static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
59{ 63{
60 return (pmd_t *) dir; 64 return (pmd_t *) dir;
diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h
index 1983b7f4527a..1c15ba7ce319 100644
--- a/include/asm-m32r/pgtable.h
+++ b/include/asm-m32r/pgtable.h
@@ -336,7 +336,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
336 pmd_val(*pmdp) = (((unsigned long) ptep) & PAGE_MASK); 336 pmd_val(*pmdp) = (((unsigned long) ptep) & PAGE_MASK);
337} 337}
338 338
339#define pmd_page_kernel(pmd) \ 339#define pmd_page_vaddr(pmd) \
340 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 340 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
341 341
342#ifndef CONFIG_DISCONTIGMEM 342#ifndef CONFIG_DISCONTIGMEM
@@ -358,7 +358,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
358#define pte_index(address) \ 358#define pte_index(address) \
359 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 359 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
360#define pte_offset_kernel(dir, address) \ 360#define pte_offset_kernel(dir, address) \
361 ((pte_t *)pmd_page_kernel(*(dir)) + pte_index(address)) 361 ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index(address))
362#define pte_offset_map(dir, address) \ 362#define pte_offset_map(dir, address) \
363 ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) 363 ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
364#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) 364#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
diff --git a/include/asm-m68k/motorola_pgtable.h b/include/asm-m68k/motorola_pgtable.h
index 1ccc7338a54b..61e4406ed96a 100644
--- a/include/asm-m68k/motorola_pgtable.h
+++ b/include/asm-m68k/motorola_pgtable.h
@@ -150,6 +150,7 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
150#define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE) 150#define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE)
151#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE) 151#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE)
152#define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; }) 152#define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; })
153#define pgd_page(pgd) (mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT))
153 154
154#define pte_ERROR(e) \ 155#define pte_ERROR(e) \
155 printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) 156 printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h
index d5b38a247e5a..eeb0c3115b6a 100644
--- a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h
+++ b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h
@@ -316,7 +316,7 @@ typedef struct dbdma_chan_config {
316 au1x_ddma_desc_t *chan_desc_base; 316 au1x_ddma_desc_t *chan_desc_base;
317 au1x_ddma_desc_t *get_ptr, *put_ptr, *cur_ptr; 317 au1x_ddma_desc_t *get_ptr, *put_ptr, *cur_ptr;
318 void *chan_callparam; 318 void *chan_callparam;
319 void (*chan_callback)(int, void *, struct pt_regs *); 319 void (*chan_callback)(int, void *);
320} chan_tab_t; 320} chan_tab_t;
321 321
322#define DEV_FLAGS_INUSE (1 << 0) 322#define DEV_FLAGS_INUSE (1 << 0)
@@ -334,8 +334,8 @@ typedef struct dbdma_chan_config {
334 * meaningful name. The 'callback' is called during dma completion 334 * meaningful name. The 'callback' is called during dma completion
335 * interrupt. 335 * interrupt.
336 */ 336 */
337u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, 337extern u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
338 void (*callback)(int, void *, struct pt_regs *), void *callparam); 338 void (*callback)(int, void *), void *callparam);
339 339
340#define DBDMA_MEM_CHAN DSCR_CMD0_ALWAYS 340#define DBDMA_MEM_CHAN DSCR_CMD0_ALWAYS
341 341
diff --git a/include/asm-mips/pgtable-32.h b/include/asm-mips/pgtable-32.h
index 4b26d8528133..d20f2e9b28be 100644
--- a/include/asm-mips/pgtable-32.h
+++ b/include/asm-mips/pgtable-32.h
@@ -156,9 +156,9 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
156#define __pte_offset(address) \ 156#define __pte_offset(address) \
157 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 157 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
158#define pte_offset(dir, address) \ 158#define pte_offset(dir, address) \
159 ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address)) 159 ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address))
160#define pte_offset_kernel(dir, address) \ 160#define pte_offset_kernel(dir, address) \
161 ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) 161 ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address))
162 162
163#define pte_offset_map(dir, address) \ 163#define pte_offset_map(dir, address) \
164 ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) 164 ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
diff --git a/include/asm-mips/pgtable-64.h b/include/asm-mips/pgtable-64.h
index e3db93212eab..c59a1e21f5b0 100644
--- a/include/asm-mips/pgtable-64.h
+++ b/include/asm-mips/pgtable-64.h
@@ -178,24 +178,26 @@ static inline void pud_clear(pud_t *pudp)
178/* to find an entry in a page-table-directory */ 178/* to find an entry in a page-table-directory */
179#define pgd_offset(mm,addr) ((mm)->pgd + pgd_index(addr)) 179#define pgd_offset(mm,addr) ((mm)->pgd + pgd_index(addr))
180 180
181static inline unsigned long pud_page(pud_t pud) 181static inline unsigned long pud_page_vaddr(pud_t pud)
182{ 182{
183 return pud_val(pud); 183 return pud_val(pud);
184} 184}
185#define pud_phys(pud) (pud_val(pud) - PAGE_OFFSET)
186#define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT))
185 187
186/* Find an entry in the second-level page table.. */ 188/* Find an entry in the second-level page table.. */
187static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address) 189static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address)
188{ 190{
189 return (pmd_t *) pud_page(*pud) + pmd_index(address); 191 return (pmd_t *) pud_page_vaddr(*pud) + pmd_index(address);
190} 192}
191 193
192/* Find an entry in the third-level page table.. */ 194/* Find an entry in the third-level page table.. */
193#define __pte_offset(address) \ 195#define __pte_offset(address) \
194 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 196 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
195#define pte_offset(dir, address) \ 197#define pte_offset(dir, address) \
196 ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address)) 198 ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address))
197#define pte_offset_kernel(dir, address) \ 199#define pte_offset_kernel(dir, address) \
198 ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) 200 ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address))
199#define pte_offset_map(dir, address) \ 201#define pte_offset_map(dir, address) \
200 ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) 202 ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
201#define pte_offset_map_nested(dir, address) \ 203#define pte_offset_map_nested(dir, address) \
diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h
index a36ca1be17f2..1ca4d1e185c7 100644
--- a/include/asm-mips/pgtable.h
+++ b/include/asm-mips/pgtable.h
@@ -87,7 +87,7 @@ extern void paging_init(void);
87 */ 87 */
88#define pmd_phys(pmd) (pmd_val(pmd) - PAGE_OFFSET) 88#define pmd_phys(pmd) (pmd_val(pmd) - PAGE_OFFSET)
89#define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) 89#define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
90#define pmd_page_kernel(pmd) pmd_val(pmd) 90#define pmd_page_vaddr(pmd) pmd_val(pmd)
91 91
92#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1) 92#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1)
93 93
diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h
index 5066c54dae0a..c0b61e0d1497 100644
--- a/include/asm-parisc/pgtable.h
+++ b/include/asm-parisc/pgtable.h
@@ -303,7 +303,8 @@ static inline void pmd_clear(pmd_t *pmd) {
303 303
304 304
305#if PT_NLEVELS == 3 305#if PT_NLEVELS == 3
306#define pgd_page(pgd) ((unsigned long) __va(pgd_address(pgd))) 306#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
307#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd))
307 308
308/* For 64 bit we have three level tables */ 309/* For 64 bit we have three level tables */
309 310
@@ -382,7 +383,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
382 383
383#define pte_page(pte) (pfn_to_page(pte_pfn(pte))) 384#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
384 385
385#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_address(pmd))) 386#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_address(pmd)))
386 387
387#define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd))) 388#define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd)))
388#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) 389#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
@@ -400,7 +401,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
400 401
401#if PT_NLEVELS == 3 402#if PT_NLEVELS == 3
402#define pmd_offset(dir,address) \ 403#define pmd_offset(dir,address) \
403((pmd_t *) pgd_page(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) 404((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
404#else 405#else
405#define pmd_offset(dir,addr) ((pmd_t *) dir) 406#define pmd_offset(dir,addr) ((pmd_t *) dir)
406#endif 407#endif
@@ -408,7 +409,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
408/* Find an entry in the third-level page table.. */ 409/* Find an entry in the third-level page table.. */
409#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) 410#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
410#define pte_offset_kernel(pmd, address) \ 411#define pte_offset_kernel(pmd, address) \
411 ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address)) 412 ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address))
412#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) 413#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
413#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) 414#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address)
414#define pte_unmap(pte) do { } while (0) 415#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h
index e7036155672e..345d9b07b3e2 100644
--- a/include/asm-powerpc/pgtable-4k.h
+++ b/include/asm-powerpc/pgtable-4k.h
@@ -88,10 +88,11 @@
88#define pgd_bad(pgd) (pgd_val(pgd) == 0) 88#define pgd_bad(pgd) (pgd_val(pgd) == 0)
89#define pgd_present(pgd) (pgd_val(pgd) != 0) 89#define pgd_present(pgd) (pgd_val(pgd) != 0)
90#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) 90#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
91#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) 91#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
92#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd))
92 93
93#define pud_offset(pgdp, addr) \ 94#define pud_offset(pgdp, addr) \
94 (((pud_t *) pgd_page(*(pgdp))) + \ 95 (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
95 (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) 96 (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
96 97
97#define pud_ERROR(e) \ 98#define pud_ERROR(e) \
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
index 8dbf5ad8150f..10f52743f4ff 100644
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -196,8 +196,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
196 || (pmd_val(pmd) & PMD_BAD_BITS)) 196 || (pmd_val(pmd) & PMD_BAD_BITS))
197#define pmd_present(pmd) (pmd_val(pmd) != 0) 197#define pmd_present(pmd) (pmd_val(pmd) != 0)
198#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) 198#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
199#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) 199#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
200#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) 200#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
201 201
202#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) 202#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
203#define pud_none(pud) (!pud_val(pud)) 203#define pud_none(pud) (!pud_val(pud))
@@ -205,7 +205,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
205 || (pud_val(pud) & PUD_BAD_BITS)) 205 || (pud_val(pud) & PUD_BAD_BITS))
206#define pud_present(pud) (pud_val(pud) != 0) 206#define pud_present(pud) (pud_val(pud) != 0)
207#define pud_clear(pudp) (pud_val(*(pudp)) = 0) 207#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
208#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS) 208#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
209#define pud_page(pud) virt_to_page(pud_page_vaddr(pud))
209 210
210#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) 211#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
211 212
@@ -219,10 +220,10 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
219#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) 220#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
220 221
221#define pmd_offset(pudp,addr) \ 222#define pmd_offset(pudp,addr) \
222 (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) 223 (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
223 224
224#define pte_offset_kernel(dir,addr) \ 225#define pte_offset_kernel(dir,addr) \
225 (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) 226 (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
226 227
227#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) 228#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
228#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) 229#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h
index 51fa7c662917..b1fdbf40dba2 100644
--- a/include/asm-ppc/pgtable.h
+++ b/include/asm-ppc/pgtable.h
@@ -526,7 +526,7 @@ static inline int pgd_bad(pgd_t pgd) { return 0; }
526static inline int pgd_present(pgd_t pgd) { return 1; } 526static inline int pgd_present(pgd_t pgd) { return 1; }
527#define pgd_clear(xp) do { } while (0) 527#define pgd_clear(xp) do { } while (0)
528 528
529#define pgd_page(pgd) \ 529#define pgd_page_vaddr(pgd) \
530 ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) 530 ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
531 531
532/* 532/*
@@ -720,12 +720,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
720 * of the pte page. -- paulus 720 * of the pte page. -- paulus
721 */ 721 */
722#ifndef CONFIG_BOOKE 722#ifndef CONFIG_BOOKE
723#define pmd_page_kernel(pmd) \ 723#define pmd_page_vaddr(pmd) \
724 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 724 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
725#define pmd_page(pmd) \ 725#define pmd_page(pmd) \
726 (mem_map + (pmd_val(pmd) >> PAGE_SHIFT)) 726 (mem_map + (pmd_val(pmd) >> PAGE_SHIFT))
727#else 727#else
728#define pmd_page_kernel(pmd) \ 728#define pmd_page_vaddr(pmd) \
729 ((unsigned long) (pmd_val(pmd) & PAGE_MASK)) 729 ((unsigned long) (pmd_val(pmd) & PAGE_MASK))
730#define pmd_page(pmd) \ 730#define pmd_page(pmd) \
731 (mem_map + (__pa(pmd_val(pmd)) >> PAGE_SHIFT)) 731 (mem_map + (__pa(pmd_val(pmd)) >> PAGE_SHIFT))
@@ -748,7 +748,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
748#define pte_index(address) \ 748#define pte_index(address) \
749 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 749 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
750#define pte_offset_kernel(dir, addr) \ 750#define pte_offset_kernel(dir, addr) \
751 ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) 751 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
752#define pte_offset_map(dir, addr) \ 752#define pte_offset_map(dir, addr) \
753 ((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr)) 753 ((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr))
754#define pte_offset_map_nested(dir, addr) \ 754#define pte_offset_map_nested(dir, addr) \
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index 28b3517e787c..495ad99c7635 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -15,18 +15,20 @@
15 */ 15 */
16#if defined(__s390x__) && defined(MODULE) 16#if defined(__s390x__) && defined(MODULE)
17 17
18#define __reloc_hide(var,offset) \ 18#define __reloc_hide(var,offset) (*({ \
19 (*({ unsigned long *__ptr; \ 19 extern int simple_indentifier_##var(void); \
20 asm ( "larl %0,per_cpu__"#var"@GOTENT" \ 20 unsigned long *__ptr; \
21 : "=a" (__ptr) : "X" (per_cpu__##var) ); \ 21 asm ( "larl %0,per_cpu__"#var"@GOTENT" \
22 (typeof(&per_cpu__##var))((*__ptr) + (offset)); })) 22 : "=a" (__ptr) : "X" (per_cpu__##var) ); \
23 (typeof(&per_cpu__##var))((*__ptr) + (offset)); }))
23 24
24#else 25#else
25 26
26#define __reloc_hide(var, offset) \ 27#define __reloc_hide(var, offset) (*({ \
27 (*({ unsigned long __ptr; \ 28 extern int simple_indentifier_##var(void); \
28 asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ 29 unsigned long __ptr; \
29 (typeof(&per_cpu__##var)) (__ptr + (offset)); })) 30 asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \
31 (typeof(&per_cpu__##var)) (__ptr + (offset)); }))
30 32
31#endif 33#endif
32 34
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 1a07028d575e..e965309fedac 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -664,11 +664,13 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
664#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) 664#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
665#define pte_page(x) pfn_to_page(pte_pfn(x)) 665#define pte_page(x) pfn_to_page(pte_pfn(x))
666 666
667#define pmd_page_kernel(pmd) (pmd_val(pmd) & PAGE_MASK) 667#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
668 668
669#define pmd_page(pmd) (mem_map+(pmd_val(pmd) >> PAGE_SHIFT)) 669#define pmd_page(pmd) (mem_map+(pmd_val(pmd) >> PAGE_SHIFT))
670 670
671#define pgd_page_kernel(pgd) (pgd_val(pgd) & PAGE_MASK) 671#define pgd_page_vaddr(pgd) (pgd_val(pgd) & PAGE_MASK)
672
673#define pgd_page(pgd) (mem_map+(pgd_val(pgd) >> PAGE_SHIFT))
672 674
673/* to find an entry in a page-table-directory */ 675/* to find an entry in a page-table-directory */
674#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) 676#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -690,14 +692,14 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
690/* Find an entry in the second-level page table.. */ 692/* Find an entry in the second-level page table.. */
691#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) 693#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
692#define pmd_offset(dir,addr) \ 694#define pmd_offset(dir,addr) \
693 ((pmd_t *) pgd_page_kernel(*(dir)) + pmd_index(addr)) 695 ((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(addr))
694 696
695#endif /* __s390x__ */ 697#endif /* __s390x__ */
696 698
697/* Find an entry in the third-level page table.. */ 699/* Find an entry in the third-level page table.. */
698#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) 700#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
699#define pte_offset_kernel(pmd, address) \ 701#define pte_offset_kernel(pmd, address) \
700 ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address)) 702 ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address))
701#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) 703#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
702#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) 704#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address)
703#define pte_unmap(pte) do { } while (0) 705#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index a3a4e5fd30d7..578c2209fa76 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -337,6 +337,8 @@ struct notifier_block;
337int register_idle_notifier(struct notifier_block *nb); 337int register_idle_notifier(struct notifier_block *nb);
338int unregister_idle_notifier(struct notifier_block *nb); 338int unregister_idle_notifier(struct notifier_block *nb);
339 339
340#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
341
340#endif 342#endif
341 343
342/* 344/*
diff --git a/include/asm-sh/pgtable-2level.h b/include/asm-sh/pgtable-2level.h
index b0528aa3cb1f..b525db6f61c6 100644
--- a/include/asm-sh/pgtable-2level.h
+++ b/include/asm-sh/pgtable-2level.h
@@ -50,9 +50,12 @@ static inline void pgd_clear (pgd_t * pgdp) { }
50#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) 50#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
51#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) 51#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval)
52 52
53#define pgd_page(pgd) \ 53#define pgd_page_vaddr(pgd) \
54((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) 54((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
55 55
56#define pgd_page(pgd) \
57 (phys_to_page(pgd_val(pgd)))
58
56static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) 59static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
57{ 60{
58 return (pmd_t *) dir; 61 return (pmd_t *) dir;
diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h
index dcd23a03683d..40d41a78041e 100644
--- a/include/asm-sh/pgtable.h
+++ b/include/asm-sh/pgtable.h
@@ -225,7 +225,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
225static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 225static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
226{ set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; } 226{ set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; }
227 227
228#define pmd_page_kernel(pmd) \ 228#define pmd_page_vaddr(pmd) \
229((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 229((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
230 230
231#define pmd_page(pmd) \ 231#define pmd_page(pmd) \
@@ -242,7 +242,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
242#define pte_index(address) \ 242#define pte_index(address) \
243 ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 243 ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
244#define pte_offset_kernel(dir, address) \ 244#define pte_offset_kernel(dir, address) \
245 ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) 245 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
246#define pte_offset_map(dir, address) pte_offset_kernel(dir, address) 246#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
247#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address) 247#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address)
248#define pte_unmap(pte) do { } while (0) 248#define pte_unmap(pte) do { } while (0)
diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h
index 54c7821893f5..6b97c4cb1d64 100644
--- a/include/asm-sh64/pgtable.h
+++ b/include/asm-sh64/pgtable.h
@@ -190,7 +190,9 @@ static inline int pgd_bad(pgd_t pgd) { return 0; }
190#endif 190#endif
191 191
192 192
193#define pgd_page(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK)) 193#define pgd_page_vaddr(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK))
194#define pgd_page(pgd) (virt_to_page(pgd_val(pgd)))
195
194 196
195/* 197/*
196 * PMD defines. Middle level. 198 * PMD defines. Middle level.
@@ -219,7 +221,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
219#define pmd_none(pmd_entry) (pmd_val((pmd_entry)) == _PMD_EMPTY) 221#define pmd_none(pmd_entry) (pmd_val((pmd_entry)) == _PMD_EMPTY)
220#define pmd_bad(pmd_entry) ((pmd_val(pmd_entry) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) 222#define pmd_bad(pmd_entry) ((pmd_val(pmd_entry) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
221 223
222#define pmd_page_kernel(pmd_entry) \ 224#define pmd_page_vaddr(pmd_entry) \
223 ((unsigned long) __va(pmd_val(pmd_entry) & PAGE_MASK)) 225 ((unsigned long) __va(pmd_val(pmd_entry) & PAGE_MASK))
224 226
225#define pmd_page(pmd) \ 227#define pmd_page(pmd) \
diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h
index 226c6475c9a2..4f0a5ba0d6a0 100644
--- a/include/asm-sparc/pgtable.h
+++ b/include/asm-sparc/pgtable.h
@@ -143,10 +143,10 @@ extern unsigned long empty_zero_page;
143/* 143/*
144 */ 144 */
145BTFIXUPDEF_CALL_CONST(struct page *, pmd_page, pmd_t) 145BTFIXUPDEF_CALL_CONST(struct page *, pmd_page, pmd_t)
146BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page, pgd_t) 146BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page_vaddr, pgd_t)
147 147
148#define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd) 148#define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd)
149#define pgd_page(pgd) BTFIXUP_CALL(pgd_page)(pgd) 149#define pgd_page_vaddr(pgd) BTFIXUP_CALL(pgd_page_vaddr)(pgd)
150 150
151BTFIXUPDEF_SETHI(none_mask) 151BTFIXUPDEF_SETHI(none_mask)
152BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t) 152BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index ebfe395cfb87..b12be7a869f6 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -630,8 +630,9 @@ static inline unsigned long pte_present(pte_t pte)
630#define __pmd_page(pmd) \ 630#define __pmd_page(pmd) \
631 ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL))) 631 ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL)))
632#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) 632#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
633#define pud_page(pud) \ 633#define pud_page_vaddr(pud) \
634 ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) 634 ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL)))
635#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
635#define pmd_none(pmd) (!pmd_val(pmd)) 636#define pmd_none(pmd) (!pmd_val(pmd))
636#define pmd_bad(pmd) (0) 637#define pmd_bad(pmd) (0)
637#define pmd_present(pmd) (pmd_val(pmd) != 0U) 638#define pmd_present(pmd) (pmd_val(pmd) != 0U)
@@ -653,7 +654,7 @@ static inline unsigned long pte_present(pte_t pte)
653 654
654/* Find an entry in the second-level page table.. */ 655/* Find an entry in the second-level page table.. */
655#define pmd_offset(pudp, address) \ 656#define pmd_offset(pudp, address) \
656 ((pmd_t *) pud_page(*(pudp)) + \ 657 ((pmd_t *) pud_page_vaddr(*(pudp)) + \
657 (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))) 658 (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)))
658 659
659/* Find an entry in the third-level page table.. */ 660/* Find an entry in the third-level page table.. */
diff --git a/include/asm-um/pgtable-2level.h b/include/asm-um/pgtable-2level.h
index ffe017f6b64b..6050e0eb257e 100644
--- a/include/asm-um/pgtable-2level.h
+++ b/include/asm-um/pgtable-2level.h
@@ -41,7 +41,7 @@ static inline void pgd_mkuptodate(pgd_t pgd) { }
41#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) 41#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
42#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) 42#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
43 43
44#define pmd_page_kernel(pmd) \ 44#define pmd_page_vaddr(pmd) \
45 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 45 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
46 46
47/* 47/*
diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h
index 786c25727289..ca0c2a92a112 100644
--- a/include/asm-um/pgtable-3level.h
+++ b/include/asm-um/pgtable-3level.h
@@ -74,11 +74,12 @@ extern inline void pud_clear (pud_t *pud)
74 set_pud(pud, __pud(0)); 74 set_pud(pud, __pud(0));
75} 75}
76 76
77#define pud_page(pud) \ 77#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
78#define pud_page_vaddr(pud) \
78 ((struct page *) __va(pud_val(pud) & PAGE_MASK)) 79 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
79 80
80/* Find an entry in the second-level page table.. */ 81/* Find an entry in the second-level page table.. */
81#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ 82#define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \
82 pmd_index(address)) 83 pmd_index(address))
83 84
84static inline unsigned long pte_pfn(pte_t pte) 85static inline unsigned long pte_pfn(pte_t pte)
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index ac64eb955868..4862daf8b906 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -349,7 +349,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
349 return pte; 349 return pte;
350} 350}
351 351
352#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 352#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
353 353
354/* 354/*
355 * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] 355 * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
@@ -389,7 +389,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
389 */ 389 */
390#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 390#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
391#define pte_offset_kernel(dir, address) \ 391#define pte_offset_kernel(dir, address) \
392 ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) 392 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
393#define pte_offset_map(dir, address) \ 393#define pte_offset_map(dir, address) \
394 ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) 394 ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
395#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) 395#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h
index 824c28896382..afa4fe1ca9f1 100644
--- a/include/asm-um/processor-generic.h
+++ b/include/asm-um/processor-generic.h
@@ -138,9 +138,7 @@ extern struct cpuinfo_um cpu_data[];
138 138
139#ifdef CONFIG_MODE_SKAS 139#ifdef CONFIG_MODE_SKAS
140#define KSTK_REG(tsk, reg) \ 140#define KSTK_REG(tsk, reg) \
141 ({ union uml_pt_regs regs; \ 141 get_thread_reg(reg, tsk->thread.mode.skas.switch_buf)
142 get_thread_regs(&regs, tsk->thread.mode.skas.switch_buf); \
143 UPT_REG(&regs, reg); })
144#else 142#else
145#define KSTK_REG(tsk, reg) (0xbadbabe) 143#define KSTK_REG(tsk, reg) (0xbadbabe)
146#endif 144#endif
diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h
index a36f5371b36b..99c87c5ce994 100644
--- a/include/asm-um/ptrace-generic.h
+++ b/include/asm-um/ptrace-generic.h
@@ -8,19 +8,7 @@
8 8
9#ifndef __ASSEMBLY__ 9#ifndef __ASSEMBLY__
10 10
11 11#include "asm/arch/ptrace-abi.h"
12#define pt_regs pt_regs_subarch
13#define show_regs show_regs_subarch
14#define send_sigtrap send_sigtrap_subarch
15
16#include "asm/arch/ptrace.h"
17
18#undef pt_regs
19#undef show_regs
20#undef send_sigtrap
21#undef user_mode
22#undef instruction_pointer
23
24#include "sysdep/ptrace.h" 12#include "sysdep/ptrace.h"
25 13
26struct pt_regs { 14struct pt_regs {
diff --git a/include/asm-um/ptrace-x86_64.h b/include/asm-um/ptrace-x86_64.h
index c894e68b1f96..2074483e6ca4 100644
--- a/include/asm-um/ptrace-x86_64.h
+++ b/include/asm-um/ptrace-x86_64.h
@@ -11,15 +11,11 @@
11#include "asm/errno.h" 11#include "asm/errno.h"
12#include "asm/host_ldt.h" 12#include "asm/host_ldt.h"
13 13
14#define signal_fault signal_fault_x86_64
15#define __FRAME_OFFSETS /* Needed to get the R* macros */ 14#define __FRAME_OFFSETS /* Needed to get the R* macros */
16#include "asm/ptrace-generic.h" 15#include "asm/ptrace-generic.h"
17#undef signal_fault
18 16
19#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 17#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
20 18
21void signal_fault(struct pt_regs_subarch *regs, void *frame, char *where);
22
23#define FS_BASE (21 * sizeof(unsigned long)) 19#define FS_BASE (21 * sizeof(unsigned long))
24#define GS_BASE (22 * sizeof(unsigned long)) 20#define GS_BASE (22 * sizeof(unsigned long))
25#define DS (23 * sizeof(unsigned long)) 21#define DS (23 * sizeof(unsigned long))
diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild
index 40f2f13fe174..1ee9b07f3fe6 100644
--- a/include/asm-x86_64/Kbuild
+++ b/include/asm-x86_64/Kbuild
@@ -11,6 +11,7 @@ header-y += debugreg.h
11header-y += ldt.h 11header-y += ldt.h
12header-y += msr.h 12header-y += msr.h
13header-y += prctl.h 13header-y += prctl.h
14header-y += ptrace-abi.h
14header-y += setup.h 15header-y += setup.h
15header-y += sigcontext32.h 16header-y += sigcontext32.h
16header-y += ucontext.h 17header-y += ucontext.h
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
index 670a3388e70a..f65674832318 100644
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -46,6 +46,7 @@ extern void setup_memory_region(void);
46extern void contig_e820_setup(void); 46extern void contig_e820_setup(void);
47extern unsigned long e820_end_of_ram(void); 47extern unsigned long e820_end_of_ram(void);
48extern void e820_reserve_resources(void); 48extern void e820_reserve_resources(void);
49extern void e820_mark_nosave_regions(void);
49extern void e820_print_map(char *who); 50extern void e820_print_map(char *who);
50extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); 51extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
51extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); 52extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 08dd9f9dda81..bffb2f886a51 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -21,9 +21,15 @@
21 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name 21 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
22 22
23/* var is in discarded region: offset to particular copy we want */ 23/* var is in discarded region: offset to particular copy we want */
24#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) 24#define per_cpu(var, cpu) (*({ \
25#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) 25 extern int simple_indentifier_##var(void); \
26#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) 26 RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); }))
27#define __get_cpu_var(var) (*({ \
28 extern int simple_indentifier_##var(void); \
29 RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
30#define __raw_get_cpu_var(var) (*({ \
31 extern int simple_indentifier_##var(void); \
32 RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
27 33
28/* A macro to avoid #include hell... */ 34/* A macro to avoid #include hell... */
29#define percpu_modcopy(pcpudst, src, size) \ 35#define percpu_modcopy(pcpudst, src, size) \
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index a31ab4e68a9b..51eba2395171 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -101,9 +101,6 @@ static inline void pgd_clear (pgd_t * pgd)
101 set_pgd(pgd, __pgd(0)); 101 set_pgd(pgd, __pgd(0));
102} 102}
103 103
104#define pud_page(pud) \
105((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
106
107#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0)) 104#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0))
108 105
109struct mm_struct; 106struct mm_struct;
@@ -326,7 +323,8 @@ static inline int pmd_large(pmd_t pte) {
326/* 323/*
327 * Level 4 access. 324 * Level 4 access.
328 */ 325 */
329#define pgd_page(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK)) 326#define pgd_page_vaddr(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK))
327#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
330#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) 328#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
331#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) 329#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
332#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address)) 330#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
@@ -335,16 +333,18 @@ static inline int pmd_large(pmd_t pte) {
335 333
336/* PUD - Level3 access */ 334/* PUD - Level3 access */
337/* to find an entry in a page-table-directory. */ 335/* to find an entry in a page-table-directory. */
336#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
337#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
338#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) 338#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
339#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) 339#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
340#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT) 340#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)
341 341
342/* PMD - Level 2 access */ 342/* PMD - Level 2 access */
343#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) 343#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
344#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) 344#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
345 345
346#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) 346#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
347#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \ 347#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
348 pmd_index(address)) 348 pmd_index(address))
349#define pmd_none(x) (!pmd_val(x)) 349#define pmd_none(x) (!pmd_val(x))
350#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) 350#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
@@ -382,7 +382,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
382 382
383#define pte_index(address) \ 383#define pte_index(address) \
384 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 384 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
385#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ 385#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
386 pte_index(address)) 386 pte_index(address))
387 387
388/* x86-64 always has all page tables mapped. */ 388/* x86-64 always has all page tables mapped. */
diff --git a/include/asm-x86_64/ptrace-abi.h b/include/asm-x86_64/ptrace-abi.h
new file mode 100644
index 000000000000..19184b0806b1
--- /dev/null
+++ b/include/asm-x86_64/ptrace-abi.h
@@ -0,0 +1,51 @@
1#ifndef _X86_64_PTRACE_ABI_H
2#define _X86_64_PTRACE_ABI_H
3
4#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
5#define R15 0
6#define R14 8
7#define R13 16
8#define R12 24
9#define RBP 32
10#define RBX 40
11/* arguments: interrupts/non tracing syscalls only save upto here*/
12#define R11 48
13#define R10 56
14#define R9 64
15#define R8 72
16#define RAX 80
17#define RCX 88
18#define RDX 96
19#define RSI 104
20#define RDI 112
21#define ORIG_RAX 120 /* = ERROR */
22/* end of arguments */
23/* cpu exception frame or undefined in case of fast syscall. */
24#define RIP 128
25#define CS 136
26#define EFLAGS 144
27#define RSP 152
28#define SS 160
29#define ARGOFFSET R11
30#endif /* __ASSEMBLY__ */
31
32/* top of stack page */
33#define FRAME_SIZE 168
34
35#define PTRACE_OLDSETOPTIONS 21
36
37/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
38#define PTRACE_GETREGS 12
39#define PTRACE_SETREGS 13
40#define PTRACE_GETFPREGS 14
41#define PTRACE_SETFPREGS 15
42#define PTRACE_GETFPXREGS 18
43#define PTRACE_SETFPXREGS 19
44
45/* only useful for access 32bit programs */
46#define PTRACE_GET_THREAD_AREA 25
47#define PTRACE_SET_THREAD_AREA 26
48
49#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */
50
51#endif
diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h
index ca6f15ff61d4..ab827dc381d7 100644
--- a/include/asm-x86_64/ptrace.h
+++ b/include/asm-x86_64/ptrace.h
@@ -1,40 +1,9 @@
1#ifndef _X86_64_PTRACE_H 1#ifndef _X86_64_PTRACE_H
2#define _X86_64_PTRACE_H 2#define _X86_64_PTRACE_H
3 3
4#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) 4#include <asm/ptrace-abi.h>
5#define R15 0
6#define R14 8
7#define R13 16
8#define R12 24
9#define RBP 32
10#define RBX 40
11/* arguments: interrupts/non tracing syscalls only save upto here*/
12#define R11 48
13#define R10 56
14#define R9 64
15#define R8 72
16#define RAX 80
17#define RCX 88
18#define RDX 96
19#define RSI 104
20#define RDI 112
21#define ORIG_RAX 120 /* = ERROR */
22/* end of arguments */
23/* cpu exception frame or undefined in case of fast syscall. */
24#define RIP 128
25#define CS 136
26#define EFLAGS 144
27#define RSP 152
28#define SS 160
29#define ARGOFFSET R11
30#endif /* __ASSEMBLY__ */
31 5
32/* top of stack page */ 6#ifndef __ASSEMBLY__
33#define FRAME_SIZE 168
34
35#define PTRACE_OLDSETOPTIONS 21
36
37#ifndef __ASSEMBLY__
38 7
39struct pt_regs { 8struct pt_regs {
40 unsigned long r15; 9 unsigned long r15;
@@ -45,7 +14,7 @@ struct pt_regs {
45 unsigned long rbx; 14 unsigned long rbx;
46/* arguments: non interrupts/non tracing syscalls only save upto here*/ 15/* arguments: non interrupts/non tracing syscalls only save upto here*/
47 unsigned long r11; 16 unsigned long r11;
48 unsigned long r10; 17 unsigned long r10;
49 unsigned long r9; 18 unsigned long r9;
50 unsigned long r8; 19 unsigned long r8;
51 unsigned long rax; 20 unsigned long rax;
@@ -54,32 +23,18 @@ struct pt_regs {
54 unsigned long rsi; 23 unsigned long rsi;
55 unsigned long rdi; 24 unsigned long rdi;
56 unsigned long orig_rax; 25 unsigned long orig_rax;
57/* end of arguments */ 26/* end of arguments */
58/* cpu exception frame or undefined */ 27/* cpu exception frame or undefined */
59 unsigned long rip; 28 unsigned long rip;
60 unsigned long cs; 29 unsigned long cs;
61 unsigned long eflags; 30 unsigned long eflags;
62 unsigned long rsp; 31 unsigned long rsp;
63 unsigned long ss; 32 unsigned long ss;
64/* top of stack page */ 33/* top of stack page */
65}; 34};
66 35
67#endif 36#endif
68 37
69/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
70#define PTRACE_GETREGS 12
71#define PTRACE_SETREGS 13
72#define PTRACE_GETFPREGS 14
73#define PTRACE_SETFPREGS 15
74#define PTRACE_GETFPXREGS 18
75#define PTRACE_SETFPXREGS 19
76
77/* only useful for access 32bit programs */
78#define PTRACE_GET_THREAD_AREA 25
79#define PTRACE_SET_THREAD_AREA 26
80
81#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */
82
83#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 38#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
84#define user_mode(regs) (!!((regs)->cs & 3)) 39#define user_mode(regs) (!!((regs)->cs & 3))
85#define user_mode_vm(regs) user_mode(regs) 40#define user_mode_vm(regs) user_mode(regs)
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 6805e1feb300..ce97f65e1d10 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -48,8 +48,6 @@ extern void unlock_ipi_call_lock(void);
48extern int smp_num_siblings; 48extern int smp_num_siblings;
49extern void smp_send_reschedule(int cpu); 49extern void smp_send_reschedule(int cpu);
50void smp_stop_cpu(void); 50void smp_stop_cpu(void);
51extern int smp_call_function_single(int cpuid, void (*func) (void *info),
52 void *info, int retry, int wait);
53 51
54extern cpumask_t cpu_sibling_map[NR_CPUS]; 52extern cpumask_t cpu_sibling_map[NR_CPUS];
55extern cpumask_t cpu_core_map[NR_CPUS]; 53extern cpumask_t cpu_core_map[NR_CPUS];
diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h
index 7b15afb70c56..a47cc734c20c 100644
--- a/include/asm-xtensa/pgtable.h
+++ b/include/asm-xtensa/pgtable.h
@@ -218,7 +218,7 @@ extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)];
218/* 218/*
219 * The pmd contains the kernel virtual address of the pte page. 219 * The pmd contains the kernel virtual address of the pte page.
220 */ 220 */
221#define pmd_page_kernel(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK)) 221#define pmd_page_vaddr(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK))
222#define pmd_page(pmd) virt_to_page(pmd_val(pmd)) 222#define pmd_page(pmd) virt_to_page(pmd_val(pmd))
223 223
224/* 224/*
@@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
349/* Find an entry in the third-level page table.. */ 349/* Find an entry in the third-level page table.. */
350#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 350#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
351#define pte_offset_kernel(dir,addr) \ 351#define pte_offset_kernel(dir,addr) \
352 ((pte_t*) pmd_page_kernel(*(dir)) + pte_index(addr)) 352 ((pte_t*) pmd_page_vaddr(*(dir)) + pte_index(addr))
353#define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr)) 353#define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr))
354#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir),(addr)) 354#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir),(addr))
355 355
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index e319c649e4fd..31e9abb6d977 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -4,11 +4,8 @@
4#ifndef _LINUX_BOOTMEM_H 4#ifndef _LINUX_BOOTMEM_H
5#define _LINUX_BOOTMEM_H 5#define _LINUX_BOOTMEM_H
6 6
7#include <asm/pgtable.h>
8#include <asm/dma.h>
9#include <linux/cache.h>
10#include <linux/init.h>
11#include <linux/mmzone.h> 7#include <linux/mmzone.h>
8#include <asm/dma.h>
12 9
13/* 10/*
14 * simple boot-time physical memory area allocator. 11 * simple boot-time physical memory area allocator.
@@ -41,45 +38,64 @@ typedef struct bootmem_data {
41 struct list_head list; 38 struct list_head list;
42} bootmem_data_t; 39} bootmem_data_t;
43 40
44extern unsigned long __init bootmem_bootmap_pages (unsigned long); 41extern unsigned long bootmem_bootmap_pages(unsigned long);
45extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); 42extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
46extern void __init free_bootmem (unsigned long addr, unsigned long size); 43extern void free_bootmem(unsigned long addr, unsigned long size);
47extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); 44extern void *__alloc_bootmem(unsigned long size,
48extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal); 45 unsigned long align,
49extern void * __init __alloc_bootmem_low(unsigned long size, 46 unsigned long goal);
50 unsigned long align, 47extern void *__alloc_bootmem_nopanic(unsigned long size,
51 unsigned long goal); 48 unsigned long align,
52extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, 49 unsigned long goal);
53 unsigned long size, 50extern void *__alloc_bootmem_low(unsigned long size,
54 unsigned long align, 51 unsigned long align,
55 unsigned long goal); 52 unsigned long goal);
56extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata, 53extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
57 unsigned long size, unsigned long align, unsigned long goal, 54 unsigned long size,
58 unsigned long limit); 55 unsigned long align,
56 unsigned long goal);
57extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
58 unsigned long size,
59 unsigned long align,
60 unsigned long goal,
61 unsigned long limit);
62
59#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE 63#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
60extern void __init reserve_bootmem (unsigned long addr, unsigned long size); 64extern void reserve_bootmem(unsigned long addr, unsigned long size);
61#define alloc_bootmem(x) \ 65#define alloc_bootmem(x) \
62 __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) 66 __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
63#define alloc_bootmem_low(x) \ 67#define alloc_bootmem_low(x) \
64 __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0) 68 __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
65#define alloc_bootmem_pages(x) \ 69#define alloc_bootmem_pages(x) \
66 __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 70 __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
67#define alloc_bootmem_low_pages(x) \ 71#define alloc_bootmem_low_pages(x) \
68 __alloc_bootmem_low((x), PAGE_SIZE, 0) 72 __alloc_bootmem_low(x, PAGE_SIZE, 0)
69#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 73#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
70extern unsigned long __init free_all_bootmem (void); 74
71extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); 75extern unsigned long free_all_bootmem(void);
72extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); 76extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
73extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); 77extern void *__alloc_bootmem_node(pg_data_t *pgdat,
74extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); 78 unsigned long size,
75extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); 79 unsigned long align,
80 unsigned long goal);
81extern unsigned long init_bootmem_node(pg_data_t *pgdat,
82 unsigned long freepfn,
83 unsigned long startpfn,
84 unsigned long endpfn);
85extern void reserve_bootmem_node(pg_data_t *pgdat,
86 unsigned long physaddr,
87 unsigned long size);
88extern void free_bootmem_node(pg_data_t *pgdat,
89 unsigned long addr,
90 unsigned long size);
91
76#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE 92#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
77#define alloc_bootmem_node(pgdat, x) \ 93#define alloc_bootmem_node(pgdat, x) \
78 __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) 94 __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
79#define alloc_bootmem_pages_node(pgdat, x) \ 95#define alloc_bootmem_pages_node(pgdat, x) \
80 __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 96 __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
81#define alloc_bootmem_low_pages_node(pgdat, x) \ 97#define alloc_bootmem_low_pages_node(pgdat, x) \
82 __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0) 98 __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
83#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 99#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
84 100
85#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP 101#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
@@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size)
89{ 105{
90 return NULL; 106 return NULL;
91} 107}
92#endif 108#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
93 109
94extern unsigned long __meminitdata nr_kernel_pages; 110extern unsigned long __meminitdata nr_kernel_pages;
95extern unsigned long nr_all_pages; 111extern unsigned long nr_all_pages;
96 112
97extern void *__init alloc_large_system_hash(const char *tablename, 113extern void *alloc_large_system_hash(const char *tablename,
98 unsigned long bucketsize, 114 unsigned long bucketsize,
99 unsigned long numentries, 115 unsigned long numentries,
100 int scale, 116 int scale,
101 int flags, 117 int flags,
102 unsigned int *_hash_shift, 118 unsigned int *_hash_shift,
103 unsigned int *_hash_mask, 119 unsigned int *_hash_mask,
104 unsigned long limit); 120 unsigned long limit);
105 121
106#define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ 122#define HASH_HIGHMEM 0x00000001 /* Consider highmem? */
107#define HASH_EARLY 0x00000002 /* Allocating during early boot? */ 123#define HASH_EARLY 0x00000002 /* Allocating during early boot? */
diff --git a/include/linux/console.h b/include/linux/console.h
index 3bdf2155e565..76a1807726eb 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -120,9 +120,14 @@ extern void console_stop(struct console *);
120extern void console_start(struct console *); 120extern void console_start(struct console *);
121extern int is_console_locked(void); 121extern int is_console_locked(void);
122 122
123#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
123/* Suspend and resume console messages over PM events */ 124/* Suspend and resume console messages over PM events */
124extern void suspend_console(void); 125extern void suspend_console(void);
125extern void resume_console(void); 126extern void resume_console(void);
127#else
128static inline void suspend_console(void) {}
129static inline void resume_console(void) {}
130#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
126 131
127/* Some debug stub to catch some of the obvious races in the VT code */ 132/* Some debug stub to catch some of the obvious races in the VT code */
128#if 1 133#if 1
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8fb344a9abd8..3fef7d67aedc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
89static inline int cpu_is_offline(int cpu) { return 0; } 89static inline int cpu_is_offline(int cpu) { return 0; }
90#endif 90#endif
91 91
92#ifdef CONFIG_SUSPEND_SMP
93extern int disable_nonboot_cpus(void);
94extern void enable_nonboot_cpus(void);
95#else
96static inline int disable_nonboot_cpus(void) { return 0; }
97static inline void enable_nonboot_cpus(void) {}
98#endif
99
92#endif /* _LINUX_CPU_H_ */ 100#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 2d7671c92c0b..d6f4ec467a4b 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -169,6 +169,12 @@ enum {
169 DCCPO_MAX_CCID_SPECIFIC = 255, 169 DCCPO_MAX_CCID_SPECIFIC = 255,
170}; 170};
171 171
172/* DCCP CCIDS */
173enum {
174 DCCPC_CCID2 = 2,
175 DCCPC_CCID3 = 3,
176};
177
172/* DCCP features */ 178/* DCCP features */
173enum { 179enum {
174 DCCPF_RESERVED = 0, 180 DCCPF_RESERVED = 0,
@@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
320/* initial values for each feature */ 326/* initial values for each feature */
321#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 327#define DCCPF_INITIAL_SEQUENCE_WINDOW 100
322#define DCCPF_INITIAL_ACK_RATIO 2 328#define DCCPF_INITIAL_ACK_RATIO 2
323#define DCCPF_INITIAL_CCID 2 329#define DCCPF_INITIAL_CCID DCCPC_CCID2
324#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 330#define DCCPF_INITIAL_SEND_ACK_VECTOR 1
325/* FIXME: for now we're default to 1 but it should really be 0 */ 331/* FIXME: for now we're default to 1 but it should really be 0 */
326#define DCCPF_INITIAL_SEND_NDP_COUNT 1 332#define DCCPF_INITIAL_SEND_NDP_COUNT 1
@@ -404,6 +410,7 @@ struct dccp_service_list {
404}; 410};
405 411
406#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) 412#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
413#define DCCP_SERVICE_CODE_IS_ABSENT 0
407 414
408static inline int dccp_list_has_service(const struct dccp_service_list *sl, 415static inline int dccp_list_has_service(const struct dccp_service_list *sl,
409 const __be32 service) 416 const __be32 service)
@@ -484,11 +491,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
484 return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; 491 return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
485} 492}
486 493
487static inline int dccp_service_not_initialized(const struct sock *sk)
488{
489 return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
490}
491
492static inline const char *dccp_role(const struct sock *sk) 494static inline const char *dccp_role(const struct sock *sk)
493{ 495{
494 switch (dccp_sk(sk)->dccps_role) { 496 switch (dccp_sk(sk)->dccps_role) {
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 6a5796c81c90..666e0a5f00fc 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
31#define EM_M32R 88 /* Renesas M32R */ 31#define EM_M32R 88 /* Renesas M32R */
32#define EM_H8_300 46 /* Renesas H8/300,300H,H8S */ 32#define EM_H8_300 46 /* Renesas H8/300,300H,H8S */
33#define EM_FRV 0x5441 /* Fujitsu FR-V */ 33#define EM_FRV 0x5441 /* Fujitsu FR-V */
34#define EM_AVR32 0x18ad /* Atmel AVR32 */
34 35
35/* 36/*
36 * This is an interim value that we will use until the committee comes 37 * This is an interim value that we will use until the committee comes
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
new file mode 100644
index 000000000000..67396db141e8
--- /dev/null
+++ b/include/linux/elfnote.h
@@ -0,0 +1,90 @@
1#ifndef _LINUX_ELFNOTE_H
2#define _LINUX_ELFNOTE_H
3/*
4 * Helper macros to generate ELF Note structures, which are put into a
5 * PT_NOTE segment of the final vmlinux image. These are useful for
6 * including name-value pairs of metadata into the kernel binary (or
7 * modules?) for use by external programs.
8 *
9 * Each note has three parts: a name, a type and a desc. The name is
10 * intended to distinguish the note's originator, so it would be a
11 * company, project, subsystem, etc; it must be in a suitable form for
12 * use in a section name. The type is an integer which is used to tag
13 * the data, and is considered to be within the "name" namespace (so
14 * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
15 * "desc" field is the actual data. There are no constraints on the
16 * desc field's contents, though typically they're fairly small.
17 *
18 * All notes from a given NAME are put into a section named
19 * .note.NAME. When the kernel image is finally linked, all the notes
20 * are packed into a single .notes section, which is mapped into the
21 * PT_NOTE segment. Because notes for a given name are grouped into
22 * the same section, they'll all be adjacent in the output file.
23 *
24 * This file defines macros for both C and assembler use. Their
25 * syntax is slightly different, but they're semantically similar.
26 *
27 * See the ELF specification for more detail about ELF notes.
28 */
29
30#ifdef __ASSEMBLER__
31/*
32 * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
33 * turn out to be the same size and shape), followed by the name and
34 * desc data with appropriate padding. The 'desctype' argument is the
35 * assembler pseudo op defining the type of the data e.g. .asciz while
36 * 'descdata' is the data itself e.g. "hello, world".
37 *
38 * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
39 * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
40 */
41#define ELFNOTE(name, type, desctype, descdata) \
42.pushsection .note.name ; \
43 .align 4 ; \
44 .long 2f - 1f /* namesz */ ; \
45 .long 4f - 3f /* descsz */ ; \
46 .long type ; \
471:.asciz "name" ; \
482:.align 4 ; \
493:desctype descdata ; \
504:.align 4 ; \
51.popsection ;
52#else /* !__ASSEMBLER__ */
53#include <linux/elf.h>
54/*
55 * Use an anonymous structure which matches the shape of
56 * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
57 * type of name and desc depend on the macro arguments. "name" must
58 * be a literal string, and "desc" must be passed by value. You may
59 * only define one note per line, since __LINE__ is used to generate
60 * unique symbols.
61 */
62#define _ELFNOTE_PASTE(a,b) a##b
63#define _ELFNOTE(size, name, unique, type, desc) \
64 static const struct { \
65 struct elf##size##_note _nhdr; \
66 unsigned char _name[sizeof(name)] \
67 __attribute__((aligned(sizeof(Elf##size##_Word)))); \
68 typeof(desc) _desc \
69 __attribute__((aligned(sizeof(Elf##size##_Word)))); \
70 } _ELFNOTE_PASTE(_note_, unique) \
71 __attribute_used__ \
72 __attribute__((section(".note." name), \
73 aligned(sizeof(Elf##size##_Word)), \
74 unused)) = { \
75 { \
76 sizeof(name), \
77 sizeof(desc), \
78 type, \
79 }, \
80 name, \
81 desc \
82 }
83#define ELFNOTE(size, name, type, desc) \
84 _ELFNOTE(size, name, __LINE__, type, desc)
85
86#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
87#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
88#endif /* __ASSEMBLER__ */
89
90#endif /* _LINUX_ELFNOTE_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cc9e60844484..8b34aabfe4c6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -9,17 +9,16 @@ struct vm_area_struct;
9 9
10/* 10/*
11 * GFP bitmasks.. 11 * GFP bitmasks..
12 *
13 * Zone modifiers (see linux/mmzone.h - low three bits)
14 *
15 * Do not put any conditional on these. If necessary modify the definitions
16 * without the underscores and use them consistently. The definitions here may
17 * be used in bit comparisons.
12 */ 18 */
13/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
14#define __GFP_DMA ((__force gfp_t)0x01u) 19#define __GFP_DMA ((__force gfp_t)0x01u)
15#define __GFP_HIGHMEM ((__force gfp_t)0x02u) 20#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
16#ifdef CONFIG_DMA_IS_DMA32 21#define __GFP_DMA32 ((__force gfp_t)0x04u)
17#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */
18#elif BITS_PER_LONG < 64
19#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */
20#else
21#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */
22#endif
23 22
24/* 23/*
25 * Action modifiers - doesn't change the zoning 24 * Action modifiers - doesn't change the zoning
@@ -46,6 +45,7 @@ struct vm_area_struct;
46#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ 45#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
47#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ 46#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
48#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ 47#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
48#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
49 49
50#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ 50#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
51#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) 51#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +54,7 @@ struct vm_area_struct;
54#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ 54#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
55 __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ 55 __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
56 __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ 56 __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
57 __GFP_NOMEMALLOC|__GFP_HARDWALL) 57 __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
58 58
59/* This equals 0, but use constants in case they ever change */ 59/* This equals 0, but use constants in case they ever change */
60#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) 60#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
@@ -67,6 +67,8 @@ struct vm_area_struct;
67#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ 67#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
68 __GFP_HIGHMEM) 68 __GFP_HIGHMEM)
69 69
70#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
71
70/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some 72/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
71 platforms, used as appropriate on others */ 73 platforms, used as appropriate on others */
72 74
@@ -76,11 +78,19 @@ struct vm_area_struct;
76#define GFP_DMA32 __GFP_DMA32 78#define GFP_DMA32 __GFP_DMA32
77 79
78 80
79static inline int gfp_zone(gfp_t gfp) 81static inline enum zone_type gfp_zone(gfp_t flags)
80{ 82{
81 int zone = GFP_ZONEMASK & (__force int) gfp; 83 if (flags & __GFP_DMA)
82 BUG_ON(zone >= GFP_ZONETYPES); 84 return ZONE_DMA;
83 return zone; 85#ifdef CONFIG_ZONE_DMA32
86 if (flags & __GFP_DMA32)
87 return ZONE_DMA32;
88#endif
89#ifdef CONFIG_HIGHMEM
90 if (flags & __GFP_HIGHMEM)
91 return ZONE_HIGHMEM;
92#endif
93 return ZONE_NORMAL;
84} 94}
85 95
86/* 96/*
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 85ce7ef9a512..fd7d12daa94f 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page)
24 24
25/* declarations for linux/mm/highmem.c */ 25/* declarations for linux/mm/highmem.c */
26unsigned int nr_free_highpages(void); 26unsigned int nr_free_highpages(void);
27extern unsigned long totalhigh_pages;
27 28
28#else /* CONFIG_HIGHMEM */ 29#else /* CONFIG_HIGHMEM */
29 30
30static inline unsigned int nr_free_highpages(void) { return 0; } 31static inline unsigned int nr_free_highpages(void) { return 0; }
31 32
33#define totalhigh_pages 0
34
35#ifndef ARCH_HAS_KMAP
32static inline void *kmap(struct page *page) 36static inline void *kmap(struct page *page)
33{ 37{
34 might_sleep(); 38 might_sleep();
@@ -41,6 +45,7 @@ static inline void *kmap(struct page *page)
41#define kunmap_atomic(addr, idx) do { } while (0) 45#define kunmap_atomic(addr, idx) do { } while (0)
42#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) 46#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn))
43#define kmap_atomic_to_page(ptr) virt_to_page(ptr) 47#define kmap_atomic_to_page(ptr) virt_to_page(ptr)
48#endif
44 49
45#endif /* CONFIG_HIGHMEM */ 50#endif /* CONFIG_HIGHMEM */
46 51
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fbf6d901e9c2..48d3cb3b6a47 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
320 * Monolithic do_IRQ implementation. 320 * Monolithic do_IRQ implementation.
321 * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly) 321 * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
322 */ 322 */
323#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
323extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); 324extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
325#endif
324 326
325/* 327/*
326 * Architectures call this to let the generic IRQ layer 328 * Architectures call this to let the generic IRQ layer
@@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs)
332{ 334{
333 struct irq_desc *desc = irq_desc + irq; 335 struct irq_desc *desc = irq_desc + irq;
334 336
337#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
338 desc->handle_irq(irq, desc, regs);
339#else
335 if (likely(desc->handle_irq)) 340 if (likely(desc->handle_irq))
336 desc->handle_irq(irq, desc, regs); 341 desc->handle_irq(irq, desc, regs);
337 else 342 else
338 __do_IRQ(irq, regs); 343 __do_IRQ(irq, regs);
344#endif
339} 345}
340 346
341/* Handling of unhandled and spurious interrupts: */ 347/* Handling of unhandled and spurious interrupts: */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4fdce8b..e44a37e2c71c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@ extern const char linux_banner[];
33#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 33#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
34#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) 34#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
35#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) 35#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
36#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
36#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) 37#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
37 38
38#define KERN_EMERG "<0>" /* system is unusable */ 39#define KERN_EMERG "<0>" /* system is unusable */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 72440f0a443d..09f0f575ddff 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
162 unsigned long addr); 162 unsigned long addr);
163extern unsigned slab_node(struct mempolicy *policy); 163extern unsigned slab_node(struct mempolicy *policy);
164 164
165extern int policy_zone; 165extern enum zone_type policy_zone;
166 166
167static inline void check_highest_zone(int k) 167static inline void check_highest_zone(enum zone_type k)
168{ 168{
169 if (k > policy_zone) 169 if (k > policy_zone)
170 policy_zone = k; 170 policy_zone = k;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 224178a000d2..856f0ee7e84a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/debug_locks.h> 17#include <linux/debug_locks.h>
18#include <linux/backing-dev.h>
18 19
19struct mempolicy; 20struct mempolicy;
20struct anon_vma; 21struct anon_vma;
@@ -218,7 +219,8 @@ struct inode;
218 * Each physical page in the system has a struct page associated with 219 * Each physical page in the system has a struct page associated with
219 * it to keep track of whatever it is we are using the page for at the 220 * it to keep track of whatever it is we are using the page for at the
220 * moment. Note that we have no way to track which tasks are using 221 * moment. Note that we have no way to track which tasks are using
221 * a page. 222 * a page, though if it is a pagecache page, rmap structures can tell us
223 * who is mapping it.
222 */ 224 */
223struct page { 225struct page {
224 unsigned long flags; /* Atomic flags, some possibly 226 unsigned long flags; /* Atomic flags, some possibly
@@ -278,6 +280,12 @@ struct page {
278 */ 280 */
279#include <linux/page-flags.h> 281#include <linux/page-flags.h>
280 282
283#ifdef CONFIG_DEBUG_VM
284#define VM_BUG_ON(cond) BUG_ON(cond)
285#else
286#define VM_BUG_ON(condition) do { } while(0)
287#endif
288
281/* 289/*
282 * Methods to modify the page usage count. 290 * Methods to modify the page usage count.
283 * 291 *
@@ -292,12 +300,11 @@ struct page {
292 */ 300 */
293 301
294/* 302/*
295 * Drop a ref, return true if the logical refcount fell to zero (the page has 303 * Drop a ref, return true if the refcount fell to zero (the page has no users)
296 * no users)
297 */ 304 */
298static inline int put_page_testzero(struct page *page) 305static inline int put_page_testzero(struct page *page)
299{ 306{
300 BUG_ON(atomic_read(&page->_count) == 0); 307 VM_BUG_ON(atomic_read(&page->_count) == 0);
301 return atomic_dec_and_test(&page->_count); 308 return atomic_dec_and_test(&page->_count);
302} 309}
303 310
@@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page)
307 */ 314 */
308static inline int get_page_unless_zero(struct page *page) 315static inline int get_page_unless_zero(struct page *page)
309{ 316{
317 VM_BUG_ON(PageCompound(page));
310 return atomic_inc_not_zero(&page->_count); 318 return atomic_inc_not_zero(&page->_count);
311} 319}
312 320
313extern void FASTCALL(__page_cache_release(struct page *));
314
315static inline int page_count(struct page *page) 321static inline int page_count(struct page *page)
316{ 322{
317 if (unlikely(PageCompound(page))) 323 if (unlikely(PageCompound(page)))
@@ -323,6 +329,7 @@ static inline void get_page(struct page *page)
323{ 329{
324 if (unlikely(PageCompound(page))) 330 if (unlikely(PageCompound(page)))
325 page = (struct page *)page_private(page); 331 page = (struct page *)page_private(page);
332 VM_BUG_ON(atomic_read(&page->_count) == 0);
326 atomic_inc(&page->_count); 333 atomic_inc(&page->_count);
327} 334}
328 335
@@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order);
349 * For the non-reserved pages, page_count(page) denotes a reference count. 356 * For the non-reserved pages, page_count(page) denotes a reference count.
350 * page_count() == 0 means the page is free. page->lru is then used for 357 * page_count() == 0 means the page is free. page->lru is then used for
351 * freelist management in the buddy allocator. 358 * freelist management in the buddy allocator.
352 * page_count() == 1 means the page is used for exactly one purpose 359 * page_count() > 0 means the page has been allocated.
353 * (e.g. a private data page of one process). 360 *
361 * Pages are allocated by the slab allocator in order to provide memory
362 * to kmalloc and kmem_cache_alloc. In this case, the management of the
363 * page, and the fields in 'struct page' are the responsibility of mm/slab.c
364 * unless a particular usage is carefully commented. (the responsibility of
365 * freeing the kmalloc memory is the caller's, of course).
354 * 366 *
355 * A page may be used for kmalloc() or anyone else who does a 367 * A page may be used by anyone else who does a __get_free_page().
356 * __get_free_page(). In this case the page_count() is at least 1, and 368 * In this case, page_count still tracks the references, and should only
357 * all other fields are unused but should be 0 or NULL. The 369 * be used through the normal accessor functions. The top bits of page->flags
358 * management of this page is the responsibility of the one who uses 370 * and page->virtual store page management information, but all other fields
359 * it. 371 * are unused and could be used privately, carefully. The management of this
372 * page is the responsibility of the one who allocated it, and those who have
373 * subsequently been given references to it.
360 * 374 *
361 * The other pages (we may call them "process pages") are completely 375 * The other pages (we may call them "pagecache pages") are completely
362 * managed by the Linux memory manager: I/O, buffers, swapping etc. 376 * managed by the Linux memory manager: I/O, buffers, swapping etc.
363 * The following discussion applies only to them. 377 * The following discussion applies only to them.
364 * 378 *
365 * A page may belong to an inode's memory mapping. In this case, 379 * A pagecache page contains an opaque `private' member, which belongs to the
366 * page->mapping is the pointer to the inode, and page->index is the 380 * page's address_space. Usually, this is the address of a circular list of
367 * file offset of the page, in units of PAGE_CACHE_SIZE. 381 * the page's disk buffers. PG_private must be set to tell the VM to call
382 * into the filesystem to release these pages.
368 * 383 *
369 * A page contains an opaque `private' member, which belongs to the 384 * A page may belong to an inode's memory mapping. In this case, page->mapping
370 * page's address_space. Usually, this is the address of a circular 385 * is the pointer to the inode, and page->index is the file offset of the page,
371 * list of the page's disk buffers. 386 * in units of PAGE_CACHE_SIZE.
372 * 387 *
373 * For pages belonging to inodes, the page_count() is the number of 388 * If pagecache pages are not associated with an inode, they are said to be
374 * attaches, plus 1 if `private' contains something, plus one for 389 * anonymous pages. These may become associated with the swapcache, and in that
375 * the page cache itself. 390 * case PG_swapcache is set, and page->private is an offset into the swapcache.
376 * 391 *
377 * Instead of keeping dirty/clean pages in per address-space lists, we instead 392 * In either case (swapcache or inode backed), the pagecache itself holds one
378 * now tag pages as dirty/under writeback in the radix tree. 393 * reference to the page. Setting PG_private should also increment the
394 * refcount. Each user mapping also has a reference to the page.
379 * 395 *
380 * There is also a per-mapping radix tree mapping index to the page 396 * The pagecache pages are stored in a per-mapping radix tree, which is
381 * in memory if present. The tree is rooted at mapping->root. 397 * rooted at mapping->page_tree, and indexed by offset.
398 * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
399 * lists, we instead now tag pages as dirty/writeback in the radix tree.
382 * 400 *
383 * All process pages can do I/O: 401 * All pagecache pages may be subject to I/O:
384 * - inode pages may need to be read from disk, 402 * - inode pages may need to be read from disk,
385 * - inode pages which have been modified and are MAP_SHARED may need 403 * - inode pages which have been modified and are MAP_SHARED may need
386 * to be written to disk, 404 * to be written back to the inode on disk,
387 * - private pages which have been modified may need to be swapped out 405 * - anonymous pages (including MAP_PRIVATE file mappings) which have been
388 * to swap space and (later) to be read back into memory. 406 * modified may need to be swapped out to swap space and (later) to be read
407 * back into memory.
389 */ 408 */
390 409
391/* 410/*
@@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order);
463#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) 482#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
464#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) 483#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1)
465 484
466static inline unsigned long page_zonenum(struct page *page) 485static inline enum zone_type page_zonenum(struct page *page)
467{ 486{
468 return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; 487 return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
469} 488}
@@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page)
480 return zone_table[page_zone_id(page)]; 499 return zone_table[page_zone_id(page)];
481} 500}
482 501
502static inline unsigned long zone_to_nid(struct zone *zone)
503{
504 return zone->zone_pgdat->node_id;
505}
506
483static inline unsigned long page_to_nid(struct page *page) 507static inline unsigned long page_to_nid(struct page *page)
484{ 508{
485 if (FLAGS_HAS_NODE) 509 if (FLAGS_HAS_NODE)
486 return (page->flags >> NODES_PGSHIFT) & NODES_MASK; 510 return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
487 else 511 else
488 return page_zone(page)->zone_pgdat->node_id; 512 return zone_to_nid(page_zone(page));
489} 513}
490static inline unsigned long page_to_section(struct page *page) 514static inline unsigned long page_to_section(struct page *page)
491{ 515{
492 return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; 516 return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
493} 517}
494 518
495static inline void set_page_zone(struct page *page, unsigned long zone) 519static inline void set_page_zone(struct page *page, enum zone_type zone)
496{ 520{
497 page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); 521 page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
498 page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; 522 page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
499} 523}
524
500static inline void set_page_node(struct page *page, unsigned long node) 525static inline void set_page_node(struct page *page, unsigned long node)
501{ 526{
502 page->flags &= ~(NODES_MASK << NODES_PGSHIFT); 527 page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
@@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
508 page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; 533 page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
509} 534}
510 535
511static inline void set_page_links(struct page *page, unsigned long zone, 536static inline void set_page_links(struct page *page, enum zone_type zone,
512 unsigned long node, unsigned long pfn) 537 unsigned long node, unsigned long pfn)
513{ 538{
514 set_page_zone(page, zone); 539 set_page_zone(page, zone);
@@ -802,6 +827,39 @@ struct shrinker;
802extern struct shrinker *set_shrinker(int, shrinker_t); 827extern struct shrinker *set_shrinker(int, shrinker_t);
803extern void remove_shrinker(struct shrinker *shrinker); 828extern void remove_shrinker(struct shrinker *shrinker);
804 829
830/*
831 * Some shared mappings will want the pages marked read-only
832 * to track write events. If so, we'll downgrade vm_page_prot
833 * to the private version (using protection_map[] without the
834 * VM_SHARED bit).
835 */
836static inline int vma_wants_writenotify(struct vm_area_struct *vma)
837{
838 unsigned int vm_flags = vma->vm_flags;
839
840 /* If it was private or non-writable, the write bit is already clear */
841 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
842 return 0;
843
844 /* The backer wishes to know when pages are first written to? */
845 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
846 return 1;
847
848 /* The open routine did something to the protections already? */
849 if (pgprot_val(vma->vm_page_prot) !=
850 pgprot_val(protection_map[vm_flags &
851 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
852 return 0;
853
854 /* Specialty mapping? */
855 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
856 return 0;
857
858 /* Can the mapping track the dirty pages? */
859 return vma->vm_file && vma->vm_file->f_mapping &&
860 mapping_cap_account_dirty(vma->vm_file->f_mapping);
861}
862
805extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); 863extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
806 864
807int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); 865int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f45163c528e8..3693f1a52788 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -51,7 +51,8 @@ enum zone_stat_item {
51 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. 51 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
52 only modified from process context */ 52 only modified from process context */
53 NR_FILE_PAGES, 53 NR_FILE_PAGES,
54 NR_SLAB, /* Pages used by slab allocator */ 54 NR_SLAB_RECLAIMABLE,
55 NR_SLAB_UNRECLAIMABLE,
55 NR_PAGETABLE, /* used for pagetables */ 56 NR_PAGETABLE, /* used for pagetables */
56 NR_FILE_DIRTY, 57 NR_FILE_DIRTY,
57 NR_WRITEBACK, 58 NR_WRITEBACK,
@@ -88,53 +89,68 @@ struct per_cpu_pageset {
88#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) 89#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
89#endif 90#endif
90 91
91#define ZONE_DMA 0 92enum zone_type {
92#define ZONE_DMA32 1 93 /*
93#define ZONE_NORMAL 2 94 * ZONE_DMA is used when there are devices that are not able
94#define ZONE_HIGHMEM 3 95 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
95 96 * carve out the portion of memory that is needed for these devices.
96#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ 97 * The range is arch specific.
97#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ 98 *
98 99 * Some examples
100 *
101 * Architecture Limit
102 * ---------------------------
103 * parisc, ia64, sparc <4G
104 * s390 <2G
105 * arm26 <48M
106 * arm Various
107 * alpha Unlimited or 0-16MB.
108 *
109 * i386, x86_64 and multiple other arches
110 * <16M.
111 */
112 ZONE_DMA,
113#ifdef CONFIG_ZONE_DMA32
114 /*
115 * x86_64 needs two ZONE_DMAs because it supports devices that are
116 * only able to do DMA to the lower 16M but also 32 bit devices that
117 * can only do DMA to areas below 4G.
118 */
119 ZONE_DMA32,
120#endif
121 /*
122 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
123 * performed on pages in ZONE_NORMAL if the DMA devices support
124 * transfers to all addressable memory.
125 */
126 ZONE_NORMAL,
127#ifdef CONFIG_HIGHMEM
128 /*
129 * A memory area that is only addressable by the kernel through
130 * mapping portions into its own address space. This is for example
131 * used by i386 to allow the kernel to address the memory beyond
132 * 900MB. The kernel will set up special mappings (page
133 * table entries on i386) for each page that the kernel needs to
134 * access.
135 */
136 ZONE_HIGHMEM,
137#endif
138 MAX_NR_ZONES
139};
99 140
100/* 141/*
101 * When a memory allocation must conform to specific limitations (such 142 * When a memory allocation must conform to specific limitations (such
102 * as being suitable for DMA) the caller will pass in hints to the 143 * as being suitable for DMA) the caller will pass in hints to the
103 * allocator in the gfp_mask, in the zone modifier bits. These bits 144 * allocator in the gfp_mask, in the zone modifier bits. These bits
104 * are used to select a priority ordered list of memory zones which 145 * are used to select a priority ordered list of memory zones which
105 * match the requested limits. GFP_ZONEMASK defines which bits within 146 * match the requested limits. See gfp_zone() in include/linux/gfp.h
106 * the gfp_mask should be considered as zone modifiers. Each valid
107 * combination of the zone modifier bits has a corresponding list
108 * of zones (in node_zonelists). Thus for two zone modifiers there
109 * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
110 * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible
111 * combinations of zone modifiers in "zone modifier space".
112 *
113 * As an optimisation any zone modifier bits which are only valid when
114 * no other zone modifier bits are set (loners) should be placed in
115 * the highest order bits of this field. This allows us to reduce the
116 * extent of the zonelists thus saving space. For example in the case
117 * of three zone modifier bits, we could require up to eight zonelists.
118 * If the left most zone modifier is a "loner" then the highest valid
119 * zonelist would be four allowing us to allocate only five zonelists.
120 * Use the first form for GFP_ZONETYPES when the left most bit is not
121 * a "loner", otherwise use the second.
122 *
123 * NOTE! Make sure this matches the zones in <linux/gfp.h>
124 */ 147 */
125#define GFP_ZONEMASK 0x07
126/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */
127#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */
128 148
129/* 149#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
130 * On machines where it is needed (eg PCs) we divide physical memory 150#define ZONES_SHIFT 1
131 * into multiple physical zones. On a 32bit PC we have 4 zones: 151#else
132 * 152#define ZONES_SHIFT 2
133 * ZONE_DMA < 16 MB ISA DMA capable memory 153#endif
134 * ZONE_DMA32 0 MB Empty
135 * ZONE_NORMAL 16-896 MB direct mapped by the kernel
136 * ZONE_HIGHMEM > 896 MB only page cache and user processes
137 */
138 154
139struct zone { 155struct zone {
140 /* Fields commonly accessed by the page allocator */ 156 /* Fields commonly accessed by the page allocator */
@@ -154,7 +170,8 @@ struct zone {
154 /* 170 /*
155 * zone reclaim becomes active if more unmapped pages exist. 171 * zone reclaim becomes active if more unmapped pages exist.
156 */ 172 */
157 unsigned long min_unmapped_ratio; 173 unsigned long min_unmapped_pages;
174 unsigned long min_slab_pages;
158 struct per_cpu_pageset *pageset[NR_CPUS]; 175 struct per_cpu_pageset *pageset[NR_CPUS];
159#else 176#else
160 struct per_cpu_pageset pageset[NR_CPUS]; 177 struct per_cpu_pageset pageset[NR_CPUS];
@@ -266,7 +283,6 @@ struct zone {
266 char *name; 283 char *name;
267} ____cacheline_internodealigned_in_smp; 284} ____cacheline_internodealigned_in_smp;
268 285
269
270/* 286/*
271 * The "priority" of VM scanning is how much of the queues we will scan in one 287 * The "priority" of VM scanning is how much of the queues we will scan in one
272 * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the 288 * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -304,7 +320,7 @@ struct zonelist {
304struct bootmem_data; 320struct bootmem_data;
305typedef struct pglist_data { 321typedef struct pglist_data {
306 struct zone node_zones[MAX_NR_ZONES]; 322 struct zone node_zones[MAX_NR_ZONES];
307 struct zonelist node_zonelists[GFP_ZONETYPES]; 323 struct zonelist node_zonelists[MAX_NR_ZONES];
308 int nr_zones; 324 int nr_zones;
309#ifdef CONFIG_FLAT_NODE_MEM_MAP 325#ifdef CONFIG_FLAT_NODE_MEM_MAP
310 struct page *node_mem_map; 326 struct page *node_mem_map;
@@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone)
373 return (!!zone->present_pages); 389 return (!!zone->present_pages);
374} 390}
375 391
376static inline int is_highmem_idx(int idx) 392static inline int is_highmem_idx(enum zone_type idx)
377{ 393{
394#ifdef CONFIG_HIGHMEM
378 return (idx == ZONE_HIGHMEM); 395 return (idx == ZONE_HIGHMEM);
396#else
397 return 0;
398#endif
379} 399}
380 400
381static inline int is_normal_idx(int idx) 401static inline int is_normal_idx(enum zone_type idx)
382{ 402{
383 return (idx == ZONE_NORMAL); 403 return (idx == ZONE_NORMAL);
384} 404}
@@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx)
391 */ 411 */
392static inline int is_highmem(struct zone *zone) 412static inline int is_highmem(struct zone *zone)
393{ 413{
414#ifdef CONFIG_HIGHMEM
394 return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; 415 return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
416#else
417 return 0;
418#endif
395} 419}
396 420
397static inline int is_normal(struct zone *zone) 421static inline int is_normal(struct zone *zone)
@@ -401,7 +425,11 @@ static inline int is_normal(struct zone *zone)
401 425
402static inline int is_dma32(struct zone *zone) 426static inline int is_dma32(struct zone *zone)
403{ 427{
428#ifdef CONFIG_ZONE_DMA32
404 return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; 429 return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
430#else
431 return 0;
432#endif
405} 433}
406 434
407static inline int is_dma(struct zone *zone) 435static inline int is_dma(struct zone *zone)
@@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
421 void __user *, size_t *, loff_t *); 449 void __user *, size_t *, loff_t *);
422int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, 450int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
423 struct file *, void __user *, size_t *, loff_t *); 451 struct file *, void __user *, size_t *, loff_t *);
452int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
453 struct file *, void __user *, size_t *, loff_t *);
424 454
425#include <linux/topology.h> 455#include <linux/topology.h>
426/* Returns the number of the current Node. */ 456/* Returns the number of the current Node. */
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9a285cecf249..312bd2ffee33 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -10,6 +10,8 @@ header-y += xt_connmark.h
10header-y += xt_CONNMARK.h 10header-y += xt_CONNMARK.h
11header-y += xt_conntrack.h 11header-y += xt_conntrack.h
12header-y += xt_dccp.h 12header-y += xt_dccp.h
13header-y += xt_dscp.h
14header-y += xt_DSCP.h
13header-y += xt_esp.h 15header-y += xt_esp.h
14header-y += xt_helper.h 16header-y += xt_helper.h
15header-y += xt_length.h 17header-y += xt_length.h
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5748642e9f36..9d7921dd50f0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -13,24 +13,25 @@
13 * PG_reserved is set for special pages, which can never be swapped out. Some 13 * PG_reserved is set for special pages, which can never be swapped out. Some
14 * of them might not even exist (eg empty_bad_page)... 14 * of them might not even exist (eg empty_bad_page)...
15 * 15 *
16 * The PG_private bitflag is set if page->private contains a valid value. 16 * The PG_private bitflag is set on pagecache pages if they contain filesystem
17 * specific data (which is normally at page->private). It can be used by
18 * private allocations for its own usage.
17 * 19 *
18 * During disk I/O, PG_locked is used. This bit is set before I/O and 20 * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
19 * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks 21 * and cleared when writeback _starts_ or when read _completes_. PG_writeback
20 * waiting for the I/O on this page to complete. 22 * is set before writeback starts and cleared when it finishes.
23 *
24 * PG_locked also pins a page in pagecache, and blocks truncation of the file
25 * while it is held.
26 *
27 * page_waitqueue(page) is a wait queue of all tasks waiting for the page
28 * to become unlocked.
21 * 29 *
22 * PG_uptodate tells whether the page's contents is valid. When a read 30 * PG_uptodate tells whether the page's contents is valid. When a read
23 * completes, the page becomes uptodate, unless a disk I/O error happened. 31 * completes, the page becomes uptodate, unless a disk I/O error happened.
24 * 32 *
25 * For choosing which pages to swap out, inode pages carry a PG_referenced bit, 33 * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
26 * which is set any time the system accesses that page through the (mapping, 34 * file-backed pagecache (see mm/vmscan.c).
27 * index) hash table. This referenced bit, together with the referenced bit
28 * in the page tables, is used to manipulate page->age and move the page across
29 * the active, inactive_dirty and inactive_clean lists.
30 *
31 * Note that the referenced bit, the page->lru list_head and the active,
32 * inactive_dirty and inactive_clean lists are protected by the
33 * zone->lru_lock, and *NOT* by the usual PG_locked bit!
34 * 35 *
35 * PG_error is set to indicate that an I/O error occurred on this page. 36 * PG_error is set to indicate that an I/O error occurred on this page.
36 * 37 *
@@ -42,6 +43,10 @@
42 * space, they need to be kmapped separately for doing IO on the pages. The 43 * space, they need to be kmapped separately for doing IO on the pages. The
43 * struct page (these bits with information) are always mapped into kernel 44 * struct page (these bits with information) are always mapped into kernel
44 * address space... 45 * address space...
46 *
47 * PG_buddy is set to indicate that the page is free and in the buddy system
48 * (see mm/page_alloc.c).
49 *
45 */ 50 */
46 51
47/* 52/*
@@ -74,7 +79,7 @@
74#define PG_checked 8 /* kill me in 2.5.<early>. */ 79#define PG_checked 8 /* kill me in 2.5.<early>. */
75#define PG_arch_1 9 80#define PG_arch_1 9
76#define PG_reserved 10 81#define PG_reserved 10
77#define PG_private 11 /* Has something at ->private */ 82#define PG_private 11 /* If pagecache, has fs-private data */
78 83
79#define PG_writeback 12 /* Page is under writeback */ 84#define PG_writeback 12 /* Page is under writeback */
80#define PG_nosave 13 /* Used for system suspend/resume */ 85#define PG_nosave 13 /* Used for system suspend/resume */
@@ -83,7 +88,7 @@
83 88
84#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ 89#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
85#define PG_reclaim 17 /* To be reclaimed asap */ 90#define PG_reclaim 17 /* To be reclaimed asap */
86#define PG_nosave_free 18 /* Free, should not be written */ 91#define PG_nosave_free 18 /* Used for system suspend/resume */
87#define PG_buddy 19 /* Page is free, on buddy lists */ 92#define PG_buddy 19 /* Page is free, on buddy lists */
88 93
89 94
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a2f5d27f60e..64f950925151 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
130} 130}
131 131
132extern void FASTCALL(__lock_page(struct page *page)); 132extern void FASTCALL(__lock_page(struct page *page));
133extern void FASTCALL(__lock_page_nosync(struct page *page));
133extern void FASTCALL(unlock_page(struct page *page)); 134extern void FASTCALL(unlock_page(struct page *page));
134 135
136/*
137 * lock_page may only be called if we have the page's inode pinned.
138 */
135static inline void lock_page(struct page *page) 139static inline void lock_page(struct page *page)
136{ 140{
137 might_sleep(); 141 might_sleep();
138 if (TestSetPageLocked(page)) 142 if (TestSetPageLocked(page))
139 __lock_page(page); 143 __lock_page(page);
140} 144}
145
146/*
147 * lock_page_nosync should only be used if we can't pin the page's inode.
148 * Doesn't play quite so well with block device plugging.
149 */
150static inline void lock_page_nosync(struct page *page)
151{
152 might_sleep();
153 if (TestSetPageLocked(page))
154 __lock_page_nosync(page);
155}
141 156
142/* 157/*
143 * This is exported only for wait_on_page_locked/wait_on_page_writeback. 158 * This is exported only for wait_on_page_locked/wait_on_page_writeback.
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cb9039a21f2a..3835a9642f13 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
1#ifndef __LINUX_PERCPU_H 1#ifndef __LINUX_PERCPU_H
2#define __LINUX_PERCPU_H 2#define __LINUX_PERCPU_H
3
3#include <linux/spinlock.h> /* For preempt_disable() */ 4#include <linux/spinlock.h> /* For preempt_disable() */
4#include <linux/slab.h> /* For kmalloc() */ 5#include <linux/slab.h> /* For kmalloc() */
5#include <linux/smp.h> 6#include <linux/smp.h>
6#include <linux/string.h> /* For memset() */ 7#include <linux/string.h> /* For memset() */
8#include <linux/cpumask.h>
9
7#include <asm/percpu.h> 10#include <asm/percpu.h>
8 11
9/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ 12/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -11,8 +14,14 @@
11#define PERCPU_ENOUGH_ROOM 32768 14#define PERCPU_ENOUGH_ROOM 32768
12#endif 15#endif
13 16
14/* Must be an lvalue. */ 17/*
15#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); })) 18 * Must be an lvalue. Since @var must be a simple identifier,
19 * we force a syntax error here if it isn't.
20 */
21#define get_cpu_var(var) (*({ \
22 extern int simple_indentifier_##var(void); \
23 preempt_disable(); \
24 &__get_cpu_var(var); }))
16#define put_cpu_var(var) preempt_enable() 25#define put_cpu_var(var) preempt_enable()
17 26
18#ifdef CONFIG_SMP 27#ifdef CONFIG_SMP
@@ -21,39 +30,77 @@ struct percpu_data {
21 void *ptrs[NR_CPUS]; 30 void *ptrs[NR_CPUS];
22}; 31};
23 32
33#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
24/* 34/*
25 * Use this to get to a cpu's version of the per-cpu object allocated using 35 * Use this to get to a cpu's version of the per-cpu object dynamically
26 * alloc_percpu. Non-atomic access to the current CPU's version should 36 * allocated. Non-atomic access to the current CPU's version should
27 * probably be combined with get_cpu()/put_cpu(). 37 * probably be combined with get_cpu()/put_cpu().
28 */ 38 */
29#define per_cpu_ptr(ptr, cpu) \ 39#define percpu_ptr(ptr, cpu) \
30({ \ 40({ \
31 struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \ 41 struct percpu_data *__p = __percpu_disguise(ptr); \
32 (__typeof__(ptr))__p->ptrs[(cpu)]; \ 42 (__typeof__(ptr))__p->ptrs[(cpu)]; \
33}) 43})
34 44
35extern void *__alloc_percpu(size_t size); 45extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
36extern void free_percpu(const void *); 46extern void percpu_depopulate(void *__pdata, int cpu);
47extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
48 cpumask_t *mask);
49extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
50extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
51extern void percpu_free(void *__pdata);
37 52
38#else /* CONFIG_SMP */ 53#else /* CONFIG_SMP */
39 54
40#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) 55#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
56
57static inline void percpu_depopulate(void *__pdata, int cpu)
58{
59}
60
61static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
62{
63}
41 64
42static inline void *__alloc_percpu(size_t size) 65static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
66 int cpu)
43{ 67{
44 void *ret = kmalloc(size, GFP_KERNEL); 68 return percpu_ptr(__pdata, cpu);
45 if (ret)
46 memset(ret, 0, size);
47 return ret;
48} 69}
49static inline void free_percpu(const void *ptr) 70
50{ 71static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
51 kfree(ptr); 72 cpumask_t *mask)
73{
74 return 0;
75}
76
77static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
78{
79 return kzalloc(size, gfp);
80}
81
82static inline void percpu_free(void *__pdata)
83{
84 kfree(__pdata);
52} 85}
53 86
54#endif /* CONFIG_SMP */ 87#endif /* CONFIG_SMP */
55 88
56/* Simple wrapper for the common case: zeros memory. */ 89#define percpu_populate_mask(__pdata, size, gfp, mask) \
57#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) 90 __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
91#define percpu_depopulate_mask(__pdata, mask) \
92 __percpu_depopulate_mask((__pdata), &(mask))
93#define percpu_alloc_mask(size, gfp, mask) \
94 __percpu_alloc_mask((size), (gfp), &(mask))
95
96#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
97
98/* (legacy) interface for use without CPU hotplug handling */
99
100#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
101 cpu_possible_map)
102#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
103#define free_percpu(ptr) percpu_free((ptr))
104#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
58 105
59#endif /* __LINUX_PERCPU_H */ 106#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index a376bd4ade39..81e9299ca148 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,21 +3,25 @@
3 3
4#ifdef CONFIG_PM_TRACE 4#ifdef CONFIG_PM_TRACE
5 5
6extern int pm_trace_enabled;
7
6struct device; 8struct device;
7extern void set_trace_device(struct device *); 9extern void set_trace_device(struct device *);
8extern void generate_resume_trace(void *tracedata, unsigned int user); 10extern void generate_resume_trace(void *tracedata, unsigned int user);
9 11
10#define TRACE_DEVICE(dev) set_trace_device(dev) 12#define TRACE_DEVICE(dev) set_trace_device(dev)
11#define TRACE_RESUME(user) do { \ 13#define TRACE_RESUME(user) do { \
12 void *tracedata; \ 14 if (pm_trace_enabled) { \
13 asm volatile("movl $1f,%0\n" \ 15 void *tracedata; \
14 ".section .tracedata,\"a\"\n" \ 16 asm volatile("movl $1f,%0\n" \
15 "1:\t.word %c1\n" \ 17 ".section .tracedata,\"a\"\n" \
16 "\t.long %c2\n" \ 18 "1:\t.word %c1\n" \
17 ".previous" \ 19 "\t.long %c2\n" \
18 :"=r" (tracedata) \ 20 ".previous" \
19 : "i" (__LINE__), "i" (__FILE__)); \ 21 :"=r" (tracedata) \
20 generate_resume_trace(tracedata, user); \ 22 : "i" (__LINE__), "i" (__FILE__)); \
23 generate_resume_trace(tracedata, user); \
24 } \
21} while (0) 25} while (0)
22 26
23#else 27#else
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b0900014..db2c1df4fef9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
103 */ 103 */
104unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); 104unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
105 105
106/*
107 * Cleans the PTEs of shared mappings.
108 * (and since clean PTEs should also be readonly, write protects them too)
109 *
110 * returns the number of cleaned PTEs.
111 */
112int page_mkclean(struct page *);
113
106#else /* !CONFIG_MMU */ 114#else /* !CONFIG_MMU */
107 115
108#define anon_vma_init() do {} while (0) 116#define anon_vma_init() do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
112#define page_referenced(page,l) TestClearPageReferenced(page) 120#define page_referenced(page,l) TestClearPageReferenced(page)
113#define try_to_unmap(page, refs) SWAP_FAIL 121#define try_to_unmap(page, refs) SWAP_FAIL
114 122
123static inline int page_mkclean(struct page *page)
124{
125 return 0;
126}
127
128
115#endif /* CONFIG_MMU */ 129#endif /* CONFIG_MMU */
116 130
117/* 131/*
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index aad4e390d6a5..d1b7ca6c1c57 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
46 46
47/** 47/**
48 * selinux_audit_rule_match - determine if a context ID matches a rule. 48 * selinux_audit_rule_match - determine if a context ID matches a rule.
49 * @ctxid: the context ID to check 49 * @sid: the context ID to check
50 * @field: the field this rule refers to 50 * @field: the field this rule refers to
51 * @op: the operater the rule uses 51 * @op: the operater the rule uses
52 * @rule: pointer to the audit rule to check against 52 * @rule: pointer to the audit rule to check against
@@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
55 * Returns 1 if the context id matches the rule, 0 if it does not, and 55 * Returns 1 if the context id matches the rule, 0 if it does not, and
56 * -errno on failure. 56 * -errno on failure.
57 */ 57 */
58int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, 58int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
59 struct selinux_audit_rule *rule, 59 struct selinux_audit_rule *rule,
60 struct audit_context *actx); 60 struct audit_context *actx);
61 61
@@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
70void selinux_audit_set_callback(int (*callback)(void)); 70void selinux_audit_set_callback(int (*callback)(void));
71 71
72/** 72/**
73 * selinux_task_ctxid - determine a context ID for a process. 73 * selinux_sid_to_string - map a security context ID to a string
74 * @tsk: the task object 74 * @sid: security context ID to be converted.
75 * @ctxid: ID value returned via this
76 *
77 * On return, ctxid will contain an ID for the context. This value
78 * should only be used opaquely.
79 */
80void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
81
82/**
83 * selinux_ctxid_to_string - map a security context ID to a string
84 * @ctxid: security context ID to be converted.
85 * @ctx: address of context string to be returned 75 * @ctx: address of context string to be returned
86 * @ctxlen: length of returned context string. 76 * @ctxlen: length of returned context string.
87 * 77 *
@@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
89 * string will be allocated internally, and the caller must call 79 * string will be allocated internally, and the caller must call
90 * kfree() on it after use. 80 * kfree() on it after use.
91 */ 81 */
92int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen); 82int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen);
93 83
94/** 84/**
95 * selinux_get_inode_sid - get the inode's security context ID 85 * selinux_get_inode_sid - get the inode's security context ID
@@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
154 return; 144 return;
155} 145}
156 146
157static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, 147static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
158 struct selinux_audit_rule *rule, 148 struct selinux_audit_rule *rule,
159 struct audit_context *actx) 149 struct audit_context *actx)
160{ 150{
@@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
166 return; 156 return;
167} 157}
168 158
169static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) 159static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
170{
171 *ctxid = 0;
172}
173
174static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
175{ 160{
176 *ctx = NULL; 161 *ctx = NULL;
177 *ctxlen = 0; 162 *ctxlen = 0;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45ad55b70d1c..66d6eb78d1c6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
67extern void kmem_cache_free(kmem_cache_t *, void *); 67extern void kmem_cache_free(kmem_cache_t *, void *);
68extern unsigned int kmem_cache_size(kmem_cache_t *); 68extern unsigned int kmem_cache_size(kmem_cache_t *);
69extern const char *kmem_cache_name(kmem_cache_t *); 69extern const char *kmem_cache_name(kmem_cache_t *);
70extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
71 70
72/* Size description struct for general caches. */ 71/* Size description struct for general caches. */
73struct cache_sizes { 72struct cache_sizes {
@@ -203,7 +202,30 @@ extern int slab_is_available(void);
203 202
204#ifdef CONFIG_NUMA 203#ifdef CONFIG_NUMA
205extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); 204extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
206extern void *kmalloc_node(size_t size, gfp_t flags, int node); 205extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
206
207static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
208{
209 if (__builtin_constant_p(size)) {
210 int i = 0;
211#define CACHE(x) \
212 if (size <= x) \
213 goto found; \
214 else \
215 i++;
216#include "kmalloc_sizes.h"
217#undef CACHE
218 {
219 extern void __you_cannot_kmalloc_that_much(void);
220 __you_cannot_kmalloc_that_much();
221 }
222found:
223 return kmem_cache_alloc_node((flags & GFP_DMA) ?
224 malloc_sizes[i].cs_dmacachep :
225 malloc_sizes[i].cs_cachep, flags, node);
226 }
227 return __kmalloc_node(size, flags, node);
228}
207#else 229#else
208static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) 230static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
209{ 231{
@@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
223/* SLOB allocator routines */ 245/* SLOB allocator routines */
224 246
225void kmem_cache_init(void); 247void kmem_cache_init(void);
226struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
227struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, 248struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
228 unsigned long, 249 unsigned long,
229 void (*)(void *, struct kmem_cache *, unsigned long), 250 void (*)(void *, struct kmem_cache *, unsigned long),
@@ -263,8 +284,6 @@ extern kmem_cache_t *fs_cachep;
263extern kmem_cache_t *sighand_cachep; 284extern kmem_cache_t *sighand_cachep;
264extern kmem_cache_t *bio_cachep; 285extern kmem_cache_t *bio_cachep;
265 286
266extern atomic_t slab_reclaim_pages;
267
268#endif /* __KERNEL__ */ 287#endif /* __KERNEL__ */
269 288
270#endif /* _LINUX_SLAB_H */ 289#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 837e8bce1349..51649987f691 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus);
53 */ 53 */
54int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); 54int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
55 55
56int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
57 int retry, int wait);
58
56/* 59/*
57 * Call a function on all processors 60 * Call a function on all processors
58 */ 61 */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 96e31aa64cc7..b1237f16ecde 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -10,29 +10,11 @@
10#include <linux/pm.h> 10#include <linux/pm.h>
11 11
12/* page backup entry */ 12/* page backup entry */
13typedef struct pbe { 13struct pbe {
14 unsigned long address; /* address of the copy */ 14 unsigned long address; /* address of the copy */
15 unsigned long orig_address; /* original address of page */ 15 unsigned long orig_address; /* original address of page */
16 struct pbe *next; 16 struct pbe *next;
17} suspend_pagedir_t; 17};
18
19#define for_each_pbe(pbe, pblist) \
20 for (pbe = pblist ; pbe ; pbe = pbe->next)
21
22#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe))
23#define PB_PAGE_SKIP (PBES_PER_PAGE-1)
24
25#define for_each_pb_page(pbe, pblist) \
26 for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
27
28
29#define SWAP_FILENAME_MAXLENGTH 32
30
31
32extern dev_t swsusp_resume_device;
33
34/* mm/vmscan.c */
35extern int shrink_mem(void);
36 18
37/* mm/page_alloc.c */ 19/* mm/page_alloc.c */
38extern void drain_local_pages(void); 20extern void drain_local_pages(void);
@@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {}
53static inline int software_suspend(void) 35static inline int software_suspend(void)
54{ 36{
55 printk("Warning: fake suspend called\n"); 37 printk("Warning: fake suspend called\n");
56 return -EPERM; 38 return -ENOSYS;
57} 39}
58#endif /* CONFIG_PM */ 40#endif /* CONFIG_PM */
59 41
60#ifdef CONFIG_SUSPEND_SMP
61extern void disable_nonboot_cpus(void);
62extern void enable_nonboot_cpus(void);
63#else
64static inline void disable_nonboot_cpus(void) {}
65static inline void enable_nonboot_cpus(void) {}
66#endif
67
68void save_processor_state(void); 42void save_processor_state(void);
69void restore_processor_state(void); 43void restore_processor_state(void);
70struct saved_context; 44struct saved_context;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5e59184c9096..e7c36ba2a2db 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,10 @@
10#include <asm/atomic.h> 10#include <asm/atomic.h>
11#include <asm/page.h> 11#include <asm/page.h>
12 12
13struct notifier_block;
14
15struct bio;
16
13#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ 17#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
14#define SWAP_FLAG_PRIO_MASK 0x7fff 18#define SWAP_FLAG_PRIO_MASK 0x7fff
15#define SWAP_FLAG_PRIO_SHIFT 0 19#define SWAP_FLAG_PRIO_SHIFT 0
@@ -156,13 +160,14 @@ struct swap_list_t {
156 160
157/* linux/mm/oom_kill.c */ 161/* linux/mm/oom_kill.c */
158extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); 162extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
163extern int register_oom_notifier(struct notifier_block *nb);
164extern int unregister_oom_notifier(struct notifier_block *nb);
159 165
160/* linux/mm/memory.c */ 166/* linux/mm/memory.c */
161extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); 167extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
162 168
163/* linux/mm/page_alloc.c */ 169/* linux/mm/page_alloc.c */
164extern unsigned long totalram_pages; 170extern unsigned long totalram_pages;
165extern unsigned long totalhigh_pages;
166extern unsigned long totalreserve_pages; 171extern unsigned long totalreserve_pages;
167extern long nr_swap_pages; 172extern long nr_swap_pages;
168extern unsigned int nr_free_pages(void); 173extern unsigned int nr_free_pages(void);
@@ -190,6 +195,7 @@ extern long vm_total_pages;
190#ifdef CONFIG_NUMA 195#ifdef CONFIG_NUMA
191extern int zone_reclaim_mode; 196extern int zone_reclaim_mode;
192extern int sysctl_min_unmapped_ratio; 197extern int sysctl_min_unmapped_ratio;
198extern int sysctl_min_slab_ratio;
193extern int zone_reclaim(struct zone *, gfp_t, unsigned int); 199extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
194#else 200#else
195#define zone_reclaim_mode 0 201#define zone_reclaim_mode 0
@@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
212/* linux/mm/page_io.c */ 218/* linux/mm/page_io.c */
213extern int swap_readpage(struct file *, struct page *); 219extern int swap_readpage(struct file *, struct page *);
214extern int swap_writepage(struct page *page, struct writeback_control *wbc); 220extern int swap_writepage(struct page *page, struct writeback_control *wbc);
215extern int rw_swap_page_sync(int, swp_entry_t, struct page *); 221extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
222 struct bio **bio_chain);
223extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
216 224
217/* linux/mm/swap_state.c */ 225/* linux/mm/swap_state.c */
218extern struct address_space swapper_space; 226extern struct address_space swapper_space;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed917a4f8..eca555781d05 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -191,6 +191,7 @@ enum
191 VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ 191 VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ 192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ 193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
194 VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */
194}; 195};
195 196
196 197
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 71b6363caaaf..dee88c6b6fa7 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size);
44extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); 44extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
45extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, 45extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
46 pgprot_t prot); 46 pgprot_t prot);
47extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
48 pgprot_t prot, int node);
49extern void vfree(void *addr); 47extern void vfree(void *addr);
50 48
51extern void *vmap(struct page **pages, unsigned int count, 49extern void *vmap(struct page **pages, unsigned int count,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d9b1b60798a..176c7f797339 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -18,7 +18,19 @@
18 * generated will simply be the increment of a global address. 18 * generated will simply be the increment of a global address.
19 */ 19 */
20 20
21#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH 21#ifdef CONFIG_ZONE_DMA32
22#define DMA32_ZONE(xx) xx##_DMA32,
23#else
24#define DMA32_ZONE(xx)
25#endif
26
27#ifdef CONFIG_HIGHMEM
28#define HIGHMEM_ZONE(xx) , xx##_HIGH
29#else
30#define HIGHMEM_ZONE(xx)
31#endif
32
33#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
22 34
23enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, 35enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
24 FOR_ALL_ZONES(PGALLOC), 36 FOR_ALL_ZONES(PGALLOC),
@@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node,
124 struct zone *zones = NODE_DATA(node)->node_zones; 136 struct zone *zones = NODE_DATA(node)->node_zones;
125 137
126 return 138 return
127#ifndef CONFIG_DMA_IS_NORMAL 139#ifdef CONFIG_ZONE_DMA32
128#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
129 zone_page_state(&zones[ZONE_DMA32], item) + 140 zone_page_state(&zones[ZONE_DMA32], item) +
130#endif 141#endif
131 zone_page_state(&zones[ZONE_NORMAL], item) + 142 zone_page_state(&zones[ZONE_NORMAL], item) +
132#endif
133#ifdef CONFIG_HIGHMEM 143#ifdef CONFIG_HIGHMEM
134 zone_page_state(&zones[ZONE_HIGHMEM], item) + 144 zone_page_state(&zones[ZONE_HIGHMEM], item) +
135#endif 145#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0422036af4eb..56a23a0e7f2e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
116 loff_t pos, loff_t count); 116 loff_t pos, loff_t count);
117int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, 117int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
118 loff_t pos, loff_t count); 118 loff_t pos, loff_t count);
119void set_page_dirty_balance(struct page *page);
119 120
120/* pdflush.c */ 121/* pdflush.c */
121extern int nr_pdflush_threads; /* Global so it can be exported to sysctl 122extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 59406e0dc5b2..2d72496c2029 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -130,8 +130,9 @@ extern int cipso_v4_rbm_strictvalid;
130int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); 130int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
131int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); 131int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head));
132struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); 132struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
133struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); 133int cipso_v4_doi_walk(u32 *skip_cnt,
134struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); 134 int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
135 void *cb_arg);
135int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); 136int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain);
136int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, 137int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
137 const char *domain); 138 const char *domain);
@@ -152,14 +153,11 @@ static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
152 return NULL; 153 return NULL;
153} 154}
154 155
155static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) 156static inline int cipso_v4_doi_walk(u32 *skip_cnt,
157 int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
158 void *cb_arg)
156{ 159{
157 return NULL; 160 return 0;
158}
159
160static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
161{
162 return NULL;
163} 161}
164 162
165static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, 163static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def,
@@ -205,6 +203,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
205int cipso_v4_socket_setattr(const struct socket *sock, 203int cipso_v4_socket_setattr(const struct socket *sock,
206 const struct cipso_v4_doi *doi_def, 204 const struct cipso_v4_doi *doi_def,
207 const struct netlbl_lsm_secattr *secattr); 205 const struct netlbl_lsm_secattr *secattr);
206int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr);
208int cipso_v4_socket_getattr(const struct socket *sock, 207int cipso_v4_socket_getattr(const struct socket *sock,
209 struct netlbl_lsm_secattr *secattr); 208 struct netlbl_lsm_secattr *secattr);
210int cipso_v4_skbuff_getattr(const struct sk_buff *skb, 209int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
@@ -225,6 +224,12 @@ static inline int cipso_v4_socket_setattr(const struct socket *sock,
225 return -ENOSYS; 224 return -ENOSYS;
226} 225}
227 226
227static inline int cipso_v4_sock_getattr(struct sock *sk,
228 struct netlbl_lsm_secattr *secattr)
229{
230 return -ENOSYS;
231}
232
228static inline int cipso_v4_socket_getattr(const struct socket *sock, 233static inline int cipso_v4_socket_getattr(const struct socket *sock,
229 struct netlbl_lsm_secattr *secattr) 234 struct netlbl_lsm_secattr *secattr)
230{ 235{
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index dd5780b36919..6692430063fd 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -57,9 +57,8 @@
57 * The payload is dependent on the subsystem specified in the 57 * The payload is dependent on the subsystem specified in the
58 * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions 58 * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions
59 * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c 59 * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c
60 * file. All of the fields in the NetLabel payload are NETLINK attributes, the 60 * file. All of the fields in the NetLabel payload are NETLINK attributes, see
61 * length of each field is the length of the NETLINK attribute payload, see 61 * the include/net/netlink.h file for more information on NETLINK attributes.
62 * include/net/netlink.h for more information on NETLINK attributes.
63 * 62 *
64 */ 63 */
65 64
@@ -82,50 +81,6 @@
82#define NETLBL_NLTYPE_UNLABELED 5 81#define NETLBL_NLTYPE_UNLABELED 5
83#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" 82#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL"
84 83
85/* NetLabel return codes */
86#define NETLBL_E_OK 0
87
88/*
89 * Helper functions
90 */
91
92#define NETLBL_LEN_U8 nla_total_size(sizeof(u8))
93#define NETLBL_LEN_U16 nla_total_size(sizeof(u16))
94#define NETLBL_LEN_U32 nla_total_size(sizeof(u32))
95
96/**
97 * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer
98 * @head: the amount of headroom in bytes
99 * @body: the desired size (minus headroom) in bytes
100 * @gfp_flags: the alloc flags to pass to alloc_skb()
101 *
102 * Description:
103 * Allocate a NETLINK message buffer based on the sizes given in @head and
104 * @body. If @head is greater than zero skb_reserve() is called to reserve
105 * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on
106 * success, NULL on failure.
107 *
108 */
109static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head,
110 size_t body,
111 gfp_t gfp_flags)
112{
113 struct sk_buff *skb;
114
115 skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags);
116 if (skb == NULL)
117 return NULL;
118 if (head > 0) {
119 skb_reserve(skb, head);
120 if (skb_tailroom(skb) < body) {
121 kfree_skb(skb);
122 return NULL;
123 }
124 }
125
126 return skb;
127}
128
129/* 84/*
130 * NetLabel - Kernel API for accessing the network packet label mappings. 85 * NetLabel - Kernel API for accessing the network packet label mappings.
131 * 86 *
@@ -238,6 +193,8 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr,
238#ifdef CONFIG_NETLABEL 193#ifdef CONFIG_NETLABEL
239int netlbl_socket_setattr(const struct socket *sock, 194int netlbl_socket_setattr(const struct socket *sock,
240 const struct netlbl_lsm_secattr *secattr); 195 const struct netlbl_lsm_secattr *secattr);
196int netlbl_sock_getattr(struct sock *sk,
197 struct netlbl_lsm_secattr *secattr);
241int netlbl_socket_getattr(const struct socket *sock, 198int netlbl_socket_getattr(const struct socket *sock,
242 struct netlbl_lsm_secattr *secattr); 199 struct netlbl_lsm_secattr *secattr);
243int netlbl_skbuff_getattr(const struct sk_buff *skb, 200int netlbl_skbuff_getattr(const struct sk_buff *skb,
@@ -250,6 +207,12 @@ static inline int netlbl_socket_setattr(const struct socket *sock,
250 return -ENOSYS; 207 return -ENOSYS;
251} 208}
252 209
210static inline int netlbl_sock_getattr(struct sock *sk,
211 struct netlbl_lsm_secattr *secattr)
212{
213 return -ENOSYS;
214}
215
253static inline int netlbl_socket_getattr(const struct socket *sock, 216static inline int netlbl_socket_getattr(const struct socket *sock,
254 struct netlbl_lsm_secattr *secattr) 217 struct netlbl_lsm_secattr *secattr)
255{ 218{
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 11dc2e7f679a..4ab68a7a636a 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -146,11 +146,13 @@
146 * nla_ok(nla, remaining) does nla fit into remaining bytes? 146 * nla_ok(nla, remaining) does nla fit into remaining bytes?
147 * nla_next(nla, remaining) get next netlink attribute 147 * nla_next(nla, remaining) get next netlink attribute
148 * nla_validate() validate a stream of attributes 148 * nla_validate() validate a stream of attributes
149 * nla_validate_nested() validate a stream of nested attributes
149 * nla_find() find attribute in stream of attributes 150 * nla_find() find attribute in stream of attributes
150 * nla_find_nested() find attribute in nested attributes 151 * nla_find_nested() find attribute in nested attributes
151 * nla_parse() parse and validate stream of attrs 152 * nla_parse() parse and validate stream of attrs
152 * nla_parse_nested() parse nested attributes 153 * nla_parse_nested() parse nested attributes
153 * nla_for_each_attr() loop over all attributes 154 * nla_for_each_attr() loop over all attributes
155 * nla_for_each_nested() loop over the nested attributes
154 *========================================================================= 156 *=========================================================================
155 */ 157 */
156 158
@@ -950,6 +952,24 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
950} 952}
951 953
952/** 954/**
955 * nla_validate_nested - Validate a stream of nested attributes
956 * @start: container attribute
957 * @maxtype: maximum attribute type to be expected
958 * @policy: validation policy
959 *
960 * Validates all attributes in the nested attribute stream against the
961 * specified policy. Attributes with a type exceeding maxtype will be
962 * ignored. See documenation of struct nla_policy for more details.
963 *
964 * Returns 0 on success or a negative error code.
965 */
966static inline int nla_validate_nested(struct nlattr *start, int maxtype,
967 struct nla_policy *policy)
968{
969 return nla_validate(nla_data(start), nla_len(start), maxtype, policy);
970}
971
972/**
953 * nla_for_each_attr - iterate over a stream of attributes 973 * nla_for_each_attr - iterate over a stream of attributes
954 * @pos: loop counter, set to current attribute 974 * @pos: loop counter, set to current attribute
955 * @head: head of attribute stream 975 * @head: head of attribute stream
diff --git a/kernel/audit.c b/kernel/audit.c
index 963fd15c9621..f9889ee77825 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
244 char *ctx = NULL; 244 char *ctx = NULL;
245 u32 len; 245 u32 len;
246 int rc; 246 int rc;
247 if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) 247 if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
248 return rc; 248 return rc;
249 else 249 else
250 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, 250 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
267 char *ctx = NULL; 267 char *ctx = NULL;
268 u32 len; 268 u32 len;
269 int rc; 269 int rc;
270 if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) 270 if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
271 return rc; 271 return rc;
272 else 272 else
273 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, 273 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
293 char *ctx = NULL; 293 char *ctx = NULL;
294 u32 len; 294 u32 len;
295 int rc; 295 int rc;
296 if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) 296 if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
297 return rc; 297 return rc;
298 else 298 else
299 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, 299 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
321 char *ctx = NULL; 321 char *ctx = NULL;
322 u32 len; 322 u32 len;
323 int rc; 323 int rc;
324 if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) 324 if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
325 return rc; 325 return rc;
326 else 326 else
327 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, 327 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
538 if (status_get->mask & AUDIT_STATUS_PID) { 538 if (status_get->mask & AUDIT_STATUS_PID) {
539 int old = audit_pid; 539 int old = audit_pid;
540 if (sid) { 540 if (sid) {
541 if ((err = selinux_ctxid_to_string( 541 if ((err = selinux_sid_to_string(
542 sid, &ctx, &len))) 542 sid, &ctx, &len)))
543 return err; 543 return err;
544 else 544 else
@@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
576 "user pid=%d uid=%u auid=%u", 576 "user pid=%d uid=%u auid=%u",
577 pid, uid, loginuid); 577 pid, uid, loginuid);
578 if (sid) { 578 if (sid) {
579 if (selinux_ctxid_to_string( 579 if (selinux_sid_to_string(
580 sid, &ctx, &len)) { 580 sid, &ctx, &len)) {
581 audit_log_format(ab, 581 audit_log_format(ab,
582 " ssid=%u", sid); 582 " ssid=%u", sid);
@@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
614 loginuid, sid); 614 loginuid, sid);
615 break; 615 break;
616 case AUDIT_SIGNAL_INFO: 616 case AUDIT_SIGNAL_INFO:
617 err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len); 617 err = selinux_sid_to_string(audit_sig_sid, &ctx, &len);
618 if (err) 618 if (err)
619 return err; 619 return err;
620 sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); 620 sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a44879b0c72f..1a58a81fb09d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
1398 if (sid) { 1398 if (sid) {
1399 char *ctx = NULL; 1399 char *ctx = NULL;
1400 u32 len; 1400 u32 len;
1401 if (selinux_ctxid_to_string(sid, &ctx, &len)) 1401 if (selinux_sid_to_string(sid, &ctx, &len))
1402 audit_log_format(ab, " ssid=%u", sid); 1402 audit_log_format(ab, " ssid=%u", sid);
1403 else 1403 else
1404 audit_log_format(ab, " subj=%s", ctx); 1404 audit_log_format(ab, " subj=%s", ctx);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1bd8827a0102..fb83c5cb8c32 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk,
385 logged upon error */ 385 logged upon error */
386 if (f->se_rule) { 386 if (f->se_rule) {
387 if (need_sid) { 387 if (need_sid) {
388 selinux_task_ctxid(tsk, &sid); 388 selinux_get_task_sid(tsk, &sid);
389 need_sid = 0; 389 need_sid = 0;
390 } 390 }
391 result = selinux_audit_rule_match(sid, f->type, 391 result = selinux_audit_rule_match(sid, f->type,
@@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
898 if (axi->osid != 0) { 898 if (axi->osid != 0) {
899 char *ctx = NULL; 899 char *ctx = NULL;
900 u32 len; 900 u32 len;
901 if (selinux_ctxid_to_string( 901 if (selinux_sid_to_string(
902 axi->osid, &ctx, &len)) { 902 axi->osid, &ctx, &len)) {
903 audit_log_format(ab, " osid=%u", 903 audit_log_format(ab, " osid=%u",
904 axi->osid); 904 axi->osid);
@@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
1005 if (n->osid != 0) { 1005 if (n->osid != 0) {
1006 char *ctx = NULL; 1006 char *ctx = NULL;
1007 u32 len; 1007 u32 len;
1008 if (selinux_ctxid_to_string( 1008 if (selinux_sid_to_string(
1009 n->osid, &ctx, &len)) { 1009 n->osid, &ctx, &len)) {
1010 audit_log_format(ab, " osid=%u", n->osid); 1010 audit_log_format(ab, " osid=%u", n->osid);
1011 call_panic = 2; 1011 call_panic = 2;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f230f9ae01c2..32c96628463e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
21 21
22static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); 22static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
23 23
24/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
25 * Should always be manipulated under cpu_add_remove_lock
26 */
27static int cpu_hotplug_disabled;
28
24#ifdef CONFIG_HOTPLUG_CPU 29#ifdef CONFIG_HOTPLUG_CPU
25 30
26/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ 31/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
108 return 0; 113 return 0;
109} 114}
110 115
111int cpu_down(unsigned int cpu) 116/* Requires cpu_add_remove_lock to be held */
117static int _cpu_down(unsigned int cpu)
112{ 118{
113 int err; 119 int err;
114 struct task_struct *p; 120 struct task_struct *p;
115 cpumask_t old_allowed, tmp; 121 cpumask_t old_allowed, tmp;
116 122
117 mutex_lock(&cpu_add_remove_lock); 123 if (num_online_cpus() == 1)
118 if (num_online_cpus() == 1) { 124 return -EBUSY;
119 err = -EBUSY;
120 goto out;
121 }
122 125
123 if (!cpu_online(cpu)) { 126 if (!cpu_online(cpu))
124 err = -EINVAL; 127 return -EINVAL;
125 goto out;
126 }
127 128
128 err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, 129 err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
129 (void *)(long)cpu); 130 (void *)(long)cpu);
130 if (err == NOTIFY_BAD) { 131 if (err == NOTIFY_BAD) {
131 printk("%s: attempt to take down CPU %u failed\n", 132 printk("%s: attempt to take down CPU %u failed\n",
132 __FUNCTION__, cpu); 133 __FUNCTION__, cpu);
133 err = -EINVAL; 134 return -EINVAL;
134 goto out;
135 } 135 }
136 136
137 /* Ensure that we are not runnable on dying cpu */ 137 /* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,32 @@ out_thread:
179 err = kthread_stop(p); 179 err = kthread_stop(p);
180out_allowed: 180out_allowed:
181 set_cpus_allowed(current, old_allowed); 181 set_cpus_allowed(current, old_allowed);
182out: 182 return err;
183}
184
185int cpu_down(unsigned int cpu)
186{
187 int err = 0;
188
189 mutex_lock(&cpu_add_remove_lock);
190 if (cpu_hotplug_disabled)
191 err = -EBUSY;
192 else
193 err = _cpu_down(cpu);
194
183 mutex_unlock(&cpu_add_remove_lock); 195 mutex_unlock(&cpu_add_remove_lock);
184 return err; 196 return err;
185} 197}
186#endif /*CONFIG_HOTPLUG_CPU*/ 198#endif /*CONFIG_HOTPLUG_CPU*/
187 199
188int __devinit cpu_up(unsigned int cpu) 200/* Requires cpu_add_remove_lock to be held */
201static int __devinit _cpu_up(unsigned int cpu)
189{ 202{
190 int ret; 203 int ret;
191 void *hcpu = (void *)(long)cpu; 204 void *hcpu = (void *)(long)cpu;
192 205
193 mutex_lock(&cpu_add_remove_lock); 206 if (cpu_online(cpu) || !cpu_present(cpu))
194 if (cpu_online(cpu) || !cpu_present(cpu)) { 207 return -EINVAL;
195 ret = -EINVAL;
196 goto out;
197 }
198 208
199 ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); 209 ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
200 if (ret == NOTIFY_BAD) { 210 if (ret == NOTIFY_BAD) {
@@ -219,7 +229,95 @@ out_notify:
219 if (ret != 0) 229 if (ret != 0)
220 blocking_notifier_call_chain(&cpu_chain, 230 blocking_notifier_call_chain(&cpu_chain,
221 CPU_UP_CANCELED, hcpu); 231 CPU_UP_CANCELED, hcpu);
232
233 return ret;
234}
235
236int __devinit cpu_up(unsigned int cpu)
237{
238 int err = 0;
239
240 mutex_lock(&cpu_add_remove_lock);
241 if (cpu_hotplug_disabled)
242 err = -EBUSY;
243 else
244 err = _cpu_up(cpu);
245
246 mutex_unlock(&cpu_add_remove_lock);
247 return err;
248}
249
250#ifdef CONFIG_SUSPEND_SMP
251static cpumask_t frozen_cpus;
252
253int disable_nonboot_cpus(void)
254{
255 int cpu, first_cpu, error;
256
257 mutex_lock(&cpu_add_remove_lock);
258 first_cpu = first_cpu(cpu_present_map);
259 if (!cpu_online(first_cpu)) {
260 error = _cpu_up(first_cpu);
261 if (error) {
262 printk(KERN_ERR "Could not bring CPU%d up.\n",
263 first_cpu);
264 goto out;
265 }
266 }
267 error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
268 if (error) {
269 printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
270 goto out;
271 }
272 /* We take down all of the non-boot CPUs in one shot to avoid races
273 * with the userspace trying to use the CPU hotplug at the same time
274 */
275 cpus_clear(frozen_cpus);
276 printk("Disabling non-boot CPUs ...\n");
277 for_each_online_cpu(cpu) {
278 if (cpu == first_cpu)
279 continue;
280 error = _cpu_down(cpu);
281 if (!error) {
282 cpu_set(cpu, frozen_cpus);
283 printk("CPU%d is down\n", cpu);
284 } else {
285 printk(KERN_ERR "Error taking CPU%d down: %d\n",
286 cpu, error);
287 break;
288 }
289 }
290 if (!error) {
291 BUG_ON(num_online_cpus() > 1);
292 /* Make sure the CPUs won't be enabled by someone else */
293 cpu_hotplug_disabled = 1;
294 } else {
295 printk(KERN_ERR "Non-boot CPUs are not disabled");
296 }
222out: 297out:
223 mutex_unlock(&cpu_add_remove_lock); 298 mutex_unlock(&cpu_add_remove_lock);
224 return ret; 299 return error;
300}
301
302void enable_nonboot_cpus(void)
303{
304 int cpu, error;
305
306 /* Allow everyone to use the CPU hotplug again */
307 mutex_lock(&cpu_add_remove_lock);
308 cpu_hotplug_disabled = 0;
309 mutex_unlock(&cpu_add_remove_lock);
310
311 printk("Enabling non-boot CPUs ...\n");
312 for_each_cpu_mask(cpu, frozen_cpus) {
313 error = cpu_up(cpu);
314 if (!error) {
315 printk("CPU%d is up\n", cpu);
316 continue;
317 }
318 printk(KERN_WARNING "Error taking CPU%d up: %d\n",
319 cpu, error);
320 }
321 cpus_clear(frozen_cpus);
225} 322}
323#endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0dc2fc5..cff41511269f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2245,7 +2245,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
2245 int i; 2245 int i;
2246 2246
2247 for (i = 0; zl->zones[i]; i++) { 2247 for (i = 0; zl->zones[i]; i++) {
2248 int nid = zl->zones[i]->zone_pgdat->node_id; 2248 int nid = zone_to_nid(zl->zones[i]);
2249 2249
2250 if (node_isset(nid, current->mems_allowed)) 2250 if (node_isset(nid, current->mems_allowed))
2251 return 1; 2251 return 1;
@@ -2316,9 +2316,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
2316 const struct cpuset *cs; /* current cpuset ancestors */ 2316 const struct cpuset *cs; /* current cpuset ancestors */
2317 int allowed; /* is allocation in zone z allowed? */ 2317 int allowed; /* is allocation in zone z allowed? */
2318 2318
2319 if (in_interrupt()) 2319 if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
2320 return 1; 2320 return 1;
2321 node = z->zone_pgdat->node_id; 2321 node = zone_to_nid(z);
2322 might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); 2322 might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
2323 if (node_isset(node, current->mems_allowed)) 2323 if (node_isset(node, current->mems_allowed))
2324 return 1; 2324 return 1;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 48a53f68af96..4c6cdbaed661 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
154 return retval; 154 return retval;
155} 155}
156 156
157#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
157/** 158/**
158 * __do_IRQ - original all in one highlevel IRQ handler 159 * __do_IRQ - original all in one highlevel IRQ handler
159 * @irq: the interrupt number 160 * @irq: the interrupt number
@@ -253,6 +254,7 @@ out:
253 254
254 return 1; 255 return 1;
255} 256}
257#endif
256 258
257#ifdef CONFIG_TRACE_IRQFLAGS 259#ifdef CONFIG_TRACE_IRQFLAGS
258 260
diff --git a/kernel/module.c b/kernel/module.c
index 2a19cd47c046..b7fe6e840963 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1054,6 +1054,12 @@ static int mod_sysfs_setup(struct module *mod,
1054{ 1054{
1055 int err; 1055 int err;
1056 1056
1057 if (!module_subsys.kset.subsys) {
1058 printk(KERN_ERR "%s: module_subsys not initialized\n",
1059 mod->name);
1060 err = -EINVAL;
1061 goto out;
1062 }
1057 memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); 1063 memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
1058 err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); 1064 err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name);
1059 if (err) 1065 if (err)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 619ecabf7c58..4b6e2f18e056 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -36,6 +36,17 @@ config PM_DEBUG
36 code. This is helpful when debugging and reporting various PM bugs, 36 code. This is helpful when debugging and reporting various PM bugs,
37 like suspend support. 37 like suspend support.
38 38
39config DISABLE_CONSOLE_SUSPEND
40 bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
41 depends on PM && PM_DEBUG
42 default n
43 ---help---
44 This option turns off the console suspend mechanism that prevents
45 debug messages from reaching the console during the suspend/resume
46 operations. This may be helpful when debugging device drivers'
47 suspend/resume routines, but may itself lead to problems, for example
48 if netconsole is used.
49
39config PM_TRACE 50config PM_TRACE
40 bool "Suspend/resume event tracing" 51 bool "Suspend/resume event tracing"
41 depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL 52 depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 8d0af3d37a4b..38725f526afc 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,6 +7,4 @@ obj-y := main.o process.o console.o
7obj-$(CONFIG_PM_LEGACY) += pm.o 7obj-$(CONFIG_PM_LEGACY) += pm.o
8obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o 8obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o
9 9
10obj-$(CONFIG_SUSPEND_SMP) += smp.o
11
12obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o 10obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e13e74067845..7c7b9b65e365 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -18,6 +18,7 @@
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/pm.h> 20#include <linux/pm.h>
21#include <linux/cpu.h>
21 22
22#include "power.h" 23#include "power.h"
23 24
@@ -72,7 +73,10 @@ static int prepare_processes(void)
72 int error; 73 int error;
73 74
74 pm_prepare_console(); 75 pm_prepare_console();
75 disable_nonboot_cpus(); 76
77 error = disable_nonboot_cpus();
78 if (error)
79 goto enable_cpus;
76 80
77 if (freeze_processes()) { 81 if (freeze_processes()) {
78 error = -EBUSY; 82 error = -EBUSY;
@@ -84,6 +88,7 @@ static int prepare_processes(void)
84 return 0; 88 return 0;
85thaw: 89thaw:
86 thaw_processes(); 90 thaw_processes();
91enable_cpus:
87 enable_nonboot_cpus(); 92 enable_nonboot_cpus();
88 pm_restore_console(); 93 pm_restore_console();
89 return error; 94 return error;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6d295c776794..873228c71dab 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -16,6 +16,8 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/pm.h> 17#include <linux/pm.h>
18#include <linux/console.h> 18#include <linux/console.h>
19#include <linux/cpu.h>
20#include <linux/resume-trace.h>
19 21
20#include "power.h" 22#include "power.h"
21 23
@@ -51,7 +53,7 @@ void pm_set_ops(struct pm_ops * ops)
51 53
52static int suspend_prepare(suspend_state_t state) 54static int suspend_prepare(suspend_state_t state)
53{ 55{
54 int error = 0; 56 int error;
55 unsigned int free_pages; 57 unsigned int free_pages;
56 58
57 if (!pm_ops || !pm_ops->enter) 59 if (!pm_ops || !pm_ops->enter)
@@ -59,12 +61,9 @@ static int suspend_prepare(suspend_state_t state)
59 61
60 pm_prepare_console(); 62 pm_prepare_console();
61 63
62 disable_nonboot_cpus(); 64 error = disable_nonboot_cpus();
63 65 if (error)
64 if (num_online_cpus() != 1) {
65 error = -EPERM;
66 goto Enable_cpu; 66 goto Enable_cpu;
67 }
68 67
69 if (freeze_processes()) { 68 if (freeze_processes()) {
70 error = -EAGAIN; 69 error = -EAGAIN;
@@ -283,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
283 282
284power_attr(state); 283power_attr(state);
285 284
285#ifdef CONFIG_PM_TRACE
286int pm_trace_enabled;
287
288static ssize_t pm_trace_show(struct subsystem * subsys, char * buf)
289{
290 return sprintf(buf, "%d\n", pm_trace_enabled);
291}
292
293static ssize_t
294pm_trace_store(struct subsystem * subsys, const char * buf, size_t n)
295{
296 int val;
297
298 if (sscanf(buf, "%d", &val) == 1) {
299 pm_trace_enabled = !!val;
300 return n;
301 }
302 return -EINVAL;
303}
304
305power_attr(pm_trace);
306
307static struct attribute * g[] = {
308 &state_attr.attr,
309 &pm_trace_attr.attr,
310 NULL,
311};
312#else
286static struct attribute * g[] = { 313static struct attribute * g[] = {
287 &state_attr.attr, 314 &state_attr.attr,
288 NULL, 315 NULL,
289}; 316};
317#endif /* CONFIG_PM_TRACE */
290 318
291static struct attribute_group attr_group = { 319static struct attribute_group attr_group = {
292 .attrs = g, 320 .attrs = g,
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 57a792982fb9..bfe999f7b272 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -38,8 +38,6 @@ extern struct subsystem power_subsys;
38/* References to section boundaries */ 38/* References to section boundaries */
39extern const void __nosave_begin, __nosave_end; 39extern const void __nosave_begin, __nosave_end;
40 40
41extern struct pbe *pagedir_nosave;
42
43/* Preferred image size in bytes (default 500 MB) */ 41/* Preferred image size in bytes (default 500 MB) */
44extern unsigned long image_size; 42extern unsigned long image_size;
45extern int in_suspend; 43extern int in_suspend;
@@ -50,21 +48,62 @@ extern asmlinkage int swsusp_arch_resume(void);
50 48
51extern unsigned int count_data_pages(void); 49extern unsigned int count_data_pages(void);
52 50
51/**
52 * Auxiliary structure used for reading the snapshot image data and
53 * metadata from and writing them to the list of page backup entries
54 * (PBEs) which is the main data structure of swsusp.
55 *
56 * Using struct snapshot_handle we can transfer the image, including its
57 * metadata, as a continuous sequence of bytes with the help of
58 * snapshot_read_next() and snapshot_write_next().
59 *
60 * The code that writes the image to a storage or transfers it to
61 * the user land is required to use snapshot_read_next() for this
62 * purpose and it should not make any assumptions regarding the internal
63 * structure of the image. Similarly, the code that reads the image from
64 * a storage or transfers it from the user land is required to use
65 * snapshot_write_next().
66 *
67 * This may allow us to change the internal structure of the image
68 * in the future with considerably less effort.
69 */
70
53struct snapshot_handle { 71struct snapshot_handle {
54 loff_t offset; 72 loff_t offset; /* number of the last byte ready for reading
55 unsigned int page; 73 * or writing in the sequence
56 unsigned int page_offset; 74 */
57 unsigned int prev; 75 unsigned int cur; /* number of the block of PAGE_SIZE bytes the
58 struct pbe *pbe, *last_pbe; 76 * next operation will refer to (ie. current)
59 void *buffer; 77 */
60 unsigned int buf_offset; 78 unsigned int cur_offset; /* offset with respect to the current
79 * block (for the next operation)
80 */
81 unsigned int prev; /* number of the block of PAGE_SIZE bytes that
82 * was the current one previously
83 */
84 void *buffer; /* address of the block to read from
85 * or write to
86 */
87 unsigned int buf_offset; /* location to read from or write to,
88 * given as a displacement from 'buffer'
89 */
90 int sync_read; /* Set to one to notify the caller of
91 * snapshot_write_next() that it may
92 * need to call wait_on_bio_chain()
93 */
61}; 94};
62 95
96/* This macro returns the address from/to which the caller of
97 * snapshot_read_next()/snapshot_write_next() is allowed to
98 * read/write data after the function returns
99 */
63#define data_of(handle) ((handle).buffer + (handle).buf_offset) 100#define data_of(handle) ((handle).buffer + (handle).buf_offset)
64 101
102extern unsigned int snapshot_additional_pages(struct zone *zone);
65extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); 103extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
66extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); 104extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
67int snapshot_image_loaded(struct snapshot_handle *handle); 105extern int snapshot_image_loaded(struct snapshot_handle *handle);
106extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
68 107
69#define SNAPSHOT_IOC_MAGIC '3' 108#define SNAPSHOT_IOC_MAGIC '3'
70#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) 109#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
deleted file mode 100644
index 5957312b2d68..000000000000
--- a/kernel/power/smp.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * drivers/power/smp.c - Functions for stopping other CPUs.
3 *
4 * Copyright 2004 Pavel Machek <pavel@suse.cz>
5 * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
6 *
7 * This file is released under the GPLv2.
8 */
9
10#undef DEBUG
11
12#include <linux/smp_lock.h>
13#include <linux/interrupt.h>
14#include <linux/suspend.h>
15#include <linux/module.h>
16#include <linux/cpu.h>
17#include <asm/atomic.h>
18#include <asm/tlbflush.h>
19
20/* This is protected by pm_sem semaphore */
21static cpumask_t frozen_cpus;
22
23void disable_nonboot_cpus(void)
24{
25 int cpu, error;
26
27 error = 0;
28 cpus_clear(frozen_cpus);
29 printk("Freezing cpus ...\n");
30 for_each_online_cpu(cpu) {
31 if (cpu == 0)
32 continue;
33 error = cpu_down(cpu);
34 if (!error) {
35 cpu_set(cpu, frozen_cpus);
36 printk("CPU%d is down\n", cpu);
37 continue;
38 }
39 printk("Error taking cpu %d down: %d\n", cpu, error);
40 }
41 BUG_ON(raw_smp_processor_id() != 0);
42 if (error)
43 panic("cpus not sleeping");
44}
45
46void enable_nonboot_cpus(void)
47{
48 int cpu, error;
49
50 printk("Thawing cpus ...\n");
51 for_each_cpu_mask(cpu, frozen_cpus) {
52 error = cpu_up(cpu);
53 if (!error) {
54 printk("CPU%d is up\n", cpu);
55 continue;
56 }
57 printk("Error taking cpu %d up: %d\n", cpu, error);
58 panic("Not enough cpus");
59 }
60 cpus_clear(frozen_cpus);
61}
62
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 75d4886e648e..1b84313cbab5 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -34,10 +34,12 @@
34 34
35#include "power.h" 35#include "power.h"
36 36
37struct pbe *pagedir_nosave; 37/* List of PBEs used for creating and restoring the suspend image */
38struct pbe *restore_pblist;
39
38static unsigned int nr_copy_pages; 40static unsigned int nr_copy_pages;
39static unsigned int nr_meta_pages; 41static unsigned int nr_meta_pages;
40static unsigned long *buffer; 42static void *buffer;
41 43
42#ifdef CONFIG_HIGHMEM 44#ifdef CONFIG_HIGHMEM
43unsigned int count_highmem_pages(void) 45unsigned int count_highmem_pages(void)
@@ -156,240 +158,637 @@ static inline int save_highmem(void) {return 0;}
156static inline int restore_highmem(void) {return 0;} 158static inline int restore_highmem(void) {return 0;}
157#endif 159#endif
158 160
159static int pfn_is_nosave(unsigned long pfn) 161/**
162 * @safe_needed - on resume, for storing the PBE list and the image,
163 * we can only use memory pages that do not conflict with the pages
164 * used before suspend.
165 *
166 * The unsafe pages are marked with the PG_nosave_free flag
167 * and we count them using unsafe_pages
168 */
169
170#define PG_ANY 0
171#define PG_SAFE 1
172#define PG_UNSAFE_CLEAR 1
173#define PG_UNSAFE_KEEP 0
174
175static unsigned int allocated_unsafe_pages;
176
177static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
160{ 178{
161 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; 179 void *res;
162 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; 180
163 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); 181 res = (void *)get_zeroed_page(gfp_mask);
182 if (safe_needed)
183 while (res && PageNosaveFree(virt_to_page(res))) {
184 /* The page is unsafe, mark it for swsusp_free() */
185 SetPageNosave(virt_to_page(res));
186 allocated_unsafe_pages++;
187 res = (void *)get_zeroed_page(gfp_mask);
188 }
189 if (res) {
190 SetPageNosave(virt_to_page(res));
191 SetPageNosaveFree(virt_to_page(res));
192 }
193 return res;
194}
195
196unsigned long get_safe_page(gfp_t gfp_mask)
197{
198 return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE);
164} 199}
165 200
166/** 201/**
167 * saveable - Determine whether a page should be cloned or not. 202 * free_image_page - free page represented by @addr, allocated with
168 * @pfn: The page 203 * alloc_image_page (page flags set by it must be cleared)
169 *
170 * We save a page if it's Reserved, and not in the range of pages
171 * statically defined as 'unsaveable', or if it isn't reserved, and
172 * isn't part of a free chunk of pages.
173 */ 204 */
174 205
175static int saveable(struct zone *zone, unsigned long *zone_pfn) 206static inline void free_image_page(void *addr, int clear_nosave_free)
176{ 207{
177 unsigned long pfn = *zone_pfn + zone->zone_start_pfn; 208 ClearPageNosave(virt_to_page(addr));
178 struct page *page; 209 if (clear_nosave_free)
210 ClearPageNosaveFree(virt_to_page(addr));
211 free_page((unsigned long)addr);
212}
179 213
180 if (!pfn_valid(pfn)) 214/* struct linked_page is used to build chains of pages */
181 return 0;
182 215
183 page = pfn_to_page(pfn); 216#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *))
184 BUG_ON(PageReserved(page) && PageNosave(page));
185 if (PageNosave(page))
186 return 0;
187 if (PageReserved(page) && pfn_is_nosave(pfn))
188 return 0;
189 if (PageNosaveFree(page))
190 return 0;
191 217
192 return 1; 218struct linked_page {
193} 219 struct linked_page *next;
220 char data[LINKED_PAGE_DATA_SIZE];
221} __attribute__((packed));
194 222
195unsigned int count_data_pages(void) 223static inline void
224free_list_of_pages(struct linked_page *list, int clear_page_nosave)
196{ 225{
197 struct zone *zone; 226 while (list) {
198 unsigned long zone_pfn; 227 struct linked_page *lp = list->next;
199 unsigned int n = 0;
200 228
201 for_each_zone (zone) { 229 free_image_page(list, clear_page_nosave);
202 if (is_highmem(zone)) 230 list = lp;
203 continue;
204 mark_free_pages(zone);
205 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
206 n += saveable(zone, &zone_pfn);
207 } 231 }
208 return n;
209} 232}
210 233
211static void copy_data_pages(struct pbe *pblist) 234/**
235 * struct chain_allocator is used for allocating small objects out of
236 * a linked list of pages called 'the chain'.
237 *
238 * The chain grows each time when there is no room for a new object in
239 * the current page. The allocated objects cannot be freed individually.
240 * It is only possible to free them all at once, by freeing the entire
241 * chain.
242 *
243 * NOTE: The chain allocator may be inefficient if the allocated objects
244 * are not much smaller than PAGE_SIZE.
245 */
246
247struct chain_allocator {
248 struct linked_page *chain; /* the chain */
249 unsigned int used_space; /* total size of objects allocated out
250 * of the current page
251 */
252 gfp_t gfp_mask; /* mask for allocating pages */
253 int safe_needed; /* if set, only "safe" pages are allocated */
254};
255
256static void
257chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
212{ 258{
213 struct zone *zone; 259 ca->chain = NULL;
214 unsigned long zone_pfn; 260 ca->used_space = LINKED_PAGE_DATA_SIZE;
215 struct pbe *pbe, *p; 261 ca->gfp_mask = gfp_mask;
262 ca->safe_needed = safe_needed;
263}
216 264
217 pbe = pblist; 265static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
218 for_each_zone (zone) { 266{
219 if (is_highmem(zone)) 267 void *ret;
220 continue; 268
221 mark_free_pages(zone); 269 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
222 /* This is necessary for swsusp_free() */ 270 struct linked_page *lp;
223 for_each_pb_page (p, pblist) 271
224 SetPageNosaveFree(virt_to_page(p)); 272 lp = alloc_image_page(ca->gfp_mask, ca->safe_needed);
225 for_each_pbe (p, pblist) 273 if (!lp)
226 SetPageNosaveFree(virt_to_page(p->address)); 274 return NULL;
227 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { 275
228 if (saveable(zone, &zone_pfn)) { 276 lp->next = ca->chain;
229 struct page *page; 277 ca->chain = lp;
230 long *src, *dst; 278 ca->used_space = 0;
231 int n;
232
233 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
234 BUG_ON(!pbe);
235 pbe->orig_address = (unsigned long)page_address(page);
236 /* copy_page and memcpy are not usable for copying task structs. */
237 dst = (long *)pbe->address;
238 src = (long *)pbe->orig_address;
239 for (n = PAGE_SIZE / sizeof(long); n; n--)
240 *dst++ = *src++;
241 pbe = pbe->next;
242 }
243 }
244 } 279 }
245 BUG_ON(pbe); 280 ret = ca->chain->data + ca->used_space;
281 ca->used_space += size;
282 return ret;
246} 283}
247 284
285static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
286{
287 free_list_of_pages(ca->chain, clear_page_nosave);
288 memset(ca, 0, sizeof(struct chain_allocator));
289}
248 290
249/** 291/**
250 * free_pagedir - free pages allocated with alloc_pagedir() 292 * Data types related to memory bitmaps.
293 *
294 * Memory bitmap is a structure consiting of many linked lists of
295 * objects. The main list's elements are of type struct zone_bitmap
296 * and each of them corresonds to one zone. For each zone bitmap
297 * object there is a list of objects of type struct bm_block that
298 * represent each blocks of bit chunks in which information is
299 * stored.
300 *
301 * struct memory_bitmap contains a pointer to the main list of zone
302 * bitmap objects, a struct bm_position used for browsing the bitmap,
303 * and a pointer to the list of pages used for allocating all of the
304 * zone bitmap objects and bitmap block objects.
305 *
306 * NOTE: It has to be possible to lay out the bitmap in memory
307 * using only allocations of order 0. Additionally, the bitmap is
308 * designed to work with arbitrary number of zones (this is over the
309 * top for now, but let's avoid making unnecessary assumptions ;-).
310 *
311 * struct zone_bitmap contains a pointer to a list of bitmap block
312 * objects and a pointer to the bitmap block object that has been
313 * most recently used for setting bits. Additionally, it contains the
314 * pfns that correspond to the start and end of the represented zone.
315 *
316 * struct bm_block contains a pointer to the memory page in which
317 * information is stored (in the form of a block of bit chunks
318 * of type unsigned long each). It also contains the pfns that
319 * correspond to the start and end of the represented memory area and
320 * the number of bit chunks in the block.
321 *
322 * NOTE: Memory bitmaps are used for two types of operations only:
323 * "set a bit" and "find the next bit set". Moreover, the searching
324 * is always carried out after all of the "set a bit" operations
325 * on given bitmap.
251 */ 326 */
252 327
253static void free_pagedir(struct pbe *pblist, int clear_nosave_free) 328#define BM_END_OF_MAP (~0UL)
329
330#define BM_CHUNKS_PER_BLOCK (PAGE_SIZE / sizeof(long))
331#define BM_BITS_PER_CHUNK (sizeof(long) << 3)
332#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
333
334struct bm_block {
335 struct bm_block *next; /* next element of the list */
336 unsigned long start_pfn; /* pfn represented by the first bit */
337 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
338 unsigned int size; /* number of bit chunks */
339 unsigned long *data; /* chunks of bits representing pages */
340};
341
342struct zone_bitmap {
343 struct zone_bitmap *next; /* next element of the list */
344 unsigned long start_pfn; /* minimal pfn in this zone */
345 unsigned long end_pfn; /* maximal pfn in this zone plus 1 */
346 struct bm_block *bm_blocks; /* list of bitmap blocks */
347 struct bm_block *cur_block; /* recently used bitmap block */
348};
349
350/* strcut bm_position is used for browsing memory bitmaps */
351
352struct bm_position {
353 struct zone_bitmap *zone_bm;
354 struct bm_block *block;
355 int chunk;
356 int bit;
357};
358
359struct memory_bitmap {
360 struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */
361 struct linked_page *p_list; /* list of pages used to store zone
362 * bitmap objects and bitmap block
363 * objects
364 */
365 struct bm_position cur; /* most recently used bit position */
366};
367
368/* Functions that operate on memory bitmaps */
369
370static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
254{ 371{
255 struct pbe *pbe; 372 bm->cur.chunk = 0;
373 bm->cur.bit = -1;
374}
256 375
257 while (pblist) { 376static void memory_bm_position_reset(struct memory_bitmap *bm)
258 pbe = (pblist + PB_PAGE_SKIP)->next; 377{
259 ClearPageNosave(virt_to_page(pblist)); 378 struct zone_bitmap *zone_bm;
260 if (clear_nosave_free) 379
261 ClearPageNosaveFree(virt_to_page(pblist)); 380 zone_bm = bm->zone_bm_list;
262 free_page((unsigned long)pblist); 381 bm->cur.zone_bm = zone_bm;
263 pblist = pbe; 382 bm->cur.block = zone_bm->bm_blocks;
264 } 383 memory_bm_reset_chunk(bm);
265} 384}
266 385
386static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
387
267/** 388/**
268 * fill_pb_page - Create a list of PBEs on a given memory page 389 * create_bm_block_list - create a list of block bitmap objects
269 */ 390 */
270 391
271static inline void fill_pb_page(struct pbe *pbpage) 392static inline struct bm_block *
393create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca)
272{ 394{
273 struct pbe *p; 395 struct bm_block *bblist = NULL;
396
397 while (nr_blocks-- > 0) {
398 struct bm_block *bb;
274 399
275 p = pbpage; 400 bb = chain_alloc(ca, sizeof(struct bm_block));
276 pbpage += PB_PAGE_SKIP; 401 if (!bb)
277 do 402 return NULL;
278 p->next = p + 1; 403
279 while (++p < pbpage); 404 bb->next = bblist;
405 bblist = bb;
406 }
407 return bblist;
280} 408}
281 409
282/** 410/**
283 * create_pbe_list - Create a list of PBEs on top of a given chain 411 * create_zone_bm_list - create a list of zone bitmap objects
284 * of memory pages allocated with alloc_pagedir()
285 */ 412 */
286 413
287static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) 414static inline struct zone_bitmap *
415create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
288{ 416{
289 struct pbe *pbpage, *p; 417 struct zone_bitmap *zbmlist = NULL;
290 unsigned int num = PBES_PER_PAGE;
291 418
292 for_each_pb_page (pbpage, pblist) { 419 while (nr_zones-- > 0) {
293 if (num >= nr_pages) 420 struct zone_bitmap *zbm;
294 break; 421
422 zbm = chain_alloc(ca, sizeof(struct zone_bitmap));
423 if (!zbm)
424 return NULL;
425
426 zbm->next = zbmlist;
427 zbmlist = zbm;
428 }
429 return zbmlist;
430}
431
432/**
433 * memory_bm_create - allocate memory for a memory bitmap
434 */
435
436static int
437memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
438{
439 struct chain_allocator ca;
440 struct zone *zone;
441 struct zone_bitmap *zone_bm;
442 struct bm_block *bb;
443 unsigned int nr;
444
445 chain_init(&ca, gfp_mask, safe_needed);
295 446
296 fill_pb_page(pbpage); 447 /* Compute the number of zones */
297 num += PBES_PER_PAGE; 448 nr = 0;
449 for_each_zone (zone)
450 if (populated_zone(zone) && !is_highmem(zone))
451 nr++;
452
453 /* Allocate the list of zones bitmap objects */
454 zone_bm = create_zone_bm_list(nr, &ca);
455 bm->zone_bm_list = zone_bm;
456 if (!zone_bm) {
457 chain_free(&ca, PG_UNSAFE_CLEAR);
458 return -ENOMEM;
298 } 459 }
299 if (pbpage) { 460
300 for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++) 461 /* Initialize the zone bitmap objects */
301 p->next = p + 1; 462 for_each_zone (zone) {
302 p->next = NULL; 463 unsigned long pfn;
464
465 if (!populated_zone(zone) || is_highmem(zone))
466 continue;
467
468 zone_bm->start_pfn = zone->zone_start_pfn;
469 zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
470 /* Allocate the list of bitmap block objects */
471 nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
472 bb = create_bm_block_list(nr, &ca);
473 zone_bm->bm_blocks = bb;
474 zone_bm->cur_block = bb;
475 if (!bb)
476 goto Free;
477
478 nr = zone->spanned_pages;
479 pfn = zone->zone_start_pfn;
480 /* Initialize the bitmap block objects */
481 while (bb) {
482 unsigned long *ptr;
483
484 ptr = alloc_image_page(gfp_mask, safe_needed);
485 bb->data = ptr;
486 if (!ptr)
487 goto Free;
488
489 bb->start_pfn = pfn;
490 if (nr >= BM_BITS_PER_BLOCK) {
491 pfn += BM_BITS_PER_BLOCK;
492 bb->size = BM_CHUNKS_PER_BLOCK;
493 nr -= BM_BITS_PER_BLOCK;
494 } else {
495 /* This is executed only once in the loop */
496 pfn += nr;
497 bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
498 }
499 bb->end_pfn = pfn;
500 bb = bb->next;
501 }
502 zone_bm = zone_bm->next;
303 } 503 }
504 bm->p_list = ca.chain;
505 memory_bm_position_reset(bm);
506 return 0;
507
508Free:
509 bm->p_list = ca.chain;
510 memory_bm_free(bm, PG_UNSAFE_CLEAR);
511 return -ENOMEM;
304} 512}
305 513
306static unsigned int unsafe_pages; 514/**
515 * memory_bm_free - free memory occupied by the memory bitmap @bm
516 */
517
518static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
519{
520 struct zone_bitmap *zone_bm;
521
522 /* Free the list of bit blocks for each zone_bitmap object */
523 zone_bm = bm->zone_bm_list;
524 while (zone_bm) {
525 struct bm_block *bb;
526
527 bb = zone_bm->bm_blocks;
528 while (bb) {
529 if (bb->data)
530 free_image_page(bb->data, clear_nosave_free);
531 bb = bb->next;
532 }
533 zone_bm = zone_bm->next;
534 }
535 free_list_of_pages(bm->p_list, clear_nosave_free);
536 bm->zone_bm_list = NULL;
537}
307 538
308/** 539/**
309 * @safe_needed - on resume, for storing the PBE list and the image, 540 * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
310 * we can only use memory pages that do not conflict with the pages 541 * to given pfn. The cur_zone_bm member of @bm and the cur_block member
311 * used before suspend. 542 * of @bm->cur_zone_bm are updated.
312 * 543 *
313 * The unsafe pages are marked with the PG_nosave_free flag 544 * If the bit cannot be set, the function returns -EINVAL .
314 * and we count them using unsafe_pages
315 */ 545 */
316 546
317static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) 547static int
548memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
318{ 549{
319 void *res; 550 struct zone_bitmap *zone_bm;
320 551 struct bm_block *bb;
321 res = (void *)get_zeroed_page(gfp_mask); 552
322 if (safe_needed) 553 /* Check if the pfn is from the current zone */
323 while (res && PageNosaveFree(virt_to_page(res))) { 554 zone_bm = bm->cur.zone_bm;
324 /* The page is unsafe, mark it for swsusp_free() */ 555 if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
325 SetPageNosave(virt_to_page(res)); 556 zone_bm = bm->zone_bm_list;
326 unsafe_pages++; 557 /* We don't assume that the zones are sorted by pfns */
327 res = (void *)get_zeroed_page(gfp_mask); 558 while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
559 zone_bm = zone_bm->next;
560 if (unlikely(!zone_bm))
561 return -EINVAL;
328 } 562 }
329 if (res) { 563 bm->cur.zone_bm = zone_bm;
330 SetPageNosave(virt_to_page(res));
331 SetPageNosaveFree(virt_to_page(res));
332 } 564 }
333 return res; 565 /* Check if the pfn corresponds to the current bitmap block */
566 bb = zone_bm->cur_block;
567 if (pfn < bb->start_pfn)
568 bb = zone_bm->bm_blocks;
569
570 while (pfn >= bb->end_pfn) {
571 bb = bb->next;
572 if (unlikely(!bb))
573 return -EINVAL;
574 }
575 zone_bm->cur_block = bb;
576 pfn -= bb->start_pfn;
577 set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
578 return 0;
334} 579}
335 580
336unsigned long get_safe_page(gfp_t gfp_mask) 581/* Two auxiliary functions for memory_bm_next_pfn */
582
583/* Find the first set bit in the given chunk, if there is one */
584
585static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p)
337{ 586{
338 return (unsigned long)alloc_image_page(gfp_mask, 1); 587 bit++;
588 while (bit < BM_BITS_PER_CHUNK) {
589 if (test_bit(bit, chunk_p))
590 return bit;
591
592 bit++;
593 }
594 return -1;
595}
596
597/* Find a chunk containing some bits set in given block of bits */
598
599static inline int next_chunk_in_block(int n, struct bm_block *bb)
600{
601 n++;
602 while (n < bb->size) {
603 if (bb->data[n])
604 return n;
605
606 n++;
607 }
608 return -1;
339} 609}
340 610
341/** 611/**
342 * alloc_pagedir - Allocate the page directory. 612 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit
343 * 613 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
344 * First, determine exactly how many pages we need and 614 * returned.
345 * allocate them.
346 * 615 *
347 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE 616 * It is required to run memory_bm_position_reset() before the first call to
348 * struct pbe elements (pbes) and the last element in the page points 617 * this function.
349 * to the next page. 618 */
619
620static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
621{
622 struct zone_bitmap *zone_bm;
623 struct bm_block *bb;
624 int chunk;
625 int bit;
626
627 do {
628 bb = bm->cur.block;
629 do {
630 chunk = bm->cur.chunk;
631 bit = bm->cur.bit;
632 do {
633 bit = next_bit_in_chunk(bit, bb->data + chunk);
634 if (bit >= 0)
635 goto Return_pfn;
636
637 chunk = next_chunk_in_block(chunk, bb);
638 bit = -1;
639 } while (chunk >= 0);
640 bb = bb->next;
641 bm->cur.block = bb;
642 memory_bm_reset_chunk(bm);
643 } while (bb);
644 zone_bm = bm->cur.zone_bm->next;
645 if (zone_bm) {
646 bm->cur.zone_bm = zone_bm;
647 bm->cur.block = zone_bm->bm_blocks;
648 memory_bm_reset_chunk(bm);
649 }
650 } while (zone_bm);
651 memory_bm_position_reset(bm);
652 return BM_END_OF_MAP;
653
654Return_pfn:
655 bm->cur.chunk = chunk;
656 bm->cur.bit = bit;
657 return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
658}
659
660/**
661 * snapshot_additional_pages - estimate the number of additional pages
662 * be needed for setting up the suspend image data structures for given
663 * zone (usually the returned value is greater than the exact number)
664 */
665
666unsigned int snapshot_additional_pages(struct zone *zone)
667{
668 unsigned int res;
669
670 res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
671 res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
672 return res;
673}
674
675/**
676 * pfn_is_nosave - check if given pfn is in the 'nosave' section
677 */
678
679static inline int pfn_is_nosave(unsigned long pfn)
680{
681 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
682 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
683 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
684}
685
686/**
687 * saveable - Determine whether a page should be cloned or not.
688 * @pfn: The page
350 * 689 *
351 * On each page we set up a list of struct_pbe elements. 690 * We save a page if it isn't Nosave, and is not in the range of pages
691 * statically defined as 'unsaveable', and it
692 * isn't a part of a free chunk of pages.
352 */ 693 */
353 694
354static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, 695static struct page *saveable_page(unsigned long pfn)
355 int safe_needed)
356{ 696{
357 unsigned int num; 697 struct page *page;
358 struct pbe *pblist, *pbe; 698
699 if (!pfn_valid(pfn))
700 return NULL;
359 701
360 if (!nr_pages) 702 page = pfn_to_page(pfn);
703
704 if (PageNosave(page))
705 return NULL;
706 if (PageReserved(page) && pfn_is_nosave(pfn))
361 return NULL; 707 return NULL;
708 if (PageNosaveFree(page))
709 return NULL;
710
711 return page;
712}
713
714unsigned int count_data_pages(void)
715{
716 struct zone *zone;
717 unsigned long pfn, max_zone_pfn;
718 unsigned int n = 0;
362 719
363 pblist = alloc_image_page(gfp_mask, safe_needed); 720 for_each_zone (zone) {
364 /* FIXME: rewrite this ugly loop */ 721 if (is_highmem(zone))
365 for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; 722 continue;
366 pbe = pbe->next, num += PBES_PER_PAGE) { 723 mark_free_pages(zone);
367 pbe += PB_PAGE_SKIP; 724 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
368 pbe->next = alloc_image_page(gfp_mask, safe_needed); 725 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
726 n += !!saveable_page(pfn);
369 } 727 }
370 if (!pbe) { /* get_zeroed_page() failed */ 728 return n;
371 free_pagedir(pblist, 1); 729}
372 pblist = NULL; 730
373 } else 731static inline void copy_data_page(long *dst, long *src)
374 create_pbe_list(pblist, nr_pages); 732{
375 return pblist; 733 int n;
734
735 /* copy_page and memcpy are not usable for copying task structs. */
736 for (n = PAGE_SIZE / sizeof(long); n; n--)
737 *dst++ = *src++;
738}
739
740static void
741copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
742{
743 struct zone *zone;
744 unsigned long pfn;
745
746 for_each_zone (zone) {
747 unsigned long max_zone_pfn;
748
749 if (is_highmem(zone))
750 continue;
751
752 mark_free_pages(zone);
753 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
754 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
755 if (saveable_page(pfn))
756 memory_bm_set_bit(orig_bm, pfn);
757 }
758 memory_bm_position_reset(orig_bm);
759 memory_bm_position_reset(copy_bm);
760 do {
761 pfn = memory_bm_next_pfn(orig_bm);
762 if (likely(pfn != BM_END_OF_MAP)) {
763 struct page *page;
764 void *src;
765
766 page = pfn_to_page(pfn);
767 src = page_address(page);
768 page = pfn_to_page(memory_bm_next_pfn(copy_bm));
769 copy_data_page(page_address(page), src);
770 }
771 } while (pfn != BM_END_OF_MAP);
376} 772}
377 773
378/** 774/**
379 * Free pages we allocated for suspend. Suspend pages are alocated 775 * swsusp_free - free pages allocated for the suspend.
380 * before atomic copy, so we need to free them after resume. 776 *
777 * Suspend pages are alocated before the atomic copy is made, so we
778 * need to release them after the resume.
381 */ 779 */
382 780
383void swsusp_free(void) 781void swsusp_free(void)
384{ 782{
385 struct zone *zone; 783 struct zone *zone;
386 unsigned long zone_pfn; 784 unsigned long pfn, max_zone_pfn;
387 785
388 for_each_zone(zone) { 786 for_each_zone(zone) {
389 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) 787 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
390 if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { 788 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
391 struct page *page; 789 if (pfn_valid(pfn)) {
392 page = pfn_to_page(zone_pfn + zone->zone_start_pfn); 790 struct page *page = pfn_to_page(pfn);
791
393 if (PageNosave(page) && PageNosaveFree(page)) { 792 if (PageNosave(page) && PageNosaveFree(page)) {
394 ClearPageNosave(page); 793 ClearPageNosave(page);
395 ClearPageNosaveFree(page); 794 ClearPageNosaveFree(page);
@@ -399,7 +798,7 @@ void swsusp_free(void)
399 } 798 }
400 nr_copy_pages = 0; 799 nr_copy_pages = 0;
401 nr_meta_pages = 0; 800 nr_meta_pages = 0;
402 pagedir_nosave = NULL; 801 restore_pblist = NULL;
403 buffer = NULL; 802 buffer = NULL;
404} 803}
405 804
@@ -414,46 +813,57 @@ void swsusp_free(void)
414static int enough_free_mem(unsigned int nr_pages) 813static int enough_free_mem(unsigned int nr_pages)
415{ 814{
416 struct zone *zone; 815 struct zone *zone;
417 unsigned int n = 0; 816 unsigned int free = 0, meta = 0;
418 817
419 for_each_zone (zone) 818 for_each_zone (zone)
420 if (!is_highmem(zone)) 819 if (!is_highmem(zone)) {
421 n += zone->free_pages; 820 free += zone->free_pages;
422 pr_debug("swsusp: available memory: %u pages\n", n); 821 meta += snapshot_additional_pages(zone);
423 return n > (nr_pages + PAGES_FOR_IO + 822 }
424 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
425}
426 823
427static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) 824 pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n",
428{ 825 nr_pages, PAGES_FOR_IO, meta, free);
429 struct pbe *p;
430 826
431 for_each_pbe (p, pblist) { 827 return free > nr_pages + PAGES_FOR_IO + meta;
432 p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed);
433 if (!p->address)
434 return -ENOMEM;
435 }
436 return 0;
437} 828}
438 829
439static struct pbe *swsusp_alloc(unsigned int nr_pages) 830static int
831swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
832 unsigned int nr_pages)
440{ 833{
441 struct pbe *pblist; 834 int error;
442 835
443 if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { 836 error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
444 printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); 837 if (error)
445 return NULL; 838 goto Free;
446 }
447 839
448 if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { 840 error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
449 printk(KERN_ERR "suspend: Allocating image pages failed.\n"); 841 if (error)
450 swsusp_free(); 842 goto Free;
451 return NULL; 843
844 while (nr_pages-- > 0) {
845 struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD);
846 if (!page)
847 goto Free;
848
849 SetPageNosave(page);
850 SetPageNosaveFree(page);
851 memory_bm_set_bit(copy_bm, page_to_pfn(page));
452 } 852 }
853 return 0;
453 854
454 return pblist; 855Free:
856 swsusp_free();
857 return -ENOMEM;
455} 858}
456 859
860/* Memory bitmap used for marking saveable pages */
861static struct memory_bitmap orig_bm;
862/* Memory bitmap used for marking allocated pages that will contain the copies
863 * of saveable pages
864 */
865static struct memory_bitmap copy_bm;
866
457asmlinkage int swsusp_save(void) 867asmlinkage int swsusp_save(void)
458{ 868{
459 unsigned int nr_pages; 869 unsigned int nr_pages;
@@ -464,25 +874,19 @@ asmlinkage int swsusp_save(void)
464 nr_pages = count_data_pages(); 874 nr_pages = count_data_pages();
465 printk("swsusp: Need to copy %u pages\n", nr_pages); 875 printk("swsusp: Need to copy %u pages\n", nr_pages);
466 876
467 pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
468 nr_pages,
469 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
470 PAGES_FOR_IO, nr_free_pages());
471
472 if (!enough_free_mem(nr_pages)) { 877 if (!enough_free_mem(nr_pages)) {
473 printk(KERN_ERR "swsusp: Not enough free memory\n"); 878 printk(KERN_ERR "swsusp: Not enough free memory\n");
474 return -ENOMEM; 879 return -ENOMEM;
475 } 880 }
476 881
477 pagedir_nosave = swsusp_alloc(nr_pages); 882 if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages))
478 if (!pagedir_nosave)
479 return -ENOMEM; 883 return -ENOMEM;
480 884
481 /* During allocating of suspend pagedir, new cold pages may appear. 885 /* During allocating of suspend pagedir, new cold pages may appear.
482 * Kill them. 886 * Kill them.
483 */ 887 */
484 drain_local_pages(); 888 drain_local_pages();
485 copy_data_pages(pagedir_nosave); 889 copy_data_pages(&copy_bm, &orig_bm);
486 890
487 /* 891 /*
488 * End of critical section. From now on, we can write to memory, 892 * End of critical section. From now on, we can write to memory,
@@ -511,22 +915,20 @@ static void init_header(struct swsusp_info *info)
511} 915}
512 916
513/** 917/**
514 * pack_orig_addresses - the .orig_address fields of the PBEs from the 918 * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
515 * list starting at @pbe are stored in the array @buf[] (1 page) 919 * are stored in the array @buf[] (1 page at a time)
516 */ 920 */
517 921
518static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe) 922static inline void
923pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
519{ 924{
520 int j; 925 int j;
521 926
522 for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { 927 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
523 buf[j] = pbe->orig_address; 928 buf[j] = memory_bm_next_pfn(bm);
524 pbe = pbe->next; 929 if (unlikely(buf[j] == BM_END_OF_MAP))
930 break;
525 } 931 }
526 if (!pbe)
527 for (; j < PAGE_SIZE / sizeof(long); j++)
528 buf[j] = 0;
529 return pbe;
530} 932}
531 933
532/** 934/**
@@ -553,37 +955,39 @@ static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pb
553 955
554int snapshot_read_next(struct snapshot_handle *handle, size_t count) 956int snapshot_read_next(struct snapshot_handle *handle, size_t count)
555{ 957{
556 if (handle->page > nr_meta_pages + nr_copy_pages) 958 if (handle->cur > nr_meta_pages + nr_copy_pages)
557 return 0; 959 return 0;
960
558 if (!buffer) { 961 if (!buffer) {
559 /* This makes the buffer be freed by swsusp_free() */ 962 /* This makes the buffer be freed by swsusp_free() */
560 buffer = alloc_image_page(GFP_ATOMIC, 0); 963 buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
561 if (!buffer) 964 if (!buffer)
562 return -ENOMEM; 965 return -ENOMEM;
563 } 966 }
564 if (!handle->offset) { 967 if (!handle->offset) {
565 init_header((struct swsusp_info *)buffer); 968 init_header((struct swsusp_info *)buffer);
566 handle->buffer = buffer; 969 handle->buffer = buffer;
567 handle->pbe = pagedir_nosave; 970 memory_bm_position_reset(&orig_bm);
971 memory_bm_position_reset(&copy_bm);
568 } 972 }
569 if (handle->prev < handle->page) { 973 if (handle->prev < handle->cur) {
570 if (handle->page <= nr_meta_pages) { 974 if (handle->cur <= nr_meta_pages) {
571 handle->pbe = pack_orig_addresses(buffer, handle->pbe); 975 memset(buffer, 0, PAGE_SIZE);
572 if (!handle->pbe) 976 pack_pfns(buffer, &orig_bm);
573 handle->pbe = pagedir_nosave;
574 } else { 977 } else {
575 handle->buffer = (void *)handle->pbe->address; 978 unsigned long pfn = memory_bm_next_pfn(&copy_bm);
576 handle->pbe = handle->pbe->next; 979
980 handle->buffer = page_address(pfn_to_page(pfn));
577 } 981 }
578 handle->prev = handle->page; 982 handle->prev = handle->cur;
579 } 983 }
580 handle->buf_offset = handle->page_offset; 984 handle->buf_offset = handle->cur_offset;
581 if (handle->page_offset + count >= PAGE_SIZE) { 985 if (handle->cur_offset + count >= PAGE_SIZE) {
582 count = PAGE_SIZE - handle->page_offset; 986 count = PAGE_SIZE - handle->cur_offset;
583 handle->page_offset = 0; 987 handle->cur_offset = 0;
584 handle->page++; 988 handle->cur++;
585 } else { 989 } else {
586 handle->page_offset += count; 990 handle->cur_offset += count;
587 } 991 }
588 handle->offset += count; 992 handle->offset += count;
589 return count; 993 return count;
@@ -595,47 +999,50 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
595 * had been used before suspend 999 * had been used before suspend
596 */ 1000 */
597 1001
598static int mark_unsafe_pages(struct pbe *pblist) 1002static int mark_unsafe_pages(struct memory_bitmap *bm)
599{ 1003{
600 struct zone *zone; 1004 struct zone *zone;
601 unsigned long zone_pfn; 1005 unsigned long pfn, max_zone_pfn;
602 struct pbe *p;
603
604 if (!pblist) /* a sanity check */
605 return -EINVAL;
606 1006
607 /* Clear page flags */ 1007 /* Clear page flags */
608 for_each_zone (zone) { 1008 for_each_zone (zone) {
609 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) 1009 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
610 if (pfn_valid(zone_pfn + zone->zone_start_pfn)) 1010 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
611 ClearPageNosaveFree(pfn_to_page(zone_pfn + 1011 if (pfn_valid(pfn))
612 zone->zone_start_pfn)); 1012 ClearPageNosaveFree(pfn_to_page(pfn));
613 } 1013 }
614 1014
615 /* Mark orig addresses */ 1015 /* Mark pages that correspond to the "original" pfns as "unsafe" */
616 for_each_pbe (p, pblist) { 1016 memory_bm_position_reset(bm);
617 if (virt_addr_valid(p->orig_address)) 1017 do {
618 SetPageNosaveFree(virt_to_page(p->orig_address)); 1018 pfn = memory_bm_next_pfn(bm);
619 else 1019 if (likely(pfn != BM_END_OF_MAP)) {
620 return -EFAULT; 1020 if (likely(pfn_valid(pfn)))
621 } 1021 SetPageNosaveFree(pfn_to_page(pfn));
1022 else
1023 return -EFAULT;
1024 }
1025 } while (pfn != BM_END_OF_MAP);
622 1026
623 unsafe_pages = 0; 1027 allocated_unsafe_pages = 0;
624 1028
625 return 0; 1029 return 0;
626} 1030}
627 1031
628static void copy_page_backup_list(struct pbe *dst, struct pbe *src) 1032static void
1033duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
629{ 1034{
630 /* We assume both lists contain the same number of elements */ 1035 unsigned long pfn;
631 while (src) { 1036
632 dst->orig_address = src->orig_address; 1037 memory_bm_position_reset(src);
633 dst = dst->next; 1038 pfn = memory_bm_next_pfn(src);
634 src = src->next; 1039 while (pfn != BM_END_OF_MAP) {
1040 memory_bm_set_bit(dst, pfn);
1041 pfn = memory_bm_next_pfn(src);
635 } 1042 }
636} 1043}
637 1044
638static int check_header(struct swsusp_info *info) 1045static inline int check_header(struct swsusp_info *info)
639{ 1046{
640 char *reason = NULL; 1047 char *reason = NULL;
641 1048
@@ -662,19 +1069,14 @@ static int check_header(struct swsusp_info *info)
662 * load header - check the image header and copy data from it 1069 * load header - check the image header and copy data from it
663 */ 1070 */
664 1071
665static int load_header(struct snapshot_handle *handle, 1072static int
666 struct swsusp_info *info) 1073load_header(struct swsusp_info *info)
667{ 1074{
668 int error; 1075 int error;
669 struct pbe *pblist;
670 1076
1077 restore_pblist = NULL;
671 error = check_header(info); 1078 error = check_header(info);
672 if (!error) { 1079 if (!error) {
673 pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0);
674 if (!pblist)
675 return -ENOMEM;
676 pagedir_nosave = pblist;
677 handle->pbe = pblist;
678 nr_copy_pages = info->image_pages; 1080 nr_copy_pages = info->image_pages;
679 nr_meta_pages = info->pages - info->image_pages - 1; 1081 nr_meta_pages = info->pages - info->image_pages - 1;
680 } 1082 }
@@ -682,113 +1084,137 @@ static int load_header(struct snapshot_handle *handle,
682} 1084}
683 1085
684/** 1086/**
685 * unpack_orig_addresses - copy the elements of @buf[] (1 page) to 1087 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
686 * the PBEs in the list starting at @pbe 1088 * the corresponding bit in the memory bitmap @bm
687 */ 1089 */
688 1090
689static inline struct pbe *unpack_orig_addresses(unsigned long *buf, 1091static inline void
690 struct pbe *pbe) 1092unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
691{ 1093{
692 int j; 1094 int j;
693 1095
694 for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { 1096 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
695 pbe->orig_address = buf[j]; 1097 if (unlikely(buf[j] == BM_END_OF_MAP))
696 pbe = pbe->next; 1098 break;
1099
1100 memory_bm_set_bit(bm, buf[j]);
697 } 1101 }
698 return pbe;
699} 1102}
700 1103
701/** 1104/**
702 * prepare_image - use metadata contained in the PBE list 1105 * prepare_image - use the memory bitmap @bm to mark the pages that will
703 * pointed to by pagedir_nosave to mark the pages that will 1106 * be overwritten in the process of restoring the system memory state
704 * be overwritten in the process of restoring the system 1107 * from the suspend image ("unsafe" pages) and allocate memory for the
705 * memory state from the image ("unsafe" pages) and allocate 1108 * image.
706 * memory for the image
707 * 1109 *
708 * The idea is to allocate the PBE list first and then 1110 * The idea is to allocate a new memory bitmap first and then allocate
709 * allocate as many pages as it's needed for the image data, 1111 * as many pages as needed for the image data, but not to assign these
710 * but not to assign these pages to the PBEs initially. 1112 * pages to specific tasks initially. Instead, we just mark them as
711 * Instead, we just mark them as allocated and create a list 1113 * allocated and create a list of "safe" pages that will be used later.
712 * of "safe" which will be used later
713 */ 1114 */
714 1115
715struct safe_page { 1116#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
716 struct safe_page *next;
717 char padding[PAGE_SIZE - sizeof(void *)];
718};
719 1117
720static struct safe_page *safe_pages; 1118static struct linked_page *safe_pages_list;
721 1119
722static int prepare_image(struct snapshot_handle *handle) 1120static int
1121prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
723{ 1122{
724 int error = 0; 1123 unsigned int nr_pages;
725 unsigned int nr_pages = nr_copy_pages; 1124 struct linked_page *sp_list, *lp;
726 struct pbe *p, *pblist = NULL; 1125 int error;
727 1126
728 p = pagedir_nosave; 1127 error = mark_unsafe_pages(bm);
729 error = mark_unsafe_pages(p); 1128 if (error)
730 if (!error) { 1129 goto Free;
731 pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); 1130
732 if (pblist) 1131 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
733 copy_page_backup_list(pblist, p); 1132 if (error)
734 free_pagedir(p, 0); 1133 goto Free;
735 if (!pblist) 1134
1135 duplicate_memory_bitmap(new_bm, bm);
1136 memory_bm_free(bm, PG_UNSAFE_KEEP);
1137 /* Reserve some safe pages for potential later use.
1138 *
1139 * NOTE: This way we make sure there will be enough safe pages for the
1140 * chain_alloc() in get_buffer(). It is a bit wasteful, but
1141 * nr_copy_pages cannot be greater than 50% of the memory anyway.
1142 */
1143 sp_list = NULL;
1144 /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
1145 nr_pages = nr_copy_pages - allocated_unsafe_pages;
1146 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
1147 while (nr_pages > 0) {
1148 lp = alloc_image_page(GFP_ATOMIC, PG_SAFE);
1149 if (!lp) {
736 error = -ENOMEM; 1150 error = -ENOMEM;
1151 goto Free;
1152 }
1153 lp->next = sp_list;
1154 sp_list = lp;
1155 nr_pages--;
737 } 1156 }
738 safe_pages = NULL; 1157 /* Preallocate memory for the image */
739 if (!error && nr_pages > unsafe_pages) { 1158 safe_pages_list = NULL;
740 nr_pages -= unsafe_pages; 1159 nr_pages = nr_copy_pages - allocated_unsafe_pages;
741 while (nr_pages--) { 1160 while (nr_pages > 0) {
742 struct safe_page *ptr; 1161 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
743 1162 if (!lp) {
744 ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC); 1163 error = -ENOMEM;
745 if (!ptr) { 1164 goto Free;
746 error = -ENOMEM; 1165 }
747 break; 1166 if (!PageNosaveFree(virt_to_page(lp))) {
748 } 1167 /* The page is "safe", add it to the list */
749 if (!PageNosaveFree(virt_to_page(ptr))) { 1168 lp->next = safe_pages_list;
750 /* The page is "safe", add it to the list */ 1169 safe_pages_list = lp;
751 ptr->next = safe_pages;
752 safe_pages = ptr;
753 }
754 /* Mark the page as allocated */
755 SetPageNosave(virt_to_page(ptr));
756 SetPageNosaveFree(virt_to_page(ptr));
757 } 1170 }
1171 /* Mark the page as allocated */
1172 SetPageNosave(virt_to_page(lp));
1173 SetPageNosaveFree(virt_to_page(lp));
1174 nr_pages--;
758 } 1175 }
759 if (!error) { 1176 /* Free the reserved safe pages so that chain_alloc() can use them */
760 pagedir_nosave = pblist; 1177 while (sp_list) {
761 } else { 1178 lp = sp_list->next;
762 handle->pbe = NULL; 1179 free_image_page(sp_list, PG_UNSAFE_CLEAR);
763 swsusp_free(); 1180 sp_list = lp;
764 } 1181 }
1182 return 0;
1183
1184Free:
1185 swsusp_free();
765 return error; 1186 return error;
766} 1187}
767 1188
768static void *get_buffer(struct snapshot_handle *handle) 1189/**
1190 * get_buffer - compute the address that snapshot_write_next() should
1191 * set for its caller to write to.
1192 */
1193
1194static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
769{ 1195{
770 struct pbe *pbe = handle->pbe, *last = handle->last_pbe; 1196 struct pbe *pbe;
771 struct page *page = virt_to_page(pbe->orig_address); 1197 struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
772 1198
773 if (PageNosave(page) && PageNosaveFree(page)) { 1199 if (PageNosave(page) && PageNosaveFree(page))
774 /* 1200 /* We have allocated the "original" page frame and we can
775 * We have allocated the "original" page frame and we can 1201 * use it directly to store the loaded page.
776 * use it directly to store the read page
777 */ 1202 */
778 pbe->address = 0; 1203 return page_address(page);
779 if (last && last->next) 1204
780 last->next = NULL; 1205 /* The "original" page frame has not been allocated and we have to
781 return (void *)pbe->orig_address; 1206 * use a "safe" page frame to store the loaded page.
782 }
783 /*
784 * The "original" page frame has not been allocated and we have to
785 * use a "safe" page frame to store the read page
786 */ 1207 */
787 pbe->address = (unsigned long)safe_pages; 1208 pbe = chain_alloc(ca, sizeof(struct pbe));
788 safe_pages = safe_pages->next; 1209 if (!pbe) {
789 if (last) 1210 swsusp_free();
790 last->next = pbe; 1211 return NULL;
791 handle->last_pbe = pbe; 1212 }
1213 pbe->orig_address = (unsigned long)page_address(page);
1214 pbe->address = (unsigned long)safe_pages_list;
1215 safe_pages_list = safe_pages_list->next;
1216 pbe->next = restore_pblist;
1217 restore_pblist = pbe;
792 return (void *)pbe->address; 1218 return (void *)pbe->address;
793} 1219}
794 1220
@@ -816,46 +1242,60 @@ static void *get_buffer(struct snapshot_handle *handle)
816 1242
817int snapshot_write_next(struct snapshot_handle *handle, size_t count) 1243int snapshot_write_next(struct snapshot_handle *handle, size_t count)
818{ 1244{
1245 static struct chain_allocator ca;
819 int error = 0; 1246 int error = 0;
820 1247
821 if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages) 1248 /* Check if we have already loaded the entire image */
1249 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
822 return 0; 1250 return 0;
1251
823 if (!buffer) { 1252 if (!buffer) {
824 /* This makes the buffer be freed by swsusp_free() */ 1253 /* This makes the buffer be freed by swsusp_free() */
825 buffer = alloc_image_page(GFP_ATOMIC, 0); 1254 buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
826 if (!buffer) 1255 if (!buffer)
827 return -ENOMEM; 1256 return -ENOMEM;
828 } 1257 }
829 if (!handle->offset) 1258 if (!handle->offset)
830 handle->buffer = buffer; 1259 handle->buffer = buffer;
831 if (handle->prev < handle->page) { 1260 handle->sync_read = 1;
832 if (!handle->prev) { 1261 if (handle->prev < handle->cur) {
833 error = load_header(handle, (struct swsusp_info *)buffer); 1262 if (handle->prev == 0) {
1263 error = load_header(buffer);
834 if (error) 1264 if (error)
835 return error; 1265 return error;
1266
1267 error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
1268 if (error)
1269 return error;
1270
836 } else if (handle->prev <= nr_meta_pages) { 1271 } else if (handle->prev <= nr_meta_pages) {
837 handle->pbe = unpack_orig_addresses(buffer, handle->pbe); 1272 unpack_orig_pfns(buffer, &copy_bm);
838 if (!handle->pbe) { 1273 if (handle->prev == nr_meta_pages) {
839 error = prepare_image(handle); 1274 error = prepare_image(&orig_bm, &copy_bm);
840 if (error) 1275 if (error)
841 return error; 1276 return error;
842 handle->pbe = pagedir_nosave; 1277
843 handle->last_pbe = NULL; 1278 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
844 handle->buffer = get_buffer(handle); 1279 memory_bm_position_reset(&orig_bm);
1280 restore_pblist = NULL;
1281 handle->buffer = get_buffer(&orig_bm, &ca);
1282 handle->sync_read = 0;
1283 if (!handle->buffer)
1284 return -ENOMEM;
845 } 1285 }
846 } else { 1286 } else {
847 handle->pbe = handle->pbe->next; 1287 handle->buffer = get_buffer(&orig_bm, &ca);
848 handle->buffer = get_buffer(handle); 1288 handle->sync_read = 0;
849 } 1289 }
850 handle->prev = handle->page; 1290 handle->prev = handle->cur;
851 } 1291 }
852 handle->buf_offset = handle->page_offset; 1292 handle->buf_offset = handle->cur_offset;
853 if (handle->page_offset + count >= PAGE_SIZE) { 1293 if (handle->cur_offset + count >= PAGE_SIZE) {
854 count = PAGE_SIZE - handle->page_offset; 1294 count = PAGE_SIZE - handle->cur_offset;
855 handle->page_offset = 0; 1295 handle->cur_offset = 0;
856 handle->page++; 1296 handle->cur++;
857 } else { 1297 } else {
858 handle->page_offset += count; 1298 handle->cur_offset += count;
859 } 1299 }
860 handle->offset += count; 1300 handle->offset += count;
861 return count; 1301 return count;
@@ -863,6 +1303,13 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
863 1303
864int snapshot_image_loaded(struct snapshot_handle *handle) 1304int snapshot_image_loaded(struct snapshot_handle *handle)
865{ 1305{
866 return !(!handle->pbe || handle->pbe->next || !nr_copy_pages || 1306 return !(!nr_copy_pages ||
867 handle->page <= nr_meta_pages + nr_copy_pages); 1307 handle->cur <= nr_meta_pages + nr_copy_pages);
1308}
1309
1310void snapshot_free_unused_memory(struct snapshot_handle *handle)
1311{
1312 /* Free only if we have loaded the image entirely */
1313 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
1314 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
868} 1315}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f1dd146bd64d..9b2ee5344dee 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -22,6 +22,7 @@
22#include <linux/device.h> 22#include <linux/device.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/bio.h> 24#include <linux/bio.h>
25#include <linux/blkdev.h>
25#include <linux/swap.h> 26#include <linux/swap.h>
26#include <linux/swapops.h> 27#include <linux/swapops.h>
27#include <linux/pm.h> 28#include <linux/pm.h>
@@ -49,18 +50,16 @@ static int mark_swapfiles(swp_entry_t start)
49{ 50{
50 int error; 51 int error;
51 52
52 rw_swap_page_sync(READ, 53 rw_swap_page_sync(READ, swp_entry(root_swap, 0),
53 swp_entry(root_swap, 0), 54 virt_to_page((unsigned long)&swsusp_header), NULL);
54 virt_to_page((unsigned long)&swsusp_header));
55 if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || 55 if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
56 !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { 56 !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
57 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); 57 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
58 memcpy(swsusp_header.sig,SWSUSP_SIG, 10); 58 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
59 swsusp_header.image = start; 59 swsusp_header.image = start;
60 error = rw_swap_page_sync(WRITE, 60 error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0),
61 swp_entry(root_swap, 0), 61 virt_to_page((unsigned long)&swsusp_header),
62 virt_to_page((unsigned long) 62 NULL);
63 &swsusp_header));
64 } else { 63 } else {
65 pr_debug("swsusp: Partition is not swap space.\n"); 64 pr_debug("swsusp: Partition is not swap space.\n");
66 error = -ENODEV; 65 error = -ENODEV;
@@ -88,16 +87,37 @@ static int swsusp_swap_check(void) /* This is called before saving image */
88 * write_page - Write one page to given swap location. 87 * write_page - Write one page to given swap location.
89 * @buf: Address we're writing. 88 * @buf: Address we're writing.
90 * @offset: Offset of the swap page we're writing to. 89 * @offset: Offset of the swap page we're writing to.
90 * @bio_chain: Link the next write BIO here
91 */ 91 */
92 92
93static int write_page(void *buf, unsigned long offset) 93static int write_page(void *buf, unsigned long offset, struct bio **bio_chain)
94{ 94{
95 swp_entry_t entry; 95 swp_entry_t entry;
96 int error = -ENOSPC; 96 int error = -ENOSPC;
97 97
98 if (offset) { 98 if (offset) {
99 struct page *page = virt_to_page(buf);
100
101 if (bio_chain) {
102 /*
103 * Whether or not we successfully allocated a copy page,
104 * we take a ref on the page here. It gets undone in
105 * wait_on_bio_chain().
106 */
107 struct page *page_copy;
108 page_copy = alloc_page(GFP_ATOMIC);
109 if (page_copy == NULL) {
110 WARN_ON_ONCE(1);
111 bio_chain = NULL; /* Go synchronous */
112 get_page(page);
113 } else {
114 memcpy(page_address(page_copy),
115 page_address(page), PAGE_SIZE);
116 page = page_copy;
117 }
118 }
99 entry = swp_entry(root_swap, offset); 119 entry = swp_entry(root_swap, offset);
100 error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf)); 120 error = rw_swap_page_sync(WRITE, entry, page, bio_chain);
101 } 121 }
102 return error; 122 return error;
103} 123}
@@ -146,6 +166,26 @@ static void release_swap_writer(struct swap_map_handle *handle)
146 handle->bitmap = NULL; 166 handle->bitmap = NULL;
147} 167}
148 168
169static void show_speed(struct timeval *start, struct timeval *stop,
170 unsigned nr_pages, char *msg)
171{
172 s64 elapsed_centisecs64;
173 int centisecs;
174 int k;
175 int kps;
176
177 elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
178 do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
179 centisecs = elapsed_centisecs64;
180 if (centisecs == 0)
181 centisecs = 1; /* avoid div-by-zero */
182 k = nr_pages * (PAGE_SIZE / 1024);
183 kps = (k * 100) / centisecs;
184 printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
185 centisecs / 100, centisecs % 100,
186 kps / 1000, (kps % 1000) / 10);
187}
188
149static int get_swap_writer(struct swap_map_handle *handle) 189static int get_swap_writer(struct swap_map_handle *handle)
150{ 190{
151 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); 191 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
@@ -165,37 +205,70 @@ static int get_swap_writer(struct swap_map_handle *handle)
165 return 0; 205 return 0;
166} 206}
167 207
168static int swap_write_page(struct swap_map_handle *handle, void *buf) 208static int wait_on_bio_chain(struct bio **bio_chain)
169{ 209{
170 int error; 210 struct bio *bio;
211 struct bio *next_bio;
212 int ret = 0;
213
214 if (bio_chain == NULL)
215 return 0;
216
217 bio = *bio_chain;
218 if (bio == NULL)
219 return 0;
220 while (bio) {
221 struct page *page;
222
223 next_bio = bio->bi_private;
224 page = bio->bi_io_vec[0].bv_page;
225 wait_on_page_locked(page);
226 if (!PageUptodate(page) || PageError(page))
227 ret = -EIO;
228 put_page(page);
229 bio_put(bio);
230 bio = next_bio;
231 }
232 *bio_chain = NULL;
233 return ret;
234}
235
236static int swap_write_page(struct swap_map_handle *handle, void *buf,
237 struct bio **bio_chain)
238{
239 int error = 0;
171 unsigned long offset; 240 unsigned long offset;
172 241
173 if (!handle->cur) 242 if (!handle->cur)
174 return -EINVAL; 243 return -EINVAL;
175 offset = alloc_swap_page(root_swap, handle->bitmap); 244 offset = alloc_swap_page(root_swap, handle->bitmap);
176 error = write_page(buf, offset); 245 error = write_page(buf, offset, bio_chain);
177 if (error) 246 if (error)
178 return error; 247 return error;
179 handle->cur->entries[handle->k++] = offset; 248 handle->cur->entries[handle->k++] = offset;
180 if (handle->k >= MAP_PAGE_ENTRIES) { 249 if (handle->k >= MAP_PAGE_ENTRIES) {
250 error = wait_on_bio_chain(bio_chain);
251 if (error)
252 goto out;
181 offset = alloc_swap_page(root_swap, handle->bitmap); 253 offset = alloc_swap_page(root_swap, handle->bitmap);
182 if (!offset) 254 if (!offset)
183 return -ENOSPC; 255 return -ENOSPC;
184 handle->cur->next_swap = offset; 256 handle->cur->next_swap = offset;
185 error = write_page(handle->cur, handle->cur_swap); 257 error = write_page(handle->cur, handle->cur_swap, NULL);
186 if (error) 258 if (error)
187 return error; 259 goto out;
188 memset(handle->cur, 0, PAGE_SIZE); 260 memset(handle->cur, 0, PAGE_SIZE);
189 handle->cur_swap = offset; 261 handle->cur_swap = offset;
190 handle->k = 0; 262 handle->k = 0;
191 } 263 }
192 return 0; 264out:
265 return error;
193} 266}
194 267
195static int flush_swap_writer(struct swap_map_handle *handle) 268static int flush_swap_writer(struct swap_map_handle *handle)
196{ 269{
197 if (handle->cur && handle->cur_swap) 270 if (handle->cur && handle->cur_swap)
198 return write_page(handle->cur, handle->cur_swap); 271 return write_page(handle->cur, handle->cur_swap, NULL);
199 else 272 else
200 return -EINVAL; 273 return -EINVAL;
201} 274}
@@ -206,21 +279,29 @@ static int flush_swap_writer(struct swap_map_handle *handle)
206 279
207static int save_image(struct swap_map_handle *handle, 280static int save_image(struct swap_map_handle *handle,
208 struct snapshot_handle *snapshot, 281 struct snapshot_handle *snapshot,
209 unsigned int nr_pages) 282 unsigned int nr_to_write)
210{ 283{
211 unsigned int m; 284 unsigned int m;
212 int ret; 285 int ret;
213 int error = 0; 286 int error = 0;
287 int nr_pages;
288 int err2;
289 struct bio *bio;
290 struct timeval start;
291 struct timeval stop;
214 292
215 printk("Saving image data pages (%u pages) ... ", nr_pages); 293 printk("Saving image data pages (%u pages) ... ", nr_to_write);
216 m = nr_pages / 100; 294 m = nr_to_write / 100;
217 if (!m) 295 if (!m)
218 m = 1; 296 m = 1;
219 nr_pages = 0; 297 nr_pages = 0;
298 bio = NULL;
299 do_gettimeofday(&start);
220 do { 300 do {
221 ret = snapshot_read_next(snapshot, PAGE_SIZE); 301 ret = snapshot_read_next(snapshot, PAGE_SIZE);
222 if (ret > 0) { 302 if (ret > 0) {
223 error = swap_write_page(handle, data_of(*snapshot)); 303 error = swap_write_page(handle, data_of(*snapshot),
304 &bio);
224 if (error) 305 if (error)
225 break; 306 break;
226 if (!(nr_pages % m)) 307 if (!(nr_pages % m))
@@ -228,8 +309,13 @@ static int save_image(struct swap_map_handle *handle,
228 nr_pages++; 309 nr_pages++;
229 } 310 }
230 } while (ret > 0); 311 } while (ret > 0);
312 err2 = wait_on_bio_chain(&bio);
313 do_gettimeofday(&stop);
314 if (!error)
315 error = err2;
231 if (!error) 316 if (!error)
232 printk("\b\b\b\bdone\n"); 317 printk("\b\b\b\bdone\n");
318 show_speed(&start, &stop, nr_to_write, "Wrote");
233 return error; 319 return error;
234} 320}
235 321
@@ -245,8 +331,7 @@ static int enough_swap(unsigned int nr_pages)
245 unsigned int free_swap = count_swap_pages(root_swap, 1); 331 unsigned int free_swap = count_swap_pages(root_swap, 1);
246 332
247 pr_debug("swsusp: free swap pages: %u\n", free_swap); 333 pr_debug("swsusp: free swap pages: %u\n", free_swap);
248 return free_swap > (nr_pages + PAGES_FOR_IO + 334 return free_swap > nr_pages + PAGES_FOR_IO;
249 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
250} 335}
251 336
252/** 337/**
@@ -266,7 +351,8 @@ int swsusp_write(void)
266 int error; 351 int error;
267 352
268 if ((error = swsusp_swap_check())) { 353 if ((error = swsusp_swap_check())) {
269 printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n"); 354 printk(KERN_ERR "swsusp: Cannot find swap device, try "
355 "swapon -a.\n");
270 return error; 356 return error;
271 } 357 }
272 memset(&snapshot, 0, sizeof(struct snapshot_handle)); 358 memset(&snapshot, 0, sizeof(struct snapshot_handle));
@@ -281,7 +367,7 @@ int swsusp_write(void)
281 error = get_swap_writer(&handle); 367 error = get_swap_writer(&handle);
282 if (!error) { 368 if (!error) {
283 unsigned long start = handle.cur_swap; 369 unsigned long start = handle.cur_swap;
284 error = swap_write_page(&handle, header); 370 error = swap_write_page(&handle, header, NULL);
285 if (!error) 371 if (!error)
286 error = save_image(&handle, &snapshot, 372 error = save_image(&handle, &snapshot,
287 header->pages - 1); 373 header->pages - 1);
@@ -298,27 +384,6 @@ int swsusp_write(void)
298 return error; 384 return error;
299} 385}
300 386
301/*
302 * Using bio to read from swap.
303 * This code requires a bit more work than just using buffer heads
304 * but, it is the recommended way for 2.5/2.6.
305 * The following are to signal the beginning and end of I/O. Bios
306 * finish asynchronously, while we want them to happen synchronously.
307 * A simple atomic_t, and a wait loop take care of this problem.
308 */
309
310static atomic_t io_done = ATOMIC_INIT(0);
311
312static int end_io(struct bio *bio, unsigned int num, int err)
313{
314 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
315 printk(KERN_ERR "I/O error reading swsusp image.\n");
316 return -EIO;
317 }
318 atomic_set(&io_done, 0);
319 return 0;
320}
321
322static struct block_device *resume_bdev; 387static struct block_device *resume_bdev;
323 388
324/** 389/**
@@ -326,15 +391,15 @@ static struct block_device *resume_bdev;
326 * @rw: READ or WRITE. 391 * @rw: READ or WRITE.
327 * @off physical offset of page. 392 * @off physical offset of page.
328 * @page: page we're reading or writing. 393 * @page: page we're reading or writing.
394 * @bio_chain: list of pending biod (for async reading)
329 * 395 *
330 * Straight from the textbook - allocate and initialize the bio. 396 * Straight from the textbook - allocate and initialize the bio.
331 * If we're writing, make sure the page is marked as dirty. 397 * If we're reading, make sure the page is marked as dirty.
332 * Then submit it and wait. 398 * Then submit it and, if @bio_chain == NULL, wait.
333 */ 399 */
334 400static int submit(int rw, pgoff_t page_off, struct page *page,
335static int submit(int rw, pgoff_t page_off, void *page) 401 struct bio **bio_chain)
336{ 402{
337 int error = 0;
338 struct bio *bio; 403 struct bio *bio;
339 404
340 bio = bio_alloc(GFP_ATOMIC, 1); 405 bio = bio_alloc(GFP_ATOMIC, 1);
@@ -342,33 +407,40 @@ static int submit(int rw, pgoff_t page_off, void *page)
342 return -ENOMEM; 407 return -ENOMEM;
343 bio->bi_sector = page_off * (PAGE_SIZE >> 9); 408 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
344 bio->bi_bdev = resume_bdev; 409 bio->bi_bdev = resume_bdev;
345 bio->bi_end_io = end_io; 410 bio->bi_end_io = end_swap_bio_read;
346 411
347 if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { 412 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
348 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); 413 printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
349 error = -EFAULT; 414 bio_put(bio);
350 goto Done; 415 return -EFAULT;
351 } 416 }
352 417
353 atomic_set(&io_done, 1); 418 lock_page(page);
354 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 419 bio_get(bio);
355 while (atomic_read(&io_done)) 420
356 yield(); 421 if (bio_chain == NULL) {
357 if (rw == READ) 422 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
358 bio_set_pages_dirty(bio); 423 wait_on_page_locked(page);
359 Done: 424 if (rw == READ)
360 bio_put(bio); 425 bio_set_pages_dirty(bio);
361 return error; 426 bio_put(bio);
427 } else {
428 get_page(page);
429 bio->bi_private = *bio_chain;
430 *bio_chain = bio;
431 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
432 }
433 return 0;
362} 434}
363 435
364static int bio_read_page(pgoff_t page_off, void *page) 436static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
365{ 437{
366 return submit(READ, page_off, page); 438 return submit(READ, page_off, virt_to_page(addr), bio_chain);
367} 439}
368 440
369static int bio_write_page(pgoff_t page_off, void *page) 441static int bio_write_page(pgoff_t page_off, void *addr)
370{ 442{
371 return submit(WRITE, page_off, page); 443 return submit(WRITE, page_off, virt_to_page(addr), NULL);
372} 444}
373 445
374/** 446/**
@@ -393,7 +465,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
393 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); 465 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
394 if (!handle->cur) 466 if (!handle->cur)
395 return -ENOMEM; 467 return -ENOMEM;
396 error = bio_read_page(swp_offset(start), handle->cur); 468 error = bio_read_page(swp_offset(start), handle->cur, NULL);
397 if (error) { 469 if (error) {
398 release_swap_reader(handle); 470 release_swap_reader(handle);
399 return error; 471 return error;
@@ -402,7 +474,8 @@ static int get_swap_reader(struct swap_map_handle *handle,
402 return 0; 474 return 0;
403} 475}
404 476
405static int swap_read_page(struct swap_map_handle *handle, void *buf) 477static int swap_read_page(struct swap_map_handle *handle, void *buf,
478 struct bio **bio_chain)
406{ 479{
407 unsigned long offset; 480 unsigned long offset;
408 int error; 481 int error;
@@ -412,16 +485,17 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf)
412 offset = handle->cur->entries[handle->k]; 485 offset = handle->cur->entries[handle->k];
413 if (!offset) 486 if (!offset)
414 return -EFAULT; 487 return -EFAULT;
415 error = bio_read_page(offset, buf); 488 error = bio_read_page(offset, buf, bio_chain);
416 if (error) 489 if (error)
417 return error; 490 return error;
418 if (++handle->k >= MAP_PAGE_ENTRIES) { 491 if (++handle->k >= MAP_PAGE_ENTRIES) {
492 error = wait_on_bio_chain(bio_chain);
419 handle->k = 0; 493 handle->k = 0;
420 offset = handle->cur->next_swap; 494 offset = handle->cur->next_swap;
421 if (!offset) 495 if (!offset)
422 release_swap_reader(handle); 496 release_swap_reader(handle);
423 else 497 else if (!error)
424 error = bio_read_page(offset, handle->cur); 498 error = bio_read_page(offset, handle->cur, NULL);
425 } 499 }
426 return error; 500 return error;
427} 501}
@@ -434,33 +508,49 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf)
434 508
435static int load_image(struct swap_map_handle *handle, 509static int load_image(struct swap_map_handle *handle,
436 struct snapshot_handle *snapshot, 510 struct snapshot_handle *snapshot,
437 unsigned int nr_pages) 511 unsigned int nr_to_read)
438{ 512{
439 unsigned int m; 513 unsigned int m;
440 int ret;
441 int error = 0; 514 int error = 0;
515 struct timeval start;
516 struct timeval stop;
517 struct bio *bio;
518 int err2;
519 unsigned nr_pages;
442 520
443 printk("Loading image data pages (%u pages) ... ", nr_pages); 521 printk("Loading image data pages (%u pages) ... ", nr_to_read);
444 m = nr_pages / 100; 522 m = nr_to_read / 100;
445 if (!m) 523 if (!m)
446 m = 1; 524 m = 1;
447 nr_pages = 0; 525 nr_pages = 0;
448 do { 526 bio = NULL;
449 ret = snapshot_write_next(snapshot, PAGE_SIZE); 527 do_gettimeofday(&start);
450 if (ret > 0) { 528 for ( ; ; ) {
451 error = swap_read_page(handle, data_of(*snapshot)); 529 error = snapshot_write_next(snapshot, PAGE_SIZE);
452 if (error) 530 if (error <= 0)
453 break; 531 break;
454 if (!(nr_pages % m)) 532 error = swap_read_page(handle, data_of(*snapshot), &bio);
455 printk("\b\b\b\b%3d%%", nr_pages / m); 533 if (error)
456 nr_pages++; 534 break;
457 } 535 if (snapshot->sync_read)
458 } while (ret > 0); 536 error = wait_on_bio_chain(&bio);
537 if (error)
538 break;
539 if (!(nr_pages % m))
540 printk("\b\b\b\b%3d%%", nr_pages / m);
541 nr_pages++;
542 }
543 err2 = wait_on_bio_chain(&bio);
544 do_gettimeofday(&stop);
545 if (!error)
546 error = err2;
459 if (!error) { 547 if (!error) {
460 printk("\b\b\b\bdone\n"); 548 printk("\b\b\b\bdone\n");
549 snapshot_free_unused_memory(snapshot);
461 if (!snapshot_image_loaded(snapshot)) 550 if (!snapshot_image_loaded(snapshot))
462 error = -ENODATA; 551 error = -ENODATA;
463 } 552 }
553 show_speed(&start, &stop, nr_to_read, "Read");
464 return error; 554 return error;
465} 555}
466 556
@@ -483,7 +573,7 @@ int swsusp_read(void)
483 header = (struct swsusp_info *)data_of(snapshot); 573 header = (struct swsusp_info *)data_of(snapshot);
484 error = get_swap_reader(&handle, swsusp_header.image); 574 error = get_swap_reader(&handle, swsusp_header.image);
485 if (!error) 575 if (!error)
486 error = swap_read_page(&handle, header); 576 error = swap_read_page(&handle, header, NULL);
487 if (!error) 577 if (!error)
488 error = load_image(&handle, &snapshot, header->pages - 1); 578 error = load_image(&handle, &snapshot, header->pages - 1);
489 release_swap_reader(&handle); 579 release_swap_reader(&handle);
@@ -509,7 +599,7 @@ int swsusp_check(void)
509 if (!IS_ERR(resume_bdev)) { 599 if (!IS_ERR(resume_bdev)) {
510 set_blocksize(resume_bdev, PAGE_SIZE); 600 set_blocksize(resume_bdev, PAGE_SIZE);
511 memset(&swsusp_header, 0, sizeof(swsusp_header)); 601 memset(&swsusp_header, 0, sizeof(swsusp_header));
512 if ((error = bio_read_page(0, &swsusp_header))) 602 if ((error = bio_read_page(0, &swsusp_header, NULL)))
513 return error; 603 return error;
514 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { 604 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
515 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); 605 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 17f669c83012..8ef677ea0cea 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -193,14 +193,13 @@ int swsusp_shrink_memory(void)
193 printk("Shrinking memory... "); 193 printk("Shrinking memory... ");
194 do { 194 do {
195 size = 2 * count_highmem_pages(); 195 size = 2 * count_highmem_pages();
196 size += size / 50 + count_data_pages(); 196 size += size / 50 + count_data_pages() + PAGES_FOR_IO;
197 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
198 PAGES_FOR_IO;
199 tmp = size; 197 tmp = size;
200 for_each_zone (zone) 198 for_each_zone (zone)
201 if (!is_highmem(zone) && populated_zone(zone)) { 199 if (!is_highmem(zone) && populated_zone(zone)) {
202 tmp -= zone->free_pages; 200 tmp -= zone->free_pages;
203 tmp += zone->lowmem_reserve[ZONE_NORMAL]; 201 tmp += zone->lowmem_reserve[ZONE_NORMAL];
202 tmp += snapshot_additional_pages(zone);
204 } 203 }
205 if (tmp > 0) { 204 if (tmp > 0) {
206 tmp = __shrink_memory(tmp); 205 tmp = __shrink_memory(tmp);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3f1539fbe48a..2e4499f3e4d9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -19,6 +19,7 @@
19#include <linux/swapops.h> 19#include <linux/swapops.h>
20#include <linux/pm.h> 20#include <linux/pm.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/cpu.h>
22 23
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
24 25
@@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
139 if (data->frozen) 140 if (data->frozen)
140 break; 141 break;
141 down(&pm_sem); 142 down(&pm_sem);
142 disable_nonboot_cpus(); 143 error = disable_nonboot_cpus();
143 if (freeze_processes()) { 144 if (!error) {
144 thaw_processes(); 145 error = freeze_processes();
145 enable_nonboot_cpus(); 146 if (error) {
146 error = -EBUSY; 147 thaw_processes();
148 error = -EBUSY;
149 }
147 } 150 }
151 enable_nonboot_cpus();
148 up(&pm_sem); 152 up(&pm_sem);
149 if (!error) 153 if (!error)
150 data->frozen = 1; 154 data->frozen = 1;
@@ -189,6 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
189 error = -EPERM; 193 error = -EPERM;
190 break; 194 break;
191 } 195 }
196 snapshot_free_unused_memory(&data->handle);
192 down(&pm_sem); 197 down(&pm_sem);
193 pm_prepare_console(); 198 pm_prepare_console();
194 error = device_suspend(PMSG_FREEZE); 199 error = device_suspend(PMSG_FREEZE);
diff --git a/kernel/printk.c b/kernel/printk.c
index 1149365e989e..771f5e861bcd 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -721,6 +721,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
721 return 0; 721 return 0;
722} 722}
723 723
724#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
724/** 725/**
725 * suspend_console - suspend the console subsystem 726 * suspend_console - suspend the console subsystem
726 * 727 *
@@ -728,6 +729,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
728 */ 729 */
729void suspend_console(void) 730void suspend_console(void)
730{ 731{
732 printk("Suspending console(s)\n");
731 acquire_console_sem(); 733 acquire_console_sem();
732 console_suspended = 1; 734 console_suspended = 1;
733} 735}
@@ -737,6 +739,7 @@ void resume_console(void)
737 console_suspended = 0; 739 console_suspended = 0;
738 release_console_sem(); 740 release_console_sem();
739} 741}
742#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
740 743
741/** 744/**
742 * acquire_console_sem - lock the console system for exclusive use. 745 * acquire_console_sem - lock the console system for exclusive use.
diff --git a/kernel/profile.c b/kernel/profile.c
index d5bd75e7501c..fb660c7d35ba 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -309,13 +309,17 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
309 node = cpu_to_node(cpu); 309 node = cpu_to_node(cpu);
310 per_cpu(cpu_profile_flip, cpu) = 0; 310 per_cpu(cpu_profile_flip, cpu) = 0;
311 if (!per_cpu(cpu_profile_hits, cpu)[1]) { 311 if (!per_cpu(cpu_profile_hits, cpu)[1]) {
312 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); 312 page = alloc_pages_node(node,
313 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
314 0);
313 if (!page) 315 if (!page)
314 return NOTIFY_BAD; 316 return NOTIFY_BAD;
315 per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); 317 per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
316 } 318 }
317 if (!per_cpu(cpu_profile_hits, cpu)[0]) { 319 if (!per_cpu(cpu_profile_hits, cpu)[0]) {
318 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); 320 page = alloc_pages_node(node,
321 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
322 0);
319 if (!page) 323 if (!page)
320 goto out_free; 324 goto out_free;
321 per_cpu(cpu_profile_hits, cpu)[0] = page_address(page); 325 per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
@@ -491,12 +495,16 @@ static int __init create_hash_tables(void)
491 int node = cpu_to_node(cpu); 495 int node = cpu_to_node(cpu);
492 struct page *page; 496 struct page *page;
493 497
494 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); 498 page = alloc_pages_node(node,
499 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
500 0);
495 if (!page) 501 if (!page)
496 goto out_cleanup; 502 goto out_cleanup;
497 per_cpu(cpu_profile_hits, cpu)[1] 503 per_cpu(cpu_profile_hits, cpu)[1]
498 = (struct profile_hit *)page_address(page); 504 = (struct profile_hit *)page_address(page);
499 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); 505 page = alloc_pages_node(node,
506 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
507 0);
500 if (!page) 508 if (!page)
501 goto out_cleanup; 509 goto out_cleanup;
502 per_cpu(cpu_profile_hits, cpu)[0] 510 per_cpu(cpu_profile_hits, cpu)[0]
diff --git a/kernel/sched.c b/kernel/sched.c
index a234fbee1238..5c848fd4e461 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -238,6 +238,7 @@ struct rq {
238 /* For active balancing */ 238 /* For active balancing */
239 int active_balance; 239 int active_balance;
240 int push_cpu; 240 int push_cpu;
241 int cpu; /* cpu of this runqueue */
241 242
242 struct task_struct *migration_thread; 243 struct task_struct *migration_thread;
243 struct list_head migration_queue; 244 struct list_head migration_queue;
@@ -267,6 +268,15 @@ struct rq {
267 268
268static DEFINE_PER_CPU(struct rq, runqueues); 269static DEFINE_PER_CPU(struct rq, runqueues);
269 270
271static inline int cpu_of(struct rq *rq)
272{
273#ifdef CONFIG_SMP
274 return rq->cpu;
275#else
276 return 0;
277#endif
278}
279
270/* 280/*
271 * The domain tree (rq->sd) is protected by RCU's quiescent state transition. 281 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
272 * See detach_destroy_domains: synchronize_sched for details. 282 * See detach_destroy_domains: synchronize_sched for details.
@@ -2211,7 +2221,8 @@ out:
2211 */ 2221 */
2212static struct sched_group * 2222static struct sched_group *
2213find_busiest_group(struct sched_domain *sd, int this_cpu, 2223find_busiest_group(struct sched_domain *sd, int this_cpu,
2214 unsigned long *imbalance, enum idle_type idle, int *sd_idle) 2224 unsigned long *imbalance, enum idle_type idle, int *sd_idle,
2225 cpumask_t *cpus)
2215{ 2226{
2216 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; 2227 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
2217 unsigned long max_load, avg_load, total_load, this_load, total_pwr; 2228 unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2248 sum_weighted_load = sum_nr_running = avg_load = 0; 2259 sum_weighted_load = sum_nr_running = avg_load = 0;
2249 2260
2250 for_each_cpu_mask(i, group->cpumask) { 2261 for_each_cpu_mask(i, group->cpumask) {
2251 struct rq *rq = cpu_rq(i); 2262 struct rq *rq;
2263
2264 if (!cpu_isset(i, *cpus))
2265 continue;
2266
2267 rq = cpu_rq(i);
2252 2268
2253 if (*sd_idle && !idle_cpu(i)) 2269 if (*sd_idle && !idle_cpu(i))
2254 *sd_idle = 0; 2270 *sd_idle = 0;
@@ -2466,13 +2482,17 @@ ret:
2466 */ 2482 */
2467static struct rq * 2483static struct rq *
2468find_busiest_queue(struct sched_group *group, enum idle_type idle, 2484find_busiest_queue(struct sched_group *group, enum idle_type idle,
2469 unsigned long imbalance) 2485 unsigned long imbalance, cpumask_t *cpus)
2470{ 2486{
2471 struct rq *busiest = NULL, *rq; 2487 struct rq *busiest = NULL, *rq;
2472 unsigned long max_load = 0; 2488 unsigned long max_load = 0;
2473 int i; 2489 int i;
2474 2490
2475 for_each_cpu_mask(i, group->cpumask) { 2491 for_each_cpu_mask(i, group->cpumask) {
2492
2493 if (!cpu_isset(i, *cpus))
2494 continue;
2495
2476 rq = cpu_rq(i); 2496 rq = cpu_rq(i);
2477 2497
2478 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) 2498 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
@@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
2511 struct sched_group *group; 2531 struct sched_group *group;
2512 unsigned long imbalance; 2532 unsigned long imbalance;
2513 struct rq *busiest; 2533 struct rq *busiest;
2534 cpumask_t cpus = CPU_MASK_ALL;
2514 2535
2515 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2536 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2516 !sched_smt_power_savings) 2537 !sched_smt_power_savings)
@@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq,
2518 2539
2519 schedstat_inc(sd, lb_cnt[idle]); 2540 schedstat_inc(sd, lb_cnt[idle]);
2520 2541
2521 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle); 2542redo:
2543 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
2544 &cpus);
2522 if (!group) { 2545 if (!group) {
2523 schedstat_inc(sd, lb_nobusyg[idle]); 2546 schedstat_inc(sd, lb_nobusyg[idle]);
2524 goto out_balanced; 2547 goto out_balanced;
2525 } 2548 }
2526 2549
2527 busiest = find_busiest_queue(group, idle, imbalance); 2550 busiest = find_busiest_queue(group, idle, imbalance, &cpus);
2528 if (!busiest) { 2551 if (!busiest) {
2529 schedstat_inc(sd, lb_nobusyq[idle]); 2552 schedstat_inc(sd, lb_nobusyq[idle]);
2530 goto out_balanced; 2553 goto out_balanced;
@@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq,
2549 double_rq_unlock(this_rq, busiest); 2572 double_rq_unlock(this_rq, busiest);
2550 2573
2551 /* All tasks on this runqueue were pinned by CPU affinity */ 2574 /* All tasks on this runqueue were pinned by CPU affinity */
2552 if (unlikely(all_pinned)) 2575 if (unlikely(all_pinned)) {
2576 cpu_clear(cpu_of(busiest), cpus);
2577 if (!cpus_empty(cpus))
2578 goto redo;
2553 goto out_balanced; 2579 goto out_balanced;
2580 }
2554 } 2581 }
2555 2582
2556 if (!nr_moved) { 2583 if (!nr_moved) {
@@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2639 unsigned long imbalance; 2666 unsigned long imbalance;
2640 int nr_moved = 0; 2667 int nr_moved = 0;
2641 int sd_idle = 0; 2668 int sd_idle = 0;
2669 cpumask_t cpus = CPU_MASK_ALL;
2642 2670
2643 if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2671 if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
2644 sd_idle = 1; 2672 sd_idle = 1;
2645 2673
2646 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); 2674 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
2647 group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle); 2675redo:
2676 group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
2677 &sd_idle, &cpus);
2648 if (!group) { 2678 if (!group) {
2649 schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); 2679 schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
2650 goto out_balanced; 2680 goto out_balanced;
2651 } 2681 }
2652 2682
2653 busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance); 2683 busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
2684 &cpus);
2654 if (!busiest) { 2685 if (!busiest) {
2655 schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); 2686 schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
2656 goto out_balanced; 2687 goto out_balanced;
@@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2668 minus_1_or_zero(busiest->nr_running), 2699 minus_1_or_zero(busiest->nr_running),
2669 imbalance, sd, NEWLY_IDLE, NULL); 2700 imbalance, sd, NEWLY_IDLE, NULL);
2670 spin_unlock(&busiest->lock); 2701 spin_unlock(&busiest->lock);
2702
2703 if (!nr_moved) {
2704 cpu_clear(cpu_of(busiest), cpus);
2705 if (!cpus_empty(cpus))
2706 goto redo;
2707 }
2671 } 2708 }
2672 2709
2673 if (!nr_moved) { 2710 if (!nr_moved) {
@@ -6747,6 +6784,7 @@ void __init sched_init(void)
6747 rq->cpu_load[j] = 0; 6784 rq->cpu_load[j] = 0;
6748 rq->active_balance = 0; 6785 rq->active_balance = 0;
6749 rq->push_cpu = 0; 6786 rq->push_cpu = 0;
6787 rq->cpu = i;
6750 rq->migration_thread = NULL; 6788 rq->migration_thread = NULL;
6751 INIT_LIST_HEAD(&rq->migration_queue); 6789 INIT_LIST_HEAD(&rq->migration_queue);
6752#endif 6790#endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 362a0cc37138..fd43c3e6786b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -943,6 +943,17 @@ static ctl_table vm_table[] = {
943 .extra1 = &zero, 943 .extra1 = &zero,
944 .extra2 = &one_hundred, 944 .extra2 = &one_hundred,
945 }, 945 },
946 {
947 .ctl_name = VM_MIN_SLAB,
948 .procname = "min_slab_ratio",
949 .data = &sysctl_min_slab_ratio,
950 .maxlen = sizeof(sysctl_min_slab_ratio),
951 .mode = 0644,
952 .proc_handler = &sysctl_min_slab_ratio_sysctl_handler,
953 .strategy = &sysctl_intvec,
954 .extra1 = &zero,
955 .extra2 = &one_hundred,
956 },
946#endif 957#endif
947#ifdef CONFIG_X86_32 958#ifdef CONFIG_X86_32
948 { 959 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 554ee688a9f8..3f21cc79a134 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -277,7 +277,7 @@ config DEBUG_HIGHMEM
277config DEBUG_BUGVERBOSE 277config DEBUG_BUGVERBOSE
278 bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED 278 bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
279 depends on BUG 279 depends on BUG
280 depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV 280 depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
281 default !EMBEDDED 281 default !EMBEDDED
282 help 282 help
283 Say Y here to make BUG() panics output the file name and line number 283 Say Y here to make BUG() panics output the file name and line number
@@ -315,7 +315,7 @@ config DEBUG_VM
315 315
316config FRAME_POINTER 316config FRAME_POINTER
317 bool "Compile the kernel with frame pointers" 317 bool "Compile the kernel with frame pointers"
318 depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390) 318 depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32)
319 default y if DEBUG_INFO && UML 319 default y if DEBUG_INFO && UML
320 help 320 help
321 If you say Y here the resulting kernel image will be slightly larger 321 If you say Y here the resulting kernel image will be slightly larger
diff --git a/mm/Makefile b/mm/Makefile
index 9dd824c11eeb..60c56c0b5e10 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,4 +23,4 @@ obj-$(CONFIG_SLAB) += slab.o
23obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o 23obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
24obj-$(CONFIG_FS_XIP) += filemap_xip.o 24obj-$(CONFIG_FS_XIP) += filemap_xip.o
25obj-$(CONFIG_MIGRATION) += migrate.o 25obj-$(CONFIG_MIGRATION) += migrate.o
26 26obj-$(CONFIG_SMP) += allocpercpu.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
new file mode 100644
index 000000000000..eaa9abeea536
--- /dev/null
+++ b/mm/allocpercpu.c
@@ -0,0 +1,129 @@
1/*
2 * linux/mm/allocpercpu.c
3 *
4 * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>
5 */
6#include <linux/mm.h>
7#include <linux/module.h>
8
9/**
10 * percpu_depopulate - depopulate per-cpu data for given cpu
11 * @__pdata: per-cpu data to depopulate
12 * @cpu: depopulate per-cpu data for this cpu
13 *
14 * Depopulating per-cpu data for a cpu going offline would be a typical
15 * use case. You need to register a cpu hotplug handler for that purpose.
16 */
17void percpu_depopulate(void *__pdata, int cpu)
18{
19 struct percpu_data *pdata = __percpu_disguise(__pdata);
20 if (pdata->ptrs[cpu]) {
21 kfree(pdata->ptrs[cpu]);
22 pdata->ptrs[cpu] = NULL;
23 }
24}
25EXPORT_SYMBOL_GPL(percpu_depopulate);
26
27/**
28 * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
29 * @__pdata: per-cpu data to depopulate
30 * @mask: depopulate per-cpu data for cpu's selected through mask bits
31 */
32void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
33{
34 int cpu;
35 for_each_cpu_mask(cpu, *mask)
36 percpu_depopulate(__pdata, cpu);
37}
38EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
39
40/**
41 * percpu_populate - populate per-cpu data for given cpu
42 * @__pdata: per-cpu data to populate further
43 * @size: size of per-cpu object
44 * @gfp: may sleep or not etc.
45 * @cpu: populate per-data for this cpu
46 *
47 * Populating per-cpu data for a cpu coming online would be a typical
48 * use case. You need to register a cpu hotplug handler for that purpose.
49 * Per-cpu object is populated with zeroed buffer.
50 */
51void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
52{
53 struct percpu_data *pdata = __percpu_disguise(__pdata);
54 int node = cpu_to_node(cpu);
55
56 BUG_ON(pdata->ptrs[cpu]);
57 if (node_online(node)) {
58 /* FIXME: kzalloc_node(size, gfp, node) */
59 pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
60 if (pdata->ptrs[cpu])
61 memset(pdata->ptrs[cpu], 0, size);
62 } else
63 pdata->ptrs[cpu] = kzalloc(size, gfp);
64 return pdata->ptrs[cpu];
65}
66EXPORT_SYMBOL_GPL(percpu_populate);
67
68/**
69 * percpu_populate_mask - populate per-cpu data for more cpu's
70 * @__pdata: per-cpu data to populate further
71 * @size: size of per-cpu object
72 * @gfp: may sleep or not etc.
73 * @mask: populate per-cpu data for cpu's selected through mask bits
74 *
75 * Per-cpu objects are populated with zeroed buffers.
76 */
77int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
78 cpumask_t *mask)
79{
80 cpumask_t populated = CPU_MASK_NONE;
81 int cpu;
82
83 for_each_cpu_mask(cpu, *mask)
84 if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
85 __percpu_depopulate_mask(__pdata, &populated);
86 return -ENOMEM;
87 } else
88 cpu_set(cpu, populated);
89 return 0;
90}
91EXPORT_SYMBOL_GPL(__percpu_populate_mask);
92
93/**
94 * percpu_alloc_mask - initial setup of per-cpu data
95 * @size: size of per-cpu object
96 * @gfp: may sleep or not etc.
97 * @mask: populate per-data for cpu's selected through mask bits
98 *
99 * Populating per-cpu data for all online cpu's would be a typical use case,
100 * which is simplified by the percpu_alloc() wrapper.
101 * Per-cpu objects are populated with zeroed buffers.
102 */
103void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
104{
105 void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
106 void *__pdata = __percpu_disguise(pdata);
107
108 if (unlikely(!pdata))
109 return NULL;
110 if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
111 return __pdata;
112 kfree(pdata);
113 return NULL;
114}
115EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
116
117/**
118 * percpu_free - final cleanup of per-cpu data
119 * @__pdata: object to clean up
120 *
121 * We simply clean up any per-cpu object left. No need for the client to
122 * track and specify through a bis mask which per-cpu objects are to free.
123 */
124void percpu_free(void *__pdata)
125{
126 __percpu_depopulate_mask(__pdata, &cpu_possible_map);
127 kfree(__percpu_disguise(__pdata));
128}
129EXPORT_SYMBOL_GPL(percpu_free);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 50353e0dac12..d53112fcb404 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -8,17 +8,15 @@
8 * free memory collector. It's used to deal with reserved 8 * free memory collector. It's used to deal with reserved
9 * system memory and memory holes as well. 9 * system memory and memory holes as well.
10 */ 10 */
11
12#include <linux/mm.h>
13#include <linux/kernel_stat.h>
14#include <linux/swap.h>
15#include <linux/interrupt.h>
16#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/pfn.h>
17#include <linux/bootmem.h> 13#include <linux/bootmem.h>
18#include <linux/mmzone.h>
19#include <linux/module.h> 14#include <linux/module.h>
20#include <asm/dma.h> 15
16#include <asm/bug.h>
21#include <asm/io.h> 17#include <asm/io.h>
18#include <asm/processor.h>
19
22#include "internal.h" 20#include "internal.h"
23 21
24/* 22/*
@@ -41,7 +39,7 @@ unsigned long saved_max_pfn;
41#endif 39#endif
42 40
43/* return the number of _pages_ that will be allocated for the boot bitmap */ 41/* return the number of _pages_ that will be allocated for the boot bitmap */
44unsigned long __init bootmem_bootmap_pages (unsigned long pages) 42unsigned long __init bootmem_bootmap_pages(unsigned long pages)
45{ 43{
46 unsigned long mapsize; 44 unsigned long mapsize;
47 45
@@ -51,12 +49,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages)
51 49
52 return mapsize; 50 return mapsize;
53} 51}
52
54/* 53/*
55 * link bdata in order 54 * link bdata in order
56 */ 55 */
57static void link_bootmem(bootmem_data_t *bdata) 56static void __init link_bootmem(bootmem_data_t *bdata)
58{ 57{
59 bootmem_data_t *ent; 58 bootmem_data_t *ent;
59
60 if (list_empty(&bdata_list)) { 60 if (list_empty(&bdata_list)) {
61 list_add(&bdata->list, &bdata_list); 61 list_add(&bdata->list, &bdata_list);
62 return; 62 return;
@@ -69,22 +69,32 @@ static void link_bootmem(bootmem_data_t *bdata)
69 } 69 }
70 } 70 }
71 list_add_tail(&bdata->list, &bdata_list); 71 list_add_tail(&bdata->list, &bdata_list);
72 return;
73} 72}
74 73
74/*
75 * Given an initialised bdata, it returns the size of the boot bitmap
76 */
77static unsigned long __init get_mapsize(bootmem_data_t *bdata)
78{
79 unsigned long mapsize;
80 unsigned long start = PFN_DOWN(bdata->node_boot_start);
81 unsigned long end = bdata->node_low_pfn;
82
83 mapsize = ((end - start) + 7) / 8;
84 return ALIGN(mapsize, sizeof(long));
85}
75 86
76/* 87/*
77 * Called once to set up the allocator itself. 88 * Called once to set up the allocator itself.
78 */ 89 */
79static unsigned long __init init_bootmem_core (pg_data_t *pgdat, 90static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
80 unsigned long mapstart, unsigned long start, unsigned long end) 91 unsigned long mapstart, unsigned long start, unsigned long end)
81{ 92{
82 bootmem_data_t *bdata = pgdat->bdata; 93 bootmem_data_t *bdata = pgdat->bdata;
83 unsigned long mapsize = ((end - start)+7)/8; 94 unsigned long mapsize;
84 95
85 mapsize = ALIGN(mapsize, sizeof(long)); 96 bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
86 bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); 97 bdata->node_boot_start = PFN_PHYS(start);
87 bdata->node_boot_start = (start << PAGE_SHIFT);
88 bdata->node_low_pfn = end; 98 bdata->node_low_pfn = end;
89 link_bootmem(bdata); 99 link_bootmem(bdata);
90 100
@@ -92,6 +102,7 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
92 * Initially all pages are reserved - setup_arch() has to 102 * Initially all pages are reserved - setup_arch() has to
93 * register free RAM areas explicitly. 103 * register free RAM areas explicitly.
94 */ 104 */
105 mapsize = get_mapsize(bdata);
95 memset(bdata->node_bootmem_map, 0xff, mapsize); 106 memset(bdata->node_bootmem_map, 0xff, mapsize);
96 107
97 return mapsize; 108 return mapsize;
@@ -102,22 +113,22 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
102 * might be used for boot-time allocations - or it might get added 113 * might be used for boot-time allocations - or it might get added
103 * to the free page pool later on. 114 * to the free page pool later on.
104 */ 115 */
105static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) 116static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
117 unsigned long size)
106{ 118{
119 unsigned long sidx, eidx;
107 unsigned long i; 120 unsigned long i;
121
108 /* 122 /*
109 * round up, partially reserved pages are considered 123 * round up, partially reserved pages are considered
110 * fully reserved. 124 * fully reserved.
111 */ 125 */
112 unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE;
113 unsigned long eidx = (addr + size - bdata->node_boot_start +
114 PAGE_SIZE-1)/PAGE_SIZE;
115 unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE;
116
117 BUG_ON(!size); 126 BUG_ON(!size);
118 BUG_ON(sidx >= eidx); 127 BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
119 BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn); 128 BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
120 BUG_ON(end > bdata->node_low_pfn); 129
130 sidx = PFN_DOWN(addr - bdata->node_boot_start);
131 eidx = PFN_UP(addr + size - bdata->node_boot_start);
121 132
122 for (i = sidx; i < eidx; i++) 133 for (i = sidx; i < eidx; i++)
123 if (test_and_set_bit(i, bdata->node_bootmem_map)) { 134 if (test_and_set_bit(i, bdata->node_bootmem_map)) {
@@ -127,20 +138,18 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add
127 } 138 }
128} 139}
129 140
130static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) 141static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
142 unsigned long size)
131{ 143{
144 unsigned long sidx, eidx;
132 unsigned long i; 145 unsigned long i;
133 unsigned long start; 146
134 /* 147 /*
135 * round down end of usable mem, partially free pages are 148 * round down end of usable mem, partially free pages are
136 * considered reserved. 149 * considered reserved.
137 */ 150 */
138 unsigned long sidx;
139 unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
140 unsigned long end = (addr + size)/PAGE_SIZE;
141
142 BUG_ON(!size); 151 BUG_ON(!size);
143 BUG_ON(end > bdata->node_low_pfn); 152 BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn);
144 153
145 if (addr < bdata->last_success) 154 if (addr < bdata->last_success)
146 bdata->last_success = addr; 155 bdata->last_success = addr;
@@ -148,8 +157,8 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
148 /* 157 /*
149 * Round up the beginning of the address. 158 * Round up the beginning of the address.
150 */ 159 */
151 start = (addr + PAGE_SIZE-1) / PAGE_SIZE; 160 sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
152 sidx = start - (bdata->node_boot_start/PAGE_SIZE); 161 eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
153 162
154 for (i = sidx; i < eidx; i++) { 163 for (i = sidx; i < eidx; i++) {
155 if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) 164 if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
@@ -175,10 +184,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
175 unsigned long align, unsigned long goal, unsigned long limit) 184 unsigned long align, unsigned long goal, unsigned long limit)
176{ 185{
177 unsigned long offset, remaining_size, areasize, preferred; 186 unsigned long offset, remaining_size, areasize, preferred;
178 unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; 187 unsigned long i, start = 0, incr, eidx, end_pfn;
179 void *ret; 188 void *ret;
180 189
181 if(!size) { 190 if (!size) {
182 printk("__alloc_bootmem_core(): zero-sized request\n"); 191 printk("__alloc_bootmem_core(): zero-sized request\n");
183 BUG(); 192 BUG();
184 } 193 }
@@ -187,23 +196,22 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
187 if (limit && bdata->node_boot_start >= limit) 196 if (limit && bdata->node_boot_start >= limit)
188 return NULL; 197 return NULL;
189 198
190 limit >>=PAGE_SHIFT; 199 end_pfn = bdata->node_low_pfn;
200 limit = PFN_DOWN(limit);
191 if (limit && end_pfn > limit) 201 if (limit && end_pfn > limit)
192 end_pfn = limit; 202 end_pfn = limit;
193 203
194 eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); 204 eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
195 offset = 0; 205 offset = 0;
196 if (align && 206 if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
197 (bdata->node_boot_start & (align - 1UL)) != 0) 207 offset = align - (bdata->node_boot_start & (align - 1UL));
198 offset = (align - (bdata->node_boot_start & (align - 1UL))); 208 offset = PFN_DOWN(offset);
199 offset >>= PAGE_SHIFT;
200 209
201 /* 210 /*
202 * We try to allocate bootmem pages above 'goal' 211 * We try to allocate bootmem pages above 'goal'
203 * first, then we try to allocate lower pages. 212 * first, then we try to allocate lower pages.
204 */ 213 */
205 if (goal && (goal >= bdata->node_boot_start) && 214 if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
206 ((goal >> PAGE_SHIFT) < end_pfn)) {
207 preferred = goal - bdata->node_boot_start; 215 preferred = goal - bdata->node_boot_start;
208 216
209 if (bdata->last_success >= preferred) 217 if (bdata->last_success >= preferred)
@@ -212,9 +220,8 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
212 } else 220 } else
213 preferred = 0; 221 preferred = 0;
214 222
215 preferred = ALIGN(preferred, align) >> PAGE_SHIFT; 223 preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
216 preferred += offset; 224 areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
217 areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
218 incr = align >> PAGE_SHIFT ? : 1; 225 incr = align >> PAGE_SHIFT ? : 1;
219 226
220restart_scan: 227restart_scan:
@@ -229,7 +236,7 @@ restart_scan:
229 for (j = i + 1; j < i + areasize; ++j) { 236 for (j = i + 1; j < i + areasize; ++j) {
230 if (j >= eidx) 237 if (j >= eidx)
231 goto fail_block; 238 goto fail_block;
232 if (test_bit (j, bdata->node_bootmem_map)) 239 if (test_bit(j, bdata->node_bootmem_map))
233 goto fail_block; 240 goto fail_block;
234 } 241 }
235 start = i; 242 start = i;
@@ -245,7 +252,7 @@ restart_scan:
245 return NULL; 252 return NULL;
246 253
247found: 254found:
248 bdata->last_success = start << PAGE_SHIFT; 255 bdata->last_success = PFN_PHYS(start);
249 BUG_ON(start >= eidx); 256 BUG_ON(start >= eidx);
250 257
251 /* 258 /*
@@ -257,19 +264,21 @@ found:
257 bdata->last_offset && bdata->last_pos+1 == start) { 264 bdata->last_offset && bdata->last_pos+1 == start) {
258 offset = ALIGN(bdata->last_offset, align); 265 offset = ALIGN(bdata->last_offset, align);
259 BUG_ON(offset > PAGE_SIZE); 266 BUG_ON(offset > PAGE_SIZE);
260 remaining_size = PAGE_SIZE-offset; 267 remaining_size = PAGE_SIZE - offset;
261 if (size < remaining_size) { 268 if (size < remaining_size) {
262 areasize = 0; 269 areasize = 0;
263 /* last_pos unchanged */ 270 /* last_pos unchanged */
264 bdata->last_offset = offset+size; 271 bdata->last_offset = offset + size;
265 ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + 272 ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
266 bdata->node_boot_start); 273 offset +
274 bdata->node_boot_start);
267 } else { 275 } else {
268 remaining_size = size - remaining_size; 276 remaining_size = size - remaining_size;
269 areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; 277 areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
270 ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + 278 ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
271 bdata->node_boot_start); 279 offset +
272 bdata->last_pos = start+areasize-1; 280 bdata->node_boot_start);
281 bdata->last_pos = start + areasize - 1;
273 bdata->last_offset = remaining_size; 282 bdata->last_offset = remaining_size;
274 } 283 }
275 bdata->last_offset &= ~PAGE_MASK; 284 bdata->last_offset &= ~PAGE_MASK;
@@ -282,7 +291,7 @@ found:
282 /* 291 /*
283 * Reserve the area now: 292 * Reserve the area now:
284 */ 293 */
285 for (i = start; i < start+areasize; i++) 294 for (i = start; i < start + areasize; i++)
286 if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) 295 if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
287 BUG(); 296 BUG();
288 memset(ret, 0, size); 297 memset(ret, 0, size);
@@ -303,8 +312,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
303 312
304 count = 0; 313 count = 0;
305 /* first extant page of the node */ 314 /* first extant page of the node */
306 pfn = bdata->node_boot_start >> PAGE_SHIFT; 315 pfn = PFN_DOWN(bdata->node_boot_start);
307 idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); 316 idx = bdata->node_low_pfn - pfn;
308 map = bdata->node_bootmem_map; 317 map = bdata->node_bootmem_map;
309 /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ 318 /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
310 if (bdata->node_boot_start == 0 || 319 if (bdata->node_boot_start == 0 ||
@@ -333,7 +342,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
333 } 342 }
334 } 343 }
335 } else { 344 } else {
336 i+=BITS_PER_LONG; 345 i += BITS_PER_LONG;
337 } 346 }
338 pfn += BITS_PER_LONG; 347 pfn += BITS_PER_LONG;
339 } 348 }
@@ -345,9 +354,10 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
345 */ 354 */
346 page = virt_to_page(bdata->node_bootmem_map); 355 page = virt_to_page(bdata->node_bootmem_map);
347 count = 0; 356 count = 0;
348 for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { 357 idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
349 count++; 358 for (i = 0; i < idx; i++, page++) {
350 __free_pages_bootmem(page, 0); 359 __free_pages_bootmem(page, 0);
360 count++;
351 } 361 }
352 total += count; 362 total += count;
353 bdata->node_bootmem_map = NULL; 363 bdata->node_bootmem_map = NULL;
@@ -355,64 +365,72 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
355 return total; 365 return total;
356} 366}
357 367
358unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) 368unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
369 unsigned long startpfn, unsigned long endpfn)
359{ 370{
360 return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn)); 371 return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
361} 372}
362 373
363void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) 374void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
375 unsigned long size)
364{ 376{
365 reserve_bootmem_core(pgdat->bdata, physaddr, size); 377 reserve_bootmem_core(pgdat->bdata, physaddr, size);
366} 378}
367 379
368void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) 380void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
381 unsigned long size)
369{ 382{
370 free_bootmem_core(pgdat->bdata, physaddr, size); 383 free_bootmem_core(pgdat->bdata, physaddr, size);
371} 384}
372 385
373unsigned long __init free_all_bootmem_node (pg_data_t *pgdat) 386unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
374{ 387{
375 return(free_all_bootmem_core(pgdat)); 388 return free_all_bootmem_core(pgdat);
376} 389}
377 390
378unsigned long __init init_bootmem (unsigned long start, unsigned long pages) 391unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
379{ 392{
380 max_low_pfn = pages; 393 max_low_pfn = pages;
381 min_low_pfn = start; 394 min_low_pfn = start;
382 return(init_bootmem_core(NODE_DATA(0), start, 0, pages)); 395 return init_bootmem_core(NODE_DATA(0), start, 0, pages);
383} 396}
384 397
385#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE 398#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
386void __init reserve_bootmem (unsigned long addr, unsigned long size) 399void __init reserve_bootmem(unsigned long addr, unsigned long size)
387{ 400{
388 reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size); 401 reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
389} 402}
390#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 403#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
391 404
392void __init free_bootmem (unsigned long addr, unsigned long size) 405void __init free_bootmem(unsigned long addr, unsigned long size)
393{ 406{
394 free_bootmem_core(NODE_DATA(0)->bdata, addr, size); 407 free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
395} 408}
396 409
397unsigned long __init free_all_bootmem (void) 410unsigned long __init free_all_bootmem(void)
398{ 411{
399 return(free_all_bootmem_core(NODE_DATA(0))); 412 return free_all_bootmem_core(NODE_DATA(0));
400} 413}
401 414
402void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) 415void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
416 unsigned long goal)
403{ 417{
404 bootmem_data_t *bdata; 418 bootmem_data_t *bdata;
405 void *ptr; 419 void *ptr;
406 420
407 list_for_each_entry(bdata, &bdata_list, list) 421 list_for_each_entry(bdata, &bdata_list, list) {
408 if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) 422 ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
409 return(ptr); 423 if (ptr)
424 return ptr;
425 }
410 return NULL; 426 return NULL;
411} 427}
412 428
413void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) 429void * __init __alloc_bootmem(unsigned long size, unsigned long align,
430 unsigned long goal)
414{ 431{
415 void *mem = __alloc_bootmem_nopanic(size,align,goal); 432 void *mem = __alloc_bootmem_nopanic(size,align,goal);
433
416 if (mem) 434 if (mem)
417 return mem; 435 return mem;
418 /* 436 /*
@@ -424,29 +442,34 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
424} 442}
425 443
426 444
427void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, 445void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
428 unsigned long goal) 446 unsigned long align, unsigned long goal)
429{ 447{
430 void *ptr; 448 void *ptr;
431 449
432 ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); 450 ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
433 if (ptr) 451 if (ptr)
434 return (ptr); 452 return ptr;
435 453
436 return __alloc_bootmem(size, align, goal); 454 return __alloc_bootmem(size, align, goal);
437} 455}
438 456
439#define LOW32LIMIT 0xffffffff 457#ifndef ARCH_LOW_ADDRESS_LIMIT
458#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
459#endif
440 460
441void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) 461void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
462 unsigned long goal)
442{ 463{
443 bootmem_data_t *bdata; 464 bootmem_data_t *bdata;
444 void *ptr; 465 void *ptr;
445 466
446 list_for_each_entry(bdata, &bdata_list, list) 467 list_for_each_entry(bdata, &bdata_list, list) {
447 if ((ptr = __alloc_bootmem_core(bdata, size, 468 ptr = __alloc_bootmem_core(bdata, size, align, goal,
448 align, goal, LOW32LIMIT))) 469 ARCH_LOW_ADDRESS_LIMIT);
449 return(ptr); 470 if (ptr)
471 return ptr;
472 }
450 473
451 /* 474 /*
452 * Whoops, we cannot satisfy the allocation request. 475 * Whoops, we cannot satisfy the allocation request.
@@ -459,5 +482,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsig
459void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 482void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
460 unsigned long align, unsigned long goal) 483 unsigned long align, unsigned long goal)
461{ 484{
462 return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT); 485 return __alloc_bootmem_core(pgdat->bdata, size, align, goal,
486 ARCH_LOW_ADDRESS_LIMIT);
463} 487}
diff --git a/mm/filemap.c b/mm/filemap.c
index b9a60c43b61a..afcdc72b5e90 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x)
488EXPORT_SYMBOL(page_cache_alloc_cold); 488EXPORT_SYMBOL(page_cache_alloc_cold);
489#endif 489#endif
490 490
491static int __sleep_on_page_lock(void *word)
492{
493 io_schedule();
494 return 0;
495}
496
491/* 497/*
492 * In order to wait for pages to become available there must be 498 * In order to wait for pages to become available there must be
493 * waitqueues associated with pages. By using a hash table of 499 * waitqueues associated with pages. By using a hash table of
@@ -577,13 +583,24 @@ void fastcall __lock_page(struct page *page)
577} 583}
578EXPORT_SYMBOL(__lock_page); 584EXPORT_SYMBOL(__lock_page);
579 585
586/*
587 * Variant of lock_page that does not require the caller to hold a reference
588 * on the page's mapping.
589 */
590void fastcall __lock_page_nosync(struct page *page)
591{
592 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
593 __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
594 TASK_UNINTERRUPTIBLE);
595}
596
580/** 597/**
581 * find_get_page - find and get a page reference 598 * find_get_page - find and get a page reference
582 * @mapping: the address_space to search 599 * @mapping: the address_space to search
583 * @offset: the page index 600 * @offset: the page index
584 * 601 *
585 * A rather lightweight function, finding and getting a reference to a 602 * Is there a pagecache struct page at the given (mapping, offset) tuple?
586 * hashed page atomically. 603 * If yes, increment its refcount and return it; if no, return NULL.
587 */ 604 */
588struct page * find_get_page(struct address_space *mapping, unsigned long offset) 605struct page * find_get_page(struct address_space *mapping, unsigned long offset)
589{ 606{
@@ -970,7 +987,7 @@ page_not_up_to_date:
970 /* Get exclusive access to the page ... */ 987 /* Get exclusive access to the page ... */
971 lock_page(page); 988 lock_page(page);
972 989
973 /* Did it get unhashed before we got the lock? */ 990 /* Did it get truncated before we got the lock? */
974 if (!page->mapping) { 991 if (!page->mapping) {
975 unlock_page(page); 992 unlock_page(page);
976 page_cache_release(page); 993 page_cache_release(page);
@@ -1610,7 +1627,7 @@ no_cached_page:
1610page_not_uptodate: 1627page_not_uptodate:
1611 lock_page(page); 1628 lock_page(page);
1612 1629
1613 /* Did it get unhashed while we waited for it? */ 1630 /* Did it get truncated while we waited for it? */
1614 if (!page->mapping) { 1631 if (!page->mapping) {
1615 unlock_page(page); 1632 unlock_page(page);
1616 goto err; 1633 goto err;
diff --git a/mm/fremap.c b/mm/fremap.c
index 21b7d0cbc98c..aa30618ec6b2 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -79,9 +79,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
79 inc_mm_counter(mm, file_rss); 79 inc_mm_counter(mm, file_rss);
80 80
81 flush_icache_page(vma, page); 81 flush_icache_page(vma, page);
82 set_pte_at(mm, addr, pte, mk_pte(page, prot)); 82 pte_val = mk_pte(page, prot);
83 set_pte_at(mm, addr, pte, pte_val);
83 page_add_file_rmap(page); 84 page_add_file_rmap(page);
84 pte_val = *pte;
85 update_mmu_cache(vma, addr, pte_val); 85 update_mmu_cache(vma, addr, pte_val);
86 lazy_mmu_prot_update(pte_val); 86 lazy_mmu_prot_update(pte_val);
87 err = 0; 87 err = 0;
diff --git a/mm/highmem.c b/mm/highmem.c
index 9b2a5403c447..ee5519b176ee 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
46 */ 46 */
47#ifdef CONFIG_HIGHMEM 47#ifdef CONFIG_HIGHMEM
48 48
49unsigned long totalhigh_pages __read_mostly;
50
51unsigned int nr_free_highpages (void)
52{
53 pg_data_t *pgdat;
54 unsigned int pages = 0;
55
56 for_each_online_pgdat(pgdat)
57 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
58
59 return pages;
60}
61
49static int pkmap_count[LAST_PKMAP]; 62static int pkmap_count[LAST_PKMAP];
50static unsigned int last_pkmap_nr; 63static unsigned int last_pkmap_nr;
51static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); 64static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index df499973255f..7c7d03dbf73d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
72 struct zone **z; 72 struct zone **z;
73 73
74 for (z = zonelist->zones; *z; z++) { 74 for (z = zonelist->zones; *z; z++) {
75 nid = (*z)->zone_pgdat->node_id; 75 nid = zone_to_nid(*z);
76 if (cpuset_zone_allowed(*z, GFP_HIGHUSER) && 76 if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
77 !list_empty(&hugepage_freelists[nid])) 77 !list_empty(&hugepage_freelists[nid]))
78 break; 78 break;
@@ -177,7 +177,7 @@ static void update_and_free_page(struct page *page)
177{ 177{
178 int i; 178 int i;
179 nr_huge_pages--; 179 nr_huge_pages--;
180 nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--; 180 nr_huge_pages_node[page_to_nid(page)]--;
181 for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { 181 for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
182 page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 182 page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
183 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | 183 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
@@ -191,7 +191,8 @@ static void update_and_free_page(struct page *page)
191#ifdef CONFIG_HIGHMEM 191#ifdef CONFIG_HIGHMEM
192static void try_to_free_low(unsigned long count) 192static void try_to_free_low(unsigned long count)
193{ 193{
194 int i, nid; 194 int i;
195
195 for (i = 0; i < MAX_NUMNODES; ++i) { 196 for (i = 0; i < MAX_NUMNODES; ++i) {
196 struct page *page, *next; 197 struct page *page, *next;
197 list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { 198 list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
@@ -199,9 +200,8 @@ static void try_to_free_low(unsigned long count)
199 continue; 200 continue;
200 list_del(&page->lru); 201 list_del(&page->lru);
201 update_and_free_page(page); 202 update_and_free_page(page);
202 nid = page_zone(page)->zone_pgdat->node_id;
203 free_huge_pages--; 203 free_huge_pages--;
204 free_huge_pages_node[nid]--; 204 free_huge_pages_node[page_to_nid(page)]--;
205 if (count >= nr_huge_pages) 205 if (count >= nr_huge_pages)
206 return; 206 return;
207 } 207 }
diff --git a/mm/internal.h b/mm/internal.h
index d20e3cc4aef0..d527b80b292f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v)
24 */ 24 */
25static inline void set_page_refcounted(struct page *page) 25static inline void set_page_refcounted(struct page *page)
26{ 26{
27 BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); 27 VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
28 BUG_ON(atomic_read(&page->_count)); 28 VM_BUG_ON(atomic_read(&page->_count));
29 set_page_count(page, 1); 29 set_page_count(page, 1);
30} 30}
31 31
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..92a3ebd8d795 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/delayacct.h> 50#include <linux/delayacct.h>
51#include <linux/init.h> 51#include <linux/init.h>
52#include <linux/writeback.h>
52 53
53#include <asm/pgalloc.h> 54#include <asm/pgalloc.h>
54#include <asm/uaccess.h> 55#include <asm/uaccess.h>
@@ -1226,7 +1227,12 @@ out:
1226 return retval; 1227 return retval;
1227} 1228}
1228 1229
1229/* 1230/**
1231 * vm_insert_page - insert single page into user vma
1232 * @vma: user vma to map to
1233 * @addr: target user address of this page
1234 * @page: source kernel page
1235 *
1230 * This allows drivers to insert individual pages they've allocated 1236 * This allows drivers to insert individual pages they've allocated
1231 * into a user vma. 1237 * into a user vma.
1232 * 1238 *
@@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
1318 return 0; 1324 return 0;
1319} 1325}
1320 1326
1321/* Note: this is only safe if the mm semaphore is held when called. */ 1327/**
1328 * remap_pfn_range - remap kernel memory to userspace
1329 * @vma: user vma to map to
1330 * @addr: target user address to start at
1331 * @pfn: physical address of kernel memory
1332 * @size: size of map area
1333 * @prot: page protection flags for this mapping
1334 *
1335 * Note: this is only safe if the mm semaphore is held when called.
1336 */
1322int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 1337int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1323 unsigned long pfn, unsigned long size, pgprot_t prot) 1338 unsigned long pfn, unsigned long size, pgprot_t prot)
1324{ 1339{
@@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1458{ 1473{
1459 struct page *old_page, *new_page; 1474 struct page *old_page, *new_page;
1460 pte_t entry; 1475 pte_t entry;
1461 int reuse, ret = VM_FAULT_MINOR; 1476 int reuse = 0, ret = VM_FAULT_MINOR;
1477 struct page *dirty_page = NULL;
1462 1478
1463 old_page = vm_normal_page(vma, address, orig_pte); 1479 old_page = vm_normal_page(vma, address, orig_pte);
1464 if (!old_page) 1480 if (!old_page)
1465 goto gotten; 1481 goto gotten;
1466 1482
1467 if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) == 1483 /*
1468 (VM_SHARED|VM_WRITE))) { 1484 * Take out anonymous pages first, anonymous shared vmas are
1485 * not dirty accountable.
1486 */
1487 if (PageAnon(old_page)) {
1488 if (!TestSetPageLocked(old_page)) {
1489 reuse = can_share_swap_page(old_page);
1490 unlock_page(old_page);
1491 }
1492 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
1493 (VM_WRITE|VM_SHARED))) {
1494 /*
1495 * Only catch write-faults on shared writable pages,
1496 * read-only shared pages can get COWed by
1497 * get_user_pages(.write=1, .force=1).
1498 */
1469 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { 1499 if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
1470 /* 1500 /*
1471 * Notify the address space that the page is about to 1501 * Notify the address space that the page is about to
@@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1494 if (!pte_same(*page_table, orig_pte)) 1524 if (!pte_same(*page_table, orig_pte))
1495 goto unlock; 1525 goto unlock;
1496 } 1526 }
1497 1527 dirty_page = old_page;
1528 get_page(dirty_page);
1498 reuse = 1; 1529 reuse = 1;
1499 } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
1500 reuse = can_share_swap_page(old_page);
1501 unlock_page(old_page);
1502 } else {
1503 reuse = 0;
1504 } 1530 }
1505 1531
1506 if (reuse) { 1532 if (reuse) {
@@ -1566,6 +1592,10 @@ gotten:
1566 page_cache_release(old_page); 1592 page_cache_release(old_page);
1567unlock: 1593unlock:
1568 pte_unmap_unlock(page_table, ptl); 1594 pte_unmap_unlock(page_table, ptl);
1595 if (dirty_page) {
1596 set_page_dirty_balance(dirty_page);
1597 put_page(dirty_page);
1598 }
1569 return ret; 1599 return ret;
1570oom: 1600oom:
1571 if (old_page) 1601 if (old_page)
@@ -1785,9 +1815,10 @@ void unmap_mapping_range(struct address_space *mapping,
1785} 1815}
1786EXPORT_SYMBOL(unmap_mapping_range); 1816EXPORT_SYMBOL(unmap_mapping_range);
1787 1817
1788/* 1818/**
1789 * Handle all mappings that got truncated by a "truncate()" 1819 * vmtruncate - unmap mappings "freed" by truncate() syscall
1790 * system call. 1820 * @inode: inode of the file used
1821 * @offset: file offset to start truncating
1791 * 1822 *
1792 * NOTE! We have to be ready to update the memory sharing 1823 * NOTE! We have to be ready to update the memory sharing
1793 * between the file and the memory map for a potential last 1824 * between the file and the memory map for a potential last
@@ -1856,11 +1887,16 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
1856} 1887}
1857EXPORT_UNUSED_SYMBOL(vmtruncate_range); /* June 2006 */ 1888EXPORT_UNUSED_SYMBOL(vmtruncate_range); /* June 2006 */
1858 1889
1859/* 1890/**
1891 * swapin_readahead - swap in pages in hope we need them soon
1892 * @entry: swap entry of this memory
1893 * @addr: address to start
1894 * @vma: user vma this addresses belong to
1895 *
1860 * Primitive swap readahead code. We simply read an aligned block of 1896 * Primitive swap readahead code. We simply read an aligned block of
1861 * (1 << page_cluster) entries in the swap area. This method is chosen 1897 * (1 << page_cluster) entries in the swap area. This method is chosen
1862 * because it doesn't cost us any seek time. We also make sure to queue 1898 * because it doesn't cost us any seek time. We also make sure to queue
1863 * the 'original' request together with the readahead ones... 1899 * the 'original' request together with the readahead ones...
1864 * 1900 *
1865 * This has been extended to use the NUMA policies from the mm triggering 1901 * This has been extended to use the NUMA policies from the mm triggering
1866 * the readahead. 1902 * the readahead.
@@ -2098,6 +2134,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
2098 unsigned int sequence = 0; 2134 unsigned int sequence = 0;
2099 int ret = VM_FAULT_MINOR; 2135 int ret = VM_FAULT_MINOR;
2100 int anon = 0; 2136 int anon = 0;
2137 struct page *dirty_page = NULL;
2101 2138
2102 pte_unmap(page_table); 2139 pte_unmap(page_table);
2103 BUG_ON(vma->vm_flags & VM_PFNMAP); 2140 BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2229,10 @@ retry:
2192 } else { 2229 } else {
2193 inc_mm_counter(mm, file_rss); 2230 inc_mm_counter(mm, file_rss);
2194 page_add_file_rmap(new_page); 2231 page_add_file_rmap(new_page);
2232 if (write_access) {
2233 dirty_page = new_page;
2234 get_page(dirty_page);
2235 }
2195 } 2236 }
2196 } else { 2237 } else {
2197 /* One of our sibling threads was faster, back out. */ 2238 /* One of our sibling threads was faster, back out. */
@@ -2204,6 +2245,10 @@ retry:
2204 lazy_mmu_prot_update(entry); 2245 lazy_mmu_prot_update(entry);
2205unlock: 2246unlock:
2206 pte_unmap_unlock(page_table, ptl); 2247 pte_unmap_unlock(page_table, ptl);
2248 if (dirty_page) {
2249 set_page_dirty_balance(dirty_page);
2250 put_page(dirty_page);
2251 }
2207 return ret; 2252 return ret;
2208oom: 2253oom:
2209 page_cache_release(new_page); 2254 page_cache_release(new_page);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a9963ceddd65..38f89650bc84 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
105 105
106/* Highest zone. An specific allocation for a zone below that is not 106/* Highest zone. An specific allocation for a zone below that is not
107 policied. */ 107 policied. */
108int policy_zone = ZONE_DMA; 108enum zone_type policy_zone = ZONE_DMA;
109 109
110struct mempolicy default_policy = { 110struct mempolicy default_policy = {
111 .refcnt = ATOMIC_INIT(1), /* never free it */ 111 .refcnt = ATOMIC_INIT(1), /* never free it */
@@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
137static struct zonelist *bind_zonelist(nodemask_t *nodes) 137static struct zonelist *bind_zonelist(nodemask_t *nodes)
138{ 138{
139 struct zonelist *zl; 139 struct zonelist *zl;
140 int num, max, nd, k; 140 int num, max, nd;
141 enum zone_type k;
141 142
142 max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); 143 max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
143 zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); 144 zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
@@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
148 lower zones etc. Avoid empty zones because the memory allocator 149 lower zones etc. Avoid empty zones because the memory allocator
149 doesn't like them. If you implement node hot removal you 150 doesn't like them. If you implement node hot removal you
150 have to fix that. */ 151 have to fix that. */
151 for (k = policy_zone; k >= 0; k--) { 152 k = policy_zone;
153 while (1) {
152 for_each_node_mask(nd, *nodes) { 154 for_each_node_mask(nd, *nodes) {
153 struct zone *z = &NODE_DATA(nd)->node_zones[k]; 155 struct zone *z = &NODE_DATA(nd)->node_zones[k];
154 if (z->present_pages > 0) 156 if (z->present_pages > 0)
155 zl->zones[num++] = z; 157 zl->zones[num++] = z;
156 } 158 }
159 if (k == 0)
160 break;
161 k--;
157 } 162 }
158 zl->zones[num] = NULL; 163 zl->zones[num] = NULL;
159 return zl; 164 return zl;
@@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
482 switch (p->policy) { 487 switch (p->policy) {
483 case MPOL_BIND: 488 case MPOL_BIND:
484 for (i = 0; p->v.zonelist->zones[i]; i++) 489 for (i = 0; p->v.zonelist->zones[i]; i++)
485 node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id, 490 node_set(zone_to_nid(p->v.zonelist->zones[i]),
486 *nodes); 491 *nodes);
487 break; 492 break;
488 case MPOL_DEFAULT: 493 case MPOL_DEFAULT:
@@ -1140,7 +1145,7 @@ unsigned slab_node(struct mempolicy *policy)
1140 * Follow bind policy behavior and start allocation at the 1145 * Follow bind policy behavior and start allocation at the
1141 * first node. 1146 * first node.
1142 */ 1147 */
1143 return policy->v.zonelist->zones[0]->zone_pgdat->node_id; 1148 return zone_to_nid(policy->v.zonelist->zones[0]);
1144 1149
1145 case MPOL_PREFERRED: 1150 case MPOL_PREFERRED:
1146 if (policy->v.preferred_node >= 0) 1151 if (policy->v.preferred_node >= 0)
@@ -1285,7 +1290,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
1285 1290
1286 if ((gfp & __GFP_WAIT) && !in_interrupt()) 1291 if ((gfp & __GFP_WAIT) && !in_interrupt())
1287 cpuset_update_task_memory_state(); 1292 cpuset_update_task_memory_state();
1288 if (!pol || in_interrupt()) 1293 if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
1289 pol = &default_policy; 1294 pol = &default_policy;
1290 if (pol->policy == MPOL_INTERLEAVE) 1295 if (pol->policy == MPOL_INTERLEAVE)
1291 return alloc_page_interleave(gfp, order, interleave_nodes(pol)); 1296 return alloc_page_interleave(gfp, order, interleave_nodes(pol));
@@ -1644,7 +1649,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
1644 1649
1645 nodes_clear(nodes); 1650 nodes_clear(nodes);
1646 for (z = pol->v.zonelist->zones; *z; z++) 1651 for (z = pol->v.zonelist->zones; *z; z++)
1647 node_set((*z)->zone_pgdat->node_id, nodes); 1652 node_set(zone_to_nid(*z), nodes);
1648 nodes_remap(tmp, nodes, *mpolmask, *newmask); 1653 nodes_remap(tmp, nodes, *mpolmask, *newmask);
1649 nodes = tmp; 1654 nodes = tmp;
1650 1655
diff --git a/mm/migrate.c b/mm/migrate.c
index 3f1e0c2c942c..20a8c2687b1e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -741,7 +741,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
741 741
742 *result = &pm->status; 742 *result = &pm->status;
743 743
744 return alloc_pages_node(pm->node, GFP_HIGHUSER, 0); 744 return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
745} 745}
746 746
747/* 747/*
diff --git a/mm/mmap.c b/mm/mmap.c
index d799d896d74a..eea8eefd51a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -116,7 +116,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
116 * which are reclaimable, under pressure. The dentry 116 * which are reclaimable, under pressure. The dentry
117 * cache and most inode caches should fall into this 117 * cache and most inode caches should fall into this
118 */ 118 */
119 free += atomic_read(&slab_reclaim_pages); 119 free += global_page_state(NR_SLAB_RECLAIMABLE);
120 120
121 /* 121 /*
122 * Leave the last 3% for root 122 * Leave the last 3% for root
@@ -1105,12 +1105,6 @@ munmap_back:
1105 goto free_vma; 1105 goto free_vma;
1106 } 1106 }
1107 1107
1108 /* Don't make the VMA automatically writable if it's shared, but the
1109 * backer wishes to know when pages are first written to */
1110 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1111 vma->vm_page_prot =
1112 protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
1113
1114 /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform 1108 /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
1115 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) 1109 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
1116 * that memory reservation must be checked; but that reservation 1110 * that memory reservation must be checked; but that reservation
@@ -1128,6 +1122,10 @@ munmap_back:
1128 pgoff = vma->vm_pgoff; 1122 pgoff = vma->vm_pgoff;
1129 vm_flags = vma->vm_flags; 1123 vm_flags = vma->vm_flags;
1130 1124
1125 if (vma_wants_writenotify(vma))
1126 vma->vm_page_prot =
1127 protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
1128
1131 if (!file || !vma_merge(mm, prev, addr, vma->vm_end, 1129 if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
1132 vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { 1130 vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
1133 file = vma->vm_file; 1131 file = vma->vm_file;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 638edabaff71..955f9d0e38aa 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -27,7 +27,8 @@
27#include <asm/tlbflush.h> 27#include <asm/tlbflush.h>
28 28
29static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, 29static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
30 unsigned long addr, unsigned long end, pgprot_t newprot) 30 unsigned long addr, unsigned long end, pgprot_t newprot,
31 int dirty_accountable)
31{ 32{
32 pte_t *pte, oldpte; 33 pte_t *pte, oldpte;
33 spinlock_t *ptl; 34 spinlock_t *ptl;
@@ -42,7 +43,14 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
42 * bits by wiping the pte and then setting the new pte 43 * bits by wiping the pte and then setting the new pte
43 * into place. 44 * into place.
44 */ 45 */
45 ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot); 46 ptent = ptep_get_and_clear(mm, addr, pte);
47 ptent = pte_modify(ptent, newprot);
48 /*
49 * Avoid taking write faults for pages we know to be
50 * dirty.
51 */
52 if (dirty_accountable && pte_dirty(ptent))
53 ptent = pte_mkwrite(ptent);
46 set_pte_at(mm, addr, pte, ptent); 54 set_pte_at(mm, addr, pte, ptent);
47 lazy_mmu_prot_update(ptent); 55 lazy_mmu_prot_update(ptent);
48#ifdef CONFIG_MIGRATION 56#ifdef CONFIG_MIGRATION
@@ -66,7 +74,8 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
66} 74}
67 75
68static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, 76static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
69 unsigned long addr, unsigned long end, pgprot_t newprot) 77 unsigned long addr, unsigned long end, pgprot_t newprot,
78 int dirty_accountable)
70{ 79{
71 pmd_t *pmd; 80 pmd_t *pmd;
72 unsigned long next; 81 unsigned long next;
@@ -76,12 +85,13 @@ static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
76 next = pmd_addr_end(addr, end); 85 next = pmd_addr_end(addr, end);
77 if (pmd_none_or_clear_bad(pmd)) 86 if (pmd_none_or_clear_bad(pmd))
78 continue; 87 continue;
79 change_pte_range(mm, pmd, addr, next, newprot); 88 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
80 } while (pmd++, addr = next, addr != end); 89 } while (pmd++, addr = next, addr != end);
81} 90}
82 91
83static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd, 92static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
84 unsigned long addr, unsigned long end, pgprot_t newprot) 93 unsigned long addr, unsigned long end, pgprot_t newprot,
94 int dirty_accountable)
85{ 95{
86 pud_t *pud; 96 pud_t *pud;
87 unsigned long next; 97 unsigned long next;
@@ -91,12 +101,13 @@ static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
91 next = pud_addr_end(addr, end); 101 next = pud_addr_end(addr, end);
92 if (pud_none_or_clear_bad(pud)) 102 if (pud_none_or_clear_bad(pud))
93 continue; 103 continue;
94 change_pmd_range(mm, pud, addr, next, newprot); 104 change_pmd_range(mm, pud, addr, next, newprot, dirty_accountable);
95 } while (pud++, addr = next, addr != end); 105 } while (pud++, addr = next, addr != end);
96} 106}
97 107
98static void change_protection(struct vm_area_struct *vma, 108static void change_protection(struct vm_area_struct *vma,
99 unsigned long addr, unsigned long end, pgprot_t newprot) 109 unsigned long addr, unsigned long end, pgprot_t newprot,
110 int dirty_accountable)
100{ 111{
101 struct mm_struct *mm = vma->vm_mm; 112 struct mm_struct *mm = vma->vm_mm;
102 pgd_t *pgd; 113 pgd_t *pgd;
@@ -110,7 +121,7 @@ static void change_protection(struct vm_area_struct *vma,
110 next = pgd_addr_end(addr, end); 121 next = pgd_addr_end(addr, end);
111 if (pgd_none_or_clear_bad(pgd)) 122 if (pgd_none_or_clear_bad(pgd))
112 continue; 123 continue;
113 change_pud_range(mm, pgd, addr, next, newprot); 124 change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable);
114 } while (pgd++, addr = next, addr != end); 125 } while (pgd++, addr = next, addr != end);
115 flush_tlb_range(vma, start, end); 126 flush_tlb_range(vma, start, end);
116} 127}
@@ -123,10 +134,9 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
123 unsigned long oldflags = vma->vm_flags; 134 unsigned long oldflags = vma->vm_flags;
124 long nrpages = (end - start) >> PAGE_SHIFT; 135 long nrpages = (end - start) >> PAGE_SHIFT;
125 unsigned long charged = 0; 136 unsigned long charged = 0;
126 unsigned int mask;
127 pgprot_t newprot;
128 pgoff_t pgoff; 137 pgoff_t pgoff;
129 int error; 138 int error;
139 int dirty_accountable = 0;
130 140
131 if (newflags == oldflags) { 141 if (newflags == oldflags) {
132 *pprev = vma; 142 *pprev = vma;
@@ -176,24 +186,23 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
176 } 186 }
177 187
178success: 188success:
179 /* Don't make the VMA automatically writable if it's shared, but the
180 * backer wishes to know when pages are first written to */
181 mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED;
182 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
183 mask &= ~VM_SHARED;
184
185 newprot = protection_map[newflags & mask];
186
187 /* 189 /*
188 * vm_flags and vm_page_prot are protected by the mmap_sem 190 * vm_flags and vm_page_prot are protected by the mmap_sem
189 * held in write mode. 191 * held in write mode.
190 */ 192 */
191 vma->vm_flags = newflags; 193 vma->vm_flags = newflags;
192 vma->vm_page_prot = newprot; 194 vma->vm_page_prot = protection_map[newflags &
195 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
196 if (vma_wants_writenotify(vma)) {
197 vma->vm_page_prot = protection_map[newflags &
198 (VM_READ|VM_WRITE|VM_EXEC)];
199 dirty_accountable = 1;
200 }
201
193 if (is_vm_hugetlb_page(vma)) 202 if (is_vm_hugetlb_page(vma))
194 hugetlb_change_protection(vma, start, end, newprot); 203 hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
195 else 204 else
196 change_protection(vma, start, end, newprot); 205 change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
197 vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); 206 vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
198 vm_stat_account(mm, newflags, vma->vm_file, nrpages); 207 vm_stat_account(mm, newflags, vma->vm_file, nrpages);
199 return 0; 208 return 0;
diff --git a/mm/msync.c b/mm/msync.c
index d083544df21b..358d73cf7b78 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -7,149 +7,33 @@
7/* 7/*
8 * The msync() system call. 8 * The msync() system call.
9 */ 9 */
10#include <linux/slab.h>
11#include <linux/pagemap.h>
12#include <linux/fs.h> 10#include <linux/fs.h>
13#include <linux/mm.h> 11#include <linux/mm.h>
14#include <linux/mman.h> 12#include <linux/mman.h>
15#include <linux/hugetlb.h>
16#include <linux/writeback.h>
17#include <linux/file.h> 13#include <linux/file.h>
18#include <linux/syscalls.h> 14#include <linux/syscalls.h>
19 15
20#include <asm/pgtable.h>
21#include <asm/tlbflush.h>
22
23static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
24 unsigned long addr, unsigned long end)
25{
26 pte_t *pte;
27 spinlock_t *ptl;
28 int progress = 0;
29 unsigned long ret = 0;
30
31again:
32 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
33 do {
34 struct page *page;
35
36 if (progress >= 64) {
37 progress = 0;
38 if (need_resched() || need_lockbreak(ptl))
39 break;
40 }
41 progress++;
42 if (!pte_present(*pte))
43 continue;
44 if (!pte_maybe_dirty(*pte))
45 continue;
46 page = vm_normal_page(vma, addr, *pte);
47 if (!page)
48 continue;
49 if (ptep_clear_flush_dirty(vma, addr, pte) ||
50 page_test_and_clear_dirty(page))
51 ret += set_page_dirty(page);
52 progress += 3;
53 } while (pte++, addr += PAGE_SIZE, addr != end);
54 pte_unmap_unlock(pte - 1, ptl);
55 cond_resched();
56 if (addr != end)
57 goto again;
58 return ret;
59}
60
61static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
62 pud_t *pud, unsigned long addr, unsigned long end)
63{
64 pmd_t *pmd;
65 unsigned long next;
66 unsigned long ret = 0;
67
68 pmd = pmd_offset(pud, addr);
69 do {
70 next = pmd_addr_end(addr, end);
71 if (pmd_none_or_clear_bad(pmd))
72 continue;
73 ret += msync_pte_range(vma, pmd, addr, next);
74 } while (pmd++, addr = next, addr != end);
75 return ret;
76}
77
78static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
79 pgd_t *pgd, unsigned long addr, unsigned long end)
80{
81 pud_t *pud;
82 unsigned long next;
83 unsigned long ret = 0;
84
85 pud = pud_offset(pgd, addr);
86 do {
87 next = pud_addr_end(addr, end);
88 if (pud_none_or_clear_bad(pud))
89 continue;
90 ret += msync_pmd_range(vma, pud, addr, next);
91 } while (pud++, addr = next, addr != end);
92 return ret;
93}
94
95static unsigned long msync_page_range(struct vm_area_struct *vma,
96 unsigned long addr, unsigned long end)
97{
98 pgd_t *pgd;
99 unsigned long next;
100 unsigned long ret = 0;
101
102 /* For hugepages we can't go walking the page table normally,
103 * but that's ok, hugetlbfs is memory based, so we don't need
104 * to do anything more on an msync().
105 */
106 if (vma->vm_flags & VM_HUGETLB)
107 return 0;
108
109 BUG_ON(addr >= end);
110 pgd = pgd_offset(vma->vm_mm, addr);
111 flush_cache_range(vma, addr, end);
112 do {
113 next = pgd_addr_end(addr, end);
114 if (pgd_none_or_clear_bad(pgd))
115 continue;
116 ret += msync_pud_range(vma, pgd, addr, next);
117 } while (pgd++, addr = next, addr != end);
118 return ret;
119}
120
121/* 16/*
122 * MS_SYNC syncs the entire file - including mappings. 17 * MS_SYNC syncs the entire file - including mappings.
123 * 18 *
124 * MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just 19 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).
125 * marks the relevant pages dirty. The application may now run fsync() to 20 * Nor does it marks the relevant pages dirty (it used to up to 2.6.17).
21 * Now it doesn't do anything, since dirty pages are properly tracked.
22 *
23 * The application may now run fsync() to
126 * write out the dirty pages and wait on the writeout and check the result. 24 * write out the dirty pages and wait on the writeout and check the result.
127 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start 25 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
128 * async writeout immediately. 26 * async writeout immediately.
129 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to 27 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
130 * applications. 28 * applications.
131 */ 29 */
132static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
133 unsigned long end, int flags,
134 unsigned long *nr_pages_dirtied)
135{
136 struct file *file = vma->vm_file;
137
138 if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
139 return -EBUSY;
140
141 if (file && (vma->vm_flags & VM_SHARED))
142 *nr_pages_dirtied = msync_page_range(vma, addr, end);
143 return 0;
144}
145
146asmlinkage long sys_msync(unsigned long start, size_t len, int flags) 30asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
147{ 31{
148 unsigned long end; 32 unsigned long end;
33 struct mm_struct *mm = current->mm;
149 struct vm_area_struct *vma; 34 struct vm_area_struct *vma;
150 int unmapped_error = 0; 35 int unmapped_error = 0;
151 int error = -EINVAL; 36 int error = -EINVAL;
152 int done = 0;
153 37
154 if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) 38 if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
155 goto out; 39 goto out;
@@ -169,64 +53,50 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
169 * If the interval [start,end) covers some unmapped address ranges, 53 * If the interval [start,end) covers some unmapped address ranges,
170 * just ignore them, but return -ENOMEM at the end. 54 * just ignore them, but return -ENOMEM at the end.
171 */ 55 */
172 down_read(&current->mm->mmap_sem); 56 down_read(&mm->mmap_sem);
173 vma = find_vma(current->mm, start); 57 vma = find_vma(mm, start);
174 if (!vma) { 58 for (;;) {
175 error = -ENOMEM;
176 goto out_unlock;
177 }
178 do {
179 unsigned long nr_pages_dirtied = 0;
180 struct file *file; 59 struct file *file;
181 60
61 /* Still start < end. */
62 error = -ENOMEM;
63 if (!vma)
64 goto out_unlock;
182 /* Here start < vma->vm_end. */ 65 /* Here start < vma->vm_end. */
183 if (start < vma->vm_start) { 66 if (start < vma->vm_start) {
184 unmapped_error = -ENOMEM;
185 start = vma->vm_start; 67 start = vma->vm_start;
68 if (start >= end)
69 goto out_unlock;
70 unmapped_error = -ENOMEM;
186 } 71 }
187 /* Here vma->vm_start <= start < vma->vm_end. */ 72 /* Here vma->vm_start <= start < vma->vm_end. */
188 if (end <= vma->vm_end) { 73 if ((flags & MS_INVALIDATE) &&
189 if (start < end) { 74 (vma->vm_flags & VM_LOCKED)) {
190 error = msync_interval(vma, start, end, flags, 75 error = -EBUSY;
191 &nr_pages_dirtied); 76 goto out_unlock;
192 if (error)
193 goto out_unlock;
194 }
195 error = unmapped_error;
196 done = 1;
197 } else {
198 /* Here vma->vm_start <= start < vma->vm_end < end. */
199 error = msync_interval(vma, start, vma->vm_end, flags,
200 &nr_pages_dirtied);
201 if (error)
202 goto out_unlock;
203 } 77 }
204 file = vma->vm_file; 78 file = vma->vm_file;
205 start = vma->vm_end; 79 start = vma->vm_end;
206 if ((flags & MS_ASYNC) && file && nr_pages_dirtied) { 80 if ((flags & MS_SYNC) && file &&
207 get_file(file);
208 up_read(&current->mm->mmap_sem);
209 balance_dirty_pages_ratelimited_nr(file->f_mapping,
210 nr_pages_dirtied);
211 fput(file);
212 down_read(&current->mm->mmap_sem);
213 vma = find_vma(current->mm, start);
214 } else if ((flags & MS_SYNC) && file &&
215 (vma->vm_flags & VM_SHARED)) { 81 (vma->vm_flags & VM_SHARED)) {
216 get_file(file); 82 get_file(file);
217 up_read(&current->mm->mmap_sem); 83 up_read(&mm->mmap_sem);
218 error = do_fsync(file, 0); 84 error = do_fsync(file, 0);
219 fput(file); 85 fput(file);
220 down_read(&current->mm->mmap_sem); 86 if (error || start >= end)
221 if (error) 87 goto out;
222 goto out_unlock; 88 down_read(&mm->mmap_sem);
223 vma = find_vma(current->mm, start); 89 vma = find_vma(mm, start);
224 } else { 90 } else {
91 if (start >= end) {
92 error = 0;
93 goto out_unlock;
94 }
225 vma = vma->vm_next; 95 vma = vma->vm_next;
226 } 96 }
227 } while (vma && !done); 97 }
228out_unlock: 98out_unlock:
229 up_read(&current->mm->mmap_sem); 99 up_read(&mm->mmap_sem);
230out: 100out:
231 return error; 101 return error ? : unmapped_error;
232} 102}
diff --git a/mm/nommu.c b/mm/nommu.c
index c576df71e3bb..d99dea31e443 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1133,7 +1133,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
1133 * which are reclaimable, under pressure. The dentry 1133 * which are reclaimable, under pressure. The dentry
1134 * cache and most inode caches should fall into this 1134 * cache and most inode caches should fall into this
1135 */ 1135 */
1136 free += atomic_read(&slab_reclaim_pages); 1136 free += global_page_state(NR_SLAB_RECLAIMABLE);
1137 1137
1138 /* 1138 /*
1139 * Leave the last 3% for root 1139 * Leave the last 3% for root
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b9af136e5cfa..bada3d03119f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -21,6 +21,8 @@
21#include <linux/timex.h> 21#include <linux/timex.h>
22#include <linux/jiffies.h> 22#include <linux/jiffies.h>
23#include <linux/cpuset.h> 23#include <linux/cpuset.h>
24#include <linux/module.h>
25#include <linux/notifier.h>
24 26
25int sysctl_panic_on_oom; 27int sysctl_panic_on_oom;
26/* #define DEBUG */ 28/* #define DEBUG */
@@ -58,6 +60,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
58 } 60 }
59 61
60 /* 62 /*
63 * swapoff can easily use up all memory, so kill those first.
64 */
65 if (p->flags & PF_SWAPOFF)
66 return ULONG_MAX;
67
68 /*
61 * The memory size of the process is the basis for the badness. 69 * The memory size of the process is the basis for the badness.
62 */ 70 */
63 points = mm->total_vm; 71 points = mm->total_vm;
@@ -127,6 +135,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
127 points /= 4; 135 points /= 4;
128 136
129 /* 137 /*
138 * If p's nodes don't overlap ours, it may still help to kill p
139 * because p may have allocated or otherwise mapped memory on
140 * this node before. However it will be less likely.
141 */
142 if (!cpuset_excl_nodes_overlap(p))
143 points /= 8;
144
145 /*
130 * Adjust the score by oomkilladj. 146 * Adjust the score by oomkilladj.
131 */ 147 */
132 if (p->oomkilladj) { 148 if (p->oomkilladj) {
@@ -161,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
161 177
162 for (z = zonelist->zones; *z; z++) 178 for (z = zonelist->zones; *z; z++)
163 if (cpuset_zone_allowed(*z, gfp_mask)) 179 if (cpuset_zone_allowed(*z, gfp_mask))
164 node_clear((*z)->zone_pgdat->node_id, 180 node_clear(zone_to_nid(*z), nodes);
165 nodes);
166 else 181 else
167 return CONSTRAINT_CPUSET; 182 return CONSTRAINT_CPUSET;
168 183
@@ -191,25 +206,38 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
191 unsigned long points; 206 unsigned long points;
192 int releasing; 207 int releasing;
193 208
209 /* skip kernel threads */
210 if (!p->mm)
211 continue;
194 /* skip the init task with pid == 1 */ 212 /* skip the init task with pid == 1 */
195 if (p->pid == 1) 213 if (p->pid == 1)
196 continue; 214 continue;
197 if (p->oomkilladj == OOM_DISABLE)
198 continue;
199 /* If p's nodes don't overlap ours, it won't help to kill p. */
200 if (!cpuset_excl_nodes_overlap(p))
201 continue;
202 215
203 /* 216 /*
204 * This is in the process of releasing memory so wait for it 217 * This is in the process of releasing memory so wait for it
205 * to finish before killing some other task by mistake. 218 * to finish before killing some other task by mistake.
219 *
220 * However, if p is the current task, we allow the 'kill' to
221 * go ahead if it is exiting: this will simply set TIF_MEMDIE,
222 * which will allow it to gain access to memory reserves in
223 * the process of exiting and releasing its resources.
224 * Otherwise we could get an OOM deadlock.
206 */ 225 */
207 releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || 226 releasing = test_tsk_thread_flag(p, TIF_MEMDIE) ||
208 p->flags & PF_EXITING; 227 p->flags & PF_EXITING;
209 if (releasing && !(p->flags & PF_DEAD)) 228 if (releasing) {
229 /* PF_DEAD tasks have already released their mm */
230 if (p->flags & PF_DEAD)
231 continue;
232 if (p->flags & PF_EXITING && p == current) {
233 chosen = p;
234 *ppoints = ULONG_MAX;
235 break;
236 }
210 return ERR_PTR(-1UL); 237 return ERR_PTR(-1UL);
211 if (p->flags & PF_SWAPOFF) 238 }
212 return p; 239 if (p->oomkilladj == OOM_DISABLE)
240 continue;
213 241
214 points = badness(p, uptime.tv_sec); 242 points = badness(p, uptime.tv_sec);
215 if (points > *ppoints || !chosen) { 243 if (points > *ppoints || !chosen) {
@@ -221,9 +249,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
221} 249}
222 250
223/** 251/**
224 * We must be careful though to never send SIGKILL a process with 252 * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO
225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that 253 * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO
226 * we select a process with CAP_SYS_RAW_IO set). 254 * set.
227 */ 255 */
228static void __oom_kill_task(struct task_struct *p, const char *message) 256static void __oom_kill_task(struct task_struct *p, const char *message)
229{ 257{
@@ -241,8 +269,11 @@ static void __oom_kill_task(struct task_struct *p, const char *message)
241 return; 269 return;
242 } 270 }
243 task_unlock(p); 271 task_unlock(p);
244 printk(KERN_ERR "%s: Killed process %d (%s).\n", 272
273 if (message) {
274 printk(KERN_ERR "%s: Killed process %d (%s).\n",
245 message, p->pid, p->comm); 275 message, p->pid, p->comm);
276 }
246 277
247 /* 278 /*
248 * We give our sacrificial lamb high priority and access to 279 * We give our sacrificial lamb high priority and access to
@@ -293,8 +324,17 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
293 struct task_struct *c; 324 struct task_struct *c;
294 struct list_head *tsk; 325 struct list_head *tsk;
295 326
296 printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " 327 /*
297 "children.\n", p->pid, p->comm, points); 328 * If the task is already exiting, don't alarm the sysadmin or kill
329 * its children or threads, just set TIF_MEMDIE so it can die quickly
330 */
331 if (p->flags & PF_EXITING) {
332 __oom_kill_task(p, NULL);
333 return 0;
334 }
335
336 printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li"
337 " and children.\n", p->pid, p->comm, points);
298 /* Try to kill a child first */ 338 /* Try to kill a child first */
299 list_for_each(tsk, &p->children) { 339 list_for_each(tsk, &p->children) {
300 c = list_entry(tsk, struct task_struct, sibling); 340 c = list_entry(tsk, struct task_struct, sibling);
@@ -306,6 +346,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
306 return oom_kill_task(p, message); 346 return oom_kill_task(p, message);
307} 347}
308 348
349static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
350
351int register_oom_notifier(struct notifier_block *nb)
352{
353 return blocking_notifier_chain_register(&oom_notify_list, nb);
354}
355EXPORT_SYMBOL_GPL(register_oom_notifier);
356
357int unregister_oom_notifier(struct notifier_block *nb)
358{
359 return blocking_notifier_chain_unregister(&oom_notify_list, nb);
360}
361EXPORT_SYMBOL_GPL(unregister_oom_notifier);
362
309/** 363/**
310 * out_of_memory - kill the "best" process when we run out of memory 364 * out_of_memory - kill the "best" process when we run out of memory
311 * 365 *
@@ -318,10 +372,17 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
318{ 372{
319 struct task_struct *p; 373 struct task_struct *p;
320 unsigned long points = 0; 374 unsigned long points = 0;
375 unsigned long freed = 0;
376
377 blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
378 if (freed > 0)
379 /* Got some memory back in the last second. */
380 return;
321 381
322 if (printk_ratelimit()) { 382 if (printk_ratelimit()) {
323 printk("oom-killer: gfp_mask=0x%x, order=%d\n", 383 printk(KERN_WARNING "%s invoked oom-killer: "
324 gfp_mask, order); 384 "gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
385 current->comm, gfp_mask, order, current->oomkilladj);
325 dump_stack(); 386 dump_stack();
326 show_mem(); 387 show_mem();
327 } 388 }
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 77a0bc4e261a..555752907dc3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -23,6 +23,7 @@
23#include <linux/backing-dev.h> 23#include <linux/backing-dev.h>
24#include <linux/blkdev.h> 24#include <linux/blkdev.h>
25#include <linux/mpage.h> 25#include <linux/mpage.h>
26#include <linux/rmap.h>
26#include <linux/percpu.h> 27#include <linux/percpu.h>
27#include <linux/notifier.h> 28#include <linux/notifier.h>
28#include <linux/smp.h> 29#include <linux/smp.h>
@@ -243,6 +244,16 @@ static void balance_dirty_pages(struct address_space *mapping)
243 pdflush_operation(background_writeout, 0); 244 pdflush_operation(background_writeout, 0);
244} 245}
245 246
247void set_page_dirty_balance(struct page *page)
248{
249 if (set_page_dirty(page)) {
250 struct address_space *mapping = page_mapping(page);
251
252 if (mapping)
253 balance_dirty_pages_ratelimited(mapping);
254 }
255}
256
246/** 257/**
247 * balance_dirty_pages_ratelimited_nr - balance dirty memory state 258 * balance_dirty_pages_ratelimited_nr - balance dirty memory state
248 * @mapping: address_space which was dirtied 259 * @mapping: address_space which was dirtied
@@ -550,7 +561,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
550 return 0; 561 return 0;
551 wbc->for_writepages = 1; 562 wbc->for_writepages = 1;
552 if (mapping->a_ops->writepages) 563 if (mapping->a_ops->writepages)
553 ret = mapping->a_ops->writepages(mapping, wbc); 564 ret = mapping->a_ops->writepages(mapping, wbc);
554 else 565 else
555 ret = generic_writepages(mapping, wbc); 566 ret = generic_writepages(mapping, wbc);
556 wbc->for_writepages = 0; 567 wbc->for_writepages = 0;
@@ -690,7 +701,7 @@ int set_page_dirty_lock(struct page *page)
690{ 701{
691 int ret; 702 int ret;
692 703
693 lock_page(page); 704 lock_page_nosync(page);
694 ret = set_page_dirty(page); 705 ret = set_page_dirty(page);
695 unlock_page(page); 706 unlock_page(page);
696 return ret; 707 return ret;
@@ -712,9 +723,15 @@ int test_clear_page_dirty(struct page *page)
712 radix_tree_tag_clear(&mapping->page_tree, 723 radix_tree_tag_clear(&mapping->page_tree,
713 page_index(page), 724 page_index(page),
714 PAGECACHE_TAG_DIRTY); 725 PAGECACHE_TAG_DIRTY);
715 if (mapping_cap_account_dirty(mapping))
716 __dec_zone_page_state(page, NR_FILE_DIRTY);
717 write_unlock_irqrestore(&mapping->tree_lock, flags); 726 write_unlock_irqrestore(&mapping->tree_lock, flags);
727 /*
728 * We can continue to use `mapping' here because the
729 * page is locked, which pins the address_space
730 */
731 if (mapping_cap_account_dirty(mapping)) {
732 page_mkclean(page);
733 dec_zone_page_state(page, NR_FILE_DIRTY);
734 }
718 return 1; 735 return 1;
719 } 736 }
720 write_unlock_irqrestore(&mapping->tree_lock, flags); 737 write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -744,8 +761,10 @@ int clear_page_dirty_for_io(struct page *page)
744 761
745 if (mapping) { 762 if (mapping) {
746 if (TestClearPageDirty(page)) { 763 if (TestClearPageDirty(page)) {
747 if (mapping_cap_account_dirty(mapping)) 764 if (mapping_cap_account_dirty(mapping)) {
765 page_mkclean(page);
748 dec_zone_page_state(page, NR_FILE_DIRTY); 766 dec_zone_page_state(page, NR_FILE_DIRTY);
767 }
749 return 1; 768 return 1;
750 } 769 }
751 return 0; 770 return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3b5358a0561f..9810f0a60db7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map);
51nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; 51nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
52EXPORT_SYMBOL(node_possible_map); 52EXPORT_SYMBOL(node_possible_map);
53unsigned long totalram_pages __read_mostly; 53unsigned long totalram_pages __read_mostly;
54unsigned long totalhigh_pages __read_mostly;
55unsigned long totalreserve_pages __read_mostly; 54unsigned long totalreserve_pages __read_mostly;
56long nr_swap_pages; 55long nr_swap_pages;
57int percpu_pagelist_fraction; 56int percpu_pagelist_fraction;
@@ -69,7 +68,15 @@ static void __free_pages_ok(struct page *page, unsigned int order);
69 * TBD: should special case ZONE_DMA32 machines here - in those we normally 68 * TBD: should special case ZONE_DMA32 machines here - in those we normally
70 * don't need any ZONE_NORMAL reservation 69 * don't need any ZONE_NORMAL reservation
71 */ 70 */
72int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; 71int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
72 256,
73#ifdef CONFIG_ZONE_DMA32
74 256,
75#endif
76#ifdef CONFIG_HIGHMEM
77 32
78#endif
79};
73 80
74EXPORT_SYMBOL(totalram_pages); 81EXPORT_SYMBOL(totalram_pages);
75 82
@@ -80,7 +87,17 @@ EXPORT_SYMBOL(totalram_pages);
80struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; 87struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
81EXPORT_SYMBOL(zone_table); 88EXPORT_SYMBOL(zone_table);
82 89
83static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; 90static char *zone_names[MAX_NR_ZONES] = {
91 "DMA",
92#ifdef CONFIG_ZONE_DMA32
93 "DMA32",
94#endif
95 "Normal",
96#ifdef CONFIG_HIGHMEM
97 "HighMem"
98#endif
99};
100
84int min_free_kbytes = 1024; 101int min_free_kbytes = 1024;
85 102
86unsigned long __meminitdata nr_kernel_pages; 103unsigned long __meminitdata nr_kernel_pages;
@@ -127,7 +144,6 @@ static int bad_range(struct zone *zone, struct page *page)
127 144
128 return 0; 145 return 0;
129} 146}
130
131#else 147#else
132static inline int bad_range(struct zone *zone, struct page *page) 148static inline int bad_range(struct zone *zone, struct page *page)
133{ 149{
@@ -218,12 +234,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
218{ 234{
219 int i; 235 int i;
220 236
221 BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); 237 VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
222 /* 238 /*
223 * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO 239 * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
224 * and __GFP_HIGHMEM from hard or soft interrupt context. 240 * and __GFP_HIGHMEM from hard or soft interrupt context.
225 */ 241 */
226 BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); 242 VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
227 for (i = 0; i < (1 << order); i++) 243 for (i = 0; i < (1 << order); i++)
228 clear_highpage(page + i); 244 clear_highpage(page + i);
229} 245}
@@ -347,8 +363,8 @@ static inline void __free_one_page(struct page *page,
347 363
348 page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); 364 page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
349 365
350 BUG_ON(page_idx & (order_size - 1)); 366 VM_BUG_ON(page_idx & (order_size - 1));
351 BUG_ON(bad_range(zone, page)); 367 VM_BUG_ON(bad_range(zone, page));
352 368
353 zone->free_pages += order_size; 369 zone->free_pages += order_size;
354 while (order < MAX_ORDER-1) { 370 while (order < MAX_ORDER-1) {
@@ -421,7 +437,7 @@ static void free_pages_bulk(struct zone *zone, int count,
421 while (count--) { 437 while (count--) {
422 struct page *page; 438 struct page *page;
423 439
424 BUG_ON(list_empty(list)); 440 VM_BUG_ON(list_empty(list));
425 page = list_entry(list->prev, struct page, lru); 441 page = list_entry(list->prev, struct page, lru);
426 /* have to delete it as __free_one_page list manipulates */ 442 /* have to delete it as __free_one_page list manipulates */
427 list_del(&page->lru); 443 list_del(&page->lru);
@@ -432,9 +448,11 @@ static void free_pages_bulk(struct zone *zone, int count,
432 448
433static void free_one_page(struct zone *zone, struct page *page, int order) 449static void free_one_page(struct zone *zone, struct page *page, int order)
434{ 450{
435 LIST_HEAD(list); 451 spin_lock(&zone->lock);
436 list_add(&page->lru, &list); 452 zone->all_unreclaimable = 0;
437 free_pages_bulk(zone, 1, &list, order); 453 zone->pages_scanned = 0;
454 __free_one_page(page, zone ,order);
455 spin_unlock(&zone->lock);
438} 456}
439 457
440static void __free_pages_ok(struct page *page, unsigned int order) 458static void __free_pages_ok(struct page *page, unsigned int order)
@@ -512,7 +530,7 @@ static inline void expand(struct zone *zone, struct page *page,
512 area--; 530 area--;
513 high--; 531 high--;
514 size >>= 1; 532 size >>= 1;
515 BUG_ON(bad_range(zone, &page[size])); 533 VM_BUG_ON(bad_range(zone, &page[size]));
516 list_add(&page[size].lru, &area->free_list); 534 list_add(&page[size].lru, &area->free_list);
517 area->nr_free++; 535 area->nr_free++;
518 set_page_order(&page[size], high); 536 set_page_order(&page[size], high);
@@ -615,19 +633,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
615#ifdef CONFIG_NUMA 633#ifdef CONFIG_NUMA
616/* 634/*
617 * Called from the slab reaper to drain pagesets on a particular node that 635 * Called from the slab reaper to drain pagesets on a particular node that
618 * belong to the currently executing processor. 636 * belongs to the currently executing processor.
619 * Note that this function must be called with the thread pinned to 637 * Note that this function must be called with the thread pinned to
620 * a single processor. 638 * a single processor.
621 */ 639 */
622void drain_node_pages(int nodeid) 640void drain_node_pages(int nodeid)
623{ 641{
624 int i, z; 642 int i;
643 enum zone_type z;
625 unsigned long flags; 644 unsigned long flags;
626 645
627 for (z = 0; z < MAX_NR_ZONES; z++) { 646 for (z = 0; z < MAX_NR_ZONES; z++) {
628 struct zone *zone = NODE_DATA(nodeid)->node_zones + z; 647 struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
629 struct per_cpu_pageset *pset; 648 struct per_cpu_pageset *pset;
630 649
650 if (!populated_zone(zone))
651 continue;
652
631 pset = zone_pcp(zone, smp_processor_id()); 653 pset = zone_pcp(zone, smp_processor_id());
632 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { 654 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
633 struct per_cpu_pages *pcp; 655 struct per_cpu_pages *pcp;
@@ -672,7 +694,8 @@ static void __drain_pages(unsigned int cpu)
672 694
673void mark_free_pages(struct zone *zone) 695void mark_free_pages(struct zone *zone)
674{ 696{
675 unsigned long zone_pfn, flags; 697 unsigned long pfn, max_zone_pfn;
698 unsigned long flags;
676 int order; 699 int order;
677 struct list_head *curr; 700 struct list_head *curr;
678 701
@@ -680,18 +703,25 @@ void mark_free_pages(struct zone *zone)
680 return; 703 return;
681 704
682 spin_lock_irqsave(&zone->lock, flags); 705 spin_lock_irqsave(&zone->lock, flags);
683 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) 706
684 ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); 707 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
708 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
709 if (pfn_valid(pfn)) {
710 struct page *page = pfn_to_page(pfn);
711
712 if (!PageNosave(page))
713 ClearPageNosaveFree(page);
714 }
685 715
686 for (order = MAX_ORDER - 1; order >= 0; --order) 716 for (order = MAX_ORDER - 1; order >= 0; --order)
687 list_for_each(curr, &zone->free_area[order].free_list) { 717 list_for_each(curr, &zone->free_area[order].free_list) {
688 unsigned long start_pfn, i; 718 unsigned long i;
689 719
690 start_pfn = page_to_pfn(list_entry(curr, struct page, lru)); 720 pfn = page_to_pfn(list_entry(curr, struct page, lru));
721 for (i = 0; i < (1UL << order); i++)
722 SetPageNosaveFree(pfn_to_page(pfn + i));
723 }
691 724
692 for (i=0; i < (1<<order); i++)
693 SetPageNosaveFree(pfn_to_page(start_pfn+i));
694 }
695 spin_unlock_irqrestore(&zone->lock, flags); 725 spin_unlock_irqrestore(&zone->lock, flags);
696} 726}
697 727
@@ -761,8 +791,8 @@ void split_page(struct page *page, unsigned int order)
761{ 791{
762 int i; 792 int i;
763 793
764 BUG_ON(PageCompound(page)); 794 VM_BUG_ON(PageCompound(page));
765 BUG_ON(!page_count(page)); 795 VM_BUG_ON(!page_count(page));
766 for (i = 1; i < (1 << order); i++) 796 for (i = 1; i < (1 << order); i++)
767 set_page_refcounted(page + i); 797 set_page_refcounted(page + i);
768} 798}
@@ -809,7 +839,7 @@ again:
809 local_irq_restore(flags); 839 local_irq_restore(flags);
810 put_cpu(); 840 put_cpu();
811 841
812 BUG_ON(bad_range(zone, page)); 842 VM_BUG_ON(bad_range(zone, page));
813 if (prep_new_page(page, order, gfp_flags)) 843 if (prep_new_page(page, order, gfp_flags))
814 goto again; 844 goto again;
815 return page; 845 return page;
@@ -870,32 +900,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
870 struct zone **z = zonelist->zones; 900 struct zone **z = zonelist->zones;
871 struct page *page = NULL; 901 struct page *page = NULL;
872 int classzone_idx = zone_idx(*z); 902 int classzone_idx = zone_idx(*z);
903 struct zone *zone;
873 904
874 /* 905 /*
875 * Go through the zonelist once, looking for a zone with enough free. 906 * Go through the zonelist once, looking for a zone with enough free.
876 * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 907 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
877 */ 908 */
878 do { 909 do {
910 zone = *z;
911 if (unlikely((gfp_mask & __GFP_THISNODE) &&
912 zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
913 break;
879 if ((alloc_flags & ALLOC_CPUSET) && 914 if ((alloc_flags & ALLOC_CPUSET) &&
880 !cpuset_zone_allowed(*z, gfp_mask)) 915 !cpuset_zone_allowed(zone, gfp_mask))
881 continue; 916 continue;
882 917
883 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { 918 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
884 unsigned long mark; 919 unsigned long mark;
885 if (alloc_flags & ALLOC_WMARK_MIN) 920 if (alloc_flags & ALLOC_WMARK_MIN)
886 mark = (*z)->pages_min; 921 mark = zone->pages_min;
887 else if (alloc_flags & ALLOC_WMARK_LOW) 922 else if (alloc_flags & ALLOC_WMARK_LOW)
888 mark = (*z)->pages_low; 923 mark = zone->pages_low;
889 else 924 else
890 mark = (*z)->pages_high; 925 mark = zone->pages_high;
891 if (!zone_watermark_ok(*z, order, mark, 926 if (!zone_watermark_ok(zone , order, mark,
892 classzone_idx, alloc_flags)) 927 classzone_idx, alloc_flags))
893 if (!zone_reclaim_mode || 928 if (!zone_reclaim_mode ||
894 !zone_reclaim(*z, gfp_mask, order)) 929 !zone_reclaim(zone, gfp_mask, order))
895 continue; 930 continue;
896 } 931 }
897 932
898 page = buffered_rmqueue(zonelist, *z, order, gfp_mask); 933 page = buffered_rmqueue(zonelist, zone, order, gfp_mask);
899 if (page) { 934 if (page) {
900 break; 935 break;
901 } 936 }
@@ -1083,7 +1118,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
1083 * get_zeroed_page() returns a 32-bit address, which cannot represent 1118 * get_zeroed_page() returns a 32-bit address, which cannot represent
1084 * a highmem page 1119 * a highmem page
1085 */ 1120 */
1086 BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); 1121 VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
1087 1122
1088 page = alloc_pages(gfp_mask | __GFP_ZERO, 0); 1123 page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
1089 if (page) 1124 if (page)
@@ -1116,7 +1151,7 @@ EXPORT_SYMBOL(__free_pages);
1116fastcall void free_pages(unsigned long addr, unsigned int order) 1151fastcall void free_pages(unsigned long addr, unsigned int order)
1117{ 1152{
1118 if (addr != 0) { 1153 if (addr != 0) {
1119 BUG_ON(!virt_addr_valid((void *)addr)); 1154 VM_BUG_ON(!virt_addr_valid((void *)addr));
1120 __free_pages(virt_to_page((void *)addr), order); 1155 __free_pages(virt_to_page((void *)addr), order);
1121 } 1156 }
1122} 1157}
@@ -1142,7 +1177,8 @@ EXPORT_SYMBOL(nr_free_pages);
1142#ifdef CONFIG_NUMA 1177#ifdef CONFIG_NUMA
1143unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) 1178unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
1144{ 1179{
1145 unsigned int i, sum = 0; 1180 unsigned int sum = 0;
1181 enum zone_type i;
1146 1182
1147 for (i = 0; i < MAX_NR_ZONES; i++) 1183 for (i = 0; i < MAX_NR_ZONES; i++)
1148 sum += pgdat->node_zones[i].free_pages; 1184 sum += pgdat->node_zones[i].free_pages;
@@ -1186,24 +1222,10 @@ unsigned int nr_free_pagecache_pages(void)
1186{ 1222{
1187 return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); 1223 return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
1188} 1224}
1189
1190#ifdef CONFIG_HIGHMEM
1191unsigned int nr_free_highpages (void)
1192{
1193 pg_data_t *pgdat;
1194 unsigned int pages = 0;
1195
1196 for_each_online_pgdat(pgdat)
1197 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
1198
1199 return pages;
1200}
1201#endif
1202
1203#ifdef CONFIG_NUMA 1225#ifdef CONFIG_NUMA
1204static void show_node(struct zone *zone) 1226static void show_node(struct zone *zone)
1205{ 1227{
1206 printk("Node %d ", zone->zone_pgdat->node_id); 1228 printk("Node %ld ", zone_to_nid(zone));
1207} 1229}
1208#else 1230#else
1209#define show_node(zone) do { } while (0) 1231#define show_node(zone) do { } while (0)
@@ -1215,13 +1237,8 @@ void si_meminfo(struct sysinfo *val)
1215 val->sharedram = 0; 1237 val->sharedram = 0;
1216 val->freeram = nr_free_pages(); 1238 val->freeram = nr_free_pages();
1217 val->bufferram = nr_blockdev_pages(); 1239 val->bufferram = nr_blockdev_pages();
1218#ifdef CONFIG_HIGHMEM
1219 val->totalhigh = totalhigh_pages; 1240 val->totalhigh = totalhigh_pages;
1220 val->freehigh = nr_free_highpages(); 1241 val->freehigh = nr_free_highpages();
1221#else
1222 val->totalhigh = 0;
1223 val->freehigh = 0;
1224#endif
1225 val->mem_unit = PAGE_SIZE; 1242 val->mem_unit = PAGE_SIZE;
1226} 1243}
1227 1244
@@ -1234,8 +1251,13 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1234 1251
1235 val->totalram = pgdat->node_present_pages; 1252 val->totalram = pgdat->node_present_pages;
1236 val->freeram = nr_free_pages_pgdat(pgdat); 1253 val->freeram = nr_free_pages_pgdat(pgdat);
1254#ifdef CONFIG_HIGHMEM
1237 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; 1255 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
1238 val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; 1256 val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
1257#else
1258 val->totalhigh = 0;
1259 val->freehigh = 0;
1260#endif
1239 val->mem_unit = PAGE_SIZE; 1261 val->mem_unit = PAGE_SIZE;
1240} 1262}
1241#endif 1263#endif
@@ -1282,10 +1304,6 @@ void show_free_areas(void)
1282 1304
1283 get_zone_counts(&active, &inactive, &free); 1305 get_zone_counts(&active, &inactive, &free);
1284 1306
1285 printk("Free pages: %11ukB (%ukB HighMem)\n",
1286 K(nr_free_pages()),
1287 K(nr_free_highpages()));
1288
1289 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " 1307 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu "
1290 "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", 1308 "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
1291 active, 1309 active,
@@ -1294,7 +1312,8 @@ void show_free_areas(void)
1294 global_page_state(NR_WRITEBACK), 1312 global_page_state(NR_WRITEBACK),
1295 global_page_state(NR_UNSTABLE_NFS), 1313 global_page_state(NR_UNSTABLE_NFS),
1296 nr_free_pages(), 1314 nr_free_pages(),
1297 global_page_state(NR_SLAB), 1315 global_page_state(NR_SLAB_RECLAIMABLE) +
1316 global_page_state(NR_SLAB_UNRECLAIMABLE),
1298 global_page_state(NR_FILE_MAPPED), 1317 global_page_state(NR_FILE_MAPPED),
1299 global_page_state(NR_PAGETABLE)); 1318 global_page_state(NR_PAGETABLE));
1300 1319
@@ -1360,39 +1379,25 @@ void show_free_areas(void)
1360 * Add all populated zones of a node to the zonelist. 1379 * Add all populated zones of a node to the zonelist.
1361 */ 1380 */
1362static int __meminit build_zonelists_node(pg_data_t *pgdat, 1381static int __meminit build_zonelists_node(pg_data_t *pgdat,
1363 struct zonelist *zonelist, int nr_zones, int zone_type) 1382 struct zonelist *zonelist, int nr_zones, enum zone_type zone_type)
1364{ 1383{
1365 struct zone *zone; 1384 struct zone *zone;
1366 1385
1367 BUG_ON(zone_type > ZONE_HIGHMEM); 1386 BUG_ON(zone_type >= MAX_NR_ZONES);
1387 zone_type++;
1368 1388
1369 do { 1389 do {
1390 zone_type--;
1370 zone = pgdat->node_zones + zone_type; 1391 zone = pgdat->node_zones + zone_type;
1371 if (populated_zone(zone)) { 1392 if (populated_zone(zone)) {
1372#ifndef CONFIG_HIGHMEM
1373 BUG_ON(zone_type > ZONE_NORMAL);
1374#endif
1375 zonelist->zones[nr_zones++] = zone; 1393 zonelist->zones[nr_zones++] = zone;
1376 check_highest_zone(zone_type); 1394 check_highest_zone(zone_type);
1377 } 1395 }
1378 zone_type--;
1379 1396
1380 } while (zone_type >= 0); 1397 } while (zone_type);
1381 return nr_zones; 1398 return nr_zones;
1382} 1399}
1383 1400
1384static inline int highest_zone(int zone_bits)
1385{
1386 int res = ZONE_NORMAL;
1387 if (zone_bits & (__force int)__GFP_HIGHMEM)
1388 res = ZONE_HIGHMEM;
1389 if (zone_bits & (__force int)__GFP_DMA32)
1390 res = ZONE_DMA32;
1391 if (zone_bits & (__force int)__GFP_DMA)
1392 res = ZONE_DMA;
1393 return res;
1394}
1395
1396#ifdef CONFIG_NUMA 1401#ifdef CONFIG_NUMA
1397#define MAX_NODE_LOAD (num_online_nodes()) 1402#define MAX_NODE_LOAD (num_online_nodes())
1398static int __meminitdata node_load[MAX_NUMNODES]; 1403static int __meminitdata node_load[MAX_NUMNODES];
@@ -1458,13 +1463,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask)
1458 1463
1459static void __meminit build_zonelists(pg_data_t *pgdat) 1464static void __meminit build_zonelists(pg_data_t *pgdat)
1460{ 1465{
1461 int i, j, k, node, local_node; 1466 int j, node, local_node;
1467 enum zone_type i;
1462 int prev_node, load; 1468 int prev_node, load;
1463 struct zonelist *zonelist; 1469 struct zonelist *zonelist;
1464 nodemask_t used_mask; 1470 nodemask_t used_mask;
1465 1471
1466 /* initialize zonelists */ 1472 /* initialize zonelists */
1467 for (i = 0; i < GFP_ZONETYPES; i++) { 1473 for (i = 0; i < MAX_NR_ZONES; i++) {
1468 zonelist = pgdat->node_zonelists + i; 1474 zonelist = pgdat->node_zonelists + i;
1469 zonelist->zones[0] = NULL; 1475 zonelist->zones[0] = NULL;
1470 } 1476 }
@@ -1494,13 +1500,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
1494 node_load[node] += load; 1500 node_load[node] += load;
1495 prev_node = node; 1501 prev_node = node;
1496 load--; 1502 load--;
1497 for (i = 0; i < GFP_ZONETYPES; i++) { 1503 for (i = 0; i < MAX_NR_ZONES; i++) {
1498 zonelist = pgdat->node_zonelists + i; 1504 zonelist = pgdat->node_zonelists + i;
1499 for (j = 0; zonelist->zones[j] != NULL; j++); 1505 for (j = 0; zonelist->zones[j] != NULL; j++);
1500 1506
1501 k = highest_zone(i); 1507 j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
1502
1503 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
1504 zonelist->zones[j] = NULL; 1508 zonelist->zones[j] = NULL;
1505 } 1509 }
1506 } 1510 }
@@ -1510,17 +1514,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
1510 1514
1511static void __meminit build_zonelists(pg_data_t *pgdat) 1515static void __meminit build_zonelists(pg_data_t *pgdat)
1512{ 1516{
1513 int i, j, k, node, local_node; 1517 int node, local_node;
1518 enum zone_type i,j;
1514 1519
1515 local_node = pgdat->node_id; 1520 local_node = pgdat->node_id;
1516 for (i = 0; i < GFP_ZONETYPES; i++) { 1521 for (i = 0; i < MAX_NR_ZONES; i++) {
1517 struct zonelist *zonelist; 1522 struct zonelist *zonelist;
1518 1523
1519 zonelist = pgdat->node_zonelists + i; 1524 zonelist = pgdat->node_zonelists + i;
1520 1525
1521 j = 0; 1526 j = build_zonelists_node(pgdat, zonelist, 0, i);
1522 k = highest_zone(i);
1523 j = build_zonelists_node(pgdat, zonelist, j, k);
1524 /* 1527 /*
1525 * Now we build the zonelist so that it contains the zones 1528 * Now we build the zonelist so that it contains the zones
1526 * of all the other nodes. 1529 * of all the other nodes.
@@ -1532,12 +1535,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
1532 for (node = local_node + 1; node < MAX_NUMNODES; node++) { 1535 for (node = local_node + 1; node < MAX_NUMNODES; node++) {
1533 if (!node_online(node)) 1536 if (!node_online(node))
1534 continue; 1537 continue;
1535 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); 1538 j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
1536 } 1539 }
1537 for (node = 0; node < local_node; node++) { 1540 for (node = 0; node < local_node; node++) {
1538 if (!node_online(node)) 1541 if (!node_online(node))
1539 continue; 1542 continue;
1540 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); 1543 j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
1541 } 1544 }
1542 1545
1543 zonelist->zones[j] = NULL; 1546 zonelist->zones[j] = NULL;
@@ -1643,7 +1646,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
1643 unsigned long *zones_size, unsigned long *zholes_size) 1646 unsigned long *zones_size, unsigned long *zholes_size)
1644{ 1647{
1645 unsigned long realtotalpages, totalpages = 0; 1648 unsigned long realtotalpages, totalpages = 0;
1646 int i; 1649 enum zone_type i;
1647 1650
1648 for (i = 0; i < MAX_NR_ZONES; i++) 1651 for (i = 0; i < MAX_NR_ZONES; i++)
1649 totalpages += zones_size[i]; 1652 totalpages += zones_size[i];
@@ -1698,8 +1701,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
1698} 1701}
1699 1702
1700#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) 1703#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr)
1701void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, 1704void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
1702 unsigned long size) 1705 unsigned long pfn, unsigned long size)
1703{ 1706{
1704 unsigned long snum = pfn_to_section_nr(pfn); 1707 unsigned long snum = pfn_to_section_nr(pfn);
1705 unsigned long end = pfn_to_section_nr(pfn + size); 1708 unsigned long end = pfn_to_section_nr(pfn + size);
@@ -1845,8 +1848,10 @@ static inline void free_zone_pagesets(int cpu)
1845 for_each_zone(zone) { 1848 for_each_zone(zone) {
1846 struct per_cpu_pageset *pset = zone_pcp(zone, cpu); 1849 struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
1847 1850
1851 /* Free per_cpu_pageset if it is slab allocated */
1852 if (pset != &boot_pageset[cpu])
1853 kfree(pset);
1848 zone_pcp(zone, cpu) = NULL; 1854 zone_pcp(zone, cpu) = NULL;
1849 kfree(pset);
1850 } 1855 }
1851} 1856}
1852 1857
@@ -1981,7 +1986,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
1981static void __meminit free_area_init_core(struct pglist_data *pgdat, 1986static void __meminit free_area_init_core(struct pglist_data *pgdat,
1982 unsigned long *zones_size, unsigned long *zholes_size) 1987 unsigned long *zones_size, unsigned long *zholes_size)
1983{ 1988{
1984 unsigned long j; 1989 enum zone_type j;
1985 int nid = pgdat->node_id; 1990 int nid = pgdat->node_id;
1986 unsigned long zone_start_pfn = pgdat->node_start_pfn; 1991 unsigned long zone_start_pfn = pgdat->node_start_pfn;
1987 int ret; 1992 int ret;
@@ -1999,15 +2004,16 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
1999 if (zholes_size) 2004 if (zholes_size)
2000 realsize -= zholes_size[j]; 2005 realsize -= zholes_size[j];
2001 2006
2002 if (j < ZONE_HIGHMEM) 2007 if (!is_highmem_idx(j))
2003 nr_kernel_pages += realsize; 2008 nr_kernel_pages += realsize;
2004 nr_all_pages += realsize; 2009 nr_all_pages += realsize;
2005 2010
2006 zone->spanned_pages = size; 2011 zone->spanned_pages = size;
2007 zone->present_pages = realsize; 2012 zone->present_pages = realsize;
2008#ifdef CONFIG_NUMA 2013#ifdef CONFIG_NUMA
2009 zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) 2014 zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
2010 / 100; 2015 / 100;
2016 zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
2011#endif 2017#endif
2012 zone->name = zone_names[j]; 2018 zone->name = zone_names[j];
2013 spin_lock_init(&zone->lock); 2019 spin_lock_init(&zone->lock);
@@ -2129,7 +2135,7 @@ static void calculate_totalreserve_pages(void)
2129{ 2135{
2130 struct pglist_data *pgdat; 2136 struct pglist_data *pgdat;
2131 unsigned long reserve_pages = 0; 2137 unsigned long reserve_pages = 0;
2132 int i, j; 2138 enum zone_type i, j;
2133 2139
2134 for_each_online_pgdat(pgdat) { 2140 for_each_online_pgdat(pgdat) {
2135 for (i = 0; i < MAX_NR_ZONES; i++) { 2141 for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -2162,7 +2168,7 @@ static void calculate_totalreserve_pages(void)
2162static void setup_per_zone_lowmem_reserve(void) 2168static void setup_per_zone_lowmem_reserve(void)
2163{ 2169{
2164 struct pglist_data *pgdat; 2170 struct pglist_data *pgdat;
2165 int j, idx; 2171 enum zone_type j, idx;
2166 2172
2167 for_each_online_pgdat(pgdat) { 2173 for_each_online_pgdat(pgdat) {
2168 for (j = 0; j < MAX_NR_ZONES; j++) { 2174 for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -2171,9 +2177,12 @@ static void setup_per_zone_lowmem_reserve(void)
2171 2177
2172 zone->lowmem_reserve[j] = 0; 2178 zone->lowmem_reserve[j] = 0;
2173 2179
2174 for (idx = j-1; idx >= 0; idx--) { 2180 idx = j;
2181 while (idx) {
2175 struct zone *lower_zone; 2182 struct zone *lower_zone;
2176 2183
2184 idx--;
2185
2177 if (sysctl_lowmem_reserve_ratio[idx] < 1) 2186 if (sysctl_lowmem_reserve_ratio[idx] < 1)
2178 sysctl_lowmem_reserve_ratio[idx] = 1; 2187 sysctl_lowmem_reserve_ratio[idx] = 1;
2179 2188
@@ -2314,10 +2323,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
2314 return rc; 2323 return rc;
2315 2324
2316 for_each_zone(zone) 2325 for_each_zone(zone)
2317 zone->min_unmapped_ratio = (zone->present_pages * 2326 zone->min_unmapped_pages = (zone->present_pages *
2318 sysctl_min_unmapped_ratio) / 100; 2327 sysctl_min_unmapped_ratio) / 100;
2319 return 0; 2328 return 0;
2320} 2329}
2330
2331int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
2332 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2333{
2334 struct zone *zone;
2335 int rc;
2336
2337 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2338 if (rc)
2339 return rc;
2340
2341 for_each_zone(zone)
2342 zone->min_slab_pages = (zone->present_pages *
2343 sysctl_min_slab_ratio) / 100;
2344 return 0;
2345}
2321#endif 2346#endif
2322 2347
2323/* 2348/*
diff --git a/mm/page_io.c b/mm/page_io.c
index 88029948d00a..d4840ecbf8f9 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -52,14 +52,29 @@ static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err)
52 if (bio->bi_size) 52 if (bio->bi_size)
53 return 1; 53 return 1;
54 54
55 if (!uptodate) 55 if (!uptodate) {
56 SetPageError(page); 56 SetPageError(page);
57 /*
58 * We failed to write the page out to swap-space.
59 * Re-dirty the page in order to avoid it being reclaimed.
60 * Also print a dire warning that things will go BAD (tm)
61 * very quickly.
62 *
63 * Also clear PG_reclaim to avoid rotate_reclaimable_page()
64 */
65 set_page_dirty(page);
66 printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n",
67 imajor(bio->bi_bdev->bd_inode),
68 iminor(bio->bi_bdev->bd_inode),
69 (unsigned long long)bio->bi_sector);
70 ClearPageReclaim(page);
71 }
57 end_page_writeback(page); 72 end_page_writeback(page);
58 bio_put(bio); 73 bio_put(bio);
59 return 0; 74 return 0;
60} 75}
61 76
62static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) 77int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
63{ 78{
64 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 79 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
65 struct page *page = bio->bi_io_vec[0].bv_page; 80 struct page *page = bio->bi_io_vec[0].bv_page;
@@ -70,6 +85,10 @@ static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
70 if (!uptodate) { 85 if (!uptodate) {
71 SetPageError(page); 86 SetPageError(page);
72 ClearPageUptodate(page); 87 ClearPageUptodate(page);
88 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
89 imajor(bio->bi_bdev->bd_inode),
90 iminor(bio->bi_bdev->bd_inode),
91 (unsigned long long)bio->bi_sector);
73 } else { 92 } else {
74 SetPageUptodate(page); 93 SetPageUptodate(page);
75 } 94 }
@@ -137,10 +156,12 @@ out:
137 * We use end_swap_bio_read() even for writes, because it happens to do what 156 * We use end_swap_bio_read() even for writes, because it happens to do what
138 * we want. 157 * we want.
139 */ 158 */
140int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page) 159int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
160 struct bio **bio_chain)
141{ 161{
142 struct bio *bio; 162 struct bio *bio;
143 int ret = 0; 163 int ret = 0;
164 int bio_rw;
144 165
145 lock_page(page); 166 lock_page(page);
146 167
@@ -151,11 +172,22 @@ int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
151 goto out; 172 goto out;
152 } 173 }
153 174
154 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 175 bio_rw = rw;
155 wait_on_page_locked(page); 176 if (!bio_chain)
156 177 bio_rw |= (1 << BIO_RW_SYNC);
157 if (!PageUptodate(page) || PageError(page)) 178 if (bio_chain)
158 ret = -EIO; 179 bio_get(bio);
180 submit_bio(bio_rw, bio);
181 if (bio_chain == NULL) {
182 wait_on_page_locked(page);
183
184 if (!PageUptodate(page) || PageError(page))
185 ret = -EIO;
186 }
187 if (bio_chain) {
188 bio->bi_private = *bio_chain;
189 *bio_chain = bio;
190 }
159out: 191out:
160 return ret; 192 return ret;
161} 193}
diff --git a/mm/rmap.c b/mm/rmap.c
index 40158b59729e..e2155d791d99 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -434,6 +434,71 @@ int page_referenced(struct page *page, int is_locked)
434 return referenced; 434 return referenced;
435} 435}
436 436
437static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
438{
439 struct mm_struct *mm = vma->vm_mm;
440 unsigned long address;
441 pte_t *pte, entry;
442 spinlock_t *ptl;
443 int ret = 0;
444
445 address = vma_address(page, vma);
446 if (address == -EFAULT)
447 goto out;
448
449 pte = page_check_address(page, mm, address, &ptl);
450 if (!pte)
451 goto out;
452
453 if (!pte_dirty(*pte) && !pte_write(*pte))
454 goto unlock;
455
456 entry = ptep_get_and_clear(mm, address, pte);
457 entry = pte_mkclean(entry);
458 entry = pte_wrprotect(entry);
459 ptep_establish(vma, address, pte, entry);
460 lazy_mmu_prot_update(entry);
461 ret = 1;
462
463unlock:
464 pte_unmap_unlock(pte, ptl);
465out:
466 return ret;
467}
468
469static int page_mkclean_file(struct address_space *mapping, struct page *page)
470{
471 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
472 struct vm_area_struct *vma;
473 struct prio_tree_iter iter;
474 int ret = 0;
475
476 BUG_ON(PageAnon(page));
477
478 spin_lock(&mapping->i_mmap_lock);
479 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
480 if (vma->vm_flags & VM_SHARED)
481 ret += page_mkclean_one(page, vma);
482 }
483 spin_unlock(&mapping->i_mmap_lock);
484 return ret;
485}
486
487int page_mkclean(struct page *page)
488{
489 int ret = 0;
490
491 BUG_ON(!PageLocked(page));
492
493 if (page_mapped(page)) {
494 struct address_space *mapping = page_mapping(page);
495 if (mapping)
496 ret = page_mkclean_file(mapping, page);
497 }
498
499 return ret;
500}
501
437/** 502/**
438 * page_set_anon_rmap - setup new anonymous rmap 503 * page_set_anon_rmap - setup new anonymous rmap
439 * @page: the page to add the mapping to 504 * @page: the page to add the mapping to
diff --git a/mm/shmem.c b/mm/shmem.c
index db21c51531ca..8631be45b40d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -45,6 +45,7 @@
45#include <linux/namei.h> 45#include <linux/namei.h>
46#include <linux/ctype.h> 46#include <linux/ctype.h>
47#include <linux/migrate.h> 47#include <linux/migrate.h>
48#include <linux/highmem.h>
48 49
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50#include <asm/div64.h> 51#include <asm/div64.h>
diff --git a/mm/slab.c b/mm/slab.c
index 21ba06035700..7a48eb1a60c8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
313 struct kmem_list3 *l3, int tofree); 313 struct kmem_list3 *l3, int tofree);
314static void free_block(struct kmem_cache *cachep, void **objpp, int len, 314static void free_block(struct kmem_cache *cachep, void **objpp, int len,
315 int node); 315 int node);
316static void enable_cpucache(struct kmem_cache *cachep); 316static int enable_cpucache(struct kmem_cache *cachep);
317static void cache_reap(void *unused); 317static void cache_reap(void *unused);
318 318
319/* 319/*
@@ -674,6 +674,8 @@ static struct kmem_cache cache_cache = {
674#endif 674#endif
675}; 675};
676 676
677#define BAD_ALIEN_MAGIC 0x01020304ul
678
677#ifdef CONFIG_LOCKDEP 679#ifdef CONFIG_LOCKDEP
678 680
679/* 681/*
@@ -682,42 +684,58 @@ static struct kmem_cache cache_cache = {
682 * The locking for this is tricky in that it nests within the locks 684 * The locking for this is tricky in that it nests within the locks
683 * of all other slabs in a few places; to deal with this special 685 * of all other slabs in a few places; to deal with this special
684 * locking we put on-slab caches into a separate lock-class. 686 * locking we put on-slab caches into a separate lock-class.
687 *
688 * We set lock class for alien array caches which are up during init.
689 * The lock annotation will be lost if all cpus of a node goes down and
690 * then comes back up during hotplug
685 */ 691 */
686static struct lock_class_key on_slab_key; 692static struct lock_class_key on_slab_l3_key;
693static struct lock_class_key on_slab_alc_key;
694
695static inline void init_lock_keys(void)
687 696
688static inline void init_lock_keys(struct cache_sizes *s)
689{ 697{
690 int q; 698 int q;
691 699 struct cache_sizes *s = malloc_sizes;
692 for (q = 0; q < MAX_NUMNODES; q++) { 700
693 if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep)) 701 while (s->cs_size != ULONG_MAX) {
694 continue; 702 for_each_node(q) {
695 lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock, 703 struct array_cache **alc;
696 &on_slab_key); 704 int r;
705 struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
706 if (!l3 || OFF_SLAB(s->cs_cachep))
707 continue;
708 lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
709 alc = l3->alien;
710 /*
711 * FIXME: This check for BAD_ALIEN_MAGIC
712 * should go away when common slab code is taught to
713 * work even without alien caches.
714 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
715 * for alloc_alien_cache,
716 */
717 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
718 continue;
719 for_each_node(r) {
720 if (alc[r])
721 lockdep_set_class(&alc[r]->lock,
722 &on_slab_alc_key);
723 }
724 }
725 s++;
697 } 726 }
698} 727}
699
700#else 728#else
701static inline void init_lock_keys(struct cache_sizes *s) 729static inline void init_lock_keys(void)
702{ 730{
703} 731}
704#endif 732#endif
705 733
706
707
708/* Guard access to the cache-chain. */ 734/* Guard access to the cache-chain. */
709static DEFINE_MUTEX(cache_chain_mutex); 735static DEFINE_MUTEX(cache_chain_mutex);
710static struct list_head cache_chain; 736static struct list_head cache_chain;
711 737
712/* 738/*
713 * vm_enough_memory() looks at this to determine how many slab-allocated pages
714 * are possibly freeable under pressure
715 *
716 * SLAB_RECLAIM_ACCOUNT turns this on per-slab
717 */
718atomic_t slab_reclaim_pages;
719
720/*
721 * chicken and egg problem: delay the per-cpu array allocation 739 * chicken and egg problem: delay the per-cpu array allocation
722 * until the general caches are up. 740 * until the general caches are up.
723 */ 741 */
@@ -768,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
768 return csizep->cs_cachep; 786 return csizep->cs_cachep;
769} 787}
770 788
771struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) 789static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
772{ 790{
773 return __find_general_cachep(size, gfpflags); 791 return __find_general_cachep(size, gfpflags);
774} 792}
775EXPORT_SYMBOL(kmem_find_general_cachep);
776 793
777static size_t slab_mgmt_size(size_t nr_objs, size_t align) 794static size_t slab_mgmt_size(size_t nr_objs, size_t align)
778{ 795{
@@ -1092,7 +1109,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1092 1109
1093static inline struct array_cache **alloc_alien_cache(int node, int limit) 1110static inline struct array_cache **alloc_alien_cache(int node, int limit)
1094{ 1111{
1095 return (struct array_cache **) 0x01020304ul; 1112 return (struct array_cache **)BAD_ALIEN_MAGIC;
1096} 1113}
1097 1114
1098static inline void free_alien_cache(struct array_cache **ac_ptr) 1115static inline void free_alien_cache(struct array_cache **ac_ptr)
@@ -1422,7 +1439,6 @@ void __init kmem_cache_init(void)
1422 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1439 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1423 NULL, NULL); 1440 NULL, NULL);
1424 } 1441 }
1425 init_lock_keys(sizes);
1426 1442
1427 sizes->cs_dmacachep = kmem_cache_create(names->name_dma, 1443 sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
1428 sizes->cs_size, 1444 sizes->cs_size,
@@ -1491,10 +1507,15 @@ void __init kmem_cache_init(void)
1491 struct kmem_cache *cachep; 1507 struct kmem_cache *cachep;
1492 mutex_lock(&cache_chain_mutex); 1508 mutex_lock(&cache_chain_mutex);
1493 list_for_each_entry(cachep, &cache_chain, next) 1509 list_for_each_entry(cachep, &cache_chain, next)
1494 enable_cpucache(cachep); 1510 if (enable_cpucache(cachep))
1511 BUG();
1495 mutex_unlock(&cache_chain_mutex); 1512 mutex_unlock(&cache_chain_mutex);
1496 } 1513 }
1497 1514
1515 /* Annotate slab for lockdep -- annotate the malloc caches */
1516 init_lock_keys();
1517
1518
1498 /* Done! */ 1519 /* Done! */
1499 g_cpucache_up = FULL; 1520 g_cpucache_up = FULL;
1500 1521
@@ -1551,8 +1572,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1551 1572
1552 nr_pages = (1 << cachep->gfporder); 1573 nr_pages = (1 << cachep->gfporder);
1553 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1574 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1554 atomic_add(nr_pages, &slab_reclaim_pages); 1575 add_zone_page_state(page_zone(page),
1555 add_zone_page_state(page_zone(page), NR_SLAB, nr_pages); 1576 NR_SLAB_RECLAIMABLE, nr_pages);
1577 else
1578 add_zone_page_state(page_zone(page),
1579 NR_SLAB_UNRECLAIMABLE, nr_pages);
1556 for (i = 0; i < nr_pages; i++) 1580 for (i = 0; i < nr_pages; i++)
1557 __SetPageSlab(page + i); 1581 __SetPageSlab(page + i);
1558 return page_address(page); 1582 return page_address(page);
@@ -1567,7 +1591,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1567 struct page *page = virt_to_page(addr); 1591 struct page *page = virt_to_page(addr);
1568 const unsigned long nr_freed = i; 1592 const unsigned long nr_freed = i;
1569 1593
1570 sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed); 1594 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1595 sub_zone_page_state(page_zone(page),
1596 NR_SLAB_RECLAIMABLE, nr_freed);
1597 else
1598 sub_zone_page_state(page_zone(page),
1599 NR_SLAB_UNRECLAIMABLE, nr_freed);
1571 while (i--) { 1600 while (i--) {
1572 BUG_ON(!PageSlab(page)); 1601 BUG_ON(!PageSlab(page));
1573 __ClearPageSlab(page); 1602 __ClearPageSlab(page);
@@ -1576,8 +1605,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1576 if (current->reclaim_state) 1605 if (current->reclaim_state)
1577 current->reclaim_state->reclaimed_slab += nr_freed; 1606 current->reclaim_state->reclaimed_slab += nr_freed;
1578 free_pages((unsigned long)addr, cachep->gfporder); 1607 free_pages((unsigned long)addr, cachep->gfporder);
1579 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1580 atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
1581} 1608}
1582 1609
1583static void kmem_rcu_free(struct rcu_head *head) 1610static void kmem_rcu_free(struct rcu_head *head)
@@ -1834,6 +1861,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
1834 } 1861 }
1835} 1862}
1836 1863
1864static void __kmem_cache_destroy(struct kmem_cache *cachep)
1865{
1866 int i;
1867 struct kmem_list3 *l3;
1868
1869 for_each_online_cpu(i)
1870 kfree(cachep->array[i]);
1871
1872 /* NUMA: free the list3 structures */
1873 for_each_online_node(i) {
1874 l3 = cachep->nodelists[i];
1875 if (l3) {
1876 kfree(l3->shared);
1877 free_alien_cache(l3->alien);
1878 kfree(l3);
1879 }
1880 }
1881 kmem_cache_free(&cache_cache, cachep);
1882}
1883
1884
1837/** 1885/**
1838 * calculate_slab_order - calculate size (page order) of slabs 1886 * calculate_slab_order - calculate size (page order) of slabs
1839 * @cachep: pointer to the cache that is being created 1887 * @cachep: pointer to the cache that is being created
@@ -1904,12 +1952,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
1904 return left_over; 1952 return left_over;
1905} 1953}
1906 1954
1907static void setup_cpu_cache(struct kmem_cache *cachep) 1955static int setup_cpu_cache(struct kmem_cache *cachep)
1908{ 1956{
1909 if (g_cpucache_up == FULL) { 1957 if (g_cpucache_up == FULL)
1910 enable_cpucache(cachep); 1958 return enable_cpucache(cachep);
1911 return; 1959
1912 }
1913 if (g_cpucache_up == NONE) { 1960 if (g_cpucache_up == NONE) {
1914 /* 1961 /*
1915 * Note: the first kmem_cache_create must create the cache 1962 * Note: the first kmem_cache_create must create the cache
@@ -1956,6 +2003,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep)
1956 cpu_cache_get(cachep)->touched = 0; 2003 cpu_cache_get(cachep)->touched = 0;
1957 cachep->batchcount = 1; 2004 cachep->batchcount = 1;
1958 cachep->limit = BOOT_CPUCACHE_ENTRIES; 2005 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2006 return 0;
1959} 2007}
1960 2008
1961/** 2009/**
@@ -2097,6 +2145,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2097 } else { 2145 } else {
2098 ralign = BYTES_PER_WORD; 2146 ralign = BYTES_PER_WORD;
2099 } 2147 }
2148
2149 /*
2150 * Redzoning and user store require word alignment. Note this will be
2151 * overridden by architecture or caller mandated alignment if either
2152 * is greater than BYTES_PER_WORD.
2153 */
2154 if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
2155 ralign = BYTES_PER_WORD;
2156
2100 /* 2) arch mandated alignment: disables debug if necessary */ 2157 /* 2) arch mandated alignment: disables debug if necessary */
2101 if (ralign < ARCH_SLAB_MINALIGN) { 2158 if (ralign < ARCH_SLAB_MINALIGN) {
2102 ralign = ARCH_SLAB_MINALIGN; 2159 ralign = ARCH_SLAB_MINALIGN;
@@ -2110,8 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2110 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); 2167 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2111 } 2168 }
2112 /* 2169 /*
2113 * 4) Store it. Note that the debug code below can reduce 2170 * 4) Store it.
2114 * the alignment to BYTES_PER_WORD.
2115 */ 2171 */
2116 align = ralign; 2172 align = ralign;
2117 2173
@@ -2123,20 +2179,19 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2123#if DEBUG 2179#if DEBUG
2124 cachep->obj_size = size; 2180 cachep->obj_size = size;
2125 2181
2182 /*
2183 * Both debugging options require word-alignment which is calculated
2184 * into align above.
2185 */
2126 if (flags & SLAB_RED_ZONE) { 2186 if (flags & SLAB_RED_ZONE) {
2127 /* redzoning only works with word aligned caches */
2128 align = BYTES_PER_WORD;
2129
2130 /* add space for red zone words */ 2187 /* add space for red zone words */
2131 cachep->obj_offset += BYTES_PER_WORD; 2188 cachep->obj_offset += BYTES_PER_WORD;
2132 size += 2 * BYTES_PER_WORD; 2189 size += 2 * BYTES_PER_WORD;
2133 } 2190 }
2134 if (flags & SLAB_STORE_USER) { 2191 if (flags & SLAB_STORE_USER) {
2135 /* user store requires word alignment and 2192 /* user store requires one word storage behind the end of
2136 * one word storage behind the end of the real 2193 * the real object.
2137 * object.
2138 */ 2194 */
2139 align = BYTES_PER_WORD;
2140 size += BYTES_PER_WORD; 2195 size += BYTES_PER_WORD;
2141 } 2196 }
2142#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) 2197#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
@@ -2200,14 +2255,26 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2200 cachep->gfpflags |= GFP_DMA; 2255 cachep->gfpflags |= GFP_DMA;
2201 cachep->buffer_size = size; 2256 cachep->buffer_size = size;
2202 2257
2203 if (flags & CFLGS_OFF_SLAB) 2258 if (flags & CFLGS_OFF_SLAB) {
2204 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); 2259 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2260 /*
2261 * This is a possibility for one of the malloc_sizes caches.
2262 * But since we go off slab only for object size greater than
2263 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
2264 * this should not happen at all.
2265 * But leave a BUG_ON for some lucky dude.
2266 */
2267 BUG_ON(!cachep->slabp_cache);
2268 }
2205 cachep->ctor = ctor; 2269 cachep->ctor = ctor;
2206 cachep->dtor = dtor; 2270 cachep->dtor = dtor;
2207 cachep->name = name; 2271 cachep->name = name;
2208 2272
2209 2273 if (setup_cpu_cache(cachep)) {
2210 setup_cpu_cache(cachep); 2274 __kmem_cache_destroy(cachep);
2275 cachep = NULL;
2276 goto oops;
2277 }
2211 2278
2212 /* cache setup completed, link it into the list */ 2279 /* cache setup completed, link it into the list */
2213 list_add(&cachep->next, &cache_chain); 2280 list_add(&cachep->next, &cache_chain);
@@ -2389,9 +2456,6 @@ EXPORT_SYMBOL(kmem_cache_shrink);
2389 */ 2456 */
2390int kmem_cache_destroy(struct kmem_cache *cachep) 2457int kmem_cache_destroy(struct kmem_cache *cachep)
2391{ 2458{
2392 int i;
2393 struct kmem_list3 *l3;
2394
2395 BUG_ON(!cachep || in_interrupt()); 2459 BUG_ON(!cachep || in_interrupt());
2396 2460
2397 /* Don't let CPUs to come and go */ 2461 /* Don't let CPUs to come and go */
@@ -2417,25 +2481,23 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
2417 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) 2481 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2418 synchronize_rcu(); 2482 synchronize_rcu();
2419 2483
2420 for_each_online_cpu(i) 2484 __kmem_cache_destroy(cachep);
2421 kfree(cachep->array[i]);
2422
2423 /* NUMA: free the list3 structures */
2424 for_each_online_node(i) {
2425 l3 = cachep->nodelists[i];
2426 if (l3) {
2427 kfree(l3->shared);
2428 free_alien_cache(l3->alien);
2429 kfree(l3);
2430 }
2431 }
2432 kmem_cache_free(&cache_cache, cachep);
2433 unlock_cpu_hotplug(); 2485 unlock_cpu_hotplug();
2434 return 0; 2486 return 0;
2435} 2487}
2436EXPORT_SYMBOL(kmem_cache_destroy); 2488EXPORT_SYMBOL(kmem_cache_destroy);
2437 2489
2438/* Get the memory for a slab management obj. */ 2490/*
2491 * Get the memory for a slab management obj.
2492 * For a slab cache when the slab descriptor is off-slab, slab descriptors
2493 * always come from malloc_sizes caches. The slab descriptor cannot
2494 * come from the same cache which is getting created because,
2495 * when we are searching for an appropriate cache for these
2496 * descriptors in kmem_cache_create, we search through the malloc_sizes array.
2497 * If we are creating a malloc_sizes cache here it would not be visible to
2498 * kmem_find_general_cachep till the initialization is complete.
2499 * Hence we cannot have slabp_cache same as the original cache.
2500 */
2439static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, 2501static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2440 int colour_off, gfp_t local_flags, 2502 int colour_off, gfp_t local_flags,
2441 int nodeid) 2503 int nodeid)
@@ -3119,6 +3181,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3119 if (slabp->inuse == 0) { 3181 if (slabp->inuse == 0) {
3120 if (l3->free_objects > l3->free_limit) { 3182 if (l3->free_objects > l3->free_limit) {
3121 l3->free_objects -= cachep->num; 3183 l3->free_objects -= cachep->num;
3184 /* No need to drop any previously held
3185 * lock here, even if we have a off-slab slab
3186 * descriptor it is guaranteed to come from
3187 * a different cache, refer to comments before
3188 * alloc_slabmgmt.
3189 */
3122 slab_destroy(cachep, slabp); 3190 slab_destroy(cachep, slabp);
3123 } else { 3191 } else {
3124 list_add(&slabp->list, &l3->slabs_free); 3192 list_add(&slabp->list, &l3->slabs_free);
@@ -3317,7 +3385,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3317} 3385}
3318EXPORT_SYMBOL(kmem_cache_alloc_node); 3386EXPORT_SYMBOL(kmem_cache_alloc_node);
3319 3387
3320void *kmalloc_node(size_t size, gfp_t flags, int node) 3388void *__kmalloc_node(size_t size, gfp_t flags, int node)
3321{ 3389{
3322 struct kmem_cache *cachep; 3390 struct kmem_cache *cachep;
3323 3391
@@ -3326,7 +3394,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node)
3326 return NULL; 3394 return NULL;
3327 return kmem_cache_alloc_node(cachep, flags, node); 3395 return kmem_cache_alloc_node(cachep, flags, node);
3328} 3396}
3329EXPORT_SYMBOL(kmalloc_node); 3397EXPORT_SYMBOL(__kmalloc_node);
3330#endif 3398#endif
3331 3399
3332/** 3400/**
@@ -3370,55 +3438,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
3370EXPORT_SYMBOL(__kmalloc_track_caller); 3438EXPORT_SYMBOL(__kmalloc_track_caller);
3371#endif 3439#endif
3372 3440
3373#ifdef CONFIG_SMP
3374/**
3375 * __alloc_percpu - allocate one copy of the object for every present
3376 * cpu in the system, zeroing them.
3377 * Objects should be dereferenced using the per_cpu_ptr macro only.
3378 *
3379 * @size: how many bytes of memory are required.
3380 */
3381void *__alloc_percpu(size_t size)
3382{
3383 int i;
3384 struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
3385
3386 if (!pdata)
3387 return NULL;
3388
3389 /*
3390 * Cannot use for_each_online_cpu since a cpu may come online
3391 * and we have no way of figuring out how to fix the array
3392 * that we have allocated then....
3393 */
3394 for_each_possible_cpu(i) {
3395 int node = cpu_to_node(i);
3396
3397 if (node_online(node))
3398 pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
3399 else
3400 pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
3401
3402 if (!pdata->ptrs[i])
3403 goto unwind_oom;
3404 memset(pdata->ptrs[i], 0, size);
3405 }
3406
3407 /* Catch derefs w/o wrappers */
3408 return (void *)(~(unsigned long)pdata);
3409
3410unwind_oom:
3411 while (--i >= 0) {
3412 if (!cpu_possible(i))
3413 continue;
3414 kfree(pdata->ptrs[i]);
3415 }
3416 kfree(pdata);
3417 return NULL;
3418}
3419EXPORT_SYMBOL(__alloc_percpu);
3420#endif
3421
3422/** 3441/**
3423 * kmem_cache_free - Deallocate an object 3442 * kmem_cache_free - Deallocate an object
3424 * @cachep: The cache the allocation was from. 3443 * @cachep: The cache the allocation was from.
@@ -3464,29 +3483,6 @@ void kfree(const void *objp)
3464} 3483}
3465EXPORT_SYMBOL(kfree); 3484EXPORT_SYMBOL(kfree);
3466 3485
3467#ifdef CONFIG_SMP
3468/**
3469 * free_percpu - free previously allocated percpu memory
3470 * @objp: pointer returned by alloc_percpu.
3471 *
3472 * Don't free memory not originally allocated by alloc_percpu()
3473 * The complemented objp is to check for that.
3474 */
3475void free_percpu(const void *objp)
3476{
3477 int i;
3478 struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
3479
3480 /*
3481 * We allocate for all cpus so we cannot use for online cpu here.
3482 */
3483 for_each_possible_cpu(i)
3484 kfree(p->ptrs[i]);
3485 kfree(p);
3486}
3487EXPORT_SYMBOL(free_percpu);
3488#endif
3489
3490unsigned int kmem_cache_size(struct kmem_cache *cachep) 3486unsigned int kmem_cache_size(struct kmem_cache *cachep)
3491{ 3487{
3492 return obj_size(cachep); 3488 return obj_size(cachep);
@@ -3603,22 +3599,26 @@ static void do_ccupdate_local(void *info)
3603static int do_tune_cpucache(struct kmem_cache *cachep, int limit, 3599static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3604 int batchcount, int shared) 3600 int batchcount, int shared)
3605{ 3601{
3606 struct ccupdate_struct new; 3602 struct ccupdate_struct *new;
3607 int i, err; 3603 int i;
3604
3605 new = kzalloc(sizeof(*new), GFP_KERNEL);
3606 if (!new)
3607 return -ENOMEM;
3608 3608
3609 memset(&new.new, 0, sizeof(new.new));
3610 for_each_online_cpu(i) { 3609 for_each_online_cpu(i) {
3611 new.new[i] = alloc_arraycache(cpu_to_node(i), limit, 3610 new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
3612 batchcount); 3611 batchcount);
3613 if (!new.new[i]) { 3612 if (!new->new[i]) {
3614 for (i--; i >= 0; i--) 3613 for (i--; i >= 0; i--)
3615 kfree(new.new[i]); 3614 kfree(new->new[i]);
3615 kfree(new);
3616 return -ENOMEM; 3616 return -ENOMEM;
3617 } 3617 }
3618 } 3618 }
3619 new.cachep = cachep; 3619 new->cachep = cachep;
3620 3620
3621 on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); 3621 on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
3622 3622
3623 check_irq_on(); 3623 check_irq_on();
3624 cachep->batchcount = batchcount; 3624 cachep->batchcount = batchcount;
@@ -3626,7 +3626,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3626 cachep->shared = shared; 3626 cachep->shared = shared;
3627 3627
3628 for_each_online_cpu(i) { 3628 for_each_online_cpu(i) {
3629 struct array_cache *ccold = new.new[i]; 3629 struct array_cache *ccold = new->new[i];
3630 if (!ccold) 3630 if (!ccold)
3631 continue; 3631 continue;
3632 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); 3632 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
@@ -3634,18 +3634,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3634 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); 3634 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3635 kfree(ccold); 3635 kfree(ccold);
3636 } 3636 }
3637 3637 kfree(new);
3638 err = alloc_kmemlist(cachep); 3638 return alloc_kmemlist(cachep);
3639 if (err) {
3640 printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
3641 cachep->name, -err);
3642 BUG();
3643 }
3644 return 0;
3645} 3639}
3646 3640
3647/* Called with cache_chain_mutex held always */ 3641/* Called with cache_chain_mutex held always */
3648static void enable_cpucache(struct kmem_cache *cachep) 3642static int enable_cpucache(struct kmem_cache *cachep)
3649{ 3643{
3650 int err; 3644 int err;
3651 int limit, shared; 3645 int limit, shared;
@@ -3697,6 +3691,7 @@ static void enable_cpucache(struct kmem_cache *cachep)
3697 if (err) 3691 if (err)
3698 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", 3692 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
3699 cachep->name, -err); 3693 cachep->name, -err);
3694 return err;
3700} 3695}
3701 3696
3702/* 3697/*
@@ -4157,6 +4152,7 @@ static int leaks_show(struct seq_file *m, void *p)
4157 show_symbol(m, n[2*i+2]); 4152 show_symbol(m, n[2*i+2]);
4158 seq_putc(m, '\n'); 4153 seq_putc(m, '\n');
4159 } 4154 }
4155
4160 return 0; 4156 return 0;
4161} 4157}
4162 4158
diff --git a/mm/slob.c b/mm/slob.c
index 7b52b20b9607..20188627347c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -339,52 +339,3 @@ void kmem_cache_init(void)
339 339
340 mod_timer(&slob_timer, jiffies + HZ); 340 mod_timer(&slob_timer, jiffies + HZ);
341} 341}
342
343atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
344EXPORT_SYMBOL(slab_reclaim_pages);
345
346#ifdef CONFIG_SMP
347
348void *__alloc_percpu(size_t size)
349{
350 int i;
351 struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
352
353 if (!pdata)
354 return NULL;
355
356 for_each_possible_cpu(i) {
357 pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
358 if (!pdata->ptrs[i])
359 goto unwind_oom;
360 memset(pdata->ptrs[i], 0, size);
361 }
362
363 /* Catch derefs w/o wrappers */
364 return (void *) (~(unsigned long) pdata);
365
366unwind_oom:
367 while (--i >= 0) {
368 if (!cpu_possible(i))
369 continue;
370 kfree(pdata->ptrs[i]);
371 }
372 kfree(pdata);
373 return NULL;
374}
375EXPORT_SYMBOL(__alloc_percpu);
376
377void
378free_percpu(const void *objp)
379{
380 int i;
381 struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
382
383 for_each_possible_cpu(i)
384 kfree(p->ptrs[i]);
385
386 kfree(p);
387}
388EXPORT_SYMBOL(free_percpu);
389
390#endif
diff --git a/mm/swap.c b/mm/swap.c
index 687686a61f7c..2e0e871f542f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,6 +34,25 @@
34/* How many pages do we try to swap or page in/out together? */ 34/* How many pages do we try to swap or page in/out together? */
35int page_cluster; 35int page_cluster;
36 36
37/*
38 * This path almost never happens for VM activity - pages are normally
39 * freed via pagevecs. But it gets used by networking.
40 */
41static void fastcall __page_cache_release(struct page *page)
42{
43 if (PageLRU(page)) {
44 unsigned long flags;
45 struct zone *zone = page_zone(page);
46
47 spin_lock_irqsave(&zone->lru_lock, flags);
48 VM_BUG_ON(!PageLRU(page));
49 __ClearPageLRU(page);
50 del_page_from_lru(zone, page);
51 spin_unlock_irqrestore(&zone->lru_lock, flags);
52 }
53 free_hot_page(page);
54}
55
37static void put_compound_page(struct page *page) 56static void put_compound_page(struct page *page)
38{ 57{
39 page = (struct page *)page_private(page); 58 page = (struct page *)page_private(page);
@@ -223,26 +242,6 @@ int lru_add_drain_all(void)
223#endif 242#endif
224 243
225/* 244/*
226 * This path almost never happens for VM activity - pages are normally
227 * freed via pagevecs. But it gets used by networking.
228 */
229void fastcall __page_cache_release(struct page *page)
230{
231 if (PageLRU(page)) {
232 unsigned long flags;
233 struct zone *zone = page_zone(page);
234
235 spin_lock_irqsave(&zone->lru_lock, flags);
236 BUG_ON(!PageLRU(page));
237 __ClearPageLRU(page);
238 del_page_from_lru(zone, page);
239 spin_unlock_irqrestore(&zone->lru_lock, flags);
240 }
241 free_hot_page(page);
242}
243EXPORT_SYMBOL(__page_cache_release);
244
245/*
246 * Batched page_cache_release(). Decrement the reference count on all the 245 * Batched page_cache_release(). Decrement the reference count on all the
247 * passed pages. If it fell to zero then remove the page from the LRU and 246 * passed pages. If it fell to zero then remove the page from the LRU and
248 * free it. 247 * free it.
@@ -284,7 +283,7 @@ void release_pages(struct page **pages, int nr, int cold)
284 zone = pagezone; 283 zone = pagezone;
285 spin_lock_irq(&zone->lru_lock); 284 spin_lock_irq(&zone->lru_lock);
286 } 285 }
287 BUG_ON(!PageLRU(page)); 286 VM_BUG_ON(!PageLRU(page));
288 __ClearPageLRU(page); 287 __ClearPageLRU(page);
289 del_page_from_lru(zone, page); 288 del_page_from_lru(zone, page);
290 } 289 }
@@ -337,7 +336,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
337 for (i = 0; i < pagevec_count(pvec); i++) { 336 for (i = 0; i < pagevec_count(pvec); i++) {
338 struct page *page = pvec->pages[i]; 337 struct page *page = pvec->pages[i];
339 338
340 BUG_ON(PageLRU(page)); 339 VM_BUG_ON(PageLRU(page));
341 if (put_page_testzero(page)) 340 if (put_page_testzero(page))
342 pagevec_add(&pages_to_free, page); 341 pagevec_add(&pages_to_free, page);
343 } 342 }
@@ -364,7 +363,7 @@ void __pagevec_lru_add(struct pagevec *pvec)
364 zone = pagezone; 363 zone = pagezone;
365 spin_lock_irq(&zone->lru_lock); 364 spin_lock_irq(&zone->lru_lock);
366 } 365 }
367 BUG_ON(PageLRU(page)); 366 VM_BUG_ON(PageLRU(page));
368 SetPageLRU(page); 367 SetPageLRU(page);
369 add_page_to_inactive_list(zone, page); 368 add_page_to_inactive_list(zone, page);
370 } 369 }
@@ -391,9 +390,9 @@ void __pagevec_lru_add_active(struct pagevec *pvec)
391 zone = pagezone; 390 zone = pagezone;
392 spin_lock_irq(&zone->lru_lock); 391 spin_lock_irq(&zone->lru_lock);
393 } 392 }
394 BUG_ON(PageLRU(page)); 393 VM_BUG_ON(PageLRU(page));
395 SetPageLRU(page); 394 SetPageLRU(page);
396 BUG_ON(PageActive(page)); 395 VM_BUG_ON(PageActive(page));
397 SetPageActive(page); 396 SetPageActive(page);
398 add_page_to_active_list(zone, page); 397 add_page_to_active_list(zone, page);
399 } 398 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 266162d2ba28..9aad8b0cc6ee 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -24,6 +24,9 @@
24DEFINE_RWLOCK(vmlist_lock); 24DEFINE_RWLOCK(vmlist_lock);
25struct vm_struct *vmlist; 25struct vm_struct *vmlist;
26 26
27static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
28 int node);
29
27static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) 30static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
28{ 31{
29 pte_t *pte; 32 pte_t *pte;
@@ -478,8 +481,8 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
478 * allocator with @gfp_mask flags. Map them into contiguous 481 * allocator with @gfp_mask flags. Map them into contiguous
479 * kernel virtual space, using a pagetable protection of @prot. 482 * kernel virtual space, using a pagetable protection of @prot.
480 */ 483 */
481void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, 484static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
482 int node) 485 int node)
483{ 486{
484 struct vm_struct *area; 487 struct vm_struct *area;
485 488
@@ -493,7 +496,6 @@ void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
493 496
494 return __vmalloc_area_node(area, gfp_mask, prot, node); 497 return __vmalloc_area_node(area, gfp_mask, prot, node);
495} 498}
496EXPORT_SYMBOL(__vmalloc_node);
497 499
498void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) 500void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
499{ 501{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5d4c4d02254d..87779dda4ec6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -62,6 +62,8 @@ struct scan_control {
62 int swap_cluster_max; 62 int swap_cluster_max;
63 63
64 int swappiness; 64 int swappiness;
65
66 int all_unreclaimable;
65}; 67};
66 68
67/* 69/*
@@ -377,8 +379,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
377 379
378int remove_mapping(struct address_space *mapping, struct page *page) 380int remove_mapping(struct address_space *mapping, struct page *page)
379{ 381{
380 if (!mapping) 382 BUG_ON(!PageLocked(page));
381 return 0; /* truncate got there first */ 383 BUG_ON(mapping != page_mapping(page));
382 384
383 write_lock_irq(&mapping->tree_lock); 385 write_lock_irq(&mapping->tree_lock);
384 386
@@ -440,7 +442,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
440 if (TestSetPageLocked(page)) 442 if (TestSetPageLocked(page))
441 goto keep; 443 goto keep;
442 444
443 BUG_ON(PageActive(page)); 445 VM_BUG_ON(PageActive(page));
444 446
445 sc->nr_scanned++; 447 sc->nr_scanned++;
446 448
@@ -547,7 +549,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
547 goto free_it; 549 goto free_it;
548 } 550 }
549 551
550 if (!remove_mapping(mapping, page)) 552 if (!mapping || !remove_mapping(mapping, page))
551 goto keep_locked; 553 goto keep_locked;
552 554
553free_it: 555free_it:
@@ -564,7 +566,7 @@ keep_locked:
564 unlock_page(page); 566 unlock_page(page);
565keep: 567keep:
566 list_add(&page->lru, &ret_pages); 568 list_add(&page->lru, &ret_pages);
567 BUG_ON(PageLRU(page)); 569 VM_BUG_ON(PageLRU(page));
568 } 570 }
569 list_splice(&ret_pages, page_list); 571 list_splice(&ret_pages, page_list);
570 if (pagevec_count(&freed_pvec)) 572 if (pagevec_count(&freed_pvec))
@@ -603,7 +605,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
603 page = lru_to_page(src); 605 page = lru_to_page(src);
604 prefetchw_prev_lru_page(page, src, flags); 606 prefetchw_prev_lru_page(page, src, flags);
605 607
606 BUG_ON(!PageLRU(page)); 608 VM_BUG_ON(!PageLRU(page));
607 609
608 list_del(&page->lru); 610 list_del(&page->lru);
609 target = src; 611 target = src;
@@ -674,7 +676,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
674 */ 676 */
675 while (!list_empty(&page_list)) { 677 while (!list_empty(&page_list)) {
676 page = lru_to_page(&page_list); 678 page = lru_to_page(&page_list);
677 BUG_ON(PageLRU(page)); 679 VM_BUG_ON(PageLRU(page));
678 SetPageLRU(page); 680 SetPageLRU(page);
679 list_del(&page->lru); 681 list_del(&page->lru);
680 if (PageActive(page)) 682 if (PageActive(page))
@@ -695,6 +697,11 @@ done:
695 return nr_reclaimed; 697 return nr_reclaimed;
696} 698}
697 699
700static inline int zone_is_near_oom(struct zone *zone)
701{
702 return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
703}
704
698/* 705/*
699 * This moves pages from the active list to the inactive list. 706 * This moves pages from the active list to the inactive list.
700 * 707 *
@@ -730,6 +737,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
730 long distress; 737 long distress;
731 long swap_tendency; 738 long swap_tendency;
732 739
740 if (zone_is_near_oom(zone))
741 goto force_reclaim_mapped;
742
733 /* 743 /*
734 * `distress' is a measure of how much trouble we're having 744 * `distress' is a measure of how much trouble we're having
735 * reclaiming pages. 0 -> no problems. 100 -> great trouble. 745 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
@@ -765,6 +775,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
765 * memory onto the inactive list. 775 * memory onto the inactive list.
766 */ 776 */
767 if (swap_tendency >= 100) 777 if (swap_tendency >= 100)
778force_reclaim_mapped:
768 reclaim_mapped = 1; 779 reclaim_mapped = 1;
769 } 780 }
770 781
@@ -797,9 +808,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
797 while (!list_empty(&l_inactive)) { 808 while (!list_empty(&l_inactive)) {
798 page = lru_to_page(&l_inactive); 809 page = lru_to_page(&l_inactive);
799 prefetchw_prev_lru_page(page, &l_inactive, flags); 810 prefetchw_prev_lru_page(page, &l_inactive, flags);
800 BUG_ON(PageLRU(page)); 811 VM_BUG_ON(PageLRU(page));
801 SetPageLRU(page); 812 SetPageLRU(page);
802 BUG_ON(!PageActive(page)); 813 VM_BUG_ON(!PageActive(page));
803 ClearPageActive(page); 814 ClearPageActive(page);
804 815
805 list_move(&page->lru, &zone->inactive_list); 816 list_move(&page->lru, &zone->inactive_list);
@@ -827,9 +838,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
827 while (!list_empty(&l_active)) { 838 while (!list_empty(&l_active)) {
828 page = lru_to_page(&l_active); 839 page = lru_to_page(&l_active);
829 prefetchw_prev_lru_page(page, &l_active, flags); 840 prefetchw_prev_lru_page(page, &l_active, flags);
830 BUG_ON(PageLRU(page)); 841 VM_BUG_ON(PageLRU(page));
831 SetPageLRU(page); 842 SetPageLRU(page);
832 BUG_ON(!PageActive(page)); 843 VM_BUG_ON(!PageActive(page));
833 list_move(&page->lru, &zone->active_list); 844 list_move(&page->lru, &zone->active_list);
834 pgmoved++; 845 pgmoved++;
835 if (!pagevec_add(&pvec, page)) { 846 if (!pagevec_add(&pvec, page)) {
@@ -925,6 +936,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
925 unsigned long nr_reclaimed = 0; 936 unsigned long nr_reclaimed = 0;
926 int i; 937 int i;
927 938
939 sc->all_unreclaimable = 1;
928 for (i = 0; zones[i] != NULL; i++) { 940 for (i = 0; zones[i] != NULL; i++) {
929 struct zone *zone = zones[i]; 941 struct zone *zone = zones[i];
930 942
@@ -941,6 +953,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
941 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 953 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
942 continue; /* Let kswapd poll it */ 954 continue; /* Let kswapd poll it */
943 955
956 sc->all_unreclaimable = 0;
957
944 nr_reclaimed += shrink_zone(priority, zone, sc); 958 nr_reclaimed += shrink_zone(priority, zone, sc);
945 } 959 }
946 return nr_reclaimed; 960 return nr_reclaimed;
@@ -1021,6 +1035,9 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1021 if (sc.nr_scanned && priority < DEF_PRIORITY - 2) 1035 if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
1022 blk_congestion_wait(WRITE, HZ/10); 1036 blk_congestion_wait(WRITE, HZ/10);
1023 } 1037 }
1038 /* top priority shrink_caches still had more to do? don't OOM, then */
1039 if (!sc.all_unreclaimable)
1040 ret = 1;
1024out: 1041out:
1025 for (i = 0; zones[i] != 0; i++) { 1042 for (i = 0; zones[i] != 0; i++) {
1026 struct zone *zone = zones[i]; 1043 struct zone *zone = zones[i];
@@ -1153,7 +1170,7 @@ scan:
1153 if (zone->all_unreclaimable) 1170 if (zone->all_unreclaimable)
1154 continue; 1171 continue;
1155 if (nr_slab == 0 && zone->pages_scanned >= 1172 if (nr_slab == 0 && zone->pages_scanned >=
1156 (zone->nr_active + zone->nr_inactive) * 4) 1173 (zone->nr_active + zone->nr_inactive) * 6)
1157 zone->all_unreclaimable = 1; 1174 zone->all_unreclaimable = 1;
1158 /* 1175 /*
1159 * If we've done a decent amount of scanning and 1176 * If we've done a decent amount of scanning and
@@ -1361,7 +1378,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1361 for_each_zone(zone) 1378 for_each_zone(zone)
1362 lru_pages += zone->nr_active + zone->nr_inactive; 1379 lru_pages += zone->nr_active + zone->nr_inactive;
1363 1380
1364 nr_slab = global_page_state(NR_SLAB); 1381 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
1365 /* If slab caches are huge, it's better to hit them first */ 1382 /* If slab caches are huge, it's better to hit them first */
1366 while (nr_slab >= lru_pages) { 1383 while (nr_slab >= lru_pages) {
1367 reclaim_state.reclaimed_slab = 0; 1384 reclaim_state.reclaimed_slab = 0;
@@ -1510,7 +1527,6 @@ int zone_reclaim_mode __read_mostly;
1510#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ 1527#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
1511#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ 1528#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
1512#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ 1529#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
1513#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */
1514 1530
1515/* 1531/*
1516 * Priority for ZONE_RECLAIM. This determines the fraction of pages 1532 * Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -1526,6 +1542,12 @@ int zone_reclaim_mode __read_mostly;
1526int sysctl_min_unmapped_ratio = 1; 1542int sysctl_min_unmapped_ratio = 1;
1527 1543
1528/* 1544/*
1545 * If the number of slab pages in a zone grows beyond this percentage then
1546 * slab reclaim needs to occur.
1547 */
1548int sysctl_min_slab_ratio = 5;
1549
1550/*
1529 * Try to free up some pages from this zone through reclaim. 1551 * Try to free up some pages from this zone through reclaim.
1530 */ 1552 */
1531static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) 1553static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1544,6 +1566,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1544 .gfp_mask = gfp_mask, 1566 .gfp_mask = gfp_mask,
1545 .swappiness = vm_swappiness, 1567 .swappiness = vm_swappiness,
1546 }; 1568 };
1569 unsigned long slab_reclaimable;
1547 1570
1548 disable_swap_token(); 1571 disable_swap_token();
1549 cond_resched(); 1572 cond_resched();
@@ -1556,29 +1579,43 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1556 reclaim_state.reclaimed_slab = 0; 1579 reclaim_state.reclaimed_slab = 0;
1557 p->reclaim_state = &reclaim_state; 1580 p->reclaim_state = &reclaim_state;
1558 1581
1559 /* 1582 if (zone_page_state(zone, NR_FILE_PAGES) -
1560 * Free memory by calling shrink zone with increasing priorities 1583 zone_page_state(zone, NR_FILE_MAPPED) >
1561 * until we have enough memory freed. 1584 zone->min_unmapped_pages) {
1562 */ 1585 /*
1563 priority = ZONE_RECLAIM_PRIORITY; 1586 * Free memory by calling shrink zone with increasing
1564 do { 1587 * priorities until we have enough memory freed.
1565 nr_reclaimed += shrink_zone(priority, zone, &sc); 1588 */
1566 priority--; 1589 priority = ZONE_RECLAIM_PRIORITY;
1567 } while (priority >= 0 && nr_reclaimed < nr_pages); 1590 do {
1591 nr_reclaimed += shrink_zone(priority, zone, &sc);
1592 priority--;
1593 } while (priority >= 0 && nr_reclaimed < nr_pages);
1594 }
1568 1595
1569 if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { 1596 slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
1597 if (slab_reclaimable > zone->min_slab_pages) {
1570 /* 1598 /*
1571 * shrink_slab() does not currently allow us to determine how 1599 * shrink_slab() does not currently allow us to determine how
1572 * many pages were freed in this zone. So we just shake the slab 1600 * many pages were freed in this zone. So we take the current
1573 * a bit and then go off node for this particular allocation 1601 * number of slab pages and shake the slab until it is reduced
1574 * despite possibly having freed enough memory to allocate in 1602 * by the same nr_pages that we used for reclaiming unmapped
1575 * this zone. If we freed local memory then the next 1603 * pages.
1576 * allocations will be local again.
1577 * 1604 *
1578 * shrink_slab will free memory on all zones and may take 1605 * Note that shrink_slab will free memory on all zones and may
1579 * a long time. 1606 * take a long time.
1607 */
1608 while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
1609 zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
1610 slab_reclaimable - nr_pages)
1611 ;
1612
1613 /*
1614 * Update nr_reclaimed by the number of slab pages we
1615 * reclaimed from this zone.
1580 */ 1616 */
1581 shrink_slab(sc.nr_scanned, gfp_mask, order); 1617 nr_reclaimed += slab_reclaimable -
1618 zone_page_state(zone, NR_SLAB_RECLAIMABLE);
1582 } 1619 }
1583 1620
1584 p->reclaim_state = NULL; 1621 p->reclaim_state = NULL;
@@ -1592,7 +1629,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1592 int node_id; 1629 int node_id;
1593 1630
1594 /* 1631 /*
1595 * Zone reclaim reclaims unmapped file backed pages. 1632 * Zone reclaim reclaims unmapped file backed pages and
1633 * slab pages if we are over the defined limits.
1596 * 1634 *
1597 * A small portion of unmapped file backed pages is needed for 1635 * A small portion of unmapped file backed pages is needed for
1598 * file I/O otherwise pages read by file I/O will be immediately 1636 * file I/O otherwise pages read by file I/O will be immediately
@@ -1601,7 +1639,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1601 * unmapped file backed pages. 1639 * unmapped file backed pages.
1602 */ 1640 */
1603 if (zone_page_state(zone, NR_FILE_PAGES) - 1641 if (zone_page_state(zone, NR_FILE_PAGES) -
1604 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) 1642 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
1643 && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
1644 <= zone->min_slab_pages)
1605 return 0; 1645 return 0;
1606 1646
1607 /* 1647 /*
@@ -1621,7 +1661,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1621 * over remote processors and spread off node memory allocations 1661 * over remote processors and spread off node memory allocations
1622 * as wide as possible. 1662 * as wide as possible.
1623 */ 1663 */
1624 node_id = zone->zone_pgdat->node_id; 1664 node_id = zone_to_nid(zone);
1625 mask = node_to_cpumask(node_id); 1665 mask = node_to_cpumask(node_id);
1626 if (!cpus_empty(mask) && node_id != numa_node_id()) 1666 if (!cpus_empty(mask) && node_id != numa_node_id())
1627 return 0; 1667 return 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c1b5f4106b38..490d8c1a0ded 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -321,6 +321,9 @@ void refresh_cpu_vm_stats(int cpu)
321 for_each_zone(zone) { 321 for_each_zone(zone) {
322 struct per_cpu_pageset *pcp; 322 struct per_cpu_pageset *pcp;
323 323
324 if (!populated_zone(zone))
325 continue;
326
324 pcp = zone_pcp(zone, cpu); 327 pcp = zone_pcp(zone, cpu);
325 328
326 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 329 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
@@ -435,12 +438,28 @@ struct seq_operations fragmentation_op = {
435 .show = frag_show, 438 .show = frag_show,
436}; 439};
437 440
441#ifdef CONFIG_ZONE_DMA32
442#define TEXT_FOR_DMA32(xx) xx "_dma32",
443#else
444#define TEXT_FOR_DMA32(xx)
445#endif
446
447#ifdef CONFIG_HIGHMEM
448#define TEXT_FOR_HIGHMEM(xx) xx "_high",
449#else
450#define TEXT_FOR_HIGHMEM(xx)
451#endif
452
453#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \
454 TEXT_FOR_HIGHMEM(xx)
455
438static char *vmstat_text[] = { 456static char *vmstat_text[] = {
439 /* Zoned VM counters */ 457 /* Zoned VM counters */
440 "nr_anon_pages", 458 "nr_anon_pages",
441 "nr_mapped", 459 "nr_mapped",
442 "nr_file_pages", 460 "nr_file_pages",
443 "nr_slab", 461 "nr_slab_reclaimable",
462 "nr_slab_unreclaimable",
444 "nr_page_table_pages", 463 "nr_page_table_pages",
445 "nr_dirty", 464 "nr_dirty",
446 "nr_writeback", 465 "nr_writeback",
@@ -462,10 +481,7 @@ static char *vmstat_text[] = {
462 "pswpin", 481 "pswpin",
463 "pswpout", 482 "pswpout",
464 483
465 "pgalloc_dma", 484 TEXTS_FOR_ZONES("pgalloc")
466 "pgalloc_dma32",
467 "pgalloc_normal",
468 "pgalloc_high",
469 485
470 "pgfree", 486 "pgfree",
471 "pgactivate", 487 "pgactivate",
@@ -474,25 +490,10 @@ static char *vmstat_text[] = {
474 "pgfault", 490 "pgfault",
475 "pgmajfault", 491 "pgmajfault",
476 492
477 "pgrefill_dma", 493 TEXTS_FOR_ZONES("pgrefill")
478 "pgrefill_dma32", 494 TEXTS_FOR_ZONES("pgsteal")
479 "pgrefill_normal", 495 TEXTS_FOR_ZONES("pgscan_kswapd")
480 "pgrefill_high", 496 TEXTS_FOR_ZONES("pgscan_direct")
481
482 "pgsteal_dma",
483 "pgsteal_dma32",
484 "pgsteal_normal",
485 "pgsteal_high",
486
487 "pgscan_kswapd_dma",
488 "pgscan_kswapd_dma32",
489 "pgscan_kswapd_normal",
490 "pgscan_kswapd_high",
491
492 "pgscan_direct_dma",
493 "pgscan_direct_dma32",
494 "pgscan_direct_normal",
495 "pgscan_direct_high",
496 497
497 "pginodesteal", 498 "pginodesteal",
498 "slabs_scanned", 499 "slabs_scanned",
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 859e3359fcda..e2a095d0fd80 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -40,6 +40,22 @@ config IP_DCCP_DEBUG
40 40
41 Just say N. 41 Just say N.
42 42
43config NET_DCCPPROBE
44 tristate "DCCP connection probing"
45 depends on PROC_FS && KPROBES
46 ---help---
47 This module allows for capturing the changes to DCCP connection
48 state in response to incoming packets. It is used for debugging
49 DCCP congestion avoidance modules. If you don't understand
50 what was just said, you don't need it: say N.
51
52 Documentation on how to use DCCP connection probing can be found
53 at http://linux-net.osdl.org/index.php/DccpProbe
54
55 To compile this code as a module, choose M here: the
56 module will be called dccp_probe.
57
58
43endmenu 59endmenu
44 60
45endmenu 61endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 7696e219b05d..17ed99c46617 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -11,9 +11,11 @@ dccp_ipv4-y := ipv4.o
11dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o 11dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
12 12
13obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o 13obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
14obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
14 15
15dccp-$(CONFIG_SYSCTL) += sysctl.o 16dccp-$(CONFIG_SYSCTL) += sysctl.o
16 17
17dccp_diag-y := diag.o 18dccp_diag-y := diag.o
19dccp_probe-y := probe.o
18 20
19obj-y += ccids/ 21obj-y += ccids/
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 457dd3db7f41..2efb505aeb35 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -808,7 +808,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
808} 808}
809 809
810static struct ccid_operations ccid2 = { 810static struct ccid_operations ccid2 = {
811 .ccid_id = 2, 811 .ccid_id = DCCPC_CCID2,
812 .ccid_name = "ccid2", 812 .ccid_name = "ccid2",
813 .ccid_owner = THIS_MODULE, 813 .ccid_owner = THIS_MODULE,
814 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), 814 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 195aa9566228..67d2dc0e7c67 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1240,7 +1240,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
1240} 1240}
1241 1241
1242static struct ccid_operations ccid3 = { 1242static struct ccid_operations ccid3 = {
1243 .ccid_id = 3, 1243 .ccid_id = DCCPC_CCID3,
1244 .ccid_name = "ccid3", 1244 .ccid_name = "ccid3",
1245 .ccid_owner = THIS_MODULE, 1245 .ccid_owner = THIS_MODULE,
1246 .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), 1246 .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9a1a76a7dc41..66be29b6f508 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -56,9 +56,6 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
56 56
57 dp->dccps_role = DCCP_ROLE_CLIENT; 57 dp->dccps_role = DCCP_ROLE_CLIENT;
58 58
59 if (dccp_service_not_initialized(sk))
60 return -EPROTO;
61
62 if (addr_len < sizeof(struct sockaddr_in)) 59 if (addr_len < sizeof(struct sockaddr_in))
63 return -EINVAL; 60 return -EINVAL;
64 61
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
new file mode 100644
index 000000000000..146496fce2e2
--- /dev/null
+++ b/net/dccp/probe.c
@@ -0,0 +1,198 @@
1/*
2 * dccp_probe - Observe the DCCP flow with kprobes.
3 *
4 * The idea for this came from Werner Almesberger's umlsim
5 * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
6 *
7 * Modified for DCCP from Stephen Hemminger's code
8 * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/kprobes.h>
#include <linux/socket.h>
#include <linux/dccp.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/vmalloc.h>
33
34#include "dccp.h"
35#include "ccid.h"
36#include "ccids/ccid3.h"
37
38static int port;
39
40static int bufsize = 64 * 1024;
41
42static const char procname[] = "dccpprobe";
43
44struct {
45 struct kfifo *fifo;
46 spinlock_t lock;
47 wait_queue_head_t wait;
48 struct timeval tstart;
49} dccpw;
50
51static void printl(const char *fmt, ...)
52{
53 va_list args;
54 int len;
55 struct timeval now;
56 char tbuf[256];
57
58 va_start(args, fmt);
59 do_gettimeofday(&now);
60
61 now.tv_sec -= dccpw.tstart.tv_sec;
62 now.tv_usec -= dccpw.tstart.tv_usec;
63 if (now.tv_usec < 0) {
64 --now.tv_sec;
65 now.tv_usec += 1000000;
66 }
67
68 len = sprintf(tbuf, "%lu.%06lu ",
69 (unsigned long) now.tv_sec,
70 (unsigned long) now.tv_usec);
71 len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
72 va_end(args);
73
74 kfifo_put(dccpw.fifo, tbuf, len);
75 wake_up(&dccpw.wait);
76}
77
78static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
79 struct msghdr *msg, size_t size)
80{
81 const struct dccp_minisock *dmsk = dccp_msk(sk);
82 const struct inet_sock *inet = inet_sk(sk);
83 const struct ccid3_hc_tx_sock *hctx;
84
85 if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
86 hctx = ccid3_hc_tx_sk(sk);
87 else
88 hctx = NULL;
89
90 if (port == 0 || ntohs(inet->dport) == port ||
91 ntohs(inet->sport) == port) {
92 if (hctx)
93 printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n",
94 NIPQUAD(inet->saddr), ntohs(inet->sport),
95 NIPQUAD(inet->daddr), ntohs(inet->dport), size,
96 hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
97 hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
98 else
99 printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
100 NIPQUAD(inet->saddr), ntohs(inet->sport),
101 NIPQUAD(inet->daddr), ntohs(inet->dport), size);
102 }
103
104 jprobe_return();
105 return 0;
106}
107
108static struct jprobe dccp_send_probe = {
109 .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, },
110 .entry = (kprobe_opcode_t *)&jdccp_sendmsg,
111};
112
113static int dccpprobe_open(struct inode *inode, struct file *file)
114{
115 kfifo_reset(dccpw.fifo);
116 do_gettimeofday(&dccpw.tstart);
117 return 0;
118}
119
120static ssize_t dccpprobe_read(struct file *file, char __user *buf,
121 size_t len, loff_t *ppos)
122{
123 int error = 0, cnt = 0;
124 unsigned char *tbuf;
125
126 if (!buf || len < 0)
127 return -EINVAL;
128
129 if (len == 0)
130 return 0;
131
132 tbuf = vmalloc(len);
133 if (!tbuf)
134 return -ENOMEM;
135
136 error = wait_event_interruptible(dccpw.wait,
137 __kfifo_len(dccpw.fifo) != 0);
138 if (error)
139 goto out_free;
140
141 cnt = kfifo_get(dccpw.fifo, tbuf, len);
142 error = copy_to_user(buf, tbuf, cnt);
143
144out_free:
145 vfree(tbuf);
146
147 return error ? error : cnt;
148}
149
150static struct file_operations dccpprobe_fops = {
151 .owner = THIS_MODULE,
152 .open = dccpprobe_open,
153 .read = dccpprobe_read,
154};
155
156static __init int dccpprobe_init(void)
157{
158 int ret = -ENOMEM;
159
160 init_waitqueue_head(&dccpw.wait);
161 spin_lock_init(&dccpw.lock);
162 dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
163
164 if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
165 goto err0;
166
167 ret = register_jprobe(&dccp_send_probe);
168 if (ret)
169 goto err1;
170
171 pr_info("DCCP watch registered (port=%d)\n", port);
172 return 0;
173err1:
174 proc_net_remove(procname);
175err0:
176 kfifo_free(dccpw.fifo);
177 return ret;
178}
179module_init(dccpprobe_init);
180
181static __exit void dccpprobe_exit(void)
182{
183 kfifo_free(dccpw.fifo);
184 proc_net_remove(procname);
185 unregister_jprobe(&dccp_send_probe);
186
187}
188module_exit(dccpprobe_exit);
189
190MODULE_PARM_DESC(port, "Port to match (0=all)");
191module_param(port, int, 0);
192
193MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
194module_param(bufsize, int, 0);
195
196MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
197MODULE_DESCRIPTION("DCCP snooper");
198MODULE_LICENSE("GPL");
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 962df0ea31aa..72cbdcfc2c65 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -217,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
217 icsk->icsk_sync_mss = dccp_sync_mss; 217 icsk->icsk_sync_mss = dccp_sync_mss;
218 dp->dccps_mss_cache = 536; 218 dp->dccps_mss_cache = 536;
219 dp->dccps_role = DCCP_ROLE_UNDEFINED; 219 dp->dccps_role = DCCP_ROLE_UNDEFINED;
220 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; 220 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
221 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; 221 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
222 222
223 return 0; 223 return 0;
@@ -267,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk)
267 struct dccp_sock *dp = dccp_sk(sk); 267 struct dccp_sock *dp = dccp_sk(sk);
268 268
269 dp->dccps_role = DCCP_ROLE_LISTEN; 269 dp->dccps_role = DCCP_ROLE_LISTEN;
270 /*
271 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
272 * before calling listen()
273 */
274 if (dccp_service_not_initialized(sk))
275 return -EPROTO;
276 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); 270 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
277} 271}
278 272
@@ -540,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len,
540 int err = -ENOENT, slen = 0, total_len = sizeof(u32); 534 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
541 535
542 lock_sock(sk); 536 lock_sock(sk);
543 if (dccp_service_not_initialized(sk))
544 goto out;
545
546 if ((sl = dp->dccps_service_list) != NULL) { 537 if ((sl = dp->dccps_service_list) != NULL) {
547 slen = sl->dccpsl_nr * sizeof(u32); 538 slen = sl->dccpsl_nr * sizeof(u32);
548 total_len += slen; 539 total_len += slen;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 1650b64415aa..30af4a4dfcc8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -448,24 +448,22 @@ config INET_TCP_DIAG
448 depends on INET_DIAG 448 depends on INET_DIAG
449 def_tristate INET_DIAG 449 def_tristate INET_DIAG
450 450
451config TCP_CONG_ADVANCED 451menuconfig TCP_CONG_ADVANCED
452 bool "TCP: advanced congestion control" 452 bool "TCP: advanced congestion control"
453 ---help--- 453 ---help---
454 Support for selection of various TCP congestion control 454 Support for selection of various TCP congestion control
455 modules. 455 modules.
456 456
457 Nearly all users can safely say no here, and a safe default 457 Nearly all users can safely say no here, and a safe default
458 selection will be made (BIC-TCP with new Reno as a fallback). 458 selection will be made (CUBIC with new Reno as a fallback).
459 459
460 If unsure, say N. 460 If unsure, say N.
461 461
462# TCP Reno is builtin (required as fallback) 462if TCP_CONG_ADVANCED
463menu "TCP congestion control"
464 depends on TCP_CONG_ADVANCED
465 463
466config TCP_CONG_BIC 464config TCP_CONG_BIC
467 tristate "Binary Increase Congestion (BIC) control" 465 tristate "Binary Increase Congestion (BIC) control"
468 default y 466 default m
469 ---help--- 467 ---help---
470 BIC-TCP is a sender-side only change that ensures a linear RTT 468 BIC-TCP is a sender-side only change that ensures a linear RTT
471 fairness under large windows while offering both scalability and 469 fairness under large windows while offering both scalability and
@@ -479,7 +477,7 @@ config TCP_CONG_BIC
479 477
480config TCP_CONG_CUBIC 478config TCP_CONG_CUBIC
481 tristate "CUBIC TCP" 479 tristate "CUBIC TCP"
482 default m 480 default y
483 ---help--- 481 ---help---
484 This is version 2.0 of BIC-TCP which uses a cubic growth function 482 This is version 2.0 of BIC-TCP which uses a cubic growth function
485 among other techniques. 483 among other techniques.
@@ -574,12 +572,49 @@ config TCP_CONG_VENO
574 loss packets. 572 loss packets.
575 See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf 573 See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
576 574
577endmenu 575choice
576 prompt "Default TCP congestion control"
577 default DEFAULT_CUBIC
578 help
579 Select the TCP congestion control that will be used by default
580 for all connections.
578 581
579config TCP_CONG_BIC 582 config DEFAULT_BIC
583 bool "Bic" if TCP_CONG_BIC=y
584
585 config DEFAULT_CUBIC
586 bool "Cubic" if TCP_CONG_CUBIC=y
587
588 config DEFAULT_HTCP
589 bool "Htcp" if TCP_CONG_HTCP=y
590
591 config DEFAULT_VEGAS
592 bool "Vegas" if TCP_CONG_VEGAS=y
593
594 config DEFAULT_WESTWOOD
595 bool "Westwood" if TCP_CONG_WESTWOOD=y
596
597 config DEFAULT_RENO
598 bool "Reno"
599
600endchoice
601
602endif
603
604config TCP_CONG_CUBIC
580 tristate 605 tristate
581 depends on !TCP_CONG_ADVANCED 606 depends on !TCP_CONG_ADVANCED
582 default y 607 default y
583 608
609config DEFAULT_TCP_CONG
610 string
611 default "bic" if DEFAULT_BIC
612 default "cubic" if DEFAULT_CUBIC
613 default "htcp" if DEFAULT_HTCP
614 default "vegas" if DEFAULT_VEGAS
615 default "westwood" if DEFAULT_WESTWOOD
616 default "reno" if DEFAULT_RENO
617 default "cubic"
618
584source "net/ipv4/ipvs/Kconfig" 619source "net/ipv4/ipvs/Kconfig"
585 620
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 80a2a0911b49..e6ce0b3ba62a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -259,7 +259,7 @@ void cipso_v4_cache_invalidate(void)
259 u32 iter; 259 u32 iter;
260 260
261 for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { 261 for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
262 spin_lock(&cipso_v4_cache[iter].lock); 262 spin_lock_bh(&cipso_v4_cache[iter].lock);
263 list_for_each_entry_safe(entry, 263 list_for_each_entry_safe(entry,
264 tmp_entry, 264 tmp_entry,
265 &cipso_v4_cache[iter].list, list) { 265 &cipso_v4_cache[iter].list, list) {
@@ -267,7 +267,7 @@ void cipso_v4_cache_invalidate(void)
267 cipso_v4_cache_entry_free(entry); 267 cipso_v4_cache_entry_free(entry);
268 } 268 }
269 cipso_v4_cache[iter].size = 0; 269 cipso_v4_cache[iter].size = 0;
270 spin_unlock(&cipso_v4_cache[iter].lock); 270 spin_unlock_bh(&cipso_v4_cache[iter].lock);
271 } 271 }
272 272
273 return; 273 return;
@@ -309,7 +309,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
309 309
310 hash = cipso_v4_map_cache_hash(key, key_len); 310 hash = cipso_v4_map_cache_hash(key, key_len);
311 bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); 311 bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
312 spin_lock(&cipso_v4_cache[bkt].lock); 312 spin_lock_bh(&cipso_v4_cache[bkt].lock);
313 list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { 313 list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
314 if (entry->hash == hash && 314 if (entry->hash == hash &&
315 entry->key_len == key_len && 315 entry->key_len == key_len &&
@@ -318,7 +318,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
318 secattr->cache.free = entry->lsm_data.free; 318 secattr->cache.free = entry->lsm_data.free;
319 secattr->cache.data = entry->lsm_data.data; 319 secattr->cache.data = entry->lsm_data.data;
320 if (prev_entry == NULL) { 320 if (prev_entry == NULL) {
321 spin_unlock(&cipso_v4_cache[bkt].lock); 321 spin_unlock_bh(&cipso_v4_cache[bkt].lock);
322 return 0; 322 return 0;
323 } 323 }
324 324
@@ -333,12 +333,12 @@ static int cipso_v4_cache_check(const unsigned char *key,
333 &prev_entry->list); 333 &prev_entry->list);
334 } 334 }
335 335
336 spin_unlock(&cipso_v4_cache[bkt].lock); 336 spin_unlock_bh(&cipso_v4_cache[bkt].lock);
337 return 0; 337 return 0;
338 } 338 }
339 prev_entry = entry; 339 prev_entry = entry;
340 } 340 }
341 spin_unlock(&cipso_v4_cache[bkt].lock); 341 spin_unlock_bh(&cipso_v4_cache[bkt].lock);
342 342
343 return -ENOENT; 343 return -ENOENT;
344} 344}
@@ -387,7 +387,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
387 entry->lsm_data.data = secattr->cache.data; 387 entry->lsm_data.data = secattr->cache.data;
388 388
389 bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); 389 bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
390 spin_lock(&cipso_v4_cache[bkt].lock); 390 spin_lock_bh(&cipso_v4_cache[bkt].lock);
391 if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { 391 if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
392 list_add(&entry->list, &cipso_v4_cache[bkt].list); 392 list_add(&entry->list, &cipso_v4_cache[bkt].list);
393 cipso_v4_cache[bkt].size += 1; 393 cipso_v4_cache[bkt].size += 1;
@@ -398,7 +398,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
398 list_add(&entry->list, &cipso_v4_cache[bkt].list); 398 list_add(&entry->list, &cipso_v4_cache[bkt].list);
399 cipso_v4_cache_entry_free(old_entry); 399 cipso_v4_cache_entry_free(old_entry);
400 } 400 }
401 spin_unlock(&cipso_v4_cache[bkt].lock); 401 spin_unlock_bh(&cipso_v4_cache[bkt].lock);
402 402
403 return 0; 403 return 0;
404 404
@@ -530,197 +530,42 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
530} 530}
531 531
532/** 532/**
533 * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff 533 * cipso_v4_doi_walk - Iterate through the DOI definitions
534 * @headroom: the amount of headroom to allocate for the sk_buff 534 * @skip_cnt: skip past this number of DOI definitions, updated
535 * @callback: callback for each DOI definition
536 * @cb_arg: argument for the callback function
535 * 537 *
536 * Description: 538 * Description:
537 * Dump a list of all the configured DOI values into a sk_buff. The returned 539 * Iterate over the DOI definition list, skipping the first @skip_cnt entries.
538 * sk_buff has room at the front of the sk_buff for @headroom bytes. See 540 * For each entry call @callback, if @callback returns a negative value stop
539 * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This 541 * 'walking' through the list and return. Updates the value in @skip_cnt upon
540 * function may fail if another process is changing the DOI list at the same 542 * return. Returns zero on success, negative values on failure.
541 * time. Returns a pointer to a sk_buff on success, NULL on error.
542 * 543 *
543 */ 544 */
544struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) 545int cipso_v4_doi_walk(u32 *skip_cnt,
546 int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
547 void *cb_arg)
545{ 548{
546 struct sk_buff *skb = NULL; 549 int ret_val = -ENOENT;
547 struct cipso_v4_doi *iter;
548 u32 doi_cnt = 0; 550 u32 doi_cnt = 0;
549 ssize_t buf_len; 551 struct cipso_v4_doi *iter_doi;
550 552
551 buf_len = NETLBL_LEN_U32;
552 rcu_read_lock(); 553 rcu_read_lock();
553 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) 554 list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
554 if (iter->valid) { 555 if (iter_doi->valid) {
555 doi_cnt += 1; 556 if (doi_cnt++ < *skip_cnt)
556 buf_len += 2 * NETLBL_LEN_U32; 557 continue;
557 } 558 ret_val = callback(iter_doi, cb_arg);
558 559 if (ret_val < 0) {
559 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); 560 doi_cnt--;
560 if (skb == NULL) 561 goto doi_walk_return;
561 goto doi_dump_all_failure;
562
563 if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
564 goto doi_dump_all_failure;
565 buf_len -= NETLBL_LEN_U32;
566 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
567 if (iter->valid) {
568 if (buf_len < 2 * NETLBL_LEN_U32)
569 goto doi_dump_all_failure;
570 if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
571 goto doi_dump_all_failure;
572 if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
573 goto doi_dump_all_failure;
574 buf_len -= 2 * NETLBL_LEN_U32;
575 }
576 rcu_read_unlock();
577
578 return skb;
579
580doi_dump_all_failure:
581 rcu_read_unlock();
582 kfree(skb);
583 return NULL;
584}
585
586/**
587 * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
588 * @doi: the DOI value
589 * @headroom: the amount of headroom to allocate for the sk_buff
590 *
591 * Description:
592 * Lookup the DOI definition matching @doi and dump it's contents into a
593 * sk_buff. The returned sk_buff has room at the front of the sk_buff for
594 * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message
595 * format. This function may fail if another process is changing the DOI list
596 * at the same time. Returns a pointer to a sk_buff on success, NULL on error.
597 *
598 */
599struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
600{
601 struct sk_buff *skb = NULL;
602 struct cipso_v4_doi *iter;
603 u32 tag_cnt = 0;
604 u32 lvl_cnt = 0;
605 u32 cat_cnt = 0;
606 ssize_t buf_len;
607 ssize_t tmp;
608
609 rcu_read_lock();
610 iter = cipso_v4_doi_getdef(doi);
611 if (iter == NULL)
612 goto doi_dump_failure;
613 buf_len = NETLBL_LEN_U32;
614 switch (iter->type) {
615 case CIPSO_V4_MAP_PASS:
616 buf_len += NETLBL_LEN_U32;
617 while(tag_cnt < CIPSO_V4_TAG_MAXCNT &&
618 iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
619 tag_cnt += 1;
620 buf_len += NETLBL_LEN_U8;
621 }
622 break;
623 case CIPSO_V4_MAP_STD:
624 buf_len += 3 * NETLBL_LEN_U32;
625 while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
626 iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
627 tag_cnt += 1;
628 buf_len += NETLBL_LEN_U8;
629 }
630 for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
631 if (iter->map.std->lvl.local[tmp] !=
632 CIPSO_V4_INV_LVL) {
633 lvl_cnt += 1;
634 buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
635 }
636 for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
637 if (iter->map.std->cat.local[tmp] !=
638 CIPSO_V4_INV_CAT) {
639 cat_cnt += 1;
640 buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
641 } 562 }
642 break;
643 }
644
645 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
646 if (skb == NULL)
647 goto doi_dump_failure;
648
649 if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
650 goto doi_dump_failure;
651 buf_len -= NETLBL_LEN_U32;
652 if (iter != cipso_v4_doi_getdef(doi))
653 goto doi_dump_failure;
654 switch (iter->type) {
655 case CIPSO_V4_MAP_PASS:
656 if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
657 goto doi_dump_failure;
658 buf_len -= NETLBL_LEN_U32;
659 for (tmp = 0;
660 tmp < CIPSO_V4_TAG_MAXCNT &&
661 iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
662 tmp++) {
663 if (buf_len < NETLBL_LEN_U8)
664 goto doi_dump_failure;
665 if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
666 goto doi_dump_failure;
667 buf_len -= NETLBL_LEN_U8;
668 } 563 }
669 break;
670 case CIPSO_V4_MAP_STD:
671 if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
672 goto doi_dump_failure;
673 if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
674 goto doi_dump_failure;
675 if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
676 goto doi_dump_failure;
677 buf_len -= 3 * NETLBL_LEN_U32;
678 for (tmp = 0;
679 tmp < CIPSO_V4_TAG_MAXCNT &&
680 iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
681 tmp++) {
682 if (buf_len < NETLBL_LEN_U8)
683 goto doi_dump_failure;
684 if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
685 goto doi_dump_failure;
686 buf_len -= NETLBL_LEN_U8;
687 }
688 for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
689 if (iter->map.std->lvl.local[tmp] !=
690 CIPSO_V4_INV_LVL) {
691 if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
692 goto doi_dump_failure;
693 if (nla_put_u32(skb, NLA_U32, tmp) != 0)
694 goto doi_dump_failure;
695 if (nla_put_u8(skb,
696 NLA_U8,
697 iter->map.std->lvl.local[tmp]) != 0)
698 goto doi_dump_failure;
699 buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
700 }
701 for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
702 if (iter->map.std->cat.local[tmp] !=
703 CIPSO_V4_INV_CAT) {
704 if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
705 goto doi_dump_failure;
706 if (nla_put_u32(skb, NLA_U32, tmp) != 0)
707 goto doi_dump_failure;
708 if (nla_put_u16(skb,
709 NLA_U16,
710 iter->map.std->cat.local[tmp]) != 0)
711 goto doi_dump_failure;
712 buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
713 }
714 break;
715 }
716 rcu_read_unlock();
717
718 return skb;
719 564
720doi_dump_failure: 565doi_walk_return:
721 rcu_read_unlock(); 566 rcu_read_unlock();
722 kfree(skb); 567 *skip_cnt = doi_cnt;
723 return NULL; 568 return ret_val;
724} 569}
725 570
726/** 571/**
@@ -1486,43 +1331,40 @@ socket_setattr_failure:
1486} 1331}
1487 1332
1488/** 1333/**
1489 * cipso_v4_socket_getattr - Get the security attributes from a socket 1334 * cipso_v4_sock_getattr - Get the security attributes from a sock
1490 * @sock: the socket 1335 * @sk: the sock
1491 * @secattr: the security attributes 1336 * @secattr: the security attributes
1492 * 1337 *
1493 * Description: 1338 * Description:
1494 * Query @sock to see if there is a CIPSO option attached to the socket and if 1339 * Query @sk to see if there is a CIPSO option attached to the sock and if
1495 * there is return the CIPSO security attributes in @secattr. Returns zero on 1340 * there is return the CIPSO security attributes in @secattr. This function
1496 * success and negative values on failure. 1341 * requires that @sk be locked, or privately held, but it does not do any
1342 * locking itself. Returns zero on success and negative values on failure.
1497 * 1343 *
1498 */ 1344 */
1499int cipso_v4_socket_getattr(const struct socket *sock, 1345int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
1500 struct netlbl_lsm_secattr *secattr)
1501{ 1346{
1502 int ret_val = -ENOMSG; 1347 int ret_val = -ENOMSG;
1503 struct sock *sk;
1504 struct inet_sock *sk_inet; 1348 struct inet_sock *sk_inet;
1505 unsigned char *cipso_ptr; 1349 unsigned char *cipso_ptr;
1506 u32 doi; 1350 u32 doi;
1507 struct cipso_v4_doi *doi_def; 1351 struct cipso_v4_doi *doi_def;
1508 1352
1509 sk = sock->sk;
1510 lock_sock(sk);
1511 sk_inet = inet_sk(sk); 1353 sk_inet = inet_sk(sk);
1512 if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) 1354 if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
1513 goto socket_getattr_return; 1355 return -ENOMSG;
1514 cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - 1356 cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
1515 sizeof(struct iphdr); 1357 sizeof(struct iphdr);
1516 ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); 1358 ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
1517 if (ret_val == 0) 1359 if (ret_val == 0)
1518 goto socket_getattr_return; 1360 return ret_val;
1519 1361
1520 doi = ntohl(*(u32 *)&cipso_ptr[2]); 1362 doi = ntohl(*(u32 *)&cipso_ptr[2]);
1521 rcu_read_lock(); 1363 rcu_read_lock();
1522 doi_def = cipso_v4_doi_getdef(doi); 1364 doi_def = cipso_v4_doi_getdef(doi);
1523 if (doi_def == NULL) { 1365 if (doi_def == NULL) {
1524 rcu_read_unlock(); 1366 rcu_read_unlock();
1525 goto socket_getattr_return; 1367 return -ENOMSG;
1526 } 1368 }
1527 switch (cipso_ptr[6]) { 1369 switch (cipso_ptr[6]) {
1528 case CIPSO_V4_TAG_RBITMAP: 1370 case CIPSO_V4_TAG_RBITMAP:
@@ -1533,8 +1375,29 @@ int cipso_v4_socket_getattr(const struct socket *sock,
1533 } 1375 }
1534 rcu_read_unlock(); 1376 rcu_read_unlock();
1535 1377
1536socket_getattr_return: 1378 return ret_val;
1537 release_sock(sk); 1379}
1380
1381/**
1382 * cipso_v4_socket_getattr - Get the security attributes from a socket
1383 * @sock: the socket
1384 * @secattr: the security attributes
1385 *
1386 * Description:
1387 * Query @sock to see if there is a CIPSO option attached to the socket and if
1388 * there is return the CIPSO security attributes in @secattr. Returns zero on
1389 * success and negative values on failure.
1390 *
1391 */
1392int cipso_v4_socket_getattr(const struct socket *sock,
1393 struct netlbl_lsm_secattr *secattr)
1394{
1395 int ret_val;
1396
1397 lock_sock(sock->sk);
1398 ret_val = cipso_v4_sock_getattr(sock->sk, secattr);
1399 release_sock(sock->sk);
1400
1538 return ret_val; 1401 return ret_val;
1539} 1402}
1540 1403
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 19b2071ff319..e82a5be894b5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
129 return ret; 129 return ret;
130} 130}
131 131
132static int __init tcp_congestion_default(void)
133{
134 return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
135}
136
137late_initcall(tcp_congestion_default);
132 138
133ctl_table ipv4_table[] = { 139ctl_table ipv4_table[] = {
134 { 140 {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 7ff2e4273a7c..af0aca1e6be6 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -48,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
48 printk(KERN_NOTICE "TCP %s already registered\n", ca->name); 48 printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
49 ret = -EEXIST; 49 ret = -EEXIST;
50 } else { 50 } else {
51 list_add_rcu(&ca->list, &tcp_cong_list); 51 list_add_tail_rcu(&ca->list, &tcp_cong_list);
52 printk(KERN_INFO "TCP %s registered\n", ca->name); 52 printk(KERN_INFO "TCP %s registered\n", ca->name);
53 } 53 }
54 spin_unlock(&tcp_cong_list_lock); 54 spin_unlock(&tcp_cong_list_lock);
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index fe23cb7f1e87..9f7121ae13e9 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -9,6 +9,9 @@ config NETLABEL
9 ---help--- 9 ---help---
10 NetLabel provides support for explicit network packet labeling 10 NetLabel provides support for explicit network packet labeling
11 protocols such as CIPSO and RIPSO. For more information see 11 protocols such as CIPSO and RIPSO. For more information see
12 Documentation/netlabel. 12 Documentation/netlabel as well as the NetLabel SourceForge project
13 for configuration tools and additional documentation.
14
15 * http://netlabel.sf.net
13 16
14 If you are unsure, say N. 17 If you are unsure, say N.
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index a4f40adc447b..4125a55f469f 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -41,15 +41,37 @@
41#include "netlabel_user.h" 41#include "netlabel_user.h"
42#include "netlabel_cipso_v4.h" 42#include "netlabel_cipso_v4.h"
43 43
44/* Argument struct for cipso_v4_doi_walk() */
45struct netlbl_cipsov4_doiwalk_arg {
46 struct netlink_callback *nl_cb;
47 struct sk_buff *skb;
48 u32 seq;
49};
50
44/* NetLabel Generic NETLINK CIPSOv4 family */ 51/* NetLabel Generic NETLINK CIPSOv4 family */
45static struct genl_family netlbl_cipsov4_gnl_family = { 52static struct genl_family netlbl_cipsov4_gnl_family = {
46 .id = GENL_ID_GENERATE, 53 .id = GENL_ID_GENERATE,
47 .hdrsize = 0, 54 .hdrsize = 0,
48 .name = NETLBL_NLTYPE_CIPSOV4_NAME, 55 .name = NETLBL_NLTYPE_CIPSOV4_NAME,
49 .version = NETLBL_PROTO_VERSION, 56 .version = NETLBL_PROTO_VERSION,
50 .maxattr = 0, 57 .maxattr = NLBL_CIPSOV4_A_MAX,
51}; 58};
52 59
60/* NetLabel Netlink attribute policy */
61static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
62 [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
63 [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 },
64 [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 },
65 [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED },
66 [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 },
67 [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 },
68 [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED },
69 [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED },
70 [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 },
71 [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 },
72 [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED },
73 [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED },
74};
53 75
54/* 76/*
55 * Helper Functions 77 * Helper Functions
@@ -81,6 +103,41 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
81 kfree(ptr); 103 kfree(ptr);
82} 104}
83 105
106/**
107 * netlbl_cipsov4_add_common - Parse the common sections of a ADD message
108 * @info: the Generic NETLINK info block
109 * @doi_def: the CIPSO V4 DOI definition
110 *
111 * Description:
112 * Parse the common sections of a ADD message and fill in the related values
113 * in @doi_def. Returns zero on success, negative values on failure.
114 *
115 */
116static int netlbl_cipsov4_add_common(struct genl_info *info,
117 struct cipso_v4_doi *doi_def)
118{
119 struct nlattr *nla;
120 int nla_rem;
121 u32 iter = 0;
122
123 doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
124
125 if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST],
126 NLBL_CIPSOV4_A_MAX,
127 netlbl_cipsov4_genl_policy) != 0)
128 return -EINVAL;
129
130 nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem)
131 if (nla->nla_type == NLBL_CIPSOV4_A_TAG) {
132 if (iter > CIPSO_V4_TAG_MAXCNT)
133 return -EINVAL;
134 doi_def->tags[iter++] = nla_get_u8(nla);
135 }
136 if (iter < CIPSO_V4_TAG_MAXCNT)
137 doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
138
139 return 0;
140}
84 141
85/* 142/*
86 * NetLabel Command Handlers 143 * NetLabel Command Handlers
@@ -88,9 +145,7 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
88 145
89/** 146/**
90 * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition 147 * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition
91 * @doi: the DOI value 148 * @info: the Generic NETLINK info block
92 * @msg: the ADD message data
93 * @msg_size: the size of the ADD message buffer
94 * 149 *
95 * Description: 150 * Description:
96 * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message 151 * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
@@ -98,29 +153,28 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
98 * error. 153 * error.
99 * 154 *
100 */ 155 */
101static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) 156static int netlbl_cipsov4_add_std(struct genl_info *info)
102{ 157{
103 int ret_val = -EINVAL; 158 int ret_val = -EINVAL;
104 int msg_len = msg_size;
105 u32 num_tags;
106 u32 num_lvls;
107 u32 num_cats;
108 struct cipso_v4_doi *doi_def = NULL; 159 struct cipso_v4_doi *doi_def = NULL;
109 u32 iter; 160 struct nlattr *nla_a;
110 u32 tmp_val_a; 161 struct nlattr *nla_b;
111 u32 tmp_val_b; 162 int nla_a_rem;
163 int nla_b_rem;
112 164
113 if (msg_len < NETLBL_LEN_U32) 165 if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
114 goto add_std_failure; 166 !info->attrs[NLBL_CIPSOV4_A_TAGLST] ||
115 num_tags = netlbl_getinc_u32(&msg, &msg_len); 167 !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST])
116 if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) 168 return -EINVAL;
117 goto add_std_failure; 169
170 if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
171 NLBL_CIPSOV4_A_MAX,
172 netlbl_cipsov4_genl_policy) != 0)
173 return -EINVAL;
118 174
119 doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); 175 doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
120 if (doi_def == NULL) { 176 if (doi_def == NULL)
121 ret_val = -ENOMEM; 177 return -ENOMEM;
122 goto add_std_failure;
123 }
124 doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); 178 doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
125 if (doi_def->map.std == NULL) { 179 if (doi_def->map.std == NULL) {
126 ret_val = -ENOMEM; 180 ret_val = -ENOMEM;
@@ -128,28 +182,32 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
128 } 182 }
129 doi_def->type = CIPSO_V4_MAP_STD; 183 doi_def->type = CIPSO_V4_MAP_STD;
130 184
131 for (iter = 0; iter < num_tags; iter++) { 185 ret_val = netlbl_cipsov4_add_common(info, doi_def);
132 if (msg_len < NETLBL_LEN_U8) 186 if (ret_val != 0)
133 goto add_std_failure;
134 doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
135 switch (doi_def->tags[iter]) {
136 case CIPSO_V4_TAG_RBITMAP:
137 break;
138 default:
139 goto add_std_failure;
140 }
141 }
142 if (iter < CIPSO_V4_TAG_MAXCNT)
143 doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
144
145 if (msg_len < 6 * NETLBL_LEN_U32)
146 goto add_std_failure; 187 goto add_std_failure;
147 188
148 num_lvls = netlbl_getinc_u32(&msg, &msg_len); 189 nla_for_each_nested(nla_a,
149 if (num_lvls == 0) 190 info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
150 goto add_std_failure; 191 nla_a_rem)
151 doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len); 192 if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) {
152 if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS) 193 nla_for_each_nested(nla_b, nla_a, nla_b_rem)
194 switch (nla_b->nla_type) {
195 case NLBL_CIPSOV4_A_MLSLVLLOC:
196 if (nla_get_u32(nla_b) >=
197 doi_def->map.std->lvl.local_size)
198 doi_def->map.std->lvl.local_size =
199 nla_get_u32(nla_b) + 1;
200 break;
201 case NLBL_CIPSOV4_A_MLSLVLREM:
202 if (nla_get_u32(nla_b) >=
203 doi_def->map.std->lvl.cipso_size)
204 doi_def->map.std->lvl.cipso_size =
205 nla_get_u32(nla_b) + 1;
206 break;
207 }
208 }
209 if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS ||
210 doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
153 goto add_std_failure; 211 goto add_std_failure;
154 doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, 212 doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
155 sizeof(u32), 213 sizeof(u32),
@@ -158,9 +216,6 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
158 ret_val = -ENOMEM; 216 ret_val = -ENOMEM;
159 goto add_std_failure; 217 goto add_std_failure;
160 } 218 }
161 doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len);
162 if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
163 goto add_std_failure;
164 doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, 219 doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
165 sizeof(u32), 220 sizeof(u32),
166 GFP_KERNEL); 221 GFP_KERNEL);
@@ -168,68 +223,101 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
168 ret_val = -ENOMEM; 223 ret_val = -ENOMEM;
169 goto add_std_failure; 224 goto add_std_failure;
170 } 225 }
226 nla_for_each_nested(nla_a,
227 info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
228 nla_a_rem)
229 if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) {
230 struct nlattr *lvl_loc;
231 struct nlattr *lvl_rem;
232
233 if (nla_validate_nested(nla_a,
234 NLBL_CIPSOV4_A_MAX,
235 netlbl_cipsov4_genl_policy) != 0)
236 goto add_std_failure;
237
238 lvl_loc = nla_find_nested(nla_a,
239 NLBL_CIPSOV4_A_MLSLVLLOC);
240 lvl_rem = nla_find_nested(nla_a,
241 NLBL_CIPSOV4_A_MLSLVLREM);
242 if (lvl_loc == NULL || lvl_rem == NULL)
243 goto add_std_failure;
244 doi_def->map.std->lvl.local[nla_get_u32(lvl_loc)] =
245 nla_get_u32(lvl_rem);
246 doi_def->map.std->lvl.cipso[nla_get_u32(lvl_rem)] =
247 nla_get_u32(lvl_loc);
248 }
171 249
172 num_cats = netlbl_getinc_u32(&msg, &msg_len); 250 if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) {
173 doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len); 251 if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
174 if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS) 252 NLBL_CIPSOV4_A_MAX,
175 goto add_std_failure; 253 netlbl_cipsov4_genl_policy) != 0)
176 doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size, 254 goto add_std_failure;
255
256 nla_for_each_nested(nla_a,
257 info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
258 nla_a_rem)
259 if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) {
260 if (nla_validate_nested(nla_a,
261 NLBL_CIPSOV4_A_MAX,
262 netlbl_cipsov4_genl_policy) != 0)
263 goto add_std_failure;
264 nla_for_each_nested(nla_b, nla_a, nla_b_rem)
265 switch (nla_b->nla_type) {
266 case NLBL_CIPSOV4_A_MLSCATLOC:
267 if (nla_get_u32(nla_b) >=
268 doi_def->map.std->cat.local_size)
269 doi_def->map.std->cat.local_size =
270 nla_get_u32(nla_b) + 1;
271 break;
272 case NLBL_CIPSOV4_A_MLSCATREM:
273 if (nla_get_u32(nla_b) >=
274 doi_def->map.std->cat.cipso_size)
275 doi_def->map.std->cat.cipso_size =
276 nla_get_u32(nla_b) + 1;
277 break;
278 }
279 }
280 if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS ||
281 doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
282 goto add_std_failure;
283 doi_def->map.std->cat.local = kcalloc(
284 doi_def->map.std->cat.local_size,
177 sizeof(u32), 285 sizeof(u32),
178 GFP_KERNEL); 286 GFP_KERNEL);
179 if (doi_def->map.std->cat.local == NULL) { 287 if (doi_def->map.std->cat.local == NULL) {
180 ret_val = -ENOMEM; 288 ret_val = -ENOMEM;
181 goto add_std_failure; 289 goto add_std_failure;
182 } 290 }
183 doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len); 291 doi_def->map.std->cat.cipso = kcalloc(
184 if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) 292 doi_def->map.std->cat.cipso_size,
185 goto add_std_failure;
186 doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size,
187 sizeof(u32), 293 sizeof(u32),
188 GFP_KERNEL); 294 GFP_KERNEL);
189 if (doi_def->map.std->cat.cipso == NULL) { 295 if (doi_def->map.std->cat.cipso == NULL) {
190 ret_val = -ENOMEM; 296 ret_val = -ENOMEM;
191 goto add_std_failure;
192 }
193
194 if (msg_len <
195 num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) +
196 num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16))
197 goto add_std_failure;
198
199 for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++)
200 doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL;
201 for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++)
202 doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL;
203 for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++)
204 doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT;
205 for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++)
206 doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT;
207
208 for (iter = 0; iter < num_lvls; iter++) {
209 tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
210 tmp_val_b = netlbl_getinc_u8(&msg, &msg_len);
211
212 if (tmp_val_a >= doi_def->map.std->lvl.local_size ||
213 tmp_val_b >= doi_def->map.std->lvl.cipso_size)
214 goto add_std_failure;
215
216 doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a;
217 doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b;
218 }
219
220 for (iter = 0; iter < num_cats; iter++) {
221 tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
222 tmp_val_b = netlbl_getinc_u16(&msg, &msg_len);
223
224 if (tmp_val_a >= doi_def->map.std->cat.local_size ||
225 tmp_val_b >= doi_def->map.std->cat.cipso_size)
226 goto add_std_failure; 297 goto add_std_failure;
227 298 }
228 doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a; 299 nla_for_each_nested(nla_a,
229 doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b; 300 info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
301 nla_a_rem)
302 if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) {
303 struct nlattr *cat_loc;
304 struct nlattr *cat_rem;
305
306 cat_loc = nla_find_nested(nla_a,
307 NLBL_CIPSOV4_A_MLSCATLOC);
308 cat_rem = nla_find_nested(nla_a,
309 NLBL_CIPSOV4_A_MLSCATREM);
310 if (cat_loc == NULL || cat_rem == NULL)
311 goto add_std_failure;
312 doi_def->map.std->cat.local[
313 nla_get_u32(cat_loc)] =
314 nla_get_u32(cat_rem);
315 doi_def->map.std->cat.cipso[
316 nla_get_u32(cat_rem)] =
317 nla_get_u32(cat_loc);
318 }
230 } 319 }
231 320
232 doi_def->doi = doi;
233 ret_val = cipso_v4_doi_add(doi_def); 321 ret_val = cipso_v4_doi_add(doi_def);
234 if (ret_val != 0) 322 if (ret_val != 0)
235 goto add_std_failure; 323 goto add_std_failure;
@@ -243,9 +331,7 @@ add_std_failure:
243 331
244/** 332/**
245 * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition 333 * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition
246 * @doi: the DOI value 334 * @info: the Generic NETLINK info block
247 * @msg: the ADD message data
248 * @msg_size: the size of the ADD message buffer
249 * 335 *
250 * Description: 336 * Description:
251 * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message 337 * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message
@@ -253,52 +339,31 @@ add_std_failure:
253 * error. 339 * error.
254 * 340 *
255 */ 341 */
256static int netlbl_cipsov4_add_pass(u32 doi, 342static int netlbl_cipsov4_add_pass(struct genl_info *info)
257 struct nlattr *msg,
258 size_t msg_size)
259{ 343{
260 int ret_val = -EINVAL; 344 int ret_val;
261 int msg_len = msg_size;
262 u32 num_tags;
263 struct cipso_v4_doi *doi_def = NULL; 345 struct cipso_v4_doi *doi_def = NULL;
264 u32 iter;
265 346
266 if (msg_len < NETLBL_LEN_U32) 347 if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
267 goto add_pass_failure; 348 !info->attrs[NLBL_CIPSOV4_A_TAGLST])
268 num_tags = netlbl_getinc_u32(&msg, &msg_len); 349 return -EINVAL;
269 if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
270 goto add_pass_failure;
271 350
272 doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); 351 doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
273 if (doi_def == NULL) { 352 if (doi_def == NULL)
274 ret_val = -ENOMEM; 353 return -ENOMEM;
275 goto add_pass_failure;
276 }
277 doi_def->type = CIPSO_V4_MAP_PASS; 354 doi_def->type = CIPSO_V4_MAP_PASS;
278 355
279 for (iter = 0; iter < num_tags; iter++) { 356 ret_val = netlbl_cipsov4_add_common(info, doi_def);
280 if (msg_len < NETLBL_LEN_U8) 357 if (ret_val != 0)
281 goto add_pass_failure; 358 goto add_pass_failure;
282 doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
283 switch (doi_def->tags[iter]) {
284 case CIPSO_V4_TAG_RBITMAP:
285 break;
286 default:
287 goto add_pass_failure;
288 }
289 }
290 if (iter < CIPSO_V4_TAG_MAXCNT)
291 doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
292 359
293 doi_def->doi = doi;
294 ret_val = cipso_v4_doi_add(doi_def); 360 ret_val = cipso_v4_doi_add(doi_def);
295 if (ret_val != 0) 361 if (ret_val != 0)
296 goto add_pass_failure; 362 goto add_pass_failure;
297 return 0; 363 return 0;
298 364
299add_pass_failure: 365add_pass_failure:
300 if (doi_def) 366 netlbl_cipsov4_doi_free(&doi_def->rcu);
301 netlbl_cipsov4_doi_free(&doi_def->rcu);
302 return ret_val; 367 return ret_val;
303} 368}
304 369
@@ -316,34 +381,21 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
316 381
317{ 382{
318 int ret_val = -EINVAL; 383 int ret_val = -EINVAL;
319 u32 doi;
320 u32 map_type; 384 u32 map_type;
321 int msg_len = netlbl_netlink_payload_len(skb);
322 struct nlattr *msg = netlbl_netlink_payload_data(skb);
323
324 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
325 if (ret_val != 0)
326 goto add_return;
327 385
328 if (msg_len < 2 * NETLBL_LEN_U32) 386 if (!info->attrs[NLBL_CIPSOV4_A_MTYPE])
329 goto add_return; 387 return -EINVAL;
330 388
331 doi = netlbl_getinc_u32(&msg, &msg_len); 389 map_type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
332 map_type = netlbl_getinc_u32(&msg, &msg_len);
333 switch (map_type) { 390 switch (map_type) {
334 case CIPSO_V4_MAP_STD: 391 case CIPSO_V4_MAP_STD:
335 ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len); 392 ret_val = netlbl_cipsov4_add_std(info);
336 break; 393 break;
337 case CIPSO_V4_MAP_PASS: 394 case CIPSO_V4_MAP_PASS:
338 ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len); 395 ret_val = netlbl_cipsov4_add_pass(info);
339 break; 396 break;
340 } 397 }
341 398
342add_return:
343 netlbl_netlink_send_ack(info,
344 netlbl_cipsov4_gnl_family.id,
345 NLBL_CIPSOV4_C_ACK,
346 -ret_val);
347 return ret_val; 399 return ret_val;
348} 400}
349 401
@@ -353,84 +405,239 @@ add_return:
353 * @info: the Generic NETLINK info block 405 * @info: the Generic NETLINK info block
354 * 406 *
355 * Description: 407 * Description:
356 * Process a user generated LIST message and respond accordingly. Returns 408 * Process a user generated LIST message and respond accordingly. While the
357 * zero on success and negative values on error. 409 * response message generated by the kernel is straightforward, determining
410 * before hand the size of the buffer to allocate is not (we have to generate
411 * the message to know the size). In order to keep this function sane what we
412 * do is allocate a buffer of NLMSG_GOODSIZE and try to fit the response in
413 * that size, if we fail then we restart with a larger buffer and try again.
414 * We continue in this manner until we hit a limit of failed attempts then we
415 * give up and just send an error message. Returns zero on success and
416 * negative values on error.
358 * 417 *
359 */ 418 */
360static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) 419static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
361{ 420{
362 int ret_val = -EINVAL; 421 int ret_val;
422 struct sk_buff *ans_skb = NULL;
423 u32 nlsze_mult = 1;
424 void *data;
363 u32 doi; 425 u32 doi;
364 struct nlattr *msg = netlbl_netlink_payload_data(skb); 426 struct nlattr *nla_a;
365 struct sk_buff *ans_skb; 427 struct nlattr *nla_b;
428 struct cipso_v4_doi *doi_def;
429 u32 iter;
366 430
367 if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) 431 if (!info->attrs[NLBL_CIPSOV4_A_DOI]) {
432 ret_val = -EINVAL;
368 goto list_failure; 433 goto list_failure;
434 }
369 435
370 doi = nla_get_u32(msg); 436list_start:
371 ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN)); 437 ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL);
372 if (ans_skb == NULL) { 438 if (ans_skb == NULL) {
373 ret_val = -ENOMEM; 439 ret_val = -ENOMEM;
374 goto list_failure; 440 goto list_failure;
375 } 441 }
376 netlbl_netlink_hdr_push(ans_skb, 442 data = netlbl_netlink_hdr_put(ans_skb,
377 info->snd_pid, 443 info->snd_pid,
378 0, 444 info->snd_seq,
379 netlbl_cipsov4_gnl_family.id, 445 netlbl_cipsov4_gnl_family.id,
380 NLBL_CIPSOV4_C_LIST); 446 0,
447 NLBL_CIPSOV4_C_LIST);
448 if (data == NULL) {
449 ret_val = -ENOMEM;
450 goto list_failure;
451 }
452
453 doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
454
455 rcu_read_lock();
456 doi_def = cipso_v4_doi_getdef(doi);
457 if (doi_def == NULL) {
458 ret_val = -EINVAL;
459 goto list_failure;
460 }
461
462 ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type);
463 if (ret_val != 0)
464 goto list_failure_lock;
465
466 nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST);
467 if (nla_a == NULL) {
468 ret_val = -ENOMEM;
469 goto list_failure_lock;
470 }
471 for (iter = 0;
472 iter < CIPSO_V4_TAG_MAXCNT &&
473 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID;
474 iter++) {
475 ret_val = nla_put_u8(ans_skb,
476 NLBL_CIPSOV4_A_TAG,
477 doi_def->tags[iter]);
478 if (ret_val != 0)
479 goto list_failure_lock;
480 }
481 nla_nest_end(ans_skb, nla_a);
482
483 switch (doi_def->type) {
484 case CIPSO_V4_MAP_STD:
485 nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
486 if (nla_a == NULL) {
487 ret_val = -ENOMEM;
488 goto list_failure_lock;
489 }
490 for (iter = 0;
491 iter < doi_def->map.std->lvl.local_size;
492 iter++) {
493 if (doi_def->map.std->lvl.local[iter] ==
494 CIPSO_V4_INV_LVL)
495 continue;
496
497 nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL);
498 if (nla_b == NULL) {
499 ret_val = -ENOMEM;
500 goto list_retry;
501 }
502 ret_val = nla_put_u32(ans_skb,
503 NLBL_CIPSOV4_A_MLSLVLLOC,
504 iter);
505 if (ret_val != 0)
506 goto list_retry;
507 ret_val = nla_put_u32(ans_skb,
508 NLBL_CIPSOV4_A_MLSLVLREM,
509 doi_def->map.std->lvl.local[iter]);
510 if (ret_val != 0)
511 goto list_retry;
512 nla_nest_end(ans_skb, nla_b);
513 }
514 nla_nest_end(ans_skb, nla_a);
515
516 nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST);
517 if (nla_a == NULL) {
518 ret_val = -ENOMEM;
519 goto list_retry;
520 }
521 for (iter = 0;
522 iter < doi_def->map.std->cat.local_size;
523 iter++) {
524 if (doi_def->map.std->cat.local[iter] ==
525 CIPSO_V4_INV_CAT)
526 continue;
527
528 nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT);
529 if (nla_b == NULL) {
530 ret_val = -ENOMEM;
531 goto list_retry;
532 }
533 ret_val = nla_put_u32(ans_skb,
534 NLBL_CIPSOV4_A_MLSCATLOC,
535 iter);
536 if (ret_val != 0)
537 goto list_retry;
538 ret_val = nla_put_u32(ans_skb,
539 NLBL_CIPSOV4_A_MLSCATREM,
540 doi_def->map.std->cat.local[iter]);
541 if (ret_val != 0)
542 goto list_retry;
543 nla_nest_end(ans_skb, nla_b);
544 }
545 nla_nest_end(ans_skb, nla_a);
546
547 break;
548 }
549 rcu_read_unlock();
381 550
382 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); 551 genlmsg_end(ans_skb, data);
552
553 ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
383 if (ret_val != 0) 554 if (ret_val != 0)
384 goto list_failure; 555 goto list_failure;
385 556
386 return 0; 557 return 0;
387 558
559list_retry:
560 /* XXX - this limit is a guesstimate */
561 if (nlsze_mult < 4) {
562 rcu_read_unlock();
563 kfree_skb(ans_skb);
564 nlsze_mult++;
565 goto list_start;
566 }
567list_failure_lock:
568 rcu_read_unlock();
388list_failure: 569list_failure:
389 netlbl_netlink_send_ack(info, 570 kfree_skb(ans_skb);
390 netlbl_cipsov4_gnl_family.id, 571 return ret_val;
391 NLBL_CIPSOV4_C_ACK, 572}
392 -ret_val); 573
574/**
575 * netlbl_cipsov4_listall_cb - cipso_v4_doi_walk() callback for LISTALL
576 * @doi_def: the CIPSOv4 DOI definition
577 * @arg: the netlbl_cipsov4_doiwalk_arg structure
578 *
579 * Description:
580 * This function is designed to be used as a callback to the
581 * cipso_v4_doi_walk() function for use in generating a response for a LISTALL
582 * message. Returns the size of the message on success, negative values on
583 * failure.
584 *
585 */
586static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg)
587{
588 int ret_val = -ENOMEM;
589 struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg;
590 void *data;
591
592 data = netlbl_netlink_hdr_put(cb_arg->skb,
593 NETLINK_CB(cb_arg->nl_cb->skb).pid,
594 cb_arg->seq,
595 netlbl_cipsov4_gnl_family.id,
596 NLM_F_MULTI,
597 NLBL_CIPSOV4_C_LISTALL);
598 if (data == NULL)
599 goto listall_cb_failure;
600
601 ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_DOI, doi_def->doi);
602 if (ret_val != 0)
603 goto listall_cb_failure;
604 ret_val = nla_put_u32(cb_arg->skb,
605 NLBL_CIPSOV4_A_MTYPE,
606 doi_def->type);
607 if (ret_val != 0)
608 goto listall_cb_failure;
609
610 return genlmsg_end(cb_arg->skb, data);
611
612listall_cb_failure:
613 genlmsg_cancel(cb_arg->skb, data);
393 return ret_val; 614 return ret_val;
394} 615}
395 616
396/** 617/**
397 * netlbl_cipsov4_listall - Handle a LISTALL message 618 * netlbl_cipsov4_listall - Handle a LISTALL message
398 * @skb: the NETLINK buffer 619 * @skb: the NETLINK buffer
399 * @info: the Generic NETLINK info block 620 * @cb: the NETLINK callback
400 * 621 *
401 * Description: 622 * Description:
402 * Process a user generated LISTALL message and respond accordingly. Returns 623 * Process a user generated LISTALL message and respond accordingly. Returns
403 * zero on success and negative values on error. 624 * zero on success and negative values on error.
404 * 625 *
405 */ 626 */
406static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info) 627static int netlbl_cipsov4_listall(struct sk_buff *skb,
628 struct netlink_callback *cb)
407{ 629{
408 int ret_val = -EINVAL; 630 struct netlbl_cipsov4_doiwalk_arg cb_arg;
409 struct sk_buff *ans_skb; 631 int doi_skip = cb->args[0];
410 632
411 ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN)); 633 cb_arg.nl_cb = cb;
412 if (ans_skb == NULL) { 634 cb_arg.skb = skb;
413 ret_val = -ENOMEM; 635 cb_arg.seq = cb->nlh->nlmsg_seq;
414 goto listall_failure;
415 }
416 netlbl_netlink_hdr_push(ans_skb,
417 info->snd_pid,
418 0,
419 netlbl_cipsov4_gnl_family.id,
420 NLBL_CIPSOV4_C_LISTALL);
421 636
422 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); 637 cipso_v4_doi_walk(&doi_skip, netlbl_cipsov4_listall_cb, &cb_arg);
423 if (ret_val != 0)
424 goto listall_failure;
425
426 return 0;
427 638
428listall_failure: 639 cb->args[0] = doi_skip;
429 netlbl_netlink_send_ack(info, 640 return skb->len;
430 netlbl_cipsov4_gnl_family.id,
431 NLBL_CIPSOV4_C_ACK,
432 -ret_val);
433 return ret_val;
434} 641}
435 642
436/** 643/**
@@ -445,27 +652,14 @@ listall_failure:
445 */ 652 */
446static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) 653static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
447{ 654{
448 int ret_val; 655 int ret_val = -EINVAL;
449 u32 doi; 656 u32 doi;
450 struct nlattr *msg = netlbl_netlink_payload_data(skb);
451 657
452 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); 658 if (info->attrs[NLBL_CIPSOV4_A_DOI]) {
453 if (ret_val != 0) 659 doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
454 goto remove_return; 660 ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
455
456 if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) {
457 ret_val = -EINVAL;
458 goto remove_return;
459 } 661 }
460 662
461 doi = nla_get_u32(msg);
462 ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
463
464remove_return:
465 netlbl_netlink_send_ack(info,
466 netlbl_cipsov4_gnl_family.id,
467 NLBL_CIPSOV4_C_ACK,
468 -ret_val);
469 return ret_val; 663 return ret_val;
470} 664}
471 665
@@ -475,14 +669,16 @@ remove_return:
475 669
476static struct genl_ops netlbl_cipsov4_genl_c_add = { 670static struct genl_ops netlbl_cipsov4_genl_c_add = {
477 .cmd = NLBL_CIPSOV4_C_ADD, 671 .cmd = NLBL_CIPSOV4_C_ADD,
478 .flags = 0, 672 .flags = GENL_ADMIN_PERM,
673 .policy = netlbl_cipsov4_genl_policy,
479 .doit = netlbl_cipsov4_add, 674 .doit = netlbl_cipsov4_add,
480 .dumpit = NULL, 675 .dumpit = NULL,
481}; 676};
482 677
483static struct genl_ops netlbl_cipsov4_genl_c_remove = { 678static struct genl_ops netlbl_cipsov4_genl_c_remove = {
484 .cmd = NLBL_CIPSOV4_C_REMOVE, 679 .cmd = NLBL_CIPSOV4_C_REMOVE,
485 .flags = 0, 680 .flags = GENL_ADMIN_PERM,
681 .policy = netlbl_cipsov4_genl_policy,
486 .doit = netlbl_cipsov4_remove, 682 .doit = netlbl_cipsov4_remove,
487 .dumpit = NULL, 683 .dumpit = NULL,
488}; 684};
@@ -490,6 +686,7 @@ static struct genl_ops netlbl_cipsov4_genl_c_remove = {
490static struct genl_ops netlbl_cipsov4_genl_c_list = { 686static struct genl_ops netlbl_cipsov4_genl_c_list = {
491 .cmd = NLBL_CIPSOV4_C_LIST, 687 .cmd = NLBL_CIPSOV4_C_LIST,
492 .flags = 0, 688 .flags = 0,
689 .policy = netlbl_cipsov4_genl_policy,
493 .doit = netlbl_cipsov4_list, 690 .doit = netlbl_cipsov4_list,
494 .dumpit = NULL, 691 .dumpit = NULL,
495}; 692};
@@ -497,8 +694,9 @@ static struct genl_ops netlbl_cipsov4_genl_c_list = {
497static struct genl_ops netlbl_cipsov4_genl_c_listall = { 694static struct genl_ops netlbl_cipsov4_genl_c_listall = {
498 .cmd = NLBL_CIPSOV4_C_LISTALL, 695 .cmd = NLBL_CIPSOV4_C_LISTALL,
499 .flags = 0, 696 .flags = 0,
500 .doit = netlbl_cipsov4_listall, 697 .policy = netlbl_cipsov4_genl_policy,
501 .dumpit = NULL, 698 .doit = NULL,
699 .dumpit = netlbl_cipsov4_listall,
502}; 700};
503 701
504/* 702/*
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 4c6ff4b93004..f03cf9b78286 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -34,175 +34,71 @@
34#include <net/netlabel.h> 34#include <net/netlabel.h>
35 35
36/* 36/*
37 * The following NetLabel payloads are supported by the CIPSO subsystem, all 37 * The following NetLabel payloads are supported by the CIPSO subsystem.
38 * of which are preceded by the nlmsghdr struct.
39 * 38 *
40 * o ACK: 39 * o ADD:
41 * Sent by the kernel in response to an applications message, applications 40 * Sent by an application to add a new DOI mapping table.
42 * should never send this message.
43 * 41 *
44 * +----------------------+-----------------------+ 42 * Required attributes:
45 * | seq number (32 bits) | return code (32 bits) |
46 * +----------------------+-----------------------+
47 * 43 *
48 * seq number: the sequence number of the original message, taken from the 44 * NLBL_CIPSOV4_A_DOI
49 * nlmsghdr structure 45 * NLBL_CIPSOV4_A_MTYPE
50 * return code: return value, based on errno values 46 * NLBL_CIPSOV4_A_TAGLST
51 * 47 *
52 * o ADD: 48 * If using CIPSO_V4_MAP_STD the following attributes are required:
53 * Sent by an application to add a new DOI mapping table, after completion 49 *
54 * of the task the kernel should ACK this message. 50 * NLBL_CIPSOV4_A_MLSLVLLST
55 * 51 * NLBL_CIPSOV4_A_MLSCATLST
56 * +---------------+--------------------+---------------------+ 52 *
57 * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ... 53 * If using CIPSO_V4_MAP_PASS no additional attributes are required.
58 * +---------------+--------------------+---------------------+
59 *
60 * +-----------------+
61 * | tag #X (8 bits) | ... repeated
62 * +-----------------+
63 *
64 * +-------------- ---- --- -- -
65 * | mapping data
66 * +-------------- ---- --- -- -
67 *
68 * DOI: the DOI value
69 * map type: the mapping table type (defined in the cipso_ipv4.h header
70 * as CIPSO_V4_MAP_*)
71 * tag count: the number of tags, must be greater than zero
72 * tag: the CIPSO tag for the DOI, tags listed first are given
73 * higher priority when sending packets
74 * mapping data: specific to the map type (see below)
75 *
76 * CIPSO_V4_MAP_STD
77 *
78 * +------------------+-----------------------+----------------------+
79 * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ...
80 * +------------------+-----------------------+----------------------+
81 *
82 * +----------------------+---------------------+---------------------+
83 * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ...
84 * +----------------------+---------------------+---------------------+
85 *
86 * +--------------------------+-------------------------+
87 * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
88 * +--------------------------+-------------------------+
89 *
90 * +-----------------------------+-----------------------------+
91 * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
92 * +-----------------------------+-----------------------------+
93 *
94 * levels: the number of level mappings
95 * max l level: the highest local level
96 * max r level: the highest remote/CIPSO level
97 * categories: the number of category mappings
98 * max l cat: the highest local category
99 * max r cat: the highest remote/CIPSO category
100 * local level: the local part of a level mapping
101 * CIPSO level: the remote/CIPSO part of a level mapping
102 * local category: the local part of a category mapping
103 * CIPSO category: the remote/CIPSO part of a category mapping
104 *
105 * CIPSO_V4_MAP_PASS
106 *
107 * No mapping data is needed for this map type.
108 * 54 *
109 * o REMOVE: 55 * o REMOVE:
110 * Sent by an application to remove a specific DOI mapping table from the 56 * Sent by an application to remove a specific DOI mapping table from the
111 * CIPSO V4 system. The kernel should ACK this message. 57 * CIPSO V4 system.
112 * 58 *
113 * +---------------+ 59 * Required attributes:
114 * | DOI (32 bits) |
115 * +---------------+
116 * 60 *
117 * DOI: the DOI value 61 * NLBL_CIPSOV4_A_DOI
118 * 62 *
119 * o LIST: 63 * o LIST:
120 * Sent by an application to list the details of a DOI definition. The 64 * Sent by an application to list the details of a DOI definition. On
121 * kernel should send an ACK on error or a response as indicated below. The 65 * success the kernel should send a response using the following format.
122 * application generated message format is shown below.
123 * 66 *
124 * +---------------+ 67 * Required attributes:
125 * | DOI (32 bits) |
126 * +---------------+
127 * 68 *
128 * DOI: the DOI value 69 * NLBL_CIPSOV4_A_DOI
129 * 70 *
130 * The valid response message format depends on the type of the DOI mapping, 71 * The valid response message format depends on the type of the DOI mapping,
131 * the known formats are shown below. 72 * the defined formats are shown below.
132 *
133 * +--------------------+
134 * | map type (32 bits) | ...
135 * +--------------------+
136 *
137 * map type: the DOI mapping table type (defined in the cipso_ipv4.h
138 * header as CIPSO_V4_MAP_*)
139 *
140 * (map type == CIPSO_V4_MAP_STD)
141 *
142 * +----------------+------------------+----------------------+
143 * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ...
144 * +----------------+------------------+----------------------+
145 * 73 *
146 * +-----------------+ 74 * Required attributes:
147 * | tag #X (8 bits) | ... repeated
148 * +-----------------+
149 * 75 *
150 * +--------------------------+-------------------------+ 76 * NLBL_CIPSOV4_A_MTYPE
151 * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated 77 * NLBL_CIPSOV4_A_TAGLST
152 * +--------------------------+-------------------------+
153 * 78 *
154 * +-----------------------------+-----------------------------+ 79 * If using CIPSO_V4_MAP_STD the following attributes are required:
155 * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
156 * +-----------------------------+-----------------------------+
157 * 80 *
158 * tags: the number of CIPSO tag types 81 * NLBL_CIPSOV4_A_MLSLVLLST
159 * levels: the number of level mappings 82 * NLBL_CIPSOV4_A_MLSCATLST
160 * categories: the number of category mappings
161 * tag: the tag number, tags listed first are given higher
162 * priority when sending packets
163 * local level: the local part of a level mapping
164 * CIPSO level: the remote/CIPSO part of a level mapping
165 * local category: the local part of a category mapping
166 * CIPSO category: the remote/CIPSO part of a category mapping
167 * 83 *
168 * (map type == CIPSO_V4_MAP_PASS) 84 * If using CIPSO_V4_MAP_PASS no additional attributes are required.
169 *
170 * +----------------+
171 * | tags (32 bits) | ...
172 * +----------------+
173 *
174 * +-----------------+
175 * | tag #X (8 bits) | ... repeated
176 * +-----------------+
177 *
178 * tags: the number of CIPSO tag types
179 * tag: the tag number, tags listed first are given higher
180 * priority when sending packets
181 * 85 *
182 * o LISTALL: 86 * o LISTALL:
183 * This message is sent by an application to list the valid DOIs on the 87 * This message is sent by an application to list the valid DOIs on the
184 * system. There is no payload and the kernel should respond with an ACK 88 * system. When sent by an application there is no payload and the
185 * or the following message. 89 * NLM_F_DUMP flag should be set. The kernel should respond with a series of
186 * 90 * the following messages.
187 * +---------------------+------------------+-----------------------+
188 * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) |
189 * +---------------------+------------------+-----------------------+
190 * 91 *
191 * +-----------------------+ 92 * Required attributes:
192 * | map type #X (32 bits) | ...
193 * +-----------------------+
194 * 93 *
195 * DOI count: the number of DOIs 94 * NLBL_CIPSOV4_A_DOI
196 * DOI: the DOI value 95 * NLBL_CIPSOV4_A_MTYPE
197 * map type: the DOI mapping table type (defined in the cipso_ipv4.h
198 * header as CIPSO_V4_MAP_*)
199 * 96 *
200 */ 97 */
201 98
202/* NetLabel CIPSOv4 commands */ 99/* NetLabel CIPSOv4 commands */
203enum { 100enum {
204 NLBL_CIPSOV4_C_UNSPEC, 101 NLBL_CIPSOV4_C_UNSPEC,
205 NLBL_CIPSOV4_C_ACK,
206 NLBL_CIPSOV4_C_ADD, 102 NLBL_CIPSOV4_C_ADD,
207 NLBL_CIPSOV4_C_REMOVE, 103 NLBL_CIPSOV4_C_REMOVE,
208 NLBL_CIPSOV4_C_LIST, 104 NLBL_CIPSOV4_C_LIST,
@@ -211,6 +107,59 @@ enum {
211}; 107};
212#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) 108#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
213 109
110/* NetLabel CIPSOv4 attributes */
111enum {
112 NLBL_CIPSOV4_A_UNSPEC,
113 NLBL_CIPSOV4_A_DOI,
114 /* (NLA_U32)
115 * the DOI value */
116 NLBL_CIPSOV4_A_MTYPE,
117 /* (NLA_U32)
118 * the mapping table type (defined in the cipso_ipv4.h header as
119 * CIPSO_V4_MAP_*) */
120 NLBL_CIPSOV4_A_TAG,
121 /* (NLA_U8)
122 * a CIPSO tag type, meant to be used within a NLBL_CIPSOV4_A_TAGLST
123 * attribute */
124 NLBL_CIPSOV4_A_TAGLST,
125 /* (NLA_NESTED)
126 * the CIPSO tag list for the DOI, there must be at least one
127 * NLBL_CIPSOV4_A_TAG attribute, tags listed first are given higher
128 * priority when sending packets */
129 NLBL_CIPSOV4_A_MLSLVLLOC,
130 /* (NLA_U32)
131 * the local MLS sensitivity level */
132 NLBL_CIPSOV4_A_MLSLVLREM,
133 /* (NLA_U32)
134 * the remote MLS sensitivity level */
135 NLBL_CIPSOV4_A_MLSLVL,
136 /* (NLA_NESTED)
137 * a MLS sensitivity level mapping, must contain only one attribute of
138 * each of the following types: NLBL_CIPSOV4_A_MLSLVLLOC and
139 * NLBL_CIPSOV4_A_MLSLVLREM */
140 NLBL_CIPSOV4_A_MLSLVLLST,
141 /* (NLA_NESTED)
142 * the CIPSO level mappings, there must be at least one
143 * NLBL_CIPSOV4_A_MLSLVL attribute */
144 NLBL_CIPSOV4_A_MLSCATLOC,
145 /* (NLA_U32)
146 * the local MLS category */
147 NLBL_CIPSOV4_A_MLSCATREM,
148 /* (NLA_U32)
149 * the remote MLS category */
150 NLBL_CIPSOV4_A_MLSCAT,
151 /* (NLA_NESTED)
152 * a MLS category mapping, must contain only one attribute of each of
153 * the following types: NLBL_CIPSOV4_A_MLSCATLOC and
154 * NLBL_CIPSOV4_A_MLSCATREM */
155 NLBL_CIPSOV4_A_MLSCATLST,
156 /* (NLA_NESTED)
157 * the CIPSO category mappings, there must be at least one
158 * NLBL_CIPSOV4_A_MLSCAT attribute */
159 __NLBL_CIPSOV4_A_MAX,
160};
161#define NLBL_CIPSOV4_A_MAX (__NLBL_CIPSOV4_A_MAX - 1)
162
214/* NetLabel protocol functions */ 163/* NetLabel protocol functions */
215int netlbl_cipsov4_genl_init(void); 164int netlbl_cipsov4_genl_init(void);
216 165
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 0489a1378101..f56d7a8ac7b7 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -354,160 +354,51 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
354} 354}
355 355
356/** 356/**
357 * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff 357 * netlbl_domhsh_walk - Iterate through the domain mapping hash table
358 * @skip_bkt: the number of buckets to skip at the start
359 * @skip_chain: the number of entries to skip in the first iterated bucket
360 * @callback: callback for each entry
361 * @cb_arg: argument for the callback function
358 * 362 *
359 * Description: 363 * Description:
360 * Dump the domain hash table into a buffer suitable for returning to an 364 * Iterate over the domain mapping hash table, skipping the first @skip_bkt
361 * application in response to a NetLabel management DOMAIN message. This 365 * buckets and @skip_chain entries. For each entry in the table call
362 * function may fail if another process is growing the hash table at the same 366 * @callback, if @callback returns a negative value stop 'walking' through the
363 * time. The returned sk_buff has room at the front of the sk_buff for 367 * table and return. Updates the values in @skip_bkt and @skip_chain on
364 * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a 368 * return. Returns zero on success, negative values on failure.
365 * pointer to a sk_buff on success, NULL on error.
366 * 369 *
367 */ 370 */
368struct sk_buff *netlbl_domhsh_dump(size_t headroom) 371int netlbl_domhsh_walk(u32 *skip_bkt,
372 u32 *skip_chain,
373 int (*callback) (struct netlbl_dom_map *entry, void *arg),
374 void *cb_arg)
369{ 375{
370 struct sk_buff *skb = NULL; 376 int ret_val = -ENOENT;
371 ssize_t buf_len; 377 u32 iter_bkt;
372 u32 bkt_iter; 378 struct netlbl_dom_map *iter_entry;
373 u32 dom_cnt = 0; 379 u32 chain_cnt = 0;
374 struct netlbl_domhsh_tbl *hsh_tbl;
375 struct netlbl_dom_map *list_iter;
376 ssize_t tmp_len;
377 380
378 buf_len = NETLBL_LEN_U32;
379 rcu_read_lock(); 381 rcu_read_lock();
380 hsh_tbl = rcu_dereference(netlbl_domhsh); 382 for (iter_bkt = *skip_bkt;
381 for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) 383 iter_bkt < rcu_dereference(netlbl_domhsh)->size;
382 list_for_each_entry_rcu(list_iter, 384 iter_bkt++, chain_cnt = 0) {
383 &hsh_tbl->tbl[bkt_iter], list) { 385 list_for_each_entry_rcu(iter_entry,
384 buf_len += NETLBL_LEN_U32 + 386 &netlbl_domhsh->tbl[iter_bkt],
385 nla_total_size(strlen(list_iter->domain) + 1); 387 list)
386 switch (list_iter->type) { 388 if (iter_entry->valid) {
387 case NETLBL_NLTYPE_UNLABELED: 389 if (chain_cnt++ < *skip_chain)
388 break; 390 continue;
389 case NETLBL_NLTYPE_CIPSOV4: 391 ret_val = callback(iter_entry, cb_arg);
390 buf_len += 2 * NETLBL_LEN_U32; 392 if (ret_val < 0) {
391 break; 393 chain_cnt--;
392 } 394 goto walk_return;
393 dom_cnt++; 395 }
394 }
395
396 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
397 if (skb == NULL)
398 goto dump_failure;
399
400 if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0)
401 goto dump_failure;
402 buf_len -= NETLBL_LEN_U32;
403 hsh_tbl = rcu_dereference(netlbl_domhsh);
404 for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
405 list_for_each_entry_rcu(list_iter,
406 &hsh_tbl->tbl[bkt_iter], list) {
407 tmp_len = nla_total_size(strlen(list_iter->domain) +
408 1);
409 if (buf_len < NETLBL_LEN_U32 + tmp_len)
410 goto dump_failure;
411 if (nla_put_string(skb,
412 NLA_STRING,
413 list_iter->domain) != 0)
414 goto dump_failure;
415 if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0)
416 goto dump_failure;
417 buf_len -= NETLBL_LEN_U32 + tmp_len;
418 switch (list_iter->type) {
419 case NETLBL_NLTYPE_UNLABELED:
420 break;
421 case NETLBL_NLTYPE_CIPSOV4:
422 if (buf_len < 2 * NETLBL_LEN_U32)
423 goto dump_failure;
424 if (nla_put_u32(skb,
425 NLA_U32,
426 list_iter->type_def.cipsov4->type) != 0)
427 goto dump_failure;
428 if (nla_put_u32(skb,
429 NLA_U32,
430 list_iter->type_def.cipsov4->doi) != 0)
431 goto dump_failure;
432 buf_len -= 2 * NETLBL_LEN_U32;
433 break;
434 } 396 }
435 } 397 }
436 rcu_read_unlock();
437
438 return skb;
439
440dump_failure:
441 rcu_read_unlock();
442 kfree_skb(skb);
443 return NULL;
444}
445
446/**
447 * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff
448 *
449 * Description:
450 * Dump the default domain mapping into a buffer suitable for returning to an
451 * application in response to a NetLabel management DEFDOMAIN message. This
452 * function may fail if another process is changing the default domain mapping
453 * at the same time. The returned sk_buff has room at the front of the
454 * skb_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message
455 * format. Returns a pointer to a sk_buff on success, NULL on error.
456 *
457 */
458struct sk_buff *netlbl_domhsh_dump_default(size_t headroom)
459{
460 struct sk_buff *skb;
461 ssize_t buf_len;
462 struct netlbl_dom_map *entry;
463
464 buf_len = NETLBL_LEN_U32;
465 rcu_read_lock();
466 entry = rcu_dereference(netlbl_domhsh_def);
467 if (entry != NULL)
468 switch (entry->type) {
469 case NETLBL_NLTYPE_UNLABELED:
470 break;
471 case NETLBL_NLTYPE_CIPSOV4:
472 buf_len += 2 * NETLBL_LEN_U32;
473 break;
474 }
475
476 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
477 if (skb == NULL)
478 goto dump_default_failure;
479
480 if (entry != rcu_dereference(netlbl_domhsh_def))
481 goto dump_default_failure;
482 if (entry != NULL) {
483 if (nla_put_u32(skb, NLA_U32, entry->type) != 0)
484 goto dump_default_failure;
485 buf_len -= NETLBL_LEN_U32;
486 switch (entry->type) {
487 case NETLBL_NLTYPE_UNLABELED:
488 break;
489 case NETLBL_NLTYPE_CIPSOV4:
490 if (buf_len < 2 * NETLBL_LEN_U32)
491 goto dump_default_failure;
492 if (nla_put_u32(skb,
493 NLA_U32,
494 entry->type_def.cipsov4->type) != 0)
495 goto dump_default_failure;
496 if (nla_put_u32(skb,
497 NLA_U32,
498 entry->type_def.cipsov4->doi) != 0)
499 goto dump_default_failure;
500 buf_len -= 2 * NETLBL_LEN_U32;
501 break;
502 }
503 } else
504 nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE);
505 rcu_read_unlock();
506
507 return skb;
508 398
509dump_default_failure: 399walk_return:
510 rcu_read_unlock(); 400 rcu_read_unlock();
511 kfree_skb(skb); 401 *skip_bkt = iter_bkt;
512 return NULL; 402 *skip_chain = chain_cnt;
403 return ret_val;
513} 404}
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 99a2287de246..02af72a7877c 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -61,7 +61,9 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry);
61int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); 61int netlbl_domhsh_add_default(struct netlbl_dom_map *entry);
62int netlbl_domhsh_remove_default(void); 62int netlbl_domhsh_remove_default(void);
63struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); 63struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
64struct sk_buff *netlbl_domhsh_dump(size_t headroom); 64int netlbl_domhsh_walk(u32 *skip_bkt,
65struct sk_buff *netlbl_domhsh_dump_default(size_t headroom); 65 u32 *skip_chain,
66 int (*callback) (struct netlbl_dom_map *entry, void *arg),
67 void *cb_arg);
66 68
67#endif 69#endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 0fd8aaafe23f..54fb7de3c2b1 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -85,6 +85,29 @@ socket_setattr_return:
85} 85}
86 86
87/** 87/**
88 * netlbl_sock_getattr - Determine the security attributes of a sock
89 * @sk: the sock
90 * @secattr: the security attributes
91 *
92 * Description:
93 * Examines the given sock to see if any NetLabel style labeling has been
94 * applied to the sock, if so it parses the socket label and returns the
95 * security attributes in @secattr. Returns zero on success, negative values
96 * on failure.
97 *
98 */
99int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
100{
101 int ret_val;
102
103 ret_val = cipso_v4_sock_getattr(sk, secattr);
104 if (ret_val == 0)
105 return 0;
106
107 return netlbl_unlabel_getattr(secattr);
108}
109
110/**
88 * netlbl_socket_getattr - Determine the security attributes of a socket 111 * netlbl_socket_getattr - Determine the security attributes of a socket
89 * @sock: the socket 112 * @sock: the socket
90 * @secattr: the security attributes 113 * @secattr: the security attributes
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 85bc11a1fc46..8626c9f678eb 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -42,15 +42,29 @@
42#include "netlabel_user.h" 42#include "netlabel_user.h"
43#include "netlabel_mgmt.h" 43#include "netlabel_mgmt.h"
44 44
45/* Argument struct for netlbl_domhsh_walk() */
46struct netlbl_domhsh_walk_arg {
47 struct netlink_callback *nl_cb;
48 struct sk_buff *skb;
49 u32 seq;
50};
51
45/* NetLabel Generic NETLINK CIPSOv4 family */ 52/* NetLabel Generic NETLINK CIPSOv4 family */
46static struct genl_family netlbl_mgmt_gnl_family = { 53static struct genl_family netlbl_mgmt_gnl_family = {
47 .id = GENL_ID_GENERATE, 54 .id = GENL_ID_GENERATE,
48 .hdrsize = 0, 55 .hdrsize = 0,
49 .name = NETLBL_NLTYPE_MGMT_NAME, 56 .name = NETLBL_NLTYPE_MGMT_NAME,
50 .version = NETLBL_PROTO_VERSION, 57 .version = NETLBL_PROTO_VERSION,
51 .maxattr = 0, 58 .maxattr = NLBL_MGMT_A_MAX,
52}; 59};
53 60
61/* NetLabel Netlink attribute policy */
62static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
63 [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING },
64 [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 },
65 [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 },
66 [NLBL_MGMT_A_CV4DOI] = { .type = NLA_U32 },
67};
54 68
55/* 69/*
56 * NetLabel Command Handlers 70 * NetLabel Command Handlers
@@ -70,97 +84,62 @@ static struct genl_family netlbl_mgmt_gnl_family = {
70static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) 84static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
71{ 85{
72 int ret_val = -EINVAL; 86 int ret_val = -EINVAL;
73 struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
74 int msg_len = netlbl_netlink_payload_len(skb);
75 u32 count;
76 struct netlbl_dom_map *entry = NULL; 87 struct netlbl_dom_map *entry = NULL;
77 u32 iter; 88 size_t tmp_size;
78 u32 tmp_val; 89 u32 tmp_val;
79 int tmp_size;
80 90
81 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); 91 if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
82 if (ret_val != 0) 92 !info->attrs[NLBL_MGMT_A_PROTOCOL])
83 goto add_failure; 93 goto add_failure;
84 94
85 if (msg_len < NETLBL_LEN_U32) 95 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
96 if (entry == NULL) {
97 ret_val = -ENOMEM;
98 goto add_failure;
99 }
100 tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
101 entry->domain = kmalloc(tmp_size, GFP_KERNEL);
102 if (entry->domain == NULL) {
103 ret_val = -ENOMEM;
86 goto add_failure; 104 goto add_failure;
87 count = netlbl_getinc_u32(&msg_ptr, &msg_len); 105 }
106 entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
107 nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
88 108
89 for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) { 109 switch (entry->type) {
90 if (msg_len <= 0) { 110 case NETLBL_NLTYPE_UNLABELED:
91 ret_val = -EINVAL; 111 ret_val = netlbl_domhsh_add(entry);
92 goto add_failure; 112 break;
93 } 113 case NETLBL_NLTYPE_CIPSOV4:
94 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 114 if (!info->attrs[NLBL_MGMT_A_CV4DOI])
95 if (entry == NULL) {
96 ret_val = -ENOMEM;
97 goto add_failure;
98 }
99 tmp_size = nla_len(msg_ptr);
100 if (tmp_size <= 0 || tmp_size > msg_len) {
101 ret_val = -EINVAL;
102 goto add_failure;
103 }
104 entry->domain = kmalloc(tmp_size, GFP_KERNEL);
105 if (entry->domain == NULL) {
106 ret_val = -ENOMEM;
107 goto add_failure; 115 goto add_failure;
108 }
109 nla_strlcpy(entry->domain, msg_ptr, tmp_size);
110 entry->domain[tmp_size - 1] = '\0';
111 msg_ptr = nla_next(msg_ptr, &msg_len);
112 116
113 if (msg_len < NETLBL_LEN_U32) { 117 tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
114 ret_val = -EINVAL; 118 /* We should be holding a rcu_read_lock() here while we hold
115 goto add_failure; 119 * the result but since the entry will always be deleted when
116 } 120 * the CIPSO DOI is deleted we aren't going to keep the
117 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); 121 * lock. */
118 entry->type = tmp_val; 122 rcu_read_lock();
119 switch (tmp_val) { 123 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
120 case NETLBL_NLTYPE_UNLABELED: 124 if (entry->type_def.cipsov4 == NULL) {
121 ret_val = netlbl_domhsh_add(entry);
122 break;
123 case NETLBL_NLTYPE_CIPSOV4:
124 if (msg_len < NETLBL_LEN_U32) {
125 ret_val = -EINVAL;
126 goto add_failure;
127 }
128 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
129 /* We should be holding a rcu_read_lock() here
130 * while we hold the result but since the entry
131 * will always be deleted when the CIPSO DOI
132 * is deleted we aren't going to keep the lock. */
133 rcu_read_lock();
134 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
135 if (entry->type_def.cipsov4 == NULL) {
136 rcu_read_unlock();
137 ret_val = -EINVAL;
138 goto add_failure;
139 }
140 ret_val = netlbl_domhsh_add(entry);
141 rcu_read_unlock(); 125 rcu_read_unlock();
142 break;
143 default:
144 ret_val = -EINVAL;
145 }
146 if (ret_val != 0)
147 goto add_failure; 126 goto add_failure;
127 }
128 ret_val = netlbl_domhsh_add(entry);
129 rcu_read_unlock();
130 break;
131 default:
132 goto add_failure;
148 } 133 }
134 if (ret_val != 0)
135 goto add_failure;
149 136
150 netlbl_netlink_send_ack(info,
151 netlbl_mgmt_gnl_family.id,
152 NLBL_MGMT_C_ACK,
153 NETLBL_E_OK);
154 return 0; 137 return 0;
155 138
156add_failure: 139add_failure:
157 if (entry) 140 if (entry)
158 kfree(entry->domain); 141 kfree(entry->domain);
159 kfree(entry); 142 kfree(entry);
160 netlbl_netlink_send_ack(info,
161 netlbl_mgmt_gnl_family.id,
162 NLBL_MGMT_C_ACK,
163 -ret_val);
164 return ret_val; 143 return ret_val;
165} 144}
166 145
@@ -176,87 +155,98 @@ add_failure:
176 */ 155 */
177static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) 156static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
178{ 157{
179 int ret_val = -EINVAL; 158 char *domain;
180 struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
181 int msg_len = netlbl_netlink_payload_len(skb);
182 u32 count;
183 u32 iter;
184 int tmp_size;
185 unsigned char *domain;
186
187 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
188 if (ret_val != 0)
189 goto remove_return;
190 159
191 if (msg_len < NETLBL_LEN_U32) 160 if (!info->attrs[NLBL_MGMT_A_DOMAIN])
192 goto remove_return; 161 return -EINVAL;
193 count = netlbl_getinc_u32(&msg_ptr, &msg_len);
194 162
195 for (iter = 0; iter < count && msg_len > 0; iter++) { 163 domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
196 if (msg_len <= 0) { 164 return netlbl_domhsh_remove(domain);
197 ret_val = -EINVAL; 165}
198 goto remove_return; 166
199 } 167/**
200 tmp_size = nla_len(msg_ptr); 168 * netlbl_mgmt_listall_cb - netlbl_domhsh_walk() callback for LISTALL
201 domain = nla_data(msg_ptr); 169 * @entry: the domain mapping hash table entry
202 if (tmp_size <= 0 || tmp_size > msg_len || 170 * @arg: the netlbl_domhsh_walk_arg structure
203 domain[tmp_size - 1] != '\0') { 171 *
204 ret_val = -EINVAL; 172 * Description:
205 goto remove_return; 173 * This function is designed to be used as a callback to the
206 } 174 * netlbl_domhsh_walk() function for use in generating a response for a LISTALL
207 ret_val = netlbl_domhsh_remove(domain); 175 * message. Returns the size of the message on success, negative values on
176 * failure.
177 *
178 */
179static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
180{
181 int ret_val = -ENOMEM;
182 struct netlbl_domhsh_walk_arg *cb_arg = arg;
183 void *data;
184
185 data = netlbl_netlink_hdr_put(cb_arg->skb,
186 NETLINK_CB(cb_arg->nl_cb->skb).pid,
187 cb_arg->seq,
188 netlbl_mgmt_gnl_family.id,
189 NLM_F_MULTI,
190 NLBL_MGMT_C_LISTALL);
191 if (data == NULL)
192 goto listall_cb_failure;
193
194 ret_val = nla_put_string(cb_arg->skb,
195 NLBL_MGMT_A_DOMAIN,
196 entry->domain);
197 if (ret_val != 0)
198 goto listall_cb_failure;
199 ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type);
200 if (ret_val != 0)
201 goto listall_cb_failure;
202 switch (entry->type) {
203 case NETLBL_NLTYPE_CIPSOV4:
204 ret_val = nla_put_u32(cb_arg->skb,
205 NLBL_MGMT_A_CV4DOI,
206 entry->type_def.cipsov4->doi);
208 if (ret_val != 0) 207 if (ret_val != 0)
209 goto remove_return; 208 goto listall_cb_failure;
210 msg_ptr = nla_next(msg_ptr, &msg_len); 209 break;
211 } 210 }
212 211
213 ret_val = 0; 212 cb_arg->seq++;
213 return genlmsg_end(cb_arg->skb, data);
214 214
215remove_return: 215listall_cb_failure:
216 netlbl_netlink_send_ack(info, 216 genlmsg_cancel(cb_arg->skb, data);
217 netlbl_mgmt_gnl_family.id,
218 NLBL_MGMT_C_ACK,
219 -ret_val);
220 return ret_val; 217 return ret_val;
221} 218}
222 219
223/** 220/**
224 * netlbl_mgmt_list - Handle a LIST message 221 * netlbl_mgmt_listall - Handle a LISTALL message
225 * @skb: the NETLINK buffer 222 * @skb: the NETLINK buffer
226 * @info: the Generic NETLINK info block 223 * @cb: the NETLINK callback
227 * 224 *
228 * Description: 225 * Description:
229 * Process a user generated LIST message and dumps the domain hash table in a 226 * Process a user generated LISTALL message and dumps the domain hash table in
230 * form suitable for use in a kernel generated LIST message. Returns zero on 227 * a form suitable for use in a kernel generated LISTALL message. Returns zero
231 * success, negative values on failure. 228 * on success, negative values on failure.
232 * 229 *
233 */ 230 */
234static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info) 231static int netlbl_mgmt_listall(struct sk_buff *skb,
232 struct netlink_callback *cb)
235{ 233{
236 int ret_val = -ENOMEM; 234 struct netlbl_domhsh_walk_arg cb_arg;
237 struct sk_buff *ans_skb; 235 u32 skip_bkt = cb->args[0];
238 236 u32 skip_chain = cb->args[1];
239 ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN)); 237
240 if (ans_skb == NULL) 238 cb_arg.nl_cb = cb;
241 goto list_failure; 239 cb_arg.skb = skb;
242 netlbl_netlink_hdr_push(ans_skb, 240 cb_arg.seq = cb->nlh->nlmsg_seq;
243 info->snd_pid, 241
244 0, 242 netlbl_domhsh_walk(&skip_bkt,
245 netlbl_mgmt_gnl_family.id, 243 &skip_chain,
246 NLBL_MGMT_C_LIST); 244 netlbl_mgmt_listall_cb,
247 245 &cb_arg);
248 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); 246
249 if (ret_val != 0) 247 cb->args[0] = skip_bkt;
250 goto list_failure; 248 cb->args[1] = skip_chain;
251 249 return skb->len;
252 return 0;
253
254list_failure:
255 netlbl_netlink_send_ack(info,
256 netlbl_mgmt_gnl_family.id,
257 NLBL_MGMT_C_ACK,
258 -ret_val);
259 return ret_val;
260} 250}
261 251
262/** 252/**
@@ -272,68 +262,51 @@ list_failure:
272static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) 262static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
273{ 263{
274 int ret_val = -EINVAL; 264 int ret_val = -EINVAL;
275 struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
276 int msg_len = netlbl_netlink_payload_len(skb);
277 struct netlbl_dom_map *entry = NULL; 265 struct netlbl_dom_map *entry = NULL;
278 u32 tmp_val; 266 u32 tmp_val;
279 267
280 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); 268 if (!info->attrs[NLBL_MGMT_A_PROTOCOL])
281 if (ret_val != 0)
282 goto adddef_failure;
283
284 if (msg_len < NETLBL_LEN_U32)
285 goto adddef_failure; 269 goto adddef_failure;
286 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
287 270
288 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 271 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
289 if (entry == NULL) { 272 if (entry == NULL) {
290 ret_val = -ENOMEM; 273 ret_val = -ENOMEM;
291 goto adddef_failure; 274 goto adddef_failure;
292 } 275 }
276 entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
293 277
294 entry->type = tmp_val;
295 switch (entry->type) { 278 switch (entry->type) {
296 case NETLBL_NLTYPE_UNLABELED: 279 case NETLBL_NLTYPE_UNLABELED:
297 ret_val = netlbl_domhsh_add_default(entry); 280 ret_val = netlbl_domhsh_add_default(entry);
298 break; 281 break;
299 case NETLBL_NLTYPE_CIPSOV4: 282 case NETLBL_NLTYPE_CIPSOV4:
300 if (msg_len < NETLBL_LEN_U32) { 283 if (!info->attrs[NLBL_MGMT_A_CV4DOI])
301 ret_val = -EINVAL;
302 goto adddef_failure; 284 goto adddef_failure;
303 } 285
304 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); 286 tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
305 /* We should be holding a rcu_read_lock here while we 287 /* We should be holding a rcu_read_lock() here while we hold
306 * hold the result but since the entry will always be 288 * the result but since the entry will always be deleted when
307 * deleted when the CIPSO DOI is deleted we are going 289 * the CIPSO DOI is deleted we aren't going to keep the
308 * to skip the lock. */ 290 * lock. */
309 rcu_read_lock(); 291 rcu_read_lock();
310 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); 292 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
311 if (entry->type_def.cipsov4 == NULL) { 293 if (entry->type_def.cipsov4 == NULL) {
312 rcu_read_unlock(); 294 rcu_read_unlock();
313 ret_val = -EINVAL;
314 goto adddef_failure; 295 goto adddef_failure;
315 } 296 }
316 ret_val = netlbl_domhsh_add_default(entry); 297 ret_val = netlbl_domhsh_add_default(entry);
317 rcu_read_unlock(); 298 rcu_read_unlock();
318 break; 299 break;
319 default: 300 default:
320 ret_val = -EINVAL; 301 goto adddef_failure;
321 } 302 }
322 if (ret_val != 0) 303 if (ret_val != 0)
323 goto adddef_failure; 304 goto adddef_failure;
324 305
325 netlbl_netlink_send_ack(info,
326 netlbl_mgmt_gnl_family.id,
327 NLBL_MGMT_C_ACK,
328 NETLBL_E_OK);
329 return 0; 306 return 0;
330 307
331adddef_failure: 308adddef_failure:
332 kfree(entry); 309 kfree(entry);
333 netlbl_netlink_send_ack(info,
334 netlbl_mgmt_gnl_family.id,
335 NLBL_MGMT_C_ACK,
336 -ret_val);
337 return ret_val; 310 return ret_val;
338} 311}
339 312
@@ -349,20 +322,7 @@ adddef_failure:
349 */ 322 */
350static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) 323static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
351{ 324{
352 int ret_val; 325 return netlbl_domhsh_remove_default();
353
354 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
355 if (ret_val != 0)
356 goto removedef_return;
357
358 ret_val = netlbl_domhsh_remove_default();
359
360removedef_return:
361 netlbl_netlink_send_ack(info,
362 netlbl_mgmt_gnl_family.id,
363 NLBL_MGMT_C_ACK,
364 -ret_val);
365 return ret_val;
366} 326}
367 327
368/** 328/**
@@ -379,88 +339,131 @@ removedef_return:
379static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) 339static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
380{ 340{
381 int ret_val = -ENOMEM; 341 int ret_val = -ENOMEM;
382 struct sk_buff *ans_skb; 342 struct sk_buff *ans_skb = NULL;
343 void *data;
344 struct netlbl_dom_map *entry;
383 345
384 ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN)); 346 ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
385 if (ans_skb == NULL) 347 if (ans_skb == NULL)
348 return -ENOMEM;
349 data = netlbl_netlink_hdr_put(ans_skb,
350 info->snd_pid,
351 info->snd_seq,
352 netlbl_mgmt_gnl_family.id,
353 0,
354 NLBL_MGMT_C_LISTDEF);
355 if (data == NULL)
386 goto listdef_failure; 356 goto listdef_failure;
387 netlbl_netlink_hdr_push(ans_skb,
388 info->snd_pid,
389 0,
390 netlbl_mgmt_gnl_family.id,
391 NLBL_MGMT_C_LISTDEF);
392 357
393 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); 358 rcu_read_lock();
359 entry = netlbl_domhsh_getentry(NULL);
360 if (entry == NULL) {
361 ret_val = -ENOENT;
362 goto listdef_failure_lock;
363 }
364 ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type);
394 if (ret_val != 0) 365 if (ret_val != 0)
395 goto listdef_failure; 366 goto listdef_failure_lock;
367 switch (entry->type) {
368 case NETLBL_NLTYPE_CIPSOV4:
369 ret_val = nla_put_u32(ans_skb,
370 NLBL_MGMT_A_CV4DOI,
371 entry->type_def.cipsov4->doi);
372 if (ret_val != 0)
373 goto listdef_failure_lock;
374 break;
375 }
376 rcu_read_unlock();
396 377
378 genlmsg_end(ans_skb, data);
379
380 ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
381 if (ret_val != 0)
382 goto listdef_failure;
397 return 0; 383 return 0;
398 384
385listdef_failure_lock:
386 rcu_read_unlock();
399listdef_failure: 387listdef_failure:
400 netlbl_netlink_send_ack(info, 388 kfree_skb(ans_skb);
401 netlbl_mgmt_gnl_family.id,
402 NLBL_MGMT_C_ACK,
403 -ret_val);
404 return ret_val; 389 return ret_val;
405} 390}
406 391
407/** 392/**
408 * netlbl_mgmt_modules - Handle a MODULES message 393 * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response
409 * @skb: the NETLINK buffer 394 * @skb: the skb to write to
410 * @info: the Generic NETLINK info block 395 * @seq: the NETLINK sequence number
396 * @cb: the NETLINK callback
397 * @protocol: the NetLabel protocol to use in the message
411 * 398 *
412 * Description: 399 * Description:
413 * Process a user generated MODULES message and respond accordingly. 400 * This function is to be used in conjunction with netlbl_mgmt_protocols() to
401 * answer an application's PROTOCOLS message. Returns the size of the message
402 * on success, negative values on failure.
414 * 403 *
415 */ 404 */
416static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info) 405static int netlbl_mgmt_protocols_cb(struct sk_buff *skb,
406 struct netlink_callback *cb,
407 u32 protocol)
417{ 408{
418 int ret_val = -ENOMEM; 409 int ret_val = -ENOMEM;
419 size_t data_size; 410 void *data;
420 u32 mod_count; 411
421 struct sk_buff *ans_skb = NULL; 412 data = netlbl_netlink_hdr_put(skb,
422 413 NETLINK_CB(cb->skb).pid,
423 /* unlabeled + cipsov4 */ 414 cb->nlh->nlmsg_seq,
424 mod_count = 2; 415 netlbl_mgmt_gnl_family.id,
425 416 NLM_F_MULTI,
426 data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32; 417 NLBL_MGMT_C_PROTOCOLS);
427 ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); 418 if (data == NULL)
428 if (ans_skb == NULL) 419 goto protocols_cb_failure;
429 goto modules_failure; 420
430 421 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, protocol);
431 if (netlbl_netlink_hdr_put(ans_skb,
432 info->snd_pid,
433 0,
434 netlbl_mgmt_gnl_family.id,
435 NLBL_MGMT_C_MODULES) == NULL)
436 goto modules_failure;
437
438 ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count);
439 if (ret_val != 0)
440 goto modules_failure;
441 ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED);
442 if (ret_val != 0) 422 if (ret_val != 0)
443 goto modules_failure; 423 goto protocols_cb_failure;
444 ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4);
445 if (ret_val != 0)
446 goto modules_failure;
447
448 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
449 if (ret_val != 0)
450 goto modules_failure;
451 424
452 return 0; 425 return genlmsg_end(skb, data);
453 426
454modules_failure: 427protocols_cb_failure:
455 kfree_skb(ans_skb); 428 genlmsg_cancel(skb, data);
456 netlbl_netlink_send_ack(info,
457 netlbl_mgmt_gnl_family.id,
458 NLBL_MGMT_C_ACK,
459 -ret_val);
460 return ret_val; 429 return ret_val;
461} 430}
462 431
463/** 432/**
433 * netlbl_mgmt_protocols - Handle a PROTOCOLS message
434 * @skb: the NETLINK buffer
435 * @cb: the NETLINK callback
436 *
437 * Description:
438 * Process a user generated PROTOCOLS message and respond accordingly.
439 *
440 */
441static int netlbl_mgmt_protocols(struct sk_buff *skb,
442 struct netlink_callback *cb)
443{
444 u32 protos_sent = cb->args[0];
445
446 if (protos_sent == 0) {
447 if (netlbl_mgmt_protocols_cb(skb,
448 cb,
449 NETLBL_NLTYPE_UNLABELED) < 0)
450 goto protocols_return;
451 protos_sent++;
452 }
453 if (protos_sent == 1) {
454 if (netlbl_mgmt_protocols_cb(skb,
455 cb,
456 NETLBL_NLTYPE_CIPSOV4) < 0)
457 goto protocols_return;
458 protos_sent++;
459 }
460
461protocols_return:
462 cb->args[0] = protos_sent;
463 return skb->len;
464}
465
466/**
464 * netlbl_mgmt_version - Handle a VERSION message 467 * netlbl_mgmt_version - Handle a VERSION message
465 * @skb: the NETLINK buffer 468 * @skb: the NETLINK buffer
466 * @info: the Generic NETLINK info block 469 * @info: the Generic NETLINK info block
@@ -474,35 +477,35 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
474{ 477{
475 int ret_val = -ENOMEM; 478 int ret_val = -ENOMEM;
476 struct sk_buff *ans_skb = NULL; 479 struct sk_buff *ans_skb = NULL;
480 void *data;
477 481
478 ans_skb = netlbl_netlink_alloc_skb(0, 482 ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
479 GENL_HDRLEN + NETLBL_LEN_U32,
480 GFP_KERNEL);
481 if (ans_skb == NULL) 483 if (ans_skb == NULL)
482 goto version_failure; 484 return -ENOMEM;
483 if (netlbl_netlink_hdr_put(ans_skb, 485 data = netlbl_netlink_hdr_put(ans_skb,
484 info->snd_pid, 486 info->snd_pid,
485 0, 487 info->snd_seq,
486 netlbl_mgmt_gnl_family.id, 488 netlbl_mgmt_gnl_family.id,
487 NLBL_MGMT_C_VERSION) == NULL) 489 0,
490 NLBL_MGMT_C_VERSION);
491 if (data == NULL)
488 goto version_failure; 492 goto version_failure;
489 493
490 ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION); 494 ret_val = nla_put_u32(ans_skb,
495 NLBL_MGMT_A_VERSION,
496 NETLBL_PROTO_VERSION);
491 if (ret_val != 0) 497 if (ret_val != 0)
492 goto version_failure; 498 goto version_failure;
493 499
494 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); 500 genlmsg_end(ans_skb, data);
501
502 ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
495 if (ret_val != 0) 503 if (ret_val != 0)
496 goto version_failure; 504 goto version_failure;
497
498 return 0; 505 return 0;
499 506
500version_failure: 507version_failure:
501 kfree_skb(ans_skb); 508 kfree_skb(ans_skb);
502 netlbl_netlink_send_ack(info,
503 netlbl_mgmt_gnl_family.id,
504 NLBL_MGMT_C_ACK,
505 -ret_val);
506 return ret_val; 509 return ret_val;
507} 510}
508 511
@@ -513,35 +516,40 @@ version_failure:
513 516
514static struct genl_ops netlbl_mgmt_genl_c_add = { 517static struct genl_ops netlbl_mgmt_genl_c_add = {
515 .cmd = NLBL_MGMT_C_ADD, 518 .cmd = NLBL_MGMT_C_ADD,
516 .flags = 0, 519 .flags = GENL_ADMIN_PERM,
520 .policy = netlbl_mgmt_genl_policy,
517 .doit = netlbl_mgmt_add, 521 .doit = netlbl_mgmt_add,
518 .dumpit = NULL, 522 .dumpit = NULL,
519}; 523};
520 524
521static struct genl_ops netlbl_mgmt_genl_c_remove = { 525static struct genl_ops netlbl_mgmt_genl_c_remove = {
522 .cmd = NLBL_MGMT_C_REMOVE, 526 .cmd = NLBL_MGMT_C_REMOVE,
523 .flags = 0, 527 .flags = GENL_ADMIN_PERM,
528 .policy = netlbl_mgmt_genl_policy,
524 .doit = netlbl_mgmt_remove, 529 .doit = netlbl_mgmt_remove,
525 .dumpit = NULL, 530 .dumpit = NULL,
526}; 531};
527 532
528static struct genl_ops netlbl_mgmt_genl_c_list = { 533static struct genl_ops netlbl_mgmt_genl_c_listall = {
529 .cmd = NLBL_MGMT_C_LIST, 534 .cmd = NLBL_MGMT_C_LISTALL,
530 .flags = 0, 535 .flags = 0,
531 .doit = netlbl_mgmt_list, 536 .policy = netlbl_mgmt_genl_policy,
532 .dumpit = NULL, 537 .doit = NULL,
538 .dumpit = netlbl_mgmt_listall,
533}; 539};
534 540
535static struct genl_ops netlbl_mgmt_genl_c_adddef = { 541static struct genl_ops netlbl_mgmt_genl_c_adddef = {
536 .cmd = NLBL_MGMT_C_ADDDEF, 542 .cmd = NLBL_MGMT_C_ADDDEF,
537 .flags = 0, 543 .flags = GENL_ADMIN_PERM,
544 .policy = netlbl_mgmt_genl_policy,
538 .doit = netlbl_mgmt_adddef, 545 .doit = netlbl_mgmt_adddef,
539 .dumpit = NULL, 546 .dumpit = NULL,
540}; 547};
541 548
542static struct genl_ops netlbl_mgmt_genl_c_removedef = { 549static struct genl_ops netlbl_mgmt_genl_c_removedef = {
543 .cmd = NLBL_MGMT_C_REMOVEDEF, 550 .cmd = NLBL_MGMT_C_REMOVEDEF,
544 .flags = 0, 551 .flags = GENL_ADMIN_PERM,
552 .policy = netlbl_mgmt_genl_policy,
545 .doit = netlbl_mgmt_removedef, 553 .doit = netlbl_mgmt_removedef,
546 .dumpit = NULL, 554 .dumpit = NULL,
547}; 555};
@@ -549,20 +557,23 @@ static struct genl_ops netlbl_mgmt_genl_c_removedef = {
549static struct genl_ops netlbl_mgmt_genl_c_listdef = { 557static struct genl_ops netlbl_mgmt_genl_c_listdef = {
550 .cmd = NLBL_MGMT_C_LISTDEF, 558 .cmd = NLBL_MGMT_C_LISTDEF,
551 .flags = 0, 559 .flags = 0,
560 .policy = netlbl_mgmt_genl_policy,
552 .doit = netlbl_mgmt_listdef, 561 .doit = netlbl_mgmt_listdef,
553 .dumpit = NULL, 562 .dumpit = NULL,
554}; 563};
555 564
556static struct genl_ops netlbl_mgmt_genl_c_modules = { 565static struct genl_ops netlbl_mgmt_genl_c_protocols = {
557 .cmd = NLBL_MGMT_C_MODULES, 566 .cmd = NLBL_MGMT_C_PROTOCOLS,
558 .flags = 0, 567 .flags = 0,
559 .doit = netlbl_mgmt_modules, 568 .policy = netlbl_mgmt_genl_policy,
560 .dumpit = NULL, 569 .doit = NULL,
570 .dumpit = netlbl_mgmt_protocols,
561}; 571};
562 572
563static struct genl_ops netlbl_mgmt_genl_c_version = { 573static struct genl_ops netlbl_mgmt_genl_c_version = {
564 .cmd = NLBL_MGMT_C_VERSION, 574 .cmd = NLBL_MGMT_C_VERSION,
565 .flags = 0, 575 .flags = 0,
576 .policy = netlbl_mgmt_genl_policy,
566 .doit = netlbl_mgmt_version, 577 .doit = netlbl_mgmt_version,
567 .dumpit = NULL, 578 .dumpit = NULL,
568}; 579};
@@ -596,7 +607,7 @@ int netlbl_mgmt_genl_init(void)
596 if (ret_val != 0) 607 if (ret_val != 0)
597 return ret_val; 608 return ret_val;
598 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, 609 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
599 &netlbl_mgmt_genl_c_list); 610 &netlbl_mgmt_genl_c_listall);
600 if (ret_val != 0) 611 if (ret_val != 0)
601 return ret_val; 612 return ret_val;
602 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, 613 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
@@ -612,7 +623,7 @@ int netlbl_mgmt_genl_init(void)
612 if (ret_val != 0) 623 if (ret_val != 0)
613 return ret_val; 624 return ret_val;
614 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, 625 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
615 &netlbl_mgmt_genl_c_modules); 626 &netlbl_mgmt_genl_c_protocols);
616 if (ret_val != 0) 627 if (ret_val != 0)
617 return ret_val; 628 return ret_val;
618 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, 629 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index fd6c6acbfa08..3642d3bfc8eb 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -34,212 +34,137 @@
34#include <net/netlabel.h> 34#include <net/netlabel.h>
35 35
36/* 36/*
37 * The following NetLabel payloads are supported by the management interface, 37 * The following NetLabel payloads are supported by the management interface.
38 * all of which are preceeded by the nlmsghdr struct.
39 *
40 * o ACK:
41 * Sent by the kernel in response to an applications message, applications
42 * should never send this message.
43 *
44 * +----------------------+-----------------------+
45 * | seq number (32 bits) | return code (32 bits) |
46 * +----------------------+-----------------------+
47 *
48 * seq number: the sequence number of the original message, taken from the
49 * nlmsghdr structure
50 * return code: return value, based on errno values
51 * 38 *
52 * o ADD: 39 * o ADD:
53 * Sent by an application to add a domain mapping to the NetLabel system. 40 * Sent by an application to add a domain mapping to the NetLabel system.
54 * The kernel should respond with an ACK.
55 *
56 * +-------------------+
57 * | domains (32 bits) | ...
58 * +-------------------+
59 *
60 * domains: the number of domains in the message
61 *
62 * +--------------------------+-------------------------+
63 * | domain string (variable) | protocol type (32 bits) | ...
64 * +--------------------------+-------------------------+
65 * 41 *
66 * +-------------- ---- --- -- - 42 * Required attributes:
67 * | mapping data ... repeated
68 * +-------------- ---- --- -- -
69 * 43 *
70 * domain string: the domain string, NULL terminated 44 * NLBL_MGMT_A_DOMAIN
71 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) 45 * NLBL_MGMT_A_PROTOCOL
72 * mapping data: specific to the map type (see below)
73 * 46 *
74 * NETLBL_NLTYPE_UNLABELED 47 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
75 * 48 *
76 * No mapping data for this protocol type. 49 * NLBL_MGMT_A_CV4DOI
77 * 50 *
78 * NETLBL_NLTYPE_CIPSOV4 51 * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
79 *
80 * +---------------+
81 * | doi (32 bits) |
82 * +---------------+
83 *
84 * doi: the CIPSO DOI value
85 * 52 *
86 * o REMOVE: 53 * o REMOVE:
87 * Sent by an application to remove a domain mapping from the NetLabel 54 * Sent by an application to remove a domain mapping from the NetLabel
88 * system. The kernel should ACK this message. 55 * system.
89 *
90 * +-------------------+
91 * | domains (32 bits) | ...
92 * +-------------------+
93 * 56 *
94 * domains: the number of domains in the message 57 * Required attributes:
95 * 58 *
96 * +--------------------------+ 59 * NLBL_MGMT_A_DOMAIN
97 * | domain string (variable) | ...
98 * +--------------------------+
99 * 60 *
100 * domain string: the domain string, NULL terminated 61 * o LISTALL:
101 *
102 * o LIST:
103 * This message can be sent either from an application or by the kernel in 62 * This message can be sent either from an application or by the kernel in
104 * response to an application generated LIST message. When sent by an 63 * response to an application generated LISTALL message. When sent by an
105 * application there is no payload. The kernel should respond to a LIST 64 * application there is no payload and the NLM_F_DUMP flag should be set.
106 * message either with a LIST message on success or an ACK message on 65 * The kernel should respond with a series of the following messages.
107 * failure.
108 *
109 * +-------------------+
110 * | domains (32 bits) | ...
111 * +-------------------+
112 *
113 * domains: the number of domains in the message
114 * 66 *
115 * +--------------------------+ 67 * Required attributes:
116 * | domain string (variable) | ...
117 * +--------------------------+
118 * 68 *
119 * +-------------------------+-------------- ---- --- -- - 69 * NLBL_MGMT_A_DOMAIN
120 * | protocol type (32 bits) | mapping data ... repeated 70 * NLBL_MGMT_A_PROTOCOL
121 * +-------------------------+-------------- ---- --- -- -
122 * 71 *
123 * domain string: the domain string, NULL terminated 72 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
124 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
125 * mapping data: specific to the map type (see below)
126 * 73 *
127 * NETLBL_NLTYPE_UNLABELED 74 * NLBL_MGMT_A_CV4DOI
128 * 75 *
129 * No mapping data for this protocol type. 76 * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
130 *
131 * NETLBL_NLTYPE_CIPSOV4
132 *
133 * +----------------+---------------+
134 * | type (32 bits) | doi (32 bits) |
135 * +----------------+---------------+
136 *
137 * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
138 * as CIPSO_V4_MAP_*)
139 * doi: the CIPSO DOI value
140 * 77 *
141 * o ADDDEF: 78 * o ADDDEF:
142 * Sent by an application to set the default domain mapping for the NetLabel 79 * Sent by an application to set the default domain mapping for the NetLabel
143 * system. The kernel should respond with an ACK. 80 * system.
144 * 81 *
145 * +-------------------------+-------------- ---- --- -- - 82 * Required attributes:
146 * | protocol type (32 bits) | mapping data ... repeated
147 * +-------------------------+-------------- ---- --- -- -
148 * 83 *
149 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) 84 * NLBL_MGMT_A_PROTOCOL
150 * mapping data: specific to the map type (see below)
151 * 85 *
152 * NETLBL_NLTYPE_UNLABELED 86 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
153 * 87 *
154 * No mapping data for this protocol type. 88 * NLBL_MGMT_A_CV4DOI
155 * 89 *
156 * NETLBL_NLTYPE_CIPSOV4 90 * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
157 *
158 * +---------------+
159 * | doi (32 bits) |
160 * +---------------+
161 *
162 * doi: the CIPSO DOI value
163 * 91 *
164 * o REMOVEDEF: 92 * o REMOVEDEF:
165 * Sent by an application to remove the default domain mapping from the 93 * Sent by an application to remove the default domain mapping from the
166 * NetLabel system, there is no payload. The kernel should ACK this message. 94 * NetLabel system, there is no payload.
167 * 95 *
168 * o LISTDEF: 96 * o LISTDEF:
169 * This message can be sent either from an application or by the kernel in 97 * This message can be sent either from an application or by the kernel in
170 * response to an application generated LISTDEF message. When sent by an 98 * response to an application generated LISTDEF message. When sent by an
171 * application there is no payload. The kernel should respond to a 99 * application there is no payload. On success the kernel should send a
172 * LISTDEF message either with a LISTDEF message on success or an ACK message 100 * response using the following format.
173 * on failure.
174 *
175 * +-------------------------+-------------- ---- --- -- -
176 * | protocol type (32 bits) | mapping data ... repeated
177 * +-------------------------+-------------- ---- --- -- -
178 * 101 *
179 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) 102 * Required attributes:
180 * mapping data: specific to the map type (see below)
181 * 103 *
182 * NETLBL_NLTYPE_UNLABELED 104 * NLBL_MGMT_A_PROTOCOL
183 * 105 *
184 * No mapping data for this protocol type. 106 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
185 * 107 *
186 * NETLBL_NLTYPE_CIPSOV4 108 * NLBL_MGMT_A_CV4DOI
187 * 109 *
188 * +----------------+---------------+ 110 * If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
189 * | type (32 bits) | doi (32 bits) |
190 * +----------------+---------------+
191 * 111 *
192 * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header 112 * o PROTOCOLS:
193 * as CIPSO_V4_MAP_*) 113 * Sent by an application to request a list of configured NetLabel protocols
194 * doi: the CIPSO DOI value 114 * in the kernel. When sent by an application there is no payload and the
115 * NLM_F_DUMP flag should be set. The kernel should respond with a series of
116 * the following messages.
195 * 117 *
196 * o MODULES: 118 * Required attributes:
197 * Sent by an application to request a list of configured NetLabel modules
198 * in the kernel. When sent by an application there is no payload.
199 * 119 *
200 * +-------------------+ 120 * NLBL_MGMT_A_PROTOCOL
201 * | modules (32 bits) | ...
202 * +-------------------+
203 *
204 * modules: the number of modules in the message, if this is an application
205 * generated message and the value is zero then return a list of
206 * the configured modules
207 *
208 * +------------------+
209 * | module (32 bits) | ... repeated
210 * +------------------+
211 *
212 * module: the module number as defined by NETLBL_NLTYPE_*
213 * 121 *
214 * o VERSION: 122 * o VERSION:
215 * Sent by an application to request the NetLabel version string. When sent 123 * Sent by an application to request the NetLabel version. When sent by an
216 * by an application there is no payload. This message type is also used by 124 * application there is no payload. This message type is also used by the
217 * the kernel to respond to a VERSION request. 125 * kernel to respond to a VERSION request.
218 * 126 *
219 * +-------------------+ 127 * Required attributes:
220 * | version (32 bits) |
221 * +-------------------+
222 * 128 *
223 * version: the protocol version number 129 * NLBL_MGMT_A_VERSION
224 * 130 *
225 */ 131 */
226 132
227/* NetLabel Management commands */ 133/* NetLabel Management commands */
228enum { 134enum {
229 NLBL_MGMT_C_UNSPEC, 135 NLBL_MGMT_C_UNSPEC,
230 NLBL_MGMT_C_ACK,
231 NLBL_MGMT_C_ADD, 136 NLBL_MGMT_C_ADD,
232 NLBL_MGMT_C_REMOVE, 137 NLBL_MGMT_C_REMOVE,
233 NLBL_MGMT_C_LIST, 138 NLBL_MGMT_C_LISTALL,
234 NLBL_MGMT_C_ADDDEF, 139 NLBL_MGMT_C_ADDDEF,
235 NLBL_MGMT_C_REMOVEDEF, 140 NLBL_MGMT_C_REMOVEDEF,
236 NLBL_MGMT_C_LISTDEF, 141 NLBL_MGMT_C_LISTDEF,
237 NLBL_MGMT_C_MODULES, 142 NLBL_MGMT_C_PROTOCOLS,
238 NLBL_MGMT_C_VERSION, 143 NLBL_MGMT_C_VERSION,
239 __NLBL_MGMT_C_MAX, 144 __NLBL_MGMT_C_MAX,
240}; 145};
241#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) 146#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
242 147
148/* NetLabel Management attributes */
149enum {
150 NLBL_MGMT_A_UNSPEC,
151 NLBL_MGMT_A_DOMAIN,
152 /* (NLA_NUL_STRING)
153 * the NULL terminated LSM domain string */
154 NLBL_MGMT_A_PROTOCOL,
155 /* (NLA_U32)
156 * the NetLabel protocol type (defined by NETLBL_NLTYPE_*) */
157 NLBL_MGMT_A_VERSION,
158 /* (NLA_U32)
159 * the NetLabel protocol version number (defined by
160 * NETLBL_PROTO_VERSION) */
161 NLBL_MGMT_A_CV4DOI,
162 /* (NLA_U32)
163 * the CIPSOv4 DOI value */
164 __NLBL_MGMT_A_MAX,
165};
166#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1)
167
243/* NetLabel protocol functions */ 168/* NetLabel protocol functions */
244int netlbl_mgmt_genl_init(void); 169int netlbl_mgmt_genl_init(void);
245 170
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 785f4960e0d3..440f5c4e1e2d 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -55,9 +55,13 @@ static struct genl_family netlbl_unlabel_gnl_family = {
55 .hdrsize = 0, 55 .hdrsize = 0,
56 .name = NETLBL_NLTYPE_UNLABELED_NAME, 56 .name = NETLBL_NLTYPE_UNLABELED_NAME,
57 .version = NETLBL_PROTO_VERSION, 57 .version = NETLBL_PROTO_VERSION,
58 .maxattr = 0, 58 .maxattr = NLBL_UNLABEL_A_MAX,
59}; 59};
60 60
61/* NetLabel Netlink attribute policy */
62static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
63 [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 },
64};
61 65
62/* 66/*
63 * NetLabel Command Handlers 67 * NetLabel Command Handlers
@@ -75,31 +79,18 @@ static struct genl_family netlbl_unlabel_gnl_family = {
75 */ 79 */
76static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) 80static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
77{ 81{
78 int ret_val; 82 int ret_val = -EINVAL;
79 struct nlattr *data = netlbl_netlink_payload_data(skb); 83 u8 value;
80 u32 value;
81
82 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
83 if (ret_val != 0)
84 return ret_val;
85 84
86 if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) { 85 if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
87 value = nla_get_u32(data); 86 value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
88 if (value == 1 || value == 0) { 87 if (value == 1 || value == 0) {
89 atomic_set(&netlabel_unlabel_accept_flg, value); 88 atomic_set(&netlabel_unlabel_accept_flg, value);
90 netlbl_netlink_send_ack(info, 89 ret_val = 0;
91 netlbl_unlabel_gnl_family.id,
92 NLBL_UNLABEL_C_ACK,
93 NETLBL_E_OK);
94 return 0;
95 } 90 }
96 } 91 }
97 92
98 netlbl_netlink_send_ack(info, 93 return ret_val;
99 netlbl_unlabel_gnl_family.id,
100 NLBL_UNLABEL_C_ACK,
101 EINVAL);
102 return -EINVAL;
103} 94}
104 95
105/** 96/**
@@ -114,39 +105,39 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
114 */ 105 */
115static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) 106static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
116{ 107{
117 int ret_val = -ENOMEM; 108 int ret_val = -EINVAL;
118 struct sk_buff *ans_skb; 109 struct sk_buff *ans_skb;
110 void *data;
119 111
120 ans_skb = netlbl_netlink_alloc_skb(0, 112 ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
121 GENL_HDRLEN + NETLBL_LEN_U32,
122 GFP_KERNEL);
123 if (ans_skb == NULL) 113 if (ans_skb == NULL)
124 goto list_failure; 114 goto list_failure;
125 115 data = netlbl_netlink_hdr_put(ans_skb,
126 if (netlbl_netlink_hdr_put(ans_skb, 116 info->snd_pid,
127 info->snd_pid, 117 info->snd_seq,
128 0, 118 netlbl_unlabel_gnl_family.id,
129 netlbl_unlabel_gnl_family.id, 119 0,
130 NLBL_UNLABEL_C_LIST) == NULL) 120 NLBL_UNLABEL_C_LIST);
121 if (data == NULL) {
122 ret_val = -ENOMEM;
131 goto list_failure; 123 goto list_failure;
124 }
132 125
133 ret_val = nla_put_u32(ans_skb, 126 ret_val = nla_put_u8(ans_skb,
134 NLA_U32, 127 NLBL_UNLABEL_A_ACPTFLG,
135 atomic_read(&netlabel_unlabel_accept_flg)); 128 atomic_read(&netlabel_unlabel_accept_flg));
136 if (ret_val != 0) 129 if (ret_val != 0)
137 goto list_failure; 130 goto list_failure;
138 131
139 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); 132 genlmsg_end(ans_skb, data);
133
134 ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
140 if (ret_val != 0) 135 if (ret_val != 0)
141 goto list_failure; 136 goto list_failure;
142
143 return 0; 137 return 0;
144 138
145list_failure: 139list_failure:
146 netlbl_netlink_send_ack(info, 140 kfree(ans_skb);
147 netlbl_unlabel_gnl_family.id,
148 NLBL_UNLABEL_C_ACK,
149 -ret_val);
150 return ret_val; 141 return ret_val;
151} 142}
152 143
@@ -157,7 +148,8 @@ list_failure:
157 148
158static struct genl_ops netlbl_unlabel_genl_c_accept = { 149static struct genl_ops netlbl_unlabel_genl_c_accept = {
159 .cmd = NLBL_UNLABEL_C_ACCEPT, 150 .cmd = NLBL_UNLABEL_C_ACCEPT,
160 .flags = 0, 151 .flags = GENL_ADMIN_PERM,
152 .policy = netlbl_unlabel_genl_policy,
161 .doit = netlbl_unlabel_accept, 153 .doit = netlbl_unlabel_accept,
162 .dumpit = NULL, 154 .dumpit = NULL,
163}; 155};
@@ -165,6 +157,7 @@ static struct genl_ops netlbl_unlabel_genl_c_accept = {
165static struct genl_ops netlbl_unlabel_genl_c_list = { 157static struct genl_ops netlbl_unlabel_genl_c_list = {
166 .cmd = NLBL_UNLABEL_C_LIST, 158 .cmd = NLBL_UNLABEL_C_LIST,
167 .flags = 0, 159 .flags = 0,
160 .policy = netlbl_unlabel_genl_policy,
168 .doit = netlbl_unlabel_list, 161 .doit = netlbl_unlabel_list,
169 .dumpit = NULL, 162 .dumpit = NULL,
170}; 163};
@@ -218,10 +211,8 @@ int netlbl_unlabel_genl_init(void)
218 */ 211 */
219int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) 212int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
220{ 213{
221 if (atomic_read(&netlabel_unlabel_accept_flg) == 1) { 214 if (atomic_read(&netlabel_unlabel_accept_flg) == 1)
222 memset(secattr, 0, sizeof(*secattr)); 215 return netlbl_secattr_init(secattr);
223 return 0;
224 }
225 216
226 return -ENOMSG; 217 return -ENOMSG;
227} 218}
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index f300e54e14b6..c2917fbb42cf 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -36,56 +36,47 @@
36/* 36/*
37 * The following NetLabel payloads are supported by the Unlabeled subsystem. 37 * The following NetLabel payloads are supported by the Unlabeled subsystem.
38 * 38 *
39 * o ACK:
40 * Sent by the kernel in response to an applications message, applications
41 * should never send this message.
42 *
43 * +----------------------+-----------------------+
44 * | seq number (32 bits) | return code (32 bits) |
45 * +----------------------+-----------------------+
46 *
47 * seq number: the sequence number of the original message, taken from the
48 * nlmsghdr structure
49 * return code: return value, based on errno values
50 *
51 * o ACCEPT 39 * o ACCEPT
52 * This message is sent from an application to specify if the kernel should 40 * This message is sent from an application to specify if the kernel should
53 * allow unlabeled packets to pass if they do not match any of the static 41 * allow unlabeled packets to pass if they do not match any of the static
54 * mappings defined in the unlabeled module. 42 * mappings defined in the unlabeled module.
55 * 43 *
56 * +-----------------+ 44 * Required attributes:
57 * | allow (32 bits) |
58 * +-----------------+
59 * 45 *
60 * allow: if true (1) then allow the packets to pass, if false (0) then 46 * NLBL_UNLABEL_A_ACPTFLG
61 * reject the packets
62 * 47 *
63 * o LIST 48 * o LIST
64 * This message can be sent either from an application or by the kernel in 49 * This message can be sent either from an application or by the kernel in
65 * response to an application generated LIST message. When sent by an 50 * response to an application generated LIST message. When sent by an
66 * application there is no payload. The kernel should respond to a LIST 51 * application there is no payload. The kernel should respond to a LIST
67 * message either with a LIST message on success or an ACK message on 52 * message with a LIST message on success.
68 * failure.
69 * 53 *
70 * +-----------------------+ 54 * Required attributes:
71 * | accept flag (32 bits) |
72 * +-----------------------+
73 * 55 *
74 * accept flag: if true (1) then unlabeled packets are allowed to pass, 56 * NLBL_UNLABEL_A_ACPTFLG
75 * if false (0) then unlabeled packets are rejected
76 * 57 *
77 */ 58 */
78 59
79/* NetLabel Unlabeled commands */ 60/* NetLabel Unlabeled commands */
80enum { 61enum {
81 NLBL_UNLABEL_C_UNSPEC, 62 NLBL_UNLABEL_C_UNSPEC,
82 NLBL_UNLABEL_C_ACK,
83 NLBL_UNLABEL_C_ACCEPT, 63 NLBL_UNLABEL_C_ACCEPT,
84 NLBL_UNLABEL_C_LIST, 64 NLBL_UNLABEL_C_LIST,
85 __NLBL_UNLABEL_C_MAX, 65 __NLBL_UNLABEL_C_MAX,
86}; 66};
87#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) 67#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
88 68
69/* NetLabel Unlabeled attributes */
70enum {
71 NLBL_UNLABEL_A_UNSPEC,
72 NLBL_UNLABEL_A_ACPTFLG,
73 /* (NLA_U8)
74 * if true then unlabeled packets are allowed to pass, else unlabeled
75 * packets are rejected */
76 __NLBL_UNLABEL_A_MAX,
77};
78#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1)
79
89/* NetLabel protocol functions */ 80/* NetLabel protocol functions */
90int netlbl_unlabel_genl_init(void); 81int netlbl_unlabel_genl_init(void);
91 82
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 73cbe66e42ff..eeb7d768d2bb 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -74,85 +74,3 @@ int netlbl_netlink_init(void)
74 74
75 return 0; 75 return 0;
76} 76}
77
78/*
79 * NetLabel Common Protocol Functions
80 */
81
82/**
83 * netlbl_netlink_send_ack - Send an ACK message
84 * @info: the generic NETLINK information
85 * @genl_family: the generic NETLINK family ID value
86 * @ack_cmd: the generic NETLINK family ACK command value
87 * @ret_code: return code to use
88 *
89 * Description:
90 * This function sends an ACK message to the sender of the NETLINK message
91 * specified by @info.
92 *
93 */
94void netlbl_netlink_send_ack(const struct genl_info *info,
95 u32 genl_family,
96 u8 ack_cmd,
97 u32 ret_code)
98{
99 size_t data_size;
100 struct sk_buff *skb;
101
102 data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32;
103 skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
104 if (skb == NULL)
105 return;
106
107 if (netlbl_netlink_hdr_put(skb,
108 info->snd_pid,
109 0,
110 genl_family,
111 ack_cmd) == NULL)
112 goto send_ack_failure;
113
114 if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0)
115 goto send_ack_failure;
116 if (nla_put_u32(skb, NLA_U32, ret_code) != 0)
117 goto send_ack_failure;
118
119 netlbl_netlink_snd(skb, info->snd_pid);
120 return;
121
122send_ack_failure:
123 kfree_skb(skb);
124}
125
126/*
127 * NETLINK I/O Functions
128 */
129
130/**
131 * netlbl_netlink_snd - Send a NetLabel message
132 * @skb: NetLabel message
133 * @pid: destination PID
134 *
135 * Description:
136 * Sends a unicast NetLabel message over the NETLINK socket.
137 *
138 */
139int netlbl_netlink_snd(struct sk_buff *skb, u32 pid)
140{
141 return genlmsg_unicast(skb, pid);
142}
143
144/**
145 * netlbl_netlink_snd - Send a NetLabel message
146 * @skb: NetLabel message
147 * @pid: sending PID
148 * @group: multicast group id
149 *
150 * Description:
151 * Sends a multicast NetLabel message over the NETLINK socket to all members
152 * of @group except @pid.
153 *
154 */
155int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group)
156{
157 return genlmsg_multicast(skb, pid, group, GFP_KERNEL);
158}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 385a6c7488c6..3f9386b917df 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -41,72 +41,6 @@
41/* NetLabel NETLINK helper functions */ 41/* NetLabel NETLINK helper functions */
42 42
43/** 43/**
44 * netlbl_netlink_cap_check - Check the NETLINK msg capabilities
45 * @skb: the NETLINK buffer
46 * @req_cap: the required capability
47 *
48 * Description:
49 * Check the NETLINK buffer's capabilities against the required capabilities.
50 * Returns zero on success, negative values on failure.
51 *
52 */
53static inline int netlbl_netlink_cap_check(const struct sk_buff *skb,
54 kernel_cap_t req_cap)
55{
56 if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap))
57 return 0;
58 return -EPERM;
59}
60
61/**
62 * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on
63 * @nla: the attribute
64 * @rem_len: remaining length
65 *
66 * Description:
67 * Return a u8 value pointed to by @nla and advance it to the next attribute.
68 *
69 */
70static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len)
71{
72 u8 val = nla_get_u8(*nla);
73 *nla = nla_next(*nla, rem_len);
74 return val;
75}
76
77/**
78 * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on
79 * @nla: the attribute
80 * @rem_len: remaining length
81 *
82 * Description:
83 * Return a u16 value pointed to by @nla and advance it to the next attribute.
84 *
85 */
86static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len)
87{
88 u16 val = nla_get_u16(*nla);
89 *nla = nla_next(*nla, rem_len);
90 return val;
91}
92
93/**
94 * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on
95 * @nla: the attribute
96 * @rem_len: remaining length
97 *
98 * Description:
99 * Return a u32 value pointed to by @nla and advance it to the next attribute.
100 *
101 */
102static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len)
103{
104 u32 val = nla_get_u32(*nla);
105 *nla = nla_next(*nla, rem_len);
106 return val;
107}
108
109/**
110 * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff 44 * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
111 * @skb: the packet 45 * @skb: the packet
112 * @pid: the PID of the recipient 46 * @pid: the PID of the recipient
@@ -124,6 +58,7 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
124 u32 pid, 58 u32 pid,
125 u32 seq, 59 u32 seq,
126 int type, 60 int type,
61 int flags,
127 u8 cmd) 62 u8 cmd)
128{ 63{
129 return genlmsg_put(skb, 64 return genlmsg_put(skb,
@@ -131,85 +66,13 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
131 seq, 66 seq,
132 type, 67 type,
133 0, 68 0,
134 0, 69 flags,
135 cmd, 70 cmd,
136 NETLBL_PROTO_VERSION); 71 NETLBL_PROTO_VERSION);
137} 72}
138 73
139/**
140 * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff
141 * @skb: the packet
142 * @pid: the PID of the receipient
143 * @seq: the sequence number
144 * @type: the generic NETLINK message family type
145 * @cmd: command
146 *
147 * Description:
148 * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
149 * struct to the packet.
150 *
151 */
152static inline void netlbl_netlink_hdr_push(struct sk_buff *skb,
153 u32 pid,
154 u32 seq,
155 int type,
156 u8 cmd)
157
158{
159 struct nlmsghdr *nlh;
160 struct genlmsghdr *hdr;
161
162 nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN));
163 nlh->nlmsg_type = type;
164 nlh->nlmsg_len = skb->len;
165 nlh->nlmsg_flags = 0;
166 nlh->nlmsg_pid = pid;
167 nlh->nlmsg_seq = seq;
168
169 hdr = nlmsg_data(nlh);
170 hdr->cmd = cmd;
171 hdr->version = NETLBL_PROTO_VERSION;
172 hdr->reserved = 0;
173}
174
175/**
176 * netlbl_netlink_payload_len - Return the length of the payload
177 * @skb: the NETLINK buffer
178 *
179 * Description:
180 * This function returns the length of the NetLabel payload.
181 *
182 */
183static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb)
184{
185 return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN;
186}
187
188/**
189 * netlbl_netlink_payload_data - Returns a pointer to the start of the payload
190 * @skb: the NETLINK buffer
191 *
192 * Description:
193 * This function returns a pointer to the start of the NetLabel payload.
194 *
195 */
196static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb)
197{
198 return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) +
199 GENL_HDRLEN;
200}
201
202/* NetLabel common protocol functions */
203
204void netlbl_netlink_send_ack(const struct genl_info *info,
205 u32 genl_family,
206 u8 ack_cmd,
207 u32 ret_code);
208
209/* NetLabel NETLINK I/O functions */ 74/* NetLabel NETLINK I/O functions */
210 75
211int netlbl_netlink_init(void); 76int netlbl_netlink_init(void);
212int netlbl_netlink_snd(struct sk_buff *skb, u32 pid);
213int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group);
214 77
215#endif 78#endif
diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig
index 814ddc42f1f4..293dbd6246c1 100644
--- a/security/selinux/Kconfig
+++ b/security/selinux/Kconfig
@@ -124,3 +124,40 @@ config SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT
124 124
125 If you are unsure what to do here, select N. 125 If you are unsure what to do here, select N.
126 126
127config SECURITY_SELINUX_POLICYDB_VERSION_MAX
128 bool "NSA SELinux maximum supported policy format version"
129 depends on SECURITY_SELINUX
130 default n
131 help
132 This option enables the maximum policy format version supported
133 by SELinux to be set to a particular value. This value is reported
134 to userspace via /selinux/policyvers and used at policy load time.
135 It can be adjusted downward to support legacy userland (init) that
136 does not correctly handle kernels that support newer policy versions.
137
138 Examples:
139 For the Fedora Core 3 or 4 Linux distributions, enable this option
140 and set the value via the next option. For Fedora Core 5 and later,
141 do not enable this option.
142
143 If you are unsure how to answer this question, answer N.
144
145config SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
146 int "NSA SELinux maximum supported policy format version value"
147 depends on SECURITY_SELINUX_POLICYDB_VERSION_MAX
148 range 15 21
149 default 19
150 help
151 This option sets the value for the maximum policy format version
152 supported by SELinux.
153
154 Examples:
155 For Fedora Core 3, use 18.
156 For Fedora Core 4, use 19.
157
158 If you are unsure how to answer this question, look for the
159 policy format version supported by your policy toolchain, by
160 running 'checkpolicy -V'. Or look at what policy you have
161 installed under /etc/selinux/$SELINUXTYPE/policy, where
162 SELINUXTYPE is defined in your /etc/selinux/config.
163
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index 9d7737db5e51..b6f96943be1f 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -21,19 +21,10 @@
21#include "security.h" 21#include "security.h"
22#include "objsec.h" 22#include "objsec.h"
23 23
24void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) 24int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
25{ 25{
26 struct task_security_struct *tsec = tsk->security;
27 if (selinux_enabled) 26 if (selinux_enabled)
28 *ctxid = tsec->sid; 27 return security_sid_to_context(sid, ctx, ctxlen);
29 else
30 *ctxid = 0;
31}
32
33int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
34{
35 if (selinux_enabled)
36 return security_sid_to_context(ctxid, ctx, ctxlen);
37 else { 28 else {
38 *ctx = NULL; 29 *ctx = NULL;
39 *ctxlen = 0; 30 *ctxlen = 0;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5a66c4c09f7a..e4d81a42fca4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -51,7 +51,6 @@
51#include <net/ip.h> /* for sysctl_local_port_range[] */ 51#include <net/ip.h> /* for sysctl_local_port_range[] */
52#include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */ 52#include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <asm/semaphore.h>
55#include <asm/ioctls.h> 54#include <asm/ioctls.h>
56#include <linux/bitops.h> 55#include <linux/bitops.h>
57#include <linux/interrupt.h> 56#include <linux/interrupt.h>
@@ -71,6 +70,7 @@
71#include <linux/audit.h> 70#include <linux/audit.h>
72#include <linux/string.h> 71#include <linux/string.h>
73#include <linux/selinux.h> 72#include <linux/selinux.h>
73#include <linux/mutex.h>
74 74
75#include "avc.h" 75#include "avc.h"
76#include "objsec.h" 76#include "objsec.h"
@@ -185,7 +185,7 @@ static int inode_alloc_security(struct inode *inode)
185 return -ENOMEM; 185 return -ENOMEM;
186 186
187 memset(isec, 0, sizeof(*isec)); 187 memset(isec, 0, sizeof(*isec));
188 init_MUTEX(&isec->sem); 188 mutex_init(&isec->lock);
189 INIT_LIST_HEAD(&isec->list); 189 INIT_LIST_HEAD(&isec->list);
190 isec->inode = inode; 190 isec->inode = inode;
191 isec->sid = SECINITSID_UNLABELED; 191 isec->sid = SECINITSID_UNLABELED;
@@ -242,7 +242,7 @@ static int superblock_alloc_security(struct super_block *sb)
242 if (!sbsec) 242 if (!sbsec)
243 return -ENOMEM; 243 return -ENOMEM;
244 244
245 init_MUTEX(&sbsec->sem); 245 mutex_init(&sbsec->lock);
246 INIT_LIST_HEAD(&sbsec->list); 246 INIT_LIST_HEAD(&sbsec->list);
247 INIT_LIST_HEAD(&sbsec->isec_head); 247 INIT_LIST_HEAD(&sbsec->isec_head);
248 spin_lock_init(&sbsec->isec_lock); 248 spin_lock_init(&sbsec->isec_lock);
@@ -594,7 +594,7 @@ static int superblock_doinit(struct super_block *sb, void *data)
594 struct inode *inode = root->d_inode; 594 struct inode *inode = root->d_inode;
595 int rc = 0; 595 int rc = 0;
596 596
597 down(&sbsec->sem); 597 mutex_lock(&sbsec->lock);
598 if (sbsec->initialized) 598 if (sbsec->initialized)
599 goto out; 599 goto out;
600 600
@@ -689,7 +689,7 @@ next_inode:
689 } 689 }
690 spin_unlock(&sbsec->isec_lock); 690 spin_unlock(&sbsec->isec_lock);
691out: 691out:
692 up(&sbsec->sem); 692 mutex_unlock(&sbsec->lock);
693 return rc; 693 return rc;
694} 694}
695 695
@@ -843,15 +843,13 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
843 char *context = NULL; 843 char *context = NULL;
844 unsigned len = 0; 844 unsigned len = 0;
845 int rc = 0; 845 int rc = 0;
846 int hold_sem = 0;
847 846
848 if (isec->initialized) 847 if (isec->initialized)
849 goto out; 848 goto out;
850 849
851 down(&isec->sem); 850 mutex_lock(&isec->lock);
852 hold_sem = 1;
853 if (isec->initialized) 851 if (isec->initialized)
854 goto out; 852 goto out_unlock;
855 853
856 sbsec = inode->i_sb->s_security; 854 sbsec = inode->i_sb->s_security;
857 if (!sbsec->initialized) { 855 if (!sbsec->initialized) {
@@ -862,7 +860,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
862 if (list_empty(&isec->list)) 860 if (list_empty(&isec->list))
863 list_add(&isec->list, &sbsec->isec_head); 861 list_add(&isec->list, &sbsec->isec_head);
864 spin_unlock(&sbsec->isec_lock); 862 spin_unlock(&sbsec->isec_lock);
865 goto out; 863 goto out_unlock;
866 } 864 }
867 865
868 switch (sbsec->behavior) { 866 switch (sbsec->behavior) {
@@ -885,7 +883,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
885 printk(KERN_WARNING "%s: no dentry for dev=%s " 883 printk(KERN_WARNING "%s: no dentry for dev=%s "
886 "ino=%ld\n", __FUNCTION__, inode->i_sb->s_id, 884 "ino=%ld\n", __FUNCTION__, inode->i_sb->s_id,
887 inode->i_ino); 885 inode->i_ino);
888 goto out; 886 goto out_unlock;
889 } 887 }
890 888
891 len = INITCONTEXTLEN; 889 len = INITCONTEXTLEN;
@@ -893,7 +891,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
893 if (!context) { 891 if (!context) {
894 rc = -ENOMEM; 892 rc = -ENOMEM;
895 dput(dentry); 893 dput(dentry);
896 goto out; 894 goto out_unlock;
897 } 895 }
898 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, 896 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
899 context, len); 897 context, len);
@@ -903,7 +901,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
903 NULL, 0); 901 NULL, 0);
904 if (rc < 0) { 902 if (rc < 0) {
905 dput(dentry); 903 dput(dentry);
906 goto out; 904 goto out_unlock;
907 } 905 }
908 kfree(context); 906 kfree(context);
909 len = rc; 907 len = rc;
@@ -911,7 +909,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
911 if (!context) { 909 if (!context) {
912 rc = -ENOMEM; 910 rc = -ENOMEM;
913 dput(dentry); 911 dput(dentry);
914 goto out; 912 goto out_unlock;
915 } 913 }
916 rc = inode->i_op->getxattr(dentry, 914 rc = inode->i_op->getxattr(dentry,
917 XATTR_NAME_SELINUX, 915 XATTR_NAME_SELINUX,
@@ -924,7 +922,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
924 "%d for dev=%s ino=%ld\n", __FUNCTION__, 922 "%d for dev=%s ino=%ld\n", __FUNCTION__,
925 -rc, inode->i_sb->s_id, inode->i_ino); 923 -rc, inode->i_sb->s_id, inode->i_ino);
926 kfree(context); 924 kfree(context);
927 goto out; 925 goto out_unlock;
928 } 926 }
929 /* Map ENODATA to the default file SID */ 927 /* Map ENODATA to the default file SID */
930 sid = sbsec->def_sid; 928 sid = sbsec->def_sid;
@@ -960,7 +958,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
960 isec->sclass, 958 isec->sclass,
961 &sid); 959 &sid);
962 if (rc) 960 if (rc)
963 goto out; 961 goto out_unlock;
964 isec->sid = sid; 962 isec->sid = sid;
965 break; 963 break;
966 case SECURITY_FS_USE_MNTPOINT: 964 case SECURITY_FS_USE_MNTPOINT:
@@ -978,7 +976,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
978 isec->sclass, 976 isec->sclass,
979 &sid); 977 &sid);
980 if (rc) 978 if (rc)
981 goto out; 979 goto out_unlock;
982 isec->sid = sid; 980 isec->sid = sid;
983 } 981 }
984 } 982 }
@@ -987,12 +985,11 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
987 985
988 isec->initialized = 1; 986 isec->initialized = 1;
989 987
988out_unlock:
989 mutex_unlock(&isec->lock);
990out: 990out:
991 if (isec->sclass == SECCLASS_FILE) 991 if (isec->sclass == SECCLASS_FILE)
992 isec->sclass = inode_mode_to_security_class(inode->i_mode); 992 isec->sclass = inode_mode_to_security_class(inode->i_mode);
993
994 if (hold_sem)
995 up(&isec->sem);
996 return rc; 993 return rc;
997} 994}
998 995
@@ -1364,25 +1361,6 @@ static inline u32 file_to_av(struct file *file)
1364 return av; 1361 return av;
1365} 1362}
1366 1363
1367/* Set an inode's SID to a specified value. */
1368static int inode_security_set_sid(struct inode *inode, u32 sid)
1369{
1370 struct inode_security_struct *isec = inode->i_security;
1371 struct superblock_security_struct *sbsec = inode->i_sb->s_security;
1372
1373 if (!sbsec->initialized) {
1374 /* Defer initialization to selinux_complete_init. */
1375 return 0;
1376 }
1377
1378 down(&isec->sem);
1379 isec->sclass = inode_mode_to_security_class(inode->i_mode);
1380 isec->sid = sid;
1381 isec->initialized = 1;
1382 up(&isec->sem);
1383 return 0;
1384}
1385
1386/* Hook functions begin here. */ 1364/* Hook functions begin here. */
1387 1365
1388static int selinux_ptrace(struct task_struct *parent, struct task_struct *child) 1366static int selinux_ptrace(struct task_struct *parent, struct task_struct *child)
@@ -1711,10 +1689,12 @@ static inline void flush_unauthorized_files(struct files_struct * files)
1711{ 1689{
1712 struct avc_audit_data ad; 1690 struct avc_audit_data ad;
1713 struct file *file, *devnull = NULL; 1691 struct file *file, *devnull = NULL;
1714 struct tty_struct *tty = current->signal->tty; 1692 struct tty_struct *tty;
1715 struct fdtable *fdt; 1693 struct fdtable *fdt;
1716 long j = -1; 1694 long j = -1;
1717 1695
1696 mutex_lock(&tty_mutex);
1697 tty = current->signal->tty;
1718 if (tty) { 1698 if (tty) {
1719 file_list_lock(); 1699 file_list_lock();
1720 file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list); 1700 file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
@@ -1734,6 +1714,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
1734 } 1714 }
1735 file_list_unlock(); 1715 file_list_unlock();
1736 } 1716 }
1717 mutex_unlock(&tty_mutex);
1737 1718
1738 /* Revalidate access to inherited open files. */ 1719 /* Revalidate access to inherited open files. */
1739 1720
@@ -2091,7 +2072,13 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
2091 } 2072 }
2092 } 2073 }
2093 2074
2094 inode_security_set_sid(inode, newsid); 2075 /* Possibly defer initialization to selinux_complete_init. */
2076 if (sbsec->initialized) {
2077 struct inode_security_struct *isec = inode->i_security;
2078 isec->sclass = inode_mode_to_security_class(inode->i_mode);
2079 isec->sid = newsid;
2080 isec->initialized = 1;
2081 }
2095 2082
2096 if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT) 2083 if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT)
2097 return -EOPNOTSUPP; 2084 return -EOPNOTSUPP;
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 0a39bfd1319f..ef2267fea8bd 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -44,7 +44,7 @@ struct inode_security_struct {
44 u32 sid; /* SID of this object */ 44 u32 sid; /* SID of this object */
45 u16 sclass; /* security class of this object */ 45 u16 sclass; /* security class of this object */
46 unsigned char initialized; /* initialization flag */ 46 unsigned char initialized; /* initialization flag */
47 struct semaphore sem; 47 struct mutex lock;
48 unsigned char inherit; /* inherit SID from parent entry */ 48 unsigned char inherit; /* inherit SID from parent entry */
49}; 49};
50 50
@@ -63,7 +63,7 @@ struct superblock_security_struct {
63 unsigned int behavior; /* labeling behavior */ 63 unsigned int behavior; /* labeling behavior */
64 unsigned char initialized; /* initialization flag */ 64 unsigned char initialized; /* initialization flag */
65 unsigned char proc; /* proc fs */ 65 unsigned char proc; /* proc fs */
66 struct semaphore sem; 66 struct mutex lock;
67 struct list_head isec_head; 67 struct list_head isec_head;
68 spinlock_t isec_lock; 68 spinlock_t isec_lock;
69}; 69};
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 911954a692fa..1ef79172cc8c 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -24,10 +24,15 @@
24#define POLICYDB_VERSION_VALIDATETRANS 19 24#define POLICYDB_VERSION_VALIDATETRANS 19
25#define POLICYDB_VERSION_MLS 19 25#define POLICYDB_VERSION_MLS 19
26#define POLICYDB_VERSION_AVTAB 20 26#define POLICYDB_VERSION_AVTAB 20
27#define POLICYDB_VERSION_RANGETRANS 21
27 28
28/* Range of policy versions we understand*/ 29/* Range of policy versions we understand*/
29#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE 30#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE
30#define POLICYDB_VERSION_MAX POLICYDB_VERSION_AVTAB 31#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
32#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
33#else
34#define POLICYDB_VERSION_MAX POLICYDB_VERSION_RANGETRANS
35#endif
31 36
32extern int selinux_enabled; 37extern int selinux_enabled;
33extern int selinux_mls_enabled; 38extern int selinux_mls_enabled;
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 119bd6078ba1..c713af23250a 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -530,22 +530,21 @@ int mls_compute_sid(struct context *scontext,
530 u32 specified, 530 u32 specified,
531 struct context *newcontext) 531 struct context *newcontext)
532{ 532{
533 struct range_trans *rtr;
534
533 if (!selinux_mls_enabled) 535 if (!selinux_mls_enabled)
534 return 0; 536 return 0;
535 537
536 switch (specified) { 538 switch (specified) {
537 case AVTAB_TRANSITION: 539 case AVTAB_TRANSITION:
538 if (tclass == SECCLASS_PROCESS) { 540 /* Look for a range transition rule. */
539 struct range_trans *rangetr; 541 for (rtr = policydb.range_tr; rtr; rtr = rtr->next) {
540 /* Look for a range transition rule. */ 542 if (rtr->source_type == scontext->type &&
541 for (rangetr = policydb.range_tr; rangetr; 543 rtr->target_type == tcontext->type &&
542 rangetr = rangetr->next) { 544 rtr->target_class == tclass) {
543 if (rangetr->dom == scontext->type && 545 /* Set the range from the rule */
544 rangetr->type == tcontext->type) { 546 return mls_range_set(newcontext,
545 /* Set the range from the rule */ 547 &rtr->target_range);
546 return mls_range_set(newcontext,
547 &rangetr->range);
548 }
549 } 548 }
550 } 549 }
551 /* Fallthrough */ 550 /* Fallthrough */
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index f03960e697ce..b18895302555 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -96,6 +96,11 @@ static struct policydb_compat_info policydb_compat[] = {
96 .sym_num = SYM_NUM, 96 .sym_num = SYM_NUM,
97 .ocon_num = OCON_NUM, 97 .ocon_num = OCON_NUM,
98 }, 98 },
99 {
100 .version = POLICYDB_VERSION_RANGETRANS,
101 .sym_num = SYM_NUM,
102 .ocon_num = OCON_NUM,
103 },
99}; 104};
100 105
101static struct policydb_compat_info *policydb_lookup_compat(int version) 106static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -645,15 +650,15 @@ void policydb_destroy(struct policydb *p)
645 650
646 for (rt = p->range_tr; rt; rt = rt -> next) { 651 for (rt = p->range_tr; rt; rt = rt -> next) {
647 if (lrt) { 652 if (lrt) {
648 ebitmap_destroy(&lrt->range.level[0].cat); 653 ebitmap_destroy(&lrt->target_range.level[0].cat);
649 ebitmap_destroy(&lrt->range.level[1].cat); 654 ebitmap_destroy(&lrt->target_range.level[1].cat);
650 kfree(lrt); 655 kfree(lrt);
651 } 656 }
652 lrt = rt; 657 lrt = rt;
653 } 658 }
654 if (lrt) { 659 if (lrt) {
655 ebitmap_destroy(&lrt->range.level[0].cat); 660 ebitmap_destroy(&lrt->target_range.level[0].cat);
656 ebitmap_destroy(&lrt->range.level[1].cat); 661 ebitmap_destroy(&lrt->target_range.level[1].cat);
657 kfree(lrt); 662 kfree(lrt);
658 } 663 }
659 664
@@ -1829,6 +1834,7 @@ int policydb_read(struct policydb *p, void *fp)
1829 } 1834 }
1830 1835
1831 if (p->policyvers >= POLICYDB_VERSION_MLS) { 1836 if (p->policyvers >= POLICYDB_VERSION_MLS) {
1837 int new_rangetr = p->policyvers >= POLICYDB_VERSION_RANGETRANS;
1832 rc = next_entry(buf, fp, sizeof(u32)); 1838 rc = next_entry(buf, fp, sizeof(u32));
1833 if (rc < 0) 1839 if (rc < 0)
1834 goto bad; 1840 goto bad;
@@ -1847,9 +1853,16 @@ int policydb_read(struct policydb *p, void *fp)
1847 rc = next_entry(buf, fp, (sizeof(u32) * 2)); 1853 rc = next_entry(buf, fp, (sizeof(u32) * 2));
1848 if (rc < 0) 1854 if (rc < 0)
1849 goto bad; 1855 goto bad;
1850 rt->dom = le32_to_cpu(buf[0]); 1856 rt->source_type = le32_to_cpu(buf[0]);
1851 rt->type = le32_to_cpu(buf[1]); 1857 rt->target_type = le32_to_cpu(buf[1]);
1852 rc = mls_read_range_helper(&rt->range, fp); 1858 if (new_rangetr) {
1859 rc = next_entry(buf, fp, sizeof(u32));
1860 if (rc < 0)
1861 goto bad;
1862 rt->target_class = le32_to_cpu(buf[0]);
1863 } else
1864 rt->target_class = SECCLASS_PROCESS;
1865 rc = mls_read_range_helper(&rt->target_range, fp);
1853 if (rc) 1866 if (rc)
1854 goto bad; 1867 goto bad;
1855 lrt = rt; 1868 lrt = rt;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index b1340711f721..8319d5ff5944 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -106,9 +106,10 @@ struct cat_datum {
106}; 106};
107 107
108struct range_trans { 108struct range_trans {
109 u32 dom; /* current process domain */ 109 u32 source_type;
110 u32 type; /* program executable type */ 110 u32 target_type;
111 struct mls_range range; /* new range */ 111 u32 target_class;
112 struct mls_range target_range;
112 struct range_trans *next; 113 struct range_trans *next;
113}; 114};
114 115
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 7eb69a602d8f..0c219a1b3243 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2003,7 +2003,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr,
2003 return rc; 2003 return rc;
2004} 2004}
2005 2005
2006int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, 2006int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
2007 struct selinux_audit_rule *rule, 2007 struct selinux_audit_rule *rule,
2008 struct audit_context *actx) 2008 struct audit_context *actx)
2009{ 2009{
@@ -2026,11 +2026,11 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
2026 goto out; 2026 goto out;
2027 } 2027 }
2028 2028
2029 ctxt = sidtab_search(&sidtab, ctxid); 2029 ctxt = sidtab_search(&sidtab, sid);
2030 if (!ctxt) { 2030 if (!ctxt) {
2031 audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR, 2031 audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR,
2032 "selinux_audit_rule_match: unrecognized SID %d\n", 2032 "selinux_audit_rule_match: unrecognized SID %d\n",
2033 ctxid); 2033 sid);
2034 match = -ENOENT; 2034 match = -ENOENT;
2035 goto out; 2035 goto out;
2036 } 2036 }
@@ -2502,14 +2502,24 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
2502{ 2502{
2503 struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; 2503 struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
2504 struct sk_security_struct *sksec = sk->sk_security; 2504 struct sk_security_struct *sksec = sk->sk_security;
2505 struct netlbl_lsm_secattr secattr;
2506 u32 nlbl_peer_sid;
2505 2507
2506 sksec->sclass = isec->sclass; 2508 sksec->sclass = isec->sclass;
2507 2509
2508 if (sk->sk_family != PF_INET) 2510 if (sk->sk_family != PF_INET)
2509 return; 2511 return;
2510 2512
2513 netlbl_secattr_init(&secattr);
2514 if (netlbl_sock_getattr(sk, &secattr) == 0 &&
2515 selinux_netlbl_secattr_to_sid(NULL,
2516 &secattr,
2517 sksec->sid,
2518 &nlbl_peer_sid) == 0)
2519 sksec->peer_sid = nlbl_peer_sid;
2520 netlbl_secattr_destroy(&secattr, 0);
2521
2511 sksec->nlbl_state = NLBL_REQUIRE; 2522 sksec->nlbl_state = NLBL_REQUIRE;
2512 sksec->peer_sid = sksec->sid;
2513 2523
2514 /* Try to set the NetLabel on the socket to save time later, if we fail 2524 /* Try to set the NetLabel on the socket to save time later, if we fail
2515 * here we will pick up the pieces in later calls to 2525 * here we will pick up the pieces in later calls to
@@ -2568,7 +2578,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask)
2568 sock = SOCKET_I(inode); 2578 sock = SOCKET_I(inode);
2569 isec = inode->i_security; 2579 isec = inode->i_security;
2570 sksec = sock->sk->sk_security; 2580 sksec = sock->sk->sk_security;
2571 down(&isec->sem); 2581 mutex_lock(&isec->lock);
2572 if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && 2582 if (unlikely(sksec->nlbl_state == NLBL_REQUIRE &&
2573 (mask & (MAY_WRITE | MAY_APPEND)))) { 2583 (mask & (MAY_WRITE | MAY_APPEND)))) {
2574 lock_sock(sock->sk); 2584 lock_sock(sock->sk);
@@ -2576,7 +2586,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask)
2576 release_sock(sock->sk); 2586 release_sock(sock->sk);
2577 } else 2587 } else
2578 rc = 0; 2588 rc = 0;
2579 up(&isec->sem); 2589 mutex_unlock(&isec->lock);
2580 2590
2581 return rc; 2591 return rc;
2582} 2592}
@@ -2601,7 +2611,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
2601 u32 netlbl_sid; 2611 u32 netlbl_sid;
2602 u32 recv_perm; 2612 u32 recv_perm;
2603 2613
2604 rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid); 2614 rc = selinux_netlbl_skbuff_getsid(skb, SECINITSID_NETMSG, &netlbl_sid);
2605 if (rc != 0) 2615 if (rc != 0)
2606 return rc; 2616 return rc;
2607 2617
@@ -2610,13 +2620,13 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
2610 2620
2611 switch (sksec->sclass) { 2621 switch (sksec->sclass) {
2612 case SECCLASS_UDP_SOCKET: 2622 case SECCLASS_UDP_SOCKET:
2613 recv_perm = UDP_SOCKET__RECV_MSG; 2623 recv_perm = UDP_SOCKET__RECVFROM;
2614 break; 2624 break;
2615 case SECCLASS_TCP_SOCKET: 2625 case SECCLASS_TCP_SOCKET:
2616 recv_perm = TCP_SOCKET__RECV_MSG; 2626 recv_perm = TCP_SOCKET__RECVFROM;
2617 break; 2627 break;
2618 default: 2628 default:
2619 recv_perm = RAWIP_SOCKET__RECV_MSG; 2629 recv_perm = RAWIP_SOCKET__RECVFROM;
2620 } 2630 }
2621 2631
2622 rc = avc_has_perm(sksec->sid, 2632 rc = avc_has_perm(sksec->sid,
diff --git a/sound/oss/au1550_ac97.c b/sound/oss/au1550_ac97.c
index 4cdb86252d67..219795171c71 100644
--- a/sound/oss/au1550_ac97.c
+++ b/sound/oss/au1550_ac97.c
@@ -719,8 +719,7 @@ prog_dmabuf_dac(struct au1550_state *s)
719} 719}
720 720
721 721
722static void 722static void dac_dma_interrupt(int irq, void *dev_id)
723dac_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
724{ 723{
725 struct au1550_state *s = (struct au1550_state *) dev_id; 724 struct au1550_state *s = (struct au1550_state *) dev_id;
726 struct dmabuf *db = &s->dma_dac; 725 struct dmabuf *db = &s->dma_dac;
@@ -754,8 +753,7 @@ dac_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
754} 753}
755 754
756 755
757static void 756static void adc_dma_interrupt(int irq, void *dev_id)
758adc_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs)
759{ 757{
760 struct au1550_state *s = (struct au1550_state *)dev_id; 758 struct au1550_state *s = (struct au1550_state *)dev_id;
761 struct dmabuf *dp = &s->dma_adc; 759 struct dmabuf *dp = &s->dma_adc;
diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index 2bd8e40b8541..be0bd503f013 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -755,7 +755,7 @@ static struct snd_pcm_ops snd_amd7930_capture_ops = {
755 .pointer = snd_amd7930_capture_pointer, 755 .pointer = snd_amd7930_capture_pointer,
756}; 756};
757 757
758static int __init snd_amd7930_pcm(struct snd_amd7930 *amd) 758static int __devinit snd_amd7930_pcm(struct snd_amd7930 *amd)
759{ 759{
760 struct snd_pcm *pcm; 760 struct snd_pcm *pcm;
761 int err; 761 int err;
@@ -870,7 +870,7 @@ static int snd_amd7930_put_volume(struct snd_kcontrol *kctl, struct snd_ctl_elem
870 return change; 870 return change;
871} 871}
872 872
873static struct snd_kcontrol_new amd7930_controls[] __initdata = { 873static struct snd_kcontrol_new amd7930_controls[] __devinitdata = {
874 { 874 {
875 .iface = SNDRV_CTL_ELEM_IFACE_MIXER, 875 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
876 .name = "Monitor Volume", 876 .name = "Monitor Volume",
@@ -900,7 +900,7 @@ static struct snd_kcontrol_new amd7930_controls[] __initdata = {
900 }, 900 },
901}; 901};
902 902
903static int __init snd_amd7930_mixer(struct snd_amd7930 *amd) 903static int __devinit snd_amd7930_mixer(struct snd_amd7930 *amd)
904{ 904{
905 struct snd_card *card; 905 struct snd_card *card;
906 int idx, err; 906 int idx, err;
@@ -945,11 +945,11 @@ static struct snd_device_ops snd_amd7930_dev_ops = {
945 .dev_free = snd_amd7930_dev_free, 945 .dev_free = snd_amd7930_dev_free,
946}; 946};
947 947
948static int __init snd_amd7930_create(struct snd_card *card, 948static int __devinit snd_amd7930_create(struct snd_card *card,
949 struct resource *rp, 949 struct resource *rp,
950 unsigned int reg_size, 950 unsigned int reg_size,
951 int irq, int dev, 951 int irq, int dev,
952 struct snd_amd7930 **ramd) 952 struct snd_amd7930 **ramd)
953{ 953{
954 unsigned long flags; 954 unsigned long flags;
955 struct snd_amd7930 *amd; 955 struct snd_amd7930 *amd;
@@ -1013,7 +1013,7 @@ static int __init snd_amd7930_create(struct snd_card *card,
1013 return 0; 1013 return 0;
1014} 1014}
1015 1015
1016static int __init amd7930_attach_common(struct resource *rp, int irq) 1016static int __devinit amd7930_attach_common(struct resource *rp, int irq)
1017{ 1017{
1018 static int dev_num; 1018 static int dev_num;
1019 struct snd_card *card; 1019 struct snd_card *card;
@@ -1065,7 +1065,7 @@ out_err:
1065 return err; 1065 return err;
1066} 1066}
1067 1067
1068static int __init amd7930_obio_attach(struct device_node *dp) 1068static int __devinit amd7930_obio_attach(struct device_node *dp)
1069{ 1069{
1070 struct linux_prom_registers *regs; 1070 struct linux_prom_registers *regs;
1071 struct linux_prom_irqs *irqp; 1071 struct linux_prom_irqs *irqp;