aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/console/console.txt144
-rw-r--r--Documentation/fb/fbcon.txt180
-rw-r--r--Documentation/filesystems/ext3.txt8
-rw-r--r--Documentation/kernel-parameters.txt19
-rw-r--r--Documentation/keys.txt43
-rw-r--r--Documentation/md.txt67
-rw-r--r--Documentation/tty.txt7
-rw-r--r--MAINTAINERS15
-rw-r--r--arch/alpha/oprofile/common.c2
-rw-r--r--arch/arm/common/locomo.c45
-rw-r--r--arch/i386/Kconfig24
-rw-r--r--arch/i386/boot/video.S19
-rw-r--r--arch/i386/kernel/Makefile4
-rw-r--r--arch/i386/kernel/hpet.c67
-rw-r--r--arch/i386/kernel/i8253.c118
-rw-r--r--arch/i386/kernel/kprobes.c95
-rw-r--r--arch/i386/kernel/numaq.c10
-rw-r--r--arch/i386/kernel/setup.c1
-rw-r--r--arch/i386/kernel/time.c157
-rw-r--r--arch/i386/kernel/timers/Makefile9
-rw-r--r--arch/i386/kernel/timers/common.c172
-rw-r--r--arch/i386/kernel/timers/timer.c75
-rw-r--r--arch/i386/kernel/timers/timer_cyclone.c259
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c217
-rw-r--r--arch/i386/kernel/timers/timer_none.c39
-rw-r--r--arch/i386/kernel/timers/timer_pit.c177
-rw-r--r--arch/i386/kernel/timers/timer_pm.c342
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c617
-rw-r--r--arch/i386/kernel/tsc.c478
-rw-r--r--arch/i386/lib/delay.c65
-rw-r--r--arch/i386/mm/fault.c38
-rw-r--r--arch/i386/oprofile/nmi_int.c4
-rw-r--r--arch/i386/pci/pcbios.c6
-rw-r--r--arch/ia64/mm/fault.c36
-rw-r--r--arch/m68k/mm/memory.c6
-rw-r--r--arch/m68k/sun3/sun3dvma.c6
-rw-r--r--arch/mips/oprofile/common.c2
-rw-r--r--arch/powerpc/kernel/time.c2
-rw-r--r--arch/powerpc/mm/fault.c36
-rw-r--r--arch/powerpc/oprofile/common.c2
-rw-r--r--arch/sh/oprofile/op_model_sh7750.c2
-rw-r--r--arch/sparc64/mm/fault.c36
-rw-r--r--arch/x86_64/boot/video.S19
-rw-r--r--arch/x86_64/kernel/pmtimer.c2
-rw-r--r--arch/x86_64/kernel/setup.c1
-rw-r--r--arch/x86_64/mm/fault.c39
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/acpi/processor_idle.c9
-rw-r--r--drivers/base/power/resume.c6
-rw-r--r--drivers/base/power/suspend.c13
-rw-r--r--drivers/bluetooth/dtl1_cs.c3
-rw-r--r--drivers/char/Kconfig32
-rw-r--r--drivers/char/Makefile2
-rw-r--r--drivers/char/hangcheck-timer.c4
-rw-r--r--drivers/char/hw_random.c698
-rw-r--r--drivers/char/hw_random/Kconfig90
-rw-r--r--drivers/char/hw_random/Makefile11
-rw-r--r--drivers/char/hw_random/amd-rng.c152
-rw-r--r--drivers/char/hw_random/core.c354
-rw-r--r--drivers/char/hw_random/geode-rng.c128
-rw-r--r--drivers/char/hw_random/intel-rng.c189
-rw-r--r--drivers/char/hw_random/ixp4xx-rng.c73
-rw-r--r--drivers/char/hw_random/omap-rng.c208
-rw-r--r--drivers/char/hw_random/via-rng.c183
-rw-r--r--drivers/char/ipmi/ipmi_msghandler.c7
-rw-r--r--drivers/char/keyboard.c10
-rw-r--r--drivers/char/vt.c580
-rw-r--r--drivers/clocksource/Makefile3
-rw-r--r--drivers/clocksource/acpi_pm.c177
-rw-r--r--drivers/clocksource/cyclone.c119
-rw-r--r--drivers/clocksource/scx200_hrt.c101
-rw-r--r--drivers/dma/ioatdma.c5
-rw-r--r--drivers/ide/ide-io.c2
-rw-r--r--drivers/ide/ide-lib.c4
-rw-r--r--drivers/ide/ide-timing.h8
-rw-r--r--drivers/ide/pci/pdc202xx_old.c40
-rw-r--r--drivers/ide/pci/piix.c12
-rw-r--r--drivers/ieee1394/eth1394.c3
-rw-r--r--drivers/ieee1394/raw1394.c3
-rw-r--r--drivers/infiniband/core/mad.c9
-rw-r--r--drivers/infiniband/core/mad_rmpp.c3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c6
-rw-r--r--drivers/isdn/capi/capi.c54
-rw-r--r--drivers/isdn/gigaset/bas-gigaset.c298
-rw-r--r--drivers/isdn/gigaset/ev-layer.c13
-rw-r--r--drivers/isdn/hisax/q931.c4
-rw-r--r--drivers/md/Kconfig46
-rw-r--r--drivers/md/Makefile5
-rw-r--r--drivers/md/bitmap.c483
-rw-r--r--drivers/md/dm-crypt.c56
-rw-r--r--drivers/md/dm-emc.c40
-rw-r--r--drivers/md/dm-exception-store.c67
-rw-r--r--drivers/md/dm-ioctl.c109
-rw-r--r--drivers/md/dm-linear.c8
-rw-r--r--drivers/md/dm-log.c157
-rw-r--r--drivers/md/dm-mpath.c43
-rw-r--r--drivers/md/dm-raid1.c97
-rw-r--r--drivers/md/dm-round-robin.c6
-rw-r--r--drivers/md/dm-snap.c16
-rw-r--r--drivers/md/dm-stripe.c25
-rw-r--r--drivers/md/dm-table.c57
-rw-r--r--drivers/md/dm-target.c2
-rw-r--r--drivers/md/dm-zero.c8
-rw-r--r--drivers/md/dm.c184
-rw-r--r--drivers/md/dm.h81
-rw-r--r--drivers/md/kcopyd.c4
-rw-r--r--drivers/md/linear.c74
-rw-r--r--drivers/md/md.c634
-rw-r--r--drivers/md/raid1.c43
-rw-r--r--drivers/md/raid10.c77
-rw-r--r--drivers/md/raid5.c1308
-rw-r--r--drivers/md/raid6main.c2427
-rw-r--r--drivers/media/video/cx88/cx88-video.c6
-rw-r--r--drivers/media/video/usbvideo/quickcam_messenger.c2
-rw-r--r--drivers/net/irda/nsc-ircc.c8
-rw-r--r--drivers/net/ppp_generic.c3
-rw-r--r--drivers/net/wireless/bcm43xx/Kconfig1
-rw-r--r--drivers/net/wireless/bcm43xx/bcm43xx.h6
-rw-r--r--drivers/net/wireless/bcm43xx/bcm43xx_main.c37
-rw-r--r--drivers/s390/net/lcs.c7
-rw-r--r--drivers/scsi/ncr53c8xx.c3
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c3
-rw-r--r--drivers/usb/host/hc_crisv10.c3
-rw-r--r--drivers/usb/serial/whiteheat.c19
-rw-r--r--drivers/video/Kconfig49
-rw-r--r--drivers/video/Makefile9
-rw-r--r--drivers/video/aty/aty128fb.c36
-rw-r--r--drivers/video/aty/atyfb_base.c43
-rw-r--r--drivers/video/aty/mach64_accel.c10
-rw-r--r--drivers/video/aty/mach64_cursor.c33
-rw-r--r--drivers/video/aty/radeon_base.c2
-rw-r--r--drivers/video/au1100fb.c41
-rw-r--r--drivers/video/backlight/Kconfig12
-rw-r--r--drivers/video/backlight/Makefile2
-rw-r--r--drivers/video/backlight/locomolcd.c123
-rw-r--r--drivers/video/cfbimgblt.c1
-rw-r--r--drivers/video/cirrusfb.c2
-rw-r--r--drivers/video/console/fbcon.c361
-rw-r--r--drivers/video/console/fbcon.h1
-rw-r--r--drivers/video/console/mdacon.c2
-rw-r--r--drivers/video/console/newport_con.c38
-rw-r--r--drivers/video/console/promcon.c4
-rw-r--r--drivers/video/console/sticon.c2
-rw-r--r--drivers/video/console/vgacon.c28
-rw-r--r--drivers/video/epson1355fb.c29
-rw-r--r--drivers/video/fbcvt.c1
-rw-r--r--drivers/video/fbmem.c58
-rw-r--r--drivers/video/fbmon.c33
-rw-r--r--drivers/video/fbsysfs.c52
-rw-r--r--drivers/video/geode/gx1fb_core.c3
-rw-r--r--drivers/video/geode/gxfb_core.c3
-rw-r--r--drivers/video/i810/i810_main.c3
-rw-r--r--drivers/video/imacfb.c345
-rw-r--r--drivers/video/macmodes.c6
-rw-r--r--drivers/video/macmodes.h7
-rw-r--r--drivers/video/modedb.c11
-rw-r--r--drivers/video/neofb.c32
-rw-r--r--drivers/video/nvidia/nv_hw.c10
-rw-r--r--drivers/video/nvidia/nvidia.c370
-rw-r--r--drivers/video/riva/fbdev.c2
-rw-r--r--drivers/video/s3c2410fb.c17
-rw-r--r--drivers/video/savage/savagefb.h45
-rw-r--r--drivers/video/savage/savagefb_driver.c1475
-rw-r--r--drivers/video/sis/sis_main.c2
-rw-r--r--drivers/video/skeletonfb.c5
-rw-r--r--drivers/video/tgafb.c1
-rw-r--r--drivers/video/vesafb.c59
-rw-r--r--drivers/video/vfb.c29
-rw-r--r--drivers/video/vga16fb.c40
-rw-r--r--fs/Kconfig12
-rw-r--r--fs/afs/cell.c3
-rw-r--r--fs/afs/kafsasyncd.c9
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/afs/vlocation.c6
-rw-r--r--fs/afs/vnode.c3
-rw-r--r--fs/autofs4/expire.c3
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat_ioctl.c1
-rw-r--r--fs/configfs/dir.c6
-rw-r--r--fs/dcache.c5
-rw-r--r--fs/dquot.c4
-rw-r--r--fs/exec.c147
-rw-r--r--fs/ext3/super.c6
-rw-r--r--fs/jffs2/erase.c15
-rw-r--r--fs/jffs2/nodemgmt.c3
-rw-r--r--fs/jffs2/wbuf.c3
-rw-r--r--fs/libfs.c10
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/nfscache.c3
-rw-r--r--fs/ocfs2/dlm/dlmast.c3
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c9
-rw-r--r--fs/ocfs2/dlm/dlmlock.c3
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c9
-rw-r--r--fs/ocfs2/dlm/dlmthread.c6
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c3
-rw-r--r--fs/ocfs2/journal.c3
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/proc/base.c1086
-rw-r--r--fs/proc/inode.c11
-rw-r--r--fs/proc/internal.h22
-rw-r--r--fs/proc/task_mmu.c110
-rw-r--r--fs/proc/task_nommu.c21
-rw-r--r--fs/reiserfs/file.c8
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/smbfs/request.c6
-rw-r--r--fs/smbfs/smbiod.c3
-rw-r--r--fs/sysfs/dir.c10
-rw-r--r--include/asm-arm/hardware/locomo.h5
-rw-r--r--include/asm-i386/delay.h2
-rw-r--r--include/asm-i386/kdebug.h2
-rw-r--r--include/asm-i386/kprobes.h1
-rw-r--r--include/asm-i386/mach-default/mach_timer.h4
-rw-r--r--include/asm-i386/mach-summit/mach_mpparse.h3
-rw-r--r--include/asm-i386/timer.h57
-rw-r--r--include/asm-i386/timex.h34
-rw-r--r--include/asm-i386/tsc.h49
-rw-r--r--include/asm-ia64/kdebug.h2
-rw-r--r--include/asm-ia64/kprobes.h1
-rw-r--r--include/asm-powerpc/kdebug.h2
-rw-r--r--include/asm-powerpc/kprobes.h2
-rw-r--r--include/asm-sparc64/kdebug.h2
-rw-r--r--include/asm-sparc64/kprobes.h1
-rw-r--r--include/asm-x86_64/kdebug.h2
-rw-r--r--include/asm-x86_64/kprobes.h1
-rw-r--r--include/keys/user-type.h1
-rw-r--r--include/linux/clocksource.h185
-rw-r--r--include/linux/compat_ioctl.h5
-rw-r--r--include/linux/console.h4
-rw-r--r--include/linux/device-mapper.h111
-rw-r--r--include/linux/dm-ioctl.h6
-rw-r--r--include/linux/fb.h19
-rw-r--r--include/linux/hw_random.h50
-rw-r--r--include/linux/idr.h1
-rw-r--r--include/linux/init_task.h1
-rw-r--r--include/linux/key.h13
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/netpoll.h1
-rw-r--r--include/linux/proc_fs.h16
-rw-r--r--include/linux/ptrace.h1
-rw-r--r--include/linux/raid/bitmap.h11
-rw-r--r--include/linux/raid/linear.h2
-rw-r--r--include/linux/raid/md.h4
-rw-r--r--include/linux/raid/md_k.h10
-rw-r--r--include/linux/raid/md_p.h5
-rw-r--r--include/linux/raid/raid10.h7
-rw-r--r--include/linux/raid/raid5.h1
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/security.h11
-rw-r--r--include/linux/time.h17
-rw-r--r--include/linux/timex.h2
-rw-r--r--include/net/tipc/tipc_bearer.h12
-rw-r--r--init/initramfs.c36
-rw-r--r--init/main.c1
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/cpu.c10
-rw-r--r--kernel/cpuset.c26
-rw-r--r--kernel/exit.c7
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/kprobes.c58
-rw-r--r--kernel/mutex-debug.c12
-rw-r--r--kernel/mutex-debug.h25
-rw-r--r--kernel/mutex.c21
-rw-r--r--kernel/mutex.h6
-rw-r--r--kernel/ptrace.c23
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/signal.c35
-rw-r--r--kernel/time.c2
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/clocksource.c349
-rw-r--r--kernel/time/jiffies.c73
-rw-r--r--kernel/timer.c396
-rw-r--r--lib/idr.c43
-rw-r--r--mm/mempolicy.c6
-rw-r--r--mm/swap.c3
-rw-r--r--net/atm/mpc.c3
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/netpoll.c36
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/ipv4/tcp.c22
-rw-r--r--net/netrom/nr_route.c12
-rw-r--r--net/rxrpc/call.c3
-rw-r--r--net/rxrpc/connection.c3
-rw-r--r--net/rxrpc/krxsecd.c3
-rw-r--r--net/tipc/bcast.c79
-rw-r--r--net/tipc/bcast.h2
-rw-r--r--net/tipc/bearer.c70
-rw-r--r--net/tipc/cluster.c22
-rw-r--r--net/tipc/config.c85
-rw-r--r--net/tipc/core.c7
-rw-r--r--net/tipc/core.h21
-rw-r--r--net/tipc/discover.c13
-rw-r--r--net/tipc/eth_media.c29
-rw-r--r--net/tipc/link.c217
-rw-r--r--net/tipc/name_distr.c30
-rw-r--r--net/tipc/name_table.c203
-rw-r--r--net/tipc/node.c78
-rw-r--r--net/tipc/node.h2
-rw-r--r--net/tipc/node_subscr.c15
-rw-r--r--net/tipc/port.c41
-rw-r--r--net/tipc/ref.c31
-rw-r--r--net/tipc/socket.c100
-rw-r--r--net/tipc/subscr.c18
-rw-r--r--net/tipc/zone.c19
-rw-r--r--security/Kconfig20
-rw-r--r--security/dummy.c3
-rw-r--r--security/keys/internal.h3
-rw-r--r--security/keys/key.c54
-rw-r--r--security/keys/keyctl.c61
-rw-r--r--security/keys/keyring.c25
-rw-r--r--security/keys/proc.c7
-rw-r--r--security/keys/process_keys.c24
-rw-r--r--security/keys/request_key.c36
-rw-r--r--security/keys/request_key_auth.c2
-rw-r--r--security/keys/user_defined.c25
-rw-r--r--security/selinux/hooks.c54
-rw-r--r--security/selinux/include/av_perm_to_string.h3
-rw-r--r--security/selinux/include/av_permissions.h4
-rw-r--r--security/selinux/include/objsec.h2
320 files changed, 12134 insertions, 9691 deletions
diff --git a/Documentation/console/console.txt b/Documentation/console/console.txt
new file mode 100644
index 000000000000..d3e17447321c
--- /dev/null
+++ b/Documentation/console/console.txt
@@ -0,0 +1,144 @@
1Console Drivers
2===============
3
4The linux kernel has 2 general types of console drivers. The first type is
5assigned by the kernel to all the virtual consoles during the boot process.
6This type will be called 'system driver', and only one system driver is allowed
7to exist. The system driver is persistent and it can never be unloaded, though
8it may become inactive.
9
10The second type has to be explicitly loaded and unloaded. This will be called
11'modular driver' by this document. Multiple modular drivers can coexist at
12any time with each driver sharing the console with other drivers including
13the system driver. However, modular drivers cannot take over the console
14that is currently occupied by another modular driver. (Exception: Drivers that
15call take_over_console() will succeed in the takeover regardless of the type
16of driver occupying the consoles.) They can only take over the console that is
17occupied by the system driver. By the same token, if the modular driver is
18released by the console, the system driver will take over.
19
20Modular drivers, from the programmer's point of view, have to call:
21
22 take_over_console() - load and bind driver to console layer
23 give_up_console() - unbind and unload driver
24
25In newer kernels, the following are also available:
26
27 register_con_driver()
28 unregister_con_driver()
29
30If sysfs is enabled, the contents of /sys/class/vtconsole can be
31examined. This shows the console backends currently registered by the
32system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
33
34 ls /sys/class/vtconsole
35 . .. vtcon0 vtcon1
36
37Each directory in /sys/class/vtconsole has 3 files:
38
39 ls /sys/class/vtconsole/vtcon0
40 . .. bind name uevent
41
42What do these files signify?
43
44 1. bind - this is a read/write file. It shows the status of the driver if
45 read, or acts to bind or unbind the driver to the virtual consoles
46 when written to. The possible values are:
47
48 0 - means the driver is not bound and if echo'ed, commands the driver
49 to unbind
50
51 1 - means the driver is bound and if echo'ed, commands the driver to
52 bind
53
54 2. name - read-only file. Shows the name of the driver in this format:
55
56 cat /sys/class/vtconsole/vtcon0/name
57 (S) VGA+
58
59 '(S)' stands for a (S)ystem driver, ie, it cannot be directly
60 commanded to bind or unbind
61
62 'VGA+' is the name of the driver
63
64 cat /sys/class/vtconsole/vtcon1/name
65 (M) frame buffer device
66
67 In this case, '(M)' stands for a (M)odular driver, one that can be
68 directly commanded to bind or unbind.
69
70 3. uevent - ignore this file
71
72When unbinding, the modular driver is detached first, and then the system
73driver takes over the consoles vacated by the driver. Binding, on the other
74hand, will bind the driver to the consoles that are currently occupied by a
75system driver.
76
77NOTE1: Binding and unbinding must be selected in Kconfig. It's under:
78
79Device Drivers -> Character devices -> Support for binding and unbinding
80console drivers
81
82NOTE2: If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
83unbinding will not succeed. An example of an application that sets the console
84to KD_GRAPHICS is X.
85
86How useful is this feature? This is very useful for console driver
87developers. By unbinding the driver from the console layer, one can unload the
88driver, make changes, recompile, reload and rebind the driver without any need
89for rebooting the kernel. For regular users who may want to switch from
90framebuffer console to VGA console and vice versa, this feature also makes
91this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
92for more details).
93
94Notes for developers:
95=====================
96
97take_over_console() is now broken up into:
98
99 register_con_driver()
100 bind_con_driver() - private function
101
102give_up_console() is a wrapper to unregister_con_driver(), and a driver must
103be fully unbound for this call to succeed. con_is_bound() will check if the
104driver is bound or not.
105
106Guidelines for console driver writers:
107=====================================
108
109In order for binding to and unbinding from the console to properly work,
110console drivers must follow these guidelines:
111
1121. All drivers, except system drivers, must call either register_con_driver()
113 or take_over_console(). register_con_driver() will just add the driver to
114 the console's internal list. It won't take over the
115 console. take_over_console(), as its name implies, will also take over (or
116 bind to) the console.
117
1182. All resources allocated during con->con_init() must be released in
119 con->con_deinit().
120
1213. All resources allocated in con->con_startup() must be released when the
122 driver, which was previously bound, becomes unbound. The console layer
123 does not have a complementary call to con->con_startup() so it's up to the
124 driver to check when it's legal to release these resources. Calling
125 con_is_bound() in con->con_deinit() will help. If the call returns
126 false, then it's safe to release the resources. This balance has to be
127 ensured because con->con_startup() can be called again when a request to
128 rebind the driver to the console arrives.
129
1304. Upon exit of the driver, ensure that the driver is totally unbound. If the
131 condition is satisfied, then the driver must call unregister_con_driver()
132 or give_up_console().
133
1345. unregister_con_driver() can also be called on conditions which make it
135 impossible for the driver to service console requests. This can happen
136 with the framebuffer console that suddenly lost all of its drivers.
137
138The current crop of console drivers should still work correctly, but binding
139and unbinding them may cause problems. With minimal fixes, these drivers can
140be made to work correctly.
141
142==========================
143Antonino Daplas <adaplas@pol.net>
144
diff --git a/Documentation/fb/fbcon.txt b/Documentation/fb/fbcon.txt
index 08dce0f631bf..f373df12ed4c 100644
--- a/Documentation/fb/fbcon.txt
+++ b/Documentation/fb/fbcon.txt
@@ -135,10 +135,10 @@ C. Boot options
135 135
136 The angle can be changed anytime afterwards by 'echoing' the same 136 The angle can be changed anytime afterwards by 'echoing' the same
137 numbers to any one of the 2 attributes found in 137 numbers to any one of the 2 attributes found in
138 /sys/class/graphics/fb{x} 138 /sys/class/graphics/fbcon
139 139
140 con_rotate - rotate the display of the active console 140 rotate - rotate the display of the active console
141 con_rotate_all - rotate the display of all consoles 141 rotate_all - rotate the display of all consoles
142 142
143 Console rotation will only become available if Console Rotation 143 Console rotation will only become available if Console Rotation
144 Support is compiled in your kernel. 144 Support is compiled in your kernel.
@@ -148,5 +148,177 @@ C. Boot options
148 Actually, the underlying fb driver is totally ignorant of console 148 Actually, the underlying fb driver is totally ignorant of console
149 rotation. 149 rotation.
150 150
151--- 151D. Attaching, Detaching and Unloading
152
153Before going on to how to attach, detach and unload the framebuffer console, an
154illustration of the dependencies may help.
155
156The console layer, as with most subsystems, needs a driver that interfaces with
157the hardware. Thus, in a VGA console:
158
159console ---> VGA driver ---> hardware.
160
161Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
162from the console layer before unloading the driver. The VGA driver cannot be
163unloaded if it is still bound to the console layer. (See
164Documentation/console/console.txt for more information).
165
166This is more complicated in the case of the framebuffer console (fbcon),
167because fbcon is an intermediate layer between the console and the drivers:
168
169console ---> fbcon ---> fbdev drivers ---> hardware
170
171The fbdev drivers cannot be unloaded if they're bound to fbcon, and fbcon cannot
172be unloaded if it's bound to the console layer.
173
174So to unload the fbdev drivers, one must first unbind fbcon from the console,
175then unbind the fbdev drivers from fbcon. Fortunately, unbinding fbcon from
176the console layer will automatically unbind framebuffer drivers from
177fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
178fbcon.
179
180So, how do we unbind fbcon from the console? Part of the answer is in
181Documentation/console/console.txt. To summarize:
182
183Echo a value to the bind file that represents the framebuffer console
184driver. So assuming vtcon1 represents fbcon, then:
185
186echo 1 > /sys/class/vtconsole/vtcon1/bind - attach framebuffer console to
187 console layer
188echo 0 > /sys/class/vtconsole/vtcon1/bind - detach framebuffer console from
189 console layer
190
191If fbcon is detached from the console layer, your boot console driver (which is
192usually VGA text mode) will take over. A few drivers (rivafb and i810fb) will
193restore VGA text mode for you. With the rest, before detaching fbcon, you
194must take a few additional steps to make sure that your VGA text mode is
195restored properly. The following is one of the several methods that you can do:
196
1971. Download or install vbetool. This utility is included with most
198 distributions nowadays, and is usually part of the suspend/resume tool.
199
2002. In your kernel configuration, ensure that CONFIG_FRAMEBUFFER_CONSOLE is set
201 to 'y' or 'm'. Enable one or more of your favorite framebuffer drivers.
202
2033. Boot into text mode and as root run:
204
205 vbetool vbestate save > <vga state file>
206
207 The above command saves the register contents of your graphics
208 hardware to <vga state file>. You need to do this step only once as
209 the state file can be reused.
210
2114. If fbcon is compiled as a module, load fbcon by doing:
212
213 modprobe fbcon
214
2155. Now to detach fbcon:
216
217 vbetool vbestate restore < <vga state file> && \
218 echo 0 > /sys/class/vtconsole/vtcon1/bind
219
2206. That's it, you're back to VGA mode. And if you compiled fbcon as a module,
221 you can unload it by 'rmmod fbcon'
222
2237. To reattach fbcon:
224
225 echo 1 > /sys/class/vtconsole/vtcon1/bind
226
2278. Once fbcon is unbound, all drivers registered to the system will also
228become unbound. This means that fbcon and individual framebuffer drivers
229can be unloaded or reloaded at will. Reloading the drivers or fbcon will
230automatically bind the console, fbcon and the drivers together. Unloading
231all the drivers without unloading fbcon will make it impossible for the
232console to bind fbcon.
233
234Notes for vesafb users:
235=======================
236
237Unfortunately, if your bootline includes a vga=xxx parameter that sets the
238hardware in graphics mode, such as when loading vesafb, vgacon will not load.
239Instead, vgacon will replace the default boot console with dummycon, and you
240won't get any display after detaching fbcon. Your machine is still alive, so
241you can reattach vesafb. However, to reattach vesafb, you need to do one of
242the following:
243
244Variation 1:
245
246 a. Before detaching fbcon, do
247
248 vbetool vbestate save > <vesa state file> # do once for each vesafb mode,
249 # the file can be reused
250
251 b. Detach fbcon as in step 5.
252
253 c. Attach fbcon
254
255 vbetool vbestate restore < <vesa state file> && \
256 echo 1 > /sys/class/vtconsole/vtcon1/bind
257
258Variation 2:
259
260 a. Before detaching fbcon, do:
261 echo <ID> > /sys/class/tty/console/bind
262
263
264 vbetool vbemode get
265
266 b. Take note of the mode number
267
268 c. Detach fbcon as in step 5.
269
270 d. Attach fbcon:
271
272 vbetool vbemode set <mode number> && \
273 echo 1 > /sys/class/vtconsole/vtcon1/bind
274
275Samples:
276========
277
278Here are 2 sample bash scripts that you can use to bind or unbind the
279framebuffer console driver if you are in an X86 box:
280
281---------------------------------------------------------------------------
282#!/bin/bash
283# Unbind fbcon
284
285# Change this to where your actual vgastate file is located
286# Or Use VGASTATE=$1 to indicate the state file at runtime
287VGASTATE=/tmp/vgastate
288
289# path to vbetool
290VBETOOL=/usr/local/bin
291
292
293for (( i = 0; i < 16; i++))
294do
295 if test -x /sys/class/vtconsole/vtcon$i; then
296 if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
297 = 1 ]; then
298 if test -x $VBETOOL/vbetool; then
299 echo Unbinding vtcon$i
300 $VBETOOL/vbetool vbestate restore < $VGASTATE
301 echo 0 > /sys/class/vtconsole/vtcon$i/bind
302 fi
303 fi
304 fi
305done
306
307---------------------------------------------------------------------------
308#!/bin/bash
309# Bind fbcon
310
311for (( i = 0; i < 16; i++))
312do
313 if test -x /sys/class/vtconsole/vtcon$i; then
314 if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
315 = 1 ]; then
316 echo Binding vtcon$i
317 echo 1 > /sys/class/vtconsole/vtcon$i/bind
318 fi
319 fi
320done
321---------------------------------------------------------------------------
322
323--
152Antonino Daplas <adaplas@pol.net> 324Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index afb1335c05d6..4aecc9bdb273 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -113,6 +113,14 @@ noquota
113grpquota 113grpquota
114usrquota 114usrquota
115 115
116bh (*) ext3 associates buffer heads to data pages to
117nobh (a) cache disk block mapping information
118 (b) link pages into transaction to provide
119 ordering guarantees.
120 "bh" option forces use of buffer heads.
121 "nobh" option tries to avoid associating buffer
122 heads (supported only for "writeback" mode).
123
116 124
117Specification 125Specification
118============= 126=============
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bca6f389da66..2e352a605fcf 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -61,6 +61,7 @@ parameter is applicable:
61 MTD MTD support is enabled. 61 MTD MTD support is enabled.
62 NET Appropriate network support is enabled. 62 NET Appropriate network support is enabled.
63 NUMA NUMA support is enabled. 63 NUMA NUMA support is enabled.
64 GENERIC_TIME The generic timeofday code is enabled.
64 NFS Appropriate NFS support is enabled. 65 NFS Appropriate NFS support is enabled.
65 OSS OSS sound support is enabled. 66 OSS OSS sound support is enabled.
66 PARIDE The ParIDE subsystem is enabled. 67 PARIDE The ParIDE subsystem is enabled.
@@ -179,6 +180,11 @@ running once the system is up.
179 override platform specific driver. 180 override platform specific driver.
180 See also Documentation/acpi-hotkey.txt. 181 See also Documentation/acpi-hotkey.txt.
181 182
183 acpi_pm_good [IA-32,X86-64]
184 Override the pmtimer bug detection: force the kernel
185 to assume that this machine's pmtimer latches its value
186 and always returns good values.
187
182 enable_timer_pin_1 [i386,x86-64] 188 enable_timer_pin_1 [i386,x86-64]
183 Enable PIN 1 of APIC timer 189 Enable PIN 1 of APIC timer
184 Can be useful to work around chipset bugs 190 Can be useful to work around chipset bugs
@@ -341,10 +347,11 @@ running once the system is up.
341 Value can be changed at runtime via 347 Value can be changed at runtime via
342 /selinux/checkreqprot. 348 /selinux/checkreqprot.
343 349
344 clock= [BUGS=IA-32,HW] gettimeofday timesource override. 350 clock= [BUGS=IA-32, HW] gettimeofday clocksource override.
345 Forces specified timesource (if avaliable) to be used 351 [Deprecated]
346 when calculating gettimeofday(). If specicified 352 Forces specified clocksource (if available) to be used
347 timesource is not avalible, it defaults to PIT. 353 when calculating gettimeofday(). If specified
354 clocksource is not available, it defaults to PIT.
348 Format: { pit | tsc | cyclone | pmtmr } 355 Format: { pit | tsc | cyclone | pmtmr }
349 356
350 disable_8254_timer 357 disable_8254_timer
@@ -1617,6 +1624,10 @@ running once the system is up.
1617 1624
1618 time Show timing data prefixed to each printk message line 1625 time Show timing data prefixed to each printk message line
1619 1626
1627 clocksource= [GENERIC_TIME] Override the default clocksource
1628 Override the default clocksource and use the clocksource
1629 with the name specified.
1630
1620 tipar.timeout= [HW,PPT] 1631 tipar.timeout= [HW,PPT]
1621 Set communications timeout in tenths of a second 1632 Set communications timeout in tenths of a second
1622 (default 15). 1633 (default 15).
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 3bbe157b45e4..61c0fad2fe2f 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -241,25 +241,30 @@ The security class "key" has been added to SELinux so that mandatory access
241controls can be applied to keys created within various contexts. This support 241controls can be applied to keys created within various contexts. This support
242is preliminary, and is likely to change quite significantly in the near future. 242is preliminary, and is likely to change quite significantly in the near future.
243Currently, all of the basic permissions explained above are provided in SELinux 243Currently, all of the basic permissions explained above are provided in SELinux
244as well; SE Linux is simply invoked after all basic permission checks have been 244as well; SELinux is simply invoked after all basic permission checks have been
245performed. 245performed.
246 246
247Each key is labeled with the same context as the task to which it belongs. 247The value of the file /proc/self/attr/keycreate influences the labeling of
248Typically, this is the same task that was running when the key was created. 248newly-created keys. If the contents of that file correspond to an SELinux
249The default keyrings are handled differently, but in a way that is very 249security context, then the key will be assigned that context. Otherwise, the
250intuitive: 250key will be assigned the current context of the task that invoked the key
251creation request. Tasks must be granted explicit permission to assign a
252particular context to newly-created keys, using the "create" permission in the
253key security class.
251 254
252 (*) The user and user session keyrings that are created when the user logs in 255The default keyrings associated with users will be labeled with the default
253 are currently labeled with the context of the login manager. 256context of the user if and only if the login programs have been instrumented to
254 257properly initialize keycreate during the login process. Otherwise, they will
255 (*) The keyrings associated with new threads are each labeled with the context 258be labeled with the context of the login program itself.
256 of their associated thread, and both session and process keyrings are
257 handled similarly.
258 259
259Note, however, that the default keyrings associated with the root user are 260Note, however, that the default keyrings associated with the root user are
260labeled with the default kernel context, since they are created early in the 261labeled with the default kernel context, since they are created early in the
261boot process, before root has a chance to log in. 262boot process, before root has a chance to log in.
262 263
264The keyrings associated with new threads are each labeled with the context of
265their associated thread, and both session and process keyrings are handled
266similarly.
267
263 268
264================ 269================
265NEW PROCFS FILES 270NEW PROCFS FILES
@@ -270,9 +275,17 @@ about the status of the key service:
270 275
271 (*) /proc/keys 276 (*) /proc/keys
272 277
273 This lists all the keys on the system, giving information about their 278 This lists the keys that are currently viewable by the task reading the
274 type, description and permissions. The payload of the key is not available 279 file, giving information about their type, description and permissions.
275 this way: 280 It is not possible to view the payload of the key this way, though some
281 information about it may be given.
282
283 The only keys included in the list are those that grant View permission to
284 the reading process whether or not it possesses them. Note that LSM
285 security checks are still performed, and may further filter out keys that
286 the current process is not authorised to view.
287
288 The contents of the file look like this:
276 289
277 SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY 290 SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY
278 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4 291 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4
@@ -300,7 +313,7 @@ about the status of the key service:
300 (*) /proc/key-users 313 (*) /proc/key-users
301 314
302 This file lists the tracking data for each user that has at least one key 315 This file lists the tracking data for each user that has at least one key
303 on the system. Such data includes quota information and statistics: 316 on the system. Such data includes quota information and statistics:
304 317
305 [root@andromeda root]# cat /proc/key-users 318 [root@andromeda root]# cat /proc/key-users
306 0: 46 45/45 1/100 13/10000 319 0: 46 45/45 1/100 13/10000
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 03a13c462cf2..0668f9dc9d29 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -200,6 +200,17 @@ All md devices contain:
200 This can be written only while the array is being assembled, not 200 This can be written only while the array is being assembled, not
201 after it is started. 201 after it is started.
202 202
203 layout
204 The "layout" for the array for the particular level. This is
205 simply a number that is interpreted differently by different
206 levels. It can be written while assembling an array.
207
208 resync_start
209 The point at which resync should start. If no resync is needed,
210 this will be a very large number. At array creation it will
211 default to 0, though starting the array as 'clean' will
212 set it much larger.
213
203 new_dev 214 new_dev
204 This file can be written but not read. The value written should 215 This file can be written but not read. The value written should
205 be a block device number as major:minor. e.g. 8:0 216 be a block device number as major:minor. e.g. 8:0
@@ -207,6 +218,54 @@ All md devices contain:
207 available. It will then appear at md/dev-XXX (depending on the 218 available. It will then appear at md/dev-XXX (depending on the
208 name of the device) and further configuration is then possible. 219 name of the device) and further configuration is then possible.
209 220
221 safe_mode_delay
222 When an md array has seen no write requests for a certain period
223 of time, it will be marked as 'clean'. When another write
224 request arrives, the array is marked as 'dirty' before the write
225 commences. This is known as 'safe_mode'.
226 The 'certain period' is controlled by this file which stores the
227 period as a number of seconds. The default is 200msec (0.200).
228 Writing a value of 0 disables safemode.
229
230 array_state
231 This file contains a single word which describes the current
232 state of the array. In many cases, the state can be set by
233 writing the word for the desired state, however some states
234 cannot be explicitly set, and some transitions are not allowed.
235
236 clear
237 No devices, no size, no level
238 Writing is equivalent to STOP_ARRAY ioctl
239 inactive
240 May have some settings, but array is not active
241 all IO results in error
242 When written, doesn't tear down array, but just stops it
243 suspended (not supported yet)
244 All IO requests will block. The array can be reconfigured.
245 Writing this, if accepted, will block until array is quiescent
246 readonly
247 no resync can happen. no superblocks get written.
248 write requests fail
249 read-auto
250 like readonly, but behaves like 'clean' on a write request.
251
252 clean - no pending writes, but otherwise active.
253 When written to inactive array, starts without resync
254 If a write request arrives then
255 if metadata is known, mark 'dirty' and switch to 'active'.
256 if not known, block and switch to write-pending
257 If written to an active array that has pending writes, then fails.
258 active
259 fully active: IO and resync can be happening.
260 When written to inactive array, starts with resync
261
262 write-pending
263 clean, but writes are blocked waiting for 'active' to be written.
264
265 active-idle
266 like active, but no writes have been seen for a while (safe_mode_delay).
267
268
210 sync_speed_min 269 sync_speed_min
211 sync_speed_max 270 sync_speed_max
212 These are similar to /proc/sys/dev/raid/speed_limit_{min,max} 271 These are similar to /proc/sys/dev/raid/speed_limit_{min,max}
@@ -250,10 +309,18 @@ Each directory contains:
250 faulty - device has been kicked from active use due to 309 faulty - device has been kicked from active use due to
251 a detected fault 310 a detected fault
252 in_sync - device is a fully in-sync member of the array 311 in_sync - device is a fully in-sync member of the array
312 writemostly - device will only be subject to read
313 requests if there are no other options.
314 This applies only to raid1 arrays.
253 spare - device is working, but not a full member. 315 spare - device is working, but not a full member.
254 This includes spares that are in the process 316 This includes spares that are in the process
255 of being recovered to 317 of being recovered to
256 This list may grow in future. 318 This list may grow in future.
319 This can be written to.
320 Writing "faulty" simulates a failure on the device.
321 Writing "remove" removes the device from the array.
322 Writing "writemostly" sets the writemostly flag.
323 Writing "-writemostly" clears the writemostly flag.
257 324
258 errors 325 errors
259 An approximate count of read errors that have been detected on 326 An approximate count of read errors that have been detected on
diff --git a/Documentation/tty.txt b/Documentation/tty.txt
index 8ff7bc2a0811..dab56604745d 100644
--- a/Documentation/tty.txt
+++ b/Documentation/tty.txt
@@ -80,13 +80,6 @@ receive_buf() - Hand buffers of bytes from the driver to the ldisc
80 for processing. Semantics currently rather 80 for processing. Semantics currently rather
81 mysterious 8( 81 mysterious 8(
82 82
83receive_room() - Can be called by the driver layer at any time when
84 the ldisc is opened. The ldisc must be able to
85 handle the reported amount of data at that instant.
86 Synchronization between active receive_buf and
87 receive_room calls is down to the driver not the
88 ldisc. Must not sleep.
89
90write_wakeup() - May be called at any point between open and close. 83write_wakeup() - May be called at any point between open and close.
91 The TTY_DO_WRITE_WAKEUP flag indicates if a call 84 The TTY_DO_WRITE_WAKEUP flag indicates if a call
92 is needed but always races versus calls. Thus the 85 is needed but always races versus calls. Thus the
diff --git a/MAINTAINERS b/MAINTAINERS
index 4dcd2f1f14d6..28c0a9676927 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1118,6 +1118,11 @@ L: lm-sensors@lm-sensors.org
1118W: http://www.lm-sensors.nu/ 1118W: http://www.lm-sensors.nu/
1119S: Maintained 1119S: Maintained
1120 1120
1121HARDWARE RANDOM NUMBER GENERATOR CORE
1122P: Michael Buesch
1123M: mb@bu3sch.de
1124S: Maintained
1125
1121HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER 1126HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER
1122P: Robert Love 1127P: Robert Love
1123M: rlove@rlove.org 1128M: rlove@rlove.org
@@ -1436,6 +1441,11 @@ P: Tigran Aivazian
1436M: tigran@veritas.com 1441M: tigran@veritas.com
1437S: Maintained 1442S: Maintained
1438 1443
1444INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT
1445P: Deepak Saxena
1446M: dsaxena@plexity.net
1447S: Maintained
1448
1439INTEL PRO/100 ETHERNET SUPPORT 1449INTEL PRO/100 ETHERNET SUPPORT
1440P: John Ronciak 1450P: John Ronciak
1441M: john.ronciak@intel.com 1451M: john.ronciak@intel.com
@@ -2725,6 +2735,11 @@ P: Christoph Hellwig
2725M: hch@infradead.org 2735M: hch@infradead.org
2726S: Maintained 2736S: Maintained
2727 2737
2738TI OMAP RANDOM NUMBER GENERATOR SUPPORT
2739P: Deepak Saxena
2740M: dsaxena@plexity.net
2741S: Maintained
2742
2728TI PARALLEL LINK CABLE DRIVER 2743TI PARALLEL LINK CABLE DRIVER
2729P: Romain Lievin 2744P: Romain Lievin
2730M: roms@lpg.ticalc.org 2745M: roms@lpg.ticalc.org
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index ba788cfdc3c6..9fc0eeb4f0ab 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -112,7 +112,7 @@ op_axp_create_files(struct super_block * sb, struct dentry * root)
112 112
113 for (i = 0; i < model->num_counters; ++i) { 113 for (i = 0; i < model->num_counters; ++i) {
114 struct dentry *dir; 114 struct dentry *dir;
115 char buf[3]; 115 char buf[4];
116 116
117 snprintf(buf, sizeof buf, "%d", i); 117 snprintf(buf, sizeof buf, "%d", i);
118 dir = oprofilefs_mkdir(sb, root, buf); 118 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index a7dc1370695b..0dafba3a701d 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -629,21 +629,6 @@ static int locomo_resume(struct platform_device *dev)
629#endif 629#endif
630 630
631 631
632#define LCM_ALC_EN 0x8000
633
634void frontlight_set(struct locomo *lchip, int duty, int vr, int bpwf)
635{
636 unsigned long flags;
637
638 spin_lock_irqsave(&lchip->lock, flags);
639 locomo_writel(bpwf, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
640 udelay(100);
641 locomo_writel(duty, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
642 locomo_writel(bpwf | LCM_ALC_EN, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
643 spin_unlock_irqrestore(&lchip->lock, flags);
644}
645
646
647/** 632/**
648 * locomo_probe - probe for a single LoCoMo chip. 633 * locomo_probe - probe for a single LoCoMo chip.
649 * @phys_addr: physical address of device. 634 * @phys_addr: physical address of device.
@@ -698,14 +683,10 @@ __locomo_probe(struct device *me, struct resource *mem, int irq)
698 , lchip->base + LOCOMO_GPD); 683 , lchip->base + LOCOMO_GPD);
699 locomo_writel(0, lchip->base + LOCOMO_GIE); 684 locomo_writel(0, lchip->base + LOCOMO_GIE);
700 685
701 /* FrontLight */ 686 /* Frontlight */
702 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS); 687 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
703 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD); 688 locomo_writel(0, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
704 689
705 /* Same constants can be used for collie and poodle
706 (depending on CONFIG options in original sharp code)? */
707 frontlight_set(lchip, 163, 0, 148);
708
709 /* Longtime timer */ 690 /* Longtime timer */
710 locomo_writel(0, lchip->base + LOCOMO_LTINT); 691 locomo_writel(0, lchip->base + LOCOMO_LTINT);
711 /* SPI */ 692 /* SPI */
@@ -1063,6 +1044,30 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int
1063} 1044}
1064 1045
1065/* 1046/*
1047 * Frontlight control
1048 */
1049
1050static struct locomo *locomo_chip_driver(struct locomo_dev *ldev);
1051
1052void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf)
1053{
1054 unsigned long flags;
1055 struct locomo *lchip = locomo_chip_driver(dev);
1056
1057 if (vr)
1058 locomo_gpio_write(dev, LOCOMO_GPIO_FL_VR, 1);
1059 else
1060 locomo_gpio_write(dev, LOCOMO_GPIO_FL_VR, 0);
1061
1062 spin_lock_irqsave(&lchip->lock, flags);
1063 locomo_writel(bpwf, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
1064 udelay(100);
1065 locomo_writel(duty, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALD);
1066 locomo_writel(bpwf | LOCOMO_ALC_EN, lchip->base + LOCOMO_FRONTLIGHT + LOCOMO_ALS);
1067 spin_unlock_irqrestore(&lchip->lock, flags);
1068}
1069
1070/*
1066 * LoCoMo "Register Access Bus." 1071 * LoCoMo "Register Access Bus."
1067 * 1072 *
1068 * We model this as a regular bus type, and hang devices directly 1073 * We model this as a regular bus type, and hang devices directly
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 1596101cfaf8..374fb50608a0 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86_32
14 486, 586, Pentiums, and various instruction-set-compatible chips by 14 486, 586, Pentiums, and various instruction-set-compatible chips by
15 AMD, Cyrix, and others. 15 AMD, Cyrix, and others.
16 16
17config GENERIC_TIME
18 bool
19 default y
20
17config SEMAPHORE_SLEEPERS 21config SEMAPHORE_SLEEPERS
18 bool 22 bool
19 default y 23 default y
@@ -1046,13 +1050,23 @@ config SCx200
1046 tristate "NatSemi SCx200 support" 1050 tristate "NatSemi SCx200 support"
1047 depends on !X86_VOYAGER 1051 depends on !X86_VOYAGER
1048 help 1052 help
1049 This provides basic support for the National Semiconductor SCx200 1053 This provides basic support for National Semiconductor's
1050 processor. Right now this is just a driver for the GPIO pins. 1054 (now AMD's) Geode processors. The driver probes for the
1055 PCI-IDs of several on-chip devices, so it's a good dependency
1056 for other scx200_* drivers.
1051 1057
1052 If you don't know what to do here, say N. 1058 If compiled as a module, the driver is named scx200.
1053 1059
1054 This support is also available as a module. If compiled as a 1060config SCx200HR_TIMER
1055 module, it will be called scx200. 1061 tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
1062 depends on SCx200 && GENERIC_TIME
1063 default y
1064 help
1065 This driver provides a clocksource built upon the on-chip
1066 27MHz high-resolution timer. It's also a workaround for
1067 NSC Geode SC-1100's buggy TSC, which loses time when the
1068 processor goes idle (as is done by the scheduler). The
1069 other workaround is idle=poll boot option.
1056 1070
1057source "drivers/pcmcia/Kconfig" 1071source "drivers/pcmcia/Kconfig"
1058 1072
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index c9343c3a8082..8c2a6faeeae5 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1929,7 +1929,7 @@ skip10: movb %ah, %al
1929 ret 1929 ret
1930 1930
1931store_edid: 1931store_edid:
1932#ifdef CONFIG_FB_FIRMWARE_EDID 1932#ifdef CONFIG_FIRMWARE_EDID
1933 pushw %es # just save all registers 1933 pushw %es # just save all registers
1934 pushw %ax 1934 pushw %ax
1935 pushw %bx 1935 pushw %bx
@@ -1947,6 +1947,22 @@ store_edid:
1947 rep 1947 rep
1948 stosl 1948 stosl
1949 1949
1950 pushw %es # save ES
1951 xorw %di, %di # Report Capability
1952 pushw %di
1953 popw %es # ES:DI must be 0:0
1954 movw $0x4f15, %ax
1955 xorw %bx, %bx
1956 xorw %cx, %cx
1957 int $0x10
1958 popw %es # restore ES
1959
1960 cmpb $0x00, %ah # call successful
1961 jne no_edid
1962
1963 cmpb $0x4f, %al # function supported
1964 jne no_edid
1965
1950 movw $0x4f15, %ax # do VBE/DDC 1966 movw $0x4f15, %ax # do VBE/DDC
1951 movw $0x01, %bx 1967 movw $0x01, %bx
1952 movw $0x00, %cx 1968 movw $0x00, %cx
@@ -1954,6 +1970,7 @@ store_edid:
1954 movw $0x140, %di 1970 movw $0x140, %di
1955 int $0x10 1971 int $0x10
1956 1972
1973no_edid:
1957 popw %di # restore all registers 1974 popw %di # restore all registers
1958 popw %dx 1975 popw %dx
1959 popw %cx 1976 popw %cx
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a020af2..0fac85df64f1 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds
7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ 7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
9 pci-dma.o i386_ksyms.o i387.o bootflag.o \ 9 pci-dma.o i386_ksyms.o i387.o bootflag.o \
10 quirks.o i8237.o topology.o alternative.o 10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
11 11
12obj-y += cpu/ 12obj-y += cpu/
13obj-y += timers/
14obj-y += acpi/ 13obj-y += acpi/
15obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 14obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
16obj-$(CONFIG_MCA) += mca.o 15obj-$(CONFIG_MCA) += mca.o
@@ -37,6 +36,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
37obj-$(CONFIG_DOUBLEFAULT) += doublefault.o 36obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
38obj-$(CONFIG_VM86) += vm86.o 37obj-$(CONFIG_VM86) += vm86.o
39obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 38obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
39obj-$(CONFIG_HPET_TIMER) += hpet.o
40 40
41EXTRA_AFLAGS := -traditional 41EXTRA_AFLAGS := -traditional
42 42
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
new file mode 100644
index 000000000000..c6737c35815d
--- /dev/null
+++ b/arch/i386/kernel/hpet.c
@@ -0,0 +1,67 @@
1#include <linux/clocksource.h>
2#include <linux/errno.h>
3#include <linux/hpet.h>
4#include <linux/init.h>
5
6#include <asm/hpet.h>
7#include <asm/io.h>
8
9#define HPET_MASK CLOCKSOURCE_MASK(32)
10#define HPET_SHIFT 22
11
12/* FSEC = 10^-15 NSEC = 10^-9 */
13#define FSEC_PER_NSEC 1000000
14
15static void *hpet_ptr;
16
/*
 * Clocksource read callback for the HPET: returns the value obtained by
 * readl() from hpet_ptr, which init_hpet_clocksource() sets to
 * hpet_base + HPET_COUNTER before registering the clocksource.
 */
17static cycle_t read_hpet(void)
18{
19 return (cycle_t)readl(hpet_ptr);
20}
21
/*
 * Clocksource descriptor for the HPET counter.  All fields are fixed at
 * compile time except .mult, which init_hpet_clocksource() derives from
 * the period read out of the hardware.
 */
22static struct clocksource clocksource_hpet = {
23 .name = "hpet",
24 .rating = 250,
25 .read = read_hpet,
26 .mask = HPET_MASK,
27 .mult = 0, /* computed in init_hpet_clocksource() */
28 .shift = HPET_SHIFT,
29 .is_continuous = 1,
30};
31
/*
 * Map the HPET registers, compute the cycle-to-nanosecond multiplier and
 * register clocksource_hpet.
 *
 * Returns -ENODEV when hpet_address is zero, otherwise whatever
 * clocksource_register() returns.
 */
32static int __init init_hpet_clocksource(void)
33{
34 unsigned long hpet_period;
35 void __iomem* hpet_base;
36 u64 tmp;
37
38 if (!hpet_address)
39 return -ENODEV;
40
41 /* calculate the hpet address: */
/*
 * NOTE(review): the ioremap_nocache() result is used without a visible
 * failure check, and the mapping is not released here if registration
 * fails -- confirm whether that is intended.
 */
42 hpet_base =
43 (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
44 hpet_ptr = hpet_base + HPET_COUNTER;
45
46 /* calculate the frequency: */
47 hpet_period = readl(hpet_base + HPET_PERIOD);
48
49 /*
50 * hpet period is in femto seconds per cycle
51 * so we need to convert this to ns/cyc units
52 * approximated by mult/2^shift
53 *
54 * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
55 * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
56 * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
57 * (fsec/cyc << shift)/1000000 = mult
58 * (hpet_period << shift)/FSEC_PER_NSEC = mult
59 */
/* Widen to 64 bits before shifting so the shifted period is not truncated. */
60 tmp = (u64)hpet_period << HPET_SHIFT;
61 do_div(tmp, FSEC_PER_NSEC);
62 clocksource_hpet.mult = (u32)tmp;
63
64 return clocksource_register(&clocksource_hpet);
65}
66
67module_init(init_hpet_clocksource);
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
new file mode 100644
index 000000000000..477b24daff53
--- /dev/null
+++ b/arch/i386/kernel/i8253.c
@@ -0,0 +1,118 @@
1/*
2 * i8253.c 8253/PIT functions
3 *
4 */
5#include <linux/clocksource.h>
6#include <linux/spinlock.h>
7#include <linux/jiffies.h>
8#include <linux/sysdev.h>
9#include <linux/module.h>
10#include <linux/init.h>
11
12#include <asm/smp.h>
13#include <asm/delay.h>
14#include <asm/i8253.h>
15#include <asm/io.h>
16
17#include "io_ports.h"
18
19DEFINE_SPINLOCK(i8253_lock);
20EXPORT_SYMBOL(i8253_lock);
21
/*
 * Program PIT channel 0 with the LATCH reload value.
 *
 * Writes the mode byte 0x34 to PIT_MODE, then the low and high bytes of
 * LATCH to PIT_CH0, with a 10us delay after each of the first two writes.
 * The whole sequence runs under i8253_lock with interrupt state
 * saved/restored.
 */
22void setup_pit_timer(void)
23{
24 unsigned long flags;
25
26 spin_lock_irqsave(&i8253_lock, flags);
27 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
28 udelay(10);
29 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
30 udelay(10);
31 outb(LATCH >> 8 , PIT_CH0); /* MSB */
32 spin_unlock_irqrestore(&i8253_lock, flags);
33}
34
35/*
36 * Since the PIT overflows every tick, it is not very useful
37 * to just read by itself. So use jiffies to emulate a free
38 * running counter:
39 *
 * Returns jifs * LATCH plus the inverted latched count. old_count and
 * old_jifs are function-static, so state persists between calls and is
 * only touched while i8253_lock is held.
 */
40static cycle_t pit_read(void)
41{
42 unsigned long flags;
43 int count;
44 u32 jifs;
45 static int old_count;
46 static u32 old_jifs;
47
48 spin_lock_irqsave(&i8253_lock, flags);
49 /*
50 * Although our caller may have the read side of xtime_lock,
51 * this is now a seqlock, and we are cheating in this routine
52 * by having side effects on state that we cannot undo if
53 * there is a collision on the seqlock and our caller has to
54 * retry. (Namely, old_jifs and old_count.) So we must treat
55 * jiffies as volatile despite the lock. We read jiffies
56 * before latching the timer count to guarantee that although
57 * the jiffies value might be older than the count (that is,
58 * the counter may underflow between the last point where
59 * jiffies was incremented and the point where we latch the
60 * count), it cannot be newer.
61 */
62 jifs = jiffies;
63 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
64 count = inb_p(PIT_CH0); /* read the latched count (low byte) */
/* second read supplies the high byte of the latched count */
65 count |= inb_p(PIT_CH0) << 8;
66
67 /* VIA686a test code... reset the latch if count > max + 1 */
/* The reprogramming below repeats the setup_pit_timer() write sequence, minus the delays. */
68 if (count > LATCH) {
69 outb_p(0x34, PIT_MODE);
70 outb_p(LATCH & 0xff, PIT_CH0);
71 outb(LATCH >> 8, PIT_CH0);
72 count = LATCH - 1;
73 }
74
75 /*
76 * It's possible for count to appear to go the wrong way for a
77 * couple of reasons:
78 *
79 * 1. The timer counter underflows, but we haven't handled the
80 * resulting interrupt and incremented jiffies yet.
81 * 2. Hardware problem with the timer, not giving us continuous time,
82 * the counter does small "jumps" upwards on some Pentium systems,
83 * (see c't 95/10 page 335 for Neptun bug.)
84 *
85 * Previous attempts to handle these cases intelligently were
86 * buggy, so we just do the simple thing now.
87 */
/* Within the same jiffy, never report a count larger than the previous one. */
88 if (count > old_count && jifs == old_jifs) {
89 count = old_count;
90 }
91 old_count = count;
92 old_jifs = jifs;
93
94 spin_unlock_irqrestore(&i8253_lock, flags);
95
/* Invert the latched value so the result grows as the hardware count shrinks. */
96 count = (LATCH - 1) - count;
97
/*
 * NOTE(review): jifs is u32, so jifs * LATCH presumably wraps at 32 bits
 * before the cast to cycle_t -- confirm LATCH's type and that this is the
 * intended pairing with the 32-bit mask in clocksource_pit.
 */
98 return (cycle_t)(jifs * LATCH) + count;
99}
100
/*
 * Clocksource descriptor for the jiffies-extended PIT counter returned by
 * pit_read().  .mult is left at 0 here and filled in at init time.
 */
101static struct clocksource clocksource_pit = {
102 .name = "pit",
103 .rating = 110,
104 .read = pit_read,
105 .mask = CLOCKSOURCE_MASK(32),
106 .mult = 0, /* computed in init_pit_clocksource() */
107 .shift = 20, /* init_pit_clocksource() passes the same literal 20 */
108};
109
/*
 * Register the PIT clocksource.
 *
 * Returns 0 without registering anything when more than 4 CPUs are
 * possible; otherwise computes .mult from CLOCK_TICK_RATE and returns the
 * result of clocksource_register().
 */
110static int __init init_pit_clocksource(void)
111{
112 if (num_possible_cpus() > 4) /* PIT does not scale! */
113 return 0;
114
/* The shift argument (20) is a literal duplicate of clocksource_pit.shift; keep the two in sync. */
115 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
116 return clocksource_register(&clocksource_pit);
117}
118module_init(init_pit_clocksource);
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 395a9a6dff88..727e419ad78a 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to)
57/* 57/*
58 * returns non-zero if opcodes can be boosted. 58 * returns non-zero if opcodes can be boosted.
59 */ 59 */
60static __always_inline int can_boost(kprobe_opcode_t opcode) 60static __always_inline int can_boost(kprobe_opcode_t *opcodes)
61{ 61{
62 switch (opcode & 0xf0 ) { 62#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
63 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
64 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
65 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
66 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
67 << (row % 32))
68 /*
69 * Undefined/reserved opcodes, conditional jump, Opcode Extension
70 * Groups, and some special opcodes can not be boost.
71 */
72 static const unsigned long twobyte_is_boostable[256 / 32] = {
73 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
74 /* ------------------------------- */
75 W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
76 W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
77 W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
78 W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
79 W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
80 W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
81 W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
82 W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
83 W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
84 W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
85 W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
86 W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
87 W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
88 W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
89 W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
90 W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
91 /* ------------------------------- */
92 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
93 };
94#undef W
95 kprobe_opcode_t opcode;
96 kprobe_opcode_t *orig_opcodes = opcodes;
97retry:
98 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
99 return 0;
100 opcode = *(opcodes++);
101
102 /* 2nd-byte opcode */
103 if (opcode == 0x0f) {
104 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
105 return 0;
106 return test_bit(*opcodes, twobyte_is_boostable);
107 }
108
109 switch (opcode & 0xf0) {
110 case 0x60:
111 if (0x63 < opcode && opcode < 0x67)
112 goto retry; /* prefixes */
113 /* can't boost Address-size override and bound */
114 return (opcode != 0x62 && opcode != 0x67);
63 case 0x70: 115 case 0x70:
64 return 0; /* can't boost conditional jump */ 116 return 0; /* can't boost conditional jump */
65 case 0x90:
66 /* can't boost call and pushf */
67 return opcode != 0x9a && opcode != 0x9c;
68 case 0xc0: 117 case 0xc0:
69 /* can't boost undefined opcodes and soft-interruptions */ 118 /* can't boost software-interruptions */
70 return (0xc1 < opcode && opcode < 0xc6) || 119 return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
71 (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
72 case 0xd0: 120 case 0xd0:
73 /* can boost AA* and XLAT */ 121 /* can boost AA* and XLAT */
74 return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); 122 return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
75 case 0xe0: 123 case 0xe0:
76 /* can boost in/out and (may be) jmps */ 124 /* can boost in/out and absolute jmps */
77 return (0xe3 < opcode && opcode != 0xe8); 125 return ((opcode & 0x04) || opcode == 0xea);
78 case 0xf0: 126 case 0xf0:
127 if ((opcode & 0x0c) == 0 && opcode != 0xf1)
128 goto retry; /* lock/rep(ne) prefix */
79 /* clear and set flags can be boost */ 129 /* clear and set flags can be boost */
80 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); 130 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
81 default: 131 default:
82 /* currently, can't boost 2 bytes opcodes */ 132 if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
83 return opcode != 0x0f; 133 goto retry; /* prefixes */
134 /* can't boost CS override and call */
135 return (opcode != 0x2e && opcode != 0x9a);
84 } 136 }
85} 137}
86 138
87
88/* 139/*
89 * returns non-zero if opcode modifies the interrupt flag. 140 * returns non-zero if opcode modifies the interrupt flag.
90 */ 141 */
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
109 160
110 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 161 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
111 p->opcode = *p->addr; 162 p->opcode = *p->addr;
112 if (can_boost(p->opcode)) { 163 if (can_boost(p->addr)) {
113 p->ainsn.boostable = 0; 164 p->ainsn.boostable = 0;
114 } else { 165 } else {
115 p->ainsn.boostable = -1; 166 p->ainsn.boostable = -1;
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
208 struct kprobe_ctlblk *kcb; 259 struct kprobe_ctlblk *kcb;
209#ifdef CONFIG_PREEMPT 260#ifdef CONFIG_PREEMPT
210 unsigned pre_preempt_count = preempt_count(); 261 unsigned pre_preempt_count = preempt_count();
211#endif /* CONFIG_PREEMPT */ 262#else
263 unsigned pre_preempt_count = 1;
264#endif
212 265
213 addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); 266 addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
214 267
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
285 /* handler has already set things up, so skip ss setup */ 338 /* handler has already set things up, so skip ss setup */
286 return 1; 339 return 1;
287 340
288 if (p->ainsn.boostable == 1 && 341ss_probe:
289#ifdef CONFIG_PREEMPT 342 if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){
290 !(pre_preempt_count) && /*
291 * This enables booster when the direct
292 * execution path aren't preempted.
293 */
294#endif /* CONFIG_PREEMPT */
295 !p->post_handler && !p->break_handler ) {
296 /* Boost up -- we can execute copied instructions directly */ 343 /* Boost up -- we can execute copied instructions directly */
297 reset_current_kprobe(); 344 reset_current_kprobe();
298 regs->eip = (unsigned long)p->ainsn.insn; 345 regs->eip = (unsigned long)p->ainsn.insn;
299 preempt_enable_no_resched(); 346 preempt_enable_no_resched();
300 return 1; 347 return 1;
301 } 348 }
302
303ss_probe:
304 prepare_singlestep(p, regs); 349 prepare_singlestep(p, regs);
305 kcb->kprobe_status = KPROBE_HIT_SS; 350 kcb->kprobe_status = KPROBE_HIT_SS;
306 return 1; 351 return 1;
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index 5f5b075f860a..0caf14652bad 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void)
79 return 1; 79 return 1;
80} 80}
81 81
82static int __init numaq_dsc_disable(void) 82static int __init numaq_tsc_disable(void)
83{ 83{
84 printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); 84 if (num_online_nodes() > 1) {
85 tsc_disable = 1; 85 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
86 tsc_disable = 1;
87 }
86 return 0; 88 return 0;
87} 89}
88core_initcall(numaq_dsc_disable); 90arch_initcall(numaq_tsc_disable);
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 6bef9273733e..4a65040cc624 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1575,6 +1575,7 @@ void __init setup_arch(char **cmdline_p)
1575 conswitchp = &dummy_con; 1575 conswitchp = &dummy_con;
1576#endif 1576#endif
1577#endif 1577#endif
1578 tsc_init();
1578} 1579}
1579 1580
1580static __init int add_pcspkr(void) 1581static __init int add_pcspkr(void)
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 9d3074759856..5f43d0410122 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies;
82DEFINE_SPINLOCK(rtc_lock); 82DEFINE_SPINLOCK(rtc_lock);
83EXPORT_SYMBOL(rtc_lock); 83EXPORT_SYMBOL(rtc_lock);
84 84
85#include <asm/i8253.h>
86
87DEFINE_SPINLOCK(i8253_lock);
88EXPORT_SYMBOL(i8253_lock);
89
90struct timer_opts *cur_timer __read_mostly = &timer_none;
91
92/* 85/*
93 * This is a special lock that is owned by the CPU and holds the index 86 * This is a special lock that is owned by the CPU and holds the index
94 * register we are working with. It is required for NMI access to the 87 * register we are working with. It is required for NMI access to the
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
118} 111}
119EXPORT_SYMBOL(rtc_cmos_write); 112EXPORT_SYMBOL(rtc_cmos_write);
120 113
121/*
122 * This version of gettimeofday has microsecond resolution
123 * and better than microsecond precision on fast x86 machines with TSC.
124 */
125void do_gettimeofday(struct timeval *tv)
126{
127 unsigned long seq;
128 unsigned long usec, sec;
129 unsigned long max_ntp_tick;
130
131 do {
132 unsigned long lost;
133
134 seq = read_seqbegin(&xtime_lock);
135
136 usec = cur_timer->get_offset();
137 lost = jiffies - wall_jiffies;
138
139 /*
140 * If time_adjust is negative then NTP is slowing the clock
141 * so make sure not to go into next possible interval.
142 * Better to lose some accuracy than have time go backwards..
143 */
144 if (unlikely(time_adjust < 0)) {
145 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
146 usec = min(usec, max_ntp_tick);
147
148 if (lost)
149 usec += lost * max_ntp_tick;
150 }
151 else if (unlikely(lost))
152 usec += lost * (USEC_PER_SEC / HZ);
153
154 sec = xtime.tv_sec;
155 usec += (xtime.tv_nsec / 1000);
156 } while (read_seqretry(&xtime_lock, seq));
157
158 while (usec >= 1000000) {
159 usec -= 1000000;
160 sec++;
161 }
162
163 tv->tv_sec = sec;
164 tv->tv_usec = usec;
165}
166
167EXPORT_SYMBOL(do_gettimeofday);
168
169int do_settimeofday(struct timespec *tv)
170{
171 time_t wtm_sec, sec = tv->tv_sec;
172 long wtm_nsec, nsec = tv->tv_nsec;
173
174 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
175 return -EINVAL;
176
177 write_seqlock_irq(&xtime_lock);
178 /*
179 * This is revolting. We need to set "xtime" correctly. However, the
180 * value in this location is the value at the most recent update of
181 * wall time. Discover what correction gettimeofday() would have
182 * made, and then undo it!
183 */
184 nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
185 nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
186
187 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
188 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
189
190 set_normalized_timespec(&xtime, sec, nsec);
191 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
192
193 ntp_clear();
194 write_sequnlock_irq(&xtime_lock);
195 clock_was_set();
196 return 0;
197}
198
199EXPORT_SYMBOL(do_settimeofday);
200
201static int set_rtc_mmss(unsigned long nowtime) 114static int set_rtc_mmss(unsigned long nowtime)
202{ 115{
203 int retval; 116 int retval;
204 117 unsigned long flags;
205 WARN_ON(irqs_disabled());
206 118
207 /* gets recalled with irq locally disabled */ 119 /* gets recalled with irq locally disabled */
208 spin_lock_irq(&rtc_lock); 120 /* XXX - does irqsave resolve this? -johnstul */
121 spin_lock_irqsave(&rtc_lock, flags);
209 if (efi_enabled) 122 if (efi_enabled)
210 retval = efi_set_rtc_mmss(nowtime); 123 retval = efi_set_rtc_mmss(nowtime);
211 else 124 else
212 retval = mach_set_rtc_mmss(nowtime); 125 retval = mach_set_rtc_mmss(nowtime);
213 spin_unlock_irq(&rtc_lock); 126 spin_unlock_irqrestore(&rtc_lock, flags);
214 127
215 return retval; 128 return retval;
216} 129}
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime)
218 131
219int timer_ack; 132int timer_ack;
220 133
221/* monotonic_clock(): returns # of nanoseconds passed since time_init()
222 * Note: This function is required to return accurate
223 * time even in the absence of multiple timer ticks.
224 */
225unsigned long long monotonic_clock(void)
226{
227 return cur_timer->monotonic_clock();
228}
229EXPORT_SYMBOL(monotonic_clock);
230
231#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) 134#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
232unsigned long profile_pc(struct pt_regs *regs) 135unsigned long profile_pc(struct pt_regs *regs)
233{ 136{
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc);
242#endif 145#endif
243 146
244/* 147/*
245 * timer_interrupt() needs to keep up the real-time clock, 148 * This is the same as the above, except we _also_ save the current
246 * as well as call the "do_timer()" routine every clocktick 149 * Time Stamp Counter value at the time of the timer interrupt, so that
150 * we later on can estimate the time of day more exactly.
247 */ 151 */
248static inline void do_timer_interrupt(int irq, struct pt_regs *regs) 152irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
249{ 153{
154 /*
155 * Here we are in the timer irq handler. We just have irqs locally
156 * disabled but we don't know if the timer_bh is running on the other
157 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
158 * the irq version of write_lock because as just said we have irq
159 * locally disabled. -arca
160 */
161 write_seqlock(&xtime_lock);
162
250#ifdef CONFIG_X86_IO_APIC 163#ifdef CONFIG_X86_IO_APIC
251 if (timer_ack) { 164 if (timer_ack) {
252 /* 165 /*
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
279 irq = inb_p( 0x61 ); /* read the current state */ 192 irq = inb_p( 0x61 ); /* read the current state */
280 outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ 193 outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
281 } 194 }
282}
283
284/*
285 * This is the same as the above, except we _also_ save the current
286 * Time Stamp Counter value at the time of the timer interrupt, so that
287 * we later on can estimate the time of day more exactly.
288 */
289irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
290{
291 /*
292 * Here we are in the timer irq handler. We just have irqs locally
293 * disabled but we don't know if the timer_bh is running on the other
294 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
295 * the irq version of write_lock because as just said we have irq
296 * locally disabled. -arca
297 */
298 write_seqlock(&xtime_lock);
299
300 cur_timer->mark_offset();
301
302 do_timer_interrupt(irq, regs);
303 195
304 write_sequnlock(&xtime_lock); 196 write_sequnlock(&xtime_lock);
305 197
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void)
380 272
381static long clock_cmos_diff, sleep_start; 273static long clock_cmos_diff, sleep_start;
382 274
383static struct timer_opts *last_timer;
384static int timer_suspend(struct sys_device *dev, pm_message_t state) 275static int timer_suspend(struct sys_device *dev, pm_message_t state)
385{ 276{
386 /* 277 /*
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
389 clock_cmos_diff = -get_cmos_time(); 280 clock_cmos_diff = -get_cmos_time();
390 clock_cmos_diff += get_seconds(); 281 clock_cmos_diff += get_seconds();
391 sleep_start = get_cmos_time(); 282 sleep_start = get_cmos_time();
392 last_timer = cur_timer;
393 cur_timer = &timer_none;
394 if (last_timer->suspend)
395 last_timer->suspend(state);
396 return 0; 283 return 0;
397} 284}
398 285
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev)
415 jiffies_64 += sleep_length; 302 jiffies_64 += sleep_length;
416 wall_jiffies += sleep_length; 303 wall_jiffies += sleep_length;
417 write_sequnlock_irqrestore(&xtime_lock, flags); 304 write_sequnlock_irqrestore(&xtime_lock, flags);
418 if (last_timer->resume)
419 last_timer->resume();
420 cur_timer = last_timer;
421 last_timer = NULL;
422 touch_softlockup_watchdog(); 305 touch_softlockup_watchdog();
423 return 0; 306 return 0;
424} 307}
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void)
460 printk("Using HPET for base-timer\n"); 343 printk("Using HPET for base-timer\n");
461 } 344 }
462 345
463 cur_timer = select_timer();
464 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
465
466 time_init_hook(); 346 time_init_hook();
467} 347}
468#endif 348#endif
@@ -484,8 +364,5 @@ void __init time_init(void)
484 set_normalized_timespec(&wall_to_monotonic, 364 set_normalized_timespec(&wall_to_monotonic,
485 -xtime.tv_sec, -xtime.tv_nsec); 365 -xtime.tv_sec, -xtime.tv_nsec);
486 366
487 cur_timer = select_timer();
488 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
489
490 time_init_hook(); 367 time_init_hook();
491} 368}
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
index 8fa12be658dd..000000000000
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1#
2# Makefile for x86 timers
3#
4
5obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
6
7obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
8obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
9obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
index 8163fe0cf1f0..000000000000
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/*
2 * Common functions used across the timers go here
3 */
4
5#include <linux/init.h>
6#include <linux/timex.h>
7#include <linux/errno.h>
8#include <linux/jiffies.h>
9#include <linux/module.h>
10
11#include <asm/io.h>
12#include <asm/timer.h>
13#include <asm/hpet.h>
14
15#include "mach_timer.h"
16
17/* ------ Calibrate the TSC -------
18 * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
19 * Too much 64-bit arithmetic here to do this cleanly in C, and for
20 * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
21 * output busy loop as low as possible. We avoid reading the CTC registers
22 * directly because of the awkward 8-bit access mechanism of the 82C54
23 * device.
24 */
25
26#define CALIBRATE_TIME (5 * 1000020/HZ)
27
28unsigned long calibrate_tsc(void)
29{
30 mach_prepare_counter();
31
32 {
33 unsigned long startlow, starthigh;
34 unsigned long endlow, endhigh;
35 unsigned long count;
36
37 rdtsc(startlow,starthigh);
38 mach_countup(&count);
39 rdtsc(endlow,endhigh);
40
41
42 /* Error: ECTCNEVERSET */
43 if (count <= 1)
44 goto bad_ctc;
45
46 /* 64-bit subtract - gcc just messes up with long longs */
47 __asm__("subl %2,%0\n\t"
48 "sbbl %3,%1"
49 :"=a" (endlow), "=d" (endhigh)
50 :"g" (startlow), "g" (starthigh),
51 "0" (endlow), "1" (endhigh));
52
53 /* Error: ECPUTOOFAST */
54 if (endhigh)
55 goto bad_ctc;
56
57 /* Error: ECPUTOOSLOW */
58 if (endlow <= CALIBRATE_TIME)
59 goto bad_ctc;
60
61 __asm__("divl %2"
62 :"=a" (endlow), "=d" (endhigh)
63 :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
64
65 return endlow;
66 }
67
68 /*
69 * The CTC wasn't reliable: we got a hit on the very first read,
70 * or the CPU was so fast/slow that the quotient wouldn't fit in
71 * 32 bits..
72 */
73bad_ctc:
74 return 0;
75}
76
77#ifdef CONFIG_HPET_TIMER
78/* ------ Calibrate the TSC using HPET -------
79 * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
80 * Second output is parameter 1 (when non NULL)
81 * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
82 * calibrate_tsc() calibrates the processor TSC by comparing
83 * it to the HPET timer of known frequency.
84 * Too much 64-bit arithmetic here to do this cleanly in C
85 */
86#define CALIBRATE_CNT_HPET (5 * hpet_tick)
87#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
88
89unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
90{
91 unsigned long tsc_startlow, tsc_starthigh;
92 unsigned long tsc_endlow, tsc_endhigh;
93 unsigned long hpet_start, hpet_end;
94 unsigned long result, remain;
95
96 hpet_start = hpet_readl(HPET_COUNTER);
97 rdtsc(tsc_startlow, tsc_starthigh);
98 do {
99 hpet_end = hpet_readl(HPET_COUNTER);
100 } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
101 rdtsc(tsc_endlow, tsc_endhigh);
102
103 /* 64-bit subtract - gcc just messes up with long longs */
104 __asm__("subl %2,%0\n\t"
105 "sbbl %3,%1"
106 :"=a" (tsc_endlow), "=d" (tsc_endhigh)
107 :"g" (tsc_startlow), "g" (tsc_starthigh),
108 "0" (tsc_endlow), "1" (tsc_endhigh));
109
110 /* Error: ECPUTOOFAST */
111 if (tsc_endhigh)
112 goto bad_calibration;
113
114 /* Error: ECPUTOOSLOW */
115 if (tsc_endlow <= CALIBRATE_TIME_HPET)
116 goto bad_calibration;
117
118 ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
119 if (remain > (tsc_endlow >> 1))
120 result++; /* rounding the result */
121
122 if (tsc_hpet_quotient_ptr) {
123 unsigned long tsc_hpet_quotient;
124
125 ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
126 CALIBRATE_CNT_HPET);
127 if (remain > (tsc_endlow >> 1))
128 tsc_hpet_quotient++; /* rounding the result */
129 *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
130 }
131
132 return result;
133bad_calibration:
134 /*
135 * the CPU was so fast/slow that the quotient wouldn't fit in
136 * 32 bits..
137 */
138 return 0;
139}
140#endif
141
142
143unsigned long read_timer_tsc(void)
144{
145 unsigned long retval;
146 rdtscl(retval);
147 return retval;
148}
149
150
151/* calculate cpu_khz */
152void init_cpu_khz(void)
153{
154 if (cpu_has_tsc) {
155 unsigned long tsc_quotient = calibrate_tsc();
156 if (tsc_quotient) {
157 /* report CPU clock rate in Hz.
158 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
159 * clock/second. Our precision is about 100 ppm.
160 */
161 { unsigned long eax=0, edx=1000;
162 __asm__("divl %2"
163 :"=a" (cpu_khz), "=d" (edx)
164 :"r" (tsc_quotient),
165 "0" (eax), "1" (edx));
166 printk("Detected %u.%03u MHz processor.\n",
167 cpu_khz / 1000, cpu_khz % 1000);
168 }
169 }
170 }
171}
172
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
index 7e39ed8e33f8..000000000000
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/string.h>
4#include <asm/timer.h>
5
6#ifdef CONFIG_HPET_TIMER
7/*
8 * HPET memory read is slower than tsc reads, but is more dependable as it
9 * always runs at constant frequency and reduces complexity due to
10 * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
11 * timer_pit when HPET is active. So, we default to timer_tsc.
12 */
13#endif
14/* list of timers, ordered by preference, NULL terminated */
15static struct init_timer_opts* __initdata timers[] = {
16#ifdef CONFIG_X86_CYCLONE_TIMER
17 &timer_cyclone_init,
18#endif
19#ifdef CONFIG_HPET_TIMER
20 &timer_hpet_init,
21#endif
22#ifdef CONFIG_X86_PM_TIMER
23 &timer_pmtmr_init,
24#endif
25 &timer_tsc_init,
26 &timer_pit_init,
27 NULL,
28};
29
30static char clock_override[10] __initdata;
31
32static int __init clock_setup(char* str)
33{
34 if (str)
35 strlcpy(clock_override, str, sizeof(clock_override));
36 return 1;
37}
38__setup("clock=", clock_setup);
39
40
41/* The chosen timesource has been found to be bad.
42 * Fall back to a known good timesource (the PIT)
43 */
44void clock_fallback(void)
45{
46 cur_timer = &timer_pit;
47}
48
49/* iterates through the list of timers, returning the first
50 * one that initializes successfully.
51 */
52struct timer_opts* __init select_timer(void)
53{
54 int i = 0;
55
56 /* find most preferred working timer */
57 while (timers[i]) {
58 if (timers[i]->init)
59 if (timers[i]->init(clock_override) == 0)
60 return timers[i]->opts;
61 ++i;
62 }
63
64 panic("select_timer: Cannot find a suitable timer\n");
65 return NULL;
66}
67
68int read_current_timer(unsigned long *timer_val)
69{
70 if (cur_timer->read_timer) {
71 *timer_val = cur_timer->read_timer();
72 return 0;
73 }
74 return -1;
75}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
index 13892a65c941..000000000000
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
1/* Cyclone-timer:
2 * This code implements timer_ops for the cyclone counter found
3 * on IBM x440, x360, and other Summit based systems.
4 *
5 * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
6 */
7
8
9#include <linux/spinlock.h>
10#include <linux/init.h>
11#include <linux/timex.h>
12#include <linux/errno.h>
13#include <linux/string.h>
14#include <linux/jiffies.h>
15
16#include <asm/timer.h>
17#include <asm/io.h>
18#include <asm/pgtable.h>
19#include <asm/fixmap.h>
20#include <asm/i8253.h>
21
22#include "io_ports.h"
23
24/* Number of usecs that the last interrupt was delayed */
25static int delay_at_last_interrupt;
26
27#define CYCLONE_CBAR_ADDR 0xFEB00CD0
28#define CYCLONE_PMCC_OFFSET 0x51A0
29#define CYCLONE_MPMC_OFFSET 0x51D0
30#define CYCLONE_MPCS_OFFSET 0x51A8
31#define CYCLONE_TIMER_FREQ 100000000
32#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
33int use_cyclone = 0;
34
35static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
36static u32 last_cyclone_low;
37static u32 last_cyclone_high;
38static unsigned long long monotonic_base;
39static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
40
41/* helper macro to atomically read both cyclone counter registers */
42#define read_cyclone_counter(low,high) \
43 do{ \
44 high = cyclone_timer[1]; low = cyclone_timer[0]; \
45 } while (high != cyclone_timer[1]);
46
47
48static void mark_offset_cyclone(void)
49{
50 unsigned long lost, delay;
51 unsigned long delta = last_cyclone_low;
52 int count;
53 unsigned long long this_offset, last_offset;
54
55 write_seqlock(&monotonic_lock);
56 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
57
58 spin_lock(&i8253_lock);
59 read_cyclone_counter(last_cyclone_low,last_cyclone_high);
60
61 /* read values for delay_at_last_interrupt */
62 outb_p(0x00, 0x43); /* latch the count ASAP */
63
64 count = inb_p(0x40); /* read the latched count */
65 count |= inb(0x40) << 8;
66
67 /*
68 * VIA686a test code... reset the latch if count > max + 1
69 * from timer_pit.c - cjb
70 */
71 if (count > LATCH) {
72 outb_p(0x34, PIT_MODE);
73 outb_p(LATCH & 0xff, PIT_CH0);
74 outb(LATCH >> 8, PIT_CH0);
75 count = LATCH - 1;
76 }
77 spin_unlock(&i8253_lock);
78
79 /* lost tick compensation */
80 delta = last_cyclone_low - delta;
81 delta /= (CYCLONE_TIMER_FREQ/1000000);
82 delta += delay_at_last_interrupt;
83 lost = delta/(1000000/HZ);
84 delay = delta%(1000000/HZ);
85 if (lost >= 2)
86 jiffies_64 += lost-1;
87
88 /* update the monotonic base value */
89 this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
90 monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
91 write_sequnlock(&monotonic_lock);
92
93 /* calculate delay_at_last_interrupt */
94 count = ((LATCH-1) - count) * TICK_SIZE;
95 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
96
97
98 /* catch corner case where tick rollover occured
99 * between cyclone and pit reads (as noted when
100 * usec delta is > 90% # of usecs/tick)
101 */
102 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
103 jiffies_64++;
104}
105
106static unsigned long get_offset_cyclone(void)
107{
108 u32 offset;
109
110 if(!cyclone_timer)
111 return delay_at_last_interrupt;
112
113 /* Read the cyclone timer */
114 offset = cyclone_timer[0];
115
116 /* .. relative to previous jiffy */
117 offset = offset - last_cyclone_low;
118
119 /* convert cyclone ticks to microseconds */
120 /* XXX slow, can we speed this up? */
121 offset = offset/(CYCLONE_TIMER_FREQ/1000000);
122
123 /* our adjusted time offset in microseconds */
124 return delay_at_last_interrupt + offset;
125}
126
127static unsigned long long monotonic_clock_cyclone(void)
128{
129 u32 now_low, now_high;
130 unsigned long long last_offset, this_offset, base;
131 unsigned long long ret;
132 unsigned seq;
133
134 /* atomically read monotonic base & last_offset */
135 do {
136 seq = read_seqbegin(&monotonic_lock);
137 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
138 base = monotonic_base;
139 } while (read_seqretry(&monotonic_lock, seq));
140
141
142 /* Read the cyclone counter */
143 read_cyclone_counter(now_low,now_high);
144 this_offset = ((unsigned long long)now_high<<32)|now_low;
145
146 /* convert to nanoseconds */
147 ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
148 return ret * (1000000000 / CYCLONE_TIMER_FREQ);
149}
150
151static int __init init_cyclone(char* override)
152{
153 u32* reg;
154 u32 base; /* saved cyclone base address */
155 u32 pageaddr; /* page that contains cyclone_timer register */
156 u32 offset; /* offset from pageaddr to cyclone_timer register */
157 int i;
158
159 /* check clock override */
160 if (override[0] && strncmp(override,"cyclone",7))
161 return -ENODEV;
162
163 /*make sure we're on a summit box*/
164 if(!use_cyclone) return -ENODEV;
165
166 printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
167
168 /* find base address */
169 pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
170 offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
171 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
172 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
173 if(!reg){
174 printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
175 return -ENODEV;
176 }
177 base = *reg;
178 if(!base){
179 printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
180 return -ENODEV;
181 }
182
183 /* setup PMCC */
184 pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
185 offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
186 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
187 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
188 if(!reg){
189 printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
190 return -ENODEV;
191 }
192 reg[0] = 0x00000001;
193
194 /* setup MPCS */
195 pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
196 offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
197 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
198 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
199 if(!reg){
200 printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
201 return -ENODEV;
202 }
203 reg[0] = 0x00000001;
204
205 /* map in cyclone_timer */
206 pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
207 offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
208 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
209 cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
210 if(!cyclone_timer){
211 printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
212 return -ENODEV;
213 }
214
215 /*quick test to make sure its ticking*/
216 for(i=0; i<3; i++){
217 u32 old = cyclone_timer[0];
218 int stall = 100;
219 while(stall--) barrier();
220 if(cyclone_timer[0] == old){
221 printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
222 cyclone_timer = 0;
223 return -ENODEV;
224 }
225 }
226
227 init_cpu_khz();
228
229 /* Everything looks good! */
230 return 0;
231}
232
233
234static void delay_cyclone(unsigned long loops)
235{
236 unsigned long bclock, now;
237 if(!cyclone_timer)
238 return;
239 bclock = cyclone_timer[0];
240 do {
241 rep_nop();
242 now = cyclone_timer[0];
243 } while ((now-bclock) < loops);
244}
245/************************************************************/
246
247/* cyclone timer_opts struct */
248static struct timer_opts timer_cyclone = {
249 .name = "cyclone",
250 .mark_offset = mark_offset_cyclone,
251 .get_offset = get_offset_cyclone,
252 .monotonic_clock = monotonic_clock_cyclone,
253 .delay = delay_cyclone,
254};
255
256struct init_timer_opts __initdata timer_cyclone_init = {
257 .init = init_cyclone,
258 .opts = &timer_cyclone,
259};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
index 17a6fe7166e7..000000000000
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/init.h>
8#include <linux/timex.h>
9#include <linux/errno.h>
10#include <linux/string.h>
11#include <linux/jiffies.h>
12
13#include <asm/timer.h>
14#include <asm/io.h>
15#include <asm/processor.h>
16
17#include "io_ports.h"
18#include "mach_timer.h"
19#include <asm/hpet.h>
20
21static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
22static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
23static unsigned long hpet_last; /* hpet counter value at last tick*/
24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
25static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
26static unsigned long long monotonic_base;
27static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
28
29/* convert from cycles(64bits) => nanoseconds (64bits)
30 * basic equation:
31 * ns = cycles / (freq / ns_per_sec)
32 * ns = cycles * (ns_per_sec / freq)
33 * ns = cycles * (10^9 / (cpu_khz * 10^3))
34 * ns = cycles * (10^6 / cpu_khz)
35 *
36 * Then we use scaling math (suggested by george@mvista.com) to get:
37 * ns = cycles * (10^6 * SC / cpu_khz) / SC
38 * ns = cycles * cyc2ns_scale / SC
39 *
40 * And since SC is a constant power of two, we can convert the div
41 * into a shift.
42 *
43 * We can use khz divisor instead of mhz to keep a better percision, since
44 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
45 * (mathieu.desnoyers@polymtl.ca)
46 *
47 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
48 */
49static unsigned long cyc2ns_scale __read_mostly;
50#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
51
52static inline void set_cyc2ns_scale(unsigned long cpu_khz)
53{
54 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
55}
56
57static inline unsigned long long cycles_2_ns(unsigned long long cyc)
58{
59 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
60}
61
62static unsigned long long monotonic_clock_hpet(void)
63{
64 unsigned long long last_offset, this_offset, base;
65 unsigned seq;
66
67 /* atomically read monotonic base & last_offset */
68 do {
69 seq = read_seqbegin(&monotonic_lock);
70 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
71 base = monotonic_base;
72 } while (read_seqretry(&monotonic_lock, seq));
73
74 /* Read the Time Stamp Counter */
75 rdtscll(this_offset);
76
77 /* return the value in ns */
78 return base + cycles_2_ns(this_offset - last_offset);
79}
80
81static unsigned long get_offset_hpet(void)
82{
83 register unsigned long eax, edx;
84
85 eax = hpet_readl(HPET_COUNTER);
86 eax -= hpet_last; /* hpet delta */
87 eax = min(hpet_tick, eax);
88 /*
89 * Time offset = (hpet delta) * ( usecs per HPET clock )
90 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
91 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
92 *
93 * Where,
94 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
95 *
96 * Using a mull instead of a divl saves some cycles in critical path.
97 */
98 ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
99
100 /* our adjusted time offset in microseconds */
101 return edx;
102}
103
104static void mark_offset_hpet(void)
105{
106 unsigned long long this_offset, last_offset;
107 unsigned long offset;
108
109 write_seqlock(&monotonic_lock);
110 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
111 rdtsc(last_tsc_low, last_tsc_high);
112
113 if (hpet_use_timer)
114 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
115 else
116 offset = hpet_readl(HPET_COUNTER);
117 if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
118 int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
119 jiffies_64 += lost_ticks;
120 }
121 hpet_last = offset;
122
123 /* update the monotonic base value */
124 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
125 monotonic_base += cycles_2_ns(this_offset - last_offset);
126 write_sequnlock(&monotonic_lock);
127}
128
129static void delay_hpet(unsigned long loops)
130{
131 unsigned long hpet_start, hpet_end;
132 unsigned long eax;
133
134 /* loops is the number of cpu cycles. Convert it to hpet clocks */
135 ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
136
137 hpet_start = hpet_readl(HPET_COUNTER);
138 do {
139 rep_nop();
140 hpet_end = hpet_readl(HPET_COUNTER);
141 } while ((hpet_end - hpet_start) < (loops));
142}
143
144static struct timer_opts timer_hpet;
145
146static int __init init_hpet(char* override)
147{
148 unsigned long result, remain;
149
150 /* check clock override */
151 if (override[0] && strncmp(override,"hpet",4))
152 return -ENODEV;
153
154 if (!is_hpet_enabled())
155 return -ENODEV;
156
157 printk("Using HPET for gettimeofday\n");
158 if (cpu_has_tsc) {
159 unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
160 if (tsc_quotient) {
161 /* report CPU clock rate in Hz.
162 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
163 * clock/second. Our precision is about 100 ppm.
164 */
165 { unsigned long eax=0, edx=1000;
166 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
167 eax, edx);
168 printk("Detected %u.%03u MHz processor.\n",
169 cpu_khz / 1000, cpu_khz % 1000);
170 }
171 set_cyc2ns_scale(cpu_khz);
172 }
173 /* set this only when cpu_has_tsc */
174 timer_hpet.read_timer = read_timer_tsc;
175 }
176
177 /*
178 * Math to calculate hpet to usec multiplier
179 * Look for the comments at get_offset_hpet()
180 */
181 ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
182 if (remain > (hpet_tick >> 1))
183 result++; /* rounding the result */
184 hpet_usec_quotient = result;
185
186 return 0;
187}
188
189static int hpet_resume(void)
190{
191 write_seqlock(&monotonic_lock);
192 /* Assume this is the last mark offset time */
193 rdtsc(last_tsc_low, last_tsc_high);
194
195 if (hpet_use_timer)
196 hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
197 else
198 hpet_last = hpet_readl(HPET_COUNTER);
199 write_sequnlock(&monotonic_lock);
200 return 0;
201}
202/************************************************************/
203
204/* tsc timer_opts struct */
205static struct timer_opts timer_hpet __read_mostly = {
206 .name = "hpet",
207 .mark_offset = mark_offset_hpet,
208 .get_offset = get_offset_hpet,
209 .monotonic_clock = monotonic_clock_hpet,
210 .delay = delay_hpet,
211 .resume = hpet_resume,
212};
213
214struct init_timer_opts __initdata timer_hpet_init = {
215 .init = init_hpet,
216 .opts = &timer_hpet,
217};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
index 4ea2f414dbbd..000000000000
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
1#include <linux/init.h>
2#include <asm/timer.h>
3
4static void mark_offset_none(void)
5{
6 /* nothing needed */
7}
8
9static unsigned long get_offset_none(void)
10{
11 return 0;
12}
13
14static unsigned long long monotonic_clock_none(void)
15{
16 return 0;
17}
18
19static void delay_none(unsigned long loops)
20{
21 int d0;
22 __asm__ __volatile__(
23 "\tjmp 1f\n"
24 ".align 16\n"
25 "1:\tjmp 2f\n"
26 ".align 16\n"
27 "2:\tdecl %0\n\tjns 2b"
28 :"=&a" (d0)
29 :"0" (loops));
30}
31
32/* none timer_opts struct */
33struct timer_opts timer_none = {
34 .name = "none",
35 .mark_offset = mark_offset_none,
36 .get_offset = get_offset_none,
37 .monotonic_clock = monotonic_clock_none,
38 .delay = delay_none,
39};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
index b9b6bd56b9ba..000000000000
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/module.h>
8#include <linux/device.h>
9#include <linux/sysdev.h>
10#include <linux/timex.h>
11#include <asm/delay.h>
12#include <asm/mpspec.h>
13#include <asm/timer.h>
14#include <asm/smp.h>
15#include <asm/io.h>
16#include <asm/arch_hooks.h>
17#include <asm/i8253.h>
18
19#include "do_timer.h"
20#include "io_ports.h"
21
22static int count_p; /* counter in get_offset_pit() */
23
24static int __init init_pit(char* override)
25{
26 /* check clock override */
27 if (override[0] && strncmp(override,"pit",3))
28 printk(KERN_ERR "Warning: clock= override failed. Defaulting "
29 "to PIT\n");
30 init_cpu_khz();
31 count_p = LATCH;
32 return 0;
33}
34
/* Tick hook of the PIT time source: there is no per-tick state to update. */
static void mark_offset_pit(void)
{
}

/* The PIT time source keeps no monotonic clock: always 0. */
static unsigned long long monotonic_clock_pit(void)
{
	return 0ULL;
}
44
/*
 * Busy-wait by counting a register down from @loops until it goes
 * negative.  The loop body is placed on a 16-byte boundary by the
 * .align directives inside the asm.
 */
static void delay_pit(unsigned long loops)
{
	int remaining;

	__asm__ __volatile__(
		"\tjmp 1f\n"
		".align 16\n"
		"1:\tjmp 2f\n"
		".align 16\n"
		"2:\tdecl %0\n\tjns 2b"
		: "=&a" (remaining)
		: "0" (loops));
}
57
58
59/* This function must be called with xtime_lock held.
60 * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
61 *
62 * However, the pc-audio speaker driver changes the divisor so that
63 * it gets interrupted rather more often - it loads 64 into the
64 * counter rather than 11932! This has an adverse impact on
65 * do_gettimeoffset() -- it stops working! What is also not
66 * good is that the interval that our timer function gets called
67 * is no longer 10.0002 ms, but 9.9767 ms. To get around this
68 * would require using a different timing source. Maybe someone
69 * could use the RTC - I know that this can interrupt at frequencies
70 * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
71 * it so that at startup, the timer code in sched.c would select
72 * using either the RTC or the 8253 timer. The decision would be
73 * based on whether there was any other device around that needed
74 * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
75 * and then do some jiggery to have a version of do_timer that
76 * advanced the clock by 1/1024 s. Every time that reached over 1/100
77 * of a second, then do all the old code. If the time was kept correct
78 * then do_gettimeoffset could just return 0 - there is no low order
79 * divider that can be accessed.
80 *
81 * Ideally, you would be able to use the RTC for the speaker driver,
82 * but it appears that the speaker driver really needs interrupt more
83 * often than every 120 us or so.
84 *
85 * Anyway, this needs more thought.... pjsg (1993-08-28)
86 *
87 * If you are really that interested, you should be reading
88 * comp.protocols.time.ntp!
89 */
90
/*
 * Latch and read PIT channel 0 under i8253_lock, sanity-check the count
 * against the previous reading (count_p / jiffies_p), and scale the
 * elapsed part of the tick by TICK_SIZE / LATCH with rounding.
 * The result is presumably in microseconds - TODO confirm TICK_SIZE units.
 */
91static unsigned long get_offset_pit(void)
92{
93 int count;
94 unsigned long flags;
/* jiffies value seen by the previous call; static so it persists across calls */
95 static unsigned long jiffies_p = 0;
96
97 /*
98 * cache volatile jiffies temporarily; we have xtime_lock.
99 */
100 unsigned long jiffies_t;
101
102 spin_lock_irqsave(&i8253_lock, flags);
103 /* timer count may underflow right here */
104 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
105
106 count = inb_p(PIT_CH0); /* read the latched count */
107
108 /*
109 * We do this guaranteed double memory access instead of a _p
110 * postfix in the previous port access. Wheee, hackady hack
111 */
112 jiffies_t = jiffies;
113
/* second read of the latched value supplies the high byte */
114 count |= inb_p(PIT_CH0) << 8;
115
116 /* VIA686a test code... reset the latch if count > max + 1 */
117 if (count > LATCH) {
118 outb_p(0x34, PIT_MODE);
119 outb_p(LATCH & 0xff, PIT_CH0);
120 outb(LATCH >> 8, PIT_CH0);
121 count = LATCH - 1;
122 }
123
124 /*
125 * avoiding timer inconsistencies (they are rare, but they happen)...
126 * there are two kinds of problems that must be avoided here:
127 * 1. the timer counter underflows
128 * 2. hardware problem with the timer, not giving us continuous time,
129 * the counter does small "jumps" upwards on some Pentium systems,
130 * (see c't 95/10 page 335 for Neptun bug.)
131 */
132
/* same jiffy as last call but the count is higher than before: let do_timer_overflow() correct it */
133 if( jiffies_t == jiffies_p ) {
134 if( count > count_p ) {
135 /* the nutcase */
136 count = do_timer_overflow(count);
137 }
138 } else
139 jiffies_p = jiffies_t;
140
141 count_p = count;
142
143 spin_unlock_irqrestore(&i8253_lock, flags);
144
/* (LATCH-1) - count is the amount counted since the reload; scale it and round to nearest */
145 count = ((LATCH-1) - count) * TICK_SIZE;
146 count = (count + LATCH/2) / LATCH;
147
148 return count;
149}
150
151
152/* pit timer_opts struct */
153struct timer_opts timer_pit = {
154 .name = "pit",
155 .mark_offset = mark_offset_pit,
156 .get_offset = get_offset_pit,
157 .monotonic_clock = monotonic_clock_pit,
158 .delay = delay_pit,
159};
160
161struct init_timer_opts __initdata timer_pit_init = {
162 .init = init_pit,
163 .opts = &timer_pit,
164};
165
166void setup_pit_timer(void)
167{
168 unsigned long flags;
169
170 spin_lock_irqsave(&i8253_lock, flags);
171 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
172 udelay(10);
173 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
174 udelay(10);
175 outb(LATCH >> 8 , PIT_CH0); /* MSB */
176 spin_unlock_irqrestore(&i8253_lock, flags);
177}
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
index 144e94a04933..000000000000
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,342 +0,0 @@
1/*
2 * (C) Dominik Brodowski <linux@brodo.de> 2003
3 *
4 * Driver to use the Power Management Timer (PMTMR) available in some
5 * southbridges as primary timing source for the Linux kernel.
6 *
7 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
8 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
9 *
10 * This file is licensed under the GPL v2.
11 */
12
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/device.h>
17#include <linux/init.h>
18#include <linux/pci.h>
19#include <asm/types.h>
20#include <asm/timer.h>
21#include <asm/smp.h>
22#include <asm/io.h>
23#include <asm/arch_hooks.h>
24
25#include <linux/timex.h>
26#include "mach_timer.h"
27
28/* Number of PMTMR ticks expected during calibration run */
29#define PMTMR_TICKS_PER_SEC 3579545
30#define PMTMR_EXPECTED_RATE \
31 ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
32
33
34/* The I/O port the PMTMR resides at.
35 * The location is detected during setup_arch(),
36 * in arch/i386/acpi/boot.c */
37u32 pmtmr_ioport = 0;
38
39
40/* value of the Power timer at last timer interrupt */
41static u32 offset_tick;
42static u32 offset_delay;
43
44static unsigned long long monotonic_base;
45static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
46
47#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
48
49static int pmtmr_need_workaround __read_mostly = 1;
50
51/*helper function to safely read acpi pm timesource*/
52static inline u32 read_pmtmr(void)
53{
54 if (pmtmr_need_workaround) {
55 u32 v1, v2, v3;
56
57 /* It has been reported that because of various broken
58 * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
59 * source is not latched, so you must read it multiple
60 * times to insure a safe value is read.
61 */
62 do {
63 v1 = inl(pmtmr_ioport);
64 v2 = inl(pmtmr_ioport);
65 v3 = inl(pmtmr_ioport);
66 } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
67 || (v3 > v1 && v3 < v2));
68
69 /* mask the output to 24 bits */
70 return v2 & ACPI_PM_MASK;
71 }
72
73 return inl(pmtmr_ioport) & ACPI_PM_MASK;
74}
75
76
77/*
78 * Some boards have the PMTMR running way too fast. We check
79 * the PMTMR rate against PIT channel 2 to catch these cases.
80 */
81static int verify_pmtmr_rate(void)
82{
83 u32 value1, value2;
84 unsigned long count, delta;
85
86 mach_prepare_counter();
87 value1 = read_pmtmr();
88 mach_countup(&count);
89 value2 = read_pmtmr();
90 delta = (value2 - value1) & ACPI_PM_MASK;
91
92 /* Check that the PMTMR delta is within 5% of what we expect */
93 if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
94 delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
95 printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
96 return -1;
97 }
98
99 return 0;
100}
101
102
103static int init_pmtmr(char* override)
104{
105 u32 value1, value2;
106 unsigned int i;
107
108 if (override[0] && strncmp(override,"pmtmr",5))
109 return -ENODEV;
110
111 if (!pmtmr_ioport)
112 return -ENODEV;
113
114 /* we use the TSC for delay_pmtmr, so make sure it exists */
115 if (!cpu_has_tsc)
116 return -ENODEV;
117
118 /* "verify" this timing source */
119 value1 = read_pmtmr();
120 for (i = 0; i < 10000; i++) {
121 value2 = read_pmtmr();
122 if (value2 == value1)
123 continue;
124 if (value2 > value1)
125 goto pm_good;
126 if ((value2 < value1) && ((value2) < 0xFFF))
127 goto pm_good;
128 printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
129 return -EINVAL;
130 }
131 printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
132 return -ENODEV;
133
134pm_good:
135 if (verify_pmtmr_rate() != 0)
136 return -ENODEV;
137
138 init_cpu_khz();
139 return 0;
140}
141
/*
 * Convert PM timer ticks to microseconds.
 *
 * The Power Management Timer ticks at 3.579545 ticks per microsecond.
 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
 *
 * The product is formed in 64 bits: a full 24-bit tick delta
 * (0xFFFFFF * 286) does not fit in 32 bits, so a 32-bit multiply
 * would wrap for deltas above roughly 15 million ticks.
 */
static inline u32 cyc2us(u32 cycles)
{
	return (u32)(((unsigned long long)cycles * 286) >> 10);
}
154
155/*
156 * this gets called during each timer interrupt
157 * - Called while holding the writer xtime_lock
158 */
159static void mark_offset_pmtmr(void)
160{
161 u32 lost, delta, last_offset;
162 static int first_run = 1;
163 last_offset = offset_tick;
164
165 write_seqlock(&monotonic_lock);
166
167 offset_tick = read_pmtmr();
168
169 /* calculate tick interval */
170 delta = (offset_tick - last_offset) & ACPI_PM_MASK;
171
172 /* convert to usecs */
173 delta = cyc2us(delta);
174
175 /* update the monotonic base value */
176 monotonic_base += delta * NSEC_PER_USEC;
177 write_sequnlock(&monotonic_lock);
178
179 /* convert to ticks */
180 delta += offset_delay;
181 lost = delta / (USEC_PER_SEC / HZ);
182 offset_delay = delta % (USEC_PER_SEC / HZ);
183
184
185 /* compensate for lost ticks */
186 if (lost >= 2)
187 jiffies_64 += lost - 1;
188
189 /* don't calculate delay for first run,
190 or if we've got less then a tick */
191 if (first_run || (lost < 1)) {
192 first_run = 0;
193 offset_delay = 0;
194 }
195}
196
197static int pmtmr_resume(void)
198{
199 write_seqlock(&monotonic_lock);
200 /* Assume this is the last mark offset time */
201 offset_tick = read_pmtmr();
202 write_sequnlock(&monotonic_lock);
203 return 0;
204}
205
206static unsigned long long monotonic_clock_pmtmr(void)
207{
208 u32 last_offset, this_offset;
209 unsigned long long base, ret;
210 unsigned seq;
211
212
213 /* atomically read monotonic base & last_offset */
214 do {
215 seq = read_seqbegin(&monotonic_lock);
216 last_offset = offset_tick;
217 base = monotonic_base;
218 } while (read_seqretry(&monotonic_lock, seq));
219
220 /* Read the pmtmr */
221 this_offset = read_pmtmr();
222
223 /* convert to nanoseconds */
224 ret = (this_offset - last_offset) & ACPI_PM_MASK;
225 ret = base + (cyc2us(ret) * NSEC_PER_USEC);
226 return ret;
227}
228
/*
 * Busy-wait until the low 32 bits of the TSC have advanced by at
 * least @loops since entry.
 */
static void delay_pmtmr(unsigned long loops)
{
	unsigned long start, current_tsc;

	rdtscl(start);
	for (;;) {
		rep_nop();
		rdtscl(current_tsc);
		if ((current_tsc - start) >= loops)
			break;
	}
}
240
241
242/*
243 * get the offset (in microseconds) from the last call to mark_offset()
244 * - Called holding a reader xtime_lock
245 */
246static unsigned long get_offset_pmtmr(void)
247{
248 u32 now, offset, delta = 0;
249
250 offset = offset_tick;
251 now = read_pmtmr();
252 delta = (now - offset)&ACPI_PM_MASK;
253
254 return (unsigned long) offset_delay + cyc2us(delta);
255}
256
257
258/* acpi timer_opts struct */
259static struct timer_opts timer_pmtmr = {
260 .name = "pmtmr",
261 .mark_offset = mark_offset_pmtmr,
262 .get_offset = get_offset_pmtmr,
263 .monotonic_clock = monotonic_clock_pmtmr,
264 .delay = delay_pmtmr,
265 .read_timer = read_timer_tsc,
266 .resume = pmtmr_resume,
267};
268
269struct init_timer_opts __initdata timer_pmtmr_init = {
270 .init = init_pmtmr,
271 .opts = &timer_pmtmr,
272};
273
#ifdef CONFIG_PCI
/*
 * PIIX4 Errata:
 *
 * The power management timer may return improper results when read.
 * Although the timer value settles properly after incrementing,
 * while incrementing there is a 3 ns window every 69.8 ns where the
 * timer value is indeterminate (a 4.2% chance that the data will be
 * incorrect when read). As a result, the ACPI free running count up
 * timer specification is violated due to erroneous reads.
 *
 * This initcall decides whether the multi-read workaround in the PM
 * timer read path can be switched off: it stays on when an affected
 * PIIX4 revision or a gray-listed chipset is present.  Always returns 0.
 */
static int __init pmtmr_bug_check(void)
{
	static struct pci_device_id gray_list[] __initdata = {
		/* these chipsets may have bug. */
		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
				PCI_DEVICE_ID_INTEL_82801DB_0) },
		{ },
	};
	struct pci_dev *piix4;
	int bug_suspected = 0;
	u8 rev;

	/* nothing to decide if pmtmr is not in use or workaround already off */
	if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
		return 0;

	piix4 = pci_get_device(PCI_VENDOR_ID_INTEL,
			       PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
	if (piix4) {
		pci_read_config_byte(piix4, PCI_REVISION_ID, &rev);
		/* the bug has been fixed in PIIX4M */
		if (rev < 3) {
			printk(KERN_WARNING "* Found PM-Timer Bug on this "
				"chipset. Due to workarounds for a bug,\n"
				"* this time source is slow. Consider trying "
				"other time sources (clock=)\n");
			bug_suspected = 1;
		}
		pci_dev_put(piix4);
	}

	if (pci_dev_present(gray_list)) {
		printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
			" to workarounds for a bug,\n"
			"* this time source is slow. If you are sure your timer"
			" does not have\n"
			"* this bug, please use \"pmtmr_good\" to disable the "
			"workaround\n");
		bug_suspected = 1;
	}

	if (!bug_suspected)
		pmtmr_need_workaround = 0;

	return 0;
}
device_initcall(pmtmr_bug_check);
#endif
332
333static int __init pmtr_good_setup(char *__str)
334{
335 pmtmr_need_workaround = 0;
336 return 1;
337}
338__setup("pmtmr_good", pmtr_good_setup);
339
340MODULE_LICENSE("GPL");
341MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
342MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
index f1187ddb0d0f..000000000000
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,617 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 *
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 * failing to inline.
8 */
9
10#include <linux/spinlock.h>
11#include <linux/init.h>
12#include <linux/timex.h>
13#include <linux/errno.h>
14#include <linux/cpufreq.h>
15#include <linux/string.h>
16#include <linux/jiffies.h>
17
18#include <asm/timer.h>
19#include <asm/io.h>
20/* processor.h for tsc_disable flag */
21#include <asm/processor.h>
22
23#include "io_ports.h"
24#include "mach_timer.h"
25
26#include <asm/hpet.h>
27#include <asm/i8253.h>
28
29#ifdef CONFIG_HPET_TIMER
30static unsigned long hpet_usec_quotient;
31static unsigned long hpet_last;
32static struct timer_opts timer_tsc;
33#endif
34
35static inline void cpufreq_delayed_get(void);
36
37int tsc_disable __devinitdata = 0;
38
39static int use_tsc;
40/* Number of usecs that the last interrupt was delayed */
41static int delay_at_last_interrupt;
42
43static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
45static unsigned long long monotonic_base;
46static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47
48/* Avoid compensating for lost ticks before TSCs are synched */
49static int detect_lost_ticks;
50static int __init start_lost_tick_compensation(void)
51{
52 detect_lost_ticks = 1;
53 return 0;
54}
55late_initcall(start_lost_tick_compensation);
56
57/* convert from cycles(64bits) => nanoseconds (64bits)
58 * basic equation:
59 * ns = cycles / (freq / ns_per_sec)
60 * ns = cycles * (ns_per_sec / freq)
61 * ns = cycles * (10^9 / (cpu_khz * 10^3))
62 * ns = cycles * (10^6 / cpu_khz)
63 *
64 * Then we use scaling math (suggested by george@mvista.com) to get:
65 * ns = cycles * (10^6 * SC / cpu_khz) / SC
66 * ns = cycles * cyc2ns_scale / SC
67 *
68 * And since SC is a constant power of two, we can convert the div
69 * into a shift.
70 *
71 * We can use khz divisor instead of mhz to keep a better precision, since
72 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
73 * (mathieu.desnoyers@polymtl.ca)
74 *
75 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
76 */
77static unsigned long cyc2ns_scale __read_mostly;
78#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
79
80static inline void set_cyc2ns_scale(unsigned long cpu_khz)
81{
82 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
83}
84
85static inline unsigned long long cycles_2_ns(unsigned long long cyc)
86{
87 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
88}
89
90static int count2; /* counter for mark_offset_tsc() */
91
92/* Cached *multiplier* to convert TSC counts to microseconds.
93 * (see the equation below).
94 * Equal to 2^32 * (1 / (clocks per usec) ).
95 * Initialized in time_init.
96 */
97static unsigned long fast_gettimeoffset_quotient;
98
99static unsigned long get_offset_tsc(void)
100{
101 register unsigned long eax, edx;
102
103 /* Read the Time Stamp Counter */
104
105 rdtsc(eax,edx);
106
107 /* .. relative to previous jiffy (32 bits is enough) */
108 eax -= last_tsc_low; /* tsc_low delta */
109
110 /*
111 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
112 * = (tsc_low delta) * (usecs_per_clock)
113 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
114 *
115 * Using a mull instead of a divl saves up to 31 clock cycles
116 * in the critical path.
117 */
118
119 __asm__("mull %2"
120 :"=a" (eax), "=d" (edx)
121 :"rm" (fast_gettimeoffset_quotient),
122 "0" (eax));
123
124 /* our adjusted time offset in microseconds */
125 return delay_at_last_interrupt + edx;
126}
127
128static unsigned long long monotonic_clock_tsc(void)
129{
130 unsigned long long last_offset, this_offset, base;
131 unsigned seq;
132
133 /* atomically read monotonic base & last_offset */
134 do {
135 seq = read_seqbegin(&monotonic_lock);
136 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
137 base = monotonic_base;
138 } while (read_seqretry(&monotonic_lock, seq));
139
140 /* Read the Time Stamp Counter */
141 rdtscll(this_offset);
142
143 /* return the value in ns */
144 return base + cycles_2_ns(this_offset - last_offset);
145}
146
147/*
148 * Scheduler clock - returns current time in nanosec units.
149 */
150unsigned long long sched_clock(void)
151{
152 unsigned long long this_offset;
153
154 /*
155 * In the NUMA case we dont use the TSC as they are not
156 * synchronized across all CPUs.
157 */
158#ifndef CONFIG_NUMA
159 if (!use_tsc)
160#endif
161 /* no locking but a rare wrong value is not a big deal */
162 return jiffies_64 * (1000000000 / HZ);
163
164 /* Read the Time Stamp Counter */
165 rdtscll(this_offset);
166
167 /* return the value in ns */
168 return cycles_2_ns(this_offset);
169}
170
/*
 * Busy-wait until the low 32 bits of the TSC have advanced by at
 * least @loops since entry.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long start, current_tsc;

	rdtscl(start);
	for (;;) {
		rep_nop();
		rdtscl(current_tsc);
		if ((current_tsc - start) >= loops)
			break;
	}
}
182
183#ifdef CONFIG_HPET_TIMER
/*
 * Per-tick hook installed by init_tsc() in place of mark_offset_tsc()
 * when the HPET is enabled and used as timer.  It snapshots the HPET
 * counter and the TSC, credits jiffies_64 for lost ticks, advances
 * monotonic_base and recomputes delay_at_last_interrupt.
 */
184static void mark_offset_tsc_hpet(void)
185{
186 unsigned long long this_offset, last_offset;
187 unsigned long offset, temp, hpet_current;
188
189 write_seqlock(&monotonic_lock);
190 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
191 /*
192 * It is important that the HPET and TSC reads below happen almost
193 * at the same time. Here the HPET counter is read first and the
194 * TSC right after it. To avoid any inconsistencies, we need
195 * interrupts disabled locally.
196 */
197 /*
198 * Interrupts are just disabled locally since the timer irq
199 * has the SA_INTERRUPT flag set. -arca
200 */
201 /* read the HPET main counter, then the Pentium cycle counter */
202
203 hpet_current = hpet_readl(HPET_COUNTER);
204 rdtsc(last_tsc_low, last_tsc_high);
205
206 /* lost tick compensation */
/* offset = timer 0 comparator minus one tick */
207 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
208 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
209 && detect_lost_ticks) {
210 int lost_ticks = (offset - hpet_last) / hpet_tick;
211 jiffies_64 += lost_ticks;
212 }
213 hpet_last = hpet_current;
214
215 /* update the monotonic base value */
216 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
217 monotonic_base += cycles_2_ns(this_offset - last_offset);
218 write_sequnlock(&monotonic_lock);
219
220 /* calculate delay_at_last_interrupt */
221 /*
222 * Time offset = (hpet delta) * ( usecs per HPET clock )
223 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
224 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
225 * Where,
226 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
227 */
228 delay_at_last_interrupt = hpet_current - offset;
229 ASM_MUL64_REG(temp, delay_at_last_interrupt,
230 hpet_usec_quotient, delay_at_last_interrupt);
231}
232#endif
233
234
235#ifdef CONFIG_CPU_FREQ
236#include <linux/workqueue.h>
237
238static unsigned int cpufreq_delayed_issched = 0;
239static unsigned int cpufreq_init = 0;
240static struct work_struct cpufreq_delayed_get_work;
241
242static void handle_cpufreq_delayed_get(void *v)
243{
244 unsigned int cpu;
245 for_each_online_cpu(cpu) {
246 cpufreq_get(cpu);
247 }
248 cpufreq_delayed_issched = 0;
249}
250
251/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
252 * to verify the CPU frequency the timing core thinks the CPU is running
253 * at is still correct.
254 */
255static inline void cpufreq_delayed_get(void)
256{
257 if (cpufreq_init && !cpufreq_delayed_issched) {
258 cpufreq_delayed_issched = 1;
259 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
260 schedule_work(&cpufreq_delayed_get_work);
261 }
262}
263
264/* If the CPU frequency is scaled, TSC-based delays will need a different
265 * loops_per_jiffy value to function properly.
266 */
267
268static unsigned int ref_freq = 0;
269static unsigned long loops_per_jiffy_ref = 0;
270
271#ifndef CONFIG_SMP
272static unsigned long fast_gettimeoffset_ref = 0;
273static unsigned int cpu_khz_ref = 0;
274#endif
275
/*
 * cpufreq transition notifier: rescales loops_per_jiffy and, on
 * non-SMP builds, cpu_khz, fast_gettimeoffset_quotient and the
 * cyc2ns scale relative to the reference values captured on the
 * first call.  @data is a struct cpufreq_freqs; always returns 0.
 */
276static int
277time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
278 void *data)
279{
280 struct cpufreq_freqs *freq = data;
281
/* xtime_lock is taken for every event except the resume/suspend ones */
282 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
283 write_seqlock_irq(&xtime_lock);
/* first call: capture the reference frequency and reference values */
284 if (!ref_freq) {
/* no old frequency reported: remember the new one and do nothing else */
285 if (!freq->old){
286 ref_freq = freq->new;
287 goto end;
288 }
289 ref_freq = freq->old;
290 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
291#ifndef CONFIG_SMP
292 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
293 cpu_khz_ref = cpu_khz;
294#endif
295 }
296
/* rescale before a frequency increase, after a decrease, and on resume */
297 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
298 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
299 (val == CPUFREQ_RESUMECHANGE)) {
300 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
301 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
302#ifndef CONFIG_SMP
303 if (cpu_khz)
304 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
305 if (use_tsc) {
306 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
/* note the swapped arguments: the quotient scales inversely to frequency */
307 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
308 set_cyc2ns_scale(cpu_khz);
309 }
310 }
311#endif
312 }
313
314end:
/* release under the same condition the lock was taken with */
315 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
316 write_sequnlock_irq(&xtime_lock);
317
318 return 0;
319}
320
321static struct notifier_block time_cpufreq_notifier_block = {
322 .notifier_call = time_cpufreq_notifier
323};
324
325
326static int __init cpufreq_tsc(void)
327{
328 int ret;
329 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
330 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
331 CPUFREQ_TRANSITION_NOTIFIER);
332 if (!ret)
333 cpufreq_init = 1;
334 return ret;
335}
336core_initcall(cpufreq_tsc);
337
338#else /* CONFIG_CPU_FREQ */
/* Without CONFIG_CPU_FREQ there is nothing to re-check: empty stub. */
static inline void cpufreq_delayed_get(void)
{
}
340#endif
341
342int recalibrate_cpu_khz(void)
343{
344#ifndef CONFIG_SMP
345 unsigned int cpu_khz_old = cpu_khz;
346
347 if (cpu_has_tsc) {
348 local_irq_disable();
349 init_cpu_khz();
350 local_irq_enable();
351 cpu_data[0].loops_per_jiffy =
352 cpufreq_scale(cpu_data[0].loops_per_jiffy,
353 cpu_khz_old,
354 cpu_khz);
355 return 0;
356 } else
357 return -ENODEV;
358#else
359 return -ENODEV;
360#endif
361}
362EXPORT_SYMBOL(recalibrate_cpu_khz);
363
/*
 * Per-tick hook of the TSC time source.  Snapshots the TSC, latches
 * and reads PIT channel 0, credits jiffies_64 for lost ticks (falling
 * back to another clock if ticks keep getting lost), advances
 * monotonic_base and recomputes delay_at_last_interrupt from the PIT
 * count.
 */
364static void mark_offset_tsc(void)
365{
366 unsigned long lost,delay;
/* delta starts out as the previous TSC low word; the difference is formed further down */
367 unsigned long delta = last_tsc_low;
368 int count;
369 int countmp;
370 static int count1 = 0;
371 unsigned long long this_offset, last_offset;
372 static int lost_count = 0;
373
374 write_seqlock(&monotonic_lock);
375 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
376 /*
377 * It is important that these two operations happen almost at
378 * the same time. We do the RDTSC stuff first, since it's
379 * faster. To avoid any inconsistencies, we need interrupts
380 * disabled locally.
381 */
382
383 /*
384 * Interrupts are just disabled locally since the timer irq
385 * has the SA_INTERRUPT flag set. -arca
386 */
387
388 /* read Pentium cycle counter */
389
390 rdtsc(last_tsc_low, last_tsc_high);
391
392 spin_lock(&i8253_lock);
393 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
394
395 count = inb_p(PIT_CH0); /* read the latched count */
396 count |= inb(PIT_CH0) << 8;
397
398 /*
399 * VIA686a test code... reset the latch if count > max + 1
400 * from timer_pit.c - cjb
401 */
402 if (count > LATCH) {
403 outb_p(0x34, PIT_MODE);
404 outb_p(LATCH & 0xff, PIT_CH0);
405 outb(LATCH >> 8, PIT_CH0);
406 count = LATCH - 1;
407 }
408
409 spin_unlock(&i8253_lock);
410
411 if (pit_latch_buggy) {
412 /* get center (median) value of the last 3 latched counts */
413 if ((count2 >= count && count >= count1)
414 || (count1 >= count && count >= count2)) {
415 count2 = count1; count1 = count;
416 } else if ((count1 >= count2 && count2 >= count)
417 || (count >= count2 && count2 >= count1)) {
418 countmp = count;count = count2;
419 count2 = count1;count1 = countmp;
420 } else {
/* NOTE(review): count1 is overwritten before being copied back, so count is left unchanged here - confirm whether the old count1 was intended */
421 count2 = count1; count1 = count; count = count1;
422 }
423 }
424
425 /* lost tick compensation */
426 delta = last_tsc_low - delta;
427 {
428 register unsigned long eax, edx;
429 eax = delta;
/* edx receives the high 32 bits of delta * fast_gettimeoffset_quotient */
430 __asm__("mull %2"
431 :"=a" (eax), "=d" (edx)
432 :"rm" (fast_gettimeoffset_quotient),
433 "0" (eax));
434 delta = edx;
435 }
436 delta += delay_at_last_interrupt;
437 lost = delta/(1000000/HZ);
438 delay = delta%(1000000/HZ);
439 if (lost >= 2 && detect_lost_ticks) {
440 jiffies_64 += lost-1;
441
442 /* sanity check to ensure we're not always losing ticks */
443 if (lost_count++ > 100) {
444 printk(KERN_WARNING "Losing too many ticks!\n");
445 printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
446 printk(KERN_WARNING "Possible reasons for this are:\n");
447 printk(KERN_WARNING " You're running with Speedstep,\n");
448 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
449 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
450 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
451
452 clock_fallback();
453 }
454 /* ... but give the TSC a fair chance */
455 if (lost_count > 25)
456 cpufreq_delayed_get();
457 } else
458 lost_count = 0;
459 /* update the monotonic base value */
460 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
461 monotonic_base += cycles_2_ns(this_offset - last_offset);
462 write_sequnlock(&monotonic_lock);
463
464 /* calculate delay_at_last_interrupt */
465 count = ((LATCH-1) - count) * TICK_SIZE;
466 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
467
468 /* catch corner case where tick rollover occurred
469 * between tsc and pit reads (as noted when
470 * usec delta is > 90% # of usecs/tick)
471 */
472 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
473 jiffies_64++;
474}
475
/*
 * Probe and set up the TSC as time source.
 *
 * @override: the clock= string.  A non-empty value not starting with
 *            "tsc" rejects this source, unless the HPET is enabled,
 *            in which case only a warning is logged.
 *
 * Calibrates the TSC (against the HPET when it is enabled and used as
 * timer, switching timer_tsc.mark_offset to the HPET variant), then
 * derives cpu_khz and the cyc2ns scale from the calibration quotient.
 *
 * Returns 0 on success, -ENODEV if the source is rejected, the CPU has
 * no TSC, or calibration returned 0.
 */
476static int __init init_tsc(char* override)
477{
478
479 /* check clock override */
480 if (override[0] && strncmp(override,"tsc",3)) {
481#ifdef CONFIG_HPET_TIMER
482 if (is_hpet_enabled()) {
483 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
484 } else
485#endif
486 {
487 return -ENODEV;
488 }
489 }
490
491 /*
492 * If we have APM enabled or the CPU clock speed is variable
493 * (CPU stops clock on HLT or slows clock to save power)
494 * then the TSC timestamps may diverge by up to 1 jiffy from
495 * 'real time' but nothing will break.
496 * The most frequent case is that the CPU is "woken" from a halt
497 * state by the timer interrupt itself, so we get 0 error. In the
498 * rare cases where a driver would "wake" the CPU and request a
499 * timestamp, the maximum error is < 1 jiffy. But timestamps are
500 * still perfectly ordered.
501 * Note that the TSC counter will be reset if APM suspends
502 * to disk; this won't break the kernel, though, 'cuz we're
503 * smart. See arch/i386/kernel/apm.c.
504 */
505 /*
506 * Firstly we have to do a CPU check for chips with
507 * a potentially buggy TSC. At this point we haven't run
508 * the ident/bugs checks so we must run this hook as it
509 * may turn off the TSC flag.
510 *
511 * NOTE: this doesn't yet handle SMP 486 machines where only
512 * some CPU's have a TSC. Thats never worked and nobody has
513 * moaned if you have the only one in the world - you fix it!
514 */
515
516 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
517
518 if (cpu_has_tsc) {
519 unsigned long tsc_quotient;
520#ifdef CONFIG_HPET_TIMER
521 if (is_hpet_enabled() && hpet_use_timer) {
522 unsigned long result, remain;
523 printk("Using TSC for gettimeofday\n");
524 tsc_quotient = calibrate_tsc_hpet(NULL);
525 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
526 /*
527 * Math to calculate hpet to usec multiplier
528 * Look for the comments at mark_offset_tsc_hpet()
529 */
530 ASM_DIV64_REG(result, remain, hpet_tick,
531 0, KERNEL_TICK_USEC);
532 if (remain > (hpet_tick >> 1))
533 result++; /* rounding the result */
534
535 hpet_usec_quotient = result;
536 } else
537#endif
538 {
539 tsc_quotient = calibrate_tsc();
540 }
541
/* a zero quotient is treated as calibration failure: fall through to -ENODEV */
542 if (tsc_quotient) {
543 fast_gettimeoffset_quotient = tsc_quotient;
544 use_tsc = 1;
545 /*
546 * We could be more selective here I suspect
547 * and just enable this for the next intel chips ?
548 */
549 /* report CPU clock rate in Hz.
550 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
551 * clock/second. Our precision is about 100 ppm.
552 */
/* divl: edx:eax = 1000 * 2^32 divided by tsc_quotient, quotient stored in cpu_khz */
553 { unsigned long eax=0, edx=1000;
554 __asm__("divl %2"
555 :"=a" (cpu_khz), "=d" (edx)
556 :"r" (tsc_quotient),
557 "0" (eax), "1" (edx));
558 printk("Detected %u.%03u MHz processor.\n",
559 cpu_khz / 1000, cpu_khz % 1000);
560 }
561 set_cyc2ns_scale(cpu_khz);
562 return 0;
563 }
564 }
565 return -ENODEV;
566}
567
568static int tsc_resume(void)
569{
570 write_seqlock(&monotonic_lock);
571 /* Assume this is the last mark offset time */
572 rdtsc(last_tsc_low, last_tsc_high);
573#ifdef CONFIG_HPET_TIMER
574 if (is_hpet_enabled() && hpet_use_timer)
575 hpet_last = hpet_readl(HPET_COUNTER);
576#endif
577 write_sequnlock(&monotonic_lock);
578 return 0;
579}
580
581#ifndef CONFIG_X86_TSC
582/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
583 * in cpu/common.c */
584static int __init tsc_setup(char *str)
585{
586 tsc_disable = 1;
587 return 1;
588}
589#else
590static int __init tsc_setup(char *str)
591{
592 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
593 "cannot disable TSC.\n");
594 return 1;
595}
596#endif
597__setup("notsc", tsc_setup);
598
599
600
601/************************************************************/
602
603/* tsc timer_opts struct */
604static struct timer_opts timer_tsc = {
605 .name = "tsc",
606 .mark_offset = mark_offset_tsc,
607 .get_offset = get_offset_tsc,
608 .monotonic_clock = monotonic_clock_tsc,
609 .delay = delay_tsc,
610 .read_timer = read_timer_tsc,
611 .resume = tsc_resume,
612};
613
614struct init_timer_opts __initdata timer_tsc_init = {
615 .init = init_tsc,
616 .opts = &timer_tsc,
617};
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
new file mode 100644
index 000000000000..7e0d8dab2075
--- /dev/null
+++ b/arch/i386/kernel/tsc.c
@@ -0,0 +1,478 @@
1/*
2 * This code largely moved from arch/i386/kernel/timers/timer_tsc.c
3 * which was originally moved from arch/i386/kernel/time.c.
4 * See comments there for proper credits.
5 */
6
7#include <linux/clocksource.h>
8#include <linux/workqueue.h>
9#include <linux/cpufreq.h>
10#include <linux/jiffies.h>
11#include <linux/init.h>
12#include <linux/dmi.h>
13
14#include <asm/delay.h>
15#include <asm/tsc.h>
16#include <asm/delay.h>
17#include <asm/io.h>
18
19#include "mach_timer.h"
20
21/*
22 * On some systems the TSC frequency does not
23 * change with the cpu frequency. So we need
24 * an extra value to store the TSC freq
25 */
26unsigned int tsc_khz;
27
28int tsc_disable __cpuinitdata = 0;
29
30#ifdef CONFIG_X86_TSC
31static int __init tsc_setup(char *str)
32{
33 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
34 "cannot disable TSC.\n");
35 return 1;
36}
37#else
38/*
39 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
40 * in cpu/common.c
41 */
42static int __init tsc_setup(char *str)
43{
44 tsc_disable = 1;
45
46 return 1;
47}
48#endif
49
50__setup("notsc", tsc_setup);
51
52/*
53 * code to mark and check if the TSC is unstable
54 * due to cpufreq or due to unsynced TSCs
55 */
/* set to 1 by mark_tsc_unstable(); never cleared in this file */
static int tsc_unstable;

/*
 * check_tsc_unstable - report whether the TSC has been flagged unstable
 *
 * Returns the current value of the tsc_unstable flag (0 until
 * mark_tsc_unstable() has been called, 1 afterwards).
 */
static inline int check_tsc_unstable(void)
{
	return tsc_unstable;
}

/*
 * mark_tsc_unstable - flag the TSC as unstable
 *
 * Only raises the flag; users of the TSC in this file consult it through
 * check_tsc_unstable().
 */
void mark_tsc_unstable(void)
{
	tsc_unstable = 1;
}
67EXPORT_SYMBOL_GPL(mark_tsc_unstable);
68
69/* Accelerators for sched_clock()
70 * convert from cycles(64bits) => nanoseconds (64bits)
71 * basic equation:
72 * ns = cycles / (freq / ns_per_sec)
73 * ns = cycles * (ns_per_sec / freq)
74 * ns = cycles * (10^9 / (cpu_khz * 10^3))
75 * ns = cycles * (10^6 / cpu_khz)
76 *
77 * Then we use scaling math (suggested by george@mvista.com) to get:
78 * ns = cycles * (10^6 * SC / cpu_khz) / SC
79 * ns = cycles * cyc2ns_scale / SC
80 *
81 * And since SC is a constant power of two, we can convert the div
82 * into a shift.
83 *
84 * We can use khz divisor instead of mhz to keep a better precision, since
85 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
86 * (mathieu.desnoyers@polymtl.ca)
87 *
88 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
89 */
90static unsigned long cyc2ns_scale __read_mostly;
91
92#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
93
94static inline void set_cyc2ns_scale(unsigned long cpu_khz)
95{
96 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
97}
98
99static inline unsigned long long cycles_2_ns(unsigned long long cyc)
100{
101 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
102}
103
104/*
105 * Scheduler clock - returns current time in nanosec units.
106 */
107unsigned long long sched_clock(void)
108{
109 unsigned long long this_offset;
110
111 /*
112 * in the NUMA case we dont use the TSC as they are not
113 * synchronized across all CPUs.
114 */
115#ifndef CONFIG_NUMA
116 if (!cpu_khz || check_tsc_unstable())
117#endif
118 /* no locking but a rare wrong value is not a big deal */
119 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
120
121 /* read the Time Stamp Counter: */
122 rdtscll(this_offset);
123
124 /* return the value in ns */
125 return cycles_2_ns(this_offset);
126}
127
128static unsigned long calculate_cpu_khz(void)
129{
130 unsigned long long start, end;
131 unsigned long count;
132 u64 delta64;
133 int i;
134 unsigned long flags;
135
136 local_irq_save(flags);
137
138 /* run 3 times to ensure the cache is warm */
139 for (i = 0; i < 3; i++) {
140 mach_prepare_counter();
141 rdtscll(start);
142 mach_countup(&count);
143 rdtscll(end);
144 }
145 /*
146 * Error: ECTCNEVERSET
147 * The CTC wasn't reliable: we got a hit on the very first read,
148 * or the CPU was so fast/slow that the quotient wouldn't fit in
149 * 32 bits..
150 */
151 if (count <= 1)
152 goto err;
153
154 delta64 = end - start;
155
156 /* cpu freq too fast: */
157 if (delta64 > (1ULL<<32))
158 goto err;
159
160 /* cpu freq too slow: */
161 if (delta64 <= CALIBRATE_TIME_MSEC)
162 goto err;
163
164 delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
165 do_div(delta64,CALIBRATE_TIME_MSEC);
166
167 local_irq_restore(flags);
168 return (unsigned long)delta64;
169err:
170 local_irq_restore(flags);
171 return 0;
172}
173
174int recalibrate_cpu_khz(void)
175{
176#ifndef CONFIG_SMP
177 unsigned long cpu_khz_old = cpu_khz;
178
179 if (cpu_has_tsc) {
180 cpu_khz = calculate_cpu_khz();
181 tsc_khz = cpu_khz;
182 cpu_data[0].loops_per_jiffy =
183 cpufreq_scale(cpu_data[0].loops_per_jiffy,
184 cpu_khz_old, cpu_khz);
185 return 0;
186 } else
187 return -ENODEV;
188#else
189 return -ENODEV;
190#endif
191}
192
193EXPORT_SYMBOL(recalibrate_cpu_khz);
194
195void tsc_init(void)
196{
197 if (!cpu_has_tsc || tsc_disable)
198 return;
199
200 cpu_khz = calculate_cpu_khz();
201 tsc_khz = cpu_khz;
202
203 if (!cpu_khz)
204 return;
205
206 printk("Detected %lu.%03lu MHz processor.\n",
207 (unsigned long)cpu_khz / 1000,
208 (unsigned long)cpu_khz % 1000);
209
210 set_cyc2ns_scale(cpu_khz);
211 use_tsc_delay();
212}
213
214#ifdef CONFIG_CPU_FREQ
215
216static unsigned int cpufreq_delayed_issched = 0;
217static unsigned int cpufreq_init = 0;
218static struct work_struct cpufreq_delayed_get_work;
219
220static void handle_cpufreq_delayed_get(void *v)
221{
222 unsigned int cpu;
223
224 for_each_online_cpu(cpu)
225 cpufreq_get(cpu);
226
227 cpufreq_delayed_issched = 0;
228}
229
230/*
231 * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
232 * to verify the CPU frequency the timing core thinks the CPU is running
233 * at is still correct.
234 */
235static inline void cpufreq_delayed_get(void)
236{
237 if (cpufreq_init && !cpufreq_delayed_issched) {
238 cpufreq_delayed_issched = 1;
239 printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
240 schedule_work(&cpufreq_delayed_get_work);
241 }
242}
243
244/*
245 * if the CPU frequency is scaled, TSC-based delays will need a different
246 * loops_per_jiffy value to function properly.
247 */
248static unsigned int ref_freq = 0;
249static unsigned long loops_per_jiffy_ref = 0;
250static unsigned long cpu_khz_ref = 0;
251
252static int
253time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
254{
255 struct cpufreq_freqs *freq = data;
256
257 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
258 write_seqlock_irq(&xtime_lock);
259
260 if (!ref_freq) {
261 if (!freq->old){
262 ref_freq = freq->new;
263 goto end;
264 }
265 ref_freq = freq->old;
266 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
267 cpu_khz_ref = cpu_khz;
268 }
269
270 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
271 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
272 (val == CPUFREQ_RESUMECHANGE)) {
273 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
274 cpu_data[freq->cpu].loops_per_jiffy =
275 cpufreq_scale(loops_per_jiffy_ref,
276 ref_freq, freq->new);
277
278 if (cpu_khz) {
279
280 if (num_online_cpus() == 1)
281 cpu_khz = cpufreq_scale(cpu_khz_ref,
282 ref_freq, freq->new);
283 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
284 tsc_khz = cpu_khz;
285 set_cyc2ns_scale(cpu_khz);
286 /*
287 * TSC based sched_clock turns
288 * to junk w/ cpufreq
289 */
290 mark_tsc_unstable();
291 }
292 }
293 }
294end:
295 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
296 write_sequnlock_irq(&xtime_lock);
297
298 return 0;
299}
300
301static struct notifier_block time_cpufreq_notifier_block = {
302 .notifier_call = time_cpufreq_notifier
303};
304
305static int __init cpufreq_tsc(void)
306{
307 int ret;
308
309 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
310 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
311 CPUFREQ_TRANSITION_NOTIFIER);
312 if (!ret)
313 cpufreq_init = 1;
314
315 return ret;
316}
317
318core_initcall(cpufreq_tsc);
319
320#endif
321
322/* clock source code */
323
324static unsigned long current_tsc_khz = 0;
325static int tsc_update_callback(void);
326
327static cycle_t read_tsc(void)
328{
329 cycle_t ret;
330
331 rdtscll(ret);
332
333 return ret;
334}
335
336static struct clocksource clocksource_tsc = {
337 .name = "tsc",
338 .rating = 300,
339 .read = read_tsc,
340 .mask = CLOCKSOURCE_MASK(64),
341 .mult = 0, /* to be set */
342 .shift = 22,
343 .update_callback = tsc_update_callback,
344 .is_continuous = 1,
345};
346
347static int tsc_update_callback(void)
348{
349 int change = 0;
350
351 /* check to see if we should switch to the safe clocksource: */
352 if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
353 clocksource_tsc.rating = 50;
354 clocksource_reselect();
355 change = 1;
356 }
357
358 /* only update if tsc_khz has changed: */
359 if (current_tsc_khz != tsc_khz) {
360 current_tsc_khz = tsc_khz;
361 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
362 clocksource_tsc.shift);
363 change = 1;
364 }
365
366 return change;
367}
368
369static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
370{
371 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
372 d->ident);
373 mark_tsc_unstable();
374 return 0;
375}
376
377/* List of systems that have known TSC problems */
378static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
379 {
380 .callback = dmi_mark_tsc_unstable,
381 .ident = "IBM Thinkpad 380XD",
382 .matches = {
383 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
384 DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
385 },
386 },
387 {}
388};
389
390#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
391static struct timer_list verify_tsc_freq_timer;
392
393/* XXX - Probably should add locking */
394static void verify_tsc_freq(unsigned long unused)
395{
396 static u64 last_tsc;
397 static unsigned long last_jiffies;
398
399 u64 now_tsc, interval_tsc;
400 unsigned long now_jiffies, interval_jiffies;
401
402
403 if (check_tsc_unstable())
404 return;
405
406 rdtscll(now_tsc);
407 now_jiffies = jiffies;
408
409 if (!last_jiffies) {
410 goto out;
411 }
412
413 interval_jiffies = now_jiffies - last_jiffies;
414 interval_tsc = now_tsc - last_tsc;
415 interval_tsc *= HZ;
416 do_div(interval_tsc, cpu_khz*1000);
417
418 if (interval_tsc < (interval_jiffies * 3 / 4)) {
419 printk("TSC appears to be running slowly. "
420 "Marking it as unstable\n");
421 mark_tsc_unstable();
422 return;
423 }
424
425out:
426 last_tsc = now_tsc;
427 last_jiffies = now_jiffies;
428 /* set us up to go off on the next interval: */
429 mod_timer(&verify_tsc_freq_timer,
430 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
431}
432
433/*
434 * Make an educated guess if the TSC is trustworthy and synchronized
435 * over all CPUs.
436 */
437static __init int unsynchronized_tsc(void)
438{
439 /*
440 * Intel systems are normally all synchronized.
441 * Exceptions must mark TSC as unstable:
442 */
443 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
444 return 0;
445
446 /* assume multi socket systems are not synchronized: */
447 return num_possible_cpus() > 1;
448}
449
450static int __init init_tsc_clocksource(void)
451{
452
453 if (cpu_has_tsc && tsc_khz && !tsc_disable) {
454 /* check blacklist */
455 dmi_check_system(bad_tsc_dmi_table);
456
457 if (unsynchronized_tsc()) /* mark unstable if unsynced */
458 mark_tsc_unstable();
459 current_tsc_khz = tsc_khz;
460 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
461 clocksource_tsc.shift);
462 /* lower the rating if we already know its unstable: */
463 if (check_tsc_unstable())
464 clocksource_tsc.rating = 50;
465
466 init_timer(&verify_tsc_freq_timer);
467 verify_tsc_freq_timer.function = verify_tsc_freq;
468 verify_tsc_freq_timer.expires =
469 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
470 add_timer(&verify_tsc_freq_timer);
471
472 return clocksource_register(&clocksource_tsc);
473 }
474
475 return 0;
476}
477
478module_init(init_tsc_clocksource);
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index c49a6acbee56..3c0714c4b669 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -10,43 +10,92 @@
10 * we have to worry about. 10 * we have to worry about.
11 */ 11 */
12 12
13#include <linux/module.h>
13#include <linux/config.h> 14#include <linux/config.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
15#include <linux/delay.h> 16#include <linux/delay.h>
16#include <linux/module.h> 17
17#include <asm/processor.h> 18#include <asm/processor.h>
18#include <asm/delay.h> 19#include <asm/delay.h>
19#include <asm/timer.h> 20#include <asm/timer.h>
20 21
21#ifdef CONFIG_SMP 22#ifdef CONFIG_SMP
22#include <asm/smp.h> 23# include <asm/smp.h>
23#endif 24#endif
24 25
25extern struct timer_opts* timer; 26/* simple loop based delay: */
27static void delay_loop(unsigned long loops)
28{
29 int d0;
30
31 __asm__ __volatile__(
32 "\tjmp 1f\n"
33 ".align 16\n"
34 "1:\tjmp 2f\n"
35 ".align 16\n"
36 "2:\tdecl %0\n\tjns 2b"
37 :"=&a" (d0)
38 :"0" (loops));
39}
40
41/* TSC based delay: */
42static void delay_tsc(unsigned long loops)
43{
44 unsigned long bclock, now;
45
46 rdtscl(bclock);
47 do {
48 rep_nop();
49 rdtscl(now);
50 } while ((now-bclock) < loops);
51}
52
53/*
54 * Since we calibrate only once at boot, this
55 * function should be set once at boot and not changed
56 */
57static void (*delay_fn)(unsigned long) = delay_loop;
58
59void use_tsc_delay(void)
60{
61 delay_fn = delay_tsc;
62}
63
64int read_current_timer(unsigned long *timer_val)
65{
66 if (delay_fn == delay_tsc) {
67 rdtscl(*timer_val);
68 return 0;
69 }
70 return -1;
71}
26 72
27void __delay(unsigned long loops) 73void __delay(unsigned long loops)
28{ 74{
29 cur_timer->delay(loops); 75 delay_fn(loops);
30} 76}
31 77
32inline void __const_udelay(unsigned long xloops) 78inline void __const_udelay(unsigned long xloops)
33{ 79{
34 int d0; 80 int d0;
81
35 xloops *= 4; 82 xloops *= 4;
36 __asm__("mull %0" 83 __asm__("mull %0"
37 :"=d" (xloops), "=&a" (d0) 84 :"=d" (xloops), "=&a" (d0)
38 :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); 85 :"1" (xloops), "0"
39 __delay(++xloops); 86 (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
87
88 __delay(++xloops);
40} 89}
41 90
42void __udelay(unsigned long usecs) 91void __udelay(unsigned long usecs)
43{ 92{
44 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ 93 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
45} 94}
46 95
47void __ndelay(unsigned long nsecs) 96void __ndelay(unsigned long nsecs)
48{ 97{
49 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 98 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
50} 99}
51 100
52EXPORT_SYMBOL(__delay); 101EXPORT_SYMBOL(__delay);
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index bd6fe96cc16d..6ee7faaf2c1b 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -30,6 +30,40 @@
30 30
31extern void die(const char *,struct pt_regs *,long); 31extern void die(const char *,struct pt_regs *,long);
32 32
33#ifdef CONFIG_KPROBES
34ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
35int register_page_fault_notifier(struct notifier_block *nb)
36{
37 vmalloc_sync_all();
38 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
39}
40
41int unregister_page_fault_notifier(struct notifier_block *nb)
42{
43 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
44}
45
46static inline int notify_page_fault(enum die_val val, const char *str,
47 struct pt_regs *regs, long err, int trap, int sig)
48{
49 struct die_args args = {
50 .regs = regs,
51 .str = str,
52 .err = err,
53 .trapnr = trap,
54 .signr = sig
55 };
56 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
57}
58#else
59static inline int notify_page_fault(enum die_val val, const char *str,
60 struct pt_regs *regs, long err, int trap, int sig)
61{
62 return NOTIFY_DONE;
63}
64#endif
65
66
33/* 67/*
34 * Unlock any spinlocks which will prevent us from getting the 68 * Unlock any spinlocks which will prevent us from getting the
35 * message out 69 * message out
@@ -324,7 +358,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
324 if (unlikely(address >= TASK_SIZE)) { 358 if (unlikely(address >= TASK_SIZE)) {
325 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) 359 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
326 return; 360 return;
327 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 361 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
328 SIGSEGV) == NOTIFY_STOP) 362 SIGSEGV) == NOTIFY_STOP)
329 return; 363 return;
330 /* 364 /*
@@ -334,7 +368,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
334 goto bad_area_nosemaphore; 368 goto bad_area_nosemaphore;
335 } 369 }
336 370
337 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 371 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
338 SIGSEGV) == NOTIFY_STOP) 372 SIGSEGV) == NOTIFY_STOP)
339 return; 373 return;
340 374
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index ec0fd3cfa774..fa8a37bcb391 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -281,9 +281,9 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
281 281
282 for (i = 0; i < model->num_counters; ++i) { 282 for (i = 0; i < model->num_counters; ++i) {
283 struct dentry * dir; 283 struct dentry * dir;
284 char buf[2]; 284 char buf[4];
285 285
286 snprintf(buf, 2, "%d", i); 286 snprintf(buf, sizeof(buf), "%d", i);
287 dir = oprofilefs_mkdir(sb, root, buf); 287 dir = oprofilefs_mkdir(sb, root, buf);
288 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 288 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
289 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 289 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index 1eec0868f4b3..ed1512a175ab 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -371,8 +371,7 @@ void __devinit pcibios_sort(void)
371 list_for_each(ln, &pci_devices) { 371 list_for_each(ln, &pci_devices) {
372 d = pci_dev_g(ln); 372 d = pci_dev_g(ln);
373 if (d->bus->number == bus && d->devfn == devfn) { 373 if (d->bus->number == bus && d->devfn == devfn) {
374 list_del(&d->global_list); 374 list_move_tail(&d->global_list, &sorted_devices);
375 list_add_tail(&d->global_list, &sorted_devices);
376 if (d == dev) 375 if (d == dev)
377 found = 1; 376 found = 1;
378 break; 377 break;
@@ -390,8 +389,7 @@ void __devinit pcibios_sort(void)
390 if (!found) { 389 if (!found) {
391 printk(KERN_WARNING "PCI: Device %s not found by BIOS\n", 390 printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
392 pci_name(dev)); 391 pci_name(dev));
393 list_del(&dev->global_list); 392 list_move_tail(&dev->global_list, &sorted_devices);
394 list_add_tail(&dev->global_list, &sorted_devices);
395 } 393 }
396 } 394 }
397 list_splice(&sorted_devices, &pci_devices); 395 list_splice(&sorted_devices, &pci_devices);
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index d98ec49570b8..14ef7cceb208 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -19,6 +19,40 @@
19 19
20extern void die (char *, struct pt_regs *, long); 20extern void die (char *, struct pt_regs *, long);
21 21
22#ifdef CONFIG_KPROBES
23ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
24
25/* Hook to register for page fault notifications */
26int register_page_fault_notifier(struct notifier_block *nb)
27{
28 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
29}
30
31int unregister_page_fault_notifier(struct notifier_block *nb)
32{
33 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
34}
35
36static inline int notify_page_fault(enum die_val val, const char *str,
37 struct pt_regs *regs, long err, int trap, int sig)
38{
39 struct die_args args = {
40 .regs = regs,
41 .str = str,
42 .err = err,
43 .trapnr = trap,
44 .signr = sig
45 };
46 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
47}
48#else
49static inline int notify_page_fault(enum die_val val, const char *str,
50 struct pt_regs *regs, long err, int trap, int sig)
51{
52 return NOTIFY_DONE;
53}
54#endif
55
22/* 56/*
23 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment 57 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
24 * (inside region 5, on ia64) and that page is present. 58 * (inside region 5, on ia64) and that page is present.
@@ -84,7 +118,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
84 /* 118 /*
85 * This is to handle the kprobes on user space access instructions 119 * This is to handle the kprobes on user space access instructions
86 */ 120 */
87 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT, 121 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT,
88 SIGSEGV) == NOTIFY_STOP) 122 SIGSEGV) == NOTIFY_STOP)
89 return; 123 return;
90 124
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index d6d582a5abb0..a226668f20c3 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -94,8 +94,7 @@ pmd_t *get_pointer_table (void)
94 PD_MARKBITS(dp) = mask & ~tmp; 94 PD_MARKBITS(dp) = mask & ~tmp;
95 if (!PD_MARKBITS(dp)) { 95 if (!PD_MARKBITS(dp)) {
96 /* move to end of list */ 96 /* move to end of list */
97 list_del(dp); 97 list_move_tail(dp, &ptable_list);
98 list_add_tail(dp, &ptable_list);
99 } 98 }
100 return (pmd_t *) (page_address(PD_PAGE(dp)) + off); 99 return (pmd_t *) (page_address(PD_PAGE(dp)) + off);
101} 100}
@@ -123,8 +122,7 @@ int free_pointer_table (pmd_t *ptable)
123 * move this descriptor to the front of the list, since 122 * move this descriptor to the front of the list, since
124 * it has one or more free tables. 123 * it has one or more free tables.
125 */ 124 */
126 list_del(dp); 125 list_move(dp, &ptable_list);
127 list_add(dp, &ptable_list);
128 } 126 }
129 return 0; 127 return 0;
130} 128}
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index f04a1d25f1a2..97c7bfde8ae8 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -119,8 +119,7 @@ static inline int refill(void)
119 if(hole->end == prev->start) { 119 if(hole->end == prev->start) {
120 hole->size += prev->size; 120 hole->size += prev->size;
121 hole->end = prev->end; 121 hole->end = prev->end;
122 list_del(&(prev->list)); 122 list_move(&(prev->list), &hole_cache);
123 list_add(&(prev->list), &hole_cache);
124 ret++; 123 ret++;
125 } 124 }
126 125
@@ -182,8 +181,7 @@ static inline unsigned long get_baddr(int len, unsigned long align)
182#endif 181#endif
183 return hole->end; 182 return hole->end;
184 } else if(hole->size == newlen) { 183 } else if(hole->size == newlen) {
185 list_del(&(hole->list)); 184 list_move(&(hole->list), &hole_cache);
186 list_add(&(hole->list), &hole_cache);
187 dvma_entry_use(hole->start) = newlen; 185 dvma_entry_use(hole->start) = newlen;
188#ifdef DVMA_DEBUG 186#ifdef DVMA_DEBUG
189 dvma_allocs++; 187 dvma_allocs++;
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index c31e4cff64e0..65eb55400d77 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -38,7 +38,7 @@ static int op_mips_create_files(struct super_block * sb, struct dentry * root)
38 38
39 for (i = 0; i < model->num_counters; ++i) { 39 for (i = 0; i < model->num_counters; ++i) {
40 struct dentry *dir; 40 struct dentry *dir;
41 char buf[3]; 41 char buf[4];
42 42
43 snprintf(buf, sizeof buf, "%d", i); 43 snprintf(buf, sizeof buf, "%d", i);
44 dir = oprofilefs_mkdir(sb, root, buf); 44 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d20907561f46..7dd5dab789a1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
102u64 tb_to_xs; 102u64 tb_to_xs;
103unsigned tb_to_us; 103unsigned tb_to_us;
104 104
105#define TICKLEN_SCALE (SHIFT_SCALE - 10) 105#define TICKLEN_SCALE TICK_LENGTH_SHIFT
106u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ 106u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */
107u64 ticklen_to_xs; /* 0.64 fraction */ 107u64 ticklen_to_xs; /* 0.64 fraction */
108 108
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index fdbba4206d59..a0a9e1e0061e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -40,6 +40,40 @@
40#include <asm/kdebug.h> 40#include <asm/kdebug.h>
41#include <asm/siginfo.h> 41#include <asm/siginfo.h>
42 42
43#ifdef CONFIG_KPROBES
44ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
45
46/* Hook to register for page fault notifications */
47int register_page_fault_notifier(struct notifier_block *nb)
48{
49 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
50}
51
52int unregister_page_fault_notifier(struct notifier_block *nb)
53{
54 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
55}
56
57static inline int notify_page_fault(enum die_val val, const char *str,
58 struct pt_regs *regs, long err, int trap, int sig)
59{
60 struct die_args args = {
61 .regs = regs,
62 .str = str,
63 .err = err,
64 .trapnr = trap,
65 .signr = sig
66 };
67 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
68}
69#else
70static inline int notify_page_fault(enum die_val val, const char *str,
71 struct pt_regs *regs, long err, int trap, int sig)
72{
73 return NOTIFY_DONE;
74}
75#endif
76
43/* 77/*
44 * Check whether the instruction at regs->nip is a store using 78 * Check whether the instruction at regs->nip is a store using
45 * an update addressing form which will update r1. 79 * an update addressing form which will update r1.
@@ -142,7 +176,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
142 is_write = error_code & ESR_DST; 176 is_write = error_code & ESR_DST;
143#endif /* CONFIG_4xx || CONFIG_BOOKE */ 177#endif /* CONFIG_4xx || CONFIG_BOOKE */
144 178
145 if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, 179 if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs, error_code,
146 11, SIGSEGV) == NOTIFY_STOP) 180 11, SIGSEGV) == NOTIFY_STOP)
147 return 0; 181 return 0;
148 182
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 27ad56bd227e..fd0bbbe7a4de 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -94,7 +94,7 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
94 94
95 for (i = 0; i < model->num_counters; ++i) { 95 for (i = 0; i < model->num_counters; ++i) {
96 struct dentry *dir; 96 struct dentry *dir;
97 char buf[3]; 97 char buf[4];
98 98
99 snprintf(buf, sizeof buf, "%d", i); 99 snprintf(buf, sizeof buf, "%d", i);
100 dir = oprofilefs_mkdir(sb, root, buf); 100 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/sh/oprofile/op_model_sh7750.c b/arch/sh/oprofile/op_model_sh7750.c
index 5ec9ddcc4b0b..c265185b22a7 100644
--- a/arch/sh/oprofile/op_model_sh7750.c
+++ b/arch/sh/oprofile/op_model_sh7750.c
@@ -198,7 +198,7 @@ static int sh7750_perf_counter_create_files(struct super_block *sb, struct dentr
198 198
199 for (i = 0; i < NR_CNTRS; i++) { 199 for (i = 0; i < NR_CNTRS; i++) {
200 struct dentry *dir; 200 struct dentry *dir;
201 char buf[3]; 201 char buf[4];
202 202
203 snprintf(buf, sizeof(buf), "%d", i); 203 snprintf(buf, sizeof(buf), "%d", i);
204 dir = oprofilefs_mkdir(sb, root, buf); 204 dir = oprofilefs_mkdir(sb, root, buf);
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 6e002aacb961..1605967cce91 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -31,6 +31,40 @@
31#include <asm/kdebug.h> 31#include <asm/kdebug.h>
32#include <asm/mmu_context.h> 32#include <asm/mmu_context.h>
33 33
34#ifdef CONFIG_KPROBES
35ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
36
37/* Hook to register for page fault notifications */
38int register_page_fault_notifier(struct notifier_block *nb)
39{
40 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
41}
42
43int unregister_page_fault_notifier(struct notifier_block *nb)
44{
45 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
46}
47
48static inline int notify_page_fault(enum die_val val, const char *str,
49 struct pt_regs *regs, long err, int trap, int sig)
50{
51 struct die_args args = {
52 .regs = regs,
53 .str = str,
54 .err = err,
55 .trapnr = trap,
56 .signr = sig
57 };
58 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
59}
60#else
61static inline int notify_page_fault(enum die_val val, const char *str,
62 struct pt_regs *regs, long err, int trap, int sig)
63{
64 return NOTIFY_DONE;
65}
66#endif
67
34/* 68/*
35 * To debug kernel to catch accesses to certain virtual/physical addresses. 69 * To debug kernel to catch accesses to certain virtual/physical addresses.
36 * Mode = 0 selects physical watchpoints, mode = 1 selects virtual watchpoints. 70 * Mode = 0 selects physical watchpoints, mode = 1 selects virtual watchpoints.
@@ -263,7 +297,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
263 297
264 fault_code = get_thread_fault_code(); 298 fault_code = get_thread_fault_code();
265 299
266 if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, 300 if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs,
267 fault_code, 0, SIGSEGV) == NOTIFY_STOP) 301 fault_code, 0, SIGSEGV) == NOTIFY_STOP)
268 return; 302 return;
269 303
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
index 32327bb37aff..2aa565c136e5 100644
--- a/arch/x86_64/boot/video.S
+++ b/arch/x86_64/boot/video.S
@@ -1929,6 +1929,7 @@ skip10: movb %ah, %al
1929 ret 1929 ret
1930 1930
1931store_edid: 1931store_edid:
1932#ifdef CONFIG_FIRMWARE_EDID
1932 pushw %es # just save all registers 1933 pushw %es # just save all registers
1933 pushw %ax 1934 pushw %ax
1934 pushw %bx 1935 pushw %bx
@@ -1946,6 +1947,22 @@ store_edid:
1946 rep 1947 rep
1947 stosl 1948 stosl
1948 1949
1950 pushw %es # save ES
1951 xorw %di, %di # Report Capability
1952 pushw %di
1953 popw %es # ES:DI must be 0:0
1954 movw $0x4f15, %ax
1955 xorw %bx, %bx
1956 xorw %cx, %cx
1957 int $0x10
1958 popw %es # restore ES
1959
1960 cmpb $0x00, %ah # call successful
1961 jne no_edid
1962
1963 cmpb $0x4f, %al # function supported
1964 jne no_edid
1965
1949 movw $0x4f15, %ax # do VBE/DDC 1966 movw $0x4f15, %ax # do VBE/DDC
1950 movw $0x01, %bx 1967 movw $0x01, %bx
1951 movw $0x00, %cx 1968 movw $0x00, %cx
@@ -1953,12 +1970,14 @@ store_edid:
1953 movw $0x140, %di 1970 movw $0x140, %di
1954 int $0x10 1971 int $0x10
1955 1972
1973no_edid:
1956 popw %di # restore all registers 1974 popw %di # restore all registers
1957 popw %dx 1975 popw %dx
1958 popw %cx 1976 popw %cx
1959 popw %bx 1977 popw %bx
1960 popw %ax 1978 popw %ax
1961 popw %es 1979 popw %es
1980#endif
1962 ret 1981 ret
1963 1982
1964# VIDEO_SELECT-only variables 1983# VIDEO_SELECT-only variables
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
index bf421ed26808..7554458dc9cb 100644
--- a/arch/x86_64/kernel/pmtimer.c
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -27,7 +27,7 @@
27/* The I/O port the PMTMR resides at. 27/* The I/O port the PMTMR resides at.
28 * The location is detected during setup_arch(), 28 * The location is detected during setup_arch(),
29 * in arch/i386/kernel/acpi/boot.c */ 29 * in arch/i386/kernel/acpi/boot.c */
30u32 pmtmr_ioport; 30u32 pmtmr_ioport __read_mostly;
31 31
32/* value of the Power timer at last timer interrupt */ 32/* value of the Power timer at last timer interrupt */
33static u32 offset_delay; 33static u32 offset_delay;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index fb850b52b4da..143c65031539 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -109,6 +109,7 @@ struct sys_desc_table_struct {
109}; 109};
110 110
111struct edid_info edid_info; 111struct edid_info edid_info;
112EXPORT_SYMBOL_GPL(edid_info);
112struct e820map e820; 113struct e820map e820;
113 114
114extern int root_mountflags; 115extern int root_mountflags;
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 55250593d8c9..0803d3858af1 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -41,6 +41,41 @@
41#define PF_RSVD (1<<3) 41#define PF_RSVD (1<<3)
42#define PF_INSTR (1<<4) 42#define PF_INSTR (1<<4)
43 43
44#ifdef CONFIG_KPROBES
45ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
46
47/* Hook to register for page fault notifications */
48int register_page_fault_notifier(struct notifier_block *nb)
49{
50 vmalloc_sync_all();
51 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
52}
53
54int unregister_page_fault_notifier(struct notifier_block *nb)
55{
56 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
57}
58
59static inline int notify_page_fault(enum die_val val, const char *str,
60 struct pt_regs *regs, long err, int trap, int sig)
61{
62 struct die_args args = {
63 .regs = regs,
64 .str = str,
65 .err = err,
66 .trapnr = trap,
67 .signr = sig
68 };
69 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
70}
71#else
72static inline int notify_page_fault(enum die_val val, const char *str,
73 struct pt_regs *regs, long err, int trap, int sig)
74{
75 return NOTIFY_DONE;
76}
77#endif
78
44void bust_spinlocks(int yes) 79void bust_spinlocks(int yes)
45{ 80{
46 int loglevel_save = console_loglevel; 81 int loglevel_save = console_loglevel;
@@ -348,7 +383,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
348 if (vmalloc_fault(address) >= 0) 383 if (vmalloc_fault(address) >= 0)
349 return; 384 return;
350 } 385 }
351 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 386 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
352 SIGSEGV) == NOTIFY_STOP) 387 SIGSEGV) == NOTIFY_STOP)
353 return; 388 return;
354 /* 389 /*
@@ -358,7 +393,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
358 goto bad_area_nosemaphore; 393 goto bad_area_nosemaphore;
359 } 394 }
360 395
361 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 396 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
362 SIGSEGV) == NOTIFY_STOP) 397 SIGSEGV) == NOTIFY_STOP)
363 return; 398 return;
364 399
diff --git a/drivers/Makefile b/drivers/Makefile
index 3c5170310bd0..fc2d744a4e4a 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -74,4 +74,5 @@ obj-$(CONFIG_SGI_SN) += sn/
74obj-y += firmware/ 74obj-y += firmware/
75obj-$(CONFIG_CRYPTO) += crypto/ 75obj-$(CONFIG_CRYPTO) += crypto/
76obj-$(CONFIG_SUPERH) += sh/ 76obj-$(CONFIG_SUPERH) += sh/
77obj-$(CONFIG_GENERIC_TIME) += clocksource/
77obj-$(CONFIG_DMA_ENGINE) += dma/ 78obj-$(CONFIG_DMA_ENGINE) += dma/
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 3b97a5eae9e8..a5f4f2aa007a 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -369,6 +369,11 @@ static void acpi_processor_idle(void)
369 t2 = inl(acpi_fadt.xpm_tmr_blk.address); 369 t2 = inl(acpi_fadt.xpm_tmr_blk.address);
370 /* Get end time (ticks) */ 370 /* Get end time (ticks) */
371 t2 = inl(acpi_fadt.xpm_tmr_blk.address); 371 t2 = inl(acpi_fadt.xpm_tmr_blk.address);
372
373#ifdef CONFIG_GENERIC_TIME
374 /* TSC halts in C2, so notify users */
375 mark_tsc_unstable();
376#endif
372 /* Re-enable interrupts */ 377 /* Re-enable interrupts */
373 local_irq_enable(); 378 local_irq_enable();
374 set_thread_flag(TIF_POLLING_NRFLAG); 379 set_thread_flag(TIF_POLLING_NRFLAG);
@@ -409,6 +414,10 @@ static void acpi_processor_idle(void)
409 ACPI_MTX_DO_NOT_LOCK); 414 ACPI_MTX_DO_NOT_LOCK);
410 } 415 }
411 416
417#ifdef CONFIG_GENERIC_TIME
418 /* TSC halts in C3, so notify users */
419 mark_tsc_unstable();
420#endif
412 /* Re-enable interrupts */ 421 /* Re-enable interrupts */
413 local_irq_enable(); 422 local_irq_enable();
414 set_thread_flag(TIF_POLLING_NRFLAG); 423 set_thread_flag(TIF_POLLING_NRFLAG);
diff --git a/drivers/base/power/resume.c b/drivers/base/power/resume.c
index 520679ce53a8..826093ef4c7e 100644
--- a/drivers/base/power/resume.c
+++ b/drivers/base/power/resume.c
@@ -53,8 +53,7 @@ void dpm_resume(void)
53 struct device * dev = to_device(entry); 53 struct device * dev = to_device(entry);
54 54
55 get_device(dev); 55 get_device(dev);
56 list_del_init(entry); 56 list_move_tail(entry, &dpm_active);
57 list_add_tail(entry, &dpm_active);
58 57
59 up(&dpm_list_sem); 58 up(&dpm_list_sem);
60 if (!dev->power.prev_state.event) 59 if (!dev->power.prev_state.event)
@@ -101,8 +100,7 @@ void dpm_power_up(void)
101 struct device * dev = to_device(entry); 100 struct device * dev = to_device(entry);
102 101
103 get_device(dev); 102 get_device(dev);
104 list_del_init(entry); 103 list_move_tail(entry, &dpm_active);
105 list_add_tail(entry, &dpm_active);
106 resume_device(dev); 104 resume_device(dev);
107 put_device(dev); 105 put_device(dev);
108 } 106 }
diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c
index 1a1fe43a3057..69509e02f703 100644
--- a/drivers/base/power/suspend.c
+++ b/drivers/base/power/suspend.c
@@ -116,12 +116,10 @@ int device_suspend(pm_message_t state)
116 /* Check if the device got removed */ 116 /* Check if the device got removed */
117 if (!list_empty(&dev->power.entry)) { 117 if (!list_empty(&dev->power.entry)) {
118 /* Move it to the dpm_off or dpm_off_irq list */ 118 /* Move it to the dpm_off or dpm_off_irq list */
119 if (!error) { 119 if (!error)
120 list_del(&dev->power.entry); 120 list_move(&dev->power.entry, &dpm_off);
121 list_add(&dev->power.entry, &dpm_off); 121 else if (error == -EAGAIN) {
122 } else if (error == -EAGAIN) { 122 list_move(&dev->power.entry, &dpm_off_irq);
123 list_del(&dev->power.entry);
124 list_add(&dev->power.entry, &dpm_off_irq);
125 error = 0; 123 error = 0;
126 } 124 }
127 } 125 }
@@ -139,8 +137,7 @@ int device_suspend(pm_message_t state)
139 */ 137 */
140 while (!list_empty(&dpm_off_irq)) { 138 while (!list_empty(&dpm_off_irq)) {
141 struct list_head * entry = dpm_off_irq.next; 139 struct list_head * entry = dpm_off_irq.next;
142 list_del(entry); 140 list_move(entry, &dpm_off);
143 list_add(entry, &dpm_off);
144 } 141 }
145 dpm_resume(); 142 dpm_resume();
146 } 143 }
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index a71a240611e0..ed8dca84ff69 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -423,6 +423,9 @@ static int dtl1_hci_send_frame(struct sk_buff *skb)
423 nsh.len = skb->len; 423 nsh.len = skb->len;
424 424
425 s = bt_skb_alloc(NSHL + skb->len + 1, GFP_ATOMIC); 425 s = bt_skb_alloc(NSHL + skb->len + 1, GFP_ATOMIC);
426 if (!s)
427 return -ENOMEM;
428
426 skb_reserve(s, NSHL); 429 skb_reserve(s, NSHL);
427 memcpy(skb_put(s, skb->len), skb->data, skb->len); 430 memcpy(skb_put(s, skb->len), skb->data, skb->len);
428 if (skb->len & 0x0001) 431 if (skb->len & 0x0001)
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 63f28d169b36..3610c5729553 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -62,6 +62,23 @@ config HW_CONSOLE
62 depends on VT && !S390 && !UML 62 depends on VT && !S390 && !UML
63 default y 63 default y
64 64
65config VT_HW_CONSOLE_BINDING
66 bool "Support for binding and unbinding console drivers"
67 depends on HW_CONSOLE
68 default n
69 ---help---
70 The virtual terminal is the device that interacts with the physical
71 terminal through console drivers. On these systems, at least one
72 console driver is loaded. In other configurations, additional console
73 drivers may be enabled, such as the framebuffer console. If more than
74 1 console driver is enabled, setting this to 'y' will allow you to
75 select the console driver that will serve as the backend for the
76 virtual terminals.
77
78 See <file:Documentation/console/console.txt> for more
79 information. For framebuffer console users, please refer to
80 <file:Documentation/fb/fbcon.txt>.
81
65config SERIAL_NONSTANDARD 82config SERIAL_NONSTANDARD
66 bool "Non-standard serial port support" 83 bool "Non-standard serial port support"
67 ---help--- 84 ---help---
@@ -670,20 +687,7 @@ config NWFLASH
670 687
671 If you're not sure, say N. 688 If you're not sure, say N.
672 689
673config HW_RANDOM 690source "drivers/char/hw_random/Kconfig"
674 tristate "Intel/AMD/VIA HW Random Number Generator support"
675 depends on (X86 || IA64) && PCI
676 ---help---
677 This driver provides kernel-side support for the Random Number
678 Generator hardware found on Intel i8xx-based motherboards,
679 AMD 76x-based motherboards, and Via Nehemiah CPUs.
680
681 Provides a character driver, used to read() entropy data.
682
683 To compile this driver as a module, choose M here: the
684 module will be called hw_random.
685
686 If unsure, say N.
687 691
688config NVRAM 692config NVRAM
689 tristate "/dev/nvram support" 693 tristate "/dev/nvram support"
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index fb919bfb2824..524105597ea7 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -75,7 +75,7 @@ endif
75obj-$(CONFIG_TOSHIBA) += toshiba.o 75obj-$(CONFIG_TOSHIBA) += toshiba.o
76obj-$(CONFIG_I8K) += i8k.o 76obj-$(CONFIG_I8K) += i8k.o
77obj-$(CONFIG_DS1620) += ds1620.o 77obj-$(CONFIG_DS1620) += ds1620.o
78obj-$(CONFIG_HW_RANDOM) += hw_random.o 78obj-$(CONFIG_HW_RANDOM) += hw_random/
79obj-$(CONFIG_FTAPE) += ftape/ 79obj-$(CONFIG_FTAPE) += ftape/
80obj-$(CONFIG_COBALT_LCD) += lcd.o 80obj-$(CONFIG_COBALT_LCD) += lcd.o
81obj-$(CONFIG_PPDEV) += ppdev.o 81obj-$(CONFIG_PPDEV) += ppdev.o
diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
index ac626418b329..d69f2ad9a67d 100644
--- a/drivers/char/hangcheck-timer.c
+++ b/drivers/char/hangcheck-timer.c
@@ -117,12 +117,12 @@ __setup("hcheck_reboot", hangcheck_parse_reboot);
117__setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks); 117__setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks);
118#endif /* not MODULE */ 118#endif /* not MODULE */
119 119
120#if defined(CONFIG_X86) || defined(CONFIG_S390) 120#if defined(CONFIG_X86_64) || defined(CONFIG_S390)
121# define HAVE_MONOTONIC 121# define HAVE_MONOTONIC
122# define TIMER_FREQ 1000000000ULL 122# define TIMER_FREQ 1000000000ULL
123#elif defined(CONFIG_IA64) 123#elif defined(CONFIG_IA64)
124# define TIMER_FREQ ((unsigned long long)local_cpu_data->itc_freq) 124# define TIMER_FREQ ((unsigned long long)local_cpu_data->itc_freq)
125#elif defined(CONFIG_PPC64) 125#else
126# define TIMER_FREQ (HZ*loops_per_jiffy) 126# define TIMER_FREQ (HZ*loops_per_jiffy)
127#endif 127#endif
128 128
diff --git a/drivers/char/hw_random.c b/drivers/char/hw_random.c
deleted file mode 100644
index 29dc87e59020..000000000000
--- a/drivers/char/hw_random.c
+++ /dev/null
@@ -1,698 +0,0 @@
1/*
2 Added support for the AMD Geode LX RNG
3 (c) Copyright 2004-2005 Advanced Micro Devices, Inc.
4
5 derived from
6
7 Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG)
8 (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com>
9
10 derived from
11
12 Hardware driver for the AMD 768 Random Number Generator (RNG)
13 (c) Copyright 2001 Red Hat Inc <alan@redhat.com>
14
15 derived from
16
17 Hardware driver for Intel i810 Random Number Generator (RNG)
18 Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
19 Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
20
21 Please read Documentation/hw_random.txt for details on use.
22
23 ----------------------------------------------------------
24 This software may be used and distributed according to the terms
25 of the GNU General Public License, incorporated herein by reference.
26
27 */
28
29
30#include <linux/module.h>
31#include <linux/kernel.h>
32#include <linux/fs.h>
33#include <linux/init.h>
34#include <linux/pci.h>
35#include <linux/interrupt.h>
36#include <linux/spinlock.h>
37#include <linux/random.h>
38#include <linux/miscdevice.h>
39#include <linux/smp_lock.h>
40#include <linux/mm.h>
41#include <linux/delay.h>
42
43#ifdef __i386__
44#include <asm/msr.h>
45#include <asm/cpufeature.h>
46#endif
47
48#include <asm/io.h>
49#include <asm/uaccess.h>
50
51
52/*
53 * core module and version information
54 */
55#define RNG_VERSION "1.0.0"
56#define RNG_MODULE_NAME "hw_random"
57#define RNG_DRIVER_NAME RNG_MODULE_NAME " hardware driver " RNG_VERSION
58#define PFX RNG_MODULE_NAME ": "
59
60
61/*
62 * debugging macros
63 */
64
65/* pr_debug() collapses to a no-op if DEBUG is not defined */
66#define DPRINTK(fmt, args...) pr_debug(PFX "%s: " fmt, __FUNCTION__ , ## args)
67
68
69#undef RNG_NDEBUG /* define to enable lightweight runtime checks */
70#ifdef RNG_NDEBUG
71#define assert(expr) \
72 if(!(expr)) { \
73 printk(KERN_DEBUG PFX "Assertion failed! %s,%s,%s," \
74 "line=%d\n", #expr, __FILE__, __FUNCTION__, __LINE__); \
75 }
76#else
77#define assert(expr)
78#endif
79
80#define RNG_MISCDEV_MINOR 183 /* official */
81
82static int rng_dev_open (struct inode *inode, struct file *filp);
83static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
84 loff_t * offp);
85
86static int __init intel_init (struct pci_dev *dev);
87static void intel_cleanup(void);
88static unsigned int intel_data_present (void);
89static u32 intel_data_read (void);
90
91static int __init amd_init (struct pci_dev *dev);
92static void amd_cleanup(void);
93static unsigned int amd_data_present (void);
94static u32 amd_data_read (void);
95
96#ifdef __i386__
97static int __init via_init(struct pci_dev *dev);
98static void via_cleanup(void);
99static unsigned int via_data_present (void);
100static u32 via_data_read (void);
101#endif
102
103static int __init geode_init(struct pci_dev *dev);
104static void geode_cleanup(void);
105static unsigned int geode_data_present (void);
106static u32 geode_data_read (void);
107
108struct rng_operations {
109 int (*init) (struct pci_dev *dev);
110 void (*cleanup) (void);
111 unsigned int (*data_present) (void);
112 u32 (*data_read) (void);
113 unsigned int n_bytes; /* number of bytes per ->data_read */
114};
115static struct rng_operations *rng_ops;
116
117static struct file_operations rng_chrdev_ops = {
118 .owner = THIS_MODULE,
119 .open = rng_dev_open,
120 .read = rng_dev_read,
121};
122
123
124static struct miscdevice rng_miscdev = {
125 RNG_MISCDEV_MINOR,
126 RNG_MODULE_NAME,
127 &rng_chrdev_ops,
128};
129
130enum {
131 rng_hw_none,
132 rng_hw_intel,
133 rng_hw_amd,
134#ifdef __i386__
135 rng_hw_via,
136#endif
137 rng_hw_geode,
138};
139
140static struct rng_operations rng_vendor_ops[] = {
141 /* rng_hw_none */
142 { },
143
144 /* rng_hw_intel */
145 { intel_init, intel_cleanup, intel_data_present,
146 intel_data_read, 1 },
147
148 /* rng_hw_amd */
149 { amd_init, amd_cleanup, amd_data_present, amd_data_read, 4 },
150
151#ifdef __i386__
152 /* rng_hw_via */
153 { via_init, via_cleanup, via_data_present, via_data_read, 1 },
154#endif
155
156 /* rng_hw_geode */
157 { geode_init, geode_cleanup, geode_data_present, geode_data_read, 4 }
158};
159
160/*
161 * Data for PCI driver interface
162 *
163 * This data only exists for exporting the supported
164 * PCI ids via MODULE_DEVICE_TABLE. We do not actually
165 * register a pci_driver, because someone else might one day
166 * want to register another driver on the same PCI id.
167 */
168static struct pci_device_id rng_pci_tbl[] = {
169 { 0x1022, 0x7443, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_amd },
170 { 0x1022, 0x746b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_amd },
171
172 { 0x8086, 0x2418, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
173 { 0x8086, 0x2428, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
174 { 0x8086, 0x2430, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
175 { 0x8086, 0x2448, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
176 { 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
177 { 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
178
179 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LX_AES,
180 PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_geode },
181
182 { 0, }, /* terminate list */
183};
184MODULE_DEVICE_TABLE (pci, rng_pci_tbl);
185
186
187/***********************************************************************
188 *
189 * Intel RNG operations
190 *
191 */
192
193/*
194 * RNG registers (offsets from rng_mem)
195 */
196#define INTEL_RNG_HW_STATUS 0
197#define INTEL_RNG_PRESENT 0x40
198#define INTEL_RNG_ENABLED 0x01
199#define INTEL_RNG_STATUS 1
200#define INTEL_RNG_DATA_PRESENT 0x01
201#define INTEL_RNG_DATA 2
202
203/*
204 * Magic address at which Intel PCI bridges locate the RNG
205 */
206#define INTEL_RNG_ADDR 0xFFBC015F
207#define INTEL_RNG_ADDR_LEN 3
208
209/* token to our ioremap'd RNG register area */
210static void __iomem *rng_mem;
211
212static inline u8 intel_hwstatus (void)
213{
214 assert (rng_mem != NULL);
215 return readb (rng_mem + INTEL_RNG_HW_STATUS);
216}
217
218static inline u8 intel_hwstatus_set (u8 hw_status)
219{
220 assert (rng_mem != NULL);
221 writeb (hw_status, rng_mem + INTEL_RNG_HW_STATUS);
222 return intel_hwstatus ();
223}
224
225static unsigned int intel_data_present(void)
226{
227 assert (rng_mem != NULL);
228
229 return (readb (rng_mem + INTEL_RNG_STATUS) & INTEL_RNG_DATA_PRESENT) ?
230 1 : 0;
231}
232
233static u32 intel_data_read(void)
234{
235 assert (rng_mem != NULL);
236
237 return readb (rng_mem + INTEL_RNG_DATA);
238}
239
240static int __init intel_init (struct pci_dev *dev)
241{
242 int rc;
243 u8 hw_status;
244
245 DPRINTK ("ENTER\n");
246
247 rng_mem = ioremap (INTEL_RNG_ADDR, INTEL_RNG_ADDR_LEN);
248 if (rng_mem == NULL) {
249 printk (KERN_ERR PFX "cannot ioremap RNG Memory\n");
250 rc = -EBUSY;
251 goto err_out;
252 }
253
254 /* Check for Intel 82802 */
255 hw_status = intel_hwstatus ();
256 if ((hw_status & INTEL_RNG_PRESENT) == 0) {
257 printk (KERN_ERR PFX "RNG not detected\n");
258 rc = -ENODEV;
259 goto err_out_free_map;
260 }
261
262 /* turn RNG h/w on, if it's off */
263 if ((hw_status & INTEL_RNG_ENABLED) == 0)
264 hw_status = intel_hwstatus_set (hw_status | INTEL_RNG_ENABLED);
265 if ((hw_status & INTEL_RNG_ENABLED) == 0) {
266 printk (KERN_ERR PFX "cannot enable RNG, aborting\n");
267 rc = -EIO;
268 goto err_out_free_map;
269 }
270
271 DPRINTK ("EXIT, returning 0\n");
272 return 0;
273
274err_out_free_map:
275 iounmap (rng_mem);
276 rng_mem = NULL;
277err_out:
278 DPRINTK ("EXIT, returning %d\n", rc);
279 return rc;
280}
281
282static void intel_cleanup(void)
283{
284 u8 hw_status;
285
286 hw_status = intel_hwstatus ();
287 if (hw_status & INTEL_RNG_ENABLED)
288 intel_hwstatus_set (hw_status & ~INTEL_RNG_ENABLED);
289 else
290 printk(KERN_WARNING PFX "unusual: RNG already disabled\n");
291 iounmap(rng_mem);
292 rng_mem = NULL;
293}
294
295/***********************************************************************
296 *
297 * AMD RNG operations
298 *
299 */
300
301static u32 pmbase; /* PMxx I/O base */
302static struct pci_dev *amd_dev;
303
304static unsigned int amd_data_present (void)
305{
306 return inl(pmbase + 0xF4) & 1;
307}
308
309
310static u32 amd_data_read (void)
311{
312 return inl(pmbase + 0xF0);
313}
314
315static int __init amd_init (struct pci_dev *dev)
316{
317 int rc;
318 u8 rnen;
319
320 DPRINTK ("ENTER\n");
321
322 pci_read_config_dword(dev, 0x58, &pmbase);
323
324 pmbase &= 0x0000FF00;
325
326 if (pmbase == 0)
327 {
328 printk (KERN_ERR PFX "power management base not set\n");
329 rc = -EIO;
330 goto err_out;
331 }
332
333 pci_read_config_byte(dev, 0x40, &rnen);
334 rnen |= (1 << 7); /* RNG on */
335 pci_write_config_byte(dev, 0x40, rnen);
336
337 pci_read_config_byte(dev, 0x41, &rnen);
338 rnen |= (1 << 7); /* PMIO enable */
339 pci_write_config_byte(dev, 0x41, rnen);
340
341 pr_info( PFX "AMD768 system management I/O registers at 0x%X.\n",
342 pmbase);
343
344 amd_dev = dev;
345
346 DPRINTK ("EXIT, returning 0\n");
347 return 0;
348
349err_out:
350 DPRINTK ("EXIT, returning %d\n", rc);
351 return rc;
352}
353
354static void amd_cleanup(void)
355{
356 u8 rnen;
357
358 pci_read_config_byte(amd_dev, 0x40, &rnen);
359 rnen &= ~(1 << 7); /* RNG off */
360 pci_write_config_byte(amd_dev, 0x40, rnen);
361
362 /* FIXME: twiddle pmio, also? */
363}
364
365#ifdef __i386__
366/***********************************************************************
367 *
368 * VIA RNG operations
369 *
370 */
371
372enum {
373 VIA_STRFILT_CNT_SHIFT = 16,
374 VIA_STRFILT_FAIL = (1 << 15),
375 VIA_STRFILT_ENABLE = (1 << 14),
376 VIA_RAWBITS_ENABLE = (1 << 13),
377 VIA_RNG_ENABLE = (1 << 6),
378 VIA_XSTORE_CNT_MASK = 0x0F,
379
380 VIA_RNG_CHUNK_8 = 0x00, /* 64 rand bits, 64 stored bits */
381 VIA_RNG_CHUNK_4 = 0x01, /* 32 rand bits, 32 stored bits */
382 VIA_RNG_CHUNK_4_MASK = 0xFFFFFFFF,
383 VIA_RNG_CHUNK_2 = 0x02, /* 16 rand bits, 32 stored bits */
384 VIA_RNG_CHUNK_2_MASK = 0xFFFF,
385 VIA_RNG_CHUNK_1 = 0x03, /* 8 rand bits, 32 stored bits */
386 VIA_RNG_CHUNK_1_MASK = 0xFF,
387};
388
389static u32 via_rng_datum;
390
391/*
392 * Investigate using the 'rep' prefix to obtain 32 bits of random data
393 * in one insn. The upside is potentially better performance. The
394 * downside is that the instruction becomes no longer atomic. Due to
395 * this, just like familiar issues with /dev/random itself, the worst
396 * case of a 'rep xstore' could potentially pause a cpu for an
397 * unreasonably long time. In practice, this condition would likely
398 * only occur when the hardware is failing. (or so we hope :))
399 *
400 * Another possible performance boost may come from simply buffering
401 * until we have 4 bytes, thus returning a u32 at a time,
402 * instead of the current u8-at-a-time.
403 */
404
405static inline u32 xstore(u32 *addr, u32 edx_in)
406{
407 u32 eax_out;
408
409 asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
410 :"=m"(*addr), "=a"(eax_out)
411 :"D"(addr), "d"(edx_in));
412
413 return eax_out;
414}
415
416static unsigned int via_data_present(void)
417{
418 u32 bytes_out;
419
420 /* We choose the recommended 1-byte-per-instruction RNG rate,
421 * for greater randomness at the expense of speed. Larger
422 * values 2, 4, or 8 bytes-per-instruction yield greater
423 * speed at lesser randomness.
424 *
425 * If you change this to another VIA_RNG_CHUNK_n, you must also
426 * change the ->n_bytes values in rng_vendor_ops[] tables.
427 * VIA_RNG_CHUNK_8 requires further code changes.
428 *
429 * A copy of MSR_VIA_RNG is placed in eax_out when xstore
430 * completes.
431 */
432 via_rng_datum = 0; /* paranoia, not really necessary */
433 bytes_out = xstore(&via_rng_datum, VIA_RNG_CHUNK_1) & VIA_XSTORE_CNT_MASK;
434 if (bytes_out == 0)
435 return 0;
436
437 return 1;
438}
439
440static u32 via_data_read(void)
441{
442 return via_rng_datum;
443}
444
445static int __init via_init(struct pci_dev *dev)
446{
447 u32 lo, hi, old_lo;
448
449 /* Control the RNG via MSR. Tread lightly and pay very close
450 * close attention to values written, as the reserved fields
451 * are documented to be "undefined and unpredictable"; but it
452 * does not say to write them as zero, so I make a guess that
453 * we restore the values we find in the register.
454 */
455 rdmsr(MSR_VIA_RNG, lo, hi);
456
457 old_lo = lo;
458 lo &= ~(0x7f << VIA_STRFILT_CNT_SHIFT);
459 lo &= ~VIA_XSTORE_CNT_MASK;
460 lo &= ~(VIA_STRFILT_ENABLE | VIA_STRFILT_FAIL | VIA_RAWBITS_ENABLE);
461 lo |= VIA_RNG_ENABLE;
462
463 if (lo != old_lo)
464 wrmsr(MSR_VIA_RNG, lo, hi);
465
466 /* perhaps-unnecessary sanity check; remove after testing if
467 unneeded */
468 rdmsr(MSR_VIA_RNG, lo, hi);
469 if ((lo & VIA_RNG_ENABLE) == 0) {
470 printk(KERN_ERR PFX "cannot enable VIA C3 RNG, aborting\n");
471 return -ENODEV;
472 }
473
474 return 0;
475}
476
477static void via_cleanup(void)
478{
479 /* do nothing */
480}
481#endif
482
483/***********************************************************************
484 *
485 * AMD Geode RNG operations
486 *
487 */
488
489static void __iomem *geode_rng_base = NULL;
490
491#define GEODE_RNG_DATA_REG 0x50
492#define GEODE_RNG_STATUS_REG 0x54
493
494static u32 geode_data_read(void)
495{
496 u32 val;
497
498 assert(geode_rng_base != NULL);
499 val = readl(geode_rng_base + GEODE_RNG_DATA_REG);
500 return val;
501}
502
503static unsigned int geode_data_present(void)
504{
505 u32 val;
506
507 assert(geode_rng_base != NULL);
508 val = readl(geode_rng_base + GEODE_RNG_STATUS_REG);
509 return val;
510}
511
512static void geode_cleanup(void)
513{
514 iounmap(geode_rng_base);
515 geode_rng_base = NULL;
516}
517
518static int geode_init(struct pci_dev *dev)
519{
520 unsigned long rng_base = pci_resource_start(dev, 0);
521
522 if (rng_base == 0)
523 return 1;
524
525 geode_rng_base = ioremap(rng_base, 0x58);
526
527 if (geode_rng_base == NULL) {
528 printk(KERN_ERR PFX "Cannot ioremap RNG memory\n");
529 return -EBUSY;
530 }
531
532 return 0;
533}
534
535/***********************************************************************
536 *
537 * /dev/hwrandom character device handling (major 10, minor 183)
538 *
539 */
540
541static int rng_dev_open (struct inode *inode, struct file *filp)
542{
543 /* enforce read-only access to this chrdev */
544 if ((filp->f_mode & FMODE_READ) == 0)
545 return -EINVAL;
546 if (filp->f_mode & FMODE_WRITE)
547 return -EINVAL;
548
549 return 0;
550}
551
552
553static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
554 loff_t * offp)
555{
556 static DEFINE_SPINLOCK(rng_lock);
557 unsigned int have_data;
558 u32 data = 0;
559 ssize_t ret = 0;
560
561 while (size) {
562 spin_lock(&rng_lock);
563
564 have_data = 0;
565 if (rng_ops->data_present()) {
566 data = rng_ops->data_read();
567 have_data = rng_ops->n_bytes;
568 }
569
570 spin_unlock (&rng_lock);
571
572 while (have_data && size) {
573 if (put_user((u8)data, buf++)) {
574 ret = ret ? : -EFAULT;
575 break;
576 }
577 size--;
578 ret++;
579 have_data--;
580 data>>=8;
581 }
582
583 if (filp->f_flags & O_NONBLOCK)
584 return ret ? : -EAGAIN;
585
586 if(need_resched())
587 schedule_timeout_interruptible(1);
588 else
589 udelay(200); /* FIXME: We could poll for 250uS ?? */
590
591 if (signal_pending (current))
592 return ret ? : -ERESTARTSYS;
593 }
594 return ret;
595}
596
597
598
599/*
600 * rng_init_one - look for and attempt to init a single RNG
601 */
602static int __init rng_init_one (struct pci_dev *dev)
603{
604 int rc;
605
606 DPRINTK ("ENTER\n");
607
608 assert(rng_ops != NULL);
609
610 rc = rng_ops->init(dev);
611 if (rc)
612 goto err_out;
613
614 rc = misc_register (&rng_miscdev);
615 if (rc) {
616 printk (KERN_ERR PFX "misc device register failed\n");
617 goto err_out_cleanup_hw;
618 }
619
620 DPRINTK ("EXIT, returning 0\n");
621 return 0;
622
623err_out_cleanup_hw:
624 rng_ops->cleanup();
625err_out:
626 DPRINTK ("EXIT, returning %d\n", rc);
627 return rc;
628}
629
630
631
632MODULE_AUTHOR("The Linux Kernel team");
633MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver");
634MODULE_LICENSE("GPL");
635
636
637/*
638 * rng_init - initialize RNG module
639 */
640static int __init rng_init (void)
641{
642 int rc;
643 struct pci_dev *pdev = NULL;
644 const struct pci_device_id *ent;
645
646 DPRINTK ("ENTER\n");
647
648 /* Probe for Intel, AMD, Geode RNGs */
649 for_each_pci_dev(pdev) {
650 ent = pci_match_id(rng_pci_tbl, pdev);
651 if (ent) {
652 rng_ops = &rng_vendor_ops[ent->driver_data];
653 goto match;
654 }
655 }
656
657#ifdef __i386__
658 /* Probe for VIA RNG */
659 if (cpu_has_xstore) {
660 rng_ops = &rng_vendor_ops[rng_hw_via];
661 pdev = NULL;
662 goto match;
663 }
664#endif
665
666 DPRINTK ("EXIT, returning -ENODEV\n");
667 return -ENODEV;
668
669match:
670 rc = rng_init_one (pdev);
671 if (rc)
672 return rc;
673
674 pr_info( RNG_DRIVER_NAME " loaded\n");
675
676 DPRINTK ("EXIT, returning 0\n");
677 return 0;
678}
679
680
681/*
682 * rng_cleanup - shutdown RNG module
683 */
684static void __exit rng_cleanup (void)
685{
686 DPRINTK ("ENTER\n");
687
688 misc_deregister (&rng_miscdev);
689
690 if (rng_ops->cleanup)
691 rng_ops->cleanup();
692
693 DPRINTK ("EXIT\n");
694}
695
696
697module_init (rng_init);
698module_exit (rng_cleanup);
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
new file mode 100644
index 000000000000..9f7635f75178
--- /dev/null
+++ b/drivers/char/hw_random/Kconfig
@@ -0,0 +1,90 @@
1#
2# Hardware Random Number Generator (RNG) configuration
3#
4
5config HW_RANDOM
6 bool "Hardware Random Number Generator Core support"
7 default y
8 ---help---
9 Hardware Random Number Generator Core infrastructure.
10
11 If unsure, say Y.
12
13config HW_RANDOM_INTEL
14 tristate "Intel HW Random Number Generator support"
15 depends on HW_RANDOM && (X86 || IA64) && PCI
16 default y
17 ---help---
18 This driver provides kernel-side support for the Random Number
19 Generator hardware found on Intel i8xx-based motherboards.
20
21 To compile this driver as a module, choose M here: the
22 module will be called intel-rng.
23
24 If unsure, say Y.
25
26config HW_RANDOM_AMD
27 tristate "AMD HW Random Number Generator support"
28 depends on HW_RANDOM && X86 && PCI
29 default y
30 ---help---
31 This driver provides kernel-side support for the Random Number
32 Generator hardware found on AMD 76x-based motherboards.
33
34 To compile this driver as a module, choose M here: the
35 module will be called amd-rng.
36
37 If unsure, say Y.
38
39config HW_RANDOM_GEODE
40 tristate "AMD Geode HW Random Number Generator support"
41 depends on HW_RANDOM && X86 && PCI
42 default y
43 ---help---
44 This driver provides kernel-side support for the Random Number
45 Generator hardware found on the AMD Geode LX.
46
47 To compile this driver as a module, choose M here: the
48 module will be called geode-rng.
49
50 If unsure, say Y.
51
52config HW_RANDOM_VIA
53 tristate "VIA HW Random Number Generator support"
54 depends on HW_RANDOM && X86_32
55 default y
56 ---help---
57 This driver provides kernel-side support for the Random Number
58 Generator hardware found on VIA based motherboards.
59
60 To compile this driver as a module, choose M here: the
61 module will be called via-rng.
62
63 If unsure, say Y.
64
65config HW_RANDOM_IXP4XX
66 tristate "Intel IXP4xx NPU HW Random Number Generator support"
67 depends on HW_RANDOM && ARCH_IXP4XX
68 default y
69 ---help---
70 This driver provides kernel-side support for the Random
71 Number Generator hardware found on the Intel IXP4xx NPU.
72
73 To compile this driver as a module, choose M here: the
74 module will be called ixp4xx-rng.
75
76 If unsure, say Y.
77
78config HW_RANDOM_OMAP
79 tristate "OMAP Random Number Generator support"
80 depends on HW_RANDOM && (ARCH_OMAP16XX || ARCH_OMAP24XX)
81 default y
82 ---help---
83 This driver provides kernel-side support for the Random Number
84 Generator hardware found on OMAP16xx and OMAP24xx multimedia
85 processors.
86
87 To compile this driver as a module, choose M here: the
88 module will be called omap-rng.
89
90 If unsure, say Y.
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
new file mode 100644
index 000000000000..e263ae96f940
--- /dev/null
+++ b/drivers/char/hw_random/Makefile
@@ -0,0 +1,11 @@
1#
2# Makefile for HW Random Number Generator (RNG) device drivers.
3#
4
5obj-$(CONFIG_HW_RANDOM) += core.o
6obj-$(CONFIG_HW_RANDOM_INTEL) += intel-rng.o
7obj-$(CONFIG_HW_RANDOM_AMD) += amd-rng.o
8obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o
9obj-$(CONFIG_HW_RANDOM_VIA) += via-rng.o
10obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o
11obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o
diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
new file mode 100644
index 000000000000..71e4e0f3fd54
--- /dev/null
+++ b/drivers/char/hw_random/amd-rng.c
@@ -0,0 +1,152 @@
1/*
2 * RNG driver for AMD RNGs
3 *
4 * Copyright 2005 (c) MontaVista Software, Inc.
5 *
6 * with the majority of the code coming from:
7 *
8 * Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG)
9 * (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com>
10 *
11 * derived from
12 *
13 * Hardware driver for the AMD 768 Random Number Generator (RNG)
14 * (c) Copyright 2001 Red Hat Inc <alan@redhat.com>
15 *
16 * derived from
17 *
18 * Hardware driver for Intel i810 Random Number Generator (RNG)
19 * Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
20 * Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
21 *
22 * This file is licensed under the terms of the GNU General Public
23 * License version 2. This program is licensed "as is" without any
24 * warranty of any kind, whether express or implied.
25 */
26
27#include <linux/module.h>
28#include <linux/kernel.h>
29#include <linux/pci.h>
30#include <linux/hw_random.h>
31#include <asm/io.h>
32
33
34#define PFX KBUILD_MODNAME ": "
35
36
37/*
38 * Data for PCI driver interface
39 *
40 * This data only exists for exporting the supported
41 * PCI ids via MODULE_DEVICE_TABLE. We do not actually
42 * register a pci_driver, because someone else might one day
43 * want to register another driver on the same PCI id.
44 */
45static const struct pci_device_id pci_tbl[] = {
46 { 0x1022, 0x7443, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
47 { 0x1022, 0x746b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
48 { 0, }, /* terminate list */
49};
50MODULE_DEVICE_TABLE(pci, pci_tbl);
51
52static struct pci_dev *amd_pdev;
53
54
55static int amd_rng_data_present(struct hwrng *rng)
56{
57 u32 pmbase = (u32)rng->priv;
58
59 return !!(inl(pmbase + 0xF4) & 1);
60}
61
62static int amd_rng_data_read(struct hwrng *rng, u32 *data)
63{
64 u32 pmbase = (u32)rng->priv;
65
66 *data = inl(pmbase + 0xF0);
67
68 return 4;
69}
70
71static int amd_rng_init(struct hwrng *rng)
72{
73 u8 rnen;
74
75 pci_read_config_byte(amd_pdev, 0x40, &rnen);
76 rnen |= (1 << 7); /* RNG on */
77 pci_write_config_byte(amd_pdev, 0x40, rnen);
78
79 pci_read_config_byte(amd_pdev, 0x41, &rnen);
80 rnen |= (1 << 7); /* PMIO enable */
81 pci_write_config_byte(amd_pdev, 0x41, rnen);
82
83 return 0;
84}
85
86static void amd_rng_cleanup(struct hwrng *rng)
87{
88 u8 rnen;
89
90 pci_read_config_byte(amd_pdev, 0x40, &rnen);
91 rnen &= ~(1 << 7); /* RNG off */
92 pci_write_config_byte(amd_pdev, 0x40, rnen);
93}
94
95
96static struct hwrng amd_rng = {
97 .name = "amd",
98 .init = amd_rng_init,
99 .cleanup = amd_rng_cleanup,
100 .data_present = amd_rng_data_present,
101 .data_read = amd_rng_data_read,
102};
103
104
105static int __init mod_init(void)
106{
107 int err = -ENODEV;
108 struct pci_dev *pdev = NULL;
109 const struct pci_device_id *ent;
110 u32 pmbase;
111
112 for_each_pci_dev(pdev) {
113 ent = pci_match_id(pci_tbl, pdev);
114 if (ent)
115 goto found;
116 }
117 /* Device not found. */
118 goto out;
119
120found:
121 err = pci_read_config_dword(pdev, 0x58, &pmbase);
122 if (err)
123 goto out;
124 err = -EIO;
125 pmbase &= 0x0000FF00;
126 if (pmbase == 0)
127 goto out;
128 amd_rng.priv = (unsigned long)pmbase;
129 amd_pdev = pdev;
130
131 printk(KERN_INFO "AMD768 RNG detected\n");
132 err = hwrng_register(&amd_rng);
133 if (err) {
134 printk(KERN_ERR PFX "RNG registering failed (%d)\n",
135 err);
136 goto out;
137 }
138out:
139 return err;
140}
141
142static void __exit mod_exit(void)
143{
144 hwrng_unregister(&amd_rng);
145}
146
147subsys_initcall(mod_init);
148module_exit(mod_exit);
149
150MODULE_AUTHOR("The Linux Kernel team");
151MODULE_DESCRIPTION("H/W RNG driver for AMD chipsets");
152MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
new file mode 100644
index 000000000000..88b026639f10
--- /dev/null
+++ b/drivers/char/hw_random/core.c
@@ -0,0 +1,354 @@
1/*
2 Added support for the AMD Geode LX RNG
3 (c) Copyright 2004-2005 Advanced Micro Devices, Inc.
4
5 derived from
6
7 Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG)
8 (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com>
9
10 derived from
11
12 Hardware driver for the AMD 768 Random Number Generator (RNG)
13 (c) Copyright 2001 Red Hat Inc <alan@redhat.com>
14
15 derived from
16
17 Hardware driver for Intel i810 Random Number Generator (RNG)
18 Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
19 Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
20
21 Added generic RNG API
22 Copyright 2006 Michael Buesch <mbuesch@freenet.de>
23 Copyright 2005 (c) MontaVista Software, Inc.
24
25 Please read Documentation/hw_random.txt for details on use.
26
27 ----------------------------------------------------------
28 This software may be used and distributed according to the terms
29 of the GNU General Public License, incorporated herein by reference.
30
31 */
32
33
34#include <linux/device.h>
35#include <linux/hw_random.h>
36#include <linux/module.h>
37#include <linux/kernel.h>
38#include <linux/fs.h>
39#include <linux/init.h>
40#include <linux/miscdevice.h>
41#include <linux/delay.h>
42#include <asm/uaccess.h>
43
44
45#define RNG_MODULE_NAME "hw_random"
46#define PFX RNG_MODULE_NAME ": "
47#define RNG_MISCDEV_MINOR 183 /* official */
48
49
50static struct hwrng *current_rng;
51static LIST_HEAD(rng_list);
52static DEFINE_MUTEX(rng_mutex);
53
54
55static inline int hwrng_init(struct hwrng *rng)
56{
57 if (!rng->init)
58 return 0;
59 return rng->init(rng);
60}
61
62static inline void hwrng_cleanup(struct hwrng *rng)
63{
64 if (rng && rng->cleanup)
65 rng->cleanup(rng);
66}
67
68static inline int hwrng_data_present(struct hwrng *rng)
69{
70 if (!rng->data_present)
71 return 1;
72 return rng->data_present(rng);
73}
74
75static inline int hwrng_data_read(struct hwrng *rng, u32 *data)
76{
77 return rng->data_read(rng, data);
78}
79
80
81static int rng_dev_open(struct inode *inode, struct file *filp)
82{
83 /* enforce read-only access to this chrdev */
84 if ((filp->f_mode & FMODE_READ) == 0)
85 return -EINVAL;
86 if (filp->f_mode & FMODE_WRITE)
87 return -EINVAL;
88 return 0;
89}
90
91static ssize_t rng_dev_read(struct file *filp, char __user *buf,
92 size_t size, loff_t *offp)
93{
94 u32 data;
95 ssize_t ret = 0;
96 int i, err = 0;
97 int data_present;
98 int bytes_read;
99
100 while (size) {
101 err = -ERESTARTSYS;
102 if (mutex_lock_interruptible(&rng_mutex))
103 goto out;
104 if (!current_rng) {
105 mutex_unlock(&rng_mutex);
106 err = -ENODEV;
107 goto out;
108 }
109 if (filp->f_flags & O_NONBLOCK) {
110 data_present = hwrng_data_present(current_rng);
111 } else {
112 /* Some RNG require some time between data_reads to gather
113 * new entropy. Poll it.
114 */
115 for (i = 0; i < 20; i++) {
116 data_present = hwrng_data_present(current_rng);
117 if (data_present)
118 break;
119 udelay(10);
120 }
121 }
122 bytes_read = 0;
123 if (data_present)
124 bytes_read = hwrng_data_read(current_rng, &data);
125 mutex_unlock(&rng_mutex);
126
127 err = -EAGAIN;
128 if (!bytes_read && (filp->f_flags & O_NONBLOCK))
129 goto out;
130
131 err = -EFAULT;
132 while (bytes_read && size) {
133 if (put_user((u8)data, buf++))
134 goto out;
135 size--;
136 ret++;
137 bytes_read--;
138 data >>= 8;
139 }
140
141 if (need_resched())
142 schedule_timeout_interruptible(1);
143 err = -ERESTARTSYS;
144 if (signal_pending(current))
145 goto out;
146 }
147out:
148 return ret ? : err;
149}
150
151
152static struct file_operations rng_chrdev_ops = {
153 .owner = THIS_MODULE,
154 .open = rng_dev_open,
155 .read = rng_dev_read,
156};
157
158static struct miscdevice rng_miscdev = {
159 .minor = RNG_MISCDEV_MINOR,
160 .name = RNG_MODULE_NAME,
161 .fops = &rng_chrdev_ops,
162};
163
164
165static ssize_t hwrng_attr_current_store(struct class_device *class,
166 const char *buf, size_t len)
167{
168 int err;
169 struct hwrng *rng;
170
171 err = mutex_lock_interruptible(&rng_mutex);
172 if (err)
173 return -ERESTARTSYS;
174 err = -ENODEV;
175 list_for_each_entry(rng, &rng_list, list) {
176 if (strcmp(rng->name, buf) == 0) {
177 if (rng == current_rng) {
178 err = 0;
179 break;
180 }
181 err = hwrng_init(rng);
182 if (err)
183 break;
184 hwrng_cleanup(current_rng);
185 current_rng = rng;
186 err = 0;
187 break;
188 }
189 }
190 mutex_unlock(&rng_mutex);
191
192 return err ? : len;
193}
194
195static ssize_t hwrng_attr_current_show(struct class_device *class,
196 char *buf)
197{
198 int err;
199 ssize_t ret;
200 const char *name = "none";
201
202 err = mutex_lock_interruptible(&rng_mutex);
203 if (err)
204 return -ERESTARTSYS;
205 if (current_rng)
206 name = current_rng->name;
207 ret = snprintf(buf, PAGE_SIZE, "%s\n", name);
208 mutex_unlock(&rng_mutex);
209
210 return ret;
211}
212
213static ssize_t hwrng_attr_available_show(struct class_device *class,
214 char *buf)
215{
216 int err;
217 ssize_t ret = 0;
218 struct hwrng *rng;
219
220 err = mutex_lock_interruptible(&rng_mutex);
221 if (err)
222 return -ERESTARTSYS;
223 buf[0] = '\0';
224 list_for_each_entry(rng, &rng_list, list) {
225 strncat(buf, rng->name, PAGE_SIZE - ret - 1);
226 ret += strlen(rng->name);
227 strncat(buf, " ", PAGE_SIZE - ret - 1);
228 ret++;
229 }
230 strncat(buf, "\n", PAGE_SIZE - ret - 1);
231 ret++;
232 mutex_unlock(&rng_mutex);
233
234 return ret;
235}
236
237static CLASS_DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
238 hwrng_attr_current_show,
239 hwrng_attr_current_store);
240static CLASS_DEVICE_ATTR(rng_available, S_IRUGO,
241 hwrng_attr_available_show,
242 NULL);
243
244
245static void unregister_miscdev(void)
246{
247 class_device_remove_file(rng_miscdev.class,
248 &class_device_attr_rng_available);
249 class_device_remove_file(rng_miscdev.class,
250 &class_device_attr_rng_current);
251 misc_deregister(&rng_miscdev);
252}
253
254static int register_miscdev(void)
255{
256 int err;
257
258 err = misc_register(&rng_miscdev);
259 if (err)
260 goto out;
261 err = class_device_create_file(rng_miscdev.class,
262 &class_device_attr_rng_current);
263 if (err)
264 goto err_misc_dereg;
265 err = class_device_create_file(rng_miscdev.class,
266 &class_device_attr_rng_available);
267 if (err)
268 goto err_remove_current;
269out:
270 return err;
271
272err_remove_current:
273 class_device_remove_file(rng_miscdev.class,
274 &class_device_attr_rng_current);
275err_misc_dereg:
276 misc_deregister(&rng_miscdev);
277 goto out;
278}
279
280int hwrng_register(struct hwrng *rng)
281{
282 int must_register_misc;
283 int err = -EINVAL;
284 struct hwrng *old_rng, *tmp;
285
286 if (rng->name == NULL ||
287 rng->data_read == NULL)
288 goto out;
289
290 mutex_lock(&rng_mutex);
291
292 /* Must not register two RNGs with the same name. */
293 err = -EEXIST;
294 list_for_each_entry(tmp, &rng_list, list) {
295 if (strcmp(tmp->name, rng->name) == 0)
296 goto out_unlock;
297 }
298
299 must_register_misc = (current_rng == NULL);
300 old_rng = current_rng;
301 if (!old_rng) {
302 err = hwrng_init(rng);
303 if (err)
304 goto out_unlock;
305 current_rng = rng;
306 }
307 err = 0;
308 if (must_register_misc) {
309 err = register_miscdev();
310 if (err) {
311 if (!old_rng) {
312 hwrng_cleanup(rng);
313 current_rng = NULL;
314 }
315 goto out_unlock;
316 }
317 }
318 INIT_LIST_HEAD(&rng->list);
319 list_add_tail(&rng->list, &rng_list);
320out_unlock:
321 mutex_unlock(&rng_mutex);
322out:
323 return err;
324}
325EXPORT_SYMBOL_GPL(hwrng_register);
326
327void hwrng_unregister(struct hwrng *rng)
328{
329 int err;
330
331 mutex_lock(&rng_mutex);
332
333 list_del(&rng->list);
334 if (current_rng == rng) {
335 hwrng_cleanup(rng);
336 if (list_empty(&rng_list)) {
337 current_rng = NULL;
338 } else {
339 current_rng = list_entry(rng_list.prev, struct hwrng, list);
340 err = hwrng_init(current_rng);
341 if (err)
342 current_rng = NULL;
343 }
344 }
345 if (list_empty(&rng_list))
346 unregister_miscdev();
347
348 mutex_unlock(&rng_mutex);
349}
350EXPORT_SYMBOL_GPL(hwrng_unregister);
351
352
353MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver");
354MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c
new file mode 100644
index 000000000000..be61f22ee7bb
--- /dev/null
+++ b/drivers/char/hw_random/geode-rng.c
@@ -0,0 +1,128 @@
1/*
2 * RNG driver for AMD Geode RNGs
3 *
4 * Copyright 2005 (c) MontaVista Software, Inc.
5 *
6 * with the majority of the code coming from:
7 *
8 * Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG)
9 * (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com>
10 *
11 * derived from
12 *
13 * Hardware driver for the AMD 768 Random Number Generator (RNG)
14 * (c) Copyright 2001 Red Hat Inc <alan@redhat.com>
15 *
16 * derived from
17 *
18 * Hardware driver for Intel i810 Random Number Generator (RNG)
19 * Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
20 * Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
21 *
22 * This file is licensed under the terms of the GNU General Public
23 * License version 2. This program is licensed "as is" without any
24 * warranty of any kind, whether express or implied.
25 */
26
27#include <linux/module.h>
28#include <linux/kernel.h>
29#include <linux/pci.h>
30#include <linux/hw_random.h>
31#include <asm/io.h>
32
33
34#define PFX KBUILD_MODNAME ": "
35
36#define GEODE_RNG_DATA_REG 0x50
37#define GEODE_RNG_STATUS_REG 0x54
38
39/*
40 * Data for PCI driver interface
41 *
42 * This data only exists for exporting the supported
43 * PCI ids via MODULE_DEVICE_TABLE. We do not actually
44 * register a pci_driver, because someone else might one day
45 * want to register another driver on the same PCI id.
46 */
47static const struct pci_device_id pci_tbl[] = {
48 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LX_AES,
49 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
50 { 0, }, /* terminate list */
51};
52MODULE_DEVICE_TABLE(pci, pci_tbl);
53
54
55static int geode_rng_data_read(struct hwrng *rng, u32 *data)
56{
57 void __iomem *mem = (void __iomem *)rng->priv;
58
59 *data = readl(mem + GEODE_RNG_DATA_REG);
60
61 return 4;
62}
63
64static int geode_rng_data_present(struct hwrng *rng)
65{
66 void __iomem *mem = (void __iomem *)rng->priv;
67
68 return !!(readl(mem + GEODE_RNG_STATUS_REG));
69}
70
71
72static struct hwrng geode_rng = {
73 .name = "geode",
74 .data_present = geode_rng_data_present,
75 .data_read = geode_rng_data_read,
76};
77
78
79static int __init mod_init(void)
80{
81 int err = -ENODEV;
82 struct pci_dev *pdev = NULL;
83 const struct pci_device_id *ent;
84 void __iomem *mem;
85 unsigned long rng_base;
86
87 for_each_pci_dev(pdev) {
88 ent = pci_match_id(pci_tbl, pdev);
89 if (ent)
90 goto found;
91 }
92 /* Device not found. */
93 goto out;
94
95found:
96 rng_base = pci_resource_start(pdev, 0);
97 if (rng_base == 0)
98 goto out;
99 err = -ENOMEM;
100 mem = ioremap(rng_base, 0x58);
101 if (!mem)
102 goto out;
103 geode_rng.priv = (unsigned long)mem;
104
105 printk(KERN_INFO "AMD Geode RNG detected\n");
106 err = hwrng_register(&geode_rng);
107 if (err) {
108 printk(KERN_ERR PFX "RNG registering failed (%d)\n",
109 err);
110 goto out;
111 }
112out:
113 return err;
114}
115
116static void __exit mod_exit(void)
117{
118 void __iomem *mem = (void __iomem *)geode_rng.priv;
119
120 hwrng_unregister(&geode_rng);
121 iounmap(mem);
122}
123
124subsys_initcall(mod_init);
125module_exit(mod_exit);
126
127MODULE_DESCRIPTION("H/W RNG driver for AMD Geode LX CPUs");
128MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c
new file mode 100644
index 000000000000..6594bd5645f4
--- /dev/null
+++ b/drivers/char/hw_random/intel-rng.c
@@ -0,0 +1,189 @@
1/*
2 * RNG driver for Intel RNGs
3 *
4 * Copyright 2005 (c) MontaVista Software, Inc.
5 *
6 * with the majority of the code coming from:
7 *
8 * Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG)
9 * (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com>
10 *
11 * derived from
12 *
13 * Hardware driver for the AMD 768 Random Number Generator (RNG)
14 * (c) Copyright 2001 Red Hat Inc <alan@redhat.com>
15 *
16 * derived from
17 *
18 * Hardware driver for Intel i810 Random Number Generator (RNG)
19 * Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
20 * Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
21 *
22 * This file is licensed under the terms of the GNU General Public
23 * License version 2. This program is licensed "as is" without any
24 * warranty of any kind, whether express or implied.
25 */
26
27#include <linux/module.h>
28#include <linux/kernel.h>
29#include <linux/pci.h>
30#include <linux/hw_random.h>
31#include <asm/io.h>
32
33
34#define PFX KBUILD_MODNAME ": "
35
36/*
37 * RNG registers
38 */
39#define INTEL_RNG_HW_STATUS 0
40#define INTEL_RNG_PRESENT 0x40
41#define INTEL_RNG_ENABLED 0x01
42#define INTEL_RNG_STATUS 1
43#define INTEL_RNG_DATA_PRESENT 0x01
44#define INTEL_RNG_DATA 2
45
46/*
47 * Magic address at which Intel PCI bridges locate the RNG
48 */
49#define INTEL_RNG_ADDR 0xFFBC015F
50#define INTEL_RNG_ADDR_LEN 3
51
52/*
53 * Data for PCI driver interface
54 *
55 * This data only exists for exporting the supported
56 * PCI ids via MODULE_DEVICE_TABLE. We do not actually
57 * register a pci_driver, because someone else might one day
58 * want to register another driver on the same PCI id.
59 */
60static const struct pci_device_id pci_tbl[] = {
61 { 0x8086, 0x2418, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
62 { 0x8086, 0x2428, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
63 { 0x8086, 0x2430, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
64 { 0x8086, 0x2448, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
65 { 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
66 { 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
67 { 0, }, /* terminate list */
68};
69MODULE_DEVICE_TABLE(pci, pci_tbl);
70
71
72static inline u8 hwstatus_get(void __iomem *mem)
73{
74 return readb(mem + INTEL_RNG_HW_STATUS);
75}
76
77static inline u8 hwstatus_set(void __iomem *mem,
78 u8 hw_status)
79{
80 writeb(hw_status, mem + INTEL_RNG_HW_STATUS);
81 return hwstatus_get(mem);
82}
83
84static int intel_rng_data_present(struct hwrng *rng)
85{
86 void __iomem *mem = (void __iomem *)rng->priv;
87
88 return !!(readb(mem + INTEL_RNG_STATUS) & INTEL_RNG_DATA_PRESENT);
89}
90
91static int intel_rng_data_read(struct hwrng *rng, u32 *data)
92{
93 void __iomem *mem = (void __iomem *)rng->priv;
94
95 *data = readb(mem + INTEL_RNG_DATA);
96
97 return 1;
98}
99
100static int intel_rng_init(struct hwrng *rng)
101{
102 void __iomem *mem = (void __iomem *)rng->priv;
103 u8 hw_status;
104 int err = -EIO;
105
106 hw_status = hwstatus_get(mem);
107 /* turn RNG h/w on, if it's off */
108 if ((hw_status & INTEL_RNG_ENABLED) == 0)
109 hw_status = hwstatus_set(mem, hw_status | INTEL_RNG_ENABLED);
110 if ((hw_status & INTEL_RNG_ENABLED) == 0) {
111 printk(KERN_ERR PFX "cannot enable RNG, aborting\n");
112 goto out;
113 }
114 err = 0;
115out:
116 return err;
117}
118
119static void intel_rng_cleanup(struct hwrng *rng)
120{
121 void __iomem *mem = (void __iomem *)rng->priv;
122 u8 hw_status;
123
124 hw_status = hwstatus_get(mem);
125 if (hw_status & INTEL_RNG_ENABLED)
126 hwstatus_set(mem, hw_status & ~INTEL_RNG_ENABLED);
127 else
128 printk(KERN_WARNING PFX "unusual: RNG already disabled\n");
129}
130
131
132static struct hwrng intel_rng = {
133 .name = "intel",
134 .init = intel_rng_init,
135 .cleanup = intel_rng_cleanup,
136 .data_present = intel_rng_data_present,
137 .data_read = intel_rng_data_read,
138};
139
140
141static int __init mod_init(void)
142{
143 int err = -ENODEV;
144 void __iomem *mem;
145 u8 hw_status;
146
147 if (!pci_dev_present(pci_tbl))
148 goto out; /* Device not found. */
149
150 err = -ENOMEM;
151 mem = ioremap(INTEL_RNG_ADDR, INTEL_RNG_ADDR_LEN);
152 if (!mem)
153 goto out;
154 intel_rng.priv = (unsigned long)mem;
155
156 /* Check for Intel 82802 */
157 err = -ENODEV;
158 hw_status = hwstatus_get(mem);
159 if ((hw_status & INTEL_RNG_PRESENT) == 0)
160 goto err_unmap;
161
162 printk(KERN_INFO "Intel 82802 RNG detected\n");
163 err = hwrng_register(&intel_rng);
164 if (err) {
165 printk(KERN_ERR PFX "RNG registering failed (%d)\n",
166 err);
167 goto out;
168 }
169out:
170 return err;
171
172err_unmap:
173 iounmap(mem);
174 goto out;
175}
176
177static void __exit mod_exit(void)
178{
179 void __iomem *mem = (void __iomem *)intel_rng.priv;
180
181 hwrng_unregister(&intel_rng);
182 iounmap(mem);
183}
184
185subsys_initcall(mod_init);
186module_exit(mod_exit);
187
188MODULE_DESCRIPTION("H/W RNG driver for Intel chipsets");
189MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/ixp4xx-rng.c b/drivers/char/hw_random/ixp4xx-rng.c
new file mode 100644
index 000000000000..ef71022423c9
--- /dev/null
+++ b/drivers/char/hw_random/ixp4xx-rng.c
@@ -0,0 +1,73 @@
1/*
2 * drivers/char/hw_random/ixp4xx-rng.c
3 *
4 * RNG driver for Intel IXP4xx family of NPUs
5 *
6 * Author: Deepak Saxena <dsaxena@plexity.net>
7 *
8 * Copyright 2005 (c) MontaVista Software, Inc.
9 *
10 * Fixes by Michael Buesch
11 *
12 * This file is licensed under the terms of the GNU General Public
13 * License version 2. This program is licensed "as is" without any
14 * warranty of any kind, whether express or implied.
15 */
16
17#include <linux/kernel.h>
18#include <linux/config.h>
19#include <linux/types.h>
20#include <linux/module.h>
21#include <linux/moduleparam.h>
22#include <linux/init.h>
23#include <linux/bitops.h>
24#include <linux/hw_random.h>
25
26#include <asm/io.h>
27#include <asm/hardware.h>
28
29
30static int ixp4xx_rng_data_read(struct hwrng *rng, u32 *buffer)
31{
32 void __iomem * rng_base = (void __iomem *)rng->priv;
33
34 *buffer = __raw_readl(rng_base);
35
36 return 4;
37}
38
39static struct hwrng ixp4xx_rng_ops = {
40 .name = "ixp4xx",
41 .data_read = ixp4xx_rng_data_read,
42};
43
44static int __init ixp4xx_rng_init(void)
45{
46 void __iomem * rng_base;
47 int err;
48
49 rng_base = ioremap(0x70002100, 4);
50 if (!rng_base)
51 return -ENOMEM;
52 ixp4xx_rng_ops.priv = (unsigned long)rng_base;
53 err = hwrng_register(&ixp4xx_rng_ops);
54 if (err)
55 iounmap(rng_base);
56
57 return err;
58}
59
60static void __exit ixp4xx_rng_exit(void)
61{
62 void __iomem * rng_base = (void __iomem *)ixp4xx_rng_ops.priv;
63
64 hwrng_unregister(&ixp4xx_rng_ops);
65 iounmap(rng_base);
66}
67
68subsys_initcall(ixp4xx_rng_init);
69module_exit(ixp4xx_rng_exit);
70
71MODULE_AUTHOR("Deepak Saxena <dsaxena@plexity.net>");
72MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver for IXP4xx");
73MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
new file mode 100644
index 000000000000..819516b35a79
--- /dev/null
+++ b/drivers/char/hw_random/omap-rng.c
@@ -0,0 +1,208 @@
1/*
2 * drivers/char/hw_random/omap-rng.c
3 *
4 * RNG driver for TI OMAP CPU family
5 *
6 * Author: Deepak Saxena <dsaxena@plexity.net>
7 *
8 * Copyright 2005 (c) MontaVista Software, Inc.
9 *
10 * Mostly based on original driver:
11 *
12 * Copyright (C) 2005 Nokia Corporation
13 * Author: Juha Yrjölä <juha.yrjola@nokia.com>
14 *
15 * This file is licensed under the terms of the GNU General Public
16 * License version 2. This program is licensed "as is" without any
17 * warranty of any kind, whether express or implied.
18 *
19 * TODO:
20 *
21 * - Make status updates interrupt driven so we don't poll
22 *
23 */
24
25#include <linux/module.h>
26#include <linux/init.h>
27#include <linux/random.h>
28#include <linux/err.h>
29#include <linux/device.h>
30#include <linux/hw_random.h>
31
32#include <asm/io.h>
33#include <asm/hardware/clock.h>
34
35#define RNG_OUT_REG 0x00 /* Output register */
36#define RNG_STAT_REG 0x04 /* Status register
37 [0] = STAT_BUSY */
38#define RNG_ALARM_REG 0x24 /* Alarm register
39 [7:0] = ALARM_COUNTER */
40#define RNG_CONFIG_REG 0x28 /* Configuration register
41 [11:6] = RESET_COUNT
42 [5:3] = RING2_DELAY
43 [2:0] = RING1_DELAY */
44#define RNG_REV_REG 0x3c /* Revision register
45 [7:0] = REV_NB */
46#define RNG_MASK_REG 0x40 /* Mask and reset register
47 [2] = IT_EN
48 [1] = SOFTRESET
49 [0] = AUTOIDLE */
50#define RNG_SYSSTATUS 0x44 /* System status
51 [0] = RESETDONE */
52
53static void __iomem *rng_base;
54static struct clk *rng_ick;
55static struct device *rng_dev;
56
57static u32 omap_rng_read_reg(int reg)
58{
59 return __raw_readl(rng_base + reg);
60}
61
62static void omap_rng_write_reg(int reg, u32 val)
63{
64 __raw_writel(val, rng_base + reg);
65}
66
67/* REVISIT: Does the status bit really work on 16xx? */
68static int omap_rng_data_present(struct hwrng *rng)
69{
70 return omap_rng_read_reg(RNG_STAT_REG) ? 0 : 1;
71}
72
73static int omap_rng_data_read(struct hwrng *rng, u32 *data)
74{
75 *data = omap_rng_read_reg(RNG_OUT_REG);
76
77 return 4;
78}
79
80static struct hwrng omap_rng_ops = {
81 .name = "omap",
82 .data_present = omap_rng_data_present,
83 .data_read = omap_rng_data_read,
84};
85
86static int __init omap_rng_probe(struct device *dev)
87{
88 struct platform_device *pdev = to_platform_device(dev);
89 struct resource *res, *mem;
90 int ret;
91
92 /*
93 * A bit ugly, and it will never actually happen but there can
94 * be only one RNG and this catches any bork
95 */
96 BUG_ON(rng_dev);
97
98 if (cpu_is_omap24xx()) {
99 rng_ick = clk_get(NULL, "rng_ick");
100 if (IS_ERR(rng_ick)) {
101 dev_err(dev, "Could not get rng_ick\n");
102 ret = PTR_ERR(rng_ick);
103 return ret;
104 }
105 else {
106 clk_use(rng_ick);
107 }
108 }
109
110 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
111
112 if (!res)
113 return -ENOENT;
114
115 mem = request_mem_region(res->start, res->end - res->start + 1,
116 pdev->name);
117 if (mem == NULL)
118 return -EBUSY;
119
120 dev_set_drvdata(dev, mem);
121 rng_base = (u32 __iomem *)io_p2v(res->start);
122
123 ret = hwrng_register(&omap_rng_ops);
124 if (ret) {
125 release_resource(mem);
126 rng_base = NULL;
127 return ret;
128 }
129
130 dev_info(dev, "OMAP Random Number Generator ver. %02x\n",
131 omap_rng_read_reg(RNG_REV_REG));
132 omap_rng_write_reg(RNG_MASK_REG, 0x1);
133
134 rng_dev = dev;
135
136 return 0;
137}
138
139static int __exit omap_rng_remove(struct device *dev)
140{
141 struct resource *mem = dev_get_drvdata(dev);
142
143 hwrng_unregister(&omap_rng_ops);
144
145 omap_rng_write_reg(RNG_MASK_REG, 0x0);
146
147 if (cpu_is_omap24xx()) {
148 clk_unuse(rng_ick);
149 clk_put(rng_ick);
150 }
151
152 release_resource(mem);
153 rng_base = NULL;
154
155 return 0;
156}
157
158#ifdef CONFIG_PM
159
160static int omap_rng_suspend(struct device *dev, pm_message_t message, u32 level)
161{
162 omap_rng_write_reg(RNG_MASK_REG, 0x0);
163
164 return 0;
165}
166
167static int omap_rng_resume(struct device *dev, pm_message_t message, u32 level)
168{
169 omap_rng_write_reg(RNG_MASK_REG, 0x1);
170
171 return 1;
172}
173
174#else
175
176#define omap_rng_suspend NULL
177#define omap_rng_resume NULL
178
179#endif
180
181
182static struct device_driver omap_rng_driver = {
183 .name = "omap_rng",
184 .bus = &platform_bus_type,
185 .probe = omap_rng_probe,
186 .remove = __exit_p(omap_rng_remove),
187 .suspend = omap_rng_suspend,
188 .resume = omap_rng_resume
189};
190
191static int __init omap_rng_init(void)
192{
193 if (!cpu_is_omap16xx() && !cpu_is_omap24xx())
194 return -ENODEV;
195
196 return driver_register(&omap_rng_driver);
197}
198
199static void __exit omap_rng_exit(void)
200{
201 driver_unregister(&omap_rng_driver);
202}
203
204module_init(omap_rng_init);
205module_exit(omap_rng_exit);
206
207MODULE_AUTHOR("Deepak Saxena (and others)");
208MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
new file mode 100644
index 000000000000..0e786b617bb8
--- /dev/null
+++ b/drivers/char/hw_random/via-rng.c
@@ -0,0 +1,183 @@
1/*
2 * RNG driver for VIA RNGs
3 *
4 * Copyright 2005 (c) MontaVista Software, Inc.
5 *
6 * with the majority of the code coming from:
7 *
8 * Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG)
9 * (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com>
10 *
11 * derived from
12 *
13 * Hardware driver for the AMD 768 Random Number Generator (RNG)
14 * (c) Copyright 2001 Red Hat Inc <alan@redhat.com>
15 *
16 * derived from
17 *
18 * Hardware driver for Intel i810 Random Number Generator (RNG)
19 * Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
20 * Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
21 *
22 * This file is licensed under the terms of the GNU General Public
23 * License version 2. This program is licensed "as is" without any
24 * warranty of any kind, whether express or implied.
25 */
26
27#include <linux/module.h>
28#include <linux/kernel.h>
29#include <linux/pci.h>
30#include <linux/hw_random.h>
31#include <asm/io.h>
32#include <asm/msr.h>
33#include <asm/cpufeature.h>
34
35
36#define PFX KBUILD_MODNAME ": "
37
38
enum {
	/* bit fields manipulated in the low word of MSR_VIA_RNG */
	VIA_STRFILT_CNT_SHIFT	= 16,
	VIA_STRFILT_FAIL	= (1 << 15),
	VIA_STRFILT_ENABLE	= (1 << 14),
	VIA_RAWBITS_ENABLE	= (1 << 13),
	VIA_RNG_ENABLE		= (1 << 6),
	VIA_XSTORE_CNT_MASK	= 0x0F,

	/* chunk-size selectors passed to xstore(), with matching data masks */
	VIA_RNG_CHUNK_8		= 0x00,	/* 64 rand bits, 64 stored bits */
	VIA_RNG_CHUNK_4		= 0x01,	/* 32 rand bits, 32 stored bits */
	VIA_RNG_CHUNK_4_MASK	= 0xFFFFFFFF,
	VIA_RNG_CHUNK_2		= 0x02,	/* 16 rand bits, 32 stored bits */
	VIA_RNG_CHUNK_2_MASK	= 0xFFFF,
	VIA_RNG_CHUNK_1		= 0x03,	/* 8 rand bits, 32 stored bits */
	VIA_RNG_CHUNK_1_MASK	= 0xFF,
};
55
56/*
57 * Investigate using the 'rep' prefix to obtain 32 bits of random data
58 * in one insn. The upside is potentially better performance. The
59 * downside is that the instruction becomes no longer atomic. Due to
60 * this, just like familiar issues with /dev/random itself, the worst
61 * case of a 'rep xstore' could potentially pause a cpu for an
62 * unreasonably long time. In practice, this condition would likely
63 * only occur when the hardware is failing. (or so we hope :))
64 *
65 * Another possible performance boost may come from simply buffering
66 * until we have 4 bytes, thus returning a u32 at a time,
67 * instead of the current u8-at-a-time.
68 */
69
70static inline u32 xstore(u32 *addr, u32 edx_in)
71{
72 u32 eax_out;
73
74 asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
75 :"=m"(*addr), "=a"(eax_out)
76 :"D"(addr), "d"(edx_in));
77
78 return eax_out;
79}
80
81static int via_rng_data_present(struct hwrng *rng)
82{
83 u32 bytes_out;
84 u32 *via_rng_datum = (u32 *)(&rng->priv);
85
86 /* We choose the recommended 1-byte-per-instruction RNG rate,
87 * for greater randomness at the expense of speed. Larger
88 * values 2, 4, or 8 bytes-per-instruction yield greater
89 * speed at lesser randomness.
90 *
91 * If you change this to another VIA_CHUNK_n, you must also
92 * change the ->n_bytes values in rng_vendor_ops[] tables.
93 * VIA_CHUNK_8 requires further code changes.
94 *
95 * A copy of MSR_VIA_RNG is placed in eax_out when xstore
96 * completes.
97 */
98
99 *via_rng_datum = 0; /* paranoia, not really necessary */
100 bytes_out = xstore(via_rng_datum, VIA_RNG_CHUNK_1);
101 bytes_out &= VIA_XSTORE_CNT_MASK;
102 if (bytes_out == 0)
103 return 0;
104 return 1;
105}
106
107static int via_rng_data_read(struct hwrng *rng, u32 *data)
108{
109 u32 via_rng_datum = (u32)rng->priv;
110
111 *data = via_rng_datum;
112
113 return 1;
114}
115
116static int via_rng_init(struct hwrng *rng)
117{
118 u32 lo, hi, old_lo;
119
120 /* Control the RNG via MSR. Tread lightly and pay very close
121 * close attention to values written, as the reserved fields
122 * are documented to be "undefined and unpredictable"; but it
123 * does not say to write them as zero, so I make a guess that
124 * we restore the values we find in the register.
125 */
126 rdmsr(MSR_VIA_RNG, lo, hi);
127
128 old_lo = lo;
129 lo &= ~(0x7f << VIA_STRFILT_CNT_SHIFT);
130 lo &= ~VIA_XSTORE_CNT_MASK;
131 lo &= ~(VIA_STRFILT_ENABLE | VIA_STRFILT_FAIL | VIA_RAWBITS_ENABLE);
132 lo |= VIA_RNG_ENABLE;
133
134 if (lo != old_lo)
135 wrmsr(MSR_VIA_RNG, lo, hi);
136
137 /* perhaps-unnecessary sanity check; remove after testing if
138 unneeded */
139 rdmsr(MSR_VIA_RNG, lo, hi);
140 if ((lo & VIA_RNG_ENABLE) == 0) {
141 printk(KERN_ERR PFX "cannot enable VIA C3 RNG, aborting\n");
142 return -ENODEV;
143 }
144
145 return 0;
146}
147
148
149static struct hwrng via_rng = {
150 .name = "via",
151 .init = via_rng_init,
152 .data_present = via_rng_data_present,
153 .data_read = via_rng_data_read,
154};
155
156
157static int __init mod_init(void)
158{
159 int err;
160
161 if (!cpu_has_xstore)
162 return -ENODEV;
163 printk(KERN_INFO "VIA RNG detected\n");
164 err = hwrng_register(&via_rng);
165 if (err) {
166 printk(KERN_ERR PFX "RNG registering failed (%d)\n",
167 err);
168 goto out;
169 }
170out:
171 return err;
172}
173
174static void __exit mod_exit(void)
175{
176 hwrng_unregister(&via_rng);
177}
178
179subsys_initcall(mod_init);
180module_exit(mod_exit);
181
182MODULE_DESCRIPTION("H/W RNG driver for VIA chipsets");
183MODULE_LICENSE("GPL");
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 9f2f8fdec69a..23028559dbc4 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -936,11 +936,8 @@ int ipmi_set_gets_events(ipmi_user_t user, int val)
936 936
937 if (val) { 937 if (val) {
938 /* Deliver any queued events. */ 938 /* Deliver any queued events. */
939 list_for_each_entry_safe(msg, msg2, &intf->waiting_events, 939 list_for_each_entry_safe(msg, msg2, &intf->waiting_events, link)
940 link) { 940 list_move_tail(&msg->link, &msgs);
941 list_del(&msg->link);
942 list_add_tail(&msg->link, &msgs);
943 }
944 intf->waiting_events_count = 0; 941 intf->waiting_events_count = 0;
945 } 942 }
946 943
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index edd996f6fb87..13e3126c1de5 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -151,6 +151,7 @@ unsigned char kbd_sysrq_xlate[KEY_MAX + 1] =
151 "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */ 151 "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */
152 "\r\000/"; /* 0x60 - 0x6f */ 152 "\r\000/"; /* 0x60 - 0x6f */
153static int sysrq_down; 153static int sysrq_down;
154static int sysrq_alt_use;
154#endif 155#endif
155static int sysrq_alt; 156static int sysrq_alt;
156 157
@@ -1143,7 +1144,7 @@ static void kbd_keycode(unsigned int keycode, int down,
1143 kbd = kbd_table + fg_console; 1144 kbd = kbd_table + fg_console;
1144 1145
1145 if (keycode == KEY_LEFTALT || keycode == KEY_RIGHTALT) 1146 if (keycode == KEY_LEFTALT || keycode == KEY_RIGHTALT)
1146 sysrq_alt = down; 1147 sysrq_alt = down ? keycode : 0;
1147#ifdef CONFIG_SPARC 1148#ifdef CONFIG_SPARC
1148 if (keycode == KEY_STOP) 1149 if (keycode == KEY_STOP)
1149 sparc_l1_a_state = down; 1150 sparc_l1_a_state = down;
@@ -1163,9 +1164,14 @@ static void kbd_keycode(unsigned int keycode, int down,
1163 1164
1164#ifdef CONFIG_MAGIC_SYSRQ /* Handle the SysRq Hack */ 1165#ifdef CONFIG_MAGIC_SYSRQ /* Handle the SysRq Hack */
1165 if (keycode == KEY_SYSRQ && (sysrq_down || (down == 1 && sysrq_alt))) { 1166 if (keycode == KEY_SYSRQ && (sysrq_down || (down == 1 && sysrq_alt))) {
1166 sysrq_down = down; 1167 if (!sysrq_down) {
1168 sysrq_down = down;
1169 sysrq_alt_use = sysrq_alt;
1170 }
1167 return; 1171 return;
1168 } 1172 }
1173 if (sysrq_down && !down && keycode == sysrq_alt_use)
1174 sysrq_down = 0;
1169 if (sysrq_down && down && !rep) { 1175 if (sysrq_down && down && !rep) {
1170 handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); 1176 handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty);
1171 return; 1177 return;
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 6c94879e0b99..714d95ff2f1e 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -98,7 +98,22 @@
98#include <asm/system.h> 98#include <asm/system.h>
99#include <asm/uaccess.h> 99#include <asm/uaccess.h>
100 100
101#define MAX_NR_CON_DRIVER 16
101 102
103#define CON_DRIVER_FLAG_MODULE 1
104#define CON_DRIVER_FLAG_INIT 2
105
/*
 * One slot of the registered_con_driver[] table.
 *
 *   con       - the driver's consw; NULL marks a free slot
 *   desc      - description string (the con_startup() return value for
 *               drivers added through register_con_driver())
 *   class_dev - class device created for the slot, NULL if none
 *   node      - slot index, also used as the minor in MKDEV(0, node)
 *   first,
 *   last      - console range the driver may be bound to
 *   flag      - CON_DRIVER_FLAG_* bits
 */
106struct con_driver {
107 const struct consw *con;
108 const char *desc;
109 struct class_device *class_dev;
110 int node;
111 int first;
112 int last;
113 int flag;
114};
115
116static struct con_driver registered_con_driver[MAX_NR_CON_DRIVER];
102const struct consw *conswitchp; 117const struct consw *conswitchp;
103 118
104/* A bitmap for codes <32. A bit of 1 indicates that the code 119/* A bitmap for codes <32. A bit of 1 indicates that the code
@@ -2557,7 +2572,7 @@ static int __init con_init(void)
2557{ 2572{
2558 const char *display_desc = NULL; 2573 const char *display_desc = NULL;
2559 struct vc_data *vc; 2574 struct vc_data *vc;
2560 unsigned int currcons = 0; 2575 unsigned int currcons = 0, i;
2561 2576
2562 acquire_console_sem(); 2577 acquire_console_sem();
2563 2578
@@ -2569,6 +2584,22 @@ static int __init con_init(void)
2569 return 0; 2584 return 0;
2570 } 2585 }
2571 2586
2587 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
2588 struct con_driver *con_driver = &registered_con_driver[i];
2589
2590 if (con_driver->con == NULL) {
2591 con_driver->con = conswitchp;
2592 con_driver->desc = display_desc;
2593 con_driver->flag = CON_DRIVER_FLAG_INIT;
2594 con_driver->first = 0;
2595 con_driver->last = MAX_NR_CONSOLES - 1;
2596 break;
2597 }
2598 }
2599
2600 for (i = 0; i < MAX_NR_CONSOLES; i++)
2601 con_driver_map[i] = conswitchp;
2602
2572 init_timer(&console_timer); 2603 init_timer(&console_timer);
2573 console_timer.function = blank_screen_t; 2604 console_timer.function = blank_screen_t;
2574 if (blankinterval) { 2605 if (blankinterval) {
@@ -2656,38 +2687,53 @@ int __init vty_init(void)
2656} 2687}
2657 2688
2658#ifndef VT_SINGLE_DRIVER 2689#ifndef VT_SINGLE_DRIVER
2690#include <linux/device.h>
2659 2691
2660/* 2692static struct class *vtconsole_class;
2661 * If we support more console drivers, this function is used
2662 * when a driver wants to take over some existing consoles
2663 * and become default driver for newly opened ones.
2664 */
2665 2693
2666int take_over_console(const struct consw *csw, int first, int last, int deflt) 2694static int bind_con_driver(const struct consw *csw, int first, int last,
2695 int deflt)
2667{ 2696{
2668 int i, j = -1; 2697 struct module *owner = csw->owner;
2669 const char *desc; 2698 const char *desc = NULL;
2670 struct module *owner; 2699 struct con_driver *con_driver;
2700 int i, j = -1, k = -1, retval = -ENODEV;
2671 2701
2672 owner = csw->owner;
2673 if (!try_module_get(owner)) 2702 if (!try_module_get(owner))
2674 return -ENODEV; 2703 return -ENODEV;
2675 2704
2676 acquire_console_sem(); 2705 acquire_console_sem();
2677 2706
2678 desc = csw->con_startup(); 2707 /* check if driver is registered */
2679 if (!desc) { 2708 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
2680 release_console_sem(); 2709 con_driver = &registered_con_driver[i];
2681 module_put(owner); 2710
2682 return -ENODEV; 2711 if (con_driver->con == csw) {
2712 desc = con_driver->desc;
2713 retval = 0;
2714 break;
2715 }
2716 }
2717
2718 if (retval)
2719 goto err;
2720
2721 if (!(con_driver->flag & CON_DRIVER_FLAG_INIT)) {
2722 csw->con_startup();
2723 con_driver->flag |= CON_DRIVER_FLAG_INIT;
2683 } 2724 }
2725
2684 if (deflt) { 2726 if (deflt) {
2685 if (conswitchp) 2727 if (conswitchp)
2686 module_put(conswitchp->owner); 2728 module_put(conswitchp->owner);
2729
2687 __module_get(owner); 2730 __module_get(owner);
2688 conswitchp = csw; 2731 conswitchp = csw;
2689 } 2732 }
2690 2733
2734 first = max(first, con_driver->first);
2735 last = min(last, con_driver->last);
2736
2691 for (i = first; i <= last; i++) { 2737 for (i = first; i <= last; i++) {
2692 int old_was_color; 2738 int old_was_color;
2693 struct vc_data *vc = vc_cons[i].d; 2739 struct vc_data *vc = vc_cons[i].d;
@@ -2701,15 +2747,17 @@ int take_over_console(const struct consw *csw, int first, int last, int deflt)
2701 continue; 2747 continue;
2702 2748
2703 j = i; 2749 j = i;
2704 if (CON_IS_VISIBLE(vc)) 2750
2751 if (CON_IS_VISIBLE(vc)) {
2752 k = i;
2705 save_screen(vc); 2753 save_screen(vc);
2754 }
2755
2706 old_was_color = vc->vc_can_do_color; 2756 old_was_color = vc->vc_can_do_color;
2707 vc->vc_sw->con_deinit(vc); 2757 vc->vc_sw->con_deinit(vc);
2708 vc->vc_origin = (unsigned long)vc->vc_screenbuf; 2758 vc->vc_origin = (unsigned long)vc->vc_screenbuf;
2709 vc->vc_visible_origin = vc->vc_origin;
2710 vc->vc_scr_end = vc->vc_origin + vc->vc_screenbuf_size;
2711 vc->vc_pos = vc->vc_origin + vc->vc_size_row * vc->vc_y + 2 * vc->vc_x;
2712 visual_init(vc, i, 0); 2759 visual_init(vc, i, 0);
2760 set_origin(vc);
2713 update_attr(vc); 2761 update_attr(vc);
2714 2762
2715 /* If the console changed between mono <-> color, then 2763 /* If the console changed between mono <-> color, then
@@ -2718,36 +2766,506 @@ int take_over_console(const struct consw *csw, int first, int last, int deflt)
2718 */ 2766 */
2719 if (old_was_color != vc->vc_can_do_color) 2767 if (old_was_color != vc->vc_can_do_color)
2720 clear_buffer_attributes(vc); 2768 clear_buffer_attributes(vc);
2721
2722 if (CON_IS_VISIBLE(vc))
2723 update_screen(vc);
2724 } 2769 }
2770
2725 printk("Console: switching "); 2771 printk("Console: switching ");
2726 if (!deflt) 2772 if (!deflt)
2727 printk("consoles %d-%d ", first+1, last+1); 2773 printk("consoles %d-%d ", first+1, last+1);
2728 if (j >= 0) 2774 if (j >= 0) {
2775 struct vc_data *vc = vc_cons[j].d;
2776
2729 printk("to %s %s %dx%d\n", 2777 printk("to %s %s %dx%d\n",
2730 vc_cons[j].d->vc_can_do_color ? "colour" : "mono", 2778 vc->vc_can_do_color ? "colour" : "mono",
2731 desc, vc_cons[j].d->vc_cols, vc_cons[j].d->vc_rows); 2779 desc, vc->vc_cols, vc->vc_rows);
2732 else 2780
2781 if (k >= 0) {
2782 vc = vc_cons[k].d;
2783 update_screen(vc);
2784 }
2785 } else
2733 printk("to %s\n", desc); 2786 printk("to %s\n", desc);
2734 2787
2788 retval = 0;
2789err:
2735 release_console_sem(); 2790 release_console_sem();
2791 module_put(owner);
2792 return retval;
2793};
2794
2795#ifdef CONFIG_VT_HW_CONSOLE_BINDING
2796static int con_is_graphics(const struct consw *csw, int first, int last)
2797{
2798 int i, retval = 0;
2799
2800 for (i = first; i <= last; i++) {
2801 struct vc_data *vc = vc_cons[i].d;
2802
2803 if (vc && vc->vc_mode == KD_GRAPHICS) {
2804 retval = 1;
2805 break;
2806 }
2807 }
2808
2809 return retval;
2810}
2811
2812static int unbind_con_driver(const struct consw *csw, int first, int last,
2813 int deflt)
2814{
2815 struct module *owner = csw->owner;
2816 const struct consw *defcsw = NULL;
2817 struct con_driver *con_driver = NULL, *con_back = NULL;
2818 int i, retval = -ENODEV;
2819
2820 if (!try_module_get(owner))
2821 return -ENODEV;
2822
2823 acquire_console_sem();
2824
2825 /* check if driver is registered and if it is unbindable */
2826 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
2827 con_driver = &registered_con_driver[i];
2828
2829 if (con_driver->con == csw &&
2830 con_driver->flag & CON_DRIVER_FLAG_MODULE) {
2831 retval = 0;
2832 break;
2833 }
2834 }
2835
2836 if (retval) {
2837 release_console_sem();
2838 goto err;
2839 }
2840
2841 retval = -ENODEV;
2842
2843 /* check if backup driver exists */
2844 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
2845 con_back = &registered_con_driver[i];
2846
2847 if (con_back->con &&
2848 !(con_back->flag & CON_DRIVER_FLAG_MODULE)) {
2849 defcsw = con_back->con;
2850 retval = 0;
2851 break;
2852 }
2853 }
2854
2855 if (retval) {
2856 release_console_sem();
2857 goto err;
2858 }
2859
2860 if (!con_is_bound(csw)) {
2861 release_console_sem();
2862 goto err;
2863 }
2864
2865 first = max(first, con_driver->first);
2866 last = min(last, con_driver->last);
2867
2868 for (i = first; i <= last; i++) {
2869 if (con_driver_map[i] == csw) {
2870 module_put(csw->owner);
2871 con_driver_map[i] = NULL;
2872 }
2873 }
2874
2875 if (!con_is_bound(defcsw)) {
2876 const struct consw *defconsw = conswitchp;
2877
2878 defcsw->con_startup();
2879 con_back->flag |= CON_DRIVER_FLAG_INIT;
2880 /*
2881 * vgacon may change the default driver to point
2882 * to dummycon, we restore it here...
2883 */
2884 conswitchp = defconsw;
2885 }
2886
2887 if (!con_is_bound(csw))
2888 con_driver->flag &= ~CON_DRIVER_FLAG_INIT;
2736 2889
2890 release_console_sem();
2891 /* ignore return value, binding should not fail */
2892 bind_con_driver(defcsw, first, last, deflt);
2893err:
2737 module_put(owner); 2894 module_put(owner);
2895 return retval;
2896
2897}
2898
2899static int vt_bind(struct con_driver *con)
2900{
2901 const struct consw *defcsw = NULL, *csw = NULL;
2902 int i, more = 1, first = -1, last = -1, deflt = 0;
2903
2904 if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE) ||
2905 con_is_graphics(con->con, con->first, con->last))
2906 goto err;
2907
2908 csw = con->con;
2909
2910 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
2911 struct con_driver *con = &registered_con_driver[i];
2912
2913 if (con->con && !(con->flag & CON_DRIVER_FLAG_MODULE)) {
2914 defcsw = con->con;
2915 break;
2916 }
2917 }
2918
2919 if (!defcsw)
2920 goto err;
2921
2922 while (more) {
2923 more = 0;
2924
2925 for (i = con->first; i <= con->last; i++) {
2926 if (con_driver_map[i] == defcsw) {
2927 if (first == -1)
2928 first = i;
2929 last = i;
2930 more = 1;
2931 } else if (first != -1)
2932 break;
2933 }
2934
2935 if (first == 0 && last == MAX_NR_CONSOLES -1)
2936 deflt = 1;
2937
2938 if (first != -1)
2939 bind_con_driver(csw, first, last, deflt);
2940
2941 first = -1;
2942 last = -1;
2943 deflt = 0;
2944 }
2945
2946err:
2738 return 0; 2947 return 0;
2739} 2948}
2740 2949
2741void give_up_console(const struct consw *csw) 2950static int vt_unbind(struct con_driver *con)
2951{
2952 const struct consw *csw = NULL;
2953 int i, more = 1, first = -1, last = -1, deflt = 0;
2954
2955 if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE) ||
2956 con_is_graphics(con->con, con->first, con->last))
2957 goto err;
2958
2959 csw = con->con;
2960
2961 while (more) {
2962 more = 0;
2963
2964 for (i = con->first; i <= con->last; i++) {
2965 if (con_driver_map[i] == csw) {
2966 if (first == -1)
2967 first = i;
2968 last = i;
2969 more = 1;
2970 } else if (first != -1)
2971 break;
2972 }
2973
2974 if (first == 0 && last == MAX_NR_CONSOLES -1)
2975 deflt = 1;
2976
2977 if (first != -1)
2978 unbind_con_driver(csw, first, last, deflt);
2979
2980 first = -1;
2981 last = -1;
2982 deflt = 0;
2983 }
2984
2985err:
2986 return 0;
2987}
2988#else
2989static inline int vt_bind(struct con_driver *con)
2990{
2991 return 0;
2992}
2993static inline int vt_unbind(struct con_driver *con)
2994{
2995 return 0;
2996}
2997#endif /* CONFIG_VT_HW_CONSOLE_BINDING */
2998
2999static ssize_t store_bind(struct class_device *class_device,
3000 const char *buf, size_t count)
3001{
3002 struct con_driver *con = class_get_devdata(class_device);
3003 int bind = simple_strtoul(buf, NULL, 0);
3004
3005 if (bind)
3006 vt_bind(con);
3007 else
3008 vt_unbind(con);
3009
3010 return count;
3011}
3012
3013static ssize_t show_bind(struct class_device *class_device, char *buf)
3014{
3015 struct con_driver *con = class_get_devdata(class_device);
3016 int bind = con_is_bound(con->con);
3017
3018 return snprintf(buf, PAGE_SIZE, "%i\n", bind);
3019}
3020
3021static ssize_t show_name(struct class_device *class_device, char *buf)
3022{
3023 struct con_driver *con = class_get_devdata(class_device);
3024
3025 return snprintf(buf, PAGE_SIZE, "%s %s\n",
3026 (con->flag & CON_DRIVER_FLAG_MODULE) ? "(M)" : "(S)",
3027 con->desc);
3028
3029}
3030
/*
 * Attributes created on every console-driver class device:
 *   bind - readable by all, writable by owner (show_bind/store_bind)
 *   name - read-only (show_name)
 */
3031static struct class_device_attribute class_device_attrs[] = {
3032 __ATTR(bind, S_IRUGO|S_IWUSR, show_bind, store_bind),
3033 __ATTR(name, S_IRUGO, show_name, NULL),
3034};
3035
3036static int vtconsole_init_class_device(struct con_driver *con)
3037{
3038 int i;
3039
3040 class_set_devdata(con->class_dev, con);
3041 for (i = 0; i < ARRAY_SIZE(class_device_attrs); i++)
3042 class_device_create_file(con->class_dev,
3043 &class_device_attrs[i]);
3044
3045 return 0;
3046}
3047
3048static void vtconsole_deinit_class_device(struct con_driver *con)
2742{ 3049{
2743 int i; 3050 int i;
2744 3051
2745 for(i = 0; i < MAX_NR_CONSOLES; i++) 3052 for (i = 0; i < ARRAY_SIZE(class_device_attrs); i++)
3053 class_device_remove_file(con->class_dev,
3054 &class_device_attrs[i]);
3055}
3056
3057/**
3058 * con_is_bound - checks if driver is bound to the console
3059 * @csw: console driver
3060 *
3061 * RETURNS: zero if unbound, nonzero if bound
3062 *
3063 * Drivers can call this and if zero, they should release
3064 * all resources allocated on con_startup()
3065 */
3066int con_is_bound(const struct consw *csw)
3067{
3068 int i, bound = 0;
3069
3070 for (i = 0; i < MAX_NR_CONSOLES; i++) {
2746 if (con_driver_map[i] == csw) { 3071 if (con_driver_map[i] == csw) {
2747 module_put(csw->owner); 3072 bound = 1;
2748 con_driver_map[i] = NULL; 3073 break;
3074 }
3075 }
3076
3077 return bound;
3078}
3079EXPORT_SYMBOL(con_is_bound);
3080
3081/**
3082 * register_con_driver - register console driver to console layer
3083 * @csw: console driver
3084 * @first: the first console to take over, minimum value is 0
3085 * @last: the last console to take over, maximum value is MAX_NR_CONSOLES -1
3086 *
3087 * DESCRIPTION: This function registers a console driver which can later
3088 * bind to a range of consoles specified by @first and @last. It will
3089 * also initialize the console driver by calling con_startup().
3090 */
3091int register_con_driver(const struct consw *csw, int first, int last)
3092{
3093 struct module *owner = csw->owner;
3094 struct con_driver *con_driver;
3095 const char *desc;
3096 int i, retval = 0;
3097
3098 if (!try_module_get(owner))
3099 return -ENODEV;
3100
3101 acquire_console_sem();
3102
3103 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
3104 con_driver = &registered_con_driver[i];
3105
3106 /* already registered */
3107 if (con_driver->con == csw)
3108 retval = -EINVAL;
3109 }
3110
3111 if (retval)
3112 goto err;
3113
3114 desc = csw->con_startup();
3115
3116 if (!desc)
3117 goto err;
3118
3119 retval = -EINVAL;
3120
3121 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
3122 con_driver = &registered_con_driver[i];
3123
3124 if (con_driver->con == NULL) {
3125 con_driver->con = csw;
3126 con_driver->desc = desc;
3127 con_driver->node = i;
3128 con_driver->flag = CON_DRIVER_FLAG_MODULE |
3129 CON_DRIVER_FLAG_INIT;
3130 con_driver->first = first;
3131 con_driver->last = last;
3132 retval = 0;
3133 break;
3134 }
3135 }
3136
3137 if (retval)
3138 goto err;
3139
3140 con_driver->class_dev = class_device_create(vtconsole_class, NULL,
3141 MKDEV(0, con_driver->node),
3142 NULL, "vtcon%i",
3143 con_driver->node);
3144
3145 if (IS_ERR(con_driver->class_dev)) {
3146 printk(KERN_WARNING "Unable to create class_device for %s; "
3147 "errno = %ld\n", con_driver->desc,
3148 PTR_ERR(con_driver->class_dev));
3149 con_driver->class_dev = NULL;
3150 } else {
3151 vtconsole_init_class_device(con_driver);
3152 }
3153err:
3154 release_console_sem();
3155 module_put(owner);
3156 return retval;
3157}
3158EXPORT_SYMBOL(register_con_driver);
3159
3160/**
3161 * unregister_con_driver - unregister console driver from console layer
3162 * @csw: console driver
3163 *
3164 * DESCRIPTION: All drivers that registers to the console layer must
3165 * call this function upon exit, or if the console driver is in a state
3166 * where it won't be able to handle console services, such as the
3167 * framebuffer console without loaded framebuffer drivers.
3168 *
3169 * The driver must unbind first prior to unregistration.
3170 */
3171int unregister_con_driver(const struct consw *csw)
3172{
3173 int i, retval = -ENODEV;
3174
3175 acquire_console_sem();
3176
3177 /* cannot unregister a bound driver */
3178 if (con_is_bound(csw))
3179 goto err;
3180
3181 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
3182 struct con_driver *con_driver = &registered_con_driver[i];
3183
3184 if (con_driver->con == csw &&
3185 con_driver->flag & CON_DRIVER_FLAG_MODULE) {
3186 vtconsole_deinit_class_device(con_driver);
3187 class_device_destroy(vtconsole_class,
3188 MKDEV(0, con_driver->node));
3189 con_driver->con = NULL;
3190 con_driver->desc = NULL;
3191 con_driver->class_dev = NULL;
3192 con_driver->node = 0;
3193 con_driver->flag = 0;
3194 con_driver->first = 0;
3195 con_driver->last = 0;
3196 retval = 0;
3197 break;
3198 }
3199 }
3200err:
3201 release_console_sem();
3202 return retval;
3203}
3204EXPORT_SYMBOL(unregister_con_driver);
3205
/*
 *	If we support more console drivers, this function is used
 *	when a driver wants to take over some existing consoles
 *	and become default driver for newly opened ones.
 *
 *	take_over_console is basically a register followed by a bind.
 *	The return value is that of register_con_driver(); the result
 *	of the bind itself is not reported.
 */
int take_over_console(const struct consw *csw, int first, int last, int deflt)
{
	int err = register_con_driver(csw, first, last);

	if (err)
		return err;

	bind_con_driver(csw, first, last, deflt);
	return 0;
}
3224
/*
 * give_up_console simply forwards to unregister_con_driver() and drops
 * its return value.  It will only work if the driver is fully unbound.
 */
void give_up_console(const struct consw *csw)
{
	unregister_con_driver(csw);
}
3233
3234static int __init vtconsole_class_init(void)
3235{
3236 int i;
3237
3238 vtconsole_class = class_create(THIS_MODULE, "vtconsole");
3239 if (IS_ERR(vtconsole_class)) {
3240 printk(KERN_WARNING "Unable to create vt console class; "
3241 "errno = %ld\n", PTR_ERR(vtconsole_class));
3242 vtconsole_class = NULL;
3243 }
3244
3245 /* Add system drivers to sysfs */
3246 for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
3247 struct con_driver *con = &registered_con_driver[i];
3248
3249 if (con->con && !con->class_dev) {
3250 con->class_dev =
3251 class_device_create(vtconsole_class, NULL,
3252 MKDEV(0, con->node), NULL,
3253 "vtcon%i", con->node);
3254
3255 if (IS_ERR(con->class_dev)) {
3256 printk(KERN_WARNING "Unable to create "
3257 "class_device for %s; errno = %ld\n",
3258 con->desc, PTR_ERR(con->class_dev));
3259 con->class_dev = NULL;
3260 } else {
3261 vtconsole_init_class_device(con);
3262 }
2749 } 3263 }
3264 }
3265
3266 return 0;
2750} 3267}
3268postcore_initcall(vtconsole_class_init);
2751 3269
2752#endif 3270#endif
2753 3271
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
new file mode 100644
index 000000000000..a52225470225
--- /dev/null
+++ b/drivers/clocksource/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o
2obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
3obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
new file mode 100644
index 000000000000..7ad3be8c0f49
--- /dev/null
+++ b/drivers/clocksource/acpi_pm.c
@@ -0,0 +1,177 @@
1/*
2 * linux/drivers/clocksource/acpi_pm.c
3 *
4 * This file contains the ACPI PM based clocksource.
5 *
6 * This code was largely moved from the i386 timer_pm.c file
7 * which was (C) Dominik Brodowski <linux@brodo.de> 2003
8 * and contained the following comments:
9 *
10 * Driver to use the Power Management Timer (PMTMR) available in some
11 * southbridges as primary timing source for the Linux kernel.
12 *
13 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
14 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
15 *
16 * This file is licensed under the GPL v2.
17 */
18
19#include <linux/clocksource.h>
20#include <linux/errno.h>
21#include <linux/init.h>
22#include <linux/pci.h>
23#include <asm/io.h>
24
25/* Number of PMTMR ticks expected during calibration run */
26#define PMTMR_TICKS_PER_SEC 3579545
27
28/*
29 * The I/O port the PMTMR resides at.
30 * The location is detected during setup_arch(),
31 * in arch/i386/acpi/boot.c
32 */
33u32 pmtmr_ioport __read_mostly;
34
35#define ACPI_PM_MASK CLOCKSOURCE_MASK(24) /* limit it to 24 bits */
36
37static inline u32 read_pmtmr(void)
38{
39 /* mask the output to 24 bits */
40 return inl(pmtmr_ioport) & ACPI_PM_MASK;
41}
42
43static cycle_t acpi_pm_read_verified(void)
44{
45 u32 v1 = 0, v2 = 0, v3 = 0;
46
47 /*
48 * It has been reported that because of various broken
49 * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM clock
50 * source is not latched, you must read it multiple
51 * times to ensure a safe value is read:
52 */
53 do {
54 v1 = read_pmtmr();
55 v2 = read_pmtmr();
56 v3 = read_pmtmr();
57 } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
58 || (v3 > v1 && v3 < v2));
59
60 return (cycle_t)v2;
61}
62
63static cycle_t acpi_pm_read(void)
64{
65 return (cycle_t)read_pmtmr();
66}
67
/*
 * Clocksource descriptor for the ACPI PM timer.  .mult is filled in by
 * init_acpi_pm_clocksource() before registration; .read and .rating
 * are replaced by acpi_pm_need_workaround() on affected chipsets.
 */
68static struct clocksource clocksource_acpi_pm = {
69 .name = "acpi_pm",
70 .rating = 200,
71 .read = acpi_pm_read,
72 .mask = (cycle_t)ACPI_PM_MASK,
73 .mult = 0, /* to be calculated */
74 .shift = 22,
75 .is_continuous = 1,
76};
77
78
79#ifdef CONFIG_PCI
80static int acpi_pm_good;
81static int __init acpi_pm_good_setup(char *__str)
82{
83 acpi_pm_good = 1;
84 return 1;
85}
86__setup("acpi_pm_good", acpi_pm_good_setup);
87
88static inline void acpi_pm_need_workaround(void)
89{
90 clocksource_acpi_pm.read = acpi_pm_read_verified;
91 clocksource_acpi_pm.rating = 110;
92}
93
94/*
95 * PIIX4 Errata:
96 *
97 * The power management timer may return improper results when read.
98 * Although the timer value settles properly after incrementing,
99 * while incrementing there is a 3 ns window every 69.8 ns where the
100 * timer value is indeterminate (a 4.2% chance that the data will be
101 * incorrect when read). As a result, the ACPI free running count up
102 * timer specification is violated due to erroneous reads.
103 */
104static void __devinit acpi_pm_check_blacklist(struct pci_dev *dev)
105{
106 u8 rev;
107
108 if (acpi_pm_good)
109 return;
110
111 pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
112 /* the bug has been fixed in PIIX4M */
113 if (rev < 3) {
114 printk(KERN_WARNING "* Found PM-Timer Bug on the chipset."
115 " Due to workarounds for a bug,\n"
116 "* this clock source is slow. Consider trying"
117 " other clock sources\n");
118
119 acpi_pm_need_workaround();
120 }
121}
122DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3,
123 acpi_pm_check_blacklist);
124
125static void __devinit acpi_pm_check_graylist(struct pci_dev *dev)
126{
127 if (acpi_pm_good)
128 return;
129
130 printk(KERN_WARNING "* The chipset may have PM-Timer Bug. Due to"
131 " workarounds for a bug,\n"
132 "* this clock source is slow. If you are sure your timer"
133 " does not have\n"
134 "* this bug, please use \"acpi_pm_good\" to disable the"
135 " workaround\n");
136
137 acpi_pm_need_workaround();
138}
139DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
140 acpi_pm_check_graylist);
141#endif
142
143
144static int __init init_acpi_pm_clocksource(void)
145{
146 u32 value1, value2;
147 unsigned int i;
148
149 if (!pmtmr_ioport)
150 return -ENODEV;
151
152 clocksource_acpi_pm.mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC,
153 clocksource_acpi_pm.shift);
154
155 /* "verify" this timing source: */
156 value1 = read_pmtmr();
157 for (i = 0; i < 10000; i++) {
158 value2 = read_pmtmr();
159 if (value2 == value1)
160 continue;
161 if (value2 > value1)
162 goto pm_good;
163 if ((value2 < value1) && ((value2) < 0xFFF))
164 goto pm_good;
165 printk(KERN_INFO "PM-Timer had inconsistent results:"
166 " 0x%#x, 0x%#x - aborting.\n", value1, value2);
167 return -EINVAL;
168 }
169 printk(KERN_INFO "PM-Timer had no reasonable result:"
170 " 0x%#x - aborting.\n", value1);
171 return -ENODEV;
172
173pm_good:
174 return clocksource_register(&clocksource_acpi_pm);
175}
176
177module_init(init_acpi_pm_clocksource);
diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c
new file mode 100644
index 000000000000..bf4d3d50d1c4
--- /dev/null
+++ b/drivers/clocksource/cyclone.c
@@ -0,0 +1,119 @@
1#include <linux/clocksource.h>
2#include <linux/string.h>
3#include <linux/errno.h>
4#include <linux/timex.h>
5#include <linux/init.h>
6
7#include <asm/pgtable.h>
8#include <asm/io.h>
9
10#include "mach_timer.h"
11
12#define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr */
13#define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */
14#define CYCLONE_MPCS_OFFSET 0x51A8 /* offset to select register */
15#define CYCLONE_MPMC_OFFSET 0x51D0 /* offset to count register */
16#define CYCLONE_TIMER_FREQ 99780000 /* 100Mhz, but not really */
17#define CYCLONE_TIMER_MASK CLOCKSOURCE_MASK(32) /* 32 bit mask */
18
19int use_cyclone = 0;
20static void __iomem *cyclone_ptr;
21
22static cycle_t read_cyclone(void)
23{
24 return (cycle_t)readl(cyclone_ptr);
25}
26
/*
 * Clocksource descriptor for the Cyclone counter.  The .mult and .shift
 * values given here are placeholders: init_cyclone_clocksource()
 * overwrites both before registering the clocksource.
 */
27static struct clocksource clocksource_cyclone = {
28 .name = "cyclone",
29 .rating = 250,
30 .read = read_cyclone,
31 .mask = CYCLONE_TIMER_MASK,
32 .mult = 10,
33 .shift = 0,
34 .is_continuous = 1,
35};
36
37static int __init init_cyclone_clocksource(void)
38{
39 unsigned long base; /* saved value from CBAR */
40 unsigned long offset;
41 u32 __iomem* volatile cyclone_timer; /* Cyclone MPMC0 register */
42 u32 __iomem* reg;
43 int i;
44
45 /* make sure we're on a summit box: */
46 if (!use_cyclone)
47 return -ENODEV;
48
49 printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
50
51 /* find base address: */
52 offset = CYCLONE_CBAR_ADDR;
53 reg = ioremap_nocache(offset, sizeof(reg));
54 if (!reg) {
55 printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
56 return -ENODEV;
57 }
58 /* even on 64bit systems, this is only 32bits: */
59 base = readl(reg);
60 if (!base) {
61 printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
62 return -ENODEV;
63 }
64 iounmap(reg);
65
66 /* setup PMCC: */
67 offset = base + CYCLONE_PMCC_OFFSET;
68 reg = ioremap_nocache(offset, sizeof(reg));
69 if (!reg) {
70 printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
71 return -ENODEV;
72 }
73 writel(0x00000001,reg);
74 iounmap(reg);
75
76 /* setup MPCS: */
77 offset = base + CYCLONE_MPCS_OFFSET;
78 reg = ioremap_nocache(offset, sizeof(reg));
79 if (!reg) {
80 printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
81 return -ENODEV;
82 }
83 writel(0x00000001,reg);
84 iounmap(reg);
85
86 /* map in cyclone_timer: */
87 offset = base + CYCLONE_MPMC_OFFSET;
88 cyclone_timer = ioremap_nocache(offset, sizeof(u64));
89 if (!cyclone_timer) {
90 printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
91 return -ENODEV;
92 }
93
94 /* quick test to make sure its ticking: */
95 for (i = 0; i < 3; i++){
96 u32 old = readl(cyclone_timer);
97 int stall = 100;
98
99 while (stall--)
100 barrier();
101
102 if (readl(cyclone_timer) == old) {
103 printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
104 iounmap(cyclone_timer);
105 cyclone_timer = NULL;
106 return -ENODEV;
107 }
108 }
109 cyclone_ptr = cyclone_timer;
110
111 /* sort out mult/shift values: */
112 clocksource_cyclone.shift = 22;
113 clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
114 clocksource_cyclone.shift);
115
116 return clocksource_register(&clocksource_cyclone);
117}
118
119module_init(init_cyclone_clocksource);
diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c
new file mode 100644
index 000000000000..d418b8297211
--- /dev/null
+++ b/drivers/clocksource/scx200_hrt.c
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 2006 Jim Cromie
3 *
4 * This is a clocksource driver for the Geode SCx200's 1 or 27 MHz
5 * high-resolution timer. The Geode SC-1100 (at least) has a buggy
6 * time stamp counter (TSC), which loses time unless 'idle=poll' is
7 * given as a boot-arg. In its absence, the Generic Timekeeping code
8 * will detect and de-rate the bad TSC, allowing this timer to take
9 * over timekeeping duties.
10 *
11 * Based on work by John Stultz, and Ted Phelps (in a 2.6.12-rc6 patch)
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License as
15 * published by the Free Software Foundation; either version 2 of the
16 * License, or (at your option) any later version.
17 */
18
19#include <linux/clocksource.h>
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/ioport.h>
23#include <linux/scx200.h>
24
25#define NAME "scx200_hrt"
26
27static int mhz27;
28module_param(mhz27, int, 0); /* load time only */
29MODULE_PARM_DESC(mhz27, "count at 27.0 MHz (default is 1.0 MHz)");
30
31static int ppm;
32module_param(ppm, int, 0); /* load time only */
33MODULE_PARM_DESC(ppm, "+-adjust to actual XO freq (ppm)");
34
35/* HiRes Timer configuration register address */
36#define SCx200_TMCNFG_OFFSET (SCx200_TIMER_OFFSET + 5)
37
38/* and config settings */
39#define HR_TMEN (1 << 0) /* timer interrupt enable */
40#define HR_TMCLKSEL (1 << 1) /* 1|0 counts at 27|1 MHz */
41#define HR_TM27MPD (1 << 2) /* 1 turns off input clock (power-down) */
42
43/* The base timer frequency, * 27 if selected */
44#define HRT_FREQ 1000000
45
46static cycle_t read_hrt(void)
47{
48 /* Read the timer value */
49 return (cycle_t) inl(scx200_cb_base + SCx200_TIMER_OFFSET);
50}
51
52#define HRT_SHIFT_1 22
53#define HRT_SHIFT_27 26
54
/*
 * Clocksource descriptor for the SCx200 high-resolution timer.  .mult
 * and .shift are left unset here and filled in by
 * init_hrt_clocksource() according to the mhz27 module parameter.
 */
55static struct clocksource cs_hrt = {
56 .name = "scx200_hrt",
57 .rating = 250,
58 .read = read_hrt,
59 .mask = CLOCKSOURCE_MASK(32),
60 .is_continuous = 1,
61 /* mult, shift are set based on mhz27 flag */
62};
63
64static int __init init_hrt_clocksource(void)
65{
66 /* Make sure scx200 has initializedd the configuration block */
67 if (!scx200_cb_present())
68 return -ENODEV;
69
70 /* Reserve the timer's ISA io-region for ourselves */
71 if (!request_region(scx200_cb_base + SCx200_TIMER_OFFSET,
72 SCx200_TIMER_SIZE,
73 "NatSemi SCx200 High-Resolution Timer")) {
74 printk(KERN_WARNING NAME ": unable to lock timer region\n");
75 return -ENODEV;
76 }
77
78 /* write timer config */
79 outb(HR_TMEN | (mhz27) ? HR_TMCLKSEL : 0,
80 scx200_cb_base + SCx200_TMCNFG_OFFSET);
81
82 if (mhz27) {
83 cs_hrt.shift = HRT_SHIFT_27;
84 cs_hrt.mult = clocksource_hz2mult((HRT_FREQ + ppm) * 27,
85 cs_hrt.shift);
86 } else {
87 cs_hrt.shift = HRT_SHIFT_1;
88 cs_hrt.mult = clocksource_hz2mult(HRT_FREQ + ppm,
89 cs_hrt.shift);
90 }
91 printk(KERN_INFO "enabling scx200 high-res timer (%s MHz +%d ppm)\n",
92 mhz27 ? "27":"1", ppm);
93
94 return clocksource_register(&cs_hrt);
95}
96
97module_init(init_hrt_clocksource);
98
99MODULE_AUTHOR("Jim Cromie <jim.cromie@gmail.com>");
100MODULE_DESCRIPTION("clocksource on SCx200 HiRes Timer");
101MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index 0fdf7fbd6495..2801d14a5e42 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -824,10 +824,9 @@ static int __init ioat_init_module(void)
824{ 824{
825 /* it's currently unsafe to unload this module */ 825 /* it's currently unsafe to unload this module */
826 /* if forced, worst case is that rmmod hangs */ 826 /* if forced, worst case is that rmmod hangs */
827 if (THIS_MODULE != NULL) 827 __unsafe(THIS_MODULE);
828 THIS_MODULE->unsafe = 1;
829 828
830 return pci_module_init(&ioat_pci_drv); 829 pci_module_init(&ioat_pci_drv);
831} 830}
832 831
833module_init(ioat_init_module); 832module_init(ioat_init_module);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 622a55c72f03..d2428cef1598 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -959,7 +959,7 @@ static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
959 printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name); 959 printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
960 SELECT_DRIVE(drive); 960 SELECT_DRIVE(drive);
961 HWIF(drive)->OUTB(8, HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]); 961 HWIF(drive)->OUTB(8, HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]);
962 rc = ide_wait_not_busy(HWIF(drive), 10000); 962 rc = ide_wait_not_busy(HWIF(drive), 100000);
963 if (rc) 963 if (rc)
964 printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); 964 printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
965 } 965 }
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 16a143133f93..7ddb11828731 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -485,7 +485,7 @@ static u8 ide_dump_ata_status(ide_drive_t *drive, const char *msg, u8 stat)
485 unsigned long flags; 485 unsigned long flags;
486 u8 err = 0; 486 u8 err = 0;
487 487
488 local_irq_set(flags); 488 local_irq_save(flags);
489 printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); 489 printk("%s: %s: status=0x%02x { ", drive->name, msg, stat);
490 if (stat & BUSY_STAT) 490 if (stat & BUSY_STAT)
491 printk("Busy "); 491 printk("Busy ");
@@ -567,7 +567,7 @@ static u8 ide_dump_atapi_status(ide_drive_t *drive, const char *msg, u8 stat)
567 567
568 status.all = stat; 568 status.all = stat;
569 error.all = 0; 569 error.all = 0;
570 local_irq_set(flags); 570 local_irq_save(flags);
571 printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); 571 printk("%s: %s: status=0x%02x { ", drive->name, msg, stat);
572 if (status.b.bsy) 572 if (status.b.bsy)
573 printk("Busy "); 573 printk("Busy ");
diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h
index 2fcfac6e967a..c0864b1e9228 100644
--- a/drivers/ide/ide-timing.h
+++ b/drivers/ide/ide-timing.h
@@ -220,6 +220,12 @@ static int ide_timing_compute(ide_drive_t *drive, short speed, struct ide_timing
220 return -EINVAL; 220 return -EINVAL;
221 221
222/* 222/*
223 * Copy the timing from the table.
224 */
225
226 *t = *s;
227
228/*
223 * If the drive is an EIDE drive, it can tell us it needs extended 229 * If the drive is an EIDE drive, it can tell us it needs extended
224 * PIO/MWDMA cycle timing. 230 * PIO/MWDMA cycle timing.
225 */ 231 */
@@ -247,7 +253,7 @@ static int ide_timing_compute(ide_drive_t *drive, short speed, struct ide_timing
247 * Convert the timing to bus clock counts. 253 * Convert the timing to bus clock counts.
248 */ 254 */
249 255
250 ide_timing_quantize(s, t, T, UT); 256 ide_timing_quantize(t, t, T, UT);
251 257
252/* 258/*
253 * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, S.M.A.R.T 259 * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, S.M.A.R.T
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index 7ce5bf783688..22d17548ecdb 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -370,7 +370,6 @@ chipset_is_set:
370 if (!(speed)) { 370 if (!(speed)) {
371 /* restore original pci-config space */ 371 /* restore original pci-config space */
372 pci_write_config_dword(dev, drive_pci, drive_conf); 372 pci_write_config_dword(dev, drive_pci, drive_conf);
373 hwif->tuneproc(drive, 5);
374 return 0; 373 return 0;
375 } 374 }
376 375
@@ -415,8 +414,6 @@ static void pdc202xx_old_ide_dma_start(ide_drive_t *drive)
415 if (drive->addressing == 1) { 414 if (drive->addressing == 1) {
416 struct request *rq = HWGROUP(drive)->rq; 415 struct request *rq = HWGROUP(drive)->rq;
417 ide_hwif_t *hwif = HWIF(drive); 416 ide_hwif_t *hwif = HWIF(drive);
418// struct pci_dev *dev = hwif->pci_dev;
419// unsgned long high_16 = pci_resource_start(dev, 4);
420 unsigned long high_16 = hwif->dma_master; 417 unsigned long high_16 = hwif->dma_master;
421 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20); 418 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20);
422 u32 word_count = 0; 419 u32 word_count = 0;
@@ -436,7 +433,6 @@ static int pdc202xx_old_ide_dma_end(ide_drive_t *drive)
436{ 433{
437 if (drive->addressing == 1) { 434 if (drive->addressing == 1) {
438 ide_hwif_t *hwif = HWIF(drive); 435 ide_hwif_t *hwif = HWIF(drive);
439// unsigned long high_16 = pci_resource_start(hwif->pci_dev, 4);
440 unsigned long high_16 = hwif->dma_master; 436 unsigned long high_16 = hwif->dma_master;
441 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20); 437 unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20);
442 u8 clock = 0; 438 u8 clock = 0;
@@ -453,8 +449,6 @@ static int pdc202xx_old_ide_dma_end(ide_drive_t *drive)
453static int pdc202xx_old_ide_dma_test_irq(ide_drive_t *drive) 449static int pdc202xx_old_ide_dma_test_irq(ide_drive_t *drive)
454{ 450{
455 ide_hwif_t *hwif = HWIF(drive); 451 ide_hwif_t *hwif = HWIF(drive);
456// struct pci_dev *dev = hwif->pci_dev;
457// unsigned long high_16 = pci_resource_start(dev, 4);
458 unsigned long high_16 = hwif->dma_master; 452 unsigned long high_16 = hwif->dma_master;
459 u8 dma_stat = hwif->INB(hwif->dma_status); 453 u8 dma_stat = hwif->INB(hwif->dma_status);
460 u8 sc1d = hwif->INB((high_16 + 0x001d)); 454 u8 sc1d = hwif->INB((high_16 + 0x001d));
@@ -492,12 +486,7 @@ static int pdc202xx_ide_dma_timeout(ide_drive_t *drive)
492 486
493static void pdc202xx_reset_host (ide_hwif_t *hwif) 487static void pdc202xx_reset_host (ide_hwif_t *hwif)
494{ 488{
495#ifdef CONFIG_BLK_DEV_IDEDMA
496// unsigned long high_16 = hwif->dma_base - (8*(hwif->channel));
497 unsigned long high_16 = hwif->dma_master; 489 unsigned long high_16 = hwif->dma_master;
498#else /* !CONFIG_BLK_DEV_IDEDMA */
499 unsigned long high_16 = pci_resource_start(hwif->pci_dev, 4);
500#endif /* CONFIG_BLK_DEV_IDEDMA */
501 u8 udma_speed_flag = hwif->INB(high_16|0x001f); 490 u8 udma_speed_flag = hwif->INB(high_16|0x001f);
502 491
503 hwif->OUTB((udma_speed_flag | 0x10), (high_16|0x001f)); 492 hwif->OUTB((udma_speed_flag | 0x10), (high_16|0x001f));
@@ -550,31 +539,6 @@ static void pdc202xx_reset (ide_drive_t *drive)
550#endif 539#endif
551} 540}
552 541
553/*
554 * Since SUN Cobalt is attempting to do this operation, I should disclose
555 * this has been a long time ago Thu Jul 27 16:40:57 2000 was the patch date
556 * HOTSWAP ATA Infrastructure.
557 */
558static int pdc202xx_tristate (ide_drive_t * drive, int state)
559{
560 ide_hwif_t *hwif = HWIF(drive);
561// unsigned long high_16 = hwif->dma_base - (8*(hwif->channel));
562 unsigned long high_16 = hwif->dma_master;
563 u8 sc1f = hwif->INB(high_16|0x001f);
564
565 if (!hwif)
566 return -EINVAL;
567
568// hwif->bus_state = state;
569
570 if (state) {
571 hwif->OUTB(sc1f | 0x08, (high_16|0x001f));
572 } else {
573 hwif->OUTB(sc1f & ~0x08, (high_16|0x001f));
574 }
575 return 0;
576}
577
578static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev, const char *name) 542static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev, const char *name)
579{ 543{
580 if (dev->resource[PCI_ROM_RESOURCE].start) { 544 if (dev->resource[PCI_ROM_RESOURCE].start) {
@@ -624,10 +588,8 @@ static void __devinit init_hwif_pdc202xx(ide_hwif_t *hwif)
624 hwif->tuneproc = &config_chipset_for_pio; 588 hwif->tuneproc = &config_chipset_for_pio;
625 hwif->quirkproc = &pdc202xx_quirkproc; 589 hwif->quirkproc = &pdc202xx_quirkproc;
626 590
627 if (hwif->pci_dev->device != PCI_DEVICE_ID_PROMISE_20246) { 591 if (hwif->pci_dev->device != PCI_DEVICE_ID_PROMISE_20246)
628 hwif->busproc = &pdc202xx_tristate;
629 hwif->resetproc = &pdc202xx_reset; 592 hwif->resetproc = &pdc202xx_reset;
630 }
631 593
632 hwif->speedproc = &pdc202xx_tune_chipset; 594 hwif->speedproc = &pdc202xx_tune_chipset;
633 595
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index e9b83e1a3028..7fac6f57b5d6 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -222,6 +222,8 @@ static void piix_tune_drive (ide_drive_t *drive, u8 pio)
222 unsigned long flags; 222 unsigned long flags;
223 u16 master_data; 223 u16 master_data;
224 u8 slave_data; 224 u8 slave_data;
225 static DEFINE_SPINLOCK(tune_lock);
226
225 /* ISP RTC */ 227 /* ISP RTC */
226 u8 timings[][2] = { { 0, 0 }, 228 u8 timings[][2] = { { 0, 0 },
227 { 0, 0 }, 229 { 0, 0 },
@@ -230,7 +232,13 @@ static void piix_tune_drive (ide_drive_t *drive, u8 pio)
230 { 2, 3 }, }; 232 { 2, 3 }, };
231 233
232 pio = ide_get_best_pio_mode(drive, pio, 5, NULL); 234 pio = ide_get_best_pio_mode(drive, pio, 5, NULL);
233 spin_lock_irqsave(&ide_lock, flags); 235
236 /*
237 * Master vs slave is synchronized above us but the slave register is
238 * shared by the two hwifs so the corner case of two slave timeouts in
239 * parallel must be locked.
240 */
241 spin_lock_irqsave(&tune_lock, flags);
234 pci_read_config_word(dev, master_port, &master_data); 242 pci_read_config_word(dev, master_port, &master_data);
235 if (is_slave) { 243 if (is_slave) {
236 master_data = master_data | 0x4000; 244 master_data = master_data | 0x4000;
@@ -250,7 +258,7 @@ static void piix_tune_drive (ide_drive_t *drive, u8 pio)
250 pci_write_config_word(dev, master_port, master_data); 258 pci_write_config_word(dev, master_port, master_data);
251 if (is_slave) 259 if (is_slave)
252 pci_write_config_byte(dev, slave_port, slave_data); 260 pci_write_config_byte(dev, slave_port, slave_data);
253 spin_unlock_irqrestore(&ide_lock, flags); 261 spin_unlock_irqrestore(&tune_lock, flags);
254} 262}
255 263
256/** 264/**
diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index 5bda15904a08..2d5b57be98c3 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -1074,8 +1074,7 @@ static inline int update_partial_datagram(struct list_head *pdgl, struct list_he
1074 1074
1075 /* Move list entry to beginnig of list so that oldest partial 1075 /* Move list entry to beginnig of list so that oldest partial
1076 * datagrams percolate to the end of the list */ 1076 * datagrams percolate to the end of the list */
1077 list_del(lh); 1077 list_move(lh, pdgl);
1078 list_add(lh, pdgl);
1079 1078
1080 return 0; 1079 return 0;
1081} 1080}
diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c
index 20ce539580f1..571ea68c0cf2 100644
--- a/drivers/ieee1394/raw1394.c
+++ b/drivers/ieee1394/raw1394.c
@@ -132,8 +132,7 @@ static void free_pending_request(struct pending_request *req)
132static void __queue_complete_req(struct pending_request *req) 132static void __queue_complete_req(struct pending_request *req)
133{ 133{
134 struct file_info *fi = req->file_info; 134 struct file_info *fi = req->file_info;
135 list_del(&req->list); 135 list_move_tail(&req->list, &fi->req_complete);
136 list_add_tail(&req->list, &fi->req_complete);
137 136
138 up(&fi->complete_sem); 137 up(&fi->complete_sem);
139 wake_up_interruptible(&fi->poll_wait_complete); 138 wake_up_interruptible(&fi->poll_wait_complete);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b38e02a5db35..5ed4dab52a6f 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1775,11 +1775,9 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
1775void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) 1775void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1776{ 1776{
1777 mad_send_wr->timeout = 0; 1777 mad_send_wr->timeout = 0;
1778 if (mad_send_wr->refcount == 1) { 1778 if (mad_send_wr->refcount == 1)
1779 list_del(&mad_send_wr->agent_list); 1779 list_move_tail(&mad_send_wr->agent_list,
1780 list_add_tail(&mad_send_wr->agent_list,
1781 &mad_send_wr->mad_agent_priv->done_list); 1780 &mad_send_wr->mad_agent_priv->done_list);
1782 }
1783} 1781}
1784 1782
1785static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, 1783static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
@@ -2098,8 +2096,7 @@ retry:
2098 queued_send_wr = container_of(mad_list, 2096 queued_send_wr = container_of(mad_list,
2099 struct ib_mad_send_wr_private, 2097 struct ib_mad_send_wr_private,
2100 mad_list); 2098 mad_list);
2101 list_del(&mad_list->list); 2099 list_move_tail(&mad_list->list, &send_queue->list);
2102 list_add_tail(&mad_list->list, &send_queue->list);
2103 } 2100 }
2104 spin_unlock_irqrestore(&send_queue->lock, flags); 2101 spin_unlock_irqrestore(&send_queue->lock, flags);
2105 2102
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index d4704e054e30..ebcd5b181770 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -665,8 +665,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
665 goto out; 665 goto out;
666 666
667 mad_send_wr->refcount++; 667 mad_send_wr->refcount++;
668 list_del(&mad_send_wr->agent_list); 668 list_move_tail(&mad_send_wr->agent_list,
669 list_add_tail(&mad_send_wr->agent_list,
670 &mad_send_wr->mad_agent_priv->send_list); 669 &mad_send_wr->mad_agent_priv->send_list);
671 } 670 }
672out: 671out:
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 216471fa01cc..ab40488182b3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -864,8 +864,7 @@ void ipoib_mcast_restart_task(void *dev_ptr)
864 864
865 if (mcast) { 865 if (mcast) {
866 /* Destroy the send only entry */ 866 /* Destroy the send only entry */
867 list_del(&mcast->list); 867 list_move_tail(&mcast->list, &remove_list);
868 list_add_tail(&mcast->list, &remove_list);
869 868
870 rb_replace_node(&mcast->rb_node, 869 rb_replace_node(&mcast->rb_node,
871 &nmcast->rb_node, 870 &nmcast->rb_node,
@@ -890,8 +889,7 @@ void ipoib_mcast_restart_task(void *dev_ptr)
890 rb_erase(&mcast->rb_node, &priv->multicast_tree); 889 rb_erase(&mcast->rb_node, &priv->multicast_tree);
891 890
892 /* Move to the remove list */ 891 /* Move to the remove list */
893 list_del(&mcast->list); 892 list_move_tail(&mcast->list, &remove_list);
894 list_add_tail(&mcast->list, &remove_list);
895 } 893 }
896 } 894 }
897 895
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 173c899a1fb4..2e541fa02024 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -87,6 +87,11 @@ struct capincci;
87#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE 87#ifdef CONFIG_ISDN_CAPI_MIDDLEWARE
88struct capiminor; 88struct capiminor;
89 89
90struct datahandle_queue {
91 struct list_head list;
92 u16 datahandle;
93};
94
90struct capiminor { 95struct capiminor {
91 struct list_head list; 96 struct list_head list;
92 struct capincci *nccip; 97 struct capincci *nccip;
@@ -109,12 +114,9 @@ struct capiminor {
109 int outbytes; 114 int outbytes;
110 115
111 /* transmit path */ 116 /* transmit path */
112 struct datahandle_queue { 117 struct list_head ackqueue;
113 struct datahandle_queue *next;
114 u16 datahandle;
115 } *ackqueue;
116 int nack; 118 int nack;
117 119 spinlock_t ackqlock;
118}; 120};
119#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */ 121#endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
120 122
@@ -156,48 +158,54 @@ static LIST_HEAD(capiminor_list);
156 158
157static int capincci_add_ack(struct capiminor *mp, u16 datahandle) 159static int capincci_add_ack(struct capiminor *mp, u16 datahandle)
158{ 160{
159 struct datahandle_queue *n, **pp; 161 struct datahandle_queue *n;
162 unsigned long flags;
160 163
161 n = kmalloc(sizeof(*n), GFP_ATOMIC); 164 n = kmalloc(sizeof(*n), GFP_ATOMIC);
162 if (!n) { 165 if (unlikely(!n)) {
163 printk(KERN_ERR "capi: alloc datahandle failed\n"); 166 printk(KERN_ERR "capi: alloc datahandle failed\n");
164 return -1; 167 return -1;
165 } 168 }
166 n->next = NULL;
167 n->datahandle = datahandle; 169 n->datahandle = datahandle;
168 for (pp = &mp->ackqueue; *pp; pp = &(*pp)->next) ; 170 INIT_LIST_HEAD(&n->list);
169 *pp = n; 171 spin_lock_irqsave(&mp->ackqlock, flags);
172 list_add_tail(&n->list, &mp->ackqueue);
170 mp->nack++; 173 mp->nack++;
174 spin_unlock_irqrestore(&mp->ackqlock, flags);
171 return 0; 175 return 0;
172} 176}
173 177
174static int capiminor_del_ack(struct capiminor *mp, u16 datahandle) 178static int capiminor_del_ack(struct capiminor *mp, u16 datahandle)
175{ 179{
176 struct datahandle_queue **pp, *p; 180 struct datahandle_queue *p, *tmp;
181 unsigned long flags;
177 182
178 for (pp = &mp->ackqueue; *pp; pp = &(*pp)->next) { 183 spin_lock_irqsave(&mp->ackqlock, flags);
179 if ((*pp)->datahandle == datahandle) { 184 list_for_each_entry_safe(p, tmp, &mp->ackqueue, list) {
180 p = *pp; 185 if (p->datahandle == datahandle) {
181 *pp = (*pp)->next; 186 list_del(&p->list);
182 kfree(p); 187 kfree(p);
183 mp->nack--; 188 mp->nack--;
189 spin_unlock_irqrestore(&mp->ackqlock, flags);
184 return 0; 190 return 0;
185 } 191 }
186 } 192 }
193 spin_unlock_irqrestore(&mp->ackqlock, flags);
187 return -1; 194 return -1;
188} 195}
189 196
190static void capiminor_del_all_ack(struct capiminor *mp) 197static void capiminor_del_all_ack(struct capiminor *mp)
191{ 198{
192 struct datahandle_queue **pp, *p; 199 struct datahandle_queue *p, *tmp;
200 unsigned long flags;
193 201
194 pp = &mp->ackqueue; 202 spin_lock_irqsave(&mp->ackqlock, flags);
195 while (*pp) { 203 list_for_each_entry_safe(p, tmp, &mp->ackqueue, list) {
196 p = *pp; 204 list_del(&p->list);
197 *pp = (*pp)->next;
198 kfree(p); 205 kfree(p);
199 mp->nack--; 206 mp->nack--;
200 } 207 }
208 spin_unlock_irqrestore(&mp->ackqlock, flags);
201} 209}
202 210
203 211
@@ -220,6 +228,8 @@ static struct capiminor *capiminor_alloc(struct capi20_appl *ap, u32 ncci)
220 mp->ncci = ncci; 228 mp->ncci = ncci;
221 mp->msgid = 0; 229 mp->msgid = 0;
222 atomic_set(&mp->ttyopencount,0); 230 atomic_set(&mp->ttyopencount,0);
231 INIT_LIST_HEAD(&mp->ackqueue);
232 spin_lock_init(&mp->ackqlock);
223 233
224 skb_queue_head_init(&mp->inqueue); 234 skb_queue_head_init(&mp->inqueue);
225 skb_queue_head_init(&mp->outqueue); 235 skb_queue_head_init(&mp->outqueue);
diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c
index eb41aba3ddef..8a45715dd4c1 100644
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -65,23 +65,22 @@ static struct usb_device_id gigaset_table [] = {
65 65
66MODULE_DEVICE_TABLE(usb, gigaset_table); 66MODULE_DEVICE_TABLE(usb, gigaset_table);
67 67
68/*======================= local function prototypes =============================*/ 68/*======================= local function prototypes ==========================*/
69 69
70/* This function is called if a new device is connected to the USB port. It 70/* function called if a new device belonging to this driver is connected */
71 * checks whether this new device belongs to this driver.
72 */
73static int gigaset_probe(struct usb_interface *interface, 71static int gigaset_probe(struct usb_interface *interface,
74 const struct usb_device_id *id); 72 const struct usb_device_id *id);
75 73
76/* Function will be called if the device is unplugged */ 74/* Function will be called if the device is unplugged */
77static void gigaset_disconnect(struct usb_interface *interface); 75static void gigaset_disconnect(struct usb_interface *interface);
78 76
79static void read_ctrl_callback(struct urb *, struct pt_regs *); 77static int atread_submit(struct cardstate *, int);
80static void stopurbs(struct bas_bc_state *); 78static void stopurbs(struct bas_bc_state *);
79static int req_submit(struct bc_state *, int, int, int);
81static int atwrite_submit(struct cardstate *, unsigned char *, int); 80static int atwrite_submit(struct cardstate *, unsigned char *, int);
82static int start_cbsend(struct cardstate *); 81static int start_cbsend(struct cardstate *);
83 82
84/*==============================================================================*/ 83/*============================================================================*/
85 84
86struct bas_cardstate { 85struct bas_cardstate {
87 struct usb_device *udev; /* USB device pointer */ 86 struct usb_device *udev; /* USB device pointer */
@@ -91,6 +90,7 @@ struct bas_cardstate {
91 struct urb *urb_ctrl; /* control pipe default URB */ 90 struct urb *urb_ctrl; /* control pipe default URB */
92 struct usb_ctrlrequest dr_ctrl; 91 struct usb_ctrlrequest dr_ctrl;
93 struct timer_list timer_ctrl; /* control request timeout */ 92 struct timer_list timer_ctrl; /* control request timeout */
93 int retry_ctrl;
94 94
95 struct timer_list timer_atrdy; /* AT command ready timeout */ 95 struct timer_list timer_atrdy; /* AT command ready timeout */
96 struct urb *urb_cmd_out; /* for sending AT commands */ 96 struct urb *urb_cmd_out; /* for sending AT commands */
@@ -307,6 +307,7 @@ static int gigaset_set_line_ctrl(struct cardstate *cs, unsigned cflag)
307 * hang up any existing connection because of an unrecoverable error 307 * hang up any existing connection because of an unrecoverable error
308 * This function may be called from any context and takes care of scheduling 308 * This function may be called from any context and takes care of scheduling
309 * the necessary actions for execution outside of interrupt context. 309 * the necessary actions for execution outside of interrupt context.
310 * cs->lock must not be held.
310 * argument: 311 * argument:
311 * B channel control structure 312 * B channel control structure
312 */ 313 */
@@ -325,14 +326,17 @@ static inline void error_hangup(struct bc_state *bcs)
325 326
326/* error_reset 327/* error_reset
327 * reset Gigaset device because of an unrecoverable error 328 * reset Gigaset device because of an unrecoverable error
328 * This function may be called from any context, and should take care of 329 * This function may be called from any context, and takes care of
329 * scheduling the necessary actions for execution outside of interrupt context. 330 * scheduling the necessary actions for execution outside of interrupt context.
330 * Right now, it just generates a kernel message calling for help. 331 * cs->lock must not be held.
331 * argument: 332 * argument:
332 * controller state structure 333 * controller state structure
333 */ 334 */
334static inline void error_reset(struct cardstate *cs) 335static inline void error_reset(struct cardstate *cs)
335{ 336{
337 /* close AT command channel to recover (ignore errors) */
338 req_submit(cs->bcs, HD_CLOSE_ATCHANNEL, 0, BAS_TIMEOUT);
339
336 //FIXME try to recover without bothering the user 340 //FIXME try to recover without bothering the user
337 dev_err(cs->dev, 341 dev_err(cs->dev,
338 "unrecoverable error - please disconnect Gigaset base to reset\n"); 342 "unrecoverable error - please disconnect Gigaset base to reset\n");
@@ -403,14 +407,30 @@ static void cmd_in_timeout(unsigned long data)
403{ 407{
404 struct cardstate *cs = (struct cardstate *) data; 408 struct cardstate *cs = (struct cardstate *) data;
405 struct bas_cardstate *ucs = cs->hw.bas; 409 struct bas_cardstate *ucs = cs->hw.bas;
410 int rc;
406 411
407 if (!ucs->rcvbuf_size) { 412 if (!ucs->rcvbuf_size) {
408 gig_dbg(DEBUG_USBREQ, "%s: no receive in progress", __func__); 413 gig_dbg(DEBUG_USBREQ, "%s: no receive in progress", __func__);
409 return; 414 return;
410 } 415 }
411 416
412 dev_err(cs->dev, "timeout reading AT response\n"); 417 if (ucs->retry_cmd_in++ < BAS_RETRY) {
413 error_reset(cs); //FIXME retry? 418 dev_notice(cs->dev, "control read: timeout, retry %d\n",
419 ucs->retry_cmd_in);
420 rc = atread_submit(cs, BAS_TIMEOUT);
421 if (rc >= 0 || rc == -ENODEV)
422 /* resubmitted or disconnected */
423 /* - bypass regular exit block */
424 return;
425 } else {
426 dev_err(cs->dev,
427 "control read: timeout, giving up after %d tries\n",
428 ucs->retry_cmd_in);
429 }
430 kfree(ucs->rcvbuf);
431 ucs->rcvbuf = NULL;
432 ucs->rcvbuf_size = 0;
433 error_reset(cs);
414} 434}
415 435
416/* set/clear bits in base connection state, return previous state 436/* set/clear bits in base connection state, return previous state
@@ -428,6 +448,96 @@ inline static int update_basstate(struct bas_cardstate *ucs,
428 return state; 448 return state;
429} 449}
430 450
451/* read_ctrl_callback
452 * USB completion handler for control pipe input
453 * called by the USB subsystem in interrupt context
454 * parameter:
455 * urb USB request block
456 * urb->context = inbuf structure for controller state
457 */
458static void read_ctrl_callback(struct urb *urb, struct pt_regs *regs)
459{
460 struct inbuf_t *inbuf = urb->context;
461 struct cardstate *cs = inbuf->cs;
462 struct bas_cardstate *ucs = cs->hw.bas;
463 int have_data = 0;
464 unsigned numbytes;
465 int rc;
466
467 update_basstate(ucs, 0, BS_ATRDPEND);
468
469 if (!ucs->rcvbuf_size) {
470 dev_warn(cs->dev, "%s: no receive in progress\n", __func__);
471 return;
472 }
473
474 del_timer(&ucs->timer_cmd_in);
475
476 switch (urb->status) {
477 case 0: /* normal completion */
478 numbytes = urb->actual_length;
479 if (unlikely(numbytes != ucs->rcvbuf_size)) {
480 dev_warn(cs->dev,
481 "control read: received %d chars, expected %d\n",
482 numbytes, ucs->rcvbuf_size);
483 if (numbytes > ucs->rcvbuf_size)
484 numbytes = ucs->rcvbuf_size;
485 }
486
487 /* copy received bytes to inbuf */
488 have_data = gigaset_fill_inbuf(inbuf, ucs->rcvbuf, numbytes);
489
490 if (unlikely(numbytes < ucs->rcvbuf_size)) {
491 /* incomplete - resubmit for remaining bytes */
492 ucs->rcvbuf_size -= numbytes;
493 ucs->retry_cmd_in = 0;
494 rc = atread_submit(cs, BAS_TIMEOUT);
495 if (rc >= 0 || rc == -ENODEV)
496 /* resubmitted or disconnected */
497 /* - bypass regular exit block */
498 return;
499 error_reset(cs);
500 }
501 break;
502
503 case -ENOENT: /* cancelled */
504 case -ECONNRESET: /* cancelled (async) */
505 case -EINPROGRESS: /* pending */
506 case -ENODEV: /* device removed */
507 case -ESHUTDOWN: /* device shut down */
508 /* no action necessary */
509 gig_dbg(DEBUG_USBREQ, "%s: %s",
510 __func__, get_usb_statmsg(urb->status));
511 break;
512
513 default: /* severe trouble */
514 dev_warn(cs->dev, "control read: %s\n",
515 get_usb_statmsg(urb->status));
516 if (ucs->retry_cmd_in++ < BAS_RETRY) {
517 dev_notice(cs->dev, "control read: retry %d\n",
518 ucs->retry_cmd_in);
519 rc = atread_submit(cs, BAS_TIMEOUT);
520 if (rc >= 0 || rc == -ENODEV)
521 /* resubmitted or disconnected */
522 /* - bypass regular exit block */
523 return;
524 } else {
525 dev_err(cs->dev,
526 "control read: giving up after %d tries\n",
527 ucs->retry_cmd_in);
528 }
529 error_reset(cs);
530 }
531
532 kfree(ucs->rcvbuf);
533 ucs->rcvbuf = NULL;
534 ucs->rcvbuf_size = 0;
535 if (have_data) {
536 gig_dbg(DEBUG_INTR, "%s-->BH", __func__);
537 gigaset_schedule_event(cs);
538 }
539}
540
431/* atread_submit 541/* atread_submit
432 * submit an HD_READ_ATMESSAGE command URB and optionally start a timeout 542 * submit an HD_READ_ATMESSAGE command URB and optionally start a timeout
433 * parameters: 543 * parameters:
@@ -466,7 +576,7 @@ static int atread_submit(struct cardstate *cs, int timeout)
466 if ((ret = usb_submit_urb(ucs->urb_cmd_in, SLAB_ATOMIC)) != 0) { 576 if ((ret = usb_submit_urb(ucs->urb_cmd_in, SLAB_ATOMIC)) != 0) {
467 update_basstate(ucs, 0, BS_ATRDPEND); 577 update_basstate(ucs, 0, BS_ATRDPEND);
468 dev_err(cs->dev, "could not submit HD_READ_ATMESSAGE: %s\n", 578 dev_err(cs->dev, "could not submit HD_READ_ATMESSAGE: %s\n",
469 get_usb_statmsg(ret)); 579 get_usb_rcmsg(ret));
470 return ret; 580 return ret;
471 } 581 }
472 582
@@ -611,9 +721,12 @@ static void read_int_callback(struct urb *urb, struct pt_regs *regs)
611 kfree(ucs->rcvbuf); 721 kfree(ucs->rcvbuf);
612 ucs->rcvbuf = NULL; 722 ucs->rcvbuf = NULL;
613 ucs->rcvbuf_size = 0; 723 ucs->rcvbuf_size = 0;
614 if (rc != -ENODEV) 724 if (rc != -ENODEV) {
615 //FIXME corrective action? 725 //FIXME corrective action?
726 spin_unlock_irqrestore(&cs->lock, flags);
616 error_reset(cs); 727 error_reset(cs);
728 break;
729 }
617 } 730 }
618 spin_unlock_irqrestore(&cs->lock, flags); 731 spin_unlock_irqrestore(&cs->lock, flags);
619 break; 732 break;
@@ -643,97 +756,6 @@ resubmit:
643 } 756 }
644} 757}
645 758
646/* read_ctrl_callback
647 * USB completion handler for control pipe input
648 * called by the USB subsystem in interrupt context
649 * parameter:
650 * urb USB request block
651 * urb->context = inbuf structure for controller state
652 */
653static void read_ctrl_callback(struct urb *urb, struct pt_regs *regs)
654{
655 struct inbuf_t *inbuf = urb->context;
656 struct cardstate *cs = inbuf->cs;
657 struct bas_cardstate *ucs = cs->hw.bas;
658 int have_data = 0;
659 unsigned numbytes;
660 int rc;
661
662 update_basstate(ucs, 0, BS_ATRDPEND);
663
664 if (!ucs->rcvbuf_size) {
665 dev_warn(cs->dev, "%s: no receive in progress\n", __func__);
666 return;
667 }
668
669 del_timer(&ucs->timer_cmd_in);
670
671 switch (urb->status) {
672 case 0: /* normal completion */
673 numbytes = urb->actual_length;
674 if (unlikely(numbytes == 0)) {
675 dev_warn(cs->dev,
676 "control read: empty block received\n");
677 goto retry;
678 }
679 if (unlikely(numbytes != ucs->rcvbuf_size)) {
680 dev_warn(cs->dev,
681 "control read: received %d chars, expected %d\n",
682 numbytes, ucs->rcvbuf_size);
683 if (numbytes > ucs->rcvbuf_size)
684 numbytes = ucs->rcvbuf_size;
685 }
686
687 /* copy received bytes to inbuf */
688 have_data = gigaset_fill_inbuf(inbuf, ucs->rcvbuf, numbytes);
689
690 if (unlikely(numbytes < ucs->rcvbuf_size)) {
691 /* incomplete - resubmit for remaining bytes */
692 ucs->rcvbuf_size -= numbytes;
693 ucs->retry_cmd_in = 0;
694 goto retry;
695 }
696 break;
697
698 case -ENOENT: /* cancelled */
699 case -ECONNRESET: /* cancelled (async) */
700 case -EINPROGRESS: /* pending */
701 case -ENODEV: /* device removed */
702 case -ESHUTDOWN: /* device shut down */
703 /* no action necessary */
704 gig_dbg(DEBUG_USBREQ, "%s: %s",
705 __func__, get_usb_statmsg(urb->status));
706 break;
707
708 default: /* severe trouble */
709 dev_warn(cs->dev, "control read: %s\n",
710 get_usb_statmsg(urb->status));
711 retry:
712 if (ucs->retry_cmd_in++ < BAS_RETRY) {
713 dev_notice(cs->dev, "control read: retry %d\n",
714 ucs->retry_cmd_in);
715 rc = atread_submit(cs, BAS_TIMEOUT);
716 if (rc >= 0 || rc == -ENODEV)
717 /* resubmitted or disconnected */
718 /* - bypass regular exit block */
719 return;
720 } else {
721 dev_err(cs->dev,
722 "control read: giving up after %d tries\n",
723 ucs->retry_cmd_in);
724 }
725 error_reset(cs);
726 }
727
728 kfree(ucs->rcvbuf);
729 ucs->rcvbuf = NULL;
730 ucs->rcvbuf_size = 0;
731 if (have_data) {
732 gig_dbg(DEBUG_INTR, "%s-->BH", __func__);
733 gigaset_schedule_event(cs);
734 }
735}
736
737/* read_iso_callback 759/* read_iso_callback
738 * USB completion handler for B channel isochronous input 760 * USB completion handler for B channel isochronous input
739 * called by the USB subsystem in interrupt context 761 * called by the USB subsystem in interrupt context
@@ -1378,6 +1400,7 @@ static void req_timeout(unsigned long data)
1378 case HD_CLOSE_B1CHANNEL: 1400 case HD_CLOSE_B1CHANNEL:
1379 dev_err(bcs->cs->dev, "timeout closing channel %d\n", 1401 dev_err(bcs->cs->dev, "timeout closing channel %d\n",
1380 bcs->channel + 1); 1402 bcs->channel + 1);
1403 error_reset(bcs->cs);
1381 break; 1404 break;
1382 1405
1383 default: 1406 default:
@@ -1396,22 +1419,61 @@ static void req_timeout(unsigned long data)
1396static void write_ctrl_callback(struct urb *urb, struct pt_regs *regs) 1419static void write_ctrl_callback(struct urb *urb, struct pt_regs *regs)
1397{ 1420{
1398 struct bas_cardstate *ucs = urb->context; 1421 struct bas_cardstate *ucs = urb->context;
1422 int rc;
1399 unsigned long flags; 1423 unsigned long flags;
1400 1424
1401 spin_lock_irqsave(&ucs->lock, flags); 1425 /* check status */
1402 if (urb->status && ucs->pending) { 1426 switch (urb->status) {
1403 dev_err(&ucs->interface->dev, 1427 case 0: /* normal completion */
1404 "control request 0x%02x failed: %s\n", 1428 spin_lock_irqsave(&ucs->lock, flags);
1405 ucs->pending, get_usb_statmsg(urb->status)); 1429 switch (ucs->pending) {
1406 del_timer(&ucs->timer_ctrl); 1430 case HD_DEVICE_INIT_ACK: /* no reply expected */
1407 ucs->pending = 0; 1431 del_timer(&ucs->timer_ctrl);
1408 } 1432 ucs->pending = 0;
1409 /* individual handling of specific request types */ 1433 break;
1410 switch (ucs->pending) { 1434 }
1411 case HD_DEVICE_INIT_ACK: /* no reply expected */ 1435 spin_unlock_irqrestore(&ucs->lock, flags);
1412 ucs->pending = 0; 1436 return;
1437
1438 case -ENOENT: /* cancelled */
1439 case -ECONNRESET: /* cancelled (async) */
1440 case -EINPROGRESS: /* pending */
1441 case -ENODEV: /* device removed */
1442 case -ESHUTDOWN: /* device shut down */
1443 /* ignore silently */
1444 gig_dbg(DEBUG_USBREQ, "%s: %s",
1445 __func__, get_usb_statmsg(urb->status));
1413 break; 1446 break;
1447
1448 default: /* any failure */
1449 if (++ucs->retry_ctrl > BAS_RETRY) {
1450 dev_err(&ucs->interface->dev,
1451 "control request 0x%02x failed: %s\n",
1452 ucs->dr_ctrl.bRequest,
1453 get_usb_statmsg(urb->status));
1454 break; /* give up */
1455 }
1456 dev_notice(&ucs->interface->dev,
1457 "control request 0x%02x: %s, retry %d\n",
1458 ucs->dr_ctrl.bRequest, get_usb_statmsg(urb->status),
1459 ucs->retry_ctrl);
1460 /* urb->dev is clobbered by USB subsystem */
1461 urb->dev = ucs->udev;
1462 rc = usb_submit_urb(urb, SLAB_ATOMIC);
1463 if (unlikely(rc)) {
1464 dev_err(&ucs->interface->dev,
1465 "could not resubmit request 0x%02x: %s\n",
1466 ucs->dr_ctrl.bRequest, get_usb_rcmsg(rc));
1467 break;
1468 }
1469 /* resubmitted */
1470 return;
1414 } 1471 }
1472
1473 /* failed, clear pending request */
1474 spin_lock_irqsave(&ucs->lock, flags);
1475 del_timer(&ucs->timer_ctrl);
1476 ucs->pending = 0;
1415 spin_unlock_irqrestore(&ucs->lock, flags); 1477 spin_unlock_irqrestore(&ucs->lock, flags);
1416} 1478}
1417 1479
@@ -1455,9 +1517,11 @@ static int req_submit(struct bc_state *bcs, int req, int val, int timeout)
1455 usb_sndctrlpipe(ucs->udev, 0), 1517 usb_sndctrlpipe(ucs->udev, 0),
1456 (unsigned char*) &ucs->dr_ctrl, NULL, 0, 1518 (unsigned char*) &ucs->dr_ctrl, NULL, 0,
1457 write_ctrl_callback, ucs); 1519 write_ctrl_callback, ucs);
1458 if ((ret = usb_submit_urb(ucs->urb_ctrl, SLAB_ATOMIC)) != 0) { 1520 ucs->retry_ctrl = 0;
1521 ret = usb_submit_urb(ucs->urb_ctrl, SLAB_ATOMIC);
1522 if (unlikely(ret)) {
1459 dev_err(bcs->cs->dev, "could not submit request 0x%02x: %s\n", 1523 dev_err(bcs->cs->dev, "could not submit request 0x%02x: %s\n",
1460 req, get_usb_statmsg(ret)); 1524 req, get_usb_rcmsg(ret));
1461 spin_unlock_irqrestore(&ucs->lock, flags); 1525 spin_unlock_irqrestore(&ucs->lock, flags);
1462 return ret; 1526 return ret;
1463 } 1527 }
diff --git a/drivers/isdn/gigaset/ev-layer.c b/drivers/isdn/gigaset/ev-layer.c
index 18e05c09b71c..44f02dbd1111 100644
--- a/drivers/isdn/gigaset/ev-layer.c
+++ b/drivers/isdn/gigaset/ev-layer.c
@@ -1262,7 +1262,8 @@ static void do_action(int action, struct cardstate *cs,
1262 break; 1262 break;
1263 case ACT_HUPMODEM: 1263 case ACT_HUPMODEM:
1264 /* send "+++" (hangup in unimodem mode) */ 1264 /* send "+++" (hangup in unimodem mode) */
1265 cs->ops->write_cmd(cs, "+++", 3, NULL); 1265 if (cs->connected)
1266 cs->ops->write_cmd(cs, "+++", 3, NULL);
1266 break; 1267 break;
1267 case ACT_RING: 1268 case ACT_RING:
1268 /* get fresh AT state structure for new CID */ 1269 /* get fresh AT state structure for new CID */
@@ -1294,7 +1295,6 @@ static void do_action(int action, struct cardstate *cs,
1294 break; 1295 break;
1295 case ACT_ICALL: 1296 case ACT_ICALL:
1296 handle_icall(cs, bcs, p_at_state); 1297 handle_icall(cs, bcs, p_at_state);
1297 at_state = *p_at_state;
1298 break; 1298 break;
1299 case ACT_FAILSDOWN: 1299 case ACT_FAILSDOWN:
1300 dev_warn(cs->dev, "Could not shut down the device.\n"); 1300 dev_warn(cs->dev, "Could not shut down the device.\n");
@@ -1334,10 +1334,8 @@ static void do_action(int action, struct cardstate *cs,
1334 */ 1334 */
1335 at_state->pending_commands |= PC_DLE0; 1335 at_state->pending_commands |= PC_DLE0;
1336 atomic_set(&cs->commands_pending, 1); 1336 atomic_set(&cs->commands_pending, 1);
1337 } else { 1337 } else
1338 disconnect(p_at_state); 1338 disconnect(p_at_state);
1339 at_state = *p_at_state;
1340 }
1341 break; 1339 break;
1342 case ACT_FAKEDLE0: 1340 case ACT_FAKEDLE0:
1343 at_state->int_var[VAR_ZDLE] = 0; 1341 at_state->int_var[VAR_ZDLE] = 0;
@@ -1354,10 +1352,8 @@ static void do_action(int action, struct cardstate *cs,
1354 at_state->cid = -1; 1352 at_state->cid = -1;
1355 if (bcs && cs->onechannel) 1353 if (bcs && cs->onechannel)
1356 at_state->pending_commands |= PC_DLE0; 1354 at_state->pending_commands |= PC_DLE0;
1357 else { 1355 else
1358 disconnect(p_at_state); 1356 disconnect(p_at_state);
1359 at_state = *p_at_state;
1360 }
1361 schedule_init(cs, MS_RECOVER); 1357 schedule_init(cs, MS_RECOVER);
1362 break; 1358 break;
1363 case ACT_FAILDLE0: 1359 case ACT_FAILDLE0:
@@ -1410,7 +1406,6 @@ static void do_action(int action, struct cardstate *cs,
1410 1406
1411 case ACT_ABORTACCEPT: /* hangup/error/timeout during ICALL processing */ 1407 case ACT_ABORTACCEPT: /* hangup/error/timeout during ICALL processing */
1412 disconnect(p_at_state); 1408 disconnect(p_at_state);
1413 at_state = *p_at_state;
1414 break; 1409 break;
1415 1410
1416 case ACT_ABORTDIAL: /* error/timeout during dial preparation */ 1411 case ACT_ABORTDIAL: /* error/timeout during dial preparation */
diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c
index abecabf8c271..aacbf0d14b64 100644
--- a/drivers/isdn/hisax/q931.c
+++ b/drivers/isdn/hisax/q931.c
@@ -1402,12 +1402,12 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir)
1402 } 1402 }
1403 /* No, locate it in the table */ 1403 /* No, locate it in the table */
1404 if (cset == 0) { 1404 if (cset == 0) {
1405 for (i = 0; i < IESIZE; i++) 1405 for (i = 0; i < IESIZE_NI1; i++)
1406 if (*buf == ielist_ni1[i].nr) 1406 if (*buf == ielist_ni1[i].nr)
1407 break; 1407 break;
1408 1408
1409 /* When not found, give appropriate msg */ 1409 /* When not found, give appropriate msg */
1410 if (i != IESIZE) { 1410 if (i != IESIZE_NI1) {
1411 dp += sprintf(dp, " %s\n", ielist_ni1[i].descr); 1411 dp += sprintf(dp, " %s\n", ielist_ni1[i].descr);
1412 dp += ielist_ni1[i].f(dp, buf); 1412 dp += ielist_ni1[i].f(dp, buf);
1413 } else 1413 } else
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index ac25a48362ac..bf869ed03eed 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -90,7 +90,7 @@ config MD_RAID10
90 depends on BLK_DEV_MD && EXPERIMENTAL 90 depends on BLK_DEV_MD && EXPERIMENTAL
91 ---help--- 91 ---help---
92 RAID-10 provides a combination of striping (RAID-0) and 92 RAID-10 provides a combination of striping (RAID-0) and
93 mirroring (RAID-1) with easier configuration and more flexable 93 mirroring (RAID-1) with easier configuration and more flexible
94 layout. 94 layout.
95 Unlike RAID-0, but like RAID-1, RAID-10 requires all devices to 95 Unlike RAID-0, but like RAID-1, RAID-10 requires all devices to
96 be the same size (or at least, only as much as the smallest device 96 be the same size (or at least, only as much as the smallest device
@@ -104,8 +104,8 @@ config MD_RAID10
104 104
105 If unsure, say Y. 105 If unsure, say Y.
106 106
107config MD_RAID5 107config MD_RAID456
108 tristate "RAID-4/RAID-5 mode" 108 tristate "RAID-4/RAID-5/RAID-6 mode"
109 depends on BLK_DEV_MD 109 depends on BLK_DEV_MD
110 ---help--- 110 ---help---
111 A RAID-5 set of N drives with a capacity of C MB per drive provides 111 A RAID-5 set of N drives with a capacity of C MB per drive provides
@@ -116,20 +116,28 @@ config MD_RAID5
116 while a RAID-5 set distributes the parity across the drives in one 116 while a RAID-5 set distributes the parity across the drives in one
117 of the available parity distribution methods. 117 of the available parity distribution methods.
118 118
119 A RAID-6 set of N drives with a capacity of C MB per drive
120 provides the capacity of C * (N - 2) MB, and protects
121 against a failure of any two drives. For a given sector
122 (row) number, (N - 2) drives contain data sectors, and two
123 drives contains two independent redundancy syndromes. Like
124 RAID-5, RAID-6 distributes the syndromes across the drives
125 in one of the available parity distribution methods.
126
119 Information about Software RAID on Linux is contained in the 127 Information about Software RAID on Linux is contained in the
120 Software-RAID mini-HOWTO, available from 128 Software-RAID mini-HOWTO, available from
121 <http://www.tldp.org/docs.html#howto>. There you will also 129 <http://www.tldp.org/docs.html#howto>. There you will also
122 learn where to get the supporting user space utilities raidtools. 130 learn where to get the supporting user space utilities raidtools.
123 131
124 If you want to use such a RAID-4/RAID-5 set, say Y. To 132 If you want to use such a RAID-4/RAID-5/RAID-6 set, say Y. To
125 compile this code as a module, choose M here: the module 133 compile this code as a module, choose M here: the module
126 will be called raid5. 134 will be called raid456.
127 135
128 If unsure, say Y. 136 If unsure, say Y.
129 137
130config MD_RAID5_RESHAPE 138config MD_RAID5_RESHAPE
131 bool "Support adding drives to a raid-5 array (experimental)" 139 bool "Support adding drives to a raid-5 array (experimental)"
132 depends on MD_RAID5 && EXPERIMENTAL 140 depends on MD_RAID456 && EXPERIMENTAL
133 ---help--- 141 ---help---
134 A RAID-5 set can be expanded by adding extra drives. This 142 A RAID-5 set can be expanded by adding extra drives. This
135 requires "restriping" the array which means (almost) every 143 requires "restriping" the array which means (almost) every
@@ -139,7 +147,7 @@ config MD_RAID5_RESHAPE
139 is online. However it is still EXPERIMENTAL code. It should 147 is online. However it is still EXPERIMENTAL code. It should
140 work, but please be sure that you have backups. 148 work, but please be sure that you have backups.
141 149
142 You will need mdadm verion 2.4.1 or later to use this 150 You will need mdadm version 2.4.1 or later to use this
143 feature safely. During the early stage of reshape there is 151 feature safely. During the early stage of reshape there is
144 a critical section where live data is being over-written. A 152 a critical section where live data is being over-written. A
145 crash during this time needs extra care for recovery. The 153 crash during this time needs extra care for recovery. The
@@ -154,28 +162,6 @@ config MD_RAID5_RESHAPE
154 There should be enough spares already present to make the new 162 There should be enough spares already present to make the new
155 array workable. 163 array workable.
156 164
157config MD_RAID6
158 tristate "RAID-6 mode"
159 depends on BLK_DEV_MD
160 ---help---
161 A RAID-6 set of N drives with a capacity of C MB per drive
162 provides the capacity of C * (N - 2) MB, and protects
163 against a failure of any two drives. For a given sector
164 (row) number, (N - 2) drives contain data sectors, and two
165 drives contains two independent redundancy syndromes. Like
166 RAID-5, RAID-6 distributes the syndromes across the drives
167 in one of the available parity distribution methods.
168
169 RAID-6 requires mdadm-1.5.0 or later, available at:
170
171 ftp://ftp.kernel.org/pub/linux/utils/raid/mdadm/
172
173 If you want to use such a RAID-6 set, say Y. To compile
174 this code as a module, choose M here: the module will be
175 called raid6.
176
177 If unsure, say Y.
178
179config MD_MULTIPATH 165config MD_MULTIPATH
180 tristate "Multipath I/O support" 166 tristate "Multipath I/O support"
181 depends on BLK_DEV_MD 167 depends on BLK_DEV_MD
@@ -235,7 +221,7 @@ config DM_SNAPSHOT
235 tristate "Snapshot target (EXPERIMENTAL)" 221 tristate "Snapshot target (EXPERIMENTAL)"
236 depends on BLK_DEV_DM && EXPERIMENTAL 222 depends on BLK_DEV_DM && EXPERIMENTAL
237 ---help--- 223 ---help---
238 Allow volume managers to take writeable snapshots of a device. 224 Allow volume managers to take writable snapshots of a device.
239 225
240config DM_MIRROR 226config DM_MIRROR
241 tristate "Mirror target (EXPERIMENTAL)" 227 tristate "Mirror target (EXPERIMENTAL)"
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index d3efedf6a6ad..34957a68d921 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,7 +8,7 @@ dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
8dm-snapshot-objs := dm-snap.o dm-exception-store.o 8dm-snapshot-objs := dm-snap.o dm-exception-store.o
9dm-mirror-objs := dm-log.o dm-raid1.o 9dm-mirror-objs := dm-log.o dm-raid1.o
10md-mod-objs := md.o bitmap.o 10md-mod-objs := md.o bitmap.o
11raid6-objs := raid6main.o raid6algos.o raid6recov.o raid6tables.o \ 11raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
12 raid6int1.o raid6int2.o raid6int4.o \ 12 raid6int1.o raid6int2.o raid6int4.o \
13 raid6int8.o raid6int16.o raid6int32.o \ 13 raid6int8.o raid6int16.o raid6int32.o \
14 raid6altivec1.o raid6altivec2.o raid6altivec4.o \ 14 raid6altivec1.o raid6altivec2.o raid6altivec4.o \
@@ -25,8 +25,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
25obj-$(CONFIG_MD_RAID0) += raid0.o 25obj-$(CONFIG_MD_RAID0) += raid0.o
26obj-$(CONFIG_MD_RAID1) += raid1.o 26obj-$(CONFIG_MD_RAID1) += raid1.o
27obj-$(CONFIG_MD_RAID10) += raid10.o 27obj-$(CONFIG_MD_RAID10) += raid10.o
28obj-$(CONFIG_MD_RAID5) += raid5.o xor.o 28obj-$(CONFIG_MD_RAID456) += raid456.o xor.o
29obj-$(CONFIG_MD_RAID6) += raid6.o xor.o
30obj-$(CONFIG_MD_MULTIPATH) += multipath.o 29obj-$(CONFIG_MD_MULTIPATH) += multipath.o
31obj-$(CONFIG_MD_FAULTY) += faulty.o 30obj-$(CONFIG_MD_FAULTY) += faulty.o
32obj-$(CONFIG_BLK_DEV_MD) += md-mod.o 31obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f8ffaee20ff8..ebbd2d856256 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -7,7 +7,6 @@
7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8 * - added disk storage for bitmap 8 * - added disk storage for bitmap
9 * - changes to allow various bitmap chunk sizes 9 * - changes to allow various bitmap chunk sizes
10 * - added bitmap daemon (to asynchronously clear bitmap bits from disk)
11 */ 10 */
12 11
13/* 12/*
@@ -15,9 +14,6 @@
15 * 14 *
16 * flush after percent set rather than just time based. (maybe both). 15 * flush after percent set rather than just time based. (maybe both).
17 * wait if count gets too high, wake when it drops to half. 16 * wait if count gets too high, wake when it drops to half.
18 * allow bitmap to be mirrored with superblock (before or after...)
19 * allow hot-add to re-instate a current device.
20 * allow hot-add of bitmap after quiessing device
21 */ 17 */
22 18
23#include <linux/module.h> 19#include <linux/module.h>
@@ -73,24 +69,6 @@ static inline char * bmname(struct bitmap *bitmap)
73 69
74 70
75/* 71/*
76 * test if the bitmap is active
77 */
78int bitmap_active(struct bitmap *bitmap)
79{
80 unsigned long flags;
81 int res = 0;
82
83 if (!bitmap)
84 return res;
85 spin_lock_irqsave(&bitmap->lock, flags);
86 res = bitmap->flags & BITMAP_ACTIVE;
87 spin_unlock_irqrestore(&bitmap->lock, flags);
88 return res;
89}
90
91#define WRITE_POOL_SIZE 256
92
93/*
94 * just a placeholder - calls kmalloc for bitmap pages 72 * just a placeholder - calls kmalloc for bitmap pages
95 */ 73 */
96static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) 74static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
@@ -269,6 +247,8 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
269 247
270 if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) { 248 if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
271 page->index = index; 249 page->index = index;
250 attach_page_buffers(page, NULL); /* so that free_buffer will
251 * quietly no-op */
272 return page; 252 return page;
273 } 253 }
274 } 254 }
@@ -300,77 +280,132 @@ static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wai
300 */ 280 */
301static int write_page(struct bitmap *bitmap, struct page *page, int wait) 281static int write_page(struct bitmap *bitmap, struct page *page, int wait)
302{ 282{
303 int ret = -ENOMEM; 283 struct buffer_head *bh;
304 284
305 if (bitmap->file == NULL) 285 if (bitmap->file == NULL)
306 return write_sb_page(bitmap->mddev, bitmap->offset, page, wait); 286 return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);
307 287
308 flush_dcache_page(page); /* make sure visible to anyone reading the file */ 288 bh = page_buffers(page);
309 289
310 if (wait) 290 while (bh && bh->b_blocknr) {
311 lock_page(page); 291 atomic_inc(&bitmap->pending_writes);
312 else { 292 set_buffer_locked(bh);
313 if (TestSetPageLocked(page)) 293 set_buffer_mapped(bh);
314 return -EAGAIN; /* already locked */ 294 submit_bh(WRITE, bh);
315 if (PageWriteback(page)) { 295 bh = bh->b_this_page;
316 unlock_page(page);
317 return -EAGAIN;
318 }
319 } 296 }
320 297
321 ret = page->mapping->a_ops->prepare_write(bitmap->file, page, 0, PAGE_SIZE); 298 if (wait) {
322 if (!ret) 299 wait_event(bitmap->write_wait,
323 ret = page->mapping->a_ops->commit_write(bitmap->file, page, 0, 300 atomic_read(&bitmap->pending_writes)==0);
324 PAGE_SIZE); 301 return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;
325 if (ret) {
326 unlock_page(page);
327 return ret;
328 } 302 }
303 return 0;
304}
329 305
330 set_page_dirty(page); /* force it to be written out */ 306static void end_bitmap_write(struct buffer_head *bh, int uptodate)
331 307{
332 if (!wait) { 308 struct bitmap *bitmap = bh->b_private;
333 /* add to list to be waited for by daemon */ 309 unsigned long flags;
334 struct page_list *item = mempool_alloc(bitmap->write_pool, GFP_NOIO); 310
335 item->page = page; 311 if (!uptodate) {
336 get_page(page); 312 spin_lock_irqsave(&bitmap->lock, flags);
337 spin_lock(&bitmap->write_lock); 313 bitmap->flags |= BITMAP_WRITE_ERROR;
338 list_add(&item->list, &bitmap->complete_pages); 314 spin_unlock_irqrestore(&bitmap->lock, flags);
339 spin_unlock(&bitmap->write_lock); 315 }
340 md_wakeup_thread(bitmap->writeback_daemon); 316 if (atomic_dec_and_test(&bitmap->pending_writes))
317 wake_up(&bitmap->write_wait);
318}
319
320/* copied from buffer.c */
321static void
322__clear_page_buffers(struct page *page)
323{
324 ClearPagePrivate(page);
325 set_page_private(page, 0);
326 page_cache_release(page);
327}
328static void free_buffers(struct page *page)
329{
330 struct buffer_head *bh = page_buffers(page);
331
332 while (bh) {
333 struct buffer_head *next = bh->b_this_page;
334 free_buffer_head(bh);
335 bh = next;
341 } 336 }
342 return write_one_page(page, wait); 337 __clear_page_buffers(page);
338 put_page(page);
343} 339}
344 340
345/* read a page from a file, pinning it into cache, and return bytes_read */ 341/* read a page from a file.
342 * We both read the page, and attach buffers to the page to record the
343 * address of each block (using bmap). These addresses will be used
344 * to write the block later, completely bypassing the filesystem.
345 * This usage is similar to how swap files are handled, and allows us
346 * to write to a file with no concerns of memory allocation failing.
347 */
346static struct page *read_page(struct file *file, unsigned long index, 348static struct page *read_page(struct file *file, unsigned long index,
347 unsigned long *bytes_read) 349 struct bitmap *bitmap,
350 unsigned long count)
348{ 351{
349 struct inode *inode = file->f_mapping->host;
350 struct page *page = NULL; 352 struct page *page = NULL;
351 loff_t isize = i_size_read(inode); 353 struct inode *inode = file->f_dentry->d_inode;
352 unsigned long end_index = isize >> PAGE_SHIFT; 354 struct buffer_head *bh;
355 sector_t block;
353 356
354 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, 357 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE,
355 (unsigned long long)index << PAGE_SHIFT); 358 (unsigned long long)index << PAGE_SHIFT);
356 359
357 page = read_cache_page(inode->i_mapping, index, 360 page = alloc_page(GFP_KERNEL);
358 (filler_t *)inode->i_mapping->a_ops->readpage, file); 361 if (!page)
362 page = ERR_PTR(-ENOMEM);
359 if (IS_ERR(page)) 363 if (IS_ERR(page))
360 goto out; 364 goto out;
361 wait_on_page_locked(page); 365
362 if (!PageUptodate(page) || PageError(page)) { 366 bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
367 if (!bh) {
363 put_page(page); 368 put_page(page);
364 page = ERR_PTR(-EIO); 369 page = ERR_PTR(-ENOMEM);
365 goto out; 370 goto out;
366 } 371 }
372 attach_page_buffers(page, bh);
373 block = index << (PAGE_SHIFT - inode->i_blkbits);
374 while (bh) {
375 if (count == 0)
376 bh->b_blocknr = 0;
377 else {
378 bh->b_blocknr = bmap(inode, block);
379 if (bh->b_blocknr == 0) {
380 /* Cannot use this file! */
381 free_buffers(page);
382 page = ERR_PTR(-EINVAL);
383 goto out;
384 }
385 bh->b_bdev = inode->i_sb->s_bdev;
386 if (count < (1<<inode->i_blkbits))
387 count = 0;
388 else
389 count -= (1<<inode->i_blkbits);
390
391 bh->b_end_io = end_bitmap_write;
392 bh->b_private = bitmap;
393 atomic_inc(&bitmap->pending_writes);
394 set_buffer_locked(bh);
395 set_buffer_mapped(bh);
396 submit_bh(READ, bh);
397 }
398 block++;
399 bh = bh->b_this_page;
400 }
401 page->index = index;
367 402
368 if (index > end_index) /* we have read beyond EOF */ 403 wait_event(bitmap->write_wait,
369 *bytes_read = 0; 404 atomic_read(&bitmap->pending_writes)==0);
370 else if (index == end_index) /* possible short read */ 405 if (bitmap->flags & BITMAP_WRITE_ERROR) {
371 *bytes_read = isize & ~PAGE_MASK; 406 free_buffers(page);
372 else 407 page = ERR_PTR(-EIO);
373 *bytes_read = PAGE_SIZE; /* got a full page */ 408 }
374out: 409out:
375 if (IS_ERR(page)) 410 if (IS_ERR(page))
376 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", 411 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n",
@@ -441,16 +476,14 @@ static int bitmap_read_sb(struct bitmap *bitmap)
441 char *reason = NULL; 476 char *reason = NULL;
442 bitmap_super_t *sb; 477 bitmap_super_t *sb;
443 unsigned long chunksize, daemon_sleep, write_behind; 478 unsigned long chunksize, daemon_sleep, write_behind;
444 unsigned long bytes_read;
445 unsigned long long events; 479 unsigned long long events;
446 int err = -EINVAL; 480 int err = -EINVAL;
447 481
448 /* page 0 is the superblock, read it... */ 482 /* page 0 is the superblock, read it... */
449 if (bitmap->file) 483 if (bitmap->file)
450 bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read); 484 bitmap->sb_page = read_page(bitmap->file, 0, bitmap, PAGE_SIZE);
451 else { 485 else {
452 bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0); 486 bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
453 bytes_read = PAGE_SIZE;
454 } 487 }
455 if (IS_ERR(bitmap->sb_page)) { 488 if (IS_ERR(bitmap->sb_page)) {
456 err = PTR_ERR(bitmap->sb_page); 489 err = PTR_ERR(bitmap->sb_page);
@@ -460,13 +493,6 @@ static int bitmap_read_sb(struct bitmap *bitmap)
460 493
461 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 494 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
462 495
463 if (bytes_read < sizeof(*sb)) { /* short read */
464 printk(KERN_INFO "%s: bitmap file superblock truncated\n",
465 bmname(bitmap));
466 err = -ENOSPC;
467 goto out;
468 }
469
470 chunksize = le32_to_cpu(sb->chunksize); 496 chunksize = le32_to_cpu(sb->chunksize);
471 daemon_sleep = le32_to_cpu(sb->daemon_sleep); 497 daemon_sleep = le32_to_cpu(sb->daemon_sleep);
472 write_behind = le32_to_cpu(sb->write_behind); 498 write_behind = le32_to_cpu(sb->write_behind);
@@ -550,7 +576,6 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
550 spin_unlock_irqrestore(&bitmap->lock, flags); 576 spin_unlock_irqrestore(&bitmap->lock, flags);
551 return; 577 return;
552 } 578 }
553 get_page(bitmap->sb_page);
554 spin_unlock_irqrestore(&bitmap->lock, flags); 579 spin_unlock_irqrestore(&bitmap->lock, flags);
555 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 580 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
556 switch (op) { 581 switch (op) {
@@ -561,7 +586,6 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
561 default: BUG(); 586 default: BUG();
562 } 587 }
563 kunmap_atomic(sb, KM_USER0); 588 kunmap_atomic(sb, KM_USER0);
564 put_page(bitmap->sb_page);
565} 589}
566 590
567/* 591/*
@@ -614,48 +638,17 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
614 638
615 while (pages--) 639 while (pages--)
616 if (map[pages]->index != 0) /* 0 is sb_page, release it below */ 640 if (map[pages]->index != 0) /* 0 is sb_page, release it below */
617 put_page(map[pages]); 641 free_buffers(map[pages]);
618 kfree(map); 642 kfree(map);
619 kfree(attr); 643 kfree(attr);
620 644
621 safe_put_page(sb_page); 645 if (sb_page)
622} 646 free_buffers(sb_page);
623
624static void bitmap_stop_daemon(struct bitmap *bitmap);
625
626/* dequeue the next item in a page list -- don't call from irq context */
627static struct page_list *dequeue_page(struct bitmap *bitmap)
628{
629 struct page_list *item = NULL;
630 struct list_head *head = &bitmap->complete_pages;
631
632 spin_lock(&bitmap->write_lock);
633 if (list_empty(head))
634 goto out;
635 item = list_entry(head->prev, struct page_list, list);
636 list_del(head->prev);
637out:
638 spin_unlock(&bitmap->write_lock);
639 return item;
640}
641
642static void drain_write_queues(struct bitmap *bitmap)
643{
644 struct page_list *item;
645
646 while ((item = dequeue_page(bitmap))) {
647 /* don't bother to wait */
648 put_page(item->page);
649 mempool_free(item, bitmap->write_pool);
650 }
651
652 wake_up(&bitmap->write_wait);
653} 647}
654 648
655static void bitmap_file_put(struct bitmap *bitmap) 649static void bitmap_file_put(struct bitmap *bitmap)
656{ 650{
657 struct file *file; 651 struct file *file;
658 struct inode *inode;
659 unsigned long flags; 652 unsigned long flags;
660 653
661 spin_lock_irqsave(&bitmap->lock, flags); 654 spin_lock_irqsave(&bitmap->lock, flags);
@@ -663,17 +656,14 @@ static void bitmap_file_put(struct bitmap *bitmap)
663 bitmap->file = NULL; 656 bitmap->file = NULL;
664 spin_unlock_irqrestore(&bitmap->lock, flags); 657 spin_unlock_irqrestore(&bitmap->lock, flags);
665 658
666 bitmap_stop_daemon(bitmap); 659 if (file)
667 660 wait_event(bitmap->write_wait,
668 drain_write_queues(bitmap); 661 atomic_read(&bitmap->pending_writes)==0);
669
670 bitmap_file_unmap(bitmap); 662 bitmap_file_unmap(bitmap);
671 663
672 if (file) { 664 if (file) {
673 inode = file->f_mapping->host; 665 struct inode *inode = file->f_dentry->d_inode;
674 spin_lock(&inode->i_lock); 666 invalidate_inode_pages(inode->i_mapping);
675 atomic_set(&inode->i_writecount, 1); /* allow writes again */
676 spin_unlock(&inode->i_lock);
677 fput(file); 667 fput(file);
678 } 668 }
679} 669}
@@ -708,26 +698,27 @@ static void bitmap_file_kick(struct bitmap *bitmap)
708} 698}
709 699
710enum bitmap_page_attr { 700enum bitmap_page_attr {
711 BITMAP_PAGE_DIRTY = 1, // there are set bits that need to be synced 701 BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced
712 BITMAP_PAGE_CLEAN = 2, // there are bits that might need to be cleared 702 BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared
713 BITMAP_PAGE_NEEDWRITE=4, // there are cleared bits that need to be synced 703 BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced
714}; 704};
715 705
716static inline void set_page_attr(struct bitmap *bitmap, struct page *page, 706static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
717 enum bitmap_page_attr attr) 707 enum bitmap_page_attr attr)
718{ 708{
719 bitmap->filemap_attr[page->index] |= attr; 709 __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
720} 710}
721 711
722static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, 712static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
723 enum bitmap_page_attr attr) 713 enum bitmap_page_attr attr)
724{ 714{
725 bitmap->filemap_attr[page->index] &= ~attr; 715 __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
726} 716}
727 717
728static inline unsigned long get_page_attr(struct bitmap *bitmap, struct page *page) 718static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
719 enum bitmap_page_attr attr)
729{ 720{
730 return bitmap->filemap_attr[page->index]; 721 return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
731} 722}
732 723
733/* 724/*
@@ -751,11 +742,6 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
751 page = filemap_get_page(bitmap, chunk); 742 page = filemap_get_page(bitmap, chunk);
752 bit = file_page_offset(chunk); 743 bit = file_page_offset(chunk);
753 744
754
755 /* make sure the page stays cached until it gets written out */
756 if (! (get_page_attr(bitmap, page) & BITMAP_PAGE_DIRTY))
757 get_page(page);
758
759 /* set the bit */ 745 /* set the bit */
760 kaddr = kmap_atomic(page, KM_USER0); 746 kaddr = kmap_atomic(page, KM_USER0);
761 if (bitmap->flags & BITMAP_HOSTENDIAN) 747 if (bitmap->flags & BITMAP_HOSTENDIAN)
@@ -775,7 +761,8 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
775 * sync the dirty pages of the bitmap file to disk */ 761 * sync the dirty pages of the bitmap file to disk */
776int bitmap_unplug(struct bitmap *bitmap) 762int bitmap_unplug(struct bitmap *bitmap)
777{ 763{
778 unsigned long i, attr, flags; 764 unsigned long i, flags;
765 int dirty, need_write;
779 struct page *page; 766 struct page *page;
780 int wait = 0; 767 int wait = 0;
781 int err; 768 int err;
@@ -792,35 +779,26 @@ int bitmap_unplug(struct bitmap *bitmap)
792 return 0; 779 return 0;
793 } 780 }
794 page = bitmap->filemap[i]; 781 page = bitmap->filemap[i];
795 attr = get_page_attr(bitmap, page); 782 dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
783 need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
796 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 784 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
797 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 785 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
798 if ((attr & BITMAP_PAGE_DIRTY)) 786 if (dirty)
799 wait = 1; 787 wait = 1;
800 spin_unlock_irqrestore(&bitmap->lock, flags); 788 spin_unlock_irqrestore(&bitmap->lock, flags);
801 789
802 if (attr & (BITMAP_PAGE_DIRTY | BITMAP_PAGE_NEEDWRITE)) { 790 if (dirty | need_write)
803 err = write_page(bitmap, page, 0); 791 err = write_page(bitmap, page, 0);
804 if (err == -EAGAIN) {
805 if (attr & BITMAP_PAGE_DIRTY)
806 err = write_page(bitmap, page, 1);
807 else
808 err = 0;
809 }
810 if (err)
811 return 1;
812 }
813 } 792 }
814 if (wait) { /* if any writes were performed, we need to wait on them */ 793 if (wait) { /* if any writes were performed, we need to wait on them */
815 if (bitmap->file) { 794 if (bitmap->file)
816 spin_lock_irq(&bitmap->write_lock); 795 wait_event(bitmap->write_wait,
817 wait_event_lock_irq(bitmap->write_wait, 796 atomic_read(&bitmap->pending_writes)==0);
818 list_empty(&bitmap->complete_pages), bitmap->write_lock, 797 else
819 wake_up_process(bitmap->writeback_daemon->tsk));
820 spin_unlock_irq(&bitmap->write_lock);
821 } else
822 md_super_wait(bitmap->mddev); 798 md_super_wait(bitmap->mddev);
823 } 799 }
800 if (bitmap->flags & BITMAP_WRITE_ERROR)
801 bitmap_file_kick(bitmap);
824 return 0; 802 return 0;
825} 803}
826 804
@@ -842,7 +820,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
842 struct page *page = NULL, *oldpage = NULL; 820 struct page *page = NULL, *oldpage = NULL;
843 unsigned long num_pages, bit_cnt = 0; 821 unsigned long num_pages, bit_cnt = 0;
844 struct file *file; 822 struct file *file;
845 unsigned long bytes, offset, dummy; 823 unsigned long bytes, offset;
846 int outofdate; 824 int outofdate;
847 int ret = -ENOSPC; 825 int ret = -ENOSPC;
848 void *paddr; 826 void *paddr;
@@ -879,7 +857,12 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
879 if (!bitmap->filemap) 857 if (!bitmap->filemap)
880 goto out; 858 goto out;
881 859
882 bitmap->filemap_attr = kzalloc(sizeof(long) * num_pages, GFP_KERNEL); 860 /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
861 bitmap->filemap_attr = kzalloc(
862 (((num_pages*4/8)+sizeof(unsigned long)-1)
863 /sizeof(unsigned long))
864 *sizeof(unsigned long),
865 GFP_KERNEL);
883 if (!bitmap->filemap_attr) 866 if (!bitmap->filemap_attr)
884 goto out; 867 goto out;
885 868
@@ -890,7 +873,12 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
890 index = file_page_index(i); 873 index = file_page_index(i);
891 bit = file_page_offset(i); 874 bit = file_page_offset(i);
892 if (index != oldindex) { /* this is a new page, read it in */ 875 if (index != oldindex) { /* this is a new page, read it in */
876 int count;
893 /* unmap the old page, we're done with it */ 877 /* unmap the old page, we're done with it */
878 if (index == num_pages-1)
879 count = bytes - index * PAGE_SIZE;
880 else
881 count = PAGE_SIZE;
894 if (index == 0) { 882 if (index == 0) {
895 /* 883 /*
896 * if we're here then the superblock page 884 * if we're here then the superblock page
@@ -900,7 +888,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
900 page = bitmap->sb_page; 888 page = bitmap->sb_page;
901 offset = sizeof(bitmap_super_t); 889 offset = sizeof(bitmap_super_t);
902 } else if (file) { 890 } else if (file) {
903 page = read_page(file, index, &dummy); 891 page = read_page(file, index, bitmap, count);
904 offset = 0; 892 offset = 0;
905 } else { 893 } else {
906 page = read_sb_page(bitmap->mddev, bitmap->offset, index); 894 page = read_sb_page(bitmap->mddev, bitmap->offset, index);
@@ -971,12 +959,11 @@ void bitmap_write_all(struct bitmap *bitmap)
971 /* We don't actually write all bitmap blocks here, 959 /* We don't actually write all bitmap blocks here,
972 * just flag them as needing to be written 960 * just flag them as needing to be written
973 */ 961 */
962 int i;
974 963
975 unsigned long chunks = bitmap->chunks; 964 for (i=0; i < bitmap->file_pages; i++)
976 unsigned long bytes = (chunks+7)/8 + sizeof(bitmap_super_t); 965 set_page_attr(bitmap, bitmap->filemap[i],
977 unsigned long num_pages = (bytes + PAGE_SIZE-1) / PAGE_SIZE; 966 BITMAP_PAGE_NEEDWRITE);
978 while (num_pages--)
979 bitmap->filemap_attr[num_pages] |= BITMAP_PAGE_NEEDWRITE;
980} 967}
981 968
982 969
@@ -1007,7 +994,6 @@ int bitmap_daemon_work(struct bitmap *bitmap)
1007 struct page *page = NULL, *lastpage = NULL; 994 struct page *page = NULL, *lastpage = NULL;
1008 int err = 0; 995 int err = 0;
1009 int blocks; 996 int blocks;
1010 int attr;
1011 void *paddr; 997 void *paddr;
1012 998
1013 if (bitmap == NULL) 999 if (bitmap == NULL)
@@ -1029,43 +1015,34 @@ int bitmap_daemon_work(struct bitmap *bitmap)
1029 1015
1030 if (page != lastpage) { 1016 if (page != lastpage) {
1031 /* skip this page unless it's marked as needing cleaning */ 1017 /* skip this page unless it's marked as needing cleaning */
1032 if (!((attr=get_page_attr(bitmap, page)) & BITMAP_PAGE_CLEAN)) { 1018 if (!test_page_attr(bitmap, page, BITMAP_PAGE_CLEAN)) {
1033 if (attr & BITMAP_PAGE_NEEDWRITE) { 1019 int need_write = test_page_attr(bitmap, page,
1034 get_page(page); 1020 BITMAP_PAGE_NEEDWRITE);
1021 if (need_write)
1035 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 1022 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
1036 } 1023
1037 spin_unlock_irqrestore(&bitmap->lock, flags); 1024 spin_unlock_irqrestore(&bitmap->lock, flags);
1038 if (attr & BITMAP_PAGE_NEEDWRITE) { 1025 if (need_write) {
1039 switch (write_page(bitmap, page, 0)) { 1026 switch (write_page(bitmap, page, 0)) {
1040 case -EAGAIN:
1041 set_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
1042 break;
1043 case 0: 1027 case 0:
1044 break; 1028 break;
1045 default: 1029 default:
1046 bitmap_file_kick(bitmap); 1030 bitmap_file_kick(bitmap);
1047 } 1031 }
1048 put_page(page);
1049 } 1032 }
1050 continue; 1033 continue;
1051 } 1034 }
1052 1035
1053 /* grab the new page, sync and release the old */ 1036 /* grab the new page, sync and release the old */
1054 get_page(page);
1055 if (lastpage != NULL) { 1037 if (lastpage != NULL) {
1056 if (get_page_attr(bitmap, lastpage) & BITMAP_PAGE_NEEDWRITE) { 1038 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
1057 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1039 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1058 spin_unlock_irqrestore(&bitmap->lock, flags); 1040 spin_unlock_irqrestore(&bitmap->lock, flags);
1059 err = write_page(bitmap, lastpage, 0); 1041 err = write_page(bitmap, lastpage, 0);
1060 if (err == -EAGAIN) {
1061 err = 0;
1062 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1063 }
1064 } else { 1042 } else {
1065 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1043 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1066 spin_unlock_irqrestore(&bitmap->lock, flags); 1044 spin_unlock_irqrestore(&bitmap->lock, flags);
1067 } 1045 }
1068 put_page(lastpage);
1069 if (err) 1046 if (err)
1070 bitmap_file_kick(bitmap); 1047 bitmap_file_kick(bitmap);
1071 } else 1048 } else
@@ -1107,131 +1084,19 @@ int bitmap_daemon_work(struct bitmap *bitmap)
1107 /* now sync the final page */ 1084 /* now sync the final page */
1108 if (lastpage != NULL) { 1085 if (lastpage != NULL) {
1109 spin_lock_irqsave(&bitmap->lock, flags); 1086 spin_lock_irqsave(&bitmap->lock, flags);
1110 if (get_page_attr(bitmap, lastpage) &BITMAP_PAGE_NEEDWRITE) { 1087 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
1111 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1088 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1112 spin_unlock_irqrestore(&bitmap->lock, flags); 1089 spin_unlock_irqrestore(&bitmap->lock, flags);
1113 err = write_page(bitmap, lastpage, 0); 1090 err = write_page(bitmap, lastpage, 0);
1114 if (err == -EAGAIN) {
1115 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1116 err = 0;
1117 }
1118 } else { 1091 } else {
1119 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1092 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1120 spin_unlock_irqrestore(&bitmap->lock, flags); 1093 spin_unlock_irqrestore(&bitmap->lock, flags);
1121 } 1094 }
1122
1123 put_page(lastpage);
1124 } 1095 }
1125 1096
1126 return err; 1097 return err;
1127} 1098}
1128 1099
1129static void daemon_exit(struct bitmap *bitmap, mdk_thread_t **daemon)
1130{
1131 mdk_thread_t *dmn;
1132 unsigned long flags;
1133
1134 /* if no one is waiting on us, we'll free the md thread struct
1135 * and exit, otherwise we let the waiter clean things up */
1136 spin_lock_irqsave(&bitmap->lock, flags);
1137 if ((dmn = *daemon)) { /* no one is waiting, cleanup and exit */
1138 *daemon = NULL;
1139 spin_unlock_irqrestore(&bitmap->lock, flags);
1140 kfree(dmn);
1141 complete_and_exit(NULL, 0); /* do_exit not exported */
1142 }
1143 spin_unlock_irqrestore(&bitmap->lock, flags);
1144}
1145
1146static void bitmap_writeback_daemon(mddev_t *mddev)
1147{
1148 struct bitmap *bitmap = mddev->bitmap;
1149 struct page *page;
1150 struct page_list *item;
1151 int err = 0;
1152
1153 if (signal_pending(current)) {
1154 printk(KERN_INFO
1155 "%s: bitmap writeback daemon got signal, exiting...\n",
1156 bmname(bitmap));
1157 err = -EINTR;
1158 goto out;
1159 }
1160 if (bitmap == NULL)
1161 /* about to be stopped. */
1162 return;
1163
1164 PRINTK("%s: bitmap writeback daemon woke up...\n", bmname(bitmap));
1165 /* wait on bitmap page writebacks */
1166 while ((item = dequeue_page(bitmap))) {
1167 page = item->page;
1168 mempool_free(item, bitmap->write_pool);
1169 PRINTK("wait on page writeback: %p\n", page);
1170 wait_on_page_writeback(page);
1171 PRINTK("finished page writeback: %p\n", page);
1172
1173 err = PageError(page);
1174 put_page(page);
1175 if (err) {
1176 printk(KERN_WARNING "%s: bitmap file writeback "
1177 "failed (page %lu): %d\n",
1178 bmname(bitmap), page->index, err);
1179 bitmap_file_kick(bitmap);
1180 goto out;
1181 }
1182 }
1183 out:
1184 wake_up(&bitmap->write_wait);
1185 if (err) {
1186 printk(KERN_INFO "%s: bitmap writeback daemon exiting (%d)\n",
1187 bmname(bitmap), err);
1188 daemon_exit(bitmap, &bitmap->writeback_daemon);
1189 }
1190}
1191
1192static mdk_thread_t *bitmap_start_daemon(struct bitmap *bitmap,
1193 void (*func)(mddev_t *), char *name)
1194{
1195 mdk_thread_t *daemon;
1196 char namebuf[32];
1197
1198#ifdef INJECT_FATAL_FAULT_2
1199 daemon = NULL;
1200#else
1201 sprintf(namebuf, "%%s_%s", name);
1202 daemon = md_register_thread(func, bitmap->mddev, namebuf);
1203#endif
1204 if (!daemon) {
1205 printk(KERN_ERR "%s: failed to start bitmap daemon\n",
1206 bmname(bitmap));
1207 return ERR_PTR(-ECHILD);
1208 }
1209
1210 md_wakeup_thread(daemon); /* start it running */
1211
1212 PRINTK("%s: %s daemon (pid %d) started...\n",
1213 bmname(bitmap), name, daemon->tsk->pid);
1214
1215 return daemon;
1216}
1217
1218static void bitmap_stop_daemon(struct bitmap *bitmap)
1219{
1220 /* the daemon can't stop itself... it'll just exit instead... */
1221 if (bitmap->writeback_daemon && ! IS_ERR(bitmap->writeback_daemon) &&
1222 current->pid != bitmap->writeback_daemon->tsk->pid) {
1223 mdk_thread_t *daemon;
1224 unsigned long flags;
1225
1226 spin_lock_irqsave(&bitmap->lock, flags);
1227 daemon = bitmap->writeback_daemon;
1228 bitmap->writeback_daemon = NULL;
1229 spin_unlock_irqrestore(&bitmap->lock, flags);
1230 if (daemon && ! IS_ERR(daemon))
1231 md_unregister_thread(daemon); /* destroy the thread */
1232 }
1233}
1234
1235static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1100static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
1236 sector_t offset, int *blocks, 1101 sector_t offset, int *blocks,
1237 int create) 1102 int create)
@@ -1500,8 +1365,6 @@ static void bitmap_free(struct bitmap *bitmap)
1500 1365
1501 /* free all allocated memory */ 1366 /* free all allocated memory */
1502 1367
1503 mempool_destroy(bitmap->write_pool);
1504
1505 if (bp) /* deallocate the page memory */ 1368 if (bp) /* deallocate the page memory */
1506 for (k = 0; k < pages; k++) 1369 for (k = 0; k < pages; k++)
1507 if (bp[k].map && !bp[k].hijacked) 1370 if (bp[k].map && !bp[k].hijacked)
@@ -1549,20 +1412,20 @@ int bitmap_create(mddev_t *mddev)
1549 return -ENOMEM; 1412 return -ENOMEM;
1550 1413
1551 spin_lock_init(&bitmap->lock); 1414 spin_lock_init(&bitmap->lock);
1552 bitmap->mddev = mddev; 1415 atomic_set(&bitmap->pending_writes, 0);
1553
1554 spin_lock_init(&bitmap->write_lock);
1555 INIT_LIST_HEAD(&bitmap->complete_pages);
1556 init_waitqueue_head(&bitmap->write_wait); 1416 init_waitqueue_head(&bitmap->write_wait);
1557 bitmap->write_pool = mempool_create_kmalloc_pool(WRITE_POOL_SIZE, 1417
1558 sizeof(struct page_list)); 1418 bitmap->mddev = mddev;
1559 err = -ENOMEM;
1560 if (!bitmap->write_pool)
1561 goto error;
1562 1419
1563 bitmap->file = file; 1420 bitmap->file = file;
1564 bitmap->offset = mddev->bitmap_offset; 1421 bitmap->offset = mddev->bitmap_offset;
1565 if (file) get_file(file); 1422 if (file) {
1423 get_file(file);
1424 do_sync_file_range(file, 0, LLONG_MAX,
1425 SYNC_FILE_RANGE_WAIT_BEFORE |
1426 SYNC_FILE_RANGE_WRITE |
1427 SYNC_FILE_RANGE_WAIT_AFTER);
1428 }
1566 /* read superblock from bitmap file (this sets bitmap->chunksize) */ 1429 /* read superblock from bitmap file (this sets bitmap->chunksize) */
1567 err = bitmap_read_sb(bitmap); 1430 err = bitmap_read_sb(bitmap);
1568 if (err) 1431 if (err)
@@ -1594,8 +1457,6 @@ int bitmap_create(mddev_t *mddev)
1594 if (!bitmap->bp) 1457 if (!bitmap->bp)
1595 goto error; 1458 goto error;
1596 1459
1597 bitmap->flags |= BITMAP_ACTIVE;
1598
1599 /* now that we have some pages available, initialize the in-memory 1460 /* now that we have some pages available, initialize the in-memory
1600 * bitmap from the on-disk bitmap */ 1461 * bitmap from the on-disk bitmap */
1601 start = 0; 1462 start = 0;
@@ -1613,15 +1474,6 @@ int bitmap_create(mddev_t *mddev)
1613 1474
1614 mddev->bitmap = bitmap; 1475 mddev->bitmap = bitmap;
1615 1476
1616 if (file)
1617 /* kick off the bitmap writeback daemon */
1618 bitmap->writeback_daemon =
1619 bitmap_start_daemon(bitmap,
1620 bitmap_writeback_daemon,
1621 "bitmap_wb");
1622
1623 if (IS_ERR(bitmap->writeback_daemon))
1624 return PTR_ERR(bitmap->writeback_daemon);
1625 mddev->thread->timeout = bitmap->daemon_sleep * HZ; 1477 mddev->thread->timeout = bitmap->daemon_sleep * HZ;
1626 1478
1627 return bitmap_update_sb(bitmap); 1479 return bitmap_update_sb(bitmap);
@@ -1638,4 +1490,3 @@ EXPORT_SYMBOL(bitmap_start_sync);
1638EXPORT_SYMBOL(bitmap_end_sync); 1490EXPORT_SYMBOL(bitmap_end_sync);
1639EXPORT_SYMBOL(bitmap_unplug); 1491EXPORT_SYMBOL(bitmap_unplug);
1640EXPORT_SYMBOL(bitmap_close_sync); 1492EXPORT_SYMBOL(bitmap_close_sync);
1641EXPORT_SYMBOL(bitmap_daemon_work);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 61a590bb6241..6022ed12a795 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -20,7 +20,7 @@
20 20
21#include "dm.h" 21#include "dm.h"
22 22
23#define PFX "crypt: " 23#define DM_MSG_PREFIX "crypt"
24 24
25/* 25/*
26 * per bio private data 26 * per bio private data
@@ -125,19 +125,19 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
125 u8 *salt; 125 u8 *salt;
126 126
127 if (opts == NULL) { 127 if (opts == NULL) {
128 ti->error = PFX "Digest algorithm missing for ESSIV mode"; 128 ti->error = "Digest algorithm missing for ESSIV mode";
129 return -EINVAL; 129 return -EINVAL;
130 } 130 }
131 131
132 /* Hash the cipher key with the given hash algorithm */ 132 /* Hash the cipher key with the given hash algorithm */
133 hash_tfm = crypto_alloc_tfm(opts, CRYPTO_TFM_REQ_MAY_SLEEP); 133 hash_tfm = crypto_alloc_tfm(opts, CRYPTO_TFM_REQ_MAY_SLEEP);
134 if (hash_tfm == NULL) { 134 if (hash_tfm == NULL) {
135 ti->error = PFX "Error initializing ESSIV hash"; 135 ti->error = "Error initializing ESSIV hash";
136 return -EINVAL; 136 return -EINVAL;
137 } 137 }
138 138
139 if (crypto_tfm_alg_type(hash_tfm) != CRYPTO_ALG_TYPE_DIGEST) { 139 if (crypto_tfm_alg_type(hash_tfm) != CRYPTO_ALG_TYPE_DIGEST) {
140 ti->error = PFX "Expected digest algorithm for ESSIV hash"; 140 ti->error = "Expected digest algorithm for ESSIV hash";
141 crypto_free_tfm(hash_tfm); 141 crypto_free_tfm(hash_tfm);
142 return -EINVAL; 142 return -EINVAL;
143 } 143 }
@@ -145,7 +145,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
145 saltsize = crypto_tfm_alg_digestsize(hash_tfm); 145 saltsize = crypto_tfm_alg_digestsize(hash_tfm);
146 salt = kmalloc(saltsize, GFP_KERNEL); 146 salt = kmalloc(saltsize, GFP_KERNEL);
147 if (salt == NULL) { 147 if (salt == NULL) {
148 ti->error = PFX "Error kmallocing salt storage in ESSIV"; 148 ti->error = "Error kmallocing salt storage in ESSIV";
149 crypto_free_tfm(hash_tfm); 149 crypto_free_tfm(hash_tfm);
150 return -ENOMEM; 150 return -ENOMEM;
151 } 151 }
@@ -159,20 +159,20 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
159 CRYPTO_TFM_MODE_ECB | 159 CRYPTO_TFM_MODE_ECB |
160 CRYPTO_TFM_REQ_MAY_SLEEP); 160 CRYPTO_TFM_REQ_MAY_SLEEP);
161 if (essiv_tfm == NULL) { 161 if (essiv_tfm == NULL) {
162 ti->error = PFX "Error allocating crypto tfm for ESSIV"; 162 ti->error = "Error allocating crypto tfm for ESSIV";
163 kfree(salt); 163 kfree(salt);
164 return -EINVAL; 164 return -EINVAL;
165 } 165 }
166 if (crypto_tfm_alg_blocksize(essiv_tfm) 166 if (crypto_tfm_alg_blocksize(essiv_tfm)
167 != crypto_tfm_alg_ivsize(cc->tfm)) { 167 != crypto_tfm_alg_ivsize(cc->tfm)) {
168 ti->error = PFX "Block size of ESSIV cipher does " 168 ti->error = "Block size of ESSIV cipher does "
169 "not match IV size of block cipher"; 169 "not match IV size of block cipher";
170 crypto_free_tfm(essiv_tfm); 170 crypto_free_tfm(essiv_tfm);
171 kfree(salt); 171 kfree(salt);
172 return -EINVAL; 172 return -EINVAL;
173 } 173 }
174 if (crypto_cipher_setkey(essiv_tfm, salt, saltsize) < 0) { 174 if (crypto_cipher_setkey(essiv_tfm, salt, saltsize) < 0) {
175 ti->error = PFX "Failed to set key for ESSIV cipher"; 175 ti->error = "Failed to set key for ESSIV cipher";
176 crypto_free_tfm(essiv_tfm); 176 crypto_free_tfm(essiv_tfm);
177 kfree(salt); 177 kfree(salt);
178 return -EINVAL; 178 return -EINVAL;
@@ -521,7 +521,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
521 unsigned long long tmpll; 521 unsigned long long tmpll;
522 522
523 if (argc != 5) { 523 if (argc != 5) {
524 ti->error = PFX "Not enough arguments"; 524 ti->error = "Not enough arguments";
525 return -EINVAL; 525 return -EINVAL;
526 } 526 }
527 527
@@ -532,21 +532,21 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
532 ivmode = strsep(&ivopts, ":"); 532 ivmode = strsep(&ivopts, ":");
533 533
534 if (tmp) 534 if (tmp)
535 DMWARN(PFX "Unexpected additional cipher options"); 535 DMWARN("Unexpected additional cipher options");
536 536
537 key_size = strlen(argv[1]) >> 1; 537 key_size = strlen(argv[1]) >> 1;
538 538
539 cc = kmalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL); 539 cc = kmalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
540 if (cc == NULL) { 540 if (cc == NULL) {
541 ti->error = 541 ti->error =
542 PFX "Cannot allocate transparent encryption context"; 542 "Cannot allocate transparent encryption context";
543 return -ENOMEM; 543 return -ENOMEM;
544 } 544 }
545 545
546 cc->key_size = key_size; 546 cc->key_size = key_size;
547 if ((!key_size && strcmp(argv[1], "-") != 0) || 547 if ((!key_size && strcmp(argv[1], "-") != 0) ||
548 (key_size && crypt_decode_key(cc->key, argv[1], key_size) < 0)) { 548 (key_size && crypt_decode_key(cc->key, argv[1], key_size) < 0)) {
549 ti->error = PFX "Error decoding key"; 549 ti->error = "Error decoding key";
550 goto bad1; 550 goto bad1;
551 } 551 }
552 552
@@ -562,22 +562,22 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
562 else if (strcmp(chainmode, "ecb") == 0) 562 else if (strcmp(chainmode, "ecb") == 0)
563 crypto_flags = CRYPTO_TFM_MODE_ECB; 563 crypto_flags = CRYPTO_TFM_MODE_ECB;
564 else { 564 else {
565 ti->error = PFX "Unknown chaining mode"; 565 ti->error = "Unknown chaining mode";
566 goto bad1; 566 goto bad1;
567 } 567 }
568 568
569 if (crypto_flags != CRYPTO_TFM_MODE_ECB && !ivmode) { 569 if (crypto_flags != CRYPTO_TFM_MODE_ECB && !ivmode) {
570 ti->error = PFX "This chaining mode requires an IV mechanism"; 570 ti->error = "This chaining mode requires an IV mechanism";
571 goto bad1; 571 goto bad1;
572 } 572 }
573 573
574 tfm = crypto_alloc_tfm(cipher, crypto_flags | CRYPTO_TFM_REQ_MAY_SLEEP); 574 tfm = crypto_alloc_tfm(cipher, crypto_flags | CRYPTO_TFM_REQ_MAY_SLEEP);
575 if (!tfm) { 575 if (!tfm) {
576 ti->error = PFX "Error allocating crypto tfm"; 576 ti->error = "Error allocating crypto tfm";
577 goto bad1; 577 goto bad1;
578 } 578 }
579 if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER) { 579 if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER) {
580 ti->error = PFX "Expected cipher algorithm"; 580 ti->error = "Expected cipher algorithm";
581 goto bad2; 581 goto bad2;
582 } 582 }
583 583
@@ -595,7 +595,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
595 else if (strcmp(ivmode, "essiv") == 0) 595 else if (strcmp(ivmode, "essiv") == 0)
596 cc->iv_gen_ops = &crypt_iv_essiv_ops; 596 cc->iv_gen_ops = &crypt_iv_essiv_ops;
597 else { 597 else {
598 ti->error = PFX "Invalid IV mode"; 598 ti->error = "Invalid IV mode";
599 goto bad2; 599 goto bad2;
600 } 600 }
601 601
@@ -610,7 +610,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
610 else { 610 else {
611 cc->iv_size = 0; 611 cc->iv_size = 0;
612 if (cc->iv_gen_ops) { 612 if (cc->iv_gen_ops) {
613 DMWARN(PFX "Selected cipher does not support IVs"); 613 DMWARN("Selected cipher does not support IVs");
614 if (cc->iv_gen_ops->dtr) 614 if (cc->iv_gen_ops->dtr)
615 cc->iv_gen_ops->dtr(cc); 615 cc->iv_gen_ops->dtr(cc);
616 cc->iv_gen_ops = NULL; 616 cc->iv_gen_ops = NULL;
@@ -619,36 +619,36 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
619 619
620 cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); 620 cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
621 if (!cc->io_pool) { 621 if (!cc->io_pool) {
622 ti->error = PFX "Cannot allocate crypt io mempool"; 622 ti->error = "Cannot allocate crypt io mempool";
623 goto bad3; 623 goto bad3;
624 } 624 }
625 625
626 cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); 626 cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
627 if (!cc->page_pool) { 627 if (!cc->page_pool) {
628 ti->error = PFX "Cannot allocate page mempool"; 628 ti->error = "Cannot allocate page mempool";
629 goto bad4; 629 goto bad4;
630 } 630 }
631 631
632 if (tfm->crt_cipher.cit_setkey(tfm, cc->key, key_size) < 0) { 632 if (tfm->crt_cipher.cit_setkey(tfm, cc->key, key_size) < 0) {
633 ti->error = PFX "Error setting key"; 633 ti->error = "Error setting key";
634 goto bad5; 634 goto bad5;
635 } 635 }
636 636
637 if (sscanf(argv[2], "%llu", &tmpll) != 1) { 637 if (sscanf(argv[2], "%llu", &tmpll) != 1) {
638 ti->error = PFX "Invalid iv_offset sector"; 638 ti->error = "Invalid iv_offset sector";
639 goto bad5; 639 goto bad5;
640 } 640 }
641 cc->iv_offset = tmpll; 641 cc->iv_offset = tmpll;
642 642
643 if (sscanf(argv[4], "%llu", &tmpll) != 1) { 643 if (sscanf(argv[4], "%llu", &tmpll) != 1) {
644 ti->error = PFX "Invalid device sector"; 644 ti->error = "Invalid device sector";
645 goto bad5; 645 goto bad5;
646 } 646 }
647 cc->start = tmpll; 647 cc->start = tmpll;
648 648
649 if (dm_get_device(ti, argv[3], cc->start, ti->len, 649 if (dm_get_device(ti, argv[3], cc->start, ti->len,
650 dm_table_get_mode(ti->table), &cc->dev)) { 650 dm_table_get_mode(ti->table), &cc->dev)) {
651 ti->error = PFX "Device lookup failed"; 651 ti->error = "Device lookup failed";
652 goto bad5; 652 goto bad5;
653 } 653 }
654 654
@@ -657,7 +657,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
657 *(ivopts - 1) = ':'; 657 *(ivopts - 1) = ':';
658 cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL); 658 cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
659 if (!cc->iv_mode) { 659 if (!cc->iv_mode) {
660 ti->error = PFX "Error kmallocing iv_mode string"; 660 ti->error = "Error kmallocing iv_mode string";
661 goto bad5; 661 goto bad5;
662 } 662 }
663 strcpy(cc->iv_mode, ivmode); 663 strcpy(cc->iv_mode, ivmode);
@@ -918,13 +918,13 @@ static int __init dm_crypt_init(void)
918 _kcryptd_workqueue = create_workqueue("kcryptd"); 918 _kcryptd_workqueue = create_workqueue("kcryptd");
919 if (!_kcryptd_workqueue) { 919 if (!_kcryptd_workqueue) {
920 r = -ENOMEM; 920 r = -ENOMEM;
921 DMERR(PFX "couldn't create kcryptd"); 921 DMERR("couldn't create kcryptd");
922 goto bad1; 922 goto bad1;
923 } 923 }
924 924
925 r = dm_register_target(&crypt_target); 925 r = dm_register_target(&crypt_target);
926 if (r < 0) { 926 if (r < 0) {
927 DMERR(PFX "register failed %d", r); 927 DMERR("register failed %d", r);
928 goto bad2; 928 goto bad2;
929 } 929 }
930 930
@@ -942,7 +942,7 @@ static void __exit dm_crypt_exit(void)
942 int r = dm_unregister_target(&crypt_target); 942 int r = dm_unregister_target(&crypt_target);
943 943
944 if (r < 0) 944 if (r < 0)
945 DMERR(PFX "unregister failed %d", r); 945 DMERR("unregister failed %d", r);
946 946
947 destroy_workqueue(_kcryptd_workqueue); 947 destroy_workqueue(_kcryptd_workqueue);
948 kmem_cache_destroy(_crypt_io_pool); 948 kmem_cache_destroy(_crypt_io_pool);
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index c7067674dcb7..2a374ccb30dd 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -12,6 +12,8 @@
12#include <scsi/scsi.h> 12#include <scsi/scsi.h>
13#include <scsi/scsi_cmnd.h> 13#include <scsi/scsi_cmnd.h>
14 14
15#define DM_MSG_PREFIX "multipath emc"
16
15struct emc_handler { 17struct emc_handler {
16 spinlock_t lock; 18 spinlock_t lock;
17 19
@@ -66,7 +68,7 @@ static struct bio *get_failover_bio(struct path *path, unsigned data_size)
66 68
67 bio = bio_alloc(GFP_ATOMIC, 1); 69 bio = bio_alloc(GFP_ATOMIC, 1);
68 if (!bio) { 70 if (!bio) {
69 DMERR("dm-emc: get_failover_bio: bio_alloc() failed."); 71 DMERR("get_failover_bio: bio_alloc() failed.");
70 return NULL; 72 return NULL;
71 } 73 }
72 74
@@ -78,13 +80,13 @@ static struct bio *get_failover_bio(struct path *path, unsigned data_size)
78 80
79 page = alloc_page(GFP_ATOMIC); 81 page = alloc_page(GFP_ATOMIC);
80 if (!page) { 82 if (!page) {
81 DMERR("dm-emc: get_failover_bio: alloc_page() failed."); 83 DMERR("get_failover_bio: alloc_page() failed.");
82 bio_put(bio); 84 bio_put(bio);
83 return NULL; 85 return NULL;
84 } 86 }
85 87
86 if (bio_add_page(bio, page, data_size, 0) != data_size) { 88 if (bio_add_page(bio, page, data_size, 0) != data_size) {
87 DMERR("dm-emc: get_failover_bio: alloc_page() failed."); 89 DMERR("get_failover_bio: alloc_page() failed.");
88 __free_page(page); 90 __free_page(page);
89 bio_put(bio); 91 bio_put(bio);
90 return NULL; 92 return NULL;
@@ -103,7 +105,7 @@ static struct request *get_failover_req(struct emc_handler *h,
103 /* FIXME: Figure out why it fails with GFP_ATOMIC. */ 105 /* FIXME: Figure out why it fails with GFP_ATOMIC. */
104 rq = blk_get_request(q, WRITE, __GFP_WAIT); 106 rq = blk_get_request(q, WRITE, __GFP_WAIT);
105 if (!rq) { 107 if (!rq) {
106 DMERR("dm-emc: get_failover_req: blk_get_request failed"); 108 DMERR("get_failover_req: blk_get_request failed");
107 return NULL; 109 return NULL;
108 } 110 }
109 111
@@ -160,7 +162,7 @@ static struct request *emc_trespass_get(struct emc_handler *h,
160 162
161 bio = get_failover_bio(path, data_size); 163 bio = get_failover_bio(path, data_size);
162 if (!bio) { 164 if (!bio) {
163 DMERR("dm-emc: emc_trespass_get: no bio"); 165 DMERR("emc_trespass_get: no bio");
164 return NULL; 166 return NULL;
165 } 167 }
166 168
@@ -173,7 +175,7 @@ static struct request *emc_trespass_get(struct emc_handler *h,
173 /* get request for block layer packet command */ 175 /* get request for block layer packet command */
174 rq = get_failover_req(h, bio, path); 176 rq = get_failover_req(h, bio, path);
175 if (!rq) { 177 if (!rq) {
176 DMERR("dm-emc: emc_trespass_get: no rq"); 178 DMERR("emc_trespass_get: no rq");
177 free_bio(bio); 179 free_bio(bio);
178 return NULL; 180 return NULL;
179 } 181 }
@@ -200,18 +202,18 @@ static void emc_pg_init(struct hw_handler *hwh, unsigned bypassed,
200 * initial state passed into us and then get an update here. 202 * initial state passed into us and then get an update here.
201 */ 203 */
202 if (!q) { 204 if (!q) {
203 DMINFO("dm-emc: emc_pg_init: no queue"); 205 DMINFO("emc_pg_init: no queue");
204 goto fail_path; 206 goto fail_path;
205 } 207 }
206 208
207 /* FIXME: The request should be pre-allocated. */ 209 /* FIXME: The request should be pre-allocated. */
208 rq = emc_trespass_get(hwh->context, path); 210 rq = emc_trespass_get(hwh->context, path);
209 if (!rq) { 211 if (!rq) {
210 DMERR("dm-emc: emc_pg_init: no rq"); 212 DMERR("emc_pg_init: no rq");
211 goto fail_path; 213 goto fail_path;
212 } 214 }
213 215
214 DMINFO("dm-emc: emc_pg_init: sending switch-over command"); 216 DMINFO("emc_pg_init: sending switch-over command");
215 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1); 217 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
216 return; 218 return;
217 219
@@ -241,18 +243,18 @@ static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv)
241 hr = 0; 243 hr = 0;
242 short_trespass = 0; 244 short_trespass = 0;
243 } else if (argc != 2) { 245 } else if (argc != 2) {
244 DMWARN("dm-emc hwhandler: incorrect number of arguments"); 246 DMWARN("incorrect number of arguments");
245 return -EINVAL; 247 return -EINVAL;
246 } else { 248 } else {
247 if ((sscanf(argv[0], "%u", &short_trespass) != 1) 249 if ((sscanf(argv[0], "%u", &short_trespass) != 1)
248 || (short_trespass > 1)) { 250 || (short_trespass > 1)) {
249 DMWARN("dm-emc: invalid trespass mode selected"); 251 DMWARN("invalid trespass mode selected");
250 return -EINVAL; 252 return -EINVAL;
251 } 253 }
252 254
253 if ((sscanf(argv[1], "%u", &hr) != 1) 255 if ((sscanf(argv[1], "%u", &hr) != 1)
254 || (hr > 1)) { 256 || (hr > 1)) {
255 DMWARN("dm-emc: invalid honor reservation flag selected"); 257 DMWARN("invalid honor reservation flag selected");
256 return -EINVAL; 258 return -EINVAL;
257 } 259 }
258 } 260 }
@@ -264,14 +266,14 @@ static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv)
264 hwh->context = h; 266 hwh->context = h;
265 267
266 if ((h->short_trespass = short_trespass)) 268 if ((h->short_trespass = short_trespass))
267 DMWARN("dm-emc: short trespass command will be send"); 269 DMWARN("short trespass command will be send");
268 else 270 else
269 DMWARN("dm-emc: long trespass command will be send"); 271 DMWARN("long trespass command will be send");
270 272
271 if ((h->hr = hr)) 273 if ((h->hr = hr))
272 DMWARN("dm-emc: honor reservation bit will be set"); 274 DMWARN("honor reservation bit will be set");
273 else 275 else
274 DMWARN("dm-emc: honor reservation bit will not be set (default)"); 276 DMWARN("honor reservation bit will not be set (default)");
275 277
276 return 0; 278 return 0;
277} 279}
@@ -336,9 +338,9 @@ static int __init dm_emc_init(void)
336 int r = dm_register_hw_handler(&emc_hwh); 338 int r = dm_register_hw_handler(&emc_hwh);
337 339
338 if (r < 0) 340 if (r < 0)
339 DMERR("emc: register failed %d", r); 341 DMERR("register failed %d", r);
340 342
341 DMINFO("dm-emc version 0.0.3 loaded"); 343 DMINFO("version 0.0.3 loaded");
342 344
343 return r; 345 return r;
344} 346}
@@ -348,7 +350,7 @@ static void __exit dm_emc_exit(void)
348 int r = dm_unregister_hw_handler(&emc_hwh); 350 int r = dm_unregister_hw_handler(&emc_hwh);
349 351
350 if (r < 0) 352 if (r < 0)
351 DMERR("emc: unregister failed %d", r); 353 DMERR("unregister failed %d", r);
352} 354}
353 355
354module_init(dm_emc_init); 356module_init(dm_emc_init);
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index cc07bbebbb16..d12379b5cdb5 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -16,6 +16,8 @@
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18 18
19#define DM_MSG_PREFIX "snapshots"
20
19/*----------------------------------------------------------------- 21/*-----------------------------------------------------------------
20 * Persistent snapshots, by persistent we mean that the snapshot 22 * Persistent snapshots, by persistent we mean that the snapshot
21 * will survive a reboot. 23 * will survive a reboot.
@@ -91,7 +93,6 @@ struct pstore {
91 struct dm_snapshot *snap; /* up pointer to my snapshot */ 93 struct dm_snapshot *snap; /* up pointer to my snapshot */
92 int version; 94 int version;
93 int valid; 95 int valid;
94 uint32_t chunk_size;
95 uint32_t exceptions_per_area; 96 uint32_t exceptions_per_area;
96 97
97 /* 98 /*
@@ -133,7 +134,7 @@ static int alloc_area(struct pstore *ps)
133 int r = -ENOMEM; 134 int r = -ENOMEM;
134 size_t len; 135 size_t len;
135 136
136 len = ps->chunk_size << SECTOR_SHIFT; 137 len = ps->snap->chunk_size << SECTOR_SHIFT;
137 138
138 /* 139 /*
139 * Allocate the chunk_size block of memory that will hold 140 * Allocate the chunk_size block of memory that will hold
@@ -160,8 +161,8 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw)
160 unsigned long bits; 161 unsigned long bits;
161 162
162 where.bdev = ps->snap->cow->bdev; 163 where.bdev = ps->snap->cow->bdev;
163 where.sector = ps->chunk_size * chunk; 164 where.sector = ps->snap->chunk_size * chunk;
164 where.count = ps->chunk_size; 165 where.count = ps->snap->chunk_size;
165 166
166 return dm_io_sync_vm(1, &where, rw, ps->area, &bits); 167 return dm_io_sync_vm(1, &where, rw, ps->area, &bits);
167} 168}
@@ -188,7 +189,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw)
188 189
189static int zero_area(struct pstore *ps, uint32_t area) 190static int zero_area(struct pstore *ps, uint32_t area)
190{ 191{
191 memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); 192 memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
192 return area_io(ps, area, WRITE); 193 return area_io(ps, area, WRITE);
193} 194}
194 195
@@ -196,6 +197,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
196{ 197{
197 int r; 198 int r;
198 struct disk_header *dh; 199 struct disk_header *dh;
200 chunk_t chunk_size;
199 201
200 r = chunk_io(ps, 0, READ); 202 r = chunk_io(ps, 0, READ);
201 if (r) 203 if (r)
@@ -210,8 +212,29 @@ static int read_header(struct pstore *ps, int *new_snapshot)
210 *new_snapshot = 0; 212 *new_snapshot = 0;
211 ps->valid = le32_to_cpu(dh->valid); 213 ps->valid = le32_to_cpu(dh->valid);
212 ps->version = le32_to_cpu(dh->version); 214 ps->version = le32_to_cpu(dh->version);
213 ps->chunk_size = le32_to_cpu(dh->chunk_size); 215 chunk_size = le32_to_cpu(dh->chunk_size);
214 216 if (ps->snap->chunk_size != chunk_size) {
217 DMWARN("chunk size %llu in device metadata overrides "
218 "table chunk size of %llu.",
219 (unsigned long long)chunk_size,
220 (unsigned long long)ps->snap->chunk_size);
221
222 /* We had a bogus chunk_size. Fix stuff up. */
223 dm_io_put(sectors_to_pages(ps->snap->chunk_size));
224 free_area(ps);
225
226 ps->snap->chunk_size = chunk_size;
227 ps->snap->chunk_mask = chunk_size - 1;
228 ps->snap->chunk_shift = ffs(chunk_size) - 1;
229
230 r = alloc_area(ps);
231 if (r)
232 return r;
233
234 r = dm_io_get(sectors_to_pages(chunk_size));
235 if (r)
236 return r;
237 }
215 } else { 238 } else {
216 DMWARN("Invalid/corrupt snapshot"); 239 DMWARN("Invalid/corrupt snapshot");
217 r = -ENXIO; 240 r = -ENXIO;
@@ -224,13 +247,13 @@ static int write_header(struct pstore *ps)
224{ 247{
225 struct disk_header *dh; 248 struct disk_header *dh;
226 249
227 memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); 250 memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
228 251
229 dh = (struct disk_header *) ps->area; 252 dh = (struct disk_header *) ps->area;
230 dh->magic = cpu_to_le32(SNAP_MAGIC); 253 dh->magic = cpu_to_le32(SNAP_MAGIC);
231 dh->valid = cpu_to_le32(ps->valid); 254 dh->valid = cpu_to_le32(ps->valid);
232 dh->version = cpu_to_le32(ps->version); 255 dh->version = cpu_to_le32(ps->version);
233 dh->chunk_size = cpu_to_le32(ps->chunk_size); 256 dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
234 257
235 return chunk_io(ps, 0, WRITE); 258 return chunk_io(ps, 0, WRITE);
236} 259}
@@ -365,7 +388,7 @@ static void persistent_destroy(struct exception_store *store)
365{ 388{
366 struct pstore *ps = get_info(store); 389 struct pstore *ps = get_info(store);
367 390
368 dm_io_put(sectors_to_pages(ps->chunk_size)); 391 dm_io_put(sectors_to_pages(ps->snap->chunk_size));
369 vfree(ps->callbacks); 392 vfree(ps->callbacks);
370 free_area(ps); 393 free_area(ps);
371 kfree(ps); 394 kfree(ps);
@@ -384,6 +407,16 @@ static int persistent_read_metadata(struct exception_store *store)
384 return r; 407 return r;
385 408
386 /* 409 /*
410 * Now we know correct chunk_size, complete the initialisation.
411 */
412 ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
413 sizeof(struct disk_exception);
414 ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
415 sizeof(*ps->callbacks));
416 if (!ps->callbacks)
417 return -ENOMEM;
418
419 /*
387 * Do we need to setup a new snapshot ? 420 * Do we need to setup a new snapshot ?
388 */ 421 */
389 if (new_snapshot) { 422 if (new_snapshot) {
@@ -533,9 +566,6 @@ int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
533 ps->snap = store->snap; 566 ps->snap = store->snap;
534 ps->valid = 1; 567 ps->valid = 1;
535 ps->version = SNAPSHOT_DISK_VERSION; 568 ps->version = SNAPSHOT_DISK_VERSION;
536 ps->chunk_size = chunk_size;
537 ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) /
538 sizeof(struct disk_exception);
539 ps->next_free = 2; /* skipping the header and first area */ 569 ps->next_free = 2; /* skipping the header and first area */
540 ps->current_committed = 0; 570 ps->current_committed = 0;
541 571
@@ -543,18 +573,9 @@ int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
543 if (r) 573 if (r)
544 goto bad; 574 goto bad;
545 575
546 /*
547 * Allocate space for all the callbacks.
548 */
549 ps->callback_count = 0; 576 ps->callback_count = 0;
550 atomic_set(&ps->pending_count, 0); 577 atomic_set(&ps->pending_count, 0);
551 ps->callbacks = dm_vcalloc(ps->exceptions_per_area, 578 ps->callbacks = NULL;
552 sizeof(*ps->callbacks));
553
554 if (!ps->callbacks) {
555 r = -ENOMEM;
556 goto bad;
557 }
558 579
559 store->destroy = persistent_destroy; 580 store->destroy = persistent_destroy;
560 store->read_metadata = persistent_read_metadata; 581 store->read_metadata = persistent_read_metadata;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 8edd6435414d..3edb3477f987 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. 2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
3 * Copyright (C) 2004 - 2005 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004 - 2006 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This file is released under the GPL. 5 * This file is released under the GPL.
6 */ 6 */
@@ -19,6 +19,7 @@
19 19
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21 21
22#define DM_MSG_PREFIX "ioctl"
22#define DM_DRIVER_EMAIL "dm-devel@redhat.com" 23#define DM_DRIVER_EMAIL "dm-devel@redhat.com"
23 24
24/*----------------------------------------------------------------- 25/*-----------------------------------------------------------------
@@ -48,7 +49,7 @@ struct vers_iter {
48static struct list_head _name_buckets[NUM_BUCKETS]; 49static struct list_head _name_buckets[NUM_BUCKETS];
49static struct list_head _uuid_buckets[NUM_BUCKETS]; 50static struct list_head _uuid_buckets[NUM_BUCKETS];
50 51
51static void dm_hash_remove_all(void); 52static void dm_hash_remove_all(int keep_open_devices);
52 53
53/* 54/*
54 * Guards access to both hash tables. 55 * Guards access to both hash tables.
@@ -73,7 +74,7 @@ static int dm_hash_init(void)
73 74
74static void dm_hash_exit(void) 75static void dm_hash_exit(void)
75{ 76{
76 dm_hash_remove_all(); 77 dm_hash_remove_all(0);
77 devfs_remove(DM_DIR); 78 devfs_remove(DM_DIR);
78} 79}
79 80
@@ -102,8 +103,10 @@ static struct hash_cell *__get_name_cell(const char *str)
102 unsigned int h = hash_str(str); 103 unsigned int h = hash_str(str);
103 104
104 list_for_each_entry (hc, _name_buckets + h, name_list) 105 list_for_each_entry (hc, _name_buckets + h, name_list)
105 if (!strcmp(hc->name, str)) 106 if (!strcmp(hc->name, str)) {
107 dm_get(hc->md);
106 return hc; 108 return hc;
109 }
107 110
108 return NULL; 111 return NULL;
109} 112}
@@ -114,8 +117,10 @@ static struct hash_cell *__get_uuid_cell(const char *str)
114 unsigned int h = hash_str(str); 117 unsigned int h = hash_str(str);
115 118
116 list_for_each_entry (hc, _uuid_buckets + h, uuid_list) 119 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
117 if (!strcmp(hc->uuid, str)) 120 if (!strcmp(hc->uuid, str)) {
121 dm_get(hc->md);
118 return hc; 122 return hc;
123 }
119 124
120 return NULL; 125 return NULL;
121} 126}
@@ -191,7 +196,7 @@ static int unregister_with_devfs(struct hash_cell *hc)
191 */ 196 */
192static int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md) 197static int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
193{ 198{
194 struct hash_cell *cell; 199 struct hash_cell *cell, *hc;
195 200
196 /* 201 /*
197 * Allocate the new cells. 202 * Allocate the new cells.
@@ -204,14 +209,19 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
204 * Insert the cell into both hash tables. 209 * Insert the cell into both hash tables.
205 */ 210 */
206 down_write(&_hash_lock); 211 down_write(&_hash_lock);
207 if (__get_name_cell(name)) 212 hc = __get_name_cell(name);
213 if (hc) {
214 dm_put(hc->md);
208 goto bad; 215 goto bad;
216 }
209 217
210 list_add(&cell->name_list, _name_buckets + hash_str(name)); 218 list_add(&cell->name_list, _name_buckets + hash_str(name));
211 219
212 if (uuid) { 220 if (uuid) {
213 if (__get_uuid_cell(uuid)) { 221 hc = __get_uuid_cell(uuid);
222 if (hc) {
214 list_del(&cell->name_list); 223 list_del(&cell->name_list);
224 dm_put(hc->md);
215 goto bad; 225 goto bad;
216 } 226 }
217 list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); 227 list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
@@ -251,19 +261,41 @@ static void __hash_remove(struct hash_cell *hc)
251 free_cell(hc); 261 free_cell(hc);
252} 262}
253 263
254static void dm_hash_remove_all(void) 264static void dm_hash_remove_all(int keep_open_devices)
255{ 265{
256 int i; 266 int i, dev_skipped, dev_removed;
257 struct hash_cell *hc; 267 struct hash_cell *hc;
258 struct list_head *tmp, *n; 268 struct list_head *tmp, *n;
259 269
260 down_write(&_hash_lock); 270 down_write(&_hash_lock);
271
272retry:
273 dev_skipped = dev_removed = 0;
261 for (i = 0; i < NUM_BUCKETS; i++) { 274 for (i = 0; i < NUM_BUCKETS; i++) {
262 list_for_each_safe (tmp, n, _name_buckets + i) { 275 list_for_each_safe (tmp, n, _name_buckets + i) {
263 hc = list_entry(tmp, struct hash_cell, name_list); 276 hc = list_entry(tmp, struct hash_cell, name_list);
277
278 if (keep_open_devices &&
279 dm_lock_for_deletion(hc->md)) {
280 dev_skipped++;
281 continue;
282 }
264 __hash_remove(hc); 283 __hash_remove(hc);
284 dev_removed = 1;
265 } 285 }
266 } 286 }
287
288 /*
289 * Some mapped devices may be using other mapped devices, so if any
290 * still exist, repeat until we make no further progress.
291 */
292 if (dev_skipped) {
293 if (dev_removed)
294 goto retry;
295
296 DMWARN("remove_all left %d open device(s)", dev_skipped);
297 }
298
267 up_write(&_hash_lock); 299 up_write(&_hash_lock);
268} 300}
269 301
@@ -289,6 +321,7 @@ static int dm_hash_rename(const char *old, const char *new)
289 if (hc) { 321 if (hc) {
290 DMWARN("asked to rename to an already existing name %s -> %s", 322 DMWARN("asked to rename to an already existing name %s -> %s",
291 old, new); 323 old, new);
324 dm_put(hc->md);
292 up_write(&_hash_lock); 325 up_write(&_hash_lock);
293 kfree(new_name); 326 kfree(new_name);
294 return -EBUSY; 327 return -EBUSY;
@@ -328,6 +361,7 @@ static int dm_hash_rename(const char *old, const char *new)
328 dm_table_put(table); 361 dm_table_put(table);
329 } 362 }
330 363
364 dm_put(hc->md);
331 up_write(&_hash_lock); 365 up_write(&_hash_lock);
332 kfree(old_name); 366 kfree(old_name);
333 return 0; 367 return 0;
@@ -344,7 +378,7 @@ typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
344 378
345static int remove_all(struct dm_ioctl *param, size_t param_size) 379static int remove_all(struct dm_ioctl *param, size_t param_size)
346{ 380{
347 dm_hash_remove_all(); 381 dm_hash_remove_all(1);
348 param->data_size = 0; 382 param->data_size = 0;
349 return 0; 383 return 0;
350} 384}
@@ -524,7 +558,6 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
524{ 558{
525 struct gendisk *disk = dm_disk(md); 559 struct gendisk *disk = dm_disk(md);
526 struct dm_table *table; 560 struct dm_table *table;
527 struct block_device *bdev;
528 561
529 param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG | 562 param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
530 DM_ACTIVE_PRESENT_FLAG); 563 DM_ACTIVE_PRESENT_FLAG);
@@ -534,20 +567,12 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
534 567
535 param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); 568 param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
536 569
537 if (!(param->flags & DM_SKIP_BDGET_FLAG)) { 570 /*
538 bdev = bdget_disk(disk, 0); 571 * Yes, this will be out of date by the time it gets back
539 if (!bdev) 572 * to userland, but it is still very useful for
540 return -ENXIO; 573 * debugging.
541 574 */
542 /* 575 param->open_count = dm_open_count(md);
543 * Yes, this will be out of date by the time it gets back
544 * to userland, but it is still very useful for
545 * debugging.
546 */
547 param->open_count = bdev->bd_openers;
548 bdput(bdev);
549 } else
550 param->open_count = -1;
551 576
552 if (disk->policy) 577 if (disk->policy)
553 param->flags |= DM_READONLY_FLAG; 578 param->flags |= DM_READONLY_FLAG;
@@ -567,7 +592,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
567 592
568static int dev_create(struct dm_ioctl *param, size_t param_size) 593static int dev_create(struct dm_ioctl *param, size_t param_size)
569{ 594{
570 int r; 595 int r, m = DM_ANY_MINOR;
571 struct mapped_device *md; 596 struct mapped_device *md;
572 597
573 r = check_name(param->name); 598 r = check_name(param->name);
@@ -575,10 +600,9 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
575 return r; 600 return r;
576 601
577 if (param->flags & DM_PERSISTENT_DEV_FLAG) 602 if (param->flags & DM_PERSISTENT_DEV_FLAG)
578 r = dm_create_with_minor(MINOR(huge_decode_dev(param->dev)), &md); 603 m = MINOR(huge_decode_dev(param->dev));
579 else
580 r = dm_create(&md);
581 604
605 r = dm_create(m, &md);
582 if (r) 606 if (r)
583 return r; 607 return r;
584 608
@@ -611,10 +635,8 @@ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
611 return __get_name_cell(param->name); 635 return __get_name_cell(param->name);
612 636
613 md = dm_get_md(huge_decode_dev(param->dev)); 637 md = dm_get_md(huge_decode_dev(param->dev));
614 if (md) { 638 if (md)
615 mdptr = dm_get_mdptr(md); 639 mdptr = dm_get_mdptr(md);
616 dm_put(md);
617 }
618 640
619 return mdptr; 641 return mdptr;
620} 642}
@@ -628,7 +650,6 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
628 hc = __find_device_hash_cell(param); 650 hc = __find_device_hash_cell(param);
629 if (hc) { 651 if (hc) {
630 md = hc->md; 652 md = hc->md;
631 dm_get(md);
632 653
633 /* 654 /*
634 * Sneakily write in both the name and the uuid 655 * Sneakily write in both the name and the uuid
@@ -653,6 +674,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
653static int dev_remove(struct dm_ioctl *param, size_t param_size) 674static int dev_remove(struct dm_ioctl *param, size_t param_size)
654{ 675{
655 struct hash_cell *hc; 676 struct hash_cell *hc;
677 struct mapped_device *md;
678 int r;
656 679
657 down_write(&_hash_lock); 680 down_write(&_hash_lock);
658 hc = __find_device_hash_cell(param); 681 hc = __find_device_hash_cell(param);
@@ -663,8 +686,22 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
663 return -ENXIO; 686 return -ENXIO;
664 } 687 }
665 688
689 md = hc->md;
690
691 /*
692 * Ensure the device is not open and nothing further can open it.
693 */
694 r = dm_lock_for_deletion(md);
695 if (r) {
696 DMWARN("unable to remove open device %s", hc->name);
697 up_write(&_hash_lock);
698 dm_put(md);
699 return r;
700 }
701
666 __hash_remove(hc); 702 __hash_remove(hc);
667 up_write(&_hash_lock); 703 up_write(&_hash_lock);
704 dm_put(md);
668 param->data_size = 0; 705 param->data_size = 0;
669 return 0; 706 return 0;
670} 707}
@@ -790,7 +827,6 @@ static int do_resume(struct dm_ioctl *param)
790 } 827 }
791 828
792 md = hc->md; 829 md = hc->md;
793 dm_get(md);
794 830
795 new_map = hc->new_map; 831 new_map = hc->new_map;
796 hc->new_map = NULL; 832 hc->new_map = NULL;
@@ -1078,6 +1114,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
1078{ 1114{
1079 int r; 1115 int r;
1080 struct hash_cell *hc; 1116 struct hash_cell *hc;
1117 struct mapped_device *md;
1081 1118
1082 down_write(&_hash_lock); 1119 down_write(&_hash_lock);
1083 1120
@@ -1096,7 +1133,9 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
1096 param->flags &= ~DM_INACTIVE_PRESENT_FLAG; 1133 param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
1097 1134
1098 r = __dev_status(hc->md, param); 1135 r = __dev_status(hc->md, param);
1136 md = hc->md;
1099 up_write(&_hash_lock); 1137 up_write(&_hash_lock);
1138 dm_put(md);
1100 return r; 1139 return r;
1101} 1140}
1102 1141
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index daf586c0898d..47b3c62bbdb8 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -12,6 +12,8 @@
12#include <linux/bio.h> 12#include <linux/bio.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14 14
15#define DM_MSG_PREFIX "linear"
16
15/* 17/*
16 * Linear: maps a linear range of a device. 18 * Linear: maps a linear range of a device.
17 */ 19 */
@@ -29,7 +31,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
29 unsigned long long tmp; 31 unsigned long long tmp;
30 32
31 if (argc != 2) { 33 if (argc != 2) {
32 ti->error = "dm-linear: Invalid argument count"; 34 ti->error = "Invalid argument count";
33 return -EINVAL; 35 return -EINVAL;
34 } 36 }
35 37
@@ -111,7 +113,7 @@ int __init dm_linear_init(void)
111 int r = dm_register_target(&linear_target); 113 int r = dm_register_target(&linear_target);
112 114
113 if (r < 0) 115 if (r < 0)
114 DMERR("linear: register failed %d", r); 116 DMERR("register failed %d", r);
115 117
116 return r; 118 return r;
117} 119}
@@ -121,5 +123,5 @@ void dm_linear_exit(void)
121 int r = dm_unregister_target(&linear_target); 123 int r = dm_unregister_target(&linear_target);
122 124
123 if (r < 0) 125 if (r < 0)
124 DMERR("linear: unregister failed %d", r); 126 DMERR("unregister failed %d", r);
125} 127}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index d73779a42417..64b764bd02cc 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -12,6 +12,8 @@
12#include "dm-log.h" 12#include "dm-log.h"
13#include "dm-io.h" 13#include "dm-io.h"
14 14
15#define DM_MSG_PREFIX "mirror log"
16
15static LIST_HEAD(_log_types); 17static LIST_HEAD(_log_types);
16static DEFINE_SPINLOCK(_lock); 18static DEFINE_SPINLOCK(_lock);
17 19
@@ -155,8 +157,6 @@ struct log_c {
155 157
156 struct io_region header_location; 158 struct io_region header_location;
157 struct log_header *disk_header; 159 struct log_header *disk_header;
158
159 struct io_region bits_location;
160}; 160};
161 161
162/* 162/*
@@ -241,43 +241,21 @@ static inline int write_header(struct log_c *log)
241} 241}
242 242
243/*---------------------------------------------------------------- 243/*----------------------------------------------------------------
244 * Bits IO
245 *--------------------------------------------------------------*/
246static int read_bits(struct log_c *log)
247{
248 int r;
249 unsigned long ebits;
250
251 r = dm_io_sync_vm(1, &log->bits_location, READ,
252 log->clean_bits, &ebits);
253 if (r)
254 return r;
255
256 return 0;
257}
258
259static int write_bits(struct log_c *log)
260{
261 unsigned long ebits;
262 return dm_io_sync_vm(1, &log->bits_location, WRITE,
263 log->clean_bits, &ebits);
264}
265
266/*----------------------------------------------------------------
267 * core log constructor/destructor 244 * core log constructor/destructor
268 * 245 *
269 * argv contains region_size followed optionally by [no]sync 246 * argv contains region_size followed optionally by [no]sync
270 *--------------------------------------------------------------*/ 247 *--------------------------------------------------------------*/
271#define BYTE_SHIFT 3 248#define BYTE_SHIFT 3
272static int core_ctr(struct dirty_log *log, struct dm_target *ti, 249static int create_log_context(struct dirty_log *log, struct dm_target *ti,
273 unsigned int argc, char **argv) 250 unsigned int argc, char **argv,
251 struct dm_dev *dev)
274{ 252{
275 enum sync sync = DEFAULTSYNC; 253 enum sync sync = DEFAULTSYNC;
276 254
277 struct log_c *lc; 255 struct log_c *lc;
278 uint32_t region_size; 256 uint32_t region_size;
279 unsigned int region_count; 257 unsigned int region_count;
280 size_t bitset_size; 258 size_t bitset_size, buf_size;
281 259
282 if (argc < 1 || argc > 2) { 260 if (argc < 1 || argc > 2) {
283 DMWARN("wrong number of arguments to mirror log"); 261 DMWARN("wrong number of arguments to mirror log");
@@ -319,22 +297,53 @@ static int core_ctr(struct dirty_log *log, struct dm_target *ti,
319 * Work out how many "unsigned long"s we need to hold the bitset. 297 * Work out how many "unsigned long"s we need to hold the bitset.
320 */ 298 */
321 bitset_size = dm_round_up(region_count, 299 bitset_size = dm_round_up(region_count,
322 sizeof(unsigned long) << BYTE_SHIFT); 300 sizeof(*lc->clean_bits) << BYTE_SHIFT);
323 bitset_size >>= BYTE_SHIFT; 301 bitset_size >>= BYTE_SHIFT;
324 302
325 lc->bitset_uint32_count = bitset_size / 4; 303 lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);
326 lc->clean_bits = vmalloc(bitset_size); 304
327 if (!lc->clean_bits) { 305 /*
328 DMWARN("couldn't allocate clean bitset"); 306 * Disk log?
329 kfree(lc); 307 */
330 return -ENOMEM; 308 if (!dev) {
309 lc->clean_bits = vmalloc(bitset_size);
310 if (!lc->clean_bits) {
311 DMWARN("couldn't allocate clean bitset");
312 kfree(lc);
313 return -ENOMEM;
314 }
315 lc->disk_header = NULL;
316 } else {
317 lc->log_dev = dev;
318 lc->header_location.bdev = lc->log_dev->bdev;
319 lc->header_location.sector = 0;
320
321 /*
322 * Buffer holds both header and bitset.
323 */
324 buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
325 bitset_size, ti->limits.hardsect_size);
326 lc->header_location.count = buf_size >> SECTOR_SHIFT;
327
328 lc->disk_header = vmalloc(buf_size);
329 if (!lc->disk_header) {
330 DMWARN("couldn't allocate disk log buffer");
331 kfree(lc);
332 return -ENOMEM;
333 }
334
335 lc->clean_bits = (void *)lc->disk_header +
336 (LOG_OFFSET << SECTOR_SHIFT);
331 } 337 }
338
332 memset(lc->clean_bits, -1, bitset_size); 339 memset(lc->clean_bits, -1, bitset_size);
333 340
334 lc->sync_bits = vmalloc(bitset_size); 341 lc->sync_bits = vmalloc(bitset_size);
335 if (!lc->sync_bits) { 342 if (!lc->sync_bits) {
336 DMWARN("couldn't allocate sync bitset"); 343 DMWARN("couldn't allocate sync bitset");
337 vfree(lc->clean_bits); 344 if (!dev)
345 vfree(lc->clean_bits);
346 vfree(lc->disk_header);
338 kfree(lc); 347 kfree(lc);
339 return -ENOMEM; 348 return -ENOMEM;
340 } 349 }
@@ -345,25 +354,40 @@ static int core_ctr(struct dirty_log *log, struct dm_target *ti,
345 if (!lc->recovering_bits) { 354 if (!lc->recovering_bits) {
346 DMWARN("couldn't allocate sync bitset"); 355 DMWARN("couldn't allocate sync bitset");
347 vfree(lc->sync_bits); 356 vfree(lc->sync_bits);
348 vfree(lc->clean_bits); 357 if (!dev)
358 vfree(lc->clean_bits);
359 vfree(lc->disk_header);
349 kfree(lc); 360 kfree(lc);
350 return -ENOMEM; 361 return -ENOMEM;
351 } 362 }
352 memset(lc->recovering_bits, 0, bitset_size); 363 memset(lc->recovering_bits, 0, bitset_size);
353 lc->sync_search = 0; 364 lc->sync_search = 0;
354 log->context = lc; 365 log->context = lc;
366
355 return 0; 367 return 0;
356} 368}
357 369
358static void core_dtr(struct dirty_log *log) 370static int core_ctr(struct dirty_log *log, struct dm_target *ti,
371 unsigned int argc, char **argv)
372{
373 return create_log_context(log, ti, argc, argv, NULL);
374}
375
376static void destroy_log_context(struct log_c *lc)
359{ 377{
360 struct log_c *lc = (struct log_c *) log->context;
361 vfree(lc->clean_bits);
362 vfree(lc->sync_bits); 378 vfree(lc->sync_bits);
363 vfree(lc->recovering_bits); 379 vfree(lc->recovering_bits);
364 kfree(lc); 380 kfree(lc);
365} 381}
366 382
383static void core_dtr(struct dirty_log *log)
384{
385 struct log_c *lc = (struct log_c *) log->context;
386
387 vfree(lc->clean_bits);
388 destroy_log_context(lc);
389}
390
367/*---------------------------------------------------------------- 391/*----------------------------------------------------------------
368 * disk log constructor/destructor 392 * disk log constructor/destructor
369 * 393 *
@@ -373,8 +397,6 @@ static int disk_ctr(struct dirty_log *log, struct dm_target *ti,
373 unsigned int argc, char **argv) 397 unsigned int argc, char **argv)
374{ 398{
375 int r; 399 int r;
376 size_t size;
377 struct log_c *lc;
378 struct dm_dev *dev; 400 struct dm_dev *dev;
379 401
380 if (argc < 2 || argc > 3) { 402 if (argc < 2 || argc > 3) {
@@ -387,49 +409,22 @@ static int disk_ctr(struct dirty_log *log, struct dm_target *ti,
387 if (r) 409 if (r)
388 return r; 410 return r;
389 411
390 r = core_ctr(log, ti, argc - 1, argv + 1); 412 r = create_log_context(log, ti, argc - 1, argv + 1, dev);
391 if (r) { 413 if (r) {
392 dm_put_device(ti, dev); 414 dm_put_device(ti, dev);
393 return r; 415 return r;
394 } 416 }
395 417
396 lc = (struct log_c *) log->context;
397 lc->log_dev = dev;
398
399 /* setup the disk header fields */
400 lc->header_location.bdev = lc->log_dev->bdev;
401 lc->header_location.sector = 0;
402 lc->header_location.count = 1;
403
404 /*
405 * We can't read less than this amount, even though we'll
406 * not be using most of this space.
407 */
408 lc->disk_header = vmalloc(1 << SECTOR_SHIFT);
409 if (!lc->disk_header)
410 goto bad;
411
412 /* setup the disk bitset fields */
413 lc->bits_location.bdev = lc->log_dev->bdev;
414 lc->bits_location.sector = LOG_OFFSET;
415
416 size = dm_round_up(lc->bitset_uint32_count * sizeof(uint32_t),
417 1 << SECTOR_SHIFT);
418 lc->bits_location.count = size >> SECTOR_SHIFT;
419 return 0; 418 return 0;
420
421 bad:
422 dm_put_device(ti, lc->log_dev);
423 core_dtr(log);
424 return -ENOMEM;
425} 419}
426 420
427static void disk_dtr(struct dirty_log *log) 421static void disk_dtr(struct dirty_log *log)
428{ 422{
429 struct log_c *lc = (struct log_c *) log->context; 423 struct log_c *lc = (struct log_c *) log->context;
424
430 dm_put_device(lc->ti, lc->log_dev); 425 dm_put_device(lc->ti, lc->log_dev);
431 vfree(lc->disk_header); 426 vfree(lc->disk_header);
432 core_dtr(log); 427 destroy_log_context(lc);
433} 428}
434 429
435static int count_bits32(uint32_t *addr, unsigned size) 430static int count_bits32(uint32_t *addr, unsigned size)
@@ -454,12 +449,7 @@ static int disk_resume(struct dirty_log *log)
454 if (r) 449 if (r)
455 return r; 450 return r;
456 451
457 /* read the bits */ 452 /* set or clear any new bits -- device has grown */
458 r = read_bits(lc);
459 if (r)
460 return r;
461
462 /* set or clear any new bits */
463 if (lc->sync == NOSYNC) 453 if (lc->sync == NOSYNC)
464 for (i = lc->header.nr_regions; i < lc->region_count; i++) 454 for (i = lc->header.nr_regions; i < lc->region_count; i++)
465 /* FIXME: amazingly inefficient */ 455 /* FIXME: amazingly inefficient */
@@ -469,15 +459,14 @@ static int disk_resume(struct dirty_log *log)
469 /* FIXME: amazingly inefficient */ 459 /* FIXME: amazingly inefficient */
470 log_clear_bit(lc, lc->clean_bits, i); 460 log_clear_bit(lc, lc->clean_bits, i);
471 461
462 /* clear any old bits -- device has shrunk */
463 for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
464 log_clear_bit(lc, lc->clean_bits, i);
465
472 /* copy clean across to sync */ 466 /* copy clean across to sync */
473 memcpy(lc->sync_bits, lc->clean_bits, size); 467 memcpy(lc->sync_bits, lc->clean_bits, size);
474 lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); 468 lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
475 469
476 /* write the bits */
477 r = write_bits(lc);
478 if (r)
479 return r;
480
481 /* set the correct number of regions in the header */ 470 /* set the correct number of regions in the header */
482 lc->header.nr_regions = lc->region_count; 471 lc->header.nr_regions = lc->region_count;
483 472
@@ -518,7 +507,7 @@ static int disk_flush(struct dirty_log *log)
518 if (!lc->touched) 507 if (!lc->touched)
519 return 0; 508 return 0;
520 509
521 r = write_bits(lc); 510 r = write_header(lc);
522 if (!r) 511 if (!r)
523 lc->touched = 0; 512 lc->touched = 0;
524 513
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 1816f30678ed..217615b33223 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -21,6 +21,7 @@
21#include <linux/workqueue.h> 21#include <linux/workqueue.h>
22#include <asm/atomic.h> 22#include <asm/atomic.h>
23 23
24#define DM_MSG_PREFIX "multipath"
24#define MESG_STR(x) x, sizeof(x) 25#define MESG_STR(x) x, sizeof(x)
25 26
26/* Path properties */ 27/* Path properties */
@@ -446,8 +447,6 @@ struct param {
446 char *error; 447 char *error;
447}; 448};
448 449
449#define ESTR(s) ("dm-multipath: " s)
450
451static int read_param(struct param *param, char *str, unsigned *v, char **error) 450static int read_param(struct param *param, char *str, unsigned *v, char **error)
452{ 451{
453 if (!str || 452 if (!str ||
@@ -495,12 +494,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
495 unsigned ps_argc; 494 unsigned ps_argc;
496 495
497 static struct param _params[] = { 496 static struct param _params[] = {
498 {0, 1024, ESTR("invalid number of path selector args")}, 497 {0, 1024, "invalid number of path selector args"},
499 }; 498 };
500 499
501 pst = dm_get_path_selector(shift(as)); 500 pst = dm_get_path_selector(shift(as));
502 if (!pst) { 501 if (!pst) {
503 ti->error = ESTR("unknown path selector type"); 502 ti->error = "unknown path selector type";
504 return -EINVAL; 503 return -EINVAL;
505 } 504 }
506 505
@@ -511,7 +510,7 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
511 r = pst->create(&pg->ps, ps_argc, as->argv); 510 r = pst->create(&pg->ps, ps_argc, as->argv);
512 if (r) { 511 if (r) {
513 dm_put_path_selector(pst); 512 dm_put_path_selector(pst);
514 ti->error = ESTR("path selector constructor failed"); 513 ti->error = "path selector constructor failed";
515 return r; 514 return r;
516 } 515 }
517 516
@@ -529,7 +528,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
529 528
530 /* we need at least a path arg */ 529 /* we need at least a path arg */
531 if (as->argc < 1) { 530 if (as->argc < 1) {
532 ti->error = ESTR("no device given"); 531 ti->error = "no device given";
533 return NULL; 532 return NULL;
534 } 533 }
535 534
@@ -540,7 +539,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
540 r = dm_get_device(ti, shift(as), ti->begin, ti->len, 539 r = dm_get_device(ti, shift(as), ti->begin, ti->len,
541 dm_table_get_mode(ti->table), &p->path.dev); 540 dm_table_get_mode(ti->table), &p->path.dev);
542 if (r) { 541 if (r) {
543 ti->error = ESTR("error getting device"); 542 ti->error = "error getting device";
544 goto bad; 543 goto bad;
545 } 544 }
546 545
@@ -562,8 +561,8 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
562 struct dm_target *ti) 561 struct dm_target *ti)
563{ 562{
564 static struct param _params[] = { 563 static struct param _params[] = {
565 {1, 1024, ESTR("invalid number of paths")}, 564 {1, 1024, "invalid number of paths"},
566 {0, 1024, ESTR("invalid number of selector args")} 565 {0, 1024, "invalid number of selector args"}
567 }; 566 };
568 567
569 int r; 568 int r;
@@ -572,13 +571,13 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
572 571
573 if (as->argc < 2) { 572 if (as->argc < 2) {
574 as->argc = 0; 573 as->argc = 0;
575 ti->error = ESTR("not enough priority group aruments"); 574 ti->error = "not enough priority group aruments";
576 return NULL; 575 return NULL;
577 } 576 }
578 577
579 pg = alloc_priority_group(); 578 pg = alloc_priority_group();
580 if (!pg) { 579 if (!pg) {
581 ti->error = ESTR("couldn't allocate priority group"); 580 ti->error = "couldn't allocate priority group";
582 return NULL; 581 return NULL;
583 } 582 }
584 pg->m = m; 583 pg->m = m;
@@ -633,7 +632,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m,
633 unsigned hw_argc; 632 unsigned hw_argc;
634 633
635 static struct param _params[] = { 634 static struct param _params[] = {
636 {0, 1024, ESTR("invalid number of hardware handler args")}, 635 {0, 1024, "invalid number of hardware handler args"},
637 }; 636 };
638 637
639 r = read_param(_params, shift(as), &hw_argc, &ti->error); 638 r = read_param(_params, shift(as), &hw_argc, &ti->error);
@@ -645,14 +644,14 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m,
645 644
646 hwht = dm_get_hw_handler(shift(as)); 645 hwht = dm_get_hw_handler(shift(as));
647 if (!hwht) { 646 if (!hwht) {
648 ti->error = ESTR("unknown hardware handler type"); 647 ti->error = "unknown hardware handler type";
649 return -EINVAL; 648 return -EINVAL;
650 } 649 }
651 650
652 r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv); 651 r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv);
653 if (r) { 652 if (r) {
654 dm_put_hw_handler(hwht); 653 dm_put_hw_handler(hwht);
655 ti->error = ESTR("hardware handler constructor failed"); 654 ti->error = "hardware handler constructor failed";
656 return r; 655 return r;
657 } 656 }
658 657
@@ -669,7 +668,7 @@ static int parse_features(struct arg_set *as, struct multipath *m,
669 unsigned argc; 668 unsigned argc;
670 669
671 static struct param _params[] = { 670 static struct param _params[] = {
672 {0, 1, ESTR("invalid number of feature args")}, 671 {0, 1, "invalid number of feature args"},
673 }; 672 };
674 673
675 r = read_param(_params, shift(as), &argc, &ti->error); 674 r = read_param(_params, shift(as), &argc, &ti->error);
@@ -692,8 +691,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
692{ 691{
693 /* target parameters */ 692 /* target parameters */
694 static struct param _params[] = { 693 static struct param _params[] = {
695 {1, 1024, ESTR("invalid number of priority groups")}, 694 {1, 1024, "invalid number of priority groups"},
696 {1, 1024, ESTR("invalid initial priority group number")}, 695 {1, 1024, "invalid initial priority group number"},
697 }; 696 };
698 697
699 int r; 698 int r;
@@ -707,7 +706,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
707 706
708 m = alloc_multipath(); 707 m = alloc_multipath();
709 if (!m) { 708 if (!m) {
710 ti->error = ESTR("can't allocate multipath"); 709 ti->error = "can't allocate multipath";
711 return -EINVAL; 710 return -EINVAL;
712 } 711 }
713 712
@@ -746,7 +745,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
746 } 745 }
747 746
748 if (pg_count != m->nr_priority_groups) { 747 if (pg_count != m->nr_priority_groups) {
749 ti->error = ESTR("priority group count mismatch"); 748 ti->error = "priority group count mismatch";
750 r = -EINVAL; 749 r = -EINVAL;
751 goto bad; 750 goto bad;
752 } 751 }
@@ -807,7 +806,7 @@ static int fail_path(struct pgpath *pgpath)
807 if (!pgpath->path.is_active) 806 if (!pgpath->path.is_active)
808 goto out; 807 goto out;
809 808
810 DMWARN("dm-multipath: Failing path %s.", pgpath->path.dev->name); 809 DMWARN("Failing path %s.", pgpath->path.dev->name);
811 810
812 pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); 811 pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
813 pgpath->path.is_active = 0; 812 pgpath->path.is_active = 0;
@@ -1250,7 +1249,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1250 r = dm_get_device(ti, argv[1], ti->begin, ti->len, 1249 r = dm_get_device(ti, argv[1], ti->begin, ti->len,
1251 dm_table_get_mode(ti->table), &dev); 1250 dm_table_get_mode(ti->table), &dev);
1252 if (r) { 1251 if (r) {
1253 DMWARN("dm-multipath message: error getting device %s", 1252 DMWARN("message: error getting device %s",
1254 argv[1]); 1253 argv[1]);
1255 return -EINVAL; 1254 return -EINVAL;
1256 } 1255 }
@@ -1309,7 +1308,7 @@ static int __init dm_multipath_init(void)
1309 return -ENOMEM; 1308 return -ENOMEM;
1310 } 1309 }
1311 1310
1312 DMINFO("dm-multipath version %u.%u.%u loaded", 1311 DMINFO("version %u.%u.%u loaded",
1313 multipath_target.version[0], multipath_target.version[1], 1312 multipath_target.version[0], multipath_target.version[1],
1314 multipath_target.version[2]); 1313 multipath_target.version[2]);
1315 1314
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d12cf3e5e076..be48cedf986b 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -20,6 +20,8 @@
20#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
21#include <linux/workqueue.h> 21#include <linux/workqueue.h>
22 22
23#define DM_MSG_PREFIX "raid1"
24
23static struct workqueue_struct *_kmirrord_wq; 25static struct workqueue_struct *_kmirrord_wq;
24static struct work_struct _kmirrord_work; 26static struct work_struct _kmirrord_work;
25 27
@@ -106,12 +108,42 @@ struct region {
106 struct bio_list delayed_bios; 108 struct bio_list delayed_bios;
107}; 109};
108 110
111
112/*-----------------------------------------------------------------
113 * Mirror set structures.
114 *---------------------------------------------------------------*/
115struct mirror {
116 atomic_t error_count;
117 struct dm_dev *dev;
118 sector_t offset;
119};
120
121struct mirror_set {
122 struct dm_target *ti;
123 struct list_head list;
124 struct region_hash rh;
125 struct kcopyd_client *kcopyd_client;
126
127 spinlock_t lock; /* protects the next two lists */
128 struct bio_list reads;
129 struct bio_list writes;
130
131 /* recovery */
132 region_t nr_regions;
133 int in_sync;
134
135 struct mirror *default_mirror; /* Default mirror */
136
137 unsigned int nr_mirrors;
138 struct mirror mirror[0];
139};
140
109/* 141/*
110 * Conversion fns 142 * Conversion fns
111 */ 143 */
112static inline region_t bio_to_region(struct region_hash *rh, struct bio *bio) 144static inline region_t bio_to_region(struct region_hash *rh, struct bio *bio)
113{ 145{
114 return bio->bi_sector >> rh->region_shift; 146 return (bio->bi_sector - rh->ms->ti->begin) >> rh->region_shift;
115} 147}
116 148
117static inline sector_t region_to_sector(struct region_hash *rh, region_t region) 149static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
@@ -458,11 +490,9 @@ static int __rh_recovery_prepare(struct region_hash *rh)
458 /* Already quiesced ? */ 490 /* Already quiesced ? */
459 if (atomic_read(&reg->pending)) 491 if (atomic_read(&reg->pending))
460 list_del_init(&reg->list); 492 list_del_init(&reg->list);
493 else
494 list_move(&reg->list, &rh->quiesced_regions);
461 495
462 else {
463 list_del_init(&reg->list);
464 list_add(&reg->list, &rh->quiesced_regions);
465 }
466 spin_unlock_irq(&rh->region_lock); 496 spin_unlock_irq(&rh->region_lock);
467 497
468 return 1; 498 return 1;
@@ -541,35 +571,6 @@ static void rh_start_recovery(struct region_hash *rh)
541 wake(); 571 wake();
542} 572}
543 573
544/*-----------------------------------------------------------------
545 * Mirror set structures.
546 *---------------------------------------------------------------*/
547struct mirror {
548 atomic_t error_count;
549 struct dm_dev *dev;
550 sector_t offset;
551};
552
553struct mirror_set {
554 struct dm_target *ti;
555 struct list_head list;
556 struct region_hash rh;
557 struct kcopyd_client *kcopyd_client;
558
559 spinlock_t lock; /* protects the next two lists */
560 struct bio_list reads;
561 struct bio_list writes;
562
563 /* recovery */
564 region_t nr_regions;
565 int in_sync;
566
567 struct mirror *default_mirror; /* Default mirror */
568
569 unsigned int nr_mirrors;
570 struct mirror mirror[0];
571};
572
573/* 574/*
574 * Every mirror should look like this one. 575 * Every mirror should look like this one.
575 */ 576 */
@@ -603,7 +604,7 @@ static void recovery_complete(int read_err, unsigned int write_err,
603 struct region *reg = (struct region *) context; 604 struct region *reg = (struct region *) context;
604 605
605 /* FIXME: better error handling */ 606 /* FIXME: better error handling */
606 rh_recovery_end(reg, read_err || write_err); 607 rh_recovery_end(reg, !(read_err || write_err));
607} 608}
608 609
609static int recover(struct mirror_set *ms, struct region *reg) 610static int recover(struct mirror_set *ms, struct region *reg)
@@ -893,7 +894,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
893 894
894 ms = kmalloc(len, GFP_KERNEL); 895 ms = kmalloc(len, GFP_KERNEL);
895 if (!ms) { 896 if (!ms) {
896 ti->error = "dm-mirror: Cannot allocate mirror context"; 897 ti->error = "Cannot allocate mirror context";
897 return NULL; 898 return NULL;
898 } 899 }
899 900
@@ -907,7 +908,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
907 ms->default_mirror = &ms->mirror[DEFAULT_MIRROR]; 908 ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
908 909
909 if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { 910 if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
910 ti->error = "dm-mirror: Error creating dirty region hash"; 911 ti->error = "Error creating dirty region hash";
911 kfree(ms); 912 kfree(ms);
912 return NULL; 913 return NULL;
913 } 914 }
@@ -937,14 +938,14 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
937 unsigned long long offset; 938 unsigned long long offset;
938 939
939 if (sscanf(argv[1], "%llu", &offset) != 1) { 940 if (sscanf(argv[1], "%llu", &offset) != 1) {
940 ti->error = "dm-mirror: Invalid offset"; 941 ti->error = "Invalid offset";
941 return -EINVAL; 942 return -EINVAL;
942 } 943 }
943 944
944 if (dm_get_device(ti, argv[0], offset, ti->len, 945 if (dm_get_device(ti, argv[0], offset, ti->len,
945 dm_table_get_mode(ti->table), 946 dm_table_get_mode(ti->table),
946 &ms->mirror[mirror].dev)) { 947 &ms->mirror[mirror].dev)) {
947 ti->error = "dm-mirror: Device lookup failure"; 948 ti->error = "Device lookup failure";
948 return -ENXIO; 949 return -ENXIO;
949 } 950 }
950 951
@@ -981,30 +982,30 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti,
981 struct dirty_log *dl; 982 struct dirty_log *dl;
982 983
983 if (argc < 2) { 984 if (argc < 2) {
984 ti->error = "dm-mirror: Insufficient mirror log arguments"; 985 ti->error = "Insufficient mirror log arguments";
985 return NULL; 986 return NULL;
986 } 987 }
987 988
988 if (sscanf(argv[1], "%u", &param_count) != 1) { 989 if (sscanf(argv[1], "%u", &param_count) != 1) {
989 ti->error = "dm-mirror: Invalid mirror log argument count"; 990 ti->error = "Invalid mirror log argument count";
990 return NULL; 991 return NULL;
991 } 992 }
992 993
993 *args_used = 2 + param_count; 994 *args_used = 2 + param_count;
994 995
995 if (argc < *args_used) { 996 if (argc < *args_used) {
996 ti->error = "dm-mirror: Insufficient mirror log arguments"; 997 ti->error = "Insufficient mirror log arguments";
997 return NULL; 998 return NULL;
998 } 999 }
999 1000
1000 dl = dm_create_dirty_log(argv[0], ti, param_count, argv + 2); 1001 dl = dm_create_dirty_log(argv[0], ti, param_count, argv + 2);
1001 if (!dl) { 1002 if (!dl) {
1002 ti->error = "dm-mirror: Error creating mirror dirty log"; 1003 ti->error = "Error creating mirror dirty log";
1003 return NULL; 1004 return NULL;
1004 } 1005 }
1005 1006
1006 if (!_check_region_size(ti, dl->type->get_region_size(dl))) { 1007 if (!_check_region_size(ti, dl->type->get_region_size(dl))) {
1007 ti->error = "dm-mirror: Invalid region size"; 1008 ti->error = "Invalid region size";
1008 dm_destroy_dirty_log(dl); 1009 dm_destroy_dirty_log(dl);
1009 return NULL; 1010 return NULL;
1010 } 1011 }
@@ -1038,7 +1039,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1038 1039
1039 if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || 1040 if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 ||
1040 nr_mirrors < 2 || nr_mirrors > KCOPYD_MAX_REGIONS + 1) { 1041 nr_mirrors < 2 || nr_mirrors > KCOPYD_MAX_REGIONS + 1) {
1041 ti->error = "dm-mirror: Invalid number of mirrors"; 1042 ti->error = "Invalid number of mirrors";
1042 dm_destroy_dirty_log(dl); 1043 dm_destroy_dirty_log(dl);
1043 return -EINVAL; 1044 return -EINVAL;
1044 } 1045 }
@@ -1046,7 +1047,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1046 argv++, argc--; 1047 argv++, argc--;
1047 1048
1048 if (argc != nr_mirrors * 2) { 1049 if (argc != nr_mirrors * 2) {
1049 ti->error = "dm-mirror: Wrong number of mirror arguments"; 1050 ti->error = "Wrong number of mirror arguments";
1050 dm_destroy_dirty_log(dl); 1051 dm_destroy_dirty_log(dl);
1051 return -EINVAL; 1052 return -EINVAL;
1052 } 1053 }
@@ -1115,7 +1116,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
1115 struct mirror *m; 1116 struct mirror *m;
1116 struct mirror_set *ms = ti->private; 1117 struct mirror_set *ms = ti->private;
1117 1118
1118 map_context->ll = bio->bi_sector >> ms->rh.region_shift; 1119 map_context->ll = bio_to_region(&ms->rh, bio);
1119 1120
1120 if (rw == WRITE) { 1121 if (rw == WRITE) {
1121 queue_bio(ms, bio, rw); 1122 queue_bio(ms, bio, rw);
@@ -1221,7 +1222,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
1221 1222
1222static struct target_type mirror_target = { 1223static struct target_type mirror_target = {
1223 .name = "mirror", 1224 .name = "mirror",
1224 .version = {1, 0, 1}, 1225 .version = {1, 0, 2},
1225 .module = THIS_MODULE, 1226 .module = THIS_MODULE,
1226 .ctr = mirror_ctr, 1227 .ctr = mirror_ctr,
1227 .dtr = mirror_dtr, 1228 .dtr = mirror_dtr,
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c
index d0024865a789..c5a16c550122 100644
--- a/drivers/md/dm-round-robin.c
+++ b/drivers/md/dm-round-robin.c
@@ -14,6 +14,8 @@
14 14
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17#define DM_MSG_PREFIX "multipath round-robin"
18
17/*----------------------------------------------------------------- 19/*-----------------------------------------------------------------
18 * Path-handling code, paths are held in lists 20 * Path-handling code, paths are held in lists
19 *---------------------------------------------------------------*/ 21 *---------------------------------------------------------------*/
@@ -191,9 +193,9 @@ static int __init dm_rr_init(void)
191 int r = dm_register_path_selector(&rr_ps); 193 int r = dm_register_path_selector(&rr_ps);
192 194
193 if (r < 0) 195 if (r < 0)
194 DMERR("round-robin: register failed %d", r); 196 DMERR("register failed %d", r);
195 197
196 DMINFO("dm-round-robin version 1.0.0 loaded"); 198 DMINFO("version 1.0.0 loaded");
197 199
198 return r; 200 return r;
199} 201}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 08312b46463a..8eea0ddbf5ec 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -23,6 +23,8 @@
23#include "dm-bio-list.h" 23#include "dm-bio-list.h"
24#include "kcopyd.h" 24#include "kcopyd.h"
25 25
26#define DM_MSG_PREFIX "snapshots"
27
26/* 28/*
27 * The percentage increment we will wake up users at 29 * The percentage increment we will wake up users at
28 */ 30 */
@@ -117,7 +119,7 @@ static int init_origin_hash(void)
117 _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), 119 _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
118 GFP_KERNEL); 120 GFP_KERNEL);
119 if (!_origins) { 121 if (!_origins) {
120 DMERR("Device mapper: Snapshot: unable to allocate memory"); 122 DMERR("unable to allocate memory");
121 return -ENOMEM; 123 return -ENOMEM;
122 } 124 }
123 125
@@ -412,7 +414,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
412 int blocksize; 414 int blocksize;
413 415
414 if (argc < 4) { 416 if (argc < 4) {
415 ti->error = "dm-snapshot: requires exactly 4 arguments"; 417 ti->error = "requires exactly 4 arguments";
416 r = -EINVAL; 418 r = -EINVAL;
417 goto bad1; 419 goto bad1;
418 } 420 }
@@ -530,7 +532,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
530 } 532 }
531 533
532 ti->private = s; 534 ti->private = s;
533 ti->split_io = chunk_size; 535 ti->split_io = s->chunk_size;
534 536
535 return 0; 537 return 0;
536 538
@@ -1127,7 +1129,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1127 struct dm_dev *dev; 1129 struct dm_dev *dev;
1128 1130
1129 if (argc != 1) { 1131 if (argc != 1) {
1130 ti->error = "dm-origin: incorrect number of arguments"; 1132 ti->error = "origin: incorrect number of arguments";
1131 return -EINVAL; 1133 return -EINVAL;
1132 } 1134 }
1133 1135
@@ -1204,7 +1206,7 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
1204 1206
1205static struct target_type origin_target = { 1207static struct target_type origin_target = {
1206 .name = "snapshot-origin", 1208 .name = "snapshot-origin",
1207 .version = {1, 1, 0}, 1209 .version = {1, 4, 0},
1208 .module = THIS_MODULE, 1210 .module = THIS_MODULE,
1209 .ctr = origin_ctr, 1211 .ctr = origin_ctr,
1210 .dtr = origin_dtr, 1212 .dtr = origin_dtr,
@@ -1215,7 +1217,7 @@ static struct target_type origin_target = {
1215 1217
1216static struct target_type snapshot_target = { 1218static struct target_type snapshot_target = {
1217 .name = "snapshot", 1219 .name = "snapshot",
1218 .version = {1, 1, 0}, 1220 .version = {1, 4, 0},
1219 .module = THIS_MODULE, 1221 .module = THIS_MODULE,
1220 .ctr = snapshot_ctr, 1222 .ctr = snapshot_ctr,
1221 .dtr = snapshot_dtr, 1223 .dtr = snapshot_dtr,
@@ -1236,7 +1238,7 @@ static int __init dm_snapshot_init(void)
1236 1238
1237 r = dm_register_target(&origin_target); 1239 r = dm_register_target(&origin_target);
1238 if (r < 0) { 1240 if (r < 0) {
1239 DMERR("Device mapper: Origin: register failed %d\n", r); 1241 DMERR("Origin target register failed %d", r);
1240 goto bad1; 1242 goto bad1;
1241 } 1243 }
1242 1244
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 08328a8f5a3c..6c29fcecd892 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -12,6 +12,8 @@
12#include <linux/bio.h> 12#include <linux/bio.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14 14
15#define DM_MSG_PREFIX "striped"
16
15struct stripe { 17struct stripe {
16 struct dm_dev *dev; 18 struct dm_dev *dev;
17 sector_t physical_start; 19 sector_t physical_start;
@@ -78,19 +80,19 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
78 unsigned int i; 80 unsigned int i;
79 81
80 if (argc < 2) { 82 if (argc < 2) {
81 ti->error = "dm-stripe: Not enough arguments"; 83 ti->error = "Not enough arguments";
82 return -EINVAL; 84 return -EINVAL;
83 } 85 }
84 86
85 stripes = simple_strtoul(argv[0], &end, 10); 87 stripes = simple_strtoul(argv[0], &end, 10);
86 if (*end) { 88 if (*end) {
87 ti->error = "dm-stripe: Invalid stripe count"; 89 ti->error = "Invalid stripe count";
88 return -EINVAL; 90 return -EINVAL;
89 } 91 }
90 92
91 chunk_size = simple_strtoul(argv[1], &end, 10); 93 chunk_size = simple_strtoul(argv[1], &end, 10);
92 if (*end) { 94 if (*end) {
93 ti->error = "dm-stripe: Invalid chunk_size"; 95 ti->error = "Invalid chunk_size";
94 return -EINVAL; 96 return -EINVAL;
95 } 97 }
96 98
@@ -99,19 +101,19 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
99 */ 101 */
100 if (!chunk_size || (chunk_size & (chunk_size - 1)) || 102 if (!chunk_size || (chunk_size & (chunk_size - 1)) ||
101 (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { 103 (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) {
102 ti->error = "dm-stripe: Invalid chunk size"; 104 ti->error = "Invalid chunk size";
103 return -EINVAL; 105 return -EINVAL;
104 } 106 }
105 107
106 if (ti->len & (chunk_size - 1)) { 108 if (ti->len & (chunk_size - 1)) {
107 ti->error = "dm-stripe: Target length not divisible by " 109 ti->error = "Target length not divisible by "
108 "chunk size"; 110 "chunk size";
109 return -EINVAL; 111 return -EINVAL;
110 } 112 }
111 113
112 width = ti->len; 114 width = ti->len;
113 if (sector_div(width, stripes)) { 115 if (sector_div(width, stripes)) {
114 ti->error = "dm-stripe: Target length not divisible by " 116 ti->error = "Target length not divisible by "
115 "number of stripes"; 117 "number of stripes";
116 return -EINVAL; 118 return -EINVAL;
117 } 119 }
@@ -120,14 +122,14 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
120 * Do we have enough arguments for that many stripes ? 122 * Do we have enough arguments for that many stripes ?
121 */ 123 */
122 if (argc != (2 + 2 * stripes)) { 124 if (argc != (2 + 2 * stripes)) {
123 ti->error = "dm-stripe: Not enough destinations " 125 ti->error = "Not enough destinations "
124 "specified"; 126 "specified";
125 return -EINVAL; 127 return -EINVAL;
126 } 128 }
127 129
128 sc = alloc_context(stripes); 130 sc = alloc_context(stripes);
129 if (!sc) { 131 if (!sc) {
130 ti->error = "dm-stripe: Memory allocation for striped context " 132 ti->error = "Memory allocation for striped context "
131 "failed"; 133 "failed";
132 return -ENOMEM; 134 return -ENOMEM;
133 } 135 }
@@ -149,8 +151,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
149 151
150 r = get_stripe(ti, sc, i, argv); 152 r = get_stripe(ti, sc, i, argv);
151 if (r < 0) { 153 if (r < 0) {
152 ti->error = "dm-stripe: Couldn't parse stripe " 154 ti->error = "Couldn't parse stripe destination";
153 "destination";
154 while (i--) 155 while (i--)
155 dm_put_device(ti, sc->stripe[i].dev); 156 dm_put_device(ti, sc->stripe[i].dev);
156 kfree(sc); 157 kfree(sc);
@@ -227,7 +228,7 @@ int __init dm_stripe_init(void)
227 228
228 r = dm_register_target(&stripe_target); 229 r = dm_register_target(&stripe_target);
229 if (r < 0) 230 if (r < 0)
230 DMWARN("striped target registration failed"); 231 DMWARN("target registration failed");
231 232
232 return r; 233 return r;
233} 234}
@@ -235,7 +236,7 @@ int __init dm_stripe_init(void)
235void dm_stripe_exit(void) 236void dm_stripe_exit(void)
236{ 237{
237 if (dm_unregister_target(&stripe_target)) 238 if (dm_unregister_target(&stripe_target))
238 DMWARN("striped target unregistration failed"); 239 DMWARN("target unregistration failed");
239 240
240 return; 241 return;
241} 242}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 8f56a54cf0ce..75fe9493e6af 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -17,6 +17,8 @@
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <asm/atomic.h> 18#include <asm/atomic.h>
19 19
20#define DM_MSG_PREFIX "table"
21
20#define MAX_DEPTH 16 22#define MAX_DEPTH 16
21#define NODE_SIZE L1_CACHE_BYTES 23#define NODE_SIZE L1_CACHE_BYTES
22#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) 24#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
@@ -237,6 +239,44 @@ int dm_table_create(struct dm_table **result, int mode,
237 return 0; 239 return 0;
238} 240}
239 241
242int dm_create_error_table(struct dm_table **result, struct mapped_device *md)
243{
244 struct dm_table *t;
245 sector_t dev_size = 1;
246 int r;
247
248 /*
249 * Find current size of device.
250 * Default to 1 sector if inactive.
251 */
252 t = dm_get_table(md);
253 if (t) {
254 dev_size = dm_table_get_size(t);
255 dm_table_put(t);
256 }
257
258 r = dm_table_create(&t, FMODE_READ, 1, md);
259 if (r)
260 return r;
261
262 r = dm_table_add_target(t, "error", 0, dev_size, NULL);
263 if (r)
264 goto out;
265
266 r = dm_table_complete(t);
267 if (r)
268 goto out;
269
270 *result = t;
271
272out:
273 if (r)
274 dm_table_put(t);
275
276 return r;
277}
278EXPORT_SYMBOL_GPL(dm_create_error_table);
279
240static void free_devices(struct list_head *devices) 280static void free_devices(struct list_head *devices)
241{ 281{
242 struct list_head *tmp, *next; 282 struct list_head *tmp, *next;
@@ -590,6 +630,12 @@ int dm_split_args(int *argc, char ***argvp, char *input)
590 unsigned array_size = 0; 630 unsigned array_size = 0;
591 631
592 *argc = 0; 632 *argc = 0;
633
634 if (!input) {
635 *argvp = NULL;
636 return 0;
637 }
638
593 argv = realloc_argv(&array_size, argv); 639 argv = realloc_argv(&array_size, argv);
594 if (!argv) 640 if (!argv)
595 return -ENOMEM; 641 return -ENOMEM;
@@ -671,15 +717,14 @@ int dm_table_add_target(struct dm_table *t, const char *type,
671 memset(tgt, 0, sizeof(*tgt)); 717 memset(tgt, 0, sizeof(*tgt));
672 718
673 if (!len) { 719 if (!len) {
674 tgt->error = "zero-length target"; 720 DMERR("%s: zero-length target", dm_device_name(t->md));
675 DMERR("%s", tgt->error);
676 return -EINVAL; 721 return -EINVAL;
677 } 722 }
678 723
679 tgt->type = dm_get_target_type(type); 724 tgt->type = dm_get_target_type(type);
680 if (!tgt->type) { 725 if (!tgt->type) {
681 tgt->error = "unknown target type"; 726 DMERR("%s: %s: unknown target type", dm_device_name(t->md),
682 DMERR("%s", tgt->error); 727 type);
683 return -EINVAL; 728 return -EINVAL;
684 } 729 }
685 730
@@ -716,7 +761,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
716 return 0; 761 return 0;
717 762
718 bad: 763 bad:
719 DMERR("%s", tgt->error); 764 DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error);
720 dm_put_target_type(tgt->type); 765 dm_put_target_type(tgt->type);
721 return r; 766 return r;
722} 767}
@@ -802,7 +847,7 @@ sector_t dm_table_get_size(struct dm_table *t)
802 847
803struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) 848struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
804{ 849{
805 if (index > t->num_targets) 850 if (index >= t->num_targets)
806 return NULL; 851 return NULL;
807 852
808 return t->targets + index; 853 return t->targets + index;
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 64fd8e79ea4c..477a041a41cf 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -12,6 +12,8 @@
12#include <linux/bio.h> 12#include <linux/bio.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14 14
15#define DM_MSG_PREFIX "target"
16
15struct tt_internal { 17struct tt_internal {
16 struct target_type tt; 18 struct target_type tt;
17 19
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index 51c0639b2487..ea569f7348d2 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -10,13 +10,15 @@
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/bio.h> 11#include <linux/bio.h>
12 12
13#define DM_MSG_PREFIX "zero"
14
13/* 15/*
14 * Construct a dummy mapping that only returns zeros 16 * Construct a dummy mapping that only returns zeros
15 */ 17 */
16static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) 18static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
17{ 19{
18 if (argc != 0) { 20 if (argc != 0) {
19 ti->error = "dm-zero: No arguments required"; 21 ti->error = "No arguments required";
20 return -EINVAL; 22 return -EINVAL;
21 } 23 }
22 24
@@ -60,7 +62,7 @@ static int __init dm_zero_init(void)
60 int r = dm_register_target(&zero_target); 62 int r = dm_register_target(&zero_target);
61 63
62 if (r < 0) 64 if (r < 0)
63 DMERR("zero: register failed %d", r); 65 DMERR("register failed %d", r);
64 66
65 return r; 67 return r;
66} 68}
@@ -70,7 +72,7 @@ static void __exit dm_zero_exit(void)
70 int r = dm_unregister_target(&zero_target); 72 int r = dm_unregister_target(&zero_target);
71 73
72 if (r < 0) 74 if (r < 0)
73 DMERR("zero: unregister failed %d", r); 75 DMERR("unregister failed %d", r);
74} 76}
75 77
76module_init(dm_zero_init) 78module_init(dm_zero_init)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4d710b7a133b..3ed2e53b9eb6 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. 2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
3 * Copyright (C) 2004 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This file is released under the GPL. 5 * This file is released under the GPL.
6 */ 6 */
@@ -21,11 +21,14 @@
21#include <linux/hdreg.h> 21#include <linux/hdreg.h>
22#include <linux/blktrace_api.h> 22#include <linux/blktrace_api.h>
23 23
24#define DM_MSG_PREFIX "core"
25
24static const char *_name = DM_NAME; 26static const char *_name = DM_NAME;
25 27
26static unsigned int major = 0; 28static unsigned int major = 0;
27static unsigned int _major = 0; 29static unsigned int _major = 0;
28 30
31static DEFINE_SPINLOCK(_minor_lock);
29/* 32/*
30 * One of these is allocated per bio. 33 * One of these is allocated per bio.
31 */ 34 */
@@ -49,23 +52,28 @@ struct target_io {
49 52
50union map_info *dm_get_mapinfo(struct bio *bio) 53union map_info *dm_get_mapinfo(struct bio *bio)
51{ 54{
52 if (bio && bio->bi_private) 55 if (bio && bio->bi_private)
53 return &((struct target_io *)bio->bi_private)->info; 56 return &((struct target_io *)bio->bi_private)->info;
54 return NULL; 57 return NULL;
55} 58}
56 59
60#define MINOR_ALLOCED ((void *)-1)
61
57/* 62/*
58 * Bits for the md->flags field. 63 * Bits for the md->flags field.
59 */ 64 */
60#define DMF_BLOCK_IO 0 65#define DMF_BLOCK_IO 0
61#define DMF_SUSPENDED 1 66#define DMF_SUSPENDED 1
62#define DMF_FROZEN 2 67#define DMF_FROZEN 2
68#define DMF_FREEING 3
69#define DMF_DELETING 4
63 70
64struct mapped_device { 71struct mapped_device {
65 struct rw_semaphore io_lock; 72 struct rw_semaphore io_lock;
66 struct semaphore suspend_lock; 73 struct semaphore suspend_lock;
67 rwlock_t map_lock; 74 rwlock_t map_lock;
68 atomic_t holders; 75 atomic_t holders;
76 atomic_t open_count;
69 77
70 unsigned long flags; 78 unsigned long flags;
71 79
@@ -218,9 +226,25 @@ static int dm_blk_open(struct inode *inode, struct file *file)
218{ 226{
219 struct mapped_device *md; 227 struct mapped_device *md;
220 228
229 spin_lock(&_minor_lock);
230
221 md = inode->i_bdev->bd_disk->private_data; 231 md = inode->i_bdev->bd_disk->private_data;
232 if (!md)
233 goto out;
234
235 if (test_bit(DMF_FREEING, &md->flags) ||
236 test_bit(DMF_DELETING, &md->flags)) {
237 md = NULL;
238 goto out;
239 }
240
222 dm_get(md); 241 dm_get(md);
223 return 0; 242 atomic_inc(&md->open_count);
243
244out:
245 spin_unlock(&_minor_lock);
246
247 return md ? 0 : -ENXIO;
224} 248}
225 249
226static int dm_blk_close(struct inode *inode, struct file *file) 250static int dm_blk_close(struct inode *inode, struct file *file)
@@ -228,10 +252,35 @@ static int dm_blk_close(struct inode *inode, struct file *file)
228 struct mapped_device *md; 252 struct mapped_device *md;
229 253
230 md = inode->i_bdev->bd_disk->private_data; 254 md = inode->i_bdev->bd_disk->private_data;
255 atomic_dec(&md->open_count);
231 dm_put(md); 256 dm_put(md);
232 return 0; 257 return 0;
233} 258}
234 259
260int dm_open_count(struct mapped_device *md)
261{
262 return atomic_read(&md->open_count);
263}
264
265/*
266 * Guarantees nothing is using the device before it's deleted.
267 */
268int dm_lock_for_deletion(struct mapped_device *md)
269{
270 int r = 0;
271
272 spin_lock(&_minor_lock);
273
274 if (dm_open_count(md))
275 r = -EBUSY;
276 else
277 set_bit(DMF_DELETING, &md->flags);
278
279 spin_unlock(&_minor_lock);
280
281 return r;
282}
283
235static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) 284static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
236{ 285{
237 struct mapped_device *md = bdev->bd_disk->private_data; 286 struct mapped_device *md = bdev->bd_disk->private_data;
@@ -456,8 +505,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
456 if (r > 0) { 505 if (r > 0) {
457 /* the bio has been remapped so dispatch it */ 506 /* the bio has been remapped so dispatch it */
458 507
459 blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, 508 blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
460 tio->io->bio->bi_bdev->bd_dev, sector, 509 tio->io->bio->bi_bdev->bd_dev, sector,
461 clone->bi_sector); 510 clone->bi_sector);
462 511
463 generic_make_request(clone); 512 generic_make_request(clone);
@@ -744,43 +793,39 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
744/*----------------------------------------------------------------- 793/*-----------------------------------------------------------------
745 * An IDR is used to keep track of allocated minor numbers. 794 * An IDR is used to keep track of allocated minor numbers.
746 *---------------------------------------------------------------*/ 795 *---------------------------------------------------------------*/
747static DEFINE_MUTEX(_minor_lock);
748static DEFINE_IDR(_minor_idr); 796static DEFINE_IDR(_minor_idr);
749 797
750static void free_minor(unsigned int minor) 798static void free_minor(int minor)
751{ 799{
752 mutex_lock(&_minor_lock); 800 spin_lock(&_minor_lock);
753 idr_remove(&_minor_idr, minor); 801 idr_remove(&_minor_idr, minor);
754 mutex_unlock(&_minor_lock); 802 spin_unlock(&_minor_lock);
755} 803}
756 804
757/* 805/*
758 * See if the device with a specific minor # is free. 806 * See if the device with a specific minor # is free.
759 */ 807 */
760static int specific_minor(struct mapped_device *md, unsigned int minor) 808static int specific_minor(struct mapped_device *md, int minor)
761{ 809{
762 int r, m; 810 int r, m;
763 811
764 if (minor >= (1 << MINORBITS)) 812 if (minor >= (1 << MINORBITS))
765 return -EINVAL; 813 return -EINVAL;
766 814
767 mutex_lock(&_minor_lock); 815 r = idr_pre_get(&_minor_idr, GFP_KERNEL);
816 if (!r)
817 return -ENOMEM;
818
819 spin_lock(&_minor_lock);
768 820
769 if (idr_find(&_minor_idr, minor)) { 821 if (idr_find(&_minor_idr, minor)) {
770 r = -EBUSY; 822 r = -EBUSY;
771 goto out; 823 goto out;
772 } 824 }
773 825
774 r = idr_pre_get(&_minor_idr, GFP_KERNEL); 826 r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
775 if (!r) { 827 if (r)
776 r = -ENOMEM;
777 goto out;
778 }
779
780 r = idr_get_new_above(&_minor_idr, md, minor, &m);
781 if (r) {
782 goto out; 828 goto out;
783 }
784 829
785 if (m != minor) { 830 if (m != minor) {
786 idr_remove(&_minor_idr, m); 831 idr_remove(&_minor_idr, m);
@@ -789,24 +834,21 @@ static int specific_minor(struct mapped_device *md, unsigned int minor)
789 } 834 }
790 835
791out: 836out:
792 mutex_unlock(&_minor_lock); 837 spin_unlock(&_minor_lock);
793 return r; 838 return r;
794} 839}
795 840
796static int next_free_minor(struct mapped_device *md, unsigned int *minor) 841static int next_free_minor(struct mapped_device *md, int *minor)
797{ 842{
798 int r; 843 int r, m;
799 unsigned int m;
800
801 mutex_lock(&_minor_lock);
802 844
803 r = idr_pre_get(&_minor_idr, GFP_KERNEL); 845 r = idr_pre_get(&_minor_idr, GFP_KERNEL);
804 if (!r) { 846 if (!r)
805 r = -ENOMEM; 847 return -ENOMEM;
806 goto out; 848
807 } 849 spin_lock(&_minor_lock);
808 850
809 r = idr_get_new(&_minor_idr, md, &m); 851 r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
810 if (r) { 852 if (r) {
811 goto out; 853 goto out;
812 } 854 }
@@ -820,7 +862,7 @@ static int next_free_minor(struct mapped_device *md, unsigned int *minor)
820 *minor = m; 862 *minor = m;
821 863
822out: 864out:
823 mutex_unlock(&_minor_lock); 865 spin_unlock(&_minor_lock);
824 return r; 866 return r;
825} 867}
826 868
@@ -829,18 +871,25 @@ static struct block_device_operations dm_blk_dops;
829/* 871/*
830 * Allocate and initialise a blank device with a given minor. 872 * Allocate and initialise a blank device with a given minor.
831 */ 873 */
832static struct mapped_device *alloc_dev(unsigned int minor, int persistent) 874static struct mapped_device *alloc_dev(int minor)
833{ 875{
834 int r; 876 int r;
835 struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); 877 struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
878 void *old_md;
836 879
837 if (!md) { 880 if (!md) {
838 DMWARN("unable to allocate device, out of memory."); 881 DMWARN("unable to allocate device, out of memory.");
839 return NULL; 882 return NULL;
840 } 883 }
841 884
885 if (!try_module_get(THIS_MODULE))
886 goto bad0;
887
842 /* get a minor number for the dev */ 888 /* get a minor number for the dev */
843 r = persistent ? specific_minor(md, minor) : next_free_minor(md, &minor); 889 if (minor == DM_ANY_MINOR)
890 r = next_free_minor(md, &minor);
891 else
892 r = specific_minor(md, minor);
844 if (r < 0) 893 if (r < 0)
845 goto bad1; 894 goto bad1;
846 895
@@ -849,6 +898,7 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
849 init_MUTEX(&md->suspend_lock); 898 init_MUTEX(&md->suspend_lock);
850 rwlock_init(&md->map_lock); 899 rwlock_init(&md->map_lock);
851 atomic_set(&md->holders, 1); 900 atomic_set(&md->holders, 1);
901 atomic_set(&md->open_count, 0);
852 atomic_set(&md->event_nr, 0); 902 atomic_set(&md->event_nr, 0);
853 903
854 md->queue = blk_alloc_queue(GFP_KERNEL); 904 md->queue = blk_alloc_queue(GFP_KERNEL);
@@ -875,6 +925,10 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
875 if (!md->disk) 925 if (!md->disk)
876 goto bad4; 926 goto bad4;
877 927
928 atomic_set(&md->pending, 0);
929 init_waitqueue_head(&md->wait);
930 init_waitqueue_head(&md->eventq);
931
878 md->disk->major = _major; 932 md->disk->major = _major;
879 md->disk->first_minor = minor; 933 md->disk->first_minor = minor;
880 md->disk->fops = &dm_blk_dops; 934 md->disk->fops = &dm_blk_dops;
@@ -884,9 +938,12 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
884 add_disk(md->disk); 938 add_disk(md->disk);
885 format_dev_t(md->name, MKDEV(_major, minor)); 939 format_dev_t(md->name, MKDEV(_major, minor));
886 940
887 atomic_set(&md->pending, 0); 941 /* Populate the mapping, nobody knows we exist yet */
888 init_waitqueue_head(&md->wait); 942 spin_lock(&_minor_lock);
889 init_waitqueue_head(&md->eventq); 943 old_md = idr_replace(&_minor_idr, md, minor);
944 spin_unlock(&_minor_lock);
945
946 BUG_ON(old_md != MINOR_ALLOCED);
890 947
891 return md; 948 return md;
892 949
@@ -898,13 +955,15 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
898 blk_cleanup_queue(md->queue); 955 blk_cleanup_queue(md->queue);
899 free_minor(minor); 956 free_minor(minor);
900 bad1: 957 bad1:
958 module_put(THIS_MODULE);
959 bad0:
901 kfree(md); 960 kfree(md);
902 return NULL; 961 return NULL;
903} 962}
904 963
905static void free_dev(struct mapped_device *md) 964static void free_dev(struct mapped_device *md)
906{ 965{
907 unsigned int minor = md->disk->first_minor; 966 int minor = md->disk->first_minor;
908 967
909 if (md->suspended_bdev) { 968 if (md->suspended_bdev) {
910 thaw_bdev(md->suspended_bdev, NULL); 969 thaw_bdev(md->suspended_bdev, NULL);
@@ -914,8 +973,14 @@ static void free_dev(struct mapped_device *md)
914 mempool_destroy(md->io_pool); 973 mempool_destroy(md->io_pool);
915 del_gendisk(md->disk); 974 del_gendisk(md->disk);
916 free_minor(minor); 975 free_minor(minor);
976
977 spin_lock(&_minor_lock);
978 md->disk->private_data = NULL;
979 spin_unlock(&_minor_lock);
980
917 put_disk(md->disk); 981 put_disk(md->disk);
918 blk_cleanup_queue(md->queue); 982 blk_cleanup_queue(md->queue);
983 module_put(THIS_MODULE);
919 kfree(md); 984 kfree(md);
920} 985}
921 986
@@ -984,12 +1049,11 @@ static void __unbind(struct mapped_device *md)
984/* 1049/*
985 * Constructor for a new device. 1050 * Constructor for a new device.
986 */ 1051 */
987static int create_aux(unsigned int minor, int persistent, 1052int dm_create(int minor, struct mapped_device **result)
988 struct mapped_device **result)
989{ 1053{
990 struct mapped_device *md; 1054 struct mapped_device *md;
991 1055
992 md = alloc_dev(minor, persistent); 1056 md = alloc_dev(minor);
993 if (!md) 1057 if (!md)
994 return -ENXIO; 1058 return -ENXIO;
995 1059
@@ -997,16 +1061,6 @@ static int create_aux(unsigned int minor, int persistent,
997 return 0; 1061 return 0;
998} 1062}
999 1063
1000int dm_create(struct mapped_device **result)
1001{
1002 return create_aux(0, 0, result);
1003}
1004
1005int dm_create_with_minor(unsigned int minor, struct mapped_device **result)
1006{
1007 return create_aux(minor, 1, result);
1008}
1009
1010static struct mapped_device *dm_find_md(dev_t dev) 1064static struct mapped_device *dm_find_md(dev_t dev)
1011{ 1065{
1012 struct mapped_device *md; 1066 struct mapped_device *md;
@@ -1015,13 +1069,18 @@ static struct mapped_device *dm_find_md(dev_t dev)
1015 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) 1069 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
1016 return NULL; 1070 return NULL;
1017 1071
1018 mutex_lock(&_minor_lock); 1072 spin_lock(&_minor_lock);
1019 1073
1020 md = idr_find(&_minor_idr, minor); 1074 md = idr_find(&_minor_idr, minor);
1021 if (!md || (dm_disk(md)->first_minor != minor)) 1075 if (md && (md == MINOR_ALLOCED ||
1076 (dm_disk(md)->first_minor != minor) ||
1077 test_bit(DMF_FREEING, &md->flags))) {
1022 md = NULL; 1078 md = NULL;
1079 goto out;
1080 }
1023 1081
1024 mutex_unlock(&_minor_lock); 1082out:
1083 spin_unlock(&_minor_lock);
1025 1084
1026 return md; 1085 return md;
1027} 1086}
@@ -1051,12 +1110,23 @@ void dm_get(struct mapped_device *md)
1051 atomic_inc(&md->holders); 1110 atomic_inc(&md->holders);
1052} 1111}
1053 1112
1113const char *dm_device_name(struct mapped_device *md)
1114{
1115 return md->name;
1116}
1117EXPORT_SYMBOL_GPL(dm_device_name);
1118
1054void dm_put(struct mapped_device *md) 1119void dm_put(struct mapped_device *md)
1055{ 1120{
1056 struct dm_table *map; 1121 struct dm_table *map;
1057 1122
1058 if (atomic_dec_and_test(&md->holders)) { 1123 BUG_ON(test_bit(DMF_FREEING, &md->flags));
1124
1125 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
1059 map = dm_get_table(md); 1126 map = dm_get_table(md);
1127 idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor);
1128 set_bit(DMF_FREEING, &md->flags);
1129 spin_unlock(&_minor_lock);
1060 if (!dm_suspended(md)) { 1130 if (!dm_suspended(md)) {
1061 dm_table_presuspend_targets(map); 1131 dm_table_presuspend_targets(map);
1062 dm_table_postsuspend_targets(map); 1132 dm_table_postsuspend_targets(map);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index fd90bc8f9e45..3c03c0ecab7e 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -2,7 +2,7 @@
2 * Internal header file for device mapper 2 * Internal header file for device mapper
3 * 3 *
4 * Copyright (C) 2001, 2002 Sistina Software 4 * Copyright (C) 2001, 2002 Sistina Software
5 * Copyright (C) 2004 Red Hat, Inc. All rights reserved. 5 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
6 * 6 *
7 * This file is released under the LGPL. 7 * This file is released under the LGPL.
8 */ 8 */
@@ -17,9 +17,10 @@
17#include <linux/hdreg.h> 17#include <linux/hdreg.h>
18 18
19#define DM_NAME "device-mapper" 19#define DM_NAME "device-mapper"
20#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) 20
21#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) 21#define DMERR(f, arg...) printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
22#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) 22#define DMWARN(f, arg...) printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
23#define DMINFO(f, arg...) printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
23 24
24#define DMEMIT(x...) sz += ((sz >= maxlen) ? \ 25#define DMEMIT(x...) sz += ((sz >= maxlen) ? \
25 0 : scnprintf(result + sz, maxlen - sz, x)) 26 0 : scnprintf(result + sz, maxlen - sz, x))
@@ -39,83 +40,16 @@ struct dm_dev {
39}; 40};
40 41
41struct dm_table; 42struct dm_table;
42struct mapped_device;
43
44/*-----------------------------------------------------------------
45 * Functions for manipulating a struct mapped_device.
46 * Drop the reference with dm_put when you finish with the object.
47 *---------------------------------------------------------------*/
48int dm_create(struct mapped_device **md);
49int dm_create_with_minor(unsigned int minor, struct mapped_device **md);
50void dm_set_mdptr(struct mapped_device *md, void *ptr);
51void *dm_get_mdptr(struct mapped_device *md);
52struct mapped_device *dm_get_md(dev_t dev);
53
54/*
55 * Reference counting for md.
56 */
57void dm_get(struct mapped_device *md);
58void dm_put(struct mapped_device *md);
59
60/*
61 * A device can still be used while suspended, but I/O is deferred.
62 */
63int dm_suspend(struct mapped_device *md, int with_lockfs);
64int dm_resume(struct mapped_device *md);
65
66/*
67 * The device must be suspended before calling this method.
68 */
69int dm_swap_table(struct mapped_device *md, struct dm_table *t);
70
71/*
72 * Drop a reference on the table when you've finished with the
73 * result.
74 */
75struct dm_table *dm_get_table(struct mapped_device *md);
76
77/*
78 * Event functions.
79 */
80uint32_t dm_get_event_nr(struct mapped_device *md);
81int dm_wait_event(struct mapped_device *md, int event_nr);
82
83/*
84 * Info functions.
85 */
86struct gendisk *dm_disk(struct mapped_device *md);
87int dm_suspended(struct mapped_device *md);
88
89/*
90 * Geometry functions.
91 */
92int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
93int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
94 43
95/*----------------------------------------------------------------- 44/*-----------------------------------------------------------------
96 * Functions for manipulating a table. Tables are also reference 45 * Internal table functions.
97 * counted.
98 *---------------------------------------------------------------*/ 46 *---------------------------------------------------------------*/
99int dm_table_create(struct dm_table **result, int mode,
100 unsigned num_targets, struct mapped_device *md);
101
102void dm_table_get(struct dm_table *t);
103void dm_table_put(struct dm_table *t);
104
105int dm_table_add_target(struct dm_table *t, const char *type,
106 sector_t start, sector_t len, char *params);
107int dm_table_complete(struct dm_table *t);
108void dm_table_event_callback(struct dm_table *t, 47void dm_table_event_callback(struct dm_table *t,
109 void (*fn)(void *), void *context); 48 void (*fn)(void *), void *context);
110void dm_table_event(struct dm_table *t);
111sector_t dm_table_get_size(struct dm_table *t);
112struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); 49struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
113struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); 50struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
114void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q); 51void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q);
115unsigned int dm_table_get_num_targets(struct dm_table *t);
116struct list_head *dm_table_get_devices(struct dm_table *t); 52struct list_head *dm_table_get_devices(struct dm_table *t);
117int dm_table_get_mode(struct dm_table *t);
118struct mapped_device *dm_table_get_md(struct dm_table *t);
119void dm_table_presuspend_targets(struct dm_table *t); 53void dm_table_presuspend_targets(struct dm_table *t);
120void dm_table_postsuspend_targets(struct dm_table *t); 54void dm_table_postsuspend_targets(struct dm_table *t);
121void dm_table_resume_targets(struct dm_table *t); 55void dm_table_resume_targets(struct dm_table *t);
@@ -133,7 +67,6 @@ void dm_put_target_type(struct target_type *t);
133int dm_target_iterate(void (*iter_func)(struct target_type *tt, 67int dm_target_iterate(void (*iter_func)(struct target_type *tt,
134 void *param), void *param); 68 void *param), void *param);
135 69
136
137/*----------------------------------------------------------------- 70/*-----------------------------------------------------------------
138 * Useful inlines. 71 * Useful inlines.
139 *---------------------------------------------------------------*/ 72 *---------------------------------------------------------------*/
@@ -191,5 +124,7 @@ void dm_stripe_exit(void);
191 124
192void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); 125void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
193union map_info *dm_get_mapinfo(struct bio *bio); 126union map_info *dm_get_mapinfo(struct bio *bio);
127int dm_open_count(struct mapped_device *md);
128int dm_lock_for_deletion(struct mapped_device *md);
194 129
195#endif 130#endif
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index 72480a48d88b..73ab875fb158 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -314,7 +314,7 @@ static void complete_io(unsigned long error, void *context)
314 314
315 if (error) { 315 if (error) {
316 if (job->rw == WRITE) 316 if (job->rw == WRITE)
317 job->write_err &= error; 317 job->write_err |= error;
318 else 318 else
319 job->read_err = 1; 319 job->read_err = 1;
320 320
@@ -460,7 +460,7 @@ static void segment_complete(int read_err,
460 job->read_err = 1; 460 job->read_err = 1;
461 461
462 if (write_err) 462 if (write_err)
463 job->write_err &= write_err; 463 job->write_err |= write_err;
464 464
465 /* 465 /*
466 * Only dispatch more work if there hasn't been an error. 466 * Only dispatch more work if there hasn't been an error.
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 777585458c85..ff83c9b5979e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -111,7 +111,7 @@ static int linear_issue_flush(request_queue_t *q, struct gendisk *disk,
111 return ret; 111 return ret;
112} 112}
113 113
114static int linear_run (mddev_t *mddev) 114static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
115{ 115{
116 linear_conf_t *conf; 116 linear_conf_t *conf;
117 dev_info_t **table; 117 dev_info_t **table;
@@ -121,20 +121,21 @@ static int linear_run (mddev_t *mddev)
121 sector_t curr_offset; 121 sector_t curr_offset;
122 struct list_head *tmp; 122 struct list_head *tmp;
123 123
124 conf = kzalloc (sizeof (*conf) + mddev->raid_disks*sizeof(dev_info_t), 124 conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
125 GFP_KERNEL); 125 GFP_KERNEL);
126 if (!conf) 126 if (!conf)
127 goto out; 127 return NULL;
128
128 mddev->private = conf; 129 mddev->private = conf;
129 130
130 cnt = 0; 131 cnt = 0;
131 mddev->array_size = 0; 132 conf->array_size = 0;
132 133
133 ITERATE_RDEV(mddev,rdev,tmp) { 134 ITERATE_RDEV(mddev,rdev,tmp) {
134 int j = rdev->raid_disk; 135 int j = rdev->raid_disk;
135 dev_info_t *disk = conf->disks + j; 136 dev_info_t *disk = conf->disks + j;
136 137
137 if (j < 0 || j > mddev->raid_disks || disk->rdev) { 138 if (j < 0 || j > raid_disks || disk->rdev) {
138 printk("linear: disk numbering problem. Aborting!\n"); 139 printk("linear: disk numbering problem. Aborting!\n");
139 goto out; 140 goto out;
140 } 141 }
@@ -152,11 +153,11 @@ static int linear_run (mddev_t *mddev)
152 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); 153 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
153 154
154 disk->size = rdev->size; 155 disk->size = rdev->size;
155 mddev->array_size += rdev->size; 156 conf->array_size += rdev->size;
156 157
157 cnt++; 158 cnt++;
158 } 159 }
159 if (cnt != mddev->raid_disks) { 160 if (cnt != raid_disks) {
160 printk("linear: not enough drives present. Aborting!\n"); 161 printk("linear: not enough drives present. Aborting!\n");
161 goto out; 162 goto out;
162 } 163 }
@@ -200,7 +201,7 @@ static int linear_run (mddev_t *mddev)
200 unsigned round; 201 unsigned round;
201 unsigned long base; 202 unsigned long base;
202 203
203 sz = mddev->array_size >> conf->preshift; 204 sz = conf->array_size >> conf->preshift;
204 sz += 1; /* force round-up */ 205 sz += 1; /* force round-up */
205 base = conf->hash_spacing >> conf->preshift; 206 base = conf->hash_spacing >> conf->preshift;
206 round = sector_div(sz, base); 207 round = sector_div(sz, base);
@@ -247,14 +248,56 @@ static int linear_run (mddev_t *mddev)
247 248
248 BUG_ON(table - conf->hash_table > nb_zone); 249 BUG_ON(table - conf->hash_table > nb_zone);
249 250
251 return conf;
252
253out:
254 kfree(conf);
255 return NULL;
256}
257
258static int linear_run (mddev_t *mddev)
259{
260 linear_conf_t *conf;
261
262 conf = linear_conf(mddev, mddev->raid_disks);
263
264 if (!conf)
265 return 1;
266 mddev->private = conf;
267 mddev->array_size = conf->array_size;
268
250 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); 269 blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
251 mddev->queue->unplug_fn = linear_unplug; 270 mddev->queue->unplug_fn = linear_unplug;
252 mddev->queue->issue_flush_fn = linear_issue_flush; 271 mddev->queue->issue_flush_fn = linear_issue_flush;
253 return 0; 272 return 0;
273}
254 274
255out: 275static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
256 kfree(conf); 276{
257 return 1; 277 /* Adding a drive to a linear array allows the array to grow.
278 * It is permitted if the new drive has a matching superblock
279 * already on it, with raid_disk equal to raid_disks.
280 * It is achieved by creating a new linear_private_data structure
281 * and swapping it in in-place of the current one.
282 * The current one is never freed until the array is stopped.
283 * This avoids races.
284 */
285 linear_conf_t *newconf;
286
287 if (rdev->raid_disk != mddev->raid_disks)
288 return -EINVAL;
289
290 newconf = linear_conf(mddev,mddev->raid_disks+1);
291
292 if (!newconf)
293 return -ENOMEM;
294
295 newconf->prev = mddev_to_conf(mddev);
296 mddev->private = newconf;
297 mddev->raid_disks++;
298 mddev->array_size = newconf->array_size;
299 set_capacity(mddev->gendisk, mddev->array_size << 1);
300 return 0;
258} 301}
259 302
260static int linear_stop (mddev_t *mddev) 303static int linear_stop (mddev_t *mddev)
@@ -262,8 +305,12 @@ static int linear_stop (mddev_t *mddev)
262 linear_conf_t *conf = mddev_to_conf(mddev); 305 linear_conf_t *conf = mddev_to_conf(mddev);
263 306
264 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 307 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
265 kfree(conf->hash_table); 308 do {
266 kfree(conf); 309 linear_conf_t *t = conf->prev;
310 kfree(conf->hash_table);
311 kfree(conf);
312 conf = t;
313 } while (conf);
267 314
268 return 0; 315 return 0;
269} 316}
@@ -360,6 +407,7 @@ static struct mdk_personality linear_personality =
360 .run = linear_run, 407 .run = linear_run,
361 .stop = linear_stop, 408 .stop = linear_stop,
362 .status = linear_status, 409 .status = linear_status,
410 .hot_add_disk = linear_add,
363}; 411};
364 412
365static int __init linear_init (void) 413static int __init linear_init (void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f19b874753a9..306268ec99ff 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -44,6 +44,7 @@
44#include <linux/suspend.h> 44#include <linux/suspend.h>
45#include <linux/poll.h> 45#include <linux/poll.h>
46#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/ctype.h>
47 48
48#include <linux/init.h> 49#include <linux/init.h>
49 50
@@ -72,6 +73,10 @@ static void autostart_arrays (int part);
72static LIST_HEAD(pers_list); 73static LIST_HEAD(pers_list);
73static DEFINE_SPINLOCK(pers_lock); 74static DEFINE_SPINLOCK(pers_lock);
74 75
76static void md_print_devices(void);
77
78#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
79
75/* 80/*
76 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' 81 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
77 * is 1000 KB/sec, so the extra system load does not show up that much. 82 * is 1000 KB/sec, so the extra system load does not show up that much.
@@ -170,7 +175,7 @@ EXPORT_SYMBOL_GPL(md_new_event);
170/* Alternate version that can be called from interrupts 175/* Alternate version that can be called from interrupts
171 * when calling sysfs_notify isn't needed. 176 * when calling sysfs_notify isn't needed.
172 */ 177 */
173void md_new_event_inintr(mddev_t *mddev) 178static void md_new_event_inintr(mddev_t *mddev)
174{ 179{
175 atomic_inc(&md_event_count); 180 atomic_inc(&md_event_count);
176 wake_up(&md_event_waiters); 181 wake_up(&md_event_waiters);
@@ -732,6 +737,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
732{ 737{
733 mdp_disk_t *desc; 738 mdp_disk_t *desc;
734 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); 739 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
740 __u64 ev1 = md_event(sb);
735 741
736 rdev->raid_disk = -1; 742 rdev->raid_disk = -1;
737 rdev->flags = 0; 743 rdev->flags = 0;
@@ -748,7 +754,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
748 mddev->layout = sb->layout; 754 mddev->layout = sb->layout;
749 mddev->raid_disks = sb->raid_disks; 755 mddev->raid_disks = sb->raid_disks;
750 mddev->size = sb->size; 756 mddev->size = sb->size;
751 mddev->events = md_event(sb); 757 mddev->events = ev1;
752 mddev->bitmap_offset = 0; 758 mddev->bitmap_offset = 0;
753 mddev->default_bitmap_offset = MD_SB_BYTES >> 9; 759 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
754 760
@@ -797,7 +803,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
797 803
798 } else if (mddev->pers == NULL) { 804 } else if (mddev->pers == NULL) {
799 /* Insist on good event counter while assembling */ 805 /* Insist on good event counter while assembling */
800 __u64 ev1 = md_event(sb);
801 ++ev1; 806 ++ev1;
802 if (ev1 < mddev->events) 807 if (ev1 < mddev->events)
803 return -EINVAL; 808 return -EINVAL;
@@ -805,19 +810,21 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
805 /* if adding to array with a bitmap, then we can accept an 810 /* if adding to array with a bitmap, then we can accept an
806 * older device ... but not too old. 811 * older device ... but not too old.
807 */ 812 */
808 __u64 ev1 = md_event(sb);
809 if (ev1 < mddev->bitmap->events_cleared) 813 if (ev1 < mddev->bitmap->events_cleared)
810 return 0; 814 return 0;
811 } else /* just a hot-add of a new device, leave raid_disk at -1 */ 815 } else {
812 return 0; 816 if (ev1 < mddev->events)
817 /* just a hot-add of a new device, leave raid_disk at -1 */
818 return 0;
819 }
813 820
814 if (mddev->level != LEVEL_MULTIPATH) { 821 if (mddev->level != LEVEL_MULTIPATH) {
815 desc = sb->disks + rdev->desc_nr; 822 desc = sb->disks + rdev->desc_nr;
816 823
817 if (desc->state & (1<<MD_DISK_FAULTY)) 824 if (desc->state & (1<<MD_DISK_FAULTY))
818 set_bit(Faulty, &rdev->flags); 825 set_bit(Faulty, &rdev->flags);
819 else if (desc->state & (1<<MD_DISK_SYNC) && 826 else if (desc->state & (1<<MD_DISK_SYNC) /* &&
820 desc->raid_disk < mddev->raid_disks) { 827 desc->raid_disk < mddev->raid_disks */) {
821 set_bit(In_sync, &rdev->flags); 828 set_bit(In_sync, &rdev->flags);
822 rdev->raid_disk = desc->raid_disk; 829 rdev->raid_disk = desc->raid_disk;
823 } 830 }
@@ -1100,6 +1107,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
1100static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) 1107static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1101{ 1108{
1102 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); 1109 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1110 __u64 ev1 = le64_to_cpu(sb->events);
1103 1111
1104 rdev->raid_disk = -1; 1112 rdev->raid_disk = -1;
1105 rdev->flags = 0; 1113 rdev->flags = 0;
@@ -1115,7 +1123,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1115 mddev->layout = le32_to_cpu(sb->layout); 1123 mddev->layout = le32_to_cpu(sb->layout);
1116 mddev->raid_disks = le32_to_cpu(sb->raid_disks); 1124 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1117 mddev->size = le64_to_cpu(sb->size)/2; 1125 mddev->size = le64_to_cpu(sb->size)/2;
1118 mddev->events = le64_to_cpu(sb->events); 1126 mddev->events = ev1;
1119 mddev->bitmap_offset = 0; 1127 mddev->bitmap_offset = 0;
1120 mddev->default_bitmap_offset = 1024 >> 9; 1128 mddev->default_bitmap_offset = 1024 >> 9;
1121 1129
@@ -1149,7 +1157,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1149 1157
1150 } else if (mddev->pers == NULL) { 1158 } else if (mddev->pers == NULL) {
1151 /* Insist of good event counter while assembling */ 1159 /* Insist of good event counter while assembling */
1152 __u64 ev1 = le64_to_cpu(sb->events);
1153 ++ev1; 1160 ++ev1;
1154 if (ev1 < mddev->events) 1161 if (ev1 < mddev->events)
1155 return -EINVAL; 1162 return -EINVAL;
@@ -1157,12 +1164,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1157 /* If adding to array with a bitmap, then we can accept an 1164 /* If adding to array with a bitmap, then we can accept an
1158 * older device, but not too old. 1165 * older device, but not too old.
1159 */ 1166 */
1160 __u64 ev1 = le64_to_cpu(sb->events);
1161 if (ev1 < mddev->bitmap->events_cleared) 1167 if (ev1 < mddev->bitmap->events_cleared)
1162 return 0; 1168 return 0;
1163 } else /* just a hot-add of a new device, leave raid_disk at -1 */ 1169 } else {
1164 return 0; 1170 if (ev1 < mddev->events)
1165 1171 /* just a hot-add of a new device, leave raid_disk at -1 */
1172 return 0;
1173 }
1166 if (mddev->level != LEVEL_MULTIPATH) { 1174 if (mddev->level != LEVEL_MULTIPATH) {
1167 int role; 1175 int role;
1168 rdev->desc_nr = le32_to_cpu(sb->dev_number); 1176 rdev->desc_nr = le32_to_cpu(sb->dev_number);
@@ -1174,7 +1182,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1174 set_bit(Faulty, &rdev->flags); 1182 set_bit(Faulty, &rdev->flags);
1175 break; 1183 break;
1176 default: 1184 default:
1177 set_bit(In_sync, &rdev->flags); 1185 if ((le32_to_cpu(sb->feature_map) &
1186 MD_FEATURE_RECOVERY_OFFSET))
1187 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1188 else
1189 set_bit(In_sync, &rdev->flags);
1178 rdev->raid_disk = role; 1190 rdev->raid_disk = role;
1179 break; 1191 break;
1180 } 1192 }
@@ -1198,6 +1210,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1198 1210
1199 sb->feature_map = 0; 1211 sb->feature_map = 0;
1200 sb->pad0 = 0; 1212 sb->pad0 = 0;
1213 sb->recovery_offset = cpu_to_le64(0);
1201 memset(sb->pad1, 0, sizeof(sb->pad1)); 1214 memset(sb->pad1, 0, sizeof(sb->pad1));
1202 memset(sb->pad2, 0, sizeof(sb->pad2)); 1215 memset(sb->pad2, 0, sizeof(sb->pad2));
1203 memset(sb->pad3, 0, sizeof(sb->pad3)); 1216 memset(sb->pad3, 0, sizeof(sb->pad3));
@@ -1218,6 +1231,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1218 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); 1231 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
1219 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); 1232 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1220 } 1233 }
1234
1235 if (rdev->raid_disk >= 0 &&
1236 !test_bit(In_sync, &rdev->flags) &&
1237 rdev->recovery_offset > 0) {
1238 sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1239 sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
1240 }
1241
1221 if (mddev->reshape_position != MaxSector) { 1242 if (mddev->reshape_position != MaxSector) {
1222 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); 1243 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1223 sb->reshape_position = cpu_to_le64(mddev->reshape_position); 1244 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
@@ -1242,11 +1263,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1242 sb->dev_roles[i] = cpu_to_le16(0xfffe); 1263 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1243 else if (test_bit(In_sync, &rdev2->flags)) 1264 else if (test_bit(In_sync, &rdev2->flags))
1244 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); 1265 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1266 else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
1267 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1245 else 1268 else
1246 sb->dev_roles[i] = cpu_to_le16(0xffff); 1269 sb->dev_roles[i] = cpu_to_le16(0xffff);
1247 } 1270 }
1248 1271
1249 sb->recovery_offset = cpu_to_le64(0); /* not supported yet */
1250 sb->sb_csum = calc_sb_1_csum(sb); 1272 sb->sb_csum = calc_sb_1_csum(sb);
1251} 1273}
1252 1274
@@ -1507,7 +1529,7 @@ static void print_rdev(mdk_rdev_t *rdev)
1507 printk(KERN_INFO "md: no rdev superblock!\n"); 1529 printk(KERN_INFO "md: no rdev superblock!\n");
1508} 1530}
1509 1531
1510void md_print_devices(void) 1532static void md_print_devices(void)
1511{ 1533{
1512 struct list_head *tmp, *tmp2; 1534 struct list_head *tmp, *tmp2;
1513 mdk_rdev_t *rdev; 1535 mdk_rdev_t *rdev;
@@ -1536,15 +1558,30 @@ void md_print_devices(void)
1536} 1558}
1537 1559
1538 1560
1539static void sync_sbs(mddev_t * mddev) 1561static void sync_sbs(mddev_t * mddev, int nospares)
1540{ 1562{
1563 /* Update each superblock (in-memory image), but
1564 * if we are allowed to, skip spares which already
1565 * have the right event counter, or have one earlier
1566 * (which would mean they aren't being marked as dirty
1567 * with the rest of the array)
1568 */
1541 mdk_rdev_t *rdev; 1569 mdk_rdev_t *rdev;
1542 struct list_head *tmp; 1570 struct list_head *tmp;
1543 1571
1544 ITERATE_RDEV(mddev,rdev,tmp) { 1572 ITERATE_RDEV(mddev,rdev,tmp) {
1545 super_types[mddev->major_version]. 1573 if (rdev->sb_events == mddev->events ||
1546 sync_super(mddev, rdev); 1574 (nospares &&
1547 rdev->sb_loaded = 1; 1575 rdev->raid_disk < 0 &&
1576 (rdev->sb_events&1)==0 &&
1577 rdev->sb_events+1 == mddev->events)) {
1578 /* Don't update this superblock */
1579 rdev->sb_loaded = 2;
1580 } else {
1581 super_types[mddev->major_version].
1582 sync_super(mddev, rdev);
1583 rdev->sb_loaded = 1;
1584 }
1548 } 1585 }
1549} 1586}
1550 1587
@@ -1554,12 +1591,42 @@ void md_update_sb(mddev_t * mddev)
1554 struct list_head *tmp; 1591 struct list_head *tmp;
1555 mdk_rdev_t *rdev; 1592 mdk_rdev_t *rdev;
1556 int sync_req; 1593 int sync_req;
1594 int nospares = 0;
1557 1595
1558repeat: 1596repeat:
1559 spin_lock_irq(&mddev->write_lock); 1597 spin_lock_irq(&mddev->write_lock);
1560 sync_req = mddev->in_sync; 1598 sync_req = mddev->in_sync;
1561 mddev->utime = get_seconds(); 1599 mddev->utime = get_seconds();
1562 mddev->events ++; 1600 if (mddev->sb_dirty == 3)
1601 /* just a clean<-> dirty transition, possibly leave spares alone,
1602 * though if events isn't the right even/odd, we will have to do
1603 * spares after all
1604 */
1605 nospares = 1;
1606
1607 /* If this is just a dirty<->clean transition, and the array is clean
1608 * and 'events' is odd, we can roll back to the previous clean state */
1609 if (mddev->sb_dirty == 3
1610 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
1611 && (mddev->events & 1))
1612 mddev->events--;
1613 else {
1614 /* otherwise we have to go forward and ... */
1615 mddev->events ++;
1616 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1617 /* .. if the array isn't clean, insist on an odd 'events' */
1618 if ((mddev->events&1)==0) {
1619 mddev->events++;
1620 nospares = 0;
1621 }
1622 } else {
1623 /* otherwise insist on an even 'events' (for clean states) */
1624 if ((mddev->events&1)) {
1625 mddev->events++;
1626 nospares = 0;
1627 }
1628 }
1629 }
1563 1630
1564 if (!mddev->events) { 1631 if (!mddev->events) {
1565 /* 1632 /*
@@ -1571,7 +1638,7 @@ repeat:
1571 mddev->events --; 1638 mddev->events --;
1572 } 1639 }
1573 mddev->sb_dirty = 2; 1640 mddev->sb_dirty = 2;
1574 sync_sbs(mddev); 1641 sync_sbs(mddev, nospares);
1575 1642
1576 /* 1643 /*
1577 * do not write anything to disk if using 1644 * do not write anything to disk if using
@@ -1593,6 +1660,8 @@ repeat:
1593 ITERATE_RDEV(mddev,rdev,tmp) { 1660 ITERATE_RDEV(mddev,rdev,tmp) {
1594 char b[BDEVNAME_SIZE]; 1661 char b[BDEVNAME_SIZE];
1595 dprintk(KERN_INFO "md: "); 1662 dprintk(KERN_INFO "md: ");
1663 if (rdev->sb_loaded != 1)
1664 continue; /* no noise on spare devices */
1596 if (test_bit(Faulty, &rdev->flags)) 1665 if (test_bit(Faulty, &rdev->flags))
1597 dprintk("(skipping faulty "); 1666 dprintk("(skipping faulty ");
1598 1667
@@ -1604,6 +1673,7 @@ repeat:
1604 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", 1673 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1605 bdevname(rdev->bdev,b), 1674 bdevname(rdev->bdev,b),
1606 (unsigned long long)rdev->sb_offset); 1675 (unsigned long long)rdev->sb_offset);
1676 rdev->sb_events = mddev->events;
1607 1677
1608 } else 1678 } else
1609 dprintk(")\n"); 1679 dprintk(")\n");
@@ -1667,6 +1737,10 @@ state_show(mdk_rdev_t *rdev, char *page)
1667 len += sprintf(page+len, "%sin_sync",sep); 1737 len += sprintf(page+len, "%sin_sync",sep);
1668 sep = ","; 1738 sep = ",";
1669 } 1739 }
1740 if (test_bit(WriteMostly, &rdev->flags)) {
1741 len += sprintf(page+len, "%swrite_mostly",sep);
1742 sep = ",";
1743 }
1670 if (!test_bit(Faulty, &rdev->flags) && 1744 if (!test_bit(Faulty, &rdev->flags) &&
1671 !test_bit(In_sync, &rdev->flags)) { 1745 !test_bit(In_sync, &rdev->flags)) {
1672 len += sprintf(page+len, "%sspare", sep); 1746 len += sprintf(page+len, "%sspare", sep);
@@ -1675,8 +1749,40 @@ state_show(mdk_rdev_t *rdev, char *page)
1675 return len+sprintf(page+len, "\n"); 1749 return len+sprintf(page+len, "\n");
1676} 1750}
1677 1751
1752static ssize_t
1753state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1754{
1755 /* can write
1756 * faulty - simulates an error
1757 * remove - disconnects the device
1758 * writemostly - sets write_mostly
1759 * -writemostly - clears write_mostly
1760 */
1761 int err = -EINVAL;
1762 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
1763 md_error(rdev->mddev, rdev);
1764 err = 0;
1765 } else if (cmd_match(buf, "remove")) {
1766 if (rdev->raid_disk >= 0)
1767 err = -EBUSY;
1768 else {
1769 mddev_t *mddev = rdev->mddev;
1770 kick_rdev_from_array(rdev);
1771 md_update_sb(mddev);
1772 md_new_event(mddev);
1773 err = 0;
1774 }
1775 } else if (cmd_match(buf, "writemostly")) {
1776 set_bit(WriteMostly, &rdev->flags);
1777 err = 0;
1778 } else if (cmd_match(buf, "-writemostly")) {
1779 clear_bit(WriteMostly, &rdev->flags);
1780 err = 0;
1781 }
1782 return err ? err : len;
1783}
1678static struct rdev_sysfs_entry 1784static struct rdev_sysfs_entry
1679rdev_state = __ATTR_RO(state); 1785rdev_state = __ATTR(state, 0644, state_show, state_store);
1680 1786
1681static ssize_t 1787static ssize_t
1682super_show(mdk_rdev_t *rdev, char *page) 1788super_show(mdk_rdev_t *rdev, char *page)
@@ -1873,6 +1979,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
1873 rdev->desc_nr = -1; 1979 rdev->desc_nr = -1;
1874 rdev->flags = 0; 1980 rdev->flags = 0;
1875 rdev->data_offset = 0; 1981 rdev->data_offset = 0;
1982 rdev->sb_events = 0;
1876 atomic_set(&rdev->nr_pending, 0); 1983 atomic_set(&rdev->nr_pending, 0);
1877 atomic_set(&rdev->read_errors, 0); 1984 atomic_set(&rdev->read_errors, 0);
1878 atomic_set(&rdev->corrected_errors, 0); 1985 atomic_set(&rdev->corrected_errors, 0);
@@ -1978,6 +2085,54 @@ static void analyze_sbs(mddev_t * mddev)
1978} 2085}
1979 2086
1980static ssize_t 2087static ssize_t
2088safe_delay_show(mddev_t *mddev, char *page)
2089{
2090 int msec = (mddev->safemode_delay*1000)/HZ;
2091 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
2092}
2093static ssize_t
2094safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2095{
2096 int scale=1;
2097 int dot=0;
2098 int i;
2099 unsigned long msec;
2100 char buf[30];
2101 char *e;
2102 /* remove a period, and count digits after it */
2103 if (len >= sizeof(buf))
2104 return -EINVAL;
2105 strlcpy(buf, cbuf, len);
2106 buf[len] = 0;
2107 for (i=0; i<len; i++) {
2108 if (dot) {
2109 if (isdigit(buf[i])) {
2110 buf[i-1] = buf[i];
2111 scale *= 10;
2112 }
2113 buf[i] = 0;
2114 } else if (buf[i] == '.') {
2115 dot=1;
2116 buf[i] = 0;
2117 }
2118 }
2119 msec = simple_strtoul(buf, &e, 10);
2120 if (e == buf || (*e && *e != '\n'))
2121 return -EINVAL;
2122 msec = (msec * 1000) / scale;
2123 if (msec == 0)
2124 mddev->safemode_delay = 0;
2125 else {
2126 mddev->safemode_delay = (msec*HZ)/1000;
2127 if (mddev->safemode_delay == 0)
2128 mddev->safemode_delay = 1;
2129 }
2130 return len;
2131}
2132static struct md_sysfs_entry md_safe_delay =
2133__ATTR(safe_mode_delay, 0644,safe_delay_show, safe_delay_store);
2134
2135static ssize_t
1981level_show(mddev_t *mddev, char *page) 2136level_show(mddev_t *mddev, char *page)
1982{ 2137{
1983 struct mdk_personality *p = mddev->pers; 2138 struct mdk_personality *p = mddev->pers;
@@ -2012,6 +2167,32 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
2012static struct md_sysfs_entry md_level = 2167static struct md_sysfs_entry md_level =
2013__ATTR(level, 0644, level_show, level_store); 2168__ATTR(level, 0644, level_show, level_store);
2014 2169
2170
2171static ssize_t
2172layout_show(mddev_t *mddev, char *page)
2173{
2174 /* just a number, not meaningful for all levels */
2175 return sprintf(page, "%d\n", mddev->layout);
2176}
2177
2178static ssize_t
2179layout_store(mddev_t *mddev, const char *buf, size_t len)
2180{
2181 char *e;
2182 unsigned long n = simple_strtoul(buf, &e, 10);
2183 if (mddev->pers)
2184 return -EBUSY;
2185
2186 if (!*buf || (*e && *e != '\n'))
2187 return -EINVAL;
2188
2189 mddev->layout = n;
2190 return len;
2191}
2192static struct md_sysfs_entry md_layout =
2193__ATTR(layout, 0655, layout_show, layout_store);
2194
2195
2015static ssize_t 2196static ssize_t
2016raid_disks_show(mddev_t *mddev, char *page) 2197raid_disks_show(mddev_t *mddev, char *page)
2017{ 2198{
@@ -2067,6 +2248,200 @@ static struct md_sysfs_entry md_chunk_size =
2067__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); 2248__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
2068 2249
2069static ssize_t 2250static ssize_t
2251resync_start_show(mddev_t *mddev, char *page)
2252{
2253 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
2254}
2255
2256static ssize_t
2257resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2258{
2259 /* can only set resync_start if array is not yet active */
2260 char *e;
2261 unsigned long long n = simple_strtoull(buf, &e, 10);
2262
2263 if (mddev->pers)
2264 return -EBUSY;
2265 if (!*buf || (*e && *e != '\n'))
2266 return -EINVAL;
2267
2268 mddev->recovery_cp = n;
2269 return len;
2270}
2271static struct md_sysfs_entry md_resync_start =
2272__ATTR(resync_start, 0644, resync_start_show, resync_start_store);
2273
2274/*
2275 * The array state can be:
2276 *
2277 * clear
2278 * No devices, no size, no level
2279 * Equivalent to STOP_ARRAY ioctl
2280 * inactive
2281 * May have some settings, but array is not active
2282 * all IO results in error
2283 * When written, doesn't tear down array, but just stops it
2284 * suspended (not supported yet)
2285 * All IO requests will block. The array can be reconfigured.
2286 * Writing this, if accepted, will block until array is quiescent
2287 * readonly
2288 * no resync can happen. no superblocks get written.
2289 * write requests fail
2290 * read-auto
2291 * like readonly, but behaves like 'clean' on a write request.
2292 *
2293 * clean - no pending writes, but otherwise active.
2294 * When written to inactive array, starts without resync
2295 * If a write request arrives then
2296 * if metadata is known, mark 'dirty' and switch to 'active'.
2297 * if not known, block and switch to write-pending
2298 * If written to an active array that has pending writes, then fails.
2299 * active
2300 * fully active: IO and resync can be happening.
2301 * When written to inactive array, starts with resync
2302 *
2303 * write-pending
2304 * clean, but writes are blocked waiting for 'active' to be written.
2305 *
2306 * active-idle
2307 * like active, but no writes have been seen for a while (100msec).
2308 *
2309 */
2310enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
2311 write_pending, active_idle, bad_word};
2312static char *array_states[] = {
2313 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
2314 "write-pending", "active-idle", NULL };
2315
2316static int match_word(const char *word, char **list)
2317{
2318 int n;
2319 for (n=0; list[n]; n++)
2320 if (cmd_match(word, list[n]))
2321 break;
2322 return n;
2323}
2324
2325static ssize_t
2326array_state_show(mddev_t *mddev, char *page)
2327{
2328 enum array_state st = inactive;
2329
2330 if (mddev->pers)
2331 switch(mddev->ro) {
2332 case 1:
2333 st = readonly;
2334 break;
2335 case 2:
2336 st = read_auto;
2337 break;
2338 case 0:
2339 if (mddev->in_sync)
2340 st = clean;
2341 else if (mddev->safemode)
2342 st = active_idle;
2343 else
2344 st = active;
2345 }
2346 else {
2347 if (list_empty(&mddev->disks) &&
2348 mddev->raid_disks == 0 &&
2349 mddev->size == 0)
2350 st = clear;
2351 else
2352 st = inactive;
2353 }
2354 return sprintf(page, "%s\n", array_states[st]);
2355}
2356
2357static int do_md_stop(mddev_t * mddev, int ro);
2358static int do_md_run(mddev_t * mddev);
2359static int restart_array(mddev_t *mddev);
2360
2361static ssize_t
2362array_state_store(mddev_t *mddev, const char *buf, size_t len)
2363{
2364 int err = -EINVAL;
2365 enum array_state st = match_word(buf, array_states);
2366 switch(st) {
2367 case bad_word:
2368 break;
2369 case clear:
2370 /* stopping an active array */
2371 if (mddev->pers) {
2372 if (atomic_read(&mddev->active) > 1)
2373 return -EBUSY;
2374 err = do_md_stop(mddev, 0);
2375 }
2376 break;
2377 case inactive:
2378 /* stopping an active array */
2379 if (mddev->pers) {
2380 if (atomic_read(&mddev->active) > 1)
2381 return -EBUSY;
2382 err = do_md_stop(mddev, 2);
2383 }
2384 break;
2385 case suspended:
2386 break; /* not supported yet */
2387 case readonly:
2388 if (mddev->pers)
2389 err = do_md_stop(mddev, 1);
2390 else {
2391 mddev->ro = 1;
2392 err = do_md_run(mddev);
2393 }
2394 break;
2395 case read_auto:
2396 /* stopping an active array */
2397 if (mddev->pers) {
2398 err = do_md_stop(mddev, 1);
2399 if (err == 0)
2400 mddev->ro = 2; /* FIXME mark devices writable */
2401 } else {
2402 mddev->ro = 2;
2403 err = do_md_run(mddev);
2404 }
2405 break;
2406 case clean:
2407 if (mddev->pers) {
2408 restart_array(mddev);
2409 spin_lock_irq(&mddev->write_lock);
2410 if (atomic_read(&mddev->writes_pending) == 0) {
2411 mddev->in_sync = 1;
2412 mddev->sb_dirty = 1;
2413 }
2414 spin_unlock_irq(&mddev->write_lock);
2415 } else {
2416 mddev->ro = 0;
2417 mddev->recovery_cp = MaxSector;
2418 err = do_md_run(mddev);
2419 }
2420 break;
2421 case active:
2422 if (mddev->pers) {
2423 restart_array(mddev);
2424 mddev->sb_dirty = 0;
2425 wake_up(&mddev->sb_wait);
2426 err = 0;
2427 } else {
2428 mddev->ro = 0;
2429 err = do_md_run(mddev);
2430 }
2431 break;
2432 case write_pending:
2433 case active_idle:
2434 /* these cannot be set */
2435 break;
2436 }
2437 if (err)
2438 return err;
2439 else
2440 return len;
2441}
2442static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store);
2443
2444static ssize_t
2070null_show(mddev_t *mddev, char *page) 2445null_show(mddev_t *mddev, char *page)
2071{ 2446{
2072 return -EINVAL; 2447 return -EINVAL;
@@ -2428,11 +2803,15 @@ __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
2428 2803
2429static struct attribute *md_default_attrs[] = { 2804static struct attribute *md_default_attrs[] = {
2430 &md_level.attr, 2805 &md_level.attr,
2806 &md_layout.attr,
2431 &md_raid_disks.attr, 2807 &md_raid_disks.attr,
2432 &md_chunk_size.attr, 2808 &md_chunk_size.attr,
2433 &md_size.attr, 2809 &md_size.attr,
2810 &md_resync_start.attr,
2434 &md_metadata.attr, 2811 &md_metadata.attr,
2435 &md_new_device.attr, 2812 &md_new_device.attr,
2813 &md_safe_delay.attr,
2814 &md_array_state.attr,
2436 NULL, 2815 NULL,
2437}; 2816};
2438 2817
@@ -2553,8 +2932,6 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
2553 return NULL; 2932 return NULL;
2554} 2933}
2555 2934
2556void md_wakeup_thread(mdk_thread_t *thread);
2557
2558static void md_safemode_timeout(unsigned long data) 2935static void md_safemode_timeout(unsigned long data)
2559{ 2936{
2560 mddev_t *mddev = (mddev_t *) data; 2937 mddev_t *mddev = (mddev_t *) data;
@@ -2708,7 +3085,7 @@ static int do_md_run(mddev_t * mddev)
2708 mddev->safemode = 0; 3085 mddev->safemode = 0;
2709 mddev->safemode_timer.function = md_safemode_timeout; 3086 mddev->safemode_timer.function = md_safemode_timeout;
2710 mddev->safemode_timer.data = (unsigned long) mddev; 3087 mddev->safemode_timer.data = (unsigned long) mddev;
2711 mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */ 3088 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
2712 mddev->in_sync = 1; 3089 mddev->in_sync = 1;
2713 3090
2714 ITERATE_RDEV(mddev,rdev,tmp) 3091 ITERATE_RDEV(mddev,rdev,tmp)
@@ -2736,6 +3113,36 @@ static int do_md_run(mddev_t * mddev)
2736 mddev->queue->queuedata = mddev; 3113 mddev->queue->queuedata = mddev;
2737 mddev->queue->make_request_fn = mddev->pers->make_request; 3114 mddev->queue->make_request_fn = mddev->pers->make_request;
2738 3115
3116 /* If there is a partially-recovered drive we need to
3117 * start recovery here. If we leave it to md_check_recovery,
3118 * it will remove the drives and not do the right thing
3119 */
3120 if (mddev->degraded) {
3121 struct list_head *rtmp;
3122 int spares = 0;
3123 ITERATE_RDEV(mddev,rdev,rtmp)
3124 if (rdev->raid_disk >= 0 &&
3125 !test_bit(In_sync, &rdev->flags) &&
3126 !test_bit(Faulty, &rdev->flags))
3127 /* complete an interrupted recovery */
3128 spares++;
3129 if (spares && mddev->pers->sync_request) {
3130 mddev->recovery = 0;
3131 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
3132 mddev->sync_thread = md_register_thread(md_do_sync,
3133 mddev,
3134 "%s_resync");
3135 if (!mddev->sync_thread) {
3136 printk(KERN_ERR "%s: could not start resync"
3137 " thread...\n",
3138 mdname(mddev));
3139 /* leave the spares where they are, it shouldn't hurt */
3140 mddev->recovery = 0;
3141 } else
3142 md_wakeup_thread(mddev->sync_thread);
3143 }
3144 }
3145
2739 mddev->changed = 1; 3146 mddev->changed = 1;
2740 md_new_event(mddev); 3147 md_new_event(mddev);
2741 return 0; 3148 return 0;
@@ -2769,18 +3176,47 @@ static int restart_array(mddev_t *mddev)
2769 */ 3176 */
2770 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3177 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2771 md_wakeup_thread(mddev->thread); 3178 md_wakeup_thread(mddev->thread);
3179 md_wakeup_thread(mddev->sync_thread);
2772 err = 0; 3180 err = 0;
2773 } else { 3181 } else
2774 printk(KERN_ERR "md: %s has no personality assigned.\n",
2775 mdname(mddev));
2776 err = -EINVAL; 3182 err = -EINVAL;
2777 }
2778 3183
2779out: 3184out:
2780 return err; 3185 return err;
2781} 3186}
2782 3187
2783static int do_md_stop(mddev_t * mddev, int ro) 3188/* similar to deny_write_access, but accounts for our holding a reference
3189 * to the file ourselves */
3190static int deny_bitmap_write_access(struct file * file)
3191{
3192 struct inode *inode = file->f_mapping->host;
3193
3194 spin_lock(&inode->i_lock);
3195 if (atomic_read(&inode->i_writecount) > 1) {
3196 spin_unlock(&inode->i_lock);
3197 return -ETXTBSY;
3198 }
3199 atomic_set(&inode->i_writecount, -1);
3200 spin_unlock(&inode->i_lock);
3201
3202 return 0;
3203}
3204
3205static void restore_bitmap_write_access(struct file *file)
3206{
3207 struct inode *inode = file->f_mapping->host;
3208
3209 spin_lock(&inode->i_lock);
3210 atomic_set(&inode->i_writecount, 1);
3211 spin_unlock(&inode->i_lock);
3212}
3213
3214/* mode:
3215 * 0 - completely stop and dis-assemble array
3216 * 1 - switch to readonly
3217 * 2 - stop but do not disassemble array
3218 */
3219static int do_md_stop(mddev_t * mddev, int mode)
2784{ 3220{
2785 int err = 0; 3221 int err = 0;
2786 struct gendisk *disk = mddev->gendisk; 3222 struct gendisk *disk = mddev->gendisk;
@@ -2792,6 +3228,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
2792 } 3228 }
2793 3229
2794 if (mddev->sync_thread) { 3230 if (mddev->sync_thread) {
3231 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
2795 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 3232 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
2796 md_unregister_thread(mddev->sync_thread); 3233 md_unregister_thread(mddev->sync_thread);
2797 mddev->sync_thread = NULL; 3234 mddev->sync_thread = NULL;
@@ -2801,12 +3238,15 @@ static int do_md_stop(mddev_t * mddev, int ro)
2801 3238
2802 invalidate_partition(disk, 0); 3239 invalidate_partition(disk, 0);
2803 3240
2804 if (ro) { 3241 switch(mode) {
3242 case 1: /* readonly */
2805 err = -ENXIO; 3243 err = -ENXIO;
2806 if (mddev->ro==1) 3244 if (mddev->ro==1)
2807 goto out; 3245 goto out;
2808 mddev->ro = 1; 3246 mddev->ro = 1;
2809 } else { 3247 break;
3248 case 0: /* disassemble */
3249 case 2: /* stop */
2810 bitmap_flush(mddev); 3250 bitmap_flush(mddev);
2811 md_super_wait(mddev); 3251 md_super_wait(mddev);
2812 if (mddev->ro) 3252 if (mddev->ro)
@@ -2821,19 +3261,20 @@ static int do_md_stop(mddev_t * mddev, int ro)
2821 if (mddev->ro) 3261 if (mddev->ro)
2822 mddev->ro = 0; 3262 mddev->ro = 0;
2823 } 3263 }
2824 if (!mddev->in_sync) { 3264 if (!mddev->in_sync || mddev->sb_dirty) {
2825 /* mark array as shutdown cleanly */ 3265 /* mark array as shutdown cleanly */
2826 mddev->in_sync = 1; 3266 mddev->in_sync = 1;
2827 md_update_sb(mddev); 3267 md_update_sb(mddev);
2828 } 3268 }
2829 if (ro) 3269 if (mode == 1)
2830 set_disk_ro(disk, 1); 3270 set_disk_ro(disk, 1);
3271 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
2831 } 3272 }
2832 3273
2833 /* 3274 /*
2834 * Free resources if final stop 3275 * Free resources if final stop
2835 */ 3276 */
2836 if (!ro) { 3277 if (mode == 0) {
2837 mdk_rdev_t *rdev; 3278 mdk_rdev_t *rdev;
2838 struct list_head *tmp; 3279 struct list_head *tmp;
2839 struct gendisk *disk; 3280 struct gendisk *disk;
@@ -2841,7 +3282,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
2841 3282
2842 bitmap_destroy(mddev); 3283 bitmap_destroy(mddev);
2843 if (mddev->bitmap_file) { 3284 if (mddev->bitmap_file) {
2844 atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1); 3285 restore_bitmap_write_access(mddev->bitmap_file);
2845 fput(mddev->bitmap_file); 3286 fput(mddev->bitmap_file);
2846 mddev->bitmap_file = NULL; 3287 mddev->bitmap_file = NULL;
2847 } 3288 }
@@ -2857,11 +3298,15 @@ static int do_md_stop(mddev_t * mddev, int ro)
2857 export_array(mddev); 3298 export_array(mddev);
2858 3299
2859 mddev->array_size = 0; 3300 mddev->array_size = 0;
3301 mddev->size = 0;
3302 mddev->raid_disks = 0;
3303 mddev->recovery_cp = 0;
3304
2860 disk = mddev->gendisk; 3305 disk = mddev->gendisk;
2861 if (disk) 3306 if (disk)
2862 set_capacity(disk, 0); 3307 set_capacity(disk, 0);
2863 mddev->changed = 1; 3308 mddev->changed = 1;
2864 } else 3309 } else if (mddev->pers)
2865 printk(KERN_INFO "md: %s switched to read-only mode.\n", 3310 printk(KERN_INFO "md: %s switched to read-only mode.\n",
2866 mdname(mddev)); 3311 mdname(mddev));
2867 err = 0; 3312 err = 0;
@@ -3264,6 +3709,17 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
3264 3709
3265 rdev->raid_disk = -1; 3710 rdev->raid_disk = -1;
3266 err = bind_rdev_to_array(rdev, mddev); 3711 err = bind_rdev_to_array(rdev, mddev);
3712 if (!err && !mddev->pers->hot_remove_disk) {
3713 /* If there is hot_add_disk but no hot_remove_disk
3714 * then added disks are for geometry changes,
3715 * and should be added immediately.
3716 */
3717 super_types[mddev->major_version].
3718 validate_super(mddev, rdev);
3719 err = mddev->pers->hot_add_disk(mddev, rdev);
3720 if (err)
3721 unbind_rdev_from_array(rdev);
3722 }
3267 if (err) 3723 if (err)
3268 export_rdev(rdev); 3724 export_rdev(rdev);
3269 3725
@@ -3434,23 +3890,6 @@ abort_export:
3434 return err; 3890 return err;
3435} 3891}
3436 3892
3437/* similar to deny_write_access, but accounts for our holding a reference
3438 * to the file ourselves */
3439static int deny_bitmap_write_access(struct file * file)
3440{
3441 struct inode *inode = file->f_mapping->host;
3442
3443 spin_lock(&inode->i_lock);
3444 if (atomic_read(&inode->i_writecount) > 1) {
3445 spin_unlock(&inode->i_lock);
3446 return -ETXTBSY;
3447 }
3448 atomic_set(&inode->i_writecount, -1);
3449 spin_unlock(&inode->i_lock);
3450
3451 return 0;
3452}
3453
3454static int set_bitmap_file(mddev_t *mddev, int fd) 3893static int set_bitmap_file(mddev_t *mddev, int fd)
3455{ 3894{
3456 int err; 3895 int err;
@@ -3491,12 +3930,17 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
3491 mddev->pers->quiesce(mddev, 1); 3930 mddev->pers->quiesce(mddev, 1);
3492 if (fd >= 0) 3931 if (fd >= 0)
3493 err = bitmap_create(mddev); 3932 err = bitmap_create(mddev);
3494 if (fd < 0 || err) 3933 if (fd < 0 || err) {
3495 bitmap_destroy(mddev); 3934 bitmap_destroy(mddev);
3935 fd = -1; /* make sure to put the file */
3936 }
3496 mddev->pers->quiesce(mddev, 0); 3937 mddev->pers->quiesce(mddev, 0);
3497 } else if (fd < 0) { 3938 }
3498 if (mddev->bitmap_file) 3939 if (fd < 0) {
3940 if (mddev->bitmap_file) {
3941 restore_bitmap_write_access(mddev->bitmap_file);
3499 fput(mddev->bitmap_file); 3942 fput(mddev->bitmap_file);
3943 }
3500 mddev->bitmap_file = NULL; 3944 mddev->bitmap_file = NULL;
3501 } 3945 }
3502 3946
@@ -3977,11 +4421,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
3977 goto done_unlock; 4421 goto done_unlock;
3978 4422
3979 default: 4423 default:
3980 if (_IOC_TYPE(cmd) == MD_MAJOR)
3981 printk(KERN_WARNING "md: %s(pid %d) used"
3982 " obsolete MD ioctl, upgrade your"
3983 " software to use new ictls.\n",
3984 current->comm, current->pid);
3985 err = -EINVAL; 4424 err = -EINVAL;
3986 goto abort_unlock; 4425 goto abort_unlock;
3987 } 4426 }
@@ -4586,7 +5025,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
4586 spin_lock_irq(&mddev->write_lock); 5025 spin_lock_irq(&mddev->write_lock);
4587 if (mddev->in_sync) { 5026 if (mddev->in_sync) {
4588 mddev->in_sync = 0; 5027 mddev->in_sync = 0;
4589 mddev->sb_dirty = 1; 5028 mddev->sb_dirty = 3;
4590 md_wakeup_thread(mddev->thread); 5029 md_wakeup_thread(mddev->thread);
4591 } 5030 }
4592 spin_unlock_irq(&mddev->write_lock); 5031 spin_unlock_irq(&mddev->write_lock);
@@ -4599,7 +5038,7 @@ void md_write_end(mddev_t *mddev)
4599 if (atomic_dec_and_test(&mddev->writes_pending)) { 5038 if (atomic_dec_and_test(&mddev->writes_pending)) {
4600 if (mddev->safemode == 2) 5039 if (mddev->safemode == 2)
4601 md_wakeup_thread(mddev->thread); 5040 md_wakeup_thread(mddev->thread);
4602 else 5041 else if (mddev->safemode_delay)
4603 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); 5042 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
4604 } 5043 }
4605} 5044}
@@ -4620,10 +5059,14 @@ void md_do_sync(mddev_t *mddev)
4620 struct list_head *tmp; 5059 struct list_head *tmp;
4621 sector_t last_check; 5060 sector_t last_check;
4622 int skipped = 0; 5061 int skipped = 0;
5062 struct list_head *rtmp;
5063 mdk_rdev_t *rdev;
4623 5064
4624 /* just in case thread restarts... */ 5065 /* just in case thread restarts... */
4625 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) 5066 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
4626 return; 5067 return;
5068 if (mddev->ro) /* never try to sync a read-only array */
5069 return;
4627 5070
4628 /* we overload curr_resync somewhat here. 5071 /* we overload curr_resync somewhat here.
4629 * 0 == not engaged in resync at all 5072 * 0 == not engaged in resync at all
@@ -4682,17 +5125,30 @@ void md_do_sync(mddev_t *mddev)
4682 } 5125 }
4683 } while (mddev->curr_resync < 2); 5126 } while (mddev->curr_resync < 2);
4684 5127
5128 j = 0;
4685 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 5129 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4686 /* resync follows the size requested by the personality, 5130 /* resync follows the size requested by the personality,
4687 * which defaults to physical size, but can be virtual size 5131 * which defaults to physical size, but can be virtual size
4688 */ 5132 */
4689 max_sectors = mddev->resync_max_sectors; 5133 max_sectors = mddev->resync_max_sectors;
4690 mddev->resync_mismatches = 0; 5134 mddev->resync_mismatches = 0;
5135 /* we don't use the checkpoint if there's a bitmap */
5136 if (!mddev->bitmap &&
5137 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
5138 j = mddev->recovery_cp;
4691 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 5139 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4692 max_sectors = mddev->size << 1; 5140 max_sectors = mddev->size << 1;
4693 else 5141 else {
4694 /* recovery follows the physical size of devices */ 5142 /* recovery follows the physical size of devices */
4695 max_sectors = mddev->size << 1; 5143 max_sectors = mddev->size << 1;
5144 j = MaxSector;
5145 ITERATE_RDEV(mddev,rdev,rtmp)
5146 if (rdev->raid_disk >= 0 &&
5147 !test_bit(Faulty, &rdev->flags) &&
5148 !test_bit(In_sync, &rdev->flags) &&
5149 rdev->recovery_offset < j)
5150 j = rdev->recovery_offset;
5151 }
4696 5152
4697 printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); 5153 printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
4698 printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" 5154 printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
@@ -4702,12 +5158,7 @@ void md_do_sync(mddev_t *mddev)
4702 speed_max(mddev)); 5158 speed_max(mddev));
4703 5159
4704 is_mddev_idle(mddev); /* this also initializes IO event counters */ 5160 is_mddev_idle(mddev); /* this also initializes IO event counters */
4705 /* we don't use the checkpoint if there's a bitmap */ 5161
4706 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap
4707 && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4708 j = mddev->recovery_cp;
4709 else
4710 j = 0;
4711 io_sectors = 0; 5162 io_sectors = 0;
4712 for (m = 0; m < SYNC_MARKS; m++) { 5163 for (m = 0; m < SYNC_MARKS; m++) {
4713 mark[m] = jiffies; 5164 mark[m] = jiffies;
@@ -4828,15 +5279,28 @@ void md_do_sync(mddev_t *mddev)
4828 if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && 5279 if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
4829 test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && 5280 test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
4830 !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && 5281 !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
4831 mddev->curr_resync > 2 && 5282 mddev->curr_resync > 2) {
4832 mddev->curr_resync >= mddev->recovery_cp) { 5283 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4833 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 5284 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
4834 printk(KERN_INFO 5285 if (mddev->curr_resync >= mddev->recovery_cp) {
4835 "md: checkpointing recovery of %s.\n", 5286 printk(KERN_INFO
4836 mdname(mddev)); 5287 "md: checkpointing recovery of %s.\n",
4837 mddev->recovery_cp = mddev->curr_resync; 5288 mdname(mddev));
4838 } else 5289 mddev->recovery_cp = mddev->curr_resync;
4839 mddev->recovery_cp = MaxSector; 5290 }
5291 } else
5292 mddev->recovery_cp = MaxSector;
5293 } else {
5294 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
5295 mddev->curr_resync = MaxSector;
5296 ITERATE_RDEV(mddev,rdev,rtmp)
5297 if (rdev->raid_disk >= 0 &&
5298 !test_bit(Faulty, &rdev->flags) &&
5299 !test_bit(In_sync, &rdev->flags) &&
5300 rdev->recovery_offset < mddev->curr_resync)
5301 rdev->recovery_offset = mddev->curr_resync;
5302 mddev->sb_dirty = 1;
5303 }
4840 } 5304 }
4841 5305
4842 skip: 5306 skip:
@@ -4908,7 +5372,7 @@ void md_check_recovery(mddev_t *mddev)
4908 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 5372 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
4909 !mddev->in_sync && mddev->recovery_cp == MaxSector) { 5373 !mddev->in_sync && mddev->recovery_cp == MaxSector) {
4910 mddev->in_sync = 1; 5374 mddev->in_sync = 1;
4911 mddev->sb_dirty = 1; 5375 mddev->sb_dirty = 3;
4912 } 5376 }
4913 if (mddev->safemode == 1) 5377 if (mddev->safemode == 1)
4914 mddev->safemode = 0; 5378 mddev->safemode = 0;
@@ -4957,6 +5421,8 @@ void md_check_recovery(mddev_t *mddev)
4957 clear_bit(MD_RECOVERY_INTR, &mddev->recovery); 5421 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
4958 clear_bit(MD_RECOVERY_DONE, &mddev->recovery); 5422 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
4959 5423
5424 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
5425 goto unlock;
4960 /* no recovery is running. 5426 /* no recovery is running.
4961 * remove any failed drives, then 5427 * remove any failed drives, then
4962 * add spares if possible. 5428 * add spares if possible.
@@ -4979,6 +5445,7 @@ void md_check_recovery(mddev_t *mddev)
4979 ITERATE_RDEV(mddev,rdev,rtmp) 5445 ITERATE_RDEV(mddev,rdev,rtmp)
4980 if (rdev->raid_disk < 0 5446 if (rdev->raid_disk < 0
4981 && !test_bit(Faulty, &rdev->flags)) { 5447 && !test_bit(Faulty, &rdev->flags)) {
5448 rdev->recovery_offset = 0;
4982 if (mddev->pers->hot_add_disk(mddev,rdev)) { 5449 if (mddev->pers->hot_add_disk(mddev,rdev)) {
4983 char nm[20]; 5450 char nm[20];
4984 sprintf(nm, "rd%d", rdev->raid_disk); 5451 sprintf(nm, "rd%d", rdev->raid_disk);
@@ -5216,7 +5683,6 @@ EXPORT_SYMBOL(md_write_end);
5216EXPORT_SYMBOL(md_register_thread); 5683EXPORT_SYMBOL(md_register_thread);
5217EXPORT_SYMBOL(md_unregister_thread); 5684EXPORT_SYMBOL(md_unregister_thread);
5218EXPORT_SYMBOL(md_wakeup_thread); 5685EXPORT_SYMBOL(md_wakeup_thread);
5219EXPORT_SYMBOL(md_print_devices);
5220EXPORT_SYMBOL(md_check_recovery); 5686EXPORT_SYMBOL(md_check_recovery);
5221MODULE_LICENSE("GPL"); 5687MODULE_LICENSE("GPL");
5222MODULE_ALIAS("md"); 5688MODULE_ALIAS("md");
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4070eff6f0f8..cead918578a7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -374,26 +374,26 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
374 * already. 374 * already.
375 */ 375 */
376 if (atomic_dec_and_test(&r1_bio->remaining)) { 376 if (atomic_dec_and_test(&r1_bio->remaining)) {
377 if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { 377 if (test_bit(R1BIO_BarrierRetry, &r1_bio->state))
378 reschedule_retry(r1_bio); 378 reschedule_retry(r1_bio);
379 goto out; 379 else {
380 } 380 /* it really is the end of this request */
381 /* it really is the end of this request */ 381 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
382 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { 382 /* free extra copy of the data pages */
383 /* free extra copy of the data pages */ 383 int i = bio->bi_vcnt;
384 int i = bio->bi_vcnt; 384 while (i--)
385 while (i--) 385 safe_put_page(bio->bi_io_vec[i].bv_page);
386 safe_put_page(bio->bi_io_vec[i].bv_page); 386 }
387 /* clear the bitmap if all writes complete successfully */
388 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
389 r1_bio->sectors,
390 !test_bit(R1BIO_Degraded, &r1_bio->state),
391 behind);
392 md_write_end(r1_bio->mddev);
393 raid_end_bio_io(r1_bio);
387 } 394 }
388 /* clear the bitmap if all writes complete successfully */
389 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
390 r1_bio->sectors,
391 !test_bit(R1BIO_Degraded, &r1_bio->state),
392 behind);
393 md_write_end(r1_bio->mddev);
394 raid_end_bio_io(r1_bio);
395 } 395 }
396 out: 396
397 if (to_put) 397 if (to_put)
398 bio_put(to_put); 398 bio_put(to_put);
399 399
@@ -1625,6 +1625,12 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1625 /* before building a request, check if we can skip these blocks.. 1625 /* before building a request, check if we can skip these blocks..
1626 * This call the bitmap_start_sync doesn't actually record anything 1626 * This call the bitmap_start_sync doesn't actually record anything
1627 */ 1627 */
1628 if (mddev->bitmap == NULL &&
1629 mddev->recovery_cp == MaxSector &&
1630 conf->fullsync == 0) {
1631 *skipped = 1;
1632 return max_sector - sector_nr;
1633 }
1628 if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && 1634 if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
1629 !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { 1635 !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
1630 /* We can skip this block, and probably several more */ 1636 /* We can skip this block, and probably several more */
@@ -1888,7 +1894,8 @@ static int run(mddev_t *mddev)
1888 1894
1889 disk = conf->mirrors + i; 1895 disk = conf->mirrors + i;
1890 1896
1891 if (!disk->rdev) { 1897 if (!disk->rdev ||
1898 !test_bit(In_sync, &disk->rdev->flags)) {
1892 disk->head_position = 0; 1899 disk->head_position = 0;
1893 mddev->degraded++; 1900 mddev->degraded++;
1894 } 1901 }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1440935414e6..7f636283a1ba 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -29,6 +29,7 @@
29 * raid_disks 29 * raid_disks
30 * near_copies (stored in low byte of layout) 30 * near_copies (stored in low byte of layout)
31 * far_copies (stored in second byte of layout) 31 * far_copies (stored in second byte of layout)
32 * far_offset (stored in bit 16 of layout )
32 * 33 *
33 * The data to be stored is divided into chunks using chunksize. 34 * The data to be stored is divided into chunks using chunksize.
34 * Each device is divided into far_copies sections. 35 * Each device is divided into far_copies sections.
@@ -36,10 +37,14 @@
36 * near_copies copies of each chunk is stored (each on a different drive). 37 * near_copies copies of each chunk is stored (each on a different drive).
37 * The starting device for each section is offset near_copies from the starting 38 * The starting device for each section is offset near_copies from the starting
38 * device of the previous section. 39 * device of the previous section.
39 * Thus there are (near_copies*far_copies) of each chunk, and each is on a different 40 * Thus they are (near_copies*far_copies) of each chunk, and each is on a different
40 * drive. 41 * drive.
41 * near_copies and far_copies must be at least one, and their product is at most 42 * near_copies and far_copies must be at least one, and their product is at most
42 * raid_disks. 43 * raid_disks.
44 *
45 * If far_offset is true, then the far_copies are handled a bit differently.
46 * The copies are still in different stripes, but instead of be very far apart
47 * on disk, there are adjacent stripes.
43 */ 48 */
44 49
45/* 50/*
@@ -357,8 +362,7 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
357 * With this layout, and block is never stored twice on the one device. 362 * With this layout, and block is never stored twice on the one device.
358 * 363 *
359 * raid10_find_phys finds the sector offset of a given virtual sector 364 * raid10_find_phys finds the sector offset of a given virtual sector
360 * on each device that it is on. If a block isn't on a device, 365 * on each device that it is on.
361 * that entry in the array is set to MaxSector.
362 * 366 *
363 * raid10_find_virt does the reverse mapping, from a device and a 367 * raid10_find_virt does the reverse mapping, from a device and a
364 * sector offset to a virtual address 368 * sector offset to a virtual address
@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio)
381 chunk *= conf->near_copies; 385 chunk *= conf->near_copies;
382 stripe = chunk; 386 stripe = chunk;
383 dev = sector_div(stripe, conf->raid_disks); 387 dev = sector_div(stripe, conf->raid_disks);
388 if (conf->far_offset)
389 stripe *= conf->far_copies;
384 390
385 sector += stripe << conf->chunk_shift; 391 sector += stripe << conf->chunk_shift;
386 392
@@ -414,16 +420,24 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
414{ 420{
415 sector_t offset, chunk, vchunk; 421 sector_t offset, chunk, vchunk;
416 422
417 while (sector > conf->stride) {
418 sector -= conf->stride;
419 if (dev < conf->near_copies)
420 dev += conf->raid_disks - conf->near_copies;
421 else
422 dev -= conf->near_copies;
423 }
424
425 offset = sector & conf->chunk_mask; 423 offset = sector & conf->chunk_mask;
426 chunk = sector >> conf->chunk_shift; 424 if (conf->far_offset) {
425 int fc;
426 chunk = sector >> conf->chunk_shift;
427 fc = sector_div(chunk, conf->far_copies);
428 dev -= fc * conf->near_copies;
429 if (dev < 0)
430 dev += conf->raid_disks;
431 } else {
432 while (sector > conf->stride) {
433 sector -= conf->stride;
434 if (dev < conf->near_copies)
435 dev += conf->raid_disks - conf->near_copies;
436 else
437 dev -= conf->near_copies;
438 }
439 chunk = sector >> conf->chunk_shift;
440 }
427 vchunk = chunk * conf->raid_disks + dev; 441 vchunk = chunk * conf->raid_disks + dev;
428 sector_div(vchunk, conf->near_copies); 442 sector_div(vchunk, conf->near_copies);
429 return (vchunk << conf->chunk_shift) + offset; 443 return (vchunk << conf->chunk_shift) + offset;
@@ -900,9 +914,12 @@ static void status(struct seq_file *seq, mddev_t *mddev)
900 seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); 914 seq_printf(seq, " %dK chunks", mddev->chunk_size/1024);
901 if (conf->near_copies > 1) 915 if (conf->near_copies > 1)
902 seq_printf(seq, " %d near-copies", conf->near_copies); 916 seq_printf(seq, " %d near-copies", conf->near_copies);
903 if (conf->far_copies > 1) 917 if (conf->far_copies > 1) {
904 seq_printf(seq, " %d far-copies", conf->far_copies); 918 if (conf->far_offset)
905 919 seq_printf(seq, " %d offset-copies", conf->far_copies);
920 else
921 seq_printf(seq, " %d far-copies", conf->far_copies);
922 }
906 seq_printf(seq, " [%d/%d] [", conf->raid_disks, 923 seq_printf(seq, " [%d/%d] [", conf->raid_disks,
907 conf->working_disks); 924 conf->working_disks);
908 for (i = 0; i < conf->raid_disks; i++) 925 for (i = 0; i < conf->raid_disks; i++)
@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev)
1915 mirror_info_t *disk; 1932 mirror_info_t *disk;
1916 mdk_rdev_t *rdev; 1933 mdk_rdev_t *rdev;
1917 struct list_head *tmp; 1934 struct list_head *tmp;
1918 int nc, fc; 1935 int nc, fc, fo;
1919 sector_t stride, size; 1936 sector_t stride, size;
1920 1937
1921 if (mddev->chunk_size == 0) { 1938 if (mddev->chunk_size == 0) {
@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev)
1925 1942
1926 nc = mddev->layout & 255; 1943 nc = mddev->layout & 255;
1927 fc = (mddev->layout >> 8) & 255; 1944 fc = (mddev->layout >> 8) & 255;
1945 fo = mddev->layout & (1<<16);
1928 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || 1946 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
1929 (mddev->layout >> 16)) { 1947 (mddev->layout >> 17)) {
1930 printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", 1948 printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
1931 mdname(mddev), mddev->layout); 1949 mdname(mddev), mddev->layout);
1932 goto out; 1950 goto out;
@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev)
1958 conf->near_copies = nc; 1976 conf->near_copies = nc;
1959 conf->far_copies = fc; 1977 conf->far_copies = fc;
1960 conf->copies = nc*fc; 1978 conf->copies = nc*fc;
1979 conf->far_offset = fo;
1961 conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; 1980 conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
1962 conf->chunk_shift = ffz(~mddev->chunk_size) - 9; 1981 conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
1963 stride = mddev->size >> (conf->chunk_shift-1); 1982 if (fo)
1964 sector_div(stride, fc); 1983 conf->stride = 1 << conf->chunk_shift;
1965 conf->stride = stride << conf->chunk_shift; 1984 else {
1966 1985 stride = mddev->size >> (conf->chunk_shift-1);
1986 sector_div(stride, fc);
1987 conf->stride = stride << conf->chunk_shift;
1988 }
1967 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, 1989 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
1968 r10bio_pool_free, conf); 1990 r10bio_pool_free, conf);
1969 if (!conf->r10bio_pool) { 1991 if (!conf->r10bio_pool) {
@@ -2015,7 +2037,8 @@ static int run(mddev_t *mddev)
2015 2037
2016 disk = conf->mirrors + i; 2038 disk = conf->mirrors + i;
2017 2039
2018 if (!disk->rdev) { 2040 if (!disk->rdev ||
2041 !test_bit(In_sync, &rdev->flags)) {
2019 disk->head_position = 0; 2042 disk->head_position = 0;
2020 mddev->degraded++; 2043 mddev->degraded++;
2021 } 2044 }
@@ -2037,7 +2060,13 @@ static int run(mddev_t *mddev)
2037 /* 2060 /*
2038 * Ok, everything is just fine now 2061 * Ok, everything is just fine now
2039 */ 2062 */
2040 size = conf->stride * conf->raid_disks; 2063 if (conf->far_offset) {
2064 size = mddev->size >> (conf->chunk_shift-1);
2065 size *= conf->raid_disks;
2066 size <<= conf->chunk_shift;
2067 sector_div(size, conf->far_copies);
2068 } else
2069 size = conf->stride * conf->raid_disks;
2041 sector_div(size, conf->near_copies); 2070 sector_div(size, conf->near_copies);
2042 mddev->array_size = size/2; 2071 mddev->array_size = size/2;
2043 mddev->resync_max_sectors = size; 2072 mddev->resync_max_sectors = size;
@@ -2050,7 +2079,7 @@ static int run(mddev_t *mddev)
2050 * maybe... 2079 * maybe...
2051 */ 2080 */
2052 { 2081 {
2053 int stripe = conf->raid_disks * mddev->chunk_size / PAGE_SIZE; 2082 int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE);
2054 stripe /= conf->near_copies; 2083 stripe /= conf->near_copies;
2055 if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) 2084 if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
2056 mddev->queue->backing_dev_info.ra_pages = 2* stripe; 2085 mddev->queue->backing_dev_info.ra_pages = 2* stripe;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 31843604049c..f920e50ea124 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2,8 +2,11 @@
2 * raid5.c : Multiple Devices driver for Linux 2 * raid5.c : Multiple Devices driver for Linux
3 * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman 3 * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
4 * Copyright (C) 1999, 2000 Ingo Molnar 4 * Copyright (C) 1999, 2000 Ingo Molnar
5 * Copyright (C) 2002, 2003 H. Peter Anvin
5 * 6 *
6 * RAID-5 management functions. 7 * RAID-4/5/6 management functions.
8 * Thanks to Penguin Computing for making the RAID-6 development possible
9 * by donating a test server!
7 * 10 *
8 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by 12 * it under the terms of the GNU General Public License as published by
@@ -19,11 +22,11 @@
19#include <linux/config.h> 22#include <linux/config.h>
20#include <linux/module.h> 23#include <linux/module.h>
21#include <linux/slab.h> 24#include <linux/slab.h>
22#include <linux/raid/raid5.h>
23#include <linux/highmem.h> 25#include <linux/highmem.h>
24#include <linux/bitops.h> 26#include <linux/bitops.h>
25#include <linux/kthread.h> 27#include <linux/kthread.h>
26#include <asm/atomic.h> 28#include <asm/atomic.h>
29#include "raid6.h"
27 30
28#include <linux/raid/bitmap.h> 31#include <linux/raid/bitmap.h>
29 32
@@ -68,6 +71,16 @@
68#define __inline__ 71#define __inline__
69#endif 72#endif
70 73
74#if !RAID6_USE_EMPTY_ZERO_PAGE
75/* In .bss so it's zeroed */
76const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
77#endif
78
79static inline int raid6_next_disk(int disk, int raid_disks)
80{
81 disk++;
82 return (disk < raid_disks) ? disk : 0;
83}
71static void print_raid5_conf (raid5_conf_t *conf); 84static void print_raid5_conf (raid5_conf_t *conf);
72 85
73static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) 86static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
@@ -104,7 +117,7 @@ static void release_stripe(struct stripe_head *sh)
104{ 117{
105 raid5_conf_t *conf = sh->raid_conf; 118 raid5_conf_t *conf = sh->raid_conf;
106 unsigned long flags; 119 unsigned long flags;
107 120
108 spin_lock_irqsave(&conf->device_lock, flags); 121 spin_lock_irqsave(&conf->device_lock, flags);
109 __release_stripe(conf, sh); 122 __release_stripe(conf, sh);
110 spin_unlock_irqrestore(&conf->device_lock, flags); 123 spin_unlock_irqrestore(&conf->device_lock, flags);
@@ -117,7 +130,7 @@ static inline void remove_hash(struct stripe_head *sh)
117 hlist_del_init(&sh->hash); 130 hlist_del_init(&sh->hash);
118} 131}
119 132
120static void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) 133static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
121{ 134{
122 struct hlist_head *hp = stripe_hash(conf, sh->sector); 135 struct hlist_head *hp = stripe_hash(conf, sh->sector);
123 136
@@ -190,7 +203,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
190 (unsigned long long)sh->sector); 203 (unsigned long long)sh->sector);
191 204
192 remove_hash(sh); 205 remove_hash(sh);
193 206
194 sh->sector = sector; 207 sh->sector = sector;
195 sh->pd_idx = pd_idx; 208 sh->pd_idx = pd_idx;
196 sh->state = 0; 209 sh->state = 0;
@@ -269,8 +282,9 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
269 } else { 282 } else {
270 if (!test_bit(STRIPE_HANDLE, &sh->state)) 283 if (!test_bit(STRIPE_HANDLE, &sh->state))
271 atomic_inc(&conf->active_stripes); 284 atomic_inc(&conf->active_stripes);
272 if (!list_empty(&sh->lru)) 285 if (list_empty(&sh->lru))
273 list_del_init(&sh->lru); 286 BUG();
287 list_del_init(&sh->lru);
274 } 288 }
275 } 289 }
276 } while (sh == NULL); 290 } while (sh == NULL);
@@ -321,10 +335,9 @@ static int grow_stripes(raid5_conf_t *conf, int num)
321 return 1; 335 return 1;
322 conf->slab_cache = sc; 336 conf->slab_cache = sc;
323 conf->pool_size = devs; 337 conf->pool_size = devs;
324 while (num--) { 338 while (num--)
325 if (!grow_one_stripe(conf)) 339 if (!grow_one_stripe(conf))
326 return 1; 340 return 1;
327 }
328 return 0; 341 return 0;
329} 342}
330 343
@@ -631,8 +644,7 @@ static void raid5_build_block (struct stripe_head *sh, int i)
631 dev->req.bi_private = sh; 644 dev->req.bi_private = sh;
632 645
633 dev->flags = 0; 646 dev->flags = 0;
634 if (i != sh->pd_idx) 647 dev->sector = compute_blocknr(sh, i);
635 dev->sector = compute_blocknr(sh, i);
636} 648}
637 649
638static void error(mddev_t *mddev, mdk_rdev_t *rdev) 650static void error(mddev_t *mddev, mdk_rdev_t *rdev)
@@ -659,7 +671,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
659 " Operation continuing on %d devices\n", 671 " Operation continuing on %d devices\n",
660 bdevname(rdev->bdev,b), conf->working_disks); 672 bdevname(rdev->bdev,b), conf->working_disks);
661 } 673 }
662} 674}
663 675
664/* 676/*
665 * Input: a 'big' sector number, 677 * Input: a 'big' sector number,
@@ -697,9 +709,12 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
697 /* 709 /*
698 * Select the parity disk based on the user selected algorithm. 710 * Select the parity disk based on the user selected algorithm.
699 */ 711 */
700 if (conf->level == 4) 712 switch(conf->level) {
713 case 4:
701 *pd_idx = data_disks; 714 *pd_idx = data_disks;
702 else switch (conf->algorithm) { 715 break;
716 case 5:
717 switch (conf->algorithm) {
703 case ALGORITHM_LEFT_ASYMMETRIC: 718 case ALGORITHM_LEFT_ASYMMETRIC:
704 *pd_idx = data_disks - stripe % raid_disks; 719 *pd_idx = data_disks - stripe % raid_disks;
705 if (*dd_idx >= *pd_idx) 720 if (*dd_idx >= *pd_idx)
@@ -721,6 +736,39 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
721 default: 736 default:
722 printk(KERN_ERR "raid5: unsupported algorithm %d\n", 737 printk(KERN_ERR "raid5: unsupported algorithm %d\n",
723 conf->algorithm); 738 conf->algorithm);
739 }
740 break;
741 case 6:
742
743 /**** FIX THIS ****/
744 switch (conf->algorithm) {
745 case ALGORITHM_LEFT_ASYMMETRIC:
746 *pd_idx = raid_disks - 1 - (stripe % raid_disks);
747 if (*pd_idx == raid_disks-1)
748 (*dd_idx)++; /* Q D D D P */
749 else if (*dd_idx >= *pd_idx)
750 (*dd_idx) += 2; /* D D P Q D */
751 break;
752 case ALGORITHM_RIGHT_ASYMMETRIC:
753 *pd_idx = stripe % raid_disks;
754 if (*pd_idx == raid_disks-1)
755 (*dd_idx)++; /* Q D D D P */
756 else if (*dd_idx >= *pd_idx)
757 (*dd_idx) += 2; /* D D P Q D */
758 break;
759 case ALGORITHM_LEFT_SYMMETRIC:
760 *pd_idx = raid_disks - 1 - (stripe % raid_disks);
761 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
762 break;
763 case ALGORITHM_RIGHT_SYMMETRIC:
764 *pd_idx = stripe % raid_disks;
765 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
766 break;
767 default:
768 printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
769 conf->algorithm);
770 }
771 break;
724 } 772 }
725 773
726 /* 774 /*
@@ -742,12 +790,17 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
742 int chunk_number, dummy1, dummy2, dd_idx = i; 790 int chunk_number, dummy1, dummy2, dd_idx = i;
743 sector_t r_sector; 791 sector_t r_sector;
744 792
793
745 chunk_offset = sector_div(new_sector, sectors_per_chunk); 794 chunk_offset = sector_div(new_sector, sectors_per_chunk);
746 stripe = new_sector; 795 stripe = new_sector;
747 BUG_ON(new_sector != stripe); 796 BUG_ON(new_sector != stripe);
748 797
749 798 if (i == sh->pd_idx)
750 switch (conf->algorithm) { 799 return 0;
800 switch(conf->level) {
801 case 4: break;
802 case 5:
803 switch (conf->algorithm) {
751 case ALGORITHM_LEFT_ASYMMETRIC: 804 case ALGORITHM_LEFT_ASYMMETRIC:
752 case ALGORITHM_RIGHT_ASYMMETRIC: 805 case ALGORITHM_RIGHT_ASYMMETRIC:
753 if (i > sh->pd_idx) 806 if (i > sh->pd_idx)
@@ -761,7 +814,37 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
761 break; 814 break;
762 default: 815 default:
763 printk(KERN_ERR "raid5: unsupported algorithm %d\n", 816 printk(KERN_ERR "raid5: unsupported algorithm %d\n",
817 conf->algorithm);
818 }
819 break;
820 case 6:
821 data_disks = raid_disks - 2;
822 if (i == raid6_next_disk(sh->pd_idx, raid_disks))
823 return 0; /* It is the Q disk */
824 switch (conf->algorithm) {
825 case ALGORITHM_LEFT_ASYMMETRIC:
826 case ALGORITHM_RIGHT_ASYMMETRIC:
827 if (sh->pd_idx == raid_disks-1)
828 i--; /* Q D D D P */
829 else if (i > sh->pd_idx)
830 i -= 2; /* D D P Q D */
831 break;
832 case ALGORITHM_LEFT_SYMMETRIC:
833 case ALGORITHM_RIGHT_SYMMETRIC:
834 if (sh->pd_idx == raid_disks-1)
835 i--; /* Q D D D P */
836 else {
837 /* D D P Q D */
838 if (i < sh->pd_idx)
839 i += raid_disks;
840 i -= (sh->pd_idx + 2);
841 }
842 break;
843 default:
844 printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
764 conf->algorithm); 845 conf->algorithm);
846 }
847 break;
765 } 848 }
766 849
767 chunk_number = stripe * data_disks + i; 850 chunk_number = stripe * data_disks + i;
@@ -778,10 +861,11 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
778 861
779 862
780/* 863/*
781 * Copy data between a page in the stripe cache, and a bio. 864 * Copy data between a page in the stripe cache, and one or more bion
782 * There are no alignment or size guarantees between the page or the 865 * The page could align with the middle of the bio, or there could be
783 * bio except that there is some overlap. 866 * several bion, each with several bio_vecs, which cover part of the page
784 * All iovecs in the bio must be considered. 867 * Multiple bion are linked together on bi_next. There may be extras
868 * at the end of this list. We ignore them.
785 */ 869 */
786static void copy_data(int frombio, struct bio *bio, 870static void copy_data(int frombio, struct bio *bio,
787 struct page *page, 871 struct page *page,
@@ -810,7 +894,7 @@ static void copy_data(int frombio, struct bio *bio,
810 if (len > 0 && page_offset + len > STRIPE_SIZE) 894 if (len > 0 && page_offset + len > STRIPE_SIZE)
811 clen = STRIPE_SIZE - page_offset; 895 clen = STRIPE_SIZE - page_offset;
812 else clen = len; 896 else clen = len;
813 897
814 if (clen > 0) { 898 if (clen > 0) {
815 char *ba = __bio_kmap_atomic(bio, i, KM_USER0); 899 char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
816 if (frombio) 900 if (frombio)
@@ -862,14 +946,14 @@ static void compute_block(struct stripe_head *sh, int dd_idx)
862 set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); 946 set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
863} 947}
864 948
865static void compute_parity(struct stripe_head *sh, int method) 949static void compute_parity5(struct stripe_head *sh, int method)
866{ 950{
867 raid5_conf_t *conf = sh->raid_conf; 951 raid5_conf_t *conf = sh->raid_conf;
868 int i, pd_idx = sh->pd_idx, disks = sh->disks, count; 952 int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
869 void *ptr[MAX_XOR_BLOCKS]; 953 void *ptr[MAX_XOR_BLOCKS];
870 struct bio *chosen; 954 struct bio *chosen;
871 955
872 PRINTK("compute_parity, stripe %llu, method %d\n", 956 PRINTK("compute_parity5, stripe %llu, method %d\n",
873 (unsigned long long)sh->sector, method); 957 (unsigned long long)sh->sector, method);
874 958
875 count = 1; 959 count = 1;
@@ -956,9 +1040,195 @@ static void compute_parity(struct stripe_head *sh, int method)
956 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 1040 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
957} 1041}
958 1042
1043static void compute_parity6(struct stripe_head *sh, int method)
1044{
1045 raid6_conf_t *conf = sh->raid_conf;
1046 int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
1047 struct bio *chosen;
1048 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1049 void *ptrs[disks];
1050
1051 qd_idx = raid6_next_disk(pd_idx, disks);
1052 d0_idx = raid6_next_disk(qd_idx, disks);
1053
1054 PRINTK("compute_parity, stripe %llu, method %d\n",
1055 (unsigned long long)sh->sector, method);
1056
1057 switch(method) {
1058 case READ_MODIFY_WRITE:
1059 BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
1060 case RECONSTRUCT_WRITE:
1061 for (i= disks; i-- ;)
1062 if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
1063 chosen = sh->dev[i].towrite;
1064 sh->dev[i].towrite = NULL;
1065
1066 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1067 wake_up(&conf->wait_for_overlap);
1068
1069 if (sh->dev[i].written) BUG();
1070 sh->dev[i].written = chosen;
1071 }
1072 break;
1073 case CHECK_PARITY:
1074 BUG(); /* Not implemented yet */
1075 }
1076
1077 for (i = disks; i--;)
1078 if (sh->dev[i].written) {
1079 sector_t sector = sh->dev[i].sector;
1080 struct bio *wbi = sh->dev[i].written;
1081 while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
1082 copy_data(1, wbi, sh->dev[i].page, sector);
1083 wbi = r5_next_bio(wbi, sector);
1084 }
1085
1086 set_bit(R5_LOCKED, &sh->dev[i].flags);
1087 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1088 }
1089
1090// switch(method) {
1091// case RECONSTRUCT_WRITE:
1092// case CHECK_PARITY:
1093// case UPDATE_PARITY:
1094 /* Note that unlike RAID-5, the ordering of the disks matters greatly. */
1095 /* FIX: Is this ordering of drives even remotely optimal? */
1096 count = 0;
1097 i = d0_idx;
1098 do {
1099 ptrs[count++] = page_address(sh->dev[i].page);
1100 if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags))
1101 printk("block %d/%d not uptodate on parity calc\n", i,count);
1102 i = raid6_next_disk(i, disks);
1103 } while ( i != d0_idx );
1104// break;
1105// }
1106
1107 raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs);
1108
1109 switch(method) {
1110 case RECONSTRUCT_WRITE:
1111 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1112 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1113 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1114 set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
1115 break;
1116 case UPDATE_PARITY:
1117 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1118 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1119 break;
1120 }
1121}
1122
1123
1124/* Compute one missing block */
1125static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1126{
1127 raid6_conf_t *conf = sh->raid_conf;
1128 int i, count, disks = conf->raid_disks;
1129 void *ptr[MAX_XOR_BLOCKS], *p;
1130 int pd_idx = sh->pd_idx;
1131 int qd_idx = raid6_next_disk(pd_idx, disks);
1132
1133 PRINTK("compute_block_1, stripe %llu, idx %d\n",
1134 (unsigned long long)sh->sector, dd_idx);
1135
1136 if ( dd_idx == qd_idx ) {
1137 /* We're actually computing the Q drive */
1138 compute_parity6(sh, UPDATE_PARITY);
1139 } else {
1140 ptr[0] = page_address(sh->dev[dd_idx].page);
1141 if (!nozero) memset(ptr[0], 0, STRIPE_SIZE);
1142 count = 1;
1143 for (i = disks ; i--; ) {
1144 if (i == dd_idx || i == qd_idx)
1145 continue;
1146 p = page_address(sh->dev[i].page);
1147 if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
1148 ptr[count++] = p;
1149 else
1150 printk("compute_block() %d, stripe %llu, %d"
1151 " not present\n", dd_idx,
1152 (unsigned long long)sh->sector, i);
1153
1154 check_xor();
1155 }
1156 if (count != 1)
1157 xor_block(count, STRIPE_SIZE, ptr);
1158 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1159 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1160 }
1161}
1162
1163/* Compute two missing blocks */
1164static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1165{
1166 raid6_conf_t *conf = sh->raid_conf;
1167 int i, count, disks = conf->raid_disks;
1168 int pd_idx = sh->pd_idx;
1169 int qd_idx = raid6_next_disk(pd_idx, disks);
1170 int d0_idx = raid6_next_disk(qd_idx, disks);
1171 int faila, failb;
1172
1173 /* faila and failb are disk numbers relative to d0_idx */
1174 /* pd_idx become disks-2 and qd_idx become disks-1 */
1175 faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx;
1176 failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx;
1177
1178 BUG_ON(faila == failb);
1179 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
1180
1181 PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
1182 (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb);
1183
1184 if ( failb == disks-1 ) {
1185 /* Q disk is one of the missing disks */
1186 if ( faila == disks-2 ) {
1187 /* Missing P+Q, just recompute */
1188 compute_parity6(sh, UPDATE_PARITY);
1189 return;
1190 } else {
1191 /* We're missing D+Q; recompute D from P */
1192 compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0);
1193 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
1194 return;
1195 }
1196 }
1197
1198 /* We're missing D+P or D+D; build pointer table */
1199 {
1200 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1201 void *ptrs[disks];
1202
1203 count = 0;
1204 i = d0_idx;
1205 do {
1206 ptrs[count++] = page_address(sh->dev[i].page);
1207 i = raid6_next_disk(i, disks);
1208 if (i != dd_idx1 && i != dd_idx2 &&
1209 !test_bit(R5_UPTODATE, &sh->dev[i].flags))
1210 printk("compute_2 with missing block %d/%d\n", count, i);
1211 } while ( i != d0_idx );
1212
1213 if ( failb == disks-2 ) {
1214 /* We're missing D+P. */
1215 raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
1216 } else {
1217 /* We're missing D+D. */
1218 raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
1219 }
1220
1221 /* Both the above update both missing blocks */
1222 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1223 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1224 }
1225}
1226
1227
1228
959/* 1229/*
960 * Each stripe/dev can have one or more bion attached. 1230 * Each stripe/dev can have one or more bion attached.
961 * toread/towrite point to the first in a chain. 1231 * toread/towrite point to the first in a chain.
962 * The bi_next chain must be in order. 1232 * The bi_next chain must be in order.
963 */ 1233 */
964static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite) 1234static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
@@ -1031,6 +1301,13 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1031 1301
1032static void end_reshape(raid5_conf_t *conf); 1302static void end_reshape(raid5_conf_t *conf);
1033 1303
1304static int page_is_zero(struct page *p)
1305{
1306 char *a = page_address(p);
1307 return ((*(u32*)a) == 0 &&
1308 memcmp(a, a+4, STRIPE_SIZE-4)==0);
1309}
1310
1034static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks) 1311static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
1035{ 1312{
1036 int sectors_per_chunk = conf->chunk_size >> 9; 1313 int sectors_per_chunk = conf->chunk_size >> 9;
@@ -1062,7 +1339,7 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
1062 * 1339 *
1063 */ 1340 */
1064 1341
1065static void handle_stripe(struct stripe_head *sh) 1342static void handle_stripe5(struct stripe_head *sh)
1066{ 1343{
1067 raid5_conf_t *conf = sh->raid_conf; 1344 raid5_conf_t *conf = sh->raid_conf;
1068 int disks = sh->disks; 1345 int disks = sh->disks;
@@ -1394,7 +1671,7 @@ static void handle_stripe(struct stripe_head *sh)
1394 if (locked == 0 && (rcw == 0 ||rmw == 0) && 1671 if (locked == 0 && (rcw == 0 ||rmw == 0) &&
1395 !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 1672 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
1396 PRINTK("Computing parity...\n"); 1673 PRINTK("Computing parity...\n");
1397 compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); 1674 compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
1398 /* now every locked buffer is ready to be written */ 1675 /* now every locked buffer is ready to be written */
1399 for (i=disks; i--;) 1676 for (i=disks; i--;)
1400 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { 1677 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
@@ -1421,13 +1698,10 @@ static void handle_stripe(struct stripe_head *sh)
1421 !test_bit(STRIPE_INSYNC, &sh->state)) { 1698 !test_bit(STRIPE_INSYNC, &sh->state)) {
1422 set_bit(STRIPE_HANDLE, &sh->state); 1699 set_bit(STRIPE_HANDLE, &sh->state);
1423 if (failed == 0) { 1700 if (failed == 0) {
1424 char *pagea;
1425 BUG_ON(uptodate != disks); 1701 BUG_ON(uptodate != disks);
1426 compute_parity(sh, CHECK_PARITY); 1702 compute_parity5(sh, CHECK_PARITY);
1427 uptodate--; 1703 uptodate--;
1428 pagea = page_address(sh->dev[sh->pd_idx].page); 1704 if (page_is_zero(sh->dev[sh->pd_idx].page)) {
1429 if ((*(u32*)pagea) == 0 &&
1430 !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) {
1431 /* parity is correct (on disc, not in buffer any more) */ 1705 /* parity is correct (on disc, not in buffer any more) */
1432 set_bit(STRIPE_INSYNC, &sh->state); 1706 set_bit(STRIPE_INSYNC, &sh->state);
1433 } else { 1707 } else {
@@ -1487,7 +1761,7 @@ static void handle_stripe(struct stripe_head *sh)
1487 /* Need to write out all blocks after computing parity */ 1761 /* Need to write out all blocks after computing parity */
1488 sh->disks = conf->raid_disks; 1762 sh->disks = conf->raid_disks;
1489 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks); 1763 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks);
1490 compute_parity(sh, RECONSTRUCT_WRITE); 1764 compute_parity5(sh, RECONSTRUCT_WRITE);
1491 for (i= conf->raid_disks; i--;) { 1765 for (i= conf->raid_disks; i--;) {
1492 set_bit(R5_LOCKED, &sh->dev[i].flags); 1766 set_bit(R5_LOCKED, &sh->dev[i].flags);
1493 locked++; 1767 locked++;
@@ -1615,6 +1889,569 @@ static void handle_stripe(struct stripe_head *sh)
1615 } 1889 }
1616} 1890}
1617 1891
/*
 * handle_stripe6 - examine one RAID-6 stripe and advance whatever work is
 * pending on it.  Reached through handle_stripe() when the array level is 6.
 *
 * @sh:       the stripe to process.
 * @tmp_page: scratch page that receives a copy of the Q block while Q is
 *            being checked during a sync; may be NULL, in which case the
 *            P/Q check is not done on this call and STRIPE_HANDLE is left
 *            set so the stripe is handled again.
 *
 * With sh->lock held the function, in order:
 *   - copies data out for reads against up-to-date blocks and counts
 *     locked / up-to-date / to-read / to-write / written / failed devices;
 *   - fails outstanding requests when more than two devices have failed;
 *   - returns finished writes once P and Q are each either failed or
 *     in-sync, unlocked and up to date;
 *   - flags reads, or computes missing blocks, for blocks that are needed;
 *   - starts a reconstruct-write when nothing is locked and no further
 *     reads are required;
 *   - while syncing, checks P and Q and flags blocks to be written out;
 *   - drives the rewrite / re-read sequence for devices with R5_ReadError.
 * After dropping sh->lock it ends the bios collected on return_bi and
 * submits the I/O flagged with R5_Wantread / R5_Wantwrite.
 */
1892static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
1893{
1894 raid6_conf_t *conf = sh->raid_conf;
1895 int disks = conf->raid_disks;
1896 struct bio *return_bi= NULL;
1897 struct bio *bi;
1898 int i;
1899 int syncing;
1900 int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
1901 int non_overwrite = 0;
1902 int failed_num[2] = {0, 0};
1903 struct r5dev *dev, *pdev, *qdev;
1904 int pd_idx = sh->pd_idx;
1905 int qd_idx = raid6_next_disk(pd_idx, disks);
1906 int p_failed, q_failed;
1907
1908 PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n",
1909 (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
1910 pd_idx, qd_idx);
1911
1912 spin_lock(&sh->lock);
1913 clear_bit(STRIPE_HANDLE, &sh->state);
1914 clear_bit(STRIPE_DELAYED, &sh->state);
1915
1916 syncing = test_bit(STRIPE_SYNCING, &sh->state);
1917 /* Now to look around and see what can be done */
1918
1919 rcu_read_lock();
1920 for (i=disks; i--; ) {
1921 mdk_rdev_t *rdev;
1922 dev = &sh->dev[i];
1923 clear_bit(R5_Insync, &dev->flags);
1924
1925 PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
1926 i, dev->flags, dev->toread, dev->towrite, dev->written);
1927 /* maybe we can reply to a read */
1928 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
1929 struct bio *rbi, *rbi2;
1930 PRINTK("Return read for disc %d\n", i);
1931 spin_lock_irq(&conf->device_lock);
1932 rbi = dev->toread;
1933 dev->toread = NULL;
1934 if (test_and_clear_bit(R5_Overlap, &dev->flags))
1935 wake_up(&conf->wait_for_overlap);
1936 spin_unlock_irq(&conf->device_lock);
1937 while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
1938 copy_data(0, rbi, dev->page, dev->sector);
1939 rbi2 = r5_next_bio(rbi, dev->sector);
1940 spin_lock_irq(&conf->device_lock);
1941 if (--rbi->bi_phys_segments == 0) {
1942 rbi->bi_next = return_bi;
1943 return_bi = rbi;
1944 }
1945 spin_unlock_irq(&conf->device_lock);
1946 rbi = rbi2;
1947 }
1948 }
1949
1950 /* now count some things */
1951 if (test_bit(R5_LOCKED, &dev->flags)) locked++;
1952 if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
1953
1954
1955 if (dev->toread) to_read++;
1956 if (dev->towrite) {
1957 to_write++;
1958 if (!test_bit(R5_OVERWRITE, &dev->flags))
1959 non_overwrite++;
1960 }
1961 if (dev->written) written++;
/* Classify the member device: a missing rdev, one that is not In_sync,
 * or a block with R5_ReadError counts as failed.  Only the first two
 * failed indices are remembered in failed_num[]; 'failed' keeps counting.
 */
1962 rdev = rcu_dereference(conf->disks[i].rdev);
1963 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
1964 /* The ReadError flag will just be confusing now */
1965 clear_bit(R5_ReadError, &dev->flags);
1966 clear_bit(R5_ReWrite, &dev->flags);
1967 }
1968 if (!rdev || !test_bit(In_sync, &rdev->flags)
1969 || test_bit(R5_ReadError, &dev->flags)) {
1970 if ( failed < 2 )
1971 failed_num[failed] = i;
1972 failed++;
1973 } else
1974 set_bit(R5_Insync, &dev->flags);
1975 }
1976 rcu_read_unlock();
1977 PRINTK("locked=%d uptodate=%d to_read=%d"
1978 " to_write=%d failed=%d failed_num=%d,%d\n",
1979 locked, uptodate, to_read, to_write, failed,
1980 failed_num[0], failed_num[1]);
1981 /* check if the array has lost >2 devices and, if so, some requests might
1982 * need to be failed
1983 */
1984 if (failed > 2 && to_read+to_write+written) {
1985 for (i=disks; i--; ) {
1986 int bitmap_end = 0;
1987
1988 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1989 mdk_rdev_t *rdev;
1990 rcu_read_lock();
1991 rdev = rcu_dereference(conf->disks[i].rdev);
1992 if (rdev && test_bit(In_sync, &rdev->flags))
1993 /* multiple read failures in one stripe */
1994 md_error(conf->mddev, rdev);
1995 rcu_read_unlock();
1996 }
1997
1998 spin_lock_irq(&conf->device_lock);
1999 /* fail all writes first */
2000 bi = sh->dev[i].towrite;
2001 sh->dev[i].towrite = NULL;
2002 if (bi) { to_write--; bitmap_end = 1; }
2003
2004 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
2005 wake_up(&conf->wait_for_overlap);
2006
2007 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
2008 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
2009 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2010 if (--bi->bi_phys_segments == 0) {
2011 md_write_end(conf->mddev);
2012 bi->bi_next = return_bi;
2013 return_bi = bi;
2014 }
2015 bi = nextbi;
2016 }
2017 /* and fail all 'written' */
2018 bi = sh->dev[i].written;
2019 sh->dev[i].written = NULL;
2020 if (bi) bitmap_end = 1;
2021 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
2022 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
2023 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2024 if (--bi->bi_phys_segments == 0) {
2025 md_write_end(conf->mddev);
2026 bi->bi_next = return_bi;
2027 return_bi = bi;
2028 }
2029 bi = bi2;
2030 }
2031
2032 /* fail any reads if this device is non-operational */
2033 if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
2034 test_bit(R5_ReadError, &sh->dev[i].flags)) {
2035 bi = sh->dev[i].toread;
2036 sh->dev[i].toread = NULL;
2037 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
2038 wake_up(&conf->wait_for_overlap);
2039 if (bi) to_read--;
2040 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
2041 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
2042 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2043 if (--bi->bi_phys_segments == 0) {
2044 bi->bi_next = return_bi;
2045 return_bi = bi;
2046 }
2047 bi = nextbi;
2048 }
2049 }
2050 spin_unlock_irq(&conf->device_lock);
2051 if (bitmap_end)
2052 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
2053 STRIPE_SECTORS, 0, 0);
2054 }
2055 }
2056 if (failed > 2 && syncing) {
2057 md_done_sync(conf->mddev, STRIPE_SECTORS,0);
2058 clear_bit(STRIPE_SYNCING, &sh->state);
2059 syncing = 0;
2060 }
2061
2062 /*
2063 * might be able to return some write requests if the parity blocks
2064 * are safe, or on a failed drive
2065 */
2066 pdev = &sh->dev[pd_idx];
2067 p_failed = (failed >= 1 && failed_num[0] == pd_idx)
2068 || (failed >= 2 && failed_num[1] == pd_idx);
2069 qdev = &sh->dev[qd_idx];
2070 q_failed = (failed >= 1 && failed_num[0] == qd_idx)
2071 || (failed >= 2 && failed_num[1] == qd_idx);
2072
2073 if ( written &&
2074 ( p_failed || ((test_bit(R5_Insync, &pdev->flags)
2075 && !test_bit(R5_LOCKED, &pdev->flags)
2076 && test_bit(R5_UPTODATE, &pdev->flags))) ) &&
2077 ( q_failed || ((test_bit(R5_Insync, &qdev->flags)
2078 && !test_bit(R5_LOCKED, &qdev->flags)
2079 && test_bit(R5_UPTODATE, &qdev->flags))) ) ) {
2080 /* any written block on an uptodate or failed drive can be
2081 * returned. Note that if we 'wrote' to a failed drive,
2082 * it will be UPTODATE, but never LOCKED, so we don't need
2083 * to test 'failed' directly.
2084 */
2085 for (i=disks; i--; )
2086 if (sh->dev[i].written) {
2087 dev = &sh->dev[i];
2088 if (!test_bit(R5_LOCKED, &dev->flags) &&
2089 test_bit(R5_UPTODATE, &dev->flags) ) {
2090 /* We can return any write requests */
2091 int bitmap_end = 0;
2092 struct bio *wbi, *wbi2;
2093 PRINTK("Return write for stripe %llu disc %d\n",
2094 (unsigned long long)sh->sector, i);
2095 spin_lock_irq(&conf->device_lock);
2096 wbi = dev->written;
2097 dev->written = NULL;
2098 while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
2099 wbi2 = r5_next_bio(wbi, dev->sector);
2100 if (--wbi->bi_phys_segments == 0) {
2101 md_write_end(conf->mddev);
2102 wbi->bi_next = return_bi;
2103 return_bi = wbi;
2104 }
2105 wbi = wbi2;
2106 }
2107 if (dev->towrite == NULL)
2108 bitmap_end = 1;
2109 spin_unlock_irq(&conf->device_lock);
2110 if (bitmap_end)
2111 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
2112 STRIPE_SECTORS,
2113 !test_bit(STRIPE_DEGRADED, &sh->state), 0);
2114 }
2115 }
2116 }
2117
2118 /* Now we might consider reading some blocks, either to check/generate
2119 * parity, or to satisfy requests
2120 * or to load a block that is being partially written.
2121 */
2122 if (to_read || non_overwrite || (to_write && failed) || (syncing && (uptodate < disks))) {
2123 for (i=disks; i--;) {
2124 dev = &sh->dev[i];
2125 if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
2126 (dev->toread ||
2127 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
2128 syncing ||
2129 (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
2130 (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
2131 )
2132 ) {
2133 /* we would like to get this block, possibly
2134 * by computing it, but we might not be able to
2135 */
2136 if (uptodate == disks-1) {
2137 PRINTK("Computing stripe %llu block %d\n",
2138 (unsigned long long)sh->sector, i);
2139 compute_block_1(sh, i, 0);
2140 uptodate++;
2141 } else if ( uptodate == disks-2 && failed >= 2 ) {
2142 /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
2143 int other;
/* find the second block that is not up to date; the loop
 * leaves 'other' at -1 if there is none, hence the BUG_ON
 */
2144 for (other=disks; other--;) {
2145 if ( other == i )
2146 continue;
2147 if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) )
2148 break;
2149 }
2150 BUG_ON(other < 0);
2151 PRINTK("Computing stripe %llu blocks %d,%d\n",
2152 (unsigned long long)sh->sector, i, other);
2153 compute_block_2(sh, i, other);
2154 uptodate += 2;
2155 } else if (test_bit(R5_Insync, &dev->flags)) {
2156 set_bit(R5_LOCKED, &dev->flags);
2157 set_bit(R5_Wantread, &dev->flags);
2158#if 0
2159 /* if I am just reading this block and we don't have
2160 a failed drive, or any pending writes then sidestep the cache */
2161 if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
2162 ! syncing && !failed && !to_write) {
2163 sh->bh_cache[i]->b_page = sh->bh_read[i]->b_page;
2164 sh->bh_cache[i]->b_data = sh->bh_read[i]->b_data;
2165 }
2166#endif
2167 locked++;
2168 PRINTK("Reading block %d (sync=%d)\n",
2169 i, syncing);
2170 }
2171 }
2172 }
2173 set_bit(STRIPE_HANDLE, &sh->state);
2174 }
2175
2176 /* now to consider writing and what else, if anything should be read */
2177 if (to_write) {
2178 int rcw=0, must_compute=0;
2179 for (i=disks ; i--;) {
2180 dev = &sh->dev[i];
2181 /* Would I have to read this buffer for reconstruct_write */
2182 if (!test_bit(R5_OVERWRITE, &dev->flags)
2183 && i != pd_idx && i != qd_idx
2184 && (!test_bit(R5_LOCKED, &dev->flags)
2185#if 0
2186 || sh->bh_page[i] != bh->b_page
2187#endif
2188 ) &&
2189 !test_bit(R5_UPTODATE, &dev->flags)) {
2190 if (test_bit(R5_Insync, &dev->flags)) rcw++;
2191 else {
2192 PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags);
2193 must_compute++;
2194 }
2195 }
2196 }
2197 PRINTK("for sector %llu, rcw=%d, must_compute=%d\n",
2198 (unsigned long long)sh->sector, rcw, must_compute);
2199 set_bit(STRIPE_HANDLE, &sh->state);
2200
2201 if (rcw > 0)
2202 /* want reconstruct write, but need to get some data */
2203 for (i=disks; i--;) {
2204 dev = &sh->dev[i];
2205 if (!test_bit(R5_OVERWRITE, &dev->flags)
2206 && !(failed == 0 && (i == pd_idx || i == qd_idx))
2207 && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
2208 test_bit(R5_Insync, &dev->flags)) {
2209 if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
2210 {
2211 PRINTK("Read_old stripe %llu block %d for Reconstruct\n",
2212 (unsigned long long)sh->sector, i);
2213 set_bit(R5_LOCKED, &dev->flags);
2214 set_bit(R5_Wantread, &dev->flags);
2215 locked++;
2216 } else {
2217 PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
2218 (unsigned long long)sh->sector, i);
2219 set_bit(STRIPE_DELAYED, &sh->state);
2220 set_bit(STRIPE_HANDLE, &sh->state);
2221 }
2222 }
2223 }
2224 /* now if nothing is locked, and if we have enough data, we can start a write request */
2225 if (locked == 0 && rcw == 0 &&
2226 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2227 if ( must_compute > 0 ) {
2228 /* We have failed blocks and need to compute them */
2229 switch ( failed ) {
2230 case 0: BUG();
2231 case 1: compute_block_1(sh, failed_num[0], 0); break;
2232 case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
2233 default: BUG(); /* This request should have been failed? */
2234 }
2235 }
2236
2237 PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector);
2238 compute_parity6(sh, RECONSTRUCT_WRITE);
2239 /* now every locked buffer is ready to be written */
2240 for (i=disks; i--;)
2241 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2242 PRINTK("Writing stripe %llu block %d\n",
2243 (unsigned long long)sh->sector, i);
2244 locked++;
2245 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2246 }
2247 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2248 set_bit(STRIPE_INSYNC, &sh->state);
2249
2250 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2251 atomic_dec(&conf->preread_active_stripes);
2252 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
2253 md_wakeup_thread(conf->mddev->thread);
2254 }
2255 }
2256 }
2257
2258 /* maybe we need to check and possibly fix the parity for this stripe
2259 * Any reads will already have been scheduled, so we just see if enough data
2260 * is available
2261 */
2262 if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
2263 int update_p = 0, update_q = 0;
/* NOTE: this declaration shadows the function-level 'dev' */
2264 struct r5dev *dev;
2265
2266 set_bit(STRIPE_HANDLE, &sh->state);
2267
2268 BUG_ON(failed>2);
2269 BUG_ON(uptodate < disks);
2270 /* Want to check and possibly repair P and Q.
2271 * However there could be one 'failed' device, in which
2272 * case we can only check one of them, possibly using the
2273 * other to generate missing data
2274 */
2275
2276 /* If !tmp_page, we cannot do the calculations,
2277 * but as we have set STRIPE_HANDLE, we will soon be called
2278 * by handle_stripe with a tmp_page - just wait until then.
2279 */
2280 if (tmp_page) {
/* 'failed' is a count and q_failed is 0 or 1, so this holds
 * when nothing has failed, or when exactly one device has
 * failed and that device is Q.
 */
2281 if (failed == q_failed) {
2282 /* The only possible failed device holds 'Q', so it makes
2283 * sense to check P (If anything else were failed, we would
2284 * have used P to recreate it).
2285 */
2286 compute_block_1(sh, pd_idx, 1);
2287 if (!page_is_zero(sh->dev[pd_idx].page)) {
2288 compute_block_1(sh,pd_idx,0);
2289 update_p = 1;
2290 }
2291 }
2292 if (!q_failed && failed < 2) {
2293 /* q is not failed, and we didn't use it to generate
2294 * anything, so it makes sense to check it
2295 */
/* keep the current Q in tmp_page, recompute parity in place,
 * then compare the two
 */
2296 memcpy(page_address(tmp_page),
2297 page_address(sh->dev[qd_idx].page),
2298 STRIPE_SIZE);
2299 compute_parity6(sh, UPDATE_PARITY);
2300 if (memcmp(page_address(tmp_page),
2301 page_address(sh->dev[qd_idx].page),
2302 STRIPE_SIZE)!= 0) {
2303 clear_bit(STRIPE_INSYNC, &sh->state);
2304 update_q = 1;
2305 }
2306 }
2307 if (update_p || update_q) {
2308 conf->mddev->resync_mismatches += STRIPE_SECTORS;
2309 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2310 /* don't try to repair!! */
2311 update_p = update_q = 0;
2312 }
2313
2314 /* now write out any block on a failed drive,
2315 * or P or Q if they need it
2316 */
2317
2318 if (failed == 2) {
2319 dev = &sh->dev[failed_num[1]];
2320 locked++;
2321 set_bit(R5_LOCKED, &dev->flags);
2322 set_bit(R5_Wantwrite, &dev->flags);
2323 }
2324 if (failed >= 1) {
2325 dev = &sh->dev[failed_num[0]];
2326 locked++;
2327 set_bit(R5_LOCKED, &dev->flags);
2328 set_bit(R5_Wantwrite, &dev->flags);
2329 }
2330
2331 if (update_p) {
2332 dev = &sh->dev[pd_idx];
2333 locked ++;
2334 set_bit(R5_LOCKED, &dev->flags);
2335 set_bit(R5_Wantwrite, &dev->flags);
2336 }
2337 if (update_q) {
2338 dev = &sh->dev[qd_idx];
2339 locked++;
2340 set_bit(R5_LOCKED, &dev->flags);
2341 set_bit(R5_Wantwrite, &dev->flags);
2342 }
2343 clear_bit(STRIPE_DEGRADED, &sh->state);
2344
2345 set_bit(STRIPE_INSYNC, &sh->state);
2346 }
2347 }
2348
2349 if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
2350 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
2351 clear_bit(STRIPE_SYNCING, &sh->state);
2352 }
2353
2354 /* If the failed drives are just a ReadError, then we might need
2355 * to progress the repair/check process
2356 */
2357 if (failed <= 2 && ! conf->mddev->ro)
2358 for (i=0; i<failed;i++) {
2359 dev = &sh->dev[failed_num[i]];
2360 if (test_bit(R5_ReadError, &dev->flags)
2361 && !test_bit(R5_LOCKED, &dev->flags)
2362 && test_bit(R5_UPTODATE, &dev->flags)
2363 ) {
2364 if (!test_bit(R5_ReWrite, &dev->flags)) {
2365 set_bit(R5_Wantwrite, &dev->flags);
2366 set_bit(R5_ReWrite, &dev->flags);
2367 set_bit(R5_LOCKED, &dev->flags);
2368 } else {
2369 /* let's read it back */
2370 set_bit(R5_Wantread, &dev->flags);
2371 set_bit(R5_LOCKED, &dev->flags);
2372 }
2373 }
2374 }
/* Analysis is finished: the bios collected on return_bi are ended and
 * the flagged I/O is submitted without sh->lock held.
 */
2375 spin_unlock(&sh->lock);
2376
2377 while ((bi=return_bi)) {
2378 int bytes = bi->bi_size;
2379
2380 return_bi = bi->bi_next;
2381 bi->bi_next = NULL;
2382 bi->bi_size = 0;
2383 bi->bi_end_io(bi, bytes, 0);
2384 }
/* Submit one request per flagged device using the bio embedded in the
 * stripe (sh->dev[i].req); a pending write takes precedence over a read.
 */
2385 for (i=disks; i-- ;) {
2386 int rw;
2387 struct bio *bi;
2388 mdk_rdev_t *rdev;
2389 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
2390 rw = 1;
2391 else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
2392 rw = 0;
2393 else
2394 continue;
2395
2396 bi = &sh->dev[i].req;
2397
2398 bi->bi_rw = rw;
2399 if (rw)
2400 bi->bi_end_io = raid5_end_write_request;
2401 else
2402 bi->bi_end_io = raid5_end_read_request;
2403
2404 rcu_read_lock();
2405 rdev = rcu_dereference(conf->disks[i].rdev);
2406 if (rdev && test_bit(Faulty, &rdev->flags))
2407 rdev = NULL;
2408 if (rdev)
2409 atomic_inc(&rdev->nr_pending);
2410 rcu_read_unlock();
2411
2412 if (rdev) {
2413 if (syncing)
2414 md_sync_acct(rdev->bdev, STRIPE_SECTORS);
2415
2416 bi->bi_bdev = rdev->bdev;
2417 PRINTK("for %llu schedule op %ld on disc %d\n",
2418 (unsigned long long)sh->sector, bi->bi_rw, i);
2419 atomic_inc(&sh->count);
2420 bi->bi_sector = sh->sector + rdev->data_offset;
2421 bi->bi_flags = 1 << BIO_UPTODATE;
2422 bi->bi_vcnt = 1;
2423 bi->bi_max_vecs = 1;
2424 bi->bi_idx = 0;
2425 bi->bi_io_vec = &sh->dev[i].vec;
2426 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
2427 bi->bi_io_vec[0].bv_offset = 0;
2428 bi->bi_size = STRIPE_SIZE;
2429 bi->bi_next = NULL;
2430 if (rw == WRITE &&
2431 test_bit(R5_ReWrite, &sh->dev[i].flags))
2432 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
2433 generic_make_request(bi);
2434 } else {
/* no usable device: drop the lock bit and ask for the stripe
 * to be handled again; a skipped write marks it degraded
 */
2435 if (rw == 1)
2436 set_bit(STRIPE_DEGRADED, &sh->state);
2437 PRINTK("skip op %ld on disc %d for sector %llu\n",
2438 bi->bi_rw, i, (unsigned long long)sh->sector);
2439 clear_bit(R5_LOCKED, &sh->dev[i].flags);
2440 set_bit(STRIPE_HANDLE, &sh->state);
2441 }
2442 }
2443}
2444
2445static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
2446{
2447 if (sh->raid_conf->level == 6)
2448 handle_stripe6(sh, tmp_page);
2449 else
2450 handle_stripe5(sh);
2451}
2452
2453
2454
1618static void raid5_activate_delayed(raid5_conf_t *conf) 2455static void raid5_activate_delayed(raid5_conf_t *conf)
1619{ 2456{
1620 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { 2457 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -1753,7 +2590,7 @@ static int make_request(request_queue_t *q, struct bio * bi)
1753 2590
1754 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { 2591 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
1755 DEFINE_WAIT(w); 2592 DEFINE_WAIT(w);
1756 int disks; 2593 int disks, data_disks;
1757 2594
1758 retry: 2595 retry:
1759 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); 2596 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
@@ -1781,7 +2618,9 @@ static int make_request(request_queue_t *q, struct bio * bi)
1781 } 2618 }
1782 spin_unlock_irq(&conf->device_lock); 2619 spin_unlock_irq(&conf->device_lock);
1783 } 2620 }
1784 new_sector = raid5_compute_sector(logical_sector, disks, disks - 1, 2621 data_disks = disks - conf->max_degraded;
2622
2623 new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
1785 &dd_idx, &pd_idx, conf); 2624 &dd_idx, &pd_idx, conf);
1786 PRINTK("raid5: make_request, sector %llu logical %llu\n", 2625 PRINTK("raid5: make_request, sector %llu logical %llu\n",
1787 (unsigned long long)new_sector, 2626 (unsigned long long)new_sector,
@@ -1833,7 +2672,7 @@ static int make_request(request_queue_t *q, struct bio * bi)
1833 } 2672 }
1834 finish_wait(&conf->wait_for_overlap, &w); 2673 finish_wait(&conf->wait_for_overlap, &w);
1835 raid5_plug_device(conf); 2674 raid5_plug_device(conf);
1836 handle_stripe(sh); 2675 handle_stripe(sh, NULL);
1837 release_stripe(sh); 2676 release_stripe(sh);
1838 } else { 2677 } else {
1839 /* cannot get stripe for read-ahead, just give-up */ 2678 /* cannot get stripe for read-ahead, just give-up */
@@ -1849,7 +2688,7 @@ static int make_request(request_queue_t *q, struct bio * bi)
1849 if (remaining == 0) { 2688 if (remaining == 0) {
1850 int bytes = bi->bi_size; 2689 int bytes = bi->bi_size;
1851 2690
1852 if ( bio_data_dir(bi) == WRITE ) 2691 if ( rw == WRITE )
1853 md_write_end(mddev); 2692 md_write_end(mddev);
1854 bi->bi_size = 0; 2693 bi->bi_size = 0;
1855 bi->bi_end_io(bi, bytes, 0); 2694 bi->bi_end_io(bi, bytes, 0);
@@ -1857,17 +2696,142 @@ static int make_request(request_queue_t *q, struct bio * bi)
1857 return 0; 2696 return 0;
1858} 2697}
1859 2698
/*
 * reshape_request - perform one step of an array reshape.  Called from
 * sync_request() when MD_RECOVERY_RESHAPE is set.
 *
 * @mddev:     the array being reshaped.
 * @sector_nr: sector at which this step starts, as handed to sync_request().
 * @skipped:   set to 1 when the call only skips ahead to the recorded
 *             expand_progress and does no other work.
 *
 * Returns conf->chunk_size >> 9 after a normal step, or
 * expand_progress / (raid_disks - 1) when skipping ahead on a restart.
 */
1860/* FIXME go_faster isn't used */ 2699static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
1861static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
1862{ 2700{
2701 /* reshaping is quite different to recovery/resync so it is
2702 * handled quite separately ... here.
2703 *
2704 * On each call to sync_request, we gather one chunk worth of
2705 * destination stripes and flag them as expanding.
2706 * Then we find all the source stripes and request reads.
2707 * As the reads complete, handle_stripe will copy the data
2708 * into the destination stripe and release that stripe.
2709 */
1863 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 2710 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
1864 struct stripe_head *sh; 2711 struct stripe_head *sh;
1865 int pd_idx; 2712 int pd_idx;
1866 sector_t first_sector, last_sector; 2713 sector_t first_sector, last_sector;
2714 int raid_disks;
2715 int data_disks;
2716 int i;
2717 int dd_idx;
2718 sector_t writepos, safepos, gap;
2719
2720 if (sector_nr == 0 &&
2721 conf->expand_progress != 0) {
2722 /* restarting in the middle, skip the initial sectors */
2723 sector_nr = conf->expand_progress;
2724 sector_div(sector_nr, conf->raid_disks-1);
2725 *skipped = 1;
2726 return sector_nr;
2727 }
2728
2729 /* we update the metadata when there is more than 3Meg
2730 * in the block range (that is rather arbitrary, should
2731 * probably be time based) or when the data about to be
2732 * copied would over-write the source of the data at
2733 * the front of the range.
2734 * i.e. one new_stripe forward from expand_progress new_maps
2735 * to after where expand_lo old_maps to
2736 */
2737 writepos = conf->expand_progress +
2738 conf->chunk_size/512*(conf->raid_disks-1);
2739 sector_div(writepos, conf->raid_disks-1);
2740 safepos = conf->expand_lo;
2741 sector_div(safepos, conf->previous_raid_disks-1);
2742 gap = conf->expand_progress - conf->expand_lo;
2743
2744 if (writepos >= safepos ||
2745 gap > (conf->raid_disks-1)*3000*2 /*3Meg*/) {
2746 /* Cannot proceed until we've updated the superblock... */
2747 wait_event(conf->wait_for_overlap,
2748 atomic_read(&conf->reshape_stripes)==0);
2749 mddev->reshape_position = conf->expand_progress;
2750 mddev->sb_dirty = 1;
2751 md_wakeup_thread(mddev->thread);
2752 wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
2753 kthread_should_stop());
2754 spin_lock_irq(&conf->device_lock);
2755 conf->expand_lo = mddev->reshape_position;
2756 spin_unlock_irq(&conf->device_lock);
2757 wake_up(&conf->wait_for_overlap);
2758 }
2759
2760 for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
2761 int j;
/* NOTE: this local shadows the 'skipped' pointer parameter.  It
 * records whether any non-parity block of this stripe was left
 * unzeroed because it maps below mddev->array_size << 1.
 */
2762 int skipped = 0;
2763 pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
2764 sh = get_active_stripe(conf, sector_nr+i,
2765 conf->raid_disks, pd_idx, 0);
2766 set_bit(STRIPE_EXPANDING, &sh->state);
2767 atomic_inc(&conf->reshape_stripes);
2768 /* If any of this stripe is beyond the end of the old
2769 * array, then we need to zero those blocks
2770 */
2771 for (j=sh->disks; j--;) {
2772 sector_t s;
2773 if (j == sh->pd_idx)
2774 continue;
2775 s = compute_blocknr(sh, j);
2776 if (s < (mddev->array_size<<1)) {
2777 skipped = 1;
2778 continue;
2779 }
2780 memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
2781 set_bit(R5_Expanded, &sh->dev[j].flags);
2782 set_bit(R5_UPTODATE, &sh->dev[j].flags);
2783 }
/* every non-parity block was zero-filled above, so the stripe can be
 * marked ready without waiting for any source data
 */
2784 if (!skipped) {
2785 set_bit(STRIPE_EXPAND_READY, &sh->state);
2786 set_bit(STRIPE_HANDLE, &sh->state);
2787 }
2788 release_stripe(sh);
2789 }
2790 spin_lock_irq(&conf->device_lock);
2791 conf->expand_progress = (sector_nr + i)*(conf->raid_disks-1);
2792 spin_unlock_irq(&conf->device_lock);
2793 /* Ok, those stripe are ready. We can start scheduling
2794 * reads on the source stripes.
2795 * The source stripes are determined by mapping the first and last
2796 * block on the destination stripes.
2797 */
2798 raid_disks = conf->previous_raid_disks;
2799 data_disks = raid_disks - 1;
2800 first_sector =
2801 raid5_compute_sector(sector_nr*(conf->raid_disks-1),
2802 raid_disks, data_disks,
2803 &dd_idx, &pd_idx, conf);
2804 last_sector =
2805 raid5_compute_sector((sector_nr+conf->chunk_size/512)
2806 *(conf->raid_disks-1) -1,
2807 raid_disks, data_disks,
2808 &dd_idx, &pd_idx, conf);
/* clamp the source range so it stays below mddev->size << 1 */
2809 if (last_sector >= (mddev->size<<1))
2810 last_sector = (mddev->size<<1)-1;
2811 while (first_sector <= last_sector) {
2812 pd_idx = stripe_to_pdidx(first_sector, conf, conf->previous_raid_disks);
2813 sh = get_active_stripe(conf, first_sector,
2814 conf->previous_raid_disks, pd_idx, 0);
2815 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2816 set_bit(STRIPE_HANDLE, &sh->state);
2817 release_stripe(sh);
2818 first_sector += STRIPE_SECTORS;
2819 }
2820 return conf->chunk_size>>9;
2821}
2822
2823/* FIXME go_faster isn't used */
2824static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
2825{
2826 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
2827 struct stripe_head *sh;
2828 int pd_idx;
1867 int raid_disks = conf->raid_disks; 2829 int raid_disks = conf->raid_disks;
1868 int data_disks = raid_disks-1; 2830 int data_disks = raid_disks - conf->max_degraded;
1869 sector_t max_sector = mddev->size << 1; 2831 sector_t max_sector = mddev->size << 1;
1870 int sync_blocks; 2832 int sync_blocks;
2833 int still_degraded = 0;
2834 int i;
1871 2835
1872 if (sector_nr >= max_sector) { 2836 if (sector_nr >= max_sector) {
1873 /* just being told to finish up .. nothing much to do */ 2837 /* just being told to finish up .. nothing much to do */
@@ -1880,134 +2844,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1880 if (mddev->curr_resync < max_sector) /* aborted */ 2844 if (mddev->curr_resync < max_sector) /* aborted */
1881 bitmap_end_sync(mddev->bitmap, mddev->curr_resync, 2845 bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
1882 &sync_blocks, 1); 2846 &sync_blocks, 1);
1883 else /* compelted sync */ 2847 else /* completed sync */
1884 conf->fullsync = 0; 2848 conf->fullsync = 0;
1885 bitmap_close_sync(mddev->bitmap); 2849 bitmap_close_sync(mddev->bitmap);
1886 2850
1887 return 0; 2851 return 0;
1888 } 2852 }
1889 2853
1890 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { 2854 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
1891 /* reshaping is quite different to recovery/resync so it is 2855 return reshape_request(mddev, sector_nr, skipped);
1892 * handled quite separately ... here. 2856
1893 * 2857 /* if there is too many failed drives and we are trying
1894 * On each call to sync_request, we gather one chunk worth of
1895 * destination stripes and flag them as expanding.
1896 * Then we find all the source stripes and request reads.
1897 * As the reads complete, handle_stripe will copy the data
1898 * into the destination stripe and release that stripe.
1899 */
1900 int i;
1901 int dd_idx;
1902 sector_t writepos, safepos, gap;
1903
1904 if (sector_nr == 0 &&
1905 conf->expand_progress != 0) {
1906 /* restarting in the middle, skip the initial sectors */
1907 sector_nr = conf->expand_progress;
1908 sector_div(sector_nr, conf->raid_disks-1);
1909 *skipped = 1;
1910 return sector_nr;
1911 }
1912
1913 /* we update the metadata when there is more than 3Meg
1914 * in the block range (that is rather arbitrary, should
1915 * probably be time based) or when the data about to be
1916 * copied would over-write the source of the data at
1917 * the front of the range.
1918 * i.e. one new_stripe forward from expand_progress new_maps
1919 * to after where expand_lo old_maps to
1920 */
1921 writepos = conf->expand_progress +
1922 conf->chunk_size/512*(conf->raid_disks-1);
1923 sector_div(writepos, conf->raid_disks-1);
1924 safepos = conf->expand_lo;
1925 sector_div(safepos, conf->previous_raid_disks-1);
1926 gap = conf->expand_progress - conf->expand_lo;
1927
1928 if (writepos >= safepos ||
1929 gap > (conf->raid_disks-1)*3000*2 /*3Meg*/) {
1930 /* Cannot proceed until we've updated the superblock... */
1931 wait_event(conf->wait_for_overlap,
1932 atomic_read(&conf->reshape_stripes)==0);
1933 mddev->reshape_position = conf->expand_progress;
1934 mddev->sb_dirty = 1;
1935 md_wakeup_thread(mddev->thread);
1936 wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
1937 kthread_should_stop());
1938 spin_lock_irq(&conf->device_lock);
1939 conf->expand_lo = mddev->reshape_position;
1940 spin_unlock_irq(&conf->device_lock);
1941 wake_up(&conf->wait_for_overlap);
1942 }
1943
1944 for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
1945 int j;
1946 int skipped = 0;
1947 pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
1948 sh = get_active_stripe(conf, sector_nr+i,
1949 conf->raid_disks, pd_idx, 0);
1950 set_bit(STRIPE_EXPANDING, &sh->state);
1951 atomic_inc(&conf->reshape_stripes);
1952 /* If any of this stripe is beyond the end of the old
1953 * array, then we need to zero those blocks
1954 */
1955 for (j=sh->disks; j--;) {
1956 sector_t s;
1957 if (j == sh->pd_idx)
1958 continue;
1959 s = compute_blocknr(sh, j);
1960 if (s < (mddev->array_size<<1)) {
1961 skipped = 1;
1962 continue;
1963 }
1964 memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
1965 set_bit(R5_Expanded, &sh->dev[j].flags);
1966 set_bit(R5_UPTODATE, &sh->dev[j].flags);
1967 }
1968 if (!skipped) {
1969 set_bit(STRIPE_EXPAND_READY, &sh->state);
1970 set_bit(STRIPE_HANDLE, &sh->state);
1971 }
1972 release_stripe(sh);
1973 }
1974 spin_lock_irq(&conf->device_lock);
1975 conf->expand_progress = (sector_nr + i)*(conf->raid_disks-1);
1976 spin_unlock_irq(&conf->device_lock);
1977 /* Ok, those stripe are ready. We can start scheduling
1978 * reads on the source stripes.
1979 * The source stripes are determined by mapping the first and last
1980 * block on the destination stripes.
1981 */
1982 raid_disks = conf->previous_raid_disks;
1983 data_disks = raid_disks - 1;
1984 first_sector =
1985 raid5_compute_sector(sector_nr*(conf->raid_disks-1),
1986 raid_disks, data_disks,
1987 &dd_idx, &pd_idx, conf);
1988 last_sector =
1989 raid5_compute_sector((sector_nr+conf->chunk_size/512)
1990 *(conf->raid_disks-1) -1,
1991 raid_disks, data_disks,
1992 &dd_idx, &pd_idx, conf);
1993 if (last_sector >= (mddev->size<<1))
1994 last_sector = (mddev->size<<1)-1;
1995 while (first_sector <= last_sector) {
1996 pd_idx = stripe_to_pdidx(first_sector, conf, conf->previous_raid_disks);
1997 sh = get_active_stripe(conf, first_sector,
1998 conf->previous_raid_disks, pd_idx, 0);
1999 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2000 set_bit(STRIPE_HANDLE, &sh->state);
2001 release_stripe(sh);
2002 first_sector += STRIPE_SECTORS;
2003 }
2004 return conf->chunk_size>>9;
2005 }
2006 /* if there is 1 or more failed drives and we are trying
2007 * to resync, then assert that we are finished, because there is 2858 * to resync, then assert that we are finished, because there is
2008 * nothing we can do. 2859 * nothing we can do.
2009 */ 2860 */
2010 if (mddev->degraded >= 1 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 2861 if (mddev->degraded >= conf->max_degraded &&
2862 test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2011 sector_t rv = (mddev->size << 1) - sector_nr; 2863 sector_t rv = (mddev->size << 1) - sector_nr;
2012 *skipped = 1; 2864 *skipped = 1;
2013 return rv; 2865 return rv;
@@ -2026,17 +2878,26 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
2026 if (sh == NULL) { 2878 if (sh == NULL) {
2027 sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0); 2879 sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
2028 /* make sure we don't swamp the stripe cache if someone else 2880 /* make sure we don't swamp the stripe cache if someone else
2029 * is trying to get access 2881 * is trying to get access
2030 */ 2882 */
2031 schedule_timeout_uninterruptible(1); 2883 schedule_timeout_uninterruptible(1);
2032 } 2884 }
2033 bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 0); 2885 /* Need to check if array will still be degraded after recovery/resync
2034 spin_lock(&sh->lock); 2886 * We don't need to check the 'failed' flag as when that gets set,
2887 * recovery aborts.
2888 */
2889 for (i=0; i<mddev->raid_disks; i++)
2890 if (conf->disks[i].rdev == NULL)
2891 still_degraded = 1;
2892
2893 bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
2894
2895 spin_lock(&sh->lock);
2035 set_bit(STRIPE_SYNCING, &sh->state); 2896 set_bit(STRIPE_SYNCING, &sh->state);
2036 clear_bit(STRIPE_INSYNC, &sh->state); 2897 clear_bit(STRIPE_INSYNC, &sh->state);
2037 spin_unlock(&sh->lock); 2898 spin_unlock(&sh->lock);
2038 2899
2039 handle_stripe(sh); 2900 handle_stripe(sh, NULL);
2040 release_stripe(sh); 2901 release_stripe(sh);
2041 2902
2042 return STRIPE_SECTORS; 2903 return STRIPE_SECTORS;
@@ -2091,7 +2952,7 @@ static void raid5d (mddev_t *mddev)
2091 spin_unlock_irq(&conf->device_lock); 2952 spin_unlock_irq(&conf->device_lock);
2092 2953
2093 handled++; 2954 handled++;
2094 handle_stripe(sh); 2955 handle_stripe(sh, conf->spare_page);
2095 release_stripe(sh); 2956 release_stripe(sh);
2096 2957
2097 spin_lock_irq(&conf->device_lock); 2958 spin_lock_irq(&conf->device_lock);
@@ -2181,8 +3042,8 @@ static int run(mddev_t *mddev)
2181 struct disk_info *disk; 3042 struct disk_info *disk;
2182 struct list_head *tmp; 3043 struct list_head *tmp;
2183 3044
2184 if (mddev->level != 5 && mddev->level != 4) { 3045 if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
2185 printk(KERN_ERR "raid5: %s: raid level not set to 4/5 (%d)\n", 3046 printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
2186 mdname(mddev), mddev->level); 3047 mdname(mddev), mddev->level);
2187 return -EIO; 3048 return -EIO;
2188 } 3049 }
@@ -2251,6 +3112,11 @@ static int run(mddev_t *mddev)
2251 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) 3112 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
2252 goto abort; 3113 goto abort;
2253 3114
3115 if (mddev->level == 6) {
3116 conf->spare_page = alloc_page(GFP_KERNEL);
3117 if (!conf->spare_page)
3118 goto abort;
3119 }
2254 spin_lock_init(&conf->device_lock); 3120 spin_lock_init(&conf->device_lock);
2255 init_waitqueue_head(&conf->wait_for_stripe); 3121 init_waitqueue_head(&conf->wait_for_stripe);
2256 init_waitqueue_head(&conf->wait_for_overlap); 3122 init_waitqueue_head(&conf->wait_for_overlap);
@@ -2282,12 +3148,16 @@ static int run(mddev_t *mddev)
2282 } 3148 }
2283 3149
2284 /* 3150 /*
2285 * 0 for a fully functional array, 1 for a degraded array. 3151 * 0 for a fully functional array, 1 or 2 for a degraded array.
2286 */ 3152 */
2287 mddev->degraded = conf->failed_disks = conf->raid_disks - conf->working_disks; 3153 mddev->degraded = conf->failed_disks = conf->raid_disks - conf->working_disks;
2288 conf->mddev = mddev; 3154 conf->mddev = mddev;
2289 conf->chunk_size = mddev->chunk_size; 3155 conf->chunk_size = mddev->chunk_size;
2290 conf->level = mddev->level; 3156 conf->level = mddev->level;
3157 if (conf->level == 6)
3158 conf->max_degraded = 2;
3159 else
3160 conf->max_degraded = 1;
2291 conf->algorithm = mddev->layout; 3161 conf->algorithm = mddev->layout;
2292 conf->max_nr_stripes = NR_STRIPES; 3162 conf->max_nr_stripes = NR_STRIPES;
2293 conf->expand_progress = mddev->reshape_position; 3163 conf->expand_progress = mddev->reshape_position;
@@ -2296,6 +3166,11 @@ static int run(mddev_t *mddev)
2296 mddev->size &= ~(mddev->chunk_size/1024 -1); 3166 mddev->size &= ~(mddev->chunk_size/1024 -1);
2297 mddev->resync_max_sectors = mddev->size << 1; 3167 mddev->resync_max_sectors = mddev->size << 1;
2298 3168
3169 if (conf->level == 6 && conf->raid_disks < 4) {
3170 printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
3171 mdname(mddev), conf->raid_disks);
3172 goto abort;
3173 }
2299 if (!conf->chunk_size || conf->chunk_size % 4) { 3174 if (!conf->chunk_size || conf->chunk_size % 4) {
2300 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", 3175 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
2301 conf->chunk_size, mdname(mddev)); 3176 conf->chunk_size, mdname(mddev));
@@ -2307,14 +3182,14 @@ static int run(mddev_t *mddev)
2307 conf->algorithm, mdname(mddev)); 3182 conf->algorithm, mdname(mddev));
2308 goto abort; 3183 goto abort;
2309 } 3184 }
2310 if (mddev->degraded > 1) { 3185 if (mddev->degraded > conf->max_degraded) {
2311 printk(KERN_ERR "raid5: not enough operational devices for %s" 3186 printk(KERN_ERR "raid5: not enough operational devices for %s"
2312 " (%d/%d failed)\n", 3187 " (%d/%d failed)\n",
2313 mdname(mddev), conf->failed_disks, conf->raid_disks); 3188 mdname(mddev), conf->failed_disks, conf->raid_disks);
2314 goto abort; 3189 goto abort;
2315 } 3190 }
2316 3191
2317 if (mddev->degraded == 1 && 3192 if (mddev->degraded > 0 &&
2318 mddev->recovery_cp != MaxSector) { 3193 mddev->recovery_cp != MaxSector) {
2319 if (mddev->ok_start_degraded) 3194 if (mddev->ok_start_degraded)
2320 printk(KERN_WARNING 3195 printk(KERN_WARNING
@@ -2379,11 +3254,12 @@ static int run(mddev_t *mddev)
2379 } 3254 }
2380 3255
2381 /* read-ahead size must cover two whole stripes, which is 3256 /* read-ahead size must cover two whole stripes, which is
2382 * 2 * (n-1) * chunksize where 'n' is the number of raid devices 3257 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
2383 */ 3258 */
2384 { 3259 {
2385 int stripe = (mddev->raid_disks-1) * mddev->chunk_size 3260 int data_disks = conf->previous_raid_disks - conf->max_degraded;
2386 / PAGE_SIZE; 3261 int stripe = data_disks *
3262 (mddev->chunk_size / PAGE_SIZE);
2387 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 3263 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
2388 mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 3264 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
2389 } 3265 }
@@ -2393,12 +3269,14 @@ static int run(mddev_t *mddev)
2393 3269
2394 mddev->queue->unplug_fn = raid5_unplug_device; 3270 mddev->queue->unplug_fn = raid5_unplug_device;
2395 mddev->queue->issue_flush_fn = raid5_issue_flush; 3271 mddev->queue->issue_flush_fn = raid5_issue_flush;
2396 mddev->array_size = mddev->size * (conf->previous_raid_disks - 1); 3272 mddev->array_size = mddev->size * (conf->previous_raid_disks -
3273 conf->max_degraded);
2397 3274
2398 return 0; 3275 return 0;
2399abort: 3276abort:
2400 if (conf) { 3277 if (conf) {
2401 print_raid5_conf(conf); 3278 print_raid5_conf(conf);
3279 safe_put_page(conf->spare_page);
2402 kfree(conf->disks); 3280 kfree(conf->disks);
2403 kfree(conf->stripe_hashtbl); 3281 kfree(conf->stripe_hashtbl);
2404 kfree(conf); 3282 kfree(conf);
@@ -2427,23 +3305,23 @@ static int stop(mddev_t *mddev)
2427} 3305}
2428 3306
2429#if RAID5_DEBUG 3307#if RAID5_DEBUG
2430static void print_sh (struct stripe_head *sh) 3308static void print_sh (struct seq_file *seq, struct stripe_head *sh)
2431{ 3309{
2432 int i; 3310 int i;
2433 3311
2434 printk("sh %llu, pd_idx %d, state %ld.\n", 3312 seq_printf(seq, "sh %llu, pd_idx %d, state %ld.\n",
2435 (unsigned long long)sh->sector, sh->pd_idx, sh->state); 3313 (unsigned long long)sh->sector, sh->pd_idx, sh->state);
2436 printk("sh %llu, count %d.\n", 3314 seq_printf(seq, "sh %llu, count %d.\n",
2437 (unsigned long long)sh->sector, atomic_read(&sh->count)); 3315 (unsigned long long)sh->sector, atomic_read(&sh->count));
2438 printk("sh %llu, ", (unsigned long long)sh->sector); 3316 seq_printf(seq, "sh %llu, ", (unsigned long long)sh->sector);
2439 for (i = 0; i < sh->disks; i++) { 3317 for (i = 0; i < sh->disks; i++) {
2440 printk("(cache%d: %p %ld) ", 3318 seq_printf(seq, "(cache%d: %p %ld) ",
2441 i, sh->dev[i].page, sh->dev[i].flags); 3319 i, sh->dev[i].page, sh->dev[i].flags);
2442 } 3320 }
2443 printk("\n"); 3321 seq_printf(seq, "\n");
2444} 3322}
2445 3323
2446static void printall (raid5_conf_t *conf) 3324static void printall (struct seq_file *seq, raid5_conf_t *conf)
2447{ 3325{
2448 struct stripe_head *sh; 3326 struct stripe_head *sh;
2449 struct hlist_node *hn; 3327 struct hlist_node *hn;
@@ -2454,7 +3332,7 @@ static void printall (raid5_conf_t *conf)
2454 hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) { 3332 hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) {
2455 if (sh->raid_conf != conf) 3333 if (sh->raid_conf != conf)
2456 continue; 3334 continue;
2457 print_sh(sh); 3335 print_sh(seq, sh);
2458 } 3336 }
2459 } 3337 }
2460 spin_unlock_irq(&conf->device_lock); 3338 spin_unlock_irq(&conf->device_lock);
@@ -2474,9 +3352,8 @@ static void status (struct seq_file *seq, mddev_t *mddev)
2474 test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_"); 3352 test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
2475 seq_printf (seq, "]"); 3353 seq_printf (seq, "]");
2476#if RAID5_DEBUG 3354#if RAID5_DEBUG
2477#define D(x) \ 3355 seq_printf (seq, "\n");
2478 seq_printf (seq, "<"#x":%d>", atomic_read(&conf->x)) 3356 printall(seq, conf);
2479 printall(conf);
2480#endif 3357#endif
2481} 3358}
2482 3359
@@ -2560,14 +3437,20 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
2560 int disk; 3437 int disk;
2561 struct disk_info *p; 3438 struct disk_info *p;
2562 3439
2563 if (mddev->degraded > 1) 3440 if (mddev->degraded > conf->max_degraded)
2564 /* no point adding a device */ 3441 /* no point adding a device */
2565 return 0; 3442 return 0;
2566 3443
2567 /* 3444 /*
2568 * find the disk ... 3445 * find the disk ... but prefer rdev->saved_raid_disk
3446 * if possible.
2569 */ 3447 */
2570 for (disk=0; disk < conf->raid_disks; disk++) 3448 if (rdev->saved_raid_disk >= 0 &&
3449 conf->disks[rdev->saved_raid_disk].rdev == NULL)
3450 disk = rdev->saved_raid_disk;
3451 else
3452 disk = 0;
3453 for ( ; disk < conf->raid_disks; disk++)
2571 if ((p=conf->disks + disk)->rdev == NULL) { 3454 if ((p=conf->disks + disk)->rdev == NULL) {
2572 clear_bit(In_sync, &rdev->flags); 3455 clear_bit(In_sync, &rdev->flags);
2573 rdev->raid_disk = disk; 3456 rdev->raid_disk = disk;
@@ -2590,8 +3473,10 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
2590 * any io in the removed space completes, but it hardly seems 3473 * any io in the removed space completes, but it hardly seems
2591 * worth it. 3474 * worth it.
2592 */ 3475 */
3476 raid5_conf_t *conf = mddev_to_conf(mddev);
3477
2593 sectors &= ~((sector_t)mddev->chunk_size/512 - 1); 3478 sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
2594 mddev->array_size = (sectors * (mddev->raid_disks-1))>>1; 3479 mddev->array_size = (sectors * (mddev->raid_disks-conf->max_degraded))>>1;
2595 set_capacity(mddev->gendisk, mddev->array_size << 1); 3480 set_capacity(mddev->gendisk, mddev->array_size << 1);
2596 mddev->changed = 1; 3481 mddev->changed = 1;
2597 if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { 3482 if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) {
@@ -2680,6 +3565,7 @@ static int raid5_start_reshape(mddev_t *mddev)
2680 set_bit(In_sync, &rdev->flags); 3565 set_bit(In_sync, &rdev->flags);
2681 conf->working_disks++; 3566 conf->working_disks++;
2682 added_devices++; 3567 added_devices++;
3568 rdev->recovery_offset = 0;
2683 sprintf(nm, "rd%d", rdev->raid_disk); 3569 sprintf(nm, "rd%d", rdev->raid_disk);
2684 sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); 3570 sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
2685 } else 3571 } else
@@ -2731,6 +3617,17 @@ static void end_reshape(raid5_conf_t *conf)
2731 conf->expand_progress = MaxSector; 3617 conf->expand_progress = MaxSector;
2732 spin_unlock_irq(&conf->device_lock); 3618 spin_unlock_irq(&conf->device_lock);
2733 conf->mddev->reshape_position = MaxSector; 3619 conf->mddev->reshape_position = MaxSector;
3620
3621 /* read-ahead size must cover two whole stripes, which is
3622 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
3623 */
3624 {
3625 int data_disks = conf->previous_raid_disks - conf->max_degraded;
3626 int stripe = data_disks *
3627 (conf->mddev->chunk_size / PAGE_SIZE);
3628 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
3629 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
3630 }
2734 } 3631 }
2735} 3632}
2736 3633
@@ -2762,6 +3659,23 @@ static void raid5_quiesce(mddev_t *mddev, int state)
2762 } 3659 }
2763} 3660}
2764 3661
3662static struct mdk_personality raid6_personality =
3663{
3664 .name = "raid6",
3665 .level = 6,
3666 .owner = THIS_MODULE,
3667 .make_request = make_request,
3668 .run = run,
3669 .stop = stop,
3670 .status = status,
3671 .error_handler = error,
3672 .hot_add_disk = raid5_add_disk,
3673 .hot_remove_disk= raid5_remove_disk,
3674 .spare_active = raid5_spare_active,
3675 .sync_request = sync_request,
3676 .resize = raid5_resize,
3677 .quiesce = raid5_quiesce,
3678};
2765static struct mdk_personality raid5_personality = 3679static struct mdk_personality raid5_personality =
2766{ 3680{
2767 .name = "raid5", 3681 .name = "raid5",
@@ -2804,6 +3718,12 @@ static struct mdk_personality raid4_personality =
2804 3718
2805static int __init raid5_init(void) 3719static int __init raid5_init(void)
2806{ 3720{
3721 int e;
3722
3723 e = raid6_select_algo();
3724 if ( e )
3725 return e;
3726 register_md_personality(&raid6_personality);
2807 register_md_personality(&raid5_personality); 3727 register_md_personality(&raid5_personality);
2808 register_md_personality(&raid4_personality); 3728 register_md_personality(&raid4_personality);
2809 return 0; 3729 return 0;
@@ -2811,6 +3731,7 @@ static int __init raid5_init(void)
2811 3731
2812static void raid5_exit(void) 3732static void raid5_exit(void)
2813{ 3733{
3734 unregister_md_personality(&raid6_personality);
2814 unregister_md_personality(&raid5_personality); 3735 unregister_md_personality(&raid5_personality);
2815 unregister_md_personality(&raid4_personality); 3736 unregister_md_personality(&raid4_personality);
2816} 3737}
@@ -2823,3 +3744,10 @@ MODULE_ALIAS("md-raid5");
2823MODULE_ALIAS("md-raid4"); 3744MODULE_ALIAS("md-raid4");
2824MODULE_ALIAS("md-level-5"); 3745MODULE_ALIAS("md-level-5");
2825MODULE_ALIAS("md-level-4"); 3746MODULE_ALIAS("md-level-4");
3747MODULE_ALIAS("md-personality-8"); /* RAID6 */
3748MODULE_ALIAS("md-raid6");
3749MODULE_ALIAS("md-level-6");
3750
3751/* This used to be two separate modules, they were: */
3752MODULE_ALIAS("raid5");
3753MODULE_ALIAS("raid6");
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
deleted file mode 100644
index bc69355e0100..000000000000
--- a/drivers/md/raid6main.c
+++ /dev/null
@@ -1,2427 +0,0 @@
1/*
2 * raid6main.c : Multiple Devices driver for Linux
3 * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
4 * Copyright (C) 1999, 2000 Ingo Molnar
5 * Copyright (C) 2002, 2003 H. Peter Anvin
6 *
7 * RAID-6 management functions. This code is derived from raid5.c.
8 * Last merge from raid5.c bkcvs version 1.79 (kernel 2.6.1).
9 *
10 * Thanks to Penguin Computing for making the RAID-6 development possible
11 * by donating a test server!
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
16 * any later version.
17 *
18 * You should have received a copy of the GNU General Public License
19 * (for example /usr/src/linux/COPYING); if not, write to the Free
20 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 */
22
23
24#include <linux/config.h>
25#include <linux/module.h>
26#include <linux/slab.h>
27#include <linux/highmem.h>
28#include <linux/bitops.h>
29#include <asm/atomic.h>
30#include "raid6.h"
31
32#include <linux/raid/bitmap.h>
33
/*
 * Stripe cache
 *
 * Sizes are derived from the page size: one cached stripe unit covers
 * STRIPE_SIZE bytes, i.e. STRIPE_SECTORS 512-byte sectors, and the hash
 * table holds as many hlist heads as fit in one page.
 */

#define NR_STRIPES		256
#define STRIPE_SIZE		PAGE_SIZE
/* shift that turns a sector number into a stripe-unit number */
#define STRIPE_SHIFT		(PAGE_SHIFT - 9)
#define STRIPE_SECTORS		(STRIPE_SIZE>>9)
/* __release_stripe() wakes the md thread once fewer than this many
 * preread-active stripes remain */
#define	IO_THRESHOLD		1
#define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
#define HASH_MASK		(NR_HASH - 1)

/* hash bucket for the stripe unit that contains sector 'sect' */
#define stripe_hash(conf, sect)	(&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]))
47
/* bio's attached to a stripe+device for I/O are linked together in bi_sector
 * order without overlap.  There may be several bio's per stripe+device, and
 * a bio could span several devices.
 * When walking this list for a particular stripe+device, we must never proceed
 * beyond a bio that extends past this device, as the next bio might no longer
 * be valid.
 * This macro is used to determine the 'next' bio in the list, given the sector
 * of the current stripe+device.
 *
 * It evaluates to bio->bi_next when 'bio' ends before sect + STRIPE_SECTORS,
 * and to NULL otherwise.  Both arguments are parenthesised in the expansion
 * so that an expression such as 's & ~7' can be passed as 'sect' without its
 * operators re-associating with the '<' and '+' used here.
 */
#define r5_next_bio(bio, sect) ( ( (bio)->bi_sector + ((bio)->bi_size>>9) < (sect) + STRIPE_SECTORS) ? (bio)->bi_next : NULL)
/*
 * The following can be used to debug the driver
 */
#define RAID6_DEBUG	0	/* Extremely verbose printk */
#define RAID6_PARANOIA	1	/* Check spinlocks */
#define RAID6_DUMPSTATE 0	/* Include stripe cache state in /proc/mdstat */

/*
 * CHECK_DEVLOCK() asserts that the device lock is held.  It expands to a
 * reference to a variable named 'conf', so it can only be used where such
 * a variable is in scope.  Without RAID6_PARANOIA or on non-SMP builds it
 * expands to nothing.
 */
#if RAID6_PARANOIA && defined(CONFIG_SMP)
# define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock)
#else
# define CHECK_DEVLOCK()
#endif

/* Debug printk: the call is short-circuited away unless RAID6_DEBUG is set */
#define PRINTK(x...) ((void)(RAID6_DEBUG && printk(KERN_DEBUG x)))
#if RAID6_DEBUG
/* turn every 'inline' into a plain function while debugging */
#undef inline
#undef __inline__
#define inline
#define __inline__
#endif
77
78#if !RAID6_USE_EMPTY_ZERO_PAGE
79/* In .bss so it's zeroed */
80const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
81#endif
82
/*
 * Return the index of the disk after 'disk' in an array of 'raid_disks'
 * members, wrapping round to disk 0 after the last one.
 */
static inline int raid6_next_disk(int disk, int raid_disks)
{
	disk++;
	return (disk < raid_disks) ? disk : 0;
}
88
89static void print_raid6_conf (raid6_conf_t *conf);
90
91static void __release_stripe(raid6_conf_t *conf, struct stripe_head *sh)
92{
93 if (atomic_dec_and_test(&sh->count)) {
94 BUG_ON(!list_empty(&sh->lru));
95 BUG_ON(atomic_read(&conf->active_stripes)==0);
96 if (test_bit(STRIPE_HANDLE, &sh->state)) {
97 if (test_bit(STRIPE_DELAYED, &sh->state))
98 list_add_tail(&sh->lru, &conf->delayed_list);
99 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
100 conf->seq_write == sh->bm_seq)
101 list_add_tail(&sh->lru, &conf->bitmap_list);
102 else {
103 clear_bit(STRIPE_BIT_DELAY, &sh->state);
104 list_add_tail(&sh->lru, &conf->handle_list);
105 }
106 md_wakeup_thread(conf->mddev->thread);
107 } else {
108 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
109 atomic_dec(&conf->preread_active_stripes);
110 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
111 md_wakeup_thread(conf->mddev->thread);
112 }
113 list_add_tail(&sh->lru, &conf->inactive_list);
114 atomic_dec(&conf->active_stripes);
115 if (!conf->inactive_blocked ||
116 atomic_read(&conf->active_stripes) < (conf->max_nr_stripes*3/4))
117 wake_up(&conf->wait_for_stripe);
118 }
119 }
120}
121static void release_stripe(struct stripe_head *sh)
122{
123 raid6_conf_t *conf = sh->raid_conf;
124 unsigned long flags;
125
126 spin_lock_irqsave(&conf->device_lock, flags);
127 __release_stripe(conf, sh);
128 spin_unlock_irqrestore(&conf->device_lock, flags);
129}
130
131static inline void remove_hash(struct stripe_head *sh)
132{
133 PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);
134
135 hlist_del_init(&sh->hash);
136}
137
138static inline void insert_hash(raid6_conf_t *conf, struct stripe_head *sh)
139{
140 struct hlist_head *hp = stripe_hash(conf, sh->sector);
141
142 PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);
143
144 CHECK_DEVLOCK();
145 hlist_add_head(&sh->hash, hp);
146}
147
148
149/* find an idle stripe, make sure it is unhashed, and return it. */
150static struct stripe_head *get_free_stripe(raid6_conf_t *conf)
151{
152 struct stripe_head *sh = NULL;
153 struct list_head *first;
154
155 CHECK_DEVLOCK();
156 if (list_empty(&conf->inactive_list))
157 goto out;
158 first = conf->inactive_list.next;
159 sh = list_entry(first, struct stripe_head, lru);
160 list_del_init(first);
161 remove_hash(sh);
162 atomic_inc(&conf->active_stripes);
163out:
164 return sh;
165}
166
167static void shrink_buffers(struct stripe_head *sh, int num)
168{
169 struct page *p;
170 int i;
171
172 for (i=0; i<num ; i++) {
173 p = sh->dev[i].page;
174 if (!p)
175 continue;
176 sh->dev[i].page = NULL;
177 put_page(p);
178 }
179}
180
181static int grow_buffers(struct stripe_head *sh, int num)
182{
183 int i;
184
185 for (i=0; i<num; i++) {
186 struct page *page;
187
188 if (!(page = alloc_page(GFP_KERNEL))) {
189 return 1;
190 }
191 sh->dev[i].page = page;
192 }
193 return 0;
194}
195
196static void raid6_build_block (struct stripe_head *sh, int i);
197
198static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
199{
200 raid6_conf_t *conf = sh->raid_conf;
201 int disks = conf->raid_disks, i;
202
203 BUG_ON(atomic_read(&sh->count) != 0);
204 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
205
206 CHECK_DEVLOCK();
207 PRINTK("init_stripe called, stripe %llu\n",
208 (unsigned long long)sh->sector);
209
210 remove_hash(sh);
211
212 sh->sector = sector;
213 sh->pd_idx = pd_idx;
214 sh->state = 0;
215
216 for (i=disks; i--; ) {
217 struct r5dev *dev = &sh->dev[i];
218
219 if (dev->toread || dev->towrite || dev->written ||
220 test_bit(R5_LOCKED, &dev->flags)) {
221 PRINTK("sector=%llx i=%d %p %p %p %d\n",
222 (unsigned long long)sh->sector, i, dev->toread,
223 dev->towrite, dev->written,
224 test_bit(R5_LOCKED, &dev->flags));
225 BUG();
226 }
227 dev->flags = 0;
228 raid6_build_block(sh, i);
229 }
230 insert_hash(conf, sh);
231}
232
233static struct stripe_head *__find_stripe(raid6_conf_t *conf, sector_t sector)
234{
235 struct stripe_head *sh;
236 struct hlist_node *hn;
237
238 CHECK_DEVLOCK();
239 PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
240 hlist_for_each_entry (sh, hn, stripe_hash(conf, sector), hash)
241 if (sh->sector == sector)
242 return sh;
243 PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
244 return NULL;
245}
246
247static void unplug_slaves(mddev_t *mddev);
248
249static struct stripe_head *get_active_stripe(raid6_conf_t *conf, sector_t sector,
250 int pd_idx, int noblock)
251{
252 struct stripe_head *sh;
253
254 PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector);
255
256 spin_lock_irq(&conf->device_lock);
257
258 do {
259 wait_event_lock_irq(conf->wait_for_stripe,
260 conf->quiesce == 0,
261 conf->device_lock, /* nothing */);
262 sh = __find_stripe(conf, sector);
263 if (!sh) {
264 if (!conf->inactive_blocked)
265 sh = get_free_stripe(conf);
266 if (noblock && sh == NULL)
267 break;
268 if (!sh) {
269 conf->inactive_blocked = 1;
270 wait_event_lock_irq(conf->wait_for_stripe,
271 !list_empty(&conf->inactive_list) &&
272 (atomic_read(&conf->active_stripes)
273 < (conf->max_nr_stripes *3/4)
274 || !conf->inactive_blocked),
275 conf->device_lock,
276 unplug_slaves(conf->mddev);
277 );
278 conf->inactive_blocked = 0;
279 } else
280 init_stripe(sh, sector, pd_idx);
281 } else {
282 if (atomic_read(&sh->count)) {
283 BUG_ON(!list_empty(&sh->lru));
284 } else {
285 if (!test_bit(STRIPE_HANDLE, &sh->state))
286 atomic_inc(&conf->active_stripes);
287 BUG_ON(list_empty(&sh->lru));
288 list_del_init(&sh->lru);
289 }
290 }
291 } while (sh == NULL);
292
293 if (sh)
294 atomic_inc(&sh->count);
295
296 spin_unlock_irq(&conf->device_lock);
297 return sh;
298}
299
300static int grow_one_stripe(raid6_conf_t *conf)
301{
302 struct stripe_head *sh;
303 sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
304 if (!sh)
305 return 0;
306 memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
307 sh->raid_conf = conf;
308 spin_lock_init(&sh->lock);
309
310 if (grow_buffers(sh, conf->raid_disks)) {
311 shrink_buffers(sh, conf->raid_disks);
312 kmem_cache_free(conf->slab_cache, sh);
313 return 0;
314 }
315 /* we just created an active stripe so... */
316 atomic_set(&sh->count, 1);
317 atomic_inc(&conf->active_stripes);
318 INIT_LIST_HEAD(&sh->lru);
319 release_stripe(sh);
320 return 1;
321}
322
323static int grow_stripes(raid6_conf_t *conf, int num)
324{
325 kmem_cache_t *sc;
326 int devs = conf->raid_disks;
327
328 sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));
329
330 sc = kmem_cache_create(conf->cache_name[0],
331 sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
332 0, 0, NULL, NULL);
333 if (!sc)
334 return 1;
335 conf->slab_cache = sc;
336 while (num--)
337 if (!grow_one_stripe(conf))
338 return 1;
339 return 0;
340}
341
342static int drop_one_stripe(raid6_conf_t *conf)
343{
344 struct stripe_head *sh;
345 spin_lock_irq(&conf->device_lock);
346 sh = get_free_stripe(conf);
347 spin_unlock_irq(&conf->device_lock);
348 if (!sh)
349 return 0;
350 BUG_ON(atomic_read(&sh->count));
351 shrink_buffers(sh, conf->raid_disks);
352 kmem_cache_free(conf->slab_cache, sh);
353 atomic_dec(&conf->active_stripes);
354 return 1;
355}
356
357static void shrink_stripes(raid6_conf_t *conf)
358{
359 while (drop_one_stripe(conf))
360 ;
361
362 if (conf->slab_cache)
363 kmem_cache_destroy(conf->slab_cache);
364 conf->slab_cache = NULL;
365}
366
367static int raid6_end_read_request(struct bio * bi, unsigned int bytes_done,
368 int error)
369{
370 struct stripe_head *sh = bi->bi_private;
371 raid6_conf_t *conf = sh->raid_conf;
372 int disks = conf->raid_disks, i;
373 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
374
375 if (bi->bi_size)
376 return 1;
377
378 for (i=0 ; i<disks; i++)
379 if (bi == &sh->dev[i].req)
380 break;
381
382 PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n",
383 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
384 uptodate);
385 if (i == disks) {
386 BUG();
387 return 0;
388 }
389
390 if (uptodate) {
391#if 0
392 struct bio *bio;
393 unsigned long flags;
394 spin_lock_irqsave(&conf->device_lock, flags);
395 /* we can return a buffer if we bypassed the cache or
396 * if the top buffer is not in highmem. If there are
397 * multiple buffers, leave the extra work to
398 * handle_stripe
399 */
400 buffer = sh->bh_read[i];
401 if (buffer &&
402 (!PageHighMem(buffer->b_page)
403 || buffer->b_page == bh->b_page )
404 ) {
405 sh->bh_read[i] = buffer->b_reqnext;
406 buffer->b_reqnext = NULL;
407 } else
408 buffer = NULL;
409 spin_unlock_irqrestore(&conf->device_lock, flags);
410 if (sh->bh_page[i]==bh->b_page)
411 set_buffer_uptodate(bh);
412 if (buffer) {
413 if (buffer->b_page != bh->b_page)
414 memcpy(buffer->b_data, bh->b_data, bh->b_size);
415 buffer->b_end_io(buffer, 1);
416 }
417#else
418 set_bit(R5_UPTODATE, &sh->dev[i].flags);
419#endif
420 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
421 printk(KERN_INFO "raid6: read error corrected!!\n");
422 clear_bit(R5_ReadError, &sh->dev[i].flags);
423 clear_bit(R5_ReWrite, &sh->dev[i].flags);
424 }
425 if (atomic_read(&conf->disks[i].rdev->read_errors))
426 atomic_set(&conf->disks[i].rdev->read_errors, 0);
427 } else {
428 int retry = 0;
429 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
430 atomic_inc(&conf->disks[i].rdev->read_errors);
431 if (conf->mddev->degraded)
432 printk(KERN_WARNING "raid6: read error not correctable.\n");
433 else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
434 /* Oh, no!!! */
435 printk(KERN_WARNING "raid6: read error NOT corrected!!\n");
436 else if (atomic_read(&conf->disks[i].rdev->read_errors)
437 > conf->max_nr_stripes)
438 printk(KERN_WARNING
439 "raid6: Too many read errors, failing device.\n");
440 else
441 retry = 1;
442 if (retry)
443 set_bit(R5_ReadError, &sh->dev[i].flags);
444 else {
445 clear_bit(R5_ReadError, &sh->dev[i].flags);
446 clear_bit(R5_ReWrite, &sh->dev[i].flags);
447 md_error(conf->mddev, conf->disks[i].rdev);
448 }
449 }
450 rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
451#if 0
452 /* must restore b_page before unlocking buffer... */
453 if (sh->bh_page[i] != bh->b_page) {
454 bh->b_page = sh->bh_page[i];
455 bh->b_data = page_address(bh->b_page);
456 clear_buffer_uptodate(bh);
457 }
458#endif
459 clear_bit(R5_LOCKED, &sh->dev[i].flags);
460 set_bit(STRIPE_HANDLE, &sh->state);
461 release_stripe(sh);
462 return 0;
463}
464
465static int raid6_end_write_request (struct bio *bi, unsigned int bytes_done,
466 int error)
467{
468 struct stripe_head *sh = bi->bi_private;
469 raid6_conf_t *conf = sh->raid_conf;
470 int disks = conf->raid_disks, i;
471 unsigned long flags;
472 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
473
474 if (bi->bi_size)
475 return 1;
476
477 for (i=0 ; i<disks; i++)
478 if (bi == &sh->dev[i].req)
479 break;
480
481 PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n",
482 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
483 uptodate);
484 if (i == disks) {
485 BUG();
486 return 0;
487 }
488
489 spin_lock_irqsave(&conf->device_lock, flags);
490 if (!uptodate)
491 md_error(conf->mddev, conf->disks[i].rdev);
492
493 rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
494
495 clear_bit(R5_LOCKED, &sh->dev[i].flags);
496 set_bit(STRIPE_HANDLE, &sh->state);
497 __release_stripe(conf, sh);
498 spin_unlock_irqrestore(&conf->device_lock, flags);
499 return 0;
500}
501
502
503static sector_t compute_blocknr(struct stripe_head *sh, int i);
504
505static void raid6_build_block (struct stripe_head *sh, int i)
506{
507 struct r5dev *dev = &sh->dev[i];
508 int pd_idx = sh->pd_idx;
509 int qd_idx = raid6_next_disk(pd_idx, sh->raid_conf->raid_disks);
510
511 bio_init(&dev->req);
512 dev->req.bi_io_vec = &dev->vec;
513 dev->req.bi_vcnt++;
514 dev->req.bi_max_vecs++;
515 dev->vec.bv_page = dev->page;
516 dev->vec.bv_len = STRIPE_SIZE;
517 dev->vec.bv_offset = 0;
518
519 dev->req.bi_sector = sh->sector;
520 dev->req.bi_private = sh;
521
522 dev->flags = 0;
523 if (i != pd_idx && i != qd_idx)
524 dev->sector = compute_blocknr(sh, i);
525}
526
527static void error(mddev_t *mddev, mdk_rdev_t *rdev)
528{
529 char b[BDEVNAME_SIZE];
530 raid6_conf_t *conf = (raid6_conf_t *) mddev->private;
531 PRINTK("raid6: error called\n");
532
533 if (!test_bit(Faulty, &rdev->flags)) {
534 mddev->sb_dirty = 1;
535 if (test_bit(In_sync, &rdev->flags)) {
536 conf->working_disks--;
537 mddev->degraded++;
538 conf->failed_disks++;
539 clear_bit(In_sync, &rdev->flags);
540 /*
541 * if recovery was running, make sure it aborts.
542 */
543 set_bit(MD_RECOVERY_ERR, &mddev->recovery);
544 }
545 set_bit(Faulty, &rdev->flags);
546 printk (KERN_ALERT
547 "raid6: Disk failure on %s, disabling device."
548 " Operation continuing on %d devices\n",
549 bdevname(rdev->bdev,b), conf->working_disks);
550 }
551}
552
553/*
554 * Input: a 'big' sector number,
555 * Output: index of the data and parity disk, and the sector # in them.
556 */
557static sector_t raid6_compute_sector(sector_t r_sector, unsigned int raid_disks,
558 unsigned int data_disks, unsigned int * dd_idx,
559 unsigned int * pd_idx, raid6_conf_t *conf)
560{
561 long stripe;
562 unsigned long chunk_number;
563 unsigned int chunk_offset;
564 sector_t new_sector;
565 int sectors_per_chunk = conf->chunk_size >> 9;
566
567 /* First compute the information on this sector */
568
569 /*
570 * Compute the chunk number and the sector offset inside the chunk
571 */
572 chunk_offset = sector_div(r_sector, sectors_per_chunk);
573 chunk_number = r_sector;
574 if ( r_sector != chunk_number ) {
575 printk(KERN_CRIT "raid6: ERROR: r_sector = %llu, chunk_number = %lu\n",
576 (unsigned long long)r_sector, (unsigned long)chunk_number);
577 BUG();
578 }
579
580 /*
581 * Compute the stripe number
582 */
583 stripe = chunk_number / data_disks;
584
585 /*
586 * Compute the data disk and parity disk indexes inside the stripe
587 */
588 *dd_idx = chunk_number % data_disks;
589
590 /*
591 * Select the parity disk based on the user selected algorithm.
592 */
593
594 /**** FIX THIS ****/
595 switch (conf->algorithm) {
596 case ALGORITHM_LEFT_ASYMMETRIC:
597 *pd_idx = raid_disks - 1 - (stripe % raid_disks);
598 if (*pd_idx == raid_disks-1)
599 (*dd_idx)++; /* Q D D D P */
600 else if (*dd_idx >= *pd_idx)
601 (*dd_idx) += 2; /* D D P Q D */
602 break;
603 case ALGORITHM_RIGHT_ASYMMETRIC:
604 *pd_idx = stripe % raid_disks;
605 if (*pd_idx == raid_disks-1)
606 (*dd_idx)++; /* Q D D D P */
607 else if (*dd_idx >= *pd_idx)
608 (*dd_idx) += 2; /* D D P Q D */
609 break;
610 case ALGORITHM_LEFT_SYMMETRIC:
611 *pd_idx = raid_disks - 1 - (stripe % raid_disks);
612 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
613 break;
614 case ALGORITHM_RIGHT_SYMMETRIC:
615 *pd_idx = stripe % raid_disks;
616 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
617 break;
618 default:
619 printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
620 conf->algorithm);
621 }
622
623 PRINTK("raid6: chunk_number = %lu, pd_idx = %u, dd_idx = %u\n",
624 chunk_number, *pd_idx, *dd_idx);
625
626 /*
627 * Finally, compute the new sector number
628 */
629 new_sector = (sector_t) stripe * sectors_per_chunk + chunk_offset;
630 return new_sector;
631}
632
633
634static sector_t compute_blocknr(struct stripe_head *sh, int i)
635{
636 raid6_conf_t *conf = sh->raid_conf;
637 int raid_disks = conf->raid_disks, data_disks = raid_disks - 2;
638 sector_t new_sector = sh->sector, check;
639 int sectors_per_chunk = conf->chunk_size >> 9;
640 sector_t stripe;
641 int chunk_offset;
642 int chunk_number, dummy1, dummy2, dd_idx = i;
643 sector_t r_sector;
644 int i0 = i;
645
646 chunk_offset = sector_div(new_sector, sectors_per_chunk);
647 stripe = new_sector;
648 if ( new_sector != stripe ) {
649 printk(KERN_CRIT "raid6: ERROR: new_sector = %llu, stripe = %lu\n",
650 (unsigned long long)new_sector, (unsigned long)stripe);
651 BUG();
652 }
653
654 switch (conf->algorithm) {
655 case ALGORITHM_LEFT_ASYMMETRIC:
656 case ALGORITHM_RIGHT_ASYMMETRIC:
657 if (sh->pd_idx == raid_disks-1)
658 i--; /* Q D D D P */
659 else if (i > sh->pd_idx)
660 i -= 2; /* D D P Q D */
661 break;
662 case ALGORITHM_LEFT_SYMMETRIC:
663 case ALGORITHM_RIGHT_SYMMETRIC:
664 if (sh->pd_idx == raid_disks-1)
665 i--; /* Q D D D P */
666 else {
667 /* D D P Q D */
668 if (i < sh->pd_idx)
669 i += raid_disks;
670 i -= (sh->pd_idx + 2);
671 }
672 break;
673 default:
674 printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
675 conf->algorithm);
676 }
677
678 PRINTK("raid6: compute_blocknr: pd_idx = %u, i0 = %u, i = %u\n", sh->pd_idx, i0, i);
679
680 chunk_number = stripe * data_disks + i;
681 r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
682
683 check = raid6_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
684 if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
685 printk(KERN_CRIT "raid6: compute_blocknr: map not correct\n");
686 return 0;
687 }
688 return r_sector;
689}
690
691
692
693/*
694 * Copy data between a page in the stripe cache, and one or more bion
695 * The page could align with the middle of the bio, or there could be
696 * several bion, each with several bio_vecs, which cover part of the page
697 * Multiple bion are linked together on bi_next. There may be extras
698 * at the end of this list. We ignore them.
699 */
700static void copy_data(int frombio, struct bio *bio,
701 struct page *page,
702 sector_t sector)
703{
704 char *pa = page_address(page);
705 struct bio_vec *bvl;
706 int i;
707 int page_offset;
708
709 if (bio->bi_sector >= sector)
710 page_offset = (signed)(bio->bi_sector - sector) * 512;
711 else
712 page_offset = (signed)(sector - bio->bi_sector) * -512;
713 bio_for_each_segment(bvl, bio, i) {
714 int len = bio_iovec_idx(bio,i)->bv_len;
715 int clen;
716 int b_offset = 0;
717
718 if (page_offset < 0) {
719 b_offset = -page_offset;
720 page_offset += b_offset;
721 len -= b_offset;
722 }
723
724 if (len > 0 && page_offset + len > STRIPE_SIZE)
725 clen = STRIPE_SIZE - page_offset;
726 else clen = len;
727
728 if (clen > 0) {
729 char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
730 if (frombio)
731 memcpy(pa+page_offset, ba+b_offset, clen);
732 else
733 memcpy(ba+b_offset, pa+page_offset, clen);
734 __bio_kunmap_atomic(ba, KM_USER0);
735 }
736 if (clen < len) /* hit end of page */
737 break;
738 page_offset += len;
739 }
740}
741
/*
 * Helper for loops that collect pages to XOR together.  Relies on
 * `count` and `ptr` being declared in the calling scope: once
 * MAX_XOR_BLOCKS pointers have been collected they are XORed and
 * count is reset to 1, keeping only ptr[0].
 */
#define check_xor()							\
	do {								\
		if (count == MAX_XOR_BLOCKS) {				\
			xor_block(count, STRIPE_SIZE, ptr);		\
			count = 1;					\
		}							\
	} while (0)
748
749/* Compute P and Q syndromes */
750static void compute_parity(struct stripe_head *sh, int method)
751{
752 raid6_conf_t *conf = sh->raid_conf;
753 int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
754 struct bio *chosen;
755 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
756 void *ptrs[disks];
757
758 qd_idx = raid6_next_disk(pd_idx, disks);
759 d0_idx = raid6_next_disk(qd_idx, disks);
760
761 PRINTK("compute_parity, stripe %llu, method %d\n",
762 (unsigned long long)sh->sector, method);
763
764 switch(method) {
765 case READ_MODIFY_WRITE:
766 BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
767 case RECONSTRUCT_WRITE:
768 for (i= disks; i-- ;)
769 if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
770 chosen = sh->dev[i].towrite;
771 sh->dev[i].towrite = NULL;
772
773 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
774 wake_up(&conf->wait_for_overlap);
775
776 BUG_ON(sh->dev[i].written);
777 sh->dev[i].written = chosen;
778 }
779 break;
780 case CHECK_PARITY:
781 BUG(); /* Not implemented yet */
782 }
783
784 for (i = disks; i--;)
785 if (sh->dev[i].written) {
786 sector_t sector = sh->dev[i].sector;
787 struct bio *wbi = sh->dev[i].written;
788 while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
789 copy_data(1, wbi, sh->dev[i].page, sector);
790 wbi = r5_next_bio(wbi, sector);
791 }
792
793 set_bit(R5_LOCKED, &sh->dev[i].flags);
794 set_bit(R5_UPTODATE, &sh->dev[i].flags);
795 }
796
797// switch(method) {
798// case RECONSTRUCT_WRITE:
799// case CHECK_PARITY:
800// case UPDATE_PARITY:
801 /* Note that unlike RAID-5, the ordering of the disks matters greatly. */
802 /* FIX: Is this ordering of drives even remotely optimal? */
803 count = 0;
804 i = d0_idx;
805 do {
806 ptrs[count++] = page_address(sh->dev[i].page);
807 if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags))
808 printk("block %d/%d not uptodate on parity calc\n", i,count);
809 i = raid6_next_disk(i, disks);
810 } while ( i != d0_idx );
811// break;
812// }
813
814 raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs);
815
816 switch(method) {
817 case RECONSTRUCT_WRITE:
818 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
819 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
820 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
821 set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
822 break;
823 case UPDATE_PARITY:
824 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
825 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
826 break;
827 }
828}
829
830/* Compute one missing block */
831static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
832{
833 raid6_conf_t *conf = sh->raid_conf;
834 int i, count, disks = conf->raid_disks;
835 void *ptr[MAX_XOR_BLOCKS], *p;
836 int pd_idx = sh->pd_idx;
837 int qd_idx = raid6_next_disk(pd_idx, disks);
838
839 PRINTK("compute_block_1, stripe %llu, idx %d\n",
840 (unsigned long long)sh->sector, dd_idx);
841
842 if ( dd_idx == qd_idx ) {
843 /* We're actually computing the Q drive */
844 compute_parity(sh, UPDATE_PARITY);
845 } else {
846 ptr[0] = page_address(sh->dev[dd_idx].page);
847 if (!nozero) memset(ptr[0], 0, STRIPE_SIZE);
848 count = 1;
849 for (i = disks ; i--; ) {
850 if (i == dd_idx || i == qd_idx)
851 continue;
852 p = page_address(sh->dev[i].page);
853 if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
854 ptr[count++] = p;
855 else
856 printk("compute_block() %d, stripe %llu, %d"
857 " not present\n", dd_idx,
858 (unsigned long long)sh->sector, i);
859
860 check_xor();
861 }
862 if (count != 1)
863 xor_block(count, STRIPE_SIZE, ptr);
864 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
865 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
866 }
867}
868
869/* Compute two missing blocks */
870static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
871{
872 raid6_conf_t *conf = sh->raid_conf;
873 int i, count, disks = conf->raid_disks;
874 int pd_idx = sh->pd_idx;
875 int qd_idx = raid6_next_disk(pd_idx, disks);
876 int d0_idx = raid6_next_disk(qd_idx, disks);
877 int faila, failb;
878
879 /* faila and failb are disk numbers relative to d0_idx */
880 /* pd_idx become disks-2 and qd_idx become disks-1 */
881 faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx;
882 failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx;
883
884 BUG_ON(faila == failb);
885 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
886
887 PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
888 (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb);
889
890 if ( failb == disks-1 ) {
891 /* Q disk is one of the missing disks */
892 if ( faila == disks-2 ) {
893 /* Missing P+Q, just recompute */
894 compute_parity(sh, UPDATE_PARITY);
895 return;
896 } else {
897 /* We're missing D+Q; recompute D from P */
898 compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0);
899 compute_parity(sh, UPDATE_PARITY); /* Is this necessary? */
900 return;
901 }
902 }
903
904 /* We're missing D+P or D+D; build pointer table */
905 {
906 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
907 void *ptrs[disks];
908
909 count = 0;
910 i = d0_idx;
911 do {
912 ptrs[count++] = page_address(sh->dev[i].page);
913 i = raid6_next_disk(i, disks);
914 if (i != dd_idx1 && i != dd_idx2 &&
915 !test_bit(R5_UPTODATE, &sh->dev[i].flags))
916 printk("compute_2 with missing block %d/%d\n", count, i);
917 } while ( i != d0_idx );
918
919 if ( failb == disks-2 ) {
920 /* We're missing D+P. */
921 raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
922 } else {
923 /* We're missing D+D. */
924 raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
925 }
926
927 /* Both the above update both missing blocks */
928 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
929 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
930 }
931}
932
933
934/*
935 * Each stripe/dev can have one or more bion attached.
936 * toread/towrite point to the first in a chain.
937 * The bi_next chain must be in order.
938 */
939static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
940{
941 struct bio **bip;
942 raid6_conf_t *conf = sh->raid_conf;
943 int firstwrite=0;
944
945 PRINTK("adding bh b#%llu to stripe s#%llu\n",
946 (unsigned long long)bi->bi_sector,
947 (unsigned long long)sh->sector);
948
949
950 spin_lock(&sh->lock);
951 spin_lock_irq(&conf->device_lock);
952 if (forwrite) {
953 bip = &sh->dev[dd_idx].towrite;
954 if (*bip == NULL && sh->dev[dd_idx].written == NULL)
955 firstwrite = 1;
956 } else
957 bip = &sh->dev[dd_idx].toread;
958 while (*bip && (*bip)->bi_sector < bi->bi_sector) {
959 if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
960 goto overlap;
961 bip = &(*bip)->bi_next;
962 }
963 if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
964 goto overlap;
965
966 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
967 if (*bip)
968 bi->bi_next = *bip;
969 *bip = bi;
970 bi->bi_phys_segments ++;
971 spin_unlock_irq(&conf->device_lock);
972 spin_unlock(&sh->lock);
973
974 PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n",
975 (unsigned long long)bi->bi_sector,
976 (unsigned long long)sh->sector, dd_idx);
977
978 if (conf->mddev->bitmap && firstwrite) {
979 sh->bm_seq = conf->seq_write;
980 bitmap_startwrite(conf->mddev->bitmap, sh->sector,
981 STRIPE_SECTORS, 0);
982 set_bit(STRIPE_BIT_DELAY, &sh->state);
983 }
984
985 if (forwrite) {
986 /* check if page is covered */
987 sector_t sector = sh->dev[dd_idx].sector;
988 for (bi=sh->dev[dd_idx].towrite;
989 sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
990 bi && bi->bi_sector <= sector;
991 bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
992 if (bi->bi_sector + (bi->bi_size>>9) >= sector)
993 sector = bi->bi_sector + (bi->bi_size>>9);
994 }
995 if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
996 set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
997 }
998 return 1;
999
1000 overlap:
1001 set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
1002 spin_unlock_irq(&conf->device_lock);
1003 spin_unlock(&sh->lock);
1004 return 0;
1005}
1006
1007
1008static int page_is_zero(struct page *p)
1009{
1010 char *a = page_address(p);
1011 return ((*(u32*)a) == 0 &&
1012 memcmp(a, a+4, STRIPE_SIZE-4)==0);
1013}
1014/*
1015 * handle_stripe - do things to a stripe.
1016 *
1017 * We lock the stripe and then examine the state of various bits
1018 * to see what needs to be done.
1019 * Possible results:
1020 * return some read request which now have data
1021 * return some write requests which are safely on disc
1022 * schedule a read on some buffers
1023 * schedule a write of some buffers
1024 * return confirmation of parity correctness
1025 *
1026 * Parity calculations are done inside the stripe lock
1027 * buffers are taken off read_list or write_list, and bh_cache buffers
1028 * get BH_Lock set before the stripe lock is released.
1029 *
1030 */
1031
1032static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
1033{
1034 raid6_conf_t *conf = sh->raid_conf;
1035 int disks = conf->raid_disks;
1036 struct bio *return_bi= NULL;
1037 struct bio *bi;
1038 int i;
1039 int syncing;
1040 int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
1041 int non_overwrite = 0;
1042 int failed_num[2] = {0, 0};
1043 struct r5dev *dev, *pdev, *qdev;
1044 int pd_idx = sh->pd_idx;
1045 int qd_idx = raid6_next_disk(pd_idx, disks);
1046 int p_failed, q_failed;
1047
1048 PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n",
1049 (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
1050 pd_idx, qd_idx);
1051
1052 spin_lock(&sh->lock);
1053 clear_bit(STRIPE_HANDLE, &sh->state);
1054 clear_bit(STRIPE_DELAYED, &sh->state);
1055
1056 syncing = test_bit(STRIPE_SYNCING, &sh->state);
1057 /* Now to look around and see what can be done */
1058
1059 rcu_read_lock();
1060 for (i=disks; i--; ) {
1061 mdk_rdev_t *rdev;
1062 dev = &sh->dev[i];
1063 clear_bit(R5_Insync, &dev->flags);
1064
1065 PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
1066 i, dev->flags, dev->toread, dev->towrite, dev->written);
1067 /* maybe we can reply to a read */
1068 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
1069 struct bio *rbi, *rbi2;
1070 PRINTK("Return read for disc %d\n", i);
1071 spin_lock_irq(&conf->device_lock);
1072 rbi = dev->toread;
1073 dev->toread = NULL;
1074 if (test_and_clear_bit(R5_Overlap, &dev->flags))
1075 wake_up(&conf->wait_for_overlap);
1076 spin_unlock_irq(&conf->device_lock);
1077 while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
1078 copy_data(0, rbi, dev->page, dev->sector);
1079 rbi2 = r5_next_bio(rbi, dev->sector);
1080 spin_lock_irq(&conf->device_lock);
1081 if (--rbi->bi_phys_segments == 0) {
1082 rbi->bi_next = return_bi;
1083 return_bi = rbi;
1084 }
1085 spin_unlock_irq(&conf->device_lock);
1086 rbi = rbi2;
1087 }
1088 }
1089
1090 /* now count some things */
1091 if (test_bit(R5_LOCKED, &dev->flags)) locked++;
1092 if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
1093
1094
1095 if (dev->toread) to_read++;
1096 if (dev->towrite) {
1097 to_write++;
1098 if (!test_bit(R5_OVERWRITE, &dev->flags))
1099 non_overwrite++;
1100 }
1101 if (dev->written) written++;
1102 rdev = rcu_dereference(conf->disks[i].rdev);
1103 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
1104 /* The ReadError flag will just be confusing now */
1105 clear_bit(R5_ReadError, &dev->flags);
1106 clear_bit(R5_ReWrite, &dev->flags);
1107 }
1108 if (!rdev || !test_bit(In_sync, &rdev->flags)
1109 || test_bit(R5_ReadError, &dev->flags)) {
1110 if ( failed < 2 )
1111 failed_num[failed] = i;
1112 failed++;
1113 } else
1114 set_bit(R5_Insync, &dev->flags);
1115 }
1116 rcu_read_unlock();
1117 PRINTK("locked=%d uptodate=%d to_read=%d"
1118 " to_write=%d failed=%d failed_num=%d,%d\n",
1119 locked, uptodate, to_read, to_write, failed,
1120 failed_num[0], failed_num[1]);
1121 /* check if the array has lost >2 devices and, if so, some requests might
1122 * need to be failed
1123 */
1124 if (failed > 2 && to_read+to_write+written) {
1125 for (i=disks; i--; ) {
1126 int bitmap_end = 0;
1127
1128 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1129 mdk_rdev_t *rdev;
1130 rcu_read_lock();
1131 rdev = rcu_dereference(conf->disks[i].rdev);
1132 if (rdev && test_bit(In_sync, &rdev->flags))
1133 /* multiple read failures in one stripe */
1134 md_error(conf->mddev, rdev);
1135 rcu_read_unlock();
1136 }
1137
1138 spin_lock_irq(&conf->device_lock);
1139 /* fail all writes first */
1140 bi = sh->dev[i].towrite;
1141 sh->dev[i].towrite = NULL;
1142 if (bi) { to_write--; bitmap_end = 1; }
1143
1144 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1145 wake_up(&conf->wait_for_overlap);
1146
1147 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
1148 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1149 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1150 if (--bi->bi_phys_segments == 0) {
1151 md_write_end(conf->mddev);
1152 bi->bi_next = return_bi;
1153 return_bi = bi;
1154 }
1155 bi = nextbi;
1156 }
1157 /* and fail all 'written' */
1158 bi = sh->dev[i].written;
1159 sh->dev[i].written = NULL;
1160 if (bi) bitmap_end = 1;
1161 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
1162 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
1163 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1164 if (--bi->bi_phys_segments == 0) {
1165 md_write_end(conf->mddev);
1166 bi->bi_next = return_bi;
1167 return_bi = bi;
1168 }
1169 bi = bi2;
1170 }
1171
1172 /* fail any reads if this device is non-operational */
1173 if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
1174 test_bit(R5_ReadError, &sh->dev[i].flags)) {
1175 bi = sh->dev[i].toread;
1176 sh->dev[i].toread = NULL;
1177 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1178 wake_up(&conf->wait_for_overlap);
1179 if (bi) to_read--;
1180 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
1181 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1182 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1183 if (--bi->bi_phys_segments == 0) {
1184 bi->bi_next = return_bi;
1185 return_bi = bi;
1186 }
1187 bi = nextbi;
1188 }
1189 }
1190 spin_unlock_irq(&conf->device_lock);
1191 if (bitmap_end)
1192 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
1193 STRIPE_SECTORS, 0, 0);
1194 }
1195 }
1196 if (failed > 2 && syncing) {
1197 md_done_sync(conf->mddev, STRIPE_SECTORS,0);
1198 clear_bit(STRIPE_SYNCING, &sh->state);
1199 syncing = 0;
1200 }
1201
1202 /*
1203 * might be able to return some write requests if the parity blocks
1204 * are safe, or on a failed drive
1205 */
1206 pdev = &sh->dev[pd_idx];
1207 p_failed = (failed >= 1 && failed_num[0] == pd_idx)
1208 || (failed >= 2 && failed_num[1] == pd_idx);
1209 qdev = &sh->dev[qd_idx];
1210 q_failed = (failed >= 1 && failed_num[0] == qd_idx)
1211 || (failed >= 2 && failed_num[1] == qd_idx);
1212
1213 if ( written &&
1214 ( p_failed || ((test_bit(R5_Insync, &pdev->flags)
1215 && !test_bit(R5_LOCKED, &pdev->flags)
1216 && test_bit(R5_UPTODATE, &pdev->flags))) ) &&
1217 ( q_failed || ((test_bit(R5_Insync, &qdev->flags)
1218 && !test_bit(R5_LOCKED, &qdev->flags)
1219 && test_bit(R5_UPTODATE, &qdev->flags))) ) ) {
1220 /* any written block on an uptodate or failed drive can be
1221 * returned. Note that if we 'wrote' to a failed drive,
1222 * it will be UPTODATE, but never LOCKED, so we don't need
1223 * to test 'failed' directly.
1224 */
1225 for (i=disks; i--; )
1226 if (sh->dev[i].written) {
1227 dev = &sh->dev[i];
1228 if (!test_bit(R5_LOCKED, &dev->flags) &&
1229 test_bit(R5_UPTODATE, &dev->flags) ) {
1230 /* We can return any write requests */
1231 int bitmap_end = 0;
1232 struct bio *wbi, *wbi2;
1233 PRINTK("Return write for stripe %llu disc %d\n",
1234 (unsigned long long)sh->sector, i);
1235 spin_lock_irq(&conf->device_lock);
1236 wbi = dev->written;
1237 dev->written = NULL;
1238 while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
1239 wbi2 = r5_next_bio(wbi, dev->sector);
1240 if (--wbi->bi_phys_segments == 0) {
1241 md_write_end(conf->mddev);
1242 wbi->bi_next = return_bi;
1243 return_bi = wbi;
1244 }
1245 wbi = wbi2;
1246 }
1247 if (dev->towrite == NULL)
1248 bitmap_end = 1;
1249 spin_unlock_irq(&conf->device_lock);
1250 if (bitmap_end)
1251 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
1252 STRIPE_SECTORS,
1253 !test_bit(STRIPE_DEGRADED, &sh->state), 0);
1254 }
1255 }
1256 }
1257
1258 /* Now we might consider reading some blocks, either to check/generate
1259 * parity, or to satisfy requests
1260 * or to load a block that is being partially written.
1261 */
1262 if (to_read || non_overwrite || (to_write && failed) || (syncing && (uptodate < disks))) {
1263 for (i=disks; i--;) {
1264 dev = &sh->dev[i];
1265 if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
1266 (dev->toread ||
1267 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
1268 syncing ||
1269 (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
1270 (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
1271 )
1272 ) {
1273 /* we would like to get this block, possibly
1274 * by computing it, but we might not be able to
1275 */
1276 if (uptodate == disks-1) {
1277 PRINTK("Computing stripe %llu block %d\n",
1278 (unsigned long long)sh->sector, i);
1279 compute_block_1(sh, i, 0);
1280 uptodate++;
1281 } else if ( uptodate == disks-2 && failed >= 2 ) {
1282 /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
1283 int other;
1284 for (other=disks; other--;) {
1285 if ( other == i )
1286 continue;
1287 if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) )
1288 break;
1289 }
1290 BUG_ON(other < 0);
1291 PRINTK("Computing stripe %llu blocks %d,%d\n",
1292 (unsigned long long)sh->sector, i, other);
1293 compute_block_2(sh, i, other);
1294 uptodate += 2;
1295 } else if (test_bit(R5_Insync, &dev->flags)) {
1296 set_bit(R5_LOCKED, &dev->flags);
1297 set_bit(R5_Wantread, &dev->flags);
1298#if 0
1299 /* if I am just reading this block and we don't have
1300 a failed drive, or any pending writes then sidestep the cache */
1301 if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
1302 ! syncing && !failed && !to_write) {
1303 sh->bh_cache[i]->b_page = sh->bh_read[i]->b_page;
1304 sh->bh_cache[i]->b_data = sh->bh_read[i]->b_data;
1305 }
1306#endif
1307 locked++;
1308 PRINTK("Reading block %d (sync=%d)\n",
1309 i, syncing);
1310 }
1311 }
1312 }
1313 set_bit(STRIPE_HANDLE, &sh->state);
1314 }
1315
1316 /* now to consider writing and what else, if anything should be read */
1317 if (to_write) {
1318 int rcw=0, must_compute=0;
1319 for (i=disks ; i--;) {
1320 dev = &sh->dev[i];
1321 /* Would I have to read this buffer for reconstruct_write */
1322 if (!test_bit(R5_OVERWRITE, &dev->flags)
1323 && i != pd_idx && i != qd_idx
1324 && (!test_bit(R5_LOCKED, &dev->flags)
1325#if 0
1326 || sh->bh_page[i] != bh->b_page
1327#endif
1328 ) &&
1329 !test_bit(R5_UPTODATE, &dev->flags)) {
1330 if (test_bit(R5_Insync, &dev->flags)) rcw++;
1331 else {
1332 PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags);
1333 must_compute++;
1334 }
1335 }
1336 }
1337 PRINTK("for sector %llu, rcw=%d, must_compute=%d\n",
1338 (unsigned long long)sh->sector, rcw, must_compute);
1339 set_bit(STRIPE_HANDLE, &sh->state);
1340
1341 if (rcw > 0)
1342 /* want reconstruct write, but need to get some data */
1343 for (i=disks; i--;) {
1344 dev = &sh->dev[i];
1345 if (!test_bit(R5_OVERWRITE, &dev->flags)
1346 && !(failed == 0 && (i == pd_idx || i == qd_idx))
1347 && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
1348 test_bit(R5_Insync, &dev->flags)) {
1349 if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
1350 {
1351 PRINTK("Read_old stripe %llu block %d for Reconstruct\n",
1352 (unsigned long long)sh->sector, i);
1353 set_bit(R5_LOCKED, &dev->flags);
1354 set_bit(R5_Wantread, &dev->flags);
1355 locked++;
1356 } else {
1357 PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
1358 (unsigned long long)sh->sector, i);
1359 set_bit(STRIPE_DELAYED, &sh->state);
1360 set_bit(STRIPE_HANDLE, &sh->state);
1361 }
1362 }
1363 }
1364 /* now if nothing is locked, and if we have enough data, we can start a write request */
1365 if (locked == 0 && rcw == 0 &&
1366 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
1367 if ( must_compute > 0 ) {
1368 /* We have failed blocks and need to compute them */
1369 switch ( failed ) {
1370 case 0: BUG();
1371 case 1: compute_block_1(sh, failed_num[0], 0); break;
1372 case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
1373 default: BUG(); /* This request should have been failed? */
1374 }
1375 }
1376
1377 PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector);
1378 compute_parity(sh, RECONSTRUCT_WRITE);
1379 /* now every locked buffer is ready to be written */
1380 for (i=disks; i--;)
1381 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
1382 PRINTK("Writing stripe %llu block %d\n",
1383 (unsigned long long)sh->sector, i);
1384 locked++;
1385 set_bit(R5_Wantwrite, &sh->dev[i].flags);
1386 }
1387 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
1388 set_bit(STRIPE_INSYNC, &sh->state);
1389
1390 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
1391 atomic_dec(&conf->preread_active_stripes);
1392 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
1393 md_wakeup_thread(conf->mddev->thread);
1394 }
1395 }
1396 }
1397
1398 /* maybe we need to check and possibly fix the parity for this stripe
1399 * Any reads will already have been scheduled, so we just see if enough data
1400 * is available
1401 */
1402 if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
1403 int update_p = 0, update_q = 0;
1404 struct r5dev *dev;
1405
1406 set_bit(STRIPE_HANDLE, &sh->state);
1407
1408 BUG_ON(failed>2);
1409 BUG_ON(uptodate < disks);
1410 /* Want to check and possibly repair P and Q.
1411 * However there could be one 'failed' device, in which
1412 * case we can only check one of them, possibly using the
1413 * other to generate missing data
1414 */
1415
1416 /* If !tmp_page, we cannot do the calculations,
1417 * but as we have set STRIPE_HANDLE, we will soon be called
1418 * by stripe_handle with a tmp_page - just wait until then.
1419 */
1420 if (tmp_page) {
1421 if (failed == q_failed) {
1422 /* The only possible failed device holds 'Q', so it makes
1423 * sense to check P (If anything else were failed, we would
1424 * have used P to recreate it).
1425 */
1426 compute_block_1(sh, pd_idx, 1);
1427 if (!page_is_zero(sh->dev[pd_idx].page)) {
1428 compute_block_1(sh,pd_idx,0);
1429 update_p = 1;
1430 }
1431 }
1432 if (!q_failed && failed < 2) {
1433 /* q is not failed, and we didn't use it to generate
1434 * anything, so it makes sense to check it
1435 */
1436 memcpy(page_address(tmp_page),
1437 page_address(sh->dev[qd_idx].page),
1438 STRIPE_SIZE);
1439 compute_parity(sh, UPDATE_PARITY);
1440 if (memcmp(page_address(tmp_page),
1441 page_address(sh->dev[qd_idx].page),
1442 STRIPE_SIZE)!= 0) {
1443 clear_bit(STRIPE_INSYNC, &sh->state);
1444 update_q = 1;
1445 }
1446 }
1447 if (update_p || update_q) {
1448 conf->mddev->resync_mismatches += STRIPE_SECTORS;
1449 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
1450 /* don't try to repair!! */
1451 update_p = update_q = 0;
1452 }
1453
1454 /* now write out any block on a failed drive,
1455 * or P or Q if they need it
1456 */
1457
1458 if (failed == 2) {
1459 dev = &sh->dev[failed_num[1]];
1460 locked++;
1461 set_bit(R5_LOCKED, &dev->flags);
1462 set_bit(R5_Wantwrite, &dev->flags);
1463 }
1464 if (failed >= 1) {
1465 dev = &sh->dev[failed_num[0]];
1466 locked++;
1467 set_bit(R5_LOCKED, &dev->flags);
1468 set_bit(R5_Wantwrite, &dev->flags);
1469 }
1470
1471 if (update_p) {
1472 dev = &sh->dev[pd_idx];
1473 locked ++;
1474 set_bit(R5_LOCKED, &dev->flags);
1475 set_bit(R5_Wantwrite, &dev->flags);
1476 }
1477 if (update_q) {
1478 dev = &sh->dev[qd_idx];
1479 locked++;
1480 set_bit(R5_LOCKED, &dev->flags);
1481 set_bit(R5_Wantwrite, &dev->flags);
1482 }
1483 clear_bit(STRIPE_DEGRADED, &sh->state);
1484
1485 set_bit(STRIPE_INSYNC, &sh->state);
1486 }
1487 }
1488
1489 if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
1490 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
1491 clear_bit(STRIPE_SYNCING, &sh->state);
1492 }
1493
1494 /* If the failed drives are just a ReadError, then we might need
1495 * to progress the repair/check process
1496 */
1497 if (failed <= 2 && ! conf->mddev->ro)
1498 for (i=0; i<failed;i++) {
1499 dev = &sh->dev[failed_num[i]];
1500 if (test_bit(R5_ReadError, &dev->flags)
1501 && !test_bit(R5_LOCKED, &dev->flags)
1502 && test_bit(R5_UPTODATE, &dev->flags)
1503 ) {
1504 if (!test_bit(R5_ReWrite, &dev->flags)) {
1505 set_bit(R5_Wantwrite, &dev->flags);
1506 set_bit(R5_ReWrite, &dev->flags);
1507 set_bit(R5_LOCKED, &dev->flags);
1508 } else {
1509 /* let's read it back */
1510 set_bit(R5_Wantread, &dev->flags);
1511 set_bit(R5_LOCKED, &dev->flags);
1512 }
1513 }
1514 }
1515 spin_unlock(&sh->lock);
1516
1517 while ((bi=return_bi)) {
1518 int bytes = bi->bi_size;
1519
1520 return_bi = bi->bi_next;
1521 bi->bi_next = NULL;
1522 bi->bi_size = 0;
1523 bi->bi_end_io(bi, bytes, 0);
1524 }
1525 for (i=disks; i-- ;) {
1526 int rw;
1527 struct bio *bi;
1528 mdk_rdev_t *rdev;
1529 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
1530 rw = 1;
1531 else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
1532 rw = 0;
1533 else
1534 continue;
1535
1536 bi = &sh->dev[i].req;
1537
1538 bi->bi_rw = rw;
1539 if (rw)
1540 bi->bi_end_io = raid6_end_write_request;
1541 else
1542 bi->bi_end_io = raid6_end_read_request;
1543
1544 rcu_read_lock();
1545 rdev = rcu_dereference(conf->disks[i].rdev);
1546 if (rdev && test_bit(Faulty, &rdev->flags))
1547 rdev = NULL;
1548 if (rdev)
1549 atomic_inc(&rdev->nr_pending);
1550 rcu_read_unlock();
1551
1552 if (rdev) {
1553 if (syncing)
1554 md_sync_acct(rdev->bdev, STRIPE_SECTORS);
1555
1556 bi->bi_bdev = rdev->bdev;
1557 PRINTK("for %llu schedule op %ld on disc %d\n",
1558 (unsigned long long)sh->sector, bi->bi_rw, i);
1559 atomic_inc(&sh->count);
1560 bi->bi_sector = sh->sector + rdev->data_offset;
1561 bi->bi_flags = 1 << BIO_UPTODATE;
1562 bi->bi_vcnt = 1;
1563 bi->bi_max_vecs = 1;
1564 bi->bi_idx = 0;
1565 bi->bi_io_vec = &sh->dev[i].vec;
1566 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
1567 bi->bi_io_vec[0].bv_offset = 0;
1568 bi->bi_size = STRIPE_SIZE;
1569 bi->bi_next = NULL;
1570 if (rw == WRITE &&
1571 test_bit(R5_ReWrite, &sh->dev[i].flags))
1572 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
1573 generic_make_request(bi);
1574 } else {
1575 if (rw == 1)
1576 set_bit(STRIPE_DEGRADED, &sh->state);
1577 PRINTK("skip op %ld on disc %d for sector %llu\n",
1578 bi->bi_rw, i, (unsigned long long)sh->sector);
1579 clear_bit(R5_LOCKED, &sh->dev[i].flags);
1580 set_bit(STRIPE_HANDLE, &sh->state);
1581 }
1582 }
1583}
1584
1585static void raid6_activate_delayed(raid6_conf_t *conf)
1586{
1587 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
1588 while (!list_empty(&conf->delayed_list)) {
1589 struct list_head *l = conf->delayed_list.next;
1590 struct stripe_head *sh;
1591 sh = list_entry(l, struct stripe_head, lru);
1592 list_del_init(l);
1593 clear_bit(STRIPE_DELAYED, &sh->state);
1594 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
1595 atomic_inc(&conf->preread_active_stripes);
1596 list_add_tail(&sh->lru, &conf->handle_list);
1597 }
1598 }
1599}
1600
1601static void activate_bit_delay(raid6_conf_t *conf)
1602{
1603 /* device_lock is held */
1604 struct list_head head;
1605 list_add(&head, &conf->bitmap_list);
1606 list_del_init(&conf->bitmap_list);
1607 while (!list_empty(&head)) {
1608 struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
1609 list_del_init(&sh->lru);
1610 atomic_inc(&sh->count);
1611 __release_stripe(conf, sh);
1612 }
1613}
1614
1615static void unplug_slaves(mddev_t *mddev)
1616{
1617 raid6_conf_t *conf = mddev_to_conf(mddev);
1618 int i;
1619
1620 rcu_read_lock();
1621 for (i=0; i<mddev->raid_disks; i++) {
1622 mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
1623 if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
1624 request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
1625
1626 atomic_inc(&rdev->nr_pending);
1627 rcu_read_unlock();
1628
1629 if (r_queue->unplug_fn)
1630 r_queue->unplug_fn(r_queue);
1631
1632 rdev_dec_pending(rdev, mddev);
1633 rcu_read_lock();
1634 }
1635 }
1636 rcu_read_unlock();
1637}
1638
1639static void raid6_unplug_device(request_queue_t *q)
1640{
1641 mddev_t *mddev = q->queuedata;
1642 raid6_conf_t *conf = mddev_to_conf(mddev);
1643 unsigned long flags;
1644
1645 spin_lock_irqsave(&conf->device_lock, flags);
1646
1647 if (blk_remove_plug(q)) {
1648 conf->seq_flush++;
1649 raid6_activate_delayed(conf);
1650 }
1651 md_wakeup_thread(mddev->thread);
1652
1653 spin_unlock_irqrestore(&conf->device_lock, flags);
1654
1655 unplug_slaves(mddev);
1656}
1657
1658static int raid6_issue_flush(request_queue_t *q, struct gendisk *disk,
1659 sector_t *error_sector)
1660{
1661 mddev_t *mddev = q->queuedata;
1662 raid6_conf_t *conf = mddev_to_conf(mddev);
1663 int i, ret = 0;
1664
1665 rcu_read_lock();
1666 for (i=0; i<mddev->raid_disks && ret == 0; i++) {
1667 mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
1668 if (rdev && !test_bit(Faulty, &rdev->flags)) {
1669 struct block_device *bdev = rdev->bdev;
1670 request_queue_t *r_queue = bdev_get_queue(bdev);
1671
1672 if (!r_queue->issue_flush_fn)
1673 ret = -EOPNOTSUPP;
1674 else {
1675 atomic_inc(&rdev->nr_pending);
1676 rcu_read_unlock();
1677 ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
1678 error_sector);
1679 rdev_dec_pending(rdev, mddev);
1680 rcu_read_lock();
1681 }
1682 }
1683 }
1684 rcu_read_unlock();
1685 return ret;
1686}
1687
1688static inline void raid6_plug_device(raid6_conf_t *conf)
1689{
1690 spin_lock_irq(&conf->device_lock);
1691 blk_plug_device(conf->mddev->queue);
1692 spin_unlock_irq(&conf->device_lock);
1693}
1694
1695static int make_request (request_queue_t *q, struct bio * bi)
1696{
1697 mddev_t *mddev = q->queuedata;
1698 raid6_conf_t *conf = mddev_to_conf(mddev);
1699 const unsigned int raid_disks = conf->raid_disks;
1700 const unsigned int data_disks = raid_disks - 2;
1701 unsigned int dd_idx, pd_idx;
1702 sector_t new_sector;
1703 sector_t logical_sector, last_sector;
1704 struct stripe_head *sh;
1705 const int rw = bio_data_dir(bi);
1706
1707 if (unlikely(bio_barrier(bi))) {
1708 bio_endio(bi, bi->bi_size, -EOPNOTSUPP);
1709 return 0;
1710 }
1711
1712 md_write_start(mddev, bi);
1713
1714 disk_stat_inc(mddev->gendisk, ios[rw]);
1715 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
1716
1717 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
1718 last_sector = bi->bi_sector + (bi->bi_size>>9);
1719
1720 bi->bi_next = NULL;
1721 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
1722
1723 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
1724 DEFINE_WAIT(w);
1725
1726 new_sector = raid6_compute_sector(logical_sector,
1727 raid_disks, data_disks, &dd_idx, &pd_idx, conf);
1728
1729 PRINTK("raid6: make_request, sector %llu logical %llu\n",
1730 (unsigned long long)new_sector,
1731 (unsigned long long)logical_sector);
1732
1733 retry:
1734 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
1735 sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK));
1736 if (sh) {
1737 if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
1738 /* Add failed due to overlap. Flush everything
1739 * and wait a while
1740 */
1741 raid6_unplug_device(mddev->queue);
1742 release_stripe(sh);
1743 schedule();
1744 goto retry;
1745 }
1746 finish_wait(&conf->wait_for_overlap, &w);
1747 raid6_plug_device(conf);
1748 handle_stripe(sh, NULL);
1749 release_stripe(sh);
1750 } else {
1751 /* cannot get stripe for read-ahead, just give-up */
1752 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1753 finish_wait(&conf->wait_for_overlap, &w);
1754 break;
1755 }
1756
1757 }
1758 spin_lock_irq(&conf->device_lock);
1759 if (--bi->bi_phys_segments == 0) {
1760 int bytes = bi->bi_size;
1761
1762 if (rw == WRITE )
1763 md_write_end(mddev);
1764 bi->bi_size = 0;
1765 bi->bi_end_io(bi, bytes, 0);
1766 }
1767 spin_unlock_irq(&conf->device_lock);
1768 return 0;
1769}
1770
1771/* FIXME go_faster isn't used */
1772static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
1773{
1774 raid6_conf_t *conf = (raid6_conf_t *) mddev->private;
1775 struct stripe_head *sh;
1776 int sectors_per_chunk = conf->chunk_size >> 9;
1777 sector_t x;
1778 unsigned long stripe;
1779 int chunk_offset;
1780 int dd_idx, pd_idx;
1781 sector_t first_sector;
1782 int raid_disks = conf->raid_disks;
1783 int data_disks = raid_disks - 2;
1784 sector_t max_sector = mddev->size << 1;
1785 int sync_blocks;
1786 int still_degraded = 0;
1787 int i;
1788
1789 if (sector_nr >= max_sector) {
1790 /* just being told to finish up .. nothing much to do */
1791 unplug_slaves(mddev);
1792
1793 if (mddev->curr_resync < max_sector) /* aborted */
1794 bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
1795 &sync_blocks, 1);
1796 else /* completed sync */
1797 conf->fullsync = 0;
1798 bitmap_close_sync(mddev->bitmap);
1799
1800 return 0;
1801 }
1802 /* if there are 2 or more failed drives and we are trying
1803 * to resync, then assert that we are finished, because there is
1804 * nothing we can do.
1805 */
1806 if (mddev->degraded >= 2 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
1807 sector_t rv = (mddev->size << 1) - sector_nr;
1808 *skipped = 1;
1809 return rv;
1810 }
1811 if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
1812 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
1813 !conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
1814 /* we can skip this block, and probably more */
1815 sync_blocks /= STRIPE_SECTORS;
1816 *skipped = 1;
1817 return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
1818 }
1819
1820 x = sector_nr;
1821 chunk_offset = sector_div(x, sectors_per_chunk);
1822 stripe = x;
1823 BUG_ON(x != stripe);
1824
1825 first_sector = raid6_compute_sector((sector_t)stripe*data_disks*sectors_per_chunk
1826 + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
1827 sh = get_active_stripe(conf, sector_nr, pd_idx, 1);
1828 if (sh == NULL) {
1829 sh = get_active_stripe(conf, sector_nr, pd_idx, 0);
1830 /* make sure we don't swamp the stripe cache if someone else
1831 * is trying to get access
1832 */
1833 schedule_timeout_uninterruptible(1);
1834 }
1835 /* Need to check if array will still be degraded after recovery/resync
1836 * We don't need to check the 'failed' flag as when that gets set,
1837 * recovery aborts.
1838 */
1839 for (i=0; i<mddev->raid_disks; i++)
1840 if (conf->disks[i].rdev == NULL)
1841 still_degraded = 1;
1842
1843 bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
1844
1845 spin_lock(&sh->lock);
1846 set_bit(STRIPE_SYNCING, &sh->state);
1847 clear_bit(STRIPE_INSYNC, &sh->state);
1848 spin_unlock(&sh->lock);
1849
1850 handle_stripe(sh, NULL);
1851 release_stripe(sh);
1852
1853 return STRIPE_SECTORS;
1854}
1855
1856/*
1857 * This is our raid6 kernel thread.
1858 *
1859 * We scan the hash table for stripes which can be handled now.
1860 * During the scan, completed stripes are saved for us by the interrupt
1861 * handler, so that they will not have to wait for our next wakeup.
1862 */
1863static void raid6d (mddev_t *mddev)
1864{
1865 struct stripe_head *sh;
1866 raid6_conf_t *conf = mddev_to_conf(mddev);
1867 int handled;
1868
1869 PRINTK("+++ raid6d active\n");
1870
1871 md_check_recovery(mddev);
1872
1873 handled = 0;
1874 spin_lock_irq(&conf->device_lock);
1875 while (1) {
1876 struct list_head *first;
1877
1878 if (conf->seq_flush - conf->seq_write > 0) {
1879 int seq = conf->seq_flush;
1880 spin_unlock_irq(&conf->device_lock);
1881 bitmap_unplug(mddev->bitmap);
1882 spin_lock_irq(&conf->device_lock);
1883 conf->seq_write = seq;
1884 activate_bit_delay(conf);
1885 }
1886
1887 if (list_empty(&conf->handle_list) &&
1888 atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
1889 !blk_queue_plugged(mddev->queue) &&
1890 !list_empty(&conf->delayed_list))
1891 raid6_activate_delayed(conf);
1892
1893 if (list_empty(&conf->handle_list))
1894 break;
1895
1896 first = conf->handle_list.next;
1897 sh = list_entry(first, struct stripe_head, lru);
1898
1899 list_del_init(first);
1900 atomic_inc(&sh->count);
1901 BUG_ON(atomic_read(&sh->count)!= 1);
1902 spin_unlock_irq(&conf->device_lock);
1903
1904 handled++;
1905 handle_stripe(sh, conf->spare_page);
1906 release_stripe(sh);
1907
1908 spin_lock_irq(&conf->device_lock);
1909 }
1910 PRINTK("%d stripes handled\n", handled);
1911
1912 spin_unlock_irq(&conf->device_lock);
1913
1914 unplug_slaves(mddev);
1915
1916 PRINTK("--- raid6d inactive\n");
1917}
1918
1919static ssize_t
1920raid6_show_stripe_cache_size(mddev_t *mddev, char *page)
1921{
1922 raid6_conf_t *conf = mddev_to_conf(mddev);
1923 if (conf)
1924 return sprintf(page, "%d\n", conf->max_nr_stripes);
1925 else
1926 return 0;
1927}
1928
1929static ssize_t
1930raid6_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
1931{
1932 raid6_conf_t *conf = mddev_to_conf(mddev);
1933 char *end;
1934 int new;
1935 if (len >= PAGE_SIZE)
1936 return -EINVAL;
1937 if (!conf)
1938 return -ENODEV;
1939
1940 new = simple_strtoul(page, &end, 10);
1941 if (!*page || (*end && *end != '\n') )
1942 return -EINVAL;
1943 if (new <= 16 || new > 32768)
1944 return -EINVAL;
1945 while (new < conf->max_nr_stripes) {
1946 if (drop_one_stripe(conf))
1947 conf->max_nr_stripes--;
1948 else
1949 break;
1950 }
1951 while (new > conf->max_nr_stripes) {
1952 if (grow_one_stripe(conf))
1953 conf->max_nr_stripes++;
1954 else break;
1955 }
1956 return len;
1957}
1958
1959static struct md_sysfs_entry
1960raid6_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
1961 raid6_show_stripe_cache_size,
1962 raid6_store_stripe_cache_size);
1963
1964static ssize_t
1965stripe_cache_active_show(mddev_t *mddev, char *page)
1966{
1967 raid6_conf_t *conf = mddev_to_conf(mddev);
1968 if (conf)
1969 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
1970 else
1971 return 0;
1972}
1973
1974static struct md_sysfs_entry
1975raid6_stripecache_active = __ATTR_RO(stripe_cache_active);
1976
1977static struct attribute *raid6_attrs[] = {
1978 &raid6_stripecache_size.attr,
1979 &raid6_stripecache_active.attr,
1980 NULL,
1981};
1982static struct attribute_group raid6_attrs_group = {
1983 .name = NULL,
1984 .attrs = raid6_attrs,
1985};
1986
1987static int run(mddev_t *mddev)
1988{
1989 raid6_conf_t *conf;
1990 int raid_disk, memory;
1991 mdk_rdev_t *rdev;
1992 struct disk_info *disk;
1993 struct list_head *tmp;
1994
1995 if (mddev->level != 6) {
1996 PRINTK("raid6: %s: raid level not set to 6 (%d)\n", mdname(mddev), mddev->level);
1997 return -EIO;
1998 }
1999
2000 mddev->private = kzalloc(sizeof (raid6_conf_t), GFP_KERNEL);
2001 if ((conf = mddev->private) == NULL)
2002 goto abort;
2003 conf->disks = kzalloc(mddev->raid_disks * sizeof(struct disk_info),
2004 GFP_KERNEL);
2005 if (!conf->disks)
2006 goto abort;
2007
2008 conf->mddev = mddev;
2009
2010 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
2011 goto abort;
2012
2013 conf->spare_page = alloc_page(GFP_KERNEL);
2014 if (!conf->spare_page)
2015 goto abort;
2016
2017 spin_lock_init(&conf->device_lock);
2018 init_waitqueue_head(&conf->wait_for_stripe);
2019 init_waitqueue_head(&conf->wait_for_overlap);
2020 INIT_LIST_HEAD(&conf->handle_list);
2021 INIT_LIST_HEAD(&conf->delayed_list);
2022 INIT_LIST_HEAD(&conf->bitmap_list);
2023 INIT_LIST_HEAD(&conf->inactive_list);
2024 atomic_set(&conf->active_stripes, 0);
2025 atomic_set(&conf->preread_active_stripes, 0);
2026
2027 PRINTK("raid6: run(%s) called.\n", mdname(mddev));
2028
2029 ITERATE_RDEV(mddev,rdev,tmp) {
2030 raid_disk = rdev->raid_disk;
2031 if (raid_disk >= mddev->raid_disks
2032 || raid_disk < 0)
2033 continue;
2034 disk = conf->disks + raid_disk;
2035
2036 disk->rdev = rdev;
2037
2038 if (test_bit(In_sync, &rdev->flags)) {
2039 char b[BDEVNAME_SIZE];
2040 printk(KERN_INFO "raid6: device %s operational as raid"
2041 " disk %d\n", bdevname(rdev->bdev,b),
2042 raid_disk);
2043 conf->working_disks++;
2044 }
2045 }
2046
2047 conf->raid_disks = mddev->raid_disks;
2048
2049 /*
2050 * 0 for a fully functional array, 1 or 2 for a degraded array.
2051 */
2052 mddev->degraded = conf->failed_disks = conf->raid_disks - conf->working_disks;
2053 conf->mddev = mddev;
2054 conf->chunk_size = mddev->chunk_size;
2055 conf->level = mddev->level;
2056 conf->algorithm = mddev->layout;
2057 conf->max_nr_stripes = NR_STRIPES;
2058
2059 /* device size must be a multiple of chunk size */
2060 mddev->size &= ~(mddev->chunk_size/1024 -1);
2061 mddev->resync_max_sectors = mddev->size << 1;
2062
2063 if (conf->raid_disks < 4) {
2064 printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
2065 mdname(mddev), conf->raid_disks);
2066 goto abort;
2067 }
2068 if (!conf->chunk_size || conf->chunk_size % 4) {
2069 printk(KERN_ERR "raid6: invalid chunk size %d for %s\n",
2070 conf->chunk_size, mdname(mddev));
2071 goto abort;
2072 }
2073 if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
2074 printk(KERN_ERR
2075 "raid6: unsupported parity algorithm %d for %s\n",
2076 conf->algorithm, mdname(mddev));
2077 goto abort;
2078 }
2079 if (mddev->degraded > 2) {
2080 printk(KERN_ERR "raid6: not enough operational devices for %s"
2081 " (%d/%d failed)\n",
2082 mdname(mddev), conf->failed_disks, conf->raid_disks);
2083 goto abort;
2084 }
2085
2086 if (mddev->degraded > 0 &&
2087 mddev->recovery_cp != MaxSector) {
2088 if (mddev->ok_start_degraded)
2089 printk(KERN_WARNING "raid6: starting dirty degraded array:%s"
2090 "- data corruption possible.\n",
2091 mdname(mddev));
2092 else {
2093 printk(KERN_ERR "raid6: cannot start dirty degraded array"
2094 " for %s\n", mdname(mddev));
2095 goto abort;
2096 }
2097 }
2098
2099 {
2100 mddev->thread = md_register_thread(raid6d, mddev, "%s_raid6");
2101 if (!mddev->thread) {
2102 printk(KERN_ERR
2103 "raid6: couldn't allocate thread for %s\n",
2104 mdname(mddev));
2105 goto abort;
2106 }
2107 }
2108
2109 memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
2110 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
2111 if (grow_stripes(conf, conf->max_nr_stripes)) {
2112 printk(KERN_ERR
2113 "raid6: couldn't allocate %dkB for buffers\n", memory);
2114 shrink_stripes(conf);
2115 md_unregister_thread(mddev->thread);
2116 goto abort;
2117 } else
2118 printk(KERN_INFO "raid6: allocated %dkB for %s\n",
2119 memory, mdname(mddev));
2120
2121 if (mddev->degraded == 0)
2122 printk(KERN_INFO "raid6: raid level %d set %s active with %d out of %d"
2123 " devices, algorithm %d\n", conf->level, mdname(mddev),
2124 mddev->raid_disks-mddev->degraded, mddev->raid_disks,
2125 conf->algorithm);
2126 else
2127 printk(KERN_ALERT "raid6: raid level %d set %s active with %d"
2128 " out of %d devices, algorithm %d\n", conf->level,
2129 mdname(mddev), mddev->raid_disks - mddev->degraded,
2130 mddev->raid_disks, conf->algorithm);
2131
2132 print_raid6_conf(conf);
2133
2134 /* read-ahead size must cover two whole stripes, which is
2135 * 2 * (n-2) * chunksize where 'n' is the number of raid devices
2136 */
2137 {
2138 int stripe = (mddev->raid_disks-2) * mddev->chunk_size
2139 / PAGE_SIZE;
2140 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
2141 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
2142 }
2143
2144 /* Ok, everything is just fine now */
2145 sysfs_create_group(&mddev->kobj, &raid6_attrs_group);
2146
2147 mddev->array_size = mddev->size * (mddev->raid_disks - 2);
2148
2149 mddev->queue->unplug_fn = raid6_unplug_device;
2150 mddev->queue->issue_flush_fn = raid6_issue_flush;
2151 return 0;
2152abort:
2153 if (conf) {
2154 print_raid6_conf(conf);
2155 safe_put_page(conf->spare_page);
2156 kfree(conf->stripe_hashtbl);
2157 kfree(conf->disks);
2158 kfree(conf);
2159 }
2160 mddev->private = NULL;
2161 printk(KERN_ALERT "raid6: failed to run raid set %s\n", mdname(mddev));
2162 return -EIO;
2163}
2164
2165
2166
2167static int stop (mddev_t *mddev)
2168{
2169 raid6_conf_t *conf = (raid6_conf_t *) mddev->private;
2170
2171 md_unregister_thread(mddev->thread);
2172 mddev->thread = NULL;
2173 shrink_stripes(conf);
2174 kfree(conf->stripe_hashtbl);
2175 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
2176 sysfs_remove_group(&mddev->kobj, &raid6_attrs_group);
2177 kfree(conf);
2178 mddev->private = NULL;
2179 return 0;
2180}
2181
2182#if RAID6_DUMPSTATE
2183static void print_sh (struct seq_file *seq, struct stripe_head *sh)
2184{
2185 int i;
2186
2187 seq_printf(seq, "sh %llu, pd_idx %d, state %ld.\n",
2188 (unsigned long long)sh->sector, sh->pd_idx, sh->state);
2189 seq_printf(seq, "sh %llu, count %d.\n",
2190 (unsigned long long)sh->sector, atomic_read(&sh->count));
2191 seq_printf(seq, "sh %llu, ", (unsigned long long)sh->sector);
2192 for (i = 0; i < sh->raid_conf->raid_disks; i++) {
2193 seq_printf(seq, "(cache%d: %p %ld) ",
2194 i, sh->dev[i].page, sh->dev[i].flags);
2195 }
2196 seq_printf(seq, "\n");
2197}
2198
2199static void printall (struct seq_file *seq, raid6_conf_t *conf)
2200{
2201 struct stripe_head *sh;
2202 struct hlist_node *hn;
2203 int i;
2204
2205 spin_lock_irq(&conf->device_lock);
2206 for (i = 0; i < NR_HASH; i++) {
2207 sh = conf->stripe_hashtbl[i];
2208 hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) {
2209 if (sh->raid_conf != conf)
2210 continue;
2211 print_sh(seq, sh);
2212 }
2213 }
2214 spin_unlock_irq(&conf->device_lock);
2215}
2216#endif
2217
2218static void status (struct seq_file *seq, mddev_t *mddev)
2219{
2220 raid6_conf_t *conf = (raid6_conf_t *) mddev->private;
2221 int i;
2222
2223 seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout);
2224 seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->working_disks);
2225 for (i = 0; i < conf->raid_disks; i++)
2226 seq_printf (seq, "%s",
2227 conf->disks[i].rdev &&
2228 test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
2229 seq_printf (seq, "]");
2230#if RAID6_DUMPSTATE
2231 seq_printf (seq, "\n");
2232 printall(seq, conf);
2233#endif
2234}
2235
2236static void print_raid6_conf (raid6_conf_t *conf)
2237{
2238 int i;
2239 struct disk_info *tmp;
2240
2241 printk("RAID6 conf printout:\n");
2242 if (!conf) {
2243 printk("(conf==NULL)\n");
2244 return;
2245 }
2246 printk(" --- rd:%d wd:%d fd:%d\n", conf->raid_disks,
2247 conf->working_disks, conf->failed_disks);
2248
2249 for (i = 0; i < conf->raid_disks; i++) {
2250 char b[BDEVNAME_SIZE];
2251 tmp = conf->disks + i;
2252 if (tmp->rdev)
2253 printk(" disk %d, o:%d, dev:%s\n",
2254 i, !test_bit(Faulty, &tmp->rdev->flags),
2255 bdevname(tmp->rdev->bdev,b));
2256 }
2257}
2258
2259static int raid6_spare_active(mddev_t *mddev)
2260{
2261 int i;
2262 raid6_conf_t *conf = mddev->private;
2263 struct disk_info *tmp;
2264
2265 for (i = 0; i < conf->raid_disks; i++) {
2266 tmp = conf->disks + i;
2267 if (tmp->rdev
2268 && !test_bit(Faulty, &tmp->rdev->flags)
2269 && !test_bit(In_sync, &tmp->rdev->flags)) {
2270 mddev->degraded--;
2271 conf->failed_disks--;
2272 conf->working_disks++;
2273 set_bit(In_sync, &tmp->rdev->flags);
2274 }
2275 }
2276 print_raid6_conf(conf);
2277 return 0;
2278}
2279
2280static int raid6_remove_disk(mddev_t *mddev, int number)
2281{
2282 raid6_conf_t *conf = mddev->private;
2283 int err = 0;
2284 mdk_rdev_t *rdev;
2285 struct disk_info *p = conf->disks + number;
2286
2287 print_raid6_conf(conf);
2288 rdev = p->rdev;
2289 if (rdev) {
2290 if (test_bit(In_sync, &rdev->flags) ||
2291 atomic_read(&rdev->nr_pending)) {
2292 err = -EBUSY;
2293 goto abort;
2294 }
2295 p->rdev = NULL;
2296 synchronize_rcu();
2297 if (atomic_read(&rdev->nr_pending)) {
2298 /* lost the race, try later */
2299 err = -EBUSY;
2300 p->rdev = rdev;
2301 }
2302 }
2303
2304abort:
2305
2306 print_raid6_conf(conf);
2307 return err;
2308}
2309
2310static int raid6_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
2311{
2312 raid6_conf_t *conf = mddev->private;
2313 int found = 0;
2314 int disk;
2315 struct disk_info *p;
2316
2317 if (mddev->degraded > 2)
2318 /* no point adding a device */
2319 return 0;
2320 /*
2321 * find the disk ... but prefer rdev->saved_raid_disk
2322 * if possible.
2323 */
2324 if (rdev->saved_raid_disk >= 0 &&
2325 conf->disks[rdev->saved_raid_disk].rdev == NULL)
2326 disk = rdev->saved_raid_disk;
2327 else
2328 disk = 0;
2329 for ( ; disk < mddev->raid_disks; disk++)
2330 if ((p=conf->disks + disk)->rdev == NULL) {
2331 clear_bit(In_sync, &rdev->flags);
2332 rdev->raid_disk = disk;
2333 found = 1;
2334 if (rdev->saved_raid_disk != disk)
2335 conf->fullsync = 1;
2336 rcu_assign_pointer(p->rdev, rdev);
2337 break;
2338 }
2339 print_raid6_conf(conf);
2340 return found;
2341}
2342
2343static int raid6_resize(mddev_t *mddev, sector_t sectors)
2344{
2345 /* no resync is happening, and there is enough space
2346 * on all devices, so we can resize.
2347 * We need to make sure resync covers any new space.
2348 * If the array is shrinking we should possibly wait until
2349 * any io in the removed space completes, but it hardly seems
2350 * worth it.
2351 */
2352 sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
2353 mddev->array_size = (sectors * (mddev->raid_disks-2))>>1;
2354 set_capacity(mddev->gendisk, mddev->array_size << 1);
2355 mddev->changed = 1;
2356 if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) {
2357 mddev->recovery_cp = mddev->size << 1;
2358 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2359 }
2360 mddev->size = sectors /2;
2361 mddev->resync_max_sectors = sectors;
2362 return 0;
2363}
2364
2365static void raid6_quiesce(mddev_t *mddev, int state)
2366{
2367 raid6_conf_t *conf = mddev_to_conf(mddev);
2368
2369 switch(state) {
2370 case 1: /* stop all writes */
2371 spin_lock_irq(&conf->device_lock);
2372 conf->quiesce = 1;
2373 wait_event_lock_irq(conf->wait_for_stripe,
2374 atomic_read(&conf->active_stripes) == 0,
2375 conf->device_lock, /* nothing */);
2376 spin_unlock_irq(&conf->device_lock);
2377 break;
2378
2379 case 0: /* re-enable writes */
2380 spin_lock_irq(&conf->device_lock);
2381 conf->quiesce = 0;
2382 wake_up(&conf->wait_for_stripe);
2383 spin_unlock_irq(&conf->device_lock);
2384 break;
2385 }
2386}
2387
2388static struct mdk_personality raid6_personality =
2389{
2390 .name = "raid6",
2391 .level = 6,
2392 .owner = THIS_MODULE,
2393 .make_request = make_request,
2394 .run = run,
2395 .stop = stop,
2396 .status = status,
2397 .error_handler = error,
2398 .hot_add_disk = raid6_add_disk,
2399 .hot_remove_disk= raid6_remove_disk,
2400 .spare_active = raid6_spare_active,
2401 .sync_request = sync_request,
2402 .resize = raid6_resize,
2403 .quiesce = raid6_quiesce,
2404};
2405
2406static int __init raid6_init(void)
2407{
2408 int e;
2409
2410 e = raid6_select_algo();
2411 if ( e )
2412 return e;
2413
2414 return register_md_personality(&raid6_personality);
2415}
2416
2417static void raid6_exit (void)
2418{
2419 unregister_md_personality(&raid6_personality);
2420}
2421
2422module_init(raid6_init);
2423module_exit(raid6_exit);
2424MODULE_LICENSE("GPL");
2425MODULE_ALIAS("md-personality-8"); /* RAID6 */
2426MODULE_ALIAS("md-raid6");
2427MODULE_ALIAS("md-level-6");
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 694d1d80ff3f..dcda5291b990 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -494,8 +494,7 @@ static int restart_video_queue(struct cx8800_dev *dev,
494 return 0; 494 return 0;
495 buf = list_entry(q->queued.next, struct cx88_buffer, vb.queue); 495 buf = list_entry(q->queued.next, struct cx88_buffer, vb.queue);
496 if (NULL == prev) { 496 if (NULL == prev) {
497 list_del(&buf->vb.queue); 497 list_move_tail(&buf->vb.queue, &q->active);
498 list_add_tail(&buf->vb.queue,&q->active);
499 start_video_dma(dev, q, buf); 498 start_video_dma(dev, q, buf);
500 buf->vb.state = STATE_ACTIVE; 499 buf->vb.state = STATE_ACTIVE;
501 buf->count = q->count++; 500 buf->count = q->count++;
@@ -506,8 +505,7 @@ static int restart_video_queue(struct cx8800_dev *dev,
506 } else if (prev->vb.width == buf->vb.width && 505 } else if (prev->vb.width == buf->vb.width &&
507 prev->vb.height == buf->vb.height && 506 prev->vb.height == buf->vb.height &&
508 prev->fmt == buf->fmt) { 507 prev->fmt == buf->fmt) {
509 list_del(&buf->vb.queue); 508 list_move_tail(&buf->vb.queue, &q->active);
510 list_add_tail(&buf->vb.queue,&q->active);
511 buf->vb.state = STATE_ACTIVE; 509 buf->vb.state = STATE_ACTIVE;
512 buf->count = q->count++; 510 buf->count = q->count++;
513 prev->risc.jmp[1] = cpu_to_le32(buf->risc.dma); 511 prev->risc.jmp[1] = cpu_to_le32(buf->risc.dma);
diff --git a/drivers/media/video/usbvideo/quickcam_messenger.c b/drivers/media/video/usbvideo/quickcam_messenger.c
index 3f3182a24da1..56e01b622417 100644
--- a/drivers/media/video/usbvideo/quickcam_messenger.c
+++ b/drivers/media/video/usbvideo/quickcam_messenger.c
@@ -33,7 +33,7 @@
33#include <linux/module.h> 33#include <linux/module.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/input.h> 35#include <linux/input.h>
36#include <linux/usb_input.h> 36#include <linux/usb/input.h>
37 37
38#include "usbvideo.h" 38#include "usbvideo.h"
39#include "quickcam_messenger.h" 39#include "quickcam_messenger.h"
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index cc7ff8f00e42..cb62f2a9676a 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -115,8 +115,12 @@ static nsc_chip_t chips[] = {
115 /* Contributed by Jan Frey - IBM A30/A31 */ 115 /* Contributed by Jan Frey - IBM A30/A31 */
116 { "PC8739x", { 0x2e, 0x4e, 0x0 }, 0x20, 0xea, 0xff, 116 { "PC8739x", { 0x2e, 0x4e, 0x0 }, 0x20, 0xea, 0xff,
117 nsc_ircc_probe_39x, nsc_ircc_init_39x }, 117 nsc_ircc_probe_39x, nsc_ircc_init_39x },
118 { "IBM", { 0x2e, 0x4e, 0x0 }, 0x20, 0xf4, 0xff, 118 /* IBM ThinkPads using PC8738x (T60/X60/Z60) */
119 nsc_ircc_probe_39x, nsc_ircc_init_39x }, 119 { "IBM-PC8738x", { 0x2e, 0x4e, 0x0 }, 0x20, 0xf4, 0xff,
120 nsc_ircc_probe_39x, nsc_ircc_init_39x },
121 /* IBM ThinkPads using PC8394T (T43/R52/?) */
122 { "IBM-PC8394T", { 0x2e, 0x4e, 0x0 }, 0x20, 0xf9, 0xff,
123 nsc_ircc_probe_39x, nsc_ircc_init_39x },
120 { NULL } 124 { NULL }
121}; 125};
122 126
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 01cd8ec751ea..d643a097faa5 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -2578,8 +2578,7 @@ ppp_find_channel(int unit)
2578 2578
2579 list_for_each_entry(pch, &new_channels, list) { 2579 list_for_each_entry(pch, &new_channels, list) {
2580 if (pch->file.index == unit) { 2580 if (pch->file.index == unit) {
2581 list_del(&pch->list); 2581 list_move(&pch->list, &all_channels);
2582 list_add(&pch->list, &all_channels);
2583 return pch; 2582 return pch;
2584 } 2583 }
2585 } 2584 }
diff --git a/drivers/net/wireless/bcm43xx/Kconfig b/drivers/net/wireless/bcm43xx/Kconfig
index 25ea4748f0b9..533993f538fc 100644
--- a/drivers/net/wireless/bcm43xx/Kconfig
+++ b/drivers/net/wireless/bcm43xx/Kconfig
@@ -2,6 +2,7 @@ config BCM43XX
2 tristate "Broadcom BCM43xx wireless support" 2 tristate "Broadcom BCM43xx wireless support"
3 depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL 3 depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL
4 select FW_LOADER 4 select FW_LOADER
5 select HW_RANDOM
5 ---help--- 6 ---help---
6 This is an experimental driver for the Broadcom 43xx wireless chip, 7 This is an experimental driver for the Broadcom 43xx wireless chip,
7 found in the Apple Airport Extreme and various other devices. 8 found in the Apple Airport Extreme and various other devices.
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx.h b/drivers/net/wireless/bcm43xx/bcm43xx.h
index d8f917c21ea4..17a56828e232 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx.h
+++ b/drivers/net/wireless/bcm43xx/bcm43xx.h
@@ -1,6 +1,7 @@
1#ifndef BCM43xx_H_ 1#ifndef BCM43xx_H_
2#define BCM43xx_H_ 2#define BCM43xx_H_
3 3
4#include <linux/hw_random.h>
4#include <linux/version.h> 5#include <linux/version.h>
5#include <linux/kernel.h> 6#include <linux/kernel.h>
6#include <linux/spinlock.h> 7#include <linux/spinlock.h>
@@ -82,6 +83,7 @@
82#define BCM43xx_MMIO_TSF_1 0x634 /* core rev < 3 only */ 83#define BCM43xx_MMIO_TSF_1 0x634 /* core rev < 3 only */
83#define BCM43xx_MMIO_TSF_2 0x636 /* core rev < 3 only */ 84#define BCM43xx_MMIO_TSF_2 0x636 /* core rev < 3 only */
84#define BCM43xx_MMIO_TSF_3 0x638 /* core rev < 3 only */ 85#define BCM43xx_MMIO_TSF_3 0x638 /* core rev < 3 only */
86#define BCM43xx_MMIO_RNG 0x65A
85#define BCM43xx_MMIO_POWERUP_DELAY 0x6A8 87#define BCM43xx_MMIO_POWERUP_DELAY 0x6A8
86 88
87/* SPROM offsets. */ 89/* SPROM offsets. */
@@ -750,6 +752,10 @@ struct bcm43xx_private {
750 const struct firmware *initvals0; 752 const struct firmware *initvals0;
751 const struct firmware *initvals1; 753 const struct firmware *initvals1;
752 754
755 /* Random Number Generator. */
756 struct hwrng rng;
757 char rng_name[20 + 1];
758
753 /* Debugging stuff follows. */ 759 /* Debugging stuff follows. */
754#ifdef CONFIG_BCM43XX_DEBUG 760#ifdef CONFIG_BCM43XX_DEBUG
755 struct bcm43xx_dfsentry *dfsentry; 761 struct bcm43xx_dfsentry *dfsentry;
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_main.c b/drivers/net/wireless/bcm43xx/bcm43xx_main.c
index 085d7857fe31..27bcf47228e2 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_main.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_main.c
@@ -3237,6 +3237,39 @@ static void bcm43xx_security_init(struct bcm43xx_private *bcm)
3237 bcm43xx_clear_keys(bcm); 3237 bcm43xx_clear_keys(bcm);
3238} 3238}
3239 3239
3240static int bcm43xx_rng_read(struct hwrng *rng, u32 *data)
3241{
3242 struct bcm43xx_private *bcm = (struct bcm43xx_private *)rng->priv;
3243 unsigned long flags;
3244
3245 bcm43xx_lock_irqonly(bcm, flags);
3246 *data = bcm43xx_read16(bcm, BCM43xx_MMIO_RNG);
3247 bcm43xx_unlock_irqonly(bcm, flags);
3248
3249 return (sizeof(u16));
3250}
3251
3252static void bcm43xx_rng_exit(struct bcm43xx_private *bcm)
3253{
3254 hwrng_unregister(&bcm->rng);
3255}
3256
3257static int bcm43xx_rng_init(struct bcm43xx_private *bcm)
3258{
3259 int err;
3260
3261 snprintf(bcm->rng_name, ARRAY_SIZE(bcm->rng_name),
3262 "%s_%s", KBUILD_MODNAME, bcm->net_dev->name);
3263 bcm->rng.name = bcm->rng_name;
3264 bcm->rng.data_read = bcm43xx_rng_read;
3265 bcm->rng.priv = (unsigned long)bcm;
3266 err = hwrng_register(&bcm->rng);
3267 if (err)
3268 printk(KERN_ERR PFX "RNG init failed (%d)\n", err);
3269
3270 return err;
3271}
3272
3240/* This is the opposite of bcm43xx_init_board() */ 3273/* This is the opposite of bcm43xx_init_board() */
3241static void bcm43xx_free_board(struct bcm43xx_private *bcm) 3274static void bcm43xx_free_board(struct bcm43xx_private *bcm)
3242{ 3275{
@@ -3248,6 +3281,7 @@ static void bcm43xx_free_board(struct bcm43xx_private *bcm)
3248 3281
3249 bcm43xx_set_status(bcm, BCM43xx_STAT_SHUTTINGDOWN); 3282 bcm43xx_set_status(bcm, BCM43xx_STAT_SHUTTINGDOWN);
3250 3283
3284 bcm43xx_rng_exit(bcm);
3251 for (i = 0; i < BCM43xx_MAX_80211_CORES; i++) { 3285 for (i = 0; i < BCM43xx_MAX_80211_CORES; i++) {
3252 if (!bcm->core_80211[i].available) 3286 if (!bcm->core_80211[i].available)
3253 continue; 3287 continue;
@@ -3325,6 +3359,9 @@ static int bcm43xx_init_board(struct bcm43xx_private *bcm)
3325 bcm43xx_switch_core(bcm, &bcm->core_80211[0]); 3359 bcm43xx_switch_core(bcm, &bcm->core_80211[0]);
3326 bcm43xx_mac_enable(bcm); 3360 bcm43xx_mac_enable(bcm);
3327 } 3361 }
3362 err = bcm43xx_rng_init(bcm);
3363 if (err)
3364 goto err_80211_unwind;
3328 bcm43xx_macfilter_clear(bcm, BCM43xx_MACFILTER_ASSOC); 3365 bcm43xx_macfilter_clear(bcm, BCM43xx_MACFILTER_ASSOC);
3329 bcm43xx_macfilter_set(bcm, BCM43xx_MACFILTER_SELF, (u8 *)(bcm->net_dev->dev_addr)); 3366 bcm43xx_macfilter_set(bcm, BCM43xx_MACFILTER_SELF, (u8 *)(bcm->net_dev->dev_addr));
3330 dprintk(KERN_INFO PFX "80211 cores initialized\n"); 3367 dprintk(KERN_INFO PFX "80211 cores initialized\n");
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index f94419b334f7..2eded55ae88d 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1140,10 +1140,9 @@ list_modified:
1140 } 1140 }
1141 } 1141 }
1142 /* re-insert all entries from the failed_list into ipm_list */ 1142 /* re-insert all entries from the failed_list into ipm_list */
1143 list_for_each_entry_safe(ipm, tmp, &failed_list, list) { 1143 list_for_each_entry_safe(ipm, tmp, &failed_list, list)
1144 list_del_init(&ipm->list); 1144 list_move_tail(&ipm->list, &card->ipm_list);
1145 list_add_tail(&ipm->list, &card->ipm_list); 1145
1146 }
1147 spin_unlock_irqrestore(&card->ipm_lock, flags); 1146 spin_unlock_irqrestore(&card->ipm_lock, flags);
1148} 1147}
1149 1148
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index 6ab035590ee6..b28712df0b77 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -5118,8 +5118,7 @@ static void ncr_ccb_skipped(struct ncb *np, struct ccb *cp)
5118 cp->host_status &= ~HS_SKIPMASK; 5118 cp->host_status &= ~HS_SKIPMASK;
5119 cp->start.schedule.l_paddr = 5119 cp->start.schedule.l_paddr =
5120 cpu_to_scr(NCB_SCRIPT_PHYS (np, select)); 5120 cpu_to_scr(NCB_SCRIPT_PHYS (np, select));
5121 list_del(&cp->link_ccbq); 5121 list_move_tail(&cp->link_ccbq, &lp->skip_ccbq);
5122 list_add_tail(&cp->link_ccbq, &lp->skip_ccbq);
5123 if (cp->queued) { 5122 if (cp->queued) {
5124 --lp->queuedccbs; 5123 --lp->queuedccbs;
5125 } 5124 }
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index aef093db597e..3d4487eac9b7 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2258,8 +2258,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *ha)
2258 } 2258 }
2259 2259
2260 /* Remove device from the new list and add it to DB */ 2260 /* Remove device from the new list and add it to DB */
2261 list_del(&fcport->list); 2261 list_move_tail(&fcport->list, &ha->fcports);
2262 list_add_tail(&fcport->list, &ha->fcports);
2263 2262
2264 /* Login and update database */ 2263 /* Login and update database */
2265 qla2x00_fabric_dev_login(ha, fcport, &next_loopid); 2264 qla2x00_fabric_dev_login(ha, fcport, &next_loopid);
diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c
index 2fe7fd19437b..4a22909518f5 100644
--- a/drivers/usb/host/hc_crisv10.c
+++ b/drivers/usb/host/hc_crisv10.c
@@ -411,8 +411,7 @@ static inline void urb_list_move_last(struct urb *urb, int epid)
411 urb_entry_t *urb_entry = __urb_list_entry(urb, epid); 411 urb_entry_t *urb_entry = __urb_list_entry(urb, epid);
412 assert(urb_entry); 412 assert(urb_entry);
413 413
414 list_del(&urb_entry->list); 414 list_move_tail(&urb_entry->list, &urb_list[epid]);
415 list_add_tail(&urb_entry->list, &urb_list[epid]);
416} 415}
417 416
418/* Get the next urb in the list. */ 417/* Get the next urb in the list. */
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 5b06fa366098..56ffc81302fc 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -686,19 +686,16 @@ static void whiteheat_close(struct usb_serial_port *port, struct file * filp)
686 wrap = list_entry(tmp, struct whiteheat_urb_wrap, list); 686 wrap = list_entry(tmp, struct whiteheat_urb_wrap, list);
687 urb = wrap->urb; 687 urb = wrap->urb;
688 usb_kill_urb(urb); 688 usb_kill_urb(urb);
689 list_del(tmp); 689 list_move(tmp, &info->rx_urbs_free);
690 list_add(tmp, &info->rx_urbs_free);
691 }
692 list_for_each_safe(tmp, tmp2, &info->rx_urb_q) {
693 list_del(tmp);
694 list_add(tmp, &info->rx_urbs_free);
695 } 690 }
691 list_for_each_safe(tmp, tmp2, &info->rx_urb_q)
692 list_move(tmp, &info->rx_urbs_free);
693
696 list_for_each_safe(tmp, tmp2, &info->tx_urbs_submitted) { 694 list_for_each_safe(tmp, tmp2, &info->tx_urbs_submitted) {
697 wrap = list_entry(tmp, struct whiteheat_urb_wrap, list); 695 wrap = list_entry(tmp, struct whiteheat_urb_wrap, list);
698 urb = wrap->urb; 696 urb = wrap->urb;
699 usb_kill_urb(urb); 697 usb_kill_urb(urb);
700 list_del(tmp); 698 list_move(tmp, &info->tx_urbs_free);
701 list_add(tmp, &info->tx_urbs_free);
702 } 699 }
703 spin_unlock_irqrestore(&info->lock, flags); 700 spin_unlock_irqrestore(&info->lock, flags);
704 701
@@ -1080,8 +1077,7 @@ static void whiteheat_write_callback(struct urb *urb, struct pt_regs *regs)
1080 err("%s - Not my urb!", __FUNCTION__); 1077 err("%s - Not my urb!", __FUNCTION__);
1081 return; 1078 return;
1082 } 1079 }
1083 list_del(&wrap->list); 1080 list_move(&wrap->list, &info->tx_urbs_free);
1084 list_add(&wrap->list, &info->tx_urbs_free);
1085 spin_unlock(&info->lock); 1081 spin_unlock(&info->lock);
1086 1082
1087 if (urb->status) { 1083 if (urb->status) {
@@ -1371,8 +1367,7 @@ static int start_port_read(struct usb_serial_port *port)
1371 wrap = list_entry(tmp, struct whiteheat_urb_wrap, list); 1367 wrap = list_entry(tmp, struct whiteheat_urb_wrap, list);
1372 urb = wrap->urb; 1368 urb = wrap->urb;
1373 usb_kill_urb(urb); 1369 usb_kill_urb(urb);
1374 list_del(tmp); 1370 list_move(tmp, &info->rx_urbs_free);
1375 list_add(tmp, &info->rx_urbs_free);
1376 } 1371 }
1377 break; 1372 break;
1378 } 1373 }
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 168ede7902bd..17de4c84db69 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -4,6 +4,21 @@
4 4
5menu "Graphics support" 5menu "Graphics support"
6 6
7config FIRMWARE_EDID
8 bool "Enable firmware EDID"
9 default y
10 ---help---
11 This enables access to the EDID transferred from the firmware.
12 On the i386, this is from the Video BIOS. Enable this if DDC/I2C
13 transfers do not work for your driver and if you are using
14 nvidiafb, i810fb or savagefb.
15
16 In general, choosing Y for this option is safe. If you
17 experience extremely long delays while booting before you get
18 something on your display, try setting this to N. Matrox cards in
19 combination with certain motherboards and monitors are known to
20 suffer from this problem.
21
7config FB 22config FB
8 tristate "Support for frame buffer devices" 23 tristate "Support for frame buffer devices"
9 ---help--- 24 ---help---
@@ -70,22 +85,6 @@ config FB_MACMODES
70 depends on FB 85 depends on FB
71 default n 86 default n
72 87
73config FB_FIRMWARE_EDID
74 bool "Enable firmware EDID"
75 depends on FB
76 default y
77 ---help---
78 This enables access to the EDID transferred from the firmware.
79 On the i386, this is from the Video BIOS. Enable this if DDC/I2C
80 transfers do not work for your driver and if you are using
81 nvidiafb, i810fb or savagefb.
82
83 In general, choosing Y for this option is safe. If you
84 experience extremely long delays while booting before you get
85 something on your display, try setting this to N. Matrox cards in
86 combination with certain motherboards and monitors are known to
87 suffer from this problem.
88
89config FB_BACKLIGHT 88config FB_BACKLIGHT
90 bool 89 bool
91 depends on FB 90 depends on FB
@@ -551,10 +550,14 @@ config FB_VESA
551 You will get a boot time penguin logo at no additional cost. Please 550 You will get a boot time penguin logo at no additional cost. Please
552 read <file:Documentation/fb/vesafb.txt>. If unsure, say Y. 551 read <file:Documentation/fb/vesafb.txt>. If unsure, say Y.
553 552
554config VIDEO_SELECT 553config FB_IMAC
555 bool 554 bool "Intel-based Macintosh Framebuffer Support"
556 depends on FB_VESA 555 depends on (FB = y) && X86
557 default y 556 select FB_CFB_FILLRECT
557 select FB_CFB_COPYAREA
558 select FB_CFB_IMAGEBLIT
559 help
560 This is the frame buffer device driver for the Intel-based Macintosh
558 561
559config FB_HGA 562config FB_HGA
560 tristate "Hercules mono graphics support" 563 tristate "Hercules mono graphics support"
@@ -578,12 +581,6 @@ config FB_HGA_ACCEL
578 This will compile the Hercules mono graphics with 581 This will compile the Hercules mono graphics with
579 acceleration functions. 582 acceleration functions.
580 583
581
582config VIDEO_SELECT
583 bool
584 depends on (FB = y) && X86
585 default y
586
587config FB_SGIVW 584config FB_SGIVW
588 tristate "SGI Visual Workstation framebuffer support" 585 tristate "SGI Visual Workstation framebuffer support"
589 depends on FB && X86_VISWS 586 depends on FB && X86_VISWS
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 23de3b2c7856..c335e9bc3b20 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -4,15 +4,15 @@
4 4
5# Each configuration option enables a list of files. 5# Each configuration option enables a list of files.
6 6
7obj-$(CONFIG_VT) += console/
8obj-$(CONFIG_LOGO) += logo/
9obj-$(CONFIG_SYSFS) += backlight/
10
11obj-$(CONFIG_FB) += fb.o 7obj-$(CONFIG_FB) += fb.o
12fb-y := fbmem.o fbmon.o fbcmap.o fbsysfs.o \ 8fb-y := fbmem.o fbmon.o fbcmap.o fbsysfs.o \
13 modedb.o fbcvt.o 9 modedb.o fbcvt.o
14fb-objs := $(fb-y) 10fb-objs := $(fb-y)
15 11
12obj-$(CONFIG_VT) += console/
13obj-$(CONFIG_LOGO) += logo/
14obj-$(CONFIG_SYSFS) += backlight/
15
16obj-$(CONFIG_FB_CFB_FILLRECT) += cfbfillrect.o 16obj-$(CONFIG_FB_CFB_FILLRECT) += cfbfillrect.o
17obj-$(CONFIG_FB_CFB_COPYAREA) += cfbcopyarea.o 17obj-$(CONFIG_FB_CFB_COPYAREA) += cfbcopyarea.o
18obj-$(CONFIG_FB_CFB_IMAGEBLIT) += cfbimgblt.o 18obj-$(CONFIG_FB_CFB_IMAGEBLIT) += cfbimgblt.o
@@ -97,6 +97,7 @@ obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o
97 97
98# Platform or fallback drivers go here 98# Platform or fallback drivers go here
99obj-$(CONFIG_FB_VESA) += vesafb.o 99obj-$(CONFIG_FB_VESA) += vesafb.o
100obj-$(CONFIG_FB_IMAC) += imacfb.o
100obj-$(CONFIG_FB_VGA16) += vga16fb.o vgastate.o 101obj-$(CONFIG_FB_VGA16) += vga16fb.o vgastate.o
101obj-$(CONFIG_FB_OF) += offb.o 102obj-$(CONFIG_FB_OF) += offb.o
102 103
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index db878fd55fb2..11cf7fcb1d55 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -100,7 +100,7 @@
100 100
101#ifndef CONFIG_PPC_PMAC 101#ifndef CONFIG_PPC_PMAC
102/* default mode */ 102/* default mode */
103static struct fb_var_screeninfo default_var __initdata = { 103static struct fb_var_screeninfo default_var __devinitdata = {
104 /* 640x480, 60 Hz, Non-Interlaced (25.175 MHz dotclock) */ 104 /* 640x480, 60 Hz, Non-Interlaced (25.175 MHz dotclock) */
105 640, 480, 640, 480, 0, 0, 8, 0, 105 640, 480, 640, 480, 0, 0, 8, 0,
106 {0, 8, 0}, {0, 8, 0}, {0, 8, 0}, {0, 0, 0}, 106 {0, 8, 0}, {0, 8, 0}, {0, 8, 0}, {0, 0, 0},
@@ -123,7 +123,7 @@ static struct fb_var_screeninfo default_var = {
123 123
124/* default modedb mode */ 124/* default modedb mode */
125/* 640x480, 60 Hz, Non-Interlaced (25.172 MHz dotclock) */ 125/* 640x480, 60 Hz, Non-Interlaced (25.172 MHz dotclock) */
126static struct fb_videomode defaultmode __initdata = { 126static struct fb_videomode defaultmode __devinitdata = {
127 .refresh = 60, 127 .refresh = 60,
128 .xres = 640, 128 .xres = 640,
129 .yres = 480, 129 .yres = 480,
@@ -335,7 +335,7 @@ static const struct aty128_meminfo sdr_sgram =
335static const struct aty128_meminfo ddr_sgram = 335static const struct aty128_meminfo ddr_sgram =
336 { 4, 4, 3, 3, 2, 3, 1, 16, 31, 16, "64-bit DDR SGRAM" }; 336 { 4, 4, 3, 3, 2, 3, 1, 16, 31, 16, "64-bit DDR SGRAM" };
337 337
338static struct fb_fix_screeninfo aty128fb_fix __initdata = { 338static struct fb_fix_screeninfo aty128fb_fix __devinitdata = {
339 .id = "ATY Rage128", 339 .id = "ATY Rage128",
340 .type = FB_TYPE_PACKED_PIXELS, 340 .type = FB_TYPE_PACKED_PIXELS,
341 .visual = FB_VISUAL_PSEUDOCOLOR, 341 .visual = FB_VISUAL_PSEUDOCOLOR,
@@ -345,15 +345,15 @@ static struct fb_fix_screeninfo aty128fb_fix __initdata = {
345 .accel = FB_ACCEL_ATI_RAGE128, 345 .accel = FB_ACCEL_ATI_RAGE128,
346}; 346};
347 347
348static char *mode_option __initdata = NULL; 348static char *mode_option __devinitdata = NULL;
349 349
350#ifdef CONFIG_PPC_PMAC 350#ifdef CONFIG_PPC_PMAC
351static int default_vmode __initdata = VMODE_1024_768_60; 351static int default_vmode __devinitdata = VMODE_1024_768_60;
352static int default_cmode __initdata = CMODE_8; 352static int default_cmode __devinitdata = CMODE_8;
353#endif 353#endif
354 354
355static int default_crt_on __initdata = 0; 355static int default_crt_on __devinitdata = 0;
356static int default_lcd_on __initdata = 1; 356static int default_lcd_on __devinitdata = 1;
357 357
358#ifdef CONFIG_MTRR 358#ifdef CONFIG_MTRR
359static int mtrr = 1; 359static int mtrr = 1;
@@ -445,9 +445,9 @@ static int aty128_encode_var(struct fb_var_screeninfo *var,
445static int aty128_decode_var(struct fb_var_screeninfo *var, 445static int aty128_decode_var(struct fb_var_screeninfo *var,
446 struct aty128fb_par *par); 446 struct aty128fb_par *par);
447#if 0 447#if 0
448static void __init aty128_get_pllinfo(struct aty128fb_par *par, 448static void __devinit aty128_get_pllinfo(struct aty128fb_par *par,
449 void __iomem *bios); 449 void __iomem *bios);
450static void __init __iomem *aty128_map_ROM(struct pci_dev *pdev, const struct aty128fb_par *par); 450static void __devinit __iomem *aty128_map_ROM(struct pci_dev *pdev, const struct aty128fb_par *par);
451#endif 451#endif
452static void aty128_timings(struct aty128fb_par *par); 452static void aty128_timings(struct aty128fb_par *par);
453static void aty128_init_engine(struct aty128fb_par *par); 453static void aty128_init_engine(struct aty128fb_par *par);
@@ -573,7 +573,7 @@ static void aty_pll_writeupdate(const struct aty128fb_par *par)
573 573
574 574
575/* write to the scratch register to test r/w functionality */ 575/* write to the scratch register to test r/w functionality */
576static int __init register_test(const struct aty128fb_par *par) 576static int __devinit register_test(const struct aty128fb_par *par)
577{ 577{
578 u32 val; 578 u32 val;
579 int flag = 0; 579 int flag = 0;
@@ -772,7 +772,7 @@ static u32 depth_to_dst(u32 depth)
772 772
773 773
774#ifndef __sparc__ 774#ifndef __sparc__
775static void __iomem * __init aty128_map_ROM(const struct aty128fb_par *par, struct pci_dev *dev) 775static void __iomem * __devinit aty128_map_ROM(const struct aty128fb_par *par, struct pci_dev *dev)
776{ 776{
777 u16 dptr; 777 u16 dptr;
778 u8 rom_type; 778 u8 rom_type;
@@ -856,7 +856,7 @@ static void __iomem * __init aty128_map_ROM(const struct aty128fb_par *par, stru
856 return NULL; 856 return NULL;
857} 857}
858 858
859static void __init aty128_get_pllinfo(struct aty128fb_par *par, unsigned char __iomem *bios) 859static void __devinit aty128_get_pllinfo(struct aty128fb_par *par, unsigned char __iomem *bios)
860{ 860{
861 unsigned int bios_hdr; 861 unsigned int bios_hdr;
862 unsigned int bios_pll; 862 unsigned int bios_pll;
@@ -903,7 +903,7 @@ static void __iomem * __devinit aty128_find_mem_vbios(struct aty128fb_par *par)
903#endif /* ndef(__sparc__) */ 903#endif /* ndef(__sparc__) */
904 904
905/* fill in known card constants if pll_block is not available */ 905/* fill in known card constants if pll_block is not available */
906static void __init aty128_timings(struct aty128fb_par *par) 906static void __devinit aty128_timings(struct aty128fb_par *par)
907{ 907{
908#ifdef CONFIG_PPC_OF 908#ifdef CONFIG_PPC_OF
909 /* instead of a table lookup, assume OF has properly 909 /* instead of a table lookup, assume OF has properly
@@ -1645,7 +1645,7 @@ static int aty128fb_sync(struct fb_info *info)
1645} 1645}
1646 1646
1647#ifndef MODULE 1647#ifndef MODULE
1648static int __init aty128fb_setup(char *options) 1648static int __devinit aty128fb_setup(char *options)
1649{ 1649{
1650 char *this_opt; 1650 char *this_opt;
1651 1651
@@ -1893,7 +1893,7 @@ static void aty128_early_resume(void *data)
1893} 1893}
1894#endif /* CONFIG_PPC_PMAC */ 1894#endif /* CONFIG_PPC_PMAC */
1895 1895
1896static int __init aty128_init(struct pci_dev *pdev, const struct pci_device_id *ent) 1896static int __devinit aty128_init(struct pci_dev *pdev, const struct pci_device_id *ent)
1897{ 1897{
1898 struct fb_info *info = pci_get_drvdata(pdev); 1898 struct fb_info *info = pci_get_drvdata(pdev);
1899 struct aty128fb_par *par = info->par; 1899 struct aty128fb_par *par = info->par;
@@ -2037,7 +2037,7 @@ static int __init aty128_init(struct pci_dev *pdev, const struct pci_device_id *
2037 2037
2038#ifdef CONFIG_PCI 2038#ifdef CONFIG_PCI
2039/* register a card ++ajoshi */ 2039/* register a card ++ajoshi */
2040static int __init aty128_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2040static int __devinit aty128_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2041{ 2041{
2042 unsigned long fb_addr, reg_addr; 2042 unsigned long fb_addr, reg_addr;
2043 struct aty128fb_par *par; 2043 struct aty128fb_par *par;
@@ -2556,7 +2556,7 @@ static int aty128_pci_resume(struct pci_dev *pdev)
2556} 2556}
2557 2557
2558 2558
2559static int __init aty128fb_init(void) 2559static int __devinit aty128fb_init(void)
2560{ 2560{
2561#ifndef MODULE 2561#ifndef MODULE
2562 char *option = NULL; 2562 char *option = NULL;
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index c5185f7cf4ba..22e720611bf6 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -316,12 +316,12 @@ static int vram;
316static int pll; 316static int pll;
317static int mclk; 317static int mclk;
318static int xclk; 318static int xclk;
319static int comp_sync __initdata = -1; 319static int comp_sync __devinitdata = -1;
320static char *mode; 320static char *mode;
321 321
322#ifdef CONFIG_PPC 322#ifdef CONFIG_PPC
323static int default_vmode __initdata = VMODE_CHOOSE; 323static int default_vmode __devinitdata = VMODE_CHOOSE;
324static int default_cmode __initdata = CMODE_CHOOSE; 324static int default_cmode __devinitdata = CMODE_CHOOSE;
325 325
326module_param_named(vmode, default_vmode, int, 0); 326module_param_named(vmode, default_vmode, int, 0);
327MODULE_PARM_DESC(vmode, "int: video mode for mac"); 327MODULE_PARM_DESC(vmode, "int: video mode for mac");
@@ -330,10 +330,10 @@ MODULE_PARM_DESC(cmode, "int: color mode for mac");
330#endif 330#endif
331 331
332#ifdef CONFIG_ATARI 332#ifdef CONFIG_ATARI
333static unsigned int mach64_count __initdata = 0; 333static unsigned int mach64_count __devinitdata = 0;
334static unsigned long phys_vmembase[FB_MAX] __initdata = { 0, }; 334static unsigned long phys_vmembase[FB_MAX] __devinitdata = { 0, };
335static unsigned long phys_size[FB_MAX] __initdata = { 0, }; 335static unsigned long phys_size[FB_MAX] __devinitdata = { 0, };
336static unsigned long phys_guiregbase[FB_MAX] __initdata = { 0, }; 336static unsigned long phys_guiregbase[FB_MAX] __devinitdata = { 0, };
337#endif 337#endif
338 338
339/* top -> down is an evolution of mach64 chipset, any corrections? */ 339/* top -> down is an evolution of mach64 chipset, any corrections? */
@@ -583,7 +583,7 @@ static u32 atyfb_get_pixclock(struct fb_var_screeninfo *var, struct atyfb_par *p
583 * Apple monitor sense 583 * Apple monitor sense
584 */ 584 */
585 585
586static int __init read_aty_sense(const struct atyfb_par *par) 586static int __devinit read_aty_sense(const struct atyfb_par *par)
587{ 587{
588 int sense, i; 588 int sense, i;
589 589
@@ -1281,6 +1281,14 @@ static int atyfb_set_par(struct fb_info *info)
1281 1281
1282 par->accel_flags = var->accel_flags; /* hack */ 1282 par->accel_flags = var->accel_flags; /* hack */
1283 1283
1284 if (var->accel_flags) {
1285 info->fbops->fb_sync = atyfb_sync;
1286 info->flags &= ~FBINFO_HWACCEL_DISABLED;
1287 } else {
1288 info->fbops->fb_sync = NULL;
1289 info->flags |= FBINFO_HWACCEL_DISABLED;
1290 }
1291
1284 if (par->blitter_may_be_busy) 1292 if (par->blitter_may_be_busy)
1285 wait_for_idle(par); 1293 wait_for_idle(par);
1286 1294
@@ -2253,7 +2261,7 @@ static void aty_bl_exit(struct atyfb_par *par)
2253 2261
2254#endif /* CONFIG_FB_ATY_BACKLIGHT */ 2262#endif /* CONFIG_FB_ATY_BACKLIGHT */
2255 2263
2256static void __init aty_calc_mem_refresh(struct atyfb_par *par, int xclk) 2264static void __devinit aty_calc_mem_refresh(struct atyfb_par *par, int xclk)
2257{ 2265{
2258 const int ragepro_tbl[] = { 2266 const int ragepro_tbl[] = {
2259 44, 50, 55, 66, 75, 80, 100 2267 44, 50, 55, 66, 75, 80, 100
@@ -2313,7 +2321,7 @@ static int __devinit atyfb_get_timings_from_lcd(struct atyfb_par *par,
2313} 2321}
2314#endif /* defined(__i386__) && defined(CONFIG_FB_ATY_GENERIC_LCD) */ 2322#endif /* defined(__i386__) && defined(CONFIG_FB_ATY_GENERIC_LCD) */
2315 2323
2316static int __init aty_init(struct fb_info *info, const char *name) 2324static int __devinit aty_init(struct fb_info *info, const char *name)
2317{ 2325{
2318 struct atyfb_par *par = (struct atyfb_par *) info->par; 2326 struct atyfb_par *par = (struct atyfb_par *) info->par;
2319 const char *ramname = NULL, *xtal; 2327 const char *ramname = NULL, *xtal;
@@ -2394,12 +2402,15 @@ static int __init aty_init(struct fb_info *info, const char *name)
2394 break; 2402 break;
2395 } 2403 }
2396 switch (clk_type) { 2404 switch (clk_type) {
2405#ifdef CONFIG_ATARI
2397 case CLK_ATI18818_1: 2406 case CLK_ATI18818_1:
2398 par->pll_ops = &aty_pll_ati18818_1; 2407 par->pll_ops = &aty_pll_ati18818_1;
2399 break; 2408 break;
2409#else
2400 case CLK_IBMRGB514: 2410 case CLK_IBMRGB514:
2401 par->pll_ops = &aty_pll_ibm514; 2411 par->pll_ops = &aty_pll_ibm514;
2402 break; 2412 break;
2413#endif
2403#if 0 /* dead code */ 2414#if 0 /* dead code */
2404 case CLK_STG1703: 2415 case CLK_STG1703:
2405 par->pll_ops = &aty_pll_stg1703; 2416 par->pll_ops = &aty_pll_stg1703;
@@ -2604,7 +2615,11 @@ static int __init aty_init(struct fb_info *info, const char *name)
2604 2615
2605 info->fbops = &atyfb_ops; 2616 info->fbops = &atyfb_ops;
2606 info->pseudo_palette = pseudo_palette; 2617 info->pseudo_palette = pseudo_palette;
2607 info->flags = FBINFO_FLAG_DEFAULT; 2618 info->flags = FBINFO_DEFAULT |
2619 FBINFO_HWACCEL_IMAGEBLIT |
2620 FBINFO_HWACCEL_FILLRECT |
2621 FBINFO_HWACCEL_COPYAREA |
2622 FBINFO_HWACCEL_YPAN;
2608 2623
2609#ifdef CONFIG_PMAC_BACKLIGHT 2624#ifdef CONFIG_PMAC_BACKLIGHT
2610 if (M64_HAS(G3_PB_1_1) && machine_is_compatible("PowerBook1,1")) { 2625 if (M64_HAS(G3_PB_1_1) && machine_is_compatible("PowerBook1,1")) {
@@ -2733,7 +2748,7 @@ aty_init_exit:
2733} 2748}
2734 2749
2735#ifdef CONFIG_ATARI 2750#ifdef CONFIG_ATARI
2736static int __init store_video_par(char *video_str, unsigned char m64_num) 2751static int __devinit store_video_par(char *video_str, unsigned char m64_num)
2737{ 2752{
2738 char *p; 2753 char *p;
2739 unsigned long vmembase, size, guiregbase; 2754 unsigned long vmembase, size, guiregbase;
@@ -3764,7 +3779,7 @@ static struct pci_driver atyfb_driver = {
3764#endif /* CONFIG_PCI */ 3779#endif /* CONFIG_PCI */
3765 3780
3766#ifndef MODULE 3781#ifndef MODULE
3767static int __init atyfb_setup(char *options) 3782static int __devinit atyfb_setup(char *options)
3768{ 3783{
3769 char *this_opt; 3784 char *this_opt;
3770 3785
@@ -3836,7 +3851,7 @@ static int __init atyfb_setup(char *options)
3836} 3851}
3837#endif /* MODULE */ 3852#endif /* MODULE */
3838 3853
3839static int __init atyfb_init(void) 3854static int __devinit atyfb_init(void)
3840{ 3855{
3841#ifndef MODULE 3856#ifndef MODULE
3842 char *option = NULL; 3857 char *option = NULL;
diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c
index c98f4a442134..1490e5e1c232 100644
--- a/drivers/video/aty/mach64_accel.c
+++ b/drivers/video/aty/mach64_accel.c
@@ -200,8 +200,6 @@ void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
200 if (!area->width || !area->height) 200 if (!area->width || !area->height)
201 return; 201 return;
202 if (!par->accel_flags) { 202 if (!par->accel_flags) {
203 if (par->blitter_may_be_busy)
204 wait_for_idle(par);
205 cfb_copyarea(info, area); 203 cfb_copyarea(info, area);
206 return; 204 return;
207 } 205 }
@@ -248,8 +246,6 @@ void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
248 if (!rect->width || !rect->height) 246 if (!rect->width || !rect->height)
249 return; 247 return;
250 if (!par->accel_flags) { 248 if (!par->accel_flags) {
251 if (par->blitter_may_be_busy)
252 wait_for_idle(par);
253 cfb_fillrect(info, rect); 249 cfb_fillrect(info, rect);
254 return; 250 return;
255 } 251 }
@@ -288,14 +284,10 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image)
288 return; 284 return;
289 if (!par->accel_flags || 285 if (!par->accel_flags ||
290 (image->depth != 1 && info->var.bits_per_pixel != image->depth)) { 286 (image->depth != 1 && info->var.bits_per_pixel != image->depth)) {
291 if (par->blitter_may_be_busy)
292 wait_for_idle(par);
293
294 cfb_imageblit(info, image); 287 cfb_imageblit(info, image);
295 return; 288 return;
296 } 289 }
297 290
298 wait_for_idle(par);
299 pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par); 291 pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par);
300 host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN; 292 host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN;
301 293
@@ -425,8 +417,6 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image)
425 } 417 }
426 } 418 }
427 419
428 wait_for_idle(par);
429
430 /* restore pix_width */ 420 /* restore pix_width */
431 wait_for_fifo(1, par); 421 wait_for_fifo(1, par);
432 aty_st_le32(DP_PIX_WIDTH, pix_width_save, par); 422 aty_st_le32(DP_PIX_WIDTH, pix_width_save, par);
diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c
index ad8b7496f853..2a7f381c330f 100644
--- a/drivers/video/aty/mach64_cursor.c
+++ b/drivers/video/aty/mach64_cursor.c
@@ -66,11 +66,6 @@ static const u8 cursor_bits_lookup[16] = {
66 0x01, 0x41, 0x11, 0x51, 0x05, 0x45, 0x15, 0x55 66 0x01, 0x41, 0x11, 0x51, 0x05, 0x45, 0x15, 0x55
67}; 67};
68 68
69static const u8 cursor_mask_lookup[16] = {
70 0xaa, 0x2a, 0x8a, 0x0a, 0xa2, 0x22, 0x82, 0x02,
71 0xa8, 0x28, 0x88, 0x08, 0xa0, 0x20, 0x80, 0x00
72};
73
74static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor) 69static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
75{ 70{
76 struct atyfb_par *par = (struct atyfb_par *) info->par; 71 struct atyfb_par *par = (struct atyfb_par *) info->par;
@@ -130,13 +125,13 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
130 fg_idx = cursor->image.fg_color; 125 fg_idx = cursor->image.fg_color;
131 bg_idx = cursor->image.bg_color; 126 bg_idx = cursor->image.bg_color;
132 127
133 fg = (info->cmap.red[fg_idx] << 24) | 128 fg = ((info->cmap.red[fg_idx] & 0xff) << 24) |
134 (info->cmap.green[fg_idx] << 16) | 129 ((info->cmap.green[fg_idx] & 0xff) << 16) |
135 (info->cmap.blue[fg_idx] << 8) | 15; 130 ((info->cmap.blue[fg_idx] & 0xff) << 8) | 0xff;
136 131
137 bg = (info->cmap.red[bg_idx] << 24) | 132 bg = ((info->cmap.red[bg_idx] & 0xff) << 24) |
138 (info->cmap.green[bg_idx] << 16) | 133 ((info->cmap.green[bg_idx] & 0xff) << 16) |
139 (info->cmap.blue[bg_idx] << 8); 134 ((info->cmap.blue[bg_idx] & 0xff) << 8);
140 135
141 wait_for_fifo(2, par); 136 wait_for_fifo(2, par);
142 aty_st_le32(CUR_CLR0, bg, par); 137 aty_st_le32(CUR_CLR0, bg, par);
@@ -166,19 +161,17 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
166 switch (cursor->rop) { 161 switch (cursor->rop) {
167 case ROP_XOR: 162 case ROP_XOR:
168 // Upper 4 bits of mask data 163 // Upper 4 bits of mask data
169 fb_writeb(cursor_mask_lookup[m >> 4 ] | 164 fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++);
170 cursor_bits_lookup[(b ^ m) >> 4], dst++);
171 // Lower 4 bits of mask 165 // Lower 4 bits of mask
172 fb_writeb(cursor_mask_lookup[m & 0x0f ] | 166 fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f],
173 cursor_bits_lookup[(b ^ m) & 0x0f], dst++); 167 dst++);
174 break; 168 break;
175 case ROP_COPY: 169 case ROP_COPY:
176 // Upper 4 bits of mask data 170 // Upper 4 bits of mask data
177 fb_writeb(cursor_mask_lookup[m >> 4 ] | 171 fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++);
178 cursor_bits_lookup[(b & m) >> 4], dst++);
179 // Lower 4 bits of mask 172 // Lower 4 bits of mask
180 fb_writeb(cursor_mask_lookup[m & 0x0f ] | 173 fb_writeb(cursor_bits_lookup[(b & m) & 0x0f],
181 cursor_bits_lookup[(b & m) & 0x0f], dst++); 174 dst++);
182 break; 175 break;
183 } 176 }
184 } 177 }
@@ -194,7 +187,7 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
194 return 0; 187 return 0;
195} 188}
196 189
197int __init aty_init_cursor(struct fb_info *info) 190int __devinit aty_init_cursor(struct fb_info *info)
198{ 191{
199 unsigned long addr; 192 unsigned long addr;
200 193
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index c5ecbb02e01d..68b15645b893 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -2379,7 +2379,6 @@ err_release_pci0:
2379err_release_fb: 2379err_release_fb:
2380 framebuffer_release(info); 2380 framebuffer_release(info);
2381err_disable: 2381err_disable:
2382 pci_disable_device(pdev);
2383err_out: 2382err_out:
2384 return ret; 2383 return ret;
2385} 2384}
@@ -2436,7 +2435,6 @@ static void __devexit radeonfb_pci_unregister (struct pci_dev *pdev)
2436#endif 2435#endif
2437 fb_dealloc_cmap(&info->cmap); 2436 fb_dealloc_cmap(&info->cmap);
2438 framebuffer_release(info); 2437 framebuffer_release(info);
2439 pci_disable_device(pdev);
2440} 2438}
2441 2439
2442 2440
diff --git a/drivers/video/au1100fb.c b/drivers/video/au1100fb.c
index 789450bb0bc9..d63c3f485853 100644
--- a/drivers/video/au1100fb.c
+++ b/drivers/video/au1100fb.c
@@ -7,6 +7,8 @@
7 * Karl Lessard <klessard@sunrisetelecom.com> 7 * Karl Lessard <klessard@sunrisetelecom.com>
8 * <c.pellegrin@exadron.com> 8 * <c.pellegrin@exadron.com>
9 * 9 *
10 * PM support added by Rodolfo Giometti <giometti@linux.it>
11 *
10 * Copyright 2002 MontaVista Software 12 * Copyright 2002 MontaVista Software
11 * Author: MontaVista Software, Inc. 13 * Author: MontaVista Software, Inc.
12 * ppopov@mvista.com or source@mvista.com 14 * ppopov@mvista.com or source@mvista.com
@@ -602,17 +604,52 @@ int au1100fb_drv_remove(struct device *dev)
602 return 0; 604 return 0;
603} 605}
604 606
607#ifdef CONFIG_PM
608static u32 sys_clksrc;
609static struct au1100fb_regs fbregs;
610
605int au1100fb_drv_suspend(struct device *dev, pm_message_t state) 611int au1100fb_drv_suspend(struct device *dev, pm_message_t state)
606{ 612{
607 /* TODO */ 613 struct au1100fb_device *fbdev = dev_get_drvdata(dev);
614
615 if (!fbdev)
616 return 0;
617
618 /* Save the clock source state */
619 sys_clksrc = au_readl(SYS_CLKSRC);
620
621 /* Blank the LCD */
622 au1100fb_fb_blank(VESA_POWERDOWN, &fbdev->info);
623
624 /* Stop LCD clocking */
625 au_writel(sys_clksrc & ~SYS_CS_ML_MASK, SYS_CLKSRC);
626
627 memcpy(&fbregs, fbdev->regs, sizeof(struct au1100fb_regs));
628
608 return 0; 629 return 0;
609} 630}
610 631
611int au1100fb_drv_resume(struct device *dev) 632int au1100fb_drv_resume(struct device *dev)
612{ 633{
613 /* TODO */ 634 struct au1100fb_device *fbdev = dev_get_drvdata(dev);
635
636 if (!fbdev)
637 return 0;
638
639 memcpy(fbdev->regs, &fbregs, sizeof(struct au1100fb_regs));
640
641 /* Restart LCD clocking */
642 au_writel(sys_clksrc, SYS_CLKSRC);
643
644 /* Unblank the LCD */
645 au1100fb_fb_blank(VESA_NO_BLANKING, &fbdev->info);
646
614 return 0; 647 return 0;
615} 648}
649#else
650#define au1100fb_drv_suspend NULL
651#define au1100fb_drv_resume NULL
652#endif
616 653
617static struct device_driver au1100fb_driver = { 654static struct device_driver au1100fb_driver = {
618 .name = "au1100-lcd", 655 .name = "au1100-lcd",
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index b895eaaa73fd..022f9d3473f5 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -10,7 +10,7 @@ menuconfig BACKLIGHT_LCD_SUPPORT
10 10
11config BACKLIGHT_CLASS_DEVICE 11config BACKLIGHT_CLASS_DEVICE
12 tristate "Lowlevel Backlight controls" 12 tristate "Lowlevel Backlight controls"
13 depends on BACKLIGHT_LCD_SUPPORT 13 depends on BACKLIGHT_LCD_SUPPORT && FB
14 default m 14 default m
15 help 15 help
16 This framework adds support for low-level control of the LCD 16 This framework adds support for low-level control of the LCD
@@ -26,7 +26,7 @@ config BACKLIGHT_DEVICE
26 26
27config LCD_CLASS_DEVICE 27config LCD_CLASS_DEVICE
28 tristate "Lowlevel LCD controls" 28 tristate "Lowlevel LCD controls"
29 depends on BACKLIGHT_LCD_SUPPORT 29 depends on BACKLIGHT_LCD_SUPPORT && FB
30 default m 30 default m
31 help 31 help
32 This framework adds support for low-level control of LCD. 32 This framework adds support for low-level control of LCD.
@@ -50,6 +50,14 @@ config BACKLIGHT_CORGI
50 If you have a Sharp Zaurus SL-C7xx, SL-Cxx00 or SL-6000x say y to enable the 50 If you have a Sharp Zaurus SL-C7xx, SL-Cxx00 or SL-6000x say y to enable the
51 backlight driver. 51 backlight driver.
52 52
53config BACKLIGHT_LOCOMO
54 tristate "Sharp LOCOMO LCD/Backlight Driver"
55 depends on BACKLIGHT_DEVICE && SHARP_LOCOMO
56 default y
57 help
58 If you have a Sharp Zaurus SL-5500 (Collie) or SL-5600 (Poodle) say y to
59 enable the LCD/backlight driver.
60
53config BACKLIGHT_HP680 61config BACKLIGHT_HP680
54 tristate "HP Jornada 680 Backlight Driver" 62 tristate "HP Jornada 680 Backlight Driver"
55 depends on BACKLIGHT_DEVICE && SH_HP6XX 63 depends on BACKLIGHT_DEVICE && SH_HP6XX
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 744210c38e74..65e5553fc849 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -4,4 +4,4 @@ obj-$(CONFIG_LCD_CLASS_DEVICE) += lcd.o
4obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o 4obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o
5obj-$(CONFIG_BACKLIGHT_CORGI) += corgi_bl.o 5obj-$(CONFIG_BACKLIGHT_CORGI) += corgi_bl.o
6obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o 6obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o
7obj-$(CONFIG_SHARP_LOCOMO) += locomolcd.o 7obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o
diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c
index 60831bb23685..bd879b7ec119 100644
--- a/drivers/video/backlight/locomolcd.c
+++ b/drivers/video/backlight/locomolcd.c
@@ -17,6 +17,8 @@
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/device.h> 18#include <linux/device.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/fb.h>
21#include <linux/backlight.h>
20 22
21#include <asm/hardware/locomo.h> 23#include <asm/hardware/locomo.h>
22#include <asm/irq.h> 24#include <asm/irq.h>
@@ -25,7 +27,10 @@
25 27
26#include "../../../arch/arm/mach-sa1100/generic.h" 28#include "../../../arch/arm/mach-sa1100/generic.h"
27 29
30static struct backlight_device *locomolcd_bl_device;
28static struct locomo_dev *locomolcd_dev; 31static struct locomo_dev *locomolcd_dev;
32static unsigned long locomolcd_flags;
33#define LOCOMOLCD_SUSPENDED 0x01
29 34
30static void locomolcd_on(int comadj) 35static void locomolcd_on(int comadj)
31{ 36{
@@ -89,12 +94,10 @@ void locomolcd_power(int on)
89 } 94 }
90 95
91 /* read comadj */ 96 /* read comadj */
92 if (comadj == -1) { 97 if (comadj == -1 && machine_is_collie())
93 if (machine_is_poodle()) 98 comadj = 128;
94 comadj = 118; 99 if (comadj == -1 && machine_is_poodle())
95 if (machine_is_collie()) 100 comadj = 118;
96 comadj = 128;
97 }
98 101
99 if (on) 102 if (on)
100 locomolcd_on(comadj); 103 locomolcd_on(comadj);
@@ -105,26 +108,100 @@ void locomolcd_power(int on)
105} 108}
106EXPORT_SYMBOL(locomolcd_power); 109EXPORT_SYMBOL(locomolcd_power);
107 110
108static int poodle_lcd_probe(struct locomo_dev *dev) 111
112static int current_intensity;
113
114static int locomolcd_set_intensity(struct backlight_device *bd)
115{
116 int intensity = bd->props->brightness;
117
118 if (bd->props->power != FB_BLANK_UNBLANK)
119 intensity = 0;
120 if (bd->props->fb_blank != FB_BLANK_UNBLANK)
121 intensity = 0;
122 if (locomolcd_flags & LOCOMOLCD_SUSPENDED)
123 intensity = 0;
124
125 switch (intensity) {
126 /* AC and non-AC are handled differently, but produce same results in sharp code? */
127 case 0: locomo_frontlight_set(locomolcd_dev, 0, 0, 161); break;
128 case 1: locomo_frontlight_set(locomolcd_dev, 117, 0, 161); break;
129 case 2: locomo_frontlight_set(locomolcd_dev, 163, 0, 148); break;
130 case 3: locomo_frontlight_set(locomolcd_dev, 194, 0, 161); break;
131 case 4: locomo_frontlight_set(locomolcd_dev, 194, 1, 161); break;
132
133 default:
134 return -ENODEV;
135 }
136 current_intensity = intensity;
137 return 0;
138}
139
140static int locomolcd_get_intensity(struct backlight_device *bd)
141{
142 return current_intensity;
143}
144
145static struct backlight_properties locomobl_data = {
146 .owner = THIS_MODULE,
147 .get_brightness = locomolcd_get_intensity,
148 .update_status = locomolcd_set_intensity,
149 .max_brightness = 4,
150};
151
152#ifdef CONFIG_PM
153static int locomolcd_suspend(struct locomo_dev *dev, pm_message_t state)
154{
155 locomolcd_flags |= LOCOMOLCD_SUSPENDED;
156 locomolcd_set_intensity(locomolcd_bl_device);
157 return 0;
158}
159
160static int locomolcd_resume(struct locomo_dev *dev)
161{
162 locomolcd_flags &= ~LOCOMOLCD_SUSPENDED;
163 locomolcd_set_intensity(locomolcd_bl_device);
164 return 0;
165}
166#else
167#define locomolcd_suspend NULL
168#define locomolcd_resume NULL
169#endif
170
171static int locomolcd_probe(struct locomo_dev *dev)
109{ 172{
110 unsigned long flags; 173 unsigned long flags;
111 174
112 local_irq_save(flags); 175 local_irq_save(flags);
113 locomolcd_dev = dev; 176 locomolcd_dev = dev;
114 177
178 locomo_gpio_set_dir(dev, LOCOMO_GPIO_FL_VR, 0);
179
115 /* the poodle_lcd_power function is called for the first time 180 /* the poodle_lcd_power function is called for the first time
116 * from fs_initcall, which is before locomo is activated. 181 * from fs_initcall, which is before locomo is activated.
117 * We need to recall poodle_lcd_power here*/ 182 * We need to recall poodle_lcd_power here*/
118#ifdef CONFIG_MACH_POODLE 183 if (machine_is_poodle())
119 locomolcd_power(1); 184 locomolcd_power(1);
120#endif 185
121 local_irq_restore(flags); 186 local_irq_restore(flags);
187
188 locomolcd_bl_device = backlight_device_register("locomo-bl", NULL, &locomobl_data);
189
190 if (IS_ERR (locomolcd_bl_device))
191 return PTR_ERR (locomolcd_bl_device);
192
193 /* Set up frontlight so that screen is readable */
194 locomobl_data.brightness = 2;
195 locomolcd_set_intensity(locomolcd_bl_device);
196
122 return 0; 197 return 0;
123} 198}
124 199
125static int poodle_lcd_remove(struct locomo_dev *dev) 200static int locomolcd_remove(struct locomo_dev *dev)
126{ 201{
127 unsigned long flags; 202 unsigned long flags;
203
204 backlight_device_unregister(locomolcd_bl_device);
128 local_irq_save(flags); 205 local_irq_save(flags);
129 locomolcd_dev = NULL; 206 locomolcd_dev = NULL;
130 local_irq_restore(flags); 207 local_irq_restore(flags);
@@ -136,19 +213,33 @@ static struct locomo_driver poodle_lcd_driver = {
136 .name = "locomo-backlight", 213 .name = "locomo-backlight",
137 }, 214 },
138 .devid = LOCOMO_DEVID_BACKLIGHT, 215 .devid = LOCOMO_DEVID_BACKLIGHT,
139 .probe = poodle_lcd_probe, 216 .probe = locomolcd_probe,
140 .remove = poodle_lcd_remove, 217 .remove = locomolcd_remove,
218 .suspend = locomolcd_suspend,
219 .resume = locomolcd_resume,
141}; 220};
142 221
143static int __init poodle_lcd_init(void) 222
223static int __init locomolcd_init(void)
144{ 224{
145 int ret = locomo_driver_register(&poodle_lcd_driver); 225 int ret = locomo_driver_register(&poodle_lcd_driver);
146 if (ret) return ret; 226 if (ret)
227 return ret;
147 228
148#ifdef CONFIG_SA1100_COLLIE 229#ifdef CONFIG_SA1100_COLLIE
149 sa1100fb_lcd_power = locomolcd_power; 230 sa1100fb_lcd_power = locomolcd_power;
150#endif 231#endif
151 return 0; 232 return 0;
152} 233}
153device_initcall(poodle_lcd_init);
154 234
235static void __exit locomolcd_exit(void)
236{
237 locomo_driver_unregister(&poodle_lcd_driver);
238}
239
240module_init(locomolcd_init);
241module_exit(locomolcd_exit);
242
243MODULE_AUTHOR("John Lenz <lenz@cs.wisc.edu>, Pavel Machek <pavel@suse.cz>");
244MODULE_DESCRIPTION("Collie LCD driver");
245MODULE_LICENSE("GPL");
diff --git a/drivers/video/cfbimgblt.c b/drivers/video/cfbimgblt.c
index 8ba6152db2fd..ad8a89bf8eae 100644
--- a/drivers/video/cfbimgblt.c
+++ b/drivers/video/cfbimgblt.c
@@ -230,6 +230,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
230 tab = cfb_tab16; 230 tab = cfb_tab16;
231 break; 231 break;
232 case 32: 232 case 32:
233 default:
233 tab = cfb_tab32; 234 tab = cfb_tab32;
234 break; 235 break;
235 } 236 }
diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c
index 1103010af54a..dda240eb7360 100644
--- a/drivers/video/cirrusfb.c
+++ b/drivers/video/cirrusfb.c
@@ -2227,7 +2227,6 @@ static void cirrusfb_pci_unmap (struct cirrusfb_info *cinfo)
2227 release_region(0x3C0, 32); 2227 release_region(0x3C0, 32);
2228 pci_release_regions(pdev); 2228 pci_release_regions(pdev);
2229 framebuffer_release(cinfo->info); 2229 framebuffer_release(cinfo->info);
2230 pci_disable_device(pdev);
2231} 2230}
2232#endif /* CONFIG_PCI */ 2231#endif /* CONFIG_PCI */
2233 2232
@@ -2458,7 +2457,6 @@ err_release_regions:
2458err_release_fb: 2457err_release_fb:
2459 framebuffer_release(info); 2458 framebuffer_release(info);
2460err_disable: 2459err_disable:
2461 pci_disable_device(pdev);
2462err_out: 2460err_out:
2463 return ret; 2461 return ret;
2464} 2462}
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 47ba1a79adcd..5dc4083552d8 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -125,6 +125,8 @@ static int softback_lines;
125static int first_fb_vc; 125static int first_fb_vc;
126static int last_fb_vc = MAX_NR_CONSOLES - 1; 126static int last_fb_vc = MAX_NR_CONSOLES - 1;
127static int fbcon_is_default = 1; 127static int fbcon_is_default = 1;
128static int fbcon_has_exited;
129
128/* font data */ 130/* font data */
129static char fontname[40]; 131static char fontname[40];
130 132
@@ -140,7 +142,6 @@ static const struct consw fb_con;
140 142
141#define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row) 143#define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row)
142 144
143static void fbcon_free_font(struct display *);
144static int fbcon_set_origin(struct vc_data *); 145static int fbcon_set_origin(struct vc_data *);
145 146
146#define CURSOR_DRAW_DELAY (1) 147#define CURSOR_DRAW_DELAY (1)
@@ -194,6 +195,9 @@ static void fbcon_redraw_move(struct vc_data *vc, struct display *p,
194 int line, int count, int dy); 195 int line, int count, int dy);
195static void fbcon_modechanged(struct fb_info *info); 196static void fbcon_modechanged(struct fb_info *info);
196static void fbcon_set_all_vcs(struct fb_info *info); 197static void fbcon_set_all_vcs(struct fb_info *info);
198static void fbcon_start(void);
199static void fbcon_exit(void);
200static struct class_device *fbcon_class_device;
197 201
198#ifdef CONFIG_MAC 202#ifdef CONFIG_MAC
199/* 203/*
@@ -252,7 +256,7 @@ static void fbcon_rotate_all(struct fb_info *info, u32 rotate)
252 if (!ops || ops->currcon < 0 || rotate > 3) 256 if (!ops || ops->currcon < 0 || rotate > 3)
253 return; 257 return;
254 258
255 for (i = 0; i < MAX_NR_CONSOLES; i++) { 259 for (i = first_fb_vc; i <= last_fb_vc; i++) {
256 vc = vc_cons[i].d; 260 vc = vc_cons[i].d;
257 if (!vc || vc->vc_mode != KD_TEXT || 261 if (!vc || vc->vc_mode != KD_TEXT ||
258 registered_fb[con2fb_map[i]] != info) 262 registered_fb[con2fb_map[i]] != info)
@@ -389,15 +393,18 @@ static void fb_flashcursor(void *private)
389 int c; 393 int c;
390 int mode; 394 int mode;
391 395
392 if (ops->currcon != -1) 396 acquire_console_sem();
397 if (ops && ops->currcon != -1)
393 vc = vc_cons[ops->currcon].d; 398 vc = vc_cons[ops->currcon].d;
394 399
395 if (!vc || !CON_IS_VISIBLE(vc) || 400 if (!vc || !CON_IS_VISIBLE(vc) ||
396 fbcon_is_inactive(vc, info) || 401 fbcon_is_inactive(vc, info) ||
397 registered_fb[con2fb_map[vc->vc_num]] != info || 402 registered_fb[con2fb_map[vc->vc_num]] != info ||
398 vc_cons[ops->currcon].d->vc_deccm != 1) 403 vc_cons[ops->currcon].d->vc_deccm != 1) {
404 release_console_sem();
399 return; 405 return;
400 acquire_console_sem(); 406 }
407
401 p = &fb_display[vc->vc_num]; 408 p = &fb_display[vc->vc_num];
402 c = scr_readw((u16 *) vc->vc_pos); 409 c = scr_readw((u16 *) vc->vc_pos);
403 mode = (!ops->cursor_flash || ops->cursor_state.enable) ? 410 mode = (!ops->cursor_flash || ops->cursor_state.enable) ?
@@ -528,7 +535,7 @@ static int search_fb_in_map(int idx)
528{ 535{
529 int i, retval = 0; 536 int i, retval = 0;
530 537
531 for (i = 0; i < MAX_NR_CONSOLES; i++) { 538 for (i = first_fb_vc; i <= last_fb_vc; i++) {
532 if (con2fb_map[i] == idx) 539 if (con2fb_map[i] == idx)
533 retval = 1; 540 retval = 1;
534 } 541 }
@@ -539,7 +546,7 @@ static int search_for_mapped_con(void)
539{ 546{
540 int i, retval = 0; 547 int i, retval = 0;
541 548
542 for (i = 0; i < MAX_NR_CONSOLES; i++) { 549 for (i = first_fb_vc; i <= last_fb_vc; i++) {
543 if (con2fb_map[i] != -1) 550 if (con2fb_map[i] != -1)
544 retval = 1; 551 retval = 1;
545 } 552 }
@@ -561,6 +568,7 @@ static int fbcon_takeover(int show_logo)
561 568
562 err = take_over_console(&fb_con, first_fb_vc, last_fb_vc, 569 err = take_over_console(&fb_con, first_fb_vc, last_fb_vc,
563 fbcon_is_default); 570 fbcon_is_default);
571
564 if (err) { 572 if (err) {
565 for (i = first_fb_vc; i <= last_fb_vc; i++) { 573 for (i = first_fb_vc; i <= last_fb_vc; i++) {
566 con2fb_map[i] = -1; 574 con2fb_map[i] = -1;
@@ -795,8 +803,8 @@ static int set_con2fb_map(int unit, int newidx, int user)
795 if (oldidx == newidx) 803 if (oldidx == newidx)
796 return 0; 804 return 0;
797 805
798 if (!info) 806 if (!info || fbcon_has_exited)
799 err = -EINVAL; 807 return -EINVAL;
800 808
801 if (!err && !search_for_mapped_con()) { 809 if (!err && !search_for_mapped_con()) {
802 info_idx = newidx; 810 info_idx = newidx;
@@ -832,6 +840,9 @@ static int set_con2fb_map(int unit, int newidx, int user)
832 con2fb_init_display(vc, info, unit, show_logo); 840 con2fb_init_display(vc, info, unit, show_logo);
833 } 841 }
834 842
843 if (!search_fb_in_map(info_idx))
844 info_idx = newidx;
845
835 release_console_sem(); 846 release_console_sem();
836 return err; 847 return err;
837} 848}
@@ -1034,6 +1045,7 @@ static const char *fbcon_startup(void)
1034#endif /* CONFIG_MAC */ 1045#endif /* CONFIG_MAC */
1035 1046
1036 fbcon_add_cursor_timer(info); 1047 fbcon_add_cursor_timer(info);
1048 fbcon_has_exited = 0;
1037 return display_desc; 1049 return display_desc;
1038} 1050}
1039 1051
@@ -1061,17 +1073,36 @@ static void fbcon_init(struct vc_data *vc, int init)
1061 1073
1062 /* If we are not the first console on this 1074 /* If we are not the first console on this
1063 fb, copy the font from that console */ 1075 fb, copy the font from that console */
1064 t = &fb_display[svc->vc_num]; 1076 t = &fb_display[fg_console];
1065 if (!vc->vc_font.data) { 1077 if (!p->fontdata) {
1066 vc->vc_font.data = (void *)(p->fontdata = t->fontdata); 1078 if (t->fontdata) {
1067 vc->vc_font.width = (*default_mode)->vc_font.width; 1079 struct vc_data *fvc = vc_cons[fg_console].d;
1068 vc->vc_font.height = (*default_mode)->vc_font.height; 1080
1069 p->userfont = t->userfont; 1081 vc->vc_font.data = (void *)(p->fontdata =
1070 if (p->userfont) 1082 fvc->vc_font.data);
1071 REFCOUNT(p->fontdata)++; 1083 vc->vc_font.width = fvc->vc_font.width;
1084 vc->vc_font.height = fvc->vc_font.height;
1085 p->userfont = t->userfont;
1086
1087 if (p->userfont)
1088 REFCOUNT(p->fontdata)++;
1089 } else {
1090 const struct font_desc *font = NULL;
1091
1092 if (!fontname[0] || !(font = find_font(fontname)))
1093 font = get_default_font(info->var.xres,
1094 info->var.yres);
1095 vc->vc_font.width = font->width;
1096 vc->vc_font.height = font->height;
1097 vc->vc_font.data = (void *)(p->fontdata = font->data);
1098 vc->vc_font.charcount = 256; /* FIXME Need to
1099 support more fonts */
1100 }
1072 } 1101 }
1102
1073 if (p->userfont) 1103 if (p->userfont)
1074 charcnt = FNTCHARCNT(p->fontdata); 1104 charcnt = FNTCHARCNT(p->fontdata);
1105
1075 vc->vc_can_do_color = (fb_get_color_depth(&info->var, &info->fix)!=1); 1106 vc->vc_can_do_color = (fb_get_color_depth(&info->var, &info->fix)!=1);
1076 vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800; 1107 vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
1077 if (charcnt == 256) { 1108 if (charcnt == 256) {
@@ -1145,13 +1176,47 @@ static void fbcon_init(struct vc_data *vc, int init)
1145 ops->p = &fb_display[fg_console]; 1176 ops->p = &fb_display[fg_console];
1146} 1177}
1147 1178
1179static void fbcon_free_font(struct display *p)
1180{
1181 if (p->userfont && p->fontdata && (--REFCOUNT(p->fontdata) == 0))
1182 kfree(p->fontdata - FONT_EXTRA_WORDS * sizeof(int));
1183 p->fontdata = NULL;
1184 p->userfont = 0;
1185}
1186
1148static void fbcon_deinit(struct vc_data *vc) 1187static void fbcon_deinit(struct vc_data *vc)
1149{ 1188{
1150 struct display *p = &fb_display[vc->vc_num]; 1189 struct display *p = &fb_display[vc->vc_num];
1190 struct fb_info *info;
1191 struct fbcon_ops *ops;
1192 int idx;
1151 1193
1152 if (info_idx != -1)
1153 return;
1154 fbcon_free_font(p); 1194 fbcon_free_font(p);
1195 idx = con2fb_map[vc->vc_num];
1196
1197 if (idx == -1)
1198 goto finished;
1199
1200 info = registered_fb[idx];
1201
1202 if (!info)
1203 goto finished;
1204
1205 ops = info->fbcon_par;
1206
1207 if (!ops)
1208 goto finished;
1209
1210 if (CON_IS_VISIBLE(vc))
1211 fbcon_del_cursor_timer(info);
1212
1213 ops->flags &= ~FBCON_FLAGS_INIT;
1214finished:
1215
1216 if (!con_is_bound(&fb_con))
1217 fbcon_exit();
1218
1219 return;
1155} 1220}
1156 1221
1157/* ====================================================================== */ 1222/* ====================================================================== */
@@ -2099,12 +2164,11 @@ static int fbcon_switch(struct vc_data *vc)
2099 if (info->fbops->fb_set_par) 2164 if (info->fbops->fb_set_par)
2100 info->fbops->fb_set_par(info); 2165 info->fbops->fb_set_par(info);
2101 2166
2102 if (old_info != info) { 2167 if (old_info != info)
2103 fbcon_del_cursor_timer(old_info); 2168 fbcon_del_cursor_timer(old_info);
2104 fbcon_add_cursor_timer(info);
2105 }
2106 } 2169 }
2107 2170
2171 fbcon_add_cursor_timer(info);
2108 set_blitting_type(vc, info); 2172 set_blitting_type(vc, info);
2109 ops->cursor_reset = 1; 2173 ops->cursor_reset = 1;
2110 2174
@@ -2222,14 +2286,6 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
2222 return 0; 2286 return 0;
2223} 2287}
2224 2288
2225static void fbcon_free_font(struct display *p)
2226{
2227 if (p->userfont && p->fontdata && (--REFCOUNT(p->fontdata) == 0))
2228 kfree(p->fontdata - FONT_EXTRA_WORDS * sizeof(int));
2229 p->fontdata = NULL;
2230 p->userfont = 0;
2231}
2232
2233static int fbcon_get_font(struct vc_data *vc, struct console_font *font) 2289static int fbcon_get_font(struct vc_data *vc, struct console_font *font)
2234{ 2290{
2235 u8 *fontdata = vc->vc_font.data; 2291 u8 *fontdata = vc->vc_font.data;
@@ -2443,7 +2499,7 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, unsigne
2443 2499
2444 FNTSUM(new_data) = csum; 2500 FNTSUM(new_data) = csum;
2445 /* Check if the same font is on some other console already */ 2501 /* Check if the same font is on some other console already */
2446 for (i = 0; i < MAX_NR_CONSOLES; i++) { 2502 for (i = first_fb_vc; i <= last_fb_vc; i++) {
2447 struct vc_data *tmp = vc_cons[i].d; 2503 struct vc_data *tmp = vc_cons[i].d;
2448 2504
2449 if (fb_display[i].userfont && 2505 if (fb_display[i].userfont &&
@@ -2768,7 +2824,7 @@ static void fbcon_set_all_vcs(struct fb_info *info)
2768 if (!ops || ops->currcon < 0) 2824 if (!ops || ops->currcon < 0)
2769 return; 2825 return;
2770 2826
2771 for (i = 0; i < MAX_NR_CONSOLES; i++) { 2827 for (i = first_fb_vc; i <= last_fb_vc; i++) {
2772 vc = vc_cons[i].d; 2828 vc = vc_cons[i].d;
2773 if (!vc || vc->vc_mode != KD_TEXT || 2829 if (!vc || vc->vc_mode != KD_TEXT ||
2774 registered_fb[con2fb_map[i]] != info) 2830 registered_fb[con2fb_map[i]] != info)
@@ -2830,22 +2886,57 @@ static int fbcon_mode_deleted(struct fb_info *info,
2830 return found; 2886 return found;
2831} 2887}
2832 2888
2889static int fbcon_fb_unregistered(int idx)
2890{
2891 int i;
2892
2893 for (i = first_fb_vc; i <= last_fb_vc; i++) {
2894 if (con2fb_map[i] == idx)
2895 con2fb_map[i] = -1;
2896 }
2897
2898 if (idx == info_idx) {
2899 info_idx = -1;
2900
2901 for (i = 0; i < FB_MAX; i++) {
2902 if (registered_fb[i] != NULL) {
2903 info_idx = i;
2904 break;
2905 }
2906 }
2907 }
2908
2909 if (info_idx != -1) {
2910 for (i = first_fb_vc; i <= last_fb_vc; i++) {
2911 if (con2fb_map[i] == -1)
2912 con2fb_map[i] = info_idx;
2913 }
2914 }
2915
2916 if (!num_registered_fb)
2917 unregister_con_driver(&fb_con);
2918
2919 return 0;
2920}
2921
2833static int fbcon_fb_registered(int idx) 2922static int fbcon_fb_registered(int idx)
2834{ 2923{
2835 int ret = 0, i; 2924 int ret = 0, i;
2836 2925
2837 if (info_idx == -1) { 2926 if (info_idx == -1) {
2838 for (i = 0; i < MAX_NR_CONSOLES; i++) { 2927 for (i = first_fb_vc; i <= last_fb_vc; i++) {
2839 if (con2fb_map_boot[i] == idx) { 2928 if (con2fb_map_boot[i] == idx) {
2840 info_idx = idx; 2929 info_idx = idx;
2841 break; 2930 break;
2842 } 2931 }
2843 } 2932 }
2933
2844 if (info_idx != -1) 2934 if (info_idx != -1)
2845 ret = fbcon_takeover(1); 2935 ret = fbcon_takeover(1);
2846 } else { 2936 } else {
2847 for (i = 0; i < MAX_NR_CONSOLES; i++) { 2937 for (i = first_fb_vc; i <= last_fb_vc; i++) {
2848 if (con2fb_map_boot[i] == idx) 2938 if (con2fb_map_boot[i] == idx &&
2939 con2fb_map[i] == -1)
2849 set_con2fb_map(i, idx, 0); 2940 set_con2fb_map(i, idx, 0);
2850 } 2941 }
2851 } 2942 }
@@ -2882,7 +2973,7 @@ static void fbcon_new_modelist(struct fb_info *info)
2882 struct fb_var_screeninfo var; 2973 struct fb_var_screeninfo var;
2883 struct fb_videomode *mode; 2974 struct fb_videomode *mode;
2884 2975
2885 for (i = 0; i < MAX_NR_CONSOLES; i++) { 2976 for (i = first_fb_vc; i <= last_fb_vc; i++) {
2886 if (registered_fb[con2fb_map[i]] != info) 2977 if (registered_fb[con2fb_map[i]] != info)
2887 continue; 2978 continue;
2888 if (!fb_display[i].mode) 2979 if (!fb_display[i].mode)
@@ -2910,6 +3001,14 @@ static int fbcon_event_notify(struct notifier_block *self,
2910 struct fb_con2fbmap *con2fb; 3001 struct fb_con2fbmap *con2fb;
2911 int ret = 0; 3002 int ret = 0;
2912 3003
3004 /*
3005 * ignore all events except driver registration and deregistration
3006 * if fbcon is not active
3007 */
3008 if (fbcon_has_exited && !(action == FB_EVENT_FB_REGISTERED ||
3009 action == FB_EVENT_FB_UNREGISTERED))
3010 goto done;
3011
2913 switch(action) { 3012 switch(action) {
2914 case FB_EVENT_SUSPEND: 3013 case FB_EVENT_SUSPEND:
2915 fbcon_suspended(info); 3014 fbcon_suspended(info);
@@ -2930,6 +3029,9 @@ static int fbcon_event_notify(struct notifier_block *self,
2930 case FB_EVENT_FB_REGISTERED: 3029 case FB_EVENT_FB_REGISTERED:
2931 ret = fbcon_fb_registered(info->node); 3030 ret = fbcon_fb_registered(info->node);
2932 break; 3031 break;
3032 case FB_EVENT_FB_UNREGISTERED:
3033 ret = fbcon_fb_unregistered(info->node);
3034 break;
2933 case FB_EVENT_SET_CONSOLE_MAP: 3035 case FB_EVENT_SET_CONSOLE_MAP:
2934 con2fb = event->data; 3036 con2fb = event->data;
2935 ret = set_con2fb_map(con2fb->console - 1, 3037 ret = set_con2fb_map(con2fb->console - 1,
@@ -2945,16 +3047,9 @@ static int fbcon_event_notify(struct notifier_block *self,
2945 case FB_EVENT_NEW_MODELIST: 3047 case FB_EVENT_NEW_MODELIST:
2946 fbcon_new_modelist(info); 3048 fbcon_new_modelist(info);
2947 break; 3049 break;
2948 case FB_EVENT_SET_CON_ROTATE:
2949 fbcon_rotate(info, *(int *)event->data);
2950 break;
2951 case FB_EVENT_GET_CON_ROTATE:
2952 ret = fbcon_get_rotate(info);
2953 break;
2954 case FB_EVENT_SET_CON_ROTATE_ALL:
2955 fbcon_rotate_all(info, *(int *)event->data);
2956 } 3050 }
2957 3051
3052done:
2958 return ret; 3053 return ret;
2959} 3054}
2960 3055
@@ -2992,27 +3087,181 @@ static struct notifier_block fbcon_event_notifier = {
2992 .notifier_call = fbcon_event_notify, 3087 .notifier_call = fbcon_event_notify,
2993}; 3088};
2994 3089
2995static int __init fb_console_init(void) 3090static ssize_t store_rotate(struct class_device *class_device,
3091 const char *buf, size_t count)
2996{ 3092{
2997 int i; 3093 struct fb_info *info;
3094 int rotate, idx;
3095 char **last = NULL;
3096
3097 if (fbcon_has_exited)
3098 return count;
2998 3099
2999 acquire_console_sem(); 3100 acquire_console_sem();
3000 fb_register_client(&fbcon_event_notifier); 3101 idx = con2fb_map[fg_console];
3102
3103 if (idx == -1 || registered_fb[idx] == NULL)
3104 goto err;
3105
3106 info = registered_fb[idx];
3107 rotate = simple_strtoul(buf, last, 0);
3108 fbcon_rotate(info, rotate);
3109err:
3001 release_console_sem(); 3110 release_console_sem();
3111 return count;
3112}
3002 3113
3003 for (i = 0; i < MAX_NR_CONSOLES; i++) 3114static ssize_t store_rotate_all(struct class_device *class_device,
3004 con2fb_map[i] = -1; 3115 const char *buf, size_t count)
3116{
3117 struct fb_info *info;
3118 int rotate, idx;
3119 char **last = NULL;
3120
3121 if (fbcon_has_exited)
3122 return count;
3123
3124 acquire_console_sem();
3125 idx = con2fb_map[fg_console];
3126
3127 if (idx == -1 || registered_fb[idx] == NULL)
3128 goto err;
3005 3129
3130 info = registered_fb[idx];
3131 rotate = simple_strtoul(buf, last, 0);
3132 fbcon_rotate_all(info, rotate);
3133err:
3134 release_console_sem();
3135 return count;
3136}
3137
3138static ssize_t show_rotate(struct class_device *class_device, char *buf)
3139{
3140 struct fb_info *info;
3141 int rotate = 0, idx;
3142
3143 if (fbcon_has_exited)
3144 return 0;
3145
3146 acquire_console_sem();
3147 idx = con2fb_map[fg_console];
3148
3149 if (idx == -1 || registered_fb[idx] == NULL)
3150 goto err;
3151
3152 info = registered_fb[idx];
3153 rotate = fbcon_get_rotate(info);
3154err:
3155 release_console_sem();
3156 return snprintf(buf, PAGE_SIZE, "%d\n", rotate);
3157}
3158
3159static struct class_device_attribute class_device_attrs[] = {
3160 __ATTR(rotate, S_IRUGO|S_IWUSR, show_rotate, store_rotate),
3161 __ATTR(rotate_all, S_IWUSR, NULL, store_rotate_all),
3162};
3163
3164static int fbcon_init_class_device(void)
3165{
3166 int i;
3167
3168 for (i = 0; i < ARRAY_SIZE(class_device_attrs); i++)
3169 class_device_create_file(fbcon_class_device,
3170 &class_device_attrs[i]);
3171 return 0;
3172}
3173
3174static void fbcon_start(void)
3175{
3006 if (num_registered_fb) { 3176 if (num_registered_fb) {
3177 int i;
3178
3179 acquire_console_sem();
3180
3007 for (i = 0; i < FB_MAX; i++) { 3181 for (i = 0; i < FB_MAX; i++) {
3008 if (registered_fb[i] != NULL) { 3182 if (registered_fb[i] != NULL) {
3009 info_idx = i; 3183 info_idx = i;
3010 break; 3184 break;
3011 } 3185 }
3012 } 3186 }
3187
3188 release_console_sem();
3013 fbcon_takeover(0); 3189 fbcon_takeover(0);
3014 } 3190 }
3191}
3192
3193static void fbcon_exit(void)
3194{
3195 struct fb_info *info;
3196 int i, j, mapped;
3197
3198 if (fbcon_has_exited)
3199 return;
3200
3201#ifdef CONFIG_ATARI
3202 free_irq(IRQ_AUTO_4, fbcon_vbl_handler);
3203#endif
3204#ifdef CONFIG_MAC
3205 if (MACH_IS_MAC && vbl_detected)
3206 free_irq(IRQ_MAC_VBL, fbcon_vbl_handler);
3207#endif
3208
3209 kfree((void *)softback_buf);
3210 softback_buf = 0UL;
3211
3212 for (i = 0; i < FB_MAX; i++) {
3213 mapped = 0;
3214 info = registered_fb[i];
3215
3216 if (info == NULL)
3217 continue;
3218
3219 for (j = first_fb_vc; j <= last_fb_vc; j++) {
3220 if (con2fb_map[j] == i)
3221 mapped = 1;
3222 }
3223
3224 if (mapped) {
3225 if (info->fbops->fb_release)
3226 info->fbops->fb_release(info, 0);
3227 module_put(info->fbops->owner);
3228
3229 if (info->fbcon_par) {
3230 fbcon_del_cursor_timer(info);
3231 kfree(info->fbcon_par);
3232 info->fbcon_par = NULL;
3233 }
3015 3234
3235 if (info->queue.func == fb_flashcursor)
3236 info->queue.func = NULL;
3237 }
3238 }
3239
3240 fbcon_has_exited = 1;
3241}
3242
3243static int __init fb_console_init(void)
3244{
3245 int i;
3246
3247 acquire_console_sem();
3248 fb_register_client(&fbcon_event_notifier);
3249 fbcon_class_device =
3250 class_device_create(fb_class, NULL, MKDEV(0, 0), NULL, "fbcon");
3251
3252 if (IS_ERR(fbcon_class_device)) {
3253 printk(KERN_WARNING "Unable to create class_device "
3254 "for fbcon; errno = %ld\n",
3255 PTR_ERR(fbcon_class_device));
3256 fbcon_class_device = NULL;
3257 } else
3258 fbcon_init_class_device();
3259
3260 for (i = 0; i < MAX_NR_CONSOLES; i++)
3261 con2fb_map[i] = -1;
3262
3263 release_console_sem();
3264 fbcon_start();
3016 return 0; 3265 return 0;
3017} 3266}
3018 3267
@@ -3020,12 +3269,24 @@ module_init(fb_console_init);
3020 3269
3021#ifdef MODULE 3270#ifdef MODULE
3022 3271
3272static void __exit fbcon_deinit_class_device(void)
3273{
3274 int i;
3275
3276 for (i = 0; i < ARRAY_SIZE(class_device_attrs); i++)
3277 class_device_remove_file(fbcon_class_device,
3278 &class_device_attrs[i]);
3279}
3280
3023static void __exit fb_console_exit(void) 3281static void __exit fb_console_exit(void)
3024{ 3282{
3025 acquire_console_sem(); 3283 acquire_console_sem();
3026 fb_unregister_client(&fbcon_event_notifier); 3284 fb_unregister_client(&fbcon_event_notifier);
3285 fbcon_deinit_class_device();
3286 class_device_destroy(fb_class, MKDEV(0, 0));
3287 fbcon_exit();
3027 release_console_sem(); 3288 release_console_sem();
3028 give_up_console(&fb_con); 3289 unregister_con_driver(&fb_con);
3029} 3290}
3030 3291
3031module_exit(fb_console_exit); 3292module_exit(fb_console_exit);
diff --git a/drivers/video/console/fbcon.h b/drivers/video/console/fbcon.h
index c38c3d8e7a74..3487a636370a 100644
--- a/drivers/video/console/fbcon.h
+++ b/drivers/video/console/fbcon.h
@@ -175,6 +175,7 @@ extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
175#endif 175#endif
176extern void fbcon_set_bitops(struct fbcon_ops *ops); 176extern void fbcon_set_bitops(struct fbcon_ops *ops);
177extern int soft_cursor(struct fb_info *info, struct fb_cursor *cursor); 177extern int soft_cursor(struct fb_info *info, struct fb_cursor *cursor);
178extern struct class *fb_class;
178 179
179#define FBCON_ATTRIBUTE_UNDERLINE 1 180#define FBCON_ATTRIBUTE_UNDERLINE 1
180#define FBCON_ATTRIBUTE_REVERSE 2 181#define FBCON_ATTRIBUTE_REVERSE 2
diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c
index 7f939d066a5a..c89f90edf8ac 100644
--- a/drivers/video/console/mdacon.c
+++ b/drivers/video/console/mdacon.c
@@ -308,7 +308,7 @@ static void __init mda_initialize(void)
308 outb_p(0x00, mda_gfx_port); 308 outb_p(0x00, mda_gfx_port);
309} 309}
310 310
311static const char __init *mdacon_startup(void) 311static const char *mdacon_startup(void)
312{ 312{
313 mda_num_columns = 80; 313 mda_num_columns = 80;
314 mda_num_lines = 25; 314 mda_num_lines = 25;
diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index e99fe30e568c..03041311711b 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -51,6 +51,7 @@ static int topscan;
51static int xcurs_correction = 29; 51static int xcurs_correction = 29;
52static int newport_xsize; 52static int newport_xsize;
53static int newport_ysize; 53static int newport_ysize;
54static int newport_has_init;
54 55
55static int newport_set_def_font(int unit, struct console_font *op); 56static int newport_set_def_font(int unit, struct console_font *op);
56 57
@@ -283,6 +284,15 @@ static void newport_get_revisions(void)
283 xcurs_correction = 21; 284 xcurs_correction = 21;
284} 285}
285 286
287static void newport_exit(void)
288{
289 int i;
290
291 /* free memory used by user font */
292 for (i = 0; i < MAX_NR_CONSOLES; i++)
293 newport_set_def_font(i, NULL);
294}
295
286/* Can't be __init, take_over_console may call it later */ 296/* Can't be __init, take_over_console may call it later */
287static const char *newport_startup(void) 297static const char *newport_startup(void)
288{ 298{
@@ -290,8 +300,10 @@ static const char *newport_startup(void)
290 300
291 if (!sgi_gfxaddr) 301 if (!sgi_gfxaddr)
292 return NULL; 302 return NULL;
293 npregs = (struct newport_regs *) /* ioremap cannot fail */ 303
294 ioremap(sgi_gfxaddr, sizeof(struct newport_regs)); 304 if (!npregs)
305 npregs = (struct newport_regs *)/* ioremap cannot fail */
306 ioremap(sgi_gfxaddr, sizeof(struct newport_regs));
295 npregs->cset.config = NPORT_CFG_GD0; 307 npregs->cset.config = NPORT_CFG_GD0;
296 308
297 if (newport_wait(npregs)) 309 if (newport_wait(npregs))
@@ -307,11 +319,11 @@ static const char *newport_startup(void)
307 newport_reset(); 319 newport_reset();
308 newport_get_revisions(); 320 newport_get_revisions();
309 newport_get_screensize(); 321 newport_get_screensize();
322 newport_has_init = 1;
310 323
311 return "SGI Newport"; 324 return "SGI Newport";
312 325
313out_unmap: 326out_unmap:
314 iounmap((void *)npregs);
315 return NULL; 327 return NULL;
316} 328}
317 329
@@ -324,11 +336,10 @@ static void newport_init(struct vc_data *vc, int init)
324 336
325static void newport_deinit(struct vc_data *c) 337static void newport_deinit(struct vc_data *c)
326{ 338{
327 int i; 339 if (!con_is_bound(&newport_con) && newport_has_init) {
328 340 newport_exit();
329 /* free memory used by user font */ 341 newport_has_init = 0;
330 for (i = 0; i < MAX_NR_CONSOLES; i++) 342 }
331 newport_set_def_font(i, NULL);
332} 343}
333 344
334static void newport_clear(struct vc_data *vc, int sy, int sx, int height, 345static void newport_clear(struct vc_data *vc, int sy, int sx, int height,
@@ -728,16 +739,23 @@ const struct consw newport_con = {
728#ifdef MODULE 739#ifdef MODULE
729static int __init newport_console_init(void) 740static int __init newport_console_init(void)
730{ 741{
742
743 if (!sgi_gfxaddr)
744 return NULL;
745
746 if (!npregs)
747 npregs = (struct newport_regs *)/* ioremap cannot fail */
748 ioremap(sgi_gfxaddr, sizeof(struct newport_regs));
749
731 return take_over_console(&newport_con, 0, MAX_NR_CONSOLES - 1, 1); 750 return take_over_console(&newport_con, 0, MAX_NR_CONSOLES - 1, 1);
732} 751}
752module_init(newport_console_init);
733 753
734static void __exit newport_console_exit(void) 754static void __exit newport_console_exit(void)
735{ 755{
736 give_up_console(&newport_con); 756 give_up_console(&newport_con);
737 iounmap((void *)npregs); 757 iounmap((void *)npregs);
738} 758}
739
740module_init(newport_console_init);
741module_exit(newport_console_exit); 759module_exit(newport_console_exit);
742#endif 760#endif
743 761
diff --git a/drivers/video/console/promcon.c b/drivers/video/console/promcon.c
index 04f42fcaac59..d6e6ad537f9f 100644
--- a/drivers/video/console/promcon.c
+++ b/drivers/video/console/promcon.c
@@ -109,7 +109,7 @@ promcon_end(struct vc_data *conp, char *b)
109 return b - p; 109 return b - p;
110} 110}
111 111
112const char __init *promcon_startup(void) 112const char *promcon_startup(void)
113{ 113{
114 const char *display_desc = "PROM"; 114 const char *display_desc = "PROM";
115 int node; 115 int node;
@@ -133,7 +133,7 @@ const char __init *promcon_startup(void)
133 return display_desc; 133 return display_desc;
134} 134}
135 135
136static void __init 136static void
137promcon_init_unimap(struct vc_data *conp) 137promcon_init_unimap(struct vc_data *conp)
138{ 138{
139 mm_segment_t old_fs = get_fs(); 139 mm_segment_t old_fs = get_fs();
diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
index fd5940f41271..45c4f227e56e 100644
--- a/drivers/video/console/sticon.c
+++ b/drivers/video/console/sticon.c
@@ -75,7 +75,7 @@ static inline void cursor_undrawn(void)
75 cursor_drawn = 0; 75 cursor_drawn = 0;
76} 76}
77 77
78static const char *__init sticon_startup(void) 78static const char *sticon_startup(void)
79{ 79{
80 return "STI console"; 80 return "STI console";
81} 81}
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index e64d42e2449e..f32b590730f2 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -114,6 +114,7 @@ static int vga_512_chars;
114static int vga_video_font_height; 114static int vga_video_font_height;
115static int vga_scan_lines; 115static int vga_scan_lines;
116static unsigned int vga_rolled_over = 0; 116static unsigned int vga_rolled_over = 0;
117static int vga_init_done;
117 118
118static int __init no_scroll(char *str) 119static int __init no_scroll(char *str)
119{ 120{
@@ -190,7 +191,7 @@ static void vgacon_scrollback_init(int pitch)
190 } 191 }
191} 192}
192 193
193static void __init vgacon_scrollback_startup(void) 194static void vgacon_scrollback_startup(void)
194{ 195{
195 vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE 196 vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE
196 * 1024); 197 * 1024);
@@ -355,7 +356,7 @@ static int vgacon_scrolldelta(struct vc_data *c, int lines)
355} 356}
356#endif /* CONFIG_VGACON_SOFT_SCROLLBACK */ 357#endif /* CONFIG_VGACON_SOFT_SCROLLBACK */
357 358
358static const char __init *vgacon_startup(void) 359static const char *vgacon_startup(void)
359{ 360{
360 const char *display_desc = NULL; 361 const char *display_desc = NULL;
361 u16 saved1, saved2; 362 u16 saved1, saved2;
@@ -523,7 +524,12 @@ static const char __init *vgacon_startup(void)
523 524
524 vgacon_xres = ORIG_VIDEO_COLS * VGA_FONTWIDTH; 525 vgacon_xres = ORIG_VIDEO_COLS * VGA_FONTWIDTH;
525 vgacon_yres = vga_scan_lines; 526 vgacon_yres = vga_scan_lines;
526 vgacon_scrollback_startup(); 527
528 if (!vga_init_done) {
529 vgacon_scrollback_startup();
530 vga_init_done = 1;
531 }
532
527 return display_desc; 533 return display_desc;
528} 534}
529 535
@@ -531,10 +537,20 @@ static void vgacon_init(struct vc_data *c, int init)
531{ 537{
532 unsigned long p; 538 unsigned long p;
533 539
534 /* We cannot be loaded as a module, therefore init is always 1 */ 540 /*
541 * We cannot be loaded as a module, therefore init is always 1,
542 * but vgacon_init can be called more than once, and init will
543 * not be 1.
544 */
535 c->vc_can_do_color = vga_can_do_color; 545 c->vc_can_do_color = vga_can_do_color;
536 c->vc_cols = vga_video_num_columns; 546
537 c->vc_rows = vga_video_num_lines; 547 /* set dimensions manually if init != 0 since vc_resize() will fail */
548 if (init) {
549 c->vc_cols = vga_video_num_columns;
550 c->vc_rows = vga_video_num_lines;
551 } else
552 vc_resize(c, vga_video_num_columns, vga_video_num_lines);
553
538 c->vc_scan_lines = vga_scan_lines; 554 c->vc_scan_lines = vga_scan_lines;
539 c->vc_font.height = vga_video_font_height; 555 c->vc_font.height = vga_video_font_height;
540 c->vc_complement_mask = 0x7700; 556 c->vc_complement_mask = 0x7700;
diff --git a/drivers/video/epson1355fb.c b/drivers/video/epson1355fb.c
index 082759447bf6..f0a621ecc288 100644
--- a/drivers/video/epson1355fb.c
+++ b/drivers/video/epson1355fb.c
@@ -605,11 +605,6 @@ static void clearfb16(struct fb_info *info)
605 fb_writeb(0, dst); 605 fb_writeb(0, dst);
606} 606}
607 607
608static void epson1355fb_platform_release(struct device *device)
609{
610 dev_err(device, "This driver is broken, please bug the authors so they will fix it.\n");
611}
612
613static int epson1355fb_remove(struct platform_device *dev) 608static int epson1355fb_remove(struct platform_device *dev)
614{ 609{
615 struct fb_info *info = platform_get_drvdata(dev); 610 struct fb_info *info = platform_get_drvdata(dev);
@@ -733,13 +728,7 @@ static struct platform_driver epson1355fb_driver = {
733 }, 728 },
734}; 729};
735 730
736static struct platform_device epson1355fb_device = { 731static struct platform_device *epson1355fb_device;
737 .name = "epson1355fb",
738 .id = 0,
739 .dev = {
740 .release = epson1355fb_platform_release,
741 }
742};
743 732
744int __init epson1355fb_init(void) 733int __init epson1355fb_init(void)
745{ 734{
@@ -749,11 +738,21 @@ int __init epson1355fb_init(void)
749 return -ENODEV; 738 return -ENODEV;
750 739
751 ret = platform_driver_register(&epson1355fb_driver); 740 ret = platform_driver_register(&epson1355fb_driver);
741
752 if (!ret) { 742 if (!ret) {
753 ret = platform_device_register(&epson1355fb_device); 743 epson1355fb_device = platform_device_alloc("epson1355fb", 0);
754 if (ret) 744
745 if (epson1355fb_device)
746 ret = platform_device_add(epson1355fb_device);
747 else
748 ret = -ENOMEM;
749
750 if (ret) {
751 platform_device_put(epson1355fb_device);
755 platform_driver_unregister(&epson1355fb_driver); 752 platform_driver_unregister(&epson1355fb_driver);
753 }
756 } 754 }
755
757 return ret; 756 return ret;
758} 757}
759 758
@@ -762,7 +761,7 @@ module_init(epson1355fb_init);
762#ifdef MODULE 761#ifdef MODULE
763static void __exit epson1355fb_exit(void) 762static void __exit epson1355fb_exit(void)
764{ 763{
765 platform_device_unregister(&epson1355fb_device); 764 platform_device_unregister(epson1355fb_device);
766 platform_driver_unregister(&epson1355fb_driver); 765 platform_driver_unregister(&epson1355fb_driver);
767} 766}
768 767
diff --git a/drivers/video/fbcvt.c b/drivers/video/fbcvt.c
index ac90883dc3aa..b5498999c4ec 100644
--- a/drivers/video/fbcvt.c
+++ b/drivers/video/fbcvt.c
@@ -376,4 +376,3 @@ int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb)
376 376
377 return 0; 377 return 0;
378} 378}
379EXPORT_SYMBOL(fb_find_mode_cvt);
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 372aa1776827..31143afe7c95 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -34,7 +34,6 @@
34#endif 34#endif
35#include <linux/devfs_fs_kernel.h> 35#include <linux/devfs_fs_kernel.h>
36#include <linux/err.h> 36#include <linux/err.h>
37#include <linux/kernel.h>
38#include <linux/device.h> 37#include <linux/device.h>
39#include <linux/efi.h> 38#include <linux/efi.h>
40 39
@@ -162,7 +161,6 @@ char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size
162} 161}
163 162
164#ifdef CONFIG_LOGO 163#ifdef CONFIG_LOGO
165#include <linux/linux_logo.h>
166 164
167static inline unsigned safe_shift(unsigned d, int n) 165static inline unsigned safe_shift(unsigned d, int n)
168{ 166{
@@ -336,11 +334,11 @@ static void fb_rotate_logo_ud(const u8 *in, u8 *out, u32 width, u32 height)
336 334
337static void fb_rotate_logo_cw(const u8 *in, u8 *out, u32 width, u32 height) 335static void fb_rotate_logo_cw(const u8 *in, u8 *out, u32 width, u32 height)
338{ 336{
339 int i, j, w = width - 1; 337 int i, j, h = height - 1;
340 338
341 for (i = 0; i < height; i++) 339 for (i = 0; i < height; i++)
342 for (j = 0; j < width; j++) 340 for (j = 0; j < width; j++)
343 out[height * j + w - i] = *in++; 341 out[height * j + h - i] = *in++;
344} 342}
345 343
346static void fb_rotate_logo_ccw(const u8 *in, u8 *out, u32 width, u32 height) 344static void fb_rotate_logo_ccw(const u8 *in, u8 *out, u32 width, u32 height)
@@ -358,24 +356,24 @@ static void fb_rotate_logo(struct fb_info *info, u8 *dst,
358 u32 tmp; 356 u32 tmp;
359 357
360 if (rotate == FB_ROTATE_UD) { 358 if (rotate == FB_ROTATE_UD) {
361 image->dx = info->var.xres - image->width;
362 image->dy = info->var.yres - image->height;
363 fb_rotate_logo_ud(image->data, dst, image->width, 359 fb_rotate_logo_ud(image->data, dst, image->width,
364 image->height); 360 image->height);
361 image->dx = info->var.xres - image->width;
362 image->dy = info->var.yres - image->height;
365 } else if (rotate == FB_ROTATE_CW) { 363 } else if (rotate == FB_ROTATE_CW) {
366 tmp = image->width;
367 image->width = image->height;
368 image->height = tmp;
369 image->dx = info->var.xres - image->height;
370 fb_rotate_logo_cw(image->data, dst, image->width, 364 fb_rotate_logo_cw(image->data, dst, image->width,
371 image->height); 365 image->height);
372 } else if (rotate == FB_ROTATE_CCW) {
373 tmp = image->width; 366 tmp = image->width;
374 image->width = image->height; 367 image->width = image->height;
375 image->height = tmp; 368 image->height = tmp;
376 image->dy = info->var.yres - image->width; 369 image->dx = info->var.xres - image->width;
370 } else if (rotate == FB_ROTATE_CCW) {
377 fb_rotate_logo_ccw(image->data, dst, image->width, 371 fb_rotate_logo_ccw(image->data, dst, image->width,
378 image->height); 372 image->height);
373 tmp = image->width;
374 image->width = image->height;
375 image->height = tmp;
376 image->dy = info->var.yres - image->height;
379 } 377 }
380 378
381 image->data = dst; 379 image->data = dst;
@@ -435,7 +433,7 @@ int fb_prepare_logo(struct fb_info *info, int rotate)
435 depth = info->var.green.length; 433 depth = info->var.green.length;
436 } 434 }
437 435
438 if (info->fix.visual == FB_VISUAL_STATIC_PSEUDOCOLOR) { 436 if (info->fix.visual == FB_VISUAL_STATIC_PSEUDOCOLOR && depth > 4) {
439 /* assume console colormap */ 437 /* assume console colormap */
440 depth = 4; 438 depth = 4;
441 } 439 }
@@ -1278,8 +1276,8 @@ static struct file_operations fb_fops = {
1278#endif 1276#endif
1279}; 1277};
1280 1278
1281static struct class *fb_class; 1279struct class *fb_class;
1282 1280EXPORT_SYMBOL(fb_class);
1283/** 1281/**
1284 * register_framebuffer - registers a frame buffer device 1282 * register_framebuffer - registers a frame buffer device
1285 * @fb_info: frame buffer info structure 1283 * @fb_info: frame buffer info structure
@@ -1355,6 +1353,7 @@ register_framebuffer(struct fb_info *fb_info)
1355int 1353int
1356unregister_framebuffer(struct fb_info *fb_info) 1354unregister_framebuffer(struct fb_info *fb_info)
1357{ 1355{
1356 struct fb_event event;
1358 int i; 1357 int i;
1359 1358
1360 i = fb_info->node; 1359 i = fb_info->node;
@@ -1362,13 +1361,17 @@ unregister_framebuffer(struct fb_info *fb_info)
1362 return -EINVAL; 1361 return -EINVAL;
1363 devfs_remove("fb/%d", i); 1362 devfs_remove("fb/%d", i);
1364 1363
1365 if (fb_info->pixmap.addr && (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) 1364 if (fb_info->pixmap.addr &&
1365 (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
1366 kfree(fb_info->pixmap.addr); 1366 kfree(fb_info->pixmap.addr);
1367 fb_destroy_modelist(&fb_info->modelist); 1367 fb_destroy_modelist(&fb_info->modelist);
1368 registered_fb[i]=NULL; 1368 registered_fb[i]=NULL;
1369 num_registered_fb--; 1369 num_registered_fb--;
1370 fb_cleanup_class_device(fb_info); 1370 fb_cleanup_class_device(fb_info);
1371 class_device_destroy(fb_class, MKDEV(FB_MAJOR, i)); 1371 class_device_destroy(fb_class, MKDEV(FB_MAJOR, i));
1372 event.info = fb_info;
1373 blocking_notifier_call_chain(&fb_notifier_list,
1374 FB_EVENT_FB_UNREGISTERED, &event);
1372 return 0; 1375 return 0;
1373} 1376}
1374 1377
@@ -1491,28 +1494,6 @@ int fb_new_modelist(struct fb_info *info)
1491 return err; 1494 return err;
1492} 1495}
1493 1496
1494/**
1495 * fb_con_duit - user<->fbcon passthrough
1496 * @info: struct fb_info
1497 * @event: notification event to be passed to fbcon
1498 * @data: private data
1499 *
1500 * DESCRIPTION
1501 * This function is an fbcon-user event passing channel
1502 * which bypasses fbdev. This is hopefully temporary
1503 * until a user interface for fbcon is created
1504 */
1505int fb_con_duit(struct fb_info *info, int event, void *data)
1506{
1507 struct fb_event evnt;
1508
1509 evnt.info = info;
1510 evnt.data = data;
1511
1512 return blocking_notifier_call_chain(&fb_notifier_list, event, &evnt);
1513}
1514EXPORT_SYMBOL(fb_con_duit);
1515
1516static char *video_options[FB_MAX]; 1497static char *video_options[FB_MAX];
1517static int ofonly; 1498static int ofonly;
1518 1499
@@ -1622,6 +1603,5 @@ EXPORT_SYMBOL(fb_set_suspend);
1622EXPORT_SYMBOL(fb_register_client); 1603EXPORT_SYMBOL(fb_register_client);
1623EXPORT_SYMBOL(fb_unregister_client); 1604EXPORT_SYMBOL(fb_unregister_client);
1624EXPORT_SYMBOL(fb_get_options); 1605EXPORT_SYMBOL(fb_get_options);
1625EXPORT_SYMBOL(fb_new_modelist);
1626 1606
1627MODULE_LICENSE("GPL"); 1607MODULE_LICENSE("GPL");
diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index 53beeb4a9998..3ccfff715a51 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -29,9 +29,9 @@
29#include <linux/tty.h> 29#include <linux/tty.h>
30#include <linux/fb.h> 30#include <linux/fb.h>
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/pci.h>
32#include <video/edid.h> 33#include <video/edid.h>
33#ifdef CONFIG_PPC_OF 34#ifdef CONFIG_PPC_OF
34#include <linux/pci.h>
35#include <asm/prom.h> 35#include <asm/prom.h>
36#include <asm/pci-bridge.h> 36#include <asm/pci-bridge.h>
37#endif 37#endif
@@ -605,6 +605,7 @@ static int fb_get_monitor_limits(unsigned char *edid, struct fb_monspecs *specs)
605 block = edid + DETAILED_TIMING_DESCRIPTIONS_START; 605 block = edid + DETAILED_TIMING_DESCRIPTIONS_START;
606 606
607 DPRINTK(" Monitor Operating Limits: "); 607 DPRINTK(" Monitor Operating Limits: ");
608
608 for (i = 0; i < 4; i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) { 609 for (i = 0; i < 4; i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) {
609 if (edid_is_limits_block(block)) { 610 if (edid_is_limits_block(block)) {
610 specs->hfmin = H_MIN_RATE * 1000; 611 specs->hfmin = H_MIN_RATE * 1000;
@@ -618,11 +619,12 @@ static int fb_get_monitor_limits(unsigned char *edid, struct fb_monspecs *specs)
618 break; 619 break;
619 } 620 }
620 } 621 }
621 622
622 /* estimate monitor limits based on modes supported */ 623 /* estimate monitor limits based on modes supported */
623 if (retval) { 624 if (retval) {
624 struct fb_videomode *modes; 625 struct fb_videomode *modes, *mode;
625 int num_modes, i, hz, hscan, pixclock; 626 int num_modes, i, hz, hscan, pixclock;
627 int vtotal, htotal;
626 628
627 modes = fb_create_modedb(edid, &num_modes); 629 modes = fb_create_modedb(edid, &num_modes);
628 if (!modes) { 630 if (!modes) {
@@ -632,20 +634,38 @@ static int fb_get_monitor_limits(unsigned char *edid, struct fb_monspecs *specs)
632 634
633 retval = 0; 635 retval = 0;
634 for (i = 0; i < num_modes; i++) { 636 for (i = 0; i < num_modes; i++) {
635 hz = modes[i].refresh; 637 mode = &modes[i];
636 pixclock = PICOS2KHZ(modes[i].pixclock) * 1000; 638 pixclock = PICOS2KHZ(modes[i].pixclock) * 1000;
637 hscan = (modes[i].yres * 105 * hz + 5000)/100; 639 htotal = mode->xres + mode->right_margin + mode->hsync_len
640 + mode->left_margin;
641 vtotal = mode->yres + mode->lower_margin + mode->vsync_len
642 + mode->upper_margin;
643
644 if (mode->vmode & FB_VMODE_INTERLACED)
645 vtotal /= 2;
646
647 if (mode->vmode & FB_VMODE_DOUBLE)
648 vtotal *= 2;
649
650 hscan = (pixclock + htotal / 2) / htotal;
651 hscan = (hscan + 500) / 1000 * 1000;
652 hz = (hscan + vtotal / 2) / vtotal;
638 653
639 if (specs->dclkmax == 0 || specs->dclkmax < pixclock) 654 if (specs->dclkmax == 0 || specs->dclkmax < pixclock)
640 specs->dclkmax = pixclock; 655 specs->dclkmax = pixclock;
656
641 if (specs->dclkmin == 0 || specs->dclkmin > pixclock) 657 if (specs->dclkmin == 0 || specs->dclkmin > pixclock)
642 specs->dclkmin = pixclock; 658 specs->dclkmin = pixclock;
659
643 if (specs->hfmax == 0 || specs->hfmax < hscan) 660 if (specs->hfmax == 0 || specs->hfmax < hscan)
644 specs->hfmax = hscan; 661 specs->hfmax = hscan;
662
645 if (specs->hfmin == 0 || specs->hfmin > hscan) 663 if (specs->hfmin == 0 || specs->hfmin > hscan)
646 specs->hfmin = hscan; 664 specs->hfmin = hscan;
665
647 if (specs->vfmax == 0 || specs->vfmax < hz) 666 if (specs->vfmax == 0 || specs->vfmax < hz)
648 specs->vfmax = hz; 667 specs->vfmax = hz;
668
649 if (specs->vfmin == 0 || specs->vfmin > hz) 669 if (specs->vfmin == 0 || specs->vfmin > hz)
650 specs->vfmin = hz; 670 specs->vfmin = hz;
651 } 671 }
@@ -1281,8 +1301,7 @@ int fb_validate_mode(const struct fb_var_screeninfo *var, struct fb_info *info)
1281 -EINVAL : 0; 1301 -EINVAL : 0;
1282} 1302}
1283 1303
1284#if defined(CONFIG_FB_FIRMWARE_EDID) && defined(__i386__) 1304#if defined(CONFIG_FIRMWARE_EDID) && defined(CONFIG_X86)
1285#include <linux/pci.h>
1286 1305
1287/* 1306/*
1288 * We need to ensure that the EDID block is only returned for 1307 * We need to ensure that the EDID block is only returned for
diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c
index 3ceb8c1b392e..4f78f234473d 100644
--- a/drivers/video/fbsysfs.c
+++ b/drivers/video/fbsysfs.c
@@ -100,13 +100,22 @@ static int mode_string(char *buf, unsigned int offset,
100 const struct fb_videomode *mode) 100 const struct fb_videomode *mode)
101{ 101{
102 char m = 'U'; 102 char m = 'U';
103 char v = 'p';
104
103 if (mode->flag & FB_MODE_IS_DETAILED) 105 if (mode->flag & FB_MODE_IS_DETAILED)
104 m = 'D'; 106 m = 'D';
105 if (mode->flag & FB_MODE_IS_VESA) 107 if (mode->flag & FB_MODE_IS_VESA)
106 m = 'V'; 108 m = 'V';
107 if (mode->flag & FB_MODE_IS_STANDARD) 109 if (mode->flag & FB_MODE_IS_STANDARD)
108 m = 'S'; 110 m = 'S';
109 return snprintf(&buf[offset], PAGE_SIZE - offset, "%c:%dx%d-%d\n", m, mode->xres, mode->yres, mode->refresh); 111
112 if (mode->vmode & FB_VMODE_INTERLACED)
113 v = 'i';
114 if (mode->vmode & FB_VMODE_DOUBLE)
115 v = 'd';
116
117 return snprintf(&buf[offset], PAGE_SIZE - offset, "%c:%dx%d%c-%d\n",
118 m, mode->xres, mode->yres, v, mode->refresh);
110} 119}
111 120
112static ssize_t store_mode(struct class_device *class_device, const char * buf, 121static ssize_t store_mode(struct class_device *class_device, const char * buf,
@@ -238,45 +247,6 @@ static ssize_t show_rotate(struct class_device *class_device, char *buf)
238 return snprintf(buf, PAGE_SIZE, "%d\n", fb_info->var.rotate); 247 return snprintf(buf, PAGE_SIZE, "%d\n", fb_info->var.rotate);
239} 248}
240 249
241static ssize_t store_con_rotate(struct class_device *class_device,
242 const char *buf, size_t count)
243{
244 struct fb_info *fb_info = class_get_devdata(class_device);
245 int rotate;
246 char **last = NULL;
247
248 acquire_console_sem();
249 rotate = simple_strtoul(buf, last, 0);
250 fb_con_duit(fb_info, FB_EVENT_SET_CON_ROTATE, &rotate);
251 release_console_sem();
252 return count;
253}
254
255static ssize_t store_con_rotate_all(struct class_device *class_device,
256 const char *buf, size_t count)
257{
258 struct fb_info *fb_info = class_get_devdata(class_device);
259 int rotate;
260 char **last = NULL;
261
262 acquire_console_sem();
263 rotate = simple_strtoul(buf, last, 0);
264 fb_con_duit(fb_info, FB_EVENT_SET_CON_ROTATE_ALL, &rotate);
265 release_console_sem();
266 return count;
267}
268
269static ssize_t show_con_rotate(struct class_device *class_device, char *buf)
270{
271 struct fb_info *fb_info = class_get_devdata(class_device);
272 int rotate;
273
274 acquire_console_sem();
275 rotate = fb_con_duit(fb_info, FB_EVENT_GET_CON_ROTATE, NULL);
276 release_console_sem();
277 return snprintf(buf, PAGE_SIZE, "%d\n", rotate);
278}
279
280static ssize_t store_virtual(struct class_device *class_device, 250static ssize_t store_virtual(struct class_device *class_device,
281 const char * buf, size_t count) 251 const char * buf, size_t count)
282{ 252{
@@ -493,8 +463,6 @@ static struct class_device_attribute class_device_attrs[] = {
493 __ATTR(name, S_IRUGO, show_name, NULL), 463 __ATTR(name, S_IRUGO, show_name, NULL),
494 __ATTR(stride, S_IRUGO, show_stride, NULL), 464 __ATTR(stride, S_IRUGO, show_stride, NULL),
495 __ATTR(rotate, S_IRUGO|S_IWUSR, show_rotate, store_rotate), 465 __ATTR(rotate, S_IRUGO|S_IWUSR, show_rotate, store_rotate),
496 __ATTR(con_rotate, S_IRUGO|S_IWUSR, show_con_rotate, store_con_rotate),
497 __ATTR(con_rotate_all, S_IWUSR, NULL, store_con_rotate_all),
498 __ATTR(state, S_IRUGO|S_IWUSR, show_fbstate, store_fbstate), 466 __ATTR(state, S_IRUGO|S_IWUSR, show_fbstate, store_fbstate),
499#ifdef CONFIG_FB_BACKLIGHT 467#ifdef CONFIG_FB_BACKLIGHT
500 __ATTR(bl_curve, S_IRUGO|S_IWUSR, show_bl_curve, store_bl_curve), 468 __ATTR(bl_curve, S_IRUGO|S_IWUSR, show_bl_curve, store_bl_curve),
diff --git a/drivers/video/geode/gx1fb_core.c b/drivers/video/geode/gx1fb_core.c
index 20e69156d728..4d3a8871d3d1 100644
--- a/drivers/video/geode/gx1fb_core.c
+++ b/drivers/video/geode/gx1fb_core.c
@@ -376,8 +376,6 @@ static int __init gx1fb_probe(struct pci_dev *pdev, const struct pci_device_id *
376 release_mem_region(gx1_gx_base() + 0x8300, 0x100); 376 release_mem_region(gx1_gx_base() + 0x8300, 0x100);
377 } 377 }
378 378
379 pci_disable_device(pdev);
380
381 if (info) 379 if (info)
382 framebuffer_release(info); 380 framebuffer_release(info);
383 return ret; 381 return ret;
@@ -399,7 +397,6 @@ static void gx1fb_remove(struct pci_dev *pdev)
399 iounmap(par->dc_regs); 397 iounmap(par->dc_regs);
400 release_mem_region(gx1_gx_base() + 0x8300, 0x100); 398 release_mem_region(gx1_gx_base() + 0x8300, 0x100);
401 399
402 pci_disable_device(pdev);
403 pci_set_drvdata(pdev, NULL); 400 pci_set_drvdata(pdev, NULL);
404 401
405 framebuffer_release(info); 402 framebuffer_release(info);
diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c
index 89c34b15f5d4..5ef12a3dfa50 100644
--- a/drivers/video/geode/gxfb_core.c
+++ b/drivers/video/geode/gxfb_core.c
@@ -354,8 +354,6 @@ static int __init gxfb_probe(struct pci_dev *pdev, const struct pci_device_id *i
354 pci_release_region(pdev, 2); 354 pci_release_region(pdev, 2);
355 } 355 }
356 356
357 pci_disable_device(pdev);
358
359 if (info) 357 if (info)
360 framebuffer_release(info); 358 framebuffer_release(info);
361 return ret; 359 return ret;
@@ -377,7 +375,6 @@ static void gxfb_remove(struct pci_dev *pdev)
377 iounmap(par->dc_regs); 375 iounmap(par->dc_regs);
378 pci_release_region(pdev, 2); 376 pci_release_region(pdev, 2);
379 377
380 pci_disable_device(pdev);
381 pci_set_drvdata(pdev, NULL); 378 pci_set_drvdata(pdev, NULL);
382 379
383 framebuffer_release(info); 380 framebuffer_release(info);
diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c
index 44aa2ffff973..a1f7d80f0ac1 100644
--- a/drivers/video/i810/i810_main.c
+++ b/drivers/video/i810/i810_main.c
@@ -2110,9 +2110,6 @@ static void i810fb_release_resource(struct fb_info *info,
2110 if (par->res_flags & MMIO_REQ) 2110 if (par->res_flags & MMIO_REQ)
2111 release_mem_region(par->mmio_start_phys, MMIO_SIZE); 2111 release_mem_region(par->mmio_start_phys, MMIO_SIZE);
2112 2112
2113 if (par->res_flags & PCI_DEVICE_ENABLED)
2114 pci_disable_device(par->dev);
2115
2116 framebuffer_release(info); 2113 framebuffer_release(info);
2117 2114
2118} 2115}
diff --git a/drivers/video/imacfb.c b/drivers/video/imacfb.c
new file mode 100644
index 000000000000..7b1c168c834d
--- /dev/null
+++ b/drivers/video/imacfb.c
@@ -0,0 +1,345 @@
1/*
2 * Framebuffer driver for Intel-based Macs
3 *
4 * (c) 2006 Edgar Hucek <gimli@dark-green.com>
5 * Original imac driver written by Gerd Knorr <kraxel@goldbach.in-berlin.de>
6 *
7 */
8
9#include <linux/delay.h>
10#include <linux/errno.h>
11#include <linux/fb.h>
12#include <linux/kernel.h>
13#include <linux/init.h>
14#include <linux/ioport.h>
15#include <linux/mm.h>
16#include <linux/module.h>
17#include <linux/platform_device.h>
18#include <linux/slab.h>
19#include <linux/string.h>
20#include <linux/tty.h>
21
22#include <asm/io.h>
23
24#include <video/vga.h>
25
/*
 * Machine variants known to the driver.  The value stored in `model'
 * selects the hard-coded framebuffer geometry used by imacfb_probe().
 */
typedef enum _MAC_TYPE {
	M_I17 = 0,	/* selected by the "i17" option */
	M_I20,		/* selected by the "i20" option */
	M_MINI,		/* selected by the "mini" option */
	M_MACBOOK,	/* selected by the "macbook" option */
	M_NEW		/* default when no model option is given */
} MAC_TYPE;
33
34/* --------------------------------------------------------------------- */
35
36static struct fb_var_screeninfo imacfb_defined __initdata = {
37 .activate = FB_ACTIVATE_NOW,
38 .height = -1,
39 .width = -1,
40 .right_margin = 32,
41 .upper_margin = 16,
42 .lower_margin = 4,
43 .vsync_len = 4,
44 .vmode = FB_VMODE_NONINTERLACED,
45};
46
47static struct fb_fix_screeninfo imacfb_fix __initdata = {
48 .id = "IMAC VGA",
49 .type = FB_TYPE_PACKED_PIXELS,
50 .accel = FB_ACCEL_NONE,
51 .visual = FB_VISUAL_TRUECOLOR,
52};
53
54static int inverse;
55static int model = M_NEW;
56static int manual_height;
57static int manual_width;
58
/*
 * Default amount of video memory in bytes (16 MiB).  The expansion is
 * parenthesized so that the macro behaves as a single value inside larger
 * expressions such as `x % DEFAULT_FB_MEM'.
 */
#define DEFAULT_FB_MEM (1024 * 1024 * 16)
60
61/* --------------------------------------------------------------------- */
62
63static int imacfb_setcolreg(unsigned regno, unsigned red, unsigned green,
64 unsigned blue, unsigned transp,
65 struct fb_info *info)
66{
67 /*
68 * Set a single color register. The values supplied are
69 * already rounded down to the hardware's capabilities
70 * (according to the entries in the `var' structure). Return
71 * != 0 for invalid regno.
72 */
73
74 if (regno >= info->cmap.len)
75 return 1;
76
77 if (regno < 16) {
78 red >>= 8;
79 green >>= 8;
80 blue >>= 8;
81 ((u32 *)(info->pseudo_palette))[regno] =
82 (red << info->var.red.offset) |
83 (green << info->var.green.offset) |
84 (blue << info->var.blue.offset);
85 }
86 return 0;
87}
88
89static struct fb_ops imacfb_ops = {
90 .owner = THIS_MODULE,
91 .fb_setcolreg = imacfb_setcolreg,
92 .fb_fillrect = cfb_fillrect,
93 .fb_copyarea = cfb_copyarea,
94 .fb_imageblit = cfb_imageblit,
95};
96
97static int __init imacfb_setup(char *options)
98{
99 char *this_opt;
100
101 if (!options || !*options)
102 return 0;
103
104 while ((this_opt = strsep(&options, ",")) != NULL) {
105 if (!*this_opt) continue;
106
107 if (!strcmp(this_opt, "inverse"))
108 inverse = 1;
109 else if (!strcmp(this_opt, "i17"))
110 model = M_I17;
111 else if (!strcmp(this_opt, "i20"))
112 model = M_I20;
113 else if (!strcmp(this_opt, "mini"))
114 model = M_MINI;
115 else if (!strcmp(this_opt, "macbook"))
116 model = M_MACBOOK;
117 else if (!strncmp(this_opt, "height:", 7))
118 manual_height = simple_strtoul(this_opt+7, NULL, 0);
119 else if (!strncmp(this_opt, "width:", 6))
120 manual_width = simple_strtoul(this_opt+6, NULL, 0);
121 }
122 return 0;
123}
124
125static int __init imacfb_probe(struct platform_device *dev)
126{
127 struct fb_info *info;
128 int err;
129 unsigned int size_vmode;
130 unsigned int size_remap;
131 unsigned int size_total;
132
133 screen_info.lfb_depth = 32;
134 screen_info.lfb_size = DEFAULT_FB_MEM / 0x10000;
135 screen_info.pages=1;
136 screen_info.blue_size = 8;
137 screen_info.blue_pos = 0;
138 screen_info.green_size = 8;
139 screen_info.green_pos = 8;
140 screen_info.red_size = 8;
141 screen_info.red_pos = 16;
142 screen_info.rsvd_size = 8;
143 screen_info.rsvd_pos = 24;
144
145 switch (model) {
146 case M_I17:
147 screen_info.lfb_width = 1440;
148 screen_info.lfb_height = 900;
149 screen_info.lfb_linelength = 1472 * 4;
150 screen_info.lfb_base = 0x80010000;
151 break;
152 case M_NEW:
153 case M_I20:
154 screen_info.lfb_width = 1680;
155 screen_info.lfb_height = 1050;
156 screen_info.lfb_linelength = 1728 * 4;
157 screen_info.lfb_base = 0x80010000;
158 break;
159 case M_MINI:
160 screen_info.lfb_width = 1024;
161 screen_info.lfb_height = 768;
162 screen_info.lfb_linelength = 2048 * 4;
163 screen_info.lfb_base = 0x80000000;
164 break;
165 case M_MACBOOK:
166 screen_info.lfb_width = 1280;
167 screen_info.lfb_height = 800;
168 screen_info.lfb_linelength = 2048 * 4;
169 screen_info.lfb_base = 0x80000000;
170 break;
171 }
172
173 /* if the user wants to manually specify height/width,
174 we will override the defaults */
175 /* TODO: eventually get auto-detection working */
176 if (manual_height > 0)
177 screen_info.lfb_height = manual_height;
178 if (manual_width > 0)
179 screen_info.lfb_width = manual_width;
180
181 imacfb_fix.smem_start = screen_info.lfb_base;
182 imacfb_defined.bits_per_pixel = screen_info.lfb_depth;
183 imacfb_defined.xres = screen_info.lfb_width;
184 imacfb_defined.yres = screen_info.lfb_height;
185 imacfb_fix.line_length = screen_info.lfb_linelength;
186
187 /* size_vmode -- that is the amount of memory needed for the
188 * used video mode, i.e. the minimum amount of
189 * memory we need. */
190 size_vmode = imacfb_defined.yres * imacfb_fix.line_length;
191
192 /* size_total -- all video memory we have. Used for
193 * entries, ressource allocation and bounds
194 * checking. */
195 size_total = screen_info.lfb_size * 65536;
196 if (size_total < size_vmode)
197 size_total = size_vmode;
198
199 /* size_remap -- the amount of video memory we are going to
200 * use for imacfb. With modern cards it is no
201 * option to simply use size_total as that
202 * wastes plenty of kernel address space. */
203 size_remap = size_vmode * 2;
204 if (size_remap < size_vmode)
205 size_remap = size_vmode;
206 if (size_remap > size_total)
207 size_remap = size_total;
208 imacfb_fix.smem_len = size_remap;
209
210#ifndef __i386__
211 screen_info.imacpm_seg = 0;
212#endif
213
214 if (!request_mem_region(imacfb_fix.smem_start, size_total, "imacfb")) {
215 printk(KERN_WARNING
216 "imacfb: cannot reserve video memory at 0x%lx\n",
217 imacfb_fix.smem_start);
218 /* We cannot make this fatal. Sometimes this comes from magic
219 spaces our resource handlers simply don't know about */
220 }
221
222 info = framebuffer_alloc(sizeof(u32) * 16, &dev->dev);
223 if (!info) {
224 err = -ENOMEM;
225 goto err_release_mem;
226 }
227 info->pseudo_palette = info->par;
228 info->par = NULL;
229
230 info->screen_base = ioremap(imacfb_fix.smem_start, imacfb_fix.smem_len);
231 if (!info->screen_base) {
232 printk(KERN_ERR "imacfb: abort, cannot ioremap video memory "
233 "0x%x @ 0x%lx\n",
234 imacfb_fix.smem_len, imacfb_fix.smem_start);
235 err = -EIO;
236 goto err_unmap;
237 }
238
239 printk(KERN_INFO "imacfb: framebuffer at 0x%lx, mapped to 0x%p, "
240 "using %dk, total %dk\n",
241 imacfb_fix.smem_start, info->screen_base,
242 size_remap/1024, size_total/1024);
243 printk(KERN_INFO "imacfb: mode is %dx%dx%d, linelength=%d, pages=%d\n",
244 imacfb_defined.xres, imacfb_defined.yres,
245 imacfb_defined.bits_per_pixel, imacfb_fix.line_length,
246 screen_info.pages);
247
248 imacfb_defined.xres_virtual = imacfb_defined.xres;
249 imacfb_defined.yres_virtual = imacfb_fix.smem_len /
250 imacfb_fix.line_length;
251 printk(KERN_INFO "imacfb: scrolling: redraw\n");
252 imacfb_defined.yres_virtual = imacfb_defined.yres;
253
254 /* some dummy values for timing to make fbset happy */
255 imacfb_defined.pixclock = 10000000 / imacfb_defined.xres *
256 1000 / imacfb_defined.yres;
257 imacfb_defined.left_margin = (imacfb_defined.xres / 8) & 0xf8;
258 imacfb_defined.hsync_len = (imacfb_defined.xres / 8) & 0xf8;
259
260 imacfb_defined.red.offset = screen_info.red_pos;
261 imacfb_defined.red.length = screen_info.red_size;
262 imacfb_defined.green.offset = screen_info.green_pos;
263 imacfb_defined.green.length = screen_info.green_size;
264 imacfb_defined.blue.offset = screen_info.blue_pos;
265 imacfb_defined.blue.length = screen_info.blue_size;
266 imacfb_defined.transp.offset = screen_info.rsvd_pos;
267 imacfb_defined.transp.length = screen_info.rsvd_size;
268
269 printk(KERN_INFO "imacfb: %s: "
270 "size=%d:%d:%d:%d, shift=%d:%d:%d:%d\n",
271 "Truecolor",
272 screen_info.rsvd_size,
273 screen_info.red_size,
274 screen_info.green_size,
275 screen_info.blue_size,
276 screen_info.rsvd_pos,
277 screen_info.red_pos,
278 screen_info.green_pos,
279 screen_info.blue_pos);
280
281 imacfb_fix.ypanstep = 0;
282 imacfb_fix.ywrapstep = 0;
283
284 /* request failure does not faze us, as vgacon probably has this
285 * region already (FIXME) */
286 request_region(0x3c0, 32, "imacfb");
287
288 info->fbops = &imacfb_ops;
289 info->var = imacfb_defined;
290 info->fix = imacfb_fix;
291 info->flags = FBINFO_FLAG_DEFAULT;
292
293 if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
294 err = -ENOMEM;
295 goto err_unmap;
296 }
297 if (register_framebuffer(info)<0) {
298 err = -EINVAL;
299 goto err_fb_dealoc;
300 }
301 printk(KERN_INFO "fb%d: %s frame buffer device\n",
302 info->node, info->fix.id);
303 return 0;
304
305err_fb_dealoc:
306 fb_dealloc_cmap(&info->cmap);
307err_unmap:
308 iounmap(info->screen_base);
309 framebuffer_release(info);
310err_release_mem:
311 release_mem_region(imacfb_fix.smem_start, size_total);
312 return err;
313}
314
315static struct platform_driver imacfb_driver = {
316 .probe = imacfb_probe,
317 .driver = {
318 .name = "imacfb",
319 },
320};
321
322static struct platform_device imacfb_device = {
323 .name = "imacfb",
324};
325
326static int __init imacfb_init(void)
327{
328 int ret;
329 char *option = NULL;
330
331 /* ignore error return of fb_get_options */
332 fb_get_options("imacfb", &option);
333 imacfb_setup(option);
334 ret = platform_driver_register(&imacfb_driver);
335
336 if (!ret) {
337 ret = platform_device_register(&imacfb_device);
338 if (ret)
339 platform_driver_unregister(&imacfb_driver);
340 }
341 return ret;
342}
343module_init(imacfb_init);
344
345MODULE_LICENSE("GPL");
diff --git a/drivers/video/macmodes.c b/drivers/video/macmodes.c
index c0385c6f7db5..d21321ca7c39 100644
--- a/drivers/video/macmodes.c
+++ b/drivers/video/macmodes.c
@@ -327,7 +327,6 @@ int mac_var_to_vmode(const struct fb_var_screeninfo *var, int *vmode,
327 } 327 }
328 return -EINVAL; 328 return -EINVAL;
329} 329}
330EXPORT_SYMBOL(mac_var_to_vmode);
331 330
332/** 331/**
333 * mac_map_monitor_sense - Convert monitor sense to vmode 332 * mac_map_monitor_sense - Convert monitor sense to vmode
@@ -371,8 +370,9 @@ EXPORT_SYMBOL(mac_map_monitor_sense);
371 * 370 *
372 */ 371 */
373 372
374int __init mac_find_mode(struct fb_var_screeninfo *var, struct fb_info *info, 373int __devinit mac_find_mode(struct fb_var_screeninfo *var,
375 const char *mode_option, unsigned int default_bpp) 374 struct fb_info *info, const char *mode_option,
375 unsigned int default_bpp)
376{ 376{
377 const struct fb_videomode *db = NULL; 377 const struct fb_videomode *db = NULL;
378 unsigned int dbsize = 0; 378 unsigned int dbsize = 0;
diff --git a/drivers/video/macmodes.h b/drivers/video/macmodes.h
index 232f5a09a499..babeb81f467d 100644
--- a/drivers/video/macmodes.h
+++ b/drivers/video/macmodes.h
@@ -55,9 +55,10 @@ extern int mac_vmode_to_var(int vmode, int cmode,
55extern int mac_var_to_vmode(const struct fb_var_screeninfo *var, int *vmode, 55extern int mac_var_to_vmode(const struct fb_var_screeninfo *var, int *vmode,
56 int *cmode); 56 int *cmode);
57extern int mac_map_monitor_sense(int sense); 57extern int mac_map_monitor_sense(int sense);
58extern int __init mac_find_mode(struct fb_var_screeninfo *var, 58extern int __devinit mac_find_mode(struct fb_var_screeninfo *var,
59 struct fb_info *info, const char *mode_option, 59 struct fb_info *info,
60 unsigned int default_bpp); 60 const char *mode_option,
61 unsigned int default_bpp);
61 62
62 63
63 /* 64 /*
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index 26a1c618a205..ff5454601e22 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -259,6 +259,10 @@ static const struct fb_videomode modedb[] = {
259 /* 1152x768, 60 Hz, PowerBook G4 Titanium I and II */ 259 /* 1152x768, 60 Hz, PowerBook G4 Titanium I and II */
260 NULL, 60, 1152, 768, 15386, 158, 26, 29, 3, 136, 6, 260 NULL, 60, 1152, 768, 15386, 158, 26, 29, 3, 136, 6,
261 FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED 261 FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED
262 }, {
263 /* 1366x768, 60 Hz, 47.403 kHz hsync, WXGA 16:9 aspect ratio */
264 NULL, 60, 1366, 768, 13806, 120, 10, 14, 3, 32, 5,
265 0, FB_VMODE_NONINTERLACED
262 }, 266 },
263}; 267};
264 268
@@ -787,8 +791,9 @@ struct fb_videomode *fb_find_best_mode(struct fb_var_screeninfo *var,
787 if (diff > d) { 791 if (diff > d) {
788 diff = d; 792 diff = d;
789 best = mode; 793 best = mode;
790 } else if (diff == d && mode->refresh > best->refresh) 794 } else if (diff == d && best &&
791 best = mode; 795 mode->refresh > best->refresh)
796 best = mode;
792 } 797 }
793 } 798 }
794 return best; 799 return best;
@@ -1016,8 +1021,6 @@ EXPORT_SYMBOL(fb_videomode_to_var);
1016EXPORT_SYMBOL(fb_var_to_videomode); 1021EXPORT_SYMBOL(fb_var_to_videomode);
1017EXPORT_SYMBOL(fb_mode_is_equal); 1022EXPORT_SYMBOL(fb_mode_is_equal);
1018EXPORT_SYMBOL(fb_add_videomode); 1023EXPORT_SYMBOL(fb_add_videomode);
1019EXPORT_SYMBOL(fb_delete_videomode);
1020EXPORT_SYMBOL(fb_destroy_modelist);
1021EXPORT_SYMBOL(fb_match_mode); 1024EXPORT_SYMBOL(fb_match_mode);
1022EXPORT_SYMBOL(fb_find_best_mode); 1025EXPORT_SYMBOL(fb_find_best_mode);
1023EXPORT_SYMBOL(fb_find_nearest_mode); 1026EXPORT_SYMBOL(fb_find_nearest_mode);
diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index 24b12f71d5a8..2f156b724d1c 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -1333,17 +1333,22 @@ static int neofb_blank(int blank_mode, struct fb_info *info)
1333 * run "setterm -powersave powerdown" to take advantage 1333 * run "setterm -powersave powerdown" to take advantage
1334 */ 1334 */
1335 struct neofb_par *par = info->par; 1335 struct neofb_par *par = info->par;
1336 int seqflags, lcdflags, dpmsflags, reg; 1336 int seqflags, lcdflags, dpmsflags, reg, tmpdisp;
1337
1338 1337
1339 /* 1338 /*
1340 * Reload the value stored in the register, if sensible. It might have 1339 * Read back the register bits related to display configuration. They might
1341 * been changed via FN keystroke. 1340 * have been changed underneath the driver via Fn key stroke.
1341 */
1342 neoUnlock();
1343 tmpdisp = vga_rgfx(NULL, 0x20) & 0x03;
1344 neoLock(&par->state);
1345
1346 /* In case we blank the screen, we want to store the possibly new
1347 * configuration in the driver. During un-blank, we re-apply this setting,
1348 * since the LCD bit will be cleared in order to switch off the backlight.
1342 */ 1349 */
1343 if (par->PanelDispCntlRegRead) { 1350 if (par->PanelDispCntlRegRead) {
1344 neoUnlock(); 1351 par->PanelDispCntlReg1 = tmpdisp;
1345 par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03;
1346 neoLock(&par->state);
1347 } 1352 }
1348 par->PanelDispCntlRegRead = !blank_mode; 1353 par->PanelDispCntlRegRead = !blank_mode;
1349 1354
@@ -1378,12 +1383,21 @@ static int neofb_blank(int blank_mode, struct fb_info *info)
1378 break; 1383 break;
1379 case FB_BLANK_NORMAL: /* just blank screen (backlight stays on) */ 1384 case FB_BLANK_NORMAL: /* just blank screen (backlight stays on) */
1380 seqflags = VGA_SR01_SCREEN_OFF; /* Disable sequencer */ 1385 seqflags = VGA_SR01_SCREEN_OFF; /* Disable sequencer */
1381 lcdflags = par->PanelDispCntlReg1 & 0x02; /* LCD normal */ 1386 /*
1387 * During a blank operation with the LID shut, we might store "LCD off"
1388 * by mistake. Due to timing issues, the BIOS may switch the lights
1389 * back on, and we turn it back off once we "unblank".
1390 *
1391 * So here is an attempt to implement ">=" - if we are in the process
1392 * of unblanking, and the LCD bit is unset in the driver but set in the
1393 * register, we must keep it.
1394 */
1395 lcdflags = ((par->PanelDispCntlReg1 | tmpdisp) & 0x02); /* LCD normal */
1382 dpmsflags = 0x00; /* no hsync/vsync suppression */ 1396 dpmsflags = 0x00; /* no hsync/vsync suppression */
1383 break; 1397 break;
1384 case FB_BLANK_UNBLANK: /* unblank */ 1398 case FB_BLANK_UNBLANK: /* unblank */
1385 seqflags = 0; /* Enable sequencer */ 1399 seqflags = 0; /* Enable sequencer */
1386 lcdflags = par->PanelDispCntlReg1 & 0x02; /* LCD normal */ 1400 lcdflags = ((par->PanelDispCntlReg1 | tmpdisp) & 0x02); /* LCD normal */
1387 dpmsflags = 0x00; /* no hsync/vsync suppression */ 1401 dpmsflags = 0x00; /* no hsync/vsync suppression */
1388#ifdef CONFIG_TOSHIBA 1402#ifdef CONFIG_TOSHIBA
1389 /* Do we still need this ? */ 1403 /* Do we still need this ? */
diff --git a/drivers/video/nvidia/nv_hw.c b/drivers/video/nvidia/nv_hw.c
index 99c3a8e6a237..9ed640d35728 100644
--- a/drivers/video/nvidia/nv_hw.c
+++ b/drivers/video/nvidia/nv_hw.c
@@ -886,7 +886,10 @@ void NVCalcStateExt(struct nvidia_par *par,
886 case NV_ARCH_20: 886 case NV_ARCH_20:
887 case NV_ARCH_30: 887 case NV_ARCH_30:
888 default: 888 default:
889 if (((par->Chipset & 0xffff) == 0x01A0) || 889 if ((par->Chipset & 0xfff0) == 0x0240) {
890 state->arbitration0 = 256;
891 state->arbitration1 = 0x0480;
892 } else if (((par->Chipset & 0xffff) == 0x01A0) ||
890 ((par->Chipset & 0xffff) == 0x01f0)) { 893 ((par->Chipset & 0xffff) == 0x01f0)) {
891 nForceUpdateArbitrationSettings(VClk, 894 nForceUpdateArbitrationSettings(VClk,
892 pixelDepth * 8, 895 pixelDepth * 8,
@@ -1235,6 +1238,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state)
1235 break; 1238 break;
1236 case 0x0160: 1239 case 0x0160:
1237 case 0x01D0: 1240 case 0x01D0:
1241 case 0x0240:
1238 NV_WR32(par->PMC, 0x1700, 1242 NV_WR32(par->PMC, 0x1700,
1239 NV_RD32(par->PFB, 0x020C)); 1243 NV_RD32(par->PFB, 0x020C));
1240 NV_WR32(par->PMC, 0x1704, 0); 1244 NV_WR32(par->PMC, 0x1704, 0);
@@ -1359,7 +1363,9 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state)
1359 if(((par->Chipset & 0xfff0) 1363 if(((par->Chipset & 0xfff0)
1360 != 0x0160) && 1364 != 0x0160) &&
1361 ((par->Chipset & 0xfff0) 1365 ((par->Chipset & 0xfff0)
1362 != 0x0220)) 1366 != 0x0220) &&
1367 ((par->Chipset & 0xfff0)
1368 != 0x240))
1363 NV_WR32(par->PGRAPH, 1369 NV_WR32(par->PGRAPH,
1364 0x6900 + i*4, 1370 0x6900 + i*4,
1365 NV_RD32(par->PFB, 1371 NV_RD32(par->PFB,
diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index 03a7c1e9ce38..7b5cffb27851 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -67,359 +67,10 @@
67#define MAX_CURS 32 67#define MAX_CURS 32
68 68
69static struct pci_device_id nvidiafb_pci_tbl[] = { 69static struct pci_device_id nvidiafb_pci_tbl[] = {
70 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_TNT, 70 {PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
71 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, 71 PCI_BASE_CLASS_DISPLAY << 16, 0xff0000, 0},
72 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_TNT2, 72 { 0, }
73 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
74 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_UTNT2,
75 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
76 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_TNT_UNKNOWN,
77 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
78 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_VTNT2,
79 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
80 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_UVTNT2,
81 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
82 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_ITNT2,
83 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
84 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR,
85 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
86 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR,
87 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
88 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO,
89 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
90 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX,
91 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
92 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX2,
93 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
94 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE2_GO,
95 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
96 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO2_MXR,
97 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
98 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS,
99 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
100 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS2,
101 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
102 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE2_ULTRA,
103 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
104 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO2_PRO,
105 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
106 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_460,
107 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
108 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440,
109 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
110 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_420,
111 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
112 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440_SE,
113 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
114 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_440_GO,
115 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
116 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_420_GO,
117 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
118 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_460_GO,
119 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
120 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_420_GO_M32,
121 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
122 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_500XGL,
123 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
124 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_440_GO_M64,
125 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
126 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_200,
127 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
128 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_550XGL,
129 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
130 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_500_GOGL,
131 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
132 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_410_GO_M16,
133 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
134 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440_8X,
135 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
136 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440SE_8X,
137 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
138 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_420_8X,
139 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
140 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_4000,
141 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
142 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_448_GO,
143 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
144 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_488_GO,
145 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
146 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_580_XGL,
147 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
148 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_MAC,
149 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
150 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_280_NVS,
151 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
152 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_380_XGL,
153 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
154 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_IGEFORCE2,
155 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
156 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE3,
157 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
158 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE3_1,
159 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
160 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE3_2,
161 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
162 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_DDC,
163 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
164 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4600,
165 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
166 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4400,
167 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
168 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4200,
169 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
170 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_900XGL,
171 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
172 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_750XGL,
173 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
174 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_700XGL,
175 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
176 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800,
177 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
178 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800_8X,
179 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
180 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800SE,
181 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
182 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_4200_GO,
183 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
184 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_980_XGL,
185 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
186 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_780_XGL,
187 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
188 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO4_700_GOGL,
189 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
190 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5800_ULTRA,
191 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
192 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5800,
193 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
194 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_2000,
195 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
196 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1000,
197 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
198 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5600_ULTRA,
199 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
200 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5600,
201 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
202 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5600SE,
203 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
204 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5600,
205 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
206 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5650,
207 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
208 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO700,
209 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
210 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5200,
211 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
212 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5200_ULTRA,
213 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
214 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5200_1,
215 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
216 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5200SE,
217 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
218 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5200,
219 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
220 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5250,
221 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
222 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5250_32,
223 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
224 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO_5200,
225 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
226 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_NVS_280_PCI,
227 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
228 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_500,
229 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
230 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5300,
231 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
232 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5100,
233 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
234 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5900_ULTRA,
235 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
236 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5900,
237 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
238 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5900XT,
239 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
240 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5950_ULTRA,
241 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
242 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_3000,
243 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
244 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5700_ULTRA,
245 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
246 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5700,
247 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
248 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5700LE,
249 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
250 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5700VE,
251 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
252 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_1,
253 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
254 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_2,
255 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
256 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO1000,
257 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
258 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1100,
259 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
260 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5500,
261 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
262 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5100,
263 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
264 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_700,
265 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
266 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_5900ZT,
267 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
268 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6800_ULTRA,
269 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
270 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6800,
271 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
272 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6800_LE,
273 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
274 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6800_GT,
275 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
276 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_4000,
277 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
278 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6600_GT,
279 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
280 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6600,
281 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
282 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6610_XL,
283 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
284 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_QUADRO_FX_540,
285 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
286 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6200,
287 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
288 {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_ALT1,
289 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
290 {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT1,
291 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
292 {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT2,
293 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
294 {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6200_ALT1,
295 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
296 {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_GT,
297 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
298 {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_QUADRO_NVS280,
299 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
300 {PCI_VENDOR_ID_NVIDIA, 0x0252,
301 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
302 {PCI_VENDOR_ID_NVIDIA, 0x0313,
303 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
304 {PCI_VENDOR_ID_NVIDIA, 0x0316,
305 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
306 {PCI_VENDOR_ID_NVIDIA, 0x0317,
307 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
308 {PCI_VENDOR_ID_NVIDIA, 0x031D,
309 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
310 {PCI_VENDOR_ID_NVIDIA, 0x031E,
311 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
312 {PCI_VENDOR_ID_NVIDIA, 0x031F,
313 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
314 {PCI_VENDOR_ID_NVIDIA, 0x0329,
315 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
316 {PCI_VENDOR_ID_NVIDIA, 0x032F,
317 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
318 {PCI_VENDOR_ID_NVIDIA, 0x0345,
319 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
320 {PCI_VENDOR_ID_NVIDIA, 0x0349,
321 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
322 {PCI_VENDOR_ID_NVIDIA, 0x034B,
323 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
324 {PCI_VENDOR_ID_NVIDIA, 0x034F,
325 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
326 {PCI_VENDOR_ID_NVIDIA, 0x00c0,
327 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
328 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_GEFORCE_6800A,
329 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
330 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_GEFORCE_6800A_LE,
331 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
332 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_GEFORCE_GO_6800,
333 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
334 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_GEFORCE_GO_6800_ULTRA,
335 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
336 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_QUADRO_FX_GO1400,
337 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
338 {PCI_VENDOR_ID_NVIDIA, 0x00cd,
339 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
340 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_QUADRO_FX_1400,
341 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
342 {PCI_VENDOR_ID_NVIDIA, 0x0142,
343 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
344 {PCI_VENDOR_ID_NVIDIA, 0x0143,
345 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
346 {PCI_VENDOR_ID_NVIDIA, 0x0144,
347 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
348 {PCI_VENDOR_ID_NVIDIA, 0x0145,
349 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
350 {PCI_VENDOR_ID_NVIDIA, 0x0146,
351 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
352 {PCI_VENDOR_ID_NVIDIA, 0x0147,
353 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
354 {PCI_VENDOR_ID_NVIDIA, 0x0148,
355 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
356 {PCI_VENDOR_ID_NVIDIA, 0x0149,
357 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
358 {PCI_VENDOR_ID_NVIDIA, 0x014b,
359 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
360 {PCI_VENDOR_ID_NVIDIA, 0x14c,
361 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
362 {PCI_VENDOR_ID_NVIDIA, 0x014d,
363 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
364 {PCI_VENDOR_ID_NVIDIA, 0x0160,
365 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
366 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6200_TURBOCACHE,
367 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
368 {PCI_VENDOR_ID_NVIDIA, 0x0162,
369 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
370 {PCI_VENDOR_ID_NVIDIA, 0x0163,
371 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
372 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_GO_6200,
373 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
374 {PCI_VENDOR_ID_NVIDIA, 0x0165,
375 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
376 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_GO_6250,
377 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
378 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_GO_6200_1,
379 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
380 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_GO_6250_1,
381 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
382 {PCI_VENDOR_ID_NVIDIA, 0x0169,
383 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
384 {PCI_VENDOR_ID_NVIDIA, 0x016b,
385 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
386 {PCI_VENDOR_ID_NVIDIA, 0x016c,
387 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
388 {PCI_VENDOR_ID_NVIDIA, 0x016d,
389 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
390 {PCI_VENDOR_ID_NVIDIA, 0x016e,
391 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
392 {PCI_VENDOR_ID_NVIDIA, 0x0210,
393 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
394 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6800B,
395 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
396 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6800B_LE,
397 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
398 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6800B_GT,
399 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
400 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GT,
401 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
402 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GTX,
403 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
404 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_GO_7800,
405 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
406 {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_GO_7800_GTX,
407 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
408 {PCI_VENDOR_ID_NVIDIA, 0x021d,
409 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
410 {PCI_VENDOR_ID_NVIDIA, 0x021e,
411 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
412 {PCI_VENDOR_ID_NVIDIA, 0x0220,
413 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
414 {PCI_VENDOR_ID_NVIDIA, 0x0221,
415 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
416 {PCI_VENDOR_ID_NVIDIA, 0x0222,
417 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
418 {PCI_VENDOR_ID_NVIDIA, 0x0228,
419 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
420 {0,} /* terminate list */
421}; 73};
422
423MODULE_DEVICE_TABLE(pci, nvidiafb_pci_tbl); 74MODULE_DEVICE_TABLE(pci, nvidiafb_pci_tbl);
424 75
425/* command line data, set in nvidiafb_setup() */ 76/* command line data, set in nvidiafb_setup() */
@@ -1465,10 +1116,10 @@ static u32 __devinit nvidia_get_chipset(struct fb_info *info)
1465 struct nvidia_par *par = info->par; 1116 struct nvidia_par *par = info->par;
1466 u32 id = (par->pci_dev->vendor << 16) | par->pci_dev->device; 1117 u32 id = (par->pci_dev->vendor << 16) | par->pci_dev->device;
1467 1118
1468 printk("nvidiafb: PCI id - %x\n", id); 1119 printk(KERN_INFO PFX "Device ID: %x \n", id);
1120
1469 if ((id & 0xfff0) == 0x00f0) { 1121 if ((id & 0xfff0) == 0x00f0) {
1470 /* pci-e */ 1122 /* pci-e */
1471 printk("nvidiafb: PCI-E card\n");
1472 id = NV_RD32(par->REGS, 0x1800); 1123 id = NV_RD32(par->REGS, 0x1800);
1473 1124
1474 if ((id & 0x0000ffff) == 0x000010DE) 1125 if ((id & 0x0000ffff) == 0x000010DE)
@@ -1476,9 +1127,9 @@ static u32 __devinit nvidia_get_chipset(struct fb_info *info)
1476 else if ((id & 0xffff0000) == 0xDE100000) /* wrong endian */ 1127 else if ((id & 0xffff0000) == 0xDE100000) /* wrong endian */
1477 id = 0x10DE0000 | ((id << 8) & 0x0000ff00) | 1128 id = 0x10DE0000 | ((id << 8) & 0x0000ff00) |
1478 ((id >> 8) & 0x000000ff); 1129 ((id >> 8) & 0x000000ff);
1130 printk(KERN_INFO PFX "Subsystem ID: %x \n", id);
1479 } 1131 }
1480 1132
1481 printk("nvidiafb: Actual id - %x\n", id);
1482 return id; 1133 return id;
1483} 1134}
1484 1135
@@ -1520,6 +1171,7 @@ static u32 __devinit nvidia_get_arch(struct fb_info *info)
1520 case 0x0210: 1171 case 0x0210:
1521 case 0x0220: 1172 case 0x0220:
1522 case 0x0230: 1173 case 0x0230:
1174 case 0x0240:
1523 case 0x0290: 1175 case 0x0290:
1524 case 0x0390: 1176 case 0x0390:
1525 arch = NV_ARCH_40; 1177 arch = NV_ARCH_40;
@@ -1567,7 +1219,7 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd,
1567 1219
1568 if (pci_request_regions(pd, "nvidiafb")) { 1220 if (pci_request_regions(pd, "nvidiafb")) {
1569 printk(KERN_ERR PFX "cannot request PCI regions\n"); 1221 printk(KERN_ERR PFX "cannot request PCI regions\n");
1570 goto err_out_request; 1222 goto err_out_enable;
1571 } 1223 }
1572 1224
1573 par->FlatPanel = flatpanel; 1225 par->FlatPanel = flatpanel;
@@ -1596,7 +1248,6 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd,
1596 } 1248 }
1597 1249
1598 par->Chipset = nvidia_get_chipset(info); 1250 par->Chipset = nvidia_get_chipset(info);
1599 printk(KERN_INFO PFX "nVidia device/chipset %X\n", par->Chipset);
1600 par->Architecture = nvidia_get_arch(info); 1251 par->Architecture = nvidia_get_arch(info);
1601 1252
1602 if (par->Architecture == 0) { 1253 if (par->Architecture == 0) {
@@ -1687,10 +1338,8 @@ err_out_free_base1:
1687 nvidia_delete_i2c_busses(par); 1338 nvidia_delete_i2c_busses(par);
1688err_out_arch: 1339err_out_arch:
1689 iounmap(par->REGS); 1340 iounmap(par->REGS);
1690err_out_free_base0: 1341 err_out_free_base0:
1691 pci_release_regions(pd); 1342 pci_release_regions(pd);
1692err_out_request:
1693 pci_disable_device(pd);
1694err_out_enable: 1343err_out_enable:
1695 kfree(info->pixmap.addr); 1344 kfree(info->pixmap.addr);
1696err_out_kfree: 1345err_out_kfree:
@@ -1720,7 +1369,6 @@ static void __exit nvidiafb_remove(struct pci_dev *pd)
1720 nvidia_delete_i2c_busses(par); 1369 nvidia_delete_i2c_busses(par);
1721 iounmap(par->REGS); 1370 iounmap(par->REGS);
1722 pci_release_regions(pd); 1371 pci_release_regions(pd);
1723 pci_disable_device(pd);
1724 kfree(info->pixmap.addr); 1372 kfree(info->pixmap.addr);
1725 framebuffer_release(info); 1373 framebuffer_release(info);
1726 pci_set_drvdata(pd, NULL); 1374 pci_set_drvdata(pd, NULL);
diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index d4384ab1df65..12af58c5cf1f 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -2152,7 +2152,6 @@ err_iounmap_ctrl_base:
2152err_release_region: 2152err_release_region:
2153 pci_release_regions(pd); 2153 pci_release_regions(pd);
2154err_disable_device: 2154err_disable_device:
2155 pci_disable_device(pd);
2156err_free_pixmap: 2155err_free_pixmap:
2157 kfree(info->pixmap.addr); 2156 kfree(info->pixmap.addr);
2158err_framebuffer_release: 2157err_framebuffer_release:
@@ -2187,7 +2186,6 @@ static void __exit rivafb_remove(struct pci_dev *pd)
2187 if (par->riva.Architecture == NV_ARCH_03) 2186 if (par->riva.Architecture == NV_ARCH_03)
2188 iounmap(par->riva.PRAMIN); 2187 iounmap(par->riva.PRAMIN);
2189 pci_release_regions(pd); 2188 pci_release_regions(pd);
2190 pci_disable_device(pd);
2191 kfree(info->pixmap.addr); 2189 kfree(info->pixmap.addr);
2192 framebuffer_release(info); 2190 framebuffer_release(info);
2193 pci_set_drvdata(pd, NULL); 2191 pci_set_drvdata(pd, NULL);
diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c
index 9451932fbaf2..fbc411850686 100644
--- a/drivers/video/s3c2410fb.c
+++ b/drivers/video/s3c2410fb.c
@@ -641,6 +641,7 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
641 int ret; 641 int ret;
642 int irq; 642 int irq;
643 int i; 643 int i;
644 u32 lcdcon1;
644 645
645 mach_info = pdev->dev.platform_data; 646 mach_info = pdev->dev.platform_data;
646 if (mach_info == NULL) { 647 if (mach_info == NULL) {
@@ -672,6 +673,11 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
672 673
673 memcpy(&info->regs, &mach_info->regs, sizeof(info->regs)); 674 memcpy(&info->regs, &mach_info->regs, sizeof(info->regs));
674 675
676 /* Stop the video and unset ENVID if set */
677 info->regs.lcdcon1 &= ~S3C2410_LCDCON1_ENVID;
678 lcdcon1 = readl(S3C2410_LCDCON1);
679 writel(lcdcon1 & ~S3C2410_LCDCON1_ENVID, S3C2410_LCDCON1);
680
675 info->mach_info = pdev->dev.platform_data; 681 info->mach_info = pdev->dev.platform_data;
676 682
677 fbinfo->fix.type = FB_TYPE_PACKED_PIXELS; 683 fbinfo->fix.type = FB_TYPE_PACKED_PIXELS;
@@ -794,15 +800,14 @@ dealloc_fb:
794 * shutdown the lcd controller 800 * shutdown the lcd controller
795*/ 801*/
796 802
797static void s3c2410fb_stop_lcd(void) 803static void s3c2410fb_stop_lcd(struct s3c2410fb_info *fbi)
798{ 804{
799 unsigned long flags; 805 unsigned long flags;
800 unsigned long tmp;
801 806
802 local_irq_save(flags); 807 local_irq_save(flags);
803 808
804 tmp = readl(S3C2410_LCDCON1); 809 fbi->regs.lcdcon1 &= ~S3C2410_LCDCON1_ENVID;
805 writel(tmp & ~S3C2410_LCDCON1_ENVID, S3C2410_LCDCON1); 810 writel(fbi->regs.lcdcon1, S3C2410_LCDCON1);
806 811
807 local_irq_restore(flags); 812 local_irq_restore(flags);
808} 813}
@@ -816,7 +821,7 @@ static int s3c2410fb_remove(struct platform_device *pdev)
816 struct s3c2410fb_info *info = fbinfo->par; 821 struct s3c2410fb_info *info = fbinfo->par;
817 int irq; 822 int irq;
818 823
819 s3c2410fb_stop_lcd(); 824 s3c2410fb_stop_lcd(info);
820 msleep(1); 825 msleep(1);
821 826
822 s3c2410fb_unmap_video_memory(info); 827 s3c2410fb_unmap_video_memory(info);
@@ -844,7 +849,7 @@ static int s3c2410fb_suspend(struct platform_device *dev, pm_message_t state)
844 struct fb_info *fbinfo = platform_get_drvdata(dev); 849 struct fb_info *fbinfo = platform_get_drvdata(dev);
845 struct s3c2410fb_info *info = fbinfo->par; 850 struct s3c2410fb_info *info = fbinfo->par;
846 851
847 s3c2410fb_stop_lcd(); 852 s3c2410fb_stop_lcd(info);
848 853
849 /* sleep before disabling the clock, we need to ensure 854 /* sleep before disabling the clock, we need to ensure
850 * the LCD DMA engine is not going to get back on the bus 855 * the LCD DMA engine is not going to get back on the bus
diff --git a/drivers/video/savage/savagefb.h b/drivers/video/savage/savagefb.h
index 58cfdfb41833..e648a6c0f6d9 100644
--- a/drivers/video/savage/savagefb.h
+++ b/drivers/video/savage/savagefb.h
@@ -147,7 +147,27 @@ struct xtimings {
147 int interlaced; 147 int interlaced;
148}; 148};
149 149
150struct savage_reg {
151 unsigned char MiscOutReg; /* Misc */
152 unsigned char CRTC[25]; /* Crtc Controller */
153 unsigned char Sequencer[5]; /* Video Sequencer */
154 unsigned char Graphics[9]; /* Video Graphics */
155 unsigned char Attribute[21]; /* Video Atribute */
150 156
157 unsigned int mode, refresh;
158 unsigned char SR08, SR0E, SR0F;
159 unsigned char SR10, SR11, SR12, SR13, SR15, SR18, SR29, SR30;
160 unsigned char SR54[8];
161 unsigned char Clock;
162 unsigned char CR31, CR32, CR33, CR34, CR36, CR3A, CR3B, CR3C;
163 unsigned char CR40, CR41, CR42, CR43, CR45;
164 unsigned char CR50, CR51, CR53, CR55, CR58, CR5B, CR5D, CR5E;
165 unsigned char CR60, CR63, CR65, CR66, CR67, CR68, CR69, CR6D, CR6F;
166 unsigned char CR86, CR88;
167 unsigned char CR90, CR91, CRB0;
168 unsigned int STREAMS[22]; /* yuck, streams regs */
169 unsigned int MMPR0, MMPR1, MMPR2, MMPR3;
170};
151/* --------------------------------------------------------------------- */ 171/* --------------------------------------------------------------------- */
152 172
153#define NR_PALETTE 256 173#define NR_PALETTE 256
@@ -167,6 +187,8 @@ struct savagefb_par {
167 struct pci_dev *pcidev; 187 struct pci_dev *pcidev;
168 savage_chipset chip; 188 savage_chipset chip;
169 struct savagefb_i2c_chan chan; 189 struct savagefb_i2c_chan chan;
190 struct savage_reg state;
191 struct savage_reg save;
170 unsigned char *edid; 192 unsigned char *edid;
171 u32 pseudo_palette[16]; 193 u32 pseudo_palette[16];
172 int paletteEnabled; 194 int paletteEnabled;
@@ -179,6 +201,7 @@ struct savagefb_par {
179 int minClock; 201 int minClock;
180 int numClocks; 202 int numClocks;
181 int clock[4]; 203 int clock[4];
204 int MCLK, REFCLK, LCDclk;
182 struct { 205 struct {
183 u8 __iomem *vbase; 206 u8 __iomem *vbase;
184 u32 pbase; 207 u32 pbase;
@@ -196,7 +219,6 @@ struct savagefb_par {
196 219
197 volatile u32 __iomem *bci_base; 220 volatile u32 __iomem *bci_base;
198 unsigned int bci_ptr; 221 unsigned int bci_ptr;
199
200 u32 cob_offset; 222 u32 cob_offset;
201 u32 cob_size; 223 u32 cob_size;
202 int cob_index; 224 int cob_index;
@@ -204,7 +226,6 @@ struct savagefb_par {
204 void (*SavageWaitIdle) (struct savagefb_par *par); 226 void (*SavageWaitIdle) (struct savagefb_par *par);
205 void (*SavageWaitFifo) (struct savagefb_par *par, int space); 227 void (*SavageWaitFifo) (struct savagefb_par *par, int space);
206 228
207 int MCLK, REFCLK, LCDclk;
208 int HorizScaleFactor; 229 int HorizScaleFactor;
209 230
210 /* Panels size */ 231 /* Panels size */
@@ -217,26 +238,6 @@ struct savagefb_par {
217 238
218 int depth; 239 int depth;
219 int vwidth; 240 int vwidth;
220
221 unsigned char MiscOutReg; /* Misc */
222 unsigned char CRTC[25]; /* Crtc Controller */
223 unsigned char Sequencer[5]; /* Video Sequencer */
224 unsigned char Graphics[9]; /* Video Graphics */
225 unsigned char Attribute[21]; /* Video Atribute */
226
227 unsigned int mode, refresh;
228 unsigned char SR08, SR0E, SR0F;
229 unsigned char SR10, SR11, SR12, SR13, SR15, SR18, SR29, SR30;
230 unsigned char SR54[8];
231 unsigned char Clock;
232 unsigned char CR31, CR32, CR33, CR34, CR36, CR3A, CR3B, CR3C;
233 unsigned char CR40, CR41, CR42, CR43, CR45;
234 unsigned char CR50, CR51, CR53, CR55, CR58, CR5B, CR5D, CR5E;
235 unsigned char CR60, CR63, CR65, CR66, CR67, CR68, CR69, CR6D, CR6F;
236 unsigned char CR86, CR88;
237 unsigned char CR90, CR91, CRB0;
238 unsigned int STREAMS[22]; /* yuck, streams regs */
239 unsigned int MMPR0, MMPR1, MMPR2, MMPR3;
240}; 241};
241 242
242#define BCI_BD_BW_DISABLE 0x10000000 243#define BCI_BD_BW_DISABLE 0x10000000
diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c
index 0da624e6524f..78883cf66a4d 100644
--- a/drivers/video/savage/savagefb_driver.c
+++ b/drivers/video/savage/savagefb_driver.c
@@ -86,15 +86,15 @@ MODULE_DESCRIPTION("FBDev driver for S3 Savage PCI/AGP Chips");
86 86
87/* --------------------------------------------------------------------- */ 87/* --------------------------------------------------------------------- */
88 88
89static void vgaHWSeqReset (struct savagefb_par *par, int start) 89static void vgaHWSeqReset(struct savagefb_par *par, int start)
90{ 90{
91 if (start) 91 if (start)
92 VGAwSEQ (0x00, 0x01, par); /* Synchronous Reset */ 92 VGAwSEQ(0x00, 0x01, par); /* Synchronous Reset */
93 else 93 else
94 VGAwSEQ (0x00, 0x03, par); /* End Reset */ 94 VGAwSEQ(0x00, 0x03, par); /* End Reset */
95} 95}
96 96
97static void vgaHWProtect (struct savagefb_par *par, int on) 97static void vgaHWProtect(struct savagefb_par *par, int on)
98{ 98{
99 unsigned char tmp; 99 unsigned char tmp;
100 100
@@ -102,10 +102,10 @@ static void vgaHWProtect (struct savagefb_par *par, int on)
102 /* 102 /*
103 * Turn off screen and disable sequencer. 103 * Turn off screen and disable sequencer.
104 */ 104 */
105 tmp = VGArSEQ (0x01, par); 105 tmp = VGArSEQ(0x01, par);
106 106
107 vgaHWSeqReset (par, 1); /* start synchronous reset */ 107 vgaHWSeqReset(par, 1); /* start synchronous reset */
108 VGAwSEQ (0x01, tmp | 0x20, par);/* disable the display */ 108 VGAwSEQ(0x01, tmp | 0x20, par);/* disable the display */
109 109
110 VGAenablePalette(par); 110 VGAenablePalette(par);
111 } else { 111 } else {
@@ -113,75 +113,76 @@ static void vgaHWProtect (struct savagefb_par *par, int on)
113 * Reenable sequencer, then turn on screen. 113 * Reenable sequencer, then turn on screen.
114 */ 114 */
115 115
116 tmp = VGArSEQ (0x01, par); 116 tmp = VGArSEQ(0x01, par);
117 117
118 VGAwSEQ (0x01, tmp & ~0x20, par);/* reenable display */ 118 VGAwSEQ(0x01, tmp & ~0x20, par);/* reenable display */
119 vgaHWSeqReset (par, 0); /* clear synchronous reset */ 119 vgaHWSeqReset(par, 0); /* clear synchronous reset */
120 120
121 VGAdisablePalette(par); 121 VGAdisablePalette(par);
122 } 122 }
123} 123}
124 124
125static void vgaHWRestore (struct savagefb_par *par) 125static void vgaHWRestore(struct savagefb_par *par, struct savage_reg *reg)
126{ 126{
127 int i; 127 int i;
128 128
129 VGAwMISC (par->MiscOutReg, par); 129 VGAwMISC(reg->MiscOutReg, par);
130 130
131 for (i = 1; i < 5; i++) 131 for (i = 1; i < 5; i++)
132 VGAwSEQ (i, par->Sequencer[i], par); 132 VGAwSEQ(i, reg->Sequencer[i], par);
133 133
134 /* Ensure CRTC registers 0-7 are unlocked by clearing bit 7 or 134 /* Ensure CRTC registers 0-7 are unlocked by clearing bit 7 or
135 CRTC[17] */ 135 CRTC[17] */
136 VGAwCR (17, par->CRTC[17] & ~0x80, par); 136 VGAwCR(17, reg->CRTC[17] & ~0x80, par);
137 137
138 for (i = 0; i < 25; i++) 138 for (i = 0; i < 25; i++)
139 VGAwCR (i, par->CRTC[i], par); 139 VGAwCR(i, reg->CRTC[i], par);
140 140
141 for (i = 0; i < 9; i++) 141 for (i = 0; i < 9; i++)
142 VGAwGR (i, par->Graphics[i], par); 142 VGAwGR(i, reg->Graphics[i], par);
143 143
144 VGAenablePalette(par); 144 VGAenablePalette(par);
145 145
146 for (i = 0; i < 21; i++) 146 for (i = 0; i < 21; i++)
147 VGAwATTR (i, par->Attribute[i], par); 147 VGAwATTR(i, reg->Attribute[i], par);
148 148
149 VGAdisablePalette(par); 149 VGAdisablePalette(par);
150} 150}
151 151
152static void vgaHWInit (struct fb_var_screeninfo *var, 152static void vgaHWInit(struct fb_var_screeninfo *var,
153 struct savagefb_par *par, 153 struct savagefb_par *par,
154 struct xtimings *timings) 154 struct xtimings *timings,
155 struct savage_reg *reg)
155{ 156{
156 par->MiscOutReg = 0x23; 157 reg->MiscOutReg = 0x23;
157 158
158 if (!(timings->sync & FB_SYNC_HOR_HIGH_ACT)) 159 if (!(timings->sync & FB_SYNC_HOR_HIGH_ACT))
159 par->MiscOutReg |= 0x40; 160 reg->MiscOutReg |= 0x40;
160 161
161 if (!(timings->sync & FB_SYNC_VERT_HIGH_ACT)) 162 if (!(timings->sync & FB_SYNC_VERT_HIGH_ACT))
162 par->MiscOutReg |= 0x80; 163 reg->MiscOutReg |= 0x80;
163 164
164 /* 165 /*
165 * Time Sequencer 166 * Time Sequencer
166 */ 167 */
167 par->Sequencer[0x00] = 0x00; 168 reg->Sequencer[0x00] = 0x00;
168 par->Sequencer[0x01] = 0x01; 169 reg->Sequencer[0x01] = 0x01;
169 par->Sequencer[0x02] = 0x0F; 170 reg->Sequencer[0x02] = 0x0F;
170 par->Sequencer[0x03] = 0x00; /* Font select */ 171 reg->Sequencer[0x03] = 0x00; /* Font select */
171 par->Sequencer[0x04] = 0x0E; /* Misc */ 172 reg->Sequencer[0x04] = 0x0E; /* Misc */
172 173
173 /* 174 /*
174 * CRTC Controller 175 * CRTC Controller
175 */ 176 */
176 par->CRTC[0x00] = (timings->HTotal >> 3) - 5; 177 reg->CRTC[0x00] = (timings->HTotal >> 3) - 5;
177 par->CRTC[0x01] = (timings->HDisplay >> 3) - 1; 178 reg->CRTC[0x01] = (timings->HDisplay >> 3) - 1;
178 par->CRTC[0x02] = (timings->HSyncStart >> 3) - 1; 179 reg->CRTC[0x02] = (timings->HSyncStart >> 3) - 1;
179 par->CRTC[0x03] = (((timings->HSyncEnd >> 3) - 1) & 0x1f) | 0x80; 180 reg->CRTC[0x03] = (((timings->HSyncEnd >> 3) - 1) & 0x1f) | 0x80;
180 par->CRTC[0x04] = (timings->HSyncStart >> 3); 181 reg->CRTC[0x04] = (timings->HSyncStart >> 3);
181 par->CRTC[0x05] = ((((timings->HSyncEnd >> 3) - 1) & 0x20) << 2) | 182 reg->CRTC[0x05] = ((((timings->HSyncEnd >> 3) - 1) & 0x20) << 2) |
182 (((timings->HSyncEnd >> 3)) & 0x1f); 183 (((timings->HSyncEnd >> 3)) & 0x1f);
183 par->CRTC[0x06] = (timings->VTotal - 2) & 0xFF; 184 reg->CRTC[0x06] = (timings->VTotal - 2) & 0xFF;
184 par->CRTC[0x07] = (((timings->VTotal - 2) & 0x100) >> 8) | 185 reg->CRTC[0x07] = (((timings->VTotal - 2) & 0x100) >> 8) |
185 (((timings->VDisplay - 1) & 0x100) >> 7) | 186 (((timings->VDisplay - 1) & 0x100) >> 7) |
186 ((timings->VSyncStart & 0x100) >> 6) | 187 ((timings->VSyncStart & 0x100) >> 6) |
187 (((timings->VSyncStart - 1) & 0x100) >> 5) | 188 (((timings->VSyncStart - 1) & 0x100) >> 5) |
@@ -189,27 +190,27 @@ static void vgaHWInit (struct fb_var_screeninfo *var,
189 (((timings->VTotal - 2) & 0x200) >> 4) | 190 (((timings->VTotal - 2) & 0x200) >> 4) |
190 (((timings->VDisplay - 1) & 0x200) >> 3) | 191 (((timings->VDisplay - 1) & 0x200) >> 3) |
191 ((timings->VSyncStart & 0x200) >> 2); 192 ((timings->VSyncStart & 0x200) >> 2);
192 par->CRTC[0x08] = 0x00; 193 reg->CRTC[0x08] = 0x00;
193 par->CRTC[0x09] = (((timings->VSyncStart - 1) & 0x200) >> 4) | 0x40; 194 reg->CRTC[0x09] = (((timings->VSyncStart - 1) & 0x200) >> 4) | 0x40;
194 195
195 if (timings->dblscan) 196 if (timings->dblscan)
196 par->CRTC[0x09] |= 0x80; 197 reg->CRTC[0x09] |= 0x80;
197 198
198 par->CRTC[0x0a] = 0x00; 199 reg->CRTC[0x0a] = 0x00;
199 par->CRTC[0x0b] = 0x00; 200 reg->CRTC[0x0b] = 0x00;
200 par->CRTC[0x0c] = 0x00; 201 reg->CRTC[0x0c] = 0x00;
201 par->CRTC[0x0d] = 0x00; 202 reg->CRTC[0x0d] = 0x00;
202 par->CRTC[0x0e] = 0x00; 203 reg->CRTC[0x0e] = 0x00;
203 par->CRTC[0x0f] = 0x00; 204 reg->CRTC[0x0f] = 0x00;
204 par->CRTC[0x10] = timings->VSyncStart & 0xff; 205 reg->CRTC[0x10] = timings->VSyncStart & 0xff;
205 par->CRTC[0x11] = (timings->VSyncEnd & 0x0f) | 0x20; 206 reg->CRTC[0x11] = (timings->VSyncEnd & 0x0f) | 0x20;
206 par->CRTC[0x12] = (timings->VDisplay - 1) & 0xff; 207 reg->CRTC[0x12] = (timings->VDisplay - 1) & 0xff;
207 par->CRTC[0x13] = var->xres_virtual >> 4; 208 reg->CRTC[0x13] = var->xres_virtual >> 4;
208 par->CRTC[0x14] = 0x00; 209 reg->CRTC[0x14] = 0x00;
209 par->CRTC[0x15] = (timings->VSyncStart - 1) & 0xff; 210 reg->CRTC[0x15] = (timings->VSyncStart - 1) & 0xff;
210 par->CRTC[0x16] = (timings->VSyncEnd - 1) & 0xff; 211 reg->CRTC[0x16] = (timings->VSyncEnd - 1) & 0xff;
211 par->CRTC[0x17] = 0xc3; 212 reg->CRTC[0x17] = 0xc3;
212 par->CRTC[0x18] = 0xff; 213 reg->CRTC[0x18] = 0xff;
213 214
214 /* 215 /*
215 * are these unnecessary? 216 * are these unnecessary?
@@ -220,38 +221,38 @@ static void vgaHWInit (struct fb_var_screeninfo *var,
220 /* 221 /*
221 * Graphics Display Controller 222 * Graphics Display Controller
222 */ 223 */
223 par->Graphics[0x00] = 0x00; 224 reg->Graphics[0x00] = 0x00;
224 par->Graphics[0x01] = 0x00; 225 reg->Graphics[0x01] = 0x00;
225 par->Graphics[0x02] = 0x00; 226 reg->Graphics[0x02] = 0x00;
226 par->Graphics[0x03] = 0x00; 227 reg->Graphics[0x03] = 0x00;
227 par->Graphics[0x04] = 0x00; 228 reg->Graphics[0x04] = 0x00;
228 par->Graphics[0x05] = 0x40; 229 reg->Graphics[0x05] = 0x40;
229 par->Graphics[0x06] = 0x05; /* only map 64k VGA memory !!!! */ 230 reg->Graphics[0x06] = 0x05; /* only map 64k VGA memory !!!! */
230 par->Graphics[0x07] = 0x0F; 231 reg->Graphics[0x07] = 0x0F;
231 par->Graphics[0x08] = 0xFF; 232 reg->Graphics[0x08] = 0xFF;
232 233
233 234
234 par->Attribute[0x00] = 0x00; /* standard colormap translation */ 235 reg->Attribute[0x00] = 0x00; /* standard colormap translation */
235 par->Attribute[0x01] = 0x01; 236 reg->Attribute[0x01] = 0x01;
236 par->Attribute[0x02] = 0x02; 237 reg->Attribute[0x02] = 0x02;
237 par->Attribute[0x03] = 0x03; 238 reg->Attribute[0x03] = 0x03;
238 par->Attribute[0x04] = 0x04; 239 reg->Attribute[0x04] = 0x04;
239 par->Attribute[0x05] = 0x05; 240 reg->Attribute[0x05] = 0x05;
240 par->Attribute[0x06] = 0x06; 241 reg->Attribute[0x06] = 0x06;
241 par->Attribute[0x07] = 0x07; 242 reg->Attribute[0x07] = 0x07;
242 par->Attribute[0x08] = 0x08; 243 reg->Attribute[0x08] = 0x08;
243 par->Attribute[0x09] = 0x09; 244 reg->Attribute[0x09] = 0x09;
244 par->Attribute[0x0a] = 0x0A; 245 reg->Attribute[0x0a] = 0x0A;
245 par->Attribute[0x0b] = 0x0B; 246 reg->Attribute[0x0b] = 0x0B;
246 par->Attribute[0x0c] = 0x0C; 247 reg->Attribute[0x0c] = 0x0C;
247 par->Attribute[0x0d] = 0x0D; 248 reg->Attribute[0x0d] = 0x0D;
248 par->Attribute[0x0e] = 0x0E; 249 reg->Attribute[0x0e] = 0x0E;
249 par->Attribute[0x0f] = 0x0F; 250 reg->Attribute[0x0f] = 0x0F;
250 par->Attribute[0x10] = 0x41; 251 reg->Attribute[0x10] = 0x41;
251 par->Attribute[0x11] = 0xFF; 252 reg->Attribute[0x11] = 0xFF;
252 par->Attribute[0x12] = 0x0F; 253 reg->Attribute[0x12] = 0x0F;
253 par->Attribute[0x13] = 0x00; 254 reg->Attribute[0x13] = 0x00;
254 par->Attribute[0x14] = 0x00; 255 reg->Attribute[0x14] = 0x00;
255} 256}
256 257
257/* -------------------- Hardware specific routines ------------------------- */ 258/* -------------------- Hardware specific routines ------------------------- */
@@ -304,15 +305,15 @@ savage2000_waitidle(struct savagefb_par *par)
304 while ((savage_in32(0x48C60, par) & 0x009fffff)); 305 while ((savage_in32(0x48C60, par) & 0x009fffff));
305} 306}
306 307
307 308#ifdef CONFIG_FB_SAVAGE_ACCEL
308static void 309static void
309SavageSetup2DEngine (struct savagefb_par *par) 310SavageSetup2DEngine(struct savagefb_par *par)
310{ 311{
311 unsigned long GlobalBitmapDescriptor; 312 unsigned long GlobalBitmapDescriptor;
312 313
313 GlobalBitmapDescriptor = 1 | 8 | BCI_BD_BW_DISABLE; 314 GlobalBitmapDescriptor = 1 | 8 | BCI_BD_BW_DISABLE;
314 BCI_BD_SET_BPP (GlobalBitmapDescriptor, par->depth); 315 BCI_BD_SET_BPP(GlobalBitmapDescriptor, par->depth);
315 BCI_BD_SET_STRIDE (GlobalBitmapDescriptor, par->vwidth); 316 BCI_BD_SET_STRIDE(GlobalBitmapDescriptor, par->vwidth);
316 317
317 switch(par->chip) { 318 switch(par->chip) {
318 case S3_SAVAGE3D: 319 case S3_SAVAGE3D:
@@ -361,32 +362,48 @@ SavageSetup2DEngine (struct savagefb_par *par)
361 vga_out8(0x3d5, 0x0c, par); 362 vga_out8(0x3d5, 0x0c, par);
362 363
363 /* Set stride to use GBD. */ 364 /* Set stride to use GBD. */
364 vga_out8 (0x3d4, 0x50, par); 365 vga_out8(0x3d4, 0x50, par);
365 vga_out8 (0x3d5, vga_in8(0x3d5, par) | 0xC1, par); 366 vga_out8(0x3d5, vga_in8(0x3d5, par) | 0xC1, par);
366 367
367 /* Enable 2D engine. */ 368 /* Enable 2D engine. */
368 vga_out8 (0x3d4, 0x40, par); 369 vga_out8(0x3d4, 0x40, par);
369 vga_out8 (0x3d5, 0x01, par); 370 vga_out8(0x3d5, 0x01, par);
370 371
371 savage_out32 (MONO_PAT_0, ~0, par); 372 savage_out32(MONO_PAT_0, ~0, par);
372 savage_out32 (MONO_PAT_1, ~0, par); 373 savage_out32(MONO_PAT_1, ~0, par);
373 374
374 /* Setup plane masks */ 375 /* Setup plane masks */
375 savage_out32 (0x8128, ~0, par); /* enable all write planes */ 376 savage_out32(0x8128, ~0, par); /* enable all write planes */
376 savage_out32 (0x812C, ~0, par); /* enable all read planes */ 377 savage_out32(0x812C, ~0, par); /* enable all read planes */
377 savage_out16 (0x8134, 0x27, par); 378 savage_out16(0x8134, 0x27, par);
378 savage_out16 (0x8136, 0x07, par); 379 savage_out16(0x8136, 0x07, par);
379 380
380 /* Now set the GBD */ 381 /* Now set the GBD */
381 par->bci_ptr = 0; 382 par->bci_ptr = 0;
382 par->SavageWaitFifo (par, 4); 383 par->SavageWaitFifo(par, 4);
383 384
384 BCI_SEND( BCI_CMD_SETREG | (1 << 16) | BCI_GBD1 ); 385 BCI_SEND(BCI_CMD_SETREG | (1 << 16) | BCI_GBD1);
385 BCI_SEND( 0 ); 386 BCI_SEND(0);
386 BCI_SEND( BCI_CMD_SETREG | (1 << 16) | BCI_GBD2 ); 387 BCI_SEND(BCI_CMD_SETREG | (1 << 16) | BCI_GBD2);
387 BCI_SEND( GlobalBitmapDescriptor ); 388 BCI_SEND(GlobalBitmapDescriptor);
388} 389}
389 390
391static void savagefb_set_clip(struct fb_info *info)
392{
393 struct savagefb_par *par = info->par;
394 int cmd;
395
396 cmd = BCI_CMD_NOP | BCI_CMD_CLIP_NEW;
397 par->bci_ptr = 0;
398 par->SavageWaitFifo(par,3);
399 BCI_SEND(cmd);
400 BCI_SEND(BCI_CLIP_TL(0, 0));
401 BCI_SEND(BCI_CLIP_BR(0xfff, 0xfff));
402}
403#else
404static void SavageSetup2DEngine(struct savagefb_par *par) {}
405
406#endif
390 407
391static void SavageCalcClock(long freq, int min_m, int min_n1, int max_n1, 408static void SavageCalcClock(long freq, int min_m, int min_n1, int max_n1,
392 int min_n2, int max_n2, long freq_min, 409 int min_n2, int max_n2, long freq_min,
@@ -398,11 +415,11 @@ static void SavageCalcClock(long freq, int min_m, int min_n1, int max_n1,
398 unsigned char n1, n2, best_n1=16+2, best_n2=2, best_m=125+2; 415 unsigned char n1, n2, best_n1=16+2, best_n2=2, best_m=125+2;
399 416
400 if (freq < freq_min / (1 << max_n2)) { 417 if (freq < freq_min / (1 << max_n2)) {
401 printk (KERN_ERR "invalid frequency %ld Khz\n", freq); 418 printk(KERN_ERR "invalid frequency %ld Khz\n", freq);
402 freq = freq_min / (1 << max_n2); 419 freq = freq_min / (1 << max_n2);
403 } 420 }
404 if (freq > freq_max / (1 << min_n2)) { 421 if (freq > freq_max / (1 << min_n2)) {
405 printk (KERN_ERR "invalid frequency %ld Khz\n", freq); 422 printk(KERN_ERR "invalid frequency %ld Khz\n", freq);
406 freq = freq_max / (1 << min_n2); 423 freq = freq_max / (1 << min_n2);
407 } 424 }
408 425
@@ -453,12 +470,12 @@ static int common_calc_clock(long freq, int min_m, int min_n1, int max_n1,
453 BASE_FREQ; 470 BASE_FREQ;
454 if (m < min_m + 2 || m > 127+2) 471 if (m < min_m + 2 || m > 127+2)
455 continue; 472 continue;
456 if((m * BASE_FREQ >= freq_min * n1) && 473 if ((m * BASE_FREQ >= freq_min * n1) &&
457 (m * BASE_FREQ <= freq_max * n1)) { 474 (m * BASE_FREQ <= freq_max * n1)) {
458 diff = freq * (1 << n2) * n1 - BASE_FREQ * m; 475 diff = freq * (1 << n2) * n1 - BASE_FREQ * m;
459 if(diff < 0) 476 if (diff < 0)
460 diff = -diff; 477 diff = -diff;
461 if(diff < best_diff) { 478 if (diff < best_diff) {
462 best_diff = diff; 479 best_diff = diff;
463 best_m = m; 480 best_m = m;
464 best_n1 = n1; 481 best_n1 = n1;
@@ -468,7 +485,7 @@ static int common_calc_clock(long freq, int min_m, int min_n1, int max_n1,
468 } 485 }
469 } 486 }
470 487
471 if(max_n1 == 63) 488 if (max_n1 == 63)
472 *ndiv = (best_n1 - 2) | (best_n2 << 6); 489 *ndiv = (best_n1 - 2) | (best_n2 << 6);
473 else 490 else
474 *ndiv = (best_n1 - 2) | (best_n2 << 5); 491 *ndiv = (best_n1 - 2) | (best_n2 << 5);
@@ -488,23 +505,23 @@ static void SavagePrintRegs(void)
488 int vgaCRReg = 0x3d5; 505 int vgaCRReg = 0x3d5;
489 506
490 printk(KERN_DEBUG "SR x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE " 507 printk(KERN_DEBUG "SR x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE "
491 "xF" ); 508 "xF");
492 509
493 for( i = 0; i < 0x70; i++ ) { 510 for (i = 0; i < 0x70; i++) {
494 if( !(i % 16) ) 511 if (!(i % 16))
495 printk(KERN_DEBUG "\nSR%xx ", i >> 4 ); 512 printk(KERN_DEBUG "\nSR%xx ", i >> 4);
496 vga_out8( 0x3c4, i, par); 513 vga_out8(0x3c4, i, par);
497 printk(KERN_DEBUG " %02x", vga_in8(0x3c5, par) ); 514 printk(KERN_DEBUG " %02x", vga_in8(0x3c5, par));
498 } 515 }
499 516
500 printk(KERN_DEBUG "\n\nCR x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC " 517 printk(KERN_DEBUG "\n\nCR x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC "
501 "xD xE xF" ); 518 "xD xE xF");
502 519
503 for( i = 0; i < 0xB7; i++ ) { 520 for (i = 0; i < 0xB7; i++) {
504 if( !(i % 16) ) 521 if (!(i % 16))
505 printk(KERN_DEBUG "\nCR%xx ", i >> 4 ); 522 printk(KERN_DEBUG "\nCR%xx ", i >> 4);
506 vga_out8( vgaCRIndex, i, par); 523 vga_out8(vgaCRIndex, i, par);
507 printk(KERN_DEBUG " %02x", vga_in8(vgaCRReg, par) ); 524 printk(KERN_DEBUG " %02x", vga_in8(vgaCRReg, par));
508 } 525 }
509 526
510 printk(KERN_DEBUG "\n\n"); 527 printk(KERN_DEBUG "\n\n");
@@ -513,156 +530,309 @@ static void SavagePrintRegs(void)
513 530
514/* --------------------------------------------------------------------- */ 531/* --------------------------------------------------------------------- */
515 532
516static void savage_get_default_par(struct savagefb_par *par) 533static void savage_get_default_par(struct savagefb_par *par, struct savage_reg *reg)
517{ 534{
518 unsigned char cr3a, cr53, cr66; 535 unsigned char cr3a, cr53, cr66;
519 536
520 vga_out16 (0x3d4, 0x4838, par); 537 vga_out16(0x3d4, 0x4838, par);
521 vga_out16 (0x3d4, 0xa039, par); 538 vga_out16(0x3d4, 0xa039, par);
522 vga_out16 (0x3c4, 0x0608, par); 539 vga_out16(0x3c4, 0x0608, par);
523 540
524 vga_out8 (0x3d4, 0x66, par); 541 vga_out8(0x3d4, 0x66, par);
525 cr66 = vga_in8 (0x3d5, par); 542 cr66 = vga_in8(0x3d5, par);
526 vga_out8 (0x3d5, cr66 | 0x80, par); 543 vga_out8(0x3d5, cr66 | 0x80, par);
527 vga_out8 (0x3d4, 0x3a, par); 544 vga_out8(0x3d4, 0x3a, par);
528 cr3a = vga_in8 (0x3d5, par); 545 cr3a = vga_in8(0x3d5, par);
529 vga_out8 (0x3d5, cr3a | 0x80, par); 546 vga_out8(0x3d5, cr3a | 0x80, par);
530 vga_out8 (0x3d4, 0x53, par); 547 vga_out8(0x3d4, 0x53, par);
531 cr53 = vga_in8 (0x3d5, par); 548 cr53 = vga_in8(0x3d5, par);
532 vga_out8 (0x3d5, cr53 & 0x7f, par); 549 vga_out8(0x3d5, cr53 & 0x7f, par);
533 550
534 vga_out8 (0x3d4, 0x66, par); 551 vga_out8(0x3d4, 0x66, par);
535 vga_out8 (0x3d5, cr66, par); 552 vga_out8(0x3d5, cr66, par);
536 vga_out8 (0x3d4, 0x3a, par); 553 vga_out8(0x3d4, 0x3a, par);
537 vga_out8 (0x3d5, cr3a, par); 554 vga_out8(0x3d5, cr3a, par);
538 555
539 vga_out8 (0x3d4, 0x66, par); 556 vga_out8(0x3d4, 0x66, par);
540 vga_out8 (0x3d5, cr66, par); 557 vga_out8(0x3d5, cr66, par);
541 vga_out8 (0x3d4, 0x3a, par); 558 vga_out8(0x3d4, 0x3a, par);
542 vga_out8 (0x3d5, cr3a, par); 559 vga_out8(0x3d5, cr3a, par);
543 560
544 /* unlock extended seq regs */ 561 /* unlock extended seq regs */
545 vga_out8 (0x3c4, 0x08, par); 562 vga_out8(0x3c4, 0x08, par);
546 par->SR08 = vga_in8 (0x3c5, par); 563 reg->SR08 = vga_in8(0x3c5, par);
547 vga_out8 (0x3c5, 0x06, par); 564 vga_out8(0x3c5, 0x06, par);
548 565
549 /* now save all the extended regs we need */ 566 /* now save all the extended regs we need */
550 vga_out8 (0x3d4, 0x31, par); 567 vga_out8(0x3d4, 0x31, par);
551 par->CR31 = vga_in8 (0x3d5, par); 568 reg->CR31 = vga_in8(0x3d5, par);
552 vga_out8 (0x3d4, 0x32, par); 569 vga_out8(0x3d4, 0x32, par);
553 par->CR32 = vga_in8 (0x3d5, par); 570 reg->CR32 = vga_in8(0x3d5, par);
554 vga_out8 (0x3d4, 0x34, par); 571 vga_out8(0x3d4, 0x34, par);
555 par->CR34 = vga_in8 (0x3d5, par); 572 reg->CR34 = vga_in8(0x3d5, par);
556 vga_out8 (0x3d4, 0x36, par); 573 vga_out8(0x3d4, 0x36, par);
557 par->CR36 = vga_in8 (0x3d5, par); 574 reg->CR36 = vga_in8(0x3d5, par);
558 vga_out8 (0x3d4, 0x3a, par); 575 vga_out8(0x3d4, 0x3a, par);
559 par->CR3A = vga_in8 (0x3d5, par); 576 reg->CR3A = vga_in8(0x3d5, par);
560 vga_out8 (0x3d4, 0x40, par); 577 vga_out8(0x3d4, 0x40, par);
561 par->CR40 = vga_in8 (0x3d5, par); 578 reg->CR40 = vga_in8(0x3d5, par);
562 vga_out8 (0x3d4, 0x42, par); 579 vga_out8(0x3d4, 0x42, par);
563 par->CR42 = vga_in8 (0x3d5, par); 580 reg->CR42 = vga_in8(0x3d5, par);
564 vga_out8 (0x3d4, 0x45, par); 581 vga_out8(0x3d4, 0x45, par);
565 par->CR45 = vga_in8 (0x3d5, par); 582 reg->CR45 = vga_in8(0x3d5, par);
566 vga_out8 (0x3d4, 0x50, par); 583 vga_out8(0x3d4, 0x50, par);
567 par->CR50 = vga_in8 (0x3d5, par); 584 reg->CR50 = vga_in8(0x3d5, par);
568 vga_out8 (0x3d4, 0x51, par); 585 vga_out8(0x3d4, 0x51, par);
569 par->CR51 = vga_in8 (0x3d5, par); 586 reg->CR51 = vga_in8(0x3d5, par);
570 vga_out8 (0x3d4, 0x53, par); 587 vga_out8(0x3d4, 0x53, par);
571 par->CR53 = vga_in8 (0x3d5, par); 588 reg->CR53 = vga_in8(0x3d5, par);
572 vga_out8 (0x3d4, 0x58, par); 589 vga_out8(0x3d4, 0x58, par);
573 par->CR58 = vga_in8 (0x3d5, par); 590 reg->CR58 = vga_in8(0x3d5, par);
574 vga_out8 (0x3d4, 0x60, par); 591 vga_out8(0x3d4, 0x60, par);
575 par->CR60 = vga_in8 (0x3d5, par); 592 reg->CR60 = vga_in8(0x3d5, par);
576 vga_out8 (0x3d4, 0x66, par); 593 vga_out8(0x3d4, 0x66, par);
577 par->CR66 = vga_in8 (0x3d5, par); 594 reg->CR66 = vga_in8(0x3d5, par);
578 vga_out8 (0x3d4, 0x67, par); 595 vga_out8(0x3d4, 0x67, par);
579 par->CR67 = vga_in8 (0x3d5, par); 596 reg->CR67 = vga_in8(0x3d5, par);
580 vga_out8 (0x3d4, 0x68, par); 597 vga_out8(0x3d4, 0x68, par);
581 par->CR68 = vga_in8 (0x3d5, par); 598 reg->CR68 = vga_in8(0x3d5, par);
582 vga_out8 (0x3d4, 0x69, par); 599 vga_out8(0x3d4, 0x69, par);
583 par->CR69 = vga_in8 (0x3d5, par); 600 reg->CR69 = vga_in8(0x3d5, par);
584 vga_out8 (0x3d4, 0x6f, par); 601 vga_out8(0x3d4, 0x6f, par);
585 par->CR6F = vga_in8 (0x3d5, par); 602 reg->CR6F = vga_in8(0x3d5, par);
586 603
587 vga_out8 (0x3d4, 0x33, par); 604 vga_out8(0x3d4, 0x33, par);
588 par->CR33 = vga_in8 (0x3d5, par); 605 reg->CR33 = vga_in8(0x3d5, par);
589 vga_out8 (0x3d4, 0x86, par); 606 vga_out8(0x3d4, 0x86, par);
590 par->CR86 = vga_in8 (0x3d5, par); 607 reg->CR86 = vga_in8(0x3d5, par);
591 vga_out8 (0x3d4, 0x88, par); 608 vga_out8(0x3d4, 0x88, par);
592 par->CR88 = vga_in8 (0x3d5, par); 609 reg->CR88 = vga_in8(0x3d5, par);
593 vga_out8 (0x3d4, 0x90, par); 610 vga_out8(0x3d4, 0x90, par);
594 par->CR90 = vga_in8 (0x3d5, par); 611 reg->CR90 = vga_in8(0x3d5, par);
595 vga_out8 (0x3d4, 0x91, par); 612 vga_out8(0x3d4, 0x91, par);
596 par->CR91 = vga_in8 (0x3d5, par); 613 reg->CR91 = vga_in8(0x3d5, par);
597 vga_out8 (0x3d4, 0xb0, par); 614 vga_out8(0x3d4, 0xb0, par);
598 par->CRB0 = vga_in8 (0x3d5, par) | 0x80; 615 reg->CRB0 = vga_in8(0x3d5, par) | 0x80;
616
617 /* extended mode timing regs */
618 vga_out8(0x3d4, 0x3b, par);
619 reg->CR3B = vga_in8(0x3d5, par);
620 vga_out8(0x3d4, 0x3c, par);
621 reg->CR3C = vga_in8(0x3d5, par);
622 vga_out8(0x3d4, 0x43, par);
623 reg->CR43 = vga_in8(0x3d5, par);
624 vga_out8(0x3d4, 0x5d, par);
625 reg->CR5D = vga_in8(0x3d5, par);
626 vga_out8(0x3d4, 0x5e, par);
627 reg->CR5E = vga_in8(0x3d5, par);
628 vga_out8(0x3d4, 0x65, par);
629 reg->CR65 = vga_in8(0x3d5, par);
630
631 /* save seq extended regs for DCLK PLL programming */
632 vga_out8(0x3c4, 0x0e, par);
633 reg->SR0E = vga_in8(0x3c5, par);
634 vga_out8(0x3c4, 0x0f, par);
635 reg->SR0F = vga_in8(0x3c5, par);
636 vga_out8(0x3c4, 0x10, par);
637 reg->SR10 = vga_in8(0x3c5, par);
638 vga_out8(0x3c4, 0x11, par);
639 reg->SR11 = vga_in8(0x3c5, par);
640 vga_out8(0x3c4, 0x12, par);
641 reg->SR12 = vga_in8(0x3c5, par);
642 vga_out8(0x3c4, 0x13, par);
643 reg->SR13 = vga_in8(0x3c5, par);
644 vga_out8(0x3c4, 0x29, par);
645 reg->SR29 = vga_in8(0x3c5, par);
646
647 vga_out8(0x3c4, 0x15, par);
648 reg->SR15 = vga_in8(0x3c5, par);
649 vga_out8(0x3c4, 0x30, par);
650 reg->SR30 = vga_in8(0x3c5, par);
651 vga_out8(0x3c4, 0x18, par);
652 reg->SR18 = vga_in8(0x3c5, par);
653
654 /* Save flat panel expansion regsters. */
655 if (par->chip == S3_SAVAGE_MX) {
656 int i;
657
658 for (i = 0; i < 8; i++) {
659 vga_out8(0x3c4, 0x54+i, par);
660 reg->SR54[i] = vga_in8(0x3c5, par);
661 }
662 }
663
664 vga_out8(0x3d4, 0x66, par);
665 cr66 = vga_in8(0x3d5, par);
666 vga_out8(0x3d5, cr66 | 0x80, par);
667 vga_out8(0x3d4, 0x3a, par);
668 cr3a = vga_in8(0x3d5, par);
669 vga_out8(0x3d5, cr3a | 0x80, par);
670
671 /* now save MIU regs */
672 if (par->chip != S3_SAVAGE_MX) {
673 reg->MMPR0 = savage_in32(FIFO_CONTROL_REG, par);
674 reg->MMPR1 = savage_in32(MIU_CONTROL_REG, par);
675 reg->MMPR2 = savage_in32(STREAMS_TIMEOUT_REG, par);
676 reg->MMPR3 = savage_in32(MISC_TIMEOUT_REG, par);
677 }
678
679 vga_out8(0x3d4, 0x3a, par);
680 vga_out8(0x3d5, cr3a, par);
681 vga_out8(0x3d4, 0x66, par);
682 vga_out8(0x3d5, cr66, par);
683}
684
685static void savage_set_default_par(struct savagefb_par *par,
686 struct savage_reg *reg)
687{
688 unsigned char cr3a, cr53, cr66;
689
690 vga_out16(0x3d4, 0x4838, par);
691 vga_out16(0x3d4, 0xa039, par);
692 vga_out16(0x3c4, 0x0608, par);
693
694 vga_out8(0x3d4, 0x66, par);
695 cr66 = vga_in8(0x3d5, par);
696 vga_out8(0x3d5, cr66 | 0x80, par);
697 vga_out8(0x3d4, 0x3a, par);
698 cr3a = vga_in8(0x3d5, par);
699 vga_out8(0x3d5, cr3a | 0x80, par);
700 vga_out8(0x3d4, 0x53, par);
701 cr53 = vga_in8(0x3d5, par);
702 vga_out8(0x3d5, cr53 & 0x7f, par);
703
704 vga_out8(0x3d4, 0x66, par);
705 vga_out8(0x3d5, cr66, par);
706 vga_out8(0x3d4, 0x3a, par);
707 vga_out8(0x3d5, cr3a, par);
708
709 vga_out8(0x3d4, 0x66, par);
710 vga_out8(0x3d5, cr66, par);
711 vga_out8(0x3d4, 0x3a, par);
712 vga_out8(0x3d5, cr3a, par);
713
714 /* unlock extended seq regs */
715 vga_out8(0x3c4, 0x08, par);
716 vga_out8(0x3c5, reg->SR08, par);
717 vga_out8(0x3c5, 0x06, par);
718
719 /* now restore all the extended regs we need */
720 vga_out8(0x3d4, 0x31, par);
721 vga_out8(0x3d5, reg->CR31, par);
722 vga_out8(0x3d4, 0x32, par);
723 vga_out8(0x3d5, reg->CR32, par);
724 vga_out8(0x3d4, 0x34, par);
725 vga_out8(0x3d5, reg->CR34, par);
726 vga_out8(0x3d4, 0x36, par);
727 vga_out8(0x3d5,reg->CR36, par);
728 vga_out8(0x3d4, 0x3a, par);
729 vga_out8(0x3d5, reg->CR3A, par);
730 vga_out8(0x3d4, 0x40, par);
731 vga_out8(0x3d5, reg->CR40, par);
732 vga_out8(0x3d4, 0x42, par);
733 vga_out8(0x3d5, reg->CR42, par);
734 vga_out8(0x3d4, 0x45, par);
735 vga_out8(0x3d5, reg->CR45, par);
736 vga_out8(0x3d4, 0x50, par);
737 vga_out8(0x3d5, reg->CR50, par);
738 vga_out8(0x3d4, 0x51, par);
739 vga_out8(0x3d5, reg->CR51, par);
740 vga_out8(0x3d4, 0x53, par);
741 vga_out8(0x3d5, reg->CR53, par);
742 vga_out8(0x3d4, 0x58, par);
743 vga_out8(0x3d5, reg->CR58, par);
744 vga_out8(0x3d4, 0x60, par);
745 vga_out8(0x3d5, reg->CR60, par);
746 vga_out8(0x3d4, 0x66, par);
747 vga_out8(0x3d5, reg->CR66, par);
748 vga_out8(0x3d4, 0x67, par);
749 vga_out8(0x3d5, reg->CR67, par);
750 vga_out8(0x3d4, 0x68, par);
751 vga_out8(0x3d5, reg->CR68, par);
752 vga_out8(0x3d4, 0x69, par);
753 vga_out8(0x3d5, reg->CR69, par);
754 vga_out8(0x3d4, 0x6f, par);
755 vga_out8(0x3d5, reg->CR6F, par);
756
757 vga_out8(0x3d4, 0x33, par);
758 vga_out8(0x3d5, reg->CR33, par);
759 vga_out8(0x3d4, 0x86, par);
760 vga_out8(0x3d5, reg->CR86, par);
761 vga_out8(0x3d4, 0x88, par);
762 vga_out8(0x3d5, reg->CR88, par);
763 vga_out8(0x3d4, 0x90, par);
764 vga_out8(0x3d5, reg->CR90, par);
765 vga_out8(0x3d4, 0x91, par);
766 vga_out8(0x3d5, reg->CR91, par);
767 vga_out8(0x3d4, 0xb0, par);
768 vga_out8(0x3d5, reg->CRB0, par);
599 769
600 /* extended mode timing regs */ 770 /* extended mode timing regs */
601 vga_out8 (0x3d4, 0x3b, par); 771 vga_out8(0x3d4, 0x3b, par);
602 par->CR3B = vga_in8 (0x3d5, par); 772 vga_out8(0x3d5, reg->CR3B, par);
603 vga_out8 (0x3d4, 0x3c, par); 773 vga_out8(0x3d4, 0x3c, par);
604 par->CR3C = vga_in8 (0x3d5, par); 774 vga_out8(0x3d5, reg->CR3C, par);
605 vga_out8 (0x3d4, 0x43, par); 775 vga_out8(0x3d4, 0x43, par);
606 par->CR43 = vga_in8 (0x3d5, par); 776 vga_out8(0x3d5, reg->CR43, par);
607 vga_out8 (0x3d4, 0x5d, par); 777 vga_out8(0x3d4, 0x5d, par);
608 par->CR5D = vga_in8 (0x3d5, par); 778 vga_out8(0x3d5, reg->CR5D, par);
609 vga_out8 (0x3d4, 0x5e, par); 779 vga_out8(0x3d4, 0x5e, par);
610 par->CR5E = vga_in8 (0x3d5, par); 780 vga_out8(0x3d5, reg->CR5E, par);
611 vga_out8 (0x3d4, 0x65, par); 781 vga_out8(0x3d4, 0x65, par);
612 par->CR65 = vga_in8 (0x3d5, par); 782 vga_out8(0x3d5, reg->CR65, par);
613 783
614 /* save seq extended regs for DCLK PLL programming */ 784 /* save seq extended regs for DCLK PLL programming */
615 vga_out8 (0x3c4, 0x0e, par); 785 vga_out8(0x3c4, 0x0e, par);
616 par->SR0E = vga_in8 (0x3c5, par); 786 vga_out8(0x3c5, reg->SR0E, par);
617 vga_out8 (0x3c4, 0x0f, par); 787 vga_out8(0x3c4, 0x0f, par);
618 par->SR0F = vga_in8 (0x3c5, par); 788 vga_out8(0x3c5, reg->SR0F, par);
619 vga_out8 (0x3c4, 0x10, par); 789 vga_out8(0x3c4, 0x10, par);
620 par->SR10 = vga_in8 (0x3c5, par); 790 vga_out8(0x3c5, reg->SR10, par);
621 vga_out8 (0x3c4, 0x11, par); 791 vga_out8(0x3c4, 0x11, par);
622 par->SR11 = vga_in8 (0x3c5, par); 792 vga_out8(0x3c5, reg->SR11, par);
623 vga_out8 (0x3c4, 0x12, par); 793 vga_out8(0x3c4, 0x12, par);
624 par->SR12 = vga_in8 (0x3c5, par); 794 vga_out8(0x3c5, reg->SR12, par);
625 vga_out8 (0x3c4, 0x13, par); 795 vga_out8(0x3c4, 0x13, par);
626 par->SR13 = vga_in8 (0x3c5, par); 796 vga_out8(0x3c5, reg->SR13, par);
627 vga_out8 (0x3c4, 0x29, par); 797 vga_out8(0x3c4, 0x29, par);
628 par->SR29 = vga_in8 (0x3c5, par); 798 vga_out8(0x3c5, reg->SR29, par);
629 799
630 vga_out8 (0x3c4, 0x15, par); 800 vga_out8(0x3c4, 0x15, par);
631 par->SR15 = vga_in8 (0x3c5, par); 801 vga_out8(0x3c5, reg->SR15, par);
632 vga_out8 (0x3c4, 0x30, par); 802 vga_out8(0x3c4, 0x30, par);
633 par->SR30 = vga_in8 (0x3c5, par); 803 vga_out8(0x3c5, reg->SR30, par);
634 vga_out8 (0x3c4, 0x18, par); 804 vga_out8(0x3c4, 0x18, par);
635 par->SR18 = vga_in8 (0x3c5, par); 805 vga_out8(0x3c5, reg->SR18, par);
636 806
637 /* Save flat panel expansion regsters. */ 807 /* Save flat panel expansion regsters. */
638 if (par->chip == S3_SAVAGE_MX) { 808 if (par->chip == S3_SAVAGE_MX) {
639 int i; 809 int i;
640 810
641 for (i = 0; i < 8; i++) { 811 for (i = 0; i < 8; i++) {
642 vga_out8 (0x3c4, 0x54+i, par); 812 vga_out8(0x3c4, 0x54+i, par);
643 par->SR54[i] = vga_in8 (0x3c5, par); 813 vga_out8(0x3c5, reg->SR54[i], par);
644 } 814 }
645 } 815 }
646 816
647 vga_out8 (0x3d4, 0x66, par); 817 vga_out8(0x3d4, 0x66, par);
648 cr66 = vga_in8 (0x3d5, par); 818 cr66 = vga_in8(0x3d5, par);
649 vga_out8 (0x3d5, cr66 | 0x80, par); 819 vga_out8(0x3d5, cr66 | 0x80, par);
650 vga_out8 (0x3d4, 0x3a, par); 820 vga_out8(0x3d4, 0x3a, par);
651 cr3a = vga_in8 (0x3d5, par); 821 cr3a = vga_in8(0x3d5, par);
652 vga_out8 (0x3d5, cr3a | 0x80, par); 822 vga_out8(0x3d5, cr3a | 0x80, par);
653 823
654 /* now save MIU regs */ 824 /* now save MIU regs */
655 if (par->chip != S3_SAVAGE_MX) { 825 if (par->chip != S3_SAVAGE_MX) {
656 par->MMPR0 = savage_in32(FIFO_CONTROL_REG, par); 826 savage_out32(FIFO_CONTROL_REG, reg->MMPR0, par);
657 par->MMPR1 = savage_in32(MIU_CONTROL_REG, par); 827 savage_out32(MIU_CONTROL_REG, reg->MMPR1, par);
658 par->MMPR2 = savage_in32(STREAMS_TIMEOUT_REG, par); 828 savage_out32(STREAMS_TIMEOUT_REG, reg->MMPR2, par);
659 par->MMPR3 = savage_in32(MISC_TIMEOUT_REG, par); 829 savage_out32(MISC_TIMEOUT_REG, reg->MMPR3, par);
660 } 830 }
661 831
662 vga_out8 (0x3d4, 0x3a, par); 832 vga_out8(0x3d4, 0x3a, par);
663 vga_out8 (0x3d5, cr3a, par); 833 vga_out8(0x3d5, cr3a, par);
664 vga_out8 (0x3d4, 0x66, par); 834 vga_out8(0x3d4, 0x66, par);
665 vga_out8 (0x3d5, cr66, par); 835 vga_out8(0x3d5, cr66, par);
666} 836}
667 837
668static void savage_update_var(struct fb_var_screeninfo *var, struct fb_videomode *modedb) 838static void savage_update_var(struct fb_var_screeninfo *var, struct fb_videomode *modedb)
@@ -683,8 +853,8 @@ static void savage_update_var(struct fb_var_screeninfo *var, struct fb_videomode
683 var->vmode = modedb->vmode; 853 var->vmode = modedb->vmode;
684} 854}
685 855
686static int savagefb_check_var (struct fb_var_screeninfo *var, 856static int savagefb_check_var(struct fb_var_screeninfo *var,
687 struct fb_info *info) 857 struct fb_info *info)
688{ 858{
689 struct savagefb_par *par = info->par; 859 struct savagefb_par *par = info->par;
690 int memlen, vramlen, mode_valid = 0; 860 int memlen, vramlen, mode_valid = 0;
@@ -750,10 +920,10 @@ static int savagefb_check_var (struct fb_var_screeninfo *var,
750 if (par->SavagePanelWidth && 920 if (par->SavagePanelWidth &&
751 (var->xres > par->SavagePanelWidth || 921 (var->xres > par->SavagePanelWidth ||
752 var->yres > par->SavagePanelHeight)) { 922 var->yres > par->SavagePanelHeight)) {
753 printk (KERN_INFO "Mode (%dx%d) larger than the LCD panel " 923 printk(KERN_INFO "Mode (%dx%d) larger than the LCD panel "
754 "(%dx%d)\n", var->xres, var->yres, 924 "(%dx%d)\n", var->xres, var->yres,
755 par->SavagePanelWidth, 925 par->SavagePanelWidth,
756 par->SavagePanelHeight); 926 par->SavagePanelHeight);
757 return -1; 927 return -1;
758 } 928 }
759 929
@@ -788,8 +958,9 @@ static int savagefb_check_var (struct fb_var_screeninfo *var,
788} 958}
789 959
790 960
791static int savagefb_decode_var (struct fb_var_screeninfo *var, 961static int savagefb_decode_var(struct fb_var_screeninfo *var,
792 struct savagefb_par *par) 962 struct savagefb_par *par,
963 struct savage_reg *reg)
793{ 964{
794 struct xtimings timings; 965 struct xtimings timings;
795 int width, dclk, i, j; /*, refresh; */ 966 int width, dclk, i, j; /*, refresh; */
@@ -799,7 +970,7 @@ static int savagefb_decode_var (struct fb_var_screeninfo *var,
799 970
800 DBG("savagefb_decode_var"); 971 DBG("savagefb_decode_var");
801 972
802 memset (&timings, 0, sizeof(timings)); 973 memset(&timings, 0, sizeof(timings));
803 974
804 if (!pixclock) pixclock = 10000; /* 10ns = 100MHz */ 975 if (!pixclock) pixclock = 10000; /* 10ns = 100MHz */
805 timings.Clock = 1000000000 / pixclock; 976 timings.Clock = 1000000000 / pixclock;
@@ -831,39 +1002,39 @@ static int savagefb_decode_var (struct fb_var_screeninfo *var,
831 * This will allocate the datastructure and initialize all of the 1002 * This will allocate the datastructure and initialize all of the
832 * generic VGA registers. 1003 * generic VGA registers.
833 */ 1004 */
834 vgaHWInit (var, par, &timings); 1005 vgaHWInit(var, par, &timings, reg);
835 1006
836 /* We need to set CR67 whether or not we use the BIOS. */ 1007 /* We need to set CR67 whether or not we use the BIOS. */
837 1008
838 dclk = timings.Clock; 1009 dclk = timings.Clock;
839 par->CR67 = 0x00; 1010 reg->CR67 = 0x00;
840 1011
841 switch( var->bits_per_pixel ) { 1012 switch(var->bits_per_pixel) {
842 case 8: 1013 case 8:
843 if( (par->chip == S3_SAVAGE2000) && (dclk >= 230000) ) 1014 if ((par->chip == S3_SAVAGE2000) && (dclk >= 230000))
844 par->CR67 = 0x10; /* 8bpp, 2 pixels/clock */ 1015 reg->CR67 = 0x10; /* 8bpp, 2 pixels/clock */
845 else 1016 else
846 par->CR67 = 0x00; /* 8bpp, 1 pixel/clock */ 1017 reg->CR67 = 0x00; /* 8bpp, 1 pixel/clock */
847 break; 1018 break;
848 case 15: 1019 case 15:
849 if ( S3_SAVAGE_MOBILE_SERIES(par->chip) || 1020 if (S3_SAVAGE_MOBILE_SERIES(par->chip) ||
850 ((par->chip == S3_SAVAGE2000) && (dclk >= 230000)) ) 1021 ((par->chip == S3_SAVAGE2000) && (dclk >= 230000)))
851 par->CR67 = 0x30; /* 15bpp, 2 pixel/clock */ 1022 reg->CR67 = 0x30; /* 15bpp, 2 pixel/clock */
852 else 1023 else
853 par->CR67 = 0x20; /* 15bpp, 1 pixels/clock */ 1024 reg->CR67 = 0x20; /* 15bpp, 1 pixels/clock */
854 break; 1025 break;
855 case 16: 1026 case 16:
856 if( S3_SAVAGE_MOBILE_SERIES(par->chip) || 1027 if (S3_SAVAGE_MOBILE_SERIES(par->chip) ||
857 ((par->chip == S3_SAVAGE2000) && (dclk >= 230000)) ) 1028 ((par->chip == S3_SAVAGE2000) && (dclk >= 230000)))
858 par->CR67 = 0x50; /* 16bpp, 2 pixel/clock */ 1029 reg->CR67 = 0x50; /* 16bpp, 2 pixel/clock */
859 else 1030 else
860 par->CR67 = 0x40; /* 16bpp, 1 pixels/clock */ 1031 reg->CR67 = 0x40; /* 16bpp, 1 pixels/clock */
861 break; 1032 break;
862 case 24: 1033 case 24:
863 par->CR67 = 0x70; 1034 reg->CR67 = 0x70;
864 break; 1035 break;
865 case 32: 1036 case 32:
866 par->CR67 = 0xd0; 1037 reg->CR67 = 0xd0;
867 break; 1038 break;
868 } 1039 }
869 1040
@@ -872,61 +1043,61 @@ static int savagefb_decode_var (struct fb_var_screeninfo *var,
872 * match. Fall back to traditional register-crunching. 1043 * match. Fall back to traditional register-crunching.
873 */ 1044 */
874 1045
875 vga_out8 (0x3d4, 0x3a, par); 1046 vga_out8(0x3d4, 0x3a, par);
876 tmp = vga_in8 (0x3d5, par); 1047 tmp = vga_in8(0x3d5, par);
877 if (1 /*FIXME:psav->pci_burst*/) 1048 if (1 /*FIXME:psav->pci_burst*/)
878 par->CR3A = (tmp & 0x7f) | 0x15; 1049 reg->CR3A = (tmp & 0x7f) | 0x15;
879 else 1050 else
880 par->CR3A = tmp | 0x95; 1051 reg->CR3A = tmp | 0x95;
881 1052
882 par->CR53 = 0x00; 1053 reg->CR53 = 0x00;
883 par->CR31 = 0x8c; 1054 reg->CR31 = 0x8c;
884 par->CR66 = 0x89; 1055 reg->CR66 = 0x89;
885 1056
886 vga_out8 (0x3d4, 0x58, par); 1057 vga_out8(0x3d4, 0x58, par);
887 par->CR58 = vga_in8 (0x3d5, par) & 0x80; 1058 reg->CR58 = vga_in8(0x3d5, par) & 0x80;
888 par->CR58 |= 0x13; 1059 reg->CR58 |= 0x13;
889 1060
890 par->SR15 = 0x03 | 0x80; 1061 reg->SR15 = 0x03 | 0x80;
891 par->SR18 = 0x00; 1062 reg->SR18 = 0x00;
892 par->CR43 = par->CR45 = par->CR65 = 0x00; 1063 reg->CR43 = reg->CR45 = reg->CR65 = 0x00;
893 1064
894 vga_out8 (0x3d4, 0x40, par); 1065 vga_out8(0x3d4, 0x40, par);
895 par->CR40 = vga_in8 (0x3d5, par) & ~0x01; 1066 reg->CR40 = vga_in8(0x3d5, par) & ~0x01;
896 1067
897 par->MMPR0 = 0x010400; 1068 reg->MMPR0 = 0x010400;
898 par->MMPR1 = 0x00; 1069 reg->MMPR1 = 0x00;
899 par->MMPR2 = 0x0808; 1070 reg->MMPR2 = 0x0808;
900 par->MMPR3 = 0x08080810; 1071 reg->MMPR3 = 0x08080810;
901 1072
902 SavageCalcClock (dclk, 1, 1, 127, 0, 4, 180000, 360000, &m, &n, &r); 1073 SavageCalcClock(dclk, 1, 1, 127, 0, 4, 180000, 360000, &m, &n, &r);
903 /* m = 107; n = 4; r = 2; */ 1074 /* m = 107; n = 4; r = 2; */
904 1075
905 if (par->MCLK <= 0) { 1076 if (par->MCLK <= 0) {
906 par->SR10 = 255; 1077 reg->SR10 = 255;
907 par->SR11 = 255; 1078 reg->SR11 = 255;
908 } else { 1079 } else {
909 common_calc_clock (par->MCLK, 1, 1, 31, 0, 3, 135000, 270000, 1080 common_calc_clock(par->MCLK, 1, 1, 31, 0, 3, 135000, 270000,
910 &par->SR11, &par->SR10); 1081 &reg->SR11, &reg->SR10);
911 /* par->SR10 = 80; // MCLK == 286000 */ 1082 /* reg->SR10 = 80; // MCLK == 286000 */
912 /* par->SR11 = 125; */ 1083 /* reg->SR11 = 125; */
913 } 1084 }
914 1085
915 par->SR12 = (r << 6) | (n & 0x3f); 1086 reg->SR12 = (r << 6) | (n & 0x3f);
916 par->SR13 = m & 0xff; 1087 reg->SR13 = m & 0xff;
917 par->SR29 = (r & 4) | (m & 0x100) >> 5 | (n & 0x40) >> 2; 1088 reg->SR29 = (r & 4) | (m & 0x100) >> 5 | (n & 0x40) >> 2;
918 1089
919 if (var->bits_per_pixel < 24) 1090 if (var->bits_per_pixel < 24)
920 par->MMPR0 -= 0x8000; 1091 reg->MMPR0 -= 0x8000;
921 else 1092 else
922 par->MMPR0 -= 0x4000; 1093 reg->MMPR0 -= 0x4000;
923 1094
924 if (timings.interlaced) 1095 if (timings.interlaced)
925 par->CR42 = 0x20; 1096 reg->CR42 = 0x20;
926 else 1097 else
927 par->CR42 = 0x00; 1098 reg->CR42 = 0x00;
928 1099
929 par->CR34 = 0x10; /* display fifo */ 1100 reg->CR34 = 0x10; /* display fifo */
930 1101
931 i = ((((timings.HTotal >> 3) - 5) & 0x100) >> 8) | 1102 i = ((((timings.HTotal >> 3) - 5) & 0x100) >> 8) |
932 ((((timings.HDisplay >> 3) - 1) & 0x100) >> 7) | 1103 ((((timings.HDisplay >> 3) - 1) & 0x100) >> 7) |
@@ -938,77 +1109,77 @@ static int savagefb_decode_var (struct fb_var_screeninfo *var,
938 if ((timings.HSyncEnd >> 3) - (timings.HSyncStart >> 3) > 32) 1109 if ((timings.HSyncEnd >> 3) - (timings.HSyncStart >> 3) > 32)
939 i |= 0x20; 1110 i |= 0x20;
940 1111
941 j = (par->CRTC[0] + ((i & 0x01) << 8) + 1112 j = (reg->CRTC[0] + ((i & 0x01) << 8) +
942 par->CRTC[4] + ((i & 0x10) << 4) + 1) / 2; 1113 reg->CRTC[4] + ((i & 0x10) << 4) + 1) / 2;
943 1114
944 if (j - (par->CRTC[4] + ((i & 0x10) << 4)) < 4) { 1115 if (j - (reg->CRTC[4] + ((i & 0x10) << 4)) < 4) {
945 if (par->CRTC[4] + ((i & 0x10) << 4) + 4 <= 1116 if (reg->CRTC[4] + ((i & 0x10) << 4) + 4 <=
946 par->CRTC[0] + ((i & 0x01) << 8)) 1117 reg->CRTC[0] + ((i & 0x01) << 8))
947 j = par->CRTC[4] + ((i & 0x10) << 4) + 4; 1118 j = reg->CRTC[4] + ((i & 0x10) << 4) + 4;
948 else 1119 else
949 j = par->CRTC[0] + ((i & 0x01) << 8) + 1; 1120 j = reg->CRTC[0] + ((i & 0x01) << 8) + 1;
950 } 1121 }
951 1122
952 par->CR3B = j & 0xff; 1123 reg->CR3B = j & 0xff;
953 i |= (j & 0x100) >> 2; 1124 i |= (j & 0x100) >> 2;
954 par->CR3C = (par->CRTC[0] + ((i & 0x01) << 8)) / 2; 1125 reg->CR3C = (reg->CRTC[0] + ((i & 0x01) << 8)) / 2;
955 par->CR5D = i; 1126 reg->CR5D = i;
956 par->CR5E = (((timings.VTotal - 2) & 0x400) >> 10) | 1127 reg->CR5E = (((timings.VTotal - 2) & 0x400) >> 10) |
957 (((timings.VDisplay - 1) & 0x400) >> 9) | 1128 (((timings.VDisplay - 1) & 0x400) >> 9) |
958 (((timings.VSyncStart) & 0x400) >> 8) | 1129 (((timings.VSyncStart) & 0x400) >> 8) |
959 (((timings.VSyncStart) & 0x400) >> 6) | 0x40; 1130 (((timings.VSyncStart) & 0x400) >> 6) | 0x40;
960 width = (var->xres_virtual * ((var->bits_per_pixel+7) / 8)) >> 3; 1131 width = (var->xres_virtual * ((var->bits_per_pixel+7) / 8)) >> 3;
961 par->CR91 = par->CRTC[19] = 0xff & width; 1132 reg->CR91 = reg->CRTC[19] = 0xff & width;
962 par->CR51 = (0x300 & width) >> 4; 1133 reg->CR51 = (0x300 & width) >> 4;
963 par->CR90 = 0x80 | (width >> 8); 1134 reg->CR90 = 0x80 | (width >> 8);
964 par->MiscOutReg |= 0x0c; 1135 reg->MiscOutReg |= 0x0c;
965 1136
966 /* Set frame buffer description. */ 1137 /* Set frame buffer description. */
967 1138
968 if (var->bits_per_pixel <= 8) 1139 if (var->bits_per_pixel <= 8)
969 par->CR50 = 0; 1140 reg->CR50 = 0;
970 else if (var->bits_per_pixel <= 16) 1141 else if (var->bits_per_pixel <= 16)
971 par->CR50 = 0x10; 1142 reg->CR50 = 0x10;
972 else 1143 else
973 par->CR50 = 0x30; 1144 reg->CR50 = 0x30;
974 1145
975 if (var->xres_virtual <= 640) 1146 if (var->xres_virtual <= 640)
976 par->CR50 |= 0x40; 1147 reg->CR50 |= 0x40;
977 else if (var->xres_virtual == 800) 1148 else if (var->xres_virtual == 800)
978 par->CR50 |= 0x80; 1149 reg->CR50 |= 0x80;
979 else if (var->xres_virtual == 1024) 1150 else if (var->xres_virtual == 1024)
980 par->CR50 |= 0x00; 1151 reg->CR50 |= 0x00;
981 else if (var->xres_virtual == 1152) 1152 else if (var->xres_virtual == 1152)
982 par->CR50 |= 0x01; 1153 reg->CR50 |= 0x01;
983 else if (var->xres_virtual == 1280) 1154 else if (var->xres_virtual == 1280)
984 par->CR50 |= 0xc0; 1155 reg->CR50 |= 0xc0;
985 else if (var->xres_virtual == 1600) 1156 else if (var->xres_virtual == 1600)
986 par->CR50 |= 0x81; 1157 reg->CR50 |= 0x81;
987 else 1158 else
988 par->CR50 |= 0xc1; /* Use GBD */ 1159 reg->CR50 |= 0xc1; /* Use GBD */
989 1160
990 if( par->chip == S3_SAVAGE2000 ) 1161 if (par->chip == S3_SAVAGE2000)
991 par->CR33 = 0x08; 1162 reg->CR33 = 0x08;
992 else 1163 else
993 par->CR33 = 0x20; 1164 reg->CR33 = 0x20;
994 1165
995 par->CRTC[0x17] = 0xeb; 1166 reg->CRTC[0x17] = 0xeb;
996 1167
997 par->CR67 |= 1; 1168 reg->CR67 |= 1;
998 1169
999 vga_out8(0x3d4, 0x36, par); 1170 vga_out8(0x3d4, 0x36, par);
1000 par->CR36 = vga_in8 (0x3d5, par); 1171 reg->CR36 = vga_in8(0x3d5, par);
1001 vga_out8 (0x3d4, 0x68, par); 1172 vga_out8(0x3d4, 0x68, par);
1002 par->CR68 = vga_in8 (0x3d5, par); 1173 reg->CR68 = vga_in8(0x3d5, par);
1003 par->CR69 = 0; 1174 reg->CR69 = 0;
1004 vga_out8 (0x3d4, 0x6f, par); 1175 vga_out8(0x3d4, 0x6f, par);
1005 par->CR6F = vga_in8 (0x3d5, par); 1176 reg->CR6F = vga_in8(0x3d5, par);
1006 vga_out8 (0x3d4, 0x86, par); 1177 vga_out8(0x3d4, 0x86, par);
1007 par->CR86 = vga_in8 (0x3d5, par); 1178 reg->CR86 = vga_in8(0x3d5, par);
1008 vga_out8 (0x3d4, 0x88, par); 1179 vga_out8(0x3d4, 0x88, par);
1009 par->CR88 = vga_in8 (0x3d5, par) | 0x08; 1180 reg->CR88 = vga_in8(0x3d5, par) | 0x08;
1010 vga_out8 (0x3d4, 0xb0, par); 1181 vga_out8(0x3d4, 0xb0, par);
1011 par->CRB0 = vga_in8 (0x3d5, par) | 0x80; 1182 reg->CRB0 = vga_in8(0x3d5, par) | 0x80;
1012 1183
1013 return 0; 1184 return 0;
1014} 1185}
@@ -1037,11 +1208,11 @@ static int savagefb_setcolreg(unsigned regno,
1037 1208
1038 switch (info->var.bits_per_pixel) { 1209 switch (info->var.bits_per_pixel) {
1039 case 8: 1210 case 8:
1040 vga_out8 (0x3c8, regno, par); 1211 vga_out8(0x3c8, regno, par);
1041 1212
1042 vga_out8 (0x3c9, red >> 10, par); 1213 vga_out8(0x3c9, red >> 10, par);
1043 vga_out8 (0x3c9, green >> 10, par); 1214 vga_out8(0x3c9, green >> 10, par);
1044 vga_out8 (0x3c9, blue >> 10, par); 1215 vga_out8(0x3c9, blue >> 10, par);
1045 break; 1216 break;
1046 1217
1047 case 16: 1218 case 16:
@@ -1075,21 +1246,21 @@ static int savagefb_setcolreg(unsigned regno,
1075 return 0; 1246 return 0;
1076} 1247}
1077 1248
1078static void savagefb_set_par_int (struct savagefb_par *par) 1249static void savagefb_set_par_int(struct savagefb_par *par, struct savage_reg *reg)
1079{ 1250{
1080 unsigned char tmp, cr3a, cr66, cr67; 1251 unsigned char tmp, cr3a, cr66, cr67;
1081 1252
1082 DBG ("savagefb_set_par_int"); 1253 DBG("savagefb_set_par_int");
1083 1254
1084 par->SavageWaitIdle (par); 1255 par->SavageWaitIdle(par);
1085 1256
1086 vga_out8 (0x3c2, 0x23, par); 1257 vga_out8(0x3c2, 0x23, par);
1087 1258
1088 vga_out16 (0x3d4, 0x4838, par); 1259 vga_out16(0x3d4, 0x4838, par);
1089 vga_out16 (0x3d4, 0xa539, par); 1260 vga_out16(0x3d4, 0xa539, par);
1090 vga_out16 (0x3c4, 0x0608, par); 1261 vga_out16(0x3c4, 0x0608, par);
1091 1262
1092 vgaHWProtect (par, 1); 1263 vgaHWProtect(par, 1);
1093 1264
1094 /* 1265 /*
1095 * Some Savage/MX and /IX systems go nuts when trying to exit the 1266 * Some Savage/MX and /IX systems go nuts when trying to exit the
@@ -1099,203 +1270,202 @@ static void savagefb_set_par_int (struct savagefb_par *par)
1099 */ 1270 */
1100 1271
1101 VerticalRetraceWait(par); 1272 VerticalRetraceWait(par);
1102 vga_out8 (0x3d4, 0x67, par); 1273 vga_out8(0x3d4, 0x67, par);
1103 cr67 = vga_in8 (0x3d5, par); 1274 cr67 = vga_in8(0x3d5, par);
1104 vga_out8 (0x3d5, cr67/*par->CR67*/ & ~0x0c, par); /* no STREAMS yet */ 1275 vga_out8(0x3d5, cr67/*par->CR67*/ & ~0x0c, par); /* no STREAMS yet */
1105 1276
1106 vga_out8 (0x3d4, 0x23, par); 1277 vga_out8(0x3d4, 0x23, par);
1107 vga_out8 (0x3d5, 0x00, par); 1278 vga_out8(0x3d5, 0x00, par);
1108 vga_out8 (0x3d4, 0x26, par); 1279 vga_out8(0x3d4, 0x26, par);
1109 vga_out8 (0x3d5, 0x00, par); 1280 vga_out8(0x3d5, 0x00, par);
1110 1281
1111 /* restore extended regs */ 1282 /* restore extended regs */
1112 vga_out8 (0x3d4, 0x66, par); 1283 vga_out8(0x3d4, 0x66, par);
1113 vga_out8 (0x3d5, par->CR66, par); 1284 vga_out8(0x3d5, reg->CR66, par);
1114 vga_out8 (0x3d4, 0x3a, par); 1285 vga_out8(0x3d4, 0x3a, par);
1115 vga_out8 (0x3d5, par->CR3A, par); 1286 vga_out8(0x3d5, reg->CR3A, par);
1116 vga_out8 (0x3d4, 0x31, par); 1287 vga_out8(0x3d4, 0x31, par);
1117 vga_out8 (0x3d5, par->CR31, par); 1288 vga_out8(0x3d5, reg->CR31, par);
1118 vga_out8 (0x3d4, 0x32, par); 1289 vga_out8(0x3d4, 0x32, par);
1119 vga_out8 (0x3d5, par->CR32, par); 1290 vga_out8(0x3d5, reg->CR32, par);
1120 vga_out8 (0x3d4, 0x58, par); 1291 vga_out8(0x3d4, 0x58, par);
1121 vga_out8 (0x3d5, par->CR58, par); 1292 vga_out8(0x3d5, reg->CR58, par);
1122 vga_out8 (0x3d4, 0x53, par); 1293 vga_out8(0x3d4, 0x53, par);
1123 vga_out8 (0x3d5, par->CR53 & 0x7f, par); 1294 vga_out8(0x3d5, reg->CR53 & 0x7f, par);
1124 1295
1125 vga_out16 (0x3c4, 0x0608, par); 1296 vga_out16(0x3c4, 0x0608, par);
1126 1297
1127 /* Restore DCLK registers. */ 1298 /* Restore DCLK registers. */
1128 1299
1129 vga_out8 (0x3c4, 0x0e, par); 1300 vga_out8(0x3c4, 0x0e, par);
1130 vga_out8 (0x3c5, par->SR0E, par); 1301 vga_out8(0x3c5, reg->SR0E, par);
1131 vga_out8 (0x3c4, 0x0f, par); 1302 vga_out8(0x3c4, 0x0f, par);
1132 vga_out8 (0x3c5, par->SR0F, par); 1303 vga_out8(0x3c5, reg->SR0F, par);
1133 vga_out8 (0x3c4, 0x29, par); 1304 vga_out8(0x3c4, 0x29, par);
1134 vga_out8 (0x3c5, par->SR29, par); 1305 vga_out8(0x3c5, reg->SR29, par);
1135 vga_out8 (0x3c4, 0x15, par); 1306 vga_out8(0x3c4, 0x15, par);
1136 vga_out8 (0x3c5, par->SR15, par); 1307 vga_out8(0x3c5, reg->SR15, par);
1137 1308
1138 /* Restore flat panel expansion regsters. */ 1309 /* Restore flat panel expansion regsters. */
1139 if( par->chip == S3_SAVAGE_MX ) { 1310 if (par->chip == S3_SAVAGE_MX) {
1140 int i; 1311 int i;
1141 1312
1142 for( i = 0; i < 8; i++ ) { 1313 for (i = 0; i < 8; i++) {
1143 vga_out8 (0x3c4, 0x54+i, par); 1314 vga_out8(0x3c4, 0x54+i, par);
1144 vga_out8 (0x3c5, par->SR54[i], par); 1315 vga_out8(0x3c5, reg->SR54[i], par);
1145 } 1316 }
1146 } 1317 }
1147 1318
1148 vgaHWRestore (par); 1319 vgaHWRestore (par, reg);
1149 1320
1150 /* extended mode timing registers */ 1321 /* extended mode timing registers */
1151 vga_out8 (0x3d4, 0x53, par); 1322 vga_out8(0x3d4, 0x53, par);
1152 vga_out8 (0x3d5, par->CR53, par); 1323 vga_out8(0x3d5, reg->CR53, par);
1153 vga_out8 (0x3d4, 0x5d, par); 1324 vga_out8(0x3d4, 0x5d, par);
1154 vga_out8 (0x3d5, par->CR5D, par); 1325 vga_out8(0x3d5, reg->CR5D, par);
1155 vga_out8 (0x3d4, 0x5e, par); 1326 vga_out8(0x3d4, 0x5e, par);
1156 vga_out8 (0x3d5, par->CR5E, par); 1327 vga_out8(0x3d5, reg->CR5E, par);
1157 vga_out8 (0x3d4, 0x3b, par); 1328 vga_out8(0x3d4, 0x3b, par);
1158 vga_out8 (0x3d5, par->CR3B, par); 1329 vga_out8(0x3d5, reg->CR3B, par);
1159 vga_out8 (0x3d4, 0x3c, par); 1330 vga_out8(0x3d4, 0x3c, par);
1160 vga_out8 (0x3d5, par->CR3C, par); 1331 vga_out8(0x3d5, reg->CR3C, par);
1161 vga_out8 (0x3d4, 0x43, par); 1332 vga_out8(0x3d4, 0x43, par);
1162 vga_out8 (0x3d5, par->CR43, par); 1333 vga_out8(0x3d5, reg->CR43, par);
1163 vga_out8 (0x3d4, 0x65, par); 1334 vga_out8(0x3d4, 0x65, par);
1164 vga_out8 (0x3d5, par->CR65, par); 1335 vga_out8(0x3d5, reg->CR65, par);
1165 1336
1166 /* restore the desired video mode with cr67 */ 1337 /* restore the desired video mode with cr67 */
1167 vga_out8 (0x3d4, 0x67, par); 1338 vga_out8(0x3d4, 0x67, par);
1168 /* following part not present in X11 driver */ 1339 /* following part not present in X11 driver */
1169 cr67 = vga_in8 (0x3d5, par) & 0xf; 1340 cr67 = vga_in8(0x3d5, par) & 0xf;
1170 vga_out8 (0x3d5, 0x50 | cr67, par); 1341 vga_out8(0x3d5, 0x50 | cr67, par);
1171 udelay (10000); 1342 udelay(10000);
1172 vga_out8 (0x3d4, 0x67, par); 1343 vga_out8(0x3d4, 0x67, par);
1173 /* end of part */ 1344 /* end of part */
1174 vga_out8 (0x3d5, par->CR67 & ~0x0c, par); 1345 vga_out8(0x3d5, reg->CR67 & ~0x0c, par);
1175 1346
1176 /* other mode timing and extended regs */ 1347 /* other mode timing and extended regs */
1177 vga_out8 (0x3d4, 0x34, par); 1348 vga_out8(0x3d4, 0x34, par);
1178 vga_out8 (0x3d5, par->CR34, par); 1349 vga_out8(0x3d5, reg->CR34, par);
1179 vga_out8 (0x3d4, 0x40, par); 1350 vga_out8(0x3d4, 0x40, par);
1180 vga_out8 (0x3d5, par->CR40, par); 1351 vga_out8(0x3d5, reg->CR40, par);
1181 vga_out8 (0x3d4, 0x42, par); 1352 vga_out8(0x3d4, 0x42, par);
1182 vga_out8 (0x3d5, par->CR42, par); 1353 vga_out8(0x3d5, reg->CR42, par);
1183 vga_out8 (0x3d4, 0x45, par); 1354 vga_out8(0x3d4, 0x45, par);
1184 vga_out8 (0x3d5, par->CR45, par); 1355 vga_out8(0x3d5, reg->CR45, par);
1185 vga_out8 (0x3d4, 0x50, par); 1356 vga_out8(0x3d4, 0x50, par);
1186 vga_out8 (0x3d5, par->CR50, par); 1357 vga_out8(0x3d5, reg->CR50, par);
1187 vga_out8 (0x3d4, 0x51, par); 1358 vga_out8(0x3d4, 0x51, par);
1188 vga_out8 (0x3d5, par->CR51, par); 1359 vga_out8(0x3d5, reg->CR51, par);
1189 1360
1190 /* memory timings */ 1361 /* memory timings */
1191 vga_out8 (0x3d4, 0x36, par); 1362 vga_out8(0x3d4, 0x36, par);
1192 vga_out8 (0x3d5, par->CR36, par); 1363 vga_out8(0x3d5, reg->CR36, par);
1193 vga_out8 (0x3d4, 0x60, par); 1364 vga_out8(0x3d4, 0x60, par);
1194 vga_out8 (0x3d5, par->CR60, par); 1365 vga_out8(0x3d5, reg->CR60, par);
1195 vga_out8 (0x3d4, 0x68, par); 1366 vga_out8(0x3d4, 0x68, par);
1196 vga_out8 (0x3d5, par->CR68, par); 1367 vga_out8(0x3d5, reg->CR68, par);
1197 vga_out8 (0x3d4, 0x69, par); 1368 vga_out8(0x3d4, 0x69, par);
1198 vga_out8 (0x3d5, par->CR69, par); 1369 vga_out8(0x3d5, reg->CR69, par);
1199 vga_out8 (0x3d4, 0x6f, par); 1370 vga_out8(0x3d4, 0x6f, par);
1200 vga_out8 (0x3d5, par->CR6F, par); 1371 vga_out8(0x3d5, reg->CR6F, par);
1201 1372
1202 vga_out8 (0x3d4, 0x33, par); 1373 vga_out8(0x3d4, 0x33, par);
1203 vga_out8 (0x3d5, par->CR33, par); 1374 vga_out8(0x3d5, reg->CR33, par);
1204 vga_out8 (0x3d4, 0x86, par); 1375 vga_out8(0x3d4, 0x86, par);
1205 vga_out8 (0x3d5, par->CR86, par); 1376 vga_out8(0x3d5, reg->CR86, par);
1206 vga_out8 (0x3d4, 0x88, par); 1377 vga_out8(0x3d4, 0x88, par);
1207 vga_out8 (0x3d5, par->CR88, par); 1378 vga_out8(0x3d5, reg->CR88, par);
1208 vga_out8 (0x3d4, 0x90, par); 1379 vga_out8(0x3d4, 0x90, par);
1209 vga_out8 (0x3d5, par->CR90, par); 1380 vga_out8(0x3d5, reg->CR90, par);
1210 vga_out8 (0x3d4, 0x91, par); 1381 vga_out8(0x3d4, 0x91, par);
1211 vga_out8 (0x3d5, par->CR91, par); 1382 vga_out8(0x3d5, reg->CR91, par);
1212 1383
1213 if (par->chip == S3_SAVAGE4) { 1384 if (par->chip == S3_SAVAGE4) {
1214 vga_out8 (0x3d4, 0xb0, par); 1385 vga_out8(0x3d4, 0xb0, par);
1215 vga_out8 (0x3d5, par->CRB0, par); 1386 vga_out8(0x3d5, reg->CRB0, par);
1216 } 1387 }
1217 1388
1218 vga_out8 (0x3d4, 0x32, par); 1389 vga_out8(0x3d4, 0x32, par);
1219 vga_out8 (0x3d5, par->CR32, par); 1390 vga_out8(0x3d5, reg->CR32, par);
1220 1391
1221 /* unlock extended seq regs */ 1392 /* unlock extended seq regs */
1222 vga_out8 (0x3c4, 0x08, par); 1393 vga_out8(0x3c4, 0x08, par);
1223 vga_out8 (0x3c5, 0x06, par); 1394 vga_out8(0x3c5, 0x06, par);
1224 1395
1225 /* Restore extended sequencer regs for MCLK. SR10 == 255 indicates 1396 /* Restore extended sequencer regs for MCLK. SR10 == 255 indicates
1226 * that we should leave the default SR10 and SR11 values there. 1397 * that we should leave the default SR10 and SR11 values there.
1227 */ 1398 */
1228 if (par->SR10 != 255) { 1399 if (reg->SR10 != 255) {
1229 vga_out8 (0x3c4, 0x10, par); 1400 vga_out8(0x3c4, 0x10, par);
1230 vga_out8 (0x3c5, par->SR10, par); 1401 vga_out8(0x3c5, reg->SR10, par);
1231 vga_out8 (0x3c4, 0x11, par); 1402 vga_out8(0x3c4, 0x11, par);
1232 vga_out8 (0x3c5, par->SR11, par); 1403 vga_out8(0x3c5, reg->SR11, par);
1233 } 1404 }
1234 1405
1235 /* restore extended seq regs for dclk */ 1406 /* restore extended seq regs for dclk */
1236 vga_out8 (0x3c4, 0x0e, par); 1407 vga_out8(0x3c4, 0x0e, par);
1237 vga_out8 (0x3c5, par->SR0E, par); 1408 vga_out8(0x3c5, reg->SR0E, par);
1238 vga_out8 (0x3c4, 0x0f, par); 1409 vga_out8(0x3c4, 0x0f, par);
1239 vga_out8 (0x3c5, par->SR0F, par); 1410 vga_out8(0x3c5, reg->SR0F, par);
1240 vga_out8 (0x3c4, 0x12, par); 1411 vga_out8(0x3c4, 0x12, par);
1241 vga_out8 (0x3c5, par->SR12, par); 1412 vga_out8(0x3c5, reg->SR12, par);
1242 vga_out8 (0x3c4, 0x13, par); 1413 vga_out8(0x3c4, 0x13, par);
1243 vga_out8 (0x3c5, par->SR13, par); 1414 vga_out8(0x3c5, reg->SR13, par);
1244 vga_out8 (0x3c4, 0x29, par); 1415 vga_out8(0x3c4, 0x29, par);
1245 vga_out8 (0x3c5, par->SR29, par); 1416 vga_out8(0x3c5, reg->SR29, par);
1246 1417 vga_out8(0x3c4, 0x18, par);
1247 vga_out8 (0x3c4, 0x18, par); 1418 vga_out8(0x3c5, reg->SR18, par);
1248 vga_out8 (0x3c5, par->SR18, par);
1249 1419
1250 /* load new m, n pll values for dclk & mclk */ 1420 /* load new m, n pll values for dclk & mclk */
1251 vga_out8 (0x3c4, 0x15, par); 1421 vga_out8(0x3c4, 0x15, par);
1252 tmp = vga_in8 (0x3c5, par) & ~0x21; 1422 tmp = vga_in8(0x3c5, par) & ~0x21;
1253 1423
1254 vga_out8 (0x3c5, tmp | 0x03, par); 1424 vga_out8(0x3c5, tmp | 0x03, par);
1255 vga_out8 (0x3c5, tmp | 0x23, par); 1425 vga_out8(0x3c5, tmp | 0x23, par);
1256 vga_out8 (0x3c5, tmp | 0x03, par); 1426 vga_out8(0x3c5, tmp | 0x03, par);
1257 vga_out8 (0x3c5, par->SR15, par); 1427 vga_out8(0x3c5, reg->SR15, par);
1258 udelay (100); 1428 udelay(100);
1259 1429
1260 vga_out8 (0x3c4, 0x30, par); 1430 vga_out8(0x3c4, 0x30, par);
1261 vga_out8 (0x3c5, par->SR30, par); 1431 vga_out8(0x3c5, reg->SR30, par);
1262 vga_out8 (0x3c4, 0x08, par); 1432 vga_out8(0x3c4, 0x08, par);
1263 vga_out8 (0x3c5, par->SR08, par); 1433 vga_out8(0x3c5, reg->SR08, par);
1264 1434
1265 /* now write out cr67 in full, possibly starting STREAMS */ 1435 /* now write out cr67 in full, possibly starting STREAMS */
1266 VerticalRetraceWait(par); 1436 VerticalRetraceWait(par);
1267 vga_out8 (0x3d4, 0x67, par); 1437 vga_out8(0x3d4, 0x67, par);
1268 vga_out8 (0x3d5, par->CR67, par); 1438 vga_out8(0x3d5, reg->CR67, par);
1269 1439
1270 vga_out8 (0x3d4, 0x66, par); 1440 vga_out8(0x3d4, 0x66, par);
1271 cr66 = vga_in8 (0x3d5, par); 1441 cr66 = vga_in8(0x3d5, par);
1272 vga_out8 (0x3d5, cr66 | 0x80, par); 1442 vga_out8(0x3d5, cr66 | 0x80, par);
1273 vga_out8 (0x3d4, 0x3a, par); 1443 vga_out8(0x3d4, 0x3a, par);
1274 cr3a = vga_in8 (0x3d5, par); 1444 cr3a = vga_in8(0x3d5, par);
1275 vga_out8 (0x3d5, cr3a | 0x80, par); 1445 vga_out8(0x3d5, cr3a | 0x80, par);
1276 1446
1277 if (par->chip != S3_SAVAGE_MX) { 1447 if (par->chip != S3_SAVAGE_MX) {
1278 VerticalRetraceWait(par); 1448 VerticalRetraceWait(par);
1279 savage_out32 (FIFO_CONTROL_REG, par->MMPR0, par); 1449 savage_out32(FIFO_CONTROL_REG, reg->MMPR0, par);
1280 par->SavageWaitIdle (par); 1450 par->SavageWaitIdle(par);
1281 savage_out32 (MIU_CONTROL_REG, par->MMPR1, par); 1451 savage_out32(MIU_CONTROL_REG, reg->MMPR1, par);
1282 par->SavageWaitIdle (par); 1452 par->SavageWaitIdle(par);
1283 savage_out32 (STREAMS_TIMEOUT_REG, par->MMPR2, par); 1453 savage_out32(STREAMS_TIMEOUT_REG, reg->MMPR2, par);
1284 par->SavageWaitIdle (par); 1454 par->SavageWaitIdle(par);
1285 savage_out32 (MISC_TIMEOUT_REG, par->MMPR3, par); 1455 savage_out32(MISC_TIMEOUT_REG, reg->MMPR3, par);
1286 } 1456 }
1287 1457
1288 vga_out8 (0x3d4, 0x66, par); 1458 vga_out8(0x3d4, 0x66, par);
1289 vga_out8 (0x3d5, cr66, par); 1459 vga_out8(0x3d5, cr66, par);
1290 vga_out8 (0x3d4, 0x3a, par); 1460 vga_out8(0x3d4, 0x3a, par);
1291 vga_out8 (0x3d5, cr3a, par); 1461 vga_out8(0x3d5, cr3a, par);
1292 1462
1293 SavageSetup2DEngine (par); 1463 SavageSetup2DEngine(par);
1294 vgaHWProtect (par, 0); 1464 vgaHWProtect(par, 0);
1295} 1465}
1296 1466
1297static void savagefb_update_start (struct savagefb_par *par, 1467static void savagefb_update_start(struct savagefb_par *par,
1298 struct fb_var_screeninfo *var) 1468 struct fb_var_screeninfo *var)
1299{ 1469{
1300 int base; 1470 int base;
1301 1471
@@ -1305,8 +1475,8 @@ static void savagefb_update_start (struct savagefb_par *par,
1305 /* now program the start address registers */ 1475 /* now program the start address registers */
1306 vga_out16(0x3d4, (base & 0x00ff00) | 0x0c, par); 1476 vga_out16(0x3d4, (base & 0x00ff00) | 0x0c, par);
1307 vga_out16(0x3d4, ((base & 0x00ff) << 8) | 0x0d, par); 1477 vga_out16(0x3d4, ((base & 0x00ff) << 8) | 0x0d, par);
1308 vga_out8 (0x3d4, 0x69, par); 1478 vga_out8(0x3d4, 0x69, par);
1309 vga_out8 (0x3d5, (base & 0x7f0000) >> 16, par); 1479 vga_out8(0x3d5, (base & 0x7f0000) >> 16, par);
1310} 1480}
1311 1481
1312 1482
@@ -1325,29 +1495,14 @@ static void savagefb_set_fix(struct fb_info *info)
1325 1495
1326} 1496}
1327 1497
1328#if defined(CONFIG_FB_SAVAGE_ACCEL) 1498static int savagefb_set_par(struct fb_info *info)
1329static void savagefb_set_clip(struct fb_info *info)
1330{
1331 struct savagefb_par *par = info->par;
1332 int cmd;
1333
1334 cmd = BCI_CMD_NOP | BCI_CMD_CLIP_NEW;
1335 par->bci_ptr = 0;
1336 par->SavageWaitFifo(par,3);
1337 BCI_SEND(cmd);
1338 BCI_SEND(BCI_CLIP_TL(0, 0));
1339 BCI_SEND(BCI_CLIP_BR(0xfff, 0xfff));
1340}
1341#endif
1342
1343static int savagefb_set_par (struct fb_info *info)
1344{ 1499{
1345 struct savagefb_par *par = info->par; 1500 struct savagefb_par *par = info->par;
1346 struct fb_var_screeninfo *var = &info->var; 1501 struct fb_var_screeninfo *var = &info->var;
1347 int err; 1502 int err;
1348 1503
1349 DBG("savagefb_set_par"); 1504 DBG("savagefb_set_par");
1350 err = savagefb_decode_var (var, par); 1505 err = savagefb_decode_var(var, par, &par->state);
1351 if (err) 1506 if (err)
1352 return err; 1507 return err;
1353 1508
@@ -1366,8 +1521,8 @@ static int savagefb_set_par (struct fb_info *info)
1366 par->maxClock = par->dacSpeedBpp; 1521 par->maxClock = par->dacSpeedBpp;
1367 par->minClock = 10000; 1522 par->minClock = 10000;
1368 1523
1369 savagefb_set_par_int (par); 1524 savagefb_set_par_int(par, &par->state);
1370 fb_set_cmap (&info->cmap, info); 1525 fb_set_cmap(&info->cmap, info);
1371 savagefb_set_fix(info); 1526 savagefb_set_fix(info);
1372 savagefb_set_clip(info); 1527 savagefb_set_clip(info);
1373 1528
@@ -1378,12 +1533,12 @@ static int savagefb_set_par (struct fb_info *info)
1378/* 1533/*
1379 * Pan or Wrap the Display 1534 * Pan or Wrap the Display
1380 */ 1535 */
1381static int savagefb_pan_display (struct fb_var_screeninfo *var, 1536static int savagefb_pan_display(struct fb_var_screeninfo *var,
1382 struct fb_info *info) 1537 struct fb_info *info)
1383{ 1538{
1384 struct savagefb_par *par = info->par; 1539 struct savagefb_par *par = info->par;
1385 1540
1386 savagefb_update_start (par, var); 1541 savagefb_update_start(par, var);
1387 return 0; 1542 return 0;
1388} 1543}
1389 1544
@@ -1440,6 +1595,22 @@ static int savagefb_blank(int blank, struct fb_info *info)
1440 return (blank == FB_BLANK_NORMAL) ? 1 : 0; 1595 return (blank == FB_BLANK_NORMAL) ? 1 : 0;
1441} 1596}
1442 1597
1598static void savagefb_save_state(struct fb_info *info)
1599{
1600 struct savagefb_par *par = info->par;
1601
1602 savage_get_default_par(par, &par->save);
1603}
1604
1605static void savagefb_restore_state(struct fb_info *info)
1606{
1607 struct savagefb_par *par = info->par;
1608
1609 savagefb_blank(FB_BLANK_POWERDOWN, info);
1610 savage_set_default_par(par, &par->save);
1611 savagefb_blank(FB_BLANK_UNBLANK, info);
1612}
1613
1443static struct fb_ops savagefb_ops = { 1614static struct fb_ops savagefb_ops = {
1444 .owner = THIS_MODULE, 1615 .owner = THIS_MODULE,
1445 .fb_check_var = savagefb_check_var, 1616 .fb_check_var = savagefb_check_var,
@@ -1447,6 +1618,8 @@ static struct fb_ops savagefb_ops = {
1447 .fb_setcolreg = savagefb_setcolreg, 1618 .fb_setcolreg = savagefb_setcolreg,
1448 .fb_pan_display = savagefb_pan_display, 1619 .fb_pan_display = savagefb_pan_display,
1449 .fb_blank = savagefb_blank, 1620 .fb_blank = savagefb_blank,
1621 .fb_save_state = savagefb_save_state,
1622 .fb_restore_state = savagefb_restore_state,
1450#if defined(CONFIG_FB_SAVAGE_ACCEL) 1623#if defined(CONFIG_FB_SAVAGE_ACCEL)
1451 .fb_fillrect = savagefb_fillrect, 1624 .fb_fillrect = savagefb_fillrect,
1452 .fb_copyarea = savagefb_copyarea, 1625 .fb_copyarea = savagefb_copyarea,
@@ -1479,59 +1652,59 @@ static struct fb_var_screeninfo __devinitdata savagefb_var800x600x8 = {
1479 .vmode = FB_VMODE_NONINTERLACED 1652 .vmode = FB_VMODE_NONINTERLACED
1480}; 1653};
1481 1654
1482static void savage_enable_mmio (struct savagefb_par *par) 1655static void savage_enable_mmio(struct savagefb_par *par)
1483{ 1656{
1484 unsigned char val; 1657 unsigned char val;
1485 1658
1486 DBG ("savage_enable_mmio\n"); 1659 DBG("savage_enable_mmio\n");
1487 1660
1488 val = vga_in8 (0x3c3, par); 1661 val = vga_in8(0x3c3, par);
1489 vga_out8 (0x3c3, val | 0x01, par); 1662 vga_out8(0x3c3, val | 0x01, par);
1490 val = vga_in8 (0x3cc, par); 1663 val = vga_in8(0x3cc, par);
1491 vga_out8 (0x3c2, val | 0x01, par); 1664 vga_out8(0x3c2, val | 0x01, par);
1492 1665
1493 if (par->chip >= S3_SAVAGE4) { 1666 if (par->chip >= S3_SAVAGE4) {
1494 vga_out8 (0x3d4, 0x40, par); 1667 vga_out8(0x3d4, 0x40, par);
1495 val = vga_in8 (0x3d5, par); 1668 val = vga_in8(0x3d5, par);
1496 vga_out8 (0x3d5, val | 1, par); 1669 vga_out8(0x3d5, val | 1, par);
1497 } 1670 }
1498} 1671}
1499 1672
1500 1673
1501static void savage_disable_mmio (struct savagefb_par *par) 1674static void savage_disable_mmio(struct savagefb_par *par)
1502{ 1675{
1503 unsigned char val; 1676 unsigned char val;
1504 1677
1505 DBG ("savage_disable_mmio\n"); 1678 DBG("savage_disable_mmio\n");
1506 1679
1507 if(par->chip >= S3_SAVAGE4 ) { 1680 if (par->chip >= S3_SAVAGE4) {
1508 vga_out8 (0x3d4, 0x40, par); 1681 vga_out8(0x3d4, 0x40, par);
1509 val = vga_in8 (0x3d5, par); 1682 val = vga_in8(0x3d5, par);
1510 vga_out8 (0x3d5, val | 1, par); 1683 vga_out8(0x3d5, val | 1, par);
1511 } 1684 }
1512} 1685}
1513 1686
1514 1687
1515static int __devinit savage_map_mmio (struct fb_info *info) 1688static int __devinit savage_map_mmio(struct fb_info *info)
1516{ 1689{
1517 struct savagefb_par *par = info->par; 1690 struct savagefb_par *par = info->par;
1518 DBG ("savage_map_mmio"); 1691 DBG("savage_map_mmio");
1519 1692
1520 if (S3_SAVAGE3D_SERIES (par->chip)) 1693 if (S3_SAVAGE3D_SERIES(par->chip))
1521 par->mmio.pbase = pci_resource_start (par->pcidev, 0) + 1694 par->mmio.pbase = pci_resource_start(par->pcidev, 0) +
1522 SAVAGE_NEWMMIO_REGBASE_S3; 1695 SAVAGE_NEWMMIO_REGBASE_S3;
1523 else 1696 else
1524 par->mmio.pbase = pci_resource_start (par->pcidev, 0) + 1697 par->mmio.pbase = pci_resource_start(par->pcidev, 0) +
1525 SAVAGE_NEWMMIO_REGBASE_S4; 1698 SAVAGE_NEWMMIO_REGBASE_S4;
1526 1699
1527 par->mmio.len = SAVAGE_NEWMMIO_REGSIZE; 1700 par->mmio.len = SAVAGE_NEWMMIO_REGSIZE;
1528 1701
1529 par->mmio.vbase = ioremap (par->mmio.pbase, par->mmio.len); 1702 par->mmio.vbase = ioremap(par->mmio.pbase, par->mmio.len);
1530 if (!par->mmio.vbase) { 1703 if (!par->mmio.vbase) {
1531 printk ("savagefb: unable to map memory mapped IO\n"); 1704 printk("savagefb: unable to map memory mapped IO\n");
1532 return -ENOMEM; 1705 return -ENOMEM;
1533 } else 1706 } else
1534 printk (KERN_INFO "savagefb: mapped io at %p\n", 1707 printk(KERN_INFO "savagefb: mapped io at %p\n",
1535 par->mmio.vbase); 1708 par->mmio.vbase);
1536 1709
1537 info->fix.mmio_start = par->mmio.pbase; 1710 info->fix.mmio_start = par->mmio.pbase;
@@ -1540,15 +1713,15 @@ static int __devinit savage_map_mmio (struct fb_info *info)
1540 par->bci_base = (u32 __iomem *)(par->mmio.vbase + BCI_BUFFER_OFFSET); 1713 par->bci_base = (u32 __iomem *)(par->mmio.vbase + BCI_BUFFER_OFFSET);
1541 par->bci_ptr = 0; 1714 par->bci_ptr = 0;
1542 1715
1543 savage_enable_mmio (par); 1716 savage_enable_mmio(par);
1544 1717
1545 return 0; 1718 return 0;
1546} 1719}
1547 1720
1548static void savage_unmap_mmio (struct fb_info *info) 1721static void savage_unmap_mmio(struct fb_info *info)
1549{ 1722{
1550 struct savagefb_par *par = info->par; 1723 struct savagefb_par *par = info->par;
1551 DBG ("savage_unmap_mmio"); 1724 DBG("savage_unmap_mmio");
1552 1725
1553 savage_disable_mmio(par); 1726 savage_disable_mmio(par);
1554 1727
@@ -1558,46 +1731,46 @@ static void savage_unmap_mmio (struct fb_info *info)
1558 } 1731 }
1559} 1732}
1560 1733
1561static int __devinit savage_map_video (struct fb_info *info, 1734static int __devinit savage_map_video(struct fb_info *info,
1562 int video_len) 1735 int video_len)
1563{ 1736{
1564 struct savagefb_par *par = info->par; 1737 struct savagefb_par *par = info->par;
1565 int resource; 1738 int resource;
1566 1739
1567 DBG("savage_map_video"); 1740 DBG("savage_map_video");
1568 1741
1569 if (S3_SAVAGE3D_SERIES (par->chip)) 1742 if (S3_SAVAGE3D_SERIES(par->chip))
1570 resource = 0; 1743 resource = 0;
1571 else 1744 else
1572 resource = 1; 1745 resource = 1;
1573 1746
1574 par->video.pbase = pci_resource_start (par->pcidev, resource); 1747 par->video.pbase = pci_resource_start(par->pcidev, resource);
1575 par->video.len = video_len; 1748 par->video.len = video_len;
1576 par->video.vbase = ioremap (par->video.pbase, par->video.len); 1749 par->video.vbase = ioremap(par->video.pbase, par->video.len);
1577 1750
1578 if (!par->video.vbase) { 1751 if (!par->video.vbase) {
1579 printk ("savagefb: unable to map screen memory\n"); 1752 printk("savagefb: unable to map screen memory\n");
1580 return -ENOMEM; 1753 return -ENOMEM;
1581 } else 1754 } else
1582 printk (KERN_INFO "savagefb: mapped framebuffer at %p, " 1755 printk(KERN_INFO "savagefb: mapped framebuffer at %p, "
1583 "pbase == %x\n", par->video.vbase, par->video.pbase); 1756 "pbase == %x\n", par->video.vbase, par->video.pbase);
1584 1757
1585 info->fix.smem_start = par->video.pbase; 1758 info->fix.smem_start = par->video.pbase;
1586 info->fix.smem_len = par->video.len - par->cob_size; 1759 info->fix.smem_len = par->video.len - par->cob_size;
1587 info->screen_base = par->video.vbase; 1760 info->screen_base = par->video.vbase;
1588 1761
1589#ifdef CONFIG_MTRR 1762#ifdef CONFIG_MTRR
1590 par->video.mtrr = mtrr_add (par->video.pbase, video_len, 1763 par->video.mtrr = mtrr_add(par->video.pbase, video_len,
1591 MTRR_TYPE_WRCOMB, 1); 1764 MTRR_TYPE_WRCOMB, 1);
1592#endif 1765#endif
1593 1766
1594 /* Clear framebuffer, it's all white in memory after boot */ 1767 /* Clear framebuffer, it's all white in memory after boot */
1595 memset_io (par->video.vbase, 0, par->video.len); 1768 memset_io(par->video.vbase, 0, par->video.len);
1596 1769
1597 return 0; 1770 return 0;
1598} 1771}
1599 1772
1600static void savage_unmap_video (struct fb_info *info) 1773static void savage_unmap_video(struct fb_info *info)
1601{ 1774{
1602 struct savagefb_par *par = info->par; 1775 struct savagefb_par *par = info->par;
1603 1776
@@ -1605,16 +1778,16 @@ static void savage_unmap_video (struct fb_info *info)
1605 1778
1606 if (par->video.vbase) { 1779 if (par->video.vbase) {
1607#ifdef CONFIG_MTRR 1780#ifdef CONFIG_MTRR
1608 mtrr_del (par->video.mtrr, par->video.pbase, par->video.len); 1781 mtrr_del(par->video.mtrr, par->video.pbase, par->video.len);
1609#endif 1782#endif
1610 1783
1611 iounmap (par->video.vbase); 1784 iounmap(par->video.vbase);
1612 par->video.vbase = NULL; 1785 par->video.vbase = NULL;
1613 info->screen_base = NULL; 1786 info->screen_base = NULL;
1614 } 1787 }
1615} 1788}
1616 1789
1617static int savage_init_hw (struct savagefb_par *par) 1790static int savage_init_hw(struct savagefb_par *par)
1618{ 1791{
1619 unsigned char config1, m, n, n1, n2, sr8, cr3f, cr66 = 0, tmp; 1792 unsigned char config1, m, n, n1, n2, sr8, cr3f, cr66 = 0, tmp;
1620 1793
@@ -1656,7 +1829,7 @@ static int savage_init_hw (struct savagefb_par *par)
1656 1829
1657 switch (par->chip) { 1830 switch (par->chip) {
1658 case S3_SAVAGE3D: 1831 case S3_SAVAGE3D:
1659 videoRam = RamSavage3D[ (config1 & 0xC0) >> 6 ] * 1024; 1832 videoRam = RamSavage3D[(config1 & 0xC0) >> 6 ] * 1024;
1660 break; 1833 break;
1661 1834
1662 case S3_SAVAGE4: 1835 case S3_SAVAGE4:
@@ -1667,22 +1840,22 @@ static int savage_init_hw (struct savagefb_par *par)
1667 * can do it different... 1840 * can do it different...
1668 */ 1841 */
1669 vga_out8(0x3d4, 0x68, par); /* memory control 1 */ 1842 vga_out8(0x3d4, 0x68, par); /* memory control 1 */
1670 if( (vga_in8(0x3d5, par) & 0xC0) == (0x01 << 6) ) 1843 if ((vga_in8(0x3d5, par) & 0xC0) == (0x01 << 6))
1671 RamSavage4[1] = 8; 1844 RamSavage4[1] = 8;
1672 1845
1673 /*FALLTHROUGH*/ 1846 /*FALLTHROUGH*/
1674 1847
1675 case S3_SAVAGE2000: 1848 case S3_SAVAGE2000:
1676 videoRam = RamSavage4[ (config1 & 0xE0) >> 5 ] * 1024; 1849 videoRam = RamSavage4[(config1 & 0xE0) >> 5] * 1024;
1677 break; 1850 break;
1678 1851
1679 case S3_SAVAGE_MX: 1852 case S3_SAVAGE_MX:
1680 case S3_SUPERSAVAGE: 1853 case S3_SUPERSAVAGE:
1681 videoRam = RamSavageMX[ (config1 & 0x0E) >> 1 ] * 1024; 1854 videoRam = RamSavageMX[(config1 & 0x0E) >> 1] * 1024;
1682 break; 1855 break;
1683 1856
1684 case S3_PROSAVAGE: 1857 case S3_PROSAVAGE:
1685 videoRam = RamSavageNB[ (config1 & 0xE0) >> 5 ] * 1024; 1858 videoRam = RamSavageNB[(config1 & 0xE0) >> 5] * 1024;
1686 break; 1859 break;
1687 1860
1688 default: 1861 default:
@@ -1693,31 +1866,31 @@ static int savage_init_hw (struct savagefb_par *par)
1693 1866
1694 videoRambytes = videoRam * 1024; 1867 videoRambytes = videoRam * 1024;
1695 1868
1696 printk (KERN_INFO "savagefb: probed videoram: %dk\n", videoRam); 1869 printk(KERN_INFO "savagefb: probed videoram: %dk\n", videoRam);
1697 1870
1698 /* reset graphics engine to avoid memory corruption */ 1871 /* reset graphics engine to avoid memory corruption */
1699 vga_out8 (0x3d4, 0x66, par); 1872 vga_out8(0x3d4, 0x66, par);
1700 cr66 = vga_in8 (0x3d5, par); 1873 cr66 = vga_in8(0x3d5, par);
1701 vga_out8 (0x3d5, cr66 | 0x02, par); 1874 vga_out8(0x3d5, cr66 | 0x02, par);
1702 udelay (10000); 1875 udelay(10000);
1703 1876
1704 vga_out8 (0x3d4, 0x66, par); 1877 vga_out8(0x3d4, 0x66, par);
1705 vga_out8 (0x3d5, cr66 & ~0x02, par); /* clear reset flag */ 1878 vga_out8(0x3d5, cr66 & ~0x02, par); /* clear reset flag */
1706 udelay (10000); 1879 udelay(10000);
1707 1880
1708 1881
1709 /* 1882 /*
1710 * reset memory interface, 3D engine, AGP master, PCI master, 1883 * reset memory interface, 3D engine, AGP master, PCI master,
1711 * master engine unit, motion compensation/LPB 1884 * master engine unit, motion compensation/LPB
1712 */ 1885 */
1713 vga_out8 (0x3d4, 0x3f, par); 1886 vga_out8(0x3d4, 0x3f, par);
1714 cr3f = vga_in8 (0x3d5, par); 1887 cr3f = vga_in8(0x3d5, par);
1715 vga_out8 (0x3d5, cr3f | 0x08, par); 1888 vga_out8(0x3d5, cr3f | 0x08, par);
1716 udelay (10000); 1889 udelay(10000);
1717 1890
1718 vga_out8 (0x3d4, 0x3f, par); 1891 vga_out8(0x3d4, 0x3f, par);
1719 vga_out8 (0x3d5, cr3f & ~0x08, par); /* clear reset flags */ 1892 vga_out8(0x3d5, cr3f & ~0x08, par); /* clear reset flags */
1720 udelay (10000); 1893 udelay(10000);
1721 1894
1722 /* Savage ramdac speeds */ 1895 /* Savage ramdac speeds */
1723 par->numClocks = 4; 1896 par->numClocks = 4;
@@ -1740,7 +1913,7 @@ static int savage_init_hw (struct savagefb_par *par)
1740 n1 = n & 0x1f; 1913 n1 = n & 0x1f;
1741 n2 = (n >> 5) & 0x03; 1914 n2 = (n >> 5) & 0x03;
1742 par->MCLK = ((1431818 * (m+2)) / (n1+2) / (1 << n2) + 50) / 100; 1915 par->MCLK = ((1431818 * (m+2)) / (n1+2) / (1 << n2) + 50) / 100;
1743 printk (KERN_INFO "savagefb: Detected current MCLK value of %d kHz\n", 1916 printk(KERN_INFO "savagefb: Detected current MCLK value of %d kHz\n",
1744 par->MCLK); 1917 par->MCLK);
1745 1918
1746 /* check for DVI/flat panel */ 1919 /* check for DVI/flat panel */
@@ -1769,12 +1942,12 @@ static int savage_init_hw (struct savagefb_par *par)
1769 /* Check LCD panel parrmation */ 1942 /* Check LCD panel parrmation */
1770 1943
1771 if (par->display_type == DISP_LCD) { 1944 if (par->display_type == DISP_LCD) {
1772 unsigned char cr6b = VGArCR( 0x6b, par); 1945 unsigned char cr6b = VGArCR(0x6b, par);
1773 1946
1774 int panelX = (VGArSEQ (0x61, par) + 1947 int panelX = (VGArSEQ(0x61, par) +
1775 ((VGArSEQ (0x66, par) & 0x02) << 7) + 1) * 8; 1948 ((VGArSEQ(0x66, par) & 0x02) << 7) + 1) * 8;
1776 int panelY = (VGArSEQ (0x69, par) + 1949 int panelY = (VGArSEQ(0x69, par) +
1777 ((VGArSEQ (0x6e, par) & 0x70) << 4) + 1); 1950 ((VGArSEQ(0x6e, par) & 0x70) << 4) + 1);
1778 1951
1779 char * sTechnology = "Unknown"; 1952 char * sTechnology = "Unknown";
1780 1953
@@ -1796,26 +1969,26 @@ static int savage_init_hw (struct savagefb_par *par)
1796 ActiveDUO = 0x80 1969 ActiveDUO = 0x80
1797 }; 1970 };
1798 1971
1799 if ((VGArSEQ (0x39, par) & 0x03) == 0) { 1972 if ((VGArSEQ(0x39, par) & 0x03) == 0) {
1800 sTechnology = "TFT"; 1973 sTechnology = "TFT";
1801 } else if ((VGArSEQ (0x30, par) & 0x01) == 0) { 1974 } else if ((VGArSEQ(0x30, par) & 0x01) == 0) {
1802 sTechnology = "DSTN"; 1975 sTechnology = "DSTN";
1803 } else { 1976 } else {
1804 sTechnology = "STN"; 1977 sTechnology = "STN";
1805 } 1978 }
1806 1979
1807 printk (KERN_INFO "savagefb: %dx%d %s LCD panel detected %s\n", 1980 printk(KERN_INFO "savagefb: %dx%d %s LCD panel detected %s\n",
1808 panelX, panelY, sTechnology, 1981 panelX, panelY, sTechnology,
1809 cr6b & ActiveLCD ? "and active" : "but not active"); 1982 cr6b & ActiveLCD ? "and active" : "but not active");
1810 1983
1811 if( cr6b & ActiveLCD ) { 1984 if (cr6b & ActiveLCD) {
1812 /* 1985 /*
1813 * If the LCD is active and panel expansion is enabled, 1986 * If the LCD is active and panel expansion is enabled,
1814 * we probably want to kill the HW cursor. 1987 * we probably want to kill the HW cursor.
1815 */ 1988 */
1816 1989
1817 printk (KERN_INFO "savagefb: Limiting video mode to " 1990 printk(KERN_INFO "savagefb: Limiting video mode to "
1818 "%dx%d\n", panelX, panelY ); 1991 "%dx%d\n", panelX, panelY);
1819 1992
1820 par->SavagePanelWidth = panelX; 1993 par->SavagePanelWidth = panelX;
1821 par->SavagePanelHeight = panelY; 1994 par->SavagePanelHeight = panelY;
@@ -1824,9 +1997,10 @@ static int savage_init_hw (struct savagefb_par *par)
1824 par->display_type = DISP_CRT; 1997 par->display_type = DISP_CRT;
1825 } 1998 }
1826 1999
1827 savage_get_default_par (par); 2000 savage_get_default_par(par, &par->state);
2001 par->save = par->state;
1828 2002
1829 if( S3_SAVAGE4_SERIES(par->chip) ) { 2003 if (S3_SAVAGE4_SERIES(par->chip)) {
1830 /* 2004 /*
1831 * The Savage4 and ProSavage have COB coherency bugs which 2005 * The Savage4 and ProSavage have COB coherency bugs which
1832 * render the buffer useless. We disable it. 2006 * render the buffer useless. We disable it.
@@ -1845,9 +2019,9 @@ static int savage_init_hw (struct savagefb_par *par)
1845 return videoRambytes; 2019 return videoRambytes;
1846} 2020}
1847 2021
1848static int __devinit savage_init_fb_info (struct fb_info *info, 2022static int __devinit savage_init_fb_info(struct fb_info *info,
1849 struct pci_dev *dev, 2023 struct pci_dev *dev,
1850 const struct pci_device_id *id) 2024 const struct pci_device_id *id)
1851{ 2025{
1852 struct savagefb_par *par = info->par; 2026 struct savagefb_par *par = info->par;
1853 int err = 0; 2027 int err = 0;
@@ -1863,63 +2037,63 @@ static int __devinit savage_init_fb_info (struct fb_info *info,
1863 switch (info->fix.accel) { 2037 switch (info->fix.accel) {
1864 case FB_ACCEL_SUPERSAVAGE: 2038 case FB_ACCEL_SUPERSAVAGE:
1865 par->chip = S3_SUPERSAVAGE; 2039 par->chip = S3_SUPERSAVAGE;
1866 snprintf (info->fix.id, 16, "SuperSavage"); 2040 snprintf(info->fix.id, 16, "SuperSavage");
1867 break; 2041 break;
1868 case FB_ACCEL_SAVAGE4: 2042 case FB_ACCEL_SAVAGE4:
1869 par->chip = S3_SAVAGE4; 2043 par->chip = S3_SAVAGE4;
1870 snprintf (info->fix.id, 16, "Savage4"); 2044 snprintf(info->fix.id, 16, "Savage4");
1871 break; 2045 break;
1872 case FB_ACCEL_SAVAGE3D: 2046 case FB_ACCEL_SAVAGE3D:
1873 par->chip = S3_SAVAGE3D; 2047 par->chip = S3_SAVAGE3D;
1874 snprintf (info->fix.id, 16, "Savage3D"); 2048 snprintf(info->fix.id, 16, "Savage3D");
1875 break; 2049 break;
1876 case FB_ACCEL_SAVAGE3D_MV: 2050 case FB_ACCEL_SAVAGE3D_MV:
1877 par->chip = S3_SAVAGE3D; 2051 par->chip = S3_SAVAGE3D;
1878 snprintf (info->fix.id, 16, "Savage3D-MV"); 2052 snprintf(info->fix.id, 16, "Savage3D-MV");
1879 break; 2053 break;
1880 case FB_ACCEL_SAVAGE2000: 2054 case FB_ACCEL_SAVAGE2000:
1881 par->chip = S3_SAVAGE2000; 2055 par->chip = S3_SAVAGE2000;
1882 snprintf (info->fix.id, 16, "Savage2000"); 2056 snprintf(info->fix.id, 16, "Savage2000");
1883 break; 2057 break;
1884 case FB_ACCEL_SAVAGE_MX_MV: 2058 case FB_ACCEL_SAVAGE_MX_MV:
1885 par->chip = S3_SAVAGE_MX; 2059 par->chip = S3_SAVAGE_MX;
1886 snprintf (info->fix.id, 16, "Savage/MX-MV"); 2060 snprintf(info->fix.id, 16, "Savage/MX-MV");
1887 break; 2061 break;
1888 case FB_ACCEL_SAVAGE_MX: 2062 case FB_ACCEL_SAVAGE_MX:
1889 par->chip = S3_SAVAGE_MX; 2063 par->chip = S3_SAVAGE_MX;
1890 snprintf (info->fix.id, 16, "Savage/MX"); 2064 snprintf(info->fix.id, 16, "Savage/MX");
1891 break; 2065 break;
1892 case FB_ACCEL_SAVAGE_IX_MV: 2066 case FB_ACCEL_SAVAGE_IX_MV:
1893 par->chip = S3_SAVAGE_MX; 2067 par->chip = S3_SAVAGE_MX;
1894 snprintf (info->fix.id, 16, "Savage/IX-MV"); 2068 snprintf(info->fix.id, 16, "Savage/IX-MV");
1895 break; 2069 break;
1896 case FB_ACCEL_SAVAGE_IX: 2070 case FB_ACCEL_SAVAGE_IX:
1897 par->chip = S3_SAVAGE_MX; 2071 par->chip = S3_SAVAGE_MX;
1898 snprintf (info->fix.id, 16, "Savage/IX"); 2072 snprintf(info->fix.id, 16, "Savage/IX");
1899 break; 2073 break;
1900 case FB_ACCEL_PROSAVAGE_PM: 2074 case FB_ACCEL_PROSAVAGE_PM:
1901 par->chip = S3_PROSAVAGE; 2075 par->chip = S3_PROSAVAGE;
1902 snprintf (info->fix.id, 16, "ProSavagePM"); 2076 snprintf(info->fix.id, 16, "ProSavagePM");
1903 break; 2077 break;
1904 case FB_ACCEL_PROSAVAGE_KM: 2078 case FB_ACCEL_PROSAVAGE_KM:
1905 par->chip = S3_PROSAVAGE; 2079 par->chip = S3_PROSAVAGE;
1906 snprintf (info->fix.id, 16, "ProSavageKM"); 2080 snprintf(info->fix.id, 16, "ProSavageKM");
1907 break; 2081 break;
1908 case FB_ACCEL_S3TWISTER_P: 2082 case FB_ACCEL_S3TWISTER_P:
1909 par->chip = S3_PROSAVAGE; 2083 par->chip = S3_PROSAVAGE;
1910 snprintf (info->fix.id, 16, "TwisterP"); 2084 snprintf(info->fix.id, 16, "TwisterP");
1911 break; 2085 break;
1912 case FB_ACCEL_S3TWISTER_K: 2086 case FB_ACCEL_S3TWISTER_K:
1913 par->chip = S3_PROSAVAGE; 2087 par->chip = S3_PROSAVAGE;
1914 snprintf (info->fix.id, 16, "TwisterK"); 2088 snprintf(info->fix.id, 16, "TwisterK");
1915 break; 2089 break;
1916 case FB_ACCEL_PROSAVAGE_DDR: 2090 case FB_ACCEL_PROSAVAGE_DDR:
1917 par->chip = S3_PROSAVAGE; 2091 par->chip = S3_PROSAVAGE;
1918 snprintf (info->fix.id, 16, "ProSavageDDR"); 2092 snprintf(info->fix.id, 16, "ProSavageDDR");
1919 break; 2093 break;
1920 case FB_ACCEL_PROSAVAGE_DDRK: 2094 case FB_ACCEL_PROSAVAGE_DDRK:
1921 par->chip = S3_PROSAVAGE; 2095 par->chip = S3_PROSAVAGE;
1922 snprintf (info->fix.id, 16, "ProSavage8"); 2096 snprintf(info->fix.id, 16, "ProSavage8");
1923 break; 2097 break;
1924 } 2098 }
1925 2099
@@ -1960,7 +2134,7 @@ static int __devinit savage_init_fb_info (struct fb_info *info,
1960 info->pixmap.buf_align = 4; 2134 info->pixmap.buf_align = 4;
1961 info->pixmap.access_align = 32; 2135 info->pixmap.access_align = 32;
1962 2136
1963 err = fb_alloc_cmap (&info->cmap, NR_PALETTE, 0); 2137 err = fb_alloc_cmap(&info->cmap, NR_PALETTE, 0);
1964 if (!err) 2138 if (!err)
1965 info->flags |= FBINFO_HWACCEL_COPYAREA | 2139 info->flags |= FBINFO_HWACCEL_COPYAREA |
1966 FBINFO_HWACCEL_FILLRECT | 2140 FBINFO_HWACCEL_FILLRECT |
@@ -1972,8 +2146,8 @@ static int __devinit savage_init_fb_info (struct fb_info *info,
1972 2146
1973/* --------------------------------------------------------------------- */ 2147/* --------------------------------------------------------------------- */
1974 2148
1975static int __devinit savagefb_probe (struct pci_dev* dev, 2149static int __devinit savagefb_probe(struct pci_dev* dev,
1976 const struct pci_device_id* id) 2150 const struct pci_device_id* id)
1977{ 2151{
1978 struct fb_info *info; 2152 struct fb_info *info;
1979 struct savagefb_par *par; 2153 struct savagefb_par *par;
@@ -2085,12 +2259,12 @@ static int __devinit savagefb_probe (struct pci_dev* dev,
2085 fb_destroy_modedb(info->monspecs.modedb); 2259 fb_destroy_modedb(info->monspecs.modedb);
2086 info->monspecs.modedb = NULL; 2260 info->monspecs.modedb = NULL;
2087 2261
2088 err = register_framebuffer (info); 2262 err = register_framebuffer(info);
2089 if (err < 0) 2263 if (err < 0)
2090 goto failed; 2264 goto failed;
2091 2265
2092 printk (KERN_INFO "fb: S3 %s frame buffer device\n", 2266 printk(KERN_INFO "fb: S3 %s frame buffer device\n",
2093 info->fix.id); 2267 info->fix.id);
2094 2268
2095 /* 2269 /*
2096 * Our driver data 2270 * Our driver data
@@ -2103,10 +2277,10 @@ static int __devinit savagefb_probe (struct pci_dev* dev,
2103#ifdef CONFIG_FB_SAVAGE_I2C 2277#ifdef CONFIG_FB_SAVAGE_I2C
2104 savagefb_delete_i2c_busses(info); 2278 savagefb_delete_i2c_busses(info);
2105#endif 2279#endif
2106 fb_alloc_cmap (&info->cmap, 0, 0); 2280 fb_alloc_cmap(&info->cmap, 0, 0);
2107 savage_unmap_video(info); 2281 savage_unmap_video(info);
2108 failed_video: 2282 failed_video:
2109 savage_unmap_mmio (info); 2283 savage_unmap_mmio(info);
2110 failed_mmio: 2284 failed_mmio:
2111 kfree(info->pixmap.addr); 2285 kfree(info->pixmap.addr);
2112 failed_init: 2286 failed_init:
@@ -2117,7 +2291,7 @@ static int __devinit savagefb_probe (struct pci_dev* dev,
2117 return err; 2291 return err;
2118} 2292}
2119 2293
2120static void __devexit savagefb_remove (struct pci_dev *dev) 2294static void __devexit savagefb_remove(struct pci_dev *dev)
2121{ 2295{
2122 struct fb_info *info = pci_get_drvdata(dev); 2296 struct fb_info *info = pci_get_drvdata(dev);
2123 2297
@@ -2129,16 +2303,16 @@ static void __devexit savagefb_remove (struct pci_dev *dev)
2129 * we will be leaving hooks that could cause 2303 * we will be leaving hooks that could cause
2130 * oopsen laying around. 2304 * oopsen laying around.
2131 */ 2305 */
2132 if (unregister_framebuffer (info)) 2306 if (unregister_framebuffer(info))
2133 printk (KERN_WARNING "savagefb: danger danger! " 2307 printk(KERN_WARNING "savagefb: danger danger! "
2134 "Oopsen imminent!\n"); 2308 "Oopsen imminent!\n");
2135 2309
2136#ifdef CONFIG_FB_SAVAGE_I2C 2310#ifdef CONFIG_FB_SAVAGE_I2C
2137 savagefb_delete_i2c_busses(info); 2311 savagefb_delete_i2c_busses(info);
2138#endif 2312#endif
2139 fb_alloc_cmap (&info->cmap, 0, 0); 2313 fb_alloc_cmap(&info->cmap, 0, 0);
2140 savage_unmap_video (info); 2314 savage_unmap_video(info);
2141 savage_unmap_mmio (info); 2315 savage_unmap_mmio(info);
2142 kfree(info->pixmap.addr); 2316 kfree(info->pixmap.addr);
2143 pci_release_regions(dev); 2317 pci_release_regions(dev);
2144 framebuffer_release(info); 2318 framebuffer_release(info);
@@ -2151,7 +2325,7 @@ static void __devexit savagefb_remove (struct pci_dev *dev)
2151 } 2325 }
2152} 2326}
2153 2327
2154static int savagefb_suspend (struct pci_dev* dev, pm_message_t state) 2328static int savagefb_suspend(struct pci_dev* dev, pm_message_t state)
2155{ 2329{
2156 struct fb_info *info = pci_get_drvdata(dev); 2330 struct fb_info *info = pci_get_drvdata(dev);
2157 struct savagefb_par *par = info->par; 2331 struct savagefb_par *par = info->par;
@@ -2177,6 +2351,7 @@ static int savagefb_suspend (struct pci_dev* dev, pm_message_t state)
2177 info->fbops->fb_sync(info); 2351 info->fbops->fb_sync(info);
2178 2352
2179 savagefb_blank(FB_BLANK_POWERDOWN, info); 2353 savagefb_blank(FB_BLANK_POWERDOWN, info);
2354 savage_set_default_par(par, &par->save);
2180 savage_disable_mmio(par); 2355 savage_disable_mmio(par);
2181 pci_save_state(dev); 2356 pci_save_state(dev);
2182 pci_disable_device(dev); 2357 pci_disable_device(dev);
@@ -2186,7 +2361,7 @@ static int savagefb_suspend (struct pci_dev* dev, pm_message_t state)
2186 return 0; 2361 return 0;
2187} 2362}
2188 2363
2189static int savagefb_resume (struct pci_dev* dev) 2364static int savagefb_resume(struct pci_dev* dev)
2190{ 2365{
2191 struct fb_info *info = pci_get_drvdata(dev); 2366 struct fb_info *info = pci_get_drvdata(dev);
2192 struct savagefb_par *par = info->par; 2367 struct savagefb_par *par = info->par;
@@ -2210,15 +2385,15 @@ static int savagefb_resume (struct pci_dev* dev)
2210 pci_set_power_state(dev, PCI_D0); 2385 pci_set_power_state(dev, PCI_D0);
2211 pci_restore_state(dev); 2386 pci_restore_state(dev);
2212 2387
2213 if(pci_enable_device(dev)) 2388 if (pci_enable_device(dev))
2214 DBG("err"); 2389 DBG("err");
2215 2390
2216 pci_set_master(dev); 2391 pci_set_master(dev);
2217 savage_enable_mmio(par); 2392 savage_enable_mmio(par);
2218 savage_init_hw(par); 2393 savage_init_hw(par);
2219 savagefb_set_par (info); 2394 savagefb_set_par(info);
2395 fb_set_suspend(info, 0);
2220 savagefb_blank(FB_BLANK_UNBLANK, info); 2396 savagefb_blank(FB_BLANK_UNBLANK, info);
2221 fb_set_suspend (info, 0);
2222 release_console_sem(); 2397 release_console_sem();
2223 2398
2224 return 0; 2399 return 0;
@@ -2311,10 +2486,10 @@ static struct pci_driver savagefb_driver = {
2311 2486
2312/* **************************** exit-time only **************************** */ 2487/* **************************** exit-time only **************************** */
2313 2488
2314static void __exit savage_done (void) 2489static void __exit savage_done(void)
2315{ 2490{
2316 DBG("savage_done"); 2491 DBG("savage_done");
2317 pci_unregister_driver (&savagefb_driver); 2492 pci_unregister_driver(&savagefb_driver);
2318} 2493}
2319 2494
2320 2495
@@ -2345,7 +2520,7 @@ static int __init savagefb_init(void)
2345 return -ENODEV; 2520 return -ENODEV;
2346 2521
2347 savagefb_setup(option); 2522 savagefb_setup(option);
2348 return pci_register_driver (&savagefb_driver); 2523 return pci_register_driver(&savagefb_driver);
2349 2524
2350} 2525}
2351 2526
diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c
index 8adf5bf91eee..c63c0e721b82 100644
--- a/drivers/video/sis/sis_main.c
+++ b/drivers/video/sis/sis_main.c
@@ -275,7 +275,7 @@ sisfb_search_mode(char *name, BOOLEAN quiet)
275static void __devinit 275static void __devinit
276sisfb_get_vga_mode_from_kernel(void) 276sisfb_get_vga_mode_from_kernel(void)
277{ 277{
278#if (defined(__i386__) || defined(__x86_64__)) && defined(CONFIG_VIDEO_SELECT) 278#ifdef CONFIG_X86
279 char mymode[32]; 279 char mymode[32];
280 int mydepth = screen_info.lfb_depth; 280 int mydepth = screen_info.lfb_depth;
281 281
diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c
index 9b707771d757..67f429e93189 100644
--- a/drivers/video/skeletonfb.c
+++ b/drivers/video/skeletonfb.c
@@ -906,11 +906,6 @@ static void __exit xxxfb_exit(void)
906} 906}
907#endif 907#endif
908 908
909MODULE_LICENSE("GPL");
910module_init(xxxfb_init);
911module_exit(xxxfb_exit);
912
913
914 /* 909 /*
915 * Setup 910 * Setup
916 */ 911 */
diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c
index 7398bd48ba6c..6c2c78ab9827 100644
--- a/drivers/video/tgafb.c
+++ b/drivers/video/tgafb.c
@@ -26,7 +26,6 @@
26#include <linux/selection.h> 26#include <linux/selection.h>
27#include <asm/io.h> 27#include <asm/io.h>
28#include <video/tgafb.h> 28#include <video/tgafb.h>
29#include <linux/selection.h>
30 29
31/* 30/*
32 * Local functions. 31 * Local functions.
diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c
index b0b9acfdd430..5718924b677f 100644
--- a/drivers/video/vesafb.c
+++ b/drivers/video/vesafb.c
@@ -51,7 +51,7 @@ static int inverse = 0;
51static int mtrr = 0; /* disable mtrr */ 51static int mtrr = 0; /* disable mtrr */
52static int vram_remap __initdata = 0; /* Set amount of memory to be used */ 52static int vram_remap __initdata = 0; /* Set amount of memory to be used */
53static int vram_total __initdata = 0; /* Set total amount of memory */ 53static int vram_total __initdata = 0; /* Set total amount of memory */
54static int pmi_setpal = 0; /* pmi for palette changes ??? */ 54static int pmi_setpal = 1; /* pmi for palette changes ??? */
55static int ypan = 0; /* 0..nothing, 1..ypan, 2..ywrap */ 55static int ypan = 0; /* 0..nothing, 1..ypan, 2..ywrap */
56static unsigned short *pmi_base = NULL; 56static unsigned short *pmi_base = NULL;
57static void (*pmi_start)(void); 57static void (*pmi_start)(void);
@@ -80,15 +80,30 @@ static int vesafb_pan_display(struct fb_var_screeninfo *var,
80 return 0; 80 return 0;
81} 81}
82 82
83static void vesa_setpalette(int regno, unsigned red, unsigned green, 83static int vesa_setpalette(int regno, unsigned red, unsigned green,
84 unsigned blue) 84 unsigned blue)
85{ 85{
86 int shift = 16 - depth; 86 int shift = 16 - depth;
87 int err = -EINVAL;
88
89/*
90 * Try VGA registers first...
91 */
92 if (vga_compat) {
93 outb_p(regno, dac_reg);
94 outb_p(red >> shift, dac_val);
95 outb_p(green >> shift, dac_val);
96 outb_p(blue >> shift, dac_val);
97 err = 0;
98 }
87 99
88#ifdef __i386__ 100#ifdef __i386__
89 struct { u_char blue, green, red, pad; } entry; 101/*
102 * Fallback to the PMI....
103 */
104 if (err && pmi_setpal) {
105 struct { u_char blue, green, red, pad; } entry;
90 106
91 if (pmi_setpal) {
92 entry.red = red >> shift; 107 entry.red = red >> shift;
93 entry.green = green >> shift; 108 entry.green = green >> shift;
94 entry.blue = blue >> shift; 109 entry.blue = blue >> shift;
@@ -102,26 +117,19 @@ static void vesa_setpalette(int regno, unsigned red, unsigned green,
102 "d" (regno), /* EDX */ 117 "d" (regno), /* EDX */
103 "D" (&entry), /* EDI */ 118 "D" (&entry), /* EDI */
104 "S" (&pmi_pal)); /* ESI */ 119 "S" (&pmi_pal)); /* ESI */
105 return; 120 err = 0;
106 } 121 }
107#endif 122#endif
108 123
109/* 124 return err;
110 * without protected mode interface and if VGA compatible,
111 * try VGA registers...
112 */
113 if (vga_compat) {
114 outb_p(regno, dac_reg);
115 outb_p(red >> shift, dac_val);
116 outb_p(green >> shift, dac_val);
117 outb_p(blue >> shift, dac_val);
118 }
119} 125}
120 126
121static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green, 127static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
122 unsigned blue, unsigned transp, 128 unsigned blue, unsigned transp,
123 struct fb_info *info) 129 struct fb_info *info)
124{ 130{
131 int err = 0;
132
125 /* 133 /*
126 * Set a single color register. The values supplied are 134 * Set a single color register. The values supplied are
127 * already rounded down to the hardware's capabilities 135 * already rounded down to the hardware's capabilities
@@ -133,7 +141,7 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
133 return 1; 141 return 1;
134 142
135 if (info->var.bits_per_pixel == 8) 143 if (info->var.bits_per_pixel == 8)
136 vesa_setpalette(regno,red,green,blue); 144 err = vesa_setpalette(regno,red,green,blue);
137 else if (regno < 16) { 145 else if (regno < 16) {
138 switch (info->var.bits_per_pixel) { 146 switch (info->var.bits_per_pixel) {
139 case 16: 147 case 16:
@@ -164,7 +172,7 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
164 } 172 }
165 } 173 }
166 174
167 return 0; 175 return err;
168} 176}
169 177
170static struct fb_ops vesafb_ops = { 178static struct fb_ops vesafb_ops = {
@@ -460,9 +468,7 @@ static struct platform_driver vesafb_driver = {
460 }, 468 },
461}; 469};
462 470
463static struct platform_device vesafb_device = { 471static struct platform_device *vesafb_device;
464 .name = "vesafb",
465};
466 472
467static int __init vesafb_init(void) 473static int __init vesafb_init(void)
468{ 474{
@@ -475,10 +481,19 @@ static int __init vesafb_init(void)
475 ret = platform_driver_register(&vesafb_driver); 481 ret = platform_driver_register(&vesafb_driver);
476 482
477 if (!ret) { 483 if (!ret) {
478 ret = platform_device_register(&vesafb_device); 484 vesafb_device = platform_device_alloc("vesafb", 0);
479 if (ret) 485
486 if (vesafb_device)
487 ret = platform_device_add(vesafb_device);
488 else
489 ret = -ENOMEM;
490
491 if (ret) {
492 platform_device_put(vesafb_device);
480 platform_driver_unregister(&vesafb_driver); 493 platform_driver_unregister(&vesafb_driver);
494 }
481 } 495 }
496
482 return ret; 497 return ret;
483} 498}
484module_init(vesafb_init); 499module_init(vesafb_init);
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index 77eed1fd9943..d073ffb6e1f9 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -398,12 +398,6 @@ static int __init vfb_setup(char *options)
398 * Initialisation 398 * Initialisation
399 */ 399 */
400 400
401static void vfb_platform_release(struct device *device)
402{
403 // This is called when the reference count goes to zero.
404 dev_err(device, "This driver is broken, please bug the authors so they will fix it.\n");
405}
406
407static int __init vfb_probe(struct platform_device *dev) 401static int __init vfb_probe(struct platform_device *dev)
408{ 402{
409 struct fb_info *info; 403 struct fb_info *info;
@@ -482,13 +476,7 @@ static struct platform_driver vfb_driver = {
482 }, 476 },
483}; 477};
484 478
485static struct platform_device vfb_device = { 479static struct platform_device *vfb_device;
486 .name = "vfb",
487 .id = 0,
488 .dev = {
489 .release = vfb_platform_release,
490 }
491};
492 480
493static int __init vfb_init(void) 481static int __init vfb_init(void)
494{ 482{
@@ -508,10 +496,19 @@ static int __init vfb_init(void)
508 ret = platform_driver_register(&vfb_driver); 496 ret = platform_driver_register(&vfb_driver);
509 497
510 if (!ret) { 498 if (!ret) {
511 ret = platform_device_register(&vfb_device); 499 vfb_device = platform_device_alloc("vfb", 0);
512 if (ret) 500
501 if (vfb_device)
502 ret = platform_device_add(vfb_device);
503 else
504 ret = -ENOMEM;
505
506 if (ret) {
507 platform_device_put(vfb_device);
513 platform_driver_unregister(&vfb_driver); 508 platform_driver_unregister(&vfb_driver);
509 }
514 } 510 }
511
515 return ret; 512 return ret;
516} 513}
517 514
@@ -520,7 +517,7 @@ module_init(vfb_init);
520#ifdef MODULE 517#ifdef MODULE
521static void __exit vfb_exit(void) 518static void __exit vfb_exit(void)
522{ 519{
523 platform_device_unregister(&vfb_device); 520 platform_device_unregister(vfb_device);
524 platform_driver_unregister(&vfb_driver); 521 platform_driver_unregister(&vfb_driver);
525} 522}
526 523
diff --git a/drivers/video/vga16fb.c b/drivers/video/vga16fb.c
index 4fd2a272e03d..3c404c9bd36c 100644
--- a/drivers/video/vga16fb.c
+++ b/drivers/video/vga16fb.c
@@ -1334,9 +1334,8 @@ static int vga16fb_setup(char *options)
1334} 1334}
1335#endif 1335#endif
1336 1336
1337static int __init vga16fb_probe(struct device *device) 1337static int __init vga16fb_probe(struct platform_device *dev)
1338{ 1338{
1339 struct platform_device *dev = to_platform_device(device);
1340 struct fb_info *info; 1339 struct fb_info *info;
1341 struct vga16fb_par *par; 1340 struct vga16fb_par *par;
1342 int i; 1341 int i;
@@ -1403,7 +1402,7 @@ static int __init vga16fb_probe(struct device *device)
1403 1402
1404 printk(KERN_INFO "fb%d: %s frame buffer device\n", 1403 printk(KERN_INFO "fb%d: %s frame buffer device\n",
1405 info->node, info->fix.id); 1404 info->node, info->fix.id);
1406 dev_set_drvdata(device, info); 1405 platform_set_drvdata(dev, info);
1407 1406
1408 return 0; 1407 return 0;
1409 1408
@@ -1417,9 +1416,9 @@ static int __init vga16fb_probe(struct device *device)
1417 return ret; 1416 return ret;
1418} 1417}
1419 1418
1420static int vga16fb_remove(struct device *device) 1419static int vga16fb_remove(struct platform_device *dev)
1421{ 1420{
1422 struct fb_info *info = dev_get_drvdata(device); 1421 struct fb_info *info = platform_get_drvdata(dev);
1423 1422
1424 if (info) { 1423 if (info) {
1425 unregister_framebuffer(info); 1424 unregister_framebuffer(info);
@@ -1432,16 +1431,15 @@ static int vga16fb_remove(struct device *device)
1432 return 0; 1431 return 0;
1433} 1432}
1434 1433
1435static struct device_driver vga16fb_driver = { 1434static struct platform_driver vga16fb_driver = {
1436 .name = "vga16fb",
1437 .bus = &platform_bus_type,
1438 .probe = vga16fb_probe, 1435 .probe = vga16fb_probe,
1439 .remove = vga16fb_remove, 1436 .remove = vga16fb_remove,
1437 .driver = {
1438 .name = "vga16fb",
1439 },
1440}; 1440};
1441 1441
1442static struct platform_device vga16fb_device = { 1442static struct platform_device *vga16fb_device;
1443 .name = "vga16fb",
1444};
1445 1443
1446static int __init vga16fb_init(void) 1444static int __init vga16fb_init(void)
1447{ 1445{
@@ -1454,12 +1452,20 @@ static int __init vga16fb_init(void)
1454 1452
1455 vga16fb_setup(option); 1453 vga16fb_setup(option);
1456#endif 1454#endif
1457 ret = driver_register(&vga16fb_driver); 1455 ret = platform_driver_register(&vga16fb_driver);
1458 1456
1459 if (!ret) { 1457 if (!ret) {
1460 ret = platform_device_register(&vga16fb_device); 1458 vga16fb_device = platform_device_alloc("vga16fb", 0);
1461 if (ret) 1459
1462 driver_unregister(&vga16fb_driver); 1460 if (vga16fb_device)
1461 ret = platform_device_add(vga16fb_device);
1462 else
1463 ret = -ENOMEM;
1464
1465 if (ret) {
1466 platform_device_put(vga16fb_device);
1467 platform_driver_unregister(&vga16fb_driver);
1468 }
1463 } 1469 }
1464 1470
1465 return ret; 1471 return ret;
@@ -1467,8 +1473,8 @@ static int __init vga16fb_init(void)
1467 1473
1468static void __exit vga16fb_exit(void) 1474static void __exit vga16fb_exit(void)
1469{ 1475{
1470 platform_device_unregister(&vga16fb_device); 1476 platform_device_unregister(vga16fb_device);
1471 driver_unregister(&vga16fb_driver); 1477 platform_driver_unregister(&vga16fb_driver);
1472} 1478}
1473 1479
1474MODULE_LICENSE("GPL"); 1480MODULE_LICENSE("GPL");
diff --git a/fs/Kconfig b/fs/Kconfig
index 1cdc043922d5..6c5051802bd2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1490,7 +1490,12 @@ config NFSD
1490 select LOCKD 1490 select LOCKD
1491 select SUNRPC 1491 select SUNRPC
1492 select EXPORTFS 1492 select EXPORTFS
1493 select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL 1493 select NFSD_V2_ACL if NFSD_V3_ACL
1494 select NFS_ACL_SUPPORT if NFSD_V2_ACL
1495 select NFSD_TCP if NFSD_V4
1496 select CRYPTO_MD5 if NFSD_V4
1497 select CRYPTO if NFSD_V4
1498 select FS_POSIX_ACL if NFSD_V4
1494 help 1499 help
1495 If you want your Linux box to act as an NFS *server*, so that other 1500 If you want your Linux box to act as an NFS *server*, so that other
1496 computers on your local network which support NFS can access certain 1501 computers on your local network which support NFS can access certain
@@ -1528,7 +1533,6 @@ config NFSD_V3
1528config NFSD_V3_ACL 1533config NFSD_V3_ACL
1529 bool "Provide server support for the NFSv3 ACL protocol extension" 1534 bool "Provide server support for the NFSv3 ACL protocol extension"
1530 depends on NFSD_V3 1535 depends on NFSD_V3
1531 select NFSD_V2_ACL
1532 help 1536 help
1533 Implement the NFSv3 ACL protocol extension for manipulating POSIX 1537 Implement the NFSv3 ACL protocol extension for manipulating POSIX
1534 Access Control Lists on exported file systems. NFS clients should 1538 Access Control Lists on exported file systems. NFS clients should
@@ -1538,10 +1542,6 @@ config NFSD_V3_ACL
1538config NFSD_V4 1542config NFSD_V4
1539 bool "Provide NFSv4 server support (EXPERIMENTAL)" 1543 bool "Provide NFSv4 server support (EXPERIMENTAL)"
1540 depends on NFSD_V3 && EXPERIMENTAL 1544 depends on NFSD_V3 && EXPERIMENTAL
1541 select NFSD_TCP
1542 select CRYPTO_MD5
1543 select CRYPTO
1544 select FS_POSIX_ACL
1545 help 1545 help
1546 If you would like to include the NFSv4 server as well as the NFSv2 1546 If you would like to include the NFSv4 server as well as the NFSv2
1547 and NFSv3 servers, say Y here. This feature is experimental, and 1547 and NFSv3 servers, say Y here. This feature is experimental, and
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 009a9ae88d61..bfc1fd22d5b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -413,8 +413,7 @@ int afs_server_find_by_peer(const struct rxrpc_peer *peer,
413 413
414 /* we found it in the graveyard - resurrect it */ 414 /* we found it in the graveyard - resurrect it */
415 found_dead_server: 415 found_dead_server:
416 list_del(&server->link); 416 list_move_tail(&server->link, &cell->sv_list);
417 list_add_tail(&server->link, &cell->sv_list);
418 afs_get_server(server); 417 afs_get_server(server);
419 afs_kafstimod_del_timer(&server->timeout); 418 afs_kafstimod_del_timer(&server->timeout);
420 spin_unlock(&cell->sv_gylock); 419 spin_unlock(&cell->sv_gylock);
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 7ac07d0d47b9..f09a794f248e 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -136,8 +136,7 @@ static int kafsasyncd(void *arg)
136 if (!list_empty(&kafsasyncd_async_attnq)) { 136 if (!list_empty(&kafsasyncd_async_attnq)) {
137 op = list_entry(kafsasyncd_async_attnq.next, 137 op = list_entry(kafsasyncd_async_attnq.next,
138 struct afs_async_op, link); 138 struct afs_async_op, link);
139 list_del(&op->link); 139 list_move_tail(&op->link,
140 list_add_tail(&op->link,
141 &kafsasyncd_async_busyq); 140 &kafsasyncd_async_busyq);
142 } 141 }
143 142
@@ -204,8 +203,7 @@ void afs_kafsasyncd_begin_op(struct afs_async_op *op)
204 init_waitqueue_entry(&op->waiter, kafsasyncd_task); 203 init_waitqueue_entry(&op->waiter, kafsasyncd_task);
205 add_wait_queue(&op->call->waitq, &op->waiter); 204 add_wait_queue(&op->call->waitq, &op->waiter);
206 205
207 list_del(&op->link); 206 list_move_tail(&op->link, &kafsasyncd_async_busyq);
208 list_add_tail(&op->link, &kafsasyncd_async_busyq);
209 207
210 spin_unlock(&kafsasyncd_async_lock); 208 spin_unlock(&kafsasyncd_async_lock);
211 209
@@ -223,8 +221,7 @@ void afs_kafsasyncd_attend_op(struct afs_async_op *op)
223 221
224 spin_lock(&kafsasyncd_async_lock); 222 spin_lock(&kafsasyncd_async_lock);
225 223
226 list_del(&op->link); 224 list_move_tail(&op->link, &kafsasyncd_async_attnq);
227 list_add_tail(&op->link, &kafsasyncd_async_attnq);
228 225
229 spin_unlock(&kafsasyncd_async_lock); 226 spin_unlock(&kafsasyncd_async_lock);
230 227
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 62b093aa41c6..22afaae1a4ce 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -123,8 +123,7 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
123 resurrect_server: 123 resurrect_server:
124 _debug("resurrecting server"); 124 _debug("resurrecting server");
125 125
126 list_del(&zombie->link); 126 list_move_tail(&zombie->link, &cell->sv_list);
127 list_add_tail(&zombie->link, &cell->sv_list);
128 afs_get_server(zombie); 127 afs_get_server(zombie);
129 afs_kafstimod_del_timer(&zombie->timeout); 128 afs_kafstimod_del_timer(&zombie->timeout);
130 spin_unlock(&cell->sv_gylock); 129 spin_unlock(&cell->sv_gylock);
@@ -168,8 +167,7 @@ void afs_put_server(struct afs_server *server)
168 } 167 }
169 168
170 spin_lock(&cell->sv_gylock); 169 spin_lock(&cell->sv_gylock);
171 list_del(&server->link); 170 list_move_tail(&server->link, &cell->sv_graveyard);
172 list_add_tail(&server->link, &cell->sv_graveyard);
173 171
174 /* time out in 10 secs */ 172 /* time out in 10 secs */
175 afs_kafstimod_add_timer(&server->timeout, 10 * HZ); 173 afs_kafstimod_add_timer(&server->timeout, 10 * HZ);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index eced20618ecc..331f730a1fb3 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -326,8 +326,7 @@ int afs_vlocation_lookup(struct afs_cell *cell,
326 /* found in the graveyard - resurrect */ 326 /* found in the graveyard - resurrect */
327 _debug("found in graveyard"); 327 _debug("found in graveyard");
328 atomic_inc(&vlocation->usage); 328 atomic_inc(&vlocation->usage);
329 list_del(&vlocation->link); 329 list_move_tail(&vlocation->link, &cell->vl_list);
330 list_add_tail(&vlocation->link, &cell->vl_list);
331 spin_unlock(&cell->vl_gylock); 330 spin_unlock(&cell->vl_gylock);
332 331
333 afs_kafstimod_del_timer(&vlocation->timeout); 332 afs_kafstimod_del_timer(&vlocation->timeout);
@@ -478,8 +477,7 @@ static void __afs_put_vlocation(struct afs_vlocation *vlocation)
478 } 477 }
479 478
480 /* move to graveyard queue */ 479 /* move to graveyard queue */
481 list_del(&vlocation->link); 480 list_move_tail(&vlocation->link,&cell->vl_graveyard);
482 list_add_tail(&vlocation->link,&cell->vl_graveyard);
483 481
484 /* remove from pending timeout queue (refcounted if actually being 482 /* remove from pending timeout queue (refcounted if actually being
485 * updated) */ 483 * updated) */
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 9867fef3261d..cf62da5d7825 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -104,8 +104,7 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
104 vnode->cb_expiry * HZ); 104 vnode->cb_expiry * HZ);
105 105
106 spin_lock(&afs_cb_hash_lock); 106 spin_lock(&afs_cb_hash_lock);
107 list_del(&vnode->cb_hash_link); 107 list_move_tail(&vnode->cb_hash_link,
108 list_add_tail(&vnode->cb_hash_link,
109 &afs_cb_hash(server, &vnode->fid)); 108 &afs_cb_hash(server, &vnode->fid));
110 spin_unlock(&afs_cb_hash_lock); 109 spin_unlock(&afs_cb_hash_lock);
111 110
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 4456d1daa40f..8dbd44f10e9d 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -376,8 +376,7 @@ next:
376 DPRINTK("returning %p %.*s", 376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name); 377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock); 378 spin_lock(&dcache_lock);
379 list_del(&expired->d_parent->d_subdirs); 379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
381 spin_unlock(&dcache_lock); 380 spin_unlock(&dcache_lock);
382 return expired; 381 return expired;
383 } 382 }
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 6c6771db36da..7caee8d8ea3b 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -259,7 +259,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
259 /* If request was not a signal, enqueue and don't free */ 259 /* If request was not a signal, enqueue and don't free */
260 if (!(req->uc_flags & REQ_ASYNC)) { 260 if (!(req->uc_flags & REQ_ASYNC)) {
261 req->uc_flags |= REQ_READ; 261 req->uc_flags |= REQ_READ;
262 list_add(&(req->uc_chain), vcp->vc_processing.prev); 262 list_add_tail(&(req->uc_chain), &vcp->vc_processing);
263 goto out; 263 goto out;
264 } 264 }
265 265
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index b040eba13a7d..a5b5e631ba61 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -725,7 +725,7 @@ static int coda_upcall(struct coda_sb_info *sbi,
725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique; 725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique;
726 726
727 /* Append msg to pending queue and poke Venus. */ 727 /* Append msg to pending queue and poke Venus. */
728 list_add(&(req->uc_chain), vcommp->vc_pending.prev); 728 list_add_tail(&(req->uc_chain), &vcommp->vc_pending);
729 729
730 wake_up_interruptible(&vcommp->vc_waitq); 730 wake_up_interruptible(&vcommp->vc_waitq);
731 /* We can be interrupted while we wait for Venus to process 731 /* We can be interrupted while we wait for Venus to process
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9eb9824dd332..d8ecfedef189 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -80,6 +80,7 @@
80#include <net/bluetooth/rfcomm.h> 80#include <net/bluetooth/rfcomm.h>
81 81
82#include <linux/capi.h> 82#include <linux/capi.h>
83#include <linux/gigaset_dev.h>
83 84
84#include <scsi/scsi.h> 85#include <scsi/scsi.h>
85#include <scsi/scsi_ioctl.h> 86#include <scsi/scsi_ioctl.h>
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5f952187fc53..207f8006fd6c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1009,8 +1009,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1009 /* fallthrough */ 1009 /* fallthrough */
1010 default: 1010 default:
1011 if (filp->f_pos == 2) { 1011 if (filp->f_pos == 2) {
1012 list_del(q); 1012 list_move(q, &parent_sd->s_children);
1013 list_add(q, &parent_sd->s_children);
1014 } 1013 }
1015 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 1014 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
1016 struct configfs_dirent *next; 1015 struct configfs_dirent *next;
@@ -1033,8 +1032,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1033 dt_type(next)) < 0) 1032 dt_type(next)) < 0)
1034 return 0; 1033 return 0;
1035 1034
1036 list_del(q); 1035 list_move(q, p);
1037 list_add(q, p);
1038 p = q; 1036 p = q;
1039 filp->f_pos++; 1037 filp->f_pos++;
1040 } 1038 }
diff --git a/fs/dcache.c b/fs/dcache.c
index b85fda360533..48b44a714b35 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -522,8 +522,7 @@ void shrink_dcache_sb(struct super_block * sb)
522 dentry = list_entry(tmp, struct dentry, d_lru); 522 dentry = list_entry(tmp, struct dentry, d_lru);
523 if (dentry->d_sb != sb) 523 if (dentry->d_sb != sb)
524 continue; 524 continue;
525 list_del(tmp); 525 list_move(tmp, &dentry_unused);
526 list_add(tmp, &dentry_unused);
527 } 526 }
528 527
529 /* 528 /*
@@ -638,7 +637,7 @@ resume:
638 * of the unused list for prune_dcache 637 * of the unused list for prune_dcache
639 */ 638 */
640 if (!atomic_read(&dentry->d_count)) { 639 if (!atomic_read(&dentry->d_count)) {
641 list_add(&dentry->d_lru, dentry_unused.prev); 640 list_add_tail(&dentry->d_lru, &dentry_unused);
642 dentry_stat.nr_unused++; 641 dentry_stat.nr_unused++;
643 found++; 642 found++;
644 } 643 }
diff --git a/fs/dquot.c b/fs/dquot.c
index 81d87a413c68..0122a279106a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -250,7 +250,7 @@ static inline struct dquot *find_dquot(unsigned int hashent, struct super_block
250/* Add a dquot to the tail of the free list */ 250/* Add a dquot to the tail of the free list */
251static inline void put_dquot_last(struct dquot *dquot) 251static inline void put_dquot_last(struct dquot *dquot)
252{ 252{
253 list_add(&dquot->dq_free, free_dquots.prev); 253 list_add_tail(&dquot->dq_free, &free_dquots);
254 dqstats.free_dquots++; 254 dqstats.free_dquots++;
255} 255}
256 256
@@ -266,7 +266,7 @@ static inline void put_inuse(struct dquot *dquot)
266{ 266{
267 /* We add to the back of inuse list so we don't have to restart 267 /* We add to the back of inuse list so we don't have to restart
268 * when traversing this list and we block */ 268 * when traversing this list and we block */
269 list_add(&dquot->dq_inuse, inuse_list.prev); 269 list_add_tail(&dquot->dq_inuse, &inuse_list);
270 dqstats.allocated_dquots++; 270 dqstats.allocated_dquots++;
271} 271}
272 272
diff --git a/fs/exec.c b/fs/exec.c
index 0b88bf646143..c8494f513eaf 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk)
666 * and to assume its PID: 666 * and to assume its PID:
667 */ 667 */
668 if (!thread_group_leader(current)) { 668 if (!thread_group_leader(current)) {
669 struct dentry *proc_dentry1, *proc_dentry2;
670
671 /* 669 /*
672 * Wait for the thread group leader to be a zombie. 670 * Wait for the thread group leader to be a zombie.
673 * It should already be zombie at this point, most 671 * It should already be zombie at this point, most
@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk)
689 */ 687 */
690 current->start_time = leader->start_time; 688 current->start_time = leader->start_time;
691 689
692 spin_lock(&leader->proc_lock);
693 spin_lock(&current->proc_lock);
694 proc_dentry1 = proc_pid_unhash(current);
695 proc_dentry2 = proc_pid_unhash(leader);
696 write_lock_irq(&tasklist_lock); 690 write_lock_irq(&tasklist_lock);
697 691
698 BUG_ON(leader->tgid != current->tgid); 692 BUG_ON(leader->tgid != current->tgid);
@@ -713,7 +707,7 @@ static int de_thread(struct task_struct *tsk)
713 attach_pid(current, PIDTYPE_PID, current->pid); 707 attach_pid(current, PIDTYPE_PID, current->pid);
714 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 708 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
715 attach_pid(current, PIDTYPE_SID, current->signal->session); 709 attach_pid(current, PIDTYPE_SID, current->signal->session);
716 list_add_tail_rcu(&current->tasks, &init_task.tasks); 710 list_replace_rcu(&leader->tasks, &current->tasks);
717 711
718 current->group_leader = current; 712 current->group_leader = current;
719 leader->group_leader = current; 713 leader->group_leader = current;
@@ -721,7 +715,6 @@ static int de_thread(struct task_struct *tsk)
721 /* Reduce leader to a thread */ 715 /* Reduce leader to a thread */
722 detach_pid(leader, PIDTYPE_PGID); 716 detach_pid(leader, PIDTYPE_PGID);
723 detach_pid(leader, PIDTYPE_SID); 717 detach_pid(leader, PIDTYPE_SID);
724 list_del_init(&leader->tasks);
725 718
726 current->exit_signal = SIGCHLD; 719 current->exit_signal = SIGCHLD;
727 720
@@ -729,10 +722,6 @@ static int de_thread(struct task_struct *tsk)
729 leader->exit_state = EXIT_DEAD; 722 leader->exit_state = EXIT_DEAD;
730 723
731 write_unlock_irq(&tasklist_lock); 724 write_unlock_irq(&tasklist_lock);
732 spin_unlock(&leader->proc_lock);
733 spin_unlock(&current->proc_lock);
734 proc_pid_flush(proc_dentry1);
735 proc_pid_flush(proc_dentry2);
736 } 725 }
737 726
738 /* 727 /*
@@ -1379,67 +1368,102 @@ static void format_corename(char *corename, const char *pattern, long signr)
1379 *out_ptr = 0; 1368 *out_ptr = 0;
1380} 1369}
1381 1370
1382static void zap_threads (struct mm_struct *mm) 1371static void zap_process(struct task_struct *start)
1383{ 1372{
1384 struct task_struct *g, *p; 1373 struct task_struct *t;
1385 struct task_struct *tsk = current;
1386 struct completion *vfork_done = tsk->vfork_done;
1387 int traced = 0;
1388 1374
1389 /* 1375 start->signal->flags = SIGNAL_GROUP_EXIT;
1390 * Make sure nobody is waiting for us to release the VM, 1376 start->signal->group_stop_count = 0;
1391 * otherwise we can deadlock when we wait on each other
1392 */
1393 if (vfork_done) {
1394 tsk->vfork_done = NULL;
1395 complete(vfork_done);
1396 }
1397 1377
1398 read_lock(&tasklist_lock); 1378 t = start;
1399 do_each_thread(g,p) 1379 do {
1400 if (mm == p->mm && p != tsk) { 1380 if (t != current && t->mm) {
1401 force_sig_specific(SIGKILL, p); 1381 t->mm->core_waiters++;
1402 mm->core_waiters++; 1382 sigaddset(&t->pending.signal, SIGKILL);
1403 if (unlikely(p->ptrace) && 1383 signal_wake_up(t, 1);
1404 unlikely(p->parent->mm == mm))
1405 traced = 1;
1406 } 1384 }
1407 while_each_thread(g,p); 1385 } while ((t = next_thread(t)) != start);
1386}
1408 1387
1409 read_unlock(&tasklist_lock); 1388static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1389 int exit_code)
1390{
1391 struct task_struct *g, *p;
1392 unsigned long flags;
1393 int err = -EAGAIN;
1394
1395 spin_lock_irq(&tsk->sighand->siglock);
1396 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
1397 tsk->signal->group_exit_code = exit_code;
1398 zap_process(tsk);
1399 err = 0;
1400 }
1401 spin_unlock_irq(&tsk->sighand->siglock);
1402 if (err)
1403 return err;
1410 1404
1411 if (unlikely(traced)) { 1405 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
1412 /* 1406 goto done;
1413 * We are zapping a thread and the thread it ptraces. 1407
1414 * If the tracee went into a ptrace stop for exit tracing, 1408 rcu_read_lock();
1415 * we could deadlock since the tracer is waiting for this 1409 for_each_process(g) {
1416 * coredump to finish. Detach them so they can both die. 1410 if (g == tsk->group_leader)
1417 */ 1411 continue;
1418 write_lock_irq(&tasklist_lock); 1412
1419 do_each_thread(g,p) { 1413 p = g;
1420 if (mm == p->mm && p != tsk && 1414 do {
1421 p->ptrace && p->parent->mm == mm) { 1415 if (p->mm) {
1422 __ptrace_detach(p, 0); 1416 if (p->mm == mm) {
1417 /*
1418 * p->sighand can't disappear, but
1419 * may be changed by de_thread()
1420 */
1421 lock_task_sighand(p, &flags);
1422 zap_process(p);
1423 unlock_task_sighand(p, &flags);
1424 }
1425 break;
1423 } 1426 }
1424 } while_each_thread(g,p); 1427 } while ((p = next_thread(p)) != g);
1425 write_unlock_irq(&tasklist_lock);
1426 } 1428 }
1429 rcu_read_unlock();
1430done:
1431 return mm->core_waiters;
1427} 1432}
1428 1433
1429static void coredump_wait(struct mm_struct *mm) 1434static int coredump_wait(int exit_code)
1430{ 1435{
1431 DECLARE_COMPLETION(startup_done); 1436 struct task_struct *tsk = current;
1437 struct mm_struct *mm = tsk->mm;
1438 struct completion startup_done;
1439 struct completion *vfork_done;
1432 int core_waiters; 1440 int core_waiters;
1433 1441
1442 init_completion(&mm->core_done);
1443 init_completion(&startup_done);
1434 mm->core_startup_done = &startup_done; 1444 mm->core_startup_done = &startup_done;
1435 1445
1436 zap_threads(mm); 1446 core_waiters = zap_threads(tsk, mm, exit_code);
1437 core_waiters = mm->core_waiters;
1438 up_write(&mm->mmap_sem); 1447 up_write(&mm->mmap_sem);
1439 1448
1449 if (unlikely(core_waiters < 0))
1450 goto fail;
1451
1452 /*
1453 * Make sure nobody is waiting for us to release the VM,
1454 * otherwise we can deadlock when we wait on each other
1455 */
1456 vfork_done = tsk->vfork_done;
1457 if (vfork_done) {
1458 tsk->vfork_done = NULL;
1459 complete(vfork_done);
1460 }
1461
1440 if (core_waiters) 1462 if (core_waiters)
1441 wait_for_completion(&startup_done); 1463 wait_for_completion(&startup_done);
1464fail:
1442 BUG_ON(mm->core_waiters); 1465 BUG_ON(mm->core_waiters);
1466 return core_waiters;
1443} 1467}
1444 1468
1445int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1469int do_coredump(long signr, int exit_code, struct pt_regs * regs)
@@ -1473,22 +1497,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1473 } 1497 }
1474 mm->dumpable = 0; 1498 mm->dumpable = 0;
1475 1499
1476 retval = -EAGAIN; 1500 retval = coredump_wait(exit_code);
1477 spin_lock_irq(&current->sighand->siglock); 1501 if (retval < 0)
1478 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
1479 current->signal->flags = SIGNAL_GROUP_EXIT;
1480 current->signal->group_exit_code = exit_code;
1481 current->signal->group_stop_count = 0;
1482 retval = 0;
1483 }
1484 spin_unlock_irq(&current->sighand->siglock);
1485 if (retval) {
1486 up_write(&mm->mmap_sem);
1487 goto fail; 1502 goto fail;
1488 }
1489
1490 init_completion(&mm->core_done);
1491 coredump_wait(mm);
1492 1503
1493 /* 1504 /*
1494 * Clear any false indication of pending signals that might 1505 * Clear any false indication of pending signals that might
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b2891cc29db1..b7483360a2db 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -630,7 +630,7 @@ enum {
630 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 630 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
631 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 631 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
632 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 632 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
633 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, 633 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
634 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 634 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
635 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 635 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
636 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 636 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -666,6 +666,7 @@ static match_table_t tokens = {
666 {Opt_noreservation, "noreservation"}, 666 {Opt_noreservation, "noreservation"},
667 {Opt_noload, "noload"}, 667 {Opt_noload, "noload"},
668 {Opt_nobh, "nobh"}, 668 {Opt_nobh, "nobh"},
669 {Opt_bh, "bh"},
669 {Opt_commit, "commit=%u"}, 670 {Opt_commit, "commit=%u"},
670 {Opt_journal_update, "journal=update"}, 671 {Opt_journal_update, "journal=update"},
671 {Opt_journal_inum, "journal=%u"}, 672 {Opt_journal_inum, "journal=%u"},
@@ -1014,6 +1015,9 @@ clear_qf_name:
1014 case Opt_nobh: 1015 case Opt_nobh:
1015 set_opt(sbi->s_mount_opt, NOBH); 1016 set_opt(sbi->s_mount_opt, NOBH);
1016 break; 1017 break;
1018 case Opt_bh:
1019 clear_opt(sbi->s_mount_opt, NOBH);
1020 break;
1017 default: 1021 default:
1018 printk (KERN_ERR 1022 printk (KERN_ERR
1019 "EXT3-fs: Unrecognized mount option \"%s\" " 1023 "EXT3-fs: Unrecognized mount option \"%s\" "
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 1862e8bc101d..b8886f048eaa 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -53,8 +53,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
53 if (!instr) { 53 if (!instr) {
54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); 54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
55 spin_lock(&c->erase_completion_lock); 55 spin_lock(&c->erase_completion_lock);
56 list_del(&jeb->list); 56 list_move(&jeb->list, &c->erase_pending_list);
57 list_add(&jeb->list, &c->erase_pending_list);
58 c->erasing_size -= c->sector_size; 57 c->erasing_size -= c->sector_size;
59 c->dirty_size += c->sector_size; 58 c->dirty_size += c->sector_size;
60 jeb->dirty_size = c->sector_size; 59 jeb->dirty_size = c->sector_size;
@@ -86,8 +85,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
86 /* Erase failed immediately. Refile it on the list */ 85 /* Erase failed immediately. Refile it on the list */
87 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret)); 86 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret));
88 spin_lock(&c->erase_completion_lock); 87 spin_lock(&c->erase_completion_lock);
89 list_del(&jeb->list); 88 list_move(&jeb->list, &c->erase_pending_list);
90 list_add(&jeb->list, &c->erase_pending_list);
91 c->erasing_size -= c->sector_size; 89 c->erasing_size -= c->sector_size;
92 c->dirty_size += c->sector_size; 90 c->dirty_size += c->sector_size;
93 jeb->dirty_size = c->sector_size; 91 jeb->dirty_size = c->sector_size;
@@ -161,8 +159,7 @@ static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblo
161{ 159{
162 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset)); 160 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset));
163 spin_lock(&c->erase_completion_lock); 161 spin_lock(&c->erase_completion_lock);
164 list_del(&jeb->list); 162 list_move_tail(&jeb->list, &c->erase_complete_list);
165 list_add_tail(&jeb->list, &c->erase_complete_list);
166 spin_unlock(&c->erase_completion_lock); 163 spin_unlock(&c->erase_completion_lock);
167 /* Ensure that kupdated calls us again to mark them clean */ 164 /* Ensure that kupdated calls us again to mark them clean */
168 jffs2_erase_pending_trigger(c); 165 jffs2_erase_pending_trigger(c);
@@ -178,8 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
178 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 175 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
179 /* We'd like to give this block another try. */ 176 /* We'd like to give this block another try. */
180 spin_lock(&c->erase_completion_lock); 177 spin_lock(&c->erase_completion_lock);
181 list_del(&jeb->list); 178 list_move(&jeb->list, &c->erase_pending_list);
182 list_add(&jeb->list, &c->erase_pending_list);
183 c->erasing_size -= c->sector_size; 179 c->erasing_size -= c->sector_size;
184 c->dirty_size += c->sector_size; 180 c->dirty_size += c->sector_size;
185 jeb->dirty_size = c->sector_size; 181 jeb->dirty_size = c->sector_size;
@@ -191,8 +187,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
191 spin_lock(&c->erase_completion_lock); 187 spin_lock(&c->erase_completion_lock);
192 c->erasing_size -= c->sector_size; 188 c->erasing_size -= c->sector_size;
193 c->bad_size += c->sector_size; 189 c->bad_size += c->sector_size;
194 list_del(&jeb->list); 190 list_move(&jeb->list, &c->bad_list);
195 list_add(&jeb->list, &c->bad_list);
196 c->nr_erasing_blocks--; 191 c->nr_erasing_blocks--;
197 spin_unlock(&c->erase_completion_lock); 192 spin_unlock(&c->erase_completion_lock);
198 wake_up(&c->erase_wait); 193 wake_up(&c->erase_wait);
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 8bedfd2ff689..ac0c350ed7d7 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -211,8 +211,7 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
211 struct jffs2_eraseblock *ejeb; 211 struct jffs2_eraseblock *ejeb;
212 212
213 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list); 213 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list);
214 list_del(&ejeb->list); 214 list_move_tail(&ejeb->list, &c->erase_pending_list);
215 list_add_tail(&ejeb->list, &c->erase_pending_list);
216 c->nr_erasing_blocks++; 215 c->nr_erasing_blocks++;
217 jffs2_erase_pending_trigger(c); 216 jffs2_erase_pending_trigger(c);
218 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", 217 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n",
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a7f153f79ecb..b9b700730dfe 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -495,8 +495,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
495 /* Fix up the original jeb now it's on the bad_list */ 495 /* Fix up the original jeb now it's on the bad_list */
496 if (first_raw == jeb->first_node) { 496 if (first_raw == jeb->first_node) {
497 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); 497 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
498 list_del(&jeb->list); 498 list_move(&jeb->list, &c->erase_pending_list);
499 list_add(&jeb->list, &c->erase_pending_list);
500 c->nr_erasing_blocks++; 499 c->nr_erasing_blocks++;
501 jffs2_erase_pending_trigger(c); 500 jffs2_erase_pending_trigger(c);
502 } 501 }
diff --git a/fs/libfs.c b/fs/libfs.c
index fc785d8befb9..ac02ea602c3d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -149,10 +149,9 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
149 /* fallthrough */ 149 /* fallthrough */
150 default: 150 default:
151 spin_lock(&dcache_lock); 151 spin_lock(&dcache_lock);
152 if (filp->f_pos == 2) { 152 if (filp->f_pos == 2)
153 list_del(q); 153 list_move(q, &dentry->d_subdirs);
154 list_add(q, &dentry->d_subdirs); 154
155 }
156 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
157 struct dentry *next; 156 struct dentry *next;
158 next = list_entry(p, struct dentry, d_u.d_child); 157 next = list_entry(p, struct dentry, d_u.d_child);
@@ -164,8 +163,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
164 return 0; 163 return 0;
165 spin_lock(&dcache_lock); 164 spin_lock(&dcache_lock);
166 /* next is still alive */ 165 /* next is still alive */
167 list_del(q); 166 list_move(q, p);
168 list_add(q, p);
169 p = q; 167 p = q;
170 filp->f_pos++; 168 filp->f_pos++;
171 } 169 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 866430bb024d..b3ed212ea416 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -526,10 +526,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
526{ 526{
527 struct vfsmount *p; 527 struct vfsmount *p;
528 528
529 for (p = mnt; p; p = next_mnt(p, mnt)) { 529 for (p = mnt; p; p = next_mnt(p, mnt))
530 list_del(&p->mnt_hash); 530 list_move(&p->mnt_hash, kill);
531 list_add(&p->mnt_hash, kill);
532 }
533 531
534 if (propagate) 532 if (propagate)
535 propagate_umount(kill); 533 propagate_umount(kill);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 96c7578cbe1e..1630b5670dc2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -529,8 +529,7 @@ move_to_confirmed(struct nfs4_client *clp)
529 529
530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
531 list_del_init(&clp->cl_strhash); 531 list_del_init(&clp->cl_strhash);
532 list_del_init(&clp->cl_idhash); 532 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
533 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
534 strhashval = clientstr_hashval(clp->cl_recdir); 533 strhashval = clientstr_hashval(clp->cl_recdir);
535 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 534 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
536 renew_client(clp); 535 renew_client(clp);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d852ebb538e3..fdf7cf3dfadc 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -103,8 +103,7 @@ nfsd_cache_shutdown(void)
103static void 103static void
104lru_put_end(struct svc_cacherep *rp) 104lru_put_end(struct svc_cacherep *rp)
105{ 105{
106 list_del(&rp->c_lru); 106 list_move_tail(&rp->c_lru, &lru_head);
107 list_add_tail(&rp->c_lru, &lru_head);
108} 107}
109 108
110/* 109/*
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 355593dd8ef8..87ee29cad50b 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -381,8 +381,7 @@ do_ast:
381 ret = DLM_NORMAL; 381 ret = DLM_NORMAL;
382 if (past->type == DLM_AST) { 382 if (past->type == DLM_AST) {
383 /* do not alter lock refcount. switching lists. */ 383 /* do not alter lock refcount. switching lists. */
384 list_del_init(&lock->list); 384 list_move_tail(&lock->list, &res->granted);
385 list_add_tail(&lock->list, &res->granted);
386 mlog(0, "ast: adding to granted list... type=%d, " 385 mlog(0, "ast: adding to granted list... type=%d, "
387 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 386 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
388 if (lock->ml.convert_type != LKM_IVMODE) { 387 if (lock->ml.convert_type != LKM_IVMODE) {
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 8285228d9e37..70888b31e751 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -231,8 +231,7 @@ switch_queues:
231 231
232 lock->ml.convert_type = type; 232 lock->ml.convert_type = type;
233 /* do not alter lock refcount. switching lists. */ 233 /* do not alter lock refcount. switching lists. */
234 list_del_init(&lock->list); 234 list_move_tail(&lock->list, &res->converting);
235 list_add_tail(&lock->list, &res->converting);
236 235
237unlock_exit: 236unlock_exit:
238 spin_unlock(&lock->spinlock); 237 spin_unlock(&lock->spinlock);
@@ -248,8 +247,7 @@ void dlm_revert_pending_convert(struct dlm_lock_resource *res,
248 struct dlm_lock *lock) 247 struct dlm_lock *lock)
249{ 248{
250 /* do not alter lock refcount. switching lists. */ 249 /* do not alter lock refcount. switching lists. */
251 list_del_init(&lock->list); 250 list_move_tail(&lock->list, &res->granted);
252 list_add_tail(&lock->list, &res->granted);
253 lock->ml.convert_type = LKM_IVMODE; 251 lock->ml.convert_type = LKM_IVMODE;
254 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); 252 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
255} 253}
@@ -294,8 +292,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
294 res->state |= DLM_LOCK_RES_IN_PROGRESS; 292 res->state |= DLM_LOCK_RES_IN_PROGRESS;
295 /* move lock to local convert queue */ 293 /* move lock to local convert queue */
296 /* do not alter lock refcount. switching lists. */ 294 /* do not alter lock refcount. switching lists. */
297 list_del_init(&lock->list); 295 list_move_tail(&lock->list, &res->converting);
298 list_add_tail(&lock->list, &res->converting);
299 lock->convert_pending = 1; 296 lock->convert_pending = 1;
300 lock->ml.convert_type = type; 297 lock->ml.convert_type = type;
301 298
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 6fea28318d6d..55cda25ae11b 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -239,8 +239,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
239 mlog(0, "%s: $RECOVERY lock for this node (%u) is " 239 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
240 "mastered by %u; got lock, manually granting (no ast)\n", 240 "mastered by %u; got lock, manually granting (no ast)\n",
241 dlm->name, dlm->node_num, res->owner); 241 dlm->name, dlm->node_num, res->owner);
242 list_del_init(&lock->list); 242 list_move_tail(&lock->list, &res->granted);
243 list_add_tail(&lock->list, &res->granted);
244 } 243 }
245 spin_unlock(&res->spinlock); 244 spin_unlock(&res->spinlock);
246 245
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 805cbabac051..9962190e7416 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -905,13 +905,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
905 mlog(0, "found lockres owned by dead node while " 905 mlog(0, "found lockres owned by dead node while "
906 "doing recovery for node %u. sending it.\n", 906 "doing recovery for node %u. sending it.\n",
907 dead_node); 907 dead_node);
908 list_del_init(&res->recovering); 908 list_move_tail(&res->recovering, list);
909 list_add_tail(&res->recovering, list);
910 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 909 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
911 mlog(0, "found UNKNOWN owner while doing recovery " 910 mlog(0, "found UNKNOWN owner while doing recovery "
912 "for node %u. sending it.\n", dead_node); 911 "for node %u. sending it.\n", dead_node);
913 list_del_init(&res->recovering); 912 list_move_tail(&res->recovering, list);
914 list_add_tail(&res->recovering, list);
915 } 913 }
916 } 914 }
917 spin_unlock(&dlm->spinlock); 915 spin_unlock(&dlm->spinlock);
@@ -1529,8 +1527,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1529 1527
1530 /* move the lock to its proper place */ 1528 /* move the lock to its proper place */
1531 /* do not alter lock refcount. switching lists. */ 1529 /* do not alter lock refcount. switching lists. */
1532 list_del_init(&lock->list); 1530 list_move_tail(&lock->list, queue);
1533 list_add_tail(&lock->list, queue);
1534 spin_unlock(&res->spinlock); 1531 spin_unlock(&res->spinlock);
1535 1532
1536 mlog(0, "just reordered a local lock!\n"); 1533 mlog(0, "just reordered a local lock!\n");
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 5be9d14f12cb..44d3b57ae8a8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -318,8 +318,7 @@ converting:
318 318
319 target->ml.type = target->ml.convert_type; 319 target->ml.type = target->ml.convert_type;
320 target->ml.convert_type = LKM_IVMODE; 320 target->ml.convert_type = LKM_IVMODE;
321 list_del_init(&target->list); 321 list_move_tail(&target->list, &res->granted);
322 list_add_tail(&target->list, &res->granted);
323 322
324 BUG_ON(!target->lksb); 323 BUG_ON(!target->lksb);
325 target->lksb->status = DLM_NORMAL; 324 target->lksb->status = DLM_NORMAL;
@@ -380,8 +379,7 @@ blocked:
380 target->ml.type, target->ml.node); 379 target->ml.type, target->ml.node);
381 380
382 // target->ml.type is already correct 381 // target->ml.type is already correct
383 list_del_init(&target->list); 382 list_move_tail(&target->list, &res->granted);
384 list_add_tail(&target->list, &res->granted);
385 383
386 BUG_ON(!target->lksb); 384 BUG_ON(!target->lksb);
387 target->lksb->status = DLM_NORMAL; 385 target->lksb->status = DLM_NORMAL;
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 7b1a27542674..ac89c509daf9 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -271,8 +271,7 @@ void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
271void dlm_commit_pending_cancel(struct dlm_lock_resource *res, 271void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
272 struct dlm_lock *lock) 272 struct dlm_lock *lock)
273{ 273{
274 list_del_init(&lock->list); 274 list_move_tail(&lock->list, &res->granted);
275 list_add_tail(&lock->list, &res->granted);
276 lock->ml.convert_type = LKM_IVMODE; 275 lock->ml.convert_type = LKM_IVMODE;
277} 276}
278 277
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index eebc3cfa6be8..3fe8781c22cb 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -222,8 +222,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list)); 222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list));
223 223
224 OCFS2_I(inode)->ip_handle = handle; 224 OCFS2_I(inode)->ip_handle = handle;
225 list_del(&(OCFS2_I(inode)->ip_handle_list)); 225 list_move_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
226 list_add_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
227} 226}
228 227
229static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle) 228static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
diff --git a/fs/pnode.c b/fs/pnode.c
index 37b568ed0e05..da42ee61c1df 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -53,8 +53,7 @@ static int do_make_slave(struct vfsmount *mnt)
53 if (master) { 53 if (master) {
54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) 54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
55 slave_mnt->mnt_master = master; 55 slave_mnt->mnt_master = master;
56 list_del(&mnt->mnt_slave); 56 list_move(&mnt->mnt_slave, &master->mnt_slave_list);
57 list_add(&mnt->mnt_slave, &master->mnt_slave_list);
58 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); 57 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
59 INIT_LIST_HEAD(&mnt->mnt_slave_list); 58 INIT_LIST_HEAD(&mnt->mnt_slave_list);
60 } else { 59 } else {
@@ -283,10 +282,8 @@ static void __propagate_umount(struct vfsmount *mnt)
283 * umount the child only if the child has no 282 * umount the child only if the child has no
284 * other children 283 * other children
285 */ 284 */
286 if (child && list_empty(&child->mnt_mounts)) { 285 if (child && list_empty(&child->mnt_mounts))
287 list_del(&child->mnt_hash); 286 list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
288 list_add_tail(&child->mnt_hash, &mnt->mnt_hash);
289 }
290 } 287 }
291} 288}
292 289
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6afff725a8c9..6ba7785319de 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -74,6 +74,16 @@
74#include <linux/poll.h> 74#include <linux/poll.h>
75#include "internal.h" 75#include "internal.h"
76 76
77/* NOTE:
78 * Implementing inode permission operations in /proc is almost
79 * certainly an error. Permission checks need to happen during
80 * each system call not at open time. The reason is that most of
81 * what we wish to check for permissions in /proc varies at runtime.
82 *
83 * The classic example of a problem is opening file descriptors
84 * in /proc for a task before it execs a suid executable.
85 */
86
77/* 87/*
78 * For hysterical raisins we keep the same inumbers as in the old procfs. 88 * For hysterical raisins we keep the same inumbers as in the old procfs.
79 * Feel free to change the macro below - just keep the range distinct from 89 * Feel free to change the macro below - just keep the range distinct from
@@ -121,6 +131,8 @@ enum pid_directory_inos {
121 PROC_TGID_ATTR_PREV, 131 PROC_TGID_ATTR_PREV,
122 PROC_TGID_ATTR_EXEC, 132 PROC_TGID_ATTR_EXEC,
123 PROC_TGID_ATTR_FSCREATE, 133 PROC_TGID_ATTR_FSCREATE,
134 PROC_TGID_ATTR_KEYCREATE,
135 PROC_TGID_ATTR_SOCKCREATE,
124#endif 136#endif
125#ifdef CONFIG_AUDITSYSCALL 137#ifdef CONFIG_AUDITSYSCALL
126 PROC_TGID_LOGINUID, 138 PROC_TGID_LOGINUID,
@@ -162,6 +174,8 @@ enum pid_directory_inos {
162 PROC_TID_ATTR_PREV, 174 PROC_TID_ATTR_PREV,
163 PROC_TID_ATTR_EXEC, 175 PROC_TID_ATTR_EXEC,
164 PROC_TID_ATTR_FSCREATE, 176 PROC_TID_ATTR_FSCREATE,
177 PROC_TID_ATTR_KEYCREATE,
178 PROC_TID_ATTR_SOCKCREATE,
165#endif 179#endif
166#ifdef CONFIG_AUDITSYSCALL 180#ifdef CONFIG_AUDITSYSCALL
167 PROC_TID_LOGINUID, 181 PROC_TID_LOGINUID,
@@ -173,6 +187,9 @@ enum pid_directory_inos {
173 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 187 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
174}; 188};
175 189
190/* Worst case buffer size needed for holding an integer. */
191#define PROC_NUMBUF 10
192
176struct pid_entry { 193struct pid_entry {
177 int type; 194 int type;
178 int len; 195 int len;
@@ -275,6 +292,8 @@ static struct pid_entry tgid_attr_stuff[] = {
275 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 292 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
276 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 293 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
277 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 294 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
295 E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
296 E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
278 {0,0,NULL,0} 297 {0,0,NULL,0}
279}; 298};
280static struct pid_entry tid_attr_stuff[] = { 299static struct pid_entry tid_attr_stuff[] = {
@@ -282,6 +301,8 @@ static struct pid_entry tid_attr_stuff[] = {
282 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 301 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
283 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 302 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
284 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 303 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
304 E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
305 E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
285 {0,0,NULL,0} 306 {0,0,NULL,0}
286}; 307};
287#endif 308#endif
@@ -290,12 +311,15 @@ static struct pid_entry tid_attr_stuff[] = {
290 311
291static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 312static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
292{ 313{
293 struct task_struct *task = proc_task(inode); 314 struct task_struct *task = get_proc_task(inode);
294 struct files_struct *files; 315 struct files_struct *files = NULL;
295 struct file *file; 316 struct file *file;
296 int fd = proc_type(inode) - PROC_TID_FD_DIR; 317 int fd = proc_fd(inode);
297 318
298 files = get_files_struct(task); 319 if (task) {
320 files = get_files_struct(task);
321 put_task_struct(task);
322 }
299 if (files) { 323 if (files) {
300 /* 324 /*
301 * We are not taking a ref to the file structure, so we must 325 * We are not taking a ref to the file structure, so we must
@@ -327,29 +351,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task)
327 return fs; 351 return fs;
328} 352}
329 353
330static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 354static int get_nr_threads(struct task_struct *tsk)
331{ 355{
332 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 356 /* Must be called with the rcu_read_lock held */
333 int result = -ENOENT; 357 unsigned long flags;
334 if (fs) { 358 int count = 0;
335 read_lock(&fs->lock); 359
336 *mnt = mntget(fs->pwdmnt); 360 if (lock_task_sighand(tsk, &flags)) {
337 *dentry = dget(fs->pwd); 361 count = atomic_read(&tsk->signal->count);
338 read_unlock(&fs->lock); 362 unlock_task_sighand(tsk, &flags);
339 result = 0;
340 put_fs_struct(fs);
341 } 363 }
342 return result; 364 return count;
343} 365}
344 366
345static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 367static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
346{ 368{
347 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 369 struct task_struct *task = get_proc_task(inode);
370 struct fs_struct *fs = NULL;
348 int result = -ENOENT; 371 int result = -ENOENT;
372
373 if (task) {
374 fs = get_fs_struct(task);
375 put_task_struct(task);
376 }
349 if (fs) { 377 if (fs) {
350 read_lock(&fs->lock); 378 read_lock(&fs->lock);
351 *mnt = mntget(fs->rootmnt); 379 *mnt = mntget(fs->pwdmnt);
352 *dentry = dget(fs->root); 380 *dentry = dget(fs->pwd);
353 read_unlock(&fs->lock); 381 read_unlock(&fs->lock);
354 result = 0; 382 result = 0;
355 put_fs_struct(fs); 383 put_fs_struct(fs);
@@ -357,42 +385,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
357 return result; 385 return result;
358} 386}
359 387
360 388static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
361/* Same as proc_root_link, but this addionally tries to get fs from other
362 * threads in the group */
363static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
364 struct vfsmount **mnt)
365{ 389{
366 struct fs_struct *fs; 390 struct task_struct *task = get_proc_task(inode);
391 struct fs_struct *fs = NULL;
367 int result = -ENOENT; 392 int result = -ENOENT;
368 struct task_struct *leader = proc_task(inode);
369 393
370 task_lock(leader); 394 if (task) {
371 fs = leader->fs; 395 fs = get_fs_struct(task);
372 if (fs) { 396 put_task_struct(task);
373 atomic_inc(&fs->count);
374 task_unlock(leader);
375 } else {
376 /* Try to get fs from other threads */
377 task_unlock(leader);
378 read_lock(&tasklist_lock);
379 if (pid_alive(leader)) {
380 struct task_struct *task = leader;
381
382 while ((task = next_thread(task)) != leader) {
383 task_lock(task);
384 fs = task->fs;
385 if (fs) {
386 atomic_inc(&fs->count);
387 task_unlock(task);
388 break;
389 }
390 task_unlock(task);
391 }
392 }
393 read_unlock(&tasklist_lock);
394 } 397 }
395
396 if (fs) { 398 if (fs) {
397 read_lock(&fs->lock); 399 read_lock(&fs->lock);
398 *mnt = mntget(fs->rootmnt); 400 *mnt = mntget(fs->rootmnt);
@@ -404,7 +406,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
404 return result; 406 return result;
405} 407}
406 408
407
408#define MAY_PTRACE(task) \ 409#define MAY_PTRACE(task) \
409 (task == current || \ 410 (task == current || \
410 (task->parent == current && \ 411 (task->parent == current && \
@@ -535,142 +536,22 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
535/************************************************************************/ 536/************************************************************************/
536 537
537/* permission checks */ 538/* permission checks */
538 539static int proc_fd_access_allowed(struct inode *inode)
539/* If the process being read is separated by chroot from the reading process,
540 * don't let the reader access the threads.
541 *
542 * note: this does dput(root) and mntput(vfsmnt) on exit.
543 */
544static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
545{
546 struct dentry *de, *base;
547 struct vfsmount *our_vfsmnt, *mnt;
548 int res = 0;
549
550 read_lock(&current->fs->lock);
551 our_vfsmnt = mntget(current->fs->rootmnt);
552 base = dget(current->fs->root);
553 read_unlock(&current->fs->lock);
554
555 spin_lock(&vfsmount_lock);
556 de = root;
557 mnt = vfsmnt;
558
559 while (mnt != our_vfsmnt) {
560 if (mnt == mnt->mnt_parent)
561 goto out;
562 de = mnt->mnt_mountpoint;
563 mnt = mnt->mnt_parent;
564 }
565
566 if (!is_subdir(de, base))
567 goto out;
568 spin_unlock(&vfsmount_lock);
569
570exit:
571 dput(base);
572 mntput(our_vfsmnt);
573 dput(root);
574 mntput(vfsmnt);
575 return res;
576out:
577 spin_unlock(&vfsmount_lock);
578 res = -EACCES;
579 goto exit;
580}
581
582static int proc_check_root(struct inode *inode)
583{
584 struct dentry *root;
585 struct vfsmount *vfsmnt;
586
587 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
588 return -ENOENT;
589 return proc_check_chroot(root, vfsmnt);
590}
591
592static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
593{
594 if (generic_permission(inode, mask, NULL) != 0)
595 return -EACCES;
596 return proc_check_root(inode);
597}
598
599static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
600{
601 struct dentry *root;
602 struct vfsmount *vfsmnt;
603
604 if (generic_permission(inode, mask, NULL) != 0)
605 return -EACCES;
606
607 if (proc_task_root_link(inode, &root, &vfsmnt))
608 return -ENOENT;
609
610 return proc_check_chroot(root, vfsmnt);
611}
612
613extern struct seq_operations proc_pid_maps_op;
614static int maps_open(struct inode *inode, struct file *file)
615{
616 struct task_struct *task = proc_task(inode);
617 int ret = seq_open(file, &proc_pid_maps_op);
618 if (!ret) {
619 struct seq_file *m = file->private_data;
620 m->private = task;
621 }
622 return ret;
623}
624
625static struct file_operations proc_maps_operations = {
626 .open = maps_open,
627 .read = seq_read,
628 .llseek = seq_lseek,
629 .release = seq_release,
630};
631
632#ifdef CONFIG_NUMA
633extern struct seq_operations proc_pid_numa_maps_op;
634static int numa_maps_open(struct inode *inode, struct file *file)
635{
636 struct task_struct *task = proc_task(inode);
637 int ret = seq_open(file, &proc_pid_numa_maps_op);
638 if (!ret) {
639 struct seq_file *m = file->private_data;
640 m->private = task;
641 }
642 return ret;
643}
644
645static struct file_operations proc_numa_maps_operations = {
646 .open = numa_maps_open,
647 .read = seq_read,
648 .llseek = seq_lseek,
649 .release = seq_release,
650};
651#endif
652
653#ifdef CONFIG_MMU
654extern struct seq_operations proc_pid_smaps_op;
655static int smaps_open(struct inode *inode, struct file *file)
656{ 540{
657 struct task_struct *task = proc_task(inode); 541 struct task_struct *task;
658 int ret = seq_open(file, &proc_pid_smaps_op); 542 int allowed = 0;
659 if (!ret) { 543 /* Allow access to a task's file descriptors if it is us or we
660 struct seq_file *m = file->private_data; 544 * may use ptrace attach to the process and find out that
661 m->private = task; 545 * information.
546 */
547 task = get_proc_task(inode);
548 if (task) {
549 allowed = ptrace_may_attach(task);
550 put_task_struct(task);
662 } 551 }
663 return ret; 552 return allowed;
664} 553}
665 554
666static struct file_operations proc_smaps_operations = {
667 .open = smaps_open,
668 .read = seq_read,
669 .llseek = seq_lseek,
670 .release = seq_release,
671};
672#endif
673
674extern struct seq_operations mounts_op; 555extern struct seq_operations mounts_op;
675struct proc_mounts { 556struct proc_mounts {
676 struct seq_file m; 557 struct seq_file m;
@@ -679,16 +560,19 @@ struct proc_mounts {
679 560
680static int mounts_open(struct inode *inode, struct file *file) 561static int mounts_open(struct inode *inode, struct file *file)
681{ 562{
682 struct task_struct *task = proc_task(inode); 563 struct task_struct *task = get_proc_task(inode);
683 struct namespace *namespace; 564 struct namespace *namespace = NULL;
684 struct proc_mounts *p; 565 struct proc_mounts *p;
685 int ret = -EINVAL; 566 int ret = -EINVAL;
686 567
687 task_lock(task); 568 if (task) {
688 namespace = task->namespace; 569 task_lock(task);
689 if (namespace) 570 namespace = task->namespace;
690 get_namespace(namespace); 571 if (namespace)
691 task_unlock(task); 572 get_namespace(namespace);
573 task_unlock(task);
574 put_task_struct(task);
575 }
692 576
693 if (namespace) { 577 if (namespace) {
694 ret = -ENOMEM; 578 ret = -ENOMEM;
@@ -745,17 +629,21 @@ static struct file_operations proc_mounts_operations = {
745extern struct seq_operations mountstats_op; 629extern struct seq_operations mountstats_op;
746static int mountstats_open(struct inode *inode, struct file *file) 630static int mountstats_open(struct inode *inode, struct file *file)
747{ 631{
748 struct task_struct *task = proc_task(inode);
749 int ret = seq_open(file, &mountstats_op); 632 int ret = seq_open(file, &mountstats_op);
750 633
751 if (!ret) { 634 if (!ret) {
752 struct seq_file *m = file->private_data; 635 struct seq_file *m = file->private_data;
753 struct namespace *namespace; 636 struct namespace *namespace = NULL;
754 task_lock(task); 637 struct task_struct *task = get_proc_task(inode);
755 namespace = task->namespace; 638
756 if (namespace) 639 if (task) {
757 get_namespace(namespace); 640 task_lock(task);
758 task_unlock(task); 641 namespace = task->namespace;
642 if (namespace)
643 get_namespace(namespace);
644 task_unlock(task);
645 put_task_struct(task);
646 }
759 647
760 if (namespace) 648 if (namespace)
761 m->private = namespace; 649 m->private = namespace;
@@ -782,18 +670,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
782 struct inode * inode = file->f_dentry->d_inode; 670 struct inode * inode = file->f_dentry->d_inode;
783 unsigned long page; 671 unsigned long page;
784 ssize_t length; 672 ssize_t length;
785 struct task_struct *task = proc_task(inode); 673 struct task_struct *task = get_proc_task(inode);
674
675 length = -ESRCH;
676 if (!task)
677 goto out_no_task;
786 678
787 if (count > PROC_BLOCK_SIZE) 679 if (count > PROC_BLOCK_SIZE)
788 count = PROC_BLOCK_SIZE; 680 count = PROC_BLOCK_SIZE;
681
682 length = -ENOMEM;
789 if (!(page = __get_free_page(GFP_KERNEL))) 683 if (!(page = __get_free_page(GFP_KERNEL)))
790 return -ENOMEM; 684 goto out;
791 685
792 length = PROC_I(inode)->op.proc_read(task, (char*)page); 686 length = PROC_I(inode)->op.proc_read(task, (char*)page);
793 687
794 if (length >= 0) 688 if (length >= 0)
795 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 689 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
796 free_page(page); 690 free_page(page);
691out:
692 put_task_struct(task);
693out_no_task:
797 return length; 694 return length;
798} 695}
799 696
@@ -810,12 +707,15 @@ static int mem_open(struct inode* inode, struct file* file)
810static ssize_t mem_read(struct file * file, char __user * buf, 707static ssize_t mem_read(struct file * file, char __user * buf,
811 size_t count, loff_t *ppos) 708 size_t count, loff_t *ppos)
812{ 709{
813 struct task_struct *task = proc_task(file->f_dentry->d_inode); 710 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
814 char *page; 711 char *page;
815 unsigned long src = *ppos; 712 unsigned long src = *ppos;
816 int ret = -ESRCH; 713 int ret = -ESRCH;
817 struct mm_struct *mm; 714 struct mm_struct *mm;
818 715
716 if (!task)
717 goto out_no_task;
718
819 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 719 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
820 goto out; 720 goto out;
821 721
@@ -865,6 +765,8 @@ out_put:
865out_free: 765out_free:
866 free_page((unsigned long) page); 766 free_page((unsigned long) page);
867out: 767out:
768 put_task_struct(task);
769out_no_task:
868 return ret; 770 return ret;
869} 771}
870 772
@@ -877,15 +779,20 @@ static ssize_t mem_write(struct file * file, const char * buf,
877{ 779{
878 int copied = 0; 780 int copied = 0;
879 char *page; 781 char *page;
880 struct task_struct *task = proc_task(file->f_dentry->d_inode); 782 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
881 unsigned long dst = *ppos; 783 unsigned long dst = *ppos;
882 784
785 copied = -ESRCH;
786 if (!task)
787 goto out_no_task;
788
883 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 789 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
884 return -ESRCH; 790 goto out;
885 791
792 copied = -ENOMEM;
886 page = (char *)__get_free_page(GFP_USER); 793 page = (char *)__get_free_page(GFP_USER);
887 if (!page) 794 if (!page)
888 return -ENOMEM; 795 goto out;
889 796
890 while (count > 0) { 797 while (count > 0) {
891 int this_len, retval; 798 int this_len, retval;
@@ -908,6 +815,9 @@ static ssize_t mem_write(struct file * file, const char * buf,
908 } 815 }
909 *ppos = dst; 816 *ppos = dst;
910 free_page((unsigned long) page); 817 free_page((unsigned long) page);
818out:
819 put_task_struct(task);
820out_no_task:
911 return copied; 821 return copied;
912} 822}
913#endif 823#endif
@@ -938,13 +848,18 @@ static struct file_operations proc_mem_operations = {
938static ssize_t oom_adjust_read(struct file *file, char __user *buf, 848static ssize_t oom_adjust_read(struct file *file, char __user *buf,
939 size_t count, loff_t *ppos) 849 size_t count, loff_t *ppos)
940{ 850{
941 struct task_struct *task = proc_task(file->f_dentry->d_inode); 851 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
942 char buffer[8]; 852 char buffer[PROC_NUMBUF];
943 size_t len; 853 size_t len;
944 int oom_adjust = task->oomkilladj; 854 int oom_adjust;
945 loff_t __ppos = *ppos; 855 loff_t __ppos = *ppos;
946 856
947 len = sprintf(buffer, "%i\n", oom_adjust); 857 if (!task)
858 return -ESRCH;
859 oom_adjust = task->oomkilladj;
860 put_task_struct(task);
861
862 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
948 if (__ppos >= len) 863 if (__ppos >= len)
949 return 0; 864 return 0;
950 if (count > len-__ppos) 865 if (count > len-__ppos)
@@ -958,15 +873,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
958static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 873static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
959 size_t count, loff_t *ppos) 874 size_t count, loff_t *ppos)
960{ 875{
961 struct task_struct *task = proc_task(file->f_dentry->d_inode); 876 struct task_struct *task;
962 char buffer[8], *end; 877 char buffer[PROC_NUMBUF], *end;
963 int oom_adjust; 878 int oom_adjust;
964 879
965 if (!capable(CAP_SYS_RESOURCE)) 880 if (!capable(CAP_SYS_RESOURCE))
966 return -EPERM; 881 return -EPERM;
967 memset(buffer, 0, 8); 882 memset(buffer, 0, sizeof(buffer));
968 if (count > 6) 883 if (count > sizeof(buffer) - 1)
969 count = 6; 884 count = sizeof(buffer) - 1;
970 if (copy_from_user(buffer, buf, count)) 885 if (copy_from_user(buffer, buf, count))
971 return -EFAULT; 886 return -EFAULT;
972 oom_adjust = simple_strtol(buffer, &end, 0); 887 oom_adjust = simple_strtol(buffer, &end, 0);
@@ -974,7 +889,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
974 return -EINVAL; 889 return -EINVAL;
975 if (*end == '\n') 890 if (*end == '\n')
976 end++; 891 end++;
892 task = get_proc_task(file->f_dentry->d_inode);
893 if (!task)
894 return -ESRCH;
977 task->oomkilladj = oom_adjust; 895 task->oomkilladj = oom_adjust;
896 put_task_struct(task);
978 if (end - buffer == 0) 897 if (end - buffer == 0)
979 return -EIO; 898 return -EIO;
980 return end - buffer; 899 return end - buffer;
@@ -985,22 +904,21 @@ static struct file_operations proc_oom_adjust_operations = {
985 .write = oom_adjust_write, 904 .write = oom_adjust_write,
986}; 905};
987 906
988static struct inode_operations proc_mem_inode_operations = {
989 .permission = proc_permission,
990};
991
992#ifdef CONFIG_AUDITSYSCALL 907#ifdef CONFIG_AUDITSYSCALL
993#define TMPBUFLEN 21 908#define TMPBUFLEN 21
994static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 909static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
995 size_t count, loff_t *ppos) 910 size_t count, loff_t *ppos)
996{ 911{
997 struct inode * inode = file->f_dentry->d_inode; 912 struct inode * inode = file->f_dentry->d_inode;
998 struct task_struct *task = proc_task(inode); 913 struct task_struct *task = get_proc_task(inode);
999 ssize_t length; 914 ssize_t length;
1000 char tmpbuf[TMPBUFLEN]; 915 char tmpbuf[TMPBUFLEN];
1001 916
917 if (!task)
918 return -ESRCH;
1002 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 919 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1003 audit_get_loginuid(task->audit_context)); 920 audit_get_loginuid(task->audit_context));
921 put_task_struct(task);
1004 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 922 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1005} 923}
1006 924
@@ -1010,13 +928,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1010 struct inode * inode = file->f_dentry->d_inode; 928 struct inode * inode = file->f_dentry->d_inode;
1011 char *page, *tmp; 929 char *page, *tmp;
1012 ssize_t length; 930 ssize_t length;
1013 struct task_struct *task = proc_task(inode);
1014 uid_t loginuid; 931 uid_t loginuid;
1015 932
1016 if (!capable(CAP_AUDIT_CONTROL)) 933 if (!capable(CAP_AUDIT_CONTROL))
1017 return -EPERM; 934 return -EPERM;
1018 935
1019 if (current != task) 936 if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
1020 return -EPERM; 937 return -EPERM;
1021 938
1022 if (count >= PAGE_SIZE) 939 if (count >= PAGE_SIZE)
@@ -1040,7 +957,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1040 goto out_free_page; 957 goto out_free_page;
1041 958
1042 } 959 }
1043 length = audit_set_loginuid(task, loginuid); 960 length = audit_set_loginuid(current, loginuid);
1044 if (likely(length == 0)) 961 if (likely(length == 0))
1045 length = count; 962 length = count;
1046 963
@@ -1059,13 +976,16 @@ static struct file_operations proc_loginuid_operations = {
1059static ssize_t seccomp_read(struct file *file, char __user *buf, 976static ssize_t seccomp_read(struct file *file, char __user *buf,
1060 size_t count, loff_t *ppos) 977 size_t count, loff_t *ppos)
1061{ 978{
1062 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 979 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1063 char __buf[20]; 980 char __buf[20];
1064 loff_t __ppos = *ppos; 981 loff_t __ppos = *ppos;
1065 size_t len; 982 size_t len;
1066 983
984 if (!tsk)
985 return -ESRCH;
1067 /* no need to print the trailing zero, so use only len */ 986 /* no need to print the trailing zero, so use only len */
1068 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 987 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
988 put_task_struct(tsk);
1069 if (__ppos >= len) 989 if (__ppos >= len)
1070 return 0; 990 return 0;
1071 if (count > len - __ppos) 991 if (count > len - __ppos)
@@ -1079,29 +999,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
1079static ssize_t seccomp_write(struct file *file, const char __user *buf, 999static ssize_t seccomp_write(struct file *file, const char __user *buf,
1080 size_t count, loff_t *ppos) 1000 size_t count, loff_t *ppos)
1081{ 1001{
1082 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1002 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1083 char __buf[20], *end; 1003 char __buf[20], *end;
1084 unsigned int seccomp_mode; 1004 unsigned int seccomp_mode;
1005 ssize_t result;
1006
1007 result = -ESRCH;
1008 if (!tsk)
1009 goto out_no_task;
1085 1010
1086 /* can set it only once to be even more secure */ 1011 /* can set it only once to be even more secure */
1012 result = -EPERM;
1087 if (unlikely(tsk->seccomp.mode)) 1013 if (unlikely(tsk->seccomp.mode))
1088 return -EPERM; 1014 goto out;
1089 1015
1016 result = -EFAULT;
1090 memset(__buf, 0, sizeof(__buf)); 1017 memset(__buf, 0, sizeof(__buf));
1091 count = min(count, sizeof(__buf) - 1); 1018 count = min(count, sizeof(__buf) - 1);
1092 if (copy_from_user(__buf, buf, count)) 1019 if (copy_from_user(__buf, buf, count))
1093 return -EFAULT; 1020 goto out;
1021
1094 seccomp_mode = simple_strtoul(__buf, &end, 0); 1022 seccomp_mode = simple_strtoul(__buf, &end, 0);
1095 if (*end == '\n') 1023 if (*end == '\n')
1096 end++; 1024 end++;
1025 result = -EINVAL;
1097 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1026 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
1098 tsk->seccomp.mode = seccomp_mode; 1027 tsk->seccomp.mode = seccomp_mode;
1099 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1028 set_tsk_thread_flag(tsk, TIF_SECCOMP);
1100 } else 1029 } else
1101 return -EINVAL; 1030 goto out;
1031 result = -EIO;
1102 if (unlikely(!(end - __buf))) 1032 if (unlikely(!(end - __buf)))
1103 return -EIO; 1033 goto out;
1104 return end - __buf; 1034 result = end - __buf;
1035out:
1036 put_task_struct(tsk);
1037out_no_task:
1038 return result;
1105} 1039}
1106 1040
1107static struct file_operations proc_seccomp_operations = { 1041static struct file_operations proc_seccomp_operations = {
@@ -1118,10 +1052,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1118 /* We don't need a base pointer in the /proc filesystem */ 1052 /* We don't need a base pointer in the /proc filesystem */
1119 path_release(nd); 1053 path_release(nd);
1120 1054
1121 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1055 /* Are we allowed to snoop on the tasks file descriptors? */
1122 goto out; 1056 if (!proc_fd_access_allowed(inode))
1123 error = proc_check_root(inode);
1124 if (error)
1125 goto out; 1057 goto out;
1126 1058
1127 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1059 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
@@ -1163,12 +1095,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1163 struct dentry *de; 1095 struct dentry *de;
1164 struct vfsmount *mnt = NULL; 1096 struct vfsmount *mnt = NULL;
1165 1097
1166 lock_kernel(); 1098 /* Are we allowed to snoop on the tasks file descriptors? */
1167 1099 if (!proc_fd_access_allowed(inode))
1168 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
1169 goto out;
1170 error = proc_check_root(inode);
1171 if (error)
1172 goto out; 1100 goto out;
1173 1101
1174 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1102 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
@@ -1179,7 +1107,6 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1179 dput(de); 1107 dput(de);
1180 mntput(mnt); 1108 mntput(mnt);
1181out: 1109out:
1182 unlock_kernel();
1183 return error; 1110 return error;
1184} 1111}
1185 1112
@@ -1188,21 +1115,20 @@ static struct inode_operations proc_pid_link_inode_operations = {
1188 .follow_link = proc_pid_follow_link 1115 .follow_link = proc_pid_follow_link
1189}; 1116};
1190 1117
1191#define NUMBUF 10
1192
1193static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1118static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1194{ 1119{
1195 struct inode *inode = filp->f_dentry->d_inode; 1120 struct dentry *dentry = filp->f_dentry;
1196 struct task_struct *p = proc_task(inode); 1121 struct inode *inode = dentry->d_inode;
1122 struct task_struct *p = get_proc_task(inode);
1197 unsigned int fd, tid, ino; 1123 unsigned int fd, tid, ino;
1198 int retval; 1124 int retval;
1199 char buf[NUMBUF]; 1125 char buf[PROC_NUMBUF];
1200 struct files_struct * files; 1126 struct files_struct * files;
1201 struct fdtable *fdt; 1127 struct fdtable *fdt;
1202 1128
1203 retval = -ENOENT; 1129 retval = -ENOENT;
1204 if (!pid_alive(p)) 1130 if (!p)
1205 goto out; 1131 goto out_no_task;
1206 retval = 0; 1132 retval = 0;
1207 tid = p->pid; 1133 tid = p->pid;
1208 1134
@@ -1213,7 +1139,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1213 goto out; 1139 goto out;
1214 filp->f_pos++; 1140 filp->f_pos++;
1215 case 1: 1141 case 1:
1216 ino = fake_ino(tid, PROC_TID_INO); 1142 ino = parent_ino(dentry);
1217 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1143 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1218 goto out; 1144 goto out;
1219 filp->f_pos++; 1145 filp->f_pos++;
@@ -1232,7 +1158,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1232 continue; 1158 continue;
1233 rcu_read_unlock(); 1159 rcu_read_unlock();
1234 1160
1235 j = NUMBUF; 1161 j = PROC_NUMBUF;
1236 i = fd; 1162 i = fd;
1237 do { 1163 do {
1238 j--; 1164 j--;
@@ -1241,7 +1167,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1241 } while (i); 1167 } while (i);
1242 1168
1243 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1169 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1244 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1170 if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1245 rcu_read_lock(); 1171 rcu_read_lock();
1246 break; 1172 break;
1247 } 1173 }
@@ -1251,6 +1177,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1251 put_files_struct(files); 1177 put_files_struct(files);
1252 } 1178 }
1253out: 1179out:
1180 put_task_struct(p);
1181out_no_task:
1254 return retval; 1182 return retval;
1255} 1183}
1256 1184
@@ -1262,16 +1190,18 @@ static int proc_pident_readdir(struct file *filp,
1262 int pid; 1190 int pid;
1263 struct dentry *dentry = filp->f_dentry; 1191 struct dentry *dentry = filp->f_dentry;
1264 struct inode *inode = dentry->d_inode; 1192 struct inode *inode = dentry->d_inode;
1193 struct task_struct *task = get_proc_task(inode);
1265 struct pid_entry *p; 1194 struct pid_entry *p;
1266 ino_t ino; 1195 ino_t ino;
1267 int ret; 1196 int ret;
1268 1197
1269 ret = -ENOENT; 1198 ret = -ENOENT;
1270 if (!pid_alive(proc_task(inode))) 1199 if (!task)
1271 goto out; 1200 goto out;
1272 1201
1273 ret = 0; 1202 ret = 0;
1274 pid = proc_task(inode)->pid; 1203 pid = task->pid;
1204 put_task_struct(task);
1275 i = filp->f_pos; 1205 i = filp->f_pos;
1276 switch (i) { 1206 switch (i) {
1277 case 0: 1207 case 0:
@@ -1354,22 +1284,19 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1354 1284
1355 /* Common stuff */ 1285 /* Common stuff */
1356 ei = PROC_I(inode); 1286 ei = PROC_I(inode);
1357 ei->task = NULL;
1358 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1287 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1359 inode->i_ino = fake_ino(task->pid, ino); 1288 inode->i_ino = fake_ino(task->pid, ino);
1360 1289
1361 if (!pid_alive(task))
1362 goto out_unlock;
1363
1364 /* 1290 /*
1365 * grab the reference to task. 1291 * grab the reference to task.
1366 */ 1292 */
1367 get_task_struct(task); 1293 ei->pid = get_pid(task->pids[PIDTYPE_PID].pid);
1368 ei->task = task; 1294 if (!ei->pid)
1369 ei->type = ino; 1295 goto out_unlock;
1296
1370 inode->i_uid = 0; 1297 inode->i_uid = 0;
1371 inode->i_gid = 0; 1298 inode->i_gid = 0;
1372 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { 1299 if (task_dumpable(task)) {
1373 inode->i_uid = task->euid; 1300 inode->i_uid = task->euid;
1374 inode->i_gid = task->egid; 1301 inode->i_gid = task->egid;
1375 } 1302 }
@@ -1379,7 +1306,6 @@ out:
1379 return inode; 1306 return inode;
1380 1307
1381out_unlock: 1308out_unlock:
1382 ei->pde = NULL;
1383 iput(inode); 1309 iput(inode);
1384 return NULL; 1310 return NULL;
1385} 1311}
@@ -1393,13 +1319,21 @@ out_unlock:
1393 * 1319 *
1394 * Rewrite the inode's ownerships here because the owning task may have 1320 * Rewrite the inode's ownerships here because the owning task may have
1395 * performed a setuid(), etc. 1321 * performed a setuid(), etc.
1322 *
1323 * Before the /proc/pid/status file was created the only way to read
1324 * the effective uid of a /process was to stat /proc/pid. Reading
1325 * /proc/pid/status is slow enough that procps and other packages
1326 * kept stating /proc/pid. To keep the rules in /proc simple I have
1327 * made this apply to all per process world readable and executable
1328 * directories.
1396 */ 1329 */
1397static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1330static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1398{ 1331{
1399 struct inode *inode = dentry->d_inode; 1332 struct inode *inode = dentry->d_inode;
1400 struct task_struct *task = proc_task(inode); 1333 struct task_struct *task = get_proc_task(inode);
1401 if (pid_alive(task)) { 1334 if (task) {
1402 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1335 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1336 task_dumpable(task)) {
1403 inode->i_uid = task->euid; 1337 inode->i_uid = task->euid;
1404 inode->i_gid = task->egid; 1338 inode->i_gid = task->egid;
1405 } else { 1339 } else {
@@ -1407,59 +1341,75 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1407 inode->i_gid = 0; 1341 inode->i_gid = 0;
1408 } 1342 }
1409 security_task_to_inode(task, inode); 1343 security_task_to_inode(task, inode);
1344 put_task_struct(task);
1410 return 1; 1345 return 1;
1411 } 1346 }
1412 d_drop(dentry); 1347 d_drop(dentry);
1413 return 0; 1348 return 0;
1414} 1349}
1415 1350
1351static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1352{
1353 struct inode *inode = dentry->d_inode;
1354 struct task_struct *task;
1355 generic_fillattr(inode, stat);
1356
1357 rcu_read_lock();
1358 stat->uid = 0;
1359 stat->gid = 0;
1360 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1361 if (task) {
1362 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1363 task_dumpable(task)) {
1364 stat->uid = task->euid;
1365 stat->gid = task->egid;
1366 }
1367 }
1368 rcu_read_unlock();
1369 return 0;
1370}
1371
1416static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1372static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1417{ 1373{
1418 struct inode *inode = dentry->d_inode; 1374 struct inode *inode = dentry->d_inode;
1419 struct task_struct *task = proc_task(inode); 1375 struct task_struct *task = get_proc_task(inode);
1420 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1376 int fd = proc_fd(inode);
1421 struct files_struct *files; 1377 struct files_struct *files;
1422 1378
1423 files = get_files_struct(task); 1379 if (task) {
1424 if (files) { 1380 files = get_files_struct(task);
1425 rcu_read_lock(); 1381 if (files) {
1426 if (fcheck_files(files, fd)) { 1382 rcu_read_lock();
1383 if (fcheck_files(files, fd)) {
1384 rcu_read_unlock();
1385 put_files_struct(files);
1386 if (task_dumpable(task)) {
1387 inode->i_uid = task->euid;
1388 inode->i_gid = task->egid;
1389 } else {
1390 inode->i_uid = 0;
1391 inode->i_gid = 0;
1392 }
1393 security_task_to_inode(task, inode);
1394 put_task_struct(task);
1395 return 1;
1396 }
1427 rcu_read_unlock(); 1397 rcu_read_unlock();
1428 put_files_struct(files); 1398 put_files_struct(files);
1429 if (task_dumpable(task)) {
1430 inode->i_uid = task->euid;
1431 inode->i_gid = task->egid;
1432 } else {
1433 inode->i_uid = 0;
1434 inode->i_gid = 0;
1435 }
1436 security_task_to_inode(task, inode);
1437 return 1;
1438 } 1399 }
1439 rcu_read_unlock(); 1400 put_task_struct(task);
1440 put_files_struct(files);
1441 } 1401 }
1442 d_drop(dentry); 1402 d_drop(dentry);
1443 return 0; 1403 return 0;
1444} 1404}
1445 1405
1446static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1447{
1448 struct task_struct *task = proc_task(inode);
1449 spin_lock(&task->proc_lock);
1450 if (task->proc_dentry == dentry)
1451 task->proc_dentry = NULL;
1452 spin_unlock(&task->proc_lock);
1453 iput(inode);
1454}
1455
1456static int pid_delete_dentry(struct dentry * dentry) 1406static int pid_delete_dentry(struct dentry * dentry)
1457{ 1407{
1458 /* Is the task we represent dead? 1408 /* Is the task we represent dead?
1459 * If so, then don't put the dentry on the lru list, 1409 * If so, then don't put the dentry on the lru list,
1460 * kill it immediately. 1410 * kill it immediately.
1461 */ 1411 */
1462 return !pid_alive(proc_task(dentry->d_inode)); 1412 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1463} 1413}
1464 1414
1465static struct dentry_operations tid_fd_dentry_operations = 1415static struct dentry_operations tid_fd_dentry_operations =
@@ -1474,13 +1424,6 @@ static struct dentry_operations pid_dentry_operations =
1474 .d_delete = pid_delete_dentry, 1424 .d_delete = pid_delete_dentry,
1475}; 1425};
1476 1426
1477static struct dentry_operations pid_base_dentry_operations =
1478{
1479 .d_revalidate = pid_revalidate,
1480 .d_iput = pid_base_iput,
1481 .d_delete = pid_delete_dentry,
1482};
1483
1484/* Lookups */ 1427/* Lookups */
1485 1428
1486static unsigned name_to_int(struct dentry *dentry) 1429static unsigned name_to_int(struct dentry *dentry)
@@ -1508,22 +1451,24 @@ out:
1508/* SMP-safe */ 1451/* SMP-safe */
1509static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1452static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1510{ 1453{
1511 struct task_struct *task = proc_task(dir); 1454 struct task_struct *task = get_proc_task(dir);
1512 unsigned fd = name_to_int(dentry); 1455 unsigned fd = name_to_int(dentry);
1456 struct dentry *result = ERR_PTR(-ENOENT);
1513 struct file * file; 1457 struct file * file;
1514 struct files_struct * files; 1458 struct files_struct * files;
1515 struct inode *inode; 1459 struct inode *inode;
1516 struct proc_inode *ei; 1460 struct proc_inode *ei;
1517 1461
1462 if (!task)
1463 goto out_no_task;
1518 if (fd == ~0U) 1464 if (fd == ~0U)
1519 goto out; 1465 goto out;
1520 if (!pid_alive(task))
1521 goto out;
1522 1466
1523 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1467 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1524 if (!inode) 1468 if (!inode)
1525 goto out; 1469 goto out;
1526 ei = PROC_I(inode); 1470 ei = PROC_I(inode);
1471 ei->fd = fd;
1527 files = get_files_struct(task); 1472 files = get_files_struct(task);
1528 if (!files) 1473 if (!files)
1529 goto out_unlock; 1474 goto out_unlock;
@@ -1548,19 +1493,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1548 ei->op.proc_get_link = proc_fd_link; 1493 ei->op.proc_get_link = proc_fd_link;
1549 dentry->d_op = &tid_fd_dentry_operations; 1494 dentry->d_op = &tid_fd_dentry_operations;
1550 d_add(dentry, inode); 1495 d_add(dentry, inode);
1551 return NULL; 1496 /* Close the race of the process dying before we return the dentry */
1497 if (tid_fd_revalidate(dentry, NULL))
1498 result = NULL;
1499out:
1500 put_task_struct(task);
1501out_no_task:
1502 return result;
1552 1503
1553out_unlock2: 1504out_unlock2:
1554 spin_unlock(&files->file_lock); 1505 spin_unlock(&files->file_lock);
1555 put_files_struct(files); 1506 put_files_struct(files);
1556out_unlock: 1507out_unlock:
1557 iput(inode); 1508 iput(inode);
1558out: 1509 goto out;
1559 return ERR_PTR(-ENOENT);
1560} 1510}
1561 1511
1562static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1512static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1563static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1513static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1514static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
1564 1515
1565static struct file_operations proc_fd_operations = { 1516static struct file_operations proc_fd_operations = {
1566 .read = generic_read_dir, 1517 .read = generic_read_dir,
@@ -1577,12 +1528,11 @@ static struct file_operations proc_task_operations = {
1577 */ 1528 */
1578static struct inode_operations proc_fd_inode_operations = { 1529static struct inode_operations proc_fd_inode_operations = {
1579 .lookup = proc_lookupfd, 1530 .lookup = proc_lookupfd,
1580 .permission = proc_permission,
1581}; 1531};
1582 1532
1583static struct inode_operations proc_task_inode_operations = { 1533static struct inode_operations proc_task_inode_operations = {
1584 .lookup = proc_task_lookup, 1534 .lookup = proc_task_lookup,
1585 .permission = proc_task_permission, 1535 .getattr = proc_task_getattr,
1586}; 1536};
1587 1537
1588#ifdef CONFIG_SECURITY 1538#ifdef CONFIG_SECURITY
@@ -1592,12 +1542,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1592 struct inode * inode = file->f_dentry->d_inode; 1542 struct inode * inode = file->f_dentry->d_inode;
1593 unsigned long page; 1543 unsigned long page;
1594 ssize_t length; 1544 ssize_t length;
1595 struct task_struct *task = proc_task(inode); 1545 struct task_struct *task = get_proc_task(inode);
1546
1547 length = -ESRCH;
1548 if (!task)
1549 goto out_no_task;
1596 1550
1597 if (count > PAGE_SIZE) 1551 if (count > PAGE_SIZE)
1598 count = PAGE_SIZE; 1552 count = PAGE_SIZE;
1553 length = -ENOMEM;
1599 if (!(page = __get_free_page(GFP_KERNEL))) 1554 if (!(page = __get_free_page(GFP_KERNEL)))
1600 return -ENOMEM; 1555 goto out;
1601 1556
1602 length = security_getprocattr(task, 1557 length = security_getprocattr(task,
1603 (char*)file->f_dentry->d_name.name, 1558 (char*)file->f_dentry->d_name.name,
@@ -1605,6 +1560,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1605 if (length >= 0) 1560 if (length >= 0)
1606 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1561 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1607 free_page(page); 1562 free_page(page);
1563out:
1564 put_task_struct(task);
1565out_no_task:
1608 return length; 1566 return length;
1609} 1567}
1610 1568
@@ -1614,26 +1572,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1614 struct inode * inode = file->f_dentry->d_inode; 1572 struct inode * inode = file->f_dentry->d_inode;
1615 char *page; 1573 char *page;
1616 ssize_t length; 1574 ssize_t length;
1617 struct task_struct *task = proc_task(inode); 1575 struct task_struct *task = get_proc_task(inode);
1618 1576
1577 length = -ESRCH;
1578 if (!task)
1579 goto out_no_task;
1619 if (count > PAGE_SIZE) 1580 if (count > PAGE_SIZE)
1620 count = PAGE_SIZE; 1581 count = PAGE_SIZE;
1621 if (*ppos != 0) { 1582
1622 /* No partial writes. */ 1583 /* No partial writes. */
1623 return -EINVAL; 1584 length = -EINVAL;
1624 } 1585 if (*ppos != 0)
1586 goto out;
1587
1588 length = -ENOMEM;
1625 page = (char*)__get_free_page(GFP_USER); 1589 page = (char*)__get_free_page(GFP_USER);
1626 if (!page) 1590 if (!page)
1627 return -ENOMEM; 1591 goto out;
1592
1628 length = -EFAULT; 1593 length = -EFAULT;
1629 if (copy_from_user(page, buf, count)) 1594 if (copy_from_user(page, buf, count))
1630 goto out; 1595 goto out_free;
1631 1596
1632 length = security_setprocattr(task, 1597 length = security_setprocattr(task,
1633 (char*)file->f_dentry->d_name.name, 1598 (char*)file->f_dentry->d_name.name,
1634 (void*)page, count); 1599 (void*)page, count);
1635out: 1600out_free:
1636 free_page((unsigned long) page); 1601 free_page((unsigned long) page);
1602out:
1603 put_task_struct(task);
1604out_no_task:
1637 return length; 1605 return length;
1638} 1606}
1639 1607
@@ -1648,24 +1616,22 @@ static struct file_operations proc_tgid_attr_operations;
1648static struct inode_operations proc_tgid_attr_inode_operations; 1616static struct inode_operations proc_tgid_attr_inode_operations;
1649#endif 1617#endif
1650 1618
1651static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
1652
1653/* SMP-safe */ 1619/* SMP-safe */
1654static struct dentry *proc_pident_lookup(struct inode *dir, 1620static struct dentry *proc_pident_lookup(struct inode *dir,
1655 struct dentry *dentry, 1621 struct dentry *dentry,
1656 struct pid_entry *ents) 1622 struct pid_entry *ents)
1657{ 1623{
1658 struct inode *inode; 1624 struct inode *inode;
1659 int error; 1625 struct dentry *error;
1660 struct task_struct *task = proc_task(dir); 1626 struct task_struct *task = get_proc_task(dir);
1661 struct pid_entry *p; 1627 struct pid_entry *p;
1662 struct proc_inode *ei; 1628 struct proc_inode *ei;
1663 1629
1664 error = -ENOENT; 1630 error = ERR_PTR(-ENOENT);
1665 inode = NULL; 1631 inode = NULL;
1666 1632
1667 if (!pid_alive(task)) 1633 if (!task)
1668 goto out; 1634 goto out_no_task;
1669 1635
1670 for (p = ents; p->name; p++) { 1636 for (p = ents; p->name; p++) {
1671 if (p->len != dentry->d_name.len) 1637 if (p->len != dentry->d_name.len)
@@ -1676,7 +1642,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1676 if (!p->name) 1642 if (!p->name)
1677 goto out; 1643 goto out;
1678 1644
1679 error = -EINVAL; 1645 error = ERR_PTR(-EINVAL);
1680 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1646 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1681 if (!inode) 1647 if (!inode)
1682 goto out; 1648 goto out;
@@ -1689,7 +1655,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1689 */ 1655 */
1690 switch(p->type) { 1656 switch(p->type) {
1691 case PROC_TGID_TASK: 1657 case PROC_TGID_TASK:
1692 inode->i_nlink = 2 + get_tid_list(2, NULL, dir); 1658 inode->i_nlink = 2;
1693 inode->i_op = &proc_task_inode_operations; 1659 inode->i_op = &proc_task_inode_operations;
1694 inode->i_fop = &proc_task_operations; 1660 inode->i_fop = &proc_task_operations;
1695 break; 1661 break;
@@ -1759,7 +1725,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1759#endif 1725#endif
1760 case PROC_TID_MEM: 1726 case PROC_TID_MEM:
1761 case PROC_TGID_MEM: 1727 case PROC_TGID_MEM:
1762 inode->i_op = &proc_mem_inode_operations;
1763 inode->i_fop = &proc_mem_operations; 1728 inode->i_fop = &proc_mem_operations;
1764 break; 1729 break;
1765#ifdef CONFIG_SECCOMP 1730#ifdef CONFIG_SECCOMP
@@ -1801,6 +1766,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1801 case PROC_TGID_ATTR_EXEC: 1766 case PROC_TGID_ATTR_EXEC:
1802 case PROC_TID_ATTR_FSCREATE: 1767 case PROC_TID_ATTR_FSCREATE:
1803 case PROC_TGID_ATTR_FSCREATE: 1768 case PROC_TGID_ATTR_FSCREATE:
1769 case PROC_TID_ATTR_KEYCREATE:
1770 case PROC_TGID_ATTR_KEYCREATE:
1771 case PROC_TID_ATTR_SOCKCREATE:
1772 case PROC_TGID_ATTR_SOCKCREATE:
1804 inode->i_fop = &proc_pid_attr_operations; 1773 inode->i_fop = &proc_pid_attr_operations;
1805 break; 1774 break;
1806#endif 1775#endif
@@ -1842,14 +1811,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1842 default: 1811 default:
1843 printk("procfs: impossible type (%d)",p->type); 1812 printk("procfs: impossible type (%d)",p->type);
1844 iput(inode); 1813 iput(inode);
1845 return ERR_PTR(-EINVAL); 1814 error = ERR_PTR(-EINVAL);
1815 goto out;
1846 } 1816 }
1847 dentry->d_op = &pid_dentry_operations; 1817 dentry->d_op = &pid_dentry_operations;
1848 d_add(dentry, inode); 1818 d_add(dentry, inode);
1849 return NULL; 1819 /* Close the race of the process dying before we return the dentry */
1850 1820 if (pid_revalidate(dentry, NULL))
1821 error = NULL;
1851out: 1822out:
1852 return ERR_PTR(error); 1823 put_task_struct(task);
1824out_no_task:
1825 return error;
1853} 1826}
1854 1827
1855static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1828static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
@@ -1872,10 +1845,12 @@ static struct file_operations proc_tid_base_operations = {
1872 1845
1873static struct inode_operations proc_tgid_base_inode_operations = { 1846static struct inode_operations proc_tgid_base_inode_operations = {
1874 .lookup = proc_tgid_base_lookup, 1847 .lookup = proc_tgid_base_lookup,
1848 .getattr = pid_getattr,
1875}; 1849};
1876 1850
1877static struct inode_operations proc_tid_base_inode_operations = { 1851static struct inode_operations proc_tid_base_inode_operations = {
1878 .lookup = proc_tid_base_lookup, 1852 .lookup = proc_tid_base_lookup,
1853 .getattr = pid_getattr,
1879}; 1854};
1880 1855
1881#ifdef CONFIG_SECURITY 1856#ifdef CONFIG_SECURITY
@@ -1917,10 +1892,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1917 1892
1918static struct inode_operations proc_tgid_attr_inode_operations = { 1893static struct inode_operations proc_tgid_attr_inode_operations = {
1919 .lookup = proc_tgid_attr_lookup, 1894 .lookup = proc_tgid_attr_lookup,
1895 .getattr = pid_getattr,
1920}; 1896};
1921 1897
1922static struct inode_operations proc_tid_attr_inode_operations = { 1898static struct inode_operations proc_tid_attr_inode_operations = {
1923 .lookup = proc_tid_attr_lookup, 1899 .lookup = proc_tid_attr_lookup,
1900 .getattr = pid_getattr,
1924}; 1901};
1925#endif 1902#endif
1926 1903
@@ -1930,14 +1907,14 @@ static struct inode_operations proc_tid_attr_inode_operations = {
1930static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1907static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1931 int buflen) 1908 int buflen)
1932{ 1909{
1933 char tmp[30]; 1910 char tmp[PROC_NUMBUF];
1934 sprintf(tmp, "%d", current->tgid); 1911 sprintf(tmp, "%d", current->tgid);
1935 return vfs_readlink(dentry,buffer,buflen,tmp); 1912 return vfs_readlink(dentry,buffer,buflen,tmp);
1936} 1913}
1937 1914
1938static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1915static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1939{ 1916{
1940 char tmp[30]; 1917 char tmp[PROC_NUMBUF];
1941 sprintf(tmp, "%d", current->tgid); 1918 sprintf(tmp, "%d", current->tgid);
1942 return ERR_PTR(vfs_follow_link(nd,tmp)); 1919 return ERR_PTR(vfs_follow_link(nd,tmp));
1943} 1920}
@@ -1948,67 +1925,80 @@ static struct inode_operations proc_self_inode_operations = {
1948}; 1925};
1949 1926
1950/** 1927/**
1951 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1928 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
1952 * @p: task that should be flushed. 1929 *
1930 * @task: task that should be flushed.
1931 *
1932 * Looks in the dcache for
1933 * /proc/@pid
1934 * /proc/@tgid/task/@pid
1935 * if either directory is present flushes it and all of its children
1936 * from the dcache.
1953 * 1937 *
1954 * Drops the /proc/@pid dcache entry from the hash chains. 1938 * It is safe and reasonable to cache /proc entries for a task until
1939 * that task exits. After that they just clog up the dcache with
1940 * useless entries, possibly causing useful dcache entries to be
1941 * flushed instead. This routine is provided to flush those useless
1942 * dcache entries at process exit time.
1955 * 1943 *
1956 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1944 * NOTE: This routine is just an optimization so it does not guarantee
1957 * otherwise e.g. /proc/@pid/exe might point to the wrong executable, 1945 * that no dcache entries will exist at process exit time it
1958 * if the pid value is immediately reused. This is enforced by 1946 * just makes it very unlikely that any will persist.
1959 * - caller must acquire spin_lock(p->proc_lock)
1960 * - must be called before detach_pid()
1961 * - proc_pid_lookup acquires proc_lock, and checks that
1962 * the target is not dead by looking at the attach count
1963 * of PIDTYPE_PID.
1964 */ 1947 */
1965 1948void proc_flush_task(struct task_struct *task)
1966struct dentry *proc_pid_unhash(struct task_struct *p)
1967{ 1949{
1968 struct dentry *proc_dentry; 1950 struct dentry *dentry, *leader, *dir;
1951 char buf[PROC_NUMBUF];
1952 struct qstr name;
1953
1954 name.name = buf;
1955 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1956 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1957 if (dentry) {
1958 shrink_dcache_parent(dentry);
1959 d_drop(dentry);
1960 dput(dentry);
1961 }
1969 1962
1970 proc_dentry = p->proc_dentry; 1963 if (thread_group_leader(task))
1971 if (proc_dentry != NULL) { 1964 goto out;
1972 1965
1973 spin_lock(&dcache_lock); 1966 name.name = buf;
1974 spin_lock(&proc_dentry->d_lock); 1967 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
1975 if (!d_unhashed(proc_dentry)) { 1968 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1976 dget_locked(proc_dentry); 1969 if (!leader)
1977 __d_drop(proc_dentry); 1970 goto out;
1978 spin_unlock(&proc_dentry->d_lock);
1979 } else {
1980 spin_unlock(&proc_dentry->d_lock);
1981 proc_dentry = NULL;
1982 }
1983 spin_unlock(&dcache_lock);
1984 }
1985 return proc_dentry;
1986}
1987 1971
1988/** 1972 name.name = "task";
1989 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1973 name.len = strlen(name.name);
1990 * @proc_dentry: directoy to prune. 1974 dir = d_hash_and_lookup(leader, &name);
1991 * 1975 if (!dir)
1992 * Shrink the /proc directory that was used by the just killed thread. 1976 goto out_put_leader;
1993 */ 1977
1994 1978 name.name = buf;
1995void proc_pid_flush(struct dentry *proc_dentry) 1979 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1996{ 1980 dentry = d_hash_and_lookup(dir, &name);
1997 might_sleep(); 1981 if (dentry) {
1998 if(proc_dentry != NULL) { 1982 shrink_dcache_parent(dentry);
1999 shrink_dcache_parent(proc_dentry); 1983 d_drop(dentry);
2000 dput(proc_dentry); 1984 dput(dentry);
2001 } 1985 }
1986
1987 dput(dir);
1988out_put_leader:
1989 dput(leader);
1990out:
1991 return;
2002} 1992}
2003 1993
2004/* SMP-safe */ 1994/* SMP-safe */
2005struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1995struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2006{ 1996{
1997 struct dentry *result = ERR_PTR(-ENOENT);
2007 struct task_struct *task; 1998 struct task_struct *task;
2008 struct inode *inode; 1999 struct inode *inode;
2009 struct proc_inode *ei; 2000 struct proc_inode *ei;
2010 unsigned tgid; 2001 unsigned tgid;
2011 int died;
2012 2002
2013 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 2003 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
2014 inode = new_inode(dir->i_sb); 2004 inode = new_inode(dir->i_sb);
@@ -2029,21 +2019,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2029 if (tgid == ~0U) 2019 if (tgid == ~0U)
2030 goto out; 2020 goto out;
2031 2021
2032 read_lock(&tasklist_lock); 2022 rcu_read_lock();
2033 task = find_task_by_pid(tgid); 2023 task = find_task_by_pid(tgid);
2034 if (task) 2024 if (task)
2035 get_task_struct(task); 2025 get_task_struct(task);
2036 read_unlock(&tasklist_lock); 2026 rcu_read_unlock();
2037 if (!task) 2027 if (!task)
2038 goto out; 2028 goto out;
2039 2029
2040 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 2030 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
2031 if (!inode)
2032 goto out_put_task;
2041 2033
2042
2043 if (!inode) {
2044 put_task_struct(task);
2045 goto out;
2046 }
2047 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2034 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2048 inode->i_op = &proc_tgid_base_inode_operations; 2035 inode->i_op = &proc_tgid_base_inode_operations;
2049 inode->i_fop = &proc_tgid_base_operations; 2036 inode->i_fop = &proc_tgid_base_operations;
@@ -2054,45 +2041,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2054 inode->i_nlink = 4; 2041 inode->i_nlink = 4;
2055#endif 2042#endif
2056 2043
2057 dentry->d_op = &pid_base_dentry_operations; 2044 dentry->d_op = &pid_dentry_operations;
2058 2045
2059 died = 0;
2060 d_add(dentry, inode); 2046 d_add(dentry, inode);
2061 spin_lock(&task->proc_lock); 2047 /* Close the race of the process dying before we return the dentry */
2062 task->proc_dentry = dentry; 2048 if (pid_revalidate(dentry, NULL))
2063 if (!pid_alive(task)) { 2049 result = NULL;
2064 dentry = proc_pid_unhash(task);
2065 died = 1;
2066 }
2067 spin_unlock(&task->proc_lock);
2068 2050
2051out_put_task:
2069 put_task_struct(task); 2052 put_task_struct(task);
2070 if (died) {
2071 proc_pid_flush(dentry);
2072 goto out;
2073 }
2074 return NULL;
2075out: 2053out:
2076 return ERR_PTR(-ENOENT); 2054 return result;
2077} 2055}
2078 2056
2079/* SMP-safe */ 2057/* SMP-safe */
2080static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2058static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2081{ 2059{
2060 struct dentry *result = ERR_PTR(-ENOENT);
2082 struct task_struct *task; 2061 struct task_struct *task;
2083 struct task_struct *leader = proc_task(dir); 2062 struct task_struct *leader = get_proc_task(dir);
2084 struct inode *inode; 2063 struct inode *inode;
2085 unsigned tid; 2064 unsigned tid;
2086 2065
2066 if (!leader)
2067 goto out_no_task;
2068
2087 tid = name_to_int(dentry); 2069 tid = name_to_int(dentry);
2088 if (tid == ~0U) 2070 if (tid == ~0U)
2089 goto out; 2071 goto out;
2090 2072
2091 read_lock(&tasklist_lock); 2073 rcu_read_lock();
2092 task = find_task_by_pid(tid); 2074 task = find_task_by_pid(tid);
2093 if (task) 2075 if (task)
2094 get_task_struct(task); 2076 get_task_struct(task);
2095 read_unlock(&tasklist_lock); 2077 rcu_read_unlock();
2096 if (!task) 2078 if (!task)
2097 goto out; 2079 goto out;
2098 if (leader->tgid != task->tgid) 2080 if (leader->tgid != task->tgid)
@@ -2113,101 +2095,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2113 inode->i_nlink = 3; 2095 inode->i_nlink = 3;
2114#endif 2096#endif
2115 2097
2116 dentry->d_op = &pid_base_dentry_operations; 2098 dentry->d_op = &pid_dentry_operations;
2117 2099
2118 d_add(dentry, inode); 2100 d_add(dentry, inode);
2101 /* Close the race of the process dying before we return the dentry */
2102 if (pid_revalidate(dentry, NULL))
2103 result = NULL;
2119 2104
2120 put_task_struct(task);
2121 return NULL;
2122out_drop_task: 2105out_drop_task:
2123 put_task_struct(task); 2106 put_task_struct(task);
2124out: 2107out:
2125 return ERR_PTR(-ENOENT); 2108 put_task_struct(leader);
2109out_no_task:
2110 return result;
2126} 2111}
2127 2112
2128#define PROC_NUMBUF 10
2129#define PROC_MAXPIDS 20
2130
2131/* 2113/*
2132 * Get a few tgid's to return for filldir - we need to hold the 2114 * Find the first tgid to return to user space.
2133 * tasklist lock while doing this, and we must release it before 2115 *
2134 * we actually do the filldir itself, so we use a temp buffer.. 2116 * Usually this is just whatever follows &init_task, but if the users
2117 * buffer was too small to hold the full list or there was a seek into
2118 * the middle of the directory we have more work to do.
2119 *
2120 * In the case of a short read we start with find_task_by_pid.
2121 *
2122 * In the case of a seek we start with &init_task and walk nr
2123 * threads past it.
2135 */ 2124 */
2136static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) 2125static struct task_struct *first_tgid(int tgid, unsigned int nr)
2137{ 2126{
2138 struct task_struct *p; 2127 struct task_struct *pos;
2139 int nr_tgids = 0; 2128 rcu_read_lock();
2140 2129 if (tgid && nr) {
2141 index--; 2130 pos = find_task_by_pid(tgid);
2142 read_lock(&tasklist_lock); 2131 if (pos && thread_group_leader(pos))
2143 p = NULL; 2132 goto found;
2144 if (version) {
2145 p = find_task_by_pid(version);
2146 if (p && !thread_group_leader(p))
2147 p = NULL;
2148 } 2133 }
2134 /* If nr exceeds the number of processes get out quickly */
2135 pos = NULL;
2136 if (nr && nr >= nr_processes())
2137 goto done;
2149 2138
2150 if (p) 2139 /* If we haven't found our starting place yet start with
2151 index = 0; 2140 * the init_task and walk nr tasks forward.
2152 else 2141 */
2153 p = next_task(&init_task); 2142 for (pos = next_task(&init_task); nr > 0; --nr) {
2154 2143 pos = next_task(pos);
2155 for ( ; p != &init_task; p = next_task(p)) { 2144 if (pos == &init_task) {
2156 int tgid = p->pid; 2145 pos = NULL;
2157 if (!pid_alive(p)) 2146 goto done;
2158 continue; 2147 }
2159 if (--index >= 0)
2160 continue;
2161 tgids[nr_tgids] = tgid;
2162 nr_tgids++;
2163 if (nr_tgids >= PROC_MAXPIDS)
2164 break;
2165 } 2148 }
2166 read_unlock(&tasklist_lock); 2149found:
2167 return nr_tgids; 2150 get_task_struct(pos);
2151done:
2152 rcu_read_unlock();
2153 return pos;
2168} 2154}
2169 2155
2170/* 2156/*
2171 * Get a few tid's to return for filldir - we need to hold the 2157 * Find the next task in the task list.
2172 * tasklist lock while doing this, and we must release it before 2158 * Return NULL if we loop or there is any error.
2173 * we actually do the filldir itself, so we use a temp buffer.. 2159 *
2160 * The reference to the input task_struct is released.
2174 */ 2161 */
2175static int get_tid_list(int index, unsigned int *tids, struct inode *dir) 2162static struct task_struct *next_tgid(struct task_struct *start)
2176{ 2163{
2177 struct task_struct *leader_task = proc_task(dir); 2164 struct task_struct *pos;
2178 struct task_struct *task = leader_task; 2165 rcu_read_lock();
2179 int nr_tids = 0; 2166 pos = start;
2180 2167 if (pid_alive(start))
2181 index -= 2; 2168 pos = next_task(start);
2182 read_lock(&tasklist_lock); 2169 if (pid_alive(pos) && (pos != &init_task)) {
2183 /* 2170 get_task_struct(pos);
2184 * The starting point task (leader_task) might be an already 2171 goto done;
2185 * unlinked task, which cannot be used to access the task-list 2172 }
2186 * via next_thread(). 2173 pos = NULL;
2187 */ 2174done:
2188 if (pid_alive(task)) do { 2175 rcu_read_unlock();
2189 int tid = task->pid; 2176 put_task_struct(start);
2190 2177 return pos;
2191 if (--index >= 0)
2192 continue;
2193 if (tids != NULL)
2194 tids[nr_tids] = tid;
2195 nr_tids++;
2196 if (nr_tids >= PROC_MAXPIDS)
2197 break;
2198 } while ((task = next_thread(task)) != leader_task);
2199 read_unlock(&tasklist_lock);
2200 return nr_tids;
2201} 2178}
2202 2179
2203/* for the /proc/ directory itself, after non-process stuff has been done */ 2180/* for the /proc/ directory itself, after non-process stuff has been done */
2204int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2181int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2205{ 2182{
2206 unsigned int tgid_array[PROC_MAXPIDS];
2207 char buf[PROC_NUMBUF]; 2183 char buf[PROC_NUMBUF];
2208 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2184 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2209 unsigned int nr_tgids, i; 2185 struct task_struct *task;
2210 int next_tgid; 2186 int tgid;
2211 2187
2212 if (!nr) { 2188 if (!nr) {
2213 ino_t ino = fake_ino(0,PROC_TGID_INO); 2189 ino_t ino = fake_ino(0,PROC_TGID_INO);
@@ -2216,63 +2192,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2216 filp->f_pos++; 2192 filp->f_pos++;
2217 nr++; 2193 nr++;
2218 } 2194 }
2195 nr -= 1;
2219 2196
2220 /* f_version caches the tgid value that the last readdir call couldn't 2197 /* f_version caches the tgid value that the last readdir call couldn't
2221 * return. lseek aka telldir automagically resets f_version to 0. 2198 * return. lseek aka telldir automagically resets f_version to 0.
2222 */ 2199 */
2223 next_tgid = filp->f_version; 2200 tgid = filp->f_version;
2224 filp->f_version = 0; 2201 filp->f_version = 0;
2225 for (;;) { 2202 for (task = first_tgid(tgid, nr);
2226 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); 2203 task;
2227 if (!nr_tgids) { 2204 task = next_tgid(task), filp->f_pos++) {
2228 /* no more entries ! */ 2205 int len;
2206 ino_t ino;
2207 tgid = task->pid;
2208 len = snprintf(buf, sizeof(buf), "%d", tgid);
2209 ino = fake_ino(tgid, PROC_TGID_INO);
2210 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
2211 /* returning this tgid failed, save it as the first
2212 * pid for the next readir call */
2213 filp->f_version = tgid;
2214 put_task_struct(task);
2229 break; 2215 break;
2230 } 2216 }
2231 next_tgid = 0; 2217 }
2218 return 0;
2219}
2232 2220
2233 /* do not use the last found pid, reserve it for next_tgid */ 2221/*
2234 if (nr_tgids == PROC_MAXPIDS) { 2222 * Find the first tid of a thread group to return to user space.
2235 nr_tgids--; 2223 *
2236 next_tgid = tgid_array[nr_tgids]; 2224 * Usually this is just the thread group leader, but if the users
2237 } 2225 * buffer was too small or there was a seek into the middle of the
2226 * directory we have more work todo.
2227 *
2228 * In the case of a short read we start with find_task_by_pid.
2229 *
2230 * In the case of a seek we start with the leader and walk nr
2231 * threads past it.
2232 */
2233static struct task_struct *first_tid(struct task_struct *leader,
2234 int tid, int nr)
2235{
2236 struct task_struct *pos;
2238 2237
2239 for (i=0;i<nr_tgids;i++) { 2238 rcu_read_lock();
2240 int tgid = tgid_array[i]; 2239 /* Attempt to start with the pid of a thread */
2241 ino_t ino = fake_ino(tgid,PROC_TGID_INO); 2240 if (tid && (nr > 0)) {
2242 unsigned long j = PROC_NUMBUF; 2241 pos = find_task_by_pid(tid);
2242 if (pos && (pos->group_leader == leader))
2243 goto found;
2244 }
2243 2245
2244 do 2246 /* If nr exceeds the number of threads there is nothing todo */
2245 buf[--j] = '0' + (tgid % 10); 2247 pos = NULL;
2246 while ((tgid /= 10) != 0); 2248 if (nr && nr >= get_nr_threads(leader))
2249 goto out;
2247 2250
2248 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { 2251 /* If we haven't found our starting place yet start
2249 /* returning this tgid failed, save it as the first 2252 * with the leader and walk nr threads forward.
2250 * pid for the next readir call */ 2253 */
2251 filp->f_version = tgid_array[i]; 2254 for (pos = leader; nr > 0; --nr) {
2252 goto out; 2255 pos = next_thread(pos);
2253 } 2256 if (pos == leader) {
2254 filp->f_pos++; 2257 pos = NULL;
2255 nr++; 2258 goto out;
2256 } 2259 }
2257 } 2260 }
2261found:
2262 get_task_struct(pos);
2258out: 2263out:
2259 return 0; 2264 rcu_read_unlock();
2265 return pos;
2266}
2267
2268/*
2269 * Find the next thread in the thread list.
2270 * Return NULL if there is an error or no next thread.
2271 *
2272 * The reference to the input task_struct is released.
2273 */
2274static struct task_struct *next_tid(struct task_struct *start)
2275{
2276 struct task_struct *pos = NULL;
2277 rcu_read_lock();
2278 if (pid_alive(start)) {
2279 pos = next_thread(start);
2280 if (thread_group_leader(pos))
2281 pos = NULL;
2282 else
2283 get_task_struct(pos);
2284 }
2285 rcu_read_unlock();
2286 put_task_struct(start);
2287 return pos;
2260} 2288}
2261 2289
2262/* for the /proc/TGID/task/ directories */ 2290/* for the /proc/TGID/task/ directories */
2263static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2291static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2264{ 2292{
2265 unsigned int tid_array[PROC_MAXPIDS];
2266 char buf[PROC_NUMBUF]; 2293 char buf[PROC_NUMBUF];
2267 unsigned int nr_tids, i;
2268 struct dentry *dentry = filp->f_dentry; 2294 struct dentry *dentry = filp->f_dentry;
2269 struct inode *inode = dentry->d_inode; 2295 struct inode *inode = dentry->d_inode;
2296 struct task_struct *leader = get_proc_task(inode);
2297 struct task_struct *task;
2270 int retval = -ENOENT; 2298 int retval = -ENOENT;
2271 ino_t ino; 2299 ino_t ino;
2300 int tid;
2272 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2301 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
2273 2302
2274 if (!pid_alive(proc_task(inode))) 2303 if (!leader)
2275 goto out; 2304 goto out_no_task;
2276 retval = 0; 2305 retval = 0;
2277 2306
2278 switch (pos) { 2307 switch (pos) {
@@ -2290,24 +2319,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2290 /* fall through */ 2319 /* fall through */
2291 } 2320 }
2292 2321
2293 nr_tids = get_tid_list(pos, tid_array, inode); 2322 /* f_version caches the tgid value that the last readdir call couldn't
2294 inode->i_nlink = pos + nr_tids; 2323 * return. lseek aka telldir automagically resets f_version to 0.
2295 2324 */
2296 for (i = 0; i < nr_tids; i++) { 2325 tid = filp->f_version;
2297 unsigned long j = PROC_NUMBUF; 2326 filp->f_version = 0;
2298 int tid = tid_array[i]; 2327 for (task = first_tid(leader, tid, pos - 2);
2299 2328 task;
2300 ino = fake_ino(tid,PROC_TID_INO); 2329 task = next_tid(task), pos++) {
2301 2330 int len;
2302 do 2331 tid = task->pid;
2303 buf[--j] = '0' + (tid % 10); 2332 len = snprintf(buf, sizeof(buf), "%d", tid);
2304 while ((tid /= 10) != 0); 2333 ino = fake_ino(tid, PROC_TID_INO);
2305 2334 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
2306 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) 2335 /* returning this tgid failed, save it as the first
2336 * pid for the next readir call */
2337 filp->f_version = tid;
2338 put_task_struct(task);
2307 break; 2339 break;
2308 pos++; 2340 }
2309 } 2341 }
2310out: 2342out:
2311 filp->f_pos = pos; 2343 filp->f_pos = pos;
2344 put_task_struct(leader);
2345out_no_task:
2312 return retval; 2346 return retval;
2313} 2347}
2348
2349static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
2350{
2351 struct inode *inode = dentry->d_inode;
2352 struct task_struct *p = get_proc_task(inode);
2353 generic_fillattr(inode, stat);
2354
2355 if (p) {
2356 rcu_read_lock();
2357 stat->nlink += get_nr_threads(p);
2358 rcu_read_unlock();
2359 put_task_struct(p);
2360 }
2361
2362 return 0;
2363}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 722b9c463111..6dcef089e18e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -58,14 +58,11 @@ static void de_put(struct proc_dir_entry *de)
58static void proc_delete_inode(struct inode *inode) 58static void proc_delete_inode(struct inode *inode)
59{ 59{
60 struct proc_dir_entry *de; 60 struct proc_dir_entry *de;
61 struct task_struct *tsk;
62 61
63 truncate_inode_pages(&inode->i_data, 0); 62 truncate_inode_pages(&inode->i_data, 0);
64 63
65 /* Let go of any associated process */ 64 /* Stop tracking associated processes */
66 tsk = PROC_I(inode)->task; 65 put_pid(PROC_I(inode)->pid);
67 if (tsk)
68 put_task_struct(tsk);
69 66
70 /* Let go of any associated proc directory entry */ 67 /* Let go of any associated proc directory entry */
71 de = PROC_I(inode)->pde; 68 de = PROC_I(inode)->pde;
@@ -94,8 +91,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
94 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); 91 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
95 if (!ei) 92 if (!ei)
96 return NULL; 93 return NULL;
97 ei->task = NULL; 94 ei->pid = NULL;
98 ei->type = 0; 95 ei->fd = 0;
99 ei->op.proc_get_link = NULL; 96 ei->op.proc_get_link = NULL;
100 ei->pde = NULL; 97 ei->pde = NULL;
101 inode = &ei->vfs_inode; 98 inode = &ei->vfs_inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0502f17b860d..146a434ba944 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,16 +37,30 @@ extern int proc_tgid_stat(struct task_struct *, char *);
37extern int proc_pid_status(struct task_struct *, char *); 37extern int proc_pid_status(struct task_struct *, char *);
38extern int proc_pid_statm(struct task_struct *, char *); 38extern int proc_pid_statm(struct task_struct *, char *);
39 39
40extern struct file_operations proc_maps_operations;
41extern struct file_operations proc_numa_maps_operations;
42extern struct file_operations proc_smaps_operations;
43
44extern struct file_operations proc_maps_operations;
45extern struct file_operations proc_numa_maps_operations;
46extern struct file_operations proc_smaps_operations;
47
48
40void free_proc_entry(struct proc_dir_entry *de); 49void free_proc_entry(struct proc_dir_entry *de);
41 50
42int proc_init_inodecache(void); 51int proc_init_inodecache(void);
43 52
44static inline struct task_struct *proc_task(struct inode *inode) 53static inline struct pid *proc_pid(struct inode *inode)
54{
55 return PROC_I(inode)->pid;
56}
57
58static inline struct task_struct *get_proc_task(struct inode *inode)
45{ 59{
46 return PROC_I(inode)->task; 60 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
47} 61}
48 62
49static inline int proc_type(struct inode *inode) 63static inline int proc_fd(struct inode *inode)
50{ 64{
51 return PROC_I(inode)->type; 65 return PROC_I(inode)->fd;
52} 66}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 91b7c15ab373..0137ec4c1368 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,9 +75,13 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
75{ 75{
76 struct vm_area_struct * vma; 76 struct vm_area_struct * vma;
77 int result = -ENOENT; 77 int result = -ENOENT;
78 struct task_struct *task = proc_task(inode); 78 struct task_struct *task = get_proc_task(inode);
79 struct mm_struct * mm = get_task_mm(task); 79 struct mm_struct * mm = NULL;
80 80
81 if (task) {
82 mm = get_task_mm(task);
83 put_task_struct(task);
84 }
81 if (!mm) 85 if (!mm)
82 goto out; 86 goto out;
83 down_read(&mm->mmap_sem); 87 down_read(&mm->mmap_sem);
@@ -120,7 +124,8 @@ struct mem_size_stats
120 124
121static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) 125static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
122{ 126{
123 struct task_struct *task = m->private; 127 struct proc_maps_private *priv = m->private;
128 struct task_struct *task = priv->task;
124 struct vm_area_struct *vma = v; 129 struct vm_area_struct *vma = v;
125 struct mm_struct *mm = vma->vm_mm; 130 struct mm_struct *mm = vma->vm_mm;
126 struct file *file = vma->vm_file; 131 struct file *file = vma->vm_file;
@@ -295,12 +300,16 @@ static int show_smap(struct seq_file *m, void *v)
295 300
296static void *m_start(struct seq_file *m, loff_t *pos) 301static void *m_start(struct seq_file *m, loff_t *pos)
297{ 302{
298 struct task_struct *task = m->private; 303 struct proc_maps_private *priv = m->private;
299 unsigned long last_addr = m->version; 304 unsigned long last_addr = m->version;
300 struct mm_struct *mm; 305 struct mm_struct *mm;
301 struct vm_area_struct *vma, *tail_vma; 306 struct vm_area_struct *vma, *tail_vma = NULL;
302 loff_t l = *pos; 307 loff_t l = *pos;
303 308
309 /* Clear the per syscall fields in priv */
310 priv->task = NULL;
311 priv->tail_vma = NULL;
312
304 /* 313 /*
305 * We remember last_addr rather than next_addr to hit with 314 * We remember last_addr rather than next_addr to hit with
306 * mmap_cache most of the time. We have zero last_addr at 315 * mmap_cache most of the time. We have zero last_addr at
@@ -311,11 +320,15 @@ static void *m_start(struct seq_file *m, loff_t *pos)
311 if (last_addr == -1UL) 320 if (last_addr == -1UL)
312 return NULL; 321 return NULL;
313 322
314 mm = get_task_mm(task); 323 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
324 if (!priv->task)
325 return NULL;
326
327 mm = get_task_mm(priv->task);
315 if (!mm) 328 if (!mm)
316 return NULL; 329 return NULL;
317 330
318 tail_vma = get_gate_vma(task); 331 priv->tail_vma = tail_vma = get_gate_vma(priv->task);
319 down_read(&mm->mmap_sem); 332 down_read(&mm->mmap_sem);
320 333
321 /* Start with last addr hint */ 334 /* Start with last addr hint */
@@ -350,11 +363,9 @@ out:
350 return tail_vma; 363 return tail_vma;
351} 364}
352 365
353static void m_stop(struct seq_file *m, void *v) 366static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
354{ 367{
355 struct task_struct *task = m->private; 368 if (vma && vma != priv->tail_vma) {
356 struct vm_area_struct *vma = v;
357 if (vma && vma != get_gate_vma(task)) {
358 struct mm_struct *mm = vma->vm_mm; 369 struct mm_struct *mm = vma->vm_mm;
359 up_read(&mm->mmap_sem); 370 up_read(&mm->mmap_sem);
360 mmput(mm); 371 mmput(mm);
@@ -363,38 +374,103 @@ static void m_stop(struct seq_file *m, void *v)
363 374
364static void *m_next(struct seq_file *m, void *v, loff_t *pos) 375static void *m_next(struct seq_file *m, void *v, loff_t *pos)
365{ 376{
366 struct task_struct *task = m->private; 377 struct proc_maps_private *priv = m->private;
367 struct vm_area_struct *vma = v; 378 struct vm_area_struct *vma = v;
368 struct vm_area_struct *tail_vma = get_gate_vma(task); 379 struct vm_area_struct *tail_vma = priv->tail_vma;
369 380
370 (*pos)++; 381 (*pos)++;
371 if (vma && (vma != tail_vma) && vma->vm_next) 382 if (vma && (vma != tail_vma) && vma->vm_next)
372 return vma->vm_next; 383 return vma->vm_next;
373 m_stop(m, v); 384 vma_stop(priv, vma);
374 return (vma != tail_vma)? tail_vma: NULL; 385 return (vma != tail_vma)? tail_vma: NULL;
375} 386}
376 387
377struct seq_operations proc_pid_maps_op = { 388static void m_stop(struct seq_file *m, void *v)
389{
390 struct proc_maps_private *priv = m->private;
391 struct vm_area_struct *vma = v;
392
393 vma_stop(priv, vma);
394 if (priv->task)
395 put_task_struct(priv->task);
396}
397
398static struct seq_operations proc_pid_maps_op = {
378 .start = m_start, 399 .start = m_start,
379 .next = m_next, 400 .next = m_next,
380 .stop = m_stop, 401 .stop = m_stop,
381 .show = show_map 402 .show = show_map
382}; 403};
383 404
384struct seq_operations proc_pid_smaps_op = { 405static struct seq_operations proc_pid_smaps_op = {
385 .start = m_start, 406 .start = m_start,
386 .next = m_next, 407 .next = m_next,
387 .stop = m_stop, 408 .stop = m_stop,
388 .show = show_smap 409 .show = show_smap
389}; 410};
390 411
412static int do_maps_open(struct inode *inode, struct file *file,
413 struct seq_operations *ops)
414{
415 struct proc_maps_private *priv;
416 int ret = -ENOMEM;
417 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
418 if (priv) {
419 priv->pid = proc_pid(inode);
420 ret = seq_open(file, ops);
421 if (!ret) {
422 struct seq_file *m = file->private_data;
423 m->private = priv;
424 } else {
425 kfree(priv);
426 }
427 }
428 return ret;
429}
430
431static int maps_open(struct inode *inode, struct file *file)
432{
433 return do_maps_open(inode, file, &proc_pid_maps_op);
434}
435
436struct file_operations proc_maps_operations = {
437 .open = maps_open,
438 .read = seq_read,
439 .llseek = seq_lseek,
440 .release = seq_release_private,
441};
442
391#ifdef CONFIG_NUMA 443#ifdef CONFIG_NUMA
392extern int show_numa_map(struct seq_file *m, void *v); 444extern int show_numa_map(struct seq_file *m, void *v);
393 445
394struct seq_operations proc_pid_numa_maps_op = { 446static struct seq_operations proc_pid_numa_maps_op = {
395 .start = m_start, 447 .start = m_start,
396 .next = m_next, 448 .next = m_next,
397 .stop = m_stop, 449 .stop = m_stop,
398 .show = show_numa_map 450 .show = show_numa_map
399}; 451};
452
453static int numa_maps_open(struct inode *inode, struct file *file)
454{
455 return do_maps_open(inode, file, &proc_pid_numa_maps_op);
456}
457
458struct file_operations proc_numa_maps_operations = {
459 .open = numa_maps_open,
460 .read = seq_read,
461 .llseek = seq_lseek,
462 .release = seq_release_private,
463};
400#endif 464#endif
465
466static int smaps_open(struct inode *inode, struct file *file)
467{
468 return do_maps_open(inode, file, &proc_pid_smaps_op);
469}
470
471struct file_operations proc_smaps_operations = {
472 .open = smaps_open,
473 .read = seq_read,
474 .llseek = seq_lseek,
475 .release = seq_release_private,
476};
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8f68827ed10e..af69f28277b6 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -156,9 +156,28 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
156{ 156{
157 return NULL; 157 return NULL;
158} 158}
159struct seq_operations proc_pid_maps_op = { 159static struct seq_operations proc_pid_maps_op = {
160 .start = m_start, 160 .start = m_start,
161 .next = m_next, 161 .next = m_next,
162 .stop = m_stop, 162 .stop = m_stop,
163 .show = show_map 163 .show = show_map
164}; 164};
165
166static int maps_open(struct inode *inode, struct file *file)
167{
168 int ret;
169 ret = seq_open(file, &proc_pid_maps_op);
170 if (!ret) {
171 struct seq_file *m = file->private_data;
172 m->private = NULL;
173 }
174 return ret;
175}
176
177struct file_operations proc_maps_operations = {
178 .open = maps_open,
179 .read = seq_read,
180 .llseek = seq_lseek,
181 .release = seq_release,
182};
183
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index cf6e1cf40351..752cea12e30f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1560 return res; 1560 return res;
1561} 1561}
1562 1562
1563static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
1564 size_t count, loff_t pos)
1565{
1566 return generic_file_aio_write(iocb, buf, count, pos);
1567}
1568
1569const struct file_operations reiserfs_file_operations = { 1563const struct file_operations reiserfs_file_operations = {
1570 .read = generic_file_read, 1564 .read = generic_file_read,
1571 .write = reiserfs_file_write, 1565 .write = reiserfs_file_write,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_file_operations = {
1575 .fsync = reiserfs_sync_file, 1569 .fsync = reiserfs_sync_file,
1576 .sendfile = generic_file_sendfile, 1570 .sendfile = generic_file_sendfile,
1577 .aio_read = generic_file_aio_read, 1571 .aio_read = generic_file_aio_read,
1578 .aio_write = reiserfs_aio_write, 1572 .aio_write = generic_file_aio_write,
1579 .splice_read = generic_file_splice_read, 1573 .splice_read = generic_file_splice_read,
1580 .splice_write = generic_file_splice_write, 1574 .splice_write = generic_file_splice_write,
1581}; 1575};
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1b73529b8099..49d1a53dbef0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -834,8 +834,7 @@ static int write_ordered_buffers(spinlock_t * lock,
834 get_bh(bh); 834 get_bh(bh);
835 if (test_set_buffer_locked(bh)) { 835 if (test_set_buffer_locked(bh)) {
836 if (!buffer_dirty(bh)) { 836 if (!buffer_dirty(bh)) {
837 list_del_init(&jh->list); 837 list_move(&jh->list, &tmp);
838 list_add(&jh->list, &tmp);
839 goto loop_next; 838 goto loop_next;
840 } 839 }
841 spin_unlock(lock); 840 spin_unlock(lock);
@@ -855,8 +854,7 @@ static int write_ordered_buffers(spinlock_t * lock,
855 ret = -EIO; 854 ret = -EIO;
856 } 855 }
857 if (buffer_dirty(bh)) { 856 if (buffer_dirty(bh)) {
858 list_del_init(&jh->list); 857 list_move(&jh->list, &tmp);
859 list_add(&jh->list, &tmp);
860 add_to_chunk(&chunk, bh, lock, write_ordered_chunk); 858 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
861 } else { 859 } else {
862 reiserfs_free_jh(bh); 860 reiserfs_free_jh(bh);
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c71dd2760d32..c8e96195b96e 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -400,8 +400,7 @@ static int smb_request_send_req(struct smb_request *req)
400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED)) 400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
401 goto out; 401 goto out;
402 402
403 list_del_init(&req->rq_queue); 403 list_move_tail(&req->rq_queue, &server->recvq);
404 list_add_tail(&req->rq_queue, &server->recvq);
405 result = 1; 404 result = 1;
406out: 405out:
407 return result; 406 return result;
@@ -435,8 +434,7 @@ int smb_request_send_server(struct smb_sb_info *server)
435 result = smb_request_send_req(req); 434 result = smb_request_send_req(req);
436 if (result < 0) { 435 if (result < 0) {
437 server->conn_error = result; 436 server->conn_error = result;
438 list_del_init(&req->rq_queue); 437 list_move(&req->rq_queue, &server->xmitq);
439 list_add(&req->rq_queue, &server->xmitq);
440 result = -EIO; 438 result = -EIO;
441 goto out; 439 goto out;
442 } 440 }
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index 3f71384020cb..24577e2c489b 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -193,8 +193,7 @@ int smbiod_retry(struct smb_sb_info *server)
193 if (req->rq_flags & SMB_REQ_RETRY) { 193 if (req->rq_flags & SMB_REQ_RETRY) {
194 /* must move the request to the xmitq */ 194 /* must move the request to the xmitq */
195 VERBOSE("retrying request %p on recvq\n", req); 195 VERBOSE("retrying request %p on recvq\n", req);
196 list_del(&req->rq_queue); 196 list_move(&req->rq_queue, &server->xmitq);
197 list_add(&req->rq_queue, &server->xmitq);
198 continue; 197 continue;
199 } 198 }
200#endif 199#endif
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 610b5bdbe75b..61c42430cba3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -430,10 +430,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
430 i++; 430 i++;
431 /* fallthrough */ 431 /* fallthrough */
432 default: 432 default:
433 if (filp->f_pos == 2) { 433 if (filp->f_pos == 2)
434 list_del(q); 434 list_move(q, &parent_sd->s_children);
435 list_add(q, &parent_sd->s_children); 435
436 }
437 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 436 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
438 struct sysfs_dirent *next; 437 struct sysfs_dirent *next;
439 const char * name; 438 const char * name;
@@ -455,8 +454,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
455 dt_type(next)) < 0) 454 dt_type(next)) < 0)
456 return 0; 455 return 0;
457 456
458 list_del(q); 457 list_move(q, p);
459 list_add(q, p);
460 p = q; 458 p = q;
461 filp->f_pos++; 459 filp->f_pos++;
462 } 460 }
diff --git a/include/asm-arm/hardware/locomo.h b/include/asm-arm/hardware/locomo.h
index 5f10048ec54e..22dfb1737768 100644
--- a/include/asm-arm/hardware/locomo.h
+++ b/include/asm-arm/hardware/locomo.h
@@ -111,6 +111,8 @@
111#define LOCOMO_ALS 0x00 /* Adjust light cycle */ 111#define LOCOMO_ALS 0x00 /* Adjust light cycle */
112#define LOCOMO_ALD 0x04 /* Adjust light duty */ 112#define LOCOMO_ALD 0x04 /* Adjust light duty */
113 113
114#define LOCOMO_ALC_EN 0x8000
115
114/* Backlight controller: TFT signal */ 116/* Backlight controller: TFT signal */
115#define LOCOMO_BACKLIGHT 0x38 117#define LOCOMO_BACKLIGHT 0x38
116#define LOCOMO_TC 0x00 /* TFT control signal */ 118#define LOCOMO_TC 0x00 /* TFT control signal */
@@ -203,4 +205,7 @@ void locomo_gpio_write(struct locomo_dev *ldev, unsigned int bits, unsigned int
203/* M62332 control function */ 205/* M62332 control function */
204void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int channel); 206void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int channel);
205 207
208/* Frontlight control */
209void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf);
210
206#endif 211#endif
diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h
index 456db8501c09..b1c7650dc7b9 100644
--- a/include/asm-i386/delay.h
+++ b/include/asm-i386/delay.h
@@ -23,4 +23,6 @@ extern void __delay(unsigned long loops);
23 ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ 23 ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
24 __ndelay(n)) 24 __ndelay(n))
25 25
26void use_tsc_delay(void);
27
26#endif /* defined(_I386_DELAY_H) */ 28#endif /* defined(_I386_DELAY_H) */
diff --git a/include/asm-i386/kdebug.h b/include/asm-i386/kdebug.h
index 96d0828ce096..d18cdb9fc9a6 100644
--- a/include/asm-i386/kdebug.h
+++ b/include/asm-i386/kdebug.h
@@ -19,6 +19,8 @@ struct die_args {
19 19
20extern int register_die_notifier(struct notifier_block *); 20extern int register_die_notifier(struct notifier_block *);
21extern int unregister_die_notifier(struct notifier_block *); 21extern int unregister_die_notifier(struct notifier_block *);
22extern int register_page_fault_notifier(struct notifier_block *);
23extern int unregister_page_fault_notifier(struct notifier_block *);
22extern struct atomic_notifier_head i386die_chain; 24extern struct atomic_notifier_head i386die_chain;
23 25
24 26
diff --git a/include/asm-i386/kprobes.h b/include/asm-i386/kprobes.h
index 57d157c5cf89..0730a20f6db8 100644
--- a/include/asm-i386/kprobes.h
+++ b/include/asm-i386/kprobes.h
@@ -44,6 +44,7 @@ typedef u8 kprobe_opcode_t;
44 44
45#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry 45#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry
46#define ARCH_SUPPORTS_KRETPROBES 46#define ARCH_SUPPORTS_KRETPROBES
47#define ARCH_INACTIVE_KPROBE_COUNT 0
47 48
48void arch_remove_kprobe(struct kprobe *p); 49void arch_remove_kprobe(struct kprobe *p);
49void kretprobe_trampoline(void); 50void kretprobe_trampoline(void);
diff --git a/include/asm-i386/mach-default/mach_timer.h b/include/asm-i386/mach-default/mach_timer.h
index 4b9703bb0288..807992fd4171 100644
--- a/include/asm-i386/mach-default/mach_timer.h
+++ b/include/asm-i386/mach-default/mach_timer.h
@@ -15,7 +15,9 @@
15#ifndef _MACH_TIMER_H 15#ifndef _MACH_TIMER_H
16#define _MACH_TIMER_H 16#define _MACH_TIMER_H
17 17
18#define CALIBRATE_LATCH (5 * LATCH) 18#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
19#define CALIBRATE_LATCH \
20 ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
19 21
20static inline void mach_prepare_counter(void) 22static inline void mach_prepare_counter(void)
21{ 23{
diff --git a/include/asm-i386/mach-summit/mach_mpparse.h b/include/asm-i386/mach-summit/mach_mpparse.h
index 1cce2b924a80..94268399170d 100644
--- a/include/asm-i386/mach-summit/mach_mpparse.h
+++ b/include/asm-i386/mach-summit/mach_mpparse.h
@@ -2,6 +2,7 @@
2#define __ASM_MACH_MPPARSE_H 2#define __ASM_MACH_MPPARSE_H
3 3
4#include <mach_apic.h> 4#include <mach_apic.h>
5#include <asm/tsc.h>
5 6
6extern int use_cyclone; 7extern int use_cyclone;
7 8
@@ -29,6 +30,7 @@ static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
29 (!strncmp(productid, "VIGIL SMP", 9) 30 (!strncmp(productid, "VIGIL SMP", 9)
30 || !strncmp(productid, "EXA", 3) 31 || !strncmp(productid, "EXA", 3)
31 || !strncmp(productid, "RUTHLESS SMP", 12))){ 32 || !strncmp(productid, "RUTHLESS SMP", 12))){
33 mark_tsc_unstable();
32 use_cyclone = 1; /*enable cyclone-timer*/ 34 use_cyclone = 1; /*enable cyclone-timer*/
33 setup_summit(); 35 setup_summit();
34 return 1; 36 return 1;
@@ -42,6 +44,7 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
42 if (!strncmp(oem_id, "IBM", 3) && 44 if (!strncmp(oem_id, "IBM", 3) &&
43 (!strncmp(oem_table_id, "SERVIGIL", 8) 45 (!strncmp(oem_table_id, "SERVIGIL", 8)
44 || !strncmp(oem_table_id, "EXA", 3))){ 46 || !strncmp(oem_table_id, "EXA", 3))){
47 mark_tsc_unstable();
45 use_cyclone = 1; /*enable cyclone-timer*/ 48 use_cyclone = 1; /*enable cyclone-timer*/
46 setup_summit(); 49 setup_summit();
47 return 1; 50 return 1;
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h
index aed16437479d..d0ebd05f8516 100644
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -3,68 +3,11 @@
3#include <linux/init.h> 3#include <linux/init.h>
4#include <linux/pm.h> 4#include <linux/pm.h>
5 5
6/**
7 * struct timer_ops - used to define a timer source
8 *
9 * @name: name of the timer.
10 * @init: Probes and initializes the timer. Takes clock= override
11 * string as an argument. Returns 0 on success, anything else
12 * on failure.
13 * @mark_offset: called by the timer interrupt.
14 * @get_offset: called by gettimeofday(). Returns the number of microseconds
15 * since the last timer interupt.
16 * @monotonic_clock: returns the number of nanoseconds since the init of the
17 * timer.
18 * @delay: delays this many clock cycles.
19 */
20struct timer_opts {
21 char* name;
22 void (*mark_offset)(void);
23 unsigned long (*get_offset)(void);
24 unsigned long long (*monotonic_clock)(void);
25 void (*delay)(unsigned long);
26 unsigned long (*read_timer)(void);
27 int (*suspend)(pm_message_t state);
28 int (*resume)(void);
29};
30
31struct init_timer_opts {
32 int (*init)(char *override);
33 struct timer_opts *opts;
34};
35
36#define TICK_SIZE (tick_nsec / 1000) 6#define TICK_SIZE (tick_nsec / 1000)
37
38extern struct timer_opts* __init select_timer(void);
39extern void clock_fallback(void);
40void setup_pit_timer(void); 7void setup_pit_timer(void);
41
42/* Modifiers for buggy PIT handling */ 8/* Modifiers for buggy PIT handling */
43
44extern int pit_latch_buggy; 9extern int pit_latch_buggy;
45
46extern struct timer_opts *cur_timer;
47extern int timer_ack; 10extern int timer_ack;
48
49/* list of externed timers */
50extern struct timer_opts timer_none;
51extern struct timer_opts timer_pit;
52extern struct init_timer_opts timer_pit_init;
53extern struct init_timer_opts timer_tsc_init;
54#ifdef CONFIG_X86_CYCLONE_TIMER
55extern struct init_timer_opts timer_cyclone_init;
56#endif
57
58extern unsigned long calibrate_tsc(void);
59extern unsigned long read_timer_tsc(void);
60extern void init_cpu_khz(void);
61extern int recalibrate_cpu_khz(void); 11extern int recalibrate_cpu_khz(void);
62#ifdef CONFIG_HPET_TIMER
63extern struct init_timer_opts timer_hpet_init;
64extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr);
65#endif
66 12
67#ifdef CONFIG_X86_PM_TIMER
68extern struct init_timer_opts timer_pmtmr_init;
69#endif
70#endif 13#endif
diff --git a/include/asm-i386/timex.h b/include/asm-i386/timex.h
index d434984303ca..3666044409f0 100644
--- a/include/asm-i386/timex.h
+++ b/include/asm-i386/timex.h
@@ -7,6 +7,7 @@
7#define _ASMi386_TIMEX_H 7#define _ASMi386_TIMEX_H
8 8
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/tsc.h>
10 11
11#ifdef CONFIG_X86_ELAN 12#ifdef CONFIG_X86_ELAN
12# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */ 13# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
@@ -15,39 +16,6 @@
15#endif 16#endif
16 17
17 18
18/*
19 * Standard way to access the cycle counter on i586+ CPUs.
20 * Currently only used on SMP.
21 *
22 * If you really have a SMP machine with i486 chips or older,
23 * compile for that, and this will just always return zero.
24 * That's ok, it just means that the nicer scheduling heuristics
25 * won't work for you.
26 *
27 * We only use the low 32 bits, and we'd simply better make sure
28 * that we reschedule before that wraps. Scheduling at least every
29 * four billion cycles just basically sounds like a good idea,
30 * regardless of how fast the machine is.
31 */
32typedef unsigned long long cycles_t;
33
34static inline cycles_t get_cycles (void)
35{
36 unsigned long long ret=0;
37
38#ifndef CONFIG_X86_TSC
39 if (!cpu_has_tsc)
40 return 0;
41#endif
42
43#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
44 rdtscll(ret);
45#endif
46 return ret;
47}
48
49extern unsigned int cpu_khz;
50
51extern int read_current_timer(unsigned long *timer_value); 19extern int read_current_timer(unsigned long *timer_value);
52#define ARCH_HAS_READ_CURRENT_TIMER 1 20#define ARCH_HAS_READ_CURRENT_TIMER 1
53 21
diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
new file mode 100644
index 000000000000..97b828ce31e0
--- /dev/null
+++ b/include/asm-i386/tsc.h
@@ -0,0 +1,49 @@
1/*
2 * linux/include/asm-i386/tsc.h
3 *
4 * i386 TSC related functions
5 */
6#ifndef _ASM_i386_TSC_H
7#define _ASM_i386_TSC_H
8
9#include <linux/config.h>
10#include <asm/processor.h>
11
12/*
13 * Standard way to access the cycle counter on i586+ CPUs.
14 * Currently only used on SMP.
15 *
16 * If you really have a SMP machine with i486 chips or older,
17 * compile for that, and this will just always return zero.
18 * That's ok, it just means that the nicer scheduling heuristics
19 * won't work for you.
20 *
21 * We only use the low 32 bits, and we'd simply better make sure
22 * that we reschedule before that wraps. Scheduling at least every
23 * four billion cycles just basically sounds like a good idea,
24 * regardless of how fast the machine is.
25 */
26typedef unsigned long long cycles_t;
27
28extern unsigned int cpu_khz;
29extern unsigned int tsc_khz;
30
31static inline cycles_t get_cycles(void)
32{
33 unsigned long long ret = 0;
34
35#ifndef CONFIG_X86_TSC
36 if (!cpu_has_tsc)
37 return 0;
38#endif
39
40#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
41 rdtscll(ret);
42#endif
43 return ret;
44}
45
46extern void tsc_init(void);
47extern void mark_tsc_unstable(void);
48
49#endif
diff --git a/include/asm-ia64/kdebug.h b/include/asm-ia64/kdebug.h
index c195a9ad1255..aed7142f9e4a 100644
--- a/include/asm-ia64/kdebug.h
+++ b/include/asm-ia64/kdebug.h
@@ -40,6 +40,8 @@ struct die_args {
40 40
41extern int register_die_notifier(struct notifier_block *); 41extern int register_die_notifier(struct notifier_block *);
42extern int unregister_die_notifier(struct notifier_block *); 42extern int unregister_die_notifier(struct notifier_block *);
43extern int register_page_fault_notifier(struct notifier_block *);
44extern int unregister_page_fault_notifier(struct notifier_block *);
43extern struct atomic_notifier_head ia64die_chain; 45extern struct atomic_notifier_head ia64die_chain;
44 46
45enum die_val { 47enum die_val {
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index 8c0fc227f0fb..2418a787c405 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -82,6 +82,7 @@ struct kprobe_ctlblk {
82#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry 82#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry
83 83
84#define ARCH_SUPPORTS_KRETPROBES 84#define ARCH_SUPPORTS_KRETPROBES
85#define ARCH_INACTIVE_KPROBE_COUNT 1
85 86
86#define SLOT0_OPCODE_SHIFT (37) 87#define SLOT0_OPCODE_SHIFT (37)
87#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46)) 88#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46))
diff --git a/include/asm-powerpc/kdebug.h b/include/asm-powerpc/kdebug.h
index c01786ab5fa6..532bfee934f4 100644
--- a/include/asm-powerpc/kdebug.h
+++ b/include/asm-powerpc/kdebug.h
@@ -18,6 +18,8 @@ struct die_args {
18 18
19extern int register_die_notifier(struct notifier_block *); 19extern int register_die_notifier(struct notifier_block *);
20extern int unregister_die_notifier(struct notifier_block *); 20extern int unregister_die_notifier(struct notifier_block *);
21extern int register_page_fault_notifier(struct notifier_block *);
22extern int unregister_page_fault_notifier(struct notifier_block *);
21extern struct atomic_notifier_head powerpc_die_chain; 23extern struct atomic_notifier_head powerpc_die_chain;
22 24
23/* Grossly misnamed. */ 25/* Grossly misnamed. */
diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h
index f466bc804f41..2d0af52c823d 100644
--- a/include/asm-powerpc/kprobes.h
+++ b/include/asm-powerpc/kprobes.h
@@ -50,6 +50,8 @@ typedef unsigned int kprobe_opcode_t;
50 IS_TWI(instr) || IS_TDI(instr)) 50 IS_TWI(instr) || IS_TDI(instr))
51 51
52#define ARCH_SUPPORTS_KRETPROBES 52#define ARCH_SUPPORTS_KRETPROBES
53#define ARCH_INACTIVE_KPROBE_COUNT 1
54
53void kretprobe_trampoline(void); 55void kretprobe_trampoline(void);
54extern void arch_remove_kprobe(struct kprobe *p); 56extern void arch_remove_kprobe(struct kprobe *p);
55 57
diff --git a/include/asm-sparc64/kdebug.h b/include/asm-sparc64/kdebug.h
index 4040d127ac3e..11251bdd00cb 100644
--- a/include/asm-sparc64/kdebug.h
+++ b/include/asm-sparc64/kdebug.h
@@ -17,6 +17,8 @@ struct die_args {
17 17
18extern int register_die_notifier(struct notifier_block *); 18extern int register_die_notifier(struct notifier_block *);
19extern int unregister_die_notifier(struct notifier_block *); 19extern int unregister_die_notifier(struct notifier_block *);
20extern int register_page_fault_notifier(struct notifier_block *);
21extern int unregister_page_fault_notifier(struct notifier_block *);
20extern struct atomic_notifier_head sparc64die_chain; 22extern struct atomic_notifier_head sparc64die_chain;
21 23
22extern void bad_trap(struct pt_regs *, long); 24extern void bad_trap(struct pt_regs *, long);
diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h
index e9bb26f770ed..15065af566c2 100644
--- a/include/asm-sparc64/kprobes.h
+++ b/include/asm-sparc64/kprobes.h
@@ -12,6 +12,7 @@ typedef u32 kprobe_opcode_t;
12 12
13#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry 13#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry
14#define arch_remove_kprobe(p) do {} while (0) 14#define arch_remove_kprobe(p) do {} while (0)
15#define ARCH_INACTIVE_KPROBE_COUNT 0
15 16
16/* Architecture specific copy of original instruction*/ 17/* Architecture specific copy of original instruction*/
17struct arch_specific_insn { 18struct arch_specific_insn {
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
index cf795631d9b4..cd52c7f33bca 100644
--- a/include/asm-x86_64/kdebug.h
+++ b/include/asm-x86_64/kdebug.h
@@ -15,6 +15,8 @@ struct die_args {
15 15
16extern int register_die_notifier(struct notifier_block *); 16extern int register_die_notifier(struct notifier_block *);
17extern int unregister_die_notifier(struct notifier_block *); 17extern int unregister_die_notifier(struct notifier_block *);
18extern int register_page_fault_notifier(struct notifier_block *);
19extern int unregister_page_fault_notifier(struct notifier_block *);
18extern struct atomic_notifier_head die_chain; 20extern struct atomic_notifier_head die_chain;
19 21
20/* Grossly misnamed. */ 22/* Grossly misnamed. */
diff --git a/include/asm-x86_64/kprobes.h b/include/asm-x86_64/kprobes.h
index 98a1e95ddb98..d36febd9bb18 100644
--- a/include/asm-x86_64/kprobes.h
+++ b/include/asm-x86_64/kprobes.h
@@ -43,6 +43,7 @@ typedef u8 kprobe_opcode_t;
43 43
44#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry 44#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry
45#define ARCH_SUPPORTS_KRETPROBES 45#define ARCH_SUPPORTS_KRETPROBES
46#define ARCH_INACTIVE_KPROBE_COUNT 1
46 47
47void kretprobe_trampoline(void); 48void kretprobe_trampoline(void);
48extern void arch_remove_kprobe(struct kprobe *p); 49extern void arch_remove_kprobe(struct kprobe *p);
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index a3dae1803f45..c37c34275a44 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -37,6 +37,7 @@ extern struct key_type key_type_user;
37extern int user_instantiate(struct key *key, const void *data, size_t datalen); 37extern int user_instantiate(struct key *key, const void *data, size_t datalen);
38extern int user_update(struct key *key, const void *data, size_t datalen); 38extern int user_update(struct key *key, const void *data, size_t datalen);
39extern int user_match(const struct key *key, const void *criterion); 39extern int user_match(const struct key *key, const void *criterion);
40extern void user_revoke(struct key *key);
40extern void user_destroy(struct key *key); 41extern void user_destroy(struct key *key);
41extern void user_describe(const struct key *user, struct seq_file *m); 42extern void user_describe(const struct key *user, struct seq_file *m);
42extern long user_read(const struct key *key, 43extern long user_read(const struct key *key,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
new file mode 100644
index 000000000000..d852024ed095
--- /dev/null
+++ b/include/linux/clocksource.h
@@ -0,0 +1,185 @@
1/* linux/include/linux/clocksource.h
2 *
3 * This file contains the structure definitions for clocksources.
4 *
5 * If you are not a clocksource, or timekeeping code, you should
6 * not be including this file!
7 */
8#ifndef _LINUX_CLOCKSOURCE_H
9#define _LINUX_CLOCKSOURCE_H
10
11#include <linux/types.h>
12#include <linux/timex.h>
13#include <linux/time.h>
14#include <linux/list.h>
15#include <asm/div64.h>
16#include <asm/io.h>
17
18/* clocksource cycle base type */
19typedef u64 cycle_t;
20
21/**
22 * struct clocksource - hardware abstraction for a free running counter
23 * Provides mostly state-free accessors to the underlying hardware.
24 *
25 * @name: ptr to clocksource name
26 * @list: list head for registration
27 * @rating: rating value for selection (higher is better)
28 * To avoid rating inflation the following
29 * list should give you a guide as to how
30 * to assign your clocksource a rating
31 * 1-99: Unfit for real use
32 * Only available for bootup and testing purposes.
33 * 100-199: Base level usability.
34 * Functional for real use, but not desired.
35 * 200-299: Good.
36 * A correct and usable clocksource.
37 * 300-399: Desired.
38 * A reasonably fast and accurate clocksource.
39 * 400-499: Perfect
40 * The ideal clocksource. A must-use where
41 * available.
42 * @read: returns a cycle value
43 * @mask: bitmask for two's complement
44 * subtraction of non 64 bit counters
45 * @mult: cycle to nanosecond multiplier
46 * @shift: cycle to nanosecond divisor (power of two)
47 * @update_callback: called when safe to alter clocksource values
48 * @is_continuous: defines if clocksource is free-running.
49 * @cycle_interval: Used internally by timekeeping core, please ignore.
50 * @xtime_interval: Used internally by timekeeping core, please ignore.
51 */
52struct clocksource {
53 char *name;
54 struct list_head list;
55 int rating;
56 cycle_t (*read)(void);
57 cycle_t mask;
58 u32 mult;
59 u32 shift;
60 int (*update_callback)(void);
61 int is_continuous;
62
63 /* timekeeping specific data, ignore */
64 cycle_t cycle_last, cycle_interval;
65 u64 xtime_nsec, xtime_interval;
66 s64 error;
67};
68
69/* simplify initialization of mask field */
70#define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL<<bits)-1) : -1)
71
72/**
73 * clocksource_khz2mult - calculates mult from khz and shift
74 * @khz: Clocksource frequency in KHz
75 * @shift_constant: Clocksource shift factor
76 *
77 * Helper function that converts a khz counter frequency to a clocksource
78 * multiplier, given the clocksource shift value
79 */
80static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant)
81{
82 /* khz = cyc/(Million ns)
83 * mult/2^shift = ns/cyc
84 * mult = ns/cyc * 2^shift
85 * mult = 1Million/khz * 2^shift
86 * mult = 1000000 * 2^shift / khz
87 * mult = (1000000<<shift) / khz
88 */
89 u64 tmp = ((u64)1000000) << shift_constant;
90
91 tmp += khz/2; /* round for do_div */
92 do_div(tmp, khz);
93
94 return (u32)tmp;
95}
96
97/**
98 * clocksource_hz2mult - calculates mult from hz and shift
99 * @hz: Clocksource frequency in Hz
100 * @shift_constant: Clocksource shift factor
101 *
102 * Helper function that converts a hz counter
103 * frequency to a clocksource multiplier, given the
104 * clocksource shift value
105 */
106static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
107{
108 /* hz = cyc/(Billion ns)
109 * mult/2^shift = ns/cyc
110 * mult = ns/cyc * 2^shift
111 * mult = 1Billion/hz * 2^shift
112 * mult = 1000000000 * 2^shift / hz
113 * mult = (1000000000<<shift) / hz
114 */
115 u64 tmp = ((u64)1000000000) << shift_constant;
116
117 tmp += hz/2; /* round for do_div */
118 do_div(tmp, hz);
119
120 return (u32)tmp;
121}
122
123/**
124 * clocksource_read - Access the clocksource's current cycle value
125 * @cs: pointer to clocksource being read
126 *
127 * Uses the clocksource to return the current cycle_t value
128 */
129static inline cycle_t clocksource_read(struct clocksource *cs)
130{
131 return cs->read();
132}
133
134/**
135 * cyc2ns - converts clocksource cycles to nanoseconds
136 * @cs: Pointer to clocksource
137 * @cycles: Cycles
138 *
139 * Uses the clocksource and ntp adjustment to convert cycle_ts to nanoseconds.
140 *
141 * XXX - This could use some mult_lxl_ll() asm optimization
142 */
143static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles)
144{
145 u64 ret = (u64)cycles;
146 ret = (ret * cs->mult) >> cs->shift;
147 return ret;
148}
149
150/**
151 * clocksource_calculate_interval - Calculates a clocksource interval struct
152 *
153 * @c: Pointer to clocksource.
154 * @length_nsec: Desired interval length in nanoseconds.
155 *
156 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
157 * pair and interval request.
158 *
159 * Unless you're the timekeeping code, you should not be using this!
160 */
161static inline void clocksource_calculate_interval(struct clocksource *c,
162 unsigned long length_nsec)
163{
164 u64 tmp;
165
166 /* XXX - All of this could use a whole lot of optimization */
167 tmp = length_nsec;
168 tmp <<= c->shift;
169 tmp += c->mult/2;
170 do_div(tmp, c->mult);
171
172 c->cycle_interval = (cycle_t)tmp;
173 if (c->cycle_interval == 0)
174 c->cycle_interval = 1;
175
176 c->xtime_interval = (u64)c->cycle_interval * c->mult;
177}
178
179
180/* used to install a new clocksource */
181int clocksource_register(struct clocksource*);
182void clocksource_reselect(void);
183struct clocksource* clocksource_get_next(void);
184
185#endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 89ab677cb993..917d62e41480 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -673,6 +673,11 @@ COMPATIBLE_IOCTL(CAPI_SET_FLAGS)
673COMPATIBLE_IOCTL(CAPI_CLR_FLAGS) 673COMPATIBLE_IOCTL(CAPI_CLR_FLAGS)
674COMPATIBLE_IOCTL(CAPI_NCCI_OPENCOUNT) 674COMPATIBLE_IOCTL(CAPI_NCCI_OPENCOUNT)
675COMPATIBLE_IOCTL(CAPI_NCCI_GETUNIT) 675COMPATIBLE_IOCTL(CAPI_NCCI_GETUNIT)
676/* Siemens Gigaset */
677COMPATIBLE_IOCTL(GIGASET_REDIR)
678COMPATIBLE_IOCTL(GIGASET_CONFIG)
679COMPATIBLE_IOCTL(GIGASET_BRKCHARS)
680COMPATIBLE_IOCTL(GIGASET_VERSION)
676/* Misc. */ 681/* Misc. */
677COMPATIBLE_IOCTL(0x41545900) /* ATYIO_CLKR */ 682COMPATIBLE_IOCTL(0x41545900) /* ATYIO_CLKR */
678COMPATIBLE_IOCTL(0x41545901) /* ATYIO_CLKW */ 683COMPATIBLE_IOCTL(0x41545901) /* ATYIO_CLKW */
diff --git a/include/linux/console.h b/include/linux/console.h
index d0f8a8009490..3bdf2155e565 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -63,9 +63,11 @@ extern const struct consw vga_con; /* VGA text console */
63extern const struct consw newport_con; /* SGI Newport console */ 63extern const struct consw newport_con; /* SGI Newport console */
64extern const struct consw prom_con; /* SPARC PROM console */ 64extern const struct consw prom_con; /* SPARC PROM console */
65 65
66int con_is_bound(const struct consw *csw);
67int register_con_driver(const struct consw *csw, int first, int last);
68int unregister_con_driver(const struct consw *csw);
66int take_over_console(const struct consw *sw, int first, int last, int deflt); 69int take_over_console(const struct consw *sw, int first, int last, int deflt);
67void give_up_console(const struct consw *sw); 70void give_up_console(const struct consw *sw);
68
69/* scroll */ 71/* scroll */
70#define SM_UP (1) 72#define SM_UP (1)
71#define SM_DOWN (2) 73#define SM_DOWN (2)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index aee10b2ea4c6..e3d1c33d1558 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -8,9 +8,12 @@
8#ifndef _LINUX_DEVICE_MAPPER_H 8#ifndef _LINUX_DEVICE_MAPPER_H
9#define _LINUX_DEVICE_MAPPER_H 9#define _LINUX_DEVICE_MAPPER_H
10 10
11#ifdef __KERNEL__
12
11struct dm_target; 13struct dm_target;
12struct dm_table; 14struct dm_table;
13struct dm_dev; 15struct dm_dev;
16struct mapped_device;
14 17
15typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; 18typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
16 19
@@ -78,7 +81,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d);
78struct target_type { 81struct target_type {
79 const char *name; 82 const char *name;
80 struct module *module; 83 struct module *module;
81 unsigned version[3]; 84 unsigned version[3];
82 dm_ctr_fn ctr; 85 dm_ctr_fn ctr;
83 dm_dtr_fn dtr; 86 dm_dtr_fn dtr;
84 dm_map_fn map; 87 dm_map_fn map;
@@ -128,4 +131,108 @@ struct dm_target {
128int dm_register_target(struct target_type *t); 131int dm_register_target(struct target_type *t);
129int dm_unregister_target(struct target_type *t); 132int dm_unregister_target(struct target_type *t);
130 133
131#endif /* _LINUX_DEVICE_MAPPER_H */ 134
135/*-----------------------------------------------------------------
136 * Functions for creating and manipulating mapped devices.
137 * Drop the reference with dm_put when you finish with the object.
138 *---------------------------------------------------------------*/
139
140/*
141 * DM_ANY_MINOR chooses the next available minor number.
142 */
143#define DM_ANY_MINOR (-1)
144int dm_create(int minor, struct mapped_device **md);
145
146/*
147 * Reference counting for md.
148 */
149struct mapped_device *dm_get_md(dev_t dev);
150void dm_get(struct mapped_device *md);
151void dm_put(struct mapped_device *md);
152
153/*
154 * An arbitrary pointer may be stored alongside a mapped device.
155 */
156void dm_set_mdptr(struct mapped_device *md, void *ptr);
157void *dm_get_mdptr(struct mapped_device *md);
158
159/*
160 * A device can still be used while suspended, but I/O is deferred.
161 */
162int dm_suspend(struct mapped_device *md, int with_lockfs);
163int dm_resume(struct mapped_device *md);
164
165/*
166 * Event functions.
167 */
168uint32_t dm_get_event_nr(struct mapped_device *md);
169int dm_wait_event(struct mapped_device *md, int event_nr);
170
171/*
172 * Info functions.
173 */
174const char *dm_device_name(struct mapped_device *md);
175struct gendisk *dm_disk(struct mapped_device *md);
176int dm_suspended(struct mapped_device *md);
177
178/*
179 * Geometry functions.
180 */
181int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
182int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
183
184
185/*-----------------------------------------------------------------
186 * Functions for manipulating device-mapper tables.
187 *---------------------------------------------------------------*/
188
189/*
190 * First create an empty table.
191 */
192int dm_table_create(struct dm_table **result, int mode,
193 unsigned num_targets, struct mapped_device *md);
194
195/*
196 * Then call this once for each target.
197 */
198int dm_table_add_target(struct dm_table *t, const char *type,
199 sector_t start, sector_t len, char *params);
200
201/*
202 * Finally call this to make the table ready for use.
203 */
204int dm_table_complete(struct dm_table *t);
205
206/*
207 * Table reference counting.
208 */
209struct dm_table *dm_get_table(struct mapped_device *md);
210void dm_table_get(struct dm_table *t);
211void dm_table_put(struct dm_table *t);
212
213/*
214 * Queries
215 */
216sector_t dm_table_get_size(struct dm_table *t);
217unsigned int dm_table_get_num_targets(struct dm_table *t);
218int dm_table_get_mode(struct dm_table *t);
219struct mapped_device *dm_table_get_md(struct dm_table *t);
220
221/*
222 * Trigger an event.
223 */
224void dm_table_event(struct dm_table *t);
225
226/*
227 * The device must be suspended before calling this method.
228 */
229int dm_swap_table(struct mapped_device *md, struct dm_table *t);
230
231/*
232 * Prepare a table for a device that will error all I/O.
233 * To make it active, call dm_suspend(), dm_swap_table() then dm_resume().
234 */
235int dm_create_error_table(struct dm_table **result, struct mapped_device *md);
236
237#endif /* __KERNEL__ */
238#endif /* _LINUX_DEVICE_MAPPER_H */
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index c67c6786612a..9623bb625090 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -285,9 +285,9 @@ typedef char ioctl_struct[308];
285#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) 285#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
286 286
287#define DM_VERSION_MAJOR 4 287#define DM_VERSION_MAJOR 4
288#define DM_VERSION_MINOR 6 288#define DM_VERSION_MINOR 7
289#define DM_VERSION_PATCHLEVEL 0 289#define DM_VERSION_PATCHLEVEL 0
290#define DM_VERSION_EXTRA "-ioctl (2006-02-17)" 290#define DM_VERSION_EXTRA "-ioctl (2006-06-24)"
291 291
292/* Status bits */ 292/* Status bits */
293#define DM_READONLY_FLAG (1 << 0) /* In/Out */ 293#define DM_READONLY_FLAG (1 << 0) /* In/Out */
@@ -314,7 +314,7 @@ typedef char ioctl_struct[308];
314#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */ 314#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
315 315
316/* 316/*
317 * Set this to improve performance when you aren't going to use open_count. 317 * This flag is now ignored.
318 */ 318 */
319#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */ 319#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */
320 320
diff --git a/include/linux/fb.h b/include/linux/fb.h
index f1281687e549..07a08e92bc73 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -504,23 +504,19 @@ struct fb_cursor_user {
504#define FB_EVENT_MODE_DELETE 0x04 504#define FB_EVENT_MODE_DELETE 0x04
505/* A driver registered itself */ 505/* A driver registered itself */
506#define FB_EVENT_FB_REGISTERED 0x05 506#define FB_EVENT_FB_REGISTERED 0x05
507/* A driver unregistered itself */
508#define FB_EVENT_FB_UNREGISTERED 0x06
507/* CONSOLE-SPECIFIC: get console to framebuffer mapping */ 509/* CONSOLE-SPECIFIC: get console to framebuffer mapping */
508#define FB_EVENT_GET_CONSOLE_MAP 0x06 510#define FB_EVENT_GET_CONSOLE_MAP 0x07
509/* CONSOLE-SPECIFIC: set console to framebuffer mapping */ 511/* CONSOLE-SPECIFIC: set console to framebuffer mapping */
510#define FB_EVENT_SET_CONSOLE_MAP 0x07 512#define FB_EVENT_SET_CONSOLE_MAP 0x08
511/* A display blank is requested */ 513/* A display blank is requested */
512#define FB_EVENT_BLANK 0x08 514#define FB_EVENT_BLANK 0x09
513/* Private modelist is to be replaced */ 515/* Private modelist is to be replaced */
514#define FB_EVENT_NEW_MODELIST 0x09 516#define FB_EVENT_NEW_MODELIST 0x0A
515/* The resolution of the passed in fb_info about to change and 517/* The resolution of the passed in fb_info about to change and
516 all vc's should be changed */ 518 all vc's should be changed */
517#define FB_EVENT_MODE_CHANGE_ALL 0x0A 519#define FB_EVENT_MODE_CHANGE_ALL 0x0B
518/* CONSOLE-SPECIFIC: set console rotation */
519#define FB_EVENT_SET_CON_ROTATE 0x0B
520/* CONSOLE-SPECIFIC: get console rotation */
521#define FB_EVENT_GET_CON_ROTATE 0x0C
522/* CONSOLE-SPECIFIC: rotate all consoles */
523#define FB_EVENT_SET_CON_ROTATE_ALL 0x0D
524 520
525struct fb_event { 521struct fb_event {
526 struct fb_info *info; 522 struct fb_info *info;
@@ -892,7 +888,6 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var,
892 struct fb_fix_screeninfo *fix); 888 struct fb_fix_screeninfo *fix);
893extern int fb_get_options(char *name, char **option); 889extern int fb_get_options(char *name, char **option);
894extern int fb_new_modelist(struct fb_info *info); 890extern int fb_new_modelist(struct fb_info *info);
895extern int fb_con_duit(struct fb_info *info, int event, void *data);
896 891
897extern struct fb_info *registered_fb[FB_MAX]; 892extern struct fb_info *registered_fb[FB_MAX];
898extern int num_registered_fb; 893extern int num_registered_fb;
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
new file mode 100644
index 000000000000..21ea7610e177
--- /dev/null
+++ b/include/linux/hw_random.h
@@ -0,0 +1,50 @@
1/*
2 Hardware Random Number Generator
3
4 Please read Documentation/hw_random.txt for details on use.
5
6 ----------------------------------------------------------
7 This software may be used and distributed according to the terms
8 of the GNU General Public License, incorporated herein by reference.
9
10 */
11
12#ifndef LINUX_HWRANDOM_H_
13#define LINUX_HWRANDOM_H_
14#ifdef __KERNEL__
15
16#include <linux/types.h>
17#include <linux/list.h>
18
19/**
20 * struct hwrng - Hardware Random Number Generator driver
21 * @name: Unique RNG name.
22 * @init: Initialization callback (can be NULL).
23 * @cleanup: Cleanup callback (can be NULL).
24 * @data_present: Callback to determine if data is available
25 * on the RNG. If NULL, it is assumed that
26 * there is always data available.
27 * @data_read: Read data from the RNG device.
28 * Returns the number of lower random bytes in "data".
29 * Must not be NULL.
30 * @priv: Private data, for use by the RNG driver.
31 */
32struct hwrng {
33 const char *name;
34 int (*init)(struct hwrng *rng);
35 void (*cleanup)(struct hwrng *rng);
36 int (*data_present)(struct hwrng *rng);
37 int (*data_read)(struct hwrng *rng, u32 *data);
38 unsigned long priv;
39
40 /* internal. */
41 struct list_head list;
42};
43
44/** Register a new Hardware Random Number Generator driver. */
45extern int hwrng_register(struct hwrng *rng);
46/** Unregister a Hardware Random Number Generator driver. */
47extern void hwrng_unregister(struct hwrng *rng);
48
49#endif /* __KERNEL__ */
50#endif /* LINUX_HWRANDOM_H_ */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index d37c8d808b0f..f559a719dbe8 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -78,6 +78,7 @@ void *idr_find(struct idr *idp, int id);
78int idr_pre_get(struct idr *idp, gfp_t gfp_mask); 78int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
79int idr_get_new(struct idr *idp, void *ptr, int *id); 79int idr_get_new(struct idr *idp, void *ptr, int *id);
80int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); 80int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
81void *idr_replace(struct idr *idp, void *ptr, int id);
81void idr_remove(struct idr *idp, int id); 82void idr_remove(struct idr *idp, int id);
82void idr_destroy(struct idr *idp); 83void idr_destroy(struct idr *idp);
83void idr_init(struct idr *idp); 84void idr_init(struct idr *idp);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 41ecbb847f32..e127ef7e8da8 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -119,7 +119,6 @@ extern struct group_info init_groups;
119 .signal = {{0}}}, \ 119 .signal = {{0}}}, \
120 .blocked = {{0}}, \ 120 .blocked = {{0}}, \
121 .alloc_lock = SPIN_LOCK_UNLOCKED, \ 121 .alloc_lock = SPIN_LOCK_UNLOCKED, \
122 .proc_lock = SPIN_LOCK_UNLOCKED, \
123 .journal_info = NULL, \ 122 .journal_info = NULL, \
124 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 123 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
125 .fs_excl = ATOMIC_INIT(0), \ 124 .fs_excl = ATOMIC_INIT(0), \
diff --git a/include/linux/key.h b/include/linux/key.h
index e81ebf910d0b..e693e729bc92 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -248,7 +248,14 @@ extern struct key *key_alloc(struct key_type *type,
248 const char *desc, 248 const char *desc,
249 uid_t uid, gid_t gid, 249 uid_t uid, gid_t gid,
250 struct task_struct *ctx, 250 struct task_struct *ctx,
251 key_perm_t perm, int not_in_quota); 251 key_perm_t perm,
252 unsigned long flags);
253
254
255#define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */
256#define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */
257#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */
258
252extern int key_payload_reserve(struct key *key, size_t datalen); 259extern int key_payload_reserve(struct key *key, size_t datalen);
253extern int key_instantiate_and_link(struct key *key, 260extern int key_instantiate_and_link(struct key *key,
254 const void *data, 261 const void *data,
@@ -285,7 +292,7 @@ extern key_ref_t key_create_or_update(key_ref_t keyring,
285 const char *description, 292 const char *description,
286 const void *payload, 293 const void *payload,
287 size_t plen, 294 size_t plen,
288 int not_in_quota); 295 unsigned long flags);
289 296
290extern int key_update(key_ref_t key, 297extern int key_update(key_ref_t key,
291 const void *payload, 298 const void *payload,
@@ -299,7 +306,7 @@ extern int key_unlink(struct key *keyring,
299 306
300extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, 307extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
301 struct task_struct *ctx, 308 struct task_struct *ctx,
302 int not_in_quota, 309 unsigned long flags,
303 struct key *dest); 310 struct key *dest);
304 311
305extern int keyring_clear(struct key *keyring); 312extern int keyring_clear(struct key *keyring);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bc747e5d7138..03cd7551a7a1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -699,7 +699,6 @@ extern int dev_hard_start_xmit(struct sk_buff *skb,
699 699
700extern void dev_init(void); 700extern void dev_init(void);
701 701
702extern int netdev_nit;
703extern int netdev_budget; 702extern int netdev_budget;
704 703
705/* Called by rtnetlink.c:rtnl_unlock() */ 704/* Called by rtnetlink.c:rtnl_unlock() */
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index ca5a8733000f..1efe60c5c00c 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -31,6 +31,7 @@ struct netpoll_info {
31 int rx_flags; 31 int rx_flags;
32 spinlock_t rx_lock; 32 spinlock_t rx_lock;
33 struct netpoll *rx_np; /* netpoll that registered an rx_hook */ 33 struct netpoll *rx_np; /* netpoll that registered an rx_hook */
34 struct sk_buff_head arp_tx; /* list of arp requests to reply to */
34}; 35};
35 36
36void netpoll_poll(struct netpoll *np); 37void netpoll_poll(struct netpoll *np);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 5810d28fbed9..17e75783e3a5 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -99,9 +99,8 @@ extern void proc_misc_init(void);
99 99
100struct mm_struct; 100struct mm_struct;
101 101
102void proc_flush_task(struct task_struct *task);
102struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); 103struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
103struct dentry *proc_pid_unhash(struct task_struct *p);
104void proc_pid_flush(struct dentry *proc_dentry);
105int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); 104int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
106unsigned long task_vsize(struct mm_struct *); 105unsigned long task_vsize(struct mm_struct *);
107int task_statm(struct mm_struct *, int *, int *, int *, int *); 106int task_statm(struct mm_struct *, int *, int *, int *, int *);
@@ -211,8 +210,7 @@ static inline void proc_net_remove(const char *name)
211#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) 210#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; })
212static inline void proc_net_remove(const char *name) {} 211static inline void proc_net_remove(const char *name) {}
213 212
214static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } 213static inline void proc_flush_task(struct task_struct *task) { }
215static inline void proc_pid_flush(struct dentry *proc_dentry) { }
216 214
217static inline struct proc_dir_entry *create_proc_entry(const char *name, 215static inline struct proc_dir_entry *create_proc_entry(const char *name,
218 mode_t mode, struct proc_dir_entry *parent) { return NULL; } 216 mode_t mode, struct proc_dir_entry *parent) { return NULL; }
@@ -248,8 +246,8 @@ extern void kclist_add(struct kcore_list *, void *, size_t);
248#endif 246#endif
249 247
250struct proc_inode { 248struct proc_inode {
251 struct task_struct *task; 249 struct pid *pid;
252 int type; 250 int fd;
253 union { 251 union {
254 int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); 252 int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
255 int (*proc_read)(struct task_struct *task, char *page); 253 int (*proc_read)(struct task_struct *task, char *page);
@@ -268,4 +266,10 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
268 return PROC_I(inode)->pde; 266 return PROC_I(inode)->pde;
269} 267}
270 268
269struct proc_maps_private {
270 struct pid *pid;
271 struct task_struct *task;
272 struct vm_area_struct *tail_vma;
273};
274
271#endif /* _LINUX_PROC_FS_H */ 275#endif /* _LINUX_PROC_FS_H */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index ee918bc6e18c..8b2749a259dc 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -88,7 +88,6 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us
88extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); 88extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
89extern int ptrace_attach(struct task_struct *tsk); 89extern int ptrace_attach(struct task_struct *tsk);
90extern int ptrace_detach(struct task_struct *, unsigned int); 90extern int ptrace_detach(struct task_struct *, unsigned int);
91extern void __ptrace_detach(struct task_struct *, unsigned int);
92extern void ptrace_disable(struct task_struct *); 91extern void ptrace_disable(struct task_struct *);
93extern int ptrace_check_attach(struct task_struct *task, int kill); 92extern int ptrace_check_attach(struct task_struct *task, int kill);
94extern int ptrace_request(struct task_struct *child, long request, long addr, long data); 93extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index 899437802aea..63df898fe2e9 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -140,6 +140,7 @@ typedef __u16 bitmap_counter_t;
140enum bitmap_state { 140enum bitmap_state {
141 BITMAP_ACTIVE = 0x001, /* the bitmap is in use */ 141 BITMAP_ACTIVE = 0x001, /* the bitmap is in use */
142 BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */ 142 BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */
143 BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */
143 BITMAP_HOSTENDIAN = 0x8000, 144 BITMAP_HOSTENDIAN = 0x8000,
144}; 145};
145 146
@@ -244,15 +245,9 @@ struct bitmap {
244 unsigned long daemon_lastrun; /* jiffies of last run */ 245 unsigned long daemon_lastrun; /* jiffies of last run */
245 unsigned long daemon_sleep; /* how many seconds between updates? */ 246 unsigned long daemon_sleep; /* how many seconds between updates? */
246 247
247 /* 248 atomic_t pending_writes; /* pending writes to the bitmap file */
248 * bitmap_writeback_daemon waits for file-pages that have been written,
249 * as there is no way to get a call-back when a page write completes.
250 */
251 mdk_thread_t *writeback_daemon;
252 spinlock_t write_lock;
253 wait_queue_head_t write_wait; 249 wait_queue_head_t write_wait;
254 struct list_head complete_pages; 250
255 mempool_t *write_pool;
256}; 251};
257 252
258/* the bitmap API */ 253/* the bitmap API */
diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h
index 7eaf290e10e7..ba15469daf11 100644
--- a/include/linux/raid/linear.h
+++ b/include/linux/raid/linear.h
@@ -13,8 +13,10 @@ typedef struct dev_info dev_info_t;
13 13
14struct linear_private_data 14struct linear_private_data
15{ 15{
16 struct linear_private_data *prev; /* earlier version */
16 dev_info_t **hash_table; 17 dev_info_t **hash_table;
17 sector_t hash_spacing; 18 sector_t hash_spacing;
19 sector_t array_size;
18 int preshift; /* shift before dividing by hash_spacing */ 20 int preshift; /* shift before dividing by hash_spacing */
19 dev_info_t disks[0]; 21 dev_info_t disks[0];
20}; 22};
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 66b44e5e0d6e..eb3e547c8fee 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -85,8 +85,6 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
85extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev); 85extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
86extern void md_unplug_mddev(mddev_t *mddev); 86extern void md_unplug_mddev(mddev_t *mddev);
87 87
88extern void md_print_devices (void);
89
90extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, 88extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
91 sector_t sector, int size, struct page *page); 89 sector_t sector, int size, struct page *page);
92extern void md_super_wait(mddev_t *mddev); 90extern void md_super_wait(mddev_t *mddev);
@@ -97,7 +95,5 @@ extern void md_new_event(mddev_t *mddev);
97 95
98extern void md_update_sb(mddev_t * mddev); 96extern void md_update_sb(mddev_t * mddev);
99 97
100#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
101
102#endif 98#endif
103 99
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index e2df61f5b09a..c1e0ac55bab5 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -40,7 +40,8 @@ typedef struct mdk_rdev_s mdk_rdev_t;
40 * options passed in raidrun: 40 * options passed in raidrun:
41 */ 41 */
42 42
43#define MAX_CHUNK_SIZE (4096*1024) 43/* Currently this must fit in an 'int' */
44#define MAX_CHUNK_SIZE (1<<30)
44 45
45/* 46/*
46 * MD's 'extended' device 47 * MD's 'extended' device
@@ -57,6 +58,7 @@ struct mdk_rdev_s
57 58
58 struct page *sb_page; 59 struct page *sb_page;
59 int sb_loaded; 60 int sb_loaded;
61 __u64 sb_events;
60 sector_t data_offset; /* start of data in array */ 62 sector_t data_offset; /* start of data in array */
61 sector_t sb_offset; 63 sector_t sb_offset;
62 int sb_size; /* bytes in the superblock */ 64 int sb_size; /* bytes in the superblock */
@@ -87,6 +89,10 @@ struct mdk_rdev_s
87 * array and could again if we did a partial 89 * array and could again if we did a partial
88 * resync from the bitmap 90 * resync from the bitmap
89 */ 91 */
92 sector_t recovery_offset;/* If this device has been partially
93 * recovered, this is where we were
94 * up to.
95 */
90 96
91 atomic_t nr_pending; /* number of pending requests. 97 atomic_t nr_pending; /* number of pending requests.
92 * only maintained for arrays that 98 * only maintained for arrays that
@@ -182,6 +188,8 @@ struct mddev_s
182#define MD_RECOVERY_REQUESTED 6 188#define MD_RECOVERY_REQUESTED 6
183#define MD_RECOVERY_CHECK 7 189#define MD_RECOVERY_CHECK 7
184#define MD_RECOVERY_RESHAPE 8 190#define MD_RECOVERY_RESHAPE 8
191#define MD_RECOVERY_FROZEN 9
192
185 unsigned long recovery; 193 unsigned long recovery;
186 194
187 int in_sync; /* know to not need resync */ 195 int in_sync; /* know to not need resync */
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index f1fbae7e390e..b6ebc69bae54 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -265,9 +265,12 @@ struct mdp_superblock_1 {
265 265
266/* feature_map bits */ 266/* feature_map bits */
267#define MD_FEATURE_BITMAP_OFFSET 1 267#define MD_FEATURE_BITMAP_OFFSET 1
268#define MD_FEATURE_RECOVERY_OFFSET 2 /* recovery_offset is present and
269 * must be honoured
270 */
268#define MD_FEATURE_RESHAPE_ACTIVE 4 271#define MD_FEATURE_RESHAPE_ACTIVE 4
269 272
270#define MD_FEATURE_ALL 5 273#define MD_FEATURE_ALL (1|2|4)
271 274
272#endif 275#endif
273 276
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index b1103298a8c2..c41e56a7c090 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -24,11 +24,16 @@ struct r10_private_data_s {
24 int far_copies; /* number of copies layed out 24 int far_copies; /* number of copies layed out
25 * at large strides across drives 25 * at large strides across drives
26 */ 26 */
27 int far_offset; /* far_copies are offset by 1 stripe
28 * instead of many
29 */
27 int copies; /* near_copies * far_copies. 30 int copies; /* near_copies * far_copies.
28 * must be <= raid_disks 31 * must be <= raid_disks
29 */ 32 */
30 sector_t stride; /* distance between far copies. 33 sector_t stride; /* distance between far copies.
31 * This is size / far_copies 34 * This is size / far_copies unless
35 * far_offset, in which case it is
36 * 1 stripe.
32 */ 37 */
33 38
34 int chunk_shift; /* shift from chunks to sectors */ 39 int chunk_shift; /* shift from chunks to sectors */
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 914af667044f..20ed4c997636 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -212,6 +212,7 @@ struct raid5_private_data {
212 mddev_t *mddev; 212 mddev_t *mddev;
213 struct disk_info *spare; 213 struct disk_info *spare;
214 int chunk_size, level, algorithm; 214 int chunk_size, level, algorithm;
215 int max_degraded;
215 int raid_disks, working_disks, failed_disks; 216 int raid_disks, working_disks, failed_disks;
216 int max_nr_stripes; 217 int max_nr_stripes;
217 218
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d11d9310db0..122a25c1b997 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -842,8 +842,6 @@ struct task_struct {
842 u32 self_exec_id; 842 u32 self_exec_id;
843/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ 843/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
844 spinlock_t alloc_lock; 844 spinlock_t alloc_lock;
845/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
846 spinlock_t proc_lock;
847 845
848#ifdef CONFIG_DEBUG_MUTEXES 846#ifdef CONFIG_DEBUG_MUTEXES
849 /* mutex deadlock detection */ 847 /* mutex deadlock detection */
@@ -856,7 +854,6 @@ struct task_struct {
856/* VM state */ 854/* VM state */
857 struct reclaim_state *reclaim_state; 855 struct reclaim_state *reclaim_state;
858 856
859 struct dentry *proc_dentry;
860 struct backing_dev_info *backing_dev_info; 857 struct backing_dev_info *backing_dev_info;
861 858
862 struct io_context *io_context; 859 struct io_context *io_context;
diff --git a/include/linux/security.h b/include/linux/security.h
index d2c17bd91a29..51805806f974 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -862,6 +862,7 @@ struct swap_info_struct;
862 * Permit allocation of a key and assign security data. Note that key does 862 * Permit allocation of a key and assign security data. Note that key does
863 * not have a serial number assigned at this point. 863 * not have a serial number assigned at this point.
864 * @key points to the key. 864 * @key points to the key.
865 * @flags is the allocation flags
865 * Return 0 if permission is granted, -ve error otherwise. 866 * Return 0 if permission is granted, -ve error otherwise.
866 * @key_free: 867 * @key_free:
867 * Notification of destruction; free security data. 868 * Notification of destruction; free security data.
@@ -1324,7 +1325,7 @@ struct security_operations {
1324 1325
1325 /* key management security hooks */ 1326 /* key management security hooks */
1326#ifdef CONFIG_KEYS 1327#ifdef CONFIG_KEYS
1327 int (*key_alloc)(struct key *key, struct task_struct *tsk); 1328 int (*key_alloc)(struct key *key, struct task_struct *tsk, unsigned long flags);
1328 void (*key_free)(struct key *key); 1329 void (*key_free)(struct key *key);
1329 int (*key_permission)(key_ref_t key_ref, 1330 int (*key_permission)(key_ref_t key_ref,
1330 struct task_struct *context, 1331 struct task_struct *context,
@@ -3040,9 +3041,10 @@ static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid
3040#ifdef CONFIG_KEYS 3041#ifdef CONFIG_KEYS
3041#ifdef CONFIG_SECURITY 3042#ifdef CONFIG_SECURITY
3042static inline int security_key_alloc(struct key *key, 3043static inline int security_key_alloc(struct key *key,
3043 struct task_struct *tsk) 3044 struct task_struct *tsk,
3045 unsigned long flags)
3044{ 3046{
3045 return security_ops->key_alloc(key, tsk); 3047 return security_ops->key_alloc(key, tsk, flags);
3046} 3048}
3047 3049
3048static inline void security_key_free(struct key *key) 3050static inline void security_key_free(struct key *key)
@@ -3060,7 +3062,8 @@ static inline int security_key_permission(key_ref_t key_ref,
3060#else 3062#else
3061 3063
3062static inline int security_key_alloc(struct key *key, 3064static inline int security_key_alloc(struct key *key,
3063 struct task_struct *tsk) 3065 struct task_struct *tsk,
3066 unsigned long flags)
3064{ 3067{
3065 return 0; 3068 return 0;
3066} 3069}
diff --git a/include/linux/time.h b/include/linux/time.h
index 0cd696cee998..65dd85b2105e 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -77,6 +77,8 @@ extern struct timespec xtime;
77extern struct timespec wall_to_monotonic; 77extern struct timespec wall_to_monotonic;
78extern seqlock_t xtime_lock; 78extern seqlock_t xtime_lock;
79 79
80void timekeeping_init(void);
81
80static inline unsigned long get_seconds(void) 82static inline unsigned long get_seconds(void)
81{ 83{
82 return xtime.tv_sec; 84 return xtime.tv_sec;
@@ -100,6 +102,7 @@ extern int do_getitimer(int which, struct itimerval *value);
100extern void getnstimeofday(struct timespec *tv); 102extern void getnstimeofday(struct timespec *tv);
101 103
102extern struct timespec timespec_trunc(struct timespec t, unsigned gran); 104extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
105extern int timekeeping_is_continuous(void);
103 106
104/** 107/**
105 * timespec_to_ns - Convert timespec to nanoseconds 108 * timespec_to_ns - Convert timespec to nanoseconds
@@ -142,6 +145,20 @@ extern struct timespec ns_to_timespec(const s64 nsec);
142 */ 145 */
143extern struct timeval ns_to_timeval(const s64 nsec); 146extern struct timeval ns_to_timeval(const s64 nsec);
144 147
148/**
149 * timespec_add_ns - Adds nanoseconds to a timespec
150 * @a: pointer to timespec to be incremented
151 * @ns: unsigned nanoseconds value to be added
152 */
153static inline void timespec_add_ns(struct timespec *a, u64 ns)
154{
155 ns += a->tv_nsec;
156 while(unlikely(ns >= NSEC_PER_SEC)) {
157 ns -= NSEC_PER_SEC;
158 a->tv_sec++;
159 }
160 a->tv_nsec = ns;
161}
145#endif /* __KERNEL__ */ 162#endif /* __KERNEL__ */
146 163
147#define NFDBITS __NFDBITS 164#define NFDBITS __NFDBITS
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 34d3ccff7bbb..19bb6538b49e 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -303,6 +303,8 @@ time_interpolator_reset(void)
303 303
304#endif /* !CONFIG_TIME_INTERPOLATION */ 304#endif /* !CONFIG_TIME_INTERPOLATION */
305 305
306#define TICK_LENGTH_SHIFT 32
307
306/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ 308/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
307extern u64 current_tick_length(void); 309extern u64 current_tick_length(void);
308 310
diff --git a/include/net/tipc/tipc_bearer.h b/include/net/tipc/tipc_bearer.h
index 098607cd4b78..e07136d74c2f 100644
--- a/include/net/tipc/tipc_bearer.h
+++ b/include/net/tipc/tipc_bearer.h
@@ -49,10 +49,18 @@
49 49
50#define TIPC_MEDIA_TYPE_ETH 1 50#define TIPC_MEDIA_TYPE_ETH 1
51 51
52/*
53 * Destination address structure used by TIPC bearers when sending messages
54 *
55 * IMPORTANT: The fields of this structure MUST be stored using the specified
56 * byte order indicated below, as the structure is exchanged between nodes
57 * as part of a link setup process.
58 */
59
52struct tipc_media_addr { 60struct tipc_media_addr {
53 __u32 type; 61 __u32 type; /* bearer type (network byte order) */
54 union { 62 union {
55 __u8 eth_addr[6]; /* Ethernet bearer */ 63 __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */
56#if 0 64#if 0
57 /* Prototypes for other possible bearer types */ 65 /* Prototypes for other possible bearer types */
58 66
diff --git a/init/initramfs.c b/init/initramfs.c
index f81cfa40a719..d28c1094d7e5 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -30,6 +30,7 @@ static void __init free(void *where)
30 30
31static __initdata struct hash { 31static __initdata struct hash {
32 int ino, minor, major; 32 int ino, minor, major;
33 mode_t mode;
33 struct hash *next; 34 struct hash *next;
34 char name[N_ALIGN(PATH_MAX)]; 35 char name[N_ALIGN(PATH_MAX)];
35} *head[32]; 36} *head[32];
@@ -41,7 +42,8 @@ static inline int hash(int major, int minor, int ino)
41 return tmp & 31; 42 return tmp & 31;
42} 43}
43 44
44static char __init *find_link(int major, int minor, int ino, char *name) 45static char __init *find_link(int major, int minor, int ino,
46 mode_t mode, char *name)
45{ 47{
46 struct hash **p, *q; 48 struct hash **p, *q;
47 for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) { 49 for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) {
@@ -51,14 +53,17 @@ static char __init *find_link(int major, int minor, int ino, char *name)
51 continue; 53 continue;
52 if ((*p)->major != major) 54 if ((*p)->major != major)
53 continue; 55 continue;
56 if (((*p)->mode ^ mode) & S_IFMT)
57 continue;
54 return (*p)->name; 58 return (*p)->name;
55 } 59 }
56 q = (struct hash *)malloc(sizeof(struct hash)); 60 q = (struct hash *)malloc(sizeof(struct hash));
57 if (!q) 61 if (!q)
58 panic("can't allocate link hash entry"); 62 panic("can't allocate link hash entry");
59 q->ino = ino;
60 q->minor = minor;
61 q->major = major; 63 q->major = major;
64 q->minor = minor;
65 q->ino = ino;
66 q->mode = mode;
62 strcpy(q->name, name); 67 strcpy(q->name, name);
63 q->next = NULL; 68 q->next = NULL;
64 *p = q; 69 *p = q;
@@ -229,13 +234,25 @@ static int __init do_reset(void)
229static int __init maybe_link(void) 234static int __init maybe_link(void)
230{ 235{
231 if (nlink >= 2) { 236 if (nlink >= 2) {
232 char *old = find_link(major, minor, ino, collected); 237 char *old = find_link(major, minor, ino, mode, collected);
233 if (old) 238 if (old)
234 return (sys_link(old, collected) < 0) ? -1 : 1; 239 return (sys_link(old, collected) < 0) ? -1 : 1;
235 } 240 }
236 return 0; 241 return 0;
237} 242}
238 243
244static void __init clean_path(char *path, mode_t mode)
245{
246 struct stat st;
247
248 if (!sys_newlstat(path, &st) && (st.st_mode^mode) & S_IFMT) {
249 if (S_ISDIR(st.st_mode))
250 sys_rmdir(path);
251 else
252 sys_unlink(path);
253 }
254}
255
239static __initdata int wfd; 256static __initdata int wfd;
240 257
241static int __init do_name(void) 258static int __init do_name(void)
@@ -248,9 +265,15 @@ static int __init do_name(void)
248 } 265 }
249 if (dry_run) 266 if (dry_run)
250 return 0; 267 return 0;
268 clean_path(collected, mode);
251 if (S_ISREG(mode)) { 269 if (S_ISREG(mode)) {
252 if (maybe_link() >= 0) { 270 int ml = maybe_link();
253 wfd = sys_open(collected, O_WRONLY|O_CREAT, mode); 271 if (ml >= 0) {
272 int openflags = O_WRONLY|O_CREAT;
273 if (ml != 1)
274 openflags |= O_TRUNC;
275 wfd = sys_open(collected, openflags, mode);
276
254 if (wfd >= 0) { 277 if (wfd >= 0) {
255 sys_fchown(wfd, uid, gid); 278 sys_fchown(wfd, uid, gid);
256 sys_fchmod(wfd, mode); 279 sys_fchmod(wfd, mode);
@@ -291,6 +314,7 @@ static int __init do_copy(void)
291static int __init do_symlink(void) 314static int __init do_symlink(void)
292{ 315{
293 collected[N_ALIGN(name_len) + body_len] = '\0'; 316 collected[N_ALIGN(name_len) + body_len] = '\0';
317 clean_path(collected, 0);
294 sys_symlink(collected + N_ALIGN(name_len), collected); 318 sys_symlink(collected + N_ALIGN(name_len), collected);
295 sys_lchown(collected, uid, gid); 319 sys_lchown(collected, uid, gid);
296 state = SkipIt; 320 state = SkipIt;
diff --git a/init/main.c b/init/main.c
index f715b9b89753..9a970d317ea5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -490,6 +490,7 @@ asmlinkage void __init start_kernel(void)
490 hrtimers_init(); 490 hrtimers_init();
491 softirq_init(); 491 softirq_init();
492 time_init(); 492 time_init();
493 timekeeping_init();
493 494
494 /* 495 /*
495 * HACK ALERT! This is early. We're enabling the console before 496 * HACK ALERT! This is early. We're enabling the console before
diff --git a/kernel/Makefile b/kernel/Makefile
index f6ef00f4f90f..bc4b8a7161ff 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o 11 hrtimer.o
12 12
13obj-y += time/
13obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o 14obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
14obj-$(CONFIG_FUTEX) += futex.o 15obj-$(CONFIG_FUTEX) += futex.o
15ifeq ($(CONFIG_COMPAT),y) 16ifeq ($(CONFIG_COMPAT),y)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index fe2b8d0bfe4c..03dcd981846a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -13,10 +13,10 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/kthread.h> 14#include <linux/kthread.h>
15#include <linux/stop_machine.h> 15#include <linux/stop_machine.h>
16#include <asm/semaphore.h> 16#include <linux/mutex.h>
17 17
18/* This protects CPUs going up and down... */ 18/* This protects CPUs going up and down... */
19static DECLARE_MUTEX(cpucontrol); 19static DEFINE_MUTEX(cpucontrol);
20 20
21static BLOCKING_NOTIFIER_HEAD(cpu_chain); 21static BLOCKING_NOTIFIER_HEAD(cpu_chain);
22 22
@@ -30,9 +30,9 @@ static int __lock_cpu_hotplug(int interruptible)
30 30
31 if (lock_cpu_hotplug_owner != current) { 31 if (lock_cpu_hotplug_owner != current) {
32 if (interruptible) 32 if (interruptible)
33 ret = down_interruptible(&cpucontrol); 33 ret = mutex_lock_interruptible(&cpucontrol);
34 else 34 else
35 down(&cpucontrol); 35 mutex_lock(&cpucontrol);
36 } 36 }
37 37
38 /* 38 /*
@@ -56,7 +56,7 @@ void unlock_cpu_hotplug(void)
56{ 56{
57 if (--lock_cpu_hotplug_depth == 0) { 57 if (--lock_cpu_hotplug_depth == 0) {
58 lock_cpu_hotplug_owner = NULL; 58 lock_cpu_hotplug_owner = NULL;
59 up(&cpucontrol); 59 mutex_unlock(&cpucontrol);
60 } 60 }
61} 61}
62EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); 62EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b602f73fb38d..1535af3a912d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2442,31 +2442,43 @@ void __cpuset_memory_pressure_bump(void)
2442 */ 2442 */
2443static int proc_cpuset_show(struct seq_file *m, void *v) 2443static int proc_cpuset_show(struct seq_file *m, void *v)
2444{ 2444{
2445 struct pid *pid;
2445 struct task_struct *tsk; 2446 struct task_struct *tsk;
2446 char *buf; 2447 char *buf;
2447 int retval = 0; 2448 int retval;
2448 2449
2450 retval = -ENOMEM;
2449 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 2451 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2450 if (!buf) 2452 if (!buf)
2451 return -ENOMEM; 2453 goto out;
2454
2455 retval = -ESRCH;
2456 pid = m->private;
2457 tsk = get_pid_task(pid, PIDTYPE_PID);
2458 if (!tsk)
2459 goto out_free;
2452 2460
2453 tsk = m->private; 2461 retval = -EINVAL;
2454 mutex_lock(&manage_mutex); 2462 mutex_lock(&manage_mutex);
2463
2455 retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); 2464 retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
2456 if (retval < 0) 2465 if (retval < 0)
2457 goto out; 2466 goto out_unlock;
2458 seq_puts(m, buf); 2467 seq_puts(m, buf);
2459 seq_putc(m, '\n'); 2468 seq_putc(m, '\n');
2460out: 2469out_unlock:
2461 mutex_unlock(&manage_mutex); 2470 mutex_unlock(&manage_mutex);
2471 put_task_struct(tsk);
2472out_free:
2462 kfree(buf); 2473 kfree(buf);
2474out:
2463 return retval; 2475 return retval;
2464} 2476}
2465 2477
2466static int cpuset_open(struct inode *inode, struct file *file) 2478static int cpuset_open(struct inode *inode, struct file *file)
2467{ 2479{
2468 struct task_struct *tsk = PROC_I(inode)->task; 2480 struct pid *pid = PROC_I(inode)->pid;
2469 return single_open(file, proc_cpuset_show, tsk); 2481 return single_open(file, proc_cpuset_show, pid);
2470} 2482}
2471 2483
2472struct file_operations proc_cpuset_operations = { 2484struct file_operations proc_cpuset_operations = {
diff --git a/kernel/exit.c b/kernel/exit.c
index e76bd02e930e..304ef637be6c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -137,12 +137,8 @@ void release_task(struct task_struct * p)
137{ 137{
138 int zap_leader; 138 int zap_leader;
139 task_t *leader; 139 task_t *leader;
140 struct dentry *proc_dentry;
141
142repeat: 140repeat:
143 atomic_dec(&p->user->processes); 141 atomic_dec(&p->user->processes);
144 spin_lock(&p->proc_lock);
145 proc_dentry = proc_pid_unhash(p);
146 write_lock_irq(&tasklist_lock); 142 write_lock_irq(&tasklist_lock);
147 ptrace_unlink(p); 143 ptrace_unlink(p);
148 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); 144 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -171,8 +167,7 @@ repeat:
171 167
172 sched_exit(p); 168 sched_exit(p);
173 write_unlock_irq(&tasklist_lock); 169 write_unlock_irq(&tasklist_lock);
174 spin_unlock(&p->proc_lock); 170 proc_flush_task(p);
175 proc_pid_flush(proc_dentry);
176 release_thread(p); 171 release_thread(p);
177 call_rcu(&p->rcu, delayed_put_task_struct); 172 call_rcu(&p->rcu, delayed_put_task_struct);
178 173
diff --git a/kernel/fork.c b/kernel/fork.c
index dfd10cb370c3..9b4e54ef0225 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags,
993 if (put_user(p->pid, parent_tidptr)) 993 if (put_user(p->pid, parent_tidptr))
994 goto bad_fork_cleanup; 994 goto bad_fork_cleanup;
995 995
996 p->proc_dentry = NULL;
997
998 INIT_LIST_HEAD(&p->children); 996 INIT_LIST_HEAD(&p->children);
999 INIT_LIST_HEAD(&p->sibling); 997 INIT_LIST_HEAD(&p->sibling);
1000 p->vfork_done = NULL; 998 p->vfork_done = NULL;
1001 spin_lock_init(&p->alloc_lock); 999 spin_lock_init(&p->alloc_lock);
1002 spin_lock_init(&p->proc_lock);
1003 1000
1004 clear_tsk_thread_flag(p, TIF_SIGPENDING); 1001 clear_tsk_thread_flag(p, TIF_SIGPENDING);
1005 init_sigpending(&p->pending); 1002 init_sigpending(&p->pending);
@@ -1159,18 +1156,6 @@ static task_t *copy_process(unsigned long clone_flags,
1159 } 1156 }
1160 1157
1161 if (clone_flags & CLONE_THREAD) { 1158 if (clone_flags & CLONE_THREAD) {
1162 /*
1163 * Important: if an exit-all has been started then
1164 * do not create this new thread - the whole thread
1165 * group is supposed to exit anyway.
1166 */
1167 if (current->signal->flags & SIGNAL_GROUP_EXIT) {
1168 spin_unlock(&current->sighand->siglock);
1169 write_unlock_irq(&tasklist_lock);
1170 retval = -EAGAIN;
1171 goto bad_fork_cleanup_namespace;
1172 }
1173
1174 p->group_leader = current->group_leader; 1159 p->group_leader = current->group_leader;
1175 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); 1160 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1176 1161
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1fbf466a29aa..64aab081153b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -47,11 +47,17 @@
47 47
48static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 48static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
49static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 49static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
50static atomic_t kprobe_count;
50 51
51DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 52DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
52DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ 53DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
53static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 54static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
54 55
56static struct notifier_block kprobe_page_fault_nb = {
57 .notifier_call = kprobe_exceptions_notify,
58 .priority = 0x7fffffff /* we need to be notified first */
59};
60
55#ifdef __ARCH_WANT_KPROBES_INSN_SLOT 61#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
56/* 62/*
57 * kprobe->ainsn.insn points to the copy of the instruction to be 63 * kprobe->ainsn.insn points to the copy of the instruction to be
@@ -368,16 +374,15 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
368*/ 374*/
369static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) 375static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
370{ 376{
371 struct kprobe *kp;
372
373 if (p->break_handler) { 377 if (p->break_handler) {
374 list_for_each_entry_rcu(kp, &old_p->list, list) { 378 if (old_p->break_handler)
375 if (kp->break_handler) 379 return -EEXIST;
376 return -EEXIST;
377 }
378 list_add_tail_rcu(&p->list, &old_p->list); 380 list_add_tail_rcu(&p->list, &old_p->list);
381 old_p->break_handler = aggr_break_handler;
379 } else 382 } else
380 list_add_rcu(&p->list, &old_p->list); 383 list_add_rcu(&p->list, &old_p->list);
384 if (p->post_handler && !old_p->post_handler)
385 old_p->post_handler = aggr_post_handler;
381 return 0; 386 return 0;
382} 387}
383 388
@@ -390,9 +395,11 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
390 copy_kprobe(p, ap); 395 copy_kprobe(p, ap);
391 ap->addr = p->addr; 396 ap->addr = p->addr;
392 ap->pre_handler = aggr_pre_handler; 397 ap->pre_handler = aggr_pre_handler;
393 ap->post_handler = aggr_post_handler;
394 ap->fault_handler = aggr_fault_handler; 398 ap->fault_handler = aggr_fault_handler;
395 ap->break_handler = aggr_break_handler; 399 if (p->post_handler)
400 ap->post_handler = aggr_post_handler;
401 if (p->break_handler)
402 ap->break_handler = aggr_break_handler;
396 403
397 INIT_LIST_HEAD(&ap->list); 404 INIT_LIST_HEAD(&ap->list);
398 list_add_rcu(&p->list, &ap->list); 405 list_add_rcu(&p->list, &ap->list);
@@ -464,6 +471,8 @@ static int __kprobes __register_kprobe(struct kprobe *p,
464 old_p = get_kprobe(p->addr); 471 old_p = get_kprobe(p->addr);
465 if (old_p) { 472 if (old_p) {
466 ret = register_aggr_kprobe(old_p, p); 473 ret = register_aggr_kprobe(old_p, p);
474 if (!ret)
475 atomic_inc(&kprobe_count);
467 goto out; 476 goto out;
468 } 477 }
469 478
@@ -474,6 +483,10 @@ static int __kprobes __register_kprobe(struct kprobe *p,
474 hlist_add_head_rcu(&p->hlist, 483 hlist_add_head_rcu(&p->hlist,
475 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 484 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
476 485
486 if (atomic_add_return(1, &kprobe_count) == \
487 (ARCH_INACTIVE_KPROBE_COUNT + 1))
488 register_page_fault_notifier(&kprobe_page_fault_nb);
489
477 arch_arm_kprobe(p); 490 arch_arm_kprobe(p);
478 491
479out: 492out:
@@ -536,14 +549,40 @@ valid_p:
536 kfree(old_p); 549 kfree(old_p);
537 } 550 }
538 arch_remove_kprobe(p); 551 arch_remove_kprobe(p);
552 } else {
553 mutex_lock(&kprobe_mutex);
554 if (p->break_handler)
555 old_p->break_handler = NULL;
556 if (p->post_handler){
557 list_for_each_entry_rcu(list_p, &old_p->list, list){
558 if (list_p->post_handler){
559 cleanup_p = 2;
560 break;
561 }
562 }
563 if (cleanup_p == 0)
564 old_p->post_handler = NULL;
565 }
566 mutex_unlock(&kprobe_mutex);
539 } 567 }
568
569 /* Call unregister_page_fault_notifier()
570 * if no probes are active
571 */
572 mutex_lock(&kprobe_mutex);
573 if (atomic_add_return(-1, &kprobe_count) == \
574 ARCH_INACTIVE_KPROBE_COUNT)
575 unregister_page_fault_notifier(&kprobe_page_fault_nb);
576 mutex_unlock(&kprobe_mutex);
577 return;
540} 578}
541 579
542static struct notifier_block kprobe_exceptions_nb = { 580static struct notifier_block kprobe_exceptions_nb = {
543 .notifier_call = kprobe_exceptions_notify, 581 .notifier_call = kprobe_exceptions_notify,
544 .priority = 0x7fffffff /* we need to notified first */ 582 .priority = 0x7fffffff /* we need to be notified first */
545}; 583};
546 584
585
547int __kprobes register_jprobe(struct jprobe *jp) 586int __kprobes register_jprobe(struct jprobe *jp)
548{ 587{
549 /* Todo: Verify probepoint is a function entry point */ 588 /* Todo: Verify probepoint is a function entry point */
@@ -652,6 +691,7 @@ static int __init init_kprobes(void)
652 INIT_HLIST_HEAD(&kprobe_table[i]); 691 INIT_HLIST_HEAD(&kprobe_table[i]);
653 INIT_HLIST_HEAD(&kretprobe_inst_table[i]); 692 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
654 } 693 }
694 atomic_set(&kprobe_count, 0);
655 695
656 err = arch_init_kprobes(); 696 err = arch_init_kprobes();
657 if (!err) 697 if (!err)
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index f4913c376950..036b6285b15c 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -153,13 +153,13 @@ next:
153 continue; 153 continue;
154 count++; 154 count++;
155 cursor = curr->next; 155 cursor = curr->next;
156 debug_spin_lock_restore(&debug_mutex_lock, flags); 156 debug_spin_unlock_restore(&debug_mutex_lock, flags);
157 157
158 printk("\n#%03d: ", count); 158 printk("\n#%03d: ", count);
159 printk_lock(lock, filter ? 0 : 1); 159 printk_lock(lock, filter ? 0 : 1);
160 goto next; 160 goto next;
161 } 161 }
162 debug_spin_lock_restore(&debug_mutex_lock, flags); 162 debug_spin_unlock_restore(&debug_mutex_lock, flags);
163 printk("\n"); 163 printk("\n");
164} 164}
165 165
@@ -316,7 +316,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task)
316 continue; 316 continue;
317 list_del_init(curr); 317 list_del_init(curr);
318 DEBUG_OFF(); 318 DEBUG_OFF();
319 debug_spin_lock_restore(&debug_mutex_lock, flags); 319 debug_spin_unlock_restore(&debug_mutex_lock, flags);
320 320
321 printk("BUG: %s/%d, lock held at task exit time!\n", 321 printk("BUG: %s/%d, lock held at task exit time!\n",
322 task->comm, task->pid); 322 task->comm, task->pid);
@@ -325,7 +325,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task)
325 printk("exiting task is not even the owner??\n"); 325 printk("exiting task is not even the owner??\n");
326 return; 326 return;
327 } 327 }
328 debug_spin_lock_restore(&debug_mutex_lock, flags); 328 debug_spin_unlock_restore(&debug_mutex_lock, flags);
329} 329}
330 330
331/* 331/*
@@ -352,7 +352,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
352 continue; 352 continue;
353 list_del_init(curr); 353 list_del_init(curr);
354 DEBUG_OFF(); 354 DEBUG_OFF();
355 debug_spin_lock_restore(&debug_mutex_lock, flags); 355 debug_spin_unlock_restore(&debug_mutex_lock, flags);
356 356
357 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", 357 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
358 current->comm, current->pid, lock, from, to); 358 current->comm, current->pid, lock, from, to);
@@ -362,7 +362,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
362 printk("freeing task is not even the owner??\n"); 362 printk("freeing task is not even the owner??\n");
363 return; 363 return;
364 } 364 }
365 debug_spin_lock_restore(&debug_mutex_lock, flags); 365 debug_spin_unlock_restore(&debug_mutex_lock, flags);
366} 366}
367 367
368/* 368/*
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index fd384050acb1..a5196c36a5fd 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -46,21 +46,6 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
46extern void debug_mutex_unlock(struct mutex *lock); 46extern void debug_mutex_unlock(struct mutex *lock);
47extern void debug_mutex_init(struct mutex *lock, const char *name); 47extern void debug_mutex_init(struct mutex *lock, const char *name);
48 48
49#define debug_spin_lock(lock) \
50 do { \
51 local_irq_disable(); \
52 if (debug_mutex_on) \
53 spin_lock(lock); \
54 } while (0)
55
56#define debug_spin_unlock(lock) \
57 do { \
58 if (debug_mutex_on) \
59 spin_unlock(lock); \
60 local_irq_enable(); \
61 preempt_check_resched(); \
62 } while (0)
63
64#define debug_spin_lock_save(lock, flags) \ 49#define debug_spin_lock_save(lock, flags) \
65 do { \ 50 do { \
66 local_irq_save(flags); \ 51 local_irq_save(flags); \
@@ -68,7 +53,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name);
68 spin_lock(lock); \ 53 spin_lock(lock); \
69 } while (0) 54 } while (0)
70 55
71#define debug_spin_lock_restore(lock, flags) \ 56#define debug_spin_unlock_restore(lock, flags) \
72 do { \ 57 do { \
73 if (debug_mutex_on) \ 58 if (debug_mutex_on) \
74 spin_unlock(lock); \ 59 spin_unlock(lock); \
@@ -76,20 +61,20 @@ extern void debug_mutex_init(struct mutex *lock, const char *name);
76 preempt_check_resched(); \ 61 preempt_check_resched(); \
77 } while (0) 62 } while (0)
78 63
79#define spin_lock_mutex(lock) \ 64#define spin_lock_mutex(lock, flags) \
80 do { \ 65 do { \
81 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 66 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
82 \ 67 \
83 DEBUG_WARN_ON(in_interrupt()); \ 68 DEBUG_WARN_ON(in_interrupt()); \
84 debug_spin_lock(&debug_mutex_lock); \ 69 debug_spin_lock_save(&debug_mutex_lock, flags); \
85 spin_lock(lock); \ 70 spin_lock(lock); \
86 DEBUG_WARN_ON(l->magic != l); \ 71 DEBUG_WARN_ON(l->magic != l); \
87 } while (0) 72 } while (0)
88 73
89#define spin_unlock_mutex(lock) \ 74#define spin_unlock_mutex(lock, flags) \
90 do { \ 75 do { \
91 spin_unlock(lock); \ 76 spin_unlock(lock); \
92 debug_spin_unlock(&debug_mutex_lock); \ 77 debug_spin_unlock_restore(&debug_mutex_lock, flags); \
93 } while (0) 78 } while (0)
94 79
95#define DEBUG_OFF() \ 80#define DEBUG_OFF() \
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5449b210d9ed..7043db21bbce 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -125,10 +125,11 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
125 struct task_struct *task = current; 125 struct task_struct *task = current;
126 struct mutex_waiter waiter; 126 struct mutex_waiter waiter;
127 unsigned int old_val; 127 unsigned int old_val;
128 unsigned long flags;
128 129
129 debug_mutex_init_waiter(&waiter); 130 debug_mutex_init_waiter(&waiter);
130 131
131 spin_lock_mutex(&lock->wait_lock); 132 spin_lock_mutex(&lock->wait_lock, flags);
132 133
133 debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); 134 debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip);
134 135
@@ -157,7 +158,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
157 if (unlikely(state == TASK_INTERRUPTIBLE && 158 if (unlikely(state == TASK_INTERRUPTIBLE &&
158 signal_pending(task))) { 159 signal_pending(task))) {
159 mutex_remove_waiter(lock, &waiter, task->thread_info); 160 mutex_remove_waiter(lock, &waiter, task->thread_info);
160 spin_unlock_mutex(&lock->wait_lock); 161 spin_unlock_mutex(&lock->wait_lock, flags);
161 162
162 debug_mutex_free_waiter(&waiter); 163 debug_mutex_free_waiter(&waiter);
163 return -EINTR; 164 return -EINTR;
@@ -165,9 +166,9 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
165 __set_task_state(task, state); 166 __set_task_state(task, state);
166 167
167 /* didnt get the lock, go to sleep: */ 168 /* didnt get the lock, go to sleep: */
168 spin_unlock_mutex(&lock->wait_lock); 169 spin_unlock_mutex(&lock->wait_lock, flags);
169 schedule(); 170 schedule();
170 spin_lock_mutex(&lock->wait_lock); 171 spin_lock_mutex(&lock->wait_lock, flags);
171 } 172 }
172 173
173 /* got the lock - rejoice! */ 174 /* got the lock - rejoice! */
@@ -178,7 +179,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
178 if (likely(list_empty(&lock->wait_list))) 179 if (likely(list_empty(&lock->wait_list)))
179 atomic_set(&lock->count, 0); 180 atomic_set(&lock->count, 0);
180 181
181 spin_unlock_mutex(&lock->wait_lock); 182 spin_unlock_mutex(&lock->wait_lock, flags);
182 183
183 debug_mutex_free_waiter(&waiter); 184 debug_mutex_free_waiter(&waiter);
184 185
@@ -203,10 +204,11 @@ static fastcall noinline void
203__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) 204__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
204{ 205{
205 struct mutex *lock = container_of(lock_count, struct mutex, count); 206 struct mutex *lock = container_of(lock_count, struct mutex, count);
207 unsigned long flags;
206 208
207 DEBUG_WARN_ON(lock->owner != current_thread_info()); 209 DEBUG_WARN_ON(lock->owner != current_thread_info());
208 210
209 spin_lock_mutex(&lock->wait_lock); 211 spin_lock_mutex(&lock->wait_lock, flags);
210 212
211 /* 213 /*
212 * some architectures leave the lock unlocked in the fastpath failure 214 * some architectures leave the lock unlocked in the fastpath failure
@@ -231,7 +233,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
231 233
232 debug_mutex_clear_owner(lock); 234 debug_mutex_clear_owner(lock);
233 235
234 spin_unlock_mutex(&lock->wait_lock); 236 spin_unlock_mutex(&lock->wait_lock, flags);
235} 237}
236 238
237/* 239/*
@@ -276,9 +278,10 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__)
276static inline int __mutex_trylock_slowpath(atomic_t *lock_count) 278static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
277{ 279{
278 struct mutex *lock = container_of(lock_count, struct mutex, count); 280 struct mutex *lock = container_of(lock_count, struct mutex, count);
281 unsigned long flags;
279 int prev; 282 int prev;
280 283
281 spin_lock_mutex(&lock->wait_lock); 284 spin_lock_mutex(&lock->wait_lock, flags);
282 285
283 prev = atomic_xchg(&lock->count, -1); 286 prev = atomic_xchg(&lock->count, -1);
284 if (likely(prev == 1)) 287 if (likely(prev == 1))
@@ -287,7 +290,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
287 if (likely(list_empty(&lock->wait_list))) 290 if (likely(list_empty(&lock->wait_list)))
288 atomic_set(&lock->count, 0); 291 atomic_set(&lock->count, 0);
289 292
290 spin_unlock_mutex(&lock->wait_lock); 293 spin_unlock_mutex(&lock->wait_lock, flags);
291 294
292 return prev == 1; 295 return prev == 1;
293} 296}
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 00fe84e7b672..069189947257 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -9,8 +9,10 @@
9 * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: 9 * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
10 */ 10 */
11 11
12#define spin_lock_mutex(lock) spin_lock(lock) 12#define spin_lock_mutex(lock, flags) \
13#define spin_unlock_mutex(lock) spin_unlock(lock) 13 do { spin_lock(lock); (void)(flags); } while (0)
14#define spin_unlock_mutex(lock, flags) \
15 do { spin_unlock(lock); (void)(flags); } while (0)
14#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
15 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
16 18
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 921c22ad16e4..335c5b932e14 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -120,8 +120,18 @@ int ptrace_check_attach(struct task_struct *child, int kill)
120 120
121static int may_attach(struct task_struct *task) 121static int may_attach(struct task_struct *task)
122{ 122{
123 if (!task->mm) 123 /* May we inspect the given task?
124 return -EPERM; 124 * This check is used both for attaching with ptrace
125 * and for allowing access to sensitive information in /proc.
126 *
127 * ptrace_attach denies several cases that /proc allows
128 * because setting up the necessary parent/child relationship
129 * or halting the specified task is impossible.
130 */
131 int dumpable = 0;
132 /* Don't let security modules deny introspection */
133 if (task == current)
134 return 0;
125 if (((current->uid != task->euid) || 135 if (((current->uid != task->euid) ||
126 (current->uid != task->suid) || 136 (current->uid != task->suid) ||
127 (current->uid != task->uid) || 137 (current->uid != task->uid) ||
@@ -130,7 +140,9 @@ static int may_attach(struct task_struct *task)
130 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) 140 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
131 return -EPERM; 141 return -EPERM;
132 smp_rmb(); 142 smp_rmb();
133 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) 143 if (task->mm)
144 dumpable = task->mm->dumpable;
145 if (!dumpable && !capable(CAP_SYS_PTRACE))
134 return -EPERM; 146 return -EPERM;
135 147
136 return security_ptrace(current, task); 148 return security_ptrace(current, task);
@@ -176,6 +188,8 @@ repeat:
176 goto repeat; 188 goto repeat;
177 } 189 }
178 190
191 if (!task->mm)
192 goto bad;
179 /* the same process cannot be attached many times */ 193 /* the same process cannot be attached many times */
180 if (task->ptrace & PT_PTRACED) 194 if (task->ptrace & PT_PTRACED)
181 goto bad; 195 goto bad;
@@ -200,7 +214,7 @@ out:
200 return retval; 214 return retval;
201} 215}
202 216
203void __ptrace_detach(struct task_struct *child, unsigned int data) 217static inline void __ptrace_detach(struct task_struct *child, unsigned int data)
204{ 218{
205 child->exit_code = data; 219 child->exit_code = data;
206 /* .. re-parent .. */ 220 /* .. re-parent .. */
@@ -219,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
219 ptrace_disable(child); 233 ptrace_disable(child);
220 234
221 write_lock_irq(&tasklist_lock); 235 write_lock_irq(&tasklist_lock);
236 /* protect against de_thread()->release_task() */
222 if (child->ptrace) 237 if (child->ptrace)
223 __ptrace_detach(child, data); 238 __ptrace_detach(child, data);
224 write_unlock_irq(&tasklist_lock); 239 write_unlock_irq(&tasklist_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index f06d059edef5..cfaf3fabeecd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4247,7 +4247,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
4247 if (retval) 4247 if (retval)
4248 goto out_unlock; 4248 goto out_unlock;
4249 4249
4250 jiffies_to_timespec(p->policy & SCHED_FIFO ? 4250 jiffies_to_timespec(p->policy == SCHED_FIFO ?
4251 0 : task_timeslice(p), &t); 4251 0 : task_timeslice(p), &t);
4252 read_unlock(&tasklist_lock); 4252 read_unlock(&tasklist_lock);
4253 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; 4253 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
diff --git a/kernel/signal.c b/kernel/signal.c
index 1b3c921737e2..52adf53929f6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1531,6 +1531,35 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1531 spin_unlock_irqrestore(&sighand->siglock, flags); 1531 spin_unlock_irqrestore(&sighand->siglock, flags);
1532} 1532}
1533 1533
1534static inline int may_ptrace_stop(void)
1535{
1536 if (!likely(current->ptrace & PT_PTRACED))
1537 return 0;
1538
1539 if (unlikely(current->parent == current->real_parent &&
1540 (current->ptrace & PT_ATTACHED)))
1541 return 0;
1542
1543 if (unlikely(current->signal == current->parent->signal) &&
1544 unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))
1545 return 0;
1546
1547 /*
1548 * Are we in the middle of do_coredump?
1549 * If so and our tracer is also part of the coredump stopping
1550 * is a deadlock situation, and pointless because our tracer
1551 * is dead so don't allow us to stop.
1552 * If SIGKILL was already sent before the caller unlocked
1553 * ->siglock we must see ->core_waiters != 0. Otherwise it
1554 * is safe to enter schedule().
1555 */
1556 if (unlikely(current->mm->core_waiters) &&
1557 unlikely(current->mm == current->parent->mm))
1558 return 0;
1559
1560 return 1;
1561}
1562
1534/* 1563/*
1535 * This must be called with current->sighand->siglock held. 1564 * This must be called with current->sighand->siglock held.
1536 * 1565 *
@@ -1559,11 +1588,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
1559 spin_unlock_irq(&current->sighand->siglock); 1588 spin_unlock_irq(&current->sighand->siglock);
1560 try_to_freeze(); 1589 try_to_freeze();
1561 read_lock(&tasklist_lock); 1590 read_lock(&tasklist_lock);
1562 if (likely(current->ptrace & PT_PTRACED) && 1591 if (may_ptrace_stop()) {
1563 likely(current->parent != current->real_parent ||
1564 !(current->ptrace & PT_ATTACHED)) &&
1565 (likely(current->parent->signal != current->signal) ||
1566 !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
1567 do_notify_parent_cldstop(current, CLD_TRAPPED); 1592 do_notify_parent_cldstop(current, CLD_TRAPPED);
1568 read_unlock(&tasklist_lock); 1593 read_unlock(&tasklist_lock);
1569 schedule(); 1594 schedule();
diff --git a/kernel/time.c b/kernel/time.c
index b00ddc71cedb..5bd489747643 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -523,6 +523,7 @@ EXPORT_SYMBOL(do_gettimeofday);
523 523
524 524
525#else 525#else
526#ifndef CONFIG_GENERIC_TIME
526/* 527/*
527 * Simulate gettimeofday using do_gettimeofday which only allows a timeval 528 * Simulate gettimeofday using do_gettimeofday which only allows a timeval
528 * and therefore only yields usec accuracy 529 * and therefore only yields usec accuracy
@@ -537,6 +538,7 @@ void getnstimeofday(struct timespec *tv)
537} 538}
538EXPORT_SYMBOL_GPL(getnstimeofday); 539EXPORT_SYMBOL_GPL(getnstimeofday);
539#endif 540#endif
541#endif
540 542
541/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. 543/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
542 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 544 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
new file mode 100644
index 000000000000..e1dfd8e86cce
--- /dev/null
+++ b/kernel/time/Makefile
@@ -0,0 +1 @@
obj-y += clocksource.o jiffies.o
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
new file mode 100644
index 000000000000..74eca5939bd9
--- /dev/null
+++ b/kernel/time/clocksource.c
@@ -0,0 +1,349 @@
1/*
2 * linux/kernel/time/clocksource.c
3 *
4 * This file contains the functions which manage clocksource drivers.
5 *
6 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 *
22 * TODO WishList:
23 * o Allow clocksource drivers to be unregistered
24 * o get rid of clocksource_jiffies extern
25 */
26
27#include <linux/clocksource.h>
28#include <linux/sysdev.h>
29#include <linux/init.h>
30#include <linux/module.h>
31
32/* XXX - Would like a better way for initializing curr_clocksource */
33extern struct clocksource clocksource_jiffies;
34
35/*[Clocksource internal variables]---------
36 * curr_clocksource:
37 * currently selected clocksource. Initialized to clocksource_jiffies.
38 * next_clocksource:
39 * pending next selected clocksource.
40 * clocksource_list:
41 * linked list with the registered clocksources
42 * clocksource_lock:
43 * protects manipulations to curr_clocksource and next_clocksource
44 * and the clocksource_list
45 * override_name:
46 * Name of the user-specified clocksource.
47 */
48static struct clocksource *curr_clocksource = &clocksource_jiffies;
49static struct clocksource *next_clocksource;
50static LIST_HEAD(clocksource_list);
51static DEFINE_SPINLOCK(clocksource_lock);
52static char override_name[32];
53static int finished_booting;
54
55/* clocksource_done_booting - Called near the end of bootup
56 *
57 * Hack to avoid lots of clocksource churn at boot time
58 */
59static int __init clocksource_done_booting(void)
60{
61 finished_booting = 1;
62 return 0;
63}
64
65late_initcall(clocksource_done_booting);
66
67/**
68 * clocksource_get_next - Returns the selected clocksource
69 *
70 */
71struct clocksource *clocksource_get_next(void)
72{
73 unsigned long flags;
74
75 spin_lock_irqsave(&clocksource_lock, flags);
76 if (next_clocksource && finished_booting) {
77 curr_clocksource = next_clocksource;
78 next_clocksource = NULL;
79 }
80 spin_unlock_irqrestore(&clocksource_lock, flags);
81
82 return curr_clocksource;
83}
84
85/**
86 * select_clocksource - Finds the best registered clocksource.
87 *
88 * Private function. Must hold clocksource_lock when called.
89 *
90 * Looks through the list of registered clocksources, returning
91 * the one with the highest rating value. If there is a clocksource
92 * name that matches the override string, it returns that clocksource.
93 */
94static struct clocksource *select_clocksource(void)
95{
96 struct clocksource *best = NULL;
97 struct list_head *tmp;
98
99 list_for_each(tmp, &clocksource_list) {
100 struct clocksource *src;
101
102 src = list_entry(tmp, struct clocksource, list);
103 if (!best)
104 best = src;
105
106 /* check for override: */
107 if (strlen(src->name) == strlen(override_name) &&
108 !strcmp(src->name, override_name)) {
109 best = src;
110 break;
111 }
112 /* pick the highest rating: */
113 if (src->rating > best->rating)
114 best = src;
115 }
116
117 return best;
118}
119
120/**
121 * is_registered_source - Checks if clocksource is registered
122 * @c: pointer to a clocksource
123 *
124 * Private helper function. Must hold clocksource_lock when called.
125 *
126 * Returns one if the clocksource is already registered, zero otherwise.
127 */
128static int is_registered_source(struct clocksource *c)
129{
130 int len = strlen(c->name);
131 struct list_head *tmp;
132
133 list_for_each(tmp, &clocksource_list) {
134 struct clocksource *src;
135
136 src = list_entry(tmp, struct clocksource, list);
137 if (strlen(src->name) == len && !strcmp(src->name, c->name))
138 return 1;
139 }
140
141 return 0;
142}
143
144/**
145 * clocksource_register - Used to install new clocksources
146 * @t: clocksource to be registered
147 *
148 * Returns -EBUSY if registration fails, zero otherwise.
149 */
150int clocksource_register(struct clocksource *c)
151{
152 int ret = 0;
153 unsigned long flags;
154
155 spin_lock_irqsave(&clocksource_lock, flags);
156 /* check if clocksource is already registered */
157 if (is_registered_source(c)) {
158 printk("register_clocksource: Cannot register %s. "
159 "Already registered!", c->name);
160 ret = -EBUSY;
161 } else {
162 /* register it */
163 list_add(&c->list, &clocksource_list);
164 /* scan the registered clocksources, and pick the best one */
165 next_clocksource = select_clocksource();
166 }
167 spin_unlock_irqrestore(&clocksource_lock, flags);
168 return ret;
169}
170EXPORT_SYMBOL(clocksource_register);
171
172/**
173 * clocksource_reselect - Rescan list for next clocksource
174 *
175 * A quick helper function to be used if a clocksource changes its
176 * rating. Forces the clocksource list to be re-scanned for the best
177 * clocksource.
178 */
179void clocksource_reselect(void)
180{
181 unsigned long flags;
182
183 spin_lock_irqsave(&clocksource_lock, flags);
184 next_clocksource = select_clocksource();
185 spin_unlock_irqrestore(&clocksource_lock, flags);
186}
187EXPORT_SYMBOL(clocksource_reselect);
188
189/**
190 * sysfs_show_current_clocksources - sysfs interface for current clocksource
191 * @dev: unused
192 * @buf: char buffer to be filled with clocksource list
193 *
194 * Provides sysfs interface for listing current clocksource.
195 */
196static ssize_t
197sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
198{
199 char *curr = buf;
200
201 spin_lock_irq(&clocksource_lock);
202 curr += sprintf(curr, "%s ", curr_clocksource->name);
203 spin_unlock_irq(&clocksource_lock);
204
205 curr += sprintf(curr, "\n");
206
207 return curr - buf;
208}
209
210/**
211 * sysfs_override_clocksource - interface for manually overriding clocksource
212 * @dev: unused
213 * @buf: name of override clocksource
214 * @count: length of buffer
215 *
216 * Takes input from sysfs interface for manually overriding the default
217 * clocksource selction.
218 */
219static ssize_t sysfs_override_clocksource(struct sys_device *dev,
220 const char *buf, size_t count)
221{
222 size_t ret = count;
223 /* strings from sysfs write are not 0 terminated! */
224 if (count >= sizeof(override_name))
225 return -EINVAL;
226
227 /* strip of \n: */
228 if (buf[count-1] == '\n')
229 count--;
230 if (count < 1)
231 return -EINVAL;
232
233 spin_lock_irq(&clocksource_lock);
234
235 /* copy the name given: */
236 memcpy(override_name, buf, count);
237 override_name[count] = 0;
238
239 /* try to select it: */
240 next_clocksource = select_clocksource();
241
242 spin_unlock_irq(&clocksource_lock);
243
244 return ret;
245}
246
247/**
248 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
249 * @dev: unused
250 * @buf: char buffer to be filled with clocksource list
251 *
252 * Provides sysfs interface for listing registered clocksources
253 */
254static ssize_t
255sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
256{
257 struct list_head *tmp;
258 char *curr = buf;
259
260 spin_lock_irq(&clocksource_lock);
261 list_for_each(tmp, &clocksource_list) {
262 struct clocksource *src;
263
264 src = list_entry(tmp, struct clocksource, list);
265 curr += sprintf(curr, "%s ", src->name);
266 }
267 spin_unlock_irq(&clocksource_lock);
268
269 curr += sprintf(curr, "\n");
270
271 return curr - buf;
272}
273
274/*
275 * Sysfs setup bits:
276 */
277static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
278 sysfs_override_clocksource);
279
280static SYSDEV_ATTR(available_clocksource, 0600,
281 sysfs_show_available_clocksources, NULL);
282
283static struct sysdev_class clocksource_sysclass = {
284 set_kset_name("clocksource"),
285};
286
287static struct sys_device device_clocksource = {
288 .id = 0,
289 .cls = &clocksource_sysclass,
290};
291
292static int __init init_clocksource_sysfs(void)
293{
294 int error = sysdev_class_register(&clocksource_sysclass);
295
296 if (!error)
297 error = sysdev_register(&device_clocksource);
298 if (!error)
299 error = sysdev_create_file(
300 &device_clocksource,
301 &attr_current_clocksource);
302 if (!error)
303 error = sysdev_create_file(
304 &device_clocksource,
305 &attr_available_clocksource);
306 return error;
307}
308
309device_initcall(init_clocksource_sysfs);
310
311/**
312 * boot_override_clocksource - boot clock override
313 * @str: override name
314 *
315 * Takes a clocksource= boot argument and uses it
316 * as the clocksource override name.
317 */
318static int __init boot_override_clocksource(char* str)
319{
320 unsigned long flags;
321 spin_lock_irqsave(&clocksource_lock, flags);
322 if (str)
323 strlcpy(override_name, str, sizeof(override_name));
324 spin_unlock_irqrestore(&clocksource_lock, flags);
325 return 1;
326}
327
328__setup("clocksource=", boot_override_clocksource);
329
330/**
331 * boot_override_clock - Compatibility layer for deprecated boot option
332 * @str: override name
333 *
334 * DEPRECATED! Takes a clock= boot argument and uses it
335 * as the clocksource override name
336 */
337static int __init boot_override_clock(char* str)
338{
339 if (!strcmp(str, "pmtmr")) {
340 printk("Warning: clock=pmtmr is deprecated. "
341 "Use clocksource=acpi_pm.\n");
342 return boot_override_clocksource("acpi_pm");
343 }
344 printk("Warning! clock= boot option is deprecated. "
345 "Use clocksource=xyz\n");
346 return boot_override_clocksource(str);
347}
348
349__setup("clock=", boot_override_clock);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
new file mode 100644
index 000000000000..126bb30c4afe
--- /dev/null
+++ b/kernel/time/jiffies.c
@@ -0,0 +1,73 @@
1/***********************************************************************
2* linux/kernel/time/jiffies.c
3*
4* This file contains the jiffies based clocksource.
5*
6* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
7*
8* This program is free software; you can redistribute it and/or modify
9* it under the terms of the GNU General Public License as published by
10* the Free Software Foundation; either version 2 of the License, or
11* (at your option) any later version.
12*
13* This program is distributed in the hope that it will be useful,
14* but WITHOUT ANY WARRANTY; without even the implied warranty of
15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16* GNU General Public License for more details.
17*
18* You should have received a copy of the GNU General Public License
19* along with this program; if not, write to the Free Software
20* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21*
22************************************************************************/
23#include <linux/clocksource.h>
24#include <linux/jiffies.h>
25#include <linux/init.h>
26
27/* The Jiffies based clocksource is the lowest common
28 * denominator clock source which should function on
29 * all systems. It has the same coarse resolution as
30 * the timer interrupt frequency HZ and it suffers
31 * inaccuracies caused by missed or lost timer
32 * interrupts and the inability for the timer
33 * interrupt hardware to accurately tick at the
34 * requested HZ value. It is also not recommended
35 * for "tick-less" systems.
36 */
37#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
38
39/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
40 * conversion, the .shift value could be zero. However
41 * this would make NTP adjustments impossible as they are
42 * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
43 * shift both the numerator and denominator the same
44 * amount, and give ntp adjustments in units of 1/2^8
45 *
46 * The value 8 is somewhat carefully chosen, as anything
47 * larger can result in overflows. NSEC_PER_JIFFY grows as
48 * HZ shrinks, so values greater than 8 overflow 32bits when
49 * HZ=100.
50 */
51#define JIFFIES_SHIFT 8
52
53static cycle_t jiffies_read(void)
54{
55 return (cycle_t) jiffies;
56}
57
58struct clocksource clocksource_jiffies = {
59 .name = "jiffies",
60 .rating = 0, /* lowest rating*/
61 .read = jiffies_read,
62 .mask = 0xffffffff, /*32bits*/
63 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
64 .shift = JIFFIES_SHIFT,
65 .is_continuous = 0, /* tick based, not free running */
66};
67
68static int __init init_jiffies_clocksource(void)
69{
70 return clocksource_register(&clocksource_jiffies);
71}
72
73module_init(init_jiffies_clocksource);
diff --git a/kernel/timer.c b/kernel/timer.c
index eb97371b87d8..5bb6b7976eec 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -597,7 +597,6 @@ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
597long time_precision = 1; /* clock precision (us) */ 597long time_precision = 1; /* clock precision (us) */
598long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ 598long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
599long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ 599long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
600static long time_phase; /* phase offset (scaled us) */
601long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; 600long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
602 /* frequency offset (scaled ppm)*/ 601 /* frequency offset (scaled ppm)*/
603static long time_adj; /* tick adjust (scaled 1 / HZ) */ 602static long time_adj; /* tick adjust (scaled 1 / HZ) */
@@ -747,27 +746,14 @@ static long adjtime_adjustment(void)
747} 746}
748 747
749/* in the NTP reference this is called "hardclock()" */ 748/* in the NTP reference this is called "hardclock()" */
750static void update_wall_time_one_tick(void) 749static void update_ntp_one_tick(void)
751{ 750{
752 long time_adjust_step, delta_nsec; 751 long time_adjust_step;
753 752
754 time_adjust_step = adjtime_adjustment(); 753 time_adjust_step = adjtime_adjustment();
755 if (time_adjust_step) 754 if (time_adjust_step)
756 /* Reduce by this step the amount of time left */ 755 /* Reduce by this step the amount of time left */
757 time_adjust -= time_adjust_step; 756 time_adjust -= time_adjust_step;
758 delta_nsec = tick_nsec + time_adjust_step * 1000;
759 /*
760 * Advance the phase, once it gets to one microsecond, then
761 * advance the tick more.
762 */
763 time_phase += time_adj;
764 if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
765 long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
766 time_phase -= ltemp << (SHIFT_SCALE - 10);
767 delta_nsec += ltemp;
768 }
769 xtime.tv_nsec += delta_nsec;
770 time_interpolator_update(delta_nsec);
771 757
772 /* Changes by adjtime() do not take effect till next tick. */ 758 /* Changes by adjtime() do not take effect till next tick. */
773 if (time_next_adjust != 0) { 759 if (time_next_adjust != 0) {
@@ -780,36 +766,378 @@ static void update_wall_time_one_tick(void)
780 * Return how long ticks are at the moment, that is, how much time 766 * Return how long ticks are at the moment, that is, how much time
781 * update_wall_time_one_tick will add to xtime next time we call it 767 * update_wall_time_one_tick will add to xtime next time we call it
782 * (assuming no calls to do_adjtimex in the meantime). 768 * (assuming no calls to do_adjtimex in the meantime).
783 * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 769 * The return value is in fixed-point nanoseconds shifted by the
784 * bits to the right of the binary point. 770 * specified number of bits to the right of the binary point.
785 * This function has no side-effects. 771 * This function has no side-effects.
786 */ 772 */
787u64 current_tick_length(void) 773u64 current_tick_length(void)
788{ 774{
789 long delta_nsec; 775 long delta_nsec;
776 u64 ret;
790 777
778 /* calculate the finest interval NTP will allow.
779 * ie: nanosecond value shifted by (SHIFT_SCALE - 10)
780 */
791 delta_nsec = tick_nsec + adjtime_adjustment() * 1000; 781 delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
792 return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; 782 ret = (u64)delta_nsec << TICK_LENGTH_SHIFT;
783 ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10));
784
785 return ret;
793} 786}
794 787
795/* 788/* XXX - all of this timekeeping code should be later moved to time.c */
796 * Using a loop looks inefficient, but "ticks" is 789#include <linux/clocksource.h>
797 * usually just one (we shouldn't be losing ticks, 790static struct clocksource *clock; /* pointer to current clocksource */
798 * we're doing this this way mainly for interrupt 791
799 * latency reasons, not because we think we'll 792#ifdef CONFIG_GENERIC_TIME
800 * have lots of lost timer ticks 793/**
794 * __get_nsec_offset - Returns nanoseconds since last call to update_wall_time
795 *
796 * private function, must hold xtime_lock lock when being
797 * called. Returns the number of nanoseconds since the
798 * last call to update_wall_time() (adjusted by NTP scaling)
799 */
800static inline s64 __get_nsec_offset(void)
801{
802 cycle_t cycle_now, cycle_delta;
803 s64 ns_offset;
804
805 /* read clocksource: */
806 cycle_now = clocksource_read(clock);
807
808 /* calculate the delta since the last update_wall_time: */
809 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
810
811 /* convert to nanoseconds: */
812 ns_offset = cyc2ns(clock, cycle_delta);
813
814 return ns_offset;
815}
816
817/**
818 * __get_realtime_clock_ts - Returns the time of day in a timespec
819 * @ts: pointer to the timespec to be set
820 *
821 * Returns the time of day in a timespec. Used by
822 * do_gettimeofday() and getnstimeofday().
801 */ 823 */
802static void update_wall_time(unsigned long ticks) 824static inline void __get_realtime_clock_ts(struct timespec *ts)
803{ 825{
826 unsigned long seq;
827 s64 nsecs;
828
829 do {
830 seq = read_seqbegin(&xtime_lock);
831
832 *ts = xtime;
833 nsecs = __get_nsec_offset();
834
835 } while (read_seqretry(&xtime_lock, seq));
836
837 timespec_add_ns(ts, nsecs);
838}
839
840/**
841 * getnstimeofday - Returns the time of day in a timespec
842 * @ts: pointer to the timespec to be set
843 *
844 * Returns the time of day in a timespec.
845 */
846void getnstimeofday(struct timespec *ts)
847{
848 __get_realtime_clock_ts(ts);
849}
850
851EXPORT_SYMBOL(getnstimeofday);
852
853/**
854 * do_gettimeofday - Returns the time of day in a timeval
855 * @tv: pointer to the timeval to be set
856 *
857 * NOTE: Users should be converted to using getnstimeofday()
858 */
859void do_gettimeofday(struct timeval *tv)
860{
861 struct timespec now;
862
863 __get_realtime_clock_ts(&now);
864 tv->tv_sec = now.tv_sec;
865 tv->tv_usec = now.tv_nsec/1000;
866}
867
868EXPORT_SYMBOL(do_gettimeofday);
869/**
870 * do_settimeofday - Sets the time of day
871 * @tv: pointer to the timespec variable containing the new time
872 *
873 * Sets the time of day to the new time, updates NTP and notifies hrtimers
874 */
875int do_settimeofday(struct timespec *tv)
876{
877 unsigned long flags;
878 time_t wtm_sec, sec = tv->tv_sec;
879 long wtm_nsec, nsec = tv->tv_nsec;
880
881 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
882 return -EINVAL;
883
884 write_seqlock_irqsave(&xtime_lock, flags);
885
886 nsec -= __get_nsec_offset();
887
888 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
889 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
890
891 set_normalized_timespec(&xtime, sec, nsec);
892 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
893
894 ntp_clear();
895
896 write_sequnlock_irqrestore(&xtime_lock, flags);
897
898 /* signal hrtimers about time change */
899 clock_was_set();
900
901 return 0;
902}
903
904EXPORT_SYMBOL(do_settimeofday);
905
906/**
907 * change_clocksource - Swaps clocksources if a new one is available
908 *
909 * Accumulates current time interval and initializes new clocksource
910 */
911static int change_clocksource(void)
912{
913 struct clocksource *new;
914 cycle_t now;
915 u64 nsec;
916 new = clocksource_get_next();
917 if (clock != new) {
918 now = clocksource_read(new);
919 nsec = __get_nsec_offset();
920 timespec_add_ns(&xtime, nsec);
921
922 clock = new;
923 clock->cycle_last = now;
924 printk(KERN_INFO "Time: %s clocksource has been installed.\n",
925 clock->name);
926 return 1;
927 } else if (clock->update_callback) {
928 return clock->update_callback();
929 }
930 return 0;
931}
932#else
933#define change_clocksource() (0)
934#endif
935
936/**
937 * timekeeping_is_continuous - check to see if timekeeping is free running
938 */
939int timekeeping_is_continuous(void)
940{
941 unsigned long seq;
942 int ret;
943
804 do { 944 do {
805 ticks--; 945 seq = read_seqbegin(&xtime_lock);
806 update_wall_time_one_tick(); 946
807 if (xtime.tv_nsec >= 1000000000) { 947 ret = clock->is_continuous;
808 xtime.tv_nsec -= 1000000000; 948
949 } while (read_seqretry(&xtime_lock, seq));
950
951 return ret;
952}
953
954/*
955 * timekeeping_init - Initializes the clocksource and common timekeeping values
956 */
957void __init timekeeping_init(void)
958{
959 unsigned long flags;
960
961 write_seqlock_irqsave(&xtime_lock, flags);
962 clock = clocksource_get_next();
963 clocksource_calculate_interval(clock, tick_nsec);
964 clock->cycle_last = clocksource_read(clock);
965 ntp_clear();
966 write_sequnlock_irqrestore(&xtime_lock, flags);
967}
968
969
970/*
971 * timekeeping_resume - Resumes the generic timekeeping subsystem.
972 * @dev: unused
973 *
974 * This is for the generic clocksource timekeeping.
975 * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are
976 * still managed by arch specific suspend/resume code.
977 */
978static int timekeeping_resume(struct sys_device *dev)
979{
980 unsigned long flags;
981
982 write_seqlock_irqsave(&xtime_lock, flags);
983 /* restart the last cycle value */
984 clock->cycle_last = clocksource_read(clock);
985 write_sequnlock_irqrestore(&xtime_lock, flags);
986 return 0;
987}
988
989/* sysfs resume/suspend bits for timekeeping */
990static struct sysdev_class timekeeping_sysclass = {
991 .resume = timekeeping_resume,
992 set_kset_name("timekeeping"),
993};
994
995static struct sys_device device_timer = {
996 .id = 0,
997 .cls = &timekeeping_sysclass,
998};
999
1000static int __init timekeeping_init_device(void)
1001{
1002 int error = sysdev_class_register(&timekeeping_sysclass);
1003 if (!error)
1004 error = sysdev_register(&device_timer);
1005 return error;
1006}
1007
1008device_initcall(timekeeping_init_device);
1009
1010/*
1011 * If the error is already larger, we look ahead another tick,
1012 * to compensate for late or lost adjustments.
1013 */
1014static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset)
1015{
1016 int adj;
1017
1018 /*
1019 * As soon as the machine is synchronized to the external time
1020 * source this should be the common case.
1021 */
1022 error >>= 2;
1023 if (likely(sign > 0 ? error <= *interval : error >= *interval))
1024 return sign;
1025
1026 /*
1027 * An extra look ahead dampens the effect of the current error,
1028 * which can grow quite large with continuously late updates, as
1029 * it would dominate the adjustment value and can lead to
1030 * oscillation.
1031 */
1032 error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
1033 error -= clock->xtime_interval >> 1;
1034
1035 adj = 0;
1036 while (1) {
1037 error >>= 1;
1038 if (sign > 0 ? error <= *interval : error >= *interval)
1039 break;
1040 adj++;
1041 }
1042
1043 /*
1044 * Add the current adjustments to the error and take the offset
1045 * into account, the latter can cause the error to be hardly
1046 * reduced at the next tick. Check the error again if there's
1047 * room for another adjustment, thus further reducing the error
1048 * which otherwise had to be corrected at the next update.
1049 */
1050 error = (error << 1) - *interval + *offset;
1051 if (sign > 0 ? error > *interval : error < *interval)
1052 adj++;
1053
1054 *interval <<= adj;
1055 *offset <<= adj;
1056 return sign << adj;
1057}
1058
1059/*
1060 * Adjust the multiplier to reduce the error value,
1061 * this is optimized for the most common adjustments of -1,0,1,
1062 * for other values we can do a bit more work.
1063 */
1064static void clocksource_adjust(struct clocksource *clock, s64 offset)
1065{
1066 s64 error, interval = clock->cycle_interval;
1067 int adj;
1068
1069 error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
1070 if (error > interval) {
1071 adj = clocksource_bigadjust(1, error, &interval, &offset);
1072 } else if (error < -interval) {
1073 interval = -interval;
1074 offset = -offset;
1075 adj = clocksource_bigadjust(-1, error, &interval, &offset);
1076 } else
1077 return;
1078
1079 clock->mult += adj;
1080 clock->xtime_interval += interval;
1081 clock->xtime_nsec -= offset;
1082 clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
1083}
1084
1085/*
1086 * update_wall_time - Uses the current clocksource to increment the wall time
1087 *
1088 * Called from the timer interrupt, must hold a write on xtime_lock.
1089 */
1090static void update_wall_time(void)
1091{
1092 cycle_t offset;
1093
1094 clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
1095
1096#ifdef CONFIG_GENERIC_TIME
1097 offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
1098#else
1099 offset = clock->cycle_interval;
1100#endif
1101
1102 /* normally this loop will run just once, however in the
1103 * case of lost or late ticks, it will accumulate correctly.
1104 */
1105 while (offset >= clock->cycle_interval) {
1106 /* accumulate one interval */
1107 clock->xtime_nsec += clock->xtime_interval;
1108 clock->cycle_last += clock->cycle_interval;
1109 offset -= clock->cycle_interval;
1110
1111 if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
1112 clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
809 xtime.tv_sec++; 1113 xtime.tv_sec++;
810 second_overflow(); 1114 second_overflow();
811 } 1115 }
812 } while (ticks); 1116
1117 /* interpolator bits */
1118 time_interpolator_update(clock->xtime_interval
1119 >> clock->shift);
1120 /* increment the NTP state machine */
1121 update_ntp_one_tick();
1122
1123 /* accumulate error between NTP and clock interval */
1124 clock->error += current_tick_length();
1125 clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
1126 }
1127
1128 /* correct the clock when NTP error is too big */
1129 clocksource_adjust(clock, offset);
1130
1131 /* store full nanoseconds into xtime */
1132 xtime.tv_nsec = clock->xtime_nsec >> clock->shift;
1133 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
1134
1135 /* check to see if there is a new clocksource to use */
1136 if (change_clocksource()) {
1137 clock->error = 0;
1138 clock->xtime_nsec = 0;
1139 clocksource_calculate_interval(clock, tick_nsec);
1140 }
813} 1141}
814 1142
815/* 1143/*
@@ -915,10 +1243,8 @@ static inline void update_times(void)
915 unsigned long ticks; 1243 unsigned long ticks;
916 1244
917 ticks = jiffies - wall_jiffies; 1245 ticks = jiffies - wall_jiffies;
918 if (ticks) { 1246 wall_jiffies += ticks;
919 wall_jiffies += ticks; 1247 update_wall_time();
920 update_wall_time(ticks);
921 }
922 calc_load(ticks); 1248 calc_load(ticks);
923} 1249}
924 1250
diff --git a/lib/idr.c b/lib/idr.c
index de19030a999b..4d096819511a 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -29,6 +29,7 @@
29#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/module.h> 30#include <linux/module.h>
31#endif 31#endif
32#include <linux/err.h>
32#include <linux/string.h> 33#include <linux/string.h>
33#include <linux/idr.h> 34#include <linux/idr.h>
34 35
@@ -398,6 +399,48 @@ void *idr_find(struct idr *idp, int id)
398} 399}
399EXPORT_SYMBOL(idr_find); 400EXPORT_SYMBOL(idr_find);
400 401
402/**
403 * idr_replace - replace pointer for given id
404 * @idp: idr handle
405 * @ptr: pointer you want associated with the id
406 * @id: lookup key
407 *
408 * Replace the pointer registered with an id and return the old value.
409 * A -ENOENT return indicates that @id was not found.
410 * A -EINVAL return indicates that @id was not within valid constraints.
411 *
412 * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove().
413 */
414void *idr_replace(struct idr *idp, void *ptr, int id)
415{
416 int n;
417 struct idr_layer *p, *old_p;
418
419 n = idp->layers * IDR_BITS;
420 p = idp->top;
421
422 id &= MAX_ID_MASK;
423
424 if (id >= (1 << n))
425 return ERR_PTR(-EINVAL);
426
427 n -= IDR_BITS;
428 while ((n > 0) && p) {
429 p = p->ary[(id >> n) & IDR_MASK];
430 n -= IDR_BITS;
431 }
432
433 n = id & IDR_MASK;
434 if (unlikely(p == NULL || !test_bit(n, &p->bitmap)))
435 return ERR_PTR(-ENOENT);
436
437 old_p = p->ary[n];
438 p->ary[n] = ptr;
439
440 return old_p;
441}
442EXPORT_SYMBOL(idr_replace);
443
401static void idr_cache_ctor(void * idr_layer, kmem_cache_t *idr_layer_cache, 444static void idr_cache_ctor(void * idr_layer, kmem_cache_t *idr_layer_cache,
402 unsigned long flags) 445 unsigned long flags)
403{ 446{
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 73e0f23b7f51..6b9740bbf4c0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1821,7 +1821,7 @@ static inline void check_huge_range(struct vm_area_struct *vma,
1821 1821
1822int show_numa_map(struct seq_file *m, void *v) 1822int show_numa_map(struct seq_file *m, void *v)
1823{ 1823{
1824 struct task_struct *task = m->private; 1824 struct proc_maps_private *priv = m->private;
1825 struct vm_area_struct *vma = v; 1825 struct vm_area_struct *vma = v;
1826 struct numa_maps *md; 1826 struct numa_maps *md;
1827 struct file *file = vma->vm_file; 1827 struct file *file = vma->vm_file;
@@ -1837,7 +1837,7 @@ int show_numa_map(struct seq_file *m, void *v)
1837 return 0; 1837 return 0;
1838 1838
1839 mpol_to_str(buffer, sizeof(buffer), 1839 mpol_to_str(buffer, sizeof(buffer),
1840 get_vma_policy(task, vma, vma->vm_start)); 1840 get_vma_policy(priv->task, vma, vma->vm_start));
1841 1841
1842 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 1842 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1843 1843
@@ -1891,7 +1891,7 @@ out:
1891 kfree(md); 1891 kfree(md);
1892 1892
1893 if (m->count < m->size) 1893 if (m->count < m->size)
1894 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; 1894 m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
1895 return 0; 1895 return 0;
1896} 1896}
1897 1897
diff --git a/mm/swap.c b/mm/swap.c
index 03ae2076f92f..990868afc1c6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -86,8 +86,7 @@ int rotate_reclaimable_page(struct page *page)
86 zone = page_zone(page); 86 zone = page_zone(page);
87 spin_lock_irqsave(&zone->lru_lock, flags); 87 spin_lock_irqsave(&zone->lru_lock, flags);
88 if (PageLRU(page) && !PageActive(page)) { 88 if (PageLRU(page) && !PageActive(page)) {
89 list_del(&page->lru); 89 list_move_tail(&page->lru, &zone->inactive_list);
90 list_add_tail(&page->lru, &zone->inactive_list);
91 inc_page_state(pgrotated); 90 inc_page_state(pgrotated);
92 } 91 }
93 if (!test_clear_page_writeback(page)) 92 if (!test_clear_page_writeback(page))
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index a48a5d580408..5fe77df00186 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1113,10 +1113,9 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien
1113 1113
1114static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) 1114static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
1115{ 1115{
1116 unsigned char *ip;
1117
1118 uint32_t dst_ip = msg->content.in_info.in_dst_ip; 1116 uint32_t dst_ip = msg->content.in_info.in_dst_ip;
1119 in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); 1117 in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc);
1118
1120 dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip)); 1119 dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip));
1121 ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry); 1120 ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry);
1122 if(entry == NULL){ 1121 if(entry == NULL){
diff --git a/net/core/dev.c b/net/core/dev.c
index ea2469398bd5..f1c52cbd6ef7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -230,7 +230,7 @@ extern void netdev_unregister_sysfs(struct net_device *);
230 * For efficiency 230 * For efficiency
231 */ 231 */
232 232
233int netdev_nit; 233static int netdev_nit;
234 234
235/* 235/*
236 * Add a protocol ID to the list. Now that the input handler is 236 * Add a protocol ID to the list. Now that the input handler is
@@ -1325,9 +1325,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1325 nskb->next = NULL; 1325 nskb->next = NULL;
1326 rc = dev->hard_start_xmit(nskb, dev); 1326 rc = dev->hard_start_xmit(nskb, dev);
1327 if (unlikely(rc)) { 1327 if (unlikely(rc)) {
1328 nskb->next = skb->next;
1328 skb->next = nskb; 1329 skb->next = nskb;
1329 return rc; 1330 return rc;
1330 } 1331 }
1332 if (unlikely(netif_queue_stopped(dev) && skb->next))
1333 return NETDEV_TX_BUSY;
1331 } while (skb->next); 1334 } while (skb->next);
1332 1335
1333 skb->destructor = DEV_GSO_CB(skb)->destructor; 1336 skb->destructor = DEV_GSO_CB(skb)->destructor;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9cb781830380..471da451cd48 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -54,6 +54,7 @@ static atomic_t trapped;
54 sizeof(struct iphdr) + sizeof(struct ethhdr)) 54 sizeof(struct iphdr) + sizeof(struct ethhdr))
55 55
56static void zap_completion_queue(void); 56static void zap_completion_queue(void);
57static void arp_reply(struct sk_buff *skb);
57 58
58static void queue_process(void *p) 59static void queue_process(void *p)
59{ 60{
@@ -153,6 +154,22 @@ static void poll_napi(struct netpoll *np)
153 } 154 }
154} 155}
155 156
157static void service_arp_queue(struct netpoll_info *npi)
158{
159 struct sk_buff *skb;
160
161 if (unlikely(!npi))
162 return;
163
164 skb = skb_dequeue(&npi->arp_tx);
165
166 while (skb != NULL) {
167 arp_reply(skb);
168 skb = skb_dequeue(&npi->arp_tx);
169 }
170 return;
171}
172
156void netpoll_poll(struct netpoll *np) 173void netpoll_poll(struct netpoll *np)
157{ 174{
158 if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) 175 if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
@@ -163,6 +180,8 @@ void netpoll_poll(struct netpoll *np)
163 if (np->dev->poll) 180 if (np->dev->poll)
164 poll_napi(np); 181 poll_napi(np);
165 182
183 service_arp_queue(np->dev->npinfo);
184
166 zap_completion_queue(); 185 zap_completion_queue();
167} 186}
168 187
@@ -279,14 +298,10 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
279 * network drivers do not expect to be called if the queue is 298 * network drivers do not expect to be called if the queue is
280 * stopped. 299 * stopped.
281 */ 300 */
282 if (netif_queue_stopped(np->dev)) { 301 status = NETDEV_TX_BUSY;
283 netif_tx_unlock(np->dev); 302 if (!netif_queue_stopped(np->dev))
284 netpoll_poll(np); 303 status = np->dev->hard_start_xmit(skb, np->dev);
285 udelay(50);
286 continue;
287 }
288 304
289 status = np->dev->hard_start_xmit(skb, np->dev);
290 netif_tx_unlock(np->dev); 305 netif_tx_unlock(np->dev);
291 306
292 /* success */ 307 /* success */
@@ -446,7 +461,9 @@ int __netpoll_rx(struct sk_buff *skb)
446 int proto, len, ulen; 461 int proto, len, ulen;
447 struct iphdr *iph; 462 struct iphdr *iph;
448 struct udphdr *uh; 463 struct udphdr *uh;
449 struct netpoll *np = skb->dev->npinfo->rx_np; 464 struct netpoll_info *npi = skb->dev->npinfo;
465 struct netpoll *np = npi->rx_np;
466
450 467
451 if (!np) 468 if (!np)
452 goto out; 469 goto out;
@@ -456,7 +473,7 @@ int __netpoll_rx(struct sk_buff *skb)
456 /* check if netpoll clients need ARP */ 473 /* check if netpoll clients need ARP */
457 if (skb->protocol == __constant_htons(ETH_P_ARP) && 474 if (skb->protocol == __constant_htons(ETH_P_ARP) &&
458 atomic_read(&trapped)) { 475 atomic_read(&trapped)) {
459 arp_reply(skb); 476 skb_queue_tail(&npi->arp_tx, skb);
460 return 1; 477 return 1;
461 } 478 }
462 479
@@ -651,6 +668,7 @@ int netpoll_setup(struct netpoll *np)
651 npinfo->poll_owner = -1; 668 npinfo->poll_owner = -1;
652 npinfo->tries = MAX_RETRIES; 669 npinfo->tries = MAX_RETRIES;
653 spin_lock_init(&npinfo->rx_lock); 670 spin_lock_init(&npinfo->rx_lock);
671 skb_queue_head_init(&npinfo->arp_tx);
654 } else 672 } else
655 npinfo = ndev->npinfo; 673 npinfo = ndev->npinfo;
656 674
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8e5044ba3ab6..6edbb90cbcec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1739,12 +1739,15 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
1739 unsigned int to, struct ts_config *config, 1739 unsigned int to, struct ts_config *config,
1740 struct ts_state *state) 1740 struct ts_state *state)
1741{ 1741{
1742 unsigned int ret;
1743
1742 config->get_next_block = skb_ts_get_next_block; 1744 config->get_next_block = skb_ts_get_next_block;
1743 config->finish = skb_ts_finish; 1745 config->finish = skb_ts_finish;
1744 1746
1745 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); 1747 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
1746 1748
1747 return textsearch_find(config, state); 1749 ret = textsearch_find(config, state);
1750 return (ret <= to - from ? ret : UINT_MAX);
1748} 1751}
1749 1752
1750/** 1753/**
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0e029c4e2903..c04176be7ed1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2166,7 +2166,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
2166 if (!pskb_may_pull(skb, thlen)) 2166 if (!pskb_may_pull(skb, thlen))
2167 goto out; 2167 goto out;
2168 2168
2169 oldlen = ~htonl(skb->len); 2169 oldlen = (u16)~skb->len;
2170 __skb_pull(skb, thlen); 2170 __skb_pull(skb, thlen);
2171 2171
2172 segs = skb_segment(skb, sg); 2172 segs = skb_segment(skb, sg);
@@ -2174,7 +2174,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
2174 goto out; 2174 goto out;
2175 2175
2176 len = skb_shinfo(skb)->gso_size; 2176 len = skb_shinfo(skb)->gso_size;
2177 delta = csum_add(oldlen, htonl(thlen + len)); 2177 delta = htonl(oldlen + (thlen + len));
2178 2178
2179 skb = segs; 2179 skb = segs;
2180 th = skb->h.th; 2180 th = skb->h.th;
@@ -2183,10 +2183,10 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
2183 do { 2183 do {
2184 th->fin = th->psh = 0; 2184 th->fin = th->psh = 0;
2185 2185
2186 if (skb->ip_summed == CHECKSUM_NONE) { 2186 th->check = ~csum_fold(th->check + delta);
2187 th->check = csum_fold(csum_partial( 2187 if (skb->ip_summed != CHECKSUM_HW)
2188 skb->h.raw, thlen, csum_add(skb->csum, delta))); 2188 th->check = csum_fold(csum_partial(skb->h.raw, thlen,
2189 } 2189 skb->csum));
2190 2190
2191 seq += len; 2191 seq += len;
2192 skb = skb->next; 2192 skb = skb->next;
@@ -2196,11 +2196,11 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
2196 th->cwr = 0; 2196 th->cwr = 0;
2197 } while (skb->next); 2197 } while (skb->next);
2198 2198
2199 if (skb->ip_summed == CHECKSUM_NONE) { 2199 delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
2200 delta = csum_add(oldlen, htonl(skb->tail - skb->h.raw)); 2200 th->check = ~csum_fold(th->check + delta);
2201 th->check = csum_fold(csum_partial( 2201 if (skb->ip_summed != CHECKSUM_HW)
2202 skb->h.raw, thlen, csum_add(skb->csum, delta))); 2202 th->check = csum_fold(csum_partial(skb->h.raw, thlen,
2203 } 2203 skb->csum));
2204 2204
2205out: 2205out:
2206 return segs; 2206 return segs;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index b3b9097c87c7..c11737f472d6 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -725,15 +725,17 @@ void nr_link_failed(ax25_cb *ax25, int reason)
725 struct nr_node *nr_node = NULL; 725 struct nr_node *nr_node = NULL;
726 726
727 spin_lock_bh(&nr_neigh_list_lock); 727 spin_lock_bh(&nr_neigh_list_lock);
728 nr_neigh_for_each(s, node, &nr_neigh_list) 728 nr_neigh_for_each(s, node, &nr_neigh_list) {
729 if (s->ax25 == ax25) { 729 if (s->ax25 == ax25) {
730 nr_neigh_hold(s); 730 nr_neigh_hold(s);
731 nr_neigh = s; 731 nr_neigh = s;
732 break; 732 break;
733 } 733 }
734 }
734 spin_unlock_bh(&nr_neigh_list_lock); 735 spin_unlock_bh(&nr_neigh_list_lock);
735 736
736 if (nr_neigh == NULL) return; 737 if (nr_neigh == NULL)
738 return;
737 739
738 nr_neigh->ax25 = NULL; 740 nr_neigh->ax25 = NULL;
739 ax25_cb_put(ax25); 741 ax25_cb_put(ax25);
@@ -743,11 +745,13 @@ void nr_link_failed(ax25_cb *ax25, int reason)
743 return; 745 return;
744 } 746 }
745 spin_lock_bh(&nr_node_list_lock); 747 spin_lock_bh(&nr_node_list_lock);
746 nr_node_for_each(nr_node, node, &nr_node_list) 748 nr_node_for_each(nr_node, node, &nr_node_list) {
747 nr_node_lock(nr_node); 749 nr_node_lock(nr_node);
748 if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) 750 if (nr_node->which < nr_node->count &&
751 nr_node->routes[nr_node->which].neighbour == nr_neigh)
749 nr_node->which++; 752 nr_node->which++;
750 nr_node_unlock(nr_node); 753 nr_node_unlock(nr_node);
754 }
751 spin_unlock_bh(&nr_node_list_lock); 755 spin_unlock_bh(&nr_node_list_lock);
752 nr_neigh_put(nr_neigh); 756 nr_neigh_put(nr_neigh);
753} 757}
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
index c4aeb7d40266..d07122b57e0d 100644
--- a/net/rxrpc/call.c
+++ b/net/rxrpc/call.c
@@ -1098,8 +1098,7 @@ static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
1098 1098
1099 call->app_ready_seq = pmsg->seq; 1099 call->app_ready_seq = pmsg->seq;
1100 call->app_ready_qty += pmsg->dsize; 1100 call->app_ready_qty += pmsg->dsize;
1101 list_del_init(&pmsg->link); 1101 list_move_tail(&pmsg->link, &call->app_readyq);
1102 list_add_tail(&pmsg->link, &call->app_readyq);
1103 } 1102 }
1104 1103
1105 /* see if we've got the last packet yet */ 1104 /* see if we've got the last packet yet */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
index 0e0a4553499f..573b572f8f91 100644
--- a/net/rxrpc/connection.c
+++ b/net/rxrpc/connection.c
@@ -402,8 +402,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
402 402
403 /* move to graveyard queue */ 403 /* move to graveyard queue */
404 _debug("burying connection: {%08x}", ntohl(conn->conn_id)); 404 _debug("burying connection: {%08x}", ntohl(conn->conn_id));
405 list_del(&conn->link); 405 list_move_tail(&conn->link, &peer->conn_graveyard);
406 list_add_tail(&conn->link, &peer->conn_graveyard);
407 406
408 rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ); 407 rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
409 408
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
index 1aadd026d354..cea4eb5e2497 100644
--- a/net/rxrpc/krxsecd.c
+++ b/net/rxrpc/krxsecd.c
@@ -160,8 +160,7 @@ void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
160 list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) { 160 list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
161 msg = list_entry(_p, struct rxrpc_message, link); 161 msg = list_entry(_p, struct rxrpc_message, link);
162 if (msg->trans == trans) { 162 if (msg->trans == trans) {
163 list_del(&msg->link); 163 list_move_tail(&msg->link, &tmp);
164 list_add_tail(&msg->link, &tmp);
165 atomic_dec(&rxrpc_krxsecd_qcount); 164 atomic_dec(&rxrpc_krxsecd_qcount);
166 } 165 }
167 } 166 }
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 2c4ecbe50082..54128040a124 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -49,13 +49,19 @@
49#include "name_table.h" 49#include "name_table.h"
50#include "bcast.h" 50#include "bcast.h"
51 51
52
53#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ 52#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
54 53
55#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ 54#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
56 55
57#define BCLINK_LOG_BUF_SIZE 0 56#define BCLINK_LOG_BUF_SIZE 0
58 57
58/*
59 * Loss rate for incoming broadcast frames; used to test retransmission code.
60 * Set to N to cause every N'th frame to be discarded; 0 => don't discard any.
61 */
62
63#define TIPC_BCAST_LOSS_RATE 0
64
59/** 65/**
60 * struct bcbearer_pair - a pair of bearers used by broadcast link 66 * struct bcbearer_pair - a pair of bearers used by broadcast link
61 * @primary: pointer to primary bearer 67 * @primary: pointer to primary bearer
@@ -75,7 +81,14 @@ struct bcbearer_pair {
75 * @bearer: (non-standard) broadcast bearer structure 81 * @bearer: (non-standard) broadcast bearer structure
76 * @media: (non-standard) broadcast media structure 82 * @media: (non-standard) broadcast media structure
77 * @bpairs: array of bearer pairs 83 * @bpairs: array of bearer pairs
78 * @bpairs_temp: array of bearer pairs used during creation of "bpairs" 84 * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort()
85 * @remains: temporary node map used by tipc_bcbearer_send()
86 * @remains_new: temporary node map used by tipc_bcbearer_send()
87 *
88 * Note: The fields labelled "temporary" are incorporated into the bearer
89 * to avoid consuming potentially limited stack space through the use of
90 * large local variables within multicast routines. Concurrent access is
91 * prevented through use of the spinlock "bc_lock".
79 */ 92 */
80 93
81struct bcbearer { 94struct bcbearer {
@@ -83,6 +96,8 @@ struct bcbearer {
83 struct media media; 96 struct media media;
84 struct bcbearer_pair bpairs[MAX_BEARERS]; 97 struct bcbearer_pair bpairs[MAX_BEARERS];
85 struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; 98 struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
99 struct node_map remains;
100 struct node_map remains_new;
86}; 101};
87 102
88/** 103/**
@@ -165,21 +180,18 @@ static int bclink_ack_allowed(u32 n)
165 * @after: sequence number of last packet to *not* retransmit 180 * @after: sequence number of last packet to *not* retransmit
166 * @to: sequence number of last packet to retransmit 181 * @to: sequence number of last packet to retransmit
167 * 182 *
168 * Called with 'node' locked, bc_lock unlocked 183 * Called with bc_lock locked
169 */ 184 */
170 185
171static void bclink_retransmit_pkt(u32 after, u32 to) 186static void bclink_retransmit_pkt(u32 after, u32 to)
172{ 187{
173 struct sk_buff *buf; 188 struct sk_buff *buf;
174 189
175 spin_lock_bh(&bc_lock);
176 buf = bcl->first_out; 190 buf = bcl->first_out;
177 while (buf && less_eq(buf_seqno(buf), after)) { 191 while (buf && less_eq(buf_seqno(buf), after)) {
178 buf = buf->next; 192 buf = buf->next;
179 } 193 }
180 if (buf != NULL) 194 tipc_link_retransmit(bcl, buf, mod(to - after));
181 tipc_link_retransmit(bcl, buf, mod(to - after));
182 spin_unlock_bh(&bc_lock);
183} 195}
184 196
185/** 197/**
@@ -346,8 +358,10 @@ static void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 g
346 for (; buf; buf = buf->next) { 358 for (; buf; buf = buf->next) {
347 u32 seqno = buf_seqno(buf); 359 u32 seqno = buf_seqno(buf);
348 360
349 if (mod(seqno - prev) != 1) 361 if (mod(seqno - prev) != 1) {
350 buf = NULL; 362 buf = NULL;
363 break;
364 }
351 if (seqno == gap_after) 365 if (seqno == gap_after)
352 break; 366 break;
353 prev = seqno; 367 prev = seqno;
@@ -399,7 +413,10 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
399 */ 413 */
400 414
401void tipc_bclink_recv_pkt(struct sk_buff *buf) 415void tipc_bclink_recv_pkt(struct sk_buff *buf)
402{ 416{
417#if (TIPC_BCAST_LOSS_RATE)
418 static int rx_count = 0;
419#endif
403 struct tipc_msg *msg = buf_msg(buf); 420 struct tipc_msg *msg = buf_msg(buf);
404 struct node* node = tipc_node_find(msg_prevnode(msg)); 421 struct node* node = tipc_node_find(msg_prevnode(msg));
405 u32 next_in; 422 u32 next_in;
@@ -420,9 +437,13 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
420 tipc_node_lock(node); 437 tipc_node_lock(node);
421 tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); 438 tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
422 tipc_node_unlock(node); 439 tipc_node_unlock(node);
440 spin_lock_bh(&bc_lock);
423 bcl->stats.recv_nacks++; 441 bcl->stats.recv_nacks++;
442 bcl->owner->next = node; /* remember requestor */
424 bclink_retransmit_pkt(msg_bcgap_after(msg), 443 bclink_retransmit_pkt(msg_bcgap_after(msg),
425 msg_bcgap_to(msg)); 444 msg_bcgap_to(msg));
445 bcl->owner->next = NULL;
446 spin_unlock_bh(&bc_lock);
426 } else { 447 } else {
427 tipc_bclink_peek_nack(msg_destnode(msg), 448 tipc_bclink_peek_nack(msg_destnode(msg),
428 msg_bcast_tag(msg), 449 msg_bcast_tag(msg),
@@ -433,6 +454,14 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
433 return; 454 return;
434 } 455 }
435 456
457#if (TIPC_BCAST_LOSS_RATE)
458 if (++rx_count == TIPC_BCAST_LOSS_RATE) {
459 rx_count = 0;
460 buf_discard(buf);
461 return;
462 }
463#endif
464
436 tipc_node_lock(node); 465 tipc_node_lock(node);
437receive: 466receive:
438 deferred = node->bclink.deferred_head; 467 deferred = node->bclink.deferred_head;
@@ -531,12 +560,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
531{ 560{
532 static int send_count = 0; 561 static int send_count = 0;
533 562
534 struct node_map *remains;
535 struct node_map *remains_new;
536 struct node_map *remains_tmp;
537 int bp_index; 563 int bp_index;
538 int swap_time; 564 int swap_time;
539 int err;
540 565
541 /* Prepare buffer for broadcasting (if first time trying to send it) */ 566 /* Prepare buffer for broadcasting (if first time trying to send it) */
542 567
@@ -557,9 +582,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
557 582
558 /* Send buffer over bearers until all targets reached */ 583 /* Send buffer over bearers until all targets reached */
559 584
560 remains = kmalloc(sizeof(struct node_map), GFP_ATOMIC); 585 bcbearer->remains = tipc_cltr_bcast_nodes;
561 remains_new = kmalloc(sizeof(struct node_map), GFP_ATOMIC);
562 *remains = tipc_cltr_bcast_nodes;
563 586
564 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { 587 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
565 struct bearer *p = bcbearer->bpairs[bp_index].primary; 588 struct bearer *p = bcbearer->bpairs[bp_index].primary;
@@ -568,8 +591,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
568 if (!p) 591 if (!p)
569 break; /* no more bearers to try */ 592 break; /* no more bearers to try */
570 593
571 tipc_nmap_diff(remains, &p->nodes, remains_new); 594 tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new);
572 if (remains_new->count == remains->count) 595 if (bcbearer->remains_new.count == bcbearer->remains.count)
573 continue; /* bearer pair doesn't add anything */ 596 continue; /* bearer pair doesn't add anything */
574 597
575 if (!p->publ.blocked && 598 if (!p->publ.blocked &&
@@ -587,27 +610,17 @@ swap:
587 bcbearer->bpairs[bp_index].primary = s; 610 bcbearer->bpairs[bp_index].primary = s;
588 bcbearer->bpairs[bp_index].secondary = p; 611 bcbearer->bpairs[bp_index].secondary = p;
589update: 612update:
590 if (remains_new->count == 0) { 613 if (bcbearer->remains_new.count == 0)
591 err = TIPC_OK; 614 return TIPC_OK;
592 goto out;
593 }
594 615
595 /* swap map */ 616 bcbearer->remains = bcbearer->remains_new;
596 remains_tmp = remains;
597 remains = remains_new;
598 remains_new = remains_tmp;
599 } 617 }
600 618
601 /* Unable to reach all targets */ 619 /* Unable to reach all targets */
602 620
603 bcbearer->bearer.publ.blocked = 1; 621 bcbearer->bearer.publ.blocked = 1;
604 bcl->stats.bearer_congs++; 622 bcl->stats.bearer_congs++;
605 err = ~TIPC_OK; 623 return ~TIPC_OK;
606
607 out:
608 kfree(remains_new);
609 kfree(remains);
610 return err;
611} 624}
612 625
613/** 626/**
@@ -765,7 +778,7 @@ int tipc_bclink_init(void)
765 bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC); 778 bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC);
766 if (!bcbearer || !bclink) { 779 if (!bcbearer || !bclink) {
767 nomem: 780 nomem:
768 warn("Memory squeeze; Failed to create multicast link\n"); 781 warn("Multicast link creation failed, no memory\n");
769 kfree(bcbearer); 782 kfree(bcbearer);
770 bcbearer = NULL; 783 bcbearer = NULL;
771 kfree(bclink); 784 kfree(bclink);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 0e3be2ab3307..b243d9d495f0 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -180,7 +180,7 @@ static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
180 if (!item->next) { 180 if (!item->next) {
181 item->next = kmalloc(sizeof(*item), GFP_ATOMIC); 181 item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
182 if (!item->next) { 182 if (!item->next) {
183 warn("Memory squeeze: multicast destination port list is incomplete\n"); 183 warn("Incomplete multicast delivery, no memory\n");
184 return; 184 return;
185 } 185 }
186 item->next->next = NULL; 186 item->next->next = NULL;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index e213a8e54855..4fa24b5e8914 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -112,39 +112,42 @@ int tipc_register_media(u32 media_type,
112 goto exit; 112 goto exit;
113 113
114 if (!media_name_valid(name)) { 114 if (!media_name_valid(name)) {
115 warn("Media registration error: illegal name <%s>\n", name); 115 warn("Media <%s> rejected, illegal name\n", name);
116 goto exit; 116 goto exit;
117 } 117 }
118 if (!bcast_addr) { 118 if (!bcast_addr) {
119 warn("Media registration error: no broadcast address supplied\n"); 119 warn("Media <%s> rejected, no broadcast address\n", name);
120 goto exit; 120 goto exit;
121 } 121 }
122 if ((bearer_priority < TIPC_MIN_LINK_PRI) && 122 if ((bearer_priority < TIPC_MIN_LINK_PRI) &&
123 (bearer_priority > TIPC_MAX_LINK_PRI)) { 123 (bearer_priority > TIPC_MAX_LINK_PRI)) {
124 warn("Media registration error: priority %u\n", bearer_priority); 124 warn("Media <%s> rejected, illegal priority (%u)\n", name,
125 bearer_priority);
125 goto exit; 126 goto exit;
126 } 127 }
127 if ((link_tolerance < TIPC_MIN_LINK_TOL) || 128 if ((link_tolerance < TIPC_MIN_LINK_TOL) ||
128 (link_tolerance > TIPC_MAX_LINK_TOL)) { 129 (link_tolerance > TIPC_MAX_LINK_TOL)) {
129 warn("Media registration error: tolerance %u\n", link_tolerance); 130 warn("Media <%s> rejected, illegal tolerance (%u)\n", name,
131 link_tolerance);
130 goto exit; 132 goto exit;
131 } 133 }
132 134
133 media_id = media_count++; 135 media_id = media_count++;
134 if (media_id >= MAX_MEDIA) { 136 if (media_id >= MAX_MEDIA) {
135 warn("Attempt to register more than %u media\n", MAX_MEDIA); 137 warn("Media <%s> rejected, media limit reached (%u)\n", name,
138 MAX_MEDIA);
136 media_count--; 139 media_count--;
137 goto exit; 140 goto exit;
138 } 141 }
139 for (i = 0; i < media_id; i++) { 142 for (i = 0; i < media_id; i++) {
140 if (media_list[i].type_id == media_type) { 143 if (media_list[i].type_id == media_type) {
141 warn("Attempt to register second media with type %u\n", 144 warn("Media <%s> rejected, duplicate type (%u)\n", name,
142 media_type); 145 media_type);
143 media_count--; 146 media_count--;
144 goto exit; 147 goto exit;
145 } 148 }
146 if (!strcmp(name, media_list[i].name)) { 149 if (!strcmp(name, media_list[i].name)) {
147 warn("Attempt to re-register media name <%s>\n", name); 150 warn("Media <%s> rejected, duplicate name\n", name);
148 media_count--; 151 media_count--;
149 goto exit; 152 goto exit;
150 } 153 }
@@ -283,6 +286,9 @@ static struct bearer *bearer_find(const char *name)
283 struct bearer *b_ptr; 286 struct bearer *b_ptr;
284 u32 i; 287 u32 i;
285 288
289 if (tipc_mode != TIPC_NET_MODE)
290 return NULL;
291
286 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { 292 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
287 if (b_ptr->active && (!strcmp(b_ptr->publ.name, name))) 293 if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
288 return b_ptr; 294 return b_ptr;
@@ -475,26 +481,33 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
475 u32 i; 481 u32 i;
476 int res = -EINVAL; 482 int res = -EINVAL;
477 483
478 if (tipc_mode != TIPC_NET_MODE) 484 if (tipc_mode != TIPC_NET_MODE) {
485 warn("Bearer <%s> rejected, not supported in standalone mode\n",
486 name);
479 return -ENOPROTOOPT; 487 return -ENOPROTOOPT;
480 488 }
481 if (!bearer_name_validate(name, &b_name) || 489 if (!bearer_name_validate(name, &b_name)) {
482 !tipc_addr_domain_valid(bcast_scope) || 490 warn("Bearer <%s> rejected, illegal name\n", name);
483 !in_scope(bcast_scope, tipc_own_addr))
484 return -EINVAL; 491 return -EINVAL;
485 492 }
493 if (!tipc_addr_domain_valid(bcast_scope) ||
494 !in_scope(bcast_scope, tipc_own_addr)) {
495 warn("Bearer <%s> rejected, illegal broadcast scope\n", name);
496 return -EINVAL;
497 }
486 if ((priority < TIPC_MIN_LINK_PRI || 498 if ((priority < TIPC_MIN_LINK_PRI ||
487 priority > TIPC_MAX_LINK_PRI) && 499 priority > TIPC_MAX_LINK_PRI) &&
488 (priority != TIPC_MEDIA_LINK_PRI)) 500 (priority != TIPC_MEDIA_LINK_PRI)) {
501 warn("Bearer <%s> rejected, illegal priority\n", name);
489 return -EINVAL; 502 return -EINVAL;
503 }
490 504
491 write_lock_bh(&tipc_net_lock); 505 write_lock_bh(&tipc_net_lock);
492 if (!tipc_bearers)
493 goto failed;
494 506
495 m_ptr = media_find(b_name.media_name); 507 m_ptr = media_find(b_name.media_name);
496 if (!m_ptr) { 508 if (!m_ptr) {
497 warn("No media <%s>\n", b_name.media_name); 509 warn("Bearer <%s> rejected, media <%s> not registered\n", name,
510 b_name.media_name);
498 goto failed; 511 goto failed;
499 } 512 }
500 513
@@ -510,23 +523,24 @@ restart:
510 continue; 523 continue;
511 } 524 }
512 if (!strcmp(name, tipc_bearers[i].publ.name)) { 525 if (!strcmp(name, tipc_bearers[i].publ.name)) {
513 warn("Bearer <%s> already enabled\n", name); 526 warn("Bearer <%s> rejected, already enabled\n", name);
514 goto failed; 527 goto failed;
515 } 528 }
516 if ((tipc_bearers[i].priority == priority) && 529 if ((tipc_bearers[i].priority == priority) &&
517 (++with_this_prio > 2)) { 530 (++with_this_prio > 2)) {
518 if (priority-- == 0) { 531 if (priority-- == 0) {
519 warn("Third bearer <%s> with priority %u, unable to lower to %u\n", 532 warn("Bearer <%s> rejected, duplicate priority\n",
520 name, priority + 1, priority); 533 name);
521 goto failed; 534 goto failed;
522 } 535 }
523 warn("Third bearer <%s> with priority %u, lowering to %u\n", 536 warn("Bearer <%s> priority adjustment required %u->%u\n",
524 name, priority + 1, priority); 537 name, priority + 1, priority);
525 goto restart; 538 goto restart;
526 } 539 }
527 } 540 }
528 if (bearer_id >= MAX_BEARERS) { 541 if (bearer_id >= MAX_BEARERS) {
529 warn("Attempt to enable more than %d bearers\n", MAX_BEARERS); 542 warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
543 name, MAX_BEARERS);
530 goto failed; 544 goto failed;
531 } 545 }
532 546
@@ -536,7 +550,7 @@ restart:
536 strcpy(b_ptr->publ.name, name); 550 strcpy(b_ptr->publ.name, name);
537 res = m_ptr->enable_bearer(&b_ptr->publ); 551 res = m_ptr->enable_bearer(&b_ptr->publ);
538 if (res) { 552 if (res) {
539 warn("Failed to enable bearer <%s>\n", name); 553 warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res);
540 goto failed; 554 goto failed;
541 } 555 }
542 556
@@ -573,9 +587,6 @@ int tipc_block_bearer(const char *name)
573 struct link *l_ptr; 587 struct link *l_ptr;
574 struct link *temp_l_ptr; 588 struct link *temp_l_ptr;
575 589
576 if (tipc_mode != TIPC_NET_MODE)
577 return -ENOPROTOOPT;
578
579 read_lock_bh(&tipc_net_lock); 590 read_lock_bh(&tipc_net_lock);
580 b_ptr = bearer_find(name); 591 b_ptr = bearer_find(name);
581 if (!b_ptr) { 592 if (!b_ptr) {
@@ -584,6 +595,7 @@ int tipc_block_bearer(const char *name)
584 return -EINVAL; 595 return -EINVAL;
585 } 596 }
586 597
598 info("Blocking bearer <%s>\n", name);
587 spin_lock_bh(&b_ptr->publ.lock); 599 spin_lock_bh(&b_ptr->publ.lock);
588 b_ptr->publ.blocked = 1; 600 b_ptr->publ.blocked = 1;
589 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { 601 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
@@ -595,7 +607,6 @@ int tipc_block_bearer(const char *name)
595 } 607 }
596 spin_unlock_bh(&b_ptr->publ.lock); 608 spin_unlock_bh(&b_ptr->publ.lock);
597 read_unlock_bh(&tipc_net_lock); 609 read_unlock_bh(&tipc_net_lock);
598 info("Blocked bearer <%s>\n", name);
599 return TIPC_OK; 610 return TIPC_OK;
600} 611}
601 612
@@ -611,15 +622,13 @@ static int bearer_disable(const char *name)
611 struct link *l_ptr; 622 struct link *l_ptr;
612 struct link *temp_l_ptr; 623 struct link *temp_l_ptr;
613 624
614 if (tipc_mode != TIPC_NET_MODE)
615 return -ENOPROTOOPT;
616
617 b_ptr = bearer_find(name); 625 b_ptr = bearer_find(name);
618 if (!b_ptr) { 626 if (!b_ptr) {
619 warn("Attempt to disable unknown bearer <%s>\n", name); 627 warn("Attempt to disable unknown bearer <%s>\n", name);
620 return -EINVAL; 628 return -EINVAL;
621 } 629 }
622 630
631 info("Disabling bearer <%s>\n", name);
623 tipc_disc_stop_link_req(b_ptr->link_req); 632 tipc_disc_stop_link_req(b_ptr->link_req);
624 spin_lock_bh(&b_ptr->publ.lock); 633 spin_lock_bh(&b_ptr->publ.lock);
625 b_ptr->link_req = NULL; 634 b_ptr->link_req = NULL;
@@ -635,7 +644,6 @@ static int bearer_disable(const char *name)
635 tipc_link_delete(l_ptr); 644 tipc_link_delete(l_ptr);
636 } 645 }
637 spin_unlock_bh(&b_ptr->publ.lock); 646 spin_unlock_bh(&b_ptr->publ.lock);
638 info("Disabled bearer <%s>\n", name);
639 memset(b_ptr, 0, sizeof(struct bearer)); 647 memset(b_ptr, 0, sizeof(struct bearer));
640 return TIPC_OK; 648 return TIPC_OK;
641} 649}
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 1aed81584e96..1dcb6940e338 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -60,8 +60,10 @@ struct cluster *tipc_cltr_create(u32 addr)
60 int alloc; 60 int alloc;
61 61
62 c_ptr = (struct cluster *)kmalloc(sizeof(*c_ptr), GFP_ATOMIC); 62 c_ptr = (struct cluster *)kmalloc(sizeof(*c_ptr), GFP_ATOMIC);
63 if (c_ptr == NULL) 63 if (c_ptr == NULL) {
64 warn("Cluster creation failure, no memory\n");
64 return NULL; 65 return NULL;
66 }
65 memset(c_ptr, 0, sizeof(*c_ptr)); 67 memset(c_ptr, 0, sizeof(*c_ptr));
66 68
67 c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0); 69 c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
@@ -70,30 +72,32 @@ struct cluster *tipc_cltr_create(u32 addr)
70 else 72 else
71 max_nodes = tipc_max_nodes + 1; 73 max_nodes = tipc_max_nodes + 1;
72 alloc = sizeof(void *) * (max_nodes + 1); 74 alloc = sizeof(void *) * (max_nodes + 1);
75
73 c_ptr->nodes = (struct node **)kmalloc(alloc, GFP_ATOMIC); 76 c_ptr->nodes = (struct node **)kmalloc(alloc, GFP_ATOMIC);
74 if (c_ptr->nodes == NULL) { 77 if (c_ptr->nodes == NULL) {
78 warn("Cluster creation failure, no memory for node area\n");
75 kfree(c_ptr); 79 kfree(c_ptr);
76 return NULL; 80 return NULL;
77 } 81 }
78 memset(c_ptr->nodes, 0, alloc); 82 memset(c_ptr->nodes, 0, alloc);
83
79 if (in_own_cluster(addr)) 84 if (in_own_cluster(addr))
80 tipc_local_nodes = c_ptr->nodes; 85 tipc_local_nodes = c_ptr->nodes;
81 c_ptr->highest_slave = LOWEST_SLAVE - 1; 86 c_ptr->highest_slave = LOWEST_SLAVE - 1;
82 c_ptr->highest_node = 0; 87 c_ptr->highest_node = 0;
83 88
84 z_ptr = tipc_zone_find(tipc_zone(addr)); 89 z_ptr = tipc_zone_find(tipc_zone(addr));
85 if (z_ptr == NULL) { 90 if (!z_ptr) {
86 z_ptr = tipc_zone_create(addr); 91 z_ptr = tipc_zone_create(addr);
87 } 92 }
88 if (z_ptr != NULL) { 93 if (!z_ptr) {
89 tipc_zone_attach_cluster(z_ptr, c_ptr); 94 kfree(c_ptr->nodes);
90 c_ptr->owner = z_ptr;
91 }
92 else {
93 kfree(c_ptr); 95 kfree(c_ptr);
94 c_ptr = NULL; 96 return NULL;
95 } 97 }
96 98
99 tipc_zone_attach_cluster(z_ptr, c_ptr);
100 c_ptr->owner = z_ptr;
97 return c_ptr; 101 return c_ptr;
98} 102}
99 103
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 48b5de2dbe60..3ec502fac8c3 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -291,13 +291,22 @@ static struct sk_buff *cfg_set_own_addr(void)
291 if (!tipc_addr_node_valid(addr)) 291 if (!tipc_addr_node_valid(addr))
292 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE 292 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
293 " (node address)"); 293 " (node address)");
294 if (tipc_own_addr) 294 if (tipc_mode == TIPC_NET_MODE)
295 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 295 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
296 " (cannot change node address once assigned)"); 296 " (cannot change node address once assigned)");
297 tipc_own_addr = addr;
298
299 /*
300 * Must release all spinlocks before calling start_net() because
301 * Linux version of TIPC calls eth_media_start() which calls
302 * register_netdevice_notifier() which may block!
303 *
304 * Temporarily releasing the lock should be harmless for non-Linux TIPC,
305 * but Linux version of eth_media_start() should really be reworked
306 * so that it can be called with spinlocks held.
307 */
297 308
298 spin_unlock_bh(&config_lock); 309 spin_unlock_bh(&config_lock);
299 tipc_core_stop_net();
300 tipc_own_addr = addr;
301 tipc_core_start_net(); 310 tipc_core_start_net();
302 spin_lock_bh(&config_lock); 311 spin_lock_bh(&config_lock);
303 return tipc_cfg_reply_none(); 312 return tipc_cfg_reply_none();
@@ -350,50 +359,21 @@ static struct sk_buff *cfg_set_max_subscriptions(void)
350 359
351static struct sk_buff *cfg_set_max_ports(void) 360static struct sk_buff *cfg_set_max_ports(void)
352{ 361{
353 int orig_mode;
354 u32 value; 362 u32 value;
355 363
356 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) 364 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
357 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); 365 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
358 value = *(u32 *)TLV_DATA(req_tlv_area); 366 value = *(u32 *)TLV_DATA(req_tlv_area);
359 value = ntohl(value); 367 value = ntohl(value);
368 if (value == tipc_max_ports)
369 return tipc_cfg_reply_none();
360 if (value != delimit(value, 127, 65535)) 370 if (value != delimit(value, 127, 65535))
361 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE 371 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
362 " (max ports must be 127-65535)"); 372 " (max ports must be 127-65535)");
363 373 if (tipc_mode != TIPC_NOT_RUNNING)
364 if (value == tipc_max_ports)
365 return tipc_cfg_reply_none();
366
367 if (atomic_read(&tipc_user_count) > 2)
368 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 374 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
369 " (cannot change max ports while TIPC users exist)"); 375 " (cannot change max ports while TIPC is active)");
370
371 spin_unlock_bh(&config_lock);
372 orig_mode = tipc_get_mode();
373 if (orig_mode == TIPC_NET_MODE)
374 tipc_core_stop_net();
375 tipc_core_stop();
376 tipc_max_ports = value; 376 tipc_max_ports = value;
377 tipc_core_start();
378 if (orig_mode == TIPC_NET_MODE)
379 tipc_core_start_net();
380 spin_lock_bh(&config_lock);
381 return tipc_cfg_reply_none();
382}
383
384static struct sk_buff *set_net_max(int value, int *parameter)
385{
386 int orig_mode;
387
388 if (value != *parameter) {
389 orig_mode = tipc_get_mode();
390 if (orig_mode == TIPC_NET_MODE)
391 tipc_core_stop_net();
392 *parameter = value;
393 if (orig_mode == TIPC_NET_MODE)
394 tipc_core_start_net();
395 }
396
397 return tipc_cfg_reply_none(); 377 return tipc_cfg_reply_none();
398} 378}
399 379
@@ -405,10 +385,16 @@ static struct sk_buff *cfg_set_max_zones(void)
405 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); 385 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
406 value = *(u32 *)TLV_DATA(req_tlv_area); 386 value = *(u32 *)TLV_DATA(req_tlv_area);
407 value = ntohl(value); 387 value = ntohl(value);
388 if (value == tipc_max_zones)
389 return tipc_cfg_reply_none();
408 if (value != delimit(value, 1, 255)) 390 if (value != delimit(value, 1, 255))
409 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE 391 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
410 " (max zones must be 1-255)"); 392 " (max zones must be 1-255)");
411 return set_net_max(value, &tipc_max_zones); 393 if (tipc_mode == TIPC_NET_MODE)
394 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
395 " (cannot change max zones once TIPC has joined a network)");
396 tipc_max_zones = value;
397 return tipc_cfg_reply_none();
412} 398}
413 399
414static struct sk_buff *cfg_set_max_clusters(void) 400static struct sk_buff *cfg_set_max_clusters(void)
@@ -419,8 +405,8 @@ static struct sk_buff *cfg_set_max_clusters(void)
419 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); 405 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
420 value = *(u32 *)TLV_DATA(req_tlv_area); 406 value = *(u32 *)TLV_DATA(req_tlv_area);
421 value = ntohl(value); 407 value = ntohl(value);
422 if (value != 1) 408 if (value != delimit(value, 1, 1))
423 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 409 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
424 " (max clusters fixed at 1)"); 410 " (max clusters fixed at 1)");
425 return tipc_cfg_reply_none(); 411 return tipc_cfg_reply_none();
426} 412}
@@ -433,10 +419,16 @@ static struct sk_buff *cfg_set_max_nodes(void)
433 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); 419 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
434 value = *(u32 *)TLV_DATA(req_tlv_area); 420 value = *(u32 *)TLV_DATA(req_tlv_area);
435 value = ntohl(value); 421 value = ntohl(value);
422 if (value == tipc_max_nodes)
423 return tipc_cfg_reply_none();
436 if (value != delimit(value, 8, 2047)) 424 if (value != delimit(value, 8, 2047))
437 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE 425 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
438 " (max nodes must be 8-2047)"); 426 " (max nodes must be 8-2047)");
439 return set_net_max(value, &tipc_max_nodes); 427 if (tipc_mode == TIPC_NET_MODE)
428 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
429 " (cannot change max nodes once TIPC has joined a network)");
430 tipc_max_nodes = value;
431 return tipc_cfg_reply_none();
440} 432}
441 433
442static struct sk_buff *cfg_set_max_slaves(void) 434static struct sk_buff *cfg_set_max_slaves(void)
@@ -461,15 +453,16 @@ static struct sk_buff *cfg_set_netid(void)
461 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); 453 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
462 value = *(u32 *)TLV_DATA(req_tlv_area); 454 value = *(u32 *)TLV_DATA(req_tlv_area);
463 value = ntohl(value); 455 value = ntohl(value);
456 if (value == tipc_net_id)
457 return tipc_cfg_reply_none();
464 if (value != delimit(value, 1, 9999)) 458 if (value != delimit(value, 1, 9999))
465 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE 459 return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
466 " (network id must be 1-9999)"); 460 " (network id must be 1-9999)");
467 461 if (tipc_mode == TIPC_NET_MODE)
468 if (tipc_own_addr)
469 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 462 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
470 " (cannot change network id once part of network)"); 463 " (cannot change network id once TIPC has joined a network)");
471 464 tipc_net_id = value;
472 return set_net_max(value, &tipc_net_id); 465 return tipc_cfg_reply_none();
473} 466}
474 467
475struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, 468struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area,
@@ -649,7 +642,7 @@ static void cfg_named_msg_event(void *userdata,
649 if ((size < sizeof(*req_hdr)) || 642 if ((size < sizeof(*req_hdr)) ||
650 (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || 643 (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) ||
651 (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { 644 (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) {
652 warn("discarded invalid configuration message\n"); 645 warn("Invalid configuration message discarded\n");
653 return; 646 return;
654 } 647 }
655 648
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3d0a8ee4e1d3..5003acb15919 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -2,7 +2,7 @@
2 * net/tipc/core.c: TIPC module code 2 * net/tipc/core.c: TIPC module code
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005-2006, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -57,7 +57,7 @@ void tipc_socket_stop(void);
57int tipc_netlink_start(void); 57int tipc_netlink_start(void);
58void tipc_netlink_stop(void); 58void tipc_netlink_stop(void);
59 59
60#define MOD_NAME "tipc_start: " 60#define TIPC_MOD_VER "1.6.1"
61 61
62#ifndef CONFIG_TIPC_ZONES 62#ifndef CONFIG_TIPC_ZONES
63#define CONFIG_TIPC_ZONES 3 63#define CONFIG_TIPC_ZONES 3
@@ -198,7 +198,7 @@ static int __init tipc_init(void)
198 tipc_max_publications = 10000; 198 tipc_max_publications = 10000;
199 tipc_max_subscriptions = 2000; 199 tipc_max_subscriptions = 2000;
200 tipc_max_ports = delimit(CONFIG_TIPC_PORTS, 127, 65536); 200 tipc_max_ports = delimit(CONFIG_TIPC_PORTS, 127, 65536);
201 tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 511); 201 tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 255);
202 tipc_max_clusters = delimit(CONFIG_TIPC_CLUSTERS, 1, 1); 202 tipc_max_clusters = delimit(CONFIG_TIPC_CLUSTERS, 1, 1);
203 tipc_max_nodes = delimit(CONFIG_TIPC_NODES, 8, 2047); 203 tipc_max_nodes = delimit(CONFIG_TIPC_NODES, 8, 2047);
204 tipc_max_slaves = delimit(CONFIG_TIPC_SLAVE_NODES, 0, 2047); 204 tipc_max_slaves = delimit(CONFIG_TIPC_SLAVE_NODES, 0, 2047);
@@ -224,6 +224,7 @@ module_exit(tipc_exit);
224 224
225MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); 225MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication");
226MODULE_LICENSE("Dual BSD/GPL"); 226MODULE_LICENSE("Dual BSD/GPL");
227MODULE_VERSION(TIPC_MOD_VER);
227 228
228/* Native TIPC API for kernel-space applications (see tipc.h) */ 229/* Native TIPC API for kernel-space applications (see tipc.h) */
229 230
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 1f2e8b27a13f..86f54f3512f1 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -2,7 +2,7 @@
2 * net/tipc/core.h: Include file for TIPC global declarations 2 * net/tipc/core.h: Include file for TIPC global declarations
3 * 3 *
4 * Copyright (c) 2005-2006, Ericsson AB 4 * Copyright (c) 2005-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005-2006, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -111,10 +111,6 @@ void tipc_dump(struct print_buf*,const char *fmt, ...);
111 111
112#else 112#else
113 113
114#ifndef DBG_OUTPUT
115#define DBG_OUTPUT NULL
116#endif
117
118/* 114/*
119 * TIPC debug support not included: 115 * TIPC debug support not included:
120 * - system messages are printed to system console 116 * - system messages are printed to system console
@@ -129,6 +125,19 @@ void tipc_dump(struct print_buf*,const char *fmt, ...);
129#define msg_dbg(msg,txt) do {} while (0) 125#define msg_dbg(msg,txt) do {} while (0)
130#define dump(fmt,arg...) do {} while (0) 126#define dump(fmt,arg...) do {} while (0)
131 127
128
129/*
130 * TIPC_OUTPUT is defined to be the system console, while DBG_OUTPUT is
131 * the null print buffer. This ensures that any system or debug messages
132 * that are generated without using the above macros are handled correctly.
133 */
134
135#undef TIPC_OUTPUT
136#define TIPC_OUTPUT TIPC_CONS
137
138#undef DBG_OUTPUT
139#define DBG_OUTPUT NULL
140
132#endif 141#endif
133 142
134 143
@@ -309,7 +318,7 @@ static inline struct sk_buff *buf_acquire(u32 size)
309 * buf_discard - frees a TIPC message buffer 318 * buf_discard - frees a TIPC message buffer
310 * @skb: message buffer 319 * @skb: message buffer
311 * 320 *
312 * Frees a new buffer. If passed NULL, just returns. 321 * Frees a message buffer. If passed NULL, just returns.
313 */ 322 */
314 323
315static inline void buf_discard(struct sk_buff *skb) 324static inline void buf_discard(struct sk_buff *skb)
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 92601385e5f5..2b8441203120 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -2,7 +2,7 @@
2 * net/tipc/discover.c 2 * net/tipc/discover.c
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005-2006, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -176,7 +176,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf)
176 n_ptr = tipc_node_create(orig); 176 n_ptr = tipc_node_create(orig);
177 } 177 }
178 if (n_ptr == NULL) { 178 if (n_ptr == NULL) {
179 warn("Memory squeeze; Failed to create node\n");
180 return; 179 return;
181 } 180 }
182 spin_lock_bh(&n_ptr->lock); 181 spin_lock_bh(&n_ptr->lock);
@@ -191,10 +190,8 @@ void tipc_disc_recv_msg(struct sk_buff *buf)
191 } 190 }
192 addr = &link->media_addr; 191 addr = &link->media_addr;
193 if (memcmp(addr, &media_addr, sizeof(*addr))) { 192 if (memcmp(addr, &media_addr, sizeof(*addr))) {
194 char addr_string[16]; 193 warn("Resetting link <%s>, peer interface address changed\n",
195 194 link->name);
196 warn("New bearer address for %s\n",
197 addr_string_fill(addr_string, orig));
198 memcpy(addr, &media_addr, sizeof(*addr)); 195 memcpy(addr, &media_addr, sizeof(*addr));
199 tipc_link_reset(link); 196 tipc_link_reset(link);
200 } 197 }
@@ -270,8 +267,8 @@ static void disc_timeout(struct link_req *req)
270 /* leave timer interval "as is" if already at a "normal" rate */ 267 /* leave timer interval "as is" if already at a "normal" rate */
271 } else { 268 } else {
272 req->timer_intv *= 2; 269 req->timer_intv *= 2;
273 if (req->timer_intv > TIPC_LINK_REQ_SLOW) 270 if (req->timer_intv > TIPC_LINK_REQ_FAST)
274 req->timer_intv = TIPC_LINK_REQ_SLOW; 271 req->timer_intv = TIPC_LINK_REQ_FAST;
275 if ((req->timer_intv == TIPC_LINK_REQ_FAST) && 272 if ((req->timer_intv == TIPC_LINK_REQ_FAST) &&
276 (req->bearer->nodes.count)) 273 (req->bearer->nodes.count))
277 req->timer_intv = TIPC_LINK_REQ_SLOW; 274 req->timer_intv = TIPC_LINK_REQ_SLOW;
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 7a252785f727..682da4a28041 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -2,7 +2,7 @@
2 * net/tipc/eth_media.c: Ethernet bearer support for TIPC 2 * net/tipc/eth_media.c: Ethernet bearer support for TIPC
3 * 3 *
4 * Copyright (c) 2001-2006, Ericsson AB 4 * Copyright (c) 2001-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005-2006, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -98,17 +98,19 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
98 u32 size; 98 u32 size;
99 99
100 if (likely(eb_ptr->bearer)) { 100 if (likely(eb_ptr->bearer)) {
101 size = msg_size((struct tipc_msg *)buf->data); 101 if (likely(!dev->promiscuity) ||
102 skb_trim(buf, size); 102 !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) ||
103 if (likely(buf->len == size)) { 103 !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) {
104 buf->next = NULL; 104 size = msg_size((struct tipc_msg *)buf->data);
105 tipc_recv_msg(buf, eb_ptr->bearer); 105 skb_trim(buf, size);
106 } else { 106 if (likely(buf->len == size)) {
107 kfree_skb(buf); 107 buf->next = NULL;
108 tipc_recv_msg(buf, eb_ptr->bearer);
109 return TIPC_OK;
110 }
108 } 111 }
109 } else {
110 kfree_skb(buf);
111 } 112 }
113 kfree_skb(buf);
112 return TIPC_OK; 114 return TIPC_OK;
113} 115}
114 116
@@ -125,8 +127,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
125 127
126 /* Find device with specified name */ 128 /* Find device with specified name */
127 129
128 while (dev && dev->name && 130 while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) {
129 (memcmp(dev->name, driver_name, strlen(dev->name)))) {
130 dev = dev->next; 131 dev = dev->next;
131 } 132 }
132 if (!dev) 133 if (!dev)
@@ -252,7 +253,9 @@ int tipc_eth_media_start(void)
252 if (eth_started) 253 if (eth_started)
253 return -EINVAL; 254 return -EINVAL;
254 255
255 memset(&bcast_addr, 0xff, sizeof(bcast_addr)); 256 bcast_addr.type = htonl(TIPC_MEDIA_TYPE_ETH);
257 memset(&bcast_addr.dev_addr, 0xff, ETH_ALEN);
258
256 memset(eth_bearers, 0, sizeof(eth_bearers)); 259 memset(eth_bearers, 0, sizeof(eth_bearers));
257 260
258 res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth", 261 res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth",
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 784b24b6d102..d64658053746 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -419,7 +419,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
419 419
420 l_ptr = (struct link *)kmalloc(sizeof(*l_ptr), GFP_ATOMIC); 420 l_ptr = (struct link *)kmalloc(sizeof(*l_ptr), GFP_ATOMIC);
421 if (!l_ptr) { 421 if (!l_ptr) {
422 warn("Memory squeeze; Failed to create link\n"); 422 warn("Link creation failed, no memory\n");
423 return NULL; 423 return NULL;
424 } 424 }
425 memset(l_ptr, 0, sizeof(*l_ptr)); 425 memset(l_ptr, 0, sizeof(*l_ptr));
@@ -469,7 +469,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
469 469
470 if (!pb) { 470 if (!pb) {
471 kfree(l_ptr); 471 kfree(l_ptr);
472 warn("Memory squeeze; Failed to create link\n"); 472 warn("Link creation failed, no memory for print buffer\n");
473 return NULL; 473 return NULL;
474 } 474 }
475 tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE); 475 tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE);
@@ -574,7 +574,6 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all)
574 break; 574 break;
575 list_del_init(&p_ptr->wait_list); 575 list_del_init(&p_ptr->wait_list);
576 p_ptr->congested_link = NULL; 576 p_ptr->congested_link = NULL;
577 assert(p_ptr->wakeup);
578 spin_lock_bh(p_ptr->publ.lock); 577 spin_lock_bh(p_ptr->publ.lock);
579 p_ptr->publ.congested = 0; 578 p_ptr->publ.congested = 0;
580 p_ptr->wakeup(&p_ptr->publ); 579 p_ptr->wakeup(&p_ptr->publ);
@@ -691,6 +690,7 @@ void tipc_link_reset(struct link *l_ptr)
691 struct sk_buff *buf; 690 struct sk_buff *buf;
692 u32 prev_state = l_ptr->state; 691 u32 prev_state = l_ptr->state;
693 u32 checkpoint = l_ptr->next_in_no; 692 u32 checkpoint = l_ptr->next_in_no;
693 int was_active_link = tipc_link_is_active(l_ptr);
694 694
695 msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1); 695 msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1);
696 696
@@ -712,7 +712,7 @@ void tipc_link_reset(struct link *l_ptr)
712 tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name); 712 tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name);
713 dbg_link_dump(); 713 dbg_link_dump();
714#endif 714#endif
715 if (tipc_node_has_active_links(l_ptr->owner) && 715 if (was_active_link && tipc_node_has_active_links(l_ptr->owner) &&
716 l_ptr->owner->permit_changeover) { 716 l_ptr->owner->permit_changeover) {
717 l_ptr->reset_checkpoint = checkpoint; 717 l_ptr->reset_checkpoint = checkpoint;
718 l_ptr->exp_msg_count = START_CHANGEOVER; 718 l_ptr->exp_msg_count = START_CHANGEOVER;
@@ -755,7 +755,7 @@ void tipc_link_reset(struct link *l_ptr)
755 755
756static void link_activate(struct link *l_ptr) 756static void link_activate(struct link *l_ptr)
757{ 757{
758 l_ptr->next_in_no = 1; 758 l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
759 tipc_node_link_up(l_ptr->owner, l_ptr); 759 tipc_node_link_up(l_ptr->owner, l_ptr);
760 tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); 760 tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
761 link_send_event(tipc_cfg_link_event, l_ptr, 1); 761 link_send_event(tipc_cfg_link_event, l_ptr, 1);
@@ -820,6 +820,8 @@ static void link_state_event(struct link *l_ptr, unsigned event)
820 break; 820 break;
821 case RESET_MSG: 821 case RESET_MSG:
822 dbg_link("RES -> RR\n"); 822 dbg_link("RES -> RR\n");
823 info("Resetting link <%s>, requested by peer\n",
824 l_ptr->name);
823 tipc_link_reset(l_ptr); 825 tipc_link_reset(l_ptr);
824 l_ptr->state = RESET_RESET; 826 l_ptr->state = RESET_RESET;
825 l_ptr->fsm_msg_cnt = 0; 827 l_ptr->fsm_msg_cnt = 0;
@@ -844,6 +846,8 @@ static void link_state_event(struct link *l_ptr, unsigned event)
844 break; 846 break;
845 case RESET_MSG: 847 case RESET_MSG:
846 dbg_link("RES -> RR\n"); 848 dbg_link("RES -> RR\n");
849 info("Resetting link <%s>, requested by peer "
850 "while probing\n", l_ptr->name);
847 tipc_link_reset(l_ptr); 851 tipc_link_reset(l_ptr);
848 l_ptr->state = RESET_RESET; 852 l_ptr->state = RESET_RESET;
849 l_ptr->fsm_msg_cnt = 0; 853 l_ptr->fsm_msg_cnt = 0;
@@ -875,6 +879,8 @@ static void link_state_event(struct link *l_ptr, unsigned event)
875 } else { /* Link has failed */ 879 } else { /* Link has failed */
876 dbg_link("-> RU (%u probes unanswered)\n", 880 dbg_link("-> RU (%u probes unanswered)\n",
877 l_ptr->fsm_msg_cnt); 881 l_ptr->fsm_msg_cnt);
882 warn("Resetting link <%s>, peer not responding\n",
883 l_ptr->name);
878 tipc_link_reset(l_ptr); 884 tipc_link_reset(l_ptr);
879 l_ptr->state = RESET_UNKNOWN; 885 l_ptr->state = RESET_UNKNOWN;
880 l_ptr->fsm_msg_cnt = 0; 886 l_ptr->fsm_msg_cnt = 0;
@@ -1050,7 +1056,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1050 msg_dbg(msg, "TIPC: Congestion, throwing away\n"); 1056 msg_dbg(msg, "TIPC: Congestion, throwing away\n");
1051 buf_discard(buf); 1057 buf_discard(buf);
1052 if (imp > CONN_MANAGER) { 1058 if (imp > CONN_MANAGER) {
1053 warn("Resetting <%s>, send queue full", l_ptr->name); 1059 warn("Resetting link <%s>, send queue full", l_ptr->name);
1054 tipc_link_reset(l_ptr); 1060 tipc_link_reset(l_ptr);
1055 } 1061 }
1056 return dsz; 1062 return dsz;
@@ -1135,9 +1141,13 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
1135 if (n_ptr) { 1141 if (n_ptr) {
1136 tipc_node_lock(n_ptr); 1142 tipc_node_lock(n_ptr);
1137 l_ptr = n_ptr->active_links[selector & 1]; 1143 l_ptr = n_ptr->active_links[selector & 1];
1138 dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest);
1139 if (l_ptr) { 1144 if (l_ptr) {
1145 dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest);
1140 res = tipc_link_send_buf(l_ptr, buf); 1146 res = tipc_link_send_buf(l_ptr, buf);
1147 } else {
1148 dbg("Attempt to send msg to unreachable node:\n");
1149 msg_dbg(buf_msg(buf),">>>");
1150 buf_discard(buf);
1141 } 1151 }
1142 tipc_node_unlock(n_ptr); 1152 tipc_node_unlock(n_ptr);
1143 } else { 1153 } else {
@@ -1242,8 +1252,6 @@ int tipc_link_send_sections_fast(struct port *sender,
1242 int res; 1252 int res;
1243 u32 selector = msg_origport(hdr) & 1; 1253 u32 selector = msg_origport(hdr) & 1;
1244 1254
1245 assert(destaddr != tipc_own_addr);
1246
1247again: 1255again:
1248 /* 1256 /*
1249 * Try building message using port's max_pkt hint. 1257 * Try building message using port's max_pkt hint.
@@ -1604,40 +1612,121 @@ void tipc_link_push_queue(struct link *l_ptr)
1604 tipc_bearer_schedule(l_ptr->b_ptr, l_ptr); 1612 tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
1605} 1613}
1606 1614
1615static void link_reset_all(unsigned long addr)
1616{
1617 struct node *n_ptr;
1618 char addr_string[16];
1619 u32 i;
1620
1621 read_lock_bh(&tipc_net_lock);
1622 n_ptr = tipc_node_find((u32)addr);
1623 if (!n_ptr) {
1624 read_unlock_bh(&tipc_net_lock);
1625 return; /* node no longer exists */
1626 }
1627
1628 tipc_node_lock(n_ptr);
1629
1630 warn("Resetting all links to %s\n",
1631 addr_string_fill(addr_string, n_ptr->addr));
1632
1633 for (i = 0; i < MAX_BEARERS; i++) {
1634 if (n_ptr->links[i]) {
1635 link_print(n_ptr->links[i], TIPC_OUTPUT,
1636 "Resetting link\n");
1637 tipc_link_reset(n_ptr->links[i]);
1638 }
1639 }
1640
1641 tipc_node_unlock(n_ptr);
1642 read_unlock_bh(&tipc_net_lock);
1643}
1644
1645static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
1646{
1647 struct tipc_msg *msg = buf_msg(buf);
1648
1649 warn("Retransmission failure on link <%s>\n", l_ptr->name);
1650 tipc_msg_print(TIPC_OUTPUT, msg, ">RETR-FAIL>");
1651
1652 if (l_ptr->addr) {
1653
1654 /* Handle failure on standard link */
1655
1656 link_print(l_ptr, TIPC_OUTPUT, "Resetting link\n");
1657 tipc_link_reset(l_ptr);
1658
1659 } else {
1660
1661 /* Handle failure on broadcast link */
1662
1663 struct node *n_ptr;
1664 char addr_string[16];
1665
1666 tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg));
1667 tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle);
1668
1669 n_ptr = l_ptr->owner->next;
1670 tipc_node_lock(n_ptr);
1671
1672 addr_string_fill(addr_string, n_ptr->addr);
1673 tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string);
1674 tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported);
1675 tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked);
1676 tipc_printf(TIPC_OUTPUT, "Last in: %u, ", n_ptr->bclink.last_in);
1677 tipc_printf(TIPC_OUTPUT, "Gap after: %u, ", n_ptr->bclink.gap_after);
1678 tipc_printf(TIPC_OUTPUT, "Gap to: %u\n", n_ptr->bclink.gap_to);
1679 tipc_printf(TIPC_OUTPUT, "Nack sync: %u\n\n", n_ptr->bclink.nack_sync);
1680
1681 tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr);
1682
1683 tipc_node_unlock(n_ptr);
1684
1685 l_ptr->stale_count = 0;
1686 }
1687}
1688
1607void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, 1689void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
1608 u32 retransmits) 1690 u32 retransmits)
1609{ 1691{
1610 struct tipc_msg *msg; 1692 struct tipc_msg *msg;
1611 1693
1694 if (!buf)
1695 return;
1696
1697 msg = buf_msg(buf);
1698
1612 dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); 1699 dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
1613 1700
1614 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && buf && !skb_cloned(buf)) { 1701 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
1615 msg_dbg(buf_msg(buf), ">NO_RETR->BCONG>"); 1702 if (!skb_cloned(buf)) {
1616 dbg_print_link(l_ptr, " "); 1703 msg_dbg(msg, ">NO_RETR->BCONG>");
1617 l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf)); 1704 dbg_print_link(l_ptr, " ");
1618 l_ptr->retransm_queue_size = retransmits; 1705 l_ptr->retransm_queue_head = msg_seqno(msg);
1619 return; 1706 l_ptr->retransm_queue_size = retransmits;
1707 return;
1708 } else {
1709 /* Don't retransmit if driver already has the buffer */
1710 }
1711 } else {
1712 /* Detect repeated retransmit failures on uncongested bearer */
1713
1714 if (l_ptr->last_retransmitted == msg_seqno(msg)) {
1715 if (++l_ptr->stale_count > 100) {
1716 link_retransmit_failure(l_ptr, buf);
1717 return;
1718 }
1719 } else {
1720 l_ptr->last_retransmitted = msg_seqno(msg);
1721 l_ptr->stale_count = 1;
1722 }
1620 } 1723 }
1724
1621 while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { 1725 while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) {
1622 msg = buf_msg(buf); 1726 msg = buf_msg(buf);
1623 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); 1727 msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
1624 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 1728 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
1625 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { 1729 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
1626 /* Catch if retransmissions fail repeatedly: */
1627 if (l_ptr->last_retransmitted == msg_seqno(msg)) {
1628 if (++l_ptr->stale_count > 100) {
1629 tipc_msg_print(TIPC_CONS, buf_msg(buf), ">RETR>");
1630 info("...Retransmitted %u times\n",
1631 l_ptr->stale_count);
1632 link_print(l_ptr, TIPC_CONS, "Resetting Link\n");
1633 tipc_link_reset(l_ptr);
1634 break;
1635 }
1636 } else {
1637 l_ptr->stale_count = 0;
1638 }
1639 l_ptr->last_retransmitted = msg_seqno(msg);
1640
1641 msg_dbg(buf_msg(buf), ">RETR>"); 1730 msg_dbg(buf_msg(buf), ">RETR>");
1642 buf = buf->next; 1731 buf = buf->next;
1643 retransmits--; 1732 retransmits--;
@@ -1650,6 +1739,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
1650 return; 1739 return;
1651 } 1740 }
1652 } 1741 }
1742
1653 l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0; 1743 l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0;
1654} 1744}
1655 1745
@@ -1720,6 +1810,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1720 link_recv_non_seq(buf); 1810 link_recv_non_seq(buf);
1721 continue; 1811 continue;
1722 } 1812 }
1813
1814 if (unlikely(!msg_short(msg) &&
1815 (msg_destnode(msg) != tipc_own_addr)))
1816 goto cont;
1817
1723 n_ptr = tipc_node_find(msg_prevnode(msg)); 1818 n_ptr = tipc_node_find(msg_prevnode(msg));
1724 if (unlikely(!n_ptr)) 1819 if (unlikely(!n_ptr))
1725 goto cont; 1820 goto cont;
@@ -2140,7 +2235,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
2140 2235
2141 if (msg_linkprio(msg) && 2236 if (msg_linkprio(msg) &&
2142 (msg_linkprio(msg) != l_ptr->priority)) { 2237 (msg_linkprio(msg) != l_ptr->priority)) {
2143 warn("Changing prio <%s>: %u->%u\n", 2238 warn("Resetting link <%s>, priority change %u->%u\n",
2144 l_ptr->name, l_ptr->priority, msg_linkprio(msg)); 2239 l_ptr->name, l_ptr->priority, msg_linkprio(msg));
2145 l_ptr->priority = msg_linkprio(msg); 2240 l_ptr->priority = msg_linkprio(msg);
2146 tipc_link_reset(l_ptr); /* Enforce change to take effect */ 2241 tipc_link_reset(l_ptr); /* Enforce change to take effect */
@@ -2209,17 +2304,22 @@ void tipc_link_tunnel(struct link *l_ptr,
2209 u32 length = msg_size(msg); 2304 u32 length = msg_size(msg);
2210 2305
2211 tunnel = l_ptr->owner->active_links[selector & 1]; 2306 tunnel = l_ptr->owner->active_links[selector & 1];
2212 if (!tipc_link_is_up(tunnel)) 2307 if (!tipc_link_is_up(tunnel)) {
2308 warn("Link changeover error, "
2309 "tunnel link no longer available\n");
2213 return; 2310 return;
2311 }
2214 msg_set_size(tunnel_hdr, length + INT_H_SIZE); 2312 msg_set_size(tunnel_hdr, length + INT_H_SIZE);
2215 buf = buf_acquire(length + INT_H_SIZE); 2313 buf = buf_acquire(length + INT_H_SIZE);
2216 if (!buf) 2314 if (!buf) {
2315 warn("Link changeover error, "
2316 "unable to send tunnel msg\n");
2217 return; 2317 return;
2318 }
2218 memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE); 2319 memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE);
2219 memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length); 2320 memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length);
2220 dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane); 2321 dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
2221 msg_dbg(buf_msg(buf), ">SEND>"); 2322 msg_dbg(buf_msg(buf), ">SEND>");
2222 assert(tunnel);
2223 tipc_link_send_buf(tunnel, buf); 2323 tipc_link_send_buf(tunnel, buf);
2224} 2324}
2225 2325
@@ -2235,23 +2335,27 @@ void tipc_link_changeover(struct link *l_ptr)
2235 u32 msgcount = l_ptr->out_queue_size; 2335 u32 msgcount = l_ptr->out_queue_size;
2236 struct sk_buff *crs = l_ptr->first_out; 2336 struct sk_buff *crs = l_ptr->first_out;
2237 struct link *tunnel = l_ptr->owner->active_links[0]; 2337 struct link *tunnel = l_ptr->owner->active_links[0];
2238 int split_bundles = tipc_node_has_redundant_links(l_ptr->owner);
2239 struct tipc_msg tunnel_hdr; 2338 struct tipc_msg tunnel_hdr;
2339 int split_bundles;
2240 2340
2241 if (!tunnel) 2341 if (!tunnel)
2242 return; 2342 return;
2243 2343
2244 if (!l_ptr->owner->permit_changeover) 2344 if (!l_ptr->owner->permit_changeover) {
2345 warn("Link changeover error, "
2346 "peer did not permit changeover\n");
2245 return; 2347 return;
2348 }
2246 2349
2247 msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, 2350 msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
2248 ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); 2351 ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
2249 msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); 2352 msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
2250 msg_set_msgcnt(&tunnel_hdr, msgcount); 2353 msg_set_msgcnt(&tunnel_hdr, msgcount);
2354 dbg("Link changeover requires %u tunnel messages\n", msgcount);
2355
2251 if (!l_ptr->first_out) { 2356 if (!l_ptr->first_out) {
2252 struct sk_buff *buf; 2357 struct sk_buff *buf;
2253 2358
2254 assert(!msgcount);
2255 buf = buf_acquire(INT_H_SIZE); 2359 buf = buf_acquire(INT_H_SIZE);
2256 if (buf) { 2360 if (buf) {
2257 memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); 2361 memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
@@ -2261,10 +2365,15 @@ void tipc_link_changeover(struct link *l_ptr)
2261 msg_dbg(&tunnel_hdr, "EMPTY>SEND>"); 2365 msg_dbg(&tunnel_hdr, "EMPTY>SEND>");
2262 tipc_link_send_buf(tunnel, buf); 2366 tipc_link_send_buf(tunnel, buf);
2263 } else { 2367 } else {
2264 warn("Memory squeeze; link changeover failed\n"); 2368 warn("Link changeover error, "
2369 "unable to send changeover msg\n");
2265 } 2370 }
2266 return; 2371 return;
2267 } 2372 }
2373
2374 split_bundles = (l_ptr->owner->active_links[0] !=
2375 l_ptr->owner->active_links[1]);
2376
2268 while (crs) { 2377 while (crs) {
2269 struct tipc_msg *msg = buf_msg(crs); 2378 struct tipc_msg *msg = buf_msg(crs);
2270 2379
@@ -2310,7 +2419,8 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
2310 msg_set_size(&tunnel_hdr, length + INT_H_SIZE); 2419 msg_set_size(&tunnel_hdr, length + INT_H_SIZE);
2311 outbuf = buf_acquire(length + INT_H_SIZE); 2420 outbuf = buf_acquire(length + INT_H_SIZE);
2312 if (outbuf == NULL) { 2421 if (outbuf == NULL) {
2313 warn("Memory squeeze; buffer duplication failed\n"); 2422 warn("Link changeover error, "
2423 "unable to send duplicate msg\n");
2314 return; 2424 return;
2315 } 2425 }
2316 memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); 2426 memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
@@ -2364,11 +2474,15 @@ static int link_recv_changeover_msg(struct link **l_ptr,
2364 u32 msg_count = msg_msgcnt(tunnel_msg); 2474 u32 msg_count = msg_msgcnt(tunnel_msg);
2365 2475
2366 dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; 2476 dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)];
2367 assert(dest_link != *l_ptr);
2368 if (!dest_link) { 2477 if (!dest_link) {
2369 msg_dbg(tunnel_msg, "NOLINK/<REC<"); 2478 msg_dbg(tunnel_msg, "NOLINK/<REC<");
2370 goto exit; 2479 goto exit;
2371 } 2480 }
2481 if (dest_link == *l_ptr) {
2482 err("Unexpected changeover message on link <%s>\n",
2483 (*l_ptr)->name);
2484 goto exit;
2485 }
2372 dbg("%c<-%c:", dest_link->b_ptr->net_plane, 2486 dbg("%c<-%c:", dest_link->b_ptr->net_plane,
2373 (*l_ptr)->b_ptr->net_plane); 2487 (*l_ptr)->b_ptr->net_plane);
2374 *l_ptr = dest_link; 2488 *l_ptr = dest_link;
@@ -2381,7 +2495,7 @@ static int link_recv_changeover_msg(struct link **l_ptr,
2381 } 2495 }
2382 *buf = buf_extract(tunnel_buf,INT_H_SIZE); 2496 *buf = buf_extract(tunnel_buf,INT_H_SIZE);
2383 if (*buf == NULL) { 2497 if (*buf == NULL) {
2384 warn("Memory squeeze; failed to extract msg\n"); 2498 warn("Link changeover error, duplicate msg dropped\n");
2385 goto exit; 2499 goto exit;
2386 } 2500 }
2387 msg_dbg(tunnel_msg, "TNL<REC<"); 2501 msg_dbg(tunnel_msg, "TNL<REC<");
@@ -2393,13 +2507,17 @@ static int link_recv_changeover_msg(struct link **l_ptr,
2393 2507
2394 if (tipc_link_is_up(dest_link)) { 2508 if (tipc_link_is_up(dest_link)) {
2395 msg_dbg(tunnel_msg, "UP/FIRST/<REC<"); 2509 msg_dbg(tunnel_msg, "UP/FIRST/<REC<");
2510 info("Resetting link <%s>, changeover initiated by peer\n",
2511 dest_link->name);
2396 tipc_link_reset(dest_link); 2512 tipc_link_reset(dest_link);
2397 dest_link->exp_msg_count = msg_count; 2513 dest_link->exp_msg_count = msg_count;
2514 dbg("Expecting %u tunnelled messages\n", msg_count);
2398 if (!msg_count) 2515 if (!msg_count)
2399 goto exit; 2516 goto exit;
2400 } else if (dest_link->exp_msg_count == START_CHANGEOVER) { 2517 } else if (dest_link->exp_msg_count == START_CHANGEOVER) {
2401 msg_dbg(tunnel_msg, "BLK/FIRST/<REC<"); 2518 msg_dbg(tunnel_msg, "BLK/FIRST/<REC<");
2402 dest_link->exp_msg_count = msg_count; 2519 dest_link->exp_msg_count = msg_count;
2520 dbg("Expecting %u tunnelled messages\n", msg_count);
2403 if (!msg_count) 2521 if (!msg_count)
2404 goto exit; 2522 goto exit;
2405 } 2523 }
@@ -2407,6 +2525,8 @@ static int link_recv_changeover_msg(struct link **l_ptr,
2407 /* Receive original message */ 2525 /* Receive original message */
2408 2526
2409 if (dest_link->exp_msg_count == 0) { 2527 if (dest_link->exp_msg_count == 0) {
2528 warn("Link switchover error, "
2529 "got too many tunnelled messages\n");
2410 msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<"); 2530 msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<");
2411 dbg_print_link(dest_link, "LINK:"); 2531 dbg_print_link(dest_link, "LINK:");
2412 goto exit; 2532 goto exit;
@@ -2422,7 +2542,7 @@ static int link_recv_changeover_msg(struct link **l_ptr,
2422 buf_discard(tunnel_buf); 2542 buf_discard(tunnel_buf);
2423 return 1; 2543 return 1;
2424 } else { 2544 } else {
2425 warn("Memory squeeze; dropped incoming msg\n"); 2545 warn("Link changeover error, original msg dropped\n");
2426 } 2546 }
2427 } 2547 }
2428exit: 2548exit:
@@ -2444,13 +2564,8 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
2444 while (msgcount--) { 2564 while (msgcount--) {
2445 obuf = buf_extract(buf, pos); 2565 obuf = buf_extract(buf, pos);
2446 if (obuf == NULL) { 2566 if (obuf == NULL) {
2447 char addr_string[16]; 2567 warn("Link unable to unbundle message(s)\n");
2448 2568 break;
2449 warn("Buffer allocation failure;\n");
2450 warn(" incoming message(s) from %s lost\n",
2451 addr_string_fill(addr_string,
2452 msg_orignode(buf_msg(buf))));
2453 return;
2454 }; 2569 };
2455 pos += align(msg_size(buf_msg(obuf))); 2570 pos += align(msg_size(buf_msg(obuf)));
2456 msg_dbg(buf_msg(obuf), " /"); 2571 msg_dbg(buf_msg(obuf), " /");
@@ -2508,7 +2623,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2508 } 2623 }
2509 fragm = buf_acquire(fragm_sz + INT_H_SIZE); 2624 fragm = buf_acquire(fragm_sz + INT_H_SIZE);
2510 if (fragm == NULL) { 2625 if (fragm == NULL) {
2511 warn("Memory squeeze; failed to fragment msg\n"); 2626 warn("Link unable to fragment message\n");
2512 dsz = -ENOMEM; 2627 dsz = -ENOMEM;
2513 goto exit; 2628 goto exit;
2514 } 2629 }
@@ -2623,7 +2738,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2623 set_fragm_size(pbuf,fragm_sz); 2738 set_fragm_size(pbuf,fragm_sz);
2624 set_expected_frags(pbuf,exp_fragm_cnt - 1); 2739 set_expected_frags(pbuf,exp_fragm_cnt - 1);
2625 } else { 2740 } else {
2626 warn("Memory squeeze; got no defragmenting buffer\n"); 2741 warn("Link unable to reassemble fragmented message\n");
2627 } 2742 }
2628 buf_discard(fbuf); 2743 buf_discard(fbuf);
2629 return 0; 2744 return 0;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index a3bbc891f959..f0b063bcc2a9 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -127,7 +127,7 @@ void tipc_named_publish(struct publication *publ)
127 127
128 buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); 128 buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0);
129 if (!buf) { 129 if (!buf) {
130 warn("Memory squeeze; failed to distribute publication\n"); 130 warn("Publication distribution failure\n");
131 return; 131 return;
132 } 132 }
133 133
@@ -151,7 +151,7 @@ void tipc_named_withdraw(struct publication *publ)
151 151
152 buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); 152 buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
153 if (!buf) { 153 if (!buf) {
154 warn("Memory squeeze; failed to distribute withdrawal\n"); 154 warn("Withdrawl distribution failure\n");
155 return; 155 return;
156 } 156 }
157 157
@@ -174,7 +174,6 @@ void tipc_named_node_up(unsigned long node)
174 u32 rest; 174 u32 rest;
175 u32 max_item_buf; 175 u32 max_item_buf;
176 176
177 assert(in_own_cluster(node));
178 read_lock_bh(&tipc_nametbl_lock); 177 read_lock_bh(&tipc_nametbl_lock);
179 max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE; 178 max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE;
180 max_item_buf *= ITEM_SIZE; 179 max_item_buf *= ITEM_SIZE;
@@ -185,8 +184,8 @@ void tipc_named_node_up(unsigned long node)
185 left = (rest <= max_item_buf) ? rest : max_item_buf; 184 left = (rest <= max_item_buf) ? rest : max_item_buf;
186 rest -= left; 185 rest -= left;
187 buf = named_prepare_buf(PUBLICATION, left, node); 186 buf = named_prepare_buf(PUBLICATION, left, node);
188 if (buf == NULL) { 187 if (!buf) {
189 warn("Memory Squeeze; could not send publication\n"); 188 warn("Bulk publication distribution failure\n");
190 goto exit; 189 goto exit;
191 } 190 }
192 item = (struct distr_item *)msg_data(buf_msg(buf)); 191 item = (struct distr_item *)msg_data(buf_msg(buf));
@@ -221,15 +220,24 @@ exit:
221static void node_is_down(struct publication *publ) 220static void node_is_down(struct publication *publ)
222{ 221{
223 struct publication *p; 222 struct publication *p;
223
224 write_lock_bh(&tipc_nametbl_lock); 224 write_lock_bh(&tipc_nametbl_lock);
225 dbg("node_is_down: withdrawing %u, %u, %u\n", 225 dbg("node_is_down: withdrawing %u, %u, %u\n",
226 publ->type, publ->lower, publ->upper); 226 publ->type, publ->lower, publ->upper);
227 publ->key += 1222345; 227 publ->key += 1222345;
228 p = tipc_nametbl_remove_publ(publ->type, publ->lower, 228 p = tipc_nametbl_remove_publ(publ->type, publ->lower,
229 publ->node, publ->ref, publ->key); 229 publ->node, publ->ref, publ->key);
230 assert(p == publ);
231 write_unlock_bh(&tipc_nametbl_lock); 230 write_unlock_bh(&tipc_nametbl_lock);
232 kfree(publ); 231
232 if (p != publ) {
233 err("Unable to remove publication from failed node\n"
234 "(type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
235 publ->type, publ->lower, publ->node, publ->ref, publ->key);
236 }
237
238 if (p) {
239 kfree(p);
240 }
233} 241}
234 242
235/** 243/**
@@ -275,9 +283,15 @@ void tipc_named_recv(struct sk_buff *buf)
275 if (publ) { 283 if (publ) {
276 tipc_nodesub_unsubscribe(&publ->subscr); 284 tipc_nodesub_unsubscribe(&publ->subscr);
277 kfree(publ); 285 kfree(publ);
286 } else {
287 err("Unable to remove publication by node 0x%x\n"
288 "(type=%u, lower=%u, ref=%u, key=%u)\n",
289 msg_orignode(msg),
290 ntohl(item->type), ntohl(item->lower),
291 ntohl(item->ref), ntohl(item->key));
278 } 292 }
279 } else { 293 } else {
280 warn("tipc_named_recv: unknown msg\n"); 294 warn("Unrecognized name table message received\n");
281 } 295 }
282 item++; 296 item++;
283 } 297 }
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index d129422fc5c2..38571306aba5 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -71,7 +71,7 @@ struct sub_seq {
71 * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type'; 71 * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type';
72 * sub-sequences are sorted in ascending order 72 * sub-sequences are sorted in ascending order
73 * @alloc: number of sub-sequences currently in array 73 * @alloc: number of sub-sequences currently in array
74 * @first_free: upper bound of highest sub-sequence + 1 74 * @first_free: array index of first unused sub-sequence entry
75 * @ns_list: links to adjacent name sequences in hash chain 75 * @ns_list: links to adjacent name sequences in hash chain
76 * @subscriptions: list of subscriptions for this 'type' 76 * @subscriptions: list of subscriptions for this 'type'
77 * @lock: spinlock controlling access to name sequence structure 77 * @lock: spinlock controlling access to name sequence structure
@@ -120,7 +120,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
120 struct publication *publ = 120 struct publication *publ =
121 (struct publication *)kmalloc(sizeof(*publ), GFP_ATOMIC); 121 (struct publication *)kmalloc(sizeof(*publ), GFP_ATOMIC);
122 if (publ == NULL) { 122 if (publ == NULL) {
123 warn("Memory squeeze; failed to create publication\n"); 123 warn("Publication creation failure, no memory\n");
124 return NULL; 124 return NULL;
125 } 125 }
126 126
@@ -165,7 +165,7 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea
165 struct sub_seq *sseq = tipc_subseq_alloc(1); 165 struct sub_seq *sseq = tipc_subseq_alloc(1);
166 166
167 if (!nseq || !sseq) { 167 if (!nseq || !sseq) {
168 warn("Memory squeeze; failed to create name sequence\n"); 168 warn("Name sequence creation failed, no memory\n");
169 kfree(nseq); 169 kfree(nseq);
170 kfree(sseq); 170 kfree(sseq);
171 return NULL; 171 return NULL;
@@ -175,7 +175,7 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea
175 nseq->lock = SPIN_LOCK_UNLOCKED; 175 nseq->lock = SPIN_LOCK_UNLOCKED;
176 nseq->type = type; 176 nseq->type = type;
177 nseq->sseqs = sseq; 177 nseq->sseqs = sseq;
178 dbg("tipc_nameseq_create() nseq = %x type %u, ssseqs %x, ff: %u\n", 178 dbg("tipc_nameseq_create(): nseq = %p, type %u, ssseqs %p, ff: %u\n",
179 nseq, type, nseq->sseqs, nseq->first_free); 179 nseq, type, nseq->sseqs, nseq->first_free);
180 nseq->alloc = 1; 180 nseq->alloc = 1;
181 INIT_HLIST_NODE(&nseq->ns_list); 181 INIT_HLIST_NODE(&nseq->ns_list);
@@ -253,16 +253,16 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
253 struct sub_seq *sseq; 253 struct sub_seq *sseq;
254 int created_subseq = 0; 254 int created_subseq = 0;
255 255
256 assert(nseq->first_free <= nseq->alloc);
257 sseq = nameseq_find_subseq(nseq, lower); 256 sseq = nameseq_find_subseq(nseq, lower);
258 dbg("nameseq_ins: for seq %x,<%u,%u>, found sseq %x\n", 257 dbg("nameseq_ins: for seq %p, {%u,%u}, found sseq %p\n",
259 nseq, type, lower, sseq); 258 nseq, type, lower, sseq);
260 if (sseq) { 259 if (sseq) {
261 260
262 /* Lower end overlaps existing entry => need an exact match */ 261 /* Lower end overlaps existing entry => need an exact match */
263 262
264 if ((sseq->lower != lower) || (sseq->upper != upper)) { 263 if ((sseq->lower != lower) || (sseq->upper != upper)) {
265 warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper); 264 warn("Cannot publish {%u,%u,%u}, overlap error\n",
265 type, lower, upper);
266 return NULL; 266 return NULL;
267 } 267 }
268 } else { 268 } else {
@@ -277,25 +277,27 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
277 277
278 if ((inspos < nseq->first_free) && 278 if ((inspos < nseq->first_free) &&
279 (upper >= nseq->sseqs[inspos].lower)) { 279 (upper >= nseq->sseqs[inspos].lower)) {
280 warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper); 280 warn("Cannot publish {%u,%u,%u}, overlap error\n",
281 type, lower, upper);
281 return NULL; 282 return NULL;
282 } 283 }
283 284
284 /* Ensure there is space for new sub-sequence */ 285 /* Ensure there is space for new sub-sequence */
285 286
286 if (nseq->first_free == nseq->alloc) { 287 if (nseq->first_free == nseq->alloc) {
287 struct sub_seq *sseqs = nseq->sseqs; 288 struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2);
288 nseq->sseqs = tipc_subseq_alloc(nseq->alloc * 2); 289
289 if (nseq->sseqs != NULL) { 290 if (!sseqs) {
290 memcpy(nseq->sseqs, sseqs, 291 warn("Cannot publish {%u,%u,%u}, no memory\n",
291 nseq->alloc * sizeof (struct sub_seq)); 292 type, lower, upper);
292 kfree(sseqs);
293 dbg("Allocated %u sseqs\n", nseq->alloc);
294 nseq->alloc *= 2;
295 } else {
296 warn("Memory squeeze; failed to create sub-sequence\n");
297 return NULL; 293 return NULL;
298 } 294 }
295 dbg("Allocated %u more sseqs\n", nseq->alloc);
296 memcpy(sseqs, nseq->sseqs,
297 nseq->alloc * sizeof(struct sub_seq));
298 kfree(nseq->sseqs);
299 nseq->sseqs = sseqs;
300 nseq->alloc *= 2;
299 } 301 }
300 dbg("Have %u sseqs for type %u\n", nseq->alloc, type); 302 dbg("Have %u sseqs for type %u\n", nseq->alloc, type);
301 303
@@ -311,7 +313,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
311 sseq->upper = upper; 313 sseq->upper = upper;
312 created_subseq = 1; 314 created_subseq = 1;
313 } 315 }
314 dbg("inserting (%u %u %u) from %x:%u into sseq %x(%u,%u) of seq %x\n", 316 dbg("inserting {%u,%u,%u} from <0x%x:%u> into sseq %p(%u,%u) of seq %p\n",
315 type, lower, upper, node, port, sseq, 317 type, lower, upper, node, port, sseq,
316 sseq->lower, sseq->upper, nseq); 318 sseq->lower, sseq->upper, nseq);
317 319
@@ -320,7 +322,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
320 publ = publ_create(type, lower, upper, scope, node, port, key); 322 publ = publ_create(type, lower, upper, scope, node, port, key);
321 if (!publ) 323 if (!publ)
322 return NULL; 324 return NULL;
323 dbg("inserting publ %x, node=%x publ->node=%x, subscr->node=%x\n", 325 dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n",
324 publ, node, publ->node, publ->subscr.node); 326 publ, node, publ->node, publ->subscr.node);
325 327
326 if (!sseq->zone_list) 328 if (!sseq->zone_list)
@@ -367,45 +369,47 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
367 369
368/** 370/**
369 * tipc_nameseq_remove_publ - 371 * tipc_nameseq_remove_publ -
372 *
373 * NOTE: There may be cases where TIPC is asked to remove a publication
374 * that is not in the name table. For example, if another node issues a
375 * publication for a name sequence that overlaps an existing name sequence
376 * the publication will not be recorded, which means the publication won't
377 * be found when the name sequence is later withdrawn by that node.
378 * A failed withdraw request simply returns a failure indication and lets the
379 * caller issue any error or warning messages associated with such a problem.
370 */ 380 */
371 381
372static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, 382static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst,
373 u32 node, u32 ref, u32 key) 383 u32 node, u32 ref, u32 key)
374{ 384{
375 struct publication *publ; 385 struct publication *publ;
386 struct publication *curr;
376 struct publication *prev; 387 struct publication *prev;
377 struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); 388 struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
378 struct sub_seq *free; 389 struct sub_seq *free;
379 struct subscription *s, *st; 390 struct subscription *s, *st;
380 int removed_subseq = 0; 391 int removed_subseq = 0;
381 392
382 assert(nseq); 393 if (!sseq)
383
384 if (!sseq) {
385 int i;
386
387 warn("Withdraw unknown <%u,%u>?\n", nseq->type, inst);
388 assert(nseq->sseqs);
389 dbg("Dumping subseqs %x for %x, alloc = %u,ff=%u\n",
390 nseq->sseqs, nseq, nseq->alloc,
391 nseq->first_free);
392 for (i = 0; i < nseq->first_free; i++) {
393 dbg("Subseq %u(%x): lower = %u,upper = %u\n",
394 i, &nseq->sseqs[i], nseq->sseqs[i].lower,
395 nseq->sseqs[i].upper);
396 }
397 return NULL; 394 return NULL;
398 } 395
399 dbg("nameseq_remove: seq: %x, sseq %x, <%u,%u> key %u\n", 396 dbg("tipc_nameseq_remove_publ: seq: %p, sseq %p, {%u,%u}, key %u\n",
400 nseq, sseq, nseq->type, inst, key); 397 nseq, sseq, nseq->type, inst, key);
401 398
399 /* Remove publication from zone scope list */
400
402 prev = sseq->zone_list; 401 prev = sseq->zone_list;
403 publ = sseq->zone_list->zone_list_next; 402 publ = sseq->zone_list->zone_list_next;
404 while ((publ->key != key) || (publ->ref != ref) || 403 while ((publ->key != key) || (publ->ref != ref) ||
405 (publ->node && (publ->node != node))) { 404 (publ->node && (publ->node != node))) {
406 prev = publ; 405 prev = publ;
407 publ = publ->zone_list_next; 406 publ = publ->zone_list_next;
408 assert(prev != sseq->zone_list); 407 if (prev == sseq->zone_list) {
408
409 /* Prevent endless loop if publication not found */
410
411 return NULL;
412 }
409 } 413 }
410 if (publ != sseq->zone_list) 414 if (publ != sseq->zone_list)
411 prev->zone_list_next = publ->zone_list_next; 415 prev->zone_list_next = publ->zone_list_next;
@@ -416,14 +420,24 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
416 sseq->zone_list = NULL; 420 sseq->zone_list = NULL;
417 } 421 }
418 422
423 /* Remove publication from cluster scope list, if present */
424
419 if (in_own_cluster(node)) { 425 if (in_own_cluster(node)) {
420 prev = sseq->cluster_list; 426 prev = sseq->cluster_list;
421 publ = sseq->cluster_list->cluster_list_next; 427 curr = sseq->cluster_list->cluster_list_next;
422 while ((publ->key != key) || (publ->ref != ref) || 428 while (curr != publ) {
423 (publ->node && (publ->node != node))) { 429 prev = curr;
424 prev = publ; 430 curr = curr->cluster_list_next;
425 publ = publ->cluster_list_next; 431 if (prev == sseq->cluster_list) {
426 assert(prev != sseq->cluster_list); 432
433 /* Prevent endless loop for malformed list */
434
435 err("Unable to de-list cluster publication\n"
436 "{%u%u}, node=0x%x, ref=%u, key=%u)\n",
437 publ->type, publ->lower, publ->node,
438 publ->ref, publ->key);
439 goto end_cluster;
440 }
427 } 441 }
428 if (publ != sseq->cluster_list) 442 if (publ != sseq->cluster_list)
429 prev->cluster_list_next = publ->cluster_list_next; 443 prev->cluster_list_next = publ->cluster_list_next;
@@ -434,15 +448,26 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
434 sseq->cluster_list = NULL; 448 sseq->cluster_list = NULL;
435 } 449 }
436 } 450 }
451end_cluster:
452
453 /* Remove publication from node scope list, if present */
437 454
438 if (node == tipc_own_addr) { 455 if (node == tipc_own_addr) {
439 prev = sseq->node_list; 456 prev = sseq->node_list;
440 publ = sseq->node_list->node_list_next; 457 curr = sseq->node_list->node_list_next;
441 while ((publ->key != key) || (publ->ref != ref) || 458 while (curr != publ) {
442 (publ->node && (publ->node != node))) { 459 prev = curr;
443 prev = publ; 460 curr = curr->node_list_next;
444 publ = publ->node_list_next; 461 if (prev == sseq->node_list) {
445 assert(prev != sseq->node_list); 462
463 /* Prevent endless loop for malformed list */
464
465 err("Unable to de-list node publication\n"
466 "{%u%u}, node=0x%x, ref=%u, key=%u)\n",
467 publ->type, publ->lower, publ->node,
468 publ->ref, publ->key);
469 goto end_node;
470 }
446 } 471 }
447 if (publ != sseq->node_list) 472 if (publ != sseq->node_list)
448 prev->node_list_next = publ->node_list_next; 473 prev->node_list_next = publ->node_list_next;
@@ -453,22 +478,18 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
453 sseq->node_list = NULL; 478 sseq->node_list = NULL;
454 } 479 }
455 } 480 }
456 assert(!publ->node || (publ->node == node)); 481end_node:
457 assert(publ->ref == ref);
458 assert(publ->key == key);
459 482
460 /* 483 /* Contract subseq list if no more publications for that subseq */
461 * Contract subseq list if no more publications: 484
462 */ 485 if (!sseq->zone_list) {
463 if (!sseq->node_list && !sseq->cluster_list && !sseq->zone_list) {
464 free = &nseq->sseqs[nseq->first_free--]; 486 free = &nseq->sseqs[nseq->first_free--];
465 memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq)); 487 memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq));
466 removed_subseq = 1; 488 removed_subseq = 1;
467 } 489 }
468 490
469 /* 491 /* Notify any waiting subscriptions */
470 * Any subscriptions waiting ? 492
471 */
472 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { 493 list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
473 tipc_subscr_report_overlap(s, 494 tipc_subscr_report_overlap(s,
474 publ->lower, 495 publ->lower,
@@ -478,6 +499,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
478 publ->node, 499 publ->node,
479 removed_subseq); 500 removed_subseq);
480 } 501 }
502
481 return publ; 503 return publ;
482} 504}
483 505
@@ -530,7 +552,7 @@ static struct name_seq *nametbl_find_seq(u32 type)
530 seq_head = &table.types[hash(type)]; 552 seq_head = &table.types[hash(type)];
531 hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { 553 hlist_for_each_entry(ns, seq_node, seq_head, ns_list) {
532 if (ns->type == type) { 554 if (ns->type == type) {
533 dbg("found %x\n", ns); 555 dbg("found %p\n", ns);
534 return ns; 556 return ns;
535 } 557 }
536 } 558 }
@@ -543,22 +565,21 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
543{ 565{
544 struct name_seq *seq = nametbl_find_seq(type); 566 struct name_seq *seq = nametbl_find_seq(type);
545 567
546 dbg("ins_publ: <%u,%x,%x> found %x\n", type, lower, upper, seq); 568 dbg("tipc_nametbl_insert_publ: {%u,%u,%u} found %p\n", type, lower, upper, seq);
547 if (lower > upper) { 569 if (lower > upper) {
548 warn("Failed to publish illegal <%u,%u,%u>\n", 570 warn("Failed to publish illegal {%u,%u,%u}\n",
549 type, lower, upper); 571 type, lower, upper);
550 return NULL; 572 return NULL;
551 } 573 }
552 574
553 dbg("Publishing <%u,%u,%u> from %x\n", type, lower, upper, node); 575 dbg("Publishing {%u,%u,%u} from 0x%x\n", type, lower, upper, node);
554 if (!seq) { 576 if (!seq) {
555 seq = tipc_nameseq_create(type, &table.types[hash(type)]); 577 seq = tipc_nameseq_create(type, &table.types[hash(type)]);
556 dbg("tipc_nametbl_insert_publ: created %x\n", seq); 578 dbg("tipc_nametbl_insert_publ: created %p\n", seq);
557 } 579 }
558 if (!seq) 580 if (!seq)
559 return NULL; 581 return NULL;
560 582
561 assert(seq->type == type);
562 return tipc_nameseq_insert_publ(seq, type, lower, upper, 583 return tipc_nameseq_insert_publ(seq, type, lower, upper,
563 scope, node, port, key); 584 scope, node, port, key);
564} 585}
@@ -572,7 +593,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
572 if (!seq) 593 if (!seq)
573 return NULL; 594 return NULL;
574 595
575 dbg("Withdrawing <%u,%u> from %x\n", type, lower, node); 596 dbg("Withdrawing {%u,%u} from 0x%x\n", type, lower, node);
576 publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); 597 publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
577 598
578 if (!seq->first_free && list_empty(&seq->subscriptions)) { 599 if (!seq->first_free && list_empty(&seq->subscriptions)) {
@@ -738,12 +759,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
738 struct publication *publ; 759 struct publication *publ;
739 760
740 if (table.local_publ_count >= tipc_max_publications) { 761 if (table.local_publ_count >= tipc_max_publications) {
741 warn("Failed publish: max %u local publication\n", 762 warn("Publication failed, local publication limit reached (%u)\n",
742 tipc_max_publications); 763 tipc_max_publications);
743 return NULL; 764 return NULL;
744 } 765 }
745 if ((type < TIPC_RESERVED_TYPES) && !atomic_read(&rsv_publ_ok)) { 766 if ((type < TIPC_RESERVED_TYPES) && !atomic_read(&rsv_publ_ok)) {
746 warn("Failed to publish reserved name <%u,%u,%u>\n", 767 warn("Publication failed, reserved name {%u,%u,%u}\n",
747 type, lower, upper); 768 type, lower, upper);
748 return NULL; 769 return NULL;
749 } 770 }
@@ -767,10 +788,10 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
767{ 788{
768 struct publication *publ; 789 struct publication *publ;
769 790
770 dbg("tipc_nametbl_withdraw:<%d,%d,%d>\n", type, lower, key); 791 dbg("tipc_nametbl_withdraw: {%u,%u}, key=%u\n", type, lower, key);
771 write_lock_bh(&tipc_nametbl_lock); 792 write_lock_bh(&tipc_nametbl_lock);
772 publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); 793 publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
773 if (publ) { 794 if (likely(publ)) {
774 table.local_publ_count--; 795 table.local_publ_count--;
775 if (publ->scope != TIPC_NODE_SCOPE) 796 if (publ->scope != TIPC_NODE_SCOPE)
776 tipc_named_withdraw(publ); 797 tipc_named_withdraw(publ);
@@ -780,6 +801,9 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
780 return 1; 801 return 1;
781 } 802 }
782 write_unlock_bh(&tipc_nametbl_lock); 803 write_unlock_bh(&tipc_nametbl_lock);
804 err("Unable to remove local publication\n"
805 "(type=%u, lower=%u, ref=%u, key=%u)\n",
806 type, lower, ref, key);
783 return 0; 807 return 0;
784} 808}
785 809
@@ -787,8 +811,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
787 * tipc_nametbl_subscribe - add a subscription object to the name table 811 * tipc_nametbl_subscribe - add a subscription object to the name table
788 */ 812 */
789 813
790void 814void tipc_nametbl_subscribe(struct subscription *s)
791tipc_nametbl_subscribe(struct subscription *s)
792{ 815{
793 u32 type = s->seq.type; 816 u32 type = s->seq.type;
794 struct name_seq *seq; 817 struct name_seq *seq;
@@ -800,11 +823,13 @@ tipc_nametbl_subscribe(struct subscription *s)
800 } 823 }
801 if (seq){ 824 if (seq){
802 spin_lock_bh(&seq->lock); 825 spin_lock_bh(&seq->lock);
803 dbg("tipc_nametbl_subscribe:found %x for <%u,%u,%u>\n", 826 dbg("tipc_nametbl_subscribe:found %p for {%u,%u,%u}\n",
804 seq, type, s->seq.lower, s->seq.upper); 827 seq, type, s->seq.lower, s->seq.upper);
805 assert(seq->type == type);
806 tipc_nameseq_subscribe(seq, s); 828 tipc_nameseq_subscribe(seq, s);
807 spin_unlock_bh(&seq->lock); 829 spin_unlock_bh(&seq->lock);
830 } else {
831 warn("Failed to create subscription for {%u,%u,%u}\n",
832 s->seq.type, s->seq.lower, s->seq.upper);
808 } 833 }
809 write_unlock_bh(&tipc_nametbl_lock); 834 write_unlock_bh(&tipc_nametbl_lock);
810} 835}
@@ -813,8 +838,7 @@ tipc_nametbl_subscribe(struct subscription *s)
813 * tipc_nametbl_unsubscribe - remove a subscription object from name table 838 * tipc_nametbl_unsubscribe - remove a subscription object from name table
814 */ 839 */
815 840
816void 841void tipc_nametbl_unsubscribe(struct subscription *s)
817tipc_nametbl_unsubscribe(struct subscription *s)
818{ 842{
819 struct name_seq *seq; 843 struct name_seq *seq;
820 844
@@ -1049,35 +1073,20 @@ int tipc_nametbl_init(void)
1049 1073
1050void tipc_nametbl_stop(void) 1074void tipc_nametbl_stop(void)
1051{ 1075{
1052 struct hlist_head *seq_head;
1053 struct hlist_node *seq_node;
1054 struct hlist_node *tmp;
1055 struct name_seq *seq;
1056 u32 i; 1076 u32 i;
1057 1077
1058 if (!table.types) 1078 if (!table.types)
1059 return; 1079 return;
1060 1080
1081 /* Verify name table is empty, then release it */
1082
1061 write_lock_bh(&tipc_nametbl_lock); 1083 write_lock_bh(&tipc_nametbl_lock);
1062 for (i = 0; i < tipc_nametbl_size; i++) { 1084 for (i = 0; i < tipc_nametbl_size; i++) {
1063 seq_head = &table.types[i]; 1085 if (!hlist_empty(&table.types[i]))
1064 hlist_for_each_entry_safe(seq, seq_node, tmp, seq_head, ns_list) { 1086 err("tipc_nametbl_stop(): hash chain %u is non-null\n", i);
1065 struct sub_seq *sseq = seq->sseqs;
1066
1067 for (; sseq != &seq->sseqs[seq->first_free]; sseq++) {
1068 struct publication *publ = sseq->zone_list;
1069 assert(publ);
1070 do {
1071 struct publication *next =
1072 publ->zone_list_next;
1073 kfree(publ);
1074 publ = next;
1075 }
1076 while (publ != sseq->zone_list);
1077 }
1078 }
1079 } 1087 }
1080 kfree(table.types); 1088 kfree(table.types);
1081 table.types = NULL; 1089 table.types = NULL;
1082 write_unlock_bh(&tipc_nametbl_lock); 1090 write_unlock_bh(&tipc_nametbl_lock);
1083} 1091}
1092
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0d5db06e203f..ce9678efa98a 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -61,34 +61,37 @@ struct node *tipc_node_create(u32 addr)
61 struct node **curr_node; 61 struct node **curr_node;
62 62
63 n_ptr = kmalloc(sizeof(*n_ptr),GFP_ATOMIC); 63 n_ptr = kmalloc(sizeof(*n_ptr),GFP_ATOMIC);
64 if (n_ptr != NULL) { 64 if (!n_ptr) {
65 memset(n_ptr, 0, sizeof(*n_ptr)); 65 warn("Node creation failed, no memory\n");
66 n_ptr->addr = addr; 66 return NULL;
67 n_ptr->lock = SPIN_LOCK_UNLOCKED; 67 }
68 INIT_LIST_HEAD(&n_ptr->nsub); 68
69 69 c_ptr = tipc_cltr_find(addr);
70 c_ptr = tipc_cltr_find(addr); 70 if (!c_ptr) {
71 if (c_ptr == NULL) 71 c_ptr = tipc_cltr_create(addr);
72 c_ptr = tipc_cltr_create(addr); 72 }
73 if (c_ptr != NULL) { 73 if (!c_ptr) {
74 n_ptr->owner = c_ptr; 74 kfree(n_ptr);
75 tipc_cltr_attach_node(c_ptr, n_ptr); 75 return NULL;
76 n_ptr->last_router = -1; 76 }
77 77
78 /* Insert node into ordered list */ 78 memset(n_ptr, 0, sizeof(*n_ptr));
79 for (curr_node = &tipc_nodes; *curr_node; 79 n_ptr->addr = addr;
80 curr_node = &(*curr_node)->next) { 80 n_ptr->lock = SPIN_LOCK_UNLOCKED;
81 if (addr < (*curr_node)->addr) { 81 INIT_LIST_HEAD(&n_ptr->nsub);
82 n_ptr->next = *curr_node; 82 n_ptr->owner = c_ptr;
83 break; 83 tipc_cltr_attach_node(c_ptr, n_ptr);
84 } 84 n_ptr->last_router = -1;
85 } 85
86 (*curr_node) = n_ptr; 86 /* Insert node into ordered list */
87 } else { 87 for (curr_node = &tipc_nodes; *curr_node;
88 kfree(n_ptr); 88 curr_node = &(*curr_node)->next) {
89 n_ptr = NULL; 89 if (addr < (*curr_node)->addr) {
90 } 90 n_ptr->next = *curr_node;
91 } 91 break;
92 }
93 }
94 (*curr_node) = n_ptr;
92 return n_ptr; 95 return n_ptr;
93} 96}
94 97
@@ -122,6 +125,8 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
122{ 125{
123 struct link **active = &n_ptr->active_links[0]; 126 struct link **active = &n_ptr->active_links[0];
124 127
128 n_ptr->working_links++;
129
125 info("Established link <%s> on network plane %c\n", 130 info("Established link <%s> on network plane %c\n",
126 l_ptr->name, l_ptr->b_ptr->net_plane); 131 l_ptr->name, l_ptr->b_ptr->net_plane);
127 132
@@ -132,7 +137,7 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
132 return; 137 return;
133 } 138 }
134 if (l_ptr->priority < active[0]->priority) { 139 if (l_ptr->priority < active[0]->priority) {
135 info("Link is standby\n"); 140 info("New link <%s> becomes standby\n", l_ptr->name);
136 return; 141 return;
137 } 142 }
138 tipc_link_send_duplicate(active[0], l_ptr); 143 tipc_link_send_duplicate(active[0], l_ptr);
@@ -140,8 +145,9 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
140 active[0] = l_ptr; 145 active[0] = l_ptr;
141 return; 146 return;
142 } 147 }
143 info("Link <%s> on network plane %c becomes standby\n", 148 info("Old link <%s> becomes standby\n", active[0]->name);
144 active[0]->name, active[0]->b_ptr->net_plane); 149 if (active[1] != active[0])
150 info("Old link <%s> becomes standby\n", active[1]->name);
145 active[0] = active[1] = l_ptr; 151 active[0] = active[1] = l_ptr;
146} 152}
147 153
@@ -181,6 +187,8 @@ void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr)
181{ 187{
182 struct link **active; 188 struct link **active;
183 189
190 n_ptr->working_links--;
191
184 if (!tipc_link_is_active(l_ptr)) { 192 if (!tipc_link_is_active(l_ptr)) {
185 info("Lost standby link <%s> on network plane %c\n", 193 info("Lost standby link <%s> on network plane %c\n",
186 l_ptr->name, l_ptr->b_ptr->net_plane); 194 l_ptr->name, l_ptr->b_ptr->net_plane);
@@ -210,8 +218,7 @@ int tipc_node_has_active_links(struct node *n_ptr)
210 218
211int tipc_node_has_redundant_links(struct node *n_ptr) 219int tipc_node_has_redundant_links(struct node *n_ptr)
212{ 220{
213 return (tipc_node_has_active_links(n_ptr) && 221 return (n_ptr->working_links > 1);
214 (n_ptr->active_links[0] != n_ptr->active_links[1]));
215} 222}
216 223
217static int tipc_node_has_active_routes(struct node *n_ptr) 224static int tipc_node_has_active_routes(struct node *n_ptr)
@@ -234,7 +241,6 @@ struct node *tipc_node_attach_link(struct link *l_ptr)
234 u32 bearer_id = l_ptr->b_ptr->identity; 241 u32 bearer_id = l_ptr->b_ptr->identity;
235 char addr_string[16]; 242 char addr_string[16];
236 243
237 assert(bearer_id < MAX_BEARERS);
238 if (n_ptr->link_cnt >= 2) { 244 if (n_ptr->link_cnt >= 2) {
239 char addr_string[16]; 245 char addr_string[16];
240 246
@@ -249,7 +255,7 @@ struct node *tipc_node_attach_link(struct link *l_ptr)
249 n_ptr->link_cnt++; 255 n_ptr->link_cnt++;
250 return n_ptr; 256 return n_ptr;
251 } 257 }
252 err("Attempt to establish second link on <%s> to <%s> \n", 258 err("Attempt to establish second link on <%s> to %s \n",
253 l_ptr->b_ptr->publ.name, 259 l_ptr->b_ptr->publ.name,
254 addr_string_fill(addr_string, l_ptr->addr)); 260 addr_string_fill(addr_string, l_ptr->addr));
255 } 261 }
@@ -314,7 +320,7 @@ static void node_established_contact(struct node *n_ptr)
314 struct cluster *c_ptr; 320 struct cluster *c_ptr;
315 321
316 dbg("node_established_contact:-> %x\n", n_ptr->addr); 322 dbg("node_established_contact:-> %x\n", n_ptr->addr);
317 if (!tipc_node_has_active_routes(n_ptr)) { 323 if (!tipc_node_has_active_routes(n_ptr) && in_own_cluster(n_ptr->addr)) {
318 tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); 324 tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
319 } 325 }
320 326
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 781126e084ae..a07cc79ea637 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -51,6 +51,7 @@
51 * @nsub: list of "node down" subscriptions monitoring node 51 * @nsub: list of "node down" subscriptions monitoring node
52 * @active_links: pointers to active links to node 52 * @active_links: pointers to active links to node
53 * @links: pointers to all links to node 53 * @links: pointers to all links to node
54 * @working_links: number of working links to node (both active and standby)
54 * @link_cnt: number of links to node 55 * @link_cnt: number of links to node
55 * @permit_changeover: non-zero if node has redundant links to this system 56 * @permit_changeover: non-zero if node has redundant links to this system
56 * @routers: bitmap (used for multicluster communication) 57 * @routers: bitmap (used for multicluster communication)
@@ -76,6 +77,7 @@ struct node {
76 struct link *active_links[2]; 77 struct link *active_links[2];
77 struct link *links[MAX_BEARERS]; 78 struct link *links[MAX_BEARERS];
78 int link_cnt; 79 int link_cnt;
80 int working_links;
79 int permit_changeover; 81 int permit_changeover;
80 u32 routers[512/32]; 82 u32 routers[512/32];
81 int last_router; 83 int last_router;
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index cff4068cc755..cc3fff3dec4f 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -47,18 +47,19 @@
47void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr, 47void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr,
48 void *usr_handle, net_ev_handler handle_down) 48 void *usr_handle, net_ev_handler handle_down)
49{ 49{
50 node_sub->node = NULL; 50 if (addr == tipc_own_addr) {
51 if (addr == tipc_own_addr) 51 node_sub->node = NULL;
52 return; 52 return;
53 if (!tipc_addr_node_valid(addr)) { 53 }
54 warn("node_subscr with illegal %x\n", addr); 54
55 node_sub->node = tipc_node_find(addr);
56 if (!node_sub->node) {
57 warn("Node subscription rejected, unknown node 0x%x\n", addr);
55 return; 58 return;
56 } 59 }
57
58 node_sub->handle_node_down = handle_down; 60 node_sub->handle_node_down = handle_down;
59 node_sub->usr_handle = usr_handle; 61 node_sub->usr_handle = usr_handle;
60 node_sub->node = tipc_node_find(addr); 62
61 assert(node_sub->node);
62 tipc_node_lock(node_sub->node); 63 tipc_node_lock(node_sub->node);
63 list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub); 64 list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub);
64 tipc_node_unlock(node_sub->node); 65 tipc_node_unlock(node_sub->node);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 67e96cb1e825..47d97404e3ee 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -168,7 +168,6 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
168 struct port_list *item = dp; 168 struct port_list *item = dp;
169 int cnt = 0; 169 int cnt = 0;
170 170
171 assert(buf);
172 msg = buf_msg(buf); 171 msg = buf_msg(buf);
173 172
174 /* Create destination port list, if one wasn't supplied */ 173 /* Create destination port list, if one wasn't supplied */
@@ -196,7 +195,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
196 struct sk_buff *b = skb_clone(buf, GFP_ATOMIC); 195 struct sk_buff *b = skb_clone(buf, GFP_ATOMIC);
197 196
198 if (b == NULL) { 197 if (b == NULL) {
199 warn("Buffer allocation failure\n"); 198 warn("Unable to deliver multicast message(s)\n");
200 msg_dbg(msg, "LOST:"); 199 msg_dbg(msg, "LOST:");
201 goto exit; 200 goto exit;
202 } 201 }
@@ -228,14 +227,14 @@ u32 tipc_createport_raw(void *usr_handle,
228 u32 ref; 227 u32 ref;
229 228
230 p_ptr = kmalloc(sizeof(*p_ptr), GFP_ATOMIC); 229 p_ptr = kmalloc(sizeof(*p_ptr), GFP_ATOMIC);
231 if (p_ptr == NULL) { 230 if (!p_ptr) {
232 warn("Memory squeeze; failed to create port\n"); 231 warn("Port creation failed, no memory\n");
233 return 0; 232 return 0;
234 } 233 }
235 memset(p_ptr, 0, sizeof(*p_ptr)); 234 memset(p_ptr, 0, sizeof(*p_ptr));
236 ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); 235 ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock);
237 if (!ref) { 236 if (!ref) {
238 warn("Reference Table Exhausted\n"); 237 warn("Port creation failed, reference table exhausted\n");
239 kfree(p_ptr); 238 kfree(p_ptr);
240 return 0; 239 return 0;
241 } 240 }
@@ -810,18 +809,20 @@ static void port_dispatcher_sigh(void *dummy)
810 void *usr_handle; 809 void *usr_handle;
811 int connected; 810 int connected;
812 int published; 811 int published;
812 u32 message_type;
813 813
814 struct sk_buff *next = buf->next; 814 struct sk_buff *next = buf->next;
815 struct tipc_msg *msg = buf_msg(buf); 815 struct tipc_msg *msg = buf_msg(buf);
816 u32 dref = msg_destport(msg); 816 u32 dref = msg_destport(msg);
817 817
818 message_type = msg_type(msg);
819 if (message_type > TIPC_DIRECT_MSG)
820 goto reject; /* Unsupported message type */
821
818 p_ptr = tipc_port_lock(dref); 822 p_ptr = tipc_port_lock(dref);
819 if (!p_ptr) { 823 if (!p_ptr)
820 /* Port deleted while msg in queue */ 824 goto reject; /* Port deleted while msg in queue */
821 tipc_reject_msg(buf, TIPC_ERR_NO_PORT); 825
822 buf = next;
823 continue;
824 }
825 orig.ref = msg_origport(msg); 826 orig.ref = msg_origport(msg);
826 orig.node = msg_orignode(msg); 827 orig.node = msg_orignode(msg);
827 up_ptr = p_ptr->user_port; 828 up_ptr = p_ptr->user_port;
@@ -832,7 +833,7 @@ static void port_dispatcher_sigh(void *dummy)
832 if (unlikely(msg_errcode(msg))) 833 if (unlikely(msg_errcode(msg)))
833 goto err; 834 goto err;
834 835
835 switch (msg_type(msg)) { 836 switch (message_type) {
836 837
837 case TIPC_CONN_MSG:{ 838 case TIPC_CONN_MSG:{
838 tipc_conn_msg_event cb = up_ptr->conn_msg_cb; 839 tipc_conn_msg_event cb = up_ptr->conn_msg_cb;
@@ -874,6 +875,7 @@ static void port_dispatcher_sigh(void *dummy)
874 &orig); 875 &orig);
875 break; 876 break;
876 } 877 }
878 case TIPC_MCAST_MSG:
877 case TIPC_NAMED_MSG:{ 879 case TIPC_NAMED_MSG:{
878 tipc_named_msg_event cb = up_ptr->named_msg_cb; 880 tipc_named_msg_event cb = up_ptr->named_msg_cb;
879 881
@@ -886,7 +888,8 @@ static void port_dispatcher_sigh(void *dummy)
886 goto reject; 888 goto reject;
887 dseq.type = msg_nametype(msg); 889 dseq.type = msg_nametype(msg);
888 dseq.lower = msg_nameinst(msg); 890 dseq.lower = msg_nameinst(msg);
889 dseq.upper = dseq.lower; 891 dseq.upper = (message_type == TIPC_NAMED_MSG)
892 ? dseq.lower : msg_nameupper(msg);
890 skb_pull(buf, msg_hdr_sz(msg)); 893 skb_pull(buf, msg_hdr_sz(msg));
891 cb(usr_handle, dref, &buf, msg_data(msg), 894 cb(usr_handle, dref, &buf, msg_data(msg),
892 msg_data_sz(msg), msg_importance(msg), 895 msg_data_sz(msg), msg_importance(msg),
@@ -899,7 +902,7 @@ static void port_dispatcher_sigh(void *dummy)
899 buf = next; 902 buf = next;
900 continue; 903 continue;
901err: 904err:
902 switch (msg_type(msg)) { 905 switch (message_type) {
903 906
904 case TIPC_CONN_MSG:{ 907 case TIPC_CONN_MSG:{
905 tipc_conn_shutdown_event cb = 908 tipc_conn_shutdown_event cb =
@@ -931,6 +934,7 @@ err:
931 msg_data_sz(msg), msg_errcode(msg), &orig); 934 msg_data_sz(msg), msg_errcode(msg), &orig);
932 break; 935 break;
933 } 936 }
937 case TIPC_MCAST_MSG:
934 case TIPC_NAMED_MSG:{ 938 case TIPC_NAMED_MSG:{
935 tipc_named_msg_err_event cb = 939 tipc_named_msg_err_event cb =
936 up_ptr->named_err_cb; 940 up_ptr->named_err_cb;
@@ -940,7 +944,8 @@ err:
940 break; 944 break;
941 dseq.type = msg_nametype(msg); 945 dseq.type = msg_nametype(msg);
942 dseq.lower = msg_nameinst(msg); 946 dseq.lower = msg_nameinst(msg);
943 dseq.upper = dseq.lower; 947 dseq.upper = (message_type == TIPC_NAMED_MSG)
948 ? dseq.lower : msg_nameupper(msg);
944 skb_pull(buf, msg_hdr_sz(msg)); 949 skb_pull(buf, msg_hdr_sz(msg));
945 cb(usr_handle, dref, &buf, msg_data(msg), 950 cb(usr_handle, dref, &buf, msg_data(msg),
946 msg_data_sz(msg), msg_errcode(msg), &dseq); 951 msg_data_sz(msg), msg_errcode(msg), &dseq);
@@ -1054,7 +1059,8 @@ int tipc_createport(u32 user_ref,
1054 u32 ref; 1059 u32 ref;
1055 1060
1056 up_ptr = (struct user_port *)kmalloc(sizeof(*up_ptr), GFP_ATOMIC); 1061 up_ptr = (struct user_port *)kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
1057 if (up_ptr == NULL) { 1062 if (!up_ptr) {
1063 warn("Port creation failed, no memory\n");
1058 return -ENOMEM; 1064 return -ENOMEM;
1059 } 1065 }
1060 ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance); 1066 ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance);
@@ -1165,8 +1171,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1165 p_ptr = tipc_port_lock(ref); 1171 p_ptr = tipc_port_lock(ref);
1166 if (!p_ptr) 1172 if (!p_ptr)
1167 return -EINVAL; 1173 return -EINVAL;
1168 if (!p_ptr->publ.published)
1169 goto exit;
1170 if (!seq) { 1174 if (!seq) {
1171 list_for_each_entry_safe(publ, tpubl, 1175 list_for_each_entry_safe(publ, tpubl,
1172 &p_ptr->publications, pport_list) { 1176 &p_ptr->publications, pport_list) {
@@ -1193,7 +1197,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
1193 } 1197 }
1194 if (list_empty(&p_ptr->publications)) 1198 if (list_empty(&p_ptr->publications))
1195 p_ptr->publ.published = 0; 1199 p_ptr->publ.published = 0;
1196exit:
1197 tipc_port_unlock(p_ptr); 1200 tipc_port_unlock(p_ptr);
1198 return res; 1201 return res;
1199} 1202}
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 33bbf5095094..d2f0cce10e20 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -127,7 +127,14 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
127 u32 next_plus_upper; 127 u32 next_plus_upper;
128 u32 reference = 0; 128 u32 reference = 0;
129 129
130 assert(tipc_ref_table.entries && object); 130 if (!object) {
131 err("Attempt to acquire reference to non-existent object\n");
132 return 0;
133 }
134 if (!tipc_ref_table.entries) {
135 err("Reference table not found during acquisition attempt\n");
136 return 0;
137 }
131 138
132 write_lock_bh(&ref_table_lock); 139 write_lock_bh(&ref_table_lock);
133 if (tipc_ref_table.first_free) { 140 if (tipc_ref_table.first_free) {
@@ -162,15 +169,28 @@ void tipc_ref_discard(u32 ref)
162 u32 index; 169 u32 index;
163 u32 index_mask; 170 u32 index_mask;
164 171
165 assert(tipc_ref_table.entries); 172 if (!ref) {
166 assert(ref != 0); 173 err("Attempt to discard reference 0\n");
174 return;
175 }
176 if (!tipc_ref_table.entries) {
177 err("Reference table not found during discard attempt\n");
178 return;
179 }
167 180
168 write_lock_bh(&ref_table_lock); 181 write_lock_bh(&ref_table_lock);
169 index_mask = tipc_ref_table.index_mask; 182 index_mask = tipc_ref_table.index_mask;
170 index = ref & index_mask; 183 index = ref & index_mask;
171 entry = &(tipc_ref_table.entries[index]); 184 entry = &(tipc_ref_table.entries[index]);
172 assert(entry->object != 0); 185
173 assert(entry->data.reference == ref); 186 if (!entry->object) {
187 err("Attempt to discard reference to non-existent object\n");
188 goto exit;
189 }
190 if (entry->data.reference != ref) {
191 err("Attempt to discard non-existent reference\n");
192 goto exit;
193 }
174 194
175 /* mark entry as unused */ 195 /* mark entry as unused */
176 entry->object = NULL; 196 entry->object = NULL;
@@ -184,6 +204,7 @@ void tipc_ref_discard(u32 ref)
184 204
185 /* increment upper bits of entry to invalidate subsequent references */ 205 /* increment upper bits of entry to invalidate subsequent references */
186 entry->data.next_plus_upper = (ref & ~index_mask) + (index_mask + 1); 206 entry->data.next_plus_upper = (ref & ~index_mask) + (index_mask + 1);
207exit:
187 write_unlock_bh(&ref_table_lock); 208 write_unlock_bh(&ref_table_lock);
188} 209}
189 210
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 648a734e6044..32d778448a00 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -169,12 +169,6 @@ static int tipc_create(struct socket *sock, int protocol)
169 struct sock *sk; 169 struct sock *sk;
170 u32 ref; 170 u32 ref;
171 171
172 if ((sock->type != SOCK_STREAM) &&
173 (sock->type != SOCK_SEQPACKET) &&
174 (sock->type != SOCK_DGRAM) &&
175 (sock->type != SOCK_RDM))
176 return -EPROTOTYPE;
177
178 if (unlikely(protocol != 0)) 172 if (unlikely(protocol != 0))
179 return -EPROTONOSUPPORT; 173 return -EPROTONOSUPPORT;
180 174
@@ -199,6 +193,9 @@ static int tipc_create(struct socket *sock, int protocol)
199 sock->ops = &msg_ops; 193 sock->ops = &msg_ops;
200 sock->state = SS_READY; 194 sock->state = SS_READY;
201 break; 195 break;
196 default:
197 tipc_deleteport(ref);
198 return -EPROTOTYPE;
202 } 199 }
203 200
204 sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); 201 sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
@@ -426,7 +423,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
426 423
427 if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) 424 if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
428 return -EFAULT; 425 return -EFAULT;
429 if ((ntohs(hdr.tcm_type) & 0xC000) & (!capable(CAP_NET_ADMIN))) 426 if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
430 return -EACCES; 427 return -EACCES;
431 428
432 return 0; 429 return 0;
@@ -437,7 +434,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
437 * @iocb: (unused) 434 * @iocb: (unused)
438 * @sock: socket structure 435 * @sock: socket structure
439 * @m: message to send 436 * @m: message to send
440 * @total_len: (unused) 437 * @total_len: length of message
441 * 438 *
442 * Message must have an destination specified explicitly. 439 * Message must have an destination specified explicitly.
443 * Used for SOCK_RDM and SOCK_DGRAM messages, 440 * Used for SOCK_RDM and SOCK_DGRAM messages,
@@ -458,7 +455,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
458 455
459 if (unlikely(!dest)) 456 if (unlikely(!dest))
460 return -EDESTADDRREQ; 457 return -EDESTADDRREQ;
461 if (unlikely(dest->family != AF_TIPC)) 458 if (unlikely((m->msg_namelen < sizeof(*dest)) ||
459 (dest->family != AF_TIPC)))
462 return -EINVAL; 460 return -EINVAL;
463 461
464 needs_conn = (sock->state != SS_READY); 462 needs_conn = (sock->state != SS_READY);
@@ -470,6 +468,10 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
470 if ((tsock->p->published) || 468 if ((tsock->p->published) ||
471 ((sock->type == SOCK_STREAM) && (total_len != 0))) 469 ((sock->type == SOCK_STREAM) && (total_len != 0)))
472 return -EOPNOTSUPP; 470 return -EOPNOTSUPP;
471 if (dest->addrtype == TIPC_ADDR_NAME) {
472 tsock->p->conn_type = dest->addr.name.name.type;
473 tsock->p->conn_instance = dest->addr.name.name.instance;
474 }
473 } 475 }
474 476
475 if (down_interruptible(&tsock->sem)) 477 if (down_interruptible(&tsock->sem))
@@ -538,7 +540,7 @@ exit:
538 * @iocb: (unused) 540 * @iocb: (unused)
539 * @sock: socket structure 541 * @sock: socket structure
540 * @m: message to send 542 * @m: message to send
541 * @total_len: (unused) 543 * @total_len: length of message
542 * 544 *
543 * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. 545 * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
544 * 546 *
@@ -561,15 +563,15 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
561 return -ERESTARTSYS; 563 return -ERESTARTSYS;
562 } 564 }
563 565
564 if (unlikely(sock->state != SS_CONNECTED)) {
565 if (sock->state == SS_DISCONNECTING)
566 res = -EPIPE;
567 else
568 res = -ENOTCONN;
569 goto exit;
570 }
571
572 do { 566 do {
567 if (unlikely(sock->state != SS_CONNECTED)) {
568 if (sock->state == SS_DISCONNECTING)
569 res = -EPIPE;
570 else
571 res = -ENOTCONN;
572 goto exit;
573 }
574
573 res = tipc_send(tsock->p->ref, m->msg_iovlen, m->msg_iov); 575 res = tipc_send(tsock->p->ref, m->msg_iovlen, m->msg_iov);
574 if (likely(res != -ELINKCONG)) { 576 if (likely(res != -ELINKCONG)) {
575exit: 577exit:
@@ -597,7 +599,8 @@ exit:
597 * 599 *
598 * Used for SOCK_STREAM data. 600 * Used for SOCK_STREAM data.
599 * 601 *
600 * Returns the number of bytes sent on success, or errno otherwise 602 * Returns the number of bytes sent on success (or partial success),
603 * or errno if no data sent
601 */ 604 */
602 605
603 606
@@ -611,6 +614,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
611 char __user *curr_start; 614 char __user *curr_start;
612 int curr_left; 615 int curr_left;
613 int bytes_to_send; 616 int bytes_to_send;
617 int bytes_sent;
614 int res; 618 int res;
615 619
616 if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) 620 if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE))
@@ -633,11 +637,11 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
633 * of small iovec entries into send_packet(). 637 * of small iovec entries into send_packet().
634 */ 638 */
635 639
636 my_msg = *m; 640 curr_iov = m->msg_iov;
637 curr_iov = my_msg.msg_iov; 641 curr_iovlen = m->msg_iovlen;
638 curr_iovlen = my_msg.msg_iovlen;
639 my_msg.msg_iov = &my_iov; 642 my_msg.msg_iov = &my_iov;
640 my_msg.msg_iovlen = 1; 643 my_msg.msg_iovlen = 1;
644 bytes_sent = 0;
641 645
642 while (curr_iovlen--) { 646 while (curr_iovlen--) {
643 curr_start = curr_iov->iov_base; 647 curr_start = curr_iov->iov_base;
@@ -648,16 +652,18 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
648 ? curr_left : TIPC_MAX_USER_MSG_SIZE; 652 ? curr_left : TIPC_MAX_USER_MSG_SIZE;
649 my_iov.iov_base = curr_start; 653 my_iov.iov_base = curr_start;
650 my_iov.iov_len = bytes_to_send; 654 my_iov.iov_len = bytes_to_send;
651 if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) 655 if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) {
652 return res; 656 return bytes_sent ? bytes_sent : res;
657 }
653 curr_left -= bytes_to_send; 658 curr_left -= bytes_to_send;
654 curr_start += bytes_to_send; 659 curr_start += bytes_to_send;
660 bytes_sent += bytes_to_send;
655 } 661 }
656 662
657 curr_iov++; 663 curr_iov++;
658 } 664 }
659 665
660 return total_len; 666 return bytes_sent;
661} 667}
662 668
663/** 669/**
@@ -727,6 +733,7 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
727 u32 anc_data[3]; 733 u32 anc_data[3];
728 u32 err; 734 u32 err;
729 u32 dest_type; 735 u32 dest_type;
736 int has_name;
730 int res; 737 int res;
731 738
732 if (likely(m->msg_controllen == 0)) 739 if (likely(m->msg_controllen == 0))
@@ -738,10 +745,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
738 if (unlikely(err)) { 745 if (unlikely(err)) {
739 anc_data[0] = err; 746 anc_data[0] = err;
740 anc_data[1] = msg_data_sz(msg); 747 anc_data[1] = msg_data_sz(msg);
741 if ((res = put_cmsg(m, SOL_SOCKET, TIPC_ERRINFO, 8, anc_data))) 748 if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data)))
742 return res; 749 return res;
743 if (anc_data[1] && 750 if (anc_data[1] &&
744 (res = put_cmsg(m, SOL_SOCKET, TIPC_RETDATA, anc_data[1], 751 (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
745 msg_data(msg)))) 752 msg_data(msg))))
746 return res; 753 return res;
747 } 754 }
@@ -751,25 +758,28 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
751 dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG; 758 dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
752 switch (dest_type) { 759 switch (dest_type) {
753 case TIPC_NAMED_MSG: 760 case TIPC_NAMED_MSG:
761 has_name = 1;
754 anc_data[0] = msg_nametype(msg); 762 anc_data[0] = msg_nametype(msg);
755 anc_data[1] = msg_namelower(msg); 763 anc_data[1] = msg_namelower(msg);
756 anc_data[2] = msg_namelower(msg); 764 anc_data[2] = msg_namelower(msg);
757 break; 765 break;
758 case TIPC_MCAST_MSG: 766 case TIPC_MCAST_MSG:
767 has_name = 1;
759 anc_data[0] = msg_nametype(msg); 768 anc_data[0] = msg_nametype(msg);
760 anc_data[1] = msg_namelower(msg); 769 anc_data[1] = msg_namelower(msg);
761 anc_data[2] = msg_nameupper(msg); 770 anc_data[2] = msg_nameupper(msg);
762 break; 771 break;
763 case TIPC_CONN_MSG: 772 case TIPC_CONN_MSG:
773 has_name = (tport->conn_type != 0);
764 anc_data[0] = tport->conn_type; 774 anc_data[0] = tport->conn_type;
765 anc_data[1] = tport->conn_instance; 775 anc_data[1] = tport->conn_instance;
766 anc_data[2] = tport->conn_instance; 776 anc_data[2] = tport->conn_instance;
767 break; 777 break;
768 default: 778 default:
769 anc_data[0] = 0; 779 has_name = 0;
770 } 780 }
771 if (anc_data[0] && 781 if (has_name &&
772 (res = put_cmsg(m, SOL_SOCKET, TIPC_DESTNAME, 12, anc_data))) 782 (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data)))
773 return res; 783 return res;
774 784
775 return 0; 785 return 0;
@@ -960,7 +970,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
960restart: 970restart:
961 if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) && 971 if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) &&
962 (flags & MSG_DONTWAIT))) { 972 (flags & MSG_DONTWAIT))) {
963 res = (sz_copied == 0) ? -EWOULDBLOCK : 0; 973 res = -EWOULDBLOCK;
964 goto exit; 974 goto exit;
965 } 975 }
966 976
@@ -1051,7 +1061,7 @@ restart:
1051 1061
1052exit: 1062exit:
1053 up(&tsock->sem); 1063 up(&tsock->sem);
1054 return res ? res : sz_copied; 1064 return sz_copied ? sz_copied : res;
1055} 1065}
1056 1066
1057/** 1067/**
@@ -1236,7 +1246,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1236 if (sock->state == SS_READY) 1246 if (sock->state == SS_READY)
1237 return -EOPNOTSUPP; 1247 return -EOPNOTSUPP;
1238 1248
1239 /* MOVE THE REST OF THIS ERROR CHECKING TO send_msg()? */ 1249 /* Issue Posix-compliant error code if socket is in the wrong state */
1250
1240 if (sock->state == SS_LISTENING) 1251 if (sock->state == SS_LISTENING)
1241 return -EOPNOTSUPP; 1252 return -EOPNOTSUPP;
1242 if (sock->state == SS_CONNECTING) 1253 if (sock->state == SS_CONNECTING)
@@ -1244,13 +1255,20 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1244 if (sock->state != SS_UNCONNECTED) 1255 if (sock->state != SS_UNCONNECTED)
1245 return -EISCONN; 1256 return -EISCONN;
1246 1257
1247 if ((dst->family != AF_TIPC) || 1258 /*
1248 ((dst->addrtype != TIPC_ADDR_NAME) && (dst->addrtype != TIPC_ADDR_ID))) 1259 * Reject connection attempt using multicast address
1260 *
1261 * Note: send_msg() validates the rest of the address fields,
1262 * so there's no need to do it here
1263 */
1264
1265 if (dst->addrtype == TIPC_ADDR_MCAST)
1249 return -EINVAL; 1266 return -EINVAL;
1250 1267
1251 /* Send a 'SYN-' to destination */ 1268 /* Send a 'SYN-' to destination */
1252 1269
1253 m.msg_name = dest; 1270 m.msg_name = dest;
1271 m.msg_namelen = destlen;
1254 if ((res = send_msg(NULL, sock, &m, 0)) < 0) { 1272 if ((res = send_msg(NULL, sock, &m, 0)) < 0) {
1255 sock->state = SS_DISCONNECTING; 1273 sock->state = SS_DISCONNECTING;
1256 return res; 1274 return res;
@@ -1269,10 +1287,6 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1269 msg = buf_msg(buf); 1287 msg = buf_msg(buf);
1270 res = auto_connect(sock, tsock, msg); 1288 res = auto_connect(sock, tsock, msg);
1271 if (!res) { 1289 if (!res) {
1272 if (dst->addrtype == TIPC_ADDR_NAME) {
1273 tsock->p->conn_type = dst->addr.name.name.type;
1274 tsock->p->conn_instance = dst->addr.name.name.instance;
1275 }
1276 if (!msg_data_sz(msg)) 1290 if (!msg_data_sz(msg))
1277 advance_queue(tsock); 1291 advance_queue(tsock);
1278 } 1292 }
@@ -1386,7 +1400,7 @@ exit:
1386/** 1400/**
1387 * shutdown - shutdown socket connection 1401 * shutdown - shutdown socket connection
1388 * @sock: socket structure 1402 * @sock: socket structure
1389 * @how: direction to close (always treated as read + write) 1403 * @how: direction to close (unused; always treated as read + write)
1390 * 1404 *
1391 * Terminates connection (if necessary), then purges socket's receive queue. 1405 * Terminates connection (if necessary), then purges socket's receive queue.
1392 * 1406 *
@@ -1469,7 +1483,8 @@ restart:
1469 * Returns 0 on success, errno otherwise 1483 * Returns 0 on success, errno otherwise
1470 */ 1484 */
1471 1485
1472static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol) 1486static int setsockopt(struct socket *sock,
1487 int lvl, int opt, char __user *ov, int ol)
1473{ 1488{
1474 struct tipc_sock *tsock = tipc_sk(sock->sk); 1489 struct tipc_sock *tsock = tipc_sk(sock->sk);
1475 u32 value; 1490 u32 value;
@@ -1525,7 +1540,8 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol)
1525 * Returns 0 on success, errno otherwise 1540 * Returns 0 on success, errno otherwise
1526 */ 1541 */
1527 1542
1528static int getsockopt(struct socket *sock, int lvl, int opt, char *ov, int *ol) 1543static int getsockopt(struct socket *sock,
1544 int lvl, int opt, char __user *ov, int *ol)
1529{ 1545{
1530 struct tipc_sock *tsock = tipc_sk(sock->sk); 1546 struct tipc_sock *tsock = tipc_sk(sock->sk);
1531 int len; 1547 int len;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index c5f026c7fd38..fc171875660c 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -266,7 +266,8 @@ static void subscr_subscribe(struct tipc_subscr *s,
266 /* Refuse subscription if global limit exceeded */ 266 /* Refuse subscription if global limit exceeded */
267 267
268 if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) { 268 if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) {
269 warn("Failed: max %u subscriptions\n", tipc_max_subscriptions); 269 warn("Subscription rejected, subscription limit reached (%u)\n",
270 tipc_max_subscriptions);
270 subscr_terminate(subscriber); 271 subscr_terminate(subscriber);
271 return; 272 return;
272 } 273 }
@@ -274,8 +275,8 @@ static void subscr_subscribe(struct tipc_subscr *s,
274 /* Allocate subscription object */ 275 /* Allocate subscription object */
275 276
276 sub = kmalloc(sizeof(*sub), GFP_ATOMIC); 277 sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
277 if (sub == NULL) { 278 if (!sub) {
278 warn("Memory squeeze; ignoring subscription\n"); 279 warn("Subscription rejected, no memory\n");
279 subscr_terminate(subscriber); 280 subscr_terminate(subscriber);
280 return; 281 return;
281 } 282 }
@@ -298,8 +299,7 @@ static void subscr_subscribe(struct tipc_subscr *s,
298 if ((((sub->filter != TIPC_SUB_PORTS) 299 if ((((sub->filter != TIPC_SUB_PORTS)
299 && (sub->filter != TIPC_SUB_SERVICE))) 300 && (sub->filter != TIPC_SUB_SERVICE)))
300 || (sub->seq.lower > sub->seq.upper)) { 301 || (sub->seq.lower > sub->seq.upper)) {
301 warn("Rejecting illegal subscription %u,%u,%u\n", 302 warn("Subscription rejected, illegal request\n");
302 sub->seq.type, sub->seq.lower, sub->seq.upper);
303 kfree(sub); 303 kfree(sub);
304 subscr_terminate(subscriber); 304 subscr_terminate(subscriber);
305 return; 305 return;
@@ -387,7 +387,7 @@ static void subscr_named_msg_event(void *usr_handle,
387 dbg("subscr_named_msg_event: orig = %x own = %x,\n", 387 dbg("subscr_named_msg_event: orig = %x own = %x,\n",
388 orig->node, tipc_own_addr); 388 orig->node, tipc_own_addr);
389 if (size && (size != sizeof(struct tipc_subscr))) { 389 if (size && (size != sizeof(struct tipc_subscr))) {
390 warn("Received tipc_subscr of invalid size\n"); 390 warn("Subscriber rejected, invalid subscription size\n");
391 return; 391 return;
392 } 392 }
393 393
@@ -395,7 +395,7 @@ static void subscr_named_msg_event(void *usr_handle,
395 395
396 subscriber = kmalloc(sizeof(struct subscriber), GFP_ATOMIC); 396 subscriber = kmalloc(sizeof(struct subscriber), GFP_ATOMIC);
397 if (subscriber == NULL) { 397 if (subscriber == NULL) {
398 warn("Memory squeeze; ignoring subscriber setup\n"); 398 warn("Subscriber rejected, no memory\n");
399 return; 399 return;
400 } 400 }
401 memset(subscriber, 0, sizeof(struct subscriber)); 401 memset(subscriber, 0, sizeof(struct subscriber));
@@ -403,7 +403,7 @@ static void subscr_named_msg_event(void *usr_handle,
403 INIT_LIST_HEAD(&subscriber->subscriber_list); 403 INIT_LIST_HEAD(&subscriber->subscriber_list);
404 subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock); 404 subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock);
405 if (subscriber->ref == 0) { 405 if (subscriber->ref == 0) {
406 warn("Failed to acquire subscriber reference\n"); 406 warn("Subscriber rejected, reference table exhausted\n");
407 kfree(subscriber); 407 kfree(subscriber);
408 return; 408 return;
409 } 409 }
@@ -422,7 +422,7 @@ static void subscr_named_msg_event(void *usr_handle,
422 NULL, 422 NULL,
423 &subscriber->port_ref); 423 &subscriber->port_ref);
424 if (subscriber->port_ref == 0) { 424 if (subscriber->port_ref == 0) {
425 warn("Memory squeeze; failed to create subscription port\n"); 425 warn("Subscriber rejected, unable to create port\n");
426 tipc_ref_discard(subscriber->ref); 426 tipc_ref_discard(subscriber->ref);
427 kfree(subscriber); 427 kfree(subscriber);
428 return; 428 return;
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 2803e1b4f170..316c4872ff5b 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -44,19 +44,24 @@
44 44
45struct _zone *tipc_zone_create(u32 addr) 45struct _zone *tipc_zone_create(u32 addr)
46{ 46{
47 struct _zone *z_ptr = NULL; 47 struct _zone *z_ptr;
48 u32 z_num; 48 u32 z_num;
49 49
50 if (!tipc_addr_domain_valid(addr)) 50 if (!tipc_addr_domain_valid(addr)) {
51 err("Zone creation failed, invalid domain 0x%x\n", addr);
51 return NULL; 52 return NULL;
53 }
52 54
53 z_ptr = (struct _zone *)kmalloc(sizeof(*z_ptr), GFP_ATOMIC); 55 z_ptr = (struct _zone *)kmalloc(sizeof(*z_ptr), GFP_ATOMIC);
54 if (z_ptr != NULL) { 56 if (!z_ptr) {
55 memset(z_ptr, 0, sizeof(*z_ptr)); 57 warn("Zone creation failed, insufficient memory\n");
56 z_num = tipc_zone(addr); 58 return NULL;
57 z_ptr->addr = tipc_addr(z_num, 0, 0);
58 tipc_net.zones[z_num] = z_ptr;
59 } 59 }
60
61 memset(z_ptr, 0, sizeof(*z_ptr));
62 z_num = tipc_zone(addr);
63 z_ptr->addr = tipc_addr(z_num, 0, 0);
64 tipc_net.zones[z_num] = z_ptr;
60 return z_ptr; 65 return z_ptr;
61} 66}
62 67
diff --git a/security/Kconfig b/security/Kconfig
index 34f593410d57..67785df264e5 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -22,16 +22,22 @@ config KEYS
22 If you are unsure as to whether this is required, answer N. 22 If you are unsure as to whether this is required, answer N.
23 23
24config KEYS_DEBUG_PROC_KEYS 24config KEYS_DEBUG_PROC_KEYS
25 bool "Enable the /proc/keys file by which all keys may be viewed" 25 bool "Enable the /proc/keys file by which keys may be viewed"
26 depends on KEYS 26 depends on KEYS
27 help 27 help
28 This option turns on support for the /proc/keys file through which 28 This option turns on support for the /proc/keys file - through which
29 all the keys on the system can be listed. 29 can be listed all the keys on the system that are viewable by the
30 reading process.
30 31
31 This option is a slight security risk in that it makes it possible 32 The only keys included in the list are those that grant View
32 for anyone to see all the keys on the system. Normally the manager 33 permission to the reading process whether or not it possesses them.
33 pretends keys that are inaccessible to a process don't exist as far 34 Note that LSM security checks are still performed, and may further
34 as that process is concerned. 35 filter out keys that the current process is not authorised to view.
36
37 Only key attributes are listed here; key payloads are not included in
38 the resulting table.
39
40 If you are unsure as to whether this is required, answer N.
35 41
36config SECURITY 42config SECURITY
37 bool "Enable different security models" 43 bool "Enable different security models"
diff --git a/security/dummy.c b/security/dummy.c
index c3c5493581e2..310fcdf7b749 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -870,7 +870,8 @@ static int dummy_setprocattr(struct task_struct *p, char *name, void *value, siz
870} 870}
871 871
872#ifdef CONFIG_KEYS 872#ifdef CONFIG_KEYS
873static inline int dummy_key_alloc(struct key *key, struct task_struct *ctx) 873static inline int dummy_key_alloc(struct key *key, struct task_struct *ctx,
874 unsigned long flags)
874{ 875{
875 return 0; 876 return 0;
876} 877}
diff --git a/security/keys/internal.h b/security/keys/internal.h
index e066e6057955..3c2877f0663e 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -99,7 +99,8 @@ extern int install_process_keyring(struct task_struct *tsk);
99extern struct key *request_key_and_link(struct key_type *type, 99extern struct key *request_key_and_link(struct key_type *type,
100 const char *description, 100 const char *description,
101 const char *callout_info, 101 const char *callout_info,
102 struct key *dest_keyring); 102 struct key *dest_keyring,
103 unsigned long flags);
103 104
104/* 105/*
105 * request_key authorisation 106 * request_key authorisation
diff --git a/security/keys/key.c b/security/keys/key.c
index 51f851557389..43295ca37b5d 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -15,11 +15,11 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/workqueue.h> 17#include <linux/workqueue.h>
18#include <linux/random.h>
18#include <linux/err.h> 19#include <linux/err.h>
19#include "internal.h" 20#include "internal.h"
20 21
21static kmem_cache_t *key_jar; 22static kmem_cache_t *key_jar;
22static key_serial_t key_serial_next = 3;
23struct rb_root key_serial_tree; /* tree of keys indexed by serial */ 23struct rb_root key_serial_tree; /* tree of keys indexed by serial */
24DEFINE_SPINLOCK(key_serial_lock); 24DEFINE_SPINLOCK(key_serial_lock);
25 25
@@ -169,22 +169,23 @@ static void __init __key_insert_serial(struct key *key)
169/*****************************************************************************/ 169/*****************************************************************************/
170/* 170/*
171 * assign a key the next unique serial number 171 * assign a key the next unique serial number
172 * - we work through all the serial numbers between 2 and 2^31-1 in turn and 172 * - these are assigned randomly to avoid security issues through covert
173 * then wrap 173 * channel problems
174 */ 174 */
175static inline void key_alloc_serial(struct key *key) 175static inline void key_alloc_serial(struct key *key)
176{ 176{
177 struct rb_node *parent, **p; 177 struct rb_node *parent, **p;
178 struct key *xkey; 178 struct key *xkey;
179 179
180 spin_lock(&key_serial_lock); 180 /* propose a random serial number and look for a hole for it in the
181
182 /* propose a likely serial number and look for a hole for it in the
183 * serial number tree */ 181 * serial number tree */
184 key->serial = key_serial_next; 182 do {
185 if (key->serial < 3) 183 get_random_bytes(&key->serial, sizeof(key->serial));
186 key->serial = 3; 184
187 key_serial_next = key->serial + 1; 185 key->serial >>= 1; /* negative numbers are not permitted */
186 } while (key->serial < 3);
187
188 spin_lock(&key_serial_lock);
188 189
189 parent = NULL; 190 parent = NULL;
190 p = &key_serial_tree.rb_node; 191 p = &key_serial_tree.rb_node;
@@ -204,12 +205,11 @@ static inline void key_alloc_serial(struct key *key)
204 205
205 /* we found a key with the proposed serial number - walk the tree from 206 /* we found a key with the proposed serial number - walk the tree from
206 * that point looking for the next unused serial number */ 207 * that point looking for the next unused serial number */
207 serial_exists: 208serial_exists:
208 for (;;) { 209 for (;;) {
209 key->serial = key_serial_next; 210 key->serial++;
210 if (key->serial < 2) 211 if (key->serial < 2)
211 key->serial = 2; 212 key->serial = 2;
212 key_serial_next = key->serial + 1;
213 213
214 if (!rb_parent(parent)) 214 if (!rb_parent(parent))
215 p = &key_serial_tree.rb_node; 215 p = &key_serial_tree.rb_node;
@@ -228,7 +228,7 @@ static inline void key_alloc_serial(struct key *key)
228 } 228 }
229 229
230 /* we've found a suitable hole - arrange for this key to occupy it */ 230 /* we've found a suitable hole - arrange for this key to occupy it */
231 insert_here: 231insert_here:
232 rb_link_node(&key->serial_node, parent, p); 232 rb_link_node(&key->serial_node, parent, p);
233 rb_insert_color(&key->serial_node, &key_serial_tree); 233 rb_insert_color(&key->serial_node, &key_serial_tree);
234 234
@@ -248,7 +248,7 @@ static inline void key_alloc_serial(struct key *key)
248 */ 248 */
249struct key *key_alloc(struct key_type *type, const char *desc, 249struct key *key_alloc(struct key_type *type, const char *desc,
250 uid_t uid, gid_t gid, struct task_struct *ctx, 250 uid_t uid, gid_t gid, struct task_struct *ctx,
251 key_perm_t perm, int not_in_quota) 251 key_perm_t perm, unsigned long flags)
252{ 252{
253 struct key_user *user = NULL; 253 struct key_user *user = NULL;
254 struct key *key; 254 struct key *key;
@@ -269,12 +269,14 @@ struct key *key_alloc(struct key_type *type, const char *desc,
269 269
270 /* check that the user's quota permits allocation of another key and 270 /* check that the user's quota permits allocation of another key and
271 * its description */ 271 * its description */
272 if (!not_in_quota) { 272 if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) {
273 spin_lock(&user->lock); 273 spin_lock(&user->lock);
274 if (user->qnkeys + 1 >= KEYQUOTA_MAX_KEYS || 274 if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) {
275 user->qnbytes + quotalen >= KEYQUOTA_MAX_BYTES 275 if (user->qnkeys + 1 >= KEYQUOTA_MAX_KEYS ||
276 ) 276 user->qnbytes + quotalen >= KEYQUOTA_MAX_BYTES
277 goto no_quota; 277 )
278 goto no_quota;
279 }
278 280
279 user->qnkeys++; 281 user->qnkeys++;
280 user->qnbytes += quotalen; 282 user->qnbytes += quotalen;
@@ -308,7 +310,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
308 key->payload.data = NULL; 310 key->payload.data = NULL;
309 key->security = NULL; 311 key->security = NULL;
310 312
311 if (!not_in_quota) 313 if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
312 key->flags |= 1 << KEY_FLAG_IN_QUOTA; 314 key->flags |= 1 << KEY_FLAG_IN_QUOTA;
313 315
314 memset(&key->type_data, 0, sizeof(key->type_data)); 316 memset(&key->type_data, 0, sizeof(key->type_data));
@@ -318,7 +320,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
318#endif 320#endif
319 321
320 /* let the security module know about the key */ 322 /* let the security module know about the key */
321 ret = security_key_alloc(key, ctx); 323 ret = security_key_alloc(key, ctx, flags);
322 if (ret < 0) 324 if (ret < 0)
323 goto security_error; 325 goto security_error;
324 326
@@ -332,7 +334,7 @@ error:
332security_error: 334security_error:
333 kfree(key->description); 335 kfree(key->description);
334 kmem_cache_free(key_jar, key); 336 kmem_cache_free(key_jar, key);
335 if (!not_in_quota) { 337 if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) {
336 spin_lock(&user->lock); 338 spin_lock(&user->lock);
337 user->qnkeys--; 339 user->qnkeys--;
338 user->qnbytes -= quotalen; 340 user->qnbytes -= quotalen;
@@ -345,7 +347,7 @@ security_error:
345no_memory_3: 347no_memory_3:
346 kmem_cache_free(key_jar, key); 348 kmem_cache_free(key_jar, key);
347no_memory_2: 349no_memory_2:
348 if (!not_in_quota) { 350 if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) {
349 spin_lock(&user->lock); 351 spin_lock(&user->lock);
350 user->qnkeys--; 352 user->qnkeys--;
351 user->qnbytes -= quotalen; 353 user->qnbytes -= quotalen;
@@ -761,7 +763,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
761 const char *description, 763 const char *description,
762 const void *payload, 764 const void *payload,
763 size_t plen, 765 size_t plen,
764 int not_in_quota) 766 unsigned long flags)
765{ 767{
766 struct key_type *ktype; 768 struct key_type *ktype;
767 struct key *keyring, *key = NULL; 769 struct key *keyring, *key = NULL;
@@ -822,7 +824,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
822 824
823 /* allocate a new key */ 825 /* allocate a new key */
824 key = key_alloc(ktype, description, current->fsuid, current->fsgid, 826 key = key_alloc(ktype, description, current->fsuid, current->fsgid,
825 current, perm, not_in_quota); 827 current, perm, flags);
826 if (IS_ERR(key)) { 828 if (IS_ERR(key)) {
827 key_ref = ERR_PTR(PTR_ERR(key)); 829 key_ref = ERR_PTR(PTR_ERR(key));
828 goto error_3; 830 goto error_3;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index ed71d86d2ce2..329411cf8768 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -102,7 +102,7 @@ asmlinkage long sys_add_key(const char __user *_type,
102 /* create or update the requested key and add it to the target 102 /* create or update the requested key and add it to the target
103 * keyring */ 103 * keyring */
104 key_ref = key_create_or_update(keyring_ref, type, description, 104 key_ref = key_create_or_update(keyring_ref, type, description,
105 payload, plen, 0); 105 payload, plen, KEY_ALLOC_IN_QUOTA);
106 if (!IS_ERR(key_ref)) { 106 if (!IS_ERR(key_ref)) {
107 ret = key_ref_to_ptr(key_ref)->serial; 107 ret = key_ref_to_ptr(key_ref)->serial;
108 key_ref_put(key_ref); 108 key_ref_put(key_ref);
@@ -184,7 +184,8 @@ asmlinkage long sys_request_key(const char __user *_type,
184 184
185 /* do the search */ 185 /* do the search */
186 key = request_key_and_link(ktype, description, callout_info, 186 key = request_key_and_link(ktype, description, callout_info,
187 key_ref_to_ptr(dest_ref)); 187 key_ref_to_ptr(dest_ref),
188 KEY_ALLOC_IN_QUOTA);
188 if (IS_ERR(key)) { 189 if (IS_ERR(key)) {
189 ret = PTR_ERR(key); 190 ret = PTR_ERR(key);
190 goto error5; 191 goto error5;
@@ -672,6 +673,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
672 */ 673 */
673long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) 674long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid)
674{ 675{
676 struct key_user *newowner, *zapowner = NULL;
675 struct key *key; 677 struct key *key;
676 key_ref_t key_ref; 678 key_ref_t key_ref;
677 long ret; 679 long ret;
@@ -695,19 +697,50 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid)
695 if (!capable(CAP_SYS_ADMIN)) { 697 if (!capable(CAP_SYS_ADMIN)) {
696 /* only the sysadmin can chown a key to some other UID */ 698 /* only the sysadmin can chown a key to some other UID */
697 if (uid != (uid_t) -1 && key->uid != uid) 699 if (uid != (uid_t) -1 && key->uid != uid)
698 goto no_access; 700 goto error_put;
699 701
700 /* only the sysadmin can set the key's GID to a group other 702 /* only the sysadmin can set the key's GID to a group other
701 * than one of those that the current process subscribes to */ 703 * than one of those that the current process subscribes to */
702 if (gid != (gid_t) -1 && gid != key->gid && !in_group_p(gid)) 704 if (gid != (gid_t) -1 && gid != key->gid && !in_group_p(gid))
703 goto no_access; 705 goto error_put;
704 } 706 }
705 707
706 /* change the UID (have to update the quotas) */ 708 /* change the UID */
707 if (uid != (uid_t) -1 && uid != key->uid) { 709 if (uid != (uid_t) -1 && uid != key->uid) {
708 /* don't support UID changing yet */ 710 ret = -ENOMEM;
709 ret = -EOPNOTSUPP; 711 newowner = key_user_lookup(uid);
710 goto no_access; 712 if (!newowner)
713 goto error_put;
714
715 /* transfer the quota burden to the new user */
716 if (test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) {
717 spin_lock(&newowner->lock);
718 if (newowner->qnkeys + 1 >= KEYQUOTA_MAX_KEYS ||
719 newowner->qnbytes + key->quotalen >=
720 KEYQUOTA_MAX_BYTES)
721 goto quota_overrun;
722
723 newowner->qnkeys++;
724 newowner->qnbytes += key->quotalen;
725 spin_unlock(&newowner->lock);
726
727 spin_lock(&key->user->lock);
728 key->user->qnkeys--;
729 key->user->qnbytes -= key->quotalen;
730 spin_unlock(&key->user->lock);
731 }
732
733 atomic_dec(&key->user->nkeys);
734 atomic_inc(&newowner->nkeys);
735
736 if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) {
737 atomic_dec(&key->user->nikeys);
738 atomic_inc(&newowner->nikeys);
739 }
740
741 zapowner = key->user;
742 key->user = newowner;
743 key->uid = uid;
711 } 744 }
712 745
713 /* change the GID */ 746 /* change the GID */
@@ -716,12 +749,20 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid)
716 749
717 ret = 0; 750 ret = 0;
718 751
719 no_access: 752error_put:
720 up_write(&key->sem); 753 up_write(&key->sem);
721 key_put(key); 754 key_put(key);
722 error: 755 if (zapowner)
756 key_user_put(zapowner);
757error:
723 return ret; 758 return ret;
724 759
760quota_overrun:
761 spin_unlock(&newowner->lock);
762 zapowner = newowner;
763 ret = -EDQUOT;
764 goto error_put;
765
725} /* end keyctl_chown_key() */ 766} /* end keyctl_chown_key() */
726 767
727/*****************************************************************************/ 768/*****************************************************************************/
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 1357207fc9df..e8d02acc51e7 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -49,6 +49,7 @@ static inline unsigned keyring_hash(const char *desc)
49static int keyring_instantiate(struct key *keyring, 49static int keyring_instantiate(struct key *keyring,
50 const void *data, size_t datalen); 50 const void *data, size_t datalen);
51static int keyring_match(const struct key *keyring, const void *criterion); 51static int keyring_match(const struct key *keyring, const void *criterion);
52static void keyring_revoke(struct key *keyring);
52static void keyring_destroy(struct key *keyring); 53static void keyring_destroy(struct key *keyring);
53static void keyring_describe(const struct key *keyring, struct seq_file *m); 54static void keyring_describe(const struct key *keyring, struct seq_file *m);
54static long keyring_read(const struct key *keyring, 55static long keyring_read(const struct key *keyring,
@@ -59,6 +60,7 @@ struct key_type key_type_keyring = {
59 .def_datalen = sizeof(struct keyring_list), 60 .def_datalen = sizeof(struct keyring_list),
60 .instantiate = keyring_instantiate, 61 .instantiate = keyring_instantiate,
61 .match = keyring_match, 62 .match = keyring_match,
63 .revoke = keyring_revoke,
62 .destroy = keyring_destroy, 64 .destroy = keyring_destroy,
63 .describe = keyring_describe, 65 .describe = keyring_describe,
64 .read = keyring_read, 66 .read = keyring_read,
@@ -240,7 +242,7 @@ static long keyring_read(const struct key *keyring,
240 * allocate a keyring and link into the destination keyring 242 * allocate a keyring and link into the destination keyring
241 */ 243 */
242struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, 244struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
243 struct task_struct *ctx, int not_in_quota, 245 struct task_struct *ctx, unsigned long flags,
244 struct key *dest) 246 struct key *dest)
245{ 247{
246 struct key *keyring; 248 struct key *keyring;
@@ -249,7 +251,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
249 keyring = key_alloc(&key_type_keyring, description, 251 keyring = key_alloc(&key_type_keyring, description,
250 uid, gid, ctx, 252 uid, gid, ctx,
251 (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, 253 (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL,
252 not_in_quota); 254 flags);
253 255
254 if (!IS_ERR(keyring)) { 256 if (!IS_ERR(keyring)) {
255 ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); 257 ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
@@ -953,3 +955,22 @@ int keyring_clear(struct key *keyring)
953} /* end keyring_clear() */ 955} /* end keyring_clear() */
954 956
955EXPORT_SYMBOL(keyring_clear); 957EXPORT_SYMBOL(keyring_clear);
958
959/*****************************************************************************/
960/*
961 * dispose of the links from a revoked keyring
962 * - called with the key sem write-locked
963 */
964static void keyring_revoke(struct key *keyring)
965{
966 struct keyring_list *klist = keyring->payload.subscriptions;
967
968 /* adjust the quota */
969 key_payload_reserve(keyring, 0);
970
971 if (klist) {
972 rcu_assign_pointer(keyring->payload.subscriptions, NULL);
973 call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
974 }
975
976} /* end keyring_revoke() */
diff --git a/security/keys/proc.c b/security/keys/proc.c
index 12b750e51fbf..686a9ee0c5de 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -137,6 +137,13 @@ static int proc_keys_show(struct seq_file *m, void *v)
137 struct timespec now; 137 struct timespec now;
138 unsigned long timo; 138 unsigned long timo;
139 char xbuf[12]; 139 char xbuf[12];
140 int rc;
141
142 /* check whether the current task is allowed to view the key (assuming
143 * non-possession) */
144 rc = key_task_permission(make_key_ref(key, 0), current, KEY_VIEW);
145 if (rc < 0)
146 return 0;
140 147
141 now = current_kernel_time(); 148 now = current_kernel_time();
142 149
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 4d9825f9962c..32150cf7c37f 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -77,7 +77,8 @@ int alloc_uid_keyring(struct user_struct *user,
77 /* concoct a default session keyring */ 77 /* concoct a default session keyring */
78 sprintf(buf, "_uid_ses.%u", user->uid); 78 sprintf(buf, "_uid_ses.%u", user->uid);
79 79
80 session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx, 0, NULL); 80 session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx,
81 KEY_ALLOC_IN_QUOTA, NULL);
81 if (IS_ERR(session_keyring)) { 82 if (IS_ERR(session_keyring)) {
82 ret = PTR_ERR(session_keyring); 83 ret = PTR_ERR(session_keyring);
83 goto error; 84 goto error;
@@ -87,8 +88,8 @@ int alloc_uid_keyring(struct user_struct *user,
87 * keyring */ 88 * keyring */
88 sprintf(buf, "_uid.%u", user->uid); 89 sprintf(buf, "_uid.%u", user->uid);
89 90
90 uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx, 0, 91 uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx,
91 session_keyring); 92 KEY_ALLOC_IN_QUOTA, session_keyring);
92 if (IS_ERR(uid_keyring)) { 93 if (IS_ERR(uid_keyring)) {
93 key_put(session_keyring); 94 key_put(session_keyring);
94 ret = PTR_ERR(uid_keyring); 95 ret = PTR_ERR(uid_keyring);
@@ -144,7 +145,8 @@ int install_thread_keyring(struct task_struct *tsk)
144 145
145 sprintf(buf, "_tid.%u", tsk->pid); 146 sprintf(buf, "_tid.%u", tsk->pid);
146 147
147 keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, 1, NULL); 148 keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk,
149 KEY_ALLOC_QUOTA_OVERRUN, NULL);
148 if (IS_ERR(keyring)) { 150 if (IS_ERR(keyring)) {
149 ret = PTR_ERR(keyring); 151 ret = PTR_ERR(keyring);
150 goto error; 152 goto error;
@@ -178,7 +180,8 @@ int install_process_keyring(struct task_struct *tsk)
178 if (!tsk->signal->process_keyring) { 180 if (!tsk->signal->process_keyring) {
179 sprintf(buf, "_pid.%u", tsk->tgid); 181 sprintf(buf, "_pid.%u", tsk->tgid);
180 182
181 keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, 1, NULL); 183 keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk,
184 KEY_ALLOC_QUOTA_OVERRUN, NULL);
182 if (IS_ERR(keyring)) { 185 if (IS_ERR(keyring)) {
183 ret = PTR_ERR(keyring); 186 ret = PTR_ERR(keyring);
184 goto error; 187 goto error;
@@ -209,6 +212,7 @@ error:
209static int install_session_keyring(struct task_struct *tsk, 212static int install_session_keyring(struct task_struct *tsk,
210 struct key *keyring) 213 struct key *keyring)
211{ 214{
215 unsigned long flags;
212 struct key *old; 216 struct key *old;
213 char buf[20]; 217 char buf[20];
214 218
@@ -218,7 +222,12 @@ static int install_session_keyring(struct task_struct *tsk,
218 if (!keyring) { 222 if (!keyring) {
219 sprintf(buf, "_ses.%u", tsk->tgid); 223 sprintf(buf, "_ses.%u", tsk->tgid);
220 224
221 keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, 1, NULL); 225 flags = KEY_ALLOC_QUOTA_OVERRUN;
226 if (tsk->signal->session_keyring)
227 flags = KEY_ALLOC_IN_QUOTA;
228
229 keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk,
230 flags, NULL);
222 if (IS_ERR(keyring)) 231 if (IS_ERR(keyring))
223 return PTR_ERR(keyring); 232 return PTR_ERR(keyring);
224 } 233 }
@@ -728,7 +737,8 @@ long join_session_keyring(const char *name)
728 keyring = find_keyring_by_name(name, 0); 737 keyring = find_keyring_by_name(name, 0);
729 if (PTR_ERR(keyring) == -ENOKEY) { 738 if (PTR_ERR(keyring) == -ENOKEY) {
730 /* not found - try and create a new one */ 739 /* not found - try and create a new one */
731 keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk, 0, NULL); 740 keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk,
741 KEY_ALLOC_IN_QUOTA, NULL);
732 if (IS_ERR(keyring)) { 742 if (IS_ERR(keyring)) {
733 ret = PTR_ERR(keyring); 743 ret = PTR_ERR(keyring);
734 goto error2; 744 goto error2;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index eab66a06ca53..58d1efd4fc2c 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -48,8 +48,8 @@ static int call_sbin_request_key(struct key *key,
48 /* allocate a new session keyring */ 48 /* allocate a new session keyring */
49 sprintf(desc, "_req.%u", key->serial); 49 sprintf(desc, "_req.%u", key->serial);
50 50
51 keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 51 keyring = keyring_alloc(desc, current->fsuid, current->fsgid, current,
52 current, 1, NULL); 52 KEY_ALLOC_QUOTA_OVERRUN, NULL);
53 if (IS_ERR(keyring)) { 53 if (IS_ERR(keyring)) {
54 ret = PTR_ERR(keyring); 54 ret = PTR_ERR(keyring);
55 goto error_alloc; 55 goto error_alloc;
@@ -126,7 +126,8 @@ error_alloc:
126 */ 126 */
127static struct key *__request_key_construction(struct key_type *type, 127static struct key *__request_key_construction(struct key_type *type,
128 const char *description, 128 const char *description,
129 const char *callout_info) 129 const char *callout_info,
130 unsigned long flags)
130{ 131{
131 request_key_actor_t actor; 132 request_key_actor_t actor;
132 struct key_construction cons; 133 struct key_construction cons;
@@ -134,12 +135,12 @@ static struct key *__request_key_construction(struct key_type *type,
134 struct key *key, *authkey; 135 struct key *key, *authkey;
135 int ret, negated; 136 int ret, negated;
136 137
137 kenter("%s,%s,%s", type->name, description, callout_info); 138 kenter("%s,%s,%s,%lx", type->name, description, callout_info, flags);
138 139
139 /* create a key and add it to the queue */ 140 /* create a key and add it to the queue */
140 key = key_alloc(type, description, 141 key = key_alloc(type, description,
141 current->fsuid, current->fsgid, 142 current->fsuid, current->fsgid, current, KEY_POS_ALL,
142 current, KEY_POS_ALL, 0); 143 flags);
143 if (IS_ERR(key)) 144 if (IS_ERR(key))
144 goto alloc_failed; 145 goto alloc_failed;
145 146
@@ -258,15 +259,16 @@ alloc_failed:
258static struct key *request_key_construction(struct key_type *type, 259static struct key *request_key_construction(struct key_type *type,
259 const char *description, 260 const char *description,
260 struct key_user *user, 261 struct key_user *user,
261 const char *callout_info) 262 const char *callout_info,
263 unsigned long flags)
262{ 264{
263 struct key_construction *pcons; 265 struct key_construction *pcons;
264 struct key *key, *ckey; 266 struct key *key, *ckey;
265 267
266 DECLARE_WAITQUEUE(myself, current); 268 DECLARE_WAITQUEUE(myself, current);
267 269
268 kenter("%s,%s,{%d},%s", 270 kenter("%s,%s,{%d},%s,%lx",
269 type->name, description, user->uid, callout_info); 271 type->name, description, user->uid, callout_info, flags);
270 272
271 /* see if there's such a key under construction already */ 273 /* see if there's such a key under construction already */
272 down_write(&key_construction_sem); 274 down_write(&key_construction_sem);
@@ -282,7 +284,8 @@ static struct key *request_key_construction(struct key_type *type,
282 } 284 }
283 285
284 /* see about getting userspace to construct the key */ 286 /* see about getting userspace to construct the key */
285 key = __request_key_construction(type, description, callout_info); 287 key = __request_key_construction(type, description, callout_info,
288 flags);
286 error: 289 error:
287 kleave(" = %p", key); 290 kleave(" = %p", key);
288 return key; 291 return key;
@@ -389,14 +392,15 @@ static void request_key_link(struct key *key, struct key *dest_keyring)
389struct key *request_key_and_link(struct key_type *type, 392struct key *request_key_and_link(struct key_type *type,
390 const char *description, 393 const char *description,
391 const char *callout_info, 394 const char *callout_info,
392 struct key *dest_keyring) 395 struct key *dest_keyring,
396 unsigned long flags)
393{ 397{
394 struct key_user *user; 398 struct key_user *user;
395 struct key *key; 399 struct key *key;
396 key_ref_t key_ref; 400 key_ref_t key_ref;
397 401
398 kenter("%s,%s,%s,%p", 402 kenter("%s,%s,%s,%p,%lx",
399 type->name, description, callout_info, dest_keyring); 403 type->name, description, callout_info, dest_keyring, flags);
400 404
401 /* search all the process keyrings for a key */ 405 /* search all the process keyrings for a key */
402 key_ref = search_process_keyrings(type, description, type->match, 406 key_ref = search_process_keyrings(type, description, type->match,
@@ -429,7 +433,8 @@ struct key *request_key_and_link(struct key_type *type,
429 /* ask userspace (returns NULL if it waited on a key 433 /* ask userspace (returns NULL if it waited on a key
430 * being constructed) */ 434 * being constructed) */
431 key = request_key_construction(type, description, 435 key = request_key_construction(type, description,
432 user, callout_info); 436 user, callout_info,
437 flags);
433 if (key) 438 if (key)
434 break; 439 break;
435 440
@@ -485,7 +490,8 @@ struct key *request_key(struct key_type *type,
485 const char *description, 490 const char *description,
486 const char *callout_info) 491 const char *callout_info)
487{ 492{
488 return request_key_and_link(type, description, callout_info, NULL); 493 return request_key_and_link(type, description, callout_info, NULL,
494 KEY_ALLOC_IN_QUOTA);
489 495
490} /* end request_key() */ 496} /* end request_key() */
491 497
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index cb9817ced3fd..cbf58a91b00a 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -187,7 +187,7 @@ struct key *request_key_auth_new(struct key *target, const char *callout_info)
187 authkey = key_alloc(&key_type_request_key_auth, desc, 187 authkey = key_alloc(&key_type_request_key_auth, desc,
188 current->fsuid, current->fsgid, current, 188 current->fsuid, current->fsgid, current,
189 KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | 189 KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
190 KEY_USR_VIEW, 1); 190 KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA);
191 if (IS_ERR(authkey)) { 191 if (IS_ERR(authkey)) {
192 ret = PTR_ERR(authkey); 192 ret = PTR_ERR(authkey);
193 goto error_alloc; 193 goto error_alloc;
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 8e71895b97a7..5bbfdebb7acf 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -28,6 +28,7 @@ struct key_type key_type_user = {
28 .instantiate = user_instantiate, 28 .instantiate = user_instantiate,
29 .update = user_update, 29 .update = user_update,
30 .match = user_match, 30 .match = user_match,
31 .revoke = user_revoke,
31 .destroy = user_destroy, 32 .destroy = user_destroy,
32 .describe = user_describe, 33 .describe = user_describe,
33 .read = user_read, 34 .read = user_read,
@@ -67,6 +68,7 @@ error:
67 return ret; 68 return ret;
68 69
69} /* end user_instantiate() */ 70} /* end user_instantiate() */
71
70EXPORT_SYMBOL_GPL(user_instantiate); 72EXPORT_SYMBOL_GPL(user_instantiate);
71 73
72/*****************************************************************************/ 74/*****************************************************************************/
@@ -141,7 +143,28 @@ EXPORT_SYMBOL_GPL(user_match);
141 143
142/*****************************************************************************/ 144/*****************************************************************************/
143/* 145/*
144 * dispose of the data dangling from the corpse of a user 146 * dispose of the payload of a revoked user key
147 * - called with the key sem write-locked
148 */
149void user_revoke(struct key *key)
150{
151 struct user_key_payload *upayload = key->payload.data;
152
153 /* clear the quota */
154 key_payload_reserve(key, 0);
155
156 if (upayload) {
157 rcu_assign_pointer(key->payload.data, NULL);
158 call_rcu(&upayload->rcu, user_update_rcu_disposal);
159 }
160
161} /* end user_revoke() */
162
163EXPORT_SYMBOL(user_revoke);
164
165/*****************************************************************************/
166/*
167 * dispose of the data dangling from the corpse of a user key
145 */ 168 */
146void user_destroy(struct key *key) 169void user_destroy(struct key *key)
147{ 170{
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 79c16e31c884..ac7f2b2e3924 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1099,6 +1099,17 @@ static int may_create(struct inode *dir,
1099 FILESYSTEM__ASSOCIATE, &ad); 1099 FILESYSTEM__ASSOCIATE, &ad);
1100} 1100}
1101 1101
1102/* Check whether a task can create a key. */
1103static int may_create_key(u32 ksid,
1104 struct task_struct *ctx)
1105{
1106 struct task_security_struct *tsec;
1107
1108 tsec = ctx->security;
1109
1110 return avc_has_perm(tsec->sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL);
1111}
1112
1102#define MAY_LINK 0 1113#define MAY_LINK 0
1103#define MAY_UNLINK 1 1114#define MAY_UNLINK 1
1104#define MAY_RMDIR 2 1115#define MAY_RMDIR 2
@@ -1521,8 +1532,9 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm)
1521 /* Default to the current task SID. */ 1532 /* Default to the current task SID. */
1522 bsec->sid = tsec->sid; 1533 bsec->sid = tsec->sid;
1523 1534
1524 /* Reset create SID on execve. */ 1535 /* Reset create and sockcreate SID on execve. */
1525 tsec->create_sid = 0; 1536 tsec->create_sid = 0;
1537 tsec->sockcreate_sid = 0;
1526 1538
1527 if (tsec->exec_sid) { 1539 if (tsec->exec_sid) {
1528 newsid = tsec->exec_sid; 1540 newsid = tsec->exec_sid;
@@ -2574,9 +2586,10 @@ static int selinux_task_alloc_security(struct task_struct *tsk)
2574 tsec2->osid = tsec1->osid; 2586 tsec2->osid = tsec1->osid;
2575 tsec2->sid = tsec1->sid; 2587 tsec2->sid = tsec1->sid;
2576 2588
2577 /* Retain the exec and create SIDs across fork */ 2589 /* Retain the exec, create, and sock SIDs across fork */
2578 tsec2->exec_sid = tsec1->exec_sid; 2590 tsec2->exec_sid = tsec1->exec_sid;
2579 tsec2->create_sid = tsec1->create_sid; 2591 tsec2->create_sid = tsec1->create_sid;
2592 tsec2->sockcreate_sid = tsec1->sockcreate_sid;
2580 2593
2581 /* Retain ptracer SID across fork, if any. 2594 /* Retain ptracer SID across fork, if any.
2582 This will be reset by the ptrace hook upon any 2595 This will be reset by the ptrace hook upon any
@@ -2926,12 +2939,14 @@ static int selinux_socket_create(int family, int type,
2926{ 2939{
2927 int err = 0; 2940 int err = 0;
2928 struct task_security_struct *tsec; 2941 struct task_security_struct *tsec;
2942 u32 newsid;
2929 2943
2930 if (kern) 2944 if (kern)
2931 goto out; 2945 goto out;
2932 2946
2933 tsec = current->security; 2947 tsec = current->security;
2934 err = avc_has_perm(tsec->sid, tsec->sid, 2948 newsid = tsec->sockcreate_sid ? : tsec->sid;
2949 err = avc_has_perm(tsec->sid, newsid,
2935 socket_type_to_security_class(family, type, 2950 socket_type_to_security_class(family, type,
2936 protocol), SOCKET__CREATE, NULL); 2951 protocol), SOCKET__CREATE, NULL);
2937 2952
@@ -2944,12 +2959,14 @@ static void selinux_socket_post_create(struct socket *sock, int family,
2944{ 2959{
2945 struct inode_security_struct *isec; 2960 struct inode_security_struct *isec;
2946 struct task_security_struct *tsec; 2961 struct task_security_struct *tsec;
2962 u32 newsid;
2947 2963
2948 isec = SOCK_INODE(sock)->i_security; 2964 isec = SOCK_INODE(sock)->i_security;
2949 2965
2950 tsec = current->security; 2966 tsec = current->security;
2967 newsid = tsec->sockcreate_sid ? : tsec->sid;
2951 isec->sclass = socket_type_to_security_class(family, type, protocol); 2968 isec->sclass = socket_type_to_security_class(family, type, protocol);
2952 isec->sid = kern ? SECINITSID_KERNEL : tsec->sid; 2969 isec->sid = kern ? SECINITSID_KERNEL : newsid;
2953 isec->initialized = 1; 2970 isec->initialized = 1;
2954 2971
2955 return; 2972 return;
@@ -4150,6 +4167,10 @@ static int selinux_getprocattr(struct task_struct *p,
4150 sid = tsec->exec_sid; 4167 sid = tsec->exec_sid;
4151 else if (!strcmp(name, "fscreate")) 4168 else if (!strcmp(name, "fscreate"))
4152 sid = tsec->create_sid; 4169 sid = tsec->create_sid;
4170 else if (!strcmp(name, "keycreate"))
4171 sid = tsec->keycreate_sid;
4172 else if (!strcmp(name, "sockcreate"))
4173 sid = tsec->sockcreate_sid;
4153 else 4174 else
4154 return -EINVAL; 4175 return -EINVAL;
4155 4176
@@ -4182,6 +4203,10 @@ static int selinux_setprocattr(struct task_struct *p,
4182 error = task_has_perm(current, p, PROCESS__SETEXEC); 4203 error = task_has_perm(current, p, PROCESS__SETEXEC);
4183 else if (!strcmp(name, "fscreate")) 4204 else if (!strcmp(name, "fscreate"))
4184 error = task_has_perm(current, p, PROCESS__SETFSCREATE); 4205 error = task_has_perm(current, p, PROCESS__SETFSCREATE);
4206 else if (!strcmp(name, "keycreate"))
4207 error = task_has_perm(current, p, PROCESS__SETKEYCREATE);
4208 else if (!strcmp(name, "sockcreate"))
4209 error = task_has_perm(current, p, PROCESS__SETSOCKCREATE);
4185 else if (!strcmp(name, "current")) 4210 else if (!strcmp(name, "current"))
4186 error = task_has_perm(current, p, PROCESS__SETCURRENT); 4211 error = task_has_perm(current, p, PROCESS__SETCURRENT);
4187 else 4212 else
@@ -4211,6 +4236,13 @@ static int selinux_setprocattr(struct task_struct *p,
4211 tsec->exec_sid = sid; 4236 tsec->exec_sid = sid;
4212 else if (!strcmp(name, "fscreate")) 4237 else if (!strcmp(name, "fscreate"))
4213 tsec->create_sid = sid; 4238 tsec->create_sid = sid;
4239 else if (!strcmp(name, "keycreate")) {
4240 error = may_create_key(sid, p);
4241 if (error)
4242 return error;
4243 tsec->keycreate_sid = sid;
4244 } else if (!strcmp(name, "sockcreate"))
4245 tsec->sockcreate_sid = sid;
4214 else if (!strcmp(name, "current")) { 4246 else if (!strcmp(name, "current")) {
4215 struct av_decision avd; 4247 struct av_decision avd;
4216 4248
@@ -4264,7 +4296,8 @@ static int selinux_setprocattr(struct task_struct *p,
4264 4296
4265#ifdef CONFIG_KEYS 4297#ifdef CONFIG_KEYS
4266 4298
4267static int selinux_key_alloc(struct key *k, struct task_struct *tsk) 4299static int selinux_key_alloc(struct key *k, struct task_struct *tsk,
4300 unsigned long flags)
4268{ 4301{
4269 struct task_security_struct *tsec = tsk->security; 4302 struct task_security_struct *tsec = tsk->security;
4270 struct key_security_struct *ksec; 4303 struct key_security_struct *ksec;
@@ -4274,7 +4307,10 @@ static int selinux_key_alloc(struct key *k, struct task_struct *tsk)
4274 return -ENOMEM; 4307 return -ENOMEM;
4275 4308
4276 ksec->obj = k; 4309 ksec->obj = k;
4277 ksec->sid = tsec->sid; 4310 if (tsec->keycreate_sid)
4311 ksec->sid = tsec->keycreate_sid;
4312 else
4313 ksec->sid = tsec->sid;
4278 k->security = ksec; 4314 k->security = ksec;
4279 4315
4280 return 0; 4316 return 0;
@@ -4513,8 +4549,10 @@ static __init int selinux_init(void)
4513 4549
4514#ifdef CONFIG_KEYS 4550#ifdef CONFIG_KEYS
4515 /* Add security information to initial keyrings */ 4551 /* Add security information to initial keyrings */
4516 security_key_alloc(&root_user_keyring, current); 4552 selinux_key_alloc(&root_user_keyring, current,
4517 security_key_alloc(&root_session_keyring, current); 4553 KEY_ALLOC_NOT_IN_QUOTA);
4554 selinux_key_alloc(&root_session_keyring, current,
4555 KEY_ALLOC_NOT_IN_QUOTA);
4518#endif 4556#endif
4519 4557
4520 return 0; 4558 return 0;
diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h
index bc020bde6c86..7c9b58380833 100644
--- a/security/selinux/include/av_perm_to_string.h
+++ b/security/selinux/include/av_perm_to_string.h
@@ -72,6 +72,8 @@
72 S_(SECCLASS_PROCESS, PROCESS__EXECMEM, "execmem") 72 S_(SECCLASS_PROCESS, PROCESS__EXECMEM, "execmem")
73 S_(SECCLASS_PROCESS, PROCESS__EXECSTACK, "execstack") 73 S_(SECCLASS_PROCESS, PROCESS__EXECSTACK, "execstack")
74 S_(SECCLASS_PROCESS, PROCESS__EXECHEAP, "execheap") 74 S_(SECCLASS_PROCESS, PROCESS__EXECHEAP, "execheap")
75 S_(SECCLASS_PROCESS, PROCESS__SETKEYCREATE, "setkeycreate")
76 S_(SECCLASS_PROCESS, PROCESS__SETSOCKCREATE, "setsockcreate")
75 S_(SECCLASS_MSGQ, MSGQ__ENQUEUE, "enqueue") 77 S_(SECCLASS_MSGQ, MSGQ__ENQUEUE, "enqueue")
76 S_(SECCLASS_MSG, MSG__SEND, "send") 78 S_(SECCLASS_MSG, MSG__SEND, "send")
77 S_(SECCLASS_MSG, MSG__RECEIVE, "receive") 79 S_(SECCLASS_MSG, MSG__RECEIVE, "receive")
@@ -248,3 +250,4 @@
248 S_(SECCLASS_KEY, KEY__SEARCH, "search") 250 S_(SECCLASS_KEY, KEY__SEARCH, "search")
249 S_(SECCLASS_KEY, KEY__LINK, "link") 251 S_(SECCLASS_KEY, KEY__LINK, "link")
250 S_(SECCLASS_KEY, KEY__SETATTR, "setattr") 252 S_(SECCLASS_KEY, KEY__SETATTR, "setattr")
253 S_(SECCLASS_KEY, KEY__CREATE, "create")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h
index 1205227a3a33..69fd4b48202c 100644
--- a/security/selinux/include/av_permissions.h
+++ b/security/selinux/include/av_permissions.h
@@ -467,6 +467,8 @@
467#define PROCESS__EXECMEM 0x02000000UL 467#define PROCESS__EXECMEM 0x02000000UL
468#define PROCESS__EXECSTACK 0x04000000UL 468#define PROCESS__EXECSTACK 0x04000000UL
469#define PROCESS__EXECHEAP 0x08000000UL 469#define PROCESS__EXECHEAP 0x08000000UL
470#define PROCESS__SETKEYCREATE 0x10000000UL
471#define PROCESS__SETSOCKCREATE 0x20000000UL
470 472
471#define IPC__CREATE 0x00000001UL 473#define IPC__CREATE 0x00000001UL
472#define IPC__DESTROY 0x00000002UL 474#define IPC__DESTROY 0x00000002UL
@@ -966,4 +968,4 @@
966#define KEY__SEARCH 0x00000008UL 968#define KEY__SEARCH 0x00000008UL
967#define KEY__LINK 0x00000010UL 969#define KEY__LINK 0x00000010UL
968#define KEY__SETATTR 0x00000020UL 970#define KEY__SETATTR 0x00000020UL
969 971#define KEY__CREATE 0x00000040UL
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 8f5547ad1856..cf54a304169a 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -32,6 +32,8 @@ struct task_security_struct {
32 u32 sid; /* current SID */ 32 u32 sid; /* current SID */
33 u32 exec_sid; /* exec SID */ 33 u32 exec_sid; /* exec SID */
34 u32 create_sid; /* fscreate SID */ 34 u32 create_sid; /* fscreate SID */
35 u32 keycreate_sid; /* keycreate SID */
36 u32 sockcreate_sid; /* sockcreate SID */
35 u32 ptrace_sid; /* SID of ptrace parent */ 37 u32 ptrace_sid; /* SID of ptrace parent */
36}; 38};
37 39