Commit message    Author    Date
* Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6 (David S. Miller, 2009-11-23)
|\
| * rfkill: fix miscdev ops (Johannes Berg, 2009-11-23)
    The /dev/rfkill ops don't refer to the module, so it is possible to unload the module while file descriptors are open. Fix this oversight.
    Reported-by: Maxim Levitsky <maximlevitsky@gmail.com>
    Cc: stable@kernel.org
    Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
    Signed-off-by: John W. Linville <linville@tuxdriver.com>
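    A minimal sketch of the class of fix described above (handler names are assumed, not taken from the patch): setting .owner makes the VFS hold a module reference while the device node is open.

        static const struct file_operations rfkill_fops = {
                .owner   = THIS_MODULE,        /* pin the module while /dev/rfkill is open */
                .open    = rfkill_fop_open,    /* assumed handler names */
                .read    = rfkill_fop_read,
                .release = rfkill_fop_release,
        };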
| * ath9k: set ps_default as false (John W. Linville, 2009-11-23)
    Copied from the original one-line patch here: http://bugzilla.kernel.org/show_bug.cgi?id=14267#c26
    Signed-off-by: John W. Linville <linville@tuxdriver.com>
| * mac80211: fix resume (Johannes Berg, 2009-11-19)
    When mac80211 resumes, it currently first sets suspended to false so the driver can start doing things and we can receive frames. However, if we actually receive frames, that can end up starting some work which adds timers, and we then later run into a BUG_ON in the timer code because add_timer() is called on a pending timer.
    Fix this by tracking the resume process with a new variable 'resuming', which gets set to true early on instead of setting 'suspended' to false, and allow queueing work but not receiving frames while resuming.
    Reported-by: Maxim Levitsky <maximlevitsky@gmail.com>
    Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
    Signed-off-by: John W. Linville <linville@tuxdriver.com>
| * mac80211: fix addba timer (again...) (Johannes Berg, 2009-11-18)
    commit 2171abc58644e09dbba546d91366b12743115396
    Author: Johannes Berg <johannes@sipsolutions.net>
    Date: Thu Oct 29 08:34:00 2009 +0100

        mac80211: fix addba timer

    left a problem in there: even if the timer was never started, it could be deleted and then added. Linus pointed out that del_timer_sync() isn't actually needed if we make the timer able to deal with no longer being needed when it gets queued _while_ we're in the locked section that also deletes it. For that, the timer function only needs to check the HT_ADDBA_RECEIVED_MSK bit as well as the HT_ADDBA_REQUESTED_MSK bit; only if the former is clear should it do anything.
    Cc: Linus Torvalds <torvalds@linux-foundation.org>
    Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
    Signed-off-by: John W. Linville <linville@tuxdriver.com>
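    A hedged sketch of the timer-side check the message describes (locking and lookup details elided; the state-bit names come from the message): if HT_ADDBA_RECEIVED_MSK is already set, a late-queued timer simply does nothing.

        static void sta_addba_resp_timer_expired(unsigned long data)
        {
                /* ... look up the per-TID state word under the station lock ... */
                if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK)) !=
                    HT_ADDBA_REQUESTED_MSK)
                        return;   /* response already arrived; the timer is a no-op */
                /* ... otherwise tear down the half-open aggregation session ... */
        }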
* | hso: fix soft-lockup (Antti Kaijanmäki, 2009-11-23)
    Fix a soft-lockup in hso.c which is triggered on SMP machines when the modem is removed while file descriptor(s) under /dev are still open: the old version called kref_put() too early, which resulted in destroying hso_serial and hso_device objects that were still used later on.
    Signed-off-by: Antti Kaijanmäki <antti.kaijanmaki@nomovok.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>
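    The general pattern behind this kind of fix, shown with a hypothetical object rather than the actual hso structures: the reference may only be dropped after the last access.

        struct conn {
                struct kref ref;
                /* ... */
        };

        static void conn_release(struct kref *ref)
        {
                kfree(container_of(ref, struct conn, ref));
        }

        static void conn_teardown(struct conn *c)
        {
                /* WRONG: kref_put() here could free c while it is still used below */
                finish_pending_io(c);              /* hypothetical last user of c */
                kref_put(&c->ref, conn_release);   /* drop the reference last */
        }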
* | hso: fix debug routines (Antti Kaijanmäki, 2009-11-23)
    Signed-off-by: Antti Kaijanmäki <antti.kaijanmaki@nomovok.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | pktgen: Fix device name compares (Eric Dumazet, 2009-11-23)
    Commit e6fce5b916cd7f7f7 (pktgen: multiqueue etc.) tried to relax the pktgen restriction of one device per kernel thread, adding a '@' tag to device names. The problem is that we don't check the full pktgen device name, which allows adding the same 'device' to a pktgen thread many times:

        pgset "add_device eth0@0"
        (one session later:)
        pgset "add_device eth0@0"   (this doesn't find the previous device)

    This consumes ~1.5 MBytes of vmalloc memory per round and also triggers this warning:

    [ 673.186380] proc_dir_entry 'pktgen/eth0@0' already registered
    [ 673.186383] Modules linked in: pktgen ixgbe ehci_hcd psmouse mdio mousedev evdev [last unloaded: pktgen]
    [ 673.186406] Pid: 6219, comm: bash Tainted: G W 2.6.32-rc7-03302-g41cec6f-dirty #16
    [ 673.186410] Call Trace:
    [ 673.186417] [<ffffffff8104a29b>] warn_slowpath_common+0x7b/0xc0
    [ 673.186422] [<ffffffff8104a341>] warn_slowpath_fmt+0x41/0x50
    [ 673.186426] [<ffffffff8114e789>] proc_register+0x109/0x210
    [ 673.186433] [<ffffffff8100bf2e>] ? apic_timer_interrupt+0xe/0x20
    [ 673.186438] [<ffffffff8114e905>] proc_create_data+0x75/0xd0
    [ 673.186444] [<ffffffffa006ad38>] pktgen_thread_write+0x568/0x640 [pktgen]
    [ 673.186449] [<ffffffffa006a7d0>] ? pktgen_thread_write+0x0/0x640 [pktgen]
    [ 673.186453] [<ffffffff81149144>] proc_reg_write+0x84/0xc0
    [ 673.186458] [<ffffffff810f5a58>] vfs_write+0xb8/0x180
    [ 673.186463] [<ffffffff810f5c11>] sys_write+0x51/0x90
    [ 673.186468] [<ffffffff8100b51b>] system_call_fastpath+0x16/0x1b
    [ 673.186470] ---[ end trace ccbb991b0a8d994d ]---

    The solution is to compare against an odevname field (which includes the '@' tag and suffix) instead of the netdevice name.
    Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
    Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
    Signed-off-by: David S. Miller <davem@davemloft.net>
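    A sketch of the described approach (the lookup helper is illustrative, not the verbatim patch): duplicates are found by comparing the full pktgen name, '@' tag included, instead of the underlying netdev name.

        /* does this pktgen thread already own a device of that full name? */
        static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
                                                  const char *ifname)
        {
                struct pktgen_dev *p;

                list_for_each_entry(p, &t->if_list, list)
                        if (strcmp(p->odevname, ifname) == 0)  /* full "eth0@0" name */
                                return p;
                return NULL;
        }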
* | stmmac: do not fail when the timer cannot be used. (Giuseppe CAVALLARO, 2009-11-23)
    If the external timer cannot be used the driver will continue to work without mitigation.
    Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | stmmac: fixed a compilation error when using the external timer (Giuseppe CAVALLARO, 2009-11-23)
    Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6 (David S. Miller, 2009-11-23)
|\ \
| * | netfilter: xt_limit: fix invalid return code in limit_mt_check() (Patrick McHardy, 2009-11-23)
    Commit acc738fe (netfilter: xtables: avoid pointer to self) introduced an invalid return value in limit_mt_check().
    Signed-off-by: Patrick McHardy <kaber@trash.net>
| * | netfilter: nf_log: fix sleeping function called from invalid context in seq_show() (Wu Fengguang, 2009-11-13)
    [ 171.925285] BUG: sleeping function called from invalid context at kernel/mutex.c:280
    [ 171.925296] in_atomic(): 1, irqs_disabled(): 0, pid: 671, name: grep
    [ 171.925306] 2 locks held by grep/671:
    [ 171.925312] #0: (&p->lock){+.+.+.}, at: [<c10b8acd>] seq_read+0x25/0x36c
    [ 171.925340] #1: (rcu_read_lock){.+.+..}, at: [<c1391dac>] seq_start+0x0/0x44
    [ 171.925372] Pid: 671, comm: grep Not tainted 2.6.31.6-4-netbook #3
    [ 171.925380] Call Trace:
    [ 171.925398] [<c105104e>] ? __debug_show_held_locks+0x1e/0x20
    [ 171.925414] [<c10264ac>] __might_sleep+0xfb/0x102
    [ 171.925430] [<c1461521>] mutex_lock_nested+0x1c/0x2ad
    [ 171.925444] [<c1391c9e>] seq_show+0x74/0x127
    [ 171.925456] [<c10b8c5c>] seq_read+0x1b4/0x36c
    [ 171.925469] [<c10b8aa8>] ? seq_read+0x0/0x36c
    [ 171.925483] [<c10d5c8e>] proc_reg_read+0x60/0x74
    [ 171.925496] [<c10d5c2e>] ? proc_reg_read+0x0/0x74
    [ 171.925510] [<c10a4468>] vfs_read+0x87/0x110
    [ 171.925523] [<c10a458a>] sys_read+0x3b/0x60
    [ 171.925538] [<c1002a49>] syscall_call+0x7/0xb

    Fix it by replacing RCU with nf_log_mutex.
    Reported-by: "Yin, Kangkai" <kangkai.yin@intel.com>
    Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
    Signed-off-by: Patrick McHardy <kaber@trash.net>
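    A sketch of the fix (seq_file plumbing abbreviated, upper bound illustrative): the iterator takes nf_log_mutex instead of rcu_read_lock(), so taking a mutex inside seq_show() no longer sleeps in an atomic context.

        static void *seq_start(struct seq_file *seq, loff_t *pos)
        {
                mutex_lock(&nf_log_mutex);     /* was: rcu_read_lock() */
                if (*pos >= NFPROTO_NUMPROTO)
                        return NULL;
                return pos;
        }

        static void seq_stop(struct seq_file *s, void *v)
        {
                mutex_unlock(&nf_log_mutex);   /* was: rcu_read_unlock() */
        }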
| * | netfilter: xt_osf: fix xt_osf_remove_callback() return value (Roel Kluin, 2009-11-13)
    Return a negative error value.
    Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
    Signed-off-by: Patrick McHardy <kaber@trash.net>
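    The class of bug, in a two-line sketch (identifiers hypothetical): kernel error returns are negative errno values, so a bare ENOENT looks like a positive "success" to callers testing for < 0.

        if (!entry_found)
                return -ENOENT;   /* was: return ENOENT */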
* | | Au1x00: fix crash when trying register_netdev() (Alexander Beregalov, 2009-11-22)
    Andreas Lohre reported that the driver crashes when calling register_netdev(). He suggested moving the dev->netdev_ops initialization before the register_netdev() call; it worked for him.
    Reported-by: Andreas Lohre <alohre@gmail.com>
    Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
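    A sketch of the ordering fix (ops structure name assumed): the ops pointer must be valid before register_netdev(), which can already invoke ndo_* callbacks.

        dev->netdev_ops = &au1000_netdev_ops;  /* must precede registration */
        err = register_netdev(dev);            /* may call ndo_* hooks */
        if (err)
                goto err_out;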
* | | netxen: fix BOND_MODE_TLB/ALB mode. (Narender Kumar, 2009-11-21)
    o Along with netdev->perm_addr, the mac address will be maintained in the device private structure.
    o Device limitation: we need to set the mac address whenever the interface comes up. In ALB/TLB mode, the bonding driver calls set_mac for all slaves with the bond mac address, but it sets the netdev->dev_addr field back to its original value after enslaving the interfaces. Whenever the active slave changes, it swaps the dev_addr of the inactive slave with the active one, yet it doesn't notify the driver about the change in netdev->dev_addr. As the netxen driver needs to set the mac address whenever the interface comes up, it can't rely on the netdev->dev_addr field, especially in bonding mode ALB/TLB.
    Signed-off-by: Narender Kumar <narender.kumar@qlogic.com>
    Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | netxen: fix promisc for NX2031. (Narender Kumar, 2009-11-21)
    The kernel crashes if promisc mode is set without disabling the rx queue. Before changing the mode on the NX2031 chip, wait till the rx queue drains.
    Signed-off-by: Narender Kumar <narender.kumar@qlogic.com>
    Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | netxen: fix memory initialization (Amit Kumar Salecha, 2009-11-21)
    Avoid resetting memory during initialization; skip this memory block during driver probe.
    Signed-off-by: Amit Kumar Salecha <amit@netxen.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | TI DaVinci EMAC: Minor macro related updates (chaithrika@ti.com, 2009-11-20)
    Use BIT for macro definitions wherever possible; remove unused and redundant macros.
    Signed-off-by: Chaithrika U S <chaithrika@ti.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | drivers/net: ks8851_mll ethernet network driver - resubmit (David J. Choi, 2009-11-20)
    Summary of changes:
    - Fix receiving multicast packets by setting the corresponding hardware bit during initialization.
    - Fix re-enabling the interface [via the interface-up command (ifup)] while the interface is down.
    - Fix bringing the interface down by passing the last parameter to request_irq() correctly.
    - Remove the read of 4 extra bytes from the receive queue after reading a packet, even though it does not cause a predictable issue now.
    - Remove occurrences of the transmission-done interrupt in order to improve tx throughput.
    - Enable IP checksumming for packet reception by setting the corresponding hardware bit during initialization.
    - Relocate ks_enable_int()/ks_disable_int() so those functions need not be declared at the beginning of the file.
    - Rename ks_enable()/ks_disable() to ks_enable_qmu()/ks_disable_qmu() to give them more meaningful names, and relocate them for the same reason.
    Signed-off-by: David J. Choi <david.choi@micrel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | net: ETHOC should depend on HAS_DMA (Geert Uytterhoeven, 2009-11-20)
    When building for Sun 3:

        drivers/net/ethoc.c:1091: undefined reference to `dma_free_coherent'
        drivers/built-in.o: In function `ethoc_probe':
        drivers/net/ethoc.c:965: undefined reference to `dma_alloc_coherent'
        drivers/net/ethoc.c:1063: undefined reference to `dma_free_coherent'

    Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>
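    The corresponding Kconfig change presumably looks like the following (the pre-existing dependency list is assumed, not verified against that tree):

        config ETHOC
                tristate "OpenCores 10/100 Mbps Ethernet MAC support"
                depends on NET_ETHERNET && HAS_IOMEM && HAS_DMA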
* | | e1000e: do not initiate autonegotiation during OEM configuration (Bruce Allan, 2009-11-20)
    When configuring the OEM bits in the PHY on 82577/82578, do not restart autonegotiation if the firmware is blocking it (e.g. when an IDE-R session is active) because the link must not go down.
    Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
    Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | e1000e: remove unnecessary 82577 workaround causing link issues (Bruce Allan, 2009-11-20)
    A workaround for pre-release versions of 82577 is causing link issues on some switches. The workaround is no longer needed on production parts so remove it.
    Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
    Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | e1000e: flow control thresholds not correct when changing mtu (Bruce Allan, 2009-11-20)
    When changing MTU, save it off prior to resetting; otherwise the flow control thresholds may be miscalculated.
    Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
    Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
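    A hedged sketch of the ordering (e1000e helper names as exported by the driver; validation and locking elided): store the new MTU before the reset path recomputes the flow control thresholds from it.

        netdev->mtu = new_mtu;          /* save before resetting */
        if (netif_running(netdev))
                e1000e_down(adapter);
        e1000e_reset(adapter);          /* derives FC thresholds from netdev->mtu */
        if (netif_running(netdev))
                e1000e_up(adapter);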
* | | e1000e: add Tx timeout factor for 100Mbps (Bruce Allan, 2009-11-20)
    On some devices (e.g. 82578) not having a Tx timeout factor when linked at 100Mbps can cause false reports of hardware hangs on busy hubs.
    Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
    Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | e1000e: set flow control thresholds properly after enabling/disabling pause (Bruce Allan, 2009-11-20)
    When flow control (pause) parameters were changed via ethtool (i.e. enabled or disabled), the newly calculated thresholds were not being written to the device for non-fiber media.
    Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
    Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | e1000e: read of PHY register may access wrong page on 82578 (Bruce Allan, 2009-11-20)
    Remove an unnecessary workaround that mistakenly does not perform a page select operation for PHY registers 29 and 30 (assuming these are the PHY debug port address and data registers) on 82578, which can cause the Transmit with No Carrier Sense statistics register on page 778 to be read from an incorrect page. Also error out if the page select operation fails.
    Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
    Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | e1000e: partial revert of 3ec2a2b8 plus FC workaround for 82577/8 (Bruce Allan, 2009-11-20)
    Commit 3ec2a2b80f3eb53851fe4cef9e65b5d33376ef89 broke Tx/Rx when using jumbo frames on certain parts (i.e. only PAUSE frames could be exchanged once the high water mark was reached, preventing normal packet traffic). This patch reverts the breakage and sets appropriate high and low water marks of the Rx FIFO for 82577/82578, which require a workaround due to a flow control issue in hardware.
    Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
    Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | ixgbe: move tc variable to CONFIG_IXGBE_DCB (Jaswinder Singh Rajput, 2009-11-20)
    tc is required by CONFIG_IXGBE_DCB. This also fixes a compilation warning:

        drivers/net/ixgbe/ixgbe_main.c: In function ‘ixgbe_tx_is_paused’:
        drivers/net/ixgbe/ixgbe_main.c:245: warning: unused variable ‘tc’

    Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
    Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | netfilter: nf_log: fix sleeping function called from invalid context in seq_show() (Patrick McHardy, 2009-11-19)
    [ 171.925285] BUG: sleeping function called from invalid context at kernel/mutex.c:280
    [ 171.925296] in_atomic(): 1, irqs_disabled(): 0, pid: 671, name: grep
    [ 171.925306] 2 locks held by grep/671:
    [ 171.925312] #0: (&p->lock){+.+.+.}, at: [<c10b8acd>] seq_read+0x25/0x36c
    [ 171.925340] #1: (rcu_read_lock){.+.+..}, at: [<c1391dac>] seq_start+0x0/0x44
    [ 171.925372] Pid: 671, comm: grep Not tainted 2.6.31.6-4-netbook #3
    [ 171.925380] Call Trace:
    [ 171.925398] [<c105104e>] ? __debug_show_held_locks+0x1e/0x20
    [ 171.925414] [<c10264ac>] __might_sleep+0xfb/0x102
    [ 171.925430] [<c1461521>] mutex_lock_nested+0x1c/0x2ad
    [ 171.925444] [<c1391c9e>] seq_show+0x74/0x127
    [ 171.925456] [<c10b8c5c>] seq_read+0x1b4/0x36c
    [ 171.925469] [<c10b8aa8>] ? seq_read+0x0/0x36c
    [ 171.925483] [<c10d5c8e>] proc_reg_read+0x60/0x74
    [ 171.925496] [<c10d5c2e>] ? proc_reg_read+0x0/0x74
    [ 171.925510] [<c10a4468>] vfs_read+0x87/0x110
    [ 171.925523] [<c10a458a>] sys_read+0x3b/0x60
    [ 171.925538] [<c1002a49>] syscall_call+0x7/0xb

    Fix it by replacing RCU with nf_log_mutex.
    Reported-by: "Yin, Kangkai" <kangkai.yin@intel.com>
    Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | netfilter: xt_osf: fix xt_osf_remove_callback() return value (Patrick McHardy, 2009-11-19)
    Return a negative error value.
    Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
    Signed-off-by: Patrick McHardy <kaber@trash.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>
* | | veth: Fix veth_get_stats() (Eric Dumazet, 2009-11-19)
    veth_get_stats() can be called in parallel on several cpus. It's better not to reset dev->stats, as that could give a wrong result on one cpu. Use temporary variables, then store the final results. Also, we should loop over every possible cpu, not only online cpus, or cpu hotplug can suddenly give wrong veth stats.
    Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
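    A sketch of the described fix (per-cpu field names hypothetical): accumulate into locals across all possible cpus, then publish once, instead of zeroing dev->stats first.

        static struct net_device_stats *veth_get_stats(struct net_device *dev)
        {
                struct veth_priv *priv = netdev_priv(dev);
                unsigned long rx_packets = 0, rx_bytes = 0;
                int cpu;

                for_each_possible_cpu(cpu) {   /* not just online cpus */
                        struct veth_net_stats *s = per_cpu_ptr(priv->stats, cpu);

                        rx_packets += s->rx_packets;
                        rx_bytes   += s->rx_bytes;
                }
                dev->stats.rx_packets = rx_packets;
                dev->stats.rx_bytes   = rx_bytes;
                return &dev->stats;
        }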
* | | ieee802154: dont leak skbs in ieee802154_fake_xmit() (Eric Dumazet, 2009-11-19)
    ieee802154_fake_xmit() should free skbs, since it returns NETDEV_TX_OK.
    Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
    Acked-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
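    The shape of the fix (stats updates illustrative): once the function returns NETDEV_TX_OK, the stack considers the skb consumed, so it must be freed here.

        static netdev_tx_t ieee802154_fake_xmit(struct sk_buff *skb,
                                                struct net_device *dev)
        {
                dev->stats.tx_packets++;
                dev->stats.tx_bytes += skb->len;

                dev_kfree_skb(skb);   /* consumed: NETDEV_TX_OK means "taken" */
                return NETDEV_TX_OK;
        }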
* | | Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq (Linus Torvalds, 2009-11-18)
|\ \ \
    * 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq:
        [CPUFREQ] Fix stale cpufreq_cpu_governor pointer
        [CPUFREQ] Resolve time unit thinko in ondemand/conservative govs
        [CPUFREQ] speedstep-ich: fix error caused by 394122ab144dae4b276d74644a2f11c44a60ac5c
        [CPUFREQ] Fix use after free on governor restore
        [CPUFREQ] acpi-cpufreq: blacklist Intel 0f68: Fix HT detection and put in notification message
        [CPUFREQ] powernow-k8: Fix test in get_transition_latency()
        [CPUFREQ] longhaul: select Longhaul version 2 for capable CPUs
| * | | [CPUFREQ] Fix stale cpufreq_cpu_governor pointer (Prarit Bhargava, 2009-11-17)
    Dave,
    Attached is an update of my patch against the cpufreq fixes branch. Before applying the patch I compiled and booted the tree to see if the panic was still there -- to my surprise it was not. This is because of the conversion of cpufreq_cpu_governor to a char[]. While the panic is kaput, the problem of stale data continues, and my patch is still valid. It is possible to end up with the wrong governor after hotplug events, because CPUFREQ_DEFAULT_GOVERNOR is statically linked to a default while the cpu siblings may have had a different governor assigned by a user. I.e. the patch is still needed in order to keep the governors assigned properly when hotplugging devices.
    Signed-off-by: Prarit Bhargava <prarit@redhat.com>
    Signed-off-by: Dave Jones <davej@redhat.com>
| * | | [CPUFREQ] Resolve time unit thinko in ondemand/conservative govs (Pallipadi, Venkatesh, 2009-11-17)
    The ondemand and conservative governors are mixing up time units in the code path where NO_HZ is not enabled and ignore_nice is set: the stored wall time and idle time are in jiffies, while the nice time calculation happens in microseconds. The problem was reported and diagnosed by Alexander here: http://marc.info/?l=linux-kernel&m=125752550404513&w=2
    The patch below fixes this thinko.
    Reported-by: Alexander Miller <Miller@fmi.uni-stuttgart.de>
    Tested-by: Alexander Miller <Miller@fmi.uni-stuttgart.de>
    Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
    Signed-off-by: Dave Jones <davej@redhat.com>
| * | | [CPUFREQ] speedstep-ich: fix error caused by 394122ab144dae4b276d74644a2f11c44a60ac5c (Rusty Russell, 2009-11-17)
    "[CPUFREQ] cpumask: avoid playing with cpus_allowed in speedstep-ich.c" changed the code to mistakenly pass the current cpu as the "processor" argument of speedstep_get_frequency(), whereas it should be the type of the processor.
    Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14340
    Based on a patch by Dave Mueller.
    Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
    Acked-by: Dominik Brodowski <linux@brodo.de>
    Reported-by: Dave Mueller <dave.mueller@gmx.ch>
    Cc: <stable@kernel.org>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Dave Jones <davej@redhat.com>
| * | | [CPUFREQ] Fix use after free on governor restore (Dmitry Monakhov, 2009-11-17)
    Currently, on the governor backup/restore path, we store the governor's pointer. This is wrong, because one may unload the governor's module after the cpu goes offline. As a result, a use-after-free will take place on the restored cpu. It is not easy to exploit this bug, but still we have to close this issue ASAP. The issue was introduced by commit 084f34939424161669467c19280dbcf637730314.

    ##TESTCASE##
    #!/bin/sh -x
    modprobe acpi_cpufreq
    # Any non-default governor, in my case it is "ondemand"
    modprobe cpufreq_ondemand
    echo ondemand > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
    rmmod acpi_cpufreq
    rmmod cpufreq_ondemand
    modprobe acpi_cpufreq   # << use-after-free here.

    Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
    Signed-off-by: Dave Jones <davej@redhat.com>
| * | | [CPUFREQ] acpi-cpufreq: blacklist Intel 0f68: Fix HT detection and put in notification message (John Villalovos, 2009-11-17)
    Remove the SMT/HT check, since the Errata doesn't mention Hyper-Threading. Add a printk so that the user knows why acpi-cpufreq refuses to load. Also, once the system is blacklisted, don't repeat the checks to see if it is blacklisted. This also causes the message to be printed only once, rather than for each CPU.
    Signed-off-by: John L. Villalovos <john.l.villalovos@intel.com>
    Signed-off-by: Dave Jones <davej@redhat.com>
| * | | [CPUFREQ] powernow-k8: Fix test in get_transition_latency() (Roel Kluin, 2009-11-17)
    The '!' (not) turns its operand into a bool before the comparison takes place, which is not what was intended.
    Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
    Signed-off-by: Dave Jones <davej@redhat.com>
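    The bug class, with hypothetical identifiers: '!' binds tighter than '==', so the negation is applied before the comparison.

        if (!cpu_family == CPU_HW_PSTATE)   /* bug: parses as (!cpu_family) == CPU_HW_PSTATE */
                do_something();

        if (cpu_family != CPU_HW_PSTATE)    /* what was meant */
                do_something();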
| * | | [CPUFREQ] longhaul: select Longhaul version 2 for capable CPUs (Krzysztof Helt, 2009-11-17)
    There is a typo in the longhaul detection code, so only Longhaul v1 or Longhaul v3 is selected; Longhaul v2 is not selected even for CPUs which are capable of it.
    Tested on a PCChips Giga Pro board. Frequency changes work, and Longhaul v2 detects that the board is not capable of changing the CPU voltage.
    Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
    Signed-off-by: Dave Jones <davej@redhat.com>
* | | | strcmp: fix overflow and possibly signedness error (Linus Torvalds, 2009-11-18)
    Computing the strcmp return value as

        signed char __res = *cs - *ct;

    is wrong for two reasons. The subtraction can overflow because __res doesn't use a type big enough. Moreover, the compared bytes should be interpreted as unsigned char, as specified by POSIX. The same problem is fixed in strncmp.
    Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
    Cc: Michael Buesch <mb@bu3sch.de>
    Cc: Andreas Schwab <schwab@linux-m68k.org>
    Cc: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
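    The corrected comparison, reading bytes as unsigned char and avoiding the overflowing subtraction, can be written as a standalone version like this:

        int strcmp(const char *cs, const char *ct)
        {
                unsigned char c1, c2;

                while (1) {
                        c1 = *cs++;
                        c2 = *ct++;
                        if (c1 != c2)
                                return c1 < c2 ? -1 : 1;
                        if (!c1)
                                break;
                }
                return 0;
        }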
* | | | Merge branch 'agp-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/agp-2.6 (Linus Torvalds, 2009-11-18)
|\ \ \ \
    * 'agp-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/agp-2.6:
        agp/intel-agp: Set dma_mask for capable chipsets before agp_add_bridge()
| * | | | agp/intel-agp: Set dma_mask for capable chipsets before agp_add_bridge() (David Woodhouse, 2009-11-18)
    We should set this before calling agp_add_bridge() so that it's done before we map the scratch page too. This should probably fix the regression reported as k.o. bug #14627.
    Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
    Signed-off-by: Dave Airlie <airlied@redhat.com>
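    A sketch of the ordering (the 36-bit width is a placeholder; the real mask depends on the chipset):

        /* set the DMA mask before agp_add_bridge(), which maps the scratch page */
        if (pci_set_dma_mask(pdev, DMA_BIT_MASK(36)))
                dev_err(&pdev->dev, "can't set DMA mask\n");

        return agp_add_bridge(bridge);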
* | | | | Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6 (Linus Torvalds, 2009-11-18)
|\ \ \ \ \
    * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6:
        ima: replace GFP_KERNEL with GFP_NOFS
| * | | | | ima: replace GFP_KERNEL with GFP_NOFS (Mimi Zohar, 2009-11-18)
    While running fsstress tests on the NFSv4 mounted ext3 and ext4 filesystem, the following call trace was generated on the nfs server machine. Replace GFP_KERNEL with GFP_NOFS in ima_iint_insert() to avoid a potential deadlock.

    =================================
    [ INFO: inconsistent lock state ]
    2.6.31-31.el6.x86_64 #1
    ---------------------------------
    inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
    kswapd2/75 [HC0[0]:SC0[0]:HE1:SE1] takes:
     (jbd2_handle){+.+.?.}, at: [<ffffffff811edd5e>] jbd2_journal_start+0xfe/0x13f
    {RECLAIM_FS-ON-W} state was registered at:
      [<ffffffff81091e40>] mark_held_locks+0x65/0x99
      [<ffffffff81091f31>] lockdep_trace_alloc+0xbd/0xf5
      [<ffffffff81126fdd>] kmem_cache_alloc+0x40/0x185
      [<ffffffff812344d7>] ima_iint_insert+0x3d/0xf1
      [<ffffffff812345b0>] ima_inode_alloc+0x25/0x44
      [<ffffffff811484ac>] inode_init_always+0xec/0x271
      [<ffffffff81148682>] alloc_inode+0x51/0xa1
      [<ffffffff81148700>] new_inode+0x2e/0x94
      [<ffffffff811b2f08>] ext4_new_inode+0xb8/0xdc9
      [<ffffffff811be611>] ext4_create+0xcf/0x175
      [<ffffffff8113e2cd>] vfs_create+0x82/0xb8
      [<ffffffff8113f337>] do_filp_open+0x32c/0x9ee
      [<ffffffff811309b9>] do_sys_open+0x6c/0x12c
      [<ffffffff81130adc>] sys_open+0x2e/0x44
      [<ffffffff81011e42>] system_call_fastpath+0x16/0x1b
      [<ffffffffffffffff>] 0xffffffffffffffff
    irq event stamp: 90371
    hardirqs last enabled at (90371): [<ffffffff8112708d>] kmem_cache_alloc+0xf0/0x185
    hardirqs last disabled at (90370): [<ffffffff81127026>] kmem_cache_alloc+0x89/0x185
    softirqs last enabled at (89492): [<ffffffff81068ecf>] __do_softirq+0x1bf/0x1eb
    softirqs last disabled at (89477): [<ffffffff8101312c>] call_softirq+0x1c/0x30
    other info that might help us debug this:
    2 locks held by kswapd2/75:
     #0: (shrinker_rwsem){++++..}, at: [<ffffffff810f98ba>] shrink_slab+0x44/0x177
     #1: (&type->s_umount_key#25){++++..}, at: [<ffffffff811450ba>]

    Reported-by: Muni P. Beerakam <mbeeraka@in.ibm.com>
    Reported-by: Amit K. Arora <amitarora@in.ibm.com>
    Cc: stable@kernel.org
    Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
    Signed-off-by: James Morris <jmorris@namei.org>
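    The fix itself is essentially one flag (surrounding allocation code abbreviated): GFP_NOFS keeps reclaim triggered by this allocation from re-entering the filesystem.

        /* in ima_iint_insert(), roughly: */
        iint = kmem_cache_alloc(iint_cache, GFP_NOFS);   /* was GFP_KERNEL */
        if (!iint)
                return -ENOMEM;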
* | | | | Merge branch 'omap-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap-2.6 (Linus Torvalds, 2009-11-18)
|\ \ \ \ \
    * 'omap-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap-2.6:
        OMAP: cs should be positive in gpmc_cs_free()
        omap: fix unlikely(x) < y
        omap3: clock: Fixed dpll3_m2x2 rate calculation
        omap3: clock: Fix the DPLL freqsel computations
        omap: Fix keymap for zoom2 according to matrix keypad framework
| * | | | | OMAP: cs should be positive in gpmc_cs_free() (Roel Kluin, 2009-11-18)
    The index `cs' is signed; test whether it is negative before we release gpmc_cs_mem[cs].
    Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
    Cc: Russell King <rmk@arm.linux.org.uk>
    Signed-off-by: Tony Lindgren <tony@atomide.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
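    A sketch of the strengthened bounds check (message text and surrounding details assumed):

        void gpmc_cs_free(int cs)
        {
                if (cs < 0 || cs >= GPMC_CS_NUM) {   /* reject negative indexes too */
                        printk(KERN_ERR "Trying to free non-reserved GPMC CS%d\n", cs);
                        BUG();
                        return;
                }
                /* ... release gpmc_cs_mem[cs] ... */
        }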
| * | | | | omap: fix unlikely(x) < y (Roel Kluin, 2009-11-18)
    The closing parenthesis was not in the right location.
    Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
    Cc: Russell King <rmk@arm.linux.org.uk>
    Signed-off-by: Tony Lindgren <tony@atomide.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
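    The bug class, with a hypothetical variable: unlikely() returns the 0/1 truth value of its argument, so the comparison must sit inside the parentheses.

        if (unlikely(ret) < 0)    /* bug: compares unlikely()'s 0/1 result with 0 */
                return ret;

        if (unlikely(ret < 0))    /* fix: the hint covers the whole test */
                return ret;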
| * | | | | omap3: clock: Fixed dpll3_m2x2 rate calculation (Tero Kristo, 2009-11-17)
    The current calculation does not take into account any changes to the M2 divisor, and thus when we change the VDD2 OPP, the dpll3_m2x2 rate does not change. Fixed by re-routing the dpll3_m2x2 parent to dpll3_m2.
    Signed-off-by: Tero Kristo <tero.kristo@nokia.com>
    Signed-off-by: Paul Walmsley <paul@pwsan.com>
    Signed-off-by: Tony Lindgren <tony@atomide.com>
kwa">if (list_empty(&inode_unused)) break; inode = list_entry(inode_unused.prev, struct inode, i_list); if (inode->i_state || atomic_read(&inode->i_count)) { list_move(&inode->i_list, &inode_unused); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); spin_unlock(&inode_lock); if (remove_inode_buffers(inode)) reap += invalidate_inode_pages(&inode->i_data); iput(inode); spin_lock(&inode_lock); if (inode != list_entry(inode_unused.next, struct inode, i_list)) continue; /* wrong inode or list_empty */ if (!can_unuse(inode)) continue; } list_move(&inode->i_list, &freeable); inode->i_state |= I_FREEING; nr_pruned++; } inodes_stat.nr_unused -= nr_pruned; spin_unlock(&inode_lock); dispose_list(&freeable); up(&iprune_sem); if (current_is_kswapd()) mod_page_state(kswapd_inodesteal, reap); else mod_page_state(pginodesteal, reap); } /* * shrink_icache_memory() will attempt to reclaim some unused inodes. Here, * "unused" means that no dentries are referring to the inodes: the files are * not open and the dcache references to those inodes have already been * reclaimed. * * This function is passed the number of inodes to scan, and it returns the * total number of remaining possibly-reclaimable inodes. */ static int shrink_icache_memory(int nr, gfp_t gfp_mask) { if (nr) { /* * Nasty deadlock avoidance. We may hold various FS locks, * and we don't want to recurse into the FS that called us * in clear_inode() and friends.. */ if (!(gfp_mask & __GFP_FS)) return -1; prune_icache(nr); } return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. * NOTE: we are not increasing the inode-refcount, you must call __iget() * by hand after calling find_inode now! This simplifies iunique and won't * add any additional branch in the common code. */ static struct inode * find_inode(struct super_block * sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data) { struct hlist_node *node; struct inode * inode = NULL; repeat: hlist_for_each (node, head) { inode = hlist_entry(node, struct inode, i_hash); if (inode->i_sb != sb) continue; if (!test(inode, data)) continue; if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } break; } return node ? inode : NULL; } /* * find_inode_fast is the fast path version of find_inode, see the comment at * iget_locked for details. */ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head *head, unsigned long ino) { struct hlist_node *node; struct inode * inode = NULL; repeat: hlist_for_each (node, head) { inode = hlist_entry(node, struct inode, i_hash); if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } break; } return node ? inode : NULL; } /** * new_inode - obtain an inode * @sb: superblock * * Allocates a new inode for given superblock. */ struct inode *new_inode(struct super_block *sb) { static unsigned long last_ino; struct inode * inode; spin_lock_prefetch(&inode_lock); inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); } return inode; } EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) { /* * This is special! 
We do not need the spinlock * when clearing I_LOCK, because we're guaranteed * that nobody else tries to do anything about the * state of the inode when it is locked, as we * just created it (so there can be no old holders * that haven't tested I_LOCK). */ inode->i_state &= ~(I_LOCK|I_NEW); wake_up_inode(inode); } EXPORT_SYMBOL(unlock_new_inode); /* * This is called without the inode lock held.. Be careful. * * We no longer cache the sb_flags in i_flags - see fs.h * -- rmk@arm.uk.linux.org */ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct inode * inode; inode = alloc_inode(sb); if (inode) { struct inode * old; spin_lock(&inode_lock); /* We released the lock, so.. */ old = find_inode(sb, head, test, data); if (!old) { if (set(inode, data)) goto set_failed; inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); hlist_add_head(&inode->i_hash, head); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents */ return inode; } /* * Uhhuh, somebody else created the same inode under * us. Use the old inode instead of the one we just * allocated. */ __iget(old); spin_unlock(&inode_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); } return inode; set_failed: spin_unlock(&inode_lock); destroy_inode(inode); return NULL; } /* * get_new_inode_fast is the fast path version of get_new_inode, see the * comment at iget_locked for details. */ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { struct inode * inode; inode = alloc_inode(sb); if (inode) { struct inode * old; spin_lock(&inode_lock); /* We released the lock, so.. */ old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); hlist_add_head(&inode->i_hash, head); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents */ return inode; } /* * Uhhuh, somebody else created the same inode under * us. Use the old inode instead of the one we just * allocated. */ __iget(old); spin_unlock(&inode_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); } return inode; } static inline unsigned long hash(struct super_block *sb, unsigned long hashval) { unsigned long tmp; tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES; tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); return tmp & I_HASHMASK; } /** * iunique - get a unique inode number * @sb: superblock * @max_reserved: highest reserved inode number * * Obtain an inode number that is unique on the system for a given * superblock. This is used by file systems that have no natural * permanent inode numbering system. An inode number is returned that * is higher than the reserved limit but unique. * * BUGS: * With a large number of inodes live on the file system this function * currently becomes quite slow. 
*/ ino_t iunique(struct super_block *sb, ino_t max_reserved) { static ino_t counter; struct inode *inode; struct hlist_head * head; ino_t res; spin_lock(&inode_lock); retry: if (counter > max_reserved) { head = inode_hashtable + hash(sb,counter); res = counter++; inode = find_inode_fast(sb, head, res); if (!inode) { spin_unlock(&inode_lock); return res; } } else { counter = max_reserved + 1; } goto retry; } EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode_lock); if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) __iget(inode); else /* * Handle the case where s_op->clear_inode is not been * called yet, and somebody is calling igrab * while the inode is getting freed. */ inode = NULL; spin_unlock(&inode_lock); return inode; } EXPORT_SYMBOL(igrab); /** * ifind - internal function, you want ilookup5() or iget5(). * @sb: super block of file system to search * @head: the head of the list to search * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * @wait: if true wait for the inode to be unlocked, if false do not * * ifind() searches for the inode specified by @data in the inode * cache. This is a generalized version of ifind_fast() for file systems where * the inode number is not sufficient for unique identification of an inode. * * If the inode is in the cache, the inode is returned with an incremented * reference count. * * Otherwise NULL is returned. * * Note, @test is called with the inode_lock held, so can't sleep. */ static inline struct inode *ifind(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data, const int wait) { struct inode *inode; spin_lock(&inode_lock); inode = find_inode(sb, head, test, data); if (inode) { __iget(inode); spin_unlock(&inode_lock); if (likely(wait)) wait_on_inode(inode); return inode; } spin_unlock(&inode_lock); return NULL; } /** * ifind_fast - internal function, you want ilookup() or iget(). * @sb: super block of file system to search * @head: head of the list to search * @ino: inode number to search for * * ifind_fast() searches for the inode @ino in the inode cache. This is for * file systems where the inode number is sufficient for unique identification * of an inode. * * If the inode is in the cache, the inode is returned with an incremented * reference count. * * Otherwise NULL is returned. */ static inline struct inode *ifind_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { struct inode *inode; spin_lock(&inode_lock); inode = find_inode_fast(sb, head, ino); if (inode) { __iget(inode); spin_unlock(&inode_lock); wait_on_inode(inode); return inode; } spin_unlock(&inode_lock); return NULL; } /** * ilookup5_nowait - search for an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * * ilookup5() uses ifind() to search for the inode specified by @hashval and * @data in the inode cache. This is a generalized version of ilookup() for * file systems where the inode number is not sufficient for unique * identification of an inode. * * If the inode is in the cache, the inode is returned with an incremented * reference count. Note, the inode lock is not waited upon so you have to be * very careful what you do with the returned inode. You probably should be * using ilookup5() instead. * * Otherwise NULL is returned. 
* * Note, @test is called with the inode_lock held, so can't sleep. */ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); return ifind(sb, head, test, data, 0); } EXPORT_SYMBOL(ilookup5_nowait); /** * ilookup5 - search for an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * * ilookup5() uses ifind() to search for the inode specified by @hashval and * @data in the inode cache. This is a generalized version of ilookup() for * file systems where the inode number is not sufficient for unique * identification of an inode. * * If the inode is in the cache, the inode lock is waited upon and the inode is * returned with an incremented reference count. * * Otherwise NULL is returned. * * Note, @test is called with the inode_lock held, so can't sleep. */ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); return ifind(sb, head, test, data, 1); } EXPORT_SYMBOL(ilookup5); /** * ilookup - search for an inode in the inode cache * @sb: super block of file system to search * @ino: inode number to search for * * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. * This is for file systems where the inode number is sufficient for unique * identification of an inode. * * If the inode is in the cache, the inode is returned with an incremented * reference count. * * Otherwise NULL is returned. */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); return ifind_fast(sb, head, ino); } EXPORT_SYMBOL(ilookup); /** * iget5_locked - obtain an inode from a mounted file system * @sb: super block of file system * @hashval: hash value (usually inode number) to get * @test: callback used for comparisons between inodes * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * * This is iget() without the read_inode() portion of get_new_inode(). * * iget5_locked() uses ifind() to search for the inode specified by @hashval * and @data in the inode cache and if present it is returned with an increased * reference count. This is a generalized version of iget_locked() for file * systems where the inode number is not sufficient for unique identification * of an inode. * * If the inode is not in cache, get_new_inode() is called to allocate a new * inode and this is returned locked, hashed, and with the I_NEW flag set. The * file system gets to fill it in before unlocking it via unlock_new_inode(). * * Note both @test and @set are called with the inode_lock held, so can't sleep. */ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; inode = ifind(sb, head, test, data, 1); if (inode) return inode; /* * get_new_inode() will do the right thing, re-trying the search * in case it had to block at any point. 
*/ return get_new_inode(sb, head, test, set, data); } EXPORT_SYMBOL(iget5_locked); /** * iget_locked - obtain an inode from a mounted file system * @sb: super block of file system * @ino: inode number to get * * This is iget() without the read_inode() portion of get_new_inode_fast(). * * iget_locked() uses ifind_fast() to search for the inode specified by @ino in * the inode cache and if present it is returned with an increased reference * count. This is for file systems where the inode number is sufficient for * unique identification of an inode. * * If the inode is not in cache, get_new_inode_fast() is called to allocate a * new inode and this is returned locked, hashed, and with the I_NEW flag set. * The file system gets to fill it in before unlocking it via * unlock_new_inode(). */ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; inode = ifind_fast(sb, head, ino); if (inode) return inode; /* * get_new_inode_fast() will do the right thing, re-trying the search * in case it had to block at any point. */ return get_new_inode_fast(sb, head, ino); } EXPORT_SYMBOL(iget_locked); /** * __insert_inode_hash - hash an inode * @inode: unhashed inode * @hashval: unsigned long value used to locate this object in the * inode_hashtable. * * Add an inode to the inode hash for this superblock. */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); spin_lock(&inode_lock); hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); } EXPORT_SYMBOL(__insert_inode_hash); /** * remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * * Remove an inode from the superblock. */ void remove_inode_hash(struct inode *inode) { spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); } EXPORT_SYMBOL(remove_inode_hash); /* * Tell the filesystem that this inode is no longer of any interest and should * be completely destroyed. * * We leave the inode in the inode hash table until *after* the filesystem's * ->delete_inode completes. This ensures that an iget (such as nfsd might * instigate) will always find up-to-date information either in the hash or on * disk. * * I_FREEING is set so that no-one will take a new reference to the inode while * it is being deleted. 
*/ void generic_delete_inode(struct inode *inode) { struct super_operations *op = inode->i_sb->s_op; list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); security_inode_delete(inode); if (op->delete_inode) { void (*delete)(struct inode *) = op->delete_inode; if (!is_bad_inode(inode)) DQUOT_INIT(inode); /* Filesystems implementing their own * s_op->delete_inode are required to call * truncate_inode_pages and clear_inode() * internally */ delete(inode); } else { truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); } spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); wake_up_inode(inode); if (inode->i_state != I_CLEAR) BUG(); destroy_inode(inode); } EXPORT_SYMBOL(generic_delete_inode); static void generic_forget_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; if (!hlist_unhashed(&inode->i_hash)) { if (!(inode->i_state & (I_DIRTY|I_LOCK))) list_move(&inode->i_list, &inode_unused); inodes_stat.nr_unused++; if (!sb || (sb->s_flags & MS_ACTIVE)) { spin_unlock(&inode_lock); return; } inode->i_state |= I_WILL_FREE; spin_unlock(&inode_lock); write_inode_now(inode, 1); spin_lock(&inode_lock); inode->i_state &= ~I_WILL_FREE; inodes_stat.nr_unused--; hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); destroy_inode(inode); } /* * Normal UNIX filesystem behaviour: delete the * inode when the usage count drops to zero, and * i_nlink is zero. */ void generic_drop_inode(struct inode *inode) { if (!inode->i_nlink) generic_delete_inode(inode); else generic_forget_inode(inode); } EXPORT_SYMBOL_GPL(generic_drop_inode); /* * Called when we're dropping the last reference * to an inode. * * Call the FS "drop()" function, defaulting to * the legacy UNIX filesystem behaviour.. * * NOTE! NOTE! NOTE! We're called with the inode lock * held, and the drop function is supposed to release * the lock! */ static inline void iput_final(struct inode *inode) { struct super_operations *op = inode->i_sb->s_op; void (*drop)(struct inode *) = generic_drop_inode; if (op && op->drop_inode) drop = op->drop_inode; drop(inode); } /** * iput - put an inode * @inode: inode to put * * Puts an inode, dropping its usage count. If the inode use count hits * zero, the inode is then freed and may also be destroyed. * * Consequently, iput() can sleep. */ void iput(struct inode *inode) { if (inode) { struct super_operations *op = inode->i_sb->s_op; BUG_ON(inode->i_state == I_CLEAR); if (op && op->put_inode) op->put_inode(inode); if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) iput_final(inode); } } EXPORT_SYMBOL(iput); /** * bmap - find a block number in a file * @inode: inode of file * @block: block to find * * Returns the block number on the device holding the inode that * is the disk block number for the block of the file requested. * That is, asked for block 4 of inode 1 the function will return the * disk block relative to the disk start that holds that block of the * file. 
*/ sector_t bmap(struct inode * inode, sector_t block) { sector_t res = 0; if (inode->i_mapping->a_ops->bmap) res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block); return res; } EXPORT_SYMBOL(bmap); /** * update_atime - update the access time * @inode: inode accessed * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ void update_atime(struct inode *inode) { struct timespec now; if (IS_NOATIME(inode)) return; if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode)) return; if (IS_RDONLY(inode)) return; now = current_fs_time(inode->i_sb); if (!timespec_equal(&inode->i_atime, &now)) { inode->i_atime = now; mark_inode_dirty_sync(inode); } } EXPORT_SYMBOL(update_atime); /** * inode_update_time - update mtime and ctime time * @inode: inode accessed * @ctime_too: update ctime too * * Update the mtime time on an inode and mark it for writeback. * When ctime_too is specified update the ctime too. */ void inode_update_time(struct inode *inode, int ctime_too) { struct timespec now; int sync_it = 0; if (IS_NOCMTIME(inode)) return; if (IS_RDONLY(inode)) return; now = current_fs_time(inode->i_sb); if (!timespec_equal(&inode->i_mtime, &now)) sync_it = 1; inode->i_mtime = now; if (ctime_too) { if (!timespec_equal(&inode->i_ctime, &now)) sync_it = 1; inode->i_ctime = now; } if (sync_it) mark_inode_dirty_sync(inode); } EXPORT_SYMBOL(inode_update_time); int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) return 1; if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) return 1; return 0; } EXPORT_SYMBOL(inode_needs_sync); /* * Quota functions that want to walk the inode lists.. */ #ifdef CONFIG_QUOTA /* Function back in dquot.c */ int remove_inode_dquot_ref(struct inode *, int, struct list_head *); void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { struct inode *inode; if (!sb->dq_op) return; /* nothing to do */ spin_lock(&inode_lock); /* This lock is for inodes code */ /* * We don't have to lock against quota code - test IS_QUOTAINIT is * just for speedup... */ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) if (!IS_NOQUOTA(inode)) remove_inode_dquot_ref(inode, type, tofree_head); spin_unlock(&inode_lock); } #endif int inode_wait(void *word) { schedule(); return 0; } /* * If we try to find an inode in the inode hash while it is being * deleted, we have to wait until the filesystem completes its * deletion before reporting that it isn't found. This function waits * until the deletion _might_ have completed. Callers are responsible * to recheck inode state. * * It doesn't matter if I_LOCK is not set initially, a call to * wake_up_inode() after removing from the hash list will DTRT. * * This is called with inode_lock held. 
*/ static void __wait_on_freeing_inode(struct inode *inode) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); wq = bit_waitqueue(&inode->i_state, __I_LOCK); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); spin_unlock(&inode_lock); schedule(); finish_wait(wq, &wait.wait); spin_lock(&inode_lock); } void wake_up_inode(struct inode *inode) { /* * Prevent speculative execution through spin_unlock(&inode_lock); */ smp_mb(); wake_up_bit(&inode->i_state, __I_LOCK); } static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { if (!str) return 0; ihash_entries = simple_strtoul(str, &str, 0); return 1; } __setup("ihash_entries=", set_ihash_entries); /* * Initialize the waitqueues and inode hash table. */ void __init inode_init_early(void) { int loop; /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ if (hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", sizeof(struct hlist_head), ihash_entries, 14, HASH_EARLY, &i_hash_shift, &i_hash_mask, 0); for (loop = 0; loop < (1 << i_hash_shift); loop++) INIT_HLIST_HEAD(&inode_hashtable[loop]); } void __init inode_init(unsigned long mempages) { int loop; /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), 0, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_once, NULL); set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); /* Hash may have been set up in inode_init_early */ if (!hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", sizeof(struct hlist_head), ihash_entries, 14, 0, &i_hash_shift, &i_hash_mask, 0); for (loop = 0; loop < (1 << i_hash_shift); loop++) INIT_HLIST_HEAD(&inode_hashtable[loop]); } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_mode = mode; if (S_ISCHR(mode)) { inode->i_fop = &def_chr_fops; inode->i_rdev = rdev; } else if (S_ISBLK(mode)) { inode->i_fop = &def_blk_fops; inode->i_rdev = rdev; } else if (S_ISFIFO(mode)) inode->i_fop = &def_fifo_fops; else if (S_ISSOCK(mode)) inode->i_fop = &bad_sock_fops; else printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", mode); } EXPORT_SYMBOL(init_special_inode);