author      Linus Torvalds <torvalds@linux-foundation.org>  2011-11-02 19:07:27 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>  2011-11-02 19:07:27 -0400
commit      092f4c56c1927e4b61a41ee8055005f1cb437009 (patch)
tree        616ceb54b7671ccc13922ae9e002b8b972f6e09e
parent      80c2861672bbf000f6af838656959ee937e4ee4d (diff)
parent      c1e2ee2dc436574880758b3836fc96935b774c32 (diff)
Merge branch 'akpm' (Andrew's incoming - part two)
Says Andrew: "60 patches. That's good enough for -rc1 I guess. I have
quite a lot of detritus to be rechecked, work through maintainers, etc.

 - most of the remains of MM
 - rtc
 - various misc
 - cgroups
 - memcg
 - cpusets
 - procfs
 - ipc
 - rapidio
 - sysctl
 - pps
 - w1
 - drivers/misc
 - aio"

* akpm: (60 commits)
  memcg: replace ss->id_lock with a rwlock
  aio: allocate kiocbs in batches
  drivers/misc/vmw_balloon.c: fix typo in code comment
  drivers/misc/vmw_balloon.c: determine page allocation flag can_sleep outside loop
  w1: disable irqs in critical section
  drivers/w1/w1_int.c: multiple masters used same init_name
  drivers/power/ds2780_battery.c: fix deadlock upon insertion and removal
  drivers/power/ds2780_battery.c: add a nolock function to w1 interface
  drivers/power/ds2780_battery.c: create central point for calling w1 interface
  w1: ds2760 and ds2780, use ida for id and ida_simple_get() to get it
  pps gpio client: add missing dependency
  pps: new client driver using GPIO
  pps: default echo function
  include/linux/dma-mapping.h: add dma_zalloc_coherent()
  sysctl: make CONFIG_SYSCTL_SYSCALL default to n
  sysctl: add support for poll()
  RapidIO: documentation update
  drivers/net/rionet.c: fix ethernet address macros for LE platforms
  RapidIO: fix potential null deref in rio_setup_device()
  RapidIO: add mport driver for Tsi721 bridge
  ...
-rw-r--r--Documentation/DMA-API.txt7
-rw-r--r--Documentation/cgroups/memory.txt1
-rw-r--r--Documentation/feature-removal-schedule.txt35
-rw-r--r--Documentation/rapidio/rapidio.txt2
-rw-r--r--Documentation/rapidio/tsi721.txt49
-rw-r--r--arch/powerpc/mm/gup.c12
-rw-r--r--arch/powerpc/mm/hugetlbpage.c21
-rw-r--r--arch/powerpc/sysdev/fsl_rio.c1
-rw-r--r--arch/s390/mm/gup.c14
-rw-r--r--arch/sparc/mm/gup.c2
-rw-r--r--arch/x86/mm/gup.c10
-rw-r--r--drivers/misc/vmw_balloon.c4
-rw-r--r--drivers/net/rionet.c4
-rw-r--r--drivers/power/ds2780_battery.c86
-rw-r--r--drivers/pps/clients/Kconfig9
-rw-r--r--drivers/pps/clients/Makefile1
-rw-r--r--drivers/pps/clients/pps-gpio.c227
-rw-r--r--drivers/pps/clients/pps-ktimer.c12
-rw-r--r--drivers/pps/clients/pps_parport.c9
-rw-r--r--drivers/pps/kapi.c20
-rw-r--r--drivers/rapidio/Kconfig6
-rw-r--r--drivers/rapidio/Makefile1
-rw-r--r--drivers/rapidio/devices/Kconfig10
-rw-r--r--drivers/rapidio/devices/Makefile5
-rw-r--r--drivers/rapidio/devices/tsi721.c2360
-rw-r--r--drivers/rapidio/devices/tsi721.h766
-rw-r--r--drivers/rapidio/rio-scan.c6
-rw-r--r--drivers/rtc/class.c32
-rw-r--r--drivers/rtc/rtc-ds1307.c27
-rw-r--r--drivers/rtc/rtc-mc13xxx.c6
-rw-r--r--drivers/w1/slaves/w1_ds2760.c48
-rw-r--r--drivers/w1/slaves/w1_ds2780.c96
-rw-r--r--drivers/w1/slaves/w1_ds2780.h2
-rw-r--r--drivers/w1/w1_int.c1
-rw-r--r--drivers/w1/w1_io.c5
-rw-r--r--fs/aio.c136
-rw-r--r--fs/binfmt_elf.c11
-rw-r--r--fs/hfs/btree.c20
-rw-r--r--fs/isofs/inode.c10
-rw-r--r--fs/proc/base.c146
-rw-r--r--fs/proc/proc_sysctl.c46
-rw-r--r--fs/ramfs/inode.c10
-rw-r--r--include/linux/aio.h1
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/linux/dma-mapping.h10
-rw-r--r--include/linux/magic.h10
-rw-r--r--include/linux/memcontrol.h44
-rw-r--r--include/linux/mm.h67
-rw-r--r--include/linux/mm_types.h21
-rw-r--r--include/linux/pps-gpio.h32
-rw-r--r--include/linux/rio_ids.h1
-rw-r--r--include/linux/sem.h49
-rw-r--r--include/linux/sysctl.h22
-rw-r--r--include/linux/utsname.h16
-rw-r--r--init/Kconfig4
-rw-r--r--init/do_mounts.c48
-rw-r--r--init/do_mounts_rd.c14
-rw-r--r--ipc/sem.c56
-rw-r--r--kernel/cgroup.c39
-rw-r--r--kernel/cpuset.c9
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/utsname_sysctl.c23
-rw-r--r--lib/idr.c11
-rw-r--r--mm/huge_memory.c37
-rw-r--r--mm/internal.h46
-rw-r--r--mm/memcontrol.c1006
-rw-r--r--mm/memory.c2
-rw-r--r--mm/page_cgroup.c9
-rw-r--r--mm/swap.c83
-rw-r--r--mm/vmscan.c4
70 files changed, 4909 insertions, 1035 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index fe2326906610..66bd97a95f10 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -50,6 +50,13 @@ specify the GFP_ flags (see kmalloc) for the allocation (the
50implementation may choose to ignore flags that affect the location of 50implementation may choose to ignore flags that affect the location of
51the returned memory, like GFP_DMA). 51the returned memory, like GFP_DMA).
52 52
53void *
54dma_zalloc_coherent(struct device *dev, size_t size,
55 dma_addr_t *dma_handle, gfp_t flag)
56
57Wraps dma_alloc_coherent() and also zeroes the returned memory if the
58allocation attempt succeeded.
59
53void 60void
54dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, 61dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
55 dma_addr_t dma_handle) 62 dma_addr_t dma_handle)
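
[Editor's note] The text added above documents dma_zalloc_coherent() as a zeroing wrapper around dma_alloc_coherent(). As a reading aid, a minimal sketch of such a wrapper, assuming it only delegates the allocation and clears the buffer on success (the definition actually added by this series lives in include/linux/dma-mapping.h and may differ in detail):

    static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
                                            dma_addr_t *dma_handle, gfp_t flag)
    {
            /* delegate to dma_alloc_coherent(), then zero the buffer on success */
            void *ret = dma_alloc_coherent(dev, size, dma_handle, flag);

            if (ret)
                    memset(ret, 0, size);
            return ret;
    }
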
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 06eb6d957c83..cc0ebc5241b3 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -418,7 +418,6 @@ total_unevictable - sum of all children's "unevictable"
418 418
419# The following additional stats are dependent on CONFIG_DEBUG_VM. 419# The following additional stats are dependent on CONFIG_DEBUG_VM.
420 420
421inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
422recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) 421recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
423recent_rotated_file - VM internal parameter. (see mm/vmscan.c) 422recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
424recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) 423recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 7c799fc5b88e..3d849122b5b1 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -133,41 +133,6 @@ Who: Pavel Machek <pavel@ucw.cz>
133 133
134--------------------------- 134---------------------------
135 135
136What: sys_sysctl
137When: September 2010
138Option: CONFIG_SYSCTL_SYSCALL
139Why: The same information is available in a more convenient from
140 /proc/sys, and none of the sysctl variables appear to be
141 important performance wise.
142
143 Binary sysctls are a long standing source of subtle kernel
144 bugs and security issues.
145
146 When I looked several months ago all I could find after
147 searching several distributions were 5 user space programs and
148 glibc (which falls back to /proc/sys) using this syscall.
149
150 The man page for sysctl(2) documents it as unusable for user
151 space programs.
152
153 sysctl(2) is not generally ABI compatible to a 32bit user
154 space application on a 64bit and a 32bit kernel.
155
156 For the last several months the policy has been no new binary
157 sysctls and no one has put forward an argument to use them.
158
159 Binary sysctls issues seem to keep happening appearing so
160 properly deprecating them (with a warning to user space) and a
161 2 year grace warning period will mean eventually we can kill
162 them and end the pain.
163
164 In the mean time individual binary sysctls can be dealt with
165 in a piecewise fashion.
166
167Who: Eric Biederman <ebiederm@xmission.com>
168
169---------------------------
170
171What: /proc/<pid>/oom_adj 136What: /proc/<pid>/oom_adj
172When: August 2012 137When: August 2012
173Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's 138Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
diff --git a/Documentation/rapidio/rapidio.txt b/Documentation/rapidio/rapidio.txt
index be70ee15f8ca..c75694b35d08 100644
--- a/Documentation/rapidio/rapidio.txt
+++ b/Documentation/rapidio/rapidio.txt
@@ -144,7 +144,7 @@ and the default device ID in order to access the device on the active port.
144 144
145After the host has completed enumeration of the entire network it releases 145After the host has completed enumeration of the entire network it releases
146devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint 146devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
147in the system, it sets the Master Enable bit in the Port General Control CSR 147in the system, it sets the Discovered bit in the Port General Control CSR
148to indicate that enumeration is completed and agents are allowed to execute 148to indicate that enumeration is completed and agents are allowed to execute
149passive discovery of the network. 149passive discovery of the network.
150 150
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
new file mode 100644
index 000000000000..335f3c6087dc
--- /dev/null
+++ b/Documentation/rapidio/tsi721.txt
@@ -0,0 +1,49 @@
1RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
2=========================================================================
3
4I. Overview
5
6This driver implements all currently defined RapidIO mport callback functions.
7It supports maintenance read and write operations, inbound and outbound RapidIO
8doorbells, inbound maintenance port-writes and RapidIO messaging.
9
10To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA
11channels. This mechanism provides access to larger range of hop counts and
12destination IDs without need for changes in outbound window translation.
13
14RapidIO messaging support uses dedicated messaging channels for each mailbox.
15For inbound messages this driver uses destination ID matching to forward messages
16into the corresponding message queue. Messaging callbacks are implemented to be
17fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
18
19II. Known problems
20
21 None.
22
23III. To do
24
25 Add DMA data transfers (non-messaging).
26 Add inbound region (SRIO-to-PCIe) mapping.
27
28IV. Version History
29
30 1.0.0 - Initial driver release.
31
32V. License
33-----------------------------------------------
34
35 Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
36
37 This program is free software; you can redistribute it and/or modify it
38 under the terms of the GNU General Public License as published by the Free
39 Software Foundation; either version 2 of the License, or (at your option)
40 any later version.
41
42 This program is distributed in the hope that it will be useful, but WITHOUT
43 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
44 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
45 more details.
46
47 You should have received a copy of the GNU General Public License along with
48 this program; if not, write to the Free Software Foundation, Inc.,
49 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index fec13200868f..d7efdbf640c7 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -16,16 +16,6 @@
16 16
17#ifdef __HAVE_ARCH_PTE_SPECIAL 17#ifdef __HAVE_ARCH_PTE_SPECIAL
18 18
19static inline void get_huge_page_tail(struct page *page)
20{
21 /*
22 * __split_huge_page_refcount() cannot run
23 * from under us.
24 */
25 VM_BUG_ON(atomic_read(&page->_count) < 0);
26 atomic_inc(&page->_count);
27}
28
29/* 19/*
30 * The performance critical leaf functions are made noinline otherwise gcc 20 * The performance critical leaf functions are made noinline otherwise gcc
31 * inlines everything into a single function which results in too much 21 * inlines everything into a single function which results in too much
@@ -57,8 +47,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
57 put_page(page); 47 put_page(page);
58 return 0; 48 return 0;
59 } 49 }
60 if (PageTail(page))
61 get_huge_page_tail(page);
62 pages[*nr] = page; 50 pages[*nr] = page;
63 (*nr)++; 51 (*nr)++;
64 52
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0b9a5c1901b9..da5eb3885702 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -390,7 +390,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
390{ 390{
391 unsigned long mask; 391 unsigned long mask;
392 unsigned long pte_end; 392 unsigned long pte_end;
393 struct page *head, *page; 393 struct page *head, *page, *tail;
394 pte_t pte; 394 pte_t pte;
395 int refs; 395 int refs;
396 396
@@ -413,6 +413,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
413 head = pte_page(pte); 413 head = pte_page(pte);
414 414
415 page = head + ((addr & (sz-1)) >> PAGE_SHIFT); 415 page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
416 tail = page;
416 do { 417 do {
417 VM_BUG_ON(compound_head(page) != head); 418 VM_BUG_ON(compound_head(page) != head);
418 pages[*nr] = page; 419 pages[*nr] = page;
@@ -428,10 +429,20 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
428 429
429 if (unlikely(pte_val(pte) != pte_val(*ptep))) { 430 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
430 /* Could be optimized better */ 431 /* Could be optimized better */
431 while (*nr) { 432 *nr -= refs;
432 put_page(page); 433 while (refs--)
433 (*nr)--; 434 put_page(head);
434 } 435 return 0;
436 }
437
438 /*
439 * Any tail page need their mapcount reference taken before we
440 * return.
441 */
442 while (refs--) {
443 if (PageTail(tail))
444 get_huge_page_tail(tail);
445 tail++;
435 } 446 }
436 447
437 return 1; 448 return 1;
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index c65f75aa7ff7..22ffccd8bef5 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -1608,6 +1608,7 @@ int fsl_rio_setup(struct platform_device *dev)
1608 return 0; 1608 return 0;
1609err: 1609err:
1610 iounmap(priv->regs_win); 1610 iounmap(priv->regs_win);
1611 release_resource(&port->iores);
1611err_res: 1612err_res:
1612 kfree(priv); 1613 kfree(priv);
1613err_priv: 1614err_priv:
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 45b405ca2567..65cb06e2af4e 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -52,7 +52,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
52 unsigned long end, int write, struct page **pages, int *nr) 52 unsigned long end, int write, struct page **pages, int *nr)
53{ 53{
54 unsigned long mask, result; 54 unsigned long mask, result;
55 struct page *head, *page; 55 struct page *head, *page, *tail;
56 int refs; 56 int refs;
57 57
58 result = write ? 0 : _SEGMENT_ENTRY_RO; 58 result = write ? 0 : _SEGMENT_ENTRY_RO;
@@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
64 refs = 0; 64 refs = 0;
65 head = pmd_page(pmd); 65 head = pmd_page(pmd);
66 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); 66 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
67 tail = page;
67 do { 68 do {
68 VM_BUG_ON(compound_head(page) != head); 69 VM_BUG_ON(compound_head(page) != head);
69 pages[*nr] = page; 70 pages[*nr] = page;
@@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
81 *nr -= refs; 82 *nr -= refs;
82 while (refs--) 83 while (refs--)
83 put_page(head); 84 put_page(head);
85 return 0;
86 }
87
88 /*
89 * Any tail page need their mapcount reference taken before we
90 * return.
91 */
92 while (refs--) {
93 if (PageTail(tail))
94 get_huge_page_tail(tail);
95 tail++;
84 } 96 }
85 97
86 return 1; 98 return 1;
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index a986b5d05712..42c55df3aec3 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -56,6 +56,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
56 put_page(head); 56 put_page(head);
57 return 0; 57 return 0;
58 } 58 }
59 if (head != page)
60 get_huge_page_tail(page);
59 61
60 pages[*nr] = page; 62 pages[*nr] = page;
61 (*nr)++; 63 (*nr)++;
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index dbe34b931374..ea305856151c 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -108,16 +108,6 @@ static inline void get_head_page_multiple(struct page *page, int nr)
108 SetPageReferenced(page); 108 SetPageReferenced(page);
109} 109}
110 110
111static inline void get_huge_page_tail(struct page *page)
112{
113 /*
114 * __split_huge_page_refcount() cannot run
115 * from under us.
116 */
117 VM_BUG_ON(atomic_read(&page->_count) < 0);
118 atomic_inc(&page->_count);
119}
120
121static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, 111static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
122 unsigned long end, int write, struct page **pages, int *nr) 112 unsigned long end, int write, struct page **pages, int *nr)
123{ 113{
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 053d36caf955..cd41d403c9df 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -151,7 +151,7 @@ MODULE_LICENSE("GPL");
151struct vmballoon_stats { 151struct vmballoon_stats {
152 unsigned int timer; 152 unsigned int timer;
153 153
154 /* allocation statustics */ 154 /* allocation statistics */
155 unsigned int alloc; 155 unsigned int alloc;
156 unsigned int alloc_fail; 156 unsigned int alloc_fail;
157 unsigned int sleep_alloc; 157 unsigned int sleep_alloc;
@@ -412,6 +412,7 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
412 gfp_t flags; 412 gfp_t flags;
413 unsigned int hv_status; 413 unsigned int hv_status;
414 bool locked = false; 414 bool locked = false;
415 flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
415 416
416 do { 417 do {
417 if (!can_sleep) 418 if (!can_sleep)
@@ -419,7 +420,6 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
419 else 420 else
420 STATS_INC(b->stats.sleep_alloc); 421 STATS_INC(b->stats.sleep_alloc);
421 422
422 flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
423 page = alloc_page(flags); 423 page = alloc_page(flags);
424 if (!page) { 424 if (!page) {
425 if (!can_sleep) 425 if (!can_sleep)
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 3bb131137033..7145714a5ec9 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -88,8 +88,8 @@ static struct rio_dev **rionet_active;
88#define dev_rionet_capable(dev) \ 88#define dev_rionet_capable(dev) \
89 is_rionet_capable(dev->src_ops, dev->dst_ops) 89 is_rionet_capable(dev->src_ops, dev->dst_ops)
90 90
91#define RIONET_MAC_MATCH(x) (*(u32 *)x == 0x00010001) 91#define RIONET_MAC_MATCH(x) (!memcmp((x), "\00\01\00\01", 4))
92#define RIONET_GET_DESTID(x) (*(u16 *)(x + 4)) 92#define RIONET_GET_DESTID(x) ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5))
93 93
94static int rionet_rx_clean(struct net_device *ndev) 94static int rionet_rx_clean(struct net_device *ndev)
95{ 95{
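
[Editor's note] The macro change above fixes an endianness bug: the old definitions reinterpreted the Ethernet address bytes as a host-endian u32/u16, which only yields the intended values on big-endian CPUs, while the new definitions compare and assemble individual bytes and are therefore byte-order independent. A standalone illustration of the same idea (hypothetical helper names, plain C, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Match the 00:01:00:01 MAC prefix independent of host byte order. */
    static int rionet_mac_match(const uint8_t *mac)
    {
            return memcmp(mac, "\x00\x01\x00\x01", 4) == 0;
    }

    /* The destination ID sits big-endian in bytes 4 and 5 of the MAC. */
    static uint16_t rionet_get_destid(const uint8_t *mac)
    {
            return (uint16_t)((mac[4] << 8) | mac[5]);
    }
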
diff --git a/drivers/power/ds2780_battery.c b/drivers/power/ds2780_battery.c
index 1fefe82e12e3..91a783d72360 100644
--- a/drivers/power/ds2780_battery.c
+++ b/drivers/power/ds2780_battery.c
@@ -39,6 +39,7 @@ struct ds2780_device_info {
39 struct device *dev; 39 struct device *dev;
40 struct power_supply bat; 40 struct power_supply bat;
41 struct device *w1_dev; 41 struct device *w1_dev;
42 struct task_struct *mutex_holder;
42}; 43};
43 44
44enum current_types { 45enum current_types {
@@ -49,8 +50,8 @@ enum current_types {
49static const char model[] = "DS2780"; 50static const char model[] = "DS2780";
50static const char manufacturer[] = "Maxim/Dallas"; 51static const char manufacturer[] = "Maxim/Dallas";
51 52
52static inline struct ds2780_device_info *to_ds2780_device_info( 53static inline struct ds2780_device_info *
53 struct power_supply *psy) 54to_ds2780_device_info(struct power_supply *psy)
54{ 55{
55 return container_of(psy, struct ds2780_device_info, bat); 56 return container_of(psy, struct ds2780_device_info, bat);
56} 57}
@@ -60,17 +61,28 @@ static inline struct power_supply *to_power_supply(struct device *dev)
60 return dev_get_drvdata(dev); 61 return dev_get_drvdata(dev);
61} 62}
62 63
63static inline int ds2780_read8(struct device *dev, u8 *val, int addr) 64static inline int ds2780_battery_io(struct ds2780_device_info *dev_info,
65 char *buf, int addr, size_t count, int io)
64{ 66{
65 return w1_ds2780_io(dev, val, addr, sizeof(u8), 0); 67 if (dev_info->mutex_holder == current)
68 return w1_ds2780_io_nolock(dev_info->w1_dev, buf, addr, count, io);
69 else
70 return w1_ds2780_io(dev_info->w1_dev, buf, addr, count, io);
71}
72
73static inline int ds2780_read8(struct ds2780_device_info *dev_info, u8 *val,
74 int addr)
75{
76 return ds2780_battery_io(dev_info, val, addr, sizeof(u8), 0);
66} 77}
67 78
68static int ds2780_read16(struct device *dev, s16 *val, int addr) 79static int ds2780_read16(struct ds2780_device_info *dev_info, s16 *val,
80 int addr)
69{ 81{
70 int ret; 82 int ret;
71 u8 raw[2]; 83 u8 raw[2];
72 84
73 ret = w1_ds2780_io(dev, raw, addr, sizeof(u8) * 2, 0); 85 ret = ds2780_battery_io(dev_info, raw, addr, sizeof(raw), 0);
74 if (ret < 0) 86 if (ret < 0)
75 return ret; 87 return ret;
76 88
@@ -79,16 +91,16 @@ static int ds2780_read16(struct device *dev, s16 *val, int addr)
79 return 0; 91 return 0;
80} 92}
81 93
82static inline int ds2780_read_block(struct device *dev, u8 *val, int addr, 94static inline int ds2780_read_block(struct ds2780_device_info *dev_info,
83 size_t count) 95 u8 *val, int addr, size_t count)
84{ 96{
85 return w1_ds2780_io(dev, val, addr, count, 0); 97 return ds2780_battery_io(dev_info, val, addr, count, 0);
86} 98}
87 99
88static inline int ds2780_write(struct device *dev, u8 *val, int addr, 100static inline int ds2780_write(struct ds2780_device_info *dev_info, u8 *val,
89 size_t count) 101 int addr, size_t count)
90{ 102{
91 return w1_ds2780_io(dev, val, addr, count, 1); 103 return ds2780_battery_io(dev_info, val, addr, count, 1);
92} 104}
93 105
94static inline int ds2780_store_eeprom(struct device *dev, int addr) 106static inline int ds2780_store_eeprom(struct device *dev, int addr)
@@ -122,7 +134,7 @@ static int ds2780_set_sense_register(struct ds2780_device_info *dev_info,
122{ 134{
123 int ret; 135 int ret;
124 136
125 ret = ds2780_write(dev_info->w1_dev, &conductance, 137 ret = ds2780_write(dev_info, &conductance,
126 DS2780_RSNSP_REG, sizeof(u8)); 138 DS2780_RSNSP_REG, sizeof(u8));
127 if (ret < 0) 139 if (ret < 0)
128 return ret; 140 return ret;
@@ -134,7 +146,7 @@ static int ds2780_set_sense_register(struct ds2780_device_info *dev_info,
134static int ds2780_get_rsgain_register(struct ds2780_device_info *dev_info, 146static int ds2780_get_rsgain_register(struct ds2780_device_info *dev_info,
135 u16 *rsgain) 147 u16 *rsgain)
136{ 148{
137 return ds2780_read16(dev_info->w1_dev, rsgain, DS2780_RSGAIN_MSB_REG); 149 return ds2780_read16(dev_info, rsgain, DS2780_RSGAIN_MSB_REG);
138} 150}
139 151
140/* Set RSGAIN value from 0 to 1.999 in steps of 0.001 */ 152/* Set RSGAIN value from 0 to 1.999 in steps of 0.001 */
@@ -144,8 +156,8 @@ static int ds2780_set_rsgain_register(struct ds2780_device_info *dev_info,
144 int ret; 156 int ret;
145 u8 raw[] = {rsgain >> 8, rsgain & 0xFF}; 157 u8 raw[] = {rsgain >> 8, rsgain & 0xFF};
146 158
147 ret = ds2780_write(dev_info->w1_dev, raw, 159 ret = ds2780_write(dev_info, raw,
148 DS2780_RSGAIN_MSB_REG, sizeof(u8) * 2); 160 DS2780_RSGAIN_MSB_REG, sizeof(raw));
149 if (ret < 0) 161 if (ret < 0)
150 return ret; 162 return ret;
151 163
@@ -167,7 +179,7 @@ static int ds2780_get_voltage(struct ds2780_device_info *dev_info,
167 * Bits 2 - 0 of the voltage value are in bits 7 - 5 of the 179 * Bits 2 - 0 of the voltage value are in bits 7 - 5 of the
168 * voltage LSB register 180 * voltage LSB register
169 */ 181 */
170 ret = ds2780_read16(dev_info->w1_dev, &voltage_raw, 182 ret = ds2780_read16(dev_info, &voltage_raw,
171 DS2780_VOLT_MSB_REG); 183 DS2780_VOLT_MSB_REG);
172 if (ret < 0) 184 if (ret < 0)
173 return ret; 185 return ret;
@@ -196,7 +208,7 @@ static int ds2780_get_temperature(struct ds2780_device_info *dev_info,
196 * Bits 2 - 0 of the temperature value are in bits 7 - 5 of the 208 * Bits 2 - 0 of the temperature value are in bits 7 - 5 of the
197 * temperature LSB register 209 * temperature LSB register
198 */ 210 */
199 ret = ds2780_read16(dev_info->w1_dev, &temperature_raw, 211 ret = ds2780_read16(dev_info, &temperature_raw,
200 DS2780_TEMP_MSB_REG); 212 DS2780_TEMP_MSB_REG);
201 if (ret < 0) 213 if (ret < 0)
202 return ret; 214 return ret;
@@ -222,13 +234,13 @@ static int ds2780_get_current(struct ds2780_device_info *dev_info,
222 * The units of measurement for current are dependent on the value of 234 * The units of measurement for current are dependent on the value of
223 * the sense resistor. 235 * the sense resistor.
224 */ 236 */
225 ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG); 237 ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG);
226 if (ret < 0) 238 if (ret < 0)
227 return ret; 239 return ret;
228 240
229 if (sense_res_raw == 0) { 241 if (sense_res_raw == 0) {
230 dev_err(dev_info->dev, "sense resistor value is 0\n"); 242 dev_err(dev_info->dev, "sense resistor value is 0\n");
231 return -ENXIO; 243 return -EINVAL;
232 } 244 }
233 sense_res = 1000 / sense_res_raw; 245 sense_res = 1000 / sense_res_raw;
234 246
@@ -248,7 +260,7 @@ static int ds2780_get_current(struct ds2780_device_info *dev_info,
248 * Bits 7 - 0 of the current value are in bits 7 - 0 of the current 260 * Bits 7 - 0 of the current value are in bits 7 - 0 of the current
249 * LSB register 261 * LSB register
250 */ 262 */
251 ret = ds2780_read16(dev_info->w1_dev, &current_raw, reg_msb); 263 ret = ds2780_read16(dev_info, &current_raw, reg_msb);
252 if (ret < 0) 264 if (ret < 0)
253 return ret; 265 return ret;
254 266
@@ -267,7 +279,7 @@ static int ds2780_get_accumulated_current(struct ds2780_device_info *dev_info,
267 * The units of measurement for accumulated current are dependent on 279 * The units of measurement for accumulated current are dependent on
268 * the value of the sense resistor. 280 * the value of the sense resistor.
269 */ 281 */
270 ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG); 282 ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG);
271 if (ret < 0) 283 if (ret < 0)
272 return ret; 284 return ret;
273 285
@@ -285,7 +297,7 @@ static int ds2780_get_accumulated_current(struct ds2780_device_info *dev_info,
285 * Bits 7 - 0 of the ACR value are in bits 7 - 0 of the ACR 297 * Bits 7 - 0 of the ACR value are in bits 7 - 0 of the ACR
286 * LSB register 298 * LSB register
287 */ 299 */
288 ret = ds2780_read16(dev_info->w1_dev, &current_raw, DS2780_ACR_MSB_REG); 300 ret = ds2780_read16(dev_info, &current_raw, DS2780_ACR_MSB_REG);
289 if (ret < 0) 301 if (ret < 0)
290 return ret; 302 return ret;
291 303
@@ -299,7 +311,7 @@ static int ds2780_get_capacity(struct ds2780_device_info *dev_info,
299 int ret; 311 int ret;
300 u8 raw; 312 u8 raw;
301 313
302 ret = ds2780_read8(dev_info->w1_dev, &raw, DS2780_RARC_REG); 314 ret = ds2780_read8(dev_info, &raw, DS2780_RARC_REG);
303 if (ret < 0) 315 if (ret < 0)
304 return ret; 316 return ret;
305 317
@@ -345,7 +357,7 @@ static int ds2780_get_charge_now(struct ds2780_device_info *dev_info,
345 * Bits 7 - 0 of the RAAC value are in bits 7 - 0 of the RAAC 357 * Bits 7 - 0 of the RAAC value are in bits 7 - 0 of the RAAC
346 * LSB register 358 * LSB register
347 */ 359 */
348 ret = ds2780_read16(dev_info->w1_dev, &charge_raw, DS2780_RAAC_MSB_REG); 360 ret = ds2780_read16(dev_info, &charge_raw, DS2780_RAAC_MSB_REG);
349 if (ret < 0) 361 if (ret < 0)
350 return ret; 362 return ret;
351 363
@@ -356,7 +368,7 @@ static int ds2780_get_charge_now(struct ds2780_device_info *dev_info,
356static int ds2780_get_control_register(struct ds2780_device_info *dev_info, 368static int ds2780_get_control_register(struct ds2780_device_info *dev_info,
357 u8 *control_reg) 369 u8 *control_reg)
358{ 370{
359 return ds2780_read8(dev_info->w1_dev, control_reg, DS2780_CONTROL_REG); 371 return ds2780_read8(dev_info, control_reg, DS2780_CONTROL_REG);
360} 372}
361 373
362static int ds2780_set_control_register(struct ds2780_device_info *dev_info, 374static int ds2780_set_control_register(struct ds2780_device_info *dev_info,
@@ -364,7 +376,7 @@ static int ds2780_set_control_register(struct ds2780_device_info *dev_info,
364{ 376{
365 int ret; 377 int ret;
366 378
367 ret = ds2780_write(dev_info->w1_dev, &control_reg, 379 ret = ds2780_write(dev_info, &control_reg,
368 DS2780_CONTROL_REG, sizeof(u8)); 380 DS2780_CONTROL_REG, sizeof(u8));
369 if (ret < 0) 381 if (ret < 0)
370 return ret; 382 return ret;
@@ -503,7 +515,7 @@ static ssize_t ds2780_get_sense_resistor_value(struct device *dev,
503 struct power_supply *psy = to_power_supply(dev); 515 struct power_supply *psy = to_power_supply(dev);
504 struct ds2780_device_info *dev_info = to_ds2780_device_info(psy); 516 struct ds2780_device_info *dev_info = to_ds2780_device_info(psy);
505 517
506 ret = ds2780_read8(dev_info->w1_dev, &sense_resistor, DS2780_RSNSP_REG); 518 ret = ds2780_read8(dev_info, &sense_resistor, DS2780_RSNSP_REG);
507 if (ret < 0) 519 if (ret < 0)
508 return ret; 520 return ret;
509 521
@@ -584,7 +596,7 @@ static ssize_t ds2780_get_pio_pin(struct device *dev,
584 struct power_supply *psy = to_power_supply(dev); 596 struct power_supply *psy = to_power_supply(dev);
585 struct ds2780_device_info *dev_info = to_ds2780_device_info(psy); 597 struct ds2780_device_info *dev_info = to_ds2780_device_info(psy);
586 598
587 ret = ds2780_read8(dev_info->w1_dev, &sfr, DS2780_SFR_REG); 599 ret = ds2780_read8(dev_info, &sfr, DS2780_SFR_REG);
588 if (ret < 0) 600 if (ret < 0)
589 return ret; 601 return ret;
590 602
@@ -611,7 +623,7 @@ static ssize_t ds2780_set_pio_pin(struct device *dev,
611 return -EINVAL; 623 return -EINVAL;
612 } 624 }
613 625
614 ret = ds2780_write(dev_info->w1_dev, &new_setting, 626 ret = ds2780_write(dev_info, &new_setting,
615 DS2780_SFR_REG, sizeof(u8)); 627 DS2780_SFR_REG, sizeof(u8));
616 if (ret < 0) 628 if (ret < 0)
617 return ret; 629 return ret;
@@ -632,7 +644,7 @@ static ssize_t ds2780_read_param_eeprom_bin(struct file *filp,
632 DS2780_EEPROM_BLOCK1_END - 644 DS2780_EEPROM_BLOCK1_END -
633 DS2780_EEPROM_BLOCK1_START + 1 - off); 645 DS2780_EEPROM_BLOCK1_START + 1 - off);
634 646
635 return ds2780_read_block(dev_info->w1_dev, buf, 647 return ds2780_read_block(dev_info, buf,
636 DS2780_EEPROM_BLOCK1_START + off, count); 648 DS2780_EEPROM_BLOCK1_START + off, count);
637} 649}
638 650
@@ -650,7 +662,7 @@ static ssize_t ds2780_write_param_eeprom_bin(struct file *filp,
650 DS2780_EEPROM_BLOCK1_END - 662 DS2780_EEPROM_BLOCK1_END -
651 DS2780_EEPROM_BLOCK1_START + 1 - off); 663 DS2780_EEPROM_BLOCK1_START + 1 - off);
652 664
653 ret = ds2780_write(dev_info->w1_dev, buf, 665 ret = ds2780_write(dev_info, buf,
654 DS2780_EEPROM_BLOCK1_START + off, count); 666 DS2780_EEPROM_BLOCK1_START + off, count);
655 if (ret < 0) 667 if (ret < 0)
656 return ret; 668 return ret;
@@ -685,9 +697,8 @@ static ssize_t ds2780_read_user_eeprom_bin(struct file *filp,
685 DS2780_EEPROM_BLOCK0_END - 697 DS2780_EEPROM_BLOCK0_END -
686 DS2780_EEPROM_BLOCK0_START + 1 - off); 698 DS2780_EEPROM_BLOCK0_START + 1 - off);
687 699
688 return ds2780_read_block(dev_info->w1_dev, buf, 700 return ds2780_read_block(dev_info, buf,
689 DS2780_EEPROM_BLOCK0_START + off, count); 701 DS2780_EEPROM_BLOCK0_START + off, count);
690
691} 702}
692 703
693static ssize_t ds2780_write_user_eeprom_bin(struct file *filp, 704static ssize_t ds2780_write_user_eeprom_bin(struct file *filp,
@@ -704,7 +715,7 @@ static ssize_t ds2780_write_user_eeprom_bin(struct file *filp,
704 DS2780_EEPROM_BLOCK0_END - 715 DS2780_EEPROM_BLOCK0_END -
705 DS2780_EEPROM_BLOCK0_START + 1 - off); 716 DS2780_EEPROM_BLOCK0_START + 1 - off);
706 717
707 ret = ds2780_write(dev_info->w1_dev, buf, 718 ret = ds2780_write(dev_info, buf,
708 DS2780_EEPROM_BLOCK0_START + off, count); 719 DS2780_EEPROM_BLOCK0_START + off, count);
709 if (ret < 0) 720 if (ret < 0)
710 return ret; 721 return ret;
@@ -768,6 +779,7 @@ static int __devinit ds2780_battery_probe(struct platform_device *pdev)
768 dev_info->bat.properties = ds2780_battery_props; 779 dev_info->bat.properties = ds2780_battery_props;
769 dev_info->bat.num_properties = ARRAY_SIZE(ds2780_battery_props); 780 dev_info->bat.num_properties = ARRAY_SIZE(ds2780_battery_props);
770 dev_info->bat.get_property = ds2780_battery_get_property; 781 dev_info->bat.get_property = ds2780_battery_get_property;
782 dev_info->mutex_holder = current;
771 783
772 ret = power_supply_register(&pdev->dev, &dev_info->bat); 784 ret = power_supply_register(&pdev->dev, &dev_info->bat);
773 if (ret) { 785 if (ret) {
@@ -797,6 +809,8 @@ static int __devinit ds2780_battery_probe(struct platform_device *pdev)
797 goto fail_remove_bin_file; 809 goto fail_remove_bin_file;
798 } 810 }
799 811
812 dev_info->mutex_holder = NULL;
813
800 return 0; 814 return 0;
801 815
802fail_remove_bin_file: 816fail_remove_bin_file:
@@ -816,6 +830,8 @@ static int __devexit ds2780_battery_remove(struct platform_device *pdev)
816{ 830{
817 struct ds2780_device_info *dev_info = platform_get_drvdata(pdev); 831 struct ds2780_device_info *dev_info = platform_get_drvdata(pdev);
818 832
833 dev_info->mutex_holder = current;
834
819 /* remove attributes */ 835 /* remove attributes */
820 sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2780_attr_group); 836 sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2780_attr_group);
821 837
diff --git a/drivers/pps/clients/Kconfig b/drivers/pps/clients/Kconfig
index 8520a7f4dd62..445197d4a8c4 100644
--- a/drivers/pps/clients/Kconfig
+++ b/drivers/pps/clients/Kconfig
@@ -29,4 +29,13 @@ config PPS_CLIENT_PARPORT
29 If you say yes here you get support for a PPS source connected 29 If you say yes here you get support for a PPS source connected
30 with the interrupt pin of your parallel port. 30 with the interrupt pin of your parallel port.
31 31
32config PPS_CLIENT_GPIO
33 tristate "PPS client using GPIO"
34 depends on PPS && GENERIC_HARDIRQS
35 help
36 If you say yes here you get support for a PPS source using
37 GPIO. To be useful you must also register a platform device
38 specifying the GPIO pin and other options, usually in your board
39 setup.
40
32endif 41endif
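
[Editor's note] The help text above notes that the driver only binds once board code registers a matching platform device. A minimal board-setup sketch, assuming the pps_gpio_platform_data fields the driver below actually reads (gpio_pin, gpio_label, assert_falling_edge, capture_clear); the GPIO number used here is a placeholder, not taken from the patch:

    #include <linux/init.h>
    #include <linux/platform_device.h>
    #include <linux/pps-gpio.h>

    static struct pps_gpio_platform_data pps_gpio_info = {
            .assert_falling_edge = false,   /* PPS asserted on the rising edge */
            .capture_clear       = false,   /* no CAPTURECLEAR events */
            .gpio_pin            = 42,      /* placeholder: board-specific GPIO */
            .gpio_label          = "PPS",
    };

    static struct platform_device pps_gpio_device = {
            .name = "pps-gpio",             /* must match the driver name */
            .id   = -1,
            .dev  = {
                    .platform_data = &pps_gpio_info,
            },
    };

    /* call this from the board's init path */
    static int __init board_pps_init(void)
    {
            return platform_device_register(&pps_gpio_device);
    }
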
diff --git a/drivers/pps/clients/Makefile b/drivers/pps/clients/Makefile
index 4feb7e9e71ee..a461d15f4a2e 100644
--- a/drivers/pps/clients/Makefile
+++ b/drivers/pps/clients/Makefile
@@ -5,5 +5,6 @@
5obj-$(CONFIG_PPS_CLIENT_KTIMER) += pps-ktimer.o 5obj-$(CONFIG_PPS_CLIENT_KTIMER) += pps-ktimer.o
6obj-$(CONFIG_PPS_CLIENT_LDISC) += pps-ldisc.o 6obj-$(CONFIG_PPS_CLIENT_LDISC) += pps-ldisc.o
7obj-$(CONFIG_PPS_CLIENT_PARPORT) += pps_parport.o 7obj-$(CONFIG_PPS_CLIENT_PARPORT) += pps_parport.o
8obj-$(CONFIG_PPS_CLIENT_GPIO) += pps-gpio.o
8 9
9ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG 10ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
new file mode 100644
index 000000000000..655055545479
--- /dev/null
+++ b/drivers/pps/clients/pps-gpio.c
@@ -0,0 +1,227 @@
1/*
2 * pps-gpio.c -- PPS client driver using GPIO
3 *
4 *
5 * Copyright (C) 2010 Ricardo Martins <rasm@fe.up.pt>
6 * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 */
22
23#define PPS_GPIO_NAME "pps-gpio"
24#define pr_fmt(fmt) PPS_GPIO_NAME ": " fmt
25
26#include <linux/init.h>
27#include <linux/kernel.h>
28#include <linux/interrupt.h>
29#include <linux/module.h>
30#include <linux/platform_device.h>
31#include <linux/slab.h>
32#include <linux/pps_kernel.h>
33#include <linux/pps-gpio.h>
34#include <linux/gpio.h>
35#include <linux/list.h>
36
37/* Info for each registered platform device */
38struct pps_gpio_device_data {
39 int irq; /* IRQ used as PPS source */
40 struct pps_device *pps; /* PPS source device */
41 struct pps_source_info info; /* PPS source information */
42 const struct pps_gpio_platform_data *pdata;
43};
44
45/*
46 * Report the PPS event
47 */
48
49static irqreturn_t pps_gpio_irq_handler(int irq, void *data)
50{
51 const struct pps_gpio_device_data *info;
52 struct pps_event_time ts;
53 int rising_edge;
54
55 /* Get the time stamp first */
56 pps_get_ts(&ts);
57
58 info = data;
59
60 rising_edge = gpio_get_value(info->pdata->gpio_pin);
61 if ((rising_edge && !info->pdata->assert_falling_edge) ||
62 (!rising_edge && info->pdata->assert_falling_edge))
63 pps_event(info->pps, &ts, PPS_CAPTUREASSERT, NULL);
64 else if (info->pdata->capture_clear &&
65 ((rising_edge && info->pdata->assert_falling_edge) ||
66 (!rising_edge && !info->pdata->assert_falling_edge)))
67 pps_event(info->pps, &ts, PPS_CAPTURECLEAR, NULL);
68
69 return IRQ_HANDLED;
70}
71
72static int pps_gpio_setup(struct platform_device *pdev)
73{
74 int ret;
75 const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data;
76
77 ret = gpio_request(pdata->gpio_pin, pdata->gpio_label);
78 if (ret) {
79 pr_warning("failed to request GPIO %u\n", pdata->gpio_pin);
80 return -EINVAL;
81 }
82
83 ret = gpio_direction_input(pdata->gpio_pin);
84 if (ret) {
85 pr_warning("failed to set pin direction\n");
86 gpio_free(pdata->gpio_pin);
87 return -EINVAL;
88 }
89
90 return 0;
91}
92
93static unsigned long
94get_irqf_trigger_flags(const struct pps_gpio_platform_data *pdata)
95{
96 unsigned long flags = pdata->assert_falling_edge ?
97 IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING;
98
99 if (pdata->capture_clear) {
100 flags |= ((flags & IRQF_TRIGGER_RISING) ?
101 IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING);
102 }
103
104 return flags;
105}
106
107static int pps_gpio_probe(struct platform_device *pdev)
108{
109 struct pps_gpio_device_data *data;
110 int irq;
111 int ret;
112 int err;
113 int pps_default_params;
114 const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data;
115
116
117 /* GPIO setup */
118 ret = pps_gpio_setup(pdev);
119 if (ret)
120 return -EINVAL;
121
122 /* IRQ setup */
123 irq = gpio_to_irq(pdata->gpio_pin);
124 if (irq < 0) {
125 pr_err("failed to map GPIO to IRQ: %d\n", irq);
126 err = -EINVAL;
127 goto return_error;
128 }
129
130 /* allocate space for device info */
131 data = kzalloc(sizeof(struct pps_gpio_device_data), GFP_KERNEL);
132 if (data == NULL) {
133 err = -ENOMEM;
134 goto return_error;
135 }
136
137 /* initialize PPS specific parts of the bookkeeping data structure. */
138 data->info.mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
139 PPS_ECHOASSERT | PPS_CANWAIT | PPS_TSFMT_TSPEC;
140 if (pdata->capture_clear)
141 data->info.mode |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR |
142 PPS_ECHOCLEAR;
143 data->info.owner = THIS_MODULE;
144 snprintf(data->info.name, PPS_MAX_NAME_LEN - 1, "%s.%d",
145 pdev->name, pdev->id);
146
147 /* register PPS source */
148 pps_default_params = PPS_CAPTUREASSERT | PPS_OFFSETASSERT;
149 if (pdata->capture_clear)
150 pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR;
151 data->pps = pps_register_source(&data->info, pps_default_params);
152 if (data->pps == NULL) {
153 kfree(data);
154 pr_err("failed to register IRQ %d as PPS source\n", irq);
155 err = -EINVAL;
156 goto return_error;
157 }
158
159 data->irq = irq;
160 data->pdata = pdata;
161
162 /* register IRQ interrupt handler */
163 ret = request_irq(irq, pps_gpio_irq_handler,
164 get_irqf_trigger_flags(pdata), data->info.name, data);
165 if (ret) {
166 pps_unregister_source(data->pps);
167 kfree(data);
168 pr_err("failed to acquire IRQ %d\n", irq);
169 err = -EINVAL;
170 goto return_error;
171 }
172
173 platform_set_drvdata(pdev, data);
174 dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", irq);
175
176 return 0;
177
178return_error:
179 gpio_free(pdata->gpio_pin);
180 return err;
181}
182
183static int pps_gpio_remove(struct platform_device *pdev)
184{
185 struct pps_gpio_device_data *data = platform_get_drvdata(pdev);
186 const struct pps_gpio_platform_data *pdata = data->pdata;
187
188 platform_set_drvdata(pdev, NULL);
189 free_irq(data->irq, data);
190 gpio_free(pdata->gpio_pin);
191 pps_unregister_source(data->pps);
192 pr_info("removed IRQ %d as PPS source\n", data->irq);
193 kfree(data);
194 return 0;
195}
196
197static struct platform_driver pps_gpio_driver = {
198 .probe = pps_gpio_probe,
199 .remove = __devexit_p(pps_gpio_remove),
200 .driver = {
201 .name = PPS_GPIO_NAME,
202 .owner = THIS_MODULE
203 },
204};
205
206static int __init pps_gpio_init(void)
207{
208 int ret = platform_driver_register(&pps_gpio_driver);
209 if (ret < 0)
210 pr_err("failed to register platform driver\n");
211 return ret;
212}
213
214static void __exit pps_gpio_exit(void)
215{
216 platform_driver_unregister(&pps_gpio_driver);
217 pr_debug("unregistered platform driver\n");
218}
219
220module_init(pps_gpio_init);
221module_exit(pps_gpio_exit);
222
223MODULE_AUTHOR("Ricardo Martins <rasm@fe.up.pt>");
224MODULE_AUTHOR("James Nuss <jamesnuss@nanometrics.ca>");
225MODULE_DESCRIPTION("Use GPIO pin as PPS source");
226MODULE_LICENSE("GPL");
227MODULE_VERSION("1.0.0");
diff --git a/drivers/pps/clients/pps-ktimer.c b/drivers/pps/clients/pps-ktimer.c
index 82583b0ff82d..436b4e4e71a1 100644
--- a/drivers/pps/clients/pps-ktimer.c
+++ b/drivers/pps/clients/pps-ktimer.c
@@ -52,17 +52,6 @@ static void pps_ktimer_event(unsigned long ptr)
52} 52}
53 53
54/* 54/*
55 * The echo function
56 */
57
58static void pps_ktimer_echo(struct pps_device *pps, int event, void *data)
59{
60 dev_info(pps->dev, "echo %s %s\n",
61 event & PPS_CAPTUREASSERT ? "assert" : "",
62 event & PPS_CAPTURECLEAR ? "clear" : "");
63}
64
65/*
66 * The PPS info struct 55 * The PPS info struct
67 */ 56 */
68 57
@@ -72,7 +61,6 @@ static struct pps_source_info pps_ktimer_info = {
72 .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | 61 .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
73 PPS_ECHOASSERT | 62 PPS_ECHOASSERT |
74 PPS_CANWAIT | PPS_TSFMT_TSPEC, 63 PPS_CANWAIT | PPS_TSFMT_TSPEC,
75 .echo = pps_ktimer_echo,
76 .owner = THIS_MODULE, 64 .owner = THIS_MODULE,
77}; 65};
78 66
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index c571d6dd8f61..e1b4705ae3ec 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -133,14 +133,6 @@ out_both:
133 return; 133 return;
134} 134}
135 135
136/* the PPS echo function */
137static void pps_echo(struct pps_device *pps, int event, void *data)
138{
139 dev_info(pps->dev, "echo %s %s\n",
140 event & PPS_CAPTUREASSERT ? "assert" : "",
141 event & PPS_CAPTURECLEAR ? "clear" : "");
142}
143
144static void parport_attach(struct parport *port) 136static void parport_attach(struct parport *port)
145{ 137{
146 struct pps_client_pp *device; 138 struct pps_client_pp *device;
@@ -151,7 +143,6 @@ static void parport_attach(struct parport *port)
151 PPS_OFFSETASSERT | PPS_OFFSETCLEAR | \ 143 PPS_OFFSETASSERT | PPS_OFFSETCLEAR | \
152 PPS_ECHOASSERT | PPS_ECHOCLEAR | \ 144 PPS_ECHOASSERT | PPS_ECHOCLEAR | \
153 PPS_CANWAIT | PPS_TSFMT_TSPEC, 145 PPS_CANWAIT | PPS_TSFMT_TSPEC,
154 .echo = pps_echo,
155 .owner = THIS_MODULE, 146 .owner = THIS_MODULE,
156 .dev = NULL 147 .dev = NULL
157 }; 148 };
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
index a4e8eb9fece6..f197e8ea185c 100644
--- a/drivers/pps/kapi.c
+++ b/drivers/pps/kapi.c
@@ -52,6 +52,14 @@ static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
52 ts->sec += offset->sec; 52 ts->sec += offset->sec;
53} 53}
54 54
55static void pps_echo_client_default(struct pps_device *pps, int event,
56 void *data)
57{
58 dev_info(pps->dev, "echo %s %s\n",
59 event & PPS_CAPTUREASSERT ? "assert" : "",
60 event & PPS_CAPTURECLEAR ? "clear" : "");
61}
62
55/* 63/*
56 * Exported functions 64 * Exported functions
57 */ 65 */
@@ -80,13 +88,6 @@ struct pps_device *pps_register_source(struct pps_source_info *info,
80 err = -EINVAL; 88 err = -EINVAL;
81 goto pps_register_source_exit; 89 goto pps_register_source_exit;
82 } 90 }
83 if ((info->mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) != 0 &&
84 info->echo == NULL) {
85 pr_err("%s: echo function is not defined\n",
86 info->name);
87 err = -EINVAL;
88 goto pps_register_source_exit;
89 }
90 if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) { 91 if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
91 pr_err("%s: unspecified time format\n", 92 pr_err("%s: unspecified time format\n",
92 info->name); 93 info->name);
@@ -108,6 +109,11 @@ struct pps_device *pps_register_source(struct pps_source_info *info,
108 pps->params.mode = default_params; 109 pps->params.mode = default_params;
109 pps->info = *info; 110 pps->info = *info;
110 111
112 /* check for default echo function */
113 if ((pps->info.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) &&
114 pps->info.echo == NULL)
115 pps->info.echo = pps_echo_client_default;
116
111 init_waitqueue_head(&pps->queue); 117 init_waitqueue_head(&pps->queue);
112 spin_lock_init(&pps->lock); 118 spin_lock_init(&pps->lock);
113 119
diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
index 070211a5955c..bc8719238793 100644
--- a/drivers/rapidio/Kconfig
+++ b/drivers/rapidio/Kconfig
@@ -1,6 +1,8 @@
1# 1#
2# RapidIO configuration 2# RapidIO configuration
3# 3#
4source "drivers/rapidio/devices/Kconfig"
5
4config RAPIDIO_DISC_TIMEOUT 6config RAPIDIO_DISC_TIMEOUT
5 int "Discovery timeout duration (seconds)" 7 int "Discovery timeout duration (seconds)"
6 depends on RAPIDIO 8 depends on RAPIDIO
@@ -20,8 +22,6 @@ config RAPIDIO_ENABLE_RX_TX_PORTS
20 ports for Input/Output direction to allow other traffic 22 ports for Input/Output direction to allow other traffic
21 than Maintenance transfers. 23 than Maintenance transfers.
22 24
23source "drivers/rapidio/switches/Kconfig"
24
25config RAPIDIO_DEBUG 25config RAPIDIO_DEBUG
26 bool "RapidIO subsystem debug messages" 26 bool "RapidIO subsystem debug messages"
27 depends on RAPIDIO 27 depends on RAPIDIO
@@ -32,3 +32,5 @@ config RAPIDIO_DEBUG
32 going on. 32 going on.
33 33
34 If you are unsure about this, say N here. 34 If you are unsure about this, say N here.
35
36source "drivers/rapidio/switches/Kconfig"
diff --git a/drivers/rapidio/Makefile b/drivers/rapidio/Makefile
index 89b8eca825b5..ec3fb8121004 100644
--- a/drivers/rapidio/Makefile
+++ b/drivers/rapidio/Makefile
@@ -4,5 +4,6 @@
4obj-y += rio.o rio-access.o rio-driver.o rio-scan.o rio-sysfs.o 4obj-y += rio.o rio-access.o rio-driver.o rio-scan.o rio-sysfs.o
5 5
6obj-$(CONFIG_RAPIDIO) += switches/ 6obj-$(CONFIG_RAPIDIO) += switches/
7obj-$(CONFIG_RAPIDIO) += devices/
7 8
8subdir-ccflags-$(CONFIG_RAPIDIO_DEBUG) := -DDEBUG 9subdir-ccflags-$(CONFIG_RAPIDIO_DEBUG) := -DDEBUG
diff --git a/drivers/rapidio/devices/Kconfig b/drivers/rapidio/devices/Kconfig
new file mode 100644
index 000000000000..12a9d7f7040b
--- /dev/null
+++ b/drivers/rapidio/devices/Kconfig
@@ -0,0 +1,10 @@
1#
2# RapidIO master port configuration
3#
4
5config RAPIDIO_TSI721
6 bool "IDT Tsi721 PCI Express SRIO Controller support"
7 depends on RAPIDIO && PCIEPORTBUS
8 default "n"
9 ---help---
10 Include support for IDT Tsi721 PCI Express Serial RapidIO controller.
diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile
new file mode 100644
index 000000000000..3b7b4e2dff7c
--- /dev/null
+++ b/drivers/rapidio/devices/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for RapidIO devices
3#
4
5obj-$(CONFIG_RAPIDIO_TSI721) += tsi721.o
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
new file mode 100644
index 000000000000..5225930a10cd
--- /dev/null
+++ b/drivers/rapidio/devices/tsi721.c
@@ -0,0 +1,2360 @@
1/*
2 * RapidIO mport driver for Tsi721 PCIExpress-to-SRIO bridge
3 *
4 * Copyright 2011 Integrated Device Technology, Inc.
5 * Alexandre Bounine <alexandre.bounine@idt.com>
6 * Chul Kim <chul.kim@idt.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 59
20 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 */
22
23#include <linux/io.h>
24#include <linux/errno.h>
25#include <linux/init.h>
26#include <linux/ioport.h>
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <linux/rio.h>
31#include <linux/rio_drv.h>
32#include <linux/dma-mapping.h>
33#include <linux/interrupt.h>
34#include <linux/kfifo.h>
35#include <linux/delay.h>
36
37#include "tsi721.h"
38
39#define DEBUG_PW /* Inbound Port-Write debugging */
40
41static void tsi721_omsg_handler(struct tsi721_device *priv, int ch);
42static void tsi721_imsg_handler(struct tsi721_device *priv, int ch);
43
44/**
45 * tsi721_lcread - read from local SREP config space
46 * @mport: RapidIO master port info
47 * @index: ID of RapdiIO interface
48 * @offset: Offset into configuration space
49 * @len: Length (in bytes) of the maintenance transaction
50 * @data: Value to be read into
51 *
52 * Generates a local SREP space read. Returns %0 on
53 * success or %-EINVAL on failure.
54 */
55static int tsi721_lcread(struct rio_mport *mport, int index, u32 offset,
56 int len, u32 *data)
57{
58 struct tsi721_device *priv = mport->priv;
59
60 if (len != sizeof(u32))
61 return -EINVAL; /* only 32-bit access is supported */
62
63 *data = ioread32(priv->regs + offset);
64
65 return 0;
66}
67
68/**
69 * tsi721_lcwrite - write into local SREP config space
70 * @mport: RapidIO master port info
71 * @index: ID of RapdiIO interface
72 * @offset: Offset into configuration space
73 * @len: Length (in bytes) of the maintenance transaction
74 * @data: Value to be written
75 *
76 * Generates a local write into SREP configuration space. Returns %0 on
77 * success or %-EINVAL on failure.
78 */
79static int tsi721_lcwrite(struct rio_mport *mport, int index, u32 offset,
80 int len, u32 data)
81{
82 struct tsi721_device *priv = mport->priv;
83
84 if (len != sizeof(u32))
85 return -EINVAL; /* only 32-bit access is supported */
86
87 iowrite32(data, priv->regs + offset);
88
89 return 0;
90}
91
92/**
93 * tsi721_maint_dma - Helper function to generate RapidIO maintenance
94 * transactions using designated Tsi721 DMA channel.
95 * @priv: pointer to tsi721 private data
96 * @sys_size: RapdiIO transport system size
97 * @destid: Destination ID of transaction
98 * @hopcount: Number of hops to target device
99 * @offset: Offset into configuration space
100 * @len: Length (in bytes) of the maintenance transaction
101 * @data: Location to be read from or write into
102 * @do_wr: Operation flag (1 == MAINT_WR)
103 *
104 * Generates a RapidIO maintenance transaction (Read or Write).
105 * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
106 */
107static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
108 u16 destid, u8 hopcount, u32 offset, int len,
109 u32 *data, int do_wr)
110{
111 struct tsi721_dma_desc *bd_ptr;
112 u32 rd_count, swr_ptr, ch_stat;
113 int i, err = 0;
114 u32 op = do_wr ? MAINT_WR : MAINT_RD;
115
116 if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32)))
117 return -EINVAL;
118
119 bd_ptr = priv->bdma[TSI721_DMACH_MAINT].bd_base;
120
121 rd_count = ioread32(
122 priv->regs + TSI721_DMAC_DRDCNT(TSI721_DMACH_MAINT));
123
124 /* Initialize DMA descriptor */
125 bd_ptr[0].type_id = cpu_to_le32((DTYPE2 << 29) | (op << 19) | destid);
126 bd_ptr[0].bcount = cpu_to_le32((sys_size << 26) | 0x04);
127 bd_ptr[0].raddr_lo = cpu_to_le32((hopcount << 24) | offset);
128 bd_ptr[0].raddr_hi = 0;
129 if (do_wr)
130 bd_ptr[0].data[0] = cpu_to_be32p(data);
131 else
132 bd_ptr[0].data[0] = 0xffffffff;
133
134 mb();
135
136 /* Start DMA operation */
137 iowrite32(rd_count + 2,
138 priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
139 ioread32(priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
140 i = 0;
141
142 /* Wait until DMA transfer is finished */
143 while ((ch_stat = ioread32(priv->regs +
144 TSI721_DMAC_STS(TSI721_DMACH_MAINT))) & TSI721_DMAC_STS_RUN) {
145 udelay(1);
146 if (++i >= 5000000) {
147 dev_dbg(&priv->pdev->dev,
148 "%s : DMA[%d] read timeout ch_status=%x\n",
149 __func__, TSI721_DMACH_MAINT, ch_stat);
150 if (!do_wr)
151 *data = 0xffffffff;
152 err = -EIO;
153 goto err_out;
154 }
155 }
156
157 if (ch_stat & TSI721_DMAC_STS_ABORT) {
158 /* If DMA operation aborted due to error,
159 * reinitialize DMA channel
160 */
161 dev_dbg(&priv->pdev->dev, "%s : DMA ABORT ch_stat=%x\n",
162 __func__, ch_stat);
163 dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n",
164 do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset);
165 iowrite32(TSI721_DMAC_INT_ALL,
166 priv->regs + TSI721_DMAC_INT(TSI721_DMACH_MAINT));
167 iowrite32(TSI721_DMAC_CTL_INIT,
168 priv->regs + TSI721_DMAC_CTL(TSI721_DMACH_MAINT));
169 udelay(10);
170 iowrite32(0, priv->regs +
171 TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
172 udelay(1);
173 if (!do_wr)
174 *data = 0xffffffff;
175 err = -EIO;
176 goto err_out;
177 }
178
179 if (!do_wr)
180 *data = be32_to_cpu(bd_ptr[0].data[0]);
181
182 /*
183 * Update descriptor status FIFO RD pointer.
184 * NOTE: Skipping check and clear FIFO entries because we are waiting
185 * for transfer to be completed.
186 */
187 swr_ptr = ioread32(priv->regs + TSI721_DMAC_DSWP(TSI721_DMACH_MAINT));
188 iowrite32(swr_ptr, priv->regs + TSI721_DMAC_DSRP(TSI721_DMACH_MAINT));
189err_out:
190
191 return err;
192}
193
194/**
195 * tsi721_cread_dma - Generate a RapidIO maintenance read transaction
196 * using Tsi721 BDMA engine.
197 * @mport: RapidIO master port control structure
198 * @index: ID of RapdiIO interface
199 * @destid: Destination ID of transaction
200 * @hopcount: Number of hops to target device
201 * @offset: Offset into configuration space
202 * @len: Length (in bytes) of the maintenance transaction
203 * @val: Location to be read into
204 *
205 * Generates a RapidIO maintenance read transaction.
206 * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
207 */
208static int tsi721_cread_dma(struct rio_mport *mport, int index, u16 destid,
209 u8 hopcount, u32 offset, int len, u32 *data)
210{
211 struct tsi721_device *priv = mport->priv;
212
213 return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount,
214 offset, len, data, 0);
215}
216
217/**
218 * tsi721_cwrite_dma - Generate a RapidIO maintenance write transaction
219 * using Tsi721 BDMA engine
220 * @mport: RapidIO master port control structure
221 * @index: ID of RapdiIO interface
222 * @destid: Destination ID of transaction
223 * @hopcount: Number of hops to target device
224 * @offset: Offset into configuration space
225 * @len: Length (in bytes) of the maintenance transaction
226 * @val: Value to be written
227 *
228 * Generates a RapidIO maintenance write transaction.
229 * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
230 */
231static int tsi721_cwrite_dma(struct rio_mport *mport, int index, u16 destid,
232 u8 hopcount, u32 offset, int len, u32 data)
233{
234 struct tsi721_device *priv = mport->priv;
235 u32 temp = data;
236
237 return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount,
238 offset, len, &temp, 1);
239}
240
241/**
242 * tsi721_pw_handler - Tsi721 inbound port-write interrupt handler
243 * @mport: RapidIO master port structure
244 *
245 * Handles inbound port-write interrupts. Copies PW message from an internal
246 * buffer into PW message FIFO and schedules deferred routine to process
247 * queued messages.
248 */
249static int
250tsi721_pw_handler(struct rio_mport *mport)
251{
252 struct tsi721_device *priv = mport->priv;
253 u32 pw_stat;
254 u32 pw_buf[TSI721_RIO_PW_MSG_SIZE/sizeof(u32)];
255
256
257 pw_stat = ioread32(priv->regs + TSI721_RIO_PW_RX_STAT);
258
259 if (pw_stat & TSI721_RIO_PW_RX_STAT_PW_VAL) {
260 pw_buf[0] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(0));
261 pw_buf[1] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(1));
262 pw_buf[2] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(2));
263 pw_buf[3] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(3));
264
265 /* Queue PW message (if there is room in FIFO),
266 * otherwise discard it.
267 */
268 spin_lock(&priv->pw_fifo_lock);
269 if (kfifo_avail(&priv->pw_fifo) >= TSI721_RIO_PW_MSG_SIZE)
270 kfifo_in(&priv->pw_fifo, pw_buf,
271 TSI721_RIO_PW_MSG_SIZE);
272 else
273 priv->pw_discard_count++;
274 spin_unlock(&priv->pw_fifo_lock);
275 }
276
277 /* Clear pending PW interrupts */
278 iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL,
279 priv->regs + TSI721_RIO_PW_RX_STAT);
280
281 schedule_work(&priv->pw_work);
282
283 return 0;
284}
285
286static void tsi721_pw_dpc(struct work_struct *work)
287{
288 struct tsi721_device *priv = container_of(work, struct tsi721_device,
289 pw_work);
290 u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)]; /* Use full size PW message
291 buffer for RIO layer */
292
293 /*
294 * Process port-write messages
295 */
296 while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)msg_buffer,
297 TSI721_RIO_PW_MSG_SIZE, &priv->pw_fifo_lock)) {
298 /* Process one message */
299#ifdef DEBUG_PW
300 {
301 u32 i;
302 pr_debug("%s : Port-Write Message:", __func__);
303 for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); ) {
304 pr_debug("0x%02x: %08x %08x %08x %08x", i*4,
305 msg_buffer[i], msg_buffer[i + 1],
306 msg_buffer[i + 2], msg_buffer[i + 3]);
307 i += 4;
308 }
309 pr_debug("\n");
310 }
311#endif
312 /* Pass the port-write message to RIO core for processing */
313 rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer);
314 }
315}
316
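/*
 * Port-write flow as implemented above: the interrupt path
 * (tsi721_pw_handler) only captures the four RX_CAPT words into pw_fifo and
 * clears the hardware status; the heavy lifting is deferred to
 * tsi721_pw_dpc(), which drains the kfifo and hands each message to the RIO
 * core via rio_inb_pwrite_handler().
 */
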
317/**
318 * tsi721_pw_enable - enable/disable inbound port-write interface
319 * @mport: Master port implementing the port write unit
320 * @enable: 1=enable; 0=disable port-write message handling
321 */
322static int tsi721_pw_enable(struct rio_mport *mport, int enable)
323{
324 struct tsi721_device *priv = mport->priv;
325 u32 rval;
326
327 rval = ioread32(priv->regs + TSI721_RIO_EM_INT_ENABLE);
328
329 if (enable)
330 rval |= TSI721_RIO_EM_INT_ENABLE_PW_RX;
331 else
332 rval &= ~TSI721_RIO_EM_INT_ENABLE_PW_RX;
333
334 /* Clear pending PW interrupts */
335 iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL,
336 priv->regs + TSI721_RIO_PW_RX_STAT);
337 /* Update enable bits */
338 iowrite32(rval, priv->regs + TSI721_RIO_EM_INT_ENABLE);
339
340 return 0;
341}
342
343/**
344 * tsi721_dsend - Send a RapidIO doorbell
345 * @mport: RapidIO master port info
346 * @index: ID of RapidIO interface
347 * @destid: Destination ID of target device
348 * @data: 16-bit info field of RapidIO doorbell
349 *
350 * Sends a RapidIO doorbell message. Always returns %0.
351 */
352static int tsi721_dsend(struct rio_mport *mport, int index,
353 u16 destid, u16 data)
354{
355 struct tsi721_device *priv = mport->priv;
356 u32 offset;
357
358 offset = (((mport->sys_size) ? RIO_TT_CODE_16 : RIO_TT_CODE_8) << 18) |
359 (destid << 2);
360
361 dev_dbg(&priv->pdev->dev,
362 "Send Doorbell 0x%04x to destID 0x%x\n", data, destid);
363 iowrite16be(data, priv->odb_base + offset);
364
365 return 0;
366}
367
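/*
 * Illustration of the doorbell offset encoding used above: the transport
 * type code lands at bit 18 of the BAR1 offset and the destination ID at
 * bits [17:2], so a doorbell to destID 0x5 on a small (8-bit destID) system
 * is produced by a big-endian 16-bit write to
 *
 *	priv->odb_base + (RIO_TT_CODE_8 << 18) + (0x5 << 2)
 *
 * (sketch only, derived from the offset computation in tsi721_dsend()).
 */
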
368/**
369 * tsi721_dbell_handler - Tsi721 doorbell interrupt handler
370 * @mport: RapidIO master port structure
371 *
372 * Handles inbound doorbell interrupts: disables further IDB queue interrupts
373 * and schedules the deferred routine (tsi721_db_dpc) that drains and
374 * dispatches queued doorbells.
375 */
376static int
377tsi721_dbell_handler(struct rio_mport *mport)
378{
379 struct tsi721_device *priv = mport->priv;
380 u32 regval;
381
382 /* Disable IDB interrupts */
383 regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
384 regval &= ~TSI721_SR_CHINT_IDBQRCV;
385 iowrite32(regval,
386 priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
387
388 schedule_work(&priv->idb_work);
389
390 return 0;
391}
392
393static void tsi721_db_dpc(struct work_struct *work)
394{
395 struct tsi721_device *priv = container_of(work, struct tsi721_device,
396 idb_work);
397 struct rio_mport *mport;
398 struct rio_dbell *dbell;
399 int found = 0;
400 u32 wr_ptr, rd_ptr;
401 u64 *idb_entry;
402 u32 regval;
403 union {
404 u64 msg;
405 u8 bytes[8];
406 } idb;
407
408 /*
409 * Process queued inbound doorbells
410 */
411 mport = priv->mport;
412
413 wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE));
414 rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
415
416 while (wr_ptr != rd_ptr) {
417 idb_entry = (u64 *)(priv->idb_base +
418 (TSI721_IDB_ENTRY_SIZE * rd_ptr));
419 rd_ptr++;
420 idb.msg = *idb_entry;
421 *idb_entry = 0;
422
423 /* Process one doorbell */
424 list_for_each_entry(dbell, &mport->dbells, node) {
425 if ((dbell->res->start <= DBELL_INF(idb.bytes)) &&
426 (dbell->res->end >= DBELL_INF(idb.bytes))) {
427 found = 1;
428 break;
429 }
430 }
431
432 if (found) {
433 dbell->dinb(mport, dbell->dev_id, DBELL_SID(idb.bytes),
434 DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
435 } else {
436 dev_dbg(&priv->pdev->dev,
437 "spurious inb doorbell, sid %2.2x tid %2.2x"
438 " info %4.4x\n", DBELL_SID(idb.bytes),
439 DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
440 }
441 }
442
443 iowrite32(rd_ptr & (IDB_QSIZE - 1),
444 priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
445
446 /* Re-enable IDB interrupts */
447 regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
448 regval |= TSI721_SR_CHINT_IDBQRCV;
449 iowrite32(regval,
450 priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
451}
452
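/*
 * Doorbell dispatch summary for the DPC above: each 64-bit IDB queue entry
 * is matched against the doorbell ranges registered on the mport; on a hit
 * the owner's dinb() callback runs with the source/target IDs and the info
 * field, otherwise the doorbell is reported as spurious and dropped.
 */
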
453/**
454 * tsi721_irqhandler - Tsi721 interrupt handler
455 * @irq: Linux interrupt number
456 * @ptr: Pointer to interrupt-specific data (mport structure)
457 *
458 * Handles Tsi721 interrupts signaled using MSI and INTA. Checks reported
459 * interrupt events and calls an event-specific handler(s).
460 */
461static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
462{
463 struct rio_mport *mport = (struct rio_mport *)ptr;
464 struct tsi721_device *priv = mport->priv;
465 u32 dev_int;
466 u32 dev_ch_int;
467 u32 intval;
468 u32 ch_inte;
469
470 dev_int = ioread32(priv->regs + TSI721_DEV_INT);
471 if (!dev_int)
472 return IRQ_NONE;
473
474 dev_ch_int = ioread32(priv->regs + TSI721_DEV_CHAN_INT);
475
476 if (dev_int & TSI721_DEV_INT_SR2PC_CH) {
477 /* Service SR2PC Channel interrupts */
478 if (dev_ch_int & TSI721_INT_SR2PC_CHAN(IDB_QUEUE)) {
479 /* Service Inbound Doorbell interrupt */
480 intval = ioread32(priv->regs +
481 TSI721_SR_CHINT(IDB_QUEUE));
482 if (intval & TSI721_SR_CHINT_IDBQRCV)
483 tsi721_dbell_handler(mport);
484 else
485 dev_info(&priv->pdev->dev,
486 "Unsupported SR_CH_INT %x\n", intval);
487
488 /* Clear interrupts */
489 iowrite32(intval,
490 priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
491 ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
492 }
493 }
494
495 if (dev_int & TSI721_DEV_INT_SMSG_CH) {
496 int ch;
497
498 /*
499 * Service channel interrupts from Messaging Engine
500 */
501
502 if (dev_ch_int & TSI721_INT_IMSG_CHAN_M) { /* Inbound Msg */
503 /* Disable signaled IB MSG Channel interrupts */
504 ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
505 ch_inte &= ~(dev_ch_int & TSI721_INT_IMSG_CHAN_M);
506 iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
507
508 /*
509 * Process Inbound Message interrupt for each MBOX
510 */
511 for (ch = 4; ch < RIO_MAX_MBOX + 4; ch++) {
512 if (!(dev_ch_int & TSI721_INT_IMSG_CHAN(ch)))
513 continue;
514 tsi721_imsg_handler(priv, ch);
515 }
516 }
517
518 if (dev_ch_int & TSI721_INT_OMSG_CHAN_M) { /* Outbound Msg */
519 /* Disable signaled OB MSG Channel interrupts */
520 ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
521 ch_inte &= ~(dev_ch_int & TSI721_INT_OMSG_CHAN_M);
522 iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
523
524 /*
525 * Process Outbound Message interrupts for each MBOX
526 */
527
528 for (ch = 0; ch < RIO_MAX_MBOX; ch++) {
529 if (!(dev_ch_int & TSI721_INT_OMSG_CHAN(ch)))
530 continue;
531 tsi721_omsg_handler(priv, ch);
532 }
533 }
534 }
535
536 if (dev_int & TSI721_DEV_INT_SRIO) {
537 /* Service SRIO MAC interrupts */
538 intval = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
539 if (intval & TSI721_RIO_EM_INT_STAT_PW_RX)
540 tsi721_pw_handler(mport);
541 }
542
543 return IRQ_HANDLED;
544}
545
546static void tsi721_interrupts_init(struct tsi721_device *priv)
547{
548 u32 intr;
549
550 /* Enable IDB interrupts */
551 iowrite32(TSI721_SR_CHINT_ALL,
552 priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
553 iowrite32(TSI721_SR_CHINT_IDBQRCV,
554 priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
555 iowrite32(TSI721_INT_SR2PC_CHAN(IDB_QUEUE),
556 priv->regs + TSI721_DEV_CHAN_INTE);
557
558 /* Enable SRIO MAC interrupts */
559 iowrite32(TSI721_RIO_EM_DEV_INT_EN_INT,
560 priv->regs + TSI721_RIO_EM_DEV_INT_EN);
561
562 if (priv->flags & TSI721_USING_MSIX)
563 intr = TSI721_DEV_INT_SRIO;
564 else
565 intr = TSI721_DEV_INT_SR2PC_CH | TSI721_DEV_INT_SRIO |
566 TSI721_DEV_INT_SMSG_CH;
567
568 iowrite32(intr, priv->regs + TSI721_DEV_INTE);
569 ioread32(priv->regs + TSI721_DEV_INTE);
570}
571
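/*
 * Interrupt enable hierarchy touched above (as read from the code): each
 * event source has a per-channel enable (e.g. TSI721_SR_CHINTE), gated by
 * the device channel enable (TSI721_DEV_CHAN_INTE), gated in turn by the
 * top-level device enable (TSI721_DEV_INTE). In MSI-X mode the per-block
 * vectors make the aggregated SR2PC/SMSG device-level bits unnecessary,
 * which is presumably why only TSI721_DEV_INT_SRIO is left enabled there.
 */
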
572#ifdef CONFIG_PCI_MSI
573/**
574 * tsi721_omsg_msix - MSI-X interrupt handler for outbound messaging
575 * @irq: Linux interrupt number
576 * @ptr: Pointer to interrupt-specific data (mport structure)
577 *
578 * Handles outbound messaging interrupts signaled using MSI-X.
579 */
580static irqreturn_t tsi721_omsg_msix(int irq, void *ptr)
581{
582 struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
583 int mbox;
584
585 mbox = (irq - priv->msix[TSI721_VECT_OMB0_DONE].vector) % RIO_MAX_MBOX;
586 tsi721_omsg_handler(priv, mbox);
587 return IRQ_HANDLED;
588}
589
590/**
591 * tsi721_imsg_msix - MSI-X interrupt handler for inbound messaging
592 * @irq: Linux interrupt number
593 * @ptr: Pointer to interrupt-specific data (mport structure)
594 *
595 * Handles inbound messaging interrupts signaled using MSI-X.
596 */
597static irqreturn_t tsi721_imsg_msix(int irq, void *ptr)
598{
599 struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
600 int mbox;
601
602 mbox = (irq - priv->msix[TSI721_VECT_IMB0_RCV].vector) % RIO_MAX_MBOX;
603 tsi721_imsg_handler(priv, mbox + 4);
604 return IRQ_HANDLED;
605}
606
607/**
608 * tsi721_srio_msix - Tsi721 MSI-X SRIO MAC interrupt handler
609 * @irq: Linux interrupt number
610 * @ptr: Pointer to interrupt-specific data (mport structure)
611 *
612 * Handles Tsi721 interrupts from SRIO MAC.
613 */
614static irqreturn_t tsi721_srio_msix(int irq, void *ptr)
615{
616 struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
617 u32 srio_int;
618
619 /* Service SRIO MAC interrupts */
620 srio_int = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
621 if (srio_int & TSI721_RIO_EM_INT_STAT_PW_RX)
622 tsi721_pw_handler((struct rio_mport *)ptr);
623
624 return IRQ_HANDLED;
625}
626
627/**
628 * tsi721_sr2pc_ch_msix - Tsi721 MSI-X SR2PC Channel interrupt handler
629 * @irq: Linux interrupt number
630 * @ptr: Pointer to interrupt-specific data (mport structure)
631 *
632 * Handles Tsi721 interrupts from SR2PC Channel.
633 * NOTE: At this moment services only one SR2PC channel associated with inbound
634 * doorbells.
635 */
636static irqreturn_t tsi721_sr2pc_ch_msix(int irq, void *ptr)
637{
638 struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
639 u32 sr_ch_int;
640
641 /* Service Inbound DB interrupt from SR2PC channel */
642 sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
643 if (sr_ch_int & TSI721_SR_CHINT_IDBQRCV)
644 tsi721_dbell_handler((struct rio_mport *)ptr);
645
646 /* Clear interrupts */
647 iowrite32(sr_ch_int, priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
648 /* Read back to ensure that interrupt was cleared */
649 sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
650
651 return IRQ_HANDLED;
652}
653
654/**
655 * tsi721_request_msix - register interrupt service for MSI-X mode.
656 * @mport: RapidIO master port structure
657 *
658 * Registers MSI-X interrupt service routines for interrupts that are active
659 * immediately after mport initialization. Messaging interrupt service routines
660 * should be registered during corresponding open requests.
661 */
662static int tsi721_request_msix(struct rio_mport *mport)
663{
664 struct tsi721_device *priv = mport->priv;
665 int err = 0;
666
667 err = request_irq(priv->msix[TSI721_VECT_IDB].vector,
668 tsi721_sr2pc_ch_msix, 0,
669 priv->msix[TSI721_VECT_IDB].irq_name, (void *)mport);
670 if (err)
671 goto out;
672
673 err = request_irq(priv->msix[TSI721_VECT_PWRX].vector,
674 tsi721_srio_msix, 0,
675 priv->msix[TSI721_VECT_PWRX].irq_name, (void *)mport);
676 if (err)
677 free_irq(
678 priv->msix[TSI721_VECT_IDB].vector,
679 (void *)mport);
680out:
681 return err;
682}
683
684/**
685 * tsi721_enable_msix - Attempts to enable MSI-X support for Tsi721.
686 * @priv: pointer to tsi721 private data
687 *
688 * Configures MSI-X support for Tsi721. Supports only an exact number
689 * of requested vectors.
690 */
691static int tsi721_enable_msix(struct tsi721_device *priv)
692{
693 struct msix_entry entries[TSI721_VECT_MAX];
694 int err;
695 int i;
696
697 entries[TSI721_VECT_IDB].entry = TSI721_MSIX_SR2PC_IDBQ_RCV(IDB_QUEUE);
698 entries[TSI721_VECT_PWRX].entry = TSI721_MSIX_SRIO_MAC_INT;
699
700 /*
701 * Initialize MSI-X entries for Messaging Engine:
702 * this driver supports four RIO mailboxes (inbound and outbound)
703 * NOTE: Inbound message MBOX 0...3 use IB channels 4...7. Therefore
704 * offset +4 is added to IB MBOX number.
705 */
706 for (i = 0; i < RIO_MAX_MBOX; i++) {
707 entries[TSI721_VECT_IMB0_RCV + i].entry =
708 TSI721_MSIX_IMSG_DQ_RCV(i + 4);
709 entries[TSI721_VECT_IMB0_INT + i].entry =
710 TSI721_MSIX_IMSG_INT(i + 4);
711 entries[TSI721_VECT_OMB0_DONE + i].entry =
712 TSI721_MSIX_OMSG_DONE(i);
713 entries[TSI721_VECT_OMB0_INT + i].entry =
714 TSI721_MSIX_OMSG_INT(i);
715 }
716
717 err = pci_enable_msix(priv->pdev, entries, ARRAY_SIZE(entries));
718 if (err) {
719 if (err > 0)
720 dev_info(&priv->pdev->dev,
721 "Only %d MSI-X vectors available, "
722 "not using MSI-X\n", err);
723 return err;
724 }
725
726 /*
727 * Copy MSI-X vector information into tsi721 private structure
728 */
729 priv->msix[TSI721_VECT_IDB].vector = entries[TSI721_VECT_IDB].vector;
730 snprintf(priv->msix[TSI721_VECT_IDB].irq_name, IRQ_DEVICE_NAME_MAX,
731 DRV_NAME "-idb@pci:%s", pci_name(priv->pdev));
732 priv->msix[TSI721_VECT_PWRX].vector = entries[TSI721_VECT_PWRX].vector;
733 snprintf(priv->msix[TSI721_VECT_PWRX].irq_name, IRQ_DEVICE_NAME_MAX,
734 DRV_NAME "-pwrx@pci:%s", pci_name(priv->pdev));
735
736 for (i = 0; i < RIO_MAX_MBOX; i++) {
737 priv->msix[TSI721_VECT_IMB0_RCV + i].vector =
738 entries[TSI721_VECT_IMB0_RCV + i].vector;
739 snprintf(priv->msix[TSI721_VECT_IMB0_RCV + i].irq_name,
740 IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbr%d@pci:%s",
741 i, pci_name(priv->pdev));
742
743 priv->msix[TSI721_VECT_IMB0_INT + i].vector =
744 entries[TSI721_VECT_IMB0_INT + i].vector;
745 snprintf(priv->msix[TSI721_VECT_IMB0_INT + i].irq_name,
746 IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbi%d@pci:%s",
747 i, pci_name(priv->pdev));
748
749 priv->msix[TSI721_VECT_OMB0_DONE + i].vector =
750 entries[TSI721_VECT_OMB0_DONE + i].vector;
751 snprintf(priv->msix[TSI721_VECT_OMB0_DONE + i].irq_name,
752 IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombd%d@pci:%s",
753 i, pci_name(priv->pdev));
754
755 priv->msix[TSI721_VECT_OMB0_INT + i].vector =
756 entries[TSI721_VECT_OMB0_INT + i].vector;
757 snprintf(priv->msix[TSI721_VECT_OMB0_INT + i].irq_name,
758 IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombi%d@pci:%s",
759 i, pci_name(priv->pdev));
760 }
761
762 return 0;
763}
764#endif /* CONFIG_PCI_MSI */
765
766static int tsi721_request_irq(struct rio_mport *mport)
767{
768 struct tsi721_device *priv = mport->priv;
769 int err;
770
771#ifdef CONFIG_PCI_MSI
772 if (priv->flags & TSI721_USING_MSIX)
773 err = tsi721_request_msix(mport);
774 else
775#endif
776 err = request_irq(priv->pdev->irq, tsi721_irqhandler,
777 (priv->flags & TSI721_USING_MSI) ? 0 : IRQF_SHARED,
778 DRV_NAME, (void *)mport);
779
780 if (err)
781 dev_err(&priv->pdev->dev,
782 "Unable to allocate interrupt, Error: %d\n", err);
783
784 return err;
785}
786
787/**
788 * tsi721_init_pc2sr_mapping - initializes outbound (PCIe->SRIO)
789 * translation regions.
790 * @priv: pointer to tsi721 private data
791 *
792 * Disables SREP translation regions.
793 */
794static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv)
795{
796 int i;
797
798 /* Disable all PC2SR translation windows */
799 for (i = 0; i < TSI721_OBWIN_NUM; i++)
800 iowrite32(0, priv->regs + TSI721_OBWINLB(i));
801}
802
803/**
804 * tsi721_init_sr2pc_mapping - initializes inbound (SRIO->PCIe)
805 * translation regions.
806 * @priv: pointer to tsi721 private data
807 *
808 * Disables inbound windows.
809 */
810static void tsi721_init_sr2pc_mapping(struct tsi721_device *priv)
811{
812 int i;
813
814 /* Disable all SR2PC inbound windows */
815 for (i = 0; i < TSI721_IBWIN_NUM; i++)
816 iowrite32(0, priv->regs + TSI721_IBWINLB(i));
817}
818
819/**
820 * tsi721_port_write_init - Inbound port write interface init
821 * @priv: pointer to tsi721 private data
822 *
823 * Initializes inbound port write handler.
824 * Returns %0 on success or %-ENOMEM on failure.
825 */
826static int tsi721_port_write_init(struct tsi721_device *priv)
827{
828 priv->pw_discard_count = 0;
829 INIT_WORK(&priv->pw_work, tsi721_pw_dpc);
830 spin_lock_init(&priv->pw_fifo_lock);
831 if (kfifo_alloc(&priv->pw_fifo,
832 TSI721_RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
833 dev_err(&priv->pdev->dev, "PW FIFO allocation failed\n");
834 return -ENOMEM;
835 }
836
837 /* Use reliable port-write capture mode */
838 iowrite32(TSI721_RIO_PW_CTL_PWC_REL, priv->regs + TSI721_RIO_PW_CTL);
839 return 0;
840}
841
842static int tsi721_doorbell_init(struct tsi721_device *priv)
843{
844 /* Outbound Doorbells do not require any setup.
845 * Tsi721 uses dedicated PCI BAR1 to generate doorbells.
846 * That BAR1 was mapped during the probe routine.
847 */
848
849 /* Initialize Inbound Doorbell processing DPC and queue */
850 priv->db_discard_count = 0;
851 INIT_WORK(&priv->idb_work, tsi721_db_dpc);
852
853 /* Allocate buffer for inbound doorbells queue */
854 priv->idb_base = dma_alloc_coherent(&priv->pdev->dev,
855 IDB_QSIZE * TSI721_IDB_ENTRY_SIZE,
856 &priv->idb_dma, GFP_KERNEL);
857 if (!priv->idb_base)
858 return -ENOMEM;
859
860 memset(priv->idb_base, 0, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE);
861
862 dev_dbg(&priv->pdev->dev, "Allocated IDB buffer @ %p (phys = %llx)\n",
863 priv->idb_base, (unsigned long long)priv->idb_dma);
864
865 iowrite32(TSI721_IDQ_SIZE_VAL(IDB_QSIZE),
866 priv->regs + TSI721_IDQ_SIZE(IDB_QUEUE));
867 iowrite32(((u64)priv->idb_dma >> 32),
868 priv->regs + TSI721_IDQ_BASEU(IDB_QUEUE));
869 iowrite32(((u64)priv->idb_dma & TSI721_IDQ_BASEL_ADDR),
870 priv->regs + TSI721_IDQ_BASEL(IDB_QUEUE));
871 /* Enable accepting all inbound doorbells */
872 iowrite32(0, priv->regs + TSI721_IDQ_MASK(IDB_QUEUE));
873
874 iowrite32(TSI721_IDQ_INIT, priv->regs + TSI721_IDQ_CTL(IDB_QUEUE));
875
876 iowrite32(0, priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
877
878 return 0;
879}
880
881static void tsi721_doorbell_free(struct tsi721_device *priv)
882{
883 if (priv->idb_base == NULL)
884 return;
885
886 /* Free buffer allocated for inbound doorbell queue */
887 dma_free_coherent(&priv->pdev->dev, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE,
888 priv->idb_base, priv->idb_dma);
889 priv->idb_base = NULL;
890}
891
892static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
893{
894 struct tsi721_dma_desc *bd_ptr;
895 u64 *sts_ptr;
896 dma_addr_t bd_phys, sts_phys;
897 int sts_size;
898 int bd_num = priv->bdma[chnum].bd_num;
899
900 dev_dbg(&priv->pdev->dev, "Init Block DMA Engine, CH%d\n", chnum);
901
902 /*
903 * Initialize DMA channel for maintenance requests
904 */
905
906 /* Allocate space for DMA descriptors */
907 bd_ptr = dma_alloc_coherent(&priv->pdev->dev,
908 bd_num * sizeof(struct tsi721_dma_desc),
909 &bd_phys, GFP_KERNEL);
910 if (!bd_ptr)
911 return -ENOMEM;
912
913 priv->bdma[chnum].bd_phys = bd_phys;
914 priv->bdma[chnum].bd_base = bd_ptr;
915
916 memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
917
918 dev_dbg(&priv->pdev->dev, "DMA descriptors @ %p (phys = %llx)\n",
919 bd_ptr, (unsigned long long)bd_phys);
920
921 /* Allocate space for descriptor status FIFO */
922 sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
923 bd_num : TSI721_DMA_MINSTSSZ;
924 sts_size = roundup_pow_of_two(sts_size);
925 sts_ptr = dma_alloc_coherent(&priv->pdev->dev,
926 sts_size * sizeof(struct tsi721_dma_sts),
927 &sts_phys, GFP_KERNEL);
928 if (!sts_ptr) {
929 /* Free space allocated for DMA descriptors */
930 dma_free_coherent(&priv->pdev->dev,
931 bd_num * sizeof(struct tsi721_dma_desc),
932 bd_ptr, bd_phys);
933 priv->bdma[chnum].bd_base = NULL;
934 return -ENOMEM;
935 }
936
937 priv->bdma[chnum].sts_phys = sts_phys;
938 priv->bdma[chnum].sts_base = sts_ptr;
939 priv->bdma[chnum].sts_size = sts_size;
940
941 memset(sts_ptr, 0, sts_size);
942
943 dev_dbg(&priv->pdev->dev,
944 "desc status FIFO @ %p (phys = %llx) size=0x%x\n",
945 sts_ptr, (unsigned long long)sts_phys, sts_size);
946
947 /* Initialize DMA descriptors ring */
948 bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
949 bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
950 TSI721_DMAC_DPTRL_MASK);
951 bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
952
953 /* Setup DMA descriptor pointers */
954 iowrite32(((u64)bd_phys >> 32),
955 priv->regs + TSI721_DMAC_DPTRH(chnum));
956 iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
957 priv->regs + TSI721_DMAC_DPTRL(chnum));
958
959 /* Setup descriptor status FIFO */
960 iowrite32(((u64)sts_phys >> 32),
961 priv->regs + TSI721_DMAC_DSBH(chnum));
962 iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
963 priv->regs + TSI721_DMAC_DSBL(chnum));
964 iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
965 priv->regs + TSI721_DMAC_DSSZ(chnum));
966
967 /* Clear interrupt bits */
968 iowrite32(TSI721_DMAC_INT_ALL,
969 priv->regs + TSI721_DMAC_INT(chnum));
970
971 ioread32(priv->regs + TSI721_DMAC_INT(chnum));
972
973 /* Toggle DMA channel initialization */
974 iowrite32(TSI721_DMAC_CTL_INIT, priv->regs + TSI721_DMAC_CTL(chnum));
975 ioread32(priv->regs + TSI721_DMAC_CTL(chnum));
976 udelay(10);
977
978 return 0;
979}
980
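/*
 * Ring layout note for tsi721_bdma_ch_init(): bd_num - 1 regular
 * descriptors are followed by one DTYPE3 link descriptor whose next pointer
 * wraps back to the start of the ring, so the maintenance channel can be
 * re-armed simply by advancing the descriptor write count (see
 * tsi721_maint_dma()).
 */
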
981static int tsi721_bdma_ch_free(struct tsi721_device *priv, int chnum)
982{
983 u32 ch_stat;
984
985 if (priv->bdma[chnum].bd_base == NULL)
986 return 0;
987
988 /* Check if DMA channel is still running */
989 ch_stat = ioread32(priv->regs + TSI721_DMAC_STS(chnum));
990 if (ch_stat & TSI721_DMAC_STS_RUN)
991 return -EFAULT;
992
993 /* Put DMA channel into init state */
994 iowrite32(TSI721_DMAC_CTL_INIT,
995 priv->regs + TSI721_DMAC_CTL(chnum));
996
997 /* Free space allocated for DMA descriptors */
998 dma_free_coherent(&priv->pdev->dev,
999 priv->bdma[chnum].bd_num * sizeof(struct tsi721_dma_desc),
1000 priv->bdma[chnum].bd_base, priv->bdma[chnum].bd_phys);
1001 priv->bdma[chnum].bd_base = NULL;
1002
1003 /* Free space allocated for status FIFO */
1004 dma_free_coherent(&priv->pdev->dev,
1005 priv->bdma[chnum].sts_size * sizeof(struct tsi721_dma_sts),
1006 priv->bdma[chnum].sts_base, priv->bdma[chnum].sts_phys);
1007 priv->bdma[chnum].sts_base = NULL;
1008 return 0;
1009}
1010
1011static int tsi721_bdma_init(struct tsi721_device *priv)
1012{
1013 /* Initialize BDMA channel allocated for RapidIO maintenance read/write
1014 * request generation
1015 */
1016 priv->bdma[TSI721_DMACH_MAINT].bd_num = 2;
1017 if (tsi721_bdma_ch_init(priv, TSI721_DMACH_MAINT)) {
1018 dev_err(&priv->pdev->dev, "Unable to initialize maintenance DMA"
1019 " channel %d, aborting\n", TSI721_DMACH_MAINT);
1020 return -ENOMEM;
1021 }
1022
1023 return 0;
1024}
1025
1026static void tsi721_bdma_free(struct tsi721_device *priv)
1027{
1028 tsi721_bdma_ch_free(priv, TSI721_DMACH_MAINT);
1029}
1030
1031/* Enable Inbound Messaging Interrupts */
1032static void
1033tsi721_imsg_interrupt_enable(struct tsi721_device *priv, int ch,
1034 u32 inte_mask)
1035{
1036 u32 rval;
1037
1038 if (!inte_mask)
1039 return;
1040
1041 /* Clear pending Inbound Messaging interrupts */
1042 iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch));
1043
1044 /* Enable Inbound Messaging interrupts */
1045 rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch));
1046 iowrite32(rval | inte_mask, priv->regs + TSI721_IBDMAC_INTE(ch));
1047
1048 if (priv->flags & TSI721_USING_MSIX)
1049 return; /* Finished if we are in MSI-X mode */
1050
1051 /*
1052 * For MSI and INTA interrupt signalling we need to enable the next levels
1053 */
1054
1055 /* Enable Device Channel Interrupt */
1056 rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
1057 iowrite32(rval | TSI721_INT_IMSG_CHAN(ch),
1058 priv->regs + TSI721_DEV_CHAN_INTE);
1059}
1060
1061/* Disable Inbound Messaging Interrupts */
1062static void
1063tsi721_imsg_interrupt_disable(struct tsi721_device *priv, int ch,
1064 u32 inte_mask)
1065{
1066 u32 rval;
1067
1068 if (!inte_mask)
1069 return;
1070
1071 /* Clear pending Inbound Messaging interrupts */
1072 iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch));
1073
1074 /* Disable Inbound Messaging interrupts */
1075 rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch));
1076 rval &= ~inte_mask;
1077 iowrite32(rval, priv->regs + TSI721_IBDMAC_INTE(ch));
1078
1079 if (priv->flags & TSI721_USING_MSIX)
1080 return; /* Finished if we are in MSI-X mode */
1081
1082 /*
1083 * For MSI and INTA interrupt signalling we need to disable the next levels
1084 */
1085
1086 /* Disable Device Channel Interrupt */
1087 rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
1088 rval &= ~TSI721_INT_IMSG_CHAN(ch);
1089 iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE);
1090}
1091
1092/* Enable Outbound Messaging interrupts */
1093static void
1094tsi721_omsg_interrupt_enable(struct tsi721_device *priv, int ch,
1095 u32 inte_mask)
1096{
1097 u32 rval;
1098
1099 if (!inte_mask)
1100 return;
1101
1102 /* Clear pending Outbound Messaging interrupts */
1103 iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch));
1104
1105 /* Enable Outbound Messaging channel interrupts */
1106 rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch));
1107 iowrite32(rval | inte_mask, priv->regs + TSI721_OBDMAC_INTE(ch));
1108
1109 if (priv->flags & TSI721_USING_MSIX)
1110 return; /* Finished if we are in MSI-X mode */
1111
1112 /*
1113 * For MSI and INTA interrupt signalling we need to enable the next levels
1114 */
1115
1116 /* Enable Device Channel Interrupt */
1117 rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
1118 iowrite32(rval | TSI721_INT_OMSG_CHAN(ch),
1119 priv->regs + TSI721_DEV_CHAN_INTE);
1120}
1121
1122/* Disable Outbound Messaging interrupts */
1123static void
1124tsi721_omsg_interrupt_disable(struct tsi721_device *priv, int ch,
1125 u32 inte_mask)
1126{
1127 u32 rval;
1128
1129 if (!inte_mask)
1130 return;
1131
1132 /* Clear pending Outbound Messaging interrupts */
1133 iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch));
1134
1135 /* Disable Outbound Messaging interrupts */
1136 rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch));
1137 rval &= ~inte_mask;
1138 iowrite32(rval, priv->regs + TSI721_OBDMAC_INTE(ch));
1139
1140 if (priv->flags & TSI721_USING_MSIX)
1141 return; /* Finished if we are in MSI-X mode */
1142
1143 /*
1144 * For MSI and INTA interrupt signalling we need to disable the next levels
1145 */
1146
1147 /* Disable Device Channel Interrupt */
1148 rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
1149 rval &= ~TSI721_INT_OMSG_CHAN(ch);
1150 iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE);
1151}
1152
1153/**
1154 * tsi721_add_outb_message - Add message to the Tsi721 outbound message queue
1155 * @mport: Master port with outbound message queue
1156 * @rdev: Target of outbound message
1157 * @mbox: Outbound mailbox
1158 * @buffer: Message to add to outbound queue
1159 * @len: Length of message
1160 */
1161static int
1162tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
1163 void *buffer, size_t len)
1164{
1165 struct tsi721_device *priv = mport->priv;
1166 struct tsi721_omsg_desc *desc;
1167 u32 tx_slot;
1168
1169 if (!priv->omsg_init[mbox] ||
1170 len > TSI721_MSG_MAX_SIZE || len < 8)
1171 return -EINVAL;
1172
1173 tx_slot = priv->omsg_ring[mbox].tx_slot;
1174
1175 /* Copy message into transfer buffer */
1176 memcpy(priv->omsg_ring[mbox].omq_base[tx_slot], buffer, len);
1177
1178 if (len & 0x7)
1179 len += 8;
1180
1181 /* Build descriptor associated with buffer */
1182 desc = priv->omsg_ring[mbox].omd_base;
1183 desc[tx_slot].type_id = cpu_to_le32((DTYPE4 << 29) | rdev->destid);
1184 if (tx_slot % 4 == 0)
1185 desc[tx_slot].type_id |= cpu_to_le32(TSI721_OMD_IOF);
1186
1187 desc[tx_slot].msg_info =
1188 cpu_to_le32((mport->sys_size << 26) | (mbox << 22) |
1189 (0xe << 12) | (len & 0xff8));
1190 desc[tx_slot].bufptr_lo =
1191 cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] &
1192 0xffffffff);
1193 desc[tx_slot].bufptr_hi =
1194 cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] >> 32);
1195
1196 priv->omsg_ring[mbox].wr_count++;
1197
1198 /* Go to next descriptor */
1199 if (++priv->omsg_ring[mbox].tx_slot == priv->omsg_ring[mbox].size) {
1200 priv->omsg_ring[mbox].tx_slot = 0;
1201 /* Move through the ring link descriptor at the end */
1202 priv->omsg_ring[mbox].wr_count++;
1203 }
1204
1205 mb();
1206
1207 /* Set new write count value */
1208 iowrite32(priv->omsg_ring[mbox].wr_count,
1209 priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
1210 ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
1211
1212 return 0;
1213}
1214
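/*
 * Usage sketch (assuming the generic RapidIO messaging API in rio_drv.h): a
 * client that opened outbound mailbox 0 with rio_request_outb_mbox() would
 * queue a buffer with
 *
 *	rio_add_outb_message(mport, rdev, 0, buffer, len);
 *
 * which the core dispatches to tsi721_add_outb_message() above; completed
 * tx slots are then reported back through the registered outbound callback
 * from tsi721_omsg_handler().
 */
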
1215/**
1216 * tsi721_omsg_handler - Outbound Message Interrupt Handler
1217 * @priv: pointer to tsi721 private data
1218 * @ch: number of OB MSG channel to service
1219 *
1220 * Services channel interrupts from outbound messaging engine.
1221 */
1222static void tsi721_omsg_handler(struct tsi721_device *priv, int ch)
1223{
1224 u32 omsg_int;
1225
1226 spin_lock(&priv->omsg_ring[ch].lock);
1227
1228 omsg_int = ioread32(priv->regs + TSI721_OBDMAC_INT(ch));
1229
1230 if (omsg_int & TSI721_OBDMAC_INT_ST_FULL)
1231 dev_info(&priv->pdev->dev,
1232 "OB MBOX%d: Status FIFO is full\n", ch);
1233
1234 if (omsg_int & (TSI721_OBDMAC_INT_DONE | TSI721_OBDMAC_INT_IOF_DONE)) {
1235 u32 srd_ptr;
1236 u64 *sts_ptr, last_ptr = 0, prev_ptr = 0;
1237 int i, j;
1238 u32 tx_slot;
1239
1240 /*
1241 * Find last successfully processed descriptor
1242 */
1243
1244 /* Check and clear descriptor status FIFO entries */
1245 srd_ptr = priv->omsg_ring[ch].sts_rdptr;
1246 sts_ptr = priv->omsg_ring[ch].sts_base;
1247 j = srd_ptr * 8;
1248 while (sts_ptr[j]) {
1249 for (i = 0; i < 8 && sts_ptr[j]; i++, j++) {
1250 prev_ptr = last_ptr;
1251 last_ptr = le64_to_cpu(sts_ptr[j]);
1252 sts_ptr[j] = 0;
1253 }
1254
1255 ++srd_ptr;
1256 srd_ptr %= priv->omsg_ring[ch].sts_size;
1257 j = srd_ptr * 8;
1258 }
1259
1260 if (last_ptr == 0)
1261 goto no_sts_update;
1262
1263 priv->omsg_ring[ch].sts_rdptr = srd_ptr;
1264 iowrite32(srd_ptr, priv->regs + TSI721_OBDMAC_DSRP(ch));
1265
1266 if (!priv->mport->outb_msg[ch].mcback)
1267 goto no_sts_update;
1268
1269 /* Inform upper layer about transfer completion */
1270
1271 tx_slot = (last_ptr - (u64)priv->omsg_ring[ch].omd_phys)/
1272 sizeof(struct tsi721_omsg_desc);
1273
1274 /*
1275 * Check if this is a Link Descriptor (LD).
1276 * If yes, ignore LD and use descriptor processed
1277 * before LD.
1278 */
1279 if (tx_slot == priv->omsg_ring[ch].size) {
1280 if (prev_ptr)
1281 tx_slot = (prev_ptr -
1282 (u64)priv->omsg_ring[ch].omd_phys)/
1283 sizeof(struct tsi721_omsg_desc);
1284 else
1285 goto no_sts_update;
1286 }
1287
1288 /* Move slot index to the next message to be sent */
1289 ++tx_slot;
1290 if (tx_slot == priv->omsg_ring[ch].size)
1291 tx_slot = 0;
1292 BUG_ON(tx_slot >= priv->omsg_ring[ch].size);
1293 priv->mport->outb_msg[ch].mcback(priv->mport,
1294 priv->omsg_ring[ch].dev_id, ch,
1295 tx_slot);
1296 }
1297
1298no_sts_update:
1299
1300 if (omsg_int & TSI721_OBDMAC_INT_ERROR) {
1301 /*
1302 * Outbound message operation aborted due to error,
1303 * reinitialize OB MSG channel
1304 */
1305
1306 dev_dbg(&priv->pdev->dev, "OB MSG ABORT ch_stat=%x\n",
1307 ioread32(priv->regs + TSI721_OBDMAC_STS(ch)));
1308
1309 iowrite32(TSI721_OBDMAC_INT_ERROR,
1310 priv->regs + TSI721_OBDMAC_INT(ch));
1311 iowrite32(TSI721_OBDMAC_CTL_INIT,
1312 priv->regs + TSI721_OBDMAC_CTL(ch));
1313 ioread32(priv->regs + TSI721_OBDMAC_CTL(ch));
1314
1315 /* Inform upper level to clear all pending tx slots */
1316 if (priv->mport->outb_msg[ch].mcback)
1317 priv->mport->outb_msg[ch].mcback(priv->mport,
1318 priv->omsg_ring[ch].dev_id, ch,
1319 priv->omsg_ring[ch].tx_slot);
1320 /* Synch tx_slot tracking */
1321 iowrite32(priv->omsg_ring[ch].tx_slot,
1322 priv->regs + TSI721_OBDMAC_DRDCNT(ch));
1323 ioread32(priv->regs + TSI721_OBDMAC_DRDCNT(ch));
1324 priv->omsg_ring[ch].wr_count = priv->omsg_ring[ch].tx_slot;
1325 priv->omsg_ring[ch].sts_rdptr = 0;
1326 }
1327
1328 /* Clear channel interrupts */
1329 iowrite32(omsg_int, priv->regs + TSI721_OBDMAC_INT(ch));
1330
1331 if (!(priv->flags & TSI721_USING_MSIX)) {
1332 u32 ch_inte;
1333
1334 /* Re-enable channel interrupts */
1335 ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
1336 ch_inte |= TSI721_INT_OMSG_CHAN(ch);
1337 iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
1338 }
1339
1340 spin_unlock(&priv->omsg_ring[ch].lock);
1341}
1342
1343/**
1344 * tsi721_open_outb_mbox - Initialize Tsi721 outbound mailbox
1345 * @mport: Master port implementing Outbound Messaging Engine
1346 * @dev_id: Device specific pointer to pass on event
1347 * @mbox: Mailbox to open
1348 * @entries: Number of entries in the outbound mailbox ring
1349 */
1350static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
1351 int mbox, int entries)
1352{
1353 struct tsi721_device *priv = mport->priv;
1354 struct tsi721_omsg_desc *bd_ptr;
1355 int i, rc = 0;
1356
1357 if ((entries < TSI721_OMSGD_MIN_RING_SIZE) ||
1358 (entries > (TSI721_OMSGD_RING_SIZE)) ||
1359 (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) {
1360 rc = -EINVAL;
1361 goto out;
1362 }
1363
1364 priv->omsg_ring[mbox].dev_id = dev_id;
1365 priv->omsg_ring[mbox].size = entries;
1366 priv->omsg_ring[mbox].sts_rdptr = 0;
1367 spin_lock_init(&priv->omsg_ring[mbox].lock);
1368
1369 /* Outbound Msg buffer allocation based on
1370 the maximum number of descriptor entries */
1371 for (i = 0; i < entries; i++) {
1372 priv->omsg_ring[mbox].omq_base[i] =
1373 dma_alloc_coherent(
1374 &priv->pdev->dev, TSI721_MSG_BUFFER_SIZE,
1375 &priv->omsg_ring[mbox].omq_phys[i],
1376 GFP_KERNEL);
1377 if (priv->omsg_ring[mbox].omq_base[i] == NULL) {
1378 dev_dbg(&priv->pdev->dev,
1379 "Unable to allocate OB MSG data buffer for"
1380 " MBOX%d\n", mbox);
1381 rc = -ENOMEM;
1382 goto out_buf;
1383 }
1384 }
1385
1386 /* Outbound message descriptor allocation */
1387 priv->omsg_ring[mbox].omd_base = dma_alloc_coherent(
1388 &priv->pdev->dev,
1389 (entries + 1) * sizeof(struct tsi721_omsg_desc),
1390 &priv->omsg_ring[mbox].omd_phys, GFP_KERNEL);
1391 if (priv->omsg_ring[mbox].omd_base == NULL) {
1392 dev_dbg(&priv->pdev->dev,
1393 "Unable to allocate OB MSG descriptor memory "
1394 "for MBOX%d\n", mbox);
1395 rc = -ENOMEM;
1396 goto out_buf;
1397 }
1398
1399 priv->omsg_ring[mbox].tx_slot = 0;
1400
1401 /* Outbound message descriptor status FIFO allocation */
1402 priv->omsg_ring[mbox].sts_size = roundup_pow_of_two(entries + 1);
1403 priv->omsg_ring[mbox].sts_base = dma_alloc_coherent(&priv->pdev->dev,
1404 priv->omsg_ring[mbox].sts_size *
1405 sizeof(struct tsi721_dma_sts),
1406 &priv->omsg_ring[mbox].sts_phys, GFP_KERNEL);
1407 if (priv->omsg_ring[mbox].sts_base == NULL) {
1408 dev_dbg(&priv->pdev->dev,
1409 "Unable to allocate OB MSG descriptor status FIFO "
1410 "for MBOX%d\n", mbox);
1411 rc = -ENOMEM;
1412 goto out_desc;
1413 }
1414
1415 memset(priv->omsg_ring[mbox].sts_base, 0,
1416 entries * sizeof(struct tsi721_dma_sts));
1417
1418 /*
1419 * Configure Outbound Messaging Engine
1420 */
1421
1422 /* Setup Outbound Message descriptor pointer */
1423 iowrite32(((u64)priv->omsg_ring[mbox].omd_phys >> 32),
1424 priv->regs + TSI721_OBDMAC_DPTRH(mbox));
1425 iowrite32(((u64)priv->omsg_ring[mbox].omd_phys &
1426 TSI721_OBDMAC_DPTRL_MASK),
1427 priv->regs + TSI721_OBDMAC_DPTRL(mbox));
1428
1429 /* Setup Outbound Message descriptor status FIFO */
1430 iowrite32(((u64)priv->omsg_ring[mbox].sts_phys >> 32),
1431 priv->regs + TSI721_OBDMAC_DSBH(mbox));
1432 iowrite32(((u64)priv->omsg_ring[mbox].sts_phys &
1433 TSI721_OBDMAC_DSBL_MASK),
1434 priv->regs + TSI721_OBDMAC_DSBL(mbox));
1435 iowrite32(TSI721_DMAC_DSSZ_SIZE(priv->omsg_ring[mbox].sts_size),
1436 priv->regs + (u32)TSI721_OBDMAC_DSSZ(mbox));
1437
1438 /* Enable interrupts */
1439
1440#ifdef CONFIG_PCI_MSI
1441 if (priv->flags & TSI721_USING_MSIX) {
1442 /* Request interrupt service if we are in MSI-X mode */
1443 rc = request_irq(
1444 priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
1445 tsi721_omsg_msix, 0,
1446 priv->msix[TSI721_VECT_OMB0_DONE + mbox].irq_name,
1447 (void *)mport);
1448
1449 if (rc) {
1450 dev_dbg(&priv->pdev->dev,
1451 "Unable to allocate MSI-X interrupt for "
1452 "OBOX%d-DONE\n", mbox);
1453 goto out_stat;
1454 }
1455
1456 rc = request_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
1457 tsi721_omsg_msix, 0,
1458 priv->msix[TSI721_VECT_OMB0_INT + mbox].irq_name,
1459 (void *)mport);
1460
1461 if (rc) {
1462 dev_dbg(&priv->pdev->dev,
1463 "Unable to allocate MSI-X interrupt for "
1464 "MBOX%d-INT\n", mbox);
1465 free_irq(
1466 priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
1467 (void *)mport);
1468 goto out_stat;
1469 }
1470 }
1471#endif /* CONFIG_PCI_MSI */
1472
1473 tsi721_omsg_interrupt_enable(priv, mbox, TSI721_OBDMAC_INT_ALL);
1474
1475 /* Initialize Outbound Message descriptors ring */
1476 bd_ptr = priv->omsg_ring[mbox].omd_base;
1477 bd_ptr[entries].type_id = cpu_to_le32(DTYPE5 << 29);
1478 bd_ptr[entries].msg_info = 0;
1479 bd_ptr[entries].next_lo =
1480 cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys &
1481 TSI721_OBDMAC_DPTRL_MASK);
1482 bd_ptr[entries].next_hi =
1483 cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys >> 32);
1484 priv->omsg_ring[mbox].wr_count = 0;
1485 mb();
1486
1487 /* Initialize Outbound Message engine */
1488 iowrite32(TSI721_OBDMAC_CTL_INIT, priv->regs + TSI721_OBDMAC_CTL(mbox));
1489 ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
1490 udelay(10);
1491
1492 priv->omsg_init[mbox] = 1;
1493
1494 return 0;
1495
1496#ifdef CONFIG_PCI_MSI
1497out_stat:
1498 dma_free_coherent(&priv->pdev->dev,
1499 priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts),
1500 priv->omsg_ring[mbox].sts_base,
1501 priv->omsg_ring[mbox].sts_phys);
1502
1503 priv->omsg_ring[mbox].sts_base = NULL;
1504#endif /* CONFIG_PCI_MSI */
1505
1506out_desc:
1507 dma_free_coherent(&priv->pdev->dev,
1508 (entries + 1) * sizeof(struct tsi721_omsg_desc),
1509 priv->omsg_ring[mbox].omd_base,
1510 priv->omsg_ring[mbox].omd_phys);
1511
1512 priv->omsg_ring[mbox].omd_base = NULL;
1513
1514out_buf:
1515 for (i = 0; i < priv->omsg_ring[mbox].size; i++) {
1516 if (priv->omsg_ring[mbox].omq_base[i]) {
1517 dma_free_coherent(&priv->pdev->dev,
1518 TSI721_MSG_BUFFER_SIZE,
1519 priv->omsg_ring[mbox].omq_base[i],
1520 priv->omsg_ring[mbox].omq_phys[i]);
1521
1522 priv->omsg_ring[mbox].omq_base[i] = NULL;
1523 }
1524 }
1525
1526out:
1527 return rc;
1528}
1529
1530/**
1531 * tsi721_close_outb_mbox - Close Tsi721 outbound mailbox
1532 * @mport: Master port implementing the outbound message unit
1533 * @mbox: Mailbox to close
1534 */
1535static void tsi721_close_outb_mbox(struct rio_mport *mport, int mbox)
1536{
1537 struct tsi721_device *priv = mport->priv;
1538 u32 i;
1539
1540 if (!priv->omsg_init[mbox])
1541 return;
1542 priv->omsg_init[mbox] = 0;
1543
1544 /* Disable Interrupts */
1545
1546 tsi721_omsg_interrupt_disable(priv, mbox, TSI721_OBDMAC_INT_ALL);
1547
1548#ifdef CONFIG_PCI_MSI
1549 if (priv->flags & TSI721_USING_MSIX) {
1550 free_irq(priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
1551 (void *)mport);
1552 free_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
1553 (void *)mport);
1554 }
1555#endif /* CONFIG_PCI_MSI */
1556
1557 /* Free OMSG Descriptor Status FIFO */
1558 dma_free_coherent(&priv->pdev->dev,
1559 priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts),
1560 priv->omsg_ring[mbox].sts_base,
1561 priv->omsg_ring[mbox].sts_phys);
1562
1563 priv->omsg_ring[mbox].sts_base = NULL;
1564
1565 /* Free OMSG descriptors */
1566 dma_free_coherent(&priv->pdev->dev,
1567 (priv->omsg_ring[mbox].size + 1) *
1568 sizeof(struct tsi721_omsg_desc),
1569 priv->omsg_ring[mbox].omd_base,
1570 priv->omsg_ring[mbox].omd_phys);
1571
1572 priv->omsg_ring[mbox].omd_base = NULL;
1573
1574 /* Free message buffers */
1575 for (i = 0; i < priv->omsg_ring[mbox].size; i++) {
1576 if (priv->omsg_ring[mbox].omq_base[i]) {
1577 dma_free_coherent(&priv->pdev->dev,
1578 TSI721_MSG_BUFFER_SIZE,
1579 priv->omsg_ring[mbox].omq_base[i],
1580 priv->omsg_ring[mbox].omq_phys[i]);
1581
1582 priv->omsg_ring[mbox].omq_base[i] = NULL;
1583 }
1584 }
1585}
1586
1587/**
1588 * tsi721_imsg_handler - Inbound Message Interrupt Handler
1589 * @priv: pointer to tsi721 private data
1590 * @ch: inbound message channel number to service
1591 *
1592 * Services channel interrupts from inbound messaging engine.
1593 */
1594static void tsi721_imsg_handler(struct tsi721_device *priv, int ch)
1595{
1596 u32 mbox = ch - 4;
1597 u32 imsg_int;
1598
1599 spin_lock(&priv->imsg_ring[mbox].lock);
1600
1601 imsg_int = ioread32(priv->regs + TSI721_IBDMAC_INT(ch));
1602
1603 if (imsg_int & TSI721_IBDMAC_INT_SRTO)
1604 dev_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout\n",
1605 mbox);
1606
1607 if (imsg_int & TSI721_IBDMAC_INT_PC_ERROR)
1608 dev_info(&priv->pdev->dev, "IB MBOX%d PCIe error\n",
1609 mbox);
1610
1611 if (imsg_int & TSI721_IBDMAC_INT_FQ_LOW)
1612 dev_info(&priv->pdev->dev,
1613 "IB MBOX%d IB free queue low\n", mbox);
1614
1615 /* Clear IB channel interrupts */
1616 iowrite32(imsg_int, priv->regs + TSI721_IBDMAC_INT(ch));
1617
1618 /* If an IB Msg is received notify the upper layer */
1619 if (imsg_int & TSI721_IBDMAC_INT_DQ_RCV &&
1620 priv->mport->inb_msg[mbox].mcback)
1621 priv->mport->inb_msg[mbox].mcback(priv->mport,
1622 priv->imsg_ring[mbox].dev_id, mbox, -1);
1623
1624 if (!(priv->flags & TSI721_USING_MSIX)) {
1625 u32 ch_inte;
1626
1627 /* Re-enable channel interrupts */
1628 ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
1629 ch_inte |= TSI721_INT_IMSG_CHAN(ch);
1630 iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
1631 }
1632
1633 spin_unlock(&priv->imsg_ring[mbox].lock);
1634}
1635
1636/**
1637 * tsi721_open_inb_mbox - Initialize Tsi721 inbound mailbox
1638 * @mport: Master port implementing the Inbound Messaging Engine
1639 * @dev_id: Device specific pointer to pass on event
1640 * @mbox: Mailbox to open
1641 * @entries: Number of entries in the inbound mailbox ring
1642 */
1643static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
1644 int mbox, int entries)
1645{
1646 struct tsi721_device *priv = mport->priv;
1647 int ch = mbox + 4;
1648 int i;
1649 u64 *free_ptr;
1650 int rc = 0;
1651
1652 if ((entries < TSI721_IMSGD_MIN_RING_SIZE) ||
1653 (entries > TSI721_IMSGD_RING_SIZE) ||
1654 (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) {
1655 rc = -EINVAL;
1656 goto out;
1657 }
1658
1659 /* Initialize IB Messaging Ring */
1660 priv->imsg_ring[mbox].dev_id = dev_id;
1661 priv->imsg_ring[mbox].size = entries;
1662 priv->imsg_ring[mbox].rx_slot = 0;
1663 priv->imsg_ring[mbox].desc_rdptr = 0;
1664 priv->imsg_ring[mbox].fq_wrptr = 0;
1665 for (i = 0; i < priv->imsg_ring[mbox].size; i++)
1666 priv->imsg_ring[mbox].imq_base[i] = NULL;
1667 spin_lock_init(&priv->imsg_ring[mbox].lock);
1668
1669 /* Allocate buffers for incoming messages */
1670 priv->imsg_ring[mbox].buf_base =
1671 dma_alloc_coherent(&priv->pdev->dev,
1672 entries * TSI721_MSG_BUFFER_SIZE,
1673 &priv->imsg_ring[mbox].buf_phys,
1674 GFP_KERNEL);
1675
1676 if (priv->imsg_ring[mbox].buf_base == NULL) {
1677 dev_err(&priv->pdev->dev,
1678 "Failed to allocate buffers for IB MBOX%d\n", mbox);
1679 rc = -ENOMEM;
1680 goto out;
1681 }
1682
1683 /* Allocate memory for circular free list */
1684 priv->imsg_ring[mbox].imfq_base =
1685 dma_alloc_coherent(&priv->pdev->dev,
1686 entries * 8,
1687 &priv->imsg_ring[mbox].imfq_phys,
1688 GFP_KERNEL);
1689
1690 if (priv->imsg_ring[mbox].imfq_base == NULL) {
1691 dev_err(&priv->pdev->dev,
1692 "Failed to allocate free queue for IB MBOX%d\n", mbox);
1693 rc = -ENOMEM;
1694 goto out_buf;
1695 }
1696
1697 /* Allocate memory for Inbound message descriptors */
1698 priv->imsg_ring[mbox].imd_base =
1699 dma_alloc_coherent(&priv->pdev->dev,
1700 entries * sizeof(struct tsi721_imsg_desc),
1701 &priv->imsg_ring[mbox].imd_phys, GFP_KERNEL);
1702
1703 if (priv->imsg_ring[mbox].imd_base == NULL) {
1704 dev_err(&priv->pdev->dev,
1705 "Failed to allocate descriptor memory for IB MBOX%d\n",
1706 mbox);
1707 rc = -ENOMEM;
1708 goto out_dma;
1709 }
1710
1711 /* Fill free buffer pointer list */
1712 free_ptr = priv->imsg_ring[mbox].imfq_base;
1713 for (i = 0; i < entries; i++)
1714 free_ptr[i] = cpu_to_le64(
1715 (u64)(priv->imsg_ring[mbox].buf_phys) +
1716 i * 0x1000);
1717
1718 mb();
1719
1720 /*
1721 * To map inbound SRIO messages into the appropriate queues we need to set
1722 * the Inbound Device ID register in the messaging engine. We do this once,
1723 * when the first inbound mailbox is requested.
1724 */
1725 if (!(priv->flags & TSI721_IMSGID_SET)) {
1726 iowrite32((u32)priv->mport->host_deviceid,
1727 priv->regs + TSI721_IB_DEVID);
1728 priv->flags |= TSI721_IMSGID_SET;
1729 }
1730
1731 /*
1732 * Configure Inbound Messaging channel (ch = mbox + 4)
1733 */
1734
1735 /* Setup Inbound Message free queue */
1736 iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys >> 32),
1737 priv->regs + TSI721_IBDMAC_FQBH(ch));
1738 iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys &
1739 TSI721_IBDMAC_FQBL_MASK),
1740 priv->regs+TSI721_IBDMAC_FQBL(ch));
1741 iowrite32(TSI721_DMAC_DSSZ_SIZE(entries),
1742 priv->regs + TSI721_IBDMAC_FQSZ(ch));
1743
1744 /* Setup Inbound Message descriptor queue */
1745 iowrite32(((u64)priv->imsg_ring[mbox].imd_phys >> 32),
1746 priv->regs + TSI721_IBDMAC_DQBH(ch));
1747 iowrite32(((u32)priv->imsg_ring[mbox].imd_phys &
1748 (u32)TSI721_IBDMAC_DQBL_MASK),
1749 priv->regs+TSI721_IBDMAC_DQBL(ch));
1750 iowrite32(TSI721_DMAC_DSSZ_SIZE(entries),
1751 priv->regs + TSI721_IBDMAC_DQSZ(ch));
1752
1753 /* Enable interrupts */
1754
1755#ifdef CONFIG_PCI_MSI
1756 if (priv->flags & TSI721_USING_MSIX) {
1757 /* Request interrupt service if we are in MSI-X mode */
1758 rc = request_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
1759 tsi721_imsg_msix, 0,
1760 priv->msix[TSI721_VECT_IMB0_RCV + mbox].irq_name,
1761 (void *)mport);
1762
1763 if (rc) {
1764 dev_dbg(&priv->pdev->dev,
1765 "Unable to allocate MSI-X interrupt for "
1766 "IBOX%d-DONE\n", mbox);
1767 goto out_desc;
1768 }
1769
1770 rc = request_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
1771 tsi721_imsg_msix, 0,
1772 priv->msix[TSI721_VECT_IMB0_INT + mbox].irq_name,
1773 (void *)mport);
1774
1775 if (rc) {
1776 dev_dbg(&priv->pdev->dev,
1777 "Unable to allocate MSI-X interrupt for "
1778 "IBOX%d-INT\n", mbox);
1779 free_irq(
1780 priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
1781 (void *)mport);
1782 goto out_desc;
1783 }
1784 }
1785#endif /* CONFIG_PCI_MSI */
1786
1787 tsi721_imsg_interrupt_enable(priv, ch, TSI721_IBDMAC_INT_ALL);
1788
1789 /* Initialize Inbound Message Engine */
1790 iowrite32(TSI721_IBDMAC_CTL_INIT, priv->regs + TSI721_IBDMAC_CTL(ch));
1791 ioread32(priv->regs + TSI721_IBDMAC_CTL(ch));
1792 udelay(10);
1793 priv->imsg_ring[mbox].fq_wrptr = entries - 1;
1794 iowrite32(entries - 1, priv->regs + TSI721_IBDMAC_FQWP(ch));
1795
1796 priv->imsg_init[mbox] = 1;
1797 return 0;
1798
1799#ifdef CONFIG_PCI_MSI
1800out_desc:
1801 dma_free_coherent(&priv->pdev->dev,
1802 priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc),
1803 priv->imsg_ring[mbox].imd_base,
1804 priv->imsg_ring[mbox].imd_phys);
1805
1806 priv->imsg_ring[mbox].imd_base = NULL;
1807#endif /* CONFIG_PCI_MSI */
1808
1809out_dma:
1810 dma_free_coherent(&priv->pdev->dev,
1811 priv->imsg_ring[mbox].size * 8,
1812 priv->imsg_ring[mbox].imfq_base,
1813 priv->imsg_ring[mbox].imfq_phys);
1814
1815 priv->imsg_ring[mbox].imfq_base = NULL;
1816
1817out_buf:
1818 dma_free_coherent(&priv->pdev->dev,
1819 priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE,
1820 priv->imsg_ring[mbox].buf_base,
1821 priv->imsg_ring[mbox].buf_phys);
1822
1823 priv->imsg_ring[mbox].buf_base = NULL;
1824
1825out:
1826 return rc;
1827}
1828
1829/**
1830 * tsi721_close_inb_mbox - Shut down Tsi721 inbound mailbox
1831 * @mport: Master port implementing the Inbound Messaging Engine
1832 * @mbox: Mailbox to close
1833 */
1834static void tsi721_close_inb_mbox(struct rio_mport *mport, int mbox)
1835{
1836 struct tsi721_device *priv = mport->priv;
1837 u32 rx_slot;
1838 int ch = mbox + 4;
1839
1840 if (!priv->imsg_init[mbox]) /* mbox isn't initialized yet */
1841 return;
1842 priv->imsg_init[mbox] = 0;
1843
1844 /* Disable Inbound Messaging Engine */
1845
1846 /* Disable Interrupts */
1847 tsi721_imsg_interrupt_disable(priv, ch, TSI721_OBDMAC_INT_MASK);
1848
1849#ifdef CONFIG_PCI_MSI
1850 if (priv->flags & TSI721_USING_MSIX) {
1851 free_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
1852 (void *)mport);
1853 free_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
1854 (void *)mport);
1855 }
1856#endif /* CONFIG_PCI_MSI */
1857
1858 /* Clear Inbound Buffer Queue */
1859 for (rx_slot = 0; rx_slot < priv->imsg_ring[mbox].size; rx_slot++)
1860 priv->imsg_ring[mbox].imq_base[rx_slot] = NULL;
1861
1862 /* Free memory allocated for message buffers */
1863 dma_free_coherent(&priv->pdev->dev,
1864 priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE,
1865 priv->imsg_ring[mbox].buf_base,
1866 priv->imsg_ring[mbox].buf_phys);
1867
1868 priv->imsg_ring[mbox].buf_base = NULL;
1869
1870 /* Free memory allocated for free pointer list */
1871 dma_free_coherent(&priv->pdev->dev,
1872 priv->imsg_ring[mbox].size * 8,
1873 priv->imsg_ring[mbox].imfq_base,
1874 priv->imsg_ring[mbox].imfq_phys);
1875
1876 priv->imsg_ring[mbox].imfq_base = NULL;
1877
1878 /* Free memory allocated for RX descriptors */
1879 dma_free_coherent(&priv->pdev->dev,
1880 priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc),
1881 priv->imsg_ring[mbox].imd_base,
1882 priv->imsg_ring[mbox].imd_phys);
1883
1884 priv->imsg_ring[mbox].imd_base = NULL;
1885}
1886
1887/**
1888 * tsi721_add_inb_buffer - Add buffer to the Tsi721 inbound message queue
1889 * @mport: Master port implementing the Inbound Messaging Engine
1890 * @mbox: Inbound mailbox number
1891 * @buf: Buffer to add to inbound queue
1892 */
1893static int tsi721_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf)
1894{
1895 struct tsi721_device *priv = mport->priv;
1896 u32 rx_slot;
1897 int rc = 0;
1898
1899 rx_slot = priv->imsg_ring[mbox].rx_slot;
1900 if (priv->imsg_ring[mbox].imq_base[rx_slot]) {
1901 dev_err(&priv->pdev->dev,
1902 "Error adding inbound buffer %d, buffer exists\n",
1903 rx_slot);
1904 rc = -EINVAL;
1905 goto out;
1906 }
1907
1908 priv->imsg_ring[mbox].imq_base[rx_slot] = buf;
1909
1910 if (++priv->imsg_ring[mbox].rx_slot == priv->imsg_ring[mbox].size)
1911 priv->imsg_ring[mbox].rx_slot = 0;
1912
1913out:
1914 return rc;
1915}
1916
1917/**
1918 * tsi721_get_inb_message - Fetch inbound message from the Tsi721 MSG Queue
1919 * @mport: Master port implementing the Inbound Messaging Engine
1920 * @mbox: Inbound mailbox number
1921 *
1922 * Returns pointer to the message on success or NULL on failure.
1923 */
1924static void *tsi721_get_inb_message(struct rio_mport *mport, int mbox)
1925{
1926 struct tsi721_device *priv = mport->priv;
1927 struct tsi721_imsg_desc *desc;
1928 u32 rx_slot;
1929 void *rx_virt = NULL;
1930 u64 rx_phys;
1931 void *buf = NULL;
1932 u64 *free_ptr;
1933 int ch = mbox + 4;
1934 int msg_size;
1935
1936 if (!priv->imsg_init[mbox])
1937 return NULL;
1938
1939 desc = priv->imsg_ring[mbox].imd_base;
1940 desc += priv->imsg_ring[mbox].desc_rdptr;
1941
1942 if (!(le32_to_cpu(desc->msg_info) & TSI721_IMD_HO))
1943 goto out;
1944
1945 rx_slot = priv->imsg_ring[mbox].rx_slot;
1946 while (priv->imsg_ring[mbox].imq_base[rx_slot] == NULL) {
1947 if (++rx_slot == priv->imsg_ring[mbox].size)
1948 rx_slot = 0;
1949 }
1950
1951 rx_phys = ((u64)le32_to_cpu(desc->bufptr_hi) << 32) |
1952 le32_to_cpu(desc->bufptr_lo);
1953
1954 rx_virt = priv->imsg_ring[mbox].buf_base +
1955 (rx_phys - (u64)priv->imsg_ring[mbox].buf_phys);
1956
1957 buf = priv->imsg_ring[mbox].imq_base[rx_slot];
1958 msg_size = le32_to_cpu(desc->msg_info) & TSI721_IMD_BCOUNT;
1959 if (msg_size == 0)
1960 msg_size = RIO_MAX_MSG_SIZE;
1961
1962 memcpy(buf, rx_virt, msg_size);
1963 priv->imsg_ring[mbox].imq_base[rx_slot] = NULL;
1964
1965 desc->msg_info &= cpu_to_le32(~TSI721_IMD_HO);
1966 if (++priv->imsg_ring[mbox].desc_rdptr == priv->imsg_ring[mbox].size)
1967 priv->imsg_ring[mbox].desc_rdptr = 0;
1968
1969 iowrite32(priv->imsg_ring[mbox].desc_rdptr,
1970 priv->regs + TSI721_IBDMAC_DQRP(ch));
1971
1972 /* Return free buffer into the pointer list */
1973 free_ptr = priv->imsg_ring[mbox].imfq_base;
1974 free_ptr[priv->imsg_ring[mbox].fq_wrptr] = cpu_to_le64(rx_phys);
1975
1976 if (++priv->imsg_ring[mbox].fq_wrptr == priv->imsg_ring[mbox].size)
1977 priv->imsg_ring[mbox].fq_wrptr = 0;
1978
1979 iowrite32(priv->imsg_ring[mbox].fq_wrptr,
1980 priv->regs + TSI721_IBDMAC_FQWP(ch));
1981out:
1982 return buf;
1983}
1984
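/*
 * Receive-path sketch (assuming the generic RapidIO messaging API in
 * rio_drv.h): after rio_request_inb_mbox() the client keeps the free list
 * topped up with
 *
 *	rio_add_inb_buffer(mport, mbox, buf);
 *
 * and, from its inbound callback, drains completed messages with
 *
 *	while ((buf = rio_get_inb_message(mport, mbox)) != NULL)
 *		process(buf);
 *
 * which map onto tsi721_add_inb_buffer() and tsi721_get_inb_message()
 * above (process() here is a placeholder for the client's own handler).
 */
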
1985/**
1986 * tsi721_messages_init - Initialization of Messaging Engine
1987 * @priv: pointer to tsi721 private data
1988 *
1989 * Configures Tsi721 messaging engine.
1990 */
1991static int tsi721_messages_init(struct tsi721_device *priv)
1992{
1993 int ch;
1994
1995 iowrite32(0, priv->regs + TSI721_SMSG_ECC_LOG);
1996 iowrite32(0, priv->regs + TSI721_RETRY_GEN_CNT);
1997 iowrite32(0, priv->regs + TSI721_RETRY_RX_CNT);
1998
1999 /* Set SRIO Message Request/Response Timeout */
2000 iowrite32(TSI721_RQRPTO_VAL, priv->regs + TSI721_RQRPTO);
2001
2002 /* Initialize Inbound Messaging Engine Registers */
2003 for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++) {
2004 /* Clear interrupt bits */
2005 iowrite32(TSI721_IBDMAC_INT_MASK,
2006 priv->regs + TSI721_IBDMAC_INT(ch));
2007 /* Clear Status */
2008 iowrite32(0, priv->regs + TSI721_IBDMAC_STS(ch));
2009
2010 iowrite32(TSI721_SMSG_ECC_COR_LOG_MASK,
2011 priv->regs + TSI721_SMSG_ECC_COR_LOG(ch));
2012 iowrite32(TSI721_SMSG_ECC_NCOR_MASK,
2013 priv->regs + TSI721_SMSG_ECC_NCOR(ch));
2014 }
2015
2016 return 0;
2017}
2018
2019/**
2020 * tsi721_disable_ints - disables all device interrupts
2021 * @priv: pointer to tsi721 private data
2022 */
2023static void tsi721_disable_ints(struct tsi721_device *priv)
2024{
2025 int ch;
2026
2027 /* Disable all device level interrupts */
2028 iowrite32(0, priv->regs + TSI721_DEV_INTE);
2029
2030 /* Disable all Device Channel interrupts */
2031 iowrite32(0, priv->regs + TSI721_DEV_CHAN_INTE);
2032
2033 /* Disable all Inbound Msg Channel interrupts */
2034 for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++)
2035 iowrite32(0, priv->regs + TSI721_IBDMAC_INTE(ch));
2036
2037 /* Disable all Outbound Msg Channel interrupts */
2038 for (ch = 0; ch < TSI721_OMSG_CHNUM; ch++)
2039 iowrite32(0, priv->regs + TSI721_OBDMAC_INTE(ch));
2040
2041 /* Disable all general messaging interrupts */
2042 iowrite32(0, priv->regs + TSI721_SMSG_INTE);
2043
2044 /* Disable all BDMA Channel interrupts */
2045 for (ch = 0; ch < TSI721_DMA_MAXCH; ch++)
2046 iowrite32(0, priv->regs + TSI721_DMAC_INTE(ch));
2047
2048 /* Disable all general BDMA interrupts */
2049 iowrite32(0, priv->regs + TSI721_BDMA_INTE);
2050
2051 /* Disable all SRIO Channel interrupts */
2052 for (ch = 0; ch < TSI721_SRIO_MAXCH; ch++)
2053 iowrite32(0, priv->regs + TSI721_SR_CHINTE(ch));
2054
2055 /* Disable all general SR2PC interrupts */
2056 iowrite32(0, priv->regs + TSI721_SR2PC_GEN_INTE);
2057
2058 /* Disable all PC2SR interrupts */
2059 iowrite32(0, priv->regs + TSI721_PC2SR_INTE);
2060
2061 /* Disable all I2C interrupts */
2062 iowrite32(0, priv->regs + TSI721_I2C_INT_ENABLE);
2063
2064 /* Disable SRIO MAC interrupts */
2065 iowrite32(0, priv->regs + TSI721_RIO_EM_INT_ENABLE);
2066 iowrite32(0, priv->regs + TSI721_RIO_EM_DEV_INT_EN);
2067}
2068
2069/**
2070 * tsi721_setup_mport - Setup Tsi721 as RapidIO subsystem master port
2071 * @priv: pointer to tsi721 private data
2072 *
2073 * Configures Tsi721 as RapidIO master port.
2074 */
2075static int __devinit tsi721_setup_mport(struct tsi721_device *priv)
2076{
2077 struct pci_dev *pdev = priv->pdev;
2078 int err = 0;
2079 struct rio_ops *ops;
2080
2081 struct rio_mport *mport;
2082
2083 ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
2084 if (!ops) {
2085 dev_dbg(&pdev->dev, "Unable to allocate memory for rio_ops\n");
2086 return -ENOMEM;
2087 }
2088
2089 ops->lcread = tsi721_lcread;
2090 ops->lcwrite = tsi721_lcwrite;
2091 ops->cread = tsi721_cread_dma;
2092 ops->cwrite = tsi721_cwrite_dma;
2093 ops->dsend = tsi721_dsend;
2094 ops->open_inb_mbox = tsi721_open_inb_mbox;
2095 ops->close_inb_mbox = tsi721_close_inb_mbox;
2096 ops->open_outb_mbox = tsi721_open_outb_mbox;
2097 ops->close_outb_mbox = tsi721_close_outb_mbox;
2098 ops->add_outb_message = tsi721_add_outb_message;
2099 ops->add_inb_buffer = tsi721_add_inb_buffer;
2100 ops->get_inb_message = tsi721_get_inb_message;
2101
2102 mport = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
2103 if (!mport) {
2104 kfree(ops);
2105 dev_dbg(&pdev->dev, "Unable to allocate memory for mport\n");
2106 return -ENOMEM;
2107 }
2108
2109 mport->ops = ops;
2110 mport->index = 0;
2111 mport->sys_size = 0; /* small system */
2112 mport->phy_type = RIO_PHY_SERIAL;
2113 mport->priv = (void *)priv;
2114 mport->phys_efptr = 0x100;
2115
2116 INIT_LIST_HEAD(&mport->dbells);
2117
2118 rio_init_dbell_res(&mport->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff);
2119 rio_init_mbox_res(&mport->riores[RIO_INB_MBOX_RESOURCE], 0, 0);
2120 rio_init_mbox_res(&mport->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0);
2121 strcpy(mport->name, "Tsi721 mport");
2122
2123 /* Hook up interrupt handler */
2124
2125#ifdef CONFIG_PCI_MSI
2126 if (!tsi721_enable_msix(priv))
2127 priv->flags |= TSI721_USING_MSIX;
2128 else if (!pci_enable_msi(pdev))
2129 priv->flags |= TSI721_USING_MSI;
2130 else
2131 dev_info(&pdev->dev,
2132 "MSI/MSI-X is not available. Using legacy INTx.\n");
2133#endif /* CONFIG_PCI_MSI */
2134
2135 err = tsi721_request_irq(mport);
2136
2137 if (!err) {
2138 tsi721_interrupts_init(priv);
2139 ops->pwenable = tsi721_pw_enable;
2140 } else
2141 dev_err(&pdev->dev, "Unable to get assigned PCI IRQ "
2142 "vector %02X err=0x%x\n", pdev->irq, err);
2143
2144 /* Enable SRIO link */
2145 iowrite32(ioread32(priv->regs + TSI721_DEVCTL) |
2146 TSI721_DEVCTL_SRBOOT_CMPL,
2147 priv->regs + TSI721_DEVCTL);
2148
2149 rio_register_mport(mport);
2150 priv->mport = mport;
2151
2152 if (mport->host_deviceid >= 0)
2153 iowrite32(RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER |
2154 RIO_PORT_GEN_DISCOVERED,
2155 priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
2156 else
2157 iowrite32(0, priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
2158
2159 return 0;
2160}
2161
2162static int __devinit tsi721_probe(struct pci_dev *pdev,
2163 const struct pci_device_id *id)
2164{
2165 struct tsi721_device *priv;
2166 int i;
2167 int err;
2168 u32 regval;
2169
2170 priv = kzalloc(sizeof(struct tsi721_device), GFP_KERNEL);
2171 if (priv == NULL) {
2172 dev_err(&pdev->dev, "Failed to allocate memory for device\n");
2173 err = -ENOMEM;
2174 goto err_exit;
2175 }
2176
2177 err = pci_enable_device(pdev);
2178 if (err) {
2179 dev_err(&pdev->dev, "Failed to enable PCI device\n");
2180 goto err_clean;
2181 }
2182
2183 priv->pdev = pdev;
2184
2185#ifdef DEBUG
2186 for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
2187 dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n",
2188 i, (unsigned long long)pci_resource_start(pdev, i),
2189 (unsigned long)pci_resource_len(pdev, i),
2190 pci_resource_flags(pdev, i));
2191 }
2192#endif
2193 /*
2194 * Verify BAR configuration
2195 */
2196
2197 /* BAR_0 (registers) must be 512KB+ in 32-bit address space */
2198 if (!(pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM) ||
2199 pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM_64 ||
2200 pci_resource_len(pdev, BAR_0) < TSI721_REG_SPACE_SIZE) {
2201 dev_err(&pdev->dev,
2202 "Missing or misconfigured CSR BAR0, aborting.\n");
2203 err = -ENODEV;
2204 goto err_disable_pdev;
2205 }
2206
2207 /* BAR_1 (outbound doorbells) must be 16MB+ in 32-bit address space */
2208 if (!(pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM) ||
2209 pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM_64 ||
2210 pci_resource_len(pdev, BAR_1) < TSI721_DB_WIN_SIZE) {
2211 dev_err(&pdev->dev,
2212 "Missing or misconfigured Doorbell BAR1, aborting.\n");
2213 err = -ENODEV;
2214 goto err_disable_pdev;
2215 }
2216
2217 /*
2218 * BAR_2 and BAR_4 (outbound translation) must be in 64-bit PCIe address
2219 * space.
2220 * NOTE: BAR_2 and BAR_4 are not used by this version of the driver.
2221 * It may be a good idea to keep them disabled using HW configuration
2222 * to save PCI memory space.
2223 */
2224 if ((pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM) &&
2225 (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64)) {
2226 dev_info(&pdev->dev, "Outbound BAR2 is not used but enabled.\n");
2227 }
2228
2229 if ((pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM) &&
2230 (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64)) {
2231 dev_info(&pdev->dev, "Outbound BAR4 is not used but enabled.\n");
2232 }
2233
2234 err = pci_request_regions(pdev, DRV_NAME);
2235 if (err) {
2236 dev_err(&pdev->dev, "Cannot obtain PCI resources, "
2237 "aborting.\n");
2238 goto err_disable_pdev;
2239 }
2240
2241 pci_set_master(pdev);
2242
2243 priv->regs = pci_ioremap_bar(pdev, BAR_0);
2244 if (!priv->regs) {
2245 dev_err(&pdev->dev,
2246 "Unable to map device registers space, aborting\n");
2247 err = -ENOMEM;
2248 goto err_free_res;
2249 }
2250
2251 priv->odb_base = pci_ioremap_bar(pdev, BAR_1);
2252 if (!priv->odb_base) {
2253 dev_err(&pdev->dev,
2254 "Unable to map outbound doorbells space, aborting\n");
2255 err = -ENOMEM;
2256 goto err_unmap_bars;
2257 }
2258
2259 /* Configure DMA attributes. */
2260 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
2261 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
2262 dev_info(&pdev->dev, "Unable to set DMA mask\n");
2263 goto err_unmap_bars;
2264 }
2265
2266 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
2267 dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
2268 } else {
2269 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2270 if (err)
2271 dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
2272 }
2273
2274 /* Clear "no snoop" and "relaxed ordering" bits. */
2275 pci_read_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, &regval);
2276 regval &= ~(PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN);
2277 pci_write_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, regval);
2278
2279 /*
2280 * FIXUP: correct offsets of MSI-X tables in the MSI-X Capability Block
2281 */
2282 pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0x01);
2283 pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXTBL,
2284 TSI721_MSIXTBL_OFFSET);
2285 pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXPBA,
2286 TSI721_MSIXPBA_OFFSET);
2287 pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0);
2288 /* End of FIXUP */
2289
2290 tsi721_disable_ints(priv);
2291
2292 tsi721_init_pc2sr_mapping(priv);
2293 tsi721_init_sr2pc_mapping(priv);
2294
2295 if (tsi721_bdma_init(priv)) {
2296 dev_err(&pdev->dev, "BDMA initialization failed, aborting\n");
2297 err = -ENOMEM;
2298 goto err_unmap_bars;
2299 }
2300
2301 err = tsi721_doorbell_init(priv);
2302 if (err)
2303 goto err_free_bdma;
2304
2305 tsi721_port_write_init(priv);
2306
2307 err = tsi721_messages_init(priv);
2308 if (err)
2309 goto err_free_consistent;
2310
2311 err = tsi721_setup_mport(priv);
2312 if (err)
2313 goto err_free_consistent;
2314
2315 return 0;
2316
2317err_free_consistent:
2318 tsi721_doorbell_free(priv);
2319err_free_bdma:
2320 tsi721_bdma_free(priv);
2321err_unmap_bars:
2322 if (priv->regs)
2323 iounmap(priv->regs);
2324 if (priv->odb_base)
2325 iounmap(priv->odb_base);
2326err_free_res:
2327 pci_release_regions(pdev);
2328 pci_clear_master(pdev);
2329err_disable_pdev:
2330 pci_disable_device(pdev);
2331err_clean:
2332 kfree(priv);
2333err_exit:
2334 return err;
2335}
2336
2337static DEFINE_PCI_DEVICE_TABLE(tsi721_pci_tbl) = {
2338 { PCI_DEVICE(PCI_VENDOR_ID_IDT, PCI_DEVICE_ID_TSI721) },
2339 { 0, } /* terminate list */
2340};
2341
2342MODULE_DEVICE_TABLE(pci, tsi721_pci_tbl);
2343
2344static struct pci_driver tsi721_driver = {
2345 .name = "tsi721",
2346 .id_table = tsi721_pci_tbl,
2347 .probe = tsi721_probe,
2348};
2349
2350static int __init tsi721_init(void)
2351{
2352 return pci_register_driver(&tsi721_driver);
2353}
2354
2355static void __exit tsi721_exit(void)
2356{
2357 pci_unregister_driver(&tsi721_driver);
2358}
2359
2360device_initcall(tsi721_init);
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
new file mode 100644
index 000000000000..58be4deb1402
--- /dev/null
+++ b/drivers/rapidio/devices/tsi721.h
@@ -0,0 +1,766 @@
1/*
2 * Tsi721 PCI Express-to-SRIO bridge definitions
3 *
4 * Copyright 2011, Integrated Device Technology, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 59
18 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21#ifndef __TSI721_H
22#define __TSI721_H
23
24#define DRV_NAME "tsi721"
25
26#define DEFAULT_HOPCOUNT 0xff
27#define DEFAULT_DESTID 0xff
28
29/* PCI device ID */
30#define PCI_DEVICE_ID_TSI721 0x80ab
31
32#define BAR_0 0
33#define BAR_1 1
34#define BAR_2 2
35#define BAR_4 4
36
37#define TSI721_PC2SR_BARS 2
38#define TSI721_PC2SR_WINS 8
39#define TSI721_PC2SR_ZONES 8
40#define TSI721_MAINT_WIN 0 /* Window for outbound maintenance requests */
41#define IDB_QUEUE 0 /* Inbound Doorbell Queue to use */
42#define IDB_QSIZE 512 /* Inbound Doorbell Queue size */
43
44/* Memory space sizes */
45#define TSI721_REG_SPACE_SIZE (512 * 1024) /* 512K */
46#define TSI721_DB_WIN_SIZE (16 * 1024 * 1024) /* 16MB */
47
48#define RIO_TT_CODE_8 0x00000000
49#define RIO_TT_CODE_16 0x00000001
50
51#define TSI721_DMA_MAXCH 8
52#define TSI721_DMA_MINSTSSZ 32
53#define TSI721_DMA_STSBLKSZ 8
54
55#define TSI721_SRIO_MAXCH 8
56
57#define DBELL_SID(buf) (((u8)buf[2] << 8) | (u8)buf[3])
58#define DBELL_TID(buf) (((u8)buf[4] << 8) | (u8)buf[5])
59#define DBELL_INF(buf) (((u8)buf[0] << 8) | (u8)buf[1])
60
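The DBELL_* macros above pull the 16-bit info, source-ID and target-ID fields out of the first six bytes of an inbound doorbell queue entry, stored in big-endian byte order. A small user-space sketch of the same decoding, using an invented sample entry rather than real hardware data:

#include <stdint.h>
#include <stdio.h>

/* Same layout as the DBELL_* macros: big-endian 16-bit fields at
 * bytes 0-1 (info), 2-3 (source ID) and 4-5 (target ID). */
static unsigned int be16_at(const uint8_t *buf, int off)
{
	return (unsigned int)((buf[off] << 8) | buf[off + 1]);
}

int main(void)
{
	/* Hypothetical first bytes of a 64-byte inbound doorbell entry */
	uint8_t entry[64] = { 0x12, 0x34, 0x00, 0x05, 0x00, 0x01 };

	printf("info=0x%04x sid=0x%04x tid=0x%04x\n",
	       be16_at(entry, 0),	/* DBELL_INF */
	       be16_at(entry, 2),	/* DBELL_SID */
	       be16_at(entry, 4));	/* DBELL_TID */
	return 0;
}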
61#define TSI721_RIO_PW_MSG_SIZE 16 /* Tsi721 saves only 16 bytes of PW msg */
62
63/* Register definitions */
64
65/*
66 * Registers in PCIe configuration space
67 */
68
69#define TSI721_PCIECFG_MSIXTBL 0x0a4
70#define TSI721_MSIXTBL_OFFSET 0x2c000
71#define TSI721_PCIECFG_MSIXPBA 0x0a8
72#define TSI721_MSIXPBA_OFFSET 0x2a000
73#define TSI721_PCIECFG_EPCTL 0x400
74
75/*
76 * Event Management Registers
77 */
78
79#define TSI721_RIO_EM_INT_STAT 0x10910
80#define TSI721_RIO_EM_INT_STAT_PW_RX 0x00010000
81
82#define TSI721_RIO_EM_INT_ENABLE 0x10914
83#define TSI721_RIO_EM_INT_ENABLE_PW_RX 0x00010000
84
85#define TSI721_RIO_EM_DEV_INT_EN 0x10930
86#define TSI721_RIO_EM_DEV_INT_EN_INT 0x00000001
87
88/*
89 * Port-Write Block Registers
90 */
91
92#define TSI721_RIO_PW_CTL 0x10a04
93#define TSI721_RIO_PW_CTL_PW_TIMER 0xf0000000
94#define TSI721_RIO_PW_CTL_PWT_DIS (0 << 28)
95#define TSI721_RIO_PW_CTL_PWT_103 (1 << 28)
96#define TSI721_RIO_PW_CTL_PWT_205 (1 << 29)
97#define TSI721_RIO_PW_CTL_PWT_410 (1 << 30)
98#define TSI721_RIO_PW_CTL_PWT_820 (1 << 31)
99#define TSI721_RIO_PW_CTL_PWC_MODE 0x01000000
100#define TSI721_RIO_PW_CTL_PWC_CONT 0x00000000
101#define TSI721_RIO_PW_CTL_PWC_REL 0x01000000
102
103#define TSI721_RIO_PW_RX_STAT 0x10a10
104#define TSI721_RIO_PW_RX_STAT_WR_SIZE 0x0000f000
105#define TSI_RIO_PW_RX_STAT_WDPTR 0x00000100
106#define TSI721_RIO_PW_RX_STAT_PW_SHORT 0x00000008
107#define TSI721_RIO_PW_RX_STAT_PW_TRUNC 0x00000004
108#define TSI721_RIO_PW_RX_STAT_PW_DISC 0x00000002
109#define TSI721_RIO_PW_RX_STAT_PW_VAL 0x00000001
110
111#define TSI721_RIO_PW_RX_CAPT(x) (0x10a20 + (x)*4)
112
113/*
114 * Inbound Doorbells
115 */
116
117#define TSI721_IDB_ENTRY_SIZE 64
118
119#define TSI721_IDQ_CTL(x) (0x20000 + (x) * 1000)
120#define TSI721_IDQ_SUSPEND 0x00000002
121#define TSI721_IDQ_INIT 0x00000001
122
123#define TSI721_IDQ_STS(x) (0x20004 + (x) * 1000)
124#define TSI721_IDQ_RUN 0x00200000
125
126#define TSI721_IDQ_MASK(x) (0x20008 + (x) * 1000)
127#define TSI721_IDQ_MASK_MASK 0xffff0000
128#define TSI721_IDQ_MASK_PATT 0x0000ffff
129
130#define TSI721_IDQ_RP(x) (0x2000c + (x) * 1000)
131#define TSI721_IDQ_RP_PTR 0x0007ffff
132
133#define TSI721_IDQ_WP(x) (0x20010 + (x) * 1000)
134#define TSI721_IDQ_WP_PTR 0x0007ffff
135
136#define TSI721_IDQ_BASEL(x) (0x20014 + (x) * 1000)
137#define TSI721_IDQ_BASEL_ADDR 0xffffffc0
138#define TSI721_IDQ_BASEU(x) (0x20018 + (x) * 1000)
139#define TSI721_IDQ_SIZE(x) (0x2001c + (x) * 1000)
140#define TSI721_IDQ_SIZE_VAL(size) (__fls(size) - 4)
141#define TSI721_IDQ_SIZE_MIN 512
142#define TSI721_IDQ_SIZE_MAX (512 * 1024)
143
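TSI721_IDQ_SIZE_VAL() encodes a power-of-two queue size as log2(size) - 4, so the 512 minimum maps to 5 and the 512K maximum to 15 (TSI721_OBWIN_SIZE() further down uses the same idiom with an offset of 15). A user-space sketch of the arithmetic, with __builtin_clzl() standing in for the kernel's __fls():

#include <stdio.h>

/* Same arithmetic as TSI721_IDQ_SIZE_VAL() above; __fls(x) is the
 * index of the highest set bit, so __fls(512) == 9 and 9 - 4 == 5. */
static unsigned int idq_size_val(unsigned long size)
{
	unsigned int fls = 8 * sizeof(long) - 1 - __builtin_clzl(size);

	return fls - 4;
}

int main(void)
{
	unsigned long size;

	/* 512 -> 5, 1024 -> 6, ..., 512K -> 15: a 4-bit encoding */
	for (size = 512; size <= 512 * 1024; size <<= 1)
		printf("size %7lu -> field value %u\n", size, idq_size_val(size));
	return 0;
}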
144#define TSI721_SR_CHINT(x) (0x20040 + (x) * 1000)
145#define TSI721_SR_CHINTE(x) (0x20044 + (x) * 1000)
146#define TSI721_SR_CHINTSET(x) (0x20048 + (x) * 1000)
147#define TSI721_SR_CHINT_ODBOK 0x00000020
148#define TSI721_SR_CHINT_IDBQRCV 0x00000010
149#define TSI721_SR_CHINT_SUSP 0x00000008
150#define TSI721_SR_CHINT_ODBTO 0x00000004
151#define TSI721_SR_CHINT_ODBRTRY 0x00000002
152#define TSI721_SR_CHINT_ODBERR 0x00000001
153#define TSI721_SR_CHINT_ALL 0x0000003f
154
155#define TSI721_IBWIN_NUM 8
156
157#define TSI721_IBWINLB(x) (0x29000 + (x) * 20)
158#define TSI721_IBWINLB_BA 0xfffff000
159#define TSI721_IBWINLB_WEN 0x00000001
160
161#define TSI721_SR2PC_GEN_INTE 0x29800
162#define TSI721_SR2PC_PWE 0x29804
163#define TSI721_SR2PC_GEN_INT 0x29808
164
165#define TSI721_DEV_INTE 0x29840
166#define TSI721_DEV_INT 0x29844
167#define TSI721_DEV_INTSET 0x29848
168#define TSI721_DEV_INT_SMSG_CH 0x00000800
169#define TSI721_DEV_INT_SMSG_NCH 0x00000400
170#define TSI721_DEV_INT_SR2PC_CH 0x00000200
171#define TSI721_DEV_INT_SRIO 0x00000020
172
173#define TSI721_DEV_CHAN_INTE 0x2984c
174#define TSI721_DEV_CHAN_INT 0x29850
175
176#define TSI721_INT_SR2PC_CHAN_M 0xff000000
177#define TSI721_INT_SR2PC_CHAN(x) (1 << (24 + (x)))
178#define TSI721_INT_IMSG_CHAN_M 0x00ff0000
179#define TSI721_INT_IMSG_CHAN(x) (1 << (16 + (x)))
180#define TSI721_INT_OMSG_CHAN_M 0x0000ff00
181#define TSI721_INT_OMSG_CHAN(x) (1 << (8 + (x)))
182
183/*
184 * PC2SR block registers
185 */
186#define TSI721_OBWIN_NUM TSI721_PC2SR_WINS
187
188#define TSI721_OBWINLB(x) (0x40000 + (x) * 20)
189#define TSI721_OBWINLB_BA 0xffff8000
190#define TSI721_OBWINLB_WEN 0x00000001
191
192#define TSI721_OBWINUB(x) (0x40004 + (x) * 20)
193
194#define TSI721_OBWINSZ(x) (0x40008 + (x) * 20)
195#define TSI721_OBWINSZ_SIZE 0x00001f00
196#define TSI721_OBWIN_SIZE(size) (__fls(size) - 15)
197
198#define TSI721_ZONE_SEL 0x41300
199#define TSI721_ZONE_SEL_RD_WRB 0x00020000
200#define TSI721_ZONE_SEL_GO 0x00010000
201#define TSI721_ZONE_SEL_WIN 0x00000038
202#define TSI721_ZONE_SEL_ZONE 0x00000007
203
204#define TSI721_LUT_DATA0 0x41304
205#define TSI721_LUT_DATA0_ADD 0xfffff000
206#define TSI721_LUT_DATA0_RDTYPE 0x00000f00
207#define TSI721_LUT_DATA0_NREAD 0x00000100
208#define TSI721_LUT_DATA0_MNTRD 0x00000200
209#define TSI721_LUT_DATA0_RDCRF 0x00000020
210#define TSI721_LUT_DATA0_WRCRF 0x00000010
211#define TSI721_LUT_DATA0_WRTYPE 0x0000000f
212#define TSI721_LUT_DATA0_NWR 0x00000001
213#define TSI721_LUT_DATA0_MNTWR 0x00000002
214#define TSI721_LUT_DATA0_NWR_R 0x00000004
215
216#define TSI721_LUT_DATA1 0x41308
217
218#define TSI721_LUT_DATA2 0x4130c
219#define TSI721_LUT_DATA2_HC 0xff000000
220#define TSI721_LUT_DATA2_ADD65 0x000c0000
221#define TSI721_LUT_DATA2_TT 0x00030000
222#define TSI721_LUT_DATA2_DSTID 0x0000ffff
223
224#define TSI721_PC2SR_INTE 0x41310
225
226#define TSI721_DEVCTL 0x48004
227#define TSI721_DEVCTL_SRBOOT_CMPL 0x00000004
228
229#define TSI721_I2C_INT_ENABLE 0x49120
230
231/*
232 * Block DMA Engine Registers
233 * x = 0..7
234 */
235
236#define TSI721_DMAC_DWRCNT(x) (0x51000 + (x) * 0x1000)
237#define TSI721_DMAC_DRDCNT(x) (0x51004 + (x) * 0x1000)
238
239#define TSI721_DMAC_CTL(x) (0x51008 + (x) * 0x1000)
240#define TSI721_DMAC_CTL_SUSP 0x00000002
241#define TSI721_DMAC_CTL_INIT 0x00000001
242
243#define TSI721_DMAC_INT(x) (0x5100c + (x) * 0x1000)
244#define TSI721_DMAC_INT_STFULL 0x00000010
245#define TSI721_DMAC_INT_DONE 0x00000008
246#define TSI721_DMAC_INT_SUSP 0x00000004
247#define TSI721_DMAC_INT_ERR 0x00000002
248#define TSI721_DMAC_INT_IOFDONE 0x00000001
249#define TSI721_DMAC_INT_ALL 0x0000001f
250
251#define TSI721_DMAC_INTSET(x) (0x51010 + (x) * 0x1000)
252
253#define TSI721_DMAC_STS(x) (0x51014 + (x) * 0x1000)
254#define TSI721_DMAC_STS_ABORT 0x00400000
255#define TSI721_DMAC_STS_RUN 0x00200000
256#define TSI721_DMAC_STS_CS 0x001f0000
257
258#define TSI721_DMAC_INTE(x) (0x51018 + (x) * 0x1000)
259
260#define TSI721_DMAC_DPTRL(x) (0x51024 + (x) * 0x1000)
261#define TSI721_DMAC_DPTRL_MASK 0xffffffe0
262
263#define TSI721_DMAC_DPTRH(x) (0x51028 + (x) * 0x1000)
264
265#define TSI721_DMAC_DSBL(x) (0x5102c + (x) * 0x1000)
266#define TSI721_DMAC_DSBL_MASK 0xffffffc0
267
268#define TSI721_DMAC_DSBH(x) (0x51030 + (x) * 0x1000)
269
270#define TSI721_DMAC_DSSZ(x) (0x51034 + (x) * 0x1000)
271#define TSI721_DMAC_DSSZ_SIZE_M 0x0000000f
272#define TSI721_DMAC_DSSZ_SIZE(size) (__fls(size) - 4)
273
274
275#define TSI721_DMAC_DSRP(x) (0x51038 + (x) * 0x1000)
276#define TSI721_DMAC_DSRP_MASK 0x0007ffff
277
278#define TSI721_DMAC_DSWP(x) (0x5103c + (x) * 0x1000)
279#define TSI721_DMAC_DSWP_MASK 0x0007ffff
280
281#define TSI721_BDMA_INTE 0x5f000
282
283/*
284 * Messaging definitions
285 */
286#define TSI721_MSG_BUFFER_SIZE RIO_MAX_MSG_SIZE
287#define TSI721_MSG_MAX_SIZE RIO_MAX_MSG_SIZE
288#define TSI721_IMSG_MAXCH 8
289#define TSI721_IMSG_CHNUM TSI721_IMSG_MAXCH
290#define TSI721_IMSGD_MIN_RING_SIZE 32
291#define TSI721_IMSGD_RING_SIZE 512
292
293#define TSI721_OMSG_CHNUM 4 /* One channel per MBOX */
294#define TSI721_OMSGD_MIN_RING_SIZE 32
295#define TSI721_OMSGD_RING_SIZE 512
296
297/*
298 * Outbound Messaging Engine Registers
299 * x = 0..7
300 */
301
302#define TSI721_OBDMAC_DWRCNT(x) (0x61000 + (x) * 0x1000)
303
304#define TSI721_OBDMAC_DRDCNT(x) (0x61004 + (x) * 0x1000)
305
306#define TSI721_OBDMAC_CTL(x) (0x61008 + (x) * 0x1000)
307#define TSI721_OBDMAC_CTL_MASK 0x00000007
308#define TSI721_OBDMAC_CTL_RETRY_THR 0x00000004
309#define TSI721_OBDMAC_CTL_SUSPEND 0x00000002
310#define TSI721_OBDMAC_CTL_INIT 0x00000001
311
312#define TSI721_OBDMAC_INT(x) (0x6100c + (x) * 0x1000)
313#define TSI721_OBDMAC_INTSET(x) (0x61010 + (x) * 0x1000)
314#define TSI721_OBDMAC_INTE(x) (0x61018 + (x) * 0x1000)
315#define TSI721_OBDMAC_INT_MASK 0x0000001F
316#define TSI721_OBDMAC_INT_ST_FULL 0x00000010
317#define TSI721_OBDMAC_INT_DONE 0x00000008
318#define TSI721_OBDMAC_INT_SUSPENDED 0x00000004
319#define TSI721_OBDMAC_INT_ERROR 0x00000002
320#define TSI721_OBDMAC_INT_IOF_DONE 0x00000001
321#define TSI721_OBDMAC_INT_ALL TSI721_OBDMAC_INT_MASK
322
323#define TSI721_OBDMAC_STS(x) (0x61014 + (x) * 0x1000)
324#define TSI721_OBDMAC_STS_MASK 0x007f0000
325#define TSI721_OBDMAC_STS_ABORT 0x00400000
326#define TSI721_OBDMAC_STS_RUN 0x00200000
327#define TSI721_OBDMAC_STS_CS 0x001f0000
328
329#define TSI721_OBDMAC_PWE(x) (0x6101c + (x) * 0x1000)
330#define TSI721_OBDMAC_PWE_MASK 0x00000002
331#define TSI721_OBDMAC_PWE_ERROR_EN 0x00000002
332
333#define TSI721_OBDMAC_DPTRL(x) (0x61020 + (x) * 0x1000)
334#define TSI721_OBDMAC_DPTRL_MASK 0xfffffff0
335
336#define TSI721_OBDMAC_DPTRH(x) (0x61024 + (x) * 0x1000)
337#define TSI721_OBDMAC_DPTRH_MASK 0xffffffff
338
339#define TSI721_OBDMAC_DSBL(x) (0x61040 + (x) * 0x1000)
340#define TSI721_OBDMAC_DSBL_MASK 0xffffffc0
341
342#define TSI721_OBDMAC_DSBH(x) (0x61044 + (x) * 0x1000)
343#define TSI721_OBDMAC_DSBH_MASK 0xffffffff
344
345#define TSI721_OBDMAC_DSSZ(x) (0x61048 + (x) * 0x1000)
346#define TSI721_OBDMAC_DSSZ_MASK 0x0000000f
347
348#define TSI721_OBDMAC_DSRP(x) (0x6104c + (x) * 0x1000)
349#define TSI721_OBDMAC_DSRP_MASK 0x0007ffff
350
351#define TSI721_OBDMAC_DSWP(x) (0x61050 + (x) * 0x1000)
352#define TSI721_OBDMAC_DSWP_MASK 0x0007ffff
353
354#define TSI721_RQRPTO 0x60010
355#define TSI721_RQRPTO_MASK 0x00ffffff
356#define TSI721_RQRPTO_VAL 400 /* Response TO value */
357
358/*
359 * Inbound Messaging Engine Registers
360 * x = 0..7
361 */
362
363#define TSI721_IB_DEVID_GLOBAL 0xffff
364#define TSI721_IBDMAC_FQBL(x) (0x61200 + (x) * 0x1000)
365#define TSI721_IBDMAC_FQBL_MASK 0xffffffc0
366
367#define TSI721_IBDMAC_FQBH(x) (0x61204 + (x) * 0x1000)
368#define TSI721_IBDMAC_FQBH_MASK 0xffffffff
369
370#define TSI721_IBDMAC_FQSZ_ENTRY_INX TSI721_IMSGD_RING_SIZE
371#define TSI721_IBDMAC_FQSZ(x) (0x61208 + (x) * 0x1000)
372#define TSI721_IBDMAC_FQSZ_MASK 0x0000000f
373
374#define TSI721_IBDMAC_FQRP(x) (0x6120c + (x) * 0x1000)
375#define TSI721_IBDMAC_FQRP_MASK 0x0007ffff
376
377#define TSI721_IBDMAC_FQWP(x) (0x61210 + (x) * 0x1000)
378#define TSI721_IBDMAC_FQWP_MASK 0x0007ffff
379
380#define TSI721_IBDMAC_FQTH(x) (0x61214 + (x) * 0x1000)
381#define TSI721_IBDMAC_FQTH_MASK 0x0007ffff
382
383#define TSI721_IB_DEVID 0x60020
384#define TSI721_IB_DEVID_MASK 0x0000ffff
385
386#define TSI721_IBDMAC_CTL(x) (0x61240 + (x) * 0x1000)
387#define TSI721_IBDMAC_CTL_MASK 0x00000003
388#define TSI721_IBDMAC_CTL_SUSPEND 0x00000002
389#define TSI721_IBDMAC_CTL_INIT 0x00000001
390
391#define TSI721_IBDMAC_STS(x) (0x61244 + (x) * 0x1000)
392#define TSI721_IBDMAC_STS_MASK 0x007f0000
393#define TSI721_IBSMAC_STS_ABORT 0x00400000
394#define TSI721_IBSMAC_STS_RUN 0x00200000
395#define TSI721_IBSMAC_STS_CS 0x001f0000
396
397#define TSI721_IBDMAC_INT(x) (0x61248 + (x) * 0x1000)
398#define TSI721_IBDMAC_INTSET(x) (0x6124c + (x) * 0x1000)
399#define TSI721_IBDMAC_INTE(x) (0x61250 + (x) * 0x1000)
400#define TSI721_IBDMAC_INT_MASK 0x0000100f
401#define TSI721_IBDMAC_INT_SRTO 0x00001000
402#define TSI721_IBDMAC_INT_SUSPENDED 0x00000008
403#define TSI721_IBDMAC_INT_PC_ERROR 0x00000004
404#define TSI721_IBDMAC_INT_FQ_LOW 0x00000002
405#define TSI721_IBDMAC_INT_DQ_RCV 0x00000001
406#define TSI721_IBDMAC_INT_ALL TSI721_IBDMAC_INT_MASK
407
408#define TSI721_IBDMAC_PWE(x) (0x61254 + (x) * 0x1000)
409#define TSI721_IBDMAC_PWE_MASK 0x00001700
410#define TSI721_IBDMAC_PWE_SRTO 0x00001000
411#define TSI721_IBDMAC_PWE_ILL_FMT 0x00000400
412#define TSI721_IBDMAC_PWE_ILL_DEC 0x00000200
413#define TSI721_IBDMAC_PWE_IMP_SP 0x00000100
414
415#define TSI721_IBDMAC_DQBL(x) (0x61300 + (x) * 0x1000)
416#define TSI721_IBDMAC_DQBL_MASK 0xffffffc0
417#define TSI721_IBDMAC_DQBL_ADDR 0xffffffc0
418
419#define TSI721_IBDMAC_DQBH(x) (0x61304 + (x) * 0x1000)
420#define TSI721_IBDMAC_DQBH_MASK 0xffffffff
421
422#define TSI721_IBDMAC_DQRP(x) (0x61308 + (x) * 0x1000)
423#define TSI721_IBDMAC_DQRP_MASK 0x0007ffff
424
425#define TSI721_IBDMAC_DQWR(x) (0x6130c + (x) * 0x1000)
426#define TSI721_IBDMAC_DQWR_MASK 0x0007ffff
427
428#define TSI721_IBDMAC_DQSZ(x) (0x61314 + (x) * 0x1000)
429#define TSI721_IBDMAC_DQSZ_MASK 0x0000000f
430
431/*
432 * Messaging Engine Interrupts
433 */
434
435#define TSI721_SMSG_PWE 0x6a004
436
437#define TSI721_SMSG_INTE 0x6a000
438#define TSI721_SMSG_INT 0x6a008
439#define TSI721_SMSG_INTSET 0x6a010
440#define TSI721_SMSG_INT_MASK 0x0086ffff
441#define TSI721_SMSG_INT_UNS_RSP 0x00800000
442#define TSI721_SMSG_INT_ECC_NCOR 0x00040000
443#define TSI721_SMSG_INT_ECC_COR 0x00020000
444#define TSI721_SMSG_INT_ECC_NCOR_CH 0x0000ff00
445#define TSI721_SMSG_INT_ECC_COR_CH 0x000000ff
446
447#define TSI721_SMSG_ECC_LOG 0x6a014
448#define TSI721_SMSG_ECC_LOG_MASK 0x00070007
449#define TSI721_SMSG_ECC_LOG_ECC_NCOR_M 0x00070000
450#define TSI721_SMSG_ECC_LOG_ECC_COR_M 0x00000007
451
452#define TSI721_RETRY_GEN_CNT 0x6a100
453#define TSI721_RETRY_GEN_CNT_MASK 0xffffffff
454
455#define TSI721_RETRY_RX_CNT 0x6a104
456#define TSI721_RETRY_RX_CNT_MASK 0xffffffff
457
458#define TSI721_SMSG_ECC_COR_LOG(x) (0x6a300 + (x) * 4)
459#define TSI721_SMSG_ECC_COR_LOG_MASK 0x000000ff
460
461#define TSI721_SMSG_ECC_NCOR(x) (0x6a340 + (x) * 4)
462#define TSI721_SMSG_ECC_NCOR_MASK 0x000000ff
463
464/*
465 * Block DMA Descriptors
466 */
467
468struct tsi721_dma_desc {
469 __le32 type_id;
470
471#define TSI721_DMAD_DEVID 0x0000ffff
472#define TSI721_DMAD_CRF 0x00010000
473#define TSI721_DMAD_PRIO 0x00060000
474#define TSI721_DMAD_RTYPE 0x00780000
475#define TSI721_DMAD_IOF 0x08000000
476#define TSI721_DMAD_DTYPE 0xe0000000
477
478 __le32 bcount;
479
480#define TSI721_DMAD_BCOUNT1 0x03ffffff /* if DTYPE == 1 */
481#define TSI721_DMAD_BCOUNT2 0x0000000f /* if DTYPE == 2 */
482#define TSI721_DMAD_TT 0x0c000000
483#define TSI721_DMAD_RADDR0 0xc0000000
484
485 union {
486 __le32 raddr_lo; /* if DTYPE == (1 || 2) */
487 __le32 next_lo; /* if DTYPE == 3 */
488 };
489
490#define TSI721_DMAD_CFGOFF 0x00ffffff
491#define TSI721_DMAD_HOPCNT 0xff000000
492
493 union {
494 __le32 raddr_hi; /* if DTYPE == (1 || 2) */
495 __le32 next_hi; /* if DTYPE == 3 */
496 };
497
498 union {
499 struct { /* if DTYPE == 1 */
500 __le32 bufptr_lo;
501 __le32 bufptr_hi;
502 __le32 s_dist;
503 __le32 s_size;
504 } t1;
505 __le32 data[4]; /* if DTYPE == 2 */
506 u32 reserved[4]; /* if DTYPE == 3 */
507 };
508} __aligned(32);
509
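The TSI721_DMAD_* masks describe how the first word of a BDMA descriptor is packed. Purely as an illustration, the sketch below assembles a type_id word for an ALL_NWRITE_R data transfer descriptor (DTYPE1); the shift values are derived from the masks above, the helper name is made up, and the real driver would store the result through cpu_to_le32():

#include <stdint.h>
#include <stdio.h>

/* Field positions implied by the TSI721_DMAD_* masks above. */
#define DMAD_DEVID_SHIFT	0	/* 0x0000ffff */
#define DMAD_CRF_SHIFT		16	/* 0x00010000 */
#define DMAD_PRIO_SHIFT		17	/* 0x00060000 */
#define DMAD_RTYPE_SHIFT	19	/* 0x00780000 */
#define DMAD_DTYPE_SHIFT	29	/* 0xe0000000 */

/* Hypothetical helper: pack the first word of a DTYPE1 descriptor. */
static uint32_t pack_type_id(uint16_t destid, unsigned int prio,
			     unsigned int rtype, unsigned int dtype)
{
	return ((uint32_t)destid << DMAD_DEVID_SHIFT) |
	       (prio  << DMAD_PRIO_SHIFT) |
	       (rtype << DMAD_RTYPE_SHIFT) |
	       (dtype << DMAD_DTYPE_SHIFT);
}

int main(void)
{
	/* destid 0x0005, priority 0, ALL_NWRITE_R (3), DTYPE1 */
	printf("type_id = 0x%08x\n", pack_type_id(0x0005, 0, 3, 1));
	return 0;
}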
510/*
511 * Inbound Messaging Descriptor
512 */
513struct tsi721_imsg_desc {
514 __le32 type_id;
515
516#define TSI721_IMD_DEVID 0x0000ffff
517#define TSI721_IMD_CRF 0x00010000
518#define TSI721_IMD_PRIO 0x00060000
519#define TSI721_IMD_TT 0x00180000
520#define TSI721_IMD_DTYPE 0xe0000000
521
522 __le32 msg_info;
523
524#define TSI721_IMD_BCOUNT 0x00000ff8
525#define TSI721_IMD_SSIZE 0x0000f000
526#define TSI721_IMD_LETER 0x00030000
527#define TSI721_IMD_XMBOX 0x003c0000
528#define TSI721_IMD_MBOX 0x00c00000
529#define TSI721_IMD_CS 0x78000000
530#define TSI721_IMD_HO 0x80000000
531
532 __le32 bufptr_lo;
533 __le32 bufptr_hi;
534 u32 reserved[12];
535
536} __aligned(64);
537
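The receive path shown earlier masks msg_info with TSI721_IMD_BCOUNT to get the payload length and treats zero as a maximum-size message. A user-space sketch of decoding the same word; the sample value is invented, and RIO_MAX_MSG_SIZE is assumed to be 4096 as used by the RapidIO core:

#include <stdint.h>
#include <stdio.h>

#define IMD_BCOUNT	0x00000ff8	/* byte count, 0 == max-size message */
#define IMD_SSIZE	0x0000f000
#define IMD_MBOX	0x00c00000
#define IMD_HO		0x80000000
#define MAX_MSG_SIZE	4096		/* assumed RIO_MAX_MSG_SIZE */

int main(void)
{
	uint32_t msg_info = 0x80402128;	/* invented sample descriptor word */
	uint32_t size = msg_info & IMD_BCOUNT;

	if (size == 0)			/* same convention as the driver */
		size = MAX_MSG_SIZE;

	printf("handled=%u mbox=%u ssize=%u bytes=%u\n",
	       (msg_info & IMD_HO) >> 31,
	       (msg_info & IMD_MBOX) >> 22,
	       (msg_info & IMD_SSIZE) >> 12,
	       size);
	return 0;
}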
538/*
539 * Outbound Messaging Descriptor
540 */
541struct tsi721_omsg_desc {
542 __le32 type_id;
543
544#define TSI721_OMD_DEVID 0x0000ffff
545#define TSI721_OMD_CRF 0x00010000
546#define TSI721_OMD_PRIO 0x00060000
547#define TSI721_OMD_IOF 0x08000000
548#define TSI721_OMD_DTYPE 0xe0000000
549#define TSI721_OMD_RSRVD 0x17f80000
550
551 __le32 msg_info;
552
553#define TSI721_OMD_BCOUNT 0x00000ff8
554#define TSI721_OMD_SSIZE 0x0000f000
555#define TSI721_OMD_LETER 0x00030000
556#define TSI721_OMD_XMBOX 0x003c0000
557#define TSI721_OMD_MBOX 0x00c00000
558#define TSI721_OMD_TT 0x0c000000
559
560 union {
561 __le32 bufptr_lo; /* if DTYPE == 4 */
562 __le32 next_lo; /* if DTYPE == 5 */
563 };
564
565 union {
566 __le32 bufptr_hi; /* if DTYPE == 4 */
567 __le32 next_hi; /* if DTYPE == 5 */
568 };
569
570} __aligned(16);
571
572struct tsi721_dma_sts {
573 __le64 desc_sts[8];
574} __aligned(64);
575
576struct tsi721_desc_sts_fifo {
577 union {
578 __le64 da64;
579 struct {
580 __le32 lo;
581 __le32 hi;
582 } da32;
583 } stat[8];
584} __aligned(64);
585
586/* Descriptor types for BDMA and Messaging blocks */
587enum dma_dtype {
588 DTYPE1 = 1, /* Data Transfer DMA Descriptor */
589 DTYPE2 = 2, /* Immediate Data Transfer DMA Descriptor */
590 DTYPE3 = 3, /* Block Pointer DMA Descriptor */
591 DTYPE4 = 4, /* Outbound Msg DMA Descriptor */
592 DTYPE5 = 5, /* OB Messaging Block Pointer Descriptor */
593 DTYPE6 = 6 /* Inbound Messaging Descriptor */
594};
595
596enum dma_rtype {
597 NREAD = 0,
598 LAST_NWRITE_R = 1,
599 ALL_NWRITE = 2,
600 ALL_NWRITE_R = 3,
601 MAINT_RD = 4,
602 MAINT_WR = 5
603};
604
605/*
606 * mport Driver Definitions
607 */
608#define TSI721_DMA_CHNUM TSI721_DMA_MAXCH
609
610#define TSI721_DMACH_MAINT 0 /* DMA channel for maint requests */
611#define TSI721_DMACH_MAINT_NBD 32 /* Number of BDs for maint requests */
612
613#define MSG_DMA_ENTRY_INX_TO_SIZE(x) ((0x10 << (x)) & 0xFFFF0)
614
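MSG_DMA_ENTRY_INX_TO_SIZE() appears to be the inverse of the (__fls(size) - 4) encoding used by the *_SIZE_VAL macros earlier in this header: an index of 5 maps back to 0x10 << 5 = 512. A tiny sketch that prints the mapping:

#include <stdio.h>

/* Copied from the header above for illustration. */
#define MSG_DMA_ENTRY_INX_TO_SIZE(x)	((0x10 << (x)) & 0xFFFF0)

int main(void)
{
	int inx;

	/* e.g. index 5 -> 512, the inverse of the log2(size) - 4 encoding */
	for (inx = 1; inx <= 12; inx++)
		printf("index %2d -> %6d\n", inx, MSG_DMA_ENTRY_INX_TO_SIZE(inx));
	return 0;
}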
615enum tsi721_smsg_int_flag {
616 SMSG_INT_NONE = 0x00000000,
617 SMSG_INT_ECC_COR_CH = 0x000000ff,
618 SMSG_INT_ECC_NCOR_CH = 0x0000ff00,
619 SMSG_INT_ECC_COR = 0x00020000,
620 SMSG_INT_ECC_NCOR = 0x00040000,
621 SMSG_INT_UNS_RSP = 0x00800000,
622 SMSG_INT_ALL = 0x0006ffff
623};
624
625/* Structures */
626
627struct tsi721_bdma_chan {
628 int bd_num; /* number of buffer descriptors */
629 void *bd_base; /* start of DMA descriptors */
630 dma_addr_t bd_phys;
631 void *sts_base; /* start of DMA BD status FIFO */
632 dma_addr_t sts_phys;
633 int sts_size;
634};
635
636struct tsi721_imsg_ring {
637 u32 size;
638 /* VA/PA of data buffers for incoming messages */
639 void *buf_base;
640 dma_addr_t buf_phys;
641 /* VA/PA of circular free buffer list */
642 void *imfq_base;
643 dma_addr_t imfq_phys;
644 /* VA/PA of Inbound message descriptors */
645 void *imd_base;
646 dma_addr_t imd_phys;
647 /* Inbound Queue buffer pointers */
648 void *imq_base[TSI721_IMSGD_RING_SIZE];
649
650 u32 rx_slot;
651 void *dev_id;
652 u32 fq_wrptr;
653 u32 desc_rdptr;
654 spinlock_t lock;
655};
656
657struct tsi721_omsg_ring {
658 u32 size;
659 /* VA/PA of OB Msg descriptors */
660 void *omd_base;
661 dma_addr_t omd_phys;
662 /* VA/PA of OB Msg data buffers */
663 void *omq_base[TSI721_OMSGD_RING_SIZE];
664 dma_addr_t omq_phys[TSI721_OMSGD_RING_SIZE];
665 /* VA/PA of OB Msg descriptor status FIFO */
666 void *sts_base;
667 dma_addr_t sts_phys;
668 u32 sts_size; /* # of allocated status entries */
669 u32 sts_rdptr;
670
671 u32 tx_slot;
672 void *dev_id;
673 u32 wr_count;
674 spinlock_t lock;
675};
676
677enum tsi721_flags {
678 TSI721_USING_MSI = (1 << 0),
679 TSI721_USING_MSIX = (1 << 1),
680 TSI721_IMSGID_SET = (1 << 2),
681};
682
683#ifdef CONFIG_PCI_MSI
684/*
685 * MSI-X Table Entries (0 ... 69)
686 */
687#define TSI721_MSIX_DMACH_DONE(x) (0 + (x))
688#define TSI721_MSIX_DMACH_INT(x) (8 + (x))
689#define TSI721_MSIX_BDMA_INT 16
690#define TSI721_MSIX_OMSG_DONE(x) (17 + (x))
691#define TSI721_MSIX_OMSG_INT(x) (25 + (x))
692#define TSI721_MSIX_IMSG_DQ_RCV(x) (33 + (x))
693#define TSI721_MSIX_IMSG_INT(x) (41 + (x))
694#define TSI721_MSIX_MSG_INT 49
695#define TSI721_MSIX_SR2PC_IDBQ_RCV(x) (50 + (x))
696#define TSI721_MSIX_SR2PC_CH_INT(x) (58 + (x))
697#define TSI721_MSIX_SR2PC_INT 66
698#define TSI721_MSIX_PC2SR_INT 67
699#define TSI721_MSIX_SRIO_MAC_INT 68
700#define TSI721_MSIX_I2C_INT 69
701
702/* MSI-X vector and init table entry indexes */
703enum tsi721_msix_vect {
704 TSI721_VECT_IDB,
705 TSI721_VECT_PWRX, /* PW_RX is part of SRIO MAC Interrupt reporting */
706 TSI721_VECT_OMB0_DONE,
707 TSI721_VECT_OMB1_DONE,
708 TSI721_VECT_OMB2_DONE,
709 TSI721_VECT_OMB3_DONE,
710 TSI721_VECT_OMB0_INT,
711 TSI721_VECT_OMB1_INT,
712 TSI721_VECT_OMB2_INT,
713 TSI721_VECT_OMB3_INT,
714 TSI721_VECT_IMB0_RCV,
715 TSI721_VECT_IMB1_RCV,
716 TSI721_VECT_IMB2_RCV,
717 TSI721_VECT_IMB3_RCV,
718 TSI721_VECT_IMB0_INT,
719 TSI721_VECT_IMB1_INT,
720 TSI721_VECT_IMB2_INT,
721 TSI721_VECT_IMB3_INT,
722 TSI721_VECT_MAX
723};
724
725#define IRQ_DEVICE_NAME_MAX 64
726
727struct msix_irq {
728 u16 vector;
729 char irq_name[IRQ_DEVICE_NAME_MAX];
730};
731#endif /* CONFIG_PCI_MSI */
732
733struct tsi721_device {
734 struct pci_dev *pdev;
735 struct rio_mport *mport;
736 u32 flags;
737 void __iomem *regs;
738#ifdef CONFIG_PCI_MSI
739 struct msix_irq msix[TSI721_VECT_MAX];
740#endif
741 /* Doorbells */
742 void __iomem *odb_base;
743 void *idb_base;
744 dma_addr_t idb_dma;
745 struct work_struct idb_work;
746 u32 db_discard_count;
747
748 /* Inbound Port-Write */
749 struct work_struct pw_work;
750 struct kfifo pw_fifo;
751 spinlock_t pw_fifo_lock;
752 u32 pw_discard_count;
753
754 /* BDMA Engine */
755 struct tsi721_bdma_chan bdma[TSI721_DMA_CHNUM];
756
757 /* Inbound Messaging */
758 int imsg_init[TSI721_IMSG_CHNUM];
759 struct tsi721_imsg_ring imsg_ring[TSI721_IMSG_CHNUM];
760
761 /* Outbound Messaging */
762 int omsg_init[TSI721_OMSG_CHNUM];
763 struct tsi721_omsg_ring omsg_ring[TSI721_OMSG_CHNUM];
764};
765
766#endif
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index ebe77dd87daf..2bebd791a092 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -516,7 +516,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
516 return rdev; 516 return rdev;
517 517
518cleanup: 518cleanup:
519 if (rio_is_switch(rdev)) 519 if (rswitch)
520 kfree(rswitch->route_table); 520 kfree(rswitch->route_table);
521 521
522 kfree(rdev); 522 kfree(rdev);
@@ -923,7 +923,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port,
923 * rio_enum_complete- Tests if enumeration of a network is complete 923 * rio_enum_complete- Tests if enumeration of a network is complete
924 * @port: Master port to send transaction 924 * @port: Master port to send transaction
925 * 925 *
926 * Tests the Component Tag CSR for non-zero value (enumeration 926 * Tests the PGCCSR discovered bit for non-zero value (enumeration
927 * complete flag). Return %1 if enumeration is complete or %0 if 927 * complete flag). Return %1 if enumeration is complete or %0 if
928 * enumeration is incomplete. 928 * enumeration is incomplete.
929 */ 929 */
@@ -933,7 +933,7 @@ static int rio_enum_complete(struct rio_mport *port)
933 933
934 rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR, 934 rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR,
935 &regval); 935 &regval);
936 return (regval & RIO_PORT_GEN_MASTER) ? 1 : 0; 936 return (regval & RIO_PORT_GEN_DISCOVERED) ? 1 : 0;
937} 937}
938 938
939/** 939/**
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 01a7df5317c1..e8326f26fa2f 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -21,16 +21,13 @@
21#include "rtc-core.h" 21#include "rtc-core.h"
22 22
23 23
24static DEFINE_IDR(rtc_idr); 24static DEFINE_IDA(rtc_ida);
25static DEFINE_MUTEX(idr_lock);
26struct class *rtc_class; 25struct class *rtc_class;
27 26
28static void rtc_device_release(struct device *dev) 27static void rtc_device_release(struct device *dev)
29{ 28{
30 struct rtc_device *rtc = to_rtc_device(dev); 29 struct rtc_device *rtc = to_rtc_device(dev);
31 mutex_lock(&idr_lock); 30 ida_simple_remove(&rtc_ida, rtc->id);
32 idr_remove(&rtc_idr, rtc->id);
33 mutex_unlock(&idr_lock);
34 kfree(rtc); 31 kfree(rtc);
35} 32}
36 33
@@ -146,25 +143,16 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
146 struct rtc_wkalrm alrm; 143 struct rtc_wkalrm alrm;
147 int id, err; 144 int id, err;
148 145
149 if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) { 146 id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
150 err = -ENOMEM; 147 if (id < 0) {
148 err = id;
151 goto exit; 149 goto exit;
152 } 150 }
153 151
154
155 mutex_lock(&idr_lock);
156 err = idr_get_new(&rtc_idr, NULL, &id);
157 mutex_unlock(&idr_lock);
158
159 if (err < 0)
160 goto exit;
161
162 id = id & MAX_ID_MASK;
163
164 rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL); 152 rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL);
165 if (rtc == NULL) { 153 if (rtc == NULL) {
166 err = -ENOMEM; 154 err = -ENOMEM;
167 goto exit_idr; 155 goto exit_ida;
168 } 156 }
169 157
170 rtc->id = id; 158 rtc->id = id;
@@ -222,10 +210,8 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
222exit_kfree: 210exit_kfree:
223 kfree(rtc); 211 kfree(rtc);
224 212
225exit_idr: 213exit_ida:
226 mutex_lock(&idr_lock); 214 ida_simple_remove(&rtc_ida, id);
227 idr_remove(&rtc_idr, id);
228 mutex_unlock(&idr_lock);
229 215
230exit: 216exit:
231 dev_err(dev, "rtc core: unable to register %s, err = %d\n", 217 dev_err(dev, "rtc core: unable to register %s, err = %d\n",
@@ -276,7 +262,7 @@ static void __exit rtc_exit(void)
276{ 262{
277 rtc_dev_exit(); 263 rtc_dev_exit();
278 class_destroy(rtc_class); 264 class_destroy(rtc_class);
279 idr_destroy(&rtc_idr); 265 ida_destroy(&rtc_ida);
280} 266}
281 267
282subsys_initcall(rtc_init); 268subsys_initcall(rtc_init);
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index b2005b44e4f7..62b0763b7b9a 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -34,6 +34,7 @@ enum ds_type {
34 ds_1388, 34 ds_1388,
35 ds_3231, 35 ds_3231,
36 m41t00, 36 m41t00,
37 mcp7941x,
37 rx_8025, 38 rx_8025,
38 // rs5c372 too? different address... 39 // rs5c372 too? different address...
39}; 40};
@@ -43,6 +44,7 @@ enum ds_type {
43#define DS1307_REG_SECS 0x00 /* 00-59 */ 44#define DS1307_REG_SECS 0x00 /* 00-59 */
44# define DS1307_BIT_CH 0x80 45# define DS1307_BIT_CH 0x80
45# define DS1340_BIT_nEOSC 0x80 46# define DS1340_BIT_nEOSC 0x80
47# define MCP7941X_BIT_ST 0x80
46#define DS1307_REG_MIN 0x01 /* 00-59 */ 48#define DS1307_REG_MIN 0x01 /* 00-59 */
47#define DS1307_REG_HOUR 0x02 /* 00-23, or 1-12{am,pm} */ 49#define DS1307_REG_HOUR 0x02 /* 00-23, or 1-12{am,pm} */
48# define DS1307_BIT_12HR 0x40 /* in REG_HOUR */ 50# define DS1307_BIT_12HR 0x40 /* in REG_HOUR */
@@ -50,6 +52,7 @@ enum ds_type {
50# define DS1340_BIT_CENTURY_EN 0x80 /* in REG_HOUR */ 52# define DS1340_BIT_CENTURY_EN 0x80 /* in REG_HOUR */
51# define DS1340_BIT_CENTURY 0x40 /* in REG_HOUR */ 53# define DS1340_BIT_CENTURY 0x40 /* in REG_HOUR */
52#define DS1307_REG_WDAY 0x03 /* 01-07 */ 54#define DS1307_REG_WDAY 0x03 /* 01-07 */
55# define MCP7941X_BIT_VBATEN 0x08
53#define DS1307_REG_MDAY 0x04 /* 01-31 */ 56#define DS1307_REG_MDAY 0x04 /* 01-31 */
54#define DS1307_REG_MONTH 0x05 /* 01-12 */ 57#define DS1307_REG_MONTH 0x05 /* 01-12 */
55# define DS1337_BIT_CENTURY 0x80 /* in REG_MONTH */ 58# define DS1337_BIT_CENTURY 0x80 /* in REG_MONTH */
@@ -137,6 +140,8 @@ static const struct chip_desc chips[] = {
137}, 140},
138[m41t00] = { 141[m41t00] = {
139}, 142},
143[mcp7941x] = {
144},
140[rx_8025] = { 145[rx_8025] = {
141}, }; 146}, };
142 147
@@ -149,6 +154,7 @@ static const struct i2c_device_id ds1307_id[] = {
149 { "ds1340", ds_1340 }, 154 { "ds1340", ds_1340 },
150 { "ds3231", ds_3231 }, 155 { "ds3231", ds_3231 },
151 { "m41t00", m41t00 }, 156 { "m41t00", m41t00 },
157 { "mcp7941x", mcp7941x },
152 { "pt7c4338", ds_1307 }, 158 { "pt7c4338", ds_1307 },
153 { "rx8025", rx_8025 }, 159 { "rx8025", rx_8025 },
154 { } 160 { }
@@ -365,6 +371,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
365 buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN 371 buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN
366 | DS1340_BIT_CENTURY; 372 | DS1340_BIT_CENTURY;
367 break; 373 break;
374 case mcp7941x:
375 buf[DS1307_REG_SECS] |= MCP7941X_BIT_ST;
376 buf[DS1307_REG_WDAY] |= MCP7941X_BIT_VBATEN;
377 break;
368 default: 378 default:
369 break; 379 break;
370 } 380 }
@@ -809,6 +819,23 @@ read_rtc:
809 dev_warn(&client->dev, "SET TIME!\n"); 819 dev_warn(&client->dev, "SET TIME!\n");
810 } 820 }
811 break; 821 break;
822 case mcp7941x:
823 /* make sure that the backup battery is enabled */
824 if (!(ds1307->regs[DS1307_REG_WDAY] & MCP7941X_BIT_VBATEN)) {
825 i2c_smbus_write_byte_data(client, DS1307_REG_WDAY,
826 ds1307->regs[DS1307_REG_WDAY]
827 | MCP7941X_BIT_VBATEN);
828 }
829
830 /* clock halted? turn it on, so clock can tick. */
831 if (!(tmp & MCP7941X_BIT_ST)) {
832 i2c_smbus_write_byte_data(client, DS1307_REG_SECS,
833 MCP7941X_BIT_ST);
834 dev_warn(&client->dev, "SET TIME!\n");
835 goto read_rtc;
836 }
837
838 break;
812 case rx_8025: 839 case rx_8025:
813 case ds_1337: 840 case ds_1337:
814 case ds_1339: 841 case ds_1339:
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index a1a278bc340d..9d0c3b478d55 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -309,7 +309,7 @@ static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev)
309 return IRQ_HANDLED; 309 return IRQ_HANDLED;
310} 310}
311 311
312static int __devinit mc13xxx_rtc_probe(struct platform_device *pdev) 312static int __init mc13xxx_rtc_probe(struct platform_device *pdev)
313{ 313{
314 int ret; 314 int ret;
315 struct mc13xxx_rtc *priv; 315 struct mc13xxx_rtc *priv;
@@ -378,7 +378,7 @@ err_reset_irq_request:
378 return ret; 378 return ret;
379} 379}
380 380
381static int __devexit mc13xxx_rtc_remove(struct platform_device *pdev) 381static int __exit mc13xxx_rtc_remove(struct platform_device *pdev)
382{ 382{
383 struct mc13xxx_rtc *priv = platform_get_drvdata(pdev); 383 struct mc13xxx_rtc *priv = platform_get_drvdata(pdev);
384 384
@@ -410,7 +410,7 @@ const struct platform_device_id mc13xxx_rtc_idtable[] = {
410 410
411static struct platform_driver mc13xxx_rtc_driver = { 411static struct platform_driver mc13xxx_rtc_driver = {
412 .id_table = mc13xxx_rtc_idtable, 412 .id_table = mc13xxx_rtc_idtable,
413 .remove = __devexit_p(mc13xxx_rtc_remove), 413 .remove = __exit_p(mc13xxx_rtc_remove),
414 .driver = { 414 .driver = {
415 .name = DRIVER_NAME, 415 .name = DRIVER_NAME,
416 .owner = THIS_MODULE, 416 .owner = THIS_MODULE,
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index 483d45180911..5754c9a4f58b 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -114,43 +114,7 @@ static struct bin_attribute w1_ds2760_bin_attr = {
114 .read = w1_ds2760_read_bin, 114 .read = w1_ds2760_read_bin,
115}; 115};
116 116
117static DEFINE_IDR(bat_idr); 117static DEFINE_IDA(bat_ida);
118static DEFINE_MUTEX(bat_idr_lock);
119
120static int new_bat_id(void)
121{
122 int ret;
123
124 while (1) {
125 int id;
126
127 ret = idr_pre_get(&bat_idr, GFP_KERNEL);
128 if (ret == 0)
129 return -ENOMEM;
130
131 mutex_lock(&bat_idr_lock);
132 ret = idr_get_new(&bat_idr, NULL, &id);
133 mutex_unlock(&bat_idr_lock);
134
135 if (ret == 0) {
136 ret = id & MAX_ID_MASK;
137 break;
138 } else if (ret == -EAGAIN) {
139 continue;
140 } else {
141 break;
142 }
143 }
144
145 return ret;
146}
147
148static void release_bat_id(int id)
149{
150 mutex_lock(&bat_idr_lock);
151 idr_remove(&bat_idr, id);
152 mutex_unlock(&bat_idr_lock);
153}
154 118
155static int w1_ds2760_add_slave(struct w1_slave *sl) 119static int w1_ds2760_add_slave(struct w1_slave *sl)
156{ 120{
@@ -158,7 +122,7 @@ static int w1_ds2760_add_slave(struct w1_slave *sl)
158 int id; 122 int id;
159 struct platform_device *pdev; 123 struct platform_device *pdev;
160 124
161 id = new_bat_id(); 125 id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL);
162 if (id < 0) { 126 if (id < 0) {
163 ret = id; 127 ret = id;
164 goto noid; 128 goto noid;
@@ -187,7 +151,7 @@ bin_attr_failed:
187pdev_add_failed: 151pdev_add_failed:
188 platform_device_unregister(pdev); 152 platform_device_unregister(pdev);
189pdev_alloc_failed: 153pdev_alloc_failed:
190 release_bat_id(id); 154 ida_simple_remove(&bat_ida, id);
191noid: 155noid:
192success: 156success:
193 return ret; 157 return ret;
@@ -199,7 +163,7 @@ static void w1_ds2760_remove_slave(struct w1_slave *sl)
199 int id = pdev->id; 163 int id = pdev->id;
200 164
201 platform_device_unregister(pdev); 165 platform_device_unregister(pdev);
202 release_bat_id(id); 166 ida_simple_remove(&bat_ida, id);
203 sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2760_bin_attr); 167 sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2760_bin_attr);
204} 168}
205 169
@@ -217,14 +181,14 @@ static int __init w1_ds2760_init(void)
217{ 181{
218 printk(KERN_INFO "1-Wire driver for the DS2760 battery monitor " 182 printk(KERN_INFO "1-Wire driver for the DS2760 battery monitor "
219 " chip - (c) 2004-2005, Szabolcs Gyurko\n"); 183 " chip - (c) 2004-2005, Szabolcs Gyurko\n");
220 idr_init(&bat_idr); 184 ida_init(&bat_ida);
221 return w1_register_family(&w1_ds2760_family); 185 return w1_register_family(&w1_ds2760_family);
222} 186}
223 187
224static void __exit w1_ds2760_exit(void) 188static void __exit w1_ds2760_exit(void)
225{ 189{
226 w1_unregister_family(&w1_ds2760_family); 190 w1_unregister_family(&w1_ds2760_family);
227 idr_destroy(&bat_idr); 191 ida_destroy(&bat_ida);
228} 192}
229 193
230EXPORT_SYMBOL(w1_ds2760_read); 194EXPORT_SYMBOL(w1_ds2760_read);
diff --git a/drivers/w1/slaves/w1_ds2780.c b/drivers/w1/slaves/w1_ds2780.c
index 274c8f38303f..39f78c0b143c 100644
--- a/drivers/w1/slaves/w1_ds2780.c
+++ b/drivers/w1/slaves/w1_ds2780.c
@@ -26,20 +26,14 @@
26#include "../w1_family.h" 26#include "../w1_family.h"
27#include "w1_ds2780.h" 27#include "w1_ds2780.h"
28 28
29int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count, 29static int w1_ds2780_do_io(struct device *dev, char *buf, int addr,
30 int io) 30 size_t count, int io)
31{ 31{
32 struct w1_slave *sl = container_of(dev, struct w1_slave, dev); 32 struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
33 33
34 if (!dev) 34 if (addr > DS2780_DATA_SIZE || addr < 0)
35 return -ENODEV; 35 return 0;
36 36
37 mutex_lock(&sl->master->mutex);
38
39 if (addr > DS2780_DATA_SIZE || addr < 0) {
40 count = 0;
41 goto out;
42 }
43 count = min_t(int, count, DS2780_DATA_SIZE - addr); 37 count = min_t(int, count, DS2780_DATA_SIZE - addr);
44 38
45 if (w1_reset_select_slave(sl) == 0) { 39 if (w1_reset_select_slave(sl) == 0) {
@@ -47,7 +41,6 @@ int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
47 w1_write_8(sl->master, W1_DS2780_WRITE_DATA); 41 w1_write_8(sl->master, W1_DS2780_WRITE_DATA);
48 w1_write_8(sl->master, addr); 42 w1_write_8(sl->master, addr);
49 w1_write_block(sl->master, buf, count); 43 w1_write_block(sl->master, buf, count);
50 /* XXX w1_write_block returns void, not n_written */
51 } else { 44 } else {
52 w1_write_8(sl->master, W1_DS2780_READ_DATA); 45 w1_write_8(sl->master, W1_DS2780_READ_DATA);
53 w1_write_8(sl->master, addr); 46 w1_write_8(sl->master, addr);
@@ -55,13 +48,42 @@ int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
55 } 48 }
56 } 49 }
57 50
58out: 51 return count;
52}
53
54int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
55 int io)
56{
57 struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
58 int ret;
59
60 if (!dev)
61 return -ENODEV;
62
63 mutex_lock(&sl->master->mutex);
64
65 ret = w1_ds2780_do_io(dev, buf, addr, count, io);
66
59 mutex_unlock(&sl->master->mutex); 67 mutex_unlock(&sl->master->mutex);
60 68
61 return count; 69 return ret;
62} 70}
63EXPORT_SYMBOL(w1_ds2780_io); 71EXPORT_SYMBOL(w1_ds2780_io);
64 72
73int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr, size_t count,
74 int io)
75{
76 int ret;
77
78 if (!dev)
79 return -ENODEV;
80
81 ret = w1_ds2780_do_io(dev, buf, addr, count, io);
82
83 return ret;
84}
85EXPORT_SYMBOL(w1_ds2780_io_nolock);
86
65int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd) 87int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd)
66{ 88{
67 struct w1_slave *sl = container_of(dev, struct w1_slave, dev); 89 struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
@@ -99,43 +121,7 @@ static struct bin_attribute w1_ds2780_bin_attr = {
99 .read = w1_ds2780_read_bin, 121 .read = w1_ds2780_read_bin,
100}; 122};
101 123
102static DEFINE_IDR(bat_idr); 124static DEFINE_IDA(bat_ida);
103static DEFINE_MUTEX(bat_idr_lock);
104
105static int new_bat_id(void)
106{
107 int ret;
108
109 while (1) {
110 int id;
111
112 ret = idr_pre_get(&bat_idr, GFP_KERNEL);
113 if (ret == 0)
114 return -ENOMEM;
115
116 mutex_lock(&bat_idr_lock);
117 ret = idr_get_new(&bat_idr, NULL, &id);
118 mutex_unlock(&bat_idr_lock);
119
120 if (ret == 0) {
121 ret = id & MAX_ID_MASK;
122 break;
123 } else if (ret == -EAGAIN) {
124 continue;
125 } else {
126 break;
127 }
128 }
129
130 return ret;
131}
132
133static void release_bat_id(int id)
134{
135 mutex_lock(&bat_idr_lock);
136 idr_remove(&bat_idr, id);
137 mutex_unlock(&bat_idr_lock);
138}
139 125
140static int w1_ds2780_add_slave(struct w1_slave *sl) 126static int w1_ds2780_add_slave(struct w1_slave *sl)
141{ 127{
@@ -143,7 +129,7 @@ static int w1_ds2780_add_slave(struct w1_slave *sl)
143 int id; 129 int id;
144 struct platform_device *pdev; 130 struct platform_device *pdev;
145 131
146 id = new_bat_id(); 132 id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL);
147 if (id < 0) { 133 if (id < 0) {
148 ret = id; 134 ret = id;
149 goto noid; 135 goto noid;
@@ -172,7 +158,7 @@ bin_attr_failed:
172pdev_add_failed: 158pdev_add_failed:
173 platform_device_unregister(pdev); 159 platform_device_unregister(pdev);
174pdev_alloc_failed: 160pdev_alloc_failed:
175 release_bat_id(id); 161 ida_simple_remove(&bat_ida, id);
176noid: 162noid:
177 return ret; 163 return ret;
178} 164}
@@ -183,7 +169,7 @@ static void w1_ds2780_remove_slave(struct w1_slave *sl)
183 int id = pdev->id; 169 int id = pdev->id;
184 170
185 platform_device_unregister(pdev); 171 platform_device_unregister(pdev);
186 release_bat_id(id); 172 ida_simple_remove(&bat_ida, id);
187 sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2780_bin_attr); 173 sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2780_bin_attr);
188} 174}
189 175
@@ -199,14 +185,14 @@ static struct w1_family w1_ds2780_family = {
199 185
200static int __init w1_ds2780_init(void) 186static int __init w1_ds2780_init(void)
201{ 187{
202 idr_init(&bat_idr); 188 ida_init(&bat_ida);
203 return w1_register_family(&w1_ds2780_family); 189 return w1_register_family(&w1_ds2780_family);
204} 190}
205 191
206static void __exit w1_ds2780_exit(void) 192static void __exit w1_ds2780_exit(void)
207{ 193{
208 w1_unregister_family(&w1_ds2780_family); 194 w1_unregister_family(&w1_ds2780_family);
209 idr_destroy(&bat_idr); 195 ida_destroy(&bat_ida);
210} 196}
211 197
212module_init(w1_ds2780_init); 198module_init(w1_ds2780_init);
diff --git a/drivers/w1/slaves/w1_ds2780.h b/drivers/w1/slaves/w1_ds2780.h
index a1fba79eb1b5..737379365021 100644
--- a/drivers/w1/slaves/w1_ds2780.h
+++ b/drivers/w1/slaves/w1_ds2780.h
@@ -124,6 +124,8 @@
124 124
125extern int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count, 125extern int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
126 int io); 126 int io);
127extern int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr,
128 size_t count, int io);
127extern int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd); 129extern int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd);
128 130
129#endif /* !_W1_DS2780_H */ 131#endif /* !_W1_DS2780_H */
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
index d220bce2cee4..f79e62e54e8d 100644
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -78,6 +78,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl,
78 memcpy(&dev->dev, device, sizeof(struct device)); 78 memcpy(&dev->dev, device, sizeof(struct device));
79 dev_set_name(&dev->dev, "w1_bus_master%u", dev->id); 79 dev_set_name(&dev->dev, "w1_bus_master%u", dev->id);
80 snprintf(dev->name, sizeof(dev->name), "w1_bus_master%u", dev->id); 80 snprintf(dev->name, sizeof(dev->name), "w1_bus_master%u", dev->id);
81 dev->dev.init_name = dev->name;
81 82
82 dev->driver = driver; 83 dev->driver = driver;
83 84
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c
index 765b37b62a4f..3135b2c63998 100644
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -158,13 +158,18 @@ EXPORT_SYMBOL_GPL(w1_write_8);
158static u8 w1_read_bit(struct w1_master *dev) 158static u8 w1_read_bit(struct w1_master *dev)
159{ 159{
160 int result; 160 int result;
161 unsigned long flags;
161 162
163 /* sample timing is critical here */
164 local_irq_save(flags);
162 dev->bus_master->write_bit(dev->bus_master->data, 0); 165 dev->bus_master->write_bit(dev->bus_master->data, 0);
163 w1_delay(6); 166 w1_delay(6);
164 dev->bus_master->write_bit(dev->bus_master->data, 1); 167 dev->bus_master->write_bit(dev->bus_master->data, 1);
165 w1_delay(9); 168 w1_delay(9);
166 169
167 result = dev->bus_master->read_bit(dev->bus_master->data); 170 result = dev->bus_master->read_bit(dev->bus_master->data);
171 local_irq_restore(flags);
172
168 w1_delay(55); 173 w1_delay(55);
169 174
170 return result & 0x1; 175 return result & 0x1;
diff --git a/fs/aio.c b/fs/aio.c
index 632b235f4fbe..78c514cfd212 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -440,8 +440,6 @@ void exit_aio(struct mm_struct *mm)
440static struct kiocb *__aio_get_req(struct kioctx *ctx) 440static struct kiocb *__aio_get_req(struct kioctx *ctx)
441{ 441{
442 struct kiocb *req = NULL; 442 struct kiocb *req = NULL;
443 struct aio_ring *ring;
444 int okay = 0;
445 443
446 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL); 444 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
447 if (unlikely(!req)) 445 if (unlikely(!req))
@@ -459,39 +457,114 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
459 INIT_LIST_HEAD(&req->ki_run_list); 457 INIT_LIST_HEAD(&req->ki_run_list);
460 req->ki_eventfd = NULL; 458 req->ki_eventfd = NULL;
461 459
462 /* Check if the completion queue has enough free space to 460 return req;
463 * accept an event from this io. 461}
464 */ 462
463/*
464 * struct kiocb's are allocated in batches to reduce the number of
465 * times the ctx lock is acquired and released.
466 */
467#define KIOCB_BATCH_SIZE 32L
468struct kiocb_batch {
469 struct list_head head;
470 long count; /* number of requests left to allocate */
471};
472
473static void kiocb_batch_init(struct kiocb_batch *batch, long total)
474{
475 INIT_LIST_HEAD(&batch->head);
476 batch->count = total;
477}
478
479static void kiocb_batch_free(struct kiocb_batch *batch)
480{
481 struct kiocb *req, *n;
482
483 list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
484 list_del(&req->ki_batch);
485 kmem_cache_free(kiocb_cachep, req);
486 }
487}
488
489/*
490 * Allocate a batch of kiocbs. This avoids taking and dropping the
491 * context lock a lot during setup.
492 */
493static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
494{
495 unsigned short allocated, to_alloc;
496 long avail;
497 bool called_fput = false;
498 struct kiocb *req, *n;
499 struct aio_ring *ring;
500
501 to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
502 for (allocated = 0; allocated < to_alloc; allocated++) {
503 req = __aio_get_req(ctx);
504 if (!req)
505 /* allocation failed, go with what we've got */
506 break;
507 list_add(&req->ki_batch, &batch->head);
508 }
509
510 if (allocated == 0)
511 goto out;
512
513retry:
465 spin_lock_irq(&ctx->ctx_lock); 514 spin_lock_irq(&ctx->ctx_lock);
466 ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0); 515 ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
467 if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) { 516
517 avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
518 BUG_ON(avail < 0);
519 if (avail == 0 && !called_fput) {
520 /*
521 * Handle a potential starvation case. It is possible that
522 * we hold the last reference on a struct file, causing us
523 * to delay the final fput to non-irq context. In this case,
524 * ctx->reqs_active is artificially high. Calling the fput
525 * routine here may free up a slot in the event completion
526 * ring, allowing this allocation to succeed.
527 */
528 kunmap_atomic(ring);
529 spin_unlock_irq(&ctx->ctx_lock);
530 aio_fput_routine(NULL);
531 called_fput = true;
532 goto retry;
533 }
534
535 if (avail < allocated) {
536 /* Trim back the number of requests. */
537 list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
538 list_del(&req->ki_batch);
539 kmem_cache_free(kiocb_cachep, req);
540 if (--allocated <= avail)
541 break;
542 }
543 }
544
545 batch->count -= allocated;
546 list_for_each_entry(req, &batch->head, ki_batch) {
468 list_add(&req->ki_list, &ctx->active_reqs); 547 list_add(&req->ki_list, &ctx->active_reqs);
469 ctx->reqs_active++; 548 ctx->reqs_active++;
470 okay = 1;
471 } 549 }
472 kunmap_atomic(ring, KM_USER0);
473 spin_unlock_irq(&ctx->ctx_lock);
474 550
475 if (!okay) { 551 kunmap_atomic(ring);
476 kmem_cache_free(kiocb_cachep, req); 552 spin_unlock_irq(&ctx->ctx_lock);
477 req = NULL;
478 }
479 553
480 return req; 554out:
555 return allocated;
481} 556}
482 557
483static inline struct kiocb *aio_get_req(struct kioctx *ctx) 558static inline struct kiocb *aio_get_req(struct kioctx *ctx,
559 struct kiocb_batch *batch)
484{ 560{
485 struct kiocb *req; 561 struct kiocb *req;
486 /* Handle a potential starvation case -- should be exceedingly rare as 562
487 * requests will be stuck on fput_head only if the aio_fput_routine is 563 if (list_empty(&batch->head))
488 * delayed and the requests were the last user of the struct file. 564 if (kiocb_batch_refill(ctx, batch) == 0)
489 */ 565 return NULL;
490 req = __aio_get_req(ctx); 566 req = list_first_entry(&batch->head, struct kiocb, ki_batch);
491 if (unlikely(NULL == req)) { 567 list_del(&req->ki_batch);
492 aio_fput_routine(NULL);
493 req = __aio_get_req(ctx);
494 }
495 return req; 568 return req;
496} 569}
497 570
@@ -1515,7 +1588,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
1515} 1588}
1516 1589
1517static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1590static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1518 struct iocb *iocb, bool compat) 1591 struct iocb *iocb, struct kiocb_batch *batch,
1592 bool compat)
1519{ 1593{
1520 struct kiocb *req; 1594 struct kiocb *req;
1521 struct file *file; 1595 struct file *file;
@@ -1541,7 +1615,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1541 if (unlikely(!file)) 1615 if (unlikely(!file))
1542 return -EBADF; 1616 return -EBADF;
1543 1617
1544 req = aio_get_req(ctx); /* returns with 2 references to req */ 1618 req = aio_get_req(ctx, batch); /* returns with 2 references to req */
1545 if (unlikely(!req)) { 1619 if (unlikely(!req)) {
1546 fput(file); 1620 fput(file);
1547 return -EAGAIN; 1621 return -EAGAIN;
@@ -1621,8 +1695,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1621{ 1695{
1622 struct kioctx *ctx; 1696 struct kioctx *ctx;
1623 long ret = 0; 1697 long ret = 0;
1624 int i; 1698 int i = 0;
1625 struct blk_plug plug; 1699 struct blk_plug plug;
1700 struct kiocb_batch batch;
1626 1701
1627 if (unlikely(nr < 0)) 1702 if (unlikely(nr < 0))
1628 return -EINVAL; 1703 return -EINVAL;
@@ -1639,6 +1714,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1639 return -EINVAL; 1714 return -EINVAL;
1640 } 1715 }
1641 1716
1717 kiocb_batch_init(&batch, nr);
1718
1642 blk_start_plug(&plug); 1719 blk_start_plug(&plug);
1643 1720
1644 /* 1721 /*
@@ -1659,12 +1736,13 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1659 break; 1736 break;
1660 } 1737 }
1661 1738
1662 ret = io_submit_one(ctx, user_iocb, &tmp, compat); 1739 ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
1663 if (ret) 1740 if (ret)
1664 break; 1741 break;
1665 } 1742 }
1666 blk_finish_plug(&plug); 1743 blk_finish_plug(&plug);
1667 1744
1745 kiocb_batch_free(&batch);
1668 put_ioctx(ctx); 1746 put_ioctx(ctx);
1669 return i ? i : ret; 1747 return i ? i : ret;
1670} 1748}
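
[Editor's note] The batching above only pays off when userspace hands the kernel several iocbs in one io_submit() call. A minimal userspace sketch using libaio follows; it is illustrative only and not part of the patch — the file name, 4 KiB buffer size and use of O_DIRECT are assumptions. Build with "gcc aio_batch.c -laio" on a system that ships libaio.

/* aio_batch.c: submit eight reads with a single io_submit() call,
 * the path whose kiocbs are now allocated in one batch. */
#define _GNU_SOURCE
#include <libaio.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NR_REQS	8
#define BLK	4096

int main(int argc, char **argv)
{
	io_context_t ctx = 0;
	struct iocb cbs[NR_REQS], *cbps[NR_REQS];
	struct io_event events[NR_REQS];
	void *bufs[NR_REQS];
	int fd, i;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0 || io_setup(NR_REQS, &ctx) < 0)
		return 1;

	for (i = 0; i < NR_REQS; i++) {
		if (posix_memalign(&bufs[i], BLK, BLK))
			return 1;
		io_prep_pread(&cbs[i], fd, bufs[i], BLK, (long long)i * BLK);
		cbps[i] = &cbs[i];
	}

	/* One syscall carries all eight requests. */
	if (io_submit(ctx, NR_REQS, cbps) != NR_REQS)
		return 1;
	if (io_getevents(ctx, NR_REQS, NR_REQS, events, NULL) < 0)
		return 1;

	printf("completed %d reads\n", NR_REQS);
	io_destroy(ctx);
	close(fd);
	return 0;
}

In the patched kernel path, kiocb_batch_init(&batch, nr) sizes the batch to the whole submission and kiocb_batch_refill() caps each refill at KIOCB_BATCH_SIZE, so the ctx lock is taken once per batch rather than once per iocb.
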
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index dd0fdfc56d38..21ac5ee4b43f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -795,7 +795,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
795 * might try to exec. This is because the brk will 795 * might try to exec. This is because the brk will
796 * follow the loader, and is not movable. */ 796 * follow the loader, and is not movable. */
797#if defined(CONFIG_X86) || defined(CONFIG_ARM) 797#if defined(CONFIG_X86) || defined(CONFIG_ARM)
798 load_bias = 0; 798 /* Memory randomization might have been switched off
799 * in runtime via sysctl.
800 * If that is the case, retain the original non-zero
801 * load_bias value in order to establish proper
802 * non-randomized mappings.
803 */
804 if (current->flags & PF_RANDOMIZE)
805 load_bias = 0;
806 else
807 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
799#else 808#else
800 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 809 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
801#endif 810#endif
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 3ebc437736fe..1cbdeea1db44 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -46,11 +46,26 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
46 case HFS_EXT_CNID: 46 case HFS_EXT_CNID:
47 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, 47 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
48 mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz)); 48 mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz));
49 if (HFS_I(tree->inode)->alloc_blocks >
50 HFS_I(tree->inode)->first_blocks) {
51 printk(KERN_ERR "hfs: invalid btree extent records\n");
52 unlock_new_inode(tree->inode);
53 goto free_inode;
54 }
55
49 tree->inode->i_mapping->a_ops = &hfs_btree_aops; 56 tree->inode->i_mapping->a_ops = &hfs_btree_aops;
50 break; 57 break;
51 case HFS_CAT_CNID: 58 case HFS_CAT_CNID:
52 hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize, 59 hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize,
53 mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz)); 60 mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz));
61
62 if (!HFS_I(tree->inode)->first_blocks) {
63 printk(KERN_ERR "hfs: invalid btree extent records "
64 "(0 size).\n");
65 unlock_new_inode(tree->inode);
66 goto free_inode;
67 }
68
54 tree->inode->i_mapping->a_ops = &hfs_btree_aops; 69 tree->inode->i_mapping->a_ops = &hfs_btree_aops;
55 break; 70 break;
56 default: 71 default:
@@ -59,11 +74,6 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
59 } 74 }
60 unlock_new_inode(tree->inode); 75 unlock_new_inode(tree->inode);
61 76
62 if (!HFS_I(tree->inode)->first_blocks) {
63 printk(KERN_ERR "hfs: invalid btree extent records (0 size).\n");
64 goto free_inode;
65 }
66
67 mapping = tree->inode->i_mapping; 77 mapping = tree->inode->i_mapping;
68 page = read_mapping_page(mapping, 0, NULL); 78 page = read_mapping_page(mapping, 0, NULL);
69 if (IS_ERR(page)) 79 if (IS_ERR(page))
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 562adabef985..f950059525fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -20,6 +20,7 @@
20#include <linux/statfs.h> 20#include <linux/statfs.h>
21#include <linux/cdrom.h> 21#include <linux/cdrom.h>
22#include <linux/parser.h> 22#include <linux/parser.h>
23#include <linux/mpage.h>
23 24
24#include "isofs.h" 25#include "isofs.h"
25#include "zisofs.h" 26#include "zisofs.h"
@@ -1148,7 +1149,13 @@ struct buffer_head *isofs_bread(struct inode *inode, sector_t block)
1148 1149
1149static int isofs_readpage(struct file *file, struct page *page) 1150static int isofs_readpage(struct file *file, struct page *page)
1150{ 1151{
1151 return block_read_full_page(page,isofs_get_block); 1152 return mpage_readpage(page, isofs_get_block);
1153}
1154
1155static int isofs_readpages(struct file *file, struct address_space *mapping,
1156 struct list_head *pages, unsigned nr_pages)
1157{
1158 return mpage_readpages(mapping, pages, nr_pages, isofs_get_block);
1152} 1159}
1153 1160
1154static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) 1161static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
@@ -1158,6 +1165,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
1158 1165
1159static const struct address_space_operations isofs_aops = { 1166static const struct address_space_operations isofs_aops = {
1160 .readpage = isofs_readpage, 1167 .readpage = isofs_readpage,
1168 .readpages = isofs_readpages,
1161 .bmap = _isofs_bmap 1169 .bmap = _isofs_bmap
1162}; 1170};
1163 1171
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 851ba3dcdc29..2db1bd3173b2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1652,12 +1652,46 @@ out:
1652 return error; 1652 return error;
1653} 1653}
1654 1654
1655static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
1656 struct kstat *stat)
1657{
1658 struct inode *inode = dentry->d_inode;
1659 struct task_struct *task = get_proc_task(inode);
1660 int rc;
1661
1662 if (task == NULL)
1663 return -ESRCH;
1664
1665 rc = -EACCES;
1666 if (lock_trace(task))
1667 goto out_task;
1668
1669 generic_fillattr(inode, stat);
1670 unlock_trace(task);
1671 rc = 0;
1672out_task:
1673 put_task_struct(task);
1674 return rc;
1675}
1676
1655static const struct inode_operations proc_pid_link_inode_operations = { 1677static const struct inode_operations proc_pid_link_inode_operations = {
1656 .readlink = proc_pid_readlink, 1678 .readlink = proc_pid_readlink,
1657 .follow_link = proc_pid_follow_link, 1679 .follow_link = proc_pid_follow_link,
1658 .setattr = proc_setattr, 1680 .setattr = proc_setattr,
1659}; 1681};
1660 1682
1683static const struct inode_operations proc_fdinfo_link_inode_operations = {
1684 .setattr = proc_setattr,
1685 .getattr = proc_pid_fd_link_getattr,
1686};
1687
1688static const struct inode_operations proc_fd_link_inode_operations = {
1689 .readlink = proc_pid_readlink,
1690 .follow_link = proc_pid_follow_link,
1691 .setattr = proc_setattr,
1692 .getattr = proc_pid_fd_link_getattr,
1693};
1694
1661 1695
1662/* building an inode */ 1696/* building an inode */
1663 1697
@@ -1889,49 +1923,61 @@ out:
1889 1923
1890static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1924static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1891{ 1925{
1892 struct task_struct *task = get_proc_task(inode); 1926 struct task_struct *task;
1893 struct files_struct *files = NULL; 1927 struct files_struct *files;
1894 struct file *file; 1928 struct file *file;
1895 int fd = proc_fd(inode); 1929 int fd = proc_fd(inode);
1930 int rc;
1896 1931
1897 if (task) { 1932 task = get_proc_task(inode);
1898 files = get_files_struct(task); 1933 if (!task)
1899 put_task_struct(task); 1934 return -ENOENT;
1900 } 1935
1901 if (files) { 1936 rc = -EACCES;
1902 /* 1937 if (lock_trace(task))
1903 * We are not taking a ref to the file structure, so we must 1938 goto out_task;
1904 * hold ->file_lock. 1939
1905 */ 1940 rc = -ENOENT;
1906 spin_lock(&files->file_lock); 1941 files = get_files_struct(task);
1907 file = fcheck_files(files, fd); 1942 if (files == NULL)
1908 if (file) { 1943 goto out_unlock;
1909 unsigned int f_flags; 1944
1910 struct fdtable *fdt; 1945 /*
1911 1946 * We are not taking a ref to the file structure, so we must
1912 fdt = files_fdtable(files); 1947 * hold ->file_lock.
1913 f_flags = file->f_flags & ~O_CLOEXEC; 1948 */
1914 if (FD_ISSET(fd, fdt->close_on_exec)) 1949 spin_lock(&files->file_lock);
1915 f_flags |= O_CLOEXEC; 1950 file = fcheck_files(files, fd);
1916 1951 if (file) {
1917 if (path) { 1952 unsigned int f_flags;
1918 *path = file->f_path; 1953 struct fdtable *fdt;
1919 path_get(&file->f_path); 1954
1920 } 1955 fdt = files_fdtable(files);
1921 if (info) 1956 f_flags = file->f_flags & ~O_CLOEXEC;
1922 snprintf(info, PROC_FDINFO_MAX, 1957 if (FD_ISSET(fd, fdt->close_on_exec))
1923 "pos:\t%lli\n" 1958 f_flags |= O_CLOEXEC;
1924 "flags:\t0%o\n", 1959
1925 (long long) file->f_pos, 1960 if (path) {
1926 f_flags); 1961 *path = file->f_path;
1927 spin_unlock(&files->file_lock); 1962 path_get(&file->f_path);
1928 put_files_struct(files);
1929 return 0;
1930 } 1963 }
1931 spin_unlock(&files->file_lock); 1964 if (info)
1932 put_files_struct(files); 1965 snprintf(info, PROC_FDINFO_MAX,
1933 } 1966 "pos:\t%lli\n"
1934 return -ENOENT; 1967 "flags:\t0%o\n",
1968 (long long) file->f_pos,
1969 f_flags);
1970 rc = 0;
1971 } else
1972 rc = -ENOENT;
1973 spin_unlock(&files->file_lock);
1974 put_files_struct(files);
1975
1976out_unlock:
1977 unlock_trace(task);
1978out_task:
1979 put_task_struct(task);
1980 return rc;
1935} 1981}
1936 1982
1937static int proc_fd_link(struct inode *inode, struct path *path) 1983static int proc_fd_link(struct inode *inode, struct path *path)
@@ -2026,7 +2072,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
2026 spin_unlock(&files->file_lock); 2072 spin_unlock(&files->file_lock);
2027 put_files_struct(files); 2073 put_files_struct(files);
2028 2074
2029 inode->i_op = &proc_pid_link_inode_operations; 2075 inode->i_op = &proc_fd_link_inode_operations;
2030 inode->i_size = 64; 2076 inode->i_size = 64;
2031 ei->op.proc_get_link = proc_fd_link; 2077 ei->op.proc_get_link = proc_fd_link;
2032 d_set_d_op(dentry, &tid_fd_dentry_operations); 2078 d_set_d_op(dentry, &tid_fd_dentry_operations);
@@ -2058,7 +2104,12 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
2058 if (fd == ~0U) 2104 if (fd == ~0U)
2059 goto out; 2105 goto out;
2060 2106
2107 result = ERR_PTR(-EACCES);
2108 if (lock_trace(task))
2109 goto out;
2110
2061 result = instantiate(dir, dentry, task, &fd); 2111 result = instantiate(dir, dentry, task, &fd);
2112 unlock_trace(task);
2062out: 2113out:
2063 put_task_struct(task); 2114 put_task_struct(task);
2064out_no_task: 2115out_no_task:
@@ -2078,23 +2129,28 @@ static int proc_readfd_common(struct file * filp, void * dirent,
2078 retval = -ENOENT; 2129 retval = -ENOENT;
2079 if (!p) 2130 if (!p)
2080 goto out_no_task; 2131 goto out_no_task;
2132
2133 retval = -EACCES;
2134 if (lock_trace(p))
2135 goto out;
2136
2081 retval = 0; 2137 retval = 0;
2082 2138
2083 fd = filp->f_pos; 2139 fd = filp->f_pos;
2084 switch (fd) { 2140 switch (fd) {
2085 case 0: 2141 case 0:
2086 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 2142 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
2087 goto out; 2143 goto out_unlock;
2088 filp->f_pos++; 2144 filp->f_pos++;
2089 case 1: 2145 case 1:
2090 ino = parent_ino(dentry); 2146 ino = parent_ino(dentry);
2091 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 2147 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2092 goto out; 2148 goto out_unlock;
2093 filp->f_pos++; 2149 filp->f_pos++;
2094 default: 2150 default:
2095 files = get_files_struct(p); 2151 files = get_files_struct(p);
2096 if (!files) 2152 if (!files)
2097 goto out; 2153 goto out_unlock;
2098 rcu_read_lock(); 2154 rcu_read_lock();
2099 for (fd = filp->f_pos-2; 2155 for (fd = filp->f_pos-2;
2100 fd < files_fdtable(files)->max_fds; 2156 fd < files_fdtable(files)->max_fds;
@@ -2118,6 +2174,9 @@ static int proc_readfd_common(struct file * filp, void * dirent,
2118 rcu_read_unlock(); 2174 rcu_read_unlock();
2119 put_files_struct(files); 2175 put_files_struct(files);
2120 } 2176 }
2177
2178out_unlock:
2179 unlock_trace(p);
2121out: 2180out:
2122 put_task_struct(p); 2181 put_task_struct(p);
2123out_no_task: 2182out_no_task:
@@ -2195,6 +2254,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2195 ei->fd = fd; 2254 ei->fd = fd;
2196 inode->i_mode = S_IFREG | S_IRUSR; 2255 inode->i_mode = S_IFREG | S_IRUSR;
2197 inode->i_fop = &proc_fdinfo_file_operations; 2256 inode->i_fop = &proc_fdinfo_file_operations;
2257 inode->i_op = &proc_fdinfo_link_inode_operations;
2198 d_set_d_op(dentry, &tid_fd_dentry_operations); 2258 d_set_d_op(dentry, &tid_fd_dentry_operations);
2199 d_add(dentry, inode); 2259 d_add(dentry, inode);
2200 /* Close the race of the process dying before we return the dentry */ 2260 /* Close the race of the process dying before we return the dentry */
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b44113279e30..a6b62173d4c3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -3,6 +3,7 @@
3 */ 3 */
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/poll.h>
6#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
7#include <linux/security.h> 8#include <linux/security.h>
8#include <linux/namei.h> 9#include <linux/namei.h>
@@ -14,6 +15,15 @@ static const struct inode_operations proc_sys_inode_operations;
14static const struct file_operations proc_sys_dir_file_operations; 15static const struct file_operations proc_sys_dir_file_operations;
15static const struct inode_operations proc_sys_dir_operations; 16static const struct inode_operations proc_sys_dir_operations;
16 17
18void proc_sys_poll_notify(struct ctl_table_poll *poll)
19{
20 if (!poll)
21 return;
22
23 atomic_inc(&poll->event);
24 wake_up_interruptible(&poll->wait);
25}
26
17static struct inode *proc_sys_make_inode(struct super_block *sb, 27static struct inode *proc_sys_make_inode(struct super_block *sb,
18 struct ctl_table_header *head, struct ctl_table *table) 28 struct ctl_table_header *head, struct ctl_table *table)
19{ 29{
@@ -176,6 +186,39 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
176 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); 186 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
177} 187}
178 188
189static int proc_sys_open(struct inode *inode, struct file *filp)
190{
191 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
192
193 if (table->poll)
194 filp->private_data = proc_sys_poll_event(table->poll);
195
196 return 0;
197}
198
199static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
200{
201 struct inode *inode = filp->f_path.dentry->d_inode;
202 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
203 unsigned long event = (unsigned long)filp->private_data;
204 unsigned int ret = DEFAULT_POLLMASK;
205
206 if (!table->proc_handler)
207 goto out;
208
209 if (!table->poll)
210 goto out;
211
212 poll_wait(filp, &table->poll->wait, wait);
213
214 if (event != atomic_read(&table->poll->event)) {
215 filp->private_data = proc_sys_poll_event(table->poll);
216 ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
217 }
218
219out:
220 return ret;
221}
179 222
180static int proc_sys_fill_cache(struct file *filp, void *dirent, 223static int proc_sys_fill_cache(struct file *filp, void *dirent,
181 filldir_t filldir, 224 filldir_t filldir,
@@ -364,12 +407,15 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
364} 407}
365 408
366static const struct file_operations proc_sys_file_operations = { 409static const struct file_operations proc_sys_file_operations = {
410 .open = proc_sys_open,
411 .poll = proc_sys_poll,
367 .read = proc_sys_read, 412 .read = proc_sys_read,
368 .write = proc_sys_write, 413 .write = proc_sys_write,
369 .llseek = default_llseek, 414 .llseek = default_llseek,
370}; 415};
371 416
372static const struct file_operations proc_sys_dir_file_operations = { 417static const struct file_operations proc_sys_dir_file_operations = {
418 .read = generic_read_dir,
373 .readdir = proc_sys_readdir, 419 .readdir = proc_sys_readdir,
374 .llseek = generic_file_llseek, 420 .llseek = generic_file_llseek,
375}; 421};
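
[Editor's note] From userspace, the new poll() support is consumed roughly as sketched below. This is a hedged example, not part of the patch; it assumes the hostname entry gets a ->poll handle wired up, which the uts sysctl hunks later in this series provide. A change is signalled as POLLERR | POLLPRI.

/* hostname_watch.c: block until /proc/sys/kernel/hostname changes. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];
	struct pollfd pfd;
	ssize_t n;

	pfd.fd = open("/proc/sys/kernel/hostname", O_RDONLY);
	if (pfd.fd < 0)
		return 1;
	pfd.events = POLLERR | POLLPRI;

	for (;;) {
		/* Read the current value, then sleep until it changes. */
		lseek(pfd.fd, 0, SEEK_SET);
		n = read(pfd.fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			printf("hostname: %s", buf);
		}
		if (poll(&pfd, 1, -1) < 0)
			break;
	}
	close(pfd.fd);
	return 0;
}
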
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index eacb166fb259..462ceb38fec6 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -23,7 +23,6 @@
23 * caches is sufficient. 23 * caches is sufficient.
24 */ 24 */
25 25
26#include <linux/module.h>
27#include <linux/fs.h> 26#include <linux/fs.h>
28#include <linux/pagemap.h> 27#include <linux/pagemap.h>
29#include <linux/highmem.h> 28#include <linux/highmem.h>
@@ -288,14 +287,7 @@ static int __init init_ramfs_fs(void)
288{ 287{
289 return register_filesystem(&ramfs_fs_type); 288 return register_filesystem(&ramfs_fs_type);
290} 289}
291
292static void __exit exit_ramfs_fs(void)
293{
294 unregister_filesystem(&ramfs_fs_type);
295}
296
297module_init(init_ramfs_fs) 290module_init(init_ramfs_fs)
298module_exit(exit_ramfs_fs)
299 291
300int __init init_rootfs(void) 292int __init init_rootfs(void)
301{ 293{
@@ -311,5 +303,3 @@ int __init init_rootfs(void)
311 303
312 return err; 304 return err;
313} 305}
314
315MODULE_LICENSE("GPL");
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 2dcb72bff4b6..2314ad8b3c9c 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -117,6 +117,7 @@ struct kiocb {
117 117
118 struct list_head ki_list; /* the aio core uses this 118 struct list_head ki_list; /* the aio core uses this
119 * for cancellation */ 119 * for cancellation */
120 struct list_head ki_batch; /* batch allocation */
120 121
121 /* 122 /*
122 * If the aio_resfd field of the userspace iocb is not zero, 123 * If the aio_resfd field of the userspace iocb is not zero,
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index da7e4bc34e8c..1b7f9d525013 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -516,7 +516,7 @@ struct cgroup_subsys {
516 struct list_head sibling; 516 struct list_head sibling;
517 /* used when use_id == true */ 517 /* used when use_id == true */
518 struct idr idr; 518 struct idr idr;
519 spinlock_t id_lock; 519 rwlock_t id_lock;
520 520
521 /* should be defined only by modular subsystems */ 521 /* should be defined only by modular subsystems */
522 struct module *module; 522 struct module *module;
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 347fdc32177a..be86ae13893f 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -1,6 +1,7 @@
1#ifndef _LINUX_DMA_MAPPING_H 1#ifndef _LINUX_DMA_MAPPING_H
2#define _LINUX_DMA_MAPPING_H 2#define _LINUX_DMA_MAPPING_H
3 3
4#include <linux/string.h>
4#include <linux/device.h> 5#include <linux/device.h>
5#include <linux/err.h> 6#include <linux/err.h>
6#include <linux/dma-attrs.h> 7#include <linux/dma-attrs.h>
@@ -117,6 +118,15 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
117 return -EIO; 118 return -EIO;
118} 119}
119 120
121static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
122 dma_addr_t *dma_handle, gfp_t flag)
123{
124 void *ret = dma_alloc_coherent(dev, size, dma_handle, flag);
125 if (ret)
126 memset(ret, 0, size);
127 return ret;
128}
129
120#ifdef CONFIG_HAS_DMA 130#ifdef CONFIG_HAS_DMA
121static inline int dma_get_cache_alignment(void) 131static inline int dma_get_cache_alignment(void)
122{ 132{
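
[Editor's note] dma_zalloc_coherent() simply folds the common alloc-then-memset pattern into one call. A hypothetical driver fragment (struct foo_dev and the ring names are made up for illustration):

#include <linux/dma-mapping.h>

struct foo_dev {
	void		*ring;
	dma_addr_t	ring_dma;
};

static int foo_alloc_ring(struct device *dev, struct foo_dev *foo, size_t bytes)
{
	/* Previously two steps:
	 *	foo->ring = dma_alloc_coherent(dev, bytes, &foo->ring_dma, GFP_KERNEL);
	 *	if (foo->ring)
	 *		memset(foo->ring, 0, bytes);
	 */
	foo->ring = dma_zalloc_coherent(dev, bytes, &foo->ring_dma, GFP_KERNEL);
	return foo->ring ? 0 : -ENOMEM;
}
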
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 1e5df2af8d84..2d4beab0d5b7 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -30,11 +30,11 @@
30#define ANON_INODE_FS_MAGIC 0x09041934 30#define ANON_INODE_FS_MAGIC 0x09041934
31#define PSTOREFS_MAGIC 0x6165676C 31#define PSTOREFS_MAGIC 0x6165676C
32 32
33#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ 33#define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */
34#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ 34#define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */
35#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ 35#define MINIX2_SUPER_MAGIC 0x2468 /* minix v2 fs, 14 char names */
36#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ 36#define MINIX2_SUPER_MAGIC2 0x2478 /* minix v2 fs, 30 char names */
37#define MINIX3_SUPER_MAGIC 0x4d5a /* minix V3 fs */ 37#define MINIX3_SUPER_MAGIC 0x4d5a /* minix v3 fs, 60 char names */
38 38
39#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ 39#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */
40#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ 40#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ac797fa03ef8..b87068a1a09e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -78,8 +78,8 @@ extern void mem_cgroup_uncharge_end(void);
78extern void mem_cgroup_uncharge_page(struct page *page); 78extern void mem_cgroup_uncharge_page(struct page *page);
79extern void mem_cgroup_uncharge_cache_page(struct page *page); 79extern void mem_cgroup_uncharge_cache_page(struct page *page);
80 80
81extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); 81extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask);
82int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); 82int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
83 83
84extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); 84extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
85extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); 85extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -88,26 +88,28 @@ extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
88static inline 88static inline
89int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) 89int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
90{ 90{
91 struct mem_cgroup *mem; 91 struct mem_cgroup *memcg;
92 rcu_read_lock(); 92 rcu_read_lock();
93 mem = mem_cgroup_from_task(rcu_dereference((mm)->owner)); 93 memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner));
94 rcu_read_unlock(); 94 rcu_read_unlock();
95 return cgroup == mem; 95 return cgroup == memcg;
96} 96}
97 97
98extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem); 98extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
99 99
100extern int 100extern int
101mem_cgroup_prepare_migration(struct page *page, 101mem_cgroup_prepare_migration(struct page *page,
102 struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask); 102 struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask);
103extern void mem_cgroup_end_migration(struct mem_cgroup *mem, 103extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
104 struct page *oldpage, struct page *newpage, bool migration_ok); 104 struct page *oldpage, struct page *newpage, bool migration_ok);
105 105
106/* 106/*
107 * For memory reclaim. 107 * For memory reclaim.
108 */ 108 */
109int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); 109int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
110int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg); 110 struct zone *zone);
111int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
112 struct zone *zone);
111int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); 113int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
112unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, 114unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
113 int nid, int zid, unsigned int lrumask); 115 int nid, int zid, unsigned int lrumask);
@@ -148,7 +150,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
148unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, 150unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
149 gfp_t gfp_mask, 151 gfp_t gfp_mask,
150 unsigned long *total_scanned); 152 unsigned long *total_scanned);
151u64 mem_cgroup_get_limit(struct mem_cgroup *mem); 153u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
152 154
153void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); 155void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
154#ifdef CONFIG_TRANSPARENT_HUGEPAGE 156#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -244,18 +246,20 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm
244 return NULL; 246 return NULL;
245} 247}
246 248
247static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) 249static inline int mm_match_cgroup(struct mm_struct *mm,
250 struct mem_cgroup *memcg)
248{ 251{
249 return 1; 252 return 1;
250} 253}
251 254
252static inline int task_in_mem_cgroup(struct task_struct *task, 255static inline int task_in_mem_cgroup(struct task_struct *task,
253 const struct mem_cgroup *mem) 256 const struct mem_cgroup *memcg)
254{ 257{
255 return 1; 258 return 1;
256} 259}
257 260
258static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) 261static inline struct cgroup_subsys_state
262 *mem_cgroup_css(struct mem_cgroup *memcg)
259{ 263{
260 return NULL; 264 return NULL;
261} 265}
@@ -267,22 +271,22 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
267 return 0; 271 return 0;
268} 272}
269 273
270static inline void mem_cgroup_end_migration(struct mem_cgroup *mem, 274static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
271 struct page *oldpage, struct page *newpage, bool migration_ok) 275 struct page *oldpage, struct page *newpage, bool migration_ok)
272{ 276{
273} 277}
274 278
275static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) 279static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
276{ 280{
277 return 0; 281 return 0;
278} 282}
279 283
280static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, 284static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *memcg,
281 int priority) 285 int priority)
282{ 286{
283} 287}
284 288
285static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, 289static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *memcg,
286 int priority) 290 int priority)
287{ 291{
288} 292}
@@ -293,13 +297,13 @@ static inline bool mem_cgroup_disabled(void)
293} 297}
294 298
295static inline int 299static inline int
296mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) 300mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
297{ 301{
298 return 1; 302 return 1;
299} 303}
300 304
301static inline int 305static inline int
302mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg) 306mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
303{ 307{
304 return 1; 308 return 1;
305} 309}
@@ -348,7 +352,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
348} 352}
349 353
350static inline 354static inline
351u64 mem_cgroup_get_limit(struct mem_cgroup *mem) 355u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
352{ 356{
353 return 0; 357 return 0;
354} 358}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3b3e3b8bb706..3dc3a8c2c485 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -356,36 +356,50 @@ static inline struct page *compound_head(struct page *page)
356 return page; 356 return page;
357} 357}
358 358
359/*
360 * The atomic page->_mapcount, starts from -1: so that transitions
361 * both from it and to it can be tracked, using atomic_inc_and_test
362 * and atomic_add_negative(-1).
363 */
364static inline void reset_page_mapcount(struct page *page)
365{
366 atomic_set(&(page)->_mapcount, -1);
367}
368
369static inline int page_mapcount(struct page *page)
370{
371 return atomic_read(&(page)->_mapcount) + 1;
372}
373
359static inline int page_count(struct page *page) 374static inline int page_count(struct page *page)
360{ 375{
361 return atomic_read(&compound_head(page)->_count); 376 return atomic_read(&compound_head(page)->_count);
362} 377}
363 378
379static inline void get_huge_page_tail(struct page *page)
380{
381 /*
382 * __split_huge_page_refcount() cannot run
383 * from under us.
384 */
385 VM_BUG_ON(page_mapcount(page) < 0);
386 VM_BUG_ON(atomic_read(&page->_count) != 0);
387 atomic_inc(&page->_mapcount);
388}
389
390extern bool __get_page_tail(struct page *page);
391
364static inline void get_page(struct page *page) 392static inline void get_page(struct page *page)
365{ 393{
394 if (unlikely(PageTail(page)))
395 if (likely(__get_page_tail(page)))
396 return;
366 /* 397 /*
367 * Getting a normal page or the head of a compound page 398 * Getting a normal page or the head of a compound page
368 * requires to already have an elevated page->_count. Only if 399 * requires to already have an elevated page->_count.
369 * we're getting a tail page, the elevated page->_count is
370 * required only in the head page, so for tail pages the
371 * bugcheck only verifies that the page->_count isn't
372 * negative.
373 */ 400 */
374 VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)); 401 VM_BUG_ON(atomic_read(&page->_count) <= 0);
375 atomic_inc(&page->_count); 402 atomic_inc(&page->_count);
376 /*
377 * Getting a tail page will elevate both the head and tail
378 * page->_count(s).
379 */
380 if (unlikely(PageTail(page))) {
381 /*
382 * This is safe only because
383 * __split_huge_page_refcount can't run under
384 * get_page().
385 */
386 VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
387 atomic_inc(&page->first_page->_count);
388 }
389} 403}
390 404
391static inline struct page *virt_to_head_page(const void *x) 405static inline struct page *virt_to_head_page(const void *x)
@@ -804,21 +818,6 @@ static inline pgoff_t page_index(struct page *page)
804} 818}
805 819
806/* 820/*
807 * The atomic page->_mapcount, like _count, starts from -1:
808 * so that transitions both from it and to it can be tracked,
809 * using atomic_inc_and_test and atomic_add_negative(-1).
810 */
811static inline void reset_page_mapcount(struct page *page)
812{
813 atomic_set(&(page)->_mapcount, -1);
814}
815
816static inline int page_mapcount(struct page *page)
817{
818 return atomic_read(&(page)->_mapcount) + 1;
819}
820
821/*
822 * Return true if this page is mapped into pagetables. 821 * Return true if this page is mapped into pagetables.
823 */ 822 */
824static inline int page_mapped(struct page *page) 823static inline int page_mapped(struct page *page)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3e01a19a91e8..5b42f1b34eb7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -62,10 +62,23 @@ struct page {
62 struct { 62 struct {
63 63
64 union { 64 union {
65 atomic_t _mapcount; /* Count of ptes mapped in mms, 65 /*
66 * to show when page is mapped 66 * Count of ptes mapped in
67 * & limit reverse map searches. 67 * mms, to show when page is
68 */ 68 * mapped & limit reverse map
69 * searches.
70 *
71 * Used also for tail pages
72 * refcounting instead of
73 * _count. Tail pages cannot
74 * be mapped and keeping the
75 * tail page _count zero at
76 * all times guarantees
77 * get_page_unless_zero() will
78 * never succeed on tail
79 * pages.
80 */
81 atomic_t _mapcount;
69 82
70 struct { 83 struct {
71 unsigned inuse:16; 84 unsigned inuse:16;
diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h
new file mode 100644
index 000000000000..0035abe41b9a
--- /dev/null
+++ b/include/linux/pps-gpio.h
@@ -0,0 +1,32 @@
1/*
2 * pps-gpio.h -- PPS client for GPIOs
3 *
4 *
5 * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22#ifndef _PPS_GPIO_H
23#define _PPS_GPIO_H
24
25struct pps_gpio_platform_data {
26 bool assert_falling_edge;
27 bool capture_clear;
28 unsigned int gpio_pin;
29 const char *gpio_label;
30};
31
32#endif
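
[Editor's note] Board code is expected to hand this platform data to the matching platform driver, roughly as sketched below. The GPIO number and label are made up, and the sketch assumes the new client driver registers under the name "pps-gpio" (added elsewhere in this series).

#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/pps-gpio.h>

/* Hypothetical board file: a PPS pulse arrives on GPIO 42. */
static struct pps_gpio_platform_data board_pps_data = {
	.assert_falling_edge	= false,
	.capture_clear		= false,
	.gpio_pin		= 42,
	.gpio_label		= "PPS",
};

static struct platform_device board_pps_device = {
	.name	= "pps-gpio",
	.id	= -1,
	.dev	= {
		.platform_data = &board_pps_data,
	},
};

static int __init board_pps_init(void)
{
	return platform_device_register(&board_pps_device);
}
device_initcall(board_pps_init);
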
diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h
index 0cee0152aca9..b66d13d1bdc0 100644
--- a/include/linux/rio_ids.h
+++ b/include/linux/rio_ids.h
@@ -39,5 +39,6 @@
39#define RIO_DID_IDTCPS1616 0x0379 39#define RIO_DID_IDTCPS1616 0x0379
40#define RIO_DID_IDTVPS1616 0x0377 40#define RIO_DID_IDTVPS1616 0x0377
41#define RIO_DID_IDTSPS1616 0x0378 41#define RIO_DID_IDTSPS1616 0x0378
42#define RIO_DID_TSI721 0x80ab
42 43
43#endif /* LINUX_RIO_IDS_H */ 44#endif /* LINUX_RIO_IDS_H */
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 1feb2de2ee57..10d6b226afc5 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -83,13 +83,6 @@ struct seminfo {
83 83
84struct task_struct; 84struct task_struct;
85 85
86/* One semaphore structure for each semaphore in the system. */
87struct sem {
88 int semval; /* current value */
89 int sempid; /* pid of last operation */
90 struct list_head sem_pending; /* pending single-sop operations */
91};
92
93/* One sem_array data structure for each set of semaphores in the system. */ 86/* One sem_array data structure for each set of semaphores in the system. */
94struct sem_array { 87struct sem_array {
95 struct kern_ipc_perm ____cacheline_aligned_in_smp 88 struct kern_ipc_perm ____cacheline_aligned_in_smp
@@ -103,51 +96,21 @@ struct sem_array {
103 int complex_count; /* pending complex operations */ 96 int complex_count; /* pending complex operations */
104}; 97};
105 98
106/* One queue for each sleeping process in the system. */ 99#ifdef CONFIG_SYSVIPC
107struct sem_queue {
108 struct list_head simple_list; /* queue of pending operations */
109 struct list_head list; /* queue of pending operations */
110 struct task_struct *sleeper; /* this process */
111 struct sem_undo *undo; /* undo structure */
112 int pid; /* process id of requesting process */
113 int status; /* completion status of operation */
114 struct sembuf *sops; /* array of pending operations */
115 int nsops; /* number of operations */
116 int alter; /* does the operation alter the array? */
117};
118
119/* Each task has a list of undo requests. They are executed automatically
120 * when the process exits.
121 */
122struct sem_undo {
123 struct list_head list_proc; /* per-process list: all undos from one process. */
124 /* rcu protected */
125 struct rcu_head rcu; /* rcu struct for sem_undo() */
126 struct sem_undo_list *ulp; /* sem_undo_list for the process */
127 struct list_head list_id; /* per semaphore array list: all undos for one array */
128 int semid; /* semaphore set identifier */
129 short * semadj; /* array of adjustments, one per semaphore */
130};
131
132/* sem_undo_list controls shared access to the list of sem_undo structures
133 * that may be shared among all a CLONE_SYSVSEM task group.
134 */
135struct sem_undo_list {
136 atomic_t refcnt;
137 spinlock_t lock;
138 struct list_head list_proc;
139};
140 100
141struct sysv_sem { 101struct sysv_sem {
142 struct sem_undo_list *undo_list; 102 struct sem_undo_list *undo_list;
143}; 103};
144 104
145#ifdef CONFIG_SYSVIPC
146
147extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk); 105extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
148extern void exit_sem(struct task_struct *tsk); 106extern void exit_sem(struct task_struct *tsk);
149 107
150#else 108#else
109
110struct sysv_sem {
111 /* empty */
112};
113
151static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 114static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
152{ 115{
153 return 0; 116 return 0;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 9a1ec10fd504..703cfa33a3ca 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -931,6 +931,7 @@ enum
931#ifdef __KERNEL__ 931#ifdef __KERNEL__
932#include <linux/list.h> 932#include <linux/list.h>
933#include <linux/rcupdate.h> 933#include <linux/rcupdate.h>
934#include <linux/wait.h>
934 935
935/* For the /proc/sys support */ 936/* For the /proc/sys support */
936struct ctl_table; 937struct ctl_table;
@@ -1011,6 +1012,26 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
1011 * cover common cases. 1012 * cover common cases.
1012 */ 1013 */
1013 1014
1015/* Support for userspace poll() to watch for changes */
1016struct ctl_table_poll {
1017 atomic_t event;
1018 wait_queue_head_t wait;
1019};
1020
1021static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
1022{
1023 return (void *)(unsigned long)atomic_read(&poll->event);
1024}
1025
1026void proc_sys_poll_notify(struct ctl_table_poll *poll);
1027
1028#define __CTL_TABLE_POLL_INITIALIZER(name) { \
1029 .event = ATOMIC_INIT(0), \
1030 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
1031
1032#define DEFINE_CTL_TABLE_POLL(name) \
1033 struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
1034
1014/* A sysctl table is an array of struct ctl_table: */ 1035/* A sysctl table is an array of struct ctl_table: */
1015struct ctl_table 1036struct ctl_table
1016{ 1037{
@@ -1021,6 +1042,7 @@ struct ctl_table
1021 struct ctl_table *child; 1042 struct ctl_table *child;
1022 struct ctl_table *parent; /* Automatically set */ 1043 struct ctl_table *parent; /* Automatically set */
1023 proc_handler *proc_handler; /* Callback for text formatting */ 1044 proc_handler *proc_handler; /* Callback for text formatting */
1045 struct ctl_table_poll *poll;
1024 void *extra1; 1046 void *extra1;
1025 void *extra2; 1047 void *extra2;
1026}; 1048};
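
[Editor's note] On the kernel side, an entry becomes pollable by attaching a ctl_table_poll and notifying it from the handler when the value is written. A hedged sketch with made-up names (the real users in this series are the uts entries):

#include <linux/sysctl.h>

static int foo_setting;
static DEFINE_CTL_TABLE_POLL(foo_poll);

static int foo_handler(struct ctl_table *table, int write,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (write && !ret)
		proc_sys_poll_notify(table->poll);	/* wake any poll()ers */
	return ret;
}

static struct ctl_table foo_ctl_table[] = {
	{
		.procname	= "foo_setting",
		.data		= &foo_setting,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= foo_handler,
		.poll		= &foo_poll,
	},
	{ }
};
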
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 4e5b0213fdc1..c714ed75eae2 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -37,6 +37,14 @@ struct new_utsname {
37#include <linux/nsproxy.h> 37#include <linux/nsproxy.h>
38#include <linux/err.h> 38#include <linux/err.h>
39 39
40enum uts_proc {
41 UTS_PROC_OSTYPE,
42 UTS_PROC_OSRELEASE,
43 UTS_PROC_VERSION,
44 UTS_PROC_HOSTNAME,
45 UTS_PROC_DOMAINNAME,
46};
47
40struct user_namespace; 48struct user_namespace;
41extern struct user_namespace init_user_ns; 49extern struct user_namespace init_user_ns;
42 50
@@ -80,6 +88,14 @@ static inline struct uts_namespace *copy_utsname(unsigned long flags,
80} 88}
81#endif 89#endif
82 90
91#ifdef CONFIG_PROC_SYSCTL
92extern void uts_proc_notify(enum uts_proc proc);
93#else
94static inline void uts_proc_notify(enum uts_proc proc)
95{
96}
97#endif
98
83static inline struct new_utsname *utsname(void) 99static inline struct new_utsname *utsname(void)
84{ 100{
85 return &current->nsproxy->uts_ns->name; 101 return &current->nsproxy->uts_ns->name;
diff --git a/init/Kconfig b/init/Kconfig
index 31ba0fd0f36b..43298f9810fb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -947,7 +947,7 @@ config UID16
947config SYSCTL_SYSCALL 947config SYSCTL_SYSCALL
948 bool "Sysctl syscall support" if EXPERT 948 bool "Sysctl syscall support" if EXPERT
949 depends on PROC_SYSCTL 949 depends on PROC_SYSCTL
950 default y 950 default n
951 select SYSCTL 951 select SYSCTL
952 ---help--- 952 ---help---
953 sys_sysctl uses binary paths that have been found challenging 953 sys_sysctl uses binary paths that have been found challenging
@@ -959,7 +959,7 @@ config SYSCTL_SYSCALL
959 trying to save some space it is probably safe to disable this, 959 trying to save some space it is probably safe to disable this,
960 making your kernel marginally smaller. 960 making your kernel marginally smaller.
961 961
962 If unsure say Y here. 962 If unsure say N here.
963 963
964config KALLSYMS 964config KALLSYMS
965 bool "Load all symbols for debugging/ksymoops" if EXPERT 965 bool "Load all symbols for debugging/ksymoops" if EXPERT
diff --git a/init/do_mounts.c b/init/do_mounts.c
index c0851a8e030c..0f6e1d985a3b 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -28,7 +28,7 @@ int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
28int root_mountflags = MS_RDONLY | MS_SILENT; 28int root_mountflags = MS_RDONLY | MS_SILENT;
29static char * __initdata root_device_name; 29static char * __initdata root_device_name;
30static char __initdata saved_root_name[64]; 30static char __initdata saved_root_name[64];
31static int __initdata root_wait; 31static int root_wait;
32 32
33dev_t ROOT_DEV; 33dev_t ROOT_DEV;
34 34
@@ -85,12 +85,15 @@ no_match:
85 85
86/** 86/**
87 * devt_from_partuuid - looks up the dev_t of a partition by its UUID 87 * devt_from_partuuid - looks up the dev_t of a partition by its UUID
88 * @uuid: 36 byte char array containing a hex ascii UUID 88 * @uuid: min 36 byte char array containing a hex ascii UUID
89 * 89 *
90 * The function will return the first partition which contains a matching 90 * The function will return the first partition which contains a matching
91 * UUID value in its partition_meta_info struct. This does not search 91 * UUID value in its partition_meta_info struct. This does not search
92 * by filesystem UUIDs. 92 * by filesystem UUIDs.
93 * 93 *
94 * If @uuid is followed by a "/PARTNROFF=%d", then the number will be
95 * extracted and used as an offset from the partition identified by the UUID.
96 *
94 * Returns the matching dev_t on success or 0 on failure. 97 * Returns the matching dev_t on success or 0 on failure.
95 */ 98 */
96static dev_t devt_from_partuuid(char *uuid_str) 99static dev_t devt_from_partuuid(char *uuid_str)
@@ -98,6 +101,28 @@ static dev_t devt_from_partuuid(char *uuid_str)
98 dev_t res = 0; 101 dev_t res = 0;
99 struct device *dev = NULL; 102 struct device *dev = NULL;
100 u8 uuid[16]; 103 u8 uuid[16];
104 struct gendisk *disk;
105 struct hd_struct *part;
106 int offset = 0;
107
108 if (strlen(uuid_str) < 36)
109 goto done;
110
111 /* Check for optional partition number offset attributes. */
112 if (uuid_str[36]) {
113 char c = 0;
114 /* Explicitly fail on poor PARTUUID syntax. */
115 if (sscanf(&uuid_str[36],
116 "/PARTNROFF=%d%c", &offset, &c) != 1) {
117 printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
118 "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
119 if (root_wait)
120 printk(KERN_ERR
121 "Disabling rootwait; root= is invalid.\n");
122 root_wait = 0;
123 goto done;
124 }
125 }
101 126
102 /* Pack the requested UUID in the expected format. */ 127 /* Pack the requested UUID in the expected format. */
103 part_pack_uuid(uuid_str, uuid); 128 part_pack_uuid(uuid_str, uuid);
@@ -107,8 +132,21 @@ static dev_t devt_from_partuuid(char *uuid_str)
107 goto done; 132 goto done;
108 133
109 res = dev->devt; 134 res = dev->devt;
110 put_device(dev);
111 135
136 /* Attempt to find the partition by offset. */
137 if (!offset)
138 goto no_offset;
139
140 res = 0;
141 disk = part_to_disk(dev_to_part(dev));
142 part = disk_get_part(disk, dev_to_part(dev)->partno + offset);
143 if (part) {
144 res = part_devt(part);
145 put_device(part_to_dev(part));
146 }
147
148no_offset:
149 put_device(dev);
112done: 150done:
113 return res; 151 return res;
114} 152}
@@ -126,6 +164,8 @@ done:
126 * used when disk name of partitioned disk ends on a digit. 164 * used when disk name of partitioned disk ends on a digit.
127 * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the 165 * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
128 * unique id of a partition if the partition table provides it. 166 * unique id of a partition if the partition table provides it.
167 * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
168 * a partition with a known unique id.
129 * 169 *
130 * If name doesn't have fall into the categories above, we return (0,0). 170 * If name doesn't have fall into the categories above, we return (0,0).
131 * block_class is used to check if something is a disk name. If the disk 171 * block_class is used to check if something is a disk name. If the disk
@@ -143,8 +183,6 @@ dev_t name_to_dev_t(char *name)
143#ifdef CONFIG_BLOCK 183#ifdef CONFIG_BLOCK
144 if (strncmp(name, "PARTUUID=", 9) == 0) { 184 if (strncmp(name, "PARTUUID=", 9) == 0) {
145 name += 9; 185 name += 9;
146 if (strlen(name) != 36)
147 goto fail;
148 res = devt_from_partuuid(name); 186 res = devt_from_partuuid(name);
149 if (!res) 187 if (!res)
150 goto fail; 188 goto fail;
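
[Editor's note] With the new syntax the root device can be named relative to another partition's UUID on the kernel command line, for example (using the placeholder UUID from the comment above):

	root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1 rootwait

This selects the partition one slot after the one carrying that UUID; malformed PARTNROFF syntax now fails explicitly and also disables rootwait, as the hunk above shows.
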
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index fe9acb0ae480..887629e24c54 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -120,6 +120,20 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
120 } 120 }
121 121
122 /* 122 /*
123 * Read 512 bytes further to check if cramfs is padded
124 */
125 sys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0);
126 sys_read(fd, buf, size);
127
128 if (cramfsb->magic == CRAMFS_MAGIC) {
129 printk(KERN_NOTICE
130 "RAMDISK: cramfs filesystem found at block %d\n",
131 start_block);
132 nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
133 goto done;
134 }
135
136 /*
123 * Read block 1 to test for minix and ext2 superblock 137 * Read block 1 to test for minix and ext2 superblock
124 */ 138 */
125 sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); 139 sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0);
diff --git a/ipc/sem.c b/ipc/sem.c
index c8e00f8b4be1..5215a81420df 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -90,6 +90,52 @@
90#include <asm/uaccess.h> 90#include <asm/uaccess.h>
91#include "util.h" 91#include "util.h"
92 92
93/* One semaphore structure for each semaphore in the system. */
94struct sem {
95 int semval; /* current value */
96 int sempid; /* pid of last operation */
97 struct list_head sem_pending; /* pending single-sop operations */
98};
99
100/* One queue for each sleeping process in the system. */
101struct sem_queue {
102 struct list_head simple_list; /* queue of pending operations */
103 struct list_head list; /* queue of pending operations */
104 struct task_struct *sleeper; /* this process */
105 struct sem_undo *undo; /* undo structure */
106 int pid; /* process id of requesting process */
107 int status; /* completion status of operation */
108 struct sembuf *sops; /* array of pending operations */
109 int nsops; /* number of operations */
110 int alter; /* does *sops alter the array? */
111};
112
113/* Each task has a list of undo requests. They are executed automatically
114 * when the process exits.
115 */
116struct sem_undo {
117 struct list_head list_proc; /* per-process list: *
118 * all undos from one process
119 * rcu protected */
120 struct rcu_head rcu; /* rcu struct for sem_undo */
121 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
122 struct list_head list_id; /* per semaphore array list:
123 * all undos for one array */
124 int semid; /* semaphore set identifier */
125 short *semadj; /* array of adjustments */
126 /* one per semaphore */
127};
128
129/* sem_undo_list controls shared access to the list of sem_undo structures
130 * that may be shared among all a CLONE_SYSVSEM task group.
131 */
132struct sem_undo_list {
133 atomic_t refcnt;
134 spinlock_t lock;
135 struct list_head list_proc;
136};
137
138
93#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 139#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
94 140
95#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) 141#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
@@ -1426,6 +1472,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1426 1472
1427 queue.status = -EINTR; 1473 queue.status = -EINTR;
1428 queue.sleeper = current; 1474 queue.sleeper = current;
1475
1476sleep_again:
1429 current->state = TASK_INTERRUPTIBLE; 1477 current->state = TASK_INTERRUPTIBLE;
1430 sem_unlock(sma); 1478 sem_unlock(sma);
1431 1479
@@ -1460,7 +1508,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1460 * Array removed? If yes, leave without sem_unlock(). 1508 * Array removed? If yes, leave without sem_unlock().
1461 */ 1509 */
1462 if (IS_ERR(sma)) { 1510 if (IS_ERR(sma)) {
1463 error = -EIDRM;
1464 goto out_free; 1511 goto out_free;
1465 } 1512 }
1466 1513
@@ -1479,6 +1526,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1479 */ 1526 */
1480 if (timeout && jiffies_left == 0) 1527 if (timeout && jiffies_left == 0)
1481 error = -EAGAIN; 1528 error = -EAGAIN;
1529
1530 /*
1531 * If the wakeup was spurious, just retry
1532 */
1533 if (error == -EINTR && !signal_pending(current))
1534 goto sleep_again;
1535
1482 unlink_queue(sma, &queue); 1536 unlink_queue(sma, &queue);
1483 1537
1484out_unlock_free: 1538out_unlock_free:
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 453100a4159d..d9d5648f3cdc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2027,7 +2027,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2027 goto out_free_group_list; 2027 goto out_free_group_list;
2028 2028
2029 /* prevent changes to the threadgroup list while we take a snapshot. */ 2029 /* prevent changes to the threadgroup list while we take a snapshot. */
2030 rcu_read_lock(); 2030 read_lock(&tasklist_lock);
2031 if (!thread_group_leader(leader)) { 2031 if (!thread_group_leader(leader)) {
2032 /* 2032 /*
2033 * a race with de_thread from another thread's exec() may strip 2033 * a race with de_thread from another thread's exec() may strip
@@ -2036,7 +2036,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2036 * throw this task away and try again (from cgroup_procs_write); 2036 * throw this task away and try again (from cgroup_procs_write);
2037 * this is "double-double-toil-and-trouble-check locking". 2037 * this is "double-double-toil-and-trouble-check locking".
2038 */ 2038 */
2039 rcu_read_unlock(); 2039 read_unlock(&tasklist_lock);
2040 retval = -EAGAIN; 2040 retval = -EAGAIN;
2041 goto out_free_group_list; 2041 goto out_free_group_list;
2042 } 2042 }
@@ -2057,7 +2057,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2057 } while_each_thread(leader, tsk); 2057 } while_each_thread(leader, tsk);
2058 /* remember the number of threads in the array for later. */ 2058 /* remember the number of threads in the array for later. */
2059 group_size = i; 2059 group_size = i;
2060 rcu_read_unlock(); 2060 read_unlock(&tasklist_lock);
2061 2061
2062 /* 2062 /*
2063 * step 1: check that we can legitimately attach to the cgroup. 2063 * step 1: check that we can legitimately attach to the cgroup.
@@ -2135,14 +2135,17 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2135 oldcgrp = task_cgroup_from_root(tsk, root); 2135 oldcgrp = task_cgroup_from_root(tsk, root);
2136 if (cgrp == oldcgrp) 2136 if (cgrp == oldcgrp)
2137 continue; 2137 continue;
2138 /* attach each task to each subsystem */
2139 for_each_subsys(root, ss) {
2140 if (ss->attach_task)
2141 ss->attach_task(cgrp, tsk);
2142 }
2143 /* if the thread is PF_EXITING, it can just get skipped. */ 2138 /* if the thread is PF_EXITING, it can just get skipped. */
2144 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); 2139 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
2145 BUG_ON(retval != 0 && retval != -ESRCH); 2140 if (retval == 0) {
2141 /* attach each task to each subsystem */
2142 for_each_subsys(root, ss) {
2143 if (ss->attach_task)
2144 ss->attach_task(cgrp, tsk);
2145 }
2146 } else {
2147 BUG_ON(retval != -ESRCH);
2148 }
2146 } 2149 }
2147 /* nothing is sensitive to fork() after this point. */ 2150 /* nothing is sensitive to fork() after this point. */
2148 2151
@@ -4880,9 +4883,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
4880 4883
4881 rcu_assign_pointer(id->css, NULL); 4884 rcu_assign_pointer(id->css, NULL);
4882 rcu_assign_pointer(css->id, NULL); 4885 rcu_assign_pointer(css->id, NULL);
4883 spin_lock(&ss->id_lock); 4886 write_lock(&ss->id_lock);
4884 idr_remove(&ss->idr, id->id); 4887 idr_remove(&ss->idr, id->id);
4885 spin_unlock(&ss->id_lock); 4888 write_unlock(&ss->id_lock);
4886 kfree_rcu(id, rcu_head); 4889 kfree_rcu(id, rcu_head);
4887} 4890}
4888EXPORT_SYMBOL_GPL(free_css_id); 4891EXPORT_SYMBOL_GPL(free_css_id);
@@ -4908,10 +4911,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
4908 error = -ENOMEM; 4911 error = -ENOMEM;
4909 goto err_out; 4912 goto err_out;
4910 } 4913 }
4911 spin_lock(&ss->id_lock); 4914 write_lock(&ss->id_lock);
4912 /* Don't use 0. allocates an ID of 1-65535 */ 4915 /* Don't use 0. allocates an ID of 1-65535 */
4913 error = idr_get_new_above(&ss->idr, newid, 1, &myid); 4916 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
4914 spin_unlock(&ss->id_lock); 4917 write_unlock(&ss->id_lock);
4915 4918
4916 /* Returns error when there are no free spaces for new ID.*/ 4919 /* Returns error when there are no free spaces for new ID.*/
4917 if (error) { 4920 if (error) {
@@ -4926,9 +4929,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
4926 return newid; 4929 return newid;
4927remove_idr: 4930remove_idr:
4928 error = -ENOSPC; 4931 error = -ENOSPC;
4929 spin_lock(&ss->id_lock); 4932 write_lock(&ss->id_lock);
4930 idr_remove(&ss->idr, myid); 4933 idr_remove(&ss->idr, myid);
4931 spin_unlock(&ss->id_lock); 4934 write_unlock(&ss->id_lock);
4932err_out: 4935err_out:
4933 kfree(newid); 4936 kfree(newid);
4934 return ERR_PTR(error); 4937 return ERR_PTR(error);
@@ -4940,7 +4943,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4940{ 4943{
4941 struct css_id *newid; 4944 struct css_id *newid;
4942 4945
4943 spin_lock_init(&ss->id_lock); 4946 rwlock_init(&ss->id_lock);
4944 idr_init(&ss->idr); 4947 idr_init(&ss->idr);
4945 4948
4946 newid = get_new_cssid(ss, 0); 4949 newid = get_new_cssid(ss, 0);
@@ -5035,9 +5038,9 @@ css_get_next(struct cgroup_subsys *ss, int id,
5035 * scan next entry from bitmap(tree), tmpid is updated after 5038 * scan next entry from bitmap(tree), tmpid is updated after
5036 * idr_get_next(). 5039 * idr_get_next().
5037 */ 5040 */
5038 spin_lock(&ss->id_lock); 5041 read_lock(&ss->id_lock);
5039 tmp = idr_get_next(&ss->idr, &tmpid); 5042 tmp = idr_get_next(&ss->idr, &tmpid);
5040 spin_unlock(&ss->id_lock); 5043 read_unlock(&ss->id_lock);
5041 5044
5042 if (!tmp) 5045 if (!tmp)
5043 break; 5046 break;
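
The hunks above convert ss->id_lock from a spinlock to a rwlock: css_get_next() only reads the IDR, so concurrent lookups no longer serialize against each other, while ID allocation and removal keep exclusive access as writers. A minimal sketch of the same pattern, on a hypothetical registry structure rather than cgroup_subsys itself (all names below are illustrative):

#include <linux/idr.h>
#include <linux/spinlock.h>

struct id_registry {                     /* illustrative, not cgroup_subsys */
	rwlock_t	lock;            /* was a spinlock_t before this change */
	struct idr	ids;
};

static void id_registry_init(struct id_registry *reg)
{
	rwlock_init(&reg->lock);
	idr_init(&reg->ids);
}

static void *id_registry_next(struct id_registry *reg, int *idp)
{
	void *entry;

	read_lock(&reg->lock);           /* lookups may now run in parallel */
	entry = idr_get_next(&reg->ids, idp);
	read_unlock(&reg->lock);
	return entry;
}

static void id_registry_remove(struct id_registry *reg, int id)
{
	write_lock(&reg->lock);          /* allocation/removal stay exclusive */
	idr_remove(&reg->ids, id);
	write_unlock(&reg->lock);
}

Readers still exclude writers, so the win is on the lookup-vs-lookup side; it only pays off because css_get_next() is the hot path here.
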
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10131fdaff70..ed0ff443f036 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -949,6 +949,8 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
949static void cpuset_change_task_nodemask(struct task_struct *tsk, 949static void cpuset_change_task_nodemask(struct task_struct *tsk,
950 nodemask_t *newmems) 950 nodemask_t *newmems)
951{ 951{
952 bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed);
953
952repeat: 954repeat:
953 /* 955 /*
954 * Allow tasks that have access to memory reserves because they have 956 * Allow tasks that have access to memory reserves because they have
@@ -963,7 +965,6 @@ repeat:
963 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); 965 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
964 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); 966 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
965 967
966
967 /* 968 /*
968 * ensure checking ->mems_allowed_change_disable after setting all new 969 * ensure checking ->mems_allowed_change_disable after setting all new
969 * allowed nodes. 970 * allowed nodes.
@@ -980,9 +981,11 @@ repeat:
980 981
981 /* 982 /*
982 * Allocation of memory is very fast, we needn't sleep when waiting 983 * Allocation of memory is very fast, we needn't sleep when waiting
983 * for the read-side. 984 * for the read-side. No wait is necessary, however, if at least one
985 * node remains unchanged.
984 */ 986 */
985 while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { 987 while (masks_disjoint &&
988 ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
986 task_unlock(tsk); 989 task_unlock(tsk);
987 if (!task_curr(tsk)) 990 if (!task_curr(tsk))
988 yield(); 991 yield();
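
The new masks_disjoint flag records whether the old and new nodemasks share any node. The rebind publishes (old | new) first and the final mask second, so only when the masks are disjoint can a concurrent allocator momentarily see an unusable mask, and only then is the yield() loop against mems_allowed_change_disable readers still needed. A sketch of the rule (hypothetical helper name, not cpuset code):

#include <linux/nodemask.h>

/*
 * Step 1 of the rebind publishes (old | new), step 2 publishes new, so
 * a shared node means the task's mask is never empty in between and the
 * caller may skip waiting for in-flight allocations.
 */
static bool rebind_must_wait_for_allocators(const nodemask_t *newmems,
					    const nodemask_t *oldmems)
{
	return !nodes_intersects(*newmems, *oldmems);
}
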
diff --git a/kernel/sys.c b/kernel/sys.c
index 58459509b14c..d06c091e0345 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1286,6 +1286,7 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1286 memset(u->nodename + len, 0, sizeof(u->nodename) - len); 1286 memset(u->nodename + len, 0, sizeof(u->nodename) - len);
1287 errno = 0; 1287 errno = 0;
1288 } 1288 }
1289 uts_proc_notify(UTS_PROC_HOSTNAME);
1289 up_write(&uts_sem); 1290 up_write(&uts_sem);
1290 return errno; 1291 return errno;
1291} 1292}
@@ -1336,6 +1337,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1336 memset(u->domainname + len, 0, sizeof(u->domainname) - len); 1337 memset(u->domainname + len, 0, sizeof(u->domainname) - len);
1337 errno = 0; 1338 errno = 0;
1338 } 1339 }
1340 uts_proc_notify(UTS_PROC_DOMAINNAME);
1339 up_write(&uts_sem); 1341 up_write(&uts_sem);
1340 return errno; 1342 return errno;
1341} 1343}
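
With the two uts_proc_notify() calls added above, a successful sethostname()/setdomainname() wakes anyone sleeping in poll(2) on the corresponding /proc/sys/kernel file, so daemons no longer have to re-read the value on a timer. A minimal userspace sketch, assuming the poll semantics introduced by the sysctl poll() patch in this series (the value must be read once to arm the notification; a change is then reported as an exceptional condition such as POLLPRI):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];
	ssize_t n;
	int fd = open("/proc/sys/kernel/hostname", O_RDONLY);
	struct pollfd pfd = { .fd = fd, .events = POLLPRI };

	if (fd < 0)
		return 1;

	/* arm the notification by consuming the current value */
	read(fd, buf, sizeof(buf));

	while (poll(&pfd, 1, -1) > 0) {
		lseek(fd, 0, SEEK_SET);
		n = read(fd, buf, sizeof(buf) - 1);
		if (n <= 0)
			break;
		buf[n] = '\0';
		printf("hostname is now: %s", buf);
	}
	close(fd);
	return 0;
}
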
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index a2cd77e70d4d..3b0d48ebf81d 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -13,6 +13,7 @@
13#include <linux/uts.h> 13#include <linux/uts.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/sysctl.h> 15#include <linux/sysctl.h>
16#include <linux/wait.h>
16 17
17static void *get_uts(ctl_table *table, int write) 18static void *get_uts(ctl_table *table, int write)
18{ 19{
@@ -51,12 +52,19 @@ static int proc_do_uts_string(ctl_table *table, int write,
51 uts_table.data = get_uts(table, write); 52 uts_table.data = get_uts(table, write);
52 r = proc_dostring(&uts_table,write,buffer,lenp, ppos); 53 r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
53 put_uts(table, write, uts_table.data); 54 put_uts(table, write, uts_table.data);
55
56 if (write)
57 proc_sys_poll_notify(table->poll);
58
54 return r; 59 return r;
55} 60}
56#else 61#else
57#define proc_do_uts_string NULL 62#define proc_do_uts_string NULL
58#endif 63#endif
59 64
65static DEFINE_CTL_TABLE_POLL(hostname_poll);
66static DEFINE_CTL_TABLE_POLL(domainname_poll);
67
60static struct ctl_table uts_kern_table[] = { 68static struct ctl_table uts_kern_table[] = {
61 { 69 {
62 .procname = "ostype", 70 .procname = "ostype",
@@ -85,6 +93,7 @@ static struct ctl_table uts_kern_table[] = {
85 .maxlen = sizeof(init_uts_ns.name.nodename), 93 .maxlen = sizeof(init_uts_ns.name.nodename),
86 .mode = 0644, 94 .mode = 0644,
87 .proc_handler = proc_do_uts_string, 95 .proc_handler = proc_do_uts_string,
96 .poll = &hostname_poll,
88 }, 97 },
89 { 98 {
90 .procname = "domainname", 99 .procname = "domainname",
@@ -92,6 +101,7 @@ static struct ctl_table uts_kern_table[] = {
92 .maxlen = sizeof(init_uts_ns.name.domainname), 101 .maxlen = sizeof(init_uts_ns.name.domainname),
93 .mode = 0644, 102 .mode = 0644,
94 .proc_handler = proc_do_uts_string, 103 .proc_handler = proc_do_uts_string,
104 .poll = &domainname_poll,
95 }, 105 },
96 {} 106 {}
97}; 107};
@@ -105,6 +115,19 @@ static struct ctl_table uts_root_table[] = {
105 {} 115 {}
106}; 116};
107 117
118#ifdef CONFIG_PROC_SYSCTL
119/*
120 * Notify userspace about a change in a certain entry of uts_kern_table,
121 * identified by the parameter proc.
122 */
123void uts_proc_notify(enum uts_proc proc)
124{
125 struct ctl_table *table = &uts_kern_table[proc];
126
127 proc_sys_poll_notify(table->poll);
128}
129#endif
130
108static int __init utsname_sysctl_init(void) 131static int __init utsname_sysctl_init(void)
109{ 132{
110 register_sysctl_table(uts_root_table); 133 register_sysctl_table(uts_root_table);
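
The recipe above generalizes to any string sysctl: declare a ctl_table_poll with DEFINE_CTL_TABLE_POLL(), point the entry's new .poll field at it, and have the proc handler call proc_sys_poll_notify() after a write. A sketch for a hypothetical entry (the names are made up; only the uts entries are wired up by this patch):

#include <linux/sysctl.h>

static char example_value[64] = "default";
static DEFINE_CTL_TABLE_POLL(example_poll);

static int proc_do_example(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int r = proc_dostring(table, write, buffer, lenp, ppos);

	if (write && !r)
		proc_sys_poll_notify(table->poll);	/* wake poll()ers */
	return r;
}

static struct ctl_table example_table[] = {
	{
		.procname	= "example",
		.data		= example_value,
		.maxlen		= sizeof(example_value),
		.mode		= 0644,
		.proc_handler	= proc_do_example,
		.poll		= &example_poll,
	},
	{}
};
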
diff --git a/lib/idr.c b/lib/idr.c
index bbf211aea4eb..ed055b297c81 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -944,6 +944,7 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
944{ 944{
945 int ret, id; 945 int ret, id;
946 unsigned int max; 946 unsigned int max;
947 unsigned long flags;
947 948
948 BUG_ON((int)start < 0); 949 BUG_ON((int)start < 0);
949 BUG_ON((int)end < 0); 950 BUG_ON((int)end < 0);
@@ -959,7 +960,7 @@ again:
959 if (!ida_pre_get(ida, gfp_mask)) 960 if (!ida_pre_get(ida, gfp_mask))
960 return -ENOMEM; 961 return -ENOMEM;
961 962
962 spin_lock(&simple_ida_lock); 963 spin_lock_irqsave(&simple_ida_lock, flags);
963 ret = ida_get_new_above(ida, start, &id); 964 ret = ida_get_new_above(ida, start, &id);
964 if (!ret) { 965 if (!ret) {
965 if (id > max) { 966 if (id > max) {
@@ -969,7 +970,7 @@ again:
969 ret = id; 970 ret = id;
970 } 971 }
971 } 972 }
972 spin_unlock(&simple_ida_lock); 973 spin_unlock_irqrestore(&simple_ida_lock, flags);
973 974
974 if (unlikely(ret == -EAGAIN)) 975 if (unlikely(ret == -EAGAIN))
975 goto again; 976 goto again;
@@ -985,10 +986,12 @@ EXPORT_SYMBOL(ida_simple_get);
985 */ 986 */
986void ida_simple_remove(struct ida *ida, unsigned int id) 987void ida_simple_remove(struct ida *ida, unsigned int id)
987{ 988{
989 unsigned long flags;
990
988 BUG_ON((int)id < 0); 991 BUG_ON((int)id < 0);
989 spin_lock(&simple_ida_lock); 992 spin_lock_irqsave(&simple_ida_lock, flags);
990 ida_remove(ida, id); 993 ida_remove(ida, id);
991 spin_unlock(&simple_ida_lock); 994 spin_unlock_irqrestore(&simple_ida_lock, flags);
992} 995}
993EXPORT_SYMBOL(ida_simple_remove); 996EXPORT_SYMBOL(ida_simple_remove);
994 997
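
ida_simple_get()/ida_simple_remove() now take simple_ida_lock with spin_lock_irqsave(), which is what lets the w1/ds2780 patches earlier in this series allocate and free ids from sections that run with interrupts disabled; with a plain spin_lock(), an interrupt arriving on the same CPU while the lock was held and calling back into the IDA would self-deadlock. A small sketch of a caller that becomes legal (hypothetical driver names):

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(example_ida);

static int example_alloc_id(void)
{
	/* GFP_NOWAIT keeps the allocation usable from atomic context */
	return ida_simple_get(&example_ida, 1, 0, GFP_NOWAIT);
}

/* May now be called with interrupts already disabled, e.g. from a
 * critical section in an interrupt handler, without deadlocking on
 * simple_ida_lock. */
static void example_release_id(int id)
{
	ida_simple_remove(&example_ida, id);
}
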
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 860ec211ddd6..4298abaae153 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -990,7 +990,7 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm,
990 page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; 990 page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
991 VM_BUG_ON(!PageCompound(page)); 991 VM_BUG_ON(!PageCompound(page));
992 if (flags & FOLL_GET) 992 if (flags & FOLL_GET)
993 get_page(page); 993 get_page_foll(page);
994 994
995out: 995out:
996 return page; 996 return page;
@@ -1202,6 +1202,7 @@ static void __split_huge_page_refcount(struct page *page)
1202 unsigned long head_index = page->index; 1202 unsigned long head_index = page->index;
1203 struct zone *zone = page_zone(page); 1203 struct zone *zone = page_zone(page);
1204 int zonestat; 1204 int zonestat;
1205 int tail_count = 0;
1205 1206
1206 /* prevent PageLRU to go away from under us, and freeze lru stats */ 1207 /* prevent PageLRU to go away from under us, and freeze lru stats */
1207 spin_lock_irq(&zone->lru_lock); 1208 spin_lock_irq(&zone->lru_lock);
@@ -1210,11 +1211,27 @@ static void __split_huge_page_refcount(struct page *page)
1210 for (i = 1; i < HPAGE_PMD_NR; i++) { 1211 for (i = 1; i < HPAGE_PMD_NR; i++) {
1211 struct page *page_tail = page + i; 1212 struct page *page_tail = page + i;
1212 1213
1213 /* tail_page->_count cannot change */ 1214 /* tail_page->_mapcount cannot change */
1214 atomic_sub(atomic_read(&page_tail->_count), &page->_count); 1215 BUG_ON(page_mapcount(page_tail) < 0);
1215 BUG_ON(page_count(page) <= 0); 1216 tail_count += page_mapcount(page_tail);
1216 atomic_add(page_mapcount(page) + 1, &page_tail->_count); 1217 /* check for overflow */
1217 BUG_ON(atomic_read(&page_tail->_count) <= 0); 1218 BUG_ON(tail_count < 0);
1219 BUG_ON(atomic_read(&page_tail->_count) != 0);
1220 /*
1221 * tail_page->_count is zero and not changing from
1222 * under us. But get_page_unless_zero() may be running
1223 * from under us on the tail_page. If we used
1224 * atomic_set() below instead of atomic_add(), we
1225 * would then run atomic_set() concurrently with
1226 * get_page_unless_zero(), and atomic_set() is
1227 * implemented in C not using locked ops. spin_unlock
1228 * on x86 sometime uses locked ops because of PPro
1229 * errata 66, 92, so unless somebody can guarantee
1230 * atomic_set() here would be safe on all archs (and
1231 * not only on x86), it's safer to use atomic_add().
1232 */
1233 atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1,
1234 &page_tail->_count);
1218 1235
1219 /* after clearing PageTail the gup refcount can be released */ 1236 /* after clearing PageTail the gup refcount can be released */
1220 smp_mb(); 1237 smp_mb();
@@ -1232,10 +1249,7 @@ static void __split_huge_page_refcount(struct page *page)
1232 (1L << PG_uptodate))); 1249 (1L << PG_uptodate)));
1233 page_tail->flags |= (1L << PG_dirty); 1250 page_tail->flags |= (1L << PG_dirty);
1234 1251
1235 /* 1252 /* clear PageTail before overwriting first_page */
1236 * 1) clear PageTail before overwriting first_page
1237 * 2) clear PageTail before clearing PageHead for VM_BUG_ON
1238 */
1239 smp_wmb(); 1253 smp_wmb();
1240 1254
1241 /* 1255 /*
@@ -1252,7 +1266,6 @@ static void __split_huge_page_refcount(struct page *page)
1252 * status is achieved setting a reserved bit in the 1266 * status is achieved setting a reserved bit in the
1253 * pmd, not by clearing the present bit. 1267 * pmd, not by clearing the present bit.
1254 */ 1268 */
1255 BUG_ON(page_mapcount(page_tail));
1256 page_tail->_mapcount = page->_mapcount; 1269 page_tail->_mapcount = page->_mapcount;
1257 1270
1258 BUG_ON(page_tail->mapping); 1271 BUG_ON(page_tail->mapping);
@@ -1269,6 +1282,8 @@ static void __split_huge_page_refcount(struct page *page)
1269 1282
1270 lru_add_page_tail(zone, page, page_tail); 1283 lru_add_page_tail(zone, page, page_tail);
1271 } 1284 }
1285 atomic_sub(tail_count, &page->_count);
1286 BUG_ON(atomic_read(&page->_count) <= 0);
1272 1287
1273 __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); 1288 __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1274 __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); 1289 __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
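
The rework above keeps a tail page's _count at zero while the huge page is intact: gup pins on a tail are recorded in that tail's _mapcount plus one extra reference on the head, and the split then moves everything back, giving each tail head_mapcount + tail_mapcount + 1 references while subtracting the accumulated tail mapcounts from the head in a single atomic_sub(). A toy model of that arithmetic (plain C, not kernel code), just to show that the gup pins migrate from the head to the tails rather than being lost:

#include <assert.h>

/* tail_pins[i]: gup references taken on tail i while the page was huge
 * (each also bumped the head once); head_maps: huge-pmd mappings. */
static void model_split_refcounts(int head_count, int head_maps,
				  const int *tail_pins, int ntails)
{
	int moved = 0;

	for (int i = 0; i < ntails; i++) {
		/* mirrors atomic_add(page_mapcount(page) +
		 *                    page_mapcount(page_tail) + 1, ...) */
		int tail_count = head_maps + tail_pins[i] + 1;
		assert(tail_count >= 1);
		moved += tail_pins[i];
	}

	/* mirrors the single atomic_sub(tail_count, &page->_count) */
	head_count -= moved;
	assert(head_count > 0);	/* the splitter still holds the head */
}
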
diff --git a/mm/internal.h b/mm/internal.h
index d071d380fb49..2189af491783 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -37,6 +37,52 @@ static inline void __put_page(struct page *page)
37 atomic_dec(&page->_count); 37 atomic_dec(&page->_count);
38} 38}
39 39
40static inline void __get_page_tail_foll(struct page *page,
41 bool get_page_head)
42{
43 /*
44 * If we're getting a tail page, the elevated page->_count is
45 * required only in the head page and we will elevate the head
46 * page->_count and tail page->_mapcount.
47 *
48 * We elevate page_tail->_mapcount for tail pages to force
49 * page_tail->_count to be zero at all times to avoid getting
50 * false positives from get_page_unless_zero() with
51 * speculative page access (like in
52 * page_cache_get_speculative()) on tail pages.
53 */
54 VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
55 VM_BUG_ON(atomic_read(&page->_count) != 0);
56 VM_BUG_ON(page_mapcount(page) < 0);
57 if (get_page_head)
58 atomic_inc(&page->first_page->_count);
59 atomic_inc(&page->_mapcount);
60}
61
62/*
63 * This is meant to be called as the FOLL_GET operation of
64 * follow_page() and it must be called while holding the proper PT
65 * lock while the pte (or pmd_trans_huge) is still mapping the page.
66 */
67static inline void get_page_foll(struct page *page)
68{
69 if (unlikely(PageTail(page)))
70 /*
71 * This is safe only because
72 * __split_huge_page_refcount() can't run under
73 * get_page_foll() because we hold the proper PT lock.
74 */
75 __get_page_tail_foll(page, true);
76 else {
77 /*
78 * Getting a normal page or the head of a compound page
79 * requires to already have an elevated page->_count.
80 */
81 VM_BUG_ON(atomic_read(&page->_count) <= 0);
82 atomic_inc(&page->_count);
83 }
84}
85
40extern unsigned long highest_memmap_pfn; 86extern unsigned long highest_memmap_pfn;
41 87
42/* 88/*
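
mm/internal.h now provides the FOLL_GET primitive used by follow_page() and follow_trans_huge_pmd(): normal and head pages simply get _count bumped, while tail pages bump the head's _count and their own _mapcount, and the whole thing is safe only because the caller holds the page-table lock, which excludes __split_huge_page_refcount(). A sketch of the calling contract (illustrative only, assuming a present trans-huge pmd and the appropriate lock passed in):

#include <linux/mm.h>
#include <linux/huge_mm.h>
#include "internal.h"			/* get_page_foll() */

static struct page *pin_subpage_of_huge_pmd(pmd_t *pmd, unsigned long addr,
					    spinlock_t *ptl)
{
	struct page *page;

	spin_lock(ptl);			/* the "proper PT lock" */
	page = pmd_page(*pmd) + ((addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT);
	get_page_foll(page);		/* head or tail handled internally */
	spin_unlock(ptl);
	return page;
}
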
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d5755544afe..7af1d5ee1598 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -201,8 +201,8 @@ struct mem_cgroup_eventfd_list {
201 struct eventfd_ctx *eventfd; 201 struct eventfd_ctx *eventfd;
202}; 202};
203 203
204static void mem_cgroup_threshold(struct mem_cgroup *mem); 204static void mem_cgroup_threshold(struct mem_cgroup *memcg);
205static void mem_cgroup_oom_notify(struct mem_cgroup *mem); 205static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
206 206
207/* 207/*
208 * The memory controller data structure. The memory controller controls both 208 * The memory controller data structure. The memory controller controls both
@@ -362,29 +362,29 @@ enum charge_type {
362#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2 362#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
363#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT) 363#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
364 364
365static void mem_cgroup_get(struct mem_cgroup *mem); 365static void mem_cgroup_get(struct mem_cgroup *memcg);
366static void mem_cgroup_put(struct mem_cgroup *mem); 366static void mem_cgroup_put(struct mem_cgroup *memcg);
367static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); 367static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
368static void drain_all_stock_async(struct mem_cgroup *mem); 368static void drain_all_stock_async(struct mem_cgroup *memcg);
369 369
370static struct mem_cgroup_per_zone * 370static struct mem_cgroup_per_zone *
371mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) 371mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
372{ 372{
373 return &mem->info.nodeinfo[nid]->zoneinfo[zid]; 373 return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
374} 374}
375 375
376struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) 376struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
377{ 377{
378 return &mem->css; 378 return &memcg->css;
379} 379}
380 380
381static struct mem_cgroup_per_zone * 381static struct mem_cgroup_per_zone *
382page_cgroup_zoneinfo(struct mem_cgroup *mem, struct page *page) 382page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
383{ 383{
384 int nid = page_to_nid(page); 384 int nid = page_to_nid(page);
385 int zid = page_zonenum(page); 385 int zid = page_zonenum(page);
386 386
387 return mem_cgroup_zoneinfo(mem, nid, zid); 387 return mem_cgroup_zoneinfo(memcg, nid, zid);
388} 388}
389 389
390static struct mem_cgroup_tree_per_zone * 390static struct mem_cgroup_tree_per_zone *
@@ -403,7 +403,7 @@ soft_limit_tree_from_page(struct page *page)
403} 403}
404 404
405static void 405static void
406__mem_cgroup_insert_exceeded(struct mem_cgroup *mem, 406__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
407 struct mem_cgroup_per_zone *mz, 407 struct mem_cgroup_per_zone *mz,
408 struct mem_cgroup_tree_per_zone *mctz, 408 struct mem_cgroup_tree_per_zone *mctz,
409 unsigned long long new_usage_in_excess) 409 unsigned long long new_usage_in_excess)
@@ -437,7 +437,7 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
437} 437}
438 438
439static void 439static void
440__mem_cgroup_remove_exceeded(struct mem_cgroup *mem, 440__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
441 struct mem_cgroup_per_zone *mz, 441 struct mem_cgroup_per_zone *mz,
442 struct mem_cgroup_tree_per_zone *mctz) 442 struct mem_cgroup_tree_per_zone *mctz)
443{ 443{
@@ -448,17 +448,17 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
448} 448}
449 449
450static void 450static void
451mem_cgroup_remove_exceeded(struct mem_cgroup *mem, 451mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
452 struct mem_cgroup_per_zone *mz, 452 struct mem_cgroup_per_zone *mz,
453 struct mem_cgroup_tree_per_zone *mctz) 453 struct mem_cgroup_tree_per_zone *mctz)
454{ 454{
455 spin_lock(&mctz->lock); 455 spin_lock(&mctz->lock);
456 __mem_cgroup_remove_exceeded(mem, mz, mctz); 456 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
457 spin_unlock(&mctz->lock); 457 spin_unlock(&mctz->lock);
458} 458}
459 459
460 460
461static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) 461static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
462{ 462{
463 unsigned long long excess; 463 unsigned long long excess;
464 struct mem_cgroup_per_zone *mz; 464 struct mem_cgroup_per_zone *mz;
@@ -471,9 +471,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
471 * Necessary to update all ancestors when hierarchy is used. 471 * Necessary to update all ancestors when hierarchy is used.
472 * because their event counter is not touched. 472 * because their event counter is not touched.
473 */ 473 */
474 for (; mem; mem = parent_mem_cgroup(mem)) { 474 for (; memcg; memcg = parent_mem_cgroup(memcg)) {
475 mz = mem_cgroup_zoneinfo(mem, nid, zid); 475 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
476 excess = res_counter_soft_limit_excess(&mem->res); 476 excess = res_counter_soft_limit_excess(&memcg->res);
477 /* 477 /*
478 * We have to update the tree if mz is on RB-tree or 478 * We have to update the tree if mz is on RB-tree or
479 * mem is over its softlimit. 479 * mem is over its softlimit.
@@ -482,18 +482,18 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
482 spin_lock(&mctz->lock); 482 spin_lock(&mctz->lock);
483 /* if on-tree, remove it */ 483 /* if on-tree, remove it */
484 if (mz->on_tree) 484 if (mz->on_tree)
485 __mem_cgroup_remove_exceeded(mem, mz, mctz); 485 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
486 /* 486 /*
487 * Insert again. mz->usage_in_excess will be updated. 487 * Insert again. mz->usage_in_excess will be updated.
488 * If excess is 0, no tree ops. 488 * If excess is 0, no tree ops.
489 */ 489 */
490 __mem_cgroup_insert_exceeded(mem, mz, mctz, excess); 490 __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
491 spin_unlock(&mctz->lock); 491 spin_unlock(&mctz->lock);
492 } 492 }
493 } 493 }
494} 494}
495 495
496static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) 496static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
497{ 497{
498 int node, zone; 498 int node, zone;
499 struct mem_cgroup_per_zone *mz; 499 struct mem_cgroup_per_zone *mz;
@@ -501,9 +501,9 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
501 501
502 for_each_node_state(node, N_POSSIBLE) { 502 for_each_node_state(node, N_POSSIBLE) {
503 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 503 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
504 mz = mem_cgroup_zoneinfo(mem, node, zone); 504 mz = mem_cgroup_zoneinfo(memcg, node, zone);
505 mctz = soft_limit_tree_node_zone(node, zone); 505 mctz = soft_limit_tree_node_zone(node, zone);
506 mem_cgroup_remove_exceeded(mem, mz, mctz); 506 mem_cgroup_remove_exceeded(memcg, mz, mctz);
507 } 507 }
508 } 508 }
509} 509}
@@ -564,7 +564,7 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
564 * common workload, threashold and synchonization as vmstat[] should be 564 * common workload, threashold and synchonization as vmstat[] should be
565 * implemented. 565 * implemented.
566 */ 566 */
567static long mem_cgroup_read_stat(struct mem_cgroup *mem, 567static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
568 enum mem_cgroup_stat_index idx) 568 enum mem_cgroup_stat_index idx)
569{ 569{
570 long val = 0; 570 long val = 0;
@@ -572,81 +572,83 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem,
572 572
573 get_online_cpus(); 573 get_online_cpus();
574 for_each_online_cpu(cpu) 574 for_each_online_cpu(cpu)
575 val += per_cpu(mem->stat->count[idx], cpu); 575 val += per_cpu(memcg->stat->count[idx], cpu);
576#ifdef CONFIG_HOTPLUG_CPU 576#ifdef CONFIG_HOTPLUG_CPU
577 spin_lock(&mem->pcp_counter_lock); 577 spin_lock(&memcg->pcp_counter_lock);
578 val += mem->nocpu_base.count[idx]; 578 val += memcg->nocpu_base.count[idx];
579 spin_unlock(&mem->pcp_counter_lock); 579 spin_unlock(&memcg->pcp_counter_lock);
580#endif 580#endif
581 put_online_cpus(); 581 put_online_cpus();
582 return val; 582 return val;
583} 583}
584 584
585static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, 585static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
586 bool charge) 586 bool charge)
587{ 587{
588 int val = (charge) ? 1 : -1; 588 int val = (charge) ? 1 : -1;
589 this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); 589 this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
590} 590}
591 591
592void mem_cgroup_pgfault(struct mem_cgroup *mem, int val) 592void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val)
593{ 593{
594 this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val); 594 this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
595} 595}
596 596
597void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val) 597void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val)
598{ 598{
599 this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val); 599 this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
600} 600}
601 601
602static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem, 602static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
603 enum mem_cgroup_events_index idx) 603 enum mem_cgroup_events_index idx)
604{ 604{
605 unsigned long val = 0; 605 unsigned long val = 0;
606 int cpu; 606 int cpu;
607 607
608 for_each_online_cpu(cpu) 608 for_each_online_cpu(cpu)
609 val += per_cpu(mem->stat->events[idx], cpu); 609 val += per_cpu(memcg->stat->events[idx], cpu);
610#ifdef CONFIG_HOTPLUG_CPU 610#ifdef CONFIG_HOTPLUG_CPU
611 spin_lock(&mem->pcp_counter_lock); 611 spin_lock(&memcg->pcp_counter_lock);
612 val += mem->nocpu_base.events[idx]; 612 val += memcg->nocpu_base.events[idx];
613 spin_unlock(&mem->pcp_counter_lock); 613 spin_unlock(&memcg->pcp_counter_lock);
614#endif 614#endif
615 return val; 615 return val;
616} 616}
617 617
618static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, 618static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
619 bool file, int nr_pages) 619 bool file, int nr_pages)
620{ 620{
621 preempt_disable(); 621 preempt_disable();
622 622
623 if (file) 623 if (file)
624 __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages); 624 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
625 nr_pages);
625 else 626 else
626 __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages); 627 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
628 nr_pages);
627 629
628 /* pagein of a big page is an event. So, ignore page size */ 630 /* pagein of a big page is an event. So, ignore page size */
629 if (nr_pages > 0) 631 if (nr_pages > 0)
630 __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]); 632 __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
631 else { 633 else {
632 __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]); 634 __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
633 nr_pages = -nr_pages; /* for event */ 635 nr_pages = -nr_pages; /* for event */
634 } 636 }
635 637
636 __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); 638 __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
637 639
638 preempt_enable(); 640 preempt_enable();
639} 641}
640 642
641unsigned long 643unsigned long
642mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid, 644mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
643 unsigned int lru_mask) 645 unsigned int lru_mask)
644{ 646{
645 struct mem_cgroup_per_zone *mz; 647 struct mem_cgroup_per_zone *mz;
646 enum lru_list l; 648 enum lru_list l;
647 unsigned long ret = 0; 649 unsigned long ret = 0;
648 650
649 mz = mem_cgroup_zoneinfo(mem, nid, zid); 651 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
650 652
651 for_each_lru(l) { 653 for_each_lru(l) {
652 if (BIT(l) & lru_mask) 654 if (BIT(l) & lru_mask)
@@ -656,44 +658,45 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid,
656} 658}
657 659
658static unsigned long 660static unsigned long
659mem_cgroup_node_nr_lru_pages(struct mem_cgroup *mem, 661mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
660 int nid, unsigned int lru_mask) 662 int nid, unsigned int lru_mask)
661{ 663{
662 u64 total = 0; 664 u64 total = 0;
663 int zid; 665 int zid;
664 666
665 for (zid = 0; zid < MAX_NR_ZONES; zid++) 667 for (zid = 0; zid < MAX_NR_ZONES; zid++)
666 total += mem_cgroup_zone_nr_lru_pages(mem, nid, zid, lru_mask); 668 total += mem_cgroup_zone_nr_lru_pages(memcg,
669 nid, zid, lru_mask);
667 670
668 return total; 671 return total;
669} 672}
670 673
671static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *mem, 674static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
672 unsigned int lru_mask) 675 unsigned int lru_mask)
673{ 676{
674 int nid; 677 int nid;
675 u64 total = 0; 678 u64 total = 0;
676 679
677 for_each_node_state(nid, N_HIGH_MEMORY) 680 for_each_node_state(nid, N_HIGH_MEMORY)
678 total += mem_cgroup_node_nr_lru_pages(mem, nid, lru_mask); 681 total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
679 return total; 682 return total;
680} 683}
681 684
682static bool __memcg_event_check(struct mem_cgroup *mem, int target) 685static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
683{ 686{
684 unsigned long val, next; 687 unsigned long val, next;
685 688
686 val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); 689 val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
687 next = this_cpu_read(mem->stat->targets[target]); 690 next = __this_cpu_read(memcg->stat->targets[target]);
688 /* from time_after() in jiffies.h */ 691 /* from time_after() in jiffies.h */
689 return ((long)next - (long)val < 0); 692 return ((long)next - (long)val < 0);
690} 693}
691 694
692static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target) 695static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
693{ 696{
694 unsigned long val, next; 697 unsigned long val, next;
695 698
696 val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); 699 val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
697 700
698 switch (target) { 701 switch (target) {
699 case MEM_CGROUP_TARGET_THRESH: 702 case MEM_CGROUP_TARGET_THRESH:
@@ -709,34 +712,36 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
709 return; 712 return;
710 } 713 }
711 714
712 this_cpu_write(mem->stat->targets[target], next); 715 __this_cpu_write(memcg->stat->targets[target], next);
713} 716}
714 717
715/* 718/*
716 * Check events in order. 719 * Check events in order.
717 * 720 *
718 */ 721 */
719static void memcg_check_events(struct mem_cgroup *mem, struct page *page) 722static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
720{ 723{
724 preempt_disable();
721 /* threshold event is triggered in finer grain than soft limit */ 725 /* threshold event is triggered in finer grain than soft limit */
722 if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) { 726 if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
723 mem_cgroup_threshold(mem); 727 mem_cgroup_threshold(memcg);
724 __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH); 728 __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
725 if (unlikely(__memcg_event_check(mem, 729 if (unlikely(__memcg_event_check(memcg,
726 MEM_CGROUP_TARGET_SOFTLIMIT))) { 730 MEM_CGROUP_TARGET_SOFTLIMIT))) {
727 mem_cgroup_update_tree(mem, page); 731 mem_cgroup_update_tree(memcg, page);
728 __mem_cgroup_target_update(mem, 732 __mem_cgroup_target_update(memcg,
729 MEM_CGROUP_TARGET_SOFTLIMIT); 733 MEM_CGROUP_TARGET_SOFTLIMIT);
730 } 734 }
731#if MAX_NUMNODES > 1 735#if MAX_NUMNODES > 1
732 if (unlikely(__memcg_event_check(mem, 736 if (unlikely(__memcg_event_check(memcg,
733 MEM_CGROUP_TARGET_NUMAINFO))) { 737 MEM_CGROUP_TARGET_NUMAINFO))) {
734 atomic_inc(&mem->numainfo_events); 738 atomic_inc(&memcg->numainfo_events);
735 __mem_cgroup_target_update(mem, 739 __mem_cgroup_target_update(memcg,
736 MEM_CGROUP_TARGET_NUMAINFO); 740 MEM_CGROUP_TARGET_NUMAINFO);
737 } 741 }
738#endif 742#endif
739 } 743 }
744 preempt_enable();
740} 745}
741 746
742static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) 747static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
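
memcg_check_events() switches from this_cpu_*() to the cheaper, non-preempt-safe __this_cpu_*() accessors and compensates by bracketing the whole check/update with preempt_disable()/preempt_enable(), so the event counter that is read and the target that is rewritten are guaranteed to belong to the same CPU. The generic shape of that pattern, on illustrative per-CPU variables rather than memcg state:

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, example_events);
static DEFINE_PER_CPU(unsigned long, example_target);

static bool example_event_pending(void)
{
	bool fire;

	preempt_disable();		/* no migration between the accesses */
	fire = __this_cpu_read(example_events) >=
	       __this_cpu_read(example_target);
	if (fire)
		__this_cpu_write(example_target,
				 __this_cpu_read(example_events) + 1024);
	preempt_enable();
	return fire;
}
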
@@ -762,7 +767,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
762 767
763struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) 768struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
764{ 769{
765 struct mem_cgroup *mem = NULL; 770 struct mem_cgroup *memcg = NULL;
766 771
767 if (!mm) 772 if (!mm)
768 return NULL; 773 return NULL;
@@ -773,25 +778,25 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
773 */ 778 */
774 rcu_read_lock(); 779 rcu_read_lock();
775 do { 780 do {
776 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 781 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
777 if (unlikely(!mem)) 782 if (unlikely(!memcg))
778 break; 783 break;
779 } while (!css_tryget(&mem->css)); 784 } while (!css_tryget(&memcg->css));
780 rcu_read_unlock(); 785 rcu_read_unlock();
781 return mem; 786 return memcg;
782} 787}
783 788
784/* The caller has to guarantee "mem" exists before calling this */ 789/* The caller has to guarantee "mem" exists before calling this */
785static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) 790static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg)
786{ 791{
787 struct cgroup_subsys_state *css; 792 struct cgroup_subsys_state *css;
788 int found; 793 int found;
789 794
790 if (!mem) /* ROOT cgroup has the smallest ID */ 795 if (!memcg) /* ROOT cgroup has the smallest ID */
791 return root_mem_cgroup; /*css_put/get against root is ignored*/ 796 return root_mem_cgroup; /*css_put/get against root is ignored*/
792 if (!mem->use_hierarchy) { 797 if (!memcg->use_hierarchy) {
793 if (css_tryget(&mem->css)) 798 if (css_tryget(&memcg->css))
794 return mem; 799 return memcg;
795 return NULL; 800 return NULL;
796 } 801 }
797 rcu_read_lock(); 802 rcu_read_lock();
@@ -799,13 +804,13 @@ static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
799 * searching a memory cgroup which has the smallest ID under given 804 * searching a memory cgroup which has the smallest ID under given
800 * ROOT cgroup. (ID >= 1) 805 * ROOT cgroup. (ID >= 1)
801 */ 806 */
802 css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found); 807 css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found);
803 if (css && css_tryget(css)) 808 if (css && css_tryget(css))
804 mem = container_of(css, struct mem_cgroup, css); 809 memcg = container_of(css, struct mem_cgroup, css);
805 else 810 else
806 mem = NULL; 811 memcg = NULL;
807 rcu_read_unlock(); 812 rcu_read_unlock();
808 return mem; 813 return memcg;
809} 814}
810 815
811static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, 816static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
@@ -859,29 +864,29 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
859 for_each_mem_cgroup_tree_cond(iter, NULL, true) 864 for_each_mem_cgroup_tree_cond(iter, NULL, true)
860 865
861 866
862static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) 867static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
863{ 868{
864 return (mem == root_mem_cgroup); 869 return (memcg == root_mem_cgroup);
865} 870}
866 871
867void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) 872void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
868{ 873{
869 struct mem_cgroup *mem; 874 struct mem_cgroup *memcg;
870 875
871 if (!mm) 876 if (!mm)
872 return; 877 return;
873 878
874 rcu_read_lock(); 879 rcu_read_lock();
875 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 880 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
876 if (unlikely(!mem)) 881 if (unlikely(!memcg))
877 goto out; 882 goto out;
878 883
879 switch (idx) { 884 switch (idx) {
880 case PGMAJFAULT: 885 case PGMAJFAULT:
881 mem_cgroup_pgmajfault(mem, 1); 886 mem_cgroup_pgmajfault(memcg, 1);
882 break; 887 break;
883 case PGFAULT: 888 case PGFAULT:
884 mem_cgroup_pgfault(mem, 1); 889 mem_cgroup_pgfault(memcg, 1);
885 break; 890 break;
886 default: 891 default:
887 BUG(); 892 BUG();
@@ -990,6 +995,16 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
990 return; 995 return;
991 pc = lookup_page_cgroup(page); 996 pc = lookup_page_cgroup(page);
992 VM_BUG_ON(PageCgroupAcctLRU(pc)); 997 VM_BUG_ON(PageCgroupAcctLRU(pc));
998 /*
999 * putback: charge:
1000 * SetPageLRU SetPageCgroupUsed
1001 * smp_mb smp_mb
1002 * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
1003 *
1004 * Ensure that one of the two sides adds the page to the memcg
1005 * LRU during a race.
1006 */
1007 smp_mb();
993 if (!PageCgroupUsed(pc)) 1008 if (!PageCgroupUsed(pc))
994 return; 1009 return;
995 /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ 1010 /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
@@ -1041,7 +1056,16 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
1041 unsigned long flags; 1056 unsigned long flags;
1042 struct zone *zone = page_zone(page); 1057 struct zone *zone = page_zone(page);
1043 struct page_cgroup *pc = lookup_page_cgroup(page); 1058 struct page_cgroup *pc = lookup_page_cgroup(page);
1044 1059 /*
1060 * putback: charge:
1061 * SetPageLRU SetPageCgroupUsed
1062 * smp_mb smp_mb
1063 * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
1064 *
1065 * Ensure that one of the two sides adds the page to the memcg
1066 * LRU during a race.
1067 */
1068 smp_mb();
1045 /* taking care of that the page is added to LRU while we commit it */ 1069 /* taking care of that the page is added to LRU while we commit it */
1046 if (likely(!PageLRU(page))) 1070 if (likely(!PageLRU(page)))
1047 return; 1071 return;
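
The two smp_mb() calls added above pair the putback side with the charge side: each path publishes its own flag (PageLRU / PageCgroupUsed), issues a full barrier, and then tests the other flag, so whichever path runs second is guaranteed to observe both flags and perform the memcg-LRU insertion. The bare shape of that pairing (sketch only; the flags and add_page_to_memcg_lru() are made-up stand-ins for the real page bits and insertion path):

#include <linux/mm.h>

static int flag_lru, flag_used;
static void add_page_to_memcg_lru(struct page *page);	/* hypothetical */

static void putback_side(struct page *page)
{
	flag_lru = 1;		/* SetPageLRU */
	smp_mb();
	if (flag_used)		/* PageCgroupUsed */
		add_page_to_memcg_lru(page);
}

static void charge_side(struct page *page)
{
	flag_used = 1;		/* SetPageCgroupUsed */
	smp_mb();
	if (flag_lru)		/* PageLRU */
		add_page_to_memcg_lru(page);
}
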
@@ -1063,21 +1087,21 @@ void mem_cgroup_move_lists(struct page *page,
1063} 1087}
1064 1088
1065/* 1089/*
1066 * Checks whether given mem is same or in the root_mem's 1090 * Checks whether given mem is same or in the root_mem_cgroup's
1067 * hierarchy subtree 1091 * hierarchy subtree
1068 */ 1092 */
1069static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem, 1093static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1070 struct mem_cgroup *mem) 1094 struct mem_cgroup *memcg)
1071{ 1095{
1072 if (root_mem != mem) { 1096 if (root_memcg != memcg) {
1073 return (root_mem->use_hierarchy && 1097 return (root_memcg->use_hierarchy &&
1074 css_is_ancestor(&mem->css, &root_mem->css)); 1098 css_is_ancestor(&memcg->css, &root_memcg->css));
1075 } 1099 }
1076 1100
1077 return true; 1101 return true;
1078} 1102}
1079 1103
1080int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) 1104int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
1081{ 1105{
1082 int ret; 1106 int ret;
1083 struct mem_cgroup *curr = NULL; 1107 struct mem_cgroup *curr = NULL;
@@ -1091,25 +1115,29 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
1091 if (!curr) 1115 if (!curr)
1092 return 0; 1116 return 0;
1093 /* 1117 /*
1094 * We should check use_hierarchy of "mem" not "curr". Because checking 1118 * We should check use_hierarchy of "memcg" not "curr". Because checking
1095 * use_hierarchy of "curr" here make this function true if hierarchy is 1119 * use_hierarchy of "curr" here make this function true if hierarchy is
1096 * enabled in "curr" and "curr" is a child of "mem" in *cgroup* 1120 * enabled in "curr" and "curr" is a child of "memcg" in *cgroup*
1097 * hierarchy(even if use_hierarchy is disabled in "mem"). 1121 * hierarchy(even if use_hierarchy is disabled in "memcg").
1098 */ 1122 */
1099 ret = mem_cgroup_same_or_subtree(mem, curr); 1123 ret = mem_cgroup_same_or_subtree(memcg, curr);
1100 css_put(&curr->css); 1124 css_put(&curr->css);
1101 return ret; 1125 return ret;
1102} 1126}
1103 1127
1104static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages) 1128int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
1105{ 1129{
1106 unsigned long active; 1130 unsigned long inactive_ratio;
1131 int nid = zone_to_nid(zone);
1132 int zid = zone_idx(zone);
1107 unsigned long inactive; 1133 unsigned long inactive;
1134 unsigned long active;
1108 unsigned long gb; 1135 unsigned long gb;
1109 unsigned long inactive_ratio;
1110 1136
1111 inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); 1137 inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1112 active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); 1138 BIT(LRU_INACTIVE_ANON));
1139 active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1140 BIT(LRU_ACTIVE_ANON));
1113 1141
1114 gb = (inactive + active) >> (30 - PAGE_SHIFT); 1142 gb = (inactive + active) >> (30 - PAGE_SHIFT);
1115 if (gb) 1143 if (gb)
@@ -1117,39 +1145,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_
1117 else 1145 else
1118 inactive_ratio = 1; 1146 inactive_ratio = 1;
1119 1147
1120 if (present_pages) { 1148 return inactive * inactive_ratio < active;
1121 present_pages[0] = inactive;
1122 present_pages[1] = active;
1123 }
1124
1125 return inactive_ratio;
1126} 1149}
1127 1150
1128int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) 1151int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
1129{
1130 unsigned long active;
1131 unsigned long inactive;
1132 unsigned long present_pages[2];
1133 unsigned long inactive_ratio;
1134
1135 inactive_ratio = calc_inactive_ratio(memcg, present_pages);
1136
1137 inactive = present_pages[0];
1138 active = present_pages[1];
1139
1140 if (inactive * inactive_ratio < active)
1141 return 1;
1142
1143 return 0;
1144}
1145
1146int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
1147{ 1152{
1148 unsigned long active; 1153 unsigned long active;
1149 unsigned long inactive; 1154 unsigned long inactive;
1155 int zid = zone_idx(zone);
1156 int nid = zone_to_nid(zone);
1150 1157
1151 inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); 1158 inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1152 active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); 1159 BIT(LRU_INACTIVE_FILE));
1160 active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1161 BIT(LRU_ACTIVE_FILE));
1153 1162
1154 return (active > inactive); 1163 return (active > inactive);
1155} 1164}
@@ -1254,13 +1263,13 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
1254 * Returns the maximum amount of memory @mem can be charged with, in 1263 * Returns the maximum amount of memory @mem can be charged with, in
1255 * pages. 1264 * pages.
1256 */ 1265 */
1257static unsigned long mem_cgroup_margin(struct mem_cgroup *mem) 1266static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1258{ 1267{
1259 unsigned long long margin; 1268 unsigned long long margin;
1260 1269
1261 margin = res_counter_margin(&mem->res); 1270 margin = res_counter_margin(&memcg->res);
1262 if (do_swap_account) 1271 if (do_swap_account)
1263 margin = min(margin, res_counter_margin(&mem->memsw)); 1272 margin = min(margin, res_counter_margin(&memcg->memsw));
1264 return margin >> PAGE_SHIFT; 1273 return margin >> PAGE_SHIFT;
1265} 1274}
1266 1275
@@ -1275,33 +1284,33 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
1275 return memcg->swappiness; 1284 return memcg->swappiness;
1276} 1285}
1277 1286
1278static void mem_cgroup_start_move(struct mem_cgroup *mem) 1287static void mem_cgroup_start_move(struct mem_cgroup *memcg)
1279{ 1288{
1280 int cpu; 1289 int cpu;
1281 1290
1282 get_online_cpus(); 1291 get_online_cpus();
1283 spin_lock(&mem->pcp_counter_lock); 1292 spin_lock(&memcg->pcp_counter_lock);
1284 for_each_online_cpu(cpu) 1293 for_each_online_cpu(cpu)
1285 per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; 1294 per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
1286 mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; 1295 memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
1287 spin_unlock(&mem->pcp_counter_lock); 1296 spin_unlock(&memcg->pcp_counter_lock);
1288 put_online_cpus(); 1297 put_online_cpus();
1289 1298
1290 synchronize_rcu(); 1299 synchronize_rcu();
1291} 1300}
1292 1301
1293static void mem_cgroup_end_move(struct mem_cgroup *mem) 1302static void mem_cgroup_end_move(struct mem_cgroup *memcg)
1294{ 1303{
1295 int cpu; 1304 int cpu;
1296 1305
1297 if (!mem) 1306 if (!memcg)
1298 return; 1307 return;
1299 get_online_cpus(); 1308 get_online_cpus();
1300 spin_lock(&mem->pcp_counter_lock); 1309 spin_lock(&memcg->pcp_counter_lock);
1301 for_each_online_cpu(cpu) 1310 for_each_online_cpu(cpu)
1302 per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; 1311 per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
1303 mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; 1312 memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
1304 spin_unlock(&mem->pcp_counter_lock); 1313 spin_unlock(&memcg->pcp_counter_lock);
1305 put_online_cpus(); 1314 put_online_cpus();
1306} 1315}
1307/* 1316/*
@@ -1316,13 +1325,13 @@ static void mem_cgroup_end_move(struct mem_cgroup *mem)
1316 * waiting at hith-memory prressure caused by "move". 1325 * waiting at hith-memory prressure caused by "move".
1317 */ 1326 */
1318 1327
1319static bool mem_cgroup_stealed(struct mem_cgroup *mem) 1328static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
1320{ 1329{
1321 VM_BUG_ON(!rcu_read_lock_held()); 1330 VM_BUG_ON(!rcu_read_lock_held());
1322 return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0; 1331 return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
1323} 1332}
1324 1333
1325static bool mem_cgroup_under_move(struct mem_cgroup *mem) 1334static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
1326{ 1335{
1327 struct mem_cgroup *from; 1336 struct mem_cgroup *from;
1328 struct mem_cgroup *to; 1337 struct mem_cgroup *to;
@@ -1337,17 +1346,17 @@ static bool mem_cgroup_under_move(struct mem_cgroup *mem)
1337 if (!from) 1346 if (!from)
1338 goto unlock; 1347 goto unlock;
1339 1348
1340 ret = mem_cgroup_same_or_subtree(mem, from) 1349 ret = mem_cgroup_same_or_subtree(memcg, from)
1341 || mem_cgroup_same_or_subtree(mem, to); 1350 || mem_cgroup_same_or_subtree(memcg, to);
1342unlock: 1351unlock:
1343 spin_unlock(&mc.lock); 1352 spin_unlock(&mc.lock);
1344 return ret; 1353 return ret;
1345} 1354}
1346 1355
1347static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem) 1356static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
1348{ 1357{
1349 if (mc.moving_task && current != mc.moving_task) { 1358 if (mc.moving_task && current != mc.moving_task) {
1350 if (mem_cgroup_under_move(mem)) { 1359 if (mem_cgroup_under_move(memcg)) {
1351 DEFINE_WAIT(wait); 1360 DEFINE_WAIT(wait);
1352 prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE); 1361 prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE);
1353 /* moving charge context might have finished. */ 1362 /* moving charge context might have finished. */
@@ -1431,12 +1440,12 @@ done:
1431 * This function returns the number of memcg under hierarchy tree. Returns 1440 * This function returns the number of memcg under hierarchy tree. Returns
1432 * 1(self count) if no children. 1441 * 1(self count) if no children.
1433 */ 1442 */
1434static int mem_cgroup_count_children(struct mem_cgroup *mem) 1443static int mem_cgroup_count_children(struct mem_cgroup *memcg)
1435{ 1444{
1436 int num = 0; 1445 int num = 0;
1437 struct mem_cgroup *iter; 1446 struct mem_cgroup *iter;
1438 1447
1439 for_each_mem_cgroup_tree(iter, mem) 1448 for_each_mem_cgroup_tree(iter, memcg)
1440 num++; 1449 num++;
1441 return num; 1450 return num;
1442} 1451}
@@ -1466,21 +1475,21 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
1466 * that to reclaim free pages from. 1475 * that to reclaim free pages from.
1467 */ 1476 */
1468static struct mem_cgroup * 1477static struct mem_cgroup *
1469mem_cgroup_select_victim(struct mem_cgroup *root_mem) 1478mem_cgroup_select_victim(struct mem_cgroup *root_memcg)
1470{ 1479{
1471 struct mem_cgroup *ret = NULL; 1480 struct mem_cgroup *ret = NULL;
1472 struct cgroup_subsys_state *css; 1481 struct cgroup_subsys_state *css;
1473 int nextid, found; 1482 int nextid, found;
1474 1483
1475 if (!root_mem->use_hierarchy) { 1484 if (!root_memcg->use_hierarchy) {
1476 css_get(&root_mem->css); 1485 css_get(&root_memcg->css);
1477 ret = root_mem; 1486 ret = root_memcg;
1478 } 1487 }
1479 1488
1480 while (!ret) { 1489 while (!ret) {
1481 rcu_read_lock(); 1490 rcu_read_lock();
1482 nextid = root_mem->last_scanned_child + 1; 1491 nextid = root_memcg->last_scanned_child + 1;
1483 css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css, 1492 css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css,
1484 &found); 1493 &found);
1485 if (css && css_tryget(css)) 1494 if (css && css_tryget(css))
1486 ret = container_of(css, struct mem_cgroup, css); 1495 ret = container_of(css, struct mem_cgroup, css);
@@ -1489,9 +1498,9 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
1489 /* Updates scanning parameter */ 1498 /* Updates scanning parameter */
1490 if (!css) { 1499 if (!css) {
1491 /* this means start scan from ID:1 */ 1500 /* this means start scan from ID:1 */
1492 root_mem->last_scanned_child = 0; 1501 root_memcg->last_scanned_child = 0;
1493 } else 1502 } else
1494 root_mem->last_scanned_child = found; 1503 root_memcg->last_scanned_child = found;
1495 } 1504 }
1496 1505
1497 return ret; 1506 return ret;
@@ -1507,14 +1516,14 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
1507 * reclaimable pages on a node. Returns true if there are any reclaimable 1516 * reclaimable pages on a node. Returns true if there are any reclaimable
1508 * pages in the node. 1517 * pages in the node.
1509 */ 1518 */
1510static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, 1519static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
1511 int nid, bool noswap) 1520 int nid, bool noswap)
1512{ 1521{
1513 if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_FILE)) 1522 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
1514 return true; 1523 return true;
1515 if (noswap || !total_swap_pages) 1524 if (noswap || !total_swap_pages)
1516 return false; 1525 return false;
1517 if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_ANON)) 1526 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
1518 return true; 1527 return true;
1519 return false; 1528 return false;
1520 1529
@@ -1527,29 +1536,29 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
1527 * nodes based on the zonelist. So update the list loosely once per 10 secs. 1536 * nodes based on the zonelist. So update the list loosely once per 10 secs.
1528 * 1537 *
1529 */ 1538 */
1530static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) 1539static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
1531{ 1540{
1532 int nid; 1541 int nid;
1533 /* 1542 /*
1534 * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET 1543 * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
1535 * pagein/pageout changes since the last update. 1544 * pagein/pageout changes since the last update.
1536 */ 1545 */
1537 if (!atomic_read(&mem->numainfo_events)) 1546 if (!atomic_read(&memcg->numainfo_events))
1538 return; 1547 return;
1539 if (atomic_inc_return(&mem->numainfo_updating) > 1) 1548 if (atomic_inc_return(&memcg->numainfo_updating) > 1)
1540 return; 1549 return;
1541 1550
1542 /* make a nodemask where this memcg uses memory from */ 1551 /* make a nodemask where this memcg uses memory from */
1543 mem->scan_nodes = node_states[N_HIGH_MEMORY]; 1552 memcg->scan_nodes = node_states[N_HIGH_MEMORY];
1544 1553
1545 for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { 1554 for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
1546 1555
1547 if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) 1556 if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
1548 node_clear(nid, mem->scan_nodes); 1557 node_clear(nid, memcg->scan_nodes);
1549 } 1558 }
1550 1559
1551 atomic_set(&mem->numainfo_events, 0); 1560 atomic_set(&memcg->numainfo_events, 0);
1552 atomic_set(&mem->numainfo_updating, 0); 1561 atomic_set(&memcg->numainfo_updating, 0);
1553} 1562}
1554 1563
1555/* 1564/*
@@ -1564,16 +1573,16 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
1564 * 1573 *
1565 * Now, we use round-robin. Better algorithm is welcomed. 1574 * Now, we use round-robin. Better algorithm is welcomed.
1566 */ 1575 */
1567int mem_cgroup_select_victim_node(struct mem_cgroup *mem) 1576int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1568{ 1577{
1569 int node; 1578 int node;
1570 1579
1571 mem_cgroup_may_update_nodemask(mem); 1580 mem_cgroup_may_update_nodemask(memcg);
1572 node = mem->last_scanned_node; 1581 node = memcg->last_scanned_node;
1573 1582
1574 node = next_node(node, mem->scan_nodes); 1583 node = next_node(node, memcg->scan_nodes);
1575 if (node == MAX_NUMNODES) 1584 if (node == MAX_NUMNODES)
1576 node = first_node(mem->scan_nodes); 1585 node = first_node(memcg->scan_nodes);
1577 /* 1586 /*
1578 * We call this when we hit limit, not when pages are added to LRU. 1587 * We call this when we hit limit, not when pages are added to LRU.
1579 * No LRU may hold pages because all pages are UNEVICTABLE or 1588 * No LRU may hold pages because all pages are UNEVICTABLE or
@@ -1583,7 +1592,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
1583 if (unlikely(node == MAX_NUMNODES)) 1592 if (unlikely(node == MAX_NUMNODES))
1584 node = numa_node_id(); 1593 node = numa_node_id();
1585 1594
1586 mem->last_scanned_node = node; 1595 memcg->last_scanned_node = node;
1587 return node; 1596 return node;
1588} 1597}
1589 1598
@@ -1593,7 +1602,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
1593 * unused nodes. But scan_nodes is lazily updated and may not cotain 1602 * unused nodes. But scan_nodes is lazily updated and may not cotain
1594 * enough new information. We need to do double check. 1603 * enough new information. We need to do double check.
1595 */ 1604 */
1596bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) 1605bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1597{ 1606{
1598 int nid; 1607 int nid;
1599 1608
@@ -1601,12 +1610,12 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1601 * quick check...making use of scan_node. 1610 * quick check...making use of scan_node.
1602 * We can skip unused nodes. 1611 * We can skip unused nodes.
1603 */ 1612 */
1604 if (!nodes_empty(mem->scan_nodes)) { 1613 if (!nodes_empty(memcg->scan_nodes)) {
1605 for (nid = first_node(mem->scan_nodes); 1614 for (nid = first_node(memcg->scan_nodes);
1606 nid < MAX_NUMNODES; 1615 nid < MAX_NUMNODES;
1607 nid = next_node(nid, mem->scan_nodes)) { 1616 nid = next_node(nid, memcg->scan_nodes)) {
1608 1617
1609 if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) 1618 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1610 return true; 1619 return true;
1611 } 1620 }
1612 } 1621 }
@@ -1614,23 +1623,23 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1614 * Check rest of nodes. 1623 * Check rest of nodes.
1615 */ 1624 */
1616 for_each_node_state(nid, N_HIGH_MEMORY) { 1625 for_each_node_state(nid, N_HIGH_MEMORY) {
1617 if (node_isset(nid, mem->scan_nodes)) 1626 if (node_isset(nid, memcg->scan_nodes))
1618 continue; 1627 continue;
1619 if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) 1628 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1620 return true; 1629 return true;
1621 } 1630 }
1622 return false; 1631 return false;
1623} 1632}
1624 1633
1625#else 1634#else
1626int mem_cgroup_select_victim_node(struct mem_cgroup *mem) 1635int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1627{ 1636{
1628 return 0; 1637 return 0;
1629} 1638}
1630 1639
1631bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) 1640bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1632{ 1641{
1633 return test_mem_cgroup_node_reclaimable(mem, 0, noswap); 1642 return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
1634} 1643}
1635#endif 1644#endif
1636 1645
@@ -1639,14 +1648,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1639 * we reclaimed from, so that we don't end up penalizing one child extensively 1648 * we reclaimed from, so that we don't end up penalizing one child extensively
1640 * based on its position in the children list. 1649 * based on its position in the children list.
1641 * 1650 *
1642 * root_mem is the original ancestor that we've been reclaim from. 1651 * root_memcg is the original ancestor that we've been reclaim from.
1643 * 1652 *
1644 * We give up and return to the caller when we visit root_mem twice. 1653 * We give up and return to the caller when we visit root_memcg twice.
1645 * (other groups can be removed while we're walking....) 1654 * (other groups can be removed while we're walking....)
1646 * 1655 *
1647 * If shrink==true, for avoiding to free too much, this returns immedieately. 1656 * If shrink==true, for avoiding to free too much, this returns immedieately.
1648 */ 1657 */
1649static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, 1658static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
1650 struct zone *zone, 1659 struct zone *zone,
1651 gfp_t gfp_mask, 1660 gfp_t gfp_mask,
1652 unsigned long reclaim_options, 1661 unsigned long reclaim_options,
@@ -1661,15 +1670,15 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1661 unsigned long excess; 1670 unsigned long excess;
1662 unsigned long nr_scanned; 1671 unsigned long nr_scanned;
1663 1672
1664 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; 1673 excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
1665 1674
1666 /* If memsw_is_minimum==1, swap-out is of-no-use. */ 1675 /* If memsw_is_minimum==1, swap-out is of-no-use. */
1667 if (!check_soft && !shrink && root_mem->memsw_is_minimum) 1676 if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
1668 noswap = true; 1677 noswap = true;
1669 1678
1670 while (1) { 1679 while (1) {
1671 victim = mem_cgroup_select_victim(root_mem); 1680 victim = mem_cgroup_select_victim(root_memcg);
1672 if (victim == root_mem) { 1681 if (victim == root_memcg) {
1673 loop++; 1682 loop++;
1674 /* 1683 /*
1675 * We are not draining per cpu cached charges during 1684 * We are not draining per cpu cached charges during
@@ -1678,7 +1687,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1678 * charges will not give any. 1687 * charges will not give any.
1679 */ 1688 */
1680 if (!check_soft && loop >= 1) 1689 if (!check_soft && loop >= 1)
1681 drain_all_stock_async(root_mem); 1690 drain_all_stock_async(root_memcg);
1682 if (loop >= 2) { 1691 if (loop >= 2) {
1683 /* 1692 /*
1684 * If we have not been able to reclaim 1693 * If we have not been able to reclaim
@@ -1725,9 +1734,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1725 return ret; 1734 return ret;
1726 total += ret; 1735 total += ret;
1727 if (check_soft) { 1736 if (check_soft) {
1728 if (!res_counter_soft_limit_excess(&root_mem->res)) 1737 if (!res_counter_soft_limit_excess(&root_memcg->res))
1729 return total; 1738 return total;
1730 } else if (mem_cgroup_margin(root_mem)) 1739 } else if (mem_cgroup_margin(root_memcg))
1731 return total; 1740 return total;
1732 } 1741 }
1733 return total; 1742 return total;
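
The loop above keeps asking mem_cgroup_select_victim() for the next group under root_memcg and stops once the root itself has come up twice. Below is a minimal userspace sketch of that round-robin walk, assuming a flat array in place of the real hierarchy and an illustrative next_victim() helper (neither is a kernel API; this is not the kernel implementation):

/*
 * Userspace sketch (not kernel code): pick victims round-robin and give up
 * once the root has been returned twice, as in the reclaim loop above.
 */
#include <stdio.h>

#define NGROUPS 4

typedef struct { long reclaimable; } group_t;

static group_t tree[NGROUPS] = {
    { 0 },              /* index 0 plays the role of root_memcg */
    { 10 }, { 0 }, { 5 },
};

static int cursor;      /* remembers the last victim, like last_scanned_child */

static group_t *next_victim(void)
{
    group_t *g = &tree[cursor];

    cursor = (cursor + 1) % NGROUPS;
    return g;
}

int main(void)
{
    long total = 0;
    int loop = 0;

    while (1) {
        group_t *victim = next_victim();

        if (victim == &tree[0] && ++loop >= 2)
            break;                      /* visited the root twice: give up */
        total += victim->reclaimable;   /* stand-in for shrinking the victim */
        victim->reclaimable = 0;
    }
    printf("reclaimed %ld pages\n", total);
    return 0;
}

The real loop additionally drains per-CPU stocks after the first pass and, in the soft-limit case, stops as soon as the root no longer exceeds its soft limit.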
@@ -1738,12 +1747,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1738 * If someone is running, return false. 1747 * If someone is running, return false.
1739 * Has to be called with memcg_oom_lock 1748 * Has to be called with memcg_oom_lock
1740 */ 1749 */
1741static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) 1750static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
1742{ 1751{
1743 struct mem_cgroup *iter, *failed = NULL; 1752 struct mem_cgroup *iter, *failed = NULL;
1744 bool cond = true; 1753 bool cond = true;
1745 1754
1746 for_each_mem_cgroup_tree_cond(iter, mem, cond) { 1755 for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
1747 if (iter->oom_lock) { 1756 if (iter->oom_lock) {
1748 /* 1757 /*
1749 * this subtree of our hierarchy is already locked 1758 * this subtree of our hierarchy is already locked
@@ -1763,7 +1772,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
1763 * what we set up to the failing subtree 1772 * what we set up to the failing subtree
1764 */ 1773 */
1765 cond = true; 1774 cond = true;
1766 for_each_mem_cgroup_tree_cond(iter, mem, cond) { 1775 for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
1767 if (iter == failed) { 1776 if (iter == failed) {
1768 cond = false; 1777 cond = false;
1769 continue; 1778 continue;
@@ -1776,24 +1785,24 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
1776/* 1785/*
1777 * Has to be called with memcg_oom_lock 1786 * Has to be called with memcg_oom_lock
1778 */ 1787 */
1779static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) 1788static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
1780{ 1789{
1781 struct mem_cgroup *iter; 1790 struct mem_cgroup *iter;
1782 1791
1783 for_each_mem_cgroup_tree(iter, mem) 1792 for_each_mem_cgroup_tree(iter, memcg)
1784 iter->oom_lock = false; 1793 iter->oom_lock = false;
1785 return 0; 1794 return 0;
1786} 1795}
1787 1796
1788static void mem_cgroup_mark_under_oom(struct mem_cgroup *mem) 1797static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
1789{ 1798{
1790 struct mem_cgroup *iter; 1799 struct mem_cgroup *iter;
1791 1800
1792 for_each_mem_cgroup_tree(iter, mem) 1801 for_each_mem_cgroup_tree(iter, memcg)
1793 atomic_inc(&iter->under_oom); 1802 atomic_inc(&iter->under_oom);
1794} 1803}
1795 1804
1796static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem) 1805static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
1797{ 1806{
1798 struct mem_cgroup *iter; 1807 struct mem_cgroup *iter;
1799 1808
@@ -1802,7 +1811,7 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem)
1802 * mem_cgroup_oom_lock() may not be called. We have to use 1811 * mem_cgroup_oom_lock() may not be called. We have to use
1803 * atomic_add_unless() here. 1812 * atomic_add_unless() here.
1804 */ 1813 */
1805 for_each_mem_cgroup_tree(iter, mem) 1814 for_each_mem_cgroup_tree(iter, memcg)
1806 atomic_add_unless(&iter->under_oom, -1, 0); 1815 atomic_add_unless(&iter->under_oom, -1, 0);
1807} 1816}
1808 1817
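
mem_cgroup_oom_lock() tries to take oom_lock on every group in the subtree and, when some group is already locked, walks the subtree again to clear only the flags it set, stopping at the failing group. A small userspace sketch of that lock-or-roll-back pattern follows, with a flat array standing in for for_each_mem_cgroup_tree() (illustrative names, not kernel code):

/*
 * Userspace sketch (not kernel code): mark the whole "subtree" or undo the
 * partial marking on the first node that is already locked.
 */
#include <stdbool.h>
#include <stdio.h>

#define NNODES 4

struct node { bool oom_lock; };

static struct node subtree[NNODES] = { {false}, {false}, {true}, {false} };

static bool subtree_oom_lock(void)
{
    int i, failed = -1;

    for (i = 0; i < NNODES; i++) {
        if (subtree[i].oom_lock) {
            failed = i;             /* someone else holds this part */
            break;
        }
        subtree[i].oom_lock = true;
    }
    if (failed < 0)
        return true;

    /* roll back only what we set, up to (not including) the failing node */
    for (i = 0; i < failed; i++)
        subtree[i].oom_lock = false;
    return false;
}

int main(void)
{
    printf("locked: %s\n", subtree_oom_lock() ? "yes" : "no");
    return 0;
}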
@@ -1817,85 +1826,85 @@ struct oom_wait_info {
1817static int memcg_oom_wake_function(wait_queue_t *wait, 1826static int memcg_oom_wake_function(wait_queue_t *wait,
1818 unsigned mode, int sync, void *arg) 1827 unsigned mode, int sync, void *arg)
1819{ 1828{
1820 struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg, 1829 struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg,
1821 *oom_wait_mem; 1830 *oom_wait_memcg;
1822 struct oom_wait_info *oom_wait_info; 1831 struct oom_wait_info *oom_wait_info;
1823 1832
1824 oom_wait_info = container_of(wait, struct oom_wait_info, wait); 1833 oom_wait_info = container_of(wait, struct oom_wait_info, wait);
1825 oom_wait_mem = oom_wait_info->mem; 1834 oom_wait_memcg = oom_wait_info->mem;
1826 1835
1827 /* 1836 /*
1828 * Both of oom_wait_info->mem and wake_mem are stable under us. 1837 * Both of oom_wait_info->mem and wake_mem are stable under us.
1829 * Then we can use css_is_ancestor without taking care of RCU. 1838 * Then we can use css_is_ancestor without taking care of RCU.
1830 */ 1839 */
1831 if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem) 1840 if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
1832 && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem)) 1841 && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
1833 return 0; 1842 return 0;
1834 return autoremove_wake_function(wait, mode, sync, arg); 1843 return autoremove_wake_function(wait, mode, sync, arg);
1835} 1844}
1836 1845
1837static void memcg_wakeup_oom(struct mem_cgroup *mem) 1846static void memcg_wakeup_oom(struct mem_cgroup *memcg)
1838{ 1847{
1839 /* for filtering, pass "mem" as argument. */ 1848 /* for filtering, pass "memcg" as argument. */
1840 __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem); 1849 __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
1841} 1850}
1842 1851
1843static void memcg_oom_recover(struct mem_cgroup *mem) 1852static void memcg_oom_recover(struct mem_cgroup *memcg)
1844{ 1853{
1845 if (mem && atomic_read(&mem->under_oom)) 1854 if (memcg && atomic_read(&memcg->under_oom))
1846 memcg_wakeup_oom(mem); 1855 memcg_wakeup_oom(memcg);
1847} 1856}
1848 1857
1849/* 1858/*
1850 * try to call OOM killer. returns false if we should exit memory-reclaim loop. 1859 * try to call OOM killer. returns false if we should exit memory-reclaim loop.
1851 */ 1860 */
1852bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) 1861bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
1853{ 1862{
1854 struct oom_wait_info owait; 1863 struct oom_wait_info owait;
1855 bool locked, need_to_kill; 1864 bool locked, need_to_kill;
1856 1865
1857 owait.mem = mem; 1866 owait.mem = memcg;
1858 owait.wait.flags = 0; 1867 owait.wait.flags = 0;
1859 owait.wait.func = memcg_oom_wake_function; 1868 owait.wait.func = memcg_oom_wake_function;
1860 owait.wait.private = current; 1869 owait.wait.private = current;
1861 INIT_LIST_HEAD(&owait.wait.task_list); 1870 INIT_LIST_HEAD(&owait.wait.task_list);
1862 need_to_kill = true; 1871 need_to_kill = true;
1863 mem_cgroup_mark_under_oom(mem); 1872 mem_cgroup_mark_under_oom(memcg);
1864 1873
1865 /* At first, try to OOM lock hierarchy under mem.*/ 1874 /* At first, try to OOM lock hierarchy under memcg.*/
1866 spin_lock(&memcg_oom_lock); 1875 spin_lock(&memcg_oom_lock);
1867 locked = mem_cgroup_oom_lock(mem); 1876 locked = mem_cgroup_oom_lock(memcg);
1868 /* 1877 /*
1869 * Even if signal_pending(), we can't quit charge() loop without 1878 * Even if signal_pending(), we can't quit charge() loop without
1870 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL 1879 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
1871 * under OOM is always welcomed, use TASK_KILLABLE here. 1880 * under OOM is always welcomed, use TASK_KILLABLE here.
1872 */ 1881 */
1873 prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); 1882 prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
1874 if (!locked || mem->oom_kill_disable) 1883 if (!locked || memcg->oom_kill_disable)
1875 need_to_kill = false; 1884 need_to_kill = false;
1876 if (locked) 1885 if (locked)
1877 mem_cgroup_oom_notify(mem); 1886 mem_cgroup_oom_notify(memcg);
1878 spin_unlock(&memcg_oom_lock); 1887 spin_unlock(&memcg_oom_lock);
1879 1888
1880 if (need_to_kill) { 1889 if (need_to_kill) {
1881 finish_wait(&memcg_oom_waitq, &owait.wait); 1890 finish_wait(&memcg_oom_waitq, &owait.wait);
1882 mem_cgroup_out_of_memory(mem, mask); 1891 mem_cgroup_out_of_memory(memcg, mask);
1883 } else { 1892 } else {
1884 schedule(); 1893 schedule();
1885 finish_wait(&memcg_oom_waitq, &owait.wait); 1894 finish_wait(&memcg_oom_waitq, &owait.wait);
1886 } 1895 }
1887 spin_lock(&memcg_oom_lock); 1896 spin_lock(&memcg_oom_lock);
1888 if (locked) 1897 if (locked)
1889 mem_cgroup_oom_unlock(mem); 1898 mem_cgroup_oom_unlock(memcg);
1890 memcg_wakeup_oom(mem); 1899 memcg_wakeup_oom(memcg);
1891 spin_unlock(&memcg_oom_lock); 1900 spin_unlock(&memcg_oom_lock);
1892 1901
1893 mem_cgroup_unmark_under_oom(mem); 1902 mem_cgroup_unmark_under_oom(memcg);
1894 1903
1895 if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) 1904 if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
1896 return false; 1905 return false;
1897 /* Give chance to dying process */ 1906 /* Give chance to dying process */
1898 schedule_timeout(1); 1907 schedule_timeout_uninterruptible(1);
1899 return true; 1908 return true;
1900} 1909}
1901 1910
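
memcg_wakeup_oom() wakes every waiter on the global memcg_oom_waitq and relies on memcg_oom_wake_function() to filter out unrelated waiters: a waiter is only woken when its group and the group being woken are in the same branch, i.e. one is the other (or an ancestor of the other). A hedged userspace sketch of that filter, using an illustrative parent-pointer tree instead of css_is_ancestor():

/*
 * Userspace sketch (not kernel code) of the wake-up filter: wake only when
 * one group is the same as, or an ancestor of, the other.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct group { const char *name; struct group *parent; };

static bool same_or_subtree(const struct group *root, const struct group *g)
{
    for (; g; g = g->parent)
        if (g == root)
            return true;
    return false;
}

/* Mirrors the check in memcg_oom_wake_function() above. */
static bool should_wake(const struct group *waiter_on, const struct group *woken)
{
    return same_or_subtree(waiter_on, woken) ||
           same_or_subtree(woken, waiter_on);
}

int main(void)
{
    struct group root = { "root", NULL };
    struct group a = { "a", &root }, b = { "b", &root };

    printf("wake a's waiter when root recovers: %d\n", should_wake(&a, &root));
    printf("wake a's waiter when b recovers:    %d\n", should_wake(&a, &b));
    return 0;
}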
@@ -1926,7 +1935,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
1926void mem_cgroup_update_page_stat(struct page *page, 1935void mem_cgroup_update_page_stat(struct page *page,
1927 enum mem_cgroup_page_stat_item idx, int val) 1936 enum mem_cgroup_page_stat_item idx, int val)
1928{ 1937{
1929 struct mem_cgroup *mem; 1938 struct mem_cgroup *memcg;
1930 struct page_cgroup *pc = lookup_page_cgroup(page); 1939 struct page_cgroup *pc = lookup_page_cgroup(page);
1931 bool need_unlock = false; 1940 bool need_unlock = false;
1932 unsigned long uninitialized_var(flags); 1941 unsigned long uninitialized_var(flags);
@@ -1935,16 +1944,16 @@ void mem_cgroup_update_page_stat(struct page *page,
1935 return; 1944 return;
1936 1945
1937 rcu_read_lock(); 1946 rcu_read_lock();
1938 mem = pc->mem_cgroup; 1947 memcg = pc->mem_cgroup;
1939 if (unlikely(!mem || !PageCgroupUsed(pc))) 1948 if (unlikely(!memcg || !PageCgroupUsed(pc)))
1940 goto out; 1949 goto out;
1941 /* pc->mem_cgroup is unstable ? */ 1950 /* pc->mem_cgroup is unstable ? */
1942 if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) { 1951 if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {
1943 /* take a lock against to access pc->mem_cgroup */ 1952 /* take a lock against to access pc->mem_cgroup */
1944 move_lock_page_cgroup(pc, &flags); 1953 move_lock_page_cgroup(pc, &flags);
1945 need_unlock = true; 1954 need_unlock = true;
1946 mem = pc->mem_cgroup; 1955 memcg = pc->mem_cgroup;
1947 if (!mem || !PageCgroupUsed(pc)) 1956 if (!memcg || !PageCgroupUsed(pc))
1948 goto out; 1957 goto out;
1949 } 1958 }
1950 1959
@@ -1960,7 +1969,7 @@ void mem_cgroup_update_page_stat(struct page *page,
1960 BUG(); 1969 BUG();
1961 } 1970 }
1962 1971
1963 this_cpu_add(mem->stat->count[idx], val); 1972 this_cpu_add(memcg->stat->count[idx], val);
1964 1973
1965out: 1974out:
1966 if (unlikely(need_unlock)) 1975 if (unlikely(need_unlock))
@@ -1991,13 +2000,13 @@ static DEFINE_MUTEX(percpu_charge_mutex);
1991 * cgroup which is not current target, returns false. This stock will be 2000 * cgroup which is not current target, returns false. This stock will be
1992 * refilled. 2001 * refilled.
1993 */ 2002 */
1994static bool consume_stock(struct mem_cgroup *mem) 2003static bool consume_stock(struct mem_cgroup *memcg)
1995{ 2004{
1996 struct memcg_stock_pcp *stock; 2005 struct memcg_stock_pcp *stock;
1997 bool ret = true; 2006 bool ret = true;
1998 2007
1999 stock = &get_cpu_var(memcg_stock); 2008 stock = &get_cpu_var(memcg_stock);
2000 if (mem == stock->cached && stock->nr_pages) 2009 if (memcg == stock->cached && stock->nr_pages)
2001 stock->nr_pages--; 2010 stock->nr_pages--;
2002 else /* need to call res_counter_charge */ 2011 else /* need to call res_counter_charge */
2003 ret = false; 2012 ret = false;
@@ -2038,24 +2047,24 @@ static void drain_local_stock(struct work_struct *dummy)
2038 * Cache charges(val) which is from res_counter, to local per_cpu area. 2047 * Cache charges(val) which is from res_counter, to local per_cpu area.
2039 * This will be consumed by consume_stock() function, later. 2048 * This will be consumed by consume_stock() function, later.
2040 */ 2049 */
2041static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages) 2050static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
2042{ 2051{
2043 struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); 2052 struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
2044 2053
2045 if (stock->cached != mem) { /* reset if necessary */ 2054 if (stock->cached != memcg) { /* reset if necessary */
2046 drain_stock(stock); 2055 drain_stock(stock);
2047 stock->cached = mem; 2056 stock->cached = memcg;
2048 } 2057 }
2049 stock->nr_pages += nr_pages; 2058 stock->nr_pages += nr_pages;
2050 put_cpu_var(memcg_stock); 2059 put_cpu_var(memcg_stock);
2051} 2060}
2052 2061
2053/* 2062/*
2054 * Drains all per-CPU charge caches for given root_mem resp. subtree 2063 * Drains all per-CPU charge caches for given root_memcg resp. subtree
2055 * of the hierarchy under it. sync flag says whether we should block 2064 * of the hierarchy under it. sync flag says whether we should block
2056 * until the work is done. 2065 * until the work is done.
2057 */ 2066 */
2058static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) 2067static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
2059{ 2068{
2060 int cpu, curcpu; 2069 int cpu, curcpu;
2061 2070
@@ -2064,12 +2073,12 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
2064 curcpu = get_cpu(); 2073 curcpu = get_cpu();
2065 for_each_online_cpu(cpu) { 2074 for_each_online_cpu(cpu) {
2066 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); 2075 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
2067 struct mem_cgroup *mem; 2076 struct mem_cgroup *memcg;
2068 2077
2069 mem = stock->cached; 2078 memcg = stock->cached;
2070 if (!mem || !stock->nr_pages) 2079 if (!memcg || !stock->nr_pages)
2071 continue; 2080 continue;
2072 if (!mem_cgroup_same_or_subtree(root_mem, mem)) 2081 if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
2073 continue; 2082 continue;
2074 if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { 2083 if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
2075 if (cpu == curcpu) 2084 if (cpu == curcpu)
@@ -2098,23 +2107,23 @@ out:
2098 * expects some charges will be back to res_counter later but cannot wait for 2107 * expects some charges will be back to res_counter later but cannot wait for
2099 * it. 2108 * it.
2100 */ 2109 */
2101static void drain_all_stock_async(struct mem_cgroup *root_mem) 2110static void drain_all_stock_async(struct mem_cgroup *root_memcg)
2102{ 2111{
2103 /* 2112 /*
2104 * If someone calls draining, avoid adding more kworker runs. 2113 * If someone calls draining, avoid adding more kworker runs.
2105 */ 2114 */
2106 if (!mutex_trylock(&percpu_charge_mutex)) 2115 if (!mutex_trylock(&percpu_charge_mutex))
2107 return; 2116 return;
2108 drain_all_stock(root_mem, false); 2117 drain_all_stock(root_memcg, false);
2109 mutex_unlock(&percpu_charge_mutex); 2118 mutex_unlock(&percpu_charge_mutex);
2110} 2119}
2111 2120
2112/* This is a synchronous drain interface. */ 2121/* This is a synchronous drain interface. */
2113static void drain_all_stock_sync(struct mem_cgroup *root_mem) 2122static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
2114{ 2123{
2115 /* called when force_empty is called */ 2124 /* called when force_empty is called */
2116 mutex_lock(&percpu_charge_mutex); 2125 mutex_lock(&percpu_charge_mutex);
2117 drain_all_stock(root_mem, true); 2126 drain_all_stock(root_memcg, true);
2118 mutex_unlock(&percpu_charge_mutex); 2127 mutex_unlock(&percpu_charge_mutex);
2119} 2128}
2120 2129
@@ -2122,35 +2131,35 @@ static void drain_all_stock_sync(struct mem_cgroup *root_mem)
2122 * This function drains percpu counter value from DEAD cpu and 2131 * This function drains percpu counter value from DEAD cpu and
2123 * move it to local cpu. Note that this function can be preempted. 2132 * move it to local cpu. Note that this function can be preempted.
2124 */ 2133 */
2125static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) 2134static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
2126{ 2135{
2127 int i; 2136 int i;
2128 2137
2129 spin_lock(&mem->pcp_counter_lock); 2138 spin_lock(&memcg->pcp_counter_lock);
2130 for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { 2139 for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
2131 long x = per_cpu(mem->stat->count[i], cpu); 2140 long x = per_cpu(memcg->stat->count[i], cpu);
2132 2141
2133 per_cpu(mem->stat->count[i], cpu) = 0; 2142 per_cpu(memcg->stat->count[i], cpu) = 0;
2134 mem->nocpu_base.count[i] += x; 2143 memcg->nocpu_base.count[i] += x;
2135 } 2144 }
2136 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { 2145 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
2137 unsigned long x = per_cpu(mem->stat->events[i], cpu); 2146 unsigned long x = per_cpu(memcg->stat->events[i], cpu);
2138 2147
2139 per_cpu(mem->stat->events[i], cpu) = 0; 2148 per_cpu(memcg->stat->events[i], cpu) = 0;
2140 mem->nocpu_base.events[i] += x; 2149 memcg->nocpu_base.events[i] += x;
2141 } 2150 }
2142 /* need to clear ON_MOVE value, works as a kind of lock. */ 2151 /* need to clear ON_MOVE value, works as a kind of lock. */
2143 per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; 2152 per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
2144 spin_unlock(&mem->pcp_counter_lock); 2153 spin_unlock(&memcg->pcp_counter_lock);
2145} 2154}
2146 2155
2147static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu) 2156static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu)
2148{ 2157{
2149 int idx = MEM_CGROUP_ON_MOVE; 2158 int idx = MEM_CGROUP_ON_MOVE;
2150 2159
2151 spin_lock(&mem->pcp_counter_lock); 2160 spin_lock(&memcg->pcp_counter_lock);
2152 per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx]; 2161 per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx];
2153 spin_unlock(&mem->pcp_counter_lock); 2162 spin_unlock(&memcg->pcp_counter_lock);
2154} 2163}
2155 2164
2156static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, 2165static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
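
mem_cgroup_drain_pcp_counter() above folds a dead CPU's per-CPU counters into memcg->nocpu_base under pcp_counter_lock so that later reads do not lose those counts. A rough userspace sketch of that fold, with fixed-size arrays standing in for the real percpu data (illustrative only, not the kernel implementation):

/*
 * Userspace sketch (not kernel code): move a dead CPU's counters into a
 * fallback base and zero the per-CPU slots.
 */
#include <stdio.h>

#define NR_CPUS  4
#define NR_STATS 3

struct group_stats {
    long percpu[NR_CPUS][NR_STATS]; /* per-CPU counters */
    long nocpu_base[NR_STATS];      /* accumulates counts from dead CPUs */
};

static void drain_pcp_counter(struct group_stats *s, int cpu)
{
    int i;

    /* in the kernel this runs under memcg->pcp_counter_lock */
    for (i = 0; i < NR_STATS; i++) {
        s->nocpu_base[i] += s->percpu[cpu][i];
        s->percpu[cpu][i] = 0;
    }
}

static long read_stat(const struct group_stats *s, int idx)
{
    long val = s->nocpu_base[idx];
    int cpu;

    for (cpu = 0; cpu < NR_CPUS; cpu++)
        val += s->percpu[cpu][idx];
    return val;
}

int main(void)
{
    struct group_stats s = { .percpu = { { 5, 1, 0 }, { 3, 2, 7 } } };

    drain_pcp_counter(&s, 1);   /* pretend CPU 1 went offline */
    printf("stat[0]=%ld stat[2]=%ld\n", read_stat(&s, 0), read_stat(&s, 2));
    return 0;
}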
@@ -2188,7 +2197,7 @@ enum {
2188 CHARGE_OOM_DIE, /* the current is killed because of OOM */ 2197 CHARGE_OOM_DIE, /* the current is killed because of OOM */
2189}; 2198};
2190 2199
2191static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, 2200static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2192 unsigned int nr_pages, bool oom_check) 2201 unsigned int nr_pages, bool oom_check)
2193{ 2202{
2194 unsigned long csize = nr_pages * PAGE_SIZE; 2203 unsigned long csize = nr_pages * PAGE_SIZE;
@@ -2197,16 +2206,16 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
2197 unsigned long flags = 0; 2206 unsigned long flags = 0;
2198 int ret; 2207 int ret;
2199 2208
2200 ret = res_counter_charge(&mem->res, csize, &fail_res); 2209 ret = res_counter_charge(&memcg->res, csize, &fail_res);
2201 2210
2202 if (likely(!ret)) { 2211 if (likely(!ret)) {
2203 if (!do_swap_account) 2212 if (!do_swap_account)
2204 return CHARGE_OK; 2213 return CHARGE_OK;
2205 ret = res_counter_charge(&mem->memsw, csize, &fail_res); 2214 ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
2206 if (likely(!ret)) 2215 if (likely(!ret))
2207 return CHARGE_OK; 2216 return CHARGE_OK;
2208 2217
2209 res_counter_uncharge(&mem->res, csize); 2218 res_counter_uncharge(&memcg->res, csize);
2210 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); 2219 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
2211 flags |= MEM_CGROUP_RECLAIM_NOSWAP; 2220 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
2212 } else 2221 } else
@@ -2264,12 +2273,12 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
2264static int __mem_cgroup_try_charge(struct mm_struct *mm, 2273static int __mem_cgroup_try_charge(struct mm_struct *mm,
2265 gfp_t gfp_mask, 2274 gfp_t gfp_mask,
2266 unsigned int nr_pages, 2275 unsigned int nr_pages,
2267 struct mem_cgroup **memcg, 2276 struct mem_cgroup **ptr,
2268 bool oom) 2277 bool oom)
2269{ 2278{
2270 unsigned int batch = max(CHARGE_BATCH, nr_pages); 2279 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2271 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; 2280 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2272 struct mem_cgroup *mem = NULL; 2281 struct mem_cgroup *memcg = NULL;
2273 int ret; 2282 int ret;
2274 2283
2275 /* 2284 /*
@@ -2287,17 +2296,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
2287 * thread group leader migrates. It's possible that mm is not 2296 * thread group leader migrates. It's possible that mm is not
2288 * set, if so charge the init_mm (happens for pagecache usage). 2297 * set, if so charge the init_mm (happens for pagecache usage).
2289 */ 2298 */
2290 if (!*memcg && !mm) 2299 if (!*ptr && !mm)
2291 goto bypass; 2300 goto bypass;
2292again: 2301again:
2293 if (*memcg) { /* css should be a valid one */ 2302 if (*ptr) { /* css should be a valid one */
2294 mem = *memcg; 2303 memcg = *ptr;
2295 VM_BUG_ON(css_is_removed(&mem->css)); 2304 VM_BUG_ON(css_is_removed(&memcg->css));
2296 if (mem_cgroup_is_root(mem)) 2305 if (mem_cgroup_is_root(memcg))
2297 goto done; 2306 goto done;
2298 if (nr_pages == 1 && consume_stock(mem)) 2307 if (nr_pages == 1 && consume_stock(memcg))
2299 goto done; 2308 goto done;
2300 css_get(&mem->css); 2309 css_get(&memcg->css);
2301 } else { 2310 } else {
2302 struct task_struct *p; 2311 struct task_struct *p;
2303 2312
@@ -2305,7 +2314,7 @@ again:
2305 p = rcu_dereference(mm->owner); 2314 p = rcu_dereference(mm->owner);
2306 /* 2315 /*
2307 * Because we don't have task_lock(), "p" can exit. 2316 * Because we don't have task_lock(), "p" can exit.
2308 * In that case, "mem" can point to root or p can be NULL with 2317 * In that case, "memcg" can point to root or p can be NULL with
2309 * race with swapoff. Then, we have small risk of mis-accouning. 2318 * race with swapoff. Then, we have small risk of mis-accouning.
2310 * But such kind of mis-account by race always happens because 2319 * But such kind of mis-account by race always happens because
2311 * we don't have cgroup_mutex(). It's overkill and we allo that 2320 * we don't have cgroup_mutex(). It's overkill and we allo that
@@ -2313,12 +2322,12 @@ again:
2313 * (*) swapoff at el will charge against mm-struct not against 2322 * (*) swapoff at el will charge against mm-struct not against
2314 * task-struct. So, mm->owner can be NULL. 2323 * task-struct. So, mm->owner can be NULL.
2315 */ 2324 */
2316 mem = mem_cgroup_from_task(p); 2325 memcg = mem_cgroup_from_task(p);
2317 if (!mem || mem_cgroup_is_root(mem)) { 2326 if (!memcg || mem_cgroup_is_root(memcg)) {
2318 rcu_read_unlock(); 2327 rcu_read_unlock();
2319 goto done; 2328 goto done;
2320 } 2329 }
2321 if (nr_pages == 1 && consume_stock(mem)) { 2330 if (nr_pages == 1 && consume_stock(memcg)) {
2322 /* 2331 /*
2323 * It seems dagerous to access memcg without css_get(). 2332 * It seems dagerous to access memcg without css_get().
2324 * But considering how consume_stok works, it's not 2333 * But considering how consume_stok works, it's not
@@ -2331,7 +2340,7 @@ again:
2331 goto done; 2340 goto done;
2332 } 2341 }
2333 /* after here, we may be blocked. we need to get refcnt */ 2342 /* after here, we may be blocked. we need to get refcnt */
2334 if (!css_tryget(&mem->css)) { 2343 if (!css_tryget(&memcg->css)) {
2335 rcu_read_unlock(); 2344 rcu_read_unlock();
2336 goto again; 2345 goto again;
2337 } 2346 }
@@ -2343,7 +2352,7 @@ again:
2343 2352
2344 /* If killed, bypass charge */ 2353 /* If killed, bypass charge */
2345 if (fatal_signal_pending(current)) { 2354 if (fatal_signal_pending(current)) {
2346 css_put(&mem->css); 2355 css_put(&memcg->css);
2347 goto bypass; 2356 goto bypass;
2348 } 2357 }
2349 2358
@@ -2353,43 +2362,43 @@ again:
2353 nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; 2362 nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2354 } 2363 }
2355 2364
2356 ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check); 2365 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check);
2357 switch (ret) { 2366 switch (ret) {
2358 case CHARGE_OK: 2367 case CHARGE_OK:
2359 break; 2368 break;
2360 case CHARGE_RETRY: /* not in OOM situation but retry */ 2369 case CHARGE_RETRY: /* not in OOM situation but retry */
2361 batch = nr_pages; 2370 batch = nr_pages;
2362 css_put(&mem->css); 2371 css_put(&memcg->css);
2363 mem = NULL; 2372 memcg = NULL;
2364 goto again; 2373 goto again;
2365 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ 2374 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
2366 css_put(&mem->css); 2375 css_put(&memcg->css);
2367 goto nomem; 2376 goto nomem;
2368 case CHARGE_NOMEM: /* OOM routine works */ 2377 case CHARGE_NOMEM: /* OOM routine works */
2369 if (!oom) { 2378 if (!oom) {
2370 css_put(&mem->css); 2379 css_put(&memcg->css);
2371 goto nomem; 2380 goto nomem;
2372 } 2381 }
2373 /* If oom, we never return -ENOMEM */ 2382 /* If oom, we never return -ENOMEM */
2374 nr_oom_retries--; 2383 nr_oom_retries--;
2375 break; 2384 break;
2376 case CHARGE_OOM_DIE: /* Killed by OOM Killer */ 2385 case CHARGE_OOM_DIE: /* Killed by OOM Killer */
2377 css_put(&mem->css); 2386 css_put(&memcg->css);
2378 goto bypass; 2387 goto bypass;
2379 } 2388 }
2380 } while (ret != CHARGE_OK); 2389 } while (ret != CHARGE_OK);
2381 2390
2382 if (batch > nr_pages) 2391 if (batch > nr_pages)
2383 refill_stock(mem, batch - nr_pages); 2392 refill_stock(memcg, batch - nr_pages);
2384 css_put(&mem->css); 2393 css_put(&memcg->css);
2385done: 2394done:
2386 *memcg = mem; 2395 *ptr = memcg;
2387 return 0; 2396 return 0;
2388nomem: 2397nomem:
2389 *memcg = NULL; 2398 *ptr = NULL;
2390 return -ENOMEM; 2399 return -ENOMEM;
2391bypass: 2400bypass:
2392 *memcg = NULL; 2401 *ptr = NULL;
2393 return 0; 2402 return 0;
2394} 2403}
2395 2404
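
Inside the retry loop above, mem_cgroup_do_charge() charges memcg->res first and only then memcg->memsw; if the mem+swap counter rejects the charge, the already-charged res counter is unwound before failure is reported. A minimal userspace sketch of that two-counter charge with rollback, using an illustrative counter_t in place of res_counter:

/*
 * Userspace sketch (not kernel code): charge two counters in order and roll
 * back the first if the second refuses.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct { long usage, limit; } counter_t;

static bool counter_charge(counter_t *c, long bytes)
{
    if (c->usage + bytes > c->limit)
        return false;           /* would exceed the limit */
    c->usage += bytes;
    return true;
}

static void counter_uncharge(counter_t *c, long bytes)
{
    c->usage -= bytes;
}

/* Returns true only when both counters accepted the charge. */
static bool do_charge(counter_t *res, counter_t *memsw, long bytes)
{
    if (!counter_charge(res, bytes))
        return false;
    if (!counter_charge(memsw, bytes)) {
        counter_uncharge(res, bytes);   /* roll back the partial charge */
        return false;
    }
    return true;
}

int main(void)
{
    counter_t res = { 0, 1 << 20 }, memsw = { 0, 1 << 19 };

    printf("charged: %s\n", do_charge(&res, &memsw, 1 << 20) ? "yes" : "no");
    printf("res usage after rollback: %ld\n", res.usage);
    return 0;
}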
@@ -2398,15 +2407,15 @@ bypass:
2398 * This function is for that and do uncharge, put css's refcnt. 2407 * This function is for that and do uncharge, put css's refcnt.
2399 * gotten by try_charge(). 2408 * gotten by try_charge().
2400 */ 2409 */
2401static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, 2410static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2402 unsigned int nr_pages) 2411 unsigned int nr_pages)
2403{ 2412{
2404 if (!mem_cgroup_is_root(mem)) { 2413 if (!mem_cgroup_is_root(memcg)) {
2405 unsigned long bytes = nr_pages * PAGE_SIZE; 2414 unsigned long bytes = nr_pages * PAGE_SIZE;
2406 2415
2407 res_counter_uncharge(&mem->res, bytes); 2416 res_counter_uncharge(&memcg->res, bytes);
2408 if (do_swap_account) 2417 if (do_swap_account)
2409 res_counter_uncharge(&mem->memsw, bytes); 2418 res_counter_uncharge(&memcg->memsw, bytes);
2410 } 2419 }
2411} 2420}
2412 2421
@@ -2431,7 +2440,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2431 2440
2432struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) 2441struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2433{ 2442{
2434 struct mem_cgroup *mem = NULL; 2443 struct mem_cgroup *memcg = NULL;
2435 struct page_cgroup *pc; 2444 struct page_cgroup *pc;
2436 unsigned short id; 2445 unsigned short id;
2437 swp_entry_t ent; 2446 swp_entry_t ent;
@@ -2441,23 +2450,23 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2441 pc = lookup_page_cgroup(page); 2450 pc = lookup_page_cgroup(page);
2442 lock_page_cgroup(pc); 2451 lock_page_cgroup(pc);
2443 if (PageCgroupUsed(pc)) { 2452 if (PageCgroupUsed(pc)) {
2444 mem = pc->mem_cgroup; 2453 memcg = pc->mem_cgroup;
2445 if (mem && !css_tryget(&mem->css)) 2454 if (memcg && !css_tryget(&memcg->css))
2446 mem = NULL; 2455 memcg = NULL;
2447 } else if (PageSwapCache(page)) { 2456 } else if (PageSwapCache(page)) {
2448 ent.val = page_private(page); 2457 ent.val = page_private(page);
2449 id = lookup_swap_cgroup(ent); 2458 id = lookup_swap_cgroup(ent);
2450 rcu_read_lock(); 2459 rcu_read_lock();
2451 mem = mem_cgroup_lookup(id); 2460 memcg = mem_cgroup_lookup(id);
2452 if (mem && !css_tryget(&mem->css)) 2461 if (memcg && !css_tryget(&memcg->css))
2453 mem = NULL; 2462 memcg = NULL;
2454 rcu_read_unlock(); 2463 rcu_read_unlock();
2455 } 2464 }
2456 unlock_page_cgroup(pc); 2465 unlock_page_cgroup(pc);
2457 return mem; 2466 return memcg;
2458} 2467}
2459 2468
2460static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, 2469static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2461 struct page *page, 2470 struct page *page,
2462 unsigned int nr_pages, 2471 unsigned int nr_pages,
2463 struct page_cgroup *pc, 2472 struct page_cgroup *pc,
@@ -2466,14 +2475,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2466 lock_page_cgroup(pc); 2475 lock_page_cgroup(pc);
2467 if (unlikely(PageCgroupUsed(pc))) { 2476 if (unlikely(PageCgroupUsed(pc))) {
2468 unlock_page_cgroup(pc); 2477 unlock_page_cgroup(pc);
2469 __mem_cgroup_cancel_charge(mem, nr_pages); 2478 __mem_cgroup_cancel_charge(memcg, nr_pages);
2470 return; 2479 return;
2471 } 2480 }
2472 /* 2481 /*
2473 * we don't need page_cgroup_lock about tail pages, becase they are not 2482 * we don't need page_cgroup_lock about tail pages, becase they are not
2474 * accessed by any other context at this point. 2483 * accessed by any other context at this point.
2475 */ 2484 */
2476 pc->mem_cgroup = mem; 2485 pc->mem_cgroup = memcg;
2477 /* 2486 /*
2478 * We access a page_cgroup asynchronously without lock_page_cgroup(). 2487 * We access a page_cgroup asynchronously without lock_page_cgroup().
2479 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup 2488 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
@@ -2496,14 +2505,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2496 break; 2505 break;
2497 } 2506 }
2498 2507
2499 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages); 2508 mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
2500 unlock_page_cgroup(pc); 2509 unlock_page_cgroup(pc);
2501 /* 2510 /*
2502 * "charge_statistics" updated event counter. Then, check it. 2511 * "charge_statistics" updated event counter. Then, check it.
2503 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. 2512 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
2504 * if they exceeds softlimit. 2513 * if they exceeds softlimit.
2505 */ 2514 */
2506 memcg_check_events(mem, page); 2515 memcg_check_events(memcg, page);
2507} 2516}
2508 2517
2509#ifdef CONFIG_TRANSPARENT_HUGEPAGE 2518#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -2690,7 +2699,7 @@ out:
2690static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, 2699static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2691 gfp_t gfp_mask, enum charge_type ctype) 2700 gfp_t gfp_mask, enum charge_type ctype)
2692{ 2701{
2693 struct mem_cgroup *mem = NULL; 2702 struct mem_cgroup *memcg = NULL;
2694 unsigned int nr_pages = 1; 2703 unsigned int nr_pages = 1;
2695 struct page_cgroup *pc; 2704 struct page_cgroup *pc;
2696 bool oom = true; 2705 bool oom = true;
@@ -2709,11 +2718,11 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2709 pc = lookup_page_cgroup(page); 2718 pc = lookup_page_cgroup(page);
2710 BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */ 2719 BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */
2711 2720
2712 ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom); 2721 ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
2713 if (ret || !mem) 2722 if (ret || !memcg)
2714 return ret; 2723 return ret;
2715 2724
2716 __mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype); 2725 __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
2717 return 0; 2726 return 0;
2718} 2727}
2719 2728
@@ -2742,7 +2751,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
2742 enum charge_type ctype); 2751 enum charge_type ctype);
2743 2752
2744static void 2753static void
2745__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, 2754__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
2746 enum charge_type ctype) 2755 enum charge_type ctype)
2747{ 2756{
2748 struct page_cgroup *pc = lookup_page_cgroup(page); 2757 struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -2752,7 +2761,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
2752 * LRU. Take care of it. 2761 * LRU. Take care of it.
2753 */ 2762 */
2754 mem_cgroup_lru_del_before_commit(page); 2763 mem_cgroup_lru_del_before_commit(page);
2755 __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); 2764 __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
2756 mem_cgroup_lru_add_after_commit(page); 2765 mem_cgroup_lru_add_after_commit(page);
2757 return; 2766 return;
2758} 2767}
@@ -2760,7 +2769,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
2760int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 2769int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2761 gfp_t gfp_mask) 2770 gfp_t gfp_mask)
2762{ 2771{
2763 struct mem_cgroup *mem = NULL; 2772 struct mem_cgroup *memcg = NULL;
2764 int ret; 2773 int ret;
2765 2774
2766 if (mem_cgroup_disabled()) 2775 if (mem_cgroup_disabled())
@@ -2772,8 +2781,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2772 mm = &init_mm; 2781 mm = &init_mm;
2773 2782
2774 if (page_is_file_cache(page)) { 2783 if (page_is_file_cache(page)) {
2775 ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true); 2784 ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
2776 if (ret || !mem) 2785 if (ret || !memcg)
2777 return ret; 2786 return ret;
2778 2787
2779 /* 2788 /*
@@ -2781,15 +2790,15 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2781 * put that would remove them from the LRU list, make 2790 * put that would remove them from the LRU list, make
2782 * sure that they get relinked properly. 2791 * sure that they get relinked properly.
2783 */ 2792 */
2784 __mem_cgroup_commit_charge_lrucare(page, mem, 2793 __mem_cgroup_commit_charge_lrucare(page, memcg,
2785 MEM_CGROUP_CHARGE_TYPE_CACHE); 2794 MEM_CGROUP_CHARGE_TYPE_CACHE);
2786 return ret; 2795 return ret;
2787 } 2796 }
2788 /* shmem */ 2797 /* shmem */
2789 if (PageSwapCache(page)) { 2798 if (PageSwapCache(page)) {
2790 ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); 2799 ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
2791 if (!ret) 2800 if (!ret)
2792 __mem_cgroup_commit_charge_swapin(page, mem, 2801 __mem_cgroup_commit_charge_swapin(page, memcg,
2793 MEM_CGROUP_CHARGE_TYPE_SHMEM); 2802 MEM_CGROUP_CHARGE_TYPE_SHMEM);
2794 } else 2803 } else
2795 ret = mem_cgroup_charge_common(page, mm, gfp_mask, 2804 ret = mem_cgroup_charge_common(page, mm, gfp_mask,
@@ -2808,7 +2817,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
2808 struct page *page, 2817 struct page *page,
2809 gfp_t mask, struct mem_cgroup **ptr) 2818 gfp_t mask, struct mem_cgroup **ptr)
2810{ 2819{
2811 struct mem_cgroup *mem; 2820 struct mem_cgroup *memcg;
2812 int ret; 2821 int ret;
2813 2822
2814 *ptr = NULL; 2823 *ptr = NULL;
@@ -2826,12 +2835,12 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
2826 */ 2835 */
2827 if (!PageSwapCache(page)) 2836 if (!PageSwapCache(page))
2828 goto charge_cur_mm; 2837 goto charge_cur_mm;
2829 mem = try_get_mem_cgroup_from_page(page); 2838 memcg = try_get_mem_cgroup_from_page(page);
2830 if (!mem) 2839 if (!memcg)
2831 goto charge_cur_mm; 2840 goto charge_cur_mm;
2832 *ptr = mem; 2841 *ptr = memcg;
2833 ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true); 2842 ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
2834 css_put(&mem->css); 2843 css_put(&memcg->css);
2835 return ret; 2844 return ret;
2836charge_cur_mm: 2845charge_cur_mm:
2837 if (unlikely(!mm)) 2846 if (unlikely(!mm))
@@ -2891,16 +2900,16 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
2891 MEM_CGROUP_CHARGE_TYPE_MAPPED); 2900 MEM_CGROUP_CHARGE_TYPE_MAPPED);
2892} 2901}
2893 2902
2894void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) 2903void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
2895{ 2904{
2896 if (mem_cgroup_disabled()) 2905 if (mem_cgroup_disabled())
2897 return; 2906 return;
2898 if (!mem) 2907 if (!memcg)
2899 return; 2908 return;
2900 __mem_cgroup_cancel_charge(mem, 1); 2909 __mem_cgroup_cancel_charge(memcg, 1);
2901} 2910}
2902 2911
2903static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, 2912static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
2904 unsigned int nr_pages, 2913 unsigned int nr_pages,
2905 const enum charge_type ctype) 2914 const enum charge_type ctype)
2906{ 2915{
@@ -2918,7 +2927,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
2918 * uncharges. Then, it's ok to ignore memcg's refcnt. 2927 * uncharges. Then, it's ok to ignore memcg's refcnt.
2919 */ 2928 */
2920 if (!batch->memcg) 2929 if (!batch->memcg)
2921 batch->memcg = mem; 2930 batch->memcg = memcg;
2922 /* 2931 /*
2923 * do_batch > 0 when unmapping pages or inode invalidate/truncate. 2932 * do_batch > 0 when unmapping pages or inode invalidate/truncate.
2924 * In those cases, all pages freed continuously can be expected to be in 2933 * In those cases, all pages freed continuously can be expected to be in
@@ -2938,7 +2947,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
2938 * merge a series of uncharges to an uncharge of res_counter. 2947 * merge a series of uncharges to an uncharge of res_counter.
2939 * If not, we uncharge res_counter ony by one. 2948 * If not, we uncharge res_counter ony by one.
2940 */ 2949 */
2941 if (batch->memcg != mem) 2950 if (batch->memcg != memcg)
2942 goto direct_uncharge; 2951 goto direct_uncharge;
2943 /* remember freed charge and uncharge it later */ 2952 /* remember freed charge and uncharge it later */
2944 batch->nr_pages++; 2953 batch->nr_pages++;
@@ -2946,11 +2955,11 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
2946 batch->memsw_nr_pages++; 2955 batch->memsw_nr_pages++;
2947 return; 2956 return;
2948direct_uncharge: 2957direct_uncharge:
2949 res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE); 2958 res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
2950 if (uncharge_memsw) 2959 if (uncharge_memsw)
2951 res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE); 2960 res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
2952 if (unlikely(batch->memcg != mem)) 2961 if (unlikely(batch->memcg != memcg))
2953 memcg_oom_recover(mem); 2962 memcg_oom_recover(memcg);
2954 return; 2963 return;
2955} 2964}
2956 2965
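
mem_cgroup_do_uncharge() only accumulates freed pages into the per-task batch (batch->memcg in the hunk above) while a batch is open for the same group; pages from a different group, and the final flush, go to the res_counter directly. A userspace sketch of that batching, with an illustrative struct batch standing in for the task's batch state:

/*
 * Userspace sketch (not kernel code): accumulate uncharges for one group and
 * flush them in a single shared-counter update; foreign groups bypass it.
 */
#include <stdio.h>

struct batch { void *group; unsigned long nr_pages; int active; };

static struct batch current_batch;
static unsigned long counter_ops;   /* how often the shared counter was touched */

static void counter_uncharge(unsigned long nr_pages)
{
    counter_ops++;                  /* one shared-counter update */
    (void)nr_pages;
}

static void do_uncharge(void *group, unsigned long nr_pages)
{
    if (!current_batch.active || current_batch.group != group) {
        counter_uncharge(nr_pages); /* direct uncharge, no batching */
        return;
    }
    current_batch.nr_pages += nr_pages;     /* remember, flush later */
}

static void batch_start(void *group)
{
    current_batch.group = group;
    current_batch.nr_pages = 0;
    current_batch.active = 1;
}

static void batch_end(void)
{
    if (current_batch.nr_pages)
        counter_uncharge(current_batch.nr_pages);
    current_batch.active = 0;
}

int main(void)
{
    int group_a, group_b, i;

    batch_start(&group_a);
    for (i = 0; i < 1000; i++)
        do_uncharge(&group_a, 1);
    do_uncharge(&group_b, 1);   /* foreign group: goes straight to the counter */
    batch_end();
    printf("shared-counter updates: %lu\n", counter_ops);  /* 2, not 1001 */
    return 0;
}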
@@ -2960,7 +2969,7 @@ direct_uncharge:
2960static struct mem_cgroup * 2969static struct mem_cgroup *
2961__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) 2970__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
2962{ 2971{
2963 struct mem_cgroup *mem = NULL; 2972 struct mem_cgroup *memcg = NULL;
2964 unsigned int nr_pages = 1; 2973 unsigned int nr_pages = 1;
2965 struct page_cgroup *pc; 2974 struct page_cgroup *pc;
2966 2975
@@ -2983,7 +2992,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
2983 2992
2984 lock_page_cgroup(pc); 2993 lock_page_cgroup(pc);
2985 2994
2986 mem = pc->mem_cgroup; 2995 memcg = pc->mem_cgroup;
2987 2996
2988 if (!PageCgroupUsed(pc)) 2997 if (!PageCgroupUsed(pc))
2989 goto unlock_out; 2998 goto unlock_out;
@@ -3006,7 +3015,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
3006 break; 3015 break;
3007 } 3016 }
3008 3017
3009 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages); 3018 mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -nr_pages);
3010 3019
3011 ClearPageCgroupUsed(pc); 3020 ClearPageCgroupUsed(pc);
3012 /* 3021 /*
@@ -3018,18 +3027,18 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
3018 3027
3019 unlock_page_cgroup(pc); 3028 unlock_page_cgroup(pc);
3020 /* 3029 /*
3021 * even after unlock, we have mem->res.usage here and this memcg 3030 * even after unlock, we have memcg->res.usage here and this memcg
3022 * will never be freed. 3031 * will never be freed.
3023 */ 3032 */
3024 memcg_check_events(mem, page); 3033 memcg_check_events(memcg, page);
3025 if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { 3034 if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
3026 mem_cgroup_swap_statistics(mem, true); 3035 mem_cgroup_swap_statistics(memcg, true);
3027 mem_cgroup_get(mem); 3036 mem_cgroup_get(memcg);
3028 } 3037 }
3029 if (!mem_cgroup_is_root(mem)) 3038 if (!mem_cgroup_is_root(memcg))
3030 mem_cgroup_do_uncharge(mem, nr_pages, ctype); 3039 mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
3031 3040
3032 return mem; 3041 return memcg;
3033 3042
3034unlock_out: 3043unlock_out:
3035 unlock_page_cgroup(pc); 3044 unlock_page_cgroup(pc);
@@ -3219,7 +3228,7 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
3219int mem_cgroup_prepare_migration(struct page *page, 3228int mem_cgroup_prepare_migration(struct page *page,
3220 struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask) 3229 struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
3221{ 3230{
3222 struct mem_cgroup *mem = NULL; 3231 struct mem_cgroup *memcg = NULL;
3223 struct page_cgroup *pc; 3232 struct page_cgroup *pc;
3224 enum charge_type ctype; 3233 enum charge_type ctype;
3225 int ret = 0; 3234 int ret = 0;
@@ -3233,8 +3242,8 @@ int mem_cgroup_prepare_migration(struct page *page,
3233 pc = lookup_page_cgroup(page); 3242 pc = lookup_page_cgroup(page);
3234 lock_page_cgroup(pc); 3243 lock_page_cgroup(pc);
3235 if (PageCgroupUsed(pc)) { 3244 if (PageCgroupUsed(pc)) {
3236 mem = pc->mem_cgroup; 3245 memcg = pc->mem_cgroup;
3237 css_get(&mem->css); 3246 css_get(&memcg->css);
3238 /* 3247 /*
3239 * At migrating an anonymous page, its mapcount goes down 3248 * At migrating an anonymous page, its mapcount goes down
3240 * to 0 and uncharge() will be called. But, even if it's fully 3249 * to 0 and uncharge() will be called. But, even if it's fully
@@ -3272,12 +3281,12 @@ int mem_cgroup_prepare_migration(struct page *page,
3272 * If the page is not charged at this point, 3281 * If the page is not charged at this point,
3273 * we return here. 3282 * we return here.
3274 */ 3283 */
3275 if (!mem) 3284 if (!memcg)
3276 return 0; 3285 return 0;
3277 3286
3278 *ptr = mem; 3287 *ptr = memcg;
3279 ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false); 3288 ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
3280 css_put(&mem->css);/* drop extra refcnt */ 3289 css_put(&memcg->css);/* drop extra refcnt */
3281 if (ret || *ptr == NULL) { 3290 if (ret || *ptr == NULL) {
3282 if (PageAnon(page)) { 3291 if (PageAnon(page)) {
3283 lock_page_cgroup(pc); 3292 lock_page_cgroup(pc);
@@ -3303,21 +3312,21 @@ int mem_cgroup_prepare_migration(struct page *page,
3303 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; 3312 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
3304 else 3313 else
3305 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; 3314 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
3306 __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); 3315 __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
3307 return ret; 3316 return ret;
3308} 3317}
3309 3318
3310/* remove redundant charge if migration failed*/ 3319/* remove redundant charge if migration failed*/
3311void mem_cgroup_end_migration(struct mem_cgroup *mem, 3320void mem_cgroup_end_migration(struct mem_cgroup *memcg,
3312 struct page *oldpage, struct page *newpage, bool migration_ok) 3321 struct page *oldpage, struct page *newpage, bool migration_ok)
3313{ 3322{
3314 struct page *used, *unused; 3323 struct page *used, *unused;
3315 struct page_cgroup *pc; 3324 struct page_cgroup *pc;
3316 3325
3317 if (!mem) 3326 if (!memcg)
3318 return; 3327 return;
3319 /* blocks rmdir() */ 3328 /* blocks rmdir() */
3320 cgroup_exclude_rmdir(&mem->css); 3329 cgroup_exclude_rmdir(&memcg->css);
3321 if (!migration_ok) { 3330 if (!migration_ok) {
3322 used = oldpage; 3331 used = oldpage;
3323 unused = newpage; 3332 unused = newpage;
@@ -3353,7 +3362,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
3353 * So, rmdir()->pre_destroy() can be called while we do this charge. 3362 * So, rmdir()->pre_destroy() can be called while we do this charge.
3354 * In that case, we need to call pre_destroy() again. check it here. 3363 * In that case, we need to call pre_destroy() again. check it here.
3355 */ 3364 */
3356 cgroup_release_and_wakeup_rmdir(&mem->css); 3365 cgroup_release_and_wakeup_rmdir(&memcg->css);
3357} 3366}
3358 3367
3359#ifdef CONFIG_DEBUG_VM 3368#ifdef CONFIG_DEBUG_VM
@@ -3432,7 +3441,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
3432 /* 3441 /*
3433 * Rather than hide all in some function, I do this in 3442 * Rather than hide all in some function, I do this in
3434 * open coded manner. You see what this really does. 3443 * open coded manner. You see what this really does.
3435 * We have to guarantee mem->res.limit < mem->memsw.limit. 3444 * We have to guarantee memcg->res.limit < memcg->memsw.limit.
3436 */ 3445 */
3437 mutex_lock(&set_limit_mutex); 3446 mutex_lock(&set_limit_mutex);
3438 memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); 3447 memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
@@ -3494,7 +3503,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
3494 /* 3503 /*
3495 * Rather than hide all in some function, I do this in 3504 * Rather than hide all in some function, I do this in
3496 * open coded manner. You see what this really does. 3505 * open coded manner. You see what this really does.
3497 * We have to guarantee mem->res.limit < mem->memsw.limit. 3506 * We have to guarantee memcg->res.limit < memcg->memsw.limit.
3498 */ 3507 */
3499 mutex_lock(&set_limit_mutex); 3508 mutex_lock(&set_limit_mutex);
3500 memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); 3509 memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -3632,7 +3641,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3632 * This routine traverse page_cgroup in given list and drop them all. 3641 * This routine traverse page_cgroup in given list and drop them all.
3633 * *And* this routine doesn't reclaim page itself, just removes page_cgroup. 3642 * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
3634 */ 3643 */
3635static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, 3644static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
3636 int node, int zid, enum lru_list lru) 3645 int node, int zid, enum lru_list lru)
3637{ 3646{
3638 struct zone *zone; 3647 struct zone *zone;
@@ -3643,7 +3652,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
3643 int ret = 0; 3652 int ret = 0;
3644 3653
3645 zone = &NODE_DATA(node)->node_zones[zid]; 3654 zone = &NODE_DATA(node)->node_zones[zid];
3646 mz = mem_cgroup_zoneinfo(mem, node, zid); 3655 mz = mem_cgroup_zoneinfo(memcg, node, zid);
3647 list = &mz->lists[lru]; 3656 list = &mz->lists[lru];
3648 3657
3649 loop = MEM_CGROUP_ZSTAT(mz, lru); 3658 loop = MEM_CGROUP_ZSTAT(mz, lru);
@@ -3670,7 +3679,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
3670 3679
3671 page = lookup_cgroup_page(pc); 3680 page = lookup_cgroup_page(pc);
3672 3681
3673 ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); 3682 ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
3674 if (ret == -ENOMEM) 3683 if (ret == -ENOMEM)
3675 break; 3684 break;
3676 3685
@@ -3691,14 +3700,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
3691 * make mem_cgroup's charge to be 0 if there is no task. 3700 * make mem_cgroup's charge to be 0 if there is no task.
3692 * This enables deleting this mem_cgroup. 3701 * This enables deleting this mem_cgroup.
3693 */ 3702 */
3694static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) 3703static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)
3695{ 3704{
3696 int ret; 3705 int ret;
3697 int node, zid, shrink; 3706 int node, zid, shrink;
3698 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 3707 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
3699 struct cgroup *cgrp = mem->css.cgroup; 3708 struct cgroup *cgrp = memcg->css.cgroup;
3700 3709
3701 css_get(&mem->css); 3710 css_get(&memcg->css);
3702 3711
3703 shrink = 0; 3712 shrink = 0;
3704 /* should free all ? */ 3713 /* should free all ? */
@@ -3714,14 +3723,14 @@ move_account:
3714 goto out; 3723 goto out;
3715 /* This is for making all *used* pages to be on LRU. */ 3724 /* This is for making all *used* pages to be on LRU. */
3716 lru_add_drain_all(); 3725 lru_add_drain_all();
3717 drain_all_stock_sync(mem); 3726 drain_all_stock_sync(memcg);
3718 ret = 0; 3727 ret = 0;
3719 mem_cgroup_start_move(mem); 3728 mem_cgroup_start_move(memcg);
3720 for_each_node_state(node, N_HIGH_MEMORY) { 3729 for_each_node_state(node, N_HIGH_MEMORY) {
3721 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { 3730 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
3722 enum lru_list l; 3731 enum lru_list l;
3723 for_each_lru(l) { 3732 for_each_lru(l) {
3724 ret = mem_cgroup_force_empty_list(mem, 3733 ret = mem_cgroup_force_empty_list(memcg,
3725 node, zid, l); 3734 node, zid, l);
3726 if (ret) 3735 if (ret)
3727 break; 3736 break;
@@ -3730,16 +3739,16 @@ move_account:
3730 if (ret) 3739 if (ret)
3731 break; 3740 break;
3732 } 3741 }
3733 mem_cgroup_end_move(mem); 3742 mem_cgroup_end_move(memcg);
3734 memcg_oom_recover(mem); 3743 memcg_oom_recover(memcg);
3735 /* it seems parent cgroup doesn't have enough mem */ 3744 /* it seems parent cgroup doesn't have enough mem */
3736 if (ret == -ENOMEM) 3745 if (ret == -ENOMEM)
3737 goto try_to_free; 3746 goto try_to_free;
3738 cond_resched(); 3747 cond_resched();
3739 /* "ret" should also be checked to ensure all lists are empty. */ 3748 /* "ret" should also be checked to ensure all lists are empty. */
3740 } while (mem->res.usage > 0 || ret); 3749 } while (memcg->res.usage > 0 || ret);
3741out: 3750out:
3742 css_put(&mem->css); 3751 css_put(&memcg->css);
3743 return ret; 3752 return ret;
3744 3753
3745try_to_free: 3754try_to_free:
@@ -3752,14 +3761,14 @@ try_to_free:
3752 lru_add_drain_all(); 3761 lru_add_drain_all();
3753 /* try to free all pages in this cgroup */ 3762 /* try to free all pages in this cgroup */
3754 shrink = 1; 3763 shrink = 1;
3755 while (nr_retries && mem->res.usage > 0) { 3764 while (nr_retries && memcg->res.usage > 0) {
3756 int progress; 3765 int progress;
3757 3766
3758 if (signal_pending(current)) { 3767 if (signal_pending(current)) {
3759 ret = -EINTR; 3768 ret = -EINTR;
3760 goto out; 3769 goto out;
3761 } 3770 }
3762 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, 3771 progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
3763 false); 3772 false);
3764 if (!progress) { 3773 if (!progress) {
3765 nr_retries--; 3774 nr_retries--;
@@ -3788,12 +3797,12 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3788 u64 val) 3797 u64 val)
3789{ 3798{
3790 int retval = 0; 3799 int retval = 0;
3791 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 3800 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
3792 struct cgroup *parent = cont->parent; 3801 struct cgroup *parent = cont->parent;
3793 struct mem_cgroup *parent_mem = NULL; 3802 struct mem_cgroup *parent_memcg = NULL;
3794 3803
3795 if (parent) 3804 if (parent)
3796 parent_mem = mem_cgroup_from_cont(parent); 3805 parent_memcg = mem_cgroup_from_cont(parent);
3797 3806
3798 cgroup_lock(); 3807 cgroup_lock();
3799 /* 3808 /*
@@ -3804,10 +3813,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3804 * For the root cgroup, parent_mem is NULL, we allow value to be 3813 * For the root cgroup, parent_mem is NULL, we allow value to be
3805 * set if there are no children. 3814 * set if there are no children.
3806 */ 3815 */
3807 if ((!parent_mem || !parent_mem->use_hierarchy) && 3816 if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
3808 (val == 1 || val == 0)) { 3817 (val == 1 || val == 0)) {
3809 if (list_empty(&cont->children)) 3818 if (list_empty(&cont->children))
3810 mem->use_hierarchy = val; 3819 memcg->use_hierarchy = val;
3811 else 3820 else
3812 retval = -EBUSY; 3821 retval = -EBUSY;
3813 } else 3822 } else
@@ -3818,14 +3827,14 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3818} 3827}
3819 3828
3820 3829
3821static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem, 3830static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
3822 enum mem_cgroup_stat_index idx) 3831 enum mem_cgroup_stat_index idx)
3823{ 3832{
3824 struct mem_cgroup *iter; 3833 struct mem_cgroup *iter;
3825 long val = 0; 3834 long val = 0;
3826 3835
3827 /* Per-cpu values can be negative, use a signed accumulator */ 3836 /* Per-cpu values can be negative, use a signed accumulator */
3828 for_each_mem_cgroup_tree(iter, mem) 3837 for_each_mem_cgroup_tree(iter, memcg)
3829 val += mem_cgroup_read_stat(iter, idx); 3838 val += mem_cgroup_read_stat(iter, idx);
3830 3839
3831 if (val < 0) /* race ? */ 3840 if (val < 0) /* race ? */
@@ -3833,29 +3842,29 @@ static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem,
3833 return val; 3842 return val;
3834} 3843}
3835 3844
3836static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) 3845static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
3837{ 3846{
3838 u64 val; 3847 u64 val;
3839 3848
3840 if (!mem_cgroup_is_root(mem)) { 3849 if (!mem_cgroup_is_root(memcg)) {
3841 if (!swap) 3850 if (!swap)
3842 return res_counter_read_u64(&mem->res, RES_USAGE); 3851 return res_counter_read_u64(&memcg->res, RES_USAGE);
3843 else 3852 else
3844 return res_counter_read_u64(&mem->memsw, RES_USAGE); 3853 return res_counter_read_u64(&memcg->memsw, RES_USAGE);
3845 } 3854 }
3846 3855
3847 val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE); 3856 val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
3848 val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS); 3857 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
3849 3858
3850 if (swap) 3859 if (swap)
3851 val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT); 3860 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
3852 3861
3853 return val << PAGE_SHIFT; 3862 return val << PAGE_SHIFT;
3854} 3863}
3855 3864
3856static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 3865static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
3857{ 3866{
3858 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 3867 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
3859 u64 val; 3868 u64 val;
3860 int type, name; 3869 int type, name;
3861 3870
@@ -3864,15 +3873,15 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
3864 switch (type) { 3873 switch (type) {
3865 case _MEM: 3874 case _MEM:
3866 if (name == RES_USAGE) 3875 if (name == RES_USAGE)
3867 val = mem_cgroup_usage(mem, false); 3876 val = mem_cgroup_usage(memcg, false);
3868 else 3877 else
3869 val = res_counter_read_u64(&mem->res, name); 3878 val = res_counter_read_u64(&memcg->res, name);
3870 break; 3879 break;
3871 case _MEMSWAP: 3880 case _MEMSWAP:
3872 if (name == RES_USAGE) 3881 if (name == RES_USAGE)
3873 val = mem_cgroup_usage(mem, true); 3882 val = mem_cgroup_usage(memcg, true);
3874 else 3883 else
3875 val = res_counter_read_u64(&mem->memsw, name); 3884 val = res_counter_read_u64(&memcg->memsw, name);
3876 break; 3885 break;
3877 default: 3886 default:
3878 BUG(); 3887 BUG();
@@ -3960,24 +3969,24 @@ out:
3960 3969
3961static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) 3970static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
3962{ 3971{
3963 struct mem_cgroup *mem; 3972 struct mem_cgroup *memcg;
3964 int type, name; 3973 int type, name;
3965 3974
3966 mem = mem_cgroup_from_cont(cont); 3975 memcg = mem_cgroup_from_cont(cont);
3967 type = MEMFILE_TYPE(event); 3976 type = MEMFILE_TYPE(event);
3968 name = MEMFILE_ATTR(event); 3977 name = MEMFILE_ATTR(event);
3969 switch (name) { 3978 switch (name) {
3970 case RES_MAX_USAGE: 3979 case RES_MAX_USAGE:
3971 if (type == _MEM) 3980 if (type == _MEM)
3972 res_counter_reset_max(&mem->res); 3981 res_counter_reset_max(&memcg->res);
3973 else 3982 else
3974 res_counter_reset_max(&mem->memsw); 3983 res_counter_reset_max(&memcg->memsw);
3975 break; 3984 break;
3976 case RES_FAILCNT: 3985 case RES_FAILCNT:
3977 if (type == _MEM) 3986 if (type == _MEM)
3978 res_counter_reset_failcnt(&mem->res); 3987 res_counter_reset_failcnt(&memcg->res);
3979 else 3988 else
3980 res_counter_reset_failcnt(&mem->memsw); 3989 res_counter_reset_failcnt(&memcg->memsw);
3981 break; 3990 break;
3982 } 3991 }
3983 3992
@@ -3994,7 +4003,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
3994static int mem_cgroup_move_charge_write(struct cgroup *cgrp, 4003static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
3995 struct cftype *cft, u64 val) 4004 struct cftype *cft, u64 val)
3996{ 4005{
3997 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4006 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
3998 4007
3999 if (val >= (1 << NR_MOVE_TYPE)) 4008 if (val >= (1 << NR_MOVE_TYPE))
4000 return -EINVAL; 4009 return -EINVAL;
@@ -4004,7 +4013,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
4004 * inconsistent. 4013 * inconsistent.
4005 */ 4014 */
4006 cgroup_lock(); 4015 cgroup_lock();
4007 mem->move_charge_at_immigrate = val; 4016 memcg->move_charge_at_immigrate = val;
4008 cgroup_unlock(); 4017 cgroup_unlock();
4009 4018
4010 return 0; 4019 return 0;
@@ -4061,49 +4070,49 @@ struct {
4061 4070
4062 4071
4063static void 4072static void
4064mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) 4073mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
4065{ 4074{
4066 s64 val; 4075 s64 val;
4067 4076
4068 /* per cpu stat */ 4077 /* per cpu stat */
4069 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); 4078 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
4070 s->stat[MCS_CACHE] += val * PAGE_SIZE; 4079 s->stat[MCS_CACHE] += val * PAGE_SIZE;
4071 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); 4080 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
4072 s->stat[MCS_RSS] += val * PAGE_SIZE; 4081 s->stat[MCS_RSS] += val * PAGE_SIZE;
4073 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); 4082 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
4074 s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; 4083 s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
4075 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGIN); 4084 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN);
4076 s->stat[MCS_PGPGIN] += val; 4085 s->stat[MCS_PGPGIN] += val;
4077 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGOUT); 4086 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT);
4078 s->stat[MCS_PGPGOUT] += val; 4087 s->stat[MCS_PGPGOUT] += val;
4079 if (do_swap_account) { 4088 if (do_swap_account) {
4080 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); 4089 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
4081 s->stat[MCS_SWAP] += val * PAGE_SIZE; 4090 s->stat[MCS_SWAP] += val * PAGE_SIZE;
4082 } 4091 }
4083 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT); 4092 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT);
4084 s->stat[MCS_PGFAULT] += val; 4093 s->stat[MCS_PGFAULT] += val;
4085 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT); 4094 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT);
4086 s->stat[MCS_PGMAJFAULT] += val; 4095 s->stat[MCS_PGMAJFAULT] += val;
4087 4096
4088 /* per zone stat */ 4097 /* per zone stat */
4089 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON)); 4098 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
4090 s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; 4099 s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE;
4091 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_ANON)); 4100 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
4092 s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; 4101 s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE;
4093 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_FILE)); 4102 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
4094 s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; 4103 s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE;
4095 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_FILE)); 4104 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
4096 s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; 4105 s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
4097 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_UNEVICTABLE)); 4106 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
4098 s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; 4107 s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
4099} 4108}
4100 4109
4101static void 4110static void
4102mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) 4111mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
4103{ 4112{
4104 struct mem_cgroup *iter; 4113 struct mem_cgroup *iter;
4105 4114
4106 for_each_mem_cgroup_tree(iter, mem) 4115 for_each_mem_cgroup_tree(iter, memcg)
4107 mem_cgroup_get_local_stat(iter, s); 4116 mem_cgroup_get_local_stat(iter, s);
4108} 4117}
4109 4118
@@ -4189,8 +4198,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
4189 } 4198 }
4190 4199
4191#ifdef CONFIG_DEBUG_VM 4200#ifdef CONFIG_DEBUG_VM
4192 cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
4193
4194 { 4201 {
4195 int nid, zid; 4202 int nid, zid;
4196 struct mem_cgroup_per_zone *mz; 4203 struct mem_cgroup_per_zone *mz;
@@ -4327,20 +4334,20 @@ static int compare_thresholds(const void *a, const void *b)
4327 return _a->threshold - _b->threshold; 4334 return _a->threshold - _b->threshold;
4328} 4335}
4329 4336
4330static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem) 4337static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
4331{ 4338{
4332 struct mem_cgroup_eventfd_list *ev; 4339 struct mem_cgroup_eventfd_list *ev;
4333 4340
4334 list_for_each_entry(ev, &mem->oom_notify, list) 4341 list_for_each_entry(ev, &memcg->oom_notify, list)
4335 eventfd_signal(ev->eventfd, 1); 4342 eventfd_signal(ev->eventfd, 1);
4336 return 0; 4343 return 0;
4337} 4344}
4338 4345
4339static void mem_cgroup_oom_notify(struct mem_cgroup *mem) 4346static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
4340{ 4347{
4341 struct mem_cgroup *iter; 4348 struct mem_cgroup *iter;
4342 4349
4343 for_each_mem_cgroup_tree(iter, mem) 4350 for_each_mem_cgroup_tree(iter, memcg)
4344 mem_cgroup_oom_notify_cb(iter); 4351 mem_cgroup_oom_notify_cb(iter);
4345} 4352}
4346 4353
@@ -4530,7 +4537,7 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
4530static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, 4537static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
4531 struct cftype *cft, struct eventfd_ctx *eventfd) 4538 struct cftype *cft, struct eventfd_ctx *eventfd)
4532{ 4539{
4533 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4540 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
4534 struct mem_cgroup_eventfd_list *ev, *tmp; 4541 struct mem_cgroup_eventfd_list *ev, *tmp;
4535 int type = MEMFILE_TYPE(cft->private); 4542 int type = MEMFILE_TYPE(cft->private);
4536 4543
@@ -4538,7 +4545,7 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
4538 4545
4539 spin_lock(&memcg_oom_lock); 4546 spin_lock(&memcg_oom_lock);
4540 4547
4541 list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) { 4548 list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
4542 if (ev->eventfd == eventfd) { 4549 if (ev->eventfd == eventfd) {
4543 list_del(&ev->list); 4550 list_del(&ev->list);
4544 kfree(ev); 4551 kfree(ev);
@@ -4551,11 +4558,11 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
4551static int mem_cgroup_oom_control_read(struct cgroup *cgrp, 4558static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
4552 struct cftype *cft, struct cgroup_map_cb *cb) 4559 struct cftype *cft, struct cgroup_map_cb *cb)
4553{ 4560{
4554 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4561 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
4555 4562
4556 cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable); 4563 cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable);
4557 4564
4558 if (atomic_read(&mem->under_oom)) 4565 if (atomic_read(&memcg->under_oom))
4559 cb->fill(cb, "under_oom", 1); 4566 cb->fill(cb, "under_oom", 1);
4560 else 4567 else
4561 cb->fill(cb, "under_oom", 0); 4568 cb->fill(cb, "under_oom", 0);
@@ -4565,7 +4572,7 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
4565static int mem_cgroup_oom_control_write(struct cgroup *cgrp, 4572static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
4566 struct cftype *cft, u64 val) 4573 struct cftype *cft, u64 val)
4567{ 4574{
4568 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4575 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
4569 struct mem_cgroup *parent; 4576 struct mem_cgroup *parent;
4570 4577
4571 /* cannot set to root cgroup and only 0 and 1 are allowed */ 4578 /* cannot set to root cgroup and only 0 and 1 are allowed */
@@ -4577,13 +4584,13 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
4577 cgroup_lock(); 4584 cgroup_lock();
4578 /* oom-kill-disable is a flag for subhierarchy. */ 4585 /* oom-kill-disable is a flag for subhierarchy. */
4579 if ((parent->use_hierarchy) || 4586 if ((parent->use_hierarchy) ||
4580 (mem->use_hierarchy && !list_empty(&cgrp->children))) { 4587 (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
4581 cgroup_unlock(); 4588 cgroup_unlock();
4582 return -EINVAL; 4589 return -EINVAL;
4583 } 4590 }
4584 mem->oom_kill_disable = val; 4591 memcg->oom_kill_disable = val;
4585 if (!val) 4592 if (!val)
4586 memcg_oom_recover(mem); 4593 memcg_oom_recover(memcg);
4587 cgroup_unlock(); 4594 cgroup_unlock();
4588 return 0; 4595 return 0;
4589} 4596}
@@ -4719,7 +4726,7 @@ static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
4719} 4726}
4720#endif 4727#endif
4721 4728
4722static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) 4729static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4723{ 4730{
4724 struct mem_cgroup_per_node *pn; 4731 struct mem_cgroup_per_node *pn;
4725 struct mem_cgroup_per_zone *mz; 4732 struct mem_cgroup_per_zone *mz;
@@ -4739,21 +4746,21 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
4739 if (!pn) 4746 if (!pn)
4740 return 1; 4747 return 1;
4741 4748
4742 mem->info.nodeinfo[node] = pn;
4743 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 4749 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
4744 mz = &pn->zoneinfo[zone]; 4750 mz = &pn->zoneinfo[zone];
4745 for_each_lru(l) 4751 for_each_lru(l)
4746 INIT_LIST_HEAD(&mz->lists[l]); 4752 INIT_LIST_HEAD(&mz->lists[l]);
4747 mz->usage_in_excess = 0; 4753 mz->usage_in_excess = 0;
4748 mz->on_tree = false; 4754 mz->on_tree = false;
4749 mz->mem = mem; 4755 mz->mem = memcg;
4750 } 4756 }
4757 memcg->info.nodeinfo[node] = pn;
4751 return 0; 4758 return 0;
4752} 4759}
4753 4760
4754static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) 4761static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4755{ 4762{
4756 kfree(mem->info.nodeinfo[node]); 4763 kfree(memcg->info.nodeinfo[node]);
4757} 4764}
4758 4765
4759static struct mem_cgroup *mem_cgroup_alloc(void) 4766static struct mem_cgroup *mem_cgroup_alloc(void)
@@ -4795,51 +4802,51 @@ out_free:
4795 * Removal of cgroup itself succeeds regardless of refs from swap. 4802 * Removal of cgroup itself succeeds regardless of refs from swap.
4796 */ 4803 */
4797 4804
4798static void __mem_cgroup_free(struct mem_cgroup *mem) 4805static void __mem_cgroup_free(struct mem_cgroup *memcg)
4799{ 4806{
4800 int node; 4807 int node;
4801 4808
4802 mem_cgroup_remove_from_trees(mem); 4809 mem_cgroup_remove_from_trees(memcg);
4803 free_css_id(&mem_cgroup_subsys, &mem->css); 4810 free_css_id(&mem_cgroup_subsys, &memcg->css);
4804 4811
4805 for_each_node_state(node, N_POSSIBLE) 4812 for_each_node_state(node, N_POSSIBLE)
4806 free_mem_cgroup_per_zone_info(mem, node); 4813 free_mem_cgroup_per_zone_info(memcg, node);
4807 4814
4808 free_percpu(mem->stat); 4815 free_percpu(memcg->stat);
4809 if (sizeof(struct mem_cgroup) < PAGE_SIZE) 4816 if (sizeof(struct mem_cgroup) < PAGE_SIZE)
4810 kfree(mem); 4817 kfree(memcg);
4811 else 4818 else
4812 vfree(mem); 4819 vfree(memcg);
4813} 4820}
4814 4821
4815static void mem_cgroup_get(struct mem_cgroup *mem) 4822static void mem_cgroup_get(struct mem_cgroup *memcg)
4816{ 4823{
4817 atomic_inc(&mem->refcnt); 4824 atomic_inc(&memcg->refcnt);
4818} 4825}
4819 4826
4820static void __mem_cgroup_put(struct mem_cgroup *mem, int count) 4827static void __mem_cgroup_put(struct mem_cgroup *memcg, int count)
4821{ 4828{
4822 if (atomic_sub_and_test(count, &mem->refcnt)) { 4829 if (atomic_sub_and_test(count, &memcg->refcnt)) {
4823 struct mem_cgroup *parent = parent_mem_cgroup(mem); 4830 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
4824 __mem_cgroup_free(mem); 4831 __mem_cgroup_free(memcg);
4825 if (parent) 4832 if (parent)
4826 mem_cgroup_put(parent); 4833 mem_cgroup_put(parent);
4827 } 4834 }
4828} 4835}
4829 4836
4830static void mem_cgroup_put(struct mem_cgroup *mem) 4837static void mem_cgroup_put(struct mem_cgroup *memcg)
4831{ 4838{
4832 __mem_cgroup_put(mem, 1); 4839 __mem_cgroup_put(memcg, 1);
4833} 4840}
4834 4841
4835/* 4842/*
4836 * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. 4843 * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
4837 */ 4844 */
4838static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem) 4845static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
4839{ 4846{
4840 if (!mem->res.parent) 4847 if (!memcg->res.parent)
4841 return NULL; 4848 return NULL;
4842 return mem_cgroup_from_res_counter(mem->res.parent, res); 4849 return mem_cgroup_from_res_counter(memcg->res.parent, res);
4843} 4850}
4844 4851
4845#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4852#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -4882,16 +4889,16 @@ static int mem_cgroup_soft_limit_tree_init(void)
4882static struct cgroup_subsys_state * __ref 4889static struct cgroup_subsys_state * __ref
4883mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 4890mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
4884{ 4891{
4885 struct mem_cgroup *mem, *parent; 4892 struct mem_cgroup *memcg, *parent;
4886 long error = -ENOMEM; 4893 long error = -ENOMEM;
4887 int node; 4894 int node;
4888 4895
4889 mem = mem_cgroup_alloc(); 4896 memcg = mem_cgroup_alloc();
4890 if (!mem) 4897 if (!memcg)
4891 return ERR_PTR(error); 4898 return ERR_PTR(error);
4892 4899
4893 for_each_node_state(node, N_POSSIBLE) 4900 for_each_node_state(node, N_POSSIBLE)
4894 if (alloc_mem_cgroup_per_zone_info(mem, node)) 4901 if (alloc_mem_cgroup_per_zone_info(memcg, node))
4895 goto free_out; 4902 goto free_out;
4896 4903
4897 /* root ? */ 4904 /* root ? */
@@ -4899,7 +4906,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
4899 int cpu; 4906 int cpu;
4900 enable_swap_cgroup(); 4907 enable_swap_cgroup();
4901 parent = NULL; 4908 parent = NULL;
4902 root_mem_cgroup = mem; 4909 root_mem_cgroup = memcg;
4903 if (mem_cgroup_soft_limit_tree_init()) 4910 if (mem_cgroup_soft_limit_tree_init())
4904 goto free_out; 4911 goto free_out;
4905 for_each_possible_cpu(cpu) { 4912 for_each_possible_cpu(cpu) {
@@ -4910,13 +4917,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
4910 hotcpu_notifier(memcg_cpu_hotplug_callback, 0); 4917 hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
4911 } else { 4918 } else {
4912 parent = mem_cgroup_from_cont(cont->parent); 4919 parent = mem_cgroup_from_cont(cont->parent);
4913 mem->use_hierarchy = parent->use_hierarchy; 4920 memcg->use_hierarchy = parent->use_hierarchy;
4914 mem->oom_kill_disable = parent->oom_kill_disable; 4921 memcg->oom_kill_disable = parent->oom_kill_disable;
4915 } 4922 }
4916 4923
4917 if (parent && parent->use_hierarchy) { 4924 if (parent && parent->use_hierarchy) {
4918 res_counter_init(&mem->res, &parent->res); 4925 res_counter_init(&memcg->res, &parent->res);
4919 res_counter_init(&mem->memsw, &parent->memsw); 4926 res_counter_init(&memcg->memsw, &parent->memsw);
4920 /* 4927 /*
4921 * We increment refcnt of the parent to ensure that we can 4928 * We increment refcnt of the parent to ensure that we can
4922 * safely access it on res_counter_charge/uncharge. 4929 * safely access it on res_counter_charge/uncharge.
@@ -4925,21 +4932,21 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
4925 */ 4932 */
4926 mem_cgroup_get(parent); 4933 mem_cgroup_get(parent);
4927 } else { 4934 } else {
4928 res_counter_init(&mem->res, NULL); 4935 res_counter_init(&memcg->res, NULL);
4929 res_counter_init(&mem->memsw, NULL); 4936 res_counter_init(&memcg->memsw, NULL);
4930 } 4937 }
4931 mem->last_scanned_child = 0; 4938 memcg->last_scanned_child = 0;
4932 mem->last_scanned_node = MAX_NUMNODES; 4939 memcg->last_scanned_node = MAX_NUMNODES;
4933 INIT_LIST_HEAD(&mem->oom_notify); 4940 INIT_LIST_HEAD(&memcg->oom_notify);
4934 4941
4935 if (parent) 4942 if (parent)
4936 mem->swappiness = mem_cgroup_swappiness(parent); 4943 memcg->swappiness = mem_cgroup_swappiness(parent);
4937 atomic_set(&mem->refcnt, 1); 4944 atomic_set(&memcg->refcnt, 1);
4938 mem->move_charge_at_immigrate = 0; 4945 memcg->move_charge_at_immigrate = 0;
4939 mutex_init(&mem->thresholds_lock); 4946 mutex_init(&memcg->thresholds_lock);
4940 return &mem->css; 4947 return &memcg->css;
4941free_out: 4948free_out:
4942 __mem_cgroup_free(mem); 4949 __mem_cgroup_free(memcg);
4943 root_mem_cgroup = NULL; 4950 root_mem_cgroup = NULL;
4944 return ERR_PTR(error); 4951 return ERR_PTR(error);
4945} 4952}
@@ -4947,17 +4954,17 @@ free_out:
4947static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, 4954static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
4948 struct cgroup *cont) 4955 struct cgroup *cont)
4949{ 4956{
4950 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 4957 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
4951 4958
4952 return mem_cgroup_force_empty(mem, false); 4959 return mem_cgroup_force_empty(memcg, false);
4953} 4960}
4954 4961
4955static void mem_cgroup_destroy(struct cgroup_subsys *ss, 4962static void mem_cgroup_destroy(struct cgroup_subsys *ss,
4956 struct cgroup *cont) 4963 struct cgroup *cont)
4957{ 4964{
4958 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 4965 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
4959 4966
4960 mem_cgroup_put(mem); 4967 mem_cgroup_put(memcg);
4961} 4968}
4962 4969
4963static int mem_cgroup_populate(struct cgroup_subsys *ss, 4970static int mem_cgroup_populate(struct cgroup_subsys *ss,
@@ -4980,9 +4987,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
4980{ 4987{
4981 int ret = 0; 4988 int ret = 0;
4982 int batch_count = PRECHARGE_COUNT_AT_ONCE; 4989 int batch_count = PRECHARGE_COUNT_AT_ONCE;
4983 struct mem_cgroup *mem = mc.to; 4990 struct mem_cgroup *memcg = mc.to;
4984 4991
4985 if (mem_cgroup_is_root(mem)) { 4992 if (mem_cgroup_is_root(memcg)) {
4986 mc.precharge += count; 4993 mc.precharge += count;
4987 /* we don't need css_get for root */ 4994 /* we don't need css_get for root */
4988 return ret; 4995 return ret;
@@ -4991,16 +4998,16 @@ static int mem_cgroup_do_precharge(unsigned long count)
4991 if (count > 1) { 4998 if (count > 1) {
4992 struct res_counter *dummy; 4999 struct res_counter *dummy;
4993 /* 5000 /*
4994 * "mem" cannot be under rmdir() because we've already checked 5001 * "memcg" cannot be under rmdir() because we've already checked
4995 * by cgroup_lock_live_cgroup() that it is not removed and we 5002 * by cgroup_lock_live_cgroup() that it is not removed and we
4996 * are still under the same cgroup_mutex. So we can postpone 5003 * are still under the same cgroup_mutex. So we can postpone
4997 * css_get(). 5004 * css_get().
4998 */ 5005 */
4999 if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy)) 5006 if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
5000 goto one_by_one; 5007 goto one_by_one;
5001 if (do_swap_account && res_counter_charge(&mem->memsw, 5008 if (do_swap_account && res_counter_charge(&memcg->memsw,
5002 PAGE_SIZE * count, &dummy)) { 5009 PAGE_SIZE * count, &dummy)) {
5003 res_counter_uncharge(&mem->res, PAGE_SIZE * count); 5010 res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
5004 goto one_by_one; 5011 goto one_by_one;
5005 } 5012 }
5006 mc.precharge += count; 5013 mc.precharge += count;
@@ -5017,8 +5024,9 @@ one_by_one:
5017 batch_count = PRECHARGE_COUNT_AT_ONCE; 5024 batch_count = PRECHARGE_COUNT_AT_ONCE;
5018 cond_resched(); 5025 cond_resched();
5019 } 5026 }
5020 ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &mem, false); 5027 ret = __mem_cgroup_try_charge(NULL,
5021 if (ret || !mem) 5028 GFP_KERNEL, 1, &memcg, false);
5029 if (ret || !memcg)
5022 /* mem_cgroup_clear_mc() will do uncharge later */ 5030 /* mem_cgroup_clear_mc() will do uncharge later */
5023 return -ENOMEM; 5031 return -ENOMEM;
5024 mc.precharge++; 5032 mc.precharge++;
@@ -5292,13 +5300,13 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
5292 struct task_struct *p) 5300 struct task_struct *p)
5293{ 5301{
5294 int ret = 0; 5302 int ret = 0;
5295 struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup); 5303 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
5296 5304
5297 if (mem->move_charge_at_immigrate) { 5305 if (memcg->move_charge_at_immigrate) {
5298 struct mm_struct *mm; 5306 struct mm_struct *mm;
5299 struct mem_cgroup *from = mem_cgroup_from_task(p); 5307 struct mem_cgroup *from = mem_cgroup_from_task(p);
5300 5308
5301 VM_BUG_ON(from == mem); 5309 VM_BUG_ON(from == memcg);
5302 5310
5303 mm = get_task_mm(p); 5311 mm = get_task_mm(p);
5304 if (!mm) 5312 if (!mm)
@@ -5313,7 +5321,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
5313 mem_cgroup_start_move(from); 5321 mem_cgroup_start_move(from);
5314 spin_lock(&mc.lock); 5322 spin_lock(&mc.lock);
5315 mc.from = from; 5323 mc.from = from;
5316 mc.to = mem; 5324 mc.to = memcg;
5317 spin_unlock(&mc.lock); 5325 spin_unlock(&mc.lock);
5318 /* We set mc.moving_task later */ 5326 /* We set mc.moving_task later */
5319 5327
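
Beyond the mechanical mem -> memcg rename, the alloc_mem_cgroup_per_zone_info() hunk above also moves the info.nodeinfo[node] assignment below the zone-initialisation loop, so the per-node pointer is only published once every zone entry has been set up. A minimal sketch of that publish-last ordering follows; the memcg_sketch/pernode_sketch types and init_zone_sketch() helper are illustrative assumptions, and whether the original allocation path uses exactly kzalloc_node() is not shown in this hunk.

/* Sketch only: fully initialise the per-node data, then publish it. */
static int alloc_per_node_sketch(struct memcg_sketch *memcg, int node)
{
        struct pernode_sketch *pn;
        int zone;

        pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, node);
        if (!pn)
                return 1;

        for (zone = 0; zone < MAX_NR_ZONES; zone++)
                init_zone_sketch(&pn->zoneinfo[zone]);  /* hypothetical helper */

        memcg->info.nodeinfo[node] = pn;        /* readers only ever see it fully built */
        return 0;
}
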
diff --git a/mm/memory.c b/mm/memory.c
index a56e3ba816b2..b2b87315cdc6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1503,7 +1503,7 @@ split_fallthrough:
1503 } 1503 }
1504 1504
1505 if (flags & FOLL_GET) 1505 if (flags & FOLL_GET)
1506 get_page(page); 1506 get_page_foll(page);
1507 if (flags & FOLL_TOUCH) { 1507 if (flags & FOLL_TOUCH) {
1508 if ((flags & FOLL_WRITE) && 1508 if ((flags & FOLL_WRITE) &&
1509 !pte_dirty(pte) && !PageDirty(page)) 1509 !pte_dirty(pte) && !PageDirty(page))
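
The follow_page() hunk above switches the FOLL_GET reference from get_page() to get_page_foll(): the caller holds the page table lock here, which keeps __split_huge_page_refcount() away, so even a THP tail page can be pinned without taking the compound lock. The helper itself is introduced elsewhere in this series and is only referenced here, so the following is a hedged sketch of the idea rather than its exact definition; __get_page_tail_foll() is the tail-page primitive mentioned in the mm/swap.c comments further down.

/*
 * Sketch only: taking a page reference while the PT lock is held.
 * A tail page cannot be split from under us in this context, so it can
 * be pinned through its compound head without the compound lock.
 */
static inline void get_page_foll_sketch(struct page *page)
{
        if (unlikely(PageTail(page))) {
                __get_page_tail_foll(page, true);       /* serialized by the PT lock */
        } else {
                /* normal page or compound head: _count must already be elevated */
                VM_BUG_ON(atomic_read(&page->_count) <= 0);
                atomic_inc(&page->_count);
        }
}
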
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6bdc67dbbc28..2d123f94a8df 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -133,10 +133,13 @@ struct page *lookup_cgroup_page(struct page_cgroup *pc)
133static void *__meminit alloc_page_cgroup(size_t size, int nid) 133static void *__meminit alloc_page_cgroup(size_t size, int nid)
134{ 134{
135 void *addr = NULL; 135 void *addr = NULL;
136 gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
136 137
137 addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN); 138 addr = alloc_pages_exact_nid(nid, size, flags);
138 if (addr) 139 if (addr) {
140 kmemleak_alloc(addr, size, 1, flags);
139 return addr; 141 return addr;
142 }
140 143
141 if (node_state(nid, N_HIGH_MEMORY)) 144 if (node_state(nid, N_HIGH_MEMORY))
142 addr = vmalloc_node(size, nid); 145 addr = vmalloc_node(size, nid);
@@ -357,7 +360,7 @@ struct swap_cgroup_ctrl {
357 spinlock_t lock; 360 spinlock_t lock;
358}; 361};
359 362
360struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; 363static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
361 364
362struct swap_cgroup { 365struct swap_cgroup {
363 unsigned short id; 366 unsigned short id;
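
Two small things happen in the page_cgroup.c hunks above: swap_cgroup_ctrl[] loses its external linkage, and alloc_page_cgroup() starts registering its page-allocator buffer with kmemleak. kmemleak only tracks the allocators it instruments (slab, vmalloc), so memory obtained via alloc_pages_exact_nid() has to be reported by hand; the arguments are the pointer, the size, a minimum reference count of 1, and the gfp flags kmemleak may use for its own metadata. A sketch of the general pattern, with the kmemleak_free() counterpart a matching free path would use:

/* Sketch only: explicit kmemleak registration for page-allocator memory. */
static void *alloc_tracked_sketch(size_t size, int nid)
{
        gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
        void *addr = alloc_pages_exact_nid(nid, size, flags);

        if (addr)
                kmemleak_alloc(addr, size, 1, flags);   /* min_count = 1 */
        return addr;
}

static void free_tracked_sketch(void *addr, size_t size)
{
        kmemleak_free(addr);            /* unregister before handing pages back */
        free_pages_exact(addr, size);
}
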
diff --git a/mm/swap.c b/mm/swap.c
index 3a442f18b0b3..87627f181c3f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -78,39 +78,22 @@ static void put_compound_page(struct page *page)
78{ 78{
79 if (unlikely(PageTail(page))) { 79 if (unlikely(PageTail(page))) {
80 /* __split_huge_page_refcount can run under us */ 80 /* __split_huge_page_refcount can run under us */
81 struct page *page_head = page->first_page; 81 struct page *page_head = compound_trans_head(page);
82 smp_rmb(); 82
83 /* 83 if (likely(page != page_head &&
84 * If PageTail is still set after smp_rmb() we can be sure 84 get_page_unless_zero(page_head))) {
85 * that the page->first_page we read wasn't a dangling pointer.
86 * See __split_huge_page_refcount() smp_wmb().
87 */
88 if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
89 unsigned long flags; 85 unsigned long flags;
90 /* 86 /*
91 * Verify that our page_head wasn't converted 87 * page_head wasn't a dangling pointer but it
92 * to a a regular page before we got a 88 * may not be a head page anymore by the time
93 * reference on it. 89 * we obtain the lock. That is ok as long as it
90 * can't be freed from under us.
94 */ 91 */
95 if (unlikely(!PageHead(page_head))) {
96 /* PageHead is cleared after PageTail */
97 smp_rmb();
98 VM_BUG_ON(PageTail(page));
99 goto out_put_head;
100 }
101 /*
102 * Only run compound_lock on a valid PageHead,
103 * after having it pinned with
104 * get_page_unless_zero() above.
105 */
106 smp_mb();
107 /* page_head wasn't a dangling pointer */
108 flags = compound_lock_irqsave(page_head); 92 flags = compound_lock_irqsave(page_head);
109 if (unlikely(!PageTail(page))) { 93 if (unlikely(!PageTail(page))) {
110 /* __split_huge_page_refcount run before us */ 94 /* __split_huge_page_refcount run before us */
111 compound_unlock_irqrestore(page_head, flags); 95 compound_unlock_irqrestore(page_head, flags);
112 VM_BUG_ON(PageHead(page_head)); 96 VM_BUG_ON(PageHead(page_head));
113 out_put_head:
114 if (put_page_testzero(page_head)) 97 if (put_page_testzero(page_head))
115 __put_single_page(page_head); 98 __put_single_page(page_head);
116 out_put_single: 99 out_put_single:
@@ -121,16 +104,17 @@ static void put_compound_page(struct page *page)
121 VM_BUG_ON(page_head != page->first_page); 104 VM_BUG_ON(page_head != page->first_page);
122 /* 105 /*
123 * We can release the refcount taken by 106 * We can release the refcount taken by
124 * get_page_unless_zero now that 107 * get_page_unless_zero() now that
125 * split_huge_page_refcount is blocked on the 108 * __split_huge_page_refcount() is blocked on
126 * compound_lock. 109 * the compound_lock.
127 */ 110 */
128 if (put_page_testzero(page_head)) 111 if (put_page_testzero(page_head))
129 VM_BUG_ON(1); 112 VM_BUG_ON(1);
130 /* __split_huge_page_refcount will wait now */ 113 /* __split_huge_page_refcount will wait now */
131 VM_BUG_ON(atomic_read(&page->_count) <= 0); 114 VM_BUG_ON(page_mapcount(page) <= 0);
132 atomic_dec(&page->_count); 115 atomic_dec(&page->_mapcount);
133 VM_BUG_ON(atomic_read(&page_head->_count) <= 0); 116 VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
117 VM_BUG_ON(atomic_read(&page->_count) != 0);
134 compound_unlock_irqrestore(page_head, flags); 118 compound_unlock_irqrestore(page_head, flags);
135 if (put_page_testzero(page_head)) { 119 if (put_page_testzero(page_head)) {
136 if (PageHead(page_head)) 120 if (PageHead(page_head))
@@ -160,6 +144,45 @@ void put_page(struct page *page)
160} 144}
161EXPORT_SYMBOL(put_page); 145EXPORT_SYMBOL(put_page);
162 146
147/*
148 * This function is exported but must not be called by anything other
149 * than get_page(). It implements the slow path of get_page().
150 */
151bool __get_page_tail(struct page *page)
152{
153 /*
154 * This takes care of get_page() if run on a tail page
155 * returned by one of the get_user_pages/follow_page variants.
156 * get_user_pages/follow_page itself doesn't need the compound
157 * lock because it runs __get_page_tail_foll() under the
158 * proper PT lock that already serializes against
159 * split_huge_page().
160 */
161 unsigned long flags;
162 bool got = false;
163 struct page *page_head = compound_trans_head(page);
164
165 if (likely(page != page_head && get_page_unless_zero(page_head))) {
166 /*
167 * page_head wasn't a dangling pointer but it
168 * may not be a head page anymore by the time
169 * we obtain the lock. That is ok as long as it
170 * can't be freed from under us.
171 */
172 flags = compound_lock_irqsave(page_head);
173 /* here __split_huge_page_refcount won't run anymore */
174 if (likely(PageTail(page))) {
175 __get_page_tail_foll(page, false);
176 got = true;
177 }
178 compound_unlock_irqrestore(page_head, flags);
179 if (unlikely(!got))
180 put_page(page_head);
181 }
182 return got;
183}
184EXPORT_SYMBOL(__get_page_tail);
185
163/** 186/**
164 * put_pages_list() - release a list of pages 187 * put_pages_list() - release a list of pages
165 * @pages: list of pages threaded on page->lru 188 * @pages: list of pages threaded on page->lru
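
The comment on __get_page_tail() above states that it must only be called from get_page() and that it implements the slow path of get_page() for THP tail pages. The matching fast path is not part of this hunk, so here is a sketch of how get_page() would hand off to it, assuming the usual structure (the real inline lives in include/linux/mm.h and may differ in detail):

/* Sketch only: a get_page() fast path falling back to the exported slow path above. */
static inline void get_page_sketch(struct page *page)
{
        if (unlikely(PageTail(page)) && likely(__get_page_tail(page)))
                return;
        /* normal page, or compound head whose _count is already elevated */
        VM_BUG_ON(atomic_read(&page->_count) <= 0);
        atomic_inc(&page->_count);
}

If __get_page_tail() loses the race with a split, the page is an ordinary page by the time it returns, so falling through to the plain _count increment is safe.
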
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a90c603a8d02..132d1ddb2238 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1767,7 +1767,7 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
1767 if (scanning_global_lru(sc)) 1767 if (scanning_global_lru(sc))
1768 low = inactive_anon_is_low_global(zone); 1768 low = inactive_anon_is_low_global(zone);
1769 else 1769 else
1770 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup); 1770 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
1771 return low; 1771 return low;
1772} 1772}
1773#else 1773#else
@@ -1810,7 +1810,7 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
1810 if (scanning_global_lru(sc)) 1810 if (scanning_global_lru(sc))
1811 low = inactive_file_is_low_global(zone); 1811 low = inactive_file_is_low_global(zone);
1812 else 1812 else
1813 low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup); 1813 low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
1814 return low; 1814 return low;
1815} 1815}
1816 1816
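
Finally, the mm/vmscan.c hunks pass the zone down to the memcg variants of the inactive-list checks, so the active/inactive comparison is made against that zone's per-memcg LRU counts rather than memcg-wide totals. A sketch of what such a per-zone check can look like; memcg_zone_lru_pages() and memcg_inactive_ratio() are assumed helper names, not the kernel's exact API, while the BIT(LRU_*) usage matches the memcontrol.c hunks above.

/* Sketch only: per-zone version of the memcg inactive-anon check. */
static int memcg_inactive_anon_is_low_sketch(struct mem_cgroup *memcg,
                                             struct zone *zone)
{
        unsigned long inactive = memcg_zone_lru_pages(memcg, zone,
                                                      BIT(LRU_INACTIVE_ANON));
        unsigned long active = memcg_zone_lru_pages(memcg, zone,
                                                    BIT(LRU_ACTIVE_ANON));

        /* "inactive is low" once active anon outweighs inactive by the target ratio */
        return inactive * memcg_inactive_ratio(memcg, zone) < active;
}
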