author     Len Brown <len.brown@intel.com>    2009-01-09 03:38:15 -0500
committer  Len Brown <len.brown@intel.com>    2009-01-09 03:38:15 -0500
commit     3cc8a5f4ba91f67bbdb81a43a99281a26aab8d77 (patch)
tree       82b60e24ce5457e2e21e00bfa3b2f171d7eb97ce
parent     d0302bc62af7983040ea1cd1b5dd2b36e1a6a509 (diff)
parent     ada9cfdd158abb8169873dc8e5ae39b1ec6ffa8c (diff)
Merge branch 'suspend' into release
-rw-r--r--  Documentation/kernel-parameters.txt  |  45
-rw-r--r--  arch/x86/kernel/acpi/sleep.c         |   2
-rw-r--r--  arch/x86/kernel/e820.c               |  21
-rw-r--r--  drivers/acpi/main.c                  |  67
-rw-r--r--  include/linux/acpi.h                 |   1
-rw-r--r--  include/linux/suspend.h              |  13
-rw-r--r--  kernel/power/disk.c                  |   6
-rw-r--r--  kernel/power/snapshot.c              | 370
-rw-r--r--  kernel/power/swsusp.c                | 122
9 files changed, 446 insertions, 201 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 136f02842de9..f3a89fac2202 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -150,16 +150,20 @@ and is between 256 and 4096 characters. It is defined in the file
150 default: 0 150 default: 0
151 151
152 acpi_sleep= [HW,ACPI] Sleep options 152 acpi_sleep= [HW,ACPI] Sleep options
153 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering } 153 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
154 See Documentation/power/video.txt for s3_bios and s3_mode. 154 old_ordering, s4_nonvs }
155 See Documentation/power/video.txt for information on
156 s3_bios and s3_mode.
155 s3_beep is for debugging; it makes the PC's speaker beep 157 s3_beep is for debugging; it makes the PC's speaker beep
156 as soon as the kernel's real-mode entry point is called. 158 as soon as the kernel's real-mode entry point is called.
157 s4_nohwsig prevents ACPI hardware signature from being 159 s4_nohwsig prevents ACPI hardware signature from being
158 used during resume from hibernation. 160 used during resume from hibernation.
159 old_ordering causes the ACPI 1.0 ordering of the _PTS 161 old_ordering causes the ACPI 1.0 ordering of the _PTS
160 control method, wrt putting devices into low power 162 control method, with respect to putting devices into
161 states, to be enforced (the ACPI 2.0 ordering of _PTS is 163 low power states, to be enforced (the ACPI 2.0 ordering
162 used by default). 164 of _PTS is used by default).
165 s4_nonvs prevents the kernel from saving/restoring the
166 ACPI NVS memory during hibernation.
163 167
164 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode 168 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
165 Format: { level | edge | high | low } 169 Format: { level | edge | high | low }
@@ -194,7 +198,7 @@ and is between 256 and 4096 characters. It is defined in the file
194 acpi_skip_timer_override [HW,ACPI] 198 acpi_skip_timer_override [HW,ACPI]
195 Recognize and ignore IRQ0/pin2 Interrupt Override. 199 Recognize and ignore IRQ0/pin2 Interrupt Override.
196 For broken nForce2 BIOS resulting in XT-PIC timer. 200 For broken nForce2 BIOS resulting in XT-PIC timer.
197 acpi_use_timer_override [HW,ACPI} 201 acpi_use_timer_override [HW,ACPI]
198 Use timer override. For some broken Nvidia NF5 boards 202 Use timer override. For some broken Nvidia NF5 boards
199 that require a timer override, but don't have 203 that require a timer override, but don't have
200 HPET 204 HPET
@@ -861,17 +865,19 @@ and is between 256 and 4096 characters. It is defined in the file
861 See Documentation/ide/ide.txt. 865 See Documentation/ide/ide.txt.
862 866
863 idle= [X86] 867 idle= [X86]
864 Format: idle=poll or idle=mwait, idle=halt, idle=nomwait 868 Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
865 Poll forces a polling idle loop that can slightly improves the performance 869 Poll forces a polling idle loop that can slightly
866 of waking up a idle CPU, but will use a lot of power and make the system 870 improve the performance of waking up a idle CPU, but
867 run hot. Not recommended. 871 will use a lot of power and make the system run hot.
868 idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose 872 Not recommended.
869 to not use it because it doesn't save as much power as a normal idle 873 idle=mwait: On systems which support MONITOR/MWAIT but
870 loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same 874 the kernel chose to not use it because it doesn't save
871 as idle=poll. 875 as much power as a normal idle loop, use the
872 idle=halt. Halt is forced to be used for CPU idle. 876 MONITOR/MWAIT idle loop anyways. Performance should be
877 the same as idle=poll.
878 idle=halt: Halt is forced to be used for CPU idle.
873 In such case C2/C3 won't be used again. 879 In such case C2/C3 won't be used again.
874 idle=nomwait. Disable mwait for CPU C-states 880 idle=nomwait: Disable mwait for CPU C-states
875 881
876 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem 882 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
877 Claim all unknown PCI IDE storage controllers. 883 Claim all unknown PCI IDE storage controllers.
@@ -1053,8 +1059,8 @@ and is between 256 and 4096 characters. It is defined in the file
1053 lapic [X86-32,APIC] Enable the local APIC even if BIOS 1059 lapic [X86-32,APIC] Enable the local APIC even if BIOS
1054 disabled it. 1060 disabled it.
1055 1061
1056 lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in 1062 lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer
1057 C2 power state. 1063 in C2 power state.
1058 1064
1059 libata.dma= [LIBATA] DMA control 1065 libata.dma= [LIBATA] DMA control
1060 libata.dma=0 Disable all PATA and SATA DMA 1066 libata.dma=0 Disable all PATA and SATA DMA
@@ -2242,7 +2248,8 @@ and is between 256 and 4096 characters. It is defined in the file
2242 2248
2243 thermal.psv= [HW,ACPI] 2249 thermal.psv= [HW,ACPI]
2244 -1: disable all passive trip points 2250 -1: disable all passive trip points
2245 <degrees C>: override all passive trip points to this value 2251 <degrees C>: override all passive trip points to this
2252 value
2246 2253
2247 thermal.tzp= [HW,ACPI] 2254 thermal.tzp= [HW,ACPI]
2248 Specify global default ACPI thermal zone polling rate 2255 Specify global default ACPI thermal zone polling rate
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 806b4e9051b4..707c1f6f95fa 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -159,6 +159,8 @@ static int __init acpi_sleep_setup(char *str)
159#endif 159#endif
160 if (strncmp(str, "old_ordering", 12) == 0) 160 if (strncmp(str, "old_ordering", 12) == 0)
161 acpi_old_suspend_ordering(); 161 acpi_old_suspend_ordering();
162 if (strncmp(str, "s4_nonvs", 8) == 0)
163 acpi_s4_no_nvs();
162 str = strchr(str, ','); 164 str = strchr(str, ',');
163 if (str != NULL) 165 if (str != NULL)
164 str += strspn(str, ", \t"); 166 str += strspn(str, ", \t");
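
For reference, a minimal userspace sketch of the option walk used by acpi_sleep_setup() above: match a known token with strncmp(), then hop to the character after the next comma with strchr() and strspn(). The main() wrapper, the printf() stand-ins and the sample option string are illustration only, not part of the patch.

#include <stdio.h>
#include <string.h>

/* Walk a comma-separated acpi_sleep= value the same way the patch does. */
static void parse_acpi_sleep(char *str)
{
	while (str && *str) {
		if (strncmp(str, "old_ordering", 12) == 0)
			printf("would call acpi_old_suspend_ordering()\n");
		if (strncmp(str, "s4_nonvs", 8) == 0)
			printf("would call acpi_s4_no_nvs()\n");
		/* advance to the next comma-separated token */
		str = strchr(str, ',');
		if (str != NULL)
			str += strspn(str, ", \t");
	}
}

int main(void)
{
	char opt[] = "s3_bios,old_ordering,s4_nonvs";

	parse_acpi_sleep(opt);
	return 0;
}
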
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7aafeb5263ef..74c6a21fdc8c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -665,6 +665,27 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
665} 665}
666#endif 666#endif
667 667
668#ifdef CONFIG_HIBERNATION
669/**
670 * Mark ACPI NVS memory region, so that we can save/restore it during
671 * hibernation and the subsequent resume.
672 */
673static int __init e820_mark_nvs_memory(void)
674{
675 int i;
676
677 for (i = 0; i < e820.nr_map; i++) {
678 struct e820entry *ei = &e820.map[i];
679
680 if (ei->type == E820_NVS)
681 hibernate_nvs_register(ei->addr, ei->size);
682 }
683
684 return 0;
685}
686core_initcall(e820_mark_nvs_memory);
687#endif
688
668/* 689/*
669 * Early reserved memory areas. 690 * Early reserved memory areas.
670 */ 691 */
diff --git a/drivers/acpi/main.c b/drivers/acpi/main.c
index d8242772de92..7e3c609cbef2 100644
--- a/drivers/acpi/main.c
+++ b/drivers/acpi/main.c
@@ -101,6 +101,19 @@ void __init acpi_old_suspend_ordering(void)
101 * cases. 101 * cases.
102 */ 102 */
103static bool set_sci_en_on_resume; 103static bool set_sci_en_on_resume;
104/*
105 * The ACPI specification wants us to save NVS memory regions during hibernation
106 * and to restore them during the subsequent resume. However, it is not certain
107 * if this mechanism is going to work on all machines, so we allow the user to
108 * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line
109 * option.
110 */
111static bool s4_no_nvs;
112
113void __init acpi_s4_no_nvs(void)
114{
115 s4_no_nvs = true;
116}
104 117
105/** 118/**
106 * acpi_pm_disable_gpes - Disable the GPEs. 119 * acpi_pm_disable_gpes - Disable the GPEs.
@@ -394,9 +407,25 @@ void __init acpi_no_s4_hw_signature(void)
394 407
395static int acpi_hibernation_begin(void) 408static int acpi_hibernation_begin(void)
396{ 409{
397 acpi_target_sleep_state = ACPI_STATE_S4; 410 int error;
398 acpi_sleep_tts_switch(acpi_target_sleep_state); 411
399 return 0; 412 error = s4_no_nvs ? 0 : hibernate_nvs_alloc();
413 if (!error) {
414 acpi_target_sleep_state = ACPI_STATE_S4;
415 acpi_sleep_tts_switch(acpi_target_sleep_state);
416 }
417
418 return error;
419}
420
421static int acpi_hibernation_pre_snapshot(void)
422{
423 int error = acpi_pm_prepare();
424
425 if (!error)
426 hibernate_nvs_save();
427
428 return error;
400} 429}
401 430
402static int acpi_hibernation_enter(void) 431static int acpi_hibernation_enter(void)
@@ -417,6 +446,12 @@ static int acpi_hibernation_enter(void)
417 return ACPI_SUCCESS(status) ? 0 : -EFAULT; 446 return ACPI_SUCCESS(status) ? 0 : -EFAULT;
418} 447}
419 448
449static void acpi_hibernation_finish(void)
450{
451 hibernate_nvs_free();
452 acpi_pm_finish();
453}
454
420static void acpi_hibernation_leave(void) 455static void acpi_hibernation_leave(void)
421{ 456{
422 /* 457 /*
@@ -432,6 +467,8 @@ static void acpi_hibernation_leave(void)
432 "cannot resume!\n"); 467 "cannot resume!\n");
433 panic("ACPI S4 hardware signature mismatch"); 468 panic("ACPI S4 hardware signature mismatch");
434 } 469 }
470 /* Restore the NVS memory area */
471 hibernate_nvs_restore();
435} 472}
436 473
437static void acpi_pm_enable_gpes(void) 474static void acpi_pm_enable_gpes(void)
@@ -442,8 +479,8 @@ static void acpi_pm_enable_gpes(void)
442static struct platform_hibernation_ops acpi_hibernation_ops = { 479static struct platform_hibernation_ops acpi_hibernation_ops = {
443 .begin = acpi_hibernation_begin, 480 .begin = acpi_hibernation_begin,
444 .end = acpi_pm_end, 481 .end = acpi_pm_end,
445 .pre_snapshot = acpi_pm_prepare, 482 .pre_snapshot = acpi_hibernation_pre_snapshot,
446 .finish = acpi_pm_finish, 483 .finish = acpi_hibernation_finish,
447 .prepare = acpi_pm_prepare, 484 .prepare = acpi_pm_prepare,
448 .enter = acpi_hibernation_enter, 485 .enter = acpi_hibernation_enter,
449 .leave = acpi_hibernation_leave, 486 .leave = acpi_hibernation_leave,
@@ -469,8 +506,22 @@ static int acpi_hibernation_begin_old(void)
469 506
470 error = acpi_sleep_prepare(ACPI_STATE_S4); 507 error = acpi_sleep_prepare(ACPI_STATE_S4);
471 508
509 if (!error) {
510 if (!s4_no_nvs)
511 error = hibernate_nvs_alloc();
512 if (!error)
513 acpi_target_sleep_state = ACPI_STATE_S4;
514 }
515 return error;
516}
517
518static int acpi_hibernation_pre_snapshot_old(void)
519{
520 int error = acpi_pm_disable_gpes();
521
472 if (!error) 522 if (!error)
473 acpi_target_sleep_state = ACPI_STATE_S4; 523 hibernate_nvs_save();
524
474 return error; 525 return error;
475} 526}
476 527
@@ -481,8 +532,8 @@ static int acpi_hibernation_begin_old(void)
481static struct platform_hibernation_ops acpi_hibernation_ops_old = { 532static struct platform_hibernation_ops acpi_hibernation_ops_old = {
482 .begin = acpi_hibernation_begin_old, 533 .begin = acpi_hibernation_begin_old,
483 .end = acpi_pm_end, 534 .end = acpi_pm_end,
484 .pre_snapshot = acpi_pm_disable_gpes, 535 .pre_snapshot = acpi_hibernation_pre_snapshot_old,
485 .finish = acpi_pm_finish, 536 .finish = acpi_hibernation_finish,
486 .prepare = acpi_pm_disable_gpes, 537 .prepare = acpi_pm_disable_gpes,
487 .enter = acpi_hibernation_enter, 538 .enter = acpi_hibernation_enter,
488 .leave = acpi_hibernation_leave, 539 .leave = acpi_hibernation_leave,
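
To make the ordering visible, here is a small stand-alone C sketch of the sequence this patch relies on: NVS backup pages are allocated in .begin, copied out in .pre_snapshot, copied back in .leave and released in .finish. The "driver" in main() stands in for kernel/power/disk.c and the stub bodies are invented for illustration; only the callback names come from the platform_hibernation_ops tables above.

#include <stdio.h>

static int nvs_saved;

static int begin(void)        { printf("begin: hibernate_nvs_alloc()\n"); return 0; }
static int pre_snapshot(void) { printf("pre_snapshot: hibernate_nvs_save()\n"); nvs_saved = 1; return 0; }
static void enter(void)       { printf("enter: ACPI S4\n"); }
static void leave(void)       { if (nvs_saved) printf("leave: hibernate_nvs_restore()\n"); }
static void finish(void)      { printf("finish: hibernate_nvs_free()\n"); }
static void end(void)         { printf("end\n"); }

int main(void)
{
	/* Linearized view of the hibernate/resume callback order. */
	if (begin())
		return 1;
	if (!pre_snapshot()) {
		enter();
		leave();	/* resume path: NVS contents are copied back */
	}
	finish();
	end();
	return 0;
}
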
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fba8051fb297..dfa0a5356c53 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -270,6 +270,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
270#ifdef CONFIG_PM_SLEEP 270#ifdef CONFIG_PM_SLEEP
271void __init acpi_no_s4_hw_signature(void); 271void __init acpi_no_s4_hw_signature(void);
272void __init acpi_old_suspend_ordering(void); 272void __init acpi_old_suspend_ordering(void);
273void __init acpi_s4_no_nvs(void);
273#endif /* CONFIG_PM_SLEEP */ 274#endif /* CONFIG_PM_SLEEP */
274#else /* CONFIG_ACPI */ 275#else /* CONFIG_ACPI */
275 276
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 2ce8207686e2..2b409c44db83 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -232,6 +232,11 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
232 232
233extern void hibernation_set_ops(struct platform_hibernation_ops *ops); 233extern void hibernation_set_ops(struct platform_hibernation_ops *ops);
234extern int hibernate(void); 234extern int hibernate(void);
235extern int hibernate_nvs_register(unsigned long start, unsigned long size);
236extern int hibernate_nvs_alloc(void);
237extern void hibernate_nvs_free(void);
238extern void hibernate_nvs_save(void);
239extern void hibernate_nvs_restore(void);
235#else /* CONFIG_HIBERNATION */ 240#else /* CONFIG_HIBERNATION */
236static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } 241static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
237static inline void swsusp_set_page_free(struct page *p) {} 242static inline void swsusp_set_page_free(struct page *p) {}
@@ -239,6 +244,14 @@ static inline void swsusp_unset_page_free(struct page *p) {}
239 244
240static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {} 245static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {}
241static inline int hibernate(void) { return -ENOSYS; } 246static inline int hibernate(void) { return -ENOSYS; }
247static inline int hibernate_nvs_register(unsigned long a, unsigned long b)
248{
249 return 0;
250}
251static inline int hibernate_nvs_alloc(void) { return 0; }
252static inline void hibernate_nvs_free(void) {}
253static inline void hibernate_nvs_save(void) {}
254static inline void hibernate_nvs_restore(void) {}
242#endif /* CONFIG_HIBERNATION */ 255#endif /* CONFIG_HIBERNATION */
243 256
244#ifdef CONFIG_PM_SLEEP 257#ifdef CONFIG_PM_SLEEP
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index c9d74083746f..096fe4899ea4 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -259,12 +259,12 @@ int hibernation_snapshot(int platform_mode)
259{ 259{
260 int error, ftrace_save; 260 int error, ftrace_save;
261 261
262 /* Free memory before shutting down devices. */ 262 error = platform_begin(platform_mode);
263 error = swsusp_shrink_memory();
264 if (error) 263 if (error)
265 return error; 264 return error;
266 265
267 error = platform_begin(platform_mode); 266 /* Free memory before shutting down devices. */
267 error = swsusp_shrink_memory();
268 if (error) 268 if (error)
269 goto Close; 269 goto Close;
270 270
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5d2ab836e998..f5fc2d7680f2 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -25,6 +25,7 @@
25#include <linux/syscalls.h> 25#include <linux/syscalls.h>
26#include <linux/console.h> 26#include <linux/console.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/list.h>
28 29
29#include <asm/uaccess.h> 30#include <asm/uaccess.h>
30#include <asm/mmu_context.h> 31#include <asm/mmu_context.h>
@@ -192,12 +193,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
192 return ret; 193 return ret;
193} 194}
194 195
195static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
196{
197 free_list_of_pages(ca->chain, clear_page_nosave);
198 memset(ca, 0, sizeof(struct chain_allocator));
199}
200
201/** 196/**
202 * Data types related to memory bitmaps. 197 * Data types related to memory bitmaps.
203 * 198 *
@@ -233,7 +228,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
233#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) 228#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
234 229
235struct bm_block { 230struct bm_block {
236 struct bm_block *next; /* next element of the list */ 231 struct list_head hook; /* hook into a list of bitmap blocks */
237 unsigned long start_pfn; /* pfn represented by the first bit */ 232 unsigned long start_pfn; /* pfn represented by the first bit */
238 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ 233 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
239 unsigned long *data; /* bitmap representing pages */ 234 unsigned long *data; /* bitmap representing pages */
@@ -244,24 +239,15 @@ static inline unsigned long bm_block_bits(struct bm_block *bb)
244 return bb->end_pfn - bb->start_pfn; 239 return bb->end_pfn - bb->start_pfn;
245} 240}
246 241
247struct zone_bitmap {
248 struct zone_bitmap *next; /* next element of the list */
249 unsigned long start_pfn; /* minimal pfn in this zone */
250 unsigned long end_pfn; /* maximal pfn in this zone plus 1 */
251 struct bm_block *bm_blocks; /* list of bitmap blocks */
252 struct bm_block *cur_block; /* recently used bitmap block */
253};
254
255/* strcut bm_position is used for browsing memory bitmaps */ 242/* strcut bm_position is used for browsing memory bitmaps */
256 243
257struct bm_position { 244struct bm_position {
258 struct zone_bitmap *zone_bm;
259 struct bm_block *block; 245 struct bm_block *block;
260 int bit; 246 int bit;
261}; 247};
262 248
263struct memory_bitmap { 249struct memory_bitmap {
264 struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */ 250 struct list_head blocks; /* list of bitmap blocks */
265 struct linked_page *p_list; /* list of pages used to store zone 251 struct linked_page *p_list; /* list of pages used to store zone
266 * bitmap objects and bitmap block 252 * bitmap objects and bitmap block
267 * objects 253 * objects
@@ -273,11 +259,7 @@ struct memory_bitmap {
273 259
274static void memory_bm_position_reset(struct memory_bitmap *bm) 260static void memory_bm_position_reset(struct memory_bitmap *bm)
275{ 261{
276 struct zone_bitmap *zone_bm; 262 bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
277
278 zone_bm = bm->zone_bm_list;
279 bm->cur.zone_bm = zone_bm;
280 bm->cur.block = zone_bm->bm_blocks;
281 bm->cur.bit = 0; 263 bm->cur.bit = 0;
282} 264}
283 265
@@ -285,151 +267,184 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
285 267
286/** 268/**
287 * create_bm_block_list - create a list of block bitmap objects 269 * create_bm_block_list - create a list of block bitmap objects
270 * @nr_blocks - number of blocks to allocate
271 * @list - list to put the allocated blocks into
272 * @ca - chain allocator to be used for allocating memory
288 */ 273 */
289 274static int create_bm_block_list(unsigned long pages,
290static inline struct bm_block * 275 struct list_head *list,
291create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca) 276 struct chain_allocator *ca)
292{ 277{
293 struct bm_block *bblist = NULL; 278 unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
294 279
295 while (nr_blocks-- > 0) { 280 while (nr_blocks-- > 0) {
296 struct bm_block *bb; 281 struct bm_block *bb;
297 282
298 bb = chain_alloc(ca, sizeof(struct bm_block)); 283 bb = chain_alloc(ca, sizeof(struct bm_block));
299 if (!bb) 284 if (!bb)
300 return NULL; 285 return -ENOMEM;
301 286 list_add(&bb->hook, list);
302 bb->next = bblist;
303 bblist = bb;
304 } 287 }
305 return bblist; 288
289 return 0;
306} 290}
307 291
292struct mem_extent {
293 struct list_head hook;
294 unsigned long start;
295 unsigned long end;
296};
297
308/** 298/**
309 * create_zone_bm_list - create a list of zone bitmap objects 299 * free_mem_extents - free a list of memory extents
300 * @list - list of extents to empty
310 */ 301 */
302static void free_mem_extents(struct list_head *list)
303{
304 struct mem_extent *ext, *aux;
311 305
312static inline struct zone_bitmap * 306 list_for_each_entry_safe(ext, aux, list, hook) {
313create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca) 307 list_del(&ext->hook);
308 kfree(ext);
309 }
310}
311
312/**
313 * create_mem_extents - create a list of memory extents representing
314 * contiguous ranges of PFNs
315 * @list - list to put the extents into
316 * @gfp_mask - mask to use for memory allocations
317 */
318static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
314{ 319{
315 struct zone_bitmap *zbmlist = NULL; 320 struct zone *zone;
316 321
317 while (nr_zones-- > 0) { 322 INIT_LIST_HEAD(list);
318 struct zone_bitmap *zbm;
319 323
320 zbm = chain_alloc(ca, sizeof(struct zone_bitmap)); 324 for_each_zone(zone) {
321 if (!zbm) 325 unsigned long zone_start, zone_end;
322 return NULL; 326 struct mem_extent *ext, *cur, *aux;
327
328 if (!populated_zone(zone))
329 continue;
323 330
324 zbm->next = zbmlist; 331 zone_start = zone->zone_start_pfn;
325 zbmlist = zbm; 332 zone_end = zone->zone_start_pfn + zone->spanned_pages;
333
334 list_for_each_entry(ext, list, hook)
335 if (zone_start <= ext->end)
336 break;
337
338 if (&ext->hook == list || zone_end < ext->start) {
339 /* New extent is necessary */
340 struct mem_extent *new_ext;
341
342 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
343 if (!new_ext) {
344 free_mem_extents(list);
345 return -ENOMEM;
346 }
347 new_ext->start = zone_start;
348 new_ext->end = zone_end;
349 list_add_tail(&new_ext->hook, &ext->hook);
350 continue;
351 }
352
353 /* Merge this zone's range of PFNs with the existing one */
354 if (zone_start < ext->start)
355 ext->start = zone_start;
356 if (zone_end > ext->end)
357 ext->end = zone_end;
358
359 /* More merging may be possible */
360 cur = ext;
361 list_for_each_entry_safe_continue(cur, aux, list, hook) {
362 if (zone_end < cur->start)
363 break;
364 if (zone_end < cur->end)
365 ext->end = cur->end;
366 list_del(&cur->hook);
367 kfree(cur);
368 }
326 } 369 }
327 return zbmlist; 370
371 return 0;
328} 372}
329 373
330/** 374/**
331 * memory_bm_create - allocate memory for a memory bitmap 375 * memory_bm_create - allocate memory for a memory bitmap
332 */ 376 */
333
334static int 377static int
335memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) 378memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
336{ 379{
337 struct chain_allocator ca; 380 struct chain_allocator ca;
338 struct zone *zone; 381 struct list_head mem_extents;
339 struct zone_bitmap *zone_bm; 382 struct mem_extent *ext;
340 struct bm_block *bb; 383 int error;
341 unsigned int nr;
342 384
343 chain_init(&ca, gfp_mask, safe_needed); 385 chain_init(&ca, gfp_mask, safe_needed);
386 INIT_LIST_HEAD(&bm->blocks);
344 387
345 /* Compute the number of zones */ 388 error = create_mem_extents(&mem_extents, gfp_mask);
346 nr = 0; 389 if (error)
347 for_each_zone(zone) 390 return error;
348 if (populated_zone(zone))
349 nr++;
350
351 /* Allocate the list of zones bitmap objects */
352 zone_bm = create_zone_bm_list(nr, &ca);
353 bm->zone_bm_list = zone_bm;
354 if (!zone_bm) {
355 chain_free(&ca, PG_UNSAFE_CLEAR);
356 return -ENOMEM;
357 }
358
359 /* Initialize the zone bitmap objects */
360 for_each_zone(zone) {
361 unsigned long pfn;
362 391
363 if (!populated_zone(zone)) 392 list_for_each_entry(ext, &mem_extents, hook) {
364 continue; 393 struct bm_block *bb;
394 unsigned long pfn = ext->start;
395 unsigned long pages = ext->end - ext->start;
365 396
366 zone_bm->start_pfn = zone->zone_start_pfn; 397 bb = list_entry(bm->blocks.prev, struct bm_block, hook);
367 zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
368 /* Allocate the list of bitmap block objects */
369 nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
370 bb = create_bm_block_list(nr, &ca);
371 zone_bm->bm_blocks = bb;
372 zone_bm->cur_block = bb;
373 if (!bb)
374 goto Free;
375 398
376 nr = zone->spanned_pages; 399 error = create_bm_block_list(pages, bm->blocks.prev, &ca);
377 pfn = zone->zone_start_pfn; 400 if (error)
378 /* Initialize the bitmap block objects */ 401 goto Error;
379 while (bb) {
380 unsigned long *ptr;
381 402
382 ptr = get_image_page(gfp_mask, safe_needed); 403 list_for_each_entry_continue(bb, &bm->blocks, hook) {
383 bb->data = ptr; 404 bb->data = get_image_page(gfp_mask, safe_needed);
384 if (!ptr) 405 if (!bb->data) {
385 goto Free; 406 error = -ENOMEM;
407 goto Error;
408 }
386 409
387 bb->start_pfn = pfn; 410 bb->start_pfn = pfn;
388 if (nr >= BM_BITS_PER_BLOCK) { 411 if (pages >= BM_BITS_PER_BLOCK) {
389 pfn += BM_BITS_PER_BLOCK; 412 pfn += BM_BITS_PER_BLOCK;
390 nr -= BM_BITS_PER_BLOCK; 413 pages -= BM_BITS_PER_BLOCK;
391 } else { 414 } else {
392 /* This is executed only once in the loop */ 415 /* This is executed only once in the loop */
393 pfn += nr; 416 pfn += pages;
394 } 417 }
395 bb->end_pfn = pfn; 418 bb->end_pfn = pfn;
396 bb = bb->next;
397 } 419 }
398 zone_bm = zone_bm->next;
399 } 420 }
421
400 bm->p_list = ca.chain; 422 bm->p_list = ca.chain;
401 memory_bm_position_reset(bm); 423 memory_bm_position_reset(bm);
402 return 0; 424 Exit:
425 free_mem_extents(&mem_extents);
426 return error;
403 427
404 Free: 428 Error:
405 bm->p_list = ca.chain; 429 bm->p_list = ca.chain;
406 memory_bm_free(bm, PG_UNSAFE_CLEAR); 430 memory_bm_free(bm, PG_UNSAFE_CLEAR);
407 return -ENOMEM; 431 goto Exit;
408} 432}
409 433
410/** 434/**
411 * memory_bm_free - free memory occupied by the memory bitmap @bm 435 * memory_bm_free - free memory occupied by the memory bitmap @bm
412 */ 436 */
413
414static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 437static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
415{ 438{
416 struct zone_bitmap *zone_bm; 439 struct bm_block *bb;
417 440
418 /* Free the list of bit blocks for each zone_bitmap object */ 441 list_for_each_entry(bb, &bm->blocks, hook)
419 zone_bm = bm->zone_bm_list; 442 if (bb->data)
420 while (zone_bm) { 443 free_image_page(bb->data, clear_nosave_free);
421 struct bm_block *bb;
422 444
423 bb = zone_bm->bm_blocks;
424 while (bb) {
425 if (bb->data)
426 free_image_page(bb->data, clear_nosave_free);
427 bb = bb->next;
428 }
429 zone_bm = zone_bm->next;
430 }
431 free_list_of_pages(bm->p_list, clear_nosave_free); 445 free_list_of_pages(bm->p_list, clear_nosave_free);
432 bm->zone_bm_list = NULL; 446
447 INIT_LIST_HEAD(&bm->blocks);
433} 448}
434 449
435/** 450/**
@@ -437,38 +452,33 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
437 * to given pfn. The cur_zone_bm member of @bm and the cur_block member 452 * to given pfn. The cur_zone_bm member of @bm and the cur_block member
438 * of @bm->cur_zone_bm are updated. 453 * of @bm->cur_zone_bm are updated.
439 */ 454 */
440
441static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 455static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
442 void **addr, unsigned int *bit_nr) 456 void **addr, unsigned int *bit_nr)
443{ 457{
444 struct zone_bitmap *zone_bm;
445 struct bm_block *bb; 458 struct bm_block *bb;
446 459
447 /* Check if the pfn is from the current zone */ 460 /*
448 zone_bm = bm->cur.zone_bm; 461 * Check if the pfn corresponds to the current bitmap block and find
449 if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { 462 * the block where it fits if this is not the case.
450 zone_bm = bm->zone_bm_list; 463 */
451 /* We don't assume that the zones are sorted by pfns */ 464 bb = bm->cur.block;
452 while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
453 zone_bm = zone_bm->next;
454
455 if (!zone_bm)
456 return -EFAULT;
457 }
458 bm->cur.zone_bm = zone_bm;
459 }
460 /* Check if the pfn corresponds to the current bitmap block */
461 bb = zone_bm->cur_block;
462 if (pfn < bb->start_pfn) 465 if (pfn < bb->start_pfn)
463 bb = zone_bm->bm_blocks; 466 list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
467 if (pfn >= bb->start_pfn)
468 break;
464 469
465 while (pfn >= bb->end_pfn) { 470 if (pfn >= bb->end_pfn)
466 bb = bb->next; 471 list_for_each_entry_continue(bb, &bm->blocks, hook)
472 if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
473 break;
467 474
468 BUG_ON(!bb); 475 if (&bb->hook == &bm->blocks)
469 } 476 return -EFAULT;
470 zone_bm->cur_block = bb; 477
478 /* The block has been found */
479 bm->cur.block = bb;
471 pfn -= bb->start_pfn; 480 pfn -= bb->start_pfn;
481 bm->cur.bit = pfn + 1;
472 *bit_nr = pfn; 482 *bit_nr = pfn;
473 *addr = bb->data; 483 *addr = bb->data;
474 return 0; 484 return 0;
@@ -519,6 +529,14 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
519 return test_bit(bit, addr); 529 return test_bit(bit, addr);
520} 530}
521 531
532static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
533{
534 void *addr;
535 unsigned int bit;
536
537 return !memory_bm_find_bit(bm, pfn, &addr, &bit);
538}
539
522/** 540/**
523 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit 541 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit
524 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is 542 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
@@ -530,29 +548,21 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
530 548
531static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 549static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
532{ 550{
533 struct zone_bitmap *zone_bm;
534 struct bm_block *bb; 551 struct bm_block *bb;
535 int bit; 552 int bit;
536 553
554 bb = bm->cur.block;
537 do { 555 do {
538 bb = bm->cur.block; 556 bit = bm->cur.bit;
539 do { 557 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
540 bit = bm->cur.bit; 558 if (bit < bm_block_bits(bb))
541 bit = find_next_bit(bb->data, bm_block_bits(bb), bit); 559 goto Return_pfn;
542 if (bit < bm_block_bits(bb)) 560
543 goto Return_pfn; 561 bb = list_entry(bb->hook.next, struct bm_block, hook);
544 562 bm->cur.block = bb;
545 bb = bb->next; 563 bm->cur.bit = 0;
546 bm->cur.block = bb; 564 } while (&bb->hook != &bm->blocks);
547 bm->cur.bit = 0; 565
548 } while (bb);
549 zone_bm = bm->cur.zone_bm->next;
550 if (zone_bm) {
551 bm->cur.zone_bm = zone_bm;
552 bm->cur.block = zone_bm->bm_blocks;
553 bm->cur.bit = 0;
554 }
555 } while (zone_bm);
556 memory_bm_position_reset(bm); 566 memory_bm_position_reset(bm);
557 return BM_END_OF_MAP; 567 return BM_END_OF_MAP;
558 568
@@ -808,8 +818,7 @@ static unsigned int count_free_highmem_pages(void)
808 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 818 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
809 * and it isn't a part of a free chunk of pages. 819 * and it isn't a part of a free chunk of pages.
810 */ 820 */
811 821static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
812static struct page *saveable_highmem_page(unsigned long pfn)
813{ 822{
814 struct page *page; 823 struct page *page;
815 824
@@ -817,6 +826,8 @@ static struct page *saveable_highmem_page(unsigned long pfn)
817 return NULL; 826 return NULL;
818 827
819 page = pfn_to_page(pfn); 828 page = pfn_to_page(pfn);
829 if (page_zone(page) != zone)
830 return NULL;
820 831
821 BUG_ON(!PageHighMem(page)); 832 BUG_ON(!PageHighMem(page));
822 833
@@ -846,13 +857,16 @@ unsigned int count_highmem_pages(void)
846 mark_free_pages(zone); 857 mark_free_pages(zone);
847 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 858 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
848 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 859 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
849 if (saveable_highmem_page(pfn)) 860 if (saveable_highmem_page(zone, pfn))
850 n++; 861 n++;
851 } 862 }
852 return n; 863 return n;
853} 864}
854#else 865#else
855static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } 866static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
867{
868 return NULL;
869}
856#endif /* CONFIG_HIGHMEM */ 870#endif /* CONFIG_HIGHMEM */
857 871
858/** 872/**
@@ -863,8 +877,7 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
863 * of pages statically defined as 'unsaveable', and it isn't a part of 877 * of pages statically defined as 'unsaveable', and it isn't a part of
864 * a free chunk of pages. 878 * a free chunk of pages.
865 */ 879 */
866 880static struct page *saveable_page(struct zone *zone, unsigned long pfn)
867static struct page *saveable_page(unsigned long pfn)
868{ 881{
869 struct page *page; 882 struct page *page;
870 883
@@ -872,6 +885,8 @@ static struct page *saveable_page(unsigned long pfn)
872 return NULL; 885 return NULL;
873 886
874 page = pfn_to_page(pfn); 887 page = pfn_to_page(pfn);
888 if (page_zone(page) != zone)
889 return NULL;
875 890
876 BUG_ON(PageHighMem(page)); 891 BUG_ON(PageHighMem(page));
877 892
@@ -903,7 +918,7 @@ unsigned int count_data_pages(void)
903 mark_free_pages(zone); 918 mark_free_pages(zone);
904 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 919 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
905 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 920 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
906 if(saveable_page(pfn)) 921 if (saveable_page(zone, pfn))
907 n++; 922 n++;
908 } 923 }
909 return n; 924 return n;
@@ -944,7 +959,7 @@ static inline struct page *
944page_is_saveable(struct zone *zone, unsigned long pfn) 959page_is_saveable(struct zone *zone, unsigned long pfn)
945{ 960{
946 return is_highmem(zone) ? 961 return is_highmem(zone) ?
947 saveable_highmem_page(pfn) : saveable_page(pfn); 962 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
948} 963}
949 964
950static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 965static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
@@ -966,7 +981,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
966 * data modified by kmap_atomic() 981 * data modified by kmap_atomic()
967 */ 982 */
968 safe_copy_page(buffer, s_page); 983 safe_copy_page(buffer, s_page);
969 dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); 984 dst = kmap_atomic(d_page, KM_USER0);
970 memcpy(dst, buffer, PAGE_SIZE); 985 memcpy(dst, buffer, PAGE_SIZE);
971 kunmap_atomic(dst, KM_USER0); 986 kunmap_atomic(dst, KM_USER0);
972 } else { 987 } else {
@@ -975,7 +990,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
975 } 990 }
976} 991}
977#else 992#else
978#define page_is_saveable(zone, pfn) saveable_page(pfn) 993#define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
979 994
980static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 995static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
981{ 996{
@@ -1459,9 +1474,7 @@ load_header(struct swsusp_info *info)
1459 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set 1474 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1460 * the corresponding bit in the memory bitmap @bm 1475 * the corresponding bit in the memory bitmap @bm
1461 */ 1476 */
1462 1477static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1463static inline void
1464unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1465{ 1478{
1466 int j; 1479 int j;
1467 1480
@@ -1469,8 +1482,13 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1469 if (unlikely(buf[j] == BM_END_OF_MAP)) 1482 if (unlikely(buf[j] == BM_END_OF_MAP))
1470 break; 1483 break;
1471 1484
1472 memory_bm_set_bit(bm, buf[j]); 1485 if (memory_bm_pfn_present(bm, buf[j]))
1486 memory_bm_set_bit(bm, buf[j]);
1487 else
1488 return -EFAULT;
1473 } 1489 }
1490
1491 return 0;
1474} 1492}
1475 1493
1476/* List of "safe" pages that may be used to store data loaded from the suspend 1494/* List of "safe" pages that may be used to store data loaded from the suspend
@@ -1608,7 +1626,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1608 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 1626 pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1609 if (!pbe) { 1627 if (!pbe) {
1610 swsusp_free(); 1628 swsusp_free();
1611 return NULL; 1629 return ERR_PTR(-ENOMEM);
1612 } 1630 }
1613 pbe->orig_page = page; 1631 pbe->orig_page = page;
1614 if (safe_highmem_pages > 0) { 1632 if (safe_highmem_pages > 0) {
@@ -1677,7 +1695,7 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1677static inline void * 1695static inline void *
1678get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 1696get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1679{ 1697{
1680 return NULL; 1698 return ERR_PTR(-EINVAL);
1681} 1699}
1682 1700
1683static inline void copy_last_highmem_page(void) {} 1701static inline void copy_last_highmem_page(void) {}
@@ -1788,8 +1806,13 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1788static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 1806static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1789{ 1807{
1790 struct pbe *pbe; 1808 struct pbe *pbe;
1791 struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); 1809 struct page *page;
1810 unsigned long pfn = memory_bm_next_pfn(bm);
1792 1811
1812 if (pfn == BM_END_OF_MAP)
1813 return ERR_PTR(-EFAULT);
1814
1815 page = pfn_to_page(pfn);
1793 if (PageHighMem(page)) 1816 if (PageHighMem(page))
1794 return get_highmem_page_buffer(page, ca); 1817 return get_highmem_page_buffer(page, ca);
1795 1818
@@ -1805,7 +1828,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1805 pbe = chain_alloc(ca, sizeof(struct pbe)); 1828 pbe = chain_alloc(ca, sizeof(struct pbe));
1806 if (!pbe) { 1829 if (!pbe) {
1807 swsusp_free(); 1830 swsusp_free();
1808 return NULL; 1831 return ERR_PTR(-ENOMEM);
1809 } 1832 }
1810 pbe->orig_address = page_address(page); 1833 pbe->orig_address = page_address(page);
1811 pbe->address = safe_pages_list; 1834 pbe->address = safe_pages_list;
@@ -1868,7 +1891,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1868 return error; 1891 return error;
1869 1892
1870 } else if (handle->prev <= nr_meta_pages) { 1893 } else if (handle->prev <= nr_meta_pages) {
1871 unpack_orig_pfns(buffer, &copy_bm); 1894 error = unpack_orig_pfns(buffer, &copy_bm);
1895 if (error)
1896 return error;
1897
1872 if (handle->prev == nr_meta_pages) { 1898 if (handle->prev == nr_meta_pages) {
1873 error = prepare_image(&orig_bm, &copy_bm); 1899 error = prepare_image(&orig_bm, &copy_bm);
1874 if (error) 1900 if (error)
@@ -1879,12 +1905,14 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1879 restore_pblist = NULL; 1905 restore_pblist = NULL;
1880 handle->buffer = get_buffer(&orig_bm, &ca); 1906 handle->buffer = get_buffer(&orig_bm, &ca);
1881 handle->sync_read = 0; 1907 handle->sync_read = 0;
1882 if (!handle->buffer) 1908 if (IS_ERR(handle->buffer))
1883 return -ENOMEM; 1909 return PTR_ERR(handle->buffer);
1884 } 1910 }
1885 } else { 1911 } else {
1886 copy_last_highmem_page(); 1912 copy_last_highmem_page();
1887 handle->buffer = get_buffer(&orig_bm, &ca); 1913 handle->buffer = get_buffer(&orig_bm, &ca);
1914 if (IS_ERR(handle->buffer))
1915 return PTR_ERR(handle->buffer);
1888 if (handle->buffer != buffer) 1916 if (handle->buffer != buffer)
1889 handle->sync_read = 0; 1917 handle->sync_read = 0;
1890 } 1918 }
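
The create_mem_extents() code above folds the PFN range of every populated zone into a sorted list of non-overlapping extents, merging ranges that touch or overlap. Below is a userspace sketch of the same insert-or-merge idea, using a plain array instead of a struct list_head; the zone ranges in main() are made up for illustration.

#include <stdio.h>

struct extent { unsigned long start, end; };

/* Insert [start, end) into a sorted array of disjoint extents, merging. */
static int add_extent(struct extent *ext, int n,
		      unsigned long start, unsigned long end)
{
	int i, j, k, merged;

	/* find the first existing extent this range could touch */
	for (i = 0; i < n; i++)
		if (start <= ext[i].end)
			break;

	if (i == n || end < ext[i].start) {
		/* disjoint: insert a new extent, keeping the array sorted */
		for (j = n; j > i; j--)
			ext[j] = ext[j - 1];
		ext[i].start = start;
		ext[i].end = end;
		return n + 1;
	}

	/* overlapping or adjacent: widen ext[i] to cover the new range */
	if (start < ext[i].start)
		ext[i].start = start;
	if (end > ext[i].end)
		ext[i].end = end;

	/* swallow any later extents the widened range now reaches */
	j = i + 1;
	while (j < n && ext[j].start <= ext[i].end) {
		if (ext[j].end > ext[i].end)
			ext[i].end = ext[j].end;
		j++;
	}
	merged = j - (i + 1);
	for (k = i + 1; j < n; j++, k++)
		ext[k] = ext[j];
	return n - merged;
}

int main(void)
{
	struct extent ext[8];
	int i, n = 0;

	/* hypothetical zone PFN ranges: adjacent on purpose, so they merge */
	n = add_extent(ext, n, 0, 4096);		/* "DMA"     */
	n = add_extent(ext, n, 4096, 225280);		/* "Normal"  */
	n = add_extent(ext, n, 225280, 262144);		/* "HighMem" */

	for (i = 0; i < n; i++)
		printf("extent %d: pfns [%lu, %lu)\n", i, ext[i].start, ext[i].end);
	return 0;
}
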
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 023ff2a31d89..a92c91451559 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -262,3 +262,125 @@ int swsusp_shrink_memory(void)
262 262
263 return 0; 263 return 0;
264} 264}
265
266/*
267 * Platforms, like ACPI, may want us to save some memory used by them during
268 * hibernation and to restore the contents of this memory during the subsequent
269 * resume. The code below implements a mechanism allowing us to do that.
270 */
271
272struct nvs_page {
273 unsigned long phys_start;
274 unsigned int size;
275 void *kaddr;
276 void *data;
277 struct list_head node;
278};
279
280static LIST_HEAD(nvs_list);
281
282/**
283 * hibernate_nvs_register - register platform NVS memory region to save
284 * @start - physical address of the region
285 * @size - size of the region
286 *
287 * The NVS region need not be page-aligned (both ends) and we arrange
288 * things so that the data from page-aligned addresses in this region will
289 * be copied into separate RAM pages.
290 */
291int hibernate_nvs_register(unsigned long start, unsigned long size)
292{
293 struct nvs_page *entry, *next;
294
295 while (size > 0) {
296 unsigned int nr_bytes;
297
298 entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
299 if (!entry)
300 goto Error;
301
302 list_add_tail(&entry->node, &nvs_list);
303 entry->phys_start = start;
304 nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
305 entry->size = (size < nr_bytes) ? size : nr_bytes;
306
307 start += entry->size;
308 size -= entry->size;
309 }
310 return 0;
311
312 Error:
313 list_for_each_entry_safe(entry, next, &nvs_list, node) {
314 list_del(&entry->node);
315 kfree(entry);
316 }
317 return -ENOMEM;
318}
319
320/**
321 * hibernate_nvs_free - free data pages allocated for saving NVS regions
322 */
323void hibernate_nvs_free(void)
324{
325 struct nvs_page *entry;
326
327 list_for_each_entry(entry, &nvs_list, node)
328 if (entry->data) {
329 free_page((unsigned long)entry->data);
330 entry->data = NULL;
331 if (entry->kaddr) {
332 iounmap(entry->kaddr);
333 entry->kaddr = NULL;
334 }
335 }
336}
337
338/**
339 * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
340 */
341int hibernate_nvs_alloc(void)
342{
343 struct nvs_page *entry;
344
345 list_for_each_entry(entry, &nvs_list, node) {
346 entry->data = (void *)__get_free_page(GFP_KERNEL);
347 if (!entry->data) {
348 hibernate_nvs_free();
349 return -ENOMEM;
350 }
351 }
352 return 0;
353}
354
355/**
356 * hibernate_nvs_save - save NVS memory regions
357 */
358void hibernate_nvs_save(void)
359{
360 struct nvs_page *entry;
361
362 printk(KERN_INFO "PM: Saving platform NVS memory\n");
363
364 list_for_each_entry(entry, &nvs_list, node)
365 if (entry->data) {
366 entry->kaddr = ioremap(entry->phys_start, entry->size);
367 memcpy(entry->data, entry->kaddr, entry->size);
368 }
369}
370
371/**
372 * hibernate_nvs_restore - restore NVS memory regions
373 *
374 * This function is going to be called with interrupts disabled, so it
375 * cannot iounmap the virtual addresses used to access the NVS region.
376 */
377void hibernate_nvs_restore(void)
378{
379 struct nvs_page *entry;
380
381 printk(KERN_INFO "PM: Restoring platform NVS memory\n");
382
383 list_for_each_entry(entry, &nvs_list, node)
384 if (entry->data)
385 memcpy(entry->kaddr, entry->data, entry->size);
386}
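
A userspace analog of the hibernate_nvs_* helpers above, with a malloc()ed buffer standing in for the physical NVS region (so there is no ioremap() here): register splits the region into page-sized chunks, alloc grabs one backup page per chunk, save copies the chunks out before the snapshot, and restore copies them back early in resume. Names mirror the patch, but this is illustration only, not the kernel implementation.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096UL

struct nvs_page {
	char *kaddr;		/* stand-in for the mapped NVS chunk */
	size_t size;
	char *data;		/* backup copy, as in the patch */
	struct nvs_page *next;
};

static struct nvs_page *nvs_list;

static int nvs_register(char *region, size_t size)
{
	while (size > 0) {
		struct nvs_page *entry = calloc(1, sizeof(*entry));
		size_t nr_bytes;

		if (!entry)
			return -1;
		/* the first chunk may be short if the region is unaligned */
		nr_bytes = PAGE_SIZE - ((unsigned long)region & (PAGE_SIZE - 1));
		entry->kaddr = region;
		entry->size = size < nr_bytes ? size : nr_bytes;
		entry->next = nvs_list;
		nvs_list = entry;

		region += entry->size;
		size -= entry->size;
	}
	return 0;
}

static int nvs_alloc(void)
{
	struct nvs_page *entry;

	for (entry = nvs_list; entry; entry = entry->next) {
		entry->data = malloc(entry->size);
		if (!entry->data)
			return -1;
	}
	return 0;
}

static void nvs_save(void)
{
	struct nvs_page *entry;

	for (entry = nvs_list; entry; entry = entry->next)
		memcpy(entry->data, entry->kaddr, entry->size);
}

static void nvs_restore(void)
{
	struct nvs_page *entry;

	for (entry = nvs_list; entry; entry = entry->next)
		memcpy(entry->kaddr, entry->data, entry->size);
}

int main(void)
{
	char *region = malloc(3 * PAGE_SIZE);

	if (!region)
		return 1;
	strcpy(region + 100, "platform data");

	if (nvs_register(region + 100, 2 * PAGE_SIZE) || nvs_alloc())
		return 1;

	nvs_save();				/* before the snapshot */
	memset(region, 0, 3 * PAGE_SIZE);	/* "firmware" clobbers the region */
	nvs_restore();				/* early in resume */

	printf("restored: '%s'\n", region + 100);
	return 0;
}
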

                                                                    
                         








                                                                  


                     
                                                                       
                                            

                                               

  



                                                  
 

                             
 








                                        
 
 
                                                                                           


                              
 
                                 
 






                                                                        




                                                 
                                                  







                                          
         

                                            
    




                     

 
                                                                                       
 
                                              

                                      
 








                                                                   

                 
                          
                 

 








                                                                        






                                                                    



                                                                      



                                                                   





                                                  
                                                      
                                                    
                                      





                         
                                                    
 














                                                                    

                                                                                                    











                                                                           



                                                                              
                                                         




































                                                                          



























                                                                                           
 
                   










                                                                            

                                                            


                                                       
                 

                                               
         
                       

 


                                                                         
                                                        
                                

                                   













                                                                  
                                                                      



                                                                    
                                          
                                                
                 
                                      




                       





                                                          

                                                                   
 





                                                
 
                                                     


                           
                    

                                       
                              
                                





                                                                    

                                       
                                



                                                   
                                       
                                
 




                                                      

                                                                             
                             
 


                     
                                       
                                
         
                    
                 

 

















                                                                                                     








                                                                                 

                                                                            
                                                    



                                                     

                                    
 

























                                                                                 




                                                             
                                                                                           













                                                        
                                                          

                                           




                                           
                                                                 
                                        
                                       

  







                                                                   
                                                     




















                                                                             

                                                               

                                                                

                            
                             
                                                      
                                   
 
                                 

                     

                                 
 



                                                                    

                                        




                                                                  
                     

                         
                                                              
    

                              
                     

 






                                                                                     





                                                           
                                                    

                                                        
                                   




                      
                                 


                        






















                                                                 

                                        
                                                                                       







                                         

                              
                   

 



                                                                        
                                                            














                                                        
                                                                              







                                                                                         

 


                                                                               
                                                            
























                                                        
                                                                              








                                                          
                                                                



                                              
                                            





                                                     

  
                                                    


                                                              
                                                                              

 
                                                                
                                           
                                                

  
                                                             

                                                                            

                                                                              

 
                                                                       
                                               
                                      
                                       

  



















                                                                               
                                                                   




                                                






                                                                 
                                      
                                                       
























                                                                            

                                                                   
 
                                  
                            
                              
                                                









                                                                        



                                          
                                                  


















                                                    





                     



                                                                                
                                   






                                        

                                                                 




                                                                  
                     









                                                            






                                                                                   



                                                                         
                        



                                       
      


                                                        
                        



                                                        
      





                                                                              
  

                
                                                         
                                                                
 
                                             








                                                 
                  
                                              
      
                                                
                     
                                                    
      




                                               
                 
                                          

                      
                                                     

                      
                                              

                        
                                                  

                     
                                           
      

                                                       
                          
                                                     
      


                                                           


                                                    
  
 
                                                     


                                                              
                                                                                

 
                                                                 
                                           
                                                 

  
                                                                                                            

                                                                                

 
                                                                        
                                                
                                      
                                       
  

   








                                                                            
  




                                                                    
  


                                                                      
   
                                              
 
                                             
                              









                                                               
 

                                      
 




                                                                
 












                                                               
         





                     

 


                                                                               



                                                
                                                     





















                                                                             

                                                                                               
                                                 
                                 
                      
 



                                                          



                                   
                        


                                      
                          


                         
                                                               
                              
    
                      

 
  
                                        
  
   
                                                       
 

                                 
 
                        




















                                                                    
         
                          
                    

 
                                                                       
 








                                                                                  


                                                                            
                                                            
                                                                                 

                                 
 


                                 
                                                                       
                                                           
                                                                               
                                 

         

                                         
                  
                                                                 
                                 
                                                 
                                                                                 
                                              
                                 
                 
         

                                                  

                                

                 
 
  


                                            







                                               
                  
                                             
      
                                               
                     
                                                   
      



                                          
                 
                                         

                      
                                                    

                      
                                             

                        
                                                 

                     
                                          
      

                                                      
                          
                                                    
      


                                                           









                                                                                                           

                                                                              

 
                                                                



                                                
                                                                       




                                               




                                                                   
                                                     





















                                                                             




                                                                                                       


















                                       
                                                                








                                










                                                                  

                                                                
 
                                
 
                        


                                                       

                                                         
         
 
                                                                       


                                               
 

                                                           
           





                                          
         


                             
                          










                                                              
                                       
                        
                               
                                         




                                             
                          

                               

 








                                                                                   


                                                                                  
                                                    
                                              
                                          
                                 

                             
                

                                                                                









                                            

                                 
















                                                                   







                                                                              
                                
                                                                                 



                                                                           
                              
                 


                          

                                

                      



                                                                                             
                                                     

                                      




                                                 



                 
 
                                                                   




                                            
                                                            


                                            
/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object,
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't use the
 *  rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process-specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table, as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
 */

#include <asm/uaccess.h>

#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
#include <linux/mnt_namespace.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
#include <linux/poll.h>
#include <linux/nsproxy.h>
#include <linux/oom.h>
#include "internal.h"

/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call, not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
 *
 *	The classic example of a problem is opening file descriptors
 *	in /proc for a task before it execs a suid executable.
 */
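/*
 * Illustrative sequence of the problem described above (not code from this
 * file): an unprivileged task opens /proc/self/mem or one of its own
 * /proc/self/fd entries and then execs a setuid binary.  If permissions were
 * only checked at open time, the inherited descriptor would now reference a
 * privileged address space, hence the per-access ptrace_may_attach() checks
 * in the read/write handlers below.
 */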


/*
 * Worst case buffer size needed for holding an integer: a sign, ten
 * digits, a trailing '\n' and the NUL terminator add up to 13 bytes.
 */
#define PROC_NUMBUF 13

struct pid_entry {
	int len;
	char *name;
	mode_t mode;
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;
};

#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.len  = sizeof(NAME) - 1,			\
	.name = (NAME),					\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

#define DIR(NAME, MODE, OTYPE)							\
	NOD(NAME, (S_IFDIR|(MODE)),						\
		&proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations,	\
		{} )
#define LNK(NAME, OTYPE)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = &proc_##OTYPE##_link } )
#define REG(NAME, MODE, OTYPE)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL,		\
		&proc_##OTYPE##_operations, {})
#define INF(NAME, MODE, OTYPE)				\
	NOD(NAME, (S_IFREG|(MODE)), 			\
		NULL, &proc_info_file_operations,	\
		{ .proc_read = &proc_##OTYPE } )
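/*
 * Illustrative use only (no entry named "foo" exists): REG("foo", S_IRUGO, foo)
 * describes a world-readable regular file served by proc_foo_operations,
 * INF("foo", S_IRUGO, foo) routes reads through proc_info_file_operations
 * into a proc_foo() callback, and LNK("foo", foo) creates a symlink resolved
 * by proc_foo_link().
 */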

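/*
 * Grab a counted reference to the task's fs_struct (or NULL if it has
 * already been released); the caller must drop it with put_fs_struct().
 */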
static struct fs_struct *get_fs_struct(struct task_struct *task)
{
	struct fs_struct *fs;
	task_lock(task);
	fs = task->fs;
	if(fs)
		atomic_inc(&fs->count);
	task_unlock(task);
	return fs;
}

static int get_nr_threads(struct task_struct *tsk)
{
	/* Must be called with the rcu_read_lock held */
	unsigned long flags;
	int count = 0;

	if (lock_task_sighand(tsk, &flags)) {
		count = atomic_read(&tsk->signal->count);
		unlock_task_sighand(tsk, &flags);
	}
	return count;
}

static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
	struct task_struct *task = get_proc_task(inode);
	struct fs_struct *fs = NULL;
	int result = -ENOENT;

	if (task) {
		fs = get_fs_struct(task);
		put_task_struct(task);
	}
	if (fs) {
		read_lock(&fs->lock);
		*mnt = mntget(fs->pwdmnt);
		*dentry = dget(fs->pwd);
		read_unlock(&fs->lock);
		result = 0;
		put_fs_struct(fs);
	}
	return result;
}

static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
	struct task_struct *task = get_proc_task(inode);
	struct fs_struct *fs = NULL;
	int result = -ENOENT;

	if (task) {
		fs = get_fs_struct(task);
		put_task_struct(task);
	}
	if (fs) {
		read_lock(&fs->lock);
		*mnt = mntget(fs->rootmnt);
		*dentry = dget(fs->root);
		read_unlock(&fs->lock);
		result = 0;
		put_fs_struct(fs);
	}
	return result;
}

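/*
 * MAY_PTRACE(task) is true when task is the caller itself, or a child of
 * the caller that is already being ptraced, is stopped or traced, and that
 * the security module agrees the caller may ptrace.
 */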
#define MAY_PTRACE(task) \
	(task == current || \
	(task->parent == current && \
	(task->ptrace & PT_PTRACED) && \
	 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
	 security_ptrace(current,task) == 0))

static int proc_pid_environ(struct task_struct *task, char * buffer)
{
	int res = 0;
	struct mm_struct *mm = get_task_mm(task);
	if (mm) {
		unsigned int len = mm->env_end - mm->env_start;
		if (len > PAGE_SIZE)
			len = PAGE_SIZE;
		res = access_process_vm(task, mm->env_start, buffer, len, 0);
		if (!ptrace_may_attach(task))
			res = -ESRCH;
		mmput(mm);
	}
	return res;
}

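/*
 * Copy the task's argument strings (and, when setproctitle(3) has
 * overwritten the terminating NUL, the start of its environment) into the
 * caller's one-page buffer; returns the number of bytes copied, or 0 when
 * the mm is gone or the arguments are not yet set up.
 */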
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

 	len = mm->arg_end - mm->arg_start;
 
	if (len > PAGE_SIZE)
		len = PAGE_SIZE;
 
	res = access_process_vm(task, mm->arg_start, buffer, len, 0);

	/*
	 * If the NUL at the end of args has been overwritten, then
	 * assume the application is using setproctitle(3).
	 */
	if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
		len = strnlen(buffer, res);
		if (len < res) {
		    res = len;
		} else {
			len = mm->env_end - mm->env_start;
			if (len > PAGE_SIZE - res)
				len = PAGE_SIZE - res;
			res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}

static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
	int res = 0;
	struct mm_struct *mm = get_task_mm(task);
	if (mm) {
		unsigned int nwords = 0;
		do
			nwords += 2;
		while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
		res = nwords * sizeof(mm->saved_auxv[0]);
		if (res > PAGE_SIZE)
			res = PAGE_SIZE;
		memcpy(buffer, mm->saved_auxv, res);
		mmput(mm);
	}
	return res;
}


#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol if one is found; otherwise it simply returns the address.
 */
static int proc_pid_wchan(struct task_struct *task, char *buffer)
{
	char *modname;
	const char *sym_name;
	unsigned long wchan, size, offset;
	char namebuf[KSYM_NAME_LEN+1];

	wchan = get_wchan(task);

	sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);
	if (sym_name)
		return sprintf(buffer, "%s", sym_name);
	return sprintf(buffer, "%lu", wchan);
}
#endif /* CONFIG_KALLSYMS */

#ifdef CONFIG_SCHEDSTATS
/*
 * Provides /proc/PID/schedstat
 */
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
{
	return sprintf(buffer, "%lu %lu %lu\n",
			task->sched_info.cpu_time,
			task->sched_info.run_delay,
			task->sched_info.pcnt);
}
#endif

/* The badness from the OOM killer */
unsigned long badness(struct task_struct *p, unsigned long uptime);
static int proc_oom_score(struct task_struct *task, char *buffer)
{
	unsigned long points;
	struct timespec uptime;

	do_posix_clock_monotonic_gettime(&uptime);
	points = badness(task, uptime.tv_sec);
	return sprintf(buffer, "%lu\n", points);
}

/************************************************************************/
/*                       Here the fs part begins                        */
/************************************************************************/

/* permission checks */
static int proc_fd_access_allowed(struct inode *inode)
{
	struct task_struct *task;
	int allowed = 0;
	/* Allow access to a task's file descriptors if the task is us, or
	 * if we may ptrace-attach to the process and find out that
	 * information anyway.
	 */
	task = get_proc_task(inode);
	if (task) {
		allowed = ptrace_may_attach(task);
		put_task_struct(task);
	}
	return allowed;
}

static int proc_setattr(struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = dentry->d_inode;

	if (attr->ia_valid & ATTR_MODE)
		return -EPERM;

	error = inode_change_ok(inode, attr);
	if (!error) {
		error = security_inode_setattr(dentry, attr);
		if (!error)
			error = inode_setattr(inode, attr);
	}
	return error;
}

static const struct inode_operations proc_def_inode_operations = {
	.setattr	= proc_setattr,
};

extern struct seq_operations mounts_op;
struct proc_mounts {
	struct seq_file m;
	int event;
};

static int mounts_open(struct inode *inode, struct file *file)
{
	struct task_struct *task = get_proc_task(inode);
	struct mnt_namespace *ns = NULL;
	struct proc_mounts *p;
	int ret = -EINVAL;

	if (task) {
		task_lock(task);
		if (task->nsproxy) {
			ns = task->nsproxy->mnt_ns;
			if (ns)
				get_mnt_ns(ns);
		}
		task_unlock(task);
		put_task_struct(task);
	}

	if (ns) {
		ret = -ENOMEM;
		p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
		if (p) {
			file->private_data = &p->m;
			ret = seq_open(file, &mounts_op);
			if (!ret) {
				p->m.private = ns;
				p->event = ns->event;
				return 0;
			}
			kfree(p);
		}
		put_mnt_ns(ns);
	}
	return ret;
}

static int mounts_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	struct mnt_namespace *ns = m->private;
	put_mnt_ns(ns);
	return seq_release(inode, file);
}

static unsigned mounts_poll(struct file *file, poll_table *wait)
{
	struct proc_mounts *p = file->private_data;
	struct mnt_namespace *ns = p->m.private;
	unsigned res = 0;

	poll_wait(file, &ns->poll, wait);

	spin_lock(&vfsmount_lock);
	if (p->event != ns->event) {
		p->event = ns->event;
		res = POLLERR;
	}
	spin_unlock(&vfsmount_lock);

	return res;
}

static const struct file_operations proc_mounts_operations = {
	.open		= mounts_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
	.poll		= mounts_poll,
};

extern struct seq_operations mountstats_op;
static int mountstats_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &mountstats_op);

	if (!ret) {
		struct seq_file *m = file->private_data;
		struct mnt_namespace *mnt_ns = NULL;
		struct task_struct *task = get_proc_task(inode);

		if (task) {
			task_lock(task);
			if (task->nsproxy)
				mnt_ns = task->nsproxy->mnt_ns;
			if (mnt_ns)
				get_mnt_ns(mnt_ns);
			task_unlock(task);
			put_task_struct(task);
		}

		if (mnt_ns)
			m->private = mnt_ns;
		else {
			seq_release(inode, file);
			ret = -EINVAL;
		}
	}
	return ret;
}

static const struct file_operations proc_mountstats_operations = {
	.open		= mountstats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
};

#define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */

static ssize_t proc_info_read(struct file * file, char __user * buf,
			  size_t count, loff_t *ppos)
{
	struct inode * inode = file->f_path.dentry->d_inode;
	unsigned long page;
	ssize_t length;
	struct task_struct *task = get_proc_task(inode);

	length = -ESRCH;
	if (!task)
		goto out_no_task;

	if (count > PROC_BLOCK_SIZE)
		count = PROC_BLOCK_SIZE;

	length = -ENOMEM;
	if (!(page = __get_free_page(GFP_KERNEL)))
		goto out;

	length = PROC_I(inode)->op.proc_read(task, (char*)page);

	if (length >= 0)
		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
	free_page(page);
out:
	put_task_struct(task);
out_no_task:
	return length;
}

static const struct file_operations proc_info_file_operations = {
	.read		= proc_info_read,
};

static int mem_open(struct inode* inode, struct file* file)
{
	file->private_data = (void*)((long)current->self_exec_id);
	return 0;
}

static ssize_t mem_read(struct file * file, char __user * buf,
			size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
	char *page;
	unsigned long src = *ppos;
	int ret = -ESRCH;
	struct mm_struct *mm;

	if (!task)
		goto out_no_task;

	if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
		goto out;

	ret = -ENOMEM;
	page = (char *)__get_free_page(GFP_USER);
	if (!page)
		goto out;

	ret = 0;
 
	mm = get_task_mm(task);
	if (!mm)
		goto out_free;

	ret = -EIO;
 
	if (file->private_data != (void*)((long)current->self_exec_id))
		goto out_put;

	ret = 0;
 
	while (count > 0) {
		int this_len, retval;

		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		retval = access_process_vm(task, src, page, this_len, 0);
		if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) {
			if (!ret)
				ret = -EIO;
			break;
		}

		if (copy_to_user(buf, page, retval)) {
			ret = -EFAULT;
			break;
		}
 
		ret += retval;
		src += retval;
		buf += retval;
		count -= retval;
	}
	*ppos = src;

out_put:
	mmput(mm);
out_free:
	free_page((unsigned long) page);
out:
	put_task_struct(task);
out_no_task:
	return ret;
}

#define mem_write NULL
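/*
 * mem_write() is compiled out: with mem_write defined to NULL above, the
 * #ifndef block below is skipped and proc_mem_operations gets a NULL .write.
 */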

#ifndef mem_write
/* This is a security hazard */
static ssize_t mem_write(struct file * file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	int copied;
	char *page;
	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
	unsigned long dst = *ppos;

	copied = -ESRCH;
	if (!task)
		goto out_no_task;

	if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
		goto out;

	copied = -ENOMEM;
	page = (char *)__get_free_page(GFP_USER);
	if (!page)
		goto out;

	copied = 0;
	while (count > 0) {
		int this_len, retval;

		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		if (copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}
		retval = access_process_vm(task, dst, page, this_len, 1);
		if (!retval) {
			if (!copied)
				copied = -EIO;
			break;
		}
		copied += retval;
		buf += retval;
		dst += retval;
		count -= retval;			
	}
	*ppos = dst;
	free_page((unsigned long) page);
out:
	put_task_struct(task);
out_no_task:
	return copied;
}
#endif

static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
{
	switch (orig) {
	case 0:
		file->f_pos = offset;
		break;
	case 1:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	force_successful_syscall_return();
	return file->f_pos;
}

static const struct file_operations proc_mem_operations = {
	.llseek		= mem_lseek,
	.read		= mem_read,
	.write		= mem_write,
	.open		= mem_open,
};

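/*
 * /proc/<pid> read/write handlers for the task's OOM-killer adjustment
 * (task->oomkilladj).  Lowering the value below its current setting
 * requires CAP_SYS_RESOURCE; OOM_DISABLE is accepted as a special value.
 */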
static ssize_t oom_adjust_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
	char buffer[PROC_NUMBUF];
	size_t len;
	int oom_adjust;
	loff_t __ppos = *ppos;

	if (!task)
		return -ESRCH;
	oom_adjust = task->oomkilladj;
	put_task_struct(task);

	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
	if (__ppos >= len)
		return 0;
	if (count > len-__ppos)
		count = len-__ppos;
	if (copy_to_user(buf, buffer + __ppos, count))
		return -EFAULT;
	*ppos = __ppos + count;
	return count;
}

static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF], *end;
	int oom_adjust;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	oom_adjust = simple_strtol(buffer, &end, 0);
	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
	     oom_adjust != OOM_DISABLE)
		return -EINVAL;
	if (*end == '\n')
		end++;
	task = get_proc_task(file->f_path.dentry->d_inode);
	if (!task)
		return -ESRCH;
	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
		put_task_struct(task);
		return -EACCES;
	}
	task->oomkilladj = oom_adjust;
	put_task_struct(task);
	if (end - buffer == 0)
		return -EIO;
	return end - buffer;
}

static const struct file_operations proc_oom_adjust_operations = {
	.read		= oom_adjust_read,
	.write		= oom_adjust_write,
};

#ifdef CONFIG_AUDITSYSCALL
#define TMPBUFLEN 21
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
				  size_t count, loff_t *ppos)
{
	struct inode * inode = file->f_path.dentry->d_inode;
	struct task_struct *task = get_proc_task(inode);
	ssize_t length;
	char tmpbuf[TMPBUFLEN];

	if (!task)
		return -ESRCH;
	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				audit_get_loginuid(task->audit_context));
	put_task_struct(task);
	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}

static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				   size_t count, loff_t *ppos)
{
	struct inode * inode = file->f_path.dentry->d_inode;
	char *page, *tmp;
	ssize_t length;
	uid_t loginuid;

	if (!capable(CAP_AUDIT_CONTROL))
		return -EPERM;

	if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
		return -EPERM;

	if (count >= PAGE_SIZE)
		count = PAGE_SIZE - 1;

	if (*ppos != 0) {
		/* No partial writes. */
		return -EINVAL;
	}
	page = (char*)__get_free_page(GFP_USER);
	if (!page)
		return -ENOMEM;
	length = -EFAULT;
	if (copy_from_user(page, buf, count))
		goto out_free_page;

	page[count] = '\0';
	loginuid = simple_strtoul(page, &tmp, 10);
	if (tmp == page) {
		length = -EINVAL;
		goto out_free_page;

	}
	length = audit_set_loginuid(current, loginuid);
	if (likely(length == 0))
		length = count;

out_free_page:
	free_page((unsigned long) page);
	return length;
}

static const struct file_operations proc_loginuid_operations = {
	.read		= proc_loginuid_read,
	.write		= proc_loginuid_write,
};
#endif

#ifdef CONFIG_SECCOMP
static ssize_t seccomp_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
	char __buf[20];
	loff_t __ppos = *ppos;
	size_t len;

	if (!tsk)
		return -ESRCH;
	/* no need to print the trailing zero, so use only len */
	len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
	put_task_struct(tsk);
	if (__ppos >= len)
		return 0;
	if (count > len - __ppos)
		count = len - __ppos;
	if (copy_to_user(buf, __buf + __ppos, count))
		return -EFAULT;
	*ppos = __ppos + count;
	return count;
}

static ssize_t seccomp_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
	char __buf[20], *end;
	unsigned int seccomp_mode;
	ssize_t result;

	result = -ESRCH;
	if (!tsk)
		goto out_no_task;

	/* can set it only once to be even more secure */
	result = -EPERM;
	if (unlikely(tsk->seccomp.mode))
		goto out;

	result = -EFAULT;
	memset(__buf, 0, sizeof(__buf));
	count = min(count, sizeof(__buf) - 1);
	if (copy_from_user(__buf, buf, count))
		goto out;

	seccomp_mode = simple_strtoul(__buf, &end, 0);
	if (*end == '\n')
		end++;
	result = -EINVAL;
	if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
		tsk->seccomp.mode = seccomp_mode;
		set_tsk_thread_flag(tsk, TIF_SECCOMP);
	} else
		goto out;
	result = -EIO;
	if (unlikely(!(end - __buf)))
		goto out;
	result = end - __buf;
out:
	put_task_struct(tsk);
out_no_task:
	return result;
}

static const struct file_operations proc_seccomp_operations = {
	.read		= seccomp_read,
	.write		= seccomp_write,
};
#endif /* CONFIG_SECCOMP */

#ifdef CONFIG_FAULT_INJECTION
static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
				      size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
	char buffer[PROC_NUMBUF];
	size_t len;
	int make_it_fail;
	loff_t __ppos = *ppos;

	if (!task)
		return -ESRCH;
	make_it_fail = task->make_it_fail;
	put_task_struct(task);

	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
	if (__ppos >= len)
		return 0;
	if (count > len-__ppos)
		count = len-__ppos;
	if (copy_to_user(buf, buffer + __ppos, count))
		return -EFAULT;
	*ppos = __ppos + count;
	return count;
}

static ssize_t proc_fault_inject_write(struct file * file,
			const char __user * buf, size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF], *end;
	int make_it_fail;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;
	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	make_it_fail = simple_strtol(buffer, &end, 0);
	if (*end == '\n')
		end++;
	task = get_proc_task(file->f_dentry->d_inode);
	if (!task)
		return -ESRCH;
	task->make_it_fail = make_it_fail;
	put_task_struct(task);
	if (end - buffer == 0)
		return -EIO;
	return end - buffer;
}

static const struct file_operations proc_fault_inject_operations = {
	.read		= proc_fault_inject_read,
	.write		= proc_fault_inject_write,
};
#endif

static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct inode *inode = dentry->d_inode;
	int error = -EACCES;

	/* We don't need a base pointer in the /proc filesystem */
	path_release(nd);

	/* Are we allowed to snoop on the tasks file descriptors? */
	if (!proc_fd_access_allowed(inode))
		goto out;

	error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
	nd->last_type = LAST_BIND;
out:
	return ERR_PTR(error);
}

static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
			    char __user *buffer, int buflen)
{
	struct inode * inode;
	char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
	int len;

	if (!tmp)
		return -ENOMEM;
		
	inode = dentry->d_inode;
	path = d_path(dentry, mnt, tmp, PAGE_SIZE);
	len = PTR_ERR(path);
	if (IS_ERR(path))
		goto out;
	len = tmp + PAGE_SIZE - 1 - path;

	if (len > buflen)
		len = buflen;
	if (copy_to_user(buffer, path, len))
		len = -EFAULT;
 out:
	free_page((unsigned long)tmp);
	return len;
}

static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
{
	int error = -EACCES;
	struct inode *inode = dentry->d_inode;
	struct dentry *de;
	struct vfsmount *mnt = NULL;

	/* Are we allowed to snoop on the tasks file descriptors? */
	if (!proc_fd_access_allowed(inode))
		goto out;

	error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
	if (error)
		goto out;

	error = do_proc_readlink(de, mnt, buffer, buflen);
	dput(de);
	mntput(mnt);
out:
	return error;
}

static const struct inode_operations proc_pid_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.follow_link	= proc_pid_follow_link,
	.setattr	= proc_setattr,
};


/* building an inode */

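/*
 * A task is considered dumpable here only while it still has an mm whose
 * dumpable flag is exactly 1; in that case its /proc inodes are owned by
 * its effective uid/gid (see proc_pid_make_inode() and pid_revalidate()).
 */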
static int task_dumpable(struct task_struct *task)
{
	int dumpable = 0;
	struct mm_struct *mm;

	task_lock(task);
	mm = task->mm;
	if (mm)
		dumpable = mm->dumpable;
	task_unlock(task);
	if(dumpable == 1)
		return 1;
	return 0;
}


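/*
 * Allocate a new inode for a per-process /proc entry: pin the task's
 * struct pid in the proc_inode and seed the ownership from the task's
 * effective credentials when the task is dumpable.
 */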
static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
{
	struct inode * inode;
	struct proc_inode *ei;

	/* We need a new inode */

	inode = new_inode(sb);
	if (!inode)
		goto out;

	/* Common stuff */
	ei = PROC_I(inode);
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_op = &proc_def_inode_operations;

	/*
	 * Grab a reference to the task's struct pid.
	 */
	ei->pid = get_task_pid(task, PIDTYPE_PID);
	if (!ei->pid)
		goto out_unlock;

	inode->i_uid = 0;
	inode->i_gid = 0;
	if (task_dumpable(task)) {
		inode->i_uid = task->euid;
		inode->i_gid = task->egid;
	}
	security_task_to_inode(task, inode);

out:
	return inode;

out_unlock:
	iput(inode);
	return NULL;
}

static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
	struct inode *inode = dentry->d_inode;
	struct task_struct *task;
	generic_fillattr(inode, stat);

	rcu_read_lock();
	stat->uid = 0;
	stat->gid = 0;
	task = pid_task(proc_pid(inode), PIDTYPE_PID);
	if (task) {
		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
		    task_dumpable(task)) {
			stat->uid = task->euid;
			stat->gid = task->egid;
		}
	}
	rcu_read_unlock();
	return 0;
}

/* dentry stuff */

/*
 *	Exceptional case: normally we are not allowed to unhash a busy
 * directory. In this case, however, we can do it - no aliasing problems
 * due to the way we treat inodes.
 *
 * Rewrite the inode's ownerships here because the owning task may have
 * performed a setuid(), etc.
 *
 * Before the /proc/pid/status file was created the only way to read
 * the effective uid of a process was to stat /proc/pid.  Reading
 * /proc/pid/status is slow enough that procps and other packages
 * kept stating /proc/pid.  To keep the rules in /proc simple I have
 * made this apply to all per-process world-readable and executable
 * directories.
 */
static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	struct inode *inode = dentry->d_inode;
	struct task_struct *task = get_proc_task(inode);
	if (task) {
		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
		    task_dumpable(task)) {
			inode->i_uid = task->euid;
			inode->i_gid = task->egid;
		} else {
			inode->i_uid = 0;
			inode->i_gid = 0;
		}
		inode->i_mode &= ~(S_ISUID | S_ISGID);
		security_task_to_inode(task, inode);
		put_task_struct(task);
		return 1;
	}
	d_drop(dentry);
	return 0;
}

static int pid_delete_dentry(struct dentry * dentry)
{
	/* Is the task we represent dead?
	 * If so, then don't put the dentry on the lru list,
	 * kill it immediately.
	 */
	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
}

static struct dentry_operations pid_dentry_operations =
{
	.d_revalidate	= pid_revalidate,
	.d_delete	= pid_delete_dentry,
};

/* Lookups */

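/*
 * An instantiate_t callback fills in the inode behind one directory entry
 * and wires it to the dentry; it returns NULL on success and an ERR_PTR()
 * value on failure (see proc_fd_instantiate() and proc_pident_instantiate()).
 */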
typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *);

/*
 * Fill a directory entry.
 *
 * If possible create the dcache entry and derive our inode number and
 * file type from the dcache entry.
 *
 * Since all of the proc inode numbers are dynamically generated, the inode
 * numbers do not exist until the inode is cached.  This means creating
 * the dcache entry in readdir is necessary to keep the inode numbers
 * reported by readdir in sync with the inode numbers reported
 * by stat.
 */
static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
	char *name, int len,
	instantiate_t instantiate, struct task_struct *task, void *ptr)
{
	struct dentry *child, *dir = filp->f_path.dentry;
	struct inode *inode;
	struct qstr qname;
	ino_t ino = 0;
	unsigned type = DT_UNKNOWN;

	qname.name = name;
	qname.len  = len;
	qname.hash = full_name_hash(name, len);

	child = d_lookup(dir, &qname);
	if (!child) {
		struct dentry *new;
		new = d_alloc(dir, &qname);
		if (new) {
			child = instantiate(dir->d_inode, new, task, ptr);
			if (child)
				dput(new);
			else
				child = new;
		}
	}
	if (!child || IS_ERR(child) || !child->d_inode)
		goto end_instantiate;
	inode = child->d_inode;
	if (inode) {
		ino = inode->i_ino;
		type = inode->i_mode >> 12;
	}
	dput(child);
end_instantiate:
	if (!ino)
		ino = find_inode_number(dir, &qname);
	if (!ino)
		ino = 1;
	return filldir(dirent, name, len, filp->f_pos, ino, type);
}

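/*
 * Convert a dentry name that must be a plain decimal number (no leading
 * zeroes, no overflow) into an unsigned int; returns ~0U otherwise.
 */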
static unsigned name_to_int(struct dentry *dentry)
{
	const char *name = dentry->d_name.name;
	int len = dentry->d_name.len;
	unsigned n = 0;

	if (len > 1 && *name == '0')
		goto out;
	while (len-- > 0) {
		unsigned c = *name++ - '0';
		if (c > 9)
			goto out;
		if (n >= (~0U-9)/10)
			goto out;
		n *= 10;
		n += c;
	}
	return n;
out:
	return ~0U;
}

static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
	struct task_struct *task = get_proc_task(inode);
	struct files_struct *files = NULL;
	struct file *file;
	int fd = proc_fd(inode);

	if (task) {
		files = get_files_struct(task);
		put_task_struct(task);
	}
	if (files) {
		/*
		 * We are not taking a ref to the file structure, so we must
		 * hold ->file_lock.
		 */
		spin_lock(&files->file_lock);
		file = fcheck_files(files, fd);
		if (file) {
			*mnt = mntget(file->f_path.mnt);
			*dentry = dget(file->f_path.dentry);
			spin_unlock(&files->file_lock);
			put_files_struct(files);
			return 0;
		}
		spin_unlock(&files->file_lock);
		put_files_struct(files);
	}
	return -ENOENT;
}

static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	struct inode *inode = dentry->d_inode;
	struct task_struct *task = get_proc_task(inode);
	int fd = proc_fd(inode);
	struct files_struct *files;

	if (task) {
		files = get_files_struct(task);
		if (files) {
			rcu_read_lock();
			if (fcheck_files(files, fd)) {
				rcu_read_unlock();
				put_files_struct(files);
				if (task_dumpable(task)) {
					inode->i_uid = task->euid;
					inode->i_gid = task->egid;
				} else {
					inode->i_uid = 0;
					inode->i_gid = 0;
				}
				inode->i_mode &= ~(S_ISUID | S_ISGID);
				security_task_to_inode(task, inode);
				put_task_struct(task);
				return 1;
			}
			rcu_read_unlock();
			put_files_struct(files);
		}
		put_task_struct(task);
	}
	d_drop(dentry);
	return 0;
}

static struct dentry_operations tid_fd_dentry_operations =
{
	.d_revalidate	= tid_fd_revalidate,
	.d_delete	= pid_delete_dentry,
};

static struct dentry *proc_fd_instantiate(struct inode *dir,
	struct dentry *dentry, struct task_struct *task, void *ptr)
{
	unsigned fd = *(unsigned *)ptr;
	struct file *file;
	struct files_struct *files;
 	struct inode *inode;
 	struct proc_inode *ei;
	struct dentry *error = ERR_PTR(-ENOENT);

	inode = proc_pid_make_inode(dir->i_sb, task);
	if (!inode)
		goto out;
	ei = PROC_I(inode);
	ei->fd = fd;
	files = get_files_struct(task);
	if (!files)
		goto out_iput;
	inode->i_mode = S_IFLNK;

	/*
	 * We are not taking a ref to the file structure, so we must
	 * hold ->file_lock.
	 */
	spin_lock(&files->file_lock);
	file = fcheck_files(files, fd);
	if (!file)
		goto out_unlock;
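	/* f_mode bit 0 is FMODE_READ, bit 1 is FMODE_WRITE */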
	if (file->f_mode & 1)
		inode->i_mode |= S_IRUSR | S_IXUSR;
	if (file->f_mode & 2)
		inode->i_mode |= S_IWUSR | S_IXUSR;
	spin_unlock(&files->file_lock);
	put_files_struct(files);

	inode->i_op = &proc_pid_link_inode_operations;
	inode->i_size = 64;
	ei->op.proc_get_link = proc_fd_link;
	dentry->d_op = &tid_fd_dentry_operations;
	d_add(dentry, inode);
	/* Close the race of the process dying before we return the dentry */
	if (tid_fd_revalidate(dentry, NULL))
		error = NULL;

 out:
	return error;
out_unlock:
	spin_unlock(&files->file_lock);
	put_files_struct(files);
out_iput:
	iput(inode);
	goto out;
}

static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
{
	struct task_struct *task = get_proc_task(dir);
	unsigned fd = name_to_int(dentry);
	struct dentry *result = ERR_PTR(-ENOENT);

	if (!task)
		goto out_no_task;
	if (fd == ~0U)
		goto out;

	result = proc_fd_instantiate(dir, dentry, task, &fd);
out:
	put_task_struct(task);
out_no_task:
	return result;
}

static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
	struct task_struct *task, int fd)
{
	char name[PROC_NUMBUF];
	int len = snprintf(name, sizeof(name), "%d", fd);
	return proc_fill_cache(filp, dirent, filldir, name, len,
				proc_fd_instantiate, task, &fd);
}

static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct task_struct *p = get_proc_task(inode);
	unsigned int fd, tid, ino;
	int retval;
	struct files_struct * files;
	struct fdtable *fdt;

	retval = -ENOENT;
	if (!p)
		goto out_no_task;
	retval = 0;
	tid = p->pid;

	fd = filp->f_pos;
	switch (fd) {
		case 0:
			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
		case 1:
			ino = parent_ino(dentry);
			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
		default:
			files = get_files_struct(p);
			if (!files)
				goto out;
			rcu_read_lock();
			fdt = files_fdtable(files);
			for (fd = filp->f_pos-2;
			     fd < fdt->max_fds;
			     fd++, filp->f_pos++) {

				if (!fcheck_files(files, fd))
					continue;
				rcu_read_unlock();

				if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) {
					rcu_read_lock();
					break;
				}
				rcu_read_lock();
			}
			rcu_read_unlock();
			put_files_struct(files);
	}
out:
	put_task_struct(p);
out_no_task:
	return retval;
}

static const struct file_operations proc_fd_operations = {
	.read		= generic_read_dir,
	.readdir	= proc_readfd,
};

/*
 * proc directories can do almost nothing.
 */
static const struct inode_operations proc_fd_inode_operations = {
	.lookup		= proc_lookupfd,
	.setattr	= proc_setattr,
};

static struct dentry *proc_pident_instantiate(struct inode *dir,
	struct dentry *dentry, struct task_struct *task, void *ptr)
{
	struct pid_entry *p = ptr;
	struct inode *inode;
	struct proc_inode *ei;
	struct dentry *error = ERR_PTR(-EINVAL);

	inode = proc_pid_make_inode(dir->i_sb, task);
	if (!inode)
		goto out;

	ei = PROC_I(inode);
	inode->i_mode = p->mode;
	if (S_ISDIR(inode->i_mode))
		inode->i_nlink = 2;	/* Use getattr to fix if necessary */
	if (p->iop)
		inode->i_op = p->iop;
	if (p->fop)
		inode->i_fop = p->fop;
	ei->op = p->op;
	dentry->d_op = &pid_dentry_operations;
	d_add(dentry, inode);
	/* Close the race of the process dying before we return the dentry */
	if (pid_revalidate(dentry, NULL))
		error = NULL;
out:
	return error;
}

static struct dentry *proc_pident_lookup(struct inode *dir,
					 struct dentry *dentry,
					 struct pid_entry *ents,
					 unsigned int nents)
{
	struct inode *inode;
	struct dentry *error;
	struct task_struct *task = get_proc_task(dir);
	struct pid_entry *p, *last;

	error = ERR_PTR(-ENOENT);
	inode = NULL;

	if (!task)
		goto out_no_task;

	/*
	 * Yes, it does not scale. And it should not. Don't add
	 * new entries into /proc/<tgid>/ without very good reasons.
	 */
	last = &ents[nents - 1];
	for (p = ents; p <= last; p++) {
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len))
			break;
	}
	if (p > last)
		goto out;

	error = proc_pident_instantiate(dir, dentry, task, p);
out:
	put_task_struct(task);
out_no_task:
	return error;
}

static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
	struct task_struct *task, struct pid_entry *p)
{
	return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
				proc_pident_instantiate, task, p);
}

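/*
 * Shared readdir over a static pid_entry table: f_pos 0 and 1 emit
 * "." and "..", and every later position n emits ents[n - 2].
 */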
static int proc_pident_readdir(struct file *filp,
		void *dirent, filldir_t filldir,
		struct pid_entry *ents, unsigned int nents)
{
	int i;
	int pid;
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct task_struct *task = get_proc_task(inode);
	struct pid_entry *p, *last;
	ino_t ino;
	int ret;

	ret = -ENOENT;
	if (!task)
		goto out_no_task;

	ret = 0;
	pid = task->pid;
	i = filp->f_pos;
	switch (i) {
	case 0:
		ino = inode->i_ino;
		if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
			goto out;
		i++;
		filp->f_pos++;
		/* fall through */
	case 1:
		ino = parent_ino(dentry);
		if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
			goto out;
		i++;
		filp->f_pos++;
		/* fall through */
	default:
		i -= 2;
		if (i >= nents) {
			ret = 1;
			goto out;
		}
		p = ents + i;
		last = &ents[nents - 1];
		while (p <= last) {
			if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
				goto out;
			filp->f_pos++;
			p++;
		}
	}

	ret = 1;
out:
	put_task_struct(task);
out_no_task:
	return ret;
}

#ifdef CONFIG_SECURITY
static ssize_t proc_pid_attr_read(struct file *file, char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	unsigned long page;
	ssize_t length;
	struct task_struct *task = get_proc_task(inode);

	length = -ESRCH;
	if (!task)
		goto out_no_task;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;
	length = -ENOMEM;
	page = __get_free_page(GFP_KERNEL);
	if (!page)
		goto out;

	length = security_getprocattr(task,
				      (char*)file->f_path.dentry->d_name.name,
				      (void*)page, count);
	if (length >= 0)
		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
	free_page(page);
out:
	put_task_struct(task);
out_no_task:
	return length;
}

static ssize_t proc_pid_attr_write(struct file *file, const char __user *buf,
				   size_t count, loff_t *ppos)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	char *page;
	ssize_t length;
	struct task_struct *task = get_proc_task(inode);

	length = -ESRCH;
	if (!task)
		goto out_no_task;
	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	/* No partial writes. */
	length = -EINVAL;
	if (*ppos != 0)
		goto out;

	length = -ENOMEM;
	page = (char*)__get_free_page(GFP_USER);
	if (!page)
		goto out;

	length = -EFAULT;
	if (copy_from_user(page, buf, count))
		goto out_free;

	length = security_setprocattr(task,
				      (char*)file->f_path.dentry->d_name.name,
				      (void*)page, count);
out_free:
	free_page((unsigned long) page);
out:
	put_task_struct(task);
out_no_task:
	return length;
}

static const struct file_operations proc_pid_attr_operations = {
	.read		= proc_pid_attr_read,
	.write		= proc_pid_attr_write,
};

static struct pid_entry attr_dir_stuff[] = {
	REG("current",    S_IRUGO|S_IWUGO, pid_attr),
	REG("prev",       S_IRUGO,	   pid_attr),
	REG("exec",       S_IRUGO|S_IWUGO, pid_attr),
	REG("fscreate",   S_IRUGO|S_IWUGO, pid_attr),
	REG("keycreate",  S_IRUGO|S_IWUGO, pid_attr),
	REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr),
};
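/*
 * These entries surface per-task LSM attributes through
 * security_getprocattr()/security_setprocattr(); with SELinux, for
 * instance, reading /proc/<pid>/attr/current is commonly used to obtain
 * the task's security context (an illustrative note, not something
 * enforced by this table itself).
 */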

static int proc_attr_dir_readdir(struct file *filp,
			     void *dirent, filldir_t filldir)
{
	return proc_pident_readdir(filp, dirent, filldir,
				   attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
}

static const struct file_operations proc_attr_dir_operations = {
	.read		= generic_read_dir,
	.readdir	= proc_attr_dir_readdir,
};

static struct dentry *proc_attr_dir_lookup(struct inode *dir,
				struct dentry *dentry, struct nameidata *nd)
{
	return proc_pident_lookup(dir, dentry,
				  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
}

static const struct inode_operations proc_attr_dir_inode_operations = {
	.lookup		= proc_attr_dir_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};

#endif

/*
 * /proc/self:
 */
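/*
 * Both the readlink and follow_link handlers below render current->tgid,
 * so "/proc/self" always points at the caller's own /proc/<tgid>
 * directory; a task with tgid 4321, say, sees "self -> 4321".
 */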
static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
			      int buflen)
{
	char tmp[PROC_NUMBUF];
	sprintf(tmp, "%d", current->tgid);
	return vfs_readlink(dentry, buffer, buflen, tmp);
}

static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char tmp[PROC_NUMBUF];
	sprintf(tmp, "%d", current->tgid);
	return ERR_PTR(vfs_follow_link(nd, tmp));
}

static const struct inode_operations proc_self_inode_operations = {
	.readlink	= proc_self_readlink,
	.follow_link	= proc_self_follow_link,
};

/*
 * proc base
 *
 * These are the directory entries in the root directory of /proc
 * that properly belong to the /proc filesystem, as they
 * describe something that is process-related.
 */
static struct pid_entry proc_base_stuff[] = {
	NOD("self", S_IFLNK|S_IRWXUGO,
		&proc_self_inode_operations, NULL, {}),
};
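/*
 * NOD(), defined earlier in this file, packs the name, its length, the
 * mode and the inode/file operations into a struct pid_entry consumed by
 * proc_base_lookup()/proc_base_instantiate() below; "self" only needs
 * custom inode operations, hence the NULL fop and the empty union.
 */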

/*
 *	Exceptional case: normally we are not allowed to unhash a busy
 * directory. In this case, however, we can do it - no aliasing problems
 * due to the way we treat inodes.
 */
static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	struct inode *inode = dentry->d_inode;
	struct task_struct *task = get_proc_task(inode);
	if (task) {
		put_task_struct(task);
		return 1;
	}
	d_drop(dentry);
	return 0;
}

static struct dentry_operations proc_base_dentry_operations = {
	.d_revalidate	= proc_base_revalidate,
	.d_delete	= pid_delete_dentry,
};

static struct dentry *proc_base_instantiate(struct inode *dir,
	struct dentry *dentry, struct task_struct *task, void *ptr)
{
	struct pid_entry *p = ptr;
	struct inode *inode;
	struct proc_inode *ei;
	struct dentry *error = ERR_PTR(-EINVAL);

	/* Allocate the inode */
	error = ERR_PTR(-ENOMEM);
	inode = new_inode(dir->i_sb);
	if (!inode)
		goto out;

	/* Initialize the inode */
	ei = PROC_I(inode);
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	/*
	 * Grab a reference to the task's struct pid.
	 */
	ei->pid = get_task_pid(task, PIDTYPE_PID);
	if (!ei->pid)
		goto out_iput;

	inode->i_uid = 0;
	inode->i_gid = 0;
	inode->i_mode = p->mode;
	if (S_ISDIR(inode->i_mode))
		inode->i_nlink = 2;
	if (S_ISLNK(inode->i_mode))
		inode->i_size = 64;
	if (p->iop)
		inode->i_op = p->iop;
	if (p->fop)
		inode->i_fop = p->fop;
	ei->op = p->op;
	dentry->d_op = &proc_base_dentry_operations;
	d_add(dentry, inode);
	error = NULL;
out:
	return error;
out_iput:
	iput(inode);
	goto out;
}
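/*
 * Note on proc_base_instantiate() above: unlike the fd/pident
 * instantiators earlier in this file, it returns success unconditionally
 * once the struct pid reference has been taken, presumably because
 * holding that reference is enough to keep the entry meaningful.
 */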

static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
{
	struct dentry *error;
	struct task_struct *task = get_proc_task(dir);
	struct pid_entry *p, *last;

	error = ERR_PTR(-ENOENT);

	if (!task)
		goto out_no_task;

	/* Lookup the directory entry */
	last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
	for (p = proc_base_stuff; p <= last; p++) {
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len))
			break;
	}
	if (p > last)
		goto out;

	error = proc_base_instantiate(dir, dentry, task, p);

out:
	put_task_struct(task);
out_no_task:
	return error;
}

static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
	struct task_struct *task, struct pid_entry *p)
{
	return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
				proc_base_instantiate, task, p);
}

#ifdef CONFIG_TASK_IO_ACCOUNTING
static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
{
	return sprintf(buffer,
#ifdef CONFIG_TASK_XACCT
			"rchar: %llu\n"
			"wchar: %llu\n"
			"syscr: %llu\n"
			"syscw: %llu\n"
#endif
			"read_bytes: %llu\n"
			"write_bytes: %llu\n"