diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/ABI/testing/sysfs-ibft | 23 | ||||
-rw-r--r-- | Documentation/DocBook/kernel-api.tmpl | 5 | ||||
-rw-r--r-- | Documentation/DocBook/kernel-locking.tmpl | 6 | ||||
-rw-r--r-- | Documentation/cpusets.txt | 72 | ||||
-rw-r--r-- | Documentation/feature-removal-schedule.txt | 15 | ||||
-rw-r--r-- | Documentation/filesystems/sysfs.txt | 9 | ||||
-rw-r--r-- | Documentation/firmware_class/firmware_sample_driver.c | 115 | ||||
-rw-r--r-- | Documentation/firmware_class/firmware_sample_firmware_class.c | 207 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 12 | ||||
-rw-r--r-- | Documentation/power/devices.txt | 5 | ||||
-rw-r--r-- | Documentation/powerpc/booting-without-of.txt | 622 | ||||
-rw-r--r-- | Documentation/powerpc/phyp-assisted-dump.txt | 127 | ||||
-rw-r--r-- | Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c | 96 | ||||
-rw-r--r-- | Documentation/prctl/disable-tsc-on-off-stress-test.c | 95 | ||||
-rw-r--r-- | Documentation/prctl/disable-tsc-test.c | 94 | ||||
-rw-r--r-- | Documentation/scheduler/sched-rt-group.txt | 188 |
16 files changed, 1298 insertions, 393 deletions
diff --git a/Documentation/ABI/testing/sysfs-ibft b/Documentation/ABI/testing/sysfs-ibft new file mode 100644 index 000000000000..c2b7d1154bec --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ibft | |||
@@ -0,0 +1,23 @@ | |||
1 | What: /sys/firmware/ibft/initiator | ||
2 | Date: November 2007 | ||
3 | Contact: Konrad Rzeszutek <ketuzsezr@darnok.org> | ||
4 | Description: The /sys/firmware/ibft/initiator directory will contain | ||
5 | files that expose the iSCSI Boot Firmware Table initiator data. | ||
6 | Usually this contains the Initiator name. | ||
7 | |||
8 | What: /sys/firmware/ibft/targetX | ||
9 | Date: November 2007 | ||
10 | Contact: Konrad Rzeszutek <ketuzsezr@darnok.org> | ||
11 | Description: The /sys/firmware/ibft/targetX directory will contain | ||
12 | files that expose the iSCSI Boot Firmware Table target data. | ||
13 | Usually this contains the target's IP address, boot LUN, | ||
14 | target name, and what NIC it is associated with. It can also | ||
15 | contain the CHAP name (and password), the reverse CHAP | ||
16 | name (and password). | ||
17 | |||
18 | What: /sys/firmware/ibft/ethernetX | ||
19 | Date: November 2007 | ||
20 | Contact: Konrad Rzeszutek <ketuzsezr@darnok.org> | ||
21 | Description: The /sys/firmware/ibft/ethernetX directory will contain | ||
22 | files that expose the iSCSI Boot Firmware Table NIC data. | ||
23 | This can contain the IP address, MAC, and gateway of the NIC. | ||
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index dc0f30c3e571..488dd4a4945b 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -297,11 +297,6 @@ X!Earch/x86/kernel/mca_32.c | |||
297 | !Ikernel/acct.c | 297 | !Ikernel/acct.c |
298 | </chapter> | 298 | </chapter> |
299 | 299 | ||
300 | <chapter id="pmfuncs"> | ||
301 | <title>Power Management</title> | ||
302 | !Ekernel/power/pm.c | ||
303 | </chapter> | ||
304 | |||
305 | <chapter id="devdrivers"> | 300 | <chapter id="devdrivers"> |
306 | <title>Device drivers infrastructure</title> | 301 | <title>Device drivers infrastructure</title> |
307 | <sect1><title>Device Drivers Base</title> | 302 | <sect1><title>Device Drivers Base</title> |
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 2e9d6b41f034..435413ca40dc 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl | |||
@@ -241,7 +241,7 @@ | |||
241 | </para> | 241 | </para> |
242 | <para> | 242 | <para> |
243 | The third type is a semaphore | 243 | The third type is a semaphore |
244 | (<filename class="headerfile">include/asm/semaphore.h</filename>): it | 244 | (<filename class="headerfile">include/linux/semaphore.h</filename>): it |
245 | can have more than one holder at any time (the number decided at | 245 | can have more than one holder at any time (the number decided at |
246 | initialization time), although it is most commonly used as a | 246 | initialization time), although it is most commonly used as a |
247 | single-holder lock (a mutex). If you can't get a semaphore, your | 247 | single-holder lock (a mutex). If you can't get a semaphore, your |
@@ -290,7 +290,7 @@ | |||
290 | <para> | 290 | <para> |
291 | If you have a data structure which is only ever accessed from | 291 | If you have a data structure which is only ever accessed from |
292 | user context, then you can use a simple semaphore | 292 | user context, then you can use a simple semaphore |
293 | (<filename>linux/asm/semaphore.h</filename>) to protect it. This | 293 | (<filename>include/linux/semaphore.h</filename>) to protect it. This |
294 | is the most trivial case: you initialize the semaphore to the number | 294 | is the most trivial case: you initialize the semaphore to the number |
295 | of resources available (usually 1), and call | 295 | of resources available (usually 1), and call |
296 | <function>down_interruptible()</function> to grab the semaphore, and | 296 | <function>down_interruptible()</function> to grab the semaphore, and |
@@ -1656,7 +1656,7 @@ the amount of locking which needs to be done. | |||
1656 | #include <linux/slab.h> | 1656 | #include <linux/slab.h> |
1657 | #include <linux/string.h> | 1657 | #include <linux/string.h> |
1658 | +#include <linux/rcupdate.h> | 1658 | +#include <linux/rcupdate.h> |
1659 | #include <asm/semaphore.h> | 1659 | #include <linux/semaphore.h> |
1660 | #include <asm/errno.h> | 1660 | #include <asm/errno.h> |
1661 | 1661 | ||
1662 | struct object | 1662 | struct object |
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index ad2bb3b3acc1..aa854b9b18cd 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt | |||
@@ -8,6 +8,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. | |||
8 | Modified by Paul Jackson <pj@sgi.com> | 8 | Modified by Paul Jackson <pj@sgi.com> |
9 | Modified by Christoph Lameter <clameter@sgi.com> | 9 | Modified by Christoph Lameter <clameter@sgi.com> |
10 | Modified by Paul Menage <menage@google.com> | 10 | Modified by Paul Menage <menage@google.com> |
11 | Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | ||
11 | 12 | ||
12 | CONTENTS: | 13 | CONTENTS: |
13 | ========= | 14 | ========= |
@@ -20,7 +21,8 @@ CONTENTS: | |||
20 | 1.5 What is memory_pressure ? | 21 | 1.5 What is memory_pressure ? |
21 | 1.6 What is memory spread ? | 22 | 1.6 What is memory spread ? |
22 | 1.7 What is sched_load_balance ? | 23 | 1.7 What is sched_load_balance ? |
23 | 1.8 How do I use cpusets ? | 24 | 1.8 What is sched_relax_domain_level ? |
25 | 1.9 How do I use cpusets ? | ||
24 | 2. Usage Examples and Syntax | 26 | 2. Usage Examples and Syntax |
25 | 2.1 Basic Usage | 27 | 2.1 Basic Usage |
26 | 2.2 Adding/removing cpus | 28 | 2.2 Adding/removing cpus |
@@ -497,7 +499,73 @@ the cpuset code to update these sched domains, it compares the new | |||
497 | partition requested with the current, and updates its sched domains, | 499 | partition requested with the current, and updates its sched domains, |
498 | removing the old and adding the new, for each change. | 500 | removing the old and adding the new, for each change. |
499 | 501 | ||
500 | 1.8 How do I use cpusets ? | 502 | |
503 | 1.8 What is sched_relax_domain_level ? | ||
504 | -------------------------------------- | ||
505 | |||
506 | In sched domain, the scheduler migrates tasks in 2 ways; periodic load | ||
507 | balance on tick, and at time of some schedule events. | ||
508 | |||
509 | When a task is woken up, the scheduler tries to move the task to an idle CPU. | ||
510 | For example, if a task A running on CPU X activates another task B | ||
511 | on the same CPU X, and if CPU Y is X's sibling and is idle, | ||
512 | then the scheduler migrates task B to CPU Y so that task B can start on | ||
513 | CPU Y without waiting for task A on CPU X. | ||
514 | |||
515 | And if a CPU runs out of tasks in its runqueue, the CPU tries to pull | ||
516 | extra tasks from other busy CPUs to help them before it goes | ||
517 | idle. | ||
518 | |||
519 | Of course, since it takes some searching cost to find movable tasks and/or | ||
520 | idle CPUs, the scheduler might not search all CPUs in the domain | ||
521 | every time. In fact, on some architectures, the searching ranges on | ||
522 | events are limited to the same socket or node where the CPU is located, | ||
523 | while the load balance on tick searches all. | ||
524 | |||
525 | For example, assume CPU Z is relatively far from CPU X. Even if CPU Z | ||
526 | is idle while CPU X and its siblings are busy, the scheduler can't migrate | ||
527 | woken task B from X to Z since it is out of its searching range. | ||
528 | As a result, task B on CPU X needs to wait for task A or for load balance | ||
529 | on the next tick. For some applications in special situations, waiting | ||
530 | 1 tick may be too long. | ||
531 | |||
532 | The 'sched_relax_domain_level' file allows you to request changing | ||
533 | this searching range as you like. This file takes an int value which | ||
534 | indicates the size of the searching range in levels, ideally as follows, | ||
535 | or else the initial value -1, which indicates the cpuset has no request. | ||
536 | |||
537 | -1 : no request. use system default or follow request of others. | ||
538 | 0 : no search. | ||
539 | 1 : search siblings (hyperthreads in a core). | ||
540 | 2 : search cores in a package. | ||
541 | 3 : search cpus in a node [= system wide on non-NUMA system] | ||
542 | ( 4 : search nodes in a chunk of node [on NUMA system] ) | ||
543 | ( 5~ : search system wide [on NUMA system]) | ||
544 | |||
545 | This file is per-cpuset and affects the sched domain that the cpuset | ||
546 | belongs to. Therefore if the flag 'sched_load_balance' of a cpuset | ||
547 | is disabled, then 'sched_relax_domain_level' has no effect since | ||
548 | there is no sched domain belonging to the cpuset. | ||
549 | |||
550 | If multiple cpusets are overlapping and hence they form a single sched | ||
551 | domain, the largest value among those is used. Be careful, if one | ||
552 | requests 0 and others are -1 then 0 is used. | ||
553 | |||
554 | Note that modifying this file will have both good and bad effects, | ||
555 | and whether it is acceptable or not will depend on your situation. | ||
556 | Don't modify this file if you are not sure. | ||
557 | |||
558 | If your situation is: | ||
559 | - The migration costs between each cpu can be assumed to be considerably | ||
560 | small (for you) due to your special application's behavior or | ||
561 | special hardware support for CPU cache etc. | ||
562 | - The searching cost doesn't have an impact (for you) or you can make | ||
563 | the searching cost small enough by managing the cpuset to be compact etc. | ||
564 | - Low latency is required even if it sacrifices cache hit rate etc. | ||
565 | then increasing 'sched_relax_domain_level' would benefit you. | ||
566 | |||
567 | |||
568 | 1.9 How do I use cpusets ? | ||
501 | -------------------------- | 569 | -------------------------- |
502 | 570 | ||
503 | In order to minimize the impact of cpusets on critical kernel | 571 | In order to minimize the impact of cpusets on critical kernel |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index af0e9393bf68..b45ea28abc99 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -282,6 +282,13 @@ Why: Not used in-tree. The current out-of-tree users used it to | |||
282 | out-of-tree driver. | 282 | out-of-tree driver. |
283 | Who: Thomas Gleixner <tglx@linutronix.de> | 283 | Who: Thomas Gleixner <tglx@linutronix.de> |
284 | 284 | ||
285 | ---------------------------- | ||
286 | |||
287 | What: usedac i386 kernel parameter | ||
288 | When: 2.6.27 | ||
289 | Why: replaced by allowdac and no dac combination | ||
290 | Who: Glauber Costa <gcosta@redhat.com> | ||
291 | |||
285 | --------------------------- | 292 | --------------------------- |
286 | 293 | ||
287 | What: /sys/o2cb symlink | 294 | What: /sys/o2cb symlink |
@@ -291,3 +298,11 @@ Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb | |||
291 | ocfs2-tools. 2 years should be sufficient time to phase in new versions | 298 | ocfs2-tools. 2 years should be sufficient time to phase in new versions |
292 | which know to look in /sys/fs/o2cb. | 299 | which know to look in /sys/fs/o2cb. |
293 | Who: ocfs2-devel@oss.oracle.com | 300 | Who: ocfs2-devel@oss.oracle.com |
301 | |||
302 | --------------------------- | ||
303 | |||
304 | What: asm/semaphore.h | ||
305 | When: 2.6.26 | ||
306 | Why: Implementation became generic; users should now include | ||
307 | linux/semaphore.h instead. | ||
308 | Who: Matthew Wilcox <willy@linux.intel.com> | ||
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 4598ef7b622b..7f27b8f840d0 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt | |||
@@ -176,8 +176,10 @@ implementations: | |||
176 | Recall that an attribute should only be exporting one value, or an | 176 | Recall that an attribute should only be exporting one value, or an |
177 | array of similar values, so this shouldn't be that expensive. | 177 | array of similar values, so this shouldn't be that expensive. |
178 | 178 | ||
179 | This allows userspace to do partial reads and seeks arbitrarily over | 179 | This allows userspace to do partial reads and forward seeks |
180 | the entire file at will. | 180 | arbitrarily over the entire file at will. If userspace seeks back to |
181 | zero or does a pread(2) with an offset of '0' the show() method will | ||
182 | be called again, rearmed, to fill the buffer. | ||
181 | 183 | ||
182 | - On write(2), sysfs expects the entire buffer to be passed during the | 184 | - On write(2), sysfs expects the entire buffer to be passed during the |
183 | first write. Sysfs then passes the entire buffer to the store() | 185 | first write. Sysfs then passes the entire buffer to the store() |
@@ -192,6 +194,9 @@ implementations: | |||
192 | 194 | ||
193 | Other notes: | 195 | Other notes: |
194 | 196 | ||
197 | - Writing causes the show() method to be rearmed regardless of current | ||
198 | file position. | ||
199 | |||
195 | - The buffer will always be PAGE_SIZE bytes in length. On i386, this | 200 | - The buffer will always be PAGE_SIZE bytes in length. On i386, this |
196 | is 4096. | 201 | is 4096. |
197 | 202 | ||
diff --git a/Documentation/firmware_class/firmware_sample_driver.c b/Documentation/firmware_class/firmware_sample_driver.c deleted file mode 100644 index 6865cbe075ec..000000000000 --- a/Documentation/firmware_class/firmware_sample_driver.c +++ /dev/null | |||
@@ -1,115 +0,0 @@ | |||
1 | /* | ||
2 | * firmware_sample_driver.c - | ||
3 | * | ||
4 | * Copyright (c) 2003 Manuel Estrada Sainz | ||
5 | * | ||
6 | * Sample code on how to use request_firmware() from drivers. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/device.h> | ||
14 | #include <linux/string.h> | ||
15 | |||
16 | #include "linux/firmware.h" | ||
17 | |||
18 | static struct device ghost_device = { | ||
19 | .bus_id = "ghost0", | ||
20 | }; | ||
21 | |||
22 | |||
23 | static void sample_firmware_load(char *firmware, int size) | ||
24 | { | ||
25 | u8 buf[size+1]; | ||
26 | memcpy(buf, firmware, size); | ||
27 | buf[size] = '\0'; | ||
28 | printk(KERN_INFO "firmware_sample_driver: firmware: %s\n", buf); | ||
29 | } | ||
30 | |||
31 | static void sample_probe_default(void) | ||
32 | { | ||
33 | /* uses the default method to get the firmware */ | ||
34 | const struct firmware *fw_entry; | ||
35 | printk(KERN_INFO "firmware_sample_driver: a ghost device got inserted :)\n"); | ||
36 | |||
37 | if(request_firmware(&fw_entry, "sample_driver_fw", &ghost_device)!=0) | ||
38 | { | ||
39 | printk(KERN_ERR | ||
40 | "firmware_sample_driver: Firmware not available\n"); | ||
41 | return; | ||
42 | } | ||
43 | |||
44 | sample_firmware_load(fw_entry->data, fw_entry->size); | ||
45 | |||
46 | release_firmware(fw_entry); | ||
47 | |||
48 | /* finish setting up the device */ | ||
49 | } | ||
50 | static void sample_probe_specific(void) | ||
51 | { | ||
52 | /* Uses some specific hotplug support to get the firmware from | ||
53 | * userspace directly into the hardware, or via some sysfs file */ | ||
54 | |||
55 | /* NOTE: This currently doesn't work */ | ||
56 | |||
57 | printk(KERN_INFO "firmware_sample_driver: a ghost device got inserted :)\n"); | ||
58 | |||
59 | if(request_firmware(NULL, "sample_driver_fw", &ghost_device)!=0) | ||
60 | { | ||
61 | printk(KERN_ERR | ||
62 | "firmware_sample_driver: Firmware load failed\n"); | ||
63 | return; | ||
64 | } | ||
65 | |||
66 | /* request_firmware blocks until userspace finished, so at | ||
67 | * this point the firmware should be already in the device */ | ||
68 | |||
69 | /* finish setting up the device */ | ||
70 | } | ||
71 | static void sample_probe_async_cont(const struct firmware *fw, void *context) | ||
72 | { | ||
73 | if(!fw){ | ||
74 | printk(KERN_ERR | ||
75 | "firmware_sample_driver: firmware load failed\n"); | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | printk(KERN_INFO "firmware_sample_driver: device pointer \"%s\"\n", | ||
80 | (char *)context); | ||
81 | sample_firmware_load(fw->data, fw->size); | ||
82 | } | ||
83 | static void sample_probe_async(void) | ||
84 | { | ||
85 | /* Let's say that I can't sleep */ | ||
86 | int error; | ||
87 | error = request_firmware_nowait (THIS_MODULE, FW_ACTION_NOHOTPLUG, | ||
88 | "sample_driver_fw", &ghost_device, | ||
89 | "my device pointer", | ||
90 | sample_probe_async_cont); | ||
91 | if(error){ | ||
92 | printk(KERN_ERR | ||
93 | "firmware_sample_driver:" | ||
94 | " request_firmware_nowait failed\n"); | ||
95 | } | ||
96 | } | ||
97 | |||
98 | static int sample_init(void) | ||
99 | { | ||
100 | device_initialize(&ghost_device); | ||
101 | /* since there is no real hardware insertion I just call the | ||
102 | * sample probe functions here */ | ||
103 | sample_probe_specific(); | ||
104 | sample_probe_default(); | ||
105 | sample_probe_async(); | ||
106 | return 0; | ||
107 | } | ||
108 | static void __exit sample_exit(void) | ||
109 | { | ||
110 | } | ||
111 | |||
112 | module_init (sample_init); | ||
113 | module_exit (sample_exit); | ||
114 | |||
115 | MODULE_LICENSE("GPL"); | ||
diff --git a/Documentation/firmware_class/firmware_sample_firmware_class.c b/Documentation/firmware_class/firmware_sample_firmware_class.c deleted file mode 100644 index 2de62854f0e5..000000000000 --- a/Documentation/firmware_class/firmware_sample_firmware_class.c +++ /dev/null | |||
@@ -1,207 +0,0 @@ | |||
1 | /* | ||
2 | * firmware_sample_firmware_class.c - | ||
3 | * | ||
4 | * Copyright (c) 2003 Manuel Estrada Sainz | ||
5 | * | ||
6 | * NOTE: This is just a proof of concept, if you think that your driver would | ||
7 | * be well served by this mechanism please contact me first. | ||
8 | * | ||
9 | * DON'T USE THIS CODE AS IS | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/device.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/timer.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/firmware.h> | ||
20 | |||
21 | |||
22 | MODULE_AUTHOR("Manuel Estrada Sainz"); | ||
23 | MODULE_DESCRIPTION("Hackish sample for using firmware class directly"); | ||
24 | MODULE_LICENSE("GPL"); | ||
25 | |||
26 | static inline struct class_device *to_class_dev(struct kobject *obj) | ||
27 | { | ||
28 | return container_of(obj,struct class_device,kobj); | ||
29 | } | ||
30 | static inline | ||
31 | struct class_device_attribute *to_class_dev_attr(struct attribute *_attr) | ||
32 | { | ||
33 | return container_of(_attr,struct class_device_attribute,attr); | ||
34 | } | ||
35 | |||
36 | int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr); | ||
37 | int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr); | ||
38 | |||
39 | struct firmware_priv { | ||
40 | char fw_id[FIRMWARE_NAME_MAX]; | ||
41 | s32 loading:2; | ||
42 | u32 abort:1; | ||
43 | }; | ||
44 | |||
45 | extern struct class firmware_class; | ||
46 | |||
47 | static ssize_t firmware_loading_show(struct class_device *class_dev, char *buf) | ||
48 | { | ||
49 | struct firmware_priv *fw_priv = class_get_devdata(class_dev); | ||
50 | return sprintf(buf, "%d\n", fw_priv->loading); | ||
51 | } | ||
52 | static ssize_t firmware_loading_store(struct class_device *class_dev, | ||
53 | const char *buf, size_t count) | ||
54 | { | ||
55 | struct firmware_priv *fw_priv = class_get_devdata(class_dev); | ||
56 | int prev_loading = fw_priv->loading; | ||
57 | |||
58 | fw_priv->loading = simple_strtol(buf, NULL, 10); | ||
59 | |||
60 | switch(fw_priv->loading){ | ||
61 | case -1: | ||
62 | /* abort load and panic */ | ||
63 | break; | ||
64 | case 1: | ||
65 | /* setup load */ | ||
66 | break; | ||
67 | case 0: | ||
68 | if(prev_loading==1){ | ||
69 | /* finish load and get the device back to working | ||
70 | * state */ | ||
71 | } | ||
72 | break; | ||
73 | } | ||
74 | |||
75 | return count; | ||
76 | } | ||
77 | static CLASS_DEVICE_ATTR(loading, 0644, | ||
78 | firmware_loading_show, firmware_loading_store); | ||
79 | |||
80 | static ssize_t firmware_data_read(struct kobject *kobj, | ||
81 | struct bin_attribute *bin_attr, | ||
82 | char *buffer, loff_t offset, size_t count) | ||
83 | { | ||
84 | struct class_device *class_dev = to_class_dev(kobj); | ||
85 | struct firmware_priv *fw_priv = class_get_devdata(class_dev); | ||
86 | |||
87 | /* read from the devices firmware memory */ | ||
88 | |||
89 | return count; | ||
90 | } | ||
91 | static ssize_t firmware_data_write(struct kobject *kobj, | ||
92 | struct bin_attribute *bin_attr, | ||
93 | char *buffer, loff_t offset, size_t count) | ||
94 | { | ||
95 | struct class_device *class_dev = to_class_dev(kobj); | ||
96 | struct firmware_priv *fw_priv = class_get_devdata(class_dev); | ||
97 | |||
98 | /* write to the devices firmware memory */ | ||
99 | |||
100 | return count; | ||
101 | } | ||
102 | static struct bin_attribute firmware_attr_data = { | ||
103 | .attr = {.name = "data", .mode = 0644}, | ||
104 | .size = 0, | ||
105 | .read = firmware_data_read, | ||
106 | .write = firmware_data_write, | ||
107 | }; | ||
108 | static int fw_setup_class_device(struct class_device *class_dev, | ||
109 | const char *fw_name, | ||
110 | struct device *device) | ||
111 | { | ||
112 | int retval; | ||
113 | struct firmware_priv *fw_priv; | ||
114 | |||
115 | fw_priv = kzalloc(sizeof(struct firmware_priv), GFP_KERNEL); | ||
116 | if (!fw_priv) { | ||
117 | retval = -ENOMEM; | ||
118 | goto out; | ||
119 | } | ||
120 | |||
121 | memset(class_dev, 0, sizeof(*class_dev)); | ||
122 | |||
123 | strncpy(fw_priv->fw_id, fw_name, FIRMWARE_NAME_MAX); | ||
124 | fw_priv->fw_id[FIRMWARE_NAME_MAX-1] = '\0'; | ||
125 | |||
126 | strncpy(class_dev->class_id, device->bus_id, BUS_ID_SIZE); | ||
127 | class_dev->class_id[BUS_ID_SIZE-1] = '\0'; | ||
128 | class_dev->dev = device; | ||
129 | |||
130 | class_dev->class = &firmware_class, | ||
131 | class_set_devdata(class_dev, fw_priv); | ||
132 | retval = class_device_register(class_dev); | ||
133 | if (retval){ | ||
134 | printk(KERN_ERR "%s: class_device_register failed\n", | ||
135 | __FUNCTION__); | ||
136 | goto error_free_fw_priv; | ||
137 | } | ||
138 | |||
139 | retval = sysfs_create_bin_file(&class_dev->kobj, &firmware_attr_data); | ||
140 | if (retval){ | ||
141 | printk(KERN_ERR "%s: sysfs_create_bin_file failed\n", | ||
142 | __FUNCTION__); | ||
143 | goto error_unreg_class_dev; | ||
144 | } | ||
145 | |||
146 | retval = class_device_create_file(class_dev, | ||
147 | &class_device_attr_loading); | ||
148 | if (retval){ | ||
149 | printk(KERN_ERR "%s: class_device_create_file failed\n", | ||
150 | __FUNCTION__); | ||
151 | goto error_remove_data; | ||
152 | } | ||
153 | |||
154 | goto out; | ||
155 | |||
156 | error_remove_data: | ||
157 | sysfs_remove_bin_file(&class_dev->kobj, &firmware_attr_data); | ||
158 | error_unreg_class_dev: | ||
159 | class_device_unregister(class_dev); | ||
160 | error_free_fw_priv: | ||
161 | kfree(fw_priv); | ||
162 | out: | ||
163 | return retval; | ||
164 | } | ||
165 | static void fw_remove_class_device(struct class_device *class_dev) | ||
166 | { | ||
167 | struct firmware_priv *fw_priv = class_get_devdata(class_dev); | ||
168 | |||
169 | class_device_remove_file(class_dev, &class_device_attr_loading); | ||
170 | sysfs_remove_bin_file(&class_dev->kobj, &firmware_attr_data); | ||
171 | class_device_unregister(class_dev); | ||
172 | } | ||
173 | |||
174 | static struct class_device *class_dev; | ||
175 | |||
176 | static struct device my_device = { | ||
177 | .bus_id = "my_dev0", | ||
178 | }; | ||
179 | |||
180 | static int __init firmware_sample_init(void) | ||
181 | { | ||
182 | int error; | ||
183 | |||
184 | device_initialize(&my_device); | ||
185 | class_dev = kmalloc(sizeof(struct class_device), GFP_KERNEL); | ||
186 | if(!class_dev) | ||
187 | return -ENOMEM; | ||
188 | |||
189 | error = fw_setup_class_device(class_dev, "my_firmware_image", | ||
190 | &my_device); | ||
191 | if(error){ | ||
192 | kfree(class_dev); | ||
193 | return error; | ||
194 | } | ||
195 | return 0; | ||
196 | |||
197 | } | ||
198 | static void __exit firmware_sample_exit(void) | ||
199 | { | ||
200 | struct firmware_priv *fw_priv = class_get_devdata(class_dev); | ||
201 | fw_remove_class_device(class_dev); | ||
202 | kfree(fw_priv); | ||
203 | kfree(class_dev); | ||
204 | } | ||
205 | module_init(firmware_sample_init); | ||
206 | module_exit(firmware_sample_exit); | ||
207 | |||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f7492cd10093..bf6303ec0bde 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -958,6 +958,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
958 | 958 | ||
959 | l2cr= [PPC] | 959 | l2cr= [PPC] |
960 | 960 | ||
961 | l3cr= [PPC] | ||
962 | |||
961 | lapic [X86-32,APIC] Enable the local APIC even if BIOS | 963 | lapic [X86-32,APIC] Enable the local APIC even if BIOS |
962 | disabled it. | 964 | disabled it. |
963 | 965 | ||
@@ -1284,8 +1286,16 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1284 | noexec [IA-64] | 1286 | noexec [IA-64] |
1285 | 1287 | ||
1286 | noexec [X86-32,X86-64] | 1288 | noexec [X86-32,X86-64] |
1289 | On X86-32 available only on PAE configured kernels. | ||
1287 | noexec=on: enable non-executable mappings (default) | 1290 | noexec=on: enable non-executable mappings (default) |
1288 | noexec=off: disable nn-executable mappings | 1291 | noexec=off: disable non-executable mappings |
1292 | |||
1293 | noexec32 [X86-64] | ||
1294 | This affects only 32-bit executables. | ||
1295 | noexec32=on: enable non-executable mappings (default) | ||
1296 | read doesn't imply executable mappings | ||
1297 | noexec32=off: disable non-executable mappings | ||
1298 | read implies executable mappings | ||
1289 | 1299 | ||
1290 | nofxsr [BUGS=X86-32] Disables x86 floating point extended | 1300 | nofxsr [BUGS=X86-32] Disables x86 floating point extended |
1291 | register save and restore. The kernel will only save | 1301 | register save and restore. The kernel will only save |
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 461e4f1dbec4..421e7d00ffd0 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt | |||
@@ -196,6 +196,11 @@ its parent; and can't be removed or suspended after that parent. | |||
196 | 196 | ||
197 | The policy is that the device tree should match hardware bus topology. | 197 | The policy is that the device tree should match hardware bus topology. |
198 | (Or at least the control bus, for devices which use multiple busses.) | 198 | (Or at least the control bus, for devices which use multiple busses.) |
199 | In particular, this means that a device registration may fail if the parent of | ||
200 | the device is suspending (i.e. has been chosen by the PM core as the next | ||
201 | device to suspend) or has already suspended, as well as after all of the other | ||
202 | devices have been suspended. Device drivers must be prepared to cope with such | ||
203 | situations. | ||
199 | 204 | ||
200 | 205 | ||
201 | Suspending Devices | 206 | Suspending Devices |
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 7b4e8a70882c..4cc780024e6c 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt | |||
@@ -59,12 +59,39 @@ Table of Contents | |||
59 | p) Freescale Synchronous Serial Interface | 59 | p) Freescale Synchronous Serial Interface |
60 | q) USB EHCI controllers | 60 | q) USB EHCI controllers |
61 | 61 | ||
62 | VII - Specifying interrupt information for devices | 62 | VII - Marvell Discovery mv64[345]6x System Controller chips |
63 | 1) The /system-controller node | ||
64 | 2) Child nodes of /system-controller | ||
65 | a) Marvell Discovery MDIO bus | ||
66 | b) Marvell Discovery ethernet controller | ||
67 | c) Marvell Discovery PHY nodes | ||
68 | d) Marvell Discovery SDMA nodes | ||
69 | e) Marvell Discovery BRG nodes | ||
70 | f) Marvell Discovery CUNIT nodes | ||
71 | g) Marvell Discovery MPSCROUTING nodes | ||
72 | h) Marvell Discovery MPSCINTR nodes | ||
73 | i) Marvell Discovery MPSC nodes | ||
74 | j) Marvell Discovery Watch Dog Timer nodes | ||
75 | k) Marvell Discovery I2C nodes | ||
76 | l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes | ||
77 | m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes | ||
78 | n) Marvell Discovery GPP (General Purpose Pins) nodes | ||
79 | o) Marvell Discovery PCI host bridge node | ||
80 | p) Marvell Discovery CPU Error nodes | ||
81 | q) Marvell Discovery SRAM Controller nodes | ||
82 | r) Marvell Discovery PCI Error Handler nodes | ||
83 | s) Marvell Discovery Memory Controller nodes | ||
84 | |||
85 | VIII - Specifying interrupt information for devices | ||
63 | 1) interrupts property | 86 | 1) interrupts property |
64 | 2) interrupt-parent property | 87 | 2) interrupt-parent property |
65 | 3) OpenPIC Interrupt Controllers | 88 | 3) OpenPIC Interrupt Controllers |
66 | 4) ISA Interrupt Controllers | 89 | 4) ISA Interrupt Controllers |
67 | 90 | ||
91 | IX - Specifying GPIO information for devices | ||
92 | 1) gpios property | ||
93 | 2) gpio-controller nodes | ||
94 | |||
68 | Appendix A - Sample SOC node for MPC8540 | 95 | Appendix A - Sample SOC node for MPC8540 |
69 | 96 | ||
70 | 97 | ||
@@ -1269,10 +1296,6 @@ platforms are moved over to use the flattened-device-tree model. | |||
1269 | 1296 | ||
1270 | Recommended properties: | 1297 | Recommended properties: |
1271 | 1298 | ||
1272 | - linux,network-index : This is the intended "index" of this | ||
1273 | network device. This is used by the bootwrapper to interpret | ||
1274 | MAC addresses passed by the firmware when no information other | ||
1275 | than indices is available to associate an address with a device. | ||
1276 | - phy-connection-type : a string naming the controller/PHY interface type, | 1299 | - phy-connection-type : a string naming the controller/PHY interface type, |
1277 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii", | 1300 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii", |
1278 | "tbi", or "rtbi". This property is only really needed if the connection | 1301 | "tbi", or "rtbi". This property is only really needed if the connection |
@@ -1622,8 +1645,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1622 | - device_type : should be "network", "hldc", "uart", "transparent" | 1645 | - device_type : should be "network", "hldc", "uart", "transparent" |
1623 | "bisync", "atm", or "serial". | 1646 | "bisync", "atm", or "serial". |
1624 | - compatible : could be "ucc_geth" or "fsl_atm" and so on. | 1647 | - compatible : could be "ucc_geth" or "fsl_atm" and so on. |
1625 | - model : should be "UCC". | 1648 | - cell-index : the ucc number(1-8), corresponding to UCCx in UM. |
1626 | - device-id : the ucc number(1-8), corresponding to UCCx in UM. | ||
1627 | - reg : Offset and length of the register set for the device | 1649 | - reg : Offset and length of the register set for the device |
1628 | - interrupts : <a b> where a is the interrupt number and b is a | 1650 | - interrupts : <a b> where a is the interrupt number and b is a |
1629 | field that represents an encoding of the sense and level | 1651 | field that represents an encoding of the sense and level |
@@ -1667,10 +1689,6 @@ platforms are moved over to use the flattened-device-tree model. | |||
1667 | - phy-handle : The phandle for the PHY connected to this controller. | 1689 | - phy-handle : The phandle for the PHY connected to this controller. |
1668 | 1690 | ||
1669 | Recommended properties: | 1691 | Recommended properties: |
1670 | - linux,network-index : This is the intended "index" of this | ||
1671 | network device. This is used by the bootwrapper to interpret | ||
1672 | MAC addresses passed by the firmware when no information other | ||
1673 | than indices is available to associate an address with a device. | ||
1674 | - phy-connection-type : a string naming the controller/PHY interface type, | 1692 | - phy-connection-type : a string naming the controller/PHY interface type, |
1675 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal | 1693 | i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal |
1676 | Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only), | 1694 | Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only), |
@@ -1680,8 +1698,7 @@ platforms are moved over to use the flattened-device-tree model. | |||
1680 | ucc@2000 { | 1698 | ucc@2000 { |
1681 | device_type = "network"; | 1699 | device_type = "network"; |
1682 | compatible = "ucc_geth"; | 1700 | compatible = "ucc_geth"; |
1683 | model = "UCC"; | 1701 | cell-index = <1>; |
1684 | device-id = <1>; | ||
1685 | reg = <2000 200>; | 1702 | reg = <2000 200>; |
1686 | interrupts = <a0 0>; | 1703 | interrupts = <a0 0>; |
1687 | interrupt-parent = <700>; | 1704 | interrupt-parent = <700>; |
@@ -1995,7 +2012,6 @@ platforms are moved over to use the flattened-device-tree model. | |||
1995 | interrupts = <20 8>; | 2012 | interrupts = <20 8>; |
1996 | interrupt-parent = <&PIC>; | 2013 | interrupt-parent = <&PIC>; |
1997 | phy-handle = <&PHY0>; | 2014 | phy-handle = <&PHY0>; |
1998 | linux,network-index = <0>; | ||
1999 | fsl,cpm-command = <12000300>; | 2015 | fsl,cpm-command = <12000300>; |
2000 | }; | 2016 | }; |
2001 | 2017 | ||
@@ -2217,12 +2233,6 @@ platforms are moved over to use the flattened-device-tree model. | |||
2217 | EMAC, that is the content of the current (bogus) "phy-port" | 2233 | EMAC, that is the content of the current (bogus) "phy-port" |
2218 | property. | 2234 | property. |
2219 | 2235 | ||
2220 | Recommended properties: | ||
2221 | - linux,network-index : This is the intended "index" of this | ||
2222 | network device. This is used by the bootwrapper to interpret | ||
2223 | MAC addresses passed by the firmware when no information other | ||
2224 | than indices is available to associate an address with a device. | ||
2225 | |||
2226 | Optional properties: | 2236 | Optional properties: |
2227 | - phy-address : 1 cell, optional, MDIO address of the PHY. If absent, | 2237 | - phy-address : 1 cell, optional, MDIO address of the PHY. If absent, |
2228 | a search is performed. | 2238 | a search is performed. |
@@ -2246,7 +2256,6 @@ platforms are moved over to use the flattened-device-tree model. | |||
2246 | Example: | 2256 | Example: |
2247 | 2257 | ||
2248 | EMAC0: ethernet@40000800 { | 2258 | EMAC0: ethernet@40000800 { |
2249 | linux,network-index = <0>; | ||
2250 | device_type = "network"; | 2259 | device_type = "network"; |
2251 | compatible = "ibm,emac-440gp", "ibm,emac"; | 2260 | compatible = "ibm,emac-440gp", "ibm,emac"; |
2252 | interrupt-parent = <&UIC1>; | 2261 | interrupt-parent = <&UIC1>; |
@@ -2817,9 +2826,528 @@ platforms are moved over to use the flattened-device-tree model. | |||
2817 | }; | 2826 | }; |
2818 | 2827 | ||
2819 | 2828 | ||
2820 | More devices will be defined as this spec matures. | 2829 | VII - Marvell Discovery mv64[345]6x System Controller chips |
2830 | =========================================================== | ||
2831 | |||
2832 | The Marvell mv64[345]60 series of system controller chips contain | ||
2833 | many of the peripherals needed to implement a complete computer | ||
2834 | system. In this section, we define device tree nodes to describe | ||
2835 | the system controller chip itself and each of the peripherals | ||
2836 | which it contains. Compatible string values for each node are | ||
2837 | prefixed with the string "marvell,", for Marvell Technology Group Ltd. | ||
2838 | |||
2839 | 1) The /system-controller node | ||
2840 | |||
2841 | This node is used to represent the system-controller and must be | ||
2842 | present when the system uses a system controller chip. The top-level | ||
2843 | system-controller node contains information that is global to all | ||
2844 | devices within the system controller chip. The node name begins | ||
2845 | with "system-controller" followed by the unit address, which is | ||
2846 | the base address of the memory-mapped register set for the system | ||
2847 | controller chip. | ||
2848 | |||
2849 | Required properties: | ||
2850 | |||
2851 | - ranges : Describes the translation of system controller addresses | ||
2852 | for memory mapped registers. | ||
2853 | - clock-frequency: Contains the main clock frequency for the system | ||
2854 | controller chip. | ||
2855 | - reg : This property defines the address and size of the | ||
2856 | memory-mapped registers contained within the system controller | ||
2857 | chip. The address specified in the "reg" property should match | ||
2858 | the unit address of the system-controller node. | ||
2859 | - #address-cells : Address representation for system controller | ||
2860 | devices. This field represents the number of cells needed to | ||
2861 | represent the address of the memory-mapped registers of devices | ||
2862 | within the system controller chip. | ||
2863 | - #size-cells : Size representation for the memory-mapped | ||
2864 | registers within the system controller chip. | ||
2865 | - #interrupt-cells : Defines the width of cells used to represent | ||
2866 | interrupts. | ||
2867 | |||
2868 | Optional properties: | ||
2869 | |||
2870 | - model : The specific model of the system controller chip. Such | ||
2871 | as, "mv64360", "mv64460", or "mv64560". | ||
2872 | - compatible : A string identifying the compatibility identifiers | ||
2873 | of the system controller chip. | ||
2874 | |||
2875 | The system-controller node contains child nodes for each system | ||
2876 | controller device that the platform uses. Nodes should not be created | ||
2877 | for devices which exist on the system controller chip but are not used. | ||
2878 | |||
2879 | Example Marvell Discovery mv64360 system-controller node: | ||
2880 | |||
2881 | system-controller@f1000000 { /* Marvell Discovery mv64360 */ | ||
2882 | #address-cells = <1>; | ||
2883 | #size-cells = <1>; | ||
2884 | model = "mv64360"; /* Default */ | ||
2885 | compatible = "marvell,mv64360"; | ||
2886 | clock-frequency = <133333333>; | ||
2887 | reg = <0xf1000000 0x10000>; | ||
2888 | virtual-reg = <0xf1000000>; | ||
2889 | ranges = <0x88000000 0x88000000 0x1000000 /* PCI 0 I/O Space */ | ||
2890 | 0x80000000 0x80000000 0x8000000 /* PCI 0 MEM Space */ | ||
2891 | 0xa0000000 0xa0000000 0x4000000 /* User FLASH */ | ||
2892 | 0x00000000 0xf1000000 0x0010000 /* Bridge's regs */ | ||
2893 | 0xf2000000 0xf2000000 0x0040000>;/* Integrated SRAM */ | ||
2894 | |||
2895 | [ child node definitions... ] | ||
2896 | } | ||
2897 | |||
2898 | 2) Child nodes of /system-controller | ||
2899 | |||
2900 | a) Marvell Discovery MDIO bus | ||
2901 | |||
2902 | The MDIO is a bus to which the PHY devices are connected. For each | ||
2903 | device that exists on this bus, a child node should be created. See | ||
2904 | the definition of the PHY node below for an example of how to define | ||
2905 | a PHY. | ||
2906 | |||
2907 | Required properties: | ||
2908 | - #address-cells : Should be <1> | ||
2909 | - #size-cells : Should be <0> | ||
2910 | - device_type : Should be "mdio" | ||
2911 | - compatible : Should be "marvell,mv64360-mdio" | ||
2912 | |||
2913 | Example: | ||
2914 | |||
2915 | mdio { | ||
2916 | #address-cells = <1>; | ||
2917 | #size-cells = <0>; | ||
2918 | device_type = "mdio"; | ||
2919 | compatible = "marvell,mv64360-mdio"; | ||
2920 | |||
2921 | ethernet-phy@0 { | ||
2922 | ...... | ||
2923 | }; | ||
2924 | }; | ||
2925 | |||
2926 | |||
2927 | b) Marvell Discovery ethernet controller | ||
2928 | |||
2929 | The Discovery ethernet controller is described with two levels | ||
2930 | of nodes. The first level describes an ethernet silicon block | ||
2931 | and the second level describes up to 3 ethernet nodes within | ||
2932 | that block. The reason for the multiple levels is that the | ||
2933 | registers for the node are interleaved within a single set | ||
2934 | of registers. The "ethernet-block" level describes the | ||
2935 | shared register set, and the "ethernet" nodes describe ethernet | ||
2936 | port-specific properties. | ||
2937 | |||
2938 | Ethernet block node | ||
2939 | |||
2940 | Required properties: | ||
2941 | - #address-cells : <1> | ||
2942 | - #size-cells : <0> | ||
2943 | - compatible : "marvell,mv64360-eth-block" | ||
2944 | - reg : Offset and length of the register set for this block | ||
2945 | |||
2946 | Example Discovery Ethernet block node: | ||
2947 | ethernet-block@2000 { | ||
2948 | #address-cells = <1>; | ||
2949 | #size-cells = <0>; | ||
2950 | compatible = "marvell,mv64360-eth-block"; | ||
2951 | reg = <0x2000 0x2000>; | ||
2952 | ethernet@0 { | ||
2953 | ....... | ||
2954 | }; | ||
2955 | }; | ||
2956 | |||
2957 | Ethernet port node | ||
2958 | |||
2959 | Required properties: | ||
2960 | - device_type : Should be "network". | ||
2961 | - compatible : Should be "marvell,mv64360-eth". | ||
2962 | - reg : Should be <0>, <1>, or <2>, according to which registers | ||
2963 | within the silicon block the device uses. | ||
2964 | - interrupts : <a> where a is the interrupt number for the port. | ||
2965 | - interrupt-parent : the phandle for the interrupt controller | ||
2966 | that services interrupts for this device. | ||
2967 | - phy : the phandle for the PHY connected to this ethernet | ||
2968 | controller. | ||
2969 | - local-mac-address : 6 bytes, MAC address | ||
2970 | |||
2971 | Example Discovery Ethernet port node: | ||
2972 | ethernet@0 { | ||
2973 | device_type = "network"; | ||
2974 | compatible = "marvell,mv64360-eth"; | ||
2975 | reg = <0>; | ||
2976 | interrupts = <32>; | ||
2977 | interrupt-parent = <&PIC>; | ||
2978 | phy = <&PHY0>; | ||
2979 | local-mac-address = [ 00 00 00 00 00 00 ]; | ||
2980 | }; | ||
2981 | |||
2982 | |||
2983 | |||
2984 | c) Marvell Discovery PHY nodes | ||
2985 | |||
2986 | Required properties: | ||
2987 | - device_type : Should be "ethernet-phy" | ||
2988 | - interrupts : <a> where a is the interrupt number for this phy. | ||
2989 | - interrupt-parent : the phandle for the interrupt controller that | ||
2990 | services interrupts for this device. | ||
2991 | - reg : The ID number for the phy, usually a small integer | ||
2992 | |||
2993 | Example Discovery PHY node: | ||
2994 | ethernet-phy@1 { | ||
2995 | device_type = "ethernet-phy"; | ||
2996 | compatible = "broadcom,bcm5421"; | ||
2997 | interrupts = <76>; /* GPP 12 */ | ||
2998 | interrupt-parent = <&PIC>; | ||
2999 | reg = <1>; | ||
3000 | }; | ||
3001 | |||
3002 | |||
3003 | d) Marvell Discovery SDMA nodes | ||
3004 | |||
3005 | Represent DMA hardware associated with the MPSC (multiprotocol | ||
3006 | serial controllers). | ||
3007 | |||
3008 | Required properties: | ||
3009 | - compatible : "marvell,mv64360-sdma" | ||
3010 | - reg : Offset and length of the register set for this device | ||
3011 | - interrupts : <a> where a is the interrupt number for the DMA | ||
3012 | device. | ||
3013 | - interrupt-parent : the phandle for the interrupt controller | ||
3014 | that services interrupts for this device. | ||
3015 | |||
3016 | Example Discovery SDMA node: | ||
3017 | sdma@4000 { | ||
3018 | compatible = "marvell,mv64360-sdma"; | ||
3019 | reg = <0x4000 0xc18>; | ||
3020 | virtual-reg = <0xf1004000>; | ||
3021 | interrupts = <36>; | ||
3022 | interrupt-parent = <&PIC>; | ||
3023 | }; | ||
3024 | |||
3025 | |||
3026 | e) Marvell Discovery BRG nodes | ||
3027 | |||
3028 | Represent baud rate generator hardware associated with the MPSC | ||
3029 | (multiprotocol serial controllers). | ||
3030 | |||
3031 | Required properties: | ||
3032 | - compatible : "marvell,mv64360-brg" | ||
3033 | - reg : Offset and length of the register set for this device | ||
3034 | - clock-src : A value from 0 to 15 which selects the clock | ||
3035 | source for the baud rate generator. This value corresponds | ||
3036 | to the CLKS value in the BRGx configuration register. See | ||
3037 | the mv64x60 User's Manual. | ||
3038 | - clock-frequency : The frequency (in Hz) of the baud rate | ||
3039 | generator's input clock. | ||
3040 | - current-speed : The current speed setting (presumably by | ||
3041 | firmware) of the baud rate generator. | ||
3042 | |||
3043 | Example Discovery BRG node: | ||
3044 | brg@b200 { | ||
3045 | compatible = "marvell,mv64360-brg"; | ||
3046 | reg = <0xb200 0x8>; | ||
3047 | clock-src = <8>; | ||
3048 | clock-frequency = <133333333>; | ||
3049 | current-speed = <9600>; | ||
3050 | }; | ||
3051 | |||
3052 | |||
3053 | f) Marvell Discovery CUNIT nodes | ||
3054 | |||
3055 | Represent the Serial Communications Unit device hardware. | ||
3056 | |||
3057 | Required properties: | ||
3058 | - reg : Offset and length of the register set for this device | ||
3059 | |||
3060 | Example Discovery CUNIT node: | ||
3061 | cunit@f200 { | ||
3062 | reg = <0xf200 0x200>; | ||
3063 | }; | ||
3064 | |||
3065 | |||
3066 | g) Marvell Discovery MPSCROUTING nodes | ||
3067 | |||
3068 | Represent the Discovery's MPSC routing hardware | ||
3069 | |||
3070 | Required properties: | ||
3071 | - reg : Offset and length of the register set for this device | ||
3072 | |||
3073 | Example Discovery MPSCROUTING node: | ||
3074 | mpscrouting@b400 { | ||
3075 | reg = <0xb400 0xc>; | ||
3076 | }; | ||
3077 | |||
3078 | |||
3079 | h) Marvell Discovery MPSCINTR nodes | ||
3080 | |||
3081 | Represent the Discovery's MPSC DMA interrupt hardware registers | ||
3082 | (SDMA cause and mask registers). | ||
3083 | |||
3084 | Required properties: | ||
3085 | - reg : Offset and length of the register set for this device | ||
2821 | 3086 | ||
2822 | VII - Specifying interrupt information for devices | 3087 | Example Discovery MPSCINTR node: |
3088 | mpscintr@b800 { | ||
3089 | reg = <0xb800 0x100>; | ||
3090 | }; | ||
3091 | |||
3092 | |||
3093 | i) Marvell Discovery MPSC nodes | ||
3094 | |||
3095 | Represent the Discovery's MPSC (Multiprotocol Serial Controller) | ||
3096 | serial port. | ||
3097 | |||
3098 | Required properties: | ||
3099 | - device_type : "serial" | ||
3100 | - compatible : "marvell,mv64360-mpsc" | ||
3101 | - reg : Offset and length of the register set for this device | ||
3102 | - sdma : the phandle for the SDMA node used by this port | ||
3103 | - brg : the phandle for the BRG node used by this port | ||
3104 | - cunit : the phandle for the CUNIT node used by this port | ||
3105 | - mpscrouting : the phandle for the MPSCROUTING node used by this port | ||
3106 | - mpscintr : the phandle for the MPSCINTR node used by this port | ||
3107 | - cell-index : the hardware index of this cell in the MPSC core | ||
3108 | - max_idle : value needed for MPSC CHR3 (Maximum Frame Length) | ||
3109 | register | ||
3110 | - interrupts : <a> where a is the interrupt number for the MPSC. | ||
3111 | - interrupt-parent : the phandle for the interrupt controller | ||
3112 | that services interrupts for this device. | ||
3113 | |||
3114 | Example Discovery MPSC node: | ||
3115 | mpsc@8000 { | ||
3116 | device_type = "serial"; | ||
3117 | compatible = "marvell,mv64360-mpsc"; | ||
3118 | reg = <0x8000 0x38>; | ||
3119 | virtual-reg = <0xf1008000>; | ||
3120 | sdma = <&SDMA0>; | ||
3121 | brg = <&BRG0>; | ||
3122 | cunit = <&CUNIT>; | ||
3123 | mpscrouting = <&MPSCROUTING>; | ||
3124 | mpscintr = <&MPSCINTR>; | ||
3125 | cell-index = <0>; | ||
3126 | max_idle = <40>; | ||
3127 | interrupts = <40>; | ||
3128 | interrupt-parent = <&PIC>; | ||
3129 | }; | ||
3130 | |||
3131 | |||
3132 | j) Marvell Discovery Watch Dog Timer nodes | ||
3133 | |||
3134 | Represent the Discovery's watchdog timer hardware | ||
3135 | |||
3136 | Required properties: | ||
3137 | - compatible : "marvell,mv64360-wdt" | ||
3138 | - reg : Offset and length of the register set for this device | ||
3139 | |||
3140 | Example Discovery Watch Dog Timer node: | ||
3141 | wdt@b410 { | ||
3142 | compatible = "marvell,mv64360-wdt"; | ||
3143 | reg = <0xb410 0x8>; | ||
3144 | }; | ||
3145 | |||
3146 | |||
3147 | k) Marvell Discovery I2C nodes | ||
3148 | |||
3149 | Represent the Discovery's I2C hardware | ||
3150 | |||
3151 | Required properties: | ||
3152 | - device_type : "i2c" | ||
3153 | - compatible : "marvell,mv64360-i2c" | ||
3154 | - reg : Offset and length of the register set for this device | ||
3155 | - interrupts : <a> where a is the interrupt number for the I2C. | ||
3156 | - interrupt-parent : the phandle for the interrupt controller | ||
3157 | that services interrupts for this device. | ||
3158 | |||
3159 | Example Discovery I2C node: | ||
3160 | compatible = "marvell,mv64360-i2c"; | ||
3161 | reg = <0xc000 0x20>; | ||
3162 | virtual-reg = <0xf100c000>; | ||
3163 | interrupts = <37>; | ||
3164 | interrupt-parent = <&PIC>; | ||
3165 | }; | ||
3166 | |||
3167 | |||
3168 | l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes | ||
3169 | |||
3170 | Represent the Discovery's PIC hardware | ||
3171 | |||
3172 | Required properties: | ||
3173 | - #interrupt-cells : <1> | ||
3174 | - #address-cells : <0> | ||
3175 | - compatible : "marvell,mv64360-pic" | ||
3176 | - reg : Offset and length of the register set for this device | ||
3177 | - interrupt-controller | ||
3178 | |||
3179 | Example Discovery PIC node: | ||
3180 | pic { | ||
3181 | #interrupt-cells = <1>; | ||
3182 | #address-cells = <0>; | ||
3183 | compatible = "marvell,mv64360-pic"; | ||
3184 | reg = <0x0 0x88>; | ||
3185 | interrupt-controller; | ||
3186 | }; | ||
3187 | |||
3188 | |||
3189 | m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes | ||
3190 | |||
3191 | Represent the Discovery's MPP hardware | ||
3192 | |||
3193 | Required properties: | ||
3194 | - compatible : "marvell,mv64360-mpp" | ||
3195 | - reg : Offset and length of the register set for this device | ||
3196 | |||
3197 | Example Discovery MPP node: | ||
3198 | mpp@f000 { | ||
3199 | compatible = "marvell,mv64360-mpp"; | ||
3200 | reg = <0xf000 0x10>; | ||
3201 | }; | ||
3202 | |||
3203 | |||
3204 | n) Marvell Discovery GPP (General Purpose Pins) nodes | ||
3205 | |||
3206 | Represent the Discovery's GPP hardware | ||
3207 | |||
3208 | Required properties: | ||
3209 | - compatible : "marvell,mv64360-gpp" | ||
3210 | - reg : Offset and length of the register set for this device | ||
3211 | |||
3212 | Example Discovery GPP node: | ||
3213 | gpp@f100 { | ||
3214 | compatible = "marvell,mv64360-gpp"; | ||
3215 | reg = <0xf100 0x20>; | ||
3216 | }; | ||
3217 | |||
3218 | |||
3219 | o) Marvell Discovery PCI host bridge node | ||
3220 | |||
3221 | Represents the Discovery's PCI host bridge device. The properties | ||
3222 | for this node conform to Rev 2.1 of the PCI Bus Binding to IEEE | ||
3223 | 1275-1994. A typical value for the compatible property is | ||
3224 | "marvell,mv64360-pci". | ||
3225 | |||
3226 | Example Discovery PCI host bridge node | ||
3227 | pci@80000000 { | ||
3228 | #address-cells = <3>; | ||
3229 | #size-cells = <2>; | ||
3230 | #interrupt-cells = <1>; | ||
3231 | device_type = "pci"; | ||
3232 | compatible = "marvell,mv64360-pci"; | ||
3233 | reg = <0xcf8 0x8>; | ||
3234 | ranges = <0x01000000 0x0 0x0 | ||
3235 | 0x88000000 0x0 0x01000000 | ||
3236 | 0x02000000 0x0 0x80000000 | ||
3237 | 0x80000000 0x0 0x08000000>; | ||
3238 | bus-range = <0 255>; | ||
3239 | clock-frequency = <66000000>; | ||
3240 | interrupt-parent = <&PIC>; | ||
3241 | interrupt-map-mask = <0xf800 0x0 0x0 0x7>; | ||
3242 | interrupt-map = < | ||
3243 | /* IDSEL 0x0a */ | ||
3244 | 0x5000 0 0 1 &PIC 80 | ||
3245 | 0x5000 0 0 2 &PIC 81 | ||
3246 | 0x5000 0 0 3 &PIC 91 | ||
3247 | 0x5000 0 0 4 &PIC 93 | ||
3248 | |||
3249 | /* IDSEL 0x0b */ | ||
3250 | 0x5800 0 0 1 &PIC 91 | ||
3251 | 0x5800 0 0 2 &PIC 93 | ||
3252 | 0x5800 0 0 3 &PIC 80 | ||
3253 | 0x5800 0 0 4 &PIC 81 | ||
3254 | |||
3255 | /* IDSEL 0x0c */ | ||
3256 | 0x6000 0 0 1 &PIC 91 | ||
3257 | 0x6000 0 0 2 &PIC 93 | ||
3258 | 0x6000 0 0 3 &PIC 80 | ||
3259 | 0x6000 0 0 4 &PIC 81 | ||
3260 | |||
3261 | /* IDSEL 0x0d */ | ||
3262 | 0x6800 0 0 1 &PIC 93 | ||
3263 | 0x6800 0 0 2 &PIC 80 | ||
3264 | 0x6800 0 0 3 &PIC 81 | ||
3265 | 0x6800 0 0 4 &PIC 91 | ||
3266 | >; | ||
3267 | }; | ||
3268 | |||
3269 | |||
3270 | p) Marvell Discovery CPU Error nodes | ||
3271 | |||
3272 | Represent the Discovery's CPU error handler device. | ||
3273 | |||
3274 | Required properties: | ||
3275 | - compatible : "marvell,mv64360-cpu-error" | ||
3276 | - reg : Offset and length of the register set for this device | ||
3277 | - interrupts : the interrupt number for this device | ||
3278 | - interrupt-parent : the phandle for the interrupt controller | ||
3279 | that services interrupts for this device. | ||
3280 | |||
3281 | Example Discovery CPU Error node: | ||
3282 | cpu-error@0070 { | ||
3283 | compatible = "marvell,mv64360-cpu-error"; | ||
3284 | reg = <0x70 0x10 0x128 0x28>; | ||
3285 | interrupts = <3>; | ||
3286 | interrupt-parent = <&PIC>; | ||
3287 | }; | ||
3288 | |||
3289 | |||
3290 | q) Marvell Discovery SRAM Controller nodes | ||
3291 | |||
3292 | Represent the Discovery's SRAM controller device. | ||
3293 | |||
3294 | Required properties: | ||
3295 | - compatible : "marvell,mv64360-sram-ctrl" | ||
3296 | - reg : Offset and length of the register set for this device | ||
3297 | - interrupts : the interrupt number for this device | ||
3298 | - interrupt-parent : the phandle for the interrupt controller | ||
3299 | that services interrupts for this device. | ||
3300 | |||
3301 | Example Discovery SRAM Controller node: | ||
3302 | sram-ctrl@0380 { | ||
3303 | compatible = "marvell,mv64360-sram-ctrl"; | ||
3304 | reg = <0x380 0x80>; | ||
3305 | interrupts = <13>; | ||
3306 | interrupt-parent = <&PIC>; | ||
3307 | }; | ||
3308 | |||
3309 | |||
3310 | r) Marvell Discovery PCI Error Handler nodes | ||
3311 | |||
3312 | Represent the Discovery's PCI error handler device. | ||
3313 | |||
3314 | Required properties: | ||
3315 | - compatible : "marvell,mv64360-pci-error" | ||
3316 | - reg : Offset and length of the register set for this device | ||
3317 | - interrupts : the interrupt number for this device | ||
3318 | - interrupt-parent : the phandle for the interrupt controller | ||
3319 | that services interrupts for this device. | ||
3320 | |||
3321 | Example Discovery PCI Error Handler node: | ||
3322 | pci-error@1d40 { | ||
3323 | compatible = "marvell,mv64360-pci-error"; | ||
3324 | reg = <0x1d40 0x40 0xc28 0x4>; | ||
3325 | interrupts = <12>; | ||
3326 | interrupt-parent = <&PIC>; | ||
3327 | }; | ||
3328 | |||
3329 | |||
3330 | s) Marvell Discovery Memory Controller nodes | ||
3331 | |||
3332 | Represent the Discovery's memory controller device. | ||
3333 | |||
3334 | Required properties: | ||
3335 | - compatible : "marvell,mv64360-mem-ctrl" | ||
3336 | - reg : Offset and length of the register set for this device | ||
3337 | - interrupts : the interrupt number for this device | ||
3338 | - interrupt-parent : the phandle for the interrupt controller | ||
3339 | that services interrupts for this device. | ||
3340 | |||
3341 | Example Discovery Memory Controller node: | ||
3342 | mem-ctrl@1400 { | ||
3343 | compatible = "marvell,mv64360-mem-ctrl"; | ||
3344 | reg = <0x1400 0x60>; | ||
3345 | interrupts = <17>; | ||
3346 | interrupt-parent = <&PIC>; | ||
3347 | }; | ||
3348 | |||
3349 | |||
3350 | VIII - Specifying interrupt information for devices | ||
2823 | =================================================== | 3351 | =================================================== |
2824 | 3352 | ||
2825 | The device tree represents the busses and devices of a hardware | 3353 | The device tree represents the busses and devices of a hardware |
@@ -2905,6 +3433,54 @@ encodings listed below: | |||
2905 | 2 = high to low edge sensitive type enabled | 3433 | 2 = high to low edge sensitive type enabled |
2906 | 3 = low to high edge sensitive type enabled | 3434 | 3 = low to high edge sensitive type enabled |
2907 | 3435 | ||
3436 | IX - Specifying GPIO information for devices | ||
3437 | ============================================ | ||
3438 | |||
3439 | 1) gpios property | ||
3440 | ----------------- | ||
3441 | |||
3442 | Nodes that make use of GPIOs should define them using the `gpios' property, | ||
3443 | format of which is: <&gpio-controller1-phandle gpio1-specifier | ||
3444 | &gpio-controller2-phandle gpio2-specifier | ||
3445 | 0 /* holes are permitted, means no GPIO 3 */ | ||
3446 | &gpio-controller4-phandle gpio4-specifier | ||
3447 | ...>; | ||
3448 | |||
3449 | Note that gpio-specifier length is controller dependent. | ||
3450 | |||
3451 | gpio-specifier may encode: bank, pin position inside the bank, | ||
3452 | whether pin is open-drain and whether pin is logically inverted. | ||
3453 | |||
3454 | Example of the node using GPIOs: | ||
3455 | |||
3456 | node { | ||
3457 | gpios = <&qe_pio_e 18 0>; | ||
3458 | }; | ||
3459 | |||
3460 | In this example gpio-specifier is "18 0" and encodes GPIO pin number, | ||
3461 | and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller. | ||
3462 | |||
3463 | 2) gpio-controller nodes | ||
3464 | ------------------------ | ||
3465 | |||
3466 | Every GPIO controller node must have #gpio-cells property defined, | ||
3467 | this information will be used to translate gpio-specifiers. | ||
3468 | |||
3469 | Example of two SOC GPIO banks defined as gpio-controller nodes: | ||
3470 | |||
3471 | qe_pio_a: gpio-controller@1400 { | ||
3472 | #gpio-cells = <2>; | ||
3473 | compatible = "fsl,qe-pario-bank-a", "fsl,qe-pario-bank"; | ||
3474 | reg = <0x1400 0x18>; | ||
3475 | gpio-controller; | ||
3476 | }; | ||
3477 | |||
3478 | qe_pio_e: gpio-controller@1460 { | ||
3479 | #gpio-cells = <2>; | ||
3480 | compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank"; | ||
3481 | reg = <0x1460 0x18>; | ||
3482 | gpio-controller; | ||
3483 | }; | ||
2908 | 3484 | ||
2909 | Appendix A - Sample SOC node for MPC8540 | 3485 | Appendix A - Sample SOC node for MPC8540 |
2910 | ======================================== | 3486 | ======================================== |
diff --git a/Documentation/powerpc/phyp-assisted-dump.txt b/Documentation/powerpc/phyp-assisted-dump.txt new file mode 100644 index 000000000000..c4682b982a2e --- /dev/null +++ b/Documentation/powerpc/phyp-assisted-dump.txt | |||
@@ -0,0 +1,127 @@ | |||
1 | |||
2 | Hypervisor-Assisted Dump | ||
3 | ------------------------ | ||
4 | November 2007 | ||
5 | |||
6 | The goal of hypervisor-assisted dump is to enable the dump of | ||
7 | a crashed system, and to do so from a fully-reset system, and | ||
8 | to minimize the total elapsed time until the system is back | ||
9 | in production use. | ||
10 | |||
11 | As compared to kdump or other strategies, hypervisor-assisted | ||
12 | dump offers several strong, practical advantages: | ||
13 | |||
14 | -- Unlike kdump, the system has been reset, and loaded | ||
15 | with a fresh copy of the kernel. In particular, | ||
16 | PCI and I/O devices have been reinitialized and are | ||
17 | in a clean, consistent state. | ||
18 | -- As the dump is performed, the dumped memory becomes | ||
19 | immediately available to the system for normal use. | ||
20 | -- After the dump is completed, no further reboots are | ||
21 | required; the system will be fully usable, and running | ||
22 | in its normal, production mode on its normal kernel. | ||
23 | |||
24 | The above can only be accomplished by coordination with, | ||
25 | and assistance from the hypervisor. The procedure is | ||
26 | as follows: | ||
27 | |||
28 | -- When a system crashes, the hypervisor will save | ||
29 | the low 256MB of RAM to a previously registered | ||
30 | save region. It will also save system state, system | ||
31 | registers, and hardware PTE's. | ||
32 | |||
33 | -- After the low 256MB area has been saved, the | ||
34 | hypervisor will reset PCI and other hardware state. | ||
35 | It will *not* clear RAM. It will then launch the | ||
36 | bootloader, as normal. | ||
37 | |||
38 | -- The freshly booted kernel will notice that there | ||
39 | is a new node (ibm,dump-kernel) in the device tree, | ||
40 | indicating that there is crash data available from | ||
41 | a previous boot. It will boot into only 256MB of RAM, | ||
42 | reserving the rest of system memory. | ||
43 | |||
44 | -- Userspace tools will parse /sys/kernel/release_region | ||
45 | and read /proc/vmcore to obtain the contents of memory, | ||
46 | which holds the previous crashed kernel. The userspace | ||
47 | tools may copy this info to disk, or network, nas, san, | ||
48 | iscsi, etc. as desired. | ||
49 | |||
50 | For example: the values in /sys/kernel/release_region | ||
51 | would look something like this (address-range pairs). | ||
52 | CPU:0x177fee000-0x10000: HPTE:0x177ffe020-0x1000: / | ||
53 | DUMP:0x177fff020-0x10000000, 0x10000000-0x16F1D370A | ||
54 | |||
55 | -- As the userspace tools complete saving a portion of | ||
56 | dump, they echo an offset and size to | ||
57 | /sys/kernel/release_region to release the reserved | ||
58 | memory back to general use. | ||
59 | |||
60 | An example of this is: | ||
61 | "echo 0x40000000 0x10000000 > /sys/kernel/release_region" | ||
62 | which will release 256MB at the 1GB boundary. | ||
63 | |||
64 | Please note that the hypervisor-assisted dump feature | ||
65 | is only available on Power6-based systems with recent | ||
66 | firmware versions. | ||
67 | |||
68 | Implementation details: | ||
69 | ---------------------- | ||
70 | |||
71 | During boot, a check is made to see if firmware supports | ||
72 | this feature on this particular machine. If it does, then | ||
73 | we check to see if an active dump is waiting for us. If yes | ||
74 | then everything but 256 MB of RAM is reserved during early | ||
75 | boot. This area is released once we collect a dump from user | ||
76 | land scripts that are run. If there is dump data, then | ||
77 | the /sys/kernel/release_region file is created, and | ||
78 | the reserved memory is held. | ||
79 | |||
80 | If there is no waiting dump data, then only the highest | ||
81 | 256MB of the ram is reserved as a scratch area. This area | ||
82 | is *not* released: this region will be kept permanently | ||
83 | reserved, so that it can act as a receptacle for a copy | ||
84 | of the low 256MB in the case a crash does occur. See, | ||
85 | however, "open issues" below, as to whether | ||
86 | such a reserved region is really needed. | ||
87 | |||
88 | Currently the dump will be copied from /proc/vmcore to a | ||
89 | new file upon user intervention. The starting address | ||
90 | to be read and the range for each data point is provided | ||
91 | in /sys/kernel/release_region. | ||
92 | |||
93 | The tools to examine the dump will be same as the ones | ||
94 | used for kdump. | ||
95 | |||
96 | General notes: | ||
97 | -------------- | ||
98 | Security: please note that there are potential security issues | ||
99 | with any sort of dump mechanism. In particular, plaintext | ||
100 | (unencrypted) data, and possibly passwords, may be present in | ||
101 | the dump data. Userspace tools must take adequate precautions to | ||
102 | preserve security. | ||
103 | |||
104 | Open issues/ToDo: | ||
105 | ------------ | ||
106 | o The various code paths that tell the hypervisor that a crash | ||
107 | occurred, vs. it simply being a normal reboot, should be | ||
108 | reviewed, and possibly clarified/fixed. | ||
109 | |||
110 | o Instead of using /sys/kernel, should there be a /sys/dump | ||
111 | instead? There is a dump_subsys being created by the s390 code, | ||
112 | perhaps the pseries code should use a similar layout as well. | ||
113 | |||
114 | o Is reserving a 256MB region really required? The goal of | ||
115 | reserving a 256MB scratch area is to make sure that no | ||
116 | important crash data is clobbered when the hypervisor | ||
117 | saves low mem to the scratch area. But, if one could assure | ||
118 | that nothing important is located in some 256MB area, then | ||
119 | it would not need to be reserved. Something that can be | ||
120 | improved in subsequent versions. | ||
121 | |||
122 | o Still working with the kdump team to integrate this with kdump, | ||
123 | some work remains but this would not affect the current | ||
124 | patches. | ||
125 | |||
126 | o Still need to write a shell script, to copy the dump away. | ||
127 | Currently I am parsing it manually. | ||
diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c new file mode 100644 index 000000000000..f8e8e95e81fd --- /dev/null +++ b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) | ||
3 | * | ||
4 | * Tests if the control register is updated correctly | ||
5 | * at context switches | ||
6 | * | ||
7 | * Warning: this test will cause a very high load for a few seconds | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <stdio.h> | ||
12 | #include <stdlib.h> | ||
13 | #include <unistd.h> | ||
14 | #include <signal.h> | ||
15 | #include <inttypes.h> | ||
16 | #include <wait.h> | ||
17 | |||
18 | |||
19 | #include <sys/prctl.h> | ||
20 | #include <linux/prctl.h> | ||
21 | |||
22 | /* Get/set the process' ability to use the timestamp counter instruction */ | ||
23 | #ifndef PR_GET_TSC | ||
24 | #define PR_GET_TSC 25 | ||
25 | #define PR_SET_TSC 26 | ||
26 | # define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ | ||
27 | # define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ | ||
28 | #endif | ||
29 | |||
30 | uint64_t rdtsc() { /* Execute the rdtsc instruction and return its result as one 64-bit value. */ | ||
31 | uint32_t lo, hi; /* the two 32-bit halves produced by the instruction */ | ||
32 | /* We cannot use "=A", since this would use %rax on x86_64 */ | ||
33 | __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); | ||
34 | return (uint64_t)hi << 32 | lo; /* hi is widened before the shift so its bits are not lost */ | ||
35 | } | ||
36 | |||
37 | void sigsegv_expect(int sig) /* SIGSEGV handler installed by segvtask(), where the fault is the expected outcome. */ | ||
38 | { | ||
39 | /* Intentionally empty: nothing to report, and sig is unused. */ | ||
40 | } | ||
41 | |||
42 | void segvtask(void) /* Child task: switch this process to PR_TSC_SIGSEGV, then execute rdtsc, which must not succeed. */ | ||
43 | { | ||
44 | if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) | ||
45 | { | ||
46 | perror("prctl"); | ||
47 | exit(0); | ||
48 | } | ||
49 | signal(SIGSEGV, sigsegv_expect); | ||
50 | alarm(10); /* SIGALRM in 10 seconds; no handler is installed for it in this file */ | ||
51 | rdtsc(); /* NOTE(review): presumably faults again each time the empty handler returns, until the alarm fires - confirm */ | ||
52 | fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); /* reached only if rdtsc() returned */ | ||
53 | exit(0); | ||
54 | } | ||
55 | |||
56 | |||
57 | void sigsegv_fail(int sig) /* SIGSEGV handler installed by rdtsctask(), where any fault is a test failure. */ | ||
58 | { | ||
59 | fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n"); | ||
60 | exit(0); /* note: the exit status is 0 even though an error was reported */ | ||
61 | } | ||
62 | |||
63 | void rdtsctask(void) /* Child task: switch this process to PR_TSC_ENABLE, then execute rdtsc in an endless loop. */ | ||
64 | { | ||
65 | if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) | ||
66 | { | ||
67 | perror("prctl"); | ||
68 | exit(0); | ||
69 | } | ||
70 | signal(SIGSEGV, sigsegv_fail); | ||
71 | alarm(10); /* SIGALRM in 10 seconds; this is the only way out of the loop below besides a fault */ | ||
72 | for(;;) rdtsc(); /* never returns; the value read is discarded */ | ||
73 | } | ||
74 | |||
75 | |||
76 | int main(int argc, char **argv) /* Fork n_tasks children alternating between the two task types, then reap them. */ | ||
77 | { | ||
78 | int n_tasks = 100, i; | ||
79 | |||
80 | fprintf(stderr, "[No further output means we're allright]\n"); | ||
81 | |||
82 | for (i=0; i<n_tasks; i++) | ||
83 | if (fork() == 0) /* child only; a failed fork() (-1) is not reported */ | ||
84 | { | ||
85 | if (i & 1) /* odd i: TSC-disabled task, even i: TSC-enabled task */ | ||
86 | segvtask(); | ||
87 | else | ||
88 | rdtsctask(); | ||
89 | } | ||
90 | |||
91 | for (i=0; i<n_tasks; i++) | ||
92 | wait(NULL); /* one wait() per fork() attempt; child exit statuses are discarded */ | ||
93 | |||
94 | exit(0); | ||
95 | } | ||
96 | |||
diff --git a/Documentation/prctl/disable-tsc-on-off-stress-test.c b/Documentation/prctl/disable-tsc-on-off-stress-test.c new file mode 100644 index 000000000000..1fcd91445375 --- /dev/null +++ b/Documentation/prctl/disable-tsc-on-off-stress-test.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) | ||
3 | * | ||
4 | * Tests if the control register is updated correctly | ||
5 | * when set with prctl() | ||
6 | * | ||
7 | * Warning: this test will cause a very high load for a few seconds | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <stdio.h> | ||
12 | #include <stdlib.h> | ||
13 | #include <unistd.h> | ||
14 | #include <signal.h> | ||
15 | #include <inttypes.h> | ||
16 | #include <wait.h> | ||
17 | |||
18 | |||
19 | #include <sys/prctl.h> | ||
20 | #include <linux/prctl.h> | ||
21 | |||
22 | /* Get/set the process' ability to use the timestamp counter instruction */ | ||
23 | #ifndef PR_GET_TSC | ||
24 | #define PR_GET_TSC 25 | ||
25 | #define PR_SET_TSC 26 | ||
26 | # define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ | ||
27 | # define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ | ||
28 | #endif | ||
29 | |||
30 | /* snippet from wikipedia :-) */ | ||
31 | |||
32 | uint64_t rdtsc() { /* Execute the rdtsc instruction and return its result as one 64-bit value. */ | ||
33 | uint32_t lo, hi; /* the two 32-bit halves produced by the instruction */ | ||
34 | /* We cannot use "=A", since this would use %rax on x86_64 */ | ||
35 | __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); | ||
36 | return (uint64_t)hi << 32 | lo; /* hi is widened before the shift so its bits are not lost */ | ||
37 | } | ||
38 | |||
39 | int should_segv = 0; /* set to 1 by task() after disabling the TSC, cleared by the handler after re-enabling it */ | ||
40 | |||
41 | void sigsegv_cb(int sig) /* SIGSEGV handler: check that the fault was expected, then re-enable the TSC. */ | ||
42 | { | ||
43 | if (!should_segv) /* fault while the TSC was supposed to be enabled */ | ||
44 | { | ||
45 | fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n"); | ||
46 | exit(0); | ||
47 | } | ||
48 | if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) | ||
49 | { | ||
50 | perror("prctl"); | ||
51 | exit(0); | ||
52 | } | ||
53 | should_segv = 0; /* cleared before the rdtsc() below, so a fault there takes the error branch above */ | ||
54 | |||
55 | rdtsc(); /* read once from inside the handler, right after re-enabling */ | ||
56 | } | ||
57 | |||
58 | void task(void) /* Child task: alternately read the TSC and disable it, relying on sigsegv_cb() to re-enable it. */ | ||
59 | { | ||
60 | signal(SIGSEGV, sigsegv_cb); | ||
61 | alarm(10); /* SIGALRM in 10 seconds; the loop below has no other normal exit */ | ||
62 | for(;;) | ||
63 | { | ||
64 | rdtsc(); /* expected to fault on every pass after the first, since the previous pass disabled the TSC */ | ||
65 | if (should_segv) /* still set means the handler did not run, i.e. rdtsc() did not fault */ | ||
66 | { | ||
67 | fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); | ||
68 | exit(0); | ||
69 | } | ||
70 | if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) | ||
71 | { | ||
72 | perror("prctl"); | ||
73 | exit(0); | ||
74 | } | ||
75 | should_segv = 1; /* tell the handler that the next fault is expected */ | ||
76 | } | ||
77 | } | ||
78 | |||
79 | |||
80 | int main(int argc, char **argv) /* Fork n_tasks children that each run task(), then reap them. */ | ||
81 | { | ||
82 | int n_tasks = 100, i; | ||
83 | |||
84 | fprintf(stderr, "[No further output means we're allright]\n"); | ||
85 | |||
86 | for (i=0; i<n_tasks; i++) | ||
87 | if (fork() == 0) /* child only; a failed fork() (-1) is not reported */ | ||
88 | task(); /* loops forever, so the child never reaches the code below */ | ||
89 | |||
90 | for (i=0; i<n_tasks; i++) | ||
91 | wait(NULL); /* one wait() per fork() attempt; child exit statuses are discarded */ | ||
92 | |||
93 | exit(0); | ||
94 | } | ||
95 | |||
diff --git a/Documentation/prctl/disable-tsc-test.c b/Documentation/prctl/disable-tsc-test.c new file mode 100644 index 000000000000..843c81eac235 --- /dev/null +++ b/Documentation/prctl/disable-tsc-test.c | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) | ||
3 | * | ||
4 | * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC | ||
5 | */ | ||
6 | |||
7 | #include <stdio.h> | ||
8 | #include <stdlib.h> | ||
9 | #include <unistd.h> | ||
10 | #include <signal.h> | ||
11 | #include <inttypes.h> | ||
12 | |||
13 | |||
14 | #include <sys/prctl.h> | ||
15 | #include <linux/prctl.h> | ||
16 | |||
17 | /* Get/set the process' ability to use the timestamp counter instruction */ | ||
18 | #ifndef PR_GET_TSC | ||
19 | #define PR_GET_TSC 25 | ||
20 | #define PR_SET_TSC 26 | ||
21 | # define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ | ||
22 | # define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ | ||
23 | #endif | ||
24 | |||
25 | const char *tsc_names[] = /* printable names, indexed by the value that PR_GET_TSC stores in tsc_val */ | ||
26 | { | ||
27 | [0] = "[not set]", /* also what gets printed when tsc_val keeps its initial value of 0 */ | ||
28 | [PR_TSC_ENABLE] = "PR_TSC_ENABLE", | ||
29 | [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV", | ||
30 | }; | ||
31 | |||
32 | uint64_t rdtsc() { /* Execute the rdtsc instruction and return its result as one 64-bit value. */ | ||
33 | uint32_t lo, hi; /* the two 32-bit halves produced by the instruction */ | ||
34 | /* We cannot use "=A", since this would use %rax on x86_64 */ | ||
35 | __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); | ||
36 | return (uint64_t)hi << 32 | lo; /* hi is widened before the shift so its bits are not lost */ | ||
37 | } | ||
38 | |||
39 | void sigsegv_cb(int sig) /* SIGSEGV handler: print the current TSC mode, then switch back to PR_TSC_ENABLE. */ | ||
40 | { | ||
41 | int tsc_val = 0; | ||
42 | |||
43 | printf("[ SIG_SEGV ]\n"); /* NOTE(review): printf()/perror() are not async-signal-safe; presumably tolerated in a demo - confirm */ | ||
44 | printf("prctl(PR_GET_TSC, &tsc_val); "); | ||
45 | fflush(stdout); | ||
46 | |||
47 | if ( prctl(PR_GET_TSC, &tsc_val) == -1) | ||
48 | perror("prctl"); /* failure is only reported; execution continues */ | ||
49 | |||
50 | printf("tsc_val == %s\n", tsc_names[tsc_val]); | ||
51 | printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); | ||
52 | fflush(stdout); | ||
53 | if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) | ||
54 | perror("prctl"); | ||
55 | |||
56 | printf("rdtsc() == "); /* prefix only, not flushed here; the handler itself never prints a counter value */ | ||
57 | } | ||
58 | |||
59 | int main(int argc, char **argv) /* Walk through PR_GET_TSC, PR_TSC_ENABLE and PR_TSC_SIGSEGV, printing rdtsc() after each step. */ | ||
60 | { | ||
61 | int tsc_val = 0; | ||
62 | |||
63 | signal(SIGSEGV, sigsegv_cb); | ||
64 | |||
65 | printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); /* read before any prctl() call */ | ||
66 | printf("prctl(PR_GET_TSC, &tsc_val); "); | ||
67 | fflush(stdout); | ||
68 | |||
69 | if ( prctl(PR_GET_TSC, &tsc_val) == -1) | ||
70 | perror("prctl"); /* failure is only reported; tsc_val then keeps its initial 0 */ | ||
71 | |||
72 | printf("tsc_val == %s\n", tsc_names[tsc_val]); | ||
73 | printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); | ||
74 | printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); | ||
75 | fflush(stdout); | ||
76 | |||
77 | if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) | ||
78 | perror("prctl"); | ||
79 | |||
80 | printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); | ||
81 | printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n"); | ||
82 | fflush(stdout); | ||
83 | |||
84 | if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1) | ||
85 | perror("prctl"); | ||
86 | |||
87 | printf("rdtsc() == "); /* flushed below so the prefix is emitted before the read that may fault */ | ||
88 | fflush(stdout); | ||
89 | printf("%llu\n", (unsigned long long)rdtsc()); /* the read made after requesting PR_TSC_SIGSEGV */ | ||
90 | fflush(stdout); | ||
91 | |||
92 | exit(EXIT_SUCCESS); | ||
93 | } | ||
94 | |||
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 1c6332f4543c..14f901f639ee 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt | |||
@@ -1,59 +1,177 @@ | |||
1 | Real-Time group scheduling | ||
2 | -------------------------- | ||
1 | 3 | ||
4 | CONTENTS | ||
5 | ======== | ||
2 | 6 | ||
3 | Real-Time group scheduling. | 7 | 1. Overview |
8 | 1.1 The problem | ||
9 | 1.2 The solution | ||
10 | 2. The interface | ||
11 | 2.1 System-wide settings | ||
12 | 2.2 Default behaviour | ||
13 | 2.3 Basis for grouping tasks | ||
14 | 3. Future plans | ||
4 | 15 | ||
5 | The problem space: | ||
6 | 16 | ||
7 | In order to schedule multiple groups of realtime tasks each group must | 17 | 1. Overview |
8 | be assigned a fixed portion of the CPU time available. Without a minimum | 18 | =========== |
9 | guarantee a realtime group can obviously fall short. A fuzzy upper limit | ||
10 | is of no use since it cannot be relied upon. Which leaves us with just | ||
11 | the single fixed portion. | ||
12 | 19 | ||
13 | CPU time is divided by means of specifying how much time can be spent | ||
14 | running in a given period. Say a frame fixed realtime renderer must | ||
15 | deliver 25 frames a second, which yields a period of 0.04s. Now say | ||
16 | it will also have to play some music and respond to input, leaving it | ||
17 | with around 80% for the graphics. We can then give this group a runtime | ||
18 | of 0.8 * 0.04s = 0.032s. | ||
19 | 20 | ||
20 | This way the graphics group will have a 0.04s period with a 0.032s runtime | 21 | 1.1 The problem |
21 | limit. | 22 | --------------- |
22 | 23 | ||
23 | Now if the audio thread needs to refill the DMA buffer every 0.005s, but | 24 | Realtime scheduling is all about determinism, a group has to be able to rely on |
24 | needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s | 25 | the amount of bandwidth (eg. CPU time) being constant. In order to schedule |
25 | = 0.00015s. | 26 | multiple groups of realtime tasks, each group must be assigned a fixed portion |
27 | of the CPU time available. Without a minimum guarantee a realtime group can | ||
28 | obviously fall short. A fuzzy upper limit is of no use since it cannot be | ||
29 | relied upon. Which leaves us with just the single fixed portion. | ||
26 | 30 | ||
31 | 1.2 The solution | ||
32 | ---------------- | ||
27 | 33 | ||
28 | The Interface: | 34 | CPU time is divided by means of specifying how much time can be spent running |
35 | in a given period. We allocate this "run time" for each realtime group which | ||
36 | the other realtime groups will not be permitted to use. | ||
29 | 37 | ||
30 | system wide: | 38 | Any time not allocated to a realtime group will be used to run normal priority |
39 | tasks (SCHED_OTHER). Any allocated run time not used will also be picked up by | ||
40 | SCHED_OTHER. | ||
31 | 41 | ||
32 | /proc/sys/kernel/sched_rt_period_ms | 42 | Let's consider an example: a frame fixed realtime renderer must deliver 25 |
33 | /proc/sys/kernel/sched_rt_runtime_us | 43 | frames a second, which yields a period of 0.04s per frame. Now say it will also |
44 | have to play some music and respond to input, leaving it with around 80% CPU | ||
45 | time dedicated for the graphics. We can then give this group a run time of 0.8 | ||
46 | * 0.04s = 0.032s. | ||
34 | 47 | ||
35 | CONFIG_FAIR_USER_SCHED | 48 | This way the graphics group will have a 0.04s period with a 0.032s run time |
49 | limit. Now if the audio thread needs to refill the DMA buffer every 0.005s, but | ||
50 | needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s = | ||
51 | 0.00015s. So this group can be scheduled with a period of 0.005s and a run time | ||
52 | of 0.00015s. | ||
36 | 53 | ||
37 | /sys/kernel/uids/<uid>/cpu_rt_runtime_us | 54 | The remaining CPU time will be used for user input and other tasks. Because |
55 | realtime tasks have explicitly allocated the CPU time they need to perform | ||
56 | their tasks, buffer underruns in the graphics or audio can be eliminated. | ||
38 | 57 | ||
39 | or | 58 | NOTE: the above example is not fully implemented as of yet (2.6.25). We still |
59 | lack an EDF scheduler to make non-uniform periods usable. | ||
40 | 60 | ||
41 | CONFIG_FAIR_CGROUP_SCHED | ||
42 | 61 | ||
43 | /cgroup/<cgroup>/cpu.rt_runtime_us | 62 | 2. The Interface |
63 | ================ | ||
44 | 64 | ||
45 | [ time is specified in us because the interface is s32; this gives an | ||
46 | operating range of ~35m to 1us ] | ||
47 | 65 | ||
48 | The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ]. | 66 | 2.1 System wide settings |
67 | ------------------------ | ||
49 | 68 | ||
50 | A runtime of -1 specifies runtime == period, ie. no limit. | 69 | The system wide settings are configured under the /proc virtual file system: |
51 | 70 | ||
52 | New groups get the period from /proc/sys/kernel/sched_rt_period_us and | 71 | /proc/sys/kernel/sched_rt_period_us: |
53 | a runtime of 0. | 72 | The scheduling period that is equivalent to 100% CPU bandwidth |
54 | 73 | ||
55 | Settings are constrained to: | 74 | /proc/sys/kernel/sched_rt_runtime_us: |
75 | A global limit on how much time realtime scheduling may use. Even without | ||
76 | CONFIG_RT_GROUP_SCHED enabled, this will limit time reserved to realtime | ||
77 | processes. With CONFIG_RT_GROUP_SCHED it signifies the total bandwidth | ||
78 | available to all realtime groups. | ||
79 | |||
80 | * Time is specified in us because the interface is s32. This gives an | ||
81 | operating range from 1us to about 35 minutes. | ||
82 | * sched_rt_period_us takes values from 1 to INT_MAX. | ||
83 | * sched_rt_runtime_us takes values from -1 to (INT_MAX - 1). | ||
84 | * A run time of -1 specifies runtime == period, ie. no limit. | ||
85 | |||
86 | |||
87 | 2.2 Default behaviour | ||
88 | --------------------- | ||
89 | |||
90 | The default values are sched_rt_period_us = 1000000 (1s) and | ||
91 | sched_rt_runtime_us = 950000 (0.95s). This gives 0.05s to be used by | ||
92 | SCHED_OTHER (non-RT tasks). These defaults were chosen so that a run-away | ||
93 | realtime task will not lock up the machine but leave a little time to recover | ||
94 | it. By setting runtime to -1 you'd get the old behaviour back. | ||
95 | |||
96 | By default all bandwidth is assigned to the root group and new groups get the | ||
97 | period from /proc/sys/kernel/sched_rt_period_us and a run time of 0. If you | ||
98 | want to assign bandwidth to another group, reduce the root group's bandwidth | ||
99 | and assign some or all of the difference to another group. | ||
100 | |||
101 | Realtime group scheduling means you have to assign a portion of total CPU | ||
102 | bandwidth to the group before it will accept realtime tasks. Therefore you will | ||
103 | not be able to run realtime tasks as any user other than root until you have | ||
104 | done that, even if the user has the rights to run processes with realtime | ||
105 | priority! | ||
106 | |||
107 | |||
108 | 2.3 Basis for grouping tasks | ||
109 | ---------------------------- | ||
110 | |||
111 | There are two compile-time settings for allocating CPU bandwidth. These are | ||
112 | configured using the "Basis for grouping tasks" multiple choice menu under | ||
113 | General setup > Group CPU Scheduler: | ||
114 | |||
115 | a. CONFIG_USER_SCHED (aka "Basis for grouping tasks" = "user id") | ||
116 | |||
117 | This lets you use the virtual files under | ||
118 | "/sys/kernel/uids/<uid>/cpu_rt_runtime_us" to control the CPU time reserved for | ||
119 | each user. | ||
120 | |||
121 | The other option is: | ||
122 | |||
123 | b. CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups") | ||
124 | |||
125 | This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_us" | ||
126 | to control the CPU time reserved for each control group instead. | ||
127 | |||
128 | For more information on working with control groups, you should read | ||
129 | Documentation/cgroups.txt as well. | ||
130 | |||
131 | Group settings are checked against the following limits in order to keep the configuration | ||
132 | schedulable: | ||
56 | 133 | ||
57 | \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period | 134 | \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period |
58 | 135 | ||
59 | in order to keep the configuration schedulable. | 136 | For now, this can be simplified to just the following (but see Future plans): |
137 | |||
138 | \Sum_{i} runtime_{i} <= global_runtime | ||
139 | |||
140 | |||
141 | 3. Future plans | ||
142 | =============== | ||
143 | |||
144 | There is work in progress to make the scheduling period for each group | ||
145 | ("/sys/kernel/uids/<uid>/cpu_rt_period_us" or | ||
146 | "/cgroup/<cgroup>/cpu.rt_period_us" respectively) configurable as well. | ||
147 | |||
148 | The constraint on the period is that a subgroup must have a smaller or | ||
149 | equal period to its parent. But realistically it's not very useful _yet_ | ||
150 | as it's prone to starvation without deadline scheduling. | ||
151 | |||
152 | Consider two sibling groups A and B; both have 50% bandwidth, but A's | ||
153 | period is twice the length of B's. | ||
154 | |||
155 | * group A: period=100000us, runtime=50000us | ||
156 | - this runs for 0.05s once every 0.1s | ||
157 | |||
158 | * group B: period= 50000us, runtime=25000us | ||
159 | - this runs for 0.025s twice every 0.1s (or once every 0.05 sec). | ||
160 | |||
161 | This means that currently a while (1) loop in A will run for the full period of | ||
162 | B and can starve B's tasks (assuming they are of lower priority) for a whole | ||
163 | period. | ||
164 | |||
165 | The next project will be SCHED_EDF (Earliest Deadline First scheduling) to bring | ||
166 | full deadline scheduling to the linux kernel. Deadline scheduling the above | ||
167 | groups and treating end of the period as a deadline will ensure that they both | ||
168 | get their allocated time. | ||
169 | |||
170 | Implementing SCHED_EDF might take a while to complete. Priority Inheritance is | ||
171 | the biggest challenge as the current linux PI infrastructure is geared towards | ||
172 | the limited static priority levels 0-139. With deadline scheduling you need to | ||
173 | do deadline inheritance (since priority is inversely proportional to the | ||
174 | deadline delta (deadline - now)). | ||
175 | |||
176 | This means the whole PI machinery will have to be reworked - and that is one of | ||
177 | the most complex pieces of code we have. | ||