diff options
Diffstat (limited to 'Documentation')
64 files changed, 3000 insertions, 505 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index c3014df066c4..33f55917f23f 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX | |||
@@ -14,6 +14,7 @@ Following translations are available on the WWW: | |||
14 | - this file. | 14 | - this file. |
15 | ABI/ | 15 | ABI/ |
16 | - info on kernel <-> userspace ABI and relative interface stability. | 16 | - info on kernel <-> userspace ABI and relative interface stability. |
17 | |||
17 | BUG-HUNTING | 18 | BUG-HUNTING |
18 | - brute force method of doing binary search of patches to find bug. | 19 | - brute force method of doing binary search of patches to find bug. |
19 | Changes | 20 | Changes |
@@ -66,6 +67,8 @@ VGA-softcursor.txt | |||
66 | - how to change your VGA cursor from a blinking underscore. | 67 | - how to change your VGA cursor from a blinking underscore. |
67 | accounting/ | 68 | accounting/ |
68 | - documentation on accounting and taskstats. | 69 | - documentation on accounting and taskstats. |
70 | acpi/ | ||
71 | - info on ACPI-specific hooks in the kernel. | ||
69 | aoe/ | 72 | aoe/ |
70 | - description of AoE (ATA over Ethernet) along with config examples. | 73 | - description of AoE (ATA over Ethernet) along with config examples. |
71 | applying-patches.txt | 74 | applying-patches.txt |
@@ -154,7 +157,7 @@ firmware_class/ | |||
154 | - request_firmware() hotplug interface info. | 157 | - request_firmware() hotplug interface info. |
155 | floppy.txt | 158 | floppy.txt |
156 | - notes and driver options for the floppy disk driver. | 159 | - notes and driver options for the floppy disk driver. |
157 | fujitsu/ | 160 | frv/ |
158 | - Fujitsu FR-V Linux documentation. | 161 | - Fujitsu FR-V Linux documentation. |
159 | gpio.txt | 162 | gpio.txt |
160 | - overview of GPIO (General Purpose Input/Output) access conventions. | 163 | - overview of GPIO (General Purpose Input/Output) access conventions. |
@@ -364,8 +367,6 @@ sharedsubtree.txt | |||
364 | - a description of shared subtrees for namespaces. | 367 | - a description of shared subtrees for namespaces. |
365 | smart-config.txt | 368 | smart-config.txt |
366 | - description of the Smart Config makefile feature. | 369 | - description of the Smart Config makefile feature. |
367 | smp.txt | ||
368 | - a few notes on symmetric multi-processing. | ||
369 | sony-laptop.txt | 370 | sony-laptop.txt |
370 | - Sony Notebook Control Driver (SNC) Readme. | 371 | - Sony Notebook Control Driver (SNC) Readme. |
371 | sonypi.txt | 372 | sonypi.txt |
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb index 9734577d1711..11a3c1682cec 100644 --- a/Documentation/ABI/testing/sysfs-bus-usb +++ b/Documentation/ABI/testing/sysfs-bus-usb | |||
@@ -52,3 +52,36 @@ Description: | |||
52 | facility is inherently dangerous, it is disabled by default | 52 | facility is inherently dangerous, it is disabled by default |
53 | for all devices except hubs. For more information, see | 53 | for all devices except hubs. For more information, see |
54 | Documentation/usb/persist.txt. | 54 | Documentation/usb/persist.txt. |
55 | |||
56 | What: /sys/bus/usb/devices/.../power/connected_duration | ||
57 | Date: January 2008 | ||
58 | KernelVersion: 2.6.25 | ||
59 | Contact: Sarah Sharp <sarah.a.sharp@intel.com> | ||
60 | Description: | ||
61 | If CONFIG_PM and CONFIG_USB_SUSPEND are enabled, then this file | ||
62 | is present. When read, it returns the total time (in msec) | ||
63 | that the USB device has been connected to the machine. This | ||
64 | file is read-only. | ||
65 | Users: | ||
66 | PowerTOP <power@bughost.org> | ||
67 | http://www.lesswatts.org/projects/powertop/ | ||
68 | |||
69 | What: /sys/bus/usb/devices/.../power/active_duration | ||
70 | Date: January 2008 | ||
71 | KernelVersion: 2.6.25 | ||
72 | Contact: Sarah Sharp <sarah.a.sharp@intel.com> | ||
73 | Description: | ||
74 | If CONFIG_PM and CONFIG_USB_SUSPEND are enabled, then this file | ||
75 | is present. When read, it returns the total time (in msec) | ||
76 | that the USB device has been active, i.e. not in a suspended | ||
77 | state. This file is read-only. | ||
78 | |||
79 | Tools can use this file and the connected_duration file to | ||
80 | compute the percentage of time that a device has been active. | ||
81 | For example, | ||
82 | echo $((100 * `cat active_duration` / `cat connected_duration`)) | ||
83 | will give an integer percentage. Note that this does not | ||
84 | account for counter wrap. | ||
85 | Users: | ||
86 | PowerTOP <power@bughost.org> | ||
87 | http://www.lesswatts.org/projects/powertop/ | ||
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi new file mode 100644 index 000000000000..9470ed9afcc0 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-acpi | |||
@@ -0,0 +1,99 @@ | |||
1 | What: /sys/firmware/acpi/interrupts/ | ||
2 | Date: February 2008 | ||
3 | Contact: Len Brown <lenb@kernel.org> | ||
4 | Description: | ||
5 | All ACPI interrupts are handled via a single IRQ, | ||
6 | the System Control Interrupt (SCI), which appears | ||
7 | as "acpi" in /proc/interrupts. | ||
8 | |||
9 | However, one of the main functions of ACPI is to make | ||
10 | the platform understand random hardware without | ||
11 | special driver support. So while the SCI handles a few | ||
12 | well known (fixed feature) interrupt sources, such | ||
13 | as the power button, it can also handle a variable | ||
14 | number of "General Purpose Events" (GPEs). | ||
15 | |||
16 | A GPE vectors to a specified handler in AML, which | ||
17 | can do anything the BIOS writer wants from | ||
18 | OS context. GPE 0x12, for example, would vector | ||
19 | to a level or edge handler called _L12 or _E12. | ||
20 | The handler may do its business and return. | ||
21 | Or the handler may send a Notify event | ||
22 | to a Linux device driver registered on an ACPI device, | ||
23 | such as a battery, or a processor. | ||
24 | |||
25 | To figure out where all the SCIs are coming from, | ||
26 | /sys/firmware/acpi/interrupts contains a file listing | ||
27 | every possible source, and the count of how many | ||
28 | times it has triggered. | ||
29 | |||
30 | $ cd /sys/firmware/acpi/interrupts | ||
31 | $ grep . * | ||
32 | error:0 | ||
33 | ff_gbl_lock:0 | ||
34 | ff_pmtimer:0 | ||
35 | ff_pwr_btn:0 | ||
36 | ff_rt_clk:0 | ||
37 | ff_slp_btn:0 | ||
38 | gpe00:0 | ||
39 | gpe01:0 | ||
40 | gpe02:0 | ||
41 | gpe03:0 | ||
42 | gpe04:0 | ||
43 | gpe05:0 | ||
44 | gpe06:0 | ||
45 | gpe07:0 | ||
46 | gpe08:0 | ||
47 | gpe09:174 | ||
48 | gpe0A:0 | ||
49 | gpe0B:0 | ||
50 | gpe0C:0 | ||
51 | gpe0D:0 | ||
52 | gpe0E:0 | ||
53 | gpe0F:0 | ||
54 | gpe10:0 | ||
55 | gpe11:60 | ||
56 | gpe12:0 | ||
57 | gpe13:0 | ||
58 | gpe14:0 | ||
59 | gpe15:0 | ||
60 | gpe16:0 | ||
61 | gpe17:0 | ||
62 | gpe18:0 | ||
63 | gpe19:7 | ||
64 | gpe1A:0 | ||
65 | gpe1B:0 | ||
66 | gpe1C:0 | ||
67 | gpe1D:0 | ||
68 | gpe1E:0 | ||
69 | gpe1F:0 | ||
70 | gpe_all:241 | ||
71 | sci:241 | ||
72 | |||
73 | sci - The total number of times the ACPI SCI | ||
74 | has claimed an interrupt. | ||
75 | |||
76 | gpe_all - count of SCI caused by GPEs. | ||
77 | |||
78 | gpeXX - count for individual GPE source | ||
79 | |||
80 | ff_gbl_lock - Global Lock | ||
81 | |||
82 | ff_pmtimer - PM Timer | ||
83 | |||
84 | ff_pwr_btn - Power Button | ||
85 | |||
86 | ff_rt_clk - Real Time Clock | ||
87 | |||
88 | ff_slp_btn - Sleep Button | ||
89 | |||
90 | error - an interrupt that can't be accounted for above. | ||
91 | |||
92 | Root has permission to clear any of these counters. Eg. | ||
93 | # echo 0 > gpe11 | ||
94 | |||
95 | All counters can be cleared by clearing the total "sci": | ||
96 | # echo 0 > sci | ||
97 | |||
98 | None of these counters has an effect on the function | ||
99 | of the system; they are simply statistics. | ||
diff --git a/Documentation/BUG-HUNTING b/Documentation/BUG-HUNTING index 35f5bd243336..65022a87bf17 100644 --- a/Documentation/BUG-HUNTING +++ b/Documentation/BUG-HUNTING | |||
@@ -53,7 +53,7 @@ Finding it the old way | |||
53 | 53 | ||
54 | [Sat Mar 2 10:32:33 PST 1996 KERNEL_BUG-HOWTO lm@sgi.com (Larry McVoy)] | 54 | [Sat Mar 2 10:32:33 PST 1996 KERNEL_BUG-HOWTO lm@sgi.com (Larry McVoy)] |
55 | 55 | ||
56 | This is how to track down a bug if you know nothing about kernel hacking. | 56 | This is how to track down a bug if you know nothing about kernel hacking. |
57 | It's a brute force approach but it works pretty well. | 57 | It's a brute force approach but it works pretty well. |
58 | 58 | ||
59 | You need: | 59 | You need: |
@@ -66,12 +66,12 @@ You will then do: | |||
66 | 66 | ||
67 | . Rebuild a revision that you believe works, install, and verify that. | 67 | . Rebuild a revision that you believe works, install, and verify that. |
68 | . Do a binary search over the kernels to figure out which one | 68 | . Do a binary search over the kernels to figure out which one |
69 | introduced the bug. I.e., suppose 1.3.28 didn't have the bug, but | 69 | introduced the bug. I.e., suppose 1.3.28 didn't have the bug, but |
70 | you know that 1.3.69 does. Pick a kernel in the middle and build | 70 | you know that 1.3.69 does. Pick a kernel in the middle and build |
71 | that, like 1.3.50. Build & test; if it works, pick the mid point | 71 | that, like 1.3.50. Build & test; if it works, pick the mid point |
72 | between .50 and .69, else the mid point between .28 and .50. | 72 | between .50 and .69, else the mid point between .28 and .50. |
73 | . You'll narrow it down to the kernel that introduced the bug. You | 73 | . You'll narrow it down to the kernel that introduced the bug. You |
74 | can probably do better than this but it gets tricky. | 74 | can probably do better than this but it gets tricky. |
75 | 75 | ||
76 | . Narrow it down to a subdirectory | 76 | . Narrow it down to a subdirectory |
77 | 77 | ||
@@ -81,27 +81,27 @@ You will then do: | |||
81 | directories: | 81 | directories: |
82 | 82 | ||
83 | Copy the non-working directory next to the working directory | 83 | Copy the non-working directory next to the working directory |
84 | as "dir.63". | 84 | as "dir.63". |
85 | One directory at a time, try moving the working directory to | 85 | One directory at a time, try moving the working directory to |
86 | "dir.62" and the non-working "dir.63" into its place: | 86 | "dir.62" and the non-working "dir.63" into its place: |
87 | 87 | ||
88 | mv dir dir.62 | 88 | mv dir dir.62 |
89 | mv dir.63 dir | 89 | mv dir.63 dir |
90 | find dir -name '*.[oa]' -print | xargs rm -f | 90 | find dir -name '*.[oa]' -print | xargs rm -f |
91 | 91 | ||
92 | And then rebuild and retest. Assuming that all related | 92 | And then rebuild and retest. Assuming that all related |
93 | changes were contained in the sub directory, this should | 93 | changes were contained in the sub directory, this should |
94 | isolate the change to a directory. | 94 | isolate the change to a directory. |
95 | 95 | ||
96 | Problems: changes in header files may have occurred; I've | 96 | Problems: changes in header files may have occurred; I've |
97 | found in my case that they were self explanatory - you may | 97 | found in my case that they were self explanatory - you may |
98 | or may not want to give up when that happens. | 98 | or may not want to give up when that happens. |
99 | 99 | ||
100 | . Narrow it down to a file | 100 | . Narrow it down to a file |
101 | 101 | ||
102 | - You can apply the same technique to each file in the directory, | 102 | - You can apply the same technique to each file in the directory, |
103 | hoping that the changes in that file are self contained. | 103 | hoping that the changes in that file are self contained. |
104 | 104 | ||
105 | . Narrow it down to a routine | 105 | . Narrow it down to a routine |
106 | 106 | ||
107 | - You can take the old file and the new file and manually create | 107 | - You can take the old file and the new file and manually create |
@@ -130,7 +130,7 @@ You will then do: | |||
130 | that makes the difference. | 130 | that makes the difference. |
131 | 131 | ||
132 | Finally, you take all the info that you have, kernel revisions, bug | 132 | Finally, you take all the info that you have, kernel revisions, bug |
133 | description, the extent to which you have narrowed it down, and pass | 133 | description, the extent to which you have narrowed it down, and pass |
134 | that off to whomever you believe is the maintainer of that section. | 134 | that off to whomever you believe is the maintainer of that section. |
135 | A post to linux.dev.kernel isn't such a bad idea if you've done some | 135 | A post to linux.dev.kernel isn't such a bad idea if you've done some |
136 | work to narrow it down. | 136 | work to narrow it down. |
@@ -214,6 +214,23 @@ And recompile the kernel with CONFIG_DEBUG_INFO enabled: | |||
214 | gdb vmlinux | 214 | gdb vmlinux |
215 | (gdb) p vt_ioctl | 215 | (gdb) p vt_ioctl |
216 | (gdb) l *(0x<address of vt_ioctl> + 0xda8) | 216 | (gdb) l *(0x<address of vt_ioctl> + 0xda8) |
217 | or, as one command | ||
218 | (gdb) l *(vt_ioctl + 0xda8) | ||
219 | |||
220 | If you have a call trace, such as :- | ||
221 | >Call Trace: | ||
222 | > [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5 | ||
223 | > [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e | ||
224 | > [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee | ||
225 | > ... | ||
226 | this shows the problem in the :jbd: module. You can load that module in gdb | ||
227 | and list the relevant code. | ||
228 | gdb fs/jbd/jbd.ko | ||
229 | (gdb) p log_wait_commit | ||
230 | (gdb) l *(0x<address> + 0xa3) | ||
231 | or | ||
232 | (gdb) l *(log_wait_commit + 0xa3) | ||
233 | |||
217 | 234 | ||
218 | Another very useful option of the Kernel Hacking section in menuconfig is | 235 | Another very useful option of the Kernel Hacking section in menuconfig is |
219 | Debug memory allocations. This will help you see whether data has been | 236 | Debug memory allocations. This will help you see whether data has been |
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 77436d735013..059aaf20951a 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -165,6 +165,7 @@ X!Ilib/string.c | |||
165 | !Emm/vmalloc.c | 165 | !Emm/vmalloc.c |
166 | !Imm/page_alloc.c | 166 | !Imm/page_alloc.c |
167 | !Emm/mempool.c | 167 | !Emm/mempool.c |
168 | !Emm/dmapool.c | ||
168 | !Emm/page-writeback.c | 169 | !Emm/page-writeback.c |
169 | !Emm/truncate.c | 170 | !Emm/truncate.c |
170 | </sect1> | 171 | </sect1> |
@@ -371,7 +372,6 @@ X!Iinclude/linux/device.h | |||
371 | !Edrivers/base/class.c | 372 | !Edrivers/base/class.c |
372 | !Edrivers/base/firmware_class.c | 373 | !Edrivers/base/firmware_class.c |
373 | !Edrivers/base/transport_class.c | 374 | !Edrivers/base/transport_class.c |
374 | !Edrivers/base/dmapool.c | ||
375 | <!-- Cannot be included, because | 375 | <!-- Cannot be included, because |
376 | attribute_container_add_class_device_adapter | 376 | attribute_container_add_class_device_adapter |
377 | and attribute_container_classdev_to_container | 377 | and attribute_container_classdev_to_container |
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 01825ee7db64..2e9d6b41f034 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl | |||
@@ -717,7 +717,7 @@ used, and when it gets full, throws out the least used one. | |||
717 | <para> | 717 | <para> |
718 | For our first example, we assume that all operations are in user | 718 | For our first example, we assume that all operations are in user |
719 | context (ie. from system calls), so we can sleep. This means we can | 719 | context (ie. from system calls), so we can sleep. This means we can |
720 | use a semaphore to protect the cache and all the objects within | 720 | use a mutex to protect the cache and all the objects within |
721 | it. Here's the code: | 721 | it. Here's the code: |
722 | </para> | 722 | </para> |
723 | 723 | ||
@@ -725,7 +725,7 @@ it. Here's the code: | |||
725 | #include <linux/list.h> | 725 | #include <linux/list.h> |
726 | #include <linux/slab.h> | 726 | #include <linux/slab.h> |
727 | #include <linux/string.h> | 727 | #include <linux/string.h> |
728 | #include <asm/semaphore.h> | 728 | #include <linux/mutex.h> |
729 | #include <asm/errno.h> | 729 | #include <asm/errno.h> |
730 | 730 | ||
731 | struct object | 731 | struct object |
@@ -737,7 +737,7 @@ struct object | |||
737 | }; | 737 | }; |
738 | 738 | ||
739 | /* Protects the cache, cache_num, and the objects within it */ | 739 | /* Protects the cache, cache_num, and the objects within it */ |
740 | static DECLARE_MUTEX(cache_lock); | 740 | static DEFINE_MUTEX(cache_lock); |
741 | static LIST_HEAD(cache); | 741 | static LIST_HEAD(cache); |
742 | static unsigned int cache_num = 0; | 742 | static unsigned int cache_num = 0; |
743 | #define MAX_CACHE_SIZE 10 | 743 | #define MAX_CACHE_SIZE 10 |
@@ -789,17 +789,17 @@ int cache_add(int id, const char *name) | |||
789 | obj->id = id; | 789 | obj->id = id; |
790 | obj->popularity = 0; | 790 | obj->popularity = 0; |
791 | 791 | ||
792 | down(&cache_lock); | 792 | mutex_lock(&cache_lock); |
793 | __cache_add(obj); | 793 | __cache_add(obj); |
794 | up(&cache_lock); | 794 | mutex_unlock(&cache_lock); |
795 | return 0; | 795 | return 0; |
796 | } | 796 | } |
797 | 797 | ||
798 | void cache_delete(int id) | 798 | void cache_delete(int id) |
799 | { | 799 | { |
800 | down(&cache_lock); | 800 | mutex_lock(&cache_lock); |
801 | __cache_delete(__cache_find(id)); | 801 | __cache_delete(__cache_find(id)); |
802 | up(&cache_lock); | 802 | mutex_unlock(&cache_lock); |
803 | } | 803 | } |
804 | 804 | ||
805 | int cache_find(int id, char *name) | 805 | int cache_find(int id, char *name) |
@@ -807,13 +807,13 @@ int cache_find(int id, char *name) | |||
807 | struct object *obj; | 807 | struct object *obj; |
808 | int ret = -ENOENT; | 808 | int ret = -ENOENT; |
809 | 809 | ||
810 | down(&cache_lock); | 810 | mutex_lock(&cache_lock); |
811 | obj = __cache_find(id); | 811 | obj = __cache_find(id); |
812 | if (obj) { | 812 | if (obj) { |
813 | ret = 0; | 813 | ret = 0; |
814 | strcpy(name, obj->name); | 814 | strcpy(name, obj->name); |
815 | } | 815 | } |
816 | up(&cache_lock); | 816 | mutex_unlock(&cache_lock); |
817 | return ret; | 817 | return ret; |
818 | } | 818 | } |
819 | </programlisting> | 819 | </programlisting> |
@@ -853,7 +853,7 @@ The change is shown below, in standard patch format: the | |||
853 | int popularity; | 853 | int popularity; |
854 | }; | 854 | }; |
855 | 855 | ||
856 | -static DECLARE_MUTEX(cache_lock); | 856 | -static DEFINE_MUTEX(cache_lock); |
857 | +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; | 857 | +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; |
858 | static LIST_HEAD(cache); | 858 | static LIST_HEAD(cache); |
859 | static unsigned int cache_num = 0; | 859 | static unsigned int cache_num = 0; |
@@ -870,22 +870,22 @@ The change is shown below, in standard patch format: the | |||
870 | obj->id = id; | 870 | obj->id = id; |
871 | obj->popularity = 0; | 871 | obj->popularity = 0; |
872 | 872 | ||
873 | - down(&cache_lock); | 873 | - mutex_lock(&cache_lock); |
874 | + spin_lock_irqsave(&cache_lock, flags); | 874 | + spin_lock_irqsave(&cache_lock, flags); |
875 | __cache_add(obj); | 875 | __cache_add(obj); |
876 | - up(&cache_lock); | 876 | - mutex_unlock(&cache_lock); |
877 | + spin_unlock_irqrestore(&cache_lock, flags); | 877 | + spin_unlock_irqrestore(&cache_lock, flags); |
878 | return 0; | 878 | return 0; |
879 | } | 879 | } |
880 | 880 | ||
881 | void cache_delete(int id) | 881 | void cache_delete(int id) |
882 | { | 882 | { |
883 | - down(&cache_lock); | 883 | - mutex_lock(&cache_lock); |
884 | + unsigned long flags; | 884 | + unsigned long flags; |
885 | + | 885 | + |
886 | + spin_lock_irqsave(&cache_lock, flags); | 886 | + spin_lock_irqsave(&cache_lock, flags); |
887 | __cache_delete(__cache_find(id)); | 887 | __cache_delete(__cache_find(id)); |
888 | - up(&cache_lock); | 888 | - mutex_unlock(&cache_lock); |
889 | + spin_unlock_irqrestore(&cache_lock, flags); | 889 | + spin_unlock_irqrestore(&cache_lock, flags); |
890 | } | 890 | } |
891 | 891 | ||
@@ -895,14 +895,14 @@ The change is shown below, in standard patch format: the | |||
895 | int ret = -ENOENT; | 895 | int ret = -ENOENT; |
896 | + unsigned long flags; | 896 | + unsigned long flags; |
897 | 897 | ||
898 | - down(&cache_lock); | 898 | - mutex_lock(&cache_lock); |
899 | + spin_lock_irqsave(&cache_lock, flags); | 899 | + spin_lock_irqsave(&cache_lock, flags); |
900 | obj = __cache_find(id); | 900 | obj = __cache_find(id); |
901 | if (obj) { | 901 | if (obj) { |
902 | ret = 0; | 902 | ret = 0; |
903 | strcpy(name, obj->name); | 903 | strcpy(name, obj->name); |
904 | } | 904 | } |
905 | - up(&cache_lock); | 905 | - mutex_unlock(&cache_lock); |
906 | + spin_unlock_irqrestore(&cache_lock, flags); | 906 | + spin_unlock_irqrestore(&cache_lock, flags); |
907 | return ret; | 907 | return ret; |
908 | } | 908 | } |
diff --git a/Documentation/DocBook/s390-drivers.tmpl b/Documentation/DocBook/s390-drivers.tmpl index 3d2f31b99dd9..4acc73240a6d 100644 --- a/Documentation/DocBook/s390-drivers.tmpl +++ b/Documentation/DocBook/s390-drivers.tmpl | |||
@@ -59,7 +59,7 @@ | |||
59 | <title>Introduction</title> | 59 | <title>Introduction</title> |
60 | <para> | 60 | <para> |
61 | This document describes the interfaces available for device drivers that | 61 | This document describes the interfaces available for device drivers that |
62 | drive s390 based channel attached devices. This includes interfaces for | 62 | drive s390 based channel attached I/O devices. This includes interfaces for |
63 | interaction with the hardware and interfaces for interacting with the | 63 | interaction with the hardware and interfaces for interacting with the |
64 | common driver core. Those interfaces are provided by the s390 common I/O | 64 | common driver core. Those interfaces are provided by the s390 common I/O |
65 | layer. | 65 | layer. |
@@ -86,9 +86,10 @@ | |||
86 | The ccw bus typically contains the majority of devices available to | 86 | The ccw bus typically contains the majority of devices available to |
87 | a s390 system. Named after the channel command word (ccw), the basic | 87 | a s390 system. Named after the channel command word (ccw), the basic |
88 | command structure used to address its devices, the ccw bus contains | 88 | command structure used to address its devices, the ccw bus contains |
89 | so-called channel attached devices. They are addressed via subchannels, | 89 | so-called channel attached devices. They are addressed via I/O |
90 | visible on the css bus. A device driver, however, will never interact | 90 | subchannels, visible on the css bus. A device driver for |
91 | with the subchannel directly, but only via the device on the ccw bus, | 91 | channel-attached devices, however, will never interact with the |
92 | subchannel directly, but only via the I/O device on the ccw bus, | ||
92 | the ccw device. | 93 | the ccw device. |
93 | </para> | 94 | </para> |
94 | <sect1 id="channelIO"> | 95 | <sect1 id="channelIO"> |
@@ -116,7 +117,6 @@ | |||
116 | !Iinclude/asm-s390/ccwdev.h | 117 | !Iinclude/asm-s390/ccwdev.h |
117 | !Edrivers/s390/cio/device.c | 118 | !Edrivers/s390/cio/device.c |
118 | !Edrivers/s390/cio/device_ops.c | 119 | !Edrivers/s390/cio/device_ops.c |
119 | !Edrivers/s390/cio/airq.c | ||
120 | </sect1> | 120 | </sect1> |
121 | <sect1 id="cmf"> | 121 | <sect1 id="cmf"> |
122 | <title>The channel-measurement facility</title> | 122 | <title>The channel-measurement facility</title> |
@@ -147,4 +147,15 @@ | |||
147 | </sect1> | 147 | </sect1> |
148 | </chapter> | 148 | </chapter> |
149 | 149 | ||
150 | <chapter id="genericinterfaces"> | ||
151 | <title>Generic interfaces</title> | ||
152 | <para> | ||
153 | Some interfaces are available to other drivers that do not necessarily | ||
154 | have anything to do with the busses described above, but still are | ||
155 | indirectly using basic infrastructure in the common I/O layer. | ||
156 | One example is the support for adapter interrupts. | ||
157 | </para> | ||
158 | !Edrivers/s390/cio/airq.c | ||
159 | </chapter> | ||
160 | |||
150 | </book> | 161 | </book> |
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt new file mode 100644 index 000000000000..989c2fcd8111 --- /dev/null +++ b/Documentation/Smack.txt | |||
@@ -0,0 +1,493 @@ | |||
1 | |||
2 | |||
3 | "Good for you, you've decided to clean the elevator!" | ||
4 | - The Elevator, from Dark Star | ||
5 | |||
6 | Smack is the Simplified Mandatory Access Control Kernel. | ||
7 | Smack is a kernel based implementation of mandatory access | ||
8 | control that includes simplicity in its primary design goals. | ||
9 | |||
10 | Smack is not the only Mandatory Access Control scheme | ||
11 | available for Linux. Those new to Mandatory Access Control | ||
12 | are encouraged to compare Smack with the other mechanisms | ||
13 | available to determine which is best suited to the problem | ||
14 | at hand. | ||
15 | |||
16 | Smack consists of three major components: | ||
17 | - The kernel | ||
18 | - A start-up script and a few modified applications | ||
19 | - Configuration data | ||
20 | |||
21 | The kernel component of Smack is implemented as a Linux | ||
22 | Security Modules (LSM) module. It requires netlabel and | ||
23 | works best with file systems that support extended attributes, | ||
24 | although xattr support is not strictly required. | ||
25 | It is safe to run a Smack kernel under a "vanilla" distribution. | ||
26 | Smack kernels use the CIPSO IP option. Some network | ||
27 | configurations are intolerant of IP options and can impede | ||
28 | access to systems that use them as Smack does. | ||
29 | |||
30 | The startup script etc-init.d-smack should be installed | ||
31 | in /etc/init.d/smack and should be invoked early in the | ||
32 | start-up process. On Fedora rc5.d/S02smack is recommended. | ||
33 | This script ensures that certain devices have the correct | ||
34 | Smack attributes and loads the Smack configuration if | ||
35 | any is defined. This script invokes two programs that | ||
36 | ensure configuration data is properly formatted. These | ||
37 | programs are /usr/sbin/smackload and /usr/sbin/smackcipso. | ||
38 | The system will run just fine without these programs, | ||
39 | but it will be difficult to set access rules properly. | ||
40 | |||
41 | A version of "ls" that provides a "-M" option to display | ||
42 | Smack labels on long listing is available. | ||
43 | |||
44 | A hacked version of sshd that allows network logins by users | ||
45 | with specific Smack labels is available. This version does | ||
46 | not work for scp. You must set the /etc/ssh/sshd_config | ||
47 | line: | ||
48 | UsePrivilegeSeparation no | ||
49 | |||
50 | The format of /etc/smack/usr is: | ||
51 | |||
52 | username smack | ||
53 | |||
54 | In keeping with the intent of Smack, configuration data is | ||
55 | minimal and not strictly required. The most important | ||
56 | configuration step is mounting the smackfs pseudo filesystem. | ||
57 | |||
58 | Add this line to /etc/fstab: | ||
59 | |||
60 | smackfs /smack smackfs smackfsdef=* 0 0 | ||
61 | |||
62 | and create the /smack directory for mounting. | ||
63 | |||
64 | Smack uses extended attributes (xattrs) to store file labels. | ||
65 | The command to set a Smack label on a file is: | ||
66 | |||
67 | # attr -S -s SMACK64 -V "value" path | ||
68 | |||
69 | NOTE: Smack labels are limited to 23 characters. The attr command | ||
70 | does not enforce this restriction and can be used to set | ||
71 | invalid Smack labels on files. | ||
72 | |||
73 | If you don't do anything special all users will get the floor ("_") | ||
74 | label when they log in. If you do want to log in via the hacked ssh | ||
75 | at other labels use the attr command to set the smack value on the | ||
76 | home directory and its contents. | ||
77 | |||
78 | You can add access rules in /etc/smack/accesses. They take the form: | ||
79 | |||
80 | subjectlabel objectlabel access | ||
81 | |||
82 | access is a combination of the letters rwxa which specify the | ||
83 | kind of access permitted a subject with subjectlabel on an | ||
84 | object with objectlabel. If there is no rule, no access is allowed. | ||
85 | |||
86 | A process can see the smack label it is running with by | ||
87 | reading /proc/self/attr/current. A privileged process can | ||
88 | set the process smack by writing there. | ||
89 | |||
90 | Look for additional programs on http://schaufler-ca.com | ||
91 | |||
92 | From the Smack Whitepaper: | ||
93 | |||
94 | The Simplified Mandatory Access Control Kernel | ||
95 | |||
96 | Casey Schaufler | ||
97 | casey@schaufler-ca.com | ||
98 | |||
99 | Mandatory Access Control | ||
100 | |||
101 | Computer systems employ a variety of schemes to constrain how information is | ||
102 | shared among the people and services using the machine. Some of these schemes | ||
103 | allow the program or user to decide what other programs or users are allowed | ||
104 | access to pieces of data. These schemes are called discretionary access | ||
105 | control mechanisms because the access control is specified at the discretion | ||
106 | of the user. Other schemes do not leave the decision regarding what a user or | ||
107 | program can access up to users or programs. These schemes are called mandatory | ||
108 | access control mechanisms because you don't have a choice regarding the users | ||
109 | or programs that have access to pieces of data. | ||
110 | |||
111 | Bell & LaPadula | ||
112 | |||
113 | From the middle of the 1980's until the turn of the century Mandatory Access | ||
114 | Control (MAC) was very closely associated with the Bell & LaPadula security | ||
115 | model, a mathematical description of the United States Department of Defense | ||
116 | policy for marking paper documents. MAC in this form enjoyed a following | ||
117 | within the Capital Beltway and Scandinavian supercomputer centers but was | ||
118 | often cited as failing to address general needs. | ||
119 | |||
120 | Domain Type Enforcement | ||
121 | |||
122 | Around the turn of the century Domain Type Enforcement (DTE) became popular. | ||
123 | This scheme organizes users, programs, and data into domains that are | ||
124 | protected from each other. This scheme has been widely deployed as a component | ||
125 | of popular Linux distributions. The administrative overhead required to | ||
126 | maintain this scheme and the detailed understanding of the whole system | ||
127 | necessary to provide a secure domain mapping leads to the scheme being | ||
128 | disabled or used in limited ways in the majority of cases. | ||
129 | |||
130 | Smack | ||
131 | |||
132 | Smack is a Mandatory Access Control mechanism designed to provide useful MAC | ||
133 | while avoiding the pitfalls of its predecessors. The limitations of Bell & | ||
134 | LaPadula are addressed by providing a scheme whereby access can be controlled | ||
135 | according to the requirements of the system and its purpose rather than those | ||
136 | imposed by an arcane government policy. The complexity of Domain Type | ||
137 | Enforcement is avoided by defining access controls in terms of the access | ||
138 | modes already in use. | ||
139 | |||
140 | Smack Terminology | ||
141 | |||
142 | The jargon used to talk about Smack will be familiar to those who have dealt | ||
143 | with other MAC systems and shouldn't be too difficult for the uninitiated to | ||
144 | pick up. There are four terms that are used in a specific way and that are | ||
145 | especially important: | ||
146 | |||
147 | Subject: A subject is an active entity on the computer system. | ||
148 | On Smack a subject is a task, which is in turn the basic unit | ||
149 | of execution. | ||
150 | |||
151 | Object: An object is a passive entity on the computer system. | ||
152 | On Smack files of all types, IPC, and tasks can be objects. | ||
153 | |||
154 | Access: Any attempt by a subject to put information into or get | ||
155 | information from an object is an access. | ||
156 | |||
157 | Label: Data that identifies the Mandatory Access Control | ||
158 | characteristics of a subject or an object. | ||
159 | |||
160 | These definitions are consistent with the traditional use in the security | ||
161 | community. There are also some terms from Linux that are likely to crop up: | ||
162 | |||
163 | Capability: A task that possesses a capability has permission to | ||
164 | violate an aspect of the system security policy, as identified by | ||
165 | the specific capability. A task that possesses one or more | ||
166 | capabilities is a privileged task, whereas a task with no | ||
167 | capabilities is an unprivileged task. | ||
168 | |||
169 | Privilege: A task that is allowed to violate the system security | ||
170 | policy is said to have privilege. As of this writing a task can | ||
171 | have privilege either by possessing capabilities or by having an | ||
172 | effective user of root. | ||
173 | |||
174 | Smack Basics | ||
175 | |||
176 | Smack is an extension to a Linux system. It enforces additional restrictions | ||
177 | on what subjects can access which objects, based on the labels attached to | ||
178 | each of the subject and the object. | ||
179 | |||
180 | Labels | ||
181 | |||
182 | Smack labels are ASCII character strings, one to twenty-three characters in | ||
183 | length. Single character labels using special characters, that being anything | ||
184 | other than a letter or digit, are reserved for use by the Smack development | ||
185 | team. Smack labels are unstructured, case sensitive, and the only operation | ||
186 | ever performed on them is comparison for equality. Smack labels cannot | ||
187 | contain unprintable characters or the "/" (slash) character. | ||
188 | |||
189 | There are some predefined labels: | ||
190 | |||
191 | _ Pronounced "floor", a single underscore character. | ||
192 | ^ Pronounced "hat", a single circumflex character. | ||
193 | * Pronounced "star", a single asterisk character. | ||
194 | ? Pronounced "huh", a single question mark character. | ||
195 | |||
196 | Every task on a Smack system is assigned a label. System tasks, such as | ||
197 | init(8) and system daemons, are run with the floor ("_") label. User tasks | ||
198 | are assigned labels according to the specification found in the | ||
199 | /etc/smack/user configuration file. | ||
200 | |||
201 | Access Rules | ||
202 | |||
203 | Smack uses the traditional access modes of Linux. These modes are read, | ||
204 | execute, write, and occasionally append. There are a few cases where the | ||
205 | access mode may not be obvious. These include: | ||
206 | |||
207 | Signals: A signal is a write operation from the subject task to | ||
208 | the object task. | ||
209 | Internet Domain IPC: Transmission of a packet is considered a | ||
210 | write operation from the source task to the destination task. | ||
211 | |||
212 | Smack restricts access based on the label attached to a subject and the label | ||
213 | attached to the object it is trying to access. The rules enforced are, in | ||
214 | order: | ||
215 | |||
216 | 1. Any access requested by a task labeled "*" is denied. | ||
217 | 2. A read or execute access requested by a task labeled "^" | ||
218 | is permitted. | ||
219 | 3. A read or execute access requested on an object labeled "_" | ||
220 | is permitted. | ||
221 | 4. Any access requested on an object labeled "*" is permitted. | ||
222 | 5. Any access requested by a task on an object with the same | ||
223 | label is permitted. | ||
224 | 6. Any access requested that is explicitly defined in the loaded | ||
225 | rule set is permitted. | ||
226 | 7. Any other access is denied. | ||
227 | |||
228 | Smack Access Rules | ||
229 | |||
230 | With the isolation provided by Smack access separation is simple. There are | ||
231 | many interesting cases where limited access by subjects to objects with | ||
232 | different labels is desired. One example is the familiar spy model of | ||
233 | sensitivity, where a scientist working on a highly classified project would be | ||
234 | able to read documents of lower classifications and anything she writes will | ||
235 | be "born" highly classified. To accommodate such schemes Smack includes a | ||
236 | mechanism for specifying rules allowing access between labels. | ||
237 | |||
238 | Access Rule Format | ||
239 | |||
240 | The format of an access rule is: | ||
241 | |||
242 | subject-label object-label access | ||
243 | |||
244 | Where subject-label is the Smack label of the task, object-label is the Smack | ||
245 | label of the thing being accessed, and access is a string specifying the sort | ||
246 | of access allowed. The Smack labels are limited to 23 characters. The access | ||
247 | specification is searched for letters that describe access modes: | ||
248 | |||
249 | a: indicates that append access should be granted. | ||
250 | r: indicates that read access should be granted. | ||
251 | w: indicates that write access should be granted. | ||
252 | x: indicates that execute access should be granted. | ||
253 | |||
254 | Uppercase values for the specification letters are allowed as well. | ||
255 | Access mode specifications can be in any order. Examples of acceptable rules | ||
256 | are: | ||
257 | |||
258 | TopSecret Secret rx | ||
259 | Secret Unclass R | ||
260 | Manager Game x | ||
261 | User HR w | ||
262 | New Old rRrRr | ||
263 | Closed Off - | ||
264 | |||
265 | Examples of unacceptable rules are: | ||
266 | |||
267 | Top Secret Secret rx | ||
268 | Ace Ace r | ||
269 | Odd spells waxbeans | ||
270 | |||
271 | Spaces are not allowed in labels. Since a subject always has access to files | ||
272 | with the same label specifying a rule for that case is pointless. Only | ||
273 | valid letters (rwxaRWXA) and the dash ('-') character are allowed in | ||
274 | access specifications. The dash is a placeholder, so "a-r" is the same | ||
275 | as "ar". A lone dash is used to specify that no access should be allowed. | ||
276 | |||
277 | Applying Access Rules | ||
278 | |||
279 | The developers of Linux rarely define new sorts of things, usually importing | ||
280 | schemes and concepts from other systems. Most often, the other systems are | ||
281 | variants of Unix. Unix has many endearing properties, but consistency of | ||
282 | access control models is not one of them. Smack strives to treat accesses as | ||
283 | uniformly as is sensible while keeping with the spirit of the underlying | ||
284 | mechanism. | ||
285 | |||
286 | File system objects including files, directories, named pipes, symbolic links, | ||
287 | and devices require access permissions that closely match those used by mode | ||
288 | bit access. To open a file for reading read access is required on the file. To | ||
289 | search a directory requires execute access. Creating a file with write access | ||
290 | requires both read and write access on the containing directory. Deleting a | ||
291 | file requires read and write access to the file and to the containing | ||
292 | directory. It is possible that a user may be able to see that a file exists | ||
293 | but not any of its attributes by the circumstance of having read access to the | ||
294 | containing directory but not to the differently labeled file. This is an | ||
295 | artifact of the file name being data in the directory, not a part of the file. | ||
296 | |||
297 | IPC objects, message queues, semaphore sets, and memory segments exist in flat | ||
298 | namespaces and access requests are only required to match the object in | ||
299 | question. | ||
300 | |||
301 | Process objects reflect tasks on the system and the Smack label used to access | ||
302 | them is the same Smack label that the task would use for its own access | ||
303 | attempts. Sending a signal via the kill() system call is a write operation | ||
304 | from the signaler to the recipient. Debugging a process requires both reading | ||
305 | and writing. Creating a new task is an internal operation that results in two | ||
306 | tasks with identical Smack labels and requires no access checks. | ||
307 | |||
308 | Sockets are data structures attached to processes and sending a packet from | ||
309 | one process to another requires that the sender have write access to the | ||
310 | receiver. The receiver is not required to have read access to the sender. | ||
311 | |||
312 | Setting Access Rules | ||
313 | |||
314 | The configuration file /etc/smack/accesses contains the rules to be set at | ||
315 | system startup. The contents are written to the special file /smack/load. | ||
316 | Rules can be written to /smack/load at any time and take effect immediately. | ||
317 | For any pair of subject and object labels there can be only one rule, with the | ||
318 | most recently specified overriding any earlier specification. | ||
319 | |||
320 | The program smackload is provided to ensure data is formatted | ||
321 | properly when written to /smack/load. This program reads lines | ||
322 | of the form | ||
323 | |||
324 | subjectlabel objectlabel mode. | ||
325 | |||
326 | Task Attribute | ||
327 | |||
328 | The Smack label of a process can be read from /proc/<pid>/attr/current. A | ||
329 | process can read its own Smack label from /proc/self/attr/current. A | ||
330 | privileged process can change its own Smack label by writing to | ||
331 | /proc/self/attr/current but not the label of another process. | ||
332 | |||
333 | File Attribute | ||
334 | |||
335 | The Smack label of a filesystem object is stored as an extended attribute | ||
336 | named SMACK64 on the file. This attribute is in the security namespace. It can | ||
337 | only be changed by a process with privilege. | ||
338 | |||
339 | Privilege | ||
340 | |||
341 | A process with CAP_MAC_OVERRIDE is privileged. | ||
342 | |||
343 | Smack Networking | ||
344 | |||
345 | As mentioned before, Smack enforces access control on network protocol | ||
346 | transmissions. Every packet sent by a Smack process is tagged with its Smack | ||
347 | label. This is done by adding a CIPSO tag to the header of the IP packet. Each | ||
348 | packet received is expected to have a CIPSO tag that identifies the label and | ||
349 | if it lacks such a tag the network ambient label is assumed. Before the packet | ||
350 | is delivered a check is made to determine that a subject with the label on the | ||
351 | packet has write access to the receiving process and if that is not the case | ||
352 | the packet is dropped. | ||
353 | |||
354 | CIPSO Configuration | ||
355 | |||
356 | It is normally unnecessary to specify the CIPSO configuration. The default | ||
357 | values used by the system handle all internal cases. Smack will compose CIPSO | ||
358 | label values to match the Smack labels being used without administrative | ||
359 | intervention. Unlabeled packets that come into the system will be given the | ||
360 | ambient label. | ||
361 | |||
362 | Smack requires configuration in the case where packets from a system that is | ||
363 | not smack that speaks CIPSO may be encountered. Usually this will be a Trusted | ||
364 | Solaris system, but there are other, less widely deployed systems out there. | ||
365 | CIPSO provides 3 important values, a Domain Of Interpretation (DOI), a level, | ||
366 | and a category set with each packet. The DOI is intended to identify a group | ||
367 | of systems that use compatible labeling schemes, and the DOI specified on the | ||
368 | smack system must match that of the remote system or packets will be | ||
369 | discarded. The DOI is 3 by default. The value can be read from /smack/doi and | ||
370 | can be changed by writing to /smack/doi. | ||
371 | |||
372 | The level and category set are mapped to a Smack label as defined in | ||
373 | /etc/smack/cipso. | ||
374 | |||
375 | A Smack/CIPSO mapping has the form: | ||
376 | |||
377 | smack level [category [category]*] | ||
378 | |||
379 | Smack does not expect the level or category sets to be related in any | ||
380 | particular way and does not assume or assign accesses based on them. Some | ||
381 | examples of mappings: | ||
382 | |||
383 | TopSecret 7 | ||
384 | TS:A,B 7 1 2 | ||
385 | SecBDE 5 2 4 6 | ||
386 | RAFTERS 7 12 26 | ||
387 | |||
388 | The ":" and "," characters are permitted in a Smack label but have no special | ||
389 | meaning. | ||
390 | |||
391 | The mapping of Smack labels to CIPSO values is defined by writing to | ||
392 | /smack/cipso. Again, the format of data written to this special file | ||
393 | is highly restrictive, so the program smackcipso is provided to | ||
394 | ensure the writes are done properly. This program takes mappings | ||
395 | on the standard input and sends them to /smack/cipso properly. | ||
396 | |||
397 | In addition to explicit mappings Smack supports direct CIPSO mappings. One | ||
398 | CIPSO level is used to indicate that the category set passed in the packet is | ||
399 | in fact an encoding of the Smack label. The level used is 250 by default. The | ||
400 | value can be read from /smack/direct and changed by writing to /smack/direct. | ||
401 | |||
402 | Socket Attributes | ||
403 | |||
404 | There are two attributes that are associated with sockets. These attributes | ||
405 | can only be set by privileged tasks, but any task can read them for their own | ||
406 | sockets. | ||
407 | |||
408 | SMACK64IPIN: The Smack label of the task object. A privileged | ||
409 | program that will enforce policy may set this to the star label. | ||
410 | |||
411 | SMACK64IPOUT: The Smack label transmitted with outgoing packets. | ||
412 | A privileged program may set this to match the label of another | ||
413 | task with which it hopes to communicate. | ||
414 | |||
415 | Writing Applications for Smack | ||
416 | |||
417 | There are three sorts of applications that will run on a Smack system. How an | ||
418 | application interacts with Smack will determine what it will have to do to | ||
419 | work properly under Smack. | ||
420 | |||
421 | Smack Ignorant Applications | ||
422 | |||
423 | By far the majority of applications have no reason whatever to care about the | ||
424 | unique properties of Smack. Since invoking a program has no impact on the | ||
425 | Smack label associated with the process the only concern likely to arise is | ||
426 | whether the process has execute access to the program. | ||
427 | |||
428 | Smack Relevant Applications | ||
429 | |||
430 | Some programs can be improved by teaching them about Smack, but do not make | ||
431 | any security decisions themselves. The utility ls(1) is one example of such a | ||
432 | program. | ||
433 | |||
434 | Smack Enforcing Applications | ||
435 | |||
436 | These are special programs that not only know about Smack, but participate in | ||
437 | the enforcement of system policy. In most cases these are the programs that | ||
438 | set up user sessions. There are also network services that provide information | ||
439 | to processes running with various labels. | ||
440 | |||
441 | File System Interfaces | ||
442 | |||
443 | Smack maintains labels on file system objects using extended attributes. The | ||
444 | Smack label of a file, directory, or other file system object can be obtained | ||
445 | using getxattr(2). | ||
446 | |||
447 | len = getxattr("/", "security.SMACK64", value, sizeof (value)); | ||
448 | |||
449 | will put the Smack label of the root directory into value. A privileged | ||
450 | process can set the Smack label of a file system object with setxattr(2). | ||
451 | |||
452 | len = strlen("Rubble"); | ||
453 | rc = setxattr("/foo", "security.SMACK64", "Rubble", len, 0); | ||
454 | |||
455 | will set the Smack label of /foo to "Rubble" if the program has appropriate | ||
456 | privilege. | ||
457 | |||
458 | Socket Interfaces | ||
459 | |||
460 | The socket attributes can be read using fgetxattr(2). | ||
461 | |||
462 | A privileged process can set the Smack label of outgoing packets with | ||
463 | fsetxattr(2). | ||
464 | |||
465 | len = strlen("Rubble"); | ||
466 | rc = fsetxattr(fd, "security.SMACK64IPOUT", "Rubble", len, 0); | ||
467 | |||
468 | will set the Smack label "Rubble" on packets going out from the socket if the | ||
469 | program has appropriate privilege. | ||
470 | |||
471 | rc = fsetxattr(fd, "security.SMACK64IPIN", "*", strlen("*"), 0); | ||
472 | |||
473 | will set the Smack label "*" as the object label against which incoming | ||
474 | packets will be checked if the program has appropriate privilege. | ||
475 | |||
476 | Administration | ||
477 | |||
478 | Smack supports some mount options: | ||
479 | |||
480 | smackfsdef=label: specifies the label to give files that lack | ||
481 | the Smack label extended attribute. | ||
482 | |||
483 | smackfsroot=label: specifies the label to assign the root of the | ||
484 | file system if it lacks the Smack extended attribute. | ||
485 | |||
486 | smackfshat=label: specifies a label that must have read access to | ||
487 | all labels set on the filesystem. Not yet enforced. | ||
488 | |||
489 | smackfsfloor=label: specifies a label to which all labels set on the | ||
490 | filesystem must have read access. Not yet enforced. | ||
491 | |||
492 | These mount options apply to all file system types. | ||
493 | |||
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 681e2b36195c..08a1ed1cb5d8 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches | |||
@@ -220,20 +220,8 @@ decreasing the likelihood of your MIME-attached change being accepted. | |||
220 | Exception: If your mailer is mangling patches then someone may ask | 220 | Exception: If your mailer is mangling patches then someone may ask |
221 | you to re-send them using MIME. | 221 | you to re-send them using MIME. |
222 | 222 | ||
223 | 223 | See Documentation/email-clients.txt for hints about configuring | |
224 | WARNING: Some mailers like Mozilla send your messages with | 224 | your e-mail client so that it sends your patches untouched. |
225 | ---- message header ---- | ||
226 | Content-Type: text/plain; charset=us-ascii; format=flowed | ||
227 | ---- message header ---- | ||
228 | The problem is that "format=flowed" makes some of the mailers | ||
229 | on receiving side to replace TABs with spaces and do similar | ||
230 | changes. Thus the patches from you can look corrupted. | ||
231 | |||
232 | To fix this just make your mozilla defaults/pref/mailnews.js file to look like: | ||
233 | pref("mailnews.send_plaintext_flowed", false); // RFC 2646======= | ||
234 | pref("mailnews.display.disable_format_flowed_support", true); | ||
235 | |||
236 | |||
237 | 225 | ||
238 | 8) E-mail size. | 226 | 8) E-mail size. |
239 | 227 | ||
diff --git a/Documentation/acpi/method-tracing.txt b/Documentation/acpi/method-tracing.txt new file mode 100644 index 000000000000..f6efb1ea559a --- /dev/null +++ b/Documentation/acpi/method-tracing.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | /sys/module/acpi/parameters/: | ||
2 | |||
3 | trace_method_name | ||
4 | The AML method name that the user wants to trace | ||
5 | |||
6 | trace_debug_layer | ||
7 | The temporary debug_layer used when tracing the method. | ||
8 | Using 0xffffffff by default if it is 0. | ||
9 | |||
10 | trace_debug_level | ||
11 | The temporary debug_level used when tracing the method. | ||
12 | Using 0x00ffffff by default if it is 0. | ||
13 | |||
14 | trace_state | ||
15 | The status of the tracing feature. | ||
16 | |||
17 | "enabled" means this feature is enabled | ||
18 | and the AML method is traced every time it's executed. | ||
19 | |||
20 | "1" means this feature is enabled and the AML method | ||
21 | will only be traced during the next execution. | ||
22 | |||
23 | "disabled" means this feature is disabled. | ||
24 | Users can enable/disable this debug tracing feature by | ||
25 | "echo string > /sys/module/acpi/parameters/trace_state". | ||
26 | "string" should be one of "enable", "disable" and "1". | ||
diff --git a/Documentation/arm/Sharp-LH/IOBarrier b/Documentation/arm/Sharp-LH/IOBarrier index c0d8853672dc..2e953e228f4d 100644 --- a/Documentation/arm/Sharp-LH/IOBarrier +++ b/Documentation/arm/Sharp-LH/IOBarrier | |||
@@ -32,7 +32,7 @@ BARRIER IO before the access to the SMC chip because the AEN latch | |||
32 | only needs occurs after the SMC IO write cycle. The routines that | 32 | only needs occurs after the SMC IO write cycle. The routines that |
33 | implement this work-around make an additional concession which is to | 33 | implement this work-around make an additional concession which is to |
34 | disable interrupts during the IO sequence. Other hardware devices | 34 | disable interrupts during the IO sequence. Other hardware devices |
35 | (the LogicPD CPLD) have registers in the same the physical memory | 35 | (the LogicPD CPLD) have registers in the same physical memory |
36 | region as the SMC chip. An interrupt might allow an access to one of | 36 | region as the SMC chip. An interrupt might allow an access to one of |
37 | those registers while SMC IO is being performed. | 37 | those registers while SMC IO is being performed. |
38 | 38 | ||
diff --git a/Documentation/debugging-modules.txt b/Documentation/debugging-modules.txt index 24029f65fc94..172ad4aec493 100644 --- a/Documentation/debugging-modules.txt +++ b/Documentation/debugging-modules.txt | |||
@@ -16,3 +16,7 @@ echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe | |||
16 | echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe | 16 | echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe |
17 | chmod a+x /tmp/modprobe | 17 | chmod a+x /tmp/modprobe |
18 | echo /tmp/modprobe > /proc/sys/kernel/modprobe | 18 | echo /tmp/modprobe > /proc/sys/kernel/modprobe |
19 | |||
20 | Note that the above applies only when the *kernel* is requesting | ||
21 | that the module be loaded -- it won't have any effect if that module | ||
22 | is being loaded explicitly using "modprobe" from userspace. | ||
diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt index 2a97320ee17f..83009fdcbbc8 100644 --- a/Documentation/driver-model/platform.txt +++ b/Documentation/driver-model/platform.txt | |||
@@ -122,15 +122,15 @@ None the less, there are some APIs to support such legacy drivers. Avoid | |||
122 | using these calls except with such hotplug-deficient drivers. | 122 | using these calls except with such hotplug-deficient drivers. |
123 | 123 | ||
124 | struct platform_device *platform_device_alloc( | 124 | struct platform_device *platform_device_alloc( |
125 | char *name, unsigned id); | 125 | const char *name, int id); |
126 | 126 | ||
127 | You can use platform_device_alloc() to dynamically allocate a device, which | 127 | You can use platform_device_alloc() to dynamically allocate a device, which |
128 | you will then initialize with resources and platform_device_register(). | 128 | you will then initialize with resources and platform_device_register(). |
129 | A better solution is usually: | 129 | A better solution is usually: |
130 | 130 | ||
131 | struct platform_device *platform_device_register_simple( | 131 | struct platform_device *platform_device_register_simple( |
132 | char *name, unsigned id, | 132 | const char *name, int id, |
133 | struct resource *res, unsigned nres); | 133 | struct resource *res, unsigned int nres); |
134 | 134 | ||
135 | You can use platform_device_register_simple() as a one-step call to allocate | 135 | You can use platform_device_register_simple() as a one-step call to allocate |
136 | and register a device. | 136 | and register a device. |
diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt index 63883a892120..748328370250 100644 --- a/Documentation/fb/deferred_io.txt +++ b/Documentation/fb/deferred_io.txt | |||
@@ -7,10 +7,10 @@ IO. The following example may be a useful explanation of how one such setup | |||
7 | works: | 7 | works: |
8 | 8 | ||
9 | - userspace app like Xfbdev mmaps framebuffer | 9 | - userspace app like Xfbdev mmaps framebuffer |
10 | - deferred IO and driver sets up nopage and page_mkwrite handlers | 10 | - deferred IO and driver sets up fault and page_mkwrite handlers |
11 | - userspace app tries to write to mmaped vaddress | 11 | - userspace app tries to write to mmaped vaddress |
12 | - we get pagefault and reach nopage handler | 12 | - we get pagefault and reach fault handler |
13 | - nopage handler finds and returns physical page | 13 | - fault handler finds and returns physical page |
14 | - we get page_mkwrite where we add this page to a list | 14 | - we get page_mkwrite where we add this page to a list |
15 | - schedule a workqueue task to be run after a delay | 15 | - schedule a workqueue task to be run after a delay |
16 | - app continues writing to that page with no additional cost. this is | 16 | - app continues writing to that page with no additional cost. this is |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 181bff005167..68ce1300a360 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -156,22 +156,6 @@ Who: Arjan van de Ven <arjan@linux.intel.com> | |||
156 | 156 | ||
157 | --------------------------- | 157 | --------------------------- |
158 | 158 | ||
159 | What: USB driver API moves to EXPORT_SYMBOL_GPL | ||
160 | When: February 2008 | ||
161 | Files: include/linux/usb.h, drivers/usb/core/driver.c | ||
162 | Why: The USB subsystem has changed a lot over time, and it has been | ||
163 | possible to create userspace USB drivers using usbfs/libusb/gadgetfs | ||
164 | that operate as fast as the USB bus allows. Because of this, the USB | ||
165 | subsystem will not be allowing closed source kernel drivers to | ||
166 | register with it, after this grace period is over. If anyone needs | ||
167 | any help in converting their closed source drivers over to use the | ||
168 | userspace filesystems, please contact the | ||
169 | linux-usb-devel@lists.sourceforge.net mailing list, and the developers | ||
170 | there will be glad to help you out. | ||
171 | Who: Greg Kroah-Hartman <gregkh@suse.de> | ||
172 | |||
173 | --------------------------- | ||
174 | |||
175 | What: vm_ops.nopage | 159 | What: vm_ops.nopage |
176 | When: Soon, provided in-kernel callers have been converted | 160 | When: Soon, provided in-kernel callers have been converted |
177 | Why: This interface is replaced by vm_ops.fault, but it has been around | 161 | Why: This interface is replaced by vm_ops.fault, but it has been around |
@@ -224,13 +208,6 @@ Who: Randy Dunlap <randy.dunlap@oracle.com> | |||
224 | 208 | ||
225 | --------------------------- | 209 | --------------------------- |
226 | 210 | ||
227 | What: drivers depending on OSS_OBSOLETE | ||
228 | When: options in 2.6.23, code in 2.6.25 | ||
229 | Why: obsolete OSS drivers | ||
230 | Who: Adrian Bunk <bunk@stusta.de> | ||
231 | |||
232 | --------------------------- | ||
233 | |||
234 | What: libata spindown skipping and warning | 211 | What: libata spindown skipping and warning |
235 | When: Dec 2008 | 212 | When: Dec 2008 |
236 | Why: Some halt(8) implementations synchronize caches for and spin | 213 | Why: Some halt(8) implementations synchronize caches for and spin |
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index d1b98257d000..44c97e6accb2 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt | |||
@@ -377,7 +377,7 @@ more explicit to have a method whereby userspace sees this divergence. | |||
377 | Rather than have a group where some items behave differently than | 377 | Rather than have a group where some items behave differently than |
378 | others, configfs provides a method whereby one or many subgroups are | 378 | others, configfs provides a method whereby one or many subgroups are |
379 | automatically created inside the parent at its creation. Thus, | 379 | automatically created inside the parent at its creation. Thus, |
380 | mkdir("parent) results in "parent", "parent/subgroup1", up through | 380 | mkdir("parent") results in "parent", "parent/subgroup1", up through |
381 | "parent/subgroupN". Items of type 1 can now be created in | 381 | "parent/subgroupN". Items of type 1 can now be created in |
382 | "parent/subgroup1", and items of type N can be created in | 382 | "parent/subgroup1", and items of type N can be created in |
383 | "parent/subgroupN". | 383 | "parent/subgroupN". |
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index dac45c92d872..0f33c77bc14b 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -1,6 +1,6 @@ | |||
1 | Changes since 2.5.0: | 1 | Changes since 2.5.0: |
2 | 2 | ||
3 | --- | 3 | --- |
4 | [recommended] | 4 | [recommended] |
5 | 5 | ||
6 | New helpers: sb_bread(), sb_getblk(), sb_find_get_block(), set_bh(), | 6 | New helpers: sb_bread(), sb_getblk(), sb_find_get_block(), set_bh(), |
@@ -10,7 +10,7 @@ Use them. | |||
10 | 10 | ||
11 | (sb_find_get_block() replaces 2.4's get_hash_table()) | 11 | (sb_find_get_block() replaces 2.4's get_hash_table()) |
12 | 12 | ||
13 | --- | 13 | --- |
14 | [recommended] | 14 | [recommended] |
15 | 15 | ||
16 | New methods: ->alloc_inode() and ->destroy_inode(). | 16 | New methods: ->alloc_inode() and ->destroy_inode(). |
@@ -28,7 +28,7 @@ Declare | |||
28 | 28 | ||
29 | Use FOO_I(inode) instead of &inode->u.foo_inode_i; | 29 | Use FOO_I(inode) instead of &inode->u.foo_inode_i; |
30 | 30 | ||
31 | Add foo_alloc_inode() and foo_destory_inode() - the former should allocate | 31 | Add foo_alloc_inode() and foo_destroy_inode() - the former should allocate |
32 | foo_inode_info and return the address of ->vfs_inode, the latter should free | 32 | foo_inode_info and return the address of ->vfs_inode, the latter should free |
33 | FOO_I(inode) (see in-tree filesystems for examples). | 33 | FOO_I(inode) (see in-tree filesystems for examples). |
34 | 34 | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 194c8f351320..5681e2fa1496 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -216,6 +216,7 @@ Table 1-3: Contents of the stat files (as of 2.6.22-rc3) | |||
216 | priority priority level | 216 | priority priority level |
217 | nice nice level | 217 | nice nice level |
218 | num_threads number of threads | 218 | num_threads number of threads |
219 | it_real_value (obsolete, always 0) | ||
219 | start_time time the process started after system boot | 220 | start_time time the process started after system boot |
220 | vsize virtual memory size | 221 | vsize virtual memory size |
221 | rss resident set memory size | 222 | rss resident set memory size |
@@ -1028,6 +1029,14 @@ nr_inodes | |||
1028 | Denotes the number of inodes the system has allocated. This number will | 1029 | Denotes the number of inodes the system has allocated. This number will |
1029 | grow and shrink dynamically. | 1030 | grow and shrink dynamically. |
1030 | 1031 | ||
1032 | nr_open | ||
1033 | ------- | ||
1034 | |||
1035 | Denotes the maximum number of file-handles a process can | ||
1036 | allocate. Default value is 1024*1024 (1048576) which should be | ||
1037 | enough for most machines. Actual limit depends on RLIMIT_NOFILE | ||
1038 | resource limit. | ||
1039 | |||
1031 | nr_free_inodes | 1040 | nr_free_inodes |
1032 | -------------- | 1041 | -------------- |
1033 | 1042 | ||
@@ -1314,13 +1323,28 @@ for writeout by the pdflush daemons. It is expressed in 100'ths of a second. | |||
1314 | Data which has been dirty in-memory for longer than this interval will be | 1323 | Data which has been dirty in-memory for longer than this interval will be |
1315 | written out next time a pdflush daemon wakes up. | 1324 | written out next time a pdflush daemon wakes up. |
1316 | 1325 | ||
1326 | highmem_is_dirtyable | ||
1327 | -------------------- | ||
1328 | |||
1329 | Only present if CONFIG_HIGHMEM is set. | ||
1330 | |||
1331 | This defaults to 0 (false), meaning that the ratios set above are calculated | ||
1332 | as a percentage of lowmem only. This protects against excessive scanning | ||
1333 | in page reclaim, swapping and general VM distress. | ||
1334 | |||
1335 | Setting this to 1 can be useful on 32 bit machines where you want to make | ||
1336 | random changes within an MMAPed file that is larger than your available | ||
1337 | lowmem without causing large quantities of random IO. It is safe if the | ||
1338 | behavior of all programs running on the machine is known and memory will | ||
1339 | not be otherwise stressed. | ||
1340 | |||
1317 | legacy_va_layout | 1341 | legacy_va_layout |
1318 | ---------------- | 1342 | ---------------- |
1319 | 1343 | ||
1320 | If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel | 1344 | If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel |
1321 | will use the legacy (2.4) layout for all processes. | 1345 | will use the legacy (2.4) layout for all processes. |
1322 | 1346 | ||
1323 | lower_zone_protection | 1347 | lowmem_reserve_ratio |
1324 | --------------------- | 1348 | --------------------- |
1325 | 1349 | ||
1326 | For some specialised workloads on highmem machines it is dangerous for | 1350 | For some specialised workloads on highmem machines it is dangerous for |
@@ -1340,25 +1364,71 @@ captured into pinned user memory. | |||
1340 | mechanism will also defend that region from allocations which could use | 1364 | mechanism will also defend that region from allocations which could use |
1341 | highmem or lowmem). | 1365 | highmem or lowmem). |
1342 | 1366 | ||
1343 | The `lower_zone_protection' tunable determines how aggressive the kernel is | 1367 | The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is |
1344 | in defending these lower zones. The default value is zero - no | 1368 | in defending these lower zones. |
1345 | protection at all. | ||
1346 | 1369 | ||
1347 | If you have a machine which uses highmem or ISA DMA and your | 1370 | If you have a machine which uses highmem or ISA DMA and your |
1348 | applications are using mlock(), or if you are running with no swap then | 1371 | applications are using mlock(), or if you are running with no swap then |
1349 | you probably should increase the lower_zone_protection setting. | 1372 | you probably should change the lowmem_reserve_ratio setting. |
1350 | 1373 | ||
1351 | The units of this tunable are fairly vague. It is approximately equal | 1374 | The lowmem_reserve_ratio is an array. You can see them by reading this file. |
1352 | to "megabytes," so setting lower_zone_protection=100 will protect around 100 | 1375 | - |
1353 | megabytes of the lowmem zone from user allocations. It will also make | 1376 | % cat /proc/sys/vm/lowmem_reserve_ratio |
1354 | those 100 megabytes unavailable for use by applications and by | 1377 | 256 256 32 |
1355 | pagecache, so there is a cost. | 1378 | - |
1356 | 1379 | Note: # of these elements is one fewer than number of zones. Because the highest | |
1357 | The effects of this tunable may be observed by monitoring | 1380 | zone's value is not necessary for following calculation. |
1358 | /proc/meminfo:LowFree. Write a single huge file and observe the point | 1381 | |
1359 | at which LowFree ceases to fall. | 1382 | But, these values are not used directly. The kernel calculates # of protection |
1360 | 1383 | pages for each zones from them. These are shown as array of protection pages | |
1361 | A reasonable value for lower_zone_protection is 100. | 1384 | in /proc/zoneinfo like followings. (This is an example of x86-64 box). |
1385 | Each zone has an array of protection pages like this. | ||
1386 | |||
1387 | - | ||
1388 | Node 0, zone DMA | ||
1389 | pages free 1355 | ||
1390 | min 3 | ||
1391 | low 3 | ||
1392 | high 4 | ||
1393 | : | ||
1394 | : | ||
1395 | numa_other 0 | ||
1396 | protection: (0, 2004, 2004, 2004) | ||
1397 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
1398 | pagesets | ||
1399 | cpu: 0 pcp: 0 | ||
1400 | : | ||
1401 | - | ||
1402 | These protections are added to score to judge whether this zone should be used | ||
1403 | for page allocation or should be reclaimed. | ||
1404 | |||
1405 | In this example, if normal pages (index=2) are required to this DMA zone and | ||
1406 | pages_high is used for watermark, the kernel judges this zone should not be | ||
1407 | used because pages_free(1355) is smaller than watermark + protection[2] | ||
1408 | (4 + 2004 = 2008). If this protection value is 0, this zone would be used for | ||
1409 | normal page requirement. If requirement is DMA zone(index=0), protection[0] | ||
1410 | (=0) is used. | ||
1411 | |||
1412 | zone[i]'s protection[j] is calculated by the following expression. | ||
1413 | |||
1414 | (i < j): | ||
1415 | zone[i]->protection[j] | ||
1416 | = (total sums of present_pages from zone[i+1] to zone[j] on the node) | ||
1417 | / lowmem_reserve_ratio[i]; | ||
1418 | (i = j): | ||
1419 | (should not be protected) = 0; | ||
1420 | (i > j): | ||
1421 | (not necessary, but looks 0) | ||
1422 | |||
1423 | The default values of lowmem_reserve_ratio[i] are | ||
1424 | 256 (if zone[i] means DMA or DMA32 zone) | ||
1425 | 32 (others). | ||
1426 | As above expression, they are reciprocal number of ratio. | ||
1427 | 256 means 1/256. # of protection pages becomes about "0.39%" of total present | ||
1428 | pages of higher zones on the node. | ||
1429 | |||
1430 | If you would like to protect more pages, smaller values are effective. | ||
1431 | The minimum value is 1 (1/1 -> 100%). | ||
1362 | 1432 | ||
1363 | page-cluster | 1433 | page-cluster |
1364 | ------------ | 1434 | ------------ |
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt index 339c6a4f220e..7be232b44ee4 100644 --- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt +++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt | |||
@@ -118,7 +118,7 @@ All this differs from the old initrd in several ways: | |||
118 | with the new root (cd /newmount; mount --move . /; chroot .), attach | 118 | with the new root (cd /newmount; mount --move . /; chroot .), attach |
119 | stdin/stdout/stderr to the new /dev/console, and exec the new init. | 119 | stdin/stdout/stderr to the new /dev/console, and exec the new init. |
120 | 120 | ||
121 | Since this is a remarkably persnickity process (and involves deleting | 121 | Since this is a remarkably persnickety process (and involves deleting |
122 | commands before you can run them), the klibc package introduced a helper | 122 | commands before you can run them), the klibc package introduced a helper |
123 | program (utils/run_init.c) to do all this for you. Most other packages | 123 | program (utils/run_init.c) to do all this for you. Most other packages |
124 | (such as busybox) have named this command "switch_root". | 124 | (such as busybox) have named this command "switch_root". |
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt index 18d23f9a18c7..094f2d2f38b1 100644 --- a/Documentation/filesystems/relay.txt +++ b/Documentation/filesystems/relay.txt | |||
@@ -140,7 +140,7 @@ close() decrements the channel buffer's refcount. When the refcount | |||
140 | In order for a user application to make use of relay files, the | 140 | In order for a user application to make use of relay files, the |
141 | host filesystem must be mounted. For example, | 141 | host filesystem must be mounted. For example, |
142 | 142 | ||
143 | mount -t debugfs debugfs /debug | 143 | mount -t debugfs debugfs /sys/kernel/debug |
144 | 144 | ||
145 | NOTE: the host filesystem doesn't need to be mounted for kernel | 145 | NOTE: the host filesystem doesn't need to be mounted for kernel |
146 | clients to create or use channels - it only needs to be | 146 | clients to create or use channels - it only needs to be |
diff --git a/Documentation/fujitsu/frv/README.txt b/Documentation/frv/README.txt index a984faa968e8..a984faa968e8 100644 --- a/Documentation/fujitsu/frv/README.txt +++ b/Documentation/frv/README.txt | |||
diff --git a/Documentation/fujitsu/frv/atomic-ops.txt b/Documentation/frv/atomic-ops.txt index 96638e9b9fe0..96638e9b9fe0 100644 --- a/Documentation/fujitsu/frv/atomic-ops.txt +++ b/Documentation/frv/atomic-ops.txt | |||
diff --git a/Documentation/fujitsu/frv/booting.txt b/Documentation/frv/booting.txt index 4e229056ef22..ace200b7c214 100644 --- a/Documentation/fujitsu/frv/booting.txt +++ b/Documentation/frv/booting.txt | |||
@@ -177,5 +177,5 @@ separated by spaces: | |||
177 | (*) vdc=... | 177 | (*) vdc=... |
178 | 178 | ||
179 | This option configures the MB93493 companion chip visual display | 179 | This option configures the MB93493 companion chip visual display |
180 | driver. Please see Documentation/fujitsu/mb93493/vdc.txt for more | 180 | driver. Please see Documentation/frv/mb93493/vdc.txt for more |
181 | information. | 181 | information. |
diff --git a/Documentation/fujitsu/frv/clock.txt b/Documentation/frv/clock.txt index c72d350e177a..c72d350e177a 100644 --- a/Documentation/fujitsu/frv/clock.txt +++ b/Documentation/frv/clock.txt | |||
diff --git a/Documentation/fujitsu/frv/configuring.txt b/Documentation/frv/configuring.txt index 36e76a2336fa..36e76a2336fa 100644 --- a/Documentation/fujitsu/frv/configuring.txt +++ b/Documentation/frv/configuring.txt | |||
diff --git a/Documentation/fujitsu/frv/features.txt b/Documentation/frv/features.txt index fa20c0e72833..fa20c0e72833 100644 --- a/Documentation/fujitsu/frv/features.txt +++ b/Documentation/frv/features.txt | |||
diff --git a/Documentation/fujitsu/frv/gdbinit b/Documentation/frv/gdbinit index 51517b6f307f..51517b6f307f 100644 --- a/Documentation/fujitsu/frv/gdbinit +++ b/Documentation/frv/gdbinit | |||
diff --git a/Documentation/fujitsu/frv/gdbstub.txt b/Documentation/frv/gdbstub.txt index b92bfd902a4e..b92bfd902a4e 100644 --- a/Documentation/fujitsu/frv/gdbstub.txt +++ b/Documentation/frv/gdbstub.txt | |||
diff --git a/Documentation/fujitsu/frv/kernel-ABI.txt b/Documentation/frv/kernel-ABI.txt index aaa1cec86f0b..aaa1cec86f0b 100644 --- a/Documentation/fujitsu/frv/kernel-ABI.txt +++ b/Documentation/frv/kernel-ABI.txt | |||
diff --git a/Documentation/fujitsu/frv/mmu-layout.txt b/Documentation/frv/mmu-layout.txt index db10250df6be..db10250df6be 100644 --- a/Documentation/fujitsu/frv/mmu-layout.txt +++ b/Documentation/frv/mmu-layout.txt | |||
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 6bc2ba215df9..8da724e2a0ff 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt | |||
@@ -32,7 +32,7 @@ The exact capabilities of GPIOs vary between systems. Common options: | |||
32 | - Input values are likewise readable (1, 0). Some chips support readback | 32 | - Input values are likewise readable (1, 0). Some chips support readback |
33 | of pins configured as "output", which is very useful in such "wire-OR" | 33 | of pins configured as "output", which is very useful in such "wire-OR" |
34 | cases (to support bidirectional signaling). GPIO controllers may have | 34 | cases (to support bidirectional signaling). GPIO controllers may have |
35 | input de-glitch logic, sometimes with software controls. | 35 | input de-glitch/debounce logic, sometimes with software controls. |
36 | 36 | ||
37 | - Inputs can often be used as IRQ signals, often edge triggered but | 37 | - Inputs can often be used as IRQ signals, often edge triggered but |
38 | sometimes level triggered. Such IRQs may be configurable as system | 38 | sometimes level triggered. Such IRQs may be configurable as system |
@@ -60,10 +60,13 @@ used on a board that's wired differently. Only least-common-denominator | |||
60 | functionality can be very portable. Other features are platform-specific, | 60 | functionality can be very portable. Other features are platform-specific, |
61 | and that can be critical for glue logic. | 61 | and that can be critical for glue logic. |
62 | 62 | ||
63 | Plus, this doesn't define an implementation framework, just an interface. | 63 | Plus, this doesn't require any implementation framework, just an interface. |
64 | One platform might implement it as simple inline functions accessing chip | 64 | One platform might implement it as simple inline functions accessing chip |
65 | registers; another might implement it by delegating through abstractions | 65 | registers; another might implement it by delegating through abstractions |
66 | used for several very different kinds of GPIO controller. | 66 | used for several very different kinds of GPIO controller. (There is some |
67 | optional code supporting such an implementation strategy, described later | ||
68 | in this document, but drivers acting as clients to the GPIO interface must | ||
69 | not care how it's implemented.) | ||
67 | 70 | ||
68 | That said, if the convention is supported on their platform, drivers should | 71 | That said, if the convention is supported on their platform, drivers should |
69 | use it when possible. Platforms should declare GENERIC_GPIO support in | 72 | use it when possible. Platforms should declare GENERIC_GPIO support in |
@@ -121,6 +124,11 @@ before tasking is enabled, as part of early board setup. | |||
121 | For output GPIOs, the value provided becomes the initial output value. | 124 | For output GPIOs, the value provided becomes the initial output value. |
122 | This helps avoid signal glitching during system startup. | 125 | This helps avoid signal glitching during system startup. |
123 | 126 | ||
127 | For compatibility with legacy interfaces to GPIOs, setting the direction | ||
128 | of a GPIO implicitly requests that GPIO (see below) if it has not been | ||
129 | requested already. That compatibility may be removed in the future; | ||
130 | explicitly requesting GPIOs is strongly preferred. | ||
131 | |||
124 | Setting the direction can fail if the GPIO number is invalid, or when | 132 | Setting the direction can fail if the GPIO number is invalid, or when |
125 | that particular GPIO can't be used in that mode. It's generally a bad | 133 | that particular GPIO can't be used in that mode. It's generally a bad |
126 | idea to rely on boot firmware to have set the direction correctly, since | 134 | idea to rely on boot firmware to have set the direction correctly, since |
@@ -133,6 +141,7 @@ Spinlock-Safe GPIO access | |||
133 | ------------------------- | 141 | ------------------------- |
134 | Most GPIO controllers can be accessed with memory read/write instructions. | 142 | Most GPIO controllers can be accessed with memory read/write instructions. |
135 | That doesn't need to sleep, and can safely be done from inside IRQ handlers. | 143 | That doesn't need to sleep, and can safely be done from inside IRQ handlers. |
144 | (That includes hardirq contexts on RT kernels.) | ||
136 | 145 | ||
137 | Use these calls to access such GPIOs: | 146 | Use these calls to access such GPIOs: |
138 | 147 | ||
@@ -145,7 +154,7 @@ Use these calls to access such GPIOs: | |||
145 | The values are boolean, zero for low, nonzero for high. When reading the | 154 | The values are boolean, zero for low, nonzero for high. When reading the |
146 | value of an output pin, the value returned should be what's seen on the | 155 | value of an output pin, the value returned should be what's seen on the |
147 | pin ... that won't always match the specified output value, because of | 156 | pin ... that won't always match the specified output value, because of |
148 | issues including wire-OR and output latencies. | 157 | issues including open-drain signaling and output latencies. |
149 | 158 | ||
150 | The get/set calls have no error returns because "invalid GPIO" should have | 159 | The get/set calls have no error returns because "invalid GPIO" should have |
151 | been reported earlier from gpio_direction_*(). However, note that not all | 160 | been reported earlier from gpio_direction_*(). However, note that not all |
@@ -170,7 +179,8 @@ get to the head of a queue to transmit a command and get its response. | |||
170 | This requires sleeping, which can't be done from inside IRQ handlers. | 179 | This requires sleeping, which can't be done from inside IRQ handlers. |
171 | 180 | ||
172 | Platforms that support this type of GPIO distinguish them from other GPIOs | 181 | Platforms that support this type of GPIO distinguish them from other GPIOs |
173 | by returning nonzero from this call: | 182 | by returning nonzero from this call (which requires a valid GPIO number, |
183 | either explicitly or implicitly requested): | ||
174 | 184 | ||
175 | int gpio_cansleep(unsigned gpio); | 185 | int gpio_cansleep(unsigned gpio); |
176 | 186 | ||
@@ -209,8 +219,11 @@ before tasking is enabled, as part of early board setup. | |||
209 | These calls serve two basic purposes. One is marking the signals which | 219 | These calls serve two basic purposes. One is marking the signals which |
210 | are actually in use as GPIOs, for better diagnostics; systems may have | 220 | are actually in use as GPIOs, for better diagnostics; systems may have |
211 | several hundred potential GPIOs, but often only a dozen are used on any | 221 | several hundred potential GPIOs, but often only a dozen are used on any |
212 | given board. Another is to catch conflicts between drivers, reporting | 222 | given board. Another is to catch conflicts, identifying errors when |
213 | errors when drivers wrongly think they have exclusive use of that signal. | 223 | (a) two or more drivers wrongly think they have exclusive use of that |
224 | signal, or (b) something wrongly believes it's safe to remove drivers | ||
225 | needed to manage a signal that's in active use. That is, requesting a | ||
226 | GPIO can serve as a kind of lock. | ||
214 | 227 | ||
215 | These two calls are optional because not all current Linux platforms | 228 | These two calls are optional because not all current Linux platforms |
216 | offer such functionality in their GPIO support; a valid implementation | 229 | offer such functionality in their GPIO support; a valid implementation |
@@ -223,6 +236,9 @@ Note that requesting a GPIO does NOT cause it to be configured in any | |||
223 | way; it just marks that GPIO as in use. Separate code must handle any | 236 | way; it just marks that GPIO as in use. Separate code must handle any |
224 | pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown). | 237 | pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown). |
225 | 238 | ||
239 | Also note that it's your responsibility to have stopped using a GPIO | ||
240 | before you free it. | ||
241 | |||
226 | 242 | ||
227 | GPIOs mapped to IRQs | 243 | GPIOs mapped to IRQs |
228 | -------------------- | 244 | -------------------- |
@@ -238,7 +254,7 @@ map between them using calls like: | |||
238 | 254 | ||
239 | Those return either the corresponding number in the other namespace, or | 255 | Those return either the corresponding number in the other namespace, or |
240 | else a negative errno code if the mapping can't be done. (For example, | 256 | else a negative errno code if the mapping can't be done. (For example, |
241 | some GPIOs can't used as IRQs.) It is an unchecked error to use a GPIO | 257 | some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO |
242 | number that wasn't set up as an input using gpio_direction_input(), or | 258 | number that wasn't set up as an input using gpio_direction_input(), or |
243 | to use an IRQ number that didn't originally come from gpio_to_irq(). | 259 | to use an IRQ number that didn't originally come from gpio_to_irq(). |
244 | 260 | ||
@@ -299,17 +315,110 @@ Related to multiplexing is configuration and enabling of the pullups or | |||
299 | pulldowns integrated on some platforms. Not all platforms support them, | 315 | pulldowns integrated on some platforms. Not all platforms support them, |
300 | or support them in the same way; and any given board might use external | 316 | or support them in the same way; and any given board might use external |
301 | pullups (or pulldowns) so that the on-chip ones should not be used. | 317 | pullups (or pulldowns) so that the on-chip ones should not be used. |
318 | (When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) | ||
302 | 319 | ||
303 | There are other system-specific mechanisms that are not specified here, | 320 | There are other system-specific mechanisms that are not specified here, |
304 | like the aforementioned options for input de-glitching and wire-OR output. | 321 | like the aforementioned options for input de-glitching and wire-OR output. |
305 | Hardware may support reading or writing GPIOs in gangs, but that's usually | 322 | Hardware may support reading or writing GPIOs in gangs, but that's usually |
306 | configuration dependent: for GPIOs sharing the same bank. (GPIOs are | 323 | configuration dependent: for GPIOs sharing the same bank. (GPIOs are |
307 | commonly grouped in banks of 16 or 32, with a given SOC having several such | 324 | commonly grouped in banks of 16 or 32, with a given SOC having several such |
308 | banks.) Some systems can trigger IRQs from output GPIOs. Code relying on | 325 | banks.) Some systems can trigger IRQs from output GPIOs, or read values |
309 | such mechanisms will necessarily be nonportable. | 326 | from pins not managed as GPIOs. Code relying on such mechanisms will |
327 | necessarily be nonportable. | ||
310 | 328 | ||
311 | Dynamic definition of GPIOs is not currently supported; for example, as | 329 | Dynamic definition of GPIOs is not currently standard; for example, as |
312 | a side effect of configuring an add-on board with some GPIO expanders. | 330 | a side effect of configuring an add-on board with some GPIO expanders. |
313 | 331 | ||
314 | These calls are purely for kernel space, but a userspace API could be built | 332 | These calls are purely for kernel space, but a userspace API could be built |
315 | on top of it. | 333 | on top of them. |
334 | |||
335 | |||
336 | GPIO implementor's framework (OPTIONAL) | ||
337 | ======================================= | ||
338 | As noted earlier, there is an optional implementation framework making it | ||
339 | easier for platforms to support different kinds of GPIO controller using | ||
340 | the same programming interface. | ||
341 | |||
342 | As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file | ||
343 | will be found there. That will list all the controllers registered through | ||
344 | this framework, and the state of the GPIOs currently in use. | ||
345 | |||
346 | |||
347 | Controller Drivers: gpio_chip | ||
348 | ----------------------------- | ||
349 | In this framework each GPIO controller is packaged as a "struct gpio_chip" | ||
350 | with information common to each controller of that type: | ||
351 | |||
352 | - methods to establish GPIO direction | ||
353 | - methods used to access GPIO values | ||
354 | - flag saying whether calls to its methods may sleep | ||
355 | - optional debugfs dump method (showing extra state like pullup config) | ||
356 | - label for diagnostics | ||
357 | |||
358 | There is also per-instance data, which may come from device.platform_data: | ||
359 | the number of its first GPIO, and how many GPIOs it exposes. | ||
360 | |||
361 | The code implementing a gpio_chip should support multiple instances of the | ||
362 | controller, possibly using the driver model. That code will configure each | ||
363 | gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be | ||
364 | rare; use gpiochip_remove() when it is unavoidable. | ||
365 | |||
366 | Most often a gpio_chip is part of an instance-specific structure with state | ||
367 | not exposed by the GPIO interfaces, such as addressing, power management, | ||
368 | and more. Chips such as codecs will have complex non-GPIO state. | ||
369 | |||
370 | Any debugfs dump method should normally ignore signals which haven't been | ||
371 | requested as GPIOs. They can use gpiochip_is_requested(), which returns | ||
372 | either NULL or the label associated with that GPIO when it was requested. | ||
373 | |||
374 | |||
375 | Platform Support | ||
376 | ---------------- | ||
377 | To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB" | ||
378 | and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines | ||
379 | three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). | ||
380 | They may also want to provide a custom value for ARCH_NR_GPIOS. | ||
381 | |||
382 | Trivial implementations of those functions can directly use framework | ||
383 | code, which always dispatches through the gpio_chip: | ||
384 | |||
385 | #define gpio_get_value __gpio_get_value | ||
386 | #define gpio_set_value __gpio_set_value | ||
387 | #define gpio_cansleep __gpio_cansleep | ||
388 | |||
389 | Fancier implementations could instead define those as inline functions with | ||
390 | logic optimizing access to specific SOC-based GPIOs. For example, if the | ||
391 | referenced GPIO is the constant "12", getting or setting its value could | ||
392 | cost as little as two or three instructions, never sleeping. When such an | ||
393 | optimization is not possible those calls must delegate to the framework | ||
394 | code, costing at least a few dozen instructions. For bitbanged I/O, such | ||
395 | instruction savings can be significant. | ||
396 | |||
397 | For SOCs, platform-specific code defines and registers gpio_chip instances | ||
398 | for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to | ||
399 | match chip vendor documentation, and directly match board schematics. They | ||
400 | may well start at zero and go up to a platform-specific limit. Such GPIOs | ||
401 | are normally integrated into platform initialization to make them always be | ||
402 | available, from arch_initcall() or earlier; they can often serve as IRQs. | ||
403 | |||
404 | |||
405 | Board Support | ||
406 | ------------- | ||
407 | For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi | ||
408 | function devices, FPGAs or CPLDs -- most often board-specific code handles | ||
409 | registering controller devices and ensures that their drivers know what GPIO | ||
410 | numbers to use with gpiochip_add(). Their numbers often start right after | ||
411 | platform-specific GPIOs. | ||
412 | |||
413 | For example, board setup code could create structures identifying the range | ||
414 | of GPIOs that chip will expose, and pass them to each GPIO expander chip | ||
415 | using platform_data. Then the chip driver's probe() routine could pass that | ||
416 | data to gpiochip_add(). | ||
417 | |||
418 | Initialization order can be important. For example, when a device relies on | ||
419 | an I2C-based GPIO, its probe() routine should only be called after that GPIO | ||
420 | becomes available. That may mean the device should not be registered until | ||
421 | calls for that GPIO can work. One way to address such dependencies is for | ||
422 | such gpio_chip controllers to provide setup() and teardown() callbacks to | ||
423 | board specific code; those board specific callbacks would register devices | ||
424 | once all the necessary resources are available. | ||
diff --git a/Documentation/i2c/chips/pca9539 b/Documentation/i2c/chips/pca9539 index c4fce6a13537..1d81c530c4a5 100644 --- a/Documentation/i2c/chips/pca9539 +++ b/Documentation/i2c/chips/pca9539 | |||
@@ -1,6 +1,9 @@ | |||
1 | Kernel driver pca9539 | 1 | Kernel driver pca9539 |
2 | ===================== | 2 | ===================== |
3 | 3 | ||
4 | NOTE: this driver is deprecated and will be dropped soon, use | ||
5 | drivers/gpio/pca9539.c instead. | ||
6 | |||
4 | Supported chips: | 7 | Supported chips: |
5 | * Philips PCA9539 | 8 | * Philips PCA9539 |
6 | Prefix: 'pca9539' | 9 | Prefix: 'pca9539' |
diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c index 773a814d4093..d23610fb2ff9 100644 --- a/Documentation/ia64/aliasing-test.c +++ b/Documentation/ia64/aliasing-test.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <fcntl.h> | 16 | #include <fcntl.h> |
17 | #include <fnmatch.h> | 17 | #include <fnmatch.h> |
18 | #include <string.h> | 18 | #include <string.h> |
19 | #include <sys/ioctl.h> | ||
19 | #include <sys/mman.h> | 20 | #include <sys/mman.h> |
20 | #include <sys/stat.h> | 21 | #include <sys/stat.h> |
21 | #include <unistd.h> | 22 | #include <unistd.h> |
@@ -65,7 +66,7 @@ int scan_tree(char *path, char *file, off_t offset, size_t length, int touch) | |||
65 | { | 66 | { |
66 | struct dirent **namelist; | 67 | struct dirent **namelist; |
67 | char *name, *path2; | 68 | char *name, *path2; |
68 | int i, n, r, rc, result = 0; | 69 | int i, n, r, rc = 0, result = 0; |
69 | struct stat buf; | 70 | struct stat buf; |
70 | 71 | ||
71 | n = scandir(path, &namelist, 0, alphasort); | 72 | n = scandir(path, &namelist, 0, alphasort); |
@@ -113,7 +114,7 @@ skip: | |||
113 | free(namelist[i]); | 114 | free(namelist[i]); |
114 | } | 115 | } |
115 | free(namelist); | 116 | free(namelist); |
116 | return rc; | 117 | return result; |
117 | } | 118 | } |
118 | 119 | ||
119 | char buf[1024]; | 120 | char buf[1024]; |
@@ -149,7 +150,7 @@ int scan_rom(char *path, char *file) | |||
149 | { | 150 | { |
150 | struct dirent **namelist; | 151 | struct dirent **namelist; |
151 | char *name, *path2; | 152 | char *name, *path2; |
152 | int i, n, r, rc, result = 0; | 153 | int i, n, r, rc = 0, result = 0; |
153 | struct stat buf; | 154 | struct stat buf; |
154 | 155 | ||
155 | n = scandir(path, &namelist, 0, alphasort); | 156 | n = scandir(path, &namelist, 0, alphasort); |
@@ -180,7 +181,7 @@ int scan_rom(char *path, char *file) | |||
180 | * important thing is that no MCA happened. | 181 | * important thing is that no MCA happened. |
181 | */ | 182 | */ |
182 | if (rc > 0) | 183 | if (rc > 0) |
183 | fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc); | 184 | fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc); |
184 | else { | 185 | else { |
185 | fprintf(stderr, "PASS: %s not readable\n", path2); | 186 | fprintf(stderr, "PASS: %s not readable\n", path2); |
186 | return rc; | 187 | return rc; |
@@ -201,10 +202,10 @@ skip: | |||
201 | free(namelist[i]); | 202 | free(namelist[i]); |
202 | } | 203 | } |
203 | free(namelist); | 204 | free(namelist); |
204 | return rc; | 205 | return result; |
205 | } | 206 | } |
206 | 207 | ||
207 | int main() | 208 | int main(void) |
208 | { | 209 | { |
209 | int rc; | 210 | int rc; |
210 | 211 | ||
@@ -256,4 +257,6 @@ int main() | |||
256 | scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0); | 257 | scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0); |
257 | scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1); | 258 | scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1); |
258 | scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0); | 259 | scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0); |
260 | |||
261 | return rc; | ||
259 | } | 262 | } |
diff --git a/Documentation/ide/ChangeLog.ide-tape.1995-2002 b/Documentation/ide/ChangeLog.ide-tape.1995-2002 new file mode 100644 index 000000000000..877fac8770b3 --- /dev/null +++ b/Documentation/ide/ChangeLog.ide-tape.1995-2002 | |||
@@ -0,0 +1,257 @@ | |||
1 | /* | ||
2 | * Ver 0.1 Nov 1 95 Pre-working code :-) | ||
3 | * Ver 0.2 Nov 23 95 A short backup (few megabytes) and restore procedure | ||
4 | * was successful ! (Using tar cvf ... on the block | ||
5 | * device interface). | ||
6 | * A longer backup resulted in major swapping, bad | ||
7 | * overall Linux performance and eventually failed as | ||
8 | * we received non serial read-ahead requests from the | ||
9 | * buffer cache. | ||
10 | * Ver 0.3 Nov 28 95 Long backups are now possible, thanks to the | ||
11 | * character device interface. Linux's responsiveness | ||
12 | * and performance doesn't seem to be much affected | ||
13 | * from the background backup procedure. | ||
14 | * Some general mtio.h magnetic tape operations are | ||
15 | * now supported by our character device. As a result, | ||
16 | * popular tape utilities are starting to work with | ||
17 | * ide tapes :-) | ||
18 | * The following configurations were tested: | ||
19 | * 1. An IDE ATAPI TAPE shares the same interface | ||
20 | * and irq with an IDE ATAPI CDROM. | ||
21 | * 2. An IDE ATAPI TAPE shares the same interface | ||
22 | * and irq with a normal IDE disk. | ||
23 | * Both configurations seemed to work just fine ! | ||
24 | * However, to be on the safe side, it is meanwhile | ||
25 | * recommended to give the IDE TAPE its own interface | ||
26 | * and irq. | ||
27 | * The one thing which needs to be done here is to | ||
28 | * add a "request postpone" feature to ide.c, | ||
29 | * so that we won't have to wait for the tape to finish | ||
30 | * performing a long media access (DSC) request (such | ||
31 | * as a rewind) before we can access the other device | ||
32 | * on the same interface. This effect doesn't disturb | ||
33 | * normal operation most of the time because read/write | ||
34 | * requests are relatively fast, and once we are | ||
35 | * performing one tape r/w request, a lot of requests | ||
36 | * from the other device can be queued and ide.c will | ||
37 | * service all of them after this single tape request. | ||
38 | * Ver 1.0 Dec 11 95 Integrated into Linux 1.3.46 development tree. | ||
39 | * On each read / write request, we now ask the drive | ||
40 | * if we can transfer a constant number of bytes | ||
41 | * (a parameter of the drive) only to its buffers, | ||
42 | * without causing actual media access. If we can't, | ||
43 | * we just wait until we can by polling the DSC bit. | ||
44 | * This ensures that while we are not transferring | ||
45 | * more bytes than the constant referred to above, the | ||
46 | * interrupt latency will not become too high and | ||
47 | * we won't cause an interrupt timeout, as happened | ||
48 | * occasionally in the previous version. | ||
49 | * While polling for DSC, the current request is | ||
50 | * postponed and ide.c is free to handle requests from | ||
51 | * the other device. This is handled transparently to | ||
52 | * ide.c. The hwgroup locking method which was used | ||
53 | * in the previous version was removed. | ||
54 | * Use of new general features which are provided by | ||
55 | * ide.c for use with atapi devices. | ||
56 | * (Programming done by Mark Lord) | ||
57 | * Few potential bug fixes (Again, suggested by Mark) | ||
58 | * Single character device data transfers are now | ||
59 | * not limited in size, as they were before. | ||
60 | * We are asking the tape about its recommended | ||
61 | * transfer unit and send a larger data transfer | ||
62 | * as several transfers of the above size. | ||
63 | * For best results, use an integral number of this | ||
64 | * basic unit (which is shown during driver | ||
65 | * initialization). I will soon add an ioctl to get | ||
66 | * this important parameter. | ||
67 | * Our data transfer buffer is allocated on startup, | ||
68 | * rather than before each data transfer. This should | ||
69 | * ensure that we will indeed have a data buffer. | ||
70 | * Ver 1.1 Dec 14 95 Fixed random problems which occurred when the tape | ||
71 | * shared an interface with another device. | ||
72 | * (poll_for_dsc was a complete mess). | ||
73 | * Removed some old (non-active) code which had | ||
74 | * to do with supporting buffer cache originated | ||
75 | * requests. | ||
76 | * The block device interface can now be opened, so | ||
77 | * that general ide driver features like the unmask | ||
78 | * interrupts flag can be selected with an ioctl. | ||
79 | * This is the only use of the block device interface. | ||
80 | * New fast pipelined operation mode (currently only on | ||
81 | * writes). When using the pipelined mode, the | ||
82 | * throughput can potentially reach the maximum | ||
83 | * tape supported throughput, regardless of the | ||
84 | * user backup program. On my tape drive, it sometimes | ||
85 | * boosted performance by a factor of 2. Pipelined | ||
86 | * mode is enabled by default, but since it has a few | ||
87 | * downfalls as well, you may want to disable it. | ||
88 | * A short explanation of the pipelined operation mode | ||
89 | * is available below. | ||
90 | * Ver 1.2 Jan 1 96 Eliminated pipelined mode race condition. | ||
91 | * Added pipeline read mode. As a result, restores | ||
92 | * are now as fast as backups. | ||
93 | * Optimized shared interface behavior. The new behavior | ||
94 | * typically results in better IDE bus efficiency and | ||
95 | * higher tape throughput. | ||
96 | * Pre-calculation of the expected read/write request | ||
97 | * service time, based on the tape's parameters. In | ||
98 | * the pipelined operation mode, this allows us to | ||
99 | * adjust our polling frequency to a much lower value, | ||
100 | * and thus to dramatically reduce our load on Linux, | ||
101 | * without any decrease in performance. | ||
102 | * Implemented additional mtio.h operations. | ||
103 | * The recommended user block size is returned by | ||
104 | * the MTIOCGET ioctl. | ||
105 | * Additional minor changes. | ||
106 | * Ver 1.3 Feb 9 96 Fixed pipelined read mode bug which prevented the | ||
107 | * use of some block sizes during a restore procedure. | ||
108 | * The character device interface will now present a | ||
109 | * continuous view of the media - any mix of block sizes | ||
110 | * during a backup/restore procedure is supported. The | ||
111 | * driver will buffer the requests internally and | ||
112 | * convert them to the tape's recommended transfer | ||
113 | * unit, making performance almost independent of the | ||
114 | * chosen user block size. | ||
115 | * Some improvements in error recovery. | ||
116 | * By cooperating with ide-dma.c, bus mastering DMA can | ||
117 | * now sometimes be used with IDE tape drives as well. | ||
118 | * Bus mastering DMA has the potential to dramatically | ||
119 | * reduce the CPU's overhead when accessing the device, | ||
120 | * and can be enabled by using hdparm -d1 on the tape's | ||
121 | * block device interface. For more info, read the | ||
122 | * comments in ide-dma.c. | ||
123 | * Ver 1.4 Mar 13 96 Fixed serialize support. | ||
124 | * Ver 1.5 Apr 12 96 Fixed shared interface operation, broken in 1.3.85. | ||
125 | * Fixed pipelined read mode inefficiency. | ||
126 | * Fixed nasty null dereferencing bug. | ||
127 | * Ver 1.6 Aug 16 96 Fixed FPU usage in the driver. | ||
128 | * Fixed end of media bug. | ||
129 | * Ver 1.7 Sep 10 96 Minor changes for the CONNER CTT8000-A model. | ||
130 | * Ver 1.8 Sep 26 96 Attempt to find a better balance between good | ||
131 | * interactive response and high system throughput. | ||
132 | * Ver 1.9 Nov 5 96 Automatically cross encountered filemarks rather | ||
133 | * than requiring an explicit FSF command. | ||
134 | * Abort pending requests at end of media. | ||
135 | * MTTELL was sometimes returning incorrect results. | ||
136 | * Return the real block size in the MTIOCGET ioctl. | ||
137 | * Some error recovery bug fixes. | ||
138 | * Ver 1.10 Nov 5 96 Major reorganization. | ||
139 | * Reduced CPU overhead a bit by eliminating internal | ||
140 | * bounce buffers. | ||
141 | * Added module support. | ||
142 | * Added multiple tape drives support. | ||
143 | * Added partition support. | ||
144 | * Rewrote DSC handling. | ||
145 | * Some portability fixes. | ||
146 | * Removed ide-tape.h. | ||
147 | * Additional minor changes. | ||
148 | * Ver 1.11 Dec 2 96 Bug fix in previous DSC timeout handling. | ||
149 | * Use ide_stall_queue() for DSC overlap. | ||
150 | * Use the maximum speed rather than the current speed | ||
151 | * to compute the request service time. | ||
152 | * Ver 1.12 Dec 7 97 Fix random memory overwriting and/or last block data | ||
153 | * corruption, which could occur if the total number | ||
154 | * of bytes written to the tape was not an integral | ||
155 | * number of tape blocks. | ||
156 | * Add support for INTERRUPT DRQ devices. | ||
157 | * Ver 1.13 Jan 2 98 Add "speed == 0" work-around for HP COLORADO 5GB | ||
158 | * Ver 1.14 Dec 30 98 Partial fixes for the Sony/AIWA tape drives. | ||
159 | * Replace cli()/sti() with hwgroup spinlocks. | ||
160 | * Ver 1.15 Mar 25 99 Fix SMP race condition by replacing hwgroup | ||
161 | * spinlock with private per-tape spinlock. | ||
162 | * Ver 1.16 Sep 1 99 Add OnStream tape support. | ||
163 | * Abort read pipeline on EOD. | ||
164 | * Wait for the tape to become ready in case it returns | ||
165 | * "in the process of becoming ready" on open(). | ||
166 | * Fix zero padding of the last written block in | ||
167 | * case the tape block size is larger than PAGE_SIZE. | ||
168 | * Decrease the default disconnection time to tn. | ||
169 | * Ver 1.16e Oct 3 99 Minor fixes. | ||
170 | * Ver 1.16e1 Oct 13 99 Patches by Arnold Niessen, | ||
171 | * niessen@iae.nl / arnold.niessen@philips.com | ||
172 | * GO-1) Undefined code in idetape_read_position | ||
173 | * according to Gadi's email | ||
174 | * AJN-1) Minor fix asc == 11 should be asc == 0x11 | ||
175 | * in idetape_issue_packet_command (did effect | ||
176 | * debugging output only) | ||
177 | * AJN-2) Added more debugging output, and | ||
178 | * added ide-tape: where missing. I would also | ||
179 | * like to add tape->name where possible | ||
180 | * AJN-3) Added different debug_level's | ||
181 | * via /proc/ide/hdc/settings | ||
182 | * "debug_level" determines amount of debugging output; | ||
183 | * can be changed using /proc/ide/hdx/settings | ||
184 | * 0 : almost no debugging output | ||
185 | * 1 : 0+output errors only | ||
186 | * 2 : 1+output all sensekey/asc | ||
187 | * 3 : 2+follow all chrdev related procedures | ||
188 | * 4 : 3+follow all procedures | ||
189 | * 5 : 4+include pc_stack rq_stack info | ||
190 | * 6 : 5+USE_COUNT updates | ||
191 | * AJN-4) Fixed timeout for retension in idetape_queue_pc_tail | ||
192 | * from 5 to 10 minutes | ||
193 | * AJN-5) Changed maximum number of blocks to skip when | ||
194 | * reading tapes with multiple consecutive write | ||
195 | * errors from 100 to 1000 in idetape_get_logical_blk | ||
196 | * Proposed changes to code: | ||
197 | * 1) output "logical_blk_num" via /proc | ||
198 | * 2) output "current_operation" via /proc | ||
199 | * 3) Either solve or document the fact that `mt rewind' is | ||
200 | * required after reading from /dev/nhtx to be | ||
201 | * able to rmmod the idetape module; | ||
202 | * Also, sometimes an application finishes but the | ||
203 | * device remains `busy' for some time. Same cause ? | ||
204 | * Proposed changes to release-notes: | ||
205 | * 4) write a simple `quickstart' section in the | ||
206 | * release notes; I volunteer if you don't want to | ||
207 | * 5) include a pointer to video4linux in the doc | ||
208 | * to stimulate video applications | ||
209 | * 6) release notes lines 331 and 362: explain what happens | ||
210 | * if the application data rate is higher than 1100 KB/s; | ||
211 | * similar approach to lower-than-500 kB/s ? | ||
212 | * 7) 6.6 Comparison; wouldn't it be better to allow different | ||
213 | * strategies for read and write ? | ||
214 | * Wouldn't it be better to control the tape buffer | ||
215 | * contents instead of the bandwidth ? | ||
216 | * 8) line 536: replace will by would (if I understand | ||
217 | * this section correctly, a hypothetical and unwanted situation | ||
218 | * is being described) | ||
219 | * Ver 1.16f Dec 15 99 Change place of the secondary OnStream header frames. | ||
220 | * Ver 1.17 Nov 2000 / Jan 2001 Marcel Mol, marcel@mesa.nl | ||
221 | * - Add idetape_onstream_mode_sense_tape_parameter_page | ||
222 | * function to get tape capacity in frames: tape->capacity. | ||
223 | * - Add support for DI-50 drives( or any DI- drive). | ||
224 | * - 'workaround' for read error/blank block around block 3000. | ||
225 | * - Implement Early warning for end of media for Onstream. | ||
226 | * - Cosmetic code changes for readability. | ||
227 | * - Idetape_position_tape should not use SKIP bit during | ||
228 | * Onstream read recovery. | ||
229 | * - Add capacity, logical_blk_num and first/last_frame_position | ||
230 | * to /proc/ide/hd?/settings. | ||
231 | * - Module use count was gone in the Linux 2.4 driver. | ||
232 | * Ver 1.17a Apr 2001 Willem Riede osst@riede.org | ||
233 | * - Get drive's actual block size from mode sense block descriptor | ||
234 | * - Limit size of pipeline | ||
235 | * Ver 1.17b Oct 2002 Alan Stern <stern@rowland.harvard.edu> | ||
236 | * Changed IDETAPE_MIN_PIPELINE_STAGES to 1 and actually used | ||
237 | * it in the code! | ||
238 | * Actually removed aborted stages in idetape_abort_pipeline | ||
239 | * instead of just changing the command code. | ||
240 | * Made the transfer byte count for Request Sense equal to the | ||
241 | * actual length of the data transfer. | ||
242 | * Changed handling of partial data transfers: they do not | ||
243 | * cause DMA errors. | ||
244 | * Moved initiation of DMA transfers to the correct place. | ||
245 | * Removed reference to unallocated memory. | ||
246 | * Made __idetape_discard_read_pipeline return the number of | ||
247 | * sectors skipped, not the number of stages. | ||
248 | * Replaced errant kfree() calls with __idetape_kfree_stage(). | ||
249 | * Fixed off-by-one error in testing the pipeline length. | ||
250 | * Fixed handling of filemarks in the read pipeline. | ||
251 | * Small code optimization for MTBSF and MTBSFM ioctls. | ||
252 | * Don't try to unlock the door during device close if it is | ||
253 | * already unlocked! | ||
254 | * Cosmetic fixes to miscellaneous debugging output messages. | ||
255 | * Set the minimum /proc/ide/hd?/settings values for "pipeline", | ||
256 | * "pipeline_min", and "pipeline_max" to 1. | ||
257 | */ | ||
diff --git a/Documentation/ide/ide-tape.txt b/Documentation/ide/ide-tape.txt new file mode 100644 index 000000000000..658f271a373f --- /dev/null +++ b/Documentation/ide/ide-tape.txt | |||
@@ -0,0 +1,146 @@ | |||
1 | /* | ||
2 | * IDE ATAPI streaming tape driver. | ||
3 | * | ||
4 | * This driver is a part of the Linux ide driver. | ||
5 | * | ||
6 | * The driver, in co-operation with ide.c, basically traverses the | ||
7 | * request-list for the block device interface. The character device | ||
8 | * interface, on the other hand, creates new requests, adds them | ||
9 | * to the request-list of the block device, and waits for their completion. | ||
10 | * | ||
11 | * Pipelined operation mode is now supported on both reads and writes. | ||
12 | * | ||
13 | * The block device major and minor numbers are determined from the | ||
14 | * tape's relative position in the ide interfaces, as explained in ide.c. | ||
15 | * | ||
16 | * The character device interface consists of the following devices: | ||
17 | * | ||
18 | * ht0 major 37, minor 0 first IDE tape, rewind on close. | ||
19 | * ht1 major 37, minor 1 second IDE tape, rewind on close. | ||
20 | * ... | ||
21 | * nht0 major 37, minor 128 first IDE tape, no rewind on close. | ||
22 | * nht1 major 37, minor 129 second IDE tape, no rewind on close. | ||
23 | * ... | ||
24 | * | ||
25 | * The general magnetic tape commands compatible interface, as defined by | ||
26 | * include/linux/mtio.h, is accessible through the character device. | ||
27 | * | ||
28 | * General ide driver configuration options, such as the interrupt-unmask | ||
29 | * flag, can be configured by issuing an ioctl to the block device interface, | ||
30 | * as any other ide device. | ||
31 | * | ||
32 | * Our own ide-tape ioctl's can be issued to either the block device or | ||
33 | * the character device interface. | ||
34 | * | ||
35 | * Maximal throughput with minimal bus load will usually be achieved in the | ||
36 | * following scenario: | ||
37 | * | ||
38 | * 1. ide-tape is operating in the pipelined operation mode. | ||
39 | * 2. No buffering is performed by the user backup program. | ||
40 | * | ||
41 | * Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive. | ||
42 | * | ||
43 | * Here are some words from the first releases of hd.c, which are quoted | ||
44 | * in ide.c and apply here as well: | ||
45 | * | ||
46 | * | Special care is recommended. Have Fun! | ||
47 | * | ||
48 | * | ||
49 | * An overview of the pipelined operation mode. | ||
50 | * | ||
51 | * In the pipelined write mode, we will usually just add requests to our | ||
52 | * pipeline and return immediately, before we even start to service them. The | ||
53 | * user program will then have enough time to prepare the next request while | ||
54 | * we are still busy servicing previous requests. In the pipelined read mode, | ||
55 | * the situation is similar - we add read-ahead requests into the pipeline, | ||
56 | * before the user even requested them. | ||
57 | * | ||
58 | * The pipeline can be viewed as a "safety net" which will be activated when | ||
59 | * the system load is high and prevents the user backup program from keeping up | ||
60 | * with the current tape speed. At this point, the pipeline will get | ||
61 | * shorter and shorter but the tape will still be streaming at the same speed. | ||
62 | * Assuming we have enough pipeline stages, the system load will hopefully | ||
63 | * decrease before the pipeline is completely empty, and the backup program | ||
64 | * will be able to "catch up" and refill the pipeline again. | ||
65 | * | ||
66 | * When using the pipelined mode, it would be best to disable any type of | ||
67 | * buffering done by the user program, as ide-tape already provides all the | ||
68 | * benefits in the kernel, where it can be done in a more efficient way. | ||
69 | * As we will usually not block the user program on a request, the most | ||
70 | * efficient user code will then be a simple read-write-read-... cycle. | ||
71 | * Any additional logic will usually just slow down the backup process. | ||
72 | * | ||
73 | * Using the pipelined mode, I get a constant over 400 KBps throughput, | ||
74 | * which seems to be the maximum throughput supported by my tape. | ||
75 | * | ||
76 | * However, there are some downfalls: | ||
77 | * | ||
78 | * 1. We use memory (for data buffers) in proportion to the number | ||
79 | * of pipeline stages (each stage is about 26 KB with my tape). | ||
80 | * 2. In the pipelined write mode, we cheat and postpone error codes | ||
81 | * to the user task. In read mode, the actual tape position | ||
82 | * will be a bit further than the last requested block. | ||
83 | * | ||
84 | * Concerning (1): | ||
85 | * | ||
86 | * 1. We allocate stages dynamically only when we need them. When | ||
87 | * we don't need them, we don't consume additional memory. In | ||
88 | * case we can't allocate stages, we just manage without them | ||
89 | * (at the expense of decreased throughput) so when Linux is | ||
90 | * tight in memory, we will not pose additional difficulties. | ||
91 | * | ||
92 | * 2. The maximum number of stages (which is, in fact, the maximum | ||
93 | * amount of memory) which we allocate is limited by the compile | ||
94 | * time parameter IDETAPE_MAX_PIPELINE_STAGES. | ||
95 | * | ||
96 | * 3. The maximum number of stages is a controlled parameter - We | ||
97 | * don't start from the user defined maximum number of stages | ||
98 | * but from the lower IDETAPE_MIN_PIPELINE_STAGES (again, we | ||
99 | * will not even allocate this amount of stages if the user | ||
100 | * program can't handle the speed). We then implement a feedback | ||
101 | * loop which checks if the pipeline is empty, and if it is, we | ||
102 | * increase the maximum number of stages as necessary until we | ||
103 | * reach the optimum value which just manages to keep the tape | ||
104 | * busy with minimum allocated memory or until we reach | ||
105 | * IDETAPE_MAX_PIPELINE_STAGES. | ||
106 | * | ||
107 | * Concerning (2): | ||
108 | * | ||
109 | * In pipelined write mode, ide-tape can not return accurate error codes | ||
110 | * to the user program since we usually just add the request to the | ||
111 | * pipeline without waiting for it to be serviced. In case an error | ||
112 | * occurs, I will report it on the next user request. | ||
113 | * | ||
114 | * In the pipelined read mode, subsequent read requests or forward | ||
115 | * filemark spacing will perform correctly, as we preserve all blocks | ||
116 | * and filemarks which we encountered during our excess read-ahead. | ||
117 | * | ||
118 | * For accurate tape positioning and error reporting, disabling | ||
119 | * pipelined mode might be the best option. | ||
120 | * | ||
121 | * You can enable/disable/tune the pipelined operation mode by adjusting | ||
122 | * the compile time parameters below. | ||
123 | * | ||
124 | * | ||
125 | * Possible improvements. | ||
126 | * | ||
127 | * 1. Support for the ATAPI overlap protocol. | ||
128 | * | ||
129 | * In order to maximize bus throughput, we currently use the DSC | ||
130 | * overlap method which enables ide.c to service requests from the | ||
131 | * other device while the tape is busy executing a command. The | ||
132 | * DSC overlap method involves polling the tape's status register | ||
133 | * for the DSC bit, and servicing the other device while the tape | ||
134 | * isn't ready. | ||
135 | * | ||
136 | * In the current QIC development standard (December 1995), | ||
137 | * it is recommended that new tape drives will *in addition* | ||
138 | * implement the ATAPI overlap protocol, which is used for the | ||
139 | * same purpose - efficient use of the IDE bus, but is interrupt | ||
140 | * driven and thus has much less CPU overhead. | ||
141 | * | ||
142 | * ATAPI overlap is likely to be supported in most new ATAPI | ||
143 | * devices, including new ATAPI cdroms, and thus provides us | ||
144 | * a method by which we can achieve higher throughput when | ||
145 | * sharing a (fast) ATA-2 disk with any (slow) new ATAPI device. | ||
146 | */ | ||
diff --git a/Documentation/initrd.txt b/Documentation/initrd.txt index 74f68b35f7c1..1ba84f3584e3 100644 --- a/Documentation/initrd.txt +++ b/Documentation/initrd.txt | |||
@@ -85,7 +85,7 @@ involve special block devices or loopbacks; you merely create a directory on | |||
85 | disk with the desired initrd content, cd to that directory, and run (as an | 85 | disk with the desired initrd content, cd to that directory, and run (as an |
86 | example): | 86 | example): |
87 | 87 | ||
88 | find . | cpio --quiet -c -o | gzip -9 -n > /boot/imagefile.img | 88 | find . | cpio --quiet -H newc -o | gzip -9 -n > /boot/imagefile.img |
89 | 89 | ||
90 | Examining the contents of an existing image file is just as simple: | 90 | Examining the contents of an existing image file is just as simple: |
91 | 91 | ||
diff --git a/Documentation/ja_JP/stable_kernel_rules.txt b/Documentation/ja_JP/stable_kernel_rules.txt new file mode 100644 index 000000000000..17d87519e468 --- /dev/null +++ b/Documentation/ja_JP/stable_kernel_rules.txt | |||
@@ -0,0 +1,79 @@ | |||
1 | NOTE: | ||
2 | This is Japanese translated version of "Documentation/stable_kernel_rules.txt". | ||
3 | This one is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com> | ||
4 | and JF Project team <www.linux.or.jp/JF>. | ||
5 | If you find any difference from the original file or a problem in the translation, | ||
6 | please contact the maintainer of this file or the JF project. | ||
7 | |||
8 | Please also note that the purpose of this file is to be easier to read for | ||
9 | non-native English speakers; it is not intended as a fork. So, if you have any | ||
10 | comments on or updates to this file, please try to update the original (English) | ||
11 | file first. | ||
12 | |||
13 | ================================== | ||
14 | これは、 | ||
15 | linux-2.6.24/Documentation/stable_kernel_rules.txt | ||
16 | の和訳です。 | ||
17 | |||
18 | 翻訳団体： JF プロジェクト < http://www.linux.or.jp/JF/ > | ||
19 | 翻訳日： 2007/12/30 | ||
20 | 翻訳者： Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com> | ||
21 | 校正者： 武井伸光さん、<takei at webmasters dot gr dot jp> | ||
22 | かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp> | ||
23 | 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp> | ||
24 | 野口さん (Kenji Noguchi) <tokyo246 at gmail dot com> | ||
25 | 神宮信太郎さん <jin at libjingu dot jp> | ||
26 | ================================== | ||
27 | |||
28 | ずっと知りたかった Linux 2.6 -stable リリースの全て | ||
29 | |||
30 | "-stable" ツリーにどのような種類のパッチが受け入れられるか、どのような | ||
31 | ものが受け入れられないか、についての規則- | ||
32 | |||
33 | - 明らかに正しく、テストされているものでなければならない。 | ||
34 | - 文脈(変更行の前後)を含めて 100 行より大きくてはいけない。 | ||
35 | - ただ一個のことだけを修正しているべき。 | ||
36 | - 皆を悩ませている本物のバグを修正しなければならない。("これはバグで | ||
37 | あるかもしれないが..." のようなものではない) | ||
38 | - ビルドエラー(CONFIG_BROKENになっているものを除く), oops, ハング、デー | ||
39 | タ破壊、現実のセキュリティ問題、その他 "ああ、これはダメだね"という | ||
40 | ようなものを修正しなければならない。短く言えば、重大な問題。 | ||
41 | - どのように競合状態が発生するかの説明も一緒に書かれていない限り、 | ||
42 | "理論的には競合状態になる"ようなものは不可。 | ||
43 | - いかなる些細な修正も含めることはできない。(スペルの修正、空白のクリー | ||
44 | ンアップなど) | ||
45 | - 対応するサブシステムメンテナが受け入れたものでなければならない。 | ||
46 | - Documentation/SubmittingPatches の規則に従ったものでなければならない。 | ||
47 | |||
48 | -stable ツリーにパッチを送付する手続き- | ||
49 | |||
50 | - 上記の規則に従っているかを確認した後に、stable@kernel.org にパッチ | ||
51 | を送る。 | ||
52 | - 送信者はパッチがキューに受け付けられた際には ACK を、却下された場合 | ||
53 | には NAK を受け取る。この反応は開発者たちのスケジュールによって、数 | ||
54 | 日かかる場合がある。 | ||
55 | - もし受け取られたら、パッチは他の開発者たちのレビューのために | ||
56 | -stable キューに追加される。 | ||
57 | - セキュリティパッチはこのエイリアス (stable@kernel.org) に送られるべ | ||
58 | きではなく、代わりに security@kernel.org のアドレスに送られる。 | ||
59 | |||
60 | レビューサイクル- | ||
61 | |||
62 | - -stable メンテナがレビューサイクルを決めるとき、パッチはレビュー委 | ||
63 | 員会とパッチが影響する領域のメンテナ(提供者がその領域のメンテナで無 | ||
64 | い限り)に送られ、linux-kernel メーリングリストにCCされる。 | ||
65 | - レビュー委員会は 48時間の間に ACK か NAK を出す。 | ||
66 | - もしパッチが委員会のメンバから却下されるか、メンテナ達やメンバが気付 | ||
67 | かなかった問題が持ちあがり、linux-kernel メンバがパッチに異議を唱え | ||
68 | た場合には、パッチはキューから削除される。 | ||
69 | - レビューサイクルの最後に、ACK を受けたパッチは最新の -stable リリー | ||
70 | スに追加され、その後に新しい -stable リリースが行われる。 | ||
71 | - セキュリティパッチは、通常のレビューサイクルを通らず、セキュリティ | ||
72 | カーネルチームから直接 -stable ツリーに受け付けられる。 | ||
73 | この手続きの詳細については kernel security チームに問い合わせること。 | ||
74 | |||
75 | レビュー委員会- | ||
76 | |||
77 | - この委員会は、このタスクについて活動する多くのボランティアと、少数の | ||
78 | 非ボランティアのカーネル開発者達で構成されている。 | ||
79 | |||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 92c40d174355..8ea41b6e6a85 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -147,8 +147,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
147 | default: 0 | 147 | default: 0 |
148 | 148 | ||
149 | acpi_sleep= [HW,ACPI] Sleep options | 149 | acpi_sleep= [HW,ACPI] Sleep options |
150 | Format: { s3_bios, s3_mode } | 150 | Format: { s3_bios, s3_mode, s3_beep } |
151 | See Documentation/power/video.txt | 151 | See Documentation/power/video.txt for s3_bios and s3_mode. |
152 | s3_beep is for debugging; it makes the PC's speaker beep | ||
153 | as soon as the kernel's real-mode entry point is called. | ||
152 | 154 | ||
153 | acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode | 155 | acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode |
154 | Format: { level | edge | high | low } | 156 | Format: { level | edge | high | low } |
@@ -168,6 +170,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
168 | acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA | 170 | acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA |
169 | Format: <irq>,<irq>... | 171 | Format: <irq>,<irq>... |
170 | 172 | ||
173 | acpi_new_pts_ordering [HW,ACPI] | ||
174 | Enforce the ACPI 2.0 ordering of the _PTS control | ||
175 | method wrt putting devices into low power states | ||
176 | default: pre ACPI 2.0 ordering of _PTS | ||
177 | |||
171 | acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT | 178 | acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT |
172 | 179 | ||
173 | acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS | 180 | acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS |
@@ -544,7 +551,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
544 | 1 will print _a lot_ more information - normally | 551 | 1 will print _a lot_ more information - normally |
545 | only useful to kernel developers. | 552 | only useful to kernel developers. |
546 | 553 | ||
547 | decnet= [HW,NET] | 554 | decnet.addr= [HW,NET] |
548 | Format: <area>[,<node>] | 555 | Format: <area>[,<node>] |
549 | See also Documentation/networking/decnet.txt. | 556 | See also Documentation/networking/decnet.txt. |
550 | 557 | ||
@@ -775,6 +782,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
775 | loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same | 782 | loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same |
776 | as idle=poll. | 783 | as idle=poll. |
777 | 784 | ||
785 | ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem | ||
786 | Claim all unknown PCI IDE storage controllers. | ||
787 | |||
778 | ignore_loglevel [KNL] | 788 | ignore_loglevel [KNL] |
779 | Ignore loglevel setting - this will print /all/ | 789 | Ignore loglevel setting - this will print /all/ |
780 | kernel messages to the console. Useful for debugging. | 790 | kernel messages to the console. Useful for debugging. |
@@ -1556,14 +1566,17 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1556 | ramdisk_size= [RAM] Sizes of RAM disks in kilobytes | 1566 | ramdisk_size= [RAM] Sizes of RAM disks in kilobytes |
1557 | See Documentation/ramdisk.txt. | 1567 | See Documentation/ramdisk.txt. |
1558 | 1568 | ||
1559 | rcu.blimit= [KNL,BOOT] Set maximum number of finished | 1569 | rcupdate.blimit= [KNL,BOOT] |
1560 | RCU callbacks to process in one batch. | 1570 | Set maximum number of finished RCU callbacks to process |
1571 | in one batch. | ||
1561 | 1572 | ||
1562 | rcu.qhimark= [KNL,BOOT] Set threshold of queued | 1573 | rcupdate.qhimark= [KNL,BOOT] |
1574 | Set threshold of queued | ||
1563 | RCU callbacks over which batch limiting is disabled. | 1575 | RCU callbacks over which batch limiting is disabled. |
1564 | 1576 | ||
1565 | rcu.qlowmark= [KNL,BOOT] Set threshold of queued | 1577 | rcupdate.qlowmark= [KNL,BOOT] |
1566 | RCU callbacks below which batch limiting is re-enabled. | 1578 | Set threshold of queued RCU callbacks below which |
1579 | batch limiting is re-enabled. | ||
1567 | 1580 | ||
1568 | rdinit= [KNL] | 1581 | rdinit= [KNL] |
1569 | Format: <full_path> | 1582 | Format: <full_path> |
@@ -1883,9 +1896,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1883 | st= [HW,SCSI] SCSI tape parameters (buffers, etc.) | 1896 | st= [HW,SCSI] SCSI tape parameters (buffers, etc.) |
1884 | See Documentation/scsi/st.txt. | 1897 | See Documentation/scsi/st.txt. |
1885 | 1898 | ||
1886 | st0x= [HW,SCSI] | ||
1887 | See header of drivers/scsi/seagate.c. | ||
1888 | |||
1889 | sti= [PARISC,HW] | 1899 | sti= [PARISC,HW] |
1890 | Format: <num> | 1900 | Format: <num> |
1891 | Set the STI (builtin display/keyboard on the HP-PARISC | 1901 | Set the STI (builtin display/keyboard on the HP-PARISC |
@@ -1970,9 +1980,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1970 | tipar.delay= [HW,PPT] | 1980 | tipar.delay= [HW,PPT] |
1971 | Set inter-bit delay in microseconds (default 10). | 1981 | Set inter-bit delay in microseconds (default 10). |
1972 | 1982 | ||
1973 | tmc8xx= [HW,SCSI] | ||
1974 | See header of drivers/scsi/seagate.c. | ||
1975 | |||
1976 | tmscsim= [HW,SCSI] | 1983 | tmscsim= [HW,SCSI] |
1977 | See comment before function dc390_setup() in | 1984 | See comment before function dc390_setup() in |
1978 | drivers/scsi/tmscsim.c. | 1985 | drivers/scsi/tmscsim.c. |
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 53a63890aea4..30c101761d0d 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt | |||
@@ -96,7 +96,9 @@ or in registers (e.g., for x86_64 or for an i386 fastcall function). | |||
96 | The jprobe will work in either case, so long as the handler's | 96 | The jprobe will work in either case, so long as the handler's |
97 | prototype matches that of the probed function. | 97 | prototype matches that of the probed function. |
98 | 98 | ||
99 | 1.3 How Does a Return Probe Work? | 99 | 1.3 Return Probes |
100 | |||
101 | 1.3.1 How Does a Return Probe Work? | ||
100 | 102 | ||
101 | When you call register_kretprobe(), Kprobes establishes a kprobe at | 103 | When you call register_kretprobe(), Kprobes establishes a kprobe at |
102 | the entry to the function. When the probed function is called and this | 104 | the entry to the function. When the probed function is called and this |
@@ -107,9 +109,9 @@ At boot time, Kprobes registers a kprobe at the trampoline. | |||
107 | 109 | ||
108 | When the probed function executes its return instruction, control | 110 | When the probed function executes its return instruction, control |
109 | passes to the trampoline and that probe is hit. Kprobes' trampoline | 111 | passes to the trampoline and that probe is hit. Kprobes' trampoline |
110 | handler calls the user-specified handler associated with the kretprobe, | 112 | handler calls the user-specified return handler associated with the |
111 | then sets the saved instruction pointer to the saved return address, | 113 | kretprobe, then sets the saved instruction pointer to the saved return |
112 | and that's where execution resumes upon return from the trap. | 114 | address, and that's where execution resumes upon return from the trap. |
113 | 115 | ||
114 | While the probed function is executing, its return address is | 116 | While the probed function is executing, its return address is |
115 | stored in an object of type kretprobe_instance. Before calling | 117 | stored in an object of type kretprobe_instance. Before calling |
@@ -131,6 +133,30 @@ zero when the return probe is registered, and is incremented every | |||
131 | time the probed function is entered but there is no kretprobe_instance | 133 | time the probed function is entered but there is no kretprobe_instance |
132 | object available for establishing the return probe. | 134 | object available for establishing the return probe. |
133 | 135 | ||
136 | 1.3.2 Kretprobe entry-handler | ||
137 | |||
138 | Kretprobes also provides an optional user-specified handler which runs | ||
139 | on function entry. This handler is specified by setting the entry_handler | ||
140 | field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the | ||
141 | function entry is hit, the user-defined entry_handler, if any, is invoked. | ||
142 | If the entry_handler returns 0 (success) then a corresponding return handler | ||
143 | is guaranteed to be called upon function return. If the entry_handler | ||
144 | returns a non-zero error then Kprobes leaves the return address as is, and | ||
145 | the kretprobe has no further effect for that particular function instance. | ||
146 | |||
147 | Multiple entry and return handler invocations are matched using the unique | ||
148 | kretprobe_instance object associated with them. Additionally, a user | ||
149 | may also specify per return-instance private data to be part of each | ||
150 | kretprobe_instance object. This is especially useful when sharing private | ||
151 | data between corresponding user entry and return handlers. The size of each | ||
152 | private data object can be specified at kretprobe registration time by | ||
153 | setting the data_size field of the kretprobe struct. This data can be | ||
154 | accessed through the data field of each kretprobe_instance object. | ||
155 | |||
156 | In case the probed function is entered but there is no kretprobe_instance | ||
157 | object available, then in addition to incrementing the nmissed count, | ||
158 | the user entry_handler invocation is also skipped. | ||
159 | |||
134 | 2. Architectures Supported | 160 | 2. Architectures Supported |
135 | 161 | ||
136 | Kprobes, jprobes, and return probes are implemented on the following | 162 | Kprobes, jprobes, and return probes are implemented on the following |
@@ -274,6 +300,8 @@ of interest: | |||
274 | - ret_addr: the return address | 300 | - ret_addr: the return address |
275 | - rp: points to the corresponding kretprobe object | 301 | - rp: points to the corresponding kretprobe object |
276 | - task: points to the corresponding task struct | 302 | - task: points to the corresponding task struct |
303 | - data: points to per return-instance private data; see "Kretprobe | ||
304 | entry-handler" for details. | ||
277 | 305 | ||
278 | The regs_return_value(regs) macro provides a simple abstraction to | 306 | The regs_return_value(regs) macro provides a simple abstraction to |
279 | extract the return value from the appropriate register as defined by | 307 | extract the return value from the appropriate register as defined by |
@@ -556,23 +584,52 @@ report failed calls to sys_open(). | |||
556 | #include <linux/kernel.h> | 584 | #include <linux/kernel.h> |
557 | #include <linux/module.h> | 585 | #include <linux/module.h> |
558 | #include <linux/kprobes.h> | 586 | #include <linux/kprobes.h> |
587 | #include <linux/ktime.h> | ||
588 | |||
589 | /* per-instance private data */ | ||
590 | struct my_data { | ||
591 | ktime_t entry_stamp; | ||
592 | }; | ||
559 | 593 | ||
560 | static const char *probed_func = "sys_open"; | 594 | static const char *probed_func = "sys_open"; |
561 | 595 | ||
562 | /* Return-probe handler: If the probed function fails, log the return value. */ | 596 | /* Timestamp function entry. */ |
563 | static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) | 597 | static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) |
598 | { | ||
599 | struct my_data *data; | ||
600 | |||
601 | if(!current->mm) | ||
602 | return 1; /* skip kernel threads */ | ||
603 | |||
604 | data = (struct my_data *)ri->data; | ||
605 | data->entry_stamp = ktime_get(); | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | /* If the probed function failed, log the return value and duration. | ||
610 | * Duration may turn out to be zero consistently, depending upon the | ||
611 | * granularity of time accounting on the platform. */ | ||
612 | static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) | ||
564 | { | 613 | { |
565 | int retval = regs_return_value(regs); | 614 | int retval = regs_return_value(regs); |
615 | struct my_data *data = (struct my_data *)ri->data; | ||
616 | s64 delta; | ||
617 | ktime_t now; | ||
618 | |||
566 | if (retval < 0) { | 619 | if (retval < 0) { |
567 | printk("%s returns %d\n", probed_func, retval); | 620 | now = ktime_get(); |
621 | delta = ktime_to_ns(ktime_sub(now, data->entry_stamp)); | ||
622 | printk("%s: return val = %d (duration = %lld ns)\n", | ||
623 | probed_func, retval, delta); | ||
568 | } | 624 | } |
569 | return 0; | 625 | return 0; |
570 | } | 626 | } |
571 | 627 | ||
572 | static struct kretprobe my_kretprobe = { | 628 | static struct kretprobe my_kretprobe = { |
573 | .handler = ret_handler, | 629 | .handler = return_handler, |
574 | /* Probe up to 20 instances concurrently. */ | 630 | .entry_handler = entry_handler, |
575 | .maxactive = 20 | 631 | .data_size = sizeof(struct my_data), |
632 | .maxactive = 20, /* probe up to 20 instances concurrently */ | ||
576 | }; | 633 | }; |
577 | 634 | ||
578 | static int __init kretprobe_init(void) | 635 | static int __init kretprobe_init(void) |
@@ -584,7 +641,7 @@ static int __init kretprobe_init(void) | |||
584 | printk("register_kretprobe failed, returned %d\n", ret); | 641 | printk("register_kretprobe failed, returned %d\n", ret); |
585 | return -1; | 642 | return -1; |
586 | } | 643 | } |
587 | printk("Planted return probe at %p\n", my_kretprobe.kp.addr); | 644 | printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name); |
588 | return 0; | 645 | return 0; |
589 | } | 646 | } |
590 | 647 | ||
@@ -594,7 +651,7 @@ static void __exit kretprobe_exit(void) | |||
594 | printk("kretprobe unregistered\n"); | 651 | printk("kretprobe unregistered\n"); |
595 | /* nmissed > 0 suggests that maxactive was set too low. */ | 652 | /* nmissed > 0 suggests that maxactive was set too low. */ |
596 | printk("Missed probing %d instances of %s\n", | 653 | printk("Missed probing %d instances of %s\n", |
597 | my_kretprobe.nmissed, probed_func); | 654 | my_kretprobe.nmissed, probed_func); |
598 | } | 655 | } |
599 | 656 | ||
600 | module_init(kretprobe_init) | 657 | module_init(kretprobe_init) |
diff --git a/Documentation/kref.txt b/Documentation/kref.txt index f38b59d00c63..130b6e87aa7e 100644 --- a/Documentation/kref.txt +++ b/Documentation/kref.txt | |||
@@ -141,10 +141,10 @@ The last rule (rule 3) is the nastiest one to handle. Say, for | |||
141 | instance, you have a list of items that are each kref-ed, and you wish | 141 | instance, you have a list of items that are each kref-ed, and you wish |
142 | to get the first one. You can't just pull the first item off the list | 142 | to get the first one. You can't just pull the first item off the list |
143 | and kref_get() it. That violates rule 3 because you are not already | 143 | and kref_get() it. That violates rule 3 because you are not already |
144 | holding a valid pointer. You must add locks or semaphores. For | 144 | holding a valid pointer. You must add a mutex (or some other lock). |
145 | instance: | 145 | For instance: |
146 | 146 | ||
147 | static DECLARE_MUTEX(sem); | 147 | static DEFINE_MUTEX(mutex); |
148 | static LIST_HEAD(q); | 148 | static LIST_HEAD(q); |
149 | struct my_data | 149 | struct my_data |
150 | { | 150 | { |
@@ -155,12 +155,12 @@ struct my_data | |||
155 | static struct my_data *get_entry() | 155 | static struct my_data *get_entry() |
156 | { | 156 | { |
157 | struct my_data *entry = NULL; | 157 | struct my_data *entry = NULL; |
158 | down(&sem); | 158 | mutex_lock(&mutex); |
159 | if (!list_empty(&q)) { | 159 | if (!list_empty(&q)) { |
160 | entry = container_of(q.next, struct my_q_entry, link); | 160 | entry = container_of(q.next, struct my_q_entry, link); |
161 | kref_get(&entry->refcount); | 161 | kref_get(&entry->refcount); |
162 | } | 162 | } |
163 | up(&sem); | 163 | mutex_unlock(&mutex); |
164 | return entry; | 164 | return entry; |
165 | } | 165 | } |
166 | 166 | ||
@@ -174,9 +174,9 @@ static void release_entry(struct kref *ref) | |||
174 | 174 | ||
175 | static void put_entry(struct my_data *entry) | 175 | static void put_entry(struct my_data *entry) |
176 | { | 176 | { |
177 | down(&sem); | 177 | mutex_lock(&mutex); |
178 | kref_put(&entry->refcount, release_entry); | 178 | kref_put(&entry->refcount, release_entry); |
179 | up(&sem); | 179 | mutex_unlock(&mutex); |
180 | } | 180 | } |
181 | 181 | ||
182 | The kref_put() return value is useful if you do not want to hold the | 182 | The kref_put() return value is useful if you do not want to hold the |
@@ -191,13 +191,13 @@ static void release_entry(struct kref *ref) | |||
191 | 191 | ||
192 | static void put_entry(struct my_data *entry) | 192 | static void put_entry(struct my_data *entry) |
193 | { | 193 | { |
194 | down(&sem); | 194 | mutex_lock(&mutex); |
195 | if (kref_put(&entry->refcount, release_entry)) { | 195 | if (kref_put(&entry->refcount, release_entry)) { |
196 | list_del(&entry->link); | 196 | list_del(&entry->link); |
197 | up(&sem); | 197 | mutex_unlock(&mutex); |
198 | kfree(entry); | 198 | kfree(entry); |
199 | } else | 199 | } else |
200 | up(&sem); | 200 | mutex_unlock(&mutex); |
201 | } | 201 | } |
202 | 202 | ||
203 | This is really more useful if you have to call other routines as part | 203 | This is really more useful if you have to call other routines as part |
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 6c8a2386cd50..0f23d67f958f 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <zlib.h> | 34 | #include <zlib.h> |
35 | #include <assert.h> | 35 | #include <assert.h> |
36 | #include <sched.h> | 36 | #include <sched.h> |
37 | #include <limits.h> | ||
38 | #include <stddef.h> | ||
37 | #include "linux/lguest_launcher.h" | 39 | #include "linux/lguest_launcher.h" |
38 | #include "linux/virtio_config.h" | 40 | #include "linux/virtio_config.h" |
39 | #include "linux/virtio_net.h" | 41 | #include "linux/virtio_net.h" |
@@ -99,13 +101,11 @@ struct device_list | |||
99 | /* The descriptor page for the devices. */ | 101 | /* The descriptor page for the devices. */ |
100 | u8 *descpage; | 102 | u8 *descpage; |
101 | 103 | ||
102 | /* The tail of the last descriptor. */ | ||
103 | unsigned int desc_used; | ||
104 | |||
105 | /* A single linked list of devices. */ | 104 | /* A single linked list of devices. */ |
106 | struct device *dev; | 105 | struct device *dev; |
107 | /* ... And an end pointer so we can easily append new devices */ | 106 | /* And a pointer to the last device for easy append and also for |
108 | struct device **lastdev; | 107 | * configuration appending. */ |
108 | struct device *lastdev; | ||
109 | }; | 109 | }; |
110 | 110 | ||
111 | /* The list of Guest devices, based on command line arguments. */ | 111 | /* The list of Guest devices, based on command line arguments. */ |
@@ -191,7 +191,14 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, | |||
191 | #define cpu_to_le64(v64) (v64) | 191 | #define cpu_to_le64(v64) (v64) |
192 | #define le16_to_cpu(v16) (v16) | 192 | #define le16_to_cpu(v16) (v16) |
193 | #define le32_to_cpu(v32) (v32) | 193 | #define le32_to_cpu(v32) (v32) |
194 | #define le64_to_cpu(v32) (v64) | 194 | #define le64_to_cpu(v64) (v64) |
195 | |||
196 | /* The device virtqueue descriptors are followed by feature bitmasks. */ | ||
197 | static u8 *get_feature_bits(struct device *dev) | ||
198 | { | ||
199 | return (u8 *)(dev->desc + 1) | ||
200 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig); | ||
201 | } | ||
195 | 202 | ||
196 | /*L:100 The Launcher code itself takes us out into userspace, that scary place | 203 | /*L:100 The Launcher code itself takes us out into userspace, that scary place |
197 | * where pointers run wild and free! Unfortunately, like most userspace | 204 | * where pointers run wild and free! Unfortunately, like most userspace |
@@ -914,21 +921,58 @@ static void enable_fd(int fd, struct virtqueue *vq) | |||
914 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 921 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); |
915 | } | 922 | } |
916 | 923 | ||
924 | /* Resetting a device is fairly easy. */ | ||
925 | static void reset_device(struct device *dev) | ||
926 | { | ||
927 | struct virtqueue *vq; | ||
928 | |||
929 | verbose("Resetting device %s\n", dev->name); | ||
930 | /* Clear the status. */ | ||
931 | dev->desc->status = 0; | ||
932 | |||
933 | /* Clear any features they've acked. */ | ||
934 | memset(get_feature_bits(dev) + dev->desc->feature_len, 0, | ||
935 | dev->desc->feature_len); | ||
936 | |||
937 | /* Zero out the virtqueues. */ | ||
938 | for (vq = dev->vq; vq; vq = vq->next) { | ||
939 | memset(vq->vring.desc, 0, | ||
940 | vring_size(vq->config.num, getpagesize())); | ||
941 | vq->last_avail_idx = 0; | ||
942 | } | ||
943 | } | ||
944 | |||
917 | /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ | 945 | /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ |
918 | static void handle_output(int fd, unsigned long addr) | 946 | static void handle_output(int fd, unsigned long addr) |
919 | { | 947 | { |
920 | struct device *i; | 948 | struct device *i; |
921 | struct virtqueue *vq; | 949 | struct virtqueue *vq; |
922 | 950 | ||
923 | /* Check each virtqueue. */ | 951 | /* Check each device and virtqueue. */ |
924 | for (i = devices.dev; i; i = i->next) { | 952 | for (i = devices.dev; i; i = i->next) { |
953 | /* Notifications to device descriptors reset the device. */ | ||
954 | if (from_guest_phys(addr) == i->desc) { | ||
955 | reset_device(i); | ||
956 | return; | ||
957 | } | ||
958 | |||
959 | /* Notifications to virtqueues mean output has occurred. */ | ||
925 | for (vq = i->vq; vq; vq = vq->next) { | 960 | for (vq = i->vq; vq; vq = vq->next) { |
926 | if (vq->config.pfn == addr/getpagesize() | 961 | if (vq->config.pfn != addr/getpagesize()) |
927 | && vq->handle_output) { | 962 | continue; |
928 | verbose("Output to %s\n", vq->dev->name); | 963 | |
929 | vq->handle_output(fd, vq); | 964 | /* Guest should acknowledge (and set features!) before |
965 | * using the device. */ | ||
966 | if (i->desc->status == 0) { | ||
967 | warnx("%s gave early output", i->name); | ||
930 | return; | 968 | return; |
931 | } | 969 | } |
970 | |||
971 | if (strcmp(vq->dev->name, "console") != 0) | ||
972 | verbose("Output to %s\n", vq->dev->name); | ||
973 | if (vq->handle_output) | ||
974 | vq->handle_output(fd, vq); | ||
975 | return; | ||
932 | } | 976 | } |
933 | } | 977 | } |
934 | 978 | ||
@@ -986,54 +1030,44 @@ static void handle_input(int fd) | |||
986 | * | 1030 | * |
987 | * All devices need a descriptor so the Guest knows it exists, and a "struct | 1031 | * All devices need a descriptor so the Guest knows it exists, and a "struct |
988 | * device" so the Launcher can keep track of it. We have common helper | 1032 | * device" so the Launcher can keep track of it. We have common helper |
989 | * routines to allocate them. | 1033 | * routines to allocate and manage them. */ |
990 | * | ||
991 | * This routine allocates a new "struct lguest_device_desc" from descriptor | ||
992 | * table just above the Guest's normal memory. It returns a pointer to that | ||
993 | * descriptor. */ | ||
994 | static struct lguest_device_desc *new_dev_desc(u16 type) | ||
995 | { | ||
996 | struct lguest_device_desc *d; | ||
997 | 1034 | ||
998 | /* We only have one page for all the descriptors. */ | 1035 | /* The layout of the device page is a "struct lguest_device_desc" followed by a |
999 | if (devices.desc_used + sizeof(*d) > getpagesize()) | 1036 | * number of virtqueue descriptors, then two sets of feature bits, then an |
1000 | errx(1, "Too many devices"); | 1037 | * array of configuration bytes. This routine returns the configuration |
1001 | 1038 | * pointer. */ | |
1002 | /* We don't need to set config_len or status: page is 0 already. */ | 1039 | static u8 *device_config(const struct device *dev) |
1003 | d = (void *)devices.descpage + devices.desc_used; | 1040 | { |
1004 | d->type = type; | 1041 | return (void *)(dev->desc + 1) |
1005 | devices.desc_used += sizeof(*d); | 1042 | + dev->desc->num_vq * sizeof(struct lguest_vqconfig) |
1006 | 1043 | + dev->desc->feature_len * 2; | |
1007 | return d; | ||
1008 | } | 1044 | } |
1009 | 1045 | ||
1010 | /* Each device descriptor is followed by some configuration information. | 1046 | /* This routine allocates a new "struct lguest_device_desc" from descriptor |
1011 | * Each configuration field looks like: u8 type, u8 len, [... len bytes...]. | 1047 | * table page just above the Guest's normal memory. It returns a pointer to |
1012 | * | 1048 | * that descriptor. */ |
1013 | * This routine adds a new field to an existing device's descriptor. It only | 1049 | static struct lguest_device_desc *new_dev_desc(u16 type) |
1014 | * works for the last device, but that's OK because that's how we use it. */ | ||
1015 | static void add_desc_field(struct device *dev, u8 type, u8 len, const void *c) | ||
1016 | { | 1050 | { |
1017 | /* This is the last descriptor, right? */ | 1051 | struct lguest_device_desc d = { .type = type }; |
1018 | assert(devices.descpage + devices.desc_used | 1052 | void *p; |
1019 | == (u8 *)(dev->desc + 1) + dev->desc->config_len); | ||
1020 | 1053 | ||
1021 | /* We only have one page of device descriptions. */ | 1054 | /* Figure out where the next device config is, based on the last one. */ |
1022 | if (devices.desc_used + 2 + len > getpagesize()) | 1055 | if (devices.lastdev) |
1023 | errx(1, "Too many devices"); | 1056 | p = device_config(devices.lastdev) |
1057 | + devices.lastdev->desc->config_len; | ||
1058 | else | ||
1059 | p = devices.descpage; | ||
1024 | 1060 | ||
1025 | /* Copy in the new config header: type then length. */ | 1061 | /* We only have one page for all the descriptors. */ |
1026 | devices.descpage[devices.desc_used++] = type; | 1062 | if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) |
1027 | devices.descpage[devices.desc_used++] = len; | 1063 | errx(1, "Too many devices"); |
1028 | memcpy(devices.descpage + devices.desc_used, c, len); | ||
1029 | devices.desc_used += len; | ||
1030 | 1064 | ||
1031 | /* Update the device descriptor length: two byte head then data. */ | 1065 | /* p might not be aligned, so we memcpy in. */ |
1032 | dev->desc->config_len += 2 + len; | 1066 | return memcpy(p, &d, sizeof(d)); |
1033 | } | 1067 | } |
1034 | 1068 | ||
1035 | /* This routine adds a virtqueue to a device. We specify how many descriptors | 1069 | /* Each device descriptor is followed by the description of its virtqueues. We |
1036 | * the virtqueue is to have. */ | 1070 | * specify how many descriptors the virtqueue is to have. */ |
1037 | static void add_virtqueue(struct device *dev, unsigned int num_descs, | 1071 | static void add_virtqueue(struct device *dev, unsigned int num_descs, |
1038 | void (*handle_output)(int fd, struct virtqueue *me)) | 1072 | void (*handle_output)(int fd, struct virtqueue *me)) |
1039 | { | 1073 | { |
@@ -1059,9 +1093,15 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1059 | /* Initialize the vring. */ | 1093 | /* Initialize the vring. */ |
1060 | vring_init(&vq->vring, num_descs, p, getpagesize()); | 1094 | vring_init(&vq->vring, num_descs, p, getpagesize()); |
1061 | 1095 | ||
1062 | /* Add the configuration information to this device's descriptor. */ | 1096 | /* Append virtqueue to this device's descriptor. We use |
1063 | add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, | 1097 | * device_config() to get the end of the device's current virtqueues; |
1064 | sizeof(vq->config), &vq->config); | 1098 | * we check that we haven't added any config or feature information |
1099 | * yet, otherwise we'd be overwriting them. */ | ||
1100 | assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); | ||
1101 | memcpy(device_config(dev), &vq->config, sizeof(vq->config)); | ||
1102 | dev->desc->num_vq++; | ||
1103 | |||
1104 | verbose("Virtqueue page %#lx\n", to_guest_phys(p)); | ||
1065 | 1105 | ||
1066 | /* Add to tail of list, so dev->vq is first vq, dev->vq->next is | 1106 | /* Add to tail of list, so dev->vq is first vq, dev->vq->next is |
1067 | * second. */ | 1107 | * second. */ |
@@ -1072,11 +1112,41 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1072 | * virtqueue. */ | 1112 | * virtqueue. */ |
1073 | vq->handle_output = handle_output; | 1113 | vq->handle_output = handle_output; |
1074 | 1114 | ||
1075 | /* Set the "Don't Notify Me" flag if we don't have a handler */ | 1115 | /* As an optimization, set the advisory "Don't Notify Me" flag if we |
1116 | * don't have a handler */ | ||
1076 | if (!handle_output) | 1117 | if (!handle_output) |
1077 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; | 1118 | vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; |
1078 | } | 1119 | } |
1079 | 1120 | ||
1121 | /* The first half of the feature bitmask is for us to advertise features. The | ||
1122 | * second half is for the Guest to accept features. | ||
1123 | static void add_feature(struct device *dev, unsigned bit) | ||
1124 | { | ||
1125 | u8 *features = get_feature_bits(dev); | ||
1126 | |||
1127 | /* We can't extend the feature bits once we've added config bytes */ | ||
1128 | if (dev->desc->feature_len <= bit / CHAR_BIT) { | ||
1129 | assert(dev->desc->config_len == 0); | ||
1130 | dev->desc->feature_len = (bit / CHAR_BIT) + 1; | ||
1131 | } | ||
1132 | |||
1133 | features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); | ||
1134 | } | ||
1135 | |||
1136 | /* This routine sets the configuration fields for an existing device's | ||
1137 | * descriptor. It only works for the last device, but that's OK because that's | ||
1138 | * how we use it. */ | ||
1139 | static void set_config(struct device *dev, unsigned len, const void *conf) | ||
1140 | { | ||
1141 | /* Check we haven't overflowed our single page. */ | ||
1142 | if (device_config(dev) + len > devices.descpage + getpagesize()) | ||
1143 | errx(1, "Too many devices"); | ||
1144 | |||
1145 | /* Copy in the config information, and store the length. */ | ||
1146 | memcpy(device_config(dev), conf, len); | ||
1147 | dev->desc->config_len = len; | ||
1148 | } | ||
1149 | |||
1080 | /* This routine does all the creation and setup of a new device, including | 1150 | /* This routine does all the creation and setup of a new device, including |
1081 | * calling new_dev_desc() to allocate the descriptor and device memory. */ | 1151 | * calling new_dev_desc() to allocate the descriptor and device memory. */ |
1082 | static struct device *new_device(const char *name, u16 type, int fd, | 1152 | static struct device *new_device(const char *name, u16 type, int fd, |
@@ -1084,14 +1154,6 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1084 | { | 1154 | { |
1085 | struct device *dev = malloc(sizeof(*dev)); | 1155 | struct device *dev = malloc(sizeof(*dev)); |
1086 | 1156 | ||
1087 | /* Append to device list. Prepending to a single-linked list is | ||
1088 | * easier, but the user expects the devices to be arranged on the bus | ||
1089 | * in command-line order. The first network device on the command line | ||
1090 | * is eth0, the first block device /dev/vda, etc. */ | ||
1091 | *devices.lastdev = dev; | ||
1092 | dev->next = NULL; | ||
1093 | devices.lastdev = &dev->next; | ||
1094 | |||
1095 | /* Now we populate the fields one at a time. */ | 1157 | /* Now we populate the fields one at a time. */ |
1096 | dev->fd = fd; | 1158 | dev->fd = fd; |
1097 | /* If we have an input handler for this file descriptor, then we add it | 1159 | /* If we have an input handler for this file descriptor, then we add it |
@@ -1102,6 +1164,17 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1102 | dev->handle_input = handle_input; | 1164 | dev->handle_input = handle_input; |
1103 | dev->name = name; | 1165 | dev->name = name; |
1104 | dev->vq = NULL; | 1166 | dev->vq = NULL; |
1167 | |||
1168 | /* Append to device list. Prepending to a single-linked list is | ||
1169 | * easier, but the user expects the devices to be arranged on the bus | ||
1170 | * in command-line order. The first network device on the command line | ||
1171 | * is eth0, the first block device /dev/vda, etc. */ | ||
1172 | if (devices.lastdev) | ||
1173 | devices.lastdev->next = dev; | ||
1174 | else | ||
1175 | devices.dev = dev; | ||
1176 | devices.lastdev = dev; | ||
1177 | |||
1105 | return dev; | 1178 | return dev; |
1106 | } | 1179 | } |
1107 | 1180 | ||
@@ -1226,7 +1299,7 @@ static void setup_tun_net(const char *arg) | |||
1226 | int netfd, ipfd; | 1299 | int netfd, ipfd; |
1227 | u32 ip; | 1300 | u32 ip; |
1228 | const char *br_name = NULL; | 1301 | const char *br_name = NULL; |
1229 | u8 hwaddr[6]; | 1302 | struct virtio_net_config conf; |
1230 | 1303 | ||
1231 | /* We open the /dev/net/tun device and tell it we want a tap device. A | 1304 | /* We open the /dev/net/tun device and tell it we want a tap device. A |
1232 | * tap device is like a tun device, only somehow different. To tell | 1305 | * tap device is like a tun device, only somehow different. To tell |
@@ -1265,12 +1338,13 @@ static void setup_tun_net(const char *arg) | |||
1265 | ip = str2ip(arg); | 1338 | ip = str2ip(arg); |
1266 | 1339 | ||
1267 | /* Set up the tun device, and get the mac address for the interface. */ | 1340 | /* Set up the tun device, and get the mac address for the interface. */ |
1268 | configure_device(ipfd, ifr.ifr_name, ip, hwaddr); | 1341 | configure_device(ipfd, ifr.ifr_name, ip, conf.mac); |
1269 | 1342 | ||
1270 | /* Tell Guest what MAC address to use. */ | 1343 | /* Tell Guest what MAC address to use. */ |
1271 | add_desc_field(dev, VIRTIO_CONFIG_NET_MAC_F, sizeof(hwaddr), hwaddr); | 1344 | add_feature(dev, VIRTIO_NET_F_MAC); |
1345 | set_config(dev, sizeof(conf), &conf); | ||
1272 | 1346 | ||
1273 | /* We don't seed the socket any more; setup is done. */ | 1347 | /* We don't need the socket any more; setup is done. */ |
1274 | close(ipfd); | 1348 | close(ipfd); |
1275 | 1349 | ||
1276 | verbose("device %u: tun net %u.%u.%u.%u\n", | 1350 | verbose("device %u: tun net %u.%u.%u.%u\n", |
@@ -1458,8 +1532,7 @@ static void setup_block_file(const char *filename) | |||
1458 | struct device *dev; | 1532 | struct device *dev; |
1459 | struct vblk_info *vblk; | 1533 | struct vblk_info *vblk; |
1460 | void *stack; | 1534 | void *stack; |
1461 | u64 cap; | 1535 | struct virtio_blk_config conf; |
1462 | unsigned int val; | ||
1463 | 1536 | ||
1464 | /* This is the pipe the I/O thread will use to tell us I/O is done. */ | 1537 | /* This is the pipe the I/O thread will use to tell us I/O is done. */ |
1465 | pipe(p); | 1538 | pipe(p); |
@@ -1477,14 +1550,18 @@ static void setup_block_file(const char *filename) | |||
1477 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); | 1550 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); |
1478 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); | 1551 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); |
1479 | 1552 | ||
1553 | /* We support barriers. */ | ||
1554 | add_feature(dev, VIRTIO_BLK_F_BARRIER); | ||
1555 | |||
1480 | /* Tell Guest how many sectors this device has. */ | 1556 | /* Tell Guest how many sectors this device has. */ |
1481 | cap = cpu_to_le64(vblk->len / 512); | 1557 | conf.capacity = cpu_to_le64(vblk->len / 512); |
1482 | add_desc_field(dev, VIRTIO_CONFIG_BLK_F_CAPACITY, sizeof(cap), &cap); | ||
1483 | 1558 | ||
1484 | /* Tell Guest not to put in too many descriptors at once: two are used | 1559 | /* Tell Guest not to put in too many descriptors at once: two are used |
1485 | * for the in and out elements. */ | 1560 | * for the in and out elements. */ |
1486 | val = cpu_to_le32(VIRTQUEUE_NUM - 2); | 1561 | add_feature(dev, VIRTIO_BLK_F_SEG_MAX); |
1487 | add_desc_field(dev, VIRTIO_CONFIG_BLK_F_SEG_MAX, sizeof(val), &val); | 1562 | conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); |
1563 | |||
1564 | set_config(dev, sizeof(conf), &conf); | ||
1488 | 1565 | ||
1489 | /* The I/O thread writes to this end of the pipe when done. */ | 1566 | /* The I/O thread writes to this end of the pipe when done. */ |
1490 | vblk->done_fd = p[1]; | 1567 | vblk->done_fd = p[1]; |
@@ -1505,7 +1582,7 @@ static void setup_block_file(const char *filename) | |||
1505 | close(vblk->workpipe[0]); | 1582 | close(vblk->workpipe[0]); |
1506 | 1583 | ||
1507 | verbose("device %u: virtblock %llu sectors\n", | 1584 | verbose("device %u: virtblock %llu sectors\n", |
1508 | devices.device_num, cap); | 1585 | devices.device_num, le64_to_cpu(conf.capacity)); |
1509 | } | 1586 | } |
1510 | /* That's the end of device setup. :*/ | 1587 | /* That's the end of device setup. :*/ |
1511 | 1588 | ||
@@ -1610,12 +1687,12 @@ int main(int argc, char *argv[]) | |||
1610 | /* First we initialize the device list. Since console and network | 1687 | /* First we initialize the device list. Since console and network |
1611 | * device receive input from a file descriptor, we keep an fdset | 1688 | * device receive input from a file descriptor, we keep an fdset |
1612 | * (infds) and the maximum fd number (max_infd) with the head of the | 1689 | * (infds) and the maximum fd number (max_infd) with the head of the |
1613 | * list. We also keep a pointer to the last device, for easy appending | 1690 | * list. We also keep a pointer to the last device. Finally, we keep |
1614 | * to the list. Finally, we keep the next interrupt number to hand out | 1691 | * the next interrupt number to hand out (1: remember that 0 is used by |
1615 | * (1: remember that 0 is used by the timer). */ | 1692 | * the timer). */ |
1616 | FD_ZERO(&devices.infds); | 1693 | FD_ZERO(&devices.infds); |
1617 | devices.max_infd = -1; | 1694 | devices.max_infd = -1; |
1618 | devices.lastdev = &devices.dev; | 1695 | devices.lastdev = NULL; |
1619 | devices.next_irq = 1; | 1696 | devices.next_irq = 1; |
1620 | 1697 | ||
1621 | cpu_id = 0; | 1698 | cpu_id = 0; |
diff --git a/Documentation/md.txt b/Documentation/md.txt index 5818628207b5..396cdd982c26 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -416,6 +416,16 @@ also have | |||
416 | sectors in total that could need to be processed. The two | 416 | sectors in total that could need to be processed. The two |
417 | numbers are separated by a '/' thus effectively showing one | 417 | numbers are separated by a '/' thus effectively showing one |
418 | value, a fraction of the process that is complete. | 418 | value, a fraction of the process that is complete. |
419 | A 'select' on this attribute will return when resync completes, | ||
420 | when it reaches the current sync_max (below) and possibly at | ||
421 | other times. | ||
422 | |||
423 | sync_max | ||
424 | This is a number of sectors at which point a resync/recovery | ||
425 | process will pause. When a resync is active, the value can | ||
426 | only ever be increased, never decreased. The value of 'max' | ||
427 | effectively disables the limit. | ||
428 | |||
419 | 429 | ||
420 | sync_speed | 430 | sync_speed |
421 | This shows the current actual speed, in K/sec, of the current | 431 | This shows the current actual speed, in K/sec, of the current |
diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt index badb7480ea62..d8968958d839 100644 --- a/Documentation/networking/decnet.txt +++ b/Documentation/networking/decnet.txt | |||
@@ -60,7 +60,7 @@ operation of the local communications in any other way though. | |||
60 | 60 | ||
61 | The kernel command line takes options looking like the following: | 61 | The kernel command line takes options looking like the following: |
62 | 62 | ||
63 | decnet=1,2 | 63 | decnet.addr=1,2 |
64 | 64 | ||
65 | the two numbers are the node address 1,2 = 1.2 For 2.2.xx kernels | 65 | the two numbers are the node address 1,2 = 1.2 For 2.2.xx kernels |
66 | and early 2.3.xx kernels, you must use a comma when specifying the | 66 | and early 2.3.xx kernels, you must use a comma when specifying the |
diff --git a/Documentation/pci.txt b/Documentation/pci.txt index 7754f5aea4e9..72b20c639596 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt | |||
@@ -274,8 +274,6 @@ the PCI device by calling pci_enable_device(). This will: | |||
274 | o allocate an IRQ (if BIOS did not). | 274 | o allocate an IRQ (if BIOS did not). |
275 | 275 | ||
276 | NOTE: pci_enable_device() can fail! Check the return value. | 276 | NOTE: pci_enable_device() can fail! Check the return value. |
277 | NOTE2: Also see pci_enable_device_bars() below. Drivers can | ||
278 | attempt to enable only a subset of BARs they need. | ||
279 | 277 | ||
280 | [ OS BUG: we don't check resource allocations before enabling those | 278 | [ OS BUG: we don't check resource allocations before enabling those |
281 | resources. The sequence would make more sense if we called | 279 | resources. The sequence would make more sense if we called |
@@ -605,40 +603,7 @@ device lists. This is still possible but discouraged. | |||
605 | 603 | ||
606 | 604 | ||
607 | 605 | ||
608 | 10. pci_enable_device_bars() and Legacy I/O Port space | 606 | 10. MMIO Space and "Write Posting" |
609 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
610 | |||
611 | Large servers may not be able to provide I/O port resources to all PCI | ||
612 | devices. I/O Port space is only 64KB on Intel Architecture[1] and is | ||
613 | likely also fragmented since the I/O base register of PCI-to-PCI | ||
614 | bridge will usually be aligned to a 4KB boundary[2]. On such systems, | ||
615 | pci_enable_device() and pci_request_region() will fail when | ||
616 | attempting to enable I/O Port regions that don't have I/O Port | ||
617 | resources assigned. | ||
618 | |||
619 | Fortunately, many PCI devices which request I/O Port resources also | ||
620 | provide access to the same registers via MMIO BARs. These devices can | ||
621 | be handled without using I/O port space and the drivers typically | ||
622 | offer a CONFIG_ option to only use MMIO regions | ||
623 | (e.g. CONFIG_TULIP_MMIO). PCI devices typically provide I/O port | ||
624 | interface for legacy OSes and will work when I/O port resources are not | ||
625 | assigned. The "PCI Local Bus Specification Revision 3.0" discusses | ||
626 | this on p.44, "IMPLEMENTATION NOTE". | ||
627 | |||
628 | If your PCI device driver doesn't need I/O port resources assigned to | ||
629 | I/O Port BARs, you should use pci_enable_device_bars() instead of | ||
630 | pci_enable_device() in order not to enable I/O port regions for the | ||
631 | corresponding devices. In addition, you should use | ||
632 | pci_request_selected_regions() and pci_release_selected_regions() | ||
633 | instead of pci_request_regions()/pci_release_regions() in order not to | ||
634 | request/release I/O port regions for the corresponding devices. | ||
635 | |||
636 | [1] Some systems support 64KB I/O port space per PCI segment. | ||
637 | [2] Some PCI-to-PCI bridges support optional 1KB aligned I/O base. | ||
638 | |||
639 | |||
640 | |||
641 | 11. MMIO Space and "Write Posting" | ||
642 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 607 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
643 | 608 | ||
644 | Converting a driver from using I/O Port space to using MMIO space | 609 | Converting a driver from using I/O Port space to using MMIO space |
diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index 4739c5c3face..96f155e68750 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt | |||
@@ -33,8 +33,8 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: | |||
33 | and can be used (e.g. for SET_NETDEV_DEV) by using | 33 | and can be used (e.g. for SET_NETDEV_DEV) by using |
34 | handle_to_dev(client_handle_t * handle). | 34 | handle_to_dev(client_handle_t * handle). |
35 | 35 | ||
36 | * Convert internal I/O port addresses to unsigned long (as of 2.6.11) | 36 | * Convert internal I/O port addresses to unsigned int (as of 2.6.11) |
37 | ioaddr_t should be replaced by kio_addr_t in PCMCIA card drivers. | 37 | ioaddr_t should be replaced by unsigned int in PCMCIA card drivers. |
38 | 38 | ||
39 | * irq_mask and irq_list parameters (as of 2.6.11) | 39 | * irq_mask and irq_list parameters (as of 2.6.11) |
40 | The irq_mask and irq_list parameters should no longer be used in | 40 | The irq_mask and irq_list parameters should no longer be used in |
diff --git a/Documentation/pm_qos_interface.txt b/Documentation/pm_qos_interface.txt new file mode 100644 index 000000000000..49adb1a33514 --- /dev/null +++ b/Documentation/pm_qos_interface.txt | |||
@@ -0,0 +1,59 @@ | |||
1 | PM Quality of Service interface. | |
2 | |||
3 | This interface provides a kernel and user mode interface for registering | ||
4 | performance expectations by drivers, subsystems and user space applications on | ||
5 | one of the parameters. | ||
6 | |||
7 | Currently we have {cpu_dma_latency, network_latency, network_throughput} as the | ||
8 | initial set of pm_qos parameters. | ||
9 | |||
10 | The infrastructure exposes multiple misc device nodes one per implemented | ||
11 | parameter. The set of parameters implemented is defined by pm_qos_power_init() | |
12 | and pm_qos_params.h. This is done because having the available parameters | ||
13 | being runtime configurable or changeable from a driver was seen as too easy to | ||
14 | abuse. | ||
15 | |||
16 | For each parameter a list of performance requirements is maintained along with | ||
17 | an aggregated target value. The aggregated target value is updated with | ||
18 | changes to the requirement list or elements of the list. Typically the | ||
19 | aggregated target value is simply the max or min of the requirement values held | ||
20 | in the parameter list elements. | ||
21 | |||
22 | From kernel mode the use of this interface is simple: | ||
23 | pm_qos_add_requirement(param_id, name, target_value): | ||
24 | Will insert a named element in the list for that identified PM_QOS parameter | ||
25 | with the target value. Upon change to this list the new target is recomputed | ||
26 | and any registered notifiers are called only if the target value is now | ||
27 | different. | ||
28 | |||
29 | pm_qos_update_requirement(param_id, name, new_target_value): | ||
30 | Will search the list identified by the param_id for the named list element and | ||
31 | then update its target value, calling the notification tree if the aggregated | ||
32 | target is changed. An element with that name must already be registered. | |
33 | |||
34 | pm_qos_remove_requirement(param_id, name): | ||
35 | Will search the identified list for the named element and remove it, after | ||
36 | removal it will update the aggregate target and call the notification tree if | ||
37 | the target was changed as a result of removing the named requirement. | ||
38 | |||
39 | |||
40 | From user mode: | ||
41 | Only processes can register a pm_qos requirement. To provide for automatic | ||
42 | cleanup for the process, the interface requires the process to register its | |
43 | parameter requirements in the following way: | ||
44 | |||
45 | To register the default pm_qos target for the specific parameter, the process | ||
46 | must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] | ||
47 | |||
48 | As long as the device node is held open that process has a registered | ||
49 | requirement on the parameter. The name of the requirement is "process_<PID>" | ||
50 | derived from the current->pid from within the open system call. | ||
51 | |||
52 | To change the requested target value the process needs to write a s32 value to | ||
53 | the open device node. This translates to a pm_qos_update_requirement call. | ||
54 | |||
55 | To remove the user mode request for a target value simply close the device | ||
56 | node. | ||
57 | |||
58 | |||
59 | |||
diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt index 57aef2f6e0de..1555001bc733 100644 --- a/Documentation/power/basic-pm-debugging.txt +++ b/Documentation/power/basic-pm-debugging.txt | |||
@@ -1,45 +1,111 @@ | |||
1 | Debugging suspend and resume | 1 | Debugging hibernation and suspend |
2 | (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL | 2 | (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL |
3 | 3 | ||
4 | 1. Testing suspend to disk (STD) | 4 | 1. Testing hibernation (aka suspend to disk or STD) |
5 | 5 | ||
6 | To verify that the STD works, you can try to suspend in the "reboot" mode: | 6 | To check if hibernation works, you can try to hibernate in the "reboot" mode: |
7 | 7 | ||
8 | # echo reboot > /sys/power/disk | 8 | # echo reboot > /sys/power/disk |
9 | # echo disk > /sys/power/state | 9 | # echo disk > /sys/power/state |
10 | 10 | ||
11 | and the system should suspend, reboot, resume and get back to the command prompt | 11 | and the system should create a hibernation image, reboot, resume and get back to |
12 | where you have started the transition. If that happens, the STD is most likely | 12 | the command prompt where you have started the transition. If that happens, |
13 | to work correctly, but you need to repeat the test at least a couple of times in | 13 | hibernation is most likely to work correctly. Still, you need to repeat the |
14 | a row for confidence. This is necessary, because some problems only show up on | 14 | test at least a couple of times in a row for confidence. [This is necessary, |
15 | a second attempt at suspending and resuming the system. You should also test | 15 | because some problems only show up on a second attempt at suspending and |
16 | the "platform" and "shutdown" modes of suspend: | 16 | resuming the system.] Moreover, hibernating in the "reboot" and "shutdown" |
17 | modes causes the PM core to skip some platform-related callbacks which on ACPI | ||
18 | systems might be necessary to make hibernation work. Thus, if your machine fails | |
19 | to hibernate or resume in the "reboot" mode, you should try the "platform" mode: | ||
17 | 20 | ||
18 | # echo platform > /sys/power/disk | 21 | # echo platform > /sys/power/disk |
19 | # echo disk > /sys/power/state | 22 | # echo disk > /sys/power/state |
20 | 23 | ||
21 | or | 24 | which is the default and recommended mode of hibernation. |
25 | |||
26 | Unfortunately, the "platform" mode of hibernation does not work on some systems | ||
27 | with broken BIOSes. In such cases the "shutdown" mode of hibernation might | ||
28 | work: | ||
22 | 29 | ||
23 | # echo shutdown > /sys/power/disk | 30 | # echo shutdown > /sys/power/disk |
24 | # echo disk > /sys/power/state | 31 | # echo disk > /sys/power/state |
25 | 32 | ||
26 | in which cases you will have to press the power button to make the system | 33 | (it is similar to the "reboot" mode, but it requires you to press the power |
27 | resume. If that does not work, you will need to identify what goes wrong. | 34 | button to make the system resume). |
35 | |||
36 | If neither "platform" nor "shutdown" hibernation mode works, you will need to | ||
37 | identify what goes wrong. | ||
38 | |||
39 | a) Test modes of hibernation | ||
40 | |||
41 | To find out why hibernation fails on your system, you can use a special testing | ||
42 | facility available if the kernel is compiled with CONFIG_PM_DEBUG set. Then, | ||
43 | there is the file /sys/power/pm_test that can be used to make the hibernation | ||
44 | core run in a test mode. There are 5 test modes available: | ||
45 | |||
46 | freezer | ||
47 | - test the freezing of processes | ||
48 | |||
49 | devices | ||
50 | - test the freezing of processes and suspending of devices | ||
28 | 51 | ||
29 | a) Test mode of STD | 52 | platform |
53 | - test the freezing of processes, suspending of devices and platform | ||
54 | global control methods(*) | ||
30 | 55 | ||
31 | To verify if there are any drivers that cause problems you can run the STD | 56 | processors |
32 | in the test mode: | 57 | - test the freezing of processes, suspending of devices, platform |
58 | global control methods(*) and the disabling of nonboot CPUs | ||
33 | 59 | ||
34 | # echo test > /sys/power/disk | 60 | core |
61 | - test the freezing of processes, suspending of devices, platform global | ||
62 | control methods(*), the disabling of nonboot CPUs and suspending of | ||
63 | platform/system devices | ||
64 | |||
65 | (*) the platform global control methods are only available on ACPI systems | ||
66 | and are only tested if the hibernation mode is set to "platform" | ||
67 | |||
68 | To use one of them it is necessary to write the corresponding string to | ||
69 | /sys/power/pm_test (eg. "devices" to test the freezing of processes and | ||
70 | suspending devices) and issue the standard hibernation commands. For example, | ||
71 | to use the "devices" test mode along with the "platform" mode of hibernation, | ||
72 | you should do the following: | ||
73 | |||
74 | # echo devices > /sys/power/pm_test | ||
75 | # echo platform > /sys/power/disk | ||
35 | # echo disk > /sys/power/state | 76 | # echo disk > /sys/power/state |
36 | 77 | ||
37 | in which case the system should freeze tasks, suspend devices, disable nonboot | 78 | Then, the kernel will try to freeze processes, suspend devices, wait 5 seconds, |
38 | CPUs (if any), wait for 5 seconds, enable nonboot CPUs, resume devices, thaw | 79 | resume devices and thaw processes. If "platform" is written to |
39 | tasks and return to your command prompt. If that fails, most likely there is | 80 | /sys/power/pm_test , then after suspending devices the kernel will additionally |
40 | a driver that fails to either suspend or resume (in the latter case the system | 81 | invoke the global control methods (eg. ACPI global control methods) used to |
41 | may hang or be unstable after the test, so please take that into consideration). | 82 | prepare the platform firmware for hibernation. Next, it will wait 5 seconds and |
42 | To find this driver, you can carry out a binary search according to the rules: | 83 | invoke the platform (eg. ACPI) global methods used to cancel hibernation etc. |
84 | |||
85 | Writing "none" to /sys/power/pm_test causes the kernel to switch to the normal | ||
86 | hibernation/suspend operations. Also, when open for reading, /sys/power/pm_test | ||
87 | contains a space-separated list of all available tests (including "none" that | ||
88 | represents the normal functionality) in which the current test level is | ||
89 | indicated by square brackets. | ||
90 | |||
91 | Generally, as you can see, each test level is more "invasive" than the previous | ||
92 | one and the "core" level tests the hardware and drivers as deeply as possible | ||
93 | without creating a hibernation image. Obviously, if the "devices" test fails, | ||
94 | the "platform" test will fail as well and so on. Thus, as a rule of thumb, you | ||
95 | should try the test modes starting from "freezer", through "devices", "platform" | ||
96 | and "processors" up to "core" (repeat the test on each level a couple of times | ||
97 | to make sure that any random factors are avoided). | ||
98 | |||
99 | If the "freezer" test fails, there is a task that cannot be frozen (in that case | ||
100 | it usually is possible to identify the offending task by analysing the output of | ||
101 | dmesg obtained after the failing test). Failure at this level usually means | ||
102 | that there is a problem with the tasks freezer subsystem that should be | ||
103 | reported. | ||
104 | |||
105 | If the "devices" test fails, most likely there is a driver that cannot suspend | ||
106 | or resume its device (in the latter case the system may hang or become unstable | ||
107 | after the test, so please take that into consideration). To find this driver, | ||
108 | you can carry out a binary search according to the rules: | ||
43 | - if the test fails, unload a half of the drivers currently loaded and repeat | 109 | - if the test fails, unload a half of the drivers currently loaded and repeat |
44 | (that would probably involve rebooting the system, so always note what drivers | 110 | (that would probably involve rebooting the system, so always note what drivers |
45 | have been loaded before the test), | 111 | have been loaded before the test), |
@@ -47,23 +113,46 @@ have been loaded before the test), | |||
47 | recently and repeat. | 113 | recently and repeat. |
48 | 114 | ||
49 | Once you have found the failing driver (there can be more than just one of | 115 | Once you have found the failing driver (there can be more than just one of |
50 | them), you have to unload it every time before the STD transition. In that case | 116 | them), you have to unload it every time before hibernation. In that case please |
51 | please make sure to report the problem with the driver. | 117 | make sure to report the problem with the driver. |
52 | 118 | ||
53 | It is also possible that a cycle can still fail after you have unloaded | 119 | It is also possible that the "devices" test will still fail after you have |
54 | all modules. In that case, you would want to look in your kernel configuration | 120 | unloaded all modules. In that case, you may want to look in your kernel |
55 | for the drivers that can be compiled as modules (testing again with them as | 121 | configuration for the drivers that can be compiled as modules (and test again |
56 | modules), and possibly also try boot time options such as "noapic" or "noacpi". | 122 | with these drivers compiled as modules). You may also try to use some special |
123 | kernel command line options such as "noapic", "noacpi" or even "acpi=off". | ||
124 | |||
125 | If the "platform" test fails, there is a problem with the handling of the | ||
126 | platform (eg. ACPI) firmware on your system. In that case the "platform" mode | ||
127 | of hibernation is not likely to work. You can try the "shutdown" mode, but that | ||
128 | is rather a poor man's workaround. | ||
129 | |||
130 | If the "processors" test fails, the disabling/enabling of nonboot CPUs does not | ||
131 | work (of course, this may only be an issue on SMP systems) and the problem | |
132 | should be reported. In that case you can also try to switch the nonboot CPUs | ||
133 | off and on using the /sys/devices/system/cpu/cpu*/online sysfs attributes and | ||
134 | see if that works. | ||
135 | |||
136 | If the "core" test fails, which means that suspending of the system/platform | ||
137 | devices has failed (these devices are suspended on one CPU with interrupts off), | ||
138 | the problem is most probably hardware-related and serious, so it should be | ||
139 | reported. | ||
140 | |||
141 | A failure of any of the "platform", "processors" or "core" tests may cause your | ||
142 | system to hang or become unstable, so please beware. Such a failure usually | ||
143 | indicates a serious problem that very well may be related to the hardware, but | ||
144 | please report it anyway. | ||
57 | 145 | ||
58 | b) Testing minimal configuration | 146 | b) Testing minimal configuration |
59 | 147 | ||
60 | If the test mode of STD works, you can boot the system with "init=/bin/bash" | 148 | If all of the hibernation test modes work, you can boot the system with the |
61 | and attempt to suspend in the "reboot", "shutdown" and "platform" modes. If | 149 | "init=/bin/bash" command line parameter and attempt to hibernate in the |
62 | that does not work, there probably is a problem with a driver statically | 150 | "reboot", "shutdown" and "platform" modes. If that does not work, there |
63 | compiled into the kernel and you can try to compile more drivers as modules, | 151 | probably is a problem with a driver statically compiled into the kernel and you |
64 | so that they can be tested individually. Otherwise, there is a problem with a | 152 | can try to compile more drivers as modules, so that they can be tested |
65 | modular driver and you can find it by loading a half of the modules you normally | 153 | individually. Otherwise, there is a problem with a modular driver and you can |
66 | use and binary searching in accordance with the algorithm: | 154 | find it by loading a half of the modules you normally use and binary searching |
155 | in accordance with the algorithm: | ||
67 | - if there are n modules loaded and the attempt to suspend and resume fails, | 156 | - if there are n modules loaded and the attempt to suspend and resume fails, |
68 | unload n/2 of the modules and try again (that would probably involve rebooting | 157 | unload n/2 of the modules and try again (that would probably involve rebooting |
69 | the system), | 158 | the system), |
@@ -71,19 +160,19 @@ the system), | |||
71 | load n/2 modules more and try again. | 160 | load n/2 modules more and try again. |
72 | 161 | ||
73 | Again, if you find the offending module(s), it(they) must be unloaded every time | 162 | Again, if you find the offending module(s), it(they) must be unloaded every time |
74 | before the STD transition, and please report the problem with it(them). | 163 | before hibernation, and please report the problem with it(them). |
75 | 164 | ||
76 | c) Advanced debugging | 165 | c) Advanced debugging |
77 | 166 | ||
78 | In case the STD does not work on your system even in the minimal configuration | 167 | In case hibernation does not work on your system even in the minimal |
79 | and compiling more drivers as modules is not practical or some modules cannot | 168 | configuration and compiling more drivers as modules is not practical or some |
80 | be unloaded, you can use one of the more advanced debugging techniques to find | 169 | modules cannot be unloaded, you can use one of the more advanced debugging |
81 | the problem. First, if there is a serial port in your box, you can boot the | 170 | techniques to find the problem. First, if there is a serial port in your box, |
82 | kernel with the 'no_console_suspend' parameter and try to log kernel | 171 | you can boot the kernel with the 'no_console_suspend' parameter and try to log |
83 | messages using the serial console. This may provide you with some information | 172 | kernel messages using the serial console. This may provide you with some |
84 | about the reasons of the suspend (resume) failure. Alternatively, it may be | 173 | information about the reasons of the suspend (resume) failure. Alternatively, |
85 | possible to use a FireWire port for debugging with firescope | 174 | it may be possible to use a FireWire port for debugging with firescope |
86 | (ftp://ftp.firstfloor.org/pub/ak/firescope/). On i386 it is also possible to | 175 | (ftp://ftp.firstfloor.org/pub/ak/firescope/). On x86 it is also possible to |
87 | use the PM_TRACE mechanism documented in Documentation/s2ram.txt . | 176 | use the PM_TRACE mechanism documented in Documentation/s2ram.txt . |
88 | 177 | ||
89 | 2. Testing suspend to RAM (STR) | 178 | 2. Testing suspend to RAM (STR) |
@@ -91,16 +180,25 @@ use the PM_TRACE mechanism documented in Documentation/s2ram.txt . | |||
91 | To verify that the STR works, it is generally more convenient to use the s2ram | 180 | To verify that the STR works, it is generally more convenient to use the s2ram |
92 | tool available from http://suspend.sf.net and documented at | 181 | tool available from http://suspend.sf.net and documented at |
93 | http://en.opensuse.org/s2ram . However, before doing that it is recommended to | 182 | http://en.opensuse.org/s2ram . However, before doing that it is recommended to |
94 | carry out the procedure described in section 1. | 183 | carry out STR testing using the facility described in section 1. |
95 | 184 | ||
96 | Assume you have resolved the problems with the STD and you have found some | 185 | Namely, after writing "freezer", "devices", "platform", "processors", or "core" |
97 | failing drivers. These drivers are also likely to fail during the STR or | 186 | into /sys/power/pm_test (available if the kernel is compiled with |
98 | during the resume, so it is better to unload them every time before the STR | 187 | CONFIG_PM_DEBUG set) the suspend code will work in the test mode corresponding |
99 | transition. Now, you can follow the instructions at | 188 | to the given string. The STR test modes are defined in the same way as for |
100 | http://en.opensuse.org/s2ram to test the system, but if it does not work | 189 | hibernation, so please refer to Section 1 for more information about them. In |
101 | "out of the box", you may need to boot it with "init=/bin/bash" and test | 190 | particular, the "core" test allows you to test everything except for the actual |
102 | s2ram in the minimal configuration. In that case, you may be able to search | 191 | invocation of the platform firmware in order to put the system into the sleep |
103 | for failing drivers by following the procedure analogous to the one described in | 192 | state. |
104 | 1b). If you find some failing drivers, you will have to unload them every time | 193 | |
105 | before the STR transition (ie. before you run s2ram), and please report the | 194 | Among other things, the testing with the help of /sys/power/pm_test may allow |
106 | problems with them. | 195 | you to identify drivers that fail to suspend or resume their devices. They |
196 | should be unloaded every time before an STR transition. | ||
197 | |||
198 | Next, you can follow the instructions at http://en.opensuse.org/s2ram to test | ||
199 | the system, but if it does not work "out of the box", you may need to boot it | ||
200 | with "init=/bin/bash" and test s2ram in the minimal configuration. In that | ||
201 | case, you may be able to search for failing drivers by following the procedure | ||
202 | analogous to the one described in section 1. If you find some failing drivers, | ||
203 | you will have to unload them every time before an STR transition (ie. before | ||
204 | you run s2ram), and please report the problems with them. | ||
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index d0e79d5820a5..c53d26361919 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt | |||
@@ -502,52 +502,3 @@ If the CPU can have a "cpufreq" driver, there also may be opportunities | |||
502 | to shift to lower voltage settings and reduce the power cost of executing | 502 | to shift to lower voltage settings and reduce the power cost of executing |
503 | a given number of instructions. (Without voltage adjustment, it's rare | 503 | a given number of instructions. (Without voltage adjustment, it's rare |
504 | for cpufreq to save much power; the cost-per-instruction must go down.) | 504 | for cpufreq to save much power; the cost-per-instruction must go down.) |
505 | |||
506 | |||
507 | /sys/devices/.../power/state files | ||
508 | ================================== | ||
509 | For now you can also test some of this functionality using sysfs. | ||
510 | |||
511 | DEPRECATED: USE "power/state" ONLY FOR DRIVER TESTING, AND | ||
512 | AVOID USING dev->power.power_state IN DRIVERS. | ||
513 | |||
514 | THESE WILL BE REMOVED. IF THE "power/state" FILE GETS REPLACED, | ||
515 | IT WILL BECOME SOMETHING COUPLED TO THE BUS OR DRIVER. | ||
516 | |||
517 | In each device's directory, there is a 'power' directory, which contains | ||
518 | at least a 'state' file. The value of this field is effectively boolean, | ||
519 | PM_EVENT_ON or PM_EVENT_SUSPEND. | ||
520 | |||
521 | * Reading from this file displays a value corresponding to | ||
522 | the power.power_state.event field. All nonzero values are | ||
523 | displayed as "2", corresponding to a low power state; zero | ||
524 | is displayed as "0", corresponding to normal operation. | ||
525 | |||
526 | * Writing to this file initiates a transition using the | ||
527 | specified event code number; only '0', '2', and '3' are | ||
528 | accepted (without a newline); '2' and '3' are both | ||
529 | mapped to PM_EVENT_SUSPEND. | ||
530 | |||
531 | On writes, the PM core relies on that recorded event code and the device/bus | ||
532 | capabilities to determine whether it uses a partial suspend() or resume() | ||
533 | sequence to change things so that the recorded event corresponds to the | ||
534 | numeric parameter. | ||
535 | |||
536 | - If the bus requires the irqs-disabled suspend_late()/resume_early() | ||
537 | phases, writes fail because those operations are not supported here. | ||
538 | |||
539 | - If the recorded value is the expected value, nothing is done. | ||
540 | |||
541 | - If the recorded value is nonzero, the device is partially resumed, | ||
542 | using the bus.resume() and/or class.resume() methods. | ||
543 | |||
544 | - If the target value is nonzero, the device is partially suspended, | ||
545 | using the class.suspend() and/or bus.suspend() methods and the | ||
546 | PM_EVENT_SUSPEND message. | ||
547 | |||
548 | Drivers have no way to tell whether their suspend() and resume() calls | ||
549 | have come through the sysfs power/state file or as part of entering a | ||
550 | system sleep state, except that when accessed through sysfs the normal | ||
551 | parent/child sequencing rules are ignored. Drivers (such as bus, bridge, | ||
552 | or hub drivers) which expose child devices may need to enforce those rules | ||
553 | on their own. | ||
diff --git a/Documentation/power/drivers-testing.txt b/Documentation/power/drivers-testing.txt index e4bdcaee24e4..7f7a737f7f9f 100644 --- a/Documentation/power/drivers-testing.txt +++ b/Documentation/power/drivers-testing.txt | |||
@@ -6,9 +6,9 @@ Testing suspend and resume support in device drivers | |||
6 | Unfortunately, to effectively test the support for the system-wide suspend and | 6 | Unfortunately, to effectively test the support for the system-wide suspend and |
7 | resume transitions in a driver, it is necessary to suspend and resume a fully | 7 | resume transitions in a driver, it is necessary to suspend and resume a fully |
8 | functional system with this driver loaded. Moreover, that should be done | 8 | functional system with this driver loaded. Moreover, that should be done |
9 | several times, preferably several times in a row, and separately for the suspend | 9 | several times, preferably several times in a row, and separately for hibernation |
10 | to disk (STD) and the suspend to RAM (STR) transitions, because each of these | 10 | (aka suspend to disk or STD) and suspend to RAM (STR), because each of these |
11 | cases involves different ordering of operations and different interactions with | 11 | cases involves slightly different operations and different interactions with |
12 | the machine's BIOS. | 12 | the machine's BIOS. |
13 | 13 | ||
14 | Of course, for this purpose the test system has to be known to suspend and | 14 | Of course, for this purpose the test system has to be known to suspend and |
@@ -22,20 +22,24 @@ for more information about the debugging of suspend/resume functionality. | |||
22 | Once you have resolved the suspend/resume-related problems with your test system | 22 | Once you have resolved the suspend/resume-related problems with your test system |
23 | without the new driver, you are ready to test it: | 23 | without the new driver, you are ready to test it: |
24 | 24 | ||
25 | a) Build the driver as a module, load it and try the STD in the test mode (see: | 25 | a) Build the driver as a module, load it and try the test modes of hibernation |
26 | Documentation/power/basic-pm-debugging.txt, 1a)). | 26 | (see: Documentation/power/basic-pm-debugging.txt, 1). |
27 | 27 | ||
28 | b) Load the driver and attempt to suspend to disk in the "reboot", "shutdown" | 28 | b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and |
29 | and "platform" modes (see: Documentation/power/basic-pm-debugging.txt, 1). | 29 | "platform" modes (see: Documentation/power/basic-pm-debugging.txt, 1). |
30 | 30 | ||
31 | c) Compile the driver directly into the kernel and try the STD in the test mode. | 31 | c) Compile the driver directly into the kernel and try the test modes of |
32 | hibernation. | ||
32 | 33 | ||
33 | d) Attempt to suspend to disk with the driver compiled directly into the kernel | 34 | d) Attempt to hibernate with the driver compiled directly into the kernel |
34 | in the "reboot", "shutdown" and "platform" modes. | 35 | in the "reboot", "shutdown" and "platform" modes. |
35 | 36 | ||
36 | e) Attempt to suspend to RAM using the s2ram tool with the driver loaded (see: | 37 | e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.txt, |
37 | Documentation/power/basic-pm-debugging.txt, 2). As far as the STR tests are | 38 | 2). [As far as the STR tests are concerned, it should not matter whether or |
38 | concerned, it should not matter whether or not the driver is built as a module. | 39 | not the driver is built as a module.] |
40 | |||
41 | f) Attempt to suspend to RAM using the s2ram tool with the driver loaded | ||
42 | (see: Documentation/power/basic-pm-debugging.txt, 2). | ||
39 | 43 | ||
40 | Each of the above tests should be repeated several times and the STD tests | 44 | Each of the above tests should be repeated several times and the STD tests |
41 | should be mixed with the STR tests. If any of them fails, the driver cannot be | 45 | should be mixed with the STR tests. If any of them fails, the driver cannot be |
diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt index 9293e4bc857c..ae1b7ec07684 100644 --- a/Documentation/power/notifiers.txt +++ b/Documentation/power/notifiers.txt | |||
@@ -28,6 +28,14 @@ PM_POST_HIBERNATION The system memory state has been restored from a | |||
28 | hibernation. Device drivers' .resume() callbacks have | 28 | hibernation. Device drivers' .resume() callbacks have |
29 | been executed and tasks have been thawed. | 29 | been executed and tasks have been thawed. |
30 | 30 | ||
31 | PM_RESTORE_PREPARE The system is going to restore a hibernation image. | ||
32 | If all goes well the restored kernel will issue a | ||
33 | PM_POST_HIBERNATION notification. | ||
34 | |||
35 | PM_POST_RESTORE An error occurred during the hibernation restore. | ||
36 | Device drivers' .resume() callbacks have been executed | ||
37 | and tasks have been thawed. | ||
38 | |||
31 | PM_SUSPEND_PREPARE The system is preparing for a suspend. | 39 | PM_SUSPEND_PREPARE The system is preparing for a suspend. |
32 | 40 | ||
33 | PM_POST_SUSPEND The system has just resumed or an error occurred during | 41 | PM_POST_SUSPEND The system has just resumed or an error occurred during |
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index aea7e9209667..9d60ab717a7b 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt | |||
@@ -386,6 +386,11 @@ before suspending; then remount them after resuming. | |||
386 | There is a work-around for this problem. For more information, see | 386 | There is a work-around for this problem. For more information, see |
387 | Documentation/usb/persist.txt. | 387 | Documentation/usb/persist.txt. |
388 | 388 | ||
389 | Q: Can I suspend-to-disk using a swap partition under LVM? | ||
390 | |||
391 | A: No. You can suspend successfully, but you'll not be able to | ||
392 | resume. uswsusp should be able to work with LVM. See suspend.sf.net. | ||
393 | |||
389 | Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were | 394 | Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were |
390 | compiled with the similar configuration files. Anyway I found that | 395 | compiled with the similar configuration files. Anyway I found that |
391 | suspend to disk (and resume) is much slower on 2.6.16 compared to | 396 | suspend to disk (and resume) is much slower on 2.6.16 compared to |
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt index e00c6cf09e85..7b99636564c8 100644 --- a/Documentation/power/userland-swsusp.txt +++ b/Documentation/power/userland-swsusp.txt | |||
@@ -14,7 +14,7 @@ are going to develop your own suspend/resume utilities. | |||
14 | 14 | ||
15 | The interface consists of a character device providing the open(), | 15 | The interface consists of a character device providing the open(), |
16 | release(), read(), and write() operations as well as several ioctl() | 16 | release(), read(), and write() operations as well as several ioctl() |
17 | commands defined in kernel/power/power.h. The major and minor | 17 | commands defined in include/linux/suspend_ioctls.h. The major and minor |
18 | numbers of the device are, respectively, 10 and 231, and they can | 18 | numbers of the device are, respectively, 10 and 231, and they can |
19 | be read from /sys/class/misc/snapshot/dev. | 19 | be read from /sys/class/misc/snapshot/dev. |
20 | 20 | ||
@@ -27,17 +27,17 @@ once at a time. | |||
27 | The ioctl() commands recognized by the device are: | 27 | The ioctl() commands recognized by the device are: |
28 | 28 | ||
29 | SNAPSHOT_FREEZE - freeze user space processes (the current process is | 29 | SNAPSHOT_FREEZE - freeze user space processes (the current process is |
30 | not frozen); this is required for SNAPSHOT_ATOMIC_SNAPSHOT | 30 | not frozen); this is required for SNAPSHOT_CREATE_IMAGE |
31 | and SNAPSHOT_ATOMIC_RESTORE to succeed | 31 | and SNAPSHOT_ATOMIC_RESTORE to succeed |
32 | 32 | ||
33 | SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE | 33 | SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE |
34 | 34 | ||
35 | SNAPSHOT_ATOMIC_SNAPSHOT - create a snapshot of the system memory; the | 35 | SNAPSHOT_CREATE_IMAGE - create a snapshot of the system memory; the |
36 | last argument of ioctl() should be a pointer to an int variable, | 36 | last argument of ioctl() should be a pointer to an int variable, |
37 | the value of which will indicate whether the call returned after | 37 | the value of which will indicate whether the call returned after |
38 | creating the snapshot (1) or after restoring the system memory state | 38 | creating the snapshot (1) or after restoring the system memory state |
39 | from it (0) (after resume the system finds itself finishing the | 39 | from it (0) (after resume the system finds itself finishing the |
40 | SNAPSHOT_ATOMIC_SNAPSHOT ioctl() again); after the snapshot | 40 | SNAPSHOT_CREATE_IMAGE ioctl() again); after the snapshot |
41 | has been created the read() operation can be used to transfer | 41 | has been created the read() operation can be used to transfer |
42 | it out of the kernel | 42 | it out of the kernel |
43 | 43 | ||
@@ -49,39 +49,37 @@ SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the | |||
49 | 49 | ||
50 | SNAPSHOT_FREE - free memory allocated for the snapshot image | 50 | SNAPSHOT_FREE - free memory allocated for the snapshot image |
51 | 51 | ||
52 | SNAPSHOT_SET_IMAGE_SIZE - set the preferred maximum size of the image | 52 | SNAPSHOT_PREF_IMAGE_SIZE - set the preferred maximum size of the image |
53 | (the kernel will do its best to ensure the image size will not exceed | 53 | (the kernel will do its best to ensure the image size will not exceed |
54 | this number, but if it turns out to be impossible, the kernel will | 54 | this number, but if it turns out to be impossible, the kernel will |
55 | create the smallest image possible) | 55 | create the smallest image possible) |
56 | 56 | ||
57 | SNAPSHOT_AVAIL_SWAP - return the amount of available swap in bytes (the last | 57 | SNAPSHOT_GET_IMAGE_SIZE - return the actual size of the hibernation image |
58 | argument should be a pointer to an unsigned int variable that will | 58 | |
59 | SNAPSHOT_AVAIL_SWAP_SIZE - return the amount of available swap in bytes (the | ||
60 | last argument should be a pointer to an unsigned int variable that will | ||
59 | contain the result if the call is successful). | 61 | contain the result if the call is successful). |
60 | 62 | ||
61 | SNAPSHOT_GET_SWAP_PAGE - allocate a swap page from the resume partition | 63 | SNAPSHOT_ALLOC_SWAP_PAGE - allocate a swap page from the resume partition |
62 | (the last argument should be a pointer to a loff_t variable that | 64 | (the last argument should be a pointer to a loff_t variable that |
63 | will contain the swap page offset if the call is successful) | 65 | will contain the swap page offset if the call is successful) |
64 | 66 | ||
65 | SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated with | 67 | SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated by |
66 | SNAPSHOT_GET_SWAP_PAGE | 68 | SNAPSHOT_ALLOC_SWAP_PAGE |
67 | |||
68 | SNAPSHOT_SET_SWAP_FILE - set the resume partition (the last ioctl() argument | ||
69 | should specify the device's major and minor numbers in the old | ||
70 | two-byte format, as returned by the stat() function in the .st_rdev | ||
71 | member of the stat structure) | ||
72 | 69 | ||
73 | SNAPSHOT_SET_SWAP_AREA - set the resume partition and the offset (in <PAGE_SIZE> | 70 | SNAPSHOT_SET_SWAP_AREA - set the resume partition and the offset (in <PAGE_SIZE> |
74 | units) from the beginning of the partition at which the swap header is | 71 | units) from the beginning of the partition at which the swap header is |
75 | located (the last ioctl() argument should point to a struct | 72 | located (the last ioctl() argument should point to a struct |
76 | resume_swap_area, as defined in kernel/power/power.h, containing the | 73 | resume_swap_area, as defined in include/linux/suspend_ioctls.h, |
77 | resume device specification, as for the SNAPSHOT_SET_SWAP_FILE ioctl(), | 74 | containing the resume device specification and the offset); for swap |
78 | and the offset); for swap partitions the offset is always 0, but it is | 75 | partitions the offset is always 0, but it is different from zero for |
79 | different to zero for swap files (please see | 76 | swap files (see Documentation/swsusp-and-swap-files.txt for details). |
80 | Documentation/swsusp-and-swap-files.txt for details). | 77 | |
81 | The SNAPSHOT_SET_SWAP_AREA ioctl() is considered as a replacement for | 78 | SNAPSHOT_PLATFORM_SUPPORT - enable/disable the hibernation platform support, |
82 | SNAPSHOT_SET_SWAP_FILE which is regarded as obsolete. It is | 79 | depending on the argument value (enable, if the argument is nonzero) |
83 | recommended to always use this call, because the code to set the resume | 80 | |
84 | partition may be removed from future kernels | 81 | SNAPSHOT_POWER_OFF - make the kernel transition the system to the hibernation |
82 | state (eg. ACPI S4) using the platform (eg. ACPI) driver | ||
85 | 83 | ||
86 | SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to | 84 | SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to |
87 | immediately enter the suspend-to-RAM state, so this call must always | 85 | immediately enter the suspend-to-RAM state, so this call must always |
@@ -93,24 +91,6 @@ SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to | |||
93 | to resume the system from RAM if there's enough battery power or restore | 91 | to resume the system from RAM if there's enough battery power or restore |
94 | its state on the basis of the saved suspend image otherwise) | 92 | its state on the basis of the saved suspend image otherwise) |
95 | 93 | ||
96 | SNAPSHOT_PMOPS - enable the usage of the hibernation_ops->prepare, | ||
97 | hibernate_ops->enter and hibernation_ops->finish methods (the in-kernel | ||
98 | swsusp knows these as the "platform method") which are needed on many | ||
99 | machines to (among others) speed up the resume by letting the BIOS skip | ||
100 | some steps or to let the system recognise the correct state of the | ||
101 | hardware after the resume (in particular on many machines this ensures | ||
102 | that unplugged AC adapters get correctly detected and that kacpid does | ||
103 | not run wild after the resume). The last ioctl() argument can take one | ||
104 | of the three values, defined in kernel/power/power.h: | ||
105 | PMOPS_PREPARE - make the kernel carry out the | ||
106 | hibernation_ops->prepare() operation | ||
107 | PMOPS_ENTER - make the kernel power off the system by calling | ||
108 | hibernation_ops->enter() | ||
109 | PMOPS_FINISH - make the kernel carry out the | ||
110 | hibernation_ops->finish() operation | ||
111 | Note that the actual constants are misnamed because they surface | ||
112 | internal kernel implementation details that have changed. | ||
113 | |||
114 | The device's read() operation can be used to transfer the snapshot image from | 94 | The device's read() operation can be used to transfer the snapshot image from |
115 | the kernel. It has the following limitations: | 95 | the kernel. It has the following limitations: |
116 | - you cannot read() more than one virtual memory page at a time | 96 | - you cannot read() more than one virtual memory page at a time |
@@ -122,7 +102,7 @@ The device's write() operation is used for uploading the system memory snapshot | |||
122 | into the kernel. It has the same limitations as the read() operation. | 102 | into the kernel. It has the same limitations as the read() operation. |
123 | 103 | ||
124 | The release() operation frees all memory allocated for the snapshot image | 104 | The release() operation frees all memory allocated for the snapshot image |
125 | and all swap pages allocated with SNAPSHOT_GET_SWAP_PAGE (if any). | 105 | and all swap pages allocated with SNAPSHOT_ALLOC_SWAP_PAGE (if any). |
126 | Thus it is not necessary to use either SNAPSHOT_FREE or | 106 | Thus it is not necessary to use either SNAPSHOT_FREE or |
127 | SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also | 107 | SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also |
128 | unfreeze user space processes frozen by SNAPSHOT_FREEZE if they are | 108 | unfreeze user space processes frozen by SNAPSHOT_FREEZE if they are |
@@ -133,16 +113,12 @@ snapshot image from/to the kernel will use a swap partition, called the resume | |||
133 | partition, or a swap file as storage space (if a swap file is used, the resume | 113 | partition, or a swap file as storage space (if a swap file is used, the resume |
134 | partition is the partition that holds this file). However, this is not really | 114 | partition is the partition that holds this file). However, this is not really |
135 | required, as they can use, for example, a special (blank) suspend partition or | 115 | required, as they can use, for example, a special (blank) suspend partition or |
136 | a file on a partition that is unmounted before SNAPSHOT_ATOMIC_SNAPSHOT and | 116 | a file on a partition that is unmounted before SNAPSHOT_CREATE_IMAGE and |
137 | mounted afterwards. | 117 | mounted afterwards. |
138 | 118 | ||
139 | These utilities SHOULD NOT make any assumptions regarding the ordering of | 119 | These utilities MUST NOT make any assumptions regarding the ordering of |
140 | data within the snapshot image, except for the image header that MAY be | 120 | data within the snapshot image. The contents of the image are entirely owned |
141 | assumed to start with an swsusp_info structure, as specified in | 121 | by the kernel and its structure may be changed in future kernel releases. |
142 | kernel/power/power.h. This structure MAY be used by the userland utilities | ||
143 | to obtain some information about the snapshot image, such as the size | ||
144 | of the snapshot image, including the metadata and the header itself, | ||
145 | contained in the .size member of swsusp_info. | ||
146 | 122 | ||
147 | The snapshot image MUST be written to the kernel unaltered (ie. all of the image | 123 | The snapshot image MUST be written to the kernel unaltered (ie. all of the image |
148 | data, metadata and header MUST be written in _exactly_ the same amount, form | 124 | data, metadata and header MUST be written in _exactly_ the same amount, form |
@@ -159,7 +135,7 @@ means, such as checksums, to ensure the integrity of the snapshot image. | |||
159 | The suspending and resuming utilities MUST lock themselves in memory, | 135 | The suspending and resuming utilities MUST lock themselves in memory, |
160 | preferably using mlockall(), before calling SNAPSHOT_FREEZE. | 136 | preferably using mlockall(), before calling SNAPSHOT_FREEZE. |
161 | 137 | ||
162 | The suspending utility MUST check the value stored by SNAPSHOT_ATOMIC_SNAPSHOT | 138 | The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE |
163 | in the memory location pointed to by the last argument of ioctl() and proceed | 139 | in the memory location pointed to by the last argument of ioctl() and proceed |
164 | in accordance with it: | 140 | in accordance with it: |
165 | 1. If the value is 1 (ie. the system memory snapshot has just been | 141 | 1. If the value is 1 (ie. the system memory snapshot has just been |
@@ -173,7 +149,7 @@ in accordance with it: | |||
173 | image has been saved. | 149 | image has been saved. |
174 | (b) The suspending utility SHOULD NOT attempt to perform any | 150 | (b) The suspending utility SHOULD NOT attempt to perform any |
175 | file system operations (including reads) on the file systems | 151 | file system operations (including reads) on the file systems |
176 | that were mounted before SNAPSHOT_ATOMIC_SNAPSHOT has been | 152 | that were mounted before SNAPSHOT_CREATE_IMAGE has been |
177 | called. However, it MAY mount a file system that was not | 153 | called. However, it MAY mount a file system that was not |
178 | mounted at that time and perform some operations on it (eg. | 154 | mounted at that time and perform some operations on it (eg. |
179 | use it for saving the image). | 155 | use it for saving the image). |
diff --git a/Documentation/power_supply_class.txt b/Documentation/power_supply_class.txt index 9758cf433c06..a8686e5a6857 100644 --- a/Documentation/power_supply_class.txt +++ b/Documentation/power_supply_class.txt | |||
@@ -87,6 +87,10 @@ batteries use voltage for very approximated calculation of capacity. | |||
87 | Battery driver also can use this attribute just to inform userspace | 87 | Battery driver also can use this attribute just to inform userspace |
88 | about maximal and minimal voltage thresholds of a given battery. | 88 | about maximal and minimal voltage thresholds of a given battery. |
89 | 89 | ||
90 | VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that | ||
91 | these ones should be used if hardware could only guess (measure and | ||
92 | retain) the thresholds of a given power supply. | ||
93 | |||
90 | CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when | 94 | CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when |
91 | battery considered full/empty. | 95 | battery considered full/empty. |
92 | 96 | ||
@@ -100,8 +104,6 @@ age)". I.e. these attributes represents real thresholds, not design values. | |||
100 | ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. | 104 | ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. |
101 | 105 | ||
102 | CAPACITY - capacity in percents. | 106 | CAPACITY - capacity in percents. |
103 | CAPACITY_LEVEL - capacity level. This corresponds to | ||
104 | POWER_SUPPLY_CAPACITY_LEVEL_*. | ||
105 | 107 | ||
106 | TEMP - temperature of the power supply. | 108 | TEMP - temperature of the power supply. |
107 | TEMP_AMBIENT - ambient temperature. | 109 | TEMP_AMBIENT - ambient temperature. |
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt index e20b19c1b60d..8deffcd68cb8 100644 --- a/Documentation/rtc.txt +++ b/Documentation/rtc.txt | |||
@@ -182,8 +182,8 @@ driver returns ENOIOCTLCMD. Some common examples: | |||
182 | since the frequency is stored in the irq_freq member of the rtc_device | 182 | since the frequency is stored in the irq_freq member of the rtc_device |
183 | structure. Your driver needs to initialize the irq_freq member during | 183 | structure. Your driver needs to initialize the irq_freq member during |
184 | init. Make sure you check the requested frequency is in range of your | 184 | init. Make sure you check the requested frequency is in range of your |
185 | hardware in the irq_set_freq function. If you cannot actually change | 185 | hardware in the irq_set_freq function. If it isn't, return -EINVAL. If |
186 | the frequency, just return -ENOTTY. | 186 | you cannot actually change the frequency, do not define irq_set_freq. |
187 | 187 | ||
188 | If all else fails, check out the rtc-test.c driver! | 188 | If all else fails, check out the rtc-test.c driver! |
189 | 189 | ||
@@ -268,8 +268,8 @@ int main(int argc, char **argv) | |||
268 | /* This read will block */ | 268 | /* This read will block */ |
269 | retval = read(fd, &data, sizeof(unsigned long)); | 269 | retval = read(fd, &data, sizeof(unsigned long)); |
270 | if (retval == -1) { | 270 | if (retval == -1) { |
271 | perror("read"); | 271 | perror("read"); |
272 | exit(errno); | 272 | exit(errno); |
273 | } | 273 | } |
274 | fprintf(stderr, " %d",i); | 274 | fprintf(stderr, " %d",i); |
275 | fflush(stderr); | 275 | fflush(stderr); |
@@ -326,11 +326,11 @@ test_READ: | |||
326 | rtc_tm.tm_sec %= 60; | 326 | rtc_tm.tm_sec %= 60; |
327 | rtc_tm.tm_min++; | 327 | rtc_tm.tm_min++; |
328 | } | 328 | } |
329 | if (rtc_tm.tm_min == 60) { | 329 | if (rtc_tm.tm_min == 60) { |
330 | rtc_tm.tm_min = 0; | 330 | rtc_tm.tm_min = 0; |
331 | rtc_tm.tm_hour++; | 331 | rtc_tm.tm_hour++; |
332 | } | 332 | } |
333 | if (rtc_tm.tm_hour == 24) | 333 | if (rtc_tm.tm_hour == 24) |
334 | rtc_tm.tm_hour = 0; | 334 | rtc_tm.tm_hour = 0; |
335 | 335 | ||
336 | retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); | 336 | retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); |
@@ -407,8 +407,8 @@ test_PIE: | |||
407 | "\n...Periodic IRQ rate is fixed\n"); | 407 | "\n...Periodic IRQ rate is fixed\n"); |
408 | goto done; | 408 | goto done; |
409 | } | 409 | } |
410 | perror("RTC_IRQP_SET ioctl"); | 410 | perror("RTC_IRQP_SET ioctl"); |
411 | exit(errno); | 411 | exit(errno); |
412 | } | 412 | } |
413 | 413 | ||
414 | fprintf(stderr, "\n%ldHz:\t", tmp); | 414 | fprintf(stderr, "\n%ldHz:\t", tmp); |
@@ -417,27 +417,27 @@ test_PIE: | |||
417 | /* Enable periodic interrupts */ | 417 | /* Enable periodic interrupts */ |
418 | retval = ioctl(fd, RTC_PIE_ON, 0); | 418 | retval = ioctl(fd, RTC_PIE_ON, 0); |
419 | if (retval == -1) { | 419 | if (retval == -1) { |
420 | perror("RTC_PIE_ON ioctl"); | 420 | perror("RTC_PIE_ON ioctl"); |
421 | exit(errno); | 421 | exit(errno); |
422 | } | 422 | } |
423 | 423 | ||
424 | for (i=1; i<21; i++) { | 424 | for (i=1; i<21; i++) { |
425 | /* This blocks */ | 425 | /* This blocks */ |
426 | retval = read(fd, &data, sizeof(unsigned long)); | 426 | retval = read(fd, &data, sizeof(unsigned long)); |
427 | if (retval == -1) { | 427 | if (retval == -1) { |
428 | perror("read"); | 428 | perror("read"); |
429 | exit(errno); | 429 | exit(errno); |
430 | } | 430 | } |
431 | fprintf(stderr, " %d",i); | 431 | fprintf(stderr, " %d",i); |
432 | fflush(stderr); | 432 | fflush(stderr); |
433 | irqcount++; | 433 | irqcount++; |
434 | } | 434 | } |
435 | 435 | ||
436 | /* Disable periodic interrupts */ | 436 | /* Disable periodic interrupts */ |
437 | retval = ioctl(fd, RTC_PIE_OFF, 0); | 437 | retval = ioctl(fd, RTC_PIE_OFF, 0); |
438 | if (retval == -1) { | 438 | if (retval == -1) { |
439 | perror("RTC_PIE_OFF ioctl"); | 439 | perror("RTC_PIE_OFF ioctl"); |
440 | exit(errno); | 440 | exit(errno); |
441 | } | 441 | } |
442 | } | 442 | } |
443 | 443 | ||
diff --git a/Documentation/smp.txt b/Documentation/smp.txt deleted file mode 100644 index 82fc50b6305d..000000000000 --- a/Documentation/smp.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | To set up SMP | ||
2 | |||
3 | Configure the kernel and answer Y to CONFIG_SMP. | ||
4 | |||
5 | If you are using LILO, it is handy to have both SMP and non-SMP | ||
6 | kernel images on hand. Edit /etc/lilo.conf to create an entry | ||
7 | for another kernel image called "linux-smp" or something. | ||
8 | |||
9 | The next time you compile the kernel, when running a SMP kernel, | ||
10 | edit linux/Makefile and change "MAKE=make" to "MAKE=make -jN" | ||
11 | (where N = number of CPU + 1, or if you have tons of memory/swap | ||
12 | you can just use "-j" without a number). Feel free to experiment | ||
13 | with this one. | ||
14 | |||
15 | Of course you should time how long each build takes :-) | ||
16 | Example: | ||
17 | make config | ||
18 | time -v sh -c 'make clean install modules modules_install' | ||
19 | |||
20 | If you are using some Compaq MP compliant machines you will need to set | ||
21 | the operating system in the BIOS settings to "Unixware" - don't ask me | ||
22 | why Compaqs don't work otherwise. | ||
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index aa986a35e994..f99254327ae5 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs: | |||
23 | - inode-max | 23 | - inode-max |
24 | - inode-nr | 24 | - inode-nr |
25 | - inode-state | 25 | - inode-state |
26 | - nr_open | ||
26 | - overflowuid | 27 | - overflowuid |
27 | - overflowgid | 28 | - overflowgid |
28 | - suid_dumpable | 29 | - suid_dumpable |
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum. | |||
91 | 92 | ||
92 | ============================================================== | 93 | ============================================================== |
93 | 94 | ||
95 | nr_open: | ||
96 | |||
97 | This denotes the maximum number of file-handles a process can | ||
98 | allocate. Default value is 1024*1024 (1048576) which should be | ||
99 | enough for most machines. Actual limit depends on RLIMIT_NOFILE | ||
100 | resource limit. | ||
101 | |||
102 | ============================================================== | ||
103 | |||
94 | inode-max, inode-nr & inode-state: | 104 | inode-max, inode-nr & inode-state: |
95 | 105 | ||
96 | As with file handles, the kernel allocates the inode structures | 106 | As with file handles, the kernel allocates the inode structures |
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 6f31f0a247d0..24eac1bc735d 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/vm: | |||
22 | - dirty_background_ratio | 22 | - dirty_background_ratio |
23 | - dirty_expire_centisecs | 23 | - dirty_expire_centisecs |
24 | - dirty_writeback_centisecs | 24 | - dirty_writeback_centisecs |
25 | - highmem_is_dirtyable (only if CONFIG_HIGHMEM set) | ||
25 | - max_map_count | 26 | - max_map_count |
26 | - min_free_kbytes | 27 | - min_free_kbytes |
27 | - laptop_mode | 28 | - laptop_mode |
@@ -40,9 +41,9 @@ Currently, these files are in /proc/sys/vm: | |||
40 | ============================================================== | 41 | ============================================================== |
41 | 42 | ||
42 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, | 43 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, |
43 | dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, | 44 | dirty_writeback_centisecs, highmem_is_dirtyable, |
44 | block_dump, swap_token_timeout, drop-caches, | 45 | vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, |
45 | hugepages_treat_as_movable: | 46 | drop-caches, hugepages_treat_as_movable: |
46 | 47 | ||
47 | See Documentation/filesystems/proc.txt | 48 | See Documentation/filesystems/proc.txt |
48 | 49 | ||
diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/thinkpad-acpi.txt index 10c041ca13c7..6c2477754a2a 100644 --- a/Documentation/thinkpad-acpi.txt +++ b/Documentation/thinkpad-acpi.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | ThinkPad ACPI Extras Driver | 1 | ThinkPad ACPI Extras Driver |
2 | 2 | ||
3 | Version 0.17 | 3 | Version 0.19 |
4 | October 04th, 2007 | 4 | January 06th, 2008 |
5 | 5 | ||
6 | Borislav Deianov <borislav@users.sf.net> | 6 | Borislav Deianov <borislav@users.sf.net> |
7 | Henrique de Moraes Holschuh <hmh@hmh.eng.br> | 7 | Henrique de Moraes Holschuh <hmh@hmh.eng.br> |
@@ -215,6 +215,11 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file: | |||
215 | ... any other 8-hex-digit mask ... | 215 | ... any other 8-hex-digit mask ... |
216 | echo reset > /proc/acpi/ibm/hotkey -- restore the original mask | 216 | echo reset > /proc/acpi/ibm/hotkey -- restore the original mask |
217 | 217 | ||
218 | The procfs interface does not support NVRAM polling control. So as to | ||
219 | maintain maximum bug-to-bug compatibility, it does not report any masks, | ||
220 | nor does it allow one to manipulate the hot key mask when the firmware | ||
221 | does not support masks at all, even if NVRAM polling is in use. | ||
222 | |||
218 | sysfs notes: | 223 | sysfs notes: |
219 | 224 | ||
220 | hotkey_bios_enabled: | 225 | hotkey_bios_enabled: |
@@ -231,17 +236,26 @@ sysfs notes: | |||
231 | to this value. | 236 | to this value. |
232 | 237 | ||
233 | hotkey_enable: | 238 | hotkey_enable: |
234 | Enables/disables the hot keys feature, and reports | 239 | Enables/disables the hot keys feature in the ACPI |
235 | current status of the hot keys feature. | 240 | firmware, and reports current status of the hot keys |
241 | feature. Has no effect on the NVRAM hot key polling | ||
242 | functionality. | ||
236 | 243 | ||
237 | 0: disables the hot keys feature / feature disabled | 244 | 0: disables the hot keys feature / feature disabled |
238 | 1: enables the hot keys feature / feature enabled | 245 | 1: enables the hot keys feature / feature enabled |
239 | 246 | ||
240 | hotkey_mask: | 247 | hotkey_mask: |
241 | bit mask to enable driver-handling and ACPI event | 248 | bit mask to enable driver-handling (and depending on |
242 | generation for each hot key (see above). Returns the | 249 | the firmware, ACPI event generation) for each hot key |
243 | current status of the hot keys mask, and allows one to | 250 | (see above). Returns the current status of the hot keys |
244 | modify it. | 251 | mask, and allows one to modify it. |
252 | |||
253 | Note: when NVRAM polling is active, the firmware mask | ||
254 | will be different from the value returned by | ||
255 | hotkey_mask. The driver will retain enabled bits for | ||
256 | hotkeys that are under NVRAM polling even if the | ||
257 | firmware refuses them, and will not set these bits on | ||
258 | the firmware hot key mask. | ||
245 | 259 | ||
246 | hotkey_all_mask: | 260 | hotkey_all_mask: |
247 | bit mask that should enable event reporting for all | 261 | bit mask that should enable event reporting for all |
@@ -257,12 +271,48 @@ sysfs notes: | |||
257 | handled by the firmware anyway. Echo it to | 271 | handled by the firmware anyway. Echo it to |
258 | hotkey_mask above, to use. | 272 | hotkey_mask above, to use. |
259 | 273 | ||
274 | hotkey_source_mask: | ||
275 | bit mask that selects which hot keys the driver will | ||
276 | poll the NVRAM for. This is auto-detected by the driver | ||
277 | based on the capabilities reported by the ACPI firmware, | ||
278 | but it can be overridden at runtime. | ||
279 | |||
280 | Hot keys whose bits are set in both hotkey_source_mask | ||
281 | and also on hotkey_mask are polled for in NVRAM. Only a | ||
282 | few hot keys are available through CMOS NVRAM polling. | ||
283 | |||
284 | Warning: when in NVRAM mode, the volume up/down/mute | ||
285 | keys are synthesized according to changes in the mixer, | ||
286 | so you have to use volume up or volume down to unmute, | ||
287 | as per the ThinkPad volume mixer user interface. When | ||
288 | in ACPI event mode, volume up/down/mute are reported as | ||
289 | separate events, but this behaviour may be corrected in | ||
290 | future releases of this driver, in which case the | ||
291 | ThinkPad volume mixer user interface semantics will be | ||
292 | enforced. | ||
293 | |||
294 | hotkey_poll_freq: | ||
295 | frequency in Hz for hot key polling. It must be between | ||
296 | 0 and 25 Hz. Polling is only carried out when strictly | ||
297 | needed. | ||
298 | |||
299 | Setting hotkey_poll_freq to zero disables polling, and | ||
300 | will cause hot key presses that require NVRAM polling | ||
301 | to never be reported. | ||
302 | |||
303 | Setting hotkey_poll_freq too low will cause repeated | ||
304 | pressings of the same hot key to be misreported as a | ||
305 | single key press, or to not even be detected at all. | ||
306 | The recommended polling frequency is 10Hz. | ||
307 | |||
260 | hotkey_radio_sw: | 308 | hotkey_radio_sw: |
261 | if the ThinkPad has a hardware radio switch, this | 309 | if the ThinkPad has a hardware radio switch, this |
262 | attribute will read 0 if the switch is in the "radios | 310 | attribute will read 0 if the switch is in the "radios |
263 | disabled" position, and 1 if the switch is in the | 311 | disabled" position, and 1 if the switch is in the |
264 | "radios enabled" position. | 312 | "radios enabled" position. |
265 | 313 | ||
314 | This attribute has poll()/select() support. | ||
315 | |||
266 | hotkey_report_mode: | 316 | hotkey_report_mode: |
267 | Returns the state of the procfs ACPI event report mode | 317 | Returns the state of the procfs ACPI event report mode |
268 | filter for hot keys. If it is set to 1 (the default), | 318 | filter for hot keys. If it is set to 1 (the default), |
@@ -277,6 +327,25 @@ sysfs notes: | |||
277 | May return -EPERM (write access locked out by module | 327 | May return -EPERM (write access locked out by module |
278 | parameter) or -EACCES (read-only). | 328 | parameter) or -EACCES (read-only). |
279 | 329 | ||
330 | wakeup_reason: | ||
331 | Set to 1 if the system is waking up because the user | ||
332 | requested a bay ejection. Set to 2 if the system is | ||
333 | waking up because the user requested the system to | ||
334 | undock. Set to zero for normal wake-ups or wake-ups | ||
335 | due to unknown reasons. | ||
336 | |||
337 | This attribute has poll()/select() support. | ||
338 | |||
339 | wakeup_hotunplug_complete: | ||
340 | Set to 1 if the system was woken up because of an | ||
341 | undock or bay ejection request, and that request | ||
342 | was successfully completed. At this point, it might | ||
343 | be useful to send the system back to sleep, at the | ||
344 | user's choice. Refer to HKEY events 0x4003 and | ||
345 | 0x3003, below. | ||
346 | |||
347 | This attribute has poll()/select() support. | ||
348 | |||
280 | input layer notes: | 349 | input layer notes: |
281 | 350 | ||
282 | A Hot key is mapped to a single input layer EV_KEY event, possibly | 351 | A Hot key is mapped to a single input layer EV_KEY event, possibly |
@@ -427,6 +496,23 @@ Non hot-key ACPI HKEY event map: | |||
427 | The above events are not propagated by the driver, except for legacy | 496 | The above events are not propagated by the driver, except for legacy |
428 | compatibility purposes when hotkey_report_mode is set to 1. | 497 | compatibility purposes when hotkey_report_mode is set to 1. |
429 | 498 | ||
499 | 0x2304 System is waking up from suspend to undock | ||
500 | 0x2305 System is waking up from suspend to eject bay | ||
501 | 0x2404 System is waking up from hibernation to undock | ||
502 | 0x2405 System is waking up from hibernation to eject bay | ||
503 | |||
504 | The above events are never propagated by the driver. | ||
505 | |||
506 | 0x3003 Bay ejection (see 0x2x05) complete, can sleep again | ||
507 | 0x4003 Undocked (see 0x2x04), can sleep again | ||
508 | 0x5009 Tablet swivel: switched to tablet mode | ||
509 | 0x500A Tablet swivel: switched to normal mode | ||
510 | 0x500B Tablet pen inserted into its storage bay | ||
511 | 0x500C Tablet pen removed from its storage bay | ||
512 | 0x5010 Brightness level changed (newer Lenovo BIOSes) | ||
513 | |||
514 | The above events are propagated by the driver. | ||
515 | |||
430 | Compatibility notes: | 516 | Compatibility notes: |
431 | 517 | ||
432 | ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never | 518 | ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never |
@@ -1263,3 +1349,17 @@ Sysfs interface changelog: | |||
1263 | and the hwmon class for libsensors4 (lm-sensors 3) | 1349 | and the hwmon class for libsensors4 (lm-sensors 3) |
1264 | compatibility. Moved all hwmon attributes to this | 1350 | compatibility. Moved all hwmon attributes to this |
1265 | new platform device. | 1351 | new platform device. |
1352 | |||
1353 | 0x020100: Marker for thinkpad-acpi with hot key NVRAM polling | ||
1354 | support. If you must, use it to know you should not | ||
1355 | start a userspace NVRAM poller (allows one to detect when | ||
1356 | NVRAM is compiled out by the user because it is | ||
1357 | unneeded/undesired in the first place). | ||
1358 | 0x020101: Marker for thinkpad-acpi with hot key NVRAM polling | ||
1359 | and proper hotkey_mask semantics (version 8 of the | ||
1360 | NVRAM polling patch). Some development snapshots of | ||
1361 | 0.18 had an earlier version that did strange things | ||
1362 | to hotkey_mask. | ||
1363 | |||
1364 | 0x020200: Add poll()/select() support to the following attributes: | ||
1365 | hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason | ||
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt new file mode 100644 index 000000000000..6223eace3c09 --- /dev/null +++ b/Documentation/unaligned-memory-access.txt | |||
@@ -0,0 +1,226 @@ | |||
1 | UNALIGNED MEMORY ACCESSES | ||
2 | ========================= | ||
3 | |||
4 | Linux runs on a wide variety of architectures which have varying behaviour | ||
5 | when it comes to memory access. This document presents some details about | ||
6 | unaligned accesses, why you need to write code that doesn't cause them, | ||
7 | and how to write such code! | ||
8 | |||
9 | |||
10 | The definition of an unaligned access | ||
11 | ===================================== | ||
12 | |||
13 | Unaligned memory accesses occur when you try to read N bytes of data starting | ||
14 | from an address that is not evenly divisible by N (i.e. addr % N != 0). | ||
15 | For example, reading 4 bytes of data from address 0x10004 is fine, but | ||
16 | reading 4 bytes of data from address 0x10005 would be an unaligned memory | ||
17 | access. | ||
18 | |||
19 | The above may seem a little vague, as memory access can happen in different | ||
20 | ways. The context here is at the machine code level: certain instructions read | ||
21 | or write a number of bytes to or from memory (e.g. movb, movw, movl in x86 | ||
22 | assembly). As will become clear, it is relatively easy to spot C statements | ||
23 | which will compile to multiple-byte memory access instructions, namely when | ||
24 | dealing with types such as u16, u32 and u64. | ||
25 | |||
26 | |||
27 | Natural alignment | ||
28 | ================= | ||
29 | |||
30 | The rule mentioned above forms what we refer to as natural alignment: | ||
31 | When accessing N bytes of memory, the base memory address must be evenly | ||
32 | divisible by N, i.e. addr % N == 0. | ||
33 | |||
34 | When writing code, assume the target architecture has natural alignment | ||
35 | requirements. | ||
36 | |||
37 | In reality, only a few architectures require natural alignment on all sizes | ||
38 | of memory access. However, we must consider ALL supported architectures; | ||
39 | writing code that satisfies natural alignment requirements is the easiest way | ||
40 | to achieve full portability. | ||
41 | |||
42 | |||
43 | Why unaligned access is bad | ||
44 | =========================== | ||
45 | |||
46 | The effects of performing an unaligned memory access vary from architecture | ||
47 | to architecture. It would be easy to write a whole document on the differences | ||
48 | here; a summary of the common scenarios is presented below: | ||
49 | |||
50 | - Some architectures are able to perform unaligned memory accesses | ||
51 | transparently, but there is usually a significant performance cost. | ||
52 | - Some architectures raise processor exceptions when unaligned accesses | ||
53 | happen. The exception handler is able to correct the unaligned access, | ||
54 | at significant cost to performance. | ||
55 | - Some architectures raise processor exceptions when unaligned accesses | ||
56 | happen, but the exceptions do not contain enough information for the | ||
57 | unaligned access to be corrected. | ||
58 | - Some architectures are not capable of unaligned memory access, but will | ||
59 | silently perform a different memory access to the one that was requested, | ||
60 | resulting in a subtle code bug that is hard to detect! | ||
61 | |||
62 | It should be obvious from the above that if your code causes unaligned | ||
63 | memory accesses to happen, your code will not work correctly on certain | ||
64 | platforms and will cause performance problems on others. | ||
65 | |||
66 | |||
67 | Code that does not cause unaligned access | ||
68 | ========================================= | ||
69 | |||
70 | At first, the concepts above may seem a little hard to relate to actual | ||
71 | coding practice. After all, you don't have a great deal of control over | ||
72 | memory addresses of certain variables, etc. | ||
73 | |||
74 | Fortunately things are not too complex, as in most cases, the compiler | ||
75 | ensures that things will work for you. For example, take the following | ||
76 | structure: | ||
77 | |||
78 | struct foo { | ||
79 | u16 field1; | ||
80 | u32 field2; | ||
81 | u8 field3; | ||
82 | }; | ||
83 | |||
84 | Let us assume that an instance of the above structure resides in memory | ||
85 | starting at address 0x10000. With a basic level of understanding, it would | ||
86 | not be unreasonable to expect that accessing field2 would cause an unaligned | ||
87 | access. You'd be expecting field2 to be located at offset 2 bytes into the | ||
88 | structure, i.e. address 0x10002, but that address is not evenly divisible | ||
89 | by 4 (remember, we're reading a 4 byte value here). | ||
90 | |||
91 | Fortunately, the compiler understands the alignment constraints, so in the | ||
92 | above case it would insert 2 bytes of padding in between field1 and field2. | ||
93 | Therefore, for standard structure types you can always rely on the compiler | ||
94 | to pad structures so that accesses to fields are suitably aligned (assuming | ||
95 | you do not cast the field to a type of different length). | ||
96 | |||
97 | Similarly, you can also rely on the compiler to align variables and function | ||
98 | parameters to a naturally aligned scheme, based on the size of the type of | ||
99 | the variable. | ||
100 | |||
101 | At this point, it should be clear that accessing a single byte (u8 or char) | ||
102 | will never cause an unaligned access, because all memory addresses are evenly | ||
103 | divisible by one. | ||
104 | |||
105 | On a related topic, with the above considerations in mind you may observe | ||
106 | that you could reorder the fields in the structure in order to place fields | ||
107 | where padding would otherwise be inserted, and hence reduce the overall | ||
108 | resident memory size of structure instances. The optimal layout of the | ||
109 | above example is: | ||
110 | |||
111 | struct foo { | ||
112 | u32 field2; | ||
113 | u16 field1; | ||
114 | u8 field3; | ||
115 | }; | ||
116 | |||
117 | For a natural alignment scheme, the compiler would only have to add a single | ||
118 | byte of padding at the end of the structure. This padding is added in order | ||
119 | to satisfy alignment constraints for arrays of these structures. | ||
120 | |||
121 | Another point worth mentioning is the use of __attribute__((packed)) on a | ||
122 | structure type. This GCC-specific attribute tells the compiler never to | ||
123 | insert any padding within structures, useful when you want to use a C struct | ||
124 | to represent some data that comes in a fixed arrangement 'off the wire'. | ||
125 | |||
126 | You might be inclined to believe that usage of this attribute can easily | ||
127 | lead to unaligned accesses when accessing fields that do not satisfy | ||
128 | architectural alignment requirements. However, again, the compiler is aware | ||
129 | of the alignment constraints and will generate extra instructions to perform | ||
130 | the memory access in a way that does not cause unaligned access. Of course, | ||
131 | the extra instructions obviously cause a loss in performance compared to the | ||
132 | non-packed case, so the packed attribute should only be used when avoiding | ||
133 | structure padding is of importance. | ||
134 | |||
135 | |||
136 | Code that causes unaligned access | ||
137 | ================================= | ||
138 | |||
139 | With the above in mind, let's move onto a real life example of a function | ||
140 | that can cause an unaligned memory access. The following function adapted | ||
141 | from include/linux/etherdevice.h is an optimized routine to compare two | ||
142 | ethernet MAC addresses for equality. | ||
143 | |||
144 | unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2) | ||
145 | { | ||
146 | const u16 *a = (const u16 *) addr1; | ||
147 | const u16 *b = (const u16 *) addr2; | ||
148 | return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; | ||
149 | } | ||
150 | |||
151 | In the above function, the reference to a[0] causes 2 bytes (16 bits) to | ||
152 | be read from memory starting at address addr1. Think about what would happen | ||
153 | if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned | ||
154 | access.) | ||
155 | |||
156 | Despite the potential unaligned access problems with the above function, it | ||
157 | is included in the kernel anyway but is understood to only work on | ||
158 | 16-bit-aligned addresses. It is up to the caller to ensure this alignment or | ||
159 | not use this function at all. This alignment-unsafe function is still useful | ||
160 | as it is a decent optimization for the cases when you can ensure alignment, | ||
161 | which is true almost all of the time in ethernet networking context. | ||
162 | |||
163 | |||
164 | Here is another example of some code that could cause unaligned accesses: | ||
165 | void myfunc(u8 *data, u32 value) | ||
166 | { | ||
167 | [...] | ||
168 | *((u32 *) data) = cpu_to_le32(value); | ||
169 | [...] | ||
170 | } | ||
171 | |||
172 | This code will cause unaligned accesses every time the data parameter points | ||
173 | to an address that is not evenly divisible by 4. | ||
174 | |||
175 | In summary, the 2 main scenarios where you may run into unaligned access | ||
176 | problems involve: | ||
177 | 1. Casting variables to types of different lengths | ||
178 | 2. Pointer arithmetic followed by access to at least 2 bytes of data | ||
179 | |||
180 | |||
181 | Avoiding unaligned accesses | ||
182 | =========================== | ||
183 | |||
184 | The easiest way to avoid unaligned access is to use the get_unaligned() and | ||
185 | put_unaligned() macros provided by the <asm/unaligned.h> header file. | ||
186 | |||
187 | Going back to an earlier example of code that potentially causes unaligned | ||
188 | access: | ||
189 | |||
190 | void myfunc(u8 *data, u32 value) | ||
191 | { | ||
192 | [...] | ||
193 | *((u32 *) data) = cpu_to_le32(value); | ||
194 | [...] | ||
195 | } | ||
196 | |||
197 | To avoid the unaligned memory access, you would rewrite it as follows: | ||
198 | |||
199 | void myfunc(u8 *data, u32 value) | ||
200 | { | ||
201 | [...] | ||
202 | value = cpu_to_le32(value); | ||
203 | put_unaligned(value, (u32 *) data); | ||
204 | [...] | ||
205 | } | ||
206 | |||
207 | The get_unaligned() macro works similarly. Assuming 'data' is a pointer to | ||
208 | memory and you wish to avoid unaligned access, its usage is as follows: | ||
209 | |||
210 | u32 value = get_unaligned((u32 *) data); | ||
211 | |||
212 | These macros work for memory accesses of any length (not just 32 bits as | ||
213 | in the examples above). Be aware that when compared to standard access of | ||
214 | aligned memory, using these macros to access unaligned memory can be costly in | ||
215 | terms of performance. | ||
216 | |||
217 | If use of such macros is not convenient, another option is to use memcpy(), | ||
218 | where the source or destination (or both) are of type u8* or unsigned char*. | ||
219 | Due to the byte-wise nature of this operation, unaligned accesses are avoided. | ||
220 | |||
221 | -- | ||
222 | Author: Daniel Drake <dsd@gentoo.org> | ||
223 | With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt, | ||
224 | Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, | ||
225 | Uli Kunitz, Vadim Lobanov | ||
226 | |||
diff --git a/Documentation/usb/gadget_printer.txt b/Documentation/usb/gadget_printer.txt new file mode 100644 index 000000000000..ad995bf0db41 --- /dev/null +++ b/Documentation/usb/gadget_printer.txt | |||
@@ -0,0 +1,510 @@ | |||
1 | |||
2 | Linux USB Printer Gadget Driver | ||
3 | 06/04/2007 | ||
4 | |||
5 | Copyright (C) 2007 Craig W. Nadler <craig@nadler.us> | ||
6 | |||
7 | |||
8 | |||
9 | GENERAL | ||
10 | ======= | ||
11 | |||
12 | This driver may be used if you are writing printer firmware using Linux as | ||
13 | the embedded OS. This driver has nothing to do with using a printer with | ||
14 | your Linux host system. | ||
15 | |||
16 | You will need a USB device controller and a Linux driver for it that accepts | ||
17 | a gadget / "device class" driver using the Linux USB Gadget API. After the | ||
18 | USB device controller driver is loaded then load the printer gadget driver. | ||
19 | This will present a printer interface to the USB Host that your USB Device | ||
20 | port is connected to. | ||
21 | |||
22 | This driver is structured for printer firmware that runs in user mode. The | ||
23 | user mode printer firmware will read and write data from the kernel mode | ||
24 | printer gadget driver using a device file. The printer returns a printer status | ||
25 | byte when the USB HOST sends a device request to get the printer status. The | ||
26 | user space firmware can read or write this status byte using a device file | ||
27 | /dev/g_printer . Both blocking and non-blocking read/write calls are supported. | ||
28 | |||
29 | |||
30 | |||
31 | |||
32 | HOWTO USE THIS DRIVER | ||
33 | ===================== | ||
34 | |||
35 | Load the USB device controller driver and the printer gadget driver. The | ||
36 | following example uses the Netchip 2280 USB device controller driver: | ||
37 | |||
38 | modprobe net2280 | ||
39 | modprobe g_printer | ||
40 | |||
41 | |||
42 | The following command line parameters can be used when loading the printer gadget | ||
43 | (ex: modprobe g_printer idVendor=0x0525 idProduct=0xa4a8 ): | ||
44 | |||
45 | idVendor - This is the Vendor ID used in the device descriptor. The default is | ||
46 | the Netchip vendor id 0x0525. YOU MUST CHANGE TO YOUR OWN VENDOR ID | ||
47 | BEFORE RELEASING A PRODUCT. If you plan to release a product and don't | ||
48 | already have a Vendor ID please see www.usb.org for details on how to | ||
49 | get one. | ||
50 | |||
51 | idProduct - This is the Product ID used in the device descriptor. The default | ||
52 | is 0xa4a8, you should change this to an ID that's not used by any of | ||
53 | your other USB products if you have any. It would be a good idea to | ||
54 | start numbering your products starting with say 0x0001. | ||
55 | |||
56 | bcdDevice - This is the version number of your product. It would be a good idea | ||
57 | to put your firmware version here. | ||
58 | |||
59 | iManufacturer - A string containing the name of the Vendor. | ||
60 | |||
61 | iProduct - A string containing the Product Name. | ||
62 | |||
63 | iSerialNum - A string containing the Serial Number. This should be changed for | ||
64 | each unit of your product. | ||
65 | |||
66 | iPNPstring - The PNP ID string used for this printer. You will want to set | ||
67 | either on the command line or hard code the PNP ID string used for | ||
68 | your printer product. | ||
69 | |||
70 | qlen - The number of 8k buffers to use per endpoint. The default is 10, you | ||
71 | should tune this for your product. You may also want to tune the | ||
72 | size of each buffer for your product. | ||
73 | |||
74 | |||
75 | |||
76 | |||
77 | USING THE EXAMPLE CODE | ||
78 | ====================== | ||
79 | |||
80 | This example code talks to stdout, instead of a print engine. | ||
81 | |||
82 | To compile the test code below: | ||
83 | |||
84 | 1) save it to a file called prn_example.c | ||
85 | 2) compile the code with the following command: | ||
86 | gcc prn_example.c -o prn_example | ||
87 | |||
88 | |||
89 | |||
90 | To read printer data from the host to stdout: | ||
91 | |||
92 | # prn_example -read_data | ||
93 | |||
94 | |||
95 | To write printer data from a file (data_file) to the host: | ||
96 | |||
97 | # cat data_file | prn_example -write_data | ||
98 | |||
99 | |||
100 | To get the current printer status for the gadget driver: | ||
101 | |||
102 | # prn_example -get_status | ||
103 | |||
104 | Printer status is: | ||
105 | Printer is NOT Selected | ||
106 | Paper is Out | ||
107 | Printer OK | ||
108 | |||
109 | |||
110 | To set printer to Selected/On-line: | ||
111 | |||
112 | # prn_example -selected | ||
113 | |||
114 | |||
115 | To set printer to Not Selected/Off-line: | ||
116 | |||
117 | # prn_example -not_selected | ||
118 | |||
119 | |||
120 | To set paper status to paper out: | ||
121 | |||
122 | # prn_example -paper_out | ||
123 | |||
124 | |||
125 | To set paper status to paper loaded: | ||
126 | |||
127 | # prn_example -paper_loaded | ||
128 | |||
129 | |||
130 | To set error status to printer OK: | ||
131 | |||
132 | # prn_example -no_error | ||
133 | |||
134 | |||
135 | To set error status to ERROR: | ||
136 | |||
137 | # prn_example -error | ||
138 | |||
139 | |||
140 | |||
141 | |||
142 | EXAMPLE CODE | ||
143 | ============ | ||
144 | |||
145 | |||
146 | #include <stdio.h> | ||
147 | #include <stdlib.h> | ||
148 | #include <fcntl.h> | ||
149 | #include <linux/poll.h> | ||
150 | #include <sys/ioctl.h> | ||
151 | #include <linux/usb/g_printer.h> | ||
152 | |||
153 | #define PRINTER_FILE "/dev/g_printer" | ||
154 | #define BUF_SIZE 512 | ||
155 | |||
156 | |||
157 | /* | ||
158 | * 'usage()' - Show program usage. | ||
159 | */ | ||
160 | |||
161 | static void | ||
162 | usage(const char *option) /* I - Option string or NULL */ | ||
163 | { | ||
164 | if (option) { | ||
165 | fprintf(stderr,"prn_example: Unknown option \"%s\"!\n", | ||
166 | option); | ||
167 | } | ||
168 | |||
169 | fputs("\n", stderr); | ||
170 | fputs("Usage: prn_example -[options]\n", stderr); | ||
171 | fputs("Options:\n", stderr); | ||
172 | fputs("\n", stderr); | ||
173 | fputs("-get_status Get the current printer status.\n", stderr); | ||
174 | fputs("-selected Set the selected status to selected.\n", stderr); | ||
175 | fputs("-not_selected Set the selected status to NOT selected.\n", | ||
176 | stderr); | ||
177 | fputs("-error Set the error status to error.\n", stderr); | ||
178 | fputs("-no_error Set the error status to NO error.\n", stderr); | ||
179 | fputs("-paper_out Set the paper status to paper out.\n", stderr); | ||
180 | fputs("-paper_loaded Set the paper status to paper loaded.\n", | ||
181 | stderr); | ||
182 | fputs("-read_data Read printer data from driver.\n", stderr); | ||
183 | fputs("-write_data Write printer sata to driver.\n", stderr); | ||
184 | fputs("-NB_read_data (Non-Blocking) Read printer data from driver.\n", | ||
185 | stderr); | ||
186 | fputs("\n\n", stderr); | ||
187 | |||
188 | exit(1); | ||
189 | } | ||
190 | |||
191 | |||
192 | static int | ||
193 | read_printer_data() | ||
194 | { | ||
195 | struct pollfd fd[1]; | ||
196 | |||
197 | /* Open device file for printer gadget. */ | ||
198 | fd[0].fd = open(PRINTER_FILE, O_RDWR); | ||
199 | if (fd[0].fd < 0) { | ||
200 | printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE); | ||
201 | close(fd[0].fd); | ||
202 | return(-1); | ||
203 | } | ||
204 | |||
205 | fd[0].events = POLLIN | POLLRDNORM; | ||
206 | |||
207 | while (1) { | ||
208 | static char buf[BUF_SIZE]; | ||
209 | int bytes_read; | ||
210 | int retval; | ||
211 | |||
212 | /* Wait for up to 1 second for data. */ | ||
213 | retval = poll(fd, 1, 1000); | ||
214 | |||
215 | if (retval && (fd[0].revents & POLLRDNORM)) { | ||
216 | |||
217 | /* Read data from printer gadget driver. */ | ||
218 | bytes_read = read(fd[0].fd, buf, BUF_SIZE); | ||
219 | |||
220 | if (bytes_read < 0) { | ||
221 | printf("Error %d reading from %s\n", | ||
222 | fd[0].fd, PRINTER_FILE); | ||
223 | close(fd[0].fd); | ||
224 | return(-1); | ||
225 | } else if (bytes_read > 0) { | ||
226 | /* Write data to standard OUTPUT (stdout). */ | ||
227 | fwrite(buf, 1, bytes_read, stdout); | ||
228 | fflush(stdout); | ||
229 | } | ||
230 | |||
231 | } | ||
232 | |||
233 | } | ||
234 | |||
235 | /* Close the device file. */ | ||
236 | close(fd[0].fd); | ||
237 | |||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | |||
242 | static int | ||
243 | write_printer_data() | ||
244 | { | ||
245 | struct pollfd fd[1]; | ||
246 | |||
247 | /* Open device file for printer gadget. */ | ||
248 | fd[0].fd = open (PRINTER_FILE, O_RDWR); | ||
249 | if (fd[0].fd < 0) { | ||
250 | printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE); | ||
251 | close(fd[0].fd); | ||
252 | return(-1); | ||
253 | } | ||
254 | |||
255 | fd[0].events = POLLOUT | POLLWRNORM; | ||
256 | |||
257 | while (1) { | ||
258 | int retval; | ||
259 | static char buf[BUF_SIZE]; | ||
260 | /* Read data from standard INPUT (stdin). */ | ||
261 | int bytes_read = fread(buf, 1, BUF_SIZE, stdin); | ||
262 | |||
263 | if (!bytes_read) { | ||
264 | break; | ||
265 | } | ||
266 | |||
267 | while (bytes_read) { | ||
268 | |||
269 | /* Wait for up to 1 second to sent data. */ | ||
270 | retval = poll(fd, 1, 1000); | ||
271 | |||
272 | /* Write data to printer gadget driver. */ | ||
273 | if (retval && (fd[0].revents & POLLWRNORM)) { | ||
274 | retval = write(fd[0].fd, buf, bytes_read); | ||
275 | if (retval < 0) { | ||
276 | printf("Error %d writing to %s\n", | ||
277 | fd[0].fd, | ||
278 | PRINTER_FILE); | ||
279 | close(fd[0].fd); | ||
280 | return(-1); | ||
281 | } else { | ||
282 | bytes_read -= retval; | ||
283 | } | ||
284 | |||
285 | } | ||
286 | |||
287 | } | ||
288 | |||
289 | } | ||
290 | |||
291 | /* Wait until the data has been sent. */ | ||
292 | fsync(fd[0].fd); | ||
293 | |||
294 | /* Close the device file. */ | ||
295 | close(fd[0].fd); | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | |||
301 | static int | ||
302 | read_NB_printer_data() | ||
303 | { | ||
304 | int fd; | ||
305 | static char buf[BUF_SIZE]; | ||
306 | int bytes_read; | ||
307 | |||
308 | /* Open device file for printer gadget. */ | ||
309 | fd = open(PRINTER_FILE, O_RDWR|O_NONBLOCK); | ||
310 | if (fd < 0) { | ||
311 | printf("Error %d opening %s\n", fd, PRINTER_FILE); | ||
312 | close(fd); | ||
313 | return(-1); | ||
314 | } | ||
315 | |||
316 | while (1) { | ||
317 | /* Read data from printer gadget driver. */ | ||
318 | bytes_read = read(fd, buf, BUF_SIZE); | ||
319 | if (bytes_read <= 0) { | ||
320 | break; | ||
321 | } | ||
322 | |||
323 | /* Write data to standard OUTPUT (stdout). */ | ||
324 | fwrite(buf, 1, bytes_read, stdout); | ||
325 | fflush(stdout); | ||
326 | } | ||
327 | |||
328 | /* Close the device file. */ | ||
329 | close(fd); | ||
330 | |||
331 | return 0; | ||
332 | } | ||
333 | |||
334 | |||
335 | static int | ||
336 | get_printer_status() | ||
337 | { | ||
338 | int retval; | ||
339 | int fd; | ||
340 | |||
341 | /* Open device file for printer gadget. */ | ||
342 | fd = open(PRINTER_FILE, O_RDWR); | ||
343 | if (fd < 0) { | ||
344 | printf("Error %d opening %s\n", fd, PRINTER_FILE); | ||
345 | close(fd); | ||
346 | return(-1); | ||
347 | } | ||
348 | |||
349 | /* Make the IOCTL call. */ | ||
350 | retval = ioctl(fd, GADGET_GET_PRINTER_STATUS); | ||
351 | if (retval < 0) { | ||
352 | fprintf(stderr, "ERROR: Failed to set printer status\n"); | ||
353 | return(-1); | ||
354 | } | ||
355 | |||
356 | /* Close the device file. */ | ||
357 | close(fd); | ||
358 | |||
359 | return(retval); | ||
360 | } | ||
361 | |||
362 | |||
363 | static int | ||
364 | set_printer_status(unsigned char buf, int clear_printer_status_bit) | ||
365 | { | ||
366 | int retval; | ||
367 | int fd; | ||
368 | |||
369 | retval = get_printer_status(); | ||
370 | if (retval < 0) { | ||
371 | fprintf(stderr, "ERROR: Failed to get printer status\n"); | ||
372 | return(-1); | ||
373 | } | ||
374 | |||
375 | /* Open device file for printer gadget. */ | ||
376 | fd = open(PRINTER_FILE, O_RDWR); | ||
377 | |||
378 | if (fd < 0) { | ||
379 | printf("Error %d opening %s\n", fd, PRINTER_FILE); | ||
380 | close(fd); | ||
381 | return(-1); | ||
382 | } | ||
383 | |||
384 | if (clear_printer_status_bit) { | ||
385 | retval &= ~buf; | ||
386 | } else { | ||
387 | retval |= buf; | ||
388 | } | ||
389 | |||
390 | /* Make the IOCTL call. */ | ||
391 | if (ioctl(fd, GADGET_SET_PRINTER_STATUS, (unsigned char)retval)) { | ||
392 | fprintf(stderr, "ERROR: Failed to set printer status\n"); | ||
393 | return(-1); | ||
394 | } | ||
395 | |||
396 | /* Close the device file. */ | ||
397 | close(fd); | ||
398 | |||
399 | return 0; | ||
400 | } | ||
401 | |||
402 | |||
403 | static int | ||
404 | display_printer_status() | ||
405 | { | ||
406 | char printer_status; | ||
407 | |||
408 | printer_status = get_printer_status(); | ||
409 | if (printer_status < 0) { | ||
410 | fprintf(stderr, "ERROR: Failed to get printer status\n"); | ||
411 | return(-1); | ||
412 | } | ||
413 | |||
414 | printf("Printer status is:\n"); | ||
415 | if (printer_status & PRINTER_SELECTED) { | ||
416 | printf(" Printer is Selected\n"); | ||
417 | } else { | ||
418 | printf(" Printer is NOT Selected\n"); | ||
419 | } | ||
420 | if (printer_status & PRINTER_PAPER_EMPTY) { | ||
421 | printf(" Paper is Out\n"); | ||
422 | } else { | ||
423 | printf(" Paper is Loaded\n"); | ||
424 | } | ||
425 | if (printer_status & PRINTER_NOT_ERROR) { | ||
426 | printf(" Printer OK\n"); | ||
427 | } else { | ||
428 | printf(" Printer ERROR\n"); | ||
429 | } | ||
430 | |||
431 | return(0); | ||
432 | } | ||
433 | |||
434 | |||
435 | int | ||
436 | main(int argc, char *argv[]) | ||
437 | { | ||
438 | int i; /* Looping var */ | ||
439 | int retval = 0; | ||
440 | |||
441 | /* No Args */ | ||
442 | if (argc == 1) { | ||
443 | usage(0); | ||
444 | exit(0); | ||
445 | } | ||
446 | |||
447 | for (i = 1; i < argc && !retval; i ++) { | ||
448 | |||
449 | if (argv[i][0] != '-') { | ||
450 | continue; | ||
451 | } | ||
452 | |||
453 | if (!strcmp(argv[i], "-get_status")) { | ||
454 | if (display_printer_status()) { | ||
455 | retval = 1; | ||
456 | } | ||
457 | |||
458 | } else if (!strcmp(argv[i], "-paper_loaded")) { | ||
459 | if (set_printer_status(PRINTER_PAPER_EMPTY, 1)) { | ||
460 | retval = 1; | ||
461 | } | ||
462 | |||
463 | } else if (!strcmp(argv[i], "-paper_out")) { | ||
464 | if (set_printer_status(PRINTER_PAPER_EMPTY, 0)) { | ||
465 | retval = 1; | ||
466 | } | ||
467 | |||
468 | } else if (!strcmp(argv[i], "-selected")) { | ||
469 | if (set_printer_status(PRINTER_SELECTED, 0)) { | ||
470 | retval = 1; | ||
471 | } | ||
472 | |||
473 | } else if (!strcmp(argv[i], "-not_selected")) { | ||
474 | if (set_printer_status(PRINTER_SELECTED, 1)) { | ||
475 | retval = 1; | ||
476 | } | ||
477 | |||
478 | } else if (!strcmp(argv[i], "-error")) { | ||
479 | if (set_printer_status(PRINTER_NOT_ERROR, 1)) { | ||
480 | retval = 1; | ||
481 | } | ||
482 | |||
483 | } else if (!strcmp(argv[i], "-no_error")) { | ||
484 | if (set_printer_status(PRINTER_NOT_ERROR, 0)) { | ||
485 | retval = 1; | ||
486 | } | ||
487 | |||
488 | } else if (!strcmp(argv[i], "-read_data")) { | ||
489 | if (read_printer_data()) { | ||
490 | retval = 1; | ||
491 | } | ||
492 | |||
493 | } else if (!strcmp(argv[i], "-write_data")) { | ||
494 | if (write_printer_data()) { | ||
495 | retval = 1; | ||
496 | } | ||
497 | |||
498 | } else if (!strcmp(argv[i], "-NB_read_data")) { | ||
499 | if (read_NB_printer_data()) { | ||
500 | retval = 1; | ||
501 | } | ||
502 | |||
503 | } else { | ||
504 | usage(argv[i]); | ||
505 | retval = 1; | ||
506 | } | ||
507 | } | ||
508 | |||
509 | exit(retval); | ||
510 | } | ||
diff --git a/Documentation/usb/iuu_phoenix.txt b/Documentation/usb/iuu_phoenix.txt new file mode 100644 index 000000000000..e5f048067da4 --- /dev/null +++ b/Documentation/usb/iuu_phoenix.txt | |||
@@ -0,0 +1,84 @@ | |||
1 | Infinity Usb Unlimited Readme | ||
2 | ----------------------------- | ||
3 | |||
4 | Hi all, | ||
5 | |||
6 | |||
7 | This module provides a serial interface to use your | ||
8 | IUU unit in phoenix mode. Loading this module will | ||
9 | bring up a ttyUSB[0-x] interface. This driver must be | ||
10 | used by your favorite application to pilot the IUU. | ||
11 | |||
12 | This driver is still in beta stage, so bugs can | ||
13 | occur and your system may freeze. As far as I know, | ||
14 | I have never had any problem with it, but I'm not a real | ||
15 | guru, so don't blame me if your system is unstable. | ||
16 | |||
17 | You can plug in more than one IUU. Every unit will | ||
18 | have its own device file (/dev/ttyUSB0, /dev/ttyUSB1, ...). | ||
19 | |||
20 | |||
21 | |||
22 | How to tune the reader speed ? | ||
23 | |||
24 | A few parameters can be used at load time. | ||
25 | To use parameters, just unload the module if it is | ||
26 | already loaded and use modprobe iuu_phoenix param=value. | ||
27 | In case of a prebuilt module, use the command | ||
28 | insmod iuu_phoenix param=value. | ||
29 | |||
30 | Example: | ||
31 | |||
32 | modprobe iuu_phoenix clockmode=3 | ||
33 | |||
34 | The parameters are: | ||
35 | |||
36 | parm: clockmode:1=3Mhz579,2=3Mhz680,3=6Mhz (int) | ||
37 | parm: boost:overclock boost percent 100 to 500 (int) | ||
38 | parm: cdmode:Card detect mode 0=none, 1=CD, 2=!CD, 3=DSR, 4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING (int) | ||
39 | parm: xmas:xmas color enabled or not (bool) | ||
40 | parm: debug:Debug enabled or not (bool) | ||
41 | |||
42 | - clockmode will provide 3 different base settings commonly adopted by | ||
43 | different software: | ||
44 | 1. 3Mhz579 | ||
45 | 2. 3Mhz680 | ||
46 | 3. 6Mhz | ||
47 | |||
48 | - boost provides a way to overclock the reader ( my favorite :-) ) | ||
49 | For example, to get better performance than a simple clockmode=3, try this: | ||
50 | |||
51 | modprobe iuu_phoenix boost=195 | ||
52 | |||
53 | This will put the reader in a base of 3Mhz579 but boosted to 195% ! | ||
54 | The real clock will now be 6979050 Hz ( 6Mhz979 ) and will increase | ||
55 | the speed to a score 10 to 20% better than the simple clockmode=3 !!! | ||
56 | |||
57 | |||
58 | - cdmode selects the signal used to inform userland ( ioctl answer ) | ||
59 | whether the card is present or not. Eight signals are possible. | ||
60 | |||
61 | - xmas is completely useless except for your eyes. One of my friends was | ||
62 | so sad to have a nice device like the iuu without seeing the whole color range available. | ||
63 | So I have added this option to let him see a lot of colors ( each activity changes the color | ||
64 | and the frequency randomly ) | ||
65 | |||
66 | - debug will produce a lot of debugging messages... | ||
67 | |||
68 | |||
69 | Last notes: | ||
70 | |||
71 | Don't worry about the serial settings, the serial emulation | ||
72 | is an abstraction, so using any speed or parity setting will | ||
73 | work. ( This will not change anything ). Later I will perhaps | ||
74 | use these settings to deduce the boost, but is that feature | ||
75 | really necessary ? | ||
76 | The autodetect feature used is the serial CD. If that doesn't | ||
77 | work for your software, disable the detection mechanism in it. | ||
78 | |||
79 | |||
80 | Have fun ! | ||
81 | |||
82 | Alain Degreffe | ||
83 | |||
84 | eczema(at)ecze.com | ||
diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX index 752613c4cea2..7b0ceaaad7af 100644 --- a/Documentation/w1/masters/00-INDEX +++ b/Documentation/w1/masters/00-INDEX | |||
@@ -4,3 +4,5 @@ ds2482 | |||
4 | - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses. | 4 | - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses. |
5 | ds2490 | 5 | ds2490 |
6 | - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges. | 6 | - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges. |
7 | w1-gpio | ||
8 | - GPIO 1-wire bus master driver. | ||
diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio new file mode 100644 index 000000000000..af5d3b4aa851 --- /dev/null +++ b/Documentation/w1/masters/w1-gpio | |||
@@ -0,0 +1,33 @@ | |||
1 | Kernel driver w1-gpio | ||
2 | ===================== | ||
3 | |||
4 | Author: Ville Syrjala <syrjala@sci.fi> | ||
5 | |||
6 | |||
7 | Description | ||
8 | ----------- | ||
9 | |||
10 | GPIO 1-wire bus master driver. The driver uses the GPIO API to control the | ||
11 | wire and the GPIO pin can be specified using platform data. | ||
12 | |||
13 | |||
14 | Example (mach-at91) | ||
15 | ------------------- | ||
16 | |||
17 | #include <linux/w1-gpio.h> | ||
18 | |||
19 | static struct w1_gpio_platform_data foo_w1_gpio_pdata = { | ||
20 | .pin = AT91_PIN_PB20, | ||
21 | .is_open_drain = 1, | ||
22 | }; | ||
23 | |||
24 | static struct platform_device foo_w1_device = { | ||
25 | .name = "w1-gpio", | ||
26 | .id = -1, | ||
27 | .dev.platform_data = &foo_w1_gpio_pdata, | ||
28 | }; | ||
29 | |||
30 | ... | ||
31 | at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1); | ||
32 | at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1); | ||
33 | platform_device_register(&foo_w1_device); | ||
diff --git a/Documentation/x86_64/00-INDEX b/Documentation/x86_64/00-INDEX new file mode 100644 index 000000000000..92fc20ab5f0e --- /dev/null +++ b/Documentation/x86_64/00-INDEX | |||
@@ -0,0 +1,16 @@ | |||
1 | 00-INDEX | ||
2 | - This file | ||
3 | boot-options.txt | ||
4 | - AMD64-specific boot options. | ||
5 | cpu-hotplug-spec | ||
6 | - Firmware support for CPU hotplug under Linux/x86-64 | ||
7 | fake-numa-for-cpusets | ||
8 | - Using numa=fake and CPUSets for Resource Management | ||
9 | kernel-stacks | ||
10 | - Context-specific per-processor interrupt stacks. | ||
11 | machinecheck | ||
12 | - Configurable sysfs parameters for the x86-64 machine check code. | ||
13 | mm.txt | ||
14 | - Memory layout of x86-64 (4 level page tables, 46 bits physical). | ||
15 | uefi.txt | ||
16 | - Booting Linux via Unified Extensible Firmware Interface. | ||