aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/00-INDEX3
-rw-r--r--Documentation/BUG-HUNTING17
-rw-r--r--Documentation/DocBook/kernel-api.tmpl2
-rw-r--r--Documentation/DocBook/kernel-locking.tmpl32
-rw-r--r--Documentation/acpi/method-tracing.txt26
-rw-r--r--Documentation/fb/deferred_io.txt6
-rw-r--r--Documentation/feature-removal-schedule.txt7
-rw-r--r--Documentation/filesystems/proc.txt8
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--Documentation/kprobes.txt81
-rw-r--r--Documentation/kref.txt20
-rw-r--r--Documentation/md.txt10
-rw-r--r--Documentation/power/swsusp.txt5
-rw-r--r--Documentation/rtc.txt42
-rw-r--r--Documentation/sysctl/fs.txt10
-rw-r--r--Documentation/thinkpad-acpi.txt116
-rw-r--r--Documentation/unaligned-memory-access.txt226
-rw-r--r--Documentation/w1/masters/00-INDEX2
-rw-r--r--Documentation/w1/masters/w1-gpio33
19 files changed, 575 insertions, 80 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 40ac7759c3bb..33f55917f23f 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -14,6 +14,7 @@ Following translations are available on the WWW:
14 - this file. 14 - this file.
15ABI/ 15ABI/
16 - info on kernel <-> userspace ABI and relative interface stability. 16 - info on kernel <-> userspace ABI and relative interface stability.
17
17BUG-HUNTING 18BUG-HUNTING
18 - brute force method of doing binary search of patches to find bug. 19 - brute force method of doing binary search of patches to find bug.
19Changes 20Changes
@@ -66,6 +67,8 @@ VGA-softcursor.txt
66 - how to change your VGA cursor from a blinking underscore. 67 - how to change your VGA cursor from a blinking underscore.
67accounting/ 68accounting/
68 - documentation on accounting and taskstats. 69 - documentation on accounting and taskstats.
70acpi/
71 - info on ACPI-specific hooks in the kernel.
69aoe/ 72aoe/
70 - description of AoE (ATA over Ethernet) along with config examples. 73 - description of AoE (ATA over Ethernet) along with config examples.
71applying-patches.txt 74applying-patches.txt
diff --git a/Documentation/BUG-HUNTING b/Documentation/BUG-HUNTING
index 6c816751b868..65022a87bf17 100644
--- a/Documentation/BUG-HUNTING
+++ b/Documentation/BUG-HUNTING
@@ -214,6 +214,23 @@ And recompile the kernel with CONFIG_DEBUG_INFO enabled:
214 gdb vmlinux 214 gdb vmlinux
215 (gdb) p vt_ioctl 215 (gdb) p vt_ioctl
216 (gdb) l *(0x<address of vt_ioctl> + 0xda8) 216 (gdb) l *(0x<address of vt_ioctl> + 0xda8)
217or, as one command
218 (gdb) l *(vt_ioctl + 0xda8)
219
220If you have a call trace, such as :-
221>Call Trace:
222> [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5
223> [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e
224> [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee
225> ...
226this shows the problem in the :jbd: module. You can load that module in gdb
227and list the relevant code.
228 gdb fs/jbd/jbd.ko
229 (gdb) p log_wait_commit
230 (gdb) l *(0x<address> + 0xa3)
231or
232 (gdb) l *(log_wait_commit + 0xa3)
233
217 234
218Another very useful option of the Kernel Hacking section in menuconfig is 235Another very useful option of the Kernel Hacking section in menuconfig is
219Debug memory allocations. This will help you see whether data has been 236Debug memory allocations. This will help you see whether data has been
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 77436d735013..059aaf20951a 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -165,6 +165,7 @@ X!Ilib/string.c
165!Emm/vmalloc.c 165!Emm/vmalloc.c
166!Imm/page_alloc.c 166!Imm/page_alloc.c
167!Emm/mempool.c 167!Emm/mempool.c
168!Emm/dmapool.c
168!Emm/page-writeback.c 169!Emm/page-writeback.c
169!Emm/truncate.c 170!Emm/truncate.c
170 </sect1> 171 </sect1>
@@ -371,7 +372,6 @@ X!Iinclude/linux/device.h
371!Edrivers/base/class.c 372!Edrivers/base/class.c
372!Edrivers/base/firmware_class.c 373!Edrivers/base/firmware_class.c
373!Edrivers/base/transport_class.c 374!Edrivers/base/transport_class.c
374!Edrivers/base/dmapool.c
375<!-- Cannot be included, because 375<!-- Cannot be included, because
376 attribute_container_add_class_device_adapter 376 attribute_container_add_class_device_adapter
377 and attribute_container_classdev_to_container 377 and attribute_container_classdev_to_container
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 01825ee7db64..2e9d6b41f034 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -717,7 +717,7 @@ used, and when it gets full, throws out the least used one.
717 <para> 717 <para>
718For our first example, we assume that all operations are in user 718For our first example, we assume that all operations are in user
719context (ie. from system calls), so we can sleep. This means we can 719context (ie. from system calls), so we can sleep. This means we can
720use a semaphore to protect the cache and all the objects within 720use a mutex to protect the cache and all the objects within
721it. Here's the code: 721it. Here's the code:
722 </para> 722 </para>
723 723
@@ -725,7 +725,7 @@ it. Here's the code:
725#include &lt;linux/list.h&gt; 725#include &lt;linux/list.h&gt;
726#include &lt;linux/slab.h&gt; 726#include &lt;linux/slab.h&gt;
727#include &lt;linux/string.h&gt; 727#include &lt;linux/string.h&gt;
728#include &lt;asm/semaphore.h&gt; 728#include &lt;linux/mutex.h&gt;
729#include &lt;asm/errno.h&gt; 729#include &lt;asm/errno.h&gt;
730 730
731struct object 731struct object
@@ -737,7 +737,7 @@ struct object
737}; 737};
738 738
739/* Protects the cache, cache_num, and the objects within it */ 739/* Protects the cache, cache_num, and the objects within it */
740static DECLARE_MUTEX(cache_lock); 740static DEFINE_MUTEX(cache_lock);
741static LIST_HEAD(cache); 741static LIST_HEAD(cache);
742static unsigned int cache_num = 0; 742static unsigned int cache_num = 0;
743#define MAX_CACHE_SIZE 10 743#define MAX_CACHE_SIZE 10
@@ -789,17 +789,17 @@ int cache_add(int id, const char *name)
789 obj-&gt;id = id; 789 obj-&gt;id = id;
790 obj-&gt;popularity = 0; 790 obj-&gt;popularity = 0;
791 791
792 down(&amp;cache_lock); 792 mutex_lock(&amp;cache_lock);
793 __cache_add(obj); 793 __cache_add(obj);
794 up(&amp;cache_lock); 794 mutex_unlock(&amp;cache_lock);
795 return 0; 795 return 0;
796} 796}
797 797
798void cache_delete(int id) 798void cache_delete(int id)
799{ 799{
800 down(&amp;cache_lock); 800 mutex_lock(&amp;cache_lock);
801 __cache_delete(__cache_find(id)); 801 __cache_delete(__cache_find(id));
802 up(&amp;cache_lock); 802 mutex_unlock(&amp;cache_lock);
803} 803}
804 804
805int cache_find(int id, char *name) 805int cache_find(int id, char *name)
@@ -807,13 +807,13 @@ int cache_find(int id, char *name)
807 struct object *obj; 807 struct object *obj;
808 int ret = -ENOENT; 808 int ret = -ENOENT;
809 809
810 down(&amp;cache_lock); 810 mutex_lock(&amp;cache_lock);
811 obj = __cache_find(id); 811 obj = __cache_find(id);
812 if (obj) { 812 if (obj) {
813 ret = 0; 813 ret = 0;
814 strcpy(name, obj-&gt;name); 814 strcpy(name, obj-&gt;name);
815 } 815 }
816 up(&amp;cache_lock); 816 mutex_unlock(&amp;cache_lock);
817 return ret; 817 return ret;
818} 818}
819</programlisting> 819</programlisting>
@@ -853,7 +853,7 @@ The change is shown below, in standard patch format: the
853 int popularity; 853 int popularity;
854 }; 854 };
855 855
856-static DECLARE_MUTEX(cache_lock); 856-static DEFINE_MUTEX(cache_lock);
857+static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; 857+static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED;
858 static LIST_HEAD(cache); 858 static LIST_HEAD(cache);
859 static unsigned int cache_num = 0; 859 static unsigned int cache_num = 0;
@@ -870,22 +870,22 @@ The change is shown below, in standard patch format: the
870 obj-&gt;id = id; 870 obj-&gt;id = id;
871 obj-&gt;popularity = 0; 871 obj-&gt;popularity = 0;
872 872
873- down(&amp;cache_lock); 873- mutex_lock(&amp;cache_lock);
874+ spin_lock_irqsave(&amp;cache_lock, flags); 874+ spin_lock_irqsave(&amp;cache_lock, flags);
875 __cache_add(obj); 875 __cache_add(obj);
876- up(&amp;cache_lock); 876- mutex_unlock(&amp;cache_lock);
877+ spin_unlock_irqrestore(&amp;cache_lock, flags); 877+ spin_unlock_irqrestore(&amp;cache_lock, flags);
878 return 0; 878 return 0;
879 } 879 }
880 880
881 void cache_delete(int id) 881 void cache_delete(int id)
882 { 882 {
883- down(&amp;cache_lock); 883- mutex_lock(&amp;cache_lock);
884+ unsigned long flags; 884+ unsigned long flags;
885+ 885+
886+ spin_lock_irqsave(&amp;cache_lock, flags); 886+ spin_lock_irqsave(&amp;cache_lock, flags);
887 __cache_delete(__cache_find(id)); 887 __cache_delete(__cache_find(id));
888- up(&amp;cache_lock); 888- mutex_unlock(&amp;cache_lock);
889+ spin_unlock_irqrestore(&amp;cache_lock, flags); 889+ spin_unlock_irqrestore(&amp;cache_lock, flags);
890 } 890 }
891 891
@@ -895,14 +895,14 @@ The change is shown below, in standard patch format: the
895 int ret = -ENOENT; 895 int ret = -ENOENT;
896+ unsigned long flags; 896+ unsigned long flags;
897 897
898- down(&amp;cache_lock); 898- mutex_lock(&amp;cache_lock);
899+ spin_lock_irqsave(&amp;cache_lock, flags); 899+ spin_lock_irqsave(&amp;cache_lock, flags);
900 obj = __cache_find(id); 900 obj = __cache_find(id);
901 if (obj) { 901 if (obj) {
902 ret = 0; 902 ret = 0;
903 strcpy(name, obj-&gt;name); 903 strcpy(name, obj-&gt;name);
904 } 904 }
905- up(&amp;cache_lock); 905- mutex_unlock(&amp;cache_lock);
906+ spin_unlock_irqrestore(&amp;cache_lock, flags); 906+ spin_unlock_irqrestore(&amp;cache_lock, flags);
907 return ret; 907 return ret;
908 } 908 }
diff --git a/Documentation/acpi/method-tracing.txt b/Documentation/acpi/method-tracing.txt
new file mode 100644
index 000000000000..f6efb1ea559a
--- /dev/null
+++ b/Documentation/acpi/method-tracing.txt
@@ -0,0 +1,26 @@
1/sys/module/acpi/parameters/:
2
3trace_method_name
4 The AML method name that the user wants to trace
5
6trace_debug_layer
7 The temporary debug_layer used when tracing the method.
8 Using 0xffffffff by default if it is 0.
9
10trace_debug_level
11 The temporary debug_level used when tracing the method.
12 Using 0x00ffffff by default if it is 0.
13
14trace_state
15 The status of the tracing feature.
16
17 "enabled" means this feature is enabled
18 and the AML method is traced every time it's executed.
19
20 "1" means this feature is enabled and the AML method
21 will only be traced during the next execution.
22
23 "disabled" means this feature is disabled.
24 Users can enable/disable this debug tracing feature by
25 "echo string > /sys/module/acpi/parameters/trace_state".
26 "string" should be one of "enable", "disable" and "1".
diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt
index 63883a892120..748328370250 100644
--- a/Documentation/fb/deferred_io.txt
+++ b/Documentation/fb/deferred_io.txt
@@ -7,10 +7,10 @@ IO. The following example may be a useful explanation of how one such setup
7works: 7works:
8 8
9- userspace app like Xfbdev mmaps framebuffer 9- userspace app like Xfbdev mmaps framebuffer
10- deferred IO and driver sets up nopage and page_mkwrite handlers 10- deferred IO and driver sets up fault and page_mkwrite handlers
11- userspace app tries to write to mmaped vaddress 11- userspace app tries to write to mmaped vaddress
12- we get pagefault and reach nopage handler 12- we get pagefault and reach fault handler
13- nopage handler finds and returns physical page 13- fault handler finds and returns physical page
14- we get page_mkwrite where we add this page to a list 14- we get page_mkwrite where we add this page to a list
15- schedule a workqueue task to be run after a delay 15- schedule a workqueue task to be run after a delay
16- app continues writing to that page with no additional cost. this is 16- app continues writing to that page with no additional cost. this is
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index a7d9d179131a..68ce1300a360 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -208,13 +208,6 @@ Who: Randy Dunlap <randy.dunlap@oracle.com>
208 208
209--------------------------- 209---------------------------
210 210
211What: drivers depending on OSS_OBSOLETE
212When: options in 2.6.23, code in 2.6.25
213Why: obsolete OSS drivers
214Who: Adrian Bunk <bunk@stusta.de>
215
216---------------------------
217
218What: libata spindown skipping and warning 211What: libata spindown skipping and warning
219When: Dec 2008 212When: Dec 2008
220Why: Some halt(8) implementations synchronize caches for and spin 213Why: Some halt(8) implementations synchronize caches for and spin
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index e2799b5fafea..5681e2fa1496 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1029,6 +1029,14 @@ nr_inodes
1029Denotes the number of inodes the system has allocated. This number will 1029Denotes the number of inodes the system has allocated. This number will
1030grow and shrink dynamically. 1030grow and shrink dynamically.
1031 1031
1032nr_open
1033-------
1034
1035Denotes the maximum number of file-handles a process can
1036allocate. Default value is 1024*1024 (1048576) which should be
1037enough for most machines. Actual limit depends on RLIMIT_NOFILE
1038resource limit.
1039
1032nr_free_inodes 1040nr_free_inodes
1033-------------- 1041--------------
1034 1042
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9ad4e6fc56fd..8ea41b6e6a85 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -147,8 +147,10 @@ and is between 256 and 4096 characters. It is defined in the file
147 default: 0 147 default: 0
148 148
149 acpi_sleep= [HW,ACPI] Sleep options 149 acpi_sleep= [HW,ACPI] Sleep options
150 Format: { s3_bios, s3_mode } 150 Format: { s3_bios, s3_mode, s3_beep }
151 See Documentation/power/video.txt 151 See Documentation/power/video.txt for s3_bios and s3_mode.
152 s3_beep is for debugging; it makes the PC's speaker beep
153 as soon as the kernel's real-mode entry point is called.
152 154
153 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode 155 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
154 Format: { level | edge | high | low } 156 Format: { level | edge | high | low }
@@ -780,6 +782,9 @@ and is between 256 and 4096 characters. It is defined in the file
780 loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same 782 loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
781 as idle=poll. 783 as idle=poll.
782 784
785 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
786 Claim all unknown PCI IDE storage controllers.
787
783 ignore_loglevel [KNL] 788 ignore_loglevel [KNL]
784 Ignore loglevel setting - this will print /all/ 789 Ignore loglevel setting - this will print /all/
785 kernel messages to the console. Useful for debugging. 790 kernel messages to the console. Useful for debugging.
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 53a63890aea4..30c101761d0d 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -96,7 +96,9 @@ or in registers (e.g., for x86_64 or for an i386 fastcall function).
96The jprobe will work in either case, so long as the handler's 96The jprobe will work in either case, so long as the handler's
97prototype matches that of the probed function. 97prototype matches that of the probed function.
98 98
991.3 How Does a Return Probe Work? 991.3 Return Probes
100
1011.3.1 How Does a Return Probe Work?
100 102
101When you call register_kretprobe(), Kprobes establishes a kprobe at 103When you call register_kretprobe(), Kprobes establishes a kprobe at
102the entry to the function. When the probed function is called and this 104the entry to the function. When the probed function is called and this
@@ -107,9 +109,9 @@ At boot time, Kprobes registers a kprobe at the trampoline.
107 109
108When the probed function executes its return instruction, control 110When the probed function executes its return instruction, control
109passes to the trampoline and that probe is hit. Kprobes' trampoline 111passes to the trampoline and that probe is hit. Kprobes' trampoline
110handler calls the user-specified handler associated with the kretprobe, 112handler calls the user-specified return handler associated with the
111then sets the saved instruction pointer to the saved return address, 113kretprobe, then sets the saved instruction pointer to the saved return
112and that's where execution resumes upon return from the trap. 114address, and that's where execution resumes upon return from the trap.
113 115
114While the probed function is executing, its return address is 116While the probed function is executing, its return address is
115stored in an object of type kretprobe_instance. Before calling 117stored in an object of type kretprobe_instance. Before calling
@@ -131,6 +133,30 @@ zero when the return probe is registered, and is incremented every
131time the probed function is entered but there is no kretprobe_instance 133time the probed function is entered but there is no kretprobe_instance
132object available for establishing the return probe. 134object available for establishing the return probe.
133 135
1361.3.2 Kretprobe entry-handler
137
138Kretprobes also provides an optional user-specified handler which runs
139on function entry. This handler is specified by setting the entry_handler
140field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the
141function entry is hit, the user-defined entry_handler, if any, is invoked.
142If the entry_handler returns 0 (success) then a corresponding return handler
143is guaranteed to be called upon function return. If the entry_handler
144returns a non-zero error then Kprobes leaves the return address as is, and
145the kretprobe has no further effect for that particular function instance.
146
147Multiple entry and return handler invocations are matched using the unique
148kretprobe_instance object associated with them. Additionally, a user
149may also specify per return-instance private data to be part of each
150kretprobe_instance object. This is especially useful when sharing private
151data between corresponding user entry and return handlers. The size of each
152private data object can be specified at kretprobe registration time by
153setting the data_size field of the kretprobe struct. This data can be
154accessed through the data field of each kretprobe_instance object.
155
156In case probed function is entered but there is no kretprobe_instance
157object available, then in addition to incrementing the nmissed count,
158the user entry_handler invocation is also skipped.
159
1342. Architectures Supported 1602. Architectures Supported
135 161
136Kprobes, jprobes, and return probes are implemented on the following 162Kprobes, jprobes, and return probes are implemented on the following
@@ -274,6 +300,8 @@ of interest:
274- ret_addr: the return address 300- ret_addr: the return address
275- rp: points to the corresponding kretprobe object 301- rp: points to the corresponding kretprobe object
276- task: points to the corresponding task struct 302- task: points to the corresponding task struct
303- data: points to per return-instance private data; see "Kretprobe
304 entry-handler" for details.
277 305
278The regs_return_value(regs) macro provides a simple abstraction to 306The regs_return_value(regs) macro provides a simple abstraction to
279extract the return value from the appropriate register as defined by 307extract the return value from the appropriate register as defined by
@@ -556,23 +584,52 @@ report failed calls to sys_open().
556#include <linux/kernel.h> 584#include <linux/kernel.h>
557#include <linux/module.h> 585#include <linux/module.h>
558#include <linux/kprobes.h> 586#include <linux/kprobes.h>
587#include <linux/ktime.h>
588
589/* per-instance private data */
590struct my_data {
591 ktime_t entry_stamp;
592};
559 593
560static const char *probed_func = "sys_open"; 594static const char *probed_func = "sys_open";
561 595
562/* Return-probe handler: If the probed function fails, log the return value. */ 596/* Timestamp function entry. */
563static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) 597static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
598{
599 struct my_data *data;
600
601 if(!current->mm)
602 return 1; /* skip kernel threads */
603
604 data = (struct my_data *)ri->data;
605 data->entry_stamp = ktime_get();
606 return 0;
607}
608
609/* If the probed function failed, log the return value and duration.
610 * Duration may turn out to be zero consistently, depending upon the
611 * granularity of time accounting on the platform. */
612static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
564{ 613{
565 int retval = regs_return_value(regs); 614 int retval = regs_return_value(regs);
615 struct my_data *data = (struct my_data *)ri->data;
616 s64 delta;
617 ktime_t now;
618
566 if (retval < 0) { 619 if (retval < 0) {
567 printk("%s returns %d\n", probed_func, retval); 620 now = ktime_get();
621 delta = ktime_to_ns(ktime_sub(now, data->entry_stamp));
622 printk("%s: return val = %d (duration = %lld ns)\n",
623 probed_func, retval, delta);
568 } 624 }
569 return 0; 625 return 0;
570} 626}
571 627
572static struct kretprobe my_kretprobe = { 628static struct kretprobe my_kretprobe = {
573 .handler = ret_handler, 629 .handler = return_handler,
574 /* Probe up to 20 instances concurrently. */ 630 .entry_handler = entry_handler,
575 .maxactive = 20 631 .data_size = sizeof(struct my_data),
632 .maxactive = 20, /* probe up to 20 instances concurrently */
576}; 633};
577 634
578static int __init kretprobe_init(void) 635static int __init kretprobe_init(void)
@@ -584,7 +641,7 @@ static int __init kretprobe_init(void)
584 printk("register_kretprobe failed, returned %d\n", ret); 641 printk("register_kretprobe failed, returned %d\n", ret);
585 return -1; 642 return -1;
586 } 643 }
587 printk("Planted return probe at %p\n", my_kretprobe.kp.addr); 644 printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name);
588 return 0; 645 return 0;
589} 646}
590 647
@@ -594,7 +651,7 @@ static void __exit kretprobe_exit(void)
594 printk("kretprobe unregistered\n"); 651 printk("kretprobe unregistered\n");
595 /* nmissed > 0 suggests that maxactive was set too low. */ 652 /* nmissed > 0 suggests that maxactive was set too low. */
596 printk("Missed probing %d instances of %s\n", 653 printk("Missed probing %d instances of %s\n",
597 my_kretprobe.nmissed, probed_func); 654 my_kretprobe.nmissed, probed_func);
598} 655}
599 656
600module_init(kretprobe_init) 657module_init(kretprobe_init)
diff --git a/Documentation/kref.txt b/Documentation/kref.txt
index f38b59d00c63..130b6e87aa7e 100644
--- a/Documentation/kref.txt
+++ b/Documentation/kref.txt
@@ -141,10 +141,10 @@ The last rule (rule 3) is the nastiest one to handle. Say, for
141instance, you have a list of items that are each kref-ed, and you wish 141instance, you have a list of items that are each kref-ed, and you wish
142to get the first one. You can't just pull the first item off the list 142to get the first one. You can't just pull the first item off the list
143and kref_get() it. That violates rule 3 because you are not already 143and kref_get() it. That violates rule 3 because you are not already
144holding a valid pointer. You must add locks or semaphores. For 144holding a valid pointer. You must add a mutex (or some other lock).
145instance: 145For instance:
146 146
147static DECLARE_MUTEX(sem); 147static DEFINE_MUTEX(mutex);
148static LIST_HEAD(q); 148static LIST_HEAD(q);
149struct my_data 149struct my_data
150{ 150{
@@ -155,12 +155,12 @@ struct my_data
155static struct my_data *get_entry() 155static struct my_data *get_entry()
156{ 156{
157 struct my_data *entry = NULL; 157 struct my_data *entry = NULL;
158 down(&sem); 158 mutex_lock(&mutex);
159 if (!list_empty(&q)) { 159 if (!list_empty(&q)) {
160 entry = container_of(q.next, struct my_q_entry, link); 160 entry = container_of(q.next, struct my_q_entry, link);
161 kref_get(&entry->refcount); 161 kref_get(&entry->refcount);
162 } 162 }
163 up(&sem); 163 mutex_unlock(&mutex);
164 return entry; 164 return entry;
165} 165}
166 166
@@ -174,9 +174,9 @@ static void release_entry(struct kref *ref)
174 174
175static void put_entry(struct my_data *entry) 175static void put_entry(struct my_data *entry)
176{ 176{
177 down(&sem); 177 mutex_lock(&mutex);
178 kref_put(&entry->refcount, release_entry); 178 kref_put(&entry->refcount, release_entry);
179 up(&sem); 179 mutex_unlock(&mutex);
180} 180}
181 181
182The kref_put() return value is useful if you do not want to hold the 182The kref_put() return value is useful if you do not want to hold the
@@ -191,13 +191,13 @@ static void release_entry(struct kref *ref)
191 191
192static void put_entry(struct my_data *entry) 192static void put_entry(struct my_data *entry)
193{ 193{
194 down(&sem); 194 mutex_lock(&mutex);
195 if (kref_put(&entry->refcount, release_entry)) { 195 if (kref_put(&entry->refcount, release_entry)) {
196 list_del(&entry->link); 196 list_del(&entry->link);
197 up(&sem); 197 mutex_unlock(&mutex);
198 kfree(entry); 198 kfree(entry);
199 } else 199 } else
200 up(&sem); 200 mutex_unlock(&mutex);
201} 201}
202 202
203This is really more useful if you have to call other routines as part 203This is really more useful if you have to call other routines as part
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 5818628207b5..396cdd982c26 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -416,6 +416,16 @@ also have
416 sectors in total that could need to be processed. The two 416 sectors in total that could need to be processed. The two
417 numbers are separated by a '/' thus effectively showing one 417 numbers are separated by a '/' thus effectively showing one
418 value, a fraction of the process that is complete. 418 value, a fraction of the process that is complete.
419 A 'select' on this attribute will return when resync completes,
420 when it reaches the current sync_max (below) and possibly at
421 other times.
422
423 sync_max
424 This is a number of sectors at which point a resync/recovery
425 process will pause. When a resync is active, the value can
426 only ever be increased, never decreased. The value of 'max'
427 effectively disables the limit.
428
419 429
420 sync_speed 430 sync_speed
421 This shows the current actual speed, in K/sec, of the current 431 This shows the current actual speed, in K/sec, of the current
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index aea7e9209667..9d60ab717a7b 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -386,6 +386,11 @@ before suspending; then remount them after resuming.
386There is a work-around for this problem. For more information, see 386There is a work-around for this problem. For more information, see
387Documentation/usb/persist.txt. 387Documentation/usb/persist.txt.
388 388
389Q: Can I suspend-to-disk using a swap partition under LVM?
390
391A: No. You can suspend successfully, but you'll not be able to
392resume. uswsusp should be able to work with LVM. See suspend.sf.net.
393
389Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were 394Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
390compiled with the similar configuration files. Anyway I found that 395compiled with the similar configuration files. Anyway I found that
391suspend to disk (and resume) is much slower on 2.6.16 compared to 396suspend to disk (and resume) is much slower on 2.6.16 compared to
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index e20b19c1b60d..8deffcd68cb8 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -182,8 +182,8 @@ driver returns ENOIOCTLCMD. Some common examples:
182 since the frequency is stored in the irq_freq member of the rtc_device 182 since the frequency is stored in the irq_freq member of the rtc_device
183 structure. Your driver needs to initialize the irq_freq member during 183 structure. Your driver needs to initialize the irq_freq member during
184 init. Make sure you check the requested frequency is in range of your 184 init. Make sure you check the requested frequency is in range of your
185 hardware in the irq_set_freq function. If you cannot actually change 185 hardware in the irq_set_freq function. If it isn't, return -EINVAL. If
186 the frequency, just return -ENOTTY. 186 you cannot actually change the frequency, do not define irq_set_freq.
187 187
188If all else fails, check out the rtc-test.c driver! 188If all else fails, check out the rtc-test.c driver!
189 189
@@ -268,8 +268,8 @@ int main(int argc, char **argv)
268 /* This read will block */ 268 /* This read will block */
269 retval = read(fd, &data, sizeof(unsigned long)); 269 retval = read(fd, &data, sizeof(unsigned long));
270 if (retval == -1) { 270 if (retval == -1) {
271 perror("read"); 271 perror("read");
272 exit(errno); 272 exit(errno);
273 } 273 }
274 fprintf(stderr, " %d",i); 274 fprintf(stderr, " %d",i);
275 fflush(stderr); 275 fflush(stderr);
@@ -326,11 +326,11 @@ test_READ:
326 rtc_tm.tm_sec %= 60; 326 rtc_tm.tm_sec %= 60;
327 rtc_tm.tm_min++; 327 rtc_tm.tm_min++;
328 } 328 }
329 if (rtc_tm.tm_min == 60) { 329 if (rtc_tm.tm_min == 60) {
330 rtc_tm.tm_min = 0; 330 rtc_tm.tm_min = 0;
331 rtc_tm.tm_hour++; 331 rtc_tm.tm_hour++;
332 } 332 }
333 if (rtc_tm.tm_hour == 24) 333 if (rtc_tm.tm_hour == 24)
334 rtc_tm.tm_hour = 0; 334 rtc_tm.tm_hour = 0;
335 335
336 retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); 336 retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
@@ -407,8 +407,8 @@ test_PIE:
407 "\n...Periodic IRQ rate is fixed\n"); 407 "\n...Periodic IRQ rate is fixed\n");
408 goto done; 408 goto done;
409 } 409 }
410 perror("RTC_IRQP_SET ioctl"); 410 perror("RTC_IRQP_SET ioctl");
411 exit(errno); 411 exit(errno);
412 } 412 }
413 413
414 fprintf(stderr, "\n%ldHz:\t", tmp); 414 fprintf(stderr, "\n%ldHz:\t", tmp);
@@ -417,27 +417,27 @@ test_PIE:
417 /* Enable periodic interrupts */ 417 /* Enable periodic interrupts */
418 retval = ioctl(fd, RTC_PIE_ON, 0); 418 retval = ioctl(fd, RTC_PIE_ON, 0);
419 if (retval == -1) { 419 if (retval == -1) {
420 perror("RTC_PIE_ON ioctl"); 420 perror("RTC_PIE_ON ioctl");
421 exit(errno); 421 exit(errno);
422 } 422 }
423 423
424 for (i=1; i<21; i++) { 424 for (i=1; i<21; i++) {
425 /* This blocks */ 425 /* This blocks */
426 retval = read(fd, &data, sizeof(unsigned long)); 426 retval = read(fd, &data, sizeof(unsigned long));
427 if (retval == -1) { 427 if (retval == -1) {
428 perror("read"); 428 perror("read");
429 exit(errno); 429 exit(errno);
430 } 430 }
431 fprintf(stderr, " %d",i); 431 fprintf(stderr, " %d",i);
432 fflush(stderr); 432 fflush(stderr);
433 irqcount++; 433 irqcount++;
434 } 434 }
435 435
436 /* Disable periodic interrupts */ 436 /* Disable periodic interrupts */
437 retval = ioctl(fd, RTC_PIE_OFF, 0); 437 retval = ioctl(fd, RTC_PIE_OFF, 0);
438 if (retval == -1) { 438 if (retval == -1) {
439 perror("RTC_PIE_OFF ioctl"); 439 perror("RTC_PIE_OFF ioctl");
440 exit(errno); 440 exit(errno);
441 } 441 }
442 } 442 }
443 443
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index aa986a35e994..f99254327ae5 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
23- inode-max 23- inode-max
24- inode-nr 24- inode-nr
25- inode-state 25- inode-state
26- nr_open
26- overflowuid 27- overflowuid
27- overflowgid 28- overflowgid
28- suid_dumpable 29- suid_dumpable
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.
91 92
92============================================================== 93==============================================================
93 94
95nr_open:
96
97This denotes the maximum number of file-handles a process can
98allocate. Default value is 1024*1024 (1048576) which should be
99enough for most machines. Actual limit depends on RLIMIT_NOFILE
100resource limit.
101
102==============================================================
103
94inode-max, inode-nr & inode-state: 104inode-max, inode-nr & inode-state:
95 105
96As with file handles, the kernel allocates the inode structures 106As with file handles, the kernel allocates the inode structures
diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/thinkpad-acpi.txt
index 10c041ca13c7..6c2477754a2a 100644
--- a/Documentation/thinkpad-acpi.txt
+++ b/Documentation/thinkpad-acpi.txt
@@ -1,7 +1,7 @@
1 ThinkPad ACPI Extras Driver 1 ThinkPad ACPI Extras Driver
2 2
3 Version 0.17 3 Version 0.19
4 October 04th, 2007 4 January 06th, 2008
5 5
6 Borislav Deianov <borislav@users.sf.net> 6 Borislav Deianov <borislav@users.sf.net>
7 Henrique de Moraes Holschuh <hmh@hmh.eng.br> 7 Henrique de Moraes Holschuh <hmh@hmh.eng.br>
@@ -215,6 +215,11 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file:
215 ... any other 8-hex-digit mask ... 215 ... any other 8-hex-digit mask ...
216 echo reset > /proc/acpi/ibm/hotkey -- restore the original mask 216 echo reset > /proc/acpi/ibm/hotkey -- restore the original mask
217 217
218The procfs interface does not support NVRAM polling control. So as to
219maintain maximum bug-to-bug compatibility, it does not report any masks,
220nor does it allow one to manipulate the hot key mask when the firmware
221does not support masks at all, even if NVRAM polling is in use.
222
218sysfs notes: 223sysfs notes:
219 224
220 hotkey_bios_enabled: 225 hotkey_bios_enabled:
@@ -231,17 +236,26 @@ sysfs notes:
231 to this value. 236 to this value.
232 237
233 hotkey_enable: 238 hotkey_enable:
234 Enables/disables the hot keys feature, and reports 239 Enables/disables the hot keys feature in the ACPI
235 current status of the hot keys feature. 240 firmware, and reports current status of the hot keys
241 feature. Has no effect on the NVRAM hot key polling
242 functionality.
236 243
237 0: disables the hot keys feature / feature disabled 244 0: disables the hot keys feature / feature disabled
238 1: enables the hot keys feature / feature enabled 245 1: enables the hot keys feature / feature enabled
239 246
240 hotkey_mask: 247 hotkey_mask:
241 bit mask to enable driver-handling and ACPI event 248 bit mask to enable driver-handling (and depending on
242 generation for each hot key (see above). Returns the 249 the firmware, ACPI event generation) for each hot key
243 current status of the hot keys mask, and allows one to 250 (see above). Returns the current status of the hot keys
244 modify it. 251 mask, and allows one to modify it.
252
253 Note: when NVRAM polling is active, the firmware mask
254 will be different from the value returned by
255 hotkey_mask. The driver will retain enabled bits for
256 hotkeys that are under NVRAM polling even if the
257 firmware refuses them, and will not set these bits on
258 the firmware hot key mask.
245 259
246 hotkey_all_mask: 260 hotkey_all_mask:
247 bit mask that should enable event reporting for all 261 bit mask that should enable event reporting for all
@@ -257,12 +271,48 @@ sysfs notes:
257 handled by the firmware anyway. Echo it to 271 handled by the firmware anyway. Echo it to
258 hotkey_mask above, to use. 272 hotkey_mask above, to use.
259 273
274 hotkey_source_mask:
275 bit mask that selects which hot keys will the driver
276 poll the NVRAM for. This is auto-detected by the driver
277 based on the capabilities reported by the ACPI firmware,
278 but it can be overridden at runtime.
279
280 Hot keys whose bits are set in both hotkey_source_mask
281 and also on hotkey_mask are polled for in NVRAM. Only a
282 few hot keys are available through CMOS NVRAM polling.
283
284 Warning: when in NVRAM mode, the volume up/down/mute
285 keys are synthesized according to changes in the mixer,
286 so you have to use volume up or volume down to unmute,
287 as per the ThinkPad volume mixer user interface. When
288 in ACPI event mode, volume up/down/mute are reported as
289 separate events, but this behaviour may be corrected in
290 future releases of this driver, in which case the
291 ThinkPad volume mixer user interface semanthics will be
292 enforced.
293
294 hotkey_poll_freq:
295 frequency in Hz for hot key polling. It must be between
296 0 and 25 Hz. Polling is only carried out when strictly
297 needed.
298
299 Setting hotkey_poll_freq to zero disables polling, and
300 will cause hot key presses that require NVRAM polling
301 to never be reported.
302
303 Setting hotkey_poll_freq too low will cause repeated
304 pressings of the same hot key to be misreported as a
305 single key press, or to not even be detected at all.
306 The recommended polling frequency is 10Hz.
307
260 hotkey_radio_sw: 308 hotkey_radio_sw:
261 if the ThinkPad has a hardware radio switch, this 309 if the ThinkPad has a hardware radio switch, this
262 attribute will read 0 if the switch is in the "radios 310 attribute will read 0 if the switch is in the "radios
263 disabled" postition, and 1 if the switch is in the 311 disabled" postition, and 1 if the switch is in the
264 "radios enabled" position. 312 "radios enabled" position.
265 313
314 This attribute has poll()/select() support.
315
266 hotkey_report_mode: 316 hotkey_report_mode:
267 Returns the state of the procfs ACPI event report mode 317 Returns the state of the procfs ACPI event report mode
268 filter for hot keys. If it is set to 1 (the default), 318 filter for hot keys. If it is set to 1 (the default),
@@ -277,6 +327,25 @@ sysfs notes:
277 May return -EPERM (write access locked out by module 327 May return -EPERM (write access locked out by module
278 parameter) or -EACCES (read-only). 328 parameter) or -EACCES (read-only).
279 329
330 wakeup_reason:
331 Set to 1 if the system is waking up because the user
332 requested a bay ejection. Set to 2 if the system is
333 waking up because the user requested the system to
334 undock. Set to zero for normal wake-ups or wake-ups
335 due to unknown reasons.
336
337 This attribute has poll()/select() support.
338
339 wakeup_hotunplug_complete:
340 Set to 1 if the system was waken up because of an
341 undock or bay ejection request, and that request
342 was sucessfully completed. At this point, it might
343 be useful to send the system back to sleep, at the
344 user's choice. Refer to HKEY events 0x4003 and
345 0x3003, below.
346
347 This attribute has poll()/select() support.
348
280input layer notes: 349input layer notes:
281 350
282A Hot key is mapped to a single input layer EV_KEY event, possibly 351A Hot key is mapped to a single input layer EV_KEY event, possibly
@@ -427,6 +496,23 @@ Non hot-key ACPI HKEY event map:
427The above events are not propagated by the driver, except for legacy 496The above events are not propagated by the driver, except for legacy
428compatibility purposes when hotkey_report_mode is set to 1. 497compatibility purposes when hotkey_report_mode is set to 1.
429 498
4990x2304 System is waking up from suspend to undock
5000x2305 System is waking up from suspend to eject bay
5010x2404 System is waking up from hibernation to undock
5020x2405 System is waking up from hibernation to eject bay
503
504The above events are never propagated by the driver.
505
5060x3003 Bay ejection (see 0x2x05) complete, can sleep again
5070x4003 Undocked (see 0x2x04), can sleep again
5080x5009 Tablet swivel: switched to tablet mode
5090x500A Tablet swivel: switched to normal mode
5100x500B Tablet pen insterted into its storage bay
5110x500C Tablet pen removed from its storage bay
5120x5010 Brightness level changed (newer Lenovo BIOSes)
513
514The above events are propagated by the driver.
515
430Compatibility notes: 516Compatibility notes:
431 517
432ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never 518ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never
@@ -1263,3 +1349,17 @@ Sysfs interface changelog:
1263 and the hwmon class for libsensors4 (lm-sensors 3) 1349 and the hwmon class for libsensors4 (lm-sensors 3)
1264 compatibility. Moved all hwmon attributes to this 1350 compatibility. Moved all hwmon attributes to this
1265 new platform device. 1351 new platform device.
1352
13530x020100: Marker for thinkpad-acpi with hot key NVRAM polling
1354 support. If you must, use it to know you should not
1355 start an userspace NVRAM poller (allows to detect when
1356 NVRAM is compiled out by the user because it is
1357 unneeded/undesired in the first place).
13580x020101: Marker for thinkpad-acpi with hot key NVRAM polling
1359 and proper hotkey_mask semanthics (version 8 of the
1360 NVRAM polling patch). Some development snapshots of
1361 0.18 had an earlier version that did strange things
1362 to hotkey_mask.
1363
13640x020200: Add poll()/select() support to the following attributes:
1365 hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
new file mode 100644
index 000000000000..6223eace3c09
--- /dev/null
+++ b/Documentation/unaligned-memory-access.txt
@@ -0,0 +1,226 @@
1UNALIGNED MEMORY ACCESSES
2=========================
3
4Linux runs on a wide variety of architectures which have varying behaviour
5when it comes to memory access. This document presents some details about
6unaligned accesses, why you need to write code that doesn't cause them,
7and how to write such code!
8
9
10The definition of an unaligned access
11=====================================
12
13Unaligned memory accesses occur when you try to read N bytes of data starting
14from an address that is not evenly divisible by N (i.e. addr % N != 0).
15For example, reading 4 bytes of data from address 0x10004 is fine, but
16reading 4 bytes of data from address 0x10005 would be an unaligned memory
17access.
18
19The above may seem a little vague, as memory access can happen in different
20ways. The context here is at the machine code level: certain instructions read
21or write a number of bytes to or from memory (e.g. movb, movw, movl in x86
22assembly). As will become clear, it is relatively easy to spot C statements
23which will compile to multiple-byte memory access instructions, namely when
24dealing with types such as u16, u32 and u64.
25
26
27Natural alignment
28=================
29
30The rule mentioned above forms what we refer to as natural alignment:
31When accessing N bytes of memory, the base memory address must be evenly
32divisible by N, i.e. addr % N == 0.
33
34When writing code, assume the target architecture has natural alignment
35requirements.
36
37In reality, only a few architectures require natural alignment on all sizes
38of memory access. However, we must consider ALL supported architectures;
39writing code that satisfies natural alignment requirements is the easiest way
40to achieve full portability.
41
42
43Why unaligned access is bad
44===========================
45
46The effects of performing an unaligned memory access vary from architecture
47to architecture. It would be easy to write a whole document on the differences
48here; a summary of the common scenarios is presented below:
49
50 - Some architectures are able to perform unaligned memory accesses
51 transparently, but there is usually a significant performance cost.
52 - Some architectures raise processor exceptions when unaligned accesses
53 happen. The exception handler is able to correct the unaligned access,
54 at significant cost to performance.
55 - Some architectures raise processor exceptions when unaligned accesses
56 happen, but the exceptions do not contain enough information for the
57 unaligned access to be corrected.
58 - Some architectures are not capable of unaligned memory access, but will
59 silently perform a different memory access to the one that was requested,
60 resulting a a subtle code bug that is hard to detect!
61
62It should be obvious from the above that if your code causes unaligned
63memory accesses to happen, your code will not work correctly on certain
64platforms and will cause performance problems on others.
65
66
67Code that does not cause unaligned access
68=========================================
69
70At first, the concepts above may seem a little hard to relate to actual
71coding practice. After all, you don't have a great deal of control over
72memory addresses of certain variables, etc.
73
74Fortunately things are not too complex, as in most cases, the compiler
75ensures that things will work for you. For example, take the following
76structure:
77
78 struct foo {
79 u16 field1;
80 u32 field2;
81 u8 field3;
82 };
83
84Let us assume that an instance of the above structure resides in memory
85starting at address 0x10000. With a basic level of understanding, it would
86not be unreasonable to expect that accessing field2 would cause an unaligned
87access. You'd be expecting field2 to be located at offset 2 bytes into the
88structure, i.e. address 0x10002, but that address is not evenly divisible
89by 4 (remember, we're reading a 4 byte value here).
90
91Fortunately, the compiler understands the alignment constraints, so in the
92above case it would insert 2 bytes of padding in between field1 and field2.
93Therefore, for standard structure types you can always rely on the compiler
94to pad structures so that accesses to fields are suitably aligned (assuming
95you do not cast the field to a type of different length).
96
97Similarly, you can also rely on the compiler to align variables and function
98parameters to a naturally aligned scheme, based on the size of the type of
99the variable.
100
101At this point, it should be clear that accessing a single byte (u8 or char)
102will never cause an unaligned access, because all memory addresses are evenly
103divisible by one.
104
105On a related topic, with the above considerations in mind you may observe
106that you could reorder the fields in the structure in order to place fields
107where padding would otherwise be inserted, and hence reduce the overall
108resident memory size of structure instances. The optimal layout of the
109above example is:
110
111 struct foo {
112 u32 field2;
113 u16 field1;
114 u8 field3;
115 };
116
117For a natural alignment scheme, the compiler would only have to add a single
118byte of padding at the end of the structure. This padding is added in order
119to satisfy alignment constraints for arrays of these structures.
120
121Another point worth mentioning is the use of __attribute__((packed)) on a
122structure type. This GCC-specific attribute tells the compiler never to
123insert any padding within structures, useful when you want to use a C struct
124to represent some data that comes in a fixed arrangement 'off the wire'.
125
126You might be inclined to believe that usage of this attribute can easily
127lead to unaligned accesses when accessing fields that do not satisfy
128architectural alignment requirements. However, again, the compiler is aware
129of the alignment constraints and will generate extra instructions to perform
130the memory access in a way that does not cause unaligned access. Of course,
131the extra instructions obviously cause a loss in performance compared to the
132non-packed case, so the packed attribute should only be used when avoiding
133structure padding is of importance.
134
135
136Code that causes unaligned access
137=================================
138
139With the above in mind, let's move onto a real life example of a function
140that can cause an unaligned memory access. The following function adapted
141from include/linux/etherdevice.h is an optimized routine to compare two
142ethernet MAC addresses for equality.
143
144unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2)
145{
146 const u16 *a = (const u16 *) addr1;
147 const u16 *b = (const u16 *) addr2;
148 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
149}
150
151In the above function, the reference to a[0] causes 2 bytes (16 bits) to
152be read from memory starting at address addr1. Think about what would happen
153if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned
154access.)
155
156Despite the potential unaligned access problems with the above function, it
157is included in the kernel anyway but is understood to only work on
15816-bit-aligned addresses. It is up to the caller to ensure this alignment or
159not use this function at all. This alignment-unsafe function is still useful
160as it is a decent optimization for the cases when you can ensure alignment,
161which is true almost all of the time in ethernet networking context.
162
163
164Here is another example of some code that could cause unaligned accesses:
165 void myfunc(u8 *data, u32 value)
166 {
167 [...]
168 *((u32 *) data) = cpu_to_le32(value);
169 [...]
170 }
171
172This code will cause unaligned accesses every time the data parameter points
173to an address that is not evenly divisible by 4.
174
175In summary, the 2 main scenarios where you may run into unaligned access
176problems involve:
177 1. Casting variables to types of different lengths
178 2. Pointer arithmetic followed by access to at least 2 bytes of data
179
180
181Avoiding unaligned accesses
182===========================
183
184The easiest way to avoid unaligned access is to use the get_unaligned() and
185put_unaligned() macros provided by the <asm/unaligned.h> header file.
186
187Going back to an earlier example of code that potentially causes unaligned
188access:
189
190 void myfunc(u8 *data, u32 value)
191 {
192 [...]
193 *((u32 *) data) = cpu_to_le32(value);
194 [...]
195 }
196
197To avoid the unaligned memory access, you would rewrite it as follows:
198
199 void myfunc(u8 *data, u32 value)
200 {
201 [...]
202 value = cpu_to_le32(value);
203 put_unaligned(value, (u32 *) data);
204 [...]
205 }
206
207The get_unaligned() macro works similarly. Assuming 'data' is a pointer to
208memory and you wish to avoid unaligned access, its usage is as follows:
209
210 u32 value = get_unaligned((u32 *) data);
211
212These macros work work for memory accesses of any length (not just 32 bits as
213in the examples above). Be aware that when compared to standard access of
214aligned memory, using these macros to access unaligned memory can be costly in
215terms of performance.
216
217If use of such macros is not convenient, another option is to use memcpy(),
218where the source or destination (or both) are of type u8* or unsigned char*.
219Due to the byte-wise nature of this operation, unaligned accesses are avoided.
220
221--
222Author: Daniel Drake <dsd@gentoo.org>
223With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
224Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock,
225Uli Kunitz, Vadim Lobanov
226
diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX
index 752613c4cea2..7b0ceaaad7af 100644
--- a/Documentation/w1/masters/00-INDEX
+++ b/Documentation/w1/masters/00-INDEX
@@ -4,3 +4,5 @@ ds2482
4 - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses. 4 - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses.
5ds2490 5ds2490
6 - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges. 6 - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges.
7w1-gpio
8 - GPIO 1-wire bus master driver.
diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio
new file mode 100644
index 000000000000..af5d3b4aa851
--- /dev/null
+++ b/Documentation/w1/masters/w1-gpio
@@ -0,0 +1,33 @@
1Kernel driver w1-gpio
2=====================
3
4Author: Ville Syrjala <syrjala@sci.fi>
5
6
7Description
8-----------
9
10GPIO 1-wire bus master driver. The driver uses the GPIO API to control the
11wire and the GPIO pin can be specified using platform data.
12
13
14Example (mach-at91)
15-------------------
16
17#include <linux/w1-gpio.h>
18
19static struct w1_gpio_platform_data foo_w1_gpio_pdata = {
20 .pin = AT91_PIN_PB20,
21 .is_open_drain = 1,
22};
23
24static struct platform_device foo_w1_device = {
25 .name = "w1-gpio",
26 .id = -1,
27 .dev.platform_data = &foo_w1_gpio_pdata,
28};
29
30...
31 at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1);
32 at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1);
33 platform_device_register(&foo_w1_device);