diff options
Diffstat (limited to 'Documentation/driver-api')
90 files changed, 15175 insertions, 111 deletions
diff --git a/Documentation/driver-api/backlight/lp855x-driver.rst b/Documentation/driver-api/backlight/lp855x-driver.rst new file mode 100644 index 000000000000..1e0b224fc397 --- /dev/null +++ b/Documentation/driver-api/backlight/lp855x-driver.rst | |||
@@ -0,0 +1,81 @@ | |||
1 | ==================== | ||
2 | Kernel driver lp855x | ||
3 | ==================== | ||
4 | |||
5 | Backlight driver for LP855x ICs | ||
6 | |||
7 | Supported chips: | ||
8 | |||
9 | Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and | ||
10 | LP8557 | ||
11 | |||
12 | Author: Milo(Woogyom) Kim <milo.kim@ti.com> | ||
13 | |||
14 | Description | ||
15 | ----------- | ||
16 | |||
17 | * Brightness control | ||
18 | |||
19 | Brightness can be controlled by the pwm input or the i2c command. | ||
20 | The lp855x driver supports both cases. | ||
21 | |||
22 | * Device attributes | ||
23 | |||
24 | 1) bl_ctl_mode | ||
25 | |||
26 | Backlight control mode. | ||
27 | |||
28 | Value: pwm based or register based | ||
29 | |||
30 | 2) chip_id | ||
31 | |||
32 | The lp855x chip id. | ||
33 | |||
34 | Value: lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557 | ||
35 | |||
36 | Platform data for lp855x | ||
37 | ------------------------ | ||
38 | |||
39 | For supporting platform specific data, the lp855x platform data can be used. | ||
40 | |||
41 | * name: | ||
42 | Backlight driver name. If it is not defined, default name is set. | ||
43 | * device_control: | ||
44 | Value of DEVICE CONTROL register. | ||
45 | * initial_brightness: | ||
46 | Initial value of backlight brightness. | ||
47 | * period_ns: | ||
48 | Platform specific PWM period value. Unit is nanoseconds. | ||
49 | Only valid when brightness is pwm input mode. | ||
50 | * size_program: | ||
51 | Total size of lp855x_rom_data. | ||
52 | * rom_data: | ||
53 | List of new eeprom/eprom registers. | ||
54 | |||
55 | Examples | ||
56 | ======== | ||
57 | |||
58 | 1) lp8552 platform data: i2c register mode with new eeprom data:: | ||
59 | |||
60 | #define EEPROM_A5_ADDR 0xA5 | ||
61 | #define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */ | ||
62 | |||
63 | static struct lp855x_rom_data lp8552_eeprom_arr[] = { | ||
64 | {EEPROM_A5_ADDR, EEPROM_A5_VAL}, | ||
65 | }; | ||
66 | |||
67 | static struct lp855x_platform_data lp8552_pdata = { | ||
68 | .name = "lcd-bl", | ||
69 | .device_control = I2C_CONFIG(LP8552), | ||
70 | .initial_brightness = INITIAL_BRT, | ||
71 | .size_program = ARRAY_SIZE(lp8552_eeprom_arr), | ||
72 | .rom_data = lp8552_eeprom_arr, | ||
73 | }; | ||
74 | |||
75 | 2) lp8556 platform data: pwm input mode with default rom data:: | ||
76 | |||
77 | static struct lp855x_platform_data lp8556_pdata = { | ||
78 | .device_control = PWM_CONFIG(LP8556), | ||
79 | .initial_brightness = INITIAL_BRT, | ||
80 | .period_ns = 1000000, | ||
81 | }; | ||
diff --git a/Documentation/driver-api/bt8xxgpio.rst b/Documentation/driver-api/bt8xxgpio.rst new file mode 100644 index 000000000000..a845feb074de --- /dev/null +++ b/Documentation/driver-api/bt8xxgpio.rst | |||
@@ -0,0 +1,62 @@ | |||
1 | =================================================================== | ||
2 | A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio) | ||
3 | =================================================================== | ||
4 | |||
5 | For advanced documentation, see http://www.bu3sch.de/btgpio.php | ||
6 | |||
7 | A generic digital 24-port PCI GPIO card can be built out of an ordinary | ||
8 | Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The | ||
9 | Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily | ||
10 | find them used for low prices on the net. | ||
11 | |||
12 | The bt8xx chip does have 24 digital GPIO ports. | ||
13 | These ports are accessible via 24 pins on the SMD chip package. | ||
14 | |||
15 | |||
16 | How to physically access the GPIO pins | ||
17 | ====================================== | ||
18 | |||
19 | There are several ways to access these pins. One might unsolder the whole chip | ||
20 | and put it on a custom PCI board, or one might only unsolder each individual | ||
21 | GPIO pin and solder that to some tiny wire. As the chip package really is tiny | ||
22 | there are some advanced soldering skills needed in any case. | ||
23 | |||
24 | The physical pinouts are drawn in the following ASCII art. | ||
25 | The GPIO pins are marked with G00-G23:: | ||
26 | |||
27 | G G G G G G G G G G G G G G G G G G | ||
28 | 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 | ||
29 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 | ||
30 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ||
31 | --------------------------------------------------------------------------- | ||
32 | --| ^ ^ |-- | ||
33 | --| pin 86 pin 67 |-- | ||
34 | --| |-- | ||
35 | --| pin 61 > |-- G18 | ||
36 | --| |-- G19 | ||
37 | --| |-- G20 | ||
38 | --| |-- G21 | ||
39 | --| |-- G22 | ||
40 | --| pin 56 > |-- G23 | ||
41 | --| |-- | ||
42 | --| Brooktree 878/879 |-- | ||
43 | --| |-- | ||
44 | --| |-- | ||
45 | --| |-- | ||
46 | --| |-- | ||
47 | --| |-- | ||
48 | --| |-- | ||
49 | --| |-- | ||
50 | --| |-- | ||
51 | --| |-- | ||
52 | --| |-- | ||
53 | --| |-- | ||
54 | --| |-- | ||
55 | --| |-- | ||
56 | --| O |-- | ||
57 | --| |-- | ||
58 | --------------------------------------------------------------------------- | ||
59 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ||
60 | ^ | ||
61 | This is pin 1 | ||
62 | |||
diff --git a/Documentation/driver-api/connector.rst b/Documentation/driver-api/connector.rst new file mode 100644 index 000000000000..c100c7482289 --- /dev/null +++ b/Documentation/driver-api/connector.rst | |||
@@ -0,0 +1,156 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ================ | ||
4 | Kernel Connector | ||
5 | ================ | ||
6 | |||
7 | Kernel connector - new netlink based userspace <-> kernel space easy | ||
8 | to use communication module. | ||
9 | |||
10 | The Connector driver makes it easy to connect various agents using a | ||
11 | netlink based network. One must register a callback and an identifier. | ||
12 | When the driver receives a special netlink message with the appropriate | ||
13 | identifier, the appropriate callback will be called. | ||
14 | |||
15 | From the userspace point of view it's quite straightforward: | ||
16 | |||
17 | - socket(); | ||
18 | - bind(); | ||
19 | - send(); | ||
20 | - recv(); | ||
21 | |||
22 | But if kernelspace wants to use the full power of such connections, the | ||
23 | driver writer must create special sockets, must know about struct sk_buff | ||
24 | handling, etc... The Connector driver allows any kernelspace agents to use | ||
25 | netlink based networking for inter-process communication in a significantly | ||
26 | easier way:: | ||
27 | |||
28 | int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *)); | ||
29 | void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask); | ||
30 | void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask); | ||
31 | |||
32 | struct cb_id | ||
33 | { | ||
34 | __u32 idx; | ||
35 | __u32 val; | ||
36 | }; | ||
37 | |||
38 | idx and val are unique identifiers which must be registered in the | ||
39 | connector.h header for in-kernel usage. `callback` is the function passed | ||
40 | to cn_add_callback(); it will be called when a message with the above | ||
41 | idx.val is received by the connector core. Its first argument is a pointer | ||
42 | to the received message, a `struct cn_msg *`:: | ||
43 | |||
44 | struct cn_msg | ||
45 | { | ||
46 | struct cb_id id; | ||
47 | |||
48 | __u32 seq; | ||
49 | __u32 ack; | ||
50 | |||
51 | __u32 len; /* Length of the following data */ | ||
52 | __u8 data[0]; | ||
53 | }; | ||
54 | |||
55 | Connector interfaces | ||
56 | ==================== | ||
57 | |||
58 | .. kernel-doc:: include/linux/connector.h | ||
59 | |||
60 | Note: | ||
61 | When registering new callback user, connector core assigns | ||
62 | netlink group to the user which is equal to its id.idx. | ||
63 | |||
64 | Protocol description | ||
65 | ==================== | ||
66 | |||
67 | The current framework offers a transport layer with fixed headers. The | ||
68 | recommended protocol which uses such a header is as follows: | ||
69 | |||
70 | msg->seq and msg->ack are used to determine message genealogy. When | ||
71 | someone sends a message, they use a locally unique sequence and random | ||
72 | acknowledge number. The sequence number may be copied into | ||
73 | nlmsghdr->nlmsg_seq too. | ||
74 | |||
75 | The sequence number is incremented with each message sent. | ||
76 | |||
77 | If you expect a reply to the message, then the sequence number in the | ||
78 | received message MUST be the same as in the original message, and the | ||
79 | acknowledge number MUST be the same + 1. | ||
80 | |||
81 | If we receive a message and its sequence number is not equal to one we | ||
82 | are expecting, then it is a new message. If we receive a message and | ||
83 | its sequence number is the same as one we are expecting, but its | ||
84 | acknowledge is not equal to the sequence number in the original | ||
85 | message + 1, then it is a new message. | ||
86 | |||
87 | Obviously, the protocol header contains the above id. | ||
88 | |||
89 | The connector allows event notification in the following form: kernel | ||
90 | driver or userspace process can ask connector to notify it when | ||
91 | selected ids are turned on or off (i.e. their callbacks are registered or | ||
92 | unregistered). It is done by sending a special command to the connector | ||
93 | driver (it also registers itself with id={-1, -1}). | ||
94 | |||
95 | An example of this usage can be found in the cn_test.c module which | ||
96 | uses the connector to request notification and to send messages. | ||
97 | |||
98 | Reliability | ||
99 | =========== | ||
100 | |||
101 | Netlink itself is not a reliable protocol. That means that messages can | ||
102 | be lost due to memory pressure or process' receiving queue overflowed, | ||
103 | so caller is warned that it must be prepared. That is why the struct | ||
104 | cn_msg [main connector's message header] contains u32 seq and u32 ack | ||
105 | fields. | ||
106 | |||
107 | Userspace usage | ||
108 | =============== | ||
109 | |||
110 | 2.6.14 has a new netlink socket implementation, which by default does not | ||
111 | allow people to send data to netlink groups other than 1. | ||
112 | So, if you wish to use a netlink socket (for example using connector) | ||
113 | with a different group number, the userspace application must subscribe to | ||
114 | that group first. It can be achieved by the following pseudocode:: | ||
115 | |||
116 | s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); | ||
117 | |||
118 | l_local.nl_family = AF_NETLINK; | ||
119 | l_local.nl_groups = 12345; | ||
120 | l_local.nl_pid = 0; | ||
121 | |||
122 | if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { | ||
123 | perror("bind"); | ||
124 | close(s); | ||
125 | return -1; | ||
126 | } | ||
127 | |||
128 | { | ||
129 | int on = l_local.nl_groups; | ||
130 | setsockopt(s, 270, 1, &on, sizeof(on)); | ||
131 | } | ||
132 | |||
133 | Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket | ||
134 | option. To drop a multicast subscription, one should call the above socket | ||
135 | option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0. | ||
136 | |||
137 | 2.6.14 netlink code only allows selecting a group which is less than or equal to | ||
138 | the maximum group number, which is used at netlink_kernel_create() time. | ||
139 | In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use | ||
140 | group number 12345, you must increment CN_NETLINK_USERS to that number. | ||
141 | Additional 0xf numbers are allocated to be used by non-in-kernel users. | ||
142 | |||
143 | Due to this limitation, group 0xffffffff does not work now, so one can | ||
144 | not use add/remove connector's group notifications, but as far as I know, | ||
145 | only cn_test.c test module used it. | ||
146 | |||
147 | Some work in the netlink area is still being done, so things may change in | ||
148 | the 2.6.15 timeframe; if that happens, the documentation will be updated for | ||
149 | that kernel. | ||
150 | |||
151 | Code samples | ||
152 | ============ | ||
153 | |||
154 | Sample code for a connector test module and user space can be found | ||
155 | in samples/connector/. To build this code, enable CONFIG_CONNECTOR | ||
156 | and CONFIG_SAMPLES. | ||
diff --git a/Documentation/driver-api/console.rst b/Documentation/driver-api/console.rst new file mode 100644 index 000000000000..8394ad7747ac --- /dev/null +++ b/Documentation/driver-api/console.rst | |||
@@ -0,0 +1,152 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | =============== | ||
4 | Console Drivers | ||
5 | =============== | ||
6 | |||
7 | The Linux kernel has 2 general types of console drivers. The first type is | ||
8 | assigned by the kernel to all the virtual consoles during the boot process. | ||
9 | This type will be called 'system driver', and only one system driver is allowed | ||
10 | to exist. The system driver is persistent and it can never be unloaded, though | ||
11 | it may become inactive. | ||
12 | |||
13 | The second type has to be explicitly loaded and unloaded. This will be called | ||
14 | 'modular driver' by this document. Multiple modular drivers can coexist at | ||
15 | any time with each driver sharing the console with other drivers including | ||
16 | the system driver. However, modular drivers cannot take over the console | ||
17 | that is currently occupied by another modular driver. (Exception: Drivers that | ||
18 | call do_take_over_console() will succeed in the takeover regardless of the type | ||
19 | of driver occupying the consoles.) They can only take over the console that is | ||
20 | occupied by the system driver. By the same token, if the modular driver is | ||
21 | released by the console, the system driver will take over. | ||
22 | |||
23 | Modular drivers, from the programmer's point of view, have to call:: | ||
24 | |||
25 | do_take_over_console() - load and bind driver to console layer | ||
26 | give_up_console() - unload driver; it will only work if driver | ||
27 | is fully unbound | ||
28 | |||
29 | In newer kernels, the following are also available:: | ||
30 | |||
31 | do_register_con_driver() | ||
32 | do_unregister_con_driver() | ||
33 | |||
34 | If sysfs is enabled, the contents of /sys/class/vtconsole can be | ||
35 | examined. This shows the console backends currently registered by the | ||
36 | system which are named vtcon<n> where <n> is an integer from 0 to 15. | ||
37 | Thus:: | ||
38 | |||
39 | ls /sys/class/vtconsole | ||
40 | . .. vtcon0 vtcon1 | ||
41 | |||
42 | Each directory in /sys/class/vtconsole has 3 files:: | ||
43 | |||
44 | ls /sys/class/vtconsole/vtcon0 | ||
45 | . .. bind name uevent | ||
46 | |||
47 | What do these files signify? | ||
48 | |||
49 | 1. bind - this is a read/write file. It shows the status of the driver if | ||
50 | read, or acts to bind or unbind the driver to the virtual consoles | ||
51 | when written to. The possible values are: | ||
52 | |||
53 | 0 | ||
54 | - means the driver is not bound and if echo'ed, commands the driver | ||
55 | to unbind | ||
56 | |||
57 | 1 | ||
58 | - means the driver is bound and if echo'ed, commands the driver to | ||
59 | bind | ||
60 | |||
61 | 2. name - read-only file. Shows the name of the driver in this format:: | ||
62 | |||
63 | cat /sys/class/vtconsole/vtcon0/name | ||
64 | (S) VGA+ | ||
65 | |||
66 | '(S)' stands for a (S)ystem driver, i.e., it cannot be directly | ||
67 | commanded to bind or unbind | ||
68 | |||
69 | 'VGA+' is the name of the driver | ||
70 | |||
71 | cat /sys/class/vtconsole/vtcon1/name | ||
72 | (M) frame buffer device | ||
73 | |||
74 | In this case, '(M)' stands for a (M)odular driver, one that can be | ||
75 | directly commanded to bind or unbind. | ||
76 | |||
77 | 3. uevent - ignore this file | ||
78 | |||
79 | When unbinding, the modular driver is detached first, and then the system | ||
80 | driver takes over the consoles vacated by the driver. Binding, on the other | ||
81 | hand, will bind the driver to the consoles that are currently occupied by a | ||
82 | system driver. | ||
83 | |||
84 | NOTE1: | ||
85 | Binding and unbinding must be selected in Kconfig. It's under:: | ||
86 | |||
87 | Device Drivers -> | ||
88 | Character devices -> | ||
89 | Support for binding and unbinding console drivers | ||
90 | |||
91 | NOTE2: | ||
92 | If any of the virtual consoles are in KD_GRAPHICS mode, then binding or | ||
93 | unbinding will not succeed. An example of an application that sets the | ||
94 | console to KD_GRAPHICS is X. | ||
95 | |||
96 | How useful is this feature? This is very useful for console driver | ||
97 | developers. By unbinding the driver from the console layer, one can unload the | ||
98 | driver, make changes, recompile, reload and rebind the driver without any need | ||
99 | for rebooting the kernel. For regular users who may want to switch from | ||
100 | framebuffer console to VGA console and vice versa, this feature also makes | ||
101 | this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb | ||
102 | for more details.) | ||
103 | |||
104 | Notes for developers | ||
105 | ==================== | ||
106 | |||
107 | do_take_over_console() is now broken up into:: | ||
108 | |||
109 | do_register_con_driver() | ||
110 | do_bind_con_driver() - private function | ||
111 | |||
112 | give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must | ||
113 | be fully unbound for this call to succeed. con_is_bound() will check if the | ||
114 | driver is bound or not. | ||
115 | |||
116 | Guidelines for console driver writers | ||
117 | ===================================== | ||
118 | |||
119 | In order for binding to and unbinding from the console to properly work, | ||
120 | console drivers must follow these guidelines: | ||
121 | |||
122 | 1. All drivers, except system drivers, must call either do_register_con_driver() | ||
123 | or do_take_over_console(). do_register_con_driver() will just add the driver | ||
124 | to the console's internal list. It won't take over the | ||
125 | console. do_take_over_console(), as its name implies, will also take over (or | ||
126 | bind to) the console. | ||
127 | |||
128 | 2. All resources allocated during con->con_init() must be released in | ||
129 | con->con_deinit(). | ||
130 | |||
131 | 3. All resources allocated in con->con_startup() must be released when the | ||
132 | driver, which was previously bound, becomes unbound. The console layer | ||
133 | does not have a complementary call to con->con_startup() so it's up to the | ||
134 | driver to check when it's legal to release these resources. Calling | ||
135 | con_is_bound() in con->con_deinit() will help. If the call returns | ||
136 | false, then it's safe to release the resources. This balance has to be | ||
137 | ensured because con->con_startup() can be called again when a request to | ||
138 | rebind the driver to the console arrives. | ||
139 | |||
140 | 4. Upon exit of the driver, ensure that the driver is totally unbound. If the | ||
141 | condition is satisfied, then the driver must call do_unregister_con_driver() | ||
142 | or give_up_console(). | ||
143 | |||
144 | 5. do_unregister_con_driver() can also be called on conditions which make it | ||
145 | impossible for the driver to service console requests. This can happen | ||
146 | with the framebuffer console that suddenly lost all of its drivers. | ||
147 | |||
148 | The current crop of console drivers should still work correctly, but binding | ||
149 | and unbinding them may cause problems. With minimal fixes, these drivers can | ||
150 | be made to work correctly. | ||
151 | |||
152 | Antonino Daplas <adaplas@pol.net> | ||
diff --git a/Documentation/driver-api/dcdbas.rst b/Documentation/driver-api/dcdbas.rst new file mode 100644 index 000000000000..309cc57a7c1c --- /dev/null +++ b/Documentation/driver-api/dcdbas.rst | |||
@@ -0,0 +1,99 @@ | |||
1 | =================================== | ||
2 | Dell Systems Management Base Driver | ||
3 | =================================== | ||
4 | |||
5 | Overview | ||
6 | ======== | ||
7 | |||
8 | The Dell Systems Management Base Driver provides a sysfs interface for | ||
9 | systems management software such as Dell OpenManage to perform system | ||
10 | management interrupts and host control actions (system power cycle or | ||
11 | power off after OS shutdown) on certain Dell systems. | ||
12 | |||
13 | Dell OpenManage requires this driver on the following Dell PowerEdge systems: | ||
14 | 300, 1300, 1400, 400SC, 500SC, 1500SC, 1550, 600SC, 1600SC, 650, 1655MC, | ||
15 | 700, and 750. Other Dell software such as the open source libsmbios project | ||
16 | is expected to make use of this driver, and it may include the use of this | ||
17 | driver on other Dell systems. | ||
18 | |||
19 | The Dell libsmbios project aims towards providing access to as much BIOS | ||
20 | information as possible. See http://linux.dell.com/libsmbios/main/ for | ||
21 | more information about the libsmbios project. | ||
22 | |||
23 | |||
24 | System Management Interrupt | ||
25 | =========================== | ||
26 | |||
27 | On some Dell systems, systems management software must access certain | ||
28 | management information via a system management interrupt (SMI). The SMI data | ||
29 | buffer must reside in 32-bit address space, and the physical address of the | ||
30 | buffer is required for the SMI. The driver maintains the memory required for | ||
31 | the SMI and provides a way for the application to generate the SMI. | ||
32 | The driver creates the following sysfs entries for systems management | ||
33 | software to perform these system management interrupts:: | ||
34 | |||
35 | /sys/devices/platform/dcdbas/smi_data | ||
36 | /sys/devices/platform/dcdbas/smi_data_buf_phys_addr | ||
37 | /sys/devices/platform/dcdbas/smi_data_buf_size | ||
38 | /sys/devices/platform/dcdbas/smi_request | ||
39 | |||
40 | Systems management software must perform the following steps to execute | ||
41 | a SMI using this driver: | ||
42 | |||
43 | 1) Lock smi_data. | ||
44 | 2) Write system management command to smi_data. | ||
45 | 3) Write "1" to smi_request to generate a calling interface SMI or | ||
46 | "2" to generate a raw SMI. | ||
47 | 4) Read system management command response from smi_data. | ||
48 | 5) Unlock smi_data. | ||
49 | |||
50 | |||
51 | Host Control Action | ||
52 | =================== | ||
53 | |||
54 | Dell OpenManage supports a host control feature that allows the administrator | ||
55 | to perform a power cycle or power off of the system after the OS has finished | ||
56 | shutting down. On some Dell systems, this host control feature requires that | ||
57 | a driver perform a SMI after the OS has finished shutting down. | ||
58 | |||
59 | The driver creates the following sysfs entries for systems management software | ||
60 | to schedule the driver to perform a power cycle or power off host control | ||
61 | action after the system has finished shutting down: | ||
62 | |||
63 | /sys/devices/platform/dcdbas/host_control_action | ||
64 | /sys/devices/platform/dcdbas/host_control_smi_type | ||
65 | /sys/devices/platform/dcdbas/host_control_on_shutdown | ||
66 | |||
67 | Dell OpenManage performs the following steps to execute a power cycle or | ||
68 | power off host control action using this driver: | ||
69 | |||
70 | 1) Write host control action to be performed to host_control_action. | ||
71 | 2) Write type of SMI that driver needs to perform to host_control_smi_type. | ||
72 | 3) Write "1" to host_control_on_shutdown to enable host control action. | ||
73 | 4) Initiate OS shutdown. | ||
74 | (Driver will perform host control SMI when it is notified that the OS | ||
75 | has finished shutting down.) | ||
76 | |||
77 | |||
78 | Host Control SMI Type | ||
79 | ===================== | ||
80 | |||
81 | The following table shows the value to write to host_control_smi_type to | ||
82 | perform a power cycle or power off host control action: | ||
83 | |||
84 | =================== ===================== | ||
85 | PowerEdge System Host Control SMI Type | ||
86 | =================== ===================== | ||
87 | 300 HC_SMITYPE_TYPE1 | ||
88 | 1300 HC_SMITYPE_TYPE1 | ||
89 | 1400 HC_SMITYPE_TYPE2 | ||
90 | 500SC HC_SMITYPE_TYPE2 | ||
91 | 1500SC HC_SMITYPE_TYPE2 | ||
92 | 1550 HC_SMITYPE_TYPE2 | ||
93 | 600SC HC_SMITYPE_TYPE2 | ||
94 | 1600SC HC_SMITYPE_TYPE2 | ||
95 | 650 HC_SMITYPE_TYPE2 | ||
96 | 1655MC HC_SMITYPE_TYPE2 | ||
97 | 700 HC_SMITYPE_TYPE3 | ||
98 | 750 HC_SMITYPE_TYPE3 | ||
99 | =================== ===================== | ||
diff --git a/Documentation/driver-api/dell_rbu.rst b/Documentation/driver-api/dell_rbu.rst new file mode 100644 index 000000000000..5d1ce7bcd04d --- /dev/null +++ b/Documentation/driver-api/dell_rbu.rst | |||
@@ -0,0 +1,128 @@ | |||
1 | ============================================================= | ||
2 | Usage of the new open sourced rbu (Remote BIOS Update) driver | ||
3 | ============================================================= | ||
4 | |||
5 | Purpose | ||
6 | ======= | ||
7 | |||
8 | Document demonstrating the use of the Dell Remote BIOS Update driver | ||
9 | for updating BIOS images on Dell servers and desktops. | ||
10 | |||
11 | Scope | ||
12 | ===== | ||
13 | |||
14 | This document discusses the functionality of the rbu driver only. | ||
15 | It does not cover the support needed from applications to enable the BIOS to | ||
16 | update itself with the image downloaded in to the memory. | ||
17 | |||
18 | Overview | ||
19 | ======== | ||
20 | |||
21 | This driver works with Dell OpenManage or Dell Update Packages for updating | ||
22 | the BIOS on Dell servers (starting from servers sold since 1999), desktops | ||
23 | and notebooks (starting from those sold in 2005). | ||
24 | |||
25 | Please go to http://support.dell.com and register to find info on | ||
26 | OpenManage and Dell Update packages (DUP). | ||
27 | |||
28 | Libsmbios can also be used to update BIOS on Dell systems; go to | ||
29 | http://linux.dell.com/libsmbios/ for details. | ||
30 | |||
31 | Dell_RBU driver supports BIOS update using the monolithic image and packetized | ||
32 | image methods. In case of monolithic the driver allocates a contiguous chunk | ||
33 | of physical pages having the BIOS image. In case of packetized the app | ||
34 | using the driver breaks the image in to packets of fixed sizes and the driver | ||
35 | would place each packet in contiguous physical memory. The driver also | ||
36 | maintains a linked list of packets for reading them back. | ||
37 | |||
38 | If the dell_rbu driver is unloaded all the allocated memory is freed. | ||
39 | |||
40 | The rbu driver needs to have an application (as mentioned above) which will | ||
41 | inform the BIOS to enable the update in the next system reboot. | ||
42 | |||
43 | The user should not unload the rbu driver after downloading the BIOS image | ||
44 | or updating. | ||
45 | |||
46 | The driver load creates the following directories under the /sys file system:: | ||
47 | |||
48 | /sys/class/firmware/dell_rbu/loading | ||
49 | /sys/class/firmware/dell_rbu/data | ||
50 | /sys/devices/platform/dell_rbu/image_type | ||
51 | /sys/devices/platform/dell_rbu/data | ||
52 | /sys/devices/platform/dell_rbu/packet_size | ||
53 | |||
54 | The driver supports two types of update mechanism; monolithic and packetized. | ||
55 | These update mechanisms depend upon the BIOS currently running on the system. | ||
56 | Most of the Dell systems support a monolithic update where the BIOS image is | ||
57 | copied to a single contiguous block of physical memory. | ||
58 | |||
59 | In case of packet mechanism the single memory can be broken into smaller chunks | ||
60 | of contiguous memory and the BIOS image is scattered in these packets. | ||
61 | |||
62 | By default the driver uses monolithic memory for the update type. This can be | ||
63 | changed to packets during the driver load time by specifying the load | ||
64 | parameter image_type=packet. This can also be changed later as below:: | ||
65 | |||
66 | echo packet > /sys/devices/platform/dell_rbu/image_type | ||
67 | |||
68 | In packet update mode the packet size has to be given before any packets can | ||
69 | be downloaded. It is done as below:: | ||
70 | |||
71 | echo XXXX > /sys/devices/platform/dell_rbu/packet_size | ||
72 | |||
73 | In the packet update mechanism, the user needs to create a new file having | ||
74 | packets of data arranged back to back. It can be done as follows: | ||
75 | The user creates a packet header, gets the chunk of the BIOS image and | ||
76 | places it next to the packetheader; now, the packetheader + BIOS image chunk | ||
77 | added together should match the specified packet_size. This makes one | ||
78 | packet, the user needs to create more such packets out of the entire BIOS | ||
79 | image file and then arrange all these packets back to back in to one single | ||
80 | file. | ||
81 | |||
82 | This file is then copied to /sys/class/firmware/dell_rbu/data. | ||
83 | Once this file gets to the driver, the driver extracts packet_size data from | ||
84 | the file and spreads it across the physical memory in contiguous packet_sized | ||
85 | space. | ||
86 | |||
87 | This method makes sure that all the packets get to the driver in a single operation. | ||
88 | |||
89 | In monolithic update the user simply gets the BIOS image (.hdr file) and copies | ||
90 | to the data file as is without any change to the BIOS image itself. | ||
91 | |||
92 | Do the steps below to download the BIOS image. | ||
93 | |||
94 | 1) echo 1 > /sys/class/firmware/dell_rbu/loading | ||
95 | 2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data | ||
96 | 3) echo 0 > /sys/class/firmware/dell_rbu/loading | ||
97 | |||
98 | The /sys/class/firmware/dell_rbu/ entries will remain till the following is | ||
99 | done. | ||
100 | |||
101 | :: | ||
102 | |||
103 | echo -1 > /sys/class/firmware/dell_rbu/loading | ||
104 | |||
105 | Until this step is completed the driver cannot be unloaded. | ||
106 | |||
107 | Also echoing either mono, packet or init in to image_type will free up the | ||
108 | memory allocated by the driver. | ||
109 | |||
110 | If a user by accident executes steps 1 and 3 above without executing step 2; | ||
111 | it will make the /sys/class/firmware/dell_rbu/ entries disappear. | ||
112 | |||
113 | The entries can be recreated by doing the following:: | ||
114 | |||
115 | echo init > /sys/devices/platform/dell_rbu/image_type | ||
116 | |||
117 | .. note:: echoing init in image_type does not change its original value. | ||
118 | |||
119 | Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to | ||
120 | read back the image downloaded. | ||
121 | |||
122 | .. note:: | ||
123 | |||
124 | After updating the BIOS image a user mode application needs to execute | ||
125 | code which sends the BIOS update request to the BIOS. So on the next reboot | ||
126 | the BIOS knows about the new image downloaded and it updates itself. | ||
127 | Also don't unload the rbu driver if the image has to be updated. | ||
128 | |||
diff --git a/Documentation/driver-api/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst new file mode 100644 index 000000000000..7ea1d7a41e1d --- /dev/null +++ b/Documentation/driver-api/driver-model/binding.rst | |||
@@ -0,0 +1,98 @@ | |||
1 | ============== | ||
2 | Driver Binding | ||
3 | ============== | ||
4 | |||
5 | Driver binding is the process of associating a device with a device | ||
6 | driver that can control it. Bus drivers have typically handled this | ||
7 | because there have been bus-specific structures to represent the | ||
8 | devices and the drivers. With generic device and device driver | ||
9 | structures, most of the binding can take place using common code. | ||
10 | |||
11 | |||
12 | Bus | ||
13 | ~~~ | ||
14 | |||
15 | The bus type structure contains a list of all devices that are on that bus | ||
16 | type in the system. When device_register is called for a device, it is | ||
17 | inserted into the end of this list. The bus object also contains a | ||
18 | list of all drivers of that bus type. When driver_register is called | ||
19 | for a driver, it is inserted at the end of this list. These are the | ||
20 | two events which trigger driver binding. | ||
21 | |||
22 | |||
23 | device_register | ||
24 | ~~~~~~~~~~~~~~~ | ||
25 | |||
26 | When a new device is added, the bus's list of drivers is iterated over | ||
27 | to find one that supports it. In order to determine that, the device | ||
28 | ID of the device must match one of the device IDs that the driver | ||
29 | supports. The format and semantics for comparing IDs are bus-specific. | ||
30 | Instead of trying to derive a complex state machine and matching | ||
31 | algorithm, it is up to the bus driver to provide a callback to compare | ||
32 | a device against the IDs of a driver. The bus returns 1 if a match was | ||
33 | found; 0 otherwise. | ||
34 | |||
35 | int match(struct device * dev, struct device_driver * drv); | ||
36 | |||
37 | If a match is found, the device's driver field is set to the driver | ||
38 | and the driver's probe callback is called. This gives the driver a | ||
39 | chance to verify that it really does support the hardware, and that | ||
40 | it's in a working state. | ||
41 | |||
42 | Device Class | ||
43 | ~~~~~~~~~~~~ | ||
44 | |||
45 | Upon the successful completion of probe, the device is registered with | ||
46 | the class to which it belongs. Device drivers belong to one and only one | ||
47 | class, and that is set in the driver's devclass field. | ||
48 | devclass_add_device is called to enumerate the device within the class | ||
49 | and actually register it with the class, which happens with the | ||
50 | class's register_dev callback. | ||
51 | |||
52 | |||
53 | Driver | ||
54 | ~~~~~~ | ||
55 | |||
56 | When a driver is attached to a device, the device is inserted into the | ||
57 | driver's list of devices. | ||
58 | |||
59 | |||
60 | sysfs | ||
61 | ~~~~~ | ||
62 | |||
63 | A symlink is created in the bus's 'devices' directory that points to | ||
64 | the device's directory in the physical hierarchy. | ||
65 | |||
66 | A symlink is created in the driver's 'devices' directory that points | ||
67 | to the device's directory in the physical hierarchy. | ||
68 | |||
69 | A directory for the device is created in the class's directory. A | ||
70 | symlink is created in that directory that points to the device's | ||
71 | physical location in the sysfs tree. | ||
72 | |||
73 | A symlink can be created (though this isn't done yet) in the device's | ||
74 | physical directory to either its class directory, or the class's | ||
75 | top-level directory. One can also be created to point to its driver's | ||
76 | directory. | ||
77 | |||
78 | |||
79 | driver_register | ||
80 | ~~~~~~~~~~~~~~~ | ||
81 | |||
82 | The process is almost identical for when a new driver is added. | ||
83 | The bus's list of devices is iterated over to find a match. Devices | ||
84 | that already have a driver are skipped. All the devices are iterated | ||
85 | over, to bind as many devices as possible to the driver. | ||
86 | |||
87 | |||
88 | Removal | ||
89 | ~~~~~~~ | ||
90 | |||
91 | When a device is removed, the reference count for it will eventually | ||
92 | go to 0. When it does, the remove callback of the driver is called. It | ||
93 | is removed from the driver's list of devices and the reference count | ||
94 | of the driver is decremented. All symlinks between the two are removed. | ||
95 | |||
96 | When a driver is removed, the list of devices that it supports is | ||
97 | iterated over, and the driver's remove callback is called for each | ||
98 | one. The device is removed from that list and the symlinks removed. | ||
diff --git a/Documentation/driver-api/driver-model/bus.rst b/Documentation/driver-api/driver-model/bus.rst new file mode 100644 index 000000000000..016b15a6e8ea --- /dev/null +++ b/Documentation/driver-api/driver-model/bus.rst | |||
@@ -0,0 +1,146 @@ | |||
1 | ========= | ||
2 | Bus Types | ||
3 | ========= | ||
4 | |||
5 | Definition | ||
6 | ~~~~~~~~~~ | ||
7 | See the kerneldoc for the struct bus_type. | ||
8 | |||
9 | int bus_register(struct bus_type * bus); | ||
10 | |||
11 | |||
12 | Declaration | ||
13 | ~~~~~~~~~~~ | ||
14 | |||
15 | Each bus type in the kernel (PCI, USB, etc) should declare one static | ||
16 | object of this type. They must initialize the name field, and may | ||
17 | optionally initialize the match callback:: | ||
18 | |||
19 | struct bus_type pci_bus_type = { | ||
20 | .name = "pci", | ||
21 | .match = pci_bus_match, | ||
22 | }; | ||
23 | |||
24 | The structure should be exported to drivers in a header file: | ||
25 | |||
26 | extern struct bus_type pci_bus_type; | ||
27 | |||
28 | |||
29 | Registration | ||
30 | ~~~~~~~~~~~~ | ||
31 | |||
32 | When a bus driver is initialized, it calls bus_register. This | ||
33 | initializes the rest of the fields in the bus object and inserts it | ||
34 | into a global list of bus types. Once the bus object is registered, | ||
35 | the fields in it are usable by the bus driver. | ||
36 | |||
37 | |||
38 | Callbacks | ||
39 | ~~~~~~~~~ | ||
40 | |||
41 | match(): Attaching Drivers to Devices | ||
42 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
43 | |||
44 | The format of device ID structures and the semantics for comparing | ||
45 | them are inherently bus-specific. Drivers typically declare an array | ||
46 | of device IDs of devices they support that reside in a bus-specific | ||
47 | driver structure. | ||
48 | |||
49 | The purpose of the match callback is to give the bus an opportunity to | ||
50 | determine if a particular driver supports a particular device by | ||
51 | comparing the device IDs the driver supports with the device ID of a | ||
52 | particular device, without sacrificing bus-specific functionality or | ||
53 | type-safety. | ||
54 | |||
55 | When a driver is registered with the bus, the bus's list of devices is | ||
56 | iterated over, and the match callback is called for each device that | ||
57 | does not have a driver associated with it. | ||
58 | |||
59 | |||
60 | |||
61 | Device and Driver Lists | ||
62 | ~~~~~~~~~~~~~~~~~~~~~~~ | ||
63 | |||
64 | The lists of devices and drivers are intended to replace the local | ||
65 | lists that many buses keep. They are lists of struct devices and | ||
66 | struct device_drivers, respectively. Bus drivers are free to use the | ||
67 | lists as they please, but conversion to the bus-specific type may be | ||
68 | necessary. | ||
69 | |||
70 | The LDM core provides helper functions for iterating over each list:: | ||
71 | |||
72 | int bus_for_each_dev(struct bus_type * bus, struct device * start, | ||
73 | void * data, | ||
74 | int (*fn)(struct device *, void *)); | ||
75 | |||
76 | int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, | ||
77 | void * data, int (*fn)(struct device_driver *, void *)); | ||
78 | |||
79 | These helpers iterate over the respective list, and call the callback | ||
80 | for each device or driver in the list. All list accesses are | ||
81 | synchronized by taking the bus's lock (read currently). The reference | ||
82 | count on each object in the list is incremented before the callback is | ||
83 | called; it is decremented after the next object has been obtained. The | ||
84 | lock is not held when calling the callback. | ||
85 | |||
86 | |||
87 | sysfs | ||
88 | ~~~~~~~~ | ||
89 | There is a top-level directory named 'bus'. | ||
90 | |||
91 | Each bus gets a directory in the bus directory, along with two default | ||
92 | directories:: | ||
93 | |||
94 | /sys/bus/pci/ | ||
95 | |-- devices | ||
96 | `-- drivers | ||
97 | |||
98 | Drivers registered with the bus get a directory in the bus's drivers | ||
99 | directory:: | ||
100 | |||
101 | /sys/bus/pci/ | ||
102 | |-- devices | ||
103 | `-- drivers | ||
104 | |-- Intel ICH | ||
105 | |-- Intel ICH Joystick | ||
106 | |-- agpgart | ||
107 | `-- e100 | ||
108 | |||
109 | Each device that is discovered on a bus of that type gets a symlink in | ||
110 | the bus's devices directory to the device's directory in the physical | ||
111 | hierarchy:: | ||
112 | |||
113 | /sys/bus/pci/ | ||
114 | |-- devices | ||
115 | | |-- 00:00.0 -> ../../../root/pci0/00:00.0 | ||
116 | | |-- 00:01.0 -> ../../../root/pci0/00:01.0 | ||
117 | | `-- 00:02.0 -> ../../../root/pci0/00:02.0 | ||
118 | `-- drivers | ||
119 | |||
120 | |||
121 | Exporting Attributes | ||
122 | ~~~~~~~~~~~~~~~~~~~~ | ||
123 | |||
124 | :: | ||
125 | |||
126 | struct bus_attribute { | ||
127 | struct attribute attr; | ||
128 | ssize_t (*show)(struct bus_type *, char * buf); | ||
129 | ssize_t (*store)(struct bus_type *, const char * buf, size_t count); | ||
130 | }; | ||
131 | |||
132 | Bus drivers can export attributes using the BUS_ATTR_RW macro that works | ||
133 | similarly to the DEVICE_ATTR_RW macro for devices. For example, a | ||
134 | definition like this:: | ||
135 | |||
136 | static BUS_ATTR_RW(debug); | ||
137 | |||
138 | is equivalent to declaring:: | ||
139 | |||
140 | static bus_attribute bus_attr_debug; | ||
141 | |||
142 | This can then be used to add and remove the attribute from the bus's | ||
143 | sysfs directory using:: | ||
144 | |||
145 | int bus_create_file(struct bus_type *, struct bus_attribute *); | ||
146 | void bus_remove_file(struct bus_type *, struct bus_attribute *); | ||
diff --git a/Documentation/driver-api/driver-model/class.rst b/Documentation/driver-api/driver-model/class.rst new file mode 100644 index 000000000000..fff55b80e86a --- /dev/null +++ b/Documentation/driver-api/driver-model/class.rst | |||
@@ -0,0 +1,149 @@ | |||
1 | ============== | ||
2 | Device Classes | ||
3 | ============== | ||
4 | |||
5 | Introduction | ||
6 | ~~~~~~~~~~~~ | ||
7 | A device class describes a type of device, like an audio or network | ||
8 | device. The following device classes have been identified: | ||
9 | |||
10 | <Insert List of Device Classes Here> | ||
11 | |||
12 | |||
13 | Each device class defines a set of semantics and a programming interface | ||
14 | that devices of that class adhere to. Device drivers are the | ||
15 | implementation of that programming interface for a particular device on | ||
16 | a particular bus. | ||
17 | |||
18 | Device classes are agnostic with respect to what bus a device resides | ||
19 | on. | ||
20 | |||
21 | |||
22 | Programming Interface | ||
23 | ~~~~~~~~~~~~~~~~~~~~~ | ||
24 | The device class structure looks like:: | ||
25 | |||
26 | |||
27 | typedef int (*devclass_add)(struct device *); | ||
28 | typedef void (*devclass_remove)(struct device *); | ||
29 | |||
30 | See the kerneldoc for the struct class. | ||
31 | |||
32 | A typical device class definition would look like:: | ||
33 | |||
34 | struct device_class input_devclass = { | ||
35 | .name = "input", | ||
36 | .add_device = input_add_device, | ||
37 | .remove_device = input_remove_device, | ||
38 | }; | ||
39 | |||
40 | Each device class structure should be exported in a header file so it | ||
41 | can be used by drivers, extensions and interfaces. | ||
42 | |||
43 | Device classes are registered and unregistered with the core using:: | ||
44 | |||
45 | int devclass_register(struct device_class * cls); | ||
46 | void devclass_unregister(struct device_class * cls); | ||
47 | |||
48 | |||
49 | Devices | ||
50 | ~~~~~~~ | ||
51 | As devices are bound to drivers, they are added to the device class | ||
52 | that the driver belongs to. Before the driver model core, this would | ||
53 | typically happen during the driver's probe() callback, once the device | ||
54 | has been initialized. It now happens after the probe() callback | ||
55 | finishes from the core. | ||
56 | |||
57 | The device is enumerated in the class. Each time a device is added to | ||
58 | the class, the class's devnum field is incremented and assigned to the | ||
59 | device. The field is never decremented, so if the device is removed | ||
60 | from the class and re-added, it will receive a different enumerated | ||
61 | value. | ||
62 | |||
63 | The class is allowed to create a class-specific structure for the | ||
64 | device and store it in the device's class_data pointer. | ||
65 | |||
66 | There is no list of devices in the device class. Each driver has a | ||
67 | list of devices that it supports. The device class has a list of | ||
68 | drivers of that particular class. To access all of the devices in the | ||
69 | class, iterate over the device lists of each driver in the class. | ||
70 | |||
71 | |||
72 | Device Drivers | ||
73 | ~~~~~~~~~~~~~~ | ||
74 | Device drivers are added to device classes when they are registered | ||
75 | with the core. A driver specifies the class it belongs to by setting | ||
76 | the struct device_driver::devclass field. | ||
77 | |||
78 | |||
79 | sysfs directory structure | ||
80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
81 | There is a top-level sysfs directory named 'class'. | ||
82 | |||
83 | Each class gets a directory in the class directory, along with two | ||
84 | default subdirectories:: | ||
85 | |||
86 | class/ | ||
87 | `-- input | ||
88 | |-- devices | ||
89 | `-- drivers | ||
90 | |||
91 | |||
92 | Drivers registered with the class get a symlink in the drivers/ directory | ||
93 | that points to the driver's directory (under its bus directory):: | ||
94 | |||
95 | class/ | ||
96 | `-- input | ||
97 | |-- devices | ||
98 | `-- drivers | ||
99 | `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/ | ||
100 | |||
101 | |||
102 | Each device gets a symlink in the devices/ directory that points to the | ||
103 | device's directory in the physical hierarchy:: | ||
104 | |||
105 | class/ | ||
106 | `-- input | ||
107 | |-- devices | ||
108 | | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ | ||
109 | `-- drivers | ||
110 | |||
111 | |||
112 | Exporting Attributes | ||
113 | ~~~~~~~~~~~~~~~~~~~~ | ||
114 | |||
115 | :: | ||
116 | |||
117 | struct devclass_attribute { | ||
118 | struct attribute attr; | ||
119 | ssize_t (*show)(struct device_class *, char * buf, size_t count, loff_t off); | ||
120 | ssize_t (*store)(struct device_class *, const char * buf, size_t count, loff_t off); | ||
121 | }; | ||
122 | |||
123 | Class drivers can export attributes using the DEVCLASS_ATTR macro that works | ||
124 | similarly to the DEVICE_ATTR macro for devices. For example, a definition | ||
125 | like this:: | ||
126 | |||
127 | static DEVCLASS_ATTR(debug,0644,show_debug,store_debug); | ||
128 | |||
129 | is equivalent to declaring:: | ||
130 | |||
131 | static devclass_attribute devclass_attr_debug; | ||
132 | |||
133 | The bus driver can add and remove the attribute from the class's | ||
134 | sysfs directory using:: | ||
135 | |||
136 | int devclass_create_file(struct device_class *, struct devclass_attribute *); | ||
137 | void devclass_remove_file(struct device_class *, struct devclass_attribute *); | ||
138 | |||
139 | In the example above, the file will be named 'debug' and placed in the | ||
140 | class's directory in sysfs. | ||
141 | |||
142 | |||
143 | Interfaces | ||
144 | ~~~~~~~~~~ | ||
145 | There may exist multiple mechanisms for accessing the same device of a | ||
146 | particular class type. Device interfaces describe these mechanisms. | ||
147 | |||
148 | When a device is added to a device class, the core attempts to add it | ||
149 | to every interface that is registered with the device class. | ||
diff --git a/Documentation/driver-api/driver-model/design-patterns.rst b/Documentation/driver-api/driver-model/design-patterns.rst new file mode 100644 index 000000000000..41eb8f41f7dd --- /dev/null +++ b/Documentation/driver-api/driver-model/design-patterns.rst | |||
@@ -0,0 +1,116 @@ | |||
1 | ============================= | ||
2 | Device Driver Design Patterns | ||
3 | ============================= | ||
4 | |||
5 | This document describes a few common design patterns found in device drivers. | ||
6 | It is likely that subsystem maintainers will ask driver developers to | ||
7 | conform to these design patterns. | ||
8 | |||
9 | 1. State Container | ||
10 | 2. container_of() | ||
11 | |||
12 | |||
13 | 1. State Container | ||
14 | ~~~~~~~~~~~~~~~~~~ | ||
15 | |||
16 | While the kernel contains a few device drivers that assume that they will | ||
17 | only be probed() once on a certain system (singletons), it is customary to assume | ||
18 | that the device the driver binds to will appear in several instances. This | ||
19 | means that the probe() function and all callbacks need to be reentrant. | ||
20 | |||
21 | The most common way to achieve this is to use the state container design | ||
22 | pattern. It usually has this form:: | ||
23 | |||
24 | struct foo { | ||
25 | spinlock_t lock; /* Example member */ | ||
26 | (...) | ||
27 | }; | ||
28 | |||
29 | static int foo_probe(...) | ||
30 | { | ||
31 | struct foo *foo; | ||
32 | |||
33 | foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL); | ||
34 | if (!foo) | ||
35 | return -ENOMEM; | ||
36 | spin_lock_init(&foo->lock); | ||
37 | (...) | ||
38 | } | ||
39 | |||
40 | This will create an instance of struct foo in memory every time probe() is | ||
41 | called. This is our state container for this instance of the device driver. | ||
42 | Of course it is then necessary to always pass this instance of the | ||
43 | state around to all functions that need access to the state and its members. | ||
44 | |||
45 | For example, if the driver is registering an interrupt handler, you would | ||
46 | pass around a pointer to struct foo like this:: | ||
47 | |||
48 | static irqreturn_t foo_handler(int irq, void *arg) | ||
49 | { | ||
50 | struct foo *foo = arg; | ||
51 | (...) | ||
52 | } | ||
53 | |||
54 | static int foo_probe(...) | ||
55 | { | ||
56 | struct foo *foo; | ||
57 | |||
58 | (...) | ||
59 | ret = request_irq(irq, foo_handler, 0, "foo", foo); | ||
60 | } | ||
61 | |||
62 | This way you always get a pointer back to the correct instance of foo in | ||
63 | your interrupt handler. | ||
64 | |||
65 | |||
66 | 2. container_of() | ||
67 | ~~~~~~~~~~~~~~~~~ | ||
68 | |||
69 | Continuing on the above example we add an offloaded work:: | ||
70 | |||
71 | struct foo { | ||
72 | spinlock_t lock; | ||
73 | struct workqueue_struct *wq; | ||
74 | struct work_struct offload; | ||
75 | (...) | ||
76 | }; | ||
77 | |||
78 | static void foo_work(struct work_struct *work) | ||
79 | { | ||
80 | struct foo *foo = container_of(work, struct foo, offload); | ||
81 | |||
82 | (...) | ||
83 | } | ||
84 | |||
85 | static irqreturn_t foo_handler(int irq, void *arg) | ||
86 | { | ||
87 | struct foo *foo = arg; | ||
88 | |||
89 | queue_work(foo->wq, &foo->offload); | ||
90 | (...) | ||
91 | } | ||
92 | |||
93 | static int foo_probe(...) | ||
94 | { | ||
95 | struct foo *foo; | ||
96 | |||
97 | foo->wq = create_singlethread_workqueue("foo-wq"); | ||
98 | INIT_WORK(&foo->offload, foo_work); | ||
99 | (...) | ||
100 | } | ||
101 | |||
102 | The design pattern is the same for an hrtimer or something similar that will | ||
103 | return a single argument which is a pointer to a struct member in the | ||
104 | callback. | ||
105 | |||
106 | container_of() is a macro defined in <linux/kernel.h> | ||
107 | |||
108 | What container_of() does is to obtain a pointer to the containing struct from | ||
109 | a pointer to a member by a simple subtraction using the offsetof() macro from | ||
110 | standard C, which allows something similar to object oriented behaviours. | ||
111 | Notice that the contained member must not be a pointer, but an actual member | ||
112 | for this to work. | ||
113 | |||
114 | We can see here that we avoid having global pointers to our struct foo * | ||
115 | instance this way, while still keeping the number of parameters passed to the | ||
116 | work function to a single pointer. | ||
diff --git a/Documentation/driver-api/driver-model/device.rst b/Documentation/driver-api/driver-model/device.rst new file mode 100644 index 000000000000..2b868d49d349 --- /dev/null +++ b/Documentation/driver-api/driver-model/device.rst | |||
@@ -0,0 +1,109 @@ | |||
1 | ========================== | ||
2 | The Basic Device Structure | ||
3 | ========================== | ||
4 | |||
5 | See the kerneldoc for the struct device. | ||
6 | |||
7 | |||
8 | Programming Interface | ||
9 | ~~~~~~~~~~~~~~~~~~~~~ | ||
10 | The bus driver that discovers the device uses this to register the | ||
11 | device with the core:: | ||
12 | |||
13 | int device_register(struct device * dev); | ||
14 | |||
15 | The bus should initialize the following fields: | ||
16 | |||
17 | - parent | ||
18 | - name | ||
19 | - bus_id | ||
20 | - bus | ||
21 | |||
22 | A device is removed from the core when its reference count goes to | ||
23 | 0. The reference count can be adjusted using:: | ||
24 | |||
25 | struct device * get_device(struct device * dev); | ||
26 | void put_device(struct device * dev); | ||
27 | |||
28 | get_device() will return a pointer to the struct device passed to it | ||
29 | if the reference is not already 0 (if it's in the process of being | ||
30 | removed already). | ||
31 | |||
32 | A driver can access the lock in the device structure using:: | ||
33 | |||
34 | void lock_device(struct device * dev); | ||
35 | void unlock_device(struct device * dev); | ||
36 | |||
37 | |||
38 | Attributes | ||
39 | ~~~~~~~~~~ | ||
40 | |||
41 | :: | ||
42 | |||
43 | struct device_attribute { | ||
44 | struct attribute attr; | ||
45 | ssize_t (*show)(struct device *dev, struct device_attribute *attr, | ||
46 | char *buf); | ||
47 | ssize_t (*store)(struct device *dev, struct device_attribute *attr, | ||
48 | const char *buf, size_t count); | ||
49 | }; | ||
50 | |||
51 | Attributes of devices can be exported by a device driver through sysfs. | ||
52 | |||
53 | Please see Documentation/filesystems/sysfs.txt for more information | ||
54 | on how sysfs works. | ||
55 | |||
56 | As explained in Documentation/kobject.txt, device attributes must be | ||
57 | created before the KOBJ_ADD uevent is generated. The only way to realize | ||
58 | that is by defining an attribute group. | ||
59 | |||
60 | Attributes are declared using a macro called DEVICE_ATTR:: | ||
61 | |||
62 | #define DEVICE_ATTR(name,mode,show,store) | ||
63 | |||
64 | Example:: | ||
65 | |||
66 | static DEVICE_ATTR(type, 0444, show_type, NULL); | ||
67 | static DEVICE_ATTR(power, 0644, show_power, store_power); | ||
68 | |||
69 | This declares two structures of type struct device_attribute with respective | ||
70 | names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be | ||
71 | organized as follows into a group:: | ||
72 | |||
73 | static struct attribute *dev_attrs[] = { | ||
74 | &dev_attr_type.attr, | ||
75 | &dev_attr_power.attr, | ||
76 | NULL, | ||
77 | }; | ||
78 | |||
79 | static struct attribute_group dev_attr_group = { | ||
80 | .attrs = dev_attrs, | ||
81 | }; | ||
82 | |||
83 | static const struct attribute_group *dev_attr_groups[] = { | ||
84 | &dev_attr_group, | ||
85 | NULL, | ||
86 | }; | ||
87 | |||
88 | This array of groups can then be associated with a device by setting the | ||
89 | group pointer in struct device before device_register() is invoked:: | ||
90 | |||
91 | dev->groups = dev_attr_groups; | ||
92 | device_register(dev); | ||
93 | |||
94 | The device_register() function will use the 'groups' pointer to create the | ||
95 | device attributes and the device_unregister() function will use this pointer | ||
96 | to remove the device attributes. | ||
97 | |||
98 | Word of warning: While the kernel allows device_create_file() and | ||
99 | device_remove_file() to be called on a device at any time, userspace has | ||
100 | strict expectations on when attributes get created. When a new device is | ||
101 | registered in the kernel, a uevent is generated to notify userspace (like | ||
102 | udev) that a new device is available. If attributes are added after the | ||
103 | device is registered, then userspace won't get notified and userspace will | ||
104 | not know about the new attributes. | ||
105 | |||
106 | This is important for device drivers that need to publish additional | ||
107 | attributes for a device at driver probe time. If the device driver simply | ||
108 | calls device_create_file() on the device structure passed to it, then | ||
109 | userspace will never be notified of the new attributes. | ||
diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst new file mode 100644 index 000000000000..4ac99122b5f1 --- /dev/null +++ b/Documentation/driver-api/driver-model/devres.rst | |||
@@ -0,0 +1,414 @@ | |||
1 | ================================ | ||
2 | Devres - Managed Device Resource | ||
3 | ================================ | ||
4 | |||
5 | Tejun Heo <teheo@suse.de> | ||
6 | |||
7 | First draft 10 January 2007 | ||
8 | |||
9 | .. contents | ||
10 | |||
11 | 1. Intro : Huh? Devres? | ||
12 | 2. Devres : Devres in a nutshell | ||
13 | 3. Devres Group : Group devres'es and release them together | ||
14 | 4. Details : Life time rules, calling context, ... | ||
15 | 5. Overhead : How much do we have to pay for this? | ||
16 | 6. List of managed interfaces: Currently implemented managed interfaces | ||
17 | |||
18 | |||
19 | 1. Intro | ||
20 | -------- | ||
21 | |||
22 | devres came up while trying to convert libata to use iomap. Each | ||
23 | iomapped address should be kept and unmapped on driver detach. For | ||
24 | example, a plain SFF ATA controller (that is, good old PCI IDE) in | ||
25 | native mode makes use of 5 PCI BARs and all of them should be | ||
26 | maintained. | ||
27 | |||
28 | As with many other device drivers, libata low level drivers have | ||
29 | sufficient bugs in ->remove and ->probe failure path. Well, yes, | ||
30 | that's probably because libata low level driver developers are a lazy | ||
31 | bunch, but aren't all low level driver developers? After spending a | ||
32 | day fiddling with braindamaged hardware with no document or | ||
33 | braindamaged document, if it's finally working, well, it's working. | ||
34 | |||
35 | For one reason or another, low level drivers don't receive as much | ||
36 | attention or testing as core code, and bugs on driver detach or | ||
37 | initialization failure don't happen often enough to be noticeable. | ||
38 | Init failure path is worse because it's much less travelled while | ||
39 | it needs to handle multiple entry points. | ||
40 | |||
41 | So, many low level drivers end up leaking resources on driver detach | ||
42 | and having half broken failure path implementation in ->probe() which | ||
43 | would leak resources or even cause oops when failure occurs. iomap | ||
44 | adds more to this mix. So do msi and msix. | ||
45 | |||
46 | |||
47 | 2. Devres | ||
48 | --------- | ||
49 | |||
50 | devres is basically a linked list of arbitrarily sized memory areas | ||
51 | associated with a struct device. Each devres entry is associated with | ||
52 | a release function. A devres can be released in several ways. No | ||
53 | matter what, all devres entries are released on driver detach. On | ||
54 | release, the associated release function is invoked and then the | ||
55 | devres entry is freed. | ||
56 | |||
57 | Managed interface is created for resources commonly used by device | ||
58 | drivers using devres. For example, coherent DMA memory is acquired | ||
59 | using dma_alloc_coherent(). The managed version is called | ||
60 | dmam_alloc_coherent(). It is identical to dma_alloc_coherent() except | ||
61 | that the DMA memory allocated using it is managed and will be | ||
62 | automatically released on driver detach. Implementation looks like | ||
63 | the following:: | ||
64 | |||
65 | struct dma_devres { | ||
66 | size_t size; | ||
67 | void *vaddr; | ||
68 | dma_addr_t dma_handle; | ||
69 | }; | ||
70 | |||
71 | static void dmam_coherent_release(struct device *dev, void *res) | ||
72 | { | ||
73 | struct dma_devres *this = res; | ||
74 | |||
75 | dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle); | ||
76 | } | ||
77 | |||
78 | dmam_alloc_coherent(dev, size, dma_handle, gfp) | ||
79 | { | ||
80 | struct dma_devres *dr; | ||
81 | void *vaddr; | ||
82 | |||
83 | dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp); | ||
84 | ... | ||
85 | |||
86 | /* alloc DMA memory as usual */ | ||
87 | vaddr = dma_alloc_coherent(...); | ||
88 | ... | ||
89 | |||
90 | /* record size, vaddr, dma_handle in dr */ | ||
91 | dr->vaddr = vaddr; | ||
92 | ... | ||
93 | |||
94 | devres_add(dev, dr); | ||
95 | |||
96 | return vaddr; | ||
97 | } | ||
98 | |||
99 | If a driver uses dmam_alloc_coherent(), the area is guaranteed to be | ||
100 | freed whether initialization fails half-way or the device gets | ||
101 | detached. If most resources are acquired using managed interface, a | ||
102 | driver can have much simpler init and exit code. Init path basically | ||
103 | looks like the following:: | ||
104 | |||
105 | my_init_one() | ||
106 | { | ||
107 | struct mydev *d; | ||
108 | |||
109 | d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); | ||
110 | if (!d) | ||
111 | return -ENOMEM; | ||
112 | |||
113 | d->ring = dmam_alloc_coherent(...); | ||
114 | if (!d->ring) | ||
115 | return -ENOMEM; | ||
116 | |||
117 | if (check something) | ||
118 | return -EINVAL; | ||
119 | ... | ||
120 | |||
121 | return register_to_upper_layer(d); | ||
122 | } | ||
123 | |||
124 | And exit path:: | ||
125 | |||
126 | my_remove_one() | ||
127 | { | ||
128 | unregister_from_upper_layer(d); | ||
129 | shutdown_my_hardware(); | ||
130 | } | ||
131 | |||
132 | As shown above, low level drivers can be simplified a lot by using | ||
133 | devres. Complexity is shifted from less maintained low level drivers | ||
134 | to better maintained higher layer. Also, as init failure path is | ||
135 | shared with exit path, both can get more testing. | ||
136 | |||
137 | Note though that when converting current calls or assignments to | ||
138 | managed devm_* versions it is up to you to check if internal operations | ||
139 | like allocating memory have failed. Managed resources pertain to the | ||
140 | freeing of these resources *only* - all other checks needed are still | ||
141 | on you. In some cases this may mean introducing checks that were not | ||
142 | necessary before moving to the managed devm_* calls. | ||
143 | |||
144 | |||
145 | 3. Devres group | ||
146 | --------------- | ||
147 | |||
148 | Devres entries can be grouped using devres group. When a group is | ||
149 | released, all contained normal devres entries and properly nested | ||
150 | groups are released. One usage is to roll back a series of acquired | ||
151 | resources on failure. For example:: | ||
152 | |||
153 | if (!devres_open_group(dev, NULL, GFP_KERNEL)) | ||
154 | return -ENOMEM; | ||
155 | |||
156 | acquire A; | ||
157 | if (failed) | ||
158 | goto err; | ||
159 | |||
160 | acquire B; | ||
161 | if (failed) | ||
162 | goto err; | ||
163 | ... | ||
164 | |||
165 | devres_remove_group(dev, NULL); | ||
166 | return 0; | ||
167 | |||
168 | err: | ||
169 | devres_release_group(dev, NULL); | ||
170 | return err_code; | ||
171 | |||
172 | As resource acquisition failure usually means probe failure, constructs | ||
173 | like above are usually useful in midlayer driver (e.g. libata core | ||
174 | layer) where interface function shouldn't have side effect on failure. | ||
175 | For LLDs, just returning error code suffices in most cases. | ||
176 | |||
177 | Each group is identified by `void *id`. It can either be explicitly | ||
178 | specified by @id argument to devres_open_group() or automatically | ||
179 | created by passing NULL as @id as in the above example. In both | ||
180 | cases, devres_open_group() returns the group's id. The returned id | ||
181 | can be passed to other devres functions to select the target group. | ||
182 | If NULL is given to those functions, the latest open group is | ||
183 | selected. | ||
184 | |||
185 | For example, you can do something like the following:: | ||
186 | |||
187 | int my_midlayer_create_something() | ||
188 | { | ||
189 | if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL)) | ||
190 | return -ENOMEM; | ||
191 | |||
192 | ... | ||
193 | |||
194 | devres_close_group(dev, my_midlayer_create_something); | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | void my_midlayer_destroy_something() | ||
199 | { | ||
200 | devres_release_group(dev, my_midlayer_create_something); | ||
201 | } | ||
202 | |||
203 | |||
204 | 4. Details | ||
205 | ---------- | ||
206 | |||
207 | Lifetime of a devres entry begins on devres allocation and finishes | ||
208 | when it is released or destroyed (removed and freed) - no reference | ||
209 | counting. | ||
210 | |||
211 | devres core guarantees atomicity to all basic devres operations and | ||
212 | has support for single-instance devres types (atomic | ||
213 | lookup-and-add-if-not-found). Other than that, synchronizing | ||
214 | concurrent accesses to allocated devres data is caller's | ||
215 | responsibility. This is usually non-issue because bus ops and | ||
216 | resource allocations already do the job. | ||
217 | |||
218 | For an example of single-instance devres type, read pcim_iomap_table() | ||
219 | in lib/devres.c. | ||
220 | |||
221 | All devres interface functions can be called without context if the | ||
222 | right gfp mask is given. | ||
223 | |||
224 | |||
225 | 5. Overhead | ||
226 | ----------- | ||
227 | |||
228 | Each devres bookkeeping info is allocated together with requested data | ||
229 | area. With debug option turned off, bookkeeping info occupies 16 | ||
230 | bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded | ||
231 | up to ull alignment). If singly linked list is used, it can be | ||
232 | reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit). | ||
233 | |||
234 | Each devres group occupies 8 pointers. It can be reduced to 6 if | ||
235 | singly linked list is used. | ||
236 | |||
237 | Memory space overhead on ahci controller with two ports is between 300 | ||
238 | and 400 bytes on 32bit machine after naive conversion (we can | ||
239 | certainly invest a bit more effort into libata core layer). | ||
240 | |||
241 | |||
242 | 6. List of managed interfaces | ||
243 | ----------------------------- | ||
244 | |||
245 | CLOCK | ||
246 | devm_clk_get() | ||
247 | devm_clk_get_optional() | ||
248 | devm_clk_put() | ||
249 | devm_clk_hw_register() | ||
250 | devm_of_clk_add_hw_provider() | ||
251 | devm_clk_hw_register_clkdev() | ||
252 | |||
253 | DMA | ||
254 | dmaenginem_async_device_register() | ||
255 | dmam_alloc_coherent() | ||
256 | dmam_alloc_attrs() | ||
257 | dmam_free_coherent() | ||
258 | dmam_pool_create() | ||
259 | dmam_pool_destroy() | ||
260 | |||
261 | DRM | ||
262 | devm_drm_dev_init() | ||
263 | |||
264 | GPIO | ||
265 | devm_gpiod_get() | ||
266 | devm_gpiod_get_index() | ||
267 | devm_gpiod_get_index_optional() | ||
268 | devm_gpiod_get_optional() | ||
269 | devm_gpiod_put() | ||
270 | devm_gpiod_unhinge() | ||
271 | devm_gpiochip_add_data() | ||
272 | devm_gpio_request() | ||
273 | devm_gpio_request_one() | ||
274 | devm_gpio_free() | ||
275 | |||
276 | I2C | ||
277 | devm_i2c_new_dummy_device() | ||
278 | |||
279 | IIO | ||
280 | devm_iio_device_alloc() | ||
281 | devm_iio_device_free() | ||
282 | devm_iio_device_register() | ||
283 | devm_iio_device_unregister() | ||
284 | devm_iio_kfifo_allocate() | ||
285 | devm_iio_kfifo_free() | ||
286 | devm_iio_triggered_buffer_setup() | ||
287 | devm_iio_triggered_buffer_cleanup() | ||
288 | devm_iio_trigger_alloc() | ||
289 | devm_iio_trigger_free() | ||
290 | devm_iio_trigger_register() | ||
291 | devm_iio_trigger_unregister() | ||
292 | devm_iio_channel_get() | ||
293 | devm_iio_channel_release() | ||
294 | devm_iio_channel_get_all() | ||
295 | devm_iio_channel_release_all() | ||
296 | |||
297 | INPUT | ||
298 | devm_input_allocate_device() | ||
299 | |||
300 | IO region | ||
301 | devm_release_mem_region() | ||
302 | devm_release_region() | ||
303 | devm_release_resource() | ||
304 | devm_request_mem_region() | ||
305 | devm_request_region() | ||
306 | devm_request_resource() | ||
307 | |||
308 | IOMAP | ||
309 | devm_ioport_map() | ||
310 | devm_ioport_unmap() | ||
311 | devm_ioremap() | ||
312 | devm_ioremap_nocache() | ||
313 | devm_ioremap_wc() | ||
314 | devm_ioremap_resource() : checks resource, requests memory region, ioremaps | ||
315 | devm_iounmap() | ||
316 | pcim_iomap() | ||
317 | pcim_iomap_regions() : do request_region() and iomap() on multiple BARs | ||
318 | pcim_iomap_table() : array of mapped addresses indexed by BAR | ||
319 | pcim_iounmap() | ||
320 | |||
321 | IRQ | ||
322 | devm_free_irq() | ||
323 | devm_request_any_context_irq() | ||
324 | devm_request_irq() | ||
325 | devm_request_threaded_irq() | ||
326 | devm_irq_alloc_descs() | ||
327 | devm_irq_alloc_desc() | ||
328 | devm_irq_alloc_desc_at() | ||
329 | devm_irq_alloc_desc_from() | ||
330 | devm_irq_alloc_descs_from() | ||
331 | devm_irq_alloc_generic_chip() | ||
332 | devm_irq_setup_generic_chip() | ||
333 | devm_irq_sim_init() | ||
334 | |||
335 | LED | ||
336 | devm_led_classdev_register() | ||
337 | devm_led_classdev_unregister() | ||
338 | |||
339 | MDIO | ||
340 | devm_mdiobus_alloc() | ||
341 | devm_mdiobus_alloc_size() | ||
342 | devm_mdiobus_free() | ||
343 | |||
344 | MEM | ||
345 | devm_free_pages() | ||
346 | devm_get_free_pages() | ||
347 | devm_kasprintf() | ||
348 | devm_kcalloc() | ||
349 | devm_kfree() | ||
350 | devm_kmalloc() | ||
351 | devm_kmalloc_array() | ||
352 | devm_kmemdup() | ||
353 | devm_kstrdup() | ||
354 | devm_kvasprintf() | ||
355 | devm_kzalloc() | ||
356 | |||
357 | MFD | ||
358 | devm_mfd_add_devices() | ||
359 | |||
360 | MUX | ||
361 | devm_mux_chip_alloc() | ||
362 | devm_mux_chip_register() | ||
363 | devm_mux_control_get() | ||
364 | |||
365 | PER-CPU MEM | ||
366 | devm_alloc_percpu() | ||
367 | devm_free_percpu() | ||
368 | |||
369 | PCI | ||
370 | devm_pci_alloc_host_bridge() : managed PCI host bridge allocation | ||
371 | devm_pci_remap_cfgspace() : ioremap PCI configuration space | ||
372 | devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource | ||
373 | pcim_enable_device() : after success, all PCI ops become managed | ||
374 | pcim_pin_device() : keep PCI device enabled after release | ||
375 | |||
376 | PHY | ||
377 | devm_usb_get_phy() | ||
378 | devm_usb_put_phy() | ||
379 | |||
380 | PINCTRL | ||
381 | devm_pinctrl_get() | ||
382 | devm_pinctrl_put() | ||
383 | devm_pinctrl_register() | ||
384 | devm_pinctrl_unregister() | ||
385 | |||
386 | POWER | ||
387 | devm_reboot_mode_register() | ||
388 | devm_reboot_mode_unregister() | ||
389 | |||
390 | PWM | ||
391 | devm_pwm_get() | ||
392 | devm_pwm_put() | ||
393 | |||
394 | REGULATOR | ||
395 | devm_regulator_bulk_get() | ||
396 | devm_regulator_get() | ||
397 | devm_regulator_put() | ||
398 | devm_regulator_register() | ||
399 | |||
400 | RESET | ||
401 | devm_reset_control_get() | ||
402 | devm_reset_controller_register() | ||
403 | |||
404 | SERDEV | ||
405 | devm_serdev_device_open() | ||
406 | |||
407 | SLAVE DMA ENGINE | ||
408 | devm_acpi_dma_controller_register() | ||
409 | |||
410 | SPI | ||
411 | devm_spi_register_master() | ||
412 | |||
413 | WATCHDOG | ||
414 | devm_watchdog_register_device() | ||
diff --git a/Documentation/driver-api/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst new file mode 100644 index 000000000000..11d281506a04 --- /dev/null +++ b/Documentation/driver-api/driver-model/driver.rst | |||
@@ -0,0 +1,223 @@ | |||
1 | ============== | ||
2 | Device Drivers | ||
3 | ============== | ||
4 | |||
5 | See the kerneldoc for the struct device_driver. | ||
6 | |||
7 | |||
8 | Allocation | ||
9 | ~~~~~~~~~~ | ||
10 | |||
11 | Device drivers are statically allocated structures. Though there may | ||
12 | be multiple devices in a system that a driver supports, struct | ||
13 | device_driver represents the driver as a whole (not a particular | ||
14 | device instance). | ||
15 | |||
16 | Initialization | ||
17 | ~~~~~~~~~~~~~~ | ||
18 | |||
19 | The driver must initialize at least the name and bus fields. It should | ||
20 | also initialize the devclass field (when it arrives), so it may obtain | ||
21 | the proper linkage internally. It should also initialize as many of | ||
22 | the callbacks as possible, though each is optional. | ||
23 | |||
24 | Declaration | ||
25 | ~~~~~~~~~~~ | ||
26 | |||
27 | As stated above, struct device_driver objects are statically | ||
28 | allocated. Below is an example declaration of the eepro100 | ||
29 | driver. This declaration is hypothetical only; it relies on the driver | ||
30 | being converted completely to the new model:: | ||
31 | |||
32 | static struct device_driver eepro100_driver = { | ||
33 | .name = "eepro100", | ||
34 | .bus = &pci_bus_type, | ||
35 | |||
36 | .probe = eepro100_probe, | ||
37 | .remove = eepro100_remove, | ||
38 | .suspend = eepro100_suspend, | ||
39 | .resume = eepro100_resume, | ||
40 | }; | ||
41 | |||
42 | Most drivers will not be able to be converted completely to the new | ||
43 | model because the bus they belong to has a bus-specific structure with | ||
44 | bus-specific fields that cannot be generalized. | ||
45 | |||
46 | The most common example of this is device ID structures. A driver | ||
47 | typically defines an array of device IDs that it supports. The format | ||
48 | of these structures and the semantics for comparing device IDs are | ||
49 | completely bus-specific. Defining them as generic entities would | ||
50 | sacrifice type-safety, so we keep bus-specific structures around. | ||
51 | |||
52 | Bus-specific drivers should include a generic struct device_driver in | ||
53 | the definition of the bus-specific driver. Like this:: | ||
54 | |||
55 | struct pci_driver { | ||
56 | const struct pci_device_id *id_table; | ||
57 | struct device_driver driver; | ||
58 | }; | ||
59 | |||
60 | A definition that included bus-specific fields would look like | ||
61 | (using the eepro100 driver again):: | ||
62 | |||
63 | static struct pci_driver eepro100_driver = { | ||
64 | .id_table = eepro100_pci_tbl, | ||
65 | .driver = { | ||
66 | .name = "eepro100", | ||
67 | .bus = &pci_bus_type, | ||
68 | .probe = eepro100_probe, | ||
69 | .remove = eepro100_remove, | ||
70 | .suspend = eepro100_suspend, | ||
71 | .resume = eepro100_resume, | ||
72 | }, | ||
73 | }; | ||
74 | |||
75 | Some may find the syntax of embedded struct initialization awkward or | ||
76 | even a bit ugly. So far, it's the best way we've found to do what we want... | ||
77 | |||
78 | Registration | ||
79 | ~~~~~~~~~~~~ | ||
80 | |||
81 | :: | ||
82 | |||
83 | int driver_register(struct device_driver *drv); | ||
84 | |||
85 | The driver registers the structure on startup. For drivers that have | ||
86 | no bus-specific fields (i.e. don't have a bus-specific driver | ||
87 | structure), they would use driver_register and pass a pointer to their | ||
88 | struct device_driver object. | ||
89 | |||
90 | Most drivers, however, will have a bus-specific structure and will | ||
91 | need to register with the bus using something like pci_driver_register. | ||
92 | |||
93 | It is important that drivers register their driver structure as early as | ||
94 | possible. Registration with the core initializes several fields in the | ||
95 | struct device_driver object, including the reference count and the | ||
96 | lock. These fields are assumed to be valid at all times and may be | ||
97 | used by the device model core or the bus driver. | ||
98 | |||
99 | |||
100 | Transition Bus Drivers | ||
101 | ~~~~~~~~~~~~~~~~~~~~~~ | ||
102 | |||
103 | By defining wrapper functions, the transition to the new model can be | ||
104 | made easier. Drivers can ignore the generic structure altogether and | ||
105 | let the bus wrapper fill in the fields. For the callbacks, the bus can | ||
106 | define generic callbacks that forward the call to the bus-specific | ||
107 | callbacks of the drivers. | ||
108 | |||
109 | This solution is intended to be only temporary. In order to get class | ||
110 | information in the driver, the drivers must be modified anyway. Since | ||
111 | converting drivers to the new model should reduce some infrastructural | ||
112 | complexity and code size, it is recommended that they are converted as | ||
113 | class information is added. | ||
114 | |||
115 | Access | ||
116 | ~~~~~~ | ||
117 | |||
118 | Once the object has been registered, it may access the common fields of | ||
119 | the object, like the lock and the list of devices:: | ||
120 | |||
121 | int driver_for_each_dev(struct device_driver *drv, void *data, | ||
122 | int (*callback)(struct device *dev, void *data)); | ||
123 | |||
124 | The devices field is a list of all the devices that have been bound to | ||
125 | the driver. The LDM core provides a helper function to operate on all | ||
126 | the devices a driver controls. This helper locks the driver on each | ||
127 | node access, and does proper reference counting on each device as it | ||
128 | accesses it. | ||
129 | |||
130 | |||
131 | sysfs | ||
132 | ~~~~~ | ||
133 | |||
134 | When a driver is registered, a sysfs directory is created in its | ||
135 | bus's directory. In this directory, the driver can export an interface | ||
136 | to userspace to control operation of the driver on a global basis; | ||
137 | e.g. toggling debugging output in the driver. | ||
138 | |||
139 | A future feature of this directory will be a 'devices' directory. This | ||
140 | directory will contain symlinks to the directories of devices it | ||
141 | supports. | ||
142 | |||
143 | |||
144 | |||
145 | Callbacks | ||
146 | ~~~~~~~~~ | ||
147 | |||
148 | :: | ||
149 | |||
150 | int (*probe) (struct device *dev); | ||
151 | |||
152 | The probe() entry is called in task context, with the bus's rwsem locked | ||
153 | and the driver partially bound to the device. Drivers commonly use | ||
154 | container_of() to convert "dev" to a bus-specific type, both in probe() | ||
155 | and other routines. That type often provides device resource data, such | ||
156 | as pci_dev.resource[] or platform_device.resources, which is used in | ||
157 | addition to dev->platform_data to initialize the driver. | ||
158 | |||
159 | This callback holds the driver-specific logic to bind the driver to a | ||
160 | given device. That includes verifying that the device is present, that | ||
161 | it's a version the driver can handle, that driver data structures can | ||
162 | be allocated and initialized, and that any hardware can be initialized. | ||
163 | Drivers often store a pointer to their state with dev_set_drvdata(). | ||
164 | When the driver has successfully bound itself to that device, then probe() | ||
165 | returns zero and the driver model code will finish its part of binding | ||
166 | the driver to that device. | ||
167 | |||
168 | A driver's probe() may return a negative errno value to indicate that | ||
169 | the driver did not bind to this device, in which case it should have | ||
170 | released all resources it allocated:: | ||
171 | |||
172 | int (*remove) (struct device *dev); | ||
173 | |||
174 | remove is called to unbind a driver from a device. This may be | ||
175 | called if a device is physically removed from the system, if the | ||
176 | driver module is being unloaded, during a reboot sequence, or | ||
177 | in other cases. | ||
178 | |||
179 | It is up to the driver to determine if the device is present or | ||
180 | not. It should free any resources allocated specifically for the | ||
181 | device; i.e. anything in the device's driver_data field. | ||
182 | |||
183 | If the device is still present, it should quiesce the device and place | ||
184 | it into a supported low-power state:: | ||
185 | |||
186 | int (*suspend) (struct device *dev, pm_message_t state); | ||
187 | |||
188 | suspend is called to put the device in a low power state:: | ||
189 | |||
190 | int (*resume) (struct device *dev); | ||
191 | |||
192 | Resume is used to bring a device back from a low power state. | ||
193 | |||
194 | |||
195 | Attributes | ||
196 | ~~~~~~~~~~ | ||
197 | |||
198 | :: | ||
199 | |||
200 | struct driver_attribute { | ||
201 | struct attribute attr; | ||
202 | ssize_t (*show)(struct device_driver *driver, char *buf); | ||
203 | ssize_t (*store)(struct device_driver *, const char *buf, size_t count); | ||
204 | }; | ||
205 | |||
206 | Device drivers can export attributes via their sysfs directories. | ||
207 | Drivers can declare attributes using the DRIVER_ATTR_RW and DRIVER_ATTR_RO | ||
208 | macros, which work identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO | ||
209 | macros. | ||
210 | |||
211 | Example:: | ||
212 | |||
213 | DRIVER_ATTR_RW(debug); | ||
214 | |||
215 | This is equivalent to declaring:: | ||
216 | |||
217 | struct driver_attribute driver_attr_debug; | ||
218 | |||
219 | This can then be used to add and remove the attribute from the | ||
220 | driver's directory using:: | ||
221 | |||
222 | int driver_create_file(struct device_driver *, const struct driver_attribute *); | ||
223 | void driver_remove_file(struct device_driver *, const struct driver_attribute *); | ||
diff --git a/Documentation/driver-api/driver-model/index.rst b/Documentation/driver-api/driver-model/index.rst new file mode 100644 index 000000000000..755016422269 --- /dev/null +++ b/Documentation/driver-api/driver-model/index.rst | |||
@@ -0,0 +1,24 @@ | |||
1 | ============ | ||
2 | Driver Model | ||
3 | ============ | ||
4 | |||
5 | .. toctree:: | ||
6 | :maxdepth: 1 | ||
7 | |||
8 | binding | ||
9 | bus | ||
10 | class | ||
11 | design-patterns | ||
12 | device | ||
13 | devres | ||
14 | driver | ||
15 | overview | ||
16 | platform | ||
17 | porting | ||
18 | |||
19 | .. only:: subproject and html | ||
20 | |||
21 | Indices | ||
22 | ======= | ||
23 | |||
24 | * :ref:`genindex` | ||
diff --git a/Documentation/driver-api/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst new file mode 100644 index 000000000000..d4d1e9b40e0c --- /dev/null +++ b/Documentation/driver-api/driver-model/overview.rst | |||
@@ -0,0 +1,124 @@ | |||
1 | ============================= | ||
2 | The Linux Kernel Device Model | ||
3 | ============================= | ||
4 | |||
5 | Patrick Mochel <mochel@digitalimplant.org> | ||
6 | |||
7 | Drafted 26 August 2002 | ||
8 | Updated 31 January 2006 | ||
9 | |||
10 | |||
11 | Overview | ||
12 | ~~~~~~~~ | ||
13 | |||
14 | The Linux Kernel Driver Model is a unification of all the disparate driver | ||
15 | models that were previously used in the kernel. It is intended to augment the | ||
16 | bus-specific drivers for bridges and devices by consolidating a set of data | ||
17 | and operations into globally accessible data structures. | ||
18 | |||
19 | Traditional driver models implemented some sort of tree-like structure | ||
20 | (sometimes just a list) for the devices they control. There wasn't any | ||
21 | uniformity across the different bus types. | ||
22 | |||
23 | The current driver model provides a common, uniform data model for describing | ||
24 | a bus and the devices that can appear under the bus. The unified bus | ||
25 | model includes a set of common attributes which all busses carry, and a set | ||
26 | of common callbacks, such as device discovery during bus probing, bus | ||
27 | shutdown, bus power management, etc. | ||
28 | |||
29 | The common device and bridge interface reflects the goals of the modern | ||
30 | computer: namely the ability to do seamless device "plug and play", power | ||
31 | management, and hot plug. In particular, the model dictated by Intel and | ||
32 | Microsoft (namely ACPI) ensures that almost every device on almost any bus | ||
33 | on an x86-compatible system can work within this paradigm. Of course, | ||
34 | not every bus is able to support all such operations, although most | ||
35 | buses support most of those operations. | ||
36 | |||
37 | |||
38 | Downstream Access | ||
39 | ~~~~~~~~~~~~~~~~~ | ||
40 | |||
41 | Common data fields have been moved out of individual bus layers into a common | ||
42 | data structure. These fields must still be accessed by the bus layers, | ||
43 | and sometimes by the device-specific drivers. | ||
44 | |||
45 | Other bus layers are encouraged to do what has been done for the PCI layer. | ||
46 | struct pci_dev now looks like this:: | ||
47 | |||
48 | struct pci_dev { | ||
49 | ... | ||
50 | |||
51 | struct device dev; /* Generic device interface */ | ||
52 | ... | ||
53 | }; | ||
54 | |||
55 | Note first that the struct device dev within the struct pci_dev is | ||
56 | statically allocated. This means only one allocation on device discovery. | ||
57 | |||
58 | Note also that the struct device dev is not necessarily defined at the | ||
59 | front of the pci_dev structure. This is to make people think about what | ||
60 | they're doing when switching between the bus driver and the global driver, | ||
61 | and to discourage meaningless and incorrect casts between the two. | ||
62 | |||
63 | The PCI bus layer freely accesses the fields of struct device. It knows about | ||
64 | the structure of struct pci_dev, and it should know the structure of struct | ||
65 | device. Individual PCI device drivers that have been converted to the current | ||
66 | driver model generally do not and should not touch the fields of struct device, | ||
67 | unless there is a compelling reason to do so. | ||
68 | |||
69 | The above abstraction prevents unnecessary pain during transitional phases. | ||
70 | If it were not done this way, then when a field was renamed or removed, every | ||
71 | downstream driver would break. On the other hand, if only the bus layer | ||
72 | (and not the device layer) accesses the struct device, it is only the bus | ||
73 | layer that needs to change. | ||
74 | |||
75 | |||
76 | User Interface | ||
77 | ~~~~~~~~~~~~~~ | ||
78 | |||
79 | By virtue of having a complete hierarchical view of all the devices in the | ||
80 | system, exporting a complete hierarchical view to userspace becomes relatively | ||
81 | easy. This has been accomplished by implementing a special purpose virtual | ||
82 | file system named sysfs. | ||
83 | |||
84 | Almost all mainstream Linux distros mount this filesystem automatically; you | ||
85 | can see some variation of the following in the output of the "mount" command:: | ||
86 | |||
87 | $ mount | ||
88 | ... | ||
89 | none on /sys type sysfs (rw,noexec,nosuid,nodev) | ||
90 | ... | ||
91 | $ | ||
92 | |||
93 | The auto-mounting of sysfs is typically accomplished by an entry similar to | ||
94 | the following in the /etc/fstab file:: | ||
95 | |||
96 | none /sys sysfs defaults 0 0 | ||
97 | |||
98 | or something similar in the /lib/init/fstab file on Debian-based systems:: | ||
99 | |||
100 | none /sys sysfs nodev,noexec,nosuid 0 0 | ||
101 | |||
102 | If sysfs is not automatically mounted, you can always do it manually with:: | ||
103 | |||
104 | # mount -t sysfs sysfs /sys | ||
105 | |||
106 | Whenever a device is inserted into the tree, a directory is created for it. | ||
107 | This directory may be populated at each layer of discovery - the global layer, | ||
108 | the bus layer, or the device layer. | ||
109 | |||
110 | The global layer currently creates two files - 'name' and 'power'. The | ||
111 | former only reports the name of the device. The latter reports the | ||
112 | current power state of the device. It will also be used to set the current | ||
113 | power state. | ||
114 | |||
115 | The bus layer may also create files for the devices it finds while probing the | ||
116 | bus. For example, the PCI layer currently creates 'irq' and 'resource' files | ||
117 | for each PCI device. | ||
118 | |||
119 | A device-specific driver may also export files in its directory to expose | ||
120 | device-specific data or tunable interfaces. | ||
121 | |||
122 | More information about the sysfs directory layout can be found in | ||
123 | the other documents in this directory and in the file | ||
124 | Documentation/filesystems/sysfs.txt. | ||
diff --git a/Documentation/driver-api/driver-model/platform.rst b/Documentation/driver-api/driver-model/platform.rst new file mode 100644 index 000000000000..334dd4071ae4 --- /dev/null +++ b/Documentation/driver-api/driver-model/platform.rst | |||
@@ -0,0 +1,246 @@ | |||
1 | ============================ | ||
2 | Platform Devices and Drivers | ||
3 | ============================ | ||
4 | |||
5 | See <linux/platform_device.h> for the driver model interface to the | ||
6 | platform bus: platform_device, and platform_driver. This pseudo-bus | ||
7 | is used to connect devices on busses with minimal infrastructure, | ||
8 | like those used to integrate peripherals on many system-on-chip | ||
9 | processors, or some "legacy" PC interconnects; as opposed to large | ||
10 | formally specified ones like PCI or USB. | ||
11 | |||
12 | |||
13 | Platform devices | ||
14 | ~~~~~~~~~~~~~~~~ | ||
15 | Platform devices are devices that typically appear as autonomous | ||
16 | entities in the system. This includes legacy port-based devices and | ||
17 | host bridges to peripheral buses, and most controllers integrated | ||
18 | into system-on-chip platforms. What they usually have in common | ||
19 | is direct addressing from a CPU bus. Rarely, a platform_device will | ||
20 | be connected through a segment of some other kind of bus; but its | ||
21 | registers will still be directly addressable. | ||
22 | |||
23 | Platform devices are given a name, used in driver binding, and a | ||
24 | list of resources such as addresses and IRQs:: | ||
25 | |||
26 | struct platform_device { | ||
27 | const char *name; | ||
28 | u32 id; | ||
29 | struct device dev; | ||
30 | u32 num_resources; | ||
31 | struct resource *resource; | ||
32 | }; | ||
33 | |||
34 | |||
35 | Platform drivers | ||
36 | ~~~~~~~~~~~~~~~~ | ||
37 | Platform drivers follow the standard driver model convention, where | ||
38 | discovery/enumeration is handled outside the drivers, and drivers | ||
39 | provide probe() and remove() methods. They support power management | ||
40 | and shutdown notifications using the standard conventions:: | ||
41 | |||
42 | struct platform_driver { | ||
43 | int (*probe)(struct platform_device *); | ||
44 | int (*remove)(struct platform_device *); | ||
45 | void (*shutdown)(struct platform_device *); | ||
46 | int (*suspend)(struct platform_device *, pm_message_t state); | ||
47 | int (*suspend_late)(struct platform_device *, pm_message_t state); | ||
48 | int (*resume_early)(struct platform_device *); | ||
49 | int (*resume)(struct platform_device *); | ||
50 | struct device_driver driver; | ||
51 | }; | ||
52 | |||
53 | Note that probe() should in general verify that the specified device hardware | ||
54 | actually exists; sometimes platform setup code can't be sure. The probing | ||
55 | can use device resources, including clocks, and device platform_data. | ||
56 | |||
57 | Platform drivers register themselves the normal way:: | ||
58 | |||
59 | int platform_driver_register(struct platform_driver *drv); | ||
60 | |||
61 | Or, in common situations where the device is known not to be hot-pluggable, | ||
62 | the probe() routine can live in an init section to reduce the driver's | ||
63 | runtime memory footprint:: | ||
64 | |||
65 | int platform_driver_probe(struct platform_driver *drv, | ||
66 | int (*probe)(struct platform_device *)) | ||
67 | |||
68 | Kernel modules can be composed of several platform drivers. The platform core | ||
69 | provides helpers to register and unregister an array of drivers:: | ||
70 | |||
71 | int __platform_register_drivers(struct platform_driver * const *drivers, | ||
72 | unsigned int count, struct module *owner); | ||
73 | void platform_unregister_drivers(struct platform_driver * const *drivers, | ||
74 | unsigned int count); | ||
75 | |||
76 | If one of the drivers fails to register, all drivers registered up to that | ||
77 | point will be unregistered in reverse order. Note that there is a convenience | ||
78 | macro that passes THIS_MODULE as owner parameter:: | ||
79 | |||
80 | #define platform_register_drivers(drivers, count) | ||
81 | |||
82 | |||
83 | Device Enumeration | ||
84 | ~~~~~~~~~~~~~~~~~~ | ||
85 | As a rule, platform specific (and often board-specific) setup code will | ||
86 | register platform devices:: | ||
87 | |||
88 | int platform_device_register(struct platform_device *pdev); | ||
89 | |||
90 | int platform_add_devices(struct platform_device **pdevs, int ndev); | ||
91 | |||
92 | The general rule is to register only those devices that actually exist, | ||
93 | but in some cases extra devices might be registered. For example, a kernel | ||
94 | might be configured to work with an external network adapter that might not | ||
95 | be populated on all boards, or likewise to work with an integrated controller | ||
96 | that some boards might not hook up to any peripherals. | ||
97 | |||
98 | In some cases, boot firmware will export tables describing the devices | ||
99 | that are populated on a given board. Without such tables, often the | ||
100 | only way for system setup code to set up the correct devices is to build | ||
101 | a kernel for a specific target board. Such board-specific kernels are | ||
102 | common with embedded and custom systems development. | ||
103 | |||
104 | In many cases, the memory and IRQ resources associated with the platform | ||
105 | device are not enough to let the device's driver work. Board setup code | ||
106 | will often provide additional information using the device's platform_data | ||
107 | field. | ||
108 | |||
109 | Embedded systems frequently need one or more clocks for platform devices, | ||
110 | which are normally kept off until they're actively needed (to save power). | ||
111 | System setup also associates those clocks with the device, so that | ||
112 | calls to clk_get(&pdev->dev, clock_name) return them as needed. | ||
113 | |||
114 | |||
115 | Legacy Drivers: Device Probing | ||
116 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
117 | Some drivers are not fully converted to the driver model, because they take | ||
118 | on a non-driver role: the driver registers its platform device, rather than | ||
119 | leaving that for system infrastructure. Such drivers can't be hotplugged | ||
120 | or coldplugged, since those mechanisms require device creation to be in a | ||
121 | different system component than the driver. | ||
122 | |||
123 | The only "good" reason for this is to handle older system designs which, like | ||
124 | original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware | ||
125 | configuration. Newer systems have largely abandoned that model, in favor of | ||
126 | bus-level support for dynamic configuration (PCI, USB), or device tables | ||
127 | provided by the boot firmware (e.g. PNPACPI on x86). There are too many | ||
128 | conflicting options about what might be where, and even educated guesses by | ||
129 | an operating system will be wrong often enough to make trouble. | ||
130 | |||
131 | This style of driver is discouraged. If you're updating such a driver, | ||
132 | please try to move the device enumeration to a more appropriate location, | ||
133 | outside the driver. This will usually be cleanup, since such drivers | ||
134 | tend to already have "normal" modes, such as ones using device nodes that | ||
135 | were created by PNP or by platform device setup. | ||
136 | |||
137 | None the less, there are some APIs to support such legacy drivers. Avoid | ||
138 | using these calls except with such hotplug-deficient drivers:: | ||
139 | |||
140 | struct platform_device *platform_device_alloc( | ||
141 | const char *name, int id); | ||
142 | |||
143 | You can use platform_device_alloc() to dynamically allocate a device, which | ||
144 | you will then initialize with resources and platform_device_register(). | ||
145 | A better solution is usually:: | ||
146 | |||
147 | struct platform_device *platform_device_register_simple( | ||
148 | const char *name, int id, | ||
149 | struct resource *res, unsigned int nres); | ||
150 | |||
151 | You can use platform_device_register_simple() as a one-step call to allocate | ||
152 | and register a device. | ||
153 | |||
154 | |||
155 | Device Naming and Driver Binding | ||
156 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
157 | The platform_device.dev.bus_id is the canonical name for the devices. | ||
158 | It's built from two components: | ||
159 | |||
160 | * platform_device.name ... which is also used for driver matching. | ||
161 | |||
162 | * platform_device.id ... the device instance number, or else "-1" | ||
163 | to indicate there's only one. | ||
164 | |||
165 | These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and | ||
166 | "serial"/3 indicates bus_id "serial.3"; both would use the platform_driver | ||
167 | named "serial". While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id) | ||
168 | and use the platform_driver called "my_rtc". | ||
169 | |||
170 | Driver binding is performed automatically by the driver core, invoking | ||
171 | driver probe() after finding a match between device and driver. If the | ||
172 | probe() succeeds, the driver and device are bound as usual. There are | ||
173 | three different ways to find such a match: | ||
174 | |||
175 | - Whenever a device is registered, the drivers for that bus are | ||
176 | checked for matches. Platform devices should be registered very | ||
177 | early during system boot. | ||
178 | |||
179 | - When a driver is registered using platform_driver_register(), all | ||
180 | unbound devices on that bus are checked for matches. Drivers | ||
181 | usually register later during booting, or by module loading. | ||
182 | |||
183 | - Registering a driver using platform_driver_probe() works just like | ||
184 | using platform_driver_register(), except that the driver won't | ||
185 | be probed later if another device registers. (Which is OK, since | ||
186 | this interface is only for use with non-hotpluggable devices.) | ||
187 | |||
188 | |||
189 | Early Platform Devices and Drivers | ||
190 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
191 | The early platform interfaces provide platform data to platform device | ||
192 | drivers early on during the system boot. The code is built on top of the | ||
193 | early_param() command line parsing and can be executed very early on. | ||
194 | |||
195 | Example: "earlyprintk" class early serial console in 6 steps | ||
196 | |||
197 | 1. Registering early platform device data | ||
198 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
199 | The architecture code registers platform device data using the function | ||
200 | early_platform_add_devices(). In the case of early serial console this | ||
201 | should be hardware configuration for the serial port. Devices registered | ||
202 | at this point will later on be matched against early platform drivers. | ||
203 | |||
204 | 2. Parsing kernel command line | ||
205 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
206 | The architecture code calls parse_early_param() to parse the kernel | ||
207 | command line. This will execute all matching early_param() callbacks. | ||
208 | User specified early platform devices will be registered at this point. | ||
209 | For the early serial console case the user can specify the port on the | ||
210 | kernel command line as "earlyprintk=serial.0" where "earlyprintk" is | ||
211 | the class string, "serial" is the name of the platform driver and | ||
212 | 0 is the platform device id. If the id is -1 then the dot and the | ||
213 | id can be omitted. | ||
214 | |||
215 | 3. Installing early platform drivers belonging to a certain class | ||
216 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
217 | The architecture code may optionally force registration of all early | ||
218 | platform drivers belonging to a certain class using the function | ||
219 | early_platform_driver_register_all(). User specified devices from | ||
220 | step 2 have priority over these. This step is omitted by the serial | ||
221 | driver example since the early serial driver code should be disabled | ||
222 | unless the user has specified a port on the kernel command line. | ||
223 | |||
224 | 4. Early platform driver registration | ||
225 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
226 | Compiled-in platform drivers making use of early_platform_init() are | ||
227 | automatically registered during step 2 or 3. The serial driver example | ||
228 | should use early_platform_init("earlyprintk", &platform_driver). | ||
229 | |||
230 | 5. Probing of early platform drivers belonging to a certain class | ||
231 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
232 | The architecture code calls early_platform_driver_probe() to match | ||
233 | registered early platform devices associated with a certain class with | ||
234 | registered early platform drivers. Matched devices will get probed(). | ||
235 | This step can be executed at any point during the early boot. As soon | ||
236 | as possible may be good for the serial port case. | ||
237 | |||
238 | 6. Inside the early platform driver probe() | ||
239 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
240 | The driver code needs to take special care during early boot, especially | ||
241 | when it comes to memory allocation and interrupt registration. The code | ||
242 | in the probe() function can use is_early_platform_device() to check if | ||
243 | it is called at early platform device time or at regular platform device | ||
244 | time. The early serial driver performs register_console() at this point. | ||
245 | |||
246 | For further information, see <linux/platform_device.h>. | ||
diff --git a/Documentation/driver-api/driver-model/porting.rst b/Documentation/driver-api/driver-model/porting.rst new file mode 100644 index 000000000000..931ea879af3f --- /dev/null +++ b/Documentation/driver-api/driver-model/porting.rst | |||
@@ -0,0 +1,448 @@ | |||
1 | ======================================= | ||
2 | Porting Drivers to the New Driver Model | ||
3 | ======================================= | ||
4 | |||
5 | Patrick Mochel | ||
6 | |||
7 | 7 January 2003 | ||
8 | |||
9 | |||
10 | Overview | ||
11 | |||
12 | Please refer to `Documentation/driver-api/driver-model/*.rst` for definitions of | ||
13 | various driver types and concepts. | ||
14 | |||
15 | Most of the work of porting device drivers to the new model happens | ||
16 | at the bus driver layer. This was intentional, to minimize the | ||
17 | negative effect on kernel drivers, and to allow a gradual transition | ||
18 | of bus drivers. | ||
19 | |||
20 | In a nutshell, the driver model consists of a set of objects that can | ||
21 | be embedded in larger, bus-specific objects. Fields in these generic | ||
22 | objects can replace fields in the bus-specific objects. | ||
23 | |||
24 | The generic objects must be registered with the driver model core. By | ||
25 | doing so, they will be exported via the sysfs filesystem. sysfs can be | ||
26 | mounted by doing:: | ||
27 | |||
28 | # mount -t sysfs sysfs /sys | ||
29 | |||
30 | |||
31 | |||
32 | The Process | ||
33 | |||
34 | Step 0: Read include/linux/device.h for object and function definitions. | ||
35 | |||
36 | Step 1: Registering the bus driver. | ||
37 | |||
38 | |||
39 | - Define a struct bus_type for the bus driver:: | ||
40 | |||
41 | struct bus_type pci_bus_type = { | ||
42 | .name = "pci", | ||
43 | }; | ||
44 | |||
45 | |||
46 | - Register the bus type. | ||
47 | |||
48 | This should be done in the initialization function for the bus type, | ||
49 | which is usually the module_init(), or equivalent, function:: | ||
50 | |||
51 | static int __init pci_driver_init(void) | ||
52 | { | ||
53 | return bus_register(&pci_bus_type); | ||
54 | } | ||
55 | |||
56 | subsys_initcall(pci_driver_init); | ||
57 | |||
58 | |||
59 | The bus type may be unregistered (if the bus driver may be compiled | ||
60 | as a module) by doing:: | ||
61 | |||
62 | bus_unregister(&pci_bus_type); | ||
63 | |||
64 | |||
65 | - Export the bus type for others to use. | ||
66 | |||
67 | Other code may wish to reference the bus type, so declare it in a | ||
68 | shared header file and export the symbol. | ||
69 | |||
70 | From include/linux/pci.h:: | ||
71 | |||
72 | extern struct bus_type pci_bus_type; | ||
73 | |||
74 | |||
75 | From the file the above code appears in:: | ||
76 | |||
77 | EXPORT_SYMBOL(pci_bus_type); | ||
78 | |||
79 | |||
80 | |||
81 | - This will cause the bus to show up in /sys/bus/pci/ with two | ||
82 | subdirectories: 'devices' and 'drivers':: | ||
83 | |||
84 | # tree -d /sys/bus/pci/ | ||
85 | /sys/bus/pci/ | ||
86 | |-- devices | ||
87 | `-- drivers | ||
88 | |||
89 | |||
90 | |||
91 | Step 2: Registering Devices. | ||
92 | |||
93 | struct device represents a single device. It mainly contains metadata | ||
94 | describing the relationship the device has to other entities. | ||
95 | |||
96 | |||
97 | - Embed a struct device in the bus-specific device type:: | ||
98 | |||
99 | |||
100 | struct pci_dev { | ||
101 | ... | ||
102 | struct device dev; /* Generic device interface */ | ||
103 | ... | ||
104 | }; | ||
105 | |||
106 | It is recommended that the generic device not be the first item in | ||
107 | the struct to discourage programmers from doing mindless casts | ||
108 | between the object types. Instead macros, or inline functions, | ||
109 | should be created to convert from the generic object type:: | ||
110 | |||
111 | |||
112 | #define to_pci_dev(n) container_of(n, struct pci_dev, dev) | ||
113 | |||
114 | or | ||
115 | |||
116 | static inline struct pci_dev * to_pci_dev(struct device * n) | ||
117 | { | ||
118 | return container_of(n, struct pci_dev, dev); | ||
119 | } | ||
120 | |||
121 | This allows the compiler to verify type-safety of the operations | ||
122 | that are performed (which is Good). | ||
123 | |||
124 | |||
125 | - Initialize the device on registration. | ||
126 | |||
127 | When devices are discovered or registered with the bus type, the | ||
128 | bus driver should initialize the generic device. The most important | ||
129 | things to initialize are the bus_id, parent, and bus fields. | ||
130 | |||
131 | The bus_id is an ASCII string that contains the device's address on | ||
132 | the bus. The format of this string is bus-specific. This is | ||
133 | necessary for representing devices in sysfs. | ||
134 | |||
135 | parent is the physical parent of the device. It is important that | ||
136 | the bus driver sets this field correctly. | ||
137 | |||
138 | The driver model maintains an ordered list of devices that it uses | ||
139 | for power management. This list must be in order to guarantee that | ||
140 | devices are shut down before their physical parents, and vice versa. | ||
141 | The order of this list is determined by the parent of registered | ||
142 | devices. | ||
143 | |||
144 | Also, the location of the device's sysfs directory depends on a | ||
145 | device's parent. sysfs exports a directory structure that mirrors | ||
146 | the device hierarchy. Accurately setting the parent guarantees that | ||
147 | sysfs will accurately represent the hierarchy. | ||
148 | |||
149 | The device's bus field is a pointer to the bus type the device | ||
150 | belongs to. This should be set to the bus_type that was declared | ||
151 | and initialized before. | ||
152 | |||
153 | Optionally, the bus driver may set the device's name and release | ||
154 | fields. | ||
155 | |||
156 | The name field is an ASCII string describing the device, like | ||
157 | |||
158 | "ATI Technologies Inc Radeon QD" | ||
159 | |||
160 | The release field is a callback that the driver model core calls | ||
161 | when the device has been removed, and all references to it have | ||
162 | been released. More on this in a moment. | ||
163 | |||
164 | |||
165 | - Register the device. | ||
166 | |||
167 | Once the generic device has been initialized, it can be registered | ||
168 | with the driver model core by doing:: | ||
169 | |||
170 | device_register(&dev->dev); | ||
171 | |||
172 | It can later be unregistered by doing:: | ||
173 | |||
174 | device_unregister(&dev->dev); | ||
175 | |||
176 | This should happen on buses that support hotpluggable devices. | ||
177 | If a bus driver unregisters a device, it should not immediately free | ||
178 | it. It should instead wait for the driver model core to call the | ||
179 | device's release method, then free the bus-specific object. | ||
180 | (There may be other code that is currently referencing the device | ||
181 | structure, and it would be rude to free the device while that is | ||
182 | happening). | ||
183 | |||
184 | |||
185 | When the device is registered, a directory in sysfs is created. | ||
186 | The PCI tree in sysfs looks like:: | ||
187 | |||
188 | /sys/devices/pci0/ | ||
189 | |-- 00:00.0 | ||
190 | |-- 00:01.0 | ||
191 | | `-- 01:00.0 | ||
192 | |-- 00:02.0 | ||
193 | | `-- 02:1f.0 | ||
194 | | `-- 03:00.0 | ||
195 | |-- 00:1e.0 | ||
196 | | `-- 04:04.0 | ||
197 | |-- 00:1f.0 | ||
198 | |-- 00:1f.1 | ||
199 | | |-- ide0 | ||
200 | | | |-- 0.0 | ||
201 | | | `-- 0.1 | ||
202 | | `-- ide1 | ||
203 | | `-- 1.0 | ||
204 | |-- 00:1f.2 | ||
205 | |-- 00:1f.3 | ||
206 | `-- 00:1f.5 | ||
207 | |||
208 | Also, symlinks are created in the bus's 'devices' directory | ||
209 | that point to the device's directory in the physical hierarchy:: | ||
210 | |||
211 | /sys/bus/pci/devices/ | ||
212 | |-- 00:00.0 -> ../../../devices/pci0/00:00.0 | ||
213 | |-- 00:01.0 -> ../../../devices/pci0/00:01.0 | ||
214 | |-- 00:02.0 -> ../../../devices/pci0/00:02.0 | ||
215 | |-- 00:1e.0 -> ../../../devices/pci0/00:1e.0 | ||
216 | |-- 00:1f.0 -> ../../../devices/pci0/00:1f.0 | ||
217 | |-- 00:1f.1 -> ../../../devices/pci0/00:1f.1 | ||
218 | |-- 00:1f.2 -> ../../../devices/pci0/00:1f.2 | ||
219 | |-- 00:1f.3 -> ../../../devices/pci0/00:1f.3 | ||
220 | |-- 00:1f.5 -> ../../../devices/pci0/00:1f.5 | ||
221 | |-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0 | ||
222 | |-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0 | ||
223 | |-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0 | ||
224 | `-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0 | ||
225 | |||
226 | |||
227 | |||
228 | Step 3: Registering Drivers. | ||
229 | |||
230 | struct device_driver is a simple driver structure that contains a set | ||
231 | of operations that the driver model core may call. | ||
232 | |||
233 | |||
234 | - Embed a struct device_driver in the bus-specific driver. | ||
235 | |||
236 | Just like with devices, do something like:: | ||
237 | |||
238 | struct pci_driver { | ||
239 | ... | ||
240 | struct device_driver driver; | ||
241 | }; | ||
242 | |||
243 | |||
244 | - Initialize the generic driver structure. | ||
245 | |||
246 | When the driver registers with the bus (e.g. doing pci_register_driver()), | ||
247 | initialize the necessary fields of the driver: the name and bus | ||
248 | fields. | ||
249 | |||
250 | |||
251 | - Register the driver. | ||
252 | |||
253 | After the generic driver has been initialized, call:: | ||
254 | |||
255 | driver_register(&drv->driver); | ||
256 | |||
257 | to register the driver with the core. | ||
258 | |||
259 | When the driver is unregistered from the bus, unregister it from the | ||
260 | core by doing:: | ||
261 | |||
262 | driver_unregister(&drv->driver); | ||
263 | |||
264 | Note that this will block until all references to the driver have | ||
265 | gone away. Normally, there will not be any. | ||
266 | |||
267 | |||
268 | - Sysfs representation. | ||
269 | |||
270 | Drivers are exported via sysfs in their bus's 'drivers' directory. | ||
271 | For example:: | ||
272 | |||
273 | /sys/bus/pci/drivers/ | ||
274 | |-- 3c59x | ||
275 | |-- Ensoniq AudioPCI | ||
276 | |-- agpgart-amdk7 | ||
277 | |-- e100 | ||
278 | `-- serial | ||
279 | |||
280 | |||
281 | Step 4: Define Generic Methods for Drivers. | ||
282 | |||
283 | struct device_driver defines a set of operations that the driver model | ||
284 | core calls. Most of these operations are probably similar to | ||
285 | operations the bus already defines for drivers, but taking different | ||
286 | parameters. | ||
287 | |||
288 | It would be difficult and tedious to force every driver on a bus to | ||
289 | simultaneously convert their drivers to generic format. Instead, the | ||
290 | bus driver should define single instances of the generic methods that | ||
291 | forward calls to the bus-specific drivers. For instance:: | ||
292 | |||
293 | |||
294 | static int pci_device_remove(struct device * dev) | ||
295 | { | ||
296 | struct pci_dev * pci_dev = to_pci_dev(dev); | ||
297 | struct pci_driver * drv = pci_dev->driver; | ||
298 | |||
299 | if (drv) { | ||
300 | if (drv->remove) | ||
301 | drv->remove(pci_dev); | ||
302 | pci_dev->driver = NULL; | ||
303 | } | ||
304 | return 0; | ||
305 | } | ||
306 | |||
307 | |||
308 | The generic driver should be initialized with these methods before it | ||
309 | is registered:: | ||
310 | |||
311 | /* initialize common driver fields */ | ||
312 | drv->driver.name = drv->name; | ||
313 | drv->driver.bus = &pci_bus_type; | ||
314 | drv->driver.probe = pci_device_probe; | ||
315 | drv->driver.resume = pci_device_resume; | ||
316 | drv->driver.suspend = pci_device_suspend; | ||
317 | drv->driver.remove = pci_device_remove; | ||
318 | |||
319 | /* register with core */ | ||
320 | driver_register(&drv->driver); | ||
321 | |||
322 | |||
323 | Ideally, the bus should only initialize the fields if they are not | ||
324 | already set. This allows the drivers to implement their own generic | ||
325 | methods. | ||
326 | |||
327 | |||
328 | Step 5: Support generic driver binding. | ||
329 | |||
330 | The model assumes that a device or driver can be dynamically | ||
331 | registered with the bus at any time. When registration happens, | ||
332 | devices must be bound to a driver, or drivers must be bound to all | ||
333 | devices that it supports. | ||
334 | |||
335 | A driver typically contains a list of device IDs that it supports. The | ||
336 | bus driver compares these IDs to the IDs of devices registered with it. | ||
337 | The format of the device IDs, and the semantics for comparing them are | ||
338 | bus-specific, so the generic model does not attempt to generalize them. | ||
339 | |||
340 | Instead, a bus may supply a method in struct bus_type that does the | ||
341 | comparison:: | ||
342 | |||
343 | int (*match)(struct device * dev, struct device_driver * drv); | ||
344 | |||
345 | match should return a positive value if the driver supports the device, | ||
346 | and zero otherwise. It may also return an error code (for example | ||
347 | -EPROBE_DEFER) if determining that given driver supports the device is | ||
348 | not possible. | ||
349 | |||
350 | When a device is registered, the bus's list of drivers is iterated | ||
351 | over. bus->match() is called for each one until a match is found. | ||
352 | |||
353 | When a driver is registered, the bus's list of devices is iterated | ||
354 | over. bus->match() is called for each device that is not already | ||
355 | claimed by a driver. | ||
356 | |||
357 | When a device is successfully bound to a driver, device->driver is | ||
358 | set, the device is added to a per-driver list of devices, and a | ||
359 | symlink is created in the driver's sysfs directory that points to the | ||
360 | device's physical directory:: | ||
361 | |||
362 | /sys/bus/pci/drivers/ | ||
363 | |-- 3c59x | ||
364 | | `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0 | ||
365 | |-- Ensoniq AudioPCI | ||
366 | |-- agpgart-amdk7 | ||
367 | | `-- 00:00.0 -> ../../../../devices/pci0/00:00.0 | ||
368 | |-- e100 | ||
369 | | `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0 | ||
370 | `-- serial | ||
371 | |||
372 | |||
373 | This driver binding should replace the existing driver binding | ||
374 | mechanism the bus currently uses. | ||
375 | |||
376 | |||
377 | Step 6: Supply a hotplug callback. | ||
378 | |||
379 | Whenever a device is registered with the driver model core, the | ||
380 | userspace program /sbin/hotplug is called to notify userspace. | ||
381 | Users can define actions to perform when a device is inserted or | ||
382 | removed. | ||
383 | |||
384 | The driver model core passes several arguments to userspace via | ||
385 | environment variables, including | ||
386 | |||
387 | - ACTION: set to 'add' or 'remove' | ||
388 | - DEVPATH: set to the device's physical path in sysfs. | ||
389 | |||
390 | A bus driver may also supply additional parameters for userspace to | ||
391 | consume. To do this, a bus must implement the 'hotplug' method in | ||
392 | struct bus_type:: | ||
393 | |||
394 | int (*hotplug) (struct device *dev, char **envp, | ||
395 | int num_envp, char *buffer, int buffer_size); | ||
396 | |||
397 | This is called immediately before /sbin/hotplug is executed. | ||
398 | |||
399 | |||
400 | Step 7: Cleaning up the bus driver. | ||
401 | |||
402 | The generic bus, device, and driver structures provide several fields | ||
403 | that can replace those defined privately to the bus driver. | ||
404 | |||
405 | - Device list. | ||
406 | |||
407 | struct bus_type contains a list of all devices registered with the bus | ||
408 | type. This includes all devices on all instances of that bus type. | ||
409 | An internal list that the bus uses may be removed, in favor of using | ||
410 | this one. | ||
411 | |||
412 | The core provides an iterator to access these devices:: | ||
413 | |||
414 | int bus_for_each_dev(struct bus_type * bus, struct device * start, | ||
415 | void * data, int (*fn)(struct device *, void *)); | ||
416 | |||
417 | |||
418 | - Driver list. | ||
419 | |||
420 | struct bus_type also contains a list of all drivers registered with | ||
421 | it. An internal list of drivers that the bus driver maintains may | ||
422 | be removed in favor of using the generic one. | ||
423 | |||
424 | The drivers may be iterated over, like devices:: | ||
425 | |||
426 | int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, | ||
427 | void * data, int (*fn)(struct device_driver *, void *)); | ||
428 | |||
429 | |||
430 | Please see drivers/base/bus.c for more information. | ||
431 | |||
432 | |||
433 | - rwsem | ||
434 | |||
435 | struct bus_type contains an rwsem that protects all core accesses to | ||
436 | the device and driver lists. This can be used by the bus driver | ||
437 | internally, and should be used when accessing the device or driver | ||
438 | lists the bus maintains. | ||
439 | |||
440 | |||
441 | - Device and driver fields. | ||
442 | |||
443 | Some of the fields in struct device and struct device_driver duplicate | ||
444 | fields in the bus-specific representations of these objects. Feel free | ||
445 | to remove the bus-specific ones and favor the generic ones. Note | ||
446 | though, that this will likely mean fixing up all the drivers that | ||
447 | reference the bus-specific fields (though those should all be 1-line | ||
448 | changes). | ||
diff --git a/Documentation/driver-api/early-userspace/buffer-format.rst b/Documentation/driver-api/early-userspace/buffer-format.rst new file mode 100644 index 000000000000..7f74e301fdf3 --- /dev/null +++ b/Documentation/driver-api/early-userspace/buffer-format.rst | |||
@@ -0,0 +1,119 @@ | |||
1 | ======================= | ||
2 | initramfs buffer format | ||
3 | ======================= | ||
4 | |||
5 | Al Viro, H. Peter Anvin | ||
6 | |||
7 | Last revision: 2002-01-13 | ||
8 | |||
9 | Starting with kernel 2.5.x, the old "initial ramdisk" protocol is | ||
10 | getting {replaced/complemented} with the new "initial ramfs" | ||
11 | (initramfs) protocol. The initramfs contents is passed using the same | ||
12 | memory buffer protocol used by the initrd protocol, but the contents | ||
13 | is different. The initramfs buffer contains an archive which is | ||
14 | expanded into a ramfs filesystem; this document details the initramfs | ||
15 | buffer format. | ||
16 | |||
17 | The initramfs buffer format is based around the "newc" or "crc" CPIO | ||
18 | formats, and can be created with the cpio(1) utility. The cpio | ||
19 | archive can be compressed using gzip(1). One valid version of an | ||
20 | initramfs buffer is thus a single .cpio.gz file. | ||
21 | |||
22 | The full format of the initramfs buffer is defined by the following | ||
23 | grammar, where:: | ||
24 | |||
25 | * is used to indicate "0 or more occurrences of" | ||
26 | (|) indicates alternatives | ||
27 | + indicates concatenation | ||
28 | GZIP() indicates the gzip(1) of the operand | ||
29 | ALGN(n) means padding with null bytes to an n-byte boundary | ||
30 | |||
31 | initramfs := ("\0" | cpio_archive | cpio_gzip_archive)* | ||
32 | |||
33 | cpio_gzip_archive := GZIP(cpio_archive) | ||
34 | |||
35 | cpio_archive := cpio_file* + (<nothing> | cpio_trailer) | ||
36 | |||
37 | cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data | ||
38 | |||
39 | cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4) | ||
40 | |||
41 | |||
42 | In human terms, the initramfs buffer contains a collection of | ||
43 | compressed and/or uncompressed cpio archives (in the "newc" or "crc" | ||
44 | formats); arbitrary amounts of zero bytes (for padding) can be added | ||
45 | between members. | ||
46 | |||
47 | The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is | ||
48 | not ignored; see "handling of hard links" below. | ||
49 | |||
50 | The structure of the cpio_header is as follows (all fields contain | ||
51 | hexadecimal ASCII numbers fully padded with '0' on the left to the | ||
52 | full width of the field, for example, the integer 4780 is represented | ||
53 | by the ASCII string "000012ac"): | ||
54 | |||
55 | ============= ================== ============================================== | ||
56 | Field name Field size Meaning | ||
57 | ============= ================== ============================================== | ||
58 | c_magic 6 bytes The string "070701" or "070702" | ||
59 | c_ino 8 bytes File inode number | ||
60 | c_mode 8 bytes File mode and permissions | ||
61 | c_uid 8 bytes File uid | ||
62 | c_gid 8 bytes File gid | ||
63 | c_nlink 8 bytes Number of links | ||
64 | c_mtime 8 bytes Modification time | ||
65 | c_filesize 8 bytes Size of data field | ||
66 | c_maj 8 bytes Major part of file device number | ||
67 | c_min 8 bytes Minor part of file device number | ||
68 | c_rmaj 8 bytes Major part of device node reference | ||
69 | c_rmin 8 bytes Minor part of device node reference | ||
70 | c_namesize 8 bytes Length of filename, including final \0 | ||
71 | c_chksum 8 bytes Checksum of data field if c_magic is 070702; | ||
72 | otherwise zero | ||
73 | ============= ================== ============================================== | ||
74 | |||
75 | The c_mode field matches the contents of st_mode returned by stat(2) | ||
76 | on Linux, and encodes the file type and file permissions. | ||
77 | |||
78 | The c_filesize should be zero for any file which is not a regular file | ||
79 | or symlink. | ||
80 | |||
81 | The c_chksum field contains a simple 32-bit unsigned sum of all the | ||
82 | bytes in the data field. cpio(1) refers to this as "crc", which is | ||
83 | clearly incorrect (a cyclic redundancy check is a different and | ||
84 | significantly stronger integrity check), however, this is the | ||
85 | algorithm used. | ||
86 | |||
87 | If the filename is "TRAILER!!!" this is actually an end-of-archive | ||
88 | marker; the c_filesize for an end-of-archive marker must be zero. | ||
89 | |||
90 | |||
91 | Handling of hard links | ||
92 | ====================== | ||
93 | |||
94 | When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino) | ||
95 | tuple is looked up in a tuple buffer. If not found, it is entered in | ||
96 | the tuple buffer and the entry is created as usual; if found, a hard | ||
97 | link rather than a second copy of the file is created. It is not | ||
98 | necessary (but permitted) to include a second copy of the file | ||
99 | contents; if the file contents is not included, the c_filesize field | ||
100 | should be set to zero to indicate no data section follows. If data is | ||
101 | present, the previous instance of the file is overwritten; this allows | ||
102 | the data-carrying instance of a file to occur anywhere in the sequence | ||
103 | (GNU cpio is reported to attach the data to the last instance of a | ||
104 | file only.) | ||
105 | |||
106 | c_filesize must not be zero for a symlink. | ||
107 | |||
108 | When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is | ||
109 | reset. This permits archives which are generated independently to be | ||
110 | concatenated. | ||
111 | |||
112 | To combine file data from different sources (without having to | ||
113 | regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of | ||
114 | the following techniques can be used: | ||
115 | |||
116 | a) Separate the different file data sources with a "TRAILER!!!" | ||
117 | end-of-archive marker, or | ||
118 | |||
119 | b) Make sure c_nlink == 1 for all nondirectory entries. | ||
diff --git a/Documentation/driver-api/early-userspace/early_userspace_support.rst b/Documentation/driver-api/early-userspace/early_userspace_support.rst new file mode 100644 index 000000000000..3deefb34046b --- /dev/null +++ b/Documentation/driver-api/early-userspace/early_userspace_support.rst | |||
@@ -0,0 +1,154 @@ | |||
1 | ======================= | ||
2 | Early userspace support | ||
3 | ======================= | ||
4 | |||
5 | Last update: 2004-12-20 tlh | ||
6 | |||
7 | |||
8 | "Early userspace" is a set of libraries and programs that provide | ||
9 | various pieces of functionality that are important enough to be | ||
10 | available while a Linux kernel is coming up, but that don't need to be | ||
11 | run inside the kernel itself. | ||
12 | |||
13 | It consists of several major infrastructure components: | ||
14 | |||
15 | - gen_init_cpio, a program that builds a cpio-format archive | ||
16 | containing a root filesystem image. This archive is compressed, and | ||
17 | the compressed image is linked into the kernel image. | ||
18 | - initramfs, a chunk of code that unpacks the compressed cpio image | ||
19 | midway through the kernel boot process. | ||
20 | - klibc, a userspace C library, currently packaged separately, that is | ||
21 | optimized for correctness and small size. | ||
22 | |||
23 | The cpio file format used by initramfs is the "newc" (aka "cpio -H newc") | ||
24 | format, and is documented in the file "buffer-format.rst". There are | ||
25 | two ways to add an early userspace image: specify an existing cpio | ||
26 | archive to be used as the image or have the kernel build process build | ||
27 | the image from specifications. | ||
28 | |||
29 | CPIO ARCHIVE method | ||
30 | ------------------- | ||
31 | |||
32 | You can create a cpio archive that contains the early userspace image. | ||
33 | Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it | ||
34 | will be used directly. Only a single cpio file may be specified in | ||
35 | CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in | ||
36 | combination with a cpio archive. | ||
37 | |||
38 | IMAGE BUILDING method | ||
39 | --------------------- | ||
40 | |||
41 | The kernel build process can also build an early userspace image from | ||
42 | source parts rather than supplying a cpio archive. This method provides | ||
43 | a way to create images with root-owned files even though the image was | ||
44 | built by an unprivileged user. | ||
45 | |||
46 | The image is specified as one or more sources in | ||
47 | CONFIG_INITRAMFS_SOURCE. Sources can be either directories or files - | ||
48 | cpio archives are *not* allowed when building from sources. | ||
49 | |||
50 | A source directory will have it and all of its contents packaged. The | ||
51 | specified directory name will be mapped to '/'. When packaging a | ||
52 | directory, limited user and group ID translation can be performed. | ||
53 | INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to | ||
54 | user root (0). INITRAMFS_ROOT_GID can be set to a group ID that needs | ||
55 | to be mapped to group root (0). | ||
56 | |||
57 | A source file must be directives in the format required by the | ||
58 | usr/gen_init_cpio utility (run 'usr/gen_init_cpio -h' to get the | ||
59 | file format). The directives in the file will be passed directly to | ||
60 | usr/gen_init_cpio. | ||
61 | |||
62 | When a combination of directories and files are specified then the | ||
63 | initramfs image will be an aggregate of all of them. In this way a user | ||
64 | can create a 'root-image' directory and install all files into it. | ||
65 | Because device-special files cannot be created by an unprivileged user, | ||
66 | special files can be listed in a 'root-files' file. Both 'root-image' | ||
67 | and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete | ||
68 | early userspace image can be built by an unprivileged user. | ||
69 | |||
70 | As a technical note, when directories and files are specified, the | ||
71 | entire CONFIG_INITRAMFS_SOURCE is passed to | ||
72 | usr/gen_initramfs_list.sh. This means that CONFIG_INITRAMFS_SOURCE | ||
73 | can really be interpreted as any legal argument to | ||
74 | gen_initramfs_list.sh. If a directory is specified as an argument then | ||
75 | the contents are scanned, uid/gid translation is performed, and | ||
76 | usr/gen_init_cpio file directives are output. If a file is | ||
77 | specified as an argument to usr/gen_initramfs_list.sh then the | ||
78 | contents of the file are simply copied to the output. All of the output | ||
79 | directives from directory scanning and file contents copying are | ||
80 | processed by usr/gen_init_cpio. | ||
81 | |||
82 | See also 'usr/gen_initramfs_list.sh -h'. | ||
83 | |||
84 | Where's this all leading? | ||
85 | ========================= | ||
86 | |||
87 | The klibc distribution contains some of the necessary software to make | ||
88 | early userspace useful. The klibc distribution is currently | ||
89 | maintained separately from the kernel. | ||
90 | |||
91 | You can obtain somewhat infrequent snapshots of klibc from | ||
92 | https://www.kernel.org/pub/linux/libs/klibc/ | ||
93 | |||
94 | For active users, you are better off using the klibc git | ||
95 | repository, at http://git.kernel.org/?p=libs/klibc/klibc.git | ||
96 | |||
97 | The standalone klibc distribution currently provides three components, | ||
98 | in addition to the klibc library: | ||
99 | |||
100 | - ipconfig, a program that configures network interfaces. It can | ||
101 | configure them statically, or use DHCP to obtain information | ||
102 | dynamically (aka "IP autoconfiguration"). | ||
103 | - nfsmount, a program that can mount an NFS filesystem. | ||
104 | - kinit, the "glue" that uses ipconfig and nfsmount to replace the old | ||
105 | support for IP autoconfig, mount a filesystem over NFS, and continue | ||
106 | system boot using that filesystem as root. | ||
107 | |||
108 | kinit is built as a single statically linked binary to save space. | ||
109 | |||
110 | Eventually, several more chunks of kernel functionality will hopefully | ||
111 | move to early userspace: | ||
112 | |||
113 | - Almost all of init/do_mounts* (the beginning of this is already in | ||
114 | place) | ||
115 | - ACPI table parsing | ||
116 | - Insert unwieldy subsystem that doesn't really need to be in kernel | ||
117 | space here | ||
118 | |||
119 | If kinit doesn't meet your current needs and you've got bytes to burn, | ||
120 | the klibc distribution includes a small Bourne-compatible shell (ash) | ||
121 | and a number of other utilities, so you can replace kinit and build | ||
122 | custom initramfs images that meet your needs exactly. | ||
123 | |||
124 | For questions and help, you can sign up for the early userspace | ||
125 | mailing list at http://www.zytor.com/mailman/listinfo/klibc | ||
126 | |||
127 | How does it work? | ||
128 | ================= | ||
129 | |||
130 | The kernel has currently 3 ways to mount the root filesystem: | ||
131 | |||
132 | a) all required device and filesystem drivers compiled into the kernel, no | ||
133 | initrd. init/main.c:init() will call prepare_namespace() to mount the | ||
134 | final root filesystem, based on the root= option and optional init= to run | ||
135 | some other init binary than listed at the end of init/main.c:init(). | ||
136 | |||
137 | b) some device and filesystem drivers built as modules and stored in an | ||
138 | initrd. The initrd must contain a binary '/linuxrc' which is supposed to | ||
139 | load these driver modules. It is also possible to mount the final root | ||
140 | filesystem via linuxrc and use the pivot_root syscall. The initrd is | ||
141 | mounted and executed via prepare_namespace(). | ||
142 | |||
143 | c) using initramfs. The call to prepare_namespace() must be skipped. | ||
144 | This means that a binary must do all the work. Said binary can be stored | ||
145 | into initramfs either via modifying usr/gen_init_cpio.c or via the new | ||
146 | initrd format, a cpio archive. It must be called "/init". This binary | ||
147 | is responsible to do all the things prepare_namespace() would do. | ||
148 | |||
149 | To maintain backwards compatibility, the /init binary will only run if it | ||
150 | comes via an initramfs cpio archive. If this is not the case, | ||
151 | init/main.c:init() will run prepare_namespace() to mount the final root | ||
152 | and exec one of the predefined init binaries. | ||
153 | |||
154 | Bryan O'Sullivan <bos@serpentine.com> | ||
diff --git a/Documentation/driver-api/early-userspace/index.rst b/Documentation/driver-api/early-userspace/index.rst new file mode 100644 index 000000000000..149c1822f06d --- /dev/null +++ b/Documentation/driver-api/early-userspace/index.rst | |||
@@ -0,0 +1,18 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | =============== | ||
4 | Early Userspace | ||
5 | =============== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | early_userspace_support | ||
11 | buffer-format | ||
12 | |||
13 | .. only:: subproject and html | ||
14 | |||
15 | Indices | ||
16 | ======= | ||
17 | |||
18 | * :ref:`genindex` | ||
diff --git a/Documentation/driver-api/edid.rst b/Documentation/driver-api/edid.rst new file mode 100644 index 000000000000..b1b5acd501ed --- /dev/null +++ b/Documentation/driver-api/edid.rst | |||
@@ -0,0 +1,58 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ==== | ||
4 | EDID | ||
5 | ==== | ||
6 | |||
7 | In the good old days when graphics parameters were configured explicitly | ||
8 | in a file called xorg.conf, even broken hardware could be managed. | ||
9 | |||
10 | Today, with the advent of Kernel Mode Setting, a graphics board is | ||
11 | either correctly working because all components follow the standards - | ||
12 | or the computer is unusable, because the screen remains dark after | ||
13 | booting or it displays the wrong area. Cases when this happens are: | ||
14 | - The graphics board does not recognize the monitor. | ||
15 | - The graphics board is unable to detect any EDID data. | ||
16 | - The graphics board incorrectly forwards EDID data to the driver. | ||
17 | - The monitor sends no or bogus EDID data. | ||
18 | - A KVM sends its own EDID data instead of querying the connected monitor. | ||
19 | Adding the kernel parameter "nomodeset" helps in most cases, but causes | ||
20 | restrictions later on. | ||
21 | |||
22 | As a remedy for such situations, the kernel configuration item | ||
23 | CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows providing an | ||
24 | individually prepared or corrected EDID data set in the /lib/firmware | ||
25 | directory from where it is loaded via the firmware interface. The code | ||
26 | (see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for | ||
27 | commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200, | ||
28 | 1680x1050, 1920x1080) as binary blobs, but the kernel source tree does | ||
29 | not contain code to create these data. In order to elucidate the origin | ||
30 | of the built-in binary EDID blobs and to facilitate the creation of | ||
31 | individual data for a specific misbehaving monitor, commented sources | ||
32 | and a Makefile environment are given here. | ||
33 | |||
34 | To create binary EDID and C source code files from the existing data | ||
35 | material, simply type "make". | ||
36 | |||
37 | If you want to create your own EDID file, copy the file 1024x768.S, | ||
38 | replace the settings with your own data and add a new target to the | ||
39 | Makefile. Please note that the EDID data structure expects the timing | ||
40 | values in a different way as compared to the standard X11 format. | ||
41 | |||
42 | X11: | ||
43 | HTimings: | ||
44 | hdisp hsyncstart hsyncend htotal | ||
45 | VTimings: | ||
46 | vdisp vsyncstart vsyncend vtotal | ||
47 | |||
48 | EDID:: | ||
49 | |||
50 | #define XPIX hdisp | ||
51 | #define XBLANK htotal-hdisp | ||
52 | #define XOFFSET hsyncstart-hdisp | ||
53 | #define XPULSE hsyncend-hsyncstart | ||
54 | |||
55 | #define YPIX vdisp | ||
56 | #define YBLANK vtotal-vdisp | ||
57 | #define YOFFSET vsyncstart-vdisp | ||
58 | #define YPULSE vsyncend-vsyncstart | ||
diff --git a/Documentation/driver-api/eisa.rst b/Documentation/driver-api/eisa.rst new file mode 100644 index 000000000000..c07565ba57da --- /dev/null +++ b/Documentation/driver-api/eisa.rst | |||
@@ -0,0 +1,230 @@ | |||
1 | ================ | ||
2 | EISA bus support | ||
3 | ================ | ||
4 | |||
5 | :Author: Marc Zyngier <maz@wild-wind.fr.eu.org> | ||
6 | |||
7 | This document groups random notes about porting EISA drivers to the | ||
8 | new EISA/sysfs API. | ||
9 | |||
10 | Starting from version 2.5.59, the EISA bus is almost given the same | ||
11 | status as other much more mainstream busses such as PCI or USB. This | ||
12 | has been possible through sysfs, which defines a nice enough set of | ||
13 | abstractions to manage busses, devices and drivers. | ||
14 | |||
15 | Although the new API is quite simple to use, converting existing | ||
16 | drivers to the new infrastructure is not an easy task (mostly because | ||
17 | detection code is generally also used to probe ISA cards). Moreover, | ||
18 | most EISA drivers are among the oldest Linux drivers so, as you can | ||
19 | imagine, some dust has settled here over the years. | ||
20 | |||
21 | The EISA infrastructure is made up of three parts: | ||
22 | |||
23 | - The bus code implements most of the generic code. It is shared | ||
24 | among all the architectures that the EISA code runs on. It | ||
25 | implements bus probing (detecting EISA cards available on the bus), | ||
26 | allocates I/O resources, allows fancy naming through sysfs, and | ||
27 | offers interfaces for drivers to register. | ||
28 | |||
29 | - The bus root driver implements the glue between the bus hardware | ||
30 | and the generic bus code. It is responsible for discovering the | ||
31 | device implementing the bus, and setting it up to be later probed | ||
32 | by the bus code. This can go from something as simple as reserving | ||
33 | an I/O region on x86, to the rather more complex, like the hppa | ||
34 | EISA code. This is the part to implement in order to have EISA | ||
35 | running on a "new" platform. | ||
36 | |||
37 | - The driver offers the bus a list of devices that it manages, and | ||
38 | implements the necessary callbacks to probe and release devices | ||
39 | whenever told to. | ||
40 | |||
41 | Every function/structure below lives in <linux/eisa.h>, which depends | ||
42 | heavily on <linux/device.h>. | ||
43 | |||
44 | Bus root driver | ||
45 | =============== | ||
46 | |||
47 | :: | ||
48 | |||
49 | int eisa_root_register (struct eisa_root_device *root); | ||
50 | |||
51 | The eisa_root_register function is used to declare a device as the | ||
52 | root of an EISA bus. The eisa_root_device structure holds a reference | ||
53 | to this device, as well as some parameters for probing purposes:: | ||
54 | |||
55 | struct eisa_root_device { | ||
56 | struct device *dev; /* Pointer to bridge device */ | ||
57 | struct resource *res; | ||
58 | unsigned long bus_base_addr; | ||
59 | int slots; /* Max slot number */ | ||
60 | int force_probe; /* Probe even when no slot 0 */ | ||
61 | u64 dma_mask; /* from bridge device */ | ||
62 | int bus_nr; /* Set by eisa_root_register */ | ||
63 | struct resource eisa_root_res; /* ditto */ | ||
64 | }; | ||
65 | |||
66 | ============= ====================================================== | ||
67 | node used for eisa_root_register internal purpose | ||
68 | dev pointer to the root device | ||
69 | res root device I/O resource | ||
70 | bus_base_addr slot 0 address on this bus | ||
71 | slots max slot number to probe | ||
72 | force_probe Probe even when slot 0 is empty (no EISA mainboard) | ||
73 | dma_mask Default DMA mask. Usually the bridge device dma_mask. | ||
74 | bus_nr unique bus id, set by eisa_root_register | ||
75 | ============= ====================================================== | ||
76 | |||
77 | Driver | ||
78 | ====== | ||
79 | |||
80 | :: | ||
81 | |||
82 | int eisa_driver_register (struct eisa_driver *edrv); | ||
83 | void eisa_driver_unregister (struct eisa_driver *edrv); | ||
84 | |||
85 | Clear enough ? | ||
86 | |||
87 | :: | ||
88 | |||
89 | struct eisa_device_id { | ||
90 | char sig[EISA_SIG_LEN]; | ||
91 | unsigned long driver_data; | ||
92 | }; | ||
93 | |||
94 | struct eisa_driver { | ||
95 | const struct eisa_device_id *id_table; | ||
96 | struct device_driver driver; | ||
97 | }; | ||
98 | |||
99 | =============== ==================================================== | ||
100 | id_table an array of NULL terminated EISA id strings, | ||
101 | followed by an empty string. Each string can | ||
102 | optionally be paired with a driver-dependent value | ||
103 | (driver_data). | ||
104 | |||
105 | driver a generic driver, such as described in | ||
106 | Documentation/driver-api/driver-model/driver.rst. Only .name, | ||
107 | .probe and .remove members are mandatory. | ||
108 | =============== ==================================================== | ||
109 | |||
110 | An example is the 3c59x driver:: | ||
111 | |||
112 | static struct eisa_device_id vortex_eisa_ids[] = { | ||
113 | { "TCM5920", EISA_3C592_OFFSET }, | ||
114 | { "TCM5970", EISA_3C597_OFFSET }, | ||
115 | { "" } | ||
116 | }; | ||
117 | |||
118 | static struct eisa_driver vortex_eisa_driver = { | ||
119 | .id_table = vortex_eisa_ids, | ||
120 | .driver = { | ||
121 | .name = "3c59x", | ||
122 | .probe = vortex_eisa_probe, | ||
123 | .remove = vortex_eisa_remove | ||
124 | } | ||
125 | }; | ||
126 | |||
127 | Device | ||
128 | ====== | ||
129 | |||
130 | The sysfs framework calls .probe and .remove functions upon device | ||
131 | discovery and removal (note that the .remove function is only called | ||
132 | when driver is built as a module). | ||
133 | |||
134 | Both functions are passed a pointer to a 'struct device', which is | ||
135 | encapsulated in a 'struct eisa_device' described as follows:: | ||
136 | |||
137 | struct eisa_device { | ||
138 | struct eisa_device_id id; | ||
139 | int slot; | ||
140 | int state; | ||
141 | unsigned long base_addr; | ||
142 | struct resource res[EISA_MAX_RESOURCES]; | ||
143 | u64 dma_mask; | ||
144 | struct device dev; /* generic device */ | ||
145 | }; | ||
146 | |||
147 | ======== ============================================================ | ||
148 | id EISA id, as read from device. id.driver_data is set from the | ||
149 | matching driver EISA id. | ||
150 | slot slot number which the device was detected on | ||
151 | state set of flags indicating the state of the device. Current | ||
152 | flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED. | ||
153 | res set of four 256 bytes I/O regions allocated to this device | ||
154 | dma_mask DMA mask set from the parent device. | ||
155 | dev generic device (see Documentation/driver-api/driver-model/device.rst) | ||
156 | ======== ============================================================ | ||
157 | |||
158 | You can get the 'struct eisa_device' from 'struct device' using the | ||
159 | 'to_eisa_device' macro. | ||
160 | |||
161 | Misc stuff | ||
162 | ========== | ||
163 | |||
164 | :: | ||
165 | |||
166 | void eisa_set_drvdata (struct eisa_device *edev, void *data); | ||
167 | |||
168 | Stores data into the device's driver_data area. | ||
169 | |||
170 | :: | ||
171 | |||
172 | void *eisa_get_drvdata (struct eisa_device *edev); | ||
173 | |||
174 | Gets the pointer previously stored into the device's driver_data area. | ||
175 | |||
176 | :: | ||
177 | |||
178 | int eisa_get_region_index (void *addr); | ||
179 | |||
180 | Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given | ||
181 | address. | ||
182 | |||
183 | Kernel parameters | ||
184 | ================= | ||
185 | |||
186 | eisa_bus.enable_dev | ||
187 | A comma-separated list of slots to be enabled, even if the firmware | ||
188 | set the card as disabled. The driver must be able to properly | ||
189 | initialize the device in such conditions. | ||
190 | |||
191 | eisa_bus.disable_dev | ||
192 | A comma-separated list of slots to be disabled, even if the firmware | ||
193 | set the card as enabled. The driver won't be called to handle this | ||
194 | device. | ||
195 | |||
196 | virtual_root.force_probe | ||
197 | Force the probing code to probe EISA slots even when it cannot find an | ||
198 | EISA compliant mainboard (nothing appears on slot 0). Defaults to 0 | ||
199 | (don't force), and set to 1 (force probing) when either | ||
200 | CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set. | ||
201 | |||
202 | Random notes | ||
203 | ============ | ||
204 | |||
205 | Converting an EISA driver to the new API mostly involves *deleting* | ||
206 | code (since probing is now in the core EISA code). Unfortunately, most | ||
207 | drivers share their probing routine between ISA, and EISA. Special | ||
208 | care must be taken when ripping out the EISA code, so other busses | ||
209 | won't suffer from these surgical strikes... | ||
210 | |||
211 | You *must not* expect any EISA device to be detected when returning | ||
212 | from eisa_driver_register, since the chances are that the bus has not | ||
213 | yet been probed. In fact, that's what happens most of the time (the | ||
214 | bus root driver usually kicks in rather late in the boot process). | ||
215 | Unfortunately, most drivers are doing the probing by themselves, and | ||
216 | expect to have explored the whole machine when they exit their probe | ||
217 | routine. | ||
218 | |||
219 | For example, switching your favorite EISA SCSI card to the "hotplug" | ||
220 | model is "the right thing"(tm). | ||
221 | |||
222 | Thanks | ||
223 | ====== | ||
224 | |||
225 | I'd like to thank the following people for their help: | ||
226 | |||
227 | - Xavier Benigni for lending me a wonderful Alpha Jensen, | ||
228 | - James Bottomley, Jeff Garzik for getting this stuff into the kernel, | ||
229 | - Andries Brouwer for contributing numerous EISA ids, | ||
230 | - Catrin Jones for coping with far too many machines at home. | ||
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst index 349f2dc33029..921c71a3d683 100644 --- a/Documentation/driver-api/gpio/driver.rst +++ b/Documentation/driver-api/gpio/driver.rst | |||
@@ -399,7 +399,7 @@ symbol: | |||
399 | will pass the struct gpio_chip* for the chip to all IRQ callbacks, so the | 399 | will pass the struct gpio_chip* for the chip to all IRQ callbacks, so the |
400 | callbacks need to embed the gpio_chip in its state container and obtain a | 400 | callbacks need to embed the gpio_chip in its state container and obtain a |
401 | pointer to the container using container_of(). | 401 | pointer to the container using container_of(). |
402 | (See Documentation/driver-model/design-patterns.rst) | 402 | (See Documentation/driver-api/driver-model/design-patterns.rst) |
403 | 403 | ||
404 | - gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip, | 404 | - gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip, |
405 | as discussed above regarding different types of cascaded irqchips. The | 405 | as discussed above regarding different types of cascaded irqchips. The |
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst index 6cd750a03ea0..d12a80f386a6 100644 --- a/Documentation/driver-api/index.rst +++ b/Documentation/driver-api/index.rst | |||
@@ -14,8 +14,10 @@ available subsections can be seen below. | |||
14 | .. toctree:: | 14 | .. toctree:: |
15 | :maxdepth: 2 | 15 | :maxdepth: 2 |
16 | 16 | ||
17 | driver-model/index | ||
17 | basics | 18 | basics |
18 | infrastructure | 19 | infrastructure |
20 | early-userspace/index | ||
19 | pm/index | 21 | pm/index |
20 | clk | 22 | clk |
21 | device-io | 23 | device-io |
@@ -36,6 +38,7 @@ available subsections can be seen below. | |||
36 | i2c | 38 | i2c |
37 | ipmb | 39 | ipmb |
38 | i3c/index | 40 | i3c/index |
41 | interconnect | ||
39 | hsi | 42 | hsi |
40 | edac | 43 | edac |
41 | scsi | 44 | scsi |
@@ -44,8 +47,11 @@ available subsections can be seen below. | |||
44 | mtdnand | 47 | mtdnand |
45 | miscellaneous | 48 | miscellaneous |
46 | mei/index | 49 | mei/index |
50 | mtd/index | ||
51 | mmc/index | ||
52 | nvdimm/index | ||
47 | w1 | 53 | w1 |
48 | rapidio | 54 | rapidio/index |
49 | s390-drivers | 55 | s390-drivers |
50 | vme | 56 | vme |
51 | 80211/index | 57 | 80211/index |
@@ -53,13 +59,48 @@ available subsections can be seen below. | |||
53 | firmware/index | 59 | firmware/index |
54 | pinctl | 60 | pinctl |
55 | gpio/index | 61 | gpio/index |
62 | md/index | ||
56 | misc_devices | 63 | misc_devices |
64 | nfc/index | ||
57 | dmaengine/index | 65 | dmaengine/index |
58 | slimbus | 66 | slimbus |
59 | soundwire/index | 67 | soundwire/index |
60 | fpga/index | 68 | fpga/index |
61 | acpi/index | 69 | acpi/index |
70 | backlight/lp855x-driver.rst | ||
71 | bt8xxgpio | ||
72 | connector | ||
73 | console | ||
74 | dcdbas | ||
75 | dell_rbu | ||
76 | edid | ||
77 | eisa | ||
78 | isa | ||
79 | isapnp | ||
62 | generic-counter | 80 | generic-counter |
81 | lightnvm-pblk | ||
82 | memory-devices/index | ||
83 | men-chameleon-bus | ||
84 | ntb | ||
85 | nvmem | ||
86 | parport-lowlevel | ||
87 | pps | ||
88 | ptp | ||
89 | phy/index | ||
90 | pti_intel_mid | ||
91 | pwm | ||
92 | rfkill | ||
93 | serial/index | ||
94 | sgi-ioc4 | ||
95 | sm501 | ||
96 | smsc_ece1099 | ||
97 | switchtec | ||
98 | sync_file | ||
99 | vfio-mediated-device | ||
100 | vfio | ||
101 | xilinx/index | ||
102 | xillybus | ||
103 | zorro | ||
63 | 104 | ||
64 | .. only:: subproject and html | 105 | .. only:: subproject and html |
65 | 106 | ||
diff --git a/Documentation/driver-api/interconnect.rst b/Documentation/driver-api/interconnect.rst new file mode 100644 index 000000000000..c3e004893796 --- /dev/null +++ b/Documentation/driver-api/interconnect.rst | |||
@@ -0,0 +1,93 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ===================================== | ||
4 | GENERIC SYSTEM INTERCONNECT SUBSYSTEM | ||
5 | ===================================== | ||
6 | |||
7 | Introduction | ||
8 | ------------ | ||
9 | |||
10 | This framework is designed to provide a standard kernel interface to control | ||
11 | the settings of the interconnects on an SoC. These settings can be throughput, | ||
12 | latency and priority between multiple interconnected devices or functional | ||
13 | blocks. This can be controlled dynamically in order to save power or provide | ||
14 | maximum performance. | ||
15 | |||
16 | The interconnect bus is hardware with configurable parameters, which can be | ||
17 | set on a data path according to the requests received from various drivers. | ||
18 | Examples of interconnect buses are the interconnects between various | ||
19 | components or functional blocks in chipsets. There can be multiple interconnects | ||
20 | on an SoC that can be multi-tiered. | ||
21 | |||
22 | Below is a simplified diagram of a real-world SoC interconnect bus topology. | ||
23 | |||
24 | :: | ||
25 | |||
26 | +----------------+ +----------------+ | ||
27 | | HW Accelerator |--->| M NoC |<---------------+ | ||
28 | +----------------+ +----------------+ | | ||
29 | | | +------------+ | ||
30 | +-----+ +-------------+ V +------+ | | | ||
31 | | DDR | | +--------+ | PCIe | | | | ||
32 | +-----+ | | Slaves | +------+ | | | ||
33 | ^ ^ | +--------+ | | C NoC | | ||
34 | | | V V | | | ||
35 | +------------------+ +------------------------+ | | +-----+ | ||
36 | | |-->| |-->| |-->| CPU | | ||
37 | | |-->| |<--| | +-----+ | ||
38 | | Mem NoC | | S NoC | +------------+ | ||
39 | | |<--| |---------+ | | ||
40 | | |<--| |<------+ | | +--------+ | ||
41 | +------------------+ +------------------------+ | | +-->| Slaves | | ||
42 | ^ ^ ^ ^ ^ | | +--------+ | ||
43 | | | | | | | V | ||
44 | +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ | ||
45 | | CPUs | | | GPU | | DSP | | Masters |-->| P NoC |-->| Slaves | | ||
46 | +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ | ||
47 | | | ||
48 | +-------+ | ||
49 | | Modem | | ||
50 | +-------+ | ||
51 | |||
52 | Terminology | ||
53 | ----------- | ||
54 | |||
55 | Interconnect provider is the software definition of the interconnect hardware. | ||
56 | The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC | ||
57 | and Mem NoC. | ||
58 | |||
59 | Interconnect node is the software definition of the interconnect hardware | ||
60 | port. Each interconnect provider consists of multiple interconnect nodes, | ||
61 | which are connected to other SoC components including other interconnect | ||
62 | providers. The point on the diagram where the CPUs connect to the memory is | ||
63 | called an interconnect node, which belongs to the Mem NoC interconnect provider. | ||
64 | |||
65 | Interconnect endpoints are the first or the last element of the path. Every | ||
66 | endpoint is a node, but not every node is an endpoint. | ||
67 | |||
68 | Interconnect path is everything between two endpoints including all the nodes | ||
69 | that have to be traversed to reach from a source to destination node. It may | ||
70 | include multiple master-slave pairs across several interconnect providers. | ||
71 | |||
72 | Interconnect consumers are the entities which make use of the data paths exposed | ||
73 | by the providers. The consumers send requests to providers requesting various | ||
74 | throughput, latency and priority. Usually the consumers are device drivers that | ||
75 | send requests based on their needs. An example of a consumer is a video decoder | ||
76 | that supports various formats and image sizes. | ||
77 | |||
78 | Interconnect providers | ||
79 | ---------------------- | ||
80 | |||
81 | Interconnect provider is an entity that implements methods to initialize and | ||
82 | configure interconnect bus hardware. The interconnect provider drivers should | ||
83 | be registered with the interconnect provider core. | ||
84 | |||
85 | .. kernel-doc:: include/linux/interconnect-provider.h | ||
86 | |||
87 | Interconnect consumers | ||
88 | ---------------------- | ||
89 | |||
90 | Interconnect consumers are the clients which use the interconnect APIs to | ||
91 | get paths between endpoints and set their bandwidth/latency/QoS requirements | ||
92 | for these interconnect paths. These interfaces are not currently | ||
93 | documented. | ||
diff --git a/Documentation/driver-api/isa.rst b/Documentation/driver-api/isa.rst new file mode 100644 index 000000000000..def4a7b690b5 --- /dev/null +++ b/Documentation/driver-api/isa.rst | |||
@@ -0,0 +1,122 @@ | |||
1 | =========== | ||
2 | ISA Drivers | ||
3 | =========== | ||
4 | |||
5 | The following text is adapted from the commit message of the initial | ||
6 | commit of the ISA bus driver authored by Rene Herman. | ||
7 | |||
8 | During the recent "isa drivers using platform devices" discussion it was | ||
9 | pointed out that (ALSA) ISA drivers ran into the problem of not having | ||
10 | the option to fail driver load (device registration rather) upon not | ||
11 | finding their hardware due to a probe() error not being passed up | ||
12 | through the driver model. In the course of that, I suggested a separate | ||
13 | ISA bus might be best; Russell King agreed and suggested this bus could | ||
14 | use the .match() method for the actual device discovery. | ||
15 | |||
16 | The attached does this. For this old non (generically) discoverable ISA | ||
17 | hardware only the driver itself can do discovery so as a difference with | ||
18 | the platform_bus, this isa_bus also distributes match() up to the | ||
19 | driver. | ||
20 | |||
21 | As another difference: these devices only exist in the driver model due | ||
22 | to the driver creating them because it might want to drive them, meaning | ||
23 | that all device creation has been made internal as well. | ||
24 | |||
25 | The usage model this provides is nice, and has been acked from the ALSA | ||
26 | side by Takashi Iwai and Jaroslav Kysela. The ALSA driver module_init's | ||
27 | now (for oldisa-only drivers) become:: | ||
28 | |||
29 | static int __init alsa_card_foo_init(void) | ||
30 | { | ||
31 | return isa_register_driver(&snd_foo_isa_driver, SNDRV_CARDS); | ||
32 | } | ||
33 | |||
34 | static void __exit alsa_card_foo_exit(void) | ||
35 | { | ||
36 | isa_unregister_driver(&snd_foo_isa_driver); | ||
37 | } | ||
38 | |||
39 | Quite like the other bus models therefore. This removes a lot of | ||
40 | duplicated init code from the ALSA ISA drivers. | ||
41 | |||
42 | The passed in isa_driver struct is the regular driver struct embedding a | ||
43 | struct device_driver, the normal probe/remove/shutdown/suspend/resume | ||
44 | callbacks, and as indicated that .match callback. | ||
45 | |||
46 | The "SNDRV_CARDS" you see being passed in is an "unsigned int ndev" | ||
47 | parameter, indicating how many devices to create and call our methods | ||
48 | with. | ||
49 | |||
50 | The platform_driver callbacks are called with a platform_device param; | ||
51 | the isa_driver callbacks are being called with a ``struct device *dev, | ||
52 | unsigned int id`` pair directly -- with the device creation completely | ||
53 | internal to the bus it's much cleaner to not leak isa_dev's by passing | ||
54 | them in at all. The id is the only thing we ever want other than the | ||
55 | struct device anyways, and it makes for nicer code in the callbacks as | ||
56 | well. | ||
57 | |||
58 | With this additional .match() callback ISA drivers have all options. If | ||
59 | ALSA would want to keep the old non-load behaviour, it could stick all | ||
60 | of the old .probe in .match, which would only keep them registered after | ||
61 | everything was found to be present and accounted for. If it wanted the | ||
62 | behaviour of always loading as it inadvertently did for a bit after the | ||
63 | changeover to platform devices, it could just not provide a .match() and | ||
64 | do everything in .probe() as before. | ||
65 | |||
66 | If it, as Takashi Iwai already suggested earlier as a way of following | ||
67 | the model from saner buses more closely, wants to load when a later bind | ||
68 | could conceivably succeed, it could use .match() for the prerequisites | ||
69 | (such as checking the user wants the card enabled and that port/irq/dma | ||
70 | values have been passed in) and .probe() for everything else. This is | ||
71 | the nicest model. | ||
72 | |||
73 | To the code... | ||
74 | |||
75 | This exports only two functions; isa_{,un}register_driver(). | ||
76 | |||
77 | isa_register_driver() registers the struct device_driver, and then | ||
78 | loops over the passed in ndev creating devices and registering them. | ||
79 | This causes the bus match method to be called for them, which is:: | ||
80 | |||
81 | int isa_bus_match(struct device *dev, struct device_driver *driver) | ||
82 | { | ||
83 | struct isa_driver *isa_driver = to_isa_driver(driver); | ||
84 | |||
85 | if (dev->platform_data == isa_driver) { | ||
86 | if (!isa_driver->match || | ||
87 | isa_driver->match(dev, to_isa_dev(dev)->id)) | ||
88 | return 1; | ||
89 | dev->platform_data = NULL; | ||
90 | } | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | The first thing this does is check if this device is in fact one of this | ||
95 | driver's devices by seeing if the device's platform_data pointer is set | ||
96 | to this driver. Platform devices compare strings, but we don't need to | ||
97 | do that with everything being internal, so isa_register_driver() abuses | ||
98 | dev->platform_data as an isa_driver pointer which we can then check here. | ||
99 | I believe platform_data is available for this, but if rather not, moving | ||
100 | the isa_driver pointer to the private struct isa_dev is of course fine as | ||
101 | well. | ||
102 | |||
103 | Then, if the driver did not provide a .match, it matches. If it did, | ||
104 | the driver match() method is called to determine a match. | ||
105 | |||
106 | If it did **not** match, dev->platform_data is reset to indicate this to | ||
107 | isa_register_driver which can then unregister the device again. | ||
108 | |||
109 | If during all this, there's any error, or no devices matched at all | ||
110 | everything is backed out again and the error, or -ENODEV, is returned. | ||
111 | |||
112 | isa_unregister_driver() just unregisters the matched devices and the | ||
113 | driver itself. | ||
114 | |||
115 | module_isa_driver is a helper macro for ISA drivers which do not do | ||
116 | anything special in module init/exit. This eliminates a lot of | ||
117 | boilerplate code. Each module may only use this macro once, and calling | ||
118 | it replaces module_init and module_exit. | ||
119 | |||
120 | max_num_isa_dev is a macro to determine the maximum possible number of | ||
121 | ISA devices which may be registered in the I/O port address space given | ||
122 | the address extent of the ISA devices. | ||
diff --git a/Documentation/driver-api/isapnp.rst b/Documentation/driver-api/isapnp.rst new file mode 100644 index 000000000000..8d0840ac847b --- /dev/null +++ b/Documentation/driver-api/isapnp.rst | |||
@@ -0,0 +1,15 @@ | |||
1 | ========================================================== | ||
2 | ISA Plug & Play support by Jaroslav Kysela <perex@suse.cz> | ||
3 | ========================================================== | ||
4 | |||
5 | Interface /proc/isapnp | ||
6 | ====================== | ||
7 | |||
8 | The interface has been removed. See pnp.txt for more details. | ||
9 | |||
10 | Interface /proc/bus/isapnp | ||
11 | ========================== | ||
12 | |||
13 | This directory allows access to ISA PnP cards and logical devices. | ||
14 | The regular files contain the contents of ISA PnP registers for | ||
15 | a logical device. | ||
diff --git a/Documentation/driver-api/lightnvm-pblk.rst b/Documentation/driver-api/lightnvm-pblk.rst new file mode 100644 index 000000000000..1040ed1cec81 --- /dev/null +++ b/Documentation/driver-api/lightnvm-pblk.rst | |||
@@ -0,0 +1,21 @@ | |||
1 | pblk: Physical Block Device Target | ||
2 | ================================== | ||
3 | |||
4 | pblk implements a fully associative, host-based FTL that exposes a traditional | ||
5 | block I/O interface. Its primary responsibilities are: | ||
6 | |||
7 | - Map logical addresses onto physical addresses (4KB granularity) in a | ||
8 | logical-to-physical (L2P) table. | ||
9 | - Maintain the integrity and consistency of the L2P table as well as its | ||
10 | recovery from normal tear down and power outage. | ||
11 | - Deal with controller- and media-specific constraints. | ||
12 | - Handle I/O errors. | ||
13 | - Implement garbage collection. | ||
14 | - Maintain consistency across the I/O stack during synchronization points. | ||
15 | |||
16 | For more information please refer to: | ||
17 | |||
18 | http://lightnvm.io | ||
19 | |||
20 | which maintains updated FAQs, manual pages, technical documentation, tools, | ||
21 | contacts, etc. | ||
diff --git a/Documentation/driver-api/md/index.rst b/Documentation/driver-api/md/index.rst new file mode 100644 index 000000000000..18f54a7d7d6e --- /dev/null +++ b/Documentation/driver-api/md/index.rst | |||
@@ -0,0 +1,12 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ==== | ||
4 | RAID | ||
5 | ==== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | md-cluster | ||
11 | raid5-cache | ||
12 | raid5-ppl | ||
diff --git a/Documentation/driver-api/md/md-cluster.rst b/Documentation/driver-api/md/md-cluster.rst new file mode 100644 index 000000000000..96eb52cec7eb --- /dev/null +++ b/Documentation/driver-api/md/md-cluster.rst | |||
@@ -0,0 +1,385 @@ | |||
1 | ========== | ||
2 | MD Cluster | ||
3 | ========== | ||
4 | |||
5 | The cluster MD is a shared-device RAID for a cluster, it supports | ||
6 | two levels: raid1 and raid10 (limited support). | ||
7 | |||
8 | |||
9 | 1. On-disk format | ||
10 | ================= | ||
11 | |||
12 | Separate write-intent-bitmaps are used for each cluster node. | ||
13 | The bitmaps record all writes that may have been started on that node, | ||
14 | and may not yet have finished. The on-disk layout is:: | ||
15 | |||
16 | 0 4k 8k 12k | ||
17 | ------------------------------------------------------------------- | ||
18 | | idle | md super | bm super [0] + bits | | ||
19 | | bm bits[0, contd] | bm super[1] + bits | bm bits[1, contd] | | ||
20 | | bm super[2] + bits | bm bits [2, contd] | bm super[3] + bits | | ||
21 | | bm bits [3, contd] | | | | ||
22 | |||
23 | During "normal" functioning we assume the filesystem ensures that only | ||
24 | one node writes to any given block at a time, so a write request will | ||
25 | |||
26 | - set the appropriate bit (if not already set) | ||
27 | - commit the write to all mirrors | ||
28 | - schedule the bit to be cleared after a timeout. | ||
29 | |||
30 | Reads are just handled normally. It is up to the filesystem to ensure | ||
31 | one node doesn't read from a location where another node (or the same | ||
32 | node) is writing. | ||
33 | |||
34 | |||
35 | 2. DLM Locks for management | ||
36 | =========================== | ||
37 | |||
38 | There are three groups of locks for managing the device: | ||
39 | |||
40 | 2.1 Bitmap lock resource (bm_lockres) | ||
41 | ------------------------------------- | ||
42 | |||
43 | The bm_lockres protects individual node bitmaps. They are named in | ||
44 | the form bitmap000 for node 1, bitmap001 for node 2 and so on. When a | ||
45 | node joins the cluster, it acquires the lock in PW mode and it stays | ||
46 | so during the lifetime the node is part of the cluster. The lock | ||
47 | resource number is based on the slot number returned by the DLM | ||
48 | subsystem. Since DLM starts node count from one and bitmap slots | ||
49 | start from zero, one is subtracted from the DLM slot number to arrive | ||
50 | at the bitmap slot number. | ||
51 | |||
52 | The LVB of the bitmap lock for a particular node records the range | ||
53 | of sectors that are being re-synced by that node. No other | ||
54 | node may write to those sectors. This is used when a new node | ||
55 | joins the cluster. | ||
56 | |||
57 | 2.2 Message passing locks | ||
58 | ------------------------- | ||
59 | |||
60 | Each node has to communicate with other nodes when starting or ending | ||
61 | resync, and for metadata superblock updates. This communication is | ||
62 | managed through three locks: "token", "message", and "ack", together | ||
63 | with the Lock Value Block (LVB) of the "message" lock. | ||
64 | |||
65 | 2.3 new-device management | ||
66 | ------------------------- | ||
67 | |||
68 | A single lock: "no-new-dev" is used to co-ordinate the addition of | ||
69 | new devices - this must be synchronized across the array. | ||
70 | Normally all nodes hold a concurrent-read lock on this device. | ||
71 | |||
72 | 3. Communication | ||
73 | ================ | ||
74 | |||
75 | Messages can be broadcast to all nodes, and the sender waits for all | ||
76 | other nodes to acknowledge the message before proceeding. Only one | ||
77 | message can be processed at a time. | ||
78 | |||
79 | 3.1 Message Types | ||
80 | ----------------- | ||
81 | |||
82 | There are six types of messages which are passed: | ||
83 | |||
84 | 3.1.1 METADATA_UPDATED | ||
85 | ^^^^^^^^^^^^^^^^^^^^^^ | ||
86 | |||
87 | informs other nodes that the metadata has | ||
88 | been updated, and the node must re-read the md superblock. This is | ||
89 | performed synchronously. It is primarily used to signal device | ||
90 | failure. | ||
91 | |||
92 | 3.1.2 RESYNCING | ||
93 | ^^^^^^^^^^^^^^^ | ||
94 | informs other nodes that a resync is initiated or | ||
95 | ended so that each node may suspend or resume the region. Each | ||
96 | RESYNCING message identifies a range of the devices that the | ||
97 | sending node is about to resync. This overrides any previous | ||
98 | notification from that node: only one range can be resynced at a | ||
99 | time per-node. | ||
100 | |||
101 | 3.1.3 NEWDISK | ||
102 | ^^^^^^^^^^^^^ | ||
103 | |||
104 | informs other nodes that a device is being added to | ||
105 | the array. Message contains an identifier for that device. See | ||
106 | below for further details. | ||
107 | |||
108 | 3.1.4 REMOVE | ||
109 | ^^^^^^^^^^^^ | ||
110 | |||
111 | A failed or spare device is being removed from the | ||
112 | array. The slot-number of the device is included in the message. | ||
113 | |||
114 | 3.1.5 RE_ADD: | ||
115 | |||
116 | A failed device is being re-activated - the assumption | ||
117 | is that it has been determined to be working again. | ||
118 | |||
119 | 3.1.6 BITMAP_NEEDS_SYNC: | ||
120 | |||
121 | If a node is stopped locally but the bitmap | ||
122 | isn't clean, then another node is informed to take the ownership of | ||
123 | resync. | ||
124 | |||
125 | 3.2 Communication mechanism | ||
126 | --------------------------- | ||
127 | |||
128 | The DLM LVB is used to communicate within nodes of the cluster. There | ||
129 | are three resources used for the purpose: | ||
130 | |||
131 | 3.2.1 token | ||
132 | ^^^^^^^^^^^ | ||
133 | The resource which protects the entire communication | ||
134 | system. The node having the token resource is allowed to | ||
135 | communicate. | ||
136 | |||
137 | 3.2.2 message | ||
138 | ^^^^^^^^^^^^^ | ||
139 | The lock resource which carries the data to communicate. | ||
140 | |||
141 | 3.2.3 ack | ||
142 | ^^^^^^^^^ | ||
143 | |||
144 | The resource, acquiring which means the message has been | ||
145 | acknowledged by all nodes in the cluster. The BAST of the resource | ||
146 | is used to inform the receiving node that a node wants to | ||
147 | communicate. | ||
148 | |||
149 | The algorithm is: | ||
150 | |||
151 | 1. receive status - all nodes have concurrent-reader lock on "ack":: | ||
152 | |||
153 | sender receiver receiver | ||
154 | "ack":CR "ack":CR "ack":CR | ||
155 | |||
156 | 2. sender get EX on "token", | ||
157 | sender get EX on "message":: | ||
158 | |||
159 | sender receiver receiver | ||
160 | "token":EX "ack":CR "ack":CR | ||
161 | "message":EX | ||
162 | "ack":CR | ||
163 | |||
164 | Sender checks that it still needs to send a message. Messages | ||
165 | received or other events that happened while waiting for the | ||
166 | "token" may have made this message inappropriate or redundant. | ||
167 | |||
168 | 3. sender writes LVB | ||
169 | |||
170 | sender down-convert "message" from EX to CW | ||
171 | |||
172 | sender try to get EX of "ack" | ||
173 | |||
174 | :: | ||
175 | |||
176 | [ wait until all receivers have *processed* the "message" ] | ||
177 | |||
178 | [ triggered by bast of "ack" ] | ||
179 | receiver get CR on "message" | ||
180 | receiver read LVB | ||
181 | receiver processes the message | ||
182 | [ wait finish ] | ||
183 | receiver releases "ack" | ||
184 | receiver tries to get PR on "message" | ||
185 | |||
186 | sender receiver receiver | ||
187 | "token":EX "message":CR "message":CR | ||
188 | "message":CW | ||
189 | "ack":EX | ||
190 | |||
191 | 4. triggered by grant of EX on "ack" (indicating all receivers | ||
192 | have processed message) | ||
193 | |||
194 | sender down-converts "ack" from EX to CR | ||
195 | |||
196 | sender releases "message" | ||
197 | |||
198 | sender releases "token" | ||
199 | |||
200 | :: | ||
201 | |||
202 | receiver upconvert to PR on "message" | ||
203 | receiver get CR of "ack" | ||
204 | receiver release "message" | ||
205 | |||
206 | sender receiver receiver | ||
207 | "ack":CR "ack":CR "ack":CR | ||
208 | |||
209 | |||
210 | 4. Handling Failures | ||
211 | ==================== | ||
212 | |||
213 | 4.1 Node Failure | ||
214 | ---------------- | ||
215 | |||
216 | When a node fails, the DLM informs the cluster with the slot | ||
217 | number. The node starts a cluster recovery thread. The cluster | ||
218 | recovery thread: | ||
219 | |||
220 | - acquires the bitmap<number> lock of the failed node | ||
221 | - opens the bitmap | ||
222 | - reads the bitmap of the failed node | ||
223 | - copies the set bitmap to local node | ||
224 | - cleans the bitmap of the failed node | ||
225 | - releases bitmap<number> lock of the failed node | ||
226 | - initiates resync of the bitmap on the current node | ||
227 | md_check_recovery is invoked within recover_bitmaps, | ||
228 | then md_check_recovery -> metadata_update_start/finish, | ||
229 | it will lock the communication by lock_comm. | ||
230 | Which means when one node is resyncing it blocks all | ||
231 | other nodes from writing anywhere on the array. | ||
232 | |||
233 | The resync process is the regular md resync. However, in a clustered | ||
234 | environment when a resync is performed, it needs to tell other nodes | ||
235 | of the areas which are suspended. Before a resync starts, the node | ||
236 | sends out RESYNCING with the (lo,hi) range of the area which needs to | ||
237 | be suspended. Each node maintains a suspend_list, which contains the | ||
238 | list of ranges which are currently suspended. On receiving RESYNCING, | ||
239 | the node adds the range to the suspend_list. Similarly, when the node | ||
240 | performing resync finishes, it sends RESYNCING with an empty range to | ||
241 | other nodes and other nodes remove the corresponding entry from the | ||
242 | suspend_list. | ||
243 | |||
244 | A helper function, ->area_resyncing() can be used to check if a | ||
245 | particular I/O range should be suspended or not. | ||
246 | |||
247 | 4.2 Device Failure | ||
248 | ------------------ | ||
249 | |||
250 | Device failures are handled and communicated with the metadata update | ||
251 | routine. When a node detects a device failure it does not allow | ||
252 | any further writes to that device until the failure has been | ||
253 | acknowledged by all other nodes. | ||
254 | |||
255 | 5. Adding a new Device | ||
256 | ====================== | ||
257 | |||
258 | For adding a new device, it is necessary that all nodes "see" the new | ||
259 | device to be added. For this, the following algorithm is used: | ||
260 | |||
261 | 1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues | ||
262 | ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD) | ||
263 | 2. Node 1 sends a NEWDISK message with uuid and slot number | ||
264 | 3. Other nodes issue kobject_uevent_env with uuid and slot number | ||
265 | (Steps 4,5 could be a udev rule) | ||
266 | 4. In userspace, the node searches for the disk, perhaps | ||
267 | using blkid -t SUB_UUID="" | ||
268 | 5. Other nodes issue either of the following depending on whether | ||
269 | the disk was found: | ||
270 | ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and | ||
271 | disc.number set to slot number) | ||
272 | ioctl(CLUSTERED_DISK_NACK) | ||
273 | 6. Other nodes drop lock on "no-new-dev" (CR) if device is found | ||
274 | 7. Node 1 attempts EX lock on "no-new-dev" | ||
275 | 8. If node 1 gets the lock, it sends METADATA_UPDATED after | ||
276 | unmarking the disk as SpareLocal | ||
277 | 9. If not (get "no-new-dev" lock), it fails the operation and sends | ||
278 | METADATA_UPDATED. | ||
279 | 10. Other nodes get the information whether a disk is added or not | ||
280 | by the following METADATA_UPDATED. | ||
281 | |||
282 | 6. Module interface | ||
283 | =================== | ||
284 | |||
285 | There are 17 call-backs which the md core can make to the cluster | ||
286 | module. Understanding these can give a good overview of the whole | ||
287 | process. | ||
288 | |||
289 | 6.1 join(nodes) and leave() | ||
290 | --------------------------- | ||
291 | |||
292 | These are called when an array is started with a clustered bitmap, | ||
293 | and when the array is stopped. join() ensures the cluster is | ||
294 | available and initializes the various resources. | ||
295 | Only the first 'nodes' nodes in the cluster can use the array. | ||
296 | |||
297 | 6.2 slot_number() | ||
298 | ----------------- | ||
299 | |||
300 | Reports the slot number advised by the cluster infrastructure. | ||
301 | Range is from 0 to nodes-1. | ||
302 | |||
303 | 6.3 resync_info_update() | ||
304 | ------------------------ | ||
305 | |||
306 | This updates the resync range that is stored in the bitmap lock. | ||
307 | The starting point is updated as the resync progresses. The | ||
308 | end point is always the end of the array. | ||
309 | It does *not* send a RESYNCING message. | ||
310 | |||
311 | 6.4 resync_start(), resync_finish() | ||
312 | ----------------------------------- | ||
313 | |||
314 | These are called when resync/recovery/reshape starts or stops. | ||
315 | They update the resyncing range in the bitmap lock and also | ||
316 | send a RESYNCING message. resync_start reports the whole | ||
317 | array as resyncing, resync_finish reports none of it. | ||
318 | |||
319 | resync_finish() also sends a BITMAP_NEEDS_SYNC message which | ||
320 | allows some other node to take over. | ||
321 | |||
322 | 6.5 metadata_update_start(), metadata_update_finish(), metadata_update_cancel() | ||
323 | ------------------------------------------------------------------------------- | ||
324 | |||
325 | metadata_update_start is used to get exclusive access to | ||
326 | the metadata. If a change is still needed once that access is | ||
327 | gained, metadata_update_finish() will send a METADATA_UPDATED | ||
328 | message to all other nodes, otherwise metadata_update_cancel() | ||
329 | can be used to release the lock. | ||
330 | |||
331 | 6.6 area_resyncing() | ||
332 | -------------------- | ||
333 | |||
334 | This combines two elements of functionality. | ||
335 | |||
336 | Firstly, it will check if any node is currently resyncing | ||
337 | anything in a given range of sectors. If any resync is found, | ||
338 | then the caller will avoid writing or read-balancing in that | ||
339 | range. | ||
340 | |||
341 | Secondly, while node recovery is happening it reports that | ||
342 | all areas are resyncing for READ requests. This avoids races | ||
343 | between the cluster-filesystem and the cluster-RAID handling | ||
344 | a node failure. | ||
345 | |||
346 | 6.7 add_new_disk_start(), add_new_disk_finish(), new_disk_ack() | ||
347 | --------------------------------------------------------------- | ||
348 | |||
349 | These are used to manage the new-disk protocol described above. | ||
350 | When a new device is added, add_new_disk_start() is called before | ||
351 | it is bound to the array and, if that succeeds, add_new_disk_finish() | ||
352 | is called once the device is fully added. | ||
353 | |||
354 | When a device is added in acknowledgement to a previous | ||
355 | request, or when the device is declared "unavailable", | ||
356 | new_disk_ack() is called. | ||
357 | |||
358 | 6.8 remove_disk() | ||
359 | ----------------- | ||
360 | |||
361 | This is called when a spare or failed device is removed from | ||
362 | the array. It causes a REMOVE message to be sent to other nodes. | ||
363 | |||
364 | 6.9 gather_bitmaps() | ||
365 | -------------------- | ||
366 | |||
367 | This sends a RE_ADD message to all other nodes and then | ||
368 | gathers bitmap information from all bitmaps. This combined | ||
369 | bitmap is then used to recover the re-added device. | ||
370 | |||
371 | 6.10 lock_all_bitmaps() and unlock_all_bitmaps() | ||
372 | ------------------------------------------------ | ||
373 | |||
374 | These are called when the bitmap is changed to none. If a node plans | ||
375 | to clear the cluster raid's bitmap, it needs to make sure no other | ||
376 | nodes are using the raid, which is achieved by locking all bitmap | ||
377 | locks within the cluster. Those locks are then unlocked | ||
378 | accordingly. | ||
379 | |||
380 | 7. Unsupported features | ||
381 | ======================= | ||
382 | |||
383 | There are some things which are not supported by cluster MD yet. | ||
384 | |||
385 | - change array_sectors. | ||
diff --git a/Documentation/driver-api/md/raid5-cache.rst b/Documentation/driver-api/md/raid5-cache.rst new file mode 100644 index 000000000000..d7a15f44a7c3 --- /dev/null +++ b/Documentation/driver-api/md/raid5-cache.rst | |||
@@ -0,0 +1,111 @@ | |||
1 | ================ | ||
2 | RAID 4/5/6 cache | ||
3 | ================ | ||
4 | |||
5 | Raid 4/5/6 could include an extra disk for data cache besides normal RAID | ||
6 | disks. The role of RAID disks isn't changed with the cache disk. The cache disk | ||
7 | caches data to the RAID disks. The cache can be in write-through (supported | ||
8 | since 4.4) or write-back mode (supported since 4.10). mdadm (supported since | ||
9 | 3.4) has a new option '--write-journal' to create array with cache. Please | ||
10 | refer to mdadm manual for details. By default (RAID array starts), the cache is | ||
11 | in write-through mode. A user can switch it to write-back mode by:: | ||
12 | |||
13 | echo "write-back" > /sys/block/md0/md/journal_mode | ||
14 | |||
15 | And switch it back to write-through mode by:: | ||
16 | |||
17 | echo "write-through" > /sys/block/md0/md/journal_mode | ||
18 | |||
19 | In both modes, all writes to the array will hit cache disk first. This means | ||
20 | the cache disk must be fast and sustainable. | ||
21 | |||
22 | write-through mode | ||
23 | ================== | ||
24 | |||
25 | This mode mainly fixes the 'write hole' issue. For RAID 4/5/6 array, an unclean | ||
26 | shutdown can cause data in some stripes to not be in a consistent state, e.g., data | ||
27 | and parity don't match. The reason is that a stripe write involves several RAID | ||
28 | disks and it's possible the writes don't hit all RAID disks yet before the | ||
29 | unclean shutdown. We call an array degraded if it has inconsistent data. MD | ||
30 | tries to resync the array to bring it back to normal state. But before the | ||
31 | resync completes, any system crash will expose the chance of real data | ||
32 | corruption in the RAID array. This problem is called 'write hole'. | ||
33 | |||
34 | The write-through cache will cache all data on cache disk first. After the data | ||
35 | is safe on the cache disk, the data will be flushed onto RAID disks. The | ||
36 | two-step write will guarantee MD can recover correct data after unclean | ||
37 | shutdown even if the array is degraded. Thus the cache can close the 'write hole'. | ||
38 | |||
39 | In write-through mode, MD reports IO completion to upper layer (usually | ||
40 | filesystems) after the data is safe on RAID disks, so cache disk failure | ||
41 | doesn't cause data loss. Of course cache disk failure means the array is | ||
42 | exposed to 'write hole' again. | ||
43 | |||
44 | In write-through mode, the cache disk isn't required to be big. Several | ||
45 | hundred megabytes are enough. | ||
46 | |||
47 | write-back mode | ||
48 | =============== | ||
49 | |||
50 | write-back mode fixes the 'write hole' issue too, since all write data is | ||
51 | cached on cache disk. But the main goal of 'write-back' cache is to speed up | ||
52 | write. If a write crosses all RAID disks of a stripe, we call it full-stripe | ||
53 | write. For non-full-stripe writes, MD must read old data before the new parity | ||
54 | can be calculated. These synchronous reads hurt write throughput. Some writes | ||
55 | which are sequential but not dispatched at the same time will suffer from this | ||
56 | overhead too. Write-back cache will aggregate the data and flush the data to | ||
57 | RAID disks only after the data becomes a full stripe write. This will | ||
58 | completely avoid the overhead, so it's very helpful for some workloads. A | ||
59 | typical workload which does sequential write followed by fsync is an example. | ||
60 | |||
61 | In write-back mode, MD reports IO completion to upper layer (usually | ||
62 | filesystems) right after the data hits cache disk. The data is flushed to raid | ||
63 | disks later after specific conditions are met. So cache disk failure will cause | ||
64 | data loss. | ||
65 | |||
66 | In write-back mode, MD also caches data in memory. The memory cache includes | ||
67 | the same data stored on cache disk, so a power loss doesn't cause data loss. | ||
68 | The memory cache size has performance impact for the array. It's recommended | ||
69 | the size is big. A user can configure the size by:: | ||
70 | |||
71 | echo "2048" > /sys/block/md0/md/stripe_cache_size | ||
72 | |||
73 | Too small cache disk will make the write aggregation less efficient in this | ||
74 | mode depending on the workloads. It's recommended to use a cache disk with at | ||
75 | least several gigabytes size in write-back mode. | ||
76 | |||
77 | The implementation | ||
78 | ================== | ||
79 | |||
80 | The write-through and write-back cache use the same disk format. The cache disk | ||
81 | is organized as a simple write log. The log consists of 'meta data' and 'data' | ||
82 | pairs. The meta data describes the data. It also includes checksum and sequence | ||
83 | ID for recovery identification. Data can be IO data and parity data. Data is | ||
84 | checksummed too. The checksum is stored in the meta data ahead of the data. The | ||
85 | checksum is an optimization because MD can write meta and data freely without | ||
86 | worrying about the order. MD superblock has a field pointing to the valid meta data | ||
87 | of log head. | ||
88 | |||
89 | The log implementation is pretty straightforward. The difficult part is the | ||
90 | order in which MD writes data to cache disk and RAID disks. Specifically, in | ||
91 | write-through mode, MD calculates parity for IO data, writes both IO data and | ||
92 | parity to the log, writes the data and parity to RAID disks after the data and | ||
93 | parity is settled down in log and finally the IO is finished. Read just reads | ||
94 | from raid disks as usual. | ||
95 | |||
96 | In write-back mode, MD writes IO data to the log and reports IO completion. The | ||
97 | data is also fully cached in memory at that time, which means read must query | ||
98 | memory cache. If some conditions are met, MD will flush the data to RAID disks. | ||
99 | MD will calculate parity for the data and write parity into the log. After this | ||
100 | is finished, MD will write both data and parity into RAID disks, then MD can | ||
101 | release the memory cache. The flush conditions could be stripe becomes a full | ||
102 | stripe write, free cache disk space is low or free in-kernel memory cache space | ||
103 | is low. | ||
104 | |||
105 | After an unclean shutdown, MD does recovery. MD reads all meta data and data | ||
106 | from the log. The sequence ID and checksum will help us detect corrupted meta | ||
107 | data and data. If MD finds a stripe with data and valid parities (1 parity for | ||
108 | raid4/5 and 2 for raid6), MD will write the data and parities to RAID disks. If | ||
109 | parities are incomplete, they are discarded. If part of data is corrupted, | ||
110 | they are discarded too. MD then loads valid data and writes them to RAID disks | ||
111 | in normal way. | ||
diff --git a/Documentation/driver-api/md/raid5-ppl.rst b/Documentation/driver-api/md/raid5-ppl.rst new file mode 100644 index 000000000000..357e5515bc55 --- /dev/null +++ b/Documentation/driver-api/md/raid5-ppl.rst | |||
@@ -0,0 +1,47 @@ | |||
1 | ================== | ||
2 | Partial Parity Log | ||
3 | ================== | ||
4 | |||
5 | Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue | ||
6 | addressed by PPL is that after a dirty shutdown, parity of a particular stripe | ||
7 | may become inconsistent with data on other member disks. If the array is also | ||
8 | in degraded state, there is no way to recalculate parity, because one of the | ||
9 | disks is missing. This can lead to silent data corruption when rebuilding the | ||
10 | array or using it as degraded - data calculated from parity for array blocks | ||
11 | that have not been touched by a write request during the unclean shutdown can | ||
12 | be incorrect. Such condition is known as the RAID5 Write Hole. Because of | ||
13 | this, md by default does not allow starting a dirty degraded array. | ||
14 | |||
15 | Partial parity for a write operation is the XOR of stripe data chunks not | ||
16 | modified by this write. It is just enough data needed for recovering from the | ||
17 | write hole. XORing partial parity with the modified chunks produces parity for | ||
18 | the stripe, consistent with its state before the write operation, regardless of | ||
19 | which chunk writes have completed. If one of the not modified data disks of | ||
20 | this stripe is missing, this updated parity can be used to recover its | ||
21 | contents. PPL recovery is also performed when starting an array after an | ||
22 | unclean shutdown and all disks are available, eliminating the need to resync | ||
23 | the array. Because of this, using write-intent bitmap and PPL together is not | ||
24 | supported. | ||
25 | |||
26 | When handling a write request PPL writes partial parity before new data and | ||
27 | parity are dispatched to disks. PPL is a distributed log - it is stored on | ||
28 | array member drives in the metadata area, on the parity drive of a particular | ||
29 | stripe. It does not require a dedicated journaling drive. Write performance is | ||
30 | reduced by up to 30%-40% but it scales with the number of drives in the array | ||
31 | and the journaling drive does not become a bottleneck or a single point of | ||
32 | failure. | ||
33 | |||
34 | Unlike raid5-cache, the other solution in md for closing the write hole, PPL is | ||
35 | not a true journal. It does not protect from losing in-flight data, only from | ||
36 | silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is | ||
37 | performed for this stripe (parity is not updated). So it is possible to have | ||
38 | arbitrary data in the written part of a stripe if that disk is lost. In such | ||
39 | case the behavior is the same as in plain raid5. | ||
40 | |||
41 | PPL is available for md version-1 metadata and external (specifically IMSM) | ||
42 | metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl. | ||
43 | |||
44 | There is a limitation of maximum 64 disks in the array for PPL. This helps | ||
45 | keep data structures and implementation simple. RAID5 arrays with so many disks | ||
46 | are not likely due to high risk of multiple disk failures. Such restriction | ||
47 | should not be a real life limitation. | ||
diff --git a/Documentation/driver-api/memory-devices/index.rst b/Documentation/driver-api/memory-devices/index.rst new file mode 100644 index 000000000000..28101458cda5 --- /dev/null +++ b/Documentation/driver-api/memory-devices/index.rst | |||
@@ -0,0 +1,18 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ========================= | ||
4 | Memory Controller drivers | ||
5 | ========================= | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | ti-emif | ||
11 | ti-gpmc | ||
12 | |||
13 | .. only:: subproject and html | ||
14 | |||
15 | Indices | ||
16 | ======= | ||
17 | |||
18 | * :ref:`genindex` | ||
diff --git a/Documentation/driver-api/memory-devices/ti-emif.rst b/Documentation/driver-api/memory-devices/ti-emif.rst new file mode 100644 index 000000000000..dea2ad9bcd7e --- /dev/null +++ b/Documentation/driver-api/memory-devices/ti-emif.rst | |||
@@ -0,0 +1,64 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | =============================== | ||
4 | TI EMIF SDRAM Controller Driver | ||
5 | =============================== | ||
6 | |||
7 | Author | ||
8 | ====== | ||
9 | Aneesh V <aneesh@ti.com> | ||
10 | |||
11 | Location | ||
12 | ======== | ||
13 | drivers/memory/emif.c | ||
14 | |||
15 | Supported SoCs: | ||
16 | =============== | ||
17 | TI OMAP44xx | ||
18 | TI OMAP54xx | ||
19 | |||
20 | Menuconfig option: | ||
21 | ================== | ||
22 | Device Drivers | ||
23 | Memory devices | ||
24 | Texas Instruments EMIF driver | ||
25 | |||
26 | Description | ||
27 | =========== | ||
28 | This driver is for the EMIF module available in Texas Instruments | ||
29 | SoCs. EMIF is an SDRAM controller that, based on its revision, | ||
30 | supports one or more of DDR2, DDR3, and LPDDR2 SDRAM protocols. | ||
31 | This driver takes care of only LPDDR2 memories presently. The | ||
32 | functions of the driver include re-configuring AC timing | ||
33 | parameters and other settings during frequency, voltage and | ||
34 | temperature changes. | ||
35 | |||
36 | Platform Data (see include/linux/platform_data/emif_plat.h) | ||
37 | =========================================================== | ||
38 | DDR device details and other board dependent and SoC dependent | ||
39 | information can be passed through platform data (struct emif_platform_data) | ||
40 | |||
41 | - DDR device details: 'struct ddr_device_info' | ||
42 | - Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck' | ||
43 | - Custom configurations: customizable policy options through | ||
44 | 'struct emif_custom_configs' | ||
45 | - IP revision | ||
46 | - PHY type | ||
47 | |||
48 | Interface to the external world | ||
49 | =============================== | ||
50 | EMIF driver registers notifiers for voltage and frequency changes | ||
51 | affecting EMIF and takes appropriate actions when these are invoked. | ||
52 | |||
53 | - freq_pre_notify_handling() | ||
54 | - freq_post_notify_handling() | ||
55 | - volt_notify_handling() | ||
56 | |||
57 | Debugfs | ||
58 | ======= | ||
59 | The driver creates two debugfs entries per device. | ||
60 | |||
61 | - regcache_dump : dump of register values calculated and saved for all | ||
62 | frequencies used so far. | ||
63 | - mr4 : last polled value of MR4 register in the LPDDR2 device. MR4 | ||
64 | indicates the current temperature level of the device. | ||
diff --git a/Documentation/driver-api/memory-devices/ti-gpmc.rst b/Documentation/driver-api/memory-devices/ti-gpmc.rst new file mode 100644 index 000000000000..33efcb81f080 --- /dev/null +++ b/Documentation/driver-api/memory-devices/ti-gpmc.rst | |||
@@ -0,0 +1,179 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ======================================== | ||
4 | GPMC (General Purpose Memory Controller) | ||
5 | ======================================== | ||
6 | |||
7 | GPMC is a unified memory controller dedicated to interfacing external | ||
8 | memory devices like | ||
9 | |||
10 | * Asynchronous SRAM like memories and application specific integrated | ||
11 | circuit devices. | ||
12 | * Asynchronous, synchronous, and page mode burst NOR flash devices | ||
13 | * NAND flash | ||
14 | * Pseudo-SRAM devices | ||
15 | |||
16 | GPMC is found on Texas Instruments SoC's (OMAP based) | ||
17 | IP details: http://www.ti.com/lit/pdf/spruh73 section 7.1 | ||
18 | |||
19 | |||
20 | GPMC generic timing calculation: | ||
21 | ================================ | ||
22 | |||
23 | GPMC has certain timings that have to be programmed for proper | ||
24 | functioning of the peripheral, while peripheral has another set of | ||
25 | timings. To have peripheral work with gpmc, peripheral timings have to | ||
26 | be translated to the form gpmc can understand. The way it has to be | ||
27 | translated depends on the connected peripheral. Also there is a | ||
28 | dependency for certain gpmc timings on gpmc clock frequency. Hence a | ||
29 | generic timing routine was developed to achieve above requirements. | ||
30 | |||
31 | Generic routine provides a generic method to calculate gpmc timings | ||
32 | from gpmc peripheral timings. struct gpmc_device_timings fields have to | ||
33 | be updated with timings from the datasheet of the peripheral that is | ||
34 | connected to gpmc. A few of the peripheral timings can be fed either | ||
35 | in time or in cycles, provision to handle this scenario has been | ||
36 | provided (refer struct gpmc_device_timings definition). It may so | ||
37 | happen that timing as specified by peripheral datasheet is not present | ||
38 | in timing structure, in this scenario, try to correlate peripheral | ||
39 | timing to the one available. If that doesn't work, try to add a new | ||
40 | field as required by peripheral, educate generic timing routine to | ||
41 | handle it, make sure that it does not break any of the existing. | ||
42 | Then there may be cases where peripheral datasheet doesn't mention | ||
43 | certain fields of struct gpmc_device_timings, zero those entries. | ||
44 | |||
45 | Generic timing routine has been verified to work properly on | ||
46 | multiple onenand's and tusb6010 peripherals. | ||
47 | |||
48 | A word of caution: generic timing routine has been developed based | ||
49 | on understanding of gpmc timings, peripheral timings, available | ||
50 | custom timing routines, a kind of reverse engineering without | ||
51 | most of the datasheets & hardware (to be exact none of those supported | ||
52 | in mainline having custom timing routine) and by simulation. | ||
53 | |||
54 | gpmc timing dependency on peripheral timings: | ||
55 | |||
56 | [<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...] | ||
57 | |||
58 | 1. common | ||
59 | |||
60 | cs_on: | ||
61 | t_ceasu | ||
62 | adv_on: | ||
63 | t_avdasu, t_ceavd | ||
64 | |||
65 | 2. sync common | ||
66 | |||
67 | sync_clk: | ||
68 | clk | ||
69 | page_burst_access: | ||
70 | t_bacc | ||
71 | clk_activation: | ||
72 | t_ces, t_avds | ||
73 | |||
74 | 3. read async muxed | ||
75 | |||
76 | adv_rd_off: | ||
77 | t_avdp_r | ||
78 | oe_on: | ||
79 | t_oeasu, t_aavdh | ||
80 | access: | ||
81 | t_iaa, t_oe, t_ce, t_aa | ||
82 | rd_cycle: | ||
83 | t_rd_cycle, t_cez_r, t_oez | ||
84 | |||
85 | 4. read async non-muxed | ||
86 | |||
87 | adv_rd_off: | ||
88 | t_avdp_r | ||
89 | oe_on: | ||
90 | t_oeasu | ||
91 | access: | ||
92 | t_iaa, t_oe, t_ce, t_aa | ||
93 | rd_cycle: | ||
94 | t_rd_cycle, t_cez_r, t_oez | ||
95 | |||
96 | 5. read sync muxed | ||
97 | |||
98 | adv_rd_off: | ||
99 | t_avdp_r, t_avdh | ||
100 | oe_on: | ||
101 | t_oeasu, t_ach, cyc_aavdh_oe | ||
102 | access: | ||
103 | t_iaa, cyc_iaa, cyc_oe | ||
104 | rd_cycle: | ||
105 | t_cez_r, t_oez, t_ce_rdyz | ||
106 | |||
107 | 6. read sync non-muxed | ||
108 | |||
109 | adv_rd_off: | ||
110 | t_avdp_r | ||
111 | oe_on: | ||
112 | t_oeasu | ||
113 | access: | ||
114 | t_iaa, cyc_iaa, cyc_oe | ||
115 | rd_cycle: | ||
116 | t_cez_r, t_oez, t_ce_rdyz | ||
117 | |||
118 | 7. write async muxed | ||
119 | |||
120 | adv_wr_off: | ||
121 | t_avdp_w | ||
122 | we_on, wr_data_mux_bus: | ||
123 | t_weasu, t_aavdh, cyc_aavdh_we | ||
124 | we_off: | ||
125 | t_wpl | ||
126 | cs_wr_off: | ||
127 | t_wph | ||
128 | wr_cycle: | ||
129 | t_cez_w, t_wr_cycle | ||
130 | |||
131 | 8. write async non-muxed | ||
132 | |||
133 | adv_wr_off: | ||
134 | t_avdp_w | ||
135 | we_on, wr_data_mux_bus: | ||
136 | t_weasu | ||
137 | we_off: | ||
138 | t_wpl | ||
139 | cs_wr_off: | ||
140 | t_wph | ||
141 | wr_cycle: | ||
142 | t_cez_w, t_wr_cycle | ||
143 | |||
144 | 9. write sync muxed | ||
145 | |||
146 | adv_wr_off: | ||
147 | t_avdp_w, t_avdh | ||
148 | we_on, wr_data_mux_bus: | ||
149 | t_weasu, t_rdyo, t_aavdh, cyc_aavdh_we | ||
150 | we_off: | ||
151 | t_wpl, cyc_wpl | ||
152 | cs_wr_off: | ||
153 | t_wph | ||
154 | wr_cycle: | ||
155 | t_cez_w, t_ce_rdyz | ||
156 | |||
157 | 10. write sync non-muxed | ||
158 | |||
159 | adv_wr_off: | ||
160 | t_avdp_w | ||
161 | we_on, wr_data_mux_bus: | ||
162 | t_weasu, t_rdyo | ||
163 | we_off: | ||
164 | t_wpl, cyc_wpl | ||
165 | cs_wr_off: | ||
166 | t_wph | ||
167 | wr_cycle: | ||
168 | t_cez_w, t_ce_rdyz | ||
169 | |||
170 | |||
171 | Note: | ||
172 | Many of gpmc timings are dependent on other gpmc timings (a few | ||
173 | gpmc timings purely dependent on other gpmc timings, a reason that | ||
174 | some of the gpmc timings are missing above), and it will result in | ||
175 | indirect dependency of peripheral timings to gpmc timings other than | ||
176 | mentioned above, refer timing routine for more details. To know what | ||
177 | these peripheral timings correspond to, please see explanations in | ||
178 | struct gpmc_device_timings definition. And for gpmc timings refer | ||
179 | IP details (link above). | ||
diff --git a/Documentation/driver-api/men-chameleon-bus.rst b/Documentation/driver-api/men-chameleon-bus.rst new file mode 100644 index 000000000000..1b1f048aa748 --- /dev/null +++ b/Documentation/driver-api/men-chameleon-bus.rst | |||
@@ -0,0 +1,175 @@ | |||
1 | ================= | ||
2 | MEN Chameleon Bus | ||
3 | ================= | ||
4 | |||
5 | .. Table of Contents | ||
6 | ================= | ||
7 | 1 Introduction | ||
8 | 1.1 Scope of this Document | ||
9 | 1.2 Limitations of the current implementation | ||
10 | 2 Architecture | ||
11 | 2.1 MEN Chameleon Bus | ||
12 | 2.2 Carrier Devices | ||
13 | 2.3 Parser | ||
14 | 3 Resource handling | ||
15 | 3.1 Memory Resources | ||
16 | 3.2 IRQs | ||
17 | 4 Writing an MCB driver | ||
18 | 4.1 The driver structure | ||
19 | 4.2 Probing and attaching | ||
20 | 4.3 Initializing the driver | ||
21 | |||
22 | |||
23 | Introduction | ||
24 | ============ | ||
25 | |||
26 | This document describes the architecture and implementation of the MEN | ||
27 | Chameleon Bus (called MCB throughout this document). | ||
28 | |||
29 | Scope of this Document | ||
30 | ---------------------- | ||
31 | |||
32 | This document is intended to be a short overview of the current | ||
33 | implementation and does by no means describe the complete possibilities of MCB | ||
34 | based devices. | ||
35 | |||
36 | Limitations of the current implementation | ||
37 | ----------------------------------------- | ||
38 | |||
39 | The current implementation is limited to PCI and PCIe based carrier devices | ||
40 | that only use a single memory resource and share the PCI legacy IRQ. Not | ||
41 | implemented are: | ||
42 | |||
43 | - Multi-resource MCB devices like the VME Controller or M-Module carrier. | ||
44 | - MCB devices that need another MCB device, like SRAM for a DMA Controller's | ||
45 | buffer descriptors or a video controller's video memory. | ||
46 | - A per-carrier IRQ domain for carrier devices that have one (or more) IRQs | ||
47 | per MCB device like PCIe based carriers with MSI or MSI-X support. | ||
48 | |||
49 | Architecture | ||
50 | ============ | ||
51 | |||
52 | MCB is divided into 3 functional blocks: | ||
53 | |||
54 | - The MEN Chameleon Bus itself, | ||
55 | - drivers for MCB Carrier Devices and | ||
56 | - the parser for the Chameleon table. | ||
57 | |||
58 | MEN Chameleon Bus | ||
59 | ----------------- | ||
60 | |||
61 | The MEN Chameleon Bus is an artificial bus system that attaches to a so | ||
62 | called Chameleon FPGA device found on some hardware produced by MEN Mikro | ||
63 | Elektronik GmbH. These devices are multi-function devices implemented in a | ||
64 | single FPGA and usually attached via some sort of PCI or PCIe link. Each | ||
65 | FPGA contains a header section describing the content of the FPGA. The | ||
66 | header lists the device id, PCI BAR, offset from the beginning of the PCI | ||
67 | BAR, size in the FPGA, interrupt number and some other properties currently | ||
68 | not handled by the MCB implementation. | ||
69 | |||
70 | Carrier Devices | ||
71 | --------------- | ||
72 | |||
73 | A carrier device is just an abstraction for the real world physical bus the | ||
74 | Chameleon FPGA is attached to. Some IP Core drivers may need to interact with | ||
75 | properties of the carrier device (like querying the IRQ number of a PCI | ||
76 | device). To provide abstraction from the real hardware bus, an MCB carrier | ||
77 | device provides callback methods to translate the driver's MCB function calls | ||
78 | to hardware related function calls. For example a carrier device may | ||
79 | implement the get_irq() method which can be translated into a hardware bus | ||
80 | query for the IRQ number the device should use. | ||
81 | |||
82 | Parser | ||
83 | ------ | ||
84 | |||
85 | The parser reads the first 512 bytes of a Chameleon device and parses the | ||
86 | Chameleon table. Currently the parser only supports the Chameleon v2 variant | ||
87 | of the Chameleon table but can easily be adapted to support an older or | ||
88 | possible future variant. While parsing the table's entries new MCB devices | ||
89 | are allocated and their resources are assigned according to the resource | ||
90 | assignment in the Chameleon table. After resource assignment is finished, the | ||
91 | MCB devices are registered at the MCB and thus at the driver core of the | ||
92 | Linux kernel. | ||
93 | |||
94 | Resource handling | ||
95 | ================= | ||
96 | |||
97 | The current implementation assigns exactly one memory and one IRQ resource | ||
98 | per MCB device. But this is likely going to change in the future. | ||
99 | |||
100 | Memory Resources | ||
101 | ---------------- | ||
102 | |||
103 | Each MCB device has exactly one memory resource, which can be requested from | ||
104 | the MCB bus. This memory resource is the physical address of the MCB device | ||
105 | inside the carrier and is intended to be passed to ioremap() and friends. It | ||
106 | is already requested from the kernel by calling request_mem_region(). | ||
107 | |||
108 | IRQs | ||
109 | ---- | ||
110 | |||
111 | Each MCB device has exactly one IRQ resource, which can be requested from the | ||
112 | MCB bus. If a carrier device driver implements the ->get_irq() callback | ||
113 | method, the IRQ number assigned by the carrier device will be returned, | ||
114 | otherwise the IRQ number inside the Chameleon table will be returned. This | ||
115 | number is suitable to be passed to request_irq(). | ||
116 | |||
117 | Writing an MCB driver | ||
118 | ===================== | ||
119 | |||
120 | The driver structure | ||
121 | -------------------- | ||
122 | |||
123 | Each MCB driver has a structure to identify the device driver as well as | ||
124 | device ids which identify the IP Core inside the FPGA. The driver structure | ||
125 | also contains callback methods which get executed on driver probe and | ||
126 | removal from the system:: | ||
127 | |||
128 | static const struct mcb_device_id foo_ids[] = { | ||
129 | { .device = 0x123 }, | ||
130 | { } | ||
131 | }; | ||
132 | MODULE_DEVICE_TABLE(mcb, foo_ids); | ||
133 | |||
134 | static struct mcb_driver foo_driver = { | ||
135 | .driver = { | ||
136 | .name = "foo-bar", | ||
137 | .owner = THIS_MODULE, | ||
138 | }, | ||
139 | .probe = foo_probe, | ||
140 | .remove = foo_remove, | ||
141 | .id_table = foo_ids, | ||
142 | }; | ||
143 | |||
144 | Probing and attaching | ||
145 | --------------------- | ||
146 | |||
147 | When a driver is loaded and the MCB devices it services are found, the MCB | ||
148 | core will call the driver's probe callback method. When the driver is removed | ||
149 | from the system, the MCB core will call the driver's remove callback method:: | ||
150 | |||
151 | static int foo_probe(struct mcb_device *mdev, const struct mcb_device_id *id); | ||
152 | static void foo_remove(struct mcb_device *mdev); | ||
153 | |||
154 | Initializing the driver | ||
155 | ----------------------- | ||
156 | |||
157 | When the kernel is booted or your foo driver module is inserted, you have to | ||
158 | perform driver initialization. Usually it is enough to register your driver | ||
159 | module at the MCB core:: | ||
160 | |||
161 | static int __init foo_init(void) | ||
162 | { | ||
163 | return mcb_register_driver(&foo_driver); | ||
164 | } | ||
165 | module_init(foo_init); | ||
166 | |||
167 | static void __exit foo_exit(void) | ||
168 | { | ||
169 | mcb_unregister_driver(&foo_driver); | ||
170 | } | ||
171 | module_exit(foo_exit); | ||
172 | |||
173 | The module_mcb_driver() macro can be used to reduce the above code:: | ||
174 | |||
175 | module_mcb_driver(foo_driver); | ||
diff --git a/Documentation/driver-api/mmc/index.rst b/Documentation/driver-api/mmc/index.rst new file mode 100644 index 000000000000..7339736ac774 --- /dev/null +++ b/Documentation/driver-api/mmc/index.rst | |||
@@ -0,0 +1,13 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ======================== | ||
4 | MMC/SD/SDIO card support | ||
5 | ======================== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | mmc-dev-attrs | ||
11 | mmc-dev-parts | ||
12 | mmc-async-req | ||
13 | mmc-tools | ||
diff --git a/Documentation/driver-api/mmc/mmc-async-req.rst b/Documentation/driver-api/mmc/mmc-async-req.rst new file mode 100644 index 000000000000..0f7197c9c3b5 --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-async-req.rst | |||
@@ -0,0 +1,98 @@ | |||
1 | ======================== | ||
2 | MMC Asynchronous Request | ||
3 | ======================== | ||
4 | |||
5 | Rationale | ||
6 | ========= | ||
7 | |||
8 | How significant is the cache maintenance overhead? | ||
9 | |||
10 | It depends. Fast eMMC and multiple cache levels with speculative cache | ||
11 | pre-fetch make the cache overhead relatively significant. If the DMA | ||
12 | preparations for the next request are done in parallel with the current | ||
13 | transfer, the DMA preparation overhead would not affect the MMC performance. | ||
14 | |||
15 | The intention of non-blocking (asynchronous) MMC requests is to minimize the | ||
16 | time between when an MMC request ends and another MMC request begins. | ||
17 | |||
18 | Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and | ||
19 | dma_unmap_sg are processing. Using non-blocking MMC requests makes it | ||
20 | possible to prepare the caches for next job in parallel with an active | ||
21 | MMC request. | ||
22 | |||
23 | MMC block driver | ||
24 | ================ | ||
25 | |||
26 | The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking. | ||
27 | |||
28 | The increase in throughput is proportional to the time it takes to | ||
29 | prepare (major part of preparations are dma_map_sg() and dma_unmap_sg()) | ||
30 | a request and how fast the memory is. The faster the MMC/SD is the | ||
31 | more significant the prepare request time becomes. Roughly the expected | ||
32 | performance gain is 5% for large writes and 10% on large reads on an L2 cache | ||
33 | platform. In power save mode, when clocks run on a lower frequency, the DMA | ||
34 | preparation may cost even more. As long as these slower preparations are run | ||
35 | in parallel with the transfer, performance won't be affected. | ||
36 | |||
37 | Details on measurements from IOZone and mmc_test | ||
38 | ================================================ | ||
39 | |||
40 | https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req | ||
41 | |||
42 | MMC core API extension | ||
43 | ====================== | ||
44 | |||
45 | There is one new public function mmc_start_req(). | ||
46 | |||
47 | It starts a new MMC command request for a host. The function isn't | ||
48 | truly non-blocking. If there is an ongoing async request it waits | ||
49 | for completion of that request and starts the new one and returns. It | ||
50 | doesn't wait for the new request to complete. If there is no ongoing | ||
51 | request it starts the new request and returns immediately. | ||
52 | |||
53 | MMC host extensions | ||
54 | =================== | ||
55 | |||
56 | There are two optional members in the mmc_host_ops -- pre_req() and | ||
57 | post_req() -- that the host driver may implement in order to move work | ||
58 | to before and after the actual mmc_host_ops.request() function is called. | ||
59 | |||
60 | In the DMA case pre_req() may do dma_map_sg() and prepare the DMA | ||
61 | descriptor, and post_req() runs the dma_unmap_sg(). | ||
62 | |||
63 | Optimize for the first request | ||
64 | ============================== | ||
65 | |||
66 | The first request in a series of requests can't be prepared in parallel | ||
67 | with the previous transfer, since there is no previous request. | ||
68 | |||
69 | The argument is_first_req in pre_req() indicates that there is no previous | ||
70 | request. The host driver may optimize for this scenario to minimize | ||
71 | the performance loss. A way to optimize for this is to split the current | ||
72 | request in two chunks, prepare the first chunk and start the request, | ||
73 | and finally prepare the second chunk and start the transfer. | ||
74 | |||
75 | Pseudocode to handle is_first_req scenario with minimal prepare overhead:: | ||
76 | |||
77 | if (is_first_req && req->size > threshold) | ||
78 | /* start MMC transfer for the complete transfer size */ | ||
79 | mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE); | ||
80 | |||
81 | /* | ||
82 | * Begin to prepare DMA while cmd is being processed by MMC. | ||
83 | * The first chunk of the request should take the same time | ||
84 | * to prepare as the "MMC process command time". | ||
85 | * If prepare time exceeds MMC cmd time | ||
86 | * the transfer is delayed, guesstimate max 4k as first chunk size. | ||
87 | */ | ||
88 | prepare_1st_chunk_for_dma(req); | ||
89 | /* flush pending desc to the DMAC (dmaengine.h) */ | ||
90 | dma_issue_pending(req->dma_desc); | ||
91 | |||
92 | prepare_2nd_chunk_for_dma(req); | ||
93 | /* | ||
94 | * The second issue_pending should be called before MMC runs out | ||
95 | * of the first chunk. If the MMC runs out of the first data chunk | ||
96 | * before this call, the transfer is delayed. | ||
97 | */ | ||
98 | dma_issue_pending(req->dma_desc); | ||
diff --git a/Documentation/driver-api/mmc/mmc-dev-attrs.rst b/Documentation/driver-api/mmc/mmc-dev-attrs.rst new file mode 100644 index 000000000000..4f44b1b730d6 --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-dev-attrs.rst | |||
@@ -0,0 +1,91 @@ | |||
1 | ================================== | ||
2 | SD and MMC Block Device Attributes | ||
3 | ================================== | ||
4 | |||
5 | These attributes are defined for the block devices associated with the | ||
6 | SD or MMC device. | ||
7 | |||
8 | The following attributes are read/write. | ||
9 | |||
10 | ======== =============================================== | ||
11 | force_ro Enforce read-only access even if write protect switch is off. | ||
12 | ======== =============================================== | ||
13 | |||
14 | SD and MMC Device Attributes | ||
15 | ============================ | ||
16 | |||
17 | All attributes are read-only. | ||
18 | |||
19 | ====================== =============================================== | ||
20 | cid Card Identification Register | ||
21 | csd Card Specific Data Register | ||
22 | scr SD Card Configuration Register (SD only) | ||
23 | date Manufacturing Date (from CID Register) | ||
24 | fwrev Firmware/Product Revision (from CID Register) | ||
25 | (SD and MMCv1 only) | ||
26 | hwrev Hardware/Product Revision (from CID Register) | ||
27 | (SD and MMCv1 only) | ||
28 | manfid Manufacturer ID (from CID Register) | ||
29 | name Product Name (from CID Register) | ||
30 | oemid OEM/Application ID (from CID Register) | ||
31 | prv Product Revision (from CID Register) | ||
32 | (SD and MMCv4 only) | ||
33 | serial Product Serial Number (from CID Register) | ||
34 | erase_size Erase group size | ||
35 | preferred_erase_size Preferred erase size | ||
36 | raw_rpmb_size_mult RPMB partition size | ||
37 | rel_sectors Reliable write sector count | ||
38 | ocr Operation Conditions Register | ||
39 | dsr Driver Stage Register | ||
40 | cmdq_en Command Queue enabled: | ||
41 | |||
42 | 1 => enabled, 0 => not enabled | ||
43 | ====================== =============================================== | ||
44 | |||
45 | Note on Erase Size and Preferred Erase Size: | ||
46 | |||
47 | "erase_size" is the minimum size, in bytes, of an erase | ||
48 | operation. For MMC, "erase_size" is the erase group size | ||
49 | reported by the card. Note that "erase_size" does not apply | ||
50 | to trim or secure trim operations where the minimum size is | ||
51 | always one 512 byte sector. For SD, "erase_size" is 512 | ||
52 | if the card is block-addressed, 0 otherwise. | ||
53 | |||
54 | SD/MMC cards can erase an arbitrarily large area up to and | ||
55 | including the whole card. When erasing a large area it may | ||
56 | be desirable to do it in smaller chunks for three reasons: | ||
57 | |||
58 | 1. A single erase command will make all other I/O on | ||
59 | the card wait. This is not a problem if the whole card | ||
60 | is being erased, but erasing one partition will make | ||
61 | I/O for another partition on the same card wait for the | ||
62 | duration of the erase - which could be several | ||
63 | minutes. | ||
64 | 2. To be able to inform the user of erase progress. | ||
65 | 3. The erase timeout becomes too large to be very | ||
66 | useful. Because the erase timeout contains a margin | ||
67 | which is multiplied by the size of the erase area, | ||
68 | the value can end up being several minutes for large | ||
69 | areas. | ||
70 | |||
71 | "erase_size" is not the most efficient unit to erase | ||
72 | (especially for SD where it is just one sector), | ||
73 | hence "preferred_erase_size" provides a good chunk | ||
74 | size for erasing large areas. | ||
75 | |||
76 | For MMC, "preferred_erase_size" is the high-capacity | ||
77 | erase size if a card specifies one, otherwise it is | ||
78 | based on the capacity of the card. | ||
79 | |||
80 | For SD, "preferred_erase_size" is the allocation unit | ||
81 | size specified by the card. | ||
82 | |||
83 | "preferred_erase_size" is in bytes. | ||
84 | |||
85 | Note on raw_rpmb_size_mult: | ||
86 | |||
87 | "raw_rpmb_size_mult" is a multiple of 128kB block. | ||
88 | |||
89 | RPMB size in bytes is calculated by using the following equation: | ||
90 | |||
91 | RPMB partition size = 128kB x raw_rpmb_size_mult | ||
diff --git a/Documentation/driver-api/mmc/mmc-dev-parts.rst b/Documentation/driver-api/mmc/mmc-dev-parts.rst new file mode 100644 index 000000000000..995922f1f744 --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-dev-parts.rst | |||
@@ -0,0 +1,41 @@ | |||
1 | ============================ | ||
2 | SD and MMC Device Partitions | ||
3 | ============================ | ||
4 | |||
5 | Device partitions are additional logical block devices present on the | ||
6 | SD/MMC device. | ||
7 | |||
8 | As of this writing, MMC boot partitions are supported and exposed as | ||
9 | /dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the | ||
10 | parent /dev/mmcblkX. | ||
11 | |||
12 | MMC Boot Partitions | ||
13 | =================== | ||
14 | |||
15 | Read and write access is provided to the two MMC boot partitions. Due to | ||
16 | the sensitive nature of the boot partition contents, which often store | ||
17 | a bootloader or bootloader configuration tables crucial to booting the | ||
18 | platform, write access is disabled by default to reduce the chance of | ||
19 | accidental bricking. | ||
20 | |||
21 | To enable write access to /dev/mmcblkXbootY, disable the forced read-only | ||
22 | access with:: | ||
23 | |||
24 | echo 0 > /sys/block/mmcblkXbootY/force_ro | ||
25 | |||
26 | To re-enable read-only access:: | ||
27 | |||
28 | echo 1 > /sys/block/mmcblkXbootY/force_ro | ||
29 | |||
30 | The boot partitions can also be locked read only until the next power on, | ||
31 | with:: | ||
32 | |||
33 | echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on | ||
34 | |||
35 | This is a feature of the card and not of the kernel. If the card does | ||
36 | not support boot partition locking, the file will not exist. If the | ||
37 | feature has been disabled on the card, the file will be read-only. | ||
38 | |||
39 | The boot partitions can also be locked permanently, but this feature is | ||
40 | not accessible through sysfs in order to avoid accidental or malicious | ||
41 | bricking. | ||
diff --git a/Documentation/driver-api/mmc/mmc-tools.rst b/Documentation/driver-api/mmc/mmc-tools.rst new file mode 100644 index 000000000000..54406093768b --- /dev/null +++ b/Documentation/driver-api/mmc/mmc-tools.rst | |||
@@ -0,0 +1,37 @@ | |||
1 | ====================== | ||
2 | MMC tools introduction | ||
3 | ====================== | ||
4 | |||
5 | There is one MMC test tool called mmc-utils, which is maintained by Chris Ball, | ||
6 | you can find it at the below public git repository: | ||
7 | |||
8 | http://git.kernel.org/cgit/linux/kernel/git/cjb/mmc-utils.git/ | ||
9 | |||
10 | Functions | ||
11 | ========= | ||
12 | |||
13 | The mmc-utils tools can do the following: | ||
14 | |||
15 | - Print and parse extcsd data. | ||
16 | - Determine the eMMC writeprotect status. | ||
17 | - Set the eMMC writeprotect status. | ||
18 | - Set the eMMC data sector size to 4KB by disabling emulation. | ||
19 | - Create general purpose partition. | ||
20 | - Enable the enhanced user area. | ||
21 | - Enable write reliability per partition. | ||
22 | - Print the response to STATUS_SEND (CMD13). | ||
23 | - Enable the boot partition. | ||
24 | - Set Boot Bus Conditions. | ||
25 | - Enable the eMMC BKOPS feature. | ||
26 | - Permanently enable the eMMC H/W Reset feature. | ||
27 | - Permanently disable the eMMC H/W Reset feature. | ||
28 | - Send Sanitize command. | ||
29 | - Program authentication key for the device. | ||
30 | - Counter value for the rpmb device will be read to stdout. | ||
31 | - Read from rpmb device to output. | ||
32 | - Write to rpmb device from data file. | ||
33 | - Enable the eMMC cache feature. | ||
34 | - Disable the eMMC cache feature. | ||
35 | - Print and parse CID data. | ||
36 | - Print and parse CSD data. | ||
37 | - Print and parse SCR data. | ||
diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst new file mode 100644 index 000000000000..436ba5a851d7 --- /dev/null +++ b/Documentation/driver-api/mtd/index.rst | |||
@@ -0,0 +1,12 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ============================== | ||
4 | Memory Technology Device (MTD) | ||
5 | ============================== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | intel-spi | ||
11 | nand_ecc | ||
12 | spi-nor | ||
diff --git a/Documentation/driver-api/mtd/intel-spi.rst b/Documentation/driver-api/mtd/intel-spi.rst new file mode 100644 index 000000000000..0e6d9cd5388d --- /dev/null +++ b/Documentation/driver-api/mtd/intel-spi.rst | |||
@@ -0,0 +1,90 @@ | |||
1 | ============================== | ||
2 | Upgrading BIOS using intel-spi | ||
3 | ============================== | ||
4 | |||
5 | Many Intel CPUs like Baytrail and Braswell include SPI serial flash host | ||
6 | controller which is used to hold BIOS and other platform specific data. | ||
7 | Since contents of the SPI serial flash is crucial for machine to function, | ||
8 | it is typically protected by different hardware protection mechanisms to | ||
9 | avoid accidental (or on purpose) overwrite of the content. | ||
10 | |||
11 | Not all manufacturers protect the SPI serial flash, mainly because it | ||
12 | allows upgrading the BIOS image directly from an OS. | ||
13 | |||
14 | The intel-spi driver makes it possible to read and write the SPI serial | ||
15 | flash, if certain protection bits are not set and locked. If it finds | ||
16 | any of them set, the whole MTD device is made read-only to prevent | ||
17 | partial overwrites. By default the driver exposes SPI serial flash | ||
18 | contents as read-only but it can be changed from kernel command line, | ||
19 | passing "intel-spi.writeable=1". | ||
20 | |||
21 | Please keep in mind that overwriting the BIOS image on SPI serial flash | ||
22 | might render the machine unbootable and requires special equipment like | ||
23 | Dediprog to revive. You have been warned! | ||
24 | |||
25 | Below are the steps how to upgrade MinnowBoard MAX BIOS directly from | ||
26 | Linux. | ||
27 | |||
28 | 1) Download and extract the latest Minnowboard MAX BIOS SPI image | ||
29 | [1]. At the time of writing this the latest image is v92. | ||
30 | |||
31 | 2) Install mtd-utils package [2]. We need this in order to erase the SPI | ||
32 | serial flash. Distros like Debian and Fedora have this prepackaged with | ||
33 | name "mtd-utils". | ||
34 | |||
35 | 3) Add "intel-spi.writeable=1" to the kernel command line and reboot | ||
36 | the board (you can also reload the driver passing "writeable=1" as | ||
37 | module parameter to modprobe). | ||
38 | |||
39 | 4) Once the board is up and running again, find the right MTD partition | ||
40 | (it is named as "BIOS"):: | ||
41 | |||
42 | # cat /proc/mtd | ||
43 | dev: size erasesize name | ||
44 | mtd0: 00800000 00001000 "BIOS" | ||
45 | |||
46 | So here it will be /dev/mtd0 but it may vary. | ||
47 | |||
48 | 5) Make backup of the existing image first:: | ||
49 | |||
50 | # dd if=/dev/mtd0ro of=bios.bak | ||
51 | 16384+0 records in | ||
52 | 16384+0 records out | ||
53 | 8388608 bytes (8.4 MB) copied, 10.0269 s, 837 kB/s | ||
54 | |||
55 | 6) Verify the backup:: | ||
56 | |||
57 | # sha1sum /dev/mtd0ro bios.bak | ||
58 | fdbb011920572ca6c991377c4b418a0502668b73 /dev/mtd0ro | ||
59 | fdbb011920572ca6c991377c4b418a0502668b73 bios.bak | ||
60 | |||
61 | The SHA1 sums must match. Otherwise do not continue any further! | ||
62 | |||
63 | 7) Erase the SPI serial flash. After this step, do not reboot the | ||
64 | board! Otherwise it will not start anymore:: | ||
65 | |||
66 | # flash_erase /dev/mtd0 0 0 | ||
67 | Erasing 4 Kibyte @ 7ff000 -- 100 % complete | ||
68 | |||
69 | 8) Once completed without errors you can write the new BIOS image:: | ||
70 | |||
71 | # dd if=MNW2MAX1.X64.0092.R01.1605221712.bin of=/dev/mtd0 | ||
72 | |||
73 | 9) Verify that the new content of the SPI serial flash matches the new | ||
74 | BIOS image:: | ||
75 | |||
76 | # sha1sum /dev/mtd0ro MNW2MAX1.X64.0092.R01.1605221712.bin | ||
77 | 9b4df9e4be2057fceec3a5529ec3d950836c87a2 /dev/mtd0ro | ||
78 | 9b4df9e4be2057fceec3a5529ec3d950836c87a2 MNW2MAX1.X64.0092.R01.1605221712.bin | ||
79 | |||
80 | The SHA1 sums should match. | ||
81 | |||
82 | 10) Now you can reboot your board and observe the new BIOS starting up | ||
83 | properly. | ||
84 | |||
85 | References | ||
86 | ---------- | ||
87 | |||
88 | [1] https://firmware.intel.com/sites/default/files/MinnowBoard%2EMAX_%2EX64%2E92%2ER01%2Ezip | ||
89 | |||
90 | [2] http://www.linux-mtd.infradead.org/ | ||
diff --git a/Documentation/driver-api/mtd/nand_ecc.rst b/Documentation/driver-api/mtd/nand_ecc.rst new file mode 100644 index 000000000000..e8d3c53a5056 --- /dev/null +++ b/Documentation/driver-api/mtd/nand_ecc.rst | |||
@@ -0,0 +1,763 @@ | |||
1 | ========================== | ||
2 | NAND Error-correction Code | ||
3 | ========================== | ||
4 | |||
5 | Introduction | ||
6 | ============ | ||
7 | |||
8 | Having looked at the linux mtd/nand driver and more specific at nand_ecc.c | ||
9 | I felt there was room for optimisation. I bashed the code for a few hours | ||
10 | performing tricks like table lookup removing superfluous code etc. | ||
11 | After that the speed was increased by 35-40%. | ||
12 | Still I was not too happy as I felt there was additional room for improvement. | ||
13 | |||
14 | Bad! I was hooked. | ||
15 | I decided to annotate my steps in this file. Perhaps it is useful to someone | ||
16 | or someone learns something from it. | ||
17 | |||
18 | |||
19 | The problem | ||
20 | =========== | ||
21 | |||
22 | NAND flash (at least SLC one) typically has sectors of 256 bytes. | ||
23 | However NAND flash is not extremely reliable so some error detection | ||
24 | (and sometimes correction) is needed. | ||
25 | |||
26 | This is done by means of a Hamming code. I'll try to explain it in | ||
27 | layman's terms (and apologies to all the pros in the field in case I do | ||
28 | not use the right terminology, my coding theory class was almost 30 | ||
29 | years ago, and I must admit it was not one of my favourites). | ||
30 | |||
31 | As I said before the ecc calculation is performed on sectors of 256 | ||
32 | bytes. This is done by calculating several parity bits over the rows and | ||
33 | columns. The parity used is even parity which means that the parity bit = 1 | ||
34 | if the data over which the parity is calculated contains an odd number of 1 | ||
35 | bits, and the parity bit = 0 if it contains an even number of 1 bits. So the | ||
36 | total number of 1 bits in the data over which the parity is calculated + the | ||
37 | parity bit is even. (see wikipedia if you can't follow this). | ||
38 | Parity is often calculated by means of an exclusive or operation, | ||
39 | sometimes also referred to as xor. In C the operator for xor is ^ | ||
40 | |||
41 | Back to ecc. | ||
42 | Let's give a small figure: | ||
43 | |||
44 | ========= ==== ==== ==== ==== ==== ==== ==== ==== === === === === ==== | ||
45 | byte 0: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp4 ... rp14 | ||
46 | byte 1: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp2 rp4 ... rp14 | ||
47 | byte 2: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp4 ... rp14 | ||
48 | byte 3: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp4 ... rp14 | ||
49 | byte 4: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp5 ... rp14 | ||
50 | ... | ||
51 | byte 254: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp5 ... rp15 | ||
52 | byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15 | ||
53 | cp1 cp0 cp1 cp0 cp1 cp0 cp1 cp0 | ||
54 | cp3 cp3 cp2 cp2 cp3 cp3 cp2 cp2 | ||
55 | cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4 | ||
56 | ========= ==== ==== ==== ==== ==== ==== ==== ==== === === === === ==== | ||
57 | |||
58 | This figure represents a sector of 256 bytes. | ||
59 | cp is my abbreviation for column parity, rp for row parity. | ||
60 | |||
61 | Let's start to explain column parity. | ||
62 | |||
63 | - cp0 is the parity that belongs to all bit0, bit2, bit4, bit6. | ||
64 | |||
65 | so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even. | ||
66 | |||
67 | Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7. | ||
68 | |||
69 | - cp2 is the parity over bit0, bit1, bit4 and bit5 | ||
70 | - cp3 is the parity over bit2, bit3, bit6 and bit7. | ||
71 | - cp4 is the parity over bit0, bit1, bit2 and bit3. | ||
72 | - cp5 is the parity over bit4, bit5, bit6 and bit7. | ||
73 | |||
74 | Note that each of cp0 .. cp5 is exactly one bit. | ||
75 | |||
76 | Row parity actually works almost the same. | ||
77 | |||
78 | - rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254) | ||
79 | - rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255) | ||
80 | - rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ... | ||
81 | (so handle two bytes, then skip 2 bytes). | ||
82 | - rp3 covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...) | ||
83 | - for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc. | ||
84 | |||
85 | so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ... | ||
86 | - and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, .. | ||
87 | |||
88 | The story now becomes quite boring. I guess you get the idea. | ||
89 | |||
90 | - rp6 covers 8 bytes then skips 8 etc | ||
91 | - rp7 skips 8 bytes then covers 8 etc | ||
92 | - rp8 covers 16 bytes then skips 16 etc | ||
93 | - rp9 skips 16 bytes then covers 16 etc | ||
94 | - rp10 covers 32 bytes then skips 32 etc | ||
95 | - rp11 skips 32 bytes then covers 32 etc | ||
96 | - rp12 covers 64 bytes then skips 64 etc | ||
97 | - rp13 skips 64 bytes then covers 64 etc | ||
98 | - rp14 covers 128 bytes then skips 128 | ||
99 | - rp15 skips 128 bytes then covers 128 | ||
100 | |||
101 | In the end the parity bits are grouped together in three bytes as | ||
102 | follows: | ||
103 | |||
104 | ===== ===== ===== ===== ===== ===== ===== ===== ===== | ||
105 | ECC Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0 | ||
106 | ===== ===== ===== ===== ===== ===== ===== ===== ===== | ||
107 | ECC 0 rp07 rp06 rp05 rp04 rp03 rp02 rp01 rp00 | ||
108 | ECC 1 rp15 rp14 rp13 rp12 rp11 rp10 rp09 rp08 | ||
109 | ECC 2 cp5 cp4 cp3 cp2 cp1 cp0 1 1 | ||
110 | ===== ===== ===== ===== ===== ===== ===== ===== ===== | ||
111 | |||
112 | I detected after writing this that ST application note AN1823 | ||
113 | (http://www.st.com/stonline/) gives a much | ||
114 | nicer picture (but they use line parity as term where I use row parity). | ||
115 | Oh well, I'm graphically challenged, so suffer with me for a moment :-) | ||
116 | |||
117 | And I could not reuse the ST picture anyway for copyright reasons. | ||
118 | |||
119 | |||
120 | Attempt 0 | ||
121 | ========= | ||
122 | |||
123 | Implementing the parity calculation is pretty simple. | ||
124 | In C pseudocode:: | ||
125 | |||
126 | for (i = 0; i < 256; i++) | ||
127 | { | ||
128 | if (i & 0x01) | ||
129 | rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1; | ||
130 | else | ||
131 | rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0; | ||
132 | if (i & 0x02) | ||
133 | rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3; | ||
134 | else | ||
135 | rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2; | ||
136 | if (i & 0x04) | ||
137 | rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5; | ||
138 | else | ||
139 | rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4; | ||
140 | if (i & 0x08) | ||
141 | rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7; | ||
142 | else | ||
143 | rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6; | ||
144 | if (i & 0x10) | ||
145 | rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9; | ||
146 | else | ||
147 | rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8; | ||
148 | if (i & 0x20) | ||
149 | rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11; | ||
150 | else | ||
151 | rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10; | ||
152 | if (i & 0x40) | ||
153 | rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13; | ||
154 | else | ||
155 | rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12; | ||
156 | if (i & 0x80) | ||
157 | rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15; | ||
158 | else | ||
159 | rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14; | ||
160 | cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0; | ||
161 | cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1; | ||
162 | cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2; | ||
163 | cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3 | ||
164 | cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4 | ||
165 | cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5 | ||
166 | } | ||
167 | |||
168 | |||
169 | Analysis 0 | ||
170 | ========== | ||
171 | |||
172 | C does have bitwise operators but not really operators to do the above | ||
173 | efficiently (and most hardware has no such instructions either). | ||
174 | Therefore without implementing this it was clear that the code above was | ||
175 | not going to bring me a Nobel prize :-) | ||
176 | |||
177 | Fortunately the exclusive or operation is commutative, so we can combine | ||
178 | the values in any order. So instead of calculating all the bits | ||
179 | individually, let us try to rearrange things. | ||
180 | For the column parity this is easy. We can just xor the bytes and in the | ||
181 | end filter out the relevant bits. This is pretty nice as it will bring | ||
182 | all cp calculation out of the for loop. | ||
183 | |||
184 | Similarly we can first xor the bytes for the various rows. | ||
185 | This leads to: | ||
186 | |||
187 | |||
188 | Attempt 1 | ||
189 | ========= | ||
190 | |||
191 | :: | ||
192 | |||
193 | const char parity[256] = { | ||
194 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
195 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
196 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
197 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
198 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
199 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
200 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
201 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
202 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
203 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
204 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
205 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
206 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, | ||
207 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
208 | 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, | ||
209 | 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 | ||
210 | }; | ||
211 | |||
212 | void ecc1(const unsigned char *buf, unsigned char *code) | ||
213 | { | ||
214 | int i; | ||
215 | const unsigned char *bp = buf; | ||
216 | unsigned char cur; | ||
217 | unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7; | ||
218 | unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15; | ||
219 | unsigned char par; | ||
220 | |||
221 | par = 0; | ||
222 | rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0; | ||
223 | rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0; | ||
224 | rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0; | ||
225 | rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0; | ||
226 | |||
227 | for (i = 0; i < 256; i++) | ||
228 | { | ||
229 | cur = *bp++; | ||
230 | par ^= cur; | ||
231 | if (i & 0x01) rp1 ^= cur; else rp0 ^= cur; | ||
232 | if (i & 0x02) rp3 ^= cur; else rp2 ^= cur; | ||
233 | if (i & 0x04) rp5 ^= cur; else rp4 ^= cur; | ||
234 | if (i & 0x08) rp7 ^= cur; else rp6 ^= cur; | ||
235 | if (i & 0x10) rp9 ^= cur; else rp8 ^= cur; | ||
236 | if (i & 0x20) rp11 ^= cur; else rp10 ^= cur; | ||
237 | if (i & 0x40) rp13 ^= cur; else rp12 ^= cur; | ||
238 | if (i & 0x80) rp15 ^= cur; else rp14 ^= cur; | ||
239 | } | ||
240 | code[0] = | ||
241 | (parity[rp7] << 7) | | ||
242 | (parity[rp6] << 6) | | ||
243 | (parity[rp5] << 5) | | ||
244 | (parity[rp4] << 4) | | ||
245 | (parity[rp3] << 3) | | ||
246 | (parity[rp2] << 2) | | ||
247 | (parity[rp1] << 1) | | ||
248 | (parity[rp0]); | ||
249 | code[1] = | ||
250 | (parity[rp15] << 7) | | ||
251 | (parity[rp14] << 6) | | ||
252 | (parity[rp13] << 5) | | ||
253 | (parity[rp12] << 4) | | ||
254 | (parity[rp11] << 3) | | ||
255 | (parity[rp10] << 2) | | ||
256 | (parity[rp9] << 1) | | ||
257 | (parity[rp8]); | ||
258 | code[2] = | ||
259 | (parity[par & 0xf0] << 7) | | ||
260 | (parity[par & 0x0f] << 6) | | ||
261 | (parity[par & 0xcc] << 5) | | ||
262 | (parity[par & 0x33] << 4) | | ||
263 | (parity[par & 0xaa] << 3) | | ||
264 | (parity[par & 0x55] << 2); | ||
265 | code[0] = ~code[0]; | ||
266 | code[1] = ~code[1]; | ||
267 | code[2] = ~code[2]; | ||
268 | } | ||
269 | |||
270 | Still pretty straightforward. The last three invert statements are there to | ||
271 | give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash | ||
272 | all data is 0xff, so the checksum then matches. | ||
273 | |||
274 | I also introduced the parity lookup. I expected this to be the fastest | ||
275 | way to calculate the parity, but I will investigate alternatives later | ||
276 | on. | ||
277 | |||
278 | |||
279 | Analysis 1 | ||
280 | ========== | ||
281 | |||
282 | The code works, but is not terribly efficient. On my system it took | ||
283 | almost 4 times as much time as the linux driver code. But hey, if it was | ||
284 | *that* easy this would have been done long before. | ||
285 | No pain, no gain. | ||
286 | |||
287 | Fortunately there is plenty of room for improvement. | ||
288 | |||
289 | In step 1 we moved from bit-wise calculation to byte-wise calculation. | ||
290 | However in C we can also use the unsigned long data type and virtually | ||
291 | every modern microprocessor supports 32 bit operations, so why not try | ||
292 | to write our code in such a way that we process data in 32 bit chunks. | ||
293 | |||
294 | Of course this means some modification as the row parity is byte by | ||
295 | byte. A quick analysis: | ||
296 | for the column parity we use the par variable. When extending to 32 bits | ||
297 | we can in the end easily calculate rp0 and rp1 from it. | ||
298 | (because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0 | ||
299 | respectively, from MSB to LSB) | ||
300 | also rp2 and rp3 can be easily retrieved from par as rp3 covers the | ||
301 | first two MSBs and rp2 covers the last two LSBs. | ||
302 | |||
303 | Note that of course now the loop is executed only 64 times (256/4). | ||
304 | And note that care must be taken wrt byte ordering. The way bytes are | ||
305 | ordered in a long is machine dependent, and might affect us. | ||
306 | Anyway, if there is an issue: this code is developed on x86 (to be | ||
307 | precise: a DELL PC with a D920 Intel CPU) | ||
308 | |||
309 | And of course the performance might depend on alignment, but I expect | ||
310 | that the I/O buffers in the nand driver are aligned properly (and | ||
311 | otherwise that should be fixed to get maximum performance). | ||
312 | |||
313 | Let's give it a try... | ||
314 | |||
315 | |||
316 | Attempt 2 | ||
317 | ========= | ||
318 | |||
319 | :: | ||
320 | |||
321 | extern const char parity[256]; | ||
322 | |||
323 | void ecc2(const unsigned char *buf, unsigned char *code) | ||
324 | { | ||
325 | int i; | ||
326 | const unsigned long *bp = (unsigned long *)buf; | ||
327 | unsigned long cur; | ||
328 | unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7; | ||
329 | unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15; | ||
330 | unsigned long par; | ||
331 | |||
332 | par = 0; | ||
333 | rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0; | ||
334 | rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0; | ||
335 | rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0; | ||
336 | rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0; | ||
337 | |||
338 | for (i = 0; i < 64; i++) | ||
339 | { | ||
340 | cur = *bp++; | ||
341 | par ^= cur; | ||
342 | if (i & 0x01) rp5 ^= cur; else rp4 ^= cur; | ||
343 | if (i & 0x02) rp7 ^= cur; else rp6 ^= cur; | ||
344 | if (i & 0x04) rp9 ^= cur; else rp8 ^= cur; | ||
345 | if (i & 0x08) rp11 ^= cur; else rp10 ^= cur; | ||
346 | if (i & 0x10) rp13 ^= cur; else rp12 ^= cur; | ||
347 | if (i & 0x20) rp15 ^= cur; else rp14 ^= cur; | ||
348 | } | ||
349 | /* | ||
350 | we need to adapt the code generation for the fact that rp vars are now | ||
351 | long; also the column parity calculation needs to be changed. | ||
352 | we'll bring rp4 to 15 back to single byte entities by shifting and | ||
353 | xoring | ||
354 | */ | ||
355 | rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff; | ||
356 | rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff; | ||
357 | rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff; | ||
358 | rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff; | ||
359 | rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff; | ||
360 | rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff; | ||
361 | rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff; | ||
362 | rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff; | ||
363 | rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff; | ||
364 | rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff; | ||
365 | rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff; | ||
366 | rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff; | ||
367 | rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff; | ||
368 | rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff; | ||
369 | par ^= (par >> 16); | ||
370 | rp1 = (par >> 8); rp1 &= 0xff; | ||
371 | rp0 = (par & 0xff); | ||
372 | par ^= (par >> 8); par &= 0xff; | ||
373 | |||
374 | code[0] = | ||
375 | (parity[rp7] << 7) | | ||
376 | (parity[rp6] << 6) | | ||
377 | (parity[rp5] << 5) | | ||
378 | (parity[rp4] << 4) | | ||
379 | (parity[rp3] << 3) | | ||
380 | (parity[rp2] << 2) | | ||
381 | (parity[rp1] << 1) | | ||
382 | (parity[rp0]); | ||
383 | code[1] = | ||
384 | (parity[rp15] << 7) | | ||
385 | (parity[rp14] << 6) | | ||
386 | (parity[rp13] << 5) | | ||
387 | (parity[rp12] << 4) | | ||
388 | (parity[rp11] << 3) | | ||
389 | (parity[rp10] << 2) | | ||
390 | (parity[rp9] << 1) | | ||
391 | (parity[rp8]); | ||
392 | code[2] = | ||
393 | (parity[par & 0xf0] << 7) | | ||
394 | (parity[par & 0x0f] << 6) | | ||
395 | (parity[par & 0xcc] << 5) | | ||
396 | (parity[par & 0x33] << 4) | | ||
397 | (parity[par & 0xaa] << 3) | | ||
398 | (parity[par & 0x55] << 2); | ||
399 | code[0] = ~code[0]; | ||
400 | code[1] = ~code[1]; | ||
401 | code[2] = ~code[2]; | ||
402 | } | ||
403 | |||
404 | The parity array is not shown any more. Note also that for these | ||
405 | examples I kinda deviated from my regular programming style by allowing | ||
406 | multiple statements on a line, not using { } in then and else blocks | ||
407 | with only a single statement and by using operators like ^= | ||
408 | |||
409 | |||
410 | Analysis 2 | ||
411 | ========== | ||
412 | |||
413 | The code (of course) works, and hurray: we are a little bit faster than | ||
414 | the linux driver code (about 15%). But wait, don't cheer too quickly. | ||
415 | There is more to be gained. | ||
416 | If we look at e.g. rp14 and rp15 we see that we either xor our data with | ||
417 | rp14 or with rp15. However we also have par which goes over all data. | ||
418 | This means there is no need to calculate rp14 as it can be calculated from | ||
419 | rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15; | ||
420 | (or if desired we can avoid calculating rp15 and calculate it from | ||
421 | rp14). That is why some places refer to inverse parity. | ||
422 | Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13. | ||
423 | Effectively this means we can eliminate the else clause from the if | ||
424 | statements. Also we can optimise the calculation in the end a little bit | ||
425 | by going from long to byte first. Actually we can even avoid the table | ||
426 | lookups. | ||
427 | |||
428 | Attempt 3 | ||
429 | ========= | ||
430 | |||
431 | Odd replaced:: | ||
432 | |||
433 | if (i & 0x01) rp5 ^= cur; else rp4 ^= cur; | ||
434 | if (i & 0x02) rp7 ^= cur; else rp6 ^= cur; | ||
435 | if (i & 0x04) rp9 ^= cur; else rp8 ^= cur; | ||
436 | if (i & 0x08) rp11 ^= cur; else rp10 ^= cur; | ||
437 | if (i & 0x10) rp13 ^= cur; else rp12 ^= cur; | ||
438 | if (i & 0x20) rp15 ^= cur; else rp14 ^= cur; | ||
439 | |||
440 | with:: | ||
441 | |||
442 | if (i & 0x01) rp5 ^= cur; | ||
443 | if (i & 0x02) rp7 ^= cur; | ||
444 | if (i & 0x04) rp9 ^= cur; | ||
445 | if (i & 0x08) rp11 ^= cur; | ||
446 | if (i & 0x10) rp13 ^= cur; | ||
447 | if (i & 0x20) rp15 ^= cur; | ||
448 | |||
449 | and outside the loop added:: | ||
450 | |||
451 | rp4 = par ^ rp5; | ||
452 | rp6 = par ^ rp7; | ||
453 | rp8 = par ^ rp9; | ||
454 | rp10 = par ^ rp11; | ||
455 | rp12 = par ^ rp13; | ||
456 | rp14 = par ^ rp15; | ||
457 | |||
458 | And after that the code takes about 30% more time, although the number of | ||
459 | statements is reduced. This is also reflected in the assembly code. | ||
460 | |||
461 | |||
462 | Analysis 3 | ||
463 | ========== | ||
464 | |||
465 | Very weird. Guess it has to do with caching or instruction parallelism | ||
466 | or so. I also tried on an eeePC (Celeron, clocked at 900 MHz). Interesting | ||
467 | observation was that this one is only 30% slower (according to time) | ||
468 | executing the code than my 3 GHz D920 processor. | ||
469 | |||
470 | Well, it was expected not to be easy so maybe instead move to a | ||
471 | different track: let's move back to the code from attempt2 and do some | ||
472 | loop unrolling. This will eliminate a few if statements. I'll try | ||
473 | different amounts of unrolling to see what works best. | ||
474 | |||
475 | |||
476 | Attempt 4 | ||
477 | ========= | ||
478 | |||
479 | Unrolled the loop 1, 2, 3 and 4 times. | ||
480 | For 4 the code starts with:: | ||
481 | |||
482 | for (i = 0; i < 4; i++) | ||
483 | { | ||
484 | cur = *bp++; | ||
485 | par ^= cur; | ||
486 | rp4 ^= cur; | ||
487 | rp6 ^= cur; | ||
488 | rp8 ^= cur; | ||
489 | rp10 ^= cur; | ||
490 | if (i & 0x1) rp13 ^= cur; else rp12 ^= cur; | ||
491 | if (i & 0x2) rp15 ^= cur; else rp14 ^= cur; | ||
492 | cur = *bp++; | ||
493 | par ^= cur; | ||
494 | rp5 ^= cur; | ||
495 | rp6 ^= cur; | ||
496 | ... | ||
497 | |||
498 | |||
499 | Analysis 4 | ||
500 | ========== | ||
501 | |||
502 | Unrolling once gains about 15% | ||
503 | |||
504 | Unrolling twice keeps the gain at about 15% | ||
505 | |||
506 | Unrolling three times gives a gain of 30% compared to attempt 2. | ||
507 | |||
508 | Unrolling four times gives a marginal improvement compared to unrolling | ||
509 | three times. | ||
510 | |||
511 | I decided to proceed with a four time unrolled loop anyway. It was my gut | ||
512 | feeling that in the next steps I would obtain additional gain from it. | ||
513 | |||
514 | The next step was triggered by the fact that par contains the xor of all | ||
515 | bytes and rp4 and rp5 each contain the xor of half of the bytes. | ||
516 | So in effect par = rp4 ^ rp5. But as xor is commutative we can also say | ||
517 | that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can | ||
518 | eliminate rp5 (or rp4, but I already foresaw another optimisation). | ||
519 | The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15. | ||
520 | |||
521 | |||
522 | Attempt 5 | ||
523 | ========= | ||
524 | |||
525 | Effectively, all odd-numbered rp assignments in the loop were removed. | ||
526 | This included the else clause of the if statements. | ||
527 | Of course after the loop we need to correct things by adding code like:: | ||
528 | |||
529 | rp5 = par ^ rp4; | ||
530 | |||
531 | Also the initial assignments (rp5 = 0; etc) could be removed. | ||
532 | Along the line I also removed the initialisation of rp0/1/2/3. | ||
533 | |||
534 | |||
535 | Analysis 5 | ||
536 | ========== | ||
537 | |||
538 | Measurements showed this was a good move. The run-time roughly halved | ||
539 | compared with attempt 4 with 4 times unrolled, and we only require 1/3rd | ||
540 | of the processor time compared to the current code in the linux kernel. | ||
541 | |||
542 | However, still I thought there was more. I didn't like all the if | ||
543 | statements. Why not keep a running parity and only keep the last if | ||
544 | statement. Time for yet another version! | ||
545 | |||
546 | |||
547 | Attempt 6 | ||
548 | ========= | ||
549 | |||
550 | The code within the for loop was changed to:: | ||
551 | |||
552 | for (i = 0; i < 4; i++) | ||
553 | { | ||
554 | cur = *bp++; tmppar = cur; rp4 ^= cur; | ||
555 | cur = *bp++; tmppar ^= cur; rp6 ^= tmppar; | ||
556 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; | ||
557 | cur = *bp++; tmppar ^= cur; rp8 ^= tmppar; | ||
558 | |||
559 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; | ||
560 | cur = *bp++; tmppar ^= cur; rp6 ^= cur; | ||
561 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; | ||
562 | cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; | ||
563 | |||
564 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur; | ||
565 | cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur; | ||
566 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur; | ||
567 | cur = *bp++; tmppar ^= cur; rp8 ^= cur; | ||
568 | |||
569 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; | ||
570 | cur = *bp++; tmppar ^= cur; rp6 ^= cur; | ||
571 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; | ||
572 | cur = *bp++; tmppar ^= cur; | ||
573 | |||
574 | par ^= tmppar; | ||
575 | if ((i & 0x1) == 0) rp12 ^= tmppar; | ||
576 | if ((i & 0x2) == 0) rp14 ^= tmppar; | ||
577 | } | ||
578 | |||
579 | As you can see tmppar is used to accumulate the parity within a for | ||
580 | iteration. In the last 3 statements it is added to par and, if needed, | ||
581 | to rp12 and rp14. | ||
582 | |||
583 | While making the changes I also found that I could exploit that tmppar | ||
584 | contains the running parity for this iteration. So instead of having: | ||
585 | rp4 ^= cur; rp6 ^= cur; | ||
586 | I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next | ||
587 | statement. A similar change was done for rp8 and rp10 | ||
588 | |||
589 | |||
590 | Analysis 6 | ||
591 | ========== | ||
592 | |||
593 | Measuring this code again showed big gain. When executing the original | ||
594 | linux code 1 million times, this took about 1 second on my system. | ||
595 | (using time to measure the performance). After this iteration I was back | ||
596 | to 0.075 sec. Actually I had to decide to start measuring over 10 | ||
597 | million iterations in order not to lose too much accuracy. This one | ||
598 | definitely seemed to be the jackpot! | ||
599 | |||
600 | There is a little bit more room for improvement though. There are three | ||
601 | places with statements:: | ||
602 | |||
603 | rp4 ^= cur; rp6 ^= cur; | ||
604 | |||
605 | It seems more efficient to also maintain a variable rp4_6 in the while | ||
606 | loop. This eliminates 3 statements per loop. Of course after the loop we | ||
607 | need to correct by adding:: | ||
608 | |||
609 | rp4 ^= rp4_6; | ||
610 | rp6 ^= rp4_6 | ||
611 | |||
612 | Furthermore there are 4 sequential assignments to rp8. This can be | ||
613 | encoded slightly more efficiently by saving tmppar before those 4 lines | ||
614 | and later do rp8 = rp8 ^ tmppar ^ notrp8; | ||
615 | (where notrp8 is the value of rp8 before those 4 lines). | ||
616 | Again a use of the commutative property of xor. | ||
617 | Time for a new test! | ||
618 | |||
619 | |||
620 | Attempt 7 | ||
621 | ========= | ||
622 | |||
623 | The new code now looks like:: | ||
624 | |||
625 | for (i = 0; i < 4; i++) | ||
626 | { | ||
627 | cur = *bp++; tmppar = cur; rp4 ^= cur; | ||
628 | cur = *bp++; tmppar ^= cur; rp6 ^= tmppar; | ||
629 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; | ||
630 | cur = *bp++; tmppar ^= cur; rp8 ^= tmppar; | ||
631 | |||
632 | cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; | ||
633 | cur = *bp++; tmppar ^= cur; rp6 ^= cur; | ||
634 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; | ||
635 | cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; | ||
636 | |||
637 | notrp8 = tmppar; | ||
638 | cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; | ||
639 | cur = *bp++; tmppar ^= cur; rp6 ^= cur; | ||
640 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; | ||
641 | cur = *bp++; tmppar ^= cur; | ||
642 | rp8 = rp8 ^ tmppar ^ notrp8; | ||
643 | |||
644 | cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; | ||
645 | cur = *bp++; tmppar ^= cur; rp6 ^= cur; | ||
646 | cur = *bp++; tmppar ^= cur; rp4 ^= cur; | ||
647 | cur = *bp++; tmppar ^= cur; | ||
648 | |||
649 | par ^= tmppar; | ||
650 | if ((i & 0x1) == 0) rp12 ^= tmppar; | ||
651 | if ((i & 0x2) == 0) rp14 ^= tmppar; | ||
652 | } | ||
653 | rp4 ^= rp4_6; | ||
654 | rp6 ^= rp4_6; | ||
655 | |||
656 | |||
657 | Not a big change, but every penny counts :-) | ||
658 | |||
659 | |||
660 | Analysis 7 | ||
661 | ========== | ||
662 | |||
663 | Actually this made things worse. Not very much, but I don't want to move | ||
664 | into the wrong direction. Maybe something to investigate later. Could | ||
665 | have to do with caching again. | ||
666 | |||
667 | Guess that is what there is to win within the loop. Maybe unrolling one | ||
668 | more time will help. I'll keep the optimisations from 7 for now. | ||
669 | |||
670 | |||
671 | Attempt 8 | ||
672 | ========= | ||
673 | |||
674 | Unrolled the loop one more time. | ||
675 | |||
676 | |||
677 | Analysis 8 | ||
678 | ========== | ||
679 | |||
680 | This makes things worse. Let's stick with attempt 6 and continue from there. | ||
681 | Although it seems that the code within the loop cannot be optimised | ||
682 | further there is still room to optimize the generation of the ecc codes. | ||
683 | We can simply calculate the total parity. If this is 0 then rp4 = rp5 | ||
684 | etc. If the parity is 1, then rp4 = !rp5; | ||
685 | |||
686 | But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits | ||
687 | in the result byte and then do something like:: | ||
688 | |||
689 | code[0] |= (code[0] << 1); | ||
690 | |||
691 | Let's test this. | ||
692 | |||
693 | |||
694 | Attempt 9 | ||
695 | ========= | ||
696 | |||
697 | Changed the code but again this slightly degrades performance. Tried all | ||
698 | kinds of other things, like having dedicated parity arrays to avoid the | ||
699 | shift after parity[rp7] << 7; No gain. | ||
700 | Change the lookup using the parity array by using shift operators (e.g. | ||
701 | replace parity[rp7] << 7 with:: | ||
702 | |||
703 | rp7 ^= (rp7 << 4); | ||
704 | rp7 ^= (rp7 << 2); | ||
705 | rp7 ^= (rp7 << 1); | ||
706 | rp7 &= 0x80; | ||
707 | |||
708 | No gain. | ||
709 | |||
710 | The only marginal change was inverting the parity bits, so we can remove | ||
711 | the last three invert statements. | ||
712 | |||
713 | Ah well, pity this does not deliver more. Then again 10 million | ||
714 | iterations using the linux driver code takes between 13 and 13.5 | ||
715 | seconds, whereas my code now takes about 0.73 seconds for those 10 | ||
716 | million iterations. So basically I've improved the performance by a | ||
717 | factor 18 on my system. Not that bad. Of course on different hardware | ||
718 | you will get different results. No warranties! | ||
719 | |||
720 | But of course there is no such thing as a free lunch. The codesize almost | ||
721 | tripled (from 562 bytes to 1434 bytes). Then again, it is not that much. | ||
722 | |||
723 | |||
724 | Correcting errors | ||
725 | ================= | ||
726 | |||
727 | For correcting errors I again used the ST application note as a starter, | ||
728 | but I also peeked at the existing code. | ||
729 | |||
730 | The algorithm itself is pretty straightforward. Just xor the given and | ||
731 | the calculated ecc. If all bytes are 0 there is no problem. If 11 bits | ||
732 | are 1 we have one correctable bit error. If there is 1 bit 1, we have an | ||
733 | error in the given ecc code. | ||
734 | |||
735 | It proved to be fastest to do some table lookups. Performance gain | ||
736 | introduced by this is about a factor 2 on my system when a repair had to | ||
737 | be done, and 1% or so if no repair had to be done. | ||
738 | |||
739 | Code size increased from 330 bytes to 686 bytes for this function. | ||
740 | (gcc 4.2, -O3) | ||
741 | |||
742 | |||
743 | Conclusion | ||
744 | ========== | ||
745 | |||
746 | The gain when calculating the ecc is tremendous. On my development hardware | ||
747 | a speedup of a factor of 18 for ecc calculation was achieved. On a test on an | ||
748 | embedded system with a MIPS core a factor 7 was obtained. | ||
749 | |||
750 | On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor | ||
751 | 5 (big endian mode, gcc 4.1.2, -O3) | ||
752 | |||
753 | For correction not much gain could be obtained (as bitflips are rare). Then | ||
754 | again there are also far fewer cycles spent there. | ||
755 | |||
756 | It seems there is not much more gain possible in this, at least when | ||
757 | programmed in C. Of course it might be possible to squeeze something more | ||
758 | out of it with an assembler program, but due to pipeline behaviour etc | ||
759 | this is very tricky (at least for intel hw). | ||
760 | |||
761 | Author: Frans Meulenbroeks | ||
762 | |||
763 | Copyright (C) 2008 Koninklijke Philips Electronics NV. | ||
diff --git a/Documentation/driver-api/mtd/spi-nor.rst b/Documentation/driver-api/mtd/spi-nor.rst new file mode 100644 index 000000000000..f5333e3bf486 --- /dev/null +++ b/Documentation/driver-api/mtd/spi-nor.rst | |||
@@ -0,0 +1,66 @@ | |||
1 | ================= | ||
2 | SPI NOR framework | ||
3 | ================= | ||
4 | |||
5 | Part I - Why do we need this framework? | ||
6 | --------------------------------------- | ||
7 | |||
8 | SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus | ||
9 | controller operates agnostic of the specific device attached. However, some | ||
10 | controllers (such as Freescale's QuadSPI controller) cannot easily handle | ||
11 | arbitrary streams of bytes, but rather are designed specifically for SPI NOR. | ||
12 | |||
13 | In particular, Freescale's QuadSPI controller must know the NOR commands to | ||
14 | find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of | ||
15 | opcodes, addresses, or data payloads; a SPI controller simply knows to send or | ||
16 | receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under | ||
17 | which the controller driver is aware of the opcodes, addressing, and other | ||
18 | details of the SPI NOR protocol. | ||
19 | |||
20 | Part II - How does the framework work? | ||
21 | -------------------------------------- | ||
22 | |||
23 | This framework just adds a new layer between the MTD and the SPI bus driver. | ||
24 | With this new layer, the SPI NOR controller driver does not depend on the | ||
25 | m25p80 code anymore. | ||
26 | |||
27 | Before this framework, the layer is like:: | ||
28 | |||
29 | MTD | ||
30 | ------------------------ | ||
31 | m25p80 | ||
32 | ------------------------ | ||
33 | SPI bus driver | ||
34 | ------------------------ | ||
35 | SPI NOR chip | ||
36 | |||
37 | After this framework, the layer is like: | ||
38 | MTD | ||
39 | ------------------------ | ||
40 | SPI NOR framework | ||
41 | ------------------------ | ||
42 | m25p80 | ||
43 | ------------------------ | ||
44 | SPI bus driver | ||
45 | ------------------------ | ||
46 | SPI NOR chip | ||
47 | |||
48 | With the SPI NOR controller driver (Freescale QuadSPI), it looks like: | ||
49 | MTD | ||
50 | ------------------------ | ||
51 | SPI NOR framework | ||
52 | ------------------------ | ||
53 | fsl-quadSPI | ||
54 | ------------------------ | ||
55 | SPI NOR chip | ||
56 | |||
57 | Part III - How can drivers use the framework? | ||
58 | --------------------------------------------- | ||
59 | |||
60 | The main API is spi_nor_scan(). Before you call the hook, a driver should | ||
61 | initialize the necessary fields for spi_nor{}. Please see | ||
62 | drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c | ||
63 | when you want to write a new driver for a SPI NOR controller. | ||
64 | Another API is spi_nor_restore(); it is used to restore the status of the SPI | ||
65 | flash chip, such as its addressing mode. Call it whenever the driver is detached | ||
66 | from the device or the system is rebooted. | ||
diff --git a/Documentation/driver-api/nfc/index.rst b/Documentation/driver-api/nfc/index.rst new file mode 100644 index 000000000000..b6e9eedbff29 --- /dev/null +++ b/Documentation/driver-api/nfc/index.rst | |||
@@ -0,0 +1,11 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ======================== | ||
4 | Near Field Communication | ||
5 | ======================== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | nfc-hci | ||
11 | nfc-pn544 | ||
diff --git a/Documentation/driver-api/nfc/nfc-hci.rst b/Documentation/driver-api/nfc/nfc-hci.rst new file mode 100644 index 000000000000..eb8a1a14e919 --- /dev/null +++ b/Documentation/driver-api/nfc/nfc-hci.rst | |||
@@ -0,0 +1,311 @@ | |||
1 | ======================== | ||
2 | HCI backend for NFC Core | ||
3 | ======================== | ||
4 | |||
5 | - Author: Eric Lapuyade, Samuel Ortiz | ||
6 | - Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com | ||
7 | |||
8 | General | ||
9 | ------- | ||
10 | |||
11 | The HCI layer implements much of the ETSI TS 102 622 V10.2.0 specification. It | ||
12 | enables easy writing of HCI-based NFC drivers. The HCI layer runs as an NFC Core | ||
13 | backend, implementing an abstract nfc device and translating NFC Core API | ||
14 | to HCI commands and events. | ||
15 | |||
16 | HCI | ||
17 | --- | ||
18 | |||
19 | HCI registers as an nfc device with NFC Core. Requests coming from userspace are | ||
20 | routed through netlink sockets to NFC Core and then to HCI. From this point, | ||
21 | they are translated into a sequence of HCI commands sent to the HCI layer in the | ||
22 | host controller (the chip). Commands can be executed synchronously (the sending | ||
23 | context blocks waiting for response) or asynchronously (the response is returned | ||
24 | from HCI Rx context). | ||
25 | HCI events can also be received from the host controller. They will be handled | ||
26 | and a translation will be forwarded to NFC Core as needed. There are hooks to | ||
27 | let the HCI driver handle proprietary events or override standard behavior. | ||
28 | HCI uses 2 execution contexts: | ||
29 | |||
30 | - one for executing commands : nfc_hci_msg_tx_work(). Only one command | ||
31 | can be executing at any given moment. | ||
32 | - one for dispatching received events and commands : nfc_hci_msg_rx_work(). | ||
33 | |||
34 | HCI Session initialization | ||
35 | -------------------------- | ||
36 | |||
37 | The Session initialization is an HCI standard which must unfortunately | ||
38 | support proprietary gates. This is the reason why the driver will pass a list | ||
39 | of proprietary gates that must be part of the session. HCI will ensure all | ||
40 | those gates have pipes connected when the hci device is set up. | ||
41 | In case the chip supports pre-opened gates and pseudo-static pipes, the driver | ||
42 | can pass that information to HCI core. | ||
43 | |||
44 | HCI Gates and Pipes | ||
45 | ------------------- | ||
46 | |||
47 | A gate defines the 'port' where some service can be found. In order to access | ||
48 | a service, one must create a pipe to that gate and open it. In this | ||
49 | implementation, pipes are totally hidden. The public API only knows gates. | ||
50 | This is consistent with the driver's need to send commands to proprietary gates | ||
51 | without knowing the pipe connected to it. | ||
52 | |||
53 | Driver interface | ||
54 | ---------------- | ||
55 | |||
56 | A driver is generally written in two parts : the physical link management and | ||
57 | the HCI management. This makes it easier to maintain a driver for a chip that | ||
58 | can be connected using various phy (i2c, spi, ...) | ||
59 | |||
60 | HCI Management | ||
61 | -------------- | ||
62 | |||
63 | A driver would normally register itself with HCI and provide the following | ||
64 | entry points:: | ||
65 | |||
66 | struct nfc_hci_ops { | ||
67 | int (*open)(struct nfc_hci_dev *hdev); | ||
68 | void (*close)(struct nfc_hci_dev *hdev); | ||
69 | int (*hci_ready) (struct nfc_hci_dev *hdev); | ||
70 | int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb); | ||
71 | int (*start_poll) (struct nfc_hci_dev *hdev, | ||
72 | u32 im_protocols, u32 tm_protocols); | ||
73 | int (*dep_link_up)(struct nfc_hci_dev *hdev, struct nfc_target *target, | ||
74 | u8 comm_mode, u8 *gb, size_t gb_len); | ||
75 | int (*dep_link_down)(struct nfc_hci_dev *hdev); | ||
76 | int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate, | ||
77 | struct nfc_target *target); | ||
78 | int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate, | ||
79 | struct nfc_target *target); | ||
80 | int (*im_transceive) (struct nfc_hci_dev *hdev, | ||
81 | struct nfc_target *target, struct sk_buff *skb, | ||
82 | data_exchange_cb_t cb, void *cb_context); | ||
83 | int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb); | ||
84 | int (*check_presence)(struct nfc_hci_dev *hdev, | ||
85 | struct nfc_target *target); | ||
86 | int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, | ||
87 | struct sk_buff *skb); | ||
88 | }; | ||
89 | |||
90 | - open() and close() shall turn the hardware on and off. | ||
91 | - hci_ready() is an optional entry point that is called right after the hci | ||
92 | session has been set up. The driver can use it to do additional initialization | ||
93 | that must be performed using HCI commands. | ||
94 | - xmit() shall simply write a frame to the physical link. | ||
95 | - start_poll() is an optional entrypoint that shall set the hardware in polling | ||
96 | mode. This must be implemented only if the hardware uses proprietary gates or a | ||
97 | mechanism slightly different from the HCI standard. | ||
98 | - dep_link_up() is called after a p2p target has been detected, to finish | ||
99 | the p2p connection setup with hardware parameters that need to be passed back | ||
100 | to nfc core. | ||
101 | - dep_link_down() is called to bring the p2p link down. | ||
102 | - target_from_gate() is an optional entrypoint to return the nfc protocols | ||
103 | corresponding to a proprietary gate. | ||
104 | - complete_target_discovered() is an optional entry point to let the driver | ||
105 | perform additional proprietary processing necessary to auto activate the | ||
106 | discovered target. | ||
107 | - im_transceive() must be implemented by the driver if proprietary HCI commands | ||
108 | are required to send data to the tag. Some tag types will require custom | ||
109 | commands, others can be written to using the standard HCI commands. The driver | ||
110 | can check the tag type and either do proprietary processing, or return 1 to ask | ||
111 | for standard processing. The data exchange command itself must be sent | ||
112 | asynchronously. | ||
113 | - tm_send() is called to send data in the case of a p2p connection | ||
114 | - check_presence() is an optional entry point that will be called regularly | ||
115 | by the core to check that an activated tag is still in the field. If this is | ||
116 | not implemented, the core will not be able to push tag_lost events to the user | ||
117 | space | ||
118 | - event_received() is called to handle an event coming from the chip. Driver | ||
119 | can handle the event or return 1 to let HCI attempt standard processing. | ||
120 | |||
121 | On the rx path, the driver is responsible to push incoming HCP frames to HCI | ||
122 | using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling. | ||
123 | This must be done from a context that can sleep. | ||
124 | |||
125 | PHY Management | ||
126 | -------------- | ||
127 | |||
128 | The physical link (i2c, ...) management is defined by the following structure:: | ||
129 | |||
130 | struct nfc_phy_ops { | ||
131 | int (*write)(void *dev_id, struct sk_buff *skb); | ||
132 | int (*enable)(void *dev_id); | ||
133 | void (*disable)(void *dev_id); | ||
134 | }; | ||
135 | |||
136 | enable(): | ||
137 | turn the phy on (power on), make it ready to transfer data | ||
138 | disable(): | ||
139 | turn the phy off | ||
140 | write(): | ||
141 | Send a data frame to the chip. Note that to enable higher | ||
142 | layers such as an llc to store the frame for re-emission, this | ||
143 | function must not alter the skb. It must also not return a positive | ||
144 | result (return 0 for success, negative for failure). | ||
145 | |||
146 | Data coming from the chip shall be sent directly to nfc_hci_recv_frame(). | ||
147 | |||
148 | LLC | ||
149 | --- | ||
150 | |||
151 | Communication between the CPU and the chip often requires some link layer | ||
152 | protocol. Those are isolated as modules managed by the HCI layer. There are | ||
153 | currently two modules : nop (raw transfer) and shdlc. | ||
154 | A new llc must implement the following functions:: | ||
155 | |||
156 | struct nfc_llc_ops { | ||
157 | void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, | ||
158 | rcv_to_hci_t rcv_to_hci, int tx_headroom, | ||
159 | int tx_tailroom, int *rx_headroom, int *rx_tailroom, | ||
160 | llc_failure_t llc_failure); | ||
161 | void (*deinit) (struct nfc_llc *llc); | ||
162 | int (*start) (struct nfc_llc *llc); | ||
163 | int (*stop) (struct nfc_llc *llc); | ||
164 | void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb); | ||
165 | int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb); | ||
166 | }; | ||
167 | |||
168 | init(): | ||
169 | allocate and init your private storage | ||
170 | deinit(): | ||
171 | cleanup | ||
172 | start(): | ||
173 | establish the logical connection | ||
174 | stop (): | ||
175 | terminate the logical connection | ||
176 | rcv_from_drv(): | ||
177 | handle data coming from the chip, going to HCI | ||
178 | xmit_from_hci(): | ||
179 | handle data sent by HCI, going to the chip | ||
180 | |||
181 | The llc must be registered with nfc before it can be used. Do that by | ||
182 | calling:: | ||
183 | |||
184 | nfc_llc_register(const char *name, struct nfc_llc_ops *ops); | ||
185 | |||
186 | Again, note that the llc does not handle the physical link. It is thus very | ||
187 | easy to mix any physical link with any llc for a given chip driver. | ||
188 | |||
189 | Included Drivers | ||
190 | ---------------- | ||
191 | |||
192 | An HCI based driver for an NXP PN544, connected through I2C bus, and using | ||
193 | shdlc is included. | ||
194 | |||
195 | Execution Contexts | ||
196 | ------------------ | ||
197 | |||
198 | The execution contexts are the following: | ||
199 | - IRQ handler (IRQH): | ||
200 | fast, cannot sleep. sends incoming frames to HCI where they are passed to | ||
201 | the current llc. In case of shdlc, the frame is queued in shdlc rx queue. | ||
202 | |||
203 | - SHDLC State Machine worker (SMW) | ||
204 | |||
205 | Only when llc_shdlc is used: handles shdlc rx & tx queues. | ||
206 | |||
207 | Dispatches HCI cmd responses. | ||
208 | |||
209 | - HCI Tx Cmd worker (MSGTXWQ) | ||
210 | |||
211 | Serializes execution of HCI commands. | ||
212 | |||
213 | Completes execution in case of response timeout. | ||
214 | |||
215 | - HCI Rx worker (MSGRXWQ) | ||
216 | |||
217 | Dispatches incoming HCI commands or events. | ||
218 | |||
219 | - Syscall context from a userspace call (SYSCALL) | ||
220 | |||
221 | Any entrypoint in HCI called from NFC Core | ||
222 | |||
223 | Workflow executing an HCI command (using shdlc) | ||
224 | ----------------------------------------------- | ||
225 | |||
226 | Executing an HCI command can easily be performed synchronously using the | ||
227 | following API:: | ||
228 | |||
229 | int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd, | ||
230 | const u8 *param, size_t param_len, struct sk_buff **skb) | ||
231 | |||
232 | The API must be invoked from a context that can sleep. Most of the time, this | ||
233 | will be the syscall context. skb will return the result that was received in | ||
234 | the response. | ||
235 | |||
236 | Internally, execution is asynchronous. So all this API does is to enqueue the | ||
237 | HCI command, setup a local wait queue on stack, and wait_event() for completion. | ||
238 | The wait is not interruptible because it is guaranteed that the command will | ||
239 | complete after some short timeout anyway. | ||
240 | |||
241 | MSGTXWQ context will then be scheduled and invoke nfc_hci_msg_tx_work(). | ||
242 | This function will dequeue the next pending command and send its HCP fragments | ||
243 | to the lower layer which happens to be shdlc. It will then start a timer to be | ||
244 | able to complete the command with a timeout error if no response arrives. | ||
245 | |||
246 | SMW context gets scheduled and invokes nfc_shdlc_sm_work(). This function | ||
247 | handles shdlc framing in and out. It uses the driver xmit to send frames and | ||
248 | receives incoming frames in an skb queue filled from the driver IRQ handler. | ||
249 | SHDLC I(nformation) frames payload are HCP fragments. They are aggregated to | ||
250 | form complete HCI frames, which can be a response, command, or event. | ||
251 | |||
252 | HCI Responses are dispatched immediately from this context to unblock | ||
253 | waiting command execution. Response processing involves invoking the completion | ||
254 | callback that was provided by nfc_hci_msg_tx_work() when it sent the command. | ||
255 | The completion callback will then wake the syscall context. | ||
256 | |||
257 | It is also possible to execute the command asynchronously using this API:: | ||
258 | |||
259 | static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, | ||
260 | const u8 *param, size_t param_len, | ||
261 | data_exchange_cb_t cb, void *cb_context) | ||
262 | |||
263 | The workflow is the same, except that the API call returns immediately, and | ||
264 | the callback will be called with the result from the SMW context. | ||
265 | |||
266 | Workflow receiving an HCI event or command | ||
267 | ------------------------------------------ | ||
268 | |||
269 | HCI commands or events are not dispatched from SMW context. Instead, they are | ||
270 | queued to HCI rx_queue and will be dispatched from HCI rx worker | ||
271 | context (MSGRXWQ). This is done this way to allow a cmd or event handler | ||
272 | to also execute other commands (for example, handling the | ||
273 | NFC_HCI_EVT_TARGET_DISCOVERED event from PN544 requires issuing an | ||
274 | ANY_GET_PARAMETER to the reader A gate to get information on the target | ||
275 | that was discovered). | ||
276 | |||
277 | Typically, such an event will be propagated to NFC Core from MSGRXWQ context. | ||
278 | |||
279 | Error management | ||
280 | ---------------- | ||
281 | |||
282 | Errors that occur synchronously with the execution of an NFC Core request are | ||
283 | simply returned as the execution result of the request. These are easy. | ||
284 | |||
285 | Errors that occur asynchronously (e.g. in a background protocol handling thread) | ||
286 | must be reported such that upper layers don't stay ignorant that something | ||
287 | went wrong below and know that expected events will probably never happen. | ||
288 | Handling of these errors is done as follows: | ||
289 | |||
290 | - driver (pn544) fails to deliver an incoming frame: it stores the error such | ||
291 | that any subsequent call to the driver will result in this error. Then it | ||
292 | calls the standard nfc_shdlc_recv_frame() with a NULL argument to report the | ||
293 | problem above. shdlc stores an EREMOTEIO sticky status, which will trigger | ||
294 | SMW to report above in turn. | ||
295 | |||
296 | - SMW is basically a background thread to handle incoming and outgoing shdlc | ||
297 | frames. This thread will also check the shdlc sticky status and report to HCI | ||
298 | when it discovers it is not able to run anymore because of an unrecoverable | ||
299 | error that happened within shdlc or below. If the problem occurs during shdlc | ||
300 | connection, the error is reported through the connect completion. | ||
301 | |||
302 | - HCI: if an internal HCI error happens (frame is lost), or HCI is reported an | ||
303 | error from a lower layer, HCI will either complete the currently executing | ||
304 | command with that error, or notify NFC Core directly if no command is | ||
305 | executing. | ||
306 | |||
307 | - NFC Core: when NFC Core is notified of an error from below and polling is | ||
308 | active, it will send a tag discovered event with an empty tag list to the user | ||
309 | space to let it know that the poll operation will never be able to detect a | ||
310 | tag. If polling is not active and the error was sticky, lower levels will | ||
311 | return it at next invocation. | ||
diff --git a/Documentation/driver-api/nfc/nfc-pn544.rst b/Documentation/driver-api/nfc/nfc-pn544.rst new file mode 100644 index 000000000000..6b2d8aae0c4e --- /dev/null +++ b/Documentation/driver-api/nfc/nfc-pn544.rst | |||
@@ -0,0 +1,34 @@ | |||
1 | ============================================================================ | ||
2 | Kernel driver for the NXP Semiconductors PN544 Near Field Communication chip | ||
3 | ============================================================================ | ||
4 | |||
5 | |||
6 | General | ||
7 | ------- | ||
8 | |||
9 | The PN544 is an integrated transmission module for contactless | ||
10 | communication. The driver goes under drivers/nfc/ and is compiled as a | ||
11 | module named "pn544". | ||
12 | |||
13 | Host Interfaces: I2C, SPI and HSU, this driver supports currently only I2C. | ||
14 | |||
15 | Protocols | ||
16 | --------- | ||
17 | |||
18 | In the normal (HCI) mode and in the firmware update mode read and | ||
19 | write functions behave a bit differently because the message formats | ||
20 | or the protocols are different. | ||
21 | |||
22 | In the normal (HCI) mode the protocol used is derived from the ETSI | ||
23 | HCI specification. The firmware is updated using a specific protocol, | ||
24 | which is different from HCI. | ||
25 | |||
26 | HCI messages consist of an eight bit header and the message body. The | ||
27 | header contains the message length. Maximum size for an HCI message is | ||
28 | 33. In HCI mode sent messages are tested for a correct | ||
29 | checksum. Firmware update messages have the length in the second (MSB) | ||
30 | and third (LSB) bytes of the message. The maximum FW message length is | ||
31 | 1024 bytes. | ||
32 | |||
33 | For the ETSI HCI specification see | ||
34 | http://www.etsi.org/WebSite/Technologies/ProtocolSpecification.aspx | ||
diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst new file mode 100644 index 000000000000..074a423c853c --- /dev/null +++ b/Documentation/driver-api/ntb.rst | |||
@@ -0,0 +1,236 @@ | |||
1 | =========== | ||
2 | NTB Drivers | ||
3 | =========== | ||
4 | |||
5 | NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects | ||
6 | the separate memory systems of two or more computers to the same PCI-Express | ||
7 | fabric. Existing NTB hardware supports a common feature set: doorbell | ||
8 | registers and memory translation windows, as well as non common features like | ||
9 | scratchpad and message registers. Scratchpad registers are read-and-writable | ||
10 | registers that are accessible from either side of the device, so that peers can | ||
11 | exchange a small amount of information at a fixed address. Message registers can | ||
12 | be utilized for the same purpose. Additionally they are provided with | ||
13 | special status bits to make sure the information isn't rewritten by another | ||
14 | peer. Doorbell registers provide a way for peers to send interrupt events. | ||
15 | Memory windows allow translated read and write access to the peer memory. | ||
16 | |||
17 | NTB Core Driver (ntb) | ||
18 | ===================== | ||
19 | |||
20 | The NTB core driver defines an api wrapping the common feature set, and allows | ||
21 | clients interested in NTB features to discover the NTB devices supported by | ||
22 | hardware drivers. The term "client" is used here to mean an upper layer | ||
23 | component making use of the NTB api. The term "driver," or "hardware driver," | ||
24 | is used here to mean a driver for a specific vendor and model of NTB hardware. | ||
25 | |||
26 | NTB Client Drivers | ||
27 | ================== | ||
28 | |||
29 | NTB client drivers should register with the NTB core driver. After | ||
30 | registering, the client probe and remove functions will be called appropriately | ||
31 | as ntb hardware, or hardware drivers, are inserted and removed. The | ||
32 | registration uses the Linux Device framework, so it should feel familiar to | ||
33 | anyone who has written a pci driver. | ||
34 | |||
35 | NTB Typical client driver implementation | ||
36 | ---------------------------------------- | ||
37 | |||
38 | Primary purpose of NTB is to share some piece of memory between at least two | ||
39 | systems. So the NTB device features like Scratchpad/Message registers are | ||
40 | mainly used to perform the proper memory window initialization. Typically | ||
41 | there are two types of memory window interfaces supported by the NTB API: | ||
42 | inbound translation configured on the local ntb port and outbound translation | ||
43 | configured by the peer, on the peer ntb port. The first type is | ||
44 | depicted on the next figure:: | ||
45 | |||
46 | Inbound translation: | ||
47 | |||
48 | Memory: Local NTB Port: Peer NTB Port: Peer MMIO: | ||
49 | ____________ | ||
50 | | dma-mapped |-ntb_mw_set_trans(addr) | | ||
51 | | memory | _v____________ | ______________ | ||
52 | | (addr) |<======| MW xlat addr |<====| MW base addr |<== memory-mapped IO | ||
53 | |------------| |--------------| | |--------------| | ||
54 | |||
55 | So a typical scenario of the first type of memory window initialization looks like: | ||
56 | 1) allocate a memory region, 2) put translated address to NTB config, | ||
57 | 3) somehow notify a peer device of performed initialization, 4) peer device | ||
58 | maps corresponding outbound memory window so to have access to the shared | ||
59 | memory region. | ||
60 | |||
61 | The second type of interface, that implies the shared windows being | ||
62 | initialized by a peer device, is depicted on the figure:: | ||
63 | |||
64 | Outbound translation: | ||
65 | |||
66 | Memory: Local NTB Port: Peer NTB Port: Peer MMIO: | ||
67 | ____________ ______________ | ||
68 | | dma-mapped | | | MW base addr |<== memory-mapped IO | ||
69 | | memory | | |--------------| | ||
70 | | (addr) |<===================| MW xlat addr |<-ntb_peer_mw_set_trans(addr) | ||
71 | |------------| | |--------------| | ||
72 | |||
73 | Typical scenario of the second type interface initialization would be: | ||
74 | 1) allocate a memory region, 2) somehow deliver a translated address to a peer | ||
75 | device, 3) peer puts the translated address to NTB config, 4) peer device maps | ||
76 | outbound memory window so to have access to the shared memory region. | ||
77 | |||
78 | As one can see the described scenarios can be combined in one portable | ||
79 | algorithm. | ||
80 | |||
81 | Local device: | ||
82 | 1) Allocate memory for a shared window | ||
83 | 2) Initialize memory window by translated address of the allocated region | ||
84 | (it may fail if local memory window initialization is unsupported) | ||
85 | 3) Send the translated address and memory window index to a peer device | ||
86 | |||
87 | Peer device: | ||
88 | 1) Initialize memory window with retrieved address of the allocated | ||
89 | by another device memory region (it may fail if peer memory window | ||
90 | initialization is unsupported) | ||
91 | 2) Map outbound memory window | ||
92 | |||
93 | In accordance with this scenario, the NTB Memory Window API can be used as | ||
94 | follows: | ||
95 | |||
96 | Local device: | ||
97 | 1) ntb_mw_count(pidx) - retrieve number of memory ranges, which can | ||
98 | be allocated for memory windows between local device and peer device | ||
99 | of port with specified index. | ||
100 | 2) ntb_get_align(pidx, midx) - retrieve parameters restricting the | ||
101 | shared memory region alignment and size. Then memory can be properly | ||
102 | allocated. | ||
103 | 3) Allocate physically contiguous memory region in compliance with | ||
104 | restrictions retrieved in 2). | ||
105 | 4) ntb_mw_set_trans(pidx, midx) - try to set translation address of | ||
106 | the memory window with specified index for the defined peer device | ||
107 | (it may fail if local translated address setting is not supported) | ||
108 | 5) Send translated base address (usually together with memory window | ||
109 | number) to the peer device using, for instance, scratchpad or message | ||
110 | registers. | ||
111 | |||
112 | Peer device: | ||
113 | 1) ntb_peer_mw_set_trans(pidx, midx) - try to set received from other | ||
114 | device (related to pidx) translated address for specified memory | ||
115 | window. It may fail if retrieved address, for instance, exceeds | ||
116 | maximum possible address or isn't properly aligned. | ||
117 | 2) ntb_peer_mw_get_addr(widx) - retrieve MMIO address to map the memory | ||
118 | window so to have an access to the shared memory. | ||
119 | |||
120 | It is also worth noting that the method ntb_mw_count(pidx) should return the | ||
121 | same value as ntb_peer_mw_count() on the peer with port index - pidx. | ||
122 | |||
123 | NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev) | ||
124 | ------------------------------------------------------------------ | ||
125 | |||
126 | The primary client for NTB is the Transport client, used in tandem with NTB | ||
127 | Netdev. These drivers function together to create a logical link to the peer, | ||
128 | across the ntb, to exchange packets of network data. The Transport client | ||
129 | establishes a logical link to the peer, and creates queue pairs to exchange | ||
130 | messages and data. The NTB Netdev then creates an ethernet device using a | ||
131 | Transport queue pair. Network data is copied between socket buffers and the | ||
132 | Transport queue pair buffer. The Transport client may be used for other things | ||
133 | besides Netdev, however no other applications have yet been written. | ||
134 | |||
135 | NTB Ping Pong Test Client (ntb\_pingpong) | ||
136 | ----------------------------------------- | ||
137 | |||
138 | The Ping Pong test client serves as a demonstration to exercise the doorbell | ||
139 | and scratchpad registers of NTB hardware, and as an example simple NTB client. | ||
140 | Ping Pong enables the link when started, waits for the NTB link to come up, and | ||
141 | then proceeds to read and write the doorbell and scratchpad registers of the NTB. | ||
142 | The peers interrupt each other using a bit mask of doorbell bits, which is | ||
143 | shifted by one in each round, to test the behavior of multiple doorbell bits | ||
144 | and interrupt vectors. The Ping Pong driver also reads the first local | ||
145 | scratchpad, and writes the value plus one to the first peer scratchpad, each | ||
146 | round before writing the peer doorbell register. | ||
147 | |||
148 | Module Parameters: | ||
149 | |||
150 | * unsafe - Some hardware has known issues with scratchpad and doorbell | ||
151 | registers. By default, Ping Pong will not attempt to exercise such | ||
152 | hardware. You may override this behavior at your own risk by setting | ||
153 | unsafe=1. | ||
154 | * delay\_ms - Specify the delay between receiving a doorbell | ||
155 | interrupt event and setting the peer doorbell register for the next | ||
156 | round. | ||
157 | * init\_db - Specify the doorbell bits to start new series of rounds. A new | ||
158 | series begins once all the doorbell bits have been shifted out of | ||
159 | range. | ||
160 | * dyndbg - It is suggested to specify dyndbg=+p when loading this module, and | ||
161 | then to observe debugging output on the console. | ||
162 | |||
163 | NTB Tool Test Client (ntb\_tool) | ||
164 | -------------------------------- | ||
165 | |||
166 | The Tool test client serves for debugging, primarily, ntb hardware and drivers. | ||
167 | The Tool provides access through debugfs for reading, setting, and clearing the | ||
168 | NTB doorbell, and reading and writing scratchpads. | ||
169 | |||
170 | The Tool does not currently have any module parameters. | ||
171 | |||
172 | Debugfs Files: | ||
173 | |||
174 | * *debugfs*/ntb\_tool/*hw*/ | ||
175 | A directory in debugfs will be created for each | ||
176 | NTB device probed by the tool. This directory is shortened to *hw* | ||
177 | below. | ||
178 | * *hw*/db | ||
179 | This file is used to read, set, and clear the local doorbell. Not | ||
180 | all operations may be supported by all hardware. To read the doorbell, | ||
181 | read the file. To set the doorbell, write `s` followed by the bits to | ||
182 | set (eg: `echo 's 0x0101' > db`). To clear the doorbell, write `c` | ||
183 | followed by the bits to clear. | ||
184 | * *hw*/mask | ||
185 | This file is used to read, set, and clear the local doorbell mask. | ||
186 | See *db* for details. | ||
187 | * *hw*/peer\_db | ||
188 | This file is used to read, set, and clear the peer doorbell. | ||
189 | See *db* for details. | ||
190 | * *hw*/peer\_mask | ||
191 | This file is used to read, set, and clear the peer doorbell | ||
192 | mask. See *db* for details. | ||
193 | * *hw*/spad | ||
194 | This file is used to read and write local scratchpads. To read | ||
195 | the values of all scratchpads, read the file. To write values, write a | ||
196 | series of pairs of scratchpad number and value | ||
197 | (eg: `echo '4 0x123 7 0xabc' > spad` | ||
198 | # to set scratchpads `4` and `7` to `0x123` and `0xabc`, respectively). | ||
199 | * *hw*/peer\_spad | ||
200 | This file is used to read and write peer scratchpads. See | ||
201 | *spad* for details. | ||
202 | |||
203 | NTB Hardware Drivers | ||
204 | ==================== | ||
205 | |||
206 | NTB hardware drivers should register devices with the NTB core driver. After | ||
207 | registering, clients probe and remove functions will be called. | ||
208 | |||
209 | NTB Intel Hardware Driver (ntb\_hw\_intel) | ||
210 | ------------------------------------------ | ||
211 | |||
212 | The Intel hardware driver supports NTB on Xeon and Atom CPUs. | ||
213 | |||
214 | Module Parameters: | ||
215 | |||
216 | * b2b\_mw\_idx | ||
217 | If the peer ntb is to be accessed via a memory window, then use | ||
218 | this memory window to access the peer ntb. A value of zero or positive | ||
219 | starts from the first mw idx, and a negative value starts from the last | ||
220 | mw idx. Both sides MUST set the same value here! The default value is | ||
221 | `-1`. | ||
222 | * b2b\_mw\_share | ||
223 | If the peer ntb is to be accessed via a memory window, and if | ||
224 | the memory window is large enough, still allow the client to use the | ||
225 | second half of the memory window for address translation to the peer. | ||
226 | * xeon\_b2b\_usd\_bar2\_addr64 | ||
227 | If using B2B topology on Xeon hardware, use | ||
228 | this 64 bit address on the bus between the NTB devices for the window | ||
229 | at BAR2, on the upstream side of the link. | ||
230 | * xeon\_b2b\_usd\_bar4\_addr64 - See *xeon\_b2b\_usd\_bar2\_addr64*. | ||
231 | * xeon\_b2b\_usd\_bar4\_addr32 - See *xeon\_b2b\_usd\_bar2\_addr64*. | ||
232 | * xeon\_b2b\_usd\_bar5\_addr32 - See *xeon\_b2b\_usd\_bar2\_addr64*. | ||
233 | * xeon\_b2b\_dsd\_bar2\_addr64 - See *xeon\_b2b\_usd\_bar2\_addr64*. | ||
234 | * xeon\_b2b\_dsd\_bar4\_addr64 - See *xeon\_b2b\_usd\_bar2\_addr64*. | ||
235 | * xeon\_b2b\_dsd\_bar4\_addr32 - See *xeon\_b2b\_usd\_bar2\_addr64*. | ||
236 | * xeon\_b2b\_dsd\_bar5\_addr32 - See *xeon\_b2b\_usd\_bar2\_addr64*. | ||
diff --git a/Documentation/driver-api/nvdimm/btt.rst b/Documentation/driver-api/nvdimm/btt.rst new file mode 100644 index 000000000000..107395c042ae --- /dev/null +++ b/Documentation/driver-api/nvdimm/btt.rst | |||
@@ -0,0 +1,285 @@ | |||
1 | ============================= | ||
2 | BTT - Block Translation Table | ||
3 | ============================= | ||
4 | |||
5 | |||
6 | 1. Introduction | ||
7 | =============== | ||
8 | |||
9 | Persistent memory based storage is able to perform IO at byte (or more | ||
10 | accurately, cache line) granularity. However, we often want to expose such | ||
11 | storage as traditional block devices. The block drivers for persistent memory | ||
12 | will do exactly this. However, they do not provide any atomicity guarantees. | ||
13 | Traditional SSDs typically provide protection against torn sectors in hardware, | ||
14 | using stored energy in capacitors to complete in-flight block writes, or perhaps | ||
15 | in firmware. We don't have this luxury with persistent memory - if a write is in | ||
16 | progress, and we experience a power failure, the block will contain a mix of old | ||
17 | and new data. Applications may not be prepared to handle such a scenario. | ||
18 | |||
19 | The Block Translation Table (BTT) provides atomic sector update semantics for | ||
20 | persistent memory devices, so that applications that rely on sector writes not | ||
21 | being torn can continue to do so. The BTT manifests itself as a stacked block | ||
22 | device, and reserves a portion of the underlying storage for its metadata. At | ||
23 | the heart of it, is an indirection table that re-maps all the blocks on the | ||
24 | volume. It can be thought of as an extremely simple file system that only | ||
25 | provides atomic sector updates. | ||
26 | |||
27 | |||
28 | 2. Static Layout | ||
29 | ================ | ||
30 | |||
31 | The underlying storage on which a BTT can be laid out is not limited in any way. | ||
32 | The BTT, however, splits the available space into chunks of up to 512 GiB, | ||
33 | called "Arenas". | ||
34 | |||
35 | Each arena follows the same layout for its metadata, and all references in an | ||
36 | arena are internal to it (with the exception of one field that points to the | ||
37 | next arena). The following depicts the "On-disk" metadata layout:: | ||
38 | |||
39 | |||
40 | Backing Store +-------> Arena | ||
41 | +---------------+ | +------------------+ | ||
42 | | | | | Arena info block | | ||
43 | | Arena 0 +---+ | 4K | | ||
44 | | 512G | +------------------+ | ||
45 | | | | | | ||
46 | +---------------+ | | | ||
47 | | | | | | ||
48 | | Arena 1 | | Data Blocks | | ||
49 | | 512G | | | | ||
50 | | | | | | ||
51 | +---------------+ | | | ||
52 | | . | | | | ||
53 | | . | | | | ||
54 | | . | | | | ||
55 | | | | | | ||
56 | | | | | | ||
57 | +---------------+ +------------------+ | ||
58 | | | | ||
59 | | BTT Map | | ||
60 | | | | ||
61 | | | | ||
62 | +------------------+ | ||
63 | | | | ||
64 | | BTT Flog | | ||
65 | | | | ||
66 | +------------------+ | ||
67 | | Info block copy | | ||
68 | | 4K | | ||
69 | +------------------+ | ||
70 | |||
71 | |||
72 | 3. Theory of Operation | ||
73 | ====================== | ||
74 | |||
75 | |||
76 | a. The BTT Map | ||
77 | -------------- | ||
78 | |||
79 | The map is a simple lookup/indirection table that maps an LBA to an internal | ||
80 | block. Each map entry is 32 bits. The two most significant bits are special | ||
81 | flags, and the remaining form the internal block number. | ||
82 | |||
83 | ======== ============================================================= | ||
84 | Bit Description | ||
85 | ======== ============================================================= | ||
86 | 31 - 30 Error and Zero flags - Used in the following way:: | ||
87 | |||
88 | == == ==================================================== | ||
89 | 31 30 Description | ||
90 | == == ==================================================== | ||
91 | 0 0 Initial state. Reads return zeroes; Premap = Postmap | ||
92 | 0 1 Zero state: Reads return zeroes | ||
93 | 1 0 Error state: Reads fail; Writes clear 'E' bit | ||
94 | 1 1 Normal Block – has valid postmap | ||
95 | == == ==================================================== | ||
96 | |||
97 | 29 - 0 Mappings to internal 'postmap' blocks | ||
98 | ======== ============================================================= | ||
99 | |||
100 | |||
101 | Some of the terminology that will be subsequently used: | ||
102 | |||
103 | ============ ================================================================ | ||
104 | External LBA LBA as made visible to upper layers. | ||
105 | ABA Arena Block Address - Block offset/number within an arena | ||
106 | Premap ABA The block offset into an arena, which was decided upon by range | ||
107 | checking the External LBA | ||
108 | Postmap ABA The block number in the "Data Blocks" area obtained after | ||
109 | indirection from the map | ||
110 | nfree The number of free blocks that are maintained at any given time. | ||
111 | This is the number of concurrent writes that can happen to the | ||
112 | arena. | ||
113 | ============ ================================================================ | ||
114 | |||
115 | |||
116 | For example, after adding a BTT, we surface a disk of 1024G. We get a read for | ||
117 | the external LBA at 768G. This falls into the second arena, and of the 512G | ||
118 | worth of blocks that this arena contributes, this block is at 256G. Thus, the | ||
119 | premap ABA is 256G. We now refer to the map, and find out the mapping for block | ||
120 | 'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64. | ||
121 | |||
122 | |||
123 | b. The BTT Flog | ||
124 | --------------- | ||
125 | |||
126 | The BTT provides sector atomicity by making every write an "allocating write", | ||
127 | i.e. every write goes to a "free" block. A running list of free blocks is | ||
128 | maintained in the form of the BTT flog. 'Flog' is a combination of the words | ||
129 | "free list" and "log". The flog contains 'nfree' entries, and an entry contains: | ||
130 | |||
131 | ======== ===================================================================== | ||
132 | lba The premap ABA that is being written to | ||
133 | old_map The old postmap ABA - after 'this' write completes, this will be a | ||
134 | free block. | ||
135 | new_map The new postmap ABA. The map will be updated to reflect this | ||
136 | lba->postmap_aba mapping, but we log it here in case we have to | ||
137 | recover. | ||
138 | seq Sequence number to mark which of the 2 sections of this flog entry is | ||
139 | valid/newest. It cycles between 01->10->11->01 (binary) under normal | ||
140 | operation, with 00 indicating an uninitialized state. | ||
141 | lba' alternate lba entry | ||
142 | old_map' alternate old postmap entry | ||
143 | new_map' alternate new postmap entry | ||
144 | seq' alternate sequence number. | ||
145 | ======== ===================================================================== | ||
146 | |||
147 | Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also | ||
148 | padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are | ||
149 | done such that for any entry being written, it: | ||
150 | a. overwrites the 'old' section in the entry based on sequence numbers | ||
151 | b. writes the 'new' section such that the sequence number is written last. | ||
152 | |||
153 | |||
154 | c. The concept of lanes | ||
155 | ----------------------- | ||
156 | |||
157 | While 'nfree' describes the number of IOs an arena can process | ||
158 | concurrently, 'nlanes' is the number of IOs the BTT device as a whole can | ||
159 | process:: | ||
160 | |||
161 | nlanes = min(nfree, num_cpus) | ||
162 | |||
163 | A lane number is obtained at the start of any IO, and is used for indexing into | ||
164 | all the on-disk and in-memory data structures for the duration of the IO. If | ||
165 | there are more CPUs than the max number of available lanes, then lanes are | ||
166 | protected by spinlocks. | ||
167 | |||
168 | |||
169 | d. In-memory data structure: Read Tracking Table (RTT) | ||
170 | ------------------------------------------------------ | ||
171 | |||
172 | Consider a case where we have two threads, one doing reads and the other, | ||
173 | writes. We can hit a condition where the writer thread grabs a free block to do | ||
174 | a new IO, but the (slow) reader thread is still reading from it. In other words, | ||
175 | the reader consulted a map entry, and started reading the corresponding block. A | ||
176 | writer started writing to the same external LBA, and finished the write updating | ||
177 | the map for that external LBA to point to its new postmap ABA. At this point the | ||
178 | internal, postmap block that the reader is (still) reading has been inserted | ||
179 | into the list of free blocks. If another write comes in for the same LBA, it can | ||
180 | grab this free block, and start writing to it, causing the reader to read | ||
181 | incorrect data. To prevent this, we introduce the RTT. | ||
182 | |||
183 | The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts | ||
184 | into rtt[lane_number], the postmap ABA it is reading, and clears it after the | ||
185 | read is complete. Every writer thread, after grabbing a free block, checks the | ||
186 | RTT for its presence. If the postmap free block is in the RTT, it waits till the | ||
187 | reader clears the RTT entry, and only then starts writing to it. | ||
188 | |||
189 | |||
190 | e. In-memory data structure: map locks | ||
191 | -------------------------------------- | ||
192 | |||
193 | Consider a case where two writer threads are writing to the same LBA. There can | ||
194 | be a race in the following sequence of steps:: | ||
195 | |||
196 | free[lane] = map[premap_aba] | ||
197 | map[premap_aba] = postmap_aba | ||
198 | |||
199 | Both threads can update their respective free[lane] with the same old, freed | ||
200 | postmap_aba. This has made the layout inconsistent by losing a free entry, and | ||
201 | at the same time, duplicating another free entry for two lanes. | ||
202 | |||
203 | To solve this, we could have a single map lock (per arena) that has to be taken | ||
204 | before performing the above sequence, but we feel that could be too contentious. | ||
205 | Instead we use an array of (nfree) map_locks that is indexed by | ||
206 | (premap_aba modulo nfree). | ||
207 | |||
208 | |||
209 | f. Reconstruction from the Flog | ||
210 | ------------------------------- | ||
211 | |||
212 | On startup, we analyze the BTT flog to create our list of free blocks. We walk | ||
213 | through all the entries, and for each lane, of the set of two possible | ||
214 | 'sections', we always look at the most recent one only (based on the sequence | ||
215 | number). The reconstruction rules/steps are simple: | ||
216 | |||
217 | - Read map[log_entry.lba]. | ||
218 | - If log_entry.new matches the map entry, then log_entry.old is free. | ||
219 | - If log_entry.new does not match the map entry, then log_entry.new is free. | ||
220 | (This case can only be caused by power-fails/unsafe shutdowns) | ||
221 | |||
222 | |||
223 | g. Summarizing - Read and Write flows | ||
224 | ------------------------------------- | ||
225 | |||
226 | Read: | ||
227 | |||
228 | 1. Convert external LBA to arena number + pre-map ABA | ||
229 | 2. Get a lane (and take lane_lock) | ||
230 | 3. Read map to get the entry for this pre-map ABA | ||
231 | 4. Enter post-map ABA into RTT[lane] | ||
232 | 5. If TRIM flag set in map, return zeroes, and end IO (go to step 8) | ||
233 | 6. If ERROR flag set in map, end IO with EIO (go to step 8) | ||
234 | 7. Read data from this block | ||
235 | 8. Remove post-map ABA entry from RTT[lane] | ||
236 | 9. Release lane (and lane_lock) | ||
237 | |||
238 | Write: | ||
239 | |||
240 | 1. Convert external LBA to Arena number + pre-map ABA | ||
241 | 2. Get a lane (and take lane_lock) | ||
242 | 3. Use lane to index into in-memory free list and obtain a new block, next flog | ||
243 | index, next sequence number | ||
244 | 4. Scan the RTT to check if free block is present, and spin/wait if it is. | ||
245 | 5. Write data to this free block | ||
246 | 6. Read map to get the existing post-map ABA entry for this pre-map ABA | ||
247 | 7. Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num] | ||
248 | 8. Write new post-map ABA into map. | ||
249 | 9. Write old post-map entry into the free list | ||
250 | 10. Calculate next sequence number and write into the free list entry | ||
251 | 11. Release lane (and lane_lock) | ||
252 | |||
253 | |||
254 | 4. Error Handling | ||
255 | ================= | ||
256 | |||
257 | An arena would be in an error state if any of the metadata is corrupted | ||
258 | irrecoverably, either due to a bug or a media error. The following conditions | ||
259 | indicate an error: | ||
260 | |||
261 | - Info block checksum does not match (and recovering from the copy also fails) | ||
262 | - All internal available blocks are not uniquely and entirely addressed by the | ||
263 | sum of mapped blocks and free blocks (from the BTT flog). | ||
264 | - Rebuilding free list from the flog reveals missing/duplicate/impossible | ||
265 | entries | ||
266 | - A map entry is out of bounds | ||
267 | |||
268 | If any of these error conditions are encountered, the arena is put into a read | ||
269 | only state using a flag in the info block. | ||
270 | |||
271 | |||
272 | 5. Usage | ||
273 | ======== | ||
274 | |||
275 | The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem | ||
276 | (pmem, or blk mode). The easiest way to set up such a namespace is using the | ||
277 | 'ndctl' utility [1]: | ||
278 | |||
279 | For example, the ndctl command line to set up a btt with a 4k sector size is:: | ||
280 | |||
281 | ndctl create-namespace -f -e namespace0.0 -m sector -l 4k | ||
282 | |||
283 | See ndctl create-namespace --help for more options. | ||
284 | |||
285 | [1]: https://github.com/pmem/ndctl | ||
diff --git a/Documentation/driver-api/nvdimm/index.rst b/Documentation/driver-api/nvdimm/index.rst new file mode 100644 index 000000000000..a4f8f98aeb94 --- /dev/null +++ b/Documentation/driver-api/nvdimm/index.rst | |||
@@ -0,0 +1,12 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | =================================== | ||
4 | Non-Volatile Memory Device (NVDIMM) | ||
5 | =================================== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | nvdimm | ||
11 | btt | ||
12 | security | ||
diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst new file mode 100644 index 000000000000..08f855cbb4e6 --- /dev/null +++ b/Documentation/driver-api/nvdimm/nvdimm.rst | |||
@@ -0,0 +1,887 @@ | |||
1 | =============================== | ||
2 | LIBNVDIMM: Non-Volatile Devices | ||
3 | =============================== | ||
4 | |||
5 | libnvdimm - kernel / libndctl - userspace helper library | ||
6 | |||
7 | linux-nvdimm@lists.01.org | ||
8 | |||
9 | Version 13 | ||
10 | |||
11 | .. contents: | ||
12 | |||
13 | Glossary | ||
14 | Overview | ||
15 | Supporting Documents | ||
16 | Git Trees | ||
17 | LIBNVDIMM PMEM and BLK | ||
18 | Why BLK? | ||
19 | PMEM vs BLK | ||
20 | BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX | ||
21 | Example NVDIMM Platform | ||
22 | LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API | ||
23 | LIBNDCTL: Context | ||
24 | libndctl: instantiate a new library context example | ||
25 | LIBNVDIMM/LIBNDCTL: Bus | ||
26 | libnvdimm: control class device in /sys/class | ||
27 | libnvdimm: bus | ||
28 | libndctl: bus enumeration example | ||
29 | LIBNVDIMM/LIBNDCTL: DIMM (NMEM) | ||
30 | libnvdimm: DIMM (NMEM) | ||
31 | libndctl: DIMM enumeration example | ||
32 | LIBNVDIMM/LIBNDCTL: Region | ||
33 | libnvdimm: region | ||
34 | libndctl: region enumeration example | ||
35 | Why Not Encode the Region Type into the Region Name? | ||
36 | How Do I Determine the Major Type of a Region? | ||
37 | LIBNVDIMM/LIBNDCTL: Namespace | ||
38 | libnvdimm: namespace | ||
39 | libndctl: namespace enumeration example | ||
40 | libndctl: namespace creation example | ||
41 | Why the Term "namespace"? | ||
42 | LIBNVDIMM/LIBNDCTL: Block Translation Table "btt" | ||
43 | libnvdimm: btt layout | ||
44 | libndctl: btt creation example | ||
45 | Summary LIBNDCTL Diagram | ||
46 | |||
47 | |||
48 | Glossary | ||
49 | ======== | ||
50 | |||
51 | PMEM: | ||
52 | A system-physical-address range where writes are persistent. A | ||
53 | block device composed of PMEM is capable of DAX. A PMEM address range | ||
54 | may span an interleave of several DIMMs. | ||
55 | |||
56 | BLK: | ||
57 | A set of one or more programmable memory mapped apertures provided | ||
58 | by a DIMM to access its media. This indirection precludes the | ||
59 | performance benefit of interleaving, but enables DIMM-bounded failure | ||
60 | modes. | ||
61 | |||
62 | DPA: | ||
63 | DIMM Physical Address, is a DIMM-relative offset. With one DIMM in | ||
64 | the system there would be a 1:1 system-physical-address:DPA association. | ||
65 | Once more DIMMs are added a memory controller interleave must be | ||
66 | decoded to determine the DPA associated with a given | ||
67 | system-physical-address. BLK capacity always has a 1:1 relationship | ||
68 | with a single-DIMM's DPA range. | ||
69 | |||
70 | DAX: | ||
71 | File system extensions to bypass the page cache and block layer to | ||
72 | mmap persistent memory, from a PMEM block device, directly into a | ||
73 | process address space. | ||
74 | |||
75 | DSM: | ||
76 | Device Specific Method: ACPI method to control specific | ||
77 | device - in this case the firmware. | ||
78 | |||
79 | DCR: | ||
80 | NVDIMM Control Region Structure defined in ACPI 6 Section 5.2.25.5. | ||
81 | It defines a vendor-id, device-id, and interface format for a given DIMM. | ||
82 | |||
83 | BTT: | ||
84 | Block Translation Table: Persistent memory is byte addressable. | ||
85 | Existing software may have an expectation that the power-fail-atomicity | ||
86 | of writes is at least one sector, 512 bytes. The BTT is an indirection | ||
87 | table with atomic update semantics to front a PMEM/BLK block device | ||
88 | driver and present arbitrary atomic sector sizes. | ||
89 | |||
90 | LABEL: | ||
91 | Metadata stored on a DIMM device that partitions and identifies | ||
92 | (persistently names) storage between PMEM and BLK. It also partitions | ||
93 | BLK storage to host BTTs with different parameters per BLK-partition. | ||
94 | Note that traditional partition tables, GPT/MBR, are layered on top of a | ||
95 | BLK or PMEM device. | ||
96 | |||
97 | |||
98 | Overview | ||
99 | ======== | ||
100 | |||
101 | The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely, | ||
102 | PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM | ||
103 | and BLK mode access. These three modes of operation are described by | ||
104 | the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6. While the LIBNVDIMM | ||
105 | implementation is generic and supports pre-NFIT platforms, it was guided | ||
106 | by the superset of capabilities needed to support this ACPI 6 definition | ||
107 | for NVDIMM resources. The bulk of the kernel implementation is in place | ||
108 | to handle the case where DPA accessible via PMEM is aliased with DPA | ||
109 | accessible via BLK. When that occurs a LABEL is needed to reserve DPA | ||
110 | for exclusive access via one mode at a time. | ||
111 | |||
112 | Supporting Documents | ||
113 | -------------------- | ||
114 | |||
115 | ACPI 6: | ||
116 | http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf | ||
117 | NVDIMM Namespace: | ||
118 | http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf | ||
119 | DSM Interface Example: | ||
120 | http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf | ||
121 | Driver Writer's Guide: | ||
122 | http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf | ||
123 | |||
124 | Git Trees | ||
125 | --------- | ||
126 | |||
127 | LIBNVDIMM: | ||
128 | https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git | ||
129 | LIBNDCTL: | ||
130 | https://github.com/pmem/ndctl.git | ||
131 | PMEM: | ||
132 | https://github.com/01org/prd | ||
133 | |||
134 | |||
135 | LIBNVDIMM PMEM and BLK | ||
136 | ====================== | ||
137 | |||
138 | Prior to the arrival of the NFIT, non-volatile memory was described to a | ||
139 | system in various ad-hoc ways. Usually only the bare minimum was | ||
140 | provided, namely, a single system-physical-address range where writes | ||
141 | are expected to be durable after a system power loss. Now, the NFIT | ||
142 | specification standardizes not only the description of PMEM, but also | ||
143 | BLK and platform message-passing entry points for control and | ||
144 | configuration. | ||
145 | |||
146 | For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block | ||
147 | device driver: | ||
148 | |||
149 | 1. PMEM (nd_pmem.ko): Drives a system-physical-address range. This | ||
150 | range is contiguous in system memory and may be interleaved (hardware | ||
151 | memory controller striped) across multiple DIMMs. When interleaved the | ||
152 | platform may optionally provide details of which DIMMs are participating | ||
153 | in the interleave. | ||
154 | |||
155 | Note that while LIBNVDIMM describes system-physical-address ranges that may | ||
156 | alias with BLK access as ND_NAMESPACE_PMEM ranges and those without | ||
157 | alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no | ||
158 | distinction. The different device-types are an implementation detail | ||
159 | that userspace can exploit to implement policies like "only interface | ||
160 | with address ranges from certain DIMMs". It is worth noting that when | ||
161 | aliasing is present and a DIMM lacks a label, then no block device can | ||
162 | be created by default as userspace needs to do at least one allocation | ||
163 | of DPA to the PMEM range. In contrast ND_NAMESPACE_IO ranges, once | ||
164 | registered, can be immediately attached to nd_pmem. | ||
165 | |||
166 | 2. BLK (nd_blk.ko): This driver performs I/O using a set of platform | ||
167 | defined apertures. A set of apertures will access just one DIMM. | ||
168 | Multiple windows (apertures) allow multiple concurrent accesses, much like | ||
169 | tagged-command-queuing, and would likely be used by different threads or | ||
170 | different CPUs. | ||
171 | |||
172 | The NFIT specification defines a standard format for a BLK-aperture, but | ||
173 | the spec also allows for vendor specific layouts, and non-NFIT BLK | ||
174 | implementations may have other designs for BLK I/O. For this reason | ||
175 | "nd_blk" calls back into platform-specific code to perform the I/O. | ||
176 | |||
177 | One such implementation is defined in the "Driver Writer's Guide" and "DSM | ||
178 | Interface Example". | ||
179 | |||
180 | |||
181 | Why BLK? | ||
182 | ======== | ||
183 | |||
184 | While PMEM provides direct byte-addressable CPU-load/store access to | ||
185 | NVDIMM storage, it does not provide the best system RAS (recovery, | ||
186 | availability, and serviceability) model. An access to a corrupted | ||
187 | system-physical-address causes a CPU exception while an access | ||
188 | to a corrupted address through a BLK-aperture causes that block window | ||
189 | to raise an error status in a register. The latter is more aligned with | ||
190 | the standard error model that host-bus-adapter attached disks present. | ||
191 | |||
192 | Also, if an administrator ever wants to replace memory, it is easier to | ||
193 | service a system at DIMM module boundaries. Compare this to PMEM where | ||
194 | data could be interleaved in an opaque hardware specific manner across | ||
195 | several DIMMs. | ||
196 | |||
197 | PMEM vs BLK | ||
198 | ----------- | ||
199 | |||
200 | BLK-apertures solve these RAS problems, but their presence is also the | ||
201 | major contributing factor to the complexity of the ND subsystem. They | ||
202 | complicate the implementation because PMEM and BLK alias in DPA space. | ||
203 | Any given DIMM's DPA-range may contribute to one or more | ||
204 | system-physical-address sets of interleaved DIMMs, *and* may also be | ||
205 | accessed in its entirety through its BLK-aperture. Accessing a DPA | ||
206 | through a system-physical-address while simultaneously accessing the | ||
207 | same DPA through a BLK-aperture has undefined results. For this reason, | ||
208 | DIMMs with this dual interface configuration include a DSM function to | ||
209 | store/retrieve a LABEL. The LABEL effectively partitions the DPA-space | ||
210 | into exclusive system-physical-address and BLK-aperture accessible | ||
211 | regions. For simplicity a DIMM is allowed a PMEM "region" per each | ||
212 | interleave set in which it is a member. The remaining DPA space can be | ||
213 | carved into an arbitrary number of BLK devices with discontiguous | ||
214 | extents. | ||
215 | |||
216 | BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX | ||
217 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
218 | |||
219 | One of the few | ||
220 | reasons to allow multiple BLK namespaces per REGION is so that each | ||
221 | BLK-namespace can be configured with a BTT with unique atomic sector | ||
222 | sizes. While a PMEM device can host a BTT the LABEL specification does | ||
223 | not provide for a sector size to be specified for a PMEM namespace. | ||
224 | |||
225 | This is due to the expectation that the primary usage model for PMEM is | ||
226 | via DAX, and the BTT is incompatible with DAX. However, for the cases | ||
227 | where an application or filesystem still needs atomic sector update | ||
228 | guarantees it can register a BTT on a PMEM device or partition. See | ||
229 | LIBNVDIMM/LIBNDCTL: Block Translation Table "btt" | ||
230 | |||
231 | |||
232 | Example NVDIMM Platform | ||
233 | ======================= | ||
234 | |||
235 | For the remainder of this document the following diagram will be | ||
236 | referenced for any example sysfs layouts:: | ||
237 | |||
238 | |||
239 | (a) (b) DIMM BLK-REGION | ||
240 | +-------------------+--------+--------+--------+ | ||
241 | +------+ | pm0.0 | blk2.0 | pm1.0 | blk2.1 | 0 region2 | ||
242 | | imc0 +--+- - - region0- - - +--------+ +--------+ | ||
243 | +--+---+ | pm0.0 | blk3.0 | pm1.0 | blk3.1 | 1 region3 | ||
244 | | +-------------------+--------v v--------+ | ||
245 | +--+---+ | | | ||
246 | | cpu0 | region1 | ||
247 | +--+---+ | | | ||
248 | | +----------------------------^ ^--------+ | ||
249 | +--+---+ | blk4.0 | pm1.0 | blk4.0 | 2 region4 | ||
250 | | imc1 +--+----------------------------| +--------+ | ||
251 | +------+ | blk5.0 | pm1.0 | blk5.0 | 3 region5 | ||
252 | +----------------------------+--------+--------+ | ||
253 | |||
254 | In this platform we have four DIMMs and two memory controllers in one | ||
255 | socket. Each unique interface (BLK or PMEM) to DPA space is identified | ||
256 | by a region device with a dynamically assigned id (REGION0 - REGION5). | ||
257 | |||
258 | 1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A | ||
259 | single PMEM namespace is created in the REGION0-SPA-range that spans most | ||
260 | of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that | ||
261 | interleaved system-physical-address range is reclaimed as BLK-aperture | ||
262 | accessed space starting at DPA-offset (a) into each DIMM. In that | ||
263 | reclaimed space we create two BLK-aperture "namespaces" from REGION2 and | ||
264 | REGION3 where "blk2.0" and "blk3.0" are just human readable names that | ||
265 | could be set to any user-desired name in the LABEL. | ||
266 | |||
267 | 2. In the last portion of DIMM0 and DIMM1 we have an interleaved | ||
268 | system-physical-address range, REGION1, that spans those two DIMMs as | ||
269 | well as DIMM2 and DIMM3. Some of REGION1 is allocated to a PMEM namespace | ||
270 | named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for | ||
271 | each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and | ||
272 | "blk5.0". | ||
273 | |||
274 | 3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1 | ||
275 | interleaved system-physical-address range (i.e. the DPA address past | ||
276 | offset (b)) are also included in the "blk4.0" and "blk5.0" namespaces. | ||
277 | Note, that this example shows that BLK-aperture namespaces don't need to | ||
278 | be contiguous in DPA-space. | ||
279 | |||
280 | This bus is provided by the kernel under the device | ||
281 | /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and | ||
282 | the nfit_test.ko module is loaded. This not only tests LIBNVDIMM but the | ||
283 | acpi_nfit.ko driver as well. | ||
284 | |||
285 | |||
286 | LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API | ||
287 | ======================================================== | ||
288 | |||
289 | What follows is a description of the LIBNVDIMM sysfs layout and a | ||
290 | corresponding object hierarchy diagram as viewed through the LIBNDCTL | ||
291 | API. The example sysfs paths and diagrams are relative to the Example | ||
292 | NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit | ||
293 | test. | ||
294 | |||
295 | LIBNDCTL: Context | ||
296 | ----------------- | ||
297 | |||
298 | Every API call in the LIBNDCTL library requires a context that holds the | ||
299 | logging parameters and other library instance state. The library is | ||
300 | based on the libabc template: | ||
301 | |||
302 | https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git | ||
303 | |||
304 | LIBNDCTL: instantiate a new library context example | ||
305 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
306 | |||
307 | :: | ||
308 | |||
309 | struct ndctl_ctx *ctx; | ||
310 | |||
311 | if (ndctl_new(&ctx) == 0) | ||
312 | return ctx; | ||
313 | else | ||
314 | return NULL; | ||
315 | |||
316 | LIBNVDIMM/LIBNDCTL: Bus | ||
317 | ----------------------- | ||
318 | |||
319 | A bus has a 1:1 relationship with an NFIT. The current expectation for | ||
320 | ACPI based systems is that there is only ever one platform-global NFIT. | ||
321 | That said, it is trivial to register multiple NFITs, the specification | ||
322 | does not preclude it. The infrastructure supports multiple busses and | ||
323 | we use this capability to test multiple NFIT configurations in the unit | ||
324 | test. | ||
325 | |||
326 | LIBNVDIMM: control class device in /sys/class | ||
327 | --------------------------------------------- | ||
328 | |||
329 | This character device accepts DSM messages to be passed to a DIMM | ||
330 | identified by its NFIT handle:: | ||
331 | |||
332 | /sys/class/nd/ndctl0 | ||
333 | |-- dev | ||
334 | |-- device -> ../../../ndbus0 | ||
335 | |-- subsystem -> ../../../../../../../class/nd | ||
336 | |||
337 | |||
338 | |||
339 | LIBNVDIMM: bus | ||
340 | -------------- | ||
341 | |||
342 | :: | ||
343 | |||
344 | struct nvdimm_bus *nvdimm_bus_register(struct device *parent, | ||
345 | struct nvdimm_bus_descriptor *nfit_desc); | ||
346 | |||
347 | :: | ||
348 | |||
349 | /sys/devices/platform/nfit_test.0/ndbus0 | ||
350 | |-- commands | ||
351 | |-- nd | ||
352 | |-- nfit | ||
353 | |-- nmem0 | ||
354 | |-- nmem1 | ||
355 | |-- nmem2 | ||
356 | |-- nmem3 | ||
357 | |-- power | ||
358 | |-- provider | ||
359 | |-- region0 | ||
360 | |-- region1 | ||
361 | |-- region2 | ||
362 | |-- region3 | ||
363 | |-- region4 | ||
364 | |-- region5 | ||
365 | |-- uevent | ||
366 | `-- wait_probe | ||
367 | |||
368 | LIBNDCTL: bus enumeration example | ||
369 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
370 | |||
371 | Find the bus handle that describes the bus from Example NVDIMM Platform:: | ||
372 | |||
373 | static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx, | ||
374 | const char *provider) | ||
375 | { | ||
376 | struct ndctl_bus *bus; | ||
377 | |||
378 | ndctl_bus_foreach(ctx, bus) | ||
379 | if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0) | ||
380 | return bus; | ||
381 | |||
382 | return NULL; | ||
383 | } | ||
384 | |||
385 | bus = get_bus_by_provider(ctx, "nfit_test.0"); | ||
386 | |||
387 | |||
388 | LIBNVDIMM/LIBNDCTL: DIMM (NMEM) | ||
389 | ------------------------------- | ||
390 | |||
391 | The DIMM device provides a character device for sending commands to | ||
392 | hardware, and it is a container for LABELs. If the DIMM is defined by | ||
393 | NFIT then an optional 'nfit' attribute sub-directory is available to add | ||
394 | NFIT-specifics. | ||
395 | |||
396 | Note that the kernel device name for "DIMMs" is "nmemX". The NFIT | ||
397 | describes these devices via "Memory Device to System Physical Address | ||
398 | Range Mapping Structure", and there is no requirement that they actually | ||
399 | be physical DIMMs, so we use a more generic name. | ||
400 | |||
401 | LIBNVDIMM: DIMM (NMEM) | ||
402 | ^^^^^^^^^^^^^^^^^^^^^^ | ||
403 | |||
404 | :: | ||
405 | |||
406 | struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, | ||
407 | const struct attribute_group **groups, unsigned long flags, | ||
408 | unsigned long *dsm_mask); | ||
409 | |||
410 | :: | ||
411 | |||
412 | /sys/devices/platform/nfit_test.0/ndbus0 | ||
413 | |-- nmem0 | ||
414 | | |-- available_slots | ||
415 | | |-- commands | ||
416 | | |-- dev | ||
417 | | |-- devtype | ||
418 | | |-- driver -> ../../../../../bus/nd/drivers/nvdimm | ||
419 | | |-- modalias | ||
420 | | |-- nfit | ||
421 | | | |-- device | ||
422 | | | |-- format | ||
423 | | | |-- handle | ||
424 | | | |-- phys_id | ||
425 | | | |-- rev_id | ||
426 | | | |-- serial | ||
427 | | | `-- vendor | ||
428 | | |-- state | ||
429 | | |-- subsystem -> ../../../../../bus/nd | ||
430 | | `-- uevent | ||
431 | |-- nmem1 | ||
432 | [..] | ||
433 | |||
434 | |||
435 | LIBNDCTL: DIMM enumeration example | ||
436 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
437 | |||
438 | Note, in this example we are assuming NFIT-defined DIMMs which are | ||
439 | identified by an "nfit_handle", a 32-bit value where: | ||
440 | |||
441 | - Bit 3:0 DIMM number within the memory channel | ||
442 | - Bit 7:4 memory channel number | ||
443 | - Bit 11:8 memory controller ID | ||
444 | - Bit 15:12 socket ID (within scope of a Node controller if node | ||
445 | controller is present) | ||
446 | - Bit 27:16 Node Controller ID | ||
447 | - Bit 31:28 Reserved | ||
448 | |||
449 | :: | ||
450 | |||
451 | static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus, | ||
452 | unsigned int handle) | ||
453 | { | ||
454 | struct ndctl_dimm *dimm; | ||
455 | |||
456 | ndctl_dimm_foreach(bus, dimm) | ||
457 | if (ndctl_dimm_get_handle(dimm) == handle) | ||
458 | return dimm; | ||
459 | |||
460 | return NULL; | ||
461 | } | ||
462 | |||
463 | #define DIMM_HANDLE(n, s, i, c, d) \ | ||
464 | (((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \ | ||
465 | | ((c & 0xf) << 4) | (d & 0xf)) | ||
466 | |||
467 | dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0)); | ||
468 | |||
469 | LIBNVDIMM/LIBNDCTL: Region | ||
470 | -------------------------- | ||
471 | |||
472 | A generic REGION device is registered for each PMEM range or BLK-aperture | ||
473 | set. Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture | ||
474 | sets on the "nfit_test.0" bus. The primary role of regions is to be a | ||
475 | container of "mappings". A mapping is a tuple of <DIMM, | ||
476 | DPA-start-offset, length>. | ||
477 | |||
478 | LIBNVDIMM provides a built-in driver for these REGION devices. This driver | ||
479 | is responsible for reconciling the aliased DPA mappings across all | ||
480 | regions, parsing the LABEL, if present, and then emitting NAMESPACE | ||
481 | devices with the resolved/exclusive DPA-boundaries for the nd_pmem or | ||
482 | nd_blk device driver to consume. | ||
483 | |||
484 | In addition to the generic attributes of "mapping"s, "interleave_ways" | ||
485 | and "size" the REGION device also exports some convenience attributes. | ||
486 | "nstype" indicates the integer type of namespace-device this region | ||
487 | emits, "devtype" duplicates the DEVTYPE variable stored by udev at the | ||
488 | 'add' event, "modalias" duplicates the MODALIAS variable stored by udev | ||
489 | at the 'add' event, and finally, the optional "spa_index" is provided in | ||
490 | the case where the region is defined by a SPA. | ||
491 | |||
492 | LIBNVDIMM: region:: | ||
493 | |||
494 | struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, | ||
495 | struct nd_region_desc *ndr_desc); | ||
496 | struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, | ||
497 | struct nd_region_desc *ndr_desc); | ||
498 | |||
499 | :: | ||
500 | |||
501 | /sys/devices/platform/nfit_test.0/ndbus0 | ||
502 | |-- region0 | ||
503 | | |-- available_size | ||
504 | | |-- btt0 | ||
505 | | |-- btt_seed | ||
506 | | |-- devtype | ||
507 | | |-- driver -> ../../../../../bus/nd/drivers/nd_region | ||
508 | | |-- init_namespaces | ||
509 | | |-- mapping0 | ||
510 | | |-- mapping1 | ||
511 | | |-- mappings | ||
512 | | |-- modalias | ||
513 | | |-- namespace0.0 | ||
514 | | |-- namespace_seed | ||
515 | | |-- numa_node | ||
516 | | |-- nfit | ||
517 | | | `-- spa_index | ||
518 | | |-- nstype | ||
519 | | |-- set_cookie | ||
520 | | |-- size | ||
521 | | |-- subsystem -> ../../../../../bus/nd | ||
522 | | `-- uevent | ||
523 | |-- region1 | ||
524 | [..] | ||
525 | |||
526 | LIBNDCTL: region enumeration example | ||
527 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
528 | |||
529 | Sample region retrieval routines based on NFIT-unique data like | ||
530 | "spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for | ||
531 | BLK:: | ||
532 | |||
533 | static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus, | ||
534 | unsigned int spa_index) | ||
535 | { | ||
536 | struct ndctl_region *region; | ||
537 | |||
538 | ndctl_region_foreach(bus, region) { | ||
539 | if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM) | ||
540 | continue; | ||
541 | if (ndctl_region_get_spa_index(region) == spa_index) | ||
542 | return region; | ||
543 | } | ||
544 | return NULL; | ||
545 | } | ||
546 | |||
547 | static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus, | ||
548 | unsigned int handle) | ||
549 | { | ||
550 | struct ndctl_region *region; | ||
551 | |||
552 | ndctl_region_foreach(bus, region) { | ||
553 | struct ndctl_mapping *map; | ||
554 | |||
555 | if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK) | ||
556 | continue; | ||
557 | ndctl_mapping_foreach(region, map) { | ||
558 | struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map); | ||
559 | |||
560 | if (ndctl_dimm_get_handle(dimm) == handle) | ||
561 | return region; | ||
562 | } | ||
563 | } | ||
564 | return NULL; | ||
565 | } | ||
566 | |||
567 | |||
568 | Why Not Encode the Region Type into the Region Name? | ||
569 | ---------------------------------------------------- | ||
570 | |||
571 | At first glance it seems since NFIT defines just PMEM and BLK interface | ||
572 | types that we should simply name REGION devices with something derived | ||
573 | from those type names. However, the ND subsystem explicitly keeps the | ||
574 | REGION name generic and expects userspace to always consider the | ||
575 | region-attributes for four reasons: | ||
576 | |||
577 | 1. There are already more than two REGION and "namespace" types. For | ||
578 | PMEM there are two subtypes. As mentioned previously we have PMEM where | ||
579 | the constituent DIMM devices are known and anonymous PMEM. For BLK | ||
580 | regions the NFIT specification already anticipates vendor specific | ||
581 | implementations. The exact distinction of what a region contains is in | ||
582 | the region-attributes not the region-name or the region-devtype. | ||
583 | |||
584 | 2. A region with zero child-namespaces is a possible configuration. For | ||
585 | example, the NFIT allows for a DCR to be published without a | ||
586 | corresponding BLK-aperture. This equates to a DIMM that can only accept | ||
587 | control/configuration messages, but no i/o through a descendant block | ||
588 | device. Again, this "type" is advertised in the attributes ('mappings' | ||
589 | == 0) and the name does not tell you much. | ||
590 | |||
591 | 3. What if a third major interface type arises in the future? Outside | ||
592 | of vendor specific implementations, it's not difficult to envision a | ||
593 | third class of interface type beyond BLK and PMEM. With a generic name | ||
594 | for the REGION level of the device-hierarchy old userspace | ||
595 | implementations can still make sense of new kernel advertised | ||
596 | region-types. Userspace can always rely on the generic region | ||
597 | attributes like "mappings", "size", etc and the expected child devices | ||
598 | named "namespace". This generic format of the device-model hierarchy | ||
599 | allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and | ||
600 | future-proof. | ||
601 | |||
602 | 4. There are more robust mechanisms for determining the major type of a | ||
603 | region than a device name. See the next section, How Do I Determine the | ||
604 | Major Type of a Region? | ||
605 | |||
606 | How Do I Determine the Major Type of a Region? | ||
607 | ---------------------------------------------- | ||
608 | |||
609 | Outside of the blanket recommendation of "use libndctl", or simply | ||
610 | looking at the kernel header (/usr/include/linux/ndctl.h) to decode the | ||
611 | "nstype" integer attribute, here are some other options. | ||
612 | |||
613 | 1. module alias lookup | ||
614 | ^^^^^^^^^^^^^^^^^^^^^^ | ||
615 | |||
616 | The whole point of region/namespace device type differentiation is to | ||
617 | decide which block-device driver will attach to a given LIBNVDIMM namespace. | ||
618 | One can simply use the modalias to look up the resulting module. It's | ||
619 | important to note that this method is robust in the presence of a | ||
620 | vendor-specific driver down the road. If a vendor-specific | ||
621 | implementation wants to supplant the standard nd_blk driver it can with | ||
622 | minimal impact to the rest of LIBNVDIMM. | ||
623 | |||
624 | In fact, a vendor may also want to have a vendor-specific region-driver | ||
625 | (outside of nd_region). For example, if a vendor defined its own LABEL | ||
626 | format it would need its own region driver to parse that LABEL and emit | ||
627 | the resulting namespaces. The output from module resolution is more | ||
628 | accurate than a region-name or region-devtype. | ||
629 | |||
630 | 2. udev | ||
631 | ^^^^^^^ | ||
632 | |||
633 | The kernel "devtype" is registered in the udev database:: | ||
634 | |||
635 | # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0 | ||
636 | P: /devices/platform/nfit_test.0/ndbus0/region0 | ||
637 | E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0 | ||
638 | E: DEVTYPE=nd_pmem | ||
639 | E: MODALIAS=nd:t2 | ||
640 | E: SUBSYSTEM=nd | ||
641 | |||
642 | # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4 | ||
643 | P: /devices/platform/nfit_test.0/ndbus0/region4 | ||
644 | E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4 | ||
645 | E: DEVTYPE=nd_blk | ||
646 | E: MODALIAS=nd:t3 | ||
647 | E: SUBSYSTEM=nd | ||
648 | |||
649 | ...and is available as a region attribute, but keep in mind that the | ||
650 | "devtype" does not indicate sub-type variations and scripts should | ||
651 | really be understanding the other attributes. | ||
652 | |||
653 | 3. type specific attributes | ||
654 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
655 | |||
656 | As it currently stands a BLK-aperture region will never have a | ||
657 | "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region. A | ||
658 | BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM | ||
659 | that does not allow I/O. A PMEM region with a "mappings" value of zero | ||
660 | is a simple system-physical-address range. | ||
661 | |||
662 | |||
663 | LIBNVDIMM/LIBNDCTL: Namespace | ||
664 | ----------------------------- | ||
665 | |||
666 | A REGION, after resolving DPA aliasing and LABEL specified boundaries, | ||
667 | surfaces one or more "namespace" devices. The arrival of a "namespace" | ||
668 | device currently triggers either the nd_blk or nd_pmem driver to load | ||
669 | and register a disk/block device. | ||
670 | |||
671 | LIBNVDIMM: namespace | ||
672 | ^^^^^^^^^^^^^^^^^^^^ | ||
673 | |||
674 | Here is a sample layout from the three major types of NAMESPACE where | ||
675 | namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid' | ||
676 | attribute), namespace2.0 represents a BLK namespace (note it has a | ||
677 | 'sector_size' attribute), and namespace6.0 represents an anonymous | ||
678 | PMEM namespace (note that it has no 'uuid' attribute due to not supporting a | ||
679 | LABEL):: | ||
680 | |||
681 | /sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0 | ||
682 | |-- alt_name | ||
683 | |-- devtype | ||
684 | |-- dpa_extents | ||
685 | |-- force_raw | ||
686 | |-- modalias | ||
687 | |-- numa_node | ||
688 | |-- resource | ||
689 | |-- size | ||
690 | |-- subsystem -> ../../../../../../bus/nd | ||
691 | |-- type | ||
692 | |-- uevent | ||
693 | `-- uuid | ||
694 | /sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0 | ||
695 | |-- alt_name | ||
696 | |-- devtype | ||
697 | |-- dpa_extents | ||
698 | |-- force_raw | ||
699 | |-- modalias | ||
700 | |-- numa_node | ||
701 | |-- sector_size | ||
702 | |-- size | ||
703 | |-- subsystem -> ../../../../../../bus/nd | ||
704 | |-- type | ||
705 | |-- uevent | ||
706 | `-- uuid | ||
707 | /sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0 | ||
708 | |-- block | ||
709 | | `-- pmem0 | ||
710 | |-- devtype | ||
711 | |-- driver -> ../../../../../../bus/nd/drivers/pmem | ||
712 | |-- force_raw | ||
713 | |-- modalias | ||
714 | |-- numa_node | ||
715 | |-- resource | ||
716 | |-- size | ||
717 | |-- subsystem -> ../../../../../../bus/nd | ||
718 | |-- type | ||
719 | `-- uevent | ||
720 | |||
721 | LIBNDCTL: namespace enumeration example | ||
722 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
723 | Namespaces are indexed relative to their parent region, example below. | ||
724 | These indexes are mostly static from boot to boot, but the subsystem makes | ||
725 | no guarantees in this regard. For a static namespace identifier use its | ||
726 | 'uuid' attribute. | ||
727 | |||
728 | :: | ||
729 | |||
730 | static struct ndctl_namespace | ||
731 | *get_namespace_by_id(struct ndctl_region *region, unsigned int id) | ||
732 | { | ||
733 | struct ndctl_namespace *ndns; | ||
734 | |||
735 | ndctl_namespace_foreach(region, ndns) | ||
736 | if (ndctl_namespace_get_id(ndns) == id) | ||
737 | return ndns; | ||
738 | |||
739 | return NULL; | ||
740 | } | ||
741 | |||
742 | LIBNDCTL: namespace creation example | ||
743 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
744 | |||
745 | Idle namespaces are automatically created by the kernel if a given | ||
746 | region has enough available capacity to create a new namespace. | ||
747 | Namespace instantiation involves finding an idle namespace and | ||
748 | configuring it. For the most part the setting of namespace attributes | ||
749 | can occur in any order, the only constraint is that 'uuid' must be set | ||
750 | before 'size'. This enables the kernel to track DPA allocations | ||
751 | internally with a static identifier:: | ||
752 | |||
753 | static int configure_namespace(struct ndctl_region *region, | ||
754 | struct ndctl_namespace *ndns, | ||
755 | struct namespace_parameters *parameters) | ||
756 | { | ||
757 | char devname[50]; | ||
758 | |||
759 | snprintf(devname, sizeof(devname), "namespace%d.%d", | ||
760 | ndctl_region_get_id(region), parameters->id); | ||
761 | |||
762 | ndctl_namespace_set_alt_name(ndns, devname); | ||
763 | /* 'uuid' must be set prior to setting size! */ | ||
764 | ndctl_namespace_set_uuid(ndns, parameters->uuid); | ||
765 | ndctl_namespace_set_size(ndns, parameters->size); | ||
766 | /* unlike pmem namespaces, blk namespaces have a sector size */ | ||
767 | if (parameters->lbasize) | ||
768 | ndctl_namespace_set_sector_size(ndns, parameters->lbasize); | ||
769 | ndctl_namespace_enable(ndns); | ||
770 | } | ||
771 | |||
772 | |||
773 | Why the Term "namespace"? | ||
774 | ^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
775 | |||
776 | 1. Why not "volume" for instance? "volume" ran the risk of confusing | ||
777 | ND (libnvdimm subsystem) with a volume manager like device-mapper. | ||
778 | |||
779 | 2. The term originated to describe the sub-devices that can be created | ||
780 | within a NVME controller (see the nvme specification: | ||
781 | http://www.nvmexpress.org/specifications/), and NFIT namespaces are | ||
782 | meant to parallel the capabilities and configurability of | ||
783 | NVME-namespaces. | ||
784 | |||
785 | |||
786 | LIBNVDIMM/LIBNDCTL: Block Translation Table "btt" | ||
787 | ------------------------------------------------- | ||
788 | |||
789 | A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked | ||
790 | block device driver that fronts either the whole block device or a | ||
791 | partition of a block device emitted by either a PMEM or BLK NAMESPACE. | ||
792 | |||
793 | LIBNVDIMM: btt layout | ||
794 | ^^^^^^^^^^^^^^^^^^^^^ | ||
795 | |||
796 | Every region will start out with at least one BTT device which is the | ||
797 | seed device. To activate it set the "namespace", "uuid", and | ||
798 | "sector_size" attributes and then bind the device to the nd_pmem or | ||
799 | nd_blk driver depending on the region type:: | ||
800 | |||
801 | /sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/ | ||
802 | |-- namespace | ||
803 | |-- delete | ||
804 | |-- devtype | ||
805 | |-- modalias | ||
806 | |-- numa_node | ||
807 | |-- sector_size | ||
808 | |-- subsystem -> ../../../../../bus/nd | ||
809 | |-- uevent | ||
810 | `-- uuid | ||
811 | |||
812 | LIBNDCTL: btt creation example | ||
813 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
814 | |||
815 | Similar to namespaces an idle BTT device is automatically created per | ||
816 | region. Each time this "seed" btt device is configured and enabled a new | ||
817 | seed is created. Creating a BTT configuration involves two steps of | ||
818 | finding an idle BTT and assigning it to consume a PMEM or BLK namespace:: | ||
819 | |||
820 | static struct ndctl_btt *get_idle_btt(struct ndctl_region *region) | ||
821 | { | ||
822 | struct ndctl_btt *btt; | ||
823 | |||
824 | ndctl_btt_foreach(region, btt) | ||
825 | if (!ndctl_btt_is_enabled(btt) | ||
826 | && !ndctl_btt_is_configured(btt)) | ||
827 | return btt; | ||
828 | |||
829 | return NULL; | ||
830 | } | ||
831 | |||
832 | static int configure_btt(struct ndctl_region *region, | ||
833 | struct btt_parameters *parameters) | ||
834 | { | ||
835 | btt = get_idle_btt(region); | ||
836 | |||
837 | ndctl_btt_set_uuid(btt, parameters->uuid); | ||
838 | ndctl_btt_set_sector_size(btt, parameters->sector_size); | ||
839 | ndctl_btt_set_namespace(btt, parameters->ndns); | ||
840 | /* turn off raw mode device */ | ||
841 | ndctl_namespace_disable(parameters->ndns); | ||
842 | /* turn on btt access */ | ||
843 | ndctl_btt_enable(btt); | ||
844 | } | ||
845 | |||
846 | Once instantiated a new inactive btt seed device will appear underneath | ||
847 | the region. | ||
848 | |||
849 | Once a "namespace" is removed from a BTT that instance of the BTT device | ||
850 | will be deleted or otherwise reset to default values. This deletion is | ||
851 | only at the device model level. In order to destroy a BTT the "info | ||
852 | block" needs to be destroyed. Note, that to destroy a BTT the media | ||
853 | needs to be written in raw mode. By default, the kernel will autodetect | ||
854 | the presence of a BTT and disable raw mode. This autodetect behavior | ||
855 | can be suppressed by enabling raw mode for the namespace via the | ||
856 | ndctl_namespace_set_raw_mode() API. | ||
857 | |||
858 | |||
859 | Summary LIBNDCTL Diagram | ||
860 | ------------------------ | ||
861 | |||
862 | For the given example above, here is the view of the objects as seen by the | ||
863 | LIBNDCTL API:: | ||
864 | |||
865 | +---+ | ||
866 | |CTX| +---------+ +--------------+ +---------------+ | ||
867 | +-+-+ +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" | | ||
868 | | | +---------+ +--------------+ +---------------+ | ||
869 | +-------+ | | +---------+ +--------------+ +---------------+ | ||
870 | | DIMM0 <-+ | +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" | | ||
871 | +-------+ | | | +---------+ +--------------+ +---------------+ | ||
872 | | DIMM1 <-+ +-v--+ | +---------+ +--------------+ +---------------+ | ||
873 | +-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6 "blk2.0" | | ||
874 | | DIMM2 <-+ +----+ | +---------+ | +--------------+ +----------------------+ | ||
875 | +-------+ | | +-> NAMESPACE2.1 +--> ND5 "blk2.1" | BTT2 | | ||
876 | | DIMM3 <-+ | +--------------+ +----------------------+ | ||
877 | +-------+ | +---------+ +--------------+ +---------------+ | ||
878 | +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4 "blk3.0" | | ||
879 | | +---------+ | +--------------+ +----------------------+ | ||
880 | | +-> NAMESPACE3.1 +--> ND3 "blk3.1" | BTT1 | | ||
881 | | +--------------+ +----------------------+ | ||
882 | | +---------+ +--------------+ +---------------+ | ||
883 | +-> REGION4 +---> NAMESPACE4.0 +--> ND2 "blk4.0" | | ||
884 | | +---------+ +--------------+ +---------------+ | ||
885 | | +---------+ +--------------+ +----------------------+ | ||
886 | +-> REGION5 +---> NAMESPACE5.0 +--> ND1 "blk5.0" | BTT0 | | ||
887 | +---------+ +--------------+ +---------------+------+ | ||
diff --git a/Documentation/driver-api/nvdimm/security.rst b/Documentation/driver-api/nvdimm/security.rst new file mode 100644 index 000000000000..ad9dea099b34 --- /dev/null +++ b/Documentation/driver-api/nvdimm/security.rst | |||
@@ -0,0 +1,143 @@ | |||
1 | =============== | ||
2 | NVDIMM Security | ||
3 | =============== | ||
4 | |||
5 | 1. Introduction | ||
6 | --------------- | ||
7 | |||
8 | With the introduction of Intel Device Specific Methods (DSM) v1.8 | ||
9 | specification [1], security DSMs are introduced. The spec added the following | ||
10 | security DSMs: "get security state", "set passphrase", "disable passphrase", | ||
11 | "unlock unit", "freeze lock", "secure erase", and "overwrite". A security_ops | ||
12 | data structure has been added to struct dimm in order to support the security | ||
13 | operations and generic APIs are exposed to allow vendor neutral operations. | ||
14 | |||
15 | 2. Sysfs Interface | ||
16 | ------------------ | ||
17 | The "security" sysfs attribute is provided in the nvdimm sysfs directory. For | ||
18 | example: | ||
19 | /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security | ||
20 | |||
21 | The "show" attribute of that attribute will display the security state for | ||
22 | that DIMM. The following states are available: disabled, unlocked, locked, | ||
23 | frozen, and overwrite. If security is not supported, the sysfs attribute | ||
24 | will not be visible. | ||
25 | |||
26 | The "store" attribute takes several commands when it is being written to | ||
27 | in order to support some of the security functionalities: | ||
28 | update <old_keyid> <new_keyid> - enable or update passphrase. | ||
29 | disable <keyid> - disable enabled security and remove key. | ||
30 | freeze - freeze changing of security states. | ||
31 | erase <keyid> - delete existing user encryption key. | ||
32 | overwrite <keyid> - wipe the entire nvdimm. | ||
33 | master_update <keyid> <new_keyid> - enable or update master passphrase. | ||
34 | master_erase <keyid> - delete existing user encryption key. | ||
35 | |||
36 | 3. Key Management | ||
37 | ----------------- | ||
38 | |||
39 | The key is associated to the payload by the DIMM id. For example: | ||
40 | # cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/nfit/id | ||
41 | 8089-a2-1740-00000133 | ||
42 | The DIMM id would be provided along with the key payload (passphrase) to | ||
43 | the kernel. | ||
44 | |||
45 | The security keys are managed on the basis of a single key per DIMM. The | ||
46 | key "passphrase" is expected to be 32 bytes long. This is similar to the ATA | ||
47 | security specification [2]. A key is initially acquired via the request_key() | ||
48 | kernel API call during nvdimm unlock. It is up to the user to make sure that | ||
49 | all the keys are in the kernel user keyring for unlock. | ||
50 | |||
51 | A nvdimm encrypted-key of format enc32 has the description format of: | ||
52 | nvdimm:<bus-provider-specific-unique-id> | ||
53 | |||
54 | See file ``Documentation/security/keys/trusted-encrypted.rst`` for creating | ||
55 | encrypted-keys of enc32 format. TPM usage with a master trusted key is | ||
56 | preferred for sealing the encrypted-keys. | ||
57 | |||
58 | 4. Unlocking | ||
59 | ------------ | ||
60 | When the DIMMs are being enumerated by the kernel, the kernel will attempt to | ||
61 | retrieve the key from the kernel user keyring. This is the only time | ||
62 | a locked DIMM can be unlocked. Once unlocked, the DIMM will remain unlocked | ||
63 | until reboot. Typically an entity (e.g. a shell script) will inject all the | ||
64 | relevant encrypted-keys into the kernel user keyring during the initramfs phase. | ||
65 | This provides the unlock function access to all the related keys that contain | ||
66 | the passphrase for the respective nvdimms. It is also recommended that the | ||
67 | keys are injected before libnvdimm is loaded by modprobe. | ||
68 | |||
69 | 5. Update | ||
70 | --------- | ||
71 | When doing an update, it is expected that the existing key is removed from | ||
72 | the kernel user keyring and reinjected as a different (old) key. It's irrelevant | ||
73 | what the key description is for the old key since we are only interested in the | ||
74 | keyid when doing the update operation. It is also expected that the new key | ||
75 | is injected with the description format described earlier in this | ||
76 | document. The update command written to the sysfs attribute will be with | ||
77 | the format: | ||
78 | update <old keyid> <new keyid> | ||
79 | |||
80 | If there is no old keyid due to a security enabling, then a 0 should be | ||
81 | passed in. | ||
82 | |||
83 | 6. Freeze | ||
84 | --------- | ||
85 | The freeze operation does not require any keys. The security config can be | ||
86 | frozen by a user with root privilege. | ||
87 | |||
88 | 7. Disable | ||
89 | ---------- | ||
90 | The security disable command format is: | ||
91 | disable <keyid> | ||
92 | |||
93 | A key with the current passphrase payload that is tied to the nvdimm should be | ||
94 | in the kernel user keyring. | ||
95 | |||
96 | 8. Secure Erase | ||
97 | --------------- | ||
98 | The command format for doing a secure erase is: | ||
99 | erase <keyid> | ||
100 | |||
101 | A key with the current passphrase payload that is tied to the nvdimm should be | ||
102 | in the kernel user keyring. | ||
103 | |||
104 | 9. Overwrite | ||
105 | ------------ | ||
106 | The command format for doing an overwrite is: | ||
107 | overwrite <keyid> | ||
108 | |||
109 | Overwrite can be done without a key if security is not enabled. A key serial | ||
110 | of 0 can be passed in to indicate no key. | ||
111 | |||
112 | The sysfs attribute "security" can be polled to wait on overwrite completion. | ||
113 | Overwrite can last tens of minutes or more depending on nvdimm size. | ||
114 | |||
115 | An encrypted-key with the current user passphrase that is tied to the nvdimm | ||
116 | should be injected and its keyid should be passed in via sysfs. | ||
117 | |||
118 | 10. Master Update | ||
119 | ----------------- | ||
120 | The command format for doing a master update is: | ||
121 | master_update <old keyid> <new keyid> | ||
122 | |||
123 | The operating mechanism for master update is identical to update except the | ||
124 | master passphrase key is passed to the kernel. The master passphrase key | ||
125 | is just another encrypted-key. | ||
126 | |||
127 | This command is only available when security is disabled. | ||
128 | |||
129 | 11. Master Erase | ||
130 | ---------------- | ||
131 | The command format for doing a master erase is: | ||
132 | master_erase <current keyid> | ||
133 | |||
134 | This command has the same operating mechanism as erase except the master | ||
135 | passphrase key is passed to the kernel. The master passphrase key is just | ||
136 | another encrypted-key. | ||
137 | |||
138 | This command is only available when the master security is enabled, indicated | ||
139 | by the extended security status. | ||
140 | |||
141 | [1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf | ||
142 | |||
143 | [2]: http://www.t13.org/documents/UploadedDocuments/docs2006/e05179r4-ACS-SecurityClarifications.pdf | ||
diff --git a/Documentation/driver-api/nvmem.rst b/Documentation/driver-api/nvmem.rst new file mode 100644 index 000000000000..d9d958d5c824 --- /dev/null +++ b/Documentation/driver-api/nvmem.rst | |||
@@ -0,0 +1,189 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | =============== | ||
4 | NVMEM Subsystem | ||
5 | =============== | ||
6 | |||
7 | Srinivas Kandagatla <srinivas.kandagatla@linaro.org> | ||
8 | |||
9 | This document explains the NVMEM Framework along with the APIs provided, | ||
10 | and how to use it. | ||
11 | |||
12 | 1. Introduction | ||
13 | =============== | ||
14 | *NVMEM* is the abbreviation for Non Volatile Memory layer. It is used to | ||
15 | retrieve configuration of SOC or Device specific data from non volatile | ||
16 | memories like eeprom, efuses and so on. | ||
17 | |||
18 | Before this framework existed, NVMEM drivers like eeprom were stored in | ||
19 | drivers/misc, where they all had to duplicate pretty much the same code to | ||
20 | register a sysfs file, allow in-kernel users to access the content of the | ||
21 | devices they were driving, etc. | ||
22 | |||
23 | This was also a problem as far as other in-kernel users were involved, since | ||
24 | the solutions used were pretty much different from one driver to another, there | ||
25 | was a rather big abstraction leak. | ||
26 | |||
27 | This framework aims at solving these problems. It also introduces DT | ||
28 | representation for consumer devices to go get the data they require (MAC | ||
29 | Addresses, SoC/Revision ID, part numbers, and so on) from the NVMEMs. This | ||
30 | framework is based on regmap, so that most of the abstraction available in | ||
31 | regmap can be reused, across multiple types of buses. | ||
32 | |||
33 | NVMEM Providers | ||
34 | +++++++++++++++ | ||
35 | |||
36 | NVMEM provider refers to an entity that implements methods to initialize, read | ||
37 | and write the non-volatile memory. | ||
38 | |||
39 | 2. Registering/Unregistering the NVMEM provider | ||
40 | =============================================== | ||
41 | |||
42 | A NVMEM provider can register with NVMEM core by supplying relevant | ||
43 | nvmem configuration to nvmem_register(), on success core would return a valid | ||
44 | nvmem_device pointer. | ||
45 | |||
46 | nvmem_unregister(nvmem) is used to unregister a previously registered provider. | ||
47 | |||
48 | For example, a simple qfprom case:: | ||
49 | |||
50 | static struct nvmem_config econfig = { | ||
51 | .name = "qfprom", | ||
52 | .owner = THIS_MODULE, | ||
53 | }; | ||
54 | |||
55 | static int qfprom_probe(struct platform_device *pdev) | ||
56 | { | ||
57 | ... | ||
58 | econfig.dev = &pdev->dev; | ||
59 | nvmem = nvmem_register(&econfig); | ||
60 | ... | ||
61 | } | ||
62 | |||
63 | It is mandatory that the NVMEM provider has a regmap associated with its | ||
64 | struct device. Failure to do so would return an error code from nvmem_register(). | ||
65 | |||
66 | Users of board files can define and register nvmem cells using the | ||
67 | nvmem_cell_table struct:: | ||
68 | |||
69 | static struct nvmem_cell_info foo_nvmem_cells[] = { | ||
70 | { | ||
71 | .name = "macaddr", | ||
72 | .offset = 0x7f00, | ||
73 | .bytes = ETH_ALEN, | ||
74 | } | ||
75 | }; | ||
76 | |||
77 | static struct nvmem_cell_table foo_nvmem_cell_table = { | ||
78 | .nvmem_name = "i2c-eeprom", | ||
79 | .cells = foo_nvmem_cells, | ||
80 | .ncells = ARRAY_SIZE(foo_nvmem_cells), | ||
81 | }; | ||
82 | |||
83 | nvmem_add_cell_table(&foo_nvmem_cell_table); | ||
84 | |||
85 | Additionally it is possible to create nvmem cell lookup entries and register | ||
86 | them with the nvmem framework from machine code as shown in the example below:: | ||
87 | |||
88 | static struct nvmem_cell_lookup foo_nvmem_lookup = { | ||
89 | .nvmem_name = "i2c-eeprom", | ||
90 | .cell_name = "macaddr", | ||
91 | .dev_id = "foo_mac.0", | ||
92 | .con_id = "mac-address", | ||
93 | }; | ||
94 | |||
95 | nvmem_add_cell_lookups(&foo_nvmem_lookup, 1); | ||
96 | |||
97 | NVMEM Consumers | ||
98 | +++++++++++++++ | ||
99 | |||
100 | NVMEM consumers are the entities which make use of the NVMEM provider to | ||
101 | read from and write to NVMEM. | ||
102 | |||
103 | 3. NVMEM cell based consumer APIs | ||
104 | ================================= | ||
105 | |||
106 | NVMEM cells are the data entries/fields in the NVMEM. | ||
107 | The NVMEM framework provides 3 APIs to read/write NVMEM cells:: | ||
108 | |||
109 | struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name); | ||
110 | struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name); | ||
111 | |||
112 | void nvmem_cell_put(struct nvmem_cell *cell); | ||
113 | void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); | ||
114 | |||
115 | void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len); | ||
116 | int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len); | ||
117 | |||
118 | `*nvmem_cell_get()` apis will get a reference to nvmem cell for a given id, | ||
119 | and nvmem_cell_read/write() can then read or write to the cell. | ||
120 | Once the usage of the cell is finished the consumer should call | ||
121 | `*nvmem_cell_put()` to free all the memory allocated for the cell. | ||
122 | |||
123 | 4. Direct NVMEM device based consumer APIs | ||
124 | ========================================== | ||
125 | |||
126 | In some instances it is necessary to directly read/write the NVMEM. | ||
127 | To facilitate such consumers NVMEM framework provides below apis:: | ||
128 | |||
129 | struct nvmem_device *nvmem_device_get(struct device *dev, const char *name); | ||
130 | struct nvmem_device *devm_nvmem_device_get(struct device *dev, | ||
131 | const char *name); | ||
132 | void nvmem_device_put(struct nvmem_device *nvmem); | ||
133 | int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset, | ||
134 | size_t bytes, void *buf); | ||
135 | int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset, | ||
136 | size_t bytes, void *buf); | ||
137 | int nvmem_device_cell_read(struct nvmem_device *nvmem, | ||
138 | struct nvmem_cell_info *info, void *buf); | ||
139 | int nvmem_device_cell_write(struct nvmem_device *nvmem, | ||
140 | struct nvmem_cell_info *info, void *buf); | ||
141 | |||
142 | Before a consumer can read/write NVMEM directly, it should get hold | ||
143 | of an nvmem_device from one of the `*nvmem_device_get()` APIs. | ||
144 | |||
145 | The difference between these apis and cell based apis is that these apis always | ||
146 | take nvmem_device as parameter. | ||
147 | |||
148 | 5. Releasing a reference to the NVMEM | ||
149 | ===================================== | ||
150 | |||
151 | When a consumer no longer needs the NVMEM, it has to release the reference | ||
152 | to the NVMEM it has obtained using the APIs mentioned in the above section. | ||
153 | The NVMEM framework provides the following APIs to release a reference to the NVMEM:: | ||
154 | |||
155 | void nvmem_cell_put(struct nvmem_cell *cell); | ||
156 | void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); | ||
157 | void nvmem_device_put(struct nvmem_device *nvmem); | ||
158 | void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem); | ||
159 | |||
160 | These APIs are used to release a reference to the NVMEM, and | ||
161 | devm_nvmem_cell_put and devm_nvmem_device_put destroy the devres associated | ||
162 | with this NVMEM. | ||
163 | |||
164 | Userspace | ||
165 | +++++++++ | ||
166 | |||
167 | 6. Userspace binary interface | ||
168 | ============================== | ||
169 | |||
170 | Userspace can read/write the raw NVMEM file located at:: | ||
171 | |||
172 | /sys/bus/nvmem/devices/*/nvmem | ||
173 | |||
174 | ex:: | ||
175 | |||
176 | hexdump /sys/bus/nvmem/devices/qfprom0/nvmem | ||
177 | |||
178 | 0000000 0000 0000 0000 0000 0000 0000 0000 0000 | ||
179 | * | ||
180 | 00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00 | ||
181 | 0000000 0000 0000 0000 0000 0000 0000 0000 0000 | ||
182 | ... | ||
183 | * | ||
184 | 0001000 | ||
185 | |||
186 | 7. DeviceTree Binding | ||
187 | ===================== | ||
188 | |||
189 | See Documentation/devicetree/bindings/nvmem/nvmem.txt | ||
diff --git a/Documentation/driver-api/parport-lowlevel.rst b/Documentation/driver-api/parport-lowlevel.rst new file mode 100644 index 000000000000..0633d70ffda7 --- /dev/null +++ b/Documentation/driver-api/parport-lowlevel.rst | |||
@@ -0,0 +1,1832 @@ | |||
1 | =============================== | ||
2 | PARPORT interface documentation | ||
3 | =============================== | ||
4 | |||
5 | :Time-stamp: <2000-02-24 13:30:20 twaugh> | ||
6 | |||
7 | Described here are the following functions: | ||
8 | |||
9 | Global functions:: | ||
10 | parport_register_driver | ||
11 | parport_unregister_driver | ||
12 | parport_enumerate | ||
13 | parport_register_device | ||
14 | parport_unregister_device | ||
15 | parport_claim | ||
16 | parport_claim_or_block | ||
17 | parport_release | ||
18 | parport_yield | ||
19 | parport_yield_blocking | ||
20 | parport_wait_peripheral | ||
21 | parport_poll_peripheral | ||
22 | parport_wait_event | ||
23 | parport_negotiate | ||
24 | parport_read | ||
25 | parport_write | ||
26 | parport_open | ||
27 | parport_close | ||
28 | parport_device_id | ||
29 | parport_device_coords | ||
30 | parport_find_class | ||
31 | parport_find_device | ||
32 | parport_set_timeout | ||
33 | |||
34 | Port functions (can be overridden by low-level drivers): | ||
35 | |||
36 | SPP:: | ||
37 | port->ops->read_data | ||
38 | port->ops->write_data | ||
39 | port->ops->read_status | ||
40 | port->ops->read_control | ||
41 | port->ops->write_control | ||
42 | port->ops->frob_control | ||
43 | port->ops->enable_irq | ||
44 | port->ops->disable_irq | ||
45 | port->ops->data_forward | ||
46 | port->ops->data_reverse | ||
47 | |||
48 | EPP:: | ||
49 | port->ops->epp_write_data | ||
50 | port->ops->epp_read_data | ||
51 | port->ops->epp_write_addr | ||
52 | port->ops->epp_read_addr | ||
53 | |||
54 | ECP:: | ||
55 | port->ops->ecp_write_data | ||
56 | port->ops->ecp_read_data | ||
57 | port->ops->ecp_write_addr | ||
58 | |||
59 | Other:: | ||
60 | port->ops->nibble_read_data | ||
61 | port->ops->byte_read_data | ||
62 | port->ops->compat_write_data | ||
63 | |||
64 | The parport subsystem comprises ``parport`` (the core port-sharing | ||
65 | code), and a variety of low-level drivers that actually do the port | ||
66 | accesses. Each low-level driver handles a particular style of port | ||
67 | (PC, Amiga, and so on). | ||
68 | |||
69 | The parport interface to the device driver author can be broken down | ||
70 | into global functions and port functions. | ||
71 | |||
72 | The global functions are mostly for communicating between the device | ||
73 | driver and the parport subsystem: acquiring a list of available ports, | ||
74 | claiming a port for exclusive use, and so on. They also include | ||
75 | ``generic`` functions for doing standard things that will work on any | ||
76 | IEEE 1284-capable architecture. | ||
77 | |||
78 | The port functions are provided by the low-level drivers, although the | ||
79 | core parport module provides generic ``defaults`` for some routines. | ||
80 | The port functions can be split into three groups: SPP, EPP, and ECP. | ||
81 | |||
82 | SPP (Standard Parallel Port) functions modify so-called ``SPP`` | ||
83 | registers: data, status, and control. The hardware may not actually | ||
84 | have registers exactly like that, but the PC does and this interface is | ||
85 | modelled after common PC implementations. Other low-level drivers may | ||
86 | be able to emulate most of the functionality. | ||
87 | |||
88 | EPP (Enhanced Parallel Port) functions are provided for reading and | ||
89 | writing in IEEE 1284 EPP mode, and ECP (Extended Capabilities Port) | ||
90 | functions are used for IEEE 1284 ECP mode. (What about BECP? Does | ||
91 | anyone care?) | ||
92 | |||
93 | Hardware assistance for EPP and/or ECP transfers may or may not be | ||
94 | available, and if it is available it may or may not be used. If | ||
95 | hardware is not used, the transfer will be software-driven. In order | ||
96 | to cope with peripherals that only tenuously support IEEE 1284, a | ||
97 | low-level driver specific function is provided, for altering 'fudge | ||
98 | factors'. | ||
99 | |||
100 | Global functions | ||
101 | ================ | ||
102 | |||
103 | parport_register_driver - register a device driver with parport | ||
104 | --------------------------------------------------------------- | ||
105 | |||
106 | SYNOPSIS | ||
107 | ^^^^^^^^ | ||
108 | |||
109 | :: | ||
110 | |||
111 | #include <linux/parport.h> | ||
112 | |||
113 | struct parport_driver { | ||
114 | const char *name; | ||
115 | void (*attach) (struct parport *); | ||
116 | void (*detach) (struct parport *); | ||
117 | struct parport_driver *next; | ||
118 | }; | ||
119 | int parport_register_driver (struct parport_driver *driver); | ||
120 | |||
121 | DESCRIPTION | ||
122 | ^^^^^^^^^^^ | ||
123 | |||
124 | In order to be notified about parallel ports when they are detected, | ||
125 | parport_register_driver should be called. Your driver will | ||
126 | immediately be notified of all ports that have already been detected, | ||
127 | and of each new port as low-level drivers are loaded. | ||
128 | |||
129 | A ``struct parport_driver`` contains the textual name of your driver, | ||
130 | a pointer to a function to handle new ports, and a pointer to a | ||
131 | function to handle ports going away due to a low-level driver | ||
132 | unloading. Ports will only be detached if they are not being used | ||
133 | (i.e. there are no devices registered on them). | ||
134 | |||
135 | The visible parts of the ``struct parport *`` argument given to | ||
136 | attach/detach are:: | ||
137 | |||
138 | struct parport | ||
139 | { | ||
140 | struct parport *next; /* next parport in list */ | ||
141 | const char *name; /* port's name */ | ||
142 | unsigned int modes; /* bitfield of hardware modes */ | ||
143 | struct parport_device_info probe_info; | ||
144 | /* IEEE1284 info */ | ||
145 | int number; /* parport index */ | ||
146 | struct parport_operations *ops; | ||
147 | ... | ||
148 | }; | ||
149 | |||
150 | There are other members of the structure, but they should not be | ||
151 | touched. | ||
152 | |||
153 | The ``modes`` member summarises the capabilities of the underlying | ||
154 | hardware. It consists of flags which may be bitwise-ored together: | ||
155 | |||
156 | ============================= =============================================== | ||
157 | PARPORT_MODE_PCSPP IBM PC registers are available, | ||
158 | i.e. functions that act on data, | ||
159 | control and status registers are | ||
160 | probably writing directly to the | ||
161 | hardware. | ||
162 | PARPORT_MODE_TRISTATE The data drivers may be turned off. | ||
163 | This allows the data lines to be used | ||
164 | for reverse (peripheral to host) | ||
165 | transfers. | ||
166 | PARPORT_MODE_COMPAT The hardware can assist with | ||
167 | compatibility-mode (printer) | ||
168 | transfers, i.e. compat_write_block. | ||
169 | PARPORT_MODE_EPP The hardware can assist with EPP | ||
170 | transfers. | ||
171 | PARPORT_MODE_ECP The hardware can assist with ECP | ||
172 | transfers. | ||
173 | PARPORT_MODE_DMA The hardware can use DMA, so you might | ||
174 | want to pass ISA DMA-able memory | ||
175 | (i.e. memory allocated using the | ||
176 | GFP_DMA flag with kmalloc) to the | ||
177 | low-level driver in order to take | ||
178 | advantage of it. | ||
179 | ============================= =============================================== | ||
180 | |||
181 | There may be other flags in ``modes`` as well. | ||
182 | |||
183 | The contents of ``modes`` is advisory only. For example, if the | ||
184 | hardware is capable of DMA, and PARPORT_MODE_DMA is in ``modes``, it | ||
185 | doesn't necessarily mean that DMA will always be used when possible. | ||
186 | Similarly, hardware that is capable of assisting ECP transfers won't | ||
187 | necessarily be used. | ||
188 | |||
189 | RETURN VALUE | ||
190 | ^^^^^^^^^^^^ | ||
191 | |||
192 | Zero on success, otherwise an error code. | ||
193 | |||
194 | ERRORS | ||
195 | ^^^^^^ | ||
196 | |||
197 | None. (Can it fail? Why return int?) | ||
198 | |||
199 | EXAMPLE | ||
200 | ^^^^^^^ | ||
201 | |||
202 | :: | ||
203 | |||
204 | static void lp_attach (struct parport *port) | ||
205 | { | ||
206 | ... | ||
207 | private = kmalloc (...); | ||
208 | dev[count++] = parport_register_device (...); | ||
209 | ... | ||
210 | } | ||
211 | |||
212 | static void lp_detach (struct parport *port) | ||
213 | { | ||
214 | ... | ||
215 | } | ||
216 | |||
217 | static struct parport_driver lp_driver = { | ||
218 | "lp", | ||
219 | lp_attach, | ||
220 | lp_detach, | ||
221 | NULL /* always put NULL here */ | ||
222 | }; | ||
223 | |||
224 | int lp_init (void) | ||
225 | { | ||
226 | ... | ||
227 | if (parport_register_driver (&lp_driver)) { | ||
228 | /* Failed; nothing we can do. */ | ||
229 | return -EIO; | ||
230 | } | ||
231 | ... | ||
232 | } | ||
233 | |||
234 | |||
235 | SEE ALSO | ||
236 | ^^^^^^^^ | ||
237 | |||
238 | parport_unregister_driver, parport_register_device, parport_enumerate | ||
239 | |||
240 | |||
241 | |||
242 | parport_unregister_driver - tell parport to forget about this driver | ||
243 | -------------------------------------------------------------------- | ||
244 | |||
245 | SYNOPSIS | ||
246 | ^^^^^^^^ | ||
247 | |||
248 | :: | ||
249 | |||
250 | #include <linux/parport.h> | ||
251 | |||
252 | struct parport_driver { | ||
253 | const char *name; | ||
254 | void (*attach) (struct parport *); | ||
255 | void (*detach) (struct parport *); | ||
256 | struct parport_driver *next; | ||
257 | }; | ||
258 | void parport_unregister_driver (struct parport_driver *driver); | ||
259 | |||
260 | DESCRIPTION | ||
261 | ^^^^^^^^^^^ | ||
262 | |||
263 | This tells parport not to notify the device driver of new ports or of | ||
264 | ports going away. Registered devices belonging to that driver are NOT | ||
265 | unregistered: parport_unregister_device must be used for each one. | ||
266 | |||
267 | EXAMPLE | ||
268 | ^^^^^^^ | ||
269 | |||
270 | :: | ||
271 | |||
272 | void cleanup_module (void) | ||
273 | { | ||
274 | ... | ||
275 | /* Stop notifications. */ | ||
276 | parport_unregister_driver (&lp_driver); | ||
277 | |||
278 | /* Unregister devices. */ | ||
279 | for (i = 0; i < NUM_DEVS; i++) | ||
280 | parport_unregister_device (dev[i]); | ||
281 | ... | ||
282 | } | ||
283 | |||
284 | SEE ALSO | ||
285 | ^^^^^^^^ | ||
286 | |||
287 | parport_register_driver, parport_enumerate | ||
288 | |||
289 | |||
290 | |||
291 | parport_enumerate - retrieve a list of parallel ports (DEPRECATED) | ||
292 | ------------------------------------------------------------------ | ||
293 | |||
294 | SYNOPSIS | ||
295 | ^^^^^^^^ | ||
296 | |||
297 | :: | ||
298 | |||
299 | #include <linux/parport.h> | ||
300 | |||
301 | struct parport *parport_enumerate (void); | ||
302 | |||
303 | DESCRIPTION | ||
304 | ^^^^^^^^^^^ | ||
305 | |||
306 | Retrieve the first of a list of valid parallel ports for this machine. | ||
307 | Successive parallel ports can be found using the ``struct parport | ||
308 | *next`` element of the ``struct parport *`` that is returned. If ``next`` | ||
309 | is NULL, there are no more parallel ports in the list. The number of | ||
310 | ports in the list will not exceed PARPORT_MAX. | ||
311 | |||
312 | RETURN VALUE | ||
313 | ^^^^^^^^^^^^ | ||
314 | |||
315 | A ``struct parport *`` describing a valid parallel port for the machine, | ||
316 | or NULL if there are none. | ||
317 | |||
318 | ERRORS | ||
319 | ^^^^^^ | ||
320 | |||
321 | This function can return NULL to indicate that there are no parallel | ||
322 | ports to use. | ||
323 | |||
324 | EXAMPLE | ||
325 | ^^^^^^^ | ||
326 | |||
327 | :: | ||
328 | |||
329 | int detect_device (void) | ||
330 | { | ||
331 | struct parport *port; | ||
332 | |||
333 | for (port = parport_enumerate (); | ||
334 | port != NULL; | ||
335 | port = port->next) { | ||
336 | /* Try to detect a device on the port... */ | ||
337 | ... | ||
338 | } | ||
339 | |||
340 | |||
341 | ... | ||
342 | } | ||
343 | |||
344 | NOTES | ||
345 | ^^^^^ | ||
346 | |||
347 | parport_enumerate is deprecated; parport_register_driver should be | ||
348 | used instead. | ||
349 | |||
350 | SEE ALSO | ||
351 | ^^^^^^^^ | ||
352 | |||
353 | parport_register_driver, parport_unregister_driver | ||
354 | |||
355 | |||
356 | |||
357 | parport_register_device - register to use a port | ||
358 | ------------------------------------------------ | ||
359 | |||
360 | SYNOPSIS | ||
361 | ^^^^^^^^ | ||
362 | |||
363 | :: | ||
364 | |||
365 | #include <linux/parport.h> | ||
366 | |||
367 | typedef int (*preempt_func) (void *handle); | ||
368 | typedef void (*wakeup_func) (void *handle); | ||
369 | typedef int (*irq_func) (int irq, void *handle, struct pt_regs *); | ||
370 | |||
371 | struct pardevice *parport_register_device(struct parport *port, | ||
372 | const char *name, | ||
373 | preempt_func preempt, | ||
374 | wakeup_func wakeup, | ||
375 | irq_func irq, | ||
376 | int flags, | ||
377 | void *handle); | ||
378 | |||
379 | DESCRIPTION | ||
380 | ^^^^^^^^^^^ | ||
381 | |||
382 | Use this function to register your device driver on a parallel port | ||
383 | (``port``). Once you have done that, you will be able to use | ||
384 | parport_claim and parport_release in order to use the port. | ||
385 | |||
386 | The ``name`` argument is the name of the device that appears in the /proc | ||
387 | filesystem. The string must be valid for the whole lifetime of the | ||
388 | device (until parport_unregister_device is called). | ||
389 | |||
390 | This function will register three callbacks into your driver: | ||
391 | ``preempt``, ``wakeup`` and ``irq``. Each of these may be NULL in order to | ||
392 | indicate that you do not want a callback. | ||
393 | |||
394 | When the ``preempt`` function is called, it is because another driver | ||
395 | wishes to use the parallel port. The ``preempt`` function should return | ||
396 | non-zero if the parallel port cannot be released yet -- if zero is | ||
397 | returned, the port is lost to another driver and the port must be | ||
398 | re-claimed before use. | ||
399 | |||
400 | The ``wakeup`` function is called once another driver has released the | ||
401 | port and no other driver has yet claimed it. You can claim the | ||
402 | parallel port from within the ``wakeup`` function (in which case the | ||
403 | claim is guaranteed to succeed), or choose not to if you don't need it | ||
404 | now. | ||
405 | |||
406 | If an interrupt occurs on the parallel port your driver has claimed, | ||
407 | the ``irq`` function will be called. (Write something about shared | ||
408 | interrupts here.) | ||
409 | |||
410 | The ``handle`` is a pointer to driver-specific data, and is passed to | ||
411 | the callback functions. | ||
412 | |||
413 | ``flags`` may be a bitwise combination of the following flags: | ||
414 | |||
415 | ===================== ================================================= | ||
416 | Flag Meaning | ||
417 | ===================== ================================================= | ||
418 | PARPORT_DEV_EXCL The device cannot share the parallel port at all. | ||
419 | Use this only when absolutely necessary. | ||
420 | ===================== ================================================= | ||
421 | |||
422 | The typedefs are not actually defined -- they are only shown in order | ||
423 | to make the function prototype more readable. | ||
424 | |||
425 | The visible parts of the returned ``struct pardevice`` are:: | ||
426 | |||
427 | struct pardevice { | ||
428 | struct parport *port; /* Associated port */ | ||
429 | void *private; /* Device driver's 'handle' */ | ||
430 | ... | ||
431 | }; | ||
432 | |||
433 | RETURN VALUE | ||
434 | ^^^^^^^^^^^^ | ||
435 | |||
436 | A ``struct pardevice *``: a handle to the registered parallel port | ||
437 | device that can be used for parport_claim, parport_release, etc. | ||
438 | |||
439 | ERRORS | ||
440 | ^^^^^^ | ||
441 | |||
442 | A return value of NULL indicates that there was a problem registering | ||
443 | a device on that port. | ||
444 | |||
445 | EXAMPLE | ||
446 | ^^^^^^^ | ||
447 | |||
448 | :: | ||
449 | |||
450 | static int preempt (void *handle) | ||
451 | { | ||
452 | if (busy_right_now) | ||
453 | return 1; | ||
454 | |||
455 | must_reclaim_port = 1; | ||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | static void wakeup (void *handle) | ||
460 | { | ||
461 | struct toaster *private = handle; | ||
462 | struct pardevice *dev = private->dev; | ||
463 | if (!dev) return; /* avoid races */ | ||
464 | |||
465 | if (want_port) | ||
466 | parport_claim (dev); | ||
467 | } | ||
468 | |||
469 | static int toaster_detect (struct toaster *private, struct parport *port) | ||
470 | { | ||
471 | private->dev = parport_register_device (port, "toaster", preempt, | ||
472 | wakeup, NULL, 0, | ||
473 | private); | ||
474 | if (!private->dev) | ||
475 | /* Couldn't register with parport. */ | ||
476 | return -EIO; | ||
477 | |||
478 | must_reclaim_port = 0; | ||
479 | busy_right_now = 1; | ||
480 | parport_claim_or_block (private->dev); | ||
481 | ... | ||
482 | /* Don't need the port while the toaster warms up. */ | ||
483 | busy_right_now = 0; | ||
484 | ... | ||
485 | busy_right_now = 1; | ||
486 | if (must_reclaim_port) { | ||
487 | parport_claim_or_block (private->dev); | ||
488 | must_reclaim_port = 0; | ||
489 | } | ||
490 | ... | ||
491 | } | ||
492 | |||
493 | SEE ALSO | ||
494 | ^^^^^^^^ | ||
495 | |||
496 | parport_unregister_device, parport_claim | ||
497 | |||
498 | |||
499 | |||
500 | parport_unregister_device - finish using a port | ||
501 | ----------------------------------------------- | ||
502 | |||
503 | SYNOPSIS | ||
504 | |||
505 | :: | ||
506 | |||
507 | #include <linux/parport.h> | ||
508 | |||
509 | void parport_unregister_device (struct pardevice *dev); | ||
510 | |||
511 | DESCRIPTION | ||
512 | ^^^^^^^^^^^ | ||
513 | |||
514 | This function is the opposite of parport_register_device. After using | ||
515 | parport_unregister_device, ``dev`` is no longer a valid device handle. | ||
516 | |||
517 | You should not unregister a device that is currently claimed, although | ||
518 | if you do it will be released automatically. | ||
519 | |||
520 | EXAMPLE | ||
521 | ^^^^^^^ | ||
522 | |||
523 | :: | ||
524 | |||
525 | ... | ||
526 | kfree (dev->private); /* before we lose the pointer */ | ||
527 | parport_unregister_device (dev); | ||
528 | ... | ||
529 | |||
530 | SEE ALSO | ||
531 | ^^^^^^^^ | ||
532 | |||
533 | |||
534 | parport_unregister_driver | ||
535 | |||
536 | parport_claim, parport_claim_or_block - claim the parallel port for a device | ||
537 | ---------------------------------------------------------------------------- | ||
538 | |||
539 | SYNOPSIS | ||
540 | ^^^^^^^^ | ||
541 | |||
542 | :: | ||
543 | |||
544 | #include <linux/parport.h> | ||
545 | |||
546 | int parport_claim (struct pardevice *dev); | ||
547 | int parport_claim_or_block (struct pardevice *dev); | ||
548 | |||
549 | DESCRIPTION | ||
550 | ^^^^^^^^^^^ | ||
551 | |||
552 | These functions attempt to gain control of the parallel port on which | ||
553 | ``dev`` is registered. ``parport_claim`` does not block, but | ||
554 | ``parport_claim_or_block`` may do. (Put something here about blocking | ||
555 | interruptibly or non-interruptibly.) | ||
556 | |||
557 | You should not try to claim a port that you have already claimed. | ||
558 | |||
559 | RETURN VALUE | ||
560 | ^^^^^^^^^^^^ | ||
561 | |||
562 | A return value of zero indicates that the port was successfully | ||
563 | claimed, and the caller now has possession of the parallel port. | ||
564 | |||
565 | If ``parport_claim_or_block`` blocks before returning successfully, the | ||
566 | return value is positive. | ||
567 | |||
568 | ERRORS | ||
569 | ^^^^^^ | ||
570 | |||
571 | ========== ========================================================== | ||
572 | -EAGAIN The port is unavailable at the moment, but another attempt | ||
573 | to claim it may succeed. | ||
574 | ========== ========================================================== | ||
575 | |||
576 | SEE ALSO | ||
577 | ^^^^^^^^ | ||
578 | |||
579 | |||
580 | parport_release | ||
581 | |||
582 | parport_release - release the parallel port | ||
583 | ------------------------------------------- | ||
584 | |||
585 | SYNOPSIS | ||
586 | ^^^^^^^^ | ||
587 | |||
588 | :: | ||
589 | |||
590 | #include <linux/parport.h> | ||
591 | |||
592 | void parport_release (struct pardevice *dev); | ||
593 | |||
594 | DESCRIPTION | ||
595 | ^^^^^^^^^^^ | ||
596 | |||
597 | Once a parallel port device has been claimed, it can be released using | ||
598 | ``parport_release``. It cannot fail, but you should not release a | ||
599 | device that you do not have possession of. | ||
600 | |||
601 | EXAMPLE | ||
602 | ^^^^^^^ | ||
603 | |||
604 | :: | ||
605 | |||
606 | static size_t write (struct pardevice *dev, const void *buf, | ||
607 | size_t len) | ||
608 | { | ||
609 | ... | ||
610 | written = dev->port->ops->write_ecp_data (dev->port, buf, | ||
611 | len); | ||
612 | parport_release (dev); | ||
613 | ... | ||
614 | } | ||
615 | |||
616 | |||
617 | SEE ALSO | ||
618 | ^^^^^^^^ | ||
619 | |||
620 | change_mode, parport_claim, parport_claim_or_block, parport_yield | ||
621 | |||
622 | |||
623 | |||
624 | parport_yield, parport_yield_blocking - temporarily release a parallel port | ||
625 | --------------------------------------------------------------------------- | ||
626 | |||
627 | SYNOPSIS | ||
628 | ^^^^^^^^ | ||
629 | |||
630 | :: | ||
631 | |||
632 | #include <linux/parport.h> | ||
633 | |||
634 | int parport_yield (struct pardevice *dev); | ||
635 | int parport_yield_blocking (struct pardevice *dev); | ||
636 | |||
637 | DESCRIPTION | ||
638 | ^^^^^^^^^^^ | ||
639 | |||
640 | When a driver has control of a parallel port, it may allow another | ||
641 | driver to temporarily "borrow" it. ``parport_yield`` does not block; | ||
642 | ``parport_yield_blocking`` may do. | ||
643 | |||
644 | RETURN VALUE | ||
645 | ^^^^^^^^^^^^ | ||
646 | |||
647 | A return value of zero indicates that the caller still owns the port | ||
648 | and the call did not block. | ||
649 | |||
650 | A positive return value from ``parport_yield_blocking`` indicates that | ||
651 | the caller still owns the port and the call blocked. | ||
652 | |||
653 | A return value of -EAGAIN indicates that the caller no longer owns the | ||
654 | port, and it must be re-claimed before use. | ||
655 | |||
656 | ERRORS | ||
657 | ^^^^^^ | ||
658 | |||
659 | ========= ========================================================== | ||
660 | -EAGAIN Ownership of the parallel port was given away. | ||
661 | ========= ========================================================== | ||
662 | |||
663 | SEE ALSO | ||
664 | ^^^^^^^^ | ||
665 | |||
666 | parport_release | ||
667 | |||
668 | |||
669 | |||
670 | parport_wait_peripheral - wait for status lines, up to 35ms | ||
671 | ----------------------------------------------------------- | ||
672 | |||
673 | SYNOPSIS | ||
674 | ^^^^^^^^ | ||
675 | |||
676 | :: | ||
677 | |||
678 | #include <linux/parport.h> | ||
679 | |||
680 | int parport_wait_peripheral (struct parport *port, | ||
681 | unsigned char mask, | ||
682 | unsigned char val); | ||
683 | |||
684 | DESCRIPTION | ||
685 | ^^^^^^^^^^^ | ||
686 | |||
687 | Wait for the status lines in mask to match the values in val. | ||
688 | |||
689 | RETURN VALUE | ||
690 | ^^^^^^^^^^^^ | ||
691 | |||
692 | ======== ========================================================== | ||
693 | -EINTR a signal is pending | ||
694 | 0 the status lines in mask have values in val | ||
695 | 1 timed out while waiting (35ms elapsed) | ||
696 | ======== ========================================================== | ||
697 | |||
698 | SEE ALSO | ||
699 | ^^^^^^^^ | ||
700 | |||
701 | parport_poll_peripheral | ||
702 | |||
703 | |||
704 | |||
705 | parport_poll_peripheral - wait for status lines, in usec | ||
706 | -------------------------------------------------------- | ||
707 | |||
708 | SYNOPSIS | ||
709 | ^^^^^^^^ | ||
710 | |||
711 | :: | ||
712 | |||
713 | #include <linux/parport.h> | ||
714 | |||
715 | int parport_poll_peripheral (struct parport *port, | ||
716 | unsigned char mask, | ||
717 | unsigned char val, | ||
718 | int usec); | ||
719 | |||
720 | DESCRIPTION | ||
721 | ^^^^^^^^^^^ | ||
722 | |||
723 | Wait for the status lines in mask to match the values in val. | ||
724 | |||
725 | RETURN VALUE | ||
726 | ^^^^^^^^^^^^ | ||
727 | |||
728 | ======== ========================================================== | ||
729 | -EINTR a signal is pending | ||
730 | 0 the status lines in mask have values in val | ||
731 | 1 timed out while waiting (usec microseconds have elapsed) | ||
732 | ======== ========================================================== | ||
733 | |||
734 | SEE ALSO | ||
735 | ^^^^^^^^ | ||
736 | |||
737 | parport_wait_peripheral | ||
738 | |||
739 | |||
740 | |||
741 | parport_wait_event - wait for an event on a port | ||
742 | ------------------------------------------------ | ||
743 | |||
744 | SYNOPSIS | ||
745 | ^^^^^^^^ | ||
746 | |||
747 | :: | ||
748 | |||
749 | #include <linux/parport.h> | ||
750 | |||
751 | int parport_wait_event (struct parport *port, signed long timeout); | ||
752 | |||
753 | DESCRIPTION | ||
754 | ^^^^^^^^^^^ | ||
755 | |||
756 | Wait for an event (e.g. interrupt) on a port. The timeout is in | ||
757 | jiffies. | ||
758 | |||
759 | RETURN VALUE | ||
760 | ^^^^^^^^^^^^ | ||
761 | |||
762 | ======= ========================================================== | ||
763 | 0 success | ||
764 | <0 error (exit as soon as possible) | ||
765 | >0 timed out | ||
766 | ======= ========================================================== | ||
767 | |||
768 | parport_negotiate - perform IEEE 1284 negotiation | ||
769 | ------------------------------------------------- | ||
770 | |||
771 | SYNOPSIS | ||
772 | ^^^^^^^^ | ||
773 | |||
774 | :: | ||
775 | |||
776 | #include <linux/parport.h> | ||
777 | |||
778 | int parport_negotiate (struct parport *, int mode); | ||
779 | |||
780 | DESCRIPTION | ||
781 | ^^^^^^^^^^^ | ||
782 | |||
783 | Perform IEEE 1284 negotiation. | ||
784 | |||
785 | RETURN VALUE | ||
786 | ^^^^^^^^^^^^ | ||
787 | |||
788 | ======= ========================================================== | ||
789 | 0 handshake OK; IEEE 1284 peripheral and mode available | ||
790 | -1 handshake failed; peripheral not compliant (or none present) | ||
791 | 1 handshake OK; IEEE 1284 peripheral present but mode not | ||
792 | available | ||
793 | ======= ========================================================== | ||
794 | |||
795 | SEE ALSO | ||
796 | ^^^^^^^^ | ||
797 | |||
798 | parport_read, parport_write | ||
799 | |||
800 | |||
801 | |||
802 | parport_read - read data from device | ||
803 | ------------------------------------ | ||
804 | |||
805 | SYNOPSIS | ||
806 | ^^^^^^^^ | ||
807 | |||
808 | :: | ||
809 | |||
810 | #include <linux/parport.h> | ||
811 | |||
812 | ssize_t parport_read (struct parport *, void *buf, size_t len); | ||
813 | |||
814 | DESCRIPTION | ||
815 | ^^^^^^^^^^^ | ||
816 | |||
817 | Read data from device in current IEEE 1284 transfer mode. This only | ||
818 | works for modes that support reverse data transfer. | ||
819 | |||
820 | RETURN VALUE | ||
821 | ^^^^^^^^^^^^ | ||
822 | |||
823 | If negative, an error code; otherwise the number of bytes transferred. | ||
824 | |||
825 | SEE ALSO | ||
826 | ^^^^^^^^ | ||
827 | |||
828 | parport_write, parport_negotiate | ||
829 | |||
830 | |||
831 | |||
832 | parport_write - write data to device | ||
833 | ------------------------------------ | ||
834 | |||
835 | SYNOPSIS | ||
836 | ^^^^^^^^ | ||
837 | |||
838 | :: | ||
839 | |||
840 | #include <linux/parport.h> | ||
841 | |||
842 | ssize_t parport_write (struct parport *, const void *buf, size_t len); | ||
843 | |||
844 | DESCRIPTION | ||
845 | ^^^^^^^^^^^ | ||
846 | |||
847 | Write data to device in current IEEE 1284 transfer mode. This only | ||
848 | works for modes that support forward data transfer. | ||
849 | |||
850 | RETURN VALUE | ||
851 | ^^^^^^^^^^^^ | ||
852 | |||
853 | If negative, an error code; otherwise the number of bytes transferred. | ||
854 | |||
855 | SEE ALSO | ||
856 | ^^^^^^^^ | ||
857 | |||
858 | parport_read, parport_negotiate | ||
859 | |||
860 | |||
861 | |||
862 | parport_open - register device for particular device number | ||
863 | ----------------------------------------------------------- | ||
864 | |||
865 | SYNOPSIS | ||
866 | ^^^^^^^^ | ||
867 | |||
868 | :: | ||
869 | |||
870 | #include <linux/parport.h> | ||
871 | |||
872 | struct pardevice *parport_open (int devnum, const char *name, | ||
873 | int (*pf) (void *), | ||
874 | void (*kf) (void *), | ||
875 | void (*irqf) (int, void *, | ||
876 | struct pt_regs *), | ||
877 | int flags, void *handle); | ||
878 | |||
879 | DESCRIPTION | ||
880 | ^^^^^^^^^^^ | ||
881 | |||
882 | This is like parport_register_device but takes a device number instead | ||
883 | of a pointer to a struct parport. | ||
884 | |||
885 | RETURN VALUE | ||
886 | ^^^^^^^^^^^^ | ||
887 | |||
888 | See parport_register_device. If no device is associated with devnum, | ||
889 | NULL is returned. | ||
890 | |||
891 | SEE ALSO | ||
892 | ^^^^^^^^ | ||
893 | |||
894 | parport_register_device | ||
895 | |||
896 | |||
897 | |||
898 | parport_close - unregister device for particular device number | ||
899 | -------------------------------------------------------------- | ||
900 | |||
901 | SYNOPSIS | ||
902 | ^^^^^^^^ | ||
903 | |||
904 | :: | ||
905 | |||
906 | #include <linux/parport.h> | ||
907 | |||
908 | void parport_close (struct pardevice *dev); | ||
909 | |||
910 | DESCRIPTION | ||
911 | ^^^^^^^^^^^ | ||
912 | |||
913 | This is the equivalent of parport_unregister_device for parport_open. | ||
914 | |||
915 | SEE ALSO | ||
916 | ^^^^^^^^ | ||
917 | |||
918 | parport_unregister_device, parport_open | ||
919 | |||
920 | |||
921 | |||
922 | parport_device_id - obtain IEEE 1284 Device ID | ||
923 | ---------------------------------------------- | ||
924 | |||
925 | SYNOPSIS | ||
926 | ^^^^^^^^ | ||
927 | |||
928 | :: | ||
929 | |||
930 | #include <linux/parport.h> | ||
931 | |||
932 | ssize_t parport_device_id (int devnum, char *buffer, size_t len); | ||
933 | |||
934 | DESCRIPTION | ||
935 | ^^^^^^^^^^^ | ||
936 | |||
937 | Obtains the IEEE 1284 Device ID associated with a given device. | ||
938 | |||
939 | RETURN VALUE | ||
940 | ^^^^^^^^^^^^ | ||
941 | |||
942 | If negative, an error code; otherwise, the number of bytes of buffer | ||
943 | that contain the device ID. The format of the device ID is as | ||
944 | follows:: | ||
945 | |||
946 | [length][ID] | ||
947 | |||
948 | The first two bytes indicate the inclusive length of the entire Device | ||
949 | ID, and are in big-endian order. The ID is a sequence of pairs of the | ||
950 | form:: | ||
951 | |||
952 | key:value; | ||
953 | |||
954 | NOTES | ||
955 | ^^^^^ | ||
956 | |||
957 | Many devices have ill-formed IEEE 1284 Device IDs. | ||
958 | |||
959 | SEE ALSO | ||
960 | ^^^^^^^^ | ||
961 | |||
962 | parport_find_class, parport_find_device | ||
963 | |||
964 | |||
965 | |||
966 | parport_device_coords - convert device number to device coordinates | ||
967 | ------------------------------------------------------------------- | ||
968 | |||
969 | SYNOPSIS | ||
970 | ^^^^^^^^ | ||
971 | |||
972 | :: | ||
973 | |||
974 | #include <linux/parport.h> | ||
975 | |||
976 | int parport_device_coords (int devnum, int *parport, int *mux, | ||
977 | int *daisy); | ||
978 | |||
979 | DESCRIPTION | ||
980 | ^^^^^^^^^^^ | ||
981 | |||
982 | Convert between device number (zero-based) and device coordinates | ||
983 | (port, multiplexor, daisy chain address). | ||
984 | |||
985 | RETURN VALUE | ||
986 | ^^^^^^^^^^^^ | ||
987 | |||
988 | Zero on success, in which case the coordinates are (``*parport``, ``*mux``, | ||
989 | ``*daisy``). | ||
990 | |||
991 | SEE ALSO | ||
992 | ^^^^^^^^ | ||
993 | |||
994 | parport_open, parport_device_id | ||
995 | |||
996 | |||
997 | |||
998 | parport_find_class - find a device by its class | ||
999 | ----------------------------------------------- | ||
1000 | |||
1001 | SYNOPSIS | ||
1002 | ^^^^^^^^ | ||
1003 | |||
1004 | :: | ||
1005 | |||
1006 | #include <linux/parport.h> | ||
1007 | |||
1008 | typedef enum { | ||
1009 | PARPORT_CLASS_LEGACY = 0, /* Non-IEEE1284 device */ | ||
1010 | PARPORT_CLASS_PRINTER, | ||
1011 | PARPORT_CLASS_MODEM, | ||
1012 | PARPORT_CLASS_NET, | ||
1013 | PARPORT_CLASS_HDC, /* Hard disk controller */ | ||
1014 | PARPORT_CLASS_PCMCIA, | ||
1015 | PARPORT_CLASS_MEDIA, /* Multimedia device */ | ||
1016 | PARPORT_CLASS_FDC, /* Floppy disk controller */ | ||
1017 | PARPORT_CLASS_PORTS, | ||
1018 | PARPORT_CLASS_SCANNER, | ||
1019 | PARPORT_CLASS_DIGCAM, | ||
1020 | PARPORT_CLASS_OTHER, /* Anything else */ | ||
1021 | PARPORT_CLASS_UNSPEC, /* No CLS field in ID */ | ||
1022 | PARPORT_CLASS_SCSIADAPTER | ||
1023 | } parport_device_class; | ||
1024 | |||
1025 | int parport_find_class (parport_device_class cls, int from); | ||
1026 | |||
1027 | DESCRIPTION | ||
1028 | ^^^^^^^^^^^ | ||
1029 | |||
1030 | Find a device by class. The search starts from device number from+1. | ||
1031 | |||
1032 | RETURN VALUE | ||
1033 | ^^^^^^^^^^^^ | ||
1034 | |||
1035 | The device number of the next device in that class, or -1 if no such | ||
1036 | device exists. | ||
1037 | |||
1038 | NOTES | ||
1039 | ^^^^^ | ||
1040 | |||
1041 | Example usage:: | ||
1042 | |||
1043 | int devnum = -1; | ||
1044 | while ((devnum = parport_find_class (PARPORT_CLASS_DIGCAM, devnum)) != -1) { | ||
1045 | struct pardevice *dev = parport_open (devnum, ...); | ||
1046 | ... | ||
1047 | } | ||
1048 | |||
1049 | SEE ALSO | ||
1050 | ^^^^^^^^ | ||
1051 | |||
1052 | parport_find_device, parport_open, parport_device_id | ||
1053 | |||
1054 | |||
1055 | |||
1056 | parport_find_device - find a device by its vendor and model | ||
1057 | ----------------------------------------------------------- | ||
1058 | |||
1059 | SYNOPSIS | ||
1060 | ^^^^^^^^ | ||
1061 | |||
1062 | :: | ||
1063 | |||
1064 | #include <linux/parport.h> | ||
1065 | |||
1066 | int parport_find_device (const char *mfg, const char *mdl, int from); | ||
1067 | |||
1068 | DESCRIPTION | ||
1069 | ^^^^^^^^^^^ | ||
1070 | |||
1071 | Find a device by vendor and model. The search starts from device | ||
1072 | number from+1. | ||
1073 | |||
1074 | RETURN VALUE | ||
1075 | ^^^^^^^^^^^^ | ||
1076 | |||
1077 | The device number of the next device matching the specifications, or | ||
1078 | -1 if no such device exists. | ||
1079 | |||
1080 | NOTES | ||
1081 | ^^^^^ | ||
1082 | |||
1083 | Example usage:: | ||
1084 | |||
1085 | int devnum = -1; | ||
1086 | while ((devnum = parport_find_device ("IOMEGA", "ZIP+", devnum)) != -1) { | ||
1087 | struct pardevice *dev = parport_open (devnum, ...); | ||
1088 | ... | ||
1089 | } | ||
1090 | |||
1091 | SEE ALSO | ||
1092 | ^^^^^^^^ | ||
1093 | |||
1094 | parport_find_class, parport_open, parport_device_id | ||
1095 | |||
1096 | |||
1097 | |||
1098 | parport_set_timeout - set the inactivity timeout | ||
1099 | ------------------------------------------------ | ||
1100 | |||
1101 | SYNOPSIS | ||
1102 | ^^^^^^^^ | ||
1103 | |||
1104 | :: | ||
1105 | |||
1106 | #include <linux/parport.h> | ||
1107 | |||
1108 | long parport_set_timeout (struct pardevice *dev, long inactivity); | ||
1109 | |||
1110 | DESCRIPTION | ||
1111 | ^^^^^^^^^^^ | ||
1112 | |||
1113 | Set the inactivity timeout, in jiffies, for a registered device. The | ||
1114 | previous timeout is returned. | ||
1115 | |||
1116 | RETURN VALUE | ||
1117 | ^^^^^^^^^^^^ | ||
1118 | |||
1119 | The previous timeout, in jiffies. | ||
1120 | |||
1121 | NOTES | ||
1122 | ^^^^^ | ||
1123 | |||
1124 | Some of the port->ops functions for a parport may take time, owing to | ||
1125 | delays at the peripheral. After the peripheral has not responded for | ||
1126 | ``inactivity`` jiffies, a timeout will occur and the blocking function | ||
1127 | will return. | ||
1128 | |||
1129 | A timeout of 0 jiffies is a special case: the function must do as much | ||
1130 | as it can without blocking or leaving the hardware in an unknown | ||
1131 | state. If port operations are performed from within an interrupt | ||
1132 | handler, for instance, a timeout of 0 jiffies should be used. | ||
1133 | |||
1134 | Once set for a registered device, the timeout will remain at the set | ||
1135 | value until set again. | ||
1136 | |||
1137 | SEE ALSO | ||
1138 | ^^^^^^^^ | ||
1139 | |||
1140 | port->ops->xxx_read/write_yyy | ||
1141 | |||
1142 | |||
1143 | |||
1144 | |||
1145 | PORT FUNCTIONS | ||
1146 | ============== | ||
1147 | |||
1148 | The functions in the port->ops structure (struct parport_operations) | ||
1149 | are provided by the low-level driver responsible for that port. | ||
1150 | |||
1151 | port->ops->read_data - read the data register | ||
1152 | --------------------------------------------- | ||
1153 | |||
1154 | SYNOPSIS | ||
1155 | ^^^^^^^^ | ||
1156 | |||
1157 | :: | ||
1158 | |||
1159 | #include <linux/parport.h> | ||
1160 | |||
1161 | struct parport_operations { | ||
1162 | ... | ||
1163 | unsigned char (*read_data) (struct parport *port); | ||
1164 | ... | ||
1165 | }; | ||
1166 | |||
1167 | DESCRIPTION | ||
1168 | ^^^^^^^^^^^ | ||
1169 | |||
1170 | If port->modes contains the PARPORT_MODE_TRISTATE flag and the | ||
1171 | PARPORT_CONTROL_DIRECTION bit in the control register is set, this | ||
1172 | returns the value on the data pins. If port->modes contains the | ||
1173 | PARPORT_MODE_TRISTATE flag and the PARPORT_CONTROL_DIRECTION bit is | ||
1174 | not set, the return value _may_ be the last value written to the data | ||
1175 | register. Otherwise the return value is undefined. | ||
1176 | |||
1177 | SEE ALSO | ||
1178 | ^^^^^^^^ | ||
1179 | |||
1180 | write_data, read_status, write_control | ||
1181 | |||
1182 | |||
1183 | |||
1184 | port->ops->write_data - write the data register | ||
1185 | ----------------------------------------------- | ||
1186 | |||
1187 | SYNOPSIS | ||
1188 | ^^^^^^^^ | ||
1189 | |||
1190 | :: | ||
1191 | |||
1192 | #include <linux/parport.h> | ||
1193 | |||
1194 | struct parport_operations { | ||
1195 | ... | ||
1196 | void (*write_data) (struct parport *port, unsigned char d); | ||
1197 | ... | ||
1198 | }; | ||
1199 | |||
1200 | DESCRIPTION | ||
1201 | ^^^^^^^^^^^ | ||
1202 | |||
1203 | Writes to the data register. May have side-effects (a STROBE pulse, | ||
1204 | for instance). | ||
1205 | |||
1206 | SEE ALSO | ||
1207 | ^^^^^^^^ | ||
1208 | |||
1209 | read_data, read_status, write_control | ||
1210 | |||
1211 | |||
1212 | |||
1213 | port->ops->read_status - read the status register | ||
1214 | ------------------------------------------------- | ||
1215 | |||
1216 | SYNOPSIS | ||
1217 | ^^^^^^^^ | ||
1218 | |||
1219 | :: | ||
1220 | |||
1221 | #include <linux/parport.h> | ||
1222 | |||
1223 | struct parport_operations { | ||
1224 | ... | ||
1225 | unsigned char (*read_status) (struct parport *port); | ||
1226 | ... | ||
1227 | }; | ||
1228 | |||
1229 | DESCRIPTION | ||
1230 | ^^^^^^^^^^^ | ||
1231 | |||
1232 | Reads from the status register. This is a bitmask: | ||
1233 | |||
1234 | - PARPORT_STATUS_ERROR (printer fault, "nFault") | ||
1235 | - PARPORT_STATUS_SELECT (on-line, "Select") | ||
1236 | - PARPORT_STATUS_PAPEROUT (no paper, "PError") | ||
1237 | - PARPORT_STATUS_ACK (handshake, "nAck") | ||
1238 | - PARPORT_STATUS_BUSY (busy, "Busy") | ||
1239 | |||
1240 | There may be other bits set. | ||
1241 | |||
1242 | SEE ALSO | ||
1243 | ^^^^^^^^ | ||
1244 | |||
1245 | read_data, write_data, write_control | ||
1246 | |||
1247 | |||
1248 | |||
1249 | port->ops->read_control - read the control register | ||
1250 | --------------------------------------------------- | ||
1251 | |||
1252 | SYNOPSIS | ||
1253 | ^^^^^^^^ | ||
1254 | |||
1255 | :: | ||
1256 | |||
1257 | #include <linux/parport.h> | ||
1258 | |||
1259 | struct parport_operations { | ||
1260 | ... | ||
1261 | unsigned char (*read_control) (struct parport *port); | ||
1262 | ... | ||
1263 | }; | ||
1264 | |||
1265 | DESCRIPTION | ||
1266 | ^^^^^^^^^^^ | ||
1267 | |||
1268 | Returns the last value written to the control register (either from | ||
1269 | write_control or frob_control). No port access is performed. | ||
1270 | |||
1271 | SEE ALSO | ||
1272 | ^^^^^^^^ | ||
1273 | |||
1274 | read_data, write_data, read_status, write_control | ||
1275 | |||
1276 | |||
1277 | |||
1278 | port->ops->write_control - write the control register | ||
1279 | ----------------------------------------------------- | ||
1280 | |||
1281 | SYNOPSIS | ||
1282 | ^^^^^^^^ | ||
1283 | |||
1284 | :: | ||
1285 | |||
1286 | #include <linux/parport.h> | ||
1287 | |||
1288 | struct parport_operations { | ||
1289 | ... | ||
1290 | void (*write_control) (struct parport *port, unsigned char s); | ||
1291 | ... | ||
1292 | }; | ||
1293 | |||
1294 | DESCRIPTION | ||
1295 | ^^^^^^^^^^^ | ||
1296 | |||
1297 | Writes to the control register. This is a bitmask:: | ||
1298 | |||
1299 | _______ | ||
1300 | - PARPORT_CONTROL_STROBE (nStrobe) | ||
1301 | _______ | ||
1302 | - PARPORT_CONTROL_AUTOFD (nAutoFd) | ||
1303 | _____ | ||
1304 | - PARPORT_CONTROL_INIT (nInit) | ||
1305 | _________ | ||
1306 | - PARPORT_CONTROL_SELECT (nSelectIn) | ||
1307 | |||
1308 | SEE ALSO | ||
1309 | ^^^^^^^^ | ||
1310 | |||
1311 | read_data, write_data, read_status, frob_control | ||
1312 | |||
1313 | |||
1314 | |||
1315 | port->ops->frob_control - write control register bits | ||
1316 | ----------------------------------------------------- | ||
1317 | |||
1318 | SYNOPSIS | ||
1319 | ^^^^^^^^ | ||
1320 | |||
1321 | :: | ||
1322 | |||
1323 | #include <linux/parport.h> | ||
1324 | |||
1325 | struct parport_operations { | ||
1326 | ... | ||
1327 | unsigned char (*frob_control) (struct parport *port, | ||
1328 | unsigned char mask, | ||
1329 | unsigned char val); | ||
1330 | ... | ||
1331 | }; | ||
1332 | |||
1333 | DESCRIPTION | ||
1334 | ^^^^^^^^^^^ | ||
1335 | |||
1336 | This is equivalent to reading from the control register, masking out | ||
1337 | the bits in mask, exclusive-or'ing with the bits in val, and writing | ||
1338 | the result to the control register. | ||
1339 | |||
1340 | As some ports don't allow reads from the control port, a software copy | ||
1341 | of its contents is maintained, so frob_control is in fact only one | ||
1342 | port access. | ||
1343 | |||
1344 | SEE ALSO | ||
1345 | ^^^^^^^^ | ||
1346 | |||
1347 | read_data, write_data, read_status, write_control | ||
1348 | |||
1349 | |||
1350 | |||
1351 | port->ops->enable_irq - enable interrupt generation | ||
1352 | --------------------------------------------------- | ||
1353 | |||
1354 | SYNOPSIS | ||
1355 | ^^^^^^^^ | ||
1356 | |||
1357 | :: | ||
1358 | |||
1359 | #include <linux/parport.h> | ||
1360 | |||
1361 | struct parport_operations { | ||
1362 | ... | ||
1363 | void (*enable_irq) (struct parport *port); | ||
1364 | ... | ||
1365 | }; | ||
1366 | |||
1367 | DESCRIPTION | ||
1368 | ^^^^^^^^^^^ | ||
1369 | |||
1370 | The parallel port hardware is instructed to generate interrupts at | ||
1371 | appropriate moments, although those moments are | ||
1372 | architecture-specific. For the PC architecture, interrupts are | ||
1373 | commonly generated on the rising edge of nAck. | ||
1374 | |||
1375 | SEE ALSO | ||
1376 | ^^^^^^^^ | ||
1377 | |||
1378 | disable_irq | ||
1379 | |||
1380 | |||
1381 | |||
1382 | port->ops->disable_irq - disable interrupt generation | ||
1383 | ----------------------------------------------------- | ||
1384 | |||
1385 | SYNOPSIS | ||
1386 | ^^^^^^^^ | ||
1387 | |||
1388 | :: | ||
1389 | |||
1390 | #include <linux/parport.h> | ||
1391 | |||
1392 | struct parport_operations { | ||
1393 | ... | ||
1394 | void (*disable_irq) (struct parport *port); | ||
1395 | ... | ||
1396 | }; | ||
1397 | |||
1398 | DESCRIPTION | ||
1399 | ^^^^^^^^^^^ | ||
1400 | |||
1401 | The parallel port hardware is instructed not to generate interrupts. | ||
1402 | The interrupt itself is not masked. | ||
1403 | |||
1404 | SEE ALSO | ||
1405 | ^^^^^^^^ | ||
1406 | |||
1407 | enable_irq | ||
1408 | |||
1409 | |||
1410 | |||
1411 | port->ops->data_forward - enable data drivers | ||
1412 | --------------------------------------------- | ||
1413 | |||
1414 | SYNOPSIS | ||
1415 | ^^^^^^^^ | ||
1416 | |||
1417 | :: | ||
1418 | |||
1419 | #include <linux/parport.h> | ||
1420 | |||
1421 | struct parport_operations { | ||
1422 | ... | ||
1423 | void (*data_forward) (struct parport *port); | ||
1424 | ... | ||
1425 | }; | ||
1426 | |||
1427 | DESCRIPTION | ||
1428 | ^^^^^^^^^^^ | ||
1429 | |||
1430 | Enables the data line drivers, for 8-bit host-to-peripheral | ||
1431 | communications. | ||
1432 | |||
1433 | SEE ALSO | ||
1434 | ^^^^^^^^ | ||
1435 | |||
1436 | data_reverse | ||
1437 | |||
1438 | |||
1439 | |||
1440 | port->ops->data_reverse - tristate the buffer | ||
1441 | --------------------------------------------- | ||
1442 | |||
1443 | SYNOPSIS | ||
1444 | ^^^^^^^^ | ||
1445 | |||
1446 | :: | ||
1447 | |||
1448 | #include <linux/parport.h> | ||
1449 | |||
1450 | struct parport_operations { | ||
1451 | ... | ||
1452 | void (*data_reverse) (struct parport *port); | ||
1453 | ... | ||
1454 | }; | ||
1455 | |||
1456 | DESCRIPTION | ||
1457 | ^^^^^^^^^^^ | ||
1458 | |||
1459 | Places the data bus in a high impedance state, if port->modes has the | ||
1460 | PARPORT_MODE_TRISTATE bit set. | ||
1461 | |||
1462 | SEE ALSO | ||
1463 | ^^^^^^^^ | ||
1464 | |||
1465 | data_forward | ||
1466 | |||
1467 | |||
1468 | |||
1469 | port->ops->epp_write_data - write EPP data | ||
1470 | ------------------------------------------ | ||
1471 | |||
1472 | SYNOPSIS | ||
1473 | ^^^^^^^^ | ||
1474 | |||
1475 | :: | ||
1476 | |||
1477 | #include <linux/parport.h> | ||
1478 | |||
1479 | struct parport_operations { | ||
1480 | ... | ||
1481 | size_t (*epp_write_data) (struct parport *port, const void *buf, | ||
1482 | size_t len, int flags); | ||
1483 | ... | ||
1484 | }; | ||
1485 | |||
1486 | DESCRIPTION | ||
1487 | ^^^^^^^^^^^ | ||
1488 | |||
1489 | Writes data in EPP mode, and returns the number of bytes written. | ||
1490 | |||
1491 | The ``flags`` parameter may be one or more of the following, | ||
1492 | bitwise-or'ed together: | ||
1493 | |||
1494 | ======================= ================================================= | ||
1495 | PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and | ||
1496 | 32-bit registers. However, if a transfer | ||
1497 | times out, the return value may be unreliable. | ||
1498 | ======================= ================================================= | ||
1499 | |||
1500 | SEE ALSO | ||
1501 | ^^^^^^^^ | ||
1502 | |||
1503 | epp_read_data, epp_write_addr, epp_read_addr | ||
1504 | |||
1505 | |||
1506 | |||
1507 | port->ops->epp_read_data - read EPP data | ||
1508 | ---------------------------------------- | ||
1509 | |||
1510 | SYNOPSIS | ||
1511 | ^^^^^^^^ | ||
1512 | |||
1513 | :: | ||
1514 | |||
1515 | #include <linux/parport.h> | ||
1516 | |||
1517 | struct parport_operations { | ||
1518 | ... | ||
1519 | size_t (*epp_read_data) (struct parport *port, void *buf, | ||
1520 | size_t len, int flags); | ||
1521 | ... | ||
1522 | }; | ||
1523 | |||
1524 | DESCRIPTION | ||
1525 | ^^^^^^^^^^^ | ||
1526 | |||
1527 | Reads data in EPP mode, and returns the number of bytes read. | ||
1528 | |||
1529 | The ``flags`` parameter may be one or more of the following, | ||
1530 | bitwise-or'ed together: | ||
1531 | |||
1532 | ======================= ================================================= | ||
1533 | PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and | ||
1534 | 32-bit registers. However, if a transfer | ||
1535 | times out, the return value may be unreliable. | ||
1536 | ======================= ================================================= | ||
1537 | |||
1538 | SEE ALSO | ||
1539 | ^^^^^^^^ | ||
1540 | |||
1541 | epp_write_data, epp_write_addr, epp_read_addr | ||
1542 | |||
1543 | |||
1544 | |||
1545 | port->ops->epp_write_addr - write EPP address | ||
1546 | --------------------------------------------- | ||
1547 | |||
1548 | SYNOPSIS | ||
1549 | ^^^^^^^^ | ||
1550 | |||
1551 | :: | ||
1552 | |||
1553 | #include <linux/parport.h> | ||
1554 | |||
1555 | struct parport_operations { | ||
1556 | ... | ||
1557 | size_t (*epp_write_addr) (struct parport *port, | ||
1558 | const void *buf, size_t len, int flags); | ||
1559 | ... | ||
1560 | }; | ||
1561 | |||
1562 | DESCRIPTION | ||
1563 | ^^^^^^^^^^^ | ||
1564 | |||
1565 | Writes EPP addresses (8 bits each), and returns the number written. | ||
1566 | |||
1567 | The ``flags`` parameter may be one or more of the following, | ||
1568 | bitwise-or'ed together: | ||
1569 | |||
1570 | ======================= ================================================= | ||
1571 | PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and | ||
1572 | 32-bit registers. However, if a transfer | ||
1573 | times out, the return value may be unreliable. | ||
1574 | ======================= ================================================= | ||
1575 | |||
1576 | (Does PARPORT_EPP_FAST make sense for this function?) | ||
1577 | |||
1578 | SEE ALSO | ||
1579 | ^^^^^^^^ | ||
1580 | |||
1581 | epp_write_data, epp_read_data, epp_read_addr | ||
1582 | |||
1583 | |||
1584 | |||
1585 | port->ops->epp_read_addr - read EPP address | ||
1586 | ------------------------------------------- | ||
1587 | |||
1588 | SYNOPSIS | ||
1589 | ^^^^^^^^ | ||
1590 | |||
1591 | :: | ||
1592 | |||
1593 | #include <linux/parport.h> | ||
1594 | |||
1595 | struct parport_operations { | ||
1596 | ... | ||
1597 | size_t (*epp_read_addr) (struct parport *port, void *buf, | ||
1598 | size_t len, int flags); | ||
1599 | ... | ||
1600 | }; | ||
1601 | |||
1602 | DESCRIPTION | ||
1603 | ^^^^^^^^^^^ | ||
1604 | |||
1605 | Reads EPP addresses (8 bits each), and returns the number read. | ||
1606 | |||
1607 | The ``flags`` parameter may be one or more of the following, | ||
1608 | bitwise-or'ed together: | ||
1609 | |||
1610 | ======================= ================================================= | ||
1611 | PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and | ||
1612 | 32-bit registers. However, if a transfer | ||
1613 | times out, the return value may be unreliable. | ||
1614 | ======================= ================================================= | ||
1615 | |||
1616 | (Does PARPORT_EPP_FAST make sense for this function?) | ||
1617 | |||
1618 | SEE ALSO | ||
1619 | ^^^^^^^^ | ||
1620 | |||
1621 | epp_write_data, epp_read_data, epp_write_addr | ||
1622 | |||
1623 | |||
1624 | |||
1625 | port->ops->ecp_write_data - write a block of ECP data | ||
1626 | ----------------------------------------------------- | ||
1627 | |||
1628 | SYNOPSIS | ||
1629 | ^^^^^^^^ | ||
1630 | |||
1631 | :: | ||
1632 | |||
1633 | #include <linux/parport.h> | ||
1634 | |||
1635 | struct parport_operations { | ||
1636 | ... | ||
1637 | size_t (*ecp_write_data) (struct parport *port, | ||
1638 | const void *buf, size_t len, int flags); | ||
1639 | ... | ||
1640 | }; | ||
1641 | |||
1642 | DESCRIPTION | ||
1643 | ^^^^^^^^^^^ | ||
1644 | |||
1645 | Writes a block of ECP data. The ``flags`` parameter is ignored. | ||
1646 | |||
1647 | RETURN VALUE | ||
1648 | ^^^^^^^^^^^^ | ||
1649 | |||
1650 | The number of bytes written. | ||
1651 | |||
1652 | SEE ALSO | ||
1653 | ^^^^^^^^ | ||
1654 | |||
1655 | ecp_read_data, ecp_write_addr | ||
1656 | |||
1657 | |||
1658 | |||
1659 | port->ops->ecp_read_data - read a block of ECP data | ||
1660 | --------------------------------------------------- | ||
1661 | |||
1662 | SYNOPSIS | ||
1663 | ^^^^^^^^ | ||
1664 | |||
1665 | :: | ||
1666 | |||
1667 | #include <linux/parport.h> | ||
1668 | |||
1669 | struct parport_operations { | ||
1670 | ... | ||
1671 | size_t (*ecp_read_data) (struct parport *port, | ||
1672 | void *buf, size_t len, int flags); | ||
1673 | ... | ||
1674 | }; | ||
1675 | |||
1676 | DESCRIPTION | ||
1677 | ^^^^^^^^^^^ | ||
1678 | |||
1679 | Reads a block of ECP data. The ``flags`` parameter is ignored. | ||
1680 | |||
1681 | RETURN VALUE | ||
1682 | ^^^^^^^^^^^^ | ||
1683 | |||
1684 | The number of bytes read. NB. There may be more unread data in a | ||
1685 | FIFO. Is there a way of stunning the FIFO to prevent this? | ||
1686 | |||
1687 | SEE ALSO | ||
1688 | ^^^^^^^^ | ||
1689 | |||
1690 | ecp_write_data, ecp_write_addr | ||
1691 | |||
1692 | |||
1693 | |||
1694 | port->ops->ecp_write_addr - write a block of ECP addresses | ||
1695 | ---------------------------------------------------------- | ||
1696 | |||
1697 | SYNOPSIS | ||
1698 | ^^^^^^^^ | ||
1699 | |||
1700 | :: | ||
1701 | |||
1702 | #include <linux/parport.h> | ||
1703 | |||
1704 | struct parport_operations { | ||
1705 | ... | ||
1706 | size_t (*ecp_write_addr) (struct parport *port, | ||
1707 | const void *buf, size_t len, int flags); | ||
1708 | ... | ||
1709 | }; | ||
1710 | |||
1711 | DESCRIPTION | ||
1712 | ^^^^^^^^^^^ | ||
1713 | |||
1714 | Writes a block of ECP addresses. The ``flags`` parameter is ignored. | ||
1715 | |||
1716 | RETURN VALUE | ||
1717 | ^^^^^^^^^^^^ | ||
1718 | |||
1719 | The number of bytes written. | ||
1720 | |||
1721 | NOTES | ||
1722 | ^^^^^ | ||
1723 | |||
1724 | This may use a FIFO, and if so shall not return until the FIFO is empty. | ||
1725 | |||
1726 | SEE ALSO | ||
1727 | ^^^^^^^^ | ||
1728 | |||
1729 | ecp_read_data, ecp_write_data | ||
1730 | |||
1731 | |||
1732 | |||
1733 | port->ops->nibble_read_data - read a block of data in nibble mode | ||
1734 | ----------------------------------------------------------------- | ||
1735 | |||
1736 | SYNOPSIS | ||
1737 | ^^^^^^^^ | ||
1738 | |||
1739 | :: | ||
1740 | |||
1741 | #include <linux/parport.h> | ||
1742 | |||
1743 | struct parport_operations { | ||
1744 | ... | ||
1745 | size_t (*nibble_read_data) (struct parport *port, | ||
1746 | void *buf, size_t len, int flags); | ||
1747 | ... | ||
1748 | }; | ||
1749 | |||
1750 | DESCRIPTION | ||
1751 | ^^^^^^^^^^^ | ||
1752 | |||
1753 | Reads a block of data in nibble mode. The ``flags`` parameter is ignored. | ||
1754 | |||
1755 | RETURN VALUE | ||
1756 | ^^^^^^^^^^^^ | ||
1757 | |||
1758 | The number of whole bytes read. | ||
1759 | |||
1760 | SEE ALSO | ||
1761 | ^^^^^^^^ | ||
1762 | |||
1763 | byte_read_data, compat_write_data | ||
1764 | |||
1765 | |||
1766 | |||
1767 | port->ops->byte_read_data - read a block of data in byte mode | ||
1768 | ------------------------------------------------------------- | ||
1769 | |||
1770 | SYNOPSIS | ||
1771 | ^^^^^^^^ | ||
1772 | |||
1773 | :: | ||
1774 | |||
1775 | #include <linux/parport.h> | ||
1776 | |||
1777 | struct parport_operations { | ||
1778 | ... | ||
1779 | size_t (*byte_read_data) (struct parport *port, | ||
1780 | void *buf, size_t len, int flags); | ||
1781 | ... | ||
1782 | }; | ||
1783 | |||
1784 | DESCRIPTION | ||
1785 | ^^^^^^^^^^^ | ||
1786 | |||
1787 | Reads a block of data in byte mode. The ``flags`` parameter is ignored. | ||
1788 | |||
1789 | RETURN VALUE | ||
1790 | ^^^^^^^^^^^^ | ||
1791 | |||
1792 | The number of bytes read. | ||
1793 | |||
1794 | SEE ALSO | ||
1795 | ^^^^^^^^ | ||
1796 | |||
1797 | nibble_read_data, compat_write_data | ||
1798 | |||
1799 | |||
1800 | |||
1801 | port->ops->compat_write_data - write a block of data in compatibility mode | ||
1802 | -------------------------------------------------------------------------- | ||
1803 | |||
1804 | SYNOPSIS | ||
1805 | ^^^^^^^^ | ||
1806 | |||
1807 | :: | ||
1808 | |||
1809 | #include <linux/parport.h> | ||
1810 | |||
1811 | struct parport_operations { | ||
1812 | ... | ||
1813 | size_t (*compat_write_data) (struct parport *port, | ||
1814 | const void *buf, size_t len, int flags); | ||
1815 | ... | ||
1816 | }; | ||
1817 | |||
1818 | DESCRIPTION | ||
1819 | ^^^^^^^^^^^ | ||
1820 | |||
1821 | Writes a block of data in compatibility mode. The ``flags`` parameter | ||
1822 | is ignored. | ||
1823 | |||
1824 | RETURN VALUE | ||
1825 | ^^^^^^^^^^^^ | ||
1826 | |||
1827 | The number of bytes written. | ||
1828 | |||
1829 | SEE ALSO | ||
1830 | ^^^^^^^^ | ||
1831 | |||
1832 | nibble_read_data, byte_read_data | ||
diff --git a/Documentation/driver-api/phy/index.rst b/Documentation/driver-api/phy/index.rst new file mode 100644 index 000000000000..69ba1216de72 --- /dev/null +++ b/Documentation/driver-api/phy/index.rst | |||
@@ -0,0 +1,18 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ===================== | ||
4 | Generic PHY Framework | ||
5 | ===================== | ||
6 | |||
7 | .. toctree:: | ||
8 | |||
9 | phy | ||
10 | samsung-usb2 | ||
11 | |||
12 | .. only:: subproject and html | ||
13 | |||
14 | Indices | ||
15 | ======= | ||
16 | |||
17 | * :ref:`genindex` | ||
18 | |||
diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst new file mode 100644 index 000000000000..457c3e0f86d6 --- /dev/null +++ b/Documentation/driver-api/phy/phy.rst | |||
@@ -0,0 +1,197 @@ | |||
1 | ============= | ||
2 | PHY subsystem | ||
3 | ============= | ||
4 | |||
5 | :Author: Kishon Vijay Abraham I <kishon@ti.com> | ||
6 | |||
7 | This document explains the Generic PHY Framework along with the APIs provided, | ||
8 | and how-to-use. | ||
9 | |||
10 | Introduction | ||
11 | ============ | ||
12 | |||
13 | *PHY* is the abbreviation for physical layer. It is used to connect a device | ||
14 | to the physical medium e.g., the USB controller has a PHY to provide functions | ||
15 | such as serialization, de-serialization, encoding, decoding and is responsible | ||
16 | for obtaining the required data transmission rate. Note that some USB | ||
17 | controllers have PHY functionality embedded into them and others use an external | ||
18 | PHY. Other peripherals that use PHY include Wireless LAN, Ethernet, | ||
19 | SATA etc. | ||
20 | |||
21 | The intention of creating this framework is to bring the PHY drivers spread | ||
22 | all over the Linux kernel to drivers/phy to increase code re-use and for | ||
23 | better code maintainability. | ||
24 | |||
25 | This framework will be of use only to devices that use external PHY (PHY | ||
26 | functionality is not embedded within the controller). | ||
27 | |||
28 | Registering/Unregistering the PHY provider | ||
29 | ========================================== | ||
30 | |||
31 | PHY provider refers to an entity that implements one or more PHY instances. | ||
32 | For the simple case where the PHY provider implements only a single instance of | ||
33 | the PHY, the framework provides its own implementation of of_xlate in | ||
34 | of_phy_simple_xlate. If the PHY provider implements multiple instances, it | ||
35 | should provide its own implementation of of_xlate. of_xlate is used only for | ||
36 | dt boot case. | ||
37 | |||
38 | :: | ||
39 | |||
40 | #define of_phy_provider_register(dev, xlate) \ | ||
41 | __of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate)) | ||
42 | |||
43 | #define devm_of_phy_provider_register(dev, xlate) \ | ||
44 | __devm_of_phy_provider_register((dev), NULL, THIS_MODULE, | ||
45 | (xlate)) | ||
46 | |||
47 | of_phy_provider_register and devm_of_phy_provider_register macros can be used to | ||
48 | register the phy_provider and it takes device and of_xlate as | ||
49 | arguments. For the dt boot case, all PHY providers should use one of the above | ||
50 | 2 macros to register the PHY provider. | ||
51 | |||
52 | Often the device tree nodes associated with a PHY provider will contain a set | ||
53 | of children that each represent a single PHY. Some bindings may nest the child | ||
54 | nodes within extra levels for context and extensibility, in which case the low | ||
55 | level of_phy_provider_register_full() and devm_of_phy_provider_register_full() | ||
56 | macros can be used to override the node containing the children. | ||
57 | |||
58 | :: | ||
59 | |||
60 | #define of_phy_provider_register_full(dev, children, xlate) \ | ||
61 | __of_phy_provider_register(dev, children, THIS_MODULE, xlate) | ||
62 | |||
63 | #define devm_of_phy_provider_register_full(dev, children, xlate) \ | ||
64 | __devm_of_phy_provider_register_full(dev, children, | ||
65 | THIS_MODULE, xlate) | ||
66 | |||
67 | void devm_of_phy_provider_unregister(struct device *dev, | ||
68 | struct phy_provider *phy_provider); | ||
69 | void of_phy_provider_unregister(struct phy_provider *phy_provider); | ||
70 | |||
71 | devm_of_phy_provider_unregister and of_phy_provider_unregister can be used to | ||
72 | unregister the PHY. | ||
73 | |||
74 | Creating the PHY | ||
75 | ================ | ||
76 | |||
77 | The PHY driver should create the PHY in order for other peripheral controllers | ||
78 | to make use of it. The PHY framework provides 2 APIs to create the PHY. | ||
79 | |||
80 | :: | ||
81 | |||
82 | struct phy *phy_create(struct device *dev, struct device_node *node, | ||
83 | const struct phy_ops *ops); | ||
84 | struct phy *devm_phy_create(struct device *dev, | ||
85 | struct device_node *node, | ||
86 | const struct phy_ops *ops); | ||
87 | |||
88 | The PHY drivers can use one of the above 2 APIs to create the PHY by passing | ||
89 | the device pointer and phy ops. | ||
90 | phy_ops is a set of function pointers for performing PHY operations such as | ||
91 | init, exit, power_on and power_off. | ||
92 | |||
93 | In order to dereference the private data (in phy_ops), the phy provider driver | ||
94 | can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in | ||
95 | phy_ops to get back the private data. | ||
96 | |||
97 | Getting a reference to the PHY | ||
98 | ============================== | ||
99 | Before the controller can make use of the PHY, it has to get a reference to | ||
100 | it. This framework provides the following APIs to get a reference to the PHY. | ||
101 | |||
102 | :: | ||
103 | |||
104 | struct phy *phy_get(struct device *dev, const char *string); | ||
105 | struct phy *phy_optional_get(struct device *dev, const char *string); | ||
106 | struct phy *devm_phy_get(struct device *dev, const char *string); | ||
107 | struct phy *devm_phy_optional_get(struct device *dev, | ||
108 | const char *string); | ||
109 | struct phy *devm_of_phy_get_by_index(struct device *dev, | ||
110 | struct device_node *np, | ||
111 | int index); | ||
112 | |||
113 | phy_get, phy_optional_get, devm_phy_get and devm_phy_optional_get can | ||
114 | be used to get the PHY. In the case of dt boot, the string arguments | ||
115 | should contain the phy name as given in the dt data and in the case of | ||
116 | non-dt boot, it should contain the label of the PHY. The two | ||
117 | devm_phy_get variants associate the device with the PHY using devres on | ||
118 | successful PHY get. On driver detach, release function is invoked on | ||
119 | the devres data and devres data is freed. phy_optional_get and | ||
120 | devm_phy_optional_get should be used when the phy is optional. These | ||
121 | two functions will never return -ENODEV, but instead return NULL when | ||
122 | the phy cannot be found. Some generic drivers, such as ehci, may use multiple | ||
123 | phys and for such drivers referencing phy(s) by name(s) does not make sense. In | ||
124 | this case, devm_of_phy_get_by_index can be used to get a phy reference based on | ||
125 | the index. | ||
126 | |||
127 | It should be noted that NULL is a valid phy reference. All phy | ||
128 | consumer calls on the NULL phy become NOPs. That is the release calls, | ||
129 | the phy_init() and phy_exit() calls, and phy_power_on() and | ||
130 | phy_power_off() calls are all NOP when applied to a NULL phy. The NULL | ||
131 | phy is useful in devices for handling optional phy devices. | ||
132 | |||
133 | Releasing a reference to the PHY | ||
134 | ================================ | ||
135 | |||
136 | When the controller no longer needs the PHY, it has to release the reference | ||
137 | to the PHY it has obtained using the APIs mentioned in the above section. The | ||
138 | PHY framework provides 2 APIs to release a reference to the PHY. | ||
139 | |||
140 | :: | ||
141 | |||
142 | void phy_put(struct phy *phy); | ||
143 | void devm_phy_put(struct device *dev, struct phy *phy); | ||
144 | |||
145 | Both these APIs are used to release a reference to the PHY and devm_phy_put | ||
146 | destroys the devres associated with this PHY. | ||
147 | |||
148 | Destroying the PHY | ||
149 | ================== | ||
150 | |||
151 | When the driver that created the PHY is unloaded, it should destroy the PHY it | ||
152 | created using one of the following 2 APIs:: | ||
153 | |||
154 | void phy_destroy(struct phy *phy); | ||
155 | void devm_phy_destroy(struct device *dev, struct phy *phy); | ||
156 | |||
157 | Both these APIs destroy the PHY and devm_phy_destroy destroys the devres | ||
158 | associated with this PHY. | ||
159 | |||
160 | PM Runtime | ||
161 | ========== | ||
162 | |||
163 | This subsystem is pm runtime enabled. So while creating the PHY, | ||
164 | pm_runtime_enable of the phy device created by this subsystem is called and | ||
165 | while destroying the PHY, pm_runtime_disable is called. Note that the phy | ||
166 | device created by this subsystem will be a child of the device that calls | ||
167 | phy_create (PHY provider device). | ||
168 | |||
169 | So pm_runtime_get_sync of the phy_device created by this subsystem will invoke | ||
170 | pm_runtime_get_sync of PHY provider device because of parent-child relationship. | ||
171 | It should also be noted that phy_power_on and phy_power_off perform | ||
172 | phy_pm_runtime_get_sync and phy_pm_runtime_put respectively. | ||
173 | There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync, | ||
174 | phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and | ||
175 | phy_pm_runtime_forbid for performing PM operations. | ||
176 | |||
177 | PHY Mappings | ||
178 | ============ | ||
179 | |||
180 | In order to get reference to a PHY without help from DeviceTree, the framework | ||
181 | offers lookups which can be compared to clkdev that allow clk structures to be | ||
182 | bound to devices. A lookup can be made during runtime when a handle to | ||
183 | the struct phy already exists. | ||
184 | |||
185 | The framework offers the following API for registering and unregistering the | ||
186 | lookups:: | ||
187 | |||
188 | int phy_create_lookup(struct phy *phy, const char *con_id, | ||
189 | const char *dev_id); | ||
190 | void phy_remove_lookup(struct phy *phy, const char *con_id, | ||
191 | const char *dev_id); | ||
192 | |||
193 | DeviceTree Binding | ||
194 | ================== | ||
195 | |||
196 | The documentation for PHY dt binding can be found @ | ||
197 | Documentation/devicetree/bindings/phy/phy-bindings.txt | ||
diff --git a/Documentation/driver-api/phy/samsung-usb2.rst b/Documentation/driver-api/phy/samsung-usb2.rst new file mode 100644 index 000000000000..c48c8b9797b9 --- /dev/null +++ b/Documentation/driver-api/phy/samsung-usb2.rst | |||
@@ -0,0 +1,137 @@ | |||
1 | ==================================== | ||
2 | Samsung USB 2.0 PHY adaptation layer | ||
3 | ==================================== | ||
4 | |||
5 | 1. Description | ||
6 | -------------- | ||
7 | |||
8 | The architecture of the USB 2.0 PHY module in Samsung SoCs is similar | ||
9 | among many SoCs. In spite of the similarities it proved difficult to | ||
10 | create one driver that would fit all these PHY controllers. Often | ||
11 | the differences were minor and were found in particular bits of the | ||
12 | registers of the PHY. In some rare cases the order of register writes or | ||
13 | the PHY powering up process had to be altered. This adaptation layer is | ||
14 | a compromise between having separate drivers and having a single driver | ||
15 | with added support for many special cases. | ||
16 | |||
17 | 2. Files description | ||
18 | -------------------- | ||
19 | |||
20 | - phy-samsung-usb2.c | ||
21 | This is the main file of the adaptation layer. This file contains | ||
22 | the probe function and provides two callbacks to the Generic PHY | ||
23 | Framework. These two callbacks are used to power on and power off the | ||
24 | phy. They carry out the common work that has to be done on all versions | ||
25 | of the PHY module. Depending on which SoC was chosen they execute SoC | ||
26 | specific callbacks. The specific SoC version is selected by choosing | ||
27 | the appropriate compatible string. In addition, this file contains | ||
28 | struct of_device_id definitions for particular SoCs. | ||
29 | |||
30 | - phy-samsung-usb2.h | ||
31 | This is the include file. It declares the structures used by this | ||
32 | driver. In addition it should contain extern declarations for | ||
33 | structures that describe particular SoCs. | ||
34 | |||
35 | 3. Supporting SoCs | ||
36 | ------------------ | ||
37 | |||
38 | To support a new SoC a new file should be added to the drivers/phy | ||
39 | directory. Each SoC's configuration is stored in an instance of the | ||
40 | struct samsung_usb2_phy_config:: | ||
41 | |||
42 | struct samsung_usb2_phy_config { | ||
43 | const struct samsung_usb2_common_phy *phys; | ||
44 | int (*rate_to_clk)(unsigned long, u32 *); | ||
45 | unsigned int num_phys; | ||
46 | bool has_mode_switch; | ||
47 | }; | ||
48 | |||
49 | The num_phys is the number of phys handled by the driver. `*phys` is an | ||
50 | array that contains the configuration for each phy. The has_mode_switch | ||
51 | property is a boolean flag that determines whether the SoC has USB host | ||
52 | and device on a single pair of pins. If so, a special register has to | ||
53 | be modified to change the internal routing of these pins between a USB | ||
54 | device or host module. | ||
55 | |||
56 | For example, the configuration for Exynos 4210 is as follows:: | ||
57 | |||
58 | const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = { | ||
59 | .has_mode_switch = 0, | ||
60 | .num_phys = EXYNOS4210_NUM_PHYS, | ||
61 | .phys = exynos4210_phys, | ||
62 | .rate_to_clk = exynos4210_rate_to_clk, | ||
63 | } | ||
64 | |||
65 | - `int (*rate_to_clk)(unsigned long, u32 *)` | ||
66 | |||
67 | The rate_to_clk callback is to convert the rate of the clock | ||
68 | used as the reference clock for the PHY module to the value | ||
69 | that should be written in the hardware register. | ||
70 | |||
71 | The exynos4210_phys configuration array is as follows:: | ||
72 | |||
73 | static const struct samsung_usb2_common_phy exynos4210_phys[] = { | ||
74 | { | ||
75 | .label = "device", | ||
76 | .id = EXYNOS4210_DEVICE, | ||
77 | .power_on = exynos4210_power_on, | ||
78 | .power_off = exynos4210_power_off, | ||
79 | }, | ||
80 | { | ||
81 | .label = "host", | ||
82 | .id = EXYNOS4210_HOST, | ||
83 | .power_on = exynos4210_power_on, | ||
84 | .power_off = exynos4210_power_off, | ||
85 | }, | ||
86 | { | ||
87 | .label = "hsic0", | ||
88 | .id = EXYNOS4210_HSIC0, | ||
89 | .power_on = exynos4210_power_on, | ||
90 | .power_off = exynos4210_power_off, | ||
91 | }, | ||
92 | { | ||
93 | .label = "hsic1", | ||
94 | .id = EXYNOS4210_HSIC1, | ||
95 | .power_on = exynos4210_power_on, | ||
96 | .power_off = exynos4210_power_off, | ||
97 | }, | ||
98 | {}, | ||
99 | }; | ||
100 | |||
101 | - `int (*power_on)(struct samsung_usb2_phy_instance *);` | ||
102 | `int (*power_off)(struct samsung_usb2_phy_instance *);` | ||
103 | |||
104 | These two callbacks are used to power on and power off the phy | ||
105 | by modifying appropriate registers. | ||
106 | |||
107 | The final change to the driver is adding the appropriate compatible value to the | ||
108 | phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were | ||
109 | added to the struct of_device_id samsung_usb2_phy_of_match[] array:: | ||
110 | |||
111 | #ifdef CONFIG_PHY_EXYNOS4210_USB2 | ||
112 | { | ||
113 | .compatible = "samsung,exynos4210-usb2-phy", | ||
114 | .data = &exynos4210_usb2_phy_config, | ||
115 | }, | ||
116 | #endif | ||
117 | |||
118 | To add further flexibility to the driver, the Kconfig file allows support | ||
119 | for selected SoCs to be included in the compiled driver. The Kconfig | ||
120 | entry for Exynos 4210 is as follows:: | ||
121 | |||
122 | config PHY_EXYNOS4210_USB2 | ||
123 | bool "Support for Exynos 4210" | ||
124 | depends on PHY_SAMSUNG_USB2 | ||
125 | depends on CPU_EXYNOS4210 | ||
126 | help | ||
127 | Enable USB PHY support for Exynos 4210. This option requires that | ||
128 | Samsung USB 2.0 PHY driver is enabled and means that support for this | ||
129 | particular SoC is compiled in the driver. In case of Exynos 4210 four | ||
130 | phys are available - device, host, HSIC0 and HSIC1. | ||
131 | |||
132 | The newly created file that supports the new SoC also has to be added to the | ||
133 | Makefile. In case of Exynos 4210 the added line is as follows:: | ||
134 | |||
135 | obj-$(CONFIG_PHY_EXYNOS4210_USB2) += phy-exynos4210-usb2.o | ||
136 | |||
137 | After completing these steps the support for the new SoC should be ready. | ||
diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst index 1456d2c32ebd..2d6b99766ee8 100644 --- a/Documentation/driver-api/pps.rst +++ b/Documentation/driver-api/pps.rst | |||
@@ -1,4 +1,4 @@ | |||
1 | :orphan: | 1 | .. SPDX-License-Identifier: GPL-2.0 |
2 | 2 | ||
3 | ====================== | 3 | ====================== |
4 | PPS - Pulse Per Second | 4 | PPS - Pulse Per Second |
diff --git a/Documentation/driver-api/pti_intel_mid.rst b/Documentation/driver-api/pti_intel_mid.rst new file mode 100644 index 000000000000..20f1cff42d5f --- /dev/null +++ b/Documentation/driver-api/pti_intel_mid.rst | |||
@@ -0,0 +1,106 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ============= | ||
4 | Intel MID PTI | ||
5 | ============= | ||
6 | |||
7 | The Intel MID PTI project is HW implemented in Intel Atom | ||
8 | system-on-a-chip designs based on the Parallel Trace | ||
9 | Interface for MIPI P1149.7 cJTAG standard. The kernel solution | ||
10 | for this platform involves the following files:: | ||
11 | |||
12 | ./include/linux/pti.h | ||
13 | ./drivers/.../n_tracesink.h | ||
14 | ./drivers/.../n_tracerouter.c | ||
15 | ./drivers/.../n_tracesink.c | ||
16 | ./drivers/.../pti.c | ||
17 | |||
18 | pti.c is the driver that enables various debugging features | ||
19 | popular on platforms from certain mobile manufacturers. | ||
20 | n_tracerouter.c and n_tracesink.c allow extra system information to | ||
21 | be collected and routed to the pti driver, such as trace | ||
22 | debugging data from a modem. Although n_tracerouter | ||
23 | and n_tracesink are a part of the complete PTI solution, | ||
24 | these two line disciplines can work separately from | ||
25 | pti.c and route any data stream from one /dev/tty node | ||
26 | to another /dev/tty node via kernel-space. This provides | ||
27 | a stable, reliable connection that will not break unless | ||
28 | the user-space application shuts down (plus avoids | ||
29 | kernel->user->kernel context switch overheads of routing | ||
30 | data). | ||
31 | |||
32 | An example debugging usage for this driver system: | ||
33 | |||
34 | * Hook /dev/ttyPTI0 to syslogd. Opening this port will also start | ||
35 | a console device to further capture debugging messages to PTI. | ||
36 | * Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW. | ||
37 | This is where n_tracerouter and n_tracesink are used. | ||
38 | * Hook /dev/pti to a user-level debugging application for writing | ||
39 | to PTI HW. | ||
40 | * Use the ``mipi_*`` Kernel Driver API in other device drivers for | ||
41 | debugging to PTI by first requesting a PTI write address via | ||
42 | mipi_request_masterchannel(1). | ||
43 | |||
44 | Below is example pseudo-code on how a 'privileged' application | ||
45 | can hook up n_tracerouter and n_tracesink to any tty on | ||
46 | a system. 'Privileged' means the application has enough | ||
47 | privileges to successfully manipulate the ldisc drivers | ||
48 | but is not just blindly executing as 'root'. Keep in mind | ||
49 | the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter | ||
50 | and n_tracesink line discipline drivers but is a generic | ||
51 | operation for a program to use a line discipline driver | ||
52 | on a tty port other than the default n_tty:: | ||
53 | |||
54 | /////////// To hook up n_tracerouter and n_tracesink ///////// | ||
55 | |||
56 | // Note that n_tracerouter depends on n_tracesink. | ||
57 | #include <errno.h> | ||
58 | #define ONE_TTY "/dev/ttyOne" | ||
59 | #define TWO_TTY "/dev/ttyTwo" | ||
60 | |||
61 | // globals needed to hang on to the ldisc connection | ||
62 | static int g_fd_source = -1; | ||
63 | static int g_fd_sink = -1; | ||
64 | |||
65 | // these two vars used to grab LDISC values from loaded ldisc drivers | ||
66 | // in OS. Look at /proc/tty/ldiscs to get the right numbers from | ||
67 | // the ldiscs loaded in the system. | ||
68 | int source_ldisc_num, sink_ldisc_num = -1; | ||
69 | int retval; | ||
70 | |||
71 | g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W | ||
72 | g_fd_sink = open(TWO_TTY, O_RDWR); // must be R/W | ||
73 | |||
74 | if ((g_fd_source <= 0) || (g_fd_sink <= 0)) { | ||
75 | // doubt you'll want to use these exact error lines of code | ||
76 | printf("Error on open(). errno: %d\n",errno); | ||
77 | return errno; | ||
78 | } | ||
79 | |||
80 | retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num); | ||
81 | if (retval < 0) { | ||
82 | printf("Error on ioctl(). errno: %d\n", errno); | ||
83 | return errno; | ||
84 | } | ||
85 | |||
86 | retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num); | ||
87 | if (retval < 0) { | ||
88 | printf("Error on ioctl(). errno: %d\n", errno); | ||
89 | return errno; | ||
90 | } | ||
91 | |||
92 | /////////// To disconnect n_tracerouter and n_tracesink //////// | ||
93 | |||
94 | // First make sure data through the ldiscs has stopped. | ||
95 | |||
96 | // Second, disconnect ldiscs. This provides a | ||
97 | // little cleaner shutdown on tty stack. | ||
98 | sink_ldisc_num = 0; | ||
99 | source_ldisc_num = 0; | ||
100 | ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num); | ||
101 | ioctl(g_fd_source, TIOCSETD, &source_ldisc_num); | ||
102 | |||
103 | // Third, program closes connection, and cleanup: | ||
104 | close(g_fd_sink); | ||
105 | close(g_fd_source); | ||
106 | g_fd_sink = g_fd_source = -1; | ||
diff --git a/Documentation/driver-api/ptp.rst b/Documentation/driver-api/ptp.rst index b6e65d66d37a..a15192e32347 100644 --- a/Documentation/driver-api/ptp.rst +++ b/Documentation/driver-api/ptp.rst | |||
@@ -1,4 +1,4 @@ | |||
1 | :orphan: | 1 | .. SPDX-License-Identifier: GPL-2.0 |
2 | 2 | ||
3 | =========================================== | 3 | =========================================== |
4 | PTP hardware clock infrastructure for Linux | 4 | PTP hardware clock infrastructure for Linux |
diff --git a/Documentation/driver-api/pwm.rst b/Documentation/driver-api/pwm.rst new file mode 100644 index 000000000000..ab62f1bb0366 --- /dev/null +++ b/Documentation/driver-api/pwm.rst | |||
@@ -0,0 +1,165 @@ | |||
1 | ====================================== | ||
2 | Pulse Width Modulation (PWM) interface | ||
3 | ====================================== | ||
4 | |||
5 | This provides an overview of the Linux PWM interface. | ||
6 | |||
7 | PWMs are commonly used for controlling LEDs, fans or vibrators in | ||
8 | cell phones. PWMs with a fixed purpose have no need to implement | ||
9 | the Linux PWM API (although they could). However, PWMs are often | ||
10 | found as discrete devices on SoCs which have no fixed purpose. It's | ||
11 | up to the board designer to connect them to LEDs or fans. To provide | ||
12 | this kind of flexibility the generic PWM API exists. | ||
13 | |||
14 | Identifying PWMs | ||
15 | ---------------- | ||
16 | |||
17 | Users of the legacy PWM API use unique IDs to refer to PWM devices. | ||
18 | |||
19 | Instead of referring to a PWM device via its unique ID, board setup code | ||
20 | should instead register a static mapping that can be used to match PWM | ||
21 | consumers to providers, as given in the following example:: | ||
22 | |||
23 | static struct pwm_lookup board_pwm_lookup[] = { | ||
24 | PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL, | ||
25 | 50000, PWM_POLARITY_NORMAL), | ||
26 | }; | ||
27 | |||
28 | static void __init board_init(void) | ||
29 | { | ||
30 | ... | ||
31 | pwm_add_table(board_pwm_lookup, ARRAY_SIZE(board_pwm_lookup)); | ||
32 | ... | ||
33 | } | ||
34 | |||
35 | Using PWMs | ||
36 | ---------- | ||
37 | |||
38 | Legacy users can request a PWM device using pwm_request() and free it | ||
39 | after usage with pwm_free(). | ||
40 | |||
41 | New users should use the pwm_get() function and pass to it the consumer | ||
42 | device or a consumer name. pwm_put() is used to free the PWM device. Managed | ||
43 | variants of these functions, devm_pwm_get() and devm_pwm_put(), also exist. | ||
44 | |||
45 | After being requested, a PWM has to be configured using:: | ||
46 | |||
47 | int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state); | ||
48 | |||
49 | This API controls both the PWM period/duty_cycle config and the | ||
50 | enable/disable state. | ||
51 | |||
52 | The pwm_config(), pwm_enable() and pwm_disable() functions are just wrappers | ||
53 | around pwm_apply_state() and should not be used if the user wants to change | ||
54 | several parameters at once. For example, if you see pwm_config() and | ||
55 | pwm_{enable,disable}() calls in the same function, this probably means you | ||
56 | should switch to pwm_apply_state(). | ||
57 | |||
58 | The PWM user API also allows one to query the PWM state with pwm_get_state(). | ||
59 | |||
60 | In addition to the PWM state, the PWM API also exposes PWM arguments, which | ||
61 | are the reference PWM config one should use on this PWM. | ||
62 | PWM arguments are usually platform-specific and allow the PWM user to only | ||
63 | care about the duty cycle relative to the full period (like, duty = 50% of the | ||
64 | period). struct pwm_args contains 2 fields (period and polarity) and should | ||
65 | be used to set the initial PWM config (usually done in the probe function | ||
66 | of the PWM user). PWM arguments are retrieved with pwm_get_args(). | ||
67 | |||
68 | All consumers should really be reconfiguring the PWM upon resume as | ||
69 | appropriate. This is the only way to ensure that everything is resumed in | ||
70 | the proper order. | ||
71 | |||
72 | Using PWMs with the sysfs interface | ||
73 | ----------------------------------- | ||
74 | |||
75 | If CONFIG_SYSFS is enabled in your kernel configuration a simple sysfs | ||
76 | interface is provided to use the PWMs from userspace. It is exposed at | ||
77 | /sys/class/pwm/. Each probed PWM controller/chip will be exported as | ||
78 | pwmchipN, where N is the base of the PWM chip. Inside the directory you | ||
79 | will find: | ||
80 | |||
81 | npwm | ||
82 | The number of PWM channels this chip supports (read-only). | ||
83 | |||
84 | export | ||
85 | Exports a PWM channel for use with sysfs (write-only). | ||
86 | |||
87 | unexport | ||
88 | Unexports a PWM channel from sysfs (write-only). | ||
89 | |||
90 | The PWM channels are numbered using a per-chip index from 0 to npwm-1. | ||
91 | |||
92 | When a PWM channel is exported a pwmX directory will be created in the | ||
93 | pwmchipN directory it is associated with, where X is the number of the | ||
94 | channel that was exported. The following properties will then be available: | ||
95 | |||
96 | period | ||
97 | The total period of the PWM signal (read/write). | ||
98 | Value is in nanoseconds and is the sum of the active and inactive | ||
99 | time of the PWM. | ||
100 | |||
101 | duty_cycle | ||
102 | The active time of the PWM signal (read/write). | ||
103 | Value is in nanoseconds and must be less than the period. | ||
104 | |||
105 | polarity | ||
106 | Changes the polarity of the PWM signal (read/write). | ||
107 | Writes to this property only work if the PWM chip supports changing | ||
108 | the polarity. The polarity can only be changed if the PWM is not | ||
109 | enabled. Value is the string "normal" or "inversed". | ||
110 | |||
111 | enable | ||
112 | Enable/disable the PWM signal (read/write). | ||
113 | |||
114 | - 0 - disabled | ||
115 | - 1 - enabled | ||
116 | |||
117 | Implementing a PWM driver | ||
118 | ------------------------- | ||
119 | |||
120 | Currently there are two ways to implement pwm drivers. Traditionally | ||
121 | there only has been the barebone API meaning that each driver has | ||
122 | to implement the pwm_*() functions itself. This means that it's impossible | ||
123 | to have multiple PWM drivers in the system. For this reason it's mandatory | ||
124 | for new drivers to use the generic PWM framework. | ||
125 | |||
126 | A new PWM controller/chip can be added using pwmchip_add() and removed | ||
127 | again with pwmchip_remove(). pwmchip_add() takes a filled in struct | ||
128 | pwm_chip as argument which provides a description of the PWM chip, the | ||
129 | number of PWM devices provided by the chip and the chip-specific | ||
130 | implementation of the supported PWM operations to the framework. | ||
131 | |||
132 | When implementing polarity support in a PWM driver, make sure to respect the | ||
133 | signal conventions in the PWM framework. By definition, normal polarity | ||
134 | characterizes a signal that starts high for the duration of the duty cycle and | ||
135 | goes low for the remainder of the period. Conversely, a signal with inversed | ||
136 | polarity starts low for the duration of the duty cycle and goes high for the | ||
137 | remainder of the period. | ||
138 | |||
139 | Drivers are encouraged to implement ->apply() instead of the legacy | ||
140 | ->enable(), ->disable() and ->config() methods. Doing that should provide | ||
141 | atomicity in the PWM config workflow, which is required when the PWM controls | ||
142 | a critical device (like a regulator). | ||
143 | |||
144 | The implementation of ->get_state() (a method used to retrieve initial PWM | ||
145 | state) is also encouraged for the same reason: letting the PWM user know | ||
146 | about the current PWM state would allow him to avoid glitches. | ||
147 | |||
148 | Drivers should not implement any power management. In other words, | ||
149 | consumers should implement it as described in the "Using PWMs" section. | ||
150 | |||
151 | Locking | ||
152 | ------- | ||
153 | |||
154 | The PWM core list manipulations are protected by a mutex, so pwm_request() | ||
155 | and pwm_free() may not be called from an atomic context. Currently the | ||
156 | PWM core does not enforce any locking to pwm_enable(), pwm_disable() and | ||
157 | pwm_config(), so the calling context is currently driver specific. This | ||
158 | is an issue derived from the former barebone API and should be fixed soon. | ||
159 | |||
160 | Helpers | ||
161 | ------- | ||
162 | |||
163 | Currently a PWM can only be configured with period_ns and duty_ns. For several | ||
164 | use cases freq_hz and duty_percent might be better. Instead of calculating | ||
165 | this in your driver please consider adding appropriate helpers to the framework. | ||
diff --git a/Documentation/driver-api/rapidio.rst b/Documentation/driver-api/rapidio.rst deleted file mode 100644 index 71ff658ab78e..000000000000 --- a/Documentation/driver-api/rapidio.rst +++ /dev/null | |||
@@ -1,107 +0,0 @@ | |||
1 | ======================= | ||
2 | RapidIO Subsystem Guide | ||
3 | ======================= | ||
4 | |||
5 | :Author: Matt Porter | ||
6 | |||
7 | Introduction | ||
8 | ============ | ||
9 | |||
10 | RapidIO is a high speed switched fabric interconnect with features aimed | ||
11 | at the embedded market. RapidIO provides support for memory-mapped I/O | ||
12 | as well as message-based transactions over the switched fabric network. | ||
13 | RapidIO has a standardized discovery mechanism not unlike the PCI bus | ||
14 | standard that allows simple detection of devices in a network. | ||
15 | |||
16 | This documentation is provided for developers intending to support | ||
17 | RapidIO on new architectures, write new drivers, or to understand the | ||
18 | subsystem internals. | ||
19 | |||
20 | Known Bugs and Limitations | ||
21 | ========================== | ||
22 | |||
23 | Bugs | ||
24 | ---- | ||
25 | |||
26 | None. ;) | ||
27 | |||
28 | Limitations | ||
29 | ----------- | ||
30 | |||
31 | 1. Access/management of RapidIO memory regions is not supported | ||
32 | |||
33 | 2. Multiple host enumeration is not supported | ||
34 | |||
35 | RapidIO driver interface | ||
36 | ======================== | ||
37 | |||
38 | Drivers are provided a set of calls in order to interface with the | ||
39 | subsystem to gather info on devices, request/map memory region | ||
40 | resources, and manage mailboxes/doorbells. | ||
41 | |||
42 | Functions | ||
43 | --------- | ||
44 | |||
45 | .. kernel-doc:: include/linux/rio_drv.h | ||
46 | :internal: | ||
47 | |||
48 | .. kernel-doc:: drivers/rapidio/rio-driver.c | ||
49 | :export: | ||
50 | |||
51 | .. kernel-doc:: drivers/rapidio/rio.c | ||
52 | :export: | ||
53 | |||
54 | Internals | ||
55 | ========= | ||
56 | |||
57 | This chapter contains the autogenerated documentation of the RapidIO | ||
58 | subsystem. | ||
59 | |||
60 | Structures | ||
61 | ---------- | ||
62 | |||
63 | .. kernel-doc:: include/linux/rio.h | ||
64 | :internal: | ||
65 | |||
66 | Enumeration and Discovery | ||
67 | ------------------------- | ||
68 | |||
69 | .. kernel-doc:: drivers/rapidio/rio-scan.c | ||
70 | :internal: | ||
71 | |||
72 | Driver functionality | ||
73 | -------------------- | ||
74 | |||
75 | .. kernel-doc:: drivers/rapidio/rio.c | ||
76 | :internal: | ||
77 | |||
78 | .. kernel-doc:: drivers/rapidio/rio-access.c | ||
79 | :internal: | ||
80 | |||
81 | Device model support | ||
82 | -------------------- | ||
83 | |||
84 | .. kernel-doc:: drivers/rapidio/rio-driver.c | ||
85 | :internal: | ||
86 | |||
87 | PPC32 support | ||
88 | ------------- | ||
89 | |||
90 | .. kernel-doc:: arch/powerpc/sysdev/fsl_rio.c | ||
91 | :internal: | ||
92 | |||
93 | Credits | ||
94 | ======= | ||
95 | |||
96 | The following people have contributed to the RapidIO subsystem directly | ||
97 | or indirectly: | ||
98 | |||
99 | 1. Matt Porter\ mporter@kernel.crashing.org | ||
100 | |||
101 | 2. Randy Vinson\ rvinson@mvista.com | ||
102 | |||
103 | 3. Dan Malek\ dan@embeddedalley.com | ||
104 | |||
105 | The following people have contributed to this document: | ||
106 | |||
107 | 1. Matt Porter\ mporter@kernel.crashing.org | ||
diff --git a/Documentation/driver-api/rapidio/index.rst b/Documentation/driver-api/rapidio/index.rst new file mode 100644 index 000000000000..a41b4242d16f --- /dev/null +++ b/Documentation/driver-api/rapidio/index.rst | |||
@@ -0,0 +1,15 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | =========================== | ||
4 | The Linux RapidIO Subsystem | ||
5 | =========================== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | rapidio | ||
11 | sysfs | ||
12 | |||
13 | tsi721 | ||
14 | mport_cdev | ||
15 | rio_cm | ||
diff --git a/Documentation/driver-api/rapidio/mport_cdev.rst b/Documentation/driver-api/rapidio/mport_cdev.rst new file mode 100644 index 000000000000..df77a7f7be7d --- /dev/null +++ b/Documentation/driver-api/rapidio/mport_cdev.rst | |||
@@ -0,0 +1,110 @@ | |||
1 | ================================================================== | ||
2 | RapidIO subsystem mport character device driver (rio_mport_cdev.c) | ||
3 | ================================================================== | ||
4 | |||
5 | 1. Overview | ||
6 | =========== | ||
7 | |||
8 | This device driver is the result of collaboration within the RapidIO.org | ||
9 | Software Task Group (STG) between Texas Instruments, Freescale, | ||
10 | Prodrive Technologies, Nokia Networks, BAE and IDT. Additional input was | ||
11 | received from other members of RapidIO.org. The objective was to create a | ||
12 | character mode driver interface which exposes the capabilities of RapidIO | ||
13 | devices directly to applications, in a manner that allows the numerous and | ||
14 | varied RapidIO implementations to interoperate. | ||
15 | |||
16 | This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations | ||
17 | for user-space applications. Most of RapidIO operations are supported through | ||
18 | 'ioctl' system calls. | ||
19 | |||
20 | When loaded this device driver creates filesystem nodes named rio_mportX in /dev | ||
21 | directory for each registered RapidIO mport device. 'X' in the node name matches | ||
22 | the unique port ID assigned to each local mport device. | ||
23 | |||
24 | Using available set of ioctl commands user-space applications can perform | ||
25 | following RapidIO bus and subsystem operations: | ||
26 | |||
27 | - Reads and writes from/to configuration registers of mport devices | ||
28 | (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL) | ||
29 | - Reads and writes from/to configuration registers of remote RapidIO devices. | ||
30 | These operations are defined as RapidIO Maintenance reads/writes in RIO spec. | ||
31 | (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE) | ||
32 | - Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET) | ||
33 | - Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET) | ||
34 | - Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET) | ||
35 | - Query capabilities and RapidIO link configuration of mport devices | ||
36 | (RIO_MPORT_GET_PROPERTIES) | ||
37 | - Enable/Disable reporting of RapidIO doorbell events to user-space applications | ||
38 | (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE) | ||
39 | - Enable/Disable reporting of RIO port-write events to user-space applications | ||
40 | (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE) | ||
41 | - Query/Control type of events reported through this driver: doorbells, | ||
42 | port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK) | ||
43 | - Configure/Map mport's outbound requests window(s) for specific size, | ||
44 | RapidIO destination ID, hopcount and request type | ||
45 | (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND) | ||
46 | - Configure/Map mport's inbound requests window(s) for specific size, | ||
47 | RapidIO base address and local memory base address | ||
48 | (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND) | ||
49 | - Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers | ||
50 | to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA) | ||
51 | - Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER). | ||
52 | Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data | ||
53 | transfer modes. | ||
54 | - Check/Wait for completion of asynchronous DMA data transfer | ||
55 | (RIO_WAIT_FOR_ASYNC) | ||
56 | - Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL). | ||
57 | This allows implementation of various RapidIO fabric enumeration algorithms | ||
58 | as user-space applications while using remaining functionality provided by | ||
59 | kernel RapidIO subsystem. | ||
60 | |||
61 | 2. Hardware Compatibility | ||
62 | ========================= | ||
63 | |||
64 | This device driver uses standard interfaces defined by kernel RapidIO subsystem | ||
65 | and therefore it can be used with any mport device driver registered by RapidIO | ||
66 | subsystem with limitations set by available mport implementation. | ||
67 | |||
68 | At this moment the most common limitation is availability of RapidIO-specific | ||
69 | DMA engine framework for specific mport device. Users should verify available | ||
70 | functionality of their platform when planning to use this driver: | ||
71 | |||
72 | - IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully | ||
73 | compatible with this driver. | ||
74 | - Freescale SoCs 'fsl_rio' mport driver does not have implementation for RapidIO | ||
75 | specific DMA engine support and therefore DMA data transfers through the | ||
76 | mport_cdev driver are not available. | ||
77 | |||
78 | 3. Module parameters | ||
79 | ==================== | ||
80 | |||
81 | - 'dma_timeout' | ||
82 | - DMA transfer completion timeout (in msec, default value 3000). | ||
83 | This parameter sets a maximum completion wait time for SYNC mode DMA | ||
84 | transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests. | ||
85 | |||
86 | - 'dbg_level' | ||
87 | - This parameter controls the amount of debug information | ||
88 | generated by this device driver. This parameter is formed by set of | ||
89 | bit masks that correspond to the specific functional blocks. | ||
90 | For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c' | ||
91 | This parameter can be changed dynamically. | ||
92 | Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. | ||
93 | |||
94 | 4. Known problems | ||
95 | ================= | ||
96 | |||
97 | None. | ||
98 | |||
99 | 5. User-space Applications and API | ||
100 | ================================== | ||
101 | |||
102 | API library and applications that use this device driver are available from | ||
103 | RapidIO.org. | ||
104 | |||
105 | 6. TODO List | ||
106 | ============ | ||
107 | |||
108 | - Add support for sending/receiving "raw" RapidIO messaging packets. | ||
109 | - Add memory mapped DMA data transfers as an option when RapidIO-specific DMA | ||
110 | is not available. | ||
diff --git a/Documentation/driver-api/rapidio/rapidio.rst b/Documentation/driver-api/rapidio/rapidio.rst new file mode 100644 index 000000000000..fb8942d3ba85 --- /dev/null +++ b/Documentation/driver-api/rapidio/rapidio.rst | |||
@@ -0,0 +1,362 @@ | |||
1 | ============ | ||
2 | Introduction | ||
3 | ============ | ||
4 | |||
5 | The RapidIO standard is a packet-based fabric interconnect standard designed for | ||
6 | use in embedded systems. Development of the RapidIO standard is directed by the | ||
7 | RapidIO Trade Association (RTA). The current version of the RapidIO specification | ||
8 | is publicly available for download from the RTA web-site [1]. | ||
9 | |||
10 | This document describes the basics of the Linux RapidIO subsystem and provides | ||
11 | information on its major components. | ||
12 | |||
13 | 1. Overview | ||
14 | =========== | ||
15 | |||
16 | Because the RapidIO subsystem follows the Linux device model it is integrated | ||
17 | into the kernel similarly to other buses by defining RapidIO-specific device and | ||
18 | bus types and registering them within the device model. | ||
19 | |||
20 | The Linux RapidIO subsystem is architecture independent and therefore defines | ||
21 | architecture-specific interfaces that provide support for common RapidIO | ||
22 | subsystem operations. | ||
23 | |||
24 | 2. Core Components | ||
25 | ================== | ||
26 | |||
27 | A typical RapidIO network is a combination of endpoints and switches. | ||
28 | Each of these components is represented in the subsystem by an associated data | ||
29 | structure. The core logical components of the RapidIO subsystem are defined | ||
30 | in include/linux/rio.h file. | ||
31 | |||
32 | 2.1 Master Port | ||
33 | --------------- | ||
34 | |||
35 | A master port (or mport) is a RapidIO interface controller that is local to the | ||
36 | processor executing the Linux code. A master port generates and receives RapidIO | ||
37 | packets (transactions). In the RapidIO subsystem each master port is represented | ||
38 | by a rio_mport data structure. This structure contains master port specific | ||
39 | resources such as mailboxes and doorbells. The rio_mport also includes a unique | ||
40 | host device ID that is valid when a master port is configured as an enumerating | ||
41 | host. | ||
42 | |||
43 | RapidIO master ports are serviced by subsystem specific mport device drivers | ||
44 | that provide functionality defined for this subsystem. To provide a hardware | ||
45 | independent interface for RapidIO subsystem operations, rio_mport structure | ||
46 | includes rio_ops data structure which contains pointers to hardware specific | ||
47 | implementations of RapidIO functions. | ||
48 | |||
49 | 2.2 Device | ||
50 | ---------- | ||
51 | |||
52 | A RapidIO device is any endpoint (other than mport) or switch in the network. | ||
53 | All devices are presented in the RapidIO subsystem by corresponding rio_dev data | ||
54 | structure. Devices form one global device list and per-network device lists | ||
55 | (depending on number of available mports and networks). | ||
56 | |||
57 | 2.3 Switch | ||
58 | ---------- | ||
59 | |||
60 | A RapidIO switch is a special class of device that routes packets between its | ||
61 | ports towards their final destination. The packet destination port within a | ||
62 | switch is defined by an internal routing table. A switch is presented in the | ||
63 | RapidIO subsystem by rio_dev data structure expanded by additional rio_switch | ||
64 | data structure, which contains switch specific information such as copy of the | ||
65 | routing table and pointers to switch specific functions. | ||
66 | |||
67 | The RapidIO subsystem defines the format and initialization method for subsystem | ||
68 | specific switch drivers that are designed to provide hardware-specific | ||
69 | implementation of common switch management routines. | ||
70 | |||
71 | 2.4 Network | ||
72 | ----------- | ||
73 | |||
74 | A RapidIO network is a combination of interconnected endpoint and switch devices. | ||
75 | Each RapidIO network known to the system is represented by corresponding rio_net | ||
76 | data structure. This structure includes lists of all devices and local master | ||
77 | ports that form the same network. It also contains a pointer to the default | ||
78 | master port that is used to communicate with devices within the network. | ||
79 | |||
80 | 2.5 Device Drivers | ||
81 | ------------------ | ||
82 | |||
83 | RapidIO device-specific drivers follow Linux Kernel Driver Model and are | ||
84 | intended to support specific RapidIO devices attached to the RapidIO network. | ||
85 | |||
86 | 2.6 Subsystem Interfaces | ||
87 | ------------------------ | ||
88 | |||
89 | RapidIO interconnect specification defines features that may be used to provide | ||
90 | one or more common service layers for all participating RapidIO devices. These | ||
91 | common services may act separately from device-specific drivers or be used by | ||
92 | device-specific drivers. Example of such service provider is the RIONET driver | ||
93 | which implements Ethernet-over-RapidIO interface. Because only one driver can be | ||
94 | registered for a device, all common RapidIO services have to be registered as | ||
95 | subsystem interfaces. This allows multiple common services to be attached to | ||
96 | the same device without blocking attachment of a device-specific driver. | ||
97 | |||
98 | 3. Subsystem Initialization | ||
99 | =========================== | ||
100 | |||
101 | In order to initialize the RapidIO subsystem, a platform must initialize and | ||
102 | register at least one master port within the RapidIO network. To register mport | ||
103 | within the subsystem controller driver's initialization code calls function | ||
104 | rio_register_mport() for each available master port. | ||
105 | |||
106 | After all active master ports are registered with a RapidIO subsystem, | ||
107 | an enumeration and/or discovery routine may be called automatically or | ||
108 | by user-space command. | ||
109 | |||
110 | RapidIO subsystem can be configured to be built as a statically linked or | ||
111 | modular component of the kernel (see details below). | ||
112 | |||
113 | 4. Enumeration and Discovery | ||
114 | ============================ | ||
115 | |||
116 | 4.1 Overview | ||
117 | ------------ | ||
118 | |||
119 | RapidIO subsystem configuration options allow users to build enumeration and | ||
120 | discovery methods as statically linked components or loadable modules. | ||
121 | An enumeration/discovery method implementation and available input parameters | ||
122 | define how any given method can be attached to available RapidIO mports: | ||
123 | simply to all available mports OR individually to the specified mport device. | ||
124 | |||
125 | Depending on selected enumeration/discovery build configuration, there are | ||
126 | several methods to initiate an enumeration and/or discovery process: | ||
127 | |||
128 | (a) Statically linked enumeration and discovery process can be started | ||
129 | automatically during kernel initialization time using corresponding module | ||
130 | parameters. This was the original method used since introduction of RapidIO | ||
131 | subsystem. Now this method relies on enumerator module parameter which is | ||
132 | 'rio-scan.scan' for existing basic enumeration/discovery method. | ||
133 | When automatic start of enumeration/discovery is used a user has to ensure | ||
134 | that all discovering endpoints are started before the enumerating endpoint | ||
135 | and are waiting for enumeration to be completed. | ||
136 | Configuration option CONFIG_RAPIDIO_DISC_TIMEOUT defines time that discovering | ||
137 | endpoint waits for enumeration to be completed. If the specified timeout | ||
138 | expires the discovery process is terminated without obtaining RapidIO network | ||
139 | information. NOTE: a timed out discovery process may be restarted later using | ||
140 | a user-space command as it is described below (if the given endpoint was | ||
141 | enumerated successfully). | ||
142 | |||
143 | (b) Statically linked enumeration and discovery process can be started by | ||
144 | a command from user space. This initiation method provides more flexibility | ||
145 | for a system startup compared to the option (a) above. After all participating | ||
146 | endpoints have been successfully booted, an enumeration process shall be | ||
147 | started first by issuing a user-space command; after an enumeration is | ||
148 | completed a discovery process can be started on all remaining endpoints. | ||
149 | |||
150 | (c) Modular enumeration and discovery process can be started by a command from | ||
151 | user space. After an enumeration/discovery module is loaded, a network scan | ||
152 | process can be started by issuing a user-space command. | ||
153 | Similar to the option (b) above, an enumerator has to be started first. | ||
154 | |||
155 | (d) Modular enumeration and discovery process can be started by a module | ||
156 | initialization routine. In this case an enumerating module shall be loaded | ||
157 | first. | ||
158 | |||
159 | When a network scan process is started it calls an enumeration or discovery | ||
160 | routine depending on the configured role of a master port: host or agent. | ||
161 | |||
162 | Enumeration is performed by a master port if it is configured as a host port by | ||
163 | assigning a host destination ID greater than or equal to zero. The host | ||
164 | destination ID can be assigned to a master port using various methods depending | ||
165 | on RapidIO subsystem build configuration: | ||
166 | |||
167 | (a) For a statically linked RapidIO subsystem core use command line parameter | ||
168 | "rapidio.hdid=" with a list of destination ID assignments in order of mport | ||
169 | device registration. For example, in a system with two RapidIO controllers | ||
170 | the command line parameter "rapidio.hdid=-1,7" will result in assignment of | ||
171 | the host destination ID=7 to the second RapidIO controller, while the first | ||
172 | one will be assigned destination ID=-1. | ||
173 | |||
174 | (b) If the RapidIO subsystem core is built as a loadable module, in addition | ||
175 | to the method shown above, the host destination ID(s) can be specified using | ||
176 | traditional methods of passing module parameter "hdid=" during its loading: | ||
177 | |||
178 | - from command line: "modprobe rapidio hdid=-1,7", or | ||
179 | - from modprobe configuration file using configuration command "options", | ||
180 | like in this example: "options rapidio hdid=-1,7". An example of modprobe | ||
181 | configuration file is provided in the section below. | ||
182 | |||
183 | NOTES: | ||
184 | (i) if "hdid=" parameter is omitted all available mports will be assigned | ||
185 | destination ID = -1; | ||
186 | |||
187 | (ii) the "hdid=" parameter in systems with multiple mports can have | ||
188 | destination ID assignments omitted from the end of list (default = -1). | ||
189 | |||
190 | If the host device ID for a specific master port is set to -1, the discovery | ||
191 | process will be performed for it. | ||
192 | |||
193 | The enumeration and discovery routines use RapidIO maintenance transactions | ||
194 | to access the configuration space of devices. | ||
195 | |||
196 | NOTE: If RapidIO switch-specific device drivers are built as loadable modules | ||
197 | they must be loaded before enumeration/discovery process starts. | ||
198 | This requirement is caused by the fact that enumeration/discovery methods invoke | ||
199 | vendor-specific callbacks on early stages. | ||
200 | |||
201 | 4.2 Automatic Start of Enumeration and Discovery | ||
202 | ------------------------------------------------ | ||
203 | |||
204 | Automatic enumeration/discovery start method is applicable only to built-in | ||
205 | enumeration/discovery RapidIO configuration selection. To enable automatic | ||
206 | enumeration/discovery start by existing basic enumerator method set use boot | ||
207 | command line parameter "rio-scan.scan=1". | ||
208 | |||
209 | This configuration requires synchronized start of all RapidIO endpoints that | ||
210 | form a network which will be enumerated/discovered. Discovering endpoints have | ||
211 | to be started before an enumeration starts to ensure that all RapidIO | ||
212 | controllers have been initialized and are ready to be discovered. Configuration | ||
213 | parameter CONFIG_RAPIDIO_DISC_TIMEOUT defines time (in seconds) which | ||
214 | a discovering endpoint will wait for enumeration to be completed. | ||
215 | |||
216 | When automatic enumeration/discovery start is selected, basic method's | ||
217 | initialization routine calls rio_init_mports() to perform enumeration or | ||
218 | discovery for all known mport devices. | ||
219 | |||
220 | Depending on RapidIO network size and configuration this automatic | ||
221 | enumeration/discovery start method may be difficult to use due to the | ||
222 | requirement for synchronized start of all endpoints. | ||
223 | |||
224 | 4.3 User-space Start of Enumeration and Discovery | ||
225 | ------------------------------------------------- | ||
226 | |||
227 | User-space start of enumeration and discovery can be used with built-in and | ||
228 | modular build configurations. For user-space controlled start RapidIO subsystem | ||
229 | creates the sysfs write-only attribute file '/sys/bus/rapidio/scan'. To initiate | ||
230 | an enumeration or discovery process on specific mport device, a user needs to | ||
231 | write mport_ID (not RapidIO destination ID) into that file. The mport_ID is a | ||
232 | sequential number (0 ... RIO_MAX_MPORTS) assigned during mport device | ||
233 | registration. For example for machine with single RapidIO controller, mport_ID | ||
234 | for that controller always will be 0. | ||
235 | |||
236 | To initiate RapidIO enumeration/discovery on all available mports a user may | ||
237 | write '-1' (or RIO_MPORT_ANY) into the scan attribute file. | ||
238 | |||
239 | 4.4 Basic Enumeration Method | ||
240 | ---------------------------- | ||
241 | |||
242 | This is an original enumeration/discovery method which is available since | ||
243 | first release of RapidIO subsystem code. The enumeration process is | ||
244 | implemented according to the enumeration algorithm outlined in the RapidIO | ||
245 | Interconnect Specification: Annex I [1]. | ||
246 | |||
247 | This method can be configured as statically linked or loadable module. | ||
248 | The method's single parameter "scan" allows to trigger the enumeration/discovery | ||
249 | process from module initialization routine. | ||
250 | |||
251 | This enumeration/discovery method can be started only once and does not support | ||
252 | unloading if it is built as a module. | ||
253 | |||
254 | The enumeration process traverses the network using a recursive depth-first | ||
255 | algorithm. When a new device is found, the enumerator takes ownership of that | ||
256 | device by writing into the Host Device ID Lock CSR. It does this to ensure that | ||
257 | the enumerator has exclusive right to enumerate the device. If device ownership | ||
258 | is successfully acquired, the enumerator allocates a new rio_dev structure and | ||
259 | initializes it according to device capabilities. | ||
260 | |||
261 | If the device is an endpoint, a unique device ID is assigned to it and its value | ||
262 | is written into the device's Base Device ID CSR. | ||
263 | |||
264 | If the device is a switch, the enumerator allocates an additional rio_switch | ||
265 | structure to store switch specific information. Then the switch's vendor ID and | ||
266 | device ID are queried against a table of known RapidIO switches. Each switch | ||
267 | table entry contains a pointer to a switch-specific initialization routine that | ||
268 | initializes pointers to the rest of switch specific operations, and performs | ||
269 | hardware initialization if necessary. A RapidIO switch does not have a unique | ||
270 | device ID; it relies on hopcount and routing for device ID of an attached | ||
271 | endpoint if access to its configuration registers is required. If a switch (or | ||
272 | chain of switches) does not have any endpoint (except enumerator) attached to | ||
273 | it, a fake device ID will be assigned to configure a route to that switch. | ||
274 | In the case of a chain of switches without endpoint, one fake device ID is used | ||
275 | to configure a route through the entire chain and switches are differentiated by | ||
276 | their hopcount value. | ||
277 | |||
278 | For both endpoints and switches the enumerator writes a unique component tag | ||
279 | into device's Component Tag CSR. That unique value is used by the error | ||
280 | management notification mechanism to identify a device that is reporting an | ||
281 | error management event. | ||
282 | |||
283 | Enumeration beyond a switch is completed by iterating over each active egress | ||
284 | port of that switch. For each active link, a route to a default device ID | ||
285 | (0xFF for 8-bit systems and 0xFFFF for 16-bit systems) is temporarily written | ||
286 | into the routing table. The algorithm recurs by calling itself with hopcount + 1 | ||
287 | and the default device ID in order to access the device on the active port. | ||
288 | |||
289 | After the host has completed enumeration of the entire network it releases | ||
290 | devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint | ||
291 | in the system, it sets the Discovered bit in the Port General Control CSR | ||
292 | to indicate that enumeration is completed and agents are allowed to execute | ||
293 | passive discovery of the network. | ||
294 | |||
295 | The discovery process is performed by agents and is similar to the enumeration | ||
296 | process that is described above. However, the discovery process is performed | ||
297 | without changes to the existing routing because agents only gather information | ||
298 | about RapidIO network structure and are building an internal map of discovered | ||
299 | devices. This way each Linux-based component of the RapidIO subsystem has | ||
300 | a complete view of the network. The discovery process can be performed | ||
301 | simultaneously by several agents. After initializing its RapidIO master port | ||
302 | each agent waits for enumeration completion by the host for the configured wait | ||
303 | time period. If this wait time period expires before enumeration is completed, | ||
304 | an agent skips RapidIO discovery and continues with remaining kernel | ||
305 | initialization. | ||
306 | |||
307 | 4.5 Adding New Enumeration/Discovery Method | ||
308 | ------------------------------------------- | ||
309 | |||
310 | RapidIO subsystem code organization allows addition of new enumeration/discovery | ||
311 | methods as new configuration options without significant impact to the core | ||
312 | RapidIO code. | ||
313 | |||
314 | A new enumeration/discovery method has to be attached to one or more mport | ||
315 | devices before an enumeration/discovery process can be started. Normally, | ||
316 | method's module initialization routine calls rio_register_scan() to attach | ||
317 | an enumerator to a specified mport device (or devices). The basic enumerator | ||
318 | implementation demonstrates this process. | ||
319 | |||
320 | 4.6 Using Loadable RapidIO Switch Drivers | ||
321 | ----------------------------------------- | ||
322 | |||
323 | In the case when RapidIO switch drivers are built as loadable modules a user | ||
324 | must ensure that they are loaded before the enumeration/discovery starts. | ||
325 | This process can be automated by specifying pre- or post- dependencies in the | ||
326 | RapidIO-specific modprobe configuration file as shown in the example below. | ||
327 | |||
328 | File /etc/modprobe.d/rapidio.conf:: | ||
329 | |||
330 | # Configure RapidIO subsystem modules | ||
331 | |||
332 | # Set enumerator host destination ID (overrides kernel command line option) | ||
333 | options rapidio hdid=-1,2 | ||
334 | |||
335 | # Load RapidIO switch drivers immediately after rapidio core module was loaded | ||
336 | softdep rapidio post: idt_gen2 idtcps tsi57x | ||
337 | |||
338 | # OR : | ||
339 | |||
340 | # Load RapidIO switch drivers just before rio-scan enumerator module is loaded | ||
341 | softdep rio-scan pre: idt_gen2 idtcps tsi57x | ||
342 | |||
343 | -------------------------- | ||
344 | |||
345 | NOTE: | ||
346 | In the example above, one of "softdep" commands must be removed or | ||
347 | commented out to keep required module loading sequence. | ||
348 | |||
349 | 5. References | ||
350 | ============= | ||
351 | |||
352 | [1] RapidIO Trade Association. RapidIO Interconnect Specifications. | ||
353 | http://www.rapidio.org. | ||
354 | |||
355 | [2] RapidIO TA. Technology Comparisons. | ||
356 | http://www.rapidio.org/education/technology_comparisons/ | ||
357 | |||
358 | [3] RapidIO support for Linux. | ||
359 | http://lwn.net/Articles/139118/ | ||
360 | |||
361 | [4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005 | ||
362 | http://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf | ||
diff --git a/Documentation/driver-api/rapidio/rio_cm.rst b/Documentation/driver-api/rapidio/rio_cm.rst new file mode 100644 index 000000000000..5294430a7a74 --- /dev/null +++ b/Documentation/driver-api/rapidio/rio_cm.rst | |||
@@ -0,0 +1,135 @@ | |||
1 | ========================================================================== | ||
2 | RapidIO subsystem Channelized Messaging character device driver (rio_cm.c) | ||
3 | ========================================================================== | ||
4 | |||
5 | |||
6 | 1. Overview | ||
7 | =========== | ||
8 | |||
9 | This device driver is the result of collaboration within the RapidIO.org | ||
10 | Software Task Group (STG) between Texas Instruments, Prodrive Technologies, | ||
11 | Nokia Networks, BAE and IDT. Additional input was received from other members | ||
12 | of RapidIO.org. | ||
13 | |||
14 | The objective was to create a character mode driver interface which exposes | ||
15 | messaging capabilities of RapidIO endpoint devices (mports) directly | ||
16 | to applications, in a manner that allows the numerous and varied RapidIO | ||
17 | implementations to interoperate. | ||
18 | |||
19 | This driver (RIO_CM) provides to user-space applications shared access to | ||
20 | RapidIO mailbox messaging resources. | ||
21 | |||
22 | RapidIO specification (Part 2) defines that endpoint devices may have up to four | ||
23 | messaging mailboxes in case of multi-packet message (up to 4KB) and | ||
24 | up to 64 mailboxes if single-packet messages (up to 256 B) are used. In addition | ||
25 | to protocol definition limitations, a particular hardware implementation can | ||
26 | have reduced number of messaging mailboxes. RapidIO aware applications must | ||
27 | therefore share the messaging resources of a RapidIO endpoint. | ||
28 | |||
29 | Main purpose of this device driver is to provide RapidIO mailbox messaging | ||
30 | capability to large number of user-space processes by introducing socket-like | ||
31 | operations using a single messaging mailbox. This allows applications to | ||
32 | use the limited RapidIO messaging hardware resources efficiently. | ||
33 | |||
34 | Most of device driver's operations are supported through 'ioctl' system calls. | ||
35 | |||
36 | When loaded this device driver creates a single file system node named rio_cm | ||
37 | in /dev directory common for all registered RapidIO mport devices. | ||
38 | |||
39 | Following ioctl commands are available to user-space applications: | ||
40 | |||
41 | - RIO_CM_MPORT_GET_LIST: | ||
42 | Returns to caller list of local mport devices that | ||
43 | support messaging operations (number of entries up to RIO_MAX_MPORTS). | ||
44 | Each list entry is combination of mport's index in the system and RapidIO | ||
45 | destination ID assigned to the port. | ||
46 | - RIO_CM_EP_GET_LIST_SIZE: | ||
47 | Returns number of messaging capable remote endpoints | ||
48 | in a RapidIO network associated with the specified mport device. | ||
49 | - RIO_CM_EP_GET_LIST: | ||
50 | Returns list of RapidIO destination IDs for messaging | ||
51 | capable remote endpoints (peers) available in a RapidIO network associated | ||
52 | with the specified mport device. | ||
53 | - RIO_CM_CHAN_CREATE: | ||
54 | Creates RapidIO message exchange channel data structure | ||
55 | with channel ID assigned automatically or as requested by a caller. | ||
56 | - RIO_CM_CHAN_BIND: | ||
57 | Binds the specified channel data structure to the specified | ||
58 | mport device. | ||
59 | - RIO_CM_CHAN_LISTEN: | ||
60 | Enables listening for connection requests on the specified | ||
61 | channel. | ||
62 | - RIO_CM_CHAN_ACCEPT: | ||
63 | Accepts a connection request from peer on the specified | ||
64 | channel. If wait timeout for this request is specified by a caller it is | ||
65 | a blocking call. If timeout set to 0 this is non-blocking call - ioctl | ||
66 | handler checks for a pending connection request and if one is not available | ||
67 | exits with -EAGAIN error status immediately. | ||
68 | - RIO_CM_CHAN_CONNECT: | ||
69 | Sends a connection request to a remote peer/channel. | ||
70 | - RIO_CM_CHAN_SEND: | ||
71 | Sends a data message through the specified channel. | ||
72 | The handler for this request assumes that message buffer specified by | ||
73 | a caller includes the reserved space for a packet header required by | ||
74 | this driver. | ||
75 | - RIO_CM_CHAN_RECEIVE: | ||
76 | Receives a data message through a connected channel. | ||
77 | If the channel does not have an incoming message ready to return this ioctl | ||
78 | handler will wait for new message until timeout specified by a caller | ||
79 | expires. If timeout value is set to 0, ioctl handler uses a default value | ||
80 | defined by MAX_SCHEDULE_TIMEOUT. | ||
81 | - RIO_CM_CHAN_CLOSE: | ||
82 | Closes a specified channel and frees associated buffers. | ||
83 | If the specified channel is in the CONNECTED state, sends close notification | ||
84 | to the remote peer. | ||
85 | |||
86 | The ioctl command codes and corresponding data structures intended for use by | ||
87 | user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'. | ||
88 | |||
89 | 2. Hardware Compatibility | ||
90 | ========================= | ||
91 | |||
92 | This device driver uses standard interfaces defined by kernel RapidIO subsystem | ||
93 | and therefore it can be used with any mport device driver registered by RapidIO | ||
94 | subsystem with limitations set by available mport HW implementation of messaging | ||
95 | mailboxes. | ||
96 | |||
97 | 3. Module parameters | ||
98 | ==================== | ||
99 | |||
100 | - 'dbg_level' | ||
101 | - This parameter allows to control amount of debug information | ||
102 | generated by this device driver. This parameter is formed by set of | ||
103 | bit masks that correspond to the specific functional block. | ||
104 | For mask definitions see 'drivers/rapidio/devices/rio_cm.c' | ||
105 | This parameter can be changed dynamically. | ||
106 | Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. | ||
107 | |||
108 | - 'cmbox' | ||
109 | - Number of RapidIO mailbox to use (default value is 1). | ||
110 | This parameter allows to set messaging mailbox number that will be used | ||
111 | within entire RapidIO network. It can be used when default mailbox is | ||
112 | used by other device drivers or is not supported by some nodes in the | ||
113 | RapidIO network. | ||
114 | |||
115 | - 'chstart' | ||
116 | - Start channel number for dynamic assignment. Default value - 256. | ||
117 | Allows to exclude channel numbers below this parameter from dynamic | ||
118 | allocation to avoid conflicts with software components that use | ||
119 | reserved predefined channel numbers. | ||
120 | |||
121 | 4. Known problems | ||
122 | ================= | ||
123 | |||
124 | None. | ||
125 | |||
126 | 5. User-space Applications and API Library | ||
127 | ========================================== | ||
128 | |||
129 | Messaging API library and applications that use this device driver are available | ||
130 | from RapidIO.org. | ||
131 | |||
132 | 6. TODO List | ||
133 | ============ | ||
134 | |||
135 | - Add support for system notification messages (reserved channel 0). | ||
diff --git a/Documentation/driver-api/rapidio/sysfs.rst b/Documentation/driver-api/rapidio/sysfs.rst new file mode 100644 index 000000000000..540f72683496 --- /dev/null +++ b/Documentation/driver-api/rapidio/sysfs.rst | |||
@@ -0,0 +1,7 @@ | |||
1 | ============= | ||
2 | Sysfs entries | ||
3 | ============= | ||
4 | |||
5 | The RapidIO sysfs files have moved to: | ||
6 | Documentation/ABI/testing/sysfs-bus-rapidio and | ||
7 | Documentation/ABI/testing/sysfs-class-rapidio | ||
diff --git a/Documentation/driver-api/rapidio/tsi721.rst b/Documentation/driver-api/rapidio/tsi721.rst new file mode 100644 index 000000000000..42aea438cd20 --- /dev/null +++ b/Documentation/driver-api/rapidio/tsi721.rst | |||
@@ -0,0 +1,112 @@ | |||
1 | ========================================================================= | ||
2 | RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge. | ||
3 | ========================================================================= | ||
4 | |||
5 | 1. Overview | ||
6 | =========== | ||
7 | |||
8 | This driver implements all currently defined RapidIO mport callback functions. | ||
9 | It supports maintenance read and write operations, inbound and outbound RapidIO | ||
10 | doorbells, inbound maintenance port-writes and RapidIO messaging. | ||
11 | |||
12 | To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA | ||
13 | channels. This mechanism provides access to larger range of hop counts and | ||
14 | destination IDs without need for changes in outbound window translation. | ||
15 | |||
16 | RapidIO messaging support uses dedicated messaging channels for each mailbox. | ||
17 | For inbound messages this driver uses destination ID matching to forward messages | ||
18 | into the corresponding message queue. Messaging callbacks are implemented to be | ||
19 | fully compatible with RIONET driver (Ethernet over RapidIO messaging services). | ||
20 | |||
21 | 1. Module parameters: | ||
22 | |||
23 | - 'dbg_level' | ||
24 | - This parameter allows to control amount of debug information | ||
25 | generated by this device driver. This parameter is formed by a set of | ||
26 | bit masks that correspond to the specific | ||
27 | functional block. | ||
28 | For mask definitions see 'drivers/rapidio/devices/tsi721.h' | ||
29 | This parameter can be changed dynamically. | ||
30 | Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. | ||
31 | |||
32 | - 'dma_desc_per_channel' | ||
33 | - This parameter defines number of hardware buffer | ||
34 | descriptors allocated for each registered Tsi721 DMA channel. | ||
35 | Its default value is 128. | ||
36 | |||
37 | - 'dma_txqueue_sz' | ||
38 | - DMA transactions queue size. Defines number of pending | ||
39 | transaction requests that can be accepted by each DMA channel. | ||
40 | Default value is 16. | ||
41 | |||
42 | - 'dma_sel' | ||
43 | - DMA channel selection mask. Bitmask that defines which hardware | ||
44 | DMA channels (0 ... 6) will be registered with DmaEngine core. | ||
45 | If bit is set to 1, the corresponding DMA channel will be registered. | ||
46 | DMA channels not selected by this mask will not be used by this device | ||
47 | driver. Default value is 0x7f (use all channels). | ||
48 | |||
49 | - 'pcie_mrrs' | ||
50 | - override value for PCIe Maximum Read Request Size (MRRS). | ||
51 | This parameter gives an ability to override MRRS value set during PCIe | ||
52 | configuration process. Tsi721 supports read request sizes up to 4096B. | ||
53 | Value for this parameter must be set as defined by PCIe specification: | ||
54 | 0 = 128B, 1 = 256B, 2 = 512B, 3 = 1024B, 4 = 2048B and 5 = 4096B. | ||
55 | Default value is '-1' (= keep platform setting). | ||
56 | |||
57 | - 'mbox_sel' | ||
58 | - RIO messaging MBOX selection mask. This is a bitmask that defines | ||
59 | which messaging MBOXes are managed by this device driver. Mask bits 0 - 3 | ||
60 | correspond to MBOX0 - MBOX3. MBOX is under driver's control if the | ||
61 | corresponding bit is set to '1'. Default value is 0x0f (= all). | ||
62 | |||
63 | 2. Known problems | ||
64 | ================= | ||
65 | |||
66 | None. | ||
67 | |||
68 | 3. DMA Engine Support | ||
69 | ===================== | ||
70 | |||
71 | Tsi721 mport driver supports DMA data transfers between local system memory and | ||
72 | remote RapidIO devices. This functionality is implemented according to SLAVE | ||
73 | mode API defined by common Linux kernel DMA Engine framework. | ||
74 | |||
75 | Depending on system requirements RapidIO DMA operations can be included/excluded | ||
76 | by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven | ||
77 | out of eight available BDMA channels to support DMA data transfers. | ||
78 | One BDMA channel is reserved for generation of maintenance read/write requests. | ||
79 | |||
80 | If Tsi721 mport driver has been built with RAPIDIO_DMA_ENGINE support included, | ||
81 | this driver will accept DMA-specific module parameter: | ||
82 | |||
83 | "dma_desc_per_channel" | ||
84 | - defines number of hardware buffer descriptors used by | ||
85 | each BDMA channel of Tsi721 (by default - 128). | ||
86 | |||
87 | 4. Version History | ||
88 | |||
89 | ===== ==================================================================== | ||
90 | 1.1.0 DMA operations re-worked to support data scatter/gather lists larger | ||
91 | than hardware buffer descriptors ring. | ||
92 | 1.0.0 Initial driver release. | ||
93 | ===== ==================================================================== | ||
94 | |||
95 | 5. License | ||
96 | =========== | ||
97 | |||
98 | Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved. | ||
99 | |||
100 | This program is free software; you can redistribute it and/or modify it | ||
101 | under the terms of the GNU General Public License as published by the Free | ||
102 | Software Foundation; either version 2 of the License, or (at your option) | ||
103 | any later version. | ||
104 | |||
105 | This program is distributed in the hope that it will be useful, but WITHOUT | ||
106 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
107 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
108 | more details. | ||
109 | |||
110 | You should have received a copy of the GNU General Public License along with | ||
111 | this program; if not, write to the Free Software Foundation, Inc., | ||
112 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
diff --git a/Documentation/driver-api/rfkill.rst b/Documentation/driver-api/rfkill.rst new file mode 100644 index 000000000000..7d3684e81df6 --- /dev/null +++ b/Documentation/driver-api/rfkill.rst | |||
@@ -0,0 +1,132 @@ | |||
1 | =============================== | ||
2 | rfkill - RF kill switch support | ||
3 | =============================== | ||
4 | |||
5 | |||
6 | .. contents:: | ||
7 | :depth: 2 | ||
8 | |||
9 | Introduction | ||
10 | ============ | ||
11 | |||
12 | The rfkill subsystem provides a generic interface for disabling any radio | ||
13 | transmitter in the system. When a transmitter is blocked, it shall not | ||
14 | radiate any power. | ||
15 | |||
16 | The subsystem also provides the ability to react to button presses and | ||
17 | disable all transmitters of a certain type (or all). This is intended for | ||
18 | situations where transmitters need to be turned off, for example on | ||
19 | aircraft. | ||
20 | |||
21 | The rfkill subsystem has a concept of "hard" and "soft" block, which | ||
22 | differ little in their meaning (block == transmitters off) but rather in | ||
23 | whether they can be changed or not: | ||
24 | |||
25 | - hard block | ||
26 | read-only radio block that cannot be overridden by software | ||
27 | |||
28 | - soft block | ||
29 | writable radio block (need not be readable) that is set by | ||
30 | the system software. | ||
31 | |||
32 | The rfkill subsystem has two parameters, rfkill.default_state and | ||
33 | rfkill.master_switch_mode, which are documented in | ||
34 | admin-guide/kernel-parameters.rst. | ||
35 | |||
36 | |||
37 | Implementation details | ||
38 | ====================== | ||
39 | |||
40 | The rfkill subsystem is composed of three main components: | ||
41 | |||
42 | * the rfkill core, | ||
43 | * the deprecated rfkill-input module (an input layer handler, being | ||
44 | replaced by userspace policy code) and | ||
45 | * the rfkill drivers. | ||
46 | |||
47 | The rfkill core provides API for kernel drivers to register their radio | ||
48 | transmitter with the kernel, methods for turning it on and off, and letting | ||
49 | the system know about hardware-disabled states that may be implemented on | ||
50 | the device. | ||
51 | |||
52 | The rfkill core code also notifies userspace of state changes, and provides | ||
53 | ways for userspace to query the current states. See the "Userspace support" | ||
54 | section below. | ||
55 | |||
56 | When the device is hard-blocked (either by a call to rfkill_set_hw_state() | ||
57 | or from query_hw_block), set_block() will be invoked for additional software | ||
58 | block, but drivers can ignore the method call since they can use the return | ||
59 | value of the function rfkill_set_hw_state() to sync the software state | ||
60 | instead of keeping track of calls to set_block(). In fact, drivers should | ||
61 | use the return value of rfkill_set_hw_state() unless the hardware actually | ||
62 | keeps track of soft and hard block separately. | ||
63 | |||
64 | |||
65 | Kernel API | ||
66 | ========== | ||
67 | |||
68 | Drivers for radio transmitters normally implement an rfkill driver. | ||
69 | |||
70 | Platform drivers might implement input devices if the rfkill button is just | ||
71 | that, a button. If that button influences the hardware then you need to | ||
72 | implement an rfkill driver instead. This also applies if the platform provides | ||
73 | a way to turn on/off the transmitter(s). | ||
74 | |||
75 | For some platforms, it is possible that the hardware state changes during | ||
76 | suspend/hibernation, in which case it will be necessary to update the rfkill | ||
77 | core with the current state at resume time. | ||
78 | |||
79 | To create an rfkill driver, driver's Kconfig needs to have:: | ||
80 | |||
81 | depends on RFKILL || !RFKILL | ||
82 | |||
83 | to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL | ||
84 | case allows the driver to be built when rfkill is not configured, in which | ||
85 | case all rfkill API can still be used but will be provided by static inlines | ||
86 | which compile to almost nothing. | ||
87 | |||
88 | Calling rfkill_set_hw_state() when a state change happens is required from | ||
89 | rfkill drivers that control devices that can be hard-blocked unless they also | ||
90 | assign the poll_hw_block() callback (then the rfkill core will poll the | ||
91 | device). Don't do this unless you cannot get the event in any other way. | ||
92 | |||
93 | rfkill provides per-switch LED triggers, which can be used to drive LEDs | ||
94 | according to the switch state (LED_FULL when blocked, LED_OFF otherwise). | ||
95 | |||
96 | |||
97 | Userspace support | ||
98 | ================= | ||
99 | |||
100 | The recommended userspace interface to use is /dev/rfkill, which is a misc | ||
101 | character device that allows userspace to obtain and set the state of rfkill | ||
102 | devices and sets of devices. It also notifies userspace about device addition | ||
103 | and removal. The API is a simple read/write API that is defined in | ||
104 | linux/rfkill.h, with one ioctl that allows turning off the deprecated input | ||
105 | handler in the kernel for the transition period. | ||
106 | |||
107 | Except for the one ioctl, communication with the kernel is done via read() | ||
108 | and write() of instances of 'struct rfkill_event'. In this structure, the | ||
109 | soft and hard block are properly separated (unlike sysfs, see below) and | ||
110 | userspace is able to get a consistent snapshot of all rfkill devices in the | ||
111 | system. Also, it is possible to switch all rfkill drivers (or all drivers of | ||
112 | a specified type) into a state which also updates the default state for | ||
113 | hotplugged devices. | ||
114 | |||
115 | After an application opens /dev/rfkill, it can read the current state of all | ||
116 | devices. Changes can be obtained by either polling the descriptor for | ||
117 | hotplug or state change events or by listening for uevents emitted by the | ||
118 | rfkill core framework. | ||
119 | |||
120 | Additionally, each rfkill device is registered in sysfs and emits uevents. | ||
121 | |||
122 | rfkill devices issue uevents (with an action of "change"), with the following | ||
123 | environment variables set:: | ||
124 | |||
125 | RFKILL_NAME | ||
126 | RFKILL_STATE | ||
127 | RFKILL_TYPE | ||
128 | |||
129 | The content of these variables corresponds to the "name", "state" and | ||
130 | "type" sysfs files explained above. | ||
131 | |||
132 | For further details consult Documentation/ABI/stable/sysfs-class-rfkill. | ||
diff --git a/Documentation/driver-api/serial/cyclades_z.rst b/Documentation/driver-api/serial/cyclades_z.rst new file mode 100644 index 000000000000..532ff67e2f1c --- /dev/null +++ b/Documentation/driver-api/serial/cyclades_z.rst | |||
@@ -0,0 +1,11 @@ | |||
1 | ================ | ||
2 | Cyclades-Z notes | ||
3 | ================ | ||
4 | |||
5 | The Cyclades-Z must have firmware loaded onto the card before it will | ||
6 | operate. This operation should be performed during system startup. | ||
7 | |||
8 | The firmware, loader program and the latest device driver code are | ||
9 | available from Cyclades at | ||
10 | |||
11 | ftp://ftp.cyclades.com/pub/cyclades/cyclades-z/linux/ | ||
diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst new file mode 100644 index 000000000000..31bd4e16fb1f --- /dev/null +++ b/Documentation/driver-api/serial/driver.rst | |||
@@ -0,0 +1,549 @@ | |||
1 | ==================== | ||
2 | Low Level Serial API | ||
3 | ==================== | ||
4 | |||
5 | |||
6 | This document is meant as a brief overview of some aspects of the new serial | ||
7 | driver. It is not complete, any questions you have should be directed to | ||
8 | <rmk@arm.linux.org.uk> | ||
9 | |||
10 | The reference implementation is contained within amba-pl011.c. | ||
11 | |||
12 | |||
13 | |||
14 | Low Level Serial Hardware Driver | ||
15 | -------------------------------- | ||
16 | |||
17 | The low level serial hardware driver is responsible for supplying port | ||
18 | information (defined by uart_port) and a set of control methods (defined | ||
19 | by uart_ops) to the core serial driver. The low level driver is also | ||
20 | responsible for handling interrupts for the port, and providing any | ||
21 | console support. | ||
22 | |||
23 | |||
24 | Console Support | ||
25 | --------------- | ||
26 | |||
27 | The serial core provides a few helper functions. This includes identifying | ||
28 | the correct port structure (via uart_get_console) and decoding command line | ||
29 | arguments (uart_parse_options). | ||
30 | |||
31 | There is also a helper function (uart_console_write) which performs a | ||
32 | character by character write, translating newlines to CRLF sequences. | ||
33 | Driver writers are recommended to use this function rather than implementing | ||
34 | their own version. | ||
35 | |||
36 | |||
37 | Locking | ||
38 | ------- | ||
39 | |||
40 | It is the responsibility of the low level hardware driver to perform the | ||
41 | necessary locking using port->lock. There are some exceptions (which | ||
42 | are described in the uart_ops listing below.) | ||
43 | |||
44 | There are two locks. A per-port spinlock, and an overall semaphore. | ||
45 | |||
46 | From the core driver perspective, the port->lock locks the following | ||
47 | data:: | ||
48 | |||
49 | port->mctrl | ||
50 | port->icount | ||
51 | port->state->xmit.head (circ_buf->head) | ||
52 | port->state->xmit.tail (circ_buf->tail) | ||
53 | |||
54 | The low level driver is free to use this lock to provide any additional | ||
55 | locking. | ||
56 | |||
57 | The port_sem semaphore is used to protect against ports being added/ | ||
58 | removed or reconfigured at inappropriate times. Since v2.6.27, this | ||
59 | semaphore has been the 'mutex' member of the tty_port struct, and | ||
60 | commonly referred to as the port mutex. | ||
61 | |||
62 | |||
63 | uart_ops | ||
64 | -------- | ||
65 | |||
66 | The uart_ops structure is the main interface between serial_core and the | ||
67 | hardware specific driver. It contains all the methods to control the | ||
68 | hardware. | ||
69 | |||
70 | tx_empty(port) | ||
71 | This function tests whether the transmitter fifo and shifter | ||
72 | for the port described by 'port' is empty. If it is empty, | ||
73 | this function should return TIOCSER_TEMT, otherwise return 0. | ||
74 | If the port does not support this operation, then it should | ||
75 | return TIOCSER_TEMT. | ||
76 | |||
77 | Locking: none. | ||
78 | |||
79 | Interrupts: caller dependent. | ||
80 | |||
81 | This call must not sleep | ||
82 | |||
83 | set_mctrl(port, mctrl) | ||
84 | This function sets the modem control lines for port described | ||
85 | by 'port' to the state described by mctrl. The relevant bits | ||
86 | of mctrl are: | ||
87 | |||
88 | - TIOCM_RTS RTS signal. | ||
89 | - TIOCM_DTR DTR signal. | ||
90 | - TIOCM_OUT1 OUT1 signal. | ||
91 | - TIOCM_OUT2 OUT2 signal. | ||
92 | - TIOCM_LOOP Set the port into loopback mode. | ||
93 | |||
94 | If the appropriate bit is set, the signal should be driven | ||
95 | active. If the bit is clear, the signal should be driven | ||
96 | inactive. | ||
97 | |||
98 | Locking: port->lock taken. | ||
99 | |||
100 | Interrupts: locally disabled. | ||
101 | |||
102 | This call must not sleep | ||
103 | |||
104 | get_mctrl(port) | ||
105 | Returns the current state of modem control inputs. The state | ||
106 | of the outputs should not be returned, since the core keeps | ||
107 | track of their state. The state information should include: | ||
108 | |||
109 | - TIOCM_CAR state of DCD signal | ||
110 | - TIOCM_CTS state of CTS signal | ||
111 | - TIOCM_DSR state of DSR signal | ||
112 | - TIOCM_RI state of RI signal | ||
113 | |||
114 | The bit is set if the signal is currently driven active. If | ||
115 | the port does not support CTS, DCD or DSR, the driver should | ||
116 | indicate that the signal is permanently active. If RI is | ||
117 | not available, the signal should not be indicated as active. | ||
118 | |||
119 | Locking: port->lock taken. | ||
120 | |||
121 | Interrupts: locally disabled. | ||
122 | |||
123 | This call must not sleep | ||
124 | |||
125 | stop_tx(port) | ||
126 | Stop transmitting characters. This might be due to the CTS | ||
127 | line becoming inactive or the tty layer indicating we want | ||
128 | to stop transmission due to an XOFF character. | ||
129 | |||
130 | The driver should stop transmitting characters as soon as | ||
131 | possible. | ||
132 | |||
133 | Locking: port->lock taken. | ||
134 | |||
135 | Interrupts: locally disabled. | ||
136 | |||
137 | This call must not sleep | ||
138 | |||
139 | start_tx(port) | ||
140 | Start transmitting characters. | ||
141 | |||
142 | Locking: port->lock taken. | ||
143 | |||
144 | Interrupts: locally disabled. | ||
145 | |||
146 | This call must not sleep | ||
147 | |||
148 | throttle(port) | ||
149 | Notify the serial driver that input buffers for the line discipline are | ||
150 | close to full, and it should somehow signal that no more characters | ||
151 | should be sent to the serial port. | ||
152 | This will be called only if hardware assisted flow control is enabled. | ||
153 | |||
154 | Locking: serialized with .unthrottle() and termios modification by the | ||
155 | tty layer. | ||
156 | |||
157 | unthrottle(port) | ||
158 | Notify the serial driver that characters can now be sent to the serial | ||
159 | port without fear of overrunning the input buffers of the line | ||
160 | disciplines. | ||
161 | |||
162 | This will be called only if hardware assisted flow control is enabled. | ||
163 | |||
164 | Locking: serialized with .throttle() and termios modification by the | ||
165 | tty layer. | ||
166 | |||
167 | send_xchar(port,ch) | ||
168 | Transmit a high priority character, even if the port is stopped. | ||
169 | This is used to implement XON/XOFF flow control and tcflow(). If | ||
170 | the serial driver does not implement this function, the tty core | ||
171 | will append the character to the circular buffer and then call | ||
172 | start_tx() / stop_tx() to flush the data out. | ||
173 | |||
174 | Do not transmit if ch == '\0' (__DISABLED_CHAR). | ||
175 | |||
176 | Locking: none. | ||
177 | |||
178 | Interrupts: caller dependent. | ||
179 | |||
180 | stop_rx(port) | ||
181 | Stop receiving characters; the port is in the process of | ||
182 | being closed. | ||
183 | |||
184 | Locking: port->lock taken. | ||
185 | |||
186 | Interrupts: locally disabled. | ||
187 | |||
188 | This call must not sleep | ||
189 | |||
190 | enable_ms(port) | ||
191 | Enable the modem status interrupts. | ||
192 | |||
193 | This method may be called multiple times. Modem status | ||
194 | interrupts should be disabled when the shutdown method is | ||
195 | called. | ||
196 | |||
197 | Locking: port->lock taken. | ||
198 | |||
199 | Interrupts: locally disabled. | ||
200 | |||
201 | This call must not sleep | ||
202 | |||
203 | break_ctl(port,ctl) | ||
204 | Control the transmission of a break signal. If ctl is | ||
205 | nonzero, the break signal should be transmitted. The signal | ||
206 | should be terminated when another call is made with a zero | ||
207 | ctl. | ||
208 | |||
209 | Locking: caller holds tty_port->mutex | ||
210 | |||
211 | startup(port) | ||
212 | Grab any interrupt resources and initialise any low level driver | ||
213 | state. Enable the port for reception. It should not activate | ||
214 | RTS nor DTR; this will be done via a separate call to set_mctrl. | ||
215 | |||
216 | This method will only be called when the port is initially opened. | ||
217 | |||
218 | Locking: port_sem taken. | ||
219 | |||
220 | Interrupts: globally disabled. | ||
221 | |||
222 | shutdown(port) | ||
223 | Disable the port, disable any break condition that may be in | ||
224 | effect, and free any interrupt resources. It should not disable | ||
225 | RTS nor DTR; this will have already been done via a separate | ||
226 | call to set_mctrl. | ||
227 | |||
228 | Drivers must not access port->state once this call has completed. | ||
229 | |||
230 | This method will only be called when there are no more users of | ||
231 | this port. | ||
232 | |||
233 | Locking: port_sem taken. | ||
234 | |||
235 | Interrupts: caller dependent. | ||
236 | |||
237 | flush_buffer(port) | ||
238 | Flush any write buffers, reset any DMA state and stop any | ||
239 | ongoing DMA transfers. | ||
240 | |||
241 | This will be called whenever the port->state->xmit circular | ||
242 | buffer is cleared. | ||
243 | |||
244 | Locking: port->lock taken. | ||
245 | |||
246 | Interrupts: locally disabled. | ||
247 | |||
248 | This call must not sleep | ||
249 | |||
250 | set_termios(port,termios,oldtermios) | ||
251 | Change the port parameters, including word length, parity, stop | ||
252 | bits. Update read_status_mask and ignore_status_mask to indicate | ||
253 | the types of events we are interested in receiving. Relevant | ||
254 | termios->c_cflag bits are: | ||
255 | |||
256 | CSIZE | ||
257 | - word size | ||
258 | CSTOPB | ||
259 | - 2 stop bits | ||
260 | PARENB | ||
261 | - parity enable | ||
262 | PARODD | ||
263 | - odd parity (when PARENB is in force) | ||
264 | CREAD | ||
265 | - enable reception of characters (if not set, | ||
266 | still receive characters from the port, but | ||
267 | throw them away). | ||
268 | CRTSCTS | ||
269 | - if set, enable CTS status change reporting | ||
270 | CLOCAL | ||
271 | - if not set, enable modem status change | ||
272 | reporting. | ||
273 | |||
274 | Relevant termios->c_iflag bits are: | ||
275 | |||
276 | INPCK | ||
277 | - enable frame and parity error events to be | ||
278 | passed to the TTY layer. | ||
279 | BRKINT / PARMRK | ||
280 | - both of these enable break events to be | ||
281 | passed to the TTY layer. | ||
282 | |||
283 | IGNPAR | ||
284 | - ignore parity and framing errors | ||
285 | IGNBRK | ||
286 | - ignore break errors. If IGNPAR is also | ||
287 | set, ignore overrun errors as well. | ||
288 | |||
289 | The interaction of the iflag bits is as follows (parity error | ||
290 | given as an example): | ||
291 | |||
292 | =============== ======= ====== ============================= | ||
293 | Parity error INPCK IGNPAR | ||
294 | =============== ======= ====== ============================= | ||
295 | n/a 0 n/a character received, marked as | ||
296 | TTY_NORMAL | ||
297 | None 1 n/a character received, marked as | ||
298 | TTY_NORMAL | ||
299 | Yes 1 0 character received, marked as | ||
300 | TTY_PARITY | ||
301 | Yes 1 1 character discarded | ||
302 | =============== ======= ====== ============================= | ||
303 | |||
304 | Other flags may be used (eg, xon/xoff characters) if your | ||
305 | hardware supports hardware "soft" flow control. | ||
306 | |||
307 | Locking: caller holds tty_port->mutex | ||
308 | |||
309 | Interrupts: caller dependent. | ||
310 | |||
311 | This call must not sleep | ||
312 | |||
313 | set_ldisc(port,termios) | ||
314 | Notifier for discipline change. See Documentation/driver-api/serial/tty.rst. | ||
315 | |||
316 | Locking: caller holds tty_port->mutex | ||
317 | |||
318 | pm(port,state,oldstate) | ||
319 | Perform any power management related activities on the specified | ||
320 | port. State indicates the new state (defined by | ||
321 | enum uart_pm_state), oldstate indicates the previous state. | ||
322 | |||
323 | This function should not be used to grab any resources. | ||
324 | |||
325 | This will be called when the port is initially opened and finally | ||
326 | closed, except when the port is also the system console. This | ||
327 | will occur even if CONFIG_PM is not set. | ||
328 | |||
329 | Locking: none. | ||
330 | |||
331 | Interrupts: caller dependent. | ||
332 | |||
333 | type(port) | ||
334 | Return a pointer to a string constant describing the specified | ||
335 | port, or return NULL, in which case the string 'unknown' is | ||
336 | substituted. | ||
337 | |||
338 | Locking: none. | ||
339 | |||
340 | Interrupts: caller dependent. | ||
341 | |||
342 | release_port(port) | ||
343 | Release any memory and IO region resources currently in use by | ||
344 | the port. | ||
345 | |||
346 | Locking: none. | ||
347 | |||
348 | Interrupts: caller dependent. | ||
349 | |||
350 | request_port(port) | ||
351 | Request any memory and IO region resources required by the port. | ||
352 | If any fail, no resources should be registered when this function | ||
353 | returns, and it should return -EBUSY on failure. | ||
354 | |||
355 | Locking: none. | ||
356 | |||
357 | Interrupts: caller dependent. | ||
358 | |||
359 | config_port(port,type) | ||
360 | Perform any autoconfiguration steps required for the port. `type` | ||
361 | contains a bit mask of the required configuration. UART_CONFIG_TYPE | ||
362 | indicates that the port requires detection and identification. | ||
363 | port->type should be set to the type found, or PORT_UNKNOWN if | ||
364 | no port was detected. | ||
365 | |||
366 | UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal, | ||
367 | which should be probed using standard kernel autoprobing techniques. | ||
368 | This is not necessary on platforms where ports have interrupts | ||
369 | internally hard wired (eg, system on a chip implementations). | ||
370 | |||
371 | Locking: none. | ||
372 | |||
373 | Interrupts: caller dependent. | ||
374 | |||
375 | verify_port(port,serinfo) | ||
376 | Verify the new serial port information contained within serinfo is | ||
377 | suitable for this port type. | ||
378 | |||
379 | Locking: none. | ||
380 | |||
381 | Interrupts: caller dependent. | ||
382 | |||
383 | ioctl(port,cmd,arg) | ||
384 | Perform any port specific IOCTLs. IOCTL commands must be defined | ||
385 | using the standard numbering system found in <asm/ioctl.h> | ||
386 | |||
387 | Locking: none. | ||
388 | |||
389 | Interrupts: caller dependent. | ||
390 | |||
391 | poll_init(port) | ||
392 | Called by kgdb to perform the minimal hardware initialization needed | ||
393 | to support poll_put_char() and poll_get_char(). Unlike ->startup() | ||
394 | this should not request interrupts. | ||
395 | |||
396 | Locking: tty_mutex and tty_port->mutex taken. | ||
397 | |||
398 | Interrupts: n/a. | ||
399 | |||
400 | poll_put_char(port,ch) | ||
401 | Called by kgdb to write a single character directly to the serial | ||
402 | port. It can and should block until there is space in the TX FIFO. | ||
403 | |||
404 | Locking: none. | ||
405 | |||
406 | Interrupts: caller dependent. | ||
407 | |||
408 | This call must not sleep | ||
409 | |||
410 | poll_get_char(port) | ||
411 | Called by kgdb to read a single character directly from the serial | ||
412 | port. If data is available, it should be returned; otherwise | ||
413 | the function should return NO_POLL_CHAR immediately. | ||
414 | |||
415 | Locking: none. | ||
416 | |||
417 | Interrupts: caller dependent. | ||
418 | |||
419 | This call must not sleep | ||
420 | |||
421 | Other functions | ||
422 | --------------- | ||
423 | |||
424 | uart_update_timeout(port,cflag,baud) | ||
425 | Update the FIFO drain timeout, port->timeout, according to the | ||
426 | number of bits, parity, stop bits and baud rate. | ||
427 | |||
428 | Locking: caller is expected to take port->lock | ||
429 | |||
430 | Interrupts: n/a | ||
431 | |||
432 | uart_get_baud_rate(port,termios,old,min,max) | ||
433 | Return the numeric baud rate for the specified termios, taking | ||
434 | account of the special 38400 baud "kludge". The B0 baud rate | ||
435 | is mapped to 9600 baud. | ||
436 | |||
437 | If the baud rate is not within min..max, then if old is non-NULL, | ||
438 | the original baud rate will be tried. If that exceeds the | ||
439 | min..max constraint, 9600 baud will be returned. termios will | ||
440 | be updated to the baud rate in use. | ||
441 | |||
442 | Note: min..max must always allow 9600 baud to be selected. | ||
443 | |||
444 | Locking: caller dependent. | ||
445 | |||
446 | Interrupts: n/a | ||
447 | |||
448 | uart_get_divisor(port,baud) | ||
449 | Return the divisor (baud_base / baud) for the specified baud | ||
450 | rate, appropriately rounded. | ||
451 | |||
452 | If 38400 baud and custom divisor is selected, return the | ||
453 | custom divisor instead. | ||
454 | |||
455 | Locking: caller dependent. | ||
456 | |||
457 | Interrupts: n/a | ||
458 | |||
459 | uart_match_port(port1,port2) | ||
460 | This utility function can be used to determine whether two | ||
461 | uart_port structures describe the same port. | ||
462 | |||
463 | Locking: n/a | ||
464 | |||
465 | Interrupts: n/a | ||
466 | |||
467 | uart_write_wakeup(port) | ||
468 | A driver is expected to call this function when the number of | ||
469 | characters in the transmit buffer has dropped below a threshold. | ||
470 | |||
471 | Locking: port->lock should be held. | ||
472 | |||
473 | Interrupts: n/a | ||
474 | |||
475 | uart_register_driver(drv) | ||
476 | Register a uart driver with the core driver. We in turn register | ||
477 | with the tty layer, and initialise the core driver per-port state. | ||
478 | |||
479 | drv->port should be NULL, and the per-port structures should be | ||
480 | registered using uart_add_one_port after this call has succeeded. | ||
481 | |||
482 | Locking: none | ||
483 | |||
484 | Interrupts: enabled | ||
485 | |||
486 | uart_unregister_driver() | ||
487 | Remove all references to a driver from the core driver. The low | ||
488 | level driver must have removed all its ports via the | ||
489 | uart_remove_one_port() if it registered them with uart_add_one_port(). | ||
490 | |||
491 | Locking: none | ||
492 | |||
493 | Interrupts: enabled | ||
494 | |||
495 | **uart_suspend_port()** | ||
496 | |||
497 | **uart_resume_port()** | ||
498 | |||
499 | **uart_add_one_port()** | ||
500 | |||
501 | **uart_remove_one_port()** | ||
502 | |||
503 | Other notes | ||
504 | ----------- | ||
505 | |||
506 | It is intended some day to drop the 'unused' entries from uart_port, and | ||
507 | allow low level drivers to register their own individual uart_port's with | ||
508 | the core. This will allow drivers to use uart_port as a pointer to a | ||
509 | structure containing both the uart_port entry with their own extensions, | ||
510 | thus:: | ||
511 | |||
512 | struct my_port { | ||
513 | struct uart_port port; | ||
514 | int my_stuff; | ||
515 | }; | ||
516 | |||
517 | Modem control lines via GPIO | ||
518 | ---------------------------- | ||
519 | |||
520 | Some helpers are provided in order to set/get modem control lines via GPIO. | ||
521 | |||
522 | mctrl_gpio_init(port, idx): | ||
523 | This will get the {cts,rts,...}-gpios from device tree if they are | ||
524 | present and request them, set direction etc, and return an | ||
525 | allocated structure. `devm_*` functions are used, so there's no need | ||
526 | to call mctrl_gpio_free(). | ||
527 | As this sets up the irq handling make sure to not handle changes to the | ||
528 | gpio input lines in your driver, too. | ||
529 | |||
530 | mctrl_gpio_free(dev, gpios): | ||
531 | This will free the requested gpios in mctrl_gpio_init(). | ||
532 | As `devm_*` functions are used, there's generally no need to call | ||
533 | this function. | ||
534 | |||
535 | mctrl_gpio_to_gpiod(gpios, gidx) | ||
536 | This returns the gpio_desc structure associated to the modem line | ||
537 | index. | ||
538 | |||
539 | mctrl_gpio_set(gpios, mctrl): | ||
540 | This will set the gpios according to the mctrl state. | ||
541 | |||
542 | mctrl_gpio_get(gpios, mctrl): | ||
543 | This will update mctrl with the gpios values. | ||
544 | |||
545 | mctrl_gpio_enable_ms(gpios): | ||
546 | Enables irqs and handling of changes to the ms lines. | ||
547 | |||
548 | mctrl_gpio_disable_ms(gpios): | ||
549 | Disables irqs and handling of changes to the ms lines. | ||
diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst new file mode 100644 index 000000000000..33ad10d05b26 --- /dev/null +++ b/Documentation/driver-api/serial/index.rst | |||
@@ -0,0 +1,32 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ========================== | ||
4 | Support for Serial devices | ||
5 | ========================== | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | |||
11 | driver | ||
12 | tty | ||
13 | |||
14 | Serial drivers | ||
15 | ============== | ||
16 | |||
17 | .. toctree:: | ||
18 | :maxdepth: 1 | ||
19 | |||
20 | cyclades_z | ||
21 | moxa-smartio | ||
22 | n_gsm | ||
23 | rocket | ||
24 | serial-iso7816 | ||
25 | serial-rs485 | ||
26 | |||
27 | .. only:: subproject and html | ||
28 | |||
29 | Indices | ||
30 | ======= | ||
31 | |||
32 | * :ref:`genindex` | ||
diff --git a/Documentation/driver-api/serial/moxa-smartio.rst b/Documentation/driver-api/serial/moxa-smartio.rst new file mode 100644 index 000000000000..156100f17c3f --- /dev/null +++ b/Documentation/driver-api/serial/moxa-smartio.rst | |||
@@ -0,0 +1,615 @@ | |||
1 | ============================================================= | ||
2 | MOXA Smartio/Industio Family Device Driver Installation Guide | ||
3 | ============================================================= | ||
4 | |||
5 | .. note:: | ||
6 | |||
7 | This file is outdated. It needs some care in order to bring it | ||
8 | up to date with kernel 5.0 and later. | ||
9 | |||
10 | Copyright (C) 2008, Moxa Inc. | ||
11 | |||
12 | Date: 01/21/2008 | ||
13 | |||
14 | .. Content | ||
15 | |||
16 | 1. Introduction | ||
17 | 2. System Requirement | ||
18 | 3. Installation | ||
19 | 3.1 Hardware installation | ||
20 | 3.2 Driver files | ||
21 | 3.3 Device naming convention | ||
22 | 3.4 Module driver configuration | ||
23 | 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x. | ||
24 | 3.6 Custom configuration | ||
25 | 3.7 Verify driver installation | ||
26 | 4. Utilities | ||
27 | 5. Setserial | ||
28 | 6. Troubleshooting | ||
29 | |||
30 | 1. Introduction | ||
31 | ^^^^^^^^^^^^^^^ | ||
32 | |||
33 | The Smartio/Industio/UPCI family Linux driver supports the following multiport | ||
34 | boards. | ||
35 | |||
36 | - 2 ports multiport board | ||
37 | CP-102U, CP-102UL, CP-102UF | ||
38 | CP-132U-I, CP-132UL, | ||
39 | CP-132, CP-132I, CP132S, CP-132IS, | ||
40 | CI-132, CI-132I, CI-132IS, | ||
41 | (C102H, C102HI, C102HIS, C102P, CP-102, CP-102S) | ||
42 | |||
43 | - 4 ports multiport board | ||
44 | CP-104EL, | ||
45 | CP-104UL, CP-104JU, | ||
46 | CP-134U, CP-134U-I, | ||
47 | C104H/PCI, C104HS/PCI, | ||
48 | CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL, | ||
49 | C104H, C104HS, | ||
50 | CI-104J, CI-104JS, | ||
51 | CI-134, CI-134I, CI-134IS, | ||
52 | (C114HI, CT-114I, C104P), | ||
53 | POS-104UL, | ||
54 | CB-114, | ||
55 | CB-134I | ||
56 | |||
57 | - 8 ports multiport board | ||
58 | CP-118EL, CP-168EL, | ||
59 | CP-118U, CP-168U, | ||
60 | C168H/PCI, | ||
61 | C168H, C168HS, | ||
62 | (C168P), | ||
63 | CB-108 | ||
64 | |||
65 | This driver and installation procedure have been developed upon Linux Kernel | ||
66 | 2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order | ||
67 | to maintain compatibility, this version has also been properly tested with | ||
68 | RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if a compatibility | ||
69 | problem occurs, please contact Moxa at support@moxa.com.tw. | ||
70 | |||
71 | In addition to device driver, useful utilities are also provided in this | ||
72 | version. They are: | ||
73 | |||
74 | - msdiag | ||
75 | Diagnostic program for displaying installed Moxa | ||
76 | Smartio/Industio boards. | ||
77 | - msmon | ||
78 | Monitor program to observe data count and line status signals. | ||
79 | - msterm A simple terminal program which is useful in testing serial | ||
80 | ports. | ||
81 | - io-irq.exe | ||
82 | Configuration program to setup ISA boards. Please note that | ||
83 | this program can only be executed under DOS. | ||
84 | |||
85 | All the drivers and utilities are published in form of source code under | ||
86 | GNU General Public License in this version. Please refer to GNU General | ||
87 | Public License announcement in each source code file for more detail. | ||
88 | |||
89 | On Moxa's web site, you may always find the latest driver at http://www.moxa.com/. | ||
90 | |||
91 | This version of driver can be installed as Loadable Module (Module driver) | ||
92 | or built-in into kernel (Static driver). You may refer to following | ||
93 | installation procedure for suitable one. Before you install the driver, | ||
94 | please refer to hardware installation procedure in the User's Manual. | ||
95 | |||
96 | We assume the user is familiar with the following documents. | ||
97 | |||
98 | - Serial-HOWTO | ||
99 | - Kernel-HOWTO | ||
100 | |||
101 | 2. System Requirement | ||
102 | ^^^^^^^^^^^^^^^^^^^^^ | ||
103 | |||
104 | - Hardware platform: Intel x86 machine | ||
105 | - Kernel version: 2.4.x or 2.6.x | ||
106 | - gcc version 2.72 or later | ||
107 | - Maximum 4 boards can be installed in combination | ||
108 | |||
109 | 3. Installation | ||
110 | ^^^^^^^^^^^^^^^ | ||
111 | |||
112 | 3.1 Hardware installation | ||
113 | ========================= | ||
114 | |||
115 | There are two types of buses, ISA and PCI, for Smartio/Industio | ||
116 | family multiport board. | ||
117 | |||
118 | ISA board | ||
119 | --------- | ||
120 | |||
121 | You'll have to configure CAP address, I/O address, Interrupt Vector | ||
122 | as well as IRQ before installing this driver. Please refer to hardware | ||
123 | installation procedure in User's Manual before proceeding any further. | ||
124 | Please make sure the JP1 is open after the ISA board is set properly. | ||
125 | |||
126 | PCI/UPCI board | ||
127 | -------------- | ||
128 | |||
129 | You may need to adjust IRQ usage in BIOS to avoid IRQ conflicts | ||
130 | with other ISA devices. Please refer to hardware installation | ||
131 | procedure in User's Manual in advance. | ||
132 | |||
133 | PCI IRQ Sharing | ||
134 | --------------- | ||
135 | |||
136 | Each port within the same multiport board shares the same IRQ. Up to | ||
137 | 4 Moxa Smartio/Industio PCI Family multiport boards can be installed | ||
138 | together on one system and they can share the same IRQ. | ||
139 | |||
140 | |||
141 | 3.2 Driver files | ||
142 | ================ | ||
143 | |||
144 | The driver file may be obtained from ftp, CD-ROM or floppy disk. The | ||
145 | first step, anyway, is to copy driver file "mxser.tgz" into specified | ||
146 | directory, e.g. /moxa. Then execute the commands as below:: | ||
147 | |||
148 | # cd / | ||
149 | # mkdir moxa | ||
150 | # cd /moxa | ||
151 | # tar xvf /dev/fd0 | ||
152 | |||
153 | or:: | ||
154 | |||
155 | # cd / | ||
156 | # mkdir moxa | ||
157 | # cd /moxa | ||
158 | # cp /mnt/cdrom/<driver directory>/mxser.tgz . | ||
159 | # tar xvfz mxser.tgz | ||
160 | |||
161 | |||
162 | 3.3 Device naming convention | ||
163 | ============================ | ||
164 | |||
165 | You may find all the driver and utilities files in /moxa/mxser. | ||
166 | Following installation procedure depends on the model you'd like to | ||
167 | run the driver. If you prefer module driver, please refer to 3.4. | ||
168 | If static driver is required, please refer to 3.5. | ||
169 | |||
170 | Dialin and callout port | ||
171 | ----------------------- | ||
172 | |||
173 | This driver retains traditional serial device properties. There are | ||
174 | two special file names for each serial port. One is the dial-in port | ||
175 | which is named "ttyMxx". For callout port, the naming convention | ||
176 | is "cumxx". | ||
177 | |||
178 | Device naming when more than 2 boards installed | ||
179 | ----------------------------------------------- | ||
180 | |||
181 | Naming convention for each Smartio/Industio multiport board is | ||
182 | pre-defined as below. | ||
183 | |||
184 | ============ =============== ============== | ||
185 | Board Num. Dial-in Port Callout port | ||
186 | 1st board ttyM0 - ttyM7 cum0 - cum7 | ||
187 | 2nd board ttyM8 - ttyM15 cum8 - cum15 | ||
188 | 3rd board ttyM16 - ttyM23 cum16 - cum23 | ||
189 | 4th board ttyM24 - ttyM31 cum24 - cum31 | ||
190 | ============ =============== ============== | ||
191 | |||
192 | .. note:: | ||
193 | |||
194 | Under kernel 2.6 and later, the cum device is obsolete, so use the ttyM* | ||
195 | device instead. | ||
196 | |||
197 | Board sequence | ||
198 | -------------- | ||
199 | |||
200 | This driver will activate ISA boards according to the parameter set | ||
201 | in the driver. After all specified ISA boards are activated, PCI boards | ||
202 | installed in the system will be driven automatically. | ||
203 | Therefore the board number is sorted by the CAP address of ISA boards. | ||
204 | For PCI boards, their sequence will be after ISA boards and C168H/PCI | ||
205 | has higher priority than C104H/PCI boards. | ||
206 | |||
207 | 3.4 Module driver configuration | ||
208 | =============================== | ||
209 | |||
210 | The module driver is the easiest way to install. If you prefer static driver | ||
211 | installation, please skip this paragraph. | ||
212 | |||
213 | |||
214 | ------------- Prepare to use the MOXA driver -------------------- | ||
215 | |||
216 | 3.4.1 Create tty device with correct major number | ||
217 | ------------------------------------------------- | ||
218 | |||
219 | Before using MOXA driver, your system must have the tty devices | ||
220 | which are created with driver's major number. We offer one shell | ||
221 | script "msmknod" to simplify the procedure. | ||
222 | This step is only needed to be executed once. But you still | ||
223 | need to do this procedure when: | ||
224 | |||
225 | a. You change the driver's major number. Please refer to the "3.6" | ||
226 | section. | ||
227 | b. Your total installed MOXA boards number is changed. Maybe you | ||
228 | add/delete one MOXA board. | ||
229 | c. You want to change the tty name. This needs to modify the | ||
230 | shell script "msmknod" | ||
231 | |||
232 | The procedure is:: | ||
233 | |||
234 | # cd /moxa/mxser/driver | ||
235 | # ./msmknod | ||
236 | |||
237 | This shell script will require the major number for dial-in | ||
238 | device and callout device to create tty device. You also need | ||
239 | to specify the total installed MOXA board number. Default major | ||
240 | numbers for dial-in device and callout device are 30, 35. If | ||
241 | you need to change to other numbers, please refer to section "3.6" | ||
242 | for more detailed procedure. | ||
243 | Msmknod will delete any special files occupying the same device | ||
244 | naming. | ||
245 | |||
246 | 3.4.2 Build the MOXA driver and utilities | ||
247 | ----------------------------------------- | ||
248 | |||
249 | Before using the MOXA driver and utilities, you need to compile | ||
250 | all the source code. This step only needs to be executed once. | ||
251 | But you still need to re-compile the source code if you modify the | ||
252 | source code. For example, if you change the driver's major number (see | ||
253 | "3.6" section), then you need to do this step again. | ||
254 | |||
255 | Find "Makefile" in /moxa/mxser, then run:: | ||
256 | |||
257 | # make clean; make install | ||
258 | |||
259 | .. note:: | ||
260 | |||
261 | For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1: | ||
262 | # make clean; make installsp1 | ||
263 | |||
264 | For Red Hat Enterprise Linux AS4/ES4/WS4: | ||
265 | # make clean; make installsp2 | ||
266 | |||
267 | The driver files "mxser.o" and utilities will be properly compiled | ||
268 | and copied to system directories respectively. | ||
269 | |||
270 | ------------- Load MOXA driver-------------------- | ||
271 | |||
272 | 3.4.3 Load the MOXA driver | ||
273 | -------------------------- | ||
274 | |||
275 | :: | ||
276 | |||
277 | # modprobe mxser <argument> | ||
278 | |||
279 | will activate the module driver. You may run "lsmod" to check | ||
280 | if "mxser" is activated. If the MOXA board is ISA board, the | ||
281 | <argument> is needed. Please refer to section "3.4.5" for more | ||
282 | information. | ||
283 | |||
284 | ------------- Load MOXA driver on boot -------------------- | ||
285 | |||
286 | 3.4.4 Load the mxser driver | ||
287 | --------------------------- | ||
288 | |||
289 | |||
290 | For the above description, you may manually execute | ||
291 | "modprobe mxser" to activate this driver and run | ||
292 | "rmmod mxser" to remove it. | ||
293 | |||
294 | However, it's better to have a boot time configuration to | ||
295 | eliminate manual operation. Boot time configuration can be | ||
296 | achieved by rc file. We offer one "rc.mxser" file to simplify | ||
297 | the procedure under "/moxa/mxser/driver". | ||
298 | |||
299 | But if you use ISA board, please modify the "modprobe ..." command | ||
300 | to add the argument (see "3.4.5" section). After modifying the | ||
301 | rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser" | ||
302 | manually to make sure the modification is ok. If any error | ||
303 | is encountered, please try to modify again. If the modification is | ||
304 | completed, follow the below step. | ||
305 | |||
306 | Run following command for setting rc files:: | ||
307 | |||
308 | # cd /moxa/mxser/driver | ||
309 | # cp ./rc.mxser /etc/rc.d | ||
310 | # cd /etc/rc.d | ||
311 | |||
312 | Check whether "rc.serial" exists. If "rc.serial" doesn't exist, | ||
313 | create it with vi, then run "chmod 755 rc.serial" to change the permission. | ||
314 | |||
315 | Add "/etc/rc.d/rc.mxser" in last line. | ||
316 | |||
317 | Reboot and check if "mxser" is activated by using the "lsmod" command. | ||
318 | |||
319 | 3.4.5. specify CAP address | ||
320 | -------------------------- | ||
321 | |||
322 | If you'd like to drive Smartio/Industio ISA boards in the system, | ||
323 | you'll have to add parameter to specify CAP address of given | ||
324 | board while activating "mxser.o". The format of the parameters is | ||
325 | as follows:: | ||
326 | |||
327 | modprobe mxser ioaddr=0x???,0x???,0x???,0x??? | ||
328 | | | | | | ||
329 | | | | +- 4th ISA board | ||
330 | | | +------ 3rd ISA board | ||
331 | | +------------ 2nd ISA board | ||
332 | +-------------------1st ISA board | ||
333 | |||
334 | 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x | ||
335 | ================================================================ | ||
336 | |||
337 | Note: | ||
338 | To use static driver, you must install the linux kernel | ||
339 | source package. | ||
340 | |||
341 | 3.5.1 Backup the built-in driver in the kernel | ||
342 | ---------------------------------------------- | ||
343 | |||
344 | :: | ||
345 | |||
346 | # cd /usr/src/linux/drivers/char | ||
347 | # mv mxser.c mxser.c.old | ||
348 | |||
349 | For Red Hat 7.x user, you need to create link: | ||
350 | # cd /usr/src | ||
351 | # ln -s linux-2.4 linux | ||
352 | |||
353 | 3.5.2 Create link | ||
354 | ----------------- | ||
355 | :: | ||
356 | |||
357 | # cd /usr/src/linux/drivers/char | ||
358 | # ln -s /moxa/mxser/driver/mxser.c mxser.c | ||
359 | |||
360 | 3.5.3 Add CAP address list for ISA boards. | ||
361 | ------------------------------------------ | ||
362 | |||
363 | For PCI boards user, please skip this step. | ||
364 | |||
365 | In module mode, the CAP address for ISA board is given by | ||
366 | parameter. In static driver configuration, you'll have to | ||
367 | assign it within driver's source code. If you will not | ||
368 | install any ISA boards, you may skip to next portion. | ||
369 | The instructions to modify driver source code are as | ||
370 | below. | ||
371 | |||
372 | a. run:: | ||
373 | |||
374 | # cd /moxa/mxser/driver | ||
375 | # vi mxser.c | ||
376 | |||
377 | b. Find the array mxserBoardCAP[] as below:: | ||
378 | |||
379 | static int mxserBoardCAP[] = {0x00, 0x00, 0x00, 0x00}; | ||
380 | |||
381 | c. Change the address within this array using vi. For | ||
382 | example, to drive 2 ISA boards with CAP addresses | ||
383 | 0x280 and 0x180 as 1st and 2nd board, just change | ||
384 | the source code as follows:: | ||
385 | |||
386 | static int mxserBoardCAP[] = {0x280, 0x180, 0x00, 0x00}; | ||
387 | |||
388 | 3.5.4 Setup kernel configuration | ||
389 | -------------------------------- | ||
390 | |||
391 | Configure the kernel:: | ||
392 | |||
393 | # cd /usr/src/linux | ||
394 | # make menuconfig | ||
395 | |||
396 | You will go into a menu-driven system. Please select [Character | ||
397 | devices][Non-standard serial port support], enable the [Moxa | ||
398 | SmartIO support] driver with "[*]" for built-in (not "[M]"), then | ||
399 | select [Exit] to exit this program. | ||
400 | |||
401 | 3.5.5 Rebuild kernel | ||
402 | -------------------- | ||
403 | |||
404 | The following are for Linux kernel rebuilding, for your | ||
405 | reference only. | ||
406 | |||
407 | For appropriate details, please refer to the Linux document: | ||
408 | |||
409 | a. Run the following commands:: | ||
410 | |||
411 | cd /usr/src/linux | ||
412 | make clean # take a few minutes | ||
413 | make dep # take a few minutes | ||
414 | make bzImage # take probably 10-20 minutes | ||
415 | make install # copy boot image to correct position | ||
416 | |||
417 | b. Please make sure the boot kernel (vmlinuz) is in the | ||
418 | correct position. | ||
419 | c. If you use the 'lilo' utility, you should check that the 'image' | ||
420 | item in /etc/lilo.conf specifies the path of 'vmlinuz', | ||
421 | or you will load the wrong (or old) boot kernel image (vmlinuz). | ||
422 | After checking /etc/lilo.conf, please run "lilo". | ||
423 | |||
424 | Note that if the result of "make bzImage" is ERROR, then you have to | ||
425 | go back to Linux configuration Setup. Type "make menuconfig" in | ||
426 | directory /usr/src/linux. | ||
427 | |||
428 | |||
429 | 3.5.6 Make tty device and special file | ||
430 | -------------------------------------- | ||
431 | :: | ||
432 | |||
433 | # cd /moxa/mxser/driver | ||
434 | # ./msmknod | ||
435 | |||
436 | 3.5.7 Make utility | ||
437 | ------------------ | ||
438 | |||
439 | :: | ||
440 | |||
441 | # cd /moxa/mxser/utility | ||
442 | # make clean; make install | ||
443 | |||
444 | 3.5.8 Reboot | ||
445 | ------------ | ||
446 | |||
447 | |||
448 | |||
449 | 3.6 Custom configuration | ||
450 | ======================== | ||
451 | |||
452 | Although this driver already provides you default configuration, you | ||
453 | still can change the device name and major number. The instruction to | ||
454 | change these parameters are shown as below. | ||
455 | |||
456 | a. Change Device name | ||
457 | |||
458 | If you'd like to use other device names instead of default naming | ||
459 | convention, all you have to do is to modify the internal code | ||
460 | within the shell script "msmknod". First, you have to open "msmknod" | ||
461 | by vi. Locate each line that contains "ttyM" and "cum" and change them | ||
462 | to the device name you desire. "msmknod" creates the device names | ||
463 | you need the next time it is executed. | ||
464 | |||
465 | b. Change Major number | ||
466 | |||
467 | If major number 30 and 35 had been occupied, you may have to select | ||
468 | 2 free major numbers for this driver. There are 3 steps to change | ||
469 | major numbers. | ||
470 | |||
471 | 3.6.1 Find free major numbers | ||
472 | ----------------------------- | ||
473 | |||
474 | In /proc/devices, you may find all the major numbers occupied | ||
475 | in the system. Please select 2 major numbers that are available. | ||
476 | e.g. 40, 45. | ||
477 | |||
478 | 3.6.2 Create special files | ||
479 | -------------------------- | ||
480 | |||
481 | Run /moxa/mxser/driver/msmknod to create special files with | ||
482 | specified major numbers. | ||
483 | |||
484 | 3.6.3 Modify driver with new major number | ||
485 | ----------------------------------------- | ||
486 | |||
487 | Run vi to open /moxa/mxser/driver/mxser.c. Locate the line | ||
488 | that contains "MXSERMAJOR". Change the content as below:: | ||
489 | |||
490 | #define MXSERMAJOR 40 | ||
491 | #define MXSERCUMAJOR 45 | ||
492 | |||
493 | 3.6.4 Run "make clean; make install" in /moxa/mxser/driver. | ||
494 | |||
495 | 3.7 Verify driver installation | ||
496 | ============================== | ||
497 | |||
498 | You may refer to /var/log/messages to check the latest status | ||
499 | log reported by this driver whenever it's activated. | ||
500 | |||
501 | 4. Utilities | ||
502 | ^^^^^^^^^^^^ | ||
503 | |||
504 | There are 3 utilities contained in this driver. They are msdiag, msmon and | ||
505 | msterm. These 3 utilities are released in form of source code. They should | ||
506 | be compiled into executable file and copied into /usr/bin. | ||
507 | |||
508 | Before using these utilities, please load driver (refer 3.4 & 3.5) and | ||
509 | make sure you had run the "msmknod" utility. | ||
510 | |||
511 | msdiag - Diagnostic | ||
512 | =================== | ||
513 | |||
514 | This utility displays which Moxa Smartio/Industio boards were found | ||
515 | by the driver in the system. | ||
516 | |||
517 | msmon - Port Monitoring | ||
518 | ======================= | ||
519 | |||
520 | This utility gives the user a quick view about all the MOXA ports' | ||
521 | activities. One can easily learn each port's total received/transmitted | ||
522 | (Rx/Tx) character count since the time when the monitoring is started. | ||
523 | |||
524 | Rx/Tx throughputs per second are also reported in interval basis (e.g. | ||
525 | the last 5 seconds) and in average basis (since the time the monitoring | ||
526 | is started). You can reset all ports' count by <HOME> key. <+> <-> | ||
527 | (plus/minus) keys to change the displaying time interval. Press <ENTER> | ||
528 | on the port where the cursor is positioned to view the port's communication | ||
529 | parameters, signal status, and input/output queue. | ||
530 | |||
531 | msterm - Terminal Emulation | ||
532 | =========================== | ||
533 | |||
534 | This utility provides data sending and receiving ability of all tty ports, | ||
535 | especially for MOXA ports. It is quite useful for testing simple | ||
536 | application, for example, sending AT command to a modem connected to the | ||
537 | port or used as a terminal for login purpose. Note that this is only a | ||
538 | dumb terminal emulation without handling full screen operation. | ||
539 | |||
540 | 5. Setserial | ||
541 | ^^^^^^^^^^^^ | ||
542 | |||
543 | Supported Setserial parameters are listed as below. | ||
544 | |||
545 | ============== ========================================================= | ||
546 | uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO) | ||
547 | close_delay set the amount of time(in 1/100 of a second) that DTR | ||
548 | should be kept low while being closed. | ||
549 | closing_wait set the amount of time(in 1/100 of a second) that the | ||
550 | serial port should wait for data to be drained while | ||
551 | being closed, before the receiver is disabled. | ||
552 | spd_hi Use 57.6kb when the application requests 38.4kb. | ||
553 | spd_vhi Use 115.2kb when the application requests 38.4kb. | ||
554 | spd_shi Use 230.4kb when the application requests 38.4kb. | ||
555 | spd_warp Use 460.8kb when the application requests 38.4kb. | ||
556 | spd_normal Use 38.4kb when the application requests 38.4kb. | ||
557 | spd_cust Use the custom divisor to set the speed when the | ||
558 | application requests 38.4kb. | ||
559 | divisor This option sets the custom divisor. | ||
560 | baud_base This option sets the base baud rate. | ||
561 | ============== ========================================================= | ||
562 | |||
563 | 6. Troubleshooting | ||
564 | ^^^^^^^^^^^^^^^^^^ | ||
565 | |||
566 | The boot time error messages and solutions are stated as clearly as | ||
567 | possible. If all the possible solutions fail, please contact our technical | ||
568 | support team to get more help. | ||
569 | |||
570 | |||
571 | Error msg: | ||
572 | More than 4 Moxa Smartio/Industio family boards found. Fifth board | ||
573 | and after are ignored. | ||
574 | |||
575 | Solution: | ||
576 | To avoid this problem, please unplug the fifth and subsequent boards, because the | ||
577 | Moxa driver supports up to 4 boards. | ||
578 | |||
579 | Error msg: | ||
580 | Request_irq fail, IRQ(?) may be conflict with another device. | ||
581 | |||
582 | Solution: | ||
583 | Other PCI or ISA devices occupy the assigned IRQ. If you are not sure | ||
584 | which device causes the situation, please check /proc/interrupts to find | ||
585 | free IRQ and simply change another free IRQ for Moxa board. | ||
586 | |||
587 | Error msg: | ||
588 | Board #: C1xx Series(CAP=xxx) interrupt number invalid. | ||
589 | |||
590 | Solution: | ||
591 | Each port within the same multiport board shares the same IRQ. Please set | ||
592 | one IRQ (IRQ doesn't equal to zero) for one Moxa board. | ||
593 | |||
594 | Error msg: | ||
595 | No interrupt vector be set for Moxa ISA board(CAP=xxx). | ||
596 | |||
597 | Solution: | ||
598 | Moxa ISA board needs an interrupt vector. Please refer to the user's manual | ||
599 | "Hardware Installation" chapter to set interrupt vector. | ||
600 | |||
601 | Error msg: | ||
602 | Couldn't install MOXA Smartio/Industio family driver! | ||
603 | |||
604 | Solution: | ||
605 | Loading the Moxa driver failed; the major number may conflict with other devices. | ||
606 | Please refer to previous section 3.6 to choose a free major number for | ||
607 | the Moxa driver. | ||
608 | |||
609 | Error msg: | ||
610 | Couldn't install MOXA Smartio/Industio family callout driver! | ||
611 | |||
612 | Solution: | ||
613 | Loading the Moxa callout driver failed; the callout device major number may | ||
614 | conflict with other devices. Please refer to previous section 3.6 to | ||
615 | choose a free callout device major number for the Moxa driver. | ||
diff --git a/Documentation/driver-api/serial/n_gsm.rst b/Documentation/driver-api/serial/n_gsm.rst new file mode 100644 index 000000000000..f3ad9fd26408 --- /dev/null +++ b/Documentation/driver-api/serial/n_gsm.rst | |||
@@ -0,0 +1,103 @@ | |||
1 | ============================== | ||
2 | GSM 0710 tty multiplexor HOWTO | ||
3 | ============================== | ||
4 | |||
5 | This line discipline implements the GSM 07.10 multiplexing protocol | ||
6 | detailed in the following 3GPP document: | ||
7 | |||
8 | http://www.3gpp.org/ftp/Specs/archive/07_series/07.10/0710-720.zip | ||
9 | |||
10 | This document gives some hints on how to use this driver with GPRS and 3G | ||
11 | modems connected to a physical serial port. | ||
12 | |||
13 | How to use it | ||
14 | ------------- | ||
15 | 1. initialize the modem in 0710 mux mode (usually AT+CMUX= command) through | ||
16 | its serial port. Depending on the modem used, you can pass more or less | ||
17 | parameters to this command, | ||
18 | 2. switch the serial line to using the n_gsm line discipline by using | ||
19 | TIOCSETD ioctl, | ||
20 | 3. configure the mux using GSMIOC_GETCONF / GSMIOC_SETCONF ioctl, | ||
21 | |||
22 | Major parts of the initialization program : | ||
23 | (a good starting point is util-linux-ng/sys-utils/ldattach.c):: | ||
24 | |||
25 | #include <linux/gsmmux.h> | ||
26 | #define N_GSM0710 21 /* GSM 0710 Mux */ | ||
27 | #define DEFAULT_SPEED B115200 | ||
28 | #define SERIAL_PORT "/dev/ttyS0" | ||
29 | |||
30 | int ldisc = N_GSM0710; | ||
31 | struct gsm_config c; | ||
32 | struct termios configuration; | ||
33 | |||
34 | /* open the serial port connected to the modem */ | ||
35 | fd = open(SERIAL_PORT, O_RDWR | O_NOCTTY | O_NDELAY); | ||
36 | |||
37 | /* configure the serial port : speed, flow control ... */ | ||
38 | |||
39 | /* send the AT commands to switch the modem to CMUX mode | ||
40 | and check that it's successful (should return OK) */ | ||
41 | write(fd, "AT+CMUX=0\r", 10); | ||
42 | |||
43 | /* experience showed that some modems need some time before | ||
44 | being able to answer to the first MUX packet so a delay | ||
45 | may be needed here in some case */ | ||
46 | sleep(3); | ||
47 | |||
48 | /* use n_gsm line discipline */ | ||
49 | ioctl(fd, TIOCSETD, &ldisc); | ||
50 | |||
51 | /* get n_gsm configuration */ | ||
52 | ioctl(fd, GSMIOC_GETCONF, &c); | ||
53 | /* we are initiator and need encoding 0 (basic) */ | ||
54 | c.initiator = 1; | ||
55 | c.encapsulation = 0; | ||
56 | /* our modem defaults to a maximum size of 127 bytes */ | ||
57 | c.mru = 127; | ||
58 | c.mtu = 127; | ||
59 | /* set the new configuration */ | ||
60 | ioctl(fd, GSMIOC_SETCONF, &c); | ||
61 | |||
62 | /* and wait for ever to keep the line discipline enabled */ | ||
63 | daemon(0,0); | ||
64 | pause(); | ||
65 | |||
66 | 4. create the devices corresponding to the "virtual" serial ports (take care, | ||
67 | each modem has its configuration and some DLC have dedicated functions, | ||
68 | for example GPS), starting with minor 1 (DLC0 is reserved for the management | ||
69 | of the mux):: | ||
70 | |||
71 | MAJOR=`cat /proc/devices |grep gsmtty | awk '{print $1}'` | ||
72 | for i in `seq 1 4`; do | ||
73 | mknod /dev/ttygsm$i c $MAJOR $i | ||
74 | done | ||
75 | |||
76 | 5. use these devices as plain serial ports. | ||
77 | |||
78 | for example, it's possible: | ||
79 | |||
80 | - to use gnokii to send / receive SMS on ttygsm1 | ||
81 | - to use ppp to establish a datalink on ttygsm2 | ||
82 | |||
83 | 6. first close all virtual ports before closing the physical port. | ||
84 | |||
85 | Note that after closing the physical port the modem is still in multiplexing | ||
86 | mode. This may prevent a successful re-opening of the port later. To avoid | ||
87 | this situation either reset the modem if your hardware allows that or send | ||
88 | a disconnect command frame manually before initializing the multiplexing mode | ||
89 | for the second time. The byte sequence for the disconnect command frame is:: | ||
90 | |||
91 | 0xf9, 0x03, 0xef, 0x03, 0xc3, 0x16, 0xf9. | ||
92 | |||
93 | Additional Documentation | ||
94 | ------------------------ | ||
95 | More practical details on the protocol and how it's supported by industrial | ||
96 | modems can be found in the following documents : | ||
97 | |||
98 | - http://www.telit.com/module/infopool/download.php?id=616 | ||
99 | - http://www.u-blox.com/images/downloads/Product_Docs/LEON-G100-G200-MuxImplementation_ApplicationNote_%28GSM%20G1-CS-10002%29.pdf | ||
100 | - http://www.sierrawireless.com/Support/Downloads/AirPrime/WMP_Series/~/media/Support_Downloads/AirPrime/Application_notes/CMUX_Feature_Application_Note-Rev004.ashx | ||
101 | - http://wm.sim.com/sim/News/photo/2010721161442.pdf | ||
102 | |||
103 | 11-03-08 - Eric Bénard - <eric@eukrea.com> | ||
diff --git a/Documentation/driver-api/serial/rocket.rst b/Documentation/driver-api/serial/rocket.rst new file mode 100644 index 000000000000..23761eae4282 --- /dev/null +++ b/Documentation/driver-api/serial/rocket.rst | |||
@@ -0,0 +1,185 @@ | |||
1 | ================================================ | ||
2 | Comtrol(tm) RocketPort(R)/RocketModem(TM) Series | ||
3 | ================================================ | ||
4 | |||
5 | Device Driver for the Linux Operating System | ||
6 | ============================================ | ||
7 | |||
8 | Product overview | ||
9 | ---------------- | ||
10 | |||
11 | This driver provides a loadable kernel driver for the Comtrol RocketPort | ||
12 | and RocketModem PCI boards. These boards provide 2, 4, 8, 16, or 32 | ||
13 | high-speed serial ports or modems. This driver supports up to a combination | ||
14 | of four RocketPort or RocketModem boards in one machine simultaneously. | ||
15 | This file assumes that you are using the RocketPort driver which is | ||
16 | integrated into the kernel sources. | ||
17 | |||
18 | The driver can also be installed as an external module using the usual | ||
19 | "make;make install" routine. This external module driver, obtainable | ||
20 | from the Comtrol website listed below, is useful for updating the driver | ||
21 | or installing it into kernels which do not have the driver configured | ||
22 | into them. Installation instructions for the external module | ||
23 | are in the included README and HW_INSTALL files. | ||
24 | |||
25 | RocketPort ISA and RocketModem II PCI boards currently are only supported by | ||
26 | this driver in module form. | ||
27 | |||
28 | The RocketPort ISA board requires I/O ports to be configured by the DIP | ||
29 | switches on the board. See the section "ISA Rocketport Boards" below for | ||
30 | information on how to set the DIP switches. | ||
31 | |||
32 | You pass the I/O port to the driver using the following module parameters: | ||
33 | |||
34 | board1: | ||
35 | I/O port for the first ISA board | ||
36 | board2: | ||
37 | I/O port for the second ISA board | ||
38 | board3: | ||
39 | I/O port for the third ISA board | ||
40 | board4: | ||
41 | I/O port for the fourth ISA board | ||
42 | |||
43 | There is a set of utilities and scripts provided with the external driver | ||
44 | (downloadable from http://www.comtrol.com) that ease the configuration and | ||
45 | setup of the ISA cards. | ||
46 | |||
47 | The RocketModem II PCI boards require firmware to be loaded into the card | ||
48 | before it will function. The driver has only been tested as a module for this | ||
49 | board. | ||
50 | |||
51 | Installation Procedures | ||
52 | ----------------------- | ||
53 | |||
54 | RocketPort/RocketModem PCI cards require no driver configuration; they are | ||
55 | automatically detected and configured. | ||
56 | |||
57 | The RocketPort driver can be installed as a module (recommended) or built | ||
58 | into the kernel. This is selected, as for other drivers, through the `make config` | ||
59 | command from the root of the Linux source tree during the kernel build process. | ||
60 | |||
61 | The RocketPort/RocketModem serial ports installed by this driver are assigned | ||
62 | device major number 46, and will be named /dev/ttyRx, where x is the port number | ||
63 | starting at zero (ex. /dev/ttyR0, /dev/ttyR1, ...). If you have multiple cards | ||
64 | installed in the system, the mapping of port names to serial ports is displayed | ||
65 | in the system log at /var/log/messages. | ||
66 | |||
67 | If installed as a module, the module must be loaded. This can be done | ||
68 | manually by entering "modprobe rocket". To have the module loaded automatically | ||
69 | upon system boot, edit a `/etc/modprobe.d/*.conf` file and add the line | ||
70 | "alias char-major-46 rocket". | ||
71 | |||
72 | In order to use the ports, their device names (nodes) must be created with mknod. | ||
73 | This is only required once; the system will retain the names once created. To | ||
74 | create the RocketPort/RocketModem device names, use the command | ||
75 | "mknod /dev/ttyRx c 46 x" where x is the port number starting at zero. | ||
76 | |||
77 | For example:: | ||
78 | |||
79 | > mknod /dev/ttyR0 c 46 0 | ||
80 | > mknod /dev/ttyR1 c 46 1 | ||
81 | > mknod /dev/ttyR2 c 46 2 | ||
82 | |||
83 | The Linux script MAKEDEV will create the first 16 ttyRx device names (nodes) | ||
84 | for you:: | ||
85 | |||
86 | >/dev/MAKEDEV ttyR | ||
87 | |||
88 | ISA Rocketport Boards | ||
89 | --------------------- | ||
90 | |||
91 | You must assign and configure the I/O addresses used by the ISA Rocketport | ||
92 | card before installing and using it. This is done by setting a set of DIP | ||
93 | switches on the Rocketport board. | ||
94 | |||
95 | |||
96 | Setting the I/O address | ||
97 | ----------------------- | ||
98 | |||
99 | Before installing RocketPort(R) or RocketPort RA boards, you must find | ||
100 | a range of I/O addresses for it to use. The first RocketPort card | ||
101 | requires a 68-byte contiguous block of I/O addresses, starting at one | ||
102 | of the following: 0x100h, 0x140h, 0x180h, 0x200h, 0x240h, 0x280h, | ||
103 | 0x300h, 0x340h, 0x380h. This I/O address must be reflected in the DIP | ||
104 | switches of *all* of the Rocketport cards. | ||
105 | |||
106 | The second, third, and fourth RocketPort cards require a 64-byte | ||
107 | contiguous block of I/O addresses, starting at one of the following | ||
108 | I/O addresses: 0x100h, 0x140h, 0x180h, 0x1C0h, 0x200h, 0x240h, 0x280h, | ||
109 | 0x2C0h, 0x300h, 0x340h, 0x380h, 0x3C0h. The I/O address used by the | ||
110 | second, third, and fourth Rocketport cards (if present) are set via | ||
111 | software control. The DIP switch settings for the I/O address must be | ||
112 | set to the value of the first Rocketport card. | ||
113 | |||
114 | In order to distinguish each of the cards from the others, each card | ||
115 | must have a unique board ID set on the dip switches. The first | ||
116 | Rocketport board must be set with the DIP switches corresponding to | ||
117 | the first board, the second board must be set with the DIP switches | ||
118 | corresponding to the second board, etc. IMPORTANT: The board ID is | ||
119 | the only place where the DIP switch settings should differ between the | ||
120 | various Rocketport boards in a system. | ||
121 | |||
122 | The I/O address range used by any of the RocketPort cards must not | ||
123 | conflict with any other cards in the system, including other | ||
124 | RocketPort cards. Below, you will find a list of commonly used I/O | ||
125 | address ranges which may be in use by other devices in your system. | ||
126 | On a Linux system, "cat /proc/ioports" will also be helpful in | ||
127 | identifying what I/O addresses are being used by devices on your | ||
128 | system. | ||
129 | |||
130 | Remember, the FIRST RocketPort uses 68 I/O addresses. So, if you set it | ||
131 | for 0x100, it will occupy 0x100 to 0x143. This would mean that you | ||
132 | CAN NOT set the second, third or fourth board for address 0x140 since | ||
133 | the first 4 bytes of that range are used by the first board. You would | ||
134 | need to set the second, third, or fourth board to one of the next available | ||
135 | blocks such as 0x180. | ||
136 | |||
137 | RocketPort and RocketPort RA SW1 Settings:: | ||
138 | |||
139 | +-------------------------------+ | ||
140 | | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | | ||
141 | +-------+-------+---------------+ | ||
142 | | Unused| Card | I/O Port Block| | ||
143 | +-------------------------------+ | ||
144 | |||
145 | DIP Switches DIP Switches | ||
146 | 7 8 6 5 | ||
147 | =================== =================== | ||
148 | On On UNUSED, MUST BE ON. On On First Card <==== Default | ||
149 | On Off Second Card | ||
150 | Off On Third Card | ||
151 | Off Off Fourth Card | ||
152 | |||
153 | DIP Switches I/O Address Range | ||
154 | 4 3 2 1 Used by the First Card | ||
155 | ===================================== | ||
156 | On Off On Off 100-143 | ||
157 | On Off Off On 140-183 | ||
158 | On Off Off Off 180-1C3 <==== Default | ||
159 | Off On On Off 200-243 | ||
160 | Off On Off On 240-283 | ||
161 | Off On Off Off 280-2C3 | ||
162 | Off Off On Off 300-343 | ||
163 | Off Off Off On 340-383 | ||
164 | Off Off Off Off 380-3C3 | ||
165 | |||
166 | Reporting Bugs | ||
167 | -------------- | ||
168 | |||
169 | For technical support, please provide the following | ||
170 | information: Driver version, kernel release, distribution of | ||
171 | kernel, and type of board you are using. Error messages, log | ||
172 | printouts, and port configuration details are especially helpful. | ||
173 | |||
174 | USA: | ||
175 | :Phone: (612) 494-4100 | ||
176 | :FAX: (612) 494-4199 | ||
177 | :email: support@comtrol.com | ||
178 | |||
179 | Comtrol Europe: | ||
180 | :Phone: +44 (0) 1 869 323-220 | ||
181 | :FAX: +44 (0) 1 869 323-211 | ||
182 | :email: support@comtrol.co.uk | ||
183 | |||
184 | Web: http://www.comtrol.com | ||
185 | FTP: ftp.comtrol.com | ||
diff --git a/Documentation/driver-api/serial/serial-iso7816.rst b/Documentation/driver-api/serial/serial-iso7816.rst new file mode 100644 index 000000000000..d990143de0c6 --- /dev/null +++ b/Documentation/driver-api/serial/serial-iso7816.rst | |||
@@ -0,0 +1,90 @@ | |||
1 | ============================= | ||
2 | ISO7816 Serial Communications | ||
3 | ============================= | ||
4 | |||
5 | 1. Introduction | ||
6 | =============== | ||
7 | |||
8 | ISO/IEC7816 is a series of standards specifying integrated circuit cards (ICC) | ||
9 | also known as smart cards. | ||
10 | |||
11 | 2. Hardware-related considerations | ||
12 | ================================== | ||
13 | |||
14 | Some CPUs/UARTs (e.g., Microchip AT91) contain a built-in mode capable of | ||
15 | handling communication with a smart card. | ||
16 | |||
17 | For these microcontrollers, the Linux driver should be made capable of | ||
18 | working in both modes, and proper ioctls (see later) should be made | ||
19 | available at user-level to allow switching from one mode to the other, and | ||
20 | vice versa. | ||
21 | |||
22 | 3. Data Structures Already Available in the Kernel | ||
23 | ================================================== | ||
24 | |||
25 | The Linux kernel provides the serial_iso7816 structure (see [1]) to handle | ||
26 | ISO7816 communications. This data structure is used to set and configure | ||
27 | ISO7816 parameters in ioctls. | ||
28 | |||
29 | Any driver for devices capable of working both as RS232 and ISO7816 should | ||
30 | implement the iso7816_config callback in the uart_port structure. The | ||
31 | serial_core calls iso7816_config to do the device specific part in response | ||
32 | to TIOCGISO7816 and TIOCSISO7816 ioctls (see below). The iso7816_config | ||
33 | callback receives a pointer to struct serial_iso7816. | ||
34 | |||
35 | 4. Usage from user-level | ||
36 | ======================== | ||
37 | |||
38 | From user-level, ISO7816 configuration can be read or set using the | ||
39 | ioctls above. For instance, to set ISO7816 you can use the following code:: | ||
40 | |||
41 | #include <linux/serial.h> | ||
42 | |||
43 | /* Include definition for ISO7816 ioctls: TIOCSISO7816 and TIOCGISO7816 */ | ||
44 | #include <sys/ioctl.h> | ||
45 | |||
46 | /* Open your specific device (e.g., /dev/mydevice): */ | ||
47 | int fd = open ("/dev/mydevice", O_RDWR); | ||
48 | if (fd < 0) { | ||
49 | /* Error handling. See errno. */ | ||
50 | } | ||
51 | |||
52 | struct serial_iso7816 iso7816conf; | ||
53 | |||
54 | /* Reserved fields have to be zeroed */ | ||
55 | memset(&iso7816conf, 0, sizeof(iso7816conf)); | ||
56 | |||
57 | /* Enable ISO7816 mode: */ | ||
58 | iso7816conf.flags |= SER_ISO7816_ENABLED; | ||
59 | |||
60 | /* Select the protocol: */ | ||
61 | /* T=0 */ | ||
62 | iso7816conf.flags |= SER_ISO7816_T(0); | ||
63 | /* or T=1 */ | ||
64 | iso7816conf.flags |= SER_ISO7816_T(1); | ||
65 | |||
66 | /* Set the guard time: */ | ||
67 | iso7816conf.tg = 2; | ||
68 | |||
69 | /* Set the clock frequency*/ | ||
70 | iso7816conf.clk = 3571200; | ||
71 | |||
72 | /* Set transmission factors: */ | ||
73 | iso7816conf.sc_fi = 372; | ||
74 | iso7816conf.sc_di = 1; | ||
75 | |||
76 | if (ioctl(fd, TIOCSISO7816, &iso7816conf) < 0) { | ||
77 | /* Error handling. See errno. */ | ||
78 | } | ||
79 | |||
80 | /* Use read() and write() syscalls here... */ | ||
81 | |||
82 | /* Close the device when finished: */ | ||
83 | if (close (fd) < 0) { | ||
84 | /* Error handling. See errno. */ | ||
85 | } | ||
86 | |||
87 | 5. References | ||
88 | ============= | ||
89 | |||
90 | [1] include/uapi/linux/serial.h | ||
diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst new file mode 100644 index 000000000000..6bc824f948f9 --- /dev/null +++ b/Documentation/driver-api/serial/serial-rs485.rst | |||
@@ -0,0 +1,103 @@ | |||
1 | =========================== | ||
2 | RS485 Serial Communications | ||
3 | =========================== | ||
4 | |||
5 | 1. Introduction | ||
6 | =============== | ||
7 | |||
8 | EIA-485, also known as TIA/EIA-485 or RS-485, is a standard defining the | ||
9 | electrical characteristics of drivers and receivers for use in balanced | ||
10 | digital multipoint systems. | ||
11 | This standard is widely used for communications in industrial automation | ||
12 | because it can be used effectively over long distances and in electrically | ||
13 | noisy environments. | ||
14 | |||
15 | 2. Hardware-related Considerations | ||
16 | ================================== | ||
17 | |||
18 | Some CPUs/UARTs (e.g., Atmel AT91 or 16C950 UART) contain a built-in | ||
19 | half-duplex mode capable of automatically controlling line direction by | ||
20 | toggling RTS or DTR signals. That can be used to control external | ||
21 | half-duplex hardware like an RS485 transceiver or any RS232-connected | ||
22 | half-duplex devices like some modems. | ||
23 | |||
24 | For these microcontrollers, the Linux driver should be made capable of | ||
25 | working in both modes, and proper ioctls (see later) should be made | ||
26 | available at user-level to allow switching from one mode to the other, and | ||
27 | vice versa. | ||
28 | |||
29 | 3. Data Structures Already Available in the Kernel | ||
30 | ================================================== | ||
31 | |||
32 | The Linux kernel provides the serial_rs485 structure (see [1]) to handle | ||
33 | RS485 communications. This data structure is used to set and configure RS485 | ||
34 | parameters in the platform data and in ioctls. | ||
35 | |||
36 | The device tree can also provide RS485 boot time parameters (see [2] | ||
37 | for bindings). The driver is in charge of filling this data structure from | ||
38 | the values given by the device tree. | ||
39 | |||
40 | Any driver for devices capable of working both as RS232 and RS485 should | ||
41 | implement the rs485_config callback in the uart_port structure. The | ||
42 | serial_core calls rs485_config to do the device specific part in response | ||
43 | to TIOCSRS485 and TIOCGRS485 ioctls (see below). The rs485_config callback | ||
44 | receives a pointer to struct serial_rs485. | ||
45 | |||
46 | 4. Usage from user-level | ||
47 | ======================== | ||
48 | |||
49 | From user-level, RS485 configuration can be read or set using the | ||
50 | ioctls above. For instance, to set RS485 you can use the following code:: | ||
51 | |||
52 | #include <linux/serial.h> | ||
53 | |||
54 | /* Include definition for RS485 ioctls: TIOCGRS485 and TIOCSRS485 */ | ||
55 | #include <sys/ioctl.h> | ||
56 | |||
57 | /* Open your specific device (e.g., /dev/mydevice): */ | ||
58 | int fd = open ("/dev/mydevice", O_RDWR); | ||
59 | if (fd < 0) { | ||
60 | /* Error handling. See errno. */ | ||
61 | } | ||
62 | |||
63 | struct serial_rs485 rs485conf = {0}; | ||
64 | |||
65 | /* Enable RS485 mode: */ | ||
66 | rs485conf.flags |= SER_RS485_ENABLED; | ||
67 | |||
68 | /* Set logical level for RTS pin equal to 1 when sending: */ | ||
69 | rs485conf.flags |= SER_RS485_RTS_ON_SEND; | ||
70 | /* or, set logical level for RTS pin equal to 0 when sending: */ | ||
71 | rs485conf.flags &= ~(SER_RS485_RTS_ON_SEND); | ||
72 | |||
73 | /* Set logical level for RTS pin equal to 1 after sending: */ | ||
74 | rs485conf.flags |= SER_RS485_RTS_AFTER_SEND; | ||
75 | /* or, set logical level for RTS pin equal to 0 after sending: */ | ||
76 | rs485conf.flags &= ~(SER_RS485_RTS_AFTER_SEND); | ||
77 | |||
78 | /* Set rts delay before send, if needed: */ | ||
79 | rs485conf.delay_rts_before_send = ...; | ||
80 | |||
81 | /* Set rts delay after send, if needed: */ | ||
82 | rs485conf.delay_rts_after_send = ...; | ||
83 | |||
84 | /* Set this flag if you want to receive data even while sending data */ | ||
85 | rs485conf.flags |= SER_RS485_RX_DURING_TX; | ||
86 | |||
87 | if (ioctl (fd, TIOCSRS485, &rs485conf) < 0) { | ||
88 | /* Error handling. See errno. */ | ||
89 | } | ||
90 | |||
91 | /* Use read() and write() syscalls here... */ | ||
92 | |||
93 | /* Close the device when finished: */ | ||
94 | if (close (fd) < 0) { | ||
95 | /* Error handling. See errno. */ | ||
96 | } | ||
97 | |||
98 | 5. References | ||
99 | ============= | ||
100 | |||
101 | [1] include/uapi/linux/serial.h | ||
102 | |||
103 | [2] Documentation/devicetree/bindings/serial/rs485.txt | ||
diff --git a/Documentation/driver-api/serial/tty.rst b/Documentation/driver-api/serial/tty.rst new file mode 100644 index 000000000000..dd972caacf3e --- /dev/null +++ b/Documentation/driver-api/serial/tty.rst | |||
@@ -0,0 +1,328 @@ | |||
1 | ================= | ||
2 | The Lockronomicon | ||
3 | ================= | ||
4 | |||
5 | Your guide to the ancient and twisted locking policies of the tty layer and | ||
6 | the warped logic behind them. Beware all ye who read on. | ||
7 | |||
8 | |||
9 | Line Discipline | ||
10 | --------------- | ||
11 | |||
12 | Line disciplines are registered with tty_register_ldisc() passing the | ||
13 | discipline number and the ldisc structure. At the point of registration the | ||
14 | discipline must be ready to use and it is possible it will get used before | ||
15 | the call returns success. If the call returns an error then it won't get | ||
16 | called. Do not re-use ldisc numbers as they are part of the userspace ABI | ||
17 | and writing over an existing ldisc will cause demons to eat your computer. | ||
18 | After the return the ldisc data has been copied so you may free your own | ||
19 | copy of the structure. You must not re-register over the top of the line | ||
20 | discipline even with the same data or your computer again will be eaten by | ||
21 | demons. | ||
22 | |||
23 | In order to remove a line discipline call tty_unregister_ldisc(). | ||
24 | In ancient times this always worked. In modern times the function will | ||
25 | return -EBUSY if the ldisc is currently in use. Since the ldisc referencing | ||
26 | code manages the module counts this should not usually be a concern. | ||
27 | |||
28 | Heed this warning: the reference count field of the registered copies of the | ||
29 | tty_ldisc structure in the ldisc table counts the number of lines using this | ||
30 | discipline. The reference count of the tty_ldisc structure within a tty | ||
31 | counts the number of active users of the ldisc at this instant. In effect it | ||
32 | counts the number of threads of execution within an ldisc method (plus those | ||
33 | about to enter and exit although this detail matters not). | ||
34 | |||
35 | Line Discipline Methods | ||
36 | ----------------------- | ||
37 | |||
38 | TTY side interfaces | ||
39 | ^^^^^^^^^^^^^^^^^^^ | ||
40 | |||
41 | ======================= ======================================================= | ||
42 | open() Called when the line discipline is attached to | ||
43 | the terminal. No other call into the line | ||
44 | discipline for this tty will occur until it | ||
45 | completes successfully. Should initialize any | ||
46 | state needed by the ldisc, and set receive_room | ||
47 | in the tty_struct to the maximum amount of data | ||
48 | the line discipline is willing to accept from the | ||
49 | driver with a single call to receive_buf(). | ||
50 | Returning an error will prevent the ldisc from | ||
51 | being attached. Can sleep. | ||
52 | |||
53 | close() This is called on a terminal when the line | ||
54 | discipline is being unplugged. At the point of | ||
55 | execution no further users will enter the | ||
56 | ldisc code for this tty. Can sleep. | ||
57 | |||
58 | hangup() Called when the tty line is hung up. | ||
59 | The line discipline should cease I/O to the tty. | ||
60 | No further calls into the ldisc code will occur. | ||
61 | The return value is ignored. Can sleep. | ||
62 | |||
63 | read() (optional) A process requests reading data from | ||
64 | the line. Multiple read calls may occur in parallel | ||
65 | and the ldisc must deal with serialization issues. | ||
66 | If not defined, the process will receive an EIO | ||
67 | error. May sleep. | ||
68 | |||
69 | write() (optional) A process requests writing data to the | ||
70 | line. Multiple write calls are serialized by the | ||
71 | tty layer for the ldisc. If not defined, the | ||
72 | process will receive an EIO error. May sleep. | ||
73 | |||
74 | flush_buffer() (optional) May be called at any point between | ||
75 | open and close, and instructs the line discipline | ||
76 | to empty its input buffer. | ||
77 | |||
78 | set_termios() (optional) Called on termios structure changes. | ||
79 | The caller passes the old termios data and the | ||
80 | current data is in the tty. Called under the | ||
81 | termios semaphore so allowed to sleep. Serialized | ||
82 | against itself only. | ||
83 | |||
84 | poll() (optional) Check the status for the poll/select | ||
85 | calls. Multiple poll calls may occur in parallel. | ||
86 | May sleep. | ||
87 | |||
88 | ioctl() (optional) Called when an ioctl is handed to the | ||
89 | tty layer that might be for the ldisc. Multiple | ||
90 | ioctl calls may occur in parallel. May sleep. | ||
91 | |||
92 | compat_ioctl() (optional) Called when a 32 bit ioctl is handed | ||
93 | to the tty layer that might be for the ldisc. | ||
94 | Multiple ioctl calls may occur in parallel. | ||
95 | May sleep. | ||
96 | ======================= ======================================================= | ||
97 | |||
98 | Driver Side Interfaces | ||
99 | ^^^^^^^^^^^^^^^^^^^^^^ | ||
100 | |||
101 | ======================= ======================================================= | ||
102 | receive_buf() (optional) Called by the low-level driver to hand | ||
103 | a buffer of received bytes to the ldisc for | ||
104 | processing. The number of bytes is guaranteed not | ||
105 | to exceed the current value of tty->receive_room. | ||
106 | All bytes must be processed. | ||
107 | |||
108 | receive_buf2() (optional) Called by the low-level driver to hand | ||
109 | a buffer of received bytes to the ldisc for | ||
110 | processing. Returns the number of bytes processed. | ||
111 | |||
112 | If both receive_buf() and receive_buf2() are | ||
113 | defined, receive_buf2() should be preferred. | ||
114 | |||
115 | write_wakeup() May be called at any point between open and close. | ||
116 | The TTY_DO_WRITE_WAKEUP flag indicates if a call | ||
117 | is needed but always races versus calls. Thus the | ||
118 | ldisc must be careful about setting order and to | ||
119 | handle unexpected calls. Must not sleep. | ||
120 | |||
121 | The driver is forbidden from calling this directly | ||
122 | from the ->write call from the ldisc as the ldisc | ||
123 | is permitted to call the driver write method from | ||
124 | this function. In such a situation defer it. | ||
125 | |||
126 | dcd_change() Report to the tty line the current DCD pin status | ||
127 | changes and the relative timestamp. The timestamp | ||
128 | cannot be NULL. | ||
129 | ======================= ======================================================= | ||
130 | |||
131 | |||
132 | Driver Access | ||
133 | ^^^^^^^^^^^^^ | ||
134 | |||
135 | Line discipline methods can call the following methods of the underlying | ||
136 | hardware driver through the function pointers within the tty->driver | ||
137 | structure: | ||
138 | |||
139 | ======================= ======================================================= | ||
140 | write() Write a block of characters to the tty device. | ||
141 | Returns the number of characters accepted. The | ||
142 | character buffer passed to this method is already | ||
143 | in kernel space. | ||
144 | |||
145 | put_char() Queues a character for writing to the tty device. | ||
146 | If there is no room in the queue, the character is | ||
147 | ignored. | ||
148 | |||
149 | flush_chars() (Optional) If defined, must be called after | ||
150 | queueing characters with put_char() in order to | ||
151 | start transmission. | ||
152 | |||
153 | write_room() Returns the number of characters the tty driver | ||
154 | will accept for queueing to be written. | ||
155 | |||
156 | ioctl() Invoke device specific ioctl. | ||
157 | Expects data pointers to refer to userspace. | ||
158 | Returns ENOIOCTLCMD for unrecognized ioctl numbers. | ||
159 | |||
160 | set_termios() Notify the tty driver that the device's termios | ||
161 | settings have changed. New settings are in | ||
162 | tty->termios. Previous settings should be passed in | ||
163 | the "old" argument. | ||
164 | |||
165 | The API is defined such that the driver should return | ||
166 | the actual modes selected. This means that the | ||
167 | driver function is responsible for modifying any | ||
168 | bits in the request it cannot fulfill to indicate | ||
169 | the actual modes being used. A device with no | ||
170 | hardware capability for change (e.g. a USB dongle or | ||
171 | virtual port) can provide NULL for this method. | ||
172 | |||
173 | throttle() Notify the tty driver that input buffers for the | ||
174 | line discipline are close to full, and it should | ||
175 | somehow signal that no more characters should be | ||
176 | sent to the tty. | ||
177 | |||
178 | unthrottle() Notify the tty driver that characters can now be | ||
179 | sent to the tty without fear of overrunning the | ||
180 | input buffers of the line disciplines. | ||
181 | |||
182 | stop() Ask the tty driver to stop outputting characters | ||
183 | to the tty device. | ||
184 | |||
185 | start() Ask the tty driver to resume sending characters | ||
186 | to the tty device. | ||
187 | |||
188 | hangup() Ask the tty driver to hang up the tty device. | ||
189 | |||
190 | break_ctl() (Optional) Ask the tty driver to turn on or off | ||
191 | BREAK status on the RS-232 port. If state is -1, | ||
192 | then the BREAK status should be turned on; if | ||
193 | state is 0, then BREAK should be turned off. | ||
194 | If this routine is not implemented, use ioctls | ||
195 | TIOCSBRK / TIOCCBRK instead. | ||
196 | |||
197 | wait_until_sent() Waits until the device has written out all of the | ||
198 | characters in its transmitter FIFO. | ||
199 | |||
200 | send_xchar() Send a high-priority XON/XOFF character to the device. | ||
201 | ======================= ======================================================= | ||
202 | |||
203 | |||
204 | Flags | ||
205 | ^^^^^ | ||
206 | |||
207 | Line discipline methods have access to tty->flags field containing the | ||
208 | following interesting flags: | ||
209 | |||
210 | ======================= ======================================================= | ||
211 | TTY_THROTTLED Driver input is throttled. The ldisc should call | ||
212 | tty->driver->unthrottle() in order to resume | ||
213 | reception when it is ready to process more data. | ||
214 | |||
215 | TTY_DO_WRITE_WAKEUP If set, causes the driver to call the ldisc's | ||
216 | write_wakeup() method in order to resume | ||
217 | transmission when it can accept more data | ||
218 | to transmit. | ||
219 | |||
220 | TTY_IO_ERROR If set, causes all subsequent userspace read/write | ||
221 | calls on the tty to fail, returning -EIO. | ||
222 | |||
223 | TTY_OTHER_CLOSED Device is a pty and the other side has closed. | ||
224 | |||
225 | TTY_NO_WRITE_SPLIT Prevent driver from splitting up writes into | ||
226 | smaller chunks. | ||
227 | ======================= ======================================================= | ||
228 | |||
229 | |||
230 | Locking | ||
231 | ^^^^^^^ | ||
232 | |||
233 | Callers to the line discipline functions from the tty layer are required to | ||
234 | take line discipline locks. The same is true of calls from the driver side | ||
235 | but not yet enforced. | ||
236 | |||
237 | Three calls are now provided:: | ||
238 | |||
239 | ldisc = tty_ldisc_ref(tty); | ||
240 | |||
241 | takes a handle to the line discipline in the tty and returns it. If no ldisc | ||
242 | is currently attached or the ldisc is being closed and re-opened at this | ||
243 | point then NULL is returned. While this handle is held the ldisc will not | ||
244 | change or go away:: | ||
245 | |||
246 | tty_ldisc_deref(ldisc) | ||
247 | |||
248 | Returns the ldisc reference and allows the ldisc to be closed. Returning the | ||
249 | reference takes away your right to call the ldisc functions until you take | ||
250 | a new reference:: | ||
251 | |||
252 | ldisc = tty_ldisc_ref_wait(tty); | ||
253 | |||
254 | Performs the same function as tty_ldisc_ref except that it will wait for an | ||
255 | ldisc change to complete and then return a reference to the new ldisc. | ||
256 | |||
257 | While these functions are slightly slower than the old code they should have | ||
258 | minimal impact as most receive logic uses the flip buffers and they only | ||
259 | need to take a reference when they push bits up through the driver. | ||
260 | |||
261 | A caution: The ldisc->open(), ldisc->close() and driver->set_ldisc | ||
262 | functions are called with the ldisc unavailable. Thus tty_ldisc_ref will | ||
263 | fail in this situation if used within these functions. Ldisc and driver | ||
264 | code calling its own functions must be careful in this case. | ||
265 | |||
266 | |||
267 | Driver Interface | ||
268 | ---------------- | ||
269 | |||
270 | ======================= ======================================================= | ||
271 | open() Called when a device is opened. May sleep | ||
272 | |||
273 | close() Called when a device is closed. At the point of | ||
274 | return from this call the driver must make no | ||
275 | further ldisc calls of any kind. May sleep | ||
276 | |||
277 | write() Called to write bytes to the device. May not | ||
278 | sleep. May occur in parallel in special cases. | ||
279 | Because this includes panic paths drivers generally | ||
280 | shouldn't try and do clever locking here. | ||
281 | |||
282 | put_char() Stuff a single character onto the queue. The | ||
283 | driver is guaranteed following up calls to | ||
284 | flush_chars. | ||
285 | |||
286 | flush_chars() Ask the kernel to write put_char queue | ||
287 | |||
288 | write_room() Return the number of characters that can be stuffed | ||
289 | into the port buffers without overflow (or less). | ||
290 | The ldisc is responsible for being intelligent | ||
291 | about multi-threading of write_room/write calls | ||
292 | |||
293 | ioctl() Called when an ioctl may be for the driver | ||
294 | |||
295 | set_termios() Called on termios change, serialized against | ||
296 | itself by a semaphore. May sleep. | ||
297 | |||
298 | set_ldisc() Notifier for discipline change. At the point this | ||
299 | is done the discipline is not yet usable. Can now | ||
300 | sleep (I think) | ||
301 | |||
302 | throttle() Called by the ldisc to ask the driver to do flow | ||
303 | control. Serialization including with unthrottle | ||
304 | is the job of the ldisc layer. | ||
305 | |||
306 | unthrottle() Called by the ldisc to ask the driver to stop flow | ||
307 | control. | ||
308 | |||
309 | stop() Ldisc notifier to the driver to stop output. As with | ||
310 | throttle the serializations with start() are down | ||
311 | to the ldisc layer. | ||
312 | |||
313 | start() Ldisc notifier to the driver to start output. | ||
314 | |||
315 | hangup() Ask the tty driver to cause a hangup initiated | ||
316 | from the host side. [Can sleep ??] | ||
317 | |||
318 | break_ctl() Send RS232 break. Can sleep. Can get called in | ||
319 | parallel, driver must serialize (for now), and | ||
320 | with write calls. | ||
321 | |||
322 | wait_until_sent() Wait for characters to exit the hardware queue | ||
323 | of the driver. Can sleep | ||
324 | |||
325 | send_xchar() Send XON/XOFF and if possible jump the queue with | ||
326 | it in order to get fast flow control responses. | ||
327 | Cannot sleep ?? | ||
328 | ======================= ======================================================= | ||
diff --git a/Documentation/driver-api/sgi-ioc4.rst b/Documentation/driver-api/sgi-ioc4.rst new file mode 100644 index 000000000000..72709222d3c0 --- /dev/null +++ b/Documentation/driver-api/sgi-ioc4.rst | |||
@@ -0,0 +1,49 @@ | |||
1 | ==================================== | ||
2 | SGI IOC4 PCI (multi function) device | ||
3 | ==================================== | ||
4 | |||
5 | The SGI IOC4 PCI device is a bit of a strange beast, so some notes on | ||
6 | it are in order. | ||
7 | |||
8 | First, even though the IOC4 performs multiple functions, such as an | ||
9 | IDE controller, a serial controller, a PS/2 keyboard/mouse controller, | ||
10 | and an external interrupt mechanism, it's not implemented as a | ||
11 | multifunction device. The consequence of this from a software | ||
12 | standpoint is that all these functions share a single IRQ, and | ||
13 | they can't all register to own the same PCI device ID. To make | ||
14 | matters a bit worse, some of the register blocks (and even registers | ||
15 | themselves) present in IOC4 are mixed-purpose between these several | ||
16 | functions, meaning that there's no clear "owning" device driver. | ||
17 | |||
18 | The solution is to organize the IOC4 driver into several independent | ||
19 | drivers, "ioc4", "sgiioc4", and "ioc4_serial". Note that there is no | ||
20 | PS/2 controller driver as this functionality has never been wired up | ||
21 | on a shipping IO card. | ||
22 | |||
23 | ioc4 | ||
24 | ==== | ||
25 | This is the core (or shim) driver for IOC4. It is responsible for | ||
26 | initializing the basic functionality of the chip, and allocating | ||
27 | the PCI resources that are shared between the IOC4 functions. | ||
28 | |||
29 | This driver also provides registration functions that the other | ||
30 | IOC4 drivers can call to make their presence known. Each driver | ||
31 | needs to provide a probe and remove function, which are invoked | ||
32 | by the core driver at appropriate times. The interface of these | ||
33 | IOC4 function probe and remove operations isn't precisely the same | ||
34 | as PCI device probe and remove operations, but is logically the | ||
35 | same operation. | ||
36 | |||
37 | sgiioc4 | ||
38 | ======= | ||
39 | This is the IDE driver for IOC4. Its name isn't very descriptive | ||
40 | simply for historical reasons (it used to be the only IOC4 driver | ||
41 | component). There's not much to say about it other than it hooks | ||
42 | up to the ioc4 driver via the appropriate registration, probe, and | ||
43 | remove functions. | ||
44 | |||
45 | ioc4_serial | ||
46 | =========== | ||
47 | This is the serial driver for IOC4. There's not much to say about it | ||
48 | other than it hooks up to the ioc4 driver via the appropriate registration, | ||
49 | probe, and remove functions. | ||
diff --git a/Documentation/driver-api/sm501.rst b/Documentation/driver-api/sm501.rst new file mode 100644 index 000000000000..882507453ba4 --- /dev/null +++ b/Documentation/driver-api/sm501.rst | |||
@@ -0,0 +1,74 @@ | |||
1 | .. include:: <isonum.txt> | ||
2 | |||
3 | ============ | ||
4 | SM501 Driver | ||
5 | ============ | ||
6 | |||
7 | :Copyright: |copy| 2006, 2007 Simtec Electronics | ||
8 | |||
9 | The Silicon Motion SM501 multimedia companion chip is a multifunction device | ||
10 | which may provide numerous interfaces including USB host controller, USB gadget, | ||
11 | asynchronous serial ports, audio functions, and a dual display video interface. | ||
12 | The device may be connected by PCI or local bus with varying functions enabled. | ||
13 | |||
14 | Core | ||
15 | ---- | ||
16 | |||
17 | The core driver in drivers/mfd provides common services for the | ||
18 | drivers which manage the specific hardware blocks. These services | ||
19 | include locking for common registers, clock control and resource | ||
20 | management. | ||
21 | |||
22 | The core registers drivers for both PCI and generic bus based | ||
23 | chips via the platform device and driver system. | ||
24 | |||
25 | On detection of a device, the core initialises the chip (which may | ||
26 | be specified by the platform data) and then exports the selected | ||
27 | peripheral set as platform devices for the specific drivers. | ||
28 | |||
29 | The core re-uses the platform device system as the platform device | ||
30 | system provides enough features to support the drivers without the | ||
31 | need to create a new bus-type and the associated code to go with it. | ||
32 | |||
33 | |||
34 | Resources | ||
35 | --------- | ||
36 | |||
37 | Each peripheral has a view of the device which is implicitly narrowed to | ||
38 | the specific set of resources that peripheral requires in order to | ||
39 | function correctly. | ||
40 | |||
41 | The centralised memory allocation allows the driver to ensure that the | ||
42 | maximum possible resource allocation can be made to the video subsystem | ||
43 | as this is by-far the most resource-sensitive of the on-chip functions. | ||
44 | |||
45 | The primary issue with memory allocation is that of moving the video | ||
46 | buffers once a display mode is chosen. Indeed when a video mode change | ||
47 | occurs the memory footprint of the video subsystem changes. | ||
48 | |||
49 | Since video memory is difficult to move without changing the display | ||
50 | (unless sufficient contiguous memory can be provided for the old and new | ||
51 | modes simultaneously) the video driver fully utilises the memory area | ||
52 | given to it by aligning fb0 to the start of the area and fb1 to the end | ||
53 | of it. Any memory left over in the middle is used for the acceleration | ||
54 | functions, which are transient and thus their location is less critical | ||
55 | as it can be moved. | ||
56 | |||
57 | |||
58 | Configuration | ||
59 | ------------- | ||
60 | |||
61 | The platform device driver uses a set of platform data to pass | ||
62 | configurations through to the core and the subsidiary drivers | ||
63 | so that there can be support for more than one system carrying | ||
64 | an SM501 built into a single kernel image. | ||
65 | |||
66 | The PCI driver assumes that the PCI card behaves as per the Silicon | ||
67 | Motion reference design. | ||
68 | |||
69 | There is an erratum (AB-5) affecting the selection of the | ||
70 | M1XCLK and M1CLK frequencies. These two clocks | ||
71 | must be sourced from the same PLL, although they can then | ||
72 | be divided down individually. If this is not set, then SM501 may | ||
73 | lock and hang the whole system. The driver will refuse to | ||
74 | attach if the PLL selection is different. | ||
diff --git a/Documentation/driver-api/smsc_ece1099.rst b/Documentation/driver-api/smsc_ece1099.rst new file mode 100644 index 000000000000..079277421eaf --- /dev/null +++ b/Documentation/driver-api/smsc_ece1099.rst | |||
@@ -0,0 +1,60 @@ | |||
1 | ================================================= | ||
2 | Msc Keyboard Scan Expansion/GPIO Expansion device | ||
3 | ================================================= | ||
4 | |||
5 | What is smsc-ece1099? | ||
6 | ---------------------- | ||
7 | |||
8 | The ECE1099 is a 40-Pin 3.3V Keyboard Scan Expansion | ||
9 | or GPIO Expansion device. The device supports a keyboard | ||
10 | scan matrix of 23x8. The device is connected to a Master | ||
11 | via the SMSC BC-Link interface or via the SMBus. | ||
12 | Keypad scan Input(KSI) and Keypad Scan Output(KSO) signals | ||
13 | are multiplexed with GPIOs. | ||
14 | |||
15 | Interrupt generation | ||
16 | -------------------- | ||
17 | |||
18 | Interrupts can be generated by an edge detection on a GPIO | ||
19 | pin or an edge detection on one of the bus interface pins. | ||
20 | Interrupts can also be detected on the keyboard scan interface. | ||
21 | The bus interrupt pin (BC_INT# or SMBUS_INT#) is asserted if | ||
22 | any bit in one of the Interrupt Status registers is 1 and | ||
23 | the corresponding Interrupt Mask bit is also 1. | ||
24 | |||
25 | In order for software to determine which device is the source | ||
26 | of an interrupt, it should first read the Group Interrupt Status Register | ||
27 | to determine which Status register group is a source for the interrupt. | ||
28 | Software should read both the Status register and the associated Mask register, | ||
29 | then AND the two values together. Bits that are 1 in the result of the AND | ||
30 | are active interrupts. Software clears an interrupt by writing a 1 to the | ||
31 | corresponding bit in the Status register. | ||
32 | |||
33 | Communication Protocol | ||
34 | ---------------------- | ||
35 | |||
36 | - SMBus Slave Interface | ||
37 | The host processor communicates with the ECE1099 device | ||
38 | through a series of read/write registers via the SMBus | ||
39 | interface. SMBus is a serial communication protocol between | ||
40 | a computer host and its peripheral devices. The SMBus data | ||
41 | rate is 10 kHz minimum to 400 kHz maximum. | ||
42 | |||
43 | - Slave Bus Interface | ||
44 | The ECE1099 device SMBus implementation is a subset of the | ||
45 | SMBus interface to the host. The device is a slave-only SMBus device. | ||
46 | The implementation in the device is a subset of SMBus since it | ||
47 | only supports four protocols. | ||
48 | |||
49 | The Write Byte, Read Byte, Send Byte, and Receive Byte protocols are the | ||
50 | only valid SMBus protocols for the device. | ||
51 | |||
52 | - BC-LinkTM Interface | ||
53 | The BC-Link is a proprietary bus that allows communication | ||
54 | between a Master device and a Companion device. The Master | ||
55 | device uses this serial bus to read and write registers | ||
56 | located on the Companion device. The bus comprises three signals, | ||
57 | BC_CLK, BC_DAT and BC_INT#. The Master device always provides the | ||
58 | clock, BC_CLK, and the Companion device is the source for an | ||
59 | independent asynchronous interrupt signal, BC_INT#. The ECE1099 | ||
60 | supports BC-Link speeds up to 24MHz. | ||
diff --git a/Documentation/driver-api/switchtec.rst b/Documentation/driver-api/switchtec.rst new file mode 100644 index 000000000000..7611fdc53e19 --- /dev/null +++ b/Documentation/driver-api/switchtec.rst | |||
@@ -0,0 +1,102 @@ | |||
1 | ======================== | ||
2 | Linux Switchtec Support | ||
3 | ======================== | ||
4 | |||
5 | Microsemi's "Switchtec" line of PCI switch devices is already | ||
6 | supported by the kernel with standard PCI switch drivers. However, the | ||
7 | Switchtec device advertises a special management endpoint which | ||
8 | enables some additional functionality. This includes: | ||
9 | |||
10 | * Packet and Byte Counters | ||
11 | * Firmware Upgrades | ||
12 | * Event and Error logs | ||
13 | * Querying port link status | ||
14 | * Custom user firmware commands | ||
15 | |||
16 | The switchtec kernel module implements this functionality. | ||
17 | |||
18 | |||
19 | Interface | ||
20 | ========= | ||
21 | |||
22 | The primary means of communicating with the Switchtec management firmware is | ||
23 | through the Memory-mapped Remote Procedure Call (MRPC) interface. | ||
24 | Commands are submitted to the interface with a 4-byte command | ||
25 | identifier and up to 1KB of command specific data. The firmware will | ||
26 | respond with a 4-byte return code and up to 1KB of command-specific | ||
27 | data. The interface only processes a single command at a time. | ||
28 | |||
29 | |||
30 | Userspace Interface | ||
31 | =================== | ||
32 | |||
33 | The MRPC interface will be exposed to userspace through a simple char | ||
34 | device: /dev/switchtec#, one for each management endpoint in the system. | ||
35 | |||
36 | The char device has the following semantics: | ||
37 | |||
38 | * A write must consist of at least 4 bytes and no more than 1028 bytes. | ||
39 | The first 4 bytes will be interpreted as the Command ID and the | ||
40 | remainder will be used as the input data. A write will send the | ||
41 | command to the firmware to begin processing. | ||
42 | |||
43 | * Each write must be followed by exactly one read. Any double write will | ||
44 | produce an error and any read that doesn't follow a write will | ||
45 | produce an error. | ||
46 | |||
47 | * A read will block until the firmware completes the command and return | ||
48 | the 4-byte Command Return Value plus up to 1024 bytes of output | ||
49 | data. (The length will be specified by the size parameter of the read | ||
50 | call -- reading less than 4 bytes will produce an error.) | ||
51 | |||
52 | * The poll call will also be supported for userspace applications that | ||
53 | need to do other things while waiting for the command to complete. | ||
54 | |||
55 | The following IOCTLs are also supported by the device: | ||
56 | |||
57 | * SWITCHTEC_IOCTL_FLASH_INFO - Retrieve firmware length and number | ||
58 | of partitions in the device. | ||
59 | |||
60 | * SWITCHTEC_IOCTL_FLASH_PART_INFO - Retrieve address and length for | ||
61 | any specified partition in flash. | ||
62 | |||
63 | * SWITCHTEC_IOCTL_EVENT_SUMMARY - Read a structure of bitmaps | ||
64 | indicating all uncleared events. | ||
65 | |||
66 | * SWITCHTEC_IOCTL_EVENT_CTL - Get the current count, clear and set flags | ||
67 | for any event. This ioctl takes in a switchtec_ioctl_event_ctl struct | ||
68 | with the event_id, index and flags set (index being the partition or PFF | ||
69 | number for non-global events). It returns whether the event has | ||
70 | occurred, the number of times and any event specific data. The flags | ||
71 | can be used to clear the count or enable and disable actions to | ||
72 | happen when the event occurs. | ||
73 | By using the SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL flag, | ||
74 | you can set an event to trigger a poll command to return with | ||
75 | POLLPRI. In this way, userspace can wait for events to occur. | ||
76 | |||
77 | * SWITCHTEC_IOCTL_PFF_TO_PORT and SWITCHTEC_IOCTL_PORT_TO_PFF convert | ||
78 | between PCI Function Framework number (used by the event system) | ||
79 | and Switchtec Logic Port ID and Partition number (which is more | ||
80 | user friendly). | ||
81 | |||
82 | |||
83 | Non-Transparent Bridge (NTB) Driver | ||
84 | =================================== | ||
85 | |||
86 | An NTB hardware driver is provided for the Switchtec hardware in | ||
87 | ntb_hw_switchtec. Currently, it only supports switches configured with | ||
88 | exactly 2 NT partitions and zero or more non-NT partitions. It also requires | ||
89 | the following configuration settings: | ||
90 | |||
91 | * Both NT partitions must be able to access each other's GAS spaces. | ||
92 | Thus, the bits in the GAS Access Vector under Management Settings | ||
93 | must be set to support this. | ||
94 | * Kernel configuration MUST include support for NTB (CONFIG_NTB needs | ||
95 | to be set) | ||
96 | |||
97 | NT EP BAR 2 will be dynamically configured as a Direct Window, and | ||
98 | the configuration file does not need to configure it explicitly. | ||
99 | |||
100 | Please refer to Documentation/driver-api/ntb.rst in Linux source tree for an overall | ||
101 | understanding of the Linux NTB stack. ntb_hw_switchtec works as an NTB | ||
102 | Hardware Driver in this stack. | ||
diff --git a/Documentation/driver-api/sync_file.rst b/Documentation/driver-api/sync_file.rst new file mode 100644 index 000000000000..496fb2c3b3e6 --- /dev/null +++ b/Documentation/driver-api/sync_file.rst | |||
@@ -0,0 +1,86 @@ | |||
1 | =================== | ||
2 | Sync File API Guide | ||
3 | =================== | ||
4 | |||
5 | :Author: Gustavo Padovan <gustavo at padovan dot org> | ||
6 | |||
7 | This document serves as a guide for device drivers writers on what the | ||
8 | sync_file API is, and how drivers can support it. Sync file is the carrier of | ||
9 | the fences (struct dma_fence) that are needed to synchronize between drivers or | ||
10 | across process boundaries. | ||
11 | |||
12 | The sync_file API is meant to be used to send and receive fence information | ||
13 | to/from userspace. It enables userspace to do explicit fencing, where instead | ||
14 | of attaching a fence to the buffer a producer driver (such as a GPU or V4L | ||
15 | driver) sends the fence related to the buffer to userspace via a sync_file. | ||
16 | |||
17 | The sync_file then can be sent to the consumer (DRM driver for example), that | ||
18 | will not use the buffer for anything before the fence(s) signals, i.e., the | ||
19 | driver that issued the fence is not using/processing the buffer anymore, so it | ||
20 | signals that the buffer is ready to use. And vice-versa for the consumer -> | ||
21 | producer part of the cycle. | ||
22 | |||
23 | Sync files allow userspace awareness on buffer sharing synchronization between | ||
24 | drivers. | ||
25 | |||
26 | Sync file was originally added in the Android kernel but current Linux Desktop | ||
27 | can benefit a lot from it. | ||
28 | |||
29 | in-fences and out-fences | ||
30 | ------------------------ | ||
31 | |||
32 | Sync files can go either to or from userspace. When a sync_file is sent from | ||
33 | the driver to userspace we call the fences it contains 'out-fences'. They are | ||
34 | related to a buffer that the driver is processing or is going to process, so | ||
35 | the driver creates an out-fence to be able to notify, through | ||
36 | dma_fence_signal(), when it has finished using (or processing) that buffer. | ||
37 | Out-fences are fences that the driver creates. | ||
38 | |||
39 | On the other hand if the driver receives fence(s) through a sync_file from | ||
40 | userspace we call these fence(s) 'in-fences'. Receiving in-fences means that | ||
41 | we need to wait for the fence(s) to signal before using any buffer related to | ||
42 | the in-fences. | ||
43 | |||
44 | Creating Sync Files | ||
45 | ------------------- | ||
46 | |||
47 | When a driver needs to send an out-fence to userspace it creates a sync_file. | ||
48 | |||
49 | Interface:: | ||
50 | |||
51 | struct sync_file *sync_file_create(struct dma_fence *fence); | ||
52 | |||
53 | The caller passes the out-fence and gets back the sync_file. That is just the | ||
54 | first step, next it needs to install an fd on sync_file->file. So it gets an | ||
55 | fd:: | ||
56 | |||
57 | fd = get_unused_fd_flags(O_CLOEXEC); | ||
58 | |||
59 | and installs it on sync_file->file:: | ||
60 | |||
61 | fd_install(fd, sync_file->file); | ||
62 | |||
63 | The sync_file fd now can be sent to userspace. | ||
64 | |||
65 | If the creation process fails, or the sync_file needs to be released for any | ||
66 | other reason, fput(sync_file->file) should be used. | ||
67 | |||
68 | Receiving Sync Files from Userspace | ||
69 | ----------------------------------- | ||
70 | |||
71 | When userspace needs to send an in-fence to the driver it passes the file descriptor | ||
72 | of the Sync File to the kernel. The kernel can then retrieve the fences | ||
73 | from it. | ||
74 | |||
75 | Interface:: | ||
76 | |||
77 | struct dma_fence *sync_file_get_fence(int fd); | ||
78 | |||
79 | |||
80 | The returned reference is owned by the caller and must be disposed of | ||
81 | afterwards using dma_fence_put(). In case of error, a NULL is returned instead. | ||
82 | |||
83 | References: | ||
84 | |||
85 | 1. struct sync_file in include/linux/sync_file.h | ||
86 | 2. All interfaces mentioned above defined in include/linux/sync_file.h | ||
diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst new file mode 100644 index 000000000000..25eb7d5b834b --- /dev/null +++ b/Documentation/driver-api/vfio-mediated-device.rst | |||
@@ -0,0 +1,414 @@ | |||
1 | .. include:: <isonum.txt> | ||
2 | |||
3 | ===================== | ||
4 | VFIO Mediated devices | ||
5 | ===================== | ||
6 | |||
7 | :Copyright: |copy| 2016, NVIDIA CORPORATION. All rights reserved. | ||
8 | :Author: Neo Jia <cjia@nvidia.com> | ||
9 | :Author: Kirti Wankhede <kwankhede@nvidia.com> | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License version 2 as | ||
13 | published by the Free Software Foundation. | ||
14 | |||
15 | |||
16 | Virtual Function I/O (VFIO) Mediated devices[1] | ||
17 | =============================================== | ||
18 | |||
19 | The number of use cases for virtualizing DMA devices that do not have built-in | ||
20 | SR-IOV capability is increasing. Previously, to virtualize such devices, | ||
21 | developers had to create their own management interfaces and APIs, and then | ||
22 | integrate them with user space software. To simplify integration with user space | ||
23 | software, we have identified common requirements and a unified management | ||
24 | interface for such devices. | ||
25 | |||
26 | The VFIO driver framework provides unified APIs for direct device access. It is | ||
27 | an IOMMU/device-agnostic framework for exposing direct device access to user | ||
28 | space in a secure, IOMMU-protected environment. This framework is used for | ||
29 | multiple devices, such as GPUs, network adapters, and compute accelerators. With | ||
30 | direct device access, virtual machines or user space applications have direct | ||
31 | access to the physical device. This framework is reused for mediated devices. | ||
32 | |||
33 | The mediated core driver provides a common interface for mediated device | ||
34 | management that can be used by drivers of different devices. This module | ||
35 | provides a generic interface to perform these operations: | ||
36 | |||
37 | * Create and destroy a mediated device | ||
38 | * Add a mediated device to and remove it from a mediated bus driver | ||
39 | * Add a mediated device to and remove it from an IOMMU group | ||
40 | |||
41 | The mediated core driver also provides an interface to register a bus driver. | ||
42 | For example, the mediated VFIO mdev driver is designed for mediated devices and | ||
43 | supports VFIO APIs. The mediated bus driver adds a mediated device to and | ||
44 | removes it from a VFIO group. | ||
45 | |||
46 | The following high-level block diagram shows the main components and interfaces | ||
47 | in the VFIO mediated driver framework. The diagram shows NVIDIA, Intel, and IBM | ||
48 | devices as examples, as these devices are the first devices to use this module:: | ||
49 | |||
50 | +---------------+ | ||
51 | | | | ||
52 | | +-----------+ | mdev_register_driver() +--------------+ | ||
53 | | | | +<------------------------+ | | ||
54 | | | mdev | | | | | ||
55 | | | bus | +------------------------>+ vfio_mdev.ko |<-> VFIO user | ||
56 | | | driver | | probe()/remove() | | APIs | ||
57 | | | | | +--------------+ | ||
58 | | +-----------+ | | ||
59 | | | | ||
60 | | MDEV CORE | | ||
61 | | MODULE | | ||
62 | | mdev.ko | | ||
63 | | +-----------+ | mdev_register_device() +--------------+ | ||
64 | | | | +<------------------------+ | | ||
65 | | | | | | nvidia.ko |<-> physical | ||
66 | | | | +------------------------>+ | device | ||
67 | | | | | callbacks +--------------+ | ||
68 | | | Physical | | | ||
69 | | | device | | mdev_register_device() +--------------+ | ||
70 | | | interface | |<------------------------+ | | ||
71 | | | | | | i915.ko |<-> physical | ||
72 | | | | +------------------------>+ | device | ||
73 | | | | | callbacks +--------------+ | ||
74 | | | | | | ||
75 | | | | | mdev_register_device() +--------------+ | ||
76 | | | | +<------------------------+ | | ||
77 | | | | | | ccw_device.ko|<-> physical | ||
78 | | | | +------------------------>+ | device | ||
79 | | | | | callbacks +--------------+ | ||
80 | | +-----------+ | | ||
81 | +---------------+ | ||
82 | |||
83 | |||
84 | Registration Interfaces | ||
85 | ======================= | ||
86 | |||
87 | The mediated core driver provides the following types of registration | ||
88 | interfaces: | ||
89 | |||
90 | * Registration interface for a mediated bus driver | ||
91 | * Physical device driver interface | ||
92 | |||
93 | Registration Interface for a Mediated Bus Driver | ||
94 | ------------------------------------------------ | ||
95 | |||
96 | The registration interface for a mediated bus driver provides the following | ||
97 | structure to represent a mediated device's driver:: | ||
98 | |||
99 | /* | ||
100 | * struct mdev_driver [2] - Mediated device's driver | ||
101 | * @name: driver name | ||
102 | * @probe: called when new device created | ||
103 | * @remove: called when device removed | ||
104 | * @driver: device driver structure | ||
105 | */ | ||
106 | struct mdev_driver { | ||
107 | const char *name; | ||
108 | int (*probe) (struct device *dev); | ||
109 | void (*remove) (struct device *dev); | ||
110 | struct device_driver driver; | ||
111 | }; | ||
112 | |||
113 | A mediated bus driver for mdev should use this structure in the function calls | ||
114 | to register and unregister itself with the core driver: | ||
115 | |||
116 | * Register:: | ||
117 | |||
118 | extern int mdev_register_driver(struct mdev_driver *drv, | ||
119 | struct module *owner); | ||
120 | |||
121 | * Unregister:: | ||
122 | |||
123 | extern void mdev_unregister_driver(struct mdev_driver *drv); | ||
124 | |||
125 | The mediated bus driver is responsible for adding mediated devices to the VFIO | ||
126 | group when devices are bound to the driver and removing mediated devices from | ||
127 | the VFIO group when devices are unbound from the driver. | ||
128 | |||
129 | |||
130 | Physical Device Driver Interface | ||
131 | -------------------------------- | ||
132 | |||
133 | The physical device driver interface provides the mdev_parent_ops[3] structure | ||
134 | to define the APIs to manage work in the mediated core driver that is related | ||
135 | to the physical device. | ||
136 | |||
137 | The structures in the mdev_parent_ops structure are as follows: | ||
138 | |||
139 | * dev_attr_groups: attributes of the parent device | ||
140 | * mdev_attr_groups: attributes of the mediated device | ||
141 | * supported_config: attributes to define supported configurations | ||
142 | |||
143 | The functions in the mdev_parent_ops structure are as follows: | ||
144 | |||
145 | * create: allocate basic resources in a driver for a mediated device | ||
146 | * remove: free resources in a driver when a mediated device is destroyed | ||
147 | |||
148 | (Note that mdev-core provides no implicit serialization of create/remove | ||
149 | callbacks per mdev parent device, per mdev type, or any other categorization. | ||
150 | Vendor drivers are expected to be fully asynchronous in this respect or | ||
151 | provide their own internal resource protection.) | ||
152 | |||
153 | The callbacks in the mdev_parent_ops structure are as follows: | ||
154 | |||
155 | * open: open callback of mediated device | ||
156 | * close: close callback of mediated device | ||
157 | * ioctl: ioctl callback of mediated device | ||
158 | * read: read emulation callback | ||
159 | * write: write emulation callback | ||
160 | * mmap: mmap emulation callback | ||
161 | |||
162 | A driver should use the mdev_parent_ops structure in the function call to | ||
163 | register itself with the mdev core driver:: | ||
164 | |||
165 | extern int mdev_register_device(struct device *dev, | ||
166 | const struct mdev_parent_ops *ops); | ||
167 | |||
168 | However, the mdev_parent_ops structure is not required in the function call | ||
169 | that a driver should use to unregister itself with the mdev core driver:: | ||
170 | |||
171 | extern void mdev_unregister_device(struct device *dev); | ||
172 | |||
173 | |||
174 | Mediated Device Management Interface Through sysfs | ||
175 | ================================================== | ||
176 | |||
177 | The management interface through sysfs enables user space software, such as | ||
178 | libvirt, to query and configure mediated devices in a hardware-agnostic fashion. | ||
179 | This management interface provides flexibility to the underlying physical | ||
180 | device's driver to support features such as: | ||
181 | |||
182 | * Mediated device hot plug | ||
183 | * Multiple mediated devices in a single virtual machine | ||
184 | * Multiple mediated devices from different physical devices | ||
185 | |||
186 | Links in the mdev_bus Class Directory | ||
187 | ------------------------------------- | ||
188 | The /sys/class/mdev_bus/ directory contains links to devices that are registered | ||
189 | with the mdev core driver. | ||
190 | |||
191 | Directories and Files Under the sysfs for Each Physical Device | ||
192 | -------------------------------------------------------------- | ||
193 | |||
194 | :: | ||
195 | |||
196 | |- [parent physical device] | ||
197 | |--- Vendor-specific-attributes [optional] | ||
198 | |--- [mdev_supported_types] | ||
199 | | |--- [<type-id>] | ||
200 | | | |--- create | ||
201 | | | |--- name | ||
202 | | | |--- available_instances | ||
203 | | | |--- device_api | ||
204 | | | |--- description | ||
205 | | | |--- [devices] | ||
206 | | |--- [<type-id>] | ||
207 | | | |--- create | ||
208 | | | |--- name | ||
209 | | | |--- available_instances | ||
210 | | | |--- device_api | ||
211 | | | |--- description | ||
212 | | | |--- [devices] | ||
213 | | |--- [<type-id>] | ||
214 | | |--- create | ||
215 | | |--- name | ||
216 | | |--- available_instances | ||
217 | | |--- device_api | ||
218 | | |--- description | ||
219 | | |--- [devices] | ||
220 | |||
221 | * [mdev_supported_types] | ||
222 | |||
223 | The list of currently supported mediated device types and their details. | ||
224 | |||
225 | [<type-id>], device_api, and available_instances are mandatory attributes | ||
226 | that should be provided by the vendor driver. | ||
227 | |||
228 | * [<type-id>] | ||
229 | |||
230 | The [<type-id>] name is created by adding the device driver string as a prefix | ||
231 | to the string provided by the vendor driver. The format of this name is as | ||
232 | follows:: | ||
233 | |||
234 | sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name); | ||
235 | |||
236 | (or using mdev_parent_dev(mdev) to arrive at the parent device outside | ||
237 | of the core mdev code) | ||
238 | |||
239 | * device_api | ||
240 | |||
241 | This attribute should show which device API is being created, for example, | ||
242 | "vfio-pci" for a PCI device. | ||
243 | |||
244 | * available_instances | ||
245 | |||
246 | This attribute should show the number of devices of type <type-id> that can be | ||
247 | created. | ||
248 | |||
249 | * [devices] | ||
250 | |||
251 | This directory contains links to the devices of type <type-id> that have been | ||
252 | created. | ||
253 | |||
254 | * name | ||
255 | |||
256 | This attribute should show a human readable name. This is an optional attribute. | ||
257 | |||
258 | * description | ||
259 | |||
260 | This attribute should show brief features/description of the type. This is | ||
261 | an optional attribute. | ||
262 | |||
263 | Directories and Files Under the sysfs for Each mdev Device | ||
264 | ---------------------------------------------------------- | ||
265 | |||
266 | :: | ||
267 | |||
268 | |- [parent phy device] | ||
269 | |--- [$MDEV_UUID] | ||
270 | |--- remove | ||
271 | |--- mdev_type {link to its type} | ||
272 | |--- vendor-specific-attributes [optional] | ||
273 | |||
274 | * remove (write only) | ||
275 | |||
276 | Writing '1' to the 'remove' file destroys the mdev device. The vendor driver can | ||
277 | fail the remove() callback if that device is active and the vendor driver | ||
278 | doesn't support hot unplug. | ||
279 | |||
280 | Example:: | ||
281 | |||
282 | # echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove | ||
283 | |||
284 | Mediated Device Hot Plug | ||
285 | ------------------------ | ||
286 | |||
287 | Mediated devices can be created and assigned at runtime. The procedure to hot | ||
288 | plug a mediated device is the same as the procedure to hot plug a PCI device. | ||
289 | |||
290 | Translation APIs for Mediated Devices | ||
291 | ===================================== | ||
292 | |||
293 | The following APIs are provided for translating user pfn to host pfn in a VFIO | ||
294 | driver:: | ||
295 | |||
296 | extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, | ||
297 | int npage, int prot, unsigned long *phys_pfn); | ||
298 | |||
299 | extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, | ||
300 | int npage); | ||
301 | |||
302 | These functions call back into the back-end IOMMU module by using the pin_pages | ||
303 | and unpin_pages callbacks of the struct vfio_iommu_driver_ops[4]. Currently | ||
304 | these callbacks are supported in the TYPE1 IOMMU module. To enable them for | ||
305 | other IOMMU backend modules, such as PPC64 sPAPR module, they need to provide | ||
306 | these two callback functions. | ||
307 | |||
308 | Using the Sample Code | ||
309 | ===================== | ||
310 | |||
311 | mtty.c in samples/vfio-mdev/ directory is a sample driver program to | ||
312 | demonstrate how to use the mediated device framework. | ||
313 | |||
314 | The sample driver creates an mdev device that simulates a serial port over a PCI | ||
315 | card. | ||
316 | |||
317 | 1. Build and load the mtty.ko module. | ||
318 | |||
319 | This step creates a dummy device, /sys/devices/virtual/mtty/mtty/ | ||
320 | |||
321 | Files in this device directory in sysfs are similar to the following:: | ||
322 | |||
323 | # tree /sys/devices/virtual/mtty/mtty/ | ||
324 | /sys/devices/virtual/mtty/mtty/ | ||
325 | |-- mdev_supported_types | ||
326 | | |-- mtty-1 | ||
327 | | | |-- available_instances | ||
328 | | | |-- create | ||
329 | | | |-- device_api | ||
330 | | | |-- devices | ||
331 | | | `-- name | ||
332 | | `-- mtty-2 | ||
333 | | |-- available_instances | ||
334 | | |-- create | ||
335 | | |-- device_api | ||
336 | | |-- devices | ||
337 | | `-- name | ||
338 | |-- mtty_dev | ||
339 | | `-- sample_mtty_dev | ||
340 | |-- power | ||
341 | | |-- autosuspend_delay_ms | ||
342 | | |-- control | ||
343 | | |-- runtime_active_time | ||
344 | | |-- runtime_status | ||
345 | | `-- runtime_suspended_time | ||
346 | |-- subsystem -> ../../../../class/mtty | ||
347 | `-- uevent | ||
348 | |||
349 | 2. Create a mediated device by using the dummy device that you created in the | ||
350 | previous step:: | ||
351 | |||
352 | # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > \ | ||
353 | /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create | ||
354 | |||
355 | 3. Add parameters to qemu-kvm:: | ||
356 | |||
357 | -device vfio-pci,\ | ||
358 | sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 | ||
359 | |||
360 | 4. Boot the VM. | ||
361 | |||
362 | In the Linux guest VM, with no hardware on the host, the device appears | ||
363 | as follows:: | ||
364 | |||
365 | # lspci -s 00:05.0 -xxvv | ||
366 | 00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550]) | ||
367 | Subsystem: Device 4348:3253 | ||
368 | Physical Slot: 5 | ||
369 | Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- | ||
370 | Stepping- SERR- FastB2B- DisINTx- | ||
371 | Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- | ||
372 | <TAbort- <MAbort- >SERR- <PERR- INTx- | ||
373 | Interrupt: pin A routed to IRQ 10 | ||
374 | Region 0: I/O ports at c150 [size=8] | ||
375 | Region 1: I/O ports at c158 [size=8] | ||
376 | Kernel driver in use: serial | ||
377 | 00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00 | ||
378 | 10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00 | ||
379 | 20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32 | ||
380 | 30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00 | ||
381 | |||
382 | In the Linux guest VM, dmesg output for the device is as follows: | ||
383 | |||
384 | serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10 | ||
385 | 0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A | ||
386 | 0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A | ||
387 | |||
388 | |||
389 | 5. In the Linux guest VM, check the serial ports:: | ||
390 | |||
391 | # setserial -g /dev/ttyS* | ||
392 | /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4 | ||
393 | /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10 | ||
394 | /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10 | ||
395 | |||
396 | 6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or | ||
397 | /dev/ttyS2 with hardware flow control disabled. | ||
398 | |||
399 | 7. Type data on the minicom terminal or send data to the terminal emulation | ||
400 | program and read the data. | ||
401 | |||
402 | Data is looped back from the host's mtty driver. | ||
403 | |||
404 | 8. Destroy the mediated device that you created:: | ||
405 | |||
406 | # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove | ||
407 | |||
408 | References | ||
409 | ========== | ||
410 | |||
411 | 1. See Documentation/driver-api/vfio.rst for more information on VFIO. | ||
412 | 2. struct mdev_driver in include/linux/mdev.h | ||
413 | 3. struct mdev_parent_ops in include/linux/mdev.h | ||
414 | 4. struct vfio_iommu_driver_ops in include/linux/vfio.h | ||
diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst new file mode 100644 index 000000000000..f1a4d3c3ba0b --- /dev/null +++ b/Documentation/driver-api/vfio.rst | |||
@@ -0,0 +1,520 @@ | |||
1 | ================================== | ||
2 | VFIO - "Virtual Function I/O" [1]_ | ||
3 | ================================== | ||
4 | |||
5 | Many modern systems now provide DMA and interrupt remapping facilities | ||
6 | to help ensure I/O devices behave within the boundaries they've been | ||
7 | allotted. This includes x86 hardware with AMD-Vi and Intel VT-d, | ||
8 | POWER systems with Partitionable Endpoints (PEs) and embedded PowerPC | ||
9 | systems such as Freescale PAMU. The VFIO driver is an IOMMU/device | ||
10 | agnostic framework for exposing direct device access to userspace, in | ||
11 | a secure, IOMMU protected environment. In other words, this allows | ||
12 | safe [2]_, non-privileged, userspace drivers. | ||
13 | |||
14 | Why do we want that? Virtual machines often make use of direct device | ||
15 | access ("device assignment") when configured for the highest possible | ||
16 | I/O performance. From a device and host perspective, this simply | ||
17 | turns the VM into a userspace driver, with the benefits of | ||
18 | significantly reduced latency, higher bandwidth, and direct use of | ||
19 | bare-metal device drivers [3]_. | ||
20 | |||
21 | Some applications, particularly in the high performance computing | ||
22 | field, also benefit from low-overhead, direct device access from | ||
23 | userspace. Examples include network adapters (often non-TCP/IP based) | ||
24 | and compute accelerators. Prior to VFIO, these drivers had to either | ||
25 | go through the full development cycle to become a proper upstream | ||
26 | driver, be maintained out of tree, or make use of the UIO framework, | ||
27 | which has no notion of IOMMU protection, limited interrupt support, | ||
28 | and requires root privileges to access things like PCI configuration | ||
29 | space. | ||
30 | |||
31 | The VFIO driver framework intends to unify these, replacing both the | ||
32 | KVM PCI specific device assignment code as well as provide a more | ||
33 | secure, more featureful userspace driver environment than UIO. | ||
34 | |||
35 | Groups, Devices, and IOMMUs | ||
36 | --------------------------- | ||
37 | |||
38 | Devices are the main target of any I/O driver. Devices typically | ||
39 | create a programming interface made up of I/O access, interrupts, | ||
40 | and DMA. Without going into the details of each of these, DMA is | ||
41 | by far the most critical aspect for maintaining a secure environment | ||
42 | as allowing a device read-write access to system memory imposes the | ||
43 | greatest risk to the overall system integrity. | ||
44 | |||
45 | To help mitigate this risk, many modern IOMMUs now incorporate | ||
46 | isolation properties into what was, in many cases, an interface only | ||
47 | meant for translation (ie. solving the addressing problems of devices | ||
48 | with limited address spaces). With this, devices can now be isolated | ||
49 | from each other and from arbitrary memory access, thus allowing | ||
50 | things like secure direct assignment of devices into virtual machines. | ||
51 | |||
52 | This isolation is not always at the granularity of a single device | ||
53 | though. Even when an IOMMU is capable of this, properties of devices, | ||
54 | interconnects, and IOMMU topologies can each reduce this isolation. | ||
55 | For instance, an individual device may be part of a larger multi- | ||
56 | function enclosure. While the IOMMU may be able to distinguish | ||
57 | between devices within the enclosure, the enclosure may not require | ||
58 | transactions between devices to reach the IOMMU. Examples of this | ||
59 | could be anything from a multi-function PCI device with backdoors | ||
60 | between functions to a non-PCI-ACS (Access Control Services) capable | ||
61 | bridge allowing redirection without reaching the IOMMU. Topology | ||
62 | can also play a factor in terms of hiding devices. A PCIe-to-PCI | ||
63 | bridge masks the devices behind it, making transactions appear as if | ||
64 | from the bridge itself. Obviously IOMMU design plays a major factor | ||
65 | as well. | ||
66 | |||
67 | Therefore, while for the most part an IOMMU may have device level | ||
68 | granularity, any system is susceptible to reduced granularity. The | ||
69 | IOMMU API therefore supports a notion of IOMMU groups. A group is | ||
70 | a set of devices which is isolatable from all other devices in the | ||
71 | system. Groups are therefore the unit of ownership used by VFIO. | ||
72 | |||
73 | While the group is the minimum granularity that must be used to | ||
74 | ensure secure user access, it's not necessarily the preferred | ||
75 | granularity. In IOMMUs which make use of page tables, it may be | ||
76 | possible to share a set of page tables between different groups, | ||
77 | reducing the overhead both to the platform (reduced TLB thrashing, | ||
78 | reduced duplicate page tables), and to the user (programming only | ||
79 | a single set of translations). For this reason, VFIO makes use of | ||
80 | a container class, which may hold one or more groups. A container | ||
81 | is created by simply opening the /dev/vfio/vfio character device. | ||
82 | |||
83 | On its own, the container provides little functionality, with all | ||
84 | but a couple version and extension query interfaces locked away. | ||
85 | The user needs to add a group into the container for the next level | ||
86 | of functionality. To do this, the user first needs to identify the | ||
87 | group associated with the desired device. This can be done using | ||
88 | the sysfs links described in the example below. By unbinding the | ||
89 | device from the host driver and binding it to a VFIO driver, a new | ||
90 | VFIO group will appear for the group as /dev/vfio/$GROUP, where | ||
91 | $GROUP is the IOMMU group number of which the device is a member. | ||
92 | If the IOMMU group contains multiple devices, each will need to | ||
93 | be bound to a VFIO driver before operations on the VFIO group | ||
94 | are allowed (it's also sufficient to only unbind the device from | ||
95 | host drivers if a VFIO driver is unavailable; this will make the | ||
96 | group available, but not that particular device). TBD - interface | ||
97 | for disabling driver probing/locking a device. | ||
98 | |||
99 | Once the group is ready, it may be added to the container by opening | ||
100 | the VFIO group character device (/dev/vfio/$GROUP) and using the | ||
101 | VFIO_GROUP_SET_CONTAINER ioctl, passing the file descriptor of the | ||
102 | previously opened container file. If desired and if the IOMMU driver | ||
103 | supports sharing the IOMMU context between groups, multiple groups may | ||
104 | be set to the same container. If a group fails to set to a container | ||
105 | with existing groups, a new empty container will need to be used | ||
106 | instead. | ||
107 | |||
108 | With a group (or groups) attached to a container, the remaining | ||
109 | ioctls become available, enabling access to the VFIO IOMMU interfaces. | ||
110 | Additionally, it now becomes possible to get file descriptors for each | ||
111 | device within a group using an ioctl on the VFIO group file descriptor. | ||
112 | |||
113 | The VFIO device API includes ioctls for describing the device, the I/O | ||
114 | regions and their read/write/mmap offsets on the device descriptor, as | ||
115 | well as mechanisms for describing and registering interrupt | ||
116 | notifications. | ||
117 | |||
118 | VFIO Usage Example | ||
119 | ------------------ | ||
120 | |||
121 | Assume user wants to access PCI device 0000:06:0d.0:: | ||
122 | |||
123 | $ readlink /sys/bus/pci/devices/0000:06:0d.0/iommu_group | ||
124 | ../../../../kernel/iommu_groups/26 | ||
125 | |||
126 | This device is therefore in IOMMU group 26. This device is on the | ||
127 | pci bus, therefore the user will make use of vfio-pci to manage the | ||
128 | group:: | ||
129 | |||
130 | # modprobe vfio-pci | ||
131 | |||
132 | Binding this device to the vfio-pci driver creates the VFIO group | ||
133 | character devices for this group:: | ||
134 | |||
135 | $ lspci -n -s 0000:06:0d.0 | ||
136 | 06:0d.0 0401: 1102:0002 (rev 08) | ||
137 | # echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind | ||
138 | # echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id | ||
139 | |||
140 | Now we need to look at what other devices are in the group to free | ||
141 | it for use by VFIO:: | ||
142 | |||
143 | $ ls -l /sys/bus/pci/devices/0000:06:0d.0/iommu_group/devices | ||
144 | total 0 | ||
145 | lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:00:1e.0 -> | ||
146 | ../../../../devices/pci0000:00/0000:00:1e.0 | ||
147 | lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.0 -> | ||
148 | ../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.0 | ||
149 | lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.1 -> | ||
150 | ../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.1 | ||
151 | |||
152 | This device is behind a PCIe-to-PCI bridge [4]_, therefore we also | ||
153 | need to add device 0000:06:0d.1 to the group following the same | ||
154 | procedure as above. Device 0000:00:1e.0 is a bridge that does | ||
155 | not currently have a host driver, therefore it's not required to | ||
156 | bind this device to the vfio-pci driver (vfio-pci does not currently | ||
157 | support PCI bridges). | ||
158 | |||
159 | The final step is to provide the user with access to the group if | ||
160 | unprivileged operation is desired (note that /dev/vfio/vfio provides | ||
161 | no capabilities on its own and is therefore expected to be set to | ||
162 | mode 0666 by the system):: | ||
163 | |||
164 | # chown user:user /dev/vfio/26 | ||
165 | |||
166 | The user now has full access to all the devices and the iommu for this | ||
167 | group and can access them as follows:: | ||
168 | |||
169 | int container, group, device, i; | ||
170 | struct vfio_group_status group_status = | ||
171 | { .argsz = sizeof(group_status) }; | ||
172 | struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) }; | ||
173 | struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) }; | ||
174 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; | ||
175 | |||
176 | /* Create a new container */ | ||
177 | container = open("/dev/vfio/vfio", O_RDWR); | ||
178 | |||
179 | if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) | ||
180 | /* Unknown API version */ | ||
181 | |||
182 | if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) | ||
183 | /* Doesn't support the IOMMU driver we want. */ | ||
184 | |||
185 | /* Open the group */ | ||
186 | group = open("/dev/vfio/26", O_RDWR); | ||
187 | |||
188 | /* Test the group is viable and available */ | ||
189 | ioctl(group, VFIO_GROUP_GET_STATUS, &group_status); | ||
190 | |||
191 | if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) | ||
192 | /* Group is not viable (ie, not all devices bound for vfio) */ | ||
193 | |||
194 | /* Add the group to the container */ | ||
195 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | ||
196 | |||
197 | /* Enable the IOMMU model we want */ | ||
198 | ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); | ||
199 | |||
200 | /* Get additional IOMMU info */ | ||
201 | ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info); | ||
202 | |||
203 | /* Allocate some space and setup a DMA mapping */ | ||
204 | dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, | ||
205 | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); | ||
206 | dma_map.size = 1024 * 1024; | ||
207 | dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ | ||
208 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; | ||
209 | |||
210 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); | ||
211 | |||
212 | /* Get a file descriptor for the device */ | ||
213 | device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); | ||
214 | |||
215 | /* Test and setup the device */ | ||
216 | ioctl(device, VFIO_DEVICE_GET_INFO, &device_info); | ||
217 | |||
218 | for (i = 0; i < device_info.num_regions; i++) { | ||
219 | struct vfio_region_info reg = { .argsz = sizeof(reg) }; | ||
220 | |||
221 | reg.index = i; | ||
222 | |||
223 | ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &reg); | ||
224 | |||
225 | /* Setup mappings... read/write offsets, mmaps | ||
226 | * For PCI devices, config space is a region */ | ||
227 | } | ||
228 | |||
229 | for (i = 0; i < device_info.num_irqs; i++) { | ||
230 | struct vfio_irq_info irq = { .argsz = sizeof(irq) }; | ||
231 | |||
232 | irq.index = i; | ||
233 | |||
234 | ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq); | ||
235 | |||
236 | /* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */ | ||
237 | } | ||
238 | |||
239 | /* Gratuitous device reset and go... */ | ||
240 | ioctl(device, VFIO_DEVICE_RESET); | ||
241 | |||
242 | VFIO User API | ||
243 | ------------------------------------------------------------------------------- | ||
244 | |||
245 | Please see include/linux/vfio.h for complete API documentation. | ||
246 | |||
247 | VFIO bus driver API | ||
248 | ------------------------------------------------------------------------------- | ||
249 | |||
250 | VFIO bus drivers, such as vfio-pci make use of only a few interfaces | ||
251 | into VFIO core. When devices are bound and unbound to the driver, | ||
252 | the driver should call vfio_add_group_dev() and vfio_del_group_dev() | ||
253 | respectively:: | ||
254 | |||
255 | extern int vfio_add_group_dev(struct device *dev, | ||
256 | const struct vfio_device_ops *ops, | ||
257 | void *device_data); | ||
258 | |||
259 | extern void *vfio_del_group_dev(struct device *dev); | ||
260 | |||
261 | vfio_add_group_dev() indicates to the core to begin tracking the | ||
262 | iommu_group of the specified dev and register the dev as owned by | ||
263 | a VFIO bus driver. The driver provides an ops structure for callbacks | ||
264 | similar to a file operations structure:: | ||
265 | |||
266 | struct vfio_device_ops { | ||
267 | int (*open)(void *device_data); | ||
268 | void (*release)(void *device_data); | ||
269 | ssize_t (*read)(void *device_data, char __user *buf, | ||
270 | size_t count, loff_t *ppos); | ||
271 | ssize_t (*write)(void *device_data, const char __user *buf, | ||
272 | size_t size, loff_t *ppos); | ||
273 | long (*ioctl)(void *device_data, unsigned int cmd, | ||
274 | unsigned long arg); | ||
275 | int (*mmap)(void *device_data, struct vm_area_struct *vma); | ||
276 | }; | ||
277 | |||
278 | Each function is passed the device_data that was originally registered | ||
279 | in the vfio_add_group_dev() call above. This allows the bus driver | ||
280 | an easy place to store its opaque, private data. The open/release | ||
281 | callbacks are issued when a new file descriptor is created for a | ||
282 | device (via VFIO_GROUP_GET_DEVICE_FD). The ioctl interface provides | ||
283 | a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap | ||
284 | interfaces implement the device region access defined by the device's | ||
285 | own VFIO_DEVICE_GET_REGION_INFO ioctl. | ||
286 | |||
287 | |||
288 | PPC64 sPAPR implementation note | ||
289 | ------------------------------- | ||
290 | |||
291 | This implementation has some specifics: | ||
292 | |||
293 | 1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per | ||
294 | container is supported as an IOMMU table is allocated at the boot time, | ||
295 | one table per IOMMU group which is a Partitionable Endpoint (PE) | ||
296 | (PE is often a PCI domain but not always). | ||
297 | |||
298 | Newer systems (POWER8 with IODA2) have an improved hardware design which | ||
299 | removes this limitation and allows multiple IOMMU groups per VFIO | ||
300 | container. | ||
301 | |||
302 | 2) The hardware supports so called DMA windows - the PCI address range | ||
303 | within which DMA transfer is allowed, any attempt to access address space | ||
304 | out of the window leads to the whole PE isolation. | ||
305 | |||
306 | 3) PPC64 guests are paravirtualized but not fully emulated. There is an API | ||
307 | to map/unmap pages for DMA, and it normally maps 1..32 pages per call and | ||
308 | currently there is no way to reduce the number of calls. In order to make | ||
309 | things faster, the map/unmap handling has been implemented in real mode, | ||
310 | which provides excellent performance but has limitations such as the | ||
311 | inability to do locked pages accounting in real time. | ||
312 | |||
313 | 4) According to the sPAPR specification, a Partitionable Endpoint (PE) is an I/O | ||
314 | subtree that can be treated as a unit for the purposes of partitioning and | ||
315 | error recovery. A PE may be a single or multi-function IOA (IO Adapter), a | ||
316 | function of a multi-function IOA, or multiple IOAs (possibly including | ||
317 | switch and bridge structures above the multiple IOAs). PPC64 guests detect | ||
318 | PCI errors and recover from them via EEH RTAS services, which work on the | ||
319 | basis of additional ioctl commands. | ||
320 | |||
321 | So 4 additional ioctls have been added: | ||
322 | |||
323 | VFIO_IOMMU_SPAPR_TCE_GET_INFO | ||
324 | returns the size and the start of the DMA window on the PCI bus. | ||
325 | |||
326 | VFIO_IOMMU_ENABLE | ||
327 | enables the container. The locked pages accounting | ||
328 | is done at this point. This lets the user first learn what | ||
329 | the DMA window is and adjust the rlimit before doing any real work. | ||
330 | |||
331 | VFIO_IOMMU_DISABLE | ||
332 | disables the container. | ||
333 | |||
334 | VFIO_EEH_PE_OP | ||
335 | provides an API for EEH setup, error detection and recovery. | ||
336 | |||
337 | The code flow from the example above should be slightly changed:: | ||
338 | |||
339 | struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 }; | ||
340 | |||
341 | ..... | ||
342 | /* Add the group to the container */ | ||
343 | ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | ||
344 | |||
345 | /* Enable the IOMMU model we want */ | ||
346 | ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU); | ||
347 | |||
348 | /* Get additional sPAPR IOMMU info */ | ||
349 | vfio_iommu_spapr_tce_info spapr_iommu_info; | ||
350 | ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info); | ||
351 | |||
352 | if (ioctl(container, VFIO_IOMMU_ENABLE)) | ||
353 | /* Cannot enable container, may be low rlimit */ | ||
354 | |||
355 | /* Allocate some space and setup a DMA mapping */ | ||
356 | dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, | ||
357 | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); | ||
358 | |||
359 | dma_map.size = 1024 * 1024; | ||
360 | dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ | ||
361 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; | ||
362 | |||
363 | /* Check here if .iova/.size are within the DMA window from spapr_iommu_info */ | ||
364 | ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); | ||
365 | |||
366 | /* Get a file descriptor for the device */ | ||
367 | device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); | ||
368 | |||
369 | .... | ||
370 | |||
371 | /* Gratuitous device reset and go... */ | ||
372 | ioctl(device, VFIO_DEVICE_RESET); | ||
373 | |||
374 | /* Make sure EEH is supported */ | ||
375 | ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH); | ||
376 | |||
377 | /* Enable the EEH functionality on the device */ | ||
378 | pe_op.op = VFIO_EEH_PE_ENABLE; | ||
379 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
380 | |||
381 | /* You're suggested to create additional data struct to represent | ||
382 | * PE, and put child devices belonging to same IOMMU group to the | ||
383 | * PE instance for later reference. | ||
384 | */ | ||
385 | |||
386 | /* Check the PE's state and make sure it's in functional state */ | ||
387 | pe_op.op = VFIO_EEH_PE_GET_STATE; | ||
388 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
389 | |||
390 | /* Save device state using pci_save_state(). | ||
391 | * EEH should be enabled on the specified device. | ||
392 | */ | ||
393 | |||
394 | .... | ||
395 | |||
396 | /* Inject EEH error, which is expected to be caused by 32-bits | ||
397 | * config load. | ||
398 | */ | ||
399 | pe_op.op = VFIO_EEH_PE_INJECT_ERR; | ||
400 | pe_op.err.type = EEH_ERR_TYPE_32; | ||
401 | pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR; | ||
402 | pe_op.err.addr = 0ul; | ||
403 | pe_op.err.mask = 0ul; | ||
404 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
405 | |||
406 | .... | ||
407 | |||
408 | /* When 0xFF's returned from reading PCI config space or IO BARs | ||
409 | * of the PCI device. Check the PE's state to see if that has been | ||
410 | * frozen. | ||
411 | */ | ||
412 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
413 | |||
414 | /* Waiting for pending PCI transactions to be completed and don't | ||
415 | * produce any more PCI traffic from/to the affected PE until | ||
416 | * recovery is finished. | ||
417 | */ | ||
418 | |||
419 | /* Enable IO for the affected PE and collect logs. Usually, the | ||
420 | * standard part of PCI config space, AER registers are dumped | ||
421 | * as logs for further analysis. | ||
422 | */ | ||
423 | pe_op.op = VFIO_EEH_PE_UNFREEZE_IO; | ||
424 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
425 | |||
426 | /* | ||
427 | * Issue PE reset: hot or fundamental reset. Usually, hot reset | ||
428 | * is enough. However, the firmware of some PCI adapters would | ||
429 | * require fundamental reset. | ||
430 | */ | ||
431 | pe_op.op = VFIO_EEH_PE_RESET_HOT; | ||
432 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
433 | pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE; | ||
434 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
435 | |||
436 | /* Configure the PCI bridges for the affected PE */ | ||
437 | pe_op.op = VFIO_EEH_PE_CONFIGURE; | ||
438 | ioctl(container, VFIO_EEH_PE_OP, &pe_op); | ||
439 | |||
440 | /* Restore the state we saved at initialization time. pci_restore_state() | ||
441 | * is good enough as an example. | ||
442 | */ | ||
443 | |||
444 | /* Hopefully, error is recovered successfully. Now, you can resume to | ||
445 | * start PCI traffic to/from the affected PE. | ||
446 | */ | ||
447 | |||
448 | .... | ||
449 | |||
450 | 5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/ | ||
451 | VFIO_IOMMU_DISABLE and implements 2 new ioctls: | ||
452 | VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY | ||
453 | (which are unsupported in v1 IOMMU). | ||
454 | |||
455 | PPC64 paravirtualized guests generate a lot of map/unmap requests, | ||
456 | and the handling of those includes pinning/unpinning pages and updating | ||
457 | mm::locked_vm counter to make sure we do not exceed the rlimit. | ||
458 | The v2 IOMMU splits accounting and pinning into separate operations: | ||
459 | |||
460 | - VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls | ||
461 | receive a user space address and size of the block to be pinned. | ||
462 | Bisecting is not supported and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY is expected to | ||
463 | be called with the exact address and size used for registering | ||
464 | the memory block. The userspace is not expected to call these often. | ||
465 | The ranges are stored in a linked list in a VFIO container. | ||
466 | |||
467 | - VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual | ||
468 | IOMMU table and do not do pinning; instead these check that the userspace | ||
469 | address is from pre-registered range. | ||
470 | |||
471 | This separation helps in optimizing DMA for guests. | ||
472 | |||
473 | 6) sPAPR specification allows guests to have an additional DMA window(s) on | ||
474 | a PCI bus with a variable page size. Two ioctls have been added to support | ||
475 | this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE. | ||
476 | The platform has to support the functionality or an error will be returned to | ||
477 | the userspace. The existing hardware supports up to 2 DMA windows: one is | ||
478 | 2GB long, uses 4K pages and is called the "default 32bit window"; the other can | ||
479 | be as big as the entire RAM and use a different page size; it is optional - guests | ||
480 | create those at run-time if the guest driver supports 64bit DMA. | ||
481 | |||
482 | VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and | ||
483 | a number of TCE table levels (if a TCE table is going to be big enough and | ||
484 | the kernel may not be able to allocate enough physically contiguous | ||
485 | memory). It creates a new window in the available slot and returns the bus | ||
486 | address where the new window starts. Due to hardware limitation, the user | ||
487 | space cannot choose the location of DMA windows. | ||
488 | |||
489 | VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window | ||
490 | and removes it. | ||
491 | |||
492 | ------------------------------------------------------------------------------- | ||
493 | |||
494 | .. [1] VFIO was originally an acronym for "Virtual Function I/O" in its | ||
495 | initial implementation by Tom Lyon while at Cisco. We've since | ||
496 | outgrown the acronym, but it's catchy. | ||
497 | |||
498 | .. [2] "safe" also depends upon a device being "well behaved". It's | ||
499 | possible for multi-function devices to have backdoors between | ||
500 | functions and even for single function devices to have alternative | ||
501 | access to things like PCI config space through MMIO registers. To | ||
502 | guard against the former we can include additional precautions in the | ||
503 | IOMMU driver to group multi-function PCI devices together | ||
504 | (iommu=group_mf). The latter we can't prevent, but the IOMMU should | ||
505 | still provide isolation. For PCI, SR-IOV Virtual Functions are the | ||
506 | best indicator of "well behaved", as these are designed for | ||
507 | virtualization usage models. | ||
508 | |||
509 | .. [3] As always there are trade-offs to virtual machine device | ||
510 | assignment that are beyond the scope of VFIO. It's expected that | ||
511 | future IOMMU technologies will reduce some, but maybe not all, of | ||
512 | these trade-offs. | ||
513 | |||
514 | .. [4] In this case the device is below a PCI bridge, so transactions | ||
515 | from either function of the device are indistinguishable to the iommu:: | ||
516 | |||
517 | -[0000:00]-+-1e.0-[06]--+-0d.0 | ||
518 | \-0d.1 | ||
519 | |||
520 | 00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90) | ||
diff --git a/Documentation/driver-api/xilinx/eemi.rst b/Documentation/driver-api/xilinx/eemi.rst new file mode 100644 index 000000000000..9dcbc6f18d75 --- /dev/null +++ b/Documentation/driver-api/xilinx/eemi.rst | |||
@@ -0,0 +1,67 @@ | |||
1 | ==================================== | ||
2 | Xilinx Zynq MPSoC EEMI Documentation | ||
3 | ==================================== | ||
4 | |||
5 | Xilinx Zynq MPSoC Firmware Interface | ||
6 | ------------------------------------- | ||
7 | The zynqmp-firmware node describes the interface to platform firmware. | ||
8 | ZynqMP has an interface to communicate with secure firmware. Firmware | ||
9 | driver provides an interface to firmware APIs. Interface APIs can be | ||
10 | used by any driver to communicate with PMC (Platform Management Controller). | ||
11 | |||
12 | Embedded Energy Management Interface (EEMI) | ||
13 | ---------------------------------------------- | ||
14 | The embedded energy management interface is used to allow software | ||
15 | components running across different processing clusters on a chip or | ||
16 | device to communicate with a power management controller (PMC) on a | ||
17 | device to issue or respond to power management requests. | ||
18 | |||
19 | EEMI ops is a structure containing all eemi APIs supported by Zynq MPSoC. | ||
20 | The zynqmp-firmware driver maintains all EEMI APIs in zynqmp_eemi_ops | ||
21 | structure. Any driver that wants to communicate with PMC using EEMI APIs | ||
22 | can call zynqmp_pm_get_eemi_ops(). | ||
23 | |||
24 | Example of EEMI ops:: | ||
25 | |||
26 | /* zynqmp-firmware driver maintains all EEMI APIs */ | ||
27 | struct zynqmp_eemi_ops { | ||
28 | int (*get_api_version)(u32 *version); | ||
29 | int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out); | ||
30 | }; | ||
31 | |||
32 | static const struct zynqmp_eemi_ops eemi_ops = { | ||
33 | .get_api_version = zynqmp_pm_get_api_version, | ||
34 | .query_data = zynqmp_pm_query_data, | ||
35 | }; | ||
36 | |||
37 | Example of EEMI ops usage:: | ||
38 | |||
39 | static const struct zynqmp_eemi_ops *eemi_ops; | ||
40 | u32 ret_payload[PAYLOAD_ARG_CNT]; | ||
41 | int ret; | ||
42 | |||
43 | eemi_ops = zynqmp_pm_get_eemi_ops(); | ||
44 | if (IS_ERR(eemi_ops)) | ||
45 | return PTR_ERR(eemi_ops); | ||
46 | |||
47 | ret = eemi_ops->query_data(qdata, ret_payload); | ||
48 | |||
49 | IOCTL | ||
50 | ------ | ||
51 | IOCTL API is for device control and configuration. It is not a system | ||
52 | IOCTL but it is an EEMI API. This API can be used by master to control | ||
53 | any device specific configuration. IOCTL definitions can be platform | ||
54 | specific. This API also manages shared device configuration. | ||
55 | |||
56 | The following IOCTL IDs are valid for device control: | ||
57 | - IOCTL_SET_PLL_FRAC_MODE 8 | ||
58 | - IOCTL_GET_PLL_FRAC_MODE 9 | ||
59 | - IOCTL_SET_PLL_FRAC_DATA 10 | ||
60 | - IOCTL_GET_PLL_FRAC_DATA 11 | ||
61 | |||
62 | Refer to the EEMI API guide [0] for IOCTL specific parameters and other EEMI APIs. | ||
63 | |||
64 | References | ||
65 | ---------- | ||
66 | [0] Embedded Energy Management Interface (EEMI) API guide: | ||
67 | https://www.xilinx.com/support/documentation/user_guides/ug1200-eemi-api.pdf | ||
diff --git a/Documentation/driver-api/xilinx/index.rst b/Documentation/driver-api/xilinx/index.rst new file mode 100644 index 000000000000..13f7589ed442 --- /dev/null +++ b/Documentation/driver-api/xilinx/index.rst | |||
@@ -0,0 +1,16 @@ | |||
1 | |||
2 | =========== | ||
3 | Xilinx FPGA | ||
4 | =========== | ||
5 | |||
6 | .. toctree:: | ||
7 | :maxdepth: 1 | ||
8 | |||
9 | eemi | ||
10 | |||
11 | .. only:: subproject and html | ||
12 | |||
13 | Indices | ||
14 | ======= | ||
15 | |||
16 | * :ref:`genindex` | ||
diff --git a/Documentation/driver-api/xillybus.rst b/Documentation/driver-api/xillybus.rst new file mode 100644 index 000000000000..2446ee303c09 --- /dev/null +++ b/Documentation/driver-api/xillybus.rst | |||
@@ -0,0 +1,379 @@ | |||
1 | ========================================== | ||
2 | Xillybus driver for generic FPGA interface | ||
3 | ========================================== | ||
4 | |||
5 | :Author: Eli Billauer, Xillybus Ltd. (http://xillybus.com) | ||
6 | :Email: eli.billauer@gmail.com or as advertised on Xillybus' site. | ||
7 | |||
8 | .. Contents: | ||
9 | |||
10 | - Introduction | ||
11 | -- Background | ||
12 | -- Xillybus Overview | ||
13 | |||
14 | - Usage | ||
15 | -- User interface | ||
16 | -- Synchronization | ||
17 | -- Seekable pipes | ||
18 | |||
19 | - Internals | ||
20 | -- Source code organization | ||
21 | -- Pipe attributes | ||
22 | -- Host never reads from the FPGA | ||
23 | -- Channels, pipes, and the message channel | ||
24 | -- Data streaming | ||
25 | -- Data granularity | ||
26 | -- Probing | ||
27 | -- Buffer allocation | ||
28 | -- The "nonempty" message (supporting poll) | ||
29 | |||
30 | |||
31 | Introduction | ||
32 | ============ | ||
33 | |||
34 | Background | ||
35 | ---------- | ||
36 | |||
37 | An FPGA (Field Programmable Gate Array) is a piece of logic hardware, which | ||
38 | can be programmed to become virtually anything that is usually found as a | ||
39 | dedicated chipset: For instance, a display adapter, network interface card, | ||
40 | or even a processor with its peripherals. FPGAs are the LEGO of hardware: | ||
41 | Based upon certain building blocks, you make your own toys the way you like | ||
42 | them. It's usually pointless to reimplement something that is already | ||
43 | available on the market as a chipset, so FPGAs are mostly used when some | ||
44 | special functionality is needed, and the production volume is relatively low | ||
45 | (hence not justifying the development of an ASIC). | ||
46 | |||
47 | The challenge with FPGAs is that everything is implemented at a very low | ||
48 | level, even lower than assembly language. In order to allow FPGA designers to | ||
49 | focus on their specific project, and not reinvent the wheel over and over | ||
50 | again, pre-designed building blocks, IP cores, are often used. These are the | ||
51 | FPGA parallels of library functions. IP cores may implement certain | ||
52 | mathematical functions, a functional unit (e.g. a USB interface), an entire | ||
53 | processor (e.g. ARM) or anything that might come handy. Think of them as a | ||
54 | building block, with electrical wires dangling on the sides for connection to | ||
55 | other blocks. | ||
56 | |||
57 | One of the daunting tasks in FPGA design is communicating with a full-blown | ||
58 | operating system (actually, with the processor running it): Implementing the | ||
59 | low-level bus protocol and the somewhat higher-level interface with the host | ||
60 | (registers, interrupts, DMA etc.) is a project in itself. When the FPGA's | ||
61 | function is a well-known one (e.g. a video adapter card, or a NIC), it can | ||
62 | make sense to design the FPGA's interface logic specifically for the project. | ||
63 | A special driver is then written to present the FPGA as a well-known interface | ||
64 | to the kernel and/or user space. In that case, there is no reason to treat the | ||
65 | FPGA differently than any device on the bus. | ||
66 | |||
67 | It's however common that the desired data communication doesn't fit any well- | ||
68 | known peripheral function. Also, the effort of designing an elegant | ||
69 | abstraction for the data exchange is often considered too big. In those cases, | ||
70 | a quicker and possibly less elegant solution is sought: The driver is | ||
71 | effectively written as a user space program, leaving the kernel space part | ||
72 | with just elementary data transport. This still requires designing some | ||
73 | interface logic for the FPGA, and writing a simple ad-hoc driver for the kernel. | ||
74 | |||
75 | Xillybus Overview | ||
76 | ----------------- | ||
77 | |||
78 | Xillybus is an IP core and a Linux driver. Together, they form a kit for | ||
79 | elementary data transport between an FPGA and the host, providing pipe-like | ||
80 | data streams with a straightforward user interface. It's intended as a low- | ||
81 | effort solution for mixed FPGA-host projects, for which it makes sense to | ||
82 | have the project-specific part of the driver running in a user-space program. | ||
83 | |||
84 | Since the communication requirements may vary significantly from one FPGA | ||
85 | project to another (the number of data pipes needed in each direction and | ||
86 | their attributes), there isn't one specific chunk of logic being the Xillybus | ||
87 | IP core. Rather, the IP core is configured and built based upon a | ||
88 | specification given by its end user. | ||
89 | |||
90 | Xillybus presents independent data streams, which resemble pipes or TCP/IP | ||
91 | communication to the user. At the host side, a character device file is used | ||
92 | just like any pipe file. On the FPGA side, hardware FIFOs are used to stream | ||
93 | the data. This is contrary to a common method of communicating through fixed- | ||
94 | sized buffers (even though such buffers are used by Xillybus under the hood). | ||
95 | There may be more than a hundred of these streams on a single IP core, but | ||
96 | also no more than one, depending on the configuration. | ||
97 | |||
98 | In order to ease the deployment of the Xillybus IP core, it contains a simple | ||
99 | data structure which completely defines the core's configuration. The Linux | ||
100 | driver fetches this data structure during its initialization process, and sets | ||
101 | up the DMA buffers and character devices accordingly. As a result, a single | ||
102 | driver is used to work out of the box with any Xillybus IP core. | ||
103 | |||
104 | The data structure just mentioned should not be confused with PCI's | ||
105 | configuration space or the Flattened Device Tree. | ||
106 | |||
107 | Usage | ||
108 | ===== | ||
109 | |||
110 | User interface | ||
111 | -------------- | ||
112 | |||
113 | On the host, all interface with Xillybus is done through /dev/xillybus_* | ||
114 | device files, which are generated automatically as the driver loads. The | ||
115 | names of these files depend on the IP core that is loaded in the FPGA (see | ||
116 | Probing below). To communicate with the FPGA, open the device file that | ||
117 | corresponds to the hardware FIFO you want to send data to or receive data from, | ||
118 | and use plain write() or read() calls, just like with a regular pipe. In | ||
119 | particular, it makes perfect sense to go:: | ||
120 | |||
121 | $ cat mydata > /dev/xillybus_thisfifo | ||
122 | |||
123 | $ cat /dev/xillybus_thatfifo > hisdata | ||
124 | |||
125 | possibly pressing CTRL-C at some stage, even though the xillybus_* pipes have | ||
126 | the capability to send an EOF (but may not use it). | ||
127 | |||
128 | The driver and hardware are designed to behave sensibly as pipes, including: | ||
129 | |||
130 | * Supporting non-blocking I/O (by setting O_NONBLOCK on open() ). | ||
131 | |||
132 | * Supporting poll() and select(). | ||
133 | |||
134 | * Being bandwidth efficient under load (using DMA) but also handling small | ||
135 | pieces of data sent across (like TCP/IP) by autoflushing. | ||
136 | |||
137 | A device file can be read only, write only or bidirectional. Bidirectional | ||
138 | device files are treated like two independent pipes (except for sharing a | ||
139 | "channel" structure in the implementation code). | ||
140 | |||
141 | Synchronization | ||
142 | --------------- | ||
143 | |||
144 | Xillybus pipes are configured (on the IP core) to be either synchronous or | ||
145 | asynchronous. For a synchronous pipe, write() returns successfully only after | ||
146 | some data has been submitted and acknowledged by the FPGA. This slows down | ||
147 | bulk data transfers, and is nearly impossible for use with streams that | ||
148 | require data at a constant rate: There is no data transmitted to the FPGA | ||
149 | between write() calls, in particular when the process loses the CPU. | ||
150 | |||
151 | When a pipe is configured asynchronous, write() returns if there was enough | ||
152 | room in the buffers to store any of the data in the buffers. | ||
153 | |||
154 | For FPGA to host pipes, asynchronous pipes allow data transfer from the FPGA | ||
155 | as soon as the respective device file is opened, regardless of if the data | ||
156 | has been requested by a read() call. On synchronous pipes, only the amount | ||
157 | of data requested by a read() call is transmitted. | ||
158 | |||
159 | In summary, for synchronous pipes, data between the host and FPGA is | ||
160 | transmitted only to satisfy the read() or write() call currently handled | ||
161 | by the driver, and those calls wait for the transmission to complete before | ||
162 | returning. | ||
163 | |||
164 | Note that the synchronization attribute has nothing to do with the possibility | ||
165 | that read() or write() completes fewer bytes than requested. There is a | ||
166 | separate configuration flag ("allowpartial") that determines whether such a | ||
167 | partial completion is allowed. | ||
168 | |||
169 | Seekable pipes | ||
170 | -------------- | ||
171 | |||
172 | A synchronous pipe can be configured to have the stream's position exposed | ||
173 | to the user logic at the FPGA. Such a pipe is also seekable on the host API. | ||
174 | With this feature, a memory or register interface can be attached on the | ||
175 | FPGA side to the seekable stream. Reading or writing to a certain address in | ||
176 | the attached memory is done by seeking to the desired address, and calling | ||
177 | read() or write() as required. | ||
178 | |||
179 | |||
180 | Internals | ||
181 | ========= | ||
182 | |||
183 | Source code organization | ||
184 | ------------------------ | ||
185 | |||
186 | The Xillybus driver consists of a core module, xillybus_core.c, and modules | ||
187 | that depend on the specific bus interface (xillybus_of.c and xillybus_pcie.c). | ||
188 | |||
189 | The bus specific modules are those probed when a suitable device is found by | ||
190 | the kernel. Since the DMA mapping and synchronization functions, which are bus | ||
191 | dependent by their nature, are used by the core module, a | ||
192 | xilly_endpoint_hardware structure is passed to the core module on | ||
193 | initialization. This structure is populated with pointers to wrapper functions | ||
194 | which execute the DMA-related operations on the bus. | ||
195 | |||
196 | Pipe attributes | ||
197 | --------------- | ||
198 | |||
199 | Each pipe has a number of attributes which are set when the FPGA component | ||
200 | (IP core) is built. They are fetched from the IDT (the data structure which | ||
201 | defines the core's configuration, see Probing below) by xilly_setupchannels() | ||
202 | in xillybus_core.c as follows: | ||
203 | |||
204 | * is_writebuf: The pipe's direction. A non-zero value means it's an FPGA to | ||
205 | host pipe (the FPGA "writes"). | ||
206 | |||
207 | * channelnum: The pipe's identification number in communication between the | ||
208 | host and FPGA. | ||
209 | |||
210 | * format: The underlying data width. See Data Granularity below. | ||
211 | |||
212 | * allowpartial: A non-zero value means that a read() or write() (whichever | ||
213 | applies) may return with less than the requested number of bytes. The common | ||
214 | choice is a non-zero value, to match standard UNIX behavior. | ||
215 | |||
216 | * synchronous: A non-zero value means that the pipe is synchronous. See | ||
217 | Synchronization above. | ||
218 | |||
219 | * bufsize: Each DMA buffer's size. Always a power of two. | ||
220 | |||
221 | * bufnum: The number of buffers allocated for this pipe. Always a power of two. | ||
222 | |||
223 | * exclusive_open: A non-zero value forces exclusive opening of the associated | ||
224 | device file. If the device file is bidirectional, and already opened only in | ||
225 | one direction, the opposite direction may be opened once. | ||
226 | |||
227 | * seekable: A non-zero value indicates that the pipe is seekable. See | ||
228 | Seekable pipes above. | ||
229 | |||
230 | * supports_nonempty: A non-zero value (which is typical) indicates that the | ||
231 | hardware will send the messages that are necessary to support select() and | ||
232 | poll() for this pipe. | ||
233 | |||
234 | Host never reads from the FPGA | ||
235 | ------------------------------ | ||
236 | |||
237 | Even though PCI Express is hotpluggable in general, a typical motherboard | ||
238 | doesn't expect a card to go away all of a sudden. But since the PCIe card | ||
239 | is based upon reprogrammable logic, a sudden disappearance from the bus is | ||
240 | quite likely as a result of an accidental reprogramming of the FPGA while the | ||
241 | host is up. In practice, nothing happens immediately in such a situation. But | ||
242 | if the host attempts to read from an address that is mapped to the PCI Express | ||
243 | device, that leads to an immediate freeze of the system on some motherboards, | ||
244 | even though the PCIe standard requires a graceful recovery. | ||
245 | |||
246 | In order to avoid these freezes, the Xillybus driver refrains completely from | ||
247 | reading from the device's register space. All communication from the FPGA to | ||
248 | the host is done through DMA. In particular, the Interrupt Service Routine | ||
249 | doesn't follow the common practice of checking a status register when it's | ||
250 | invoked. Rather, the FPGA prepares a small buffer which contains short | ||
251 | messages, which inform the host what the interrupt was about. | ||
252 | |||
253 | This mechanism is used on non-PCIe buses as well for the sake of uniformity. | ||
254 | |||
255 | |||
256 | Channels, pipes, and the message channel | ||
257 | ---------------------------------------- | ||
258 | |||
259 | Each of the (possibly bidirectional) pipes presented to the user is allocated | ||
260 | a data channel between the FPGA and the host. The distinction between channels | ||
261 | and pipes is necessary only because of channel 0, which is used for interrupt- | ||
262 | related messages from the FPGA, and has no pipe attached to it. | ||
263 | |||
264 | Data streaming | ||
265 | -------------- | ||
266 | |||
267 | Even though a non-segmented data stream is presented to the user at both | ||
268 | sides, the implementation relies on a set of DMA buffers which is allocated | ||
269 | for each channel. For the sake of illustration, let's take the FPGA to host | ||
270 | direction: As data streams into the respective channel's interface in the | ||
271 | FPGA, the Xillybus IP core writes it to one of the DMA buffers. When the | ||
272 | buffer is full, the FPGA informs the host about that (appending a | ||
273 | XILLYMSG_OPCODE_RELEASEBUF message to channel 0 and sending an interrupt if | ||
274 | necessary). The host responds by making the data available for reading through | ||
275 | the character device. When all data has been read, the host writes on | ||
276 | the FPGA's buffer control register, allowing the buffer's overwriting. Flow | ||
277 | control mechanisms exist on both sides to prevent underflows and overflows. | ||
278 | |||
279 | This is not good enough for creating a TCP/IP-like stream: If the data flow | ||
280 | stops momentarily before a DMA buffer is filled, the intuitive expectation is | ||
281 | that the partial data in the buffer will arrive anyhow, despite the buffer not | ||
282 | being completed. This is implemented by adding a field in the | ||
283 | XILLYMSG_OPCODE_RELEASEBUF message, through which the FPGA informs not just | ||
284 | which buffer is submitted, but how much data it contains. | ||
285 | |||
286 | But the FPGA will submit a partially filled buffer only if directed to do so | ||
287 | by the host. This situation occurs when the read() method has been blocking | ||
288 | for XILLY_RX_TIMEOUT jiffies (currently 10 ms), after which the host commands | ||
289 | the FPGA to submit a DMA buffer as soon as it can. This timeout mechanism | ||
290 | balances between bus bandwidth efficiency (preventing a lot of partially | ||
291 | filled buffers being sent) and a latency held fairly low for tails of data. | ||
292 | |||
293 | A similar setting is used in the host to FPGA direction. The handling of | ||
294 | partial DMA buffers is somewhat different, though. The user can tell the | ||
295 | driver to submit all data it has in the buffers to the FPGA, by issuing a | ||
296 | write() with the byte count set to zero. This is similar to a flush request, | ||
297 | but it doesn't block. There is also an autoflushing mechanism, which triggers | ||
298 | an equivalent flush roughly XILLY_RX_TIMEOUT jiffies after the last write(). | ||
299 | This allows the user to be oblivious about the underlying buffering mechanism | ||
300 | and yet enjoy a stream-like interface. | ||
301 | |||
302 | Note that the issue of partial buffer flushing is irrelevant for pipes having | ||
303 | the "synchronous" attribute nonzero, since synchronous pipes don't allow data | ||
304 | to lie around in the DMA buffers between read() and write() anyhow. | ||
305 | |||
306 | Data granularity | ||
307 | ---------------- | ||
308 | |||
309 | The data arrives or is sent at the FPGA as 8, 16 or 32 bit wide words, as | ||
310 | configured by the "format" attribute. Whenever possible, the driver attempts | ||
311 | to hide this when the pipe is accessed differently from its natural alignment. | ||
312 | For example, reading single bytes from a pipe with 32 bit granularity works | ||
313 | with no issues. Writing single bytes to pipes with 16 or 32 bit granularity | ||
314 | will also work, but the driver can't send partially completed words to the | ||
315 | FPGA, so the transmission of up to one word may be held until it's fully | ||
316 | occupied with user data. | ||
317 | |||
318 | This somewhat complicates the handling of host to FPGA streams, because | ||
319 | when a buffer is flushed, it may contain up to 3 bytes that don't form a word in | ||
320 | the FPGA, and hence can't be sent. To prevent loss of data, these leftover | ||
321 | bytes need to be moved to the next buffer. The parts in xillybus_core.c | ||
322 | that mention "leftovers" in some way are related to this complication. | ||
323 | |||
324 | Probing | ||
325 | ------- | ||
326 | |||
327 | As mentioned earlier, the number of pipes that are created when the driver | ||
328 | loads and their attributes depend on the Xillybus IP core in the FPGA. During | ||
329 | the driver's initialization, a blob containing configuration info, the | ||
330 | Interface Description Table (IDT), is sent from the FPGA to the host. The | ||
331 | bootstrap process is done in three phases: | ||
332 | |||
333 | 1. Acquire the length of the IDT, so a buffer can be allocated for it. This | ||
334 | is done by sending a quiesce command to the device, since the acknowledge | ||
335 | for this command contains the IDT's buffer length. | ||
336 | |||
337 | 2. Acquire the IDT itself. | ||
338 | |||
339 | 3. Create the interfaces according to the IDT. | ||
340 | |||
341 | Buffer allocation | ||
342 | ----------------- | ||
343 | |||
344 | In order to simplify the logic that prevents illegal boundary crossings of | ||
345 | PCIe packets, the following rule applies: If a buffer is smaller than 4kB, | ||
346 | it must not cross a 4kB boundary. Otherwise, it must be 4kB aligned. The | ||
347 | xilly_setupchannels() function allocates these buffers by requesting whole | ||
348 | pages from the kernel, and dividing them into DMA buffers as necessary. Since | ||
349 | all buffers' sizes are powers of two, it's possible to pack any set of such | ||
350 | buffers, with a maximal waste of one page of memory. | ||
351 | |||
352 | All buffers are allocated when the driver is loaded. This is necessary, | ||
353 | since large contiguous physical memory segments are sometimes requested, | ||
354 | which are more likely to be available when the system is freshly booted. | ||
355 | |||
356 | The allocation of buffer memory takes place in the same order they appear in | ||
357 | the IDT. The driver relies on a rule that the pipes are sorted with decreasing | ||
358 | buffer size in the IDT. If a requested buffer is larger than or equal to a page, | ||
359 | the necessary number of pages is requested from the kernel, and these are | ||
360 | used for this buffer. If the requested buffer is smaller than a page, one | ||
361 | single page is requested from the kernel, and that page is partially used. | ||
362 | Or, if there already is a partially used page at hand, the buffer is packed | ||
363 | into that page. It can be shown that all pages requested from the kernel | ||
364 | (except possibly for the last) are 100% utilized this way. | ||
365 | |||
366 | The "nonempty" message (supporting poll) | ||
367 | ---------------------------------------- | ||
368 | |||
369 | In order to support the "poll" method (and hence select() ), there is a small | ||
370 | catch regarding the FPGA to host direction: The FPGA may have filled a DMA | ||
371 | buffer with some data, but not submitted that buffer. If the host waited for | ||
372 | the buffer's submission by the FPGA, there would be a possibility that the | ||
373 | FPGA side has sent data, but a select() call would still block, because the | ||
374 | host has not received any notification about this. This is solved with | ||
375 | XILLYMSG_OPCODE_NONEMPTY messages sent by the FPGA when a channel goes from | ||
376 | completely empty to containing some data. | ||
377 | |||
378 | These messages are used only to support poll() and select(). The IP core can | ||
379 | be configured not to send them for a slight reduction of bandwidth. | ||
diff --git a/Documentation/driver-api/zorro.rst b/Documentation/driver-api/zorro.rst new file mode 100644 index 000000000000..664072b017e3 --- /dev/null +++ b/Documentation/driver-api/zorro.rst | |||
@@ -0,0 +1,104 @@ | |||
1 | ======================================== | ||
2 | Writing Device Drivers for Zorro Devices | ||
3 | ======================================== | ||
4 | |||
5 | :Author: Written by Geert Uytterhoeven <geert@linux-m68k.org> | ||
6 | :Last revised: September 5, 2003 | ||
7 | |||
8 | |||
9 | Introduction | ||
10 | ------------ | ||
11 | |||
12 | The Zorro bus is the bus used in the Amiga family of computers. Thanks to | ||
13 | AutoConfig(tm), it's 100% Plug-and-Play. | ||
14 | |||
15 | There are two types of Zorro buses, Zorro II and Zorro III: | ||
16 | |||
17 | - The Zorro II address space is 24-bit and lies within the first 16 MB of the | ||
18 | Amiga's address map. | ||
19 | |||
20 | - Zorro III is a 32-bit extension of Zorro II, which is backwards compatible | ||
21 | with Zorro II. The Zorro III address space lies outside the first 16 MB. | ||
22 | |||
23 | |||
24 | Probing for Zorro Devices | ||
25 | ------------------------- | ||
26 | |||
27 | Zorro devices are found by calling ``zorro_find_device()``, which returns a | ||
28 | pointer to the ``next`` Zorro device with the specified Zorro ID. A probe loop | ||
29 | for the board with Zorro ID ``ZORRO_PROD_xxx`` looks like:: | ||
30 | |||
31 | struct zorro_dev *z = NULL; | ||
32 | |||
33 | while ((z = zorro_find_device(ZORRO_PROD_xxx, z))) { | ||
34 | if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE, | ||
35 | "My explanation")) | ||
36 | ... | ||
37 | } | ||
38 | |||
39 | ``ZORRO_WILDCARD`` acts as a wildcard and finds any Zorro device. If your driver | ||
40 | supports different types of boards, you can use a construct like:: | ||
41 | |||
42 | struct zorro_dev *z = NULL; | ||
43 | |||
44 | while ((z = zorro_find_device(ZORRO_WILDCARD, z))) { | ||
45 | if (z->id != ZORRO_PROD_xxx1 && z->id != ZORRO_PROD_xxx2 && ...) | ||
46 | continue; | ||
47 | if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE, | ||
48 | "My explanation")) | ||
49 | ... | ||
50 | } | ||
51 | |||
52 | |||
53 | Zorro Resources | ||
54 | --------------- | ||
55 | |||
56 | Before you can access a Zorro device's registers, you have to make sure it's | ||
57 | not yet in use. This is done using the I/O memory space resource management | ||
58 | functions:: | ||
59 | |||
60 | request_mem_region() | ||
61 | release_mem_region() | ||
62 | |||
63 | Shortcuts to claim the whole device's address space are provided as well:: | ||
64 | |||
65 | zorro_request_device | ||
66 | zorro_release_device | ||
67 | |||
68 | |||
69 | Accessing the Zorro Address Space | ||
70 | --------------------------------- | ||
71 | |||
72 | The address regions in the Zorro device resources are Zorro bus address | ||
73 | regions. Due to the identity bus-physical address mapping on the Zorro bus, | ||
74 | they are CPU physical addresses as well. | ||
75 | |||
76 | The treatment of these regions depends on the type of Zorro space: | ||
77 | |||
78 | - Zorro II address space is always mapped and does not have to be mapped | ||
79 | explicitly using z_ioremap(). | ||
80 | |||
81 | Conversion from bus/physical Zorro II addresses to kernel virtual addresses | ||
82 | and vice versa is done using:: | ||
83 | |||
84 | virt_addr = ZTWO_VADDR(bus_addr); | ||
85 | bus_addr = ZTWO_PADDR(virt_addr); | ||
86 | |||
87 | - Zorro III address space must be mapped explicitly using z_ioremap() first | ||
88 | before it can be accessed:: | ||
89 | |||
90 | virt_addr = z_ioremap(bus_addr, size); | ||
91 | ... | ||
92 | z_iounmap(virt_addr); | ||
93 | |||
94 | |||
95 | References | ||
96 | ---------- | ||
97 | |||
98 | #. linux/include/linux/zorro.h | ||
99 | #. linux/include/uapi/linux/zorro.h | ||
100 | #. linux/include/uapi/linux/zorro_ids.h | ||
101 | #. linux/arch/m68k/include/asm/zorro.h | ||
102 | #. linux/drivers/zorro | ||
103 | #. /proc/bus/zorro | ||
104 | |||