diff options
-rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 4 | ||||
-rw-r--r-- | Documentation/driver-api/s390-drivers.rst | 4 | ||||
-rw-r--r-- | Documentation/s390/3270.rst (renamed from Documentation/s390/3270.txt) | 85 | ||||
-rw-r--r-- | Documentation/s390/cds.rst (renamed from Documentation/s390/cds.txt) | 368 | ||||
-rw-r--r-- | Documentation/s390/common_io.rst (renamed from Documentation/s390/CommonIO) | 49 | ||||
-rw-r--r-- | Documentation/s390/dasd.rst (renamed from Documentation/s390/DASD) | 33 | ||||
-rw-r--r-- | Documentation/s390/debugging390.rst (renamed from Documentation/s390/Debugging390.txt) | 2419 | ||||
-rw-r--r-- | Documentation/s390/driver-model.rst (renamed from Documentation/s390/driver-model.txt) | 179 | ||||
-rw-r--r-- | Documentation/s390/index.rst | 30 | ||||
-rw-r--r-- | Documentation/s390/monreader.rst (renamed from Documentation/s390/monreader.txt) | 85 | ||||
-rw-r--r-- | Documentation/s390/qeth.rst (renamed from Documentation/s390/qeth.txt) | 36 | ||||
-rw-r--r-- | Documentation/s390/s390dbf.rst | 803 | ||||
-rw-r--r-- | Documentation/s390/s390dbf.txt | 667 | ||||
-rw-r--r-- | Documentation/s390/text_files.rst | 11 | ||||
-rw-r--r-- | Documentation/s390/vfio-ap.rst (renamed from Documentation/s390/vfio-ap.txt) | 499 | ||||
-rw-r--r-- | Documentation/s390/vfio-ccw.rst (renamed from Documentation/s390/vfio-ccw.txt) | 92 | ||||
-rw-r--r-- | Documentation/s390/zfcpdump.rst (renamed from Documentation/s390/zfcpdump.txt) | 2 | ||||
-rw-r--r-- | MAINTAINERS | 4 | ||||
-rw-r--r-- | arch/s390/Kconfig | 4 | ||||
-rw-r--r-- | arch/s390/include/asm/debug.h | 4 | ||||
-rw-r--r-- | drivers/s390/char/zcore.c | 2 |
21 files changed, 3118 insertions, 2262 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 138f6664b2e2..b9b0623be925 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
@@ -478,7 +478,7 @@ | |||
478 | others). | 478 | others). |
479 | 479 | ||
480 | ccw_timeout_log [S390] | 480 | ccw_timeout_log [S390] |
481 | See Documentation/s390/CommonIO for details. | 481 | See Documentation/s390/common_io.rst for details. |
482 | 482 | ||
483 | cgroup_disable= [KNL] Disable a particular controller | 483 | cgroup_disable= [KNL] Disable a particular controller |
484 | Format: {name of the controller(s) to disable} | 484 | Format: {name of the controller(s) to disable} |
@@ -516,7 +516,7 @@ | |||
516 | /selinux/checkreqprot. | 516 | /selinux/checkreqprot. |
517 | 517 | ||
518 | cio_ignore= [S390] | 518 | cio_ignore= [S390] |
519 | See Documentation/s390/CommonIO for details. | 519 | See Documentation/s390/common_io.rst for details. |
520 | clk_ignore_unused | 520 | clk_ignore_unused |
521 | [CLK] | 521 | [CLK] |
522 | Prevents the clock framework from automatically gating | 522 | Prevents the clock framework from automatically gating |
diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst index 30e6aa7e160b..5158577bc29b 100644 --- a/Documentation/driver-api/s390-drivers.rst +++ b/Documentation/driver-api/s390-drivers.rst | |||
@@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well, | |||
27 | although they are not the focus of this document. | 27 | although they are not the focus of this document. |
28 | 28 | ||
29 | Some additional information can also be found in the kernel source under | 29 | Some additional information can also be found in the kernel source under |
30 | Documentation/s390/driver-model.txt. | 30 | Documentation/s390/driver-model.rst. |
31 | 31 | ||
32 | The css bus | 32 | The css bus |
33 | =========== | 33 | =========== |
@@ -38,7 +38,7 @@ into several categories: | |||
38 | * Standard I/O subchannels, for use by the system. They have a child | 38 | * Standard I/O subchannels, for use by the system. They have a child |
39 | device on the ccw bus and are described below. | 39 | device on the ccw bus and are described below. |
40 | * I/O subchannels bound to the vfio-ccw driver. See | 40 | * I/O subchannels bound to the vfio-ccw driver. See |
41 | Documentation/s390/vfio-ccw.txt. | 41 | Documentation/s390/vfio-ccw.rst. |
42 | * Message subchannels. No Linux driver currently exists. | 42 | * Message subchannels. No Linux driver currently exists. |
43 | * CHSC subchannels (at most one). The chsc subchannel driver can be used | 43 | * CHSC subchannels (at most one). The chsc subchannel driver can be used |
44 | to send asynchronous chsc commands. | 44 | to send asynchronous chsc commands. |
diff --git a/Documentation/s390/3270.txt b/Documentation/s390/3270.rst index 7c715de99774..e09e77954238 100644 --- a/Documentation/s390/3270.txt +++ b/Documentation/s390/3270.rst | |||
@@ -1,13 +1,17 @@ | |||
1 | =============================== | ||
1 | IBM 3270 Display System support | 2 | IBM 3270 Display System support |
3 | =============================== | ||
2 | 4 | ||
3 | This file describes the driver that supports local channel attachment | 5 | This file describes the driver that supports local channel attachment |
4 | of IBM 3270 devices. It consists of three sections: | 6 | of IBM 3270 devices. It consists of three sections: |
7 | |||
5 | * Introduction | 8 | * Introduction |
6 | * Installation | 9 | * Installation |
7 | * Operation | 10 | * Operation |
8 | 11 | ||
9 | 12 | ||
10 | INTRODUCTION. | 13 | Introduction |
14 | ============ | ||
11 | 15 | ||
12 | This paper describes installing and operating 3270 devices under | 16 | This paper describes installing and operating 3270 devices under |
13 | Linux/390. A 3270 device is a block-mode rows-and-columns terminal of | 17 | Linux/390. A 3270 device is a block-mode rows-and-columns terminal of |
@@ -17,12 +21,12 @@ twenty and thirty years ago. | |||
17 | You may have 3270s in-house and not know it. If you're using the | 21 | You may have 3270s in-house and not know it. If you're using the |
18 | VM-ESA operating system, define a 3270 to your virtual machine by using | 22 | VM-ESA operating system, define a 3270 to your virtual machine by using |
19 | the command "DEF GRAF <hex-address>" This paper presumes you will be | 23 | the command "DEF GRAF <hex-address>". This paper presumes you will be |
20 | defining four 3270s with the CP/CMS commands | 24 | defining four 3270s with the CP/CMS commands: |
21 | 25 | ||
22 | DEF GRAF 620 | 26 | - DEF GRAF 620 |
23 | DEF GRAF 621 | 27 | - DEF GRAF 621 |
24 | DEF GRAF 622 | 28 | - DEF GRAF 622 |
25 | DEF GRAF 623 | 29 | - DEF GRAF 623 |
26 | 30 | ||
27 | Your network connection from VM-ESA allows you to use x3270, tn3270, or | 31 | Your network connection from VM-ESA allows you to use x3270, tn3270, or |
28 | another 3270 emulator, started from an xterm window on your PC or | 32 | another 3270 emulator, started from an xterm window on your PC or |
@@ -34,7 +38,8 @@ This paper covers installation of the driver and operation of a | |||
34 | dialed-in x3270. | 38 | dialed-in x3270. |
35 | 39 | ||
36 | 40 | ||
37 | INSTALLATION. | 41 | Installation |
42 | ============ | ||
38 | 43 | ||
39 | You install the driver by installing a patch, doing a kernel build, and | 44 | You install the driver by installing a patch, doing a kernel build, and |
40 | running the configuration script (config3270.sh, in this directory). | 45 | running the configuration script (config3270.sh, in this directory). |
@@ -59,13 +64,15 @@ Use #CP TERM CONMODE 3270 to change it to 3270. If you generate only | |||
59 | at boot time to a 3270 if it is a 3215. | 64 | at boot time to a 3270 if it is a 3215. |
60 | 65 | ||
61 | In brief, these are the steps: | 66 | In brief, these are the steps: |
67 | |||
62 | 1. Install the tub3270 patch | 68 | 1. Install the tub3270 patch |
63 | 2. (If a module) add a line to a file in /etc/modprobe.d/*.conf | 69 | 2. (If a module) add a line to a file in `/etc/modprobe.d/*.conf` |
64 | 3. (If VM) define devices with DEF GRAF | 70 | 3. (If VM) define devices with DEF GRAF |
65 | 4. Reboot | 71 | 4. Reboot |
66 | 5. Configure | 72 | 5. Configure |
67 | 73 | ||
68 | To test that everything works, assuming VM and x3270, | 74 | To test that everything works, assuming VM and x3270, |
75 | |||
69 | 1. Bring up an x3270 window. | 76 | 1. Bring up an x3270 window. |
70 | 2. Use the DIAL command in that window. | 77 | 2. Use the DIAL command in that window. |
71 | 3. You should immediately see a Linux login screen. | 78 | 3. You should immediately see a Linux login screen. |
@@ -74,7 +81,8 @@ Here are the installation steps in detail: | |||
74 | 81 | ||
75 | 1. The 3270 driver is a part of the official Linux kernel | 82 | 1. The 3270 driver is a part of the official Linux kernel |
76 | source. Build a tree with the kernel source and any necessary | 83 | source. Build a tree with the kernel source and any necessary |
77 | patches. Then do | 84 | patches. Then do:: |
85 | |||
78 | make oldconfig | 86 | make oldconfig |
79 | (If you wish to disable 3215 console support, edit | 87 | (If you wish to disable 3215 console support, edit |
80 | .config; change CONFIG_TN3215's value to "n"; | 88 | .config; change CONFIG_TN3215's value to "n"; |
@@ -84,20 +92,22 @@ Here are the installation steps in detail: | |||
84 | make modules_install | 92 | make modules_install |
85 | 93 | ||
86 | 2. (Perform this step only if you have configured tub3270 as a | 94 | 2. (Perform this step only if you have configured tub3270 as a |
87 | module.) Add a line to a file /etc/modprobe.d/*.conf to automatically | 95 | module.) Add a line to a file `/etc/modprobe.d/*.conf` to automatically |
88 | load the driver when it's needed. With this line added, you will see | 96 | load the driver when it's needed. With this line added, you will see |
89 | login prompts appear on your 3270s as soon as boot is complete (or | 97 | login prompts appear on your 3270s as soon as boot is complete (or |
90 | with emulated 3270s, as soon as you dial into your vm guest using the | 98 | with emulated 3270s, as soon as you dial into your vm guest using the |
91 | command "DIAL <vmguestname>"). Since the line-mode major number is | 99 | command "DIAL <vmguestname>"). Since the line-mode major number is |
92 | 227, the line to add should be: | 100 | 227, the line to add should be:: |
101 | |||
93 | alias char-major-227 tub3270 | 102 | alias char-major-227 tub3270 |
94 | 103 | ||
95 | 3. Define graphic devices to your vm guest machine, if you | 104 | 3. Define graphic devices to your vm guest machine, if you |
96 | haven't already. Define them before you reboot (reipl): | 105 | haven't already. Define them before you reboot (reipl): |
97 | DEFINE GRAF 620 | 106 | |
98 | DEFINE GRAF 621 | 107 | - DEFINE GRAF 620 |
99 | DEFINE GRAF 622 | 108 | - DEFINE GRAF 621 |
100 | DEFINE GRAF 623 | 109 | - DEFINE GRAF 622 |
110 | - DEFINE GRAF 623 | ||
101 | 111 | ||
102 | 4. Reboot. The reboot process scans hardware devices, including | 112 | 4. Reboot. The reboot process scans hardware devices, including |
103 | 3270s, and this enables the tub3270 driver once loaded to respond | 113 | 3270s, and this enables the tub3270 driver once loaded to respond |
@@ -107,21 +117,23 @@ Here are the installation steps in detail: | |||
107 | 117 | ||
108 | 5. Run the 3270 configuration script config3270. It is | 118 | 5. Run the 3270 configuration script config3270. It is |
109 | distributed in this same directory, Documentation/s390, as | 119 | distributed in this same directory, Documentation/s390, as |
110 | config3270.sh. Inspect the output script it produces, | 120 | config3270.sh. Inspect the output script it produces, |
111 | /tmp/mkdev3270, and then run that script. This will create the | 121 | /tmp/mkdev3270, and then run that script. This will create the |
112 | necessary character special device files and make the necessary | 122 | necessary character special device files and make the necessary |
113 | changes to /etc/inittab. | 123 | changes to /etc/inittab. |
114 | 124 | ||
115 | Then notify /sbin/init that /etc/inittab has changed, by issuing | 125 | Then notify /sbin/init that /etc/inittab has changed, by issuing |
116 | the telinit command with the q operand: | 126 | the telinit command with the q operand:: |
127 | |||
117 | cd Documentation/s390 | 128 | cd Documentation/s390 |
118 | sh config3270.sh | 129 | sh config3270.sh |
119 | sh /tmp/mkdev3270 | 130 | sh /tmp/mkdev3270 |
120 | telinit q | 131 | telinit q |
121 | 132 | ||
122 | This should be sufficient for your first time. If your 3270 | 133 | This should be sufficient for your first time. If your 3270 |
123 | configuration has changed and you're reusing config3270, you | 134 | configuration has changed and you're reusing config3270, you |
124 | should follow these steps: | 135 | should follow these steps:: |
136 | |||
125 | Change 3270 configuration | 137 | Change 3270 configuration |
126 | Reboot | 138 | Reboot |
127 | Run config3270 and /tmp/mkdev3270 | 139 | Run config3270 and /tmp/mkdev3270 |
@@ -132,8 +144,10 @@ Here are the testing steps in detail: | |||
132 | 1. Bring up an x3270 window, or use an actual hardware 3278 or | 144 | 1. Bring up an x3270 window, or use an actual hardware 3278 or |
133 | 3279, or use the 3270 emulator of your choice. You would be | 145 | 3279, or use the 3270 emulator of your choice. You would be |
134 | running the emulator on your PC or workstation. You would use | 146 | running the emulator on your PC or workstation. You would use |
135 | the command, for example, | 147 | the command, for example:: |
148 | |||
136 | x3270 vm-esa-domain-name & | 149 | x3270 vm-esa-domain-name & |
150 | |||
137 | if you wanted a 3278 Model 4 with 43 rows of 80 columns, the | 151 | if you wanted a 3278 Model 4 with 43 rows of 80 columns, the |
138 | default model number. The driver does not take advantage of | 152 | default model number. The driver does not take advantage of |
139 | extended attributes. | 153 | extended attributes. |
@@ -144,7 +158,8 @@ Here are the testing steps in detail: | |||
144 | 158 | ||
145 | 2. Use the DIAL command instead of the LOGIN command to connect | 159 | 2. Use the DIAL command instead of the LOGIN command to connect |
146 | to one of the virtual 3270s you defined with the DEF GRAF | 160 | to one of the virtual 3270s you defined with the DEF GRAF |
147 | commands: | 161 | commands:: |
162 | |||
148 | dial my-vm-guest-name | 163 | dial my-vm-guest-name |
149 | 164 | ||
150 | 3. You should immediately see a login prompt from your | 165 | 3. You should immediately see a login prompt from your |
@@ -171,14 +186,17 @@ Here are the testing steps in detail: | |||
171 | Wrong major number? Wrong minor number? There's your | 186 | Wrong major number? Wrong minor number? There's your |
172 | problem! | 187 | problem! |
173 | 188 | ||
174 | D. Do you get the message | 189 | D. Do you get the message:: |
190 | |||
175 | "HCPDIA047E my-vm-guest-name 0620 does not exist"? | 191 | "HCPDIA047E my-vm-guest-name 0620 does not exist"? |
192 | |||
176 | If so, you must issue the command "DEF GRAF 620" from your VM | 193 | If so, you must issue the command "DEF GRAF 620" from your VM |
177 | 3215 console and then reboot the system. | 194 | 3215 console and then reboot the system. |
178 | 195 | ||
179 | 196 | ||
180 | 197 | ||
181 | OPERATION. | 198 | Operation |
199 | ========= | ||
182 | 200 | ||
183 | The driver defines three areas on the 3270 screen: the log area, the | 201 | The driver defines three areas on the 3270 screen: the log area, the |
184 | input area, and the status area. | 202 | input area, and the status area. |
@@ -203,8 +221,10 @@ which indicates no scrolling will occur. (If you hit ENTER with "Linux | |||
203 | Running" and nothing typed, the application receives a newline.) | 221 | Running" and nothing typed, the application receives a newline.) |
204 | 222 | ||
205 | You may change the scrolling timeout value. For example, the following | 223 | You may change the scrolling timeout value. For example, the following |
206 | command line: | 224 | command line:: |
225 | |||
207 | echo scrolltime=60 > /proc/tty/driver/tty3270 | 226 | echo scrolltime=60 > /proc/tty/driver/tty3270 |
227 | |||
208 | changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if | 228 | changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if |
209 | you wish to prevent scrolling entirely. | 229 | you wish to prevent scrolling entirely. |
210 | 230 | ||
@@ -228,7 +248,8 @@ cause an EOF also by typing "^D" and hitting ENTER. | |||
228 | No PF key is preassigned to cause a job suspension, but you may cause a | 248 | No PF key is preassigned to cause a job suspension, but you may cause a |
229 | job suspension by typing "^Z" and hitting ENTER. You may wish to | 249 | job suspension by typing "^Z" and hitting ENTER. You may wish to |
230 | assign this function to a PF key. To make PF7 cause job suspension, | 250 | assign this function to a PF key. To make PF7 cause job suspension, |
231 | execute the command: | 251 | execute the command:: |
252 | |||
232 | echo pf7=^z > /proc/tty/driver/tty3270 | 253 | echo pf7=^z > /proc/tty/driver/tty3270 |
233 | 254 | ||
234 | If the input you type does not end with the two characters "^n", the | 255 | If the input you type does not end with the two characters "^n", the |
@@ -243,8 +264,10 @@ command is entered into the stack only when the input area is not made | |||
243 | invisible (such as for password entry) and it is not identical to the | 264 | invisible (such as for password entry) and it is not identical to the |
244 | current top entry. PF10 rotates backward through the command stack; | 265 | current top entry. PF10 rotates backward through the command stack; |
245 | PF11 rotates forward. You may assign the backward function to any PF | 266 | PF11 rotates forward. You may assign the backward function to any PF |
246 | key (or PA key, for that matter), say, PA3, with the command: | 267 | key (or PA key, for that matter), say, PA3, with the command:: |
268 | |||
247 | echo -e pa3=\\033k > /proc/tty/driver/tty3270 | 269 | echo -e pa3=\\033k > /proc/tty/driver/tty3270 |
270 | |||
248 | This assigns the string ESC-k to PA3. Similarly, the string ESC-j | 271 | This assigns the string ESC-k to PA3. Similarly, the string ESC-j |
249 | performs the forward function. (Rationale: In bash with vi-mode line | 272 | performs the forward function. (Rationale: In bash with vi-mode line |
250 | editing, ESC-k and ESC-j retrieve backward and forward history. | 273 | editing, ESC-k and ESC-j retrieve backward and forward history. |
@@ -252,15 +275,19 @@ Suggestions welcome.) | |||
252 | 275 | ||
253 | Is a stack size of twenty commands not to your liking? Change it on | 276 | Is a stack size of twenty commands not to your liking? Change it on |
254 | the fly. To change to saving the last 100 commands, execute the | 277 | the fly. To change to saving the last 100 commands, execute the |
255 | command: | 278 | command:: |
279 | |||
256 | echo recallsize=100 > /proc/tty/driver/tty3270 | 280 | echo recallsize=100 > /proc/tty/driver/tty3270 |
257 | 281 | ||
258 | Have a command you issue frequently? Assign it to a PF or PA key! Use | 282 | Have a command you issue frequently? Assign it to a PF or PA key! Use |
259 | the command | 283 | the command:: |
260 | echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270 | 284 | |
285 | echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270 | ||
286 | |||
261 | to execute the commands mkdir foobar and cd foobar immediately when you | 287 | to execute the commands mkdir foobar and cd foobar immediately when you |
262 | hit PF24. Want to see the command line first, before you execute it? | 288 | hit PF24. Want to see the command line first, before you execute it? |
263 | Use the -n option of the echo command: | 289 | Use the -n option of the echo command:: |
290 | |||
264 | echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270 | 291 | echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270 |
265 | 292 | ||
266 | 293 | ||
diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.rst index 480a78ef5a1e..7006d8209d2e 100644 --- a/Documentation/s390/cds.txt +++ b/Documentation/s390/cds.rst | |||
@@ -1,14 +1,18 @@ | |||
1 | =========================== | ||
1 | Linux for S/390 and zSeries | 2 | Linux for S/390 and zSeries |
3 | =========================== | ||
2 | 4 | ||
3 | Common Device Support (CDS) | 5 | Common Device Support (CDS) |
4 | Device Driver I/O Support Routines | 6 | Device Driver I/O Support Routines |
5 | 7 | ||
6 | Authors : Ingo Adlung | 8 | Authors: |
7 | Cornelia Huck | 9 | - Ingo Adlung |
10 | - Cornelia Huck | ||
8 | 11 | ||
9 | Copyright, IBM Corp. 1999-2002 | 12 | Copyright, IBM Corp. 1999-2002 |
10 | 13 | ||
11 | Introduction | 14 | Introduction |
15 | ============ | ||
12 | 16 | ||
13 | This document describes the common device support routines for Linux/390. | 17 | This document describes the common device support routines for Linux/390. |
14 | Different than other hardware architectures, ESA/390 has defined a unified | 18 | Different than other hardware architectures, ESA/390 has defined a unified |
@@ -27,18 +31,20 @@ Operation manual (IBM Form. No. SA22-7201). | |||
27 | 31 | ||
28 | In order to build common device support for ESA/390 I/O interfaces, a | 32 | In order to build common device support for ESA/390 I/O interfaces, a |
29 | functional layer was introduced that provides generic I/O access methods to | 33 | functional layer was introduced that provides generic I/O access methods to |
30 | the hardware. | 34 | the hardware. |
31 | 35 | ||
32 | The common device support layer comprises the I/O support routines defined | 36 | The common device support layer comprises the I/O support routines defined |
33 | below. Some of them implement common Linux device driver interfaces, while | 37 | below. Some of them implement common Linux device driver interfaces, while |
34 | some of them are ESA/390 platform specific. | 38 | some of them are ESA/390 platform specific. |
35 | 39 | ||
36 | Note: | 40 | Note: |
37 | In order to write a driver for S/390, you also need to look into the interface | 41 | In order to write a driver for S/390, you also need to look into the interface |
38 | described in Documentation/s390/driver-model.txt. | 42 | described in Documentation/s390/driver-model.rst. |
39 | 43 | ||
40 | Note for porting drivers from 2.4: | 44 | Note for porting drivers from 2.4: |
45 | |||
41 | The major changes are: | 46 | The major changes are: |
47 | |||
42 | * The functions use a ccw_device instead of an irq (subchannel). | 48 | * The functions use a ccw_device instead of an irq (subchannel). |
43 | * All drivers must define a ccw_driver (see driver-model.txt) and the associated | 49 | * All drivers must define a ccw_driver (see driver-model.rst) and the associated |
44 | functions. | 50 | functions. |
@@ -57,19 +63,16 @@ The major changes are: | |||
57 | ccw_device_get_ciw() | 63 | ccw_device_get_ciw() |
58 | get commands from extended sense data. | 64 | get commands from extended sense data. |
59 | 65 | ||
60 | ccw_device_start() | 66 | ccw_device_start(), ccw_device_start_timeout(), ccw_device_start_key(), ccw_device_start_key_timeout() |
61 | ccw_device_start_timeout() | ||
62 | ccw_device_start_key() | ||
63 | ccw_device_start_key_timeout() | ||
64 | initiate an I/O request. | 67 | initiate an I/O request. |
65 | 68 | ||
66 | ccw_device_resume() | 69 | ccw_device_resume() |
67 | resume channel program execution. | 70 | resume channel program execution. |
68 | 71 | ||
69 | ccw_device_halt() | 72 | ccw_device_halt() |
70 | terminate the current I/O request processed on the device. | 73 | terminate the current I/O request processed on the device. |
71 | 74 | ||
72 | do_IRQ() | 75 | do_IRQ() |
73 | generic interrupt routine. This function is called by the interrupt entry | 76 | generic interrupt routine. This function is called by the interrupt entry |
74 | routine whenever an I/O interrupt is presented to the system. The do_IRQ() | 77 | routine whenever an I/O interrupt is presented to the system. The do_IRQ() |
75 | routine determines the interrupt status and calls the device specific | 78 | routine determines the interrupt status and calls the device specific |
@@ -82,12 +85,15 @@ first level interrupt handler only and does not comprise a device driver | |||
82 | callable interface. Instead, the functional description of do_IO() also | 85 | callable interface. Instead, the functional description of do_IO() also |
83 | describes the input to the device specific interrupt handler. | 86 | describes the input to the device specific interrupt handler. |
84 | 87 | ||
85 | Note: All explanations apply also to the 64 bit architecture s390x. | 88 | Note: |
89 | All explanations apply also to the 64 bit architecture s390x. | ||
86 | 90 | ||
87 | 91 | ||
88 | Common Device Support (CDS) for Linux/390 Device Drivers | 92 | Common Device Support (CDS) for Linux/390 Device Drivers |
93 | ======================================================== | ||
89 | 94 | ||
90 | General Information | 95 | General Information |
96 | ------------------- | ||
91 | 97 | ||
92 | The following chapters describe the I/O related interface routines the | 98 | The following chapters describe the I/O related interface routines the |
93 | Linux/390 common device support (CDS) provides to allow for device specific | 99 | Linux/390 common device support (CDS) provides to allow for device specific |
@@ -101,6 +107,7 @@ can be found in the architecture specific C header file | |||
101 | linux/arch/s390/include/asm/irq.h. | 107 | linux/arch/s390/include/asm/irq.h. |
102 | 108 | ||
103 | Overview of CDS interface concepts | 109 | Overview of CDS interface concepts |
110 | ---------------------------------- | ||
104 | 111 | ||
105 | Different to other hardware platforms, the ESA/390 architecture doesn't define | 112 | Different to other hardware platforms, the ESA/390 architecture doesn't define |
106 | interrupt lines managed by a specific interrupt controller and bus systems | 113 | interrupt lines managed by a specific interrupt controller and bus systems |
@@ -126,7 +133,7 @@ has to call every single device driver registered on this IRQ in order to | |||
126 | determine the device driver owning the device that raised the interrupt. | 133 | determine the device driver owning the device that raised the interrupt. |
127 | 134 | ||
128 | Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel). | 135 | Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel). |
129 | For internal use of the common I/O layer, these are still there. However, | 136 | For internal use of the common I/O layer, these are still there. However, |
130 | device drivers should use the new calling interface via the ccw_device only. | 137 | device drivers should use the new calling interface via the ccw_device only. |
131 | 138 | ||
132 | During its startup the Linux/390 system checks for peripheral devices. Each | 139 | During its startup the Linux/390 system checks for peripheral devices. Each |
@@ -134,7 +141,7 @@ of those devices is uniquely defined by a so called subchannel by the ESA/390 | |||
134 | channel subsystem. While the subchannel numbers are system generated, each | 141 | channel subsystem. While the subchannel numbers are system generated, each |
135 | subchannel also takes a user defined attribute, the so called device number. | 142 | subchannel also takes a user defined attribute, the so called device number. |
136 | Both subchannel number and device number cannot exceed 65535. During sysfs | 143 | Both subchannel number and device number cannot exceed 65535. During sysfs |
137 | initialisation, the information about control unit type and device types that | 144 | initialisation, the information about control unit type and device types that |
138 | imply specific I/O commands (channel command words - CCWs) in order to operate | 145 | imply specific I/O commands (channel command words - CCWs) in order to operate |
139 | the device are gathered. Device drivers can retrieve this set of hardware | 146 | the device are gathered. Device drivers can retrieve this set of hardware |
140 | information during their initialization step to recognize the devices they | 147 | information during their initialization step to recognize the devices they |
@@ -164,18 +171,26 @@ get_ciw() - get command information word | |||
164 | This call enables a device driver to get information about supported commands | 171 | This call enables a device driver to get information about supported commands |
165 | from the extended SenseID data. | 172 | from the extended SenseID data. |
166 | 173 | ||
167 | struct ciw * | 174 | :: |
168 | ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd); | ||
169 | 175 | ||
170 | cdev - The ccw_device for which the command is to be retrieved. | 176 | struct ciw * |
171 | cmd - The command type to be retrieved. | 177 | ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd); |
178 | |||
179 | ==== ======================================================== | ||
180 | cdev The ccw_device for which the command is to be retrieved. | ||
181 | cmd The command type to be retrieved. | ||
182 | ==== ======================================================== | ||
172 | 183 | ||
173 | ccw_device_get_ciw() returns: | 184 | ccw_device_get_ciw() returns: |
174 | NULL - No extended data available, invalid device or command not found. | ||
175 | !NULL - The command requested. | ||
176 | 185 | ||
186 | ===== ================================================================ | ||
187 | NULL No extended data available, invalid device or command not found. | ||
188 | !NULL The command requested. | ||
189 | ===== ================================================================ | ||
190 | |||
191 | :: | ||
177 | 192 | ||
178 | ccw_device_start() - Initiate I/O Request | 193 | ccw_device_start() - Initiate I/O Request |
179 | 194 | ||
180 | The ccw_device_start() routines is the I/O request front-end processor. All | 195 | The ccw_device_start() routine is the I/O request front-end processor. All |
181 | device driver I/O requests must be issued using this routine. A device driver | 196 | device driver I/O requests must be issued using this routine. A device driver |
@@ -186,93 +201,105 @@ This description also covers the status information passed to the device | |||
186 | driver's interrupt handler as this is related to the rules (flags) defined | 201 | driver's interrupt handler as this is related to the rules (flags) defined |
187 | with the associated I/O request when calling ccw_device_start(). | 202 | with the associated I/O request when calling ccw_device_start(). |
188 | 203 | ||
189 | int ccw_device_start(struct ccw_device *cdev, | 204 | :: |
190 | struct ccw1 *cpa, | 205 | |
191 | unsigned long intparm, | 206 | int ccw_device_start(struct ccw_device *cdev, |
192 | __u8 lpm, | 207 | struct ccw1 *cpa, |
193 | unsigned long flags); | 208 | unsigned long intparm, |
194 | int ccw_device_start_timeout(struct ccw_device *cdev, | 209 | __u8 lpm, |
195 | struct ccw1 *cpa, | 210 | unsigned long flags); |
196 | unsigned long intparm, | 211 | int ccw_device_start_timeout(struct ccw_device *cdev, |
197 | __u8 lpm, | 212 | struct ccw1 *cpa, |
198 | unsigned long flags, | 213 | unsigned long intparm, |
199 | int expires); | 214 | __u8 lpm, |
200 | int ccw_device_start_key(struct ccw_device *cdev, | 215 | unsigned long flags, |
201 | struct ccw1 *cpa, | 216 | int expires); |
202 | unsigned long intparm, | 217 | int ccw_device_start_key(struct ccw_device *cdev, |
203 | __u8 lpm, | 218 | struct ccw1 *cpa, |
204 | __u8 key, | 219 | unsigned long intparm, |
205 | unsigned long flags); | 220 | __u8 lpm, |
206 | int ccw_device_start_key_timeout(struct ccw_device *cdev, | 221 | __u8 key, |
207 | struct ccw1 *cpa, | 222 | unsigned long flags); |
208 | unsigned long intparm, | 223 | int ccw_device_start_key_timeout(struct ccw_device *cdev, |
209 | __u8 lpm, | 224 | struct ccw1 *cpa, |
210 | __u8 key, | 225 | unsigned long intparm, |
211 | unsigned long flags, | 226 | __u8 lpm, |
212 | int expires); | 227 | __u8 key, |
213 | 228 | unsigned long flags, | |
214 | cdev : ccw_device the I/O is destined for | 229 | int expires); |
215 | cpa : logical start address of channel program | 230 | |
216 | user_intparm : user specific interrupt information; will be presented | 231 | ============= ============================================================= |
217 | back to the device driver's interrupt handler. Allows a | 232 | cdev ccw_device the I/O is destined for |
218 | device driver to associate the interrupt with a | 233 | cpa logical start address of channel program |
219 | particular I/O request. | 234 | user_intparm user specific interrupt information; will be presented |
220 | lpm : defines the channel path to be used for a specific I/O | 235 | back to the device driver's interrupt handler. Allows a |
221 | request. A value of 0 will make cio use the opm. | 236 | device driver to associate the interrupt with a |
222 | key : the storage key to use for the I/O (useful for operating on a | 237 | particular I/O request. |
223 | storage with a storage key != default key) | 238 | lpm defines the channel path to be used for a specific I/O |
224 | flag : defines the action to be performed for I/O processing | 239 | request. A value of 0 will make cio use the opm. |
225 | expires : timeout value in jiffies. The common I/O layer will terminate | 240 | key the storage key to use for the I/O (useful for operating on a |
226 | the running program after this and call the interrupt handler | 241 | storage with a storage key != default key) |
227 | with ERR_PTR(-ETIMEDOUT) as irb. | 242 | flag defines the action to be performed for I/O processing |
228 | 243 | expires timeout value in jiffies. The common I/O layer will terminate | |
229 | Possible flag values are : | 244 | the running program after this and call the interrupt handler |
230 | 245 | with ERR_PTR(-ETIMEDOUT) as irb. | |
231 | DOIO_ALLOW_SUSPEND - channel program may become suspended | 246 | ============= ============================================================= |
232 | DOIO_DENY_PREFETCH - don't allow for CCW prefetch; usually | 247 | |
233 | this implies the channel program might | 248 | Possible flag values are: |
234 | become modified | 249 | |
235 | DOIO_SUPPRESS_INTER - don't call the handler on intermediate status | 250 | ========================= ============================================= |
236 | 251 | DOIO_ALLOW_SUSPEND channel program may become suspended | |
237 | The cpa parameter points to the first format 1 CCW of a channel program : | 252 | DOIO_DENY_PREFETCH don't allow for CCW prefetch; usually |
238 | 253 | this implies the channel program might | |
239 | struct ccw1 { | 254 | become modified |
240 | __u8 cmd_code;/* command code */ | 255 | DOIO_SUPPRESS_INTER don't call the handler on intermediate status |
241 | __u8 flags; /* flags, like IDA addressing, etc. */ | 256 | ========================= ============================================= |
242 | __u16 count; /* byte count */ | 257 | |
243 | __u32 cda; /* data address */ | 258 | The cpa parameter points to the first format 1 CCW of a channel program:: |
244 | } __attribute__ ((packed,aligned(8))); | 259 | |
245 | 260 | struct ccw1 { | |
246 | with the following CCW flags values defined : | 261 | __u8 cmd_code;/* command code */ |
247 | 262 | __u8 flags; /* flags, like IDA addressing, etc. */ | |
248 | CCW_FLAG_DC - data chaining | 263 | __u16 count; /* byte count */ |
249 | CCW_FLAG_CC - command chaining | 264 | __u32 cda; /* data address */ |
250 | CCW_FLAG_SLI - suppress incorrect length | 265 | } __attribute__ ((packed,aligned(8))); |
251 | CCW_FLAG_SKIP - skip | 266 | |
252 | CCW_FLAG_PCI - PCI | 267 | with the following CCW flags values defined: |
253 | CCW_FLAG_IDA - indirect addressing | 268 | |
254 | CCW_FLAG_SUSPEND - suspend | 269 | =================== ========================= |
270 | CCW_FLAG_DC data chaining | ||
271 | CCW_FLAG_CC command chaining | ||
272 | CCW_FLAG_SLI suppress incorrect length | ||
273 | CCW_FLAG_SKIP skip | ||
274 | CCW_FLAG_PCI PCI | ||
275 | CCW_FLAG_IDA indirect addressing | ||
276 | CCW_FLAG_SUSPEND suspend | ||
277 | =================== ========================= | ||
255 | 278 | ||
256 | 279 | ||
257 | Via ccw_device_set_options(), the device driver may specify the following | 280 | Via ccw_device_set_options(), the device driver may specify the following |
258 | options for the device: | 281 | options for the device: |
259 | 282 | ||
260 | DOIO_EARLY_NOTIFICATION - allow for early interrupt notification | 283 | ========================= ====================================== |
261 | DOIO_REPORT_ALL - report all interrupt conditions | 284 | DOIO_EARLY_NOTIFICATION allow for early interrupt notification |
285 | DOIO_REPORT_ALL report all interrupt conditions | ||
286 | ========================= ====================================== | ||
262 | 287 | ||
263 | 288 | ||
264 | The ccw_device_start() function returns : | 289 | The ccw_device_start() function returns: |
265 | 290 | ||
266 | 0 - successful completion or request successfully initiated | 291 | ======== ====================================================================== |
267 | -EBUSY - The device is currently processing a previous I/O request, or there is | 292 | 0 successful completion or request successfully initiated |
268 | a status pending at the device. | 293 | -EBUSY The device is currently processing a previous I/O request, or there is |
269 | -ENODEV - cdev is invalid, the device is not operational or the ccw_device is | 294 | a status pending at the device. |
270 | not online. | 295 | -ENODEV cdev is invalid, the device is not operational or the ccw_device is |
296 | not online. | ||
297 | ======== ====================================================================== | ||
271 | 298 | ||
272 | When the I/O request completes, the CDS first level interrupt handler will | 299 | When the I/O request completes, the CDS first level interrupt handler will |
273 | accumulate the status in a struct irb and then call the device interrupt handler. | 300 | accumulate the status in a struct irb and then call the device interrupt handler. |
274 | The intparm field will contain the value the device driver has associated with a | 301 | The intparm field will contain the value the device driver has associated with a |
275 | particular I/O request. If a pending device status was recognized, | 302 | particular I/O request. If a pending device status was recognized, |
276 | intparm will be set to 0 (zero). This may happen during I/O initiation or delayed | 303 | intparm will be set to 0 (zero). This may happen during I/O initiation or delayed |
277 | by an alert status notification. In any case this status is not related to the | 304 | by an alert status notification. In any case this status is not related to the |
278 | current (last) I/O request. In case of a delayed status notification no special | 305 | current (last) I/O request. In case of a delayed status notification no special |
@@ -282,9 +309,11 @@ never started, even though ccw_device_start() returned with successful completio | |||
282 | The irb may contain an error value, and the device driver should check for this | 309 | The irb may contain an error value, and the device driver should check for this |
283 | first: | 310 | first: |
284 | 311 | ||
285 | -ETIMEDOUT: the common I/O layer terminated the request after the specified | 312 | ========== ================================================================= |
286 | timeout value | 313 | -ETIMEDOUT the common I/O layer terminated the request after the specified |
287 | -EIO: the common I/O layer terminated the request due to an error state | 314 | timeout value |
315 | -EIO the common I/O layer terminated the request due to an error state | ||
316 | ========== ================================================================= | ||
288 | 317 | ||
289 | If the concurrent sense flag in the extended status word (esw) in the irb is | 318 | If the concurrent sense flag in the extended status word (esw) in the irb is |
290 | set, the field erw.scnt in the esw describes the number of device specific | 319 | set, the field erw.scnt in the esw describes the number of device specific |
@@ -294,6 +323,7 @@ sensing by the device driver itself is required. | |||
294 | The device interrupt handler can use the following definitions to investigate | 323 | The device interrupt handler can use the following definitions to investigate |
295 | the primary unit check source coded in sense byte 0 : | 324 | the primary unit check source coded in sense byte 0 : |
296 | 325 | ||
326 | ======================= ==== | ||
297 | SNS0_CMD_REJECT 0x80 | 327 | SNS0_CMD_REJECT 0x80 |
298 | SNS0_INTERVENTION_REQ 0x40 | 328 | SNS0_INTERVENTION_REQ 0x40 |
299 | SNS0_BUS_OUT_CHECK 0x20 | 329 | SNS0_BUS_OUT_CHECK 0x20 |
@@ -301,36 +331,41 @@ SNS0_EQUIPMENT_CHECK 0x10 | |||
301 | SNS0_DATA_CHECK 0x08 | 331 | SNS0_DATA_CHECK 0x08 |
302 | SNS0_OVERRUN 0x04 | 332 | SNS0_OVERRUN 0x04 |
303 | SNS0_INCOMPL_DOMAIN 0x01 | 333 | SNS0_INCOMPL_DOMAIN 0x01 |
334 | ======================= ==== | ||
304 | 335 | ||
305 | Depending on the device status, multiple of those values may be set together. | 336 | Depending on the device status, multiple of those values may be set together. |
306 | Please refer to the device specific documentation for details. | 337 | Please refer to the device specific documentation for details. |
307 | 338 | ||
308 | The irb->scsw.cstat field provides the (accumulated) subchannel status : | 339 | The irb->scsw.cstat field provides the (accumulated) subchannel status : |
309 | 340 | ||
310 | SCHN_STAT_PCI - program controlled interrupt | 341 | ========================= ============================ |
311 | SCHN_STAT_INCORR_LEN - incorrect length | 342 | SCHN_STAT_PCI program controlled interrupt |
312 | SCHN_STAT_PROG_CHECK - program check | 343 | SCHN_STAT_INCORR_LEN incorrect length |
313 | SCHN_STAT_PROT_CHECK - protection check | 344 | SCHN_STAT_PROG_CHECK program check |
314 | SCHN_STAT_CHN_DATA_CHK - channel data check | 345 | SCHN_STAT_PROT_CHECK protection check |
315 | SCHN_STAT_CHN_CTRL_CHK - channel control check | 346 | SCHN_STAT_CHN_DATA_CHK channel data check |
316 | SCHN_STAT_INTF_CTRL_CHK - interface control check | 347 | SCHN_STAT_CHN_CTRL_CHK channel control check |
317 | SCHN_STAT_CHAIN_CHECK - chaining check | 348 | SCHN_STAT_INTF_CTRL_CHK interface control check |
349 | SCHN_STAT_CHAIN_CHECK chaining check | ||
350 | ========================= ============================ | ||
318 | 351 | ||
319 | The irb->scsw.dstat field provides the (accumulated) device status : | 352 | The irb->scsw.dstat field provides the (accumulated) device status : |
320 | 353 | ||
321 | DEV_STAT_ATTENTION - attention | 354 | ===================== ================= |
322 | DEV_STAT_STAT_MOD - status modifier | 355 | DEV_STAT_ATTENTION attention |
323 | DEV_STAT_CU_END - control unit end | 356 | DEV_STAT_STAT_MOD status modifier |
324 | DEV_STAT_BUSY - busy | 357 | DEV_STAT_CU_END control unit end |
325 | DEV_STAT_CHN_END - channel end | 358 | DEV_STAT_BUSY busy |
326 | DEV_STAT_DEV_END - device end | 359 | DEV_STAT_CHN_END channel end |
327 | DEV_STAT_UNIT_CHECK - unit check | 360 | DEV_STAT_DEV_END device end |
328 | DEV_STAT_UNIT_EXCEP - unit exception | 361 | DEV_STAT_UNIT_CHECK unit check |
362 | DEV_STAT_UNIT_EXCEP unit exception | ||
363 | ===================== ================= | ||
329 | 364 | ||
330 | Please see the ESA/390 Principles of Operation manual for details on the | 365 | Please see the ESA/390 Principles of Operation manual for details on the |
331 | individual flag meanings. | 366 | individual flag meanings. |
332 | 367 | ||
333 | Usage Notes : | 368 | Usage Notes: |
334 | 369 | ||
335 | ccw_device_start() must be called disabled and with the ccw device lock held. | 370 | ccw_device_start() must be called disabled and with the ccw device lock held. |
336 | 371 | ||
@@ -374,32 +409,39 @@ secondary status without error (alert status) is presented, this indicates | |||
374 | successful completion for all overlapping ccw_device_start() requests that have | 409 | successful completion for all overlapping ccw_device_start() requests that have |
375 | been issued since the last secondary (final) status. | 410 | been issued since the last secondary (final) status. |
376 | 411 | ||
377 | Channel programs that intend to set the suspend flag on a channel command word | 412 | Channel programs that intend to set the suspend flag on a channel command word |
378 | (CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the | 413 | (CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the |
379 | suspend flag will cause a channel program check. At the time the channel program | 414 | suspend flag will cause a channel program check. At the time the channel program |
380 | becomes suspended an intermediate interrupt will be generated by the channel | 415 | becomes suspended an intermediate interrupt will be generated by the channel |
381 | subsystem. | 416 | subsystem. |
382 | 417 | ||
383 | ccw_device_resume() - Resume Channel Program Execution | 418 | ccw_device_resume() - Resume Channel Program Execution |
384 | 419 | ||
385 | If a device driver chooses to suspend the current channel program execution by | 420 | If a device driver chooses to suspend the current channel program execution by |
386 | setting the CCW suspend flag on a particular CCW, the channel program execution | 421 | setting the CCW suspend flag on a particular CCW, the channel program execution |
387 | is suspended. In order to resume channel program execution the CIO layer | 422 | is suspended. In order to resume channel program execution the CIO layer |
388 | provides the ccw_device_resume() routine. | 423 | provides the ccw_device_resume() routine. |
389 | 424 | ||
390 | int ccw_device_resume(struct ccw_device *cdev); | 425 | :: |
391 | 426 | ||
392 | cdev - ccw_device the resume operation is requested for | 427 | int ccw_device_resume(struct ccw_device *cdev); |
428 | |||
429 | ==== ================================================ | ||
430 | cdev ccw_device the resume operation is requested for | ||
431 | ==== ================================================ | ||
393 | 432 | ||
394 | The ccw_device_resume() function returns: | 433 | The ccw_device_resume() function returns: |
395 | 434 | ||
396 | 0 - suspended channel program is resumed | 435 | ========= ============================================== |
397 | -EBUSY - status pending | 436 | 0 suspended channel program is resumed |
398 | -ENODEV - cdev invalid or not-operational subchannel | 437 | -EBUSY status pending |
399 | -EINVAL - resume function not applicable | 438 | -ENODEV cdev invalid or not-operational subchannel |
400 | -ENOTCONN - there is no I/O request pending for completion | 439 | -EINVAL resume function not applicable |
440 | -ENOTCONN there is no I/O request pending for completion | ||
441 | ========= ============================================== | ||
401 | 442 | ||
402 | Usage Notes: | 443 | Usage Notes: |
444 | |||
403 | Please have a look at the ccw_device_start() usage notes for more details on | 445 | Please have a look at the ccw_device_start() usage notes for more details on |
404 | suspended channel programs. | 446 | suspended channel programs. |
405 | 447 | ||
@@ -412,22 +454,28 @@ command is provided. | |||
412 | 454 | ||
413 | ccw_device_halt() must be called disabled and with the ccw device lock held. | 455 | ccw_device_halt() must be called disabled and with the ccw device lock held. |
414 | 456 | ||
415 | int ccw_device_halt(struct ccw_device *cdev, | 457 | :: |
416 | unsigned long intparm); | 458 | |
459 | int ccw_device_halt(struct ccw_device *cdev, | ||
460 | unsigned long intparm); | ||
417 | 461 | ||
418 | cdev : ccw_device the halt operation is requested for | 462 | ======= ===================================================== |
419 | intparm : interruption parameter; value is only used if no I/O | 463 | cdev ccw_device the halt operation is requested for |
420 | is outstanding, otherwise the intparm associated with | 464 | intparm interruption parameter; value is only used if no I/O |
421 | the I/O request is returned | 465 | is outstanding, otherwise the intparm associated with |
466 | the I/O request is returned | ||
467 | ======= ===================================================== | ||
422 | 468 | ||
423 | The ccw_device_halt() function returns : | 469 | The ccw_device_halt() function returns: |
424 | 470 | ||
425 | 0 - request successfully initiated | 471 | ======= ============================================================== |
426 | -EBUSY - the device is currently busy, or status pending. | 472 | 0 request successfully initiated |
427 | -ENODEV - cdev invalid. | 473 | -EBUSY the device is currently busy, or status pending. |
428 | -EINVAL - The device is not operational or the ccw device is not online. | 474 | -ENODEV cdev invalid. |
475 | -EINVAL The device is not operational or the ccw device is not online. | ||
476 | ======= ============================================================== | ||
429 | 477 | ||
430 | Usage Notes : | 478 | Usage Notes: |
431 | 479 | ||
432 | A device driver may write a never-ending channel program by writing a channel | 480 | A device driver may write a never-ending channel program by writing a channel |
433 | program that at its end loops back to its beginning by means of a transfer in | 481 | program that at its end loops back to its beginning by means of a transfer in |
@@ -438,25 +486,34 @@ can then perform an appropriate action. Prior to interrupt of an outstanding | |||
438 | read to a network device (with or without PCI flag) a ccw_device_halt() | 486 | read to a network device (with or without PCI flag) a ccw_device_halt() |
439 | is required to end the pending operation. | 487 | is required to end the pending operation. |
440 | 488 | ||
441 | ccw_device_clear() - Terminate I/O Request Processing | 489 | :: |
490 | |||
491 | ccw_device_clear() - Terminate I/O Request Processing | ||
442 | 492 | ||
443 | In order to terminate all I/O processing at the subchannel, the clear subchannel | 493 | In order to terminate all I/O processing at the subchannel, the clear subchannel |
444 | (CSCH) command is used. It can be issued via ccw_device_clear(). | 494 | (CSCH) command is used. It can be issued via ccw_device_clear(). |
445 | 495 | ||
446 | ccw_device_clear() must be called disabled and with the ccw device lock held. | 496 | ccw_device_clear() must be called disabled and with the ccw device lock held. |
447 | 497 | ||
448 | int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm); | 498 | :: |
499 | |||
500 | int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm); | ||
449 | 501 | ||
450 | cdev: ccw_device the clear operation is requested for | 502 | ======= =============================================== |
451 | intparm: interruption parameter (see ccw_device_halt()) | 503 | cdev ccw_device the clear operation is requested for |
504 | intparm interruption parameter (see ccw_device_halt()) | ||
505 | ======= =============================================== | ||
452 | 506 | ||
453 | The ccw_device_clear() function returns: | 507 | The ccw_device_clear() function returns: |
454 | 508 | ||
455 | 0 - request successfully initiated | 509 | ======= ============================================================== |
456 | -ENODEV - cdev invalid | 510 | 0 request successfully initiated |
457 | -EINVAL - The device is not operational or the ccw device is not online. | 511 | -ENODEV cdev invalid |
512 | -EINVAL The device is not operational or the ccw device is not online. | ||
513 | ======= ============================================================== | ||
458 | 514 | ||
459 | Miscellaneous Support Routines | 515 | Miscellaneous Support Routines |
516 | ------------------------------ | ||
460 | 517 | ||
461 | This chapter describes various routines to be used in a Linux/390 device | 518 | This chapter describes various routines to be used in a Linux/390 device |
462 | driver programming environment. | 519 | driver programming environment. |
@@ -466,7 +523,8 @@ get_ccwdev_lock() | |||
466 | Get the address of the device specific lock. This is then used in | 523 | Get the address of the device specific lock. This is then used in |
467 | spin_lock() / spin_unlock() calls. | 524 | spin_lock() / spin_unlock() calls. |
468 | 525 | ||
526 | :: | ||
469 | 527 | ||
470 | __u8 ccw_device_get_path_mask(struct ccw_device *cdev); | 528 | __u8 ccw_device_get_path_mask(struct ccw_device *cdev); |
471 | 529 | ||
472 | Get the mask of the path currently available for cdev. | 530 | Get the mask of the path currently available for cdev. |
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/common_io.rst index 6e0f63f343b4..846485681ce7 100644 --- a/Documentation/s390/CommonIO +++ b/Documentation/s390/common_io.rst | |||
@@ -1,5 +1,9 @@ | |||
1 | S/390 common I/O-Layer - command line parameters, procfs and debugfs entries | 1 | ====================== |
2 | ============================================================================ | 2 | S/390 common I/O-Layer |
3 | ====================== | ||
4 | |||
5 | command line parameters, procfs and debugfs entries | ||
6 | =================================================== | ||
3 | 7 | ||
4 | Command line parameters | 8 | Command line parameters |
5 | ----------------------- | 9 | ----------------------- |
@@ -13,7 +17,7 @@ Command line parameters | |||
13 | device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>} | 17 | device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>} |
14 | 18 | ||
15 | The given devices will be ignored by the common I/O-layer; no detection | 19 | The given devices will be ignored by the common I/O-layer; no detection |
16 | and device sensing will be done on any of those devices. The subchannel to | 20 | and device sensing will be done on any of those devices. The subchannel to |
17 | which the device in question is attached will be treated as if no device was | 21 | which the device in question is attached will be treated as if no device was |
18 | attached. | 22 | attached. |
19 | 23 | ||
@@ -28,14 +32,20 @@ Command line parameters | |||
28 | keywords can be used to refer to the CCW based boot device and CCW console | 32 | keywords can be used to refer to the CCW based boot device and CCW console |
29 | device respectively (these are probably useful only when combined with the '!' | 33 | device respectively (these are probably useful only when combined with the '!' |
30 | operator). The '!' operator will cause the I/O-layer to _not_ ignore a device. | 34 | operator). The '!' operator will cause the I/O-layer to _not_ ignore a device. |
31 | The command line is parsed from left to right. | 35 | The command line |
36 | is parsed from left to right. | ||
37 | |||
38 | For example:: | ||
32 | 39 | ||
33 | For example, | ||
34 | cio_ignore=0.0.0023-0.0.0042,0.0.4711 | 40 | cio_ignore=0.0.0023-0.0.0042,0.0.4711 |
41 | |||
35 | will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device | 42 | will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device |
36 | 0.0.4711, if detected. | 43 | 0.0.4711, if detected. |
37 | As another example, | 44 | |
45 | As another example:: | ||
46 | |||
38 | cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02 | 47 | cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02 |
48 | |||
39 | will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02. | 49 | will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02. |
40 | 50 | ||
41 | By default, no devices are ignored. | 51 | By default, no devices are ignored. |
@@ -48,40 +58,45 @@ Command line parameters | |||
48 | 58 | ||
49 | Lists the ranges of devices (by bus id) which are ignored by common I/O. | 59 | Lists the ranges of devices (by bus id) which are ignored by common I/O. |
50 | 60 | ||
51 | You can un-ignore certain or all devices by piping to /proc/cio_ignore. | 61 | You can un-ignore certain or all devices by piping to /proc/cio_ignore. |
52 | "free all" will un-ignore all ignored devices, | 62 | "free all" will un-ignore all ignored devices, |
53 | "free <device range>, <device range>, ..." will un-ignore the specified | 63 | "free <device range>, <device range>, ..." will un-ignore the specified |
54 | devices. | 64 | devices. |
55 | 65 | ||
56 | For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored, | 66 | For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored, |
67 | |||
57 | - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore | 68 | - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore |
58 | will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023 | 69 | will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023 |
59 | to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored; | 70 | to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored; |
60 | - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device | 71 | - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device |
61 | 0.0.0041; | 72 | 0.0.0041; |
62 | - echo free all > /proc/cio_ignore will un-ignore all remaining ignored | 73 | - echo free all > /proc/cio_ignore will un-ignore all remaining ignored |
63 | devices. | 74 | devices. |
64 | 75 | ||
65 | When a device is un-ignored, device recognition and sensing is performed and | 76 | When a device is un-ignored, device recognition and sensing is performed and |
66 | the device driver will be notified if possible, so the device will become | 77 | the device driver will be notified if possible, so the device will become |
67 | available to the system. Note that un-ignoring is performed asynchronously. | 78 | available to the system. Note that un-ignoring is performed asynchronously. |
68 | 79 | ||
69 | You can also add ranges of devices to be ignored by piping to | 80 | You can also add ranges of devices to be ignored by piping to |
70 | /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the | 81 | /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the |
71 | specified devices. | 82 | specified devices. |
72 | 83 | ||
73 | Note: While already known devices can be added to the list of devices to be | 84 | Note: While already known devices can be added to the list of devices to be |
74 | ignored, there will be no effect on them. However, if such a device | 85 | ignored, there will be no effect on them. However, if such a device |
75 | disappears and then reappears, it will then be ignored. To make | 86 | disappears and then reappears, it will then be ignored. To make |
76 | known devices go away, you need the "purge" command (see below). | 87 | known devices go away, you need the "purge" command (see below). |
77 | 88 | ||
78 | For example, | 89 | For example:: |
90 | |||
79 | "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore" | 91 | "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore" |
92 | |||
80 | will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored | 93 | will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored |
81 | devices. | 94 | devices. |
82 | 95 | ||
83 | You can remove already known but now ignored devices via | 96 | You can remove already known but now ignored devices via:: |
97 | |||
84 | "echo purge > /proc/cio_ignore" | 98 | "echo purge > /proc/cio_ignore" |
99 | |||
85 | All devices ignored but still registered and not online (= not in use) | 100 | All devices ignored but still registered and not online (= not in use) |
86 | will be deregistered and thus removed from the system. | 101 | will be deregistered and thus removed from the system. |
87 | 102 | ||
@@ -115,11 +130,11 @@ debugfs entries | |||
115 | Various debug messages from the common I/O-layer. | 130 | Various debug messages from the common I/O-layer. |
116 | 131 | ||
117 | - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii | 132 | - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii |
118 | Logs the calling of functions in the common I/O-layer and, if applicable, | 133 | Logs the calling of functions in the common I/O-layer and, if applicable, |
119 | which subchannel they were called for, as well as dumps of some data | 134 | which subchannel they were called for, as well as dumps of some data |
120 | structures (like irb in an error case). | 135 | structures (like irb in an error case). |
121 | 136 | ||
122 | The level of logging can be changed to be more or less verbose by piping to | 137 | The level of logging can be changed to be more or less verbose by piping to |
123 | /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the | 138 | /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the |
124 | documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt) | 139 | documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst) |
125 | for details. | 140 | for details. |
diff --git a/Documentation/s390/DASD b/Documentation/s390/dasd.rst index 9963f1e9c98a..9e22247285c8 100644 --- a/Documentation/s390/DASD +++ b/Documentation/s390/dasd.rst | |||
@@ -1,4 +1,6 @@ | |||
1 | ================== | ||
1 | DASD device driver | 2 | DASD device driver |
3 | ================== | ||
2 | 4 | ||
3 | S/390's disk devices (DASDs) are managed by Linux via the DASD device | 5 | S/390's disk devices (DASDs) are managed by Linux via the DASD device |
4 | driver. It is valid for all types of DASDs and represents them to | 6 | driver. It is valid for all types of DASDs and represents them to |
@@ -14,14 +16,14 @@ parameters are to be given in hexadecimal notation without a leading | |||
14 | If you supply kernel parameters the different instances are processed | 16 | If you supply kernel parameters the different instances are processed |
15 | in order of appearance and a minor number is reserved for any device | 17 | in order of appearance and a minor number is reserved for any device |
16 | covered by the supplied range up to 64 volumes. Additional DASDs are | 18 | covered by the supplied range up to 64 volumes. Additional DASDs are |
17 | ignored. If you do not supply the 'dasd=' kernel parameter at all, the | 19 | ignored. If you do not supply the 'dasd=' kernel parameter at all, the |
18 | DASD driver registers all supported DASDs of your system to a minor | 20 | DASD driver registers all supported DASDs of your system to a minor |
19 | number in ascending order of the subchannel number. | 21 | number in ascending order of the subchannel number. |
20 | 22 | ||
21 | The driver currently supports ECKD-devices and there are stubs for | 23 | The driver currently supports ECKD-devices and there are stubs for |
22 | support of the FBA and CKD architectures. For the FBA architecture | 24 | support of the FBA and CKD architectures. For the FBA architecture |
23 | only some smart data structures are missing to make the support | 25 | only some smart data structures are missing to make the support |
24 | complete. | 26 | complete. |
25 | We performed our testing on 3380 and 3390 type disks of different | 27 | We performed our testing on 3380 and 3390 type disks of different |
26 | sizes, under VM and on the bare hardware (LPAR), using internal disks | 28 | sizes, under VM and on the bare hardware (LPAR), using internal disks |
27 | of the multiprise as well as a RAMAC virtual array. Disks exported by | 29 | of the multiprise as well as a RAMAC virtual array. Disks exported by |
@@ -34,19 +36,22 @@ accessibility of the DASD from other OSs. In a later stage we will | |||
34 | provide support of partitions, maybe VTOC oriented or using a kind of | 36 | provide support of partitions, maybe VTOC oriented or using a kind of |
35 | partition table in the label record. | 37 | partition table in the label record. |
36 | 38 | ||
37 | USAGE | 39 | Usage |
40 | ===== | ||
38 | 41 | ||
39 | -Low-level format (?CKD only) | 42 | -Low-level format (?CKD only) |
40 | For using an ECKD-DASD as a Linux harddisk you have to low-level | 43 | For using an ECKD-DASD as a Linux harddisk you have to low-level |
41 | format the tracks by issuing the BLKDASDFORMAT-ioctl on that | 44 | format the tracks by issuing the BLKDASDFORMAT-ioctl on that |
42 | device. This will erase any data on that volume including IBM volume | 45 | device. This will erase any data on that volume including IBM volume |
43 | labels, VTOCs etc. The ioctl may take a 'struct format_data *' or | 46 | labels, VTOCs etc. The ioctl may take a `struct format_data *` or |
44 | 'NULL' as an argument. | 47 | 'NULL' as an argument:: |
45 | typedef struct { | 48 | |
49 | typedef struct { | ||
46 | int start_unit; | 50 | int start_unit; |
47 | int stop_unit; | 51 | int stop_unit; |
48 | int blksize; | 52 | int blksize; |
49 | } format_data_t; | 53 | } format_data_t; |
54 | |||
50 | When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole | 55 | When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole |
51 | disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit | 56 | disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit |
52 | and stop_unit are the first and last track to be formatted. If | 57 | and stop_unit are the first and last track to be formatted. If |
@@ -56,17 +61,23 @@ up to the last track. blksize can be any power of two between 512 and | |||
56 | 1kB blocks anyway and you gain approx. 50% of capacity increasing your | 61 | 1kB blocks anyway and you gain approx. 50% of capacity increasing your |
57 | blksize from 512 byte to 1kB. | 62 | blksize from 512 byte to 1kB. |
58 | 63 | ||
59 | -Make a filesystem | 64 | Make a filesystem |
65 | ================= | ||
66 | |||
60 | Then you can mk??fs the filesystem of your choice on that volume or | 67 | Then you can mk??fs the filesystem of your choice on that volume or |
61 | partition. For reasons of sanity you should build your filesystem on | 68 | partition. For reasons of sanity you should build your filesystem on |
62 | the partition /dev/dd?1 instead of the whole volume. You only lose 3kB | 69 | the partition /dev/dd?1 instead of the whole volume. You only lose 3kB |
63 | but may be sure that you can reuse your data after introduction of a | 70 | but may be sure that you can reuse your data after introduction of a |
64 | real partition table. | 71 | real partition table. |
65 | 72 | ||
66 | BUGS: | 73 | Bugs |
74 | ==== | ||
75 | |||
67 | - Performance sometimes is rather low because we don't fully exploit clustering | 76 | - Performance sometimes is rather low because we don't fully exploit clustering |
68 | 77 | ||
69 | TODO-List: | 78 | TODO-List |
79 | ========= | ||
80 | |||
70 | - Add IBM'S Disk layout to genhd | 81 | - Add IBM'S Disk layout to genhd |
71 | - Enhance driver to use more than one major number | 82 | - Enhance driver to use more than one major number |
72 | - Enable usage as a module | 83 | - Enable usage as a module |
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/debugging390.rst index c35804c238ad..d49305fd5e1a 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/debugging390.rst | |||
@@ -1,9 +1,12 @@ | |||
1 | ============================================= | ||
2 | Debugging on Linux for s/390 & z/Architecture | ||
3 | ============================================= | ||
1 | 4 | ||
2 | Debugging on Linux for s/390 & z/Architecture | 5 | Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) |
3 | by | 6 | |
4 | Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) | 7 | Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation |
5 | Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation | 8 | |
6 | Best viewed with fixed width fonts | 9 | .. Best viewed with fixed width fonts |
7 | 10 | ||
8 | Overview of Document: | 11 | Overview of Document: |
9 | ===================== | 12 | ===================== |
@@ -17,32 +20,32 @@ It is intended like the Enterprise Systems Architecture/390 Reference Summary | |||
17 | to be printed out & used as a quick cheat sheet self help style reference when | 20 | to be printed out & used as a quick cheat sheet self help style reference when |
18 | problems occur. | 21 | problems occur. |
19 | 22 | ||
20 | Contents | 23 | .. Contents |
21 | ======== | 24 | ======== |
22 | Register Set | 25 | Register Set |
23 | Address Spaces on Intel Linux | 26 | Address Spaces on Intel Linux |
24 | Address Spaces on Linux for s/390 & z/Architecture | 27 | Address Spaces on Linux for s/390 & z/Architecture |
25 | The Linux for s/390 & z/Architecture Kernel Task Structure | 28 | The Linux for s/390 & z/Architecture Kernel Task Structure |
26 | Register Usage & Stackframes on Linux for s/390 & z/Architecture | 29 | Register Usage & Stackframes on Linux for s/390 & z/Architecture |
27 | A sample program with comments | 30 | A sample program with comments |
28 | Compiling programs for debugging on Linux for s/390 & z/Architecture | 31 | Compiling programs for debugging on Linux for s/390 & z/Architecture |
29 | Debugging under VM | 32 | Debugging under VM |
30 | s/390 & z/Architecture IO Overview | 33 | s/390 & z/Architecture IO Overview |
31 | Debugging IO on s/390 & z/Architecture under VM | 34 | Debugging IO on s/390 & z/Architecture under VM |
32 | GDB on s/390 & z/Architecture | 35 | GDB on s/390 & z/Architecture |
33 | Stack chaining in gdb by hand | 36 | Stack chaining in gdb by hand |
34 | Examining core dumps | 37 | Examining core dumps |
35 | ldd | 38 | ldd |
36 | Debugging modules | 39 | Debugging modules |
37 | The proc file system | 40 | The proc file system |
38 | SysRq | 41 | SysRq |
39 | References | 42 | References |
40 | Special Thanks | 43 | Special Thanks |
41 | 44 | ||
42 | Register Set | 45 | Register Set |
43 | ============ | 46 | ============ |
44 | The current architectures have the following registers. | 47 | The current architectures have the following registers. |
45 | 48 | ||
46 | 16 General purpose registers, 32 bit on s/390 and 64 bit on z/Architecture, | 49 | 16 General purpose registers, 32 bit on s/390 and 64 bit on z/Architecture, |
47 | r0-r15 (or gpr0-gpr15), used for arithmetic and addressing. | 50 | r0-r15 (or gpr0-gpr15), used for arithmetic and addressing. |
48 | 51 | ||
@@ -59,20 +62,22 @@ Access register 0 (and access register 1 on z/Architecture, which needs a | |||
59 | 64 bit pointer) is currently used by the pthread library as a pointer to | 62 | 64 bit pointer) is currently used by the pthread library as a pointer to |
60 | the current running threads private area. | 63 | the current running threads private area. |
61 | 64 | ||
62 | 16 64 bit floating point registers (fp0-fp15 ) IEEE & HFP floating | 65 | 16 64-bit floating point registers (fp0-fp15 ) IEEE & HFP floating |
63 | point format compliant on G5 upwards & a Floating point control reg (FPC) | 66 | point format compliant on G5 upwards & a Floating point control reg (FPC) |
64 | 4 64 bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines. | 67 | |
68 | 4 64-bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines. | ||
69 | |||
65 | Note: | 70 | Note: |
66 | Linux (currently) always uses IEEE & emulates G5 IEEE format on older machines, | 71 | Linux (currently) always uses IEEE & emulates G5 IEEE format on older |
67 | ( provided the kernel is configured for this ). | 72 | machines, ( provided the kernel is configured for this ). |
68 | 73 | ||
69 | 74 | ||
70 | The PSW is the most important register on the machine it | 75 | The PSW is the most important register on the machine it |
71 | is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of | 76 | is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of |
72 | a program counter (pc), condition code register,memory space designator. | 77 | a program counter (pc), condition code register,memory space designator. |
73 | In IBM standard notation I am counting bit 0 as the MSB. | 78 | In IBM standard notation I am counting bit 0 as the MSB. |
74 | It has several advantages over a normal program counter | 79 | It has several advantages over a normal program counter |
75 | in that you can change address translation & program counter | 80 | in that you can change address translation & program counter |
76 | in a single instruction. To change address translation, | 81 | in a single instruction. To change address translation, |
77 | e.g. switching address translation off requires that you | 82 | e.g. switching address translation off requires that you |
78 | have a logical=physical mapping for the address you are | 83 | have a logical=physical mapping for the address you are |
@@ -206,14 +211,18 @@ It exists between the real addresses 0-4096 on s/390 and between 0-8192 on | |||
206 | z/Architecture and is exchanged with one page on s/390 or two pages on | 211 | z/Architecture and is exchanged with one page on s/390 or two pages on |
207 | z/Architecture in absolute storage by the set prefix instruction during Linux | 212 | z/Architecture in absolute storage by the set prefix instruction during Linux |
208 | startup. | 213 | startup. |
214 | |||
209 | This page is mapped to a different prefix for each processor in an SMP | 215 | This page is mapped to a different prefix for each processor in an SMP |
210 | configuration (assuming the OS designer is sane of course). | 216 | configuration (assuming the OS designer is sane of course). |
217 | |||
211 | Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on | 218 | Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on |
212 | z/Architecture are used by the processor itself for holding such information | 219 | z/Architecture are used by the processor itself for holding such information |
213 | as exception indications and entry points for exceptions. | 220 | as exception indications and entry points for exceptions. |
221 | |||
214 | Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and | 222 | Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and |
215 | z/Architecture (there is a gap on z/Architecture currently between 0xc00 and | 223 | z/Architecture (there is a gap on z/Architecture currently between 0xc00 and |
216 | 0x1000, too, which is used by Linux). | 224 | 0x1000, too, which is used by Linux). |
225 | |||
217 | The closest thing to this on traditional architectures is the interrupt | 226 | The closest thing to this on traditional architectures is the interrupt |
218 | vector table. This is a good thing & does simplify some of the kernel coding | 227 | vector table. This is a good thing & does simplify some of the kernel coding |
219 | however it means that we now cannot catch stray NULL pointers in the | 228 | however it means that we now cannot catch stray NULL pointers in the |
@@ -225,27 +234,29 @@ Address Spaces on Intel Linux | |||
225 | ============================= | 234 | ============================= |
226 | 235 | ||
227 | The traditional Intel Linux is approximately mapped as follows forgive | 236 | The traditional Intel Linux is approximately mapped as follows forgive |
228 | the ascii art. | 237 | the ascii art:: |
229 | 0xFFFFFFFF 4GB Himem ***************** | 238 | |
230 | * * | 239 | 0xFFFFFFFF 4GB Himem ***************** |
231 | * Kernel Space * | 240 | * * |
232 | * * | 241 | * Kernel Space * |
233 | ***************** **************** | 242 | * * |
234 | User Space Himem * User Stack * * * | 243 | ***************** **************** |
235 | (typically 0xC0000000 3GB ) ***************** * * | 244 | User Space Himem * User Stack * * * |
236 | * Shared Libs * * Next Process * | 245 | (typically 0xC0000000 3GB ) ***************** * * |
237 | ***************** * to * | 246 | * Shared Libs * * Next Process * |
238 | * * <== * Run * <== | 247 | ***************** * to * |
239 | * User Program * * * | 248 | * * <== * Run * <== |
240 | * Data BSS * * * | 249 | * User Program * * * |
241 | * Text * * * | 250 | * Data BSS * * * |
242 | * Sections * * * | 251 | * Text * * * |
243 | 0x00000000 ***************** **************** | 252 | * Sections * * * |
253 | 0x00000000 ***************** **************** | ||
244 | 254 | ||
245 | Now it is easy to see that on Intel it is quite easy to recognise a kernel | 255 | Now it is easy to see that on Intel it is quite easy to recognise a kernel |
246 | address as being one greater than user space himem (in this case 0xC0000000), | 256 | address as being one greater than user space himem (in this case 0xC0000000), |
247 | and addresses of less than this are the ones in the current running program on | 257 | and addresses of less than this are the ones in the current running program on |
248 | this processor (if an smp box). | 258 | this processor (if an smp box). |
259 | |||
249 | If using the virtual machine ( VM ) as a debugger it is quite difficult to | 260 | If using the virtual machine ( VM ) as a debugger it is quite difficult to |
250 | know which user process is running as the address space you are looking at | 261 | know which user process is running as the address space you are looking at |
251 | could be from any process in the run queue. | 262 | could be from any process in the run queue. |
@@ -256,6 +267,7 @@ of Real Address=Virtual Address-User Space Himem. | |||
256 | This means that on Intel the kernel linux can typically only address | 267 | This means that on Intel the kernel linux can typically only address |
257 | Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines | 268 | Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines |
258 | can typically use. | 269 | can typically use. |
270 | |||
259 | They can lower User Himem to 2GB or lower & thus be | 271 | They can lower User Himem to 2GB or lower & thus be |
260 | able to use 2GB of RAM however this shrinks the maximum size | 272 | able to use 2GB of RAM however this shrinks the maximum size |
261 | of User Space from 3GB to 2GB they have a no win limit of 4GB unless | 273 | of User Space from 3GB to 2GB they have a no win limit of 4GB unless |
@@ -264,31 +276,31 @@ they go to 64 Bit. | |||
264 | 276 | ||
265 | On 390 our limitations & strengths make us slightly different. | 277 | On 390 our limitations & strengths make us slightly different. |
266 | For backward compatibility we are only allowed use 31 bits (2GB) | 278 | For backward compatibility we are only allowed use 31 bits (2GB) |
267 | of our 32 bit addresses, however, we use entirely separate address | 279 | of our 32 bit addresses, however, we use entirely separate address |
268 | spaces for the user & kernel. | 280 | spaces for the user & kernel. |
269 | 281 | ||
270 | This means we can support 2GB of non Extended RAM on s/390, & more | 282 | This means we can support 2GB of non Extended RAM on s/390, & more |
271 | with the Extended memory management swap device & | 283 | with the Extended memory management swap device & |
272 | currently 4TB of physical memory on z/Architecture. | 284 | currently 4TB of physical memory on z/Architecture. |
273 | 285 | ||
274 | 286 | ||
275 | Address Spaces on Linux for s/390 & z/Architecture | 287 | Address Spaces on Linux for s/390 & z/Architecture |
276 | ================================================== | 288 | ================================================== |
277 | 289 | ||
278 | Our addressing scheme is basically as follows: | 290 | Our addressing scheme is basically as follows:: |
279 | 291 | ||
280 | Primary Space Home Space | 292 | Primary Space Home Space |
281 | Himem 0x7fffffff 2GB on s/390 ***************** **************** | 293 | Himem 0x7fffffff 2GB on s/390 ***************** **************** |
282 | currently 0x3ffffffffff (2^42)-1 * User Stack * * * | 294 | currently 0x3ffffffffff (2^42)-1 * User Stack * * * |
283 | on z/Architecture. ***************** * * | 295 | on z/Architecture. ***************** * * |
284 | * Shared Libs * * * | 296 | * Shared Libs * * * |
285 | ***************** * * | 297 | ***************** * * |
286 | * * * Kernel * | 298 | * * * Kernel * |
287 | * User Program * * * | 299 | * User Program * * * |
288 | * Data BSS * * * | 300 | * Data BSS * * * |
289 | * Text * * * | 301 | * Text * * * |
290 | * Sections * * * | 302 | * Sections * * * |
291 | 0x00000000 ***************** **************** | 303 | 0x00000000 ***************** **************** |
292 | 304 | ||
293 | This also means that we need to look at the PSW problem state bit and the | 305 | This also means that we need to look at the PSW problem state bit and the |
294 | addressing mode to decide whether we are looking at user or kernel space. | 306 | addressing mode to decide whether we are looking at user or kernel space. |
@@ -304,20 +316,25 @@ instruction on a user space address is performed. | |||
304 | When also looking at the ASCE control registers, this means: | 316 | When also looking at the ASCE control registers, this means: |
305 | 317 | ||
306 | User space: | 318 | User space: |
319 | |||
307 | - runs in primary or access register mode | 320 | - runs in primary or access register mode |
308 | - cr1 contains the user asce | 321 | - cr1 contains the user asce |
309 | - cr7 contains the user asce | 322 | - cr7 contains the user asce |
310 | - cr13 contains the kernel asce | 323 | - cr13 contains the kernel asce |
311 | 324 | ||
312 | Kernel space: | 325 | Kernel space: |
326 | |||
313 | - runs in home space mode | 327 | - runs in home space mode |
314 | - cr1 contains the user or kernel asce | 328 | - cr1 contains the user or kernel asce |
315 | -> the kernel asce is loaded when a uaccess requires primary or | 329 | |
316 | secondary address mode | 330 | - the kernel asce is loaded when a uaccess requires primary or |
331 | secondary address mode | ||
332 | |||
317 | - cr7 contains the user or kernel asce, (changed with set_fs()) | 333 | - cr7 contains the user or kernel asce, (changed with set_fs()) |
318 | - cr13 contains the kernel asce | 334 | - cr13 contains the kernel asce |
319 | 335 | ||
320 | In case of uaccess the kernel changes to: | 336 | In case of uaccess the kernel changes to: |
337 | |||
321 | - primary space mode in case of a uaccess (copy_to_user) and uses | 338 | - primary space mode in case of a uaccess (copy_to_user) and uses |
322 | e.g. the mvcp instruction to access user space. However the kernel | 339 | e.g. the mvcp instruction to access user space. However the kernel |
323 | will stay in home space mode if the mvcos instruction is available | 340 | will stay in home space mode if the mvcos instruction is available |
@@ -337,41 +354,44 @@ Virtual Addresses on s/390 & z/Architecture | |||
337 | A virtual address on s/390 is made up of 3 parts | 354 | A virtual address on s/390 is made up of 3 parts |
338 | The SX (segment index, roughly corresponding to the PGD & PMD in Linux | 355 | The SX (segment index, roughly corresponding to the PGD & PMD in Linux |
339 | terminology) being bits 1-11. | 356 | terminology) being bits 1-11. |
357 | |||
340 | The PX (page index, corresponding to the page table entry (pte) in Linux | 358 | The PX (page index, corresponding to the page table entry (pte) in Linux |
341 | terminology) being bits 12-19. | 359 | terminology) being bits 12-19. |
360 | |||
342 | The remaining bits BX (the byte index are the offset in the page ) | 361 | The remaining bits BX (the byte index are the offset in the page ) |
343 | i.e. bits 20 to 31. | 362 | i.e. bits 20 to 31. |
344 | 363 | ||
345 | On z/Architecture in linux we currently make up an address from 4 parts. | 364 | On z/Architecture in linux we currently make up an address from 4 parts. |
346 | The region index bits (RX) 0-32 we currently use bits 22-32 | 365 | |
347 | The segment index (SX) being bits 33-43 | 366 | - The region index bits (RX) 0-32 we currently use bits 22-32 |
348 | The page index (PX) being bits 44-51 | 367 | - The segment index (SX) being bits 33-43 |
349 | The byte index (BX) being bits 52-63 | 368 | - The page index (PX) being bits 44-51 |
369 | - The byte index (BX) being bits 52-63 | ||
350 | 370 | ||
351 | Notes: | 371 | Notes: |
352 | 1) s/390 has no PMD so the PMD is really the PGD also. | 372 | 1) s/390 has no PMD so the PMD is really the PGD also. |
353 | A lot of this stuff is defined in pgtable.h. | 373 | A lot of this stuff is defined in pgtable.h. |
354 | 374 | ||
355 | 2) Also seeing as s/390's page indexes are only 1k in size | 375 | 2) Also seeing as s/390's page indexes are only 1k in size |
356 | (bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k ) | 376 | (bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k ) |
357 | to make the best use of memory by updating 4 segment indices | 377 | to make the best use of memory by updating 4 segment indices |
358 | entries each time we mess with a PMD & use offsets | 378 | entries each time we mess with a PMD & use offsets |
359 | 0,1024,2048 & 3072 in this page as for our segment indexes. | 379 | 0,1024,2048 & 3072 in this page as for our segment indexes. |
360 | On z/Architecture our page indexes are now 2k in size | 380 | On z/Architecture our page indexes are now 2k in size |
361 | ( bits 12-19 x 8 bytes per pte ) we do a similar trick | 381 | ( bits 12-19 x 8 bytes per pte ) we do a similar trick |
362 | but only mess with 2 segment indices each time we mess with | 382 | but only mess with 2 segment indices each time we mess with |
363 | a PMD. | 383 | a PMD. |
364 | 384 | ||
365 | 3) As z/Architecture supports up to a massive 5-level page table lookup we | 385 | 3) As z/Architecture supports up to a massive 5-level page table lookup we |
366 | can only use 3 currently on Linux ( as this is all the generic kernel | 386 | can only use 3 currently on Linux ( as this is all the generic kernel |
367 | currently supports ) however this may change in future | 387 | currently supports ) however this may change in future |
368 | this allows us to access ( according to my sums ) | 388 | this allows us to access ( according to my sums ) |
369 | 4TB of virtual storage per process i.e. | 389 | 4TB of virtual storage per process i.e. |
370 | 4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes, | 390 | 4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes, |
371 | enough for another 2 or 3 years I think :-). | 391 | enough for another 2 or 3 years I think :-). |
372 | to do this we use a region-third-table designation type in | 392 | to do this we use a region-third-table designation type in |
373 | our address space control registers. | 393 | our address space control registers. |
374 | 394 | ||
375 | 395 | ||
376 | The Linux for s/390 & z/Architecture Kernel Task Structure | 396 | The Linux for s/390 & z/Architecture Kernel Task Structure |
377 | ========================================================== | 397 | ========================================================== |
@@ -382,42 +402,43 @@ the __LC_KERNEL_STACK variable in the spare prefix area for this cpu | |||
382 | (which we use for per-processor globals). | 402 | (which we use for per-processor globals). |
383 | 403 | ||
384 | The kernel stack pointer is intimately tied with the task structure for | 404 | The kernel stack pointer is intimately tied with the task structure for |
385 | each processor as follows. | 405 | each processor as follows:: |
386 | 406 | ||
387 | s/390 | 407 | s/390 |
388 | ************************ | 408 | ************************ |
389 | * 1 page kernel stack * | 409 | * 1 page kernel stack * |
390 | * ( 4K ) * | 410 | * ( 4K ) * |
391 | ************************ | 411 | ************************ |
392 | * 1 page task_struct * | 412 | * 1 page task_struct * |
393 | * ( 4K ) * | 413 | * ( 4K ) * |
394 | 8K aligned ************************ | 414 | 8K aligned ************************ |
395 | 415 | ||
396 | z/Architecture | 416 | z/Architecture |
397 | ************************ | 417 | ************************ |
398 | * 2 page kernel stack * | 418 | * 2 page kernel stack * |
399 | * ( 8K ) * | 419 | * ( 8K ) * |
400 | ************************ | 420 | ************************ |
401 | * 2 page task_struct * | 421 | * 2 page task_struct * |
402 | * ( 8K ) * | 422 | * ( 8K ) * |
403 | 16K aligned ************************ | 423 | 16K aligned ************************ |
404 | 424 | ||
405 | What this means is that we don't need to dedicate any register or global | 425 | What this means is that we don't need to dedicate any register or global |
406 | variable to point to the current running process & can retrieve it with the | 426 | variable to point to the current running process & can retrieve it with the |
407 | following very simple construct for s/390 & one very similar for z/Architecture. | 427 | following very simple construct for s/390 & one very similar for |
428 | z/Architecture:: | ||
408 | 429 | ||
409 | static inline struct task_struct * get_current(void) | 430 | static inline struct task_struct * get_current(void) |
410 | { | 431 | { |
411 | struct task_struct *current; | 432 | struct task_struct *current; |
412 | __asm__("lhi %0,-8192\n\t" | 433 | __asm__("lhi %0,-8192\n\t" |
413 | "nr %0,15" | 434 | "nr %0,15" |
414 | : "=r" (current) ); | 435 | : "=r" (current) ); |
415 | return current; | 436 | return current; |
416 | } | 437 | } |
417 | 438 | ||
418 | i.e. just anding the current kernel stack pointer with the mask -8192. | 439 | i.e. just anding the current kernel stack pointer with the mask -8192. |
419 | Thankfully because Linux doesn't have support for nested IO interrupts | 440 | Thankfully because Linux doesn't have support for nested IO interrupts |
420 | & our devices have large buffers can survive interrupts being shut for | 441 | & our devices have large buffers can survive interrupts being shut for |
421 | short amounts of time we don't need a separate stack for interrupts. | 442 | short amounts of time we don't need a separate stack for interrupts. |
422 | 443 | ||
423 | 444 | ||
@@ -428,7 +449,7 @@ Register Usage & Stackframes on Linux for s/390 & z/Architecture | |||
428 | Overview: | 449 | Overview: |
429 | --------- | 450 | --------- |
430 | This is the code that gcc produces at the top & the bottom of | 451 | This is the code that gcc produces at the top & the bottom of |
431 | each function. It usually is fairly consistent & similar from | 452 | each function. It usually is fairly consistent & similar from |
432 | function to function & if you know its layout you can probably | 453 | function to function & if you know its layout you can probably |
433 | make some headway in finding the ultimate cause of a problem | 454 | make some headway in finding the ultimate cause of a problem |
434 | after a crash without a source level debugger. | 455 | after a crash without a source level debugger. |
@@ -443,87 +464,95 @@ didn't have to maintain compatibility with older linkage formats. | |||
443 | Glossary: | 464 | Glossary: |
444 | --------- | 465 | --------- |
445 | alloca: | 466 | alloca: |
446 | This is a built in compiler function for runtime allocation | 467 | This is a built in compiler function for runtime allocation |
447 | of extra space on the callers stack which is obviously freed | 468 | of extra space on the callers stack which is obviously freed |
448 | up on function exit ( e.g. the caller may choose to allocate nothing | 469 | up on function exit ( e.g. the caller may choose to allocate nothing |
449 | or a buffer of 4k if required for temporary purposes ), it generates | 470 | or a buffer of 4k if required for temporary purposes ), it generates |
450 | very efficient code ( a few cycles ) when compared to alternatives | 471 | very efficient code ( a few cycles ) when compared to alternatives |
451 | like malloc. | 472 | like malloc. |
452 | 473 | ||
453 | automatics: These are local variables on the stack, | 474 | automatics: |
454 | i.e they aren't in registers & they aren't static. | 475 | These are local variables on the stack, i.e they aren't in registers & |
476 | they aren't static. | ||
455 | 477 | ||
456 | back-chain: | 478 | back-chain: |
457 | This is a pointer to the stack pointer before entering a | 479 | This is a pointer to the stack pointer before entering a |
458 | framed functions ( see frameless function ) prologue got by | 480 | framed functions ( see frameless function ) prologue got by |
459 | dereferencing the address of the current stack pointer, | 481 | dereferencing the address of the current stack pointer, |
460 | i.e. got by accessing the 32 bit value at the stack pointers | 482 | i.e. got by accessing the 32 bit value at the stack pointers |
461 | current location. | 483 | current location. |
462 | 484 | ||
463 | base-pointer: | 485 | base-pointer: |
464 | This is a pointer to the back of the literal pool which | 486 | This is a pointer to the back of the literal pool which |
465 | is an area just behind each procedure used to store constants | 487 | is an area just behind each procedure used to store constants |
466 | in each function. | 488 | in each function. |
467 | 489 | ||
468 | call-clobbered: The caller probably needs to save these registers if there | 490 | call-clobbered: |
469 | is something of value in them, on the stack or elsewhere before making a | 491 | The caller probably needs to save these registers if there |
470 | call to another procedure so that it can restore it later. | 492 | is something of value in them, on the stack or elsewhere before making a |
493 | call to another procedure so that it can restore it later. | ||
471 | 494 | ||
472 | epilogue: | 495 | epilogue: |
473 | The code generated by the compiler to return to the caller. | 496 | The code generated by the compiler to return to the caller. |
474 | 497 | ||
475 | frameless-function | 498 | frameless-function: |
476 | A frameless function in Linux for s390 & z/Architecture is one which doesn't | 499 | A frameless function in Linux for s390 & z/Architecture is one which doesn't |
477 | need more than the register save area (96 bytes on s/390, 160 on z/Architecture) | 500 | need more than the register save area (96 bytes on s/390, 160 on z/Architecture) |
478 | given to it by the caller. | 501 | given to it by the caller. |
479 | A frameless function never: | 502 | |
480 | 1) Sets up a back chain. | 503 | A frameless function never: |
481 | 2) Calls alloca. | 504 | |
482 | 3) Calls other normal functions | 505 | 1) Sets up a back chain. |
483 | 4) Has automatics. | 506 | 2) Calls alloca. |
507 | 3) Calls other normal functions | ||
508 | 4) Has automatics. | ||
484 | 509 | ||
485 | GOT-pointer: | 510 | GOT-pointer: |
486 | This is a pointer to the global-offset-table in ELF | 511 | This is a pointer to the global-offset-table in ELF |
487 | ( Executable and Linkable Format, Linux's most common executable format ), | 512 | ( Executable and Linkable Format, Linux's most common executable format ), |
488 | all globals & shared library objects are found using this pointer. | 513 | all globals & shared library objects are found using this pointer. |
489 | 514 | ||
490 | lazy-binding | 515 | lazy-binding |
491 | ELF shared libraries are typically only loaded when routines in the shared | 516 | ELF shared libraries are typically only loaded when routines in the shared |
492 | library are actually first called at runtime. This is lazy binding. | 517 | library are actually first called at runtime. This is lazy binding. |
493 | 518 | ||
494 | procedure-linkage-table | 519 | procedure-linkage-table |
495 | This is a table found from the GOT which contains pointers to routines | 520 | This is a table found from the GOT which contains pointers to routines |
496 | in other shared libraries which can't be called to by easier means. | 521 | in other shared libraries which can't be called to by easier means. |
497 | 522 | ||
498 | prologue: | 523 | prologue: |
499 | The code generated by the compiler to set up the stack frame. | 524 | The code generated by the compiler to set up the stack frame. |
500 | 525 | ||
501 | outgoing-args: | 526 | outgoing-args: |
502 | This is extra area allocated on the stack of the calling function if the | 527 | This is extra area allocated on the stack of the calling function if the |
503 | parameters for the callees cannot all be put in registers, the same | 528 | parameters for the callees cannot all be put in registers, the same |
504 | area can be reused by each function the caller calls. | 529 | area can be reused by each function the caller calls. |
505 | 530 | ||
506 | routine-descriptor: | 531 | routine-descriptor: |
507 | A COFF executable format based concept of a procedure reference | 532 | A COFF executable format based concept of a procedure reference |
508 | actually being 8 bytes or more as opposed to a simple pointer to the routine. | 533 | actually being 8 bytes or more as opposed to a simple pointer to the routine. |
509 | This is typically defined as follows | 534 | This is typically defined as follows: |
510 | Routine Descriptor offset 0=Pointer to Function | ||
511 | Routine Descriptor offset 4=Pointer to Table of Contents | ||
512 | The table of contents/TOC is roughly equivalent to a GOT pointer. | ||
513 | & it means that shared libraries etc. can be shared between several | ||
514 | environments each with their own TOC. | ||
515 | |||
516 | |||
517 | static-chain: This is used in nested functions a concept adopted from pascal | ||
518 | by gcc not used in ansi C or C++ ( although quite useful ), basically it | ||
519 | is a pointer used to reference local variables of enclosing functions. | ||
520 | You might come across this stuff once or twice in your lifetime. | ||
521 | 535 | ||
522 | e.g. | 536 | - Routine Descriptor offset 0=Pointer to Function |
523 | The function below should return 11 though gcc may get upset & toss warnings | 537 | - Routine Descriptor offset 4=Pointer to Table of Contents |
524 | about unused variables. | 538 | |
525 | int FunctionA(int a) | 539 | The table of contents/TOC is roughly equivalent to a GOT pointer. |
526 | { | 540 | & it means that shared libraries etc. can be shared between several |
541 | environments each with their own TOC. | ||
542 | |||
543 | static-chain: | ||
544 | This is used in nested functions a concept adopted from pascal | ||
545 | by gcc not used in ansi C or C++ ( although quite useful ), basically it | ||
546 | is a pointer used to reference local variables of enclosing functions. | ||
547 | You might come across this stuff once or twice in your lifetime. | ||
548 | |||
549 | e.g. | ||
550 | |||
551 | The function below should return 11 though gcc may get upset & toss warnings | ||
552 | about unused variables:: | ||
553 | |||
554 | int FunctionA(int a) | ||
555 | { | ||
527 | int b; | 556 | int b; |
528 | FunctionC(int c) | 557 | FunctionC(int c) |
529 | { | 558 | { |
@@ -531,19 +560,21 @@ int FunctionA(int a) | |||
531 | } | 560 | } |
532 | FunctionC(10); | 561 | FunctionC(10); |
533 | return(b); | 562 | return(b); |
534 | } | 563 | } |
535 | 564 | ||
536 | 565 | ||
537 | s/390 & z/Architecture Register usage | 566 | s/390 & z/Architecture Register usage |
538 | ===================================== | 567 | ===================================== |
568 | |||
569 | ======== ========================================== =============== | ||
539 | r0 used by syscalls/assembly call-clobbered | 570 | r0 used by syscalls/assembly call-clobbered |
540 | r1 used by syscalls/assembly call-clobbered | 571 | r1 used by syscalls/assembly call-clobbered |
541 | r2 argument 0 / return value 0 call-clobbered | 572 | r2 argument 0 / return value 0 call-clobbered |
542 | r3 argument 1 / return value 1 (if long long) call-clobbered | 573 | r3 argument 1 / return value 1 (if long long) call-clobbered |
543 | r4 argument 2 call-clobbered | 574 | r4 argument 2 call-clobbered |
544 | r5 argument 3 call-clobbered | 575 | r5 argument 3 call-clobbered |
545 | r6 argument 4 saved | 576 | r6 argument 4 saved |
546 | r7 pointer-to arguments 5 to ... saved | 577 | r7 pointer-to arguments 5 to ... saved |
547 | r8 this & that saved | 578 | r8 this & that saved |
548 | r9 this & that saved | 579 | r9 this & that saved |
549 | r10 static-chain ( if nested function ) saved | 580 | r10 static-chain ( if nested function ) saved |
@@ -557,65 +588,74 @@ f0 argument 0 / return value ( float/double ) call-clobbered | |||
557 | f2 argument 1 call-clobbered | 588 | f2 argument 1 call-clobbered |
558 | f4 z/Architecture argument 2 saved | 589 | f4 z/Architecture argument 2 saved |
559 | f6 z/Architecture argument 3 saved | 590 | f6 z/Architecture argument 3 saved |
591 | ======== ========================================== =============== | ||
592 | |||
560 | The remaining floating points | 593 | The remaining floating points |
561 | f1,f3,f5 f7-f15 are call-clobbered. | 594 | f1,f3,f5 f7-f15 are call-clobbered. |
562 | 595 | ||
563 | Notes: | 596 | Notes: |
564 | ------ | 597 | ------ |
565 | 1) The only requirement is that registers which are used | 598 | 1) The only requirement is that registers which are used |
566 | by the callee are saved, e.g. the compiler is perfectly | 599 | by the callee are saved, e.g. the compiler is perfectly |
567 | capable of using r11 for purposes other than a | 600 | capable of using r11 for purposes other than a |
568 | frame pointer if a frame pointer is not needed. | 601 | frame pointer if a frame pointer is not needed. |
569 | 2) In functions with variable arguments e.g. printf the calling procedure | 602 | 2) In functions with variable arguments e.g. printf the calling procedure |
570 | is identical to one without variable arguments & the same number of | 603 | is identical to one without variable arguments & the same number of |
571 | parameters. However, the prologue of this function is somewhat more | 604 | parameters. However, the prologue of this function is somewhat more |
572 | hairy owing to it having to move these parameters to the stack to | 605 | hairy owing to it having to move these parameters to the stack to |
573 | get va_start, va_arg & va_end to work. | 606 | get va_start, va_arg & va_end to work. |
574 | 3) Access registers are currently unused by gcc but are used in | 607 | 3) Access registers are currently unused by gcc but are used in |
575 | the kernel. Possibilities exist to use them at the moment for | 608 | the kernel. Possibilities exist to use them at the moment for |
576 | temporary storage but it isn't recommended. | 609 | temporary storage but it isn't recommended. |
577 | 4) Only 4 of the floating point registers are used for | 610 | 4) Only 4 of the floating point registers are used for |
578 | parameter passing as older machines such as G3 have only 4 | 611 | parameter passing as older machines such as G3 have only 4 |
579 | & it keeps the stack frame compatible with other compilers. | 612 | & it keeps the stack frame compatible with other compilers. |
580 | However with IEEE floating point emulation under linux on the | 613 | However with IEEE floating point emulation under linux on the |
581 | older machines you are free to use the other 12. | 614 | older machines you are free to use the other 12. |
582 | 5) A long long or double parameter cannot have the | 615 | 5) A long long or double parameter cannot have the |
583 | first 4 bytes in a register & the second four bytes in the | 616 | first 4 bytes in a register & the second four bytes in the |
584 | outgoing args area. It must be purely in the outgoing args | 617 | outgoing args area. It must be purely in the outgoing args |
585 | area if crossing this boundary. | 618 | area if crossing this boundary. |
586 | 6) Floating point parameters are mixed with outgoing args | 619 | 6) Floating point parameters are mixed with outgoing args |
587 | on the outgoing args area in the order they are passed in as parameters. | 620 | on the outgoing args area in the order they are passed in as parameters. |
588 | 7) Floating point arguments 2 & 3 are saved in the outgoing args area for | 621 | 7) Floating point arguments 2 & 3 are saved in the outgoing args area for |
589 | z/Architecture | 622 | z/Architecture |
590 | 623 | ||
591 | 624 | ||
592 | Stack Frame Layout | 625 | Stack Frame Layout |
593 | ------------------ | 626 | ------------------ |
627 | |||
628 | ========= ============== ====================================================== | ||
594 | s/390 z/Architecture | 629 | s/390 z/Architecture |
595 | 0 0 back chain ( a 0 here signifies end of back chain ) | 630 | ========= ============== ====================================================== |
596 | 4 8 eos ( end of stack, not used on Linux for S390 used in other linkage formats ) | 631 | 0 0 back chain ( a 0 here signifies end of back chain ) |
597 | 8 16 glue used in other s/390 linkage formats for saved routine descriptors etc. | 632 | 4 8 eos ( end of stack, not used on Linux for S390 used |
598 | 12 24 glue used in other s/390 linkage formats for saved routine descriptors etc. | 633 | in other linkage formats ) |
599 | 16 32 scratch area | 634 | 8 16 glue used in other s/390 linkage formats for saved |
600 | 20 40 scratch area | 635 | routine descriptors etc. |
601 | 24 48 saved r6 of caller function | 636 | 12 24 glue used in other s/390 linkage formats for saved |
602 | 28 56 saved r7 of caller function | 637 | routine descriptors etc. |
603 | 32 64 saved r8 of caller function | 638 | 16 32 scratch area |
604 | 36 72 saved r9 of caller function | 639 | 20 40 scratch area |
605 | 40 80 saved r10 of caller function | 640 | 24 48 saved r6 of caller function |
606 | 44 88 saved r11 of caller function | 641 | 28 56 saved r7 of caller function |
607 | 48 96 saved r12 of caller function | 642 | 32 64 saved r8 of caller function |
608 | 52 104 saved r13 of caller function | 643 | 36 72 saved r9 of caller function |
609 | 56 112 saved r14 of caller function | 644 | 40 80 saved r10 of caller function |
610 | 60 120 saved r15 of caller function | 645 | 44 88 saved r11 of caller function |
611 | 64 128 saved f4 of caller function | 646 | 48 96 saved r12 of caller function |
612 | 72 132 saved f6 of caller function | 647 | 52 104 saved r13 of caller function |
613 | 80 undefined | 648 | 56 112 saved r14 of caller function |
614 | 96 160 outgoing args passed from caller to callee | 649 | 60 120 saved r15 of caller function |
615 | 96+x 160+x possible stack alignment ( 8 bytes desirable ) | 650 | 64 128 saved f4 of caller function |
616 | 96+x+y 160+x+y alloca space of caller ( if used ) | 651 | 72 132 saved f6 of caller function |
617 | 96+x+y+z 160+x+y+z automatics of caller ( if used ) | 652 | 80 undefined |
618 | 0 back-chain | 653 | 96 160 outgoing args passed from caller to callee |
654 | 96+x 160+x possible stack alignment ( 8 bytes desirable ) | ||
655 | 96+x+y 160+x+y alloca space of caller ( if used ) | ||
656 | 96+x+y+z 160+x+y+z automatics of caller ( if used ) | ||
657 | 0 back-chain | ||
658 | ========= ============== ====================================================== | ||
619 | 659 | ||
620 | A sample program with comments. | 660 | A sample program with comments. |
621 | =============================== | 661 | =============================== |
@@ -623,82 +663,86 @@ A sample program with comments. | |||
623 | Comments on the function test | 663 | Comments on the function test |
624 | ----------------------------- | 664 | ----------------------------- |
625 | 1) It didn't need to set up a pointer to the constant pool gpr13 as it is not | 665 | 1) It didn't need to set up a pointer to the constant pool gpr13 as it is not |
626 | used ( :-( ). | 666 | used ( :-( ). |
627 | 2) This is a frameless function & no stack is bought. | 667 | 2) This is a frameless function & no stack is bought. |
628 | 3) The compiler was clever enough to recognise that it could return the | 668 | 3) The compiler was clever enough to recognise that it could return the |
629 | value in r2 as well as use it for the passed in parameter ( :-) ). | 669 | value in r2 as well as use it for the passed in parameter ( :-) ). |
630 | 4) The basr ( branch relative & save ) trick works as follows the instruction | 670 | 4) The basr ( branch relative & save ) trick works as follows the instruction |
631 | has a special case with r0,r0 with some instruction operands is understood as | 671 | has a special case with r0,r0 with some instruction operands is understood as |
632 | the literal value 0, some risc architectures also do this ). So now | 672 | the literal value 0, some risc architectures also do this ). So now |
633 | we are branching to the next address & the address new program counter is | 673 | we are branching to the next address & the address new program counter is |
634 | in r13,so now we subtract the size of the function prologue we have executed | 674 | in r13,so now we subtract the size of the function prologue we have executed |
635 | + the size of the literal pool to get to the top of the literal pool | 675 | plus the size of the literal pool to get to the top of the literal pool:: |
636 | 0040037c int test(int b) | 676 | |
637 | { # Function prologue below | 677 | |
638 | 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14 | 678 | 0040037c int test(int b) |
639 | 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using | 679 | { # Function prologue below |
640 | 400382: a7 da ff fa ahi %r13,-6 # basr trick | 680 | 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14 |
681 | 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using | ||
682 | 400382: a7 da ff fa ahi %r13,-6 # basr trick | ||
641 | return(5+b); | 683 | return(5+b); |
642 | # Huge main program | 684 | # Huge main program |
643 | 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2 | 685 | 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2 |
644 | 686 | ||
645 | # Function epilogue below | 687 | # Function epilogue below |
646 | 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14 | 688 | 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14 |
647 | 40038e: 07 fe br %r14 # return | 689 | 40038e: 07 fe br %r14 # return |
648 | } | 690 | } |
649 | 691 | ||
650 | Comments on the function main | 692 | Comments on the function main |
651 | ----------------------------- | 693 | ----------------------------- |
652 | 1) The compiler did this function optimally ( 8-) ) | 694 | 1) The compiler did this function optimally ( 8-) ):: |
653 | 695 | ||
654 | Literal pool for main. | 696 | Literal pool for main. |
655 | 400390: ff ff ff ec .long 0xffffffec | 697 | 400390: ff ff ff ec .long 0xffffffec |
656 | main(int argc,char *argv[]) | 698 | main(int argc,char *argv[]) |
657 | { # Function prologue below | 699 | { # Function prologue below |
658 | 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers | 700 | 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers |
659 | 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0 | 701 | 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0 |
660 | 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving | 702 | 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving |
661 | 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to | 703 | 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to |
662 | 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool | 704 | 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool |
663 | 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain | 705 | 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain |
664 | 706 | ||
665 | return(test(5)); # Main Program Below | 707 | return(test(5)); # Main Program Below |
666 | 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from | 708 | 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from |
667 | # literal pool | 709 | # literal pool |
668 | 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5 | 710 | 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5 |
669 | 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return | 711 | 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return |
670 | # address using branch & save instruction. | 712 | # address using branch & save instruction. |
671 | 713 | ||
672 | # Function Epilogue below | 714 | # Function Epilogue below |
673 | 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers. | 715 | 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers. |
674 | 4003b8: 07 fe br %r14 # return to do program exit | 716 | 4003b8: 07 fe br %r14 # return to do program exit |
675 | } | 717 | } |
676 | 718 | ||
677 | 719 | ||
678 | Compiler updates | 720 | Compiler updates |
679 | ---------------- | 721 | ---------------- |
680 | 722 | ||
681 | main(int argc,char *argv[]) | 723 | :: |
682 | { | 724 | |
683 | 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15) | 725 | main(int argc,char *argv[]) |
684 | 400500: a7 d5 00 04 bras %r13,400508 <main+0xc> | 726 | { |
685 | 400504: 00 40 04 f4 .long 0x004004f4 | 727 | 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15) |
686 | # compiler now puts constant pool in code to so it saves an instruction | 728 | 400500: a7 d5 00 04 bras %r13,400508 <main+0xc> |
687 | 400508: 18 0f lr %r0,%r15 | 729 | 400504: 00 40 04 f4 .long 0x004004f4 |
688 | 40050a: a7 fa ff a0 ahi %r15,-96 | 730 | # compiler now puts constant pool in code to so it saves an instruction |
689 | 40050e: 50 00 f0 00 st %r0,0(%r15) | 731 | 400508: 18 0f lr %r0,%r15 |
732 | 40050a: a7 fa ff a0 ahi %r15,-96 | ||
733 | 40050e: 50 00 f0 00 st %r0,0(%r15) | ||
690 | return(test(5)); | 734 | return(test(5)); |
691 | 400512: 58 10 d0 00 l %r1,0(%r13) | 735 | 400512: 58 10 d0 00 l %r1,0(%r13) |
692 | 400516: a7 28 00 05 lhi %r2,5 | 736 | 400516: a7 28 00 05 lhi %r2,5 |
693 | 40051a: 0d e1 basr %r14,%r1 | 737 | 40051a: 0d e1 basr %r14,%r1 |
694 | # compiler adds 1 extra instruction to epilogue this is done to | 738 | # compiler adds 1 extra instruction to epilogue this is done to |
695 | # avoid processor pipeline stalls owing to data dependencies on g5 & | 739 | # avoid processor pipeline stalls owing to data dependencies on g5 & |
696 | # above as register 14 in the old code was needed directly after being loaded | 740 | # above as register 14 in the old code was needed directly after being loaded |
697 | # by the lm %r11,%r15,140(%r15) for the br %14. | 741 | # by the lm %r11,%r15,140(%r15) for the br %14. |
698 | 40051c: 58 40 f0 98 l %r4,152(%r15) | 742 | 40051c: 58 40 f0 98 l %r4,152(%r15) |
699 | 400520: 98 7f f0 7c lm %r7,%r15,124(%r15) | 743 | 400520: 98 7f f0 7c lm %r7,%r15,124(%r15) |
700 | 400524: 07 f4 br %r4 | 744 | 400524: 07 f4 br %r4 |
701 | } | 745 | } |
702 | 746 | ||
703 | 747 | ||
704 | Hartmut ( our compiler developer ) also has been threatening to take out the | 748 | Hartmut ( our compiler developer ) also has been threatening to take out the |
@@ -709,38 +753,39 @@ have been warned. | |||
709 | -------------------------------------- | 753 | -------------------------------------- |
710 | 754 | ||
711 | If you understand the stuff above you'll understand the stuff | 755 | If you understand the stuff above you'll understand the stuff |
712 | below too so I'll avoid repeating myself & just say that | 756 | below too so I'll avoid repeating myself & just say that |
713 | some of the instructions have g's on the end of them to indicate | 757 | some of the instructions have g's on the end of them to indicate |
714 | they are 64 bit & the stack offsets are bigger, | 758 | they are 64 bit & the stack offsets are bigger, |
715 | the only other difference you'll find between 32 & 64 bit is that | 759 | the only other difference you'll find between 32 & 64 bit is that |
716 | we now use f4 & f6 for floating point arguments on 64 bit. | 760 | we now use f4 & f6 for floating point arguments on 64 bit:: |
717 | 00000000800005b0 <test>: | 761 | |
718 | int test(int b) | 762 | 00000000800005b0 <test>: |
719 | { | 763 | int test(int b) |
764 | { | ||
720 | return(5+b); | 765 | return(5+b); |
721 | 800005b0: a7 2a 00 05 ahi %r2,5 | 766 | 800005b0: a7 2a 00 05 ahi %r2,5 |
722 | 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer | 767 | 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer |
723 | 800005b8: 07 fe br %r14 | 768 | 800005b8: 07 fe br %r14 |
724 | 800005ba: 07 07 bcr 0,%r7 | 769 | 800005ba: 07 07 bcr 0,%r7 |
725 | 770 | ||
726 | 771 | ||
727 | } | 772 | } |
728 | 773 | ||
729 | 00000000800005bc <main>: | 774 | 00000000800005bc <main>: |
730 | main(int argc,char *argv[]) | 775 | main(int argc,char *argv[]) |
731 | { | 776 | { |
732 | 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15) | 777 | 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15) |
733 | 800005c2: b9 04 00 1f lgr %r1,%r15 | 778 | 800005c2: b9 04 00 1f lgr %r1,%r15 |
734 | 800005c6: a7 fb ff 60 aghi %r15,-160 | 779 | 800005c6: a7 fb ff 60 aghi %r15,-160 |
735 | 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15) | 780 | 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15) |
736 | return(test(5)); | 781 | return(test(5)); |
737 | 800005d0: a7 29 00 05 lghi %r2,5 | 782 | 800005d0: a7 29 00 05 lghi %r2,5 |
738 | # brasl allows jumps > 64k & is overkill here bras would do fine | 783 | # brasl allows jumps > 64k & is overkill here bras would do fine |
739 | 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test> | 784 | 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test> |
740 | 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15) | 785 | 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15) |
741 | 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15) | 786 | 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15) |
742 | 800005e6: 07 f4 br %r4 | 787 | 800005e6: 07 f4 br %r4 |
743 | } | 788 | } |
744 | 789 | ||
745 | 790 | ||
746 | 791 | ||
@@ -749,15 +794,15 @@ Compiling programs for debugging on Linux for s/390 & z/Architecture | |||
749 | -gdwarf-2 now works it should be considered the default debugging | 794 | -gdwarf-2 now works it should be considered the default debugging |
750 | format for s/390 & z/Architecture as it is more reliable for debugging | 795 | format for s/390 & z/Architecture as it is more reliable for debugging |
751 | shared libraries, normal -g debugging works much better now | 796 | shared libraries, normal -g debugging works much better now |
752 | Thanks to the IBM java compiler developers bug reports. | 797 | Thanks to the IBM java compiler developers bug reports. |
753 | 798 | ||
754 | This is typically done adding/appending the flags -g or -gdwarf-2 to the | 799 | This is typically done adding/appending the flags -g or -gdwarf-2 to the |
755 | CFLAGS & LDFLAGS variables Makefile of the program concerned. | 800 | CFLAGS & LDFLAGS variables Makefile of the program concerned. |
756 | 801 | ||
757 | If using gdb & you would like accurate displays of registers & | 802 | If using gdb & you would like accurate displays of registers & |
758 | stack traces compile without optimisation i.e make sure | 803 | stack traces compile without optimisation i.e make sure |
759 | that there is no -O2 or similar on the CFLAGS line of the Makefile & | 804 | that there is no -O2 or similar on the CFLAGS line of the Makefile & |
760 | the emitted gcc commands, obviously this will produce worse code | 805 | the emitted gcc commands, obviously this will produce worse code |
761 | ( not advisable for shipment ) but it is an aid to the debugging process. | 806 | ( not advisable for shipment ) but it is an aid to the debugging process. |
762 | 807 | ||
763 | This aids debugging because the compiler will copy parameters passed in | 808 | This aids debugging because the compiler will copy parameters passed in |
@@ -766,7 +811,7 @@ parameters will work, however some larger programs which use inline functions | |||
766 | will not compile without optimisation. | 811 | will not compile without optimisation. |
767 | 812 | ||
768 | Debugging with optimisation has since much improved after fixing | 813 | Debugging with optimisation has since much improved after fixing |
769 | some bugs, please make sure you are using gdb-5.0 or later developed | 814 | some bugs, please make sure you are using gdb-5.0 or later developed |
770 | after Nov'2000. | 815 | after Nov'2000. |
771 | 816 | ||
772 | 817 | ||
@@ -779,7 +824,7 @@ Notes | |||
779 | Addresses & values in the VM debugger are always hex never decimal | 824 | Addresses & values in the VM debugger are always hex never decimal |
780 | Address ranges are of the format <HexValue1>-<HexValue2> or | 825 | Address ranges are of the format <HexValue1>-<HexValue2> or |
781 | <HexValue1>.<HexValue2> | 826 | <HexValue1>.<HexValue2> |
782 | For example, the address range 0x2000 to 0x3000 can be described as 2000-3000 | 827 | For example, the address range 0x2000 to 0x3000 can be described as 2000-3000 |
783 | or 2000.1000 | 828 | or 2000.1000 |
784 | 829 | ||
785 | The VM Debugger is case insensitive. | 830 | The VM Debugger is case insensitive. |
@@ -798,27 +843,31 @@ operands are nibble (half byte aligned). | |||
798 | So if you have an objdump listing by hand, it is quite easy to follow, and if | 843 | So if you have an objdump listing by hand, it is quite easy to follow, and if |
799 | you don't have an objdump listing keep a copy of the s/390 Reference Summary | 844 | you don't have an objdump listing keep a copy of the s/390 Reference Summary |
800 | or alternatively the s/390 principles of operation next to you. | 845 | or alternatively the s/390 principles of operation next to you. |
801 | e.g. even I can guess that | 846 | e.g. even I can guess that |
802 | 0001AFF8' LR 180F CC 0 | 847 | 0001AFF8' LR 180F CC 0 |
803 | is a ( load register ) lr r0,r15 | 848 | is a ( load register ) lr r0,r15 |
804 | 849 | ||
805 | Also it is very easy to tell the length of a 390 instruction from the 2 most | 850 | Also it is very easy to tell the length of a 390 instruction from the 2 most |
806 | significant bits in the instruction (not that this info is really useful except | 851 | significant bits in the instruction (not that this info is really useful except |
807 | if you are trying to make sense of a hexdump of code). | 852 | if you are trying to make sense of a hexdump of code). |
808 | Here is a table | 853 | Here is a table |
854 | |||
855 | ======================= ================== | ||
809 | Bits Instruction Length | 856 | Bits Instruction Length |
810 | ------------------------------------------ | 857 | ======================= ================== |
811 | 00 2 Bytes | 858 | 00 2 Bytes |
812 | 01 4 Bytes | 859 | 01 4 Bytes |
813 | 10 4 Bytes | 860 | 10 4 Bytes |
814 | 11 6 Bytes | 861 | 11 6 Bytes |
862 | ======================= ================== | ||
815 | 863 | ||
816 | The debugger also displays other useful info on the same line such as the | 864 | The debugger also displays other useful info on the same line such as the |
817 | addresses being operated on destination addresses of branches & condition codes. | 865 | addresses being operated on destination addresses of branches & condition codes. |
818 | e.g. | 866 | e.g.:: |
819 | 00019736' AHI A7DAFF0E CC 1 | 867 | |
820 | 000198BA' BRC A7840004 -> 000198C2' CC 0 | 868 | 00019736' AHI A7DAFF0E CC 1 |
821 | 000198CE' STM 900EF068 >> 0FA95E78 CC 2 | 869 | 000198BA' BRC A7840004 -> 000198C2' CC 0 |
870 | 000198CE' STM 900EF068 >> 0FA95E78 CC 2 | ||
822 | 871 | ||
823 | 872 | ||
824 | 873 | ||
@@ -826,54 +875,79 @@ Useful VM debugger commands | |||
826 | --------------------------- | 875 | --------------------------- |
827 | 876 | ||
828 | I suppose I'd better mention this before I start | 877 | I suppose I'd better mention this before I start |
829 | to list the current active traces do | 878 | to list the current active traces do:: |
830 | Q TR | 879 | |
880 | Q TR | ||
881 | |||
831 | there can be a maximum of 255 of these per set | 882 | there can be a maximum of 255 of these per set |
832 | ( more about trace sets later ). | 883 | ( more about trace sets later ). |
833 | To stop traces issue a | 884 | |
834 | TR END. | 885 | To stop traces issue a:: |
835 | To delete a particular breakpoint issue | 886 | |
836 | TR DEL <breakpoint number> | 887 | TR END. |
888 | |||
889 | To delete a particular breakpoint issue:: | ||
890 | |||
891 | TR DEL <breakpoint number> | ||
837 | 892 | ||
838 | The PA1 key drops to CP mode so you can issue debugger commands, | 893 | The PA1 key drops to CP mode so you can issue debugger commands, |
839 | Doing alt c (on my 3270 console at least ) clears the screen. | 894 | Doing alt c (on my 3270 console at least ) clears the screen. |
895 | |||
840 | hitting b <enter> comes back to the running operating system | 896 | hitting b <enter> comes back to the running operating system |
841 | from cp mode ( in our case linux ). | 897 | from cp mode ( in our case linux ). |
898 | |||
842 | It is typically useful to add shortcuts to your profile.exec file | 899 | It is typically useful to add shortcuts to your profile.exec file |
843 | if you have one ( this is roughly equivalent to autoexec.bat in DOS ). | 900 | if you have one ( this is roughly equivalent to autoexec.bat in DOS ). |
844 | Here are a few from mine. | 901 | Here are a few from mine:: |
845 | /* this gives me command history on issuing f12 */ | 902 | |
846 | set pf12 retrieve | 903 | /* this gives me command history on issuing f12 */ |
847 | /* this continues */ | 904 | set pf12 retrieve |
848 | set pf8 imm b | 905 | /* this continues */ |
849 | /* goes to trace set a */ | 906 | set pf8 imm b |
850 | set pf1 imm tr goto a | 907 | /* goes to trace set a */ |
851 | /* goes to trace set b */ | 908 | set pf1 imm tr goto a |
852 | set pf2 imm tr goto b | 909 | /* goes to trace set b */ |
853 | /* goes to trace set c */ | 910 | set pf2 imm tr goto b |
854 | set pf3 imm tr goto c | 911 | /* goes to trace set c */ |
912 | set pf3 imm tr goto c | ||
855 | 913 | ||
856 | 914 | ||
857 | 915 | ||
858 | Instruction Tracing | 916 | Instruction Tracing |
859 | ------------------- | 917 | ------------------- |
860 | Setting a simple breakpoint | 918 | Setting a simple breakpoint:: |
861 | TR I PSWA <address> | 919 | |
862 | To debug a particular function try | 920 | TR I PSWA <address> |
863 | TR I R <function address range> | 921 | |
864 | TR I on its own will single step. | 922 | To debug a particular function try:: |
865 | TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics | 923 | |
866 | e.g. | 924 | TR I R <function address range> |
867 | TR I DATA 4D R 0197BC.4000 | 925 | TR I on its own will single step. |
926 | TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics | ||
927 | |||
928 | e.g.:: | ||
929 | |||
930 | TR I DATA 4D R 0197BC.4000 | ||
931 | |||
868 | will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000 | 932 | will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000 |
933 | |||
869 | if you were inclined you could add traces for all branch instructions & | 934 | if you were inclined you could add traces for all branch instructions & |
870 | suffix them with the run prefix so you would have a backtrace on screen | 935 | suffix them with the run prefix so you would have a backtrace on screen |
871 | when a program crashes. | 936 | when a program crashes:: |
872 | TR BR <INTO OR FROM> will trace branches into or out of an address. | 937 | |
873 | e.g. | 938 | TR BR <INTO OR FROM> will trace branches into or out of an address. |
874 | TR BR INTO 0 is often quite useful if a program is getting awkward & deciding | 939 | |
940 | e.g.:: | ||
941 | |||
942 | TR BR INTO 0 | ||
943 | |||
944 | is often quite useful if a program is getting awkward & deciding | ||
875 | to branch to 0 & crashing as this will stop at the address before it jumps to 0. | 945 | to branch to 0 & crashing as this will stop at the address before it jumps to 0. |
876 | TR I R <address range> RUN cmd d g | 946 | |
947 | :: | ||
948 | |||
949 | TR I R <address range> RUN cmd d g | ||
950 | |||
877 | single steps a range of addresses but stays running & | 951 | single steps a range of addresses but stays running & |
878 | displays the gprs on each step. | 952 | displays the gprs on each step. |
879 | 953 | ||
@@ -881,93 +955,129 @@ displays the gprs on each step. | |||
881 | 955 | ||
882 | Displaying & modifying Registers | 956 | Displaying & modifying Registers |
883 | -------------------------------- | 957 | -------------------------------- |
884 | D G will display all the gprs | 958 | D G |
885 | Adding a extra G to all the commands is necessary to access the full 64 bit | 959 | will display all the gprs |
960 | |||
961 | Adding an extra G to all the commands is necessary to access the full 64 bit | ||
886 | content in VM on z/Architecture. Obviously this isn't required for access | 962 | content in VM on z/Architecture. Obviously this isn't required for access |
887 | registers as these are still 32 bit. | 963 | registers as these are still 32 bit. |
888 | e.g. DGG instead of DG | 964 | |
889 | D X will display all the control registers | 965 | e.g. |
890 | D AR will display all the access registers | 966 | |
891 | D AR4-7 will display access registers 4 to 7 | 967 | DGG |
892 | CPU ALL D G will display the GRPS of all CPUS in the configuration | 968 | instead of DG |
893 | D PSW will display the current PSW | 969 | |
894 | st PSW 2000 will put the value 2000 into the PSW & | 970 | D X |
895 | cause crash your machine. | 971 | will display all the control registers |
896 | D PREFIX displays the prefix offset | 972 | D AR |
973 | will display all the access registers | ||
974 | D AR4-7 | ||
975 | will display access registers 4 to 7 | ||
976 | CPU ALL D G | ||
977 | will display the GPRs of all CPUs in the configuration | ||
978 | D PSW | ||
979 | will display the current PSW | ||
980 | st PSW 2000 | ||
981 | will put the value 2000 into the PSW & crash your machine. | ||
982 | D PREFIX | ||
983 | displays the prefix offset | ||
897 | 984 | ||
898 | 985 | ||
899 | Displaying Memory | 986 | Displaying Memory |
900 | ----------------- | 987 | ----------------- |
901 | To display memory mapped using the current PSW's mapping try | 988 | To display memory mapped using the current PSW's mapping try:: |
902 | D <range> | 989 | |
990 | D <range> | ||
991 | |||
903 | To make VM display a message each time it hits a particular address and | 992 | To make VM display a message each time it hits a particular address and |
904 | continue try | 993 | continue try: |
905 | D I<range> will disassemble/display a range of instructions. | 994 | |
906 | ST addr 32 bit word will store a 32 bit aligned address | 995 | D I<range> |
907 | D T<range> will display the EBCDIC in an address (if you are that way inclined) | 996 | will disassemble/display a range of instructions. |
908 | D R<range> will display real addresses ( without DAT ) but with prefixing. | 997 | |
998 | ST addr 32 bit word | ||
999 | will store a 32 bit aligned address | ||
1000 | D T<range> | ||
1001 | will display the EBCDIC in an address (if you are that way inclined) | ||
1002 | D R<range> | ||
1003 | will display real addresses ( without DAT ) but with prefixing. | ||
1004 | |||
909 | There are other complex options to display if you need to get at say home space | 1005 | There are other complex options to display if you need to get at say home space |
910 | but are in primary space the easiest thing to do is to temporarily | 1006 | but are in primary space the easiest thing to do is to temporarily |
911 | modify the PSW to the other addressing mode, display the stuff & then | 1007 | modify the PSW to the other addressing mode, display the stuff & then |
912 | restore it. | 1008 | restore it. |
913 | 1009 | ||
914 | 1010 | ||
915 | 1011 | ||
916 | Hints | 1012 | Hints |
917 | ----- | 1013 | ----- |
918 | If you want to issue a debugger command without halting your virtual machine | 1014 | If you want to issue a debugger command without halting your virtual machine |
919 | with the PA1 key try prefixing the command with #CP e.g. | 1015 | with the PA1 key try prefixing the command with #CP e.g.:: |
920 | #cp tr i pswa 2000 | 1016 | |
1017 | #cp tr i pswa 2000 | ||
1018 | |||
921 | also suffixing most debugger commands with RUN will cause them not | 1019 | also suffixing most debugger commands with RUN will cause them not |
922 | to stop just display the mnemonic at the current instruction on the console. | 1020 | to stop just display the mnemonic at the current instruction on the console. |
1021 | |||
923 | If you have several breakpoints you want to put into your program & | 1022 | If you have several breakpoints you want to put into your program & |
924 | you get fed up of cross referencing with System.map | 1023 | you get fed up of cross referencing with System.map |
925 | you can do the following trick for several symbols. | 1024 | you can do the following trick for several symbols. |
926 | grep do_signal System.map | ||
927 | which emits the following among other things | ||
928 | 0001f4e0 T do_signal | ||
929 | now you can do | ||
930 | 1025 | ||
931 | TR I PSWA 0001f4e0 cmd msg * do_signal | 1026 | :: |
1027 | |||
1028 | grep do_signal System.map | ||
1029 | |||
1030 | which emits the following among other things:: | ||
1031 | |||
1032 | 0001f4e0 T do_signal | ||
1033 | |||
1034 | now you can do:: | ||
1035 | |||
1036 | TR I PSWA 0001f4e0 cmd msg * do_signal | ||
1037 | |||
932 | This sends a message to your own console each time do_signal is entered. | 1038 | This sends a message to your own console each time do_signal is entered. |
933 | ( As an aside I wrote a perl script once which automatically generated a REXX | 1039 | ( As an aside I wrote a perl script once which automatically generated a REXX |
934 | script with breakpoints on every kernel procedure, this isn't a good idea | 1040 | script with breakpoints on every kernel procedure, this isn't a good idea |
935 | because there are thousands of these routines & VM can only set 255 breakpoints | 1041 | because there are thousands of these routines & VM can only set 255 breakpoints |
936 | at a time so you nearly had to spend as long pruning the file down as you would | 1042 | at a time so you nearly had to spend as long pruning the file down as you would |
937 | entering the msgs by hand), however, the trick might be useful for a single | 1043 | entering the msgs by hand), however, the trick might be useful for a single |
938 | object file. In the 3270 terminal emulator x3270 there is a very useful option | 1044 | object file. In the 3270 terminal emulator x3270 there is a very useful option |
939 | in the file menu called "Save Screen In File" - this is very good for keeping a | 1045 | in the file menu called "Save Screen In File" - this is very good for keeping a |
940 | copy of traces. | 1046 | copy of traces. |
941 | 1047 | ||
942 | From CMS help <command name> will give you online help on a particular command. | 1048 | From CMS help <command name> will give you online help on a particular command. |
943 | e.g. | 1049 | e.g.:: |
944 | HELP DISPLAY | 1050 | |
1051 | HELP DISPLAY | ||
945 | 1052 | ||
946 | Also CP has a file called profile.exec which automatically gets called | 1053 | Also CP has a file called profile.exec which automatically gets called |
947 | on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session | 1054 | on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session |
948 | CP has a feature similar to doskey, it may be useful for you to | 1055 | CP has a feature similar to doskey, it may be useful for you to |
949 | use profile.exec to define some keystrokes. | 1056 | use profile.exec to define some keystrokes. |
950 | e.g. | 1057 | |
951 | SET PF9 IMM B | 1058 | SET PF9 IMM B |
952 | This does a single step in VM on pressing F9. | 1059 | This does a single step in VM on pressing F9. |
1060 | |||
953 | SET PF10 ^ | 1061 | SET PF10 ^ |
954 | This sets up the ^ key. | 1062 | This sets up the ^ key. |
955 | which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed directly | 1063 | which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed |
956 | into some 3270 consoles. | 1064 | directly into some 3270 consoles. |
1065 | |||
957 | SET PF11 ^- | 1066 | SET PF11 ^- |
958 | This types the starting keystrokes for a sysrq see SysRq below. | 1067 | This types the starting keystrokes for a sysrq see SysRq below. |
959 | SET PF12 RETRIEVE | 1068 | SET PF12 RETRIEVE |
960 | This retrieves command history on pressing F12. | 1069 | This retrieves command history on pressing F12. |
961 | 1070 | ||
962 | 1071 | ||
963 | Sometimes in VM the display is set up to scroll automatically this | 1072 | Sometimes in VM the display is set up to scroll automatically this |
964 | can be very annoying if there are messages you wish to look at | 1073 | can be very annoying if there are messages you wish to look at |
965 | to stop this do | 1074 | to stop this do |
1075 | |||
966 | TERM MORE 255 255 | 1076 | TERM MORE 255 255 |
967 | This will nearly stop automatic screen updates, however it will | 1077 | This will nearly stop automatic screen updates, however it will |
968 | cause a denial of service if lots of messages go to the 3270 console, | 1078 | cause a denial of service if lots of messages go to the 3270 console, |
969 | so it would be foolish to use this as the default on a production machine. | 1079 | so it would be foolish to use this as the default on a production machine. |
970 | 1080 | ||
971 | 1081 | ||
972 | Tracing particular processes | 1082 | Tracing particular processes |
973 | ---------------------------- | 1083 | ---------------------------- |
@@ -976,69 +1086,116 @@ very seldom collide with text segments of user programs ( thanks Martin ), | |||
976 | this simplifies debugging the kernel. | 1086 | this simplifies debugging the kernel. |
977 | However it is quite common for user processes to have addresses which collide | 1087 | However it is quite common for user processes to have addresses which collide |
978 | this can make debugging a particular process under VM painful under normal | 1088 | this can make debugging a particular process under VM painful under normal |
979 | circumstances as the process may change when doing a | 1089 | circumstances as the process may change when doing a:: |
980 | TR I R <address range>. | 1090 | |
1091 | TR I R <address range>. | ||
1092 | |||
981 | Thankfully after reading VM's online help I figured out how to debug | 1093 | Thankfully after reading VM's online help I figured out how to debug |
982 | a particular process. | 1094 | a particular process. |
983 | 1095 | ||
984 | Your first problem is to find the STD ( segment table designation ) | 1096 | Your first problem is to find the STD ( segment table designation ) |
985 | of the program you wish to debug. | 1097 | of the program you wish to debug. |
986 | There are several ways you can do this here are a few | 1098 | There are several ways you can do this here are a few |
987 | 1) objdump --syms <program to be debugged> | grep main | 1099 | |
988 | To get the address of main in the program. | 1100 | Run:: |
989 | tr i pswa <address of main> | 1101 | |
1102 | objdump --syms <program to be debugged> | grep main | ||
1103 | |||
1104 | To get the address of main in the program. Then:: | ||
1105 | |||
1106 | tr i pswa <address of main> | ||
1107 | |||
990 | Start the program, if VM drops to CP on what looks like the entry | 1108 | Start the program, if VM drops to CP on what looks like the entry |
991 | point of the main function this is most likely the process you wish to debug. | 1109 | point of the main function this is most likely the process you wish to debug. |
992 | Now do a D X13 or D XG13 on z/Architecture. | 1110 | Now do a D X13 or D XG13 on z/Architecture. |
993 | On 31 bit the STD is bits 1-19 ( the STO segment table origin ) | 1111 | |
1112 | On 31 bit the STD is bits 1-19 ( the STO segment table origin ) | ||
994 | & 25-31 ( the STL segment table length ) of CR13. | 1113 | & 25-31 ( the STL segment table length ) of CR13. |
995 | now type | 1114 | |
996 | TR I R STD <CR13's value> 0.7fffffff | 1115 | now type:: |
997 | e.g. | 1116 | |
998 | TR I R STD 8F32E1FF 0.7fffffff | 1117 | TR I R STD <CR13's value> 0.7fffffff |
999 | Another very useful variation is | 1118 | |
1000 | TR STORE INTO STD <CR13's value> <address range> | 1119 | e.g.:: |
1120 | |||
1121 | TR I R STD 8F32E1FF 0.7fffffff | ||
1122 | |||
1123 | Another very useful variation is:: | ||
1124 | |||
1125 | TR STORE INTO STD <CR13's value> <address range> | ||
1126 | |||
1001 | for finding out when a particular variable changes. | 1127 | for finding out when a particular variable changes. |
1002 | 1128 | ||
1003 | An alternative way of finding the STD of a currently running process | 1129 | An alternative way of finding the STD of a currently running process |
1004 | is to do the following, ( this method is more complex but | 1130 | is to do the following, ( this method is more complex but |
1005 | could be quite convenient if you aren't updating the kernel much & | 1131 | could be quite convenient if you aren't updating the kernel much & |
1006 | so your kernel structures will stay constant for a reasonable period of | 1132 | so your kernel structures will stay constant for a reasonable period of |
1007 | time ). | 1133 | time ). |
1008 | 1134 | ||
1009 | grep task /proc/<pid>/status | 1135 | :: |
1010 | from this you should see something like | 1136 | |
1011 | task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68 | 1137 | grep task /proc/<pid>/status |
1138 | |||
1139 | from this you should see something like:: | ||
1140 | |||
1141 | task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68 | ||
1142 | |||
1012 | This now gives you a pointer to the task structure. | 1143 | This now gives you a pointer to the task structure. |
1013 | Now make CC:="s390-gcc -g" kernel/sched.s | 1144 | |
1145 | Now make:: | ||
1146 | |||
1147 | CC:="s390-gcc -g" kernel/sched.s | ||
1148 | |||
1014 | To get the task_struct stabinfo. | 1149 | To get the task_struct stabinfo. |
1150 | |||
1015 | ( task_struct is defined in include/linux/sched.h ). | 1151 | ( task_struct is defined in include/linux/sched.h ). |
1152 | |||
1016 | Now we want to look at | 1153 | Now we want to look at |
1017 | task->active_mm->pgd | 1154 | task->active_mm->pgd |
1155 | |||
1018 | on my machine the active_mm in the task structure stab is | 1156 | on my machine the active_mm in the task structure stab is |
1019 | active_mm:(4,12),672,32 | 1157 | active_mm:(4,12),672,32 |
1158 | |||
1020 | its offset is 672/8=84=0x54 | 1159 | its offset is 672/8=84=0x54 |
1160 | |||
1021 | the pgd member in the mm_struct stab is | 1161 | the pgd member in the mm_struct stab is |
1022 | pgd:(4,6)=*(29,5),96,32 | 1162 | pgd:(4,6)=*(29,5),96,32 |
1023 | so its offset is 96/8=12=0xc | 1163 | so its offset is 96/8=12=0xc |
1024 | 1164 | ||
1025 | so we'll | 1165 | so we'll:: |
1026 | hexdump -s 0xf160054 /dev/mem | more | 1166 | |
1167 | hexdump -s 0xf160054 /dev/mem | more | ||
1168 | |||
1027 | i.e. task_struct+active_mm offset | 1169 | i.e. task_struct+active_mm offset |
1028 | to look at the active_mm member | 1170 | to look at the active_mm member:: |
1029 | f160054 0fee cc60 0019 e334 0000 0000 0000 0011 | 1171 | |
1030 | hexdump -s 0x0feecc6c /dev/mem | more | 1172 | f160054 0fee cc60 0019 e334 0000 0000 0000 0011 |
1031 | i.e. active_mm+pgd offset | 1173 | |
1032 | feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010 | 1174 | :: |
1175 | |||
1176 | hexdump -s 0x0feecc6c /dev/mem | more | ||
1177 | |||
1178 | i.e. active_mm+pgd offset:: | ||
1179 | |||
1180 | feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010 | ||
1181 | |||
1033 | we get something like | 1182 | we get something like |
1034 | now do | 1183 | now do:: |
1035 | TR I R STD <pgd|0x7f> 0.7fffffff | 1184 | |
1185 | TR I R STD <pgd|0x7f> 0.7fffffff | ||
1186 | |||
1036 | i.e. the 0x7f is added because the pgd only | 1187 | i.e. the 0x7f is added because the pgd only |
1037 | gives the page table origin & we need to set the low bits | 1188 | gives the page table origin & we need to set the low bits |
1038 | to the maximum possible segment table length. | 1189 | to the maximum possible segment table length. |
1039 | TR I R STD 0f2c007f 0.7fffffff | 1190 | |
1040 | on z/Architecture you'll probably need to do | 1191 | :: |
1041 | TR I R STD <pgd|0x7> 0.ffffffffffffffff | 1192 | |
1193 | TR I R STD 0f2c007f 0.7fffffff | ||
1194 | |||
1195 | on z/Architecture you'll probably need to do:: | ||
1196 | |||
1197 | TR I R STD <pgd|0x7> 0.ffffffffffffffff | ||
1198 | |||
1042 | to set the TableType to 0x1 & the Table length to 3. | 1199 | to set the TableType to 0x1 & the Table length to 3. |
1043 | 1200 | ||
1044 | 1201 | ||
@@ -1051,40 +1208,51 @@ You can restart linux & trace these using the tr prog <range or value> trace | |||
1051 | option. | 1208 | option. |
1052 | 1209 | ||
1053 | 1210 | ||
1054 | The most common ones you will normally be tracing for are | 1211 | The most common ones you will normally be tracing for are: |
1055 | 1=operation exception | 1212 | |
1056 | 2=privileged operation exception | 1213 | - 1=operation exception |
1057 | 4=protection exception | 1214 | - 2=privileged operation exception |
1058 | 5=addressing exception | 1215 | - 4=protection exception |
1059 | 6=specification exception | 1216 | - 5=addressing exception |
1060 | 10=segment translation exception | 1217 | - 6=specification exception |
1061 | 11=page translation exception | 1218 | - 10=segment translation exception |
1219 | - 11=page translation exception | ||
1062 | 1220 | ||
1063 | The full list of these is on page 22 of the current s/390 Reference Summary. | 1221 | The full list of these is on page 22 of the current s/390 Reference Summary. |
1064 | e.g. | 1222 | e.g. |
1223 | |||
1065 | tr prog 10 will trace segment translation exceptions. | 1224 | tr prog 10 will trace segment translation exceptions. |
1225 | |||
1066 | tr prog on its own will trace all program interruption codes. | 1226 | tr prog on its own will trace all program interruption codes. |
1067 | 1227 | ||
1068 | Trace Sets | 1228 | Trace Sets |
1069 | ---------- | 1229 | ---------- |
1070 | On starting VM you are initially in the INITIAL trace set. | 1230 | On starting VM you are initially in the INITIAL trace set. |
1071 | You can do a Q TR to verify this. | 1231 | You can do a Q TR to verify this. |
1072 | If you have a complex tracing situation where you wish to wait for instance | 1232 | If you have a complex tracing situation where you wish to wait for instance |
1073 | till a driver is open before you start tracing IO, but know in your | 1233 | till a driver is open before you start tracing IO, but know in your |
1074 | heart that you are going to have to make several runs through the code till you | 1234 | heart that you are going to have to make several runs through the code till you |
1075 | have a clue what's going on. | 1235 | have a clue what's going on. |
1236 | |||
1237 | What you can do is:: | ||
1238 | |||
1239 | TR I PSWA <Driver open address> | ||
1076 | 1240 | ||
1077 | What you can do is | ||
1078 | TR I PSWA <Driver open address> | ||
1079 | hit b to continue till breakpoint | 1241 | hit b to continue till breakpoint |
1242 | |||
1080 | reach the breakpoint | 1243 | reach the breakpoint |
1081 | now do your | 1244 | |
1082 | TR GOTO B | 1245 | now do your:: |
1083 | TR IO 7c08-7c09 inst int run | 1246 | |
1247 | TR GOTO B | ||
1248 | TR IO 7c08-7c09 inst int run | ||
1249 | |||
1084 | or whatever the IO channels you wish to trace are & hit b | 1250 | or whatever the IO channels you wish to trace are & hit b |
1085 | 1251 | ||
1086 | To go back to the initial trace set do | 1252 | To go back to the initial trace set do:: |
1087 | TR GOTO INITIAL | 1253 | |
1254 | TR GOTO INITIAL | ||
1255 | |||
1088 | & the TR I PSWA <Driver open address> will be the only active breakpoint again. | 1256 | & the TR I PSWA <Driver open address> will be the only active breakpoint again. |
1089 | 1257 | ||
1090 | 1258 | ||
@@ -1093,11 +1261,14 @@ Tracing linux syscalls under VM | |||
1093 | Syscalls are implemented on Linux for S390 by the Supervisor call instruction | 1261 | Syscalls are implemented on Linux for S390 by the Supervisor call instruction |
1094 | (SVC). There are 256 possibilities of these as the instruction is made up of a 0xA | 1262 | (SVC). There are 256 possibilities of these as the instruction is made up of a 0xA |
1095 | opcode and the second byte being the syscall number. They are traced using the | 1263 | opcode and the second byte being the syscall number. They are traced using the |
1096 | simple command: | 1264 | simple command:: |
1097 | TR SVC <Optional value or range> | 1265 | |
1266 | TR SVC <Optional value or range> | ||
1267 | |||
1098 | the syscalls are defined in linux/arch/s390/include/asm/unistd.h | 1268 | the syscalls are defined in linux/arch/s390/include/asm/unistd.h |
1099 | e.g. to trace all file opens just do | 1269 | e.g. to trace all file opens just do:: |
1100 | TR SVC 5 ( as this is the syscall number of open ) | 1270 | |
1271 | TR SVC 5 ( as this is the syscall number of open ) | ||
1101 | 1272 | ||
1102 | 1273 | ||
1103 | SMP Specific commands | 1274 | SMP Specific commands |
@@ -1105,33 +1276,51 @@ SMP Specific commands | |||
1105 | To find out how many cpus you have | 1276 | To find out how many cpus you have |
1106 | Q CPUS displays all the CPU's available to your virtual machine | 1277 | Q CPUS displays all the CPU's available to your virtual machine |
1107 | To find the cpu that the current cpu VM debugger commands are being directed at | 1278 | To find the cpu that the current cpu VM debugger commands are being directed at |
1108 | do Q CPU to change the current cpu VM debugger commands are being directed at do | 1279 | do Q CPU to change the current cpu VM debugger commands are being directed at |
1109 | CPU <desired cpu no> | 1280 | do:: |
1281 | |||
1282 | CPU <desired cpu no> | ||
1110 | 1283 | ||
1111 | On an SMP guest, to issue a command to all CPUs try prefixing the command with cpu | 1284 | On an SMP guest, to issue a command to all CPUs try prefixing the command with cpu |
1112 | all. To issue a command to a particular cpu try cpu <cpu number> e.g. | 1285 | all. To issue a command to a particular cpu try cpu <cpu number> e.g.:: |
1113 | CPU 01 TR I R 2000.3000 | 1286 | |
1287 | CPU 01 TR I R 2000.3000 | ||
1288 | |||
1114 | If you are running on a guest with several cpus & you have an IO related problem | 1289 | If you are running on a guest with several cpus & you have an IO related problem |
1115 | & cannot follow the flow of code but you know it isn't smp related. | 1290 | & cannot follow the flow of code but you know it isn't smp related. |
1116 | from the bash prompt issue | 1291 | |
1117 | shutdown -h now or halt. | 1292 | from the bash prompt issue:: |
1118 | do a Q CPUS to find out how many cpus you have | 1293 | |
1119 | detach each one of them from cp except cpu 0 | 1294 | shutdown -h now or halt. |
1120 | by issuing a | 1295 | |
1121 | DETACH CPU 01-(number of cpus in configuration) | 1296 | do a:: |
1297 | |||
1298 | Q CPUS | ||
1299 | |||
1300 | to find out how many cpus you have detach each one of them from cp except | ||
1301 | cpu 0 by issuing a:: | ||
1302 | |||
1303 | DETACH CPU 01-(number of cpus in configuration) | ||
1304 | |||
1122 | & boot linux again. | 1305 | & boot linux again. |
1123 | TR SIGP will trace inter processor signal processor instructions. | 1306 | |
1124 | DEFINE CPU 01-(number in configuration) | 1307 | TR SIGP |
1125 | will get your guests cpus back. | 1308 | will trace inter processor signal processor instructions. |
1309 | |||
1310 | DEFINE CPU 01-(number in configuration) | ||
1311 | will get your guests cpus back. | ||
1126 | 1312 | ||
1127 | 1313 | ||
1128 | Help for displaying ascii textstrings | 1314 | Help for displaying ascii textstrings |
1129 | ------------------------------------- | 1315 | ------------------------------------- |
1130 | On the very latest VM Nucleus'es VM can now display ascii | 1316 | On the very latest VM Nucleus'es VM can now display ascii |
1131 | ( thanks Neale for the hint ) by doing | 1317 | ( thanks Neale for the hint ) by doing:: |
1132 | D TX<lowaddr>.<len> | 1318 | |
1133 | e.g. | 1319 | D TX<lowaddr>.<len> |
1134 | D TX0.100 | 1320 | |
1321 | e.g.:: | ||
1322 | |||
1323 | D TX0.100 | ||
1135 | 1324 | ||
1136 | Alternatively | 1325 | Alternatively |
1137 | ============= | 1326 | ============= |
@@ -1143,66 +1332,85 @@ to your xterm if you are debugging from a linuxbox. | |||
1143 | This is quite useful when looking at a parameter passed in as a text string | 1332 | This is quite useful when looking at a parameter passed in as a text string |
1144 | under VM ( unless you are good at decoding ASCII in your head ). | 1333 | under VM ( unless you are good at decoding ASCII in your head ). |
1145 | 1334 | ||
1146 | e.g. consider tracing an open syscall | 1335 | e.g. consider tracing an open syscall:: |
1147 | TR SVC 5 | 1336 | |
1148 | We have stopped at a breakpoint | 1337 | TR SVC 5 |
1149 | 000151B0' SVC 0A05 -> 0001909A' CC 0 | 1338 | |
1339 | We have stopped at a breakpoint:: | ||
1340 | |||
1341 | 000151B0' SVC 0A05 -> 0001909A' CC 0 | ||
1150 | 1342 | ||
1151 | D 20.8 to check the SVC old psw in the prefix area and see was it from userspace | 1343 | D 20.8 to check the SVC old psw in the prefix area and see was it from userspace |
1152 | (for the layout of the prefix area consult the "Fixed Storage Locations" | 1344 | (for the layout of the prefix area consult the "Fixed Storage Locations" |
1153 | chapter of the s/390 Reference Summary if you have it available). | 1345 | chapter of the s/390 Reference Summary if you have it available). |
1154 | V00000020 070C2000 800151B2 | 1346 | |
1347 | :: | ||
1348 | |||
1349 | V00000020 070C2000 800151B2 | ||
1350 | |||
1155 | The problem state bit wasn't set & it's also too early in the boot sequence | 1351 | The problem state bit wasn't set & it's also too early in the boot sequence |
1156 | for it to be a userspace SVC if it was we would have to temporarily switch the | 1352 | for it to be a userspace SVC if it was we would have to temporarily switch the |
1157 | psw to user space addressing so we could get at the first parameter of the open | 1353 | psw to user space addressing so we could get at the first parameter of the open |
1158 | in gpr2. | 1354 | in gpr2. |
1159 | Next do a | 1355 | |
1160 | D G2 | 1356 | Next do a:: |
1161 | GPR 2 = 00014CB4 | 1357 | |
1162 | Now display what gpr2 is pointing to | 1358 | D G2 |
1163 | D 00014CB4.20 | 1359 | GPR 2 = 00014CB4 |
1164 | V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5 | 1360 | |
1165 | V00014CC4 FC00014C B4001001 E0001000 B8070707 | 1361 | Now display what gpr2 is pointing to:: |
1362 | |||
1363 | D 00014CB4.20 | ||
1364 | V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5 | ||
1365 | V00014CC4 FC00014C B4001001 E0001000 B8070707 | ||
1366 | |||
1166 | Now copy the text till the first 00 hex ( which is the end of the string ) | 1367 | Now copy the text till the first 00 hex ( which is the end of the string ) |
1167 | to an xterm & do hex2ascii on it. | 1368 | to an xterm & do hex2ascii on it:: |
1168 | hex2ascii 2F646576 2F636F6E 736F6C65 00 | 1369 | |
1169 | outputs | 1370 | hex2ascii 2F646576 2F636F6E 736F6C65 00 |
1170 | Decoded Hex:=/ d e v / c o n s o l e 0x00 | 1371 | |
1372 | outputs:: | ||
1373 | |||
1374 | Decoded Hex:=/ d e v / c o n s o l e 0x00 | ||
1375 | |||
1171 | We were opening the console device. | 1376 | We were opening the console device. |
1172 | 1377 | ||
1173 | You can compile the code below yourself for practice :-), | 1378 | You can compile the code below yourself for practice :-), |
1174 | /* | 1379 | |
1175 | * hex2ascii.c | 1380 | :: |
1176 | * a useful little tool for converting a hexadecimal command line to ascii | 1381 | |
1177 | * | 1382 | /* |
1178 | * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) | 1383 | * hex2ascii.c |
1179 | * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation. | 1384 | * a useful little tool for converting a hexadecimal command line to ascii |
1180 | */ | 1385 | * |
1181 | #include <stdio.h> | 1386 | * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) |
1182 | 1387 | * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation. | |
1183 | int main(int argc,char *argv[]) | 1388 | */ |
1184 | { | 1389 | #include <stdio.h> |
1185 | int cnt1,cnt2,len,toggle=0; | 1390 | |
1186 | int startcnt=1; | 1391 | int main(int argc,char *argv[]) |
1187 | unsigned char c,hex; | ||
1188 | |||
1189 | if(argc>1&&(strcmp(argv[1],"-a")==0)) | ||
1190 | startcnt=2; | ||
1191 | printf("Decoded Hex:="); | ||
1192 | for(cnt1=startcnt;cnt1<argc;cnt1++) | ||
1193 | { | 1392 | { |
1194 | len=strlen(argv[cnt1]); | 1393 | int cnt1,cnt2,len,toggle=0; |
1195 | for(cnt2=0;cnt2<len;cnt2++) | 1394 | int startcnt=1; |
1395 | unsigned char c,hex; | ||
1396 | |||
1397 | if(argc>1&&(strcmp(argv[1],"-a")==0)) | ||
1398 | startcnt=2; | ||
1399 | printf("Decoded Hex:="); | ||
1400 | for(cnt1=startcnt;cnt1<argc;cnt1++) | ||
1196 | { | 1401 | { |
1197 | c=argv[cnt1][cnt2]; | 1402 | len=strlen(argv[cnt1]); |
1198 | if(c>='0'&&c<='9') | 1403 | for(cnt2=0;cnt2<len;cnt2++) |
1404 | { | ||
1405 | c=argv[cnt1][cnt2]; | ||
1406 | if(c>='0'&&c<='9') | ||
1199 | c=c-'0'; | 1407 | c=c-'0'; |
1200 | if(c>='A'&&c<='F') | 1408 | if(c>='A'&&c<='F') |
1201 | c=c-'A'+10; | 1409 | c=c-'A'+10; |
1202 | if(c>='a'&&c<='f') | 1410 | if(c>='a'&&c<='f') |
1203 | c=c-'a'+10; | 1411 | c=c-'a'+10; |
1204 | switch(toggle) | 1412 | switch(toggle) |
1205 | { | 1413 | { |
1206 | case 0: | 1414 | case 0: |
1207 | hex=c<<4; | 1415 | hex=c<<4; |
1208 | toggle=1; | 1416 | toggle=1; |
@@ -1224,11 +1432,11 @@ int main(int argc,char *argv[]) | |||
1224 | } | 1432 | } |
1225 | toggle=0; | 1433 | toggle=0; |
1226 | break; | 1434 | break; |
1227 | } | 1435 | } |
1436 | } | ||
1228 | } | 1437 | } |
1438 | printf("\n"); | ||
1229 | } | 1439 | } |
1230 | printf("\n"); | ||
1231 | } | ||
1232 | 1440 | ||
1233 | 1441 | ||
1234 | 1442 | ||
@@ -1248,48 +1456,58 @@ should be able to sniff further back if you follow the following tricks. | |||
1248 | 1) A kernel address should be easy to recognise since it is in | 1456 | 1) A kernel address should be easy to recognise since it is in |
1249 | primary space & the problem state bit isn't set & also | 1457 | primary space & the problem state bit isn't set & also |
1250 | The Hi bit of the address is set. | 1458 | The Hi bit of the address is set. |
1251 | 2) Another backchain should also be easy to recognise since it is an | 1459 | 2) Another backchain should also be easy to recognise since it is an |
1252 | address pointing to another address approximately 100 bytes or 0x70 hex | 1460 | address pointing to another address approximately 100 bytes or 0x70 hex |
1253 | behind the current stackpointer. | 1461 | behind the current stackpointer. |
1254 | 1462 | ||
1255 | 1463 | ||
1256 | Here is some practice. | 1464 | Here is some practice. |
1465 | |||
1257 | boot the kernel & hit PA1 at some random time | 1466 | boot the kernel & hit PA1 at some random time |
1258 | d g to display the gprs, this should display something like | 1467 | |
1259 | GPR 0 = 00000001 00156018 0014359C 00000000 | 1468 | d g to display the gprs, this should display something like:: |
1260 | GPR 4 = 00000001 001B8888 000003E0 00000000 | 1469 | |
1261 | GPR 8 = 00100080 00100084 00000000 000FE000 | 1470 | GPR 0 = 00000001 00156018 0014359C 00000000 |
1262 | GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8 | 1471 | GPR 4 = 00000001 001B8888 000003E0 00000000 |
1472 | GPR 8 = 00100080 00100084 00000000 000FE000 | ||
1473 | GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8 | ||
1474 | |||
1263 | Note that GPR14 is a return address but as we are real men we are going to | 1475 | Note that GPR14 is a return address but as we are real men we are going to |
1264 | trace the stack. | 1476 | trace the stack. |
1265 | display 0x40 bytes after the stack pointer. | 1477 | display 0x40 bytes after the stack pointer:: |
1266 | 1478 | ||
1267 | V000FFED8 000FFF38 8001B838 80014C8E 000FFF38 | 1479 | V000FFED8 000FFF38 8001B838 80014C8E 000FFF38 |
1268 | V000FFEE8 00000000 00000000 000003E0 00000000 | 1480 | V000FFEE8 00000000 00000000 000003E0 00000000 |
1269 | V000FFEF8 00100080 00100084 00000000 000FE000 | 1481 | V000FFEF8 00100080 00100084 00000000 000FE000 |
1270 | V000FFF08 00010400 8001B2DC 8001B36A 000FFED8 | 1482 | V000FFF08 00010400 8001B2DC 8001B36A 000FFED8 |
1271 | 1483 | ||
1272 | 1484 | ||
1273 | Ah now look at what's in sp+56 (sp+0x38) this is 8001B36A our saved r14 if | 1485 | Ah now look at what's in sp+56 (sp+0x38) this is 8001B36A our saved r14 if |
1274 | you look above at our stackframe & also agrees with GPR14. | 1486 | you look above at our stackframe & also agrees with GPR14. |
1275 | 1487 | ||
1276 | now backchain | 1488 | now backchain:: |
1277 | d 000FFF38.40 | 1489 | |
1278 | we now are taking the contents of SP to get our first backchain. | 1490 | d 000FFF38.40 |
1279 | 1491 | ||
1280 | V000FFF38 000FFFA0 00000000 00014995 00147094 | 1492 | we now are taking the contents of SP to get our first backchain:: |
1281 | V000FFF48 00147090 001470A0 000003E0 00000000 | 1493 | |
1282 | V000FFF58 00100080 00100084 00000000 001BF1D0 | 1494 | V000FFF38 000FFFA0 00000000 00014995 00147094 |
1283 | V000FFF68 00010400 800149BA 80014CA6 000FFF38 | 1495 | V000FFF48 00147090 001470A0 000003E0 00000000 |
1496 | V000FFF58 00100080 00100084 00000000 001BF1D0 | ||
1497 | V000FFF68 00010400 800149BA 80014CA6 000FFF38 | ||
1284 | 1498 | ||
1285 | This displays a 2nd return address of 80014CA6 | 1499 | This displays a 2nd return address of 80014CA6 |
1286 | 1500 | ||
1287 | now do d 000FFFA0.40 for our 3rd backchain | 1501 | now do:: |
1502 | |||
1503 | d 000FFFA0.40 | ||
1504 | |||
1505 | for our 3rd backchain:: | ||
1288 | 1506 | ||
1289 | V000FFFA0 04B52002 0001107F 00000000 00000000 | 1507 | V000FFFA0 04B52002 0001107F 00000000 00000000 |
1290 | V000FFFB0 00000000 00000000 FF000000 0001107F | 1508 | V000FFFB0 00000000 00000000 FF000000 0001107F |
1291 | V000FFFC0 00000000 00000000 00000000 00000000 | 1509 | V000FFFC0 00000000 00000000 00000000 00000000 |
1292 | V000FFFD0 00010400 80010802 8001085A 000FFFA0 | 1510 | V000FFFD0 00010400 80010802 8001085A 000FFFA0 |
1293 | 1511 | ||
1294 | 1512 | ||
1295 | our 3rd return address is 8001085A | 1513 | our 3rd return address is 8001085A |
@@ -1297,23 +1515,35 @@ our 3rd return address is 8001085A | |||
1297 | as the 04B52002 looks suspiciously like rubbish it is fair to assume that the | 1515 | as the 04B52002 looks suspiciously like rubbish it is fair to assume that the |
1298 | kernel entry routines for the sake of optimisation don't set up a backchain. | 1516 | kernel entry routines for the sake of optimisation don't set up a backchain. |
1299 | 1517 | ||
1300 | now look at System.map to see if the addresses make any sense. | 1518 | now look at System.map to see if the addresses make any sense:: |
1519 | |||
1520 | grep -i 0001b3 System.map | ||
1521 | |||
1522 | outputs among other things:: | ||
1523 | |||
1524 | 0001b304 T cpu_idle | ||
1301 | 1525 | ||
1302 | grep -i 0001b3 System.map | ||
1303 | outputs among other things | ||
1304 | 0001b304 T cpu_idle | ||
1305 | so 8001B36A | 1526 | so 8001B36A |
1306 | is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it ) | 1527 | is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it ) |
1307 | 1528 | ||
1529 | :: | ||
1530 | |||
1531 | grep -i 00014 System.map | ||
1532 | |||
1533 | produces among other things:: | ||
1534 | |||
1535 | 00014a78 T start_kernel | ||
1308 | 1536 | ||
1309 | grep -i 00014 System.map | ||
1310 | produces among other things | ||
1311 | 00014a78 T start_kernel | ||
1312 | so 80014CA6 is start_kernel+some hex number I can't add in my head. | 1537 | so 80014CA6 is start_kernel+some hex number I can't add in my head. |
1313 | 1538 | ||
1314 | grep -i 00108 System.map | 1539 | :: |
1315 | this produces | 1540 | |
1316 | 00010800 T _stext | 1541 | grep -i 00108 System.map |
1542 | |||
1543 | this produces:: | ||
1544 | |||
1545 | 00010800 T _stext | ||
1546 | |||
1317 | so 8001085A is _stext+0x5a | 1547 | so 8001085A is _stext+0x5a |
1318 | 1548 | ||
1319 | Congrats you've done your first backchain. | 1549 | Congrats you've done your first backchain. |
@@ -1337,47 +1567,49 @@ system might be choking with around 64. | |||
1337 | Here is some of the common IO terminology: | 1567 | Here is some of the common IO terminology: |
1338 | 1568 | ||
1339 | Subchannel: | 1569 | Subchannel: |
1340 | This is the logical number most IO commands use to talk to an IO device. There | 1570 | This is the logical number most IO commands use to talk to an IO device. There |
1341 | can be up to 0x10000 (65536) of these in a configuration, typically there are a | 1571 | can be up to 0x10000 (65536) of these in a configuration, typically there are a |
1342 | few hundred. Under VM for simplicity they are allocated contiguously, however | 1572 | few hundred. Under VM for simplicity they are allocated contiguously, however |
1343 | on the native hardware they are not. They typically stay consistent between | 1573 | on the native hardware they are not. They typically stay consistent between |
1344 | boots provided no new hardware is inserted or removed. | 1574 | boots provided no new hardware is inserted or removed. |
1345 | Under Linux for s390 we use these as IRQ's and also when issuing an IO command | 1575 | |
1346 | (CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL, | 1576 | Under Linux for s390 we use these as IRQ's and also when issuing an IO command |
1347 | START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID | 1577 | (CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL, |
1348 | of the device we wish to talk to. The most important of these instructions are | 1578 | START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID |
1349 | START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO | 1579 | of the device we wish to talk to. The most important of these instructions are |
1350 | completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have | 1580 | START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO |
1351 | up to 8 channel paths to a device, this offers redundancy if one is not | 1581 | completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have |
1352 | available. | 1582 | up to 8 channel paths to a device, this offers redundancy if one is not |
1583 | available. | ||
1353 | 1584 | ||
1354 | Device Number: | 1585 | Device Number: |
1355 | This number remains static and is closely tied to the hardware. There are 65536 | 1586 | This number remains static and is closely tied to the hardware. There are 65536 |
1356 | of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and | 1587 | of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and |
1357 | another lsb 8 bits. These remain static even if more devices are inserted or | 1588 | another lsb 8 bits. These remain static even if more devices are inserted or |
1358 | removed from the hardware. There is a 1 to 1 mapping between subchannels and | 1589 | removed from the hardware. There is a 1 to 1 mapping between subchannels and |
1359 | device numbers, provided devices aren't inserted or removed. | 1590 | device numbers, provided devices aren't inserted or removed. |
1360 | 1591 | ||
1361 | Channel Control Words: | 1592 | Channel Control Words: |
1362 | CCWs are linked lists of instructions initially pointed to by an operation | 1593 | CCWs are linked lists of instructions initially pointed to by an operation |
1363 | request block (ORB), which is initially given to Start Subchannel (SSCH) | 1594 | request block (ORB), which is initially given to Start Subchannel (SSCH) |
1364 | command along with the subchannel number for the IO subsystem to process | 1595 | command along with the subchannel number for the IO subsystem to process |
1365 | while the CPU continues executing normal code. | 1596 | while the CPU continues executing normal code. |
1366 | CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and | 1597 | CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and |
1367 | Format 1 (31 bit). These are typically used to issue read and write (and many | 1598 | Format 1 (31 bit). These are typically used to issue read and write (and many |
1368 | other) instructions. They consist of a length field and an absolute address | 1599 | other) instructions. They consist of a length field and an absolute address |
1369 | field. | 1600 | field. |
1370 | Each IO typically gets 1 or 2 interrupts, one for channel end (primary status) | 1601 | |
1371 | when the channel is idle, and the second for device end (secondary status). | 1602 | Each IO typically gets 1 or 2 interrupts, one for channel end (primary status) |
1372 | Sometimes you get both concurrently. You check how the IO went on by issuing a | 1603 | when the channel is idle, and the second for device end (secondary status). |
1373 | TEST SUBCHANNEL at each interrupt, from which you receive an Interruption | 1604 | Sometimes you get both concurrently. You check how the IO went on by issuing a |
1374 | response block (IRB). If you get channel and device end status in the IRB | 1605 | TEST SUBCHANNEL at each interrupt, from which you receive an Interruption |
1375 | without channel checks etc. your IO probably went okay. If you didn't you | 1606 | response block (IRB). If you get channel and device end status in the IRB |
1376 | probably need to examine the IRB, extended status word etc. | 1607 | without channel checks etc. your IO probably went okay. If you didn't you |
1377 | If an error occurs, more sophisticated control units have a facility known as | 1608 | probably need to examine the IRB, extended status word etc. |
1378 | concurrent sense. This means that if an error occurs Extended sense information | 1609 | If an error occurs, more sophisticated control units have a facility known as |
1379 | will be presented in the Extended status word in the IRB. If not you have to | 1610 | concurrent sense. This means that if an error occurs Extended sense information |
1380 | issue a subsequent SENSE CCW command after the test subchannel. | 1611 | will be presented in the Extended status word in the IRB. If not you have to |
1612 | issue a subsequent SENSE CCW command after the test subchannel. | ||
1381 | 1613 | ||
1382 | 1614 | ||
1383 | TPI (Test pending interrupt) can also be used for polled IO, but in | 1615 | TPI (Test pending interrupt) can also be used for polled IO, but in |
@@ -1388,58 +1620,62 @@ Store Subchannel and Modify Subchannel can be used to examine and modify | |||
1388 | operating characteristics of a subchannel (e.g. channel paths). | 1620 | operating characteristics of a subchannel (e.g. channel paths). |
1389 | 1621 | ||
1390 | Other IO related Terms: | 1622 | Other IO related Terms: |
1391 | Sysplex: S390's Clustering Technology | 1623 | |
1392 | QDIO: S390's new high speed IO architecture to support devices such as gigabit | 1624 | Sysplex: |
1393 | ethernet, this architecture is also designed to be forward compatible with | 1625 | S390's Clustering Technology |
1394 | upcoming 64 bit machines. | 1626 | QDIO: |
1627 | S390's new high speed IO architecture to support devices such as gigabit | ||
1628 | ethernet, this architecture is also designed to be forward compatible with | ||
1629 | upcoming 64 bit machines. | ||
1395 | 1630 | ||
1396 | 1631 | ||
1397 | General Concepts | 1632 | General Concepts |
1633 | ---------------- | ||
1398 | 1634 | ||
1399 | Input Output Processors (IOP's) are responsible for communicating between | 1635 | Input Output Processors (IOP's) are responsible for communicating between |
1400 | the mainframe CPU's & the channel & relieve the mainframe CPU's from the | 1636 | the mainframe CPU's & the channel & relieve the mainframe CPU's from the |
1401 | burden of communicating with IO devices directly, this allows the CPU's to | 1637 | burden of communicating with IO devices directly, this allows the CPU's to |
1402 | concentrate on data processing. | 1638 | concentrate on data processing. |
1403 | 1639 | ||
1404 | IOP's can use one or more links ( known as channel paths ) to talk to each | 1640 | IOP's can use one or more links ( known as channel paths ) to talk to each |
1405 | IO device. It first checks for path availability & chooses an available one, | 1641 | IO device. It first checks for path availability & chooses an available one, |
1406 | then starts ( & sometimes terminates IO ). | 1642 | then starts ( & sometimes terminates IO ). |
1407 | There are two types of channel path: ESCON & the Parallel IO interface. | 1643 | There are two types of channel path: ESCON & the Parallel IO interface. |
1408 | 1644 | ||
1409 | IO devices are attached to control units, control units provide the | 1645 | IO devices are attached to control units, control units provide the |
1410 | logic to interface the channel paths & channel path IO protocols to | 1646 | logic to interface the channel paths & channel path IO protocols to |
1411 | the IO devices, they can be integrated with the devices or housed separately | 1647 | the IO devices, they can be integrated with the devices or housed separately |
1412 | & often talk to several similar devices ( typical examples would be raid | 1648 | & often talk to several similar devices ( typical examples would be raid |
1413 | controllers or a control unit which connects to 1000 3270 terminals ). | 1649 | controllers or a control unit which connects to 1000 3270 terminals ):: |
1414 | 1650 | ||
1415 | 1651 | ||
1416 | +---------------------------------------------------------------+ | 1652 | +---------------------------------------------------------------+ |
1417 | | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ | | 1653 | | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ | |
1418 | | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | | | 1654 | | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | | |
1419 | | | | | | | | | | | Memory | | Storage | | | 1655 | | | | | | | | | | | Memory | | Storage | | |
1420 | | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ | | 1656 | | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ | |
1421 | |---------------------------------------------------------------+ | 1657 | |---------------------------------------------------------------+ |
1422 | | IOP | IOP | IOP | | 1658 | | IOP | IOP | IOP | |
1423 | |--------------------------------------------------------------- | 1659 | |--------------------------------------------------------------- |
1424 | | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | | 1660 | | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | |
1425 | ---------------------------------------------------------------- | 1661 | ---------------------------------------------------------------- |
1426 | || || | 1662 | || || |
1427 | || Bus & Tag Channel Path || ESCON | 1663 | || Bus & Tag Channel Path || ESCON |
1428 | || ====================== || Channel | 1664 | || ====================== || Channel |
1429 | || || || || Path | 1665 | || || || || Path |
1430 | +----------+ +----------+ +----------+ | 1666 | +----------+ +----------+ +----------+ |
1431 | | | | | | | | 1667 | | | | | | | |
1432 | | CU | | CU | | CU | | 1668 | | CU | | CU | | CU | |
1433 | | | | | | | | 1669 | | | | | | | |
1434 | +----------+ +----------+ +----------+ | 1670 | +----------+ +----------+ +----------+ |
1435 | | | | | | | 1671 | | | | | | |
1436 | +----------+ +----------+ +----------+ +----------+ +----------+ | 1672 | +----------+ +----------+ +----------+ +----------+ +----------+ |
1437 | |I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device| | 1673 | |I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device| |
1438 | +----------+ +----------+ +----------+ +----------+ +----------+ | 1674 | +----------+ +----------+ +----------+ +----------+ +----------+ |
1439 | CPU = Central Processing Unit | 1675 | CPU = Central Processing Unit |
1440 | C = Channel | 1676 | C = Channel |
1441 | IOP = IO Processor | 1677 | IOP = IO Processor |
1442 | CU = Control Unit | 1678 | CU = Control Unit |
1443 | 1679 | ||
1444 | The 390 IO systems come in 2 flavours the current 390 machines support both | 1680 | The 390 IO systems come in 2 flavours the current 390 machines support both |
1445 | 1681 | ||
@@ -1447,7 +1683,7 @@ The Older 360 & 370 Interface,sometimes called the Parallel I/O interface, | |||
1447 | sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers | 1683 | sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers |
1448 | Interface (OEMI). | 1684 | Interface (OEMI). |
1449 | 1685 | ||
1450 | This byte wide Parallel channel path/bus has parity & data on the "Bus" cable | 1686 | This byte wide Parallel channel path/bus has parity & data on the "Bus" cable |
1451 | and control lines on the "Tag" cable. These can operate in byte multiplex mode | 1687 | and control lines on the "Tag" cable. These can operate in byte multiplex mode |
1452 | for sharing between several slow devices or burst mode and monopolize the | 1688 | for sharing between several slow devices or burst mode and monopolize the |
1453 | channel for the whole burst. Up to 256 devices can be addressed on one of these | 1689 | channel for the whole burst. Up to 256 devices can be addressed on one of these |
@@ -1459,13 +1695,13 @@ support only transfer rates of 3.0, 2.0 & 1.0 MB/sec. | |||
1459 | One of these paths can be daisy chained to up to 8 control units. | 1695 | One of these paths can be daisy chained to up to 8 control units. |
1460 | 1696 | ||
1461 | 1697 | ||
1462 | ESCON if fibre optic it is also called FICON | 1698 | ESCON if fibre optic it is also called FICON |
1463 | Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or | 1699 | Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or |
1464 | lasers for communication at a signaling rate of up to 200 megabits/sec. As | 1700 | lasers for communication at a signaling rate of up to 200 megabits/sec. As |
1465 | 10bits are transferred for every 8 bits info this drops to 160 megabits/sec | 1701 | 10bits are transferred for every 8 bits info this drops to 160 megabits/sec |
1466 | and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only | 1702 | and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only |
1467 | operates in burst mode. | 1703 | operates in burst mode. |
1468 | 1704 | ||
1469 | ESCONs typical max cable length is 3km for the led version and 20km for the | 1705 | ESCONs typical max cable length is 3km for the led version and 20km for the |
1470 | laser version known as XDF (extended distance facility). This can be further | 1706 | laser version known as XDF (extended distance facility). This can be further |
1471 | extended by using an ESCON director which triples the above mentioned ranges. | 1707 | extended by using an ESCON director which triples the above mentioned ranges. |
@@ -1489,31 +1725,29 @@ Debugging IO on s/390 & z/Architecture under VM | |||
1489 | 1725 | ||
1490 | Now we are ready to go on with IO tracing commands under VM | 1726 | Now we are ready to go on with IO tracing commands under VM |
1491 | 1727 | ||
1492 | A few self explanatory queries: | 1728 | A few self explanatory queries:: |
1493 | Q OSA | ||
1494 | Q CTC | ||
1495 | Q DISK ( This command is CMS specific ) | ||
1496 | Q DASD | ||
1497 | |||
1498 | |||
1499 | 1729 | ||
1730 | Q OSA | ||
1731 | Q CTC | ||
1732 | Q DISK ( This command is CMS specific ) | ||
1733 | Q DASD | ||
1500 | 1734 | ||
1735 | Q OSA on my machine returns:: | ||
1501 | 1736 | ||
1502 | 1737 | OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000 | |
1503 | Q OSA on my machine returns | 1738 | OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001 |
1504 | OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000 | 1739 | OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002 |
1505 | OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001 | 1740 | OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003 |
1506 | OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002 | ||
1507 | OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003 | ||
1508 | 1741 | ||
1509 | If you have a guest with certain privileges you may be able to see devices | 1742 | If you have a guest with certain privileges you may be able to see devices |
1510 | which don't belong to you. To avoid this, add the option V. | 1743 | which don't belong to you. To avoid this, add the option V. |
1511 | e.g. | 1744 | e.g.:: |
1512 | Q V OSA | 1745 | |
1746 | Q V OSA | ||
1513 | 1747 | ||
1514 | Now using the device numbers returned by this command we will | 1748 | Now using the device numbers returned by this command we will |
1515 | Trace the io starting up on the first device 7c08 & 7c09 | 1749 | Trace the io starting up on the first device 7c08 & 7c09 |
1516 | In our simplest case we can trace the | 1750 | In our simplest case we can trace the |
1517 | start subchannels | 1751 | start subchannels |
1518 | like TR SSCH 7C08-7C09 | 1752 | like TR SSCH 7C08-7C09 |
1519 | or the halt subchannels | 1753 | or the halt subchannels |
@@ -1524,34 +1758,47 @@ A good trick is tracing all the IO's and CCWS and spooling them into the reader | |||
1524 | of another VM guest so he can ftp the logfile back to his own machine. I'll do | 1758 | of another VM guest so he can ftp the logfile back to his own machine. I'll do |
1525 | a small bit of this and give you a look at the output. | 1759 | a small bit of this and give you a look at the output. |
1526 | 1760 | ||
1527 | 1) Spool stdout to VM reader | 1761 | 1) Spool stdout to VM reader:: |
1528 | SP PRT TO (another vm guest ) or * for the local vm guest | 1762 | |
1529 | 2) Fill the reader with the trace | 1763 | SP PRT TO (another vm guest ) or * for the local vm guest |
1530 | TR IO 7c08-7c09 INST INT CCW PRT RUN | 1764 | |
1531 | 3) Start up linux | 1765 | 2) Fill the reader with the trace:: |
1532 | i 00c | 1766 | |
1533 | 4) Finish the trace | 1767 | TR IO 7c08-7c09 INST INT CCW PRT RUN |
1534 | TR END | 1768 | |
1535 | 5) close the reader | 1769 | 3) Start up linux:: |
1536 | C PRT | 1770 | |
1537 | 6) list reader contents | 1771 | i 00c |
1538 | RDRLIST | 1772 | 4) Finish the trace:: |
1539 | 7) copy it to linux4's minidisk | 1773 | |
1540 | RECEIVE / LOG TXT A1 ( replace | 1774 | TR END |
1775 | |||
1776 | 5) close the reader:: | ||
1777 | |||
1778 | C PRT | ||
1779 | |||
1780 | 6) list reader contents:: | ||
1781 | |||
1782 | RDRLIST | ||
1783 | |||
1784 | 7) copy it to linux4's minidisk:: | ||
1785 | |||
1786 | RECEIVE / LOG TXT A1 ( replace | ||
1787 | |||
1541 | 8) | 1788 | 8) |
1542 | filel & press F11 to look at it | 1789 | filel & press F11 to look at it |
1543 | You should see something like: | 1790 | You should see something like:: |
1544 | 1791 | ||
1545 | 00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08 | 1792 | 00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08 |
1546 | CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80 | 1793 | CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80 |
1547 | CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........ | 1794 | CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........ |
1548 | IDAL 43D8AFE8 | 1795 | IDAL 43D8AFE8 |
1549 | IDAL 0FB76000 | 1796 | IDAL 0FB76000 |
1550 | 00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4 | 1797 | 00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4 |
1551 | 00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08 | 1798 | 00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08 |
1552 | CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC | 1799 | CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC |
1553 | KEY 0 FPI C0 CC 0 CTLS 4007 | 1800 | KEY 0 FPI C0 CC 0 CTLS 4007 |
1554 | 00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08 | 1801 | 00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08 |
1555 | 1802 | ||
1556 | If you don't like messing up your reader ( because you possibly booted from it ) | 1803 | If you don't like messing up your reader ( because you possibly booted from it ) |
1557 | you can alternatively spool it to another readers guest. | 1804 | you can alternatively spool it to another readers guest. |
@@ -1563,43 +1810,58 @@ These commands are listed only because they have | |||
1563 | been of use to me in the past & may be of use to | 1810 | been of use to me in the past & may be of use to |
1564 | you too. For more complete info on each of the commands | 1811 | you too. For more complete info on each of the commands |
1565 | type HELP <command> from CMS. | 1812 | type HELP <command> from CMS. |
1566 | detaching devices | 1813 | |
1567 | DET <devno range> | 1814 | detaching devices:: |
1568 | ATT <devno range> <guest> | 1815 | |
1816 | DET <devno range> | ||
1817 | ATT <devno range> <guest> | ||
1818 | |||
1569 | attach a device to guest * for your own guest | 1819 | attach a device to guest * for your own guest |
1570 | READY <devno> cause VM to issue a fake interrupt. | ||
1571 | 1820 | ||
1572 | The VARY command is normally only available to VM administrators. | 1821 | READY <devno> |
1573 | VARY ON PATH <path> TO <devno range> | 1822 | cause VM to issue a fake interrupt. |
1574 | VARY OFF PATH <PATH> FROM <devno range> | 1823 | |
1824 | The VARY command is normally only available to VM administrators:: | ||
1825 | |||
1826 | VARY ON PATH <path> TO <devno range> | ||
1827 | VARY OFF PATH <PATH> FROM <devno range> | ||
1828 | |||
1575 | This is used to switch on or off channel paths to devices. | 1829 | This is used to switch on or off channel paths to devices. |
1576 | 1830 | ||
1577 | Q CHPID <channel path ID> | 1831 | Q CHPID <channel path ID> |
1578 | This displays state of devices using this channel path | 1832 | This displays state of devices using this channel path |
1833 | |||
1579 | D SCHIB <subchannel> | 1834 | D SCHIB <subchannel> |
1580 | This displays the subchannel information SCHIB block for the device. | 1835 | This displays the subchannel information SCHIB block for the device. |
1581 | this I believe is also only available to administrators. | 1836 | this I believe is also only available to administrators. |
1837 | |||
1582 | DEFINE CTC <devno> | 1838 | DEFINE CTC <devno> |
1583 | defines a virtual CTC channel to channel connection | 1839 | defines a virtual CTC channel to channel connection |
1584 | 2 need to be defined on each guest for the CTC driver to use. | 1840 | 2 need to be defined on each guest for the CTC driver to use. |
1841 | |||
1585 | COUPLE devno userid remote devno | 1842 | COUPLE devno userid remote devno |
1586 | Joins a local virtual device to a remote virtual device | 1843 | Joins a local virtual device to a remote virtual device |
1587 | ( commonly used for the CTC driver ). | 1844 | ( commonly used for the CTC driver ). |
1845 | |||
1846 | Building a VM ramdisk under CMS which linux can use:: | ||
1847 | |||
1848 | def vfb-<blocksize> <subchannel> <number blocks> | ||
1588 | 1849 | ||
1589 | Building a VM ramdisk under CMS which linux can use | ||
1590 | def vfb-<blocksize> <subchannel> <number blocks> | ||
1591 | blocksize is commonly 4096 for linux. | 1850 | blocksize is commonly 4096 for linux. |
1592 | Formatting it | ||
1593 | format <subchannel> <driver letter e.g. x> (blksize <blocksize> | ||
1594 | 1851 | ||
1595 | Sharing a disk between multiple guests | 1852 | Formatting it:: |
1596 | LINK userid devno1 devno2 mode password | 1853 | |
1854 | format <subchannel> <driver letter e.g. x> (blksize <blocksize> | ||
1855 | |||
1856 | Sharing a disk between multiple guests:: | ||
1857 | |||
1858 | LINK userid devno1 devno2 mode password | ||
1597 | 1859 | ||
1598 | 1860 | ||
1599 | 1861 | ||
1600 | GDB on S390 | 1862 | GDB on S390 |
1601 | =========== | 1863 | =========== |
1602 | N.B. if compiling for debugging gdb works better without optimisation | 1864 | N.B. if compiling for debugging gdb works better without optimisation |
1603 | ( see Compiling programs for debugging ) | 1865 | ( see Compiling programs for debugging ) |
1604 | 1866 | ||
1605 | invocation | 1867 | invocation |
@@ -1609,113 +1871,169 @@ gdb <victim program> <optional corefile> | |||
1609 | Online help | 1871 | Online help |
1610 | ----------- | 1872 | ----------- |
1611 | help: gives help on commands | 1873 | help: gives help on commands |
1612 | e.g. | 1874 | |
1613 | help | 1875 | e.g.:: |
1614 | help display | 1876 | |
1877 | help | ||
1878 | help display | ||
1879 | |||
1615 | Note gdb's online help is very good, use it. | 1880 | Note gdb's online help is very good, use it. |
1616 | 1881 | ||
1617 | 1882 | ||
1618 | Assembly | 1883 | Assembly |
1619 | -------- | 1884 | -------- |
1620 | info registers: displays registers other than floating point. | 1885 | info registers: |
1621 | info all-registers: displays floating points as well. | 1886 | displays registers other than floating point. |
1622 | disassemble: disassembles | 1887 | |
1623 | e.g. | 1888 | info all-registers: |
1624 | disassemble without parameters will disassemble the current function | 1889 | displays floating points as well. |
1625 | disassemble $pc $pc+10 | 1890 | |
1891 | disassemble: | ||
1892 | disassembles | ||
1893 | |||
1894 | e.g.:: | ||
1895 | |||
1896 | disassemble without parameters will disassemble the current function | ||
1897 | disassemble $pc $pc+10 | ||
1626 | 1898 | ||
1627 | Viewing & modifying variables | 1899 | Viewing & modifying variables |
1628 | ----------------------------- | 1900 | ----------------------------- |
1629 | print or p: displays variable or register | 1901 | print or p: |
1902 | displays variable or register | ||
1903 | |||
1630 | e.g. p/x $sp will display the stack pointer | 1904 | e.g. p/x $sp will display the stack pointer |
1631 | 1905 | ||
1632 | display: prints variable or register each time program stops | 1906 | display: |
1633 | e.g. | 1907 | prints variable or register each time program stops |
1634 | display/x $pc will display the program counter | 1908 | |
1635 | display argc | 1909 | e.g.:: |
1910 | |||
1911 | display/x $pc will display the program counter | ||
1912 | display argc | ||
1913 | |||
1914 | undisplay: | ||
1915 | undo's display's | ||
1636 | 1916 | ||
1637 | undisplay : undo's display's | 1917 | info breakpoints: |
1918 | shows all current breakpoints | ||
1638 | 1919 | ||
1639 | info breakpoints: shows all current breakpoints | 1920 | info stack: |
1921 | shows stack back trace (if this doesn't work too well, I'll show | ||
1922 | you the stacktrace by hand below). | ||
1640 | 1923 | ||
1641 | info stack: shows stack back trace (if this doesn't work too well, I'll show | 1924 | info locals: |
1642 | you the stacktrace by hand below). | 1925 | displays local variables. |
1643 | 1926 | ||
1644 | info locals: displays local variables. | 1927 | info args: |
1928 | display current procedure arguments. | ||
1645 | 1929 | ||
1646 | info args: display current procedure arguments. | 1930 | set args: |
1931 | will set argc & argv each time the victim program is invoked | ||
1647 | 1932 | ||
1648 | set args: will set argc & argv each time the victim program is invoked. | 1933 | e.g.:: |
1649 | 1934 | ||
1650 | set <variable>=value | 1935 | set <variable>=value |
1651 | set argc=100 | 1936 | set argc=100 |
1652 | set $pc=0 | 1937 | set $pc=0 |
1653 | 1938 | ||
1654 | 1939 | ||
1655 | 1940 | ||
1656 | Modifying execution | 1941 | Modifying execution |
1657 | ------------------- | 1942 | ------------------- |
1658 | step: steps n lines of sourcecode | 1943 | step: |
1659 | step steps 1 line. | 1944 | steps n lines of sourcecode |
1660 | step 100 steps 100 lines of code. | ||
1661 | 1945 | ||
1662 | next: like step except this will not step into subroutines | 1946 | step |
1947 | steps 1 line. | ||
1663 | 1948 | ||
1664 | stepi: steps a single machine code instruction. | 1949 | step 100 |
1665 | e.g. stepi 100 | 1950 | steps 100 lines of code. |
1666 | 1951 | ||
1667 | nexti: steps a single machine code instruction but will not step into | 1952 | next: |
1668 | subroutines. | 1953 | like step except this will not step into subroutines |
1669 | 1954 | ||
1670 | finish: will run until exit of the current routine | 1955 | stepi: |
1956 | steps a single machine code instruction. | ||
1671 | 1957 | ||
1672 | run: (re)starts a program | 1958 | e.g.:: |
1673 | 1959 | ||
1674 | cont: continues a program | 1960 | stepi 100 |
1675 | 1961 | ||
1676 | quit: exits gdb. | 1962 | nexti: |
1963 | steps a single machine code instruction but will not step into | ||
1964 | subroutines. | ||
1965 | |||
1966 | finish: | ||
1967 | will run until exit of the current routine | ||
1968 | |||
1969 | run: | ||
1970 | (re)starts a program | ||
1971 | |||
1972 | cont: | ||
1973 | continues a program | ||
1974 | |||
1975 | quit: | ||
1976 | exits gdb. | ||
1677 | 1977 | ||
1678 | 1978 | ||
1679 | breakpoints | 1979 | breakpoints |
1680 | ------------ | 1980 | ------------ |
1681 | 1981 | ||
1682 | break | 1982 | break |
1683 | sets a breakpoint | 1983 | sets a breakpoint |
1684 | e.g. | ||
1685 | |||
1686 | break main | ||
1687 | 1984 | ||
1688 | break *$pc | 1985 | e.g.:: |
1689 | 1986 | ||
1690 | break *0x400618 | 1987 | break main |
1988 | break *$pc | ||
1989 | break *0x400618 | ||
1691 | 1990 | ||
1692 | Here's a really useful one for large programs | 1991 | Here's a really useful one for large programs |
1992 | |||
1693 | rbr | 1993 | rbr |
1694 | Set a breakpoint for all functions matching REGEXP | 1994 | Set a breakpoint for all functions matching REGEXP |
1695 | e.g. | 1995 | |
1696 | rbr 390 | 1996 | e.g.:: |
1997 | |||
1998 | rbr 390 | ||
1999 | |||
1697 | will set a breakpoint with all functions with 390 in their name. | 2000 | will set a breakpoint with all functions with 390 in their name. |
1698 | 2001 | ||
1699 | info breakpoints | 2002 | info breakpoints |
1700 | lists all breakpoints | 2003 | lists all breakpoints |
2004 | |||
2005 | delete: | ||
2006 | delete breakpoint by number or delete them all | ||
1701 | 2007 | ||
1702 | delete: delete breakpoint by number or delete them all | ||
1703 | e.g. | 2008 | e.g. |
1704 | delete 1 will delete the first breakpoint | ||
1705 | delete will delete them all | ||
1706 | 2009 | ||
1707 | watch: This will set a watchpoint ( usually hardware assisted ), | 2010 | delete 1 |
2011 | will delete the first breakpoint | ||
2012 | |||
2013 | |||
2014 | delete | ||
2015 | will delete them all | ||
2016 | |||
2017 | watch: | ||
2018 | This will set a watchpoint ( usually hardware assisted ), | ||
2019 | |||
1708 | This will watch a variable till it changes | 2020 | This will watch a variable till it changes |
2021 | |||
1709 | e.g. | 2022 | e.g. |
1710 | watch cnt, will watch the variable cnt till it changes. | 2023 | |
2024 | watch cnt | ||
2025 | will watch the variable cnt till it changes. | ||
2026 | |||
1711 | As an aside unfortunately gdb's, architecture independent watchpoint code | 2027 | As an aside unfortunately gdb's, architecture independent watchpoint code |
1712 | is inconsistent & not very good, watchpoints usually work but not always. | 2028 | is inconsistent & not very good, watchpoints usually work but not always. |
1713 | 2029 | ||
1714 | info watchpoints: Display currently active watchpoints | 2030 | info watchpoints: |
2031 | Display currently active watchpoints | ||
1715 | 2032 | ||
1716 | condition: ( another useful one ) | 2033 | condition: ( another useful one ) |
1717 | Specify breakpoint number N to break only if COND is true. | 2034 | Specify breakpoint number N to break only if COND is true. |
1718 | Usage is `condition N COND', where N is an integer and COND is an | 2035 | |
2036 | Usage is `condition N COND`, where N is an integer and COND is an | ||
1719 | expression to be evaluated whenever breakpoint N is reached. | 2037 | expression to be evaluated whenever breakpoint N is reached. |
1720 | 2038 | ||
1721 | 2039 | ||
@@ -1723,41 +2041,51 @@ expression to be evaluated whenever breakpoint N is reached. | |||
1723 | User defined functions/macros | 2041 | User defined functions/macros |
1724 | ----------------------------- | 2042 | ----------------------------- |
1725 | define: ( Note this is very very useful,simple & powerful ) | 2043 | define: ( Note this is very very useful,simple & powerful ) |
2044 | |||
1726 | usage define <name> <list of commands> end | 2045 | usage define <name> <list of commands> end |
1727 | 2046 | ||
1728 | examples which you should consider putting into .gdbinit in your home directory | 2047 | examples which you should consider putting into .gdbinit in your home |
1729 | define d | 2048 | directory:: |
1730 | stepi | ||
1731 | disassemble $pc $pc+10 | ||
1732 | end | ||
1733 | 2049 | ||
1734 | define e | 2050 | define d |
1735 | nexti | 2051 | stepi |
1736 | disassemble $pc $pc+10 | 2052 | disassemble $pc $pc+10 |
1737 | end | 2053 | end |
2054 | define e | ||
2055 | nexti | ||
2056 | disassemble $pc $pc+10 | ||
2057 | end | ||
1738 | 2058 | ||
1739 | 2059 | ||
1740 | Other hard to classify stuff | 2060 | Other hard to classify stuff |
1741 | ---------------------------- | 2061 | ---------------------------- |
1742 | signal n: | 2062 | signal n: |
1743 | sends the victim program a signal. | 2063 | sends the victim program a signal. |
1744 | e.g. signal 3 will send a SIGQUIT. | 2064 | |
2065 | e.g. `signal 3` will send a SIGQUIT. | ||
1745 | 2066 | ||
1746 | info signals: | 2067 | info signals: |
1747 | what gdb does when the victim receives certain signals. | 2068 | what gdb does when the victim receives certain signals. |
1748 | 2069 | ||
1749 | list: | 2070 | list: |
1750 | e.g. | 2071 | |
1751 | list lists current function source | 2072 | e.g.: |
1752 | list 1,10 list first 10 lines of current file. | 2073 | |
2074 | list | ||
2075 | lists current function source | ||
2076 | list 1,10 | ||
2077 | list first 10 lines of current file. | ||
2078 | |||
1753 | list test.c:1,10 | 2079 | list test.c:1,10 |
1754 | 2080 | ||
1755 | 2081 | ||
1756 | directory: | 2082 | directory: |
1757 | Adds directories to be searched for source if gdb cannot find the source. | 2083 | Adds directories to be searched for source if gdb cannot find the source. |
1758 | (note it is a bit sensitive about slashes) | 2084 | (note it is a bit sensitive about slashes) |
1759 | e.g. To add the root of the filesystem to the searchpath do | 2085 | |
1760 | directory // | 2086 | e.g. To add the root of the filesystem to the searchpath do:: |
2087 | |||
2088 | directory // | ||
1761 | 2089 | ||
1762 | 2090 | ||
1763 | call <function> | 2091 | call <function> |
@@ -1765,153 +2093,205 @@ This calls a function in the victim program, this is pretty powerful | |||
1765 | e.g. | 2093 | e.g. |
1766 | (gdb) call printf("hello world") | 2094 | (gdb) call printf("hello world") |
1767 | outputs: | 2095 | outputs: |
1768 | $1 = 11 | 2096 | $1 = 11 |
1769 | 2097 | ||
1770 | You might now be thinking that the line above didn't work, something extra had | 2098 | You might now be thinking that the line above didn't work, something extra had |
1771 | to be done. | 2099 | to be done. |
1772 | (gdb) call fflush(stdout) | 2100 | (gdb) call fflush(stdout) |
1773 | hello world$2 = 0 | 2101 | hello world$2 = 0 |
1774 | As an aside the debugger also calls malloc & free under the hood | 2102 | As an aside the debugger also calls malloc & free under the hood |
1775 | to make space for the "hello world" string. | 2103 | to make space for the "hello world" string. |
1776 | 2104 | ||
1777 | 2105 | ||
1778 | 2106 | ||
1779 | hints | 2107 | hints |
1780 | ----- | 2108 | ----- |
1781 | 1) command completion works just like bash | 2109 | 1) command completion works just like bash |
1782 | ( if you are a bad typist like me this really helps ) | 2110 | ( if you are a bad typist like me this really helps ) |
2111 | |||
1783 | e.g. hit br <TAB> & cursor up & down :-). | 2112 | e.g. hit br <TAB> & cursor up & down :-). |
1784 | 2113 | ||
1785 | 2) if you have a debugging problem that takes a few steps to recreate | 2114 | 2) if you have a debugging problem that takes a few steps to recreate |
1786 | put the steps into a file called .gdbinit in your current working directory | 2115 | put the steps into a file called .gdbinit in your current working directory |
1787 | if you have defined a few extra useful user defined commands put these in | 2116 | if you have defined a few extra useful user defined commands put these in |
1788 | your home directory & they will be read each time gdb is launched. | 2117 | your home directory & they will be read each time gdb is launched. |
1789 | 2118 | ||
1790 | A typical .gdbinit file might be. | 2119 | A typical .gdbinit file might be.:: |
1791 | break main | 2120 | |
1792 | run | 2121 | break main |
1793 | break runtime_exception | 2122 | run |
1794 | cont | 2123 | break runtime_exception |
2124 | cont | ||
1795 | 2125 | ||
1796 | 2126 | ||
1797 | stack chaining in gdb by hand | 2127 | stack chaining in gdb by hand |
1798 | ----------------------------- | 2128 | ----------------------------- |
1799 | This is done using the same trick described for VM | 2129 | This is done using the same trick described for VM:: |
1800 | p/x (*($sp+56))&0x7fffffff get the first backchain. | 2130 | |
2131 | p/x (*($sp+56))&0x7fffffff | ||
2132 | |||
2133 | get the first backchain. | ||
1801 | 2134 | ||
1802 | For z/Architecture | 2135 | For z/Architecture |
1803 | Replace 56 with 112 & ignore the &0x7fffffff | 2136 | Replace 56 with 112 & ignore the &0x7fffffff |
1804 | in the macros below & do nasty casts to longs like the following | 2137 | in the macros below & do nasty casts to longs like the following |
1805 | as gdb unfortunately deals with printed arguments as ints which | 2138 | as gdb unfortunately deals with printed arguments as ints which |
1806 | messes up everything. | 2139 | messes up everything. |
1807 | i.e. here is a 3rd backchain dereference | 2140 | |
1808 | p/x *(long *)(***(long ***)$sp+112) | 2141 | i.e. here is a 3rd backchain dereference:: |
2142 | |||
2143 | p/x *(long *)(***(long ***)$sp+112) | ||
1809 | 2144 | ||
1810 | 2145 | ||
1811 | this outputs | 2146 | this outputs:: |
1812 | $5 = 0x528f18 | 2147 | |
2148 | $5 = 0x528f18 | ||
2149 | |||
1813 | on my machine. | 2150 | on my machine. |
1814 | Now you can use | 2151 | |
1815 | info symbol (*($sp+56))&0x7fffffff | 2152 | Now you can use:: |
1816 | you might see something like. | 2153 | |
1817 | rl_getc + 36 in section .text telling you what is located at address 0x528f18 | 2154 | info symbol (*($sp+56))&0x7fffffff |
1818 | Now do. | 2155 | |
1819 | p/x (*(*$sp+56))&0x7fffffff | 2156 | you might see something like:: |
1820 | This outputs | 2157 | |
1821 | $6 = 0x528ed0 | 2158 | rl_getc + 36 in section .text |
1822 | Now do. | 2159 | |
1823 | info symbol (*(*$sp+56))&0x7fffffff | 2160 | telling you what is located at address 0x528f18 |
1824 | rl_read_key + 180 in section .text | 2161 | Now do:: |
1825 | now do | 2162 | |
1826 | p/x (*(**$sp+56))&0x7fffffff | 2163 | p/x (*(*$sp+56))&0x7fffffff |
2164 | |||
2165 | This outputs:: | ||
2166 | |||
2167 | $6 = 0x528ed0 | ||
2168 | |||
2169 | Now do:: | ||
2170 | |||
2171 | info symbol (*(*$sp+56))&0x7fffffff | ||
2172 | rl_read_key + 180 in section .text | ||
2173 | |||
2174 | now do:: | ||
2175 | |||
2176 | p/x (*(**$sp+56))&0x7fffffff | ||
2177 | |||
1827 | & so on. | 2178 | & so on. |
1828 | 2179 | ||
1829 | Disassembling instructions without debug info | 2180 | Disassembling instructions without debug info |
1830 | --------------------------------------------- | 2181 | --------------------------------------------- |
1831 | gdb typically complains if there is a lack of debugging | 2182 | gdb typically complains if there is a lack of debugging |
1832 | symbols in the disassemble command with | 2183 | symbols in the disassemble command with |
1833 | "No function contains specified address." To get around | 2184 | "No function contains specified address." To get around |
1834 | this do | 2185 | this do:: |
1835 | x/<number lines to disassemble>xi <address> | 2186 | |
1836 | e.g. | 2187 | x/<number lines to disassemble>xi <address> |
1837 | x/20xi 0x400730 | 2188 | |
2189 | e.g.:: | ||
1838 | 2190 | ||
2191 | x/20xi 0x400730 | ||
1839 | 2192 | ||
1840 | 2193 | ||
1841 | Note: Remember gdb has history just like bash you don't need to retype the | 2194 | |
1842 | whole line just use the up & down arrows. | 2195 | Note: |
2196 | Remember gdb has history just like bash you don't need to retype the | ||
2197 | whole line just use the up & down arrows. | ||
1843 | 2198 | ||
1844 | 2199 | ||
1845 | 2200 | ||
1846 | For more info | 2201 | For more info |
1847 | ------------- | 2202 | ------------- |
1848 | From your linuxbox do | 2203 | From your linuxbox do:: |
1849 | man gdb or info gdb. | 2204 | |
2205 | man gdb | ||
2206 | |||
2207 | or:: | ||
2208 | |||
2209 | info gdb. | ||
1850 | 2210 | ||
1851 | core dumps | 2211 | core dumps |
1852 | ---------- | 2212 | ---------- |
1853 | What is a core dump ?, | 2213 | |
2214 | What is a core dump ? | ||
2215 | ^^^^^^^^^^^^^^^^^^^^^ | ||
2216 | |||
1854 | A core dump is a file generated by the kernel (if allowed) which contains the | 2217 | A core dump is a file generated by the kernel (if allowed) which contains the |
1855 | registers and all active pages of the program which has crashed. | 2218 | registers and all active pages of the program which has crashed. |
2219 | |||
1856 | From this file gdb will allow you to look at the registers, stack trace and | 2220 | From this file gdb will allow you to look at the registers, stack trace and |
1857 | memory of the program as if it just crashed on your system. It is usually | 2221 | memory of the program as if it just crashed on your system. It is usually |
1858 | called core and created in the current working directory. | 2222 | called core and created in the current working directory. |
2223 | |||
1859 | This is very useful in that a customer can mail a core dump to a technical | 2224 | This is very useful in that a customer can mail a core dump to a technical |
1860 | support department and the technical support department can reconstruct what | 2225 | support department and the technical support department can reconstruct what |
1861 | happened. Provided they have an identical copy of this program with debugging | 2226 | happened. Provided they have an identical copy of this program with debugging |
1862 | symbols compiled in and the source base of this build is available. | 2227 | symbols compiled in and the source base of this build is available. |
2228 | |||
1863 | In short it is far more useful than something like a crash log could ever hope | 2229 | In short it is far more useful than something like a crash log could ever hope |
1864 | to be. | 2230 | to be. |
1865 | 2231 | ||
1866 | Why have I never seen one ?. | 2232 | Why have I never seen one ? |
1867 | Probably because you haven't used the command | 2233 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
1868 | ulimit -c unlimited in bash | 2234 | |
1869 | to allow core dumps, now do | 2235 | Probably because you haven't used the command:: |
1870 | ulimit -a | 2236 | |
2237 | ulimit -c unlimited in bash | ||
2238 | |||
2239 | to allow core dumps, now do:: | ||
2240 | |||
2241 | ulimit -a | ||
2242 | |||
1871 | to verify that the limit was accepted. | 2243 | to verify that the limit was accepted. |
1872 | 2244 | ||
1873 | A sample core dump | 2245 | A sample core dump |
1874 | To create this I'm going to do | 2246 | To create this I'm going to do:: |
1875 | ulimit -c unlimited | 2247 | |
1876 | gdb | 2248 | ulimit -c unlimited |
1877 | to launch gdb (my victim app. ) now be bad & do the following from another | 2249 | gdb |
1878 | telnet/xterm session to the same machine | 2250 | |
1879 | ps -aux | grep gdb | 2251 | to launch gdb (my victim app. ) now be bad & do the following from another |
1880 | kill -SIGSEGV <gdb's pid> | 2252 | telnet/xterm session to the same machine:: |
1881 | or alternatively use killall -SIGSEGV gdb if you have the killall command. | 2253 | |
1882 | Now look at the core dump. | 2254 | ps -aux | grep gdb |
1883 | ./gdb core | 2255 | kill -SIGSEGV <gdb's pid> |
1884 | Displays the following | 2256 | |
1885 | GNU gdb 4.18 | 2257 | or alternatively use `killall -SIGSEGV gdb` if you have the killall command. |
1886 | Copyright 1998 Free Software Foundation, Inc. | 2258 | |
1887 | GDB is free software, covered by the GNU General Public License, and you are | 2259 | Now look at the core dump:: |
1888 | welcome to change it and/or distribute copies of it under certain conditions. | 2260 | |
1889 | Type "show copying" to see the conditions. | 2261 | ./gdb core |
1890 | There is absolutely no warranty for GDB. Type "show warranty" for details. | 2262 | |
1891 | This GDB was configured as "s390-ibm-linux"... | 2263 | Displays the following:: |
1892 | Core was generated by `./gdb'. | 2264 | |
1893 | Program terminated with signal 11, Segmentation fault. | 2265 | GNU gdb 4.18 |
1894 | Reading symbols from /usr/lib/libncurses.so.4...done. | 2266 | Copyright 1998 Free Software Foundation, Inc. |
1895 | Reading symbols from /lib/libm.so.6...done. | 2267 | GDB is free software, covered by the GNU General Public License, and you are |
1896 | Reading symbols from /lib/libc.so.6...done. | 2268 | welcome to change it and/or distribute copies of it under certain conditions. |
1897 | Reading symbols from /lib/ld-linux.so.2...done. | 2269 | Type "show copying" to see the conditions. |
1898 | #0 0x40126d1a in read () from /lib/libc.so.6 | 2270 | There is absolutely no warranty for GDB. Type "show warranty" for details. |
1899 | Setting up the environment for debugging gdb. | 2271 | This GDB was configured as "s390-ibm-linux"... |
1900 | Breakpoint 1 at 0x4dc6f8: file utils.c, line 471. | 2272 | Core was generated by `./gdb'. |
1901 | Breakpoint 2 at 0x4d87a4: file top.c, line 2609. | 2273 | Program terminated with signal 11, Segmentation fault. |
1902 | (top-gdb) info stack | 2274 | Reading symbols from /usr/lib/libncurses.so.4...done. |
1903 | #0 0x40126d1a in read () from /lib/libc.so.6 | 2275 | Reading symbols from /lib/libm.so.6...done. |
1904 | #1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402 | 2276 | Reading symbols from /lib/libc.so.6...done. |
1905 | #2 0x528ed0 in rl_read_key () at input.c:381 | 2277 | Reading symbols from /lib/ld-linux.so.2...done. |
1906 | #3 0x5167e6 in readline_internal_char () at readline.c:454 | 2278 | #0 0x40126d1a in read () from /lib/libc.so.6 |
1907 | #4 0x5168ee in readline_internal_charloop () at readline.c:507 | 2279 | Setting up the environment for debugging gdb. |
1908 | #5 0x51692c in readline_internal () at readline.c:521 | 2280 | Breakpoint 1 at 0x4dc6f8: file utils.c, line 471. |
1909 | #6 0x5164fe in readline (prompt=0x7ffff810) | 2281 | Breakpoint 2 at 0x4d87a4: file top.c, line 2609. |
1910 | at readline.c:349 | 2282 | (top-gdb) info stack |
1911 | #7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1, | 2283 | #0 0x40126d1a in read () from /lib/libc.so.6 |
1912 | annotation_suffix=0x4d6b44 "prompt") at top.c:2091 | 2284 | #1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402 |
1913 | #8 0x4d6cf0 in command_loop () at top.c:1345 | 2285 | #2 0x528ed0 in rl_read_key () at input.c:381 |
1914 | #9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635 | 2286 | #3 0x5167e6 in readline_internal_char () at readline.c:454 |
2287 | #4 0x5168ee in readline_internal_charloop () at readline.c:507 | ||
2288 | #5 0x51692c in readline_internal () at readline.c:521 | ||
2289 | #6 0x5164fe in readline (prompt=0x7ffff810) | ||
2290 | at readline.c:349 | ||
2291 | #7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1, | ||
2292 | annotation_suffix=0x4d6b44 "prompt") at top.c:2091 | ||
2293 | #8 0x4d6cf0 in command_loop () at top.c:1345 | ||
2294 | #9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635 | ||
1915 | 2295 | ||
1916 | 2296 | ||
1917 | LDD | 2297 | LDD |
@@ -1919,27 +2299,32 @@ LDD | |||
1919 | This is a program which lists the shared libraries which a library needs, | 2299 | This is a program which lists the shared libraries which a library needs, |
1920 | Note you also get the relocations of the shared library text segments which | 2300 | Note you also get the relocations of the shared library text segments which |
1921 | help when using objdump --source. | 2301 | help when using objdump --source. |
1922 | e.g. | 2302 | |
1923 | ldd ./gdb | 2303 | e.g.:: |
1924 | outputs | 2304 | |
1925 | libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000) | 2305 | ldd ./gdb |
1926 | libm.so.6 => /lib/libm.so.6 (0x4005e000) | 2306 | |
1927 | libc.so.6 => /lib/libc.so.6 (0x40084000) | 2307 | outputs:: |
1928 | /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000) | 2308 | |
2309 | libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000) | ||
2310 | libm.so.6 => /lib/libm.so.6 (0x4005e000) | ||
2311 | libc.so.6 => /lib/libc.so.6 (0x40084000) | ||
2312 | /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000) | ||
1929 | 2313 | ||
1930 | 2314 | ||
1931 | Debugging shared libraries | 2315 | Debugging shared libraries |
1932 | ========================== | 2316 | ========================== |
1933 | Most programs use shared libraries, however it can be very painful | 2317 | Most programs use shared libraries, however it can be very painful |
1934 | when you single step instruction into a function like printf for the | 2318 | when you single step instruction into a function like printf for the |
1935 | first time & you end up in functions like _dl_runtime_resolve this is | 2319 | first time & you end up in functions like _dl_runtime_resolve this is |
1936 | the ld.so doing lazy binding, lazy binding is a concept in ELF where | 2320 | the ld.so doing lazy binding, lazy binding is a concept in ELF where |
1937 | shared library functions are not loaded into memory unless they are | 2321 | shared library functions are not loaded into memory unless they are |
1938 | actually used, great for saving memory but a pain to debug. | 2322 | actually used, great for saving memory but a pain to debug. |
1939 | To get around this either relink the program -static or exit gdb type | 2323 | |
1940 | export LD_BIND_NOW=true this will stop lazy binding & restart the gdb'ing | 2324 | To get around this either relink the program -static or exit gdb type |
2325 | export LD_BIND_NOW=true this will stop lazy binding & restart the gdb'ing | ||
1941 | the program in question. | 2326 | the program in question. |
1942 | 2327 | ||
1943 | 2328 | ||
1944 | 2329 | ||
1945 | Debugging modules | 2330 | Debugging modules |
@@ -1955,106 +2340,127 @@ It is a filesystem created by the kernel with files which are created on demand | |||
1955 | by the kernel if read, or can be used to modify kernel parameters, | 2340 | by the kernel if read, or can be used to modify kernel parameters, |
1956 | it is a powerful concept. | 2341 | it is a powerful concept. |
1957 | 2342 | ||
1958 | e.g. | 2343 | e.g.:: |
2344 | |||
2345 | cat /proc/sys/net/ipv4/ip_forward | ||
2346 | |||
2347 | On my machine outputs:: | ||
2348 | |||
2349 | 0 | ||
2350 | |||
2351 | telling me ip_forwarding is not on to switch it on I can do:: | ||
2352 | |||
2353 | echo 1 > /proc/sys/net/ipv4/ip_forward | ||
2354 | |||
2355 | cat it again:: | ||
2356 | |||
2357 | cat /proc/sys/net/ipv4/ip_forward | ||
2358 | |||
2359 | On my machine now outputs:: | ||
2360 | |||
2361 | 1 | ||
1959 | 2362 | ||
1960 | cat /proc/sys/net/ipv4/ip_forward | ||
1961 | On my machine outputs | ||
1962 | 0 | ||
1963 | telling me ip_forwarding is not on to switch it on I can do | ||
1964 | echo 1 > /proc/sys/net/ipv4/ip_forward | ||
1965 | cat it again | ||
1966 | cat /proc/sys/net/ipv4/ip_forward | ||
1967 | On my machine now outputs | ||
1968 | 1 | ||
1969 | IP forwarding is on. | 2363 | IP forwarding is on. |
2364 | |||
1970 | There is a lot of useful info in here best found by going in and having a look | 2365 | There is a lot of useful info in here best found by going in and having a look |
1971 | around, so I'll take you through some entries I consider important. | 2366 | around, so I'll take you through some entries I consider important. |
1972 | 2367 | ||
1973 | All the processes running on the machine have their own entry defined by | 2368 | All the processes running on the machine have their own entry defined by |
1974 | /proc/<pid> | 2369 | /proc/<pid> |
1975 | So lets have a look at the init process | ||
1976 | cd /proc/1 | ||
1977 | 2370 | ||
1978 | cat cmdline | 2371 | So lets have a look at the init process:: |
1979 | emits | 2372 | |
1980 | init [2] | 2373 | cd /proc/1 |
2374 | cat cmdline | ||
2375 | |||
2376 | emits:: | ||
2377 | |||
2378 | init [2] | ||
2379 | |||
2380 | :: | ||
2381 | |||
2382 | cd /proc/1/fd | ||
1981 | 2383 | ||
1982 | cd /proc/1/fd | ||
1983 | This contains numerical entries of all the open files, | 2384 | This contains numerical entries of all the open files, |
1984 | some of these you can cat e.g. stdout (2) | 2385 | some of these you can cat e.g. stdout (2):: |
1985 | 2386 | ||
1986 | cat /proc/29/maps | 2387 | cat /proc/29/maps |
1987 | on my machine emits | 2388 | |
1988 | 2389 | on my machine emits:: | |
1989 | 00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash | 2390 | |
1990 | 00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash | 2391 | 00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash |
1991 | 0047e000-00492000 rwxp 00000000 00:00 0 | 2392 | 00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash |
1992 | 40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so | 2393 | 0047e000-00492000 rwxp 00000000 00:00 0 |
1993 | 40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so | 2394 | 40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so |
1994 | 40016000-40017000 rwxp 00000000 00:00 0 | 2395 | 40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so |
1995 | 40017000-40018000 rw-p 00000000 00:00 0 | 2396 | 40016000-40017000 rwxp 00000000 00:00 0 |
1996 | 40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8 | 2397 | 40017000-40018000 rw-p 00000000 00:00 0 |
1997 | 4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8 | 2398 | 40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8 |
1998 | 4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so | 2399 | 4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8 |
1999 | 4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so | 2400 | 4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so |
2000 | 40111000-40114000 rw-p 00000000 00:00 0 | 2401 | 4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so |
2001 | 40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so | 2402 | 40111000-40114000 rw-p 00000000 00:00 0 |
2002 | 4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so | 2403 | 40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so |
2003 | 7fffd000-80000000 rwxp ffffe000 00:00 0 | 2404 | 4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so |
2405 | 7fffd000-80000000 rwxp ffffe000 00:00 0 | ||
2004 | 2406 | ||
2005 | 2407 | ||
2006 | Showing us the shared libraries init uses where they are in memory | 2408 | Showing us the shared libraries init uses where they are in memory |
2007 | & memory access permissions for each virtual memory area. | 2409 | & memory access permissions for each virtual memory area. |
2008 | 2410 | ||
2009 | /proc/1/cwd is a softlink to the current working directory. | 2411 | /proc/1/cwd is a softlink to the current working directory. |
2010 | /proc/1/root is the root of the filesystem for this process. | 2412 | |
2413 | /proc/1/root is the root of the filesystem for this process. | ||
2011 | 2414 | ||
2012 | /proc/1/mem is the currently running process's memory which you | 2415 | /proc/1/mem is the currently running process's memory which you |
2013 | can read & write to like a file. | 2416 | can read & write to like a file. |
2417 | |||
2014 | strace uses this sometimes as it is a bit faster than the | 2418 | strace uses this sometimes as it is a bit faster than the |
2015 | rather inefficient ptrace interface for peeking at DATA. | 2419 | rather inefficient ptrace interface for peeking at DATA. |
2016 | 2420 | ||
2421 | :: | ||
2422 | |||
2423 | cat status | ||
2424 | |||
2425 | Name: init | ||
2426 | State: S (sleeping) | ||
2427 | Pid: 1 | ||
2428 | PPid: 0 | ||
2429 | Uid: 0 0 0 0 | ||
2430 | Gid: 0 0 0 0 | ||
2431 | Groups: | ||
2432 | VmSize: 408 kB | ||
2433 | VmLck: 0 kB | ||
2434 | VmRSS: 208 kB | ||
2435 | VmData: 24 kB | ||
2436 | VmStk: 8 kB | ||
2437 | VmExe: 368 kB | ||
2438 | VmLib: 0 kB | ||
2439 | SigPnd: 0000000000000000 | ||
2440 | SigBlk: 0000000000000000 | ||
2441 | SigIgn: 7fffffffd7f0d8fc | ||
2442 | SigCgt: 00000000280b2603 | ||
2443 | CapInh: 00000000fffffeff | ||
2444 | CapPrm: 00000000ffffffff | ||
2445 | CapEff: 00000000fffffeff | ||
2446 | |||
2447 | User PSW: 070de000 80414146 | ||
2448 | task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68 | ||
2449 | User GPRS: | ||
2450 | 00000400 00000000 0000000b 7ffffa90 | ||
2451 | 00000000 00000000 00000000 0045d9f4 | ||
2452 | 0045cafc 7ffffa90 7fffff18 0045cb08 | ||
2453 | 00010400 804039e8 80403af8 7ffff8b0 | ||
2454 | User ACRS: | ||
2455 | 00000000 00000000 00000000 00000000 | ||
2456 | 00000001 00000000 00000000 00000000 | ||
2457 | 00000000 00000000 00000000 00000000 | ||
2458 | 00000000 00000000 00000000 00000000 | ||
2459 | Kernel BackChain CallChain BackChain CallChain | ||
2460 | 004b7ca8 8002bd0c 004b7d18 8002b92c | ||
2461 | 004b7db8 8005cd50 004b7e38 8005d12a | ||
2462 | 004b7f08 80019114 | ||
2017 | 2463 | ||
2018 | cat status | ||
2019 | |||
2020 | Name: init | ||
2021 | State: S (sleeping) | ||
2022 | Pid: 1 | ||
2023 | PPid: 0 | ||
2024 | Uid: 0 0 0 0 | ||
2025 | Gid: 0 0 0 0 | ||
2026 | Groups: | ||
2027 | VmSize: 408 kB | ||
2028 | VmLck: 0 kB | ||
2029 | VmRSS: 208 kB | ||
2030 | VmData: 24 kB | ||
2031 | VmStk: 8 kB | ||
2032 | VmExe: 368 kB | ||
2033 | VmLib: 0 kB | ||
2034 | SigPnd: 0000000000000000 | ||
2035 | SigBlk: 0000000000000000 | ||
2036 | SigIgn: 7fffffffd7f0d8fc | ||
2037 | SigCgt: 00000000280b2603 | ||
2038 | CapInh: 00000000fffffeff | ||
2039 | CapPrm: 00000000ffffffff | ||
2040 | CapEff: 00000000fffffeff | ||
2041 | |||
2042 | User PSW: 070de000 80414146 | ||
2043 | task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68 | ||
2044 | User GPRS: | ||
2045 | 00000400 00000000 0000000b 7ffffa90 | ||
2046 | 00000000 00000000 00000000 0045d9f4 | ||
2047 | 0045cafc 7ffffa90 7fffff18 0045cb08 | ||
2048 | 00010400 804039e8 80403af8 7ffff8b0 | ||
2049 | User ACRS: | ||
2050 | 00000000 00000000 00000000 00000000 | ||
2051 | 00000001 00000000 00000000 00000000 | ||
2052 | 00000000 00000000 00000000 00000000 | ||
2053 | 00000000 00000000 00000000 00000000 | ||
2054 | Kernel BackChain CallChain BackChain CallChain | ||
2055 | 004b7ca8 8002bd0c 004b7d18 8002b92c | ||
2056 | 004b7db8 8005cd50 004b7e38 8005d12a | ||
2057 | 004b7f08 80019114 | ||
2058 | Showing among other things memory usage & status of some signals & | 2464 | Showing among other things memory usage & status of some signals & |
2059 | the process's registers from the kernel task_structure | 2465 | the process's registers from the kernel task_structure |
2060 | as well as a backchain which may be useful if a process crashes | 2466 | as well as a backchain which may be useful if a process crashes |
@@ -2067,11 +2473,16 @@ debug feature | |||
2067 | Some of our drivers now support a "debug feature" in | 2473 | Some of our drivers now support a "debug feature" in |
2068 | /proc/s390dbf see s390dbf.txt in the linux/Documentation directory | 2474 | /proc/s390dbf see s390dbf.txt in the linux/Documentation directory |
2069 | for more info. | 2475 | for more info. |
2070 | e.g. | 2476 | |
2071 | to switch on the lcs "debug feature" | 2477 | e.g. |
2072 | echo 5 > /proc/s390dbf/lcs/level | 2478 | to switch on the lcs "debug feature":: |
2073 | & then after the error occurred. | 2479 | |
2074 | cat /proc/s390dbf/lcs/sprintf >/logfile | 2480 | echo 5 > /proc/s390dbf/lcs/level |
2481 | |||
2482 | & then after the error occurred:: | ||
2483 | |||
2484 | cat /proc/s390dbf/lcs/sprintf >/logfile | ||
2485 | |||
2075 | the logfile now contains some information which may help | 2486 | the logfile now contains some information which may help |
2076 | tech support resolve a problem in the field. | 2487 | tech support resolve a problem in the field. |
2077 | 2488 | ||
@@ -2083,35 +2494,50 @@ ifconfig is a quite useful command | |||
2083 | it gives the current state of network drivers. | 2494 | it gives the current state of network drivers. |
2084 | 2495 | ||
2085 | If you suspect your network device driver is dead | 2496 | If you suspect your network device driver is dead |
2086 | one way to check is type | 2497 | one way to check is type:: |
2087 | ifconfig <network device> | 2498 | |
2499 | ifconfig <network device> | ||
2500 | |||
2088 | e.g. tr0 | 2501 | e.g. tr0 |
2089 | You should see something like | 2502 | |
2090 | tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48 | 2503 | You should see something like:: |
2091 | inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0 | 2504 | |
2092 | UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1 | 2505 | ifconfig tr0 |
2093 | RX packets:246134 errors:0 dropped:0 overruns:0 frame:0 | 2506 | tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48 |
2094 | TX packets:5 errors:0 dropped:0 overruns:0 carrier:0 | 2507 | inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0 |
2095 | collisions:0 txqueuelen:100 | 2508 | UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1 |
2509 | RX packets:246134 errors:0 dropped:0 overruns:0 frame:0 | ||
2510 | TX packets:5 errors:0 dropped:0 overruns:0 carrier:0 | ||
2511 | collisions:0 txqueuelen:100 | ||
2096 | 2512 | ||
2097 | if the device doesn't say up | 2513 | if the device doesn't say up |
2098 | try | 2514 | try:: |
2099 | /etc/rc.d/init.d/network start | 2515 | |
2516 | /etc/rc.d/init.d/network start | ||
2517 | |||
2100 | ( this starts the network stack & hopefully calls ifconfig tr0 up ). | 2518 | ( this starts the network stack & hopefully calls ifconfig tr0 up ). |
2101 | ifconfig looks at the output of /proc/net/dev and presents it in a more | 2519 | ifconfig looks at the output of /proc/net/dev and presents it in a more |
2102 | presentable form. | 2520 | presentable form. |
2521 | |||
2103 | Now ping the device from a machine in the same subnet. | 2522 | Now ping the device from a machine in the same subnet. |
2523 | |||
2104 | if the RX packets count & TX packets counts don't increment you probably | 2524 | if the RX packets count & TX packets counts don't increment you probably |
2105 | have problems. | 2525 | have problems. |
2106 | next | 2526 | |
2107 | cat /proc/net/arp | 2527 | next:: |
2528 | |||
2529 | cat /proc/net/arp | ||
2530 | |||
2108 | Do you see any hardware addresses in the cache? If not you may have problems. | 2531 | Do you see any hardware addresses in the cache? If not you may have problems. |
2109 | Next try | 2532 | Next try:: |
2110 | ping -c 5 <broadcast_addr> i.e. the Bcast field above in the output of | 2533 | |
2534 | ping -c 5 <broadcast_addr> | ||
2535 | |||
2536 | i.e. the Bcast field above in the output of | ||
2111 | ifconfig. Do you see any replies from machines other than the local machine? | 2537 | ifconfig. Do you see any replies from machines other than the local machine? |
2112 | If not you may have problems. Also if the TX packets count in ifconfig | 2538 | If not you may have problems. Also if the TX packets count in ifconfig |
2113 | hasn't incremented either you have serious problems in your driver | 2539 | hasn't incremented either you have serious problems in your driver |
2114 | (e.g. the txbusy field of the network device being stuck on ) | 2540 | (e.g. the txbusy field of the network device being stuck on ) |
2115 | or you may have multiple network devices connected. | 2541 | or you may have multiple network devices connected. |
2116 | 2542 | ||
2117 | 2543 | ||
@@ -2119,28 +2545,43 @@ chandev | |||
2119 | ------- | 2545 | ------- |
2120 | There is a new device layer for channel devices, some | 2546 | There is a new device layer for channel devices, some |
2121 | drivers e.g. lcs are registered with this layer. | 2547 | drivers e.g. lcs are registered with this layer. |
2548 | |||
2122 | If the device uses the channel device layer you'll be | 2549 | If the device uses the channel device layer you'll be |
2123 | able to find what interrupts it uses & the current state | 2550 | able to find what interrupts it uses & the current state |
2124 | of the device. | 2551 | of the device. |
2552 | |||
2125 | See the manpage chandev.8 & type cat /proc/chandev for more info. | 2553 | See the manpage chandev.8 & type cat /proc/chandev for more info. |
2126 | 2554 | ||
2127 | 2555 | ||
2128 | SysRq | 2556 | SysRq |
2129 | ===== | 2557 | ===== |
2130 | This is now supported by linux for s/390 & z/Architecture. | 2558 | This is now supported by linux for s/390 & z/Architecture. |
2131 | To enable it do compile the kernel with | 2559 | |
2132 | Kernel Hacking -> Magic SysRq Key Enabled | 2560 | To enable it do compile the kernel with:: |
2133 | echo "1" > /proc/sys/kernel/sysrq | 2561 | |
2134 | also type | 2562 | Kernel Hacking -> Magic SysRq Key Enabled |
2135 | echo "8" >/proc/sys/kernel/printk | 2563 | |
2564 | Then:: | ||
2565 | |||
2566 | echo "1" > /proc/sys/kernel/sysrq | ||
2567 | |||
2568 | also type:: | ||
2569 | |||
2570 | echo "8" >/proc/sys/kernel/printk | ||
2571 | |||
2136 | To make printk output go to console. | 2572 | To make printk output go to console. |
2137 | On 390 all commands are prefixed with | 2573 | |
2138 | ^- | 2574 | On 390 all commands are prefixed with:: |
2139 | e.g. | 2575 | |
2140 | ^-t will show tasks. | 2576 | ^- |
2141 | ^-? or some unknown command will display help. | 2577 | |
2578 | e.g.:: | ||
2579 | |||
2580 | ^-t will show tasks. | ||
2581 | ^-? or some unknown command will display help. | ||
2582 | |||
2142 | The sysrq key reading is very picky ( I have to type the keys in an | 2583 | The sysrq key reading is very picky ( I have to type the keys in an |
2143 | xterm session & paste them into the x3270 console ) | 2584 | xterm session & paste them into the x3270 console ) |
2144 | & it may be wise to predefine the keys as described in the VM hints above | 2585 | & it may be wise to predefine the keys as described in the VM hints above |
2145 | 2586 | ||
2146 | This is particularly useful for syncing disks unmounting & rebooting | 2587 | This is particularly useful for syncing disks unmounting & rebooting |
@@ -2150,19 +2591,19 @@ Read Documentation/admin-guide/sysrq.rst for more info | |||
2150 | 2591 | ||
2151 | References: | 2592 | References: |
2152 | =========== | 2593 | =========== |
2153 | Enterprise Systems Architecture Reference Summary | 2594 | - Enterprise Systems Architecture Reference Summary |
2154 | Enterprise Systems Architecture Principles of Operation | 2595 | - Enterprise Systems Architecture Principles of Operation |
2155 | Hartmut Penners s390 stack frame sheet. | 2596 | - Hartmut Penners s390 stack frame sheet. |
2156 | IBM Mainframe Channel Attachment a technology brief from a CISCO webpage | 2597 | - IBM Mainframe Channel Attachment a technology brief from a CISCO webpage |
2157 | Various bits of man & info pages of Linux. | 2598 | - Various bits of man & info pages of Linux. |
2158 | Linux & GDB source. | 2599 | - Linux & GDB source. |
2159 | Various info & man pages. | 2600 | - Various info & man pages. |
2160 | CMS Help on tracing commands. | 2601 | - CMS Help on tracing commands. |
2161 | Linux for s/390 Elf Application Binary Interface | 2602 | - Linux for s/390 Elf Application Binary Interface |
2162 | Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended ) | 2603 | - Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended ) |
2163 | z/Architecture Principles of Operation SA22-7832-00 | 2604 | - z/Architecture Principles of Operation SA22-7832-00 |
2164 | Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the | 2605 | - Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the |
2165 | Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05 | 2606 | - Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05 |
2166 | 2607 | ||
2167 | Special Thanks | 2608 | Special Thanks |
2168 | ============== | 2609 | ============== |
diff --git a/Documentation/s390/driver-model.txt b/Documentation/s390/driver-model.rst index ed265cf54cde..ad4bc2dbea43 100644 --- a/Documentation/s390/driver-model.txt +++ b/Documentation/s390/driver-model.rst | |||
@@ -1,5 +1,6 @@ | |||
1 | ============================= | ||
1 | S/390 driver model interfaces | 2 | S/390 driver model interfaces |
2 | ----------------------------- | 3 | ============================= |
3 | 4 | ||
4 | 1. CCW devices | 5 | 1. CCW devices |
5 | -------------- | 6 | -------------- |
@@ -7,13 +8,13 @@ S/390 driver model interfaces | |||
7 | All devices which can be addressed by means of ccws are called 'CCW devices' - | 8 | All devices which can be addressed by means of ccws are called 'CCW devices' - |
8 | even if they aren't actually driven by ccws. | 9 | even if they aren't actually driven by ccws. |
9 | 10 | ||
10 | All ccw devices are accessed via a subchannel, this is reflected in the | 11 | All ccw devices are accessed via a subchannel, this is reflected in the |
11 | structures under devices/: | 12 | structures under devices/:: |
12 | 13 | ||
13 | devices/ | 14 | devices/ |
14 | - system/ | 15 | - system/ |
15 | - css0/ | 16 | - css0/ |
16 | - 0.0.0000/0.0.0815/ | 17 | - 0.0.0000/0.0.0815/ |
17 | - 0.0.0001/0.0.4711/ | 18 | - 0.0.0001/0.0.4711/ |
18 | - 0.0.0002/ | 19 | - 0.0.0002/ |
19 | - 0.1.0000/0.1.1234/ | 20 | - 0.1.0000/0.1.1234/ |
@@ -35,14 +36,18 @@ be found under bus/ccw/devices/. | |||
35 | 36 | ||
36 | All ccw devices export some data via sysfs. | 37 | All ccw devices export some data via sysfs. |
37 | 38 | ||
38 | cutype: The control unit type / model. | 39 | cutype: |
40 | The control unit type / model. | ||
39 | 41 | ||
40 | devtype: The device type / model, if applicable. | 42 | devtype: |
43 | The device type / model, if applicable. | ||
41 | 44 | ||
42 | availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for | 45 | availability: |
46 | Can be 'good' or 'boxed'; 'no path' or 'no device' for | ||
43 | disconnected devices. | 47 | disconnected devices. |
44 | 48 | ||
45 | online: An interface to set the device online and offline. | 49 | online: |
50 | An interface to set the device online and offline. | ||
46 | In the special case of the device being disconnected (see the | 51 | In the special case of the device being disconnected (see the |
47 | notify function under 1.2), piping 0 to online will forcibly delete | 52 | notify function under 1.2), piping 0 to online will forcibly delete |
48 | the device. | 53 | the device. |
@@ -52,9 +57,11 @@ The device drivers can add entries to export per-device data and interfaces. | |||
52 | There is also some data exported on a per-subchannel basis (see under | 57 | There is also some data exported on a per-subchannel basis (see under |
53 | bus/css/devices/): | 58 | bus/css/devices/): |
54 | 59 | ||
55 | chpids: Via which chpids the device is connected. | 60 | chpids: |
61 | Via which chpids the device is connected. | ||
56 | 62 | ||
57 | pimpampom: The path installed, path available and path operational masks. | 63 | pimpampom: |
64 | The path installed, path available and path operational masks. | ||
58 | 65 | ||
59 | There also might be additional data, for example for block devices. | 66 | There also might be additional data, for example for block devices. |
60 | 67 | ||
@@ -74,77 +81,93 @@ b. After a. has been performed, if necessary, the device is finally brought up | |||
74 | ------------------------------------ | 81 | ------------------------------------ |
75 | 82 | ||
76 | The basic struct ccw_device and struct ccw_driver data structures can be found | 83 | The basic struct ccw_device and struct ccw_driver data structures can be found |
77 | under include/asm/ccwdev.h. | 84 | under include/asm/ccwdev.h:: |
78 | 85 | ||
79 | struct ccw_device { | 86 | struct ccw_device { |
80 | spinlock_t *ccwlock; | 87 | spinlock_t *ccwlock; |
81 | struct ccw_device_private *private; | 88 | struct ccw_device_private *private; |
82 | struct ccw_device_id id; | 89 | struct ccw_device_id id; |
83 | 90 | ||
84 | struct ccw_driver *drv; | 91 | struct ccw_driver *drv; |
85 | struct device dev; | 92 | struct device dev; |
86 | int online; | 93 | int online; |
87 | 94 | ||
88 | void (*handler) (struct ccw_device *dev, unsigned long intparm, | 95 | void (*handler) (struct ccw_device *dev, unsigned long intparm, |
89 | struct irb *irb); | 96 | struct irb *irb); |
90 | }; | 97 | }; |
91 | 98 | ||
92 | struct ccw_driver { | 99 | struct ccw_driver { |
93 | struct module *owner; | 100 | struct module *owner; |
94 | struct ccw_device_id *ids; | 101 | struct ccw_device_id *ids; |
95 | int (*probe) (struct ccw_device *); | 102 | int (*probe) (struct ccw_device *); |
96 | int (*remove) (struct ccw_device *); | 103 | int (*remove) (struct ccw_device *); |
97 | int (*set_online) (struct ccw_device *); | 104 | int (*set_online) (struct ccw_device *); |
98 | int (*set_offline) (struct ccw_device *); | 105 | int (*set_offline) (struct ccw_device *); |
99 | int (*notify) (struct ccw_device *, int); | 106 | int (*notify) (struct ccw_device *, int); |
100 | struct device_driver driver; | 107 | struct device_driver driver; |
101 | char *name; | 108 | char *name; |
102 | }; | 109 | }; |
103 | 110 | ||
104 | The 'private' field contains data needed for internal i/o operation only, and | 111 | The 'private' field contains data needed for internal i/o operation only, and |
105 | is not available to the device driver. | 112 | is not available to the device driver. |
106 | 113 | ||
107 | Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models | 114 | Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models |
108 | and/or device types/models it is interested. This information can later be found | 115 | and/or device types/models it is interested. This information can later be found |
109 | in the struct ccw_device_id fields: | 116 | in the struct ccw_device_id fields:: |
110 | 117 | ||
111 | struct ccw_device_id { | 118 | struct ccw_device_id { |
112 | __u16 match_flags; | 119 | __u16 match_flags; |
113 | 120 | ||
114 | __u16 cu_type; | 121 | __u16 cu_type; |
115 | __u16 dev_type; | 122 | __u16 dev_type; |
116 | __u8 cu_model; | 123 | __u8 cu_model; |
117 | __u8 dev_model; | 124 | __u8 dev_model; |
118 | 125 | ||
119 | unsigned long driver_info; | 126 | unsigned long driver_info; |
120 | }; | 127 | }; |
121 | 128 | ||
122 | The functions in ccw_driver should be used in the following way: | 129 | The functions in ccw_driver should be used in the following way: |
123 | probe: This function is called by the device layer for each device the driver | 130 | |
131 | probe: | ||
132 | This function is called by the device layer for each device the driver | ||
124 | is interested in. The driver should only allocate private structures | 133 | is interested in. The driver should only allocate private structures |
125 | to put in dev->driver_data and create attributes (if needed). Also, | 134 | to put in dev->driver_data and create attributes (if needed). Also, |
126 | the interrupt handler (see below) should be set here. | 135 | the interrupt handler (see below) should be set here. |
127 | 136 | ||
128 | int (*probe) (struct ccw_device *cdev); | 137 | :: |
138 | |||
139 | int (*probe) (struct ccw_device *cdev); | ||
129 | 140 | ||
130 | Parameters: cdev - the device to be probed. | 141 | Parameters: |
142 | cdev | ||
143 | - the device to be probed. | ||
131 | 144 | ||
132 | 145 | ||
133 | remove: This function is called by the device layer upon removal of the driver, | 146 | remove: |
147 | This function is called by the device layer upon removal of the driver, | ||
134 | the device or the module. The driver should perform cleanups here. | 148 | the device or the module. The driver should perform cleanups here. |
135 | 149 | ||
136 | int (*remove) (struct ccw_device *cdev); | 150 | :: |
137 | 151 | ||
138 | Parameters: cdev - the device to be removed. | 152 | int (*remove) (struct ccw_device *cdev); |
139 | 153 | ||
154 | Parameters: | ||
155 | cdev | ||
156 | - the device to be removed. | ||
140 | 157 | ||
141 | set_online: This function is called by the common I/O layer when the device is | 158 | |
159 | set_online: | ||
160 | This function is called by the common I/O layer when the device is | ||
142 | activated via the 'online' attribute. The driver should finally | 161 | activated via the 'online' attribute. The driver should finally |
143 | setup and activate the device here. | 162 | setup and activate the device here. |
144 | 163 | ||
145 | int (*set_online) (struct ccw_device *); | 164 | :: |
165 | |||
166 | int (*set_online) (struct ccw_device *); | ||
146 | 167 | ||
147 | Parameters: cdev - the device to be activated. The common layer has | 168 | Parameters: |
169 | cdev | ||
170 | - the device to be activated. The common layer has | ||
148 | verified that the device is not already online. | 171 | verified that the device is not already online. |
149 | 172 | ||
150 | 173 | ||
@@ -152,15 +175,22 @@ set_offline: This function is called by the common I/O layer when the device is | |||
152 | de-activated via the 'online' attribute. The driver should shut | 175 | de-activated via the 'online' attribute. The driver should shut |
153 | down the device, but not de-allocate its private data. | 176 | down the device, but not de-allocate its private data. |
154 | 177 | ||
155 | int (*set_offline) (struct ccw_device *); | 178 | :: |
156 | 179 | ||
157 | Parameters: cdev - the device to be deactivated. The common layer has | 180 | int (*set_offline) (struct ccw_device *); |
181 | |||
182 | Parameters: | ||
183 | cdev | ||
184 | - the device to be deactivated. The common layer has | ||
158 | verified that the device is online. | 185 | verified that the device is online. |
159 | 186 | ||
160 | 187 | ||
161 | notify: This function is called by the common I/O layer for some state changes | 188 | notify: |
189 | This function is called by the common I/O layer for some state changes | ||
162 | of the device. | 190 | of the device. |
191 | |||
163 | Signalled to the driver are: | 192 | Signalled to the driver are: |
193 | |||
164 | * In online state, device detached (CIO_GONE) or last path gone | 194 | * In online state, device detached (CIO_GONE) or last path gone |
165 | (CIO_NO_PATH). The driver must return !0 to keep the device; for | 195 | (CIO_NO_PATH). The driver must return !0 to keep the device; for |
166 | return code 0, the device will be deleted as usual (also when no | 196 | return code 0, the device will be deleted as usual (also when no |
@@ -173,32 +203,40 @@ notify: This function is called by the common I/O layer for some state changes | |||
173 | return code of the notify function the device driver signals if it | 203 | return code of the notify function the device driver signals if it |
174 | wants the device back: !0 for keeping, 0 to make the device being | 204 | wants the device back: !0 for keeping, 0 to make the device being |
175 | removed and re-registered. | 205 | removed and re-registered. |
176 | |||
177 | int (*notify) (struct ccw_device *, int); | ||
178 | 206 | ||
179 | Parameters: cdev - the device whose state changed. | 207 | :: |
180 | event - the event that happened. This can be one of CIO_GONE, | 208 | |
181 | CIO_NO_PATH or CIO_OPER. | 209 | int (*notify) (struct ccw_device *, int); |
210 | |||
211 | Parameters: | ||
212 | cdev | ||
213 | - the device whose state changed. | ||
214 | |||
215 | event | ||
216 | - the event that happened. This can be one of CIO_GONE, | ||
217 | CIO_NO_PATH or CIO_OPER. | ||
182 | 218 | ||
183 | The handler field of the struct ccw_device is meant to be set to the interrupt | 219 | The handler field of the struct ccw_device is meant to be set to the interrupt |
184 | handler for the device. In order to accommodate drivers which use several | 220 | handler for the device. In order to accommodate drivers which use several |
185 | distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device | 221 | distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device |
186 | instead of ccw_driver. | 222 | instead of ccw_driver. |
187 | The handler is registered with the common layer during set_online() processing | 223 | The handler is registered with the common layer during set_online() processing |
188 | before the driver is called, and is deregistered during set_offline() after the | 224 | before the driver is called, and is deregistered during set_offline() after the |
189 | driver has been called. Also, after registering / before deregistering, path | 225 | driver has been called. Also, after registering / before deregistering, path |
190 | grouping resp. disbanding of the path group (if applicable) are performed. | 226 | grouping resp. disbanding of the path group (if applicable) are performed. |
191 | 227 | ||
192 | void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb); | 228 | :: |
193 | 229 | ||
194 | Parameters: dev - the device the handler is called for | 230 | void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb); |
231 | |||
232 | Parameters: dev - the device the handler is called for | ||
195 | intparm - the intparm which allows the device driver to identify | 233 | intparm - the intparm which allows the device driver to identify |
196 | the i/o the interrupt is associated with, or to recognize | 234 | the i/o the interrupt is associated with, or to recognize |
197 | the interrupt as unsolicited. | 235 | the interrupt as unsolicited. |
198 | irb - interruption response block which contains the accumulated | 236 | irb - interruption response block which contains the accumulated |
199 | status. | 237 | status. |
200 | 238 | ||
201 | The device driver is called from the common ccw_device layer and can retrieve | 239 | The device driver is called from the common ccw_device layer and can retrieve |
202 | information about the interrupt from the irb parameter. | 240 | information about the interrupt from the irb parameter. |
203 | 241 | ||
204 | 242 | ||
@@ -237,23 +275,27 @@ only the logical state and not the physical state, since we cannot track the | |||
237 | latter consistently due to lacking machine support (we don't need to be aware | 275 | latter consistently due to lacking machine support (we don't need to be aware |
238 | of it anyway). | 276 | of it anyway). |
239 | 277 | ||
240 | status - Can be 'online' or 'offline'. | 278 | status |
279 | - Can be 'online' or 'offline'. | ||
241 | Piping 'on' or 'off' sets the chpid logically online/offline. | 280 | Piping 'on' or 'off' sets the chpid logically online/offline. |
242 | Piping 'on' to an online chpid triggers path reprobing for all devices | 281 | Piping 'on' to an online chpid triggers path reprobing for all devices |
243 | the chpid connects to. This can be used to force the kernel to re-use | 282 | the chpid connects to. This can be used to force the kernel to re-use |
244 | a channel path the user knows to be online, but the machine hasn't | 283 | a channel path the user knows to be online, but the machine hasn't |
245 | created a machine check for. | 284 | created a machine check for. |
246 | 285 | ||
247 | type - The physical type of the channel path. | 286 | type |
287 | - The physical type of the channel path. | ||
248 | 288 | ||
249 | shared - Whether the channel path is shared. | 289 | shared |
290 | - Whether the channel path is shared. | ||
250 | 291 | ||
251 | cmg - The channel measurement group. | 292 | cmg |
293 | - The channel measurement group. | ||
252 | 294 | ||
253 | 3. System devices | 295 | 3. System devices |
254 | ----------------- | 296 | ----------------- |
255 | 297 | ||
256 | 3.1 xpram | 298 | 3.1 xpram |
257 | --------- | 299 | --------- |
258 | 300 | ||
259 | xpram shows up under devices/system/ as 'xpram'. | 301 | xpram shows up under devices/system/ as 'xpram'. |
@@ -279,9 +321,8 @@ Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interfa | |||
279 | number is assigned sequentially to the connections defined via the 'connection' | 321 | number is assigned sequentially to the connections defined via the 'connection' |
280 | attribute. | 322 | attribute. |
281 | 323 | ||
282 | user - shows the connection partner. | 324 | user |
283 | 325 | - shows the connection partner. | |
284 | buffer - maximum buffer size. | ||
285 | Pipe to it to change buffer size. | ||
286 | |||
287 | 326 | ||
327 | buffer | ||
328 | - maximum buffer size. Pipe to it to change buffer size. | ||
diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst new file mode 100644 index 000000000000..1a914da2a07b --- /dev/null +++ b/Documentation/s390/index.rst | |||
@@ -0,0 +1,30 @@ | |||
1 | :orphan: | ||
2 | |||
3 | ================= | ||
4 | s390 Architecture | ||
5 | ================= | ||
6 | |||
7 | .. toctree:: | ||
8 | :maxdepth: 1 | ||
9 | |||
10 | cds | ||
11 | 3270 | ||
12 | debugging390 | ||
13 | driver-model | ||
14 | monreader | ||
15 | qeth | ||
16 | s390dbf | ||
17 | vfio-ap | ||
18 | vfio-ccw | ||
19 | zfcpdump | ||
20 | dasd | ||
21 | common_io | ||
22 | |||
23 | text_files | ||
24 | |||
25 | .. only:: subproject and html | ||
26 | |||
27 | Indices | ||
28 | ======= | ||
29 | |||
30 | * :ref:`genindex` | ||
diff --git a/Documentation/s390/monreader.txt b/Documentation/s390/monreader.rst index d3729585fdb0..1e857575c113 100644 --- a/Documentation/s390/monreader.txt +++ b/Documentation/s390/monreader.rst | |||
@@ -1,24 +1,26 @@ | |||
1 | ================================================= | ||
2 | Linux API for read access to z/VM Monitor Records | ||
3 | ================================================= | ||
1 | 4 | ||
2 | Date : 2004-Nov-26 | 5 | Date : 2004-Nov-26 |
6 | |||
3 | Author: Gerald Schaefer (geraldsc@de.ibm.com) | 7 | Author: Gerald Schaefer (geraldsc@de.ibm.com) |
4 | 8 | ||
5 | 9 | ||
6 | Linux API for read access to z/VM Monitor Records | ||
7 | ================================================= | ||
8 | 10 | ||
9 | 11 | ||
10 | Description | 12 | Description |
11 | =========== | 13 | =========== |
12 | This item delivers a new Linux API in the form of a misc char device that is | 14 | This item delivers a new Linux API in the form of a misc char device that is |
13 | usable from user space and allows read access to the z/VM Monitor Records | 15 | usable from user space and allows read access to the z/VM Monitor Records |
14 | collected by the *MONITOR System Service of z/VM. | 16 | collected by the `*MONITOR` System Service of z/VM. |
15 | 17 | ||
16 | 18 | ||
17 | User Requirements | 19 | User Requirements |
18 | ================= | 20 | ================= |
19 | The z/VM guest on which you want to access this API needs to be configured in | 21 | The z/VM guest on which you want to access this API needs to be configured in |
20 | order to allow IUCV connections to the *MONITOR service, i.e. it needs the | 22 | order to allow IUCV connections to the `*MONITOR` service, i.e. it needs the |
21 | IUCV *MONITOR statement in its user entry. If the monitor DCSS to be used is | 23 | IUCV `*MONITOR` statement in its user entry. If the monitor DCSS to be used is |
22 | restricted (likely), you also need the NAMESAVE <DCSS NAME> statement. | 24 | restricted (likely), you also need the NAMESAVE <DCSS NAME> statement. |
23 | This item will use the IUCV device driver to access the z/VM services, so you | 25 | This item will use the IUCV device driver to access the z/VM services, so you |
24 | need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1. | 26 | need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1. |
@@ -50,7 +52,9 @@ Your guest virtual storage has to end below the starting address of the DCSS | |||
50 | and you have to specify the "mem=" kernel parameter in your parmfile with a | 52 | and you have to specify the "mem=" kernel parameter in your parmfile with a |
51 | value greater than the ending address of the DCSS. | 53 | value greater than the ending address of the DCSS. |
52 | 54 | ||
53 | Example: DEF STOR 140M | 55 | Example:: |
56 | |||
57 | DEF STOR 140M | ||
54 | 58 | ||
55 | This defines 140MB storage size for your guest, the parameter "mem=160M" is | 59 | This defines 140MB storage size for your guest, the parameter "mem=160M" is |
56 | added to the parmfile. | 60 | added to the parmfile. |
@@ -66,24 +70,27 @@ kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified | |||
66 | in the parmfile. | 70 | in the parmfile. |
67 | 71 | ||
68 | The default name for the DCSS is "MONDCSS" if none is specified. In case that | 72 | The default name for the DCSS is "MONDCSS" if none is specified. In case that |
69 | there are other users already connected to the *MONITOR service (e.g. | 73 | there are other users already connected to the `*MONITOR` service (e.g. |
70 | Performance Toolkit), the monitor DCSS is already defined and you have to use | 74 | Performance Toolkit), the monitor DCSS is already defined and you have to use |
71 | the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name | 75 | the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name |
72 | of the monitor DCSS, if already defined, and the users connected to the | 76 | of the monitor DCSS, if already defined, and the users connected to the |
73 | *MONITOR service. | 77 | `*MONITOR` service. |
74 | Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor | 78 | Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor |
75 | DCSS if your z/VM doesn't have one already, you need Class E privileges to | 79 | DCSS if your z/VM doesn't have one already, you need Class E privileges to |
76 | define and save a DCSS. | 80 | define and save a DCSS. |
77 | 81 | ||
78 | Example: | 82 | Example: |
79 | -------- | 83 | -------- |
80 | modprobe monreader mondcss=MYDCSS | 84 | |
85 | :: | ||
86 | |||
87 | modprobe monreader mondcss=MYDCSS | ||
81 | 88 | ||
82 | This loads the module and sets the DCSS name to "MYDCSS". | 89 | This loads the module and sets the DCSS name to "MYDCSS". |
83 | 90 | ||
84 | NOTE: | 91 | NOTE: |
85 | ----- | 92 | ----- |
86 | This API provides no interface to control the *MONITOR service, e.g. specify | 93 | This API provides no interface to control the `*MONITOR` service, e.g. specify |
87 | which data should be collected. This can be done by the CP command MONITOR | 94 | which data should be collected. This can be done by the CP command MONITOR |
88 | (Class E privileged), see "CP Command and Utility Reference". | 95 | (Class E privileged), see "CP Command and Utility Reference". |
89 | 96 | ||
@@ -98,6 +105,7 @@ If your distribution does not support udev, a device node will not be created | |||
98 | automatically and you have to create it manually after loading the module. | 105 | automatically and you have to create it manually after loading the module. |
99 | Therefore you need to know the major and minor numbers of the device. These | 106 | Therefore you need to know the major and minor numbers of the device. These |
100 | numbers can be found in /sys/class/misc/monreader/dev. | 107 | numbers can be found in /sys/class/misc/monreader/dev. |
108 | |||
101 | Typing cat /sys/class/misc/monreader/dev will give an output of the form | 109 | Typing cat /sys/class/misc/monreader/dev will give an output of the form |
102 | <major>:<minor>. The device node can be created via the mknod command, enter | 110 | <major>:<minor>. The device node can be created via the mknod command, enter |
103 | mknod <name> c <major> <minor>, where <name> is the name of the device node | 111 | mknod <name> c <major> <minor>, where <name> is the name of the device node |
@@ -105,10 +113,13 @@ to be created. | |||
105 | 113 | ||
106 | Example: | 114 | Example: |
107 | -------- | 115 | -------- |
108 | # modprobe monreader | 116 | |
109 | # cat /sys/class/misc/monreader/dev | 117 | :: |
110 | 10:63 | 118 | |
111 | # mknod /dev/monreader c 10 63 | 119 | # modprobe monreader |
120 | # cat /sys/class/misc/monreader/dev | ||
121 | 10:63 | ||
122 | # mknod /dev/monreader c 10 63 | ||
112 | 123 | ||
113 | This loads the module with the default monitor DCSS (MONDCSS) and creates a | 124 | This loads the module with the default monitor DCSS (MONDCSS) and creates a |
114 | device node. | 125 | device node. |
@@ -133,20 +144,21 @@ last byte of data. The start address is needed to handle "end-of-frame" records | |||
133 | correctly (domain 1, record 13), i.e. it can be used to determine the record | 144 | correctly (domain 1, record 13), i.e. it can be used to determine the record |
134 | start offset relative to a 4K page (frame) boundary. | 145 | start offset relative to a 4K page (frame) boundary. |
135 | 146 | ||
136 | See "Appendix A: *MONITOR" in the "z/VM Performance" document for a description | 147 | See "Appendix A: `*MONITOR`" in the "z/VM Performance" document for a description |
137 | of the monitor control element layout. The layout of the monitor records can | 148 | of the monitor control element layout. The layout of the monitor records can |
138 | be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html | 149 | be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html |
139 | 150 | ||
140 | The layout of the data stream provided by the monreader device is as follows: | 151 | The layout of the data stream provided by the monreader device is as follows:: |
141 | ... | 152 | |
142 | <0 byte read> | 153 | ... |
143 | <first MCE> \ | 154 | <0 byte read> |
144 | <first set of records> | | 155 | <first MCE> \ |
145 | ... |- data set | 156 | <first set of records> | |
146 | <last MCE> | | 157 | ... |- data set |
147 | <last set of records> / | 158 | <last MCE> | |
148 | <0 byte read> | 159 | <last set of records> / |
149 | ... | 160 | <0 byte read> |
161 | ... | ||
150 | 162 | ||
151 | There may be more than one combination of MCE and corresponding record set | 163 | There may be more than one combination of MCE and corresponding record set |
152 | within one data set and the end of each data set is indicated by a successful | 164 | within one data set and the end of each data set is indicated by a successful |
@@ -165,15 +177,19 @@ As with most char devices, error conditions are indicated by returning a | |||
165 | negative value for the number of bytes read. In this case, the errno variable | 177 | negative value for the number of bytes read. In this case, the errno variable |
166 | indicates the error condition: | 178 | indicates the error condition: |
167 | 179 | ||
168 | EIO: reply failed, read data is invalid and the application | 180 | EIO: |
181 | reply failed, read data is invalid and the application | ||
169 | should discard the data read since the last successful read with 0 size. | 182 | should discard the data read since the last successful read with 0 size. |
170 | EFAULT: copy_to_user failed, read data is invalid and the application should | 183 | EFAULT: |
171 | discard the data read since the last successful read with 0 size. | 184 | copy_to_user failed, read data is invalid and the application should |
172 | EAGAIN: occurs on a non-blocking read if there is no data available at the | 185 | discard the data read since the last successful read with 0 size. |
173 | moment. There is no data missing or corrupted, just try again or rather | 186 | EAGAIN: |
174 | use polling for non-blocking reads. | 187 | occurs on a non-blocking read if there is no data available at the |
175 | EOVERFLOW: message limit reached, the data read since the last successful | 188 | moment. There is no data missing or corrupted, just try again or rather |
176 | read with 0 size is valid but subsequent records may be missing. | 189 | use polling for non-blocking reads. |
190 | EOVERFLOW: | ||
191 | message limit reached, the data read since the last successful | ||
192 | read with 0 size is valid but subsequent records may be missing. | ||
177 | 193 | ||
178 | In the last case (EOVERFLOW) there may be missing data, in the first two cases | 194 | In the last case (EOVERFLOW) there may be missing data, in the first two cases |
179 | (EIO, EFAULT) there will be missing data. It's up to the application if it will | 195 | (EIO, EFAULT) there will be missing data. It's up to the application if it will |
@@ -183,7 +199,7 @@ Open: | |||
183 | ----- | 199 | ----- |
184 | Only one user is allowed to open the char device. If it is already in use, the | 200 | Only one user is allowed to open the char device. If it is already in use, the |
185 | open function will fail (return a negative value) and set errno to EBUSY. | 201 | open function will fail (return a negative value) and set errno to EBUSY. |
186 | The open function may also fail if an IUCV connection to the *MONITOR service | 202 | The open function may also fail if an IUCV connection to the `*MONITOR` service |
187 | cannot be established. In this case errno will be set to EIO and an error | 203 | cannot be established. In this case errno will be set to EIO and an error |
188 | message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER | 204 | message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER |
189 | codes are described in the "z/VM Performance" book, Appendix A. | 205 | codes are described in the "z/VM Performance" book, Appendix A. |
@@ -194,4 +210,3 @@ As soon as the device is opened, incoming messages will be accepted and they | |||
194 | will account for the message limit, i.e. opening the device without reading | 210 | will account for the message limit, i.e. opening the device without reading |
195 | from it will provoke the "message limit reached" error (EOVERFLOW error code) | 211 | from it will provoke the "message limit reached" error (EOVERFLOW error code) |
196 | eventually. | 212 | eventually. |
197 | |||
diff --git a/Documentation/s390/qeth.txt b/Documentation/s390/qeth.rst index aa06fcf5f8c2..f02fdaa68de0 100644 --- a/Documentation/s390/qeth.txt +++ b/Documentation/s390/qeth.rst | |||
@@ -1,8 +1,12 @@ | |||
1 | ============================= | ||
1 | IBM s390 QDIO Ethernet Driver | 2 | IBM s390 QDIO Ethernet Driver |
3 | ============================= | ||
2 | 4 | ||
3 | OSA and HiperSockets Bridge Port Support | 5 | OSA and HiperSockets Bridge Port Support |
6 | ======================================== | ||
4 | 7 | ||
5 | Uevents | 8 | Uevents |
9 | ------- | ||
6 | 10 | ||
7 | To generate the events the device must be assigned a role of either | 11 | To generate the events the device must be assigned a role of either |
8 | a primary or a secondary Bridge Port. For more information, see | 12 | a primary or a secondary Bridge Port. For more information, see |
@@ -13,12 +17,15 @@ of some configured Bridge Port device on the channel changes, a udev | |||
13 | event with ACTION=CHANGE is emitted on behalf of the corresponding | 17 | event with ACTION=CHANGE is emitted on behalf of the corresponding |
14 | ccwgroup device. The event has the following attributes: | 18 | ccwgroup device. The event has the following attributes: |
15 | 19 | ||
16 | BRIDGEPORT=statechange - indicates that the Bridge Port device changed | 20 | BRIDGEPORT=statechange |
21 | indicates that the Bridge Port device changed | ||
17 | its state. | 22 | its state. |
18 | 23 | ||
19 | ROLE={primary|secondary|none} - the role assigned to the port. | 24 | ROLE={primary|secondary|none} |
25 | the role assigned to the port. | ||
20 | 26 | ||
21 | STATE={active|standby|inactive} - the newly assumed state of the port. | 27 | STATE={active|standby|inactive} |
28 | the newly assumed state of the port. | ||
22 | 29 | ||
23 | When run on HiperSockets Bridge Capable Port hardware with host address | 30 | When run on HiperSockets Bridge Capable Port hardware with host address |
24 | notifications enabled, a udev event with ACTION=CHANGE is emitted. | 31 | notifications enabled, a udev event with ACTION=CHANGE is emitted. |
@@ -26,25 +33,32 @@ It is emitted on behalf of the corresponding ccwgroup device when a host | |||
26 | or a VLAN is registered or unregistered on the network served by the device. | 33 | or a VLAN is registered or unregistered on the network served by the device. |
27 | The event has the following attributes: | 34 | The event has the following attributes: |
28 | 35 | ||
29 | BRIDGEDHOST={reset|register|deregister|abort} - host address | 36 | BRIDGEDHOST={reset|register|deregister|abort} |
37 | host address | ||
30 | notifications are started afresh, a new host or VLAN is registered or | 38 | notifications are started afresh, a new host or VLAN is registered or |
31 | deregistered on the Bridge Port HiperSockets channel, or address | 39 | deregistered on the Bridge Port HiperSockets channel, or address |
32 | notifications are aborted. | 40 | notifications are aborted. |
33 | 41 | ||
34 | VLAN=numeric-vlan-id - VLAN ID on which the event occurred. Not included | 42 | VLAN=numeric-vlan-id |
43 | VLAN ID on which the event occurred. Not included | ||
35 | if no VLAN is involved in the event. | 44 | if no VLAN is involved in the event. |
36 | 45 | ||
37 | MAC=xx:xx:xx:xx:xx:xx - MAC address of the host that is being registered | 46 | MAC=xx:xx:xx:xx:xx:xx |
47 | MAC address of the host that is being registered | ||
38 | or deregistered from the HiperSockets channel. Not reported if the | 48 | or deregistered from the HiperSockets channel. Not reported if the |
39 | event reports the creation or destruction of a VLAN. | 49 | event reports the creation or destruction of a VLAN. |
40 | 50 | ||
41 | NTOK_BUSID=x.y.zzzz - device bus ID (CSSID, SSID and device number). | 51 | NTOK_BUSID=x.y.zzzz |
52 | device bus ID (CSSID, SSID and device number). | ||
42 | 53 | ||
43 | NTOK_IID=xx - device IID. | 54 | NTOK_IID=xx |
55 | device IID. | ||
44 | 56 | ||
45 | NTOK_CHPID=xx - device CHPID. | 57 | NTOK_CHPID=xx |
58 | device CHPID. | ||
46 | 59 | ||
47 | NTOK_CHID=xxxx - device channel ID. | 60 | NTOK_CHID=xxxx |
61 | device channel ID. | ||
48 | 62 | ||
49 | Note that the NTOK_* attributes refer to devices other than the one | 63 | Note that the `NTOK_*` attributes refer to devices other than the one |
50 | connected to the system on which the OS is running. | 64 | connected to the system on which the OS is running. |
diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst new file mode 100644 index 000000000000..ec2a1faa414b --- /dev/null +++ b/Documentation/s390/s390dbf.rst | |||
@@ -0,0 +1,803 @@ | |||
1 | ================== | ||
2 | S390 Debug Feature | ||
3 | ================== | ||
4 | |||
5 | files: | ||
6 | - arch/s390/kernel/debug.c | ||
7 | - arch/s390/include/asm/debug.h | ||
8 | |||
9 | Description: | ||
10 | ------------ | ||
11 | The goal of this feature is to provide a kernel debug logging API | ||
12 | where log records can be stored efficiently in memory, where each component | ||
13 | (e.g. device drivers) can have one separate debug log. | ||
14 | One purpose of this is to inspect the debug logs after a production system crash | ||
15 | in order to analyze the reason for the crash. | ||
16 | |||
17 | If the system still runs but only a subcomponent which uses dbf fails, | ||
18 | it is possible to look at the debug logs on a live system via the Linux | ||
19 | debugfs filesystem. | ||
20 | |||
21 | The debug feature may also be very useful for kernel and driver development. | ||
22 | |||
23 | Design: | ||
24 | ------- | ||
25 | Kernel components (e.g. device drivers) can register themselves at the debug | ||
26 | feature with the function call debug_register(). This function initializes a | ||
27 | debug log for the caller. For each debug log there exist a number of debug areas | ||
28 | where exactly one is active at one time. Each debug area consists of contiguous | ||
29 | pages in memory. In the debug areas there are stored debug entries (log records) | ||
30 | which are written by event- and exception-calls. | ||
31 | |||
32 | An event-call writes the specified debug entry to the active debug | ||
33 | area and updates the log pointer for the active area. If the end | ||
34 | of the active debug area is reached, a wrap around is done (ring buffer) | ||
35 | and the next debug entry will be written at the beginning of the active | ||
36 | debug area. | ||
37 | |||
38 | An exception-call writes the specified debug entry to the log and | ||
39 | switches to the next debug area. This is done in order to be sure | ||
40 | that the records which describe the origin of the exception are not | ||
41 | overwritten when a wrap around for the current area occurs. | ||
42 | |||
43 | The debug areas themselves are also ordered in form of a ring buffer. | ||
44 | When an exception is thrown in the last debug area, the following debug | ||
45 | entries are then written again in the very first area. | ||
46 | |||
47 | There are three versions for the event- and exception-calls: One for | ||
48 | logging raw data, one for text and one for numbers. | ||
49 | |||
50 | Each debug entry contains the following data: | ||
51 | |||
52 | - Timestamp | ||
53 | - Cpu-Number of calling task | ||
54 | - Level of debug entry (0...6) | ||
55 | - Return Address to caller | ||
56 | - Flag, if entry is an exception or not | ||
57 | |||
58 | The debug logs can be inspected in a live system through entries in | ||
59 | the debugfs-filesystem. Under the toplevel directory "s390dbf" there is | ||
60 | a directory for each registered component, which is named like the | ||
61 | corresponding component. The debugfs normally should be mounted to | ||
62 | /sys/kernel/debug; therefore the debug feature can be accessed under | ||
63 | /sys/kernel/debug/s390dbf. | ||
64 | |||
65 | The contents of the directories are files which represent different views | ||
66 | to the debug log. Each component can decide which views should be | ||
67 | used through registering them with the function debug_register_view(). | ||
68 | Predefined views for hex/ascii, sprintf and raw binary data are provided. | ||
69 | It is also possible to define other views. The content of | ||
70 | a view can be inspected simply by reading the corresponding debugfs file. | ||
71 | |||
72 | All debug logs have an actual debug level (range from 0 to 6). | ||
73 | The default level is 3. Event and Exception functions have a 'level' | ||
74 | parameter. Only debug entries with a level that is lower or equal | ||
75 | than the actual level are written to the log. This means, when | ||
76 | writing events, high priority log entries should have a low level | ||
77 | value whereas low priority entries should have a high one. | ||
78 | The actual debug level can be changed with the help of the debugfs-filesystem | ||
79 | through writing a number string "x" to the 'level' debugfs file which is | ||
80 | provided for every debug log. Debugging can be switched off completely | ||
81 | by using "-" on the 'level' debugfs file. | ||
82 | |||
83 | Example:: | ||
84 | |||
85 | > echo "-" > /sys/kernel/debug/s390dbf/dasd/level | ||
86 | |||
87 | It is also possible to deactivate the debug feature globally for every | ||
88 | debug log. You can change the behavior using 2 sysctl parameters in | ||
89 | /proc/sys/s390dbf: | ||
90 | |||
91 | There are currently 2 possible triggers, which stop the debug feature | ||
92 | globally. The first possibility is to use the "debug_active" sysctl. If | ||
93 | set to 1 the debug feature is running. If "debug_active" is set to 0 the | ||
94 | debug feature is turned off. | ||
95 | |||
96 | The second trigger which stops the debug feature is a kernel oops. | ||
97 | That prevents the debug feature from overwriting debug information that | ||
98 | happened before the oops. After an oops you can reactivate the debug feature | ||
99 | by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, it's not | ||
100 | suggested to use an oopsed kernel in a production environment. | ||
101 | |||
102 | If you want to disallow the deactivation of the debug feature, you can use | ||
103 | the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug | ||
104 | feature cannot be stopped. If the debug feature is already stopped, it | ||
105 | will stay deactivated. | ||
106 | |||
107 | ---------------------------------------------------------------------------- | ||
108 | |||
109 | Kernel Interfaces: | ||
110 | ------------------ | ||
111 | |||
112 | :: | ||
113 | |||
114 | debug_info_t *debug_register(char *name, int pages, int nr_areas, | ||
115 | int buf_size); | ||
116 | |||
117 | Parameter: | ||
118 | name: | ||
119 | Name of debug log (e.g. used for debugfs entry) | ||
120 | pages: | ||
121 | Number of pages, which will be allocated per area | ||
122 | nr_areas: | ||
123 | Number of debug areas | ||
124 | buf_size: | ||
125 | Size of data area in each debug entry | ||
126 | |||
127 | Return Value: | ||
128 | Handle for generated debug area | ||
129 | |||
130 | NULL if register failed | ||
131 | |||
132 | Description: Allocates memory for a debug log | ||
133 | Must not be called within an interrupt handler | ||
134 | |||
135 | ---------------------------------------------------------------------------- | ||
136 | |||
137 | :: | ||
138 | |||
139 | debug_info_t *debug_register_mode(char *name, int pages, int nr_areas, | ||
140 | int buf_size, mode_t mode, uid_t uid, | ||
141 | gid_t gid); | ||
142 | |||
143 | Parameter: | ||
144 | name: | ||
145 | Name of debug log (e.g. used for debugfs entry) | ||
146 | pages: | ||
147 | Number of pages, which will be allocated per area | ||
148 | nr_areas: | ||
149 | Number of debug areas | ||
150 | buf_size: | ||
151 | Size of data area in each debug entry | ||
152 | mode: | ||
153 | File mode for debugfs files. E.g. S_IRWXUGO | ||
154 | uid: | ||
155 | User ID for debugfs files. Currently only 0 is | ||
156 | supported. | ||
157 | gid: | ||
158 | Group ID for debugfs files. Currently only 0 is | ||
159 | supported. | ||
160 | |||
161 | Return Value: | ||
162 | Handle for generated debug area | ||
163 | |||
164 | NULL if register failed | ||
165 | |||
166 | Description: | ||
167 | Allocates memory for a debug log | ||
168 | Must not be called within an interrupt handler | ||
169 | |||
170 | --------------------------------------------------------------------------- | ||
171 | |||
172 | :: | ||
173 | |||
174 | void debug_unregister (debug_info_t * id); | ||
175 | |||
176 | Parameter: | ||
177 | id: | ||
178 | handle for debug log | ||
179 | |||
180 | Return Value: | ||
181 | none | ||
182 | |||
183 | Description: | ||
184 | frees memory for a debug log and removes all registered debug | ||
185 | views. | ||
186 | |||
187 | Must not be called within an interrupt handler | ||
188 | |||
189 | --------------------------------------------------------------------------- | ||
190 | |||
191 | :: | ||
192 | |||
193 | void debug_set_level (debug_info_t * id, int new_level); | ||
194 | |||
195 | Parameter: id: handle for debug log | ||
196 | new_level: new debug level | ||
197 | |||
198 | Return Value: | ||
199 | none | ||
200 | |||
201 | Description: | ||
202 | Sets new actual debug level if new_level is valid. | ||
203 | |||
204 | --------------------------------------------------------------------------- | ||
205 | |||
206 | :: | ||
207 | |||
208 | bool debug_level_enabled (debug_info_t * id, int level); | ||
209 | |||
210 | Parameter: | ||
211 | id: | ||
212 | handle for debug log | ||
213 | level: | ||
214 | debug level | ||
215 | |||
216 | Return Value: | ||
217 | True if level is less than or equal to the current debug level. | ||
218 | |||
219 | Description: | ||
220 | Returns true if debug events for the specified level would be | ||
221 | logged. Otherwise returns false. | ||
222 | |||
223 | --------------------------------------------------------------------------- | ||
224 | |||
225 | :: | ||
226 | |||
227 | void debug_stop_all(void); | ||
228 | |||
229 | Parameter: | ||
230 | none | ||
231 | |||
232 | Return Value: | ||
233 | none | ||
234 | |||
235 | Description: | ||
236 | stops the debug feature if stopping is allowed. Currently | ||
237 | used in case of a kernel oops. | ||
238 | |||
239 | --------------------------------------------------------------------------- | ||
240 | |||
241 | :: | ||
242 | |||
243 | debug_entry_t* debug_event (debug_info_t* id, int level, void* data, | ||
244 | int length); | ||
245 | |||
246 | Parameter: | ||
247 | id: | ||
248 | handle for debug log | ||
249 | level: | ||
250 | debug level | ||
251 | data: | ||
252 | pointer to data for debug entry | ||
253 | length: | ||
254 | length of data in bytes | ||
255 | |||
256 | Return Value: | ||
257 | Address of written debug entry | ||
258 | |||
259 | Description: | ||
260 | writes debug entry to active debug area (if level <= actual | ||
261 | debug level) | ||
262 | |||
263 | --------------------------------------------------------------------------- | ||
264 | |||
265 | :: | ||
266 | |||
267 | debug_entry_t* debug_int_event (debug_info_t * id, int level, | ||
268 | unsigned int data); | ||
269 | debug_entry_t* debug_long_event(debug_info_t * id, int level, | ||
270 | unsigned long data); | ||
271 | |||
272 | Parameter: | ||
273 | id: | ||
274 | handle for debug log | ||
275 | level: | ||
276 | debug level | ||
277 | data: | ||
278 | integer value for debug entry | ||
279 | |||
280 | Return Value: | ||
281 | Address of written debug entry | ||
282 | |||
283 | Description: | ||
284 | writes debug entry to active debug area (if level <= actual | ||
285 | debug level) | ||
286 | |||
287 | --------------------------------------------------------------------------- | ||
288 | |||
289 | :: | ||
290 | |||
291 | debug_entry_t* debug_text_event (debug_info_t * id, int level, | ||
292 | const char* data); | ||
293 | |||
294 | Parameter: | ||
295 | id: | ||
296 | handle for debug log | ||
297 | level: | ||
298 | debug level | ||
299 | data: | ||
300 | string for debug entry | ||
301 | |||
302 | Return Value: | ||
303 | Address of written debug entry | ||
304 | |||
305 | Description: | ||
306 | writes debug entry in ascii format to active debug area | ||
307 | (if level <= actual debug level) | ||
308 | |||
309 | --------------------------------------------------------------------------- | ||
310 | |||
311 | :: | ||
312 | |||
313 | debug_entry_t* debug_sprintf_event (debug_info_t * id, int level, | ||
314 | char* string,...); | ||
315 | |||
316 | Parameter: | ||
317 | id: | ||
318 | handle for debug log | ||
319 | level: | ||
320 | debug level | ||
321 | string: | ||
322 | format string for debug entry | ||
323 | ...: | ||
324 | varargs used as in sprintf() | ||
325 | |||
326 | Return Value: Address of written debug entry | ||
327 | |||
328 | Description: | ||
329 | writes debug entry with format string and varargs (longs) to | ||
330 | active debug area (if level <= actual debug level). | ||
331 | floats and long long datatypes cannot be used as varargs. | ||
332 | |||
333 | --------------------------------------------------------------------------- | ||
334 | |||
335 | :: | ||
336 | |||
337 | debug_entry_t* debug_exception (debug_info_t* id, int level, void* data, | ||
338 | int length); | ||
339 | |||
340 | Parameter: | ||
341 | id: | ||
342 | handle for debug log | ||
343 | level: | ||
344 | debug level | ||
345 | data: | ||
346 | pointer to data for debug entry | ||
347 | length: | ||
348 | length of data in bytes | ||
349 | |||
350 | Return Value: | ||
351 | Address of written debug entry | ||
352 | |||
353 | Description: | ||
354 | writes debug entry to active debug area (if level <= actual | ||
355 | debug level) and switches to next debug area | ||
356 | |||
357 | --------------------------------------------------------------------------- | ||
358 | |||
359 | :: | ||
360 | |||
361 | debug_entry_t* debug_int_exception (debug_info_t * id, int level, | ||
362 | unsigned int data); | ||
363 | debug_entry_t* debug_long_exception(debug_info_t * id, int level, | ||
364 | unsigned long data); | ||
365 | |||
366 | Parameter: id: handle for debug log | ||
367 | level: debug level | ||
368 | data: integer value for debug entry | ||
369 | |||
370 | Return Value: Address of written debug entry | ||
371 | |||
372 | Description: writes debug entry to active debug area (if level <= actual | ||
373 | debug level) and switches to next debug area | ||
374 | |||
375 | --------------------------------------------------------------------------- | ||
376 | |||
377 | :: | ||
378 | |||
379 | debug_entry_t* debug_text_exception (debug_info_t * id, int level, | ||
380 | const char* data); | ||
381 | |||
382 | Parameter: id: handle for debug log | ||
383 | level: debug level | ||
384 | data: string for debug entry | ||
385 | |||
386 | Return Value: Address of written debug entry | ||
387 | |||
388 | Description: writes debug entry in ascii format to active debug area | ||
389 | (if level <= actual debug level) and switches to next debug | ||
390 | area | ||
391 | |||
392 | --------------------------------------------------------------------------- | ||
393 | |||
394 | :: | ||
395 | |||
396 | debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level, | ||
397 | char* string,...); | ||
398 | |||
399 | Parameter: id: handle for debug log | ||
400 | level: debug level | ||
401 | string: format string for debug entry | ||
402 | ...: varargs used as in sprintf() | ||
403 | |||
404 | Return Value: Address of written debug entry | ||
405 | |||
406 | Description: writes debug entry with format string and varargs (longs) to | ||
407 | active debug area (if level <= actual debug level) and | ||
408 | switches to next debug area. | ||
409 | floats and long long datatypes cannot be used as varargs. | ||
410 | |||
411 | --------------------------------------------------------------------------- | ||
412 | |||
413 | :: | ||
414 | |||
415 | int debug_register_view (debug_info_t * id, struct debug_view *view); | ||
416 | |||
417 | Parameter: id: handle for debug log | ||
418 | view: pointer to debug view struct | ||
419 | |||
420 | Return Value: 0 : ok | ||
421 | < 0: Error | ||
422 | |||
423 | Description: registers new debug view and creates debugfs dir entry | ||
424 | |||
425 | --------------------------------------------------------------------------- | ||
426 | |||
427 | :: | ||
428 | |||
429 | int debug_unregister_view (debug_info_t * id, struct debug_view *view); | ||
430 | |||
431 | Parameter: id: handle for debug log | ||
432 | view: pointer to debug view struct | ||
433 | |||
434 | Return Value: 0 : ok | ||
435 | < 0: Error | ||
436 | |||
437 | Description: unregisters debug view and removes debugfs dir entry | ||
438 | |||
439 | |||
440 | |||
441 | Predefined views: | ||
442 | ----------------- | ||
443 | |||
444 | extern struct debug_view debug_hex_ascii_view; | ||
445 | |||
446 | extern struct debug_view debug_raw_view; | ||
447 | |||
448 | extern struct debug_view debug_sprintf_view; | ||
449 | |||
450 | Examples | ||
451 | -------- | ||
452 | |||
453 | :: | ||
454 | |||
455 | /* | ||
456 | * hex_ascii- + raw-view Example | ||
457 | */ | ||
458 | |||
459 | #include <linux/init.h> | ||
460 | #include <asm/debug.h> | ||
461 | |||
462 | static debug_info_t* debug_info; | ||
463 | |||
464 | static int init(void) | ||
465 | { | ||
466 | /* register 4 debug areas with one page each and 4 byte data field */ | ||
467 | |||
468 | debug_info = debug_register ("test", 1, 4, 4 ); | ||
469 | debug_register_view(debug_info,&debug_hex_ascii_view); | ||
470 | debug_register_view(debug_info,&debug_raw_view); | ||
471 | |||
472 | debug_text_event(debug_info, 4 , "one "); | ||
473 | debug_int_exception(debug_info, 4, 4711); | ||
474 | debug_event(debug_info, 3, &debug_info, 4); | ||
475 | |||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | static void cleanup(void) | ||
480 | { | ||
481 | debug_unregister (debug_info); | ||
482 | } | ||
483 | |||
484 | module_init(init); | ||
485 | module_exit(cleanup); | ||
486 | |||
487 | --------------------------------------------------------------------------- | ||
488 | |||
489 | :: | ||
490 | |||
491 | /* | ||
492 | * sprintf-view Example | ||
493 | */ | ||
494 | |||
495 | #include <linux/init.h> | ||
496 | #include <asm/debug.h> | ||
497 | |||
498 | static debug_info_t* debug_info; | ||
499 | |||
500 | static int init(void) | ||
501 | { | ||
502 | /* register 4 debug areas with one page each and data field for */ | ||
503 | /* format string pointer + 2 varargs (= 3 * sizeof(long)) */ | ||
504 | |||
505 | debug_info = debug_register ("test", 1, 4, sizeof(long) * 3); | ||
506 | debug_register_view(debug_info,&debug_sprintf_view); | ||
507 | |||
508 | debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__); | ||
509 | debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info); | ||
510 | |||
511 | return 0; | ||
512 | } | ||
513 | |||
514 | static void cleanup(void) | ||
515 | { | ||
516 | debug_unregister (debug_info); | ||
517 | } | ||
518 | |||
519 | module_init(init); | ||
520 | module_exit(cleanup); | ||
521 | |||
522 | Debugfs Interface | ||
523 | ----------------- | ||
524 | Views to the debug logs can be investigated through reading the corresponding | ||
525 | debugfs-files: | ||
526 | |||
527 | Example:: | ||
528 | |||
529 | > ls /sys/kernel/debug/s390dbf/dasd | ||
530 | flush hex_ascii level pages raw | ||
531 | > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s | ||
532 | 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | .... | ||
533 | 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE | ||
534 | 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | .... | ||
535 | 00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP | ||
536 | 01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD | ||
537 | 01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | .... | ||
538 | 01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ... | ||
539 | 01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | .... | ||
540 | 01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE | ||
541 | 01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | .... | ||
542 | |||
543 | See section about predefined views for explanation of the above output! | ||
544 | |||
545 | Changing the debug level | ||
546 | ------------------------ | ||
547 | |||
548 | Example:: | ||
549 | |||
550 | |||
551 | > cat /sys/kernel/debug/s390dbf/dasd/level | ||
552 | 3 | ||
553 | > echo "5" > /sys/kernel/debug/s390dbf/dasd/level | ||
554 | > cat /sys/kernel/debug/s390dbf/dasd/level | ||
555 | 5 | ||
556 | |||
557 | Flushing debug areas | ||
558 | -------------------- | ||
559 | Debug areas can be flushed with piping the number of the desired | ||
560 | area (0...n) to the debugfs file "flush". When using "-" all debug areas | ||
561 | are flushed. | ||
562 | |||
563 | Examples: | ||
564 | |||
565 | 1. Flush debug area 0:: | ||
566 | |||
567 | > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush | ||
568 | |||
569 | 2. Flush all debug areas:: | ||
570 | |||
571 | > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush | ||
572 | |||
573 | Changing the size of debug areas | ||
574 | ------------------------------------ | ||
575 | It is possible to change the size of debug areas through piping | ||
576 | the number of pages to the debugfs file "pages". The resize request will | ||
577 | also flush the debug areas. | ||
578 | |||
579 | Example: | ||
580 | |||
581 | Define 4 pages for the debug areas of debug feature "dasd":: | ||
582 | |||
583 | > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages | ||
584 | |||
585 | Stopping the debug feature | ||
586 | -------------------------- | ||
587 | Example: | ||
588 | |||
589 | 1. Check if stopping is allowed:: | ||
590 | |||
591 | > cat /proc/sys/s390dbf/debug_stoppable | ||
592 | |||
593 | 2. Stop debug feature:: | ||
594 | |||
595 | > echo 0 > /proc/sys/s390dbf/debug_active | ||
596 | |||
597 | lcrash Interface | ||
598 | ---------------- | ||
599 | It is planned that the dump analysis tool lcrash gets an additional command | ||
600 | 's390dbf' to display all the debug logs. With this tool it will be possible | ||
601 | to investigate the debug logs on a live system and with a memory dump after | ||
602 | a system crash. | ||
603 | |||
604 | Investigating raw memory | ||
605 | ------------------------ | ||
606 | One last possibility to investigate the debug logs at a live | ||
607 | system and after a system crash is to look at the raw memory | ||
608 | under VM or at the Service Element. | ||
609 | It is possible to find the anchor of the debug-logs through | ||
610 | the 'debug_area_first' symbol in the System map. Then one has | ||
611 | to follow the correct pointers of the data-structures defined | ||
612 | in debug.h and find the debug-areas in memory. | ||
613 | Normally modules which use the debug feature will also have | ||
614 | a global variable with the pointer to the debug-logs. Following | ||
615 | this pointer it will also be possible to find the debug logs in | ||
616 | memory. | ||
617 | |||
618 | For this method it is recommended to use '16 * x + 4' byte (x = 0..n) | ||
619 | for the length of the data field in debug_register() in | ||
620 | order to see the debug entries well formatted. | ||
621 | |||
622 | |||
623 | Predefined Views | ||
624 | ---------------- | ||
625 | |||
626 | There are three predefined views: hex_ascii, raw and sprintf. | ||
627 | The hex_ascii view shows the data field in hex and ascii representation | ||
628 | (e.g. '45 43 4b 44 | ECKD'). | ||
629 | The raw view returns a bytestream as the debug areas are stored in memory. | ||
630 | |||
631 | The sprintf view formats the debug entries in the same way as the sprintf | ||
632 | function would do. The sprintf event/exception functions write to the | ||
633 | debug entry a pointer to the format string (size = sizeof(long)) | ||
634 | and for each vararg a long value. So e.g. for a debug entry with a format | ||
635 | string plus two varargs one would need to allocate a (3 * sizeof(long)) | ||
636 | byte data area in the debug_register() function. | ||
637 | |||
638 | IMPORTANT: | ||
639 | Using "%s" in sprintf event functions is dangerous. You can only | ||
640 | use "%s" in the sprintf event functions, if the memory for the passed string | ||
641 | is available as long as the debug feature exists. The reason behind this is | ||
642 | that due to performance considerations only a pointer to the string is stored | ||
643 | in the debug feature. If you log a string that is freed afterwards, you will | ||
644 | get an OOPS when inspecting the debug feature, because then the debug feature | ||
645 | will access the already freed memory. | ||
646 | |||
647 | NOTE: | ||
648 | If using the sprintf view do NOT use other event/exception functions | ||
649 | than the sprintf-event and -exception functions. | ||
650 | |||
651 | The format of the hex_ascii and sprintf view is as follows: | ||
652 | |||
653 | - Number of area | ||
654 | - Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated | ||
655 | Universal Time (UTC), January 1, 1970) | ||
656 | - level of debug entry | ||
657 | - Exception flag (* = Exception) | ||
658 | - Cpu-Number of calling task | ||
659 | - Return Address to caller | ||
660 | - data field | ||
661 | |||
662 | The format of the raw view is: | ||
663 | |||
664 | - Header as described in debug.h | ||
665 | - datafield | ||
666 | |||
667 | A typical line of the hex_ascii view will look like the following (first line | ||
668 | is only for explanation and will not be displayed when 'cating' the view): | ||
669 | |||
670 | area time level exception cpu caller data (hex + ascii) | ||
671 | -------------------------------------------------------------------------- | ||
672 | 00 00964419409:440690 1 - 00 88023fe | ||
673 | |||
674 | |||
675 | Defining views | ||
676 | -------------- | ||
677 | |||
678 | Views are specified with the 'debug_view' structure. There are defined | ||
679 | callback functions which are used for reading and writing the debugfs files:: | ||
680 | |||
681 | struct debug_view { | ||
682 | char name[DEBUG_MAX_PROCF_LEN]; | ||
683 | debug_prolog_proc_t* prolog_proc; | ||
684 | debug_header_proc_t* header_proc; | ||
685 | debug_format_proc_t* format_proc; | ||
686 | debug_input_proc_t* input_proc; | ||
687 | void* private_data; | ||
688 | }; | ||
689 | |||
690 | where:: | ||
691 | |||
692 | typedef int (debug_header_proc_t) (debug_info_t* id, | ||
693 | struct debug_view* view, | ||
694 | int area, | ||
695 | debug_entry_t* entry, | ||
696 | char* out_buf); | ||
697 | |||
698 | typedef int (debug_format_proc_t) (debug_info_t* id, | ||
699 | struct debug_view* view, char* out_buf, | ||
700 | const char* in_buf); | ||
701 | typedef int (debug_prolog_proc_t) (debug_info_t* id, | ||
702 | struct debug_view* view, | ||
703 | char* out_buf); | ||
704 | typedef int (debug_input_proc_t) (debug_info_t* id, | ||
705 | struct debug_view* view, | ||
706 | struct file* file, const char* user_buf, | ||
707 | size_t in_buf_size, loff_t* offset); | ||
708 | |||
709 | |||
710 | The "private_data" member can be used as pointer to view specific data. | ||
711 | It is not used by the debug feature itself. | ||
712 | |||
713 | The output when reading a debugfs file is structured like this:: | ||
714 | |||
715 | "prolog_proc output" | ||
716 | |||
717 | "header_proc output 1" "format_proc output 1" | ||
718 | "header_proc output 2" "format_proc output 2" | ||
719 | "header_proc output 3" "format_proc output 3" | ||
720 | ... | ||
721 | |||
722 | When a view is read from the debugfs, the Debug Feature calls the | ||
723 | 'prolog_proc' once for writing the prolog. | ||
724 | Then 'header_proc' and 'format_proc' are called for each | ||
725 | existing debug entry. | ||
726 | |||
727 | The input_proc can be used to implement functionality when it is written to | ||
728 | the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level'). | ||
729 | |||
730 | For header_proc, the default function | ||
731 | debug_dflt_header_fn(), which is defined in debug.h, can be used; | ||
732 | it produces the same header output as the predefined views. | ||
733 | E.g:: | ||
734 | |||
735 | 00 00964419409:440761 2 - 00 88023ec | ||
736 | |||
737 | In order to see how to use the callback functions check the implementation | ||
738 | of the default views! | ||
739 | |||
740 | Example:: | ||
741 | |||
742 | #include <asm/debug.h> | ||
743 | |||
744 | #define UNKNOWNSTR "data: %08x" | ||
745 | |||
746 | const char* messages[] = | ||
747 | {"This error...........\n", | ||
748 | "That error...........\n", | ||
749 | "Problem..............\n", | ||
750 | "Something went wrong.\n", | ||
751 | "Everything ok........\n", | ||
752 | NULL | ||
753 | }; | ||
754 | |||
755 | static int debug_test_format_fn( | ||
756 | debug_info_t * id, struct debug_view *view, | ||
757 | char *out_buf, const char *in_buf | ||
758 | ) | ||
759 | { | ||
760 | int i, rc = 0; | ||
761 | |||
762 | if(id->buf_size >= 4) { | ||
763 | int msg_nr = *((int*)in_buf); | ||
764 | if(msg_nr < sizeof(messages)/sizeof(char*) - 1) | ||
765 | rc += sprintf(out_buf, "%s", messages[msg_nr]); | ||
766 | else | ||
767 | rc += sprintf(out_buf, UNKNOWNSTR, msg_nr); | ||
768 | } | ||
769 | out: | ||
770 | return rc; | ||
771 | } | ||
772 | |||
773 | struct debug_view debug_test_view = { | ||
774 | "myview", /* name of view */ | ||
775 | NULL, /* no prolog */ | ||
776 | &debug_dflt_header_fn, /* default header for each entry */ | ||
777 | &debug_test_format_fn, /* our own format function */ | ||
778 | NULL, /* no input function */ | ||
779 | NULL /* no private data */ | ||
780 | }; | ||
781 | |||
782 | test: | ||
783 | ===== | ||
784 | |||
785 | :: | ||
786 | |||
787 | debug_info_t *debug_info; | ||
788 | ... | ||
789 | debug_info = debug_register ("test", 0, 4, 4 ); | ||
790 | debug_register_view(debug_info, &debug_test_view); | ||
791 | for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i); | ||
792 | |||
793 | > cat /sys/kernel/debug/s390dbf/test/myview | ||
794 | 00 00964419734:611402 1 - 00 88042ca This error........... | ||
795 | 00 00964419734:611405 1 - 00 88042ca That error........... | ||
796 | 00 00964419734:611408 1 - 00 88042ca Problem.............. | ||
797 | 00 00964419734:611411 1 - 00 88042ca Something went wrong. | ||
798 | 00 00964419734:611414 1 - 00 88042ca Everything ok........ | ||
799 | 00 00964419734:611417 1 - 00 88042ca data: 00000005 | ||
800 | 00 00964419734:611419 1 - 00 88042ca data: 00000006 | ||
801 | 00 00964419734:611422 1 - 00 88042ca data: 00000007 | ||
802 | 00 00964419734:611425 1 - 00 88042ca data: 00000008 | ||
803 | 00 00964419734:611428 1 - 00 88042ca data: 00000009 | ||
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt deleted file mode 100644 index 61329fd62e89..000000000000 --- a/Documentation/s390/s390dbf.txt +++ /dev/null | |||
@@ -1,667 +0,0 @@ | |||
1 | S390 Debug Feature | ||
2 | ================== | ||
3 | |||
4 | files: arch/s390/kernel/debug.c | ||
5 | arch/s390/include/asm/debug.h | ||
6 | |||
7 | Description: | ||
8 | ------------ | ||
9 | The goal of this feature is to provide a kernel debug logging API | ||
10 | where log records can be stored efficiently in memory, where each component | ||
11 | (e.g. device drivers) can have one separate debug log. | ||
12 | One purpose of this is to inspect the debug logs after a production system crash | ||
13 | in order to analyze the reason for the crash. | ||
14 | If the system still runs but only a subcomponent which uses dbf fails, | ||
15 | it is possible to look at the debug logs on a live system via the Linux | ||
16 | debugfs filesystem. | ||
17 | The debug feature may also be very useful for kernel and driver development. | ||
18 | |||
19 | Design: | ||
20 | ------- | ||
21 | Kernel components (e.g. device drivers) can register themselves at the debug | ||
22 | feature with the function call debug_register(). This function initializes a | ||
23 | debug log for the caller. For each debug log exists a number of debug areas | ||
24 | where exactly one is active at one time. Each debug area consists of contiguous | ||
25 | pages in memory. In the debug areas there are stored debug entries (log records) | ||
26 | which are written by event- and exception-calls. | ||
27 | |||
28 | An event-call writes the specified debug entry to the active debug | ||
29 | area and updates the log pointer for the active area. If the end | ||
30 | of the active debug area is reached, a wrap around is done (ring buffer) | ||
31 | and the next debug entry will be written at the beginning of the active | ||
32 | debug area. | ||
33 | |||
34 | An exception-call writes the specified debug entry to the log and | ||
35 | switches to the next debug area. This is done in order to be sure | ||
36 | that the records which describe the origin of the exception are not | ||
37 | overwritten when a wrap around for the current area occurs. | ||
38 | |||
39 | The debug areas themselves are also ordered in form of a ring buffer. | ||
40 | When an exception is thrown in the last debug area, the following debug | ||
41 | entries are then written again in the very first area. | ||
42 | |||
43 | There are three versions for the event- and exception-calls: One for | ||
44 | logging raw data, one for text and one for numbers. | ||
45 | |||
46 | Each debug entry contains the following data: | ||
47 | |||
48 | - Timestamp | ||
49 | - Cpu-Number of calling task | ||
50 | - Level of debug entry (0...6) | ||
51 | - Return Address to caller | ||
52 | - Flag, if entry is an exception or not | ||
53 | |||
54 | The debug logs can be inspected in a live system through entries in | ||
55 | the debugfs-filesystem. Under the toplevel directory "s390dbf" there is | ||
56 | a directory for each registered component, which is named like the | ||
57 | corresponding component. The debugfs normally should be mounted to | ||
58 | /sys/kernel/debug therefore the debug feature can be accessed under | ||
59 | /sys/kernel/debug/s390dbf. | ||
60 | |||
61 | The content of the directories are files which represent different views | ||
62 | to the debug log. Each component can decide which views should be | ||
63 | used through registering them with the function debug_register_view(). | ||
64 | Predefined views for hex/ascii, sprintf and raw binary data are provided. | ||
65 | It is also possible to define other views. The content of | ||
66 | a view can be inspected simply by reading the corresponding debugfs file. | ||
67 | |||
68 | All debug logs have an actual debug level (range from 0 to 6). | ||
69 | The default level is 3. Event and Exception functions have a 'level' | ||
70 | parameter. Only debug entries with a level that is lower or equal | ||
71 | than the actual level are written to the log. This means, when | ||
72 | writing events, high priority log entries should have a low level | ||
73 | value whereas low priority entries should have a high one. | ||
74 | The actual debug level can be changed with the help of the debugfs-filesystem | ||
75 | through writing a number string "x" to the 'level' debugfs file which is | ||
76 | provided for every debug log. Debugging can be switched off completely | ||
77 | by using "-" on the 'level' debugfs file. | ||
78 | |||
79 | Example: | ||
80 | |||
81 | > echo "-" > /sys/kernel/debug/s390dbf/dasd/level | ||
82 | |||
83 | It is also possible to deactivate the debug feature globally for every | ||
84 | debug log. You can change the behavior using 2 sysctl parameters in | ||
85 | /proc/sys/s390dbf: | ||
86 | There are currently 2 possible triggers, which stop the debug feature | ||
87 | globally. The first possibility is to use the "debug_active" sysctl. If | ||
88 | set to 1 the debug feature is running. If "debug_active" is set to 0 the | ||
89 | debug feature is turned off. | ||
90 | The second trigger which stops the debug feature is a kernel oops. | ||
91 | That prevents the debug feature from overwriting debug information that | ||
92 | happened before the oops. After an oops you can reactivate the debug feature | ||
93 | by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, it's not | ||
94 | suggested to use an oopsed kernel in a production environment. | ||
95 | If you want to disallow the deactivation of the debug feature, you can use | ||
96 | the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug | ||
97 | feature cannot be stopped. If the debug feature is already stopped, it | ||
98 | will stay deactivated. | ||
99 | |||
100 | Kernel Interfaces: | ||
101 | ------------------ | ||
102 | |||
103 | ---------------------------------------------------------------------------- | ||
104 | debug_info_t *debug_register(char *name, int pages, int nr_areas, | ||
105 | int buf_size); | ||
106 | |||
107 | Parameter: name: Name of debug log (e.g. used for debugfs entry) | ||
108 | pages: number of pages, which will be allocated per area | ||
109 | nr_areas: number of debug areas | ||
110 | buf_size: size of data area in each debug entry | ||
111 | |||
112 | Return Value: Handle for generated debug area | ||
113 | NULL if register failed | ||
114 | |||
115 | Description: Allocates memory for a debug log | ||
116 | Must not be called within an interrupt handler | ||
117 | |||
118 | ---------------------------------------------------------------------------- | ||
119 | debug_info_t *debug_register_mode(char *name, int pages, int nr_areas, | ||
120 | int buf_size, mode_t mode, uid_t uid, | ||
121 | gid_t gid); | ||
122 | |||
123 | Parameter: name: Name of debug log (e.g. used for debugfs entry) | ||
124 | pages: Number of pages, which will be allocated per area | ||
125 | nr_areas: Number of debug areas | ||
126 | buf_size: Size of data area in each debug entry | ||
127 | mode: File mode for debugfs files. E.g. S_IRWXUGO | ||
128 | uid: User ID for debugfs files. Currently only 0 is | ||
129 | supported. | ||
130 | gid: Group ID for debugfs files. Currently only 0 is | ||
131 | supported. | ||
132 | |||
133 | Return Value: Handle for generated debug area | ||
134 | NULL if register failed | ||
135 | |||
136 | Description: Allocates memory for a debug log | ||
137 | Must not be called within an interrupt handler | ||
138 | |||
139 | --------------------------------------------------------------------------- | ||
140 | void debug_unregister (debug_info_t * id); | ||
141 | |||
142 | Parameter: id: handle for debug log | ||
143 | |||
144 | Return Value: none | ||
145 | |||
146 | Description: frees memory for a debug log and removes all registered debug | ||
147 | views. | ||
148 | Must not be called within an interrupt handler | ||
149 | |||
150 | --------------------------------------------------------------------------- | ||
151 | void debug_set_level (debug_info_t * id, int new_level); | ||
152 | |||
153 | Parameter: id: handle for debug log | ||
154 | new_level: new debug level | ||
155 | |||
156 | Return Value: none | ||
157 | |||
158 | Description: Sets new actual debug level if new_level is valid. | ||
159 | |||
160 | --------------------------------------------------------------------------- | ||
161 | bool debug_level_enabled (debug_info_t * id, int level); | ||
162 | |||
163 | Parameter: id: handle for debug log | ||
164 | level: debug level | ||
165 | |||
166 | Return Value: True if level is less or equal to the current debug level. | ||
167 | |||
168 | Description: Returns true if debug events for the specified level would be | ||
169 | logged. Otherwise returns false. | ||
170 | --------------------------------------------------------------------------- | ||
171 | void debug_stop_all(void); | ||
172 | |||
173 | Parameter: none | ||
174 | |||
175 | Return Value: none | ||
176 | |||
177 | Description: stops the debug feature if stopping is allowed. Currently | ||
178 | used in case of a kernel oops. | ||
179 | |||
180 | --------------------------------------------------------------------------- | ||
181 | debug_entry_t* debug_event (debug_info_t* id, int level, void* data, | ||
182 | int length); | ||
183 | |||
184 | Parameter: id: handle for debug log | ||
185 | level: debug level | ||
186 | data: pointer to data for debug entry | ||
187 | length: length of data in bytes | ||
188 | |||
189 | Return Value: Address of written debug entry | ||
190 | |||
191 | Description: writes debug entry to active debug area (if level <= actual | ||
192 | debug level) | ||
193 | |||
194 | --------------------------------------------------------------------------- | ||
195 | debug_entry_t* debug_int_event (debug_info_t * id, int level, | ||
196 | unsigned int data); | ||
197 | debug_entry_t* debug_long_event(debug_info_t * id, int level, | ||
198 | unsigned long data); | ||
199 | |||
200 | Parameter: id: handle for debug log | ||
201 | level: debug level | ||
202 | data: integer value for debug entry | ||
203 | |||
204 | Return Value: Address of written debug entry | ||
205 | |||
206 | Description: writes debug entry to active debug area (if level <= actual | ||
207 | debug level) | ||
208 | |||
209 | --------------------------------------------------------------------------- | ||
210 | debug_entry_t* debug_text_event (debug_info_t * id, int level, | ||
211 | const char* data); | ||
212 | |||
213 | Parameter: id: handle for debug log | ||
214 | level: debug level | ||
215 | data: string for debug entry | ||
216 | |||
217 | Return Value: Address of written debug entry | ||
218 | |||
219 | Description: writes debug entry in ascii format to active debug area | ||
220 | (if level <= actual debug level) | ||
221 | |||
222 | --------------------------------------------------------------------------- | ||
223 | debug_entry_t* debug_sprintf_event (debug_info_t * id, int level, | ||
224 | char* string,...); | ||
225 | |||
226 | Parameter: id: handle for debug log | ||
227 | level: debug level | ||
228 | string: format string for debug entry | ||
229 | ...: varargs used as in sprintf() | ||
230 | |||
231 | Return Value: Address of written debug entry | ||
232 | |||
233 | Description: writes debug entry with format string and varargs (longs) to | ||
234 | active debug area (if level <= actual debug level). | ||
235 | floats and long long datatypes cannot be used as varargs. | ||
236 | |||
237 | --------------------------------------------------------------------------- | ||
238 | |||
239 | debug_entry_t* debug_exception (debug_info_t* id, int level, void* data, | ||
240 | int length); | ||
241 | |||
242 | Parameter: id: handle for debug log | ||
243 | level: debug level | ||
244 | data: pointer to data for debug entry | ||
245 | length: length of data in bytes | ||
246 | |||
247 | Return Value: Address of written debug entry | ||
248 | |||
249 | Description: writes debug entry to active debug area (if level <= actual | ||
250 | debug level) and switches to next debug area | ||
251 | |||
252 | --------------------------------------------------------------------------- | ||
253 | debug_entry_t* debug_int_exception (debug_info_t * id, int level, | ||
254 | unsigned int data); | ||
255 | debug_entry_t* debug_long_exception(debug_info_t * id, int level, | ||
256 | unsigned long data); | ||
257 | |||
258 | Parameter: id: handle for debug log | ||
259 | level: debug level | ||
260 | data: integer value for debug entry | ||
261 | |||
262 | Return Value: Address of written debug entry | ||
263 | |||
264 | Description: writes debug entry to active debug area (if level <= actual | ||
265 | debug level) and switches to next debug area | ||
266 | |||
267 | --------------------------------------------------------------------------- | ||
268 | debug_entry_t* debug_text_exception (debug_info_t * id, int level, | ||
269 | const char* data); | ||
270 | |||
271 | Parameter: id: handle for debug log | ||
272 | level: debug level | ||
273 | data: string for debug entry | ||
274 | |||
275 | Return Value: Address of written debug entry | ||
276 | |||
277 | Description: writes debug entry in ascii format to active debug area | ||
278 | (if level <= actual debug level) and switches to next debug | ||
279 | area | ||
280 | |||
281 | --------------------------------------------------------------------------- | ||
282 | debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level, | ||
283 | char* string,...); | ||
284 | |||
285 | Parameter: id: handle for debug log | ||
286 | level: debug level | ||
287 | string: format string for debug entry | ||
288 | ...: varargs used as in sprintf() | ||
289 | |||
290 | Return Value: Address of written debug entry | ||
291 | |||
292 | Description: writes debug entry with format string and varargs (longs) to | ||
293 | active debug area (if level <= actual debug level) and | ||
294 | switches to next debug area. | ||
295 | floats and long long datatypes cannot be used as varargs. | ||
296 | |||
297 | --------------------------------------------------------------------------- | ||
298 | |||
299 | int debug_register_view (debug_info_t * id, struct debug_view *view); | ||
300 | |||
301 | Parameter: id: handle for debug log | ||
302 | view: pointer to debug view struct | ||
303 | |||
304 | Return Value: 0 : ok | ||
305 | < 0: Error | ||
306 | |||
307 | Description: registers new debug view and creates debugfs dir entry | ||
308 | |||
309 | --------------------------------------------------------------------------- | ||
310 | int debug_unregister_view (debug_info_t * id, struct debug_view *view); | ||
311 | |||
312 | Parameter: id: handle for debug log | ||
313 | view: pointer to debug view struct | ||
314 | |||
315 | Return Value: 0 : ok | ||
316 | < 0: Error | ||
317 | |||
318 | Description: unregisters debug view and removes debugfs dir entry | ||
319 | |||
320 | |||
321 | |||
322 | Predefined views: | ||
323 | ----------------- | ||
324 | |||
325 | extern struct debug_view debug_hex_ascii_view; | ||
326 | extern struct debug_view debug_raw_view; | ||
327 | extern struct debug_view debug_sprintf_view; | ||
328 | |||
329 | Examples | ||
330 | -------- | ||
331 | |||
332 | /* | ||
333 | * hex_ascii- + raw-view Example | ||
334 | */ | ||
335 | |||
336 | #include <linux/init.h> | ||
337 | #include <asm/debug.h> | ||
338 | |||
339 | static debug_info_t* debug_info; | ||
340 | |||
341 | static int init(void) | ||
342 | { | ||
343 | /* register 4 debug areas with one page each and 4 byte data field */ | ||
344 | |||
345 | debug_info = debug_register ("test", 1, 4, 4 ); | ||
346 | debug_register_view(debug_info,&debug_hex_ascii_view); | ||
347 | debug_register_view(debug_info,&debug_raw_view); | ||
348 | |||
349 | debug_text_event(debug_info, 4 , "one "); | ||
350 | debug_int_exception(debug_info, 4, 4711); | ||
351 | debug_event(debug_info, 3, &debug_info, 4); | ||
352 | |||
353 | return 0; | ||
354 | } | ||
355 | |||
356 | static void cleanup(void) | ||
357 | { | ||
358 | debug_unregister (debug_info); | ||
359 | } | ||
360 | |||
361 | module_init(init); | ||
362 | module_exit(cleanup); | ||
363 | |||
364 | --------------------------------------------------------------------------- | ||
365 | |||
366 | /* | ||
367 | * sprintf-view Example | ||
368 | */ | ||
369 | |||
370 | #include <linux/init.h> | ||
371 | #include <asm/debug.h> | ||
372 | |||
373 | static debug_info_t* debug_info; | ||
374 | |||
375 | static int init(void) | ||
376 | { | ||
377 | /* register 4 debug areas with one page each and data field for */ | ||
378 | /* format string pointer + 2 varargs (= 3 * sizeof(long)) */ | ||
379 | |||
380 | debug_info = debug_register ("test", 1, 4, sizeof(long) * 3); | ||
381 | debug_register_view(debug_info,&debug_sprintf_view); | ||
382 | |||
383 | debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__); | ||
384 | debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info); | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | static void cleanup(void) | ||
390 | { | ||
391 | debug_unregister (debug_info); | ||
392 | } | ||
393 | |||
394 | module_init(init); | ||
395 | module_exit(cleanup); | ||
396 | |||
397 | |||
398 | |||
399 | Debugfs Interface | ||
400 | ----------------- | ||
401 | Views to the debug logs can be investigated through reading the corresponding | ||
402 | debugfs-files: | ||
403 | |||
404 | Example: | ||
405 | |||
406 | > ls /sys/kernel/debug/s390dbf/dasd | ||
407 | flush hex_ascii level pages raw | ||
408 | > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s | ||
409 | 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | .... | ||
410 | 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE | ||
411 | 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | .... | ||
412 | 00 00974733272:682281 1 * 02 0006ab08 45 58 43 50 | EXCP | ||
413 | 01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD | ||
414 | 01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | .... | ||
415 | 01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ... | ||
416 | 01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | .... | ||
417 | 01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE | ||
418 | 01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | .... | ||
419 | |||
420 | See section about predefined views for explanation of the above output! | ||
421 | |||
422 | Changing the debug level | ||
423 | ------------------------ | ||
424 | |||
425 | Example: | ||
426 | |||
427 | |||
428 | > cat /sys/kernel/debug/s390dbf/dasd/level | ||
429 | 3 | ||
430 | > echo "5" > /sys/kernel/debug/s390dbf/dasd/level | ||
431 | > cat /sys/kernel/debug/s390dbf/dasd/level | ||
432 | 5 | ||
433 | |||
434 | Flushing debug areas | ||
435 | -------------------- | ||
436 | Debug areas can be flushed by piping the number of the desired | ||
437 | area (0...n) to the debugfs file "flush". When using "-" all debug areas | ||
438 | are flushed. | ||
439 | |||
440 | Examples: | ||
441 | |||
442 | 1. Flush debug area 0: | ||
443 | > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush | ||
444 | |||
445 | 2. Flush all debug areas: | ||
446 | > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush | ||
447 | |||
448 | Changing the size of debug areas | ||
449 | ------------------------------------ | ||
450 | It is possible to change the size of debug areas by piping | ||
451 | the number of pages to the debugfs file "pages". The resize request will | ||
452 | also flush the debug areas. | ||
453 | |||
454 | Example: | ||
455 | |||
456 | Define 4 pages for the debug areas of debug feature "dasd": | ||
457 | > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages | ||
458 | |||
459 | Stopping the debug feature | ||
460 | -------------------------- | ||
461 | Example: | ||
462 | |||
463 | 1. Check if stopping is allowed | ||
464 | > cat /proc/sys/s390dbf/debug_stoppable | ||
465 | 2. Stop debug feature | ||
466 | > echo 0 > /proc/sys/s390dbf/debug_active | ||
467 | |||
468 | lcrash Interface | ||
469 | ---------------- | ||
470 | It is planned that the dump analysis tool lcrash gets an additional command | ||
471 | 's390dbf' to display all the debug logs. With this tool it will be possible | ||
472 | to investigate the debug logs on a live system and with a memory dump after | ||
473 | a system crash. | ||
474 | |||
475 | Investigating raw memory | ||
476 | ------------------------ | ||
477 | One last possibility to investigate the debug logs on a live | ||
478 | system and after a system crash is to look at the raw memory | ||
479 | under VM or at the Service Element. | ||
480 | It is possible to find the anchor of the debug-logs through | ||
481 | the 'debug_area_first' symbol in the System map. Then one has | ||
482 | to follow the correct pointers of the data-structures defined | ||
483 | in debug.h and find the debug-areas in memory. | ||
484 | Normally modules which use the debug feature will also have | ||
485 | a global variable with the pointer to the debug-logs. Following | ||
486 | this pointer it will also be possible to find the debug logs in | ||
487 | memory. | ||
488 | |||
489 | For this method it is recommended to use '16 * x + 4' byte (x = 0..n) | ||
490 | for the length of the data field in debug_register() in | ||
491 | order to see the debug entries well formatted. | ||
492 | |||
493 | |||
494 | Predefined Views | ||
495 | ---------------- | ||
496 | |||
497 | There are three predefined views: hex_ascii, raw and sprintf. | ||
498 | The hex_ascii view shows the data field in hex and ascii representation | ||
499 | (e.g. '45 43 4b 44 | ECKD'). | ||
500 | The raw view returns a bytestream as the debug areas are stored in memory. | ||
501 | |||
502 | The sprintf view formats the debug entries in the same way as the sprintf | ||
503 | function would do. The sprintf event/exception functions write to the | ||
504 | debug entry a pointer to the format string (size = sizeof(long)) | ||
505 | and for each vararg a long value. So e.g. for a debug entry with a format | ||
506 | string plus two varargs one would need to allocate a (3 * sizeof(long)) | ||
507 | byte data area in the debug_register() function. | ||
508 | |||
509 | IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only | ||
510 | use "%s" in the sprintf event functions, if the memory for the passed string is | ||
511 | available as long as the debug feature exists. The reason behind this is that | ||
512 | due to performance considerations only a pointer to the string is stored in | ||
513 | the debug feature. If you log a string that is freed afterwards, you will get | ||
514 | an OOPS when inspecting the debug feature, because then the debug feature will | ||
515 | access the already freed memory. | ||
516 | |||
517 | NOTE: If using the sprintf view do NOT use other event/exception functions | ||
518 | than the sprintf-event and -exception functions. | ||
519 | |||
520 | The format of the hex_ascii and sprintf view is as follows: | ||
521 | - Number of area | ||
522 | - Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated | ||
523 | Universal Time (UTC), January 1, 1970) | ||
524 | - level of debug entry | ||
525 | - Exception flag (* = Exception) | ||
526 | - Cpu-Number of calling task | ||
527 | - Return Address to caller | ||
528 | - data field | ||
529 | |||
530 | The format of the raw view is: | ||
531 | - Header as described in debug.h | ||
532 | - datafield | ||
533 | |||
534 | A typical line of the hex_ascii view will look like the following (first line | ||
535 | is only for explanation and will not be displayed when reading the view with 'cat'): | ||
536 | |||
537 | area time level exception cpu caller data (hex + ascii) | ||
538 | -------------------------------------------------------------------------- | ||
539 | 00 00964419409:440690 1 - 00 88023fe | ||
540 | |||
541 | |||
542 | Defining views | ||
543 | -------------- | ||
544 | |||
545 | Views are specified with the 'debug_view' structure. There are defined | ||
546 | callback functions which are used for reading and writing the debugfs files: | ||
547 | |||
548 | struct debug_view { | ||
549 | char name[DEBUG_MAX_PROCF_LEN]; | ||
550 | debug_prolog_proc_t* prolog_proc; | ||
551 | debug_header_proc_t* header_proc; | ||
552 | debug_format_proc_t* format_proc; | ||
553 | debug_input_proc_t* input_proc; | ||
554 | void* private_data; | ||
555 | }; | ||
556 | |||
557 | where | ||
558 | |||
559 | typedef int (debug_header_proc_t) (debug_info_t* id, | ||
560 | struct debug_view* view, | ||
561 | int area, | ||
562 | debug_entry_t* entry, | ||
563 | char* out_buf); | ||
564 | |||
565 | typedef int (debug_format_proc_t) (debug_info_t* id, | ||
566 | struct debug_view* view, char* out_buf, | ||
567 | const char* in_buf); | ||
568 | typedef int (debug_prolog_proc_t) (debug_info_t* id, | ||
569 | struct debug_view* view, | ||
570 | char* out_buf); | ||
571 | typedef int (debug_input_proc_t) (debug_info_t* id, | ||
572 | struct debug_view* view, | ||
573 | struct file* file, const char* user_buf, | ||
574 | size_t in_buf_size, loff_t* offset); | ||
575 | |||
576 | |||
577 | The "private_data" member can be used as pointer to view specific data. | ||
578 | It is not used by the debug feature itself. | ||
579 | |||
580 | The output when reading a debugfs file is structured like this: | ||
581 | |||
582 | "prolog_proc output" | ||
583 | |||
584 | "header_proc output 1" "format_proc output 1" | ||
585 | "header_proc output 2" "format_proc output 2" | ||
586 | "header_proc output 3" "format_proc output 3" | ||
587 | ... | ||
588 | |||
589 | When a view is read from the debugfs, the Debug Feature calls the | ||
590 | 'prolog_proc' once for writing the prolog. | ||
591 | Then 'header_proc' and 'format_proc' are called for each | ||
592 | existing debug entry. | ||
593 | |||
594 | The input_proc can be used to implement functionality when the view is | ||
595 | written to (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level'). | ||
596 | |||
597 | For header_proc, the default function debug_dflt_header_fn(), which is | ||
598 | defined in debug.h, can be used. It produces the same header output as | ||
599 | the predefined views. | ||
600 | E.g: | ||
601 | 00 00964419409:440761 2 - 00 88023ec | ||
602 | |||
603 | In order to see how to use the callback functions check the implementation | ||
604 | of the default views! | ||
605 | |||
606 | Example | ||
607 | |||
608 | #include <asm/debug.h> | ||
609 | |||
610 | #define UNKNOWNSTR "data: %08x" | ||
611 | |||
612 | const char* messages[] = | ||
613 | {"This error...........\n", | ||
614 | "That error...........\n", | ||
615 | "Problem..............\n", | ||
616 | "Something went wrong.\n", | ||
617 | "Everything ok........\n", | ||
618 | NULL | ||
619 | }; | ||
620 | |||
621 | static int debug_test_format_fn( | ||
622 | debug_info_t * id, struct debug_view *view, | ||
623 | char *out_buf, const char *in_buf | ||
624 | ) | ||
625 | { | ||
626 | int i, rc = 0; | ||
627 | |||
628 | if(id->buf_size >= 4) { | ||
629 | int msg_nr = *((int*)in_buf); | ||
630 | if(msg_nr < sizeof(messages)/sizeof(char*) - 1) | ||
631 | rc += sprintf(out_buf, "%s", messages[msg_nr]); | ||
632 | else | ||
633 | rc += sprintf(out_buf, UNKNOWNSTR, msg_nr); | ||
634 | } | ||
635 | out: | ||
636 | return rc; | ||
637 | } | ||
638 | |||
639 | struct debug_view debug_test_view = { | ||
640 | "myview", /* name of view */ | ||
641 | NULL, /* no prolog */ | ||
642 | &debug_dflt_header_fn, /* default header for each entry */ | ||
643 | &debug_test_format_fn, /* our own format function */ | ||
644 | NULL, /* no input function */ | ||
645 | NULL /* no private data */ | ||
646 | }; | ||
647 | |||
648 | ===== | ||
649 | test: | ||
650 | ===== | ||
651 | debug_info_t *debug_info; | ||
652 | ... | ||
653 | debug_info = debug_register ("test", 0, 4, 4 ); | ||
654 | debug_register_view(debug_info, &debug_test_view); | ||
655 | for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i); | ||
656 | |||
657 | > cat /sys/kernel/debug/s390dbf/test/myview | ||
658 | 00 00964419734:611402 1 - 00 88042ca This error........... | ||
659 | 00 00964419734:611405 1 - 00 88042ca That error........... | ||
660 | 00 00964419734:611408 1 - 00 88042ca Problem.............. | ||
661 | 00 00964419734:611411 1 - 00 88042ca Something went wrong. | ||
662 | 00 00964419734:611414 1 - 00 88042ca Everything ok........ | ||
663 | 00 00964419734:611417 1 - 00 88042ca data: 00000005 | ||
664 | 00 00964419734:611419 1 - 00 88042ca data: 00000006 | ||
665 | 00 00964419734:611422 1 - 00 88042ca data: 00000007 | ||
666 | 00 00964419734:611425 1 - 00 88042ca data: 00000008 | ||
667 | 00 00964419734:611428 1 - 00 88042ca data: 00000009 | ||
diff --git a/Documentation/s390/text_files.rst b/Documentation/s390/text_files.rst new file mode 100644 index 000000000000..c94d05d4fa17 --- /dev/null +++ b/Documentation/s390/text_files.rst | |||
@@ -0,0 +1,11 @@ | |||
1 | ibm 3270 changelog | ||
2 | ------------------ | ||
3 | |||
4 | .. include:: 3270.ChangeLog | ||
5 | :literal: | ||
6 | |||
7 | ibm 3270 config3270.sh | ||
8 | ---------------------- | ||
9 | |||
10 | .. literalinclude:: config3270.sh | ||
11 | :language: shell | ||
diff --git a/Documentation/s390/vfio-ap.txt b/Documentation/s390/vfio-ap.rst index 65167cfe4485..b5c51f7c748d 100644 --- a/Documentation/s390/vfio-ap.txt +++ b/Documentation/s390/vfio-ap.rst | |||
@@ -1,4 +1,9 @@ | |||
1 | Introduction: | 1 | =============================== |
2 | Adjunct Processor (AP) facility | ||
3 | =============================== | ||
4 | |||
5 | |||
6 | Introduction | ||
2 | ============ | 7 | ============ |
3 | The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised | 8 | The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised |
4 | of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards. | 9 | of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards. |
@@ -11,7 +16,7 @@ framework. This implementation relies considerably on the s390 virtualization | |||
11 | facilities which do most of the hard work of providing direct access to AP | 16 | facilities which do most of the hard work of providing direct access to AP |
12 | devices. | 17 | devices. |
13 | 18 | ||
14 | AP Architectural Overview: | 19 | AP Architectural Overview |
15 | ========================= | 20 | ========================= |
16 | To facilitate the comprehension of the design, let's start with some | 21 | To facilitate the comprehension of the design, let's start with some |
17 | definitions: | 22 | definitions: |
@@ -31,13 +36,13 @@ definitions: | |||
31 | in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and | 36 | in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and |
32 | creates a sysfs device for each assigned adapter. For example, if AP adapters | 37 | creates a sysfs device for each assigned adapter. For example, if AP adapters |
33 | 4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following | 38 | 4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following |
34 | sysfs device entries: | 39 | sysfs device entries:: |
35 | 40 | ||
36 | /sys/devices/ap/card04 | 41 | /sys/devices/ap/card04 |
37 | /sys/devices/ap/card0a | 42 | /sys/devices/ap/card0a |
38 | 43 | ||
39 | Symbolic links to these devices will also be created in the AP bus devices | 44 | Symbolic links to these devices will also be created in the AP bus devices |
40 | sub-directory: | 45 | sub-directory:: |
41 | 46 | ||
42 | /sys/bus/ap/devices/[card04] | 47 | /sys/bus/ap/devices/[card04] |
43 | /sys/bus/ap/devices/[card0a] | 48 | /sys/bus/ap/devices/[card0a] |
@@ -84,7 +89,7 @@ definitions: | |||
84 | the cross product of the AP adapter and usage domain numbers detected when the | 89 | the cross product of the AP adapter and usage domain numbers detected when the |
85 | AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage | 90 | AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage |
86 | domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the | 91 | domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the |
87 | following sysfs entries: | 92 | following sysfs entries:: |
88 | 93 | ||
89 | /sys/devices/ap/card04/04.0006 | 94 | /sys/devices/ap/card04/04.0006 |
90 | /sys/devices/ap/card04/04.0047 | 95 | /sys/devices/ap/card04/04.0047 |
@@ -92,7 +97,7 @@ definitions: | |||
92 | /sys/devices/ap/card0a/0a.0047 | 97 | /sys/devices/ap/card0a/0a.0047 |
93 | 98 | ||
94 | The following symbolic links to these devices will be created in the AP bus | 99 | The following symbolic links to these devices will be created in the AP bus |
95 | devices subdirectory: | 100 | devices subdirectory:: |
96 | 101 | ||
97 | /sys/bus/ap/devices/[04.0006] | 102 | /sys/bus/ap/devices/[04.0006] |
98 | /sys/bus/ap/devices/[04.0047] | 103 | /sys/bus/ap/devices/[04.0047] |
@@ -112,7 +117,7 @@ definitions: | |||
112 | domain that is not one of the usage domains, but the modified domain | 117 | domain that is not one of the usage domains, but the modified domain |
113 | must be one of the control domains. | 118 | must be one of the control domains. |
114 | 119 | ||
115 | AP and SIE: | 120 | AP and SIE |
116 | ========== | 121 | ========== |
117 | Let's now take a look at how AP instructions executed on a guest are interpreted | 122 | Let's now take a look at how AP instructions executed on a guest are interpreted |
118 | by the hardware. | 123 | by the hardware. |
@@ -153,7 +158,7 @@ and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6), | |||
153 | 158 | ||
154 | The APQNs can provide secure key functionality - i.e., a private key is stored | 159 | The APQNs can provide secure key functionality - i.e., a private key is stored |
155 | on the adapter card for each of its domains - so each APQN must be assigned to | 160 | on the adapter card for each of its domains - so each APQN must be assigned to |
156 | at most one guest or to the linux host. | 161 | at most one guest or to the linux host:: |
157 | 162 | ||
158 | Example 1: Valid configuration: | 163 | Example 1: Valid configuration: |
159 | ------------------------------ | 164 | ------------------------------ |
@@ -181,8 +186,8 @@ at most one guest or to the linux host. | |||
181 | This is an invalid configuration because both guests have access to | 186 | This is an invalid configuration because both guests have access to |
182 | APQN (1,6). | 187 | APQN (1,6). |
183 | 188 | ||
184 | The Design: | 189 | The Design |
185 | =========== | 190 | ========== |
186 | The design introduces three new objects: | 191 | The design introduces three new objects: |
187 | 192 | ||
188 | 1. AP matrix device | 193 | 1. AP matrix device |
@@ -205,43 +210,43 @@ The VFIO AP (vfio_ap) device driver serves the following purposes: | |||
205 | Reserve APQNs for exclusive use of KVM guests | 210 | Reserve APQNs for exclusive use of KVM guests |
206 | --------------------------------------------- | 211 | --------------------------------------------- |
207 | The following block diagram illustrates the mechanism by which APQNs are | 212 | The following block diagram illustrates the mechanism by which APQNs are |
208 | reserved: | 213 | reserved:: |
209 | 214 | ||
210 | +------------------+ | 215 | +------------------+ |
211 | 7 remove | | | 216 | 7 remove | | |
212 | +--------------------> cex4queue driver | | 217 | +--------------------> cex4queue driver | |
213 | | | | | 218 | | | | |
214 | | +------------------+ | 219 | | +------------------+ |
215 | | | 220 | | |
216 | | | 221 | | |
217 | | +------------------+ +-----------------+ | 222 | | +------------------+ +----------------+ |
218 | | 5 register driver | | 3 create | | | 223 | | 5 register driver | | 3 create | | |
219 | | +----------------> Device core +----------> matrix device | | 224 | | +----------------> Device core +----------> matrix device | |
220 | | | | | | | | 225 | | | | | | | |
221 | | | +--------^---------+ +-----------------+ | 226 | | | +--------^---------+ +----------------+ |
222 | | | | | 227 | | | | |
223 | | | +-------------------+ | 228 | | | +-------------------+ |
224 | | | +-----------------------------------+ | | 229 | | | +-----------------------------------+ | |
225 | | | | 4 register AP driver | | 2 register device | 230 | | | | 4 register AP driver | | 2 register device |
226 | | | | | | | 231 | | | | | | |
227 | +--------+---+-v---+ +--------+-------+-+ | 232 | +--------+---+-v---+ +--------+-------+-+ |
228 | | | | | | 233 | | | | | |
229 | | ap_bus +--------------------- > vfio_ap driver | | 234 | | ap_bus +--------------------- > vfio_ap driver | |
230 | | | 8 probe | | | 235 | | | 8 probe | | |
231 | +--------^---------+ +--^--^------------+ | 236 | +--------^---------+ +--^--^------------+ |
232 | 6 edit | | | | 237 | 6 edit | | | |
233 | apmask | +-----------------------------+ | 9 mdev create | 238 | apmask | +-----------------------------+ | 9 mdev create |
234 | aqmask | | 1 modprobe | | 239 | aqmask | | 1 modprobe | |
235 | +--------+-----+---+ +----------------+-+ +------------------+ | 240 | +--------+-----+---+ +----------------+-+ +----------------+ |
236 | | | | |8 create | mediated | | 241 | | | | |8 create | mediated | |
237 | | admin | | VFIO device core |---------> matrix | | 242 | | admin | | VFIO device core |---------> matrix | |
238 | | + | | | device | | 243 | | + | | | device | |
239 | +------+-+---------+ +--------^---------+ +--------^---------+ | 244 | +------+-+---------+ +--------^---------+ +--------^-------+ |
240 | | | | | | 245 | | | | | |
241 | | | 9 create vfio_ap-passthrough | | | 246 | | | 9 create vfio_ap-passthrough | | |
242 | | +------------------------------+ | | 247 | | +------------------------------+ | |
243 | +-------------------------------------------------------------+ | 248 | +-------------------------------------------------------------+ |
244 | 10 assign adapter/domain/control domain | 249 | 10 assign adapter/domain/control domain |
245 | 250 | ||
246 | The process for reserving an AP queue for use by a KVM guest is: | 251 | The process for reserving an AP queue for use by a KVM guest is: |
247 | 252 | ||
@@ -250,7 +255,7 @@ The process for reserving an AP queue for use by a KVM guest is: | |||
250 | device with the device core. This will serve as the parent device for | 255 | device with the device core. This will serve as the parent device for |
251 | all mediated matrix devices used to configure an AP matrix for a guest. | 256 | all mediated matrix devices used to configure an AP matrix for a guest. |
252 | 3. The /sys/devices/vfio_ap/matrix device is created by the device core | 257 | 3. The /sys/devices/vfio_ap/matrix device is created by the device core |
253 | 4 The vfio_ap device driver will register with the AP bus for AP queue devices | 258 | 4. The vfio_ap device driver will register with the AP bus for AP queue devices |
254 | of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap | 259 | of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap |
255 | driver's probe and remove callback interfaces. Devices older than CEX4 queues | 260 | driver's probe and remove callback interfaces. Devices older than CEX4 queues |
256 | are not supported to simplify the implementation by not needlessly | 261 | are not supported to simplify the implementation by not needlessly |
@@ -266,13 +271,14 @@ The process for reserving an AP queue for use by a KVM guest is: | |||
266 | it. | 271 | it. |
267 | 9. The administrator creates a passthrough type mediated matrix device to be | 272 | 9. The administrator creates a passthrough type mediated matrix device to be |
268 | used by a guest | 273 | used by a guest |
269 | 10 The administrator assigns the adapters, usage domains and control domains | 274 | 10. The administrator assigns the adapters, usage domains and control domains |
270 | to be exclusively used by a guest. | 275 | to be exclusively used by a guest. |
271 | 276 | ||
272 | Set up the VFIO mediated device interfaces | 277 | Set up the VFIO mediated device interfaces |
273 | ------------------------------------------ | 278 | ------------------------------------------ |
274 | The VFIO AP device driver utilizes the common interface of the VFIO mediated | 279 | The VFIO AP device driver utilizes the common interface of the VFIO mediated |
275 | device core driver to: | 280 | device core driver to: |
281 | |||
276 | * Register an AP mediated bus driver to add a mediated matrix device to and | 282 | * Register an AP mediated bus driver to add a mediated matrix device to and |
277 | remove it from a VFIO group. | 283 | remove it from a VFIO group. |
278 | * Create and destroy a mediated matrix device | 284 | * Create and destroy a mediated matrix device |
@@ -280,25 +286,25 @@ device core driver to: | |||
280 | * Add a mediated matrix device to and remove it from an IOMMU group | 286 | * Add a mediated matrix device to and remove it from an IOMMU group |
281 | 287 | ||
282 | The following high-level block diagram shows the main components and interfaces | 288 | The following high-level block diagram shows the main components and interfaces |
283 | of the VFIO AP mediated matrix device driver: | 289 | of the VFIO AP mediated matrix device driver:: |
284 | 290 | ||
285 | +-------------+ | 291 | +-------------+ |
286 | | | | 292 | | | |
287 | | +---------+ | mdev_register_driver() +--------------+ | 293 | | +---------+ | mdev_register_driver() +--------------+ |
288 | | | Mdev | +<-----------------------+ | | 294 | | | Mdev | +<-----------------------+ | |
289 | | | bus | | | vfio_mdev.ko | | 295 | | | bus | | | vfio_mdev.ko | |
290 | | | driver | +----------------------->+ |<-> VFIO user | 296 | | | driver | +----------------------->+ |<-> VFIO user |
291 | | +---------+ | probe()/remove() +--------------+ APIs | 297 | | +---------+ | probe()/remove() +--------------+ APIs |
292 | | | | 298 | | | |
293 | | MDEV CORE | | 299 | | MDEV CORE | |
294 | | MODULE | | 300 | | MODULE | |
295 | | mdev.ko | | 301 | | mdev.ko | |
296 | | +---------+ | mdev_register_device() +--------------+ | 302 | | +---------+ | mdev_register_device() +--------------+ |
297 | | |Physical | +<-----------------------+ | | 303 | | |Physical | +<-----------------------+ | |
298 | | | device | | | vfio_ap.ko |<-> matrix | 304 | | | device | | | vfio_ap.ko |<-> matrix |
299 | | |interface| +----------------------->+ | device | 305 | | |interface| +----------------------->+ | device |
300 | | +---------+ | callback +--------------+ | 306 | | +---------+ | callback +--------------+ |
301 | +-------------+ | 307 | +-------------+ |
302 | 308 | ||
303 | During initialization of the vfio_ap module, the matrix device is registered | 309 | During initialization of the vfio_ap module, the matrix device is registered |
304 | with an 'mdev_parent_ops' structure that provides the sysfs attribute | 310 | with an 'mdev_parent_ops' structure that provides the sysfs attribute |
@@ -306,7 +312,8 @@ structures, mdev functions and callback interfaces for managing the mediated | |||
306 | matrix device. | 312 | matrix device. |
307 | 313 | ||
308 | * sysfs attribute structures: | 314 | * sysfs attribute structures: |
309 | * supported_type_groups | 315 | |
316 | supported_type_groups | ||
310 | The VFIO mediated device framework supports creation of user-defined | 317 | The VFIO mediated device framework supports creation of user-defined |
311 | mediated device types. These mediated device types are specified | 318 | mediated device types. These mediated device types are specified |
312 | via the 'supported_type_groups' structure when a device is registered | 319 | via the 'supported_type_groups' structure when a device is registered |
@@ -318,61 +325,72 @@ matrix device. | |||
318 | 325 | ||
319 | The VFIO AP device driver will register one mediated device type for | 326 | The VFIO AP device driver will register one mediated device type for |
320 | passthrough devices: | 327 | passthrough devices: |
328 | |||
321 | /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough | 329 | /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough |
330 | |||
322 | Only the read-only attributes required by the VFIO mdev framework will | 331 | Only the read-only attributes required by the VFIO mdev framework will |
323 | be provided: | 332 | be provided:: |
324 | ... name | 333 | |
325 | ... device_api | 334 | ... name |
326 | ... available_instances | 335 | ... device_api |
327 | ... device_api | 336 | ... available_instances |
328 | Where: | 337 | ... device_api |
329 | * name: specifies the name of the mediated device type | 338 | |
330 | * device_api: the mediated device type's API | 339 | Where: |
331 | * available_instances: the number of mediated matrix passthrough devices | 340 | |
332 | that can be created | 341 | * name: |
333 | * device_api: specifies the VFIO API | 342 | specifies the name of the mediated device type |
334 | * mdev_attr_groups | 343 | * device_api: |
344 | the mediated device type's API | ||
345 | * available_instances: | ||
346 | the number of mediated matrix passthrough devices | ||
347 | that can be created | ||
348 | * device_api: | ||
349 | specifies the VFIO API | ||
350 | mdev_attr_groups | ||
335 | This attribute group identifies the user-defined sysfs attributes of the | 351 | This attribute group identifies the user-defined sysfs attributes of the |
336 | mediated device. When a device is registered with the VFIO mediated device | 352 | mediated device. When a device is registered with the VFIO mediated device |
337 | framework, the sysfs attribute files identified in the 'mdev_attr_groups' | 353 | framework, the sysfs attribute files identified in the 'mdev_attr_groups' |
338 | structure will be created in the mediated matrix device's directory. The | 354 | structure will be created in the mediated matrix device's directory. The |
339 | sysfs attributes for a mediated matrix device are: | 355 | sysfs attributes for a mediated matrix device are: |
340 | * assign_adapter: | 356 | |
341 | * unassign_adapter: | 357 | assign_adapter / unassign_adapter: |
342 | Write-only attributes for assigning/unassigning an AP adapter to/from the | 358 | Write-only attributes for assigning/unassigning an AP adapter to/from the |
343 | mediated matrix device. To assign/unassign an adapter, the APID of the | 359 | mediated matrix device. To assign/unassign an adapter, the APID of the |
344 | adapter is echoed to the respective attribute file. | 360 | adapter is echoed to the respective attribute file. |
345 | * assign_domain: | 361 | assign_domain / unassign_domain: |
346 | * unassign_domain: | ||
347 | Write-only attributes for assigning/unassigning an AP usage domain to/from | 362 | Write-only attributes for assigning/unassigning an AP usage domain to/from |
348 | the mediated matrix device. To assign/unassign a domain, the domain | 363 | the mediated matrix device. To assign/unassign a domain, the domain |
349 | number of the usage domain is echoed to the respective attribute | 364 | number of the usage domain is echoed to the respective attribute |
350 | file. | 365 | file. |
351 | * matrix: | 366 | matrix: |
352 | A read-only file for displaying the APQNs derived from the cross product | 367 | A read-only file for displaying the APQNs derived from the cross product |
353 | of the adapter and domain numbers assigned to the mediated matrix device. | 368 | of the adapter and domain numbers assigned to the mediated matrix device. |
354 | * assign_control_domain: | 369 | assign_control_domain / unassign_control_domain: |
355 | * unassign_control_domain: | ||
356 | Write-only attributes for assigning/unassigning an AP control domain | 370 | Write-only attributes for assigning/unassigning an AP control domain |
357 | to/from the mediated matrix device. To assign/unassign a control domain, | 371 | to/from the mediated matrix device. To assign/unassign a control domain, |
358 | the ID of the domain to be assigned/unassigned is echoed to the respective | 372 | the ID of the domain to be assigned/unassigned is echoed to the respective |
359 | attribute file. | 373 | attribute file. |
360 | * control_domains: | 374 | control_domains: |
361 | A read-only file for displaying the control domain numbers assigned to the | 375 | A read-only file for displaying the control domain numbers assigned to the |
362 | mediated matrix device. | 376 | mediated matrix device. |
363 | 377 | ||
364 | * functions: | 378 | * functions: |
365 | * create: | 379 | |
380 | create: | ||
366 | allocates the ap_matrix_mdev structure used by the vfio_ap driver to: | 381 | allocates the ap_matrix_mdev structure used by the vfio_ap driver to: |
382 | |||
367 | * Store the reference to the KVM structure for the guest using the mdev | 383 | * Store the reference to the KVM structure for the guest using the mdev |
368 | * Store the AP matrix configuration for the adapters, domains, and control | 384 | * Store the AP matrix configuration for the adapters, domains, and control |
369 | domains assigned via the corresponding sysfs attributes files | 385 | domains assigned via the corresponding sysfs attributes files |
370 | * remove: | 386 | |
387 | remove: | ||
371 | deallocates the mediated matrix device's ap_matrix_mdev structure. This will | 388 | deallocates the mediated matrix device's ap_matrix_mdev structure. This will |
372 | be allowed only if a running guest is not using the mdev. | 389 | be allowed only if a running guest is not using the mdev. |
373 | 390 | ||
374 | * callback interfaces | 391 | * callback interfaces |
375 | * open: | 392 | |
393 | open: | ||
376 | The vfio_ap driver uses this callback to register a | 394 | The vfio_ap driver uses this callback to register a |
377 | VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix | 395 | VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix |
378 | device. The open is invoked when QEMU connects the VFIO iommu group | 396 | device. The open is invoked when QEMU connects the VFIO iommu group |
@@ -380,16 +398,17 @@ matrix device. | |||
380 | to configure the KVM guest is provided via this callback. The KVM structure, | 398 | to configure the KVM guest is provided via this callback. The KVM structure, |
381 | is used to configure the guest's access to the AP matrix defined via the | 399 | is used to configure the guest's access to the AP matrix defined via the |
382 | mediated matrix device's sysfs attribute files. | 400 | mediated matrix device's sysfs attribute files. |
383 | * release: | 401 | release: |
384 | unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the | 402 | unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the |
385 | mdev matrix device and deconfigures the guest's AP matrix. | 403 | mdev matrix device and deconfigures the guest's AP matrix. |
386 | 404 | ||
387 | Configure the APM, AQM and ADM in the CRYCB: | 405 | Configure the APM, AQM and ADM in the CRYCB |
388 | ------------------------------------------- | 406 | ------------------------------------------- |
389 | Configuring the AP matrix for a KVM guest will be performed when the | 407 | Configuring the AP matrix for a KVM guest will be performed when the |
390 | VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier | 408 | VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier |
391 | function is called when QEMU connects to KVM. The guest's AP matrix is | 409 | function is called when QEMU connects to KVM. The guest's AP matrix is |
392 | configured via its CRYCB by: | 410 | configured via its CRYCB by: |
411 | |||
393 | * Setting the bits in the APM corresponding to the APIDs assigned to the | 412 | * Setting the bits in the APM corresponding to the APIDs assigned to the |
394 | mediated matrix device via its 'assign_adapter' interface. | 413 | mediated matrix device via its 'assign_adapter' interface. |
395 | * Setting the bits in the AQM corresponding to the domains assigned to the | 414 | * Setting the bits in the AQM corresponding to the domains assigned to the |
@@ -418,12 +437,12 @@ available to a KVM guest via the following CPU model features: | |||
418 | 437 | ||
419 | Note: If the user chooses to specify a CPU model different than the 'host' | 438 | Note: If the user chooses to specify a CPU model different than the 'host' |
420 | model to QEMU, the CPU model features and facilities need to be turned on | 439 | model to QEMU, the CPU model features and facilities need to be turned on |
421 | explicitly; for example: | 440 | explicitly; for example:: |
422 | 441 | ||
423 | /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on | 442 | /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on |
424 | 443 | ||
425 | A guest can be precluded from using AP features/facilities by turning them off | 444 | A guest can be precluded from using AP features/facilities by turning them off |
426 | explicitly; for example: | 445 | explicitly; for example:: |
427 | 446 | ||
428 | /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off | 447 | /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off |
429 | 448 | ||
@@ -435,7 +454,7 @@ the APFT facility is not installed on the guest, then the probe of device | |||
435 | drivers will fail since only type 10 and newer devices can be configured for | 454 | drivers will fail since only type 10 and newer devices can be configured for |
436 | guest use. | 455 | guest use. |
437 | 456 | ||
438 | Example: | 457 | Example |
439 | ======= | 458 | ======= |
440 | Let's now provide an example to illustrate how KVM guests may be given | 459 | Let's now provide an example to illustrate how KVM guests may be given |
441 | access to AP facilities. For this example, we will show how to configure | 460 | access to AP facilities. For this example, we will show how to configure |
@@ -444,30 +463,36 @@ look like this: | |||
444 | 463 | ||
445 | Guest1 | 464 | Guest1 |
446 | ------ | 465 | ------ |
466 | =========== ===== ============ | ||
447 | CARD.DOMAIN TYPE MODE | 467 | CARD.DOMAIN TYPE MODE |
448 | ------------------------------ | 468 | =========== ===== ============ |
449 | 05 CEX5C CCA-Coproc | 469 | 05 CEX5C CCA-Coproc |
450 | 05.0004 CEX5C CCA-Coproc | 470 | 05.0004 CEX5C CCA-Coproc |
451 | 05.00ab CEX5C CCA-Coproc | 471 | 05.00ab CEX5C CCA-Coproc |
452 | 06 CEX5A Accelerator | 472 | 06 CEX5A Accelerator |
453 | 06.0004 CEX5A Accelerator | 473 | 06.0004 CEX5A Accelerator |
454 | 06.00ab CEX5C CCA-Coproc | 474 | 06.00ab CEX5C CCA-Coproc |
475 | =========== ===== ============ | ||
455 | 476 | ||
456 | Guest2 | 477 | Guest2 |
457 | ------ | 478 | ------ |
479 | =========== ===== ============ | ||
458 | CARD.DOMAIN TYPE MODE | 480 | CARD.DOMAIN TYPE MODE |
459 | ------------------------------ | 481 | =========== ===== ============ |
460 | 05 CEX5A Accelerator | 482 | 05 CEX5A Accelerator |
461 | 05.0047 CEX5A Accelerator | 483 | 05.0047 CEX5A Accelerator |
462 | 05.00ff CEX5A Accelerator | 484 | 05.00ff CEX5A Accelerator |
485 | =========== ===== ============ | ||
463 | 486 | ||
464 | Guest3 | 487 | Guest3 |
465 | ------ | 488 | ------ |
489 | =========== ===== ============ | ||
466 | CARD.DOMAIN TYPE MODE | 490 | CARD.DOMAIN TYPE MODE |
467 | ------------------------------ | 491 | =========== ===== ============ |
468 | 06 CEX5A Accelerator | 492 | 06 CEX5A Accelerator |
469 | 06.0047 CEX5A Accelerator | 493 | 06.0047 CEX5A Accelerator |
470 | 06.00ff CEX5A Accelerator | 494 | 06.00ff CEX5A Accelerator |
495 | =========== ===== ============ | ||
471 | 496 | ||
472 | These are the steps: | 497 | These are the steps: |
473 | 498 | ||
@@ -492,25 +517,26 @@ These are the steps: | |||
492 | * VFIO_MDEV_DEVICE | 517 | * VFIO_MDEV_DEVICE |
493 | * KVM | 518 | * KVM |
494 | 519 | ||
495 | If using make menuconfig select the following to build the vfio_ap module: | 520 | If using make menuconfig select the following to build the vfio_ap module:: |
496 | -> Device Drivers | 521 | |
497 | -> IOMMU Hardware Support | 522 | -> Device Drivers |
498 | select S390 AP IOMMU Support | 523 | -> IOMMU Hardware Support |
499 | -> VFIO Non-Privileged userspace driver framework | 524 | select S390 AP IOMMU Support |
500 | -> Mediated device driver framework | 525 | -> VFIO Non-Privileged userspace driver framework |
501 | -> VFIO driver for Mediated devices | 526 | -> Mediated device driver framework |
502 | -> I/O subsystem | 527 | -> VFIO driver for Mediated devices |
503 | -> VFIO support for AP devices | 528 | -> I/O subsystem |
529 | -> VFIO support for AP devices | ||
504 | 530 | ||
505 | 2. Secure the AP queues to be used by the three guests so that the host can not | 531 | 2. Secure the AP queues to be used by the three guests so that the host can not |
506 | access them. To secure them, there are two sysfs files that specify | 532 | access them. To secure them, there are two sysfs files that specify |
507 | bitmasks marking a subset of the APQN range as 'usable by the default AP | 533 | bitmasks marking a subset of the APQN range as 'usable by the default AP |
508 | queue device drivers' or 'not usable by the default device drivers' and thus | 534 | queue device drivers' or 'not usable by the default device drivers' and thus |
509 | available for use by the vfio_ap device driver'. The location of the sysfs | 535 | available for use by the vfio_ap device driver'. The location of the sysfs |
510 | files containing the masks are: | 536 | files containing the masks are:: |
511 | 537 | ||
512 | /sys/bus/ap/apmask | 538 | /sys/bus/ap/apmask |
513 | /sys/bus/ap/aqmask | 539 | /sys/bus/ap/aqmask |
514 | 540 | ||
515 | The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs | 541 | The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs |
516 | (APID). Each bit in the mask, from left to right (i.e., from most significant | 542 | (APID). Each bit in the mask, from left to right (i.e., from most significant |
@@ -526,7 +552,7 @@ These are the steps: | |||
526 | queue device drivers; otherwise, the APQI is usable by the vfio_ap device | 552 | queue device drivers; otherwise, the APQI is usable by the vfio_ap device |
527 | driver. | 553 | driver. |
528 | 554 | ||
529 | Take, for example, the following mask: | 555 | Take, for example, the following mask:: |
530 | 556 | ||
531 | 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff | 557 | 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff |
532 | 558 | ||
@@ -548,68 +574,70 @@ These are the steps: | |||
548 | respective sysfs mask file in one of two formats: | 574 | respective sysfs mask file in one of two formats: |
549 | 575 | ||
550 | * An absolute hex string starting with 0x - like "0x12345678" - sets | 576 | * An absolute hex string starting with 0x - like "0x12345678" - sets |
551 | the mask. If the given string is shorter than the mask, it is padded | 577 | the mask. If the given string is shorter than the mask, it is padded |
552 | with 0s on the right; for example, specifying a mask value of 0x41 is | 578 | with 0s on the right; for example, specifying a mask value of 0x41 is |
553 | the same as specifying: | 579 | the same as specifying:: |
554 | 580 | ||
555 | 0x4100000000000000000000000000000000000000000000000000000000000000 | 581 | 0x4100000000000000000000000000000000000000000000000000000000000000 |
556 | 582 | ||
557 | Keep in mind that the mask reads from left to right (i.e., most | 583 | Keep in mind that the mask reads from left to right (i.e., most |
558 | significant to least significant bit in big endian order), so the mask | 584 | significant to least significant bit in big endian order), so the mask |
559 | above identifies device numbers 1 and 7 (01000001). | 585 | above identifies device numbers 1 and 7 (01000001). |
560 | 586 | ||
561 | If the string is longer than the mask, the operation is terminated with | 587 | If the string is longer than the mask, the operation is terminated with |
562 | an error (EINVAL). | 588 | an error (EINVAL). |
563 | 589 | ||
564 | * Individual bits in the mask can be switched on and off by specifying | 590 | * Individual bits in the mask can be switched on and off by specifying |
565 | each bit number to be switched in a comma separated list. Each bit | 591 | each bit number to be switched in a comma separated list. Each bit |
566 | number string must be prepended with a ('+') or minus ('-') to indicate | 592 | number string must be prepended with a ('+') or minus ('-') to indicate |
567 | the corresponding bit is to be switched on ('+') or off ('-'). Some | 593 | the corresponding bit is to be switched on ('+') or off ('-'). Some |
568 | valid values are: | 594 | valid values are: |
569 | 595 | ||
570 | "+0" switches bit 0 on | 596 | - "+0" switches bit 0 on |
571 | "-13" switches bit 13 off | 597 | - "-13" switches bit 13 off |
572 | "+0x41" switches bit 65 on | 598 | - "+0x41" switches bit 65 on |
573 | "-0xff" switches bit 255 off | 599 | - "-0xff" switches bit 255 off |
574 | 600 | ||
575 | The following example: | 601 | The following example: |
576 | +0,-6,+0x47,-0xf0 | ||
577 | 602 | ||
578 | Switches bits 0 and 71 (0x47) on | 603 | +0,-6,+0x47,-0xf0 |
579 | Switches bits 6 and 240 (0xf0) off | ||
580 | 604 | ||
581 | Note that the bits not specified in the list remain as they were before | 605 | Switches bits 0 and 71 (0x47) on |
582 | the operation. | 606 | |
607 | Switches bits 6 and 240 (0xf0) off | ||
608 | |||
609 | Note that the bits not specified in the list remain as they were before | ||
610 | the operation. | ||
583 | 611 | ||
584 | 2. The masks can also be changed at boot time via parameters on the kernel | 612 | 2. The masks can also be changed at boot time via parameters on the kernel |
585 | command line like this: | 613 | command line like this: |
586 | 614 | ||
587 | ap.apmask=0xffff ap.aqmask=0x40 | 615 | ap.apmask=0xffff ap.aqmask=0x40 |
588 | 616 | ||
589 | This would create the following masks: | 617 | This would create the following masks:: |
590 | 618 | ||
591 | apmask: | 619 | apmask: |
592 | 0xffff000000000000000000000000000000000000000000000000000000000000 | 620 | 0xffff000000000000000000000000000000000000000000000000000000000000 |
593 | 621 | ||
594 | aqmask: | 622 | aqmask: |
595 | 0x4000000000000000000000000000000000000000000000000000000000000000 | 623 | 0x4000000000000000000000000000000000000000000000000000000000000000 |
596 | 624 | ||
597 | Resulting in these two pools: | 625 | Resulting in these two pools:: |
598 | 626 | ||
599 | default drivers pool: adapter 0-15, domain 1 | 627 | default drivers pool: adapter 0-15, domain 1 |
600 | alternate drivers pool: adapter 16-255, domains 0, 2-255 | 628 | alternate drivers pool: adapter 16-255, domains 0, 2-255 |
601 | 629 | ||
602 | Securing the APQNs for our example: | 630 | Securing the APQNs for our example |
603 | ---------------------------------- | 631 | ---------------------------------- |
604 | To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047, | 632 | To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047, |
605 | 06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding | 633 | 06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding |
606 | APQNs can either be removed from the default masks: | 634 | APQNs can either be removed from the default masks:: |
607 | 635 | ||
608 | echo -5,-6 > /sys/bus/ap/apmask | 636 | echo -5,-6 > /sys/bus/ap/apmask |
609 | 637 | ||
610 | echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask | 638 | echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask |
611 | 639 | ||
612 | Or the masks can be set as follows: | 640 | Or the masks can be set as follows:: |
613 | 641 | ||
614 | echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \ | 642 | echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \ |
615 | > apmask | 643 | > apmask |
@@ -620,19 +648,19 @@ These are the steps: | |||
620 | This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, | 648 | This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, |
621 | 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The | 649 | 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The |
622 | sysfs directory for the vfio_ap device driver will now contain symbolic links | 650 | sysfs directory for the vfio_ap device driver will now contain symbolic links |
623 | to the AP queue devices bound to it: | 651 | to the AP queue devices bound to it:: |
624 | 652 | ||
625 | /sys/bus/ap | 653 | /sys/bus/ap |
626 | ... [drivers] | 654 | ... [drivers] |
627 | ...... [vfio_ap] | 655 | ...... [vfio_ap] |
628 | ......... [05.0004] | 656 | ......... [05.0004] |
629 | ......... [05.0047] | 657 | ......... [05.0047] |
630 | ......... [05.00ab] | 658 | ......... [05.00ab] |
631 | ......... [05.00ff] | 659 | ......... [05.00ff] |
632 | ......... [06.0004] | 660 | ......... [06.0004] |
633 | ......... [06.0047] | 661 | ......... [06.0047] |
634 | ......... [06.00ab] | 662 | ......... [06.00ab] |
635 | ......... [06.00ff] | 663 | ......... [06.00ff] |
636 | 664 | ||
637 | Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later) | 665 | Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later) |
638 | can be bound to the vfio_ap device driver. The reason for this is to | 666 | can be bound to the vfio_ap device driver. The reason for this is to |
@@ -645,96 +673,96 @@ These are the steps: | |||
645 | queue device can be read from the parent card's sysfs directory. For example, | 673 | queue device can be read from the parent card's sysfs directory. For example, |
646 | to see the hardware type of the queue 05.0004: | 674 | to see the hardware type of the queue 05.0004: |
647 | 675 | ||
648 | cat /sys/bus/ap/devices/card05/hwtype | 676 | cat /sys/bus/ap/devices/card05/hwtype |
649 | 677 | ||
650 | The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the | 678 | The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the |
651 | vfio_ap device driver. | 679 | vfio_ap device driver. |
652 | 680 | ||
653 | 3. Create the mediated devices needed to configure the AP matrixes for the | 681 | 3. Create the mediated devices needed to configure the AP matrixes for the |
654 | three guests and to provide an interface to the vfio_ap driver for | 682 | three guests and to provide an interface to the vfio_ap driver for |
655 | use by the guests: | 683 | use by the guests:: |
656 | 684 | ||
657 | /sys/devices/vfio_ap/matrix/ | 685 | /sys/devices/vfio_ap/matrix/ |
658 | --- [mdev_supported_types] | 686 | --- [mdev_supported_types] |
659 | ------ [vfio_ap-passthrough] (passthrough mediated matrix device type) | 687 | ------ [vfio_ap-passthrough] (passthrough mediated matrix device type) |
660 | --------- create | 688 | --------- create |
661 | --------- [devices] | 689 | --------- [devices] |
662 | 690 | ||
663 | To create the mediated devices for the three guests: | 691 | To create the mediated devices for the three guests:: |
664 | 692 | ||
665 | uuidgen > create | 693 | uuidgen > create |
666 | uuidgen > create | 694 | uuidgen > create |
667 | uuidgen > create | 695 | uuidgen > create |
668 | 696 | ||
669 | or | 697 | or |
670 | 698 | ||
671 | echo $uuid1 > create | 699 | echo $uuid1 > create |
672 | echo $uuid2 > create | 700 | echo $uuid2 > create |
673 | echo $uuid3 > create | 701 | echo $uuid3 > create |
674 | 702 | ||
675 | This will create three mediated devices in the [devices] subdirectory named | 703 | This will create three mediated devices in the [devices] subdirectory named |
676 | after the UUID written to the create attribute file. We call them $uuid1, | 704 | after the UUID written to the create attribute file. We call them $uuid1, |
677 | $uuid2 and $uuid3 and this is the sysfs directory structure after creation: | 705 | $uuid2 and $uuid3 and this is the sysfs directory structure after creation:: |
678 | 706 | ||
679 | /sys/devices/vfio_ap/matrix/ | 707 | /sys/devices/vfio_ap/matrix/ |
680 | --- [mdev_supported_types] | 708 | --- [mdev_supported_types] |
681 | ------ [vfio_ap-passthrough] | 709 | ------ [vfio_ap-passthrough] |
682 | --------- [devices] | 710 | --------- [devices] |
683 | ------------ [$uuid1] | 711 | ------------ [$uuid1] |
684 | --------------- assign_adapter | 712 | --------------- assign_adapter |
685 | --------------- assign_control_domain | 713 | --------------- assign_control_domain |
686 | --------------- assign_domain | 714 | --------------- assign_domain |
687 | --------------- matrix | 715 | --------------- matrix |
688 | --------------- unassign_adapter | 716 | --------------- unassign_adapter |
689 | --------------- unassign_control_domain | 717 | --------------- unassign_control_domain |
690 | --------------- unassign_domain | 718 | --------------- unassign_domain |
691 | 719 | ||
692 | ------------ [$uuid2] | 720 | ------------ [$uuid2] |
693 | --------------- assign_adapter | 721 | --------------- assign_adapter |
694 | --------------- assign_control_domain | 722 | --------------- assign_control_domain |
695 | --------------- assign_domain | 723 | --------------- assign_domain |
696 | --------------- matrix | 724 | --------------- matrix |
697 | --------------- unassign_adapter | 725 | --------------- unassign_adapter |
698 | ----------------unassign_control_domain | 726 | ----------------unassign_control_domain |
699 | ----------------unassign_domain | 727 | ----------------unassign_domain |
700 | 728 | ||
701 | ------------ [$uuid3] | 729 | ------------ [$uuid3] |
702 | --------------- assign_adapter | 730 | --------------- assign_adapter |
703 | --------------- assign_control_domain | 731 | --------------- assign_control_domain |
704 | --------------- assign_domain | 732 | --------------- assign_domain |
705 | --------------- matrix | 733 | --------------- matrix |
706 | --------------- unassign_adapter | 734 | --------------- unassign_adapter |
707 | ----------------unassign_control_domain | 735 | ----------------unassign_control_domain |
708 | ----------------unassign_domain | 736 | ----------------unassign_domain |
709 | 737 | ||
710 | 4. The administrator now needs to configure the matrixes for the mediated | 738 | 4. The administrator now needs to configure the matrixes for the mediated |
711 | devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3). | 739 | devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3). |
712 | 740 | ||
713 | This is how the matrix is configured for Guest1: | 741 | This is how the matrix is configured for Guest1:: |
714 | 742 | ||
715 | echo 5 > assign_adapter | 743 | echo 5 > assign_adapter |
716 | echo 6 > assign_adapter | 744 | echo 6 > assign_adapter |
717 | echo 4 > assign_domain | 745 | echo 4 > assign_domain |
718 | echo 0xab > assign_domain | 746 | echo 0xab > assign_domain |
719 | 747 | ||
720 | Control domains can similarly be assigned using the assign_control_domain | 748 | Control domains can similarly be assigned using the assign_control_domain |
721 | sysfs file. | 749 | sysfs file. |
722 | 750 | ||
723 | If a mistake is made configuring an adapter, domain or control domain, | 751 | If a mistake is made configuring an adapter, domain or control domain, |
724 | you can use the unassign_xxx files to unassign the adapter, domain or | 752 | you can use the unassign_xxx files to unassign the adapter, domain or |
725 | control domain. | 753 | control domain. |
726 | 754 | ||
727 | To display the matrix configuration for Guest1: | 755 | To display the matrix configuration for Guest1:: |
728 | 756 | ||
729 | cat matrix | 757 | cat matrix |
730 | 758 | ||
731 | This is how the matrix is configured for Guest2: | 759 | This is how the matrix is configured for Guest2:: |
732 | 760 | ||
733 | echo 5 > assign_adapter | 761 | echo 5 > assign_adapter |
734 | echo 0x47 > assign_domain | 762 | echo 0x47 > assign_domain |
735 | echo 0xff > assign_domain | 763 | echo 0xff > assign_domain |
736 | 764 | ||
737 | This is how the matrix is configured for Guest3: | 765 | This is how the matrix is configured for Guest3:: |
738 | 766 | ||
739 | echo 6 > assign_adapter | 767 | echo 6 > assign_adapter |
740 | echo 0x47 > assign_domain | 768 | echo 0x47 > assign_domain |
@@ -783,24 +811,24 @@ These are the steps: | |||
783 | configured for the system. If a control domain number higher than the maximum | 811 | configured for the system. If a control domain number higher than the maximum |
784 | is specified, the operation will terminate with an error (ENODEV). | 812 | is specified, the operation will terminate with an error (ENODEV). |
785 | 813 | ||
786 | 5. Start Guest1: | 814 | 5. Start Guest1:: |
787 | 815 | ||
788 | /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ | 816 | /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ |
789 | -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ... | 817 | -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ... |
790 | 818 | ||
791 | 6. Start Guest2: | 819 | 6. Start Guest2:: |
792 | 820 | ||
793 | /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ | 821 | /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ |
794 | -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ... | 822 | -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ... |
795 | 823 | ||
796 | 7. Start Guest3: | 824 | 7. Start Guest3:: |
797 | 825 | ||
798 | /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ | 826 | /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ |
799 | -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ... | 827 | -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ... |
800 | 828 | ||
801 | When the guest is shut down, the mediated matrix devices may be removed. | 829 | When the guest is shut down, the mediated matrix devices may be removed. |
802 | 830 | ||
803 | Using our example again, to remove the mediated matrix device $uuid1: | 831 | Using our example again, to remove the mediated matrix device $uuid1:: |
804 | 832 | ||
805 | /sys/devices/vfio_ap/matrix/ | 833 | /sys/devices/vfio_ap/matrix/ |
806 | --- [mdev_supported_types] | 834 | --- [mdev_supported_types] |
@@ -809,18 +837,19 @@ Using our example again, to remove the mediated matrix device $uuid1: | |||
809 | ------------ [$uuid1] | 837 | ------------ [$uuid1] |
810 | --------------- remove | 838 | --------------- remove |
811 | 839 | ||
840 | :: | ||
812 | 841 | ||
813 | echo 1 > remove | 842 | echo 1 > remove |
814 | 843 | ||
815 | This will remove all of the mdev matrix device's sysfs structures including | 844 | This will remove all of the mdev matrix device's sysfs structures including |
816 | the mdev device itself. To recreate and reconfigure the mdev matrix device, | 845 | the mdev device itself. To recreate and reconfigure the mdev matrix device, |
817 | all of the steps starting with step 3 will have to be performed again. Note | 846 | all of the steps starting with step 3 will have to be performed again. Note |
818 | that the remove will fail if a guest using the mdev is still running. | 847 | that the remove will fail if a guest using the mdev is still running. |
819 | 848 | ||
820 | It is not necessary to remove an mdev matrix device, but one may want to | 849 | It is not necessary to remove an mdev matrix device, but one may want to |
821 | remove it if no guest will use it during the remaining lifetime of the linux | 850 | remove it if no guest will use it during the remaining lifetime of the linux |
822 | host. If the mdev matrix device is removed, one may want to also reconfigure | 851 | host. If the mdev matrix device is removed, one may want to also reconfigure |
823 | the pool of adapters and queues reserved for use by the default drivers. | 852 | the pool of adapters and queues reserved for use by the default drivers. |
824 | 853 | ||
825 | Limitations | 854 | Limitations |
826 | =========== | 855 | =========== |
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.rst index 2be11ad864ff..1f6d0b56d53e 100644 --- a/Documentation/s390/vfio-ccw.txt +++ b/Documentation/s390/vfio-ccw.rst | |||
@@ -1,3 +1,4 @@ | |||
1 | ================================== | ||
1 | vfio-ccw: the basic infrastructure | 2 | vfio-ccw: the basic infrastructure |
2 | ================================== | 3 | ================================== |
3 | 4 | ||
@@ -11,9 +12,11 @@ virtual machine, while vfio is the means. | |||
11 | Different than other hardware architectures, s390 has defined a unified | 12 | Different than other hardware architectures, s390 has defined a unified |
12 | I/O access method, the so-called Channel I/O. It has its own access | 13 | I/O access method, the so-called Channel I/O. It has its own access |
13 | patterns: | 14 | patterns: |
15 | |||
14 | - Channel programs run asynchronously on a separate (co)processor. | 16 | - Channel programs run asynchronously on a separate (co)processor. |
15 | - The channel subsystem will access any memory designated by the caller | 17 | - The channel subsystem will access any memory designated by the caller |
16 | in the channel program directly, i.e. there is no iommu involved. | 18 | in the channel program directly, i.e. there is no iommu involved. |
19 | |||
17 | Thus when we introduce vfio support for these devices, we realize it | 20 | Thus when we introduce vfio support for these devices, we realize it |
18 | with a mediated device (mdev) implementation. The vfio mdev will be | 21 | with a mediated device (mdev) implementation. The vfio mdev will be |
19 | added to an iommu group, so as to make itself able to be managed by the | 22 | added to an iommu group, so as to make itself able to be managed by the |
@@ -24,6 +27,7 @@ to perform I/O instructions. | |||
24 | 27 | ||
25 | This document does not intend to explain the s390 I/O architecture in | 28 | This document does not intend to explain the s390 I/O architecture in |
26 | every detail. More information and references can be found here: | 29 | every detail. More information and references can be found here: |
30 | |||
27 | - A good start to know Channel I/O in general: | 31 | - A good start to know Channel I/O in general: |
28 | https://en.wikipedia.org/wiki/Channel_I/O | 32 | https://en.wikipedia.org/wiki/Channel_I/O |
29 | - s390 architecture: | 33 | - s390 architecture: |
@@ -80,6 +84,7 @@ until interrupted. The I/O completion result is received by the | |||
80 | interrupt handler in the form of interrupt response block (IRB). | 84 | interrupt handler in the form of interrupt response block (IRB). |
81 | 85 | ||
82 | Back to vfio-ccw, in short: | 86 | Back to vfio-ccw, in short: |
87 | |||
83 | - ORBs and channel programs are built in guest kernel (with guest | 88 | - ORBs and channel programs are built in guest kernel (with guest |
84 | physical addresses). | 89 | physical addresses). |
85 | - ORBs and channel programs are passed to the host kernel. | 90 | - ORBs and channel programs are passed to the host kernel. |
@@ -106,6 +111,7 @@ it gets sent to hardware. | |||
106 | 111 | ||
107 | Within this implementation, we have two drivers for two types of | 112 | Within this implementation, we have two drivers for two types of |
108 | devices: | 113 | devices: |
114 | |||
109 | - The vfio_ccw driver for the physical subchannel device. | 115 | - The vfio_ccw driver for the physical subchannel device. |
110 | This is an I/O subchannel driver for the real subchannel device. It | 116 | This is an I/O subchannel driver for the real subchannel device. It |
111 | realizes a group of callbacks and registers to the mdev framework as a | 117 | realizes a group of callbacks and registers to the mdev framework as a |
@@ -137,7 +143,7 @@ devices: | |||
137 | vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu | 143 | vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu |
138 | backend for the physical devices to pin and unpin pages by demand. | 144 | backend for the physical devices to pin and unpin pages by demand. |
139 | 145 | ||
140 | Below is a high-level block diagram. | 146 | Below is a high-level block diagram:: |
141 | 147 | ||
142 | +-------------+ | 148 | +-------------+ |
143 | | | | 149 | | | |
@@ -158,6 +164,7 @@ Below is a high Level block diagram. | |||
158 | +-------------+ | 164 | +-------------+ |
159 | 165 | ||
160 | The process of how these work together. | 166 | The process of how these work together. |
167 | |||
161 | 1. vfio_ccw.ko drives the physical I/O subchannel, and registers the | 168 | 1. vfio_ccw.ko drives the physical I/O subchannel, and registers the |
162 | physical device (with callbacks) to mdev framework. | 169 | physical device (with callbacks) to mdev framework. |
163 | When vfio_ccw probes the subchannel device, it registers device | 170 | When vfio_ccw probes the subchannel device, it registers device |
@@ -178,17 +185,17 @@ vfio-ccw I/O region | |||
178 | 185 | ||
179 | An I/O region is used to accept channel program request from user | 186 | An I/O region is used to accept channel program request from user |
180 | space and store I/O interrupt result for user space to retrieve. The | 187 | space and store I/O interrupt result for user space to retrieve. The |
181 | definition of the region is: | 188 | definition of the region is:: |
182 | 189 | ||
183 | struct ccw_io_region { | 190 | struct ccw_io_region { |
184 | #define ORB_AREA_SIZE 12 | 191 | #define ORB_AREA_SIZE 12 |
185 | __u8 orb_area[ORB_AREA_SIZE]; | 192 | __u8 orb_area[ORB_AREA_SIZE]; |
186 | #define SCSW_AREA_SIZE 12 | 193 | #define SCSW_AREA_SIZE 12 |
187 | __u8 scsw_area[SCSW_AREA_SIZE]; | 194 | __u8 scsw_area[SCSW_AREA_SIZE]; |
188 | #define IRB_AREA_SIZE 96 | 195 | #define IRB_AREA_SIZE 96 |
189 | __u8 irb_area[IRB_AREA_SIZE]; | 196 | __u8 irb_area[IRB_AREA_SIZE]; |
190 | __u32 ret_code; | 197 | __u32 ret_code; |
191 | } __packed; | 198 | } __packed; |
192 | 199 | ||
193 | While starting an I/O request, orb_area should be filled with the | 200 | While starting an I/O request, orb_area should be filled with the |
194 | guest ORB, and scsw_area should be filled with the SCSW of the Virtual | 201 | guest ORB, and scsw_area should be filled with the SCSW of the Virtual |
@@ -205,7 +212,7 @@ vfio-ccw follows what vfio-pci did on the s390 platform and uses | |||
205 | vfio-iommu-type1 as the vfio iommu backend. | 212 | vfio-iommu-type1 as the vfio iommu backend. |
206 | 213 | ||
207 | * CCW translation APIs | 214 | * CCW translation APIs |
208 | A group of APIs (start with 'cp_') to do CCW translation. The CCWs | 215 | A group of APIs (start with `cp_`) to do CCW translation. The CCWs |
209 | passed in by a user space program are organized with their guest | 216 | passed in by a user space program are organized with their guest |
210 | physical memory addresses. These APIs will copy the CCWs into kernel | 217 | physical memory addresses. These APIs will copy the CCWs into kernel |
211 | space, and assemble a runnable kernel channel program by updating the | 218 | space, and assemble a runnable kernel channel program by updating the |
@@ -217,12 +224,14 @@ vfio-iommu-type1 as the vfio iommu backend. | |||
217 | This driver utilizes the CCW translation APIs and introduces | 224 | This driver utilizes the CCW translation APIs and introduces |
218 | vfio_ccw, which is the driver for the I/O subchannel devices you want | 225 | vfio_ccw, which is the driver for the I/O subchannel devices you want |
219 | to pass through. | 226 | to pass through. |
220 | vfio_ccw implements the following vfio ioctls: | 227 | vfio_ccw implements the following vfio ioctls:: |
228 | |||
221 | VFIO_DEVICE_GET_INFO | 229 | VFIO_DEVICE_GET_INFO |
222 | VFIO_DEVICE_GET_IRQ_INFO | 230 | VFIO_DEVICE_GET_IRQ_INFO |
223 | VFIO_DEVICE_GET_REGION_INFO | 231 | VFIO_DEVICE_GET_REGION_INFO |
224 | VFIO_DEVICE_RESET | 232 | VFIO_DEVICE_RESET |
225 | VFIO_DEVICE_SET_IRQS | 233 | VFIO_DEVICE_SET_IRQS |
234 | |||
226 | This provides an I/O region, so that the user space program can pass a | 235 | This provides an I/O region, so that the user space program can pass a |
227 | channel program to the kernel, to do further CCW translation before | 236 | channel program to the kernel, to do further CCW translation before |
228 | issuing them to a real device. | 237 | issuing them to a real device. |
@@ -236,32 +245,49 @@ bit more detail how an I/O request triggered by the QEMU guest will be | |||
236 | handled (without error handling). | 245 | handled (without error handling). |
237 | 246 | ||
238 | Explanation: | 247 | Explanation: |
239 | Q1-Q7: QEMU side process. | ||
240 | K1-K5: Kernel side process. | ||
241 | 248 | ||
242 | Q1. Get I/O region info during initialization. | 249 | - Q1-Q7: QEMU side process. |
243 | Q2. Setup event notifier and handler to handle I/O completion. | 250 | - K1-K5: Kernel side process. |
251 | |||
252 | Q1. | ||
253 | Get I/O region info during initialization. | ||
254 | |||
255 | Q2. | ||
256 | Setup event notifier and handler to handle I/O completion. | ||
244 | 257 | ||
245 | ... ... | 258 | ... ... |
246 | 259 | ||
247 | Q3. Intercept a ssch instruction. | 260 | Q3. |
248 | Q4. Write the guest channel program and ORB to the I/O region. | 261 | Intercept a ssch instruction. |
249 | K1. Copy from guest to kernel. | 262 | Q4. |
250 | K2. Translate the guest channel program to a host kernel space | 263 | Write the guest channel program and ORB to the I/O region. |
251 | channel program, which becomes runnable for a real device. | 264 | |
252 | K3. With the necessary information contained in the orb passed in | 265 | K1. |
253 | by QEMU, issue the ccwchain to the device. | 266 | Copy from guest to kernel. |
254 | K4. Return the ssch CC code. | 267 | K2. |
255 | Q5. Return the CC code to the guest. | 268 | Translate the guest channel program to a host kernel space |
269 | channel program, which becomes runnable for a real device. | ||
270 | K3. | ||
271 | With the necessary information contained in the orb passed in | ||
272 | by QEMU, issue the ccwchain to the device. | ||
273 | K4. | ||
274 | Return the ssch CC code. | ||
275 | Q5. | ||
276 | Return the CC code to the guest. | ||
256 | 277 | ||
257 | ... ... | 278 | ... ... |
258 | 279 | ||
259 | K5. Interrupt handler gets the I/O result and writes the result to | 280 | K5. |
260 | the I/O region. | 281 | Interrupt handler gets the I/O result and writes the result to |
261 | K6. Signal QEMU to retrieve the result. | 282 | the I/O region. |
262 | Q6. Get the signal and event handler reads out the result from the I/O | 283 | K6. |
284 | Signal QEMU to retrieve the result. | ||
285 | |||
286 | Q6. | ||
287 | Get the signal and event handler reads out the result from the I/O | ||
263 | region. | 288 | region. |
264 | Q7. Update the irb for the guest. | 289 | Q7. |
290 | Update the irb for the guest. | ||
265 | 291 | ||
266 | Limitations | 292 | Limitations |
267 | ----------- | 293 | ----------- |
@@ -295,6 +321,6 @@ Reference | |||
295 | 1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832) | 321 | 1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832) |
296 | 2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204) | 322 | 2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204) |
297 | 3. https://en.wikipedia.org/wiki/Channel_I/O | 323 | 3. https://en.wikipedia.org/wiki/Channel_I/O |
298 | 4. Documentation/s390/cds.txt | 324 | 4. Documentation/s390/cds.rst |
299 | 5. Documentation/vfio.txt | 325 | 5. Documentation/vfio.txt |
300 | 6. Documentation/vfio-mediated-device.txt | 326 | 6. Documentation/vfio-mediated-device.txt |
diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.rst index b064aa59714d..54e8e7caf7e7 100644 --- a/Documentation/s390/zfcpdump.txt +++ b/Documentation/s390/zfcpdump.rst | |||
@@ -1,4 +1,6 @@ | |||
1 | ================================== | ||
1 | The s390 SCSI dump tool (zfcpdump) | 2 | The s390 SCSI dump tool (zfcpdump) |
3 | ================================== | ||
2 | 4 | ||
3 | System z machines (z900 or higher) provide hardware support for creating system | 5 | System z machines (z900 or higher) provide hardware support for creating system |
4 | dumps on SCSI disks. The dump process is initiated by booting a dump tool, which | 6 | dumps on SCSI disks. The dump process is initiated by booting a dump tool, which |
diff --git a/MAINTAINERS b/MAINTAINERS index a6954776a37e..0e904873fb0a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -13703,7 +13703,7 @@ L: linux-s390@vger.kernel.org | |||
13703 | L: kvm@vger.kernel.org | 13703 | L: kvm@vger.kernel.org |
13704 | S: Supported | 13704 | S: Supported |
13705 | F: drivers/s390/cio/vfio_ccw* | 13705 | F: drivers/s390/cio/vfio_ccw* |
13706 | F: Documentation/s390/vfio-ccw.txt | 13706 | F: Documentation/s390/vfio-ccw.rst |
13707 | F: include/uapi/linux/vfio_ccw.h | 13707 | F: include/uapi/linux/vfio_ccw.h |
13708 | 13708 | ||
13709 | S390 ZCRYPT DRIVER | 13709 | S390 ZCRYPT DRIVER |
@@ -13723,7 +13723,7 @@ S: Supported | |||
13723 | F: drivers/s390/crypto/vfio_ap_drv.c | 13723 | F: drivers/s390/crypto/vfio_ap_drv.c |
13724 | F: drivers/s390/crypto/vfio_ap_private.h | 13724 | F: drivers/s390/crypto/vfio_ap_private.h |
13725 | F: drivers/s390/crypto/vfio_ap_ops.c | 13725 | F: drivers/s390/crypto/vfio_ap_ops.c |
13726 | F: Documentation/s390/vfio-ap.txt | 13726 | F: Documentation/s390/vfio-ap.rst |
13727 | 13727 | ||
13728 | S390 ZFCP DRIVER | 13728 | S390 ZFCP DRIVER |
13729 | M: Steffen Maier <maier@linux.ibm.com> | 13729 | M: Steffen Maier <maier@linux.ibm.com> |
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 66be2d813951..65522d6956ca 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -810,9 +810,9 @@ config CRASH_DUMP | |||
810 | Crash dump kernels are loaded in the main kernel with kexec-tools | 810 | Crash dump kernels are loaded in the main kernel with kexec-tools |
811 | into a specially reserved region and then later executed after | 811 | into a specially reserved region and then later executed after |
812 | a crash by kdump/kexec. | 812 | a crash by kdump/kexec. |
813 | Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. | 813 | Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this. |
814 | This option also enables s390 zfcpdump. | 814 | This option also enables s390 zfcpdump. |
815 | See also <file:Documentation/s390/zfcpdump.txt> | 815 | See also <file:Documentation/s390/zfcpdump.rst> |
816 | 816 | ||
817 | endmenu | 817 | endmenu |
818 | 818 | ||
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index c305d39f5016..b94783f71322 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h | |||
@@ -152,7 +152,7 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level, | |||
152 | 152 | ||
153 | /* | 153 | /* |
154 | * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are | 154 | * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are |
155 | * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! | 155 | * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! |
156 | */ | 156 | */ |
157 | extern debug_entry_t * | 157 | extern debug_entry_t * |
158 | __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) | 158 | __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) |
@@ -210,7 +210,7 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level, | |||
210 | 210 | ||
211 | /* | 211 | /* |
212 | * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are | 212 | * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are |
213 | * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! | 213 | * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! |
214 | */ | 214 | */ |
215 | extern debug_entry_t * | 215 | extern debug_entry_t * |
216 | __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) | 216 | __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) |
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 405a60538630..08f812475f5e 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same | 4 | * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same |
5 | * dump format as s390 standalone dumps. | 5 | * dump format as s390 standalone dumps. |
6 | * | 6 | * |
7 | * For more information please refer to Documentation/s390/zfcpdump.txt | 7 | * For more information please refer to Documentation/s390/zfcpdump.rst |
8 | * | 8 | * |
9 | * Copyright IBM Corp. 2003, 2008 | 9 | * Copyright IBM Corp. 2003, 2008 |
10 | * Author(s): Michael Holzheu | 10 | * Author(s): Michael Holzheu |