diff options
57 files changed, 1855 insertions, 973 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 159e2a0c3e80..76b44290c154 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt | |||
| @@ -217,6 +217,12 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs) | |||
| 217 | to represent the cpuset hierarchy provides for a familiar permission | 217 | to represent the cpuset hierarchy provides for a familiar permission |
| 218 | and name space for cpusets, with a minimum of additional kernel code. | 218 | and name space for cpusets, with a minimum of additional kernel code. |
| 219 | 219 | ||
| 220 | The cpus file in the root (top_cpuset) cpuset is read-only. | ||
| 221 | It automatically tracks the value of cpu_online_map, using a CPU | ||
| 222 | hotplug notifier. If and when memory nodes can be hotplugged, | ||
| 223 | we expect to make the mems file in the root cpuset read-only | ||
| 224 | as well, and have it track the value of node_online_map. | ||
| 225 | |||
| 220 | 226 | ||
| 221 | 1.4 What are exclusive cpusets ? | 227 | 1.4 What are exclusive cpusets ? |
| 222 | -------------------------------- | 228 | -------------------------------- |
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 66fdc0744fe0..16dec61d7671 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
| @@ -62,8 +62,8 @@ ramfs-rootfs-initramfs.txt | |||
| 62 | - info on the 'in memory' filesystems ramfs, rootfs and initramfs. | 62 | - info on the 'in memory' filesystems ramfs, rootfs and initramfs. |
| 63 | reiser4.txt | 63 | reiser4.txt |
| 64 | - info on the Reiser4 filesystem based on dancing tree algorithms. | 64 | - info on the Reiser4 filesystem based on dancing tree algorithms. |
| 65 | relayfs.txt | 65 | relay.txt |
| 66 | - info on relayfs, for efficient streaming from kernel to user space. | 66 | - info on relay, for efficient streaming from kernel to user space. |
| 67 | romfs.txt | 67 | romfs.txt |
| 68 | - description of the ROMFS filesystem. | 68 | - description of the ROMFS filesystem. |
| 69 | smbfs.txt | 69 | smbfs.txt |
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt new file mode 100644 index 000000000000..d6788dae0349 --- /dev/null +++ b/Documentation/filesystems/relay.txt | |||
| @@ -0,0 +1,479 @@ | |||
| 1 | relay interface (formerly relayfs) | ||
| 2 | ================================== | ||
| 3 | |||
| 4 | The relay interface provides a means for kernel applications to | ||
| 5 | efficiently log and transfer large quantities of data from the kernel | ||
| 6 | to userspace via user-defined 'relay channels'. | ||
| 7 | |||
| 8 | A 'relay channel' is a kernel->user data relay mechanism implemented | ||
| 9 | as a set of per-cpu kernel buffers ('channel buffers'), each | ||
| 10 | represented as a regular file ('relay file') in user space. Kernel | ||
| 11 | clients write into the channel buffers using efficient write | ||
| 12 | functions; these automatically log into the current cpu's channel | ||
| 13 | buffer. User space applications mmap() or read() from the relay files | ||
| 14 | and retrieve the data as it becomes available. The relay files | ||
| 15 | themselves are files created in a host filesystem, e.g. debugfs, and | ||
| 16 | are associated with the channel buffers using the API described below. | ||
| 17 | |||
| 18 | The format of the data logged into the channel buffers is completely | ||
| 19 | up to the kernel client; the relay interface does however provide | ||
| 20 | hooks which allow kernel clients to impose some structure on the | ||
| 21 | buffer data. The relay interface doesn't implement any form of data | ||
| 22 | filtering - this also is left to the kernel client. The purpose is to | ||
| 23 | keep things as simple as possible. | ||
| 24 | |||
| 25 | This document provides an overview of the relay interface API. The | ||
| 26 | details of the function parameters are documented along with the | ||
| 27 | functions in the relay interface code - please see that for details. | ||
| 28 | |||
| 29 | Semantics | ||
| 30 | ========= | ||
| 31 | |||
| 32 | Each relay channel has one buffer per CPU, each buffer has one or more | ||
| 33 | sub-buffers. Messages are written to the first sub-buffer until it is | ||
| 34 | too full to contain a new message, in which case it is written to | ||
| 35 | the next (if available). Messages are never split across sub-buffers. | ||
| 36 | At this point, userspace can be notified so it empties the first | ||
| 37 | sub-buffer, while the kernel continues writing to the next. | ||
| 38 | |||
| 39 | When notified that a sub-buffer is full, the kernel knows how many | ||
| 40 | bytes of it are padding i.e. unused space occurring because a complete | ||
| 41 | message couldn't fit into a sub-buffer. Userspace can use this | ||
| 42 | knowledge to copy only valid data. | ||
| 43 | |||
| 44 | After copying it, userspace can notify the kernel that a sub-buffer | ||
| 45 | has been consumed. | ||
| 46 | |||
| 47 | A relay channel can operate in a mode where it will overwrite data not | ||
| 48 | yet collected by userspace, and not wait for it to be consumed. | ||
| 49 | |||
| 50 | The relay channel itself does not provide for communication of such | ||
| 51 | data between userspace and kernel, allowing the kernel side to remain | ||
| 52 | simple and not impose a single interface on userspace. It does | ||
| 53 | provide a set of examples and a separate helper though, described | ||
| 54 | below. | ||
| 55 | |||
| 56 | The read() interface both removes padding and internally consumes the | ||
| 57 | read sub-buffers; thus in cases where read(2) is being used to drain | ||
| 58 | the channel buffers, special-purpose communication between kernel and | ||
| 59 | user isn't necessary for basic operation. | ||
| 60 | |||
| 61 | One of the major goals of the relay interface is to provide a low | ||
| 62 | overhead mechanism for conveying kernel data to userspace. While the | ||
| 63 | read() interface is easy to use, it's not as efficient as the mmap() | ||
| 64 | approach; the example code attempts to make the tradeoff between the | ||
| 65 | two approaches as small as possible. | ||
| 66 | |||
| 67 | klog and relay-apps example code | ||
| 68 | ================================ | ||
| 69 | |||
| 70 | The relay interface itself is ready to use, but to make things easier, | ||
| 71 | a couple simple utility functions and a set of examples are provided. | ||
| 72 | |||
| 73 | The relay-apps example tarball, available on the relay sourceforge | ||
| 74 | site, contains a set of self-contained examples, each consisting of a | ||
| 75 | pair of .c files containing boilerplate code for each of the user and | ||
| 76 | kernel sides of a relay application. When combined these two sets of | ||
| 77 | boilerplate code provide glue to easily stream data to disk, without | ||
| 78 | having to bother with mundane housekeeping chores. | ||
| 79 | |||
| 80 | The 'klog debugging functions' patch (klog.patch in the relay-apps | ||
| 81 | tarball) provides a couple of high-level logging functions to the | ||
| 82 | kernel which allow writing formatted text or raw data to a channel, | ||
| 83 | regardless of whether a channel to write into exists or not, or even | ||
| 84 | whether the relay interface is compiled into the kernel or not. These | ||
| 85 | functions allow you to put unconditional 'trace' statements anywhere | ||
| 86 | in the kernel or kernel modules; only when there is a 'klog handler' | ||
| 87 | registered will data actually be logged (see the klog and kleak | ||
| 88 | examples for details). | ||
| 89 | |||
| 90 | It is of course possible to use the relay interface from scratch, | ||
| 91 | i.e. without using any of the relay-apps example code or klog, but | ||
| 92 | you'll have to implement communication between userspace and kernel, | ||
| 93 | allowing both to convey the state of buffers (full, empty, amount of | ||
| 94 | padding). The read() interface both removes padding and internally | ||
| 95 | consumes the read sub-buffers; thus in cases where read(2) is being | ||
| 96 | used to drain the channel buffers, special-purpose communication | ||
| 97 | between kernel and user isn't necessary for basic operation. Things | ||
| 98 | such as buffer-full conditions would still need to be communicated via | ||
| 99 | some channel though. | ||
| 100 | |||
| 101 | klog and the relay-apps examples can be found in the relay-apps | ||
| 102 | tarball on http://relayfs.sourceforge.net | ||
| 103 | |||
| 104 | The relay interface user space API | ||
| 105 | ================================== | ||
| 106 | |||
| 107 | The relay interface implements basic file operations for user space | ||
| 108 | access to relay channel buffer data. Here are the file operations | ||
| 109 | that are available and some comments regarding their behavior: | ||
| 110 | |||
| 111 | open() enables user to open an _existing_ channel buffer. | ||
| 112 | |||
| 113 | mmap() results in channel buffer being mapped into the caller's | ||
| 114 | memory space. Note that you can't do a partial mmap - you | ||
| 115 | must map the entire file, which is NRBUF * SUBBUFSIZE. | ||
| 116 | |||
| 117 | read() read the contents of a channel buffer. The bytes read are | ||
| 118 | 'consumed' by the reader, i.e. they won't be available | ||
| 119 | again to subsequent reads. If the channel is being used | ||
| 120 | in no-overwrite mode (the default), it can be read at any | ||
| 121 | time even if there's an active kernel writer. If the | ||
| 122 | channel is being used in overwrite mode and there are | ||
| 123 | active channel writers, results may be unpredictable - | ||
| 124 | users should make sure that all logging to the channel has | ||
| 125 | ended before using read() with overwrite mode. Sub-buffer | ||
| 126 | padding is automatically removed and will not be seen by | ||
| 127 | the reader. | ||
| 128 | |||
| 129 | sendfile() transfer data from a channel buffer to an output file | ||
| 130 | descriptor. Sub-buffer padding is automatically removed | ||
| 131 | and will not be seen by the reader. | ||
| 132 | |||
| 133 | poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are | ||
| 134 | notified when sub-buffer boundaries are crossed. | ||
| 135 | |||
| 136 | close() decrements the channel buffer's refcount. When the refcount | ||
| 137 | reaches 0, i.e. when no process or kernel client has the | ||
| 138 | buffer open, the channel buffer is freed. | ||
| 139 | |||
| 140 | In order for a user application to make use of relay files, the | ||
| 141 | host filesystem must be mounted. For example, | ||
| 142 | |||
| 143 | mount -t debugfs debugfs /debug | ||
| 144 | |||
| 145 | NOTE: the host filesystem doesn't need to be mounted for kernel | ||
| 146 | clients to create or use channels - it only needs to be | ||
| 147 | mounted when user space applications need access to the buffer | ||
| 148 | data. | ||
| 149 | |||
| 150 | |||
| 151 | The relay interface kernel API | ||
| 152 | ============================== | ||
| 153 | |||
| 154 | Here's a summary of the API the relay interface provides to in-kernel clients: | ||
| 155 | |||
| 156 | TBD(curr. line MT:/API/) | ||
| 157 | channel management functions: | ||
| 158 | |||
| 159 | relay_open(base_filename, parent, subbuf_size, n_subbufs, | ||
| 160 | callbacks) | ||
| 161 | relay_close(chan) | ||
| 162 | relay_flush(chan) | ||
| 163 | relay_reset(chan) | ||
| 164 | |||
| 165 | channel management typically called on instigation of userspace: | ||
| 166 | |||
| 167 | relay_subbufs_consumed(chan, cpu, subbufs_consumed) | ||
| 168 | |||
| 169 | write functions: | ||
| 170 | |||
| 171 | relay_write(chan, data, length) | ||
| 172 | __relay_write(chan, data, length) | ||
| 173 | relay_reserve(chan, length) | ||
| 174 | |||
| 175 | callbacks: | ||
| 176 | |||
| 177 | subbuf_start(buf, subbuf, prev_subbuf, prev_padding) | ||
| 178 | buf_mapped(buf, filp) | ||
| 179 | buf_unmapped(buf, filp) | ||
| 180 | create_buf_file(filename, parent, mode, buf, is_global) | ||
| 181 | remove_buf_file(dentry) | ||
| 182 | |||
| 183 | helper functions: | ||
| 184 | |||
| 185 | relay_buf_full(buf) | ||
| 186 | subbuf_start_reserve(buf, length) | ||
| 187 | |||
| 188 | |||
| 189 | Creating a channel | ||
| 190 | ------------------ | ||
| 191 | |||
| 192 | relay_open() is used to create a channel, along with its per-cpu | ||
| 193 | channel buffers. Each channel buffer will have an associated file | ||
| 194 | created for it in the host filesystem, which can be mmapped or | ||
| 195 | read from in user space. The files are named basename0...basenameN-1 | ||
| 196 | where N is the number of online cpus, and by default will be created | ||
| 197 | in the root of the filesystem (if the parent param is NULL). If you | ||
| 198 | want a directory structure to contain your relay files, you should | ||
| 199 | create it using the host filesystem's directory creation function, | ||
| 200 | e.g. debugfs_create_dir(), and pass the parent directory to | ||
| 201 | relay_open(). Users are responsible for cleaning up any directory | ||
| 202 | structure they create, when the channel is closed - again the host | ||
| 203 | filesystem's directory removal functions should be used for that, | ||
| 204 | e.g. debugfs_remove(). | ||
| 205 | |||
| 206 | In order for a channel to be created and the host filesystem's files | ||
| 207 | associated with its channel buffers, the user must provide definitions | ||
| 208 | for two callback functions, create_buf_file() and remove_buf_file(). | ||
| 209 | create_buf_file() is called once for each per-cpu buffer from | ||
| 210 | relay_open() and allows the user to create the file which will be used | ||
| 211 | to represent the corresponding channel buffer. The callback should | ||
| 212 | return the dentry of the file created to represent the channel buffer. | ||
| 213 | remove_buf_file() must also be defined; it's responsible for deleting | ||
| 214 | the file(s) created in create_buf_file() and is called during | ||
| 215 | relay_close(). | ||
| 216 | |||
| 217 | Here are some typical definitions for these callbacks, in this case | ||
| 218 | using debugfs: | ||
| 219 | |||
| 220 | /* | ||
| 221 | * create_buf_file() callback. Creates relay file in debugfs. | ||
| 222 | */ | ||
| 223 | static struct dentry *create_buf_file_handler(const char *filename, | ||
| 224 | struct dentry *parent, | ||
| 225 | int mode, | ||
| 226 | struct rchan_buf *buf, | ||
| 227 | int *is_global) | ||
| 228 | { | ||
| 229 | return debugfs_create_file(filename, mode, parent, buf, | ||
| 230 | &relay_file_operations); | ||
| 231 | } | ||
| 232 | |||
| 233 | /* | ||
| 234 | * remove_buf_file() callback. Removes relay file from debugfs. | ||
| 235 | */ | ||
| 236 | static int remove_buf_file_handler(struct dentry *dentry) | ||
| 237 | { | ||
| 238 | debugfs_remove(dentry); | ||
| 239 | |||
| 240 | return 0; | ||
| 241 | } | ||
| 242 | |||
| 243 | /* | ||
| 244 | * relay interface callbacks | ||
| 245 | */ | ||
| 246 | static struct rchan_callbacks relay_callbacks = | ||
| 247 | { | ||
| 248 | .create_buf_file = create_buf_file_handler, | ||
| 249 | .remove_buf_file = remove_buf_file_handler, | ||
| 250 | }; | ||
| 251 | |||
| 252 | And an example relay_open() invocation using them: | ||
| 253 | |||
| 254 | chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks); | ||
| 255 | |||
| 256 | If the create_buf_file() callback fails, or isn't defined, channel | ||
| 257 | creation and thus relay_open() will fail. | ||
| 258 | |||
| 259 | The total size of each per-cpu buffer is calculated by multiplying the | ||
| 260 | number of sub-buffers by the sub-buffer size passed into relay_open(). | ||
| 261 | The idea behind sub-buffers is that they're basically an extension of | ||
| 262 | double-buffering to N buffers, and they also allow applications to | ||
| 263 | easily implement random-access-on-buffer-boundary schemes, which can | ||
| 264 | be important for some high-volume applications. The number and size | ||
| 265 | of sub-buffers is completely dependent on the application and even for | ||
| 266 | the same application, different conditions will warrant different | ||
| 267 | values for these parameters at different times. Typically, the right | ||
| 268 | values to use are best decided after some experimentation; in general, | ||
| 269 | though, it's safe to assume that having only 1 sub-buffer is a bad | ||
| 270 | idea - you're guaranteed to either overwrite data or lose events | ||
| 271 | depending on the channel mode being used. | ||
| 272 | |||
| 273 | The create_buf_file() implementation can also be defined in such a way | ||
| 274 | as to allow the creation of a single 'global' buffer instead of the | ||
| 275 | default per-cpu set. This can be useful for applications interested | ||
| 276 | mainly in seeing the relative ordering of system-wide events without | ||
| 277 | the need to bother with saving explicit timestamps for the purpose of | ||
| 278 | merging/sorting per-cpu files in a postprocessing step. | ||
| 279 | |||
| 280 | To have relay_open() create a global buffer, the create_buf_file() | ||
| 281 | implementation should set the value of the is_global outparam to a | ||
| 282 | non-zero value in addition to creating the file that will be used to | ||
| 283 | represent the single buffer. In the case of a global buffer, | ||
| 284 | create_buf_file() and remove_buf_file() will be called only once. The | ||
| 285 | normal channel-writing functions, e.g. relay_write(), can still be | ||
| 286 | used - writes from any cpu will transparently end up in the global | ||
| 287 | buffer - but since it is a global buffer, callers should make sure | ||
| 288 | they use the proper locking for such a buffer, either by wrapping | ||
| 289 | writes in a spinlock, or by copying a write function from relay.h and | ||
| 290 | creating a local version that internally does the proper locking. | ||
| 291 | |||
| 292 | Channel 'modes' | ||
| 293 | --------------- | ||
| 294 | |||
| 295 | relay channels can be used in either of two modes - 'overwrite' or | ||
| 296 | 'no-overwrite'. The mode is entirely determined by the implementation | ||
| 297 | of the subbuf_start() callback, as described below. The default if no | ||
| 298 | subbuf_start() callback is defined is 'no-overwrite' mode. If the | ||
| 299 | default mode suits your needs, and you plan to use the read() | ||
| 300 | interface to retrieve channel data, you can ignore the details of this | ||
| 301 | section, as it pertains mainly to mmap() implementations. | ||
| 302 | |||
| 303 | In 'overwrite' mode, also known as 'flight recorder' mode, writes | ||
| 304 | continuously cycle around the buffer and will never fail, but will | ||
| 305 | unconditionally overwrite old data regardless of whether it's actually | ||
| 306 | been consumed. In no-overwrite mode, writes will fail, i.e. data will | ||
| 307 | be lost, if the number of unconsumed sub-buffers equals the total | ||
| 308 | number of sub-buffers in the channel. It should be clear that if | ||
| 309 | there is no consumer or if the consumer can't consume sub-buffers fast | ||
| 310 | enough, data will be lost in either case; the only difference is | ||
| 311 | whether data is lost from the beginning or the end of a buffer. | ||
| 312 | |||
| 313 | As explained above, a relay channel is made up of one or more | ||
| 314 | per-cpu channel buffers, each implemented as a circular buffer | ||
| 315 | subdivided into one or more sub-buffers. Messages are written into | ||
| 316 | the current sub-buffer of the channel's current per-cpu buffer via the | ||
| 317 | write functions described below. Whenever a message can't fit into | ||
| 318 | the current sub-buffer, because there's no room left for it, the | ||
| 319 | client is notified via the subbuf_start() callback that a switch to a | ||
| 320 | new sub-buffer is about to occur. The client uses this callback to 1) | ||
| 321 | initialize the next sub-buffer if appropriate 2) finalize the previous | ||
| 322 | sub-buffer if appropriate and 3) return a boolean value indicating | ||
| 323 | whether or not to actually move on to the next sub-buffer. | ||
| 324 | |||
| 325 | To implement 'no-overwrite' mode, the userspace client would provide | ||
| 326 | an implementation of the subbuf_start() callback something like the | ||
| 327 | following: | ||
| 328 | |||
| 329 | static int subbuf_start(struct rchan_buf *buf, | ||
| 330 | void *subbuf, | ||
| 331 | void *prev_subbuf, | ||
| 332 | unsigned int prev_padding) | ||
| 333 | { | ||
| 334 | if (prev_subbuf) | ||
| 335 | *((unsigned *)prev_subbuf) = prev_padding; | ||
| 336 | |||
| 337 | if (relay_buf_full(buf)) | ||
| 338 | return 0; | ||
| 339 | |||
| 340 | subbuf_start_reserve(buf, sizeof(unsigned int)); | ||
| 341 | |||
| 342 | return 1; | ||
| 343 | } | ||
| 344 | |||
| 345 | If the current buffer is full, i.e. all sub-buffers remain unconsumed, | ||
| 346 | the callback returns 0 to indicate that the buffer switch should not | ||
| 347 | occur yet, i.e. until the consumer has had a chance to read the | ||
| 348 | current set of ready sub-buffers. For the relay_buf_full() function | ||
| 349 | to make sense, the consumer is responsible for notifying the relay | ||
| 350 | interface when sub-buffers have been consumed via | ||
| 351 | relay_subbufs_consumed(). Any subsequent attempts to write into the | ||
| 352 | buffer will again invoke the subbuf_start() callback with the same | ||
| 353 | parameters; only when the consumer has consumed one or more of the | ||
| 354 | ready sub-buffers will relay_buf_full() return 0, in which case the | ||
| 355 | buffer switch can continue. | ||
| 356 | |||
| 357 | The implementation of the subbuf_start() callback for 'overwrite' mode | ||
| 358 | would be very similar: | ||
| 359 | |||
| 360 | static int subbuf_start(struct rchan_buf *buf, | ||
| 361 | void *subbuf, | ||
| 362 | void *prev_subbuf, | ||
| 363 | unsigned int prev_padding) | ||
| 364 | { | ||
| 365 | if (prev_subbuf) | ||
| 366 | *((unsigned *)prev_subbuf) = prev_padding; | ||
| 367 | |||
| 368 | subbuf_start_reserve(buf, sizeof(unsigned int)); | ||
| 369 | |||
| 370 | return 1; | ||
| 371 | } | ||
| 372 | |||
| 373 | In this case, the relay_buf_full() check is meaningless and the | ||
| 374 | callback always returns 1, causing the buffer switch to occur | ||
| 375 | unconditionally. It's also meaningless for the client to use the | ||
| 376 | relay_subbufs_consumed() function in this mode, as it's never | ||
| 377 | consulted. | ||
| 378 | |||
| 379 | The default subbuf_start() implementation, used if the client doesn't | ||
| 380 | define any callbacks, or doesn't define the subbuf_start() callback, | ||
| 381 | implements the simplest possible 'no-overwrite' mode, i.e. it does | ||
| 382 | nothing but return 0. | ||
| 383 | |||
| 384 | Header information can be reserved at the beginning of each sub-buffer | ||
| 385 | by calling the subbuf_start_reserve() helper function from within the | ||
| 386 | subbuf_start() callback. This reserved area can be used to store | ||
| 387 | whatever information the client wants. In the example above, room is | ||
| 388 | reserved in each sub-buffer to store the padding count for that | ||
| 389 | sub-buffer. This is filled in for the previous sub-buffer in the | ||
| 390 | subbuf_start() implementation; the padding value for the previous | ||
| 391 | sub-buffer is passed into the subbuf_start() callback along with a | ||
| 392 | pointer to the previous sub-buffer, since the padding value isn't | ||
| 393 | known until a sub-buffer is filled. The subbuf_start() callback is | ||
| 394 | also called for the first sub-buffer when the channel is opened, to | ||
| 395 | give the client a chance to reserve space in it. In this case the | ||
| 396 | previous sub-buffer pointer passed into the callback will be NULL, so | ||
| 397 | the client should check the value of the prev_subbuf pointer before | ||
| 398 | writing into the previous sub-buffer. | ||
| 399 | |||
| 400 | Writing to a channel | ||
| 401 | -------------------- | ||
| 402 | |||
| 403 | Kernel clients write data into the current cpu's channel buffer using | ||
| 404 | relay_write() or __relay_write(). relay_write() is the main logging | ||
| 405 | function - it uses local_irq_save() to protect the buffer and should be | ||
| 406 | used if you might be logging from interrupt context. If you know | ||
| 407 | you'll never be logging from interrupt context, you can use | ||
| 408 | __relay_write(), which only disables preemption. These functions | ||
| 409 | don't return a value, so you can't determine whether or not they | ||
| 410 | failed - the assumption is that you wouldn't want to check a return | ||
| 411 | value in the fast logging path anyway, and that they'll always succeed | ||
| 412 | unless the buffer is full and no-overwrite mode is being used, in | ||
| 413 | which case you can detect a failed write in the subbuf_start() | ||
| 414 | callback by calling the relay_buf_full() helper function. | ||
| 415 | |||
| 416 | relay_reserve() is used to reserve a slot in a channel buffer which | ||
| 417 | can be written to later. This would typically be used in applications | ||
| 418 | that need to write directly into a channel buffer without having to | ||
| 419 | stage data in a temporary buffer beforehand. Because the actual write | ||
| 420 | may not happen immediately after the slot is reserved, applications | ||
| 421 | using relay_reserve() can keep a count of the number of bytes actually | ||
| 422 | written, either in space reserved in the sub-buffers themselves or as | ||
| 423 | a separate array. See the 'reserve' example in the relay-apps tarball | ||
| 424 | at http://relayfs.sourceforge.net for an example of how this can be | ||
| 425 | done. Because the write is under control of the client and is | ||
| 426 | separated from the reserve, relay_reserve() doesn't protect the buffer | ||
| 427 | at all - it's up to the client to provide the appropriate | ||
| 428 | synchronization when using relay_reserve(). | ||
| 429 | |||
| 430 | Closing a channel | ||
| 431 | ----------------- | ||
| 432 | |||
| 433 | The client calls relay_close() when it's finished using the channel. | ||
| 434 | The channel and its associated buffers are destroyed when there are no | ||
| 435 | longer any references to any of the channel buffers. relay_flush() | ||
| 436 | forces a sub-buffer switch on all the channel buffers, and can be used | ||
| 437 | to finalize and process the last sub-buffers before the channel is | ||
| 438 | closed. | ||
| 439 | |||
| 440 | Misc | ||
| 441 | ---- | ||
| 442 | |||
| 443 | Some applications may want to keep a channel around and re-use it | ||
| 444 | rather than open and close a new channel for each use. relay_reset() | ||
| 445 | can be used for this purpose - it resets a channel to its initial | ||
| 446 | state without reallocating channel buffer memory or destroying | ||
| 447 | existing mappings. It should however only be called when it's safe to | ||
| 448 | do so, i.e. when the channel isn't currently being written to. | ||
| 449 | |||
| 450 | Finally, there are a couple of utility callbacks that can be used for | ||
| 451 | different purposes. buf_mapped() is called whenever a channel buffer | ||
| 452 | is mmapped from user space and buf_unmapped() is called when it's | ||
| 453 | unmapped. The client can use this notification to trigger actions | ||
| 454 | within the kernel application, such as enabling/disabling logging to | ||
| 455 | the channel. | ||
| 456 | |||
| 457 | |||
| 458 | Resources | ||
| 459 | ========= | ||
| 460 | |||
| 461 | For news, example code, mailing list, etc. see the relay interface homepage: | ||
| 462 | |||
| 463 | http://relayfs.sourceforge.net | ||
| 464 | |||
| 465 | |||
| 466 | Credits | ||
| 467 | ======= | ||
| 468 | |||
| 469 | The ideas and specs for the relay interface came about as a result of | ||
| 470 | discussions on tracing involving the following: | ||
| 471 | |||
| 472 | Michel Dagenais <michel.dagenais@polymtl.ca> | ||
| 473 | Richard Moore <richardj_moore@uk.ibm.com> | ||
| 474 | Bob Wisniewski <bob@watson.ibm.com> | ||
| 475 | Karim Yaghmour <karim@opersys.com> | ||
| 476 | Tom Zanussi <zanussi@us.ibm.com> | ||
| 477 | |||
| 478 | Also thanks to Hubertus Franke for a lot of useful suggestions and bug | ||
| 479 | reports. | ||
diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt deleted file mode 100644 index 5832377b7340..000000000000 --- a/Documentation/filesystems/relayfs.txt +++ /dev/null | |||
| @@ -1,442 +0,0 @@ | |||
| 1 | |||
| 2 | relayfs - a high-speed data relay filesystem | ||
| 3 | ============================================ | ||
| 4 | |||
| 5 | relayfs is a filesystem designed to provide an efficient mechanism for | ||
| 6 | tools and facilities to relay large and potentially sustained streams | ||
| 7 | of data from kernel space to user space. | ||
| 8 | |||
| 9 | The main abstraction of relayfs is the 'channel'. A channel consists | ||
| 10 | of a set of per-cpu kernel buffers each represented by a file in the | ||
| 11 | relayfs filesystem. Kernel clients write into a channel using | ||
| 12 | efficient write functions which automatically log to the current cpu's | ||
| 13 | channel buffer. User space applications mmap() the per-cpu files and | ||
| 14 | retrieve the data as it becomes available. | ||
| 15 | |||
| 16 | The format of the data logged into the channel buffers is completely | ||
| 17 | up to the relayfs client; relayfs does however provide hooks which | ||
| 18 | allow clients to impose some structure on the buffer data. Nor does | ||
| 19 | relayfs implement any form of data filtering - this also is left to | ||
| 20 | the client. The purpose is to keep relayfs as simple as possible. | ||
| 21 | |||
| 22 | This document provides an overview of the relayfs API. The details of | ||
| 23 | the function parameters are documented along with the functions in the | ||
| 24 | filesystem code - please see that for details. | ||
| 25 | |||
| 26 | Semantics | ||
| 27 | ========= | ||
| 28 | |||
| 29 | Each relayfs channel has one buffer per CPU, each buffer has one or | ||
| 30 | more sub-buffers. Messages are written to the first sub-buffer until | ||
| 31 | it is too full to contain a new message, in which case it it is | ||
| 32 | written to the next (if available). Messages are never split across | ||
| 33 | sub-buffers. At this point, userspace can be notified so it empties | ||
| 34 | the first sub-buffer, while the kernel continues writing to the next. | ||
| 35 | |||
| 36 | When notified that a sub-buffer is full, the kernel knows how many | ||
| 37 | bytes of it are padding i.e. unused. Userspace can use this knowledge | ||
| 38 | to copy only valid data. | ||
| 39 | |||
| 40 | After copying it, userspace can notify the kernel that a sub-buffer | ||
| 41 | has been consumed. | ||
| 42 | |||
| 43 | relayfs can operate in a mode where it will overwrite data not yet | ||
| 44 | collected by userspace, and not wait for it to consume it. | ||
| 45 | |||
| 46 | relayfs itself does not provide for communication of such data between | ||
| 47 | userspace and kernel, allowing the kernel side to remain simple and | ||
| 48 | not impose a single interface on userspace. It does provide a set of | ||
| 49 | examples and a separate helper though, described below. | ||
| 50 | |||
| 51 | klog and relay-apps example code | ||
| 52 | ================================ | ||
| 53 | |||
| 54 | relayfs itself is ready to use, but to make things easier, a couple | ||
| 55 | simple utility functions and a set of examples are provided. | ||
| 56 | |||
| 57 | The relay-apps example tarball, available on the relayfs sourceforge | ||
| 58 | site, contains a set of self-contained examples, each consisting of a | ||
| 59 | pair of .c files containing boilerplate code for each of the user and | ||
| 60 | kernel sides of a relayfs application; combined these two sets of | ||
| 61 | boilerplate code provide glue to easily stream data to disk, without | ||
| 62 | having to bother with mundane housekeeping chores. | ||
| 63 | |||
| 64 | The 'klog debugging functions' patch (klog.patch in the relay-apps | ||
| 65 | tarball) provides a couple of high-level logging functions to the | ||
| 66 | kernel which allow writing formatted text or raw data to a channel, | ||
| 67 | regardless of whether a channel to write into exists or not, or | ||
| 68 | whether relayfs is compiled into the kernel or is configured as a | ||
| 69 | module. These functions allow you to put unconditional 'trace' | ||
| 70 | statements anywhere in the kernel or kernel modules; only when there | ||
| 71 | is a 'klog handler' registered will data actually be logged (see the | ||
| 72 | klog and kleak examples for details). | ||
| 73 | |||
| 74 | It is of course possible to use relayfs from scratch i.e. without | ||
| 75 | using any of the relay-apps example code or klog, but you'll have to | ||
| 76 | implement communication between userspace and kernel, allowing both to | ||
| 77 | convey the state of buffers (full, empty, amount of padding). | ||
| 78 | |||
| 79 | klog and the relay-apps examples can be found in the relay-apps | ||
| 80 | tarball on http://relayfs.sourceforge.net | ||
| 81 | |||
| 82 | |||
| 83 | The relayfs user space API | ||
| 84 | ========================== | ||
| 85 | |||
| 86 | relayfs implements basic file operations for user space access to | ||
| 87 | relayfs channel buffer data. Here are the file operations that are | ||
| 88 | available and some comments regarding their behavior: | ||
| 89 | |||
| 90 | open() enables user to open an _existing_ buffer. | ||
| 91 | |||
| 92 | mmap() results in channel buffer being mapped into the caller's | ||
| 93 | memory space. Note that you can't do a partial mmap - you must | ||
| 94 | map the entire file, which is NRBUF * SUBBUFSIZE. | ||
| 95 | |||
| 96 | read() read the contents of a channel buffer. The bytes read are | ||
| 97 | 'consumed' by the reader i.e. they won't be available again | ||
| 98 | to subsequent reads. If the channel is being used in | ||
| 99 | no-overwrite mode (the default), it can be read at any time | ||
| 100 | even if there's an active kernel writer. If the channel is | ||
| 101 | being used in overwrite mode and there are active channel | ||
| 102 | writers, results may be unpredictable - users should make | ||
| 103 | sure that all logging to the channel has ended before using | ||
| 104 | read() with overwrite mode. | ||
| 105 | |||
| 106 | poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are | ||
| 107 | notified when sub-buffer boundaries are crossed. | ||
| 108 | |||
| 109 | close() decrements the channel buffer's refcount. When the refcount | ||
| 110 | reaches 0 i.e. when no process or kernel client has the buffer | ||
| 111 | open, the channel buffer is freed. | ||
| 112 | |||
| 113 | |||
| 114 | In order for a user application to make use of relayfs files, the | ||
| 115 | relayfs filesystem must be mounted. For example, | ||
| 116 | |||
| 117 | mount -t relayfs relayfs /mnt/relay | ||
| 118 | |||
| 119 | NOTE: relayfs doesn't need to be mounted for kernel clients to create | ||
| 120 | or use channels - it only needs to be mounted when user space | ||
| 121 | applications need access to the buffer data. | ||
| 122 | |||
| 123 | |||
| 124 | The relayfs kernel API | ||
| 125 | ====================== | ||
| 126 | |||
| 127 | Here's a summary of the API relayfs provides to in-kernel clients: | ||
| 128 | |||
| 129 | |||
| 130 | channel management functions: | ||
| 131 | |||
| 132 | relay_open(base_filename, parent, subbuf_size, n_subbufs, | ||
| 133 | callbacks) | ||
| 134 | relay_close(chan) | ||
| 135 | relay_flush(chan) | ||
| 136 | relay_reset(chan) | ||
| 137 | relayfs_create_dir(name, parent) | ||
| 138 | relayfs_remove_dir(dentry) | ||
| 139 | relayfs_create_file(name, parent, mode, fops, data) | ||
| 140 | relayfs_remove_file(dentry) | ||
| 141 | |||
| 142 | channel management typically called on instigation of userspace: | ||
| 143 | |||
| 144 | relay_subbufs_consumed(chan, cpu, subbufs_consumed) | ||
| 145 | |||
| 146 | write functions: | ||
| 147 | |||
| 148 | relay_write(chan, data, length) | ||
| 149 | __relay_write(chan, data, length) | ||
| 150 | relay_reserve(chan, length) | ||
| 151 | |||
| 152 | callbacks: | ||
| 153 | |||
| 154 | subbuf_start(buf, subbuf, prev_subbuf, prev_padding) | ||
| 155 | buf_mapped(buf, filp) | ||
| 156 | buf_unmapped(buf, filp) | ||
| 157 | create_buf_file(filename, parent, mode, buf, is_global) | ||
| 158 | remove_buf_file(dentry) | ||
| 159 | |||
| 160 | helper functions: | ||
| 161 | |||
| 162 | relay_buf_full(buf) | ||
| 163 | subbuf_start_reserve(buf, length) | ||
| 164 | |||
| 165 | |||
| 166 | Creating a channel | ||
| 167 | ------------------ | ||
| 168 | |||
| 169 | relay_open() is used to create a channel, along with its per-cpu | ||
| 170 | channel buffers. Each channel buffer will have an associated file | ||
| 171 | created for it in the relayfs filesystem, which can be opened and | ||
| 172 | mmapped from user space if desired. The files are named | ||
| 173 | basename0...basenameN-1 where N is the number of online cpus, and by | ||
| 174 | default will be created in the root of the filesystem. If you want a | ||
| 175 | directory structure to contain your relayfs files, you can create it | ||
| 176 | with relayfs_create_dir() and pass the parent directory to | ||
| 177 | relay_open(). Clients are responsible for cleaning up any directory | ||
| 178 | structure they create when the channel is closed - use | ||
| 179 | relayfs_remove_dir() for that. | ||
| 180 | |||
| 181 | The total size of each per-cpu buffer is calculated by multiplying the | ||
| 182 | number of sub-buffers by the sub-buffer size passed into relay_open(). | ||
| 183 | The idea behind sub-buffers is that they're basically an extension of | ||
| 184 | double-buffering to N buffers, and they also allow applications to | ||
| 185 | easily implement random-access-on-buffer-boundary schemes, which can | ||
| 186 | be important for some high-volume applications. The number and size | ||
| 187 | of sub-buffers is completely dependent on the application and even for | ||
| 188 | the same application, different conditions will warrant different | ||
| 189 | values for these parameters at different times. Typically, the right | ||
| 190 | values to use are best decided after some experimentation; in general, | ||
| 191 | though, it's safe to assume that having only 1 sub-buffer is a bad | ||
| 192 | idea - you're guaranteed to either overwrite data or lose events | ||
| 193 | depending on the channel mode being used. | ||
| 194 | |||
| 195 | Channel 'modes' | ||
| 196 | --------------- | ||
| 197 | |||
| 198 | relayfs channels can be used in either of two modes - 'overwrite' or | ||
| 199 | 'no-overwrite'. The mode is entirely determined by the implementation | ||
| 200 | of the subbuf_start() callback, as described below. In 'overwrite' | ||
| 201 | mode, also known as 'flight recorder' mode, writes continuously cycle | ||
| 202 | around the buffer and will never fail, but will unconditionally | ||
| 203 | overwrite old data regardless of whether it's actually been consumed. | ||
| 204 | In no-overwrite mode, writes will fail i.e. data will be lost, if the | ||
| 205 | number of unconsumed sub-buffers equals the total number of | ||
| 206 | sub-buffers in the channel. It should be clear that if there is no | ||
| 207 | consumer or if the consumer can't consume sub-buffers fast enough, | ||
| 208 | data will be lost in either case; the only difference is whether data | ||
| 209 | is lost from the beginning or the end of a buffer. | ||
| 210 | |||
| 211 | As explained above, a relayfs channel is made up of one or more | ||
| 212 | per-cpu channel buffers, each implemented as a circular buffer | ||
| 213 | subdivided into one or more sub-buffers. Messages are written into | ||
| 214 | the current sub-buffer of the channel's current per-cpu buffer via the | ||
| 215 | write functions described below. Whenever a message can't fit into | ||
| 216 | the current sub-buffer, because there's no room left for it, the | ||
| 217 | client is notified via the subbuf_start() callback that a switch to a | ||
| 218 | new sub-buffer is about to occur. The client uses this callback to 1) | ||
| 219 | initialize the next sub-buffer if appropriate 2) finalize the previous | ||
| 220 | sub-buffer if appropriate and 3) return a boolean value indicating | ||
| 221 | whether or not to actually go ahead with the sub-buffer switch. | ||
| 222 | |||
| 223 | To implement 'no-overwrite' mode, the userspace client would provide | ||
| 224 | an implementation of the subbuf_start() callback something like the | ||
| 225 | following: | ||
| 226 | |||
| 227 | static int subbuf_start(struct rchan_buf *buf, | ||
| 228 | void *subbuf, | ||
| 229 | void *prev_subbuf, | ||
| 230 | unsigned int prev_padding) | ||
| 231 | { | ||
| 232 | if (prev_subbuf) | ||
| 233 | *((unsigned *)prev_subbuf) = prev_padding; | ||
| 234 | |||
| 235 | if (relay_buf_full(buf)) | ||
| 236 | return 0; | ||
| 237 | |||
| 238 | subbuf_start_reserve(buf, sizeof(unsigned int)); | ||
| 239 | |||
| 240 | return 1; | ||
| 241 | } | ||
| 242 | |||
| 243 | If the current buffer is full i.e. all sub-buffers remain unconsumed, | ||
| 244 | the callback returns 0 to indicate that the buffer switch should not | ||
| 245 | occur yet i.e. until the consumer has had a chance to read the current | ||
| 246 | set of ready sub-buffers. For the relay_buf_full() function to make | ||
| 247 | sense, the consumer is responsible for notifying relayfs when | ||
| 248 | sub-buffers have been consumed via relay_subbufs_consumed(). Any | ||
| 249 | subsequent attempts to write into the buffer will again invoke the | ||
| 250 | subbuf_start() callback with the same parameters; only when the | ||
| 251 | consumer has consumed one or more of the ready sub-buffers will | ||
| 252 | relay_buf_full() return 0, in which case the buffer switch can | ||
| 253 | continue. | ||
| 254 | |||
| 255 | The implementation of the subbuf_start() callback for 'overwrite' mode | ||
| 256 | would be very similar: | ||
| 257 | |||
| 258 | static int subbuf_start(struct rchan_buf *buf, | ||
| 259 | void *subbuf, | ||
| 260 | void *prev_subbuf, | ||
| 261 | unsigned int prev_padding) | ||
| 262 | { | ||
| 263 | if (prev_subbuf) | ||
| 264 | *((unsigned *)prev_subbuf) = prev_padding; | ||
| 265 | |||
| 266 | subbuf_start_reserve(buf, sizeof(unsigned int)); | ||
| 267 | |||
| 268 | return 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | In this case, the relay_buf_full() check is meaningless and the | ||
| 272 | callback always returns 1, causing the buffer switch to occur | ||
| 273 | unconditionally. It's also meaningless for the client to use the | ||
| 274 | relay_subbufs_consumed() function in this mode, as it's never | ||
| 275 | consulted. | ||
| 276 | |||
| 277 | The default subbuf_start() implementation, used if the client doesn't | ||
| 278 | define any callbacks, or doesn't define the subbuf_start() callback, | ||
| 279 | implements the simplest possible 'no-overwrite' mode i.e. it does | ||
| 280 | nothing but return 0. | ||
| 281 | |||
| 282 | Header information can be reserved at the beginning of each sub-buffer | ||
| 283 | by calling the subbuf_start_reserve() helper function from within the | ||
| 284 | subbuf_start() callback. This reserved area can be used to store | ||
| 285 | whatever information the client wants. In the example above, room is | ||
| 286 | reserved in each sub-buffer to store the padding count for that | ||
| 287 | sub-buffer. This is filled in for the previous sub-buffer in the | ||
| 288 | subbuf_start() implementation; the padding value for the previous | ||
| 289 | sub-buffer is passed into the subbuf_start() callback along with a | ||
| 290 | pointer to the previous sub-buffer, since the padding value isn't | ||
| 291 | known until a sub-buffer is filled. The subbuf_start() callback is | ||
| 292 | also called for the first sub-buffer when the channel is opened, to | ||
| 293 | give the client a chance to reserve space in it. In this case the | ||
| 294 | previous sub-buffer pointer passed into the callback will be NULL, so | ||
| 295 | the client should check the value of the prev_subbuf pointer before | ||
| 296 | writing into the previous sub-buffer. | ||
| 297 | |||
| 298 | Writing to a channel | ||
| 299 | -------------------- | ||
| 300 | |||
| 301 | kernel clients write data into the current cpu's channel buffer using | ||
| 302 | relay_write() or __relay_write(). relay_write() is the main logging | ||
| 303 | function - it uses local_irq_save() to protect the buffer and should be | ||
| 304 | used if you might be logging from interrupt context. If you know | ||
| 305 | you'll never be logging from interrupt context, you can use | ||
| 306 | __relay_write(), which only disables preemption. These functions | ||
| 307 | don't return a value, so you can't determine whether or not they | ||
| 308 | failed - the assumption is that you wouldn't want to check a return | ||
| 309 | value in the fast logging path anyway, and that they'll always succeed | ||
| 310 | unless the buffer is full and no-overwrite mode is being used, in | ||
| 311 | which case you can detect a failed write in the subbuf_start() | ||
| 312 | callback by calling the relay_buf_full() helper function. | ||
| 313 | |||
| 314 | relay_reserve() is used to reserve a slot in a channel buffer which | ||
| 315 | can be written to later. This would typically be used in applications | ||
| 316 | that need to write directly into a channel buffer without having to | ||
| 317 | stage data in a temporary buffer beforehand. Because the actual write | ||
| 318 | may not happen immediately after the slot is reserved, applications | ||
| 319 | using relay_reserve() can keep a count of the number of bytes actually | ||
| 320 | written, either in space reserved in the sub-buffers themselves or as | ||
| 321 | a separate array. See the 'reserve' example in the relay-apps tarball | ||
| 322 | at http://relayfs.sourceforge.net for an example of how this can be | ||
| 323 | done. Because the write is under control of the client and is | ||
| 324 | separated from the reserve, relay_reserve() doesn't protect the buffer | ||
| 325 | at all - it's up to the client to provide the appropriate | ||
| 326 | synchronization when using relay_reserve(). | ||
| 327 | |||
| 328 | Closing a channel | ||
| 329 | ----------------- | ||
| 330 | |||
| 331 | The client calls relay_close() when it's finished using the channel. | ||
| 332 | The channel and its associated buffers are destroyed when there are no | ||
| 333 | longer any references to any of the channel buffers. relay_flush() | ||
| 334 | forces a sub-buffer switch on all the channel buffers, and can be used | ||
| 335 | to finalize and process the last sub-buffers before the channel is | ||
| 336 | closed. | ||
| 337 | |||
| 338 | Creating non-relay files | ||
| 339 | ------------------------ | ||
| 340 | |||
| 341 | relay_open() automatically creates files in the relayfs filesystem to | ||
| 342 | represent the per-cpu kernel buffers; it's often useful for | ||
| 343 | applications to be able to create their own files alongside the relay | ||
| 344 | files in the relayfs filesystem as well e.g. 'control' files much like | ||
| 345 | those created in /proc or debugfs for similar purposes, used to | ||
| 346 | communicate control information between the kernel and user sides of a | ||
| 347 | relayfs application. For this purpose the relayfs_create_file() and | ||
| 348 | relayfs_remove_file() API functions exist. For relayfs_create_file(), | ||
| 349 | the caller passes in a set of user-defined file operations to be used | ||
| 350 | for the file and an optional void * to a user-specified data item, | ||
| 351 | which will be accessible via inode->u.generic_ip (see the relay-apps | ||
| 352 | tarball for examples). The file_operations are a required parameter | ||
| 353 | to relayfs_create_file() and thus the semantics of these files are | ||
| 354 | completely defined by the caller. | ||
| 355 | |||
| 356 | See the relay-apps tarball at http://relayfs.sourceforge.net for | ||
| 357 | examples of how these non-relay files are meant to be used. | ||
| 358 | |||
| 359 | Creating relay files in other filesystems | ||
| 360 | ----------------------------------------- | ||
| 361 | |||
| 362 | By default of course, relay_open() creates relay files in the relayfs | ||
| 363 | filesystem. Because relay_file_operations is exported, however, it's | ||
| 364 | also possible to create and use relay files in other pseudo-filesystems | ||
| 365 | such as debugfs. | ||
| 366 | |||
| 367 | For this purpose, two callback functions are provided, | ||
| 368 | create_buf_file() and remove_buf_file(). create_buf_file() is called | ||
| 369 | once for each per-cpu buffer from relay_open() to allow the client to | ||
| 370 | create a file to be used to represent the corresponding buffer; if | ||
| 371 | this callback is not defined, the default implementation will create | ||
| 372 | and return a file in the relayfs filesystem to represent the buffer. | ||
| 373 | The callback should return the dentry of the file created to represent | ||
| 374 | the relay buffer. Note that the parent directory passed to | ||
| 375 | relay_open() (and passed along to the callback), if specified, must | ||
| 376 | exist in the same filesystem the new relay file is created in. If | ||
| 377 | create_buf_file() is defined, remove_buf_file() must also be defined; | ||
| 378 | it's responsible for deleting the file(s) created in create_buf_file() | ||
| 379 | and is called during relay_close(). | ||
| 380 | |||
| 381 | The create_buf_file() implementation can also be defined in such a way | ||
| 382 | as to allow the creation of a single 'global' buffer instead of the | ||
| 383 | default per-cpu set. This can be useful for applications interested | ||
| 384 | mainly in seeing the relative ordering of system-wide events without | ||
| 385 | the need to bother with saving explicit timestamps for the purpose of | ||
| 386 | merging/sorting per-cpu files in a postprocessing step. | ||
| 387 | |||
| 388 | To have relay_open() create a global buffer, the create_buf_file() | ||
| 389 | implementation should set the value of the is_global outparam to a | ||
| 390 | non-zero value in addition to creating the file that will be used to | ||
| 391 | represent the single buffer. In the case of a global buffer, | ||
| 392 | create_buf_file() and remove_buf_file() will be called only once. The | ||
| 393 | normal channel-writing functions e.g. relay_write() can still be used | ||
| 394 | - writes from any cpu will transparently end up in the global buffer - | ||
| 395 | but since it is a global buffer, callers should make sure they use the | ||
| 396 | proper locking for such a buffer, either by wrapping writes in a | ||
| 397 | spinlock, or by copying a write function from relayfs_fs.h and | ||
| 398 | creating a local version that internally does the proper locking. | ||
| 399 | |||
| 400 | See the 'exported-relayfile' examples in the relay-apps tarball for | ||
| 401 | examples of creating and using relay files in debugfs. | ||
| 402 | |||
| 403 | Misc | ||
| 404 | ---- | ||
| 405 | |||
| 406 | Some applications may want to keep a channel around and re-use it | ||
| 407 | rather than open and close a new channel for each use. relay_reset() | ||
| 408 | can be used for this purpose - it resets a channel to its initial | ||
| 409 | state without reallocating channel buffer memory or destroying | ||
| 410 | existing mappings. It should however only be called when it's safe to | ||
| 411 | do so i.e. when the channel isn't currently being written to. | ||
| 412 | |||
| 413 | Finally, there are a couple of utility callbacks that can be used for | ||
| 414 | different purposes. buf_mapped() is called whenever a channel buffer | ||
| 415 | is mmapped from user space and buf_unmapped() is called when it's | ||
| 416 | unmapped. The client can use this notification to trigger actions | ||
| 417 | within the kernel application, such as enabling/disabling logging to | ||
| 418 | the channel. | ||
| 419 | |||
| 420 | |||
| 421 | Resources | ||
| 422 | ========= | ||
| 423 | |||
| 424 | For news, example code, mailing list, etc. see the relayfs homepage: | ||
| 425 | |||
| 426 | http://relayfs.sourceforge.net | ||
| 427 | |||
| 428 | |||
| 429 | Credits | ||
| 430 | ======= | ||
| 431 | |||
| 432 | The ideas and specs for relayfs came about as a result of discussions | ||
| 433 | on tracing involving the following: | ||
| 434 | |||
| 435 | Michel Dagenais <michel.dagenais@polymtl.ca> | ||
| 436 | Richard Moore <richardj_moore@uk.ibm.com> | ||
| 437 | Bob Wisniewski <bob@watson.ibm.com> | ||
| 438 | Karim Yaghmour <karim@opersys.com> | ||
| 439 | Tom Zanussi <zanussi@us.ibm.com> | ||
| 440 | |||
| 441 | Also thanks to Hubertus Franke for a lot of useful suggestions and bug | ||
| 442 | reports. | ||
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 0b62c62142cf..5c3a51905969 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
| @@ -25,6 +25,7 @@ Currently, these files are in /proc/sys/fs: | |||
| 25 | - inode-state | 25 | - inode-state |
| 26 | - overflowuid | 26 | - overflowuid |
| 27 | - overflowgid | 27 | - overflowgid |
| 28 | - suid_dumpable | ||
| 28 | - super-max | 29 | - super-max |
| 29 | - super-nr | 30 | - super-nr |
| 30 | 31 | ||
| @@ -131,6 +132,25 @@ The default is 65534. | |||
| 131 | 132 | ||
| 132 | ============================================================== | 133 | ============================================================== |
| 133 | 134 | ||
| 135 | suid_dumpable: | ||
| 136 | |||
| 137 | This value can be used to query and set the core dump mode for setuid | ||
| 138 | or otherwise protected/tainted binaries. The modes are | ||
| 139 | |||
| 140 | 0 - (default) - traditional behaviour. Any process which has changed | ||
| 141 | privilege levels or is execute only will not be dumped | ||
| 142 | 1 - (debug) - all processes dump core when possible. The core dump is | ||
| 143 | owned by the current user and no security is applied. This is | ||
| 144 | intended for system debugging situations only. Ptrace is unchecked. | ||
| 145 | 2 - (suidsafe) - any binary which normally would not be dumped is dumped | ||
| 146 | readable by root only. This allows the end user to remove | ||
| 147 | such a dump but not access it directly. For security reasons | ||
| 148 | core dumps in this mode will not overwrite one another or | ||
| 149 | other files. This mode is appropriate when administrators are | ||
| 150 | attempting to debug problems in a normal environment. | ||
| 151 | |||
| 152 | ============================================================== | ||
| 153 | |||
| 134 | super-max & super-nr: | 154 | super-max & super-nr: |
| 135 | 155 | ||
| 136 | These numbers control the maximum number of superblocks, and | 156 | These numbers control the maximum number of superblocks, and |
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 7345c338080a..89bf8c20a586 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
| @@ -50,7 +50,6 @@ show up in /proc/sys/kernel: | |||
| 50 | - shmmax [ sysv ipc ] | 50 | - shmmax [ sysv ipc ] |
| 51 | - shmmni | 51 | - shmmni |
| 52 | - stop-a [ SPARC only ] | 52 | - stop-a [ SPARC only ] |
| 53 | - suid_dumpable | ||
| 54 | - sysrq ==> Documentation/sysrq.txt | 53 | - sysrq ==> Documentation/sysrq.txt |
| 55 | - tainted | 54 | - tainted |
| 56 | - threads-max | 55 | - threads-max |
| @@ -310,25 +309,6 @@ kernel. This value defaults to SHMMAX. | |||
| 310 | 309 | ||
| 311 | ============================================================== | 310 | ============================================================== |
| 312 | 311 | ||
| 313 | suid_dumpable: | ||
| 314 | |||
| 315 | This value can be used to query and set the core dump mode for setuid | ||
| 316 | or otherwise protected/tainted binaries. The modes are | ||
| 317 | |||
| 318 | 0 - (default) - traditional behaviour. Any process which has changed | ||
| 319 | privilege levels or is execute only will not be dumped | ||
| 320 | 1 - (debug) - all processes dump core when possible. The core dump is | ||
| 321 | owned by the current user and no security is applied. This is | ||
| 322 | intended for system debugging situations only. Ptrace is unchecked. | ||
| 323 | 2 - (suidsafe) - any binary which normally would not be dumped is dumped | ||
| 324 | readable by root only. This allows the end user to remove | ||
| 325 | such a dump but not access it directly. For security reasons | ||
| 326 | core dumps in this mode will not overwrite one another or | ||
| 327 | other files. This mode is appropriate when adminstrators are | ||
| 328 | attempting to debug problems in a normal environment. | ||
| 329 | |||
| 330 | ============================================================== | ||
| 331 | |||
| 332 | tainted: | 312 | tainted: |
| 333 | 313 | ||
| 334 | Non-zero if the kernel has been tainted. Numeric values, which | 314 | Non-zero if the kernel has been tainted. Numeric values, which |
| @@ -1,7 +1,7 @@ | |||
| 1 | VERSION = 2 | 1 | VERSION = 2 |
| 2 | PATCHLEVEL = 6 | 2 | PATCHLEVEL = 6 |
| 3 | SUBLEVEL = 18 | 3 | SUBLEVEL = 18 |
| 4 | EXTRAVERSION = -rc4 | 4 | EXTRAVERSION = -rc5 |
| 5 | NAME=Crazed Snow-Weasel | 5 | NAME=Crazed Snow-Weasel |
| 6 | 6 | ||
| 7 | # *DOCUMENTATION* | 7 | # *DOCUMENTATION* |
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index f71fb4a029cb..b2751eadbc56 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
| @@ -142,6 +142,7 @@ config X86_SUMMIT | |||
| 142 | In particular, it is needed for the x440. | 142 | In particular, it is needed for the x440. |
| 143 | 143 | ||
| 144 | If you don't have one of these computers, you should say N here. | 144 | If you don't have one of these computers, you should say N here. |
| 145 | If you want to build a NUMA kernel, you must select ACPI. | ||
| 145 | 146 | ||
| 146 | config X86_BIGSMP | 147 | config X86_BIGSMP |
| 147 | bool "Support for other sub-arch SMP systems with more than 8 CPUs" | 148 | bool "Support for other sub-arch SMP systems with more than 8 CPUs" |
| @@ -169,6 +170,7 @@ config X86_GENERICARCH | |||
| 169 | help | 170 | help |
| 170 | This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. | 171 | This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. |
| 171 | It is intended for a generic binary kernel. | 172 | It is intended for a generic binary kernel. |
| 173 | If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. | ||
| 172 | 174 | ||
| 173 | config X86_ES7000 | 175 | config X86_ES7000 |
| 174 | bool "Support for Unisys ES7000 IA32 series" | 176 | bool "Support for Unisys ES7000 IA32 series" |
| @@ -542,7 +544,7 @@ config X86_PAE | |||
| 542 | # Common NUMA Features | 544 | # Common NUMA Features |
| 543 | config NUMA | 545 | config NUMA |
| 544 | bool "Numa Memory Allocation and Scheduler Support" | 546 | bool "Numa Memory Allocation and Scheduler Support" |
| 545 | depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) | 547 | depends on SMP && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) |
| 546 | default n if X86_PC | 548 | default n if X86_PC |
| 547 | default y if (X86_NUMAQ || X86_SUMMIT) | 549 | default y if (X86_NUMAQ || X86_SUMMIT) |
| 548 | 550 | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index efb41e81351c..e6ea00edcb54 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
| @@ -567,16 +567,11 @@ static struct cpufreq_driver acpi_cpufreq_driver = { | |||
| 567 | static int __init | 567 | static int __init |
| 568 | acpi_cpufreq_init (void) | 568 | acpi_cpufreq_init (void) |
| 569 | { | 569 | { |
| 570 | int result = 0; | ||
| 571 | |||
| 572 | dprintk("acpi_cpufreq_init\n"); | 570 | dprintk("acpi_cpufreq_init\n"); |
| 573 | 571 | ||
| 574 | result = acpi_cpufreq_early_init_acpi(); | 572 | acpi_cpufreq_early_init_acpi(); |
| 575 | 573 | ||
| 576 | if (!result) | 574 | return cpufreq_register_driver(&acpi_cpufreq_driver); |
| 577 | result = cpufreq_register_driver(&acpi_cpufreq_driver); | ||
| 578 | |||
| 579 | return (result); | ||
| 580 | } | 575 | } |
| 581 | 576 | ||
| 582 | 577 | ||
diff --git a/drivers/base/node.c b/drivers/base/node.c index d7de1753e094..e9b0957f15d1 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c | |||
| @@ -64,7 +64,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) | |||
| 64 | "Node %d Mapped: %8lu kB\n" | 64 | "Node %d Mapped: %8lu kB\n" |
| 65 | "Node %d AnonPages: %8lu kB\n" | 65 | "Node %d AnonPages: %8lu kB\n" |
| 66 | "Node %d PageTables: %8lu kB\n" | 66 | "Node %d PageTables: %8lu kB\n" |
| 67 | "Node %d NFS Unstable: %8lu kB\n" | 67 | "Node %d NFS_Unstable: %8lu kB\n" |
| 68 | "Node %d Bounce: %8lu kB\n" | 68 | "Node %d Bounce: %8lu kB\n" |
| 69 | "Node %d Slab: %8lu kB\n", | 69 | "Node %d Slab: %8lu kB\n", |
| 70 | nid, K(i.totalram), | 70 | nid, K(i.totalram), |
diff --git a/drivers/cdrom/gscd.c b/drivers/cdrom/gscd.c index b6ee50a2916d..fa7082489765 100644 --- a/drivers/cdrom/gscd.c +++ b/drivers/cdrom/gscd.c | |||
| @@ -266,7 +266,7 @@ repeat: | |||
| 266 | goto out; | 266 | goto out; |
| 267 | 267 | ||
| 268 | if (req->cmd != READ) { | 268 | if (req->cmd != READ) { |
| 269 | printk("GSCD: bad cmd %lu\n", rq_data_dir(req)); | 269 | printk("GSCD: bad cmd %u\n", rq_data_dir(req)); |
| 270 | end_request(req, 0); | 270 | end_request(req, 0); |
| 271 | goto repeat; | 271 | goto repeat; |
| 272 | } | 272 | } |
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 4ea7bd5f4f56..a369dd6877d8 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c | |||
| @@ -142,6 +142,7 @@ typedef struct _moxa_board_conf { | |||
| 142 | 142 | ||
| 143 | static moxa_board_conf moxa_boards[MAX_BOARDS]; | 143 | static moxa_board_conf moxa_boards[MAX_BOARDS]; |
| 144 | static void __iomem *moxaBaseAddr[MAX_BOARDS]; | 144 | static void __iomem *moxaBaseAddr[MAX_BOARDS]; |
| 145 | static int loadstat[MAX_BOARDS]; | ||
| 145 | 146 | ||
| 146 | struct moxa_str { | 147 | struct moxa_str { |
| 147 | int type; | 148 | int type; |
| @@ -1688,6 +1689,8 @@ int MoxaDriverPoll(void) | |||
| 1688 | if (moxaCard == 0) | 1689 | if (moxaCard == 0) |
| 1689 | return (-1); | 1690 | return (-1); |
| 1690 | for (card = 0; card < MAX_BOARDS; card++) { | 1691 | for (card = 0; card < MAX_BOARDS; card++) { |
| 1692 | if (loadstat[card] == 0) | ||
| 1693 | continue; | ||
| 1691 | if ((ports = moxa_boards[card].numPorts) == 0) | 1694 | if ((ports = moxa_boards[card].numPorts) == 0) |
| 1692 | continue; | 1695 | continue; |
| 1693 | if (readb(moxaIntPend[card]) == 0xff) { | 1696 | if (readb(moxaIntPend[card]) == 0xff) { |
| @@ -2903,6 +2906,7 @@ static int moxaloadcode(int cardno, unsigned char __user *tmp, int len) | |||
| 2903 | } | 2906 | } |
| 2904 | break; | 2907 | break; |
| 2905 | } | 2908 | } |
| 2909 | loadstat[cardno] = 1; | ||
| 2906 | return (0); | 2910 | return (0); |
| 2907 | } | 2911 | } |
| 2908 | 2912 | ||
| @@ -2920,7 +2924,7 @@ static int moxaloadc218(int cardno, void __iomem *baseAddr, int len) | |||
| 2920 | len1 = len >> 1; | 2924 | len1 = len >> 1; |
| 2921 | ptr = (ushort *) moxaBuff; | 2925 | ptr = (ushort *) moxaBuff; |
| 2922 | for (i = 0; i < len1; i++) | 2926 | for (i = 0; i < len1; i++) |
| 2923 | usum += *(ptr + i); | 2927 | usum += le16_to_cpu(*(ptr + i)); |
| 2924 | retry = 0; | 2928 | retry = 0; |
| 2925 | do { | 2929 | do { |
| 2926 | len1 = len >> 1; | 2930 | len1 = len >> 1; |
| @@ -2992,7 +2996,7 @@ static int moxaloadc320(int cardno, void __iomem *baseAddr, int len, int *numPor | |||
| 2992 | wlen = len >> 1; | 2996 | wlen = len >> 1; |
| 2993 | uptr = (ushort *) moxaBuff; | 2997 | uptr = (ushort *) moxaBuff; |
| 2994 | for (i = 0; i < wlen; i++) | 2998 | for (i = 0; i < wlen; i++) |
| 2995 | usum += uptr[i]; | 2999 | usum += le16_to_cpu(uptr[i]); |
| 2996 | retry = 0; | 3000 | retry = 0; |
| 2997 | j = 0; | 3001 | j = 0; |
| 2998 | do { | 3002 | do { |
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index bfdb90242a90..bb0d9199e994 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c | |||
| @@ -153,6 +153,15 @@ int tty_ioctl(struct inode * inode, struct file * file, | |||
| 153 | static int tty_fasync(int fd, struct file * filp, int on); | 153 | static int tty_fasync(int fd, struct file * filp, int on); |
| 154 | static void release_mem(struct tty_struct *tty, int idx); | 154 | static void release_mem(struct tty_struct *tty, int idx); |
| 155 | 155 | ||
| 156 | /** | ||
| 157 | * alloc_tty_struct - allocate a tty object | ||
| 158 | * | ||
| 159 | * Return a new empty tty structure. The data fields have not | ||
| 160 | * been initialized in any way but has been zeroed | ||
| 161 | * | ||
| 162 | * Locking: none | ||
| 163 | * FIXME: use kzalloc | ||
| 164 | */ | ||
| 156 | 165 | ||
| 157 | static struct tty_struct *alloc_tty_struct(void) | 166 | static struct tty_struct *alloc_tty_struct(void) |
| 158 | { | 167 | { |
| @@ -166,6 +175,15 @@ static struct tty_struct *alloc_tty_struct(void) | |||
| 166 | 175 | ||
| 167 | static void tty_buffer_free_all(struct tty_struct *); | 176 | static void tty_buffer_free_all(struct tty_struct *); |
| 168 | 177 | ||
| 178 | /** | ||
| 179 | * free_tty_struct - free a disused tty | ||
| 180 | * @tty: tty struct to free | ||
| 181 | * | ||
| 182 | * Free the write buffers, tty queue and tty memory itself. | ||
| 183 | * | ||
| 184 | * Locking: none. Must be called after tty is definitely unused | ||
| 185 | */ | ||
| 186 | |||
| 169 | static inline void free_tty_struct(struct tty_struct *tty) | 187 | static inline void free_tty_struct(struct tty_struct *tty) |
| 170 | { | 188 | { |
| 171 | kfree(tty->write_buf); | 189 | kfree(tty->write_buf); |
| @@ -175,6 +193,17 @@ static inline void free_tty_struct(struct tty_struct *tty) | |||
| 175 | 193 | ||
| 176 | #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) | 194 | #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) |
| 177 | 195 | ||
| 196 | /** | ||
| 197 | * tty_name - return tty naming | ||
| 198 | * @tty: tty structure | ||
| 199 | * @buf: buffer for output | ||
| 200 | * | ||
| 201 | * Convert a tty structure into a name. The name reflects the kernel | ||
| 202 | * naming policy and if udev is in use may not reflect user space | ||
| 203 | * | ||
| 204 | * Locking: none | ||
| 205 | */ | ||
| 206 | |||
| 178 | char *tty_name(struct tty_struct *tty, char *buf) | 207 | char *tty_name(struct tty_struct *tty, char *buf) |
| 179 | { | 208 | { |
| 180 | if (!tty) /* Hmm. NULL pointer. That's fun. */ | 209 | if (!tty) /* Hmm. NULL pointer. That's fun. */ |
| @@ -235,6 +264,28 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) | |||
| 235 | * Tty buffer allocation management | 264 | * Tty buffer allocation management |
| 236 | */ | 265 | */ |
| 237 | 266 | ||
| 267 | |||
| 268 | /** | ||
| 269 | * tty_buffer_free_all - free buffers used by a tty | ||
| 270 | * @tty: tty to free from | ||
| 271 | * | ||
| 272 | * Remove all the buffers pending on a tty whether queued with data | ||
| 273 | * or in the free ring. Must be called when the tty is no longer in use | ||
| 274 | * | ||
| 275 | * Locking: none | ||
| 276 | */ | ||
| 277 | |||
| 278 | |||
| 279 | /** | ||
| 280 | * tty_buffer_free_all - free buffers used by a tty | ||
| 281 | * @tty: tty to free from | ||
| 282 | * | ||
| 283 | * Remove all the buffers pending on a tty whether queued with data | ||
| 284 | * or in the free ring. Must be called when the tty is no longer in use | ||
| 285 | * | ||
| 286 | * Locking: none | ||
| 287 | */ | ||
| 288 | |||
| 238 | static void tty_buffer_free_all(struct tty_struct *tty) | 289 | static void tty_buffer_free_all(struct tty_struct *tty) |
| 239 | { | 290 | { |
| 240 | struct tty_buffer *thead; | 291 | struct tty_buffer *thead; |
| @@ -247,19 +298,47 @@ static void tty_buffer_free_all(struct tty_struct *tty) | |||
| 247 | kfree(thead); | 298 | kfree(thead); |
| 248 | } | 299 | } |
| 249 | tty->buf.tail = NULL; | 300 | tty->buf.tail = NULL; |
| 301 | tty->buf.memory_used = 0; | ||
| 250 | } | 302 | } |
| 251 | 303 | ||
| 304 | /** | ||
| 305 | * tty_buffer_init - prepare a tty buffer structure | ||
| 306 | * @tty: tty to initialise | ||
| 307 | * | ||
| 308 | * Set up the initial state of the buffer management for a tty device. | ||
| 309 | * Must be called before the other tty buffer functions are used. | ||
| 310 | * | ||
| 311 | * Locking: none | ||
| 312 | */ | ||
| 313 | |||
| 252 | static void tty_buffer_init(struct tty_struct *tty) | 314 | static void tty_buffer_init(struct tty_struct *tty) |
| 253 | { | 315 | { |
| 254 | spin_lock_init(&tty->buf.lock); | 316 | spin_lock_init(&tty->buf.lock); |
| 255 | tty->buf.head = NULL; | 317 | tty->buf.head = NULL; |
| 256 | tty->buf.tail = NULL; | 318 | tty->buf.tail = NULL; |
| 257 | tty->buf.free = NULL; | 319 | tty->buf.free = NULL; |
| 320 | tty->buf.memory_used = 0; | ||
| 258 | } | 321 | } |
| 259 | 322 | ||
| 260 | static struct tty_buffer *tty_buffer_alloc(size_t size) | 323 | /** |
| 324 | * tty_buffer_alloc - allocate a tty buffer | ||
| 325 | * @tty: tty device | ||
| 326 | * @size: desired size (characters) | ||
| 327 | * | ||
| 328 | * Allocate a new tty buffer to hold the desired number of characters. | ||
| 329 | * Return NULL if out of memory or the allocation would exceed the | ||
| 330 | * per device queue | ||
| 331 | * | ||
| 332 | * Locking: Caller must hold tty->buf.lock | ||
| 333 | */ | ||
| 334 | |||
| 335 | static struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size) | ||
| 261 | { | 336 | { |
| 262 | struct tty_buffer *p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC); | 337 | struct tty_buffer *p; |
| 338 | |||
| 339 | if (tty->buf.memory_used + size > 65536) | ||
| 340 | return NULL; | ||
| 341 | p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC); | ||
| 263 | if(p == NULL) | 342 | if(p == NULL) |
| 264 | return NULL; | 343 | return NULL; |
| 265 | p->used = 0; | 344 | p->used = 0; |
| @@ -269,17 +348,27 @@ static struct tty_buffer *tty_buffer_alloc(size_t size) | |||
| 269 | p->read = 0; | 348 | p->read = 0; |
| 270 | p->char_buf_ptr = (char *)(p->data); | 349 | p->char_buf_ptr = (char *)(p->data); |
| 271 | p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size; | 350 | p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size; |
| 272 | /* printk("Flip create %p\n", p); */ | 351 | tty->buf.memory_used += size; |
| 273 | return p; | 352 | return p; |
| 274 | } | 353 | } |
| 275 | 354 | ||
| 276 | /* Must be called with the tty_read lock held. This needs to acquire strategy | 355 | /** |
| 277 | code to decide if we should kfree or relink a given expired buffer */ | 356 | * tty_buffer_free - free a tty buffer |
| 357 | * @tty: tty owning the buffer | ||
| 358 | * @b: the buffer to free | ||
| 359 | * | ||
| 360 | * Free a tty buffer, or add it to the free list according to our | ||
| 361 | * internal strategy | ||
| 362 | * | ||
| 363 | * Locking: Caller must hold tty->buf.lock | ||
| 364 | */ | ||
| 278 | 365 | ||
| 279 | static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b) | 366 | static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b) |
| 280 | { | 367 | { |
| 281 | /* Dumb strategy for now - should keep some stats */ | 368 | /* Dumb strategy for now - should keep some stats */ |
| 282 | /* printk("Flip dispose %p\n", b); */ | 369 | tty->buf.memory_used -= b->size; |
| 370 | WARN_ON(tty->buf.memory_used < 0); | ||
| 371 | |||
| 283 | if(b->size >= 512) | 372 | if(b->size >= 512) |
| 284 | kfree(b); | 373 | kfree(b); |
| 285 | else { | 374 | else { |
| @@ -288,6 +377,18 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b) | |||
| 288 | } | 377 | } |
| 289 | } | 378 | } |
| 290 | 379 | ||
| 380 | /** | ||
| 381 | * tty_buffer_find - find a free tty buffer | ||
| 382 | * @tty: tty owning the buffer | ||
| 383 | * @size: characters wanted | ||
| 384 | * | ||
| 385 | * Locate an existing suitable tty buffer or if we are lacking one then | ||
| 386 | * allocate a new one. We round our buffers off in 256 character chunks | ||
| 387 | * to get better allocation behaviour. | ||
| 388 | * | ||
| 389 | * Locking: Caller must hold tty->buf.lock | ||
| 390 | */ | ||
| 391 | |||
| 291 | static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size) | 392 | static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size) |
| 292 | { | 393 | { |
| 293 | struct tty_buffer **tbh = &tty->buf.free; | 394 | struct tty_buffer **tbh = &tty->buf.free; |
| @@ -299,20 +400,28 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size) | |||
| 299 | t->used = 0; | 400 | t->used = 0; |
| 300 | t->commit = 0; | 401 | t->commit = 0; |
| 301 | t->read = 0; | 402 | t->read = 0; |
| 302 | /* DEBUG ONLY */ | 403 | tty->buf.memory_used += t->size; |
| 303 | /* memset(t->data, '*', size); */ | ||
| 304 | /* printk("Flip recycle %p\n", t); */ | ||
| 305 | return t; | 404 | return t; |
| 306 | } | 405 | } |
| 307 | tbh = &((*tbh)->next); | 406 | tbh = &((*tbh)->next); |
| 308 | } | 407 | } |
| 309 | /* Round the buffer size out */ | 408 | /* Round the buffer size out */ |
| 310 | size = (size + 0xFF) & ~ 0xFF; | 409 | size = (size + 0xFF) & ~ 0xFF; |
| 311 | return tty_buffer_alloc(size); | 410 | return tty_buffer_alloc(tty, size); |
| 312 | /* Should possibly check if this fails for the largest buffer we | 411 | /* Should possibly check if this fails for the largest buffer we |
| 313 | have queued and recycle that ? */ | 412 | have queued and recycle that ? */ |
| 314 | } | 413 | } |
| 315 | 414 | ||
| 415 | /** | ||
| 416 | * tty_buffer_request_room - grow tty buffer if needed | ||
| 417 | * @tty: tty structure | ||
| 418 | * @size: size desired | ||
| 419 | * | ||
| 420 | * Make at least size bytes of linear space available for the tty | ||
| 421 | * buffer. If we fail return the size we managed to find. | ||
| 422 | * | ||
| 423 | * Locking: Takes tty->buf.lock | ||
| 424 | */ | ||
| 316 | int tty_buffer_request_room(struct tty_struct *tty, size_t size) | 425 | int tty_buffer_request_room(struct tty_struct *tty, size_t size) |
| 317 | { | 426 | { |
| 318 | struct tty_buffer *b, *n; | 427 | struct tty_buffer *b, *n; |
| @@ -347,6 +456,18 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size) | |||
| 347 | } | 456 | } |
| 348 | EXPORT_SYMBOL_GPL(tty_buffer_request_room); | 457 | EXPORT_SYMBOL_GPL(tty_buffer_request_room); |
| 349 | 458 | ||
| 459 | /** | ||
| 460 | * tty_insert_flip_string - Add characters to the tty buffer | ||
| 461 | * @tty: tty structure | ||
| 462 | * @chars: characters | ||
| 463 | * @size: size | ||
| 464 | * | ||
| 465 | * Queue a series of bytes to the tty buffering. All the characters | ||
| 466 | * passed are marked as without error. Returns the number added. | ||
| 467 | * | ||
| 468 | * Locking: Called functions may take tty->buf.lock | ||
| 469 | */ | ||
| 470 | |||
| 350 | int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, | 471 | int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, |
| 351 | size_t size) | 472 | size_t size) |
| 352 | { | 473 | { |
| @@ -370,6 +491,20 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, | |||
| 370 | } | 491 | } |
| 371 | EXPORT_SYMBOL(tty_insert_flip_string); | 492 | EXPORT_SYMBOL(tty_insert_flip_string); |
| 372 | 493 | ||
| 494 | /** | ||
| 495 | * tty_insert_flip_string_flags - Add characters to the tty buffer | ||
| 496 | * @tty: tty structure | ||
| 497 | * @chars: characters | ||
| 498 | * @flags: flag bytes | ||
| 499 | * @size: size | ||
| 500 | * | ||
| 501 | * Queue a series of bytes to the tty buffering. For each character | ||
| 502 | * the flags array indicates the status of the character. Returns the | ||
| 503 | * number added. | ||
| 504 | * | ||
| 505 | * Locking: Called functions may take tty->buf.lock | ||
| 506 | */ | ||
| 507 | |||
| 373 | int tty_insert_flip_string_flags(struct tty_struct *tty, | 508 | int tty_insert_flip_string_flags(struct tty_struct *tty, |
| 374 | const unsigned char *chars, const char *flags, size_t size) | 509 | const unsigned char *chars, const char *flags, size_t size) |
| 375 | { | 510 | { |
| @@ -394,6 +529,17 @@ int tty_insert_flip_string_flags(struct tty_struct *tty, | |||
| 394 | } | 529 | } |
| 395 | EXPORT_SYMBOL(tty_insert_flip_string_flags); | 530 | EXPORT_SYMBOL(tty_insert_flip_string_flags); |
| 396 | 531 | ||
| 532 | /** | ||
| 533 | * tty_schedule_flip - push characters to ldisc | ||
| 534 | * @tty: tty to push from | ||
| 535 | * | ||
| 536 | * Takes any pending buffers and transfers their ownership to the | ||
| 537 | * ldisc side of the queue. It then schedules those characters for | ||
| 538 | * processing by the line discipline. | ||
| 539 | * | ||
| 540 | * Locking: Takes tty->buf.lock | ||
| 541 | */ | ||
| 542 | |||
| 397 | void tty_schedule_flip(struct tty_struct *tty) | 543 | void tty_schedule_flip(struct tty_struct *tty) |
| 398 | { | 544 | { |
| 399 | unsigned long flags; | 545 | unsigned long flags; |
| @@ -405,12 +551,19 @@ void tty_schedule_flip(struct tty_struct *tty) | |||
| 405 | } | 551 | } |
| 406 | EXPORT_SYMBOL(tty_schedule_flip); | 552 | EXPORT_SYMBOL(tty_schedule_flip); |
| 407 | 553 | ||
| 408 | /* | 554 | /** |
| 555 | * tty_prepare_flip_string - make room for characters | ||
| 556 | * @tty: tty | ||
| 557 | * @chars: return pointer for character write area | ||
| 558 | * @size: desired size | ||
| 559 | * | ||
| 409 | * Prepare a block of space in the buffer for data. Returns the length | 560 | * Prepare a block of space in the buffer for data. Returns the length |
| 410 | * available and buffer pointer to the space which is now allocated and | 561 | * available and buffer pointer to the space which is now allocated and |
| 411 | * accounted for as ready for normal characters. This is used for drivers | 562 | * accounted for as ready for normal characters. This is used for drivers |
| 412 | * that need their own block copy routines into the buffer. There is no | 563 | * that need their own block copy routines into the buffer. There is no |
| 413 | * guarantee the buffer is a DMA target! | 564 | * guarantee the buffer is a DMA target! |
| 565 | * | ||
| 566 | * Locking: May call functions taking tty->buf.lock | ||
| 414 | */ | 567 | */ |
| 415 | 568 | ||
| 416 | int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size) | 569 | int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size) |
| @@ -427,12 +580,20 @@ int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_ | |||
| 427 | 580 | ||
| 428 | EXPORT_SYMBOL_GPL(tty_prepare_flip_string); | 581 | EXPORT_SYMBOL_GPL(tty_prepare_flip_string); |
| 429 | 582 | ||
| 430 | /* | 583 | /** |
| 584 | * tty_prepare_flip_string_flags - make room for characters | ||
| 585 | * @tty: tty | ||
| 586 | * @chars: return pointer for character write area | ||
| 587 | * @flags: return pointer for status flag write area | ||
| 588 | * @size: desired size | ||
| 589 | * | ||
| 431 | * Prepare a block of space in the buffer for data. Returns the length | 590 | * Prepare a block of space in the buffer for data. Returns the length |
| 432 | * available and buffer pointer to the space which is now allocated and | 591 | * available and buffer pointer to the space which is now allocated and |
| 433 | * accounted for as ready for characters. This is used for drivers | 592 | * accounted for as ready for characters. This is used for drivers |
| 434 | * that need their own block copy routines into the buffer. There is no | 593 | * that need their own block copy routines into the buffer. There is no |
| 435 | * guarantee the buffer is a DMA target! | 594 | * guarantee the buffer is a DMA target! |
| 595 | * | ||
| 596 | * Locking: May call functions taking tty->buf.lock | ||
| 436 | */ | 597 | */ |
| 437 | 598 | ||
| 438 | int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size) | 599 | int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size) |
| @@ -451,10 +612,16 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags); | |||
| 451 | 612 | ||
| 452 | 613 | ||
| 453 | 614 | ||
| 454 | /* | 615 | /** |
| 616 | * tty_set_termios_ldisc - set ldisc field | ||
| 617 | * @tty: tty structure | ||
| 618 | * @num: line discipline number | ||
| 619 | * | ||
| 455 | * This is probably overkill for real world processors but | 620 | * This is probably overkill for real world processors but |
| 456 | * they are not on hot paths so a little discipline won't do | 621 | * they are not on hot paths so a little discipline won't do |
| 457 | * any harm. | 622 | * any harm. |
| 623 | * | ||
| 624 | * Locking: takes termios_sem | ||
| 458 | */ | 625 | */ |
| 459 | 626 | ||
| 460 | static void tty_set_termios_ldisc(struct tty_struct *tty, int num) | 627 | static void tty_set_termios_ldisc(struct tty_struct *tty, int num) |
| @@ -474,6 +641,19 @@ static DEFINE_SPINLOCK(tty_ldisc_lock); | |||
| 474 | static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); | 641 | static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); |
| 475 | static struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */ | 642 | static struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */ |
| 476 | 643 | ||
| 644 | /** | ||
| 645 | * tty_register_ldisc - install a line discipline | ||
| 646 | * @disc: ldisc number | ||
| 647 | * @new_ldisc: pointer to the ldisc object | ||
| 648 | * | ||
| 649 | * Installs a new line discipline into the kernel. The discipline | ||
| 650 | * is set up as unreferenced and then made available to the kernel | ||
| 651 | * from this point onwards. | ||
| 652 | * | ||
| 653 | * Locking: | ||
| 654 | * takes tty_ldisc_lock to guard against ldisc races | ||
| 655 | */ | ||
| 656 | |||
| 477 | int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) | 657 | int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) |
| 478 | { | 658 | { |
| 479 | unsigned long flags; | 659 | unsigned long flags; |
| @@ -493,6 +673,18 @@ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) | |||
| 493 | } | 673 | } |
| 494 | EXPORT_SYMBOL(tty_register_ldisc); | 674 | EXPORT_SYMBOL(tty_register_ldisc); |
| 495 | 675 | ||
| 676 | /** | ||
| 677 | * tty_unregister_ldisc - unload a line discipline | ||
| 678 | * @disc: ldisc number | ||
| 679 | * @new_ldisc: pointer to the ldisc object | ||
| 680 | * | ||
| 681 | * Remove a line discipline from the kernel providing it is not | ||
| 682 | * currently in use. | ||
| 683 | * | ||
| 684 | * Locking: | ||
| 685 | * takes tty_ldisc_lock to guard against ldisc races | ||
| 686 | */ | ||
| 687 | |||
| 496 | int tty_unregister_ldisc(int disc) | 688 | int tty_unregister_ldisc(int disc) |
| 497 | { | 689 | { |
| 498 | unsigned long flags; | 690 | unsigned long flags; |
| @@ -512,6 +704,19 @@ int tty_unregister_ldisc(int disc) | |||
| 512 | } | 704 | } |
| 513 | EXPORT_SYMBOL(tty_unregister_ldisc); | 705 | EXPORT_SYMBOL(tty_unregister_ldisc); |
| 514 | 706 | ||
| 707 | /** | ||
| 708 | * tty_ldisc_get - take a reference to an ldisc | ||
| 709 | * @disc: ldisc number | ||
| 710 | * | ||
| 711 | * Takes a reference to a line discipline. Deals with refcounts and | ||
| 712 | * module locking counts. Returns NULL if the discipline is not available. | ||
| 713 | * Returns a pointer to the discipline and bumps the ref count if it is | ||
| 714 | * available | ||
| 715 | * | ||
| 716 | * Locking: | ||
| 717 | * takes tty_ldisc_lock to guard against ldisc races | ||
| 718 | */ | ||
| 719 | |||
| 515 | struct tty_ldisc *tty_ldisc_get(int disc) | 720 | struct tty_ldisc *tty_ldisc_get(int disc) |
| 516 | { | 721 | { |
| 517 | unsigned long flags; | 722 | unsigned long flags; |
| @@ -540,6 +745,17 @@ struct tty_ldisc *tty_ldisc_get(int disc) | |||
| 540 | 745 | ||
| 541 | EXPORT_SYMBOL_GPL(tty_ldisc_get); | 746 | EXPORT_SYMBOL_GPL(tty_ldisc_get); |
| 542 | 747 | ||
| 748 | /** | ||
| 749 | * tty_ldisc_put - drop ldisc reference | ||
| 750 | * @disc: ldisc number | ||
| 751 | * | ||
| 752 | * Drop a reference to a line discipline. Manage refcounts and | ||
| 753 | * module usage counts | ||
| 754 | * | ||
| 755 | * Locking: | ||
| 756 | * takes tty_ldisc_lock to guard against ldisc races | ||
| 757 | */ | ||
| 758 | |||
| 543 | void tty_ldisc_put(int disc) | 759 | void tty_ldisc_put(int disc) |
| 544 | { | 760 | { |
| 545 | struct tty_ldisc *ld; | 761 | struct tty_ldisc *ld; |
| @@ -557,6 +773,19 @@ void tty_ldisc_put(int disc) | |||
| 557 | 773 | ||
| 558 | EXPORT_SYMBOL_GPL(tty_ldisc_put); | 774 | EXPORT_SYMBOL_GPL(tty_ldisc_put); |
| 559 | 775 | ||
| 776 | /** | ||
| 777 | * tty_ldisc_assign - set ldisc on a tty | ||
| 778 | * @tty: tty to assign | ||
| 779 | * @ld: line discipline | ||
| 780 | * | ||
| 781 | * Install an instance of a line discipline into a tty structure. The | ||
| 782 | * ldisc must have a reference count above zero to ensure it remains/ | ||
| 783 | * The tty instance refcount starts at zero. | ||
| 784 | * | ||
| 785 | * Locking: | ||
| 786 | * Caller must hold references | ||
| 787 | */ | ||
| 788 | |||
| 560 | static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) | 789 | static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) |
| 561 | { | 790 | { |
| 562 | tty->ldisc = *ld; | 791 | tty->ldisc = *ld; |
| @@ -571,6 +800,8 @@ static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) | |||
| 571 | * the tty ldisc. Return 0 on failure or 1 on success. This is | 800 | * the tty ldisc. Return 0 on failure or 1 on success. This is |
| 572 | * used to implement both the waiting and non waiting versions | 801 | * used to implement both the waiting and non waiting versions |
| 573 | * of tty_ldisc_ref | 802 | * of tty_ldisc_ref |
| 803 | * | ||
| 804 | * Locking: takes tty_ldisc_lock | ||
| 574 | */ | 805 | */ |
| 575 | 806 | ||
| 576 | static int tty_ldisc_try(struct tty_struct *tty) | 807 | static int tty_ldisc_try(struct tty_struct *tty) |
| @@ -602,6 +833,8 @@ static int tty_ldisc_try(struct tty_struct *tty) | |||
| 602 | * must also be careful not to hold other locks that will deadlock | 833 | * must also be careful not to hold other locks that will deadlock |
| 603 | * against a discipline change, such as an existing ldisc reference | 834 | * against a discipline change, such as an existing ldisc reference |
| 604 | * (which we check for) | 835 | * (which we check for) |
| 836 | * | ||
| 837 | * Locking: call functions take tty_ldisc_lock | ||
| 605 | */ | 838 | */ |
| 606 | 839 | ||
| 607 | struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) | 840 | struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) |
| @@ -622,6 +855,8 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); | |||
| 622 | * Dereference the line discipline for the terminal and take a | 855 | * Dereference the line discipline for the terminal and take a |
| 623 | * reference to it. If the line discipline is in flux then | 856 | * reference to it. If the line discipline is in flux then |
| 624 | * return NULL. Can be called from IRQ and timer functions. | 857 | * return NULL. Can be called from IRQ and timer functions. |
| 858 | * | ||
| 859 | * Locking: called functions take tty_ldisc_lock | ||
| 625 | */ | 860 | */ |
| 626 | 861 | ||
| 627 | struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) | 862 | struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) |
| @@ -639,6 +874,8 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref); | |||
| 639 | * | 874 | * |
| 640 | * Undoes the effect of tty_ldisc_ref or tty_ldisc_ref_wait. May | 875 | * Undoes the effect of tty_ldisc_ref or tty_ldisc_ref_wait. May |
| 641 | * be called in IRQ context. | 876 | * be called in IRQ context. |
| 877 | * | ||
| 878 | * Locking: takes tty_ldisc_lock | ||
| 642 | */ | 879 | */ |
| 643 | 880 | ||
| 644 | void tty_ldisc_deref(struct tty_ldisc *ld) | 881 | void tty_ldisc_deref(struct tty_ldisc *ld) |
| @@ -683,6 +920,9 @@ static void tty_ldisc_enable(struct tty_struct *tty) | |||
| 683 | * | 920 | * |
| 684 | * Set the discipline of a tty line. Must be called from a process | 921 | * Set the discipline of a tty line. Must be called from a process |
| 685 | * context. | 922 | * context. |
| 923 | * | ||
| 924 | * Locking: takes tty_ldisc_lock. | ||
| 925 | * called functions take termios_sem | ||
| 686 | */ | 926 | */ |
| 687 | 927 | ||
| 688 | static int tty_set_ldisc(struct tty_struct *tty, int ldisc) | 928 | static int tty_set_ldisc(struct tty_struct *tty, int ldisc) |
| @@ -846,9 +1086,17 @@ restart: | |||
| 846 | return retval; | 1086 | return retval; |
| 847 | } | 1087 | } |
| 848 | 1088 | ||
| 849 | /* | 1089 | /** |
| 850 | * This routine returns a tty driver structure, given a device number | 1090 | * get_tty_driver - find device of a tty |
| 1091 | * @dev_t: device identifier | ||
| 1092 | * @index: returns the index of the tty | ||
| 1093 | * | ||
| 1094 | * This routine returns a tty driver structure, given a device number | ||
| 1095 | * and also passes back the index number. | ||
| 1096 | * | ||
| 1097 | * Locking: caller must hold tty_mutex | ||
| 851 | */ | 1098 | */ |
| 1099 | |||
| 852 | static struct tty_driver *get_tty_driver(dev_t device, int *index) | 1100 | static struct tty_driver *get_tty_driver(dev_t device, int *index) |
| 853 | { | 1101 | { |
| 854 | struct tty_driver *p; | 1102 | struct tty_driver *p; |
| @@ -863,11 +1111,17 @@ static struct tty_driver *get_tty_driver(dev_t device, int *index) | |||
| 863 | return NULL; | 1111 | return NULL; |
| 864 | } | 1112 | } |
| 865 | 1113 | ||
| 866 | /* | 1114 | /** |
| 867 | * If we try to write to, or set the state of, a terminal and we're | 1115 | * tty_check_change - check for POSIX terminal changes |
| 868 | * not in the foreground, send a SIGTTOU. If the signal is blocked or | 1116 | * @tty: tty to check |
| 869 | * ignored, go ahead and perform the operation. (POSIX 7.2) | 1117 | * |
| 1118 | * If we try to write to, or set the state of, a terminal and we're | ||
| 1119 | * not in the foreground, send a SIGTTOU. If the signal is blocked or | ||
| 1120 | * ignored, go ahead and perform the operation. (POSIX 7.2) | ||
| 1121 | * | ||
| 1122 | * Locking: none | ||
| 870 | */ | 1123 | */ |
| 1124 | |||
| 871 | int tty_check_change(struct tty_struct * tty) | 1125 | int tty_check_change(struct tty_struct * tty) |
| 872 | { | 1126 | { |
| 873 | if (current->signal->tty != tty) | 1127 | if (current->signal->tty != tty) |
| @@ -1005,10 +1259,27 @@ void tty_ldisc_flush(struct tty_struct *tty) | |||
| 1005 | 1259 | ||
| 1006 | EXPORT_SYMBOL_GPL(tty_ldisc_flush); | 1260 | EXPORT_SYMBOL_GPL(tty_ldisc_flush); |
| 1007 | 1261 | ||
| 1008 | /* | 1262 | /** |
| 1009 | * This can be called by the "eventd" kernel thread. That is process synchronous, | 1263 | * do_tty_hangup - actual handler for hangup events |
| 1010 | * but doesn't hold any locks, so we need to make sure we have the appropriate | 1264 | * @data: tty device |
| 1011 | * locks for what we're doing.. | 1265 | * |
| 1266 | * This can be called by the "eventd" kernel thread. That is process | ||
| 1267 | * synchronous but doesn't hold any locks, so we need to make sure we | ||
| 1268 | * have the appropriate locks for what we're doing. | ||
| 1269 | * | ||
| 1270 | * The hangup event clears any pending redirections onto the hung up | ||
| 1271 | * device. It ensures future writes will error and it does the needed | ||
| 1272 | * line discipline hangup and signal delivery. The tty object itself | ||
| 1273 | * remains intact. | ||
| 1274 | * | ||
| 1275 | * Locking: | ||
| 1276 | * BKL | ||
| 1277 | * redirect lock for undoing redirection | ||
| 1278 | * file list lock for manipulating list of ttys | ||
| 1279 | * tty_ldisc_lock from called functions | ||
| 1280 | * termios_sem resetting termios data | ||
| 1281 | * tasklist_lock to walk task list for hangup event | ||
| 1282 | * | ||
| 1012 | */ | 1283 | */ |
| 1013 | static void do_tty_hangup(void *data) | 1284 | static void do_tty_hangup(void *data) |
| 1014 | { | 1285 | { |
| @@ -1133,6 +1404,14 @@ static void do_tty_hangup(void *data) | |||
| 1133 | fput(f); | 1404 | fput(f); |
| 1134 | } | 1405 | } |
| 1135 | 1406 | ||
| 1407 | /** | ||
| 1408 | * tty_hangup - trigger a hangup event | ||
| 1409 | * @tty: tty to hangup | ||
| 1410 | * | ||
| 1411 | * A carrier loss (virtual or otherwise) has occurred on this like | ||
| 1412 | * schedule a hangup sequence to run after this event. | ||
| 1413 | */ | ||
| 1414 | |||
| 1136 | void tty_hangup(struct tty_struct * tty) | 1415 | void tty_hangup(struct tty_struct * tty) |
| 1137 | { | 1416 | { |
| 1138 | #ifdef TTY_DEBUG_HANGUP | 1417 | #ifdef TTY_DEBUG_HANGUP |
| @@ -1145,6 +1424,15 @@ void tty_hangup(struct tty_struct * tty) | |||
| 1145 | 1424 | ||
| 1146 | EXPORT_SYMBOL(tty_hangup); | 1425 | EXPORT_SYMBOL(tty_hangup); |
| 1147 | 1426 | ||
| 1427 | /** | ||
| 1428 | * tty_vhangup - process vhangup | ||
| 1429 | * @tty: tty to hangup | ||
| 1430 | * | ||
| 1431 | * The user has asked via system call for the terminal to be hung up. | ||
| 1432 | * We do this synchronously so that when the syscall returns the process | ||
| 1433 | * is complete. That guarantee is neccessary for security reasons. | ||
| 1434 | */ | ||
| 1435 | |||
| 1148 | void tty_vhangup(struct tty_struct * tty) | 1436 | void tty_vhangup(struct tty_struct * tty) |
| 1149 | { | 1437 | { |
| 1150 | #ifdef TTY_DEBUG_HANGUP | 1438 | #ifdef TTY_DEBUG_HANGUP |
| @@ -1156,6 +1444,14 @@ void tty_vhangup(struct tty_struct * tty) | |||
| 1156 | } | 1444 | } |
| 1157 | EXPORT_SYMBOL(tty_vhangup); | 1445 | EXPORT_SYMBOL(tty_vhangup); |
| 1158 | 1446 | ||
| 1447 | /** | ||
| 1448 | * tty_hung_up_p - was tty hung up | ||
| 1449 | * @filp: file pointer of tty | ||
| 1450 | * | ||
| 1451 | * Return true if the tty has been subject to a vhangup or a carrier | ||
| 1452 | * loss | ||
| 1453 | */ | ||
| 1454 | |||
| 1159 | int tty_hung_up_p(struct file * filp) | 1455 | int tty_hung_up_p(struct file * filp) |
| 1160 | { | 1456 | { |
| 1161 | return (filp->f_op == &hung_up_tty_fops); | 1457 | return (filp->f_op == &hung_up_tty_fops); |
| @@ -1163,19 +1459,28 @@ int tty_hung_up_p(struct file * filp) | |||
| 1163 | 1459 | ||
| 1164 | EXPORT_SYMBOL(tty_hung_up_p); | 1460 | EXPORT_SYMBOL(tty_hung_up_p); |
| 1165 | 1461 | ||
| 1166 | /* | 1462 | /** |
| 1167 | * This function is typically called only by the session leader, when | 1463 | * disassociate_ctty - disconnect controlling tty |
| 1168 | * it wants to disassociate itself from its controlling tty. | 1464 | * @on_exit: true if exiting so need to "hang up" the session |
| 1465 | * | ||
| 1466 | * This function is typically called only by the session leader, when | ||
| 1467 | * it wants to disassociate itself from its controlling tty. | ||
| 1169 | * | 1468 | * |
| 1170 | * It performs the following functions: | 1469 | * It performs the following functions: |
| 1171 | * (1) Sends a SIGHUP and SIGCONT to the foreground process group | 1470 | * (1) Sends a SIGHUP and SIGCONT to the foreground process group |
| 1172 | * (2) Clears the tty from being controlling the session | 1471 | * (2) Clears the tty from being controlling the session |
| 1173 | * (3) Clears the controlling tty for all processes in the | 1472 | * (3) Clears the controlling tty for all processes in the |
| 1174 | * session group. | 1473 | * session group. |
| 1175 | * | 1474 | * |
| 1176 | * The argument on_exit is set to 1 if called when a process is | 1475 | * The argument on_exit is set to 1 if called when a process is |
| 1177 | * exiting; it is 0 if called by the ioctl TIOCNOTTY. | 1476 | * exiting; it is 0 if called by the ioctl TIOCNOTTY. |
| 1477 | * | ||
| 1478 | * Locking: tty_mutex is taken to protect current->signal->tty | ||
| 1479 | * BKL is taken for hysterical raisins | ||
| 1480 | * Tasklist lock is taken (under tty_mutex) to walk process | ||
| 1481 | * lists for the session. | ||
| 1178 | */ | 1482 | */ |
| 1483 | |||
| 1179 | void disassociate_ctty(int on_exit) | 1484 | void disassociate_ctty(int on_exit) |
| 1180 | { | 1485 | { |
| 1181 | struct tty_struct *tty; | 1486 | struct tty_struct *tty; |
| @@ -1222,6 +1527,25 @@ void disassociate_ctty(int on_exit) | |||
| 1222 | unlock_kernel(); | 1527 | unlock_kernel(); |
| 1223 | } | 1528 | } |
| 1224 | 1529 | ||
| 1530 | |||
| 1531 | /** | ||
| 1532 | * stop_tty - propagate flow control | ||
| 1533 | * @tty: tty to stop | ||
| 1534 | * | ||
| 1535 | * Perform flow control to the driver. For PTY/TTY pairs we | ||
| 1536 | * must also propagate the TIOCPKT status. May be called | ||
| 1537 | * on an already stopped device and will not re-call the driver | ||
| 1538 | * method. | ||
| 1539 | * | ||
| 1540 | * This functionality is used by both the line disciplines for | ||
| 1541 | * halting incoming flow and by the driver. It may therefore be | ||
| 1542 | * called from any context, may be under the tty atomic_write_lock | ||
| 1543 | * but not always. | ||
| 1544 | * | ||
| 1545 | * Locking: | ||
| 1546 | * Broken. Relies on BKL which is unsafe here. | ||
| 1547 | */ | ||
| 1548 | |||
| 1225 | void stop_tty(struct tty_struct *tty) | 1549 | void stop_tty(struct tty_struct *tty) |
| 1226 | { | 1550 | { |
| 1227 | if (tty->stopped) | 1551 | if (tty->stopped) |
| @@ -1238,6 +1562,19 @@ void stop_tty(struct tty_struct *tty) | |||
| 1238 | 1562 | ||
| 1239 | EXPORT_SYMBOL(stop_tty); | 1563 | EXPORT_SYMBOL(stop_tty); |
| 1240 | 1564 | ||
| 1565 | /** | ||
| 1566 | * start_tty - propagate flow control | ||
| 1567 | * @tty: tty to start | ||
| 1568 | * | ||
| 1569 | * Start a tty that has been stopped if at all possible. Perform | ||
| 1570 | * any necessary wakeups and propagate the TIOCPKT status. If this | ||
| 1571 | * tty was previously stopped and is being started then the | ||
| 1572 | * driver start method is invoked and the line discipline woken. | ||
| 1573 | * | ||
| 1574 | * Locking: | ||
| 1575 | * Broken. Relies on BKL which is unsafe here. | ||
| 1576 | */ | ||
| 1577 | |||
| 1241 | void start_tty(struct tty_struct *tty) | 1578 | void start_tty(struct tty_struct *tty) |
| 1242 | { | 1579 | { |
| 1243 | if (!tty->stopped || tty->flow_stopped) | 1580 | if (!tty->stopped || tty->flow_stopped) |
| @@ -1258,6 +1595,23 @@ void start_tty(struct tty_struct *tty) | |||
| 1258 | 1595 | ||
| 1259 | EXPORT_SYMBOL(start_tty); | 1596 | EXPORT_SYMBOL(start_tty); |
| 1260 | 1597 | ||
| 1598 | /** | ||
| 1599 | * tty_read - read method for tty device files | ||
| 1600 | * @file: pointer to tty file | ||
| 1601 | * @buf: user buffer | ||
| 1602 | * @count: size of user buffer | ||
| 1603 | * @ppos: unused | ||
| 1604 | * | ||
| 1605 | * Perform the read system call function on this terminal device. Checks | ||
| 1606 | * for hung up devices before calling the line discipline method. | ||
| 1607 | * | ||
| 1608 | * Locking: | ||
| 1609 | * Locks the line discipline internally while needed | ||
| 1610 | * For historical reasons the line discipline read method is | ||
| 1611 | * invoked under the BKL. This will go away in time so do not rely on it | ||
| 1612 | * in new code. Multiple read calls may be outstanding in parallel. | ||
| 1613 | */ | ||
| 1614 | |||
| 1261 | static ssize_t tty_read(struct file * file, char __user * buf, size_t count, | 1615 | static ssize_t tty_read(struct file * file, char __user * buf, size_t count, |
| 1262 | loff_t *ppos) | 1616 | loff_t *ppos) |
| 1263 | { | 1617 | { |
| @@ -1302,6 +1656,7 @@ static inline ssize_t do_tty_write( | |||
| 1302 | ssize_t ret = 0, written = 0; | 1656 | ssize_t ret = 0, written = 0; |
| 1303 | unsigned int chunk; | 1657 | unsigned int chunk; |
| 1304 | 1658 | ||
| 1659 | /* FIXME: O_NDELAY ... */ | ||
| 1305 | if (mutex_lock_interruptible(&tty->atomic_write_lock)) { | 1660 | if (mutex_lock_interruptible(&tty->atomic_write_lock)) { |
| 1306 | return -ERESTARTSYS; | 1661 | return -ERESTARTSYS; |
| 1307 | } | 1662 | } |
| @@ -1318,6 +1673,9 @@ static inline ssize_t do_tty_write( | |||
| 1318 | * layer has problems with bigger chunks. It will | 1673 | * layer has problems with bigger chunks. It will |
| 1319 | * claim to be able to handle more characters than | 1674 | * claim to be able to handle more characters than |
| 1320 | * it actually does. | 1675 | * it actually does. |
| 1676 | * | ||
| 1677 | * FIXME: This can probably go away now except that 64K chunks | ||
| 1678 | * are too likely to fail unless switched to vmalloc... | ||
| 1321 | */ | 1679 | */ |
| 1322 | chunk = 2048; | 1680 | chunk = 2048; |
| 1323 | if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags)) | 1681 | if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags)) |
| @@ -1375,6 +1733,24 @@ static inline ssize_t do_tty_write( | |||
| 1375 | } | 1733 | } |
| 1376 | 1734 | ||
| 1377 | 1735 | ||
| 1736 | /** | ||
| 1737 | * tty_write - write method for tty device file | ||
| 1738 | * @file: tty file pointer | ||
| 1739 | * @buf: user data to write | ||
| 1740 | * @count: bytes to write | ||
| 1741 | * @ppos: unused | ||
| 1742 | * | ||
| 1743 | * Write data to a tty device via the line discipline. | ||
| 1744 | * | ||
| 1745 | * Locking: | ||
| 1746 | * Locks the line discipline as required | ||
| 1747 | * Writes to the tty driver are serialized by the atomic_write_lock | ||
| 1748 | * and are then processed in chunks to the device. The line discipline | ||
| 1749 | * write method will not be invoked in parallel for each device | ||
| 1750 | * The line discipline write method is called under the big | ||
| 1751 | * kernel lock for historical reasons. New code should not rely on this. | ||
| 1752 | */ | ||
| 1753 | |||
| 1378 | static ssize_t tty_write(struct file * file, const char __user * buf, size_t count, | 1754 | static ssize_t tty_write(struct file * file, const char __user * buf, size_t count, |
| 1379 | loff_t *ppos) | 1755 | loff_t *ppos) |
| 1380 | { | 1756 | { |
| @@ -1422,7 +1798,18 @@ ssize_t redirected_tty_write(struct file * file, const char __user * buf, size_t | |||
| 1422 | 1798 | ||
| 1423 | static char ptychar[] = "pqrstuvwxyzabcde"; | 1799 | static char ptychar[] = "pqrstuvwxyzabcde"; |
| 1424 | 1800 | ||
| 1425 | static inline void pty_line_name(struct tty_driver *driver, int index, char *p) | 1801 | /** |
| 1802 | * pty_line_name - generate name for a pty | ||
| 1803 | * @driver: the tty driver in use | ||
| 1804 | * @index: the minor number | ||
| 1805 | * @p: output buffer of at least 6 bytes | ||
| 1806 | * | ||
| 1807 | * Generate a name from a driver reference and write it to the output | ||
| 1808 | * buffer. | ||
| 1809 | * | ||
| 1810 | * Locking: None | ||
| 1811 | */ | ||
| 1812 | static void pty_line_name(struct tty_driver *driver, int index, char *p) | ||
| 1426 | { | 1813 | { |
| 1427 | int i = index + driver->name_base; | 1814 | int i = index + driver->name_base; |
| 1428 | /* ->name is initialized to "ttyp", but "tty" is expected */ | 1815 | /* ->name is initialized to "ttyp", but "tty" is expected */ |
| @@ -1431,24 +1818,53 @@ static inline void pty_line_name(struct tty_driver *driver, int index, char *p) | |||
| 1431 | ptychar[i >> 4 & 0xf], i & 0xf); | 1818 | ptychar[i >> 4 & 0xf], i & 0xf); |
| 1432 | } | 1819 | } |
| 1433 | 1820 | ||
| 1434 | static inline void tty_line_name(struct tty_driver *driver, int index, char *p) | 1821 | /** |
| 1822 | * tty_line_name - generate name for a tty | ||
| 1823 | * @driver: the tty driver in use | ||
| 1824 | * @index: the minor number | ||
| 1825 | * @p: output buffer of at least 7 bytes | ||
| 1826 | * | ||
| 1827 | * Generate a name from a driver reference and write it to the output | ||
| 1828 | * buffer. | ||
| 1829 | * | ||
| 1830 | * Locking: None | ||
| 1831 | */ | ||
| 1832 | static void tty_line_name(struct tty_driver *driver, int index, char *p) | ||
| 1435 | { | 1833 | { |
| 1436 | sprintf(p, "%s%d", driver->name, index + driver->name_base); | 1834 | sprintf(p, "%s%d", driver->name, index + driver->name_base); |
| 1437 | } | 1835 | } |
| 1438 | 1836 | ||
| 1439 | /* | 1837 | /** |
| 1838 | * init_dev - initialise a tty device | ||
| 1839 | * @driver: tty driver we are opening a device on | ||
| 1840 | * @idx: device index | ||
| 1841 | * @tty: returned tty structure | ||
| 1842 | * | ||
| 1843 | * Prepare a tty device. This may not be a "new" clean device but | ||
| 1844 | * could also be an active device. The pty drivers require special | ||
| 1845 | * handling because of this. | ||
| 1846 | * | ||
| 1847 | * Locking: | ||
| 1848 | * The function is called under the tty_mutex, which | ||
| 1849 | * protects us from the tty struct or driver itself going away. | ||
| 1850 | * | ||
| 1851 | * On exit the tty device has the line discipline attached and | ||
| 1852 | * a reference count of 1. If a pair was created for pty/tty use | ||
| 1853 | * and the other was a pty master then it too has a reference count of 1. | ||
| 1854 | * | ||
| 1440 | * WSH 06/09/97: Rewritten to remove races and properly clean up after a | 1855 | * WSH 06/09/97: Rewritten to remove races and properly clean up after a |
| 1441 | * failed open. The new code protects the open with a mutex, so it's | 1856 | * failed open. The new code protects the open with a mutex, so it's |
| 1442 | * really quite straightforward. The mutex locking can probably be | 1857 | * really quite straightforward. The mutex locking can probably be |
| 1443 | * relaxed for the (most common) case of reopening a tty. | 1858 | * relaxed for the (most common) case of reopening a tty. |
| 1444 | */ | 1859 | */ |
| 1860 | |||
| 1445 | static int init_dev(struct tty_driver *driver, int idx, | 1861 | static int init_dev(struct tty_driver *driver, int idx, |
| 1446 | struct tty_struct **ret_tty) | 1862 | struct tty_struct **ret_tty) |
| 1447 | { | 1863 | { |
| 1448 | struct tty_struct *tty, *o_tty; | 1864 | struct tty_struct *tty, *o_tty; |
| 1449 | struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; | 1865 | struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; |
| 1450 | struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; | 1866 | struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; |
| 1451 | int retval=0; | 1867 | int retval = 0; |
| 1452 | 1868 | ||
| 1453 | /* check whether we're reopening an existing tty */ | 1869 | /* check whether we're reopening an existing tty */ |
| 1454 | if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { | 1870 | if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { |
| @@ -1662,10 +2078,20 @@ release_mem_out: | |||
| 1662 | goto end_init; | 2078 | goto end_init; |
| 1663 | } | 2079 | } |
| 1664 | 2080 | ||
| 1665 | /* | 2081 | /** |
| 1666 | * Releases memory associated with a tty structure, and clears out the | 2082 | * release_mem - release tty structure memory |
| 1667 | * driver table slots. | 2083 | * |
| 2084 | * Releases memory associated with a tty structure, and clears out the | ||
| 2085 | * driver table slots. This function is called when a device is no longer | ||
| 2086 | * in use. It also gets called when setup of a device fails. | ||
| 2087 | * | ||
| 2088 | * Locking: | ||
| 2089 | * tty_mutex - sometimes only | ||
| 2090 | * takes the file list lock internally when working on the list | ||
| 2091 | * of ttys that the driver keeps. | ||
| 2092 | * FIXME: should we require tty_mutex is held here ?? | ||
| 1668 | */ | 2093 | */ |
| 2094 | |||
| 1669 | static void release_mem(struct tty_struct *tty, int idx) | 2095 | static void release_mem(struct tty_struct *tty, int idx) |
| 1670 | { | 2096 | { |
| 1671 | struct tty_struct *o_tty; | 2097 | struct tty_struct *o_tty; |
| @@ -2006,18 +2432,27 @@ static void release_dev(struct file * filp) | |||
| 2006 | 2432 | ||
| 2007 | } | 2433 | } |
| 2008 | 2434 | ||
| 2009 | /* | 2435 | /** |
| 2010 | * tty_open and tty_release keep up the tty count that contains the | 2436 | * tty_open - open a tty device |
| 2011 | * number of opens done on a tty. We cannot use the inode-count, as | 2437 | * @inode: inode of device file |
| 2012 | * different inodes might point to the same tty. | 2438 | * @filp: file pointer to tty |
| 2439 | * | ||
| 2440 | * tty_open and tty_release keep up the tty count that contains the | ||
| 2441 | * number of opens done on a tty. We cannot use the inode-count, as | ||
| 2442 | * different inodes might point to the same tty. | ||
| 2013 | * | 2443 | * |
| 2014 | * Open-counting is needed for pty masters, as well as for keeping | 2444 | * Open-counting is needed for pty masters, as well as for keeping |
| 2015 | * track of serial lines: DTR is dropped when the last close happens. | 2445 | * track of serial lines: DTR is dropped when the last close happens. |
| 2016 | * (This is not done solely through tty->count, now. - Ted 1/27/92) | 2446 | * (This is not done solely through tty->count, now. - Ted 1/27/92) |
| 2017 | * | 2447 | * |
| 2018 | * The termios state of a pty is reset on first open so that | 2448 | * The termios state of a pty is reset on first open so that |
| 2019 | * settings don't persist across reuse. | 2449 | * settings don't persist across reuse. |
| 2450 | * | ||
| 2451 | * Locking: tty_mutex protects current->signal->tty, get_tty_driver and | ||
| 2452 | * init_dev work. tty->count should protect the rest. | ||
| 2453 | * task_lock is held to update task details for sessions | ||
| 2020 | */ | 2454 | */ |
| 2455 | |||
| 2021 | static int tty_open(struct inode * inode, struct file * filp) | 2456 | static int tty_open(struct inode * inode, struct file * filp) |
| 2022 | { | 2457 | { |
| 2023 | struct tty_struct *tty; | 2458 | struct tty_struct *tty; |
| @@ -2132,6 +2567,18 @@ got_driver: | |||
| 2132 | } | 2567 | } |
| 2133 | 2568 | ||
| 2134 | #ifdef CONFIG_UNIX98_PTYS | 2569 | #ifdef CONFIG_UNIX98_PTYS |
| 2570 | /** | ||
| 2571 | * ptmx_open - open a unix 98 pty master | ||
| 2572 | * @inode: inode of device file | ||
| 2573 | * @filp: file pointer to tty | ||
| 2574 | * | ||
| 2575 | * Allocate a unix98 pty master device from the ptmx driver. | ||
| 2576 | * | ||
| 2577 | * Locking: tty_mutex protects the init_dev work. tty->count should | ||
| 2578 | * protect the rest. | ||
| 2579 | * allocated_ptys_lock handles the list of free pty numbers | ||
| 2580 | */ | ||
| 2581 | |||
| 2135 | static int ptmx_open(struct inode * inode, struct file * filp) | 2582 | static int ptmx_open(struct inode * inode, struct file * filp) |
| 2136 | { | 2583 | { |
| 2137 | struct tty_struct *tty; | 2584 | struct tty_struct *tty; |
| @@ -2191,6 +2638,18 @@ out: | |||
| 2191 | } | 2638 | } |
| 2192 | #endif | 2639 | #endif |
| 2193 | 2640 | ||
| 2641 | /** | ||
| 2642 | * tty_release - vfs callback for close | ||
| 2643 | * @inode: inode of tty | ||
| 2644 | * @filp: file pointer for handle to tty | ||
| 2645 | * | ||
| 2646 | * Called the last time each file handle is closed that references | ||
| 2647 | * this tty. There may however be several such references. | ||
| 2648 | * | ||
| 2649 | * Locking: | ||
| 2650 | * Takes bkl. See release_dev | ||
| 2651 | */ | ||
| 2652 | |||
| 2194 | static int tty_release(struct inode * inode, struct file * filp) | 2653 | static int tty_release(struct inode * inode, struct file * filp) |
| 2195 | { | 2654 | { |
| 2196 | lock_kernel(); | 2655 | lock_kernel(); |
| @@ -2199,7 +2658,18 @@ static int tty_release(struct inode * inode, struct file * filp) | |||
| 2199 | return 0; | 2658 | return 0; |
| 2200 | } | 2659 | } |
| 2201 | 2660 | ||
| 2202 | /* No kernel lock held - fine */ | 2661 | /** |
| 2662 | * tty_poll - check tty status | ||
| 2663 | * @filp: file being polled | ||
| 2664 | * @wait: poll wait structures to update | ||
| 2665 | * | ||
| 2666 | * Call the line discipline polling method to obtain the poll | ||
| 2667 | * status of the device. | ||
| 2668 | * | ||
| 2669 | * Locking: locks called line discipline but ldisc poll method | ||
| 2670 | * may be re-entered freely by other callers. | ||
| 2671 | */ | ||
| 2672 | |||
| 2203 | static unsigned int tty_poll(struct file * filp, poll_table * wait) | 2673 | static unsigned int tty_poll(struct file * filp, poll_table * wait) |
| 2204 | { | 2674 | { |
| 2205 | struct tty_struct * tty; | 2675 | struct tty_struct * tty; |
| @@ -2243,6 +2713,21 @@ static int tty_fasync(int fd, struct file * filp, int on) | |||
| 2243 | return 0; | 2713 | return 0; |
| 2244 | } | 2714 | } |
| 2245 | 2715 | ||
| 2716 | /** | ||
| 2717 | * tiocsti - fake input character | ||
| 2718 | * @tty: tty to fake input into | ||
| 2719 | * @p: pointer to character | ||
| 2720 | * | ||
| 2721 | * Fake input to a tty device. Does the necessary locking and | ||
| 2722 | * input management. | ||
| 2723 | * | ||
| 2724 | * FIXME: does not honour flow control ?? | ||
| 2725 | * | ||
| 2726 | * Locking: | ||
| 2727 | * Called functions take tty_ldisc_lock | ||
| 2728 | * current->signal->tty check is safe without locks | ||
| 2729 | */ | ||
| 2730 | |||
| 2246 | static int tiocsti(struct tty_struct *tty, char __user *p) | 2731 | static int tiocsti(struct tty_struct *tty, char __user *p) |
| 2247 | { | 2732 | { |
| 2248 | char ch, mbz = 0; | 2733 | char ch, mbz = 0; |
| @@ -2258,6 +2743,18 @@ static int tiocsti(struct tty_struct *tty, char __user *p) | |||
| 2258 | return 0; | 2743 | return 0; |
| 2259 | } | 2744 | } |
| 2260 | 2745 | ||
| 2746 | /** | ||
| 2747 | * tiocgwinsz - implement window query ioctl | ||
| 2748 | * @tty: tty | ||
| 2749 | * @arg: user buffer for result | ||
| 2750 | * | ||
| 2751 | * Copies the kernel idea of the window size into the user buffer. No | ||
| 2752 | * locking is done. | ||
| 2753 | * | ||
| 2754 | * FIXME: Returning random values racing a window size set is wrong | ||
| 2755 | * should lock here against that | ||
| 2756 | */ | ||
| 2757 | |||
| 2261 | static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg) | 2758 | static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg) |
| 2262 | { | 2759 | { |
| 2263 | if (copy_to_user(arg, &tty->winsize, sizeof(*arg))) | 2760 | if (copy_to_user(arg, &tty->winsize, sizeof(*arg))) |
| @@ -2265,6 +2762,24 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg) | |||
| 2265 | return 0; | 2762 | return 0; |
| 2266 | } | 2763 | } |
| 2267 | 2764 | ||
| 2765 | /** | ||
| 2766 | * tiocswinsz - implement window size set ioctl | ||
| 2767 | * @tty: tty | ||
| 2768 | * @arg: user buffer for result | ||
| 2769 | * | ||
| 2770 | * Copies the user idea of the window size to the kernel. Traditionally | ||
| 2771 | * this is just advisory information but for the Linux console it | ||
| 2772 | * actually has driver level meaning and triggers a VC resize. | ||
| 2773 | * | ||
| 2774 | * Locking: | ||
| 2775 | * The console_sem is used to ensure we do not try and resize | ||
| 2776 | * the console twice at once. | ||
| 2777 | * FIXME: Two racing size sets may leave the console and kernel | ||
| 2778 | * parameters disagreeing. Is this exploitable ? | ||
| 2779 | * FIXME: Random values racing a window size get is wrong | ||
| 2780 | * should lock here against that | ||
| 2781 | */ | ||
| 2782 | |||
| 2268 | static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, | 2783 | static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, |
| 2269 | struct winsize __user * arg) | 2784 | struct winsize __user * arg) |
| 2270 | { | 2785 | { |
| @@ -2294,6 +2809,15 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, | |||
| 2294 | return 0; | 2809 | return 0; |
| 2295 | } | 2810 | } |
| 2296 | 2811 | ||
| 2812 | /** | ||
| 2813 | * tioccons - allow admin to move logical console | ||
| 2814 | * @file: the file to become console | ||
| 2815 | * | ||
| 2816 | * Allow the administrator to move the redirected console device | ||
| 2817 | * | ||
| 2818 | * Locking: uses redirect_lock to guard the redirect information | ||
| 2819 | */ | ||
| 2820 | |||
| 2297 | static int tioccons(struct file *file) | 2821 | static int tioccons(struct file *file) |
| 2298 | { | 2822 | { |
| 2299 | if (!capable(CAP_SYS_ADMIN)) | 2823 | if (!capable(CAP_SYS_ADMIN)) |
| @@ -2319,6 +2843,17 @@ static int tioccons(struct file *file) | |||
| 2319 | return 0; | 2843 | return 0; |
| 2320 | } | 2844 | } |
| 2321 | 2845 | ||
| 2846 | /** | ||
| 2847 | * fionbio - non blocking ioctl | ||
| 2848 | * @file: file to set blocking value | ||
| 2849 | * @p: user parameter | ||
| 2850 | * | ||
| 2851 | * Historical tty interfaces had a blocking control ioctl before | ||
| 2852 | * the generic functionality existed. This piece of history is preserved | ||
| 2853 | * in the expected tty API of posix OS's. | ||
| 2854 | * | ||
| 2855 | * Locking: none, the open file handle ensures it won't go away. | ||
| 2856 | */ | ||
| 2322 | 2857 | ||
| 2323 | static int fionbio(struct file *file, int __user *p) | 2858 | static int fionbio(struct file *file, int __user *p) |
| 2324 | { | 2859 | { |
| @@ -2334,6 +2869,23 @@ static int fionbio(struct file *file, int __user *p) | |||
| 2334 | return 0; | 2869 | return 0; |
| 2335 | } | 2870 | } |
| 2336 | 2871 | ||
| 2872 | /** | ||
| 2873 | * tiocsctty - set controlling tty | ||
| 2874 | * @tty: tty structure | ||
| 2875 | * @arg: user argument | ||
| 2876 | * | ||
| 2877 | * This ioctl is used to manage job control. It permits a session | ||
| 2878 | * leader to set this tty as the controlling tty for the session. | ||
| 2879 | * | ||
| 2880 | * Locking: | ||
| 2881 | * Takes tasklist lock internally to walk sessions | ||
| 2882 | * Takes task_lock() when updating signal->tty | ||
| 2883 | * | ||
| 2884 | * FIXME: tty_mutex is needed to protect signal->tty references. | ||
| 2885 | * FIXME: why task_lock on the signal->tty reference ?? | ||
| 2886 | * | ||
| 2887 | */ | ||
| 2888 | |||
| 2337 | static int tiocsctty(struct tty_struct *tty, int arg) | 2889 | static int tiocsctty(struct tty_struct *tty, int arg) |
| 2338 | { | 2890 | { |
| 2339 | struct task_struct *p; | 2891 | struct task_struct *p; |
| @@ -2374,6 +2926,18 @@ static int tiocsctty(struct tty_struct *tty, int arg) | |||
| 2374 | return 0; | 2926 | return 0; |
| 2375 | } | 2927 | } |
| 2376 | 2928 | ||
| 2929 | /** | ||
| 2930 | * tiocgpgrp - get process group | ||
| 2931 | * @tty: tty passed by user | ||
| 2932 | * @real_tty: tty side of the tty passed by the user if a pty else the tty | ||
| 2933 | * @p: returned pid | ||
| 2934 | * | ||
| 2935 | * Obtain the process group of the tty. If there is no process group | ||
| 2936 | * return an error. | ||
| 2937 | * | ||
| 2938 | * Locking: none. Reference to ->signal->tty is safe. | ||
| 2939 | */ | ||
| 2940 | |||
| 2377 | static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) | 2941 | static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) |
| 2378 | { | 2942 | { |
| 2379 | /* | 2943 | /* |
| @@ -2385,6 +2949,20 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t | |||
| 2385 | return put_user(real_tty->pgrp, p); | 2949 | return put_user(real_tty->pgrp, p); |
| 2386 | } | 2950 | } |
| 2387 | 2951 | ||
| 2952 | /** | ||
| 2953 | * tiocspgrp - attempt to set process group | ||
| 2954 | * @tty: tty passed by user | ||
| 2955 | * @real_tty: tty side device matching tty passed by user | ||
| 2956 | * @p: pid pointer | ||
| 2957 | * | ||
| 2958 | * Set the process group of the tty to the session passed. Only | ||
| 2959 | * permitted where the tty session is our session. | ||
| 2960 | * | ||
| 2961 | * Locking: None | ||
| 2962 | * | ||
| 2963 | * FIXME: current->signal->tty referencing is unsafe. | ||
| 2964 | */ | ||
| 2965 | |||
| 2388 | static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) | 2966 | static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) |
| 2389 | { | 2967 | { |
| 2390 | pid_t pgrp; | 2968 | pid_t pgrp; |
| @@ -2408,6 +2986,18 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t | |||
| 2408 | return 0; | 2986 | return 0; |
| 2409 | } | 2987 | } |
| 2410 | 2988 | ||
| 2989 | /** | ||
| 2990 | * tiocgsid - get session id | ||
| 2991 | * @tty: tty passed by user | ||
| 2992 | * @real_tty: tty side of the tty pased by the user if a pty else the tty | ||
| 2993 | * @p: pointer to returned session id | ||
| 2994 | * | ||
| 2995 | * Obtain the session id of the tty. If there is no session | ||
| 2996 | * return an error. | ||
| 2997 | * | ||
| 2998 | * Locking: none. Reference to ->signal->tty is safe. | ||
| 2999 | */ | ||
| 3000 | |||
| 2411 | static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) | 3001 | static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) |
| 2412 | { | 3002 | { |
| 2413 | /* | 3003 | /* |
| @@ -2421,6 +3011,16 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _ | |||
| 2421 | return put_user(real_tty->session, p); | 3011 | return put_user(real_tty->session, p); |
| 2422 | } | 3012 | } |
| 2423 | 3013 | ||
| 3014 | /** | ||
| 3015 | * tiocsetd - set line discipline | ||
| 3016 | * @tty: tty device | ||
| 3017 | * @p: pointer to user data | ||
| 3018 | * | ||
| 3019 | * Set the line discipline according to user request. | ||
| 3020 | * | ||
| 3021 | * Locking: see tty_set_ldisc, this function is just a helper | ||
| 3022 | */ | ||
| 3023 | |||
| 2424 | static int tiocsetd(struct tty_struct *tty, int __user *p) | 3024 | static int tiocsetd(struct tty_struct *tty, int __user *p) |
| 2425 | { | 3025 | { |
| 2426 | int ldisc; | 3026 | int ldisc; |
| @@ -2430,6 +3030,21 @@ static int tiocsetd(struct tty_struct *tty, int __user *p) | |||
| 2430 | return tty_set_ldisc(tty, ldisc); | 3030 | return tty_set_ldisc(tty, ldisc); |
| 2431 | } | 3031 | } |
| 2432 | 3032 | ||
| 3033 | /** | ||
| 3034 | * send_break - perform a timed break | ||
| 3035 | * @tty: device to break on | ||
| 3036 | * @duration: timeout in mS | ||
| 3037 | * | ||
| 3038 | * Perform a timed break on hardware that lacks its own driver level | ||
| 3039 | * timed break functionality. | ||
| 3040 | * | ||
| 3041 | * Locking: | ||
| 3042 | * None | ||
| 3043 | * | ||
| 3044 | * FIXME: | ||
| 3045 | * What if two overlap | ||
| 3046 | */ | ||
| 3047 | |||
| 2433 | static int send_break(struct tty_struct *tty, unsigned int duration) | 3048 | static int send_break(struct tty_struct *tty, unsigned int duration) |
| 2434 | { | 3049 | { |
| 2435 | tty->driver->break_ctl(tty, -1); | 3050 | tty->driver->break_ctl(tty, -1); |
| @@ -2442,8 +3057,19 @@ static int send_break(struct tty_struct *tty, unsigned int duration) | |||
| 2442 | return 0; | 3057 | return 0; |
| 2443 | } | 3058 | } |
| 2444 | 3059 | ||
| 2445 | static int | 3060 | /** |
| 2446 | tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p) | 3061 | * tiocmget - get modem status |
| 3062 | * @tty: tty device | ||
| 3063 | * @file: user file pointer | ||
| 3064 | * @p: pointer to result | ||
| 3065 | * | ||
| 3066 | * Obtain the modem status bits from the tty driver if the feature | ||
| 3067 | * is supported. Return -EINVAL if it is not available. | ||
| 3068 | * | ||
| 3069 | * Locking: none (up to the driver) | ||
| 3070 | */ | ||
| 3071 | |||
| 3072 | static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p) | ||
| 2447 | { | 3073 | { |
| 2448 | int retval = -EINVAL; | 3074 | int retval = -EINVAL; |
| 2449 | 3075 | ||
| @@ -2456,8 +3082,20 @@ tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p) | |||
| 2456 | return retval; | 3082 | return retval; |
| 2457 | } | 3083 | } |
| 2458 | 3084 | ||
| 2459 | static int | 3085 | /** |
| 2460 | tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd, | 3086 | * tiocmset - set modem status |
| 3087 | * @tty: tty device | ||
| 3088 | * @file: user file pointer | ||
| 3089 | * @cmd: command - clear bits, set bits or set all | ||
| 3090 | * @p: pointer to desired bits | ||
| 3091 | * | ||
| 3092 | * Set the modem status bits from the tty driver if the feature | ||
| 3093 | * is supported. Return -EINVAL if it is not available. | ||
| 3094 | * | ||
| 3095 | * Locking: none (up to the driver) | ||
| 3096 | */ | ||
| 3097 | |||
| 3098 | static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd, | ||
| 2461 | unsigned __user *p) | 3099 | unsigned __user *p) |
| 2462 | { | 3100 | { |
| 2463 | int retval = -EINVAL; | 3101 | int retval = -EINVAL; |
| @@ -2573,6 +3211,7 @@ int tty_ioctl(struct inode * inode, struct file * file, | |||
| 2573 | clear_bit(TTY_EXCLUSIVE, &tty->flags); | 3211 | clear_bit(TTY_EXCLUSIVE, &tty->flags); |
| 2574 | return 0; | 3212 | return 0; |
| 2575 | case TIOCNOTTY: | 3213 | case TIOCNOTTY: |
| 3214 | /* FIXME: taks lock or tty_mutex ? */ | ||
| 2576 | if (current->signal->tty != tty) | 3215 | if (current->signal->tty != tty) |
| 2577 | return -ENOTTY; | 3216 | return -ENOTTY; |
| 2578 | if (current->signal->leader) | 3217 | if (current->signal->leader) |
| @@ -2753,9 +3392,16 @@ void do_SAK(struct tty_struct *tty) | |||
| 2753 | 3392 | ||
| 2754 | EXPORT_SYMBOL(do_SAK); | 3393 | EXPORT_SYMBOL(do_SAK); |
| 2755 | 3394 | ||
| 2756 | /* | 3395 | /** |
| 2757 | * This routine is called out of the software interrupt to flush data | 3396 | * flush_to_ldisc |
| 2758 | * from the buffer chain to the line discipline. | 3397 | * @private_: tty structure passed from work queue. |
| 3398 | * | ||
| 3399 | * This routine is called out of the software interrupt to flush data | ||
| 3400 | * from the buffer chain to the line discipline. | ||
| 3401 | * | ||
| 3402 | * Locking: holds tty->buf.lock to guard buffer list. Drops the lock | ||
| 3403 | * while invoking the line discipline receive_buf method. The | ||
| 3404 | * receive_buf method is single threaded for each tty instance. | ||
| 2759 | */ | 3405 | */ |
| 2760 | 3406 | ||
| 2761 | static void flush_to_ldisc(void *private_) | 3407 | static void flush_to_ldisc(void *private_) |
| @@ -2831,6 +3477,8 @@ static int n_baud_table = ARRAY_SIZE(baud_table); | |||
| 2831 | * Convert termios baud rate data into a speed. This should be called | 3477 | * Convert termios baud rate data into a speed. This should be called |
| 2832 | * with the termios lock held if this termios is a terminal termios | 3478 | * with the termios lock held if this termios is a terminal termios |
| 2833 | * structure. May change the termios data. | 3479 | * structure. May change the termios data. |
| 3480 | * | ||
| 3481 | * Locking: none | ||
| 2834 | */ | 3482 | */ |
| 2835 | 3483 | ||
| 2836 | int tty_termios_baud_rate(struct termios *termios) | 3484 | int tty_termios_baud_rate(struct termios *termios) |
| @@ -2859,6 +3507,8 @@ EXPORT_SYMBOL(tty_termios_baud_rate); | |||
| 2859 | * Returns the baud rate as an integer for this terminal. The | 3507 | * Returns the baud rate as an integer for this terminal. The |
| 2860 | * termios lock must be held by the caller and the terminal bit | 3508 | * termios lock must be held by the caller and the terminal bit |
| 2861 | * flags may be updated. | 3509 | * flags may be updated. |
| 3510 | * | ||
| 3511 | * Locking: none | ||
| 2862 | */ | 3512 | */ |
| 2863 | 3513 | ||
| 2864 | int tty_get_baud_rate(struct tty_struct *tty) | 3514 | int tty_get_baud_rate(struct tty_struct *tty) |
| @@ -2888,6 +3538,8 @@ EXPORT_SYMBOL(tty_get_baud_rate); | |||
| 2888 | * | 3538 | * |
| 2889 | * In the event of the queue being busy for flipping the work will be | 3539 | * In the event of the queue being busy for flipping the work will be |
| 2890 | * held off and retried later. | 3540 | * held off and retried later. |
| 3541 | * | ||
| 3542 | * Locking: tty buffer lock. Driver locks in low latency mode. | ||
| 2891 | */ | 3543 | */ |
| 2892 | 3544 | ||
| 2893 | void tty_flip_buffer_push(struct tty_struct *tty) | 3545 | void tty_flip_buffer_push(struct tty_struct *tty) |
| @@ -2907,9 +3559,16 @@ void tty_flip_buffer_push(struct tty_struct *tty) | |||
| 2907 | EXPORT_SYMBOL(tty_flip_buffer_push); | 3559 | EXPORT_SYMBOL(tty_flip_buffer_push); |
| 2908 | 3560 | ||
| 2909 | 3561 | ||
| 2910 | /* | 3562 | /** |
| 2911 | * This subroutine initializes a tty structure. | 3563 | * initialize_tty_struct |
| 3564 | * @tty: tty to initialize | ||
| 3565 | * | ||
| 3566 | * This subroutine initializes a tty structure that has been newly | ||
| 3567 | * allocated. | ||
| 3568 | * | ||
| 3569 | * Locking: none - tty in question must not be exposed at this point | ||
| 2912 | */ | 3570 | */ |
| 3571 | |||
| 2913 | static void initialize_tty_struct(struct tty_struct *tty) | 3572 | static void initialize_tty_struct(struct tty_struct *tty) |
| 2914 | { | 3573 | { |
| 2915 | memset(tty, 0, sizeof(struct tty_struct)); | 3574 | memset(tty, 0, sizeof(struct tty_struct)); |
| @@ -2935,6 +3594,7 @@ static void initialize_tty_struct(struct tty_struct *tty) | |||
| 2935 | /* | 3594 | /* |
| 2936 | * The default put_char routine if the driver did not define one. | 3595 | * The default put_char routine if the driver did not define one. |
| 2937 | */ | 3596 | */ |
| 3597 | |||
| 2938 | static void tty_default_put_char(struct tty_struct *tty, unsigned char ch) | 3598 | static void tty_default_put_char(struct tty_struct *tty, unsigned char ch) |
| 2939 | { | 3599 | { |
| 2940 | tty->driver->write(tty, &ch, 1); | 3600 | tty->driver->write(tty, &ch, 1); |
| @@ -2943,19 +3603,23 @@ static void tty_default_put_char(struct tty_struct *tty, unsigned char ch) | |||
| 2943 | static struct class *tty_class; | 3603 | static struct class *tty_class; |
| 2944 | 3604 | ||
| 2945 | /** | 3605 | /** |
| 2946 | * tty_register_device - register a tty device | 3606 | * tty_register_device - register a tty device |
| 2947 | * @driver: the tty driver that describes the tty device | 3607 | * @driver: the tty driver that describes the tty device |
| 2948 | * @index: the index in the tty driver for this tty device | 3608 | * @index: the index in the tty driver for this tty device |
| 2949 | * @device: a struct device that is associated with this tty device. | 3609 | * @device: a struct device that is associated with this tty device. |
| 2950 | * This field is optional, if there is no known struct device for this | 3610 | * This field is optional, if there is no known struct device |
| 2951 | * tty device it can be set to NULL safely. | 3611 | * for this tty device it can be set to NULL safely. |
| 2952 | * | 3612 | * |
| 2953 | * Returns a pointer to the class device (or ERR_PTR(-EFOO) on error). | 3613 | * Returns a pointer to the class device (or ERR_PTR(-EFOO) on error). |
| 2954 | * | 3614 | * |
| 2955 | * This call is required to be made to register an individual tty device if | 3615 | * This call is required to be made to register an individual tty device |
| 2956 | * the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set. If that | 3616 | * if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set. If |
| 2957 | * bit is not set, this function should not be called by a tty driver. | 3617 | * that bit is not set, this function should not be called by a tty |
| 3618 | * driver. | ||
| 3619 | * | ||
| 3620 | * Locking: ?? | ||
| 2958 | */ | 3621 | */ |
| 3622 | |||
| 2959 | struct class_device *tty_register_device(struct tty_driver *driver, | 3623 | struct class_device *tty_register_device(struct tty_driver *driver, |
| 2960 | unsigned index, struct device *device) | 3624 | unsigned index, struct device *device) |
| 2961 | { | 3625 | { |
| @@ -2977,13 +3641,16 @@ struct class_device *tty_register_device(struct tty_driver *driver, | |||
| 2977 | } | 3641 | } |
| 2978 | 3642 | ||
| 2979 | /** | 3643 | /** |
| 2980 | * tty_unregister_device - unregister a tty device | 3644 | * tty_unregister_device - unregister a tty device |
| 2981 | * @driver: the tty driver that describes the tty device | 3645 | * @driver: the tty driver that describes the tty device |
| 2982 | * @index: the index in the tty driver for this tty device | 3646 | * @index: the index in the tty driver for this tty device |
| 2983 | * | 3647 | * |
| 2984 | * If a tty device is registered with a call to tty_register_device() then | 3648 | * If a tty device is registered with a call to tty_register_device() then |
| 2985 | * this function must be made when the tty device is gone. | 3649 | * this function must be called when the tty device is gone. |
| 3650 | * | ||
| 3651 | * Locking: ?? | ||
| 2986 | */ | 3652 | */ |
| 3653 | |||
| 2987 | void tty_unregister_device(struct tty_driver *driver, unsigned index) | 3654 | void tty_unregister_device(struct tty_driver *driver, unsigned index) |
| 2988 | { | 3655 | { |
| 2989 | class_device_destroy(tty_class, MKDEV(driver->major, driver->minor_start) + index); | 3656 | class_device_destroy(tty_class, MKDEV(driver->major, driver->minor_start) + index); |
| @@ -3094,7 +3761,6 @@ int tty_register_driver(struct tty_driver *driver) | |||
| 3094 | driver->cdev.owner = driver->owner; | 3761 | driver->cdev.owner = driver->owner; |
| 3095 | error = cdev_add(&driver->cdev, dev, driver->num); | 3762 | error = cdev_add(&driver->cdev, dev, driver->num); |
| 3096 | if (error) { | 3763 | if (error) { |
| 3097 | cdev_del(&driver->cdev); | ||
| 3098 | unregister_chrdev_region(dev, driver->num); | 3764 | unregister_chrdev_region(dev, driver->num); |
| 3099 | driver->ttys = NULL; | 3765 | driver->ttys = NULL; |
| 3100 | driver->termios = driver->termios_locked = NULL; | 3766 | driver->termios = driver->termios_locked = NULL; |
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index f19cf9d7792d..4ad47d321bd4 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c | |||
| @@ -36,6 +36,18 @@ | |||
| 36 | #define TERMIOS_WAIT 2 | 36 | #define TERMIOS_WAIT 2 |
| 37 | #define TERMIOS_TERMIO 4 | 37 | #define TERMIOS_TERMIO 4 |
| 38 | 38 | ||
| 39 | |||
| 40 | /** | ||
| 41 | * tty_wait_until_sent - wait for I/O to finish | ||
| 42 | * @tty: tty we are waiting for | ||
| 43 | * @timeout: how long we will wait | ||
| 44 | * | ||
| 45 | * Wait for characters pending in a tty driver to hit the wire, or | ||
| 46 | * for a timeout to occur (eg due to flow control) | ||
| 47 | * | ||
| 48 | * Locking: none | ||
| 49 | */ | ||
| 50 | |||
| 39 | void tty_wait_until_sent(struct tty_struct * tty, long timeout) | 51 | void tty_wait_until_sent(struct tty_struct * tty, long timeout) |
| 40 | { | 52 | { |
| 41 | DECLARE_WAITQUEUE(wait, current); | 53 | DECLARE_WAITQUEUE(wait, current); |
| @@ -94,6 +106,18 @@ static void unset_locked_termios(struct termios *termios, | |||
| 94 | old->c_cc[i] : termios->c_cc[i]; | 106 | old->c_cc[i] : termios->c_cc[i]; |
| 95 | } | 107 | } |
| 96 | 108 | ||
| 109 | /** | ||
| 110 | * change_termios - update termios values | ||
| 111 | * @tty: tty to update | ||
| 112 | * @new_termios: desired new value | ||
| 113 | * | ||
| 114 | * Perform updates to the termios values set on this terminal. There | ||
| 115 | * is a bit of layering violation here with n_tty in terms of the | ||
| 116 | * internal knowledge of this function. | ||
| 117 | * | ||
| 118 | * Locking: termios_sem | ||
| 119 | */ | ||
| 120 | |||
| 97 | static void change_termios(struct tty_struct * tty, struct termios * new_termios) | 121 | static void change_termios(struct tty_struct * tty, struct termios * new_termios) |
| 98 | { | 122 | { |
| 99 | int canon_change; | 123 | int canon_change; |
| @@ -155,6 +179,19 @@ static void change_termios(struct tty_struct * tty, struct termios * new_termios | |||
| 155 | up(&tty->termios_sem); | 179 | up(&tty->termios_sem); |
| 156 | } | 180 | } |
| 157 | 181 | ||
| 182 | /** | ||
| 183 | * set_termios - set termios values for a tty | ||
| 184 | * @tty: terminal device | ||
| 185 | * @arg: user data | ||
| 186 | * @opt: option information | ||
| 187 | * | ||
| 188 | * Helper function to prepare termios data and run neccessary other | ||
| 189 | * functions before using change_termios to do the actual changes. | ||
| 190 | * | ||
| 191 | * Locking: | ||
| 192 | * Called functions take ldisc and termios_sem locks | ||
| 193 | */ | ||
| 194 | |||
| 158 | static int set_termios(struct tty_struct * tty, void __user *arg, int opt) | 195 | static int set_termios(struct tty_struct * tty, void __user *arg, int opt) |
| 159 | { | 196 | { |
| 160 | struct termios tmp_termios; | 197 | struct termios tmp_termios; |
| @@ -284,6 +321,17 @@ static void set_sgflags(struct termios * termios, int flags) | |||
| 284 | } | 321 | } |
| 285 | } | 322 | } |
| 286 | 323 | ||
| 324 | /** | ||
| 325 | * set_sgttyb - set legacy terminal values | ||
| 326 | * @tty: tty structure | ||
| 327 | * @sgttyb: pointer to old style terminal structure | ||
| 328 | * | ||
| 329 | * Updates a terminal from the legacy BSD style terminal information | ||
| 330 | * structure. | ||
| 331 | * | ||
| 332 | * Locking: termios_sem | ||
| 333 | */ | ||
| 334 | |||
| 287 | static int set_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb) | 335 | static int set_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb) |
| 288 | { | 336 | { |
| 289 | int retval; | 337 | int retval; |
| @@ -369,9 +417,16 @@ static int set_ltchars(struct tty_struct * tty, struct ltchars __user * ltchars) | |||
| 369 | } | 417 | } |
| 370 | #endif | 418 | #endif |
| 371 | 419 | ||
| 372 | /* | 420 | /** |
| 373 | * Send a high priority character to the tty. | 421 | * send_prio_char - send priority character |
| 422 | * | ||
| 423 | * Send a high priority character to the tty even if stopped | ||
| 424 | * | ||
| 425 | * Locking: none | ||
| 426 | * | ||
| 427 | * FIXME: overlapping calls with start/stop tty lose state of tty | ||
| 374 | */ | 428 | */ |
| 429 | |||
| 375 | static void send_prio_char(struct tty_struct *tty, char ch) | 430 | static void send_prio_char(struct tty_struct *tty, char ch) |
| 376 | { | 431 | { |
| 377 | int was_stopped = tty->stopped; | 432 | int was_stopped = tty->stopped; |
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index eccffaf26faa..a5628a8b6620 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c | |||
| @@ -1011,6 +1011,8 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, | |||
| 1011 | return -EPERM; | 1011 | return -EPERM; |
| 1012 | vt_dont_switch = 0; | 1012 | vt_dont_switch = 0; |
| 1013 | return 0; | 1013 | return 0; |
| 1014 | case VT_GETHIFONTMASK: | ||
| 1015 | return put_user(vc->vc_hi_font_mask, (unsigned short __user *)arg); | ||
| 1014 | default: | 1016 | default: |
| 1015 | return -ENOIOCTLCMD; | 1017 | return -ENOIOCTLCMD; |
| 1016 | } | 1018 | } |
diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index d4bad6704bbe..448df2773377 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c | |||
| @@ -3552,6 +3552,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev) | |||
| 3552 | 3552 | ||
| 3553 | static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state) | 3553 | static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state) |
| 3554 | { | 3554 | { |
| 3555 | pci_save_state(pdev); | ||
| 3556 | |||
| 3555 | #ifdef CONFIG_PPC_PMAC | 3557 | #ifdef CONFIG_PPC_PMAC |
| 3556 | if (machine_is(powermac)) { | 3558 | if (machine_is(powermac)) { |
| 3557 | struct device_node *of_node; | 3559 | struct device_node *of_node; |
| @@ -3563,8 +3565,6 @@ static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state) | |||
| 3563 | } | 3565 | } |
| 3564 | #endif | 3566 | #endif |
| 3565 | 3567 | ||
| 3566 | pci_save_state(pdev); | ||
| 3567 | |||
| 3568 | return 0; | 3568 | return 0; |
| 3569 | } | 3569 | } |
| 3570 | 3570 | ||
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index be48cedf986b..c54de989eb00 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
| @@ -255,7 +255,9 @@ static struct region *__rh_alloc(struct region_hash *rh, region_t region) | |||
| 255 | struct region *reg, *nreg; | 255 | struct region *reg, *nreg; |
| 256 | 256 | ||
| 257 | read_unlock(&rh->hash_lock); | 257 | read_unlock(&rh->hash_lock); |
| 258 | nreg = mempool_alloc(rh->region_pool, GFP_NOIO); | 258 | nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); |
| 259 | if (unlikely(!nreg)) | ||
| 260 | nreg = kmalloc(sizeof(struct region), GFP_NOIO); | ||
| 259 | nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? | 261 | nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? |
| 260 | RH_CLEAN : RH_NOSYNC; | 262 | RH_CLEAN : RH_NOSYNC; |
| 261 | nreg->rh = rh; | 263 | nreg->rh = rh; |
diff --git a/drivers/md/md.c b/drivers/md/md.c index b6d16022a53e..8dbab2ef3885 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -1597,6 +1597,19 @@ void md_update_sb(mddev_t * mddev) | |||
| 1597 | 1597 | ||
| 1598 | repeat: | 1598 | repeat: |
| 1599 | spin_lock_irq(&mddev->write_lock); | 1599 | spin_lock_irq(&mddev->write_lock); |
| 1600 | |||
| 1601 | if (mddev->degraded && mddev->sb_dirty == 3) | ||
| 1602 | /* If the array is degraded, then skipping spares is both | ||
| 1603 | * dangerous and fairly pointless. | ||
| 1604 | * Dangerous because a device that was removed from the array | ||
| 1605 | * might have a event_count that still looks up-to-date, | ||
| 1606 | * so it can be re-added without a resync. | ||
| 1607 | * Pointless because if there are any spares to skip, | ||
| 1608 | * then a recovery will happen and soon that array won't | ||
| 1609 | * be degraded any more and the spare can go back to sleep then. | ||
| 1610 | */ | ||
| 1611 | mddev->sb_dirty = 1; | ||
| 1612 | |||
| 1600 | sync_req = mddev->in_sync; | 1613 | sync_req = mddev->in_sync; |
| 1601 | mddev->utime = get_seconds(); | 1614 | mddev->utime = get_seconds(); |
| 1602 | if (mddev->sb_dirty == 3) | 1615 | if (mddev->sb_dirty == 3) |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1efe22a2d041..87bfe9e7d8ca 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -1625,15 +1625,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
| 1625 | return 0; | 1625 | return 0; |
| 1626 | } | 1626 | } |
| 1627 | 1627 | ||
| 1628 | /* before building a request, check if we can skip these blocks.. | ||
| 1629 | * This call the bitmap_start_sync doesn't actually record anything | ||
| 1630 | */ | ||
| 1631 | if (mddev->bitmap == NULL && | 1628 | if (mddev->bitmap == NULL && |
| 1632 | mddev->recovery_cp == MaxSector && | 1629 | mddev->recovery_cp == MaxSector && |
| 1630 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && | ||
| 1633 | conf->fullsync == 0) { | 1631 | conf->fullsync == 0) { |
| 1634 | *skipped = 1; | 1632 | *skipped = 1; |
| 1635 | return max_sector - sector_nr; | 1633 | return max_sector - sector_nr; |
| 1636 | } | 1634 | } |
| 1635 | /* before building a request, check if we can skip these blocks.. | ||
| 1636 | * This call the bitmap_start_sync doesn't actually record anything | ||
| 1637 | */ | ||
| 1637 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && | 1638 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && |
| 1638 | !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | 1639 | !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { |
| 1639 | /* We can skip this block, and probably several more */ | 1640 | /* We can skip this block, and probably several more */ |
diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c index d7897dc6b3c8..a0ba07c36ee9 100644 --- a/drivers/mtd/nand/ams-delta.c +++ b/drivers/mtd/nand/ams-delta.c | |||
| @@ -130,11 +130,13 @@ static void ams_delta_hwcontrol(struct mtd_info *mtd, int cmd, | |||
| 130 | if (ctrl & NAND_CTRL_CHANGE) { | 130 | if (ctrl & NAND_CTRL_CHANGE) { |
| 131 | unsigned long bits; | 131 | unsigned long bits; |
| 132 | 132 | ||
| 133 | bits = (~ctrl & NAND_NCE) << 2; | 133 | bits = (~ctrl & NAND_NCE) ? AMS_DELTA_LATCH2_NAND_NCE : 0; |
| 134 | bits |= (ctrl & NAND_CLE) << 7; | 134 | bits |= (ctrl & NAND_CLE) ? AMS_DELTA_LATCH2_NAND_CLE : 0; |
| 135 | bits |= (ctrl & NAND_ALE) << 6; | 135 | bits |= (ctrl & NAND_ALE) ? AMS_DELTA_LATCH2_NAND_ALE : 0; |
| 136 | 136 | ||
| 137 | ams_delta_latch2_write(0xC2, bits); | 137 | ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_CLE | |
| 138 | AMS_DELTA_LATCH2_NAND_ALE | | ||
| 139 | AMS_DELTA_LATCH2_NAND_NCE, bits); | ||
| 138 | } | 140 | } |
| 139 | 141 | ||
| 140 | if (cmd != NAND_CMD_NONE) | 142 | if (cmd != NAND_CMD_NONE) |
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 62b861304e03..c8cbc00243fe 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c | |||
| @@ -1093,9 +1093,10 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len, | |||
| 1093 | 1093 | ||
| 1094 | ret = nand_do_read_ops(mtd, from, &chip->ops); | 1094 | ret = nand_do_read_ops(mtd, from, &chip->ops); |
| 1095 | 1095 | ||
| 1096 | *retlen = chip->ops.retlen; | ||
| 1097 | |||
| 1096 | nand_release_device(mtd); | 1098 | nand_release_device(mtd); |
| 1097 | 1099 | ||
| 1098 | *retlen = chip->ops.retlen; | ||
| 1099 | return ret; | 1100 | return ret; |
| 1100 | } | 1101 | } |
| 1101 | 1102 | ||
| @@ -1691,9 +1692,10 @@ static int nand_write(struct mtd_info *mtd, loff_t to, size_t len, | |||
| 1691 | 1692 | ||
| 1692 | ret = nand_do_write_ops(mtd, to, &chip->ops); | 1693 | ret = nand_do_write_ops(mtd, to, &chip->ops); |
| 1693 | 1694 | ||
| 1695 | *retlen = chip->ops.retlen; | ||
| 1696 | |||
| 1694 | nand_release_device(mtd); | 1697 | nand_release_device(mtd); |
| 1695 | 1698 | ||
| 1696 | *retlen = chip->ops.retlen; | ||
| 1697 | return ret; | 1699 | return ret; |
| 1698 | } | 1700 | } |
| 1699 | 1701 | ||
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index d6d1bff52b8e..2c7de79c83b9 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c | |||
| @@ -69,12 +69,12 @@ static void s3c_rtc_setaie(int to) | |||
| 69 | 69 | ||
| 70 | pr_debug("%s: aie=%d\n", __FUNCTION__, to); | 70 | pr_debug("%s: aie=%d\n", __FUNCTION__, to); |
| 71 | 71 | ||
| 72 | tmp = readb(S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN; | 72 | tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN; |
| 73 | 73 | ||
| 74 | if (to) | 74 | if (to) |
| 75 | tmp |= S3C2410_RTCALM_ALMEN; | 75 | tmp |= S3C2410_RTCALM_ALMEN; |
| 76 | 76 | ||
| 77 | writeb(tmp, S3C2410_RTCALM); | 77 | writeb(tmp, s3c_rtc_base + S3C2410_RTCALM); |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | static void s3c_rtc_setpie(int to) | 80 | static void s3c_rtc_setpie(int to) |
| @@ -84,12 +84,12 @@ static void s3c_rtc_setpie(int to) | |||
| 84 | pr_debug("%s: pie=%d\n", __FUNCTION__, to); | 84 | pr_debug("%s: pie=%d\n", __FUNCTION__, to); |
| 85 | 85 | ||
| 86 | spin_lock_irq(&s3c_rtc_pie_lock); | 86 | spin_lock_irq(&s3c_rtc_pie_lock); |
| 87 | tmp = readb(S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE; | 87 | tmp = readb(s3c_rtc_base + S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE; |
| 88 | 88 | ||
| 89 | if (to) | 89 | if (to) |
| 90 | tmp |= S3C2410_TICNT_ENABLE; | 90 | tmp |= S3C2410_TICNT_ENABLE; |
| 91 | 91 | ||
| 92 | writeb(tmp, S3C2410_TICNT); | 92 | writeb(tmp, s3c_rtc_base + S3C2410_TICNT); |
| 93 | spin_unlock_irq(&s3c_rtc_pie_lock); | 93 | spin_unlock_irq(&s3c_rtc_pie_lock); |
| 94 | } | 94 | } |
| 95 | 95 | ||
| @@ -98,13 +98,13 @@ static void s3c_rtc_setfreq(int freq) | |||
| 98 | unsigned int tmp; | 98 | unsigned int tmp; |
| 99 | 99 | ||
| 100 | spin_lock_irq(&s3c_rtc_pie_lock); | 100 | spin_lock_irq(&s3c_rtc_pie_lock); |
| 101 | tmp = readb(S3C2410_TICNT) & S3C2410_TICNT_ENABLE; | 101 | tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE; |
| 102 | 102 | ||
| 103 | s3c_rtc_freq = freq; | 103 | s3c_rtc_freq = freq; |
| 104 | 104 | ||
| 105 | tmp |= (128 / freq)-1; | 105 | tmp |= (128 / freq)-1; |
| 106 | 106 | ||
| 107 | writeb(tmp, S3C2410_TICNT); | 107 | writeb(tmp, s3c_rtc_base + S3C2410_TICNT); |
| 108 | spin_unlock_irq(&s3c_rtc_pie_lock); | 108 | spin_unlock_irq(&s3c_rtc_pie_lock); |
| 109 | } | 109 | } |
| 110 | 110 | ||
| @@ -113,14 +113,15 @@ static void s3c_rtc_setfreq(int freq) | |||
| 113 | static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) | 113 | static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) |
| 114 | { | 114 | { |
| 115 | unsigned int have_retried = 0; | 115 | unsigned int have_retried = 0; |
| 116 | void __iomem *base = s3c_rtc_base; | ||
| 116 | 117 | ||
| 117 | retry_get_time: | 118 | retry_get_time: |
| 118 | rtc_tm->tm_min = readb(S3C2410_RTCMIN); | 119 | rtc_tm->tm_min = readb(base + S3C2410_RTCMIN); |
| 119 | rtc_tm->tm_hour = readb(S3C2410_RTCHOUR); | 120 | rtc_tm->tm_hour = readb(base + S3C2410_RTCHOUR); |
| 120 | rtc_tm->tm_mday = readb(S3C2410_RTCDATE); | 121 | rtc_tm->tm_mday = readb(base + S3C2410_RTCDATE); |
| 121 | rtc_tm->tm_mon = readb(S3C2410_RTCMON); | 122 | rtc_tm->tm_mon = readb(base + S3C2410_RTCMON); |
| 122 | rtc_tm->tm_year = readb(S3C2410_RTCYEAR); | 123 | rtc_tm->tm_year = readb(base + S3C2410_RTCYEAR); |
| 123 | rtc_tm->tm_sec = readb(S3C2410_RTCSEC); | 124 | rtc_tm->tm_sec = readb(base + S3C2410_RTCSEC); |
| 124 | 125 | ||
| 125 | /* the only way to work out wether the system was mid-update | 126 | /* the only way to work out wether the system was mid-update |
| 126 | * when we read it is to check the second counter, and if it | 127 | * when we read it is to check the second counter, and if it |
| @@ -151,17 +152,26 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) | |||
| 151 | 152 | ||
| 152 | static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) | 153 | static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) |
| 153 | { | 154 | { |
| 154 | /* the rtc gets round the y2k problem by just not supporting it */ | 155 | void __iomem *base = s3c_rtc_base; |
| 156 | int year = tm->tm_year - 100; | ||
| 155 | 157 | ||
| 156 | if (tm->tm_year < 100) | 158 | pr_debug("set time %02d.%02d.%02d %02d/%02d/%02d\n", |
| 159 | tm->tm_year, tm->tm_mon, tm->tm_mday, | ||
| 160 | tm->tm_hour, tm->tm_min, tm->tm_sec); | ||
| 161 | |||
| 162 | /* we get around y2k by simply not supporting it */ | ||
| 163 | |||
| 164 | if (year < 0 || year >= 100) { | ||
| 165 | dev_err(dev, "rtc only supports 100 years\n"); | ||
| 157 | return -EINVAL; | 166 | return -EINVAL; |
| 167 | } | ||
| 158 | 168 | ||
| 159 | writeb(BIN2BCD(tm->tm_sec), S3C2410_RTCSEC); | 169 | writeb(BIN2BCD(tm->tm_sec), base + S3C2410_RTCSEC); |
| 160 | writeb(BIN2BCD(tm->tm_min), S3C2410_RTCMIN); | 170 | writeb(BIN2BCD(tm->tm_min), base + S3C2410_RTCMIN); |
| 161 | writeb(BIN2BCD(tm->tm_hour), S3C2410_RTCHOUR); | 171 | writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR); |
| 162 | writeb(BIN2BCD(tm->tm_mday), S3C2410_RTCDATE); | 172 | writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE); |
| 163 | writeb(BIN2BCD(tm->tm_mon + 1), S3C2410_RTCMON); | 173 | writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON); |
| 164 | writeb(BIN2BCD(tm->tm_year - 100), S3C2410_RTCYEAR); | 174 | writeb(BIN2BCD(year), base + S3C2410_RTCYEAR); |
| 165 | 175 | ||
| 166 | return 0; | 176 | return 0; |
| 167 | } | 177 | } |
| @@ -169,16 +179,17 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) | |||
| 169 | static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) | 179 | static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) |
| 170 | { | 180 | { |
| 171 | struct rtc_time *alm_tm = &alrm->time; | 181 | struct rtc_time *alm_tm = &alrm->time; |
| 182 | void __iomem *base = s3c_rtc_base; | ||
| 172 | unsigned int alm_en; | 183 | unsigned int alm_en; |
| 173 | 184 | ||
| 174 | alm_tm->tm_sec = readb(S3C2410_ALMSEC); | 185 | alm_tm->tm_sec = readb(base + S3C2410_ALMSEC); |
| 175 | alm_tm->tm_min = readb(S3C2410_ALMMIN); | 186 | alm_tm->tm_min = readb(base + S3C2410_ALMMIN); |
| 176 | alm_tm->tm_hour = readb(S3C2410_ALMHOUR); | 187 | alm_tm->tm_hour = readb(base + S3C2410_ALMHOUR); |
| 177 | alm_tm->tm_mon = readb(S3C2410_ALMMON); | 188 | alm_tm->tm_mon = readb(base + S3C2410_ALMMON); |
| 178 | alm_tm->tm_mday = readb(S3C2410_ALMDATE); | 189 | alm_tm->tm_mday = readb(base + S3C2410_ALMDATE); |
| 179 | alm_tm->tm_year = readb(S3C2410_ALMYEAR); | 190 | alm_tm->tm_year = readb(base + S3C2410_ALMYEAR); |
| 180 | 191 | ||
| 181 | alm_en = readb(S3C2410_RTCALM); | 192 | alm_en = readb(base + S3C2410_RTCALM); |
| 182 | 193 | ||
| 183 | pr_debug("read alarm %02x %02x.%02x.%02x %02x/%02x/%02x\n", | 194 | pr_debug("read alarm %02x %02x.%02x.%02x %02x/%02x/%02x\n", |
| 184 | alm_en, | 195 | alm_en, |
| @@ -226,6 +237,7 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) | |||
| 226 | static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) | 237 | static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) |
| 227 | { | 238 | { |
| 228 | struct rtc_time *tm = &alrm->time; | 239 | struct rtc_time *tm = &alrm->time; |
| 240 | void __iomem *base = s3c_rtc_base; | ||
| 229 | unsigned int alrm_en; | 241 | unsigned int alrm_en; |
| 230 | 242 | ||
| 231 | pr_debug("s3c_rtc_setalarm: %d, %02x/%02x/%02x %02x.%02x.%02x\n", | 243 | pr_debug("s3c_rtc_setalarm: %d, %02x/%02x/%02x %02x.%02x.%02x\n", |
| @@ -234,32 +246,32 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) | |||
| 234 | tm->tm_hour & 0xff, tm->tm_min & 0xff, tm->tm_sec); | 246 | tm->tm_hour & 0xff, tm->tm_min & 0xff, tm->tm_sec); |
| 235 | 247 | ||
| 236 | 248 | ||
| 237 | alrm_en = readb(S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN; | 249 | alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN; |
| 238 | writeb(0x00, S3C2410_RTCALM); | 250 | writeb(0x00, base + S3C2410_RTCALM); |
| 239 | 251 | ||
| 240 | if (tm->tm_sec < 60 && tm->tm_sec >= 0) { | 252 | if (tm->tm_sec < 60 && tm->tm_sec >= 0) { |
| 241 | alrm_en |= S3C2410_RTCALM_SECEN; | 253 | alrm_en |= S3C2410_RTCALM_SECEN; |
| 242 | writeb(BIN2BCD(tm->tm_sec), S3C2410_ALMSEC); | 254 | writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC); |
| 243 | } | 255 | } |
| 244 | 256 | ||
| 245 | if (tm->tm_min < 60 && tm->tm_min >= 0) { | 257 | if (tm->tm_min < 60 && tm->tm_min >= 0) { |
| 246 | alrm_en |= S3C2410_RTCALM_MINEN; | 258 | alrm_en |= S3C2410_RTCALM_MINEN; |
| 247 | writeb(BIN2BCD(tm->tm_min), S3C2410_ALMMIN); | 259 | writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN); |
| 248 | } | 260 | } |
| 249 | 261 | ||
| 250 | if (tm->tm_hour < 24 && tm->tm_hour >= 0) { | 262 | if (tm->tm_hour < 24 && tm->tm_hour >= 0) { |
| 251 | alrm_en |= S3C2410_RTCALM_HOUREN; | 263 | alrm_en |= S3C2410_RTCALM_HOUREN; |
| 252 | writeb(BIN2BCD(tm->tm_hour), S3C2410_ALMHOUR); | 264 | writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR); |
| 253 | } | 265 | } |
| 254 | 266 | ||
| 255 | pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en); | 267 | pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en); |
| 256 | 268 | ||
| 257 | writeb(alrm_en, S3C2410_RTCALM); | 269 | writeb(alrm_en, base + S3C2410_RTCALM); |
| 258 | 270 | ||
| 259 | if (0) { | 271 | if (0) { |
| 260 | alrm_en = readb(S3C2410_RTCALM); | 272 | alrm_en = readb(base + S3C2410_RTCALM); |
| 261 | alrm_en &= ~S3C2410_RTCALM_ALMEN; | 273 | alrm_en &= ~S3C2410_RTCALM_ALMEN; |
| 262 | writeb(alrm_en, S3C2410_RTCALM); | 274 | writeb(alrm_en, base + S3C2410_RTCALM); |
| 263 | disable_irq_wake(s3c_rtc_alarmno); | 275 | disable_irq_wake(s3c_rtc_alarmno); |
| 264 | } | 276 | } |
| 265 | 277 | ||
| @@ -319,8 +331,8 @@ static int s3c_rtc_ioctl(struct device *dev, | |||
| 319 | 331 | ||
| 320 | static int s3c_rtc_proc(struct device *dev, struct seq_file *seq) | 332 | static int s3c_rtc_proc(struct device *dev, struct seq_file *seq) |
| 321 | { | 333 | { |
| 322 | unsigned int rtcalm = readb(S3C2410_RTCALM); | 334 | unsigned int rtcalm = readb(s3c_rtc_base + S3C2410_RTCALM); |
| 323 | unsigned int ticnt = readb (S3C2410_TICNT); | 335 | unsigned int ticnt = readb(s3c_rtc_base + S3C2410_TICNT); |
| 324 | 336 | ||
| 325 | seq_printf(seq, "alarm_IRQ\t: %s\n", | 337 | seq_printf(seq, "alarm_IRQ\t: %s\n", |
| 326 | (rtcalm & S3C2410_RTCALM_ALMEN) ? "yes" : "no" ); | 338 | (rtcalm & S3C2410_RTCALM_ALMEN) ? "yes" : "no" ); |
| @@ -387,39 +399,40 @@ static struct rtc_class_ops s3c_rtcops = { | |||
| 387 | 399 | ||
| 388 | static void s3c_rtc_enable(struct platform_device *pdev, int en) | 400 | static void s3c_rtc_enable(struct platform_device *pdev, int en) |
| 389 | { | 401 | { |
| 402 | void __iomem *base = s3c_rtc_base; | ||
| 390 | unsigned int tmp; | 403 | unsigned int tmp; |
| 391 | 404 | ||
| 392 | if (s3c_rtc_base == NULL) | 405 | if (s3c_rtc_base == NULL) |
| 393 | return; | 406 | return; |
| 394 | 407 | ||
| 395 | if (!en) { | 408 | if (!en) { |
| 396 | tmp = readb(S3C2410_RTCCON); | 409 | tmp = readb(base + S3C2410_RTCCON); |
| 397 | writeb(tmp & ~S3C2410_RTCCON_RTCEN, S3C2410_RTCCON); | 410 | writeb(tmp & ~S3C2410_RTCCON_RTCEN, base + S3C2410_RTCCON); |
| 398 | 411 | ||
| 399 | tmp = readb(S3C2410_TICNT); | 412 | tmp = readb(base + S3C2410_TICNT); |
| 400 | writeb(tmp & ~S3C2410_TICNT_ENABLE, S3C2410_TICNT); | 413 | writeb(tmp & ~S3C2410_TICNT_ENABLE, base + S3C2410_TICNT); |
| 401 | } else { | 414 | } else { |
| 402 | /* re-enable the device, and check it is ok */ | 415 | /* re-enable the device, and check it is ok */ |
| 403 | 416 | ||
| 404 | if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){ | 417 | if ((readb(base+S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){ |
| 405 | dev_info(&pdev->dev, "rtc disabled, re-enabling\n"); | 418 | dev_info(&pdev->dev, "rtc disabled, re-enabling\n"); |
| 406 | 419 | ||
| 407 | tmp = readb(S3C2410_RTCCON); | 420 | tmp = readb(base + S3C2410_RTCCON); |
| 408 | writeb(tmp | S3C2410_RTCCON_RTCEN , S3C2410_RTCCON); | 421 | writeb(tmp|S3C2410_RTCCON_RTCEN, base+S3C2410_RTCCON); |
| 409 | } | 422 | } |
| 410 | 423 | ||
| 411 | if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){ | 424 | if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){ |
| 412 | dev_info(&pdev->dev, "removing RTCCON_CNTSEL\n"); | 425 | dev_info(&pdev->dev, "removing RTCCON_CNTSEL\n"); |
| 413 | 426 | ||
| 414 | tmp = readb(S3C2410_RTCCON); | 427 | tmp = readb(base + S3C2410_RTCCON); |
| 415 | writeb(tmp& ~S3C2410_RTCCON_CNTSEL , S3C2410_RTCCON); | 428 | writeb(tmp& ~S3C2410_RTCCON_CNTSEL, base+S3C2410_RTCCON); |
| 416 | } | 429 | } |
| 417 | 430 | ||
| 418 | if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){ | 431 | if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){ |
| 419 | dev_info(&pdev->dev, "removing RTCCON_CLKRST\n"); | 432 | dev_info(&pdev->dev, "removing RTCCON_CLKRST\n"); |
| 420 | 433 | ||
| 421 | tmp = readb(S3C2410_RTCCON); | 434 | tmp = readb(base + S3C2410_RTCCON); |
| 422 | writeb(tmp & ~S3C2410_RTCCON_CLKRST, S3C2410_RTCCON); | 435 | writeb(tmp & ~S3C2410_RTCCON_CLKRST, base+S3C2410_RTCCON); |
| 423 | } | 436 | } |
| 424 | } | 437 | } |
| 425 | } | 438 | } |
| @@ -475,8 +488,8 @@ static int s3c_rtc_probe(struct platform_device *pdev) | |||
| 475 | } | 488 | } |
| 476 | 489 | ||
| 477 | s3c_rtc_mem = request_mem_region(res->start, | 490 | s3c_rtc_mem = request_mem_region(res->start, |
| 478 | res->end-res->start+1, | 491 | res->end-res->start+1, |
| 479 | pdev->name); | 492 | pdev->name); |
| 480 | 493 | ||
| 481 | if (s3c_rtc_mem == NULL) { | 494 | if (s3c_rtc_mem == NULL) { |
| 482 | dev_err(&pdev->dev, "failed to reserve memory region\n"); | 495 | dev_err(&pdev->dev, "failed to reserve memory region\n"); |
| @@ -495,7 +508,8 @@ static int s3c_rtc_probe(struct platform_device *pdev) | |||
| 495 | 508 | ||
| 496 | s3c_rtc_enable(pdev, 1); | 509 | s3c_rtc_enable(pdev, 1); |
| 497 | 510 | ||
| 498 | pr_debug("s3c2410_rtc: RTCCON=%02x\n", readb(S3C2410_RTCCON)); | 511 | pr_debug("s3c2410_rtc: RTCCON=%02x\n", |
| 512 | readb(s3c_rtc_base + S3C2410_RTCCON)); | ||
| 499 | 513 | ||
| 500 | s3c_rtc_setfreq(s3c_rtc_freq); | 514 | s3c_rtc_setfreq(s3c_rtc_freq); |
| 501 | 515 | ||
| @@ -543,7 +557,7 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) | |||
| 543 | 557 | ||
| 544 | /* save TICNT for anyone using periodic interrupts */ | 558 | /* save TICNT for anyone using periodic interrupts */ |
| 545 | 559 | ||
| 546 | ticnt_save = readb(S3C2410_TICNT); | 560 | ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT); |
| 547 | 561 | ||
| 548 | /* calculate time delta for suspend */ | 562 | /* calculate time delta for suspend */ |
| 549 | 563 | ||
| @@ -567,7 +581,7 @@ static int s3c_rtc_resume(struct platform_device *pdev) | |||
| 567 | rtc_tm_to_time(&tm, &time.tv_sec); | 581 | rtc_tm_to_time(&tm, &time.tv_sec); |
| 568 | restore_time_delta(&s3c_rtc_delta, &time); | 582 | restore_time_delta(&s3c_rtc_delta, &time); |
| 569 | 583 | ||
| 570 | writeb(ticnt_save, S3C2410_TICNT); | 584 | writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT); |
| 571 | return 0; | 585 | return 0; |
| 572 | } | 586 | } |
| 573 | #else | 587 | #else |
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index f7b5d7372d26..94d1de55607f 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c | |||
| @@ -517,7 +517,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive) | |||
| 517 | /* No more interrupts */ | 517 | /* No more interrupts */ |
| 518 | if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) | 518 | if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) |
| 519 | printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred); | 519 | printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred); |
| 520 | local_irq_enable(); | 520 | local_irq_enable_in_hardirq(); |
| 521 | if (status.b.check) | 521 | if (status.b.check) |
| 522 | rq->errors++; | 522 | rq->errors++; |
| 523 | idescsi_end_request (drive, 1, 0); | 523 | idescsi_end_request (drive, 1, 0); |
diff --git a/drivers/video/imacfb.c b/drivers/video/imacfb.c index b485bece5fc9..18ea4a549105 100644 --- a/drivers/video/imacfb.c +++ b/drivers/video/imacfb.c | |||
| @@ -71,10 +71,10 @@ static int set_system(struct dmi_system_id *id) | |||
| 71 | static struct dmi_system_id __initdata dmi_system_table[] = { | 71 | static struct dmi_system_id __initdata dmi_system_table[] = { |
| 72 | { set_system, "iMac4,1", { | 72 | { set_system, "iMac4,1", { |
| 73 | DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."), | 73 | DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."), |
| 74 | DMI_MATCH(DMI_BIOS_VERSION,"iMac4,1") }, (void*)M_I17}, | 74 | DMI_MATCH(DMI_PRODUCT_NAME,"iMac4,1") }, (void*)M_I17}, |
| 75 | { set_system, "MacBookPro1,1", { | 75 | { set_system, "MacBookPro1,1", { |
| 76 | DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."), | 76 | DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."), |
| 77 | DMI_MATCH(DMI_BIOS_VERSION,"MacBookPro1,1") }, (void*)M_I17}, | 77 | DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro1,1") }, (void*)M_I17}, |
| 78 | { set_system, "MacBook1,1", { | 78 | { set_system, "MacBook1,1", { |
| 79 | DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."), | 79 | DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."), |
| 80 | DMI_MATCH(DMI_PRODUCT_NAME,"MacBook1,1")}, (void *)M_MACBOOK}, | 80 | DMI_MATCH(DMI_PRODUCT_NAME,"MacBook1,1")}, (void *)M_MACBOOK}, |
diff --git a/drivers/video/matrox/g450_pll.c b/drivers/video/matrox/g450_pll.c index 440272ad10e7..7c76e079ca7d 100644 --- a/drivers/video/matrox/g450_pll.c +++ b/drivers/video/matrox/g450_pll.c | |||
| @@ -331,7 +331,15 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll, | |||
| 331 | tmp |= M1064_XPIXCLKCTRL_PLL_UP; | 331 | tmp |= M1064_XPIXCLKCTRL_PLL_UP; |
| 332 | } | 332 | } |
| 333 | matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp); | 333 | matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp); |
| 334 | #ifdef __powerpc__ | ||
| 335 | /* This is necessary to avoid jitter on PowerPC | ||
| 336 | * (OpenFirmware) systems, but apparently | ||
| 337 | * introduces jitter, at least on a x86-64 | ||
| 338 | * using DVI. | ||
| 339 | * A simple workaround is disable for non-PPC. | ||
| 340 | */ | ||
| 334 | matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL, 0); | 341 | matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL, 0); |
| 342 | #endif /* __powerpc__ */ | ||
| 335 | matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl); | 343 | matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl); |
| 336 | 344 | ||
| 337 | matroxfb_DAC_unlock_irqrestore(flags); | 345 | matroxfb_DAC_unlock_irqrestore(flags); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 37534573960b..045f98854f14 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size) | |||
| 884 | } | 884 | } |
| 885 | EXPORT_SYMBOL(bd_set_size); | 885 | EXPORT_SYMBOL(bd_set_size); |
| 886 | 886 | ||
| 887 | static int __blkdev_put(struct block_device *bdev, unsigned int subclass) | ||
| 888 | { | ||
| 889 | int ret = 0; | ||
| 890 | struct inode *bd_inode = bdev->bd_inode; | ||
| 891 | struct gendisk *disk = bdev->bd_disk; | ||
| 892 | |||
| 893 | mutex_lock_nested(&bdev->bd_mutex, subclass); | ||
| 894 | lock_kernel(); | ||
| 895 | if (!--bdev->bd_openers) { | ||
| 896 | sync_blockdev(bdev); | ||
| 897 | kill_bdev(bdev); | ||
| 898 | } | ||
| 899 | if (bdev->bd_contains == bdev) { | ||
| 900 | if (disk->fops->release) | ||
| 901 | ret = disk->fops->release(bd_inode, NULL); | ||
| 902 | } else { | ||
| 903 | mutex_lock_nested(&bdev->bd_contains->bd_mutex, | ||
| 904 | subclass + 1); | ||
| 905 | bdev->bd_contains->bd_part_count--; | ||
| 906 | mutex_unlock(&bdev->bd_contains->bd_mutex); | ||
| 907 | } | ||
| 908 | if (!bdev->bd_openers) { | ||
| 909 | struct module *owner = disk->fops->owner; | ||
| 910 | |||
| 911 | put_disk(disk); | ||
| 912 | module_put(owner); | ||
| 913 | |||
| 914 | if (bdev->bd_contains != bdev) { | ||
| 915 | kobject_put(&bdev->bd_part->kobj); | ||
| 916 | bdev->bd_part = NULL; | ||
| 917 | } | ||
| 918 | bdev->bd_disk = NULL; | ||
| 919 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | ||
| 920 | if (bdev != bdev->bd_contains) | ||
| 921 | __blkdev_put(bdev->bd_contains, subclass + 1); | ||
| 922 | bdev->bd_contains = NULL; | ||
| 923 | } | ||
| 924 | unlock_kernel(); | ||
| 925 | mutex_unlock(&bdev->bd_mutex); | ||
| 926 | bdput(bdev); | ||
| 927 | return ret; | ||
| 928 | } | ||
| 929 | |||
| 930 | int blkdev_put(struct block_device *bdev) | ||
| 931 | { | ||
| 932 | return __blkdev_put(bdev, BD_MUTEX_NORMAL); | ||
| 933 | } | ||
| 934 | EXPORT_SYMBOL(blkdev_put); | ||
| 935 | |||
| 936 | int blkdev_put_partition(struct block_device *bdev) | ||
| 937 | { | ||
| 938 | return __blkdev_put(bdev, BD_MUTEX_PARTITION); | ||
| 939 | } | ||
| 940 | EXPORT_SYMBOL(blkdev_put_partition); | ||
| 941 | |||
| 887 | static int | 942 | static int |
| 888 | blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); | 943 | blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); |
| 889 | 944 | ||
| @@ -980,7 +1035,7 @@ out_first: | |||
| 980 | bdev->bd_disk = NULL; | 1035 | bdev->bd_disk = NULL; |
| 981 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1036 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
| 982 | if (bdev != bdev->bd_contains) | 1037 | if (bdev != bdev->bd_contains) |
| 983 | blkdev_put(bdev->bd_contains); | 1038 | __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE); |
| 984 | bdev->bd_contains = NULL; | 1039 | bdev->bd_contains = NULL; |
| 985 | put_disk(disk); | 1040 | put_disk(disk); |
| 986 | module_put(owner); | 1041 | module_put(owner); |
| @@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp) | |||
| 1079 | return res; | 1134 | return res; |
| 1080 | } | 1135 | } |
| 1081 | 1136 | ||
| 1082 | static int __blkdev_put(struct block_device *bdev, unsigned int subclass) | ||
| 1083 | { | ||
| 1084 | int ret = 0; | ||
| 1085 | struct inode *bd_inode = bdev->bd_inode; | ||
| 1086 | struct gendisk *disk = bdev->bd_disk; | ||
| 1087 | |||
| 1088 | mutex_lock_nested(&bdev->bd_mutex, subclass); | ||
| 1089 | lock_kernel(); | ||
| 1090 | if (!--bdev->bd_openers) { | ||
| 1091 | sync_blockdev(bdev); | ||
| 1092 | kill_bdev(bdev); | ||
| 1093 | } | ||
| 1094 | if (bdev->bd_contains == bdev) { | ||
| 1095 | if (disk->fops->release) | ||
| 1096 | ret = disk->fops->release(bd_inode, NULL); | ||
| 1097 | } else { | ||
| 1098 | mutex_lock_nested(&bdev->bd_contains->bd_mutex, | ||
| 1099 | subclass + 1); | ||
| 1100 | bdev->bd_contains->bd_part_count--; | ||
| 1101 | mutex_unlock(&bdev->bd_contains->bd_mutex); | ||
| 1102 | } | ||
| 1103 | if (!bdev->bd_openers) { | ||
| 1104 | struct module *owner = disk->fops->owner; | ||
| 1105 | |||
| 1106 | put_disk(disk); | ||
| 1107 | module_put(owner); | ||
| 1108 | |||
| 1109 | if (bdev->bd_contains != bdev) { | ||
| 1110 | kobject_put(&bdev->bd_part->kobj); | ||
| 1111 | bdev->bd_part = NULL; | ||
| 1112 | } | ||
| 1113 | bdev->bd_disk = NULL; | ||
| 1114 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | ||
| 1115 | if (bdev != bdev->bd_contains) | ||
| 1116 | __blkdev_put(bdev->bd_contains, subclass + 1); | ||
| 1117 | bdev->bd_contains = NULL; | ||
| 1118 | } | ||
| 1119 | unlock_kernel(); | ||
| 1120 | mutex_unlock(&bdev->bd_mutex); | ||
| 1121 | bdput(bdev); | ||
| 1122 | return ret; | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | int blkdev_put(struct block_device *bdev) | ||
| 1126 | { | ||
| 1127 | return __blkdev_put(bdev, BD_MUTEX_NORMAL); | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | EXPORT_SYMBOL(blkdev_put); | ||
| 1131 | |||
| 1132 | int blkdev_put_partition(struct block_device *bdev) | ||
| 1133 | { | ||
| 1134 | return __blkdev_put(bdev, BD_MUTEX_PARTITION); | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | EXPORT_SYMBOL(blkdev_put_partition); | ||
| 1138 | |||
| 1139 | static int blkdev_close(struct inode * inode, struct file * filp) | 1137 | static int blkdev_close(struct inode * inode, struct file * filp) |
| 1140 | { | 1138 | { |
| 1141 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 1139 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 19ffb043abbc..3a3567433b92 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
| @@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) | |||
| 1168 | eexit_1: | 1168 | eexit_1: |
| 1169 | 1169 | ||
| 1170 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", | 1170 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", |
| 1171 | current, ep, epi->file, error)); | 1171 | current, ep, epi->ffd.file, error)); |
| 1172 | 1172 | ||
| 1173 | return error; | 1173 | return error; |
| 1174 | } | 1174 | } |
| @@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
| 1236 | struct eventpoll *ep = epi->ep; | 1236 | struct eventpoll *ep = epi->ep; |
| 1237 | 1237 | ||
| 1238 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", | 1238 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", |
| 1239 | current, epi->file, epi, ep)); | 1239 | current, epi->ffd.file, epi, ep)); |
| 1240 | 1240 | ||
| 1241 | write_lock_irqsave(&ep->lock, flags); | 1241 | write_lock_irqsave(&ep->lock, flags); |
| 1242 | 1242 | ||
| @@ -751,7 +751,7 @@ no_thread_group: | |||
| 751 | 751 | ||
| 752 | write_lock_irq(&tasklist_lock); | 752 | write_lock_irq(&tasklist_lock); |
| 753 | spin_lock(&oldsighand->siglock); | 753 | spin_lock(&oldsighand->siglock); |
| 754 | spin_lock(&newsighand->siglock); | 754 | spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING); |
| 755 | 755 | ||
| 756 | rcu_assign_pointer(current->sighand, newsighand); | 756 | rcu_assign_pointer(current->sighand, newsighand); |
| 757 | recalc_sigpending(); | 757 | recalc_sigpending(); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f2702cda9779..681dea8f9532 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
| @@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 775 | if (EXT2_INODE_SIZE(sb) == 0) | 775 | if (EXT2_INODE_SIZE(sb) == 0) |
| 776 | goto cantfind_ext2; | 776 | goto cantfind_ext2; |
| 777 | sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); | 777 | sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); |
| 778 | if (sbi->s_inodes_per_block == 0) | 778 | if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0) |
| 779 | goto cantfind_ext2; | 779 | goto cantfind_ext2; |
| 780 | sbi->s_itb_per_group = sbi->s_inodes_per_group / | 780 | sbi->s_itb_per_group = sbi->s_inodes_per_group / |
| 781 | sbi->s_inodes_per_block; | 781 | sbi->s_inodes_per_block; |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a504a40d6d29..063d994bda0b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
| @@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, | |||
| 1269 | goal = le32_to_cpu(es->s_first_data_block); | 1269 | goal = le32_to_cpu(es->s_first_data_block); |
| 1270 | group_no = (goal - le32_to_cpu(es->s_first_data_block)) / | 1270 | group_no = (goal - le32_to_cpu(es->s_first_data_block)) / |
| 1271 | EXT3_BLOCKS_PER_GROUP(sb); | 1271 | EXT3_BLOCKS_PER_GROUP(sb); |
| 1272 | goal_group = group_no; | ||
| 1273 | retry_alloc: | ||
| 1272 | gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); | 1274 | gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); |
| 1273 | if (!gdp) | 1275 | if (!gdp) |
| 1274 | goto io_error; | 1276 | goto io_error; |
| 1275 | 1277 | ||
| 1276 | goal_group = group_no; | ||
| 1277 | retry: | ||
| 1278 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | 1278 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); |
| 1279 | /* | 1279 | /* |
| 1280 | * if there is not enough free blocks to make a new resevation | 1280 | * if there is not enough free blocks to make a new resevation |
| @@ -1349,7 +1349,7 @@ retry: | |||
| 1349 | if (my_rsv) { | 1349 | if (my_rsv) { |
| 1350 | my_rsv = NULL; | 1350 | my_rsv = NULL; |
| 1351 | group_no = goal_group; | 1351 | group_no = goal_group; |
| 1352 | goto retry; | 1352 | goto retry_alloc; |
| 1353 | } | 1353 | } |
| 1354 | /* No space left on the device */ | 1354 | /* No space left on the device */ |
| 1355 | *errp = -ENOSPC; | 1355 | *errp = -ENOSPC; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 0971814c38b8..42da60784311 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
| @@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal) | |||
| 261 | struct buffer_head *bh = jh2bh(jh); | 261 | struct buffer_head *bh = jh2bh(jh); |
| 262 | 262 | ||
| 263 | jbd_lock_bh_state(bh); | 263 | jbd_lock_bh_state(bh); |
| 264 | kfree(jh->b_committed_data); | 264 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
| 265 | jh->b_committed_data = NULL; | 265 | jh->b_committed_data = NULL; |
| 266 | jbd_unlock_bh_state(bh); | 266 | jbd_unlock_bh_state(bh); |
| 267 | } | 267 | } |
| @@ -745,14 +745,14 @@ restart_loop: | |||
| 745 | * Otherwise, we can just throw away the frozen data now. | 745 | * Otherwise, we can just throw away the frozen data now. |
| 746 | */ | 746 | */ |
| 747 | if (jh->b_committed_data) { | 747 | if (jh->b_committed_data) { |
| 748 | kfree(jh->b_committed_data); | 748 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
| 749 | jh->b_committed_data = NULL; | 749 | jh->b_committed_data = NULL; |
| 750 | if (jh->b_frozen_data) { | 750 | if (jh->b_frozen_data) { |
| 751 | jh->b_committed_data = jh->b_frozen_data; | 751 | jh->b_committed_data = jh->b_frozen_data; |
| 752 | jh->b_frozen_data = NULL; | 752 | jh->b_frozen_data = NULL; |
| 753 | } | 753 | } |
| 754 | } else if (jh->b_frozen_data) { | 754 | } else if (jh->b_frozen_data) { |
| 755 | kfree(jh->b_frozen_data); | 755 | jbd_slab_free(jh->b_frozen_data, bh->b_size); |
| 756 | jh->b_frozen_data = NULL; | 756 | jh->b_frozen_data = NULL; |
| 757 | } | 757 | } |
| 758 | 758 | ||
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 8c9b28dff119..f66724ce443a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
| @@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit); | |||
| 84 | 84 | ||
| 85 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 85 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
| 86 | static void __journal_abort_soft (journal_t *journal, int errno); | 86 | static void __journal_abort_soft (journal_t *journal, int errno); |
| 87 | static int journal_create_jbd_slab(size_t slab_size); | ||
| 87 | 88 | ||
| 88 | /* | 89 | /* |
| 89 | * Helper function used to manage commit timeouts | 90 | * Helper function used to manage commit timeouts |
| @@ -328,10 +329,10 @@ repeat: | |||
| 328 | char *tmp; | 329 | char *tmp; |
| 329 | 330 | ||
| 330 | jbd_unlock_bh_state(bh_in); | 331 | jbd_unlock_bh_state(bh_in); |
| 331 | tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); | 332 | tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); |
| 332 | jbd_lock_bh_state(bh_in); | 333 | jbd_lock_bh_state(bh_in); |
| 333 | if (jh_in->b_frozen_data) { | 334 | if (jh_in->b_frozen_data) { |
| 334 | kfree(tmp); | 335 | jbd_slab_free(tmp, bh_in->b_size); |
| 335 | goto repeat; | 336 | goto repeat; |
| 336 | } | 337 | } |
| 337 | 338 | ||
| @@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal) | |||
| 1069 | int journal_load(journal_t *journal) | 1070 | int journal_load(journal_t *journal) |
| 1070 | { | 1071 | { |
| 1071 | int err; | 1072 | int err; |
| 1073 | journal_superblock_t *sb; | ||
| 1072 | 1074 | ||
| 1073 | err = load_superblock(journal); | 1075 | err = load_superblock(journal); |
| 1074 | if (err) | 1076 | if (err) |
| 1075 | return err; | 1077 | return err; |
| 1076 | 1078 | ||
| 1079 | sb = journal->j_superblock; | ||
| 1077 | /* If this is a V2 superblock, then we have to check the | 1080 | /* If this is a V2 superblock, then we have to check the |
| 1078 | * features flags on it. */ | 1081 | * features flags on it. */ |
| 1079 | 1082 | ||
| 1080 | if (journal->j_format_version >= 2) { | 1083 | if (journal->j_format_version >= 2) { |
| 1081 | journal_superblock_t *sb = journal->j_superblock; | ||
| 1082 | |||
| 1083 | if ((sb->s_feature_ro_compat & | 1084 | if ((sb->s_feature_ro_compat & |
| 1084 | ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || | 1085 | ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || |
| 1085 | (sb->s_feature_incompat & | 1086 | (sb->s_feature_incompat & |
| @@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal) | |||
| 1090 | } | 1091 | } |
| 1091 | } | 1092 | } |
| 1092 | 1093 | ||
| 1094 | /* | ||
| 1095 | * Create a slab for this blocksize | ||
| 1096 | */ | ||
| 1097 | err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); | ||
| 1098 | if (err) | ||
| 1099 | return err; | ||
| 1100 | |||
| 1093 | /* Let the recovery code check whether it needs to recover any | 1101 | /* Let the recovery code check whether it needs to recover any |
| 1094 | * data from the journal. */ | 1102 | * data from the journal. */ |
| 1095 | if (journal_recover(journal)) | 1103 | if (journal_recover(journal)) |
| @@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) | |||
| 1612 | } | 1620 | } |
| 1613 | 1621 | ||
| 1614 | /* | 1622 | /* |
| 1623 | * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed | ||
| 1624 | * and allocate frozen and commit buffers from these slabs. | ||
| 1625 | * | ||
| 1626 | * Reason for doing this is to avoid, SLAB_DEBUG - since it could | ||
| 1627 | * cause bh to cross page boundary. | ||
| 1628 | */ | ||
| 1629 | |||
| 1630 | #define JBD_MAX_SLABS 5 | ||
| 1631 | #define JBD_SLAB_INDEX(size) (size >> 11) | ||
| 1632 | |||
| 1633 | static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; | ||
| 1634 | static const char *jbd_slab_names[JBD_MAX_SLABS] = { | ||
| 1635 | "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" | ||
| 1636 | }; | ||
| 1637 | |||
| 1638 | static void journal_destroy_jbd_slabs(void) | ||
| 1639 | { | ||
| 1640 | int i; | ||
| 1641 | |||
| 1642 | for (i = 0; i < JBD_MAX_SLABS; i++) { | ||
| 1643 | if (jbd_slab[i]) | ||
| 1644 | kmem_cache_destroy(jbd_slab[i]); | ||
| 1645 | jbd_slab[i] = NULL; | ||
| 1646 | } | ||
| 1647 | } | ||
| 1648 | |||
| 1649 | static int journal_create_jbd_slab(size_t slab_size) | ||
| 1650 | { | ||
| 1651 | int i = JBD_SLAB_INDEX(slab_size); | ||
| 1652 | |||
| 1653 | BUG_ON(i >= JBD_MAX_SLABS); | ||
| 1654 | |||
| 1655 | /* | ||
| 1656 | * Check if we already have a slab created for this size | ||
| 1657 | */ | ||
| 1658 | if (jbd_slab[i]) | ||
| 1659 | return 0; | ||
| 1660 | |||
| 1661 | /* | ||
| 1662 | * Create a slab and force alignment to be same as slabsize - | ||
| 1663 | * this will make sure that allocations won't cross the page | ||
| 1664 | * boundary. | ||
| 1665 | */ | ||
| 1666 | jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], | ||
| 1667 | slab_size, slab_size, 0, NULL, NULL); | ||
| 1668 | if (!jbd_slab[i]) { | ||
| 1669 | printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); | ||
| 1670 | return -ENOMEM; | ||
| 1671 | } | ||
| 1672 | return 0; | ||
| 1673 | } | ||
| 1674 | |||
| 1675 | void * jbd_slab_alloc(size_t size, gfp_t flags) | ||
| 1676 | { | ||
| 1677 | int idx; | ||
| 1678 | |||
| 1679 | idx = JBD_SLAB_INDEX(size); | ||
| 1680 | BUG_ON(jbd_slab[idx] == NULL); | ||
| 1681 | return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); | ||
| 1682 | } | ||
| 1683 | |||
| 1684 | void jbd_slab_free(void *ptr, size_t size) | ||
| 1685 | { | ||
| 1686 | int idx; | ||
| 1687 | |||
| 1688 | idx = JBD_SLAB_INDEX(size); | ||
| 1689 | BUG_ON(jbd_slab[idx] == NULL); | ||
| 1690 | kmem_cache_free(jbd_slab[idx], ptr); | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | /* | ||
| 1615 | * Journal_head storage management | 1694 | * Journal_head storage management |
| 1616 | */ | 1695 | */ |
| 1617 | static kmem_cache_t *journal_head_cache; | 1696 | static kmem_cache_t *journal_head_cache; |
| @@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
| 1799 | printk(KERN_WARNING "%s: freeing " | 1878 | printk(KERN_WARNING "%s: freeing " |
| 1800 | "b_frozen_data\n", | 1879 | "b_frozen_data\n", |
| 1801 | __FUNCTION__); | 1880 | __FUNCTION__); |
| 1802 | kfree(jh->b_frozen_data); | 1881 | jbd_slab_free(jh->b_frozen_data, bh->b_size); |
| 1803 | } | 1882 | } |
| 1804 | if (jh->b_committed_data) { | 1883 | if (jh->b_committed_data) { |
| 1805 | printk(KERN_WARNING "%s: freeing " | 1884 | printk(KERN_WARNING "%s: freeing " |
| 1806 | "b_committed_data\n", | 1885 | "b_committed_data\n", |
| 1807 | __FUNCTION__); | 1886 | __FUNCTION__); |
| 1808 | kfree(jh->b_committed_data); | 1887 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
| 1809 | } | 1888 | } |
| 1810 | bh->b_private = NULL; | 1889 | bh->b_private = NULL; |
| 1811 | jh->b_bh = NULL; /* debug, really */ | 1890 | jh->b_bh = NULL; /* debug, really */ |
| @@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void) | |||
| 1961 | journal_destroy_revoke_caches(); | 2040 | journal_destroy_revoke_caches(); |
| 1962 | journal_destroy_journal_head_cache(); | 2041 | journal_destroy_journal_head_cache(); |
| 1963 | journal_destroy_handle_cache(); | 2042 | journal_destroy_handle_cache(); |
| 2043 | journal_destroy_jbd_slabs(); | ||
| 1964 | } | 2044 | } |
| 1965 | 2045 | ||
| 1966 | static int __init journal_init(void) | 2046 | static int __init journal_init(void) |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 508b2ea91f43..de2e4cbbf79a 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
| @@ -666,8 +666,9 @@ repeat: | |||
| 666 | if (!frozen_buffer) { | 666 | if (!frozen_buffer) { |
| 667 | JBUFFER_TRACE(jh, "allocate memory for buffer"); | 667 | JBUFFER_TRACE(jh, "allocate memory for buffer"); |
| 668 | jbd_unlock_bh_state(bh); | 668 | jbd_unlock_bh_state(bh); |
| 669 | frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, | 669 | frozen_buffer = |
| 670 | GFP_NOFS); | 670 | jbd_slab_alloc(jh2bh(jh)->b_size, |
| 671 | GFP_NOFS); | ||
| 671 | if (!frozen_buffer) { | 672 | if (!frozen_buffer) { |
| 672 | printk(KERN_EMERG | 673 | printk(KERN_EMERG |
| 673 | "%s: OOM for frozen_buffer\n", | 674 | "%s: OOM for frozen_buffer\n", |
| @@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) | |||
| 879 | 880 | ||
| 880 | repeat: | 881 | repeat: |
| 881 | if (!jh->b_committed_data) { | 882 | if (!jh->b_committed_data) { |
| 882 | committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); | 883 | committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); |
| 883 | if (!committed_data) { | 884 | if (!committed_data) { |
| 884 | printk(KERN_EMERG "%s: No memory for committed data\n", | 885 | printk(KERN_EMERG "%s: No memory for committed data\n", |
| 885 | __FUNCTION__); | 886 | __FUNCTION__); |
| @@ -906,7 +907,7 @@ repeat: | |||
| 906 | out: | 907 | out: |
| 907 | journal_put_journal_head(jh); | 908 | journal_put_journal_head(jh); |
| 908 | if (unlikely(committed_data)) | 909 | if (unlikely(committed_data)) |
| 909 | kfree(committed_data); | 910 | jbd_slab_free(committed_data, bh->b_size); |
| 910 | return err; | 911 | return err; |
| 911 | } | 912 | } |
| 912 | 913 | ||
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 9ea91c5eeb7b..330ff9fc7cf0 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
| @@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) | |||
| 204 | /* | 204 | /* |
| 205 | * Allocate the buffer map to keep the superblock small. | 205 | * Allocate the buffer map to keep the superblock small. |
| 206 | */ | 206 | */ |
| 207 | if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) | ||
| 208 | goto out_illegal_sb; | ||
| 207 | i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); | 209 | i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); |
| 208 | map = kmalloc(i, GFP_KERNEL); | 210 | map = kmalloc(i, GFP_KERNEL); |
| 209 | if (!map) | 211 | if (!map) |
| @@ -263,7 +265,7 @@ out_no_root: | |||
| 263 | 265 | ||
| 264 | out_no_bitmap: | 266 | out_no_bitmap: |
| 265 | printk("MINIX-fs: bad superblock or unable to read bitmaps\n"); | 267 | printk("MINIX-fs: bad superblock or unable to read bitmaps\n"); |
| 266 | out_freemap: | 268 | out_freemap: |
| 267 | for (i = 0; i < sbi->s_imap_blocks; i++) | 269 | for (i = 0; i < sbi->s_imap_blocks; i++) |
| 268 | brelse(sbi->s_imap[i]); | 270 | brelse(sbi->s_imap[i]); |
| 269 | for (i = 0; i < sbi->s_zmap_blocks; i++) | 271 | for (i = 0; i < sbi->s_zmap_blocks; i++) |
| @@ -276,11 +278,16 @@ out_no_map: | |||
| 276 | printk("MINIX-fs: can't allocate map\n"); | 278 | printk("MINIX-fs: can't allocate map\n"); |
| 277 | goto out_release; | 279 | goto out_release; |
| 278 | 280 | ||
| 281 | out_illegal_sb: | ||
| 282 | if (!silent) | ||
| 283 | printk("MINIX-fs: bad superblock\n"); | ||
| 284 | goto out_release; | ||
| 285 | |||
| 279 | out_no_fs: | 286 | out_no_fs: |
| 280 | if (!silent) | 287 | if (!silent) |
| 281 | printk("VFS: Can't find a Minix or Minix V2 filesystem " | 288 | printk("VFS: Can't find a Minix or Minix V2 filesystem " |
| 282 | "on device %s\n", s->s_id); | 289 | "on device %s\n", s->s_id); |
| 283 | out_release: | 290 | out_release: |
| 284 | brelse(bh); | 291 | brelse(bh); |
| 285 | goto out; | 292 | goto out; |
| 286 | 293 | ||
| @@ -290,7 +297,7 @@ out_bad_hblock: | |||
| 290 | 297 | ||
| 291 | out_bad_sb: | 298 | out_bad_sb: |
| 292 | printk("MINIX-fs: unable to read superblock\n"); | 299 | printk("MINIX-fs: unable to read superblock\n"); |
| 293 | out: | 300 | out: |
| 294 | s->s_fs_info = NULL; | 301 | s->s_fs_info = NULL; |
| 295 | kfree(sbi); | 302 | kfree(sbi); |
| 296 | return -EINVAL; | 303 | return -EINVAL; |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 9f2cfc30f9cf..942156225447 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
| @@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
| 169 | "Mapped: %8lu kB\n" | 169 | "Mapped: %8lu kB\n" |
| 170 | "Slab: %8lu kB\n" | 170 | "Slab: %8lu kB\n" |
| 171 | "PageTables: %8lu kB\n" | 171 | "PageTables: %8lu kB\n" |
| 172 | "NFS Unstable: %8lu kB\n" | 172 | "NFS_Unstable: %8lu kB\n" |
| 173 | "Bounce: %8lu kB\n" | 173 | "Bounce: %8lu kB\n" |
| 174 | "CommitLimit: %8lu kB\n" | 174 | "CommitLimit: %8lu kB\n" |
| 175 | "Committed_AS: %8lu kB\n" | 175 | "Committed_AS: %8lu kB\n" |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 39fedaa88a0c..d935fb9394e3 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
| @@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) | |||
| 424 | int res = -ENOTDIR; | 424 | int res = -ENOTDIR; |
| 425 | if (!file->f_op || !file->f_op->readdir) | 425 | if (!file->f_op || !file->f_op->readdir) |
| 426 | goto out; | 426 | goto out; |
| 427 | mutex_lock(&inode->i_mutex); | 427 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); |
| 428 | // down(&inode->i_zombie); | 428 | // down(&inode->i_zombie); |
| 429 | res = -ENOENT; | 429 | res = -ENOENT; |
| 430 | if (!IS_DEADDIR(inode)) { | 430 | if (!IS_DEADDIR(inode)) { |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index e7c8615beb65..30c6e8a9446c 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
| @@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh) | |||
| 169 | 169 | ||
| 170 | static struct buffer_head * | 170 | static struct buffer_head * |
| 171 | ufs_clear_frags(struct inode *inode, sector_t beg, | 171 | ufs_clear_frags(struct inode *inode, sector_t beg, |
| 172 | unsigned int n) | 172 | unsigned int n, sector_t want) |
| 173 | { | 173 | { |
| 174 | struct buffer_head *res, *bh; | 174 | struct buffer_head *res = NULL, *bh; |
| 175 | sector_t end = beg + n; | 175 | sector_t end = beg + n; |
| 176 | 176 | ||
| 177 | res = sb_getblk(inode->i_sb, beg); | 177 | for (; beg < end; ++beg) { |
| 178 | ufs_clear_frag(inode, res); | ||
| 179 | for (++beg; beg < end; ++beg) { | ||
| 180 | bh = sb_getblk(inode->i_sb, beg); | 178 | bh = sb_getblk(inode->i_sb, beg); |
| 181 | ufs_clear_frag(inode, bh); | 179 | ufs_clear_frag(inode, bh); |
| 182 | brelse(bh); | 180 | if (want != beg) |
| 181 | brelse(bh); | ||
| 182 | else | ||
| 183 | res = bh; | ||
| 183 | } | 184 | } |
| 185 | BUG_ON(!res); | ||
| 184 | return res; | 186 | return res; |
| 185 | } | 187 | } |
| 186 | 188 | ||
| @@ -265,7 +267,9 @@ repeat: | |||
| 265 | lastfrag = ufsi->i_lastfrag; | 267 | lastfrag = ufsi->i_lastfrag; |
| 266 | 268 | ||
| 267 | } | 269 | } |
| 268 | goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb; | 270 | tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]); |
| 271 | if (tmp) | ||
| 272 | goal = tmp + uspi->s_fpb; | ||
| 269 | tmp = ufs_new_fragments (inode, p, fragment - blockoff, | 273 | tmp = ufs_new_fragments (inode, p, fragment - blockoff, |
| 270 | goal, required + blockoff, | 274 | goal, required + blockoff, |
| 271 | err, locked_page); | 275 | err, locked_page); |
| @@ -277,13 +281,15 @@ repeat: | |||
| 277 | tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), | 281 | tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), |
| 278 | fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), | 282 | fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), |
| 279 | err, locked_page); | 283 | err, locked_page); |
| 280 | } | 284 | } else /* (lastblock > block) */ { |
| 281 | /* | 285 | /* |
| 282 | * We will allocate new block before last allocated block | 286 | * We will allocate new block before last allocated block |
| 283 | */ | 287 | */ |
| 284 | else /* (lastblock > block) */ { | 288 | if (block) { |
| 285 | if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1]))) | 289 | tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]); |
| 286 | goal = tmp + uspi->s_fpb; | 290 | if (tmp) |
| 291 | goal = tmp + uspi->s_fpb; | ||
| 292 | } | ||
| 287 | tmp = ufs_new_fragments(inode, p, fragment - blockoff, | 293 | tmp = ufs_new_fragments(inode, p, fragment - blockoff, |
| 288 | goal, uspi->s_fpb, err, locked_page); | 294 | goal, uspi->s_fpb, err, locked_page); |
| 289 | } | 295 | } |
| @@ -296,7 +302,7 @@ repeat: | |||
| 296 | } | 302 | } |
| 297 | 303 | ||
| 298 | if (!phys) { | 304 | if (!phys) { |
| 299 | result = ufs_clear_frags(inode, tmp + blockoff, required); | 305 | result = ufs_clear_frags(inode, tmp, required, tmp + blockoff); |
| 300 | } else { | 306 | } else { |
| 301 | *phys = tmp + blockoff; | 307 | *phys = tmp + blockoff; |
| 302 | result = NULL; | 308 | result = NULL; |
| @@ -383,7 +389,7 @@ repeat: | |||
| 383 | } | 389 | } |
| 384 | } | 390 | } |
| 385 | 391 | ||
| 386 | if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb)) | 392 | if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]))) |
| 387 | goal = tmp + uspi->s_fpb; | 393 | goal = tmp + uspi->s_fpb; |
| 388 | else | 394 | else |
| 389 | goal = bh->b_blocknr + uspi->s_fpb; | 395 | goal = bh->b_blocknr + uspi->s_fpb; |
| @@ -397,7 +403,8 @@ repeat: | |||
| 397 | 403 | ||
| 398 | 404 | ||
| 399 | if (!phys) { | 405 | if (!phys) { |
| 400 | result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb); | 406 | result = ufs_clear_frags(inode, tmp, uspi->s_fpb, |
| 407 | tmp + blockoff); | ||
| 401 | } else { | 408 | } else { |
| 402 | *phys = tmp + blockoff; | 409 | *phys = tmp + blockoff; |
| 403 | *new = 1; | 410 | *new = 1; |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index c9b55872079b..ea11d04c41a0 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
| @@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode) | |||
| 375 | int err = 0; | 375 | int err = 0; |
| 376 | struct address_space *mapping = inode->i_mapping; | 376 | struct address_space *mapping = inode->i_mapping; |
| 377 | struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; | 377 | struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; |
| 378 | struct ufs_inode_info *ufsi = UFS_I(inode); | ||
| 379 | unsigned lastfrag, i, end; | 378 | unsigned lastfrag, i, end; |
| 380 | struct page *lastpage; | 379 | struct page *lastpage; |
| 381 | struct buffer_head *bh; | 380 | struct buffer_head *bh; |
| 382 | 381 | ||
| 383 | lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; | 382 | lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; |
| 384 | 383 | ||
| 385 | if (!lastfrag) { | 384 | if (!lastfrag) |
| 386 | ufsi->i_lastfrag = 0; | ||
| 387 | goto out; | 385 | goto out; |
| 388 | } | 386 | |
| 389 | lastfrag--; | 387 | lastfrag--; |
| 390 | 388 | ||
| 391 | lastpage = ufs_get_locked_page(mapping, lastfrag >> | 389 | lastpage = ufs_get_locked_page(mapping, lastfrag >> |
| @@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode) | |||
| 400 | for (i = 0; i < end; ++i) | 398 | for (i = 0; i < end; ++i) |
| 401 | bh = bh->b_this_page; | 399 | bh = bh->b_this_page; |
| 402 | 400 | ||
| 403 | if (!buffer_mapped(bh)) { | 401 | |
| 404 | err = ufs_getfrag_block(inode, lastfrag, bh, 1); | 402 | err = ufs_getfrag_block(inode, lastfrag, bh, 1); |
| 405 | 403 | ||
| 406 | if (unlikely(err)) | 404 | if (unlikely(err)) |
| 407 | goto out_unlock; | 405 | goto out_unlock; |
| 408 | 406 | ||
| 409 | if (buffer_new(bh)) { | 407 | if (buffer_new(bh)) { |
| 410 | clear_buffer_new(bh); | 408 | clear_buffer_new(bh); |
| 411 | unmap_underlying_metadata(bh->b_bdev, | 409 | unmap_underlying_metadata(bh->b_bdev, |
| 412 | bh->b_blocknr); | 410 | bh->b_blocknr); |
| 413 | /* | 411 | /* |
| 414 | * we do not zeroize fragment, because of | 412 | * we do not zeroize fragment, because of |
| 415 | * if it maped to hole, it already contains zeroes | 413 | * if it maped to hole, it already contains zeroes |
| 416 | */ | 414 | */ |
| 417 | set_buffer_uptodate(bh); | 415 | set_buffer_uptodate(bh); |
| 418 | mark_buffer_dirty(bh); | 416 | mark_buffer_dirty(bh); |
| 419 | set_page_dirty(lastpage); | 417 | set_page_dirty(lastpage); |
| 420 | } | ||
| 421 | } | 418 | } |
| 419 | |||
| 422 | out_unlock: | 420 | out_unlock: |
| 423 | ufs_put_locked_page(lastpage); | 421 | ufs_put_locked_page(lastpage); |
| 424 | out: | 422 | out: |
| @@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) | |||
| 440 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 438 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
| 441 | return -EPERM; | 439 | return -EPERM; |
| 442 | 440 | ||
| 443 | if (inode->i_size > old_i_size) { | 441 | err = ufs_alloc_lastblock(inode); |
| 444 | /* | ||
| 445 | * if we expand file we should care about | ||
| 446 | * allocation of block for last byte first of all | ||
| 447 | */ | ||
| 448 | err = ufs_alloc_lastblock(inode); | ||
| 449 | 442 | ||
| 450 | if (err) { | 443 | if (err) { |
| 451 | i_size_write(inode, old_i_size); | 444 | i_size_write(inode, old_i_size); |
| 452 | goto out; | 445 | goto out; |
| 453 | } | ||
| 454 | /* | ||
| 455 | * go away, because of we expand file, and we do not | ||
| 456 | * need free blocks, and zeroizes page | ||
| 457 | */ | ||
| 458 | lock_kernel(); | ||
| 459 | goto almost_end; | ||
| 460 | } | 446 | } |
| 461 | 447 | ||
| 462 | block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); | 448 | block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); |
| @@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) | |||
| 477 | yield(); | 463 | yield(); |
| 478 | } | 464 | } |
| 479 | 465 | ||
| 480 | if (inode->i_size < old_i_size) { | ||
| 481 | /* | ||
| 482 | * now we should have enough space | ||
| 483 | * to allocate block for last byte | ||
| 484 | */ | ||
| 485 | err = ufs_alloc_lastblock(inode); | ||
| 486 | if (err) | ||
| 487 | /* | ||
| 488 | * looks like all the same - we have no space, | ||
| 489 | * but we truncate file already | ||
| 490 | */ | ||
| 491 | inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize; | ||
| 492 | } | ||
| 493 | almost_end: | ||
| 494 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 466 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
| 467 | ufsi->i_lastfrag = DIRECT_FRAGMENT; | ||
| 495 | unlock_kernel(); | 468 | unlock_kernel(); |
| 496 | mark_inode_dirty(inode); | 469 | mark_inode_dirty(inode); |
| 497 | out: | 470 | out: |
diff --git a/include/asm-arm/arch-s3c2410/regs-rtc.h b/include/asm-arm/arch-s3c2410/regs-rtc.h index 228983f89bc8..0fbec07bb6b8 100644 --- a/include/asm-arm/arch-s3c2410/regs-rtc.h +++ b/include/asm-arm/arch-s3c2410/regs-rtc.h | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #ifndef __ASM_ARCH_REGS_RTC_H | 18 | #ifndef __ASM_ARCH_REGS_RTC_H |
| 19 | #define __ASM_ARCH_REGS_RTC_H __FILE__ | 19 | #define __ASM_ARCH_REGS_RTC_H __FILE__ |
| 20 | 20 | ||
| 21 | #define S3C2410_RTCREG(x) ((x) + S3C24XX_VA_RTC) | 21 | #define S3C2410_RTCREG(x) (x) |
| 22 | 22 | ||
| 23 | #define S3C2410_RTCCON S3C2410_RTCREG(0x40) | 23 | #define S3C2410_RTCCON S3C2410_RTCREG(0x40) |
| 24 | #define S3C2410_RTCCON_RTCEN (1<<0) | 24 | #define S3C2410_RTCCON_RTCEN (1<<0) |
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index e33e9f9e4c66..22cb07cc8f32 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h | |||
| @@ -14,7 +14,7 @@ extern struct pglist_data *node_data[]; | |||
| 14 | 14 | ||
| 15 | #ifdef CONFIG_X86_NUMAQ | 15 | #ifdef CONFIG_X86_NUMAQ |
| 16 | #include <asm/numaq.h> | 16 | #include <asm/numaq.h> |
| 17 | #else /* summit or generic arch */ | 17 | #elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */ |
| 18 | #include <asm/srat.h> | 18 | #include <asm/srat.h> |
| 19 | #endif | 19 | #endif |
| 20 | 20 | ||
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 269d000bb2a3..bea0255196c4 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h | |||
| @@ -216,6 +216,7 @@ COMPATIBLE_IOCTL(VT_RESIZE) | |||
| 216 | COMPATIBLE_IOCTL(VT_RESIZEX) | 216 | COMPATIBLE_IOCTL(VT_RESIZEX) |
| 217 | COMPATIBLE_IOCTL(VT_LOCKSWITCH) | 217 | COMPATIBLE_IOCTL(VT_LOCKSWITCH) |
| 218 | COMPATIBLE_IOCTL(VT_UNLOCKSWITCH) | 218 | COMPATIBLE_IOCTL(VT_UNLOCKSWITCH) |
| 219 | COMPATIBLE_IOCTL(VT_GETHIFONTMASK) | ||
| 219 | /* Little p (/dev/rtc, /dev/envctrl, etc.) */ | 220 | /* Little p (/dev/rtc, /dev/envctrl, etc.) */ |
| 220 | COMPATIBLE_IOCTL(RTC_AIE_ON) | 221 | COMPATIBLE_IOCTL(RTC_AIE_ON) |
| 221 | COMPATIBLE_IOCTL(RTC_AIE_OFF) | 222 | COMPATIBLE_IOCTL(RTC_AIE_OFF) |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 25610205c90d..555bc195c420 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -570,13 +570,14 @@ struct inode { | |||
| 570 | * 3: quota file | 570 | * 3: quota file |
| 571 | * | 571 | * |
| 572 | * The locking order between these classes is | 572 | * The locking order between these classes is |
| 573 | * parent -> child -> normal -> quota | 573 | * parent -> child -> normal -> xattr -> quota |
| 574 | */ | 574 | */ |
| 575 | enum inode_i_mutex_lock_class | 575 | enum inode_i_mutex_lock_class |
| 576 | { | 576 | { |
| 577 | I_MUTEX_NORMAL, | 577 | I_MUTEX_NORMAL, |
| 578 | I_MUTEX_PARENT, | 578 | I_MUTEX_PARENT, |
| 579 | I_MUTEX_CHILD, | 579 | I_MUTEX_CHILD, |
| 580 | I_MUTEX_XATTR, | ||
| 580 | I_MUTEX_QUOTA | 581 | I_MUTEX_QUOTA |
| 581 | }; | 582 | }; |
| 582 | 583 | ||
diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 20eb34403d0c..a04c154c5207 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h | |||
| @@ -72,6 +72,9 @@ extern int journal_enable_debug; | |||
| 72 | #endif | 72 | #endif |
| 73 | 73 | ||
| 74 | extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); | 74 | extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); |
| 75 | extern void * jbd_slab_alloc(size_t size, gfp_t flags); | ||
| 76 | extern void jbd_slab_free(void *ptr, size_t size); | ||
| 77 | |||
| 75 | #define jbd_kmalloc(size, flags) \ | 78 | #define jbd_kmalloc(size, flags) \ |
| 76 | __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) | 79 | __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) |
| 77 | #define jbd_rep_kmalloc(size, flags) \ | 80 | #define jbd_rep_kmalloc(size, flags) \ |
diff --git a/include/linux/node.h b/include/linux/node.h index 81dcec84cd8f..bc001bc225c3 100644 --- a/include/linux/node.h +++ b/include/linux/node.h | |||
| @@ -30,12 +30,20 @@ extern struct node node_devices[]; | |||
| 30 | 30 | ||
| 31 | extern int register_node(struct node *, int, struct node *); | 31 | extern int register_node(struct node *, int, struct node *); |
| 32 | extern void unregister_node(struct node *node); | 32 | extern void unregister_node(struct node *node); |
| 33 | #ifdef CONFIG_NUMA | ||
| 33 | extern int register_one_node(int nid); | 34 | extern int register_one_node(int nid); |
| 34 | extern void unregister_one_node(int nid); | 35 | extern void unregister_one_node(int nid); |
| 35 | #ifdef CONFIG_NUMA | ||
| 36 | extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); | 36 | extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); |
| 37 | extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); | 37 | extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); |
| 38 | #else | 38 | #else |
| 39 | static inline int register_one_node(int nid) | ||
| 40 | { | ||
| 41 | return 0; | ||
| 42 | } | ||
| 43 | static inline int unregister_one_node(int nid) | ||
| 44 | { | ||
| 45 | return 0; | ||
| 46 | } | ||
| 39 | static inline int register_cpu_under_node(unsigned int cpu, unsigned int nid) | 47 | static inline int register_cpu_under_node(unsigned int cpu, unsigned int nid) |
| 40 | { | 48 | { |
| 41 | return 0; | 49 | return 0; |
diff --git a/include/linux/tty.h b/include/linux/tty.h index e421d5e34818..04827ca65781 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h | |||
| @@ -59,6 +59,7 @@ struct tty_bufhead { | |||
| 59 | struct tty_buffer *head; /* Queue head */ | 59 | struct tty_buffer *head; /* Queue head */ |
| 60 | struct tty_buffer *tail; /* Active buffer */ | 60 | struct tty_buffer *tail; /* Active buffer */ |
| 61 | struct tty_buffer *free; /* Free queue head */ | 61 | struct tty_buffer *free; /* Free queue head */ |
| 62 | int memory_used; /* Buffer space used excluding free queue */ | ||
| 62 | }; | 63 | }; |
| 63 | /* | 64 | /* |
| 64 | * The pty uses char_buf and flag_buf as a contiguous buffer | 65 | * The pty uses char_buf and flag_buf as a contiguous buffer |
diff --git a/include/linux/vt.h b/include/linux/vt.h index 8ab334a48222..ba806e8711be 100644 --- a/include/linux/vt.h +++ b/include/linux/vt.h | |||
| @@ -60,5 +60,6 @@ struct vt_consize { | |||
| 60 | #define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */ | 60 | #define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */ |
| 61 | #define VT_LOCKSWITCH 0x560B /* disallow vt switching */ | 61 | #define VT_LOCKSWITCH 0x560B /* disallow vt switching */ |
| 62 | #define VT_UNLOCKSWITCH 0x560C /* allow vt switching */ | 62 | #define VT_UNLOCKSWITCH 0x560C /* allow vt switching */ |
| 63 | #define VT_GETHIFONTMASK 0x560D /* return hi font mask */ | ||
| 63 | 64 | ||
| 64 | #endif /* _LINUX_VT_H */ | 65 | #endif /* _LINUX_VT_H */ |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1a649f2bb9bb..4ea6f0dc2fc5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -816,6 +816,10 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
| 816 | struct cpuset trialcs; | 816 | struct cpuset trialcs; |
| 817 | int retval, cpus_unchanged; | 817 | int retval, cpus_unchanged; |
| 818 | 818 | ||
| 819 | /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ | ||
| 820 | if (cs == &top_cpuset) | ||
| 821 | return -EACCES; | ||
| 822 | |||
| 819 | trialcs = *cs; | 823 | trialcs = *cs; |
| 820 | retval = cpulist_parse(buf, trialcs.cpus_allowed); | 824 | retval = cpulist_parse(buf, trialcs.cpus_allowed); |
| 821 | if (retval < 0) | 825 | if (retval < 0) |
| @@ -2033,6 +2037,33 @@ out: | |||
| 2033 | return err; | 2037 | return err; |
| 2034 | } | 2038 | } |
| 2035 | 2039 | ||
| 2040 | /* | ||
| 2041 | * The top_cpuset tracks what CPUs and Memory Nodes are online, | ||
| 2042 | * period. This is necessary in order to make cpusets transparent | ||
| 2043 | * (of no affect) on systems that are actively using CPU hotplug | ||
| 2044 | * but making no active use of cpusets. | ||
| 2045 | * | ||
| 2046 | * This handles CPU hotplug (cpuhp) events. If someday Memory | ||
| 2047 | * Nodes can be hotplugged (dynamically changing node_online_map) | ||
| 2048 | * then we should handle that too, perhaps in a similar way. | ||
| 2049 | */ | ||
| 2050 | |||
| 2051 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 2052 | static int cpuset_handle_cpuhp(struct notifier_block *nb, | ||
| 2053 | unsigned long phase, void *cpu) | ||
| 2054 | { | ||
| 2055 | mutex_lock(&manage_mutex); | ||
| 2056 | mutex_lock(&callback_mutex); | ||
| 2057 | |||
| 2058 | top_cpuset.cpus_allowed = cpu_online_map; | ||
| 2059 | |||
| 2060 | mutex_unlock(&callback_mutex); | ||
| 2061 | mutex_unlock(&manage_mutex); | ||
| 2062 | |||
| 2063 | return 0; | ||
| 2064 | } | ||
| 2065 | #endif | ||
| 2066 | |||
| 2036 | /** | 2067 | /** |
| 2037 | * cpuset_init_smp - initialize cpus_allowed | 2068 | * cpuset_init_smp - initialize cpus_allowed |
| 2038 | * | 2069 | * |
| @@ -2043,6 +2074,8 @@ void __init cpuset_init_smp(void) | |||
| 2043 | { | 2074 | { |
| 2044 | top_cpuset.cpus_allowed = cpu_online_map; | 2075 | top_cpuset.cpus_allowed = cpu_online_map; |
| 2045 | top_cpuset.mems_allowed = node_online_map; | 2076 | top_cpuset.mems_allowed = node_online_map; |
| 2077 | |||
| 2078 | hotcpu_notifier(cpuset_handle_cpuhp, 0); | ||
| 2046 | } | 2079 | } |
| 2047 | 2080 | ||
| 2048 | /** | 2081 | /** |
| @@ -2387,7 +2420,7 @@ EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); | |||
| 2387 | int cpuset_excl_nodes_overlap(const struct task_struct *p) | 2420 | int cpuset_excl_nodes_overlap(const struct task_struct *p) |
| 2388 | { | 2421 | { |
| 2389 | const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ | 2422 | const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ |
| 2390 | int overlap = 0; /* do cpusets overlap? */ | 2423 | int overlap = 1; /* do cpusets overlap? */ |
| 2391 | 2424 | ||
| 2392 | task_lock(current); | 2425 | task_lock(current); |
| 2393 | if (current->flags & PF_EXITING) { | 2426 | if (current->flags & PF_EXITING) { |
diff --git a/kernel/futex.c b/kernel/futex.c index d4633c588f33..b9b8aea5389e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -397,7 +397,7 @@ static struct task_struct * futex_find_get_task(pid_t pid) | |||
| 397 | p = NULL; | 397 | p = NULL; |
| 398 | goto out_unlock; | 398 | goto out_unlock; |
| 399 | } | 399 | } |
| 400 | if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) { | 400 | if (p->exit_state != 0) { |
| 401 | p = NULL; | 401 | p = NULL; |
| 402 | goto out_unlock; | 402 | goto out_unlock; |
| 403 | } | 403 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index a2be2d055299..a234fbee1238 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -4162,10 +4162,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | |||
| 4162 | read_unlock_irq(&tasklist_lock); | 4162 | read_unlock_irq(&tasklist_lock); |
| 4163 | return -ESRCH; | 4163 | return -ESRCH; |
| 4164 | } | 4164 | } |
| 4165 | get_task_struct(p); | ||
| 4166 | read_unlock_irq(&tasklist_lock); | ||
| 4167 | retval = sched_setscheduler(p, policy, &lparam); | 4165 | retval = sched_setscheduler(p, policy, &lparam); |
| 4168 | put_task_struct(p); | 4166 | read_unlock_irq(&tasklist_lock); |
| 4169 | 4167 | ||
| 4170 | return retval; | 4168 | return retval; |
| 4171 | } | 4169 | } |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index dcfb5d731466..51cacd111dbd 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -111,7 +111,6 @@ static int stop_machine(void) | |||
| 111 | /* If some failed, kill them all. */ | 111 | /* If some failed, kill them all. */ |
| 112 | if (ret < 0) { | 112 | if (ret < 0) { |
| 113 | stopmachine_set_state(STOPMACHINE_EXIT); | 113 | stopmachine_set_state(STOPMACHINE_EXIT); |
| 114 | up(&stopmachine_mutex); | ||
| 115 | return ret; | 114 | return ret; |
| 116 | } | 115 | } |
| 117 | 116 | ||
diff --git a/mm/swapfile.c b/mm/swapfile.c index e70d6c6d6fee..f1f5ec783781 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
| @@ -442,11 +442,12 @@ int swap_type_of(dev_t device) | |||
| 442 | 442 | ||
| 443 | if (!(swap_info[i].flags & SWP_WRITEOK)) | 443 | if (!(swap_info[i].flags & SWP_WRITEOK)) |
| 444 | continue; | 444 | continue; |
| 445 | |||
| 445 | if (!device) { | 446 | if (!device) { |
| 446 | spin_unlock(&swap_lock); | 447 | spin_unlock(&swap_lock); |
| 447 | return i; | 448 | return i; |
| 448 | } | 449 | } |
| 449 | inode = swap_info->swap_file->f_dentry->d_inode; | 450 | inode = swap_info[i].swap_file->f_dentry->d_inode; |
| 450 | if (S_ISBLK(inode->i_mode) && | 451 | if (S_ISBLK(inode->i_mode) && |
| 451 | device == MKDEV(imajor(inode), iminor(inode))) { | 452 | device == MKDEV(imajor(inode), iminor(inode))) { |
| 452 | spin_unlock(&swap_lock); | 453 | spin_unlock(&swap_lock); |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 0f85970ee6d1..090bc39e8199 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
| @@ -342,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, | |||
| 342 | new_packet->dccphtx_ccval = | 342 | new_packet->dccphtx_ccval = |
| 343 | DCCP_SKB_CB(skb)->dccpd_ccval = | 343 | DCCP_SKB_CB(skb)->dccpd_ccval = |
| 344 | hctx->ccid3hctx_last_win_count; | 344 | hctx->ccid3hctx_last_win_count; |
| 345 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | ||
| 346 | hctx->ccid3hctx_t_ipi); | ||
| 345 | } | 347 | } |
| 346 | out: | 348 | out: |
| 347 | return rc; | 349 | return rc; |
| @@ -413,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) | |||
| 413 | case TFRC_SSTATE_NO_FBACK: | 415 | case TFRC_SSTATE_NO_FBACK: |
| 414 | case TFRC_SSTATE_FBACK: | 416 | case TFRC_SSTATE_FBACK: |
| 415 | if (len > 0) { | 417 | if (len > 0) { |
| 416 | hctx->ccid3hctx_t_nom = now; | 418 | timeval_sub_usecs(&hctx->ccid3hctx_t_nom, |
| 419 | hctx->ccid3hctx_t_ipi); | ||
| 417 | ccid3_calc_new_t_ipi(hctx); | 420 | ccid3_calc_new_t_ipi(hctx); |
| 418 | ccid3_calc_new_delta(hctx); | 421 | ccid3_calc_new_delta(hctx); |
| 419 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | 422 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, |
| @@ -757,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) | |||
| 757 | } | 760 | } |
| 758 | 761 | ||
| 759 | hcrx->ccid3hcrx_tstamp_last_feedback = now; | 762 | hcrx->ccid3hcrx_tstamp_last_feedback = now; |
| 760 | hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; | 763 | hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; |
| 761 | hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; | ||
| 762 | hcrx->ccid3hcrx_bytes_recv = 0; | 764 | hcrx->ccid3hcrx_bytes_recv = 0; |
| 763 | 765 | ||
| 764 | /* Convert to multiples of 10us */ | 766 | /* Convert to multiples of 10us */ |
| @@ -782,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | |||
| 782 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | 784 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) |
| 783 | return 0; | 785 | return 0; |
| 784 | 786 | ||
| 785 | DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; | 787 | DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; |
| 786 | 788 | ||
| 787 | if (dccp_packet_without_ack(skb)) | 789 | if (dccp_packet_without_ack(skb)) |
| 788 | return 0; | 790 | return 0; |
| @@ -854,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) | |||
| 854 | interval = 1; | 856 | interval = 1; |
| 855 | } | 857 | } |
| 856 | found: | 858 | found: |
| 859 | if (!tail) { | ||
| 860 | LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n", | ||
| 861 | __FUNCTION__); | ||
| 862 | return ~0; | ||
| 863 | } | ||
| 857 | rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; | 864 | rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; |
| 858 | ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", | 865 | ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", |
| 859 | dccp_role(sk), sk, rtt); | 866 | dccp_role(sk), sk, rtt); |
| @@ -864,9 +871,20 @@ found: | |||
| 864 | delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); | 871 | delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); |
| 865 | x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); | 872 | x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); |
| 866 | 873 | ||
| 874 | if (x_recv == 0) | ||
| 875 | x_recv = hcrx->ccid3hcrx_x_recv; | ||
| 876 | |||
| 867 | tmp1 = (u64)x_recv * (u64)rtt; | 877 | tmp1 = (u64)x_recv * (u64)rtt; |
| 868 | do_div(tmp1,10000000); | 878 | do_div(tmp1,10000000); |
| 869 | tmp2 = (u32)tmp1; | 879 | tmp2 = (u32)tmp1; |
| 880 | |||
| 881 | if (!tmp2) { | ||
| 882 | LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 " | ||
| 883 | "%s: x_recv = %u, rtt =%u\n", | ||
| 884 | __FUNCTION__, x_recv, rtt); | ||
| 885 | return ~0; | ||
| 886 | } | ||
| 887 | |||
| 870 | fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; | 888 | fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; |
| 871 | /* do not alter order above or you will get overflow on 32 bit */ | 889 | /* do not alter order above or you will get overflow on 32 bit */ |
| 872 | p = tfrc_calc_x_reverse_lookup(fval); | 890 | p = tfrc_calc_x_reverse_lookup(fval); |
| @@ -882,31 +900,101 @@ found: | |||
| 882 | static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) | 900 | static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) |
| 883 | { | 901 | { |
| 884 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 902 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
| 903 | struct dccp_li_hist_entry *next, *head; | ||
| 904 | u64 seq_temp; | ||
| 885 | 905 | ||
| 886 | if (seq_loss != DCCP_MAX_SEQNO + 1 && | 906 | if (list_empty(&hcrx->ccid3hcrx_li_hist)) { |
| 887 | list_empty(&hcrx->ccid3hcrx_li_hist)) { | 907 | if (!dccp_li_hist_interval_new(ccid3_li_hist, |
| 888 | struct dccp_li_hist_entry *li_tail; | 908 | &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) |
| 909 | return; | ||
| 889 | 910 | ||
| 890 | li_tail = dccp_li_hist_interval_new(ccid3_li_hist, | 911 | next = (struct dccp_li_hist_entry *) |
| 891 | &hcrx->ccid3hcrx_li_hist, | 912 | hcrx->ccid3hcrx_li_hist.next; |
| 892 | seq_loss, win_loss); | 913 | next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); |
| 893 | if (li_tail == NULL) | 914 | } else { |
| 915 | struct dccp_li_hist_entry *entry; | ||
| 916 | struct list_head *tail; | ||
| 917 | |||
| 918 | head = (struct dccp_li_hist_entry *) | ||
| 919 | hcrx->ccid3hcrx_li_hist.next; | ||
| 920 | /* FIXME win count check removed as was wrong */ | ||
| 921 | /* should make this check with receive history */ | ||
| 922 | /* and compare there as per section 10.2 of RFC4342 */ | ||
| 923 | |||
| 924 | /* new loss event detected */ | ||
| 925 | /* calculate last interval length */ | ||
| 926 | seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); | ||
| 927 | entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); | ||
| 928 | |||
| 929 | if (entry == NULL) { | ||
| 930 | printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__); | ||
| 931 | dump_stack(); | ||
| 894 | return; | 932 | return; |
| 895 | li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); | 933 | } |
| 896 | } else | 934 | |
| 897 | LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of " | 935 | list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); |
| 898 | "interval\n", __FUNCTION__); | 936 | |
| 937 | tail = hcrx->ccid3hcrx_li_hist.prev; | ||
| 938 | list_del(tail); | ||
| 939 | kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); | ||
| 940 | |||
| 941 | /* Create the newest interval */ | ||
| 942 | entry->dccplih_seqno = seq_loss; | ||
| 943 | entry->dccplih_interval = seq_temp; | ||
| 944 | entry->dccplih_win_count = win_loss; | ||
| 945 | } | ||
| 899 | } | 946 | } |
| 900 | 947 | ||
| 901 | static void ccid3_hc_rx_detect_loss(struct sock *sk) | 948 | static int ccid3_hc_rx_detect_loss(struct sock *sk, |
| 949 | struct dccp_rx_hist_entry *packet) | ||
| 902 | { | 950 | { |
| 903 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 951 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
| 904 | u8 win_loss; | 952 | struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); |
| 905 | const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, | 953 | u64 seqno = packet->dccphrx_seqno; |
| 906 | &hcrx->ccid3hcrx_li_hist, | 954 | u64 tmp_seqno; |
| 907 | &win_loss); | 955 | int loss = 0; |
| 956 | u8 ccval; | ||
| 957 | |||
| 958 | |||
| 959 | tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; | ||
| 960 | |||
| 961 | if (!rx_hist || | ||
| 962 | follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { | ||
| 963 | hcrx->ccid3hcrx_seqno_nonloss = seqno; | ||
| 964 | hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; | ||
| 965 | goto detect_out; | ||
| 966 | } | ||
| 967 | |||
| 908 | 968 | ||
| 909 | ccid3_hc_rx_update_li(sk, seq_loss, win_loss); | 969 | while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) |
| 970 | > TFRC_RECV_NUM_LATE_LOSS) { | ||
| 971 | loss = 1; | ||
| 972 | ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, | ||
| 973 | hcrx->ccid3hcrx_ccval_nonloss); | ||
| 974 | tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; | ||
| 975 | dccp_inc_seqno(&tmp_seqno); | ||
| 976 | hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; | ||
| 977 | dccp_inc_seqno(&tmp_seqno); | ||
| 978 | while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, | ||
| 979 | tmp_seqno, &ccval)) { | ||
| 980 | hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; | ||
| 981 | hcrx->ccid3hcrx_ccval_nonloss = ccval; | ||
| 982 | dccp_inc_seqno(&tmp_seqno); | ||
| 983 | } | ||
| 984 | } | ||
| 985 | |||
| 986 | /* FIXME - this code could be simplified with above while */ | ||
| 987 | /* but works at moment */ | ||
| 988 | if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { | ||
| 989 | hcrx->ccid3hcrx_seqno_nonloss = seqno; | ||
| 990 | hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; | ||
| 991 | } | ||
| 992 | |||
| 993 | detect_out: | ||
| 994 | dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, | ||
| 995 | &hcrx->ccid3hcrx_li_hist, packet, | ||
| 996 | hcrx->ccid3hcrx_seqno_nonloss); | ||
| 997 | return loss; | ||
| 910 | } | 998 | } |
| 911 | 999 | ||
| 912 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | 1000 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) |
| @@ -916,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
| 916 | struct dccp_rx_hist_entry *packet; | 1004 | struct dccp_rx_hist_entry *packet; |
| 917 | struct timeval now; | 1005 | struct timeval now; |
| 918 | u8 win_count; | 1006 | u8 win_count; |
| 919 | u32 p_prev, r_sample, t_elapsed; | 1007 | u32 p_prev, rtt_prev, r_sample, t_elapsed; |
| 920 | int ins; | 1008 | int loss; |
| 921 | 1009 | ||
| 922 | BUG_ON(hcrx == NULL || | 1010 | BUG_ON(hcrx == NULL || |
| 923 | !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || | 1011 | !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || |
| @@ -932,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
| 932 | case DCCP_PKT_DATAACK: | 1020 | case DCCP_PKT_DATAACK: |
| 933 | if (opt_recv->dccpor_timestamp_echo == 0) | 1021 | if (opt_recv->dccpor_timestamp_echo == 0) |
| 934 | break; | 1022 | break; |
| 935 | p_prev = hcrx->ccid3hcrx_rtt; | 1023 | rtt_prev = hcrx->ccid3hcrx_rtt; |
| 936 | dccp_timestamp(sk, &now); | 1024 | dccp_timestamp(sk, &now); |
| 937 | timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); | 1025 | timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); |
| 938 | r_sample = timeval_usecs(&now); | 1026 | r_sample = timeval_usecs(&now); |
| @@ -951,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
| 951 | hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + | 1039 | hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + |
| 952 | r_sample / 10; | 1040 | r_sample / 10; |
| 953 | 1041 | ||
| 954 | if (p_prev != hcrx->ccid3hcrx_rtt) | 1042 | if (rtt_prev != hcrx->ccid3hcrx_rtt) |
| 955 | ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", | 1043 | ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", |
| 956 | dccp_role(sk), hcrx->ccid3hcrx_rtt, | 1044 | dccp_role(sk), hcrx->ccid3hcrx_rtt, |
| 957 | opt_recv->dccpor_elapsed_time); | 1045 | opt_recv->dccpor_elapsed_time); |
| 958 | break; | 1046 | break; |
| @@ -973,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
| 973 | 1061 | ||
| 974 | win_count = packet->dccphrx_ccval; | 1062 | win_count = packet->dccphrx_ccval; |
| 975 | 1063 | ||
| 976 | ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, | 1064 | loss = ccid3_hc_rx_detect_loss(sk, packet); |
| 977 | &hcrx->ccid3hcrx_li_hist, packet); | ||
| 978 | 1065 | ||
| 979 | if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) | 1066 | if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) |
| 980 | return; | 1067 | return; |
| @@ -991,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
| 991 | case TFRC_RSTATE_DATA: | 1078 | case TFRC_RSTATE_DATA: |
| 992 | hcrx->ccid3hcrx_bytes_recv += skb->len - | 1079 | hcrx->ccid3hcrx_bytes_recv += skb->len - |
| 993 | dccp_hdr(skb)->dccph_doff * 4; | 1080 | dccp_hdr(skb)->dccph_doff * 4; |
| 994 | if (ins != 0) | 1081 | if (loss) |
| 995 | break; | 1082 | break; |
| 996 | 1083 | ||
| 997 | dccp_timestamp(sk, &now); | 1084 | dccp_timestamp(sk, &now); |
| @@ -1012,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
| 1012 | ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", | 1099 | ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", |
| 1013 | dccp_role(sk), sk, dccp_state_name(sk->sk_state)); | 1100 | dccp_role(sk), sk, dccp_state_name(sk->sk_state)); |
| 1014 | 1101 | ||
| 1015 | ccid3_hc_rx_detect_loss(sk); | ||
| 1016 | p_prev = hcrx->ccid3hcrx_p; | 1102 | p_prev = hcrx->ccid3hcrx_p; |
| 1017 | 1103 | ||
| 1018 | /* Calculate loss event rate */ | 1104 | /* Calculate loss event rate */ |
| @@ -1022,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
| 1022 | /* Scaling up by 1000000 as fixed decimal */ | 1108 | /* Scaling up by 1000000 as fixed decimal */ |
| 1023 | if (i_mean != 0) | 1109 | if (i_mean != 0) |
| 1024 | hcrx->ccid3hcrx_p = 1000000 / i_mean; | 1110 | hcrx->ccid3hcrx_p = 1000000 / i_mean; |
| 1111 | } else { | ||
| 1112 | printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__); | ||
| 1113 | dump_stack(); | ||
| 1025 | } | 1114 | } |
| 1026 | 1115 | ||
| 1027 | if (hcrx->ccid3hcrx_p > p_prev) { | 1116 | if (hcrx->ccid3hcrx_p > p_prev) { |
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 22cb9f80a09d..0a2cb7536d26 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
| @@ -120,9 +120,10 @@ struct ccid3_hc_rx_sock { | |||
| 120 | #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv | 120 | #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv |
| 121 | #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt | 121 | #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt |
| 122 | #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p | 122 | #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p |
| 123 | u64 ccid3hcrx_seqno_last_counter:48, | 123 | u64 ccid3hcrx_seqno_nonloss:48, |
| 124 | ccid3hcrx_ccval_nonloss:4, | ||
| 124 | ccid3hcrx_state:8, | 125 | ccid3hcrx_state:8, |
| 125 | ccid3hcrx_last_counter:4; | 126 | ccid3hcrx_ccval_last_counter:4; |
| 126 | u32 ccid3hcrx_bytes_recv; | 127 | u32 ccid3hcrx_bytes_recv; |
| 127 | struct timeval ccid3hcrx_tstamp_last_feedback; | 128 | struct timeval ccid3hcrx_tstamp_last_feedback; |
| 128 | struct timeval ccid3hcrx_tstamp_last_ack; | 129 | struct timeval ccid3hcrx_tstamp_last_ack; |
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index b93d9fc98cb8..906c81ab9d4f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | */ | 12 | */ |
| 13 | 13 | ||
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <net/sock.h> | ||
| 15 | 16 | ||
| 16 | #include "loss_interval.h" | 17 | #include "loss_interval.h" |
| 17 | 18 | ||
| @@ -90,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) | |||
| 90 | u32 w_tot = 0; | 91 | u32 w_tot = 0; |
| 91 | 92 | ||
| 92 | list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { | 93 | list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { |
| 93 | if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { | 94 | if (li_entry->dccplih_interval != ~0) { |
| 94 | i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; | 95 | i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; |
| 95 | w_tot += dccp_li_hist_w[i]; | 96 | w_tot += dccp_li_hist_w[i]; |
| 97 | if (i != 0) | ||
| 98 | i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; | ||
| 96 | } | 99 | } |
| 97 | 100 | ||
| 98 | if (i != 0) | ||
| 99 | i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; | ||
| 100 | 101 | ||
| 101 | if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) | 102 | if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) |
| 102 | break; | 103 | break; |
| @@ -107,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) | |||
| 107 | 108 | ||
| 108 | i_tot = max(i_tot0, i_tot1); | 109 | i_tot = max(i_tot0, i_tot1); |
| 109 | 110 | ||
| 110 | /* FIXME: Why do we do this? -Ian McDonald */ | 111 | if (!w_tot) { |
| 111 | if (i_tot * 4 < w_tot) | 112 | LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__); |
| 112 | i_tot = w_tot * 4; | 113 | return 1; |
| 114 | } | ||
| 113 | 115 | ||
| 114 | return i_tot * 4 / w_tot; | 116 | return i_tot / w_tot; |
| 115 | } | 117 | } |
| 116 | 118 | ||
| 117 | EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); | 119 | EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); |
| 118 | 120 | ||
| 119 | struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, | 121 | int dccp_li_hist_interval_new(struct dccp_li_hist *hist, |
| 120 | struct list_head *list, | 122 | struct list_head *list, const u64 seq_loss, const u8 win_loss) |
| 121 | const u64 seq_loss, | ||
| 122 | const u8 win_loss) | ||
| 123 | { | 123 | { |
| 124 | struct dccp_li_hist_entry *tail = NULL, *entry; | 124 | struct dccp_li_hist_entry *entry; |
| 125 | int i; | 125 | int i; |
| 126 | 126 | ||
| 127 | for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { | 127 | for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { |
| 128 | entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); | 128 | entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); |
| 129 | if (entry == NULL) { | 129 | if (entry == NULL) { |
| 130 | dccp_li_hist_purge(hist, list); | 130 | dccp_li_hist_purge(hist, list); |
| 131 | return NULL; | 131 | dump_stack(); |
| 132 | return 0; | ||
| 132 | } | 133 | } |
| 133 | if (tail == NULL) | 134 | entry->dccplih_interval = ~0; |
| 134 | tail = entry; | ||
| 135 | list_add(&entry->dccplih_node, list); | 135 | list_add(&entry->dccplih_node, list); |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | entry->dccplih_seqno = seq_loss; | 138 | entry->dccplih_seqno = seq_loss; |
| 139 | entry->dccplih_win_count = win_loss; | 139 | entry->dccplih_win_count = win_loss; |
| 140 | return tail; | 140 | return 1; |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); | 143 | EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); |
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index dcb370a53f57..0ae85f0340b2 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h | |||
| @@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist, | |||
| 52 | 52 | ||
| 53 | extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); | 53 | extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); |
| 54 | 54 | ||
| 55 | extern struct dccp_li_hist_entry * | 55 | extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, |
| 56 | dccp_li_hist_interval_new(struct dccp_li_hist *hist, | 56 | struct list_head *list, const u64 seq_loss, const u8 win_loss); |
| 57 | struct list_head *list, | ||
| 58 | const u64 seq_loss, | ||
| 59 | const u8 win_loss); | ||
| 60 | #endif /* _DCCP_LI_HIST_ */ | 57 | #endif /* _DCCP_LI_HIST_ */ |
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 420c60f8604d..b876c9c81c65 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c | |||
| @@ -112,64 +112,27 @@ struct dccp_rx_hist_entry * | |||
| 112 | 112 | ||
| 113 | EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); | 113 | EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); |
| 114 | 114 | ||
| 115 | int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | 115 | void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, |
| 116 | struct list_head *rx_list, | 116 | struct list_head *rx_list, |
| 117 | struct list_head *li_list, | 117 | struct list_head *li_list, |
| 118 | struct dccp_rx_hist_entry *packet) | 118 | struct dccp_rx_hist_entry *packet, |
| 119 | u64 nonloss_seqno) | ||
| 119 | { | 120 | { |
| 120 | struct dccp_rx_hist_entry *entry, *next, *iter; | 121 | struct dccp_rx_hist_entry *entry, *next; |
| 121 | u8 num_later = 0; | 122 | u8 num_later = 0; |
| 122 | 123 | ||
| 123 | iter = dccp_rx_hist_head(rx_list); | 124 | list_add(&packet->dccphrx_node, rx_list); |
| 124 | if (iter == NULL) | ||
| 125 | dccp_rx_hist_add_entry(rx_list, packet); | ||
| 126 | else { | ||
| 127 | const u64 seqno = packet->dccphrx_seqno; | ||
| 128 | |||
| 129 | if (after48(seqno, iter->dccphrx_seqno)) | ||
| 130 | dccp_rx_hist_add_entry(rx_list, packet); | ||
| 131 | else { | ||
| 132 | if (dccp_rx_hist_entry_data_packet(iter)) | ||
| 133 | num_later = 1; | ||
| 134 | |||
| 135 | list_for_each_entry_continue(iter, rx_list, | ||
| 136 | dccphrx_node) { | ||
| 137 | if (after48(seqno, iter->dccphrx_seqno)) { | ||
| 138 | dccp_rx_hist_add_entry(&iter->dccphrx_node, | ||
| 139 | packet); | ||
| 140 | goto trim_history; | ||
| 141 | } | ||
| 142 | |||
| 143 | if (dccp_rx_hist_entry_data_packet(iter)) | ||
| 144 | num_later++; | ||
| 145 | |||
| 146 | if (num_later == TFRC_RECV_NUM_LATE_LOSS) { | ||
| 147 | dccp_rx_hist_entry_delete(hist, packet); | ||
| 148 | return 1; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | if (num_later < TFRC_RECV_NUM_LATE_LOSS) | ||
| 153 | dccp_rx_hist_add_entry(rx_list, packet); | ||
| 154 | /* | ||
| 155 | * FIXME: else what? should we destroy the packet | ||
| 156 | * like above? | ||
| 157 | */ | ||
| 158 | } | ||
| 159 | } | ||
| 160 | 125 | ||
| 161 | trim_history: | ||
| 162 | /* | ||
| 163 | * Trim history (remove all packets after the NUM_LATE_LOSS + 1 | ||
| 164 | * data packets) | ||
| 165 | */ | ||
| 166 | num_later = TFRC_RECV_NUM_LATE_LOSS + 1; | 126 | num_later = TFRC_RECV_NUM_LATE_LOSS + 1; |
| 167 | 127 | ||
| 168 | if (!list_empty(li_list)) { | 128 | if (!list_empty(li_list)) { |
| 169 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | 129 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { |
| 170 | if (num_later == 0) { | 130 | if (num_later == 0) { |
| 171 | list_del_init(&entry->dccphrx_node); | 131 | if (after48(nonloss_seqno, |
| 172 | dccp_rx_hist_entry_delete(hist, entry); | 132 | entry->dccphrx_seqno)) { |
| 133 | list_del_init(&entry->dccphrx_node); | ||
| 134 | dccp_rx_hist_entry_delete(hist, entry); | ||
| 135 | } | ||
| 173 | } else if (dccp_rx_hist_entry_data_packet(entry)) | 136 | } else if (dccp_rx_hist_entry_data_packet(entry)) |
| 174 | --num_later; | 137 | --num_later; |
| 175 | } | 138 | } |
| @@ -217,94 +180,10 @@ trim_history: | |||
| 217 | --num_later; | 180 | --num_later; |
| 218 | } | 181 | } |
| 219 | } | 182 | } |
| 220 | |||
| 221 | return 0; | ||
| 222 | } | 183 | } |
| 223 | 184 | ||
| 224 | EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); | 185 | EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); |
| 225 | 186 | ||
| 226 | u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, | ||
| 227 | struct list_head *li_list, u8 *win_loss) | ||
| 228 | { | ||
| 229 | struct dccp_rx_hist_entry *entry, *next, *packet; | ||
| 230 | struct dccp_rx_hist_entry *a_loss = NULL; | ||
| 231 | struct dccp_rx_hist_entry *b_loss = NULL; | ||
| 232 | u64 seq_loss = DCCP_MAX_SEQNO + 1; | ||
| 233 | u8 num_later = TFRC_RECV_NUM_LATE_LOSS; | ||
| 234 | |||
| 235 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | ||
| 236 | if (num_later == 0) { | ||
| 237 | b_loss = entry; | ||
| 238 | break; | ||
| 239 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
| 240 | --num_later; | ||
| 241 | } | ||
| 242 | |||
| 243 | if (b_loss == NULL) | ||
| 244 | goto out; | ||
| 245 | |||
| 246 | num_later = 1; | ||
| 247 | list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { | ||
| 248 | if (num_later == 0) { | ||
| 249 | a_loss = entry; | ||
| 250 | break; | ||
| 251 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
| 252 | --num_later; | ||
| 253 | } | ||
| 254 | |||
| 255 | if (a_loss == NULL) { | ||
| 256 | if (list_empty(li_list)) { | ||
| 257 | /* no loss event have occured yet */ | ||
| 258 | LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " | ||
| 259 | "comparing to initial seqno\n", | ||
| 260 | __FUNCTION__); | ||
| 261 | goto out; | ||
| 262 | } else { | ||
| 263 | LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", | ||
| 264 | __FUNCTION__); | ||
| 265 | goto out; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | /* Locate a lost data packet */ | ||
| 270 | entry = packet = b_loss; | ||
| 271 | list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { | ||
| 272 | u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, | ||
| 273 | packet->dccphrx_seqno); | ||
| 274 | |||
| 275 | if (delta != 0) { | ||
| 276 | if (dccp_rx_hist_entry_data_packet(packet)) | ||
| 277 | --delta; | ||
| 278 | /* | ||
| 279 | * FIXME: check this, probably this % usage is because | ||
| 280 | * in earlier drafts the ndp count was just 8 bits | ||
| 281 | * long, but now it cam be up to 24 bits long. | ||
| 282 | */ | ||
| 283 | #if 0 | ||
| 284 | if (delta % DCCP_NDP_LIMIT != | ||
| 285 | (packet->dccphrx_ndp - | ||
| 286 | entry->dccphrx_ndp) % DCCP_NDP_LIMIT) | ||
| 287 | #endif | ||
| 288 | if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { | ||
| 289 | seq_loss = entry->dccphrx_seqno; | ||
| 290 | dccp_inc_seqno(&seq_loss); | ||
| 291 | } | ||
| 292 | } | ||
| 293 | packet = entry; | ||
| 294 | if (packet == a_loss) | ||
| 295 | break; | ||
| 296 | } | ||
| 297 | out: | ||
| 298 | if (seq_loss != DCCP_MAX_SEQNO + 1) | ||
| 299 | *win_loss = a_loss->dccphrx_ccval; | ||
| 300 | else | ||
| 301 | *win_loss = 0; /* Paranoia */ | ||
| 302 | |||
| 303 | return seq_loss; | ||
| 304 | } | ||
| 305 | |||
| 306 | EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); | ||
| 307 | |||
| 308 | struct dccp_tx_hist *dccp_tx_hist_new(const char *name) | 187 | struct dccp_tx_hist *dccp_tx_hist_new(const char *name) |
| 309 | { | 188 | { |
| 310 | struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | 189 | struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); |
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index aea9c5d70910..067cf1c85a37 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h | |||
| @@ -166,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, | |||
| 166 | extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, | 166 | extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, |
| 167 | struct list_head *list); | 167 | struct list_head *list); |
| 168 | 168 | ||
| 169 | static inline void dccp_rx_hist_add_entry(struct list_head *list, | ||
| 170 | struct dccp_rx_hist_entry *entry) | ||
| 171 | { | ||
| 172 | list_add(&entry->dccphrx_node, list); | ||
| 173 | } | ||
| 174 | |||
| 175 | static inline struct dccp_rx_hist_entry * | 169 | static inline struct dccp_rx_hist_entry * |
| 176 | dccp_rx_hist_head(struct list_head *list) | 170 | dccp_rx_hist_head(struct list_head *list) |
| 177 | { | 171 | { |
| @@ -190,10 +184,11 @@ static inline int | |||
| 190 | entry->dccphrx_type == DCCP_PKT_DATAACK; | 184 | entry->dccphrx_type == DCCP_PKT_DATAACK; |
| 191 | } | 185 | } |
| 192 | 186 | ||
| 193 | extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | 187 | extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, |
| 194 | struct list_head *rx_list, | 188 | struct list_head *rx_list, |
| 195 | struct list_head *li_list, | 189 | struct list_head *li_list, |
| 196 | struct dccp_rx_hist_entry *packet); | 190 | struct dccp_rx_hist_entry *packet, |
| 191 | u64 nonloss_seqno); | ||
| 197 | 192 | ||
| 198 | extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, | 193 | extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, |
| 199 | struct list_head *li_list, u8 *win_loss); | 194 | struct list_head *li_list, u8 *win_loss); |
