56 files changed, 36787 insertions, 0 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index abccb5dab9a..18f5a85b47c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -497,6 +497,23 @@ config OCFS2_DEBUG_FS
 	  this option for debugging only as it is likely to decrease
 	  performance of the filesystem.
 
+config BTRFS_FS
+	tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
+	depends on EXPERIMENTAL
+	select LIBCRC32C
+	help
+	  Btrfs is a new filesystem with extents, writable snapshotting,
+	  support for multiple devices and many more features.
+
+	  Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
+	  FINALIZED. You should say N here unless you are interested in
+	  testing Btrfs with non-critical data.
+
+	  To compile this file system support as a module, choose M here. The
+	  module will be called btrfs.
+
+	  If unsure, say N.
+
 endif # BLOCK
 
 config DNOTIFY
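Because of the select, enabling BTRFS_FS pulls in LIBCRC32C automatically; a modular build ends up with a .config fragment along these lines (illustrative, not part of the patch):

	CONFIG_EXPERIMENTAL=y
	CONFIG_BTRFS_FS=m
	CONFIG_LIBCRC32C=m
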
diff --git a/fs/Makefile b/fs/Makefile
index a1482a5eff1..41fcc858c97 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -121,4 +121,5 @@ obj-$(CONFIG_HOSTFS) += hostfs/
 obj-$(CONFIG_HPPFS) += hppfs/
 obj-$(CONFIG_DEBUG_FS) += debugfs/
 obj-$(CONFIG_OCFS2_FS) += ocfs2/
+obj-$(CONFIG_BTRFS_FS) += btrfs/
 obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING
new file mode 100644
index 00000000000..ca442d313d8
--- /dev/null
+++ b/fs/btrfs/COPYING
@@ -0,0 +1,356 @@
+
+ NOTE! This copyright does *not* cover user programs that use kernel
+ services by normal system calls - this is merely considered normal use
+ of the kernel, and does *not* fall under the heading of "derived work".
+ Also note that the GPL below is copyrighted by the Free Software
+ Foundation, but the instance of code that it refers to (the Linux
+ kernel) is copyrighted by me and others who actually wrote it.
+
+ Also note that the only valid version of the GPL as far as the kernel
+ is concerned is _this_ particular version of the license (ie v2, not
+ v2.2 or v3.x or whatever), unless explicitly otherwise stated.
+
+			Linus Torvalds
+
+----------------------------------------
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License. (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code. (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year> <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/fs/btrfs/INSTALL b/fs/btrfs/INSTALL
new file mode 100644
index 00000000000..16b45a56878
--- /dev/null
+++ b/fs/btrfs/INSTALL
@@ -0,0 +1,48 @@
+Install Instructions
+
+Btrfs puts snapshots and subvolumes into the root directory of the FS. This
+directory can only be changed by btrfsctl right now, and normal filesystem
+operations do not work on it. The default subvolume is called 'default',
+and you can create files and directories in mount_point/default.
+
+Btrfs uses libcrc32c in the kernel for file and metadata checksums. You need
+to compile the kernel with:
+
+CONFIG_LIBCRC32C=m
+
+libcrc32c can be static as well. Once your kernel is set up, typing make in
+the btrfs module sources will build against the running kernel. When the
+build is complete:
+
+modprobe libcrc32c
+insmod btrfs.ko
+
+The Btrfs utility programs require libuuid to build. This can be found
+in the e2fsprogs sources, and is usually available as libuuid or
+e2fsprogs-devel from various distros.
+
+Building the utilities is just make; make install. The programs go
+into /usr/local/bin. The commands available are:
+
+mkfs.btrfs: create a filesystem
+
+btrfsctl: control program to create snapshots and subvolumes:
+
+	mount /dev/sda2 /mnt
+	btrfsctl -s new_subvol_name /mnt
+	btrfsctl -s snapshot_of_default /mnt/default
+	btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
+	btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
+	ls /mnt
+	default snapshot_of_a_snapshot snapshot_of_new_subvol
+	new_subvol_name snapshot_of_default
+
+Snapshots and subvolumes cannot be deleted right now, but you can
+rm -rf all the files and directories inside them.
+
+btrfsck: do a limited check of the FS extent trees.
+
+debug-tree: print all of the FS metadata in text form. Example:
+
+	debug-tree /dev/sda2 >& big_output_file
+
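Putting the INSTALL steps above together, a first experimental run might look like the following sketch. /dev/sda2 is a placeholder for a scratch device; mkfs.btrfs will destroy whatever is on it:

	modprobe libcrc32c
	insmod btrfs.ko
	mkfs.btrfs /dev/sda2
	mount /dev/sda2 /mnt
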
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
new file mode 100644
index 00000000000..7125716e142
--- /dev/null
+++ b/fs/btrfs/Makefile
@@ -0,0 +1,24 @@
+ifneq ($(KERNELRELEASE),)
+# kbuild part of makefile
+
+obj-$(CONFIG_BTRFS_FS) := btrfs.o
+btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+	   file-item.o inode-item.o inode-map.o disk-io.o \
+	   transaction.o inode.o file.o tree-defrag.o \
+	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
+	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o
+else
+
+# Normal Makefile
+
+KERNELDIR := /lib/modules/`uname -r`/build
+all:
+	$(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
+
+modules_install:
+	$(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
+clean:
+	$(MAKE) -C $(KERNELDIR) M=`pwd` clean
+
+endif
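The ifneq ($(KERNELRELEASE),) guard is the usual idiom for a Makefile that serves two roles: kbuild enters it with KERNELRELEASE set and reads only the obj-/btrfs-y lists, while a bare make in the source directory takes the else branch. That standalone branch expands to roughly this command, assuming headers for the running kernel are installed:

	make -C /lib/modules/`uname -r`/build M=`pwd` CONFIG_BTRFS_FS=m modules
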
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
new file mode 100644
index 00000000000..867eaf1f8ef
--- /dev/null
+++ b/fs/btrfs/acl.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2007 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "xattr.h"
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+static void btrfs_update_cached_acl(struct inode *inode,
+				    struct posix_acl **p_acl,
+				    struct posix_acl *acl)
+{
+	spin_lock(&inode->i_lock);
+	if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
+		posix_acl_release(*p_acl);
+	*p_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+{
+	int size;
+	const char *name;
+	char *value = NULL;
+	struct posix_acl *acl = NULL, **p_acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		p_acl = &BTRFS_I(inode)->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		p_acl = &BTRFS_I(inode)->i_default_acl;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock(&inode->i_lock);
+	if (*p_acl != BTRFS_ACL_NOT_CACHED)
+		acl = posix_acl_dup(*p_acl);
+	spin_unlock(&inode->i_lock);
+
+	if (acl)
+		return acl;
+
+
+	size = __btrfs_getxattr(inode, name, "", 0);
+	if (size > 0) {
+		value = kzalloc(size, GFP_NOFS);
+		if (!value)
+			return ERR_PTR(-ENOMEM);
+		size = __btrfs_getxattr(inode, name, value, size);
+		if (size > 0) {
+			acl = posix_acl_from_xattr(value, size);
+			btrfs_update_cached_acl(inode, p_acl, acl);
+		}
+		kfree(value);
+	} else if (size == -ENOENT) {
+		acl = NULL;
+		btrfs_update_cached_acl(inode, p_acl, acl);
+	}
+
+	return acl;
+}
+
+static int btrfs_xattr_get_acl(struct inode *inode, int type,
+			       void *value, size_t size)
+{
+	struct posix_acl *acl;
+	int ret = 0;
+
+	acl = btrfs_get_acl(inode, type);
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+	ret = posix_acl_to_xattr(acl, value, size);
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+/*
+ * Needs to be called with fs_mutex held
+ */
+static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	int ret, size = 0;
+	const char *name;
+	struct posix_acl **p_acl;
+	char *value = NULL;
+	mode_t mode;
+
+	if (acl) {
+		ret = posix_acl_valid(acl);
+		if (ret < 0)
+			return ret;
+		ret = 0;
+	}
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		mode = inode->i_mode;
+		ret = posix_acl_equiv_mode(acl, &mode);
+		if (ret < 0)
+			return ret;
+		ret = 0;
+		inode->i_mode = mode;
+		name = POSIX_ACL_XATTR_ACCESS;
+		p_acl = &BTRFS_I(inode)->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EINVAL : 0;
+		name = POSIX_ACL_XATTR_DEFAULT;
+		p_acl = &BTRFS_I(inode)->i_default_acl;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		size = posix_acl_xattr_size(acl->a_count);
+		value = kmalloc(size, GFP_NOFS);
+		if (!value) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = posix_acl_to_xattr(acl, value, size);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = __btrfs_setxattr(inode, name, value, size, 0);
+
+out:
+	if (value)
+		kfree(value);
+
+	if (!ret)
+		btrfs_update_cached_acl(inode, p_acl, acl);
+
+	return ret;
+}
+
+static int btrfs_xattr_set_acl(struct inode *inode, int type,
+			       const void *value, size_t size)
+{
+	int ret = 0;
+	struct posix_acl *acl = NULL;
+
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (acl == NULL) {
+			value = NULL;
+			size = 0;
+		} else if (IS_ERR(acl)) {
+			return PTR_ERR(acl);
+		}
+	}
+
+	ret = btrfs_set_acl(inode, acl, type);
+
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+
+static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
+				      void *value, size_t size)
+{
+	return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
+				      const void *value, size_t size, int flags)
+{
+	return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
+				       void *value, size_t size)
+{
+	return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
+				       const void *value, size_t size, int flags)
+{
+	return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl;
+	int error = -EAGAIN;
+
+	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+	}
+
+	return error;
+}
+
+/*
+ * btrfs_init_acl is already generally called under fs_mutex, so the locking
+ * stuff has been fixed to work with that. If the locking stuff changes, we
+ * need to re-evaluate the acl locking stuff.
+ */
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+	struct posix_acl *acl = NULL;
+	int ret = 0;
+
+	/* this happens with subvols */
+	if (!dir)
+		return 0;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		if (IS_POSIXACL(dir)) {
+			acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
+			if (IS_ERR(acl))
+				return PTR_ERR(acl);
+		}
+
+		if (!acl)
+			inode->i_mode &= ~current->fs->umask;
+	}
+
+	if (IS_POSIXACL(dir) && acl) {
+		struct posix_acl *clone;
+		mode_t mode;
+
+		if (S_ISDIR(inode->i_mode)) {
+			ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+			if (ret)
+				goto failed;
+		}
+		clone = posix_acl_clone(acl, GFP_NOFS);
+		ret = -ENOMEM;
+		if (!clone)
+			goto failed;
+
+		mode = inode->i_mode;
+		ret = posix_acl_create_masq(clone, &mode);
+		if (ret >= 0) {
+			inode->i_mode = mode;
+			if (ret > 0) {
+				/* we need an acl */
+				ret = btrfs_set_acl(inode, clone,
+						    ACL_TYPE_ACCESS);
+			}
+		}
+	}
+failed:
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *acl, *clone;
+	int ret = 0;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if (!IS_POSIXACL(inode))
+		return 0;
+
+	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	posix_acl_release(acl);
+	if (!clone)
+		return -ENOMEM;
+
+	ret = posix_acl_chmod_masq(clone, inode->i_mode);
+	if (!ret)
+		ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+
+	posix_acl_release(clone);
+
+	return ret;
+}
+
+struct xattr_handler btrfs_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.get	= btrfs_xattr_acl_default_get,
+	.set	= btrfs_xattr_acl_default_set,
+};
+
+struct xattr_handler btrfs_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.get	= btrfs_xattr_acl_access_get,
+	.set	= btrfs_xattr_acl_access_set,
+};
+
+#else /* CONFIG_FS_POSIX_ACL */
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
+
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+	return 0;
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+	return 0;
+}
+
+#endif /* CONFIG_FS_POSIX_ACL */
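btrfs_get_acl() and btrfs_update_cached_acl() above implement a three-state per-inode cache: a real ACL pointer, NULL for "no ACL stored on disk", and a poison value (BTRFS_ACL_NOT_CACHED) for "not read from the xattr yet", so even a negative lookup is only paid for once. A minimal standalone sketch of the same pattern, with hypothetical names standing in for the btrfs ones:

#include <linux/posix_acl.h>
#include <linux/spinlock.h>

/* stand-in for BTRFS_ACL_NOT_CACHED: a pointer value that can never
 * be a real allocation */
#define NOT_CACHED ((struct posix_acl *)-1)

struct acl_cache_slot {
	spinlock_t lock;
	struct posix_acl *acl;	/* NOT_CACHED, NULL ("no acl"), or a held ACL */
};

/* fast path: return a referenced copy, NULL, or NOT_CACHED on a miss */
static struct posix_acl *acl_cache_get(struct acl_cache_slot *slot)
{
	struct posix_acl *acl = NOT_CACHED;

	spin_lock(&slot->lock);
	if (slot->acl != NOT_CACHED)
		acl = slot->acl ? posix_acl_dup(slot->acl) : NULL;
	spin_unlock(&slot->lock);
	return acl;	/* caller reads the xattr only when this is NOT_CACHED */
}
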
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
new file mode 100644
index 00000000000..d82efd722a4
--- /dev/null
+++ b/fs/btrfs/async-thread.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/freezer.h>
+#include "async-thread.h"
+
+/*
+ * container for the kthread task pointer and the list of pending work
+ * One of these is allocated per thread.
+ */
+struct btrfs_worker_thread {
+	/* pool we belong to */
+	struct btrfs_workers *workers;
+
+	/* list of struct btrfs_work that are waiting for service */
+	struct list_head pending;
+
+	/* list of worker threads from struct btrfs_workers */
+	struct list_head worker_list;
+
+	/* kthread */
+	struct task_struct *task;
+
+	/* number of things on the pending list */
+	atomic_t num_pending;
+
+	unsigned long sequence;
+
+	/* protects the pending list. */
+	spinlock_t lock;
+
+	/* set to non-zero when this thread is already awake and kicking */
+	int working;
+
+	/* are we currently idle */
+	int idle;
+};
+
+/*
+ * helper function to move a thread onto the idle list after it
+ * has finished some requests.
+ */
+static void check_idle_worker(struct btrfs_worker_thread *worker)
+{
+	if (!worker->idle && atomic_read(&worker->num_pending) <
+	    worker->workers->idle_thresh / 2) {
+		unsigned long flags;
+		spin_lock_irqsave(&worker->workers->lock, flags);
+		worker->idle = 1;
+		list_move(&worker->worker_list, &worker->workers->idle_list);
+		spin_unlock_irqrestore(&worker->workers->lock, flags);
+	}
+}
+
+/*
+ * helper function to move a thread off the idle list after new
+ * pending work is added.
+ */
+static void check_busy_worker(struct btrfs_worker_thread *worker)
+{
+	if (worker->idle && atomic_read(&worker->num_pending) >=
+	    worker->workers->idle_thresh) {
+		unsigned long flags;
+		spin_lock_irqsave(&worker->workers->lock, flags);
+		worker->idle = 0;
+		list_move_tail(&worker->worker_list,
+			       &worker->workers->worker_list);
+		spin_unlock_irqrestore(&worker->workers->lock, flags);
+	}
+}
+
+/*
+ * main loop for servicing work items
+ */
+static int worker_loop(void *arg)
+{
+	struct btrfs_worker_thread *worker = arg;
+	struct list_head *cur;
+	struct btrfs_work *work;
+	do {
+		spin_lock_irq(&worker->lock);
+		while (!list_empty(&worker->pending)) {
+			cur = worker->pending.next;
+			work = list_entry(cur, struct btrfs_work, list);
+			list_del(&work->list);
+			clear_bit(0, &work->flags);
+
+			work->worker = worker;
+			spin_unlock_irq(&worker->lock);
+
+			work->func(work);
+
+			atomic_dec(&worker->num_pending);
+			spin_lock_irq(&worker->lock);
+			check_idle_worker(worker);
+		}
+		worker->working = 0;
+		if (freezing(current)) {
+			refrigerator();
+		} else {
+			set_current_state(TASK_INTERRUPTIBLE);
+			spin_unlock_irq(&worker->lock);
+			schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+/*
+ * this will wait for all the worker threads to shutdown
+ */
+int btrfs_stop_workers(struct btrfs_workers *workers)
+{
+	struct list_head *cur;
+	struct btrfs_worker_thread *worker;
+
+	list_splice_init(&workers->idle_list, &workers->worker_list);
+	while (!list_empty(&workers->worker_list)) {
+		cur = workers->worker_list.next;
+		worker = list_entry(cur, struct btrfs_worker_thread,
+				    worker_list);
+		kthread_stop(worker->task);
+		list_del(&worker->worker_list);
+		kfree(worker);
+	}
+	return 0;
+}
+
+/*
+ * simple init on struct btrfs_workers
+ */
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
+{
+	workers->num_workers = 0;
+	INIT_LIST_HEAD(&workers->worker_list);
+	INIT_LIST_HEAD(&workers->idle_list);
+	spin_lock_init(&workers->lock);
+	workers->max_workers = max;
+	workers->idle_thresh = 32;
+	workers->name = name;
+}
+
+/*
+ * starts new worker threads. This does not enforce the max worker
+ * count in case you need to temporarily go past it.
+ */
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+{
+	struct btrfs_worker_thread *worker;
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < num_workers; i++) {
+		worker = kzalloc(sizeof(*worker), GFP_NOFS);
+		if (!worker) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		INIT_LIST_HEAD(&worker->pending);
+		INIT_LIST_HEAD(&worker->worker_list);
+		spin_lock_init(&worker->lock);
+		atomic_set(&worker->num_pending, 0);
+		worker->task = kthread_run(worker_loop, worker,
+					   "btrfs-%s-%d", workers->name,
+					   workers->num_workers + i);
+		worker->workers = workers;
+		if (IS_ERR(worker->task)) {
+			kfree(worker);
+			ret = PTR_ERR(worker->task);
+			goto fail;
+		}
+
+		spin_lock_irq(&workers->lock);
+		list_add_tail(&worker->worker_list, &workers->idle_list);
+		worker->idle = 1;
+		workers->num_workers++;
+		spin_unlock_irq(&workers->lock);
+	}
+	return 0;
+fail:
+	btrfs_stop_workers(workers);
+	return ret;
+}
+
+/*
+ * run through the list and find a worker thread that doesn't have a lot
+ * to do right now. This can return null if we aren't yet at the thread
+ * count limit and all of the threads are busy.
+ */
+static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
+{
+	struct btrfs_worker_thread *worker;
+	struct list_head *next;
+	int enforce_min = workers->num_workers < workers->max_workers;
+
+	/*
+	 * if we find an idle thread, don't move it to the end of the
+	 * idle list. This improves the chance that the next submission
+	 * will reuse the same thread, and maybe catch it while it is still
+	 * working
+	 */
+	if (!list_empty(&workers->idle_list)) {
+		next = workers->idle_list.next;
+		worker = list_entry(next, struct btrfs_worker_thread,
+				    worker_list);
+		return worker;
+	}
+	if (enforce_min || list_empty(&workers->worker_list))
+		return NULL;
+
+	/*
+	 * if we pick a busy task, move the task to the end of the list.
+	 * hopefully this will keep things somewhat evenly balanced.
+	 * Do the move in batches based on the sequence number. This groups
+	 * requests submitted at roughly the same time onto the same worker.
+	 */
+	next = workers->worker_list.next;
+	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
+	atomic_inc(&worker->num_pending);
+	worker->sequence++;
+
+	if (worker->sequence % workers->idle_thresh == 0)
+		list_move_tail(next, &workers->worker_list);
+	return worker;
+}
+
+/*
+ * selects a worker thread to take the next job. This will either find
+ * an idle worker, start a new worker up to the max count, or just return
+ * one of the existing busy workers.
+ */
+static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
+{
+	struct btrfs_worker_thread *worker;
+	unsigned long flags;
+
+again:
+	spin_lock_irqsave(&workers->lock, flags);
+	worker = next_worker(workers);
+	spin_unlock_irqrestore(&workers->lock, flags);
+
+	if (!worker) {
+		spin_lock_irqsave(&workers->lock, flags);
+		if (workers->num_workers >= workers->max_workers) {
+			struct list_head *fallback = NULL;
+			/*
+			 * we have failed to find any workers, just
+			 * return the force one
+			 */
+			if (!list_empty(&workers->worker_list))
+				fallback = workers->worker_list.next;
+			if (!list_empty(&workers->idle_list))
+				fallback = workers->idle_list.next;
+			BUG_ON(!fallback);
+			worker = list_entry(fallback,
+					    struct btrfs_worker_thread,
+					    worker_list);
+			spin_unlock_irqrestore(&workers->lock, flags);
+		} else {
+			spin_unlock_irqrestore(&workers->lock, flags);
+			/* we're below the limit, start another worker */
+			btrfs_start_workers(workers, 1);
+			goto again;
+		}
+	}
+	return worker;
+}
+
+/*
+ * btrfs_requeue_work just puts the work item back on the tail of the list
+ * it was taken from. It is intended for use with long running work functions
+ * that make some progress and want to give the cpu up for others.
+ */
+int btrfs_requeue_work(struct btrfs_work *work)
+{
+	struct btrfs_worker_thread *worker = work->worker;
+	unsigned long flags;
+
+	if (test_and_set_bit(0, &work->flags))
+		goto out;
+
+	spin_lock_irqsave(&worker->lock, flags);
+	atomic_inc(&worker->num_pending);
+	list_add_tail(&work->list, &worker->pending);
+
+	/* by definition we're busy, take ourselves off the idle
+	 * list
+	 */
+	if (worker->idle) {
+		spin_lock_irqsave(&worker->workers->lock, flags);
+		worker->idle = 0;
+		list_move_tail(&worker->worker_list,
+			       &worker->workers->worker_list);
+		spin_unlock_irqrestore(&worker->workers->lock, flags);
+	}
+
+	spin_unlock_irqrestore(&worker->lock, flags);
+
+out:
+	return 0;
+}
+
+/*
+ * places a struct btrfs_work into the pending queue of one of the kthreads
+ */
+int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+{
+	struct btrfs_worker_thread *worker;
+	unsigned long flags;
+	int wake = 0;
+
+	/* don't requeue something already on a list */
+	if (test_and_set_bit(0, &work->flags))
+		goto out;
+
+	worker = find_worker(workers);
+
+	spin_lock_irqsave(&worker->lock, flags);
+	atomic_inc(&worker->num_pending);
+	check_busy_worker(worker);
+	list_add_tail(&work->list, &worker->pending);
+
+	/*
+	 * avoid calling into wake_up_process if this thread has already
+	 * been kicked
+	 */
+	if (!worker->working)
+		wake = 1;
+	worker->working = 1;
+
+	spin_unlock_irqrestore(&worker->lock, flags);
+
+	if (wake)
+		wake_up_process(worker->task);
+out:
+	return 0;
+}
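Note that check_busy_worker() and check_idle_worker() deliberately use different thresholds: a worker is moved to the busy list once it reaches idle_thresh pending items, but only moves back to the idle list after draining below idle_thresh / 2. The gap is a hysteresis band that keeps a worker from ping-ponging between the two lists on every submission and completion. Condensed into two predicates (a sketch, not part of the patch):

/* mirrors check_busy_worker(): promote to the busy list at the threshold */
static inline int worker_is_busy(int num_pending, int idle_thresh)
{
	return num_pending >= idle_thresh;
}

/* mirrors check_idle_worker(): demote only after draining below half */
static inline int worker_is_idle(int num_pending, int idle_thresh)
{
	return num_pending < idle_thresh / 2;
}
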
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h new file mode 100644 index 00000000000..4ec9a2ee0f9 --- /dev/null +++ b/fs/btrfs/async-thread.h | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_ASYNC_THREAD_ | ||
| 20 | #define __BTRFS_ASYNC_THREAD_ | ||
| 21 | |||
| 22 | struct btrfs_worker_thread; | ||
| 23 | |||
| 24 | /* | ||
| 25 | * This is similar to a workqueue, but it is meant to spread the operations | ||
| 26 | * across all available cpus instead of just the CPU that was used to | ||
| 27 | * queue the work. There is also some batching introduced to try and | ||
| 28 | * cut down on context switches. | ||
| 29 | * | ||
| 30 | * By default threads are added on demand up to 2 * the number of cpus. | ||
| 31 | * Changing struct btrfs_workers->max_workers is one way to prevent | ||
| 32 | * demand creation of kthreads. | ||
| 33 | * | ||
| 34 | * the basic model of these worker threads is to embed a btrfs_work | ||
| 35 | * structure in your own data struct, and use container_of in a | ||
| 36 | * work function to get back to your data struct. | ||
| 37 | */ | ||
| 38 | struct btrfs_work { | ||
| 39 | /* | ||
| 40 | * only func should be set to the function you want called | ||
| 41 | * your work struct is passed as the only arg | ||
| 42 | */ | ||
| 43 | void (*func)(struct btrfs_work *work); | ||
| 44 | |||
| 45 | /* | ||
| 46 | * flags should be set to zero. It is used to make sure the | ||
| 47 | * struct is only inserted once into the list. | ||
| 48 | */ | ||
| 49 | unsigned long flags; | ||
| 50 | |||
| 51 | /* don't touch these */ | ||
| 52 | struct btrfs_worker_thread *worker; | ||
| 53 | struct list_head list; | ||
| 54 | }; | ||
| 55 | |||
| 56 | struct btrfs_workers { | ||
| 57 | /* current number of running workers */ | ||
| 58 | int num_workers; | ||
| 59 | |||
| 60 | /* max number of workers allowed. changed by btrfs_start_workers */ | ||
| 61 | int max_workers; | ||
| 62 | |||
| 63 | /* once a worker has this many requests or fewer, it is idle */ | ||
| 64 | int idle_thresh; | ||
| 65 | |||
| 66 | /* list with all the work threads. The workers on the idle thread | ||
| 67 | * may be actively servicing jobs, but they haven't yet hit the | ||
| 68 | * idle thresh limit above. | ||
| 69 | */ | ||
| 70 | struct list_head worker_list; | ||
| 71 | struct list_head idle_list; | ||
| 72 | |||
| 73 | /* lock for finding the next worker thread to queue on */ | ||
| 74 | spinlock_t lock; | ||
| 75 | |||
| 76 | /* extra name for this worker, used in the kthread names */ | ||
| 77 | char *name; | ||
| 78 | }; | ||
| 79 | |||
| 80 | int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); | ||
| 81 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); | ||
| 82 | int btrfs_stop_workers(struct btrfs_workers *workers); | ||
| 83 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); | ||
| 84 | int btrfs_requeue_work(struct btrfs_work *work); | ||
| 85 | #endif | ||
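A minimal sketch of the embedding pattern this header describes: callers wrap
struct btrfs_work in their own job struct and use container_of() in the work
function to get back to it. The job struct and helper names below are
hypothetical illustrations, not part of this patch; only the btrfs_workers
API comes from the declarations above.

	struct my_csum_job {			/* hypothetical caller-side struct */
		struct btrfs_work work;		/* embedded; queued by pointer */
		u64 bytenr;			/* example payload */
	};

	static void my_csum_func(struct btrfs_work *work)
	{
		/* recover the enclosing job from the embedded member */
		struct my_csum_job *job = container_of(work, struct my_csum_job, work);
		/* ... process job->bytenr ... */
		kfree(job);
	}

	static int my_queue_csum(struct btrfs_workers *workers, u64 bytenr)
	{
		/* kzalloc keeps work.flags zero, as the comment above requires */
		struct my_csum_job *job = kzalloc(sizeof(*job), GFP_NOFS);
		if (!job)
			return -ENOMEM;
		job->work.func = my_csum_func;
		job->bytenr = bytenr;
		return btrfs_queue_worker(workers, &job->work);
	}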
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h new file mode 100644 index 00000000000..0b2e623cf42 --- /dev/null +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_I__ | ||
| 20 | #define __BTRFS_I__ | ||
| 21 | |||
| 22 | #include "extent_map.h" | ||
| 23 | #include "extent_io.h" | ||
| 24 | #include "ordered-data.h" | ||
| 25 | |||
| 26 | /* in memory btrfs inode */ | ||
| 27 | struct btrfs_inode { | ||
| 28 | /* which subvolume this inode belongs to */ | ||
| 29 | struct btrfs_root *root; | ||
| 30 | |||
| 31 | /* the block group preferred for allocations. This pointer is buggy | ||
| 32 | * and needs to be replaced with a bytenr instead | ||
| 33 | */ | ||
| 34 | struct btrfs_block_group_cache *block_group; | ||
| 35 | |||
| 36 | /* key used to find this inode on disk. This is used by the code | ||
| 37 | * to read in roots of subvolumes | ||
| 38 | */ | ||
| 39 | struct btrfs_key location; | ||
| 40 | |||
| 41 | /* the extent_tree has caches of all the extent mappings to disk */ | ||
| 42 | struct extent_map_tree extent_tree; | ||
| 43 | |||
| 44 | /* the io_tree does range state (DIRTY, LOCKED etc) */ | ||
| 45 | struct extent_io_tree io_tree; | ||
| 46 | |||
| 47 | /* special utility tree used to record which mirrors have already been | ||
| 48 | * tried when checksums fail for a given block | ||
| 49 | */ | ||
| 50 | struct extent_io_tree io_failure_tree; | ||
| 51 | |||
| 52 | /* held while inserting checksums to avoid races */ | ||
| 53 | struct mutex csum_mutex; | ||
| 54 | |||
| 55 | /* held while inserting or deleting extents from files */ | ||
| 56 | struct mutex extent_mutex; | ||
| 57 | |||
| 58 | /* held while logging the inode in tree-log.c */ | ||
| 59 | struct mutex log_mutex; | ||
| 60 | |||
| 61 | /* used to order data wrt metadata */ | ||
| 62 | struct btrfs_ordered_inode_tree ordered_tree; | ||
| 63 | |||
| 64 | /* standard acl pointers */ | ||
| 65 | struct posix_acl *i_acl; | ||
| 66 | struct posix_acl *i_default_acl; | ||
| 67 | |||
| 68 | /* for keeping track of orphaned inodes */ | ||
| 69 | struct list_head i_orphan; | ||
| 70 | |||
| 71 | /* list of all the delalloc inodes in the FS. There are times we need | ||
| 72 | * to write all the delalloc pages to disk, and this list is used | ||
| 73 | * to walk them all. | ||
| 74 | */ | ||
| 75 | struct list_head delalloc_inodes; | ||
| 76 | |||
| 77 | /* full 64 bit generation number, struct vfs_inode doesn't have a big | ||
| 78 | * enough field for this. | ||
| 79 | */ | ||
| 80 | u64 generation; | ||
| 81 | |||
| 82 | /* | ||
| 83 | * transid of the trans_handle that last modified this inode | ||
| 84 | */ | ||
| 85 | u64 last_trans; | ||
| 86 | /* | ||
| 87 | * transid that last logged this inode | ||
| 88 | */ | ||
| 89 | u64 logged_trans; | ||
| 90 | |||
| 91 | /* | ||
| 92 | * trans that last made a change that should be fully fsync'd. This | ||
| 93 | * gets reset to zero each time the inode is logged | ||
| 94 | */ | ||
| 95 | u64 log_dirty_trans; | ||
| 96 | |||
| 97 | /* total number of bytes pending delalloc, used by stat to calc the | ||
| 98 | * real block usage of the file | ||
| 99 | */ | ||
| 100 | u64 delalloc_bytes; | ||
| 101 | |||
| 102 | /* | ||
| 103 | * the size of the file stored in the metadata on disk. data=ordered | ||
| 104 | * means the in-memory i_size might be larger than the size on disk | ||
| 105 | * because not all the blocks are written yet. | ||
| 106 | */ | ||
| 107 | u64 disk_i_size; | ||
| 108 | |||
| 109 | /* flags field from the on disk inode */ | ||
| 110 | u32 flags; | ||
| 111 | |||
| 112 | /* | ||
| 113 | * if this is a directory then index_cnt is the counter for the index | ||
| 114 | * number for new files that are created | ||
| 115 | */ | ||
| 116 | u64 index_cnt; | ||
| 117 | |||
| 118 | struct inode vfs_inode; | ||
| 119 | }; | ||
| 120 | |||
| 121 | static inline struct btrfs_inode *BTRFS_I(struct inode *inode) | ||
| 122 | { | ||
| 123 | return container_of(inode, struct btrfs_inode, vfs_inode); | ||
| 124 | } | ||
| 125 | |||
| 126 | static inline void btrfs_i_size_write(struct inode *inode, u64 size) | ||
| 127 | { | ||
| 128 | inode->i_size = size; | ||
| 129 | BTRFS_I(inode)->disk_i_size = size; | ||
| 130 | } | ||
| 131 | |||
| 132 | |||
| 133 | #endif | ||
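A short sketch of how these helpers are meant to be used from code that only
has the VFS inode in hand; the helper name is hypothetical, and only BTRFS_I()
and disk_i_size come from this header:

	/* hypothetical: read the on-disk size, which data=ordered keeps
	 * separate from (and possibly smaller than) the in-memory i_size */
	static inline u64 my_disk_size(struct inode *inode)
	{
		/* container_of() walks from the embedded vfs_inode back out
		 * to the surrounding btrfs_inode */
		return BTRFS_I(inode)->disk_i_size;
	}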
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h new file mode 100644 index 00000000000..cd6598b169d --- /dev/null +++ b/fs/btrfs/compat.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | #ifndef _COMPAT_H_ | ||
| 2 | #define _COMPAT_H_ | ||
| 3 | |||
| 4 | #define btrfs_drop_nlink(inode) drop_nlink(inode) | ||
| 5 | #define btrfs_inc_nlink(inode) inc_nlink(inode) | ||
| 6 | |||
| 7 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) | ||
| 8 | static inline struct dentry *d_obtain_alias(struct inode *inode) | ||
| 9 | { | ||
| 10 | struct dentry *d; | ||
| 11 | |||
| 12 | if (!inode) | ||
| 13 | return NULL; | ||
| 14 | if (IS_ERR(inode)) | ||
| 15 | return ERR_CAST(inode); | ||
| 16 | |||
| 17 | d = d_alloc_anon(inode); | ||
| 18 | if (!d) | ||
| 19 | iput(inode); | ||
| 20 | return d; | ||
| 21 | } | ||
| 22 | #endif | ||
| 23 | |||
| 24 | #endif /* _COMPAT_H_ */ | ||
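A sketch of the intended use, assuming a caller shaped like the NFS export
code (the helper name is hypothetical): on kernels up to 2.6.27 the fallback
above is compiled in, on newer kernels the stock d_obtain_alias() is used,
and the caller looks the same either way.

	static struct dentry *my_export_get_dentry(struct inode *inode)
	{
		/* d_obtain_alias() accepts NULL and ERR_PTR-encoded inodes,
		 * so no separate error check is needed before the call */
		return d_obtain_alias(inode);
	}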
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h new file mode 100644 index 00000000000..1eaf11d334f --- /dev/null +++ b/fs/btrfs/crc32c.h | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_CRC32C__ | ||
| 20 | #define __BTRFS_CRC32C__ | ||
| 21 | #include <asm/byteorder.h> | ||
| 22 | #include <linux/crc32c.h> | ||
| 23 | #include <linux/version.h> | ||
| 24 | |||
| 25 | /* #define CONFIG_BTRFS_HW_SUM 1 */ | ||
| 26 | |||
| 27 | #ifdef CONFIG_BTRFS_HW_SUM | ||
| 28 | #ifdef CONFIG_X86 | ||
| 29 | /* | ||
| 30 | * Use the hardware-provided CRC32 instruction to accelerate CRC32C computation. | ||
| 31 | * CRC32C polynomial: 0x1EDC6F41 (BE) / 0x82F63B78 (LE) | ||
| 32 | * CRC32 is a new instruction in Intel SSE4.2; the reference can be found at: | ||
| 33 | * http://www.intel.com/products/processor/manuals/ | ||
| 34 | * Intel(R) 64 and IA-32 Architectures Software Developer's Manual | ||
| 35 | * Volume 2A: Instruction Set Reference, A-M | ||
| 36 | */ | ||
| 37 | |||
| 38 | #include <asm/cpufeature.h> | ||
| 39 | #include <asm/processor.h> | ||
| 40 | |||
| 41 | #define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ | ||
| 42 | #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) | ||
| 43 | |||
| 44 | #ifdef CONFIG_X86_64 | ||
| 45 | #define REX_PRE "0x48, " | ||
| 46 | #define SCALE_F 8 | ||
| 47 | #else | ||
| 48 | #define REX_PRE | ||
| 49 | #define SCALE_F 4 | ||
| 50 | #endif | ||
| 51 | |||
| 52 | static inline u32 btrfs_crc32c_le_hw_byte(u32 crc, unsigned char const *data, | ||
| 53 | size_t length) | ||
| 54 | { | ||
| 55 | while (length--) { | ||
| 56 | __asm__ __volatile__( | ||
| 57 | ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" | ||
| 58 | :"=S"(crc) | ||
| 59 | :"0"(crc), "c"(*data) | ||
| 60 | ); | ||
| 61 | data++; | ||
| 62 | } | ||
| 63 | |||
| 64 | return crc; | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline u32 __pure btrfs_crc32c_le_hw(u32 crc, unsigned char const *p, | ||
| 68 | size_t len) | ||
| 69 | { | ||
| 70 | unsigned int iquotient = len / SCALE_F; | ||
| 71 | unsigned int iremainder = len % SCALE_F; | ||
| 72 | #ifdef CONFIG_X86_64 | ||
| 73 | u64 *ptmp = (u64 *)p; | ||
| 74 | #else | ||
| 75 | u32 *ptmp = (u32 *)p; | ||
| 76 | #endif | ||
| 77 | |||
| 78 | while (iquotient--) { | ||
| 79 | __asm__ __volatile__( | ||
| 80 | ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" | ||
| 81 | :"=S"(crc) | ||
| 82 | :"0"(crc), "c"(*ptmp) | ||
| 83 | ); | ||
| 84 | ptmp++; | ||
| 85 | } | ||
| 86 | |||
| 87 | if (iremainder) | ||
| 88 | crc = btrfs_crc32c_le_hw_byte(crc, (unsigned char *)ptmp, | ||
| 89 | iremainder); | ||
| 90 | |||
| 91 | return crc; | ||
| 92 | } | ||
| 93 | #endif /* CONFIG_X86 */ | ||
| 94 | |||
| 95 | static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address, | ||
| 96 | size_t len) | ||
| 97 | { | ||
| 98 | #ifdef CONFIG_X86 | ||
| 99 | if (cpu_has_xmm4_2) | ||
| 100 | return btrfs_crc32c_le_hw(crc, address, len); | ||
| 101 | #endif | ||
| 102 | return crc32c_le(crc, address, len); | ||
| 103 | } | ||
| 104 | |||
| 105 | #else | ||
| 106 | |||
| 107 | #define __btrfs_crc32c(seed, data, length) crc32c(seed, data, length) | ||
| 108 | |||
| 109 | #endif /* CONFIG_BTRFS_HW_SUM */ | ||
| 110 | |||
| 111 | /* | ||
| 112 | * the implementation of crc32c_le() changed in linux-2.6.23; | ||
| 113 | * as of v0.13, btrfs-progs is using the latest version. | ||
| 114 | * We must work around older implementations of crc32c_le() | ||
| 115 | * found on older kernel versions. | ||
| 116 | */ | ||
| 117 | #define btrfs_crc32c(seed, data, length) \ | ||
| 118 | __btrfs_crc32c(seed, (unsigned char const *)data, length) | ||
| 119 | #endif | ||
| 120 | |||
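A usage sketch for the wrapper above (the helper is hypothetical; seeding
with all-ones and inverting the result is the common CRC32C convention, and
the real btrfs callers choose their own seed and finalization):

	static u32 my_checksum(const void *data, size_t len)
	{
		u32 crc = ~(u32)0;		/* conventional CRC32C seed */

		crc = btrfs_crc32c(crc, data, len);
		return ~crc;			/* conventional finalization */
	}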
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c new file mode 100644 index 00000000000..9caeb377de6 --- /dev/null +++ b/fs/btrfs/ctree.c | |||
| @@ -0,0 +1,3716 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007,2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/sched.h> | ||
| 20 | #include "ctree.h" | ||
| 21 | #include "disk-io.h" | ||
| 22 | #include "transaction.h" | ||
| 23 | #include "print-tree.h" | ||
| 24 | #include "locking.h" | ||
| 25 | |||
| 26 | static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 27 | *root, struct btrfs_path *path, int level); | ||
| 28 | static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 29 | *root, struct btrfs_key *ins_key, | ||
| 30 | struct btrfs_path *path, int data_size, int extend); | ||
| 31 | static int push_node_left(struct btrfs_trans_handle *trans, | ||
| 32 | struct btrfs_root *root, struct extent_buffer *dst, | ||
| 33 | struct extent_buffer *src, int empty); | ||
| 34 | static int balance_node_right(struct btrfs_trans_handle *trans, | ||
| 35 | struct btrfs_root *root, | ||
| 36 | struct extent_buffer *dst_buf, | ||
| 37 | struct extent_buffer *src_buf); | ||
| 38 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 39 | struct btrfs_path *path, int level, int slot); | ||
| 40 | |||
| 41 | inline void btrfs_init_path(struct btrfs_path *p) | ||
| 42 | { | ||
| 43 | memset(p, 0, sizeof(*p)); | ||
| 44 | } | ||
| 45 | |||
| 46 | struct btrfs_path *btrfs_alloc_path(void) | ||
| 47 | { | ||
| 48 | struct btrfs_path *path; | ||
| 49 | path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); | ||
| 50 | if (path) { | ||
| 51 | btrfs_init_path(path); | ||
| 52 | path->reada = 1; | ||
| 53 | } | ||
| 54 | return path; | ||
| 55 | } | ||
| 56 | |||
| 57 | /* this also releases the path */ | ||
| 58 | void btrfs_free_path(struct btrfs_path *p) | ||
| 59 | { | ||
| 60 | btrfs_release_path(NULL, p); | ||
| 61 | kmem_cache_free(btrfs_path_cachep, p); | ||
| 62 | } | ||
| 63 | |||
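A sketch of the usual path life cycle around these two helpers (the lookup
function is hypothetical; btrfs_search_slot() is defined later in this file):

	static int my_lookup(struct btrfs_root *root, struct btrfs_key *key)
	{
		struct btrfs_path *path;
		int ret;

		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
		/* read-only search: NULL trans handle, ins_len 0, cow 0 */
		ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
		/* on ret == 0 the item is at path->nodes[0], path->slots[0] */
		btrfs_free_path(path);	/* drops locks and buffer refs, then frees */
		return ret;
	}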
| 64 | /* | ||
| 65 | * path release drops references on the extent buffers in the path | ||
| 66 | * and it drops any locks held by this path | ||
| 67 | * | ||
| 68 | * It is safe to call this on paths that have no locks or extent buffers held. | ||
| 69 | */ | ||
| 70 | void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | ||
| 71 | { | ||
| 72 | int i; | ||
| 73 | |||
| 74 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | ||
| 75 | p->slots[i] = 0; | ||
| 76 | if (!p->nodes[i]) | ||
| 77 | continue; | ||
| 78 | if (p->locks[i]) { | ||
| 79 | btrfs_tree_unlock(p->nodes[i]); | ||
| 80 | p->locks[i] = 0; | ||
| 81 | } | ||
| 82 | free_extent_buffer(p->nodes[i]); | ||
| 83 | p->nodes[i] = NULL; | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | /* | ||
| 88 | * safely gets a reference on the root node of a tree. A lock | ||
| 89 | * is not taken, so a concurrent writer may put a different node | ||
| 90 | * at the root of the tree. See btrfs_lock_root_node for the | ||
| 91 | * looping required. | ||
| 92 | * | ||
| 93 | * The extent buffer returned by this has a reference taken, so | ||
| 94 | * it won't disappear. It may stop being the root of the tree | ||
| 95 | * at any time because there are no locks held. | ||
| 96 | */ | ||
| 97 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root) | ||
| 98 | { | ||
| 99 | struct extent_buffer *eb; | ||
| 100 | spin_lock(&root->node_lock); | ||
| 101 | eb = root->node; | ||
| 102 | extent_buffer_get(eb); | ||
| 103 | spin_unlock(&root->node_lock); | ||
| 104 | return eb; | ||
| 105 | } | ||
| 106 | |||
| 107 | /* loop around taking references on and locking the root node of the | ||
| 108 | * tree until you end up with a lock on the root. A locked buffer | ||
| 109 | * is returned, with a reference held. | ||
| 110 | */ | ||
| 111 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | ||
| 112 | { | ||
| 113 | struct extent_buffer *eb; | ||
| 114 | |||
| 115 | while(1) { | ||
| 116 | eb = btrfs_root_node(root); | ||
| 117 | btrfs_tree_lock(eb); | ||
| 118 | |||
| 119 | spin_lock(&root->node_lock); | ||
| 120 | if (eb == root->node) { | ||
| 121 | spin_unlock(&root->node_lock); | ||
| 122 | break; | ||
| 123 | } | ||
| 124 | spin_unlock(&root->node_lock); | ||
| 125 | |||
| 126 | btrfs_tree_unlock(eb); | ||
| 127 | free_extent_buffer(eb); | ||
| 128 | } | ||
| 129 | return eb; | ||
| 130 | } | ||
| 131 | |||
| 132 | /* cowonly root (everything not a reference counted cow subvolume), just get | ||
| 133 | * put onto a simple dirty list. transaction.c walks this to make sure they | ||
| 134 | * get properly updated on disk. | ||
| 135 | */ | ||
| 136 | static void add_root_to_dirty_list(struct btrfs_root *root) | ||
| 137 | { | ||
| 138 | if (root->track_dirty && list_empty(&root->dirty_list)) { | ||
| 139 | list_add(&root->dirty_list, | ||
| 140 | &root->fs_info->dirty_cowonly_roots); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | /* | ||
| 145 | * used by snapshot creation to make a copy of a root for a tree with | ||
| 146 | * a given objectid. The buffer with the new root node is returned in | ||
| 147 | * cow_ret, and this func returns zero on success or a negative error code. | ||
| 148 | */ | ||
| 149 | int btrfs_copy_root(struct btrfs_trans_handle *trans, | ||
| 150 | struct btrfs_root *root, | ||
| 151 | struct extent_buffer *buf, | ||
| 152 | struct extent_buffer **cow_ret, u64 new_root_objectid) | ||
| 153 | { | ||
| 154 | struct extent_buffer *cow; | ||
| 155 | u32 nritems; | ||
| 156 | int ret = 0; | ||
| 157 | int level; | ||
| 158 | struct btrfs_root *new_root; | ||
| 159 | |||
| 160 | new_root = kmalloc(sizeof(*new_root), GFP_NOFS); | ||
| 161 | if (!new_root) | ||
| 162 | return -ENOMEM; | ||
| 163 | |||
| 164 | memcpy(new_root, root, sizeof(*new_root)); | ||
| 165 | new_root->root_key.objectid = new_root_objectid; | ||
| 166 | |||
| 167 | WARN_ON(root->ref_cows && trans->transid != | ||
| 168 | root->fs_info->running_transaction->transid); | ||
| 169 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | ||
| 170 | |||
| 171 | level = btrfs_header_level(buf); | ||
| 172 | nritems = btrfs_header_nritems(buf); | ||
| 173 | |||
| 174 | cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, | ||
| 175 | new_root_objectid, trans->transid, | ||
| 176 | level, buf->start, 0); | ||
| 177 | if (IS_ERR(cow)) { | ||
| 178 | kfree(new_root); | ||
| 179 | return PTR_ERR(cow); | ||
| 180 | } | ||
| 181 | |||
| 182 | copy_extent_buffer(cow, buf, 0, 0, cow->len); | ||
| 183 | btrfs_set_header_bytenr(cow, cow->start); | ||
| 184 | btrfs_set_header_generation(cow, trans->transid); | ||
| 185 | btrfs_set_header_owner(cow, new_root_objectid); | ||
| 186 | btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); | ||
| 187 | |||
| 188 | WARN_ON(btrfs_header_generation(buf) > trans->transid); | ||
| 189 | ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); | ||
| 190 | kfree(new_root); | ||
| 191 | |||
| 192 | if (ret) | ||
| 193 | return ret; | ||
| 194 | |||
| 195 | btrfs_mark_buffer_dirty(cow); | ||
| 196 | *cow_ret = cow; | ||
| 197 | return 0; | ||
| 198 | } | ||
| 199 | |||
| 200 | /* | ||
| 201 | * does the dirty work in cow of a single block. The parent block | ||
| 202 | * (if supplied) is updated to point to the new cow copy. The new | ||
| 203 | * buffer is marked dirty and returned locked. If you modify the block | ||
| 204 | * it needs to be marked dirty again. | ||
| 205 | * | ||
| 206 | * search_start -- an allocation hint for the new block | ||
| 207 | * | ||
| 208 | * empty_size -- a hint that you plan on doing more cow. This is the size in bytes | ||
| 209 | * the allocator should try to find free next to the block it returns. This is | ||
| 210 | * just a hint and may be ignored by the allocator. | ||
| 211 | * | ||
| 212 | * prealloc_dest -- if you have already reserved a destination for the cow, | ||
| 213 | * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent | ||
| 214 | * is used to finish the allocation. | ||
| 215 | */ | ||
| 216 | int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | ||
| 217 | struct btrfs_root *root, | ||
| 218 | struct extent_buffer *buf, | ||
| 219 | struct extent_buffer *parent, int parent_slot, | ||
| 220 | struct extent_buffer **cow_ret, | ||
| 221 | u64 search_start, u64 empty_size, | ||
| 222 | u64 prealloc_dest) | ||
| 223 | { | ||
| 224 | u64 parent_start; | ||
| 225 | struct extent_buffer *cow; | ||
| 226 | u32 nritems; | ||
| 227 | int ret = 0; | ||
| 228 | int level; | ||
| 229 | int unlock_orig = 0; | ||
| 230 | |||
| 231 | if (*cow_ret == buf) | ||
| 232 | unlock_orig = 1; | ||
| 233 | |||
| 234 | WARN_ON(!btrfs_tree_locked(buf)); | ||
| 235 | |||
| 236 | if (parent) | ||
| 237 | parent_start = parent->start; | ||
| 238 | else | ||
| 239 | parent_start = 0; | ||
| 240 | |||
| 241 | WARN_ON(root->ref_cows && trans->transid != | ||
| 242 | root->fs_info->running_transaction->transid); | ||
| 243 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | ||
| 244 | |||
| 245 | level = btrfs_header_level(buf); | ||
| 246 | nritems = btrfs_header_nritems(buf); | ||
| 247 | |||
| 248 | if (prealloc_dest) { | ||
| 249 | struct btrfs_key ins; | ||
| 250 | |||
| 251 | ins.objectid = prealloc_dest; | ||
| 252 | ins.offset = buf->len; | ||
| 253 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 254 | |||
| 255 | ret = btrfs_alloc_reserved_extent(trans, root, parent_start, | ||
| 256 | root->root_key.objectid, | ||
| 257 | trans->transid, level, &ins); | ||
| 258 | BUG_ON(ret); | ||
| 259 | cow = btrfs_init_new_buffer(trans, root, prealloc_dest, | ||
| 260 | buf->len); | ||
| 261 | } else { | ||
| 262 | cow = btrfs_alloc_free_block(trans, root, buf->len, | ||
| 263 | parent_start, | ||
| 264 | root->root_key.objectid, | ||
| 265 | trans->transid, level, | ||
| 266 | search_start, empty_size); | ||
| 267 | } | ||
| 268 | if (IS_ERR(cow)) | ||
| 269 | return PTR_ERR(cow); | ||
| 270 | |||
| 271 | copy_extent_buffer(cow, buf, 0, 0, cow->len); | ||
| 272 | btrfs_set_header_bytenr(cow, cow->start); | ||
| 273 | btrfs_set_header_generation(cow, trans->transid); | ||
| 274 | btrfs_set_header_owner(cow, root->root_key.objectid); | ||
| 275 | btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); | ||
| 276 | |||
| 277 | WARN_ON(btrfs_header_generation(buf) > trans->transid); | ||
| 278 | if (btrfs_header_generation(buf) != trans->transid) { | ||
| 279 | u32 nr_extents; | ||
| 280 | ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); | ||
| 281 | if (ret) | ||
| 282 | return ret; | ||
| 283 | |||
| 284 | ret = btrfs_cache_ref(trans, root, buf, nr_extents); | ||
| 285 | WARN_ON(ret); | ||
| 286 | } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) { | ||
| 287 | /* | ||
| 288 | * There are only two places that can drop reference to | ||
| 289 | * tree blocks owned by living reloc trees, one is here, | ||
| 290 | * the other place is btrfs_merge_path. In both places, | ||
| 291 | * we check reference count while tree block is locked. | ||
| 292 | * Furthermore, if reference count is one, it won't get | ||
| 293 | * increased by someone else. | ||
| 294 | */ | ||
| 295 | u32 refs; | ||
| 296 | ret = btrfs_lookup_extent_ref(trans, root, buf->start, | ||
| 297 | buf->len, &refs); | ||
| 298 | BUG_ON(ret); | ||
| 299 | if (refs == 1) { | ||
| 300 | ret = btrfs_update_ref(trans, root, buf, cow, | ||
| 301 | 0, nritems); | ||
| 302 | clean_tree_block(trans, root, buf); | ||
| 303 | } else { | ||
| 304 | ret = btrfs_inc_ref(trans, root, buf, cow, NULL); | ||
| 305 | } | ||
| 306 | BUG_ON(ret); | ||
| 307 | } else { | ||
| 308 | ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); | ||
| 309 | if (ret) | ||
| 310 | return ret; | ||
| 311 | clean_tree_block(trans, root, buf); | ||
| 312 | } | ||
| 313 | |||
| 314 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
| 315 | ret = btrfs_add_reloc_mapping(root, buf->start, | ||
| 316 | buf->len, cow->start); | ||
| 317 | BUG_ON(ret); | ||
| 318 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); | ||
| 319 | WARN_ON(ret); | ||
| 320 | } | ||
| 321 | |||
| 322 | if (buf == root->node) { | ||
| 323 | WARN_ON(parent && parent != buf); | ||
| 324 | |||
| 325 | spin_lock(&root->node_lock); | ||
| 326 | root->node = cow; | ||
| 327 | extent_buffer_get(cow); | ||
| 328 | spin_unlock(&root->node_lock); | ||
| 329 | |||
| 330 | if (buf != root->commit_root) { | ||
| 331 | btrfs_free_extent(trans, root, buf->start, | ||
| 332 | buf->len, buf->start, | ||
| 333 | root->root_key.objectid, | ||
| 334 | btrfs_header_generation(buf), | ||
| 335 | level, 1); | ||
| 336 | } | ||
| 337 | free_extent_buffer(buf); | ||
| 338 | add_root_to_dirty_list(root); | ||
| 339 | } else { | ||
| 340 | btrfs_set_node_blockptr(parent, parent_slot, | ||
| 341 | cow->start); | ||
| 342 | WARN_ON(trans->transid == 0); | ||
| 343 | btrfs_set_node_ptr_generation(parent, parent_slot, | ||
| 344 | trans->transid); | ||
| 345 | btrfs_mark_buffer_dirty(parent); | ||
| 346 | WARN_ON(btrfs_header_generation(parent) != trans->transid); | ||
| 347 | btrfs_free_extent(trans, root, buf->start, buf->len, | ||
| 348 | parent_start, btrfs_header_owner(parent), | ||
| 349 | btrfs_header_generation(parent), level, 1); | ||
| 350 | } | ||
| 351 | if (unlock_orig) | ||
| 352 | btrfs_tree_unlock(buf); | ||
| 353 | free_extent_buffer(buf); | ||
| 354 | btrfs_mark_buffer_dirty(cow); | ||
| 355 | *cow_ret = cow; | ||
| 356 | return 0; | ||
| 357 | } | ||
| 358 | |||
| 359 | /* | ||
| 360 | * cows a single block, see __btrfs_cow_block for the real work. | ||
| 361 | * This version of it has extra checks so that a block isn't cow'd more than | ||
| 362 | * once per transaction, as long as it hasn't been written yet | ||
| 363 | */ | ||
| 364 | int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, | ||
| 365 | struct btrfs_root *root, struct extent_buffer *buf, | ||
| 366 | struct extent_buffer *parent, int parent_slot, | ||
| 367 | struct extent_buffer **cow_ret, u64 prealloc_dest) | ||
| 368 | { | ||
| 369 | u64 search_start; | ||
| 370 | int ret; | ||
| 371 | |||
| 372 | if (trans->transaction != root->fs_info->running_transaction) { | ||
| 373 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
| 374 | root->fs_info->running_transaction->transid); | ||
| 375 | WARN_ON(1); | ||
| 376 | } | ||
| 377 | if (trans->transid != root->fs_info->generation) { | ||
| 378 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
| 379 | root->fs_info->generation); | ||
| 380 | WARN_ON(1); | ||
| 381 | } | ||
| 382 | |||
| 383 | spin_lock(&root->fs_info->hash_lock); | ||
| 384 | if (btrfs_header_generation(buf) == trans->transid && | ||
| 385 | btrfs_header_owner(buf) == root->root_key.objectid && | ||
| 386 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
| 387 | *cow_ret = buf; | ||
| 388 | spin_unlock(&root->fs_info->hash_lock); | ||
| 389 | WARN_ON(prealloc_dest); | ||
| 390 | return 0; | ||
| 391 | } | ||
| 392 | spin_unlock(&root->fs_info->hash_lock); | ||
| 393 | search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); /* allocation hint: round down to the containing 1GB boundary */ | ||
| 394 | ret = __btrfs_cow_block(trans, root, buf, parent, | ||
| 395 | parent_slot, cow_ret, search_start, 0, | ||
| 396 | prealloc_dest); | ||
| 397 | return ret; | ||
| 398 | } | ||
| 399 | |||
| 400 | /* | ||
| 401 | * helper function for defrag to decide if two blocks pointed to by a | ||
| 402 | * node are actually close by | ||
| 403 | */ | ||
| 404 | static int close_blocks(u64 blocknr, u64 other, u32 blocksize) | ||
| 405 | { | ||
| 406 | if (blocknr < other && other - (blocknr + blocksize) < 32768) | ||
| 407 | return 1; | ||
| 408 | if (blocknr > other && blocknr - (other + blocksize) < 32768) | ||
| 409 | return 1; | ||
| 410 | return 0; | ||
| 411 | } | ||
| 412 | |||
| 413 | /* | ||
| 414 | * compare two keys in a memcmp fashion | ||
| 415 | */ | ||
| 416 | static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) | ||
| 417 | { | ||
| 418 | struct btrfs_key k1; | ||
| 419 | |||
| 420 | btrfs_disk_key_to_cpu(&k1, disk); | ||
| 421 | |||
| 422 | if (k1.objectid > k2->objectid) | ||
| 423 | return 1; | ||
| 424 | if (k1.objectid < k2->objectid) | ||
| 425 | return -1; | ||
| 426 | if (k1.type > k2->type) | ||
| 427 | return 1; | ||
| 428 | if (k1.type < k2->type) | ||
| 429 | return -1; | ||
| 430 | if (k1.offset > k2->offset) | ||
| 431 | return 1; | ||
| 432 | if (k1.offset < k2->offset) | ||
| 433 | return -1; | ||
| 434 | return 0; | ||
| 435 | } | ||
| 436 | |||
| 437 | |||
| 438 | /* | ||
| 439 | * this is used by the defrag code to go through all the | ||
| 440 | * leaves pointed to by a node and reallocate them so that | ||
| 441 | * disk order is close to key order | ||
| 442 | */ | ||
| 443 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | ||
| 444 | struct btrfs_root *root, struct extent_buffer *parent, | ||
| 445 | int start_slot, int cache_only, u64 *last_ret, | ||
| 446 | struct btrfs_key *progress) | ||
| 447 | { | ||
| 448 | struct extent_buffer *cur; | ||
| 449 | u64 blocknr; | ||
| 450 | u64 gen; | ||
| 451 | u64 search_start = *last_ret; | ||
| 452 | u64 last_block = 0; | ||
| 453 | u64 other; | ||
| 454 | u32 parent_nritems; | ||
| 455 | int end_slot; | ||
| 456 | int i; | ||
| 457 | int err = 0; | ||
| 458 | int parent_level; | ||
| 459 | int uptodate; | ||
| 460 | u32 blocksize; | ||
| 461 | int progress_passed = 0; | ||
| 462 | struct btrfs_disk_key disk_key; | ||
| 463 | |||
| 464 | parent_level = btrfs_header_level(parent); | ||
| 465 | if (cache_only && parent_level != 1) | ||
| 466 | return 0; | ||
| 467 | |||
| 468 | if (trans->transaction != root->fs_info->running_transaction) { | ||
| 469 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
| 470 | root->fs_info->running_transaction->transid); | ||
| 471 | WARN_ON(1); | ||
| 472 | } | ||
| 473 | if (trans->transid != root->fs_info->generation) { | ||
| 474 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
| 475 | root->fs_info->generation); | ||
| 476 | WARN_ON(1); | ||
| 477 | } | ||
| 478 | |||
| 479 | parent_nritems = btrfs_header_nritems(parent); | ||
| 480 | blocksize = btrfs_level_size(root, parent_level - 1); | ||
| 481 | end_slot = parent_nritems; | ||
| 482 | |||
| 483 | if (parent_nritems == 1) | ||
| 484 | return 0; | ||
| 485 | |||
| 486 | for (i = start_slot; i < end_slot; i++) { | ||
| 487 | int close = 1; | ||
| 488 | |||
| 489 | if (!parent->map_token) { | ||
| 490 | map_extent_buffer(parent, | ||
| 491 | btrfs_node_key_ptr_offset(i), | ||
| 492 | sizeof(struct btrfs_key_ptr), | ||
| 493 | &parent->map_token, &parent->kaddr, | ||
| 494 | &parent->map_start, &parent->map_len, | ||
| 495 | KM_USER1); | ||
| 496 | } | ||
| 497 | btrfs_node_key(parent, &disk_key, i); | ||
| 498 | if (!progress_passed && comp_keys(&disk_key, progress) < 0) | ||
| 499 | continue; | ||
| 500 | |||
| 501 | progress_passed = 1; | ||
| 502 | blocknr = btrfs_node_blockptr(parent, i); | ||
| 503 | gen = btrfs_node_ptr_generation(parent, i); | ||
| 504 | if (last_block == 0) | ||
| 505 | last_block = blocknr; | ||
| 506 | |||
| 507 | if (i > 0) { | ||
| 508 | other = btrfs_node_blockptr(parent, i - 1); | ||
| 509 | close = close_blocks(blocknr, other, blocksize); | ||
| 510 | } | ||
| 511 | if (!close && i < end_slot - 2) { | ||
| 512 | other = btrfs_node_blockptr(parent, i + 1); | ||
| 513 | close = close_blocks(blocknr, other, blocksize); | ||
| 514 | } | ||
| 515 | if (close) { | ||
| 516 | last_block = blocknr; | ||
| 517 | continue; | ||
| 518 | } | ||
| 519 | if (parent->map_token) { | ||
| 520 | unmap_extent_buffer(parent, parent->map_token, | ||
| 521 | KM_USER1); | ||
| 522 | parent->map_token = NULL; | ||
| 523 | } | ||
| 524 | |||
| 525 | cur = btrfs_find_tree_block(root, blocknr, blocksize); | ||
| 526 | if (cur) | ||
| 527 | uptodate = btrfs_buffer_uptodate(cur, gen); | ||
| 528 | else | ||
| 529 | uptodate = 0; | ||
| 530 | if (!cur || !uptodate) { | ||
| 531 | if (cache_only) { | ||
| 532 | free_extent_buffer(cur); | ||
| 533 | continue; | ||
| 534 | } | ||
| 535 | if (!cur) { | ||
| 536 | cur = read_tree_block(root, blocknr, | ||
| 537 | blocksize, gen); | ||
| 538 | } else if (!uptodate) { | ||
| 539 | btrfs_read_buffer(cur, gen); | ||
| 540 | } | ||
| 541 | } | ||
| 542 | if (search_start == 0) | ||
| 543 | search_start = last_block; | ||
| 544 | |||
| 545 | btrfs_tree_lock(cur); | ||
| 546 | err = __btrfs_cow_block(trans, root, cur, parent, i, | ||
| 547 | &cur, search_start, | ||
| 548 | min(16 * blocksize, | ||
| 549 | (end_slot - i) * blocksize), 0); | ||
| 550 | if (err) { | ||
| 551 | btrfs_tree_unlock(cur); | ||
| 552 | free_extent_buffer(cur); | ||
| 553 | break; | ||
| 554 | } | ||
| 555 | search_start = cur->start; | ||
| 556 | last_block = cur->start; | ||
| 557 | *last_ret = search_start; | ||
| 558 | btrfs_tree_unlock(cur); | ||
| 559 | free_extent_buffer(cur); | ||
| 560 | } | ||
| 561 | if (parent->map_token) { | ||
| 562 | unmap_extent_buffer(parent, parent->map_token, | ||
| 563 | KM_USER1); | ||
| 564 | parent->map_token = NULL; | ||
| 565 | } | ||
| 566 | return err; | ||
| 567 | } | ||
| 568 | |||
| 569 | /* | ||
| 570 | * The leaf data grows from end-to-front in the node. | ||
| 571 | * this returns the address of the start of the last item, | ||
| 572 | * which is the stop of the leaf data stack | ||
| 573 | */ | ||
| 574 | static inline unsigned int leaf_data_end(struct btrfs_root *root, | ||
| 575 | struct extent_buffer *leaf) | ||
| 576 | { | ||
| 577 | u32 nr = btrfs_header_nritems(leaf); | ||
| 578 | if (nr == 0) | ||
| 579 | return BTRFS_LEAF_DATA_SIZE(root); | ||
| 580 | return btrfs_item_offset_nr(leaf, nr - 1); | ||
| 581 | } | ||
| 582 | |||
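To make the end-to-front layout concrete: item headers grow forward from the
start of the leaf data area while item data grows backward from the end, so
the free space is the gap between them. A sketch of that computation follows
(the helper is hypothetical; it mirrors what btrfs_leaf_free_space() elsewhere
in this patch computes):

	static int my_leaf_free_space(struct btrfs_root *root,
				      struct extent_buffer *leaf)
	{
		u32 nritems = btrfs_header_nritems(leaf);

		/* leaf_data_end() is the lowest offset used by item data;
		 * everything between the item header array and that offset
		 * is free */
		return leaf_data_end(root, leaf) -
			nritems * sizeof(struct btrfs_item);
	}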
| 583 | /* | ||
| 584 | * extra debugging checks to make sure all the items in a node are | ||
| 585 | * well formed and in the proper order | ||
| 586 | */ | ||
| 587 | static int check_node(struct btrfs_root *root, struct btrfs_path *path, | ||
| 588 | int level) | ||
| 589 | { | ||
| 590 | struct extent_buffer *parent = NULL; | ||
| 591 | struct extent_buffer *node = path->nodes[level]; | ||
| 592 | struct btrfs_disk_key parent_key; | ||
| 593 | struct btrfs_disk_key node_key; | ||
| 594 | int parent_slot; | ||
| 595 | int slot; | ||
| 596 | struct btrfs_key cpukey; | ||
| 597 | u32 nritems = btrfs_header_nritems(node); | ||
| 598 | |||
| 599 | if (path->nodes[level + 1]) | ||
| 600 | parent = path->nodes[level + 1]; | ||
| 601 | |||
| 602 | slot = path->slots[level]; | ||
| 603 | BUG_ON(nritems == 0); | ||
| 604 | if (parent) { | ||
| 605 | parent_slot = path->slots[level + 1]; | ||
| 606 | btrfs_node_key(parent, &parent_key, parent_slot); | ||
| 607 | btrfs_node_key(node, &node_key, 0); | ||
| 608 | BUG_ON(memcmp(&parent_key, &node_key, | ||
| 609 | sizeof(struct btrfs_disk_key))); | ||
| 610 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != | ||
| 611 | btrfs_header_bytenr(node)); | ||
| 612 | } | ||
| 613 | BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); | ||
| 614 | if (slot != 0) { | ||
| 615 | btrfs_node_key_to_cpu(node, &cpukey, slot - 1); | ||
| 616 | btrfs_node_key(node, &node_key, slot); | ||
| 617 | BUG_ON(comp_keys(&node_key, &cpukey) <= 0); | ||
| 618 | } | ||
| 619 | if (slot < nritems - 1) { | ||
| 620 | btrfs_node_key_to_cpu(node, &cpukey, slot + 1); | ||
| 621 | btrfs_node_key(node, &node_key, slot); | ||
| 622 | BUG_ON(comp_keys(&node_key, &cpukey) >= 0); | ||
| 623 | } | ||
| 624 | return 0; | ||
| 625 | } | ||
| 626 | |||
| 627 | /* | ||
| 628 | * extra checking to make sure all the items in a leaf are | ||
| 629 | * well formed and in the proper order | ||
| 630 | */ | ||
| 631 | static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, | ||
| 632 | int level) | ||
| 633 | { | ||
| 634 | struct extent_buffer *leaf = path->nodes[level]; | ||
| 635 | struct extent_buffer *parent = NULL; | ||
| 636 | int parent_slot; | ||
| 637 | struct btrfs_key cpukey; | ||
| 638 | struct btrfs_disk_key parent_key; | ||
| 639 | struct btrfs_disk_key leaf_key; | ||
| 640 | int slot = path->slots[0]; | ||
| 641 | |||
| 642 | u32 nritems = btrfs_header_nritems(leaf); | ||
| 643 | |||
| 644 | if (path->nodes[level + 1]) | ||
| 645 | parent = path->nodes[level + 1]; | ||
| 646 | |||
| 647 | if (nritems == 0) | ||
| 648 | return 0; | ||
| 649 | |||
| 650 | if (parent) { | ||
| 651 | parent_slot = path->slots[level + 1]; | ||
| 652 | btrfs_node_key(parent, &parent_key, parent_slot); | ||
| 653 | btrfs_item_key(leaf, &leaf_key, 0); | ||
| 654 | |||
| 655 | BUG_ON(memcmp(&parent_key, &leaf_key, | ||
| 656 | sizeof(struct btrfs_disk_key))); | ||
| 657 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != | ||
| 658 | btrfs_header_bytenr(leaf)); | ||
| 659 | } | ||
| 660 | #if 0 | ||
| 661 | for (i = 0; nritems > 1 && i < nritems - 2; i++) { | ||
| 662 | btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); | ||
| 663 | btrfs_item_key(leaf, &leaf_key, i); | ||
| 664 | if (comp_keys(&leaf_key, &cpukey) >= 0) { | ||
| 665 | btrfs_print_leaf(root, leaf); | ||
| 666 | printk("slot %d offset bad key\n", i); | ||
| 667 | BUG_ON(1); | ||
| 668 | } | ||
| 669 | if (btrfs_item_offset_nr(leaf, i) != | ||
| 670 | btrfs_item_end_nr(leaf, i + 1)) { | ||
| 671 | btrfs_print_leaf(root, leaf); | ||
| 672 | printk("slot %d offset bad\n", i); | ||
| 673 | BUG_ON(1); | ||
| 674 | } | ||
| 675 | if (i == 0) { | ||
| 676 | if (btrfs_item_offset_nr(leaf, i) + | ||
| 677 | btrfs_item_size_nr(leaf, i) != | ||
| 678 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 679 | btrfs_print_leaf(root, leaf); | ||
| 680 | printk("slot %d first offset bad\n", i); | ||
| 681 | BUG_ON(1); | ||
| 682 | } | ||
| 683 | } | ||
| 684 | } | ||
| 685 | if (nritems > 0) { | ||
| 686 | if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { | ||
| 687 | btrfs_print_leaf(root, leaf); | ||
| 688 | printk("slot %d bad size \n", nritems - 1); | ||
| 689 | BUG_ON(1); | ||
| 690 | } | ||
| 691 | } | ||
| 692 | #endif | ||
| 693 | if (slot != 0 && slot < nritems - 1) { | ||
| 694 | btrfs_item_key(leaf, &leaf_key, slot); | ||
| 695 | btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); | ||
| 696 | if (comp_keys(&leaf_key, &cpukey) <= 0) { | ||
| 697 | btrfs_print_leaf(root, leaf); | ||
| 698 | printk("slot %d offset bad key\n", slot); | ||
| 699 | BUG_ON(1); | ||
| 700 | } | ||
| 701 | if (btrfs_item_offset_nr(leaf, slot - 1) != | ||
| 702 | btrfs_item_end_nr(leaf, slot)) { | ||
| 703 | btrfs_print_leaf(root, leaf); | ||
| 704 | printk("slot %d offset bad\n", slot); | ||
| 705 | BUG_ON(1); | ||
| 706 | } | ||
| 707 | } | ||
| 708 | if (slot < nritems - 1) { | ||
| 709 | btrfs_item_key(leaf, &leaf_key, slot); | ||
| 710 | btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); | ||
| 711 | BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); | ||
| 712 | if (btrfs_item_offset_nr(leaf, slot) != | ||
| 713 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
| 714 | btrfs_print_leaf(root, leaf); | ||
| 715 | printk("slot %d offset bad\n", slot); | ||
| 716 | BUG_ON(1); | ||
| 717 | } | ||
| 718 | } | ||
| 719 | BUG_ON(btrfs_item_offset_nr(leaf, 0) + | ||
| 720 | btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); | ||
| 721 | return 0; | ||
| 722 | } | ||
| 723 | |||
| 724 | static int noinline check_block(struct btrfs_root *root, | ||
| 725 | struct btrfs_path *path, int level) | ||
| 726 | { | ||
| 727 | u64 found_start; | ||
| 728 | return 0; /* the sanity checks below are currently disabled */ | ||
| 729 | if (btrfs_header_level(path->nodes[level]) != level) | ||
| 730 | printk("warning: bad level %Lu wanted %d found %d\n", | ||
| 731 | path->nodes[level]->start, level, | ||
| 732 | btrfs_header_level(path->nodes[level])); | ||
| 733 | found_start = btrfs_header_bytenr(path->nodes[level]); | ||
| 734 | if (found_start != path->nodes[level]->start) { | ||
| 735 | printk("warning: bad bytenr %Lu found %Lu\n", | ||
| 736 | path->nodes[level]->start, found_start); | ||
| 737 | } | ||
| 738 | #if 0 | ||
| 739 | struct extent_buffer *buf = path->nodes[level]; | ||
| 740 | |||
| 741 | if (memcmp_extent_buffer(buf, root->fs_info->fsid, | ||
| 742 | (unsigned long)btrfs_header_fsid(buf), | ||
| 743 | BTRFS_FSID_SIZE)) { | ||
| 744 | printk("warning bad block %Lu\n", buf->start); | ||
| 745 | return 1; | ||
| 746 | } | ||
| 747 | #endif | ||
| 748 | if (level == 0) | ||
| 749 | return check_leaf(root, path, level); | ||
| 750 | return check_node(root, path, level); | ||
| 751 | } | ||
| 752 | |||
| 753 | /* | ||
| 754 | * search for key in the extent_buffer. The items start at offset p, | ||
| 755 | * and they are item_size apart. There are 'max' items in p. | ||
| 756 | * | ||
| 757 | * the slot in the array is returned via slot, and it points to | ||
| 758 | * the place where you would insert key if it is not found in | ||
| 759 | * the array. | ||
| 760 | * | ||
| 761 | * slot may point to max if the key is bigger than all of the keys | ||
| 762 | */ | ||
| 763 | static noinline int generic_bin_search(struct extent_buffer *eb, | ||
| 764 | unsigned long p, | ||
| 765 | int item_size, struct btrfs_key *key, | ||
| 766 | int max, int *slot) | ||
| 767 | { | ||
| 768 | int low = 0; | ||
| 769 | int high = max; | ||
| 770 | int mid; | ||
| 771 | int ret; | ||
| 772 | struct btrfs_disk_key *tmp = NULL; | ||
| 773 | struct btrfs_disk_key unaligned; | ||
| 774 | unsigned long offset; | ||
| 775 | char *map_token = NULL; | ||
| 776 | char *kaddr = NULL; | ||
| 777 | unsigned long map_start = 0; | ||
| 778 | unsigned long map_len = 0; | ||
| 779 | int err; | ||
| 780 | |||
| 781 | while(low < high) { | ||
| 782 | mid = (low + high) / 2; | ||
| 783 | offset = p + mid * item_size; | ||
| 784 | |||
| 785 | if (!map_token || offset < map_start || | ||
| 786 | (offset + sizeof(struct btrfs_disk_key)) > | ||
| 787 | map_start + map_len) { | ||
| 788 | if (map_token) { | ||
| 789 | unmap_extent_buffer(eb, map_token, KM_USER0); | ||
| 790 | map_token = NULL; | ||
| 791 | } | ||
| 792 | err = map_extent_buffer(eb, offset, | ||
| 793 | sizeof(struct btrfs_disk_key), | ||
| 794 | &map_token, &kaddr, | ||
| 795 | &map_start, &map_len, KM_USER0); | ||
| 796 | |||
| 797 | if (!err) { | ||
| 798 | tmp = (struct btrfs_disk_key *)(kaddr + offset - | ||
| 799 | map_start); | ||
| 800 | } else { | ||
| 801 | read_extent_buffer(eb, &unaligned, | ||
| 802 | offset, sizeof(unaligned)); | ||
| 803 | tmp = &unaligned; | ||
| 804 | } | ||
| 805 | |||
| 806 | } else { | ||
| 807 | tmp = (struct btrfs_disk_key *)(kaddr + offset - | ||
| 808 | map_start); | ||
| 809 | } | ||
| 810 | ret = comp_keys(tmp, key); | ||
| 811 | |||
| 812 | if (ret < 0) | ||
| 813 | low = mid + 1; | ||
| 814 | else if (ret > 0) | ||
| 815 | high = mid; | ||
| 816 | else { | ||
| 817 | *slot = mid; | ||
| 818 | if (map_token) | ||
| 819 | unmap_extent_buffer(eb, map_token, KM_USER0); | ||
| 820 | return 0; | ||
| 821 | } | ||
| 822 | } | ||
| 823 | *slot = low; | ||
| 824 | if (map_token) | ||
| 825 | unmap_extent_buffer(eb, map_token, KM_USER0); | ||
| 826 | return 1; | ||
| 827 | } | ||
| 828 | |||
| 829 | /* | ||
| 830 | * simple bin_search frontend that does the right thing for | ||
| 831 | * leaves vs nodes | ||
| 832 | */ | ||
| 833 | static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, | ||
| 834 | int level, int *slot) | ||
| 835 | { | ||
| 836 | if (level == 0) { | ||
| 837 | return generic_bin_search(eb, | ||
| 838 | offsetof(struct btrfs_leaf, items), | ||
| 839 | sizeof(struct btrfs_item), | ||
| 840 | key, btrfs_header_nritems(eb), | ||
| 841 | slot); | ||
| 842 | } else { | ||
| 843 | return generic_bin_search(eb, | ||
| 844 | offsetof(struct btrfs_node, ptrs), | ||
| 845 | sizeof(struct btrfs_key_ptr), | ||
| 846 | key, btrfs_header_nritems(eb), | ||
| 847 | slot); | ||
| 848 | } | ||
| 849 | return -1; | ||
| 850 | } | ||
| 851 | |||
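A sketch of how a tree descent consumes the bin_search() result at an
internal node (the helper is hypothetical; it mirrors the slot adjustment
btrfs_search_slot() performs): an exact miss returns the insertion point,
which is one past the child pointer whose key range covers the search key,
so the caller steps back one slot.

	static u64 my_descend_ptr(struct extent_buffer *node,
				  struct btrfs_key *key, int level)
	{
		int slot;
		int ret = bin_search(node, key, level, &slot);

		if (ret && slot > 0)
			slot--;		/* covering child is the previous slot */
		return btrfs_node_blockptr(node, slot);
	}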
| 852 | /* given a node and slot number, this reads the block it points to. The | ||
| 853 | * extent buffer is returned with a reference taken (but unlocked). | ||
| 854 | * NULL is returned on error. | ||
| 855 | */ | ||
| 856 | static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, | ||
| 857 | struct extent_buffer *parent, int slot) | ||
| 858 | { | ||
| 859 | int level = btrfs_header_level(parent); | ||
| 860 | if (slot < 0) | ||
| 861 | return NULL; | ||
| 862 | if (slot >= btrfs_header_nritems(parent)) | ||
| 863 | return NULL; | ||
| 864 | |||
| 865 | BUG_ON(level == 0); | ||
| 866 | |||
| 867 | return read_tree_block(root, btrfs_node_blockptr(parent, slot), | ||
| 868 | btrfs_level_size(root, level - 1), | ||
| 869 | btrfs_node_ptr_generation(parent, slot)); | ||
| 870 | } | ||
| 871 | |||
| 872 | /* | ||
| 873 | * node level balancing, used to make sure nodes are in proper order for | ||
| 874 | * item deletion. We balance from the top down, so we have to make sure | ||
| 875 | * that a deletion won't leave a node completely empty later on. | ||
| 876 | */ | ||
| 877 | static noinline int balance_level(struct btrfs_trans_handle *trans, | ||
| 878 | struct btrfs_root *root, | ||
| 879 | struct btrfs_path *path, int level) | ||
| 880 | { | ||
| 881 | struct extent_buffer *right = NULL; | ||
| 882 | struct extent_buffer *mid; | ||
| 883 | struct extent_buffer *left = NULL; | ||
| 884 | struct extent_buffer *parent = NULL; | ||
| 885 | int ret = 0; | ||
| 886 | int wret; | ||
| 887 | int pslot; | ||
| 888 | int orig_slot = path->slots[level]; | ||
| 889 | int err_on_enospc = 0; | ||
| 890 | u64 orig_ptr; | ||
| 891 | |||
| 892 | if (level == 0) | ||
| 893 | return 0; | ||
| 894 | |||
| 895 | mid = path->nodes[level]; | ||
| 896 | WARN_ON(!path->locks[level]); | ||
| 897 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | ||
| 898 | |||
| 899 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); | ||
| 900 | |||
| 901 | if (level < BTRFS_MAX_LEVEL - 1) | ||
| 902 | parent = path->nodes[level + 1]; | ||
| 903 | pslot = path->slots[level + 1]; | ||
| 904 | |||
| 905 | /* | ||
| 906 | * deal with the case where there is only one pointer in the root | ||
| 907 | * by promoting the node below to a root | ||
| 908 | */ | ||
| 909 | if (!parent) { | ||
| 910 | struct extent_buffer *child; | ||
| 911 | |||
| 912 | if (btrfs_header_nritems(mid) != 1) | ||
| 913 | return 0; | ||
| 914 | |||
| 915 | /* promote the child to a root */ | ||
| 916 | child = read_node_slot(root, mid, 0); | ||
| 917 | BUG_ON(!child); /* check before use: read_node_slot() can return NULL */ | ||
| 918 | btrfs_tree_lock(child); | ||
| 919 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); | ||
| 920 | BUG_ON(ret); | ||
| 921 | |||
| 922 | spin_lock(&root->node_lock); | ||
| 923 | root->node = child; | ||
| 924 | spin_unlock(&root->node_lock); | ||
| 925 | |||
| 926 | ret = btrfs_update_extent_ref(trans, root, child->start, | ||
| 927 | mid->start, child->start, | ||
| 928 | root->root_key.objectid, | ||
| 929 | trans->transid, level - 1); | ||
| 930 | BUG_ON(ret); | ||
| 931 | |||
| 932 | add_root_to_dirty_list(root); | ||
| 933 | btrfs_tree_unlock(child); | ||
| 934 | path->locks[level] = 0; | ||
| 935 | path->nodes[level] = NULL; | ||
| 936 | clean_tree_block(trans, root, mid); | ||
| 937 | btrfs_tree_unlock(mid); | ||
| 938 | /* once for the path */ | ||
| 939 | free_extent_buffer(mid); | ||
| 940 | ret = btrfs_free_extent(trans, root, mid->start, mid->len, | ||
| 941 | mid->start, root->root_key.objectid, | ||
| 942 | btrfs_header_generation(mid), | ||
| 943 | level, 1); | ||
| 944 | /* once for the root ptr */ | ||
| 945 | free_extent_buffer(mid); | ||
| 946 | return ret; | ||
| 947 | } | ||
| 948 | if (btrfs_header_nritems(mid) > | ||
| 949 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | ||
| 950 | return 0; | ||
| 951 | |||
| 952 | if (btrfs_header_nritems(mid) < 2) | ||
| 953 | err_on_enospc = 1; | ||
| 954 | |||
| 955 | left = read_node_slot(root, parent, pslot - 1); | ||
| 956 | if (left) { | ||
| 957 | btrfs_tree_lock(left); | ||
| 958 | wret = btrfs_cow_block(trans, root, left, | ||
| 959 | parent, pslot - 1, &left, 0); | ||
| 960 | if (wret) { | ||
| 961 | ret = wret; | ||
| 962 | goto enospc; | ||
| 963 | } | ||
| 964 | } | ||
| 965 | right = read_node_slot(root, parent, pslot + 1); | ||
| 966 | if (right) { | ||
| 967 | btrfs_tree_lock(right); | ||
| 968 | wret = btrfs_cow_block(trans, root, right, | ||
| 969 | parent, pslot + 1, &right, 0); | ||
| 970 | if (wret) { | ||
| 971 | ret = wret; | ||
| 972 | goto enospc; | ||
| 973 | } | ||
| 974 | } | ||
| 975 | |||
| 976 | /* first, try to make some room in the middle buffer */ | ||
| 977 | if (left) { | ||
| 978 | orig_slot += btrfs_header_nritems(left); | ||
| 979 | wret = push_node_left(trans, root, left, mid, 1); | ||
| 980 | if (wret < 0) | ||
| 981 | ret = wret; | ||
| 982 | if (btrfs_header_nritems(mid) < 2) | ||
| 983 | err_on_enospc = 1; | ||
| 984 | } | ||
| 985 | |||
| 986 | /* | ||
| 987 | * then try to empty the right most buffer into the middle | ||
| 988 | */ | ||
| 989 | if (right) { | ||
| 990 | wret = push_node_left(trans, root, mid, right, 1); | ||
| 991 | if (wret < 0 && wret != -ENOSPC) | ||
| 992 | ret = wret; | ||
| 993 | if (btrfs_header_nritems(right) == 0) { | ||
| 994 | u64 bytenr = right->start; | ||
| 995 | u64 generation = btrfs_header_generation(parent); | ||
| 996 | u32 blocksize = right->len; | ||
| 997 | |||
| 998 | clean_tree_block(trans, root, right); | ||
| 999 | btrfs_tree_unlock(right); | ||
| 1000 | free_extent_buffer(right); | ||
| 1001 | right = NULL; | ||
| 1002 | wret = del_ptr(trans, root, path, level + 1, pslot + | ||
| 1003 | 1); | ||
| 1004 | if (wret) | ||
| 1005 | ret = wret; | ||
| 1006 | wret = btrfs_free_extent(trans, root, bytenr, | ||
| 1007 | blocksize, parent->start, | ||
| 1008 | btrfs_header_owner(parent), | ||
| 1009 | generation, level, 1); | ||
| 1010 | if (wret) | ||
| 1011 | ret = wret; | ||
| 1012 | } else { | ||
| 1013 | struct btrfs_disk_key right_key; | ||
| 1014 | btrfs_node_key(right, &right_key, 0); | ||
| 1015 | btrfs_set_node_key(parent, &right_key, pslot + 1); | ||
| 1016 | btrfs_mark_buffer_dirty(parent); | ||
| 1017 | } | ||
| 1018 | } | ||
| 1019 | if (btrfs_header_nritems(mid) == 1) { | ||
| 1020 | /* | ||
| 1021 | * we're not allowed to leave a node with one item in the | ||
| 1022 | * tree during a delete. A deletion from lower in the tree | ||
| 1023 | * could try to delete the only pointer in this node. | ||
| 1024 | * So, pull some keys from the left. | ||
| 1025 | * There has to be a left pointer at this point because | ||
| 1026 | * otherwise we would have pulled some pointers from the | ||
| 1027 | * right | ||
| 1028 | */ | ||
| 1029 | BUG_ON(!left); | ||
| 1030 | wret = balance_node_right(trans, root, mid, left); | ||
| 1031 | if (wret < 0) { | ||
| 1032 | ret = wret; | ||
| 1033 | goto enospc; | ||
| 1034 | } | ||
| 1035 | if (wret == 1) { | ||
| 1036 | wret = push_node_left(trans, root, left, mid, 1); | ||
| 1037 | if (wret < 0) | ||
| 1038 | ret = wret; | ||
| 1039 | } | ||
| 1040 | BUG_ON(wret == 1); | ||
| 1041 | } | ||
| 1042 | if (btrfs_header_nritems(mid) == 0) { | ||
| 1043 | /* we've managed to empty the middle node, drop it */ | ||
| 1044 | u64 root_gen = btrfs_header_generation(parent); | ||
| 1045 | u64 bytenr = mid->start; | ||
| 1046 | u32 blocksize = mid->len; | ||
| 1047 | |||
| 1048 | clean_tree_block(trans, root, mid); | ||
| 1049 | btrfs_tree_unlock(mid); | ||
| 1050 | free_extent_buffer(mid); | ||
| 1051 | mid = NULL; | ||
| 1052 | wret = del_ptr(trans, root, path, level + 1, pslot); | ||
| 1053 | if (wret) | ||
| 1054 | ret = wret; | ||
| 1055 | wret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
| 1056 | parent->start, | ||
| 1057 | btrfs_header_owner(parent), | ||
| 1058 | root_gen, level, 1); | ||
| 1059 | if (wret) | ||
| 1060 | ret = wret; | ||
| 1061 | } else { | ||
| 1062 | /* update the parent key to reflect our changes */ | ||
| 1063 | struct btrfs_disk_key mid_key; | ||
| 1064 | btrfs_node_key(mid, &mid_key, 0); | ||
| 1065 | btrfs_set_node_key(parent, &mid_key, pslot); | ||
| 1066 | btrfs_mark_buffer_dirty(parent); | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | /* update the path */ | ||
| 1070 | if (left) { | ||
| 1071 | if (btrfs_header_nritems(left) > orig_slot) { | ||
| 1072 | extent_buffer_get(left); | ||
| 1073 | /* left was locked after cow */ | ||
| 1074 | path->nodes[level] = left; | ||
| 1075 | path->slots[level + 1] -= 1; | ||
| 1076 | path->slots[level] = orig_slot; | ||
| 1077 | if (mid) { | ||
| 1078 | btrfs_tree_unlock(mid); | ||
| 1079 | free_extent_buffer(mid); | ||
| 1080 | } | ||
| 1081 | } else { | ||
| 1082 | orig_slot -= btrfs_header_nritems(left); | ||
| 1083 | path->slots[level] = orig_slot; | ||
| 1084 | } | ||
| 1085 | } | ||
| 1086 | /* double check we haven't messed things up */ | ||
| 1087 | check_block(root, path, level); | ||
| 1088 | if (orig_ptr != | ||
| 1089 | btrfs_node_blockptr(path->nodes[level], path->slots[level])) | ||
| 1090 | BUG(); | ||
| 1091 | enospc: | ||
| 1092 | if (right) { | ||
| 1093 | btrfs_tree_unlock(right); | ||
| 1094 | free_extent_buffer(right); | ||
| 1095 | } | ||
| 1096 | if (left) { | ||
| 1097 | if (path->nodes[level] != left) | ||
| 1098 | btrfs_tree_unlock(left); | ||
| 1099 | free_extent_buffer(left); | ||
| 1100 | } | ||
| 1101 | return ret; | ||
| 1102 | } | ||
| 1103 | |||
| 1104 | /* Node balancing for insertion. Here we only split or push nodes around | ||
| 1105 | * when they are completely full. This is also done top down, so we | ||
| 1106 | * have to be pessimistic. | ||
| 1107 | */ | ||
| 1108 | static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, | ||
| 1109 | struct btrfs_root *root, | ||
| 1110 | struct btrfs_path *path, int level) | ||
| 1111 | { | ||
| 1112 | struct extent_buffer *right = NULL; | ||
| 1113 | struct extent_buffer *mid; | ||
| 1114 | struct extent_buffer *left = NULL; | ||
| 1115 | struct extent_buffer *parent = NULL; | ||
| 1116 | int ret = 0; | ||
| 1117 | int wret; | ||
| 1118 | int pslot; | ||
| 1119 | int orig_slot = path->slots[level]; | ||
| 1120 | u64 orig_ptr; | ||
| 1121 | |||
| 1122 | if (level == 0) | ||
| 1123 | return 1; | ||
| 1124 | |||
| 1125 | mid = path->nodes[level]; | ||
| 1126 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | ||
| 1127 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); | ||
| 1128 | |||
| 1129 | if (level < BTRFS_MAX_LEVEL - 1) | ||
| 1130 | parent = path->nodes[level + 1]; | ||
| 1131 | pslot = path->slots[level + 1]; | ||
| 1132 | |||
| 1133 | if (!parent) | ||
| 1134 | return 1; | ||
| 1135 | |||
| 1136 | left = read_node_slot(root, parent, pslot - 1); | ||
| 1137 | |||
| 1138 | /* first, try to make some room in the middle buffer */ | ||
| 1139 | if (left) { | ||
| 1140 | u32 left_nr; | ||
| 1141 | |||
| 1142 | btrfs_tree_lock(left); | ||
| 1143 | left_nr = btrfs_header_nritems(left); | ||
| 1144 | if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { | ||
| 1145 | wret = 1; | ||
| 1146 | } else { | ||
| 1147 | ret = btrfs_cow_block(trans, root, left, parent, | ||
| 1148 | pslot - 1, &left, 0); | ||
| 1149 | if (ret) | ||
| 1150 | wret = 1; | ||
| 1151 | else { | ||
| 1152 | wret = push_node_left(trans, root, | ||
| 1153 | left, mid, 0); | ||
| 1154 | } | ||
| 1155 | } | ||
| 1156 | if (wret < 0) | ||
| 1157 | ret = wret; | ||
| 1158 | if (wret == 0) { | ||
| 1159 | struct btrfs_disk_key disk_key; | ||
| 1160 | orig_slot += left_nr; | ||
| 1161 | btrfs_node_key(mid, &disk_key, 0); | ||
| 1162 | btrfs_set_node_key(parent, &disk_key, pslot); | ||
| 1163 | btrfs_mark_buffer_dirty(parent); | ||
| 1164 | if (btrfs_header_nritems(left) > orig_slot) { | ||
| 1165 | path->nodes[level] = left; | ||
| 1166 | path->slots[level + 1] -= 1; | ||
| 1167 | path->slots[level] = orig_slot; | ||
| 1168 | btrfs_tree_unlock(mid); | ||
| 1169 | free_extent_buffer(mid); | ||
| 1170 | } else { | ||
| 1171 | orig_slot -= | ||
| 1172 | btrfs_header_nritems(left); | ||
| 1173 | path->slots[level] = orig_slot; | ||
| 1174 | btrfs_tree_unlock(left); | ||
| 1175 | free_extent_buffer(left); | ||
| 1176 | } | ||
| 1177 | return 0; | ||
| 1178 | } | ||
| 1179 | btrfs_tree_unlock(left); | ||
| 1180 | free_extent_buffer(left); | ||
| 1181 | } | ||
| 1182 | right = read_node_slot(root, parent, pslot + 1); | ||
| 1183 | |||
| 1184 | /* | ||
| 1185 | * then try to empty the right most buffer into the middle | ||
| 1186 | */ | ||
| 1187 | if (right) { | ||
| 1188 | u32 right_nr; | ||
| 1189 | btrfs_tree_lock(right); | ||
| 1190 | right_nr = btrfs_header_nritems(right); | ||
| 1191 | if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { | ||
| 1192 | wret = 1; | ||
| 1193 | } else { | ||
| 1194 | ret = btrfs_cow_block(trans, root, right, | ||
| 1195 | parent, pslot + 1, | ||
| 1196 | &right, 0); | ||
| 1197 | if (ret) | ||
| 1198 | wret = 1; | ||
| 1199 | else { | ||
| 1200 | wret = balance_node_right(trans, root, | ||
| 1201 | right, mid); | ||
| 1202 | } | ||
| 1203 | } | ||
| 1204 | if (wret < 0) | ||
| 1205 | ret = wret; | ||
| 1206 | if (wret == 0) { | ||
| 1207 | struct btrfs_disk_key disk_key; | ||
| 1208 | |||
| 1209 | btrfs_node_key(right, &disk_key, 0); | ||
| 1210 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | ||
| 1211 | btrfs_mark_buffer_dirty(parent); | ||
| 1212 | |||
| 1213 | if (btrfs_header_nritems(mid) <= orig_slot) { | ||
| 1214 | path->nodes[level] = right; | ||
| 1215 | path->slots[level + 1] += 1; | ||
| 1216 | path->slots[level] = orig_slot - | ||
| 1217 | btrfs_header_nritems(mid); | ||
| 1218 | btrfs_tree_unlock(mid); | ||
| 1219 | free_extent_buffer(mid); | ||
| 1220 | } else { | ||
| 1221 | btrfs_tree_unlock(right); | ||
| 1222 | free_extent_buffer(right); | ||
| 1223 | } | ||
| 1224 | return 0; | ||
| 1225 | } | ||
| 1226 | btrfs_tree_unlock(right); | ||
| 1227 | free_extent_buffer(right); | ||
| 1228 | } | ||
| 1229 | return 1; | ||
| 1230 | } | ||
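For readers following along: push_nodes_for_insert only returns 0 when one of the two pushes made room, and callers such as split_node fall through to an actual split otherwise. A stand-alone sketch of that ordering, with purely illustrative names (the toy_* helpers are not part of the btrfs API):

	/* Illustrative model only: each node tracks an item count and a capacity. */
	struct toy_node {
		int nritems;
		int cap;
	};

	/* Move one pointer from mid into dst if dst has room; mirrors the
	 * "sibling must not be almost full" test above. */
	static int toy_push(struct toy_node *dst, struct toy_node *mid)
	{
		if (!dst || dst->nritems >= dst->cap - 1)
			return 1;
		dst->nritems++;
		mid->nritems--;
		return 0;
	}

	/* Try the left sibling first, then the right; return 1 if the caller
	 * must split the node instead. */
	static int toy_push_for_insert(struct toy_node *left, struct toy_node *mid,
				       struct toy_node *right)
	{
		if (toy_push(left, mid) == 0)
			return 0;
		if (toy_push(right, mid) == 0)
			return 0;
		return 1;
	}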
| 1231 | |||
| 1232 | /* | ||
| 1233 | * readahead one full node of leaves, finding things that are close | ||
| 1234 | * to the block in 'slot', and triggering readahead on them. | ||
| 1235 | */ | ||
| 1236 | static noinline void reada_for_search(struct btrfs_root *root, | ||
| 1237 | struct btrfs_path *path, | ||
| 1238 | int level, int slot, u64 objectid) | ||
| 1239 | { | ||
| 1240 | struct extent_buffer *node; | ||
| 1241 | struct btrfs_disk_key disk_key; | ||
| 1242 | u32 nritems; | ||
| 1243 | u64 search; | ||
| 1244 | u64 lowest_read; | ||
| 1245 | u64 highest_read; | ||
| 1246 | u64 nread = 0; | ||
| 1247 | int direction = path->reada; | ||
| 1248 | struct extent_buffer *eb; | ||
| 1249 | u32 nr; | ||
| 1250 | u32 blocksize; | ||
| 1251 | u32 nscan = 0; | ||
| 1252 | |||
| 1253 | if (level != 1) | ||
| 1254 | return; | ||
| 1255 | |||
| 1256 | if (!path->nodes[level]) | ||
| 1257 | return; | ||
| 1258 | |||
| 1259 | node = path->nodes[level]; | ||
| 1260 | |||
| 1261 | search = btrfs_node_blockptr(node, slot); | ||
| 1262 | blocksize = btrfs_level_size(root, level - 1); | ||
| 1263 | eb = btrfs_find_tree_block(root, search, blocksize); | ||
| 1264 | if (eb) { | ||
| 1265 | free_extent_buffer(eb); | ||
| 1266 | return; | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | highest_read = search; | ||
| 1270 | lowest_read = search; | ||
| 1271 | |||
| 1272 | nritems = btrfs_header_nritems(node); | ||
| 1273 | nr = slot; | ||
| 1274 | while (1) { | ||
| 1275 | if (direction < 0) { | ||
| 1276 | if (nr == 0) | ||
| 1277 | break; | ||
| 1278 | nr--; | ||
| 1279 | } else if (direction > 0) { | ||
| 1280 | nr++; | ||
| 1281 | if (nr >= nritems) | ||
| 1282 | break; | ||
| 1283 | } | ||
| 1284 | if (path->reada < 0 && objectid) { | ||
| 1285 | btrfs_node_key(node, &disk_key, nr); | ||
| 1286 | if (btrfs_disk_key_objectid(&disk_key) != objectid) | ||
| 1287 | break; | ||
| 1288 | } | ||
| 1289 | search = btrfs_node_blockptr(node, nr); | ||
| 1290 | if ((search >= lowest_read && search <= highest_read) || | ||
| 1291 | (search < lowest_read && lowest_read - search <= 32768) || | ||
| 1292 | (search > highest_read && search - highest_read <= 32768)) { | ||
| 1293 | readahead_tree_block(root, search, blocksize, | ||
| 1294 | btrfs_node_ptr_generation(node, nr)); | ||
| 1295 | nread += blocksize; | ||
| 1296 | } | ||
| 1297 | nscan++; | ||
| 1298 | if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32)) | ||
| 1299 | break; | ||
| 1300 | if (nread > (1024 * 1024) || nscan > 128) | ||
| 1301 | break; | ||
| 1302 | |||
| 1303 | if (search < lowest_read) | ||
| 1304 | lowest_read = search; | ||
| 1305 | if (search > highest_read) | ||
| 1306 | highest_read = search; | ||
| 1307 | } | ||
| 1308 | } | ||
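The window test in the loop above is easy to misread, so here is a stand-alone restatement (an illustrative helper, not kernel API): a candidate block is read ahead only if it falls inside, or within 32KB of, the range already covered, and the scan gives up after 256KB/32 blocks (1MB/128 blocks when path->reada >= 2).

	#include <stdint.h>

	/* Sketch of the proximity test above: accept blocks inside the window
	 * already visited, or within 32KB on either side of it. */
	static int within_reada_window(uint64_t search, uint64_t lowest,
				       uint64_t highest)
	{
		if (search >= lowest && search <= highest)
			return 1;
		if (search < lowest && lowest - search <= 32768)
			return 1;
		if (search > highest && search - highest <= 32768)
			return 1;
		return 0;
	}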
| 1309 | |||
| 1310 | /* | ||
| 1311 | * when we walk down the tree, it is usually safe to unlock the higher layers in | ||
| 1312 | * the tree. The exception is when our path goes through slot 0, because operations | ||
| 1313 | * on the tree might require changing key pointers higher up in the tree. | ||
| 1314 | * | ||
| 1315 | * callers might also have set path->keep_locks, which tells this code to | ||
| 1316 | * keep the lock if the path points to the last slot in the block. This is | ||
| 1317 | * part of walking through the tree, and selecting the next slot in the higher | ||
| 1318 | * block. | ||
| 1319 | * | ||
| 1320 | * lowest_unlock sets the lowest level in the tree we're allowed to unlock. | ||
| 1321 | * So if lowest_unlock is 1, level 0 won't be unlocked. | ||
| 1322 | */ | ||
| 1323 | static noinline void unlock_up(struct btrfs_path *path, int level, | ||
| 1324 | int lowest_unlock) | ||
| 1325 | { | ||
| 1326 | int i; | ||
| 1327 | int skip_level = level; | ||
| 1328 | int no_skips = 0; | ||
| 1329 | struct extent_buffer *t; | ||
| 1330 | |||
| 1331 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | ||
| 1332 | if (!path->nodes[i]) | ||
| 1333 | break; | ||
| 1334 | if (!path->locks[i]) | ||
| 1335 | break; | ||
| 1336 | if (!no_skips && path->slots[i] == 0) { | ||
| 1337 | skip_level = i + 1; | ||
| 1338 | continue; | ||
| 1339 | } | ||
| 1340 | if (!no_skips && path->keep_locks) { | ||
| 1341 | u32 nritems; | ||
| 1342 | t = path->nodes[i]; | ||
| 1343 | nritems = btrfs_header_nritems(t); | ||
| 1344 | if (nritems < 1 || path->slots[i] >= nritems - 1) { | ||
| 1345 | skip_level = i + 1; | ||
| 1346 | continue; | ||
| 1347 | } | ||
| 1348 | } | ||
| 1349 | if (skip_level < i && i >= lowest_unlock) | ||
| 1350 | no_skips = 1; | ||
| 1351 | |||
| 1352 | t = path->nodes[i]; | ||
| 1353 | if (i >= lowest_unlock && i > skip_level && path->locks[i]) { | ||
| 1354 | btrfs_tree_unlock(t); | ||
| 1355 | path->locks[i] = 0; | ||
| 1356 | } | ||
| 1357 | } | ||
| 1358 | } | ||
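Restated as a single per-level predicate (a hedged model, not the kernel function): a level stops pinning its parents' locks once it is not in slot 0 and, when keep_locks is set, not in its last slot either.

	/* Illustrative per-level test behind unlock_up: returns nonzero when
	 * this level no longer pins its parents' locks. */
	static int toy_level_releases_parents(int slot, int nritems, int keep_locks)
	{
		if (slot == 0)
			return 0;	/* a slot-0 change may ripple keys upward */
		if (keep_locks && (nritems < 1 || slot >= nritems - 1))
			return 0;	/* walker may still step into the parent */
		return 1;
	}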
| 1359 | |||
| 1360 | /* | ||
| 1361 | * look for key in the tree. path is filled in with nodes along the way | ||
| 1362 | * if key is found, we return zero and you can find the item in the leaf | ||
| 1363 | * level of the path (level 0) | ||
| 1364 | * | ||
| 1365 | * If the key isn't found, the path points to the slot where it should | ||
| 1366 | * be inserted, and 1 is returned. If there are other errors during the | ||
| 1367 | * search, a negative error number is returned. | ||
| 1368 | * | ||
| 1369 | * if ins_len > 0, nodes and leaves will be split as we walk down the | ||
| 1370 | * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if | ||
| 1371 | * possible) | ||
| 1372 | */ | ||
| 1373 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1374 | *root, struct btrfs_key *key, struct btrfs_path *p, int | ||
| 1375 | ins_len, int cow) | ||
| 1376 | { | ||
| 1377 | struct extent_buffer *b; | ||
| 1378 | struct extent_buffer *tmp; | ||
| 1379 | int slot; | ||
| 1380 | int ret; | ||
| 1381 | int level; | ||
| 1382 | int should_reada = p->reada; | ||
| 1383 | int lowest_unlock = 1; | ||
| 1384 | int blocksize; | ||
| 1385 | u8 lowest_level = 0; | ||
| 1386 | u64 blocknr; | ||
| 1387 | u64 gen; | ||
| 1388 | struct btrfs_key prealloc_block; | ||
| 1389 | |||
| 1390 | lowest_level = p->lowest_level; | ||
| 1391 | WARN_ON(lowest_level && ins_len > 0); | ||
| 1392 | WARN_ON(p->nodes[0] != NULL); | ||
| 1393 | WARN_ON(cow && root == root->fs_info->extent_root && | ||
| 1394 | !mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 1395 | if (ins_len < 0) | ||
| 1396 | lowest_unlock = 2; | ||
| 1397 | |||
| 1398 | prealloc_block.objectid = 0; | ||
| 1399 | |||
| 1400 | again: | ||
| 1401 | if (p->skip_locking) | ||
| 1402 | b = btrfs_root_node(root); | ||
| 1403 | else | ||
| 1404 | b = btrfs_lock_root_node(root); | ||
| 1405 | |||
| 1406 | while (b) { | ||
| 1407 | level = btrfs_header_level(b); | ||
| 1408 | |||
| 1409 | /* | ||
| 1410 | * set up the path here so we can release it under lock | ||
| 1411 | * contention with the cow code | ||
| 1412 | */ | ||
| 1413 | p->nodes[level] = b; | ||
| 1414 | if (!p->skip_locking) | ||
| 1415 | p->locks[level] = 1; | ||
| 1416 | |||
| 1417 | if (cow) { | ||
| 1418 | int wret; | ||
| 1419 | |||
| 1420 | /* can we skip COWing this block? */ | ||
| 1421 | spin_lock(&root->fs_info->hash_lock); | ||
| 1422 | if (btrfs_header_generation(b) == trans->transid && | ||
| 1423 | btrfs_header_owner(b) == root->root_key.objectid && | ||
| 1424 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
| 1425 | spin_unlock(&root->fs_info->hash_lock); | ||
| 1426 | goto cow_done; | ||
| 1427 | } | ||
| 1428 | spin_unlock(&root->fs_info->hash_lock); | ||
| 1429 | |||
| 1430 | /* ok, we have to cow; is our old prealloc the right | ||
| 1431 | * size? | ||
| 1432 | */ | ||
| 1433 | if (prealloc_block.objectid && | ||
| 1434 | prealloc_block.offset != b->len) { | ||
| 1435 | btrfs_free_reserved_extent(root, | ||
| 1436 | prealloc_block.objectid, | ||
| 1437 | prealloc_block.offset); | ||
| 1438 | prealloc_block.objectid = 0; | ||
| 1439 | } | ||
| 1440 | |||
| 1441 | /* | ||
| 1442 | * for higher level blocks, try not to allocate new blocks | ||
| 1443 | * while holding both this block's and the parent's locks. | ||
| 1444 | */ | ||
| 1445 | if (level > 1 && !prealloc_block.objectid && | ||
| 1446 | btrfs_path_lock_waiting(p, level)) { | ||
| 1447 | u32 size = b->len; | ||
| 1448 | u64 hint = b->start; | ||
| 1449 | |||
| 1450 | btrfs_release_path(root, p); | ||
| 1451 | ret = btrfs_reserve_extent(trans, root, | ||
| 1452 | size, size, 0, | ||
| 1453 | hint, (u64)-1, | ||
| 1454 | &prealloc_block, 0); | ||
| 1455 | BUG_ON(ret); | ||
| 1456 | goto again; | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | wret = btrfs_cow_block(trans, root, b, | ||
| 1460 | p->nodes[level + 1], | ||
| 1461 | p->slots[level + 1], | ||
| 1462 | &b, prealloc_block.objectid); | ||
| 1463 | prealloc_block.objectid = 0; | ||
| 1464 | if (wret) { | ||
| 1465 | free_extent_buffer(b); | ||
| 1466 | ret = wret; | ||
| 1467 | goto done; | ||
| 1468 | } | ||
| 1469 | } | ||
| 1470 | cow_done: | ||
| 1471 | BUG_ON(!cow && ins_len); | ||
| 1472 | if (level != btrfs_header_level(b)) | ||
| 1473 | WARN_ON(1); | ||
| 1474 | level = btrfs_header_level(b); | ||
| 1475 | |||
| 1476 | p->nodes[level] = b; | ||
| 1477 | if (!p->skip_locking) | ||
| 1478 | p->locks[level] = 1; | ||
| 1479 | |||
| 1480 | ret = check_block(root, p, level); | ||
| 1481 | if (ret) { | ||
| 1482 | ret = -1; | ||
| 1483 | goto done; | ||
| 1484 | } | ||
| 1485 | |||
| 1486 | ret = bin_search(b, key, level, &slot); | ||
| 1487 | if (level != 0) { | ||
| 1488 | if (ret && slot > 0) | ||
| 1489 | slot -= 1; | ||
| 1490 | p->slots[level] = slot; | ||
| 1491 | if (ins_len > 0 && btrfs_header_nritems(b) >= | ||
| 1492 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | ||
| 1493 | int sret = split_node(trans, root, p, level); | ||
| 1494 | BUG_ON(sret > 0); | ||
| 1495 | if (sret) { | ||
| 1496 | ret = sret; | ||
| 1497 | goto done; | ||
| 1498 | } | ||
| 1499 | b = p->nodes[level]; | ||
| 1500 | slot = p->slots[level]; | ||
| 1501 | } else if (ins_len < 0) { | ||
| 1502 | int sret = balance_level(trans, root, p, | ||
| 1503 | level); | ||
| 1504 | if (sret) { | ||
| 1505 | ret = sret; | ||
| 1506 | goto done; | ||
| 1507 | } | ||
| 1508 | b = p->nodes[level]; | ||
| 1509 | if (!b) { | ||
| 1510 | btrfs_release_path(NULL, p); | ||
| 1511 | goto again; | ||
| 1512 | } | ||
| 1513 | slot = p->slots[level]; | ||
| 1514 | BUG_ON(btrfs_header_nritems(b) == 1); | ||
| 1515 | } | ||
| 1516 | unlock_up(p, level, lowest_unlock); | ||
| 1517 | |||
| 1518 | /* this is only true while dropping a snapshot */ | ||
| 1519 | if (level == lowest_level) { | ||
| 1520 | ret = 0; | ||
| 1521 | goto done; | ||
| 1522 | } | ||
| 1523 | |||
| 1524 | blocknr = btrfs_node_blockptr(b, slot); | ||
| 1525 | gen = btrfs_node_ptr_generation(b, slot); | ||
| 1526 | blocksize = btrfs_level_size(root, level - 1); | ||
| 1527 | |||
| 1528 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | ||
| 1529 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
| 1530 | b = tmp; | ||
| 1531 | } else { | ||
| 1532 | /* | ||
| 1533 | * reduce lock contention at high levels | ||
| 1534 | * of the btree by dropping locks before | ||
| 1535 | * we read. | ||
| 1536 | */ | ||
| 1537 | if (level > 1) { | ||
| 1538 | btrfs_release_path(NULL, p); | ||
| 1539 | if (tmp) | ||
| 1540 | free_extent_buffer(tmp); | ||
| 1541 | if (should_reada) | ||
| 1542 | reada_for_search(root, p, | ||
| 1543 | level, slot, | ||
| 1544 | key->objectid); | ||
| 1545 | |||
| 1546 | tmp = read_tree_block(root, blocknr, | ||
| 1547 | blocksize, gen); | ||
| 1548 | if (tmp) | ||
| 1549 | free_extent_buffer(tmp); | ||
| 1550 | goto again; | ||
| 1551 | } else { | ||
| 1552 | if (tmp) | ||
| 1553 | free_extent_buffer(tmp); | ||
| 1554 | if (should_reada) | ||
| 1555 | reada_for_search(root, p, | ||
| 1556 | level, slot, | ||
| 1557 | key->objectid); | ||
| 1558 | b = read_node_slot(root, b, slot); | ||
| 1559 | } | ||
| 1560 | } | ||
| 1561 | if (!p->skip_locking) | ||
| 1562 | btrfs_tree_lock(b); | ||
| 1563 | } else { | ||
| 1564 | p->slots[level] = slot; | ||
| 1565 | if (ins_len > 0 && btrfs_leaf_free_space(root, b) < | ||
| 1566 | sizeof(struct btrfs_item) + ins_len) { | ||
| 1567 | int sret = split_leaf(trans, root, key, | ||
| 1568 | p, ins_len, ret == 0); | ||
| 1569 | BUG_ON(sret > 0); | ||
| 1570 | if (sret) { | ||
| 1571 | ret = sret; | ||
| 1572 | goto done; | ||
| 1573 | } | ||
| 1574 | } | ||
| 1575 | unlock_up(p, level, lowest_unlock); | ||
| 1576 | goto done; | ||
| 1577 | } | ||
| 1578 | } | ||
| 1579 | ret = 1; | ||
| 1580 | done: | ||
| 1581 | if (prealloc_block.objectid) { | ||
| 1582 | btrfs_free_reserved_extent(root, | ||
| 1583 | prealloc_block.objectid, | ||
| 1584 | prealloc_block.offset); | ||
| 1585 | } | ||
| 1586 | |||
| 1587 | return ret; | ||
| 1588 | } | ||
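For orientation, a hedged sketch of a typical read-only caller (error handling trimmed, and assuming root and key are in scope; btrfs_alloc_path()/btrfs_free_path() are the usual pairing elsewhere in this file). With cow == 0 and ins_len == 0 the transaction handle is not needed:

	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* ins_len == 0, cow == 0: pure lookup, trans may be NULL */
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;			/* hard error */
	if (ret == 0) {
		/* key found: item is in path->nodes[0] at path->slots[0] */
	} else {
		/* ret == 1: key absent; path->slots[0] is the insertion point */
	}
out:
	btrfs_free_path(path);			/* drops locks and references */
	return ret;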
| 1589 | |||
| 1590 | int btrfs_merge_path(struct btrfs_trans_handle *trans, | ||
| 1591 | struct btrfs_root *root, | ||
| 1592 | struct btrfs_key *node_keys, | ||
| 1593 | u64 *nodes, int lowest_level) | ||
| 1594 | { | ||
| 1595 | struct extent_buffer *eb; | ||
| 1596 | struct extent_buffer *parent; | ||
| 1597 | struct btrfs_key key; | ||
| 1598 | u64 bytenr; | ||
| 1599 | u64 generation; | ||
| 1600 | u32 blocksize; | ||
| 1601 | int level; | ||
| 1602 | int slot; | ||
| 1603 | int key_match; | ||
| 1604 | int ret; | ||
| 1605 | |||
| 1606 | eb = btrfs_lock_root_node(root); | ||
| 1607 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); | ||
| 1608 | BUG_ON(ret); | ||
| 1609 | |||
| 1610 | parent = eb; | ||
| 1611 | while (1) { | ||
| 1612 | level = btrfs_header_level(parent); | ||
| 1613 | if (level == 0 || level <= lowest_level) | ||
| 1614 | break; | ||
| 1615 | |||
| 1616 | ret = bin_search(parent, &node_keys[lowest_level], level, | ||
| 1617 | &slot); | ||
| 1618 | if (ret && slot > 0) | ||
| 1619 | slot--; | ||
| 1620 | |||
| 1621 | bytenr = btrfs_node_blockptr(parent, slot); | ||
| 1622 | if (nodes[level - 1] == bytenr) | ||
| 1623 | break; | ||
| 1624 | |||
| 1625 | blocksize = btrfs_level_size(root, level - 1); | ||
| 1626 | generation = btrfs_node_ptr_generation(parent, slot); | ||
| 1627 | btrfs_node_key_to_cpu(eb, &key, slot); | ||
| 1628 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); | ||
| 1629 | |||
| 1630 | /* | ||
| 1631 | * if the node keys match and the node pointer hasn't been modified | ||
| 1632 | * in the running transaction, we can merge the path. for | ||
| 1633 | * reloc trees, the node pointer check is skipped; this is | ||
| 1634 | * because the reloc trees are fully controlled by the space | ||
| 1635 | * balance code, so no one else can modify them. | ||
| 1636 | */ | ||
| 1637 | if (!nodes[level - 1] || !key_match || | ||
| 1638 | (generation == trans->transid && | ||
| 1639 | root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { | ||
| 1640 | next_level: | ||
| 1641 | if (level == 1 || level == lowest_level + 1) | ||
| 1642 | break; | ||
| 1643 | |||
| 1644 | eb = read_tree_block(root, bytenr, blocksize, | ||
| 1645 | generation); | ||
| 1646 | btrfs_tree_lock(eb); | ||
| 1647 | |||
| 1648 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | ||
| 1649 | &eb, 0); | ||
| 1650 | BUG_ON(ret); | ||
| 1651 | |||
| 1652 | btrfs_tree_unlock(parent); | ||
| 1653 | free_extent_buffer(parent); | ||
| 1654 | parent = eb; | ||
| 1655 | continue; | ||
| 1656 | } | ||
| 1657 | |||
| 1658 | if (generation == trans->transid) { | ||
| 1659 | u32 refs; | ||
| 1660 | BUG_ON(btrfs_header_owner(eb) != | ||
| 1661 | BTRFS_TREE_RELOC_OBJECTID); | ||
| 1662 | /* | ||
| 1663 | * lock the block to keep __btrfs_cow_block from | ||
| 1664 | * changing the reference count. | ||
| 1665 | */ | ||
| 1666 | eb = read_tree_block(root, bytenr, blocksize, | ||
| 1667 | generation); | ||
| 1668 | btrfs_tree_lock(eb); | ||
| 1669 | |||
| 1670 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, | ||
| 1671 | blocksize, &refs); | ||
| 1672 | BUG_ON(ret); | ||
| 1673 | /* | ||
| 1674 | * if we replaced a block whose reference count is one, | ||
| 1675 | * we would have to "drop the subtree", so skip it for | ||
| 1676 | * simplicity | ||
| 1677 | */ | ||
| 1678 | if (refs == 1) { | ||
| 1679 | btrfs_tree_unlock(eb); | ||
| 1680 | free_extent_buffer(eb); | ||
| 1681 | goto next_level; | ||
| 1682 | } | ||
| 1683 | } | ||
| 1684 | |||
| 1685 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); | ||
| 1686 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); | ||
| 1687 | btrfs_mark_buffer_dirty(parent); | ||
| 1688 | |||
| 1689 | ret = btrfs_inc_extent_ref(trans, root, | ||
| 1690 | nodes[level - 1], | ||
| 1691 | blocksize, parent->start, | ||
| 1692 | btrfs_header_owner(parent), | ||
| 1693 | btrfs_header_generation(parent), | ||
| 1694 | level - 1); | ||
| 1695 | BUG_ON(ret); | ||
| 1696 | ret = btrfs_free_extent(trans, root, bytenr, | ||
| 1697 | blocksize, parent->start, | ||
| 1698 | btrfs_header_owner(parent), | ||
| 1699 | btrfs_header_generation(parent), | ||
| 1700 | level - 1, 1); | ||
| 1701 | BUG_ON(ret); | ||
| 1702 | |||
| 1703 | if (generation == trans->transid) { | ||
| 1704 | btrfs_tree_unlock(eb); | ||
| 1705 | free_extent_buffer(eb); | ||
| 1706 | } | ||
| 1707 | break; | ||
| 1708 | } | ||
| 1709 | btrfs_tree_unlock(parent); | ||
| 1710 | free_extent_buffer(parent); | ||
| 1711 | return 0; | ||
| 1712 | } | ||
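The core merge test above condenses to the predicate below (illustrative only, ignoring the additional nodes[level - 1] non-NULL check): the keys must match, and outside the reloc tree the pointer must not have been rewritten in the running transaction.

	/* Hedged restatement of the merge condition in btrfs_merge_path. */
	static int toy_can_merge(int key_match, unsigned long long generation,
				 unsigned long long transid, int is_reloc_tree)
	{
		if (!key_match)
			return 0;
		if (!is_reloc_tree && generation == transid)
			return 0;	/* pointer already modified this transaction */
		return 1;
	}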
| 1713 | |||
| 1714 | /* | ||
| 1715 | * adjust the pointers going up the tree, starting at level | ||
| 1716 | * making sure the right key of each node points to 'key'. | ||
| 1717 | * This is used after shifting pointers to the left, so it stops | ||
| 1718 | * fixing up pointers when a given leaf/node is not in slot 0 of the | ||
| 1719 | * higher levels | ||
| 1720 | * | ||
| 1721 | * If this fails to write a tree block, it returns -1, but continues | ||
| 1722 | * fixing up the blocks in ram so the tree is consistent. | ||
| 1723 | */ | ||
| 1724 | static int fixup_low_keys(struct btrfs_trans_handle *trans, | ||
| 1725 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 1726 | struct btrfs_disk_key *key, int level) | ||
| 1727 | { | ||
| 1728 | int i; | ||
| 1729 | int ret = 0; | ||
| 1730 | struct extent_buffer *t; | ||
| 1731 | |||
| 1732 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | ||
| 1733 | int tslot = path->slots[i]; | ||
| 1734 | if (!path->nodes[i]) | ||
| 1735 | break; | ||
| 1736 | t = path->nodes[i]; | ||
| 1737 | btrfs_set_node_key(t, key, tslot); | ||
| 1738 | btrfs_mark_buffer_dirty(path->nodes[i]); | ||
| 1739 | if (tslot != 0) | ||
| 1740 | break; | ||
| 1741 | } | ||
| 1742 | return ret; | ||
| 1743 | } | ||
| 1744 | |||
| 1745 | /* | ||
| 1746 | * update item key. | ||
| 1747 | * | ||
| 1748 | * This function isn't completely safe. It's the caller's responsibility | ||
| 1749 | * to ensure that the new key won't break the sort order | ||
| 1750 | */ | ||
| 1751 | int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, | ||
| 1752 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 1753 | struct btrfs_key *new_key) | ||
| 1754 | { | ||
| 1755 | struct btrfs_disk_key disk_key; | ||
| 1756 | struct extent_buffer *eb; | ||
| 1757 | int slot; | ||
| 1758 | |||
| 1759 | eb = path->nodes[0]; | ||
| 1760 | slot = path->slots[0]; | ||
| 1761 | if (slot > 0) { | ||
| 1762 | btrfs_item_key(eb, &disk_key, slot - 1); | ||
| 1763 | if (comp_keys(&disk_key, new_key) >= 0) | ||
| 1764 | return -1; | ||
| 1765 | } | ||
| 1766 | if (slot < btrfs_header_nritems(eb) - 1) { | ||
| 1767 | btrfs_item_key(eb, &disk_key, slot + 1); | ||
| 1768 | if (comp_keys(&disk_key, new_key) <= 0) | ||
| 1769 | return -1; | ||
| 1770 | } | ||
| 1771 | |||
| 1772 | btrfs_cpu_key_to_disk(&disk_key, new_key); | ||
| 1773 | btrfs_set_item_key(eb, &disk_key, slot); | ||
| 1774 | btrfs_mark_buffer_dirty(eb); | ||
| 1775 | if (slot == 0) | ||
| 1776 | fixup_low_keys(trans, root, path, &disk_key, 1); | ||
| 1777 | return 0; | ||
| 1778 | } | ||
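The "safe" in the name is exactly the neighbour check above; as a stand-alone predicate (illustrative, using plain integers in place of btrfs keys):

	/* A key may be rewritten in place only if it still sorts strictly
	 * between its neighbours; otherwise the leaf ordering would break. */
	static int toy_key_update_safe(long long prev, int has_prev,
				       long long next, int has_next,
				       long long new_key)
	{
		if (has_prev && prev >= new_key)
			return 0;
		if (has_next && next <= new_key)
			return 0;
		return 1;
	}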
| 1779 | |||
| 1780 | /* | ||
| 1781 | * try to push data from one node into the next node left in the | ||
| 1782 | * tree. | ||
| 1783 | * | ||
| 1784 | * returns 0 if some ptrs were pushed left, < 0 if there was some horrible | ||
| 1785 | * error, and > 0 if there was no room in the left hand block. | ||
| 1786 | */ | ||
| 1787 | static int push_node_left(struct btrfs_trans_handle *trans, | ||
| 1788 | struct btrfs_root *root, struct extent_buffer *dst, | ||
| 1789 | struct extent_buffer *src, int empty) | ||
| 1790 | { | ||
| 1791 | int push_items = 0; | ||
| 1792 | int src_nritems; | ||
| 1793 | int dst_nritems; | ||
| 1794 | int ret = 0; | ||
| 1795 | |||
| 1796 | src_nritems = btrfs_header_nritems(src); | ||
| 1797 | dst_nritems = btrfs_header_nritems(dst); | ||
| 1798 | push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; | ||
| 1799 | WARN_ON(btrfs_header_generation(src) != trans->transid); | ||
| 1800 | WARN_ON(btrfs_header_generation(dst) != trans->transid); | ||
| 1801 | |||
| 1802 | if (!empty && src_nritems <= 8) | ||
| 1803 | return 1; | ||
| 1804 | |||
| 1805 | if (push_items <= 0) { | ||
| 1806 | return 1; | ||
| 1807 | } | ||
| 1808 | |||
| 1809 | if (empty) { | ||
| 1810 | push_items = min(src_nritems, push_items); | ||
| 1811 | if (push_items < src_nritems) { | ||
| 1812 | /* leave at least 8 pointers in the node if | ||
| 1813 | * we aren't going to empty it | ||
| 1814 | */ | ||
| 1815 | if (src_nritems - push_items < 8) { | ||
| 1816 | if (push_items <= 8) | ||
| 1817 | return 1; | ||
| 1818 | push_items -= 8; | ||
| 1819 | } | ||
| 1820 | } | ||
| 1821 | } else | ||
| 1822 | push_items = min(src_nritems - 8, push_items); | ||
| 1823 | |||
| 1824 | copy_extent_buffer(dst, src, | ||
| 1825 | btrfs_node_key_ptr_offset(dst_nritems), | ||
| 1826 | btrfs_node_key_ptr_offset(0), | ||
| 1827 | push_items * sizeof(struct btrfs_key_ptr)); | ||
| 1828 | |||
| 1829 | if (push_items < src_nritems) { | ||
| 1830 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | ||
| 1831 | btrfs_node_key_ptr_offset(push_items), | ||
| 1832 | (src_nritems - push_items) * | ||
| 1833 | sizeof(struct btrfs_key_ptr)); | ||
| 1834 | } | ||
| 1835 | btrfs_set_header_nritems(src, src_nritems - push_items); | ||
| 1836 | btrfs_set_header_nritems(dst, dst_nritems + push_items); | ||
| 1837 | btrfs_mark_buffer_dirty(src); | ||
| 1838 | btrfs_mark_buffer_dirty(dst); | ||
| 1839 | |||
| 1840 | ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items); | ||
| 1841 | BUG_ON(ret); | ||
| 1842 | |||
| 1843 | return ret; | ||
| 1844 | } | ||
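The clamping at the top of push_node_left is the subtle part: unless the caller asked to empty src, at least 8 pointers always stay behind, and sources of 8 or fewer items are not worth touching. A hedged stand-alone model:

	/* Returns how many pointers the model would push, 0 meaning "give up";
	 * mirrors the empty/non-empty branches above. */
	static int toy_clamp_push_left(int src_nritems, int dst_free, int empty)
	{
		int push_items = dst_free;

		if (!empty && src_nritems <= 8)
			return 0;
		if (push_items <= 0)
			return 0;

		if (empty) {
			if (push_items >= src_nritems)
				push_items = src_nritems;	/* take everything */
			else if (src_nritems - push_items < 8) {
				/* would leave fewer than 8 behind: push less */
				if (push_items <= 8)
					return 0;
				push_items -= 8;
			}
		} else if (push_items > src_nritems - 8) {
			push_items = src_nritems - 8;
		}
		return push_items;
	}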
| 1845 | |||
| 1846 | /* | ||
| 1847 | * try to push data from one node into the next node right in the | ||
| 1848 | * tree. | ||
| 1849 | * | ||
| 1850 | * returns 0 if some ptrs were pushed, < 0 if there was some horrible | ||
| 1851 | * error, and > 0 if there was no room in the right hand block. | ||
| 1852 | * | ||
| 1853 | * this will only push up to 1/2 the contents of the left node over | ||
| 1854 | */ | ||
| 1855 | static int balance_node_right(struct btrfs_trans_handle *trans, | ||
| 1856 | struct btrfs_root *root, | ||
| 1857 | struct extent_buffer *dst, | ||
| 1858 | struct extent_buffer *src) | ||
| 1859 | { | ||
| 1860 | int push_items = 0; | ||
| 1861 | int max_push; | ||
| 1862 | int src_nritems; | ||
| 1863 | int dst_nritems; | ||
| 1864 | int ret = 0; | ||
| 1865 | |||
| 1866 | WARN_ON(btrfs_header_generation(src) != trans->transid); | ||
| 1867 | WARN_ON(btrfs_header_generation(dst) != trans->transid); | ||
| 1868 | |||
| 1869 | src_nritems = btrfs_header_nritems(src); | ||
| 1870 | dst_nritems = btrfs_header_nritems(dst); | ||
| 1871 | push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; | ||
| 1872 | if (push_items <= 0) { | ||
| 1873 | return 1; | ||
| 1874 | } | ||
| 1875 | |||
| 1876 | if (src_nritems < 4) { | ||
| 1877 | return 1; | ||
| 1878 | } | ||
| 1879 | |||
| 1880 | max_push = src_nritems / 2 + 1; | ||
| 1881 | /* don't try to empty the node */ | ||
| 1882 | if (max_push >= src_nritems) { | ||
| 1883 | return 1; | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | if (max_push < push_items) | ||
| 1887 | push_items = max_push; | ||
| 1888 | |||
| 1889 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), | ||
| 1890 | btrfs_node_key_ptr_offset(0), | ||
| 1891 | dst_nritems * | ||
| 1892 | sizeof(struct btrfs_key_ptr)); | ||
| 1893 | |||
| 1894 | copy_extent_buffer(dst, src, | ||
| 1895 | btrfs_node_key_ptr_offset(0), | ||
| 1896 | btrfs_node_key_ptr_offset(src_nritems - push_items), | ||
| 1897 | push_items * sizeof(struct btrfs_key_ptr)); | ||
| 1898 | |||
| 1899 | btrfs_set_header_nritems(src, src_nritems - push_items); | ||
| 1900 | btrfs_set_header_nritems(dst, dst_nritems + push_items); | ||
| 1901 | |||
| 1902 | btrfs_mark_buffer_dirty(src); | ||
| 1903 | btrfs_mark_buffer_dirty(dst); | ||
| 1904 | |||
| 1905 | ret = btrfs_update_ref(trans, root, src, dst, 0, push_items); | ||
| 1906 | BUG_ON(ret); | ||
| 1907 | |||
| 1908 | return ret; | ||
| 1909 | } | ||
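Compared with the left push, the right push is deliberately gentler; distilled (an illustrative model): never touch a source of fewer than 4 items, never push more than half of it plus one, and never empty it.

	/* Hedged model of the right-push limits; returns 0 for "give up". */
	static int toy_balance_right(int src_nritems, int dst_free)
	{
		int max_push;

		if (dst_free <= 0 || src_nritems < 4)
			return 0;
		max_push = src_nritems / 2 + 1;
		if (max_push >= src_nritems)
			return 0;		/* refuse to empty the node */
		return max_push < dst_free ? max_push : dst_free;
	}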
| 1910 | |||
| 1911 | /* | ||
| 1912 | * helper function to insert a new root level in the tree. | ||
| 1913 | * A new node is allocated, and a single item is inserted to | ||
| 1914 | * point to the existing root | ||
| 1915 | * | ||
| 1916 | * returns zero on success or < 0 on failure. | ||
| 1917 | */ | ||
| 1918 | static int noinline insert_new_root(struct btrfs_trans_handle *trans, | ||
| 1919 | struct btrfs_root *root, | ||
| 1920 | struct btrfs_path *path, int level) | ||
| 1921 | { | ||
| 1922 | u64 lower_gen; | ||
| 1923 | struct extent_buffer *lower; | ||
| 1924 | struct extent_buffer *c; | ||
| 1925 | struct extent_buffer *old; | ||
| 1926 | struct btrfs_disk_key lower_key; | ||
| 1927 | int ret; | ||
| 1928 | |||
| 1929 | BUG_ON(path->nodes[level]); | ||
| 1930 | BUG_ON(path->nodes[level - 1] != root->node); | ||
| 1931 | |||
| 1932 | lower = path->nodes[level - 1]; | ||
| 1933 | if (level == 1) | ||
| 1934 | btrfs_item_key(lower, &lower_key, 0); | ||
| 1935 | else | ||
| 1936 | btrfs_node_key(lower, &lower_key, 0); | ||
| 1937 | |||
| 1938 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | ||
| 1939 | root->root_key.objectid, trans->transid, | ||
| 1940 | level, root->node->start, 0); | ||
| 1941 | if (IS_ERR(c)) | ||
| 1942 | return PTR_ERR(c); | ||
| 1943 | |||
| 1944 | memset_extent_buffer(c, 0, 0, root->nodesize); | ||
| 1945 | btrfs_set_header_nritems(c, 1); | ||
| 1946 | btrfs_set_header_level(c, level); | ||
| 1947 | btrfs_set_header_bytenr(c, c->start); | ||
| 1948 | btrfs_set_header_generation(c, trans->transid); | ||
| 1949 | btrfs_set_header_owner(c, root->root_key.objectid); | ||
| 1950 | |||
| 1951 | write_extent_buffer(c, root->fs_info->fsid, | ||
| 1952 | (unsigned long)btrfs_header_fsid(c), | ||
| 1953 | BTRFS_FSID_SIZE); | ||
| 1954 | |||
| 1955 | write_extent_buffer(c, root->fs_info->chunk_tree_uuid, | ||
| 1956 | (unsigned long)btrfs_header_chunk_tree_uuid(c), | ||
| 1957 | BTRFS_UUID_SIZE); | ||
| 1958 | |||
| 1959 | btrfs_set_node_key(c, &lower_key, 0); | ||
| 1960 | btrfs_set_node_blockptr(c, 0, lower->start); | ||
| 1961 | lower_gen = btrfs_header_generation(lower); | ||
| 1962 | WARN_ON(lower_gen != trans->transid); | ||
| 1963 | |||
| 1964 | btrfs_set_node_ptr_generation(c, 0, lower_gen); | ||
| 1965 | |||
| 1966 | btrfs_mark_buffer_dirty(c); | ||
| 1967 | |||
| 1968 | spin_lock(&root->node_lock); | ||
| 1969 | old = root->node; | ||
| 1970 | root->node = c; | ||
| 1971 | spin_unlock(&root->node_lock); | ||
| 1972 | |||
| 1973 | ret = btrfs_update_extent_ref(trans, root, lower->start, | ||
| 1974 | lower->start, c->start, | ||
| 1975 | root->root_key.objectid, | ||
| 1976 | trans->transid, level - 1); | ||
| 1977 | BUG_ON(ret); | ||
| 1978 | |||
| 1979 | /* the super has an extra ref to root->node */ | ||
| 1980 | free_extent_buffer(old); | ||
| 1981 | |||
| 1982 | add_root_to_dirty_list(root); | ||
| 1983 | extent_buffer_get(c); | ||
| 1984 | path->nodes[level] = c; | ||
| 1985 | path->locks[level] = 1; | ||
| 1986 | path->slots[level] = 0; | ||
| 1987 | return 0; | ||
| 1988 | } | ||
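In other words, growing the tree is cheap: the new root carries a single pointer whose key is the first key of the old root, so every existing lookup path simply gains one hop. A minimal model of that invariant (illustrative types only):

	/* Model of the height bump: one level taller, exactly one pointer,
	 * keyed by the old root's first key. */
	struct toy_root {
		int level;
		int nritems;
		long long first_key;
	};

	static void toy_grow_root(struct toy_root *r, long long old_first_key)
	{
		r->level++;
		r->nritems = 1;
		r->first_key = old_first_key;
	}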
| 1989 | |||
| 1990 | /* | ||
| 1991 | * worker function to insert a single pointer in a node. | ||
| 1992 | * the node should have enough room for the pointer already | ||
| 1993 | * | ||
| 1994 | * slot and level indicate where you want the key to go, and | ||
| 1995 | * blocknr is the block the key points to. | ||
| 1996 | * | ||
| 1997 | * returns zero on success and < 0 on any error | ||
| 1998 | */ | ||
| 1999 | static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 2000 | *root, struct btrfs_path *path, struct btrfs_disk_key | ||
| 2001 | *key, u64 bytenr, int slot, int level) | ||
| 2002 | { | ||
| 2003 | struct extent_buffer *lower; | ||
| 2004 | int nritems; | ||
| 2005 | |||
| 2006 | BUG_ON(!path->nodes[level]); | ||
| 2007 | lower = path->nodes[level]; | ||
| 2008 | nritems = btrfs_header_nritems(lower); | ||
| 2009 | if (slot > nritems) | ||
| 2010 | BUG(); | ||
| 2011 | if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) | ||
| 2012 | BUG(); | ||
| 2013 | if (slot != nritems) { | ||
| 2014 | memmove_extent_buffer(lower, | ||
| 2015 | btrfs_node_key_ptr_offset(slot + 1), | ||
| 2016 | btrfs_node_key_ptr_offset(slot), | ||
| 2017 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); | ||
| 2018 | } | ||
| 2019 | btrfs_set_node_key(lower, key, slot); | ||
| 2020 | btrfs_set_node_blockptr(lower, slot, bytenr); | ||
| 2021 | WARN_ON(trans->transid == 0); | ||
| 2022 | btrfs_set_node_ptr_generation(lower, slot, trans->transid); | ||
| 2023 | btrfs_set_header_nritems(lower, nritems + 1); | ||
| 2024 | btrfs_mark_buffer_dirty(lower); | ||
| 2025 | return 0; | ||
| 2026 | } | ||
| 2027 | |||
| 2028 | /* | ||
| 2029 | * split the node at the specified level in path in two. | ||
| 2030 | * The path is corrected to point to the appropriate node after the split | ||
| 2031 | * | ||
| 2032 | * Before splitting this tries to make some room in the node by pushing | ||
| 2033 | * left and right, if either one works, it returns right away. | ||
| 2034 | * | ||
| 2035 | * returns 0 on success and < 0 on failure | ||
| 2036 | */ | ||
| 2037 | static noinline int split_node(struct btrfs_trans_handle *trans, | ||
| 2038 | struct btrfs_root *root, | ||
| 2039 | struct btrfs_path *path, int level) | ||
| 2040 | { | ||
| 2041 | struct extent_buffer *c; | ||
| 2042 | struct extent_buffer *split; | ||
| 2043 | struct btrfs_disk_key disk_key; | ||
| 2044 | int mid; | ||
| 2045 | int ret; | ||
| 2046 | int wret; | ||
| 2047 | u32 c_nritems; | ||
| 2048 | |||
| 2049 | c = path->nodes[level]; | ||
| 2050 | WARN_ON(btrfs_header_generation(c) != trans->transid); | ||
| 2051 | if (c == root->node) { | ||
| 2052 | /* trying to split the root, let's make a new one */ | ||
| 2053 | ret = insert_new_root(trans, root, path, level + 1); | ||
| 2054 | if (ret) | ||
| 2055 | return ret; | ||
| 2056 | } else { | ||
| 2057 | ret = push_nodes_for_insert(trans, root, path, level); | ||
| 2058 | c = path->nodes[level]; | ||
| 2059 | if (!ret && btrfs_header_nritems(c) < | ||
| 2060 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) | ||
| 2061 | return 0; | ||
| 2062 | if (ret < 0) | ||
| 2063 | return ret; | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | c_nritems = btrfs_header_nritems(c); | ||
| 2067 | |||
| 2068 | split = btrfs_alloc_free_block(trans, root, root->nodesize, | ||
| 2069 | path->nodes[level + 1]->start, | ||
| 2070 | root->root_key.objectid, | ||
| 2071 | trans->transid, level, c->start, 0); | ||
| 2072 | if (IS_ERR(split)) | ||
| 2073 | return PTR_ERR(split); | ||
| 2074 | |||
| 2075 | btrfs_set_header_flags(split, btrfs_header_flags(c)); | ||
| 2076 | btrfs_set_header_level(split, btrfs_header_level(c)); | ||
| 2077 | btrfs_set_header_bytenr(split, split->start); | ||
| 2078 | btrfs_set_header_generation(split, trans->transid); | ||
| 2079 | btrfs_set_header_owner(split, root->root_key.objectid); | ||
| 2080 | btrfs_set_header_flags(split, 0); | ||
| 2081 | write_extent_buffer(split, root->fs_info->fsid, | ||
| 2082 | (unsigned long)btrfs_header_fsid(split), | ||
| 2083 | BTRFS_FSID_SIZE); | ||
| 2084 | write_extent_buffer(split, root->fs_info->chunk_tree_uuid, | ||
| 2085 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | ||
| 2086 | BTRFS_UUID_SIZE); | ||
| 2087 | |||
| 2088 | mid = (c_nritems + 1) / 2; | ||
| 2089 | |||
| 2090 | copy_extent_buffer(split, c, | ||
| 2091 | btrfs_node_key_ptr_offset(0), | ||
| 2092 | btrfs_node_key_ptr_offset(mid), | ||
| 2093 | (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); | ||
| 2094 | btrfs_set_header_nritems(split, c_nritems - mid); | ||
| 2095 | btrfs_set_header_nritems(c, mid); | ||
| 2096 | ret = 0; | ||
| 2097 | |||
| 2098 | btrfs_mark_buffer_dirty(c); | ||
| 2099 | btrfs_mark_buffer_dirty(split); | ||
| 2100 | |||
| 2101 | btrfs_node_key(split, &disk_key, 0); | ||
| 2102 | wret = insert_ptr(trans, root, path, &disk_key, split->start, | ||
| 2103 | path->slots[level + 1] + 1, | ||
| 2104 | level + 1); | ||
| 2105 | if (wret) | ||
| 2106 | ret = wret; | ||
| 2107 | |||
| 2108 | ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid); | ||
| 2109 | BUG_ON(ret); | ||
| 2110 | |||
| 2111 | if (path->slots[level] >= mid) { | ||
| 2112 | path->slots[level] -= mid; | ||
| 2113 | btrfs_tree_unlock(c); | ||
| 2114 | free_extent_buffer(c); | ||
| 2115 | path->nodes[level] = split; | ||
| 2116 | path->slots[level + 1] += 1; | ||
| 2117 | } else { | ||
| 2118 | btrfs_tree_unlock(split); | ||
| 2119 | free_extent_buffer(split); | ||
| 2120 | } | ||
| 2121 | return ret; | ||
| 2122 | } | ||
| 2123 | |||
| 2124 | /* | ||
| 2125 | * how many bytes are required to store the items in a leaf. start | ||
| 2126 | * and nr indicate which items in the leaf to check. This totals up the | ||
| 2127 | * space used both by the item structs and the item data | ||
| 2128 | */ | ||
| 2129 | static int leaf_space_used(struct extent_buffer *l, int start, int nr) | ||
| 2130 | { | ||
| 2131 | int data_len; | ||
| 2132 | int nritems = btrfs_header_nritems(l); | ||
| 2133 | int end = min(nritems, start + nr) - 1; | ||
| 2134 | |||
| 2135 | if (!nr) | ||
| 2136 | return 0; | ||
| 2137 | data_len = btrfs_item_end_nr(l, start); | ||
| 2138 | data_len = data_len - btrfs_item_offset_nr(l, end); | ||
| 2139 | data_len += sizeof(struct btrfs_item) * nr; | ||
| 2140 | WARN_ON(data_len < 0); | ||
| 2141 | return data_len; | ||
| 2142 | } | ||
| 2143 | |||
| 2144 | /* | ||
| 2145 | * The space between the end of the leaf items and | ||
| 2146 | * the start of the leaf data. IOW, how much room | ||
| 2147 | * the leaf has left for both items and data | ||
| 2148 | */ | ||
| 2149 | int noinline btrfs_leaf_free_space(struct btrfs_root *root, | ||
| 2150 | struct extent_buffer *leaf) | ||
| 2151 | { | ||
| 2152 | int nritems = btrfs_header_nritems(leaf); | ||
| 2153 | int ret; | ||
| 2154 | ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); | ||
| 2155 | if (ret < 0) { | ||
| 2156 | printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", | ||
| 2157 | ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), | ||
| 2158 | leaf_space_used(leaf, 0, nritems), nritems); | ||
| 2159 | } | ||
| 2160 | return ret; | ||
| 2161 | } | ||
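Since items and their data grow toward each other in one shared area, the accounting reduces to the sum below (a stand-alone restatement with an illustrative item struct; the real data-area size depends on the leaf size chosen at mkfs time):

	/* Each item costs one fixed header plus its data bytes, all carved
	 * from the same data area; free space is whatever is left. */
	struct toy_item {
		unsigned int offset;
		unsigned int size;
	};

	static int toy_leaf_free_space(unsigned int data_area_size,
				       const struct toy_item *items, int nritems)
	{
		unsigned int used = 0;
		int i;

		for (i = 0; i < nritems; i++)
			used += sizeof(struct toy_item) + items[i].size;
		return (int)(data_area_size - used);
	}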
| 2162 | |||
| 2163 | /* | ||
| 2164 | * push some data in the path leaf to the right, trying to free up at | ||
| 2165 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | ||
| 2166 | * | ||
| 2167 | * returns 1 if the push failed because the other node didn't have enough | ||
| 2168 | * room, 0 if everything worked out and < 0 if there were major errors. | ||
| 2169 | */ | ||
| 2170 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 2171 | *root, struct btrfs_path *path, int data_size, | ||
| 2172 | int empty) | ||
| 2173 | { | ||
| 2174 | struct extent_buffer *left = path->nodes[0]; | ||
| 2175 | struct extent_buffer *right; | ||
| 2176 | struct extent_buffer *upper; | ||
| 2177 | struct btrfs_disk_key disk_key; | ||
| 2178 | int slot; | ||
| 2179 | u32 i; | ||
| 2180 | int free_space; | ||
| 2181 | int push_space = 0; | ||
| 2182 | int push_items = 0; | ||
| 2183 | struct btrfs_item *item; | ||
| 2184 | u32 left_nritems; | ||
| 2185 | u32 nr; | ||
| 2186 | u32 right_nritems; | ||
| 2187 | u32 data_end; | ||
| 2188 | u32 this_item_size; | ||
| 2189 | int ret; | ||
| 2190 | |||
| 2191 | slot = path->slots[1]; | ||
| 2192 | if (!path->nodes[1]) { | ||
| 2193 | return 1; | ||
| 2194 | } | ||
| 2195 | upper = path->nodes[1]; | ||
| 2196 | if (slot >= btrfs_header_nritems(upper) - 1) | ||
| 2197 | return 1; | ||
| 2198 | |||
| 2199 | WARN_ON(!btrfs_tree_locked(path->nodes[1])); | ||
| 2200 | |||
| 2201 | right = read_node_slot(root, upper, slot + 1); | ||
| 2202 | btrfs_tree_lock(right); | ||
| 2203 | free_space = btrfs_leaf_free_space(root, right); | ||
| 2204 | if (free_space < data_size + sizeof(struct btrfs_item)) | ||
| 2205 | goto out_unlock; | ||
| 2206 | |||
| 2207 | /* cow and double check */ | ||
| 2208 | ret = btrfs_cow_block(trans, root, right, upper, | ||
| 2209 | slot + 1, &right, 0); | ||
| 2210 | if (ret) | ||
| 2211 | goto out_unlock; | ||
| 2212 | |||
| 2213 | free_space = btrfs_leaf_free_space(root, right); | ||
| 2214 | if (free_space < data_size + sizeof(struct btrfs_item)) | ||
| 2215 | goto out_unlock; | ||
| 2216 | |||
| 2217 | left_nritems = btrfs_header_nritems(left); | ||
| 2218 | if (left_nritems == 0) | ||
| 2219 | goto out_unlock; | ||
| 2220 | |||
| 2221 | if (empty) | ||
| 2222 | nr = 0; | ||
| 2223 | else | ||
| 2224 | nr = 1; | ||
| 2225 | |||
| 2226 | if (path->slots[0] >= left_nritems) | ||
| 2227 | push_space += data_size + sizeof(*item); | ||
| 2228 | |||
| 2229 | i = left_nritems - 1; | ||
| 2230 | while (i >= nr) { | ||
| 2231 | item = btrfs_item_nr(left, i); | ||
| 2232 | |||
| 2233 | if (!empty && push_items > 0) { | ||
| 2234 | if (path->slots[0] > i) | ||
| 2235 | break; | ||
| 2236 | if (path->slots[0] == i) { | ||
| 2237 | int space = btrfs_leaf_free_space(root, left); | ||
| 2238 | if (space + push_space * 2 > free_space) | ||
| 2239 | break; | ||
| 2240 | } | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | if (path->slots[0] == i) | ||
| 2244 | push_space += data_size + sizeof(*item); | ||
| 2245 | |||
| 2246 | if (!left->map_token) { | ||
| 2247 | map_extent_buffer(left, (unsigned long)item, | ||
| 2248 | sizeof(struct btrfs_item), | ||
| 2249 | &left->map_token, &left->kaddr, | ||
| 2250 | &left->map_start, &left->map_len, | ||
| 2251 | KM_USER1); | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | this_item_size = btrfs_item_size(left, item); | ||
| 2255 | if (this_item_size + sizeof(*item) + push_space > free_space) | ||
| 2256 | break; | ||
| 2257 | |||
| 2258 | push_items++; | ||
| 2259 | push_space += this_item_size + sizeof(*item); | ||
| 2260 | if (i == 0) | ||
| 2261 | break; | ||
| 2262 | i--; | ||
| 2263 | } | ||
| 2264 | if (left->map_token) { | ||
| 2265 | unmap_extent_buffer(left, left->map_token, KM_USER1); | ||
| 2266 | left->map_token = NULL; | ||
| 2267 | } | ||
| 2268 | |||
| 2269 | if (push_items == 0) | ||
| 2270 | goto out_unlock; | ||
| 2271 | |||
| 2272 | if (!empty && push_items == left_nritems) | ||
| 2273 | WARN_ON(1); | ||
| 2274 | |||
| 2275 | /* push left to right */ | ||
| 2276 | right_nritems = btrfs_header_nritems(right); | ||
| 2277 | |||
| 2278 | push_space = btrfs_item_end_nr(left, left_nritems - push_items); | ||
| 2279 | push_space -= leaf_data_end(root, left); | ||
| 2280 | |||
| 2281 | /* make room in the right data area */ | ||
| 2282 | data_end = leaf_data_end(root, right); | ||
| 2283 | memmove_extent_buffer(right, | ||
| 2284 | btrfs_leaf_data(right) + data_end - push_space, | ||
| 2285 | btrfs_leaf_data(right) + data_end, | ||
| 2286 | BTRFS_LEAF_DATA_SIZE(root) - data_end); | ||
| 2287 | |||
| 2288 | /* copy from the left data area */ | ||
| 2289 | copy_extent_buffer(right, left, btrfs_leaf_data(right) + | ||
| 2290 | BTRFS_LEAF_DATA_SIZE(root) - push_space, | ||
| 2291 | btrfs_leaf_data(left) + leaf_data_end(root, left), | ||
| 2292 | push_space); | ||
| 2293 | |||
| 2294 | memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), | ||
| 2295 | btrfs_item_nr_offset(0), | ||
| 2296 | right_nritems * sizeof(struct btrfs_item)); | ||
| 2297 | |||
| 2298 | /* copy the items from left to right */ | ||
| 2299 | copy_extent_buffer(right, left, btrfs_item_nr_offset(0), | ||
| 2300 | btrfs_item_nr_offset(left_nritems - push_items), | ||
| 2301 | push_items * sizeof(struct btrfs_item)); | ||
| 2302 | |||
| 2303 | /* update the item pointers */ | ||
| 2304 | right_nritems += push_items; | ||
| 2305 | btrfs_set_header_nritems(right, right_nritems); | ||
| 2306 | push_space = BTRFS_LEAF_DATA_SIZE(root); | ||
| 2307 | for (i = 0; i < right_nritems; i++) { | ||
| 2308 | item = btrfs_item_nr(right, i); | ||
| 2309 | if (!right->map_token) { | ||
| 2310 | map_extent_buffer(right, (unsigned long)item, | ||
| 2311 | sizeof(struct btrfs_item), | ||
| 2312 | &right->map_token, &right->kaddr, | ||
| 2313 | &right->map_start, &right->map_len, | ||
| 2314 | KM_USER1); | ||
| 2315 | } | ||
| 2316 | push_space -= btrfs_item_size(right, item); | ||
| 2317 | btrfs_set_item_offset(right, item, push_space); | ||
| 2318 | } | ||
| 2319 | |||
| 2320 | if (right->map_token) { | ||
| 2321 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
| 2322 | right->map_token = NULL; | ||
| 2323 | } | ||
| 2324 | left_nritems -= push_items; | ||
| 2325 | btrfs_set_header_nritems(left, left_nritems); | ||
| 2326 | |||
| 2327 | if (left_nritems) | ||
| 2328 | btrfs_mark_buffer_dirty(left); | ||
| 2329 | btrfs_mark_buffer_dirty(right); | ||
| 2330 | |||
| 2331 | ret = btrfs_update_ref(trans, root, left, right, 0, push_items); | ||
| 2332 | BUG_ON(ret); | ||
| 2333 | |||
| 2334 | btrfs_item_key(right, &disk_key, 0); | ||
| 2335 | btrfs_set_node_key(upper, &disk_key, slot + 1); | ||
| 2336 | btrfs_mark_buffer_dirty(upper); | ||
| 2337 | |||
| 2338 | /* then fixup the leaf pointer in the path */ | ||
| 2339 | if (path->slots[0] >= left_nritems) { | ||
| 2340 | path->slots[0] -= left_nritems; | ||
| 2341 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
| 2342 | clean_tree_block(trans, root, path->nodes[0]); | ||
| 2343 | btrfs_tree_unlock(path->nodes[0]); | ||
| 2344 | free_extent_buffer(path->nodes[0]); | ||
| 2345 | path->nodes[0] = right; | ||
| 2346 | path->slots[1] += 1; | ||
| 2347 | } else { | ||
| 2348 | btrfs_tree_unlock(right); | ||
| 2349 | free_extent_buffer(right); | ||
| 2350 | } | ||
| 2351 | return 0; | ||
| 2352 | |||
| 2353 | out_unlock: | ||
| 2354 | btrfs_tree_unlock(right); | ||
| 2355 | free_extent_buffer(right); | ||
| 2356 | return 1; | ||
| 2357 | } | ||
| 2358 | |||
| 2359 | /* | ||
| 2360 | * push some data in the path leaf to the left, trying to free up at | ||
| 2361 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | ||
| 2362 | */ | ||
| 2363 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 2364 | *root, struct btrfs_path *path, int data_size, | ||
| 2365 | int empty) | ||
| 2366 | { | ||
| 2367 | struct btrfs_disk_key disk_key; | ||
| 2368 | struct extent_buffer *right = path->nodes[0]; | ||
| 2369 | struct extent_buffer *left; | ||
| 2370 | int slot; | ||
| 2371 | int i; | ||
| 2372 | int free_space; | ||
| 2373 | int push_space = 0; | ||
| 2374 | int push_items = 0; | ||
| 2375 | struct btrfs_item *item; | ||
| 2376 | u32 old_left_nritems; | ||
| 2377 | u32 right_nritems; | ||
| 2378 | u32 nr; | ||
| 2379 | int ret = 0; | ||
| 2380 | int wret; | ||
| 2381 | u32 this_item_size; | ||
| 2382 | u32 old_left_item_size; | ||
| 2383 | |||
| 2384 | slot = path->slots[1]; | ||
| 2385 | if (slot == 0) | ||
| 2386 | return 1; | ||
| 2387 | if (!path->nodes[1]) | ||
| 2388 | return 1; | ||
| 2389 | |||
| 2390 | right_nritems = btrfs_header_nritems(right); | ||
| 2391 | if (right_nritems == 0) { | ||
| 2392 | return 1; | ||
| 2393 | } | ||
| 2394 | |||
| 2395 | WARN_ON(!btrfs_tree_locked(path->nodes[1])); | ||
| 2396 | |||
| 2397 | left = read_node_slot(root, path->nodes[1], slot - 1); | ||
| 2398 | btrfs_tree_lock(left); | ||
| 2399 | free_space = btrfs_leaf_free_space(root, left); | ||
| 2400 | if (free_space < data_size + sizeof(struct btrfs_item)) { | ||
| 2401 | ret = 1; | ||
| 2402 | goto out; | ||
| 2403 | } | ||
| 2404 | |||
| 2405 | /* cow and double check */ | ||
| 2406 | ret = btrfs_cow_block(trans, root, left, | ||
| 2407 | path->nodes[1], slot - 1, &left, 0); | ||
| 2408 | if (ret) { | ||
| 2409 | /* we hit -ENOSPC, but it isn't fatal here */ | ||
| 2410 | ret = 1; | ||
| 2411 | goto out; | ||
| 2412 | } | ||
| 2413 | |||
| 2414 | free_space = btrfs_leaf_free_space(root, left); | ||
| 2415 | if (free_space < data_size + sizeof(struct btrfs_item)) { | ||
| 2416 | ret = 1; | ||
| 2417 | goto out; | ||
| 2418 | } | ||
| 2419 | |||
| 2420 | if (empty) | ||
| 2421 | nr = right_nritems; | ||
| 2422 | else | ||
| 2423 | nr = right_nritems - 1; | ||
| 2424 | |||
| 2425 | for (i = 0; i < nr; i++) { | ||
| 2426 | item = btrfs_item_nr(right, i); | ||
| 2427 | if (!right->map_token) { | ||
| 2428 | map_extent_buffer(right, (unsigned long)item, | ||
| 2429 | sizeof(struct btrfs_item), | ||
| 2430 | &right->map_token, &right->kaddr, | ||
| 2431 | &right->map_start, &right->map_len, | ||
| 2432 | KM_USER1); | ||
| 2433 | } | ||
| 2434 | |||
| 2435 | if (!empty && push_items > 0) { | ||
| 2436 | if (path->slots[0] < i) | ||
| 2437 | break; | ||
| 2438 | if (path->slots[0] == i) { | ||
| 2439 | int space = btrfs_leaf_free_space(root, right); | ||
| 2440 | if (space + push_space * 2 > free_space) | ||
| 2441 | break; | ||
| 2442 | } | ||
| 2443 | } | ||
| 2444 | |||
| 2445 | if (path->slots[0] == i) | ||
| 2446 | push_space += data_size + sizeof(*item); | ||
| 2447 | |||
| 2448 | this_item_size = btrfs_item_size(right, item); | ||
| 2449 | if (this_item_size + sizeof(*item) + push_space > free_space) | ||
| 2450 | break; | ||
| 2451 | |||
| 2452 | push_items++; | ||
| 2453 | push_space += this_item_size + sizeof(*item); | ||
| 2454 | } | ||
| 2455 | |||
| 2456 | if (right->map_token) { | ||
| 2457 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
| 2458 | right->map_token = NULL; | ||
| 2459 | } | ||
| 2460 | |||
| 2461 | if (push_items == 0) { | ||
| 2462 | ret = 1; | ||
| 2463 | goto out; | ||
| 2464 | } | ||
| 2465 | if (!empty && push_items == btrfs_header_nritems(right)) | ||
| 2466 | WARN_ON(1); | ||
| 2467 | |||
| 2468 | /* push data from right to left */ | ||
| 2469 | copy_extent_buffer(left, right, | ||
| 2470 | btrfs_item_nr_offset(btrfs_header_nritems(left)), | ||
| 2471 | btrfs_item_nr_offset(0), | ||
| 2472 | push_items * sizeof(struct btrfs_item)); | ||
| 2473 | |||
| 2474 | push_space = BTRFS_LEAF_DATA_SIZE(root) - | ||
| 2475 | btrfs_item_offset_nr(right, push_items - 1); | ||
| 2476 | |||
| 2477 | copy_extent_buffer(left, right, btrfs_leaf_data(left) + | ||
| 2478 | leaf_data_end(root, left) - push_space, | ||
| 2479 | btrfs_leaf_data(right) + | ||
| 2480 | btrfs_item_offset_nr(right, push_items - 1), | ||
| 2481 | push_space); | ||
| 2482 | old_left_nritems = btrfs_header_nritems(left); | ||
| 2483 | BUG_ON(old_left_nritems < 0); | ||
| 2484 | |||
| 2485 | old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); | ||
| 2486 | for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { | ||
| 2487 | u32 ioff; | ||
| 2488 | |||
| 2489 | item = btrfs_item_nr(left, i); | ||
| 2490 | if (!left->map_token) { | ||
| 2491 | map_extent_buffer(left, (unsigned long)item, | ||
| 2492 | sizeof(struct btrfs_item), | ||
| 2493 | &left->map_token, &left->kaddr, | ||
| 2494 | &left->map_start, &left->map_len, | ||
| 2495 | KM_USER1); | ||
| 2496 | } | ||
| 2497 | |||
| 2498 | ioff = btrfs_item_offset(left, item); | ||
| 2499 | btrfs_set_item_offset(left, item, | ||
| 2500 | ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); | ||
| 2501 | } | ||
| 2502 | btrfs_set_header_nritems(left, old_left_nritems + push_items); | ||
| 2503 | if (left->map_token) { | ||
| 2504 | unmap_extent_buffer(left, left->map_token, KM_USER1); | ||
| 2505 | left->map_token = NULL; | ||
| 2506 | } | ||
| 2507 | |||
| 2508 | /* fixup right node */ | ||
| 2509 | if (push_items > right_nritems) { | ||
| 2510 | printk("push items %d nr %u\n", push_items, right_nritems); | ||
| 2511 | WARN_ON(1); | ||
| 2512 | } | ||
| 2513 | |||
| 2514 | if (push_items < right_nritems) { | ||
| 2515 | push_space = btrfs_item_offset_nr(right, push_items - 1) - | ||
| 2516 | leaf_data_end(root, right); | ||
| 2517 | memmove_extent_buffer(right, btrfs_leaf_data(right) + | ||
| 2518 | BTRFS_LEAF_DATA_SIZE(root) - push_space, | ||
| 2519 | btrfs_leaf_data(right) + | ||
| 2520 | leaf_data_end(root, right), push_space); | ||
| 2521 | |||
| 2522 | memmove_extent_buffer(right, btrfs_item_nr_offset(0), | ||
| 2523 | btrfs_item_nr_offset(push_items), | ||
| 2524 | (btrfs_header_nritems(right) - push_items) * | ||
| 2525 | sizeof(struct btrfs_item)); | ||
| 2526 | } | ||
| 2527 | right_nritems -= push_items; | ||
| 2528 | btrfs_set_header_nritems(right, right_nritems); | ||
| 2529 | push_space = BTRFS_LEAF_DATA_SIZE(root); | ||
| 2530 | for (i = 0; i < right_nritems; i++) { | ||
| 2531 | item = btrfs_item_nr(right, i); | ||
| 2532 | |||
| 2533 | if (!right->map_token) { | ||
| 2534 | map_extent_buffer(right, (unsigned long)item, | ||
| 2535 | sizeof(struct btrfs_item), | ||
| 2536 | &right->map_token, &right->kaddr, | ||
| 2537 | &right->map_start, &right->map_len, | ||
| 2538 | KM_USER1); | ||
| 2539 | } | ||
| 2540 | |||
| 2541 | push_space = push_space - btrfs_item_size(right, item); | ||
| 2542 | btrfs_set_item_offset(right, item, push_space); | ||
| 2543 | } | ||
| 2544 | if (right->map_token) { | ||
| 2545 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
| 2546 | right->map_token = NULL; | ||
| 2547 | } | ||
| 2548 | |||
| 2549 | btrfs_mark_buffer_dirty(left); | ||
| 2550 | if (right_nritems) | ||
| 2551 | btrfs_mark_buffer_dirty(right); | ||
| 2552 | |||
| 2553 | ret = btrfs_update_ref(trans, root, right, left, | ||
| 2554 | old_left_nritems, push_items); | ||
| 2555 | BUG_ON(ret); | ||
| 2556 | |||
| 2557 | btrfs_item_key(right, &disk_key, 0); | ||
| 2558 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | ||
| 2559 | if (wret) | ||
| 2560 | ret = wret; | ||
| 2561 | |||
| 2562 | /* then fixup the leaf pointer in the path */ | ||
| 2563 | if (path->slots[0] < push_items) { | ||
| 2564 | path->slots[0] += old_left_nritems; | ||
| 2565 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
| 2566 | clean_tree_block(trans, root, path->nodes[0]); | ||
| 2567 | btrfs_tree_unlock(path->nodes[0]); | ||
| 2568 | free_extent_buffer(path->nodes[0]); | ||
| 2569 | path->nodes[0] = left; | ||
| 2570 | path->slots[1] -= 1; | ||
| 2571 | } else { | ||
| 2572 | btrfs_tree_unlock(left); | ||
| 2573 | free_extent_buffer(left); | ||
| 2574 | path->slots[0] -= push_items; | ||
| 2575 | } | ||
| 2576 | BUG_ON(path->slots[0] < 0); | ||
| 2577 | return ret; | ||
| 2578 | out: | ||
| 2579 | btrfs_tree_unlock(left); | ||
| 2580 | free_extent_buffer(left); | ||
| 2581 | return ret; | ||
| 2582 | } | ||
| 2583 | |||
| 2584 | /* | ||
| 2585 | * split the path's leaf in two, making sure there is at least data_size | ||
| 2586 | * available for the resulting leaf level of the path. | ||
| 2587 | * | ||
| 2588 | * returns 0 if all went well and < 0 on failure. | ||
| 2589 | */ | ||
| 2590 | static noinline int split_leaf(struct btrfs_trans_handle *trans, | ||
| 2591 | struct btrfs_root *root, | ||
| 2592 | struct btrfs_key *ins_key, | ||
| 2593 | struct btrfs_path *path, int data_size, | ||
| 2594 | int extend) | ||
| 2595 | { | ||
| 2596 | struct extent_buffer *l; | ||
| 2597 | u32 nritems; | ||
| 2598 | int mid; | ||
| 2599 | int slot; | ||
| 2600 | struct extent_buffer *right; | ||
| 2601 | int space_needed = data_size + sizeof(struct btrfs_item); | ||
| 2602 | int data_copy_size; | ||
| 2603 | int rt_data_off; | ||
| 2604 | int i; | ||
| 2605 | int ret = 0; | ||
| 2606 | int wret; | ||
| 2607 | int double_split; | ||
| 2608 | int num_doubles = 0; | ||
| 2609 | struct btrfs_disk_key disk_key; | ||
| 2610 | |||
| 2611 | if (extend) | ||
| 2612 | space_needed = data_size; | ||
| 2613 | |||
| 2614 | /* first try to make some room by pushing left and right */ | ||
| 2615 | if (ins_key->type != BTRFS_DIR_ITEM_KEY) { | ||
| 2616 | wret = push_leaf_right(trans, root, path, data_size, 0); | ||
| 2617 | if (wret < 0) { | ||
| 2618 | return wret; | ||
| 2619 | } | ||
| 2620 | if (wret) { | ||
| 2621 | wret = push_leaf_left(trans, root, path, data_size, 0); | ||
| 2622 | if (wret < 0) | ||
| 2623 | return wret; | ||
| 2624 | } | ||
| 2625 | l = path->nodes[0]; | ||
| 2626 | |||
| 2627 | /* did the pushes work? */ | ||
| 2628 | if (btrfs_leaf_free_space(root, l) >= space_needed) | ||
| 2629 | return 0; | ||
| 2630 | } | ||
| 2631 | |||
| 2632 | if (!path->nodes[1]) { | ||
| 2633 | ret = insert_new_root(trans, root, path, 1); | ||
| 2634 | if (ret) | ||
| 2635 | return ret; | ||
| 2636 | } | ||
| 2637 | again: | ||
| 2638 | double_split = 0; | ||
| 2639 | l = path->nodes[0]; | ||
| 2640 | slot = path->slots[0]; | ||
| 2641 | nritems = btrfs_header_nritems(l); | ||
| 2641 | mid = (nritems + 1) / 2; | ||
| 2643 | |||
| 2644 | right = btrfs_alloc_free_block(trans, root, root->leafsize, | ||
| 2645 | path->nodes[1]->start, | ||
| 2646 | root->root_key.objectid, | ||
| 2647 | trans->transid, 0, l->start, 0); | ||
| 2648 | if (IS_ERR(right)) { | ||
| 2649 | BUG_ON(1); | ||
| 2650 | return PTR_ERR(right); | ||
| 2651 | } | ||
| 2652 | |||
| 2653 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | ||
| 2654 | btrfs_set_header_bytenr(right, right->start); | ||
| 2655 | btrfs_set_header_generation(right, trans->transid); | ||
| 2656 | btrfs_set_header_owner(right, root->root_key.objectid); | ||
| 2657 | btrfs_set_header_level(right, 0); | ||
| 2658 | write_extent_buffer(right, root->fs_info->fsid, | ||
| 2659 | (unsigned long)btrfs_header_fsid(right), | ||
| 2660 | BTRFS_FSID_SIZE); | ||
| 2661 | |||
| 2662 | write_extent_buffer(right, root->fs_info->chunk_tree_uuid, | ||
| 2663 | (unsigned long)btrfs_header_chunk_tree_uuid(right), | ||
| 2664 | BTRFS_UUID_SIZE); | ||
| 2665 | if (mid <= slot) { | ||
| 2666 | if (nritems == 1 || | ||
| 2667 | leaf_space_used(l, mid, nritems - mid) + space_needed > | ||
| 2668 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 2669 | if (slot >= nritems) { | ||
| 2670 | btrfs_cpu_key_to_disk(&disk_key, ins_key); | ||
| 2671 | btrfs_set_header_nritems(right, 0); | ||
| 2672 | wret = insert_ptr(trans, root, path, | ||
| 2673 | &disk_key, right->start, | ||
| 2674 | path->slots[1] + 1, 1); | ||
| 2675 | if (wret) | ||
| 2676 | ret = wret; | ||
| 2677 | |||
| 2678 | btrfs_tree_unlock(path->nodes[0]); | ||
| 2679 | free_extent_buffer(path->nodes[0]); | ||
| 2680 | path->nodes[0] = right; | ||
| 2681 | path->slots[0] = 0; | ||
| 2682 | path->slots[1] += 1; | ||
| 2683 | btrfs_mark_buffer_dirty(right); | ||
| 2684 | return ret; | ||
| 2685 | } | ||
| 2686 | mid = slot; | ||
| 2687 | if (mid != nritems && | ||
| 2688 | leaf_space_used(l, mid, nritems - mid) + | ||
| 2689 | space_needed > BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 2690 | double_split = 1; | ||
| 2691 | } | ||
| 2692 | } | ||
| 2693 | } else { | ||
| 2694 | if (leaf_space_used(l, 0, mid + 1) + space_needed > | ||
| 2695 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 2696 | if (!extend && slot == 0) { | ||
| 2697 | btrfs_cpu_key_to_disk(&disk_key, ins_key); | ||
| 2698 | btrfs_set_header_nritems(right, 0); | ||
| 2699 | wret = insert_ptr(trans, root, path, | ||
| 2700 | &disk_key, | ||
| 2701 | right->start, | ||
| 2702 | path->slots[1], 1); | ||
| 2703 | if (wret) | ||
| 2704 | ret = wret; | ||
| 2705 | btrfs_tree_unlock(path->nodes[0]); | ||
| 2706 | free_extent_buffer(path->nodes[0]); | ||
| 2707 | path->nodes[0] = right; | ||
| 2708 | path->slots[0] = 0; | ||
| 2709 | if (path->slots[1] == 0) { | ||
| 2710 | wret = fixup_low_keys(trans, root, | ||
| 2711 | path, &disk_key, 1); | ||
| 2712 | if (wret) | ||
| 2713 | ret = wret; | ||
| 2714 | } | ||
| 2715 | btrfs_mark_buffer_dirty(right); | ||
| 2716 | return ret; | ||
| 2717 | } else if (extend && slot == 0) { | ||
| 2718 | mid = 1; | ||
| 2719 | } else { | ||
| 2720 | mid = slot; | ||
| 2721 | if (mid != nritems && | ||
| 2722 | leaf_space_used(l, mid, nritems - mid) + | ||
| 2723 | space_needed > BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 2724 | double_split = 1; | ||
| 2725 | } | ||
| 2726 | } | ||
| 2727 | } | ||
| 2728 | } | ||
| 2729 | nritems = nritems - mid; | ||
| 2730 | btrfs_set_header_nritems(right, nritems); | ||
| 2731 | data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); | ||
| 2732 | |||
| 2733 | copy_extent_buffer(right, l, btrfs_item_nr_offset(0), | ||
| 2734 | btrfs_item_nr_offset(mid), | ||
| 2735 | nritems * sizeof(struct btrfs_item)); | ||
| 2736 | |||
| 2737 | copy_extent_buffer(right, l, | ||
| 2738 | btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - | ||
| 2739 | data_copy_size, btrfs_leaf_data(l) + | ||
| 2740 | leaf_data_end(root, l), data_copy_size); | ||
| 2741 | |||
| 2742 | rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - | ||
| 2743 | btrfs_item_end_nr(l, mid); | ||
| 2744 | |||
| 2745 | for (i = 0; i < nritems; i++) { | ||
| 2746 | struct btrfs_item *item = btrfs_item_nr(right, i); | ||
| 2747 | u32 ioff; | ||
| 2748 | |||
| 2749 | if (!right->map_token) { | ||
| 2750 | map_extent_buffer(right, (unsigned long)item, | ||
| 2751 | sizeof(struct btrfs_item), | ||
| 2752 | &right->map_token, &right->kaddr, | ||
| 2753 | &right->map_start, &right->map_len, | ||
| 2754 | KM_USER1); | ||
| 2755 | } | ||
| 2756 | |||
| 2757 | ioff = btrfs_item_offset(right, item); | ||
| 2758 | btrfs_set_item_offset(right, item, ioff + rt_data_off); | ||
| 2759 | } | ||
| 2760 | |||
| 2761 | if (right->map_token) { | ||
| 2762 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
| 2763 | right->map_token = NULL; | ||
| 2764 | } | ||
| 2765 | |||
| 2766 | btrfs_set_header_nritems(l, mid); | ||
| 2767 | ret = 0; | ||
| 2768 | btrfs_item_key(right, &disk_key, 0); | ||
| 2769 | wret = insert_ptr(trans, root, path, &disk_key, right->start, | ||
| 2770 | path->slots[1] + 1, 1); | ||
| 2771 | if (wret) | ||
| 2772 | ret = wret; | ||
| 2773 | |||
| 2774 | btrfs_mark_buffer_dirty(right); | ||
| 2775 | btrfs_mark_buffer_dirty(l); | ||
| 2776 | BUG_ON(path->slots[0] != slot); | ||
| 2777 | |||
| 2778 | ret = btrfs_update_ref(trans, root, l, right, 0, nritems); | ||
| 2779 | BUG_ON(ret); | ||
| 2780 | |||
| 2781 | if (mid <= slot) { | ||
| 2782 | btrfs_tree_unlock(path->nodes[0]); | ||
| 2783 | free_extent_buffer(path->nodes[0]); | ||
| 2784 | path->nodes[0] = right; | ||
| 2785 | path->slots[0] -= mid; | ||
| 2786 | path->slots[1] += 1; | ||
| 2787 | } else { | ||
| 2788 | btrfs_tree_unlock(right); | ||
| 2789 | free_extent_buffer(right); | ||
| 2790 | } | ||
| 2791 | |||
| 2792 | BUG_ON(path->slots[0] < 0); | ||
| 2793 | |||
| 2794 | if (double_split) { | ||
| 2795 | BUG_ON(num_doubles != 0); | ||
| 2796 | num_doubles++; | ||
| 2797 | goto again; | ||
| 2798 | } | ||
| 2799 | return ret; | ||
| 2800 | } | ||
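The offset rebasing in the split above is easy to miss: item data is addressed relative to the end of the leaf's data area, so when items [mid, nritems) move to the new right leaf, every moved offset grows by rt_data_off. A minimal userspace sketch of that arithmetic (the leaf size and item table are invented demo values, not taken from this patch):

    /*
     * Hedged sketch of the split_leaf() offset rebasing.  LEAF_DATA_SIZE
     * stands in for BTRFS_LEAF_DATA_SIZE(root); the items are invented.
     */
    #include <stdio.h>

    #define LEAF_DATA_SIZE 4096

    struct demo_item { unsigned offset, size; };

    int main(void)
    {
            /* data fills backward from the end: item 0 ends at LEAF_DATA_SIZE */
            struct demo_item items[4] = {
                    { 3996, 100 }, { 3896, 100 }, { 3796, 100 }, { 3696, 100 },
            };
            unsigned nritems = 4;
            unsigned mid = (nritems + 1) / 2;               /* 2, as in the code above */
            /* btrfs_item_end_nr(l, mid) is offset + size of item mid */
            unsigned rt_data_off = LEAF_DATA_SIZE -
                    (items[mid].offset + items[mid].size);  /* 200 */

            for (unsigned i = mid; i < nritems; i++)
                    printf("item %u: offset %u -> %u\n", i,
                           items[i].offset, items[i].offset + rt_data_off);
            /* the first moved item now ends exactly at the right leaf's data end */
            return 0;
    }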
| 2801 | |||
| 2802 | /* | ||
| 2803 | * make the item pointed to by the path smaller. new_size indicates | ||
| 2804 | * how small to make it, and from_end tells us if we just chop bytes | ||
| 2805 | * off the end of the item or if we shift the item to chop bytes off | ||
| 2806 | * the front. | ||
| 2807 | */ | ||
| 2808 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, | ||
| 2809 | struct btrfs_root *root, | ||
| 2810 | struct btrfs_path *path, | ||
| 2811 | u32 new_size, int from_end) | ||
| 2812 | { | ||
| 2813 | int ret = 0; | ||
| 2814 | int slot; | ||
| 2815 | int slot_orig; | ||
| 2816 | struct extent_buffer *leaf; | ||
| 2817 | struct btrfs_item *item; | ||
| 2818 | u32 nritems; | ||
| 2819 | unsigned int data_end; | ||
| 2820 | unsigned int old_data_start; | ||
| 2821 | unsigned int old_size; | ||
| 2822 | unsigned int size_diff; | ||
| 2823 | int i; | ||
| 2824 | |||
| 2825 | slot_orig = path->slots[0]; | ||
| 2826 | leaf = path->nodes[0]; | ||
| 2827 | slot = path->slots[0]; | ||
| 2828 | |||
| 2829 | old_size = btrfs_item_size_nr(leaf, slot); | ||
| 2830 | if (old_size == new_size) | ||
| 2831 | return 0; | ||
| 2832 | |||
| 2833 | nritems = btrfs_header_nritems(leaf); | ||
| 2834 | data_end = leaf_data_end(root, leaf); | ||
| 2835 | |||
| 2836 | old_data_start = btrfs_item_offset_nr(leaf, slot); | ||
| 2837 | |||
| 2838 | size_diff = old_size - new_size; | ||
| 2839 | |||
| 2840 | BUG_ON(slot < 0); | ||
| 2841 | BUG_ON(slot >= nritems); | ||
| 2842 | |||
| 2843 | /* | ||
| 2844 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | ||
| 2845 | */ | ||
| 2846 | /* first correct the data pointers */ | ||
| 2847 | for (i = slot; i < nritems; i++) { | ||
| 2848 | u32 ioff; | ||
| 2849 | item = btrfs_item_nr(leaf, i); | ||
| 2850 | |||
| 2851 | if (!leaf->map_token) { | ||
| 2852 | map_extent_buffer(leaf, (unsigned long)item, | ||
| 2853 | sizeof(struct btrfs_item), | ||
| 2854 | &leaf->map_token, &leaf->kaddr, | ||
| 2855 | &leaf->map_start, &leaf->map_len, | ||
| 2856 | KM_USER1); | ||
| 2857 | } | ||
| 2858 | |||
| 2859 | ioff = btrfs_item_offset(leaf, item); | ||
| 2860 | btrfs_set_item_offset(leaf, item, ioff + size_diff); | ||
| 2861 | } | ||
| 2862 | |||
| 2863 | if (leaf->map_token) { | ||
| 2864 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
| 2865 | leaf->map_token = NULL; | ||
| 2866 | } | ||
| 2867 | |||
| 2868 | /* shift the data */ | ||
| 2869 | if (from_end) { | ||
| 2870 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
| 2871 | data_end + size_diff, btrfs_leaf_data(leaf) + | ||
| 2872 | data_end, old_data_start + new_size - data_end); | ||
| 2873 | } else { | ||
| 2874 | struct btrfs_disk_key disk_key; | ||
| 2875 | u64 offset; | ||
| 2876 | |||
| 2877 | btrfs_item_key(leaf, &disk_key, slot); | ||
| 2878 | |||
| 2879 | if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { | ||
| 2880 | unsigned long ptr; | ||
| 2881 | struct btrfs_file_extent_item *fi; | ||
| 2882 | |||
| 2883 | fi = btrfs_item_ptr(leaf, slot, | ||
| 2884 | struct btrfs_file_extent_item); | ||
| 2885 | fi = (struct btrfs_file_extent_item *)( | ||
| 2886 | (unsigned long)fi - size_diff); | ||
| 2887 | |||
| 2888 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 2889 | BTRFS_FILE_EXTENT_INLINE) { | ||
| 2890 | ptr = btrfs_item_ptr_offset(leaf, slot); | ||
| 2891 | memmove_extent_buffer(leaf, ptr, | ||
| 2892 | (unsigned long)fi, | ||
| 2893 | offsetof(struct btrfs_file_extent_item, | ||
| 2894 | disk_bytenr)); | ||
| 2895 | } | ||
| 2896 | } | ||
| 2897 | |||
| 2898 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
| 2899 | data_end + size_diff, btrfs_leaf_data(leaf) + | ||
| 2900 | data_end, old_data_start - data_end); | ||
| 2901 | |||
| 2902 | offset = btrfs_disk_key_offset(&disk_key); | ||
| 2903 | btrfs_set_disk_key_offset(&disk_key, offset + size_diff); | ||
| 2904 | btrfs_set_item_key(leaf, &disk_key, slot); | ||
| 2905 | if (slot == 0) | ||
| 2906 | fixup_low_keys(trans, root, path, &disk_key, 1); | ||
| 2907 | } | ||
| 2908 | |||
| 2909 | item = btrfs_item_nr(leaf, slot); | ||
| 2910 | btrfs_set_item_size(leaf, item, new_size); | ||
| 2911 | btrfs_mark_buffer_dirty(leaf); | ||
| 2912 | |||
| 2913 | ret = 0; | ||
| 2914 | if (btrfs_leaf_free_space(root, leaf) < 0) { | ||
| 2915 | btrfs_print_leaf(root, leaf); | ||
| 2916 | BUG(); | ||
| 2917 | } | ||
| 2918 | return ret; | ||
| 2919 | } | ||
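When from_end is zero the truncated bytes come off the front of the item, so the remaining data slides toward the end of the leaf by size_diff and the item's key offset must grow by the same amount. A hedged userspace sketch of that bookkeeping (all values invented; only the size_diff arithmetic mirrors the code above):

    /* Hedged sketch of btrfs_truncate_item() bookkeeping for from_end == 0. */
    #include <stdio.h>

    int main(void)
    {
            unsigned old_size = 120, new_size = 80;
            unsigned size_diff = old_size - new_size;       /* 40 */
            unsigned old_data_start = 3000;  /* btrfs_item_offset_nr(leaf, slot) */
            unsigned long long key_offset = 8192;

            /* the data slides toward the end of the leaf by size_diff ... */
            printf("new data start: %u\n", old_data_start + size_diff);
            /* ... and the key now names bytes that start size_diff later */
            printf("new key offset: %llu\n", key_offset + size_diff);
            return 0;
    }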
| 2920 | |||
| 2921 | /* | ||
| 2922 | * make the item pointed to by the path bigger, data_size is the new size. | ||
| 2923 | */ | ||
| 2924 | int btrfs_extend_item(struct btrfs_trans_handle *trans, | ||
| 2925 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 2926 | u32 data_size) | ||
| 2927 | { | ||
| 2928 | int ret = 0; | ||
| 2929 | int slot; | ||
| 2930 | int slot_orig; | ||
| 2931 | struct extent_buffer *leaf; | ||
| 2932 | struct btrfs_item *item; | ||
| 2933 | u32 nritems; | ||
| 2934 | unsigned int data_end; | ||
| 2935 | unsigned int old_data; | ||
| 2936 | unsigned int old_size; | ||
| 2937 | int i; | ||
| 2938 | |||
| 2939 | slot_orig = path->slots[0]; | ||
| 2940 | leaf = path->nodes[0]; | ||
| 2941 | |||
| 2942 | nritems = btrfs_header_nritems(leaf); | ||
| 2943 | data_end = leaf_data_end(root, leaf); | ||
| 2944 | |||
| 2945 | if (btrfs_leaf_free_space(root, leaf) < data_size) { | ||
| 2946 | btrfs_print_leaf(root, leaf); | ||
| 2947 | BUG(); | ||
| 2948 | } | ||
| 2949 | slot = path->slots[0]; | ||
| 2950 | old_data = btrfs_item_end_nr(leaf, slot); | ||
| 2951 | |||
| 2952 | BUG_ON(slot < 0); | ||
| 2953 | if (slot >= nritems) { | ||
| 2954 | btrfs_print_leaf(root, leaf); | ||
| 2955 | printk("slot %d too large, nritems %d\n", slot, nritems); | ||
| 2956 | BUG_ON(1); | ||
| 2957 | } | ||
| 2958 | |||
| 2959 | /* | ||
| 2960 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | ||
| 2961 | */ | ||
| 2962 | /* first correct the data pointers */ | ||
| 2963 | for (i = slot; i < nritems; i++) { | ||
| 2964 | u32 ioff; | ||
| 2965 | item = btrfs_item_nr(leaf, i); | ||
| 2966 | |||
| 2967 | if (!leaf->map_token) { | ||
| 2968 | map_extent_buffer(leaf, (unsigned long)item, | ||
| 2969 | sizeof(struct btrfs_item), | ||
| 2970 | &leaf->map_token, &leaf->kaddr, | ||
| 2971 | &leaf->map_start, &leaf->map_len, | ||
| 2972 | KM_USER1); | ||
| 2973 | } | ||
| 2974 | ioff = btrfs_item_offset(leaf, item); | ||
| 2975 | btrfs_set_item_offset(leaf, item, ioff - data_size); | ||
| 2976 | } | ||
| 2977 | |||
| 2978 | if (leaf->map_token) { | ||
| 2979 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
| 2980 | leaf->map_token = NULL; | ||
| 2981 | } | ||
| 2982 | |||
| 2983 | /* shift the data */ | ||
| 2984 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
| 2985 | data_end - data_size, btrfs_leaf_data(leaf) + | ||
| 2986 | data_end, old_data - data_end); | ||
| 2987 | |||
| 2988 | data_end = old_data; | ||
| 2989 | old_size = btrfs_item_size_nr(leaf, slot); | ||
| 2990 | item = btrfs_item_nr(leaf, slot); | ||
| 2991 | btrfs_set_item_size(leaf, item, old_size + data_size); | ||
| 2992 | btrfs_mark_buffer_dirty(leaf); | ||
| 2993 | |||
| 2994 | ret = 0; | ||
| 2995 | if (btrfs_leaf_free_space(root, leaf) < 0) { | ||
| 2996 | btrfs_print_leaf(root, leaf); | ||
| 2997 | BUG(); | ||
| 2998 | } | ||
| 2999 | return ret; | ||
| 3000 | } | ||
| 3001 | |||
| 3002 | /* | ||
| 3003 | * Given a key and some data, insert items into the tree. | ||
| 3004 | * This does all the path init required, making room in the tree if needed. | ||
| 3005 | */ | ||
| 3006 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | ||
| 3007 | struct btrfs_root *root, | ||
| 3008 | struct btrfs_path *path, | ||
| 3009 | struct btrfs_key *cpu_key, u32 *data_size, | ||
| 3010 | int nr) | ||
| 3011 | { | ||
| 3012 | struct extent_buffer *leaf; | ||
| 3013 | struct btrfs_item *item; | ||
| 3014 | int ret = 0; | ||
| 3015 | int slot; | ||
| 3016 | int slot_orig; | ||
| 3017 | int i; | ||
| 3018 | u32 nritems; | ||
| 3019 | u32 total_size = 0; | ||
| 3020 | u32 total_data = 0; | ||
| 3021 | unsigned int data_end; | ||
| 3022 | struct btrfs_disk_key disk_key; | ||
| 3023 | |||
| 3024 | for (i = 0; i < nr; i++) { | ||
| 3025 | total_data += data_size[i]; | ||
| 3026 | } | ||
| 3027 | |||
| 3028 | total_size = total_data + (nr * sizeof(struct btrfs_item)); | ||
| 3029 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
| 3030 | if (ret == 0) | ||
| 3031 | return -EEXIST; | ||
| 3032 | if (ret < 0) | ||
| 3033 | goto out; | ||
| 3034 | |||
| 3035 | slot_orig = path->slots[0]; | ||
| 3036 | leaf = path->nodes[0]; | ||
| 3037 | |||
| 3038 | nritems = btrfs_header_nritems(leaf); | ||
| 3039 | data_end = leaf_data_end(root, leaf); | ||
| 3040 | |||
| 3041 | if (btrfs_leaf_free_space(root, leaf) < total_size) { | ||
| 3042 | btrfs_print_leaf(root, leaf); | ||
| 3043 | printk("not enough freespace need %u have %d\n", | ||
| 3044 | total_size, btrfs_leaf_free_space(root, leaf)); | ||
| 3045 | BUG(); | ||
| 3046 | } | ||
| 3047 | |||
| 3048 | slot = path->slots[0]; | ||
| 3049 | BUG_ON(slot < 0); | ||
| 3050 | |||
| 3051 | if (slot != nritems) { | ||
| 3052 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); | ||
| 3053 | |||
| 3054 | if (old_data < data_end) { | ||
| 3055 | btrfs_print_leaf(root, leaf); | ||
| 3056 | printk("slot %d old_data %d data_end %d\n", | ||
| 3057 | slot, old_data, data_end); | ||
| 3058 | BUG_ON(1); | ||
| 3059 | } | ||
| 3060 | /* | ||
| 3061 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | ||
| 3062 | */ | ||
| 3063 | /* first correct the data pointers */ | ||
| 3064 | WARN_ON(leaf->map_token); | ||
| 3065 | for (i = slot; i < nritems; i++) { | ||
| 3066 | u32 ioff; | ||
| 3067 | |||
| 3068 | item = btrfs_item_nr(leaf, i); | ||
| 3069 | if (!leaf->map_token) { | ||
| 3070 | map_extent_buffer(leaf, (unsigned long)item, | ||
| 3071 | sizeof(struct btrfs_item), | ||
| 3072 | &leaf->map_token, &leaf->kaddr, | ||
| 3073 | &leaf->map_start, &leaf->map_len, | ||
| 3074 | KM_USER1); | ||
| 3075 | } | ||
| 3076 | |||
| 3077 | ioff = btrfs_item_offset(leaf, item); | ||
| 3078 | btrfs_set_item_offset(leaf, item, ioff - total_data); | ||
| 3079 | } | ||
| 3080 | if (leaf->map_token) { | ||
| 3081 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
| 3082 | leaf->map_token = NULL; | ||
| 3083 | } | ||
| 3084 | |||
| 3085 | /* shift the items */ | ||
| 3086 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), | ||
| 3087 | btrfs_item_nr_offset(slot), | ||
| 3088 | (nritems - slot) * sizeof(struct btrfs_item)); | ||
| 3089 | |||
| 3090 | /* shift the data */ | ||
| 3091 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
| 3092 | data_end - total_data, btrfs_leaf_data(leaf) + | ||
| 3093 | data_end, old_data - data_end); | ||
| 3094 | data_end = old_data; | ||
| 3095 | } | ||
| 3096 | |||
| 3097 | /* setup the item for the new data */ | ||
| 3098 | for (i = 0; i < nr; i++) { | ||
| 3099 | btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); | ||
| 3100 | btrfs_set_item_key(leaf, &disk_key, slot + i); | ||
| 3101 | item = btrfs_item_nr(leaf, slot + i); | ||
| 3102 | btrfs_set_item_offset(leaf, item, data_end - data_size[i]); | ||
| 3103 | data_end -= data_size[i]; | ||
| 3104 | btrfs_set_item_size(leaf, item, data_size[i]); | ||
| 3105 | } | ||
| 3106 | btrfs_set_header_nritems(leaf, nritems + nr); | ||
| 3107 | btrfs_mark_buffer_dirty(leaf); | ||
| 3108 | |||
| 3109 | ret = 0; | ||
| 3110 | if (slot == 0) { | ||
| 3111 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
| 3112 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); | ||
| 3113 | } | ||
| 3114 | |||
| 3115 | if (btrfs_leaf_free_space(root, leaf) < 0) { | ||
| 3116 | btrfs_print_leaf(root, leaf); | ||
| 3117 | BUG(); | ||
| 3118 | } | ||
| 3119 | out: | ||
| 3120 | return ret; | ||
| 3121 | } | ||
| 3122 | |||
| 3123 | /* | ||
| 3124 | * Given a key and some data, insert an item into the tree. | ||
| 3125 | * This does all the path init required, making room in the tree if needed. | ||
| 3126 | */ | ||
| 3127 | int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 3128 | *root, struct btrfs_key *cpu_key, void *data, u32 | ||
| 3129 | data_size) | ||
| 3130 | { | ||
| 3131 | int ret = 0; | ||
| 3132 | struct btrfs_path *path; | ||
| 3133 | struct extent_buffer *leaf; | ||
| 3134 | unsigned long ptr; | ||
| 3135 | |||
| 3136 | path = btrfs_alloc_path(); | ||
| 3137 | BUG_ON(!path); | ||
| 3138 | ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); | ||
| 3139 | if (!ret) { | ||
| 3140 | leaf = path->nodes[0]; | ||
| 3141 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 3142 | write_extent_buffer(leaf, data, ptr, data_size); | ||
| 3143 | btrfs_mark_buffer_dirty(leaf); | ||
| 3144 | } | ||
| 3145 | btrfs_free_path(path); | ||
| 3146 | return ret; | ||
| 3147 | } | ||
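btrfs_insert_item() wraps the whole reserve-and-copy dance; a hedged usage sketch follows (demo_insert, the key values, and the payload struct are invented for illustration, not part of this patch):

    /* Hedged usage sketch: insert one fixed-size item under a new key. */
    static int demo_insert(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root)
    {
            struct btrfs_key key;
            struct { __le64 value; } __attribute__ ((__packed__)) payload;

            key.objectid = 256;              /* BTRFS_FIRST_FREE_OBJECTID */
            key.type = BTRFS_DIR_ITEM_KEY;   /* demo choice; any item type */
            key.offset = 0;
            payload.value = cpu_to_le64(42);

            /* returns -EEXIST if an item with this exact key already exists */
            return btrfs_insert_item(trans, root, &key, &payload,
                                     sizeof(payload));
    }

The -EEXIST behavior comes straight from btrfs_insert_empty_items() above, which treats an exact key match from btrfs_search_slot() as a failure.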
| 3148 | |||
| 3149 | /* | ||
| 3150 | * delete the pointer from a given node. | ||
| 3151 | * | ||
| 3152 | * the tree should have been previously balanced so the deletion does not | ||
| 3153 | * empty a node. | ||
| 3154 | */ | ||
| 3155 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 3156 | struct btrfs_path *path, int level, int slot) | ||
| 3157 | { | ||
| 3158 | struct extent_buffer *parent = path->nodes[level]; | ||
| 3159 | u32 nritems; | ||
| 3160 | int ret = 0; | ||
| 3161 | int wret; | ||
| 3162 | |||
| 3163 | nritems = btrfs_header_nritems(parent); | ||
| 3164 | if (slot != nritems - 1) { | ||
| 3165 | memmove_extent_buffer(parent, | ||
| 3166 | btrfs_node_key_ptr_offset(slot), | ||
| 3167 | btrfs_node_key_ptr_offset(slot + 1), | ||
| 3168 | sizeof(struct btrfs_key_ptr) * | ||
| 3169 | (nritems - slot - 1)); | ||
| 3170 | } | ||
| 3171 | nritems--; | ||
| 3172 | btrfs_set_header_nritems(parent, nritems); | ||
| 3173 | if (nritems == 0 && parent == root->node) { | ||
| 3174 | BUG_ON(btrfs_header_level(root->node) != 1); | ||
| 3175 | /* just turn the root into a leaf and break */ | ||
| 3176 | btrfs_set_header_level(root->node, 0); | ||
| 3177 | } else if (slot == 0) { | ||
| 3178 | struct btrfs_disk_key disk_key; | ||
| 3179 | |||
| 3180 | btrfs_node_key(parent, &disk_key, 0); | ||
| 3181 | wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); | ||
| 3182 | if (wret) | ||
| 3183 | ret = wret; | ||
| 3184 | } | ||
| 3185 | btrfs_mark_buffer_dirty(parent); | ||
| 3186 | return ret; | ||
| 3187 | } | ||
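The memmove above simply closes the gap in the key_ptr array; a standalone sketch with plain longs standing in for struct btrfs_key_ptr and invented pointer values:

    /* Hedged sketch of the del_ptr() memmove accounting. */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            long ptrs[4] = { 100, 200, 300, 400 };
            int nritems = 4, slot = 1;

            if (slot != nritems - 1)
                    memmove(&ptrs[slot], &ptrs[slot + 1],
                            sizeof(ptrs[0]) * (nritems - slot - 1));
            nritems--;

            for (int i = 0; i < nritems; i++)
                    printf("%ld ", ptrs[i]);        /* prints: 100 300 400 */
            printf("\n");
            return 0;
    }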
| 3188 | |||
| 3189 | /* | ||
| 3190 | * a helper function to delete the leaf pointed to by path->slots[1] and | ||
| 3191 | * path->nodes[1]. bytenr is the node block pointer, but since the callers | ||
| 3192 | * already know it, it is faster to have them pass it down than to | ||
| 3193 | * read it out of the node again. | ||
| 3194 | * | ||
| 3195 | * This deletes the pointer in path->nodes[1] and frees the leaf | ||
| 3196 | * block extent. zero is returned if it all worked out, < 0 otherwise. | ||
| 3197 | * | ||
| 3198 | * The path must have already been setup for deleting the leaf, including | ||
| 3199 | * all the proper balancing. path->nodes[1] must be locked. | ||
| 3200 | */ | ||
| 3201 | noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | ||
| 3202 | struct btrfs_root *root, | ||
| 3203 | struct btrfs_path *path, u64 bytenr) | ||
| 3204 | { | ||
| 3205 | int ret; | ||
| 3206 | u64 root_gen = btrfs_header_generation(path->nodes[1]); | ||
| 3207 | |||
| 3208 | ret = del_ptr(trans, root, path, 1, path->slots[1]); | ||
| 3209 | if (ret) | ||
| 3210 | return ret; | ||
| 3211 | |||
| 3212 | ret = btrfs_free_extent(trans, root, bytenr, | ||
| 3213 | btrfs_level_size(root, 0), | ||
| 3214 | path->nodes[1]->start, | ||
| 3215 | btrfs_header_owner(path->nodes[1]), | ||
| 3216 | root_gen, 0, 1); | ||
| 3217 | return ret; | ||
| 3218 | } | ||
| 3219 | /* | ||
| 3220 | * delete the item at the leaf level in path. If that empties | ||
| 3221 | * the leaf, remove it from the tree | ||
| 3222 | */ | ||
| 3223 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 3224 | struct btrfs_path *path, int slot, int nr) | ||
| 3225 | { | ||
| 3226 | struct extent_buffer *leaf; | ||
| 3227 | struct btrfs_item *item; | ||
| 3228 | int last_off; | ||
| 3229 | int dsize = 0; | ||
| 3230 | int ret = 0; | ||
| 3231 | int wret; | ||
| 3232 | int i; | ||
| 3233 | u32 nritems; | ||
| 3234 | |||
| 3235 | leaf = path->nodes[0]; | ||
| 3236 | last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); | ||
| 3237 | |||
| 3238 | for (i = 0; i < nr; i++) | ||
| 3239 | dsize += btrfs_item_size_nr(leaf, slot + i); | ||
| 3240 | |||
| 3241 | nritems = btrfs_header_nritems(leaf); | ||
| 3242 | |||
| 3243 | if (slot + nr != nritems) { | ||
| 3244 | int data_end = leaf_data_end(root, leaf); | ||
| 3245 | |||
| 3246 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
| 3247 | data_end + dsize, | ||
| 3248 | btrfs_leaf_data(leaf) + data_end, | ||
| 3249 | last_off - data_end); | ||
| 3250 | |||
| 3251 | for (i = slot + nr; i < nritems; i++) { | ||
| 3252 | u32 ioff; | ||
| 3253 | |||
| 3254 | item = btrfs_item_nr(leaf, i); | ||
| 3255 | if (!leaf->map_token) { | ||
| 3256 | map_extent_buffer(leaf, (unsigned long)item, | ||
| 3257 | sizeof(struct btrfs_item), | ||
| 3258 | &leaf->map_token, &leaf->kaddr, | ||
| 3259 | &leaf->map_start, &leaf->map_len, | ||
| 3260 | KM_USER1); | ||
| 3261 | } | ||
| 3262 | ioff = btrfs_item_offset(leaf, item); | ||
| 3263 | btrfs_set_item_offset(leaf, item, ioff + dsize); | ||
| 3264 | } | ||
| 3265 | |||
| 3266 | if (leaf->map_token) { | ||
| 3267 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
| 3268 | leaf->map_token = NULL; | ||
| 3269 | } | ||
| 3270 | |||
| 3271 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), | ||
| 3272 | btrfs_item_nr_offset(slot + nr), | ||
| 3273 | sizeof(struct btrfs_item) * | ||
| 3274 | (nritems - slot - nr)); | ||
| 3275 | } | ||
| 3276 | btrfs_set_header_nritems(leaf, nritems - nr); | ||
| 3277 | nritems -= nr; | ||
| 3278 | |||
| 3279 | /* delete the leaf if we've emptied it */ | ||
| 3280 | if (nritems == 0) { | ||
| 3281 | if (leaf == root->node) { | ||
| 3282 | btrfs_set_header_level(leaf, 0); | ||
| 3283 | } else { | ||
| 3284 | ret = btrfs_del_leaf(trans, root, path, leaf->start); | ||
| 3285 | BUG_ON(ret); | ||
| 3286 | } | ||
| 3287 | } else { | ||
| 3288 | int used = leaf_space_used(leaf, 0, nritems); | ||
| 3289 | if (slot == 0) { | ||
| 3290 | struct btrfs_disk_key disk_key; | ||
| 3291 | |||
| 3292 | btrfs_item_key(leaf, &disk_key, 0); | ||
| 3293 | wret = fixup_low_keys(trans, root, path, | ||
| 3294 | &disk_key, 1); | ||
| 3295 | if (wret) | ||
| 3296 | ret = wret; | ||
| 3297 | } | ||
| 3298 | |||
| 3299 | /* delete the leaf if it is mostly empty */ | ||
| 3300 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { | ||
| 3301 | /* push_leaf_left fixes the path. | ||
| 3302 | * make sure the path still points to our leaf | ||
| 3303 | * for possible call to del_ptr below | ||
| 3304 | */ | ||
| 3305 | slot = path->slots[1]; | ||
| 3306 | extent_buffer_get(leaf); | ||
| 3307 | |||
| 3308 | wret = push_leaf_left(trans, root, path, 1, 1); | ||
| 3309 | if (wret < 0 && wret != -ENOSPC) | ||
| 3310 | ret = wret; | ||
| 3311 | |||
| 3312 | if (path->nodes[0] == leaf && | ||
| 3313 | btrfs_header_nritems(leaf)) { | ||
| 3314 | wret = push_leaf_right(trans, root, path, 1, 1); | ||
| 3315 | if (wret < 0 && wret != -ENOSPC) | ||
| 3316 | ret = wret; | ||
| 3317 | } | ||
| 3318 | |||
| 3319 | if (btrfs_header_nritems(leaf) == 0) { | ||
| 3320 | path->slots[1] = slot; | ||
| 3321 | ret = btrfs_del_leaf(trans, root, path, leaf->start); | ||
| 3322 | BUG_ON(ret); | ||
| 3323 | free_extent_buffer(leaf); | ||
| 3324 | } else { | ||
| 3325 | /* if we're still in the path, make sure | ||
| 3326 | * we're dirty. Otherwise, one of the | ||
| 3327 | * push_leaf functions must have already | ||
| 3328 | * dirtied this buffer | ||
| 3329 | */ | ||
| 3330 | if (path->nodes[0] == leaf) | ||
| 3331 | btrfs_mark_buffer_dirty(leaf); | ||
| 3332 | free_extent_buffer(leaf); | ||
| 3333 | } | ||
| 3334 | } else { | ||
| 3335 | btrfs_mark_buffer_dirty(leaf); | ||
| 3336 | } | ||
| 3337 | } | ||
| 3338 | return ret; | ||
| 3339 | } | ||
| 3340 | |||
| 3341 | /* | ||
| 3342 | * search the tree again to find a leaf with lesser keys | ||
| 3343 | * returns 0 if it found something or 1 if there are no lesser leaves. | ||
| 3344 | * returns < 0 on io errors. | ||
| 3345 | * | ||
| 3346 | * This may release the path, and so you may lose any locks held at the | ||
| 3347 | * time you call it. | ||
| 3348 | */ | ||
| 3349 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | ||
| 3350 | { | ||
| 3351 | struct btrfs_key key; | ||
| 3352 | struct btrfs_disk_key found_key; | ||
| 3353 | int ret; | ||
| 3354 | |||
| 3355 | btrfs_item_key_to_cpu(path->nodes[0], &key, 0); | ||
| 3356 | |||
| 3357 | if (key.offset > 0) | ||
| 3358 | key.offset--; | ||
| 3359 | else if (key.type > 0) | ||
| 3360 | key.type--; | ||
| 3361 | else if (key.objectid > 0) | ||
| 3362 | key.objectid--; | ||
| 3363 | else | ||
| 3364 | return 1; | ||
| 3365 | |||
| 3366 | btrfs_release_path(root, path); | ||
| 3367 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 3368 | if (ret < 0) | ||
| 3369 | return ret; | ||
| 3370 | btrfs_item_key(path->nodes[0], &found_key, 0); | ||
| 3371 | ret = comp_keys(&found_key, &key); | ||
| 3372 | if (ret < 0) | ||
| 3373 | return 0; | ||
| 3374 | return 1; | ||
| 3375 | } | ||
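The decrement at the top of btrfs_prev_leaf() only has to produce some strictly smaller key, not the exact predecessor: btrfs_search_slot() then lands on the closest leaf, and comp_keys() confirms the result really sorts before the original. The borrow logic, as a standalone sketch with invented names and values:

    /* Hedged sketch of the key decrement in btrfs_prev_leaf(). */
    #include <stdio.h>

    struct demo_key {
            unsigned long long objectid;
            unsigned char type;
            unsigned long long offset;
    };

    /* returns 1 when the key is already the smallest possible key */
    static int demo_key_pred(struct demo_key *key)
    {
            if (key->offset > 0)
                    key->offset--;
            else if (key->type > 0)
                    key->type--;
            else if (key->objectid > 0)
                    key->objectid--;
            else
                    return 1;
            return 0;
    }

    int main(void)
    {
            struct demo_key k = { 256, 1, 0 };

            demo_key_pred(&k);      /* offset is 0, so type borrows */
            printf("%llu %u %llu\n", k.objectid, k.type, k.offset); /* 256 0 0 */
            return 0;
    }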
| 3376 | |||
| 3377 | /* | ||
| 3378 | * A helper function to walk down the tree starting at min_key, and looking | ||
| 3379 | * for nodes or leaves that are either in cache or have a minimum | ||
| 3380 | * transaction id. This is used by the btree defrag code and by tree logging. | ||
| 3381 | * | ||
| 3382 | * This does not cow, but it does stuff the starting key it finds back | ||
| 3383 | * into min_key, so you can call btrfs_search_slot with cow=1 on the | ||
| 3384 | * key and get a writable path. | ||
| 3385 | * | ||
| 3386 | * This does lock as it descends, and path->keep_locks should be set | ||
| 3387 | * to 1 by the caller. | ||
| 3388 | * | ||
| 3389 | * This honors path->lowest_level to prevent descent past a given level | ||
| 3390 | * of the tree. | ||
| 3391 | * | ||
| 3392 | * min_trans indicates the oldest transaction that you are interested | ||
| 3393 | * in walking through. Any nodes or leaves older than min_trans are | ||
| 3394 | * skipped over (without reading them). | ||
| 3395 | * | ||
| 3396 | * returns zero if something useful was found, < 0 on error and 1 if there | ||
| 3397 | * was nothing in the tree that matched the search criteria. | ||
| 3398 | */ | ||
| 3399 | int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | ||
| 3400 | struct btrfs_key *max_key, | ||
| 3401 | struct btrfs_path *path, int cache_only, | ||
| 3402 | u64 min_trans) | ||
| 3403 | { | ||
| 3404 | struct extent_buffer *cur; | ||
| 3405 | struct btrfs_key found_key; | ||
| 3406 | int slot; | ||
| 3407 | int sret; | ||
| 3408 | u32 nritems; | ||
| 3409 | int level; | ||
| 3410 | int ret = 1; | ||
| 3411 | |||
| 3412 | again: | ||
| 3413 | cur = btrfs_lock_root_node(root); | ||
| 3414 | level = btrfs_header_level(cur); | ||
| 3415 | WARN_ON(path->nodes[level]); | ||
| 3416 | path->nodes[level] = cur; | ||
| 3417 | path->locks[level] = 1; | ||
| 3418 | |||
| 3419 | if (btrfs_header_generation(cur) < min_trans) { | ||
| 3420 | ret = 1; | ||
| 3421 | goto out; | ||
| 3422 | } | ||
| 3423 | while (1) { | ||
| 3424 | nritems = btrfs_header_nritems(cur); | ||
| 3425 | level = btrfs_header_level(cur); | ||
| 3426 | sret = bin_search(cur, min_key, level, &slot); | ||
| 3427 | |||
| 3428 | /* at the lowest level, we're done, setup the path and exit */ | ||
| 3429 | if (level == path->lowest_level) { | ||
| 3430 | if (slot >= nritems) | ||
| 3431 | goto find_next_key; | ||
| 3432 | ret = 0; | ||
| 3433 | path->slots[level] = slot; | ||
| 3434 | btrfs_item_key_to_cpu(cur, &found_key, slot); | ||
| 3435 | goto out; | ||
| 3436 | } | ||
| 3437 | if (sret && slot > 0) | ||
| 3438 | slot--; | ||
| 3439 | /* | ||
| 3440 | * check this node pointer against the cache_only and | ||
| 3441 | * min_trans parameters. If it isn't in cache or is too | ||
| 3442 | * old, skip to the next one. | ||
| 3443 | */ | ||
| 3444 | while (slot < nritems) { | ||
| 3445 | u64 blockptr; | ||
| 3446 | u64 gen; | ||
| 3447 | struct extent_buffer *tmp; | ||
| 3448 | struct btrfs_disk_key disk_key; | ||
| 3449 | |||
| 3450 | blockptr = btrfs_node_blockptr(cur, slot); | ||
| 3451 | gen = btrfs_node_ptr_generation(cur, slot); | ||
| 3452 | if (gen < min_trans) { | ||
| 3453 | slot++; | ||
| 3454 | continue; | ||
| 3455 | } | ||
| 3456 | if (!cache_only) | ||
| 3457 | break; | ||
| 3458 | |||
| 3459 | if (max_key) { | ||
| 3460 | btrfs_node_key(cur, &disk_key, slot); | ||
| 3461 | if (comp_keys(&disk_key, max_key) >= 0) { | ||
| 3462 | ret = 1; | ||
| 3463 | goto out; | ||
| 3464 | } | ||
| 3465 | } | ||
| 3466 | |||
| 3467 | tmp = btrfs_find_tree_block(root, blockptr, | ||
| 3468 | btrfs_level_size(root, level - 1)); | ||
| 3469 | |||
| 3470 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
| 3471 | free_extent_buffer(tmp); | ||
| 3472 | break; | ||
| 3473 | } | ||
| 3474 | if (tmp) | ||
| 3475 | free_extent_buffer(tmp); | ||
| 3476 | slot++; | ||
| 3477 | } | ||
| 3478 | find_next_key: | ||
| 3479 | /* | ||
| 3480 | * we didn't find a candidate key in this node, walk forward | ||
| 3481 | * and find another one | ||
| 3482 | */ | ||
| 3483 | if (slot >= nritems) { | ||
| 3484 | path->slots[level] = slot; | ||
| 3485 | sret = btrfs_find_next_key(root, path, min_key, level, | ||
| 3486 | cache_only, min_trans); | ||
| 3487 | if (sret == 0) { | ||
| 3488 | btrfs_release_path(root, path); | ||
| 3489 | goto again; | ||
| 3490 | } else { | ||
| 3491 | goto out; | ||
| 3492 | } | ||
| 3493 | } | ||
| 3494 | /* save our key for returning back */ | ||
| 3495 | btrfs_node_key_to_cpu(cur, &found_key, slot); | ||
| 3496 | path->slots[level] = slot; | ||
| 3497 | if (level == path->lowest_level) { | ||
| 3498 | ret = 0; | ||
| 3499 | unlock_up(path, level, 1); | ||
| 3500 | goto out; | ||
| 3501 | } | ||
| 3502 | cur = read_node_slot(root, cur, slot); | ||
| 3503 | |||
| 3504 | btrfs_tree_lock(cur); | ||
| 3505 | path->locks[level - 1] = 1; | ||
| 3506 | path->nodes[level - 1] = cur; | ||
| 3507 | unlock_up(path, level, 1); | ||
| 3508 | } | ||
| 3509 | out: | ||
| 3510 | if (ret == 0) | ||
| 3511 | memcpy(min_key, &found_key, sizeof(found_key)); | ||
| 3512 | return ret; | ||
| 3513 | } | ||
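A hedged usage sketch for the defrag/logging style scan the comment describes; demo_scan_newer is invented, error handling is trimmed, and the key-advance step is simplified to an offset bump:

    /* Hedged usage sketch: visit every key newer than min_trans. */
    static void demo_scan_newer(struct btrfs_root *root, u64 min_trans)
    {
            struct btrfs_path *path = btrfs_alloc_path();
            struct btrfs_key min_key = { .objectid = 0, .type = 0, .offset = 0 };
            int ret;

            path->keep_locks = 1;   /* required, per the comment above */
            while (1) {
                    ret = btrfs_search_forward(root, &min_key, NULL, path,
                                               0, min_trans);
                    if (ret != 0)   /* < 0 io error, 1 nothing newer */
                            break;
                    /* min_key now holds the found key; inspect path here */
                    btrfs_release_path(root, path);
                    if (min_key.offset == (u64)-1)
                            break;
                    min_key.offset++;       /* simplified advance */
            }
            btrfs_free_path(path);
    }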
| 3514 | |||
| 3515 | /* | ||
| 3516 | * this is similar to btrfs_next_leaf, but does not try to preserve | ||
| 3517 | * and fixup the path. It looks for and returns the next key in the | ||
| 3518 | * tree based on the current path and the cache_only and min_trans | ||
| 3519 | * parameters. | ||
| 3520 | * | ||
| 3521 | * 0 is returned if another key is found, < 0 if there are any errors | ||
| 3522 | * and 1 is returned if there are no higher keys in the tree | ||
| 3523 | * | ||
| 3524 | * path->keep_locks should be set to 1 on the search made before | ||
| 3525 | * calling this function. | ||
| 3526 | */ | ||
| 3527 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, | ||
| 3528 | struct btrfs_key *key, int lowest_level, | ||
| 3529 | int cache_only, u64 min_trans) | ||
| 3530 | { | ||
| 3531 | int level = lowest_level; | ||
| 3532 | int slot; | ||
| 3533 | struct extent_buffer *c; | ||
| 3534 | |||
| 3535 | while (level < BTRFS_MAX_LEVEL) { | ||
| 3536 | if (!path->nodes[level]) | ||
| 3537 | return 1; | ||
| 3538 | |||
| 3539 | slot = path->slots[level] + 1; | ||
| 3540 | c = path->nodes[level]; | ||
| 3541 | next: | ||
| 3542 | if (slot >= btrfs_header_nritems(c)) { | ||
| 3543 | level++; | ||
| 3544 | if (level == BTRFS_MAX_LEVEL) { | ||
| 3545 | return 1; | ||
| 3546 | } | ||
| 3547 | continue; | ||
| 3548 | } | ||
| 3549 | if (level == 0) | ||
| 3550 | btrfs_item_key_to_cpu(c, key, slot); | ||
| 3551 | else { | ||
| 3552 | u64 blockptr = btrfs_node_blockptr(c, slot); | ||
| 3553 | u64 gen = btrfs_node_ptr_generation(c, slot); | ||
| 3554 | |||
| 3555 | if (cache_only) { | ||
| 3556 | struct extent_buffer *cur; | ||
| 3557 | cur = btrfs_find_tree_block(root, blockptr, | ||
| 3558 | btrfs_level_size(root, level - 1)); | ||
| 3559 | if (!cur || !btrfs_buffer_uptodate(cur, gen)) { | ||
| 3560 | slot++; | ||
| 3561 | if (cur) | ||
| 3562 | free_extent_buffer(cur); | ||
| 3563 | goto next; | ||
| 3564 | } | ||
| 3565 | free_extent_buffer(cur); | ||
| 3566 | } | ||
| 3567 | if (gen < min_trans) { | ||
| 3568 | slot++; | ||
| 3569 | goto next; | ||
| 3570 | } | ||
| 3571 | btrfs_node_key_to_cpu(c, key, slot); | ||
| 3572 | } | ||
| 3573 | return 0; | ||
| 3574 | } | ||
| 3575 | return 1; | ||
| 3576 | } | ||
| 3577 | |||
| 3578 | /* | ||
| 3579 | * search the tree again to find a leaf with greater keys | ||
| 3580 | * returns 0 if it found something or 1 if there are no greater leaves. | ||
| 3581 | * returns < 0 on io errors. | ||
| 3582 | */ | ||
| 3583 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | ||
| 3584 | { | ||
| 3585 | int slot; | ||
| 3586 | int level = 1; | ||
| 3587 | struct extent_buffer *c; | ||
| 3588 | struct extent_buffer *next = NULL; | ||
| 3589 | struct btrfs_key key; | ||
| 3590 | u32 nritems; | ||
| 3591 | int ret; | ||
| 3592 | |||
| 3593 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 3594 | if (nritems == 0) { | ||
| 3595 | return 1; | ||
| 3596 | } | ||
| 3597 | |||
| 3598 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | ||
| 3599 | |||
| 3600 | btrfs_release_path(root, path); | ||
| 3601 | path->keep_locks = 1; | ||
| 3602 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 3603 | path->keep_locks = 0; | ||
| 3604 | |||
| 3605 | if (ret < 0) | ||
| 3606 | return ret; | ||
| 3607 | |||
| 3608 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 3609 | /* | ||
| 3610 | * by releasing the path above we dropped all our locks. A balance | ||
| 3611 | * could have added more items next to the key that used to be | ||
| 3612 | * at the very end of the block. So, check again here and | ||
| 3613 | * advance the path if there are now more items available. | ||
| 3614 | */ | ||
| 3615 | if (nritems > 0 && path->slots[0] < nritems - 1) { | ||
| 3616 | path->slots[0]++; | ||
| 3617 | goto done; | ||
| 3618 | } | ||
| 3619 | |||
| 3620 | while (level < BTRFS_MAX_LEVEL) { | ||
| 3621 | if (!path->nodes[level]) | ||
| 3622 | return 1; | ||
| 3623 | |||
| 3624 | slot = path->slots[level] + 1; | ||
| 3625 | c = path->nodes[level]; | ||
| 3626 | if (slot >= btrfs_header_nritems(c)) { | ||
| 3627 | level++; | ||
| 3628 | if (level == BTRFS_MAX_LEVEL) { | ||
| 3629 | return 1; | ||
| 3630 | } | ||
| 3631 | continue; | ||
| 3632 | } | ||
| 3633 | |||
| 3634 | if (next) { | ||
| 3635 | btrfs_tree_unlock(next); | ||
| 3636 | free_extent_buffer(next); | ||
| 3637 | } | ||
| 3638 | |||
| 3639 | if (level == 1 && (path->locks[1] || path->skip_locking) && | ||
| 3640 | path->reada) | ||
| 3641 | reada_for_search(root, path, level, slot, 0); | ||
| 3642 | |||
| 3643 | next = read_node_slot(root, c, slot); | ||
| 3644 | if (!path->skip_locking) { | ||
| 3645 | WARN_ON(!btrfs_tree_locked(c)); | ||
| 3646 | btrfs_tree_lock(next); | ||
| 3647 | } | ||
| 3648 | break; | ||
| 3649 | } | ||
| 3650 | path->slots[level] = slot; | ||
| 3651 | while (1) { | ||
| 3652 | level--; | ||
| 3653 | c = path->nodes[level]; | ||
| 3654 | if (path->locks[level]) | ||
| 3655 | btrfs_tree_unlock(c); | ||
| 3656 | free_extent_buffer(c); | ||
| 3657 | path->nodes[level] = next; | ||
| 3658 | path->slots[level] = 0; | ||
| 3659 | if (!path->skip_locking) | ||
| 3660 | path->locks[level] = 1; | ||
| 3661 | if (!level) | ||
| 3662 | break; | ||
| 3663 | if (level == 1 && path->locks[1] && path->reada) | ||
| 3664 | reada_for_search(root, path, level, slot, 0); | ||
| 3665 | next = read_node_slot(root, next, 0); | ||
| 3666 | if (!path->skip_locking) { | ||
| 3667 | WARN_ON(!btrfs_tree_locked(path->nodes[level])); | ||
| 3668 | btrfs_tree_lock(next); | ||
| 3669 | } | ||
| 3670 | } | ||
| 3671 | done: | ||
| 3672 | unlock_up(path, 0, 1); | ||
| 3673 | return 0; | ||
| 3674 | } | ||
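Together with btrfs_search_slot(), this gives the standard forward-iteration pattern over leaf items; a hedged sketch (demo_walk is invented and the item processing is elided):

    /* Hedged sketch of the usual walk-forward loop over leaf items. */
    static int demo_walk(struct btrfs_root *root, struct btrfs_path *path)
    {
            struct extent_buffer *leaf;
            struct btrfs_key found;
            int ret;

            while (1) {
                    leaf = path->nodes[0];
                    if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                            ret = btrfs_next_leaf(root, path);
                            if (ret)        /* 1 means clean end of tree */
                                    return ret;
                            continue;       /* re-read leaf and slot */
                    }
                    btrfs_item_key_to_cpu(leaf, &found, path->slots[0]);
                    /* ... examine the item for key 'found' here ... */
                    path->slots[0]++;
            }
    }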
| 3675 | |||
| 3676 | /* | ||
| 3677 | * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps | ||
| 3678 | * searching until it gets past min_objectid or finds an item of 'type' | ||
| 3679 | * | ||
| 3680 | * returns 0 if something is found, 1 if nothing was found and < 0 on error | ||
| 3681 | */ | ||
| 3682 | int btrfs_previous_item(struct btrfs_root *root, | ||
| 3683 | struct btrfs_path *path, u64 min_objectid, | ||
| 3684 | int type) | ||
| 3685 | { | ||
| 3686 | struct btrfs_key found_key; | ||
| 3687 | struct extent_buffer *leaf; | ||
| 3688 | u32 nritems; | ||
| 3689 | int ret; | ||
| 3690 | |||
| 3691 | while (1) { | ||
| 3692 | if (path->slots[0] == 0) { | ||
| 3693 | ret = btrfs_prev_leaf(root, path); | ||
| 3694 | if (ret != 0) | ||
| 3695 | return ret; | ||
| 3696 | } else { | ||
| 3697 | path->slots[0]--; | ||
| 3698 | } | ||
| 3699 | leaf = path->nodes[0]; | ||
| 3700 | nritems = btrfs_header_nritems(leaf); | ||
| 3701 | if (nritems == 0) | ||
| 3702 | return 1; | ||
| 3703 | if (path->slots[0] == nritems) | ||
| 3704 | path->slots[0]--; | ||
| 3705 | |||
| 3706 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 3707 | if (found_key.type == type) | ||
| 3708 | return 0; | ||
| 3709 | if (found_key.objectid < min_objectid) | ||
| 3710 | break; | ||
| 3711 | if (found_key.objectid == min_objectid && | ||
| 3712 | found_key.type < type) | ||
| 3713 | break; | ||
| 3714 | } | ||
| 3715 | return 1; | ||
| 3716 | } | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h new file mode 100644 index 00000000000..8559f39fd47 --- /dev/null +++ b/fs/btrfs/ctree.h | |||
| @@ -0,0 +1,1891 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_CTREE__ | ||
| 20 | #define __BTRFS_CTREE__ | ||
| 21 | |||
| 22 | #include <linux/version.h> | ||
| 23 | #include <linux/mm.h> | ||
| 24 | #include <linux/highmem.h> | ||
| 25 | #include <linux/fs.h> | ||
| 26 | #include <linux/completion.h> | ||
| 27 | #include <linux/backing-dev.h> | ||
| 28 | #include <linux/wait.h> | ||
| 29 | #include <asm/kmap_types.h> | ||
| 30 | #include "extent_io.h" | ||
| 31 | #include "extent_map.h" | ||
| 32 | #include "async-thread.h" | ||
| 33 | |||
| 34 | struct btrfs_trans_handle; | ||
| 35 | struct btrfs_transaction; | ||
| 36 | extern struct kmem_cache *btrfs_trans_handle_cachep; | ||
| 37 | extern struct kmem_cache *btrfs_transaction_cachep; | ||
| 38 | extern struct kmem_cache *btrfs_bit_radix_cachep; | ||
| 39 | extern struct kmem_cache *btrfs_path_cachep; | ||
| 40 | struct btrfs_ordered_sum; | ||
| 41 | |||
| 42 | #define BTRFS_MAGIC "_BBRfS_M" | ||
| 43 | |||
| 44 | #define BTRFS_ACL_NOT_CACHED ((void *)-1) | ||
| 45 | |||
| 46 | #ifdef CONFIG_LOCKDEP | ||
| 47 | # define BTRFS_MAX_LEVEL 7 | ||
| 48 | #else | ||
| 49 | # define BTRFS_MAX_LEVEL 8 | ||
| 50 | #endif | ||
| 51 | |||
| 52 | /* holds pointers to all of the tree roots */ | ||
| 53 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL | ||
| 54 | |||
| 55 | /* stores information about which extents are in use, and reference counts */ | ||
| 56 | #define BTRFS_EXTENT_TREE_OBJECTID 2ULL | ||
| 57 | |||
| 58 | /* | ||
| 59 | * chunk tree stores translations from logical -> physical block numbering | ||
| 60 | * the super block points to the chunk tree | ||
| 61 | */ | ||
| 62 | #define BTRFS_CHUNK_TREE_OBJECTID 3ULL | ||
| 63 | |||
| 64 | /* | ||
| 65 | * stores information about which areas of a given device are in use. | ||
| 66 | * one per device. The tree of tree roots points to the device tree | ||
| 67 | */ | ||
| 68 | #define BTRFS_DEV_TREE_OBJECTID 4ULL | ||
| 69 | |||
| 70 | /* one per subvolume, storing files and directories */ | ||
| 71 | #define BTRFS_FS_TREE_OBJECTID 5ULL | ||
| 72 | |||
| 73 | /* directory objectid inside the root tree */ | ||
| 74 | #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL | ||
| 75 | |||
| 76 | /* orphan objectid for tracking unlinked/truncated files */ | ||
| 77 | #define BTRFS_ORPHAN_OBJECTID -5ULL | ||
| 78 | |||
| 79 | /* does write ahead logging to speed up fsyncs */ | ||
| 80 | #define BTRFS_TREE_LOG_OBJECTID -6ULL | ||
| 81 | #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL | ||
| 82 | |||
| 83 | /* for space balancing */ | ||
| 84 | #define BTRFS_TREE_RELOC_OBJECTID -8ULL | ||
| 85 | #define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL | ||
| 86 | |||
| 87 | /* dummy objectid represents multiple objectids */ | ||
| 88 | #define BTRFS_MULTIPLE_OBJECTIDS -255ULL | ||
| 89 | |||
| 90 | /* | ||
| 91 | * All files have objectids in this range. | ||
| 92 | */ | ||
| 93 | #define BTRFS_FIRST_FREE_OBJECTID 256ULL | ||
| 94 | #define BTRFS_LAST_FREE_OBJECTID -256ULL | ||
| 95 | #define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL | ||
| 96 | |||
| 97 | |||
| 98 | /* | ||
| 99 | * the device items go into the chunk tree. The key is in the form | ||
| 100 | * [ 1 BTRFS_DEV_ITEM_KEY device_id ] | ||
| 101 | */ | ||
| 102 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL | ||
| 103 | |||
| 104 | /* | ||
| 105 | * we can actually store much bigger names, but let's not confuse the rest | ||
| 106 | * of Linux | ||
| 107 | */ | ||
| 108 | #define BTRFS_NAME_LEN 255 | ||
| 109 | |||
| 110 | /* 32 bytes in various csum fields */ | ||
| 111 | #define BTRFS_CSUM_SIZE 32 | ||
| 112 | /* four bytes for CRC32 */ | ||
| 113 | #define BTRFS_CRC32_SIZE 4 | ||
| 114 | #define BTRFS_EMPTY_DIR_SIZE 0 | ||
| 115 | |||
| 116 | #define BTRFS_FT_UNKNOWN 0 | ||
| 117 | #define BTRFS_FT_REG_FILE 1 | ||
| 118 | #define BTRFS_FT_DIR 2 | ||
| 119 | #define BTRFS_FT_CHRDEV 3 | ||
| 120 | #define BTRFS_FT_BLKDEV 4 | ||
| 121 | #define BTRFS_FT_FIFO 5 | ||
| 122 | #define BTRFS_FT_SOCK 6 | ||
| 123 | #define BTRFS_FT_SYMLINK 7 | ||
| 124 | #define BTRFS_FT_XATTR 8 | ||
| 125 | #define BTRFS_FT_MAX 9 | ||
| 126 | |||
| 127 | /* | ||
| 128 | * the key defines the order in the tree, and so it also defines (optimal) | ||
| 129 | * block layout. objectid corresponds to the inode number. The type | ||
| 130 | * field tells us things about the object, and is a kind of stream selector. | ||
| 131 | * So for a given inode, keys with a type of 1 might refer to the inode | ||
| 132 | * data, a type of 2 may point to file data in the btree and a type of 3 | ||
| 133 | * may point to extents. | ||
| 134 | * | ||
| 135 | * offset is the starting byte offset for this key in the stream. | ||
| 136 | * | ||
| 137 | * btrfs_disk_key is in disk byte order. struct btrfs_key is always | ||
| 138 | * in cpu native order. Otherwise they are identical and their sizes | ||
| 139 | * should be the same (ie both packed) | ||
| 140 | */ | ||
| 141 | struct btrfs_disk_key { | ||
| 142 | __le64 objectid; | ||
| 143 | u8 type; | ||
| 144 | __le64 offset; | ||
| 145 | } __attribute__ ((__packed__)); | ||
| 146 | |||
| 147 | struct btrfs_key { | ||
| 148 | u64 objectid; | ||
| 149 | u8 type; | ||
| 150 | u64 offset; | ||
| 151 | } __attribute__ ((__packed__)); | ||
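Keys sort by objectid, then type, then offset; that is the ordering comp_keys() in ctree.c above relies on, and what makes the field-by-field decrement in btrfs_prev_leaf() valid. A hedged sketch of the comparison (demo_comp_keys is invented; the real helper lives in ctree.c):

    /* Hedged sketch of the key ordering used throughout the btree. */
    static int demo_comp_keys(const struct btrfs_key *a,
                              const struct btrfs_key *b)
    {
            if (a->objectid != b->objectid)
                    return a->objectid < b->objectid ? -1 : 1;
            if (a->type != b->type)
                    return a->type < b->type ? -1 : 1;
            if (a->offset != b->offset)
                    return a->offset < b->offset ? -1 : 1;
            return 0;
    }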
| 152 | |||
| 153 | struct btrfs_mapping_tree { | ||
| 154 | struct extent_map_tree map_tree; | ||
| 155 | }; | ||
| 156 | |||
| 157 | #define BTRFS_UUID_SIZE 16 | ||
| 158 | struct btrfs_dev_item { | ||
| 159 | /* the internal btrfs device id */ | ||
| 160 | __le64 devid; | ||
| 161 | |||
| 162 | /* size of the device */ | ||
| 163 | __le64 total_bytes; | ||
| 164 | |||
| 165 | /* bytes used */ | ||
| 166 | __le64 bytes_used; | ||
| 167 | |||
| 168 | /* optimal io alignment for this device */ | ||
| 169 | __le32 io_align; | ||
| 170 | |||
| 171 | /* optimal io width for this device */ | ||
| 172 | __le32 io_width; | ||
| 173 | |||
| 174 | /* minimal io size for this device */ | ||
| 175 | __le32 sector_size; | ||
| 176 | |||
| 177 | /* type and info about this device */ | ||
| 178 | __le64 type; | ||
| 179 | |||
| 180 | /* grouping information for allocation decisions */ | ||
| 181 | __le32 dev_group; | ||
| 182 | |||
| 183 | /* seek speed 0-100 where 100 is fastest */ | ||
| 184 | u8 seek_speed; | ||
| 185 | |||
| 186 | /* bandwidth 0-100 where 100 is fastest */ | ||
| 187 | u8 bandwidth; | ||
| 188 | |||
| 189 | /* btrfs generated uuid for this device */ | ||
| 190 | u8 uuid[BTRFS_UUID_SIZE]; | ||
| 191 | } __attribute__ ((__packed__)); | ||
| 192 | |||
| 193 | struct btrfs_stripe { | ||
| 194 | __le64 devid; | ||
| 195 | __le64 offset; | ||
| 196 | u8 dev_uuid[BTRFS_UUID_SIZE]; | ||
| 197 | } __attribute__ ((__packed__)); | ||
| 198 | |||
| 199 | struct btrfs_chunk { | ||
| 200 | /* size of this chunk in bytes */ | ||
| 201 | __le64 length; | ||
| 202 | |||
| 203 | /* objectid of the root referencing this chunk */ | ||
| 204 | __le64 owner; | ||
| 205 | |||
| 206 | __le64 stripe_len; | ||
| 207 | __le64 type; | ||
| 208 | |||
| 209 | /* optimal io alignment for this chunk */ | ||
| 210 | __le32 io_align; | ||
| 211 | |||
| 212 | /* optimal io width for this chunk */ | ||
| 213 | __le32 io_width; | ||
| 214 | |||
| 215 | /* minimal io size for this chunk */ | ||
| 216 | __le32 sector_size; | ||
| 217 | |||
| 218 | /* 2^16 stripes is quite a lot; a second limit is the size of a single | ||
| 219 | * item in the btree | ||
| 220 | */ | ||
| 221 | __le16 num_stripes; | ||
| 222 | |||
| 223 | /* sub stripes only matter for raid10 */ | ||
| 224 | __le16 sub_stripes; | ||
| 225 | struct btrfs_stripe stripe; | ||
| 226 | /* additional stripes go here */ | ||
| 227 | } __attribute__ ((__packed__)); | ||
| 228 | |||
| 229 | static inline unsigned long btrfs_chunk_item_size(int num_stripes) | ||
| 230 | { | ||
| 231 | BUG_ON(num_stripes == 0); | ||
| 232 | return sizeof(struct btrfs_chunk) + | ||
| 233 | sizeof(struct btrfs_stripe) * (num_stripes - 1); | ||
| 234 | } | ||
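Because struct btrfs_chunk embeds its first stripe, n stripes cost n - 1 extra copies. From the packed layouts above, a stripe is 8 + 8 + 16 = 32 bytes and a chunk is 48 + 32 = 80, so the item sizes work out as this hedged userspace check prints:

    /* Hedged userspace check of btrfs_chunk_item_size() arithmetic. */
    #include <stdio.h>

    #define STRIPE_SIZE 32  /* sizeof(struct btrfs_stripe), packed */
    #define CHUNK_SIZE  80  /* sizeof(struct btrfs_chunk), one stripe included */

    int main(void)
    {
            for (int n = 1; n <= 4; n++)
                    printf("num_stripes=%d -> item size %d\n",
                           n, CHUNK_SIZE + STRIPE_SIZE * (n - 1));
            return 0;       /* 80, 112, 144, 176 */
    }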
| 235 | |||
| 236 | #define BTRFS_FSID_SIZE 16 | ||
| 237 | #define BTRFS_HEADER_FLAG_WRITTEN (1 << 0) | ||
| 238 | |||
| 239 | /* | ||
| 240 | * every tree block (leaf or node) starts with this header. | ||
| 241 | */ | ||
| 242 | struct btrfs_header { | ||
| 243 | /* these first four must match the super block */ | ||
| 244 | u8 csum[BTRFS_CSUM_SIZE]; | ||
| 245 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ | ||
| 246 | __le64 bytenr; /* which block this node is supposed to live in */ | ||
| 247 | __le64 flags; | ||
| 248 | |||
| 249 | /* allowed to be different from the super from here on down */ | ||
| 250 | u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; | ||
| 251 | __le64 generation; | ||
| 252 | __le64 owner; | ||
| 253 | __le32 nritems; | ||
| 254 | u8 level; | ||
| 255 | } __attribute__ ((__packed__)); | ||
| 256 | |||
| 257 | #define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ | ||
| 258 | sizeof(struct btrfs_header)) / \ | ||
| 259 | sizeof(struct btrfs_key_ptr)) | ||
| 260 | #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) | ||
| 261 | #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE((r)->leafsize)) | ||
| 262 | #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ | ||
| 263 | sizeof(struct btrfs_item) - \ | ||
| 264 | sizeof(struct btrfs_file_extent_item)) | ||
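For a 4096-byte block these macros are easy to check by hand. Going by the packed layouts in this header, btrfs_header is 101 bytes, a key pointer is 33 (17-byte disk key plus two __le64s), an item header is 25, and a file extent item is 41; the constants below are those hand-computed sizes, not sizeof() results:

    /* Hedged userspace check of the size macros for a 4096-byte block. */
    #include <stdio.h>

    #define HEADER_SIZE      101    /* sizeof(struct btrfs_header), packed */
    #define KEY_PTR_SIZE     33     /* sizeof(struct btrfs_key_ptr) */
    #define ITEM_SIZE        25     /* sizeof(struct btrfs_item) */
    #define FILE_EXTENT_SIZE 41     /* sizeof(struct btrfs_file_extent_item) */

    int main(void)
    {
            int bs = 4096;
            int leaf_data = bs - HEADER_SIZE;

            printf("leaf data size: %d\n", leaf_data);                /* 3995 */
            printf("ptrs per node:  %d\n", leaf_data / KEY_PTR_SIZE); /* 121 */
            printf("max inline:     %d\n",
                   leaf_data - ITEM_SIZE - FILE_EXTENT_SIZE);         /* 3929 */
            return 0;
    }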
| 265 | |||
| 266 | |||
| 267 | /* | ||
| 268 | * this is a very generous portion of the super block, giving us | ||
| 269 | * room to translate 14 chunks with 3 stripes each. | ||
| 270 | */ | ||
| 271 | #define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 | ||
| 272 | #define BTRFS_LABEL_SIZE 256 | ||
| 273 | |||
| 274 | /* | ||
| 275 | * the super block basically lists the main trees of the FS. | ||
| 276 | * It currently lacks any block counts, etc. | ||
| 277 | */ | ||
| 278 | struct btrfs_super_block { | ||
| 279 | u8 csum[BTRFS_CSUM_SIZE]; | ||
| 280 | /* the first 4 fields must match struct btrfs_header */ | ||
| 281 | u8 fsid[16]; /* FS specific uuid */ | ||
| 282 | __le64 bytenr; /* this block number */ | ||
| 283 | __le64 flags; | ||
| 284 | |||
| 285 | /* allowed to be different from the btrfs_header from here on down */ | ||
| 286 | __le64 magic; | ||
| 287 | __le64 generation; | ||
| 288 | __le64 root; | ||
| 289 | __le64 chunk_root; | ||
| 290 | __le64 log_root; | ||
| 291 | __le64 total_bytes; | ||
| 292 | __le64 bytes_used; | ||
| 293 | __le64 root_dir_objectid; | ||
| 294 | __le64 num_devices; | ||
| 295 | __le32 sectorsize; | ||
| 296 | __le32 nodesize; | ||
| 297 | __le32 leafsize; | ||
| 298 | __le32 stripesize; | ||
| 299 | __le32 sys_chunk_array_size; | ||
| 300 | u8 root_level; | ||
| 301 | u8 chunk_root_level; | ||
| 302 | u8 log_root_level; | ||
| 303 | struct btrfs_dev_item dev_item; | ||
| 304 | char label[BTRFS_LABEL_SIZE]; | ||
| 305 | u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; | ||
| 306 | } __attribute__ ((__packed__)); | ||
| 307 | |||
| 308 | /* | ||
| 309 | * A leaf is full of items. offset and size tell us where to find | ||
| 310 | * the item in the leaf (relative to the start of the data area) | ||
| 311 | */ | ||
| 312 | struct btrfs_item { | ||
| 313 | struct btrfs_disk_key key; | ||
| 314 | __le32 offset; | ||
| 315 | __le32 size; | ||
| 316 | } __attribute__ ((__packed__)); | ||
| 317 | |||
| 318 | /* | ||
| 319 | * leaves have an item area and a data area: | ||
| 320 | * [item0, item1....itemN] [free space] [dataN...data1, data0] | ||
| 321 | * | ||
| 322 | * The data is separate from the items to get the keys closer together | ||
| 323 | * during searches. | ||
| 324 | */ | ||
| 325 | struct btrfs_leaf { | ||
| 326 | struct btrfs_header header; | ||
| 327 | struct btrfs_item items[]; | ||
| 328 | } __attribute__ ((__packed__)); | ||
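Free space in a leaf is whatever is left between the item headers growing forward and the item data growing backward, which is exactly what leaf_space_used() and btrfs_leaf_free_space() compute in ctree.c above. In miniature, with invented demo contents:

    /* Hedged sketch of leaf free-space accounting. */
    #include <stdio.h>

    #define LEAF_DATA_SIZE 3995     /* 4096-byte block minus the header */
    #define ITEM_SIZE 25            /* sizeof(struct btrfs_item), packed */

    int main(void)
    {
            unsigned nritems = 3, data_bytes = 300; /* demo leaf contents */
            unsigned used = nritems * ITEM_SIZE + data_bytes;

            printf("free space: %u\n", LEAF_DATA_SIZE - used);      /* 3620 */
            return 0;
    }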
| 329 | |||
| 330 | /* | ||
| 331 | * all non-leaf blocks are nodes, they hold only keys and pointers to | ||
| 332 | * other blocks | ||
| 333 | */ | ||
| 334 | struct btrfs_key_ptr { | ||
| 335 | struct btrfs_disk_key key; | ||
| 336 | __le64 blockptr; | ||
| 337 | __le64 generation; | ||
| 338 | } __attribute__ ((__packed__)); | ||
| 339 | |||
| 340 | struct btrfs_node { | ||
| 341 | struct btrfs_header header; | ||
| 342 | struct btrfs_key_ptr ptrs[]; | ||
| 343 | } __attribute__ ((__packed__)); | ||
| 344 | |||
| 345 | /* | ||
| 346 | * btrfs_paths remember the path taken from the root down to the leaf. | ||
| 347 | * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point | ||
| 348 | * to any other levels that are present. | ||
| 349 | * | ||
| 350 | * The slots array records the index of the item or block pointer | ||
| 351 | * used while walking the tree. | ||
| 352 | */ | ||
| 353 | struct btrfs_path { | ||
| 354 | struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; | ||
| 355 | int slots[BTRFS_MAX_LEVEL]; | ||
| 356 | /* if there is real range locking, this locks field will change */ | ||
| 357 | int locks[BTRFS_MAX_LEVEL]; | ||
| 358 | int reada; | ||
| 359 | /* keep some upper locks as we walk down */ | ||
| 360 | int keep_locks; | ||
| 361 | int skip_locking; | ||
| 362 | int lowest_level; | ||
| 363 | }; | ||
| 364 | |||
| 365 | /* | ||
| 366 | * items in the extent btree are used to record the objectid of the | ||
| 367 | * owner of the block and the number of references | ||
| 368 | */ | ||
| 369 | struct btrfs_extent_item { | ||
| 370 | __le32 refs; | ||
| 371 | } __attribute__ ((__packed__)); | ||
| 372 | |||
| 373 | struct btrfs_extent_ref { | ||
| 374 | __le64 root; | ||
| 375 | __le64 generation; | ||
| 376 | __le64 objectid; | ||
| 377 | __le32 num_refs; | ||
| 378 | } __attribute__ ((__packed__)); | ||
| 379 | |||
| 380 | /* dev extents record free space on individual devices. The owner | ||
| 381 | * field points back to the chunk allocation mapping tree that allocated | ||
| 382 | * the extent. The chunk tree uuid field is a way to double check the owner | ||
| 383 | */ | ||
| 384 | struct btrfs_dev_extent { | ||
| 385 | __le64 chunk_tree; | ||
| 386 | __le64 chunk_objectid; | ||
| 387 | __le64 chunk_offset; | ||
| 388 | __le64 length; | ||
| 389 | u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; | ||
| 390 | } __attribute__ ((__packed__)); | ||
| 391 | |||
| 392 | struct btrfs_inode_ref { | ||
| 393 | __le64 index; | ||
| 394 | __le16 name_len; | ||
| 395 | /* name goes here */ | ||
| 396 | } __attribute__ ((__packed__)); | ||
| 397 | |||
| 398 | struct btrfs_timespec { | ||
| 399 | __le64 sec; | ||
| 400 | __le32 nsec; | ||
| 401 | } __attribute__ ((__packed__)); | ||
| 402 | |||
| 403 | /* | ||
| 404 | * there is no padding here on purpose. If you want to extend the inode, | ||
| 405 | * make a new item type | ||
| 406 | */ | ||
| 407 | struct btrfs_inode_item { | ||
| 408 | /* nfs style generation number */ | ||
| 409 | __le64 generation; | ||
| 410 | /* transid that last touched this inode */ | ||
| 411 | __le64 transid; | ||
| 412 | __le64 size; | ||
| 413 | __le64 nbytes; | ||
| 414 | __le64 block_group; | ||
| 415 | __le32 nlink; | ||
| 416 | __le32 uid; | ||
| 417 | __le32 gid; | ||
| 418 | __le32 mode; | ||
| 419 | __le64 rdev; | ||
| 420 | __le16 flags; | ||
| 421 | __le16 compat_flags; | ||
| 422 | struct btrfs_timespec atime; | ||
| 423 | struct btrfs_timespec ctime; | ||
| 424 | struct btrfs_timespec mtime; | ||
| 425 | struct btrfs_timespec otime; | ||
| 426 | } __attribute__ ((__packed__)); | ||
| 427 | |||
| 428 | struct btrfs_dir_log_item { | ||
| 429 | __le64 end; | ||
| 430 | } __attribute__ ((__packed__)); | ||
| 431 | |||
| 432 | struct btrfs_dir_item { | ||
| 433 | struct btrfs_disk_key location; | ||
| 434 | __le64 transid; | ||
| 435 | __le16 data_len; | ||
| 436 | __le16 name_len; | ||
| 437 | u8 type; | ||
| 438 | } __attribute__ ((__packed__)); | ||
| 439 | |||
| 440 | struct btrfs_root_item { | ||
| 441 | struct btrfs_inode_item inode; | ||
| 442 | __le64 root_dirid; | ||
| 443 | __le64 bytenr; | ||
| 444 | __le64 byte_limit; | ||
| 445 | __le64 bytes_used; | ||
| 446 | __le32 flags; | ||
| 447 | __le32 refs; | ||
| 448 | struct btrfs_disk_key drop_progress; | ||
| 449 | u8 drop_level; | ||
| 450 | u8 level; | ||
| 451 | } __attribute__ ((__packed__)); | ||
| 452 | |||
| 453 | #define BTRFS_FILE_EXTENT_REG 0 | ||
| 454 | #define BTRFS_FILE_EXTENT_INLINE 1 | ||
| 455 | |||
| 456 | struct btrfs_file_extent_item { | ||
| 457 | __le64 generation; | ||
| 458 | u8 type; | ||
| 459 | /* | ||
| 460 | * disk space consumed by the extent; checksum blocks are included | ||
| 461 | * in these numbers | ||
| 462 | */ | ||
| 463 | __le64 disk_bytenr; | ||
| 464 | __le64 disk_num_bytes; | ||
| 465 | /* | ||
| 466 | * the logical offset in file blocks (no csums) | ||
| 467 | * this extent record is for. This allows a file extent to point | ||
| 468 | * into the middle of an existing extent on disk, sharing it | ||
| 469 | * between two snapshots (useful if some bytes in the middle of the | ||
| 470 | * extent have changed) | ||
| 471 | */ | ||
| 472 | __le64 offset; | ||
| 473 | /* | ||
| 474 | * the logical number of file blocks (no csums included) | ||
| 475 | */ | ||
| 476 | __le64 num_bytes; | ||
| 477 | } __attribute__ ((__packed__)); | ||
| 478 | |||
| 479 | struct btrfs_csum_item { | ||
| 480 | u8 csum; | ||
| 481 | } __attribute__ ((__packed__)); | ||
| 482 | |||
| 483 | /* different types of block groups (and chunks) */ | ||
| 484 | #define BTRFS_BLOCK_GROUP_DATA (1 << 0) | ||
| 485 | #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) | ||
| 486 | #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) | ||
| 487 | #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) | ||
| 488 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | ||
| 489 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | ||
| 490 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | ||
| 491 | |||
| 492 | struct btrfs_block_group_item { | ||
| 493 | __le64 used; | ||
| 494 | __le64 chunk_objectid; | ||
| 495 | __le64 flags; | ||
| 496 | } __attribute__ ((__packed__)); | ||
| 497 | |||
| 498 | struct btrfs_space_info { | ||
| 499 | u64 flags; | ||
| 500 | u64 total_bytes; | ||
| 501 | u64 bytes_used; | ||
| 502 | u64 bytes_pinned; | ||
| 503 | u64 bytes_reserved; | ||
| 504 | int full; | ||
| 505 | int force_alloc; | ||
| 506 | struct list_head list; | ||
| 507 | |||
| 508 | /* for block groups in our same type */ | ||
| 509 | struct list_head block_groups; | ||
| 510 | spinlock_t lock; | ||
| 511 | }; | ||
| 512 | |||
| 513 | struct btrfs_free_space { | ||
| 514 | struct rb_node bytes_index; | ||
| 515 | struct rb_node offset_index; | ||
| 516 | u64 offset; | ||
| 517 | u64 bytes; | ||
| 518 | }; | ||
| 519 | |||
| 520 | struct btrfs_block_group_cache { | ||
| 521 | struct btrfs_key key; | ||
| 522 | struct btrfs_block_group_item item; | ||
| 523 | spinlock_t lock; | ||
| 524 | u64 pinned; | ||
| 525 | u64 reserved; | ||
| 526 | u64 flags; | ||
| 527 | int cached; | ||
| 528 | int ro; | ||
| 529 | int dirty; | ||
| 530 | |||
| 531 | struct btrfs_space_info *space_info; | ||
| 532 | |||
| 533 | /* free space cache stuff */ | ||
| 534 | struct rb_root free_space_bytes; | ||
| 535 | struct rb_root free_space_offset; | ||
| 536 | |||
| 537 | /* block group cache stuff */ | ||
| 538 | struct rb_node cache_node; | ||
| 539 | |||
| 540 | /* for block groups in the same raid type */ | ||
| 541 | struct list_head list; | ||
| 542 | }; | ||
| 543 | |||
| 544 | struct btrfs_leaf_ref_tree { | ||
| 545 | struct rb_root root; | ||
| 546 | struct list_head list; | ||
| 547 | spinlock_t lock; | ||
| 548 | }; | ||
| 549 | |||
| 550 | struct btrfs_device; | ||
| 551 | struct btrfs_fs_devices; | ||
| 552 | struct btrfs_fs_info { | ||
| 553 | u8 fsid[BTRFS_FSID_SIZE]; | ||
| 554 | u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; | ||
| 555 | struct btrfs_root *extent_root; | ||
| 556 | struct btrfs_root *tree_root; | ||
| 557 | struct btrfs_root *chunk_root; | ||
| 558 | struct btrfs_root *dev_root; | ||
| 559 | |||
| 560 | /* the log root tree is a directory of all the other log roots */ | ||
| 561 | struct btrfs_root *log_root_tree; | ||
| 562 | struct radix_tree_root fs_roots_radix; | ||
| 563 | |||
| 564 | /* block group cache stuff */ | ||
| 565 | spinlock_t block_group_cache_lock; | ||
| 566 | struct rb_root block_group_cache_tree; | ||
| 567 | |||
| 568 | struct extent_io_tree pinned_extents; | ||
| 569 | struct extent_io_tree pending_del; | ||
| 570 | struct extent_io_tree extent_ins; | ||
| 571 | |||
| 572 | /* logical->physical extent mapping */ | ||
| 573 | struct btrfs_mapping_tree mapping_tree; | ||
| 574 | |||
| 575 | u64 generation; | ||
| 576 | u64 last_trans_committed; | ||
| 577 | u64 last_trans_new_blockgroup; | ||
| 578 | u64 open_ioctl_trans; | ||
| 579 | unsigned long mount_opt; | ||
| 580 | u64 max_extent; | ||
| 581 | u64 max_inline; | ||
| 582 | u64 alloc_start; | ||
| 583 | struct btrfs_transaction *running_transaction; | ||
| 584 | wait_queue_head_t transaction_throttle; | ||
| 585 | wait_queue_head_t transaction_wait; | ||
| 586 | wait_queue_head_t async_submit_wait; | ||
| 587 | |||
| 588 | wait_queue_head_t tree_log_wait; | ||
| 589 | |||
| 590 | struct btrfs_super_block super_copy; | ||
| 591 | struct btrfs_super_block super_for_commit; | ||
| 592 | struct block_device *__bdev; | ||
| 593 | struct super_block *sb; | ||
| 594 | struct inode *btree_inode; | ||
| 595 | struct backing_dev_info bdi; | ||
| 596 | spinlock_t hash_lock; | ||
| 597 | struct mutex trans_mutex; | ||
| 598 | struct mutex tree_log_mutex; | ||
| 599 | struct mutex transaction_kthread_mutex; | ||
| 600 | struct mutex cleaner_mutex; | ||
| 601 | struct mutex alloc_mutex; | ||
| 602 | struct mutex chunk_mutex; | ||
| 603 | struct mutex drop_mutex; | ||
| 604 | struct mutex volume_mutex; | ||
| 605 | struct mutex tree_reloc_mutex; | ||
| 606 | struct list_head trans_list; | ||
| 607 | struct list_head hashers; | ||
| 608 | struct list_head dead_roots; | ||
| 609 | |||
| 610 | atomic_t nr_async_submits; | ||
| 611 | atomic_t async_submit_draining; | ||
| 612 | atomic_t nr_async_bios; | ||
| 613 | atomic_t tree_log_writers; | ||
| 614 | atomic_t tree_log_commit; | ||
| 615 | unsigned long tree_log_batch; | ||
| 616 | u64 tree_log_transid; | ||
| 617 | |||
| 618 | /* | ||
| 619 | * this is used by the balancing code to wait for all the pending | ||
| 620 | * ordered extents | ||
| 621 | */ | ||
| 622 | spinlock_t ordered_extent_lock; | ||
| 623 | struct list_head ordered_extents; | ||
| 624 | struct list_head delalloc_inodes; | ||
| 625 | |||
| 626 | /* | ||
| 627 | * there is a pool of worker threads for checksumming during writes | ||
| 628 | * and a pool for checksumming after reads. This is because readers | ||
| 629 | * can run with FS locks held, and the writers may be waiting for | ||
| 630 | * those locks. We don't want ordering in the pending list to cause | ||
| 631 | * deadlocks, and so the two are serviced separately. | ||
| 632 | * | ||
| 633 | * A third pool does submit_bio to avoid deadlocking with the other | ||
| 634 | * two | ||
| 635 | */ | ||
| 636 | struct btrfs_workers workers; | ||
| 637 | struct btrfs_workers endio_workers; | ||
| 638 | struct btrfs_workers endio_write_workers; | ||
| 639 | struct btrfs_workers submit_workers; | ||
| 640 | /* | ||
| 641 | * fixup workers take dirty pages that didn't properly go through | ||
| 642 | * the cow mechanism and make them safe to write. It happens | ||
| 643 | * for the sys_munmap function call path | ||
| 644 | */ | ||
| 645 | struct btrfs_workers fixup_workers; | ||
| 646 | struct task_struct *transaction_kthread; | ||
| 647 | struct task_struct *cleaner_kthread; | ||
| 648 | int thread_pool_size; | ||
| 649 | |||
| 650 | /* tree relocation related fields */ | ||
| 651 | struct extent_io_tree reloc_mapping_tree; | ||
| 652 | struct list_head dead_reloc_roots; | ||
| 653 | struct btrfs_leaf_ref_tree reloc_ref_tree; | ||
| 654 | struct btrfs_leaf_ref_tree shared_ref_tree; | ||
| 655 | |||
| 656 | struct kobject super_kobj; | ||
| 657 | struct completion kobj_unregister; | ||
| 658 | int do_barriers; | ||
| 659 | int closing; | ||
| 660 | int log_root_recovering; | ||
| 661 | atomic_t throttles; | ||
| 662 | atomic_t throttle_gen; | ||
| 663 | |||
| 664 | u64 total_pinned; | ||
| 665 | struct list_head dirty_cowonly_roots; | ||
| 666 | |||
| 667 | struct btrfs_fs_devices *fs_devices; | ||
| 668 | struct list_head space_info; | ||
| 669 | spinlock_t delalloc_lock; | ||
| 670 | spinlock_t new_trans_lock; | ||
| 671 | u64 delalloc_bytes; | ||
| 672 | u64 last_alloc; | ||
| 673 | u64 last_data_alloc; | ||
| 674 | |||
| 675 | spinlock_t ref_cache_lock; | ||
| 676 | u64 total_ref_cache_size; | ||
| 677 | |||
| 678 | u64 avail_data_alloc_bits; | ||
| 679 | u64 avail_metadata_alloc_bits; | ||
| 680 | u64 avail_system_alloc_bits; | ||
| 681 | u64 data_alloc_profile; | ||
| 682 | u64 metadata_alloc_profile; | ||
| 683 | u64 system_alloc_profile; | ||
| 684 | |||
| 685 | void *bdev_holder; | ||
| 686 | }; | ||
| 687 | |||
| 688 | /* | ||
| 689 | * in ram representation of the tree. extent_root is used for all allocations | ||
| 690 | * and for the extent tree itself. | ||
| 691 | */ | ||
| 692 | struct btrfs_dirty_root; | ||
| 693 | struct btrfs_root { | ||
| 694 | struct extent_buffer *node; | ||
| 695 | |||
| 696 | /* the node lock is held while changing the node pointer */ | ||
| 697 | spinlock_t node_lock; | ||
| 698 | |||
| 699 | struct extent_buffer *commit_root; | ||
| 700 | struct btrfs_leaf_ref_tree *ref_tree; | ||
| 701 | struct btrfs_leaf_ref_tree ref_tree_struct; | ||
| 702 | struct btrfs_dirty_root *dirty_root; | ||
| 703 | struct btrfs_root *log_root; | ||
| 704 | struct btrfs_root *reloc_root; | ||
| 705 | |||
| 706 | struct btrfs_root_item root_item; | ||
| 707 | struct btrfs_key root_key; | ||
| 708 | struct btrfs_fs_info *fs_info; | ||
| 709 | struct inode *inode; | ||
| 710 | struct extent_io_tree dirty_log_pages; | ||
| 711 | |||
| 712 | struct kobject root_kobj; | ||
| 713 | struct completion kobj_unregister; | ||
| 714 | struct mutex objectid_mutex; | ||
| 715 | struct mutex log_mutex; | ||
| 716 | |||
| 717 | u64 objectid; | ||
| 718 | u64 last_trans; | ||
| 719 | |||
| 720 | /* data allocations are done in sectorsize units */ | ||
| 721 | u32 sectorsize; | ||
| 722 | |||
| 723 | /* node allocations are done in nodesize units */ | ||
| 724 | u32 nodesize; | ||
| 725 | |||
| 726 | /* leaf allocations are done in leafsize units */ | ||
| 727 | u32 leafsize; | ||
| 728 | |||
| 729 | u32 stripesize; | ||
| 730 | |||
| 731 | u32 type; | ||
| 732 | u64 highest_inode; | ||
| 733 | u64 last_inode_alloc; | ||
| 734 | int ref_cows; | ||
| 735 | int track_dirty; | ||
| 736 | u64 defrag_trans_start; | ||
| 737 | struct btrfs_key defrag_progress; | ||
| 738 | struct btrfs_key defrag_max; | ||
| 739 | int defrag_running; | ||
| 740 | int defrag_level; | ||
| 741 | char *name; | ||
| 742 | int in_sysfs; | ||
| 743 | |||
| 744 | /* the dirty list is only used by non-reference counted roots */ | ||
| 745 | struct list_head dirty_list; | ||
| 746 | |||
| 747 | spinlock_t list_lock; | ||
| 748 | struct list_head dead_list; | ||
| 749 | struct list_head orphan_list; | ||
| 750 | }; | ||
| 751 | |||
| 752 | /* | ||
| 754 | * inode items have the data typically returned from stat and store other | ||
| 755 | * info about object characteristics. There is one for every file and dir in | ||
| 756 | * the FS | ||
| 757 | */ | ||
| 758 | #define BTRFS_INODE_ITEM_KEY 1 | ||
| 759 | #define BTRFS_INODE_REF_KEY 2 | ||
| 760 | #define BTRFS_XATTR_ITEM_KEY 8 | ||
| 761 | #define BTRFS_ORPHAN_ITEM_KEY 9 | ||
| 762 | /* reserve the rest of 2-15 close to the inode for later flexibility */ | ||
| 763 | |||
| 764 | /* | ||
| 765 | * dir items are the name -> inode pointers in a directory. There is one | ||
| 766 | * for every name in a directory. | ||
| 767 | */ | ||
| 768 | #define BTRFS_DIR_LOG_ITEM_KEY 14 | ||
| 769 | #define BTRFS_DIR_LOG_INDEX_KEY 15 | ||
| 770 | #define BTRFS_DIR_ITEM_KEY 16 | ||
| 771 | #define BTRFS_DIR_INDEX_KEY 17 | ||
| 772 | /* | ||
| 773 | * extent data is for file data | ||
| 774 | */ | ||
| 775 | #define BTRFS_EXTENT_DATA_KEY 18 | ||
| 776 | /* | ||
| 777 | * csum items have the checksums for data in the extents | ||
| 778 | */ | ||
| 779 | #define BTRFS_CSUM_ITEM_KEY 19 | ||
| 780 | |||
| 782 | /* reserve 21-31 for other file/dir stuff */ | ||
| 783 | |||
| 784 | /* | ||
| 785 | * root items point to tree roots. They are typically in the root | ||
| 786 | * tree used by the super block to find all the other trees | ||
| 787 | */ | ||
| 788 | #define BTRFS_ROOT_ITEM_KEY 32 | ||
| 789 | /* | ||
| 790 | * extent items are in the extent map tree. These record which blocks | ||
| 791 | * are used, and how many references there are to each block | ||
| 792 | */ | ||
| 793 | #define BTRFS_EXTENT_ITEM_KEY 33 | ||
| 794 | #define BTRFS_EXTENT_REF_KEY 34 | ||
| 795 | |||
| 796 | /* | ||
| 797 | * block groups give us hints into the extent allocation trees: which | ||
| 798 | * blocks are free, etc. | ||
| 799 | */ | ||
| 800 | #define BTRFS_BLOCK_GROUP_ITEM_KEY 50 | ||
| 801 | |||
| 802 | #define BTRFS_DEV_EXTENT_KEY 75 | ||
| 803 | #define BTRFS_DEV_ITEM_KEY 76 | ||
| 804 | #define BTRFS_CHUNK_ITEM_KEY 77 | ||
| 805 | |||
| 806 | /* | ||
| 807 | * string items are for debugging. They just store a short string of | ||
| 808 | * data in the FS | ||
| 809 | */ | ||
| 810 | #define BTRFS_STRING_ITEM_KEY 253 | ||
| 811 | |||
| 812 | #define BTRFS_MOUNT_NODATASUM (1 << 0) | ||
| 813 | #define BTRFS_MOUNT_NODATACOW (1 << 1) | ||
| 814 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) | ||
| 815 | #define BTRFS_MOUNT_SSD (1 << 3) | ||
| 816 | #define BTRFS_MOUNT_DEGRADED (1 << 4) | ||
| 817 | |||
| 818 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | ||
| 819 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | ||
| 820 | #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ | ||
| 821 | BTRFS_MOUNT_##opt) | ||
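
The token pasting means callers pass only the option suffix. A usage sketch (info is assumed to be the struct btrfs_fs_info being filled in during mount):

	/* while parsing a "nodatacow" mount option */
	btrfs_set_opt(info->mount_opt, NODATACOW);

	/* later, in a write path that has a root handy */
	if (btrfs_test_opt(root, NODATACOW)) {
		/* skip copy-on-write for this write */
	}
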
| 822 | /* | ||
| 823 | * Inode flags | ||
| 824 | */ | ||
| 825 | #define BTRFS_INODE_NODATASUM (1 << 0) | ||
| 826 | #define BTRFS_INODE_NODATACOW (1 << 1) | ||
| 827 | #define BTRFS_INODE_READONLY (1 << 2) | ||
| 828 | #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ | ||
| 829 | ~BTRFS_INODE_##flag) | ||
| 830 | #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ | ||
| 831 | BTRFS_INODE_##flag) | ||
| 832 | #define btrfs_test_flag(inode, flag) (BTRFS_I(inode)->flags & \ | ||
| 833 | BTRFS_INODE_##flag) | ||
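
The same trick at per-inode granularity; BTRFS_I(), defined in btrfs_inode.h elsewhere in this patch, maps a VFS inode to the btrfs-private inode. A sketch:

	/* disable checksumming for one file, e.g. inherited at create time */
	btrfs_set_flag(inode, NODATASUM);

	if (!btrfs_test_flag(inode, NODATASUM)) {
		/* queue checksum calculation for this bio */
	}
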
| 834 | /* some macros to generate set/get funcs for the struct fields. This | ||
| 835 | * assumes there is a le##bits##_to_cpu for every type, so let's make a | ||
| 836 | * simple one for u8: | ||
| 837 | */ | ||
| 838 | #define le8_to_cpu(v) (v) | ||
| 839 | #define cpu_to_le8(v) (v) | ||
| 840 | #define __le8 u8 | ||
| 841 | |||
| 842 | #define read_eb_member(eb, ptr, type, member, result) ( \ | ||
| 843 | read_extent_buffer(eb, (char *)(result), \ | ||
| 844 | ((unsigned long)(ptr)) + \ | ||
| 845 | offsetof(type, member), \ | ||
| 846 | sizeof(((type *)0)->member))) | ||
| 847 | |||
| 848 | #define write_eb_member(eb, ptr, type, member, result) ( \ | ||
| 849 | write_extent_buffer(eb, (char *)(result), \ | ||
| 850 | ((unsigned long)(ptr)) + \ | ||
| 851 | offsetof(type, member), \ | ||
| 852 | sizeof(((type *)0)->member))) | ||
| 853 | |||
| 854 | #ifndef BTRFS_SETGET_FUNCS | ||
| 855 | #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ | ||
| 856 | u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ | ||
| 857 | void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); | ||
| 858 | #endif | ||
| 859 | |||
| 860 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ | ||
| 861 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ | ||
| 862 | { \ | ||
| 863 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | ||
| 864 | u##bits res = le##bits##_to_cpu(p->member); \ | ||
| 865 | kunmap_atomic(p, KM_USER0); \ | ||
| 866 | return res; \ | ||
| 867 | } \ | ||
| 868 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ | ||
| 869 | u##bits val) \ | ||
| 870 | { \ | ||
| 871 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | ||
| 872 | p->member = cpu_to_le##bits(val); \ | ||
| 873 | kunmap_atomic(p, KM_USER0); \ | ||
| 874 | } | ||
| 875 | |||
| 876 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ | ||
| 877 | static inline u##bits btrfs_##name(type *s) \ | ||
| 878 | { \ | ||
| 879 | return le##bits##_to_cpu(s->member); \ | ||
| 880 | } \ | ||
| 881 | static inline void btrfs_set_##name(type *s, u##bits val) \ | ||
| 882 | { \ | ||
| 883 | s->member = cpu_to_le##bits(val); \ | ||
| 884 | } | ||
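
As a concrete illustration, BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64) from later in this header expands to the following pair (with le64/u64 substituted for the ## pastes):

	static inline u64 btrfs_super_total_bytes(struct btrfs_super_block *s)
	{
		return le64_to_cpu(s->total_bytes);
	}
	static inline void btrfs_set_super_total_bytes(struct btrfs_super_block *s,
						       u64 val)
	{
		s->total_bytes = cpu_to_le64(val);
	}
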
| 885 | |||
| 886 | BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); | ||
| 887 | BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64); | ||
| 888 | BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); | ||
| 889 | BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); | ||
| 890 | BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); | ||
| 891 | BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); | ||
| 892 | BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); | ||
| 893 | BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); | ||
| 894 | BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); | ||
| 895 | BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); | ||
| 896 | |||
| 897 | BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); | ||
| 898 | BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, | ||
| 899 | total_bytes, 64); | ||
| 900 | BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, | ||
| 901 | bytes_used, 64); | ||
| 902 | BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, | ||
| 903 | io_align, 32); | ||
| 904 | BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, | ||
| 905 | io_width, 32); | ||
| 906 | BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, | ||
| 907 | sector_size, 32); | ||
| 908 | BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); | ||
| 909 | BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, | ||
| 910 | dev_group, 32); | ||
| 911 | BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, | ||
| 912 | seek_speed, 8); | ||
| 913 | BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, | ||
| 914 | bandwidth, 8); | ||
| 915 | |||
| 916 | static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) | ||
| 917 | { | ||
| 918 | return (char *)d + offsetof(struct btrfs_dev_item, uuid); | ||
| 919 | } | ||
| 920 | |||
| 921 | BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); | ||
| 922 | BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); | ||
| 923 | BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); | ||
| 924 | BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); | ||
| 925 | BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); | ||
| 926 | BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); | ||
| 927 | BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); | ||
| 928 | BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); | ||
| 929 | BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); | ||
| 930 | BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); | ||
| 931 | BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); | ||
| 932 | |||
| 933 | static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) | ||
| 934 | { | ||
| 935 | return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); | ||
| 936 | } | ||
| 937 | |||
| 938 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); | ||
| 939 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); | ||
| 940 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, | ||
| 941 | stripe_len, 64); | ||
| 942 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, | ||
| 943 | io_align, 32); | ||
| 944 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, | ||
| 945 | io_width, 32); | ||
| 946 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, | ||
| 947 | sector_size, 32); | ||
| 948 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); | ||
| 949 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, | ||
| 950 | num_stripes, 16); | ||
| 951 | BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, | ||
| 952 | sub_stripes, 16); | ||
| 953 | BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); | ||
| 954 | BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); | ||
| 955 | |||
| 956 | static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, | ||
| 957 | int nr) | ||
| 958 | { | ||
| 959 | unsigned long offset = (unsigned long)c; | ||
| 960 | offset += offsetof(struct btrfs_chunk, stripe); | ||
| 961 | offset += nr * sizeof(struct btrfs_stripe); | ||
| 962 | return (struct btrfs_stripe *)offset; | ||
| 963 | } | ||
| 964 | |||
| 965 | static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) | ||
| 966 | { | ||
| 967 | return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); | ||
| 968 | } | ||
| 969 | |||
| 970 | static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, | ||
| 971 | struct btrfs_chunk *c, int nr) | ||
| 972 | { | ||
| 973 | return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); | ||
| 974 | } | ||
| 975 | |||
| 976 | static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, | ||
| 977 | struct btrfs_chunk *c, int nr, | ||
| 978 | u64 val) | ||
| 979 | { | ||
| 980 | btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); | ||
| 981 | } | ||
| 982 | |||
| 983 | static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, | ||
| 984 | struct btrfs_chunk *c, int nr) | ||
| 985 | { | ||
| 986 | return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); | ||
| 987 | } | ||
| 988 | |||
| 989 | static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, | ||
| 990 | struct btrfs_chunk *c, int nr, | ||
| 991 | u64 val) | ||
| 992 | { | ||
| 993 | btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); | ||
| 994 | } | ||
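
Since btrfs_stripe_nr is plain pointer arithmetic past the fixed part of the chunk item, walking every stripe of a chunk is a simple loop. A sketch, assuming eb and chunk come from a chunk-tree lookup:

	int i;
	int num = btrfs_chunk_num_stripes(eb, chunk);

	for (i = 0; i < num; i++) {
		u64 devid = btrfs_stripe_devid_nr(eb, chunk, i);
		u64 physical = btrfs_stripe_offset_nr(eb, chunk, i);
		/* stripe i of this chunk lives at 'physical' on device 'devid' */
	}
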
| 995 | |||
| 996 | /* struct btrfs_block_group_item */ | ||
| 997 | BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, | ||
| 998 | used, 64); | ||
| 999 | BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, | ||
| 1000 | used, 64); | ||
| 1001 | BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, | ||
| 1002 | struct btrfs_block_group_item, chunk_objectid, 64); | ||
| 1003 | |||
| 1004 | BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid, | ||
| 1005 | struct btrfs_block_group_item, chunk_objectid, 64); | ||
| 1006 | BTRFS_SETGET_FUNCS(disk_block_group_flags, | ||
| 1007 | struct btrfs_block_group_item, flags, 64); | ||
| 1008 | BTRFS_SETGET_STACK_FUNCS(block_group_flags, | ||
| 1009 | struct btrfs_block_group_item, flags, 64); | ||
| 1010 | |||
| 1011 | /* struct btrfs_inode_ref */ | ||
| 1012 | BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); | ||
| 1013 | BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); | ||
| 1014 | |||
| 1015 | /* struct btrfs_inode_item */ | ||
| 1016 | BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); | ||
| 1017 | BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); | ||
| 1018 | BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); | ||
| 1019 | BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); | ||
| 1020 | BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); | ||
| 1021 | BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); | ||
| 1022 | BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); | ||
| 1023 | BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); | ||
| 1024 | BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); | ||
| 1025 | BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); | ||
| 1026 | BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); | ||
| 1027 | BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, | ||
| 1028 | compat_flags, 16); | ||
| 1029 | |||
| 1030 | static inline struct btrfs_timespec * | ||
| 1031 | btrfs_inode_atime(struct btrfs_inode_item *inode_item) | ||
| 1032 | { | ||
| 1033 | unsigned long ptr = (unsigned long)inode_item; | ||
| 1034 | ptr += offsetof(struct btrfs_inode_item, atime); | ||
| 1035 | return (struct btrfs_timespec *)ptr; | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | static inline struct btrfs_timespec * | ||
| 1039 | btrfs_inode_mtime(struct btrfs_inode_item *inode_item) | ||
| 1040 | { | ||
| 1041 | unsigned long ptr = (unsigned long)inode_item; | ||
| 1042 | ptr += offsetof(struct btrfs_inode_item, mtime); | ||
| 1043 | return (struct btrfs_timespec *)ptr; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | static inline struct btrfs_timespec * | ||
| 1047 | btrfs_inode_ctime(struct btrfs_inode_item *inode_item) | ||
| 1048 | { | ||
| 1049 | unsigned long ptr = (unsigned long)inode_item; | ||
| 1050 | ptr += offsetof(struct btrfs_inode_item, ctime); | ||
| 1051 | return (struct btrfs_timespec *)ptr; | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | static inline struct btrfs_timespec * | ||
| 1055 | btrfs_inode_otime(struct btrfs_inode_item *inode_item) | ||
| 1056 | { | ||
| 1057 | unsigned long ptr = (unsigned long)inode_item; | ||
| 1058 | ptr += offsetof(struct btrfs_inode_item, otime); | ||
| 1059 | return (struct btrfs_timespec *)ptr; | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); | ||
| 1063 | BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); | ||
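
Combined with the offsetof helpers above, copying an on-disk timestamp into a VFS inode is two accessor calls per field. A sketch, assuming leaf is the extent buffer that holds inode_item:

	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf,
						   btrfs_inode_atime(inode_item));
	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf,
						     btrfs_inode_atime(inode_item));
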
| 1064 | |||
| 1065 | /* struct btrfs_dev_extent */ | ||
| 1066 | BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, | ||
| 1067 | chunk_tree, 64); | ||
| 1068 | BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, | ||
| 1069 | chunk_objectid, 64); | ||
| 1070 | BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, | ||
| 1071 | chunk_offset, 64); | ||
| 1072 | BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); | ||
| 1073 | |||
| 1074 | static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) | ||
| 1075 | { | ||
| 1076 | unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); | ||
| 1077 | return (u8 *)((unsigned long)dev + ptr); | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | /* struct btrfs_extent_ref */ | ||
| 1081 | BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); | ||
| 1082 | BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); | ||
| 1083 | BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); | ||
| 1084 | BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32); | ||
| 1085 | |||
| 1086 | BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); | ||
| 1087 | BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, | ||
| 1088 | generation, 64); | ||
| 1089 | BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, | ||
| 1090 | objectid, 64); | ||
| 1091 | BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, | ||
| 1092 | num_refs, 32); | ||
| 1093 | |||
| 1094 | /* struct btrfs_extent_item */ | ||
| 1095 | BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); | ||
| 1096 | BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, | ||
| 1097 | refs, 32); | ||
| 1098 | |||
| 1099 | /* struct btrfs_node */ | ||
| 1100 | BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); | ||
| 1101 | BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); | ||
| 1102 | |||
| 1103 | static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) | ||
| 1104 | { | ||
| 1105 | unsigned long ptr; | ||
| 1106 | ptr = offsetof(struct btrfs_node, ptrs) + | ||
| 1107 | sizeof(struct btrfs_key_ptr) * nr; | ||
| 1108 | return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, | ||
| 1112 | int nr, u64 val) | ||
| 1113 | { | ||
| 1114 | unsigned long ptr; | ||
| 1115 | ptr = offsetof(struct btrfs_node, ptrs) + | ||
| 1116 | sizeof(struct btrfs_key_ptr) * nr; | ||
| 1117 | btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) | ||
| 1121 | { | ||
| 1122 | unsigned long ptr; | ||
| 1123 | ptr = offsetof(struct btrfs_node, ptrs) + | ||
| 1124 | sizeof(struct btrfs_key_ptr) * nr; | ||
| 1125 | return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); | ||
| 1126 | } | ||
| 1127 | |||
| 1128 | static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb, | ||
| 1129 | int nr, u64 val) | ||
| 1130 | { | ||
| 1131 | unsigned long ptr; | ||
| 1132 | ptr = offsetof(struct btrfs_node, ptrs) + | ||
| 1133 | sizeof(struct btrfs_key_ptr) * nr; | ||
| 1134 | btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | static inline unsigned long btrfs_node_key_ptr_offset(int nr) | ||
| 1138 | { | ||
| 1139 | return offsetof(struct btrfs_node, ptrs) + | ||
| 1140 | sizeof(struct btrfs_key_ptr) * nr; | ||
| 1141 | } | ||
| 1142 | |||
| 1143 | void btrfs_node_key(struct extent_buffer *eb, | ||
| 1144 | struct btrfs_disk_key *disk_key, int nr); | ||
| 1145 | |||
| 1146 | static inline void btrfs_set_node_key(struct extent_buffer *eb, | ||
| 1147 | struct btrfs_disk_key *disk_key, int nr) | ||
| 1148 | { | ||
| 1149 | unsigned long ptr; | ||
| 1150 | ptr = btrfs_node_key_ptr_offset(nr); | ||
| 1151 | write_eb_member(eb, (struct btrfs_key_ptr *)ptr, | ||
| 1152 | struct btrfs_key_ptr, key, disk_key); | ||
| 1153 | } | ||
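
With the blockptr/generation accessors and btrfs_node_key, scanning the key pointers of an interior node is one loop. A sketch (btrfs_header_nritems is generated further down in this header):

	int i;
	u32 nritems = btrfs_header_nritems(eb);

	for (i = 0; i < nritems; i++) {
		struct btrfs_disk_key disk_key;
		u64 child = btrfs_node_blockptr(eb, i);
		u64 gen = btrfs_node_ptr_generation(eb, i);

		btrfs_node_key(eb, &disk_key, i);
		/* read the child block at 'child'; its header generation
		 * should match 'gen' */
	}
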
| 1154 | |||
| 1155 | /* struct btrfs_item */ | ||
| 1156 | BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); | ||
| 1157 | BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); | ||
| 1158 | |||
| 1159 | static inline unsigned long btrfs_item_nr_offset(int nr) | ||
| 1160 | { | ||
| 1161 | return offsetof(struct btrfs_leaf, items) + | ||
| 1162 | sizeof(struct btrfs_item) * nr; | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, | ||
| 1166 | int nr) | ||
| 1167 | { | ||
| 1168 | return (struct btrfs_item *)btrfs_item_nr_offset(nr); | ||
| 1169 | } | ||
| 1170 | |||
| 1171 | static inline u32 btrfs_item_end(struct extent_buffer *eb, | ||
| 1172 | struct btrfs_item *item) | ||
| 1173 | { | ||
| 1174 | return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) | ||
| 1178 | { | ||
| 1179 | return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) | ||
| 1183 | { | ||
| 1184 | return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) | ||
| 1188 | { | ||
| 1189 | return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | static inline void btrfs_item_key(struct extent_buffer *eb, | ||
| 1193 | struct btrfs_disk_key *disk_key, int nr) | ||
| 1194 | { | ||
| 1195 | struct btrfs_item *item = btrfs_item_nr(eb, nr); | ||
| 1196 | read_eb_member(eb, item, struct btrfs_item, key, disk_key); | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | static inline void btrfs_set_item_key(struct extent_buffer *eb, | ||
| 1200 | struct btrfs_disk_key *disk_key, int nr) | ||
| 1201 | { | ||
| 1202 | struct btrfs_item *item = btrfs_item_nr(eb, nr); | ||
| 1203 | write_eb_member(eb, item, struct btrfs_item, key, disk_key); | ||
| 1204 | } | ||
| 1205 | |||
| 1206 | BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); | ||
| 1207 | |||
| 1208 | /* struct btrfs_dir_item */ | ||
| 1209 | BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); | ||
| 1210 | BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); | ||
| 1211 | BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); | ||
| 1212 | BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); | ||
| 1213 | |||
| 1214 | static inline void btrfs_dir_item_key(struct extent_buffer *eb, | ||
| 1215 | struct btrfs_dir_item *item, | ||
| 1216 | struct btrfs_disk_key *key) | ||
| 1217 | { | ||
| 1218 | read_eb_member(eb, item, struct btrfs_dir_item, location, key); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, | ||
| 1222 | struct btrfs_dir_item *item, | ||
| 1223 | struct btrfs_disk_key *key) | ||
| 1224 | { | ||
| 1225 | write_eb_member(eb, item, struct btrfs_dir_item, location, key); | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | /* struct btrfs_disk_key */ | ||
| 1229 | BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, | ||
| 1230 | objectid, 64); | ||
| 1231 | BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); | ||
| 1232 | BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); | ||
| 1233 | |||
| 1234 | static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, | ||
| 1235 | struct btrfs_disk_key *disk) | ||
| 1236 | { | ||
| 1237 | cpu->offset = le64_to_cpu(disk->offset); | ||
| 1238 | cpu->type = disk->type; | ||
| 1239 | cpu->objectid = le64_to_cpu(disk->objectid); | ||
| 1240 | } | ||
| 1241 | |||
| 1242 | static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, | ||
| 1243 | struct btrfs_key *cpu) | ||
| 1244 | { | ||
| 1245 | disk->offset = cpu_to_le64(cpu->offset); | ||
| 1246 | disk->type = cpu->type; | ||
| 1247 | disk->objectid = cpu_to_le64(cpu->objectid); | ||
| 1248 | } | ||
| 1249 | |||
| 1250 | static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, | ||
| 1251 | struct btrfs_key *key, int nr) | ||
| 1252 | { | ||
| 1253 | struct btrfs_disk_key disk_key; | ||
| 1254 | btrfs_node_key(eb, &disk_key, nr); | ||
| 1255 | btrfs_disk_key_to_cpu(key, &disk_key); | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, | ||
| 1259 | struct btrfs_key *key, int nr) | ||
| 1260 | { | ||
| 1261 | struct btrfs_disk_key disk_key; | ||
| 1262 | btrfs_item_key(eb, &disk_key, nr); | ||
| 1263 | btrfs_disk_key_to_cpu(key, &disk_key); | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, | ||
| 1267 | struct btrfs_dir_item *item, | ||
| 1268 | struct btrfs_key *key) | ||
| 1269 | { | ||
| 1270 | struct btrfs_disk_key disk_key; | ||
| 1271 | btrfs_dir_item_key(eb, item, &disk_key); | ||
| 1272 | btrfs_disk_key_to_cpu(key, &disk_key); | ||
| 1273 | } | ||
| 1274 | |||
| 1276 | static inline u8 btrfs_key_type(struct btrfs_key *key) | ||
| 1277 | { | ||
| 1278 | return key->type; | ||
| 1279 | } | ||
| 1280 | |||
| 1281 | static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) | ||
| 1282 | { | ||
| 1283 | key->type = val; | ||
| 1284 | } | ||
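
Keys live on disk in little-endian form and are converted once before any comparisons, so leaf processing typically converts and then dispatches on the type. A sketch:

	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	switch (btrfs_key_type(&key)) {
	case BTRFS_INODE_ITEM_KEY:
		/* stat-style data for object key.objectid */
		break;
	case BTRFS_EXTENT_DATA_KEY:
		/* file data beginning at file offset key.offset */
		break;
	}
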
| 1285 | |||
| 1286 | /* struct btrfs_header */ | ||
| 1287 | BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); | ||
| 1288 | BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, | ||
| 1289 | generation, 64); | ||
| 1290 | BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); | ||
| 1291 | BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); | ||
| 1292 | BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); | ||
| 1293 | BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); | ||
| 1294 | |||
| 1295 | static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) | ||
| 1296 | { | ||
| 1297 | return (btrfs_header_flags(eb) & flag) == flag; | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) | ||
| 1301 | { | ||
| 1302 | u64 flags = btrfs_header_flags(eb); | ||
| 1303 | btrfs_set_header_flags(eb, flags | flag); | ||
| 1304 | return (flags & flag) == flag; | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) | ||
| 1308 | { | ||
| 1309 | u64 flags = btrfs_header_flags(eb); | ||
| 1310 | btrfs_set_header_flags(eb, flags & ~flag); | ||
| 1311 | return (flags & flag) == flag; | ||
| 1312 | } | ||
| 1313 | |||
| 1314 | static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) | ||
| 1315 | { | ||
| 1316 | unsigned long ptr = offsetof(struct btrfs_header, fsid); | ||
| 1317 | return (u8 *)ptr; | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) | ||
| 1321 | { | ||
| 1322 | unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); | ||
| 1323 | return (u8 *)ptr; | ||
| 1324 | } | ||
| 1325 | |||
| 1326 | static inline u8 *btrfs_super_fsid(struct extent_buffer *eb) | ||
| 1327 | { | ||
| 1328 | unsigned long ptr = offsetof(struct btrfs_super_block, fsid); | ||
| 1329 | return (u8 *)ptr; | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | static inline u8 *btrfs_header_csum(struct extent_buffer *eb) | ||
| 1333 | { | ||
| 1334 | unsigned long ptr = offsetof(struct btrfs_header, csum); | ||
| 1335 | return (u8 *)ptr; | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) | ||
| 1339 | { | ||
| 1340 | return NULL; | ||
| 1341 | } | ||
| 1342 | |||
| 1343 | static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) | ||
| 1344 | { | ||
| 1345 | return NULL; | ||
| 1346 | } | ||
| 1347 | |||
| 1348 | static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) | ||
| 1349 | { | ||
| 1350 | return NULL; | ||
| 1351 | } | ||
| 1352 | |||
| 1353 | static inline int btrfs_is_leaf(struct extent_buffer *eb) | ||
| 1354 | { | ||
| 1355 | return (btrfs_header_level(eb) == 0); | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | /* struct btrfs_root_item */ | ||
| 1359 | BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); | ||
| 1360 | BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); | ||
| 1361 | BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); | ||
| 1362 | |||
| 1363 | BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); | ||
| 1364 | BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); | ||
| 1365 | BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); | ||
| 1366 | BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); | ||
| 1367 | BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); | ||
| 1368 | BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); | ||
| 1369 | BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | ||
| 1370 | |||
| 1371 | /* struct btrfs_super_block */ | ||
| 1372 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | ||
| 1373 | BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); | ||
| 1374 | BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, | ||
| 1375 | generation, 64); | ||
| 1376 | BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); | ||
| 1377 | BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, | ||
| 1378 | struct btrfs_super_block, sys_chunk_array_size, 32); | ||
| 1379 | BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, | ||
| 1380 | root_level, 8); | ||
| 1381 | BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, | ||
| 1382 | chunk_root, 64); | ||
| 1383 | BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, | ||
| 1384 | chunk_root_level, 8); | ||
| 1385 | BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, | ||
| 1386 | log_root, 64); | ||
| 1387 | BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, | ||
| 1388 | log_root_level, 8); | ||
| 1389 | BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, | ||
| 1390 | total_bytes, 64); | ||
| 1391 | BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, | ||
| 1392 | bytes_used, 64); | ||
| 1393 | BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, | ||
| 1394 | sectorsize, 32); | ||
| 1395 | BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, | ||
| 1396 | nodesize, 32); | ||
| 1397 | BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, | ||
| 1398 | leafsize, 32); | ||
| 1399 | BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, | ||
| 1400 | stripesize, 32); | ||
| 1401 | BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, | ||
| 1402 | root_dir_objectid, 64); | ||
| 1403 | BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, | ||
| 1404 | num_devices, 64); | ||
| 1405 | |||
| 1406 | static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) | ||
| 1407 | { | ||
| 1408 | return offsetof(struct btrfs_leaf, items); | ||
| 1409 | } | ||
| 1410 | |||
| 1411 | /* struct btrfs_file_extent_item */ | ||
| 1412 | BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); | ||
| 1413 | |||
| 1414 | static inline unsigned long btrfs_file_extent_inline_start(struct | ||
| 1415 | btrfs_file_extent_item *e) | ||
| 1416 | { | ||
| 1417 | unsigned long offset = (unsigned long)e; | ||
| 1418 | offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
| 1419 | return offset; | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) | ||
| 1423 | { | ||
| 1424 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, | ||
| 1428 | struct btrfs_item *e) | ||
| 1429 | { | ||
| 1430 | unsigned long offset; | ||
| 1431 | offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
| 1432 | return btrfs_item_size(eb, e) - offset; | ||
| 1433 | } | ||
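
For inline extents the file bytes sit directly in the item after the header fields, which is why these helpers are offsetof based; copying the data out is then one read_extent_buffer call. A sketch (dest is a caller-supplied buffer; btrfs_item_nr is defined above and btrfs_item_ptr near the end of this header):

	struct btrfs_item *item = btrfs_item_nr(leaf, slot);
	struct btrfs_file_extent_item *fi;
	unsigned long ptr;
	u32 len;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	ptr = btrfs_file_extent_inline_start(fi);
	len = btrfs_file_extent_inline_len(leaf, item);
	read_extent_buffer(leaf, dest, ptr, len);
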
| 1434 | |||
| 1435 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, | ||
| 1436 | disk_bytenr, 64); | ||
| 1437 | BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, | ||
| 1438 | generation, 64); | ||
| 1439 | BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, | ||
| 1440 | disk_num_bytes, 64); | ||
| 1441 | BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, | ||
| 1442 | offset, 64); | ||
| 1443 | BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, | ||
| 1444 | num_bytes, 64); | ||
| 1445 | |||
| 1446 | static inline struct btrfs_root *btrfs_sb(struct super_block *sb) | ||
| 1447 | { | ||
| 1448 | return sb->s_fs_info; | ||
| 1449 | } | ||
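
The VFS private pointer holds the tree root of the mounted subvolume, so anything that can see a super_block can reach the shared fs_info. A sketch:

	struct btrfs_root *root = btrfs_sb(sb);
	struct btrfs_fs_info *fs_info = root->fs_info;
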
| 1450 | |||
| 1451 | static inline int btrfs_set_root_name(struct btrfs_root *root, | ||
| 1452 | const char *name, int len) | ||
| 1453 | { | ||
| 1454 | /* if we already have a name just free it */ | ||
| 1455 | if (root->name) | ||
| 1456 | kfree(root->name); | ||
| 1457 | |||
| 1458 | root->name = kmalloc(len+1, GFP_KERNEL); | ||
| 1459 | if (!root->name) | ||
| 1460 | return -ENOMEM; | ||
| 1461 | |||
| 1462 | memcpy(root->name, name, len); | ||
| 1463 | root->name[len] = '\0'; | ||
| 1464 | |||
| 1465 | return 0; | ||
| 1466 | } | ||
| 1467 | |||
| 1468 | static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { | ||
| 1469 | if (level == 0) | ||
| 1470 | return root->leafsize; | ||
| 1471 | return root->nodesize; | ||
| 1472 | } | ||
| 1473 | |||
| 1474 | /* helper macro to cast into the data area of the leaf. */ | ||
| 1475 | #define btrfs_item_ptr(leaf, slot, type) \ | ||
| 1476 | ((type *)(btrfs_leaf_data(leaf) + \ | ||
| 1477 | btrfs_item_offset_nr(leaf, slot))) | ||
| 1478 | |||
| 1479 | #define btrfs_item_ptr_offset(leaf, slot) \ | ||
| 1480 | ((unsigned long)(btrfs_leaf_data(leaf) + \ | ||
| 1481 | btrfs_item_offset_nr(leaf, slot))) | ||
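
Together with the key dispatch shown earlier, this is how typed items are read out of a leaf. A sketch for a regular file extent (the hole convention is an assumption based on the rest of this patch):

	struct btrfs_file_extent_item *fi;

	fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG) {
		u64 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		u64 num = btrfs_file_extent_num_bytes(leaf, fi);
		/* a bytenr of 0 appears to mark a hole in this format */
	}
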
| 1482 | |||
| 1483 | static inline struct dentry *fdentry(struct file *file) | ||
| 1484 | { | ||
| 1485 | return file->f_path.dentry; | ||
| 1486 | } | ||
| 1487 | |||
| 1488 | /* extent-tree.c */ | ||
| 1489 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | ||
| 1490 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
| 1491 | struct btrfs_root *root, u64 bytenr, | ||
| 1492 | u64 num_bytes, u32 *refs); | ||
| 1493 | int btrfs_update_pinned_extents(struct btrfs_root *root, | ||
| 1494 | u64 bytenr, u64 num, int pin); | ||
| 1495 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
| 1496 | struct btrfs_root *root, struct extent_buffer *leaf); | ||
| 1497 | int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, | ||
| 1498 | struct btrfs_root *root, | ||
| 1499 | struct btrfs_key *key, u64 bytenr); | ||
| 1500 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | ||
| 1501 | struct btrfs_root *root); | ||
| 1502 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | ||
| 1503 | struct btrfs_block_group_cache *btrfs_lookup_block_group(struct | ||
| 1504 | btrfs_fs_info *info, | ||
| 1505 | u64 bytenr); | ||
| 1506 | struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, | ||
| 1507 | struct btrfs_block_group_cache | ||
| 1508 | *hint, u64 search_start, | ||
| 1509 | int data, int owner); | ||
| 1510 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | ||
| 1511 | struct btrfs_root *root, | ||
| 1512 | u32 blocksize, u64 parent, | ||
| 1513 | u64 root_objectid, | ||
| 1514 | u64 ref_generation, | ||
| 1515 | int level, | ||
| 1516 | u64 hint, | ||
| 1517 | u64 empty_size); | ||
| 1518 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | ||
| 1519 | struct btrfs_root *root, | ||
| 1520 | u64 bytenr, u32 blocksize); | ||
| 1521 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | ||
| 1522 | struct btrfs_root *root, | ||
| 1523 | u64 num_bytes, u64 parent, u64 min_bytes, | ||
| 1524 | u64 root_objectid, u64 ref_generation, | ||
| 1525 | u64 owner, u64 empty_size, u64 hint_byte, | ||
| 1526 | u64 search_end, struct btrfs_key *ins, u64 data); | ||
| 1527 | int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | ||
| 1528 | struct btrfs_root *root, u64 parent, | ||
| 1529 | u64 root_objectid, u64 ref_generation, | ||
| 1530 | u64 owner, struct btrfs_key *ins); | ||
| 1531 | int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | ||
| 1532 | struct btrfs_root *root, u64 parent, | ||
| 1533 | u64 root_objectid, u64 ref_generation, | ||
| 1534 | u64 owner, struct btrfs_key *ins); | ||
| 1535 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
| 1536 | struct btrfs_root *root, | ||
| 1537 | u64 num_bytes, u64 min_alloc_size, | ||
| 1538 | u64 empty_size, u64 hint_byte, | ||
| 1539 | u64 search_end, struct btrfs_key *ins, | ||
| 1540 | u64 data); | ||
| 1541 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 1542 | struct extent_buffer *orig_buf, struct extent_buffer *buf, | ||
| 1543 | u32 *nr_extents); | ||
| 1544 | int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 1545 | struct extent_buffer *buf, u32 nr_extents); | ||
| 1546 | int btrfs_update_ref(struct btrfs_trans_handle *trans, | ||
| 1547 | struct btrfs_root *root, struct extent_buffer *orig_buf, | ||
| 1548 | struct extent_buffer *buf, int start_slot, int nr); | ||
| 1549 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | ||
| 1550 | struct btrfs_root *root, | ||
| 1551 | u64 bytenr, u64 num_bytes, u64 parent, | ||
| 1552 | u64 root_objectid, u64 ref_generation, | ||
| 1553 | u64 owner_objectid, int pin); | ||
| 1554 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | ||
| 1555 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | ||
| 1556 | struct btrfs_root *root, | ||
| 1557 | struct extent_io_tree *unpin); | ||
| 1558 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | ||
| 1559 | struct btrfs_root *root, | ||
| 1560 | u64 bytenr, u64 num_bytes, u64 parent, | ||
| 1561 | u64 root_objectid, u64 ref_generation, | ||
| 1562 | u64 owner_objectid); | ||
| 1563 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | ||
| 1564 | struct btrfs_root *root, u64 bytenr, | ||
| 1565 | u64 orig_parent, u64 parent, | ||
| 1566 | u64 root_objectid, u64 ref_generation, | ||
| 1567 | u64 owner_objectid); | ||
| 1568 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | ||
| 1569 | struct btrfs_root *root); | ||
| 1570 | int btrfs_free_block_groups(struct btrfs_fs_info *info); | ||
| 1571 | int btrfs_read_block_groups(struct btrfs_root *root); | ||
| 1572 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | ||
| 1573 | struct btrfs_root *root, u64 bytes_used, | ||
| 1574 | u64 type, u64 chunk_objectid, u64 chunk_offset, | ||
| 1575 | u64 size); | ||
| 1576 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | ||
| 1577 | struct btrfs_root *root, u64 group_start); | ||
| 1578 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | ||
| 1579 | int btrfs_free_reloc_root(struct btrfs_root *root); | ||
| 1580 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); | ||
| 1581 | int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
| 1582 | u64 num_bytes, u64 new_bytenr); | ||
| 1583 | int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
| 1584 | u64 num_bytes, u64 *new_bytenr); | ||
| 1585 | void btrfs_free_reloc_mappings(struct btrfs_root *root); | ||
| 1586 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | ||
| 1587 | struct btrfs_root *root, | ||
| 1588 | struct extent_buffer *buf, u64 orig_start); | ||
| 1589 | int btrfs_add_dead_reloc_root(struct btrfs_root *root); | ||
| 1590 | int btrfs_cleanup_reloc_trees(struct btrfs_root *root); | ||
| 1591 | /* ctree.c */ | ||
| 1592 | int btrfs_previous_item(struct btrfs_root *root, | ||
| 1593 | struct btrfs_path *path, u64 min_objectid, | ||
| 1594 | int type); | ||
| 1595 | int btrfs_merge_path(struct btrfs_trans_handle *trans, | ||
| 1596 | struct btrfs_root *root, | ||
| 1597 | struct btrfs_key *node_keys, | ||
| 1598 | u64 *nodes, int lowest_level); | ||
| 1599 | int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, | ||
| 1600 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 1601 | struct btrfs_key *new_key); | ||
| 1602 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root); | ||
| 1603 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); | ||
| 1604 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, | ||
| 1605 | struct btrfs_key *key, int lowest_level, | ||
| 1606 | int cache_only, u64 min_trans); | ||
| 1607 | int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | ||
| 1608 | struct btrfs_key *max_key, | ||
| 1609 | struct btrfs_path *path, int cache_only, | ||
| 1610 | u64 min_trans); | ||
| 1611 | int btrfs_cow_block(struct btrfs_trans_handle *trans, | ||
| 1612 | struct btrfs_root *root, struct extent_buffer *buf, | ||
| 1613 | struct extent_buffer *parent, int parent_slot, | ||
| 1614 | struct extent_buffer **cow_ret, u64 prealloc_dest); | ||
| 1615 | int btrfs_copy_root(struct btrfs_trans_handle *trans, | ||
| 1616 | struct btrfs_root *root, | ||
| 1617 | struct extent_buffer *buf, | ||
| 1618 | struct extent_buffer **cow_ret, u64 new_root_objectid); | ||
| 1619 | int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1620 | *root, struct btrfs_path *path, u32 data_size); | ||
| 1621 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, | ||
| 1622 | struct btrfs_root *root, | ||
| 1623 | struct btrfs_path *path, | ||
| 1624 | u32 new_size, int from_end); | ||
| 1625 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1626 | *root, struct btrfs_key *key, struct btrfs_path *p, int | ||
| 1627 | ins_len, int cow); | ||
| 1628 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | ||
| 1629 | struct btrfs_root *root, struct extent_buffer *parent, | ||
| 1630 | int start_slot, int cache_only, u64 *last_ret, | ||
| 1631 | struct btrfs_key *progress); | ||
| 1632 | void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); | ||
| 1633 | struct btrfs_path *btrfs_alloc_path(void); | ||
| 1634 | void btrfs_free_path(struct btrfs_path *p); | ||
| 1635 | void btrfs_init_path(struct btrfs_path *p); | ||
| 1636 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 1637 | struct btrfs_path *path, int slot, int nr); | ||
| 1638 | int btrfs_del_leaf(struct btrfs_trans_handle *trans, | ||
| 1639 | struct btrfs_root *root, | ||
| 1640 | struct btrfs_path *path, u64 bytenr); | ||
| 1641 | static inline int btrfs_del_item(struct btrfs_trans_handle *trans, | ||
| 1642 | struct btrfs_root *root, | ||
| 1643 | struct btrfs_path *path) | ||
| 1644 | { | ||
| 1645 | return btrfs_del_items(trans, root, path, path->slots[0], 1); | ||
| 1646 | } | ||
| 1647 | |||
| 1648 | int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1649 | *root, struct btrfs_key *key, void *data, u32 data_size); | ||
| 1650 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | ||
| 1651 | struct btrfs_root *root, | ||
| 1652 | struct btrfs_path *path, | ||
| 1653 | struct btrfs_key *cpu_key, u32 *data_size, int nr); | ||
| 1654 | |||
| 1655 | static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | ||
| 1656 | struct btrfs_root *root, | ||
| 1657 | struct btrfs_path *path, | ||
| 1658 | struct btrfs_key *key, | ||
| 1659 | u32 data_size) | ||
| 1660 | { | ||
| 1661 | return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1); | ||
| 1662 | } | ||
| 1663 | |||
| 1664 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | ||
| 1665 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | ||
| 1666 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | ||
| 1667 | int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1668 | *root); | ||
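
These prototypes embody the central idiom of the code base: allocate a path, fill in a key, btrfs_search_slot to it, operate on the slot, release the path. A read-only lookup sketch (passing a NULL trans with cow == 0, as read paths elsewhere in this patch do):

	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = objectid;
	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret == 0) {
		struct btrfs_inode_item *ii;

		ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_inode_item);
		/* use the generated accessors, e.g.
		 * btrfs_inode_size(path->nodes[0], ii) */
	}
	btrfs_free_path(path);
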
| 1669 | /* root-item.c */ | ||
| 1670 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 1671 | struct btrfs_key *key); | ||
| 1672 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1673 | *root, struct btrfs_key *key, struct btrfs_root_item | ||
| 1674 | *item); | ||
| 1675 | int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1676 | *root, struct btrfs_key *key, struct btrfs_root_item | ||
| 1677 | *item); | ||
| 1678 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | ||
| 1679 | btrfs_root_item *item, struct btrfs_key *key); | ||
| 1680 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | ||
| 1681 | u64 *found_objectid); | ||
| 1682 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, | ||
| 1683 | struct btrfs_root *latest_root); | ||
| 1684 | /* dir-item.c */ | ||
| 1685 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1686 | *root, const char *name, int name_len, u64 dir, | ||
| 1687 | struct btrfs_key *location, u8 type, u64 index); | ||
| 1688 | struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | ||
| 1689 | struct btrfs_root *root, | ||
| 1690 | struct btrfs_path *path, u64 dir, | ||
| 1691 | const char *name, int name_len, | ||
| 1692 | int mod); | ||
| 1693 | struct btrfs_dir_item * | ||
| 1694 | btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | ||
| 1695 | struct btrfs_root *root, | ||
| 1696 | struct btrfs_path *path, u64 dir, | ||
| 1697 | u64 objectid, const char *name, int name_len, | ||
| 1698 | int mod); | ||
| 1699 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | ||
| 1700 | struct btrfs_path *path, | ||
| 1701 | const char *name, int name_len); | ||
| 1702 | int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | ||
| 1703 | struct btrfs_root *root, | ||
| 1704 | struct btrfs_path *path, | ||
| 1705 | struct btrfs_dir_item *di); | ||
| 1706 | int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | ||
| 1707 | struct btrfs_root *root, const char *name, | ||
| 1708 | u16 name_len, const void *data, u16 data_len, | ||
| 1709 | u64 dir); | ||
| 1710 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | ||
| 1711 | struct btrfs_root *root, | ||
| 1712 | struct btrfs_path *path, u64 dir, | ||
| 1713 | const char *name, u16 name_len, | ||
| 1714 | int mod); | ||
| 1715 | |||
| 1716 | /* orphan.c */ | ||
| 1717 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | ||
| 1718 | struct btrfs_root *root, u64 offset); | ||
| 1719 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | ||
| 1720 | struct btrfs_root *root, u64 offset); | ||
| 1721 | |||
| 1722 | /* inode-map.c */ | ||
| 1723 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | ||
| 1724 | struct btrfs_root *fs_root, | ||
| 1725 | u64 dirid, u64 *objectid); | ||
| 1726 | int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); | ||
| 1727 | |||
| 1728 | /* inode-item.c */ | ||
| 1729 | int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | ||
| 1730 | struct btrfs_root *root, | ||
| 1731 | const char *name, int name_len, | ||
| 1732 | u64 inode_objectid, u64 ref_objectid, u64 index); | ||
| 1733 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | ||
| 1734 | struct btrfs_root *root, | ||
| 1735 | const char *name, int name_len, | ||
| 1736 | u64 inode_objectid, u64 ref_objectid, u64 *index); | ||
| 1737 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | ||
| 1738 | struct btrfs_root *root, | ||
| 1739 | struct btrfs_path *path, u64 objectid); | ||
| 1740 | int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 1741 | *root, struct btrfs_path *path, | ||
| 1742 | struct btrfs_key *location, int mod); | ||
| 1743 | |||
| 1744 | /* file-item.c */ | ||
| 1745 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
| 1746 | struct bio *bio); | ||
| 1747 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | ||
| 1748 | struct btrfs_root *root, | ||
| 1749 | u64 objectid, u64 pos, u64 disk_offset, | ||
| 1750 | u64 disk_num_bytes, | ||
| 1751 | u64 num_bytes, u64 offset); | ||
| 1752 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | ||
| 1753 | struct btrfs_root *root, | ||
| 1754 | struct btrfs_path *path, u64 objectid, | ||
| 1755 | u64 bytenr, int mod); | ||
| 1756 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | ||
| 1757 | struct btrfs_root *root, struct inode *inode, | ||
| 1758 | struct btrfs_ordered_sum *sums); | ||
| 1759 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | ||
| 1760 | struct bio *bio); | ||
| 1761 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | ||
| 1762 | struct btrfs_root *root, | ||
| 1763 | struct btrfs_path *path, | ||
| 1764 | u64 objectid, u64 offset, | ||
| 1765 | int cow); | ||
| 1766 | int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | ||
| 1767 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 1768 | u64 isize); | ||
| 1769 | /* inode.c */ | ||
| 1770 | |||
| 1771 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ | ||
| 1772 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) | ||
| 1773 | #define ClearPageChecked ClearPageFsMisc | ||
| 1774 | #define SetPageChecked SetPageFsMisc | ||
| 1775 | #define PageChecked PageFsMisc | ||
| 1776 | #endif | ||
| 1777 | |||
| 1778 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | ||
| 1779 | struct btrfs_root *root, | ||
| 1780 | struct inode *dir, struct inode *inode, | ||
| 1781 | const char *name, int name_len); | ||
| 1782 | int btrfs_add_link(struct btrfs_trans_handle *trans, | ||
| 1783 | struct inode *parent_inode, struct inode *inode, | ||
| 1784 | const char *name, int name_len, int add_backref, u64 index); | ||
| 1785 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | ||
| 1786 | struct btrfs_root *root, | ||
| 1787 | struct inode *inode, u64 new_size, | ||
| 1788 | u32 min_type); | ||
| 1789 | |||
| 1790 | int btrfs_start_delalloc_inodes(struct btrfs_root *root); | ||
| 1791 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); | ||
| 1792 | int btrfs_writepages(struct address_space *mapping, | ||
| 1793 | struct writeback_control *wbc); | ||
| 1794 | int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, | ||
| 1795 | struct btrfs_trans_handle *trans, u64 new_dirid, | ||
| 1796 | struct btrfs_block_group_cache *block_group); | ||
| 1797 | |||
| 1798 | void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, | ||
| 1799 | int namelen); | ||
| 1800 | |||
| 1801 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | ||
| 1802 | size_t size, struct bio *bio); | ||
| 1803 | |||
| 1804 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
| 1805 | struct file_ra_state *ra, struct file *file, | ||
| 1806 | pgoff_t offset, pgoff_t last_index); | ||
| 1807 | int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | ||
| 1808 | int for_del); | ||
| 1809 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); | ||
| 1810 | int btrfs_readpage(struct file *file, struct page *page); | ||
| 1811 | void btrfs_delete_inode(struct inode *inode); | ||
| 1812 | void btrfs_put_inode(struct inode *inode); | ||
| 1813 | void btrfs_read_locked_inode(struct inode *inode); | ||
| 1814 | int btrfs_write_inode(struct inode *inode, int wait); | ||
| 1815 | void btrfs_dirty_inode(struct inode *inode); | ||
| 1816 | struct inode *btrfs_alloc_inode(struct super_block *sb); | ||
| 1817 | void btrfs_destroy_inode(struct inode *inode); | ||
| 1818 | int btrfs_init_cachep(void); | ||
| 1819 | void btrfs_destroy_cachep(void); | ||
| 1820 | long btrfs_ioctl_trans_end(struct file *file); | ||
| 1821 | struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, | ||
| 1822 | struct btrfs_root *root, int wait); | ||
| 1823 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | ||
| 1824 | struct btrfs_root *root); | ||
| 1825 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | ||
| 1826 | struct btrfs_root *root, int *is_new); | ||
| 1827 | int btrfs_commit_write(struct file *file, struct page *page, | ||
| 1828 | unsigned from, unsigned to); | ||
| 1829 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | ||
| 1830 | size_t page_offset, u64 start, u64 end, | ||
| 1831 | int create); | ||
| 1832 | int btrfs_update_inode(struct btrfs_trans_handle *trans, | ||
| 1833 | struct btrfs_root *root, | ||
| 1834 | struct inode *inode); | ||
| 1835 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | ||
| 1836 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | ||
| 1837 | void btrfs_orphan_cleanup(struct btrfs_root *root); | ||
| 1838 | |||
| 1839 | /* ioctl.c */ | ||
| 1840 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | ||
| 1841 | |||
| 1842 | /* file.c */ | ||
| 1843 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | ||
| 1844 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | ||
| 1845 | int skip_pinned); | ||
| 1846 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | ||
| 1847 | extern struct file_operations btrfs_file_operations; | ||
| 1848 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
| 1849 | struct btrfs_root *root, struct inode *inode, | ||
| 1850 | u64 start, u64 end, u64 inline_limit, u64 *hint_block); | ||
| 1851 | int btrfs_release_file(struct inode *inode, struct file *file); | ||
| 1852 | |||
| 1853 | /* tree-defrag.c */ | ||
| 1854 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | ||
| 1855 | struct btrfs_root *root, int cache_only); | ||
| 1856 | |||
| 1857 | /* sysfs.c */ | ||
| 1858 | int btrfs_init_sysfs(void); | ||
| 1859 | void btrfs_exit_sysfs(void); | ||
| 1860 | int btrfs_sysfs_add_super(struct btrfs_fs_info *fs); | ||
| 1861 | int btrfs_sysfs_add_root(struct btrfs_root *root); | ||
| 1862 | void btrfs_sysfs_del_root(struct btrfs_root *root); | ||
| 1863 | void btrfs_sysfs_del_super(struct btrfs_fs_info *root); | ||
| 1864 | |||
| 1865 | /* xattr.c */ | ||
| 1866 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | ||
| 1867 | |||
| 1868 | /* super.c */ | ||
| 1869 | u64 btrfs_parse_size(char *str); | ||
| 1870 | int btrfs_parse_options(struct btrfs_root *root, char *options); | ||
| 1871 | int btrfs_sync_fs(struct super_block *sb, int wait); | ||
| 1872 | |||
| 1873 | /* acl.c */ | ||
| 1874 | int btrfs_check_acl(struct inode *inode, int mask); | ||
| 1875 | int btrfs_init_acl(struct inode *inode, struct inode *dir); | ||
| 1876 | int btrfs_acl_chmod(struct inode *inode); | ||
| 1877 | |||
| 1878 | /* free-space-cache.c */ | ||
| 1879 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
| 1880 | u64 bytenr, u64 size); | ||
| 1881 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
| 1882 | u64 bytenr, u64 size); | ||
| 1883 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
| 1884 | *block_group); | ||
| 1885 | struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache | ||
| 1886 | *block_group, u64 offset, | ||
| 1887 | u64 bytes); | ||
| 1888 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
| 1889 | u64 bytes); | ||
| 1890 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
| 1891 | #endif | ||
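
The prototypes above are typically used under an open transaction. As a hedged illustration (not code from this commit), the lookup-then-delete pattern that btrfs_unlink_inode() follows looks roughly like the sketch below; demo_del_name() is a hypothetical helper and its error handling is trimmed:

    /*
     * Sketch only: remove one directory name, mirroring the pattern
     * used by btrfs_unlink_inode() in this commit. Assumes the caller
     * holds an open transaction handle.
     */
    static int demo_del_name(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 dir,
                             const char *name, int name_len)
    {
            struct btrfs_path *path;
            struct btrfs_dir_item *di;
            int ret;

            path = btrfs_alloc_path();
            if (!path)
                    return -ENOMEM;

            /* mod < 0: we intend to delete, so the search reserves room */
            di = btrfs_lookup_dir_item(trans, root, path, dir,
                                       name, name_len, -1);
            if (IS_ERR(di))
                    ret = PTR_ERR(di);
            else if (!di)
                    ret = -ENOENT;
            else
                    ret = btrfs_delete_one_dir_name(trans, root, path, di);
            btrfs_free_path(path);
            return ret;
    }
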
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c new file mode 100644 index 00000000000..5040b71f190 --- /dev/null +++ b/fs/btrfs/dir-item.c | |||
| @@ -0,0 +1,386 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "ctree.h" | ||
| 20 | #include "disk-io.h" | ||
| 21 | #include "hash.h" | ||
| 22 | #include "transaction.h" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * insert a name into a directory, handling overflow into an existing | ||
| 26 | * item when there is a hash collision. data_size indicates how big the | ||
| 27 | * inserted item should be. On success a struct btrfs_dir_item pointer | ||
| 28 | * is returned, otherwise an ERR_PTR. | ||
| 29 | * | ||
| 30 | * The name is not copied into the dir item; the caller must do that. | ||
| 31 | */ | ||
| 32 | static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle | ||
| 33 | *trans, | ||
| 34 | struct btrfs_root *root, | ||
| 35 | struct btrfs_path *path, | ||
| 36 | struct btrfs_key *cpu_key, | ||
| 37 | u32 data_size, | ||
| 38 | const char *name, | ||
| 39 | int name_len) | ||
| 40 | { | ||
| 41 | int ret; | ||
| 42 | char *ptr; | ||
| 43 | struct btrfs_item *item; | ||
| 44 | struct extent_buffer *leaf; | ||
| 45 | |||
| 46 | ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); | ||
| 47 | if (ret == -EEXIST) { | ||
| 48 | struct btrfs_dir_item *di; | ||
| 49 | di = btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 50 | if (di) | ||
| 51 | return ERR_PTR(-EEXIST); | ||
| 52 | ret = btrfs_extend_item(trans, root, path, data_size); | ||
| 53 | WARN_ON(ret > 0); | ||
| 54 | } | ||
| 55 | if (ret < 0) | ||
| 56 | return ERR_PTR(ret); | ||
| 57 | WARN_ON(ret > 0); | ||
| 58 | leaf = path->nodes[0]; | ||
| 59 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
| 60 | ptr = btrfs_item_ptr(leaf, path->slots[0], char); | ||
| 61 | BUG_ON(data_size > btrfs_item_size(leaf, item)); | ||
| 62 | ptr += btrfs_item_size(leaf, item) - data_size; | ||
| 63 | return (struct btrfs_dir_item *)ptr; | ||
| 64 | } | ||
| 65 | |||
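
insert_with_overflow() never returns NULL: failures come back as an errno encoded in the pointer itself. A minimal user-space sketch of that convention, assuming the usual encoding from the kernel's include/linux/err.h (MAX_ERRNO and the helpers below are re-declared here only for illustration):

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    /* stand-in for a lookup that hits an existing entry */
    static void *lookup_demo(void)
    {
            return ERR_PTR(-EEXIST);
    }

    int main(void)
    {
            void *p = lookup_demo();

            if (IS_ERR(p))
                    printf("lookup failed: %ld\n", PTR_ERR(p)); /* -17 */
            return 0;
    }
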
| 66 | /* | ||
| 67 | * xattrs work a lot like directories, this inserts an xattr item | ||
| 68 | * into the tree | ||
| 69 | */ | ||
| 70 | int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | ||
| 71 | struct btrfs_root *root, const char *name, | ||
| 72 | u16 name_len, const void *data, u16 data_len, | ||
| 73 | u64 dir) | ||
| 74 | { | ||
| 75 | int ret = 0; | ||
| 76 | struct btrfs_path *path; | ||
| 77 | struct btrfs_dir_item *dir_item; | ||
| 78 | unsigned long name_ptr, data_ptr; | ||
| 79 | struct btrfs_key key, location; | ||
| 80 | struct btrfs_disk_key disk_key; | ||
| 81 | struct extent_buffer *leaf; | ||
| 82 | u32 data_size; | ||
| 83 | |||
| 84 | key.objectid = dir; | ||
| 85 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | ||
| 86 | key.offset = btrfs_name_hash(name, name_len); | ||
| 87 | if (name_len + data_len + sizeof(struct btrfs_dir_item) > | ||
| 88 | BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item)) | ||
| 89 | return -ENOSPC; | ||
| 90 | path = btrfs_alloc_path(); | ||
| 91 | if (!path) | ||
| 92 | return -ENOMEM; | ||
| 93 | |||
| 94 | data_size = sizeof(*dir_item) + name_len + data_len; | ||
| 95 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | ||
| 96 | name, name_len); | ||
| 97 | /* | ||
| 98 | * FIXME: at some point we should handle xattrs that are larger than | ||
| 99 | * what we can fit in our leaf. We set location to NULL because we | ||
| 100 | * aren't pointing at anything else; that will change if we store the | ||
| 101 | * xattr data in a separate inode. | ||
| 102 | */ | ||
| 103 | BUG_ON(IS_ERR(dir_item)); | ||
| 104 | memset(&location, 0, sizeof(location)); | ||
| 105 | |||
| 106 | leaf = path->nodes[0]; | ||
| 107 | btrfs_cpu_key_to_disk(&disk_key, &location); | ||
| 108 | btrfs_set_dir_item_key(leaf, dir_item, &disk_key); | ||
| 109 | btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); | ||
| 110 | btrfs_set_dir_name_len(leaf, dir_item, name_len); | ||
| 111 | btrfs_set_dir_transid(leaf, dir_item, trans->transid); | ||
| 112 | btrfs_set_dir_data_len(leaf, dir_item, data_len); | ||
| 113 | name_ptr = (unsigned long)(dir_item + 1); | ||
| 114 | data_ptr = (unsigned long)((char *)name_ptr + name_len); | ||
| 115 | |||
| 116 | write_extent_buffer(leaf, name, name_ptr, name_len); | ||
| 117 | write_extent_buffer(leaf, data, data_ptr, data_len); | ||
| 118 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 119 | |||
| 120 | btrfs_free_path(path); | ||
| 121 | return ret; | ||
| 122 | } | ||
| 123 | |||
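
The -ENOSPC test above bounds one xattr by the capacity of a single leaf. A small user-space sketch of the same arithmetic; the leaf, header, and struct sizes below are assumptions standing in for BTRFS_LEAF_DATA_SIZE() and the real on-disk structs:

    #include <stdio.h>

    #define LEAF_DATA_SIZE  (4096 - 101)  /* assumed 4K leaf minus header */
    #define DIR_ITEM_SIZE   30            /* assumed sizeof btrfs_dir_item */
    #define ITEM_SIZE       25            /* assumed sizeof btrfs_item */

    static int xattr_fits(unsigned int name_len, unsigned int data_len)
    {
            return name_len + data_len + DIR_ITEM_SIZE <=
                   LEAF_DATA_SIZE - ITEM_SIZE;
    }

    int main(void)
    {
            printf("64B name, 2KB value: %s\n",
                   xattr_fits(64, 2048) ? "fits" : "-ENOSPC");
            printf("64B name, 8KB value: %s\n",
                   xattr_fits(64, 8192) ? "fits" : "-ENOSPC");
            return 0;
    }
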
| 124 | /* | ||
| 125 | * insert a directory item in the tree, updating both the name-hash | ||
| 126 | * index and the sequence-number index. 'dir' indicates which objectid | ||
| 127 | * to insert it into, 'location' is the key to store in the directory | ||
| 128 | * item, 'type' is the type of the inode we're pointing to, and 'index' | ||
| 129 | * is the sequence number to use for the second index (if one is created). | ||
| 130 | */ | ||
| 131 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 132 | *root, const char *name, int name_len, u64 dir, | ||
| 133 | struct btrfs_key *location, u8 type, u64 index) | ||
| 134 | { | ||
| 135 | int ret = 0; | ||
| 136 | int ret2 = 0; | ||
| 137 | struct btrfs_path *path; | ||
| 138 | struct btrfs_dir_item *dir_item; | ||
| 139 | struct extent_buffer *leaf; | ||
| 140 | unsigned long name_ptr; | ||
| 141 | struct btrfs_key key; | ||
| 142 | struct btrfs_disk_key disk_key; | ||
| 143 | u32 data_size; | ||
| 144 | |||
| 145 | key.objectid = dir; | ||
| 146 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | ||
| 147 | key.offset = btrfs_name_hash(name, name_len); | ||
| 148 | path = btrfs_alloc_path(); | ||
| | if (!path) | ||
| | return -ENOMEM; | ||
| 149 | data_size = sizeof(*dir_item) + name_len; | ||
| 150 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | ||
| 151 | name, name_len); | ||
| 152 | if (IS_ERR(dir_item)) { | ||
| 153 | ret = PTR_ERR(dir_item); | ||
| 154 | if (ret == -EEXIST) | ||
| 155 | goto second_insert; | ||
| 156 | goto out; | ||
| 157 | } | ||
| 158 | |||
| 159 | leaf = path->nodes[0]; | ||
| 160 | btrfs_cpu_key_to_disk(&disk_key, location); | ||
| 161 | btrfs_set_dir_item_key(leaf, dir_item, &disk_key); | ||
| 162 | btrfs_set_dir_type(leaf, dir_item, type); | ||
| 163 | btrfs_set_dir_data_len(leaf, dir_item, 0); | ||
| 164 | btrfs_set_dir_name_len(leaf, dir_item, name_len); | ||
| 165 | btrfs_set_dir_transid(leaf, dir_item, trans->transid); | ||
| 166 | name_ptr = (unsigned long)(dir_item + 1); | ||
| 167 | |||
| 168 | write_extent_buffer(leaf, name, name_ptr, name_len); | ||
| 169 | btrfs_mark_buffer_dirty(leaf); | ||
| 170 | |||
| 171 | second_insert: | ||
| 172 | /* FIXME, use some real flag for selecting the extra index */ | ||
| 173 | if (root == root->fs_info->tree_root) { | ||
| 174 | ret = 0; | ||
| 175 | goto out; | ||
| 176 | } | ||
| 177 | btrfs_release_path(root, path); | ||
| 178 | |||
| 179 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | ||
| 180 | key.offset = index; | ||
| 181 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | ||
| 182 | name, name_len); | ||
| 183 | if (IS_ERR(dir_item)) { | ||
| 184 | ret2 = PTR_ERR(dir_item); | ||
| 185 | goto out; | ||
| 186 | } | ||
| 187 | leaf = path->nodes[0]; | ||
| 188 | btrfs_cpu_key_to_disk(&disk_key, location); | ||
| 189 | btrfs_set_dir_item_key(leaf, dir_item, &disk_key); | ||
| 190 | btrfs_set_dir_type(leaf, dir_item, type); | ||
| 191 | btrfs_set_dir_data_len(leaf, dir_item, 0); | ||
| 192 | btrfs_set_dir_name_len(leaf, dir_item, name_len); | ||
| 193 | btrfs_set_dir_transid(leaf, dir_item, trans->transid); | ||
| 194 | name_ptr = (unsigned long)(dir_item + 1); | ||
| 195 | write_extent_buffer(leaf, name, name_ptr, name_len); | ||
| 196 | btrfs_mark_buffer_dirty(leaf); | ||
| 197 | out: | ||
| 198 | btrfs_free_path(path); | ||
| 199 | if (ret) | ||
| 200 | return ret; | ||
| 201 | if (ret2) | ||
| 202 | return ret2; | ||
| 203 | return 0; | ||
| 204 | } | ||
| 205 | |||
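
Each name therefore lands in the tree twice: once keyed by name hash for lookups, once keyed by a monotonically increasing index for readdir ordering. A sketch of the two keys built above; the hash is a toy stand-in (btrfs_name_hash() is CRC based) and the key-type values follow the definitions in this commit's ctree.h:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    struct demo_key {
            uint64_t objectid;  /* the directory's objectid */
            uint8_t  type;      /* DIR_ITEM or DIR_INDEX */
            uint64_t offset;    /* name hash, or readdir sequence number */
    };

    #define DEMO_DIR_ITEM_KEY  84
    #define DEMO_DIR_INDEX_KEY 96

    /* toy stand-in for btrfs_name_hash() */
    static uint64_t demo_name_hash(const char *name)
    {
            uint64_t h = 5381;

            while (*name)
                    h = h * 33 + (unsigned char)*name++;
            return h;
    }

    int main(void)
    {
            uint64_t dir = 256, next_index = 2;
            struct demo_key by_name = { dir, DEMO_DIR_ITEM_KEY,
                                        demo_name_hash("hello.txt") };
            struct demo_key by_seq  = { dir, DEMO_DIR_INDEX_KEY, next_index };

            printf("name key:  (%" PRIu64 " %u %" PRIu64 ")\n",
                   by_name.objectid, (unsigned)by_name.type, by_name.offset);
            printf("index key: (%" PRIu64 " %u %" PRIu64 ")\n",
                   by_seq.objectid, (unsigned)by_seq.type, by_seq.offset);
            return 0;
    }
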
| 206 | /* | ||
| 207 | * lookup a directory item based on name. 'dir' is the objectid | ||
| 208 | * we're searching in, and 'mod' tells us whether the caller plans to | ||
| 209 | * delete the item (mod < 0) or to modify it in place (mod > 0) | ||
| 210 | */ | ||
| 211 | struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | ||
| 212 | struct btrfs_root *root, | ||
| 213 | struct btrfs_path *path, u64 dir, | ||
| 214 | const char *name, int name_len, | ||
| 215 | int mod) | ||
| 216 | { | ||
| 217 | int ret; | ||
| 218 | struct btrfs_key key; | ||
| 219 | int ins_len = mod < 0 ? -1 : 0; | ||
| 220 | int cow = mod != 0; | ||
| 221 | struct btrfs_key found_key; | ||
| 222 | struct extent_buffer *leaf; | ||
| 223 | |||
| 224 | key.objectid = dir; | ||
| 225 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | ||
| 226 | |||
| 227 | key.offset = btrfs_name_hash(name, name_len); | ||
| 228 | |||
| 229 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
| 230 | if (ret < 0) | ||
| 231 | return ERR_PTR(ret); | ||
| 232 | if (ret > 0) { | ||
| 233 | if (path->slots[0] == 0) | ||
| 234 | return NULL; | ||
| 235 | path->slots[0]--; | ||
| 236 | } | ||
| 237 | |||
| 238 | leaf = path->nodes[0]; | ||
| 239 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 240 | |||
| 241 | if (found_key.objectid != dir || | ||
| 242 | btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || | ||
| 243 | found_key.offset != key.offset) | ||
| 244 | return NULL; | ||
| 245 | |||
| 246 | return btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * lookup a directory item based on index. 'dir' is the objectid | ||
| 251 | * we're searching in, and 'mod' tells us whether the caller plans to | ||
| 252 | * delete the item (mod < 0) or to modify it in place (mod > 0) | ||
| 253 | * | ||
| 254 | * The name is used to make sure the index really points to the name | ||
| 255 | * the caller was looking for. | ||
| 256 | */ | ||
| 257 | struct btrfs_dir_item * | ||
| 258 | btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | ||
| 259 | struct btrfs_root *root, | ||
| 260 | struct btrfs_path *path, u64 dir, | ||
| 261 | u64 objectid, const char *name, int name_len, | ||
| 262 | int mod) | ||
| 263 | { | ||
| 264 | int ret; | ||
| 265 | struct btrfs_key key; | ||
| 266 | int ins_len = mod < 0 ? -1 : 0; | ||
| 267 | int cow = mod != 0; | ||
| 268 | |||
| 269 | key.objectid = dir; | ||
| 270 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | ||
| 271 | key.offset = objectid; | ||
| 272 | |||
| 273 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
| 274 | if (ret < 0) | ||
| 275 | return ERR_PTR(ret); | ||
| 276 | if (ret > 0) | ||
| 277 | return ERR_PTR(-ENOENT); | ||
| 278 | return btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 279 | } | ||
| 280 | |||
| 281 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | ||
| 282 | struct btrfs_root *root, | ||
| 283 | struct btrfs_path *path, u64 dir, | ||
| 284 | const char *name, u16 name_len, | ||
| 285 | int mod) | ||
| 286 | { | ||
| 287 | int ret; | ||
| 288 | struct btrfs_key key; | ||
| 289 | int ins_len = mod < 0 ? -1 : 0; | ||
| 290 | int cow = mod != 0; | ||
| 291 | struct btrfs_key found_key; | ||
| 292 | struct extent_buffer *leaf; | ||
| 293 | |||
| 294 | key.objectid = dir; | ||
| 295 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | ||
| 296 | key.offset = btrfs_name_hash(name, name_len); | ||
| 297 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
| 298 | if (ret < 0) | ||
| 299 | return ERR_PTR(ret); | ||
| 300 | if (ret > 0) { | ||
| 301 | if (path->slots[0] == 0) | ||
| 302 | return NULL; | ||
| 303 | path->slots[0]--; | ||
| 304 | } | ||
| 305 | |||
| 306 | leaf = path->nodes[0]; | ||
| 307 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 308 | |||
| 309 | if (found_key.objectid != dir || | ||
| 310 | btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || | ||
| 311 | found_key.offset != key.offset) | ||
| 312 | return NULL; | ||
| 313 | |||
| 314 | return btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 315 | } | ||
| 316 | |||
| 317 | /* | ||
| 318 | * helper function to look at the directory item pointed to by 'path' | ||
| 319 | * this walks through all the entries in a dir item and finds one | ||
| 320 | * for a specific name. | ||
| 321 | */ | ||
| 322 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | ||
| 323 | struct btrfs_path *path, | ||
| 324 | const char *name, int name_len) | ||
| 325 | { | ||
| 326 | struct btrfs_dir_item *dir_item; | ||
| 327 | unsigned long name_ptr; | ||
| 328 | u32 total_len; | ||
| 329 | u32 cur = 0; | ||
| 330 | u32 this_len; | ||
| 331 | struct extent_buffer *leaf; | ||
| 332 | |||
| 333 | leaf = path->nodes[0]; | ||
| 334 | dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); | ||
| 335 | total_len = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 336 | while (cur < total_len) { | ||
| 337 | this_len = sizeof(*dir_item) + | ||
| 338 | btrfs_dir_name_len(leaf, dir_item) + | ||
| 339 | btrfs_dir_data_len(leaf, dir_item); | ||
| 340 | name_ptr = (unsigned long)(dir_item + 1); | ||
| 341 | |||
| 342 | if (btrfs_dir_name_len(leaf, dir_item) == name_len && | ||
| 343 | memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) | ||
| 344 | return dir_item; | ||
| 345 | |||
| 346 | cur += this_len; | ||
| 347 | dir_item = (struct btrfs_dir_item *)((char *)dir_item + | ||
| 348 | this_len); | ||
| 349 | } | ||
| 350 | return NULL; | ||
| 351 | } | ||
| 352 | |||
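
Because colliding names share one item, the helper above walks a packed run of variable-length records. The same walk in self-contained user-space form, with the record layout simplified to a two-byte length followed by the name bytes:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    static const uint8_t *match_name(const uint8_t *buf, uint32_t total_len,
                                     const char *name, uint16_t name_len)
    {
            uint32_t cur = 0;

            while (cur < total_len) {
                    uint16_t this_name_len;

                    memcpy(&this_name_len, buf + cur, 2);
                    if (this_name_len == name_len &&
                        !memcmp(buf + cur + 2, name, name_len))
                            return buf + cur;       /* found it */
                    cur += 2 + this_name_len;       /* skip to next record */
            }
            return NULL;
    }

    int main(void)
    {
            uint8_t buf[32];
            uint16_t l1 = 3, l2 = 5;
            uint32_t off = 0;

            memcpy(buf + off, &l1, 2); off += 2;
            memcpy(buf + off, "foo", 3); off += 3;
            memcpy(buf + off, &l2, 2); off += 2;
            memcpy(buf + off, "fooba", 5); off += 5;

            printf("found: %s\n",
                   match_name(buf, off, "fooba", 5) ? "yes" : "no");
            return 0;
    }
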
| 353 | /* | ||
| 354 | * given a pointer into a directory item, delete it. This | ||
| 355 | * handles items that have more than one entry in them. | ||
| 356 | */ | ||
| 357 | int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | ||
| 358 | struct btrfs_root *root, | ||
| 359 | struct btrfs_path *path, | ||
| 360 | struct btrfs_dir_item *di) | ||
| 361 | { | ||
| 362 | |||
| 363 | struct extent_buffer *leaf; | ||
| 364 | u32 sub_item_len; | ||
| 365 | u32 item_len; | ||
| 366 | int ret = 0; | ||
| 367 | |||
| 368 | leaf = path->nodes[0]; | ||
| 369 | sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) + | ||
| 370 | btrfs_dir_data_len(leaf, di); | ||
| 371 | item_len = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 372 | if (sub_item_len == item_len) { | ||
| 373 | ret = btrfs_del_item(trans, root, path); | ||
| 374 | } else { | ||
| 375 | /* shift the later entries down over the deleted one */ | ||
| 376 | unsigned long ptr = (unsigned long)di; | ||
| 377 | unsigned long start; | ||
| 378 | |||
| 379 | start = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 380 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, | ||
| 381 | item_len - (ptr + sub_item_len - start)); | ||
| 382 | ret = btrfs_truncate_item(trans, root, path, | ||
| 383 | item_len - sub_item_len, 1); | ||
| 384 | } | ||
| 385 | return ret; | ||
| 386 | } | ||
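
Deleting one name from a shared item is the inverse of the walk above: slide the later records over the victim, then shrink the item. A user-space sketch of the same move, with btrfs_truncate_item() modeled by simply tracking the new length:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    /* remove sub_len bytes at pos from a buffer holding item_len bytes */
    static uint32_t delete_record(uint8_t *buf, uint32_t item_len,
                                  uint32_t pos, uint32_t sub_len)
    {
            memmove(buf + pos, buf + pos + sub_len,
                    item_len - (pos + sub_len));
            return item_len - sub_len;      /* the truncated item size */
    }

    int main(void)
    {
            uint8_t buf[] = "AAAABBBBCCCC";
            uint32_t len = 12;

            len = delete_record(buf, len, 4, 4);    /* drop "BBBB" */
            printf("%.*s\n", (int)len, buf);        /* AAAACCCC */
            return 0;
    }
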
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c new file mode 100644 index 00000000000..0be044bb619 --- /dev/null +++ b/fs/btrfs/disk-io.c | |||
| @@ -0,0 +1,2078 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/version.h> | ||
| 20 | #include <linux/fs.h> | ||
| 21 | #include <linux/blkdev.h> | ||
| 22 | #include <linux/scatterlist.h> | ||
| 23 | #include <linux/swap.h> | ||
| 24 | #include <linux/radix-tree.h> | ||
| 25 | #include <linux/writeback.h> | ||
| 26 | #include <linux/buffer_head.h> /* for block_sync_page */ | ||
| 27 | #include <linux/workqueue.h> | ||
| 28 | #include <linux/kthread.h> | ||
| 29 | #include <linux/freezer.h> | ||
| 30 | #include "crc32c.h" | ||
| 31 | #include "ctree.h" | ||
| 32 | #include "disk-io.h" | ||
| 33 | #include "transaction.h" | ||
| 34 | #include "btrfs_inode.h" | ||
| 35 | #include "volumes.h" | ||
| 36 | #include "print-tree.h" | ||
| 37 | #include "async-thread.h" | ||
| 38 | #include "locking.h" | ||
| 39 | #include "ref-cache.h" | ||
| 40 | #include "tree-log.h" | ||
| 41 | |||
| 42 | #if 0 | ||
| 43 | static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) | ||
| 44 | { | ||
| 45 | if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { | ||
| 46 | printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", | ||
| 47 | (unsigned long long)extent_buffer_blocknr(buf), | ||
| 48 | (unsigned long long)btrfs_header_blocknr(buf)); | ||
| 49 | return 1; | ||
| 50 | } | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | #endif | ||
| 54 | |||
| 55 | static struct extent_io_ops btree_extent_io_ops; | ||
| 56 | static void end_workqueue_fn(struct btrfs_work *work); | ||
| 57 | |||
| 58 | /* | ||
| 59 | * end_io_wq structs are used to do processing in task context when an IO is | ||
| 60 | * complete. This is used during reads to verify checksums, and it is used | ||
| 61 | * by writes to insert metadata for new file extents after IO is complete. | ||
| 62 | */ | ||
| 63 | struct end_io_wq { | ||
| 64 | struct bio *bio; | ||
| 65 | bio_end_io_t *end_io; | ||
| 66 | void *private; | ||
| 67 | struct btrfs_fs_info *info; | ||
| 68 | int error; | ||
| 69 | int metadata; | ||
| 70 | struct list_head list; | ||
| 71 | struct btrfs_work work; | ||
| 72 | }; | ||
| 73 | |||
| 74 | /* | ||
| 75 | * async submit bios are used to offload expensive checksumming | ||
| 76 | * onto the worker threads. They checksum file and metadata bios | ||
| 77 | * just before they are sent down the IO stack. | ||
| 78 | */ | ||
| 79 | struct async_submit_bio { | ||
| 80 | struct inode *inode; | ||
| 81 | struct bio *bio; | ||
| 82 | struct list_head list; | ||
| 83 | extent_submit_bio_hook_t *submit_bio_hook; | ||
| 84 | int rw; | ||
| 85 | int mirror_num; | ||
| 86 | struct btrfs_work work; | ||
| 87 | }; | ||
| 88 | |||
| 89 | /* | ||
| 90 | * extents on the btree inode are pretty simple: a single extent | ||
| 91 | * covers the entire device | ||
| 92 | */ | ||
| 93 | struct extent_map *btree_get_extent(struct inode *inode, struct page *page, | ||
| 94 | size_t page_offset, u64 start, u64 len, | ||
| 95 | int create) | ||
| 96 | { | ||
| 97 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 98 | struct extent_map *em; | ||
| 99 | int ret; | ||
| 100 | |||
| 101 | spin_lock(&em_tree->lock); | ||
| 102 | em = lookup_extent_mapping(em_tree, start, len); | ||
| 103 | if (em) { | ||
| 104 | em->bdev = | ||
| 105 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 106 | spin_unlock(&em_tree->lock); | ||
| 107 | goto out; | ||
| 108 | } | ||
| 109 | spin_unlock(&em_tree->lock); | ||
| 110 | |||
| 111 | em = alloc_extent_map(GFP_NOFS); | ||
| 112 | if (!em) { | ||
| 113 | em = ERR_PTR(-ENOMEM); | ||
| 114 | goto out; | ||
| 115 | } | ||
| 116 | em->start = 0; | ||
| 117 | em->len = (u64)-1; | ||
| 118 | em->block_start = 0; | ||
| 119 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 120 | |||
| 121 | spin_lock(&em_tree->lock); | ||
| 122 | ret = add_extent_mapping(em_tree, em); | ||
| 123 | if (ret == -EEXIST) { | ||
| 124 | u64 failed_start = em->start; | ||
| 125 | u64 failed_len = em->len; | ||
| 126 | |||
| 127 | printk("failed to insert %Lu %Lu -> %Lu into tree\n", | ||
| 128 | em->start, em->len, em->block_start); | ||
| 129 | free_extent_map(em); | ||
| 130 | em = lookup_extent_mapping(em_tree, start, len); | ||
| 131 | if (em) { | ||
| 132 | printk("after failing, found %Lu %Lu %Lu\n", | ||
| 133 | em->start, em->len, em->block_start); | ||
| 134 | ret = 0; | ||
| 135 | } else { | ||
| 136 | em = lookup_extent_mapping(em_tree, failed_start, | ||
| 137 | failed_len); | ||
| 138 | if (em) { | ||
| 139 | printk("double failure lookup gives us " | ||
| 140 | "%Lu %Lu -> %Lu\n", em->start, | ||
| 141 | em->len, em->block_start); | ||
| 142 | free_extent_map(em); | ||
| 143 | } | ||
| 144 | ret = -EIO; | ||
| 145 | } | ||
| 146 | } else if (ret) { | ||
| 147 | free_extent_map(em); | ||
| 148 | em = NULL; | ||
| 149 | } | ||
| 150 | spin_unlock(&em_tree->lock); | ||
| 151 | |||
| 152 | if (ret) | ||
| 153 | em = ERR_PTR(ret); | ||
| 154 | out: | ||
| 155 | return em; | ||
| 156 | } | ||
| 157 | |||
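
The -EEXIST branch above is an optimistic-insert pattern: build the mapping outside the lock, try to insert it, and on collision fall back to whatever a racing thread already inserted. A stripped-down sketch of that shape with a one-slot stand-in for the extent map tree:

    #include <stdio.h>

    #define DEMO_EEXIST 17

    struct one_slot { int present; int value; };

    static int add_mapping(struct one_slot *tree, int value)
    {
            if (tree->present)
                    return -DEMO_EEXIST;    /* someone beat us to it */
            tree->present = 1;
            tree->value = value;
            return 0;
    }

    static int get_mapping(struct one_slot *tree, int ours)
    {
            if (add_mapping(tree, ours) == -DEMO_EEXIST)
                    return tree->value;     /* reuse the winner's mapping */
            return ours;
    }

    int main(void)
    {
            struct one_slot tree = { 1, 99 };   /* a racing insert of 99 */

            printf("got %d\n", get_mapping(&tree, 42));  /* prints 99 */
            return 0;
    }
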
| 158 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | ||
| 159 | { | ||
| 160 | return btrfs_crc32c(seed, data, len); | ||
| 161 | } | ||
| 162 | |||
| 163 | void btrfs_csum_final(u32 crc, char *result) | ||
| 164 | { | ||
| 165 | *(__le32 *)result = ~cpu_to_le32(crc); | ||
| 166 | } | ||
| 167 | |||
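
btrfs_csum_data()/btrfs_csum_final() follow the standard running-CRC pattern: seed with all ones, fold each mapped chunk using the previous value as the seed, and invert once at the end before storing the result little-endian. A self-contained user-space sketch; crc32c_sw() is a bitwise stand-in for the libcrc32c routine the kernel uses:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* bitwise CRC-32C; no init/final inversion, matching crc32c(seed, ...) */
    static uint32_t crc32c_sw(uint32_t crc, const void *data, size_t len)
    {
            const uint8_t *p = data;

            while (len--) {
                    crc ^= *p++;
                    for (int i = 0; i < 8; i++)
                            crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
            }
            return crc;
    }

    int main(void)
    {
            const char blob[] = "some tree block payload";
            uint32_t crc = ~0u;     /* the seed used in csum_tree_block() */

            /* fold in two chunks, mimicking the mapped-page loop */
            crc = crc32c_sw(crc, blob, 10);
            crc = crc32c_sw(crc, blob + 10, sizeof(blob) - 1 - 10);

            printf("csum: %08x\n", ~crc);   /* btrfs_csum_final() */
            return 0;
    }
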
| 168 | /* | ||
| 169 | * compute the csum for a btree block, and either verify it or write it | ||
| 170 | * into the csum field of the block. | ||
| 171 | */ | ||
| 172 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | ||
| 173 | int verify) | ||
| 174 | { | ||
| 175 | char result[BTRFS_CRC32_SIZE]; | ||
| 176 | unsigned long len; | ||
| 177 | unsigned long cur_len; | ||
| 178 | unsigned long offset = BTRFS_CSUM_SIZE; | ||
| 179 | char *map_token = NULL; | ||
| 180 | char *kaddr; | ||
| 181 | unsigned long map_start; | ||
| 182 | unsigned long map_len; | ||
| 183 | int err; | ||
| 184 | u32 crc = ~(u32)0; | ||
| 185 | |||
| 186 | len = buf->len - offset; | ||
| 187 | while (len > 0) { | ||
| 188 | err = map_private_extent_buffer(buf, offset, 32, | ||
| 189 | &map_token, &kaddr, | ||
| 190 | &map_start, &map_len, KM_USER0); | ||
| 191 | if (err) { | ||
| 192 | printk("failed to map extent buffer! %lu\n", | ||
| 193 | offset); | ||
| 194 | return 1; | ||
| 195 | } | ||
| 196 | cur_len = min(len, map_len - (offset - map_start)); | ||
| 197 | crc = btrfs_csum_data(root, kaddr + offset - map_start, | ||
| 198 | crc, cur_len); | ||
| 199 | len -= cur_len; | ||
| 200 | offset += cur_len; | ||
| 201 | unmap_extent_buffer(buf, map_token, KM_USER0); | ||
| 202 | } | ||
| 203 | btrfs_csum_final(crc, result); | ||
| 204 | |||
| 205 | if (verify) { | ||
| 206 | /* FIXME, this is not good */ | ||
| 207 | if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { | ||
| 208 | u32 val; | ||
| 209 | u32 found = 0; | ||
| 210 | memcpy(&found, result, BTRFS_CRC32_SIZE); | ||
| 211 | |||
| 212 | read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); | ||
| 213 | printk("btrfs: %s checksum verify failed on %llu " | ||
| 214 | "wanted %X found %X level %d\n", | ||
| 215 | root->fs_info->sb->s_id, | ||
| 216 | buf->start, val, found, btrfs_header_level(buf)); | ||
| 217 | return 1; | ||
| 218 | } | ||
| 219 | } else { | ||
| 220 | write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE); | ||
| 221 | } | ||
| 222 | return 0; | ||
| 223 | } | ||
| 224 | |||
| 225 | /* | ||
| 226 | * we can't consider a given block up to date unless the transid of the | ||
| 227 | * block matches the transid in the parent node's pointer. This is how we | ||
| 228 | * detect blocks that either didn't get written at all or got written | ||
| 229 | * in the wrong place. | ||
| 230 | */ | ||
| 231 | static int verify_parent_transid(struct extent_io_tree *io_tree, | ||
| 232 | struct extent_buffer *eb, u64 parent_transid) | ||
| 233 | { | ||
| 234 | int ret; | ||
| 235 | |||
| 236 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | ||
| 237 | return 0; | ||
| 238 | |||
| 239 | lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); | ||
| 240 | if (extent_buffer_uptodate(io_tree, eb) && | ||
| 241 | btrfs_header_generation(eb) == parent_transid) { | ||
| 242 | ret = 0; | ||
| 243 | goto out; | ||
| 244 | } | ||
| 245 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | ||
| 246 | (unsigned long long)eb->start, | ||
| 247 | (unsigned long long)parent_transid, | ||
| 248 | (unsigned long long)btrfs_header_generation(eb)); | ||
| 249 | ret = 1; | ||
| 250 | clear_extent_buffer_uptodate(io_tree, eb); | ||
| 251 | out: | ||
| 252 | unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, | ||
| 253 | GFP_NOFS); | ||
| 254 | return ret; | ||
| 255 | } | ||
| 256 | |||
| 257 | /* | ||
| 258 | * helper to read a given tree block, doing retries as required when | ||
| 259 | * the checksums don't match and we have alternate mirrors to try. | ||
| 260 | */ | ||
| 261 | static int btree_read_extent_buffer_pages(struct btrfs_root *root, | ||
| 262 | struct extent_buffer *eb, | ||
| 263 | u64 start, u64 parent_transid) | ||
| 264 | { | ||
| 265 | struct extent_io_tree *io_tree; | ||
| 266 | int ret; | ||
| 267 | int num_copies = 0; | ||
| 268 | int mirror_num = 0; | ||
| 269 | |||
| 270 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | ||
| 271 | while (1) { | ||
| 272 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | ||
| 273 | btree_get_extent, mirror_num); | ||
| 274 | if (!ret && | ||
| 275 | !verify_parent_transid(io_tree, eb, parent_transid)) | ||
| 276 | return ret; | ||
| 277 | printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num); | ||
| 278 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | ||
| 279 | eb->start, eb->len); | ||
| 280 | if (num_copies == 1) | ||
| 281 | return ret; | ||
| 282 | |||
| 283 | mirror_num++; | ||
| 284 | if (mirror_num > num_copies) | ||
| 285 | return ret; | ||
| 286 | } | ||
| 287 | return -EIO; | ||
| 288 | } | ||
| 289 | |||
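
The loop above generalizes to any read path with redundant copies: try mirror 0 first, and on checksum or transid failure step through the remaining mirrors before reporting the error. The same control flow in a runnable sketch, with read_mirror() standing in for read_extent_buffer_pages() plus the transid check:

    #include <stdio.h>

    /* stand-in: mirrors 0 and 1 are bad, mirror 2 is good */
    static int read_mirror(int mirror_num)
    {
            return mirror_num >= 2 ? 0 : -5;    /* -EIO */
    }

    static int read_with_retries(int num_copies)
    {
            int mirror_num = 0;
            int ret;

            while (1) {
                    ret = read_mirror(mirror_num);
                    if (!ret)
                            return 0;           /* found a good copy */
                    if (num_copies == 1)
                            return ret;         /* nothing else to try */
                    mirror_num++;
                    if (mirror_num > num_copies)
                            return ret;         /* all copies exhausted */
            }
    }

    int main(void)
    {
            printf("ret = %d\n", read_with_retries(3));   /* prints 0 */
            return 0;
    }
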
| 290 | /* | ||
| 291 | * checksum a dirty tree block before IO. This has extra checks to make | ||
| 292 | * sure we only fill in the checksum field in the first page of a | ||
| | * multi-page block | ||
| 293 | */ | ||
| 294 | int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | ||
| 295 | { | ||
| 296 | struct extent_io_tree *tree; | ||
| 297 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 298 | u64 found_start; | ||
| 299 | int found_level; | ||
| 300 | unsigned long len; | ||
| 301 | struct extent_buffer *eb; | ||
| 302 | int ret; | ||
| 303 | |||
| 304 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 305 | |||
| 306 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
| 307 | goto out; | ||
| 308 | if (!page->private) | ||
| 309 | goto out; | ||
| 310 | len = page->private >> 2; | ||
| 311 | WARN_ON(len == 0); | ||
| 314 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | ||
| 315 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | ||
| 316 | btrfs_header_generation(eb)); | ||
| 317 | BUG_ON(ret); | ||
| 318 | found_start = btrfs_header_bytenr(eb); | ||
| 319 | if (found_start != start) { | ||
| 320 | printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", | ||
| 321 | start, found_start, len); | ||
| 322 | WARN_ON(1); | ||
| 323 | goto err; | ||
| 324 | } | ||
| 325 | if (eb->first_page != page) { | ||
| 326 | printk("bad first page %lu %lu\n", eb->first_page->index, | ||
| 327 | page->index); | ||
| 328 | WARN_ON(1); | ||
| 329 | goto err; | ||
| 330 | } | ||
| 331 | if (!PageUptodate(page)) { | ||
| 332 | printk("csum not up to date page %lu\n", page->index); | ||
| 333 | WARN_ON(1); | ||
| 334 | goto err; | ||
| 335 | } | ||
| 336 | found_level = btrfs_header_level(eb); | ||
| 337 | |||
| 338 | csum_tree_block(root, eb, 0); | ||
| 339 | err: | ||
| 340 | free_extent_buffer(eb); | ||
| 341 | out: | ||
| 342 | return 0; | ||
| 343 | } | ||
| 344 | |||
| 345 | int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | ||
| 346 | struct extent_state *state) | ||
| 347 | { | ||
| 348 | struct extent_io_tree *tree; | ||
| 349 | u64 found_start; | ||
| 350 | int found_level; | ||
| 351 | unsigned long len; | ||
| 352 | struct extent_buffer *eb; | ||
| 353 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
| 354 | int ret = 0; | ||
| 355 | |||
| 356 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 357 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
| 358 | goto out; | ||
| 359 | if (!page->private) | ||
| 360 | goto out; | ||
| 361 | len = page->private >> 2; | ||
| 362 | WARN_ON(len == 0); | ||
| 365 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | ||
| 366 | |||
| 367 | found_start = btrfs_header_bytenr(eb); | ||
| 368 | if (found_start != start) { | ||
| 369 | printk("bad tree block start %llu %llu\n", | ||
| 370 | (unsigned long long)found_start, | ||
| 371 | (unsigned long long)eb->start); | ||
| 372 | ret = -EIO; | ||
| 373 | goto err; | ||
| 374 | } | ||
| 375 | if (eb->first_page != page) { | ||
| 376 | printk("bad first page %lu %lu\n", eb->first_page->index, | ||
| 377 | page->index); | ||
| 378 | WARN_ON(1); | ||
| 379 | ret = -EIO; | ||
| 380 | goto err; | ||
| 381 | } | ||
| 382 | if (memcmp_extent_buffer(eb, root->fs_info->fsid, | ||
| 383 | (unsigned long)btrfs_header_fsid(eb), | ||
| 384 | BTRFS_FSID_SIZE)) { | ||
| 385 | printk("bad fsid on block %Lu\n", eb->start); | ||
| 386 | ret = -EIO; | ||
| 387 | goto err; | ||
| 388 | } | ||
| 389 | found_level = btrfs_header_level(eb); | ||
| 390 | |||
| 391 | ret = csum_tree_block(root, eb, 1); | ||
| 392 | if (ret) | ||
| 393 | ret = -EIO; | ||
| 394 | |||
| 395 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | ||
| 396 | end = eb->start + end - 1; | ||
| 397 | err: | ||
| 398 | free_extent_buffer(eb); | ||
| 399 | out: | ||
| 400 | return ret; | ||
| 401 | } | ||
| 402 | |||
| 403 | static void end_workqueue_bio(struct bio *bio, int err) | ||
| 404 | { | ||
| 405 | struct end_io_wq *end_io_wq = bio->bi_private; | ||
| 406 | struct btrfs_fs_info *fs_info; | ||
| 407 | |||
| 408 | fs_info = end_io_wq->info; | ||
| 409 | end_io_wq->error = err; | ||
| 410 | end_io_wq->work.func = end_workqueue_fn; | ||
| 411 | end_io_wq->work.flags = 0; | ||
| 412 | if (bio->bi_rw & (1 << BIO_RW)) | ||
| 413 | btrfs_queue_worker(&fs_info->endio_write_workers, | ||
| 414 | &end_io_wq->work); | ||
| 415 | else | ||
| 416 | btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); | ||
| 417 | } | ||
| 418 | |||
| 419 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | ||
| 420 | int metadata) | ||
| 421 | { | ||
| 422 | struct end_io_wq *end_io_wq; | ||
| 423 | end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); | ||
| 424 | if (!end_io_wq) | ||
| 425 | return -ENOMEM; | ||
| 426 | |||
| 427 | end_io_wq->private = bio->bi_private; | ||
| 428 | end_io_wq->end_io = bio->bi_end_io; | ||
| 429 | end_io_wq->info = info; | ||
| 430 | end_io_wq->error = 0; | ||
| 431 | end_io_wq->bio = bio; | ||
| 432 | end_io_wq->metadata = metadata; | ||
| 433 | |||
| 434 | bio->bi_private = end_io_wq; | ||
| 435 | bio->bi_end_io = end_workqueue_bio; | ||
| 436 | return 0; | ||
| 437 | } | ||
| 438 | |||
| 439 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | ||
| 440 | { | ||
| 441 | unsigned long limit = min_t(unsigned long, | ||
| 442 | info->workers.max_workers, | ||
| 443 | info->fs_devices->open_devices); | ||
| 444 | return 256 * limit; | ||
| 445 | } | ||
| 446 | |||
| 447 | int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) | ||
| 448 | { | ||
| 449 | return atomic_read(&info->nr_async_bios) > | ||
| 450 | btrfs_async_submit_limit(info); | ||
| 451 | } | ||
| 452 | |||
| 453 | static void run_one_async_submit(struct btrfs_work *work) | ||
| 454 | { | ||
| 455 | struct btrfs_fs_info *fs_info; | ||
| 456 | struct async_submit_bio *async; | ||
| 457 | int limit; | ||
| 458 | |||
| 459 | async = container_of(work, struct async_submit_bio, work); | ||
| 460 | fs_info = BTRFS_I(async->inode)->root->fs_info; | ||
| 461 | |||
| 462 | limit = btrfs_async_submit_limit(fs_info); | ||
| 463 | limit = limit * 2 / 3; | ||
| 464 | |||
| 465 | atomic_dec(&fs_info->nr_async_submits); | ||
| 466 | |||
| 467 | if (atomic_read(&fs_info->nr_async_submits) < limit && | ||
| 468 | waitqueue_active(&fs_info->async_submit_wait)) | ||
| 469 | wake_up(&fs_info->async_submit_wait); | ||
| 470 | |||
| 471 | async->submit_bio_hook(async->inode, async->rw, async->bio, | ||
| 472 | async->mirror_num); | ||
| 473 | kfree(async); | ||
| 474 | } | ||
| 475 | |||
| 476 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | ||
| 477 | int rw, struct bio *bio, int mirror_num, | ||
| 478 | extent_submit_bio_hook_t *submit_bio_hook) | ||
| 479 | { | ||
| 480 | struct async_submit_bio *async; | ||
| 481 | int limit = btrfs_async_submit_limit(fs_info); | ||
| 482 | |||
| 483 | async = kmalloc(sizeof(*async), GFP_NOFS); | ||
| 484 | if (!async) | ||
| 485 | return -ENOMEM; | ||
| 486 | |||
| 487 | async->inode = inode; | ||
| 488 | async->rw = rw; | ||
| 489 | async->bio = bio; | ||
| 490 | async->mirror_num = mirror_num; | ||
| 491 | async->submit_bio_hook = submit_bio_hook; | ||
| 492 | async->work.func = run_one_async_submit; | ||
| 493 | async->work.flags = 0; | ||
| 494 | |||
| 495 | while (atomic_read(&fs_info->async_submit_draining) && | ||
| 496 | atomic_read(&fs_info->nr_async_submits)) { | ||
| 497 | wait_event(fs_info->async_submit_wait, | ||
| 498 | (atomic_read(&fs_info->nr_async_submits) == 0)); | ||
| 499 | } | ||
| 500 | |||
| 501 | atomic_inc(&fs_info->nr_async_submits); | ||
| 502 | btrfs_queue_worker(&fs_info->workers, &async->work); | ||
| 503 | |||
| 504 | if (atomic_read(&fs_info->nr_async_submits) > limit) { | ||
| 505 | wait_event_timeout(fs_info->async_submit_wait, | ||
| 506 | (atomic_read(&fs_info->nr_async_submits) < limit), | ||
| 507 | HZ/10); | ||
| 508 | |||
| 509 | wait_event_timeout(fs_info->async_submit_wait, | ||
| 510 | (atomic_read(&fs_info->nr_async_bios) < limit), | ||
| 511 | HZ/10); | ||
| 512 | } | ||
| 513 | return 0; | ||
| 514 | } | ||
| 515 | |||
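
The submission path throttles against two watermarks derived from btrfs_async_submit_limit(): producers block once in-flight submits exceed the limit, and the worker wakes them again when the count falls under two thirds of it. A sketch of just that arithmetic, using the 256-per-unit scaling from the code above:

    #include <stdio.h>

    static unsigned long submit_limit(unsigned long max_workers,
                                      unsigned long open_devices)
    {
            unsigned long n = max_workers < open_devices ?
                              max_workers : open_devices;

            return 256 * n;     /* as in btrfs_async_submit_limit() */
    }

    int main(void)
    {
            unsigned long limit = submit_limit(8, 4);

            printf("block new submits above %lu\n", limit);
            printf("wake blocked submitters below %lu\n", limit * 2 / 3);
            return 0;
    }
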
| 516 | static int btree_csum_one_bio(struct bio *bio) | ||
| 517 | { | ||
| 518 | struct bio_vec *bvec = bio->bi_io_vec; | ||
| 519 | int bio_index = 0; | ||
| 520 | struct btrfs_root *root; | ||
| 521 | |||
| 522 | WARN_ON(bio->bi_vcnt <= 0); | ||
| 523 | while (bio_index < bio->bi_vcnt) { | ||
| 524 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; | ||
| 525 | csum_dirty_buffer(root, bvec->bv_page); | ||
| 526 | bio_index++; | ||
| 527 | bvec++; | ||
| 528 | } | ||
| 529 | return 0; | ||
| 530 | } | ||
| 531 | |||
| 532 | static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | ||
| 533 | int mirror_num) | ||
| 534 | { | ||
| 535 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 536 | int ret; | ||
| 537 | |||
| 538 | /* | ||
| 539 | * when we're called for a write, we're already in the async | ||
| 540 | * submission context. Just jump into btrfs_map_bio | ||
| 541 | */ | ||
| 542 | if (rw & (1 << BIO_RW)) { | ||
| 543 | btree_csum_one_bio(bio); | ||
| 544 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | ||
| 545 | mirror_num, 1); | ||
| 546 | } | ||
| 547 | |||
| 548 | /* | ||
| 549 | * called for a read, do the setup so that checksum validation | ||
| 550 | * can happen in the async kernel threads | ||
| 551 | */ | ||
| 552 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); | ||
| 553 | BUG_ON(ret); | ||
| 554 | |||
| 555 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); | ||
| 556 | } | ||
| 557 | |||
| 558 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | ||
| 559 | int mirror_num) | ||
| 560 | { | ||
| 561 | /* | ||
| 562 | * kthread helpers are used to submit writes so that checksumming | ||
| 563 | * can happen in parallel across all CPUs | ||
| 564 | */ | ||
| 565 | if (!(rw & (1 << BIO_RW))) { | ||
| 566 | return __btree_submit_bio_hook(inode, rw, bio, mirror_num); | ||
| 567 | } | ||
| 568 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 569 | inode, rw, bio, mirror_num, | ||
| 570 | __btree_submit_bio_hook); | ||
| 571 | } | ||
| 572 | |||
| 573 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | ||
| 574 | { | ||
| 575 | struct extent_io_tree *tree; | ||
| 576 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 577 | |||
| 578 | if (current->flags & PF_MEMALLOC) { | ||
| 579 | redirty_page_for_writepage(wbc, page); | ||
| 580 | unlock_page(page); | ||
| 581 | return 0; | ||
| 582 | } | ||
| 583 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | ||
| 584 | } | ||
| 585 | |||
| 586 | static int btree_writepages(struct address_space *mapping, | ||
| 587 | struct writeback_control *wbc) | ||
| 588 | { | ||
| 589 | struct extent_io_tree *tree; | ||
| 590 | tree = &BTRFS_I(mapping->host)->io_tree; | ||
| 591 | if (wbc->sync_mode == WB_SYNC_NONE) { | ||
| 592 | u64 num_dirty; | ||
| 593 | u64 start = 0; | ||
| 594 | unsigned long thresh = 32 * 1024 * 1024; | ||
| 595 | |||
| 596 | if (wbc->for_kupdate) | ||
| 597 | return 0; | ||
| 598 | |||
| 599 | num_dirty = count_range_bits(tree, &start, (u64)-1, | ||
| 600 | thresh, EXTENT_DIRTY); | ||
| 601 | if (num_dirty < thresh) { | ||
| 602 | return 0; | ||
| 603 | } | ||
| 604 | } | ||
| 605 | return extent_writepages(tree, mapping, btree_get_extent, wbc); | ||
| 606 | } | ||
| 607 | |||
| 608 | int btree_readpage(struct file *file, struct page *page) | ||
| 609 | { | ||
| 610 | struct extent_io_tree *tree; | ||
| 611 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 612 | return extent_read_full_page(tree, page, btree_get_extent); | ||
| 613 | } | ||
| 614 | |||
| 615 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) | ||
| 616 | { | ||
| 617 | struct extent_io_tree *tree; | ||
| 618 | struct extent_map_tree *map; | ||
| 619 | int ret; | ||
| 620 | |||
| 621 | if (PageWriteback(page) || PageDirty(page)) | ||
| 622 | return 0; | ||
| 623 | |||
| 624 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 625 | map = &BTRFS_I(page->mapping->host)->extent_tree; | ||
| 626 | |||
| 627 | ret = try_release_extent_state(map, tree, page, gfp_flags); | ||
| 628 | if (!ret) { | ||
| 629 | return 0; | ||
| 630 | } | ||
| 631 | |||
| 632 | ret = try_release_extent_buffer(tree, page); | ||
| 633 | if (ret == 1) { | ||
| 634 | ClearPagePrivate(page); | ||
| 635 | set_page_private(page, 0); | ||
| 636 | page_cache_release(page); | ||
| 637 | } | ||
| 638 | |||
| 639 | return ret; | ||
| 640 | } | ||
| 641 | |||
| 642 | static void btree_invalidatepage(struct page *page, unsigned long offset) | ||
| 643 | { | ||
| 644 | struct extent_io_tree *tree; | ||
| 645 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 646 | extent_invalidatepage(tree, page, offset); | ||
| 647 | btree_releasepage(page, GFP_NOFS); | ||
| 648 | if (PagePrivate(page)) { | ||
| 649 | printk("warning page private not zero on page %Lu\n", | ||
| 650 | page_offset(page)); | ||
| 651 | ClearPagePrivate(page); | ||
| 652 | set_page_private(page, 0); | ||
| 653 | page_cache_release(page); | ||
| 654 | } | ||
| 655 | } | ||
| 656 | |||
| 657 | #if 0 | ||
| 658 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | ||
| 659 | { | ||
| 660 | struct buffer_head *bh; | ||
| 661 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
| 662 | struct buffer_head *head; | ||
| 663 | if (!page_has_buffers(page)) { | ||
| 664 | create_empty_buffers(page, root->fs_info->sb->s_blocksize, | ||
| 665 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
| 666 | } | ||
| 667 | head = page_buffers(page); | ||
| 668 | bh = head; | ||
| 669 | do { | ||
| 670 | if (buffer_dirty(bh)) | ||
| 671 | csum_tree_block(root, bh, 0); | ||
| 672 | bh = bh->b_this_page; | ||
| 673 | } while (bh != head); | ||
| 674 | return block_write_full_page(page, btree_get_block, wbc); | ||
| 675 | } | ||
| 676 | #endif | ||
| 677 | |||
| 678 | static struct address_space_operations btree_aops = { | ||
| 679 | .readpage = btree_readpage, | ||
| 680 | .writepage = btree_writepage, | ||
| 681 | .writepages = btree_writepages, | ||
| 682 | .releasepage = btree_releasepage, | ||
| 683 | .invalidatepage = btree_invalidatepage, | ||
| 684 | .sync_page = block_sync_page, | ||
| 685 | }; | ||
| 686 | |||
| 687 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | ||
| 688 | u64 parent_transid) | ||
| 689 | { | ||
| 690 | struct extent_buffer *buf = NULL; | ||
| 691 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 692 | int ret = 0; | ||
| 693 | |||
| 694 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 695 | if (!buf) | ||
| 696 | return 0; | ||
| 697 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, | ||
| 698 | buf, 0, 0, btree_get_extent, 0); | ||
| 699 | free_extent_buffer(buf); | ||
| 700 | return ret; | ||
| 701 | } | ||
| 702 | |||
| 703 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | ||
| 704 | u64 bytenr, u32 blocksize) | ||
| 705 | { | ||
| 706 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 707 | struct extent_buffer *eb; | ||
| 708 | eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, | ||
| 709 | bytenr, blocksize, GFP_NOFS); | ||
| 710 | return eb; | ||
| 711 | } | ||
| 712 | |||
| 713 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | ||
| 714 | u64 bytenr, u32 blocksize) | ||
| 715 | { | ||
| 716 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 717 | struct extent_buffer *eb; | ||
| 718 | |||
| 719 | eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, | ||
| 720 | bytenr, blocksize, NULL, GFP_NOFS); | ||
| 721 | return eb; | ||
| 722 | } | ||
| 723 | |||
| 725 | int btrfs_write_tree_block(struct extent_buffer *buf) | ||
| 726 | { | ||
| 727 | return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, | ||
| 728 | buf->start + buf->len - 1, WB_SYNC_ALL); | ||
| 729 | } | ||
| 730 | |||
| 731 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | ||
| 732 | { | ||
| 733 | return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, | ||
| 734 | buf->start, buf->start + buf->len -1); | ||
| 735 | } | ||
| 736 | |||
| 737 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | ||
| 738 | u32 blocksize, u64 parent_transid) | ||
| 739 | { | ||
| 740 | struct extent_buffer *buf = NULL; | ||
| 741 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 742 | struct extent_io_tree *io_tree; | ||
| 743 | int ret; | ||
| 744 | |||
| 745 | io_tree = &BTRFS_I(btree_inode)->io_tree; | ||
| 746 | |||
| 747 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 748 | if (!buf) | ||
| 749 | return NULL; | ||
| 750 | |||
| 751 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | ||
| 752 | |||
| 753 | if (ret == 0) { | ||
| 754 | buf->flags |= EXTENT_UPTODATE; | ||
| 755 | } else { | ||
| 756 | WARN_ON(1); | ||
| 757 | } | ||
| 758 | return buf; | ||
| 760 | } | ||
| 761 | |||
| 762 | int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 763 | struct extent_buffer *buf) | ||
| 764 | { | ||
| 765 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 766 | if (btrfs_header_generation(buf) == | ||
| 767 | root->fs_info->running_transaction->transid) { | ||
| 768 | WARN_ON(!btrfs_tree_locked(buf)); | ||
| 769 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | ||
| 770 | buf); | ||
| 771 | } | ||
| 772 | return 0; | ||
| 773 | } | ||
| 774 | |||
| 775 | static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | ||
| 776 | u32 stripesize, struct btrfs_root *root, | ||
| 777 | struct btrfs_fs_info *fs_info, | ||
| 778 | u64 objectid) | ||
| 779 | { | ||
| 780 | root->node = NULL; | ||
| 781 | root->inode = NULL; | ||
| 782 | root->commit_root = NULL; | ||
| 783 | root->ref_tree = NULL; | ||
| 784 | root->sectorsize = sectorsize; | ||
| 785 | root->nodesize = nodesize; | ||
| 786 | root->leafsize = leafsize; | ||
| 787 | root->stripesize = stripesize; | ||
| 788 | root->ref_cows = 0; | ||
| 789 | root->track_dirty = 0; | ||
| 790 | |||
| 791 | root->fs_info = fs_info; | ||
| 792 | root->objectid = objectid; | ||
| 793 | root->last_trans = 0; | ||
| 794 | root->highest_inode = 0; | ||
| 795 | root->last_inode_alloc = 0; | ||
| 796 | root->name = NULL; | ||
| 797 | root->in_sysfs = 0; | ||
| 798 | |||
| 799 | INIT_LIST_HEAD(&root->dirty_list); | ||
| 800 | INIT_LIST_HEAD(&root->orphan_list); | ||
| 801 | INIT_LIST_HEAD(&root->dead_list); | ||
| 802 | spin_lock_init(&root->node_lock); | ||
| 803 | spin_lock_init(&root->list_lock); | ||
| 804 | mutex_init(&root->objectid_mutex); | ||
| 805 | mutex_init(&root->log_mutex); | ||
| 806 | extent_io_tree_init(&root->dirty_log_pages, | ||
| 807 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 808 | |||
| 809 | btrfs_leaf_ref_tree_init(&root->ref_tree_struct); | ||
| 810 | root->ref_tree = &root->ref_tree_struct; | ||
| 811 | |||
| 812 | memset(&root->root_key, 0, sizeof(root->root_key)); | ||
| 813 | memset(&root->root_item, 0, sizeof(root->root_item)); | ||
| 814 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); | ||
| 815 | memset(&root->root_kobj, 0, sizeof(root->root_kobj)); | ||
| 816 | root->defrag_trans_start = fs_info->generation; | ||
| 817 | init_completion(&root->kobj_unregister); | ||
| 818 | root->defrag_running = 0; | ||
| 819 | root->defrag_level = 0; | ||
| 820 | root->root_key.objectid = objectid; | ||
| 821 | return 0; | ||
| 822 | } | ||
| 823 | |||
| 824 | static int find_and_setup_root(struct btrfs_root *tree_root, | ||
| 825 | struct btrfs_fs_info *fs_info, | ||
| 826 | u64 objectid, | ||
| 827 | struct btrfs_root *root) | ||
| 828 | { | ||
| 829 | int ret; | ||
| 830 | u32 blocksize; | ||
| 831 | |||
| 832 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
| 833 | tree_root->sectorsize, tree_root->stripesize, | ||
| 834 | root, fs_info, objectid); | ||
| 835 | ret = btrfs_find_last_root(tree_root, objectid, | ||
| 836 | &root->root_item, &root->root_key); | ||
| 837 | BUG_ON(ret); | ||
| 838 | |||
| 839 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
| 840 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | ||
| 841 | blocksize, 0); | ||
| 842 | BUG_ON(!root->node); | ||
| 843 | return 0; | ||
| 844 | } | ||
| 845 | |||
| 846 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 847 | struct btrfs_fs_info *fs_info) | ||
| 848 | { | ||
| 849 | struct extent_buffer *eb; | ||
| 850 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
| 851 | u64 start = 0; | ||
| 852 | u64 end = 0; | ||
| 853 | int ret; | ||
| 854 | |||
| 855 | if (!log_root_tree) | ||
| 856 | return 0; | ||
| 857 | |||
| 858 | while (1) { | ||
| 859 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
| 860 | 0, &start, &end, EXTENT_DIRTY); | ||
| 861 | if (ret) | ||
| 862 | break; | ||
| 863 | |||
| 864 | clear_extent_dirty(&log_root_tree->dirty_log_pages, | ||
| 865 | start, end, GFP_NOFS); | ||
| 866 | } | ||
| 867 | eb = fs_info->log_root_tree->node; | ||
| 868 | |||
| 869 | WARN_ON(btrfs_header_level(eb) != 0); | ||
| 870 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
| 871 | |||
| 872 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
| 873 | eb->start, eb->len); | ||
| 874 | BUG_ON(ret); | ||
| 875 | |||
| 876 | free_extent_buffer(eb); | ||
| 877 | kfree(fs_info->log_root_tree); | ||
| 878 | fs_info->log_root_tree = NULL; | ||
| 879 | return 0; | ||
| 880 | } | ||
| 881 | |||
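The loop above is the stock idiom for draining an extent_io_tree:
find_first_extent_bit() returns 0 for as long as a range with the
requested bit exists. Reduced to a hypothetical helper:

    /* hypothetical wrapper illustrating the drain pattern used above */
    static void drain_dirty_ranges(struct extent_io_tree *tree)
    {
            u64 start = 0;
            u64 end = 0;

            /* returns nonzero once no EXTENT_DIRTY range is left */
            while (!find_first_extent_bit(tree, 0, &start, &end,
                                          EXTENT_DIRTY))
                    clear_extent_dirty(tree, start, end, GFP_NOFS);
    }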
| 882 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 883 | struct btrfs_fs_info *fs_info) | ||
| 884 | { | ||
| 885 | struct btrfs_root *root; | ||
| 886 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
| 887 | |||
| 888 | root = kzalloc(sizeof(*root), GFP_NOFS); | ||
| 889 | if (!root) | ||
| 890 | return -ENOMEM; | ||
| 891 | |||
| 892 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
| 893 | tree_root->sectorsize, tree_root->stripesize, | ||
| 894 | root, fs_info, BTRFS_TREE_LOG_OBJECTID); | ||
| 895 | |||
| 896 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | ||
| 897 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 898 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; | ||
| 899 | root->ref_cows = 0; | ||
| 900 | |||
| 901 | root->node = btrfs_alloc_free_block(trans, root, root->leafsize, | ||
| 902 | 0, BTRFS_TREE_LOG_OBJECTID, | ||
| 903 | trans->transid, 0, 0, 0); | ||
| 904 | |||
| 905 | btrfs_set_header_nritems(root->node, 0); | ||
| 906 | btrfs_set_header_level(root->node, 0); | ||
| 907 | btrfs_set_header_bytenr(root->node, root->node->start); | ||
| 908 | btrfs_set_header_generation(root->node, trans->transid); | ||
| 909 | btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); | ||
| 910 | |||
| 911 | write_extent_buffer(root->node, root->fs_info->fsid, | ||
| 912 | (unsigned long)btrfs_header_fsid(root->node), | ||
| 913 | BTRFS_FSID_SIZE); | ||
| 914 | btrfs_mark_buffer_dirty(root->node); | ||
| 915 | btrfs_tree_unlock(root->node); | ||
| 916 | fs_info->log_root_tree = root; | ||
| 917 | return 0; | ||
| 918 | } | ||
| 919 | |||
| 920 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | ||
| 921 | struct btrfs_key *location) | ||
| 922 | { | ||
| 923 | struct btrfs_root *root; | ||
| 924 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | ||
| 925 | struct btrfs_path *path; | ||
| 926 | struct extent_buffer *l; | ||
| 927 | u64 highest_inode; | ||
| 928 | u32 blocksize; | ||
| 929 | int ret = 0; | ||
| 930 | |||
| 931 | root = kzalloc(sizeof(*root), GFP_NOFS); | ||
| 932 | if (!root) | ||
| 933 | return ERR_PTR(-ENOMEM); | ||
| 934 | if (location->offset == (u64)-1) { | ||
| 935 | ret = find_and_setup_root(tree_root, fs_info, | ||
| 936 | location->objectid, root); | ||
| 937 | if (ret) { | ||
| 938 | kfree(root); | ||
| 939 | return ERR_PTR(ret); | ||
| 940 | } | ||
| 941 | goto insert; | ||
| 942 | } | ||
| 943 | |||
| 944 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
| 945 | tree_root->sectorsize, tree_root->stripesize, | ||
| 946 | root, fs_info, location->objectid); | ||
| 947 | |||
| 948 | path = btrfs_alloc_path(); | ||
| 949 | BUG_ON(!path); | ||
| 950 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | ||
| 951 | if (ret != 0) { | ||
| 952 | if (ret > 0) | ||
| 953 | ret = -ENOENT; | ||
| 954 | goto out; | ||
| 955 | } | ||
| 956 | l = path->nodes[0]; | ||
| 957 | read_extent_buffer(l, &root->root_item, | ||
| 958 | btrfs_item_ptr_offset(l, path->slots[0]), | ||
| 959 | sizeof(root->root_item)); | ||
| 960 | memcpy(&root->root_key, location, sizeof(*location)); | ||
| 961 | ret = 0; | ||
| 962 | out: | ||
| 963 | btrfs_release_path(root, path); | ||
| 964 | btrfs_free_path(path); | ||
| 965 | if (ret) { | ||
| 966 | kfree(root); | ||
| 967 | return ERR_PTR(ret); | ||
| 968 | } | ||
| 969 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
| 970 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | ||
| 971 | blocksize, 0); | ||
| 972 | BUG_ON(!root->node); | ||
| 973 | insert: | ||
| 974 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 975 | root->ref_cows = 1; | ||
| 976 | ret = btrfs_find_highest_inode(root, &highest_inode); | ||
| 977 | if (ret == 0) { | ||
| 978 | root->highest_inode = highest_inode; | ||
| 979 | root->last_inode_alloc = highest_inode; | ||
| 980 | } | ||
| 981 | } | ||
| 982 | return root; | ||
| 983 | } | ||
| 984 | |||
| 985 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
| 986 | u64 root_objectid) | ||
| 987 | { | ||
| 988 | struct btrfs_root *root; | ||
| 989 | |||
| 990 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID) | ||
| 991 | return fs_info->tree_root; | ||
| 992 | if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) | ||
| 993 | return fs_info->extent_root; | ||
| 994 | |||
| 995 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
| 996 | (unsigned long)root_objectid); | ||
| 997 | return root; | ||
| 998 | } | ||
| 999 | |||
| 1000 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | ||
| 1001 | struct btrfs_key *location) | ||
| 1002 | { | ||
| 1003 | struct btrfs_root *root; | ||
| 1004 | int ret; | ||
| 1005 | |||
| 1006 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | ||
| 1007 | return fs_info->tree_root; | ||
| 1008 | if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) | ||
| 1009 | return fs_info->extent_root; | ||
| 1010 | if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
| 1011 | return fs_info->chunk_root; | ||
| 1012 | if (location->objectid == BTRFS_DEV_TREE_OBJECTID) | ||
| 1013 | return fs_info->dev_root; | ||
| 1014 | |||
| 1015 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
| 1016 | (unsigned long)location->objectid); | ||
| 1017 | if (root) | ||
| 1018 | return root; | ||
| 1019 | |||
| 1020 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | ||
| 1021 | if (IS_ERR(root)) | ||
| 1022 | return root; | ||
| 1023 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | ||
| 1024 | (unsigned long)root->root_key.objectid, | ||
| 1025 | root); | ||
| 1026 | if (ret) { | ||
| 1027 | free_extent_buffer(root->node); | ||
| 1028 | kfree(root); | ||
| 1029 | return ERR_PTR(ret); | ||
| 1030 | } | ||
| 1031 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
| 1032 | root->root_key.objectid, root); | ||
| 1033 | BUG_ON(ret); | ||
| 1034 | |||
| 1035 | return root; | ||
| 1036 | } | ||
| 1037 | |||
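btrfs_read_fs_root_no_name() is the read-through cache for subvolume
roots: the well-known trees are answered directly, everything else is
looked up in fs_roots_radix and read from disk on a miss, then inserted
so later lookups hit the cache. A usage sketch; it assumes
BTRFS_FS_TREE_OBJECTID names the default subvolume as elsewhere in this
commit, and offset (u64)-1 routes the disk read through
find_and_setup_root() above:

    struct btrfs_key location = {
            .objectid = BTRFS_FS_TREE_OBJECTID, /* default subvolume */
            .type = BTRFS_ROOT_ITEM_KEY,
            .offset = (u64)-1,                  /* "latest" root item */
    };
    struct btrfs_root *fs_root;

    fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
    if (IS_ERR(fs_root))
            return PTR_ERR(fs_root);            /* disk or -ENOMEM */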
| 1038 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | ||
| 1039 | struct btrfs_key *location, | ||
| 1040 | const char *name, int namelen) | ||
| 1041 | { | ||
| 1042 | struct btrfs_root *root; | ||
| 1043 | int ret; | ||
| 1044 | |||
| 1045 | root = btrfs_read_fs_root_no_name(fs_info, location); | ||
| 1046 | if (!root) | ||
| 1047 | return NULL; | ||
| 1048 | |||
| 1049 | if (root->in_sysfs) | ||
| 1050 | return root; | ||
| 1051 | |||
| 1052 | ret = btrfs_set_root_name(root, name, namelen); | ||
| 1053 | if (ret) { | ||
| 1054 | free_extent_buffer(root->node); | ||
| 1055 | kfree(root); | ||
| 1056 | return ERR_PTR(ret); | ||
| 1057 | } | ||
| 1058 | |||
| 1059 | ret = btrfs_sysfs_add_root(root); | ||
| 1060 | if (ret) { | ||
| 1061 | free_extent_buffer(root->node); | ||
| 1062 | kfree(root->name); | ||
| 1063 | kfree(root); | ||
| 1064 | return ERR_PTR(ret); | ||
| 1065 | } | ||
| 1066 | root->in_sysfs = 1; | ||
| 1067 | return root; | ||
| 1068 | } | ||
| 1069 | #if 0 | ||
| 1070 | static int add_hasher(struct btrfs_fs_info *info, char *type) { | ||
| 1071 | struct btrfs_hasher *hasher; | ||
| 1072 | |||
| 1073 | hasher = kmalloc(sizeof(*hasher), GFP_NOFS); | ||
| 1074 | if (!hasher) | ||
| 1075 | return -ENOMEM; | ||
| 1076 | hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); | ||
| 1077 | if (!hasher->hash_tfm) { | ||
| 1078 | kfree(hasher); | ||
| 1079 | return -EINVAL; | ||
| 1080 | } | ||
| 1081 | spin_lock(&info->hash_lock); | ||
| 1082 | list_add(&hasher->list, &info->hashers); | ||
| 1083 | spin_unlock(&info->hash_lock); | ||
| 1084 | return 0; | ||
| 1085 | } | ||
| 1086 | #endif | ||
| 1087 | |||
| 1088 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) | ||
| 1089 | { | ||
| 1090 | struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; | ||
| 1091 | int ret = 0; | ||
| 1092 | struct list_head *cur; | ||
| 1093 | struct btrfs_device *device; | ||
| 1094 | struct backing_dev_info *bdi; | ||
| 1095 | |||
| 1096 | if ((bdi_bits & (1 << BDI_write_congested)) && | ||
| 1097 | btrfs_congested_async(info, 0)) | ||
| 1098 | return 1; | ||
| 1099 | |||
| 1100 | list_for_each(cur, &info->fs_devices->devices) { | ||
| 1101 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1102 | if (!device->bdev) | ||
| 1103 | continue; | ||
| 1104 | bdi = blk_get_backing_dev_info(device->bdev); | ||
| 1105 | if (bdi && bdi_congested(bdi, bdi_bits)) { | ||
| 1106 | ret = 1; | ||
| 1107 | break; | ||
| 1108 | } | ||
| 1109 | } | ||
| 1110 | return ret; | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | /* | ||
| 1114 | * this unplugs every device on the box; it is only used when the | ||
| 1115 | * page argument is NULL | ||
| 1116 | */ | ||
| 1117 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
| 1118 | { | ||
| 1119 | struct list_head *cur; | ||
| 1120 | struct btrfs_device *device; | ||
| 1121 | struct btrfs_fs_info *info; | ||
| 1122 | |||
| 1123 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; | ||
| 1124 | list_for_each(cur, &info->fs_devices->devices) { | ||
| 1125 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1126 | bdi = blk_get_backing_dev_info(device->bdev); | ||
| 1127 | if (bdi->unplug_io_fn) { | ||
| 1128 | bdi->unplug_io_fn(bdi, page); | ||
| 1129 | } | ||
| 1130 | } | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
| 1134 | { | ||
| 1135 | struct inode *inode; | ||
| 1136 | struct extent_map_tree *em_tree; | ||
| 1137 | struct extent_map *em; | ||
| 1138 | struct address_space *mapping; | ||
| 1139 | u64 offset; | ||
| 1140 | |||
| 1141 | /* the generic O_DIRECT read code does this */ | ||
| 1142 | if (!page) { | ||
| 1143 | __unplug_io_fn(bdi, page); | ||
| 1144 | return; | ||
| 1145 | } | ||
| 1146 | |||
| 1147 | /* | ||
| 1148 | * page->mapping may change at any time. Get a consistent copy | ||
| 1149 | * and use that for everything below | ||
| 1150 | */ | ||
| 1151 | smp_mb(); | ||
| 1152 | mapping = page->mapping; | ||
| 1153 | if (!mapping) | ||
| 1154 | return; | ||
| 1155 | |||
| 1156 | inode = mapping->host; | ||
| 1157 | offset = page_offset(page); | ||
| 1158 | |||
| 1159 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 1160 | spin_lock(&em_tree->lock); | ||
| 1161 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
| 1162 | spin_unlock(&em_tree->lock); | ||
| 1163 | if (!em) { | ||
| 1164 | __unplug_io_fn(bdi, page); | ||
| 1165 | return; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
| 1169 | free_extent_map(em); | ||
| 1170 | __unplug_io_fn(bdi, page); | ||
| 1171 | return; | ||
| 1172 | } | ||
| 1173 | offset = offset - em->start; | ||
| 1174 | btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
| 1175 | em->block_start + offset, page); | ||
| 1176 | free_extent_map(em); | ||
| 1177 | } | ||
| 1178 | |||
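btrfs_unplug_io_fn() narrows the generic "unplug everything" path down
to the one device that actually holds the page: the extent map supplies
the file-to-logical translation, and the rest is arithmetic. The
translation, assuming the page's offset falls inside
[em->start, em->start + em->len):

    u64 delta = page_offset(page) - em->start; /* offset within extent */
    u64 logical = em->block_start + delta;     /* to btrfs_unplug_page() */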
| 1179 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | ||
| 1180 | { | ||
| 1181 | bdi_init(bdi); | ||
| 1182 | bdi->ra_pages = default_backing_dev_info.ra_pages; | ||
| 1183 | bdi->state = 0; | ||
| 1184 | bdi->capabilities = default_backing_dev_info.capabilities; | ||
| 1185 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | ||
| 1186 | bdi->unplug_io_data = info; | ||
| 1187 | bdi->congested_fn = btrfs_congested_fn; | ||
| 1188 | bdi->congested_data = info; | ||
| 1189 | return 0; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | static int bio_ready_for_csum(struct bio *bio) | ||
| 1193 | { | ||
| 1194 | u64 length = 0; | ||
| 1195 | u64 buf_len = 0; | ||
| 1196 | u64 start = 0; | ||
| 1197 | struct page *page; | ||
| 1198 | struct extent_io_tree *io_tree = NULL; | ||
| 1199 | struct btrfs_fs_info *info = NULL; | ||
| 1200 | struct bio_vec *bvec; | ||
| 1201 | int i; | ||
| 1202 | int ret; | ||
| 1203 | |||
| 1204 | bio_for_each_segment(bvec, bio, i) { | ||
| 1205 | page = bvec->bv_page; | ||
| 1206 | if (page->private == EXTENT_PAGE_PRIVATE) { | ||
| 1207 | length += bvec->bv_len; | ||
| 1208 | continue; | ||
| 1209 | } | ||
| 1210 | if (!page->private) { | ||
| 1211 | length += bvec->bv_len; | ||
| 1212 | continue; | ||
| 1213 | } | ||
| 1214 | length = bvec->bv_len; | ||
| 1215 | buf_len = page->private >> 2; | ||
| 1216 | start = page_offset(page) + bvec->bv_offset; | ||
| 1217 | io_tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 1218 | info = BTRFS_I(page->mapping->host)->root->fs_info; | ||
| 1219 | } | ||
| 1220 | /* are we fully contained in this bio? */ | ||
| 1221 | if (buf_len <= length) | ||
| 1222 | return 1; | ||
| 1223 | |||
| 1224 | ret = extent_range_uptodate(io_tree, start + length, | ||
| 1225 | start + buf_len - 1); | ||
| 1226 | return ret; | ||
| 1229 | } | ||
| 1230 | |||
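bio_ready_for_csum() relies on the page->private convention of
extent_io.c: the first page of a tree block is assumed to store the
block length shifted left by two, with the low bits doubling as marker
flags, so "page->private >> 2" recovers the length. A sketch of the
assumed encoding (flag names as in extent_io.h of this commit):

    /* conceptual only; extent_io.c uses set_page_private() for this */
    first_page->private = (len << 2) | EXTENT_PAGE_PRIVATE_FIRST_PAGE;
    other_page->private = EXTENT_PAGE_PRIVATE;  /* no length stored */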
| 1231 | /* | ||
| 1232 | * called by the kthread helper functions to finally call the bio end_io | ||
| 1233 | * functions. This is where read checksum verification actually happens | ||
| 1234 | */ | ||
| 1235 | static void end_workqueue_fn(struct btrfs_work *work) | ||
| 1236 | { | ||
| 1237 | struct bio *bio; | ||
| 1238 | struct end_io_wq *end_io_wq; | ||
| 1239 | struct btrfs_fs_info *fs_info; | ||
| 1240 | int error; | ||
| 1241 | |||
| 1242 | end_io_wq = container_of(work, struct end_io_wq, work); | ||
| 1243 | bio = end_io_wq->bio; | ||
| 1244 | fs_info = end_io_wq->info; | ||
| 1245 | |||
| 1246 | /* metadata bios are special because the whole tree block must | ||
| 1247 | * be checksummed at once. This makes sure the entire block is in | ||
| 1248 | * ram and up to date before trying to verify things. For | ||
| 1249 | * blocksize <= pagesize, it is basically a noop | ||
| 1250 | */ | ||
| 1251 | if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { | ||
| 1252 | btrfs_queue_worker(&fs_info->endio_workers, | ||
| 1253 | &end_io_wq->work); | ||
| 1254 | return; | ||
| 1255 | } | ||
| 1256 | error = end_io_wq->error; | ||
| 1257 | bio->bi_private = end_io_wq->private; | ||
| 1258 | bio->bi_end_io = end_io_wq->end_io; | ||
| 1259 | kfree(end_io_wq); | ||
| 1260 | bio_endio(bio, error); | ||
| 1261 | } | ||
| 1262 | |||
| 1263 | static int cleaner_kthread(void *arg) | ||
| 1264 | { | ||
| 1265 | struct btrfs_root *root = arg; | ||
| 1266 | |||
| 1267 | do { | ||
| 1268 | smp_mb(); | ||
| 1269 | if (root->fs_info->closing) | ||
| 1270 | break; | ||
| 1271 | |||
| 1272 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | ||
| 1273 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
| 1274 | btrfs_clean_old_snapshots(root); | ||
| 1275 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 1276 | |||
| 1277 | if (freezing(current)) { | ||
| 1278 | refrigerator(); | ||
| 1279 | } else { | ||
| 1280 | smp_mb(); | ||
| 1281 | if (root->fs_info->closing) | ||
| 1282 | break; | ||
| 1283 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1284 | schedule(); | ||
| 1285 | __set_current_state(TASK_RUNNING); | ||
| 1286 | } | ||
| 1287 | } while (!kthread_should_stop()); | ||
| 1288 | return 0; | ||
| 1289 | } | ||
| 1290 | |||
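cleaner_kthread() above and transaction_kthread() below share the
freezable-worker shape common to kernel threads of this generation,
reduced to the skeleton below (do_work() and delay are placeholders):

    while (!kthread_should_stop()) {
            do_work();
            if (freezing(current)) {
                    refrigerator();          /* park for suspend */
            } else {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule_timeout(delay); /* or plain schedule() */
                    __set_current_state(TASK_RUNNING);
            }
    }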
| 1291 | static int transaction_kthread(void *arg) | ||
| 1292 | { | ||
| 1293 | struct btrfs_root *root = arg; | ||
| 1294 | struct btrfs_trans_handle *trans; | ||
| 1295 | struct btrfs_transaction *cur; | ||
| 1296 | unsigned long now; | ||
| 1297 | unsigned long delay; | ||
| 1298 | int ret; | ||
| 1299 | |||
| 1300 | do { | ||
| 1301 | smp_mb(); | ||
| 1302 | if (root->fs_info->closing) | ||
| 1303 | break; | ||
| 1304 | |||
| 1305 | delay = HZ * 30; | ||
| 1306 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | ||
| 1307 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
| 1308 | |||
| 1309 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
| 1310 | printk("btrfs: total reference cache size %Lu\n", | ||
| 1311 | root->fs_info->total_ref_cache_size); | ||
| 1312 | } | ||
| 1313 | |||
| 1314 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 1315 | cur = root->fs_info->running_transaction; | ||
| 1316 | if (!cur) { | ||
| 1317 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1318 | goto sleep; | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | now = get_seconds(); | ||
| 1322 | if (now < cur->start_time || now - cur->start_time < 30) { | ||
| 1323 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1324 | delay = HZ * 5; | ||
| 1325 | goto sleep; | ||
| 1326 | } | ||
| 1327 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1328 | trans = btrfs_start_transaction(root, 1); | ||
| 1329 | ret = btrfs_commit_transaction(trans, root); | ||
| 1330 | sleep: | ||
| 1331 | wake_up_process(root->fs_info->cleaner_kthread); | ||
| 1332 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
| 1333 | |||
| 1334 | if (freezing(current)) { | ||
| 1335 | refrigerator(); | ||
| 1336 | } else { | ||
| 1337 | if (root->fs_info->closing) | ||
| 1338 | break; | ||
| 1339 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1340 | schedule_timeout(delay); | ||
| 1341 | __set_current_state(TASK_RUNNING); | ||
| 1342 | } | ||
| 1343 | } while (!kthread_should_stop()); | ||
| 1344 | return 0; | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | struct btrfs_root *open_ctree(struct super_block *sb, | ||
| 1348 | struct btrfs_fs_devices *fs_devices, | ||
| 1349 | char *options) | ||
| 1350 | { | ||
| 1351 | u32 sectorsize; | ||
| 1352 | u32 nodesize; | ||
| 1353 | u32 leafsize; | ||
| 1354 | u32 blocksize; | ||
| 1355 | u32 stripesize; | ||
| 1356 | struct buffer_head *bh; | ||
| 1357 | struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1358 | GFP_NOFS); | ||
| 1359 | struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1360 | GFP_NOFS); | ||
| 1361 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), | ||
| 1362 | GFP_NOFS); | ||
| 1363 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1364 | GFP_NOFS); | ||
| 1365 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1366 | GFP_NOFS); | ||
| 1367 | struct btrfs_root *log_tree_root; | ||
| 1368 | |||
| 1369 | int ret; | ||
| 1370 | int err = -EINVAL; | ||
| 1371 | |||
| 1372 | struct btrfs_super_block *disk_super; | ||
| 1373 | |||
| 1374 | if (!extent_root || !tree_root || !fs_info || | ||
| 1375 | !chunk_root || !dev_root) { | ||
| 1376 | err = -ENOMEM; | ||
| 1377 | goto fail; | ||
| 1378 | } | ||
| 1379 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); | ||
| 1380 | INIT_LIST_HEAD(&fs_info->trans_list); | ||
| 1381 | INIT_LIST_HEAD(&fs_info->dead_roots); | ||
| 1382 | INIT_LIST_HEAD(&fs_info->hashers); | ||
| 1383 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | ||
| 1384 | spin_lock_init(&fs_info->hash_lock); | ||
| 1385 | spin_lock_init(&fs_info->delalloc_lock); | ||
| 1386 | spin_lock_init(&fs_info->new_trans_lock); | ||
| 1387 | spin_lock_init(&fs_info->ref_cache_lock); | ||
| 1388 | |||
| 1389 | init_completion(&fs_info->kobj_unregister); | ||
| 1390 | fs_info->tree_root = tree_root; | ||
| 1391 | fs_info->extent_root = extent_root; | ||
| 1392 | fs_info->chunk_root = chunk_root; | ||
| 1393 | fs_info->dev_root = dev_root; | ||
| 1394 | fs_info->fs_devices = fs_devices; | ||
| 1395 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | ||
| 1396 | INIT_LIST_HEAD(&fs_info->space_info); | ||
| 1397 | btrfs_mapping_init(&fs_info->mapping_tree); | ||
| 1398 | atomic_set(&fs_info->nr_async_submits, 0); | ||
| 1399 | atomic_set(&fs_info->async_submit_draining, 0); | ||
| 1400 | atomic_set(&fs_info->nr_async_bios, 0); | ||
| 1401 | atomic_set(&fs_info->throttles, 0); | ||
| 1402 | atomic_set(&fs_info->throttle_gen, 0); | ||
| 1403 | fs_info->sb = sb; | ||
| 1404 | fs_info->max_extent = (u64)-1; | ||
| 1405 | fs_info->max_inline = 8192 * 1024; | ||
| 1406 | setup_bdi(fs_info, &fs_info->bdi); | ||
| 1407 | fs_info->btree_inode = new_inode(sb); | ||
| 1408 | fs_info->btree_inode->i_ino = 1; | ||
| 1409 | fs_info->btree_inode->i_nlink = 1; | ||
| 1410 | fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); | ||
| 1411 | |||
| 1412 | INIT_LIST_HEAD(&fs_info->ordered_extents); | ||
| 1413 | spin_lock_init(&fs_info->ordered_extent_lock); | ||
| 1414 | |||
| 1415 | sb->s_blocksize = 4096; | ||
| 1416 | sb->s_blocksize_bits = blksize_bits(4096); | ||
| 1417 | |||
| 1418 | /* | ||
| 1419 | * we set the i_size on the btree inode to the max possible int. | ||
| 1420 | * the real end of the address space is determined by all of | ||
| 1421 | * the devices in the system | ||
| 1422 | */ | ||
| 1423 | fs_info->btree_inode->i_size = OFFSET_MAX; | ||
| 1424 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; | ||
| 1425 | fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; | ||
| 1426 | |||
| 1427 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | ||
| 1428 | fs_info->btree_inode->i_mapping, | ||
| 1429 | GFP_NOFS); | ||
| 1430 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, | ||
| 1431 | GFP_NOFS); | ||
| 1432 | |||
| 1433 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | ||
| 1434 | |||
| 1435 | spin_lock_init(&fs_info->block_group_cache_lock); | ||
| 1436 | fs_info->block_group_cache_tree.rb_node = NULL; | ||
| 1437 | |||
| 1438 | extent_io_tree_init(&fs_info->pinned_extents, | ||
| 1439 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1440 | extent_io_tree_init(&fs_info->pending_del, | ||
| 1441 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1442 | extent_io_tree_init(&fs_info->extent_ins, | ||
| 1443 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1444 | fs_info->do_barriers = 1; | ||
| 1445 | |||
| 1446 | extent_io_tree_init(&fs_info->reloc_mapping_tree, | ||
| 1447 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1448 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | ||
| 1449 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); | ||
| 1450 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); | ||
| 1451 | |||
| 1452 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
| 1453 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
| 1454 | sizeof(struct btrfs_key)); | ||
| 1455 | insert_inode_hash(fs_info->btree_inode); | ||
| 1456 | |||
| 1457 | mutex_init(&fs_info->trans_mutex); | ||
| 1458 | mutex_init(&fs_info->tree_log_mutex); | ||
| 1459 | mutex_init(&fs_info->drop_mutex); | ||
| 1460 | mutex_init(&fs_info->alloc_mutex); | ||
| 1461 | mutex_init(&fs_info->chunk_mutex); | ||
| 1462 | mutex_init(&fs_info->transaction_kthread_mutex); | ||
| 1463 | mutex_init(&fs_info->cleaner_mutex); | ||
| 1464 | mutex_init(&fs_info->volume_mutex); | ||
| 1465 | mutex_init(&fs_info->tree_reloc_mutex); | ||
| 1466 | init_waitqueue_head(&fs_info->transaction_throttle); | ||
| 1467 | init_waitqueue_head(&fs_info->transaction_wait); | ||
| 1468 | init_waitqueue_head(&fs_info->async_submit_wait); | ||
| 1469 | init_waitqueue_head(&fs_info->tree_log_wait); | ||
| 1470 | atomic_set(&fs_info->tree_log_commit, 0); | ||
| 1471 | atomic_set(&fs_info->tree_log_writers, 0); | ||
| 1472 | fs_info->tree_log_transid = 0; | ||
| 1473 | |||
| 1474 | #if 0 | ||
| 1475 | ret = add_hasher(fs_info, "crc32c"); | ||
| 1476 | if (ret) { | ||
| 1477 | printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); | ||
| 1478 | err = -ENOMEM; | ||
| 1479 | goto fail_iput; | ||
| 1480 | } | ||
| 1481 | #endif | ||
| 1482 | __setup_root(4096, 4096, 4096, 4096, tree_root, | ||
| 1483 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | ||
| 1484 | |||
| 1485 | |||
| 1486 | bh = __bread(fs_devices->latest_bdev, | ||
| 1487 | BTRFS_SUPER_INFO_OFFSET / 4096, 4096); | ||
| 1488 | if (!bh) | ||
| 1489 | goto fail_iput; | ||
| 1490 | |||
| 1491 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | ||
| 1492 | brelse(bh); | ||
| 1493 | |||
| 1494 | memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); | ||
| 1495 | |||
| 1496 | disk_super = &fs_info->super_copy; | ||
| 1497 | if (!btrfs_super_root(disk_super)) | ||
| 1498 | goto fail_sb_buffer; | ||
| 1499 | |||
| 1500 | err = btrfs_parse_options(tree_root, options); | ||
| 1501 | if (err) | ||
| 1502 | goto fail_sb_buffer; | ||
| 1503 | |||
| 1504 | /* | ||
| 1505 | * we need to start all the end_io workers up front because the | ||
| 1506 | * queue work function gets called at interrupt time, and so it | ||
| 1507 | * cannot dynamically grow. | ||
| 1508 | */ | ||
| 1509 | btrfs_init_workers(&fs_info->workers, "worker", | ||
| 1510 | fs_info->thread_pool_size); | ||
| 1511 | btrfs_init_workers(&fs_info->submit_workers, "submit", | ||
| 1512 | min_t(u64, fs_devices->num_devices, | ||
| 1513 | fs_info->thread_pool_size)); | ||
| 1514 | |||
| 1515 | /* a higher idle thresh on the submit workers makes it much more | ||
| 1516 | * likely that bios will be sent down in a sane order to the | ||
| 1517 | * devices | ||
| 1518 | */ | ||
| 1519 | fs_info->submit_workers.idle_thresh = 64; | ||
| 1520 | |||
| 1521 | /* fs_info->workers is responsible for checksumming file data | ||
| 1522 | * blocks and metadata. Using a larger idle thresh allows each | ||
| 1523 | * worker thread to operate on things in roughly the order they | ||
| 1524 | * were sent by the writeback daemons, improving overall locality | ||
| 1525 | * of the IO going down the pipe. | ||
| 1526 | */ | ||
| 1527 | fs_info->workers.idle_thresh = 128; | ||
| 1528 | |||
| 1529 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); | ||
| 1530 | btrfs_init_workers(&fs_info->endio_workers, "endio", | ||
| 1531 | fs_info->thread_pool_size); | ||
| 1532 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | ||
| 1533 | fs_info->thread_pool_size); | ||
| 1534 | |||
| 1535 | /* | ||
| 1536 | * endios are largely parallel and should have a very | ||
| 1537 | * low idle thresh | ||
| 1538 | */ | ||
| 1539 | fs_info->endio_workers.idle_thresh = 4; | ||
| 1540 | fs_info->endio_write_workers.idle_thresh = 64; | ||
| 1541 | |||
| 1542 | btrfs_start_workers(&fs_info->workers, 1); | ||
| 1543 | btrfs_start_workers(&fs_info->submit_workers, 1); | ||
| 1544 | btrfs_start_workers(&fs_info->fixup_workers, 1); | ||
| 1545 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | ||
| 1546 | btrfs_start_workers(&fs_info->endio_write_workers, | ||
| 1547 | fs_info->thread_pool_size); | ||
| 1548 | |||
| 1549 | err = -EINVAL; | ||
| 1550 | if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { | ||
| 1551 | printk("Btrfs: wanted %llu devices, but found %llu\n", | ||
| 1552 | (unsigned long long)btrfs_super_num_devices(disk_super), | ||
| 1553 | (unsigned long long)fs_devices->open_devices); | ||
| 1554 | if (btrfs_test_opt(tree_root, DEGRADED)) | ||
| 1555 | printk("continuing in degraded mode\n"); | ||
| 1556 | else | ||
| 1557 | goto fail_sb_buffer; | ||
| 1559 | } | ||
| 1560 | |||
| 1561 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | ||
| 1562 | |||
| 1563 | nodesize = btrfs_super_nodesize(disk_super); | ||
| 1564 | leafsize = btrfs_super_leafsize(disk_super); | ||
| 1565 | sectorsize = btrfs_super_sectorsize(disk_super); | ||
| 1566 | stripesize = btrfs_super_stripesize(disk_super); | ||
| 1567 | tree_root->nodesize = nodesize; | ||
| 1568 | tree_root->leafsize = leafsize; | ||
| 1569 | tree_root->sectorsize = sectorsize; | ||
| 1570 | tree_root->stripesize = stripesize; | ||
| 1571 | |||
| 1572 | sb->s_blocksize = sectorsize; | ||
| 1573 | sb->s_blocksize_bits = blksize_bits(sectorsize); | ||
| 1574 | |||
| 1575 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | ||
| 1576 | sizeof(disk_super->magic))) { | ||
| 1577 | printk("btrfs: valid FS not found on %s\n", sb->s_id); | ||
| 1578 | goto fail_sb_buffer; | ||
| 1579 | } | ||
| 1580 | |||
| 1581 | mutex_lock(&fs_info->chunk_mutex); | ||
| 1582 | ret = btrfs_read_sys_array(tree_root); | ||
| 1583 | mutex_unlock(&fs_info->chunk_mutex); | ||
| 1584 | if (ret) { | ||
| 1585 | printk("btrfs: failed to read the system array on %s\n", | ||
| 1586 | sb->s_id); | ||
| 1587 | goto fail_sys_array; | ||
| 1588 | } | ||
| 1589 | |||
| 1590 | blocksize = btrfs_level_size(tree_root, | ||
| 1591 | btrfs_super_chunk_root_level(disk_super)); | ||
| 1592 | |||
| 1593 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | ||
| 1594 | chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); | ||
| 1595 | |||
| 1596 | chunk_root->node = read_tree_block(chunk_root, | ||
| 1597 | btrfs_super_chunk_root(disk_super), | ||
| 1598 | blocksize, 0); | ||
| 1599 | BUG_ON(!chunk_root->node); | ||
| 1600 | |||
| 1601 | read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, | ||
| 1602 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), | ||
| 1603 | BTRFS_UUID_SIZE); | ||
| 1604 | |||
| 1605 | mutex_lock(&fs_info->chunk_mutex); | ||
| 1606 | ret = btrfs_read_chunk_tree(chunk_root); | ||
| 1607 | mutex_unlock(&fs_info->chunk_mutex); | ||
| 1608 | BUG_ON(ret); | ||
| 1609 | |||
| 1610 | btrfs_close_extra_devices(fs_devices); | ||
| 1611 | |||
| 1612 | blocksize = btrfs_level_size(tree_root, | ||
| 1613 | btrfs_super_root_level(disk_super)); | ||
| 1614 | |||
| 1615 | |||
| 1616 | tree_root->node = read_tree_block(tree_root, | ||
| 1617 | btrfs_super_root(disk_super), | ||
| 1618 | blocksize, 0); | ||
| 1619 | if (!tree_root->node) | ||
| 1620 | goto fail_sb_buffer; | ||
| 1621 | |||
| 1622 | |||
| 1623 | ret = find_and_setup_root(tree_root, fs_info, | ||
| 1624 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | ||
| 1625 | if (ret) | ||
| 1626 | goto fail_tree_root; | ||
| 1627 | extent_root->track_dirty = 1; | ||
| 1628 | |||
| 1629 | ret = find_and_setup_root(tree_root, fs_info, | ||
| 1630 | BTRFS_DEV_TREE_OBJECTID, dev_root); | ||
| 1631 | dev_root->track_dirty = 1; | ||
| 1632 | |||
| 1633 | if (ret) | ||
| 1634 | goto fail_extent_root; | ||
| 1635 | |||
| 1636 | btrfs_read_block_groups(extent_root); | ||
| 1637 | |||
| 1638 | fs_info->generation = btrfs_super_generation(disk_super) + 1; | ||
| 1639 | fs_info->data_alloc_profile = (u64)-1; | ||
| 1640 | fs_info->metadata_alloc_profile = (u64)-1; | ||
| 1641 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
| 1642 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | ||
| 1643 | "btrfs-cleaner"); | ||
| 1644 | if (!fs_info->cleaner_kthread) | ||
| 1645 | goto fail_extent_root; | ||
| 1646 | |||
| 1647 | fs_info->transaction_kthread = kthread_run(transaction_kthread, | ||
| 1648 | tree_root, | ||
| 1649 | "btrfs-transaction"); | ||
| 1650 | if (!fs_info->transaction_kthread) | ||
| 1651 | goto fail_cleaner; | ||
| 1652 | |||
| 1653 | if (btrfs_super_log_root(disk_super) != 0) { | ||
| 1654 | u32 blocksize; | ||
| 1655 | u64 bytenr = btrfs_super_log_root(disk_super); | ||
| 1656 | |||
| 1657 | blocksize = | ||
| 1658 | btrfs_level_size(tree_root, | ||
| 1659 | btrfs_super_log_root_level(disk_super)); | ||
| 1660 | |||
| 1661 | log_tree_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1662 | GFP_NOFS); | ||
| 1663 | |||
| 1664 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | ||
| 1665 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | ||
| 1666 | |||
| 1667 | log_tree_root->node = read_tree_block(tree_root, bytenr, | ||
| 1668 | blocksize, 0); | ||
| 1669 | ret = btrfs_recover_log_trees(log_tree_root); | ||
| 1670 | BUG_ON(ret); | ||
| 1671 | } | ||
| 1672 | |||
| 1673 | ret = btrfs_cleanup_reloc_trees(tree_root); | ||
| 1674 | BUG_ON(ret); | ||
| 1675 | |||
| 1676 | fs_info->last_trans_committed = btrfs_super_generation(disk_super); | ||
| 1677 | return tree_root; | ||
| 1678 | |||
| 1679 | fail_cleaner: | ||
| 1680 | kthread_stop(fs_info->cleaner_kthread); | ||
| 1681 | fail_extent_root: | ||
| 1682 | free_extent_buffer(extent_root->node); | ||
| 1683 | fail_tree_root: | ||
| 1684 | free_extent_buffer(tree_root->node); | ||
| 1685 | fail_sys_array: | ||
| 1686 | fail_sb_buffer: | ||
| 1687 | btrfs_stop_workers(&fs_info->fixup_workers); | ||
| 1688 | btrfs_stop_workers(&fs_info->workers); | ||
| 1689 | btrfs_stop_workers(&fs_info->endio_workers); | ||
| 1690 | btrfs_stop_workers(&fs_info->endio_write_workers); | ||
| 1691 | btrfs_stop_workers(&fs_info->submit_workers); | ||
| 1692 | fail_iput: | ||
| 1693 | iput(fs_info->btree_inode); | ||
| 1694 | fail: | ||
| 1695 | btrfs_close_devices(fs_info->fs_devices); | ||
| 1696 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
| 1697 | |||
| 1698 | kfree(extent_root); | ||
| 1699 | kfree(tree_root); | ||
| 1700 | bdi_destroy(&fs_info->bdi); | ||
| 1701 | kfree(fs_info); | ||
| 1702 | kfree(chunk_root); | ||
| 1703 | kfree(dev_root); | ||
| 1704 | return ERR_PTR(err); | ||
| 1705 | } | ||
| 1706 | |||
| 1707 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | ||
| 1708 | { | ||
| 1709 | char b[BDEVNAME_SIZE]; | ||
| 1710 | |||
| 1711 | if (uptodate) { | ||
| 1712 | set_buffer_uptodate(bh); | ||
| 1713 | } else { | ||
| 1714 | if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { | ||
| 1715 | printk(KERN_WARNING "lost page write due to " | ||
| 1716 | "I/O error on %s\n", | ||
| 1717 | bdevname(bh->b_bdev, b)); | ||
| 1718 | } | ||
| 1719 | /* note, we don't set_buffer_write_io_error because we have | ||
| 1720 | * our own ways of dealing with the IO errors | ||
| 1721 | */ | ||
| 1722 | clear_buffer_uptodate(bh); | ||
| 1723 | } | ||
| 1724 | unlock_buffer(bh); | ||
| 1725 | put_bh(bh); | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | int write_all_supers(struct btrfs_root *root) | ||
| 1729 | { | ||
| 1730 | struct list_head *cur; | ||
| 1731 | struct list_head *head = &root->fs_info->fs_devices->devices; | ||
| 1732 | struct btrfs_device *dev; | ||
| 1733 | struct btrfs_super_block *sb; | ||
| 1734 | struct btrfs_dev_item *dev_item; | ||
| 1735 | struct buffer_head *bh; | ||
| 1736 | int ret; | ||
| 1737 | int do_barriers; | ||
| 1738 | int max_errors; | ||
| 1739 | int total_errors = 0; | ||
| 1740 | u32 crc; | ||
| 1741 | u64 flags; | ||
| 1742 | |||
| 1743 | max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | ||
| 1744 | do_barriers = !btrfs_test_opt(root, NOBARRIER); | ||
| 1745 | |||
| 1746 | sb = &root->fs_info->super_for_commit; | ||
| 1747 | dev_item = &sb->dev_item; | ||
| 1748 | list_for_each(cur, head) { | ||
| 1749 | dev = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1750 | if (!dev->bdev) { | ||
| 1751 | total_errors++; | ||
| 1752 | continue; | ||
| 1753 | } | ||
| 1754 | if (!dev->in_fs_metadata) | ||
| 1755 | continue; | ||
| 1756 | |||
| 1757 | btrfs_set_stack_device_type(dev_item, dev->type); | ||
| 1758 | btrfs_set_stack_device_id(dev_item, dev->devid); | ||
| 1759 | btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); | ||
| 1760 | btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); | ||
| 1761 | btrfs_set_stack_device_io_align(dev_item, dev->io_align); | ||
| 1762 | btrfs_set_stack_device_io_width(dev_item, dev->io_width); | ||
| 1763 | btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); | ||
| 1764 | memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); | ||
| 1765 | flags = btrfs_super_flags(sb); | ||
| 1766 | btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); | ||
| 1767 | |||
| 1768 | |||
| 1769 | crc = ~(u32)0; | ||
| 1770 | crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, | ||
| 1771 | BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); | ||
| 1772 | btrfs_csum_final(crc, sb->csum); | ||
| 1773 | |||
| 1774 | bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, | ||
| 1775 | BTRFS_SUPER_INFO_SIZE); | ||
| 1776 | |||
| 1777 | memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); | ||
| 1778 | dev->pending_io = bh; | ||
| 1779 | |||
| 1780 | get_bh(bh); | ||
| 1781 | set_buffer_uptodate(bh); | ||
| 1782 | lock_buffer(bh); | ||
| 1783 | bh->b_end_io = btrfs_end_buffer_write_sync; | ||
| 1784 | |||
| 1785 | if (do_barriers && dev->barriers) { | ||
| 1786 | ret = submit_bh(WRITE_BARRIER, bh); | ||
| 1787 | if (ret == -EOPNOTSUPP) { | ||
| 1788 | printk("btrfs: disabling barriers on dev %s\n", | ||
| 1789 | dev->name); | ||
| 1790 | set_buffer_uptodate(bh); | ||
| 1791 | dev->barriers = 0; | ||
| 1792 | get_bh(bh); | ||
| 1793 | lock_buffer(bh); | ||
| 1794 | ret = submit_bh(WRITE, bh); | ||
| 1795 | } | ||
| 1796 | } else { | ||
| 1797 | ret = submit_bh(WRITE, bh); | ||
| 1798 | } | ||
| 1799 | if (ret) | ||
| 1800 | total_errors++; | ||
| 1801 | } | ||
| 1802 | if (total_errors > max_errors) { | ||
| 1803 | printk("btrfs: %d errors while writing supers\n", total_errors); | ||
| 1804 | BUG(); | ||
| 1805 | } | ||
| 1806 | total_errors = 0; | ||
| 1807 | |||
| 1808 | list_for_each(cur, head) { | ||
| 1809 | dev = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1810 | if (!dev->bdev) | ||
| 1811 | continue; | ||
| 1812 | if (!dev->in_fs_metadata) | ||
| 1813 | continue; | ||
| 1814 | |||
| 1815 | BUG_ON(!dev->pending_io); | ||
| 1816 | bh = dev->pending_io; | ||
| 1817 | wait_on_buffer(bh); | ||
| 1818 | if (!buffer_uptodate(dev->pending_io)) { | ||
| 1819 | if (do_barriers && dev->barriers) { | ||
| 1820 | printk("btrfs: disabling barriers on dev %s\n", | ||
| 1821 | dev->name); | ||
| 1822 | set_buffer_uptodate(bh); | ||
| 1823 | get_bh(bh); | ||
| 1824 | lock_buffer(bh); | ||
| 1825 | dev->barriers = 0; | ||
| 1826 | ret = submit_bh(WRITE, bh); | ||
| 1827 | BUG_ON(ret); | ||
| 1828 | wait_on_buffer(bh); | ||
| 1829 | if (!buffer_uptodate(bh)) | ||
| 1830 | total_errors++; | ||
| 1831 | } else { | ||
| 1832 | total_errors++; | ||
| 1833 | } | ||
| 1834 | |||
| 1835 | } | ||
| 1836 | dev->pending_io = NULL; | ||
| 1837 | brelse(bh); | ||
| 1838 | } | ||
| 1839 | if (total_errors > max_errors) { | ||
| 1840 | printk("btrfs: %d errors while writing supers\n", total_errors); | ||
| 1841 | BUG(); | ||
| 1842 | } | ||
| 1843 | return 0; | ||
| 1844 | } | ||
| 1845 | |||
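The super block checksum written above follows the btrfs crc32c
convention: seed with all ones, sum everything after the csum field,
and let btrfs_csum_final() store the folded result at the front of the
block. Assuming btrfs_csum_data() wraps crc32c and btrfs_csum_final()
inverts and stores the value little-endian, as the crc32c.h/hash.h in
this commit suggest, a matching verifier is roughly:

    /* verifier sketch under the assumptions above (linux/crc32c.h) */
    static int check_super_csum(struct btrfs_super_block *sb)
    {
            u32 crc = ~(u32)0;

            crc = crc32c(crc, (char *)sb + BTRFS_CSUM_SIZE,
                         BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
            crc = ~crc;        /* assumes btrfs_csum_final() inverts */
            return crc == le32_to_cpu(*(__le32 *)sb->csum) ? 0 : -EIO;
    }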
| 1846 | int write_ctree_super(struct btrfs_trans_handle *trans, | ||
| 1847 | struct btrfs_root *root) | ||
| 1848 | { | ||
| 1849 | int ret; | ||
| 1850 | |||
| 1851 | ret = write_all_supers(root); | ||
| 1852 | return ret; | ||
| 1853 | } | ||
| 1854 | |||
| 1855 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | ||
| 1856 | { | ||
| 1857 | radix_tree_delete(&fs_info->fs_roots_radix, | ||
| 1858 | (unsigned long)root->root_key.objectid); | ||
| 1859 | if (root->in_sysfs) | ||
| 1860 | btrfs_sysfs_del_root(root); | ||
| 1861 | if (root->inode) | ||
| 1862 | iput(root->inode); | ||
| 1863 | if (root->node) | ||
| 1864 | free_extent_buffer(root->node); | ||
| 1865 | if (root->commit_root) | ||
| 1866 | free_extent_buffer(root->commit_root); | ||
| 1867 | if (root->name) | ||
| 1868 | kfree(root->name); | ||
| 1869 | kfree(root); | ||
| 1870 | return 0; | ||
| 1871 | } | ||
| 1872 | |||
| 1873 | static int del_fs_roots(struct btrfs_fs_info *fs_info) | ||
| 1874 | { | ||
| 1875 | int ret; | ||
| 1876 | struct btrfs_root *gang[8]; | ||
| 1877 | int i; | ||
| 1878 | |||
| 1879 | while (1) { | ||
| 1880 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | ||
| 1881 | (void **)gang, 0, | ||
| 1882 | ARRAY_SIZE(gang)); | ||
| 1883 | if (!ret) | ||
| 1884 | break; | ||
| 1885 | for (i = 0; i < ret; i++) | ||
| 1886 | btrfs_free_fs_root(fs_info, gang[i]); | ||
| 1887 | } | ||
| 1888 | return 0; | ||
| 1889 | } | ||
| 1890 | |||
| 1891 | int close_ctree(struct btrfs_root *root) | ||
| 1892 | { | ||
| 1893 | int ret; | ||
| 1894 | struct btrfs_trans_handle *trans; | ||
| 1895 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 1896 | |||
| 1897 | fs_info->closing = 1; | ||
| 1898 | smp_mb(); | ||
| 1899 | |||
| 1900 | kthread_stop(root->fs_info->transaction_kthread); | ||
| 1901 | kthread_stop(root->fs_info->cleaner_kthread); | ||
| 1902 | |||
| 1903 | btrfs_clean_old_snapshots(root); | ||
| 1904 | trans = btrfs_start_transaction(root, 1); | ||
| 1905 | ret = btrfs_commit_transaction(trans, root); | ||
| 1906 | /* run commit again to drop the original snapshot */ | ||
| 1907 | trans = btrfs_start_transaction(root, 1); | ||
| 1908 | btrfs_commit_transaction(trans, root); | ||
| 1909 | ret = btrfs_write_and_wait_transaction(NULL, root); | ||
| 1910 | BUG_ON(ret); | ||
| 1911 | |||
| 1912 | write_ctree_super(NULL, root); | ||
| 1913 | |||
| 1914 | if (fs_info->delalloc_bytes) { | ||
| 1915 | printk("btrfs: at unmount delalloc count %Lu\n", | ||
| 1916 | fs_info->delalloc_bytes); | ||
| 1917 | } | ||
| 1918 | if (fs_info->total_ref_cache_size) { | ||
| 1919 | printk("btrfs: at umount reference cache size %Lu\n", | ||
| 1920 | fs_info->total_ref_cache_size); | ||
| 1921 | } | ||
| 1922 | |||
| 1923 | if (fs_info->extent_root->node) | ||
| 1924 | free_extent_buffer(fs_info->extent_root->node); | ||
| 1925 | |||
| 1926 | if (fs_info->tree_root->node) | ||
| 1927 | free_extent_buffer(fs_info->tree_root->node); | ||
| 1928 | |||
| 1929 | if (root->fs_info->chunk_root->node) | ||
| 1930 | free_extent_buffer(root->fs_info->chunk_root->node); | ||
| 1931 | |||
| 1932 | if (root->fs_info->dev_root->node) | ||
| 1933 | free_extent_buffer(root->fs_info->dev_root->node); | ||
| 1934 | |||
| 1935 | btrfs_free_block_groups(root->fs_info); | ||
| 1936 | fs_info->closing = 2; | ||
| 1937 | del_fs_roots(fs_info); | ||
| 1938 | |||
| 1939 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); | ||
| 1940 | |||
| 1941 | truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); | ||
| 1942 | |||
| 1943 | btrfs_stop_workers(&fs_info->fixup_workers); | ||
| 1944 | btrfs_stop_workers(&fs_info->workers); | ||
| 1945 | btrfs_stop_workers(&fs_info->endio_workers); | ||
| 1946 | btrfs_stop_workers(&fs_info->endio_write_workers); | ||
| 1947 | btrfs_stop_workers(&fs_info->submit_workers); | ||
| 1948 | |||
| 1949 | iput(fs_info->btree_inode); | ||
| 1950 | #if 0 | ||
| 1951 | while (!list_empty(&fs_info->hashers)) { | ||
| 1952 | struct btrfs_hasher *hasher; | ||
| 1953 | hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, | ||
| 1954 | hashers); | ||
| 1955 | list_del(&hasher->hashers); | ||
| 1956 | crypto_free_hash(&fs_info->hash_tfm); | ||
| 1957 | kfree(hasher); | ||
| 1958 | } | ||
| 1959 | #endif | ||
| 1960 | btrfs_close_devices(fs_info->fs_devices); | ||
| 1961 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
| 1962 | |||
| 1963 | bdi_destroy(&fs_info->bdi); | ||
| 1964 | |||
| 1965 | kfree(fs_info->extent_root); | ||
| 1966 | kfree(fs_info->tree_root); | ||
| 1967 | kfree(fs_info->chunk_root); | ||
| 1968 | kfree(fs_info->dev_root); | ||
| 1969 | return 0; | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | ||
| 1973 | { | ||
| 1974 | int ret; | ||
| 1975 | struct inode *btree_inode = buf->first_page->mapping->host; | ||
| 1976 | |||
| 1977 | ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); | ||
| 1978 | if (!ret) | ||
| 1979 | return ret; | ||
| 1980 | |||
| 1981 | ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, | ||
| 1982 | parent_transid); | ||
| 1983 | return !ret; | ||
| 1984 | } | ||
| 1985 | |||
| 1986 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf) | ||
| 1987 | { | ||
| 1988 | struct inode *btree_inode = buf->first_page->mapping->host; | ||
| 1989 | return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, | ||
| 1990 | buf); | ||
| 1991 | } | ||
| 1992 | |||
| 1993 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | ||
| 1994 | { | ||
| 1995 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | ||
| 1996 | u64 transid = btrfs_header_generation(buf); | ||
| 1997 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 1998 | |||
| 1999 | WARN_ON(!btrfs_tree_locked(buf)); | ||
| 2000 | if (transid != root->fs_info->generation) { | ||
| 2001 | printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", | ||
| 2002 | (unsigned long long)buf->start, | ||
| 2003 | transid, root->fs_info->generation); | ||
| 2004 | WARN_ON(1); | ||
| 2005 | } | ||
| 2006 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | ||
| 2010 | { | ||
| 2011 | /* | ||
| 2012 | * looks as though older kernels can get into trouble with | ||
| 2013 | * this code, they end up stuck in balance_dirty_pages forever | ||
| 2014 | */ | ||
| 2015 | struct extent_io_tree *tree; | ||
| 2016 | u64 num_dirty; | ||
| 2017 | u64 start = 0; | ||
| 2018 | unsigned long thresh = 96 * 1024 * 1024; | ||
| 2019 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | ||
| 2020 | |||
| 2021 | if (current_is_pdflush() || current->flags & PF_MEMALLOC) | ||
| 2022 | return; | ||
| 2023 | |||
| 2024 | num_dirty = count_range_bits(tree, &start, (u64)-1, | ||
| 2025 | thresh, EXTENT_DIRTY); | ||
| 2026 | if (num_dirty > thresh) { | ||
| 2027 | balance_dirty_pages_ratelimited_nr( | ||
| 2028 | root->fs_info->btree_inode->i_mapping, 1); | ||
| 2029 | } | ||
| 2030 | return; | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | ||
| 2034 | { | ||
| 2035 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | ||
| 2036 | int ret; | ||
| 2037 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | ||
| 2038 | if (ret == 0) { | ||
| 2039 | buf->flags |= EXTENT_UPTODATE; | ||
| 2040 | } | ||
| 2041 | return ret; | ||
| 2042 | } | ||
| 2043 | |||
| 2044 | int btree_lock_page_hook(struct page *page) | ||
| 2045 | { | ||
| 2046 | struct inode *inode = page->mapping->host; | ||
| 2047 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2048 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 2049 | struct extent_buffer *eb; | ||
| 2050 | unsigned long len; | ||
| 2051 | u64 bytenr = page_offset(page); | ||
| 2052 | |||
| 2053 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
| 2054 | goto out; | ||
| 2055 | |||
| 2056 | len = page->private >> 2; | ||
| 2057 | eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); | ||
| 2058 | if (!eb) | ||
| 2059 | goto out; | ||
| 2060 | |||
| 2061 | btrfs_tree_lock(eb); | ||
| 2062 | spin_lock(&root->fs_info->hash_lock); | ||
| 2063 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | ||
| 2064 | spin_unlock(&root->fs_info->hash_lock); | ||
| 2065 | btrfs_tree_unlock(eb); | ||
| 2066 | free_extent_buffer(eb); | ||
| 2067 | out: | ||
| 2068 | lock_page(page); | ||
| 2069 | return 0; | ||
| 2070 | } | ||
| 2071 | |||
| 2072 | static struct extent_io_ops btree_extent_io_ops = { | ||
| 2073 | .write_cache_pages_lock_hook = btree_lock_page_hook, | ||
| 2074 | .readpage_end_io_hook = btree_readpage_end_io_hook, | ||
| 2075 | .submit_bio_hook = btree_submit_bio_hook, | ||
| 2076 | /* note we're sharing with inode.c for the merge bio hook */ | ||
| 2077 | .merge_bio_hook = btrfs_merge_bio_hook, | ||
| 2078 | }; | ||
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h new file mode 100644 index 00000000000..f84f5058dbb --- /dev/null +++ b/fs/btrfs/disk-io.h | |||
| @@ -0,0 +1,84 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __DISKIO__ | ||
| 20 | #define __DISKIO__ | ||
| 21 | |||
| 22 | #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) | ||
| 23 | #define BTRFS_SUPER_INFO_SIZE 4096 | ||
| 24 | struct btrfs_device; | ||
| 25 | struct btrfs_fs_devices; | ||
| 26 | |||
| 27 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | ||
| 28 | u32 blocksize, u64 parent_transid); | ||
| 29 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | ||
| 30 | u64 parent_transid); | ||
| 31 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | ||
| 32 | u64 bytenr, u32 blocksize); | ||
| 33 | int clean_tree_block(struct btrfs_trans_handle *trans, | ||
| 34 | struct btrfs_root *root, struct extent_buffer *buf); | ||
| 35 | struct btrfs_root *open_ctree(struct super_block *sb, | ||
| 36 | struct btrfs_fs_devices *fs_devices, | ||
| 37 | char *options); | ||
| 38 | int close_ctree(struct btrfs_root *root); | ||
| 39 | int write_ctree_super(struct btrfs_trans_handle *trans, | ||
| 40 | struct btrfs_root *root); | ||
| 41 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | ||
| 42 | u64 bytenr, u32 blocksize); | ||
| 43 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
| 44 | u64 root_objectid); | ||
| 45 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | ||
| 46 | struct btrfs_key *location, | ||
| 47 | const char *name, int namelen); | ||
| 48 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | ||
| 49 | struct btrfs_key *location); | ||
| 50 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | ||
| 51 | struct btrfs_key *location); | ||
| 52 | int btrfs_insert_dev_radix(struct btrfs_root *root, | ||
| 53 | struct block_device *bdev, | ||
| 54 | u64 device_id, | ||
| 55 | u64 block_start, | ||
| 56 | u64 num_blocks); | ||
| 57 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | ||
| 58 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | ||
| 59 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | ||
| 60 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); | ||
| 61 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); | ||
| 62 | int wait_on_tree_block_writeback(struct btrfs_root *root, | ||
| 63 | struct extent_buffer *buf); | ||
| 64 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); | ||
| 65 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); | ||
| 66 | void btrfs_csum_final(u32 crc, char *result); | ||
| 67 | int btrfs_open_device(struct btrfs_device *dev); | ||
| 68 | int btrfs_verify_block_csum(struct btrfs_root *root, | ||
| 69 | struct extent_buffer *buf); | ||
| 70 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | ||
| 71 | int metadata); | ||
| 72 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | ||
| 73 | int rw, struct bio *bio, int mirror_num, | ||
| 74 | extent_submit_bio_hook_t *submit_bio_hook); | ||
| 75 | int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | ||
| 76 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | ||
| 77 | int btrfs_write_tree_block(struct extent_buffer *buf); | ||
| 78 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | ||
| 79 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 80 | struct btrfs_fs_info *fs_info); | ||
| 81 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 82 | struct btrfs_fs_info *fs_info); | ||
| 83 | int btree_lock_page_hook(struct page *page); | ||
| 84 | #endif | ||
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c new file mode 100644 index 00000000000..48b82cd7583 --- /dev/null +++ b/fs/btrfs/export.c | |||
| @@ -0,0 +1,201 @@ | |||
| 1 | #include <linux/fs.h> | ||
| 2 | #include <linux/types.h> | ||
| 3 | #include "ctree.h" | ||
| 4 | #include "disk-io.h" | ||
| 5 | #include "btrfs_inode.h" | ||
| 6 | #include "print-tree.h" | ||
| 7 | #include "export.h" | ||
| 8 | #include "compat.h" | ||
| 9 | |||
| 10 | #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) | ||
| 11 | #define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) | ||
| 12 | #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) | ||
| 13 | |||
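The three sizes above are counted in 32-bit words, the unit the
exportfs interface measures file handles in. Assuming struct btrfs_fid
in export.h is laid out as below (packed), they come out to 5, 8 and 10
words:

    /* assumed layout, mirroring fs/btrfs/export.h in this commit */
    struct btrfs_fid {
            u64 objectid;             /* the inode itself             */
            u64 root_objectid;        /* its subvolume                */
            u32 gen;                  /* 20 bytes = 5 words so far    */
            u64 parent_objectid;
            u32 parent_gen;           /* 32 bytes = 8 words           */
            u64 parent_root_objectid; /* 40 bytes = 10 words in total */
    } __attribute__ ((packed));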
| 14 | static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | ||
| 15 | int connectable) | ||
| 16 | { | ||
| 17 | struct btrfs_fid *fid = (struct btrfs_fid *)fh; | ||
| 18 | struct inode *inode = dentry->d_inode; | ||
| 19 | int len = *max_len; | ||
| 20 | int type; | ||
| 21 | |||
| 22 | if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || | ||
| 23 | (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) | ||
| 24 | return 255; | ||
| 25 | |||
| 26 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | ||
| 27 | type = FILEID_BTRFS_WITHOUT_PARENT; | ||
| 28 | |||
| 29 | fid->objectid = BTRFS_I(inode)->location.objectid; | ||
| 30 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | ||
| 31 | fid->gen = inode->i_generation; | ||
| 32 | |||
| 33 | if (connectable && !S_ISDIR(inode->i_mode)) { | ||
| 34 | struct inode *parent; | ||
| 35 | u64 parent_root_id; | ||
| 36 | |||
| 37 | spin_lock(&dentry->d_lock); | ||
| 38 | |||
| 39 | parent = dentry->d_parent->d_inode; | ||
| 40 | fid->parent_objectid = BTRFS_I(parent)->location.objectid; | ||
| 41 | fid->parent_gen = parent->i_generation; | ||
| 42 | parent_root_id = BTRFS_I(parent)->root->objectid; | ||
| 43 | |||
| 44 | spin_unlock(&dentry->d_lock); | ||
| 45 | |||
| 46 | if (parent_root_id != fid->root_objectid) { | ||
| 47 | fid->parent_root_objectid = parent_root_id; | ||
| 48 | len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; | ||
| 49 | type = FILEID_BTRFS_WITH_PARENT_ROOT; | ||
| 50 | } else { | ||
| 51 | len = BTRFS_FID_SIZE_CONNECTABLE; | ||
| 52 | type = FILEID_BTRFS_WITH_PARENT; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | *max_len = len; | ||
| 57 | return type; | ||
| 58 | } | ||
| 59 | |||
| 60 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | ||
| 61 | u64 root_objectid, u32 generation) | ||
| 62 | { | ||
| 63 | struct btrfs_root *root; | ||
| 64 | struct inode *inode; | ||
| 65 | struct btrfs_key key; | ||
| 66 | |||
| 67 | key.objectid = root_objectid; | ||
| 68 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 69 | key.offset = (u64)-1; | ||
| 70 | |||
| 71 | root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); | ||
| 72 | if (IS_ERR(root)) | ||
| 73 | return ERR_CAST(root); | ||
| 74 | |||
| 75 | key.objectid = objectid; | ||
| 76 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 77 | key.offset = 0; | ||
| 78 | |||
| 79 | inode = btrfs_iget(sb, &key, root, NULL); | ||
| 80 | if (IS_ERR(inode)) | ||
| 81 | return ERR_CAST(inode); | ||
| 82 | |||
| 83 | if (generation != inode->i_generation) { | ||
| 84 | iput(inode); | ||
| 85 | return ERR_PTR(-ESTALE); | ||
| 86 | } | ||
| 87 | |||
| 88 | return d_obtain_alias(inode); | ||
| 89 | } | ||
| 90 | |||
| 91 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | ||
| 92 | int fh_len, int fh_type) | ||
| 93 | { | ||
| 94 | struct btrfs_fid *fid = (struct btrfs_fid *) fh; | ||
| 95 | u64 objectid, root_objectid; | ||
| 96 | u32 generation; | ||
| 97 | |||
| 98 | if (fh_type == FILEID_BTRFS_WITH_PARENT) { | ||
| 99 | if (fh_len != BTRFS_FID_SIZE_CONNECTABLE) | ||
| 100 | return NULL; | ||
| 101 | root_objectid = fid->root_objectid; | ||
| 102 | } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) { | ||
| 103 | if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) | ||
| 104 | return NULL; | ||
| 105 | root_objectid = fid->parent_root_objectid; | ||
| 106 | } else | ||
| 107 | return NULL; | ||
| 108 | |||
| 109 | objectid = fid->parent_objectid; | ||
| 110 | generation = fid->parent_gen; | ||
| 111 | |||
| 112 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | ||
| 113 | } | ||
| 114 | |||
| 115 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | ||
| 116 | int fh_len, int fh_type) | ||
| 117 | { | ||
| 118 | struct btrfs_fid *fid = (struct btrfs_fid *) fh; | ||
| 119 | u64 objectid, root_objectid; | ||
| 120 | u32 generation; | ||
| 121 | |||
| 122 | if ((fh_type != FILEID_BTRFS_WITH_PARENT || | ||
| 123 | fh_len != BTRFS_FID_SIZE_CONNECTABLE) && | ||
| 124 | (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT || | ||
| 125 | fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) && | ||
| 126 | (fh_type != FILEID_BTRFS_WITHOUT_PARENT || | ||
| 127 | fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) | ||
| 128 | return NULL; | ||
| 129 | |||
| 130 | objectid = fid->objectid; | ||
| 131 | root_objectid = fid->root_objectid; | ||
| 132 | generation = fid->gen; | ||
| 133 | |||
| 134 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | ||
| 135 | } | ||
| 136 | |||
| 137 | static struct dentry *btrfs_get_parent(struct dentry *child) | ||
| 138 | { | ||
| 139 | struct inode *dir = child->d_inode; | ||
| 140 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 141 | struct btrfs_key key; | ||
| 142 | struct btrfs_path *path; | ||
| 143 | struct extent_buffer *leaf; | ||
| 144 | int slot; | ||
| 145 | u64 objectid; | ||
| 146 | int ret; | ||
| 147 | |||
| 148 | path = btrfs_alloc_path(); | ||
| | if (!path) | ||
| | return ERR_PTR(-ENOMEM); | ||
| 149 | |||
| 150 | key.objectid = dir->i_ino; | ||
| 151 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | ||
| 152 | key.offset = (u64)-1; | ||
| 153 | |||
| 154 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 155 | if (ret < 0) { | ||
| 156 | /* Error */ | ||
| 157 | btrfs_free_path(path); | ||
| 158 | return ERR_PTR(ret); | ||
| 159 | } | ||
| 160 | leaf = path->nodes[0]; | ||
| 161 | slot = path->slots[0]; | ||
| 162 | if (ret) { | ||
| 163 | /* | ||
| 164 | * btrfs_search_slot() returns the slot where we'd want to | ||
| 165 | * insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. | ||
| 166 | * The _real_ backref, telling us what the parent inode | ||
| 167 | * _actually_ is, will be in the slot _before_ the one | ||
| | * that btrfs_search_slot() returns. | ||
| | */ | ||
| 168 | if (!slot) { | ||
| 169 | /* Unless there is _no_ key in the tree before... */ | ||
| 170 | btrfs_free_path(path); | ||
| 171 | return ERR_PTR(-EIO); | ||
| 172 | } | ||
| 173 | slot--; | ||
| 174 | } | ||
| 175 | |||
| 176 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 177 | btrfs_free_path(path); | ||
| 178 | |||
| 179 | if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) | ||
| 180 | return ERR_PTR(-EINVAL); | ||
| 181 | |||
| 182 | objectid = key.offset; | ||
| 183 | |||
| 184 | /* If we are already at the root of a subvol, return the real root */ | ||
| 185 | if (objectid == dir->i_ino) | ||
| 186 | return dget(dir->i_sb->s_root); | ||
| 187 | |||
| 188 | /* Build a new key for the inode item */ | ||
| 189 | key.objectid = objectid; | ||
| 190 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 191 | key.offset = 0; | ||
| 192 | |||
| 193 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); | ||
| 194 | } | ||
| 195 | |||
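| | /* | ||
| | * Editor's sketch (not part of the original patch): the lookup | ||
| | * pattern used by btrfs_get_parent() above. A search key of | ||
| | * (ino, BTRFS_INODE_REF_KEY, (u64)-1) can never match exactly, so | ||
| | * btrfs_search_slot() returns > 0 at the insert position, and the | ||
| | * highest real INODE_REF for the inode, whose offset is the parent | ||
| | * objectid, sits one slot earlier. 'ino' is a hypothetical inode | ||
| | * number; root, path, key and ret are as in the function above. | ||
| | */ | ||
| | key.objectid = ino; | ||
| | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | ||
| | key.offset = (u64)-1; | ||
| | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| | if (ret > 0 && path->slots[0] > 0) | ||
| | path->slots[0]--; /* step back to the real backref */ | ||
| | | ||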
| 196 | const struct export_operations btrfs_export_ops = { | ||
| 197 | .encode_fh = btrfs_encode_fh, | ||
| 198 | .fh_to_dentry = btrfs_fh_to_dentry, | ||
| 199 | .fh_to_parent = btrfs_fh_to_parent, | ||
| 200 | .get_parent = btrfs_get_parent, | ||
| 201 | }; | ||
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h new file mode 100644 index 00000000000..074348a9584 --- /dev/null +++ b/fs/btrfs/export.h | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | #ifndef BTRFS_EXPORT_H | ||
| 2 | #define BTRFS_EXPORT_H | ||
| 3 | |||
| 4 | #include <linux/exportfs.h> | ||
| 5 | |||
| 6 | extern const struct export_operations btrfs_export_ops; | ||
| 7 | |||
| 8 | struct btrfs_fid { | ||
| 9 | u64 objectid; | ||
| 10 | u64 root_objectid; | ||
| 11 | u32 gen; | ||
| 12 | |||
| 13 | u64 parent_objectid; | ||
| 14 | u32 parent_gen; | ||
| 15 | |||
| 16 | u64 parent_root_objectid; | ||
| 17 | } __attribute__ ((packed)); | ||
| 18 | |||
| 19 | #endif | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c new file mode 100644 index 00000000000..280ac1aa9b6 --- /dev/null +++ b/fs/btrfs/extent-tree.c | |||
| @@ -0,0 +1,5253 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/pagemap.h> | ||
| 20 | #include <linux/writeback.h> | ||
| 21 | #include <linux/blkdev.h> | ||
| 22 | #include "hash.h" | ||
| 23 | #include "crc32c.h" | ||
| 24 | #include "ctree.h" | ||
| 25 | #include "disk-io.h" | ||
| 26 | #include "print-tree.h" | ||
| 27 | #include "transaction.h" | ||
| 28 | #include "volumes.h" | ||
| 29 | #include "locking.h" | ||
| 30 | #include "ref-cache.h" | ||
| 31 | |||
| 32 | #define PENDING_EXTENT_INSERT 0 | ||
| 33 | #define PENDING_EXTENT_DELETE 1 | ||
| 34 | #define PENDING_BACKREF_UPDATE 2 | ||
| 35 | |||
| 36 | struct pending_extent_op { | ||
| 37 | int type; | ||
| 38 | u64 bytenr; | ||
| 39 | u64 num_bytes; | ||
| 40 | u64 parent; | ||
| 41 | u64 orig_parent; | ||
| 42 | u64 generation; | ||
| 43 | u64 orig_generation; | ||
| 44 | int level; | ||
| 45 | }; | ||
| 46 | |||
| 47 | static int finish_current_insert(struct btrfs_trans_handle *trans, | ||
| 48 | struct btrfs_root *extent_root); | ||
| 49 | static int del_pending_extents(struct btrfs_trans_handle *trans, | ||
| 50 | struct btrfs_root *extent_root); | ||
| 51 | static struct btrfs_block_group_cache * | ||
| 52 | __btrfs_find_block_group(struct btrfs_root *root, | ||
| 53 | struct btrfs_block_group_cache *hint, | ||
| 54 | u64 search_start, int data, int owner); | ||
| 55 | |||
| 56 | void maybe_lock_mutex(struct btrfs_root *root) | ||
| 57 | { | ||
| 58 | if (root != root->fs_info->extent_root && | ||
| 59 | root != root->fs_info->chunk_root && | ||
| 60 | root != root->fs_info->dev_root) { | ||
| 61 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | void maybe_unlock_mutex(struct btrfs_root *root) | ||
| 66 | { | ||
| 67 | if (root != root->fs_info->extent_root && | ||
| 68 | root != root->fs_info->chunk_root && | ||
| 69 | root != root->fs_info->dev_root) { | ||
| 70 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | ||
| 75 | { | ||
| 76 | return (cache->flags & bits) == bits; | ||
| 77 | } | ||
| 78 | |||
| 79 | /* | ||
| 80 | * this adds the block group to the fs_info rb tree for the block group | ||
| 81 | * cache | ||
| 82 | */ | ||
| 83 | int btrfs_add_block_group_cache(struct btrfs_fs_info *info, | ||
| 84 | struct btrfs_block_group_cache *block_group) | ||
| 85 | { | ||
| 86 | struct rb_node **p; | ||
| 87 | struct rb_node *parent = NULL; | ||
| 88 | struct btrfs_block_group_cache *cache; | ||
| 89 | |||
| 90 | spin_lock(&info->block_group_cache_lock); | ||
| 91 | p = &info->block_group_cache_tree.rb_node; | ||
| 92 | |||
| 93 | while (*p) { | ||
| 94 | parent = *p; | ||
| 95 | cache = rb_entry(parent, struct btrfs_block_group_cache, | ||
| 96 | cache_node); | ||
| 97 | if (block_group->key.objectid < cache->key.objectid) { | ||
| 98 | p = &(*p)->rb_left; | ||
| 99 | } else if (block_group->key.objectid > cache->key.objectid) { | ||
| 100 | p = &(*p)->rb_right; | ||
| 101 | } else { | ||
| 102 | spin_unlock(&info->block_group_cache_lock); | ||
| 103 | return -EEXIST; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 107 | rb_link_node(&block_group->cache_node, parent, p); | ||
| 108 | rb_insert_color(&block_group->cache_node, | ||
| 109 | &info->block_group_cache_tree); | ||
| 110 | spin_unlock(&info->block_group_cache_lock); | ||
| 111 | |||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | /* | ||
| 116 | * This will return the block group at or after bytenr if contains is 0, else | ||
| 117 | * it will return the block group that contains the bytenr | ||
| 118 | */ | ||
| 119 | static struct btrfs_block_group_cache * | ||
| 120 | block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | ||
| 121 | int contains) | ||
| 122 | { | ||
| 123 | struct btrfs_block_group_cache *cache, *ret = NULL; | ||
| 124 | struct rb_node *n; | ||
| 125 | u64 end, start; | ||
| 126 | |||
| 127 | spin_lock(&info->block_group_cache_lock); | ||
| 128 | n = info->block_group_cache_tree.rb_node; | ||
| 129 | |||
| 130 | while (n) { | ||
| 131 | cache = rb_entry(n, struct btrfs_block_group_cache, | ||
| 132 | cache_node); | ||
| 133 | end = cache->key.objectid + cache->key.offset - 1; | ||
| 134 | start = cache->key.objectid; | ||
| 135 | |||
| 136 | if (bytenr < start) { | ||
| 137 | if (!contains && (!ret || start < ret->key.objectid)) | ||
| 138 | ret = cache; | ||
| 139 | n = n->rb_left; | ||
| 140 | } else if (bytenr > start) { | ||
| 141 | if (contains && bytenr <= end) { | ||
| 142 | ret = cache; | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | n = n->rb_right; | ||
| 146 | } else { | ||
| 147 | ret = cache; | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | spin_unlock(&info->block_group_cache_lock); | ||
| 152 | |||
| 153 | return ret; | ||
| 154 | } | ||
| 155 | |||
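| | /* | ||
| | * Editor's worked example (illustrative values): with block groups | ||
| | * at [0, 1G) and [1G, 2G), | ||
| | * | ||
| | * block_group_cache_tree_search(info, 512M, 1) -> [0, 1G) | ||
| | * (the group covering the byte) | ||
| | * block_group_cache_tree_search(info, 512M, 0) -> [1G, 2G) | ||
| | * (the first group starting at or after the byte) | ||
| | * block_group_cache_tree_search(info, 2G, 1) -> NULL | ||
| | */ | ||
| | | ||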
| 156 | /* | ||
| 157 | * this is only called by cache_block_group. Since we could have freed | ||
| 158 | * extents, we need to check pinned_extents for any extents that can't | ||
| 159 | * be used yet, since their free space will only be released when the | ||
| | * transaction commits. | ||
| 160 | */ | ||
| 161 | static int add_new_free_space(struct btrfs_block_group_cache *block_group, | ||
| 162 | struct btrfs_fs_info *info, u64 start, u64 end) | ||
| 163 | { | ||
| 164 | u64 extent_start, extent_end, size; | ||
| 165 | int ret; | ||
| 166 | |||
| 167 | while (start < end) { | ||
| 168 | ret = find_first_extent_bit(&info->pinned_extents, start, | ||
| 169 | &extent_start, &extent_end, | ||
| 170 | EXTENT_DIRTY); | ||
| 171 | if (ret) | ||
| 172 | break; | ||
| 173 | |||
| 174 | if (extent_start == start) { | ||
| 175 | start = extent_end + 1; | ||
| 176 | } else if (extent_start > start && extent_start < end) { | ||
| 177 | size = extent_start - start; | ||
| 178 | ret = btrfs_add_free_space(block_group, start, size); | ||
| 179 | BUG_ON(ret); | ||
| 180 | start = extent_end + 1; | ||
| 181 | } else { | ||
| 182 | break; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | if (start < end) { | ||
| 187 | size = end - start; | ||
| 188 | ret = btrfs_add_free_space(block_group, start, size); | ||
| 189 | BUG_ON(ret); | ||
| 190 | } | ||
| 191 | |||
| 192 | return 0; | ||
| 193 | } | ||
| 194 | |||
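| | /* | ||
| | * Editor's worked example (illustrative values): calling the helper | ||
| | * above with [start, end) = [0, 100) while [10, 19] and [40, 49] | ||
| | * are pinned adds the free space entries | ||
| | * | ||
| | * [0, 10) [20, 40) [50, 100) | ||
| | * | ||
| | * each pinned range is skipped and the scan resumes just past its | ||
| | * last byte (extent_end + 1). | ||
| | */ | ||
| | | ||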
| 195 | static int cache_block_group(struct btrfs_root *root, | ||
| 196 | struct btrfs_block_group_cache *block_group) | ||
| 197 | { | ||
| 198 | struct btrfs_path *path; | ||
| 199 | int ret = 0; | ||
| 200 | struct btrfs_key key; | ||
| 201 | struct extent_buffer *leaf; | ||
| 202 | int slot; | ||
| 203 | u64 last = 0; | ||
| 204 | u64 first_free; | ||
| 205 | int found = 0; | ||
| 206 | |||
| 207 | if (!block_group) | ||
| 208 | return 0; | ||
| 209 | |||
| 210 | root = root->fs_info->extent_root; | ||
| 211 | |||
| 212 | if (block_group->cached) | ||
| 213 | return 0; | ||
| 214 | |||
| 215 | path = btrfs_alloc_path(); | ||
| 216 | if (!path) | ||
| 217 | return -ENOMEM; | ||
| 218 | |||
| 219 | path->reada = 2; | ||
| 220 | /* | ||
| 221 | * we get into deadlocks with paths held by callers of this function. | ||
| 222 | * since the alloc_mutex is protecting things right now, just | ||
| 223 | * skip the locking here | ||
| 224 | */ | ||
| 225 | path->skip_locking = 1; | ||
| 226 | first_free = max_t(u64, block_group->key.objectid, | ||
| 227 | BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); | ||
| 228 | key.objectid = block_group->key.objectid; | ||
| 229 | key.offset = 0; | ||
| 230 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | ||
| 231 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 232 | if (ret < 0) | ||
| 233 | goto err; | ||
| 234 | ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); | ||
| 235 | if (ret < 0) | ||
| 236 | goto err; | ||
| 237 | if (ret == 0) { | ||
| 238 | leaf = path->nodes[0]; | ||
| 239 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 240 | if (key.objectid + key.offset > first_free) | ||
| 241 | first_free = key.objectid + key.offset; | ||
| 242 | } | ||
| 243 | while (1) { | ||
| 244 | leaf = path->nodes[0]; | ||
| 245 | slot = path->slots[0]; | ||
| 246 | if (slot >= btrfs_header_nritems(leaf)) { | ||
| 247 | ret = btrfs_next_leaf(root, path); | ||
| 248 | if (ret < 0) | ||
| 249 | goto err; | ||
| 250 | if (ret == 0) | ||
| 251 | continue; | ||
| 252 | else | ||
| 253 | break; | ||
| 254 | } | ||
| 255 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 256 | if (key.objectid < block_group->key.objectid) | ||
| 257 | goto next; | ||
| 258 | |||
| 259 | if (key.objectid >= block_group->key.objectid + | ||
| 260 | block_group->key.offset) | ||
| 261 | break; | ||
| 262 | |||
| 263 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | ||
| 264 | if (!found) { | ||
| 265 | last = first_free; | ||
| 266 | found = 1; | ||
| 267 | } | ||
| 268 | |||
| 269 | add_new_free_space(block_group, root->fs_info, last, | ||
| 270 | key.objectid); | ||
| 271 | |||
| 272 | last = key.objectid + key.offset; | ||
| 273 | } | ||
| 274 | next: | ||
| 275 | path->slots[0]++; | ||
| 276 | } | ||
| 277 | |||
| 278 | if (!found) | ||
| 279 | last = first_free; | ||
| 280 | |||
| 281 | add_new_free_space(block_group, root->fs_info, last, | ||
| 282 | block_group->key.objectid + | ||
| 283 | block_group->key.offset); | ||
| 284 | |||
| 285 | block_group->cached = 1; | ||
| 286 | ret = 0; | ||
| 287 | err: | ||
| 288 | btrfs_free_path(path); | ||
| 289 | return ret; | ||
| 290 | } | ||
| 291 | |||
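| | /* | ||
| | * Editor's sketch of the walk in cache_block_group() above, with | ||
| | * illustrative values: for a group at [X, X + 1G) (X beyond the | ||
| | * super block area) holding extent items (X, 16K) and (X + 64K, 16K) | ||
| | * the loop ends up calling | ||
| | * | ||
| | * add_new_free_space(block_group, info, X + 16K, X + 64K); | ||
| | * add_new_free_space(block_group, info, X + 80K, X + 1G); | ||
| | * | ||
| | * 'last' tracks the end of the previous extent item, so every gap | ||
| | * between items, plus the tail of the group, becomes free space. | ||
| | */ | ||
| | | ||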
| 292 | /* | ||
| 293 | * return the block group that starts at or after bytenr | ||
| 294 | */ | ||
| 295 | struct btrfs_block_group_cache * | ||
| 296 | btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) | ||
| 298 | { | ||
| 299 | struct btrfs_block_group_cache *cache; | ||
| 300 | |||
| 301 | cache = block_group_cache_tree_search(info, bytenr, 0); | ||
| 302 | |||
| 303 | return cache; | ||
| 304 | } | ||
| 305 | |||
| 306 | /* | ||
| 307 | * return the block group that contains the given bytenr | ||
| 308 | */ | ||
| 309 | struct btrfs_block_group_cache * | ||
| 310 | btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) | ||
| 312 | { | ||
| 313 | struct btrfs_block_group_cache *cache; | ||
| 314 | |||
| 315 | cache = block_group_cache_tree_search(info, bytenr, 1); | ||
| 316 | |||
| 317 | return cache; | ||
| 318 | } | ||
| 319 | |||
| 320 | static int noinline find_free_space(struct btrfs_root *root, | ||
| 321 | struct btrfs_block_group_cache **cache_ret, | ||
| 322 | u64 *start_ret, u64 num, int data) | ||
| 323 | { | ||
| 324 | int ret; | ||
| 325 | struct btrfs_block_group_cache *cache = *cache_ret; | ||
| 326 | struct btrfs_free_space *info = NULL; | ||
| 327 | u64 last; | ||
| 328 | u64 search_start = *start_ret; | ||
| 329 | |||
| 330 | WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 331 | if (!cache) | ||
| 332 | goto out; | ||
| 333 | |||
| 334 | last = max(search_start, cache->key.objectid); | ||
| 335 | |||
| 336 | again: | ||
| 337 | ret = cache_block_group(root, cache); | ||
| 338 | if (ret) | ||
| 339 | goto out; | ||
| 340 | |||
| 341 | if (cache->ro || !block_group_bits(cache, data)) | ||
| 342 | goto new_group; | ||
| 343 | |||
| 344 | info = btrfs_find_free_space(cache, last, num); | ||
| 345 | if (info) { | ||
| 346 | *start_ret = info->offset; | ||
| 347 | return 0; | ||
| 348 | } | ||
| 349 | |||
| 350 | new_group: | ||
| 351 | last = cache->key.objectid + cache->key.offset; | ||
| 352 | |||
| 353 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
| 354 | if (!cache) | ||
| 355 | goto out; | ||
| 356 | |||
| 357 | *cache_ret = cache; | ||
| 358 | goto again; | ||
| 359 | |||
| 360 | out: | ||
| 361 | return -ENOSPC; | ||
| 362 | } | ||
| 363 | |||
| 364 | static u64 div_factor(u64 num, int factor) | ||
| 365 | { | ||
| 366 | if (factor == 10) | ||
| 367 | return num; | ||
| 368 | num *= factor; | ||
| 369 | do_div(num, 10); | ||
| 370 | return num; | ||
| 371 | } | ||
| 372 | |||
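| | /* | ||
| | * Editor's note: div_factor() computes num * factor / 10, i.e. the | ||
| | * factor is in tenths: div_factor(1000, 9) == 900. It is used below | ||
| | * to require a block group to be under 90% (metadata) or 100% full | ||
| | * before it is handed back as an allocation hint. | ||
| | */ | ||
| | | ||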
| 373 | static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | ||
| 374 | u64 flags) | ||
| 375 | { | ||
| 376 | struct list_head *head = &info->space_info; | ||
| 377 | struct list_head *cur; | ||
| 378 | struct btrfs_space_info *found; | ||
| 379 | list_for_each(cur, head) { | ||
| 380 | found = list_entry(cur, struct btrfs_space_info, list); | ||
| 381 | if (found->flags == flags) | ||
| 382 | return found; | ||
| 383 | } | ||
| 384 | return NULL; | ||
| 385 | } | ||
| 386 | |||
| 387 | static struct btrfs_block_group_cache * | ||
| 388 | __btrfs_find_block_group(struct btrfs_root *root, | ||
| 389 | struct btrfs_block_group_cache *hint, | ||
| 390 | u64 search_start, int data, int owner) | ||
| 391 | { | ||
| 392 | struct btrfs_block_group_cache *cache; | ||
| 393 | struct btrfs_block_group_cache *found_group = NULL; | ||
| 394 | struct btrfs_fs_info *info = root->fs_info; | ||
| 395 | u64 used; | ||
| 396 | u64 last = 0; | ||
| 397 | u64 free_check; | ||
| 398 | int full_search = 0; | ||
| 399 | int factor = 10; | ||
| 400 | int wrapped = 0; | ||
| 401 | |||
| 402 | if (data & BTRFS_BLOCK_GROUP_METADATA) | ||
| 403 | factor = 9; | ||
| 404 | |||
| 405 | if (search_start) { | ||
| 406 | struct btrfs_block_group_cache *shint; | ||
| 407 | shint = btrfs_lookup_first_block_group(info, search_start); | ||
| 408 | if (shint && block_group_bits(shint, data) && !shint->ro) { | ||
| 409 | spin_lock(&shint->lock); | ||
| 410 | used = btrfs_block_group_used(&shint->item); | ||
| 411 | if (used + shint->pinned + shint->reserved < | ||
| 412 | div_factor(shint->key.offset, factor)) { | ||
| 413 | spin_unlock(&shint->lock); | ||
| 414 | return shint; | ||
| 415 | } | ||
| 416 | spin_unlock(&shint->lock); | ||
| 417 | } | ||
| 418 | } | ||
| 419 | if (hint && !hint->ro && block_group_bits(hint, data)) { | ||
| 420 | spin_lock(&hint->lock); | ||
| 421 | used = btrfs_block_group_used(&hint->item); | ||
| 422 | if (used + hint->pinned + hint->reserved < | ||
| 423 | div_factor(hint->key.offset, factor)) { | ||
| 424 | spin_unlock(&hint->lock); | ||
| 425 | return hint; | ||
| 426 | } | ||
| 427 | spin_unlock(&hint->lock); | ||
| 428 | last = hint->key.objectid + hint->key.offset; | ||
| 429 | } else { | ||
| 430 | if (hint) | ||
| 431 | last = max(hint->key.objectid, search_start); | ||
| 432 | else | ||
| 433 | last = search_start; | ||
| 434 | } | ||
| 435 | again: | ||
| 436 | while (1) { | ||
| 437 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
| 438 | if (!cache) | ||
| 439 | break; | ||
| 440 | |||
| 441 | spin_lock(&cache->lock); | ||
| 442 | last = cache->key.objectid + cache->key.offset; | ||
| 443 | used = btrfs_block_group_used(&cache->item); | ||
| 444 | |||
| 445 | if (!cache->ro && block_group_bits(cache, data)) { | ||
| 446 | free_check = div_factor(cache->key.offset, factor); | ||
| 447 | if (used + cache->pinned + cache->reserved < | ||
| 448 | free_check) { | ||
| 449 | found_group = cache; | ||
| 450 | spin_unlock(&cache->lock); | ||
| 451 | goto found; | ||
| 452 | } | ||
| 453 | } | ||
| 454 | spin_unlock(&cache->lock); | ||
| 455 | cond_resched(); | ||
| 456 | } | ||
| 457 | if (!wrapped) { | ||
| 458 | last = search_start; | ||
| 459 | wrapped = 1; | ||
| 460 | goto again; | ||
| 461 | } | ||
| 462 | if (!full_search && factor < 10) { | ||
| 463 | last = search_start; | ||
| 464 | full_search = 1; | ||
| 465 | factor = 10; | ||
| 466 | goto again; | ||
| 467 | } | ||
| 468 | found: | ||
| 469 | return found_group; | ||
| 470 | } | ||
| 471 | |||
| 472 | struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, | ||
| 473 | struct btrfs_block_group_cache | ||
| 474 | *hint, u64 search_start, | ||
| 475 | int data, int owner) | ||
| 476 | { | ||
| 478 | struct btrfs_block_group_cache *ret; | ||
| 479 | ret = __btrfs_find_block_group(root, hint, search_start, data, owner); | ||
| 480 | return ret; | ||
| 481 | } | ||
| 482 | |||
| 483 | /* simple helper to search for an existing extent at a given offset */ | ||
| 484 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | ||
| 485 | { | ||
| 486 | int ret; | ||
| 487 | struct btrfs_key key; | ||
| 488 | struct btrfs_path *path; | ||
| 489 | |||
| 490 | path = btrfs_alloc_path(); | ||
| 491 | BUG_ON(!path); | ||
| 492 | maybe_lock_mutex(root); | ||
| 493 | key.objectid = start; | ||
| 494 | key.offset = len; | ||
| 495 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | ||
| 496 | ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, | ||
| 497 | 0, 0); | ||
| 498 | maybe_unlock_mutex(root); | ||
| 499 | btrfs_free_path(path); | ||
| 500 | return ret; | ||
| 501 | } | ||
| 502 | |||
| 503 | /* | ||
| 504 | * Back reference rules. Back refs have three main goals: | ||
| 505 | * | ||
| 506 | * 1) differentiate between all holders of references to an extent so that | ||
| 507 | * when a reference is dropped we can make sure it was a valid reference | ||
| 508 | * before freeing the extent. | ||
| 509 | * | ||
| 510 | * 2) Provide enough information to quickly find the holders of an extent | ||
| 511 | * if we notice a given block is corrupted or bad. | ||
| 512 | * | ||
| 513 | * 3) Make it easy to migrate blocks for FS shrinking or storage pool | ||
| 514 | * maintenance. This is actually the same as #2, but with a slightly | ||
| 515 | * different use case. | ||
| 516 | * | ||
| 517 | * File extents can be referenced by: | ||
| 518 | * | ||
| 519 | * - multiple snapshots, subvolumes, or different generations in one subvol | ||
| 520 | * - different files inside a single subvolume | ||
| 521 | * - different offsets inside a file (bookend extents in file.c) | ||
| 522 | * | ||
| 523 | * The extent ref structure has fields for: | ||
| 524 | * | ||
| 525 | * - Objectid of the subvolume root | ||
| 526 | * - Generation number of the tree holding the reference | ||
| 527 | * - objectid of the file holding the reference | ||
| 528 | * - number of references held by the parent node (always 1 for tree blocks) | ||
| 529 | * | ||
| 530 | * A btree leaf may hold multiple references to a file extent. In most | ||
| 531 | * cases, these references are from the same file and the corresponding | ||
| 532 | * offsets inside the file are close together. | ||
| 533 | * | ||
| 534 | * When a file extent is allocated, the fields are filled in: | ||
| 535 | * (root_key.objectid, trans->transid, inode objectid, 1) | ||
| 536 | * | ||
| 537 | * When a leaf is cow'd, new references are added for every file extent found | ||
| 538 | * in the leaf. It looks similar to the create case, but trans->transid will | ||
| 539 | * be different when the block is cow'd. | ||
| 540 | * | ||
| 541 | * (root_key.objectid, trans->transid, inode objectid, | ||
| 542 | * number of references in the leaf) | ||
| 543 | * | ||
| 544 | * When a file extent is removed either during snapshot deletion or | ||
| 545 | * file truncation, we find the corresponding back reference and check | ||
| 546 | * the following fields: | ||
| 547 | * | ||
| 548 | * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), | ||
| 549 | * inode objectid) | ||
| 550 | * | ||
| 551 | * Btree extents can be referenced by: | ||
| 552 | * | ||
| 553 | * - Different subvolumes | ||
| 554 | * - Different generations of the same subvolume | ||
| 555 | * | ||
| 556 | * When a tree block is created, back references are inserted: | ||
| 557 | * | ||
| 558 | * (root->root_key.objectid, trans->transid, level, 1) | ||
| 559 | * | ||
| 560 | * When a tree block is cow'd, new back references are added for all the | ||
| 561 | * blocks it points to. If the tree block isn't in a reference counted root, | ||
| 562 | * the old back references are removed. These new back references are of | ||
| 563 | * the form (trans->transid will have increased since creation): | ||
| 564 | * | ||
| 565 | * (root->root_key.objectid, trans->transid, level, 1) | ||
| 566 | * | ||
| 567 | * When a backref is being deleted, the following fields are checked: | ||
| 568 | * | ||
| 569 | * if backref was for a tree root: | ||
| 570 | * (btrfs_header_owner(itself), btrfs_header_generation(itself), level) | ||
| 571 | * else | ||
| 572 | * (btrfs_header_owner(parent), btrfs_header_generation(parent), level) | ||
| 573 | * | ||
| 574 | * Back Reference Key composing: | ||
| 575 | * | ||
| 576 | * The key objectid corresponds to the first byte in the extent, the key | ||
| 577 | * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first | ||
| 578 | * byte of the parent extent. If an extent is a tree root, the key offset | ||
| 579 | * is set to its own key objectid. | ||
| 580 | */ | ||
| 581 | |||
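| | /* | ||
| | * Editor's illustration (not part of the original patch): composing | ||
| | * the back reference key described above. 'bytenr' and 'parent' are | ||
| | * hypothetical byte offsets of the extent and of its parent block: | ||
| | */ | ||
| | struct btrfs_key key; | ||
| | | ||
| | key.objectid = bytenr; /* first byte of the extent */ | ||
| | key.type = BTRFS_EXTENT_REF_KEY; | ||
| | key.offset = parent; /* first byte of the parent extent, or bytenr | ||
| | itself when the extent is a tree root */ | ||
| | | ||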
| 582 | static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, | ||
| 583 | struct btrfs_root *root, | ||
| 584 | struct btrfs_path *path, | ||
| 585 | u64 bytenr, u64 parent, | ||
| 586 | u64 ref_root, u64 ref_generation, | ||
| 587 | u64 owner_objectid, int del) | ||
| 588 | { | ||
| 589 | struct btrfs_key key; | ||
| 590 | struct btrfs_extent_ref *ref; | ||
| 591 | struct extent_buffer *leaf; | ||
| 592 | u64 ref_objectid; | ||
| 593 | int ret; | ||
| 594 | |||
| 595 | key.objectid = bytenr; | ||
| 596 | key.type = BTRFS_EXTENT_REF_KEY; | ||
| 597 | key.offset = parent; | ||
| 598 | |||
| 599 | ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); | ||
| 600 | if (ret < 0) | ||
| 601 | goto out; | ||
| 602 | if (ret > 0) { | ||
| 603 | ret = -ENOENT; | ||
| 604 | goto out; | ||
| 605 | } | ||
| 606 | |||
| 607 | leaf = path->nodes[0]; | ||
| 608 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
| 609 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
| 610 | if (btrfs_ref_root(leaf, ref) != ref_root || | ||
| 611 | btrfs_ref_generation(leaf, ref) != ref_generation || | ||
| 612 | (ref_objectid != owner_objectid && | ||
| 613 | ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { | ||
| 614 | ret = -EIO; | ||
| 615 | WARN_ON(1); | ||
| 616 | goto out; | ||
| 617 | } | ||
| 618 | ret = 0; | ||
| 619 | out: | ||
| 620 | return ret; | ||
| 621 | } | ||
| 622 | |||
| 623 | static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, | ||
| 624 | struct btrfs_root *root, | ||
| 625 | struct btrfs_path *path, | ||
| 626 | u64 bytenr, u64 parent, | ||
| 627 | u64 ref_root, u64 ref_generation, | ||
| 628 | u64 owner_objectid) | ||
| 629 | { | ||
| 630 | struct btrfs_key key; | ||
| 631 | struct extent_buffer *leaf; | ||
| 632 | struct btrfs_extent_ref *ref; | ||
| 633 | u32 num_refs; | ||
| 634 | int ret; | ||
| 635 | |||
| 636 | key.objectid = bytenr; | ||
| 637 | key.type = BTRFS_EXTENT_REF_KEY; | ||
| 638 | key.offset = parent; | ||
| 639 | |||
| 640 | ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); | ||
| 641 | if (ret == 0) { | ||
| 642 | leaf = path->nodes[0]; | ||
| 643 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
| 644 | struct btrfs_extent_ref); | ||
| 645 | btrfs_set_ref_root(leaf, ref, ref_root); | ||
| 646 | btrfs_set_ref_generation(leaf, ref, ref_generation); | ||
| 647 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); | ||
| 648 | btrfs_set_ref_num_refs(leaf, ref, 1); | ||
| 649 | } else if (ret == -EEXIST) { | ||
| 650 | u64 existing_owner; | ||
| 651 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); | ||
| 652 | leaf = path->nodes[0]; | ||
| 653 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
| 654 | struct btrfs_extent_ref); | ||
| 655 | if (btrfs_ref_root(leaf, ref) != ref_root || | ||
| 656 | btrfs_ref_generation(leaf, ref) != ref_generation) { | ||
| 657 | ret = -EIO; | ||
| 658 | WARN_ON(1); | ||
| 659 | goto out; | ||
| 660 | } | ||
| 661 | |||
| 662 | num_refs = btrfs_ref_num_refs(leaf, ref); | ||
| 663 | BUG_ON(num_refs == 0); | ||
| 664 | btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); | ||
| 665 | |||
| 666 | existing_owner = btrfs_ref_objectid(leaf, ref); | ||
| 667 | if (existing_owner != owner_objectid && | ||
| 668 | existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { | ||
| 669 | btrfs_set_ref_objectid(leaf, ref, | ||
| 670 | BTRFS_MULTIPLE_OBJECTIDS); | ||
| 671 | } | ||
| 672 | ret = 0; | ||
| 673 | } else { | ||
| 674 | goto out; | ||
| 675 | } | ||
| 676 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 677 | out: | ||
| 678 | btrfs_release_path(root, path); | ||
| 679 | return ret; | ||
| 680 | } | ||
| 681 | |||
| 682 | static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, | ||
| 683 | struct btrfs_root *root, | ||
| 684 | struct btrfs_path *path) | ||
| 685 | { | ||
| 686 | struct extent_buffer *leaf; | ||
| 687 | struct btrfs_extent_ref *ref; | ||
| 688 | u32 num_refs; | ||
| 689 | int ret = 0; | ||
| 690 | |||
| 691 | leaf = path->nodes[0]; | ||
| 692 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
| 693 | num_refs = btrfs_ref_num_refs(leaf, ref); | ||
| 694 | BUG_ON(num_refs == 0); | ||
| 695 | num_refs -= 1; | ||
| 696 | if (num_refs == 0) { | ||
| 697 | ret = btrfs_del_item(trans, root, path); | ||
| 698 | } else { | ||
| 699 | btrfs_set_ref_num_refs(leaf, ref, num_refs); | ||
| 700 | btrfs_mark_buffer_dirty(leaf); | ||
| 701 | } | ||
| 702 | btrfs_release_path(root, path); | ||
| 703 | return ret; | ||
| 704 | } | ||
| 705 | |||
| 706 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | ||
| 707 | struct btrfs_root *root, u64 bytenr, | ||
| 708 | u64 orig_parent, u64 parent, | ||
| 709 | u64 orig_root, u64 ref_root, | ||
| 710 | u64 orig_generation, u64 ref_generation, | ||
| 711 | u64 owner_objectid) | ||
| 712 | { | ||
| 713 | int ret; | ||
| 714 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
| 715 | struct btrfs_path *path; | ||
| 716 | |||
| 717 | if (root == root->fs_info->extent_root) { | ||
| 718 | struct pending_extent_op *extent_op; | ||
| 719 | u64 num_bytes; | ||
| 720 | |||
| 721 | BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); | ||
| 722 | num_bytes = btrfs_level_size(root, (int)owner_objectid); | ||
| 723 | if (test_range_bit(&root->fs_info->extent_ins, bytenr, | ||
| 724 | bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) { | ||
| 725 | u64 priv; | ||
| 726 | ret = get_state_private(&root->fs_info->extent_ins, | ||
| 727 | bytenr, &priv); | ||
| 728 | BUG_ON(ret); | ||
| 729 | extent_op = (struct pending_extent_op *) | ||
| 730 | (unsigned long)priv; | ||
| 731 | BUG_ON(extent_op->parent != orig_parent); | ||
| 732 | BUG_ON(extent_op->generation != orig_generation); | ||
| 733 | extent_op->parent = parent; | ||
| 734 | extent_op->generation = ref_generation; | ||
| 735 | } else { | ||
| 736 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
| 737 | BUG_ON(!extent_op); | ||
| 738 | |||
| 739 | extent_op->type = PENDING_BACKREF_UPDATE; | ||
| 740 | extent_op->bytenr = bytenr; | ||
| 741 | extent_op->num_bytes = num_bytes; | ||
| 742 | extent_op->parent = parent; | ||
| 743 | extent_op->orig_parent = orig_parent; | ||
| 744 | extent_op->generation = ref_generation; | ||
| 745 | extent_op->orig_generation = orig_generation; | ||
| 746 | extent_op->level = (int)owner_objectid; | ||
| 747 | |||
| 748 | set_extent_bits(&root->fs_info->extent_ins, | ||
| 749 | bytenr, bytenr + num_bytes - 1, | ||
| 750 | EXTENT_LOCKED, GFP_NOFS); | ||
| 751 | set_state_private(&root->fs_info->extent_ins, | ||
| 752 | bytenr, (unsigned long)extent_op); | ||
| 753 | } | ||
| 754 | return 0; | ||
| 755 | } | ||
| 756 | |||
| 757 | path = btrfs_alloc_path(); | ||
| 758 | if (!path) | ||
| 759 | return -ENOMEM; | ||
| 760 | ret = lookup_extent_backref(trans, extent_root, path, | ||
| 761 | bytenr, orig_parent, orig_root, | ||
| 762 | orig_generation, owner_objectid, 1); | ||
| 763 | if (ret) | ||
| 764 | goto out; | ||
| 765 | ret = remove_extent_backref(trans, extent_root, path); | ||
| 766 | if (ret) | ||
| 767 | goto out; | ||
| 768 | ret = insert_extent_backref(trans, extent_root, path, bytenr, | ||
| 769 | parent, ref_root, ref_generation, | ||
| 770 | owner_objectid); | ||
| 771 | BUG_ON(ret); | ||
| 772 | finish_current_insert(trans, extent_root); | ||
| 773 | del_pending_extents(trans, extent_root); | ||
| 774 | out: | ||
| 775 | btrfs_free_path(path); | ||
| 776 | return ret; | ||
| 777 | } | ||
| 778 | |||
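| | /* | ||
| | * Editor's sketch of the deferral above (fs_info, bytenr, num_bytes, | ||
| | * priv, ret and a filled-in extent_op are assumed from the | ||
| | * surrounding code): updating a backref for the extent root itself | ||
| | * would recurse into the extent tree, so the operation is parked in | ||
| | * the extent_ins tree and the pointer round-trips through the state | ||
| | * private field until finish_current_insert() picks it up: | ||
| | */ | ||
| | set_extent_bits(&fs_info->extent_ins, bytenr, bytenr + num_bytes - 1, | ||
| | EXTENT_LOCKED, GFP_NOFS); | ||
| | set_state_private(&fs_info->extent_ins, bytenr, | ||
| | (unsigned long)extent_op); | ||
| | /* ... reading it back: */ | ||
| | ret = get_state_private(&fs_info->extent_ins, bytenr, &priv); | ||
| | extent_op = (struct pending_extent_op *)(unsigned long)priv; | ||
| | | ||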
| 779 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | ||
| 780 | struct btrfs_root *root, u64 bytenr, | ||
| 781 | u64 orig_parent, u64 parent, | ||
| 782 | u64 ref_root, u64 ref_generation, | ||
| 783 | u64 owner_objectid) | ||
| 784 | { | ||
| 785 | int ret; | ||
| 786 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | ||
| 787 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
| 788 | return 0; | ||
| 789 | maybe_lock_mutex(root); | ||
| 790 | ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, | ||
| 791 | parent, ref_root, ref_root, | ||
| 792 | ref_generation, ref_generation, | ||
| 793 | owner_objectid); | ||
| 794 | maybe_unlock_mutex(root); | ||
| 795 | return ret; | ||
| 796 | } | ||
| 797 | |||
| 798 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | ||
| 799 | struct btrfs_root *root, u64 bytenr, | ||
| 800 | u64 orig_parent, u64 parent, | ||
| 801 | u64 orig_root, u64 ref_root, | ||
| 802 | u64 orig_generation, u64 ref_generation, | ||
| 803 | u64 owner_objectid) | ||
| 804 | { | ||
| 805 | struct btrfs_path *path; | ||
| 806 | int ret; | ||
| 807 | struct btrfs_key key; | ||
| 808 | struct extent_buffer *l; | ||
| 809 | struct btrfs_extent_item *item; | ||
| 810 | u32 refs; | ||
| 811 | |||
| 812 | path = btrfs_alloc_path(); | ||
| 813 | if (!path) | ||
| 814 | return -ENOMEM; | ||
| 815 | |||
| 816 | path->reada = 1; | ||
| 817 | key.objectid = bytenr; | ||
| 818 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 819 | key.offset = (u64)-1; | ||
| 820 | |||
| 821 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | ||
| 822 | 0, 1); | ||
| 823 | if (ret < 0) { | ||
| 824 | btrfs_free_path(path); | ||
| | return ret; | ||
| | } | ||
| 825 | BUG_ON(ret == 0 || path->slots[0] == 0); | ||
| 826 | |||
| 827 | path->slots[0]--; | ||
| 828 | l = path->nodes[0]; | ||
| 829 | |||
| 830 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
| 831 | BUG_ON(key.objectid != bytenr); | ||
| 832 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); | ||
| 833 | |||
| 834 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | ||
| 835 | refs = btrfs_extent_refs(l, item); | ||
| 836 | btrfs_set_extent_refs(l, item, refs + 1); | ||
| 837 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 838 | |||
| 839 | btrfs_release_path(root->fs_info->extent_root, path); | ||
| 840 | |||
| 841 | path->reada = 1; | ||
| 842 | ret = insert_extent_backref(trans, root->fs_info->extent_root, | ||
| 843 | path, bytenr, parent, | ||
| 844 | ref_root, ref_generation, | ||
| 845 | owner_objectid); | ||
| 846 | BUG_ON(ret); | ||
| 847 | finish_current_insert(trans, root->fs_info->extent_root); | ||
| 848 | del_pending_extents(trans, root->fs_info->extent_root); | ||
| 849 | |||
| 850 | btrfs_free_path(path); | ||
| 851 | return 0; | ||
| 852 | } | ||
| 853 | |||
| 854 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | ||
| 855 | struct btrfs_root *root, | ||
| 856 | u64 bytenr, u64 num_bytes, u64 parent, | ||
| 857 | u64 ref_root, u64 ref_generation, | ||
| 858 | u64 owner_objectid) | ||
| 859 | { | ||
| 860 | int ret; | ||
| 861 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | ||
| 862 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
| 863 | return 0; | ||
| 864 | maybe_lock_mutex(root); | ||
| 865 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, | ||
| 866 | 0, ref_root, 0, ref_generation, | ||
| 867 | owner_objectid); | ||
| 868 | maybe_unlock_mutex(root); | ||
| 869 | return ret; | ||
| 870 | } | ||
| 871 | |||
| 872 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | ||
| 873 | struct btrfs_root *root) | ||
| 874 | { | ||
| 875 | finish_current_insert(trans, root->fs_info->extent_root); | ||
| 876 | del_pending_extents(trans, root->fs_info->extent_root); | ||
| 877 | return 0; | ||
| 878 | } | ||
| 879 | |||
| 880 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
| 881 | struct btrfs_root *root, u64 bytenr, | ||
| 882 | u64 num_bytes, u32 *refs) | ||
| 883 | { | ||
| 884 | struct btrfs_path *path; | ||
| 885 | int ret; | ||
| 886 | struct btrfs_key key; | ||
| 887 | struct extent_buffer *l; | ||
| 888 | struct btrfs_extent_item *item; | ||
| 889 | |||
| 890 | WARN_ON(num_bytes < root->sectorsize); | ||
| 891 | path = btrfs_alloc_path(); | ||
| | if (!path) | ||
| | return -ENOMEM; | ||
| 892 | path->reada = 1; | ||
| 893 | key.objectid = bytenr; | ||
| 894 | key.offset = num_bytes; | ||
| 895 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | ||
| 896 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | ||
| 897 | 0, 0); | ||
| 898 | if (ret < 0) | ||
| 899 | goto out; | ||
| 900 | if (ret != 0) { | ||
| 901 | btrfs_print_leaf(root, path->nodes[0]); | ||
| 902 | printk(KERN_ERR "failed to find block number %llu\n", | ||
| | (unsigned long long)bytenr); | ||
| 903 | BUG(); | ||
| 904 | } | ||
| 905 | l = path->nodes[0]; | ||
| 906 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | ||
| 907 | *refs = btrfs_extent_refs(l, item); | ||
| 908 | out: | ||
| 909 | btrfs_free_path(path); | ||
| 910 | return ret; | ||
| 911 | } | ||
| 912 | |||
| 913 | static int get_reference_status(struct btrfs_root *root, u64 bytenr, | ||
| 914 | u64 parent_gen, u64 ref_objectid, | ||
| 915 | u64 *min_generation, u32 *ref_count) | ||
| 916 | { | ||
| 917 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
| 918 | struct btrfs_path *path; | ||
| 919 | struct extent_buffer *leaf; | ||
| 920 | struct btrfs_extent_ref *ref_item; | ||
| 921 | struct btrfs_key key; | ||
| 922 | struct btrfs_key found_key; | ||
| 923 | u64 root_objectid = root->root_key.objectid; | ||
| 924 | u64 ref_generation; | ||
| 925 | u32 nritems; | ||
| 926 | int ret; | ||
| 927 | |||
| 928 | key.objectid = bytenr; | ||
| 929 | key.offset = (u64)-1; | ||
| 930 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 931 | |||
| 932 | path = btrfs_alloc_path(); | ||
| | if (!path) | ||
| | return -ENOMEM; | ||
| 933 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 934 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); | ||
| 935 | if (ret < 0) | ||
| 936 | goto out; | ||
| 937 | BUG_ON(ret == 0); | ||
| 938 | if (path->slots[0] == 0) | ||
| 939 | goto out; | ||
| 940 | |||
| 941 | path->slots[0]--; | ||
| 942 | leaf = path->nodes[0]; | ||
| 943 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 944 | |||
| 945 | if (found_key.objectid != bytenr || | ||
| 946 | found_key.type != BTRFS_EXTENT_ITEM_KEY) { | ||
| 947 | ret = 1; | ||
| 948 | goto out; | ||
| 949 | } | ||
| 950 | |||
| 951 | *ref_count = 0; | ||
| 952 | *min_generation = (u64)-1; | ||
| 953 | |||
| 954 | while (1) { | ||
| 955 | leaf = path->nodes[0]; | ||
| 956 | nritems = btrfs_header_nritems(leaf); | ||
| 957 | if (path->slots[0] >= nritems) { | ||
| 958 | ret = btrfs_next_leaf(extent_root, path); | ||
| 959 | if (ret < 0) | ||
| 960 | goto out; | ||
| 961 | if (ret == 0) | ||
| 962 | continue; | ||
| 963 | break; | ||
| 964 | } | ||
| 965 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 966 | if (found_key.objectid != bytenr) | ||
| 967 | break; | ||
| 968 | |||
| 969 | if (found_key.type != BTRFS_EXTENT_REF_KEY) { | ||
| 970 | path->slots[0]++; | ||
| 971 | continue; | ||
| 972 | } | ||
| 973 | |||
| 974 | ref_item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 975 | struct btrfs_extent_ref); | ||
| 976 | ref_generation = btrfs_ref_generation(leaf, ref_item); | ||
| 977 | /* | ||
| 978 | * For (parent_gen > 0 && parent_gen > ref_generation): | ||
| 979 | * | ||
| 980 | * we reach here through the oldest root, therefore | ||
| 981 | * all other references from the same snapshot should have | ||
| 982 | * a larger generation. | ||
| 983 | */ | ||
| 984 | if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || | ||
| 985 | (parent_gen > 0 && parent_gen > ref_generation) || | ||
| 986 | (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && | ||
| 987 | ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { | ||
| 988 | *ref_count = 2; | ||
| 989 | break; | ||
| 990 | } | ||
| 991 | |||
| 992 | *ref_count = 1; | ||
| 993 | if (*min_generation > ref_generation) | ||
| 994 | *min_generation = ref_generation; | ||
| 995 | |||
| 996 | path->slots[0]++; | ||
| 997 | } | ||
| 998 | ret = 0; | ||
| 999 | out: | ||
| 1000 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 1001 | btrfs_free_path(path); | ||
| 1002 | return ret; | ||
| 1003 | } | ||
| 1004 | |||
| 1005 | int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, | ||
| 1006 | struct btrfs_root *root, | ||
| 1007 | struct btrfs_key *key, u64 bytenr) | ||
| 1008 | { | ||
| 1009 | struct btrfs_root *old_root; | ||
| 1010 | struct btrfs_path *path = NULL; | ||
| 1011 | struct extent_buffer *eb; | ||
| 1012 | struct btrfs_file_extent_item *item; | ||
| 1013 | u64 ref_generation; | ||
| 1014 | u64 min_generation; | ||
| 1015 | u64 extent_start; | ||
| 1016 | u32 ref_count; | ||
| 1017 | int level; | ||
| 1018 | int ret; | ||
| 1019 | |||
| 1020 | BUG_ON(trans == NULL); | ||
| 1021 | BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); | ||
| 1022 | ret = get_reference_status(root, bytenr, 0, key->objectid, | ||
| 1023 | &min_generation, &ref_count); | ||
| 1024 | if (ret) | ||
| 1025 | return ret; | ||
| 1026 | |||
| 1027 | if (ref_count != 1) | ||
| 1028 | return 1; | ||
| 1029 | |||
| 1030 | old_root = root->dirty_root->root; | ||
| 1031 | ref_generation = old_root->root_key.offset; | ||
| 1032 | |||
| 1033 | /* all references were created in the running transaction */ | ||
| 1034 | if (min_generation > ref_generation) { | ||
| 1035 | ret = 0; | ||
| 1036 | goto out; | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | path = btrfs_alloc_path(); | ||
| 1040 | if (!path) { | ||
| 1041 | ret = -ENOMEM; | ||
| 1042 | goto out; | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | path->skip_locking = 1; | ||
| 1046 | /* if no item found, the extent is referenced by another snapshot */ | ||
| 1047 | ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0); | ||
| 1048 | if (ret) | ||
| 1049 | goto out; | ||
| 1050 | |||
| 1051 | eb = path->nodes[0]; | ||
| 1052 | item = btrfs_item_ptr(eb, path->slots[0], | ||
| 1053 | struct btrfs_file_extent_item); | ||
| 1054 | if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG || | ||
| 1055 | btrfs_file_extent_disk_bytenr(eb, item) != bytenr) { | ||
| 1056 | ret = 1; | ||
| 1057 | goto out; | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) { | ||
| 1061 | if (level >= 0) { | ||
| 1062 | eb = path->nodes[level]; | ||
| 1063 | if (!eb) | ||
| 1064 | continue; | ||
| 1065 | extent_start = eb->start; | ||
| 1066 | } else | ||
| 1067 | extent_start = bytenr; | ||
| 1068 | |||
| 1069 | ret = get_reference_status(root, extent_start, ref_generation, | ||
| 1070 | 0, &min_generation, &ref_count); | ||
| 1071 | if (ret) | ||
| 1072 | goto out; | ||
| 1073 | |||
| 1074 | if (ref_count != 1) { | ||
| 1075 | ret = 1; | ||
| 1076 | goto out; | ||
| 1077 | } | ||
| 1078 | if (level >= 0) | ||
| 1079 | ref_generation = btrfs_header_generation(eb); | ||
| 1080 | } | ||
| 1081 | ret = 0; | ||
| 1082 | out: | ||
| 1083 | if (path) | ||
| 1084 | btrfs_free_path(path); | ||
| 1085 | return ret; | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 1089 | struct extent_buffer *buf, u32 nr_extents) | ||
| 1090 | { | ||
| 1091 | struct btrfs_key key; | ||
| 1092 | struct btrfs_file_extent_item *fi; | ||
| 1093 | u64 root_gen; | ||
| 1094 | u32 nritems; | ||
| 1095 | int i; | ||
| 1096 | int level; | ||
| 1097 | int ret = 0; | ||
| 1098 | int shared = 0; | ||
| 1099 | |||
| 1100 | if (!root->ref_cows) | ||
| 1101 | return 0; | ||
| 1102 | |||
| 1103 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | ||
| 1104 | shared = 0; | ||
| 1105 | root_gen = root->root_key.offset; | ||
| 1106 | } else { | ||
| 1107 | shared = 1; | ||
| 1108 | root_gen = trans->transid - 1; | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | level = btrfs_header_level(buf); | ||
| 1112 | nritems = btrfs_header_nritems(buf); | ||
| 1113 | |||
| 1114 | if (level == 0) { | ||
| 1115 | struct btrfs_leaf_ref *ref; | ||
| 1116 | struct btrfs_extent_info *info; | ||
| 1117 | |||
| 1118 | ref = btrfs_alloc_leaf_ref(root, nr_extents); | ||
| 1119 | if (!ref) { | ||
| 1120 | ret = -ENOMEM; | ||
| 1121 | goto out; | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | ref->root_gen = root_gen; | ||
| 1125 | ref->bytenr = buf->start; | ||
| 1126 | ref->owner = btrfs_header_owner(buf); | ||
| 1127 | ref->generation = btrfs_header_generation(buf); | ||
| 1128 | ref->nritems = nr_extents; | ||
| 1129 | info = ref->extents; | ||
| 1130 | |||
| 1131 | for (i = 0; nr_extents > 0 && i < nritems; i++) { | ||
| 1132 | u64 disk_bytenr; | ||
| 1133 | btrfs_item_key_to_cpu(buf, &key, i); | ||
| 1134 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
| 1135 | continue; | ||
| 1136 | fi = btrfs_item_ptr(buf, i, | ||
| 1137 | struct btrfs_file_extent_item); | ||
| 1138 | if (btrfs_file_extent_type(buf, fi) == | ||
| 1139 | BTRFS_FILE_EXTENT_INLINE) | ||
| 1140 | continue; | ||
| 1141 | disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
| 1142 | if (disk_bytenr == 0) | ||
| 1143 | continue; | ||
| 1144 | |||
| 1145 | info->bytenr = disk_bytenr; | ||
| 1146 | info->num_bytes = | ||
| 1147 | btrfs_file_extent_disk_num_bytes(buf, fi); | ||
| 1148 | info->objectid = key.objectid; | ||
| 1149 | info->offset = key.offset; | ||
| 1150 | info++; | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | ret = btrfs_add_leaf_ref(root, ref, shared); | ||
| 1154 | if (ret == -EEXIST && shared) { | ||
| 1155 | struct btrfs_leaf_ref *old; | ||
| 1156 | old = btrfs_lookup_leaf_ref(root, ref->bytenr); | ||
| 1157 | BUG_ON(!old); | ||
| 1158 | btrfs_remove_leaf_ref(root, old); | ||
| 1159 | btrfs_free_leaf_ref(root, old); | ||
| 1160 | ret = btrfs_add_leaf_ref(root, ref, shared); | ||
| 1161 | } | ||
| 1162 | WARN_ON(ret); | ||
| 1163 | btrfs_free_leaf_ref(root, ref); | ||
| 1164 | } | ||
| 1165 | out: | ||
| 1166 | return ret; | ||
| 1167 | } | ||
| 1168 | |||
| 1169 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 1170 | struct extent_buffer *orig_buf, struct extent_buffer *buf, | ||
| 1171 | u32 *nr_extents) | ||
| 1172 | { | ||
| 1173 | u64 bytenr; | ||
| 1174 | u64 ref_root; | ||
| 1175 | u64 orig_root; | ||
| 1176 | u64 ref_generation; | ||
| 1177 | u64 orig_generation; | ||
| 1178 | u32 nritems; | ||
| 1179 | u32 nr_file_extents = 0; | ||
| 1180 | struct btrfs_key key; | ||
| 1181 | struct btrfs_file_extent_item *fi; | ||
| 1182 | int i; | ||
| 1183 | int level; | ||
| 1184 | int ret = 0; | ||
| 1185 | int faili = 0; | ||
| 1186 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | ||
| 1187 | u64, u64, u64, u64, u64, u64, u64, u64); | ||
| 1188 | |||
| 1189 | ref_root = btrfs_header_owner(buf); | ||
| 1190 | ref_generation = btrfs_header_generation(buf); | ||
| 1191 | orig_root = btrfs_header_owner(orig_buf); | ||
| 1192 | orig_generation = btrfs_header_generation(orig_buf); | ||
| 1193 | |||
| 1194 | nritems = btrfs_header_nritems(buf); | ||
| 1195 | level = btrfs_header_level(buf); | ||
| 1196 | |||
| 1197 | if (root->ref_cows) { | ||
| 1198 | process_func = __btrfs_inc_extent_ref; | ||
| 1199 | } else { | ||
| 1200 | if (level == 0 && | ||
| 1201 | root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) | ||
| 1202 | goto out; | ||
| 1203 | if (level != 0 && | ||
| 1204 | root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) | ||
| 1205 | goto out; | ||
| 1206 | process_func = __btrfs_update_extent_ref; | ||
| 1207 | } | ||
| 1208 | |||
| 1209 | for (i = 0; i < nritems; i++) { | ||
| 1210 | cond_resched(); | ||
| 1211 | if (level == 0) { | ||
| 1212 | btrfs_item_key_to_cpu(buf, &key, i); | ||
| 1213 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
| 1214 | continue; | ||
| 1215 | fi = btrfs_item_ptr(buf, i, | ||
| 1216 | struct btrfs_file_extent_item); | ||
| 1217 | if (btrfs_file_extent_type(buf, fi) == | ||
| 1218 | BTRFS_FILE_EXTENT_INLINE) | ||
| 1219 | continue; | ||
| 1220 | bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
| 1221 | if (bytenr == 0) | ||
| 1222 | continue; | ||
| 1223 | |||
| 1224 | nr_file_extents++; | ||
| 1225 | |||
| 1226 | maybe_lock_mutex(root); | ||
| 1227 | ret = process_func(trans, root, bytenr, | ||
| 1228 | orig_buf->start, buf->start, | ||
| 1229 | orig_root, ref_root, | ||
| 1230 | orig_generation, ref_generation, | ||
| 1231 | key.objectid); | ||
| 1232 | maybe_unlock_mutex(root); | ||
| 1233 | |||
| 1234 | if (ret) { | ||
| 1235 | faili = i; | ||
| 1236 | WARN_ON(1); | ||
| 1237 | goto fail; | ||
| 1238 | } | ||
| 1239 | } else { | ||
| 1240 | bytenr = btrfs_node_blockptr(buf, i); | ||
| 1241 | maybe_lock_mutex(root); | ||
| 1242 | ret = process_func(trans, root, bytenr, | ||
| 1243 | orig_buf->start, buf->start, | ||
| 1244 | orig_root, ref_root, | ||
| 1245 | orig_generation, ref_generation, | ||
| 1246 | level - 1); | ||
| 1247 | maybe_unlock_mutex(root); | ||
| 1248 | if (ret) { | ||
| 1249 | faili = i; | ||
| 1250 | WARN_ON(1); | ||
| 1251 | goto fail; | ||
| 1252 | } | ||
| 1253 | } | ||
| 1254 | } | ||
| 1255 | out: | ||
| 1256 | if (nr_extents) { | ||
| 1257 | if (level == 0) | ||
| 1258 | *nr_extents = nr_file_extents; | ||
| 1259 | else | ||
| 1260 | *nr_extents = nritems; | ||
| 1261 | } | ||
| 1262 | return 0; | ||
| 1263 | fail: | ||
| 1264 | WARN_ON(1); | ||
| 1265 | return ret; | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | int btrfs_update_ref(struct btrfs_trans_handle *trans, | ||
| 1269 | struct btrfs_root *root, struct extent_buffer *orig_buf, | ||
| 1270 | struct extent_buffer *buf, int start_slot, int nr) | ||
| 1272 | { | ||
| 1273 | u64 bytenr; | ||
| 1274 | u64 ref_root; | ||
| 1275 | u64 orig_root; | ||
| 1276 | u64 ref_generation; | ||
| 1277 | u64 orig_generation; | ||
| 1278 | struct btrfs_key key; | ||
| 1279 | struct btrfs_file_extent_item *fi; | ||
| 1280 | int i; | ||
| 1281 | int ret; | ||
| 1282 | int slot; | ||
| 1283 | int level; | ||
| 1284 | |||
| 1285 | BUG_ON(start_slot < 0); | ||
| 1286 | BUG_ON(start_slot + nr > btrfs_header_nritems(buf)); | ||
| 1287 | |||
| 1288 | ref_root = btrfs_header_owner(buf); | ||
| 1289 | ref_generation = btrfs_header_generation(buf); | ||
| 1290 | orig_root = btrfs_header_owner(orig_buf); | ||
| 1291 | orig_generation = btrfs_header_generation(orig_buf); | ||
| 1292 | level = btrfs_header_level(buf); | ||
| 1293 | |||
| 1294 | if (!root->ref_cows) { | ||
| 1295 | if (level == 0 && | ||
| 1296 | root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) | ||
| 1297 | return 0; | ||
| 1298 | if (level != 0 && | ||
| 1299 | root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) | ||
| 1300 | return 0; | ||
| 1301 | } | ||
| 1302 | |||
| 1303 | for (i = 0, slot = start_slot; i < nr; i++, slot++) { | ||
| 1304 | cond_resched(); | ||
| 1305 | if (level == 0) { | ||
| 1306 | btrfs_item_key_to_cpu(buf, &key, slot); | ||
| 1307 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
| 1308 | continue; | ||
| 1309 | fi = btrfs_item_ptr(buf, slot, | ||
| 1310 | struct btrfs_file_extent_item); | ||
| 1311 | if (btrfs_file_extent_type(buf, fi) == | ||
| 1312 | BTRFS_FILE_EXTENT_INLINE) | ||
| 1313 | continue; | ||
| 1314 | bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
| 1315 | if (bytenr == 0) | ||
| 1316 | continue; | ||
| 1317 | maybe_lock_mutex(root); | ||
| 1318 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | ||
| 1319 | orig_buf->start, buf->start, | ||
| 1320 | orig_root, ref_root, | ||
| 1321 | orig_generation, ref_generation, | ||
| 1322 | key.objectid); | ||
| 1323 | maybe_unlock_mutex(root); | ||
| 1324 | if (ret) | ||
| 1325 | goto fail; | ||
| 1326 | } else { | ||
| 1327 | bytenr = btrfs_node_blockptr(buf, slot); | ||
| 1328 | maybe_lock_mutex(root); | ||
| 1329 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | ||
| 1330 | orig_buf->start, buf->start, | ||
| 1331 | orig_root, ref_root, | ||
| 1332 | orig_generation, ref_generation, | ||
| 1333 | level - 1); | ||
| 1334 | maybe_unlock_mutex(root); | ||
| 1335 | if (ret) | ||
| 1336 | goto fail; | ||
| 1337 | } | ||
| 1338 | } | ||
| 1339 | return 0; | ||
| 1340 | fail: | ||
| 1341 | WARN_ON(1); | ||
| 1342 | return ret; | ||
| 1343 | } | ||
| 1344 | |||
| 1345 | static int write_one_cache_group(struct btrfs_trans_handle *trans, | ||
| 1346 | struct btrfs_root *root, | ||
| 1347 | struct btrfs_path *path, | ||
| 1348 | struct btrfs_block_group_cache *cache) | ||
| 1349 | { | ||
| 1350 | int ret; | ||
| 1351 | int pending_ret; | ||
| 1352 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
| 1353 | unsigned long bi; | ||
| 1354 | struct extent_buffer *leaf; | ||
| 1355 | |||
| 1356 | ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); | ||
| 1357 | if (ret < 0) | ||
| 1358 | goto fail; | ||
| 1359 | BUG_ON(ret); | ||
| 1360 | |||
| 1361 | leaf = path->nodes[0]; | ||
| 1362 | bi = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 1363 | write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); | ||
| 1364 | btrfs_mark_buffer_dirty(leaf); | ||
| 1365 | btrfs_release_path(extent_root, path); | ||
| 1366 | fail: | ||
| 1367 | finish_current_insert(trans, extent_root); | ||
| 1368 | pending_ret = del_pending_extents(trans, extent_root); | ||
| 1369 | if (ret) | ||
| 1370 | return ret; | ||
| 1371 | if (pending_ret) | ||
| 1372 | return pending_ret; | ||
| 1373 | return 0; | ||
| 1375 | } | ||
| 1376 | |||
| 1377 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | ||
| 1378 | struct btrfs_root *root) | ||
| 1379 | { | ||
| 1380 | struct btrfs_block_group_cache *cache, *entry; | ||
| 1381 | struct rb_node *n; | ||
| 1382 | int err = 0; | ||
| 1383 | int werr = 0; | ||
| 1384 | struct btrfs_path *path; | ||
| 1385 | u64 last = 0; | ||
| 1386 | |||
| 1387 | path = btrfs_alloc_path(); | ||
| 1388 | if (!path) | ||
| 1389 | return -ENOMEM; | ||
| 1390 | |||
| 1391 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 1392 | while (1) { | ||
| 1393 | cache = NULL; | ||
| 1394 | spin_lock(&root->fs_info->block_group_cache_lock); | ||
| 1395 | for (n = rb_first(&root->fs_info->block_group_cache_tree); | ||
| 1396 | n; n = rb_next(n)) { | ||
| 1397 | entry = rb_entry(n, struct btrfs_block_group_cache, | ||
| 1398 | cache_node); | ||
| 1399 | if (entry->dirty) { | ||
| 1400 | cache = entry; | ||
| 1401 | break; | ||
| 1402 | } | ||
| 1403 | } | ||
| 1404 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
| 1405 | |||
| 1406 | if (!cache) | ||
| 1407 | break; | ||
| 1408 | |||
| 1409 | cache->dirty = 0; | ||
| 1410 | last += cache->key.offset; | ||
| 1411 | |||
| 1412 | err = write_one_cache_group(trans, root, | ||
| 1413 | path, cache); | ||
| 1414 | /* | ||
| 1415 | * if we fail to write the cache group, remember the | ||
| 1416 | * error and keep going so the remaining dirty groups | ||
| 1417 | * still get written | ||
| 1418 | */ | ||
| 1419 | if (err) { | ||
| 1420 | werr = err; | ||
| 1421 | continue; | ||
| 1422 | } | ||
| 1423 | } | ||
| 1424 | btrfs_free_path(path); | ||
| 1425 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 1426 | return werr; | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | static int update_space_info(struct btrfs_fs_info *info, u64 flags, | ||
| 1430 | u64 total_bytes, u64 bytes_used, | ||
| 1431 | struct btrfs_space_info **space_info) | ||
| 1432 | { | ||
| 1433 | struct btrfs_space_info *found; | ||
| 1434 | |||
| 1435 | found = __find_space_info(info, flags); | ||
| 1436 | if (found) { | ||
| 1437 | found->total_bytes += total_bytes; | ||
| 1438 | found->bytes_used += bytes_used; | ||
| 1439 | found->full = 0; | ||
| 1440 | *space_info = found; | ||
| 1441 | return 0; | ||
| 1442 | } | ||
| 1443 | found = kmalloc(sizeof(*found), GFP_NOFS); | ||
| 1444 | if (!found) | ||
| 1445 | return -ENOMEM; | ||
| 1446 | |||
| 1447 | list_add(&found->list, &info->space_info); | ||
| 1448 | INIT_LIST_HEAD(&found->block_groups); | ||
| 1449 | spin_lock_init(&found->lock); | ||
| 1450 | found->flags = flags; | ||
| 1451 | found->total_bytes = total_bytes; | ||
| 1452 | found->bytes_used = bytes_used; | ||
| 1453 | found->bytes_pinned = 0; | ||
| 1454 | found->bytes_reserved = 0; | ||
| 1455 | found->full = 0; | ||
| 1456 | found->force_alloc = 0; | ||
| 1457 | *space_info = found; | ||
| 1458 | return 0; | ||
| 1459 | } | ||
| 1460 | |||
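| | /* | ||
| | * record which raid/dup profile bits are now available for data, | ||
| | * metadata and system chunk allocations | ||
| | */ | ||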
| 1461 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | ||
| 1462 | { | ||
| 1463 | u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | | ||
| 1464 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 1465 | BTRFS_BLOCK_GROUP_RAID10 | | ||
| 1466 | BTRFS_BLOCK_GROUP_DUP); | ||
| 1467 | if (extra_flags) { | ||
| 1468 | if (flags & BTRFS_BLOCK_GROUP_DATA) | ||
| 1469 | fs_info->avail_data_alloc_bits |= extra_flags; | ||
| 1470 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | ||
| 1471 | fs_info->avail_metadata_alloc_bits |= extra_flags; | ||
| 1472 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
| 1473 | fs_info->avail_system_alloc_bits |= extra_flags; | ||
| 1474 | } | ||
| 1475 | } | ||
| 1476 | |||
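| | /* | ||
| | * drop profile bits the current number of devices cannot satisfy | ||
| | * (raid0/raid1 need more than one device, raid10 needs at least four) | ||
| | * and reduce redundant combinations to a single profile | ||
| | */ | ||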
| 1477 | static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) | ||
| 1478 | { | ||
| 1479 | u64 num_devices = root->fs_info->fs_devices->num_devices; | ||
| 1480 | |||
| 1481 | if (num_devices == 1) | ||
| 1482 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | ||
| 1483 | if (num_devices < 4) | ||
| 1484 | flags &= ~BTRFS_BLOCK_GROUP_RAID10; | ||
| 1485 | |||
| 1486 | if ((flags & BTRFS_BLOCK_GROUP_DUP) && | ||
| 1487 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 1488 | BTRFS_BLOCK_GROUP_RAID10))) { | ||
| 1489 | flags &= ~BTRFS_BLOCK_GROUP_DUP; | ||
| 1490 | } | ||
| 1491 | |||
| 1492 | if ((flags & BTRFS_BLOCK_GROUP_RAID1) && | ||
| 1493 | (flags & BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 1494 | flags &= ~BTRFS_BLOCK_GROUP_RAID1; | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | if ((flags & BTRFS_BLOCK_GROUP_RAID0) && | ||
| 1498 | ((flags & BTRFS_BLOCK_GROUP_RAID1) | | ||
| 1499 | (flags & BTRFS_BLOCK_GROUP_RAID10) | | ||
| 1500 | (flags & BTRFS_BLOCK_GROUP_DUP))) | ||
| 1501 | flags &= ~BTRFS_BLOCK_GROUP_RAID0; | ||
| 1502 | return flags; | ||
| 1503 | } | ||
| 1504 | |||
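| | /* | ||
| | * allocate a new chunk for the given profile once the space_info is | ||
| | * sufficiently full, or whenever the caller forces it.  The chunk | ||
| | * mutex is trylocked so a non-forced allocation backs off instead of | ||
| | * blocking on a concurrent chunk allocation. | ||
| | */ | ||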
| 1505 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | ||
| 1506 | struct btrfs_root *extent_root, u64 alloc_bytes, | ||
| 1507 | u64 flags, int force) | ||
| 1508 | { | ||
| 1509 | struct btrfs_space_info *space_info; | ||
| 1510 | u64 thresh; | ||
| 1511 | u64 start; | ||
| 1512 | u64 num_bytes; | ||
| 1513 | int ret = 0, waited = 0; | ||
| 1514 | |||
| 1515 | flags = reduce_alloc_profile(extent_root, flags); | ||
| 1516 | |||
| 1517 | space_info = __find_space_info(extent_root->fs_info, flags); | ||
| 1518 | if (!space_info) { | ||
| 1519 | ret = update_space_info(extent_root->fs_info, flags, | ||
| 1520 | 0, 0, &space_info); | ||
| 1521 | BUG_ON(ret); | ||
| 1522 | } | ||
| 1523 | BUG_ON(!space_info); | ||
| 1524 | |||
| 1525 | if (space_info->force_alloc) { | ||
| 1526 | force = 1; | ||
| 1527 | space_info->force_alloc = 0; | ||
| 1528 | } | ||
| 1529 | if (space_info->full) | ||
| 1530 | goto out; | ||
| 1531 | |||
| 1532 | thresh = div_factor(space_info->total_bytes, 6); | ||
| 1533 | if (!force && | ||
| 1534 | (space_info->bytes_used + space_info->bytes_pinned + | ||
| 1535 | space_info->bytes_reserved + alloc_bytes) < thresh) | ||
| 1536 | goto out; | ||
| 1537 | |||
| 1538 | while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) { | ||
| 1539 | if (!force) | ||
| 1540 | goto out; | ||
| 1541 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 1542 | cond_resched(); | ||
| 1543 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 1544 | waited = 1; | ||
| 1545 | } | ||
| 1546 | |||
| 1547 | if (waited && space_info->full) | ||
| 1548 | goto out_unlock; | ||
| 1549 | |||
| 1550 | ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); | ||
| 1551 | if (ret == -ENOSPC) { | ||
| 1552 | printk("space info full %Lu\n", flags); | ||
| 1553 | space_info->full = 1; | ||
| 1554 | goto out_unlock; | ||
| 1555 | } | ||
| 1556 | BUG_ON(ret); | ||
| 1557 | |||
| 1558 | ret = btrfs_make_block_group(trans, extent_root, 0, flags, | ||
| 1559 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); | ||
| 1560 | BUG_ON(ret); | ||
| 1561 | |||
| 1562 | out_unlock: | ||
| 1563 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | ||
| 1564 | out: | ||
| 1565 | return ret; | ||
| 1566 | } | ||
| 1567 | |||
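| | /* | ||
| | * adjust the used-byte counters of every block group covering | ||
| | * [bytenr, bytenr + num_bytes) after an allocation or a free, | ||
| | * optionally returning freed space to the free space cache | ||
| | */ | ||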
| 1568 | static int update_block_group(struct btrfs_trans_handle *trans, | ||
| 1569 | struct btrfs_root *root, | ||
| 1570 | u64 bytenr, u64 num_bytes, int alloc, | ||
| 1571 | int mark_free) | ||
| 1572 | { | ||
| 1573 | struct btrfs_block_group_cache *cache; | ||
| 1574 | struct btrfs_fs_info *info = root->fs_info; | ||
| 1575 | u64 total = num_bytes; | ||
| 1576 | u64 old_val; | ||
| 1577 | u64 byte_in_group; | ||
| 1578 | |||
| 1579 | WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 1580 | while(total) { | ||
| 1581 | cache = btrfs_lookup_block_group(info, bytenr); | ||
| 1582 | if (!cache) { | ||
| 1583 | return -1; | ||
| 1584 | } | ||
| 1585 | byte_in_group = bytenr - cache->key.objectid; | ||
| 1586 | WARN_ON(byte_in_group > cache->key.offset); | ||
| 1587 | |||
| 1588 | spin_lock(&cache->lock); | ||
| 1589 | cache->dirty = 1; | ||
| 1590 | old_val = btrfs_block_group_used(&cache->item); | ||
| 1591 | num_bytes = min(total, cache->key.offset - byte_in_group); | ||
| 1592 | if (alloc) { | ||
| 1593 | old_val += num_bytes; | ||
| 1594 | cache->space_info->bytes_used += num_bytes; | ||
| 1595 | btrfs_set_block_group_used(&cache->item, old_val); | ||
| 1596 | spin_unlock(&cache->lock); | ||
| 1597 | } else { | ||
| 1598 | old_val -= num_bytes; | ||
| 1599 | cache->space_info->bytes_used -= num_bytes; | ||
| 1600 | btrfs_set_block_group_used(&cache->item, old_val); | ||
| 1601 | spin_unlock(&cache->lock); | ||
| 1602 | if (mark_free) { | ||
| 1603 | int ret; | ||
| 1604 | ret = btrfs_add_free_space(cache, bytenr, | ||
| 1605 | num_bytes); | ||
| 1606 | if (ret) | ||
| 1607 | return -1; | ||
| 1608 | } | ||
| 1609 | } | ||
| 1610 | total -= num_bytes; | ||
| 1611 | bytenr += num_bytes; | ||
| 1612 | } | ||
| 1613 | return 0; | ||
| 1614 | } | ||
| 1615 | |||
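| | /* return the start of the first block group found at or after search_start */ | ||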
| 1616 | static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | ||
| 1617 | { | ||
| 1618 | struct btrfs_block_group_cache *cache; | ||
| 1619 | |||
| 1620 | cache = btrfs_lookup_first_block_group(root->fs_info, search_start); | ||
| 1621 | if (!cache) | ||
| 1622 | return 0; | ||
| 1623 | |||
| 1624 | return cache->key.objectid; | ||
| 1625 | } | ||
| 1626 | |||
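| | /* | ||
| | * pin or unpin a byte range: update the pinned_extents io tree and | ||
| | * the per-block-group and space_info pinned byte counters | ||
| | */ | ||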
| 1627 | int btrfs_update_pinned_extents(struct btrfs_root *root, | ||
| 1628 | u64 bytenr, u64 num, int pin) | ||
| 1629 | { | ||
| 1630 | u64 len; | ||
| 1631 | struct btrfs_block_group_cache *cache; | ||
| 1632 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 1633 | |||
| 1634 | WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 1635 | if (pin) { | ||
| 1636 | set_extent_dirty(&fs_info->pinned_extents, | ||
| 1637 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
| 1638 | } else { | ||
| 1639 | clear_extent_dirty(&fs_info->pinned_extents, | ||
| 1640 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
| 1641 | } | ||
| 1642 | while (num > 0) { | ||
| 1643 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 1644 | BUG_ON(!cache); | ||
| 1645 | len = min(num, cache->key.offset - | ||
| 1646 | (bytenr - cache->key.objectid)); | ||
| 1647 | if (pin) { | ||
| 1648 | spin_lock(&cache->lock); | ||
| 1649 | cache->pinned += len; | ||
| 1650 | cache->space_info->bytes_pinned += len; | ||
| 1651 | spin_unlock(&cache->lock); | ||
| 1652 | fs_info->total_pinned += len; | ||
| 1653 | } else { | ||
| 1654 | spin_lock(&cache->lock); | ||
| 1655 | cache->pinned -= len; | ||
| 1656 | cache->space_info->bytes_pinned -= len; | ||
| 1657 | spin_unlock(&cache->lock); | ||
| 1658 | fs_info->total_pinned -= len; | ||
| 1659 | } | ||
| 1660 | bytenr += len; | ||
| 1661 | num -= len; | ||
| 1662 | } | ||
| 1663 | return 0; | ||
| 1664 | } | ||
| 1665 | |||
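| | /* | ||
| | * reserve or unreserve a byte range in the block groups covering it, | ||
| | * keeping the space_info bytes_reserved counters in sync | ||
| | */ | ||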
| 1666 | static int update_reserved_extents(struct btrfs_root *root, | ||
| 1667 | u64 bytenr, u64 num, int reserve) | ||
| 1668 | { | ||
| 1669 | u64 len; | ||
| 1670 | struct btrfs_block_group_cache *cache; | ||
| 1671 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 1672 | |||
| 1673 | WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 1674 | while (num > 0) { | ||
| 1675 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 1676 | BUG_ON(!cache); | ||
| 1677 | len = min(num, cache->key.offset - | ||
| 1678 | (bytenr - cache->key.objectid)); | ||
| 1679 | if (reserve) { | ||
| 1680 | spin_lock(&cache->lock); | ||
| 1681 | cache->reserved += len; | ||
| 1682 | cache->space_info->bytes_reserved += len; | ||
| 1683 | spin_unlock(&cache->lock); | ||
| 1684 | } else { | ||
| 1685 | spin_lock(&cache->lock); | ||
| 1686 | cache->reserved -= len; | ||
| 1687 | cache->space_info->bytes_reserved -= len; | ||
| 1688 | spin_unlock(&cache->lock); | ||
| 1689 | } | ||
| 1690 | bytenr += len; | ||
| 1691 | num -= len; | ||
| 1692 | } | ||
| 1693 | return 0; | ||
| 1694 | } | ||
| 1695 | |||
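| | /* | ||
| | * copy everything currently pinned into the given io tree so the | ||
| | * caller can unpin it after the running transaction commits | ||
| | */ | ||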
| 1696 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | ||
| 1697 | { | ||
| 1698 | u64 last = 0; | ||
| 1699 | u64 start; | ||
| 1700 | u64 end; | ||
| 1701 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | ||
| 1702 | int ret; | ||
| 1703 | |||
| 1704 | while(1) { | ||
| 1705 | ret = find_first_extent_bit(pinned_extents, last, | ||
| 1706 | &start, &end, EXTENT_DIRTY); | ||
| 1707 | if (ret) | ||
| 1708 | break; | ||
| 1709 | set_extent_dirty(copy, start, end, GFP_NOFS); | ||
| 1710 | last = end + 1; | ||
| 1711 | } | ||
| 1712 | return 0; | ||
| 1713 | } | ||
| 1714 | |||
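| | /* | ||
| | * at commit time, unpin every range recorded in the unpin tree and | ||
| | * return the space to the free space cache of cached block groups | ||
| | */ | ||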
| 1715 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | ||
| 1716 | struct btrfs_root *root, | ||
| 1717 | struct extent_io_tree *unpin) | ||
| 1718 | { | ||
| 1719 | u64 start; | ||
| 1720 | u64 end; | ||
| 1721 | int ret; | ||
| 1722 | struct btrfs_block_group_cache *cache; | ||
| 1723 | |||
| 1724 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 1725 | while(1) { | ||
| 1726 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
| 1727 | EXTENT_DIRTY); | ||
| 1728 | if (ret) | ||
| 1729 | break; | ||
| 1730 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | ||
| 1731 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
| 1732 | cache = btrfs_lookup_block_group(root->fs_info, start); | ||
| 1733 | if (cache->cached) | ||
| 1734 | btrfs_add_free_space(cache, start, end - start + 1); | ||
| 1735 | if (need_resched()) { | ||
| 1736 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 1737 | cond_resched(); | ||
| 1738 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 1739 | } | ||
| 1740 | } | ||
| 1741 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 1742 | return 0; | ||
| 1743 | } | ||
| 1744 | |||
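| | /* | ||
| | * apply the pending extent inserts and backref updates queued in | ||
| | * fs_info->extent_ins, which are deferred to avoid recursing into | ||
| | * the extent tree while it is being changed | ||
| | */ | ||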
| 1745 | static int finish_current_insert(struct btrfs_trans_handle *trans, | ||
| 1746 | struct btrfs_root *extent_root) | ||
| 1747 | { | ||
| 1748 | u64 start; | ||
| 1749 | u64 end; | ||
| 1750 | u64 priv; | ||
| 1751 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
| 1752 | struct btrfs_path *path; | ||
| 1753 | struct btrfs_extent_ref *ref; | ||
| 1754 | struct pending_extent_op *extent_op; | ||
| 1755 | struct btrfs_key key; | ||
| 1756 | struct btrfs_extent_item extent_item; | ||
| 1757 | int ret; | ||
| 1758 | int err = 0; | ||
| 1759 | |||
| 1760 | WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); | ||
| 1761 | btrfs_set_stack_extent_refs(&extent_item, 1); | ||
| 1762 | path = btrfs_alloc_path(); | ||
| 1763 | |||
| 1764 | while(1) { | ||
| 1765 | ret = find_first_extent_bit(&info->extent_ins, 0, &start, | ||
| 1766 | &end, EXTENT_LOCKED); | ||
| 1767 | if (ret) | ||
| 1768 | break; | ||
| 1769 | |||
| 1770 | ret = get_state_private(&info->extent_ins, start, &priv); | ||
| 1771 | BUG_ON(ret); | ||
| 1772 | extent_op = (struct pending_extent_op *)(unsigned long)priv; | ||
| 1773 | |||
| 1774 | if (extent_op->type == PENDING_EXTENT_INSERT) { | ||
| 1775 | key.objectid = start; | ||
| 1776 | key.offset = end + 1 - start; | ||
| 1777 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 1778 | err = btrfs_insert_item(trans, extent_root, &key, | ||
| 1779 | &extent_item, sizeof(extent_item)); | ||
| 1780 | BUG_ON(err); | ||
| 1781 | |||
| 1782 | clear_extent_bits(&info->extent_ins, start, end, | ||
| 1783 | EXTENT_LOCKED, GFP_NOFS); | ||
| 1784 | |||
| 1785 | err = insert_extent_backref(trans, extent_root, path, | ||
| 1786 | start, extent_op->parent, | ||
| 1787 | extent_root->root_key.objectid, | ||
| 1788 | extent_op->generation, | ||
| 1789 | extent_op->level); | ||
| 1790 | BUG_ON(err); | ||
| 1791 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { | ||
| 1792 | err = lookup_extent_backref(trans, extent_root, path, | ||
| 1793 | start, extent_op->orig_parent, | ||
| 1794 | extent_root->root_key.objectid, | ||
| 1795 | extent_op->orig_generation, | ||
| 1796 | extent_op->level, 0); | ||
| 1797 | BUG_ON(err); | ||
| 1798 | |||
| 1799 | clear_extent_bits(&info->extent_ins, start, end, | ||
| 1800 | EXTENT_LOCKED, GFP_NOFS); | ||
| 1801 | |||
| 1802 | key.objectid = start; | ||
| 1803 | key.offset = extent_op->parent; | ||
| 1804 | key.type = BTRFS_EXTENT_REF_KEY; | ||
| 1805 | err = btrfs_set_item_key_safe(trans, extent_root, path, | ||
| 1806 | &key); | ||
| 1807 | BUG_ON(err); | ||
| 1808 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 1809 | struct btrfs_extent_ref); | ||
| 1810 | btrfs_set_ref_generation(path->nodes[0], ref, | ||
| 1811 | extent_op->generation); | ||
| 1812 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 1813 | btrfs_release_path(extent_root, path); | ||
| 1814 | } else { | ||
| 1815 | BUG_ON(1); | ||
| 1816 | } | ||
| 1817 | kfree(extent_op); | ||
| 1818 | |||
| 1819 | if (need_resched()) { | ||
| 1820 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 1821 | cond_resched(); | ||
| 1822 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 1823 | } | ||
| 1824 | } | ||
| 1825 | btrfs_free_path(path); | ||
| 1826 | return 0; | ||
| 1827 | } | ||
| 1828 | |||
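| | /* | ||
| | * pin an extent so it is freed after the transaction commits. | ||
| | * Returns 1 instead of pinning when the extent is an unwritten | ||
| | * tree block from the current transaction that can be cleaned | ||
| | * and reused right away. | ||
| | */ | ||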
| 1829 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
| 1830 | struct btrfs_root *root, | ||
| 1831 | u64 bytenr, u64 num_bytes, int is_data) | ||
| 1832 | { | ||
| 1833 | int err = 0; | ||
| 1834 | struct extent_buffer *buf; | ||
| 1835 | |||
| 1836 | WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 1837 | if (is_data) | ||
| 1838 | goto pinit; | ||
| 1839 | |||
| 1840 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | ||
| 1841 | if (!buf) | ||
| 1842 | goto pinit; | ||
| 1843 | |||
| 1844 | /* we can reuse a block if it hasn't been written | ||
| 1845 | * and it is from this transaction. We can't | ||
| 1846 | * reuse anything from the tree log root because | ||
| 1847 | * it has tiny sub-transactions. | ||
| 1848 | */ | ||
| 1849 | if (btrfs_buffer_uptodate(buf, 0) && | ||
| 1850 | btrfs_try_tree_lock(buf)) { | ||
| 1851 | u64 header_owner = btrfs_header_owner(buf); | ||
| 1852 | u64 header_transid = btrfs_header_generation(buf); | ||
| 1853 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | ||
| 1854 | header_owner != BTRFS_TREE_RELOC_OBJECTID && | ||
| 1855 | header_transid == trans->transid && | ||
| 1856 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
| 1857 | clean_tree_block(NULL, root, buf); | ||
| 1858 | btrfs_tree_unlock(buf); | ||
| 1859 | free_extent_buffer(buf); | ||
| 1860 | return 1; | ||
| 1861 | } | ||
| 1862 | btrfs_tree_unlock(buf); | ||
| 1863 | } | ||
| 1864 | free_extent_buffer(buf); | ||
| 1865 | pinit: | ||
| 1866 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | ||
| 1867 | |||
| 1868 | BUG_ON(err < 0); | ||
| 1869 | return 0; | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | /* | ||
| 1873 | * remove an extent from the root, returns 0 on success | ||
| 1874 | */ | ||
| 1875 | static int __free_extent(struct btrfs_trans_handle *trans, | ||
| 1876 | struct btrfs_root *root, | ||
| 1877 | u64 bytenr, u64 num_bytes, u64 parent, | ||
| 1878 | u64 root_objectid, u64 ref_generation, | ||
| 1879 | u64 owner_objectid, int pin, int mark_free) | ||
| 1880 | { | ||
| 1881 | struct btrfs_path *path; | ||
| 1882 | struct btrfs_key key; | ||
| 1883 | struct btrfs_fs_info *info = root->fs_info; | ||
| 1884 | struct btrfs_root *extent_root = info->extent_root; | ||
| 1885 | struct extent_buffer *leaf; | ||
| 1886 | int ret; | ||
| 1887 | int extent_slot = 0; | ||
| 1888 | int found_extent = 0; | ||
| 1889 | int num_to_del = 1; | ||
| 1890 | struct btrfs_extent_item *ei; | ||
| 1891 | u32 refs; | ||
| 1892 | |||
| 1893 | WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 1894 | key.objectid = bytenr; | ||
| 1895 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | ||
| 1896 | key.offset = num_bytes; | ||
| 1897 | path = btrfs_alloc_path(); | ||
| 1898 | if (!path) | ||
| 1899 | return -ENOMEM; | ||
| 1900 | |||
| 1901 | path->reada = 1; | ||
| 1902 | ret = lookup_extent_backref(trans, extent_root, path, | ||
| 1903 | bytenr, parent, root_objectid, | ||
| 1904 | ref_generation, owner_objectid, 1); | ||
| 1905 | if (ret == 0) { | ||
| 1906 | struct btrfs_key found_key; | ||
| 1907 | extent_slot = path->slots[0]; | ||
| 1908 | while(extent_slot > 0) { | ||
| 1909 | extent_slot--; | ||
| 1910 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 1911 | extent_slot); | ||
| 1912 | if (found_key.objectid != bytenr) | ||
| 1913 | break; | ||
| 1914 | if (found_key.type == BTRFS_EXTENT_ITEM_KEY && | ||
| 1915 | found_key.offset == num_bytes) { | ||
| 1916 | found_extent = 1; | ||
| 1917 | break; | ||
| 1918 | } | ||
| 1919 | if (path->slots[0] - extent_slot > 5) | ||
| 1920 | break; | ||
| 1921 | } | ||
| 1922 | if (!found_extent) { | ||
| 1923 | ret = remove_extent_backref(trans, extent_root, path); | ||
| 1924 | BUG_ON(ret); | ||
| 1925 | btrfs_release_path(extent_root, path); | ||
| 1926 | ret = btrfs_search_slot(trans, extent_root, | ||
| 1927 | &key, path, -1, 1); | ||
| 1928 | BUG_ON(ret); | ||
| 1929 | extent_slot = path->slots[0]; | ||
| 1930 | } | ||
| 1931 | } else { | ||
| 1932 | btrfs_print_leaf(extent_root, path->nodes[0]); | ||
| 1933 | WARN_ON(1); | ||
| 1934 | printk("Unable to find ref byte nr %Lu root %Lu " | ||
| 1935 | "gen %Lu owner %Lu\n", bytenr, | ||
| 1936 | root_objectid, ref_generation, owner_objectid); | ||
| 1937 | } | ||
| 1938 | |||
| 1939 | leaf = path->nodes[0]; | ||
| 1940 | ei = btrfs_item_ptr(leaf, extent_slot, | ||
| 1941 | struct btrfs_extent_item); | ||
| 1942 | refs = btrfs_extent_refs(leaf, ei); | ||
| 1943 | BUG_ON(refs == 0); | ||
| 1944 | refs -= 1; | ||
| 1945 | btrfs_set_extent_refs(leaf, ei, refs); | ||
| 1946 | |||
| 1947 | btrfs_mark_buffer_dirty(leaf); | ||
| 1948 | |||
| 1949 | if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { | ||
| 1950 | struct btrfs_extent_ref *ref; | ||
| 1951 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1952 | struct btrfs_extent_ref); | ||
| 1953 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); | ||
| 1954 | /* if the back ref and the extent are next to each other | ||
| 1955 | * they get deleted below in one shot | ||
| 1956 | */ | ||
| 1957 | path->slots[0] = extent_slot; | ||
| 1958 | num_to_del = 2; | ||
| 1959 | } else if (found_extent) { | ||
| 1960 | /* otherwise delete the extent back ref */ | ||
| 1961 | ret = remove_extent_backref(trans, extent_root, path); | ||
| 1962 | BUG_ON(ret); | ||
| 1963 | /* if refs are 0, we need to setup the path for deletion */ | ||
| 1964 | if (refs == 0) { | ||
| 1965 | btrfs_release_path(extent_root, path); | ||
| 1966 | ret = btrfs_search_slot(trans, extent_root, &key, path, | ||
| 1967 | -1, 1); | ||
| 1968 | BUG_ON(ret); | ||
| 1969 | } | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | if (refs == 0) { | ||
| 1973 | u64 super_used; | ||
| 1974 | u64 root_used; | ||
| 1975 | #ifdef BIO_RW_DISCARD | ||
| 1976 | u64 map_length = num_bytes; | ||
| 1977 | struct btrfs_multi_bio *multi = NULL; | ||
| 1978 | #endif | ||
| 1979 | |||
| 1980 | if (pin) { | ||
| 1981 | ret = pin_down_bytes(trans, root, bytenr, num_bytes, | ||
| 1982 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); | ||
| 1983 | if (ret > 0) | ||
| 1984 | mark_free = 1; | ||
| 1985 | BUG_ON(ret < 0); | ||
| 1986 | } | ||
| 1987 | |||
| 1988 | /* block accounting for super block */ | ||
| 1989 | spin_lock_irq(&info->delalloc_lock); | ||
| 1990 | super_used = btrfs_super_bytes_used(&info->super_copy); | ||
| 1991 | btrfs_set_super_bytes_used(&info->super_copy, | ||
| 1992 | super_used - num_bytes); | ||
| 1993 | spin_unlock_irq(&info->delalloc_lock); | ||
| 1994 | |||
| 1995 | /* block accounting for root item */ | ||
| 1996 | root_used = btrfs_root_used(&root->root_item); | ||
| 1997 | btrfs_set_root_used(&root->root_item, | ||
| 1998 | root_used - num_bytes); | ||
| 1999 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | ||
| 2000 | num_to_del); | ||
| 2001 | BUG_ON(ret); | ||
| 2002 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | ||
| 2003 | mark_free); | ||
| 2004 | BUG_ON(ret); | ||
| 2005 | |||
| 2006 | #ifdef BIO_RW_DISCARD | ||
| 2007 | /* Tell the block device(s) that the sectors can be discarded */ | ||
| 2008 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | ||
| 2009 | bytenr, &map_length, &multi, 0); | ||
| 2010 | if (!ret) { | ||
| 2011 | struct btrfs_bio_stripe *stripe = multi->stripes; | ||
| 2012 | int i; | ||
| 2013 | |||
| 2014 | if (map_length > num_bytes) | ||
| 2015 | map_length = num_bytes; | ||
| 2016 | |||
| 2017 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | ||
| 2018 | blkdev_issue_discard(stripe->dev->bdev, | ||
| 2019 | stripe->physical >> 9, | ||
| 2020 | map_length >> 9); | ||
| 2021 | } | ||
| 2022 | kfree(multi); | ||
| 2023 | } | ||
| 2024 | #endif | ||
| 2025 | } | ||
| 2026 | btrfs_free_path(path); | ||
| 2027 | finish_current_insert(trans, extent_root); | ||
| 2028 | return ret; | ||
| 2029 | } | ||
| 2030 | |||
| 2031 | /* | ||
| 2032 | * find all the blocks marked as pending in the radix tree and remove | ||
| 2033 | * them from the extent map | ||
| 2034 | */ | ||
| 2035 | static int del_pending_extents(struct btrfs_trans_handle *trans, struct | ||
| 2036 | btrfs_root *extent_root) | ||
| 2037 | { | ||
| 2038 | int ret; | ||
| 2039 | int err = 0; | ||
| 2040 | int mark_free = 0; | ||
| 2041 | u64 start; | ||
| 2042 | u64 end; | ||
| 2043 | u64 priv; | ||
| 2044 | struct extent_io_tree *pending_del; | ||
| 2045 | struct extent_io_tree *extent_ins; | ||
| 2046 | struct pending_extent_op *extent_op; | ||
| 2047 | |||
| 2048 | WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); | ||
| 2049 | extent_ins = &extent_root->fs_info->extent_ins; | ||
| 2050 | pending_del = &extent_root->fs_info->pending_del; | ||
| 2051 | |||
| 2052 | while(1) { | ||
| 2053 | ret = find_first_extent_bit(pending_del, 0, &start, &end, | ||
| 2054 | EXTENT_LOCKED); | ||
| 2055 | if (ret) | ||
| 2056 | break; | ||
| 2057 | |||
| 2058 | ret = get_state_private(pending_del, start, &priv); | ||
| 2059 | BUG_ON(ret); | ||
| 2060 | extent_op = (struct pending_extent_op *)(unsigned long)priv; | ||
| 2061 | |||
| 2062 | clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, | ||
| 2063 | GFP_NOFS); | ||
| 2064 | |||
| 2065 | ret = pin_down_bytes(trans, extent_root, start, | ||
| 2066 | end + 1 - start, 0); | ||
| 2067 | mark_free = ret > 0; | ||
| 2068 | if (!test_range_bit(extent_ins, start, end, | ||
| 2069 | EXTENT_LOCKED, 0)) { | ||
| 2070 | free_extent: | ||
| 2071 | ret = __free_extent(trans, extent_root, | ||
| 2072 | start, end + 1 - start, | ||
| 2073 | extent_op->orig_parent, | ||
| 2074 | extent_root->root_key.objectid, | ||
| 2075 | extent_op->orig_generation, | ||
| 2076 | extent_op->level, 0, mark_free); | ||
| 2077 | kfree(extent_op); | ||
| 2078 | } else { | ||
| 2079 | kfree(extent_op); | ||
| 2080 | ret = get_state_private(extent_ins, start, &priv); | ||
| 2081 | BUG_ON(ret); | ||
| 2082 | extent_op = (struct pending_extent_op *) | ||
| 2083 | (unsigned long)priv; | ||
| 2084 | |||
| 2085 | clear_extent_bits(extent_ins, start, end, | ||
| 2086 | EXTENT_LOCKED, GFP_NOFS); | ||
| 2087 | |||
| 2088 | if (extent_op->type == PENDING_BACKREF_UPDATE) | ||
| 2089 | goto free_extent; | ||
| 2090 | |||
| 2091 | ret = update_block_group(trans, extent_root, start, | ||
| 2092 | end + 1 - start, 0, mark_free); | ||
| 2093 | BUG_ON(ret); | ||
| 2094 | kfree(extent_op); | ||
| 2095 | } | ||
| 2096 | if (ret) | ||
| 2097 | err = ret; | ||
| 2098 | |||
| 2099 | if (need_resched()) { | ||
| 2100 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 2101 | cond_resched(); | ||
| 2102 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 2103 | } | ||
| 2104 | } | ||
| 2105 | return err; | ||
| 2106 | } | ||
| 2107 | |||
| 2108 | /* | ||
| 2109 | * remove an extent from the root, returns 0 on success | ||
| 2110 | */ | ||
| 2111 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | ||
| 2112 | struct btrfs_root *root, | ||
| 2113 | u64 bytenr, u64 num_bytes, u64 parent, | ||
| 2114 | u64 root_objectid, u64 ref_generation, | ||
| 2115 | u64 owner_objectid, int pin) | ||
| 2116 | { | ||
| 2117 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
| 2118 | int pending_ret; | ||
| 2119 | int ret; | ||
| 2120 | |||
| 2121 | WARN_ON(num_bytes < root->sectorsize); | ||
| 2122 | if (root == extent_root) { | ||
| 2123 | struct pending_extent_op *extent_op; | ||
| 2124 | |||
| 2125 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
| 2126 | BUG_ON(!extent_op); | ||
| 2127 | |||
| 2128 | extent_op->type = PENDING_EXTENT_DELETE; | ||
| 2129 | extent_op->bytenr = bytenr; | ||
| 2130 | extent_op->num_bytes = num_bytes; | ||
| 2131 | extent_op->parent = parent; | ||
| 2132 | extent_op->orig_parent = parent; | ||
| 2133 | extent_op->generation = ref_generation; | ||
| 2134 | extent_op->orig_generation = ref_generation; | ||
| 2135 | extent_op->level = (int)owner_objectid; | ||
| 2136 | |||
| 2137 | set_extent_bits(&root->fs_info->pending_del, | ||
| 2138 | bytenr, bytenr + num_bytes - 1, | ||
| 2139 | EXTENT_LOCKED, GFP_NOFS); | ||
| 2140 | set_state_private(&root->fs_info->pending_del, | ||
| 2141 | bytenr, (unsigned long)extent_op); | ||
| 2142 | return 0; | ||
| 2143 | } | ||
| 2144 | /* if metadata always pin */ | ||
| 2145 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
| 2146 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
| 2147 | struct btrfs_block_group_cache *cache; | ||
| 2148 | |||
| 2149 | /* btrfs_free_reserved_extent */ | ||
| 2150 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
| 2151 | BUG_ON(!cache); | ||
| 2152 | btrfs_add_free_space(cache, bytenr, num_bytes); | ||
| 2153 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
| 2154 | return 0; | ||
| 2155 | } | ||
| 2156 | pin = 1; | ||
| 2157 | } | ||
| 2158 | |||
| 2159 | /* if data pin when any transaction has committed this */ | ||
| 2160 | if (ref_generation != trans->transid) | ||
| 2161 | pin = 1; | ||
| 2162 | |||
| 2163 | ret = __free_extent(trans, root, bytenr, num_bytes, parent, | ||
| 2164 | root_objectid, ref_generation, | ||
| 2165 | owner_objectid, pin, pin == 0); | ||
| 2166 | |||
| 2167 | finish_current_insert(trans, root->fs_info->extent_root); | ||
| 2168 | pending_ret = del_pending_extents(trans, root->fs_info->extent_root); | ||
| 2169 | return ret ? ret : pending_ret; | ||
| 2170 | } | ||
| 2171 | |||
| 2172 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | ||
| 2173 | struct btrfs_root *root, | ||
| 2174 | u64 bytenr, u64 num_bytes, u64 parent, | ||
| 2175 | u64 root_objectid, u64 ref_generation, | ||
| 2176 | u64 owner_objectid, int pin) | ||
| 2177 | { | ||
| 2178 | int ret; | ||
| 2179 | |||
| 2180 | maybe_lock_mutex(root); | ||
| 2181 | ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, | ||
| 2182 | root_objectid, ref_generation, | ||
| 2183 | owner_objectid, pin); | ||
| 2184 | maybe_unlock_mutex(root); | ||
| 2185 | return ret; | ||
| 2186 | } | ||
| 2187 | |||
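| | /* round val up to the next stripe size boundary */ | ||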
| 2188 | static u64 stripe_align(struct btrfs_root *root, u64 val) | ||
| 2189 | { | ||
| 2190 | u64 mask = ((u64)root->stripesize - 1); | ||
| 2191 | u64 ret = (val + mask) & ~mask; | ||
| 2192 | return ret; | ||
| 2193 | } | ||
| 2194 | |||
| 2195 | /* | ||
| 2196 | * walks the btree of allocated extents and finds a hole of a given size. | ||
| 2197 | * The key ins is changed to record the hole: | ||
| 2198 | * ins->objectid == start byte of the hole | ||
| 2199 | * ins->flags = BTRFS_EXTENT_ITEM_KEY | ||
| 2200 | * ins->offset == number of bytes | ||
| 2201 | * Any available blocks before search_start are skipped. | ||
| 2202 | */ | ||
| 2203 | static int noinline find_free_extent(struct btrfs_trans_handle *trans, | ||
| 2204 | struct btrfs_root *orig_root, | ||
| 2205 | u64 num_bytes, u64 empty_size, | ||
| 2206 | u64 search_start, u64 search_end, | ||
| 2207 | u64 hint_byte, struct btrfs_key *ins, | ||
| 2208 | u64 exclude_start, u64 exclude_nr, | ||
| 2209 | int data) | ||
| 2210 | { | ||
| 2211 | int ret; | ||
| 2212 | u64 orig_search_start; | ||
| 2213 | struct btrfs_root * root = orig_root->fs_info->extent_root; | ||
| 2214 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2215 | u64 total_needed = num_bytes; | ||
| 2216 | u64 *last_ptr = NULL; | ||
| 2217 | struct btrfs_block_group_cache *block_group; | ||
| 2218 | int chunk_alloc_done = 0; | ||
| 2219 | int empty_cluster = 2 * 1024 * 1024; | ||
| 2220 | int allowed_chunk_alloc = 0; | ||
| 2221 | |||
| 2222 | WARN_ON(num_bytes < root->sectorsize); | ||
| 2223 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | ||
| 2224 | |||
| 2225 | if (orig_root->ref_cows || empty_size) | ||
| 2226 | allowed_chunk_alloc = 1; | ||
| 2227 | |||
| 2228 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | ||
| 2229 | last_ptr = &root->fs_info->last_alloc; | ||
| 2230 | empty_cluster = 256 * 1024; | ||
| 2231 | } | ||
| 2232 | |||
| 2233 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) | ||
| 2234 | last_ptr = &root->fs_info->last_data_alloc; | ||
| 2235 | |||
| 2236 | if (last_ptr) { | ||
| 2237 | if (*last_ptr) | ||
| 2238 | hint_byte = *last_ptr; | ||
| 2239 | else | ||
| 2240 | empty_size += empty_cluster; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | search_start = max(search_start, first_logical_byte(root, 0)); | ||
| 2244 | orig_search_start = search_start; | ||
| 2245 | |||
| 2246 | search_start = max(search_start, hint_byte); | ||
| 2247 | total_needed += empty_size; | ||
| 2248 | |||
| 2249 | new_group: | ||
| 2250 | block_group = btrfs_lookup_block_group(info, search_start); | ||
| 2251 | if (!block_group) | ||
| 2252 | block_group = btrfs_lookup_first_block_group(info, | ||
| 2253 | search_start); | ||
| 2254 | |||
| 2255 | /* | ||
| 2256 | * Ok, this looks a little tricky, but it's really simple. First, if we | ||
| 2257 | * didn't find a block group, we obviously want to start over. | ||
| 2258 | * Second, if the block group we found does not match the type we | ||
| 2259 | * need, and we have a last_ptr that is not 0, chances are the last | ||
| 2260 | * allocation we made was at the end of the block group, so let's go | ||
| 2261 | * ahead and skip looking through the rest of the block groups and | ||
| 2262 | * start at the beginning. This helps with metadata allocations, | ||
| 2263 | * since you are likely to have a bunch of data block groups to search | ||
| 2264 | * through before you realize that you need to start over, so start | ||
| 2265 | * over right away and save the time. | ||
| 2266 | */ | ||
| 2267 | if (!block_group || (!block_group_bits(block_group, data) && | ||
| 2268 | last_ptr && *last_ptr)) { | ||
| 2269 | if (search_start != orig_search_start) { | ||
| 2270 | if (last_ptr && *last_ptr) { | ||
| 2271 | total_needed += empty_cluster; | ||
| 2272 | *last_ptr = 0; | ||
| 2273 | } | ||
| 2274 | search_start = orig_search_start; | ||
| 2275 | goto new_group; | ||
| 2276 | } else if (!chunk_alloc_done && allowed_chunk_alloc) { | ||
| 2277 | ret = do_chunk_alloc(trans, root, | ||
| 2278 | num_bytes + 2 * 1024 * 1024, | ||
| 2279 | data, 1); | ||
| 2280 | if (ret < 0) | ||
| 2281 | goto error; | ||
| 2282 | BUG_ON(ret); | ||
| 2283 | chunk_alloc_done = 1; | ||
| 2284 | search_start = orig_search_start; | ||
| 2285 | goto new_group; | ||
| 2286 | } else { | ||
| 2287 | ret = -ENOSPC; | ||
| 2288 | goto error; | ||
| 2289 | } | ||
| 2290 | } | ||
| 2291 | |||
| 2292 | /* | ||
| 2293 | * this is going to search through all of the existing block groups it | ||
| 2294 | * can find; if we don't find something, we need to see if we can | ||
| 2295 | * allocate what we need. | ||
| 2296 | */ | ||
| 2297 | ret = find_free_space(root, &block_group, &search_start, | ||
| 2298 | total_needed, data); | ||
| 2299 | if (ret == -ENOSPC) { | ||
| 2300 | /* | ||
| 2301 | * instead of allocating, start at the original search start | ||
| 2302 | * and see if there is something to be found; if not, then we | ||
| 2303 | * allocate | ||
| 2304 | */ | ||
| 2305 | if (search_start != orig_search_start) { | ||
| 2306 | if (last_ptr && *last_ptr) { | ||
| 2307 | *last_ptr = 0; | ||
| 2308 | total_needed += empty_cluster; | ||
| 2309 | } | ||
| 2310 | search_start = orig_search_start; | ||
| 2311 | goto new_group; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | /* | ||
| 2315 | * we've already allocated, we're pretty screwed | ||
| 2316 | */ | ||
| 2317 | if (chunk_alloc_done) { | ||
| 2318 | goto error; | ||
| 2319 | } else if (!allowed_chunk_alloc && block_group && | ||
| 2320 | block_group_bits(block_group, data)) { | ||
| 2321 | block_group->space_info->force_alloc = 1; | ||
| 2322 | goto error; | ||
| 2323 | } else if (!allowed_chunk_alloc) { | ||
| 2324 | goto error; | ||
| 2325 | } | ||
| 2326 | |||
| 2327 | ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, | ||
| 2328 | data, 1); | ||
| 2329 | if (ret < 0) | ||
| 2330 | goto error; | ||
| 2331 | |||
| 2332 | BUG_ON(ret); | ||
| 2333 | chunk_alloc_done = 1; | ||
| 2334 | if (block_group) | ||
| 2335 | search_start = block_group->key.objectid + | ||
| 2336 | block_group->key.offset; | ||
| 2337 | else | ||
| 2338 | search_start = orig_search_start; | ||
| 2339 | goto new_group; | ||
| 2340 | } | ||
| 2341 | |||
| 2342 | if (ret) | ||
| 2343 | goto error; | ||
| 2344 | |||
| 2345 | search_start = stripe_align(root, search_start); | ||
| 2346 | ins->objectid = search_start; | ||
| 2347 | ins->offset = num_bytes; | ||
| 2348 | |||
| 2349 | if (ins->objectid + num_bytes >= search_end) { | ||
| 2350 | search_start = orig_search_start; | ||
| 2351 | if (chunk_alloc_done) { | ||
| 2352 | ret = -ENOSPC; | ||
| 2353 | goto error; | ||
| 2354 | } | ||
| 2355 | goto new_group; | ||
| 2356 | } | ||
| 2357 | |||
| 2358 | if (ins->objectid + num_bytes > | ||
| 2359 | block_group->key.objectid + block_group->key.offset) { | ||
| 2360 | if (search_start == orig_search_start && chunk_alloc_done) { | ||
| 2361 | ret = -ENOSPC; | ||
| 2362 | goto error; | ||
| 2363 | } | ||
| 2364 | search_start = block_group->key.objectid + | ||
| 2365 | block_group->key.offset; | ||
| 2366 | goto new_group; | ||
| 2367 | } | ||
| 2368 | |||
| 2369 | if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && | ||
| 2370 | ins->objectid < exclude_start + exclude_nr)) { | ||
| 2371 | search_start = exclude_start + exclude_nr; | ||
| 2372 | goto new_group; | ||
| 2373 | } | ||
| 2374 | |||
| 2375 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | ||
| 2376 | trans->block_group = block_group; | ||
| 2377 | |||
| 2378 | ins->offset = num_bytes; | ||
| 2379 | if (last_ptr) { | ||
| 2380 | *last_ptr = ins->objectid + ins->offset; | ||
| 2381 | if (*last_ptr == | ||
| 2382 | btrfs_super_total_bytes(&root->fs_info->super_copy)) | ||
| 2383 | *last_ptr = 0; | ||
| 2384 | } | ||
| 2385 | |||
| 2386 | ret = 0; | ||
| 2387 | error: | ||
| 2388 | return ret; | ||
| 2389 | } | ||
| 2390 | |||
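| | /* print a space_info and its block groups when an allocation fails */ | ||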
| 2391 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | ||
| 2392 | { | ||
| 2393 | struct btrfs_block_group_cache *cache; | ||
| 2394 | struct list_head *l; | ||
| 2395 | |||
| 2396 | printk(KERN_INFO "space_info has %Lu free, is %sfull\n", | ||
| 2397 | info->total_bytes - info->bytes_used - info->bytes_pinned - | ||
| 2398 | info->bytes_reserved, (info->full) ? "" : "not "); | ||
| 2399 | |||
| 2400 | spin_lock(&info->lock); | ||
| 2401 | list_for_each(l, &info->block_groups) { | ||
| 2402 | cache = list_entry(l, struct btrfs_block_group_cache, list); | ||
| 2403 | spin_lock(&cache->lock); | ||
| 2404 | printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " | ||
| 2405 | "%Lu pinned %Lu reserved\n", | ||
| 2406 | cache->key.objectid, cache->key.offset, | ||
| 2407 | btrfs_block_group_used(&cache->item), | ||
| 2408 | cache->pinned, cache->reserved); | ||
| 2409 | btrfs_dump_free_space(cache, bytes); | ||
| 2410 | spin_unlock(&cache->lock); | ||
| 2411 | } | ||
| 2412 | spin_unlock(&info->lock); | ||
| 2413 | } | ||
| 2414 | |||
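| | /* | ||
| | * pick the allocation profile for this reservation, make sure a chunk | ||
| | * with room exists, and then carve num_bytes out of a block group's | ||
| | * free space.  On -ENOSPC the request is halved (down to | ||
| | * min_alloc_size) and retried. | ||
| | */ | ||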
| 2415 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
| 2416 | struct btrfs_root *root, | ||
| 2417 | u64 num_bytes, u64 min_alloc_size, | ||
| 2418 | u64 empty_size, u64 hint_byte, | ||
| 2419 | u64 search_end, struct btrfs_key *ins, | ||
| 2420 | u64 data) | ||
| 2421 | { | ||
| 2422 | int ret; | ||
| 2423 | u64 search_start = 0; | ||
| 2424 | u64 alloc_profile; | ||
| 2425 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2426 | struct btrfs_block_group_cache *cache; | ||
| 2427 | |||
| 2428 | if (data) { | ||
| 2429 | alloc_profile = info->avail_data_alloc_bits & | ||
| 2430 | info->data_alloc_profile; | ||
| 2431 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
| 2432 | } else if (root == root->fs_info->chunk_root) { | ||
| 2433 | alloc_profile = info->avail_system_alloc_bits & | ||
| 2434 | info->system_alloc_profile; | ||
| 2435 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
| 2436 | } else { | ||
| 2437 | alloc_profile = info->avail_metadata_alloc_bits & | ||
| 2438 | info->metadata_alloc_profile; | ||
| 2439 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
| 2440 | } | ||
| 2441 | again: | ||
| 2442 | data = reduce_alloc_profile(root, data); | ||
| 2443 | /* | ||
| 2444 | * the only place that sets empty_size is btrfs_realloc_node, which | ||
| 2445 | * is not called recursively on allocations | ||
| 2446 | */ | ||
| 2447 | if (empty_size || root->ref_cows) { | ||
| 2448 | if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { | ||
| 2449 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2450 | 2 * 1024 * 1024, | ||
| 2451 | BTRFS_BLOCK_GROUP_METADATA | | ||
| 2452 | (info->metadata_alloc_profile & | ||
| 2453 | info->avail_metadata_alloc_bits), 0); | ||
| 2454 | } | ||
| 2455 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2456 | num_bytes + 2 * 1024 * 1024, data, 0); | ||
| 2457 | } | ||
| 2458 | |||
| 2459 | WARN_ON(num_bytes < root->sectorsize); | ||
| 2460 | ret = find_free_extent(trans, root, num_bytes, empty_size, | ||
| 2461 | search_start, search_end, hint_byte, ins, | ||
| 2462 | trans->alloc_exclude_start, | ||
| 2463 | trans->alloc_exclude_nr, data); | ||
| 2464 | |||
| 2465 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | ||
| 2466 | num_bytes = num_bytes >> 1; | ||
| 2467 | num_bytes = num_bytes & ~(root->sectorsize - 1); | ||
| 2468 | num_bytes = max(num_bytes, min_alloc_size); | ||
| 2469 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2470 | num_bytes, data, 1); | ||
| 2471 | goto again; | ||
| 2472 | } | ||
| 2473 | if (ret) { | ||
| 2474 | struct btrfs_space_info *sinfo; | ||
| 2475 | |||
| 2476 | sinfo = __find_space_info(root->fs_info, data); | ||
| 2477 | printk("allocation failed flags %Lu, wanted %Lu\n", | ||
| 2478 | data, num_bytes); | ||
| 2479 | dump_space_info(sinfo, num_bytes); | ||
| 2480 | BUG(); | ||
| 2481 | } | ||
| 2482 | cache = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
| 2483 | if (!cache) { | ||
| 2484 | printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid); | ||
| 2485 | return -ENOSPC; | ||
| 2486 | } | ||
| 2487 | |||
| 2488 | ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset); | ||
| 2489 | |||
| 2490 | return ret; | ||
| 2491 | } | ||
| 2492 | |||
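| | /* give a reserved but never used extent back to the free space cache */ | ||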
| 2493 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | ||
| 2494 | { | ||
| 2495 | struct btrfs_block_group_cache *cache; | ||
| 2496 | |||
| 2497 | maybe_lock_mutex(root); | ||
| 2498 | cache = btrfs_lookup_block_group(root->fs_info, start); | ||
| 2499 | if (!cache) { | ||
| 2500 | printk(KERN_ERR "Unable to find block group for %Lu\n", start); | ||
| 2501 | maybe_unlock_mutex(root); | ||
| 2502 | return -ENOSPC; | ||
| 2503 | } | ||
| 2504 | btrfs_add_free_space(cache, start, len); | ||
| 2505 | update_reserved_extents(root, start, len, 0); | ||
| 2506 | maybe_unlock_mutex(root); | ||
| 2507 | return 0; | ||
| 2508 | } | ||
| 2509 | |||
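| | /* reserve an extent and mark the range as reserved in its block groups */ | ||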
| 2510 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
| 2511 | struct btrfs_root *root, | ||
| 2512 | u64 num_bytes, u64 min_alloc_size, | ||
| 2513 | u64 empty_size, u64 hint_byte, | ||
| 2514 | u64 search_end, struct btrfs_key *ins, | ||
| 2515 | u64 data) | ||
| 2516 | { | ||
| 2517 | int ret; | ||
| 2518 | maybe_lock_mutex(root); | ||
| 2519 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
| 2520 | empty_size, hint_byte, search_end, ins, | ||
| 2521 | data); | ||
| 2522 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
| 2523 | maybe_unlock_mutex(root); | ||
| 2524 | return ret; | ||
| 2525 | } | ||
| 2526 | |||
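| | /* | ||
| | * turn a reserved extent into a real allocation: insert the extent | ||
| | * item and its first backref and update the super block and root | ||
| | * item byte accounting.  Inserts into the extent root itself are | ||
| | * queued as pending operations instead. | ||
| | */ | ||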
| 2527 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | ||
| 2528 | struct btrfs_root *root, u64 parent, | ||
| 2529 | u64 root_objectid, u64 ref_generation, | ||
| 2530 | u64 owner, struct btrfs_key *ins) | ||
| 2531 | { | ||
| 2532 | int ret; | ||
| 2533 | int pending_ret; | ||
| 2534 | u64 super_used; | ||
| 2535 | u64 root_used; | ||
| 2536 | u64 num_bytes = ins->offset; | ||
| 2537 | u32 sizes[2]; | ||
| 2538 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2539 | struct btrfs_root *extent_root = info->extent_root; | ||
| 2540 | struct btrfs_extent_item *extent_item; | ||
| 2541 | struct btrfs_extent_ref *ref; | ||
| 2542 | struct btrfs_path *path; | ||
| 2543 | struct btrfs_key keys[2]; | ||
| 2544 | |||
| 2545 | if (parent == 0) | ||
| 2546 | parent = ins->objectid; | ||
| 2547 | |||
| 2548 | /* block accounting for super block */ | ||
| 2549 | spin_lock_irq(&info->delalloc_lock); | ||
| 2550 | super_used = btrfs_super_bytes_used(&info->super_copy); | ||
| 2551 | btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); | ||
| 2552 | spin_unlock_irq(&info->delalloc_lock); | ||
| 2553 | |||
| 2554 | /* block accounting for root item */ | ||
| 2555 | root_used = btrfs_root_used(&root->root_item); | ||
| 2556 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); | ||
| 2557 | |||
| 2558 | if (root == extent_root) { | ||
| 2559 | struct pending_extent_op *extent_op; | ||
| 2560 | |||
| 2561 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
| 2562 | BUG_ON(!extent_op); | ||
| 2563 | |||
| 2564 | extent_op->type = PENDING_EXTENT_INSERT; | ||
| 2565 | extent_op->bytenr = ins->objectid; | ||
| 2566 | extent_op->num_bytes = ins->offset; | ||
| 2567 | extent_op->parent = parent; | ||
| 2568 | extent_op->orig_parent = 0; | ||
| 2569 | extent_op->generation = ref_generation; | ||
| 2570 | extent_op->orig_generation = 0; | ||
| 2571 | extent_op->level = (int)owner; | ||
| 2572 | |||
| 2573 | set_extent_bits(&root->fs_info->extent_ins, ins->objectid, | ||
| 2574 | ins->objectid + ins->offset - 1, | ||
| 2575 | EXTENT_LOCKED, GFP_NOFS); | ||
| 2576 | set_state_private(&root->fs_info->extent_ins, | ||
| 2577 | ins->objectid, (unsigned long)extent_op); | ||
| 2578 | goto update_block; | ||
| 2579 | } | ||
| 2580 | |||
| 2581 | memcpy(&keys[0], ins, sizeof(*ins)); | ||
| 2582 | keys[1].objectid = ins->objectid; | ||
| 2583 | keys[1].type = BTRFS_EXTENT_REF_KEY; | ||
| 2584 | keys[1].offset = parent; | ||
| 2585 | sizes[0] = sizeof(*extent_item); | ||
| 2586 | sizes[1] = sizeof(*ref); | ||
| 2587 | |||
| 2588 | path = btrfs_alloc_path(); | ||
| 2589 | BUG_ON(!path); | ||
| 2590 | |||
| 2591 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, | ||
| 2592 | sizes, 2); | ||
| 2593 | BUG_ON(ret); | ||
| 2594 | |||
| 2595 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 2596 | struct btrfs_extent_item); | ||
| 2597 | btrfs_set_extent_refs(path->nodes[0], extent_item, 1); | ||
| 2598 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | ||
| 2599 | struct btrfs_extent_ref); | ||
| 2600 | |||
| 2601 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); | ||
| 2602 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); | ||
| 2603 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); | ||
| 2604 | btrfs_set_ref_num_refs(path->nodes[0], ref, 1); | ||
| 2605 | |||
| 2606 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 2607 | |||
| 2608 | trans->alloc_exclude_start = 0; | ||
| 2609 | trans->alloc_exclude_nr = 0; | ||
| 2610 | btrfs_free_path(path); | ||
| 2611 | finish_current_insert(trans, extent_root); | ||
| 2612 | pending_ret = del_pending_extents(trans, extent_root); | ||
| 2613 | |||
| 2614 | if (ret) | ||
| 2615 | goto out; | ||
| 2616 | if (pending_ret) { | ||
| 2617 | ret = pending_ret; | ||
| 2618 | goto out; | ||
| 2619 | } | ||
| 2620 | |||
| 2621 | update_block: | ||
| 2622 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); | ||
| 2623 | if (ret) { | ||
| 2624 | printk("update block group failed for %Lu %Lu\n", | ||
| 2625 | ins->objectid, ins->offset); | ||
| 2626 | BUG(); | ||
| 2627 | } | ||
| 2628 | out: | ||
| 2629 | return ret; | ||
| 2630 | } | ||
| 2631 | |||
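| | /* | ||
| | * wrapper around __btrfs_alloc_reserved_extent.  Extents for the tree | ||
| | * log are skipped here; they are recorded by the log recovery code | ||
| | * via btrfs_alloc_logged_extent instead. | ||
| | */ | ||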
| 2632 | int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | ||
| 2633 | struct btrfs_root *root, u64 parent, | ||
| 2634 | u64 root_objectid, u64 ref_generation, | ||
| 2635 | u64 owner, struct btrfs_key *ins) | ||
| 2636 | { | ||
| 2637 | int ret; | ||
| 2638 | |||
| 2639 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) | ||
| 2640 | return 0; | ||
| 2641 | maybe_lock_mutex(root); | ||
| 2642 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | ||
| 2643 | ref_generation, owner, ins); | ||
| 2644 | update_reserved_extents(root, ins->objectid, ins->offset, 0); | ||
| 2645 | maybe_unlock_mutex(root); | ||
| 2646 | return ret; | ||
| 2647 | } | ||
| 2648 | |||
| 2649 | /* | ||
| 2650 | * this is used by the tree logging recovery code. It records that | ||
| 2651 | * an extent has been allocated and makes sure to clear the free | ||
| 2652 | * space cache bits as well | ||
| 2653 | */ | ||
| 2654 | int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | ||
| 2655 | struct btrfs_root *root, u64 parent, | ||
| 2656 | u64 root_objectid, u64 ref_generation, | ||
| 2657 | u64 owner, struct btrfs_key *ins) | ||
| 2658 | { | ||
| 2659 | int ret; | ||
| 2660 | struct btrfs_block_group_cache *block_group; | ||
| 2661 | |||
| 2662 | maybe_lock_mutex(root); | ||
| 2663 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
| 2664 | cache_block_group(root, block_group); | ||
| 2665 | |||
| 2666 | ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); | ||
| 2667 | BUG_ON(ret); | ||
| 2668 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | ||
| 2669 | ref_generation, owner, ins); | ||
| 2670 | maybe_unlock_mutex(root); | ||
| 2671 | return ret; | ||
| 2672 | } | ||
| 2673 | |||
| 2674 | /* | ||
| 2675 | * finds a free extent and does all the dirty work required for | ||
| 2676 | * allocation.  The key for the allocated extent is returned | ||
| 2677 | * through ins. | ||
| 2678 | * | ||
| 2679 | * returns 0 if everything worked, non-zero otherwise. | ||
| 2680 | */ | ||
| 2681 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | ||
| 2682 | struct btrfs_root *root, | ||
| 2683 | u64 num_bytes, u64 parent, u64 min_alloc_size, | ||
| 2684 | u64 root_objectid, u64 ref_generation, | ||
| 2685 | u64 owner_objectid, u64 empty_size, u64 hint_byte, | ||
| 2686 | u64 search_end, struct btrfs_key *ins, u64 data) | ||
| 2687 | { | ||
| 2688 | int ret; | ||
| 2689 | |||
| 2690 | maybe_lock_mutex(root); | ||
| 2691 | |||
| 2692 | ret = __btrfs_reserve_extent(trans, root, num_bytes, | ||
| 2693 | min_alloc_size, empty_size, hint_byte, | ||
| 2694 | search_end, ins, data); | ||
| 2695 | BUG_ON(ret); | ||
| 2696 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 2697 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, | ||
| 2698 | root_objectid, ref_generation, | ||
| 2699 | owner_objectid, ins); | ||
| 2700 | BUG_ON(ret); | ||
| 2701 | |||
| 2702 | } else { | ||
| 2703 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
| 2704 | } | ||
| 2705 | maybe_unlock_mutex(root); | ||
| 2706 | return ret; | ||
| 2707 | } | ||
| 2708 | |||
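| | /* | ||
| | * set up the extent buffer for a freshly allocated tree block and | ||
| | * mark it dirty in the right io tree (log tree blocks go into | ||
| | * dirty_log_pages instead of the transaction's dirty_pages) | ||
| | */ | ||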
| 2709 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | ||
| 2710 | struct btrfs_root *root, | ||
| 2711 | u64 bytenr, u32 blocksize) | ||
| 2712 | { | ||
| 2713 | struct extent_buffer *buf; | ||
| 2714 | |||
| 2715 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 2716 | if (!buf) | ||
| 2717 | return ERR_PTR(-ENOMEM); | ||
| 2718 | btrfs_set_header_generation(buf, trans->transid); | ||
| 2719 | btrfs_tree_lock(buf); | ||
| 2720 | clean_tree_block(trans, root, buf); | ||
| 2721 | btrfs_set_buffer_uptodate(buf); | ||
| 2722 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
| 2723 | set_extent_dirty(&root->dirty_log_pages, buf->start, | ||
| 2724 | buf->start + buf->len - 1, GFP_NOFS); | ||
| 2725 | } else { | ||
| 2726 | set_extent_dirty(&trans->transaction->dirty_pages, buf->start, | ||
| 2727 | buf->start + buf->len - 1, GFP_NOFS); | ||
| 2728 | } | ||
| 2729 | trans->blocks_used++; | ||
| 2730 | return buf; | ||
| 2731 | } | ||
| 2732 | |||
| 2733 | /* | ||
| 2734 | * helper function to allocate a block for a given tree. | ||
| 2735 | * returns the tree buffer or an ERR_PTR on failure. | ||
| 2736 | */ | ||
| 2737 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | ||
| 2738 | struct btrfs_root *root, | ||
| 2739 | u32 blocksize, u64 parent, | ||
| 2740 | u64 root_objectid, | ||
| 2741 | u64 ref_generation, | ||
| 2742 | int level, | ||
| 2743 | u64 hint, | ||
| 2744 | u64 empty_size) | ||
| 2745 | { | ||
| 2746 | struct btrfs_key ins; | ||
| 2747 | int ret; | ||
| 2748 | struct extent_buffer *buf; | ||
| 2749 | |||
| 2750 | ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, | ||
| 2751 | root_objectid, ref_generation, level, | ||
| 2752 | empty_size, hint, (u64)-1, &ins, 0); | ||
| 2753 | if (ret) { | ||
| 2754 | BUG_ON(ret > 0); | ||
| 2755 | return ERR_PTR(ret); | ||
| 2756 | } | ||
| 2757 | |||
| 2758 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); | ||
| 2759 | return buf; | ||
| 2760 | } | ||
| 2761 | |||
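| | /* | ||
| | * drop the extent references held by the file extent items in one | ||
| | * leaf; used while freeing a snapshot | ||
| | */ | ||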
| 2762 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
| 2763 | struct btrfs_root *root, struct extent_buffer *leaf) | ||
| 2764 | { | ||
| 2765 | u64 leaf_owner; | ||
| 2766 | u64 leaf_generation; | ||
| 2767 | struct btrfs_key key; | ||
| 2768 | struct btrfs_file_extent_item *fi; | ||
| 2769 | int i; | ||
| 2770 | int nritems; | ||
| 2771 | int ret; | ||
| 2772 | |||
| 2773 | BUG_ON(!btrfs_is_leaf(leaf)); | ||
| 2774 | nritems = btrfs_header_nritems(leaf); | ||
| 2775 | leaf_owner = btrfs_header_owner(leaf); | ||
| 2776 | leaf_generation = btrfs_header_generation(leaf); | ||
| 2777 | |||
| 2778 | for (i = 0; i < nritems; i++) { | ||
| 2779 | u64 disk_bytenr; | ||
| 2780 | cond_resched(); | ||
| 2781 | |||
| 2782 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
| 2783 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
| 2784 | continue; | ||
| 2785 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
| 2786 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 2787 | BTRFS_FILE_EXTENT_INLINE) | ||
| 2788 | continue; | ||
| 2789 | /* | ||
| 2790 | * FIXME make sure to insert a trans record that | ||
| 2791 | * repeats the snapshot del on crash | ||
| 2792 | */ | ||
| 2793 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 2794 | if (disk_bytenr == 0) | ||
| 2795 | continue; | ||
| 2796 | |||
| 2797 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 2798 | ret = __btrfs_free_extent(trans, root, disk_bytenr, | ||
| 2799 | btrfs_file_extent_disk_num_bytes(leaf, fi), | ||
| 2800 | leaf->start, leaf_owner, leaf_generation, | ||
| 2801 | key.objectid, 0); | ||
| 2802 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 2803 | BUG_ON(ret); | ||
| 2804 | |||
| 2805 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 2806 | wake_up(&root->fs_info->transaction_throttle); | ||
| 2807 | cond_resched(); | ||
| 2808 | } | ||
| 2809 | return 0; | ||
| 2810 | } | ||
| 2811 | |||
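| | /* | ||
| | * like btrfs_drop_leaf_ref, but works from a cached leaf_ref instead | ||
| | * of reading the leaf itself | ||
| | */ | ||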
| 2812 | static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
| 2813 | struct btrfs_root *root, | ||
| 2814 | struct btrfs_leaf_ref *ref) | ||
| 2815 | { | ||
| 2816 | int i; | ||
| 2817 | int ret; | ||
| 2818 | struct btrfs_extent_info *info = ref->extents; | ||
| 2819 | |||
| 2820 | for (i = 0; i < ref->nritems; i++) { | ||
| 2821 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 2822 | ret = __btrfs_free_extent(trans, root, info->bytenr, | ||
| 2823 | info->num_bytes, ref->bytenr, | ||
| 2824 | ref->owner, ref->generation, | ||
| 2825 | info->objectid, 0); | ||
| 2826 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 2827 | |||
| 2828 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 2829 | wake_up(&root->fs_info->transaction_throttle); | ||
| 2830 | cond_resched(); | ||
| 2831 | |||
| 2832 | BUG_ON(ret); | ||
| 2833 | info++; | ||
| 2834 | } | ||
| 2835 | |||
| 2836 | return 0; | ||
| 2837 | } | ||
| 2838 | |||
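| | /* look up the ref count on an extent while dropping a snapshot */ | ||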
| 2839 | int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, | ||
| 2840 | u32 *refs) | ||
| 2841 | { | ||
| 2842 | int ret; | ||
| 2843 | |||
| 2844 | ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); | ||
| 2845 | BUG_ON(ret); | ||
| 2846 | |||
| 2847 | #if 0 /* some debugging code in case we see problems here */ | ||
| 2848 | /* if the refs count is one, it won't get increased again. But | ||
| 2849 | * if the ref count is > 1, someone may be decreasing it at | ||
| 2850 | * the same time we are. | ||
| 2851 | */ | ||
| 2852 | if (*refs != 1) { | ||
| 2853 | struct extent_buffer *eb = NULL; | ||
| 2854 | eb = btrfs_find_create_tree_block(root, start, len); | ||
| 2855 | if (eb) | ||
| 2856 | btrfs_tree_lock(eb); | ||
| 2857 | |||
| 2858 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 2859 | ret = lookup_extent_ref(NULL, root, start, len, refs); | ||
| 2860 | BUG_ON(ret); | ||
| 2861 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 2862 | |||
| 2863 | if (eb) { | ||
| 2864 | btrfs_tree_unlock(eb); | ||
| 2865 | free_extent_buffer(eb); | ||
| 2866 | } | ||
| 2867 | if (*refs == 1) { | ||
| 2868 | printk("block %llu went down to one during drop_snap\n", | ||
| 2869 | (unsigned long long)start); | ||
| 2870 | } | ||
| 2871 | |||
| 2872 | } | ||
| 2873 | #endif | ||
| 2874 | |||
| 2875 | cond_resched(); | ||
| 2876 | return ret; | ||
| 2877 | } | ||
| 2878 | |||
| 2879 | /* | ||
| 2880 | * helper function for drop_snapshot, this walks down the tree dropping ref | ||
| 2881 | * counts as it goes. | ||
| 2882 | */ | ||
| 2883 | static int noinline walk_down_tree(struct btrfs_trans_handle *trans, | ||
| 2884 | struct btrfs_root *root, | ||
| 2885 | struct btrfs_path *path, int *level) | ||
| 2886 | { | ||
| 2887 | u64 root_owner; | ||
| 2888 | u64 root_gen; | ||
| 2889 | u64 bytenr; | ||
| 2890 | u64 ptr_gen; | ||
| 2891 | struct extent_buffer *next; | ||
| 2892 | struct extent_buffer *cur; | ||
| 2893 | struct extent_buffer *parent; | ||
| 2894 | struct btrfs_leaf_ref *ref; | ||
| 2895 | u32 blocksize; | ||
| 2896 | int ret; | ||
| 2897 | u32 refs; | ||
| 2898 | |||
| 2899 | WARN_ON(*level < 0); | ||
| 2900 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 2901 | ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, | ||
| 2902 | path->nodes[*level]->len, &refs); | ||
| 2903 | BUG_ON(ret); | ||
| 2904 | if (refs > 1) | ||
| 2905 | goto out; | ||
| 2906 | |||
| 2907 | /* | ||
| 2908 | * walk down to the last node level and free all the leaves | ||
| 2909 | */ | ||
| 2910 | while (*level >= 0) { | ||
| 2911 | WARN_ON(*level < 0); | ||
| 2912 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 2913 | cur = path->nodes[*level]; | ||
| 2914 | |||
| 2915 | WARN_ON(btrfs_header_level(cur) != *level); | ||
| 2917 | |||
| 2918 | if (path->slots[*level] >= | ||
| 2919 | btrfs_header_nritems(cur)) | ||
| 2920 | break; | ||
| 2921 | if (*level == 0) { | ||
| 2922 | ret = btrfs_drop_leaf_ref(trans, root, cur); | ||
| 2923 | BUG_ON(ret); | ||
| 2924 | break; | ||
| 2925 | } | ||
| 2926 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
| 2927 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
| 2928 | blocksize = btrfs_level_size(root, *level - 1); | ||
| 2929 | |||
| 2930 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | ||
| 2931 | BUG_ON(ret); | ||
| 2932 | if (refs != 1) { | ||
| 2933 | parent = path->nodes[*level]; | ||
| 2934 | root_owner = btrfs_header_owner(parent); | ||
| 2935 | root_gen = btrfs_header_generation(parent); | ||
| 2936 | path->slots[*level]++; | ||
| 2937 | |||
| 2938 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 2939 | ret = __btrfs_free_extent(trans, root, bytenr, | ||
| 2940 | blocksize, parent->start, | ||
| 2941 | root_owner, root_gen, | ||
| 2942 | *level - 1, 1); | ||
| 2943 | BUG_ON(ret); | ||
| 2944 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 2945 | |||
| 2946 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 2947 | wake_up(&root->fs_info->transaction_throttle); | ||
| 2948 | cond_resched(); | ||
| 2949 | |||
| 2950 | continue; | ||
| 2951 | } | ||
| 2952 | /* | ||
| 2953 | * at this point, we have a single ref, and since the | ||
| 2954 | * only place referencing this extent is a dead root, | ||
| 2955 | * the reference count should never go higher. | ||
| 2956 | * So, we don't need to check it again. | ||
| 2957 | */ | ||
| 2958 | if (*level == 1) { | ||
| 2959 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
| 2960 | if (ref && ref->generation != ptr_gen) { | ||
| 2961 | btrfs_free_leaf_ref(root, ref); | ||
| 2962 | ref = NULL; | ||
| 2963 | } | ||
| 2964 | if (ref) { | ||
| 2965 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
| 2966 | BUG_ON(ret); | ||
| 2967 | btrfs_remove_leaf_ref(root, ref); | ||
| 2968 | btrfs_free_leaf_ref(root, ref); | ||
| 2969 | *level = 0; | ||
| 2970 | break; | ||
| 2971 | } | ||
| 2972 | if (printk_ratelimit()) { | ||
| 2973 | printk("leaf ref miss for bytenr %llu\n", | ||
| 2974 | (unsigned long long)bytenr); | ||
| 2975 | } | ||
| 2976 | } | ||
| 2977 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
| 2978 | if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { | ||
| 2979 | free_extent_buffer(next); | ||
| 2980 | |||
| 2981 | next = read_tree_block(root, bytenr, blocksize, | ||
| 2982 | ptr_gen); | ||
| 2983 | cond_resched(); | ||
| 2984 | #if 0 | ||
| 2985 | /* | ||
| 2986 | * this is a debugging check and can go away; | ||
| 2987 | * the ref should never go all the way down to 1 | ||
| 2988 | * at this point. | ||
| 2989 | */ | ||
| 2990 | ret = lookup_extent_ref(NULL, root, bytenr, blocksize, | ||
| 2991 | &refs); | ||
| 2992 | BUG_ON(ret); | ||
| 2993 | WARN_ON(refs != 1); | ||
| 2994 | #endif | ||
| 2995 | } | ||
| 2996 | WARN_ON(*level <= 0); | ||
| 2997 | if (path->nodes[*level - 1]) | ||
| 2998 | free_extent_buffer(path->nodes[*level - 1]); | ||
| 2999 | path->nodes[*level - 1] = next; | ||
| 3000 | *level = btrfs_header_level(next); | ||
| 3001 | path->slots[*level] = 0; | ||
| 3002 | cond_resched(); | ||
| 3003 | } | ||
| 3004 | out: | ||
| 3005 | WARN_ON(*level < 0); | ||
| 3006 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 3007 | |||
| 3008 | if (path->nodes[*level] == root->node) { | ||
| 3009 | parent = path->nodes[*level]; | ||
| 3010 | bytenr = path->nodes[*level]->start; | ||
| 3011 | } else { | ||
| 3012 | parent = path->nodes[*level + 1]; | ||
| 3013 | bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); | ||
| 3014 | } | ||
| 3015 | |||
| 3016 | blocksize = btrfs_level_size(root, *level); | ||
| 3017 | root_owner = btrfs_header_owner(parent); | ||
| 3018 | root_gen = btrfs_header_generation(parent); | ||
| 3019 | |||
| 3020 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 3021 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, | ||
| 3022 | parent->start, root_owner, root_gen, | ||
| 3023 | *level, 1); | ||
| 3024 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 3025 | free_extent_buffer(path->nodes[*level]); | ||
| 3026 | path->nodes[*level] = NULL; | ||
| 3027 | *level += 1; | ||
| 3028 | BUG_ON(ret); | ||
| 3029 | |||
| 3030 | cond_resched(); | ||
| 3031 | return 0; | ||
| 3032 | } | ||
| 3033 | |||
| 3034 | /* | ||
| 3035 | * helper for dropping snapshots. This walks back up the tree in the path | ||
| 3036 | * to find the first node higher up where we haven't yet gone through | ||
| 3037 | * all the slots | ||
| 3038 | */ | ||
| 3039 | static int noinline walk_up_tree(struct btrfs_trans_handle *trans, | ||
| 3040 | struct btrfs_root *root, | ||
| 3041 | struct btrfs_path *path, int *level) | ||
| 3042 | { | ||
| 3043 | u64 root_owner; | ||
| 3044 | u64 root_gen; | ||
| 3045 | struct btrfs_root_item *root_item = &root->root_item; | ||
| 3046 | int i; | ||
| 3047 | int slot; | ||
| 3048 | int ret; | ||
| 3049 | |||
| 3050 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | ||
| 3051 | slot = path->slots[i]; | ||
| 3052 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | ||
| 3053 | struct extent_buffer *node; | ||
| 3054 | struct btrfs_disk_key disk_key; | ||
| 3055 | node = path->nodes[i]; | ||
| 3056 | path->slots[i]++; | ||
| 3057 | *level = i; | ||
| 3058 | WARN_ON(*level == 0); | ||
| 3059 | btrfs_node_key(node, &disk_key, path->slots[i]); | ||
| 3060 | memcpy(&root_item->drop_progress, | ||
| 3061 | &disk_key, sizeof(disk_key)); | ||
| 3062 | root_item->drop_level = i; | ||
| 3063 | return 0; | ||
| 3064 | } else { | ||
| 3065 | struct extent_buffer *parent; | ||
| 3066 | if (path->nodes[*level] == root->node) | ||
| 3067 | parent = path->nodes[*level]; | ||
| 3068 | else | ||
| 3069 | parent = path->nodes[*level + 1]; | ||
| 3070 | |||
| 3071 | root_owner = btrfs_header_owner(parent); | ||
| 3072 | root_gen = btrfs_header_generation(parent); | ||
| 3073 | ret = btrfs_free_extent(trans, root, | ||
| 3074 | path->nodes[*level]->start, | ||
| 3075 | path->nodes[*level]->len, | ||
| 3076 | parent->start, root_owner, | ||
| 3077 | root_gen, *level, 1); | ||
| 3078 | BUG_ON(ret); | ||
| 3079 | free_extent_buffer(path->nodes[*level]); | ||
| 3080 | path->nodes[*level] = NULL; | ||
| 3081 | *level = i + 1; | ||
| 3082 | } | ||
| 3083 | } | ||
| 3084 | return 1; | ||
| 3085 | } | ||
| 3086 | |||
| 3087 | /* | ||
| 3088 | * drop the reference count on the tree rooted at 'snap'. This traverses | ||
| 3089 | * the tree freeing any blocks that have a ref count of zero after being | ||
| 3090 | * decremented. | ||
| 3091 | */ | ||
| 3092 | int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 3093 | *root) | ||
| 3094 | { | ||
| 3095 | int ret = 0; | ||
| 3096 | int wret; | ||
| 3097 | int level; | ||
| 3098 | struct btrfs_path *path; | ||
| 3099 | int i; | ||
| 3100 | int orig_level; | ||
| 3101 | struct btrfs_root_item *root_item = &root->root_item; | ||
| 3102 | |||
| 3103 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); | ||
| 3104 | path = btrfs_alloc_path(); | ||
| 3105 | BUG_ON(!path); | ||
| 3106 | |||
| 3107 | level = btrfs_header_level(root->node); | ||
| 3108 | orig_level = level; | ||
| 3109 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | ||
| 3110 | path->nodes[level] = root->node; | ||
| 3111 | extent_buffer_get(root->node); | ||
| 3112 | path->slots[level] = 0; | ||
| 3113 | } else { | ||
| 3114 | struct btrfs_key key; | ||
| 3115 | struct btrfs_disk_key found_key; | ||
| 3116 | struct extent_buffer *node; | ||
| 3117 | |||
| 3118 | btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); | ||
| 3119 | level = root_item->drop_level; | ||
| 3120 | path->lowest_level = level; | ||
| 3121 | wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 3122 | if (wret < 0) { | ||
| 3123 | ret = wret; | ||
| 3124 | goto out; | ||
| 3125 | } | ||
| 3126 | node = path->nodes[level]; | ||
| 3127 | btrfs_node_key(node, &found_key, path->slots[level]); | ||
| 3128 | WARN_ON(memcmp(&found_key, &root_item->drop_progress, | ||
| 3129 | sizeof(found_key))); | ||
| 3130 | /* | ||
| 3131 | * unlock our path; this is safe because only this | ||
| 3132 | * function is allowed to delete this snapshot | ||
| 3133 | */ | ||
| 3134 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | ||
| 3135 | if (path->nodes[i] && path->locks[i]) { | ||
| 3136 | path->locks[i] = 0; | ||
| 3137 | btrfs_tree_unlock(path->nodes[i]); | ||
| 3138 | } | ||
| 3139 | } | ||
| 3140 | } | ||
| 3141 | while (1) { | ||
| 3142 | wret = walk_down_tree(trans, root, path, &level); | ||
| 3143 | if (wret > 0) | ||
| 3144 | break; | ||
| 3145 | if (wret < 0) | ||
| 3146 | ret = wret; | ||
| 3147 | |||
| 3148 | wret = walk_up_tree(trans, root, path, &level); | ||
| 3149 | if (wret > 0) | ||
| 3150 | break; | ||
| 3151 | if (wret < 0) | ||
| 3152 | ret = wret; | ||
| 3153 | if (trans->transaction->in_commit) { | ||
| 3154 | ret = -EAGAIN; | ||
| 3155 | break; | ||
| 3156 | } | ||
| 3157 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 3158 | wake_up(&root->fs_info->transaction_throttle); | ||
| 3159 | } | ||
| 3160 | for (i = 0; i <= orig_level; i++) { | ||
| 3161 | if (path->nodes[i]) { | ||
| 3162 | free_extent_buffer(path->nodes[i]); | ||
| 3163 | path->nodes[i] = NULL; | ||
| 3164 | } | ||
| 3165 | } | ||
| 3166 | out: | ||
| 3167 | btrfs_free_path(path); | ||
| 3168 | return ret; | ||
| 3169 | } | ||
| 3170 | |||
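| | /* | ||
| | * compute the last page of a readahead window that starts at | ||
| | * 'start' and is 'nr' pages long, clamped to 'last'. | ||
| | */ | ||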
| 3171 | static unsigned long calc_ra(unsigned long start, unsigned long last, | ||
| 3172 | unsigned long nr) | ||
| 3173 | { | ||
| 3174 | return min(last, start + nr - 1); | ||
| 3175 | } | ||
| 3176 | |||
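| | /* | ||
| | * read in the pages covering [start, start + len) of the | ||
| | * relocation inode and mark them dirty/delalloc, so that writeback | ||
| | * writes the data out to newly allocated space. | ||
| | */ | ||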
| 3177 | static int noinline relocate_inode_pages(struct inode *inode, u64 start, | ||
| 3178 | u64 len) | ||
| 3179 | { | ||
| 3180 | u64 page_start; | ||
| 3181 | u64 page_end; | ||
| 3182 | unsigned long first_index; | ||
| 3183 | unsigned long last_index; | ||
| 3184 | unsigned long i; | ||
| 3185 | struct page *page; | ||
| 3186 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 3187 | struct file_ra_state *ra; | ||
| 3188 | struct btrfs_ordered_extent *ordered; | ||
| 3189 | unsigned int total_read = 0; | ||
| 3190 | unsigned int total_dirty = 0; | ||
| 3191 | int ret = 0; | ||
| 3192 | |||
| 3193 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
| | if (!ra) | ||
| | return -ENOMEM; | ||
| 3194 | |||
| 3195 | mutex_lock(&inode->i_mutex); | ||
| 3196 | first_index = start >> PAGE_CACHE_SHIFT; | ||
| 3197 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
| 3198 | |||
| 3199 | /* make sure the dirty trick played by the caller works */ | ||
| 3200 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
| 3201 | first_index, last_index); | ||
| 3202 | if (ret) | ||
| 3203 | goto out_unlock; | ||
| 3204 | |||
| 3205 | file_ra_state_init(ra, inode->i_mapping); | ||
| 3206 | |||
| 3207 | for (i = first_index; i <= last_index; i++) { | ||
| 3208 | if (total_read % ra->ra_pages == 0) { | ||
| 3209 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | ||
| 3210 | calc_ra(i, last_index, ra->ra_pages)); | ||
| 3211 | } | ||
| 3212 | total_read++; | ||
| 3213 | again: | ||
| 3214 | BUG_ON(((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)); | ||
| 3216 | page = grab_cache_page(inode->i_mapping, i); | ||
| 3217 | if (!page) { | ||
| 3218 | ret = -ENOMEM; | ||
| 3219 | goto out_unlock; | ||
| 3220 | } | ||
| 3221 | if (!PageUptodate(page)) { | ||
| 3222 | btrfs_readpage(NULL, page); | ||
| 3223 | lock_page(page); | ||
| 3224 | if (!PageUptodate(page)) { | ||
| 3225 | unlock_page(page); | ||
| 3226 | page_cache_release(page); | ||
| 3227 | ret = -EIO; | ||
| 3228 | goto out_unlock; | ||
| 3229 | } | ||
| 3230 | } | ||
| 3231 | wait_on_page_writeback(page); | ||
| 3232 | |||
| 3233 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 3234 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
| 3235 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3236 | |||
| 3237 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
| 3238 | if (ordered) { | ||
| 3239 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3240 | unlock_page(page); | ||
| 3241 | page_cache_release(page); | ||
| 3242 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 3243 | btrfs_put_ordered_extent(ordered); | ||
| 3244 | goto again; | ||
| 3245 | } | ||
| 3246 | set_page_extent_mapped(page); | ||
| 3247 | |||
| 3248 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 3249 | if (i == first_index) | ||
| 3250 | set_extent_bits(io_tree, page_start, page_end, | ||
| 3251 | EXTENT_BOUNDARY, GFP_NOFS); | ||
| 3252 | |||
| 3253 | set_page_dirty(page); | ||
| 3254 | total_dirty++; | ||
| 3255 | |||
| 3256 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3257 | unlock_page(page); | ||
| 3258 | page_cache_release(page); | ||
| 3259 | } | ||
| 3260 | |||
| 3261 | out_unlock: | ||
| 3262 | kfree(ra); | ||
| 3263 | mutex_unlock(&inode->i_mutex); | ||
| 3264 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
| 3265 | return ret; | ||
| 3266 | } | ||
| 3267 | |||
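| | /* | ||
| | * pin an extent mapping for the extent being relocated into the | ||
| | * relocation inode, then dirty the covered pages. the pinned | ||
| | * mapping makes btrfs_readpage pull the data from the old | ||
| | * location. | ||
| | */ | ||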
| 3268 | static int noinline relocate_data_extent(struct inode *reloc_inode, | ||
| 3269 | struct btrfs_key *extent_key, | ||
| 3270 | u64 offset) | ||
| 3271 | { | ||
| 3272 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
| 3273 | struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; | ||
| 3274 | struct extent_map *em; | ||
| 3275 | |||
| 3276 | em = alloc_extent_map(GFP_NOFS); | ||
| 3277 | BUG_ON(!em || IS_ERR(em)); | ||
| 3278 | |||
| 3279 | em->start = extent_key->objectid - offset; | ||
| 3280 | em->len = extent_key->offset; | ||
| 3281 | em->block_start = extent_key->objectid; | ||
| 3282 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 3283 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 3284 | |||
| 3285 | /* set up an extent map to cheat btrfs_readpage */ | ||
| 3286 | mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); | ||
| 3287 | while (1) { | ||
| 3288 | int ret; | ||
| 3289 | spin_lock(&em_tree->lock); | ||
| 3290 | ret = add_extent_mapping(em_tree, em); | ||
| 3291 | spin_unlock(&em_tree->lock); | ||
| 3292 | if (ret != -EEXIST) { | ||
| 3293 | free_extent_map(em); | ||
| 3294 | break; | ||
| 3295 | } | ||
| 3296 | btrfs_drop_extent_cache(reloc_inode, em->start, | ||
| 3297 | em->start + em->len - 1, 0); | ||
| 3298 | } | ||
| 3299 | mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); | ||
| 3300 | |||
| 3301 | return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, | ||
| 3302 | extent_key->offset); | ||
| 3303 | } | ||
| 3304 | |||
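| | /* | ||
| | * a btrfs_ref_path describes one chain of back references from an | ||
| | * extent up to a tree root. nodes[] holds the bytenr of the | ||
| | * referencing block at each level, and current_level tracks how | ||
| | * far up the chain the walk has progressed. | ||
| | */ | ||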
| 3305 | struct btrfs_ref_path { | ||
| 3306 | u64 extent_start; | ||
| 3307 | u64 nodes[BTRFS_MAX_LEVEL]; | ||
| 3308 | u64 root_objectid; | ||
| 3309 | u64 root_generation; | ||
| 3310 | u64 owner_objectid; | ||
| 3311 | u32 num_refs; | ||
| 3312 | int lowest_level; | ||
| 3313 | int current_level; | ||
| 3314 | }; | ||
| 3315 | |||
| 3316 | struct disk_extent { | ||
| 3317 | u64 disk_bytenr; | ||
| 3318 | u64 disk_num_bytes; | ||
| 3319 | u64 offset; | ||
| 3320 | u64 num_bytes; | ||
| 3321 | }; | ||
| 3322 | |||
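| | /* | ||
| | * return 1 if the root is COW-only (not reference counted): the | ||
| | * root, extent, chunk, device and log trees are never snapshotted. | ||
| | */ | ||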
| 3323 | static int is_cowonly_root(u64 root_objectid) | ||
| 3324 | { | ||
| 3325 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || | ||
| 3326 | root_objectid == BTRFS_EXTENT_TREE_OBJECTID || | ||
| 3327 | root_objectid == BTRFS_CHUNK_TREE_OBJECTID || | ||
| 3328 | root_objectid == BTRFS_DEV_TREE_OBJECTID || | ||
| 3329 | root_objectid == BTRFS_TREE_LOG_OBJECTID) | ||
| 3330 | return 1; | ||
| 3331 | return 0; | ||
| 3332 | } | ||
| 3333 | |||
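| | /* | ||
| | * advance to the next back reference path for | ||
| | * ref_path->extent_start. the first call walks up from the extent | ||
| | * to a tree root; later calls walk back down to the deepest fork | ||
| | * and up a different branch. returns 0 when a root is found, | ||
| | * 1 when all paths are exhausted and < 0 on error. | ||
| | */ | ||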
| 3334 | static int noinline __next_ref_path(struct btrfs_trans_handle *trans, | ||
| 3335 | struct btrfs_root *extent_root, | ||
| 3336 | struct btrfs_ref_path *ref_path, | ||
| 3337 | int first_time) | ||
| 3338 | { | ||
| 3339 | struct extent_buffer *leaf; | ||
| 3340 | struct btrfs_path *path; | ||
| 3341 | struct btrfs_extent_ref *ref; | ||
| 3342 | struct btrfs_key key; | ||
| 3343 | struct btrfs_key found_key; | ||
| 3344 | u64 bytenr; | ||
| 3345 | u32 nritems; | ||
| 3346 | int level; | ||
| 3347 | int ret = 1; | ||
| 3348 | |||
| 3349 | path = btrfs_alloc_path(); | ||
| 3350 | if (!path) | ||
| 3351 | return -ENOMEM; | ||
| 3352 | |||
| 3353 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 3354 | |||
| 3355 | if (first_time) { | ||
| 3356 | ref_path->lowest_level = -1; | ||
| 3357 | ref_path->current_level = -1; | ||
| 3358 | goto walk_up; | ||
| 3359 | } | ||
| 3360 | walk_down: | ||
| 3361 | level = ref_path->current_level - 1; | ||
| 3362 | while (level >= -1) { | ||
| 3363 | u64 parent; | ||
| 3364 | if (level < ref_path->lowest_level) | ||
| 3365 | break; | ||
| 3366 | |||
| 3367 | if (level >= 0) { | ||
| 3368 | bytenr = ref_path->nodes[level]; | ||
| 3369 | } else { | ||
| 3370 | bytenr = ref_path->extent_start; | ||
| 3371 | } | ||
| 3372 | BUG_ON(bytenr == 0); | ||
| 3373 | |||
| 3374 | parent = ref_path->nodes[level + 1]; | ||
| 3375 | ref_path->nodes[level + 1] = 0; | ||
| 3376 | ref_path->current_level = level; | ||
| 3377 | BUG_ON(parent == 0); | ||
| 3378 | |||
| 3379 | key.objectid = bytenr; | ||
| 3380 | key.offset = parent + 1; | ||
| 3381 | key.type = BTRFS_EXTENT_REF_KEY; | ||
| 3382 | |||
| 3383 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
| 3384 | if (ret < 0) | ||
| 3385 | goto out; | ||
| 3386 | BUG_ON(ret == 0); | ||
| 3387 | |||
| 3388 | leaf = path->nodes[0]; | ||
| 3389 | nritems = btrfs_header_nritems(leaf); | ||
| 3390 | if (path->slots[0] >= nritems) { | ||
| 3391 | ret = btrfs_next_leaf(extent_root, path); | ||
| 3392 | if (ret < 0) | ||
| 3393 | goto out; | ||
| 3394 | if (ret > 0) | ||
| 3395 | goto next; | ||
| 3396 | leaf = path->nodes[0]; | ||
| 3397 | } | ||
| 3398 | |||
| 3399 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 3400 | if (found_key.objectid == bytenr && | ||
| 3401 | found_key.type == BTRFS_EXTENT_REF_KEY) | ||
| 3402 | goto found; | ||
| 3403 | next: | ||
| 3404 | level--; | ||
| 3405 | btrfs_release_path(extent_root, path); | ||
| 3406 | if (need_resched()) { | ||
| 3407 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 3408 | cond_resched(); | ||
| 3409 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 3410 | } | ||
| 3411 | } | ||
| 3412 | /* reached lowest level */ | ||
| 3413 | ret = 1; | ||
| 3414 | goto out; | ||
| 3415 | walk_up: | ||
| 3416 | level = ref_path->current_level; | ||
| 3417 | while (level < BTRFS_MAX_LEVEL - 1) { | ||
| 3418 | u64 ref_objectid; | ||
| 3419 | if (level >= 0) { | ||
| 3420 | bytenr = ref_path->nodes[level]; | ||
| 3421 | } else { | ||
| 3422 | bytenr = ref_path->extent_start; | ||
| 3423 | } | ||
| 3424 | BUG_ON(bytenr == 0); | ||
| 3425 | |||
| 3426 | key.objectid = bytenr; | ||
| 3427 | key.offset = 0; | ||
| 3428 | key.type = BTRFS_EXTENT_REF_KEY; | ||
| 3429 | |||
| 3430 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); | ||
| 3431 | if (ret < 0) | ||
| 3432 | goto out; | ||
| 3433 | |||
| 3434 | leaf = path->nodes[0]; | ||
| 3435 | nritems = btrfs_header_nritems(leaf); | ||
| 3436 | if (path->slots[0] >= nritems) { | ||
| 3437 | ret = btrfs_next_leaf(extent_root, path); | ||
| 3438 | if (ret < 0) | ||
| 3439 | goto out; | ||
| 3440 | if (ret > 0) { | ||
| 3441 | /* the extent was freed by someone */ | ||
| 3442 | if (ref_path->lowest_level == level) | ||
| 3443 | goto out; | ||
| 3444 | btrfs_release_path(extent_root, path); | ||
| 3445 | goto walk_down; | ||
| 3446 | } | ||
| 3447 | leaf = path->nodes[0]; | ||
| 3448 | } | ||
| 3449 | |||
| 3450 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 3451 | if (found_key.objectid != bytenr || | ||
| 3452 | found_key.type != BTRFS_EXTENT_REF_KEY) { | ||
| 3453 | /* the extent was freed by someone */ | ||
| 3454 | if (ref_path->lowest_level == level) { | ||
| 3455 | ret = 1; | ||
| 3456 | goto out; | ||
| 3457 | } | ||
| 3458 | btrfs_release_path(extent_root, path); | ||
| 3459 | goto walk_down; | ||
| 3460 | } | ||
| 3461 | found: | ||
| 3462 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3463 | struct btrfs_extent_ref); | ||
| 3464 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
| 3465 | if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
| 3466 | if (first_time) { | ||
| 3467 | level = (int)ref_objectid; | ||
| 3468 | BUG_ON(level >= BTRFS_MAX_LEVEL); | ||
| 3469 | ref_path->lowest_level = level; | ||
| 3470 | ref_path->current_level = level; | ||
| 3471 | ref_path->nodes[level] = bytenr; | ||
| 3472 | } else { | ||
| 3473 | WARN_ON(ref_objectid != level); | ||
| 3474 | } | ||
| 3475 | } else { | ||
| 3476 | WARN_ON(level != -1); | ||
| 3477 | } | ||
| 3478 | first_time = 0; | ||
| 3479 | |||
| 3480 | if (ref_path->lowest_level == level) { | ||
| 3481 | ref_path->owner_objectid = ref_objectid; | ||
| 3482 | ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); | ||
| 3483 | } | ||
| 3484 | |||
| 3485 | /* | ||
| 3486 | * the block is a tree root or the block isn't in a | ||
| 3487 | * reference counted tree. | ||
| 3488 | */ | ||
| 3489 | if (found_key.objectid == found_key.offset || | ||
| 3490 | is_cowonly_root(btrfs_ref_root(leaf, ref))) { | ||
| 3491 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
| 3492 | ref_path->root_generation = | ||
| 3493 | btrfs_ref_generation(leaf, ref); | ||
| 3494 | if (level < 0) { | ||
| 3495 | /* special reference from the tree log */ | ||
| 3496 | ref_path->nodes[0] = found_key.offset; | ||
| 3497 | ref_path->current_level = 0; | ||
| 3498 | } | ||
| 3499 | ret = 0; | ||
| 3500 | goto out; | ||
| 3501 | } | ||
| 3502 | |||
| 3503 | level++; | ||
| 3504 | BUG_ON(ref_path->nodes[level] != 0); | ||
| 3505 | ref_path->nodes[level] = found_key.offset; | ||
| 3506 | ref_path->current_level = level; | ||
| 3507 | |||
| 3508 | /* | ||
| 3509 | * the reference was created in the running transaction, | ||
| 3510 | * no need to continue walking up. | ||
| 3511 | */ | ||
| 3512 | if (btrfs_ref_generation(leaf, ref) == trans->transid) { | ||
| 3513 | ref_path->root_objectid = btrfs_ref_root(leaf, ref); | ||
| 3514 | ref_path->root_generation = | ||
| 3515 | btrfs_ref_generation(leaf, ref); | ||
| 3516 | ret = 0; | ||
| 3517 | goto out; | ||
| 3518 | } | ||
| 3519 | |||
| 3520 | btrfs_release_path(extent_root, path); | ||
| 3521 | if (need_resched()) { | ||
| 3522 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 3523 | cond_resched(); | ||
| 3524 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 3525 | } | ||
| 3526 | } | ||
| 3527 | /* reached max tree level, but no tree root found. */ | ||
| 3528 | BUG(); | ||
| 3529 | out: | ||
| 3530 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 3531 | btrfs_free_path(path); | ||
| 3532 | return ret; | ||
| 3533 | } | ||
| 3534 | |||
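| | /* | ||
| | * wrappers around __next_ref_path: 'first' starts a fresh walk | ||
| | * for extent_start, 'next' continues an existing one. | ||
| | */ | ||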
| 3535 | static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, | ||
| 3536 | struct btrfs_root *extent_root, | ||
| 3537 | struct btrfs_ref_path *ref_path, | ||
| 3538 | u64 extent_start) | ||
| 3539 | { | ||
| 3540 | memset(ref_path, 0, sizeof(*ref_path)); | ||
| 3541 | ref_path->extent_start = extent_start; | ||
| 3542 | |||
| 3543 | return __next_ref_path(trans, extent_root, ref_path, 1); | ||
| 3544 | } | ||
| 3545 | |||
| 3546 | static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, | ||
| 3547 | struct btrfs_root *extent_root, | ||
| 3548 | struct btrfs_ref_path *ref_path) | ||
| 3549 | { | ||
| 3550 | return __next_ref_path(trans, extent_root, ref_path, 0); | ||
| 3551 | } | ||
| 3552 | |||
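| | /* | ||
| | * find the file extents in the relocation inode that now hold the | ||
| | * data of 'extent_key'. the resulting disk_extent array describes | ||
| | * the new on-disk locations. | ||
| | */ | ||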
| 3553 | static int noinline get_new_locations(struct inode *reloc_inode, | ||
| 3554 | struct btrfs_key *extent_key, | ||
| 3555 | u64 offset, int no_fragment, | ||
| 3556 | struct disk_extent **extents, | ||
| 3557 | int *nr_extents) | ||
| 3558 | { | ||
| 3559 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
| 3560 | struct btrfs_path *path; | ||
| 3561 | struct btrfs_file_extent_item *fi; | ||
| 3562 | struct extent_buffer *leaf; | ||
| 3563 | struct disk_extent *exts = *extents; | ||
| 3564 | struct btrfs_key found_key; | ||
| 3565 | u64 cur_pos; | ||
| 3566 | u64 last_byte; | ||
| 3567 | u32 nritems; | ||
| 3568 | int nr = 0; | ||
| 3569 | int max = *nr_extents; | ||
| 3570 | int ret; | ||
| 3571 | |||
| 3572 | WARN_ON(!no_fragment && *extents); | ||
| 3573 | if (!exts) { | ||
| 3574 | max = 1; | ||
| 3575 | exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); | ||
| 3576 | if (!exts) | ||
| 3577 | return -ENOMEM; | ||
| 3578 | } | ||
| 3579 | |||
| 3580 | path = btrfs_alloc_path(); | ||
| 3581 | BUG_ON(!path); | ||
| 3582 | |||
| 3583 | cur_pos = extent_key->objectid - offset; | ||
| 3584 | last_byte = extent_key->objectid + extent_key->offset; | ||
| 3585 | ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, | ||
| 3586 | cur_pos, 0); | ||
| 3587 | if (ret < 0) | ||
| 3588 | goto out; | ||
| 3589 | if (ret > 0) { | ||
| 3590 | ret = -ENOENT; | ||
| 3591 | goto out; | ||
| 3592 | } | ||
| 3593 | |||
| 3594 | while (1) { | ||
| 3595 | leaf = path->nodes[0]; | ||
| 3596 | nritems = btrfs_header_nritems(leaf); | ||
| 3597 | if (path->slots[0] >= nritems) { | ||
| 3598 | ret = btrfs_next_leaf(root, path); | ||
| 3599 | if (ret < 0) | ||
| 3600 | goto out; | ||
| 3601 | if (ret > 0) | ||
| 3602 | break; | ||
| 3603 | leaf = path->nodes[0]; | ||
| 3604 | } | ||
| 3605 | |||
| 3606 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 3607 | if (found_key.offset != cur_pos || | ||
| 3608 | found_key.type != BTRFS_EXTENT_DATA_KEY || | ||
| 3609 | found_key.objectid != reloc_inode->i_ino) | ||
| 3610 | break; | ||
| 3611 | |||
| 3612 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3613 | struct btrfs_file_extent_item); | ||
| 3614 | if (btrfs_file_extent_type(leaf, fi) != | ||
| 3615 | BTRFS_FILE_EXTENT_REG || | ||
| 3616 | btrfs_file_extent_disk_bytenr(leaf, fi) == 0) | ||
| 3617 | break; | ||
| 3618 | |||
| 3619 | if (nr == max) { | ||
| 3620 | struct disk_extent *old = exts; | ||
| 3621 | max *= 2; | ||
| 3622 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); | ||
| | if (!exts) { | ||
| | if (old != *extents) | ||
| | kfree(old); | ||
| | ret = -ENOMEM; | ||
| | goto out; | ||
| | } | ||
| 3623 | memcpy(exts, old, sizeof(*exts) * nr); | ||
| 3624 | if (old != *extents) | ||
| 3625 | kfree(old); | ||
| 3626 | } | ||
| 3627 | |||
| 3628 | exts[nr].disk_bytenr = | ||
| 3629 | btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 3630 | exts[nr].disk_num_bytes = | ||
| 3631 | btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
| 3632 | exts[nr].offset = btrfs_file_extent_offset(leaf, fi); | ||
| 3633 | exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
| 3634 | WARN_ON(exts[nr].offset > 0); | ||
| 3635 | WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); | ||
| 3636 | |||
| 3637 | cur_pos += exts[nr].num_bytes; | ||
| 3638 | nr++; | ||
| 3639 | |||
| 3640 | if (cur_pos + offset >= last_byte) | ||
| 3641 | break; | ||
| 3642 | |||
| 3643 | if (no_fragment) { | ||
| 3644 | ret = 1; | ||
| 3645 | goto out; | ||
| 3646 | } | ||
| 3647 | path->slots[0]++; | ||
| 3648 | } | ||
| 3649 | |||
| 3650 | WARN_ON(cur_pos + offset > last_byte); | ||
| 3651 | if (cur_pos + offset < last_byte) { | ||
| 3652 | ret = -ENOENT; | ||
| 3653 | goto out; | ||
| 3654 | } | ||
| 3655 | ret = 0; | ||
| 3656 | out: | ||
| 3657 | btrfs_free_path(path); | ||
| 3658 | if (ret) { | ||
| 3659 | if (exts != *extents) | ||
| 3660 | kfree(exts); | ||
| 3661 | } else { | ||
| 3662 | *extents = exts; | ||
| 3663 | *nr_extents = nr; | ||
| 3664 | } | ||
| 3665 | return ret; | ||
| 3666 | } | ||
| 3667 | |||
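| | /* | ||
| | * rewrite every file extent item that points at the extent being | ||
| | * relocated so it points at the new location. when the data was | ||
| | * split into several new extents, the old item is dropped and | ||
| | * replaced by one item per new extent. | ||
| | */ | ||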
| 3668 | static int noinline replace_one_extent(struct btrfs_trans_handle *trans, | ||
| 3669 | struct btrfs_root *root, | ||
| 3670 | struct btrfs_path *path, | ||
| 3671 | struct btrfs_key *extent_key, | ||
| 3672 | struct btrfs_key *leaf_key, | ||
| 3673 | struct btrfs_ref_path *ref_path, | ||
| 3674 | struct disk_extent *new_extents, | ||
| 3675 | int nr_extents) | ||
| 3676 | { | ||
| 3677 | struct extent_buffer *leaf; | ||
| 3678 | struct btrfs_file_extent_item *fi; | ||
| 3679 | struct inode *inode = NULL; | ||
| 3680 | struct btrfs_key key; | ||
| 3681 | u64 lock_start = 0; | ||
| 3682 | u64 lock_end = 0; | ||
| 3683 | u64 num_bytes; | ||
| 3684 | u64 ext_offset; | ||
| 3685 | u64 first_pos; | ||
| 3686 | u32 nritems; | ||
| 3687 | int nr_scaned = 0; | ||
| 3688 | int extent_locked = 0; | ||
| 3689 | int ret; | ||
| 3690 | |||
| 3691 | memcpy(&key, leaf_key, sizeof(key)); | ||
| 3692 | first_pos = INT_LIMIT(loff_t) - extent_key->offset; | ||
| 3693 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | ||
| 3694 | if (key.objectid < ref_path->owner_objectid || | ||
| 3695 | (key.objectid == ref_path->owner_objectid && | ||
| 3696 | key.type < BTRFS_EXTENT_DATA_KEY)) { | ||
| 3697 | key.objectid = ref_path->owner_objectid; | ||
| 3698 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 3699 | key.offset = 0; | ||
| 3700 | } | ||
| 3701 | } | ||
| 3702 | |||
| 3703 | while (1) { | ||
| 3704 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 3705 | if (ret < 0) | ||
| 3706 | goto out; | ||
| 3707 | |||
| 3708 | leaf = path->nodes[0]; | ||
| 3709 | nritems = btrfs_header_nritems(leaf); | ||
| 3710 | next: | ||
| 3711 | if (extent_locked && ret > 0) { | ||
| 3712 | /* | ||
| 3713 | * the file extent item was modified by someone | ||
| 3714 | * before the extent got locked. | ||
| 3715 | */ | ||
| 3716 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 3717 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
| 3718 | lock_end, GFP_NOFS); | ||
| 3719 | extent_locked = 0; | ||
| 3720 | } | ||
| 3721 | |||
| 3722 | if (path->slots[0] >= nritems) { | ||
| 3723 | if (++nr_scaned > 2) | ||
| 3724 | break; | ||
| 3725 | |||
| 3726 | BUG_ON(extent_locked); | ||
| 3727 | ret = btrfs_next_leaf(root, path); | ||
| 3728 | if (ret < 0) | ||
| 3729 | goto out; | ||
| 3730 | if (ret > 0) | ||
| 3731 | break; | ||
| 3732 | leaf = path->nodes[0]; | ||
| 3733 | nritems = btrfs_header_nritems(leaf); | ||
| 3734 | } | ||
| 3735 | |||
| 3736 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 3737 | |||
| 3738 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | ||
| 3739 | if ((key.objectid > ref_path->owner_objectid) || | ||
| 3740 | (key.objectid == ref_path->owner_objectid && | ||
| 3741 | key.type > BTRFS_EXTENT_DATA_KEY) || | ||
| 3742 | (key.offset >= first_pos + extent_key->offset)) | ||
| 3743 | break; | ||
| 3744 | } | ||
| 3745 | |||
| 3746 | if (inode && key.objectid != inode->i_ino) { | ||
| 3747 | BUG_ON(extent_locked); | ||
| 3748 | btrfs_release_path(root, path); | ||
| 3749 | mutex_unlock(&inode->i_mutex); | ||
| 3750 | iput(inode); | ||
| 3751 | inode = NULL; | ||
| 3752 | continue; | ||
| 3753 | } | ||
| 3754 | |||
| 3755 | if (key.type != BTRFS_EXTENT_DATA_KEY) { | ||
| 3756 | path->slots[0]++; | ||
| 3757 | ret = 1; | ||
| 3758 | goto next; | ||
| 3759 | } | ||
| 3760 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3761 | struct btrfs_file_extent_item); | ||
| 3762 | if ((btrfs_file_extent_type(leaf, fi) != | ||
| 3763 | BTRFS_FILE_EXTENT_REG) || | ||
| 3764 | (btrfs_file_extent_disk_bytenr(leaf, fi) != | ||
| 3765 | extent_key->objectid)) { | ||
| 3766 | path->slots[0]++; | ||
| 3767 | ret = 1; | ||
| 3768 | goto next; | ||
| 3769 | } | ||
| 3770 | |||
| 3771 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
| 3772 | ext_offset = btrfs_file_extent_offset(leaf, fi); | ||
| 3773 | |||
| 3774 | if (first_pos > key.offset - ext_offset) | ||
| 3775 | first_pos = key.offset - ext_offset; | ||
| 3776 | |||
| 3777 | if (!extent_locked) { | ||
| 3778 | lock_start = key.offset; | ||
| 3779 | lock_end = lock_start + num_bytes - 1; | ||
| 3780 | } else { | ||
| 3781 | BUG_ON(lock_start != key.offset); | ||
| 3782 | BUG_ON(lock_end - lock_start + 1 < num_bytes); | ||
| 3783 | } | ||
| 3784 | |||
| 3785 | if (!inode) { | ||
| 3786 | btrfs_release_path(root, path); | ||
| 3787 | |||
| 3788 | inode = btrfs_iget_locked(root->fs_info->sb, | ||
| 3789 | key.objectid, root); | ||
| 3790 | if (inode->i_state & I_NEW) { | ||
| 3791 | BTRFS_I(inode)->root = root; | ||
| 3792 | BTRFS_I(inode)->location.objectid = | ||
| 3793 | key.objectid; | ||
| 3794 | BTRFS_I(inode)->location.type = | ||
| 3795 | BTRFS_INODE_ITEM_KEY; | ||
| 3796 | BTRFS_I(inode)->location.offset = 0; | ||
| 3797 | btrfs_read_locked_inode(inode); | ||
| 3798 | unlock_new_inode(inode); | ||
| 3799 | } | ||
| 3800 | /* | ||
| 3801 | * some code calls btrfs_commit_transaction while | ||
| 3802 | * holding the i_mutex, so we can't use mutex_lock | ||
| 3803 | * here. | ||
| 3804 | */ | ||
| 3805 | if (is_bad_inode(inode) || | ||
| 3806 | !mutex_trylock(&inode->i_mutex)) { | ||
| 3807 | iput(inode); | ||
| 3808 | inode = NULL; | ||
| 3809 | key.offset = (u64)-1; | ||
| 3810 | goto skip; | ||
| 3811 | } | ||
| 3812 | } | ||
| 3813 | |||
| 3814 | if (!extent_locked) { | ||
| 3815 | struct btrfs_ordered_extent *ordered; | ||
| 3816 | |||
| 3817 | btrfs_release_path(root, path); | ||
| 3818 | |||
| 3819 | lock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
| 3820 | lock_end, GFP_NOFS); | ||
| 3821 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
| 3822 | lock_end); | ||
| 3823 | if (ordered && | ||
| 3824 | ordered->file_offset <= lock_end && | ||
| 3825 | ordered->file_offset + ordered->len > lock_start) { | ||
| 3826 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
| 3827 | lock_start, lock_end, GFP_NOFS); | ||
| 3828 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 3829 | btrfs_put_ordered_extent(ordered); | ||
| 3830 | key.offset += num_bytes; | ||
| 3831 | goto skip; | ||
| 3832 | } | ||
| 3833 | if (ordered) | ||
| 3834 | btrfs_put_ordered_extent(ordered); | ||
| 3835 | |||
| 3836 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 3837 | extent_locked = 1; | ||
| 3838 | continue; | ||
| 3839 | } | ||
| 3840 | |||
| 3841 | if (nr_extents == 1) { | ||
| 3842 | /* update extent pointer in place */ | ||
| 3843 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 3844 | trans->transid); | ||
| 3845 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
| 3846 | new_extents[0].disk_bytenr); | ||
| 3847 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
| 3848 | new_extents[0].disk_num_bytes); | ||
| 3849 | ext_offset += new_extents[0].offset; | ||
| 3850 | btrfs_set_file_extent_offset(leaf, fi, ext_offset); | ||
| 3851 | btrfs_mark_buffer_dirty(leaf); | ||
| 3852 | |||
| 3853 | btrfs_drop_extent_cache(inode, key.offset, | ||
| 3854 | key.offset + num_bytes - 1, 0); | ||
| 3855 | |||
| 3856 | ret = btrfs_inc_extent_ref(trans, root, | ||
| 3857 | new_extents[0].disk_bytenr, | ||
| 3858 | new_extents[0].disk_num_bytes, | ||
| 3859 | leaf->start, | ||
| 3860 | root->root_key.objectid, | ||
| 3861 | trans->transid, | ||
| 3862 | key.objectid); | ||
| 3863 | BUG_ON(ret); | ||
| 3864 | |||
| 3865 | ret = btrfs_free_extent(trans, root, | ||
| 3866 | extent_key->objectid, | ||
| 3867 | extent_key->offset, | ||
| 3868 | leaf->start, | ||
| 3869 | btrfs_header_owner(leaf), | ||
| 3870 | btrfs_header_generation(leaf), | ||
| 3871 | key.objectid, 0); | ||
| 3872 | BUG_ON(ret); | ||
| 3873 | |||
| 3874 | btrfs_release_path(root, path); | ||
| 3875 | key.offset += num_bytes; | ||
| 3876 | } else { | ||
| 3877 | u64 alloc_hint; | ||
| 3878 | u64 extent_len; | ||
| 3879 | int i; | ||
| 3880 | /* | ||
| 3881 | * drop the old extent pointer first, then insert the | ||
| 3882 | * new pointers one by one | ||
| 3883 | */ | ||
| 3884 | btrfs_release_path(root, path); | ||
| 3885 | ret = btrfs_drop_extents(trans, root, inode, key.offset, | ||
| 3886 | key.offset + num_bytes, | ||
| 3887 | key.offset, &alloc_hint); | ||
| 3888 | BUG_ON(ret); | ||
| 3889 | |||
| 3890 | for (i = 0; i < nr_extents; i++) { | ||
| 3891 | if (ext_offset >= new_extents[i].num_bytes) { | ||
| 3892 | ext_offset -= new_extents[i].num_bytes; | ||
| 3893 | continue; | ||
| 3894 | } | ||
| 3895 | extent_len = min(new_extents[i].num_bytes - | ||
| 3896 | ext_offset, num_bytes); | ||
| 3897 | |||
| 3898 | ret = btrfs_insert_empty_item(trans, root, | ||
| 3899 | path, &key, | ||
| 3900 | sizeof(*fi)); | ||
| 3901 | BUG_ON(ret); | ||
| 3902 | |||
| 3903 | leaf = path->nodes[0]; | ||
| 3904 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3905 | struct btrfs_file_extent_item); | ||
| 3906 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 3907 | trans->transid); | ||
| 3908 | btrfs_set_file_extent_type(leaf, fi, | ||
| 3909 | BTRFS_FILE_EXTENT_REG); | ||
| 3910 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
| 3911 | new_extents[i].disk_bytenr); | ||
| 3912 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
| 3913 | new_extents[i].disk_num_bytes); | ||
| 3914 | btrfs_set_file_extent_num_bytes(leaf, fi, | ||
| 3915 | extent_len); | ||
| 3916 | ext_offset += new_extents[i].offset; | ||
| 3917 | btrfs_set_file_extent_offset(leaf, fi, | ||
| 3918 | ext_offset); | ||
| 3919 | btrfs_mark_buffer_dirty(leaf); | ||
| 3920 | |||
| 3921 | btrfs_drop_extent_cache(inode, key.offset, | ||
| 3922 | key.offset + extent_len - 1, 0); | ||
| 3923 | |||
| 3924 | ret = btrfs_inc_extent_ref(trans, root, | ||
| 3925 | new_extents[i].disk_bytenr, | ||
| 3926 | new_extents[i].disk_num_bytes, | ||
| 3927 | leaf->start, | ||
| 3928 | root->root_key.objectid, | ||
| 3929 | trans->transid, key.objectid); | ||
| 3930 | BUG_ON(ret); | ||
| 3931 | btrfs_release_path(root, path); | ||
| 3932 | |||
| 3933 | inode_add_bytes(inode, extent_len); | ||
| 3934 | |||
| 3935 | ext_offset = 0; | ||
| 3936 | num_bytes -= extent_len; | ||
| 3937 | key.offset += extent_len; | ||
| 3938 | |||
| 3939 | if (num_bytes == 0) | ||
| 3940 | break; | ||
| 3941 | } | ||
| 3942 | BUG_ON(i >= nr_extents); | ||
| 3943 | } | ||
| 3944 | |||
| 3945 | if (extent_locked) { | ||
| 3946 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 3947 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
| 3948 | lock_end, GFP_NOFS); | ||
| 3949 | extent_locked = 0; | ||
| 3950 | } | ||
| 3951 | skip: | ||
| 3952 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && | ||
| 3953 | key.offset >= first_pos + extent_key->offset) | ||
| 3954 | break; | ||
| 3955 | |||
| 3956 | cond_resched(); | ||
| 3957 | } | ||
| 3958 | ret = 0; | ||
| 3959 | out: | ||
| 3960 | btrfs_release_path(root, path); | ||
| 3961 | if (inode) { | ||
| 3962 | mutex_unlock(&inode->i_mutex); | ||
| 3963 | if (extent_locked) { | ||
| 3964 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 3965 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | ||
| 3966 | lock_end, GFP_NOFS); | ||
| 3967 | } | ||
| 3968 | iput(inode); | ||
| 3969 | } | ||
| 3970 | return ret; | ||
| 3971 | } | ||
| 3972 | |||
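| | /* | ||
| | * record that the block at orig_bytenr has been relocated to | ||
| | * new_bytenr. the mapping lives in an extent_io tree, so lookups | ||
| | * by bytenr are cheap. | ||
| | */ | ||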
| 3973 | int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
| 3974 | u64 num_bytes, u64 new_bytenr) | ||
| 3975 | { | ||
| 3976 | set_extent_bits(&root->fs_info->reloc_mapping_tree, | ||
| 3977 | orig_bytenr, orig_bytenr + num_bytes - 1, | ||
| 3978 | EXTENT_LOCKED, GFP_NOFS); | ||
| 3979 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
| 3980 | orig_bytenr, new_bytenr); | ||
| 3981 | return 0; | ||
| 3982 | } | ||
| 3983 | |||
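| | /* | ||
| | * follow the chain of relocation mappings to find the most recent | ||
| | * location of orig_bytenr. the chain is collapsed as a side | ||
| | * effect so later lookups take a single step. | ||
| | */ | ||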
| 3984 | int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
| 3985 | u64 num_bytes, u64 *new_bytenr) | ||
| 3986 | { | ||
| 3987 | u64 bytenr; | ||
| 3988 | u64 cur_bytenr = orig_bytenr; | ||
| 3989 | u64 prev_bytenr = orig_bytenr; | ||
| 3990 | int ret; | ||
| 3991 | |||
| 3992 | while (1) { | ||
| 3993 | ret = get_state_private(&root->fs_info->reloc_mapping_tree, | ||
| 3994 | cur_bytenr, &bytenr); | ||
| 3995 | if (ret) | ||
| 3996 | break; | ||
| 3997 | prev_bytenr = cur_bytenr; | ||
| 3998 | cur_bytenr = bytenr; | ||
| 3999 | } | ||
| 4000 | |||
| 4001 | if (orig_bytenr == cur_bytenr) | ||
| 4002 | return -ENOENT; | ||
| 4003 | |||
| 4004 | if (prev_bytenr != orig_bytenr) { | ||
| 4005 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
| 4006 | orig_bytenr, cur_bytenr); | ||
| 4007 | } | ||
| 4008 | *new_bytenr = cur_bytenr; | ||
| 4009 | return 0; | ||
| 4010 | } | ||
| 4011 | |||
| 4012 | void btrfs_free_reloc_mappings(struct btrfs_root *root) | ||
| 4013 | { | ||
| 4014 | clear_extent_bits(&root->fs_info->reloc_mapping_tree, | ||
| 4015 | 0, (u64)-1, -1, GFP_NOFS); | ||
| 4016 | } | ||
| 4017 | |||
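| | /* | ||
| | * when a leaf is COWed through a reloc tree, clone the cached leaf | ||
| | * ref of the original leaf for the new copy so its extents can | ||
| | * still be dropped via the ref cache. | ||
| | */ | ||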
| 4018 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | ||
| 4019 | struct btrfs_root *root, | ||
| 4020 | struct extent_buffer *buf, u64 orig_start) | ||
| 4021 | { | ||
| 4022 | int level; | ||
| 4023 | int ret; | ||
| 4024 | |||
| 4025 | BUG_ON(btrfs_header_generation(buf) != trans->transid); | ||
| 4026 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | ||
| 4027 | |||
| 4028 | level = btrfs_header_level(buf); | ||
| 4029 | if (level == 0) { | ||
| 4030 | struct btrfs_leaf_ref *ref; | ||
| 4031 | struct btrfs_leaf_ref *orig_ref; | ||
| 4032 | |||
| 4033 | orig_ref = btrfs_lookup_leaf_ref(root, orig_start); | ||
| 4034 | if (!orig_ref) | ||
| 4035 | return -ENOENT; | ||
| 4036 | |||
| 4037 | ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); | ||
| 4038 | if (!ref) { | ||
| 4039 | btrfs_free_leaf_ref(root, orig_ref); | ||
| 4040 | return -ENOMEM; | ||
| 4041 | } | ||
| 4042 | |||
| 4043 | ref->nritems = orig_ref->nritems; | ||
| 4044 | memcpy(ref->extents, orig_ref->extents, | ||
| 4045 | sizeof(ref->extents[0]) * ref->nritems); | ||
| 4046 | |||
| 4047 | btrfs_free_leaf_ref(root, orig_ref); | ||
| 4048 | |||
| 4049 | ref->root_gen = trans->transid; | ||
| 4050 | ref->bytenr = buf->start; | ||
| 4051 | ref->owner = btrfs_header_owner(buf); | ||
| 4052 | ref->generation = btrfs_header_generation(buf); | ||
| 4053 | ret = btrfs_add_leaf_ref(root, ref, 0); | ||
| 4054 | WARN_ON(ret); | ||
| 4055 | btrfs_free_leaf_ref(root, ref); | ||
| 4056 | } | ||
| 4057 | return 0; | ||
| 4058 | } | ||
| 4059 | |||
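| | /* | ||
| | * drop the cached extent mappings for every file extent referenced | ||
| | * by 'leaf' in the target root, forcing readers to rebuild them | ||
| | * from the relocated items. | ||
| | */ | ||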
| 4060 | static int noinline invalidate_extent_cache(struct btrfs_root *root, | ||
| 4061 | struct extent_buffer *leaf, | ||
| 4062 | struct btrfs_block_group_cache *group, | ||
| 4063 | struct btrfs_root *target_root) | ||
| 4064 | { | ||
| 4065 | struct btrfs_key key; | ||
| 4066 | struct inode *inode = NULL; | ||
| 4067 | struct btrfs_file_extent_item *fi; | ||
| 4068 | u64 num_bytes; | ||
| 4069 | u64 skip_objectid = 0; | ||
| 4070 | u32 nritems; | ||
| 4071 | u32 i; | ||
| 4072 | |||
| 4073 | nritems = btrfs_header_nritems(leaf); | ||
| 4074 | for (i = 0; i < nritems; i++) { | ||
| 4075 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
| 4076 | if (key.objectid == skip_objectid || | ||
| 4077 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 4078 | continue; | ||
| 4079 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
| 4080 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 4081 | BTRFS_FILE_EXTENT_INLINE) | ||
| 4082 | continue; | ||
| 4083 | if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) | ||
| 4084 | continue; | ||
| 4085 | if (!inode || inode->i_ino != key.objectid) { | ||
| 4086 | iput(inode); | ||
| 4087 | inode = btrfs_ilookup(target_root->fs_info->sb, | ||
| 4088 | key.objectid, target_root, 1); | ||
| 4089 | } | ||
| 4090 | if (!inode) { | ||
| 4091 | skip_objectid = key.objectid; | ||
| 4092 | continue; | ||
| 4093 | } | ||
| 4094 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | ||
| 4095 | |||
| 4096 | lock_extent(&BTRFS_I(inode)->io_tree, key.offset, | ||
| 4097 | key.offset + num_bytes - 1, GFP_NOFS); | ||
| 4098 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 4099 | btrfs_drop_extent_cache(inode, key.offset, | ||
| 4100 | key.offset + num_bytes - 1, 1); | ||
| 4101 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 4102 | unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, | ||
| 4103 | key.offset + num_bytes - 1, GFP_NOFS); | ||
| 4104 | cond_resched(); | ||
| 4105 | } | ||
| 4106 | iput(inode); | ||
| 4107 | return 0; | ||
| 4108 | } | ||
| 4109 | |||
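| | /* | ||
| | * for each file extent in 'leaf' that falls inside the block group | ||
| | * being relocated, look up the new location via the relocation | ||
| | * inode and update the extent pointer in place, keeping the cached | ||
| | * leaf ref in sync. | ||
| | */ | ||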
| 4110 | static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, | ||
| 4111 | struct btrfs_root *root, | ||
| 4112 | struct extent_buffer *leaf, | ||
| 4113 | struct btrfs_block_group_cache *group, | ||
| 4114 | struct inode *reloc_inode) | ||
| 4115 | { | ||
| 4116 | struct btrfs_key key; | ||
| 4117 | struct btrfs_key extent_key; | ||
| 4118 | struct btrfs_file_extent_item *fi; | ||
| 4119 | struct btrfs_leaf_ref *ref; | ||
| 4120 | struct disk_extent *new_extent; | ||
| 4121 | u64 bytenr; | ||
| 4122 | u64 num_bytes; | ||
| 4123 | u32 nritems; | ||
| 4124 | u32 i; | ||
| 4125 | int ext_index; | ||
| 4126 | int nr_extent; | ||
| 4127 | int ret; | ||
| 4128 | |||
| 4129 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); | ||
| 4130 | BUG_ON(!new_extent); | ||
| 4131 | |||
| 4132 | ref = btrfs_lookup_leaf_ref(root, leaf->start); | ||
| 4133 | BUG_ON(!ref); | ||
| 4134 | |||
| 4135 | ext_index = -1; | ||
| 4136 | nritems = btrfs_header_nritems(leaf); | ||
| 4137 | for (i = 0; i < nritems; i++) { | ||
| 4138 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
| 4139 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
| 4140 | continue; | ||
| 4141 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
| 4142 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 4143 | BTRFS_FILE_EXTENT_INLINE) | ||
| 4144 | continue; | ||
| 4145 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 4146 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
| 4147 | if (bytenr == 0) | ||
| 4148 | continue; | ||
| 4149 | |||
| 4150 | ext_index++; | ||
| 4151 | if (bytenr >= group->key.objectid + group->key.offset || | ||
| 4152 | bytenr + num_bytes <= group->key.objectid) | ||
| 4153 | continue; | ||
| 4154 | |||
| 4155 | extent_key.objectid = bytenr; | ||
| 4156 | extent_key.offset = num_bytes; | ||
| 4157 | extent_key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 4158 | nr_extent = 1; | ||
| 4159 | ret = get_new_locations(reloc_inode, &extent_key, | ||
| 4160 | group->key.objectid, 1, | ||
| 4161 | &new_extent, &nr_extent); | ||
| 4162 | if (ret > 0) | ||
| 4163 | continue; | ||
| 4164 | BUG_ON(ret < 0); | ||
| 4165 | |||
| 4166 | BUG_ON(ref->extents[ext_index].bytenr != bytenr); | ||
| 4167 | BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); | ||
| 4168 | ref->extents[ext_index].bytenr = new_extent->disk_bytenr; | ||
| 4169 | ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; | ||
| 4170 | |||
| 4171 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 4172 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | ||
| 4173 | new_extent->disk_bytenr); | ||
| 4174 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | ||
| 4175 | new_extent->disk_num_bytes); | ||
| 4176 | new_extent->offset += btrfs_file_extent_offset(leaf, fi); | ||
| 4177 | btrfs_set_file_extent_offset(leaf, fi, new_extent->offset); | ||
| 4178 | btrfs_mark_buffer_dirty(leaf); | ||
| 4179 | |||
| 4180 | ret = btrfs_inc_extent_ref(trans, root, | ||
| 4181 | new_extent->disk_bytenr, | ||
| 4182 | new_extent->disk_num_bytes, | ||
| 4183 | leaf->start, | ||
| 4184 | root->root_key.objectid, | ||
| 4185 | trans->transid, key.objectid); | ||
| 4186 | BUG_ON(ret); | ||
| 4187 | ret = btrfs_free_extent(trans, root, | ||
| 4188 | bytenr, num_bytes, leaf->start, | ||
| 4189 | btrfs_header_owner(leaf), | ||
| 4190 | btrfs_header_generation(leaf), | ||
| 4191 | key.objectid, 0); | ||
| 4192 | BUG_ON(ret); | ||
| 4193 | cond_resched(); | ||
| 4194 | } | ||
| 4195 | kfree(new_extent); | ||
| 4196 | BUG_ON(ext_index + 1 != ref->nritems); | ||
| 4197 | btrfs_free_leaf_ref(root, ref); | ||
| 4198 | return 0; | ||
| 4199 | } | ||
| 4200 | |||
| 4201 | int btrfs_free_reloc_root(struct btrfs_root *root) | ||
| 4202 | { | ||
| 4203 | struct btrfs_root *reloc_root; | ||
| 4204 | |||
| 4205 | if (root->reloc_root) { | ||
| 4206 | reloc_root = root->reloc_root; | ||
| 4207 | root->reloc_root = NULL; | ||
| 4208 | list_add(&reloc_root->dead_list, | ||
| 4209 | &root->fs_info->dead_reloc_roots); | ||
| 4210 | } | ||
| 4211 | return 0; | ||
| 4212 | } | ||
| 4213 | |||
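| | /* | ||
| | * drop all reloc roots queued on the dead list. each tree is | ||
| | * dropped a chunk at a time (btrfs_drop_snapshot returns -EAGAIN | ||
| | * so the transaction can be ended for throttling), then its root | ||
| | * item is deleted. | ||
| | */ | ||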
| 4214 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) | ||
| 4215 | { | ||
| 4216 | struct btrfs_trans_handle *trans; | ||
| 4217 | struct btrfs_root *reloc_root; | ||
| 4218 | struct btrfs_root *prev_root = NULL; | ||
| 4219 | struct list_head dead_roots; | ||
| 4220 | int ret; | ||
| 4221 | unsigned long nr; | ||
| 4222 | |||
| 4223 | INIT_LIST_HEAD(&dead_roots); | ||
| 4224 | list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); | ||
| 4225 | |||
| 4226 | while (!list_empty(&dead_roots)) { | ||
| 4227 | reloc_root = list_entry(dead_roots.prev, | ||
| 4228 | struct btrfs_root, dead_list); | ||
| 4229 | list_del_init(&reloc_root->dead_list); | ||
| 4230 | |||
| 4231 | BUG_ON(reloc_root->commit_root != NULL); | ||
| 4232 | while (1) { | ||
| 4233 | trans = btrfs_join_transaction(root, 1); | ||
| 4234 | BUG_ON(!trans); | ||
| 4235 | |||
| 4236 | mutex_lock(&root->fs_info->drop_mutex); | ||
| 4237 | ret = btrfs_drop_snapshot(trans, reloc_root); | ||
| 4238 | if (ret != -EAGAIN) | ||
| 4239 | break; | ||
| 4240 | mutex_unlock(&root->fs_info->drop_mutex); | ||
| 4241 | |||
| 4242 | nr = trans->blocks_used; | ||
| 4243 | ret = btrfs_end_transaction(trans, root); | ||
| 4244 | BUG_ON(ret); | ||
| 4245 | btrfs_btree_balance_dirty(root, nr); | ||
| 4246 | } | ||
| 4247 | |||
| 4248 | free_extent_buffer(reloc_root->node); | ||
| 4249 | |||
| 4250 | ret = btrfs_del_root(trans, root->fs_info->tree_root, | ||
| 4251 | &reloc_root->root_key); | ||
| 4252 | BUG_ON(ret); | ||
| 4253 | mutex_unlock(&root->fs_info->drop_mutex); | ||
| 4254 | |||
| 4255 | nr = trans->blocks_used; | ||
| 4256 | ret = btrfs_end_transaction(trans, root); | ||
| 4257 | BUG_ON(ret); | ||
| 4258 | btrfs_btree_balance_dirty(root, nr); | ||
| 4259 | |||
| 4260 | kfree(prev_root); | ||
| 4261 | prev_root = reloc_root; | ||
| 4262 | } | ||
| 4263 | if (prev_root) { | ||
| 4264 | btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); | ||
| 4265 | kfree(prev_root); | ||
| 4266 | } | ||
| 4267 | return 0; | ||
| 4268 | } | ||
| 4269 | |||
| 4270 | int btrfs_add_dead_reloc_root(struct btrfs_root *root) | ||
| 4271 | { | ||
| 4272 | list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); | ||
| 4273 | return 0; | ||
| 4274 | } | ||
| 4275 | |||
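| | /* | ||
| | * recover from an interrupted balance: find leftover reloc roots, | ||
| | * drop them, and run orphan cleanup on the data reloc tree. | ||
| | */ | ||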
| 4276 | int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | ||
| 4277 | { | ||
| 4278 | struct btrfs_root *reloc_root; | ||
| 4279 | struct btrfs_trans_handle *trans; | ||
| 4280 | struct btrfs_key location; | ||
| 4281 | int found; | ||
| 4282 | int ret; | ||
| 4283 | |||
| 4284 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
| 4285 | ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); | ||
| 4286 | BUG_ON(ret); | ||
| 4287 | found = !list_empty(&root->fs_info->dead_reloc_roots); | ||
| 4288 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
| 4289 | |||
| 4290 | if (found) { | ||
| 4291 | trans = btrfs_start_transaction(root, 1); | ||
| 4292 | BUG_ON(!trans); | ||
| 4293 | ret = btrfs_commit_transaction(trans, root); | ||
| 4294 | BUG_ON(ret); | ||
| 4295 | } | ||
| 4296 | |||
| 4297 | location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | ||
| 4298 | location.offset = (u64)-1; | ||
| 4299 | location.type = BTRFS_ROOT_ITEM_KEY; | ||
| 4300 | |||
| 4301 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | ||
| 4302 | BUG_ON(!reloc_root); | ||
| 4303 | btrfs_orphan_cleanup(reloc_root); | ||
| 4304 | return 0; | ||
| 4305 | } | ||
| 4306 | |||
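| | /* | ||
| | * create the reloc tree for a subvol if it doesn't exist yet: copy | ||
| | * the committed root, insert a root item keyed by | ||
| | * (BTRFS_TREE_RELOC_OBJECTID, subvol objectid), then read the new | ||
| | * tree back as a live root. | ||
| | */ | ||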
| 4307 | static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, | ||
| 4308 | struct btrfs_root *root) | ||
| 4309 | { | ||
| 4310 | struct btrfs_root *reloc_root; | ||
| 4311 | struct extent_buffer *eb; | ||
| 4312 | struct btrfs_root_item *root_item; | ||
| 4313 | struct btrfs_key root_key; | ||
| 4314 | int ret; | ||
| 4315 | |||
| 4316 | BUG_ON(!root->ref_cows); | ||
| 4317 | if (root->reloc_root) | ||
| 4318 | return 0; | ||
| 4319 | |||
| 4320 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | ||
| 4321 | BUG_ON(!root_item); | ||
| 4322 | |||
| 4323 | ret = btrfs_copy_root(trans, root, root->commit_root, | ||
| 4324 | &eb, BTRFS_TREE_RELOC_OBJECTID); | ||
| 4325 | BUG_ON(ret); | ||
| 4326 | |||
| 4327 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | ||
| 4328 | root_key.offset = root->root_key.objectid; | ||
| 4329 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 4330 | |||
| 4331 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | ||
| 4332 | btrfs_set_root_refs(root_item, 0); | ||
| 4333 | btrfs_set_root_bytenr(root_item, eb->start); | ||
| 4334 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | ||
| 4335 | memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress)); | ||
| 4336 | root_item->drop_level = 0; | ||
| 4337 | |||
| 4338 | btrfs_tree_unlock(eb); | ||
| 4339 | free_extent_buffer(eb); | ||
| 4340 | |||
| 4341 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, | ||
| 4342 | &root_key, root_item); | ||
| 4343 | BUG_ON(ret); | ||
| 4344 | kfree(root_item); | ||
| 4345 | |||
| 4346 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
| 4347 | &root_key); | ||
| 4348 | BUG_ON(!reloc_root); | ||
| 4349 | reloc_root->last_trans = trans->transid; | ||
| 4350 | reloc_root->commit_root = NULL; | ||
| 4351 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; | ||
| 4352 | |||
| 4353 | root->reloc_root = reloc_root; | ||
| 4354 | return 0; | ||
| 4355 | } | ||
| 4356 | |||
| 4357 | /* | ||
| 4358 | * Core function of space balance. | ||
| 4359 | * | ||
| 4360 | * The idea is to use reloc trees to relocate tree blocks in reference | ||
| 4361 | * counted roots. There is one reloc tree for each subvol, and all reloc | ||
| 4362 | * trees share the same key objectid. Reloc trees are snapshots of the | ||
| 4363 | * latest committed roots (subvol root->commit_root). To relocate a tree | ||
| 4364 | * block referenced by a subvol, the code COWs the block through the | ||
| 4365 | * reloc tree, then updates the pointer in the subvol to point to the | ||
| 4366 | * new block. Since all reloc trees share the same key objectid, we can | ||
| 4367 | * easily do special handling to share tree blocks between reloc trees. | ||
| 4368 | * Once a tree block has been COWed in one reloc tree, we can reuse the | ||
| 4369 | * result when the same block is COWed again through other reloc trees. | ||
| 4370 | */ | ||
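| | /* | ||
| | * a hypothetical example of the sharing described above: subvols A | ||
| | * and B both reference tree block X. relocating X through A's | ||
| | * reloc tree COWs it to X', and btrfs_add_reloc_mapping() records | ||
| | * X -> X'. when the walk through B's reloc tree later reaches X, | ||
| | * btrfs_get_reloc_mapping() finds X' and btrfs_merge_path() | ||
| | * splices it in instead of COWing X a second time. | ||
| | */ | ||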
| 4371 | static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | ||
| 4372 | struct btrfs_root *root, | ||
| 4373 | struct btrfs_path *path, | ||
| 4374 | struct btrfs_key *first_key, | ||
| 4375 | struct btrfs_ref_path *ref_path, | ||
| 4376 | struct btrfs_block_group_cache *group, | ||
| 4377 | struct inode *reloc_inode) | ||
| 4378 | { | ||
| 4379 | struct btrfs_root *reloc_root; | ||
| 4380 | struct extent_buffer *eb = NULL; | ||
| 4381 | struct btrfs_key *keys; | ||
| 4382 | u64 *nodes; | ||
| 4383 | int level; | ||
| 4384 | int lowest_merge; | ||
| 4385 | int lowest_level = 0; | ||
| 4386 | int update_refs; | ||
| 4387 | int ret; | ||
| 4388 | |||
| 4389 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
| 4390 | lowest_level = ref_path->owner_objectid; | ||
| 4391 | |||
| 4392 | if (is_cowonly_root(ref_path->root_objectid)) { | ||
| 4393 | path->lowest_level = lowest_level; | ||
| 4394 | ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); | ||
| 4395 | BUG_ON(ret < 0); | ||
| 4396 | path->lowest_level = 0; | ||
| 4397 | btrfs_release_path(root, path); | ||
| 4398 | return 0; | ||
| 4399 | } | ||
| 4400 | |||
| 4401 | keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS); | ||
| 4402 | BUG_ON(!keys); | ||
| 4403 | nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS); | ||
| 4404 | BUG_ON(!nodes); | ||
| 4405 | |||
| 4406 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
| 4407 | ret = init_reloc_tree(trans, root); | ||
| 4408 | BUG_ON(ret); | ||
| 4409 | reloc_root = root->reloc_root; | ||
| 4410 | |||
| 4411 | path->lowest_level = lowest_level; | ||
| 4412 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); | ||
| 4413 | BUG_ON(ret); | ||
| 4414 | /* | ||
| 4415 | * get relocation mapping for tree blocks in the path | ||
| 4416 | */ | ||
| 4417 | lowest_merge = BTRFS_MAX_LEVEL; | ||
| 4418 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
| 4419 | u64 new_bytenr; | ||
| 4420 | eb = path->nodes[level]; | ||
| 4421 | if (!eb || eb == reloc_root->node) | ||
| 4422 | continue; | ||
| 4423 | ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len, | ||
| 4424 | &new_bytenr); | ||
| 4425 | if (ret) | ||
| 4426 | continue; | ||
| 4427 | if (level == 0) | ||
| 4428 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
| 4429 | else | ||
| 4430 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
| 4431 | nodes[level] = new_bytenr; | ||
| 4432 | lowest_merge = level; | ||
| 4433 | } | ||
| 4434 | |||
| 4435 | update_refs = 0; | ||
| 4436 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
| 4437 | eb = path->nodes[0]; | ||
| 4438 | if (btrfs_header_generation(eb) < trans->transid) | ||
| 4439 | update_refs = 1; | ||
| 4440 | } | ||
| 4441 | |||
| 4442 | btrfs_release_path(reloc_root, path); | ||
| 4443 | /* | ||
| 4444 | * merge tree blocks that have already been relocated in other reloc trees | ||
| 4445 | */ | ||
| 4446 | if (lowest_merge != BTRFS_MAX_LEVEL) { | ||
| 4447 | ret = btrfs_merge_path(trans, reloc_root, keys, nodes, | ||
| 4448 | lowest_merge); | ||
| 4449 | BUG_ON(ret < 0); | ||
| 4450 | } | ||
| 4451 | /* | ||
| 4452 | * COW any tree blocks that haven't been relocated yet | ||
| 4453 | */ | ||
| 4454 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1); | ||
| 4455 | BUG_ON(ret); | ||
| 4456 | /* | ||
| 4457 | * if we are relocating a data block group, update the extent pointers | ||
| 4458 | * in the newly created tree leaf. | ||
| 4459 | */ | ||
| 4460 | eb = path->nodes[0]; | ||
| 4461 | if (update_refs && nodes[0] != eb->start) { | ||
| 4462 | ret = replace_extents_in_leaf(trans, reloc_root, eb, group, | ||
| 4463 | reloc_inode); | ||
| 4464 | BUG_ON(ret); | ||
| 4465 | } | ||
| 4466 | |||
| 4467 | memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL); | ||
| 4468 | memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL); | ||
| 4469 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
| 4470 | eb = path->nodes[level]; | ||
| 4471 | if (!eb || eb == reloc_root->node) | ||
| 4472 | continue; | ||
| 4473 | BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID); | ||
| 4474 | nodes[level] = eb->start; | ||
| 4475 | if (level == 0) | ||
| 4476 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
| 4477 | else | ||
| 4478 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
| 4479 | } | ||
| 4480 | |||
| 4481 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
| 4482 | eb = path->nodes[0]; | ||
| 4483 | extent_buffer_get(eb); | ||
| 4484 | } | ||
| 4485 | btrfs_release_path(reloc_root, path); | ||
| 4486 | /* | ||
| 4487 | * replace tree blocks in the fs tree with tree blocks in | ||
| 4488 | * the reloc tree. | ||
| 4489 | */ | ||
| 4490 | ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); | ||
| 4491 | BUG_ON(ret < 0); | ||
| 4492 | |||
| 4493 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
| 4494 | ret = invalidate_extent_cache(reloc_root, eb, group, root); | ||
| 4495 | BUG_ON(ret); | ||
| 4496 | free_extent_buffer(eb); | ||
| 4497 | } | ||
| 4498 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
| 4499 | |||
| 4500 | path->lowest_level = 0; | ||
| 4501 | kfree(nodes); | ||
| 4502 | kfree(keys); | ||
| 4503 | return 0; | ||
| 4504 | } | ||
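
To make the two merge passes concrete: for a three-level path in which only the leaf has already been COWed through another reloc tree, the first loop records the leaf's new bytenr and leaves lowest_merge at 0, so the first btrfs_merge_path() call only has to install that one shared leaf into this reloc tree. After the COWing search, the second loop snapshots every block in the path that the reloc tree now owns, and the final btrfs_merge_path() call installs those same blocks into the fs tree, keeping the metadata shared between the two.
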
| 4505 | |||
| 4506 | static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, | ||
| 4507 | struct btrfs_root *root, | ||
| 4508 | struct btrfs_path *path, | ||
| 4509 | struct btrfs_key *first_key, | ||
| 4510 | struct btrfs_ref_path *ref_path) | ||
| 4511 | { | ||
| 4512 | int ret; | ||
| 4513 | int needs_lock = 0; | ||
| 4514 | |||
| 4515 | if (root == root->fs_info->extent_root || | ||
| 4516 | root == root->fs_info->chunk_root || | ||
| 4517 | root == root->fs_info->dev_root) { | ||
| 4518 | needs_lock = 1; | ||
| 4519 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4520 | } | ||
| 4521 | |||
| 4522 | ret = relocate_one_path(trans, root, path, first_key, | ||
| 4523 | ref_path, NULL, NULL); | ||
| 4524 | BUG_ON(ret); | ||
| 4525 | |||
| 4526 | if (root == root->fs_info->extent_root) | ||
| 4527 | btrfs_extent_post_op(trans, root); | ||
| 4528 | if (needs_lock) | ||
| 4529 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4530 | |||
| 4531 | return 0; | ||
| 4532 | } | ||
| 4533 | |||
| 4534 | static int noinline del_extent_zero(struct btrfs_trans_handle *trans, | ||
| 4535 | struct btrfs_root *extent_root, | ||
| 4536 | struct btrfs_path *path, | ||
| 4537 | struct btrfs_key *extent_key) | ||
| 4538 | { | ||
| 4539 | int ret; | ||
| 4540 | |||
| 4541 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 4542 | ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); | ||
| 4543 | if (ret) | ||
| 4544 | goto out; | ||
| 4545 | ret = btrfs_del_item(trans, extent_root, path); | ||
| 4546 | out: | ||
| 4547 | btrfs_release_path(extent_root, path); | ||
| 4548 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 4549 | return ret; | ||
| 4550 | } | ||
| 4551 | |||
| 4552 | static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, | ||
| 4553 | struct btrfs_ref_path *ref_path) | ||
| 4554 | { | ||
| 4555 | struct btrfs_key root_key; | ||
| 4556 | |||
| 4557 | root_key.objectid = ref_path->root_objectid; | ||
| 4558 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 4559 | if (is_cowonly_root(ref_path->root_objectid)) | ||
| 4560 | root_key.offset = 0; | ||
| 4561 | else | ||
| 4562 | root_key.offset = (u64)-1; | ||
| 4563 | |||
| 4564 | return btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
| 4565 | } | ||
| 4566 | |||
| 4567 | static int noinline relocate_one_extent(struct btrfs_root *extent_root, | ||
| 4568 | struct btrfs_path *path, | ||
| 4569 | struct btrfs_key *extent_key, | ||
| 4570 | struct btrfs_block_group_cache *group, | ||
| 4571 | struct inode *reloc_inode, int pass) | ||
| 4572 | { | ||
| 4573 | struct btrfs_trans_handle *trans; | ||
| 4574 | struct btrfs_root *found_root; | ||
| 4575 | struct btrfs_ref_path *ref_path = NULL; | ||
| 4576 | struct disk_extent *new_extents = NULL; | ||
| 4577 | int nr_extents = 0; | ||
| 4578 | int loops; | ||
| 4579 | int ret; | ||
| 4580 | int level; | ||
| 4581 | struct btrfs_key first_key; | ||
| 4582 | u64 prev_block = 0; | ||
| 4583 | |||
| 4584 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
| 4585 | |||
| 4586 | trans = btrfs_start_transaction(extent_root, 1); | ||
| 4587 | BUG_ON(!trans); | ||
| 4588 | |||
| 4589 | if (extent_key->objectid == 0) { | ||
| 4590 | ret = del_extent_zero(trans, extent_root, path, extent_key); | ||
| 4591 | goto out; | ||
| 4592 | } | ||
| 4593 | |||
| 4594 | ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); | ||
| 4595 | if (!ref_path) { | ||
| 4596 | ret = -ENOMEM; | ||
| 4597 | goto out; | ||
| 4598 | } | ||
| 4599 | |||
| 4600 | for (loops = 0; ; loops++) { | ||
| 4601 | if (loops == 0) { | ||
| 4602 | ret = btrfs_first_ref_path(trans, extent_root, ref_path, | ||
| 4603 | extent_key->objectid); | ||
| 4604 | } else { | ||
| 4605 | ret = btrfs_next_ref_path(trans, extent_root, ref_path); | ||
| 4606 | } | ||
| 4607 | if (ret < 0) | ||
| 4608 | goto out; | ||
| 4609 | if (ret > 0) | ||
| 4610 | break; | ||
| 4611 | |||
| 4612 | if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || | ||
| 4613 | ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
| 4614 | continue; | ||
| 4615 | |||
| 4616 | found_root = read_ref_root(extent_root->fs_info, ref_path); | ||
| 4617 | BUG_ON(!found_root); | ||
| 4618 | /* | ||
| 4619 | * for reference counted trees, only process reference paths | ||
| 4620 | * rooted at the latest committed root. | ||
| 4621 | */ | ||
| 4622 | if (found_root->ref_cows && | ||
| 4623 | ref_path->root_generation != found_root->root_key.offset) | ||
| 4624 | continue; | ||
| 4625 | |||
| 4626 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
| 4627 | if (pass == 0) { | ||
| 4628 | /* | ||
| 4629 | * copy data extents to new locations | ||
| 4630 | */ | ||
| 4631 | u64 group_start = group->key.objectid; | ||
| 4632 | ret = relocate_data_extent(reloc_inode, | ||
| 4633 | extent_key, | ||
| 4634 | group_start); | ||
| 4635 | if (ret < 0) | ||
| 4636 | goto out; | ||
| 4637 | break; | ||
| 4638 | } | ||
| 4639 | level = 0; | ||
| 4640 | } else { | ||
| 4641 | level = ref_path->owner_objectid; | ||
| 4642 | } | ||
| 4643 | |||
| 4644 | if (prev_block != ref_path->nodes[level]) { | ||
| 4645 | struct extent_buffer *eb; | ||
| 4646 | u64 block_start = ref_path->nodes[level]; | ||
| 4647 | u64 block_size = btrfs_level_size(found_root, level); | ||
| 4648 | |||
| 4649 | eb = read_tree_block(found_root, block_start, | ||
| 4650 | block_size, 0); | ||
| 4651 | btrfs_tree_lock(eb); | ||
| 4652 | BUG_ON(level != btrfs_header_level(eb)); | ||
| 4653 | |||
| 4654 | if (level == 0) | ||
| 4655 | btrfs_item_key_to_cpu(eb, &first_key, 0); | ||
| 4656 | else | ||
| 4657 | btrfs_node_key_to_cpu(eb, &first_key, 0); | ||
| 4658 | |||
| 4659 | btrfs_tree_unlock(eb); | ||
| 4660 | free_extent_buffer(eb); | ||
| 4661 | prev_block = block_start; | ||
| 4662 | } | ||
| 4663 | |||
| 4664 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && | ||
| 4665 | pass >= 2) { | ||
| 4666 | /* | ||
| 4667 | * use the fallback method to process the remaining | ||
| 4668 | * references. | ||
| 4669 | */ | ||
| 4670 | if (!new_extents) { | ||
| 4671 | u64 group_start = group->key.objectid; | ||
| 4672 | ret = get_new_locations(reloc_inode, | ||
| 4673 | extent_key, | ||
| 4674 | group_start, 0, | ||
| 4675 | &new_extents, | ||
| 4676 | &nr_extents); | ||
| 4677 | if (ret < 0) | ||
| 4678 | goto out; | ||
| 4679 | } | ||
| 4680 | btrfs_record_root_in_trans(found_root); | ||
| 4681 | ret = replace_one_extent(trans, found_root, | ||
| 4682 | path, extent_key, | ||
| 4683 | &first_key, ref_path, | ||
| 4684 | new_extents, nr_extents); | ||
| 4685 | if (ret < 0) | ||
| 4686 | goto out; | ||
| 4687 | continue; | ||
| 4688 | } | ||
| 4689 | |||
| 4690 | btrfs_record_root_in_trans(found_root); | ||
| 4691 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
| 4692 | ret = relocate_tree_block(trans, found_root, path, | ||
| 4693 | &first_key, ref_path); | ||
| 4694 | } else { | ||
| 4695 | /* | ||
| 4696 | * try to update data extent references while | ||
| 4697 | * keeping metadata shared between snapshots. | ||
| 4698 | */ | ||
| 4699 | ret = relocate_one_path(trans, found_root, path, | ||
| 4700 | &first_key, ref_path, | ||
| 4701 | group, reloc_inode); | ||
| 4702 | } | ||
| 4703 | if (ret < 0) | ||
| 4704 | goto out; | ||
| 4705 | } | ||
| 4706 | ret = 0; | ||
| 4707 | out: | ||
| 4708 | btrfs_end_transaction(trans, extent_root); | ||
| 4709 | kfree(new_extents); | ||
| 4710 | kfree(ref_path); | ||
| 4711 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
| 4712 | return ret; | ||
| 4713 | } | ||
| 4714 | |||
| 4715 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | ||
| 4716 | { | ||
| 4717 | u64 num_devices; | ||
| 4718 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | ||
| 4719 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | ||
| 4720 | |||
| 4721 | num_devices = root->fs_info->fs_devices->num_devices; | ||
| 4722 | if (num_devices == 1) { | ||
| 4723 | stripped |= BTRFS_BLOCK_GROUP_DUP; | ||
| 4724 | stripped = flags & ~stripped; | ||
| 4725 | |||
| 4726 | /* turn raid0 into single device chunks */ | ||
| 4727 | if (flags & BTRFS_BLOCK_GROUP_RAID0) | ||
| 4728 | return stripped; | ||
| 4729 | |||
| 4730 | /* turn mirroring into duplication */ | ||
| 4731 | if (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 4732 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 4733 | return stripped | BTRFS_BLOCK_GROUP_DUP; | ||
| 4734 | return flags; | ||
| 4735 | } else { | ||
| 4736 | /* they already had raid on here, just return */ | ||
| 4737 | if (flags & stripped) | ||
| 4738 | return flags; | ||
| 4739 | |||
| 4740 | stripped |= BTRFS_BLOCK_GROUP_DUP; | ||
| 4741 | stripped = flags & ~stripped; | ||
| 4742 | |||
| 4743 | /* switch duplicated blocks with raid1 */ | ||
| 4744 | if (flags & BTRFS_BLOCK_GROUP_DUP) | ||
| 4745 | return stripped | BTRFS_BLOCK_GROUP_RAID1; | ||
| 4746 | |||
| 4747 | /* turn single device chunks into raid0 */ | ||
| 4748 | return stripped | BTRFS_BLOCK_GROUP_RAID0; | ||
| 4749 | } | ||
| 4750 | return flags; | ||
| 4751 | } | ||
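
The conversion above is pure bit arithmetic on the chunk-type flags, so it can be exercised in isolation. A minimal userspace sketch of the same decision table, using stand-in bit values rather than the real BTRFS_BLOCK_GROUP_* constants from ctree.h:

#include <stdio.h>
#include <stdint.h>

/* stand-in values; the real constants are defined in ctree.h */
#define BG_RAID0  (1ULL << 3)
#define BG_RAID1  (1ULL << 4)
#define BG_DUP    (1ULL << 5)
#define BG_RAID10 (1ULL << 6)

static uint64_t convert_flags(uint64_t flags, uint64_t num_devices)
{
	uint64_t stripped = BG_RAID0 | BG_RAID1 | BG_RAID10;

	if (num_devices == 1) {
		stripped |= BG_DUP;
		stripped = flags & ~stripped;
		if (flags & BG_RAID0)			/* raid0 -> single */
			return stripped;
		if (flags & (BG_RAID1 | BG_RAID10))	/* mirror -> dup */
			return stripped | BG_DUP;
		return flags;
	}
	if (flags & stripped)				/* already raid */
		return flags;
	stripped |= BG_DUP;
	stripped = flags & ~stripped;
	if (flags & BG_DUP)				/* dup -> raid1 */
		return stripped | BG_RAID1;
	return stripped | BG_RAID0;			/* single -> raid0 */
}

int main(void)
{
	/* RAID1 data on a one-device fs degrades to DUP */
	printf("%llx\n", (unsigned long long)convert_flags(BG_RAID1, 1));
	return 0;
}
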
| 4752 | |||
| 4753 | int __alloc_chunk_for_shrink(struct btrfs_root *root, | ||
| 4754 | struct btrfs_block_group_cache *shrink_block_group, | ||
| 4755 | int force) | ||
| 4756 | { | ||
| 4757 | struct btrfs_trans_handle *trans; | ||
| 4758 | u64 new_alloc_flags; | ||
| 4759 | u64 calc; | ||
| 4760 | |||
| 4761 | spin_lock(&shrink_block_group->lock); | ||
| 4762 | if (btrfs_block_group_used(&shrink_block_group->item) > 0) { | ||
| 4763 | spin_unlock(&shrink_block_group->lock); | ||
| 4764 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4765 | |||
| 4766 | trans = btrfs_start_transaction(root, 1); | ||
| 4767 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4768 | spin_lock(&shrink_block_group->lock); | ||
| 4769 | |||
| 4770 | new_alloc_flags = update_block_group_flags(root, | ||
| 4771 | shrink_block_group->flags); | ||
| 4772 | if (new_alloc_flags != shrink_block_group->flags) { | ||
| 4773 | calc = | ||
| 4774 | btrfs_block_group_used(&shrink_block_group->item); | ||
| 4775 | } else { | ||
| 4776 | calc = shrink_block_group->key.offset; | ||
| 4777 | } | ||
| 4778 | spin_unlock(&shrink_block_group->lock); | ||
| 4779 | |||
| 4780 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 4781 | calc + 2 * 1024 * 1024, new_alloc_flags, force); | ||
| 4782 | |||
| 4783 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4784 | btrfs_end_transaction(trans, root); | ||
| 4785 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4786 | } else | ||
| 4787 | spin_unlock(&shrink_block_group->lock); | ||
| 4788 | return 0; | ||
| 4789 | } | ||
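
For example, when a half-used RAID1 block group is shrunk on a filesystem that is down to one device, update_block_group_flags() converts the profile to DUP; because the flags changed, calc is the used byte count rather than the full key.offset, and do_chunk_alloc() is asked for calc + 2MB of the new profile so the surviving extents have somewhere to land.
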
| 4790 | |||
| 4791 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | ||
| 4792 | struct btrfs_root *root, | ||
| 4793 | u64 objectid, u64 size) | ||
| 4794 | { | ||
| 4795 | struct btrfs_path *path; | ||
| 4796 | struct btrfs_inode_item *item; | ||
| 4797 | struct extent_buffer *leaf; | ||
| 4798 | int ret; | ||
| 4799 | |||
| 4800 | path = btrfs_alloc_path(); | ||
| 4801 | if (!path) | ||
| 4802 | return -ENOMEM; | ||
| 4803 | |||
| 4804 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
| 4805 | if (ret) | ||
| 4806 | goto out; | ||
| 4807 | |||
| 4808 | leaf = path->nodes[0]; | ||
| 4809 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
| 4810 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
| 4811 | btrfs_set_inode_generation(leaf, item, 1); | ||
| 4812 | btrfs_set_inode_size(leaf, item, size); | ||
| 4813 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
| 4814 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM); | ||
| 4815 | btrfs_mark_buffer_dirty(leaf); | ||
| 4816 | btrfs_release_path(root, path); | ||
| 4817 | out: | ||
| 4818 | btrfs_free_path(path); | ||
| 4819 | return ret; | ||
| 4820 | } | ||
| 4821 | |||
| 4822 | static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
| 4823 | struct btrfs_block_group_cache *group) | ||
| 4824 | { | ||
| 4825 | struct inode *inode = NULL; | ||
| 4826 | struct btrfs_trans_handle *trans; | ||
| 4827 | struct btrfs_root *root; | ||
| 4828 | struct btrfs_key root_key; | ||
| 4829 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | ||
| 4830 | int err = 0; | ||
| 4831 | |||
| 4832 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | ||
| 4833 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 4834 | root_key.offset = (u64)-1; | ||
| 4835 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
| 4836 | if (IS_ERR(root)) | ||
| 4837 | return ERR_CAST(root); | ||
| 4838 | |||
| 4839 | trans = btrfs_start_transaction(root, 1); | ||
| 4840 | BUG_ON(!trans); | ||
| 4841 | |||
| 4842 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | ||
| 4843 | if (err) | ||
| 4844 | goto out; | ||
| 4845 | |||
| 4846 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | ||
| 4847 | BUG_ON(err); | ||
| 4848 | |||
| 4849 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
| 4850 | group->key.offset, 0); | ||
| 4851 | BUG_ON(err); | ||
| 4852 | |||
| 4853 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
| 4854 | if (inode->i_state & I_NEW) { | ||
| 4855 | BTRFS_I(inode)->root = root; | ||
| 4856 | BTRFS_I(inode)->location.objectid = objectid; | ||
| 4857 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
| 4858 | BTRFS_I(inode)->location.offset = 0; | ||
| 4859 | btrfs_read_locked_inode(inode); | ||
| 4860 | unlock_new_inode(inode); | ||
| 4861 | BUG_ON(is_bad_inode(inode)); | ||
| 4862 | } else { | ||
| 4863 | BUG_ON(1); | ||
| 4864 | } | ||
| 4865 | |||
| 4866 | err = btrfs_orphan_add(trans, inode); | ||
| 4867 | out: | ||
| 4868 | btrfs_end_transaction(trans, root); | ||
| 4869 | if (err) { | ||
| 4870 | if (inode) | ||
| 4871 | iput(inode); | ||
| 4872 | inode = ERR_PTR(err); | ||
| 4873 | } | ||
| 4874 | return inode; | ||
| 4875 | } | ||
| 4876 | |||
| 4877 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
| 4878 | { | ||
| 4879 | struct btrfs_trans_handle *trans; | ||
| 4880 | struct btrfs_path *path; | ||
| 4881 | struct btrfs_fs_info *info = root->fs_info; | ||
| 4882 | struct extent_buffer *leaf; | ||
| 4883 | struct inode *reloc_inode; | ||
| 4884 | struct btrfs_block_group_cache *block_group; | ||
| 4885 | struct btrfs_key key; | ||
| 4886 | u64 cur_byte; | ||
| 4887 | u64 total_found; | ||
| 4888 | u32 nritems; | ||
| 4889 | int ret; | ||
| 4890 | int progress; | ||
| 4891 | int pass = 0; | ||
| 4892 | |||
| 4893 | root = root->fs_info->extent_root; | ||
| 4894 | |||
| 4895 | block_group = btrfs_lookup_block_group(info, group_start); | ||
| 4896 | BUG_ON(!block_group); | ||
| 4897 | |||
| 4898 | printk("btrfs relocating block group %llu flags %llu\n", | ||
| 4899 | (unsigned long long)block_group->key.objectid, | ||
| 4900 | (unsigned long long)block_group->flags); | ||
| 4901 | |||
| 4902 | path = btrfs_alloc_path(); | ||
| 4903 | BUG_ON(!path); | ||
| 4904 | |||
| 4905 | reloc_inode = create_reloc_inode(info, block_group); | ||
| 4906 | BUG_ON(IS_ERR(reloc_inode)); | ||
| 4907 | |||
| 4908 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4909 | |||
| 4910 | __alloc_chunk_for_shrink(root, block_group, 1); | ||
| 4911 | block_group->ro = 1; | ||
| 4912 | block_group->space_info->total_bytes -= block_group->key.offset; | ||
| 4913 | |||
| 4914 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4915 | |||
| 4916 | btrfs_start_delalloc_inodes(info->tree_root); | ||
| 4917 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
| 4918 | again: | ||
| 4919 | total_found = 0; | ||
| 4920 | progress = 0; | ||
| 4921 | key.objectid = block_group->key.objectid; | ||
| 4922 | key.offset = 0; | ||
| 4923 | key.type = 0; | ||
| 4924 | cur_byte = key.objectid; | ||
| 4925 | |||
| 4926 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
| 4927 | btrfs_commit_transaction(trans, info->tree_root); | ||
| 4928 | |||
| 4929 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
| 4930 | btrfs_clean_old_snapshots(info->tree_root); | ||
| 4931 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | ||
| 4932 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 4933 | |||
| 4934 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4935 | |||
| 4936 | while(1) { | ||
| 4937 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 4938 | if (ret < 0) | ||
| 4939 | goto out; | ||
| 4940 | next: | ||
| 4941 | leaf = path->nodes[0]; | ||
| 4942 | nritems = btrfs_header_nritems(leaf); | ||
| 4943 | if (path->slots[0] >= nritems) { | ||
| 4944 | ret = btrfs_next_leaf(root, path); | ||
| 4945 | if (ret < 0) | ||
| 4946 | goto out; | ||
| 4947 | if (ret == 1) { | ||
| 4948 | ret = 0; | ||
| 4949 | break; | ||
| 4950 | } | ||
| 4951 | leaf = path->nodes[0]; | ||
| 4952 | nritems = btrfs_header_nritems(leaf); | ||
| 4953 | } | ||
| 4954 | |||
| 4955 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 4956 | |||
| 4957 | if (key.objectid >= block_group->key.objectid + | ||
| 4958 | block_group->key.offset) | ||
| 4959 | break; | ||
| 4960 | |||
| 4961 | if (progress && need_resched()) { | ||
| 4962 | btrfs_release_path(root, path); | ||
| 4963 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4964 | cond_resched(); | ||
| 4965 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4966 | progress = 0; | ||
| 4967 | continue; | ||
| 4968 | } | ||
| 4969 | progress = 1; | ||
| 4970 | |||
| 4971 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || | ||
| 4972 | key.objectid + key.offset <= cur_byte) { | ||
| 4973 | path->slots[0]++; | ||
| 4974 | goto next; | ||
| 4975 | } | ||
| 4976 | |||
| 4977 | total_found++; | ||
| 4978 | cur_byte = key.objectid + key.offset; | ||
| 4979 | btrfs_release_path(root, path); | ||
| 4980 | |||
| 4981 | __alloc_chunk_for_shrink(root, block_group, 0); | ||
| 4982 | ret = relocate_one_extent(root, path, &key, block_group, | ||
| 4983 | reloc_inode, pass); | ||
| 4984 | BUG_ON(ret < 0); | ||
| 4985 | |||
| 4986 | key.objectid = cur_byte; | ||
| 4987 | key.type = 0; | ||
| 4988 | key.offset = 0; | ||
| 4989 | } | ||
| 4990 | |||
| 4991 | btrfs_release_path(root, path); | ||
| 4992 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4993 | |||
| 4994 | if (pass == 0) { | ||
| 4995 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | ||
| 4996 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); | ||
| 4997 | WARN_ON(reloc_inode->i_mapping->nrpages); | ||
| 4998 | } | ||
| 4999 | |||
| 5000 | if (total_found > 0) { | ||
| 5001 | printk("btrfs found %llu extents in pass %d\n", | ||
| 5002 | (unsigned long long)total_found, pass); | ||
| 5003 | pass++; | ||
| 5004 | goto again; | ||
| 5005 | } | ||
| 5006 | |||
| 5007 | /* delete reloc_inode */ | ||
| 5008 | iput(reloc_inode); | ||
| 5009 | |||
| 5010 | /* unpin extents in this range */ | ||
| 5011 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
| 5012 | btrfs_commit_transaction(trans, info->tree_root); | ||
| 5013 | |||
| 5014 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 5015 | |||
| 5016 | spin_lock(&block_group->lock); | ||
| 5017 | WARN_ON(block_group->pinned > 0); | ||
| 5018 | WARN_ON(block_group->reserved > 0); | ||
| 5019 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | ||
| 5020 | spin_unlock(&block_group->lock); | ||
| 5021 | ret = 0; | ||
| 5022 | out: | ||
| 5023 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 5024 | btrfs_free_path(path); | ||
| 5025 | return ret; | ||
| 5026 | } | ||
| 5027 | |||
| 5028 | int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, | ||
| 5029 | struct btrfs_key *key) | ||
| 5030 | { | ||
| 5031 | int ret = 0; | ||
| 5032 | struct btrfs_key found_key; | ||
| 5033 | struct extent_buffer *leaf; | ||
| 5034 | int slot; | ||
| 5035 | |||
| 5036 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
| 5037 | if (ret < 0) | ||
| 5038 | goto out; | ||
| 5039 | |||
| 5040 | while(1) { | ||
| 5041 | slot = path->slots[0]; | ||
| 5042 | leaf = path->nodes[0]; | ||
| 5043 | if (slot >= btrfs_header_nritems(leaf)) { | ||
| 5044 | ret = btrfs_next_leaf(root, path); | ||
| 5045 | if (ret == 0) | ||
| 5046 | continue; | ||
| 5047 | if (ret < 0) | ||
| 5048 | goto out; | ||
| 5049 | break; | ||
| 5050 | } | ||
| 5051 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 5052 | |||
| 5053 | if (found_key.objectid >= key->objectid && | ||
| 5054 | found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { | ||
| 5055 | ret = 0; | ||
| 5056 | goto out; | ||
| 5057 | } | ||
| 5058 | path->slots[0]++; | ||
| 5059 | } | ||
| 5060 | ret = -ENOENT; | ||
| 5061 | out: | ||
| 5062 | return ret; | ||
| 5063 | } | ||
| 5064 | |||
| 5065 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | ||
| 5066 | { | ||
| 5067 | struct btrfs_block_group_cache *block_group; | ||
| 5068 | struct rb_node *n; | ||
| 5069 | |||
| 5070 | mutex_lock(&info->alloc_mutex); | ||
| 5071 | spin_lock(&info->block_group_cache_lock); | ||
| 5072 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | ||
| 5073 | block_group = rb_entry(n, struct btrfs_block_group_cache, | ||
| 5074 | cache_node); | ||
| 5075 | |||
| 5076 | spin_unlock(&info->block_group_cache_lock); | ||
| 5077 | btrfs_remove_free_space_cache(block_group); | ||
| 5078 | spin_lock(&info->block_group_cache_lock); | ||
| 5079 | |||
| 5080 | rb_erase(&block_group->cache_node, | ||
| 5081 | &info->block_group_cache_tree); | ||
| 5082 | spin_lock(&block_group->space_info->lock); | ||
| 5083 | list_del(&block_group->list); | ||
| 5084 | spin_unlock(&block_group->space_info->lock); | ||
| 5085 | kfree(block_group); | ||
| 5086 | } | ||
| 5087 | spin_unlock(&info->block_group_cache_lock); | ||
| 5088 | mutex_unlock(&info->alloc_mutex); | ||
| 5089 | return 0; | ||
| 5090 | } | ||
| 5091 | |||
| 5092 | int btrfs_read_block_groups(struct btrfs_root *root) | ||
| 5093 | { | ||
| 5094 | struct btrfs_path *path; | ||
| 5095 | int ret; | ||
| 5096 | struct btrfs_block_group_cache *cache; | ||
| 5097 | struct btrfs_fs_info *info = root->fs_info; | ||
| 5098 | struct btrfs_space_info *space_info; | ||
| 5099 | struct btrfs_key key; | ||
| 5100 | struct btrfs_key found_key; | ||
| 5101 | struct extent_buffer *leaf; | ||
| 5102 | |||
| 5103 | root = info->extent_root; | ||
| 5104 | key.objectid = 0; | ||
| 5105 | key.offset = 0; | ||
| 5106 | btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); | ||
| 5107 | path = btrfs_alloc_path(); | ||
| 5108 | if (!path) | ||
| 5109 | return -ENOMEM; | ||
| 5110 | |||
| 5111 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 5112 | while(1) { | ||
| 5113 | ret = find_first_block_group(root, path, &key); | ||
| 5114 | if (ret > 0) { | ||
| 5115 | ret = 0; | ||
| 5116 | goto error; | ||
| 5117 | } | ||
| 5118 | if (ret != 0) | ||
| 5119 | goto error; | ||
| 5120 | |||
| 5121 | leaf = path->nodes[0]; | ||
| 5122 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 5123 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | ||
| 5124 | if (!cache) { | ||
| 5125 | ret = -ENOMEM; | ||
| 5126 | break; | ||
| 5127 | } | ||
| 5128 | |||
| 5129 | spin_lock_init(&cache->lock); | ||
| 5130 | INIT_LIST_HEAD(&cache->list); | ||
| 5131 | read_extent_buffer(leaf, &cache->item, | ||
| 5132 | btrfs_item_ptr_offset(leaf, path->slots[0]), | ||
| 5133 | sizeof(cache->item)); | ||
| 5134 | memcpy(&cache->key, &found_key, sizeof(found_key)); | ||
| 5135 | |||
| 5136 | key.objectid = found_key.objectid + found_key.offset; | ||
| 5137 | btrfs_release_path(root, path); | ||
| 5138 | cache->flags = btrfs_block_group_flags(&cache->item); | ||
| 5139 | |||
| 5140 | ret = update_space_info(info, cache->flags, found_key.offset, | ||
| 5141 | btrfs_block_group_used(&cache->item), | ||
| 5142 | &space_info); | ||
| 5143 | BUG_ON(ret); | ||
| 5144 | cache->space_info = space_info; | ||
| 5145 | spin_lock(&space_info->lock); | ||
| 5146 | list_add(&cache->list, &space_info->block_groups); | ||
| 5147 | spin_unlock(&space_info->lock); | ||
| 5148 | |||
| 5149 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | ||
| 5150 | BUG_ON(ret); | ||
| 5151 | |||
| 5152 | set_avail_alloc_bits(root->fs_info, cache->flags); | ||
| 5153 | } | ||
| 5154 | ret = 0; | ||
| 5155 | error: | ||
| 5156 | btrfs_free_path(path); | ||
| 5157 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 5158 | return ret; | ||
| 5159 | } | ||
| 5160 | |||
| 5161 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | ||
| 5162 | struct btrfs_root *root, u64 bytes_used, | ||
| 5163 | u64 type, u64 chunk_objectid, u64 chunk_offset, | ||
| 5164 | u64 size) | ||
| 5165 | { | ||
| 5166 | int ret; | ||
| 5167 | struct btrfs_root *extent_root; | ||
| 5168 | struct btrfs_block_group_cache *cache; | ||
| 5169 | |||
| 5170 | WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 5171 | extent_root = root->fs_info->extent_root; | ||
| 5172 | |||
| 5173 | root->fs_info->last_trans_new_blockgroup = trans->transid; | ||
| 5174 | |||
| 5175 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | ||
| 5176 | if (!cache) | ||
| 5177 | return -ENOMEM; | ||
| 5178 | |||
| 5179 | cache->key.objectid = chunk_offset; | ||
| 5180 | cache->key.offset = size; | ||
| 5181 | spin_lock_init(&cache->lock); | ||
| 5182 | INIT_LIST_HEAD(&cache->list); | ||
| 5183 | btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); | ||
| 5184 | |||
| 5185 | btrfs_set_block_group_used(&cache->item, bytes_used); | ||
| 5186 | btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); | ||
| 5187 | cache->flags = type; | ||
| 5188 | btrfs_set_block_group_flags(&cache->item, type); | ||
| 5189 | |||
| 5190 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | ||
| 5191 | &cache->space_info); | ||
| 5192 | BUG_ON(ret); | ||
| 5193 | spin_lock(&cache->space_info->lock); | ||
| 5194 | list_add(&cache->list, &cache->space_info->block_groups); | ||
| 5195 | spin_unlock(&cache->space_info->lock); | ||
| 5196 | |||
| 5197 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | ||
| 5198 | BUG_ON(ret); | ||
| 5199 | |||
| 5200 | ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, | ||
| 5201 | sizeof(cache->item)); | ||
| 5202 | BUG_ON(ret); | ||
| 5203 | |||
| 5204 | finish_current_insert(trans, extent_root); | ||
| 5205 | ret = del_pending_extents(trans, extent_root); | ||
| 5206 | BUG_ON(ret); | ||
| 5207 | set_avail_alloc_bits(extent_root->fs_info, type); | ||
| 5208 | |||
| 5209 | return 0; | ||
| 5210 | } | ||
| 5211 | |||
| 5212 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | ||
| 5213 | struct btrfs_root *root, u64 group_start) | ||
| 5214 | { | ||
| 5215 | struct btrfs_path *path; | ||
| 5216 | struct btrfs_block_group_cache *block_group; | ||
| 5217 | struct btrfs_key key; | ||
| 5218 | int ret; | ||
| 5219 | |||
| 5220 | BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); | ||
| 5221 | root = root->fs_info->extent_root; | ||
| 5222 | |||
| 5223 | block_group = btrfs_lookup_block_group(root->fs_info, group_start); | ||
| 5224 | BUG_ON(!block_group); | ||
| 5225 | |||
| 5226 | memcpy(&key, &block_group->key, sizeof(key)); | ||
| 5227 | |||
| 5228 | path = btrfs_alloc_path(); | ||
| 5229 | BUG_ON(!path); | ||
| 5230 | |||
| 5231 | btrfs_remove_free_space_cache(block_group); | ||
| 5232 | rb_erase(&block_group->cache_node, | ||
| 5233 | &root->fs_info->block_group_cache_tree); | ||
| 5234 | spin_lock(&block_group->space_info->lock); | ||
| 5235 | list_del(&block_group->list); | ||
| 5236 | spin_unlock(&block_group->space_info->lock); | ||
| 5237 | |||
| 5238 | /* | ||
| 5239 | memset(shrink_block_group, 0, sizeof(*shrink_block_group)); | ||
| 5240 | kfree(shrink_block_group); | ||
| 5241 | */ | ||
| 5242 | |||
| 5243 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 5244 | if (ret > 0) | ||
| 5245 | ret = -EIO; | ||
| 5246 | if (ret < 0) | ||
| 5247 | goto out; | ||
| 5248 | |||
| 5249 | ret = btrfs_del_item(trans, root, path); | ||
| 5250 | out: | ||
| 5251 | btrfs_free_path(path); | ||
| 5252 | return ret; | ||
| 5253 | } | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c new file mode 100644 index 00000000000..563b2d12f4f --- /dev/null +++ b/fs/btrfs/extent_io.c | |||
| @@ -0,0 +1,3416 @@ | |||
| 1 | #include <linux/bitops.h> | ||
| 2 | #include <linux/slab.h> | ||
| 3 | #include <linux/bio.h> | ||
| 4 | #include <linux/mm.h> | ||
| 5 | #include <linux/gfp.h> | ||
| 6 | #include <linux/pagemap.h> | ||
| 7 | #include <linux/page-flags.h> | ||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/spinlock.h> | ||
| 10 | #include <linux/blkdev.h> | ||
| 11 | #include <linux/swap.h> | ||
| 12 | #include <linux/version.h> | ||
| 13 | #include <linux/writeback.h> | ||
| 14 | #include <linux/pagevec.h> | ||
| 15 | #include "extent_io.h" | ||
| 16 | #include "extent_map.h" | ||
| 17 | #include "compat.h" | ||
| 18 | #include "ctree.h" | ||
| 19 | #include "btrfs_inode.h" | ||
| 20 | |||
| 21 | /* temporary define until extent_map moves out of btrfs */ | ||
| 22 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
| 23 | unsigned long extra_flags, | ||
| 24 | void (*ctor)(void *, struct kmem_cache *, | ||
| 25 | unsigned long)); | ||
| 26 | |||
| 27 | static struct kmem_cache *extent_state_cache; | ||
| 28 | static struct kmem_cache *extent_buffer_cache; | ||
| 29 | |||
| 30 | static LIST_HEAD(buffers); | ||
| 31 | static LIST_HEAD(states); | ||
| 32 | |||
| 33 | #ifdef LEAK_DEBUG | ||
| 34 | static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #define BUFFER_LRU_MAX 64 | ||
| 38 | |||
| 39 | struct tree_entry { | ||
| 40 | u64 start; | ||
| 41 | u64 end; | ||
| 42 | struct rb_node rb_node; | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct extent_page_data { | ||
| 46 | struct bio *bio; | ||
| 47 | struct extent_io_tree *tree; | ||
| 48 | get_extent_t *get_extent; | ||
| 49 | }; | ||
| 50 | |||
| 51 | int __init extent_io_init(void) | ||
| 52 | { | ||
| 53 | extent_state_cache = btrfs_cache_create("extent_state", | ||
| 54 | sizeof(struct extent_state), 0, | ||
| 55 | NULL); | ||
| 56 | if (!extent_state_cache) | ||
| 57 | return -ENOMEM; | ||
| 58 | |||
| 59 | extent_buffer_cache = btrfs_cache_create("extent_buffers", | ||
| 60 | sizeof(struct extent_buffer), 0, | ||
| 61 | NULL); | ||
| 62 | if (!extent_buffer_cache) | ||
| 63 | goto free_state_cache; | ||
| 64 | return 0; | ||
| 65 | |||
| 66 | free_state_cache: | ||
| 67 | kmem_cache_destroy(extent_state_cache); | ||
| 68 | return -ENOMEM; | ||
| 69 | } | ||
| 70 | |||
| 71 | void extent_io_exit(void) | ||
| 72 | { | ||
| 73 | struct extent_state *state; | ||
| 74 | struct extent_buffer *eb; | ||
| 75 | |||
| 76 | while (!list_empty(&states)) { | ||
| 77 | state = list_entry(states.next, struct extent_state, leak_list); | ||
| 78 | printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs)); | ||
| 79 | list_del(&state->leak_list); | ||
| 80 | kmem_cache_free(extent_state_cache, state); | ||
| 81 | |||
| 82 | } | ||
| 83 | |||
| 84 | while (!list_empty(&buffers)) { | ||
| 85 | eb = list_entry(buffers.next, struct extent_buffer, leak_list); | ||
| 86 | printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); | ||
| 87 | list_del(&eb->leak_list); | ||
| 88 | kmem_cache_free(extent_buffer_cache, eb); | ||
| 89 | } | ||
| 90 | if (extent_state_cache) | ||
| 91 | kmem_cache_destroy(extent_state_cache); | ||
| 92 | if (extent_buffer_cache) | ||
| 93 | kmem_cache_destroy(extent_buffer_cache); | ||
| 94 | } | ||
| 95 | |||
| 96 | void extent_io_tree_init(struct extent_io_tree *tree, | ||
| 97 | struct address_space *mapping, gfp_t mask) | ||
| 98 | { | ||
| 99 | tree->state.rb_node = NULL; | ||
| 100 | tree->buffer.rb_node = NULL; | ||
| 101 | tree->ops = NULL; | ||
| 102 | tree->dirty_bytes = 0; | ||
| 103 | spin_lock_init(&tree->lock); | ||
| 104 | spin_lock_init(&tree->buffer_lock); | ||
| 105 | tree->mapping = mapping; | ||
| 106 | } | ||
| 107 | EXPORT_SYMBOL(extent_io_tree_init); | ||
| 108 | |||
| 109 | struct extent_state *alloc_extent_state(gfp_t mask) | ||
| 110 | { | ||
| 111 | struct extent_state *state; | ||
| 112 | #ifdef LEAK_DEBUG | ||
| 113 | unsigned long flags; | ||
| 114 | #endif | ||
| 115 | |||
| 116 | state = kmem_cache_alloc(extent_state_cache, mask); | ||
| 117 | if (!state) | ||
| 118 | return state; | ||
| 119 | state->state = 0; | ||
| 120 | state->private = 0; | ||
| 121 | state->tree = NULL; | ||
| 122 | #ifdef LEAK_DEBUG | ||
| 123 | spin_lock_irqsave(&leak_lock, flags); | ||
| 124 | list_add(&state->leak_list, &states); | ||
| 125 | spin_unlock_irqrestore(&leak_lock, flags); | ||
| 126 | #endif | ||
| 127 | atomic_set(&state->refs, 1); | ||
| 128 | init_waitqueue_head(&state->wq); | ||
| 129 | return state; | ||
| 130 | } | ||
| 131 | EXPORT_SYMBOL(alloc_extent_state); | ||
| 132 | |||
| 133 | void free_extent_state(struct extent_state *state) | ||
| 134 | { | ||
| 135 | if (!state) | ||
| 136 | return; | ||
| 137 | if (atomic_dec_and_test(&state->refs)) { | ||
| 138 | #ifdef LEAK_DEBUG | ||
| 139 | unsigned long flags; | ||
| 140 | #endif | ||
| 141 | WARN_ON(state->tree); | ||
| 142 | #ifdef LEAK_DEBUG | ||
| 143 | spin_lock_irqsave(&leak_lock, flags); | ||
| 144 | list_del(&state->leak_list); | ||
| 145 | spin_unlock_irqrestore(&leak_lock, flags); | ||
| 146 | #endif | ||
| 147 | kmem_cache_free(extent_state_cache, state); | ||
| 148 | } | ||
| 149 | } | ||
| 150 | EXPORT_SYMBOL(free_extent_state); | ||
| 151 | |||
| 152 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | ||
| 153 | struct rb_node *node) | ||
| 154 | { | ||
| 155 | struct rb_node **p = &root->rb_node; | ||
| 156 | struct rb_node *parent = NULL; | ||
| 157 | struct tree_entry *entry; | ||
| 158 | |||
| 159 | while(*p) { | ||
| 160 | parent = *p; | ||
| 161 | entry = rb_entry(parent, struct tree_entry, rb_node); | ||
| 162 | |||
| 163 | if (offset < entry->start) | ||
| 164 | p = &(*p)->rb_left; | ||
| 165 | else if (offset > entry->end) | ||
| 166 | p = &(*p)->rb_right; | ||
| 167 | else | ||
| 168 | return parent; | ||
| 169 | } | ||
| 170 | |||
| 171 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
| 172 | rb_link_node(node, parent, p); | ||
| 173 | rb_insert_color(node, root); | ||
| 174 | return NULL; | ||
| 175 | } | ||
| 176 | |||
| 177 | static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, | ||
| 178 | struct rb_node **prev_ret, | ||
| 179 | struct rb_node **next_ret) | ||
| 180 | { | ||
| 181 | struct rb_root *root = &tree->state; | ||
| 182 | struct rb_node *n = root->rb_node; | ||
| 183 | struct rb_node *prev = NULL; | ||
| 184 | struct rb_node *orig_prev = NULL; | ||
| 185 | struct tree_entry *entry; | ||
| 186 | struct tree_entry *prev_entry = NULL; | ||
| 187 | |||
| 188 | while(n) { | ||
| 189 | entry = rb_entry(n, struct tree_entry, rb_node); | ||
| 190 | prev = n; | ||
| 191 | prev_entry = entry; | ||
| 192 | |||
| 193 | if (offset < entry->start) | ||
| 194 | n = n->rb_left; | ||
| 195 | else if (offset > entry->end) | ||
| 196 | n = n->rb_right; | ||
| 197 | else { | ||
| 198 | return n; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | if (prev_ret) { | ||
| 203 | orig_prev = prev; | ||
| 204 | while(prev && offset > prev_entry->end) { | ||
| 205 | prev = rb_next(prev); | ||
| 206 | prev_entry = rb_entry(prev, struct tree_entry, rb_node); | ||
| 207 | } | ||
| 208 | *prev_ret = prev; | ||
| 209 | prev = orig_prev; | ||
| 210 | } | ||
| 211 | |||
| 212 | if (next_ret) { | ||
| 213 | prev_entry = rb_entry(prev, struct tree_entry, rb_node); | ||
| 214 | while(prev && offset < prev_entry->start) { | ||
| 215 | prev = rb_prev(prev); | ||
| 216 | prev_entry = rb_entry(prev, struct tree_entry, rb_node); | ||
| 217 | } | ||
| 218 | *next_ret = prev; | ||
| 219 | } | ||
| 220 | return NULL; | ||
| 221 | } | ||
| 222 | |||
| 223 | static inline struct rb_node *tree_search(struct extent_io_tree *tree, | ||
| 224 | u64 offset) | ||
| 225 | { | ||
| 226 | struct rb_node *prev = NULL; | ||
| 227 | struct rb_node *ret; | ||
| 228 | |||
| 229 | ret = __etree_search(tree, offset, &prev, NULL); | ||
| 230 | if (!ret) { | ||
| 231 | return prev; | ||
| 232 | } | ||
| 233 | return ret; | ||
| 234 | } | ||
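
The tree holds non-overlapping [start, end] ranges sorted by offset, and tree_search() returns either the entry containing the offset or, failing that, the first entry past it. A tiny array-based sketch of the same lookup semantics (illustrative only, not part of the extent code):

#include <stdio.h>
#include <stdint.h>

struct range {
	uint64_t start;
	uint64_t end;	/* inclusive */
};

/* index of the first range whose end is >= offset, or -1 if none --
 * the same answer tree_search() produces over the rbtree */
static int range_search(const struct range *r, int n, uint64_t offset)
{
	for (int i = 0; i < n; i++)
		if (r[i].end >= offset)
			return i;
	return -1;
}

int main(void)
{
	struct range r[] = { { 0, 4095 }, { 8192, 12287 } };

	printf("%d\n", range_search(r, 2, 9000));	/* 1: containing range */
	printf("%d\n", range_search(r, 2, 5000));	/* 1: next range past the hole */
	printf("%d\n", range_search(r, 2, 20000));	/* -1: nothing ends that late */
	return 0;
}
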
| 235 | |||
| 236 | static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, | ||
| 237 | u64 offset, struct rb_node *node) | ||
| 238 | { | ||
| 239 | struct rb_root *root = &tree->buffer; | ||
| 240 | struct rb_node **p = &root->rb_node; | ||
| 241 | struct rb_node *parent = NULL; | ||
| 242 | struct extent_buffer *eb; | ||
| 243 | |||
| 244 | while(*p) { | ||
| 245 | parent = *p; | ||
| 246 | eb = rb_entry(parent, struct extent_buffer, rb_node); | ||
| 247 | |||
| 248 | if (offset < eb->start) | ||
| 249 | p = &(*p)->rb_left; | ||
| 250 | else if (offset > eb->start) | ||
| 251 | p = &(*p)->rb_right; | ||
| 252 | else | ||
| 253 | return eb; | ||
| 254 | } | ||
| 255 | |||
| 256 | rb_link_node(node, parent, p); | ||
| 257 | rb_insert_color(node, root); | ||
| 258 | return NULL; | ||
| 259 | } | ||
| 260 | |||
| 261 | static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | ||
| 262 | u64 offset) | ||
| 263 | { | ||
| 264 | struct rb_root *root = &tree->buffer; | ||
| 265 | struct rb_node *n = root->rb_node; | ||
| 266 | struct extent_buffer *eb; | ||
| 267 | |||
| 268 | while(n) { | ||
| 269 | eb = rb_entry(n, struct extent_buffer, rb_node); | ||
| 270 | if (offset < eb->start) | ||
| 271 | n = n->rb_left; | ||
| 272 | else if (offset > eb->start) | ||
| 273 | n = n->rb_right; | ||
| 274 | else | ||
| 275 | return eb; | ||
| 276 | } | ||
| 277 | return NULL; | ||
| 278 | } | ||
| 279 | |||
| 280 | /* | ||
| 281 | * utility function to look for merge candidates inside a given range. | ||
| 282 | * Any extents with matching state are merged together into a single | ||
| 283 | * extent in the tree. Extents with EXTENT_IOBITS set in their state field | ||
| 284 | * are not merged because the end_io handlers need to be able to do | ||
| 285 | * operations on them without sleeping (or doing allocations/splits). | ||
| 286 | * | ||
| 287 | * This should be called with the tree lock held. | ||
| 288 | */ | ||
| 289 | static int merge_state(struct extent_io_tree *tree, | ||
| 290 | struct extent_state *state) | ||
| 291 | { | ||
| 292 | struct extent_state *other; | ||
| 293 | struct rb_node *other_node; | ||
| 294 | |||
| 295 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | ||
| 296 | return 0; | ||
| 297 | |||
| 298 | other_node = rb_prev(&state->rb_node); | ||
| 299 | if (other_node) { | ||
| 300 | other = rb_entry(other_node, struct extent_state, rb_node); | ||
| 301 | if (other->end == state->start - 1 && | ||
| 302 | other->state == state->state) { | ||
| 303 | state->start = other->start; | ||
| 304 | other->tree = NULL; | ||
| 305 | rb_erase(&other->rb_node, &tree->state); | ||
| 306 | free_extent_state(other); | ||
| 307 | } | ||
| 308 | } | ||
| 309 | other_node = rb_next(&state->rb_node); | ||
| 310 | if (other_node) { | ||
| 311 | other = rb_entry(other_node, struct extent_state, rb_node); | ||
| 312 | if (other->start == state->end + 1 && | ||
| 313 | other->state == state->state) { | ||
| 314 | other->start = state->start; | ||
| 315 | state->tree = NULL; | ||
| 316 | rb_erase(&state->rb_node, &tree->state); | ||
| 317 | free_extent_state(state); | ||
| 318 | } | ||
| 319 | } | ||
| 320 | return 0; | ||
| 321 | } | ||
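
Concretely: if [0, 4095] and [4096, 8191] both carry exactly EXTENT_DIRTY, calling merge_state() on the second record absorbs its neighbor and leaves a single [0, 8191] entry. If either record also carried a lock or writeback bit, no merge happens: the two states would differ, and the EXTENT_IOBITS check at the top bails out anyway.
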
| 322 | |||
| 323 | static void set_state_cb(struct extent_io_tree *tree, | ||
| 324 | struct extent_state *state, | ||
| 325 | unsigned long bits) | ||
| 326 | { | ||
| 327 | if (tree->ops && tree->ops->set_bit_hook) { | ||
| 328 | tree->ops->set_bit_hook(tree->mapping->host, state->start, | ||
| 329 | state->end, state->state, bits); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | |||
| 333 | static void clear_state_cb(struct extent_io_tree *tree, | ||
| 334 | struct extent_state *state, | ||
| 335 | unsigned long bits) | ||
| 336 | { | ||
| 337 | if (tree->ops && tree->ops->clear_bit_hook) { | ||
| 338 | tree->ops->clear_bit_hook(tree->mapping->host, state->start, | ||
| 339 | state->end, state->state, bits); | ||
| 340 | } | ||
| 341 | } | ||
| 342 | |||
| 343 | /* | ||
| 344 | * insert an extent_state struct into the tree. 'bits' are set on the | ||
| 345 | * struct before it is inserted. | ||
| 346 | * | ||
| 347 | * This may return -EEXIST if the extent is already there, in which case the | ||
| 348 | * state struct is freed. | ||
| 349 | * | ||
| 350 | * The tree lock is not taken internally. This is a utility function and | ||
| 351 | * probably isn't what you want to call (see set/clear_extent_bit). | ||
| 352 | */ | ||
| 353 | static int insert_state(struct extent_io_tree *tree, | ||
| 354 | struct extent_state *state, u64 start, u64 end, | ||
| 355 | int bits) | ||
| 356 | { | ||
| 357 | struct rb_node *node; | ||
| 358 | |||
| 359 | if (end < start) { | ||
| 360 | printk("end < start %Lu %Lu\n", end, start); | ||
| 361 | WARN_ON(1); | ||
| 362 | } | ||
| 363 | if (bits & EXTENT_DIRTY) | ||
| 364 | tree->dirty_bytes += end - start + 1; | ||
| 365 | set_state_cb(tree, state, bits); | ||
| 366 | state->state |= bits; | ||
| 367 | state->start = start; | ||
| 368 | state->end = end; | ||
| 369 | node = tree_insert(&tree->state, end, &state->rb_node); | ||
| 370 | if (node) { | ||
| 371 | struct extent_state *found; | ||
| 372 | found = rb_entry(node, struct extent_state, rb_node); | ||
| 373 | printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); | ||
| 374 | free_extent_state(state); | ||
| 375 | return -EEXIST; | ||
| 376 | } | ||
| 377 | state->tree = tree; | ||
| 378 | merge_state(tree, state); | ||
| 379 | return 0; | ||
| 380 | } | ||
| 381 | |||
| 382 | /* | ||
| 383 | * split a given extent state struct in two, inserting the preallocated | ||
| 384 | * struct 'prealloc' as the newly created second half. 'split' indicates an | ||
| 385 | * offset inside 'orig' where it should be split. | ||
| 386 | * | ||
| 387 | * Before calling, | ||
| 388 | * the tree has 'orig' at [orig->start, orig->end]. After calling, there | ||
| 389 | * are two extent state structs in the tree: | ||
| 390 | * prealloc: [orig->start, split - 1] | ||
| 391 | * orig: [ split, orig->end ] | ||
| 392 | * | ||
| 393 | * The tree locks are not taken by this function. They need to be held | ||
| 394 | * by the caller. | ||
| 395 | */ | ||
| 396 | static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | ||
| 397 | struct extent_state *prealloc, u64 split) | ||
| 398 | { | ||
| 399 | struct rb_node *node; | ||
| 400 | prealloc->start = orig->start; | ||
| 401 | prealloc->end = split - 1; | ||
| 402 | prealloc->state = orig->state; | ||
| 403 | orig->start = split; | ||
| 404 | |||
| 405 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); | ||
| 406 | if (node) { | ||
| 407 | struct extent_state *found; | ||
| 408 | found = rb_entry(node, struct extent_state, rb_node); | ||
| 409 | printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); | ||
| 410 | free_extent_state(prealloc); | ||
| 411 | return -EEXIST; | ||
| 412 | } | ||
| 413 | prealloc->tree = tree; | ||
| 414 | return 0; | ||
| 415 | } | ||
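
For example, splitting a dirty [0, 16383] record at offset 8192 inserts prealloc as [0, 8191] and shrinks orig to [8192, 16383]; both halves keep orig's bits, which is what lets the callers clear or set bits on just one half afterwards.
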
| 416 | |||
| 417 | /* | ||
| 418 | * utility function to clear some bits in an extent state struct. | ||
| 419 | * it will optionally wake up anyone waiting on this state (wake == 1), or | ||
| 420 | * forcibly remove the state from the tree (delete == 1). | ||
| 421 | * | ||
| 422 | * If no bits are set on the state struct after clearing things, the | ||
| 423 | * struct is freed and removed from the tree. | ||
| 424 | */ | ||
| 425 | static int clear_state_bit(struct extent_io_tree *tree, | ||
| 426 | struct extent_state *state, int bits, int wake, | ||
| 427 | int delete) | ||
| 428 | { | ||
| 429 | int ret = state->state & bits; | ||
| 430 | |||
| 431 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | ||
| 432 | u64 range = state->end - state->start + 1; | ||
| 433 | WARN_ON(range > tree->dirty_bytes); | ||
| 434 | tree->dirty_bytes -= range; | ||
| 435 | } | ||
| 436 | clear_state_cb(tree, state, bits); | ||
| 437 | state->state &= ~bits; | ||
| 438 | if (wake) | ||
| 439 | wake_up(&state->wq); | ||
| 440 | if (delete || state->state == 0) { | ||
| 441 | if (state->tree) { | ||
| 442 | clear_state_cb(tree, state, state->state); | ||
| 443 | rb_erase(&state->rb_node, &tree->state); | ||
| 444 | state->tree = NULL; | ||
| 445 | free_extent_state(state); | ||
| 446 | } else { | ||
| 447 | WARN_ON(1); | ||
| 448 | } | ||
| 449 | } else { | ||
| 450 | merge_state(tree, state); | ||
| 451 | } | ||
| 452 | return ret; | ||
| 453 | } | ||
| 454 | |||
| 455 | /* | ||
| 456 | * clear some bits on a range in the tree. This may require splitting | ||
| 457 | * or inserting elements in the tree, so the gfp mask is used to | ||
| 458 | * indicate which allocations or sleeping are allowed. | ||
| 459 | * | ||
| 460 | * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove | ||
| 461 | * the given range from the tree regardless of state (ie for truncate). | ||
| 462 | * | ||
| 463 | * the range [start, end] is inclusive. | ||
| 464 | * | ||
| 465 | * This takes the tree lock, and returns < 0 on error, > 0 if any of the | ||
| 466 | * bits were already set, or zero if none of the bits were already set. | ||
| 467 | */ | ||
| 468 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 469 | int bits, int wake, int delete, gfp_t mask) | ||
| 470 | { | ||
| 471 | struct extent_state *state; | ||
| 472 | struct extent_state *prealloc = NULL; | ||
| 473 | struct rb_node *node; | ||
| 474 | unsigned long flags; | ||
| 475 | int err; | ||
| 476 | int set = 0; | ||
| 477 | |||
| 478 | again: | ||
| 479 | if (!prealloc && (mask & __GFP_WAIT)) { | ||
| 480 | prealloc = alloc_extent_state(mask); | ||
| 481 | if (!prealloc) | ||
| 482 | return -ENOMEM; | ||
| 483 | } | ||
| 484 | |||
| 485 | spin_lock_irqsave(&tree->lock, flags); | ||
| 486 | /* | ||
| 487 | * this search will find the extents that end after | ||
| 488 | * our range starts | ||
| 489 | */ | ||
| 490 | node = tree_search(tree, start); | ||
| 491 | if (!node) | ||
| 492 | goto out; | ||
| 493 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 494 | if (state->start > end) | ||
| 495 | goto out; | ||
| 496 | WARN_ON(state->end < start); | ||
| 497 | |||
| 498 | /* | ||
| 499 | * | ---- desired range ---- | | ||
| 500 | * | state | or | ||
| 501 | * | ------------- state -------------- | | ||
| 502 | * | ||
| 503 | * We need to split the extent we found, and may flip | ||
| 504 | * bits on second half. | ||
| 505 | * | ||
| 506 | * If the extent we found extends past our range, we | ||
| 507 | * just split and search again. It'll get split again | ||
| 508 | * the next time though. | ||
| 509 | * | ||
| 510 | * If the extent we found is inside our range, we clear | ||
| 511 | * the desired bit on it. | ||
| 512 | */ | ||
| 513 | |||
| 514 | if (state->start < start) { | ||
| 515 | if (!prealloc) | ||
| 516 | prealloc = alloc_extent_state(GFP_ATOMIC); | ||
| 517 | err = split_state(tree, state, prealloc, start); | ||
| 518 | BUG_ON(err == -EEXIST); | ||
| 519 | prealloc = NULL; | ||
| 520 | if (err) | ||
| 521 | goto out; | ||
| 522 | if (state->end <= end) { | ||
| 523 | start = state->end + 1; | ||
| 524 | set |= clear_state_bit(tree, state, bits, | ||
| 525 | wake, delete); | ||
| 526 | } else { | ||
| 527 | start = state->start; | ||
| 528 | } | ||
| 529 | goto search_again; | ||
| 530 | } | ||
| 531 | /* | ||
| 532 | * | ---- desired range ---- | | ||
| 533 | * | state | | ||
| 534 | * We need to split the extent, and clear the bit | ||
| 535 | * on the first half | ||
| 536 | */ | ||
| 537 | if (state->start <= end && state->end > end) { | ||
| 538 | if (!prealloc) | ||
| 539 | prealloc = alloc_extent_state(GFP_ATOMIC); | ||
| 540 | err = split_state(tree, state, prealloc, end + 1); | ||
| 541 | BUG_ON(err == -EEXIST); | ||
| 542 | |||
| 543 | if (wake) | ||
| 544 | wake_up(&state->wq); | ||
| 545 | set |= clear_state_bit(tree, prealloc, bits, | ||
| 546 | wake, delete); | ||
| 547 | prealloc = NULL; | ||
| 548 | goto out; | ||
| 549 | } | ||
| 550 | |||
| 551 | start = state->end + 1; | ||
| 552 | set |= clear_state_bit(tree, state, bits, wake, delete); | ||
| 553 | goto search_again; | ||
| 554 | |||
| 555 | out: | ||
| 556 | spin_unlock_irqrestore(&tree->lock, flags); | ||
| 557 | if (prealloc) | ||
| 558 | free_extent_state(prealloc); | ||
| 559 | |||
| 560 | return set; | ||
| 561 | |||
| 562 | search_again: | ||
| 563 | if (start > end) | ||
| 564 | goto out; | ||
| 565 | spin_unlock_irqrestore(&tree->lock, flags); | ||
| 566 | if (mask & __GFP_WAIT) | ||
| 567 | cond_resched(); | ||
| 568 | goto again; | ||
| 569 | } | ||
| 570 | EXPORT_SYMBOL(clear_extent_bit); | ||
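
Putting the two split cases together: clearing bits on [4096, 8191] when the tree holds a single [0, 16383] record first splits off [0, 4095] (the state->start < start case) and loops, then splits off [8192, 16383] (the end-overlap case) and clears the bits on the middle [4096, 8191] piece alone, so only the requested range changes state.
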
| 571 | |||
| 572 | static int wait_on_state(struct extent_io_tree *tree, | ||
| 573 | struct extent_state *state) | ||
| 574 | { | ||
| 575 | DEFINE_WAIT(wait); | ||
| 576 | prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); | ||
| 577 | spin_unlock_irq(&tree->lock); | ||
| 578 | schedule(); | ||
| 579 | spin_lock_irq(&tree->lock); | ||
| 580 | finish_wait(&state->wq, &wait); | ||
| 581 | return 0; | ||
| 582 | } | ||
| 583 | |||
| 584 | /* | ||
| 585 | * waits for one or more bits to clear on a range in the state tree. | ||
| 586 | * The range [start, end] is inclusive. | ||
| 587 | * The tree lock is taken by this function | ||
| 588 | */ | ||
| 589 | int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) | ||
| 590 | { | ||
| 591 | struct extent_state *state; | ||
| 592 | struct rb_node *node; | ||
| 593 | |||
| 594 | spin_lock_irq(&tree->lock); | ||
| 595 | again: | ||
| 596 | while (1) { | ||
| 597 | /* | ||
| 598 | * this search will find all the extents that end after | ||
| 599 | * our range starts | ||
| 600 | */ | ||
| 601 | node = tree_search(tree, start); | ||
| 602 | if (!node) | ||
| 603 | break; | ||
| 604 | |||
| 605 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 606 | |||
| 607 | if (state->start > end) | ||
| 608 | goto out; | ||
| 609 | |||
| 610 | if (state->state & bits) { | ||
| 611 | start = state->start; | ||
| 612 | atomic_inc(&state->refs); | ||
| 613 | wait_on_state(tree, state); | ||
| 614 | free_extent_state(state); | ||
| 615 | goto again; | ||
| 616 | } | ||
| 617 | start = state->end + 1; | ||
| 618 | |||
| 619 | if (start > end) | ||
| 620 | break; | ||
| 621 | |||
| 622 | if (need_resched()) { | ||
| 623 | spin_unlock_irq(&tree->lock); | ||
| 624 | cond_resched(); | ||
| 625 | spin_lock_irq(&tree->lock); | ||
| 626 | } | ||
| 627 | } | ||
| 628 | out: | ||
| 629 | spin_unlock_irq(&tree->lock); | ||
| 630 | return 0; | ||
| 631 | } | ||
| 632 | EXPORT_SYMBOL(wait_extent_bit); | ||
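
Note the reference dance in the loop above: before sleeping, the code takes an extra reference on the state so the wakeup stays safe even if the state is removed from the tree in the meantime, and after waking it restarts the search from the saved state->start, since the tree may have changed while the lock was dropped.
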
| 633 | |||
| 634 | static void set_state_bits(struct extent_io_tree *tree, | ||
| 635 | struct extent_state *state, | ||
| 636 | int bits) | ||
| 637 | { | ||
| 638 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | ||
| 639 | u64 range = state->end - state->start + 1; | ||
| 640 | tree->dirty_bytes += range; | ||
| 641 | } | ||
| 642 | set_state_cb(tree, state, bits); | ||
| 643 | state->state |= bits; | ||
| 644 | } | ||
| 645 | |||
| 646 | /* | ||
| 647 | * set some bits on a range in the tree. This may require allocations | ||
| 648 | * or sleeping, so the gfp mask is used to indicate what is allowed. | ||
| 649 | * | ||
| 650 | * If 'exclusive' == 1, this will fail with -EEXIST if some part of the | ||
| 651 | * range already has the desired bits set. The start of the existing | ||
| 652 | * range is returned in failed_start in this case. | ||
| 653 | * | ||
| 654 | * [start, end] is inclusive | ||
| 655 | * This takes the tree lock. | ||
| 656 | */ | ||
| 657 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, | ||
| 658 | int exclusive, u64 *failed_start, gfp_t mask) | ||
| 659 | { | ||
| 660 | struct extent_state *state; | ||
| 661 | struct extent_state *prealloc = NULL; | ||
| 662 | struct rb_node *node; | ||
| 663 | unsigned long flags; | ||
| 664 | int err = 0; | ||
| 665 | int set; | ||
| 666 | u64 last_start; | ||
| 667 | u64 last_end; | ||
| 668 | again: | ||
| 669 | if (!prealloc && (mask & __GFP_WAIT)) { | ||
| 670 | prealloc = alloc_extent_state(mask); | ||
| 671 | if (!prealloc) | ||
| 672 | return -ENOMEM; | ||
| 673 | } | ||
| 674 | |||
| 675 | spin_lock_irqsave(&tree->lock, flags); | ||
| 676 | /* | ||
| 677 | * this search will find all the extents that end after | ||
| 678 | * our range starts. | ||
| 679 | */ | ||
| 680 | node = tree_search(tree, start); | ||
| 681 | if (!node) { | ||
| 682 | err = insert_state(tree, prealloc, start, end, bits); | ||
| 683 | prealloc = NULL; | ||
| 684 | BUG_ON(err == -EEXIST); | ||
| 685 | goto out; | ||
| 686 | } | ||
| 687 | |||
| 688 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 689 | last_start = state->start; | ||
| 690 | last_end = state->end; | ||
| 691 | |||
| 692 | /* | ||
| 693 | * | ---- desired range ---- | | ||
| 694 | * | state | | ||
| 695 | * | ||
| 696 | * Just lock what we found and keep going | ||
| 697 | */ | ||
| 698 | if (state->start == start && state->end <= end) { | ||
| 699 | set = state->state & bits; | ||
| 700 | if (set && exclusive) { | ||
| 701 | *failed_start = state->start; | ||
| 702 | err = -EEXIST; | ||
| 703 | goto out; | ||
| 704 | } | ||
| 705 | set_state_bits(tree, state, bits); | ||
| 706 | start = state->end + 1; | ||
| 707 | merge_state(tree, state); | ||
| 708 | goto search_again; | ||
| 709 | } | ||
| 710 | |||
| 711 | /* | ||
| 712 | * | ---- desired range ---- | | ||
| 713 | * | state | | ||
| 714 | * or | ||
| 715 | * | ------------- state -------------- | | ||
| 716 | * | ||
| 717 | * We need to split the extent we found, and may flip bits on | ||
| 718 | * the second half. | ||
| 719 | * | ||
| 720 | * If the extent we found extends past our | ||
| 721 | * range, we just split and search again. It'll get split | ||
| 722 | * again the next time though. | ||
| 723 | * | ||
| 724 | * If the extent we found is inside our range, we set the | ||
| 725 | * desired bit on it. | ||
| 726 | */ | ||
| 727 | if (state->start < start) { | ||
| 728 | set = state->state & bits; | ||
| 729 | if (exclusive && set) { | ||
| 730 | *failed_start = start; | ||
| 731 | err = -EEXIST; | ||
| 732 | goto out; | ||
| 733 | } | ||
| 734 | err = split_state(tree, state, prealloc, start); | ||
| 735 | BUG_ON(err == -EEXIST); | ||
| 736 | prealloc = NULL; | ||
| 737 | if (err) | ||
| 738 | goto out; | ||
| 739 | if (state->end <= end) { | ||
| 740 | set_state_bits(tree, state, bits); | ||
| 741 | start = state->end + 1; | ||
| 742 | merge_state(tree, state); | ||
| 743 | } else { | ||
| 744 | start = state->start; | ||
| 745 | } | ||
| 746 | goto search_again; | ||
| 747 | } | ||
| 748 | /* | ||
| 749 | * | ---- desired range ---- | | ||
| 750 | * | state | or | state | | ||
| 751 | * | ||
| 752 | * There's a hole, we need to insert something in it and | ||
| 753 | * ignore the extent we found. | ||
| 754 | */ | ||
| 755 | if (state->start > start) { | ||
| 756 | u64 this_end; | ||
| 757 | if (end < last_start) | ||
| 758 | this_end = end; | ||
| 759 | else | ||
| 760 | this_end = last_start - 1; | ||
| 761 | err = insert_state(tree, prealloc, start, this_end, | ||
| 762 | bits); | ||
| 763 | prealloc = NULL; | ||
| 764 | BUG_ON(err == -EEXIST); | ||
| 765 | if (err) | ||
| 766 | goto out; | ||
| 767 | start = this_end + 1; | ||
| 768 | goto search_again; | ||
| 769 | } | ||
| 770 | /* | ||
| 771 | * | ---- desired range ---- | | ||
| 772 | * | state | | ||
| 773 | * We need to split the extent, and set the bit | ||
| 774 | * on the first half | ||
| 775 | */ | ||
| 776 | if (state->start <= end && state->end > end) { | ||
| 777 | set = state->state & bits; | ||
| 778 | if (exclusive && set) { | ||
| 779 | *failed_start = start; | ||
| 780 | err = -EEXIST; | ||
| 781 | goto out; | ||
| 782 | } | ||
| 783 | err = split_state(tree, state, prealloc, end + 1); | ||
| 784 | BUG_ON(err == -EEXIST); | ||
| 785 | |||
| 786 | set_state_bits(tree, prealloc, bits); | ||
| 787 | merge_state(tree, prealloc); | ||
| 788 | prealloc = NULL; | ||
| 789 | goto out; | ||
| 790 | } | ||
| 791 | |||
| 792 | goto search_again; | ||
| 793 | |||
| 794 | out: | ||
| 795 | spin_unlock_irqrestore(&tree->lock, flags); | ||
| 796 | if (prealloc) | ||
| 797 | free_extent_state(prealloc); | ||
| 798 | |||
| 799 | return err; | ||
| 800 | |||
| 801 | search_again: | ||
| 802 | if (start > end) | ||
| 803 | goto out; | ||
| 804 | spin_unlock_irqrestore(&tree->lock, flags); | ||
| 805 | if (mask & __GFP_WAIT) | ||
| 806 | cond_resched(); | ||
| 807 | goto again; | ||
| 808 | } | ||
| 809 | EXPORT_SYMBOL(set_extent_bit); | ||
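/*
 * Illustrative sketch (not part of this change) of the 'exclusive'
 * contract documented above: with exclusive == 1 the call behaves like a
 * trylock, failing with -EEXIST and reporting the conflicting offset in
 * 'failed_start'. Bits may already be set on the part of the range that
 * was processed before the conflict, so a caller that backs off clears
 * them again. The helper name here is hypothetical.
 */
static int try_set_locked_once(struct extent_io_tree *tree, u64 start, u64 end)
{
	u64 failed_start;
	int err;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
			     &failed_start, GFP_NOFS);
	if (err == -EEXIST) {
		/* back out whatever we managed to lock before the clash */
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, GFP_NOFS);
		return 0;
	}
	return !err;
}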
| 810 | |||
| 811 | /* wrappers around set/clear extent bit */ | ||
| 812 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 813 | gfp_t mask) | ||
| 814 | { | ||
| 815 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, | ||
| 816 | mask); | ||
| 817 | } | ||
| 818 | EXPORT_SYMBOL(set_extent_dirty); | ||
| 819 | |||
| 820 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 821 | gfp_t mask) | ||
| 822 | { | ||
| 823 | return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); | ||
| 824 | } | ||
| 825 | EXPORT_SYMBOL(set_extent_ordered); | ||
| 826 | |||
| 827 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 828 | int bits, gfp_t mask) | ||
| 829 | { | ||
| 830 | return set_extent_bit(tree, start, end, bits, 0, NULL, | ||
| 831 | mask); | ||
| 832 | } | ||
| 833 | EXPORT_SYMBOL(set_extent_bits); | ||
| 834 | |||
| 835 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 836 | int bits, gfp_t mask) | ||
| 837 | { | ||
| 838 | return clear_extent_bit(tree, start, end, bits, 0, 0, mask); | ||
| 839 | } | ||
| 840 | EXPORT_SYMBOL(clear_extent_bits); | ||
| 841 | |||
| 842 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 843 | gfp_t mask) | ||
| 844 | { | ||
| 845 | return set_extent_bit(tree, start, end, | ||
| 846 | EXTENT_DELALLOC | EXTENT_DIRTY, | ||
| 847 | 0, NULL, mask); | ||
| 848 | } | ||
| 849 | EXPORT_SYMBOL(set_extent_delalloc); | ||
| 850 | |||
| 851 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 852 | gfp_t mask) | ||
| 853 | { | ||
| 854 | return clear_extent_bit(tree, start, end, | ||
| 855 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); | ||
| 856 | } | ||
| 857 | EXPORT_SYMBOL(clear_extent_dirty); | ||
| 858 | |||
| 859 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 860 | gfp_t mask) | ||
| 861 | { | ||
| 862 | return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); | ||
| 863 | } | ||
| 864 | EXPORT_SYMBOL(clear_extent_ordered); | ||
| 865 | |||
| 866 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 867 | gfp_t mask) | ||
| 868 | { | ||
| 869 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, | ||
| 870 | mask); | ||
| 871 | } | ||
| 872 | EXPORT_SYMBOL(set_extent_new); | ||
| 873 | |||
| 874 | int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 875 | gfp_t mask) | ||
| 876 | { | ||
| 877 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); | ||
| 878 | } | ||
| 879 | EXPORT_SYMBOL(clear_extent_new); | ||
| 880 | |||
| 881 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 882 | gfp_t mask) | ||
| 883 | { | ||
| 884 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | ||
| 885 | mask); | ||
| 886 | } | ||
| 887 | EXPORT_SYMBOL(set_extent_uptodate); | ||
| 888 | |||
| 889 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 890 | gfp_t mask) | ||
| 891 | { | ||
| 892 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); | ||
| 893 | } | ||
| 894 | EXPORT_SYMBOL(clear_extent_uptodate); | ||
| 895 | |||
| 896 | int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 897 | gfp_t mask) | ||
| 898 | { | ||
| 899 | return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, | ||
| 900 | 0, NULL, mask); | ||
| 901 | } | ||
| 902 | EXPORT_SYMBOL(set_extent_writeback); | ||
| 903 | |||
| 904 | int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 905 | gfp_t mask) | ||
| 906 | { | ||
| 907 | return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); | ||
| 908 | } | ||
| 909 | EXPORT_SYMBOL(clear_extent_writeback); | ||
| 910 | |||
| 911 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | ||
| 912 | { | ||
| 913 | return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); | ||
| 914 | } | ||
| 915 | EXPORT_SYMBOL(wait_on_extent_writeback); | ||
| 916 | |||
| 917 | /* | ||
| 918 | * either insert or lock a state struct between start and end; use mask to | ||
| 919 | * tell us if waiting is desired. | ||
| 920 | */ | ||
| 921 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | ||
| 922 | { | ||
| 923 | int err; | ||
| 924 | u64 failed_start; | ||
| 925 | while (1) { | ||
| 926 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | ||
| 927 | &failed_start, mask); | ||
| 928 | if (err == -EEXIST && (mask & __GFP_WAIT)) { | ||
| 929 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); | ||
| 930 | start = failed_start; | ||
| 931 | } else { | ||
| 932 | break; | ||
| 933 | } | ||
| 934 | WARN_ON(start > end); | ||
| 935 | } | ||
| 936 | return err; | ||
| 937 | } | ||
| 938 | EXPORT_SYMBOL(lock_extent); | ||
| 939 | |||
| 940 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 941 | gfp_t mask) | ||
| 942 | { | ||
| 943 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); | ||
| 944 | } | ||
| 945 | EXPORT_SYMBOL(unlock_extent); | ||
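/*
 * Typical pairing of the two helpers above, as a sketch (the helper name
 * is hypothetical, not part of this change): the extent lock brackets an
 * IO against the range and is dropped once the range is stable.
 */
static void locked_range_example(struct extent_io_tree *tree, u64 start, u64 end)
{
	lock_extent(tree, start, end, GFP_NOFS);
	/* ... read or write [start, end] while no one else can lock it ... */
	unlock_extent(tree, start, end, GFP_NOFS);
}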
| 946 | |||
| 947 | /* | ||
| 948 | * helper function to set both pages and extents in the tree dirty | ||
| 949 | */ | ||
| 950 | int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) | ||
| 951 | { | ||
| 952 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 953 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 954 | struct page *page; | ||
| 955 | |||
| 956 | while (index <= end_index) { | ||
| 957 | page = find_get_page(tree->mapping, index); | ||
| 958 | BUG_ON(!page); | ||
| 959 | __set_page_dirty_nobuffers(page); | ||
| 960 | page_cache_release(page); | ||
| 961 | index++; | ||
| 962 | } | ||
| 963 | set_extent_dirty(tree, start, end, GFP_NOFS); | ||
| 964 | return 0; | ||
| 965 | } | ||
| 966 | EXPORT_SYMBOL(set_range_dirty); | ||
| 967 | |||
| 968 | /* | ||
| 969 | * helper function to set both pages and extents in the tree writeback | ||
| 970 | */ | ||
| 971 | int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | ||
| 972 | { | ||
| 973 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 974 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 975 | struct page *page; | ||
| 976 | |||
| 977 | while (index <= end_index) { | ||
| 978 | page = find_get_page(tree->mapping, index); | ||
| 979 | BUG_ON(!page); | ||
| 980 | set_page_writeback(page); | ||
| 981 | page_cache_release(page); | ||
| 982 | index++; | ||
| 983 | } | ||
| 984 | set_extent_writeback(tree, start, end, GFP_NOFS); | ||
| 985 | return 0; | ||
| 986 | } | ||
| 987 | EXPORT_SYMBOL(set_range_writeback); | ||
| 988 | |||
| 989 | /* | ||
| 990 | * find the first offset in the io tree with 'bits' set. zero is | ||
| 991 | * returned if we find something, and *start_ret and *end_ret are | ||
| 992 | * set to reflect the state struct that was found. | ||
| 993 | * | ||
| 994 | * If nothing was found, 1 is returned, < 0 on error | ||
| 995 | */ | ||
| 996 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
| 997 | u64 *start_ret, u64 *end_ret, int bits) | ||
| 998 | { | ||
| 999 | struct rb_node *node; | ||
| 1000 | struct extent_state *state; | ||
| 1001 | int ret = 1; | ||
| 1002 | |||
| 1003 | spin_lock_irq(&tree->lock); | ||
| 1004 | /* | ||
| 1005 | * this search will find all the extents that end after | ||
| 1006 | * our range starts. | ||
| 1007 | */ | ||
| 1008 | node = tree_search(tree, start); | ||
| 1009 | if (!node) { | ||
| 1010 | goto out; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | while (1) { | ||
| 1014 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1015 | if (state->end >= start && (state->state & bits)) { | ||
| 1016 | *start_ret = state->start; | ||
| 1017 | *end_ret = state->end; | ||
| 1018 | ret = 0; | ||
| 1019 | break; | ||
| 1020 | } | ||
| 1021 | node = rb_next(node); | ||
| 1022 | if (!node) | ||
| 1023 | break; | ||
| 1024 | } | ||
| 1025 | out: | ||
| 1026 | spin_unlock_irq(&tree->lock); | ||
| 1027 | return ret; | ||
| 1028 | } | ||
| 1029 | EXPORT_SYMBOL(find_first_extent_bit); | ||
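/*
 * Sketch of iterating every range with a bit set via the helper above
 * (hypothetical caller, not part of this change): advance 'start' past
 * each hit until the helper reports nothing left.
 */
static void walk_dirty_ranges_example(struct extent_io_tree *tree)
{
	u64 start = 0;
	u64 range_start;
	u64 range_end;

	while (find_first_extent_bit(tree, start, &range_start,
				     &range_end, EXTENT_DIRTY) == 0) {
		/* ... process [range_start, range_end] ... */
		start = range_end + 1;
	}
}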
| 1030 | |||
| 1031 | /* find the first state struct with 'bits' set after 'start', and | ||
| 1032 | * return it. tree->lock must be held. NULL will be returned if | ||
| 1033 | * nothing was found after 'start' | ||
| 1034 | */ | ||
| 1035 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, | ||
| 1036 | u64 start, int bits) | ||
| 1037 | { | ||
| 1038 | struct rb_node *node; | ||
| 1039 | struct extent_state *state; | ||
| 1040 | |||
| 1041 | /* | ||
| 1042 | * this search will find all the extents that end after | ||
| 1043 | * our range starts. | ||
| 1044 | */ | ||
| 1045 | node = tree_search(tree, start); | ||
| 1046 | if (!node) { | ||
| 1047 | goto out; | ||
| 1048 | } | ||
| 1049 | |||
| 1050 | while (1) { | ||
| 1051 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1052 | if (state->end >= start && (state->state & bits)) { | ||
| 1053 | return state; | ||
| 1054 | } | ||
| 1055 | node = rb_next(node); | ||
| 1056 | if (!node) | ||
| 1057 | break; | ||
| 1058 | } | ||
| 1059 | out: | ||
| 1060 | return NULL; | ||
| 1061 | } | ||
| 1062 | EXPORT_SYMBOL(find_first_extent_bit_state); | ||
| 1063 | |||
| 1064 | /* | ||
| 1065 | * find a contiguous range of bytes in the file marked as delalloc, not | ||
| 1066 | * more than 'max_bytes'. start and end are used to return the range. | ||
| 1067 | * | ||
| 1068 | * The number of delalloc extents found is returned, 0 if nothing was in the tree | ||
| 1069 | */ | ||
| 1070 | static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, | ||
| 1071 | u64 *start, u64 *end, u64 max_bytes) | ||
| 1072 | { | ||
| 1073 | struct rb_node *node; | ||
| 1074 | struct extent_state *state; | ||
| 1075 | u64 cur_start = *start; | ||
| 1076 | u64 found = 0; | ||
| 1077 | u64 total_bytes = 0; | ||
| 1078 | |||
| 1079 | spin_lock_irq(&tree->lock); | ||
| 1080 | /* | ||
| 1081 | * this search will find all the extents that end after | ||
| 1082 | * our range starts. | ||
| 1083 | */ | ||
| 1084 | search_again: | ||
| 1085 | node = tree_search(tree, cur_start); | ||
| 1086 | if (!node) { | ||
| 1087 | if (!found) | ||
| 1088 | *end = (u64)-1; | ||
| 1089 | goto out; | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | while (1) { | ||
| 1093 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1094 | if (found && (state->start != cur_start || | ||
| 1095 | (state->state & EXTENT_BOUNDARY))) { | ||
| 1096 | goto out; | ||
| 1097 | } | ||
| 1098 | if (!(state->state & EXTENT_DELALLOC)) { | ||
| 1099 | if (!found) | ||
| 1100 | *end = state->end; | ||
| 1101 | goto out; | ||
| 1102 | } | ||
| 1103 | if (!found && !(state->state & EXTENT_BOUNDARY)) { | ||
| 1104 | struct extent_state *prev_state; | ||
| 1105 | struct rb_node *prev_node = node; | ||
| 1106 | while (1) { | ||
| 1107 | prev_node = rb_prev(prev_node); | ||
| 1108 | if (!prev_node) | ||
| 1109 | break; | ||
| 1110 | prev_state = rb_entry(prev_node, | ||
| 1111 | struct extent_state, | ||
| 1112 | rb_node); | ||
| 1113 | if ((prev_state->end + 1 != state->start) || | ||
| 1114 | !(prev_state->state & EXTENT_DELALLOC)) | ||
| 1115 | break; | ||
| 1116 | if ((cur_start - prev_state->start) * 2 > | ||
| 1117 | max_bytes) | ||
| 1118 | break; | ||
| 1119 | state = prev_state; | ||
| 1120 | node = prev_node; | ||
| 1121 | } | ||
| 1122 | } | ||
| 1123 | if (state->state & EXTENT_LOCKED) { | ||
| 1124 | DEFINE_WAIT(wait); | ||
| 1125 | atomic_inc(&state->refs); | ||
| 1126 | prepare_to_wait(&state->wq, &wait, | ||
| 1127 | TASK_UNINTERRUPTIBLE); | ||
| 1128 | spin_unlock_irq(&tree->lock); | ||
| 1129 | schedule(); | ||
| 1130 | spin_lock_irq(&tree->lock); | ||
| 1131 | finish_wait(&state->wq, &wait); | ||
| 1132 | free_extent_state(state); | ||
| 1133 | goto search_again; | ||
| 1134 | } | ||
| 1135 | set_state_cb(tree, state, EXTENT_LOCKED); | ||
| 1136 | state->state |= EXTENT_LOCKED; | ||
| 1137 | if (!found) | ||
| 1138 | *start = state->start; | ||
| 1139 | found++; | ||
| 1140 | *end = state->end; | ||
| 1141 | cur_start = state->end + 1; | ||
| 1142 | node = rb_next(node); | ||
| 1143 | if (!node) | ||
| 1144 | break; | ||
| 1145 | total_bytes += state->end - state->start + 1; | ||
| 1146 | if (total_bytes >= max_bytes) | ||
| 1147 | break; | ||
| 1148 | } | ||
| 1149 | out: | ||
| 1150 | spin_unlock_irq(&tree->lock); | ||
| 1151 | return found; | ||
| 1152 | } | ||
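/*
 * The expected caller pattern for the delalloc walk above, extracted as
 * a sketch (the real user is __extent_writepage() further down): find
 * and lock a delalloc run, hand it to the fill hook, then drop the lock
 * and delalloc bits before moving on.
 */
static void flush_delalloc_example(struct extent_io_tree *tree,
				   struct inode *inode,
				   u64 start, u64 page_end)
{
	u64 dstart = start;
	u64 dend = 0;

	while (dend < page_end) {
		if (find_lock_delalloc_range(tree, &dstart, &dend,
					     128 * 1024 * 1024) == 0) {
			dstart = dend + 1;
			continue;
		}
		tree->ops->fill_delalloc(inode, dstart, dend);
		clear_extent_bit(tree, dstart, dend,
				 EXTENT_LOCKED | EXTENT_DELALLOC,
				 1, 0, GFP_NOFS);
		dstart = dend + 1;
	}
}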
| 1153 | |||
| 1154 | /* | ||
| 1155 | * count the number of bytes in the tree that have the given bit(s) | ||
| 1156 | * set. This can be fairly slow, except for EXTENT_DIRTY which is | ||
| 1157 | * cached. The total number of bytes found is returned. | ||
| 1158 | */ | ||
| 1159 | u64 count_range_bits(struct extent_io_tree *tree, | ||
| 1160 | u64 *start, u64 search_end, u64 max_bytes, | ||
| 1161 | unsigned long bits) | ||
| 1162 | { | ||
| 1163 | struct rb_node *node; | ||
| 1164 | struct extent_state *state; | ||
| 1165 | u64 cur_start = *start; | ||
| 1166 | u64 total_bytes = 0; | ||
| 1167 | int found = 0; | ||
| 1168 | |||
| 1169 | if (search_end <= cur_start) { | ||
| 1170 | printk("search_end %Lu start %Lu\n", search_end, cur_start); | ||
| 1171 | WARN_ON(1); | ||
| 1172 | return 0; | ||
| 1173 | } | ||
| 1174 | |||
| 1175 | spin_lock_irq(&tree->lock); | ||
| 1176 | if (cur_start == 0 && bits == EXTENT_DIRTY) { | ||
| 1177 | total_bytes = tree->dirty_bytes; | ||
| 1178 | goto out; | ||
| 1179 | } | ||
| 1180 | /* | ||
| 1181 | * this search will find all the extents that end after | ||
| 1182 | * our range starts. | ||
| 1183 | */ | ||
| 1184 | node = tree_search(tree, cur_start); | ||
| 1185 | if (!node) { | ||
| 1186 | goto out; | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | while (1) { | ||
| 1190 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1191 | if (state->start > search_end) | ||
| 1192 | break; | ||
| 1193 | if (state->end >= cur_start && (state->state & bits)) { | ||
| 1194 | total_bytes += min(search_end, state->end) + 1 - | ||
| 1195 | max(cur_start, state->start); | ||
| 1196 | if (total_bytes >= max_bytes) | ||
| 1197 | break; | ||
| 1198 | if (!found) { | ||
| 1199 | *start = state->start; | ||
| 1200 | found = 1; | ||
| 1201 | } | ||
| 1202 | } | ||
| 1203 | node = rb_next(node); | ||
| 1204 | if (!node) | ||
| 1205 | break; | ||
| 1206 | } | ||
| 1207 | out: | ||
| 1208 | spin_unlock_irq(&tree->lock); | ||
| 1209 | return total_bytes; | ||
| 1210 | } | ||
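/*
 * Usage sketch for the counter above (hypothetical helper name): note
 * that a query starting at offset 0 for EXTENT_DIRTY short-circuits to
 * the cached tree->dirty_bytes total and ignores search_end entirely.
 */
static u64 dirty_bytes_after(struct extent_io_tree *tree, u64 start, u64 end)
{
	u64 from = start;

	/* max_bytes of (u64)-1 means "no cap on the count" */
	return count_range_bits(tree, &from, end, (u64)-1, EXTENT_DIRTY);
}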
| 1211 | /* | ||
| 1212 | * helper function to lock both pages and extents in the tree. | ||
| 1213 | * pages must be locked first. | ||
| 1214 | */ | ||
| 1215 | int lock_range(struct extent_io_tree *tree, u64 start, u64 end) | ||
| 1216 | { | ||
| 1217 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 1218 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 1219 | struct page *page; | ||
| 1220 | int err; | ||
| 1221 | |||
| 1222 | while (index <= end_index) { | ||
| 1223 | page = grab_cache_page(tree->mapping, index); | ||
| 1224 | if (!page) { | ||
| 1225 | err = -ENOMEM; | ||
| 1226 | goto failed; | ||
| 1227 | } | ||
| 1228 | if (IS_ERR(page)) { | ||
| 1229 | err = PTR_ERR(page); | ||
| 1230 | goto failed; | ||
| 1231 | } | ||
| 1232 | index++; | ||
| 1233 | } | ||
| 1234 | lock_extent(tree, start, end, GFP_NOFS); | ||
| 1235 | return 0; | ||
| 1236 | |||
| 1237 | failed: | ||
| 1238 | /* | ||
| 1239 | * we failed above in getting the page at 'index', so we undo here | ||
| 1240 | * up to but not including the page at 'index' | ||
| 1241 | */ | ||
| 1242 | end_index = index; | ||
| 1243 | index = start >> PAGE_CACHE_SHIFT; | ||
| 1244 | while (index < end_index) { | ||
| 1245 | page = find_get_page(tree->mapping, index); | ||
| 1246 | unlock_page(page); | ||
| 1247 | page_cache_release(page); | ||
| 1248 | index++; | ||
| 1249 | } | ||
| 1250 | return err; | ||
| 1251 | } | ||
| 1252 | EXPORT_SYMBOL(lock_range); | ||
| 1253 | |||
| 1254 | /* | ||
| 1255 | * helper function to unlock both pages and extents in the tree. | ||
| 1256 | */ | ||
| 1257 | int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) | ||
| 1258 | { | ||
| 1259 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 1260 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 1261 | struct page *page; | ||
| 1262 | |||
| 1263 | while (index <= end_index) { | ||
| 1264 | page = find_get_page(tree->mapping, index); | ||
| 1265 | unlock_page(page); | ||
| 1266 | page_cache_release(page); | ||
| 1267 | index++; | ||
| 1268 | } | ||
| 1269 | unlock_extent(tree, start, end, GFP_NOFS); | ||
| 1270 | return 0; | ||
| 1271 | } | ||
| 1272 | EXPORT_SYMBOL(unlock_range); | ||
| 1273 | |||
| 1274 | /* | ||
| 1275 | * set the private field for a given byte offset in the tree. If there isn't | ||
| 1276 | * an extent_state starting there already, -ENOENT is returned. | ||
| 1277 | */ | ||
| 1278 | int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) | ||
| 1279 | { | ||
| 1280 | struct rb_node *node; | ||
| 1281 | struct extent_state *state; | ||
| 1282 | int ret = 0; | ||
| 1283 | |||
| 1284 | spin_lock_irq(&tree->lock); | ||
| 1285 | /* | ||
| 1286 | * this search will find all the extents that end after | ||
| 1287 | * our range starts. | ||
| 1288 | */ | ||
| 1289 | node = tree_search(tree, start); | ||
| 1290 | if (!node) { | ||
| 1291 | ret = -ENOENT; | ||
| 1292 | goto out; | ||
| 1293 | } | ||
| 1294 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1295 | if (state->start != start) { | ||
| 1296 | ret = -ENOENT; | ||
| 1297 | goto out; | ||
| 1298 | } | ||
| 1299 | state->private = private; | ||
| 1300 | out: | ||
| 1301 | spin_unlock_irq(&tree->lock); | ||
| 1302 | return ret; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) | ||
| 1306 | { | ||
| 1307 | struct rb_node *node; | ||
| 1308 | struct extent_state *state; | ||
| 1309 | int ret = 0; | ||
| 1310 | |||
| 1311 | spin_lock_irq(&tree->lock); | ||
| 1312 | /* | ||
| 1313 | * this search will find all the extents that end after | ||
| 1314 | * our range starts. | ||
| 1315 | */ | ||
| 1316 | node = tree_search(tree, start); | ||
| 1317 | if (!node) { | ||
| 1318 | ret = -ENOENT; | ||
| 1319 | goto out; | ||
| 1320 | } | ||
| 1321 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1322 | if (state->start != start) { | ||
| 1323 | ret = -ENOENT; | ||
| 1324 | goto out; | ||
| 1325 | } | ||
| 1326 | *private = state->private; | ||
| 1327 | out: | ||
| 1328 | spin_unlock_irq(&tree->lock); | ||
| 1329 | return ret; | ||
| 1330 | } | ||
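/*
 * The private field is one opaque u64 hung off the extent_state that
 * starts exactly at the given offset; a round-trip sketch (illustrative
 * only, the value stored here is meaningless):
 */
static void state_private_example(struct extent_io_tree *tree, u64 start)
{
	u64 val;

	if (set_state_private(tree, start, 42))
		return;	/* no extent_state starts at 'start' */
	if (get_state_private(tree, start, &val) == 0)
		WARN_ON(val != 42);
}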
| 1331 | |||
| 1332 | /* | ||
| 1333 | * searches a range in the state tree for a given mask. | ||
| 1334 | * If 'filled' == 1, this returns 1 only if every extent in the range | ||
| 1335 | * has the bits set. Otherwise, 1 is returned if any bit in the | ||
| 1336 | * range is found set. | ||
| 1337 | */ | ||
| 1338 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 1339 | int bits, int filled) | ||
| 1340 | { | ||
| 1341 | struct extent_state *state = NULL; | ||
| 1342 | struct rb_node *node; | ||
| 1343 | int bitset = 0; | ||
| 1344 | unsigned long flags; | ||
| 1345 | |||
| 1346 | spin_lock_irqsave(&tree->lock, flags); | ||
| 1347 | node = tree_search(tree, start); | ||
| 1348 | while (node && start <= end) { | ||
| 1349 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1350 | |||
| 1351 | if (filled && state->start > start) { | ||
| 1352 | bitset = 0; | ||
| 1353 | break; | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | if (state->start > end) | ||
| 1357 | break; | ||
| 1358 | |||
| 1359 | if (state->state & bits) { | ||
| 1360 | bitset = 1; | ||
| 1361 | if (!filled) | ||
| 1362 | break; | ||
| 1363 | } else if (filled) { | ||
| 1364 | bitset = 0; | ||
| 1365 | break; | ||
| 1366 | } | ||
| 1367 | start = state->end + 1; | ||
| 1368 | if (start > end) | ||
| 1369 | break; | ||
| 1370 | node = rb_next(node); | ||
| 1371 | if (!node) { | ||
| 1372 | if (filled) | ||
| 1373 | bitset = 0; | ||
| 1374 | break; | ||
| 1375 | } | ||
| 1376 | } | ||
| 1377 | spin_unlock_irqrestore(&tree->lock, flags); | ||
| 1378 | return bitset; | ||
| 1379 | } | ||
| 1380 | EXPORT_SYMBOL(test_range_bit); | ||
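/*
 * The 'filled' flag above switches between "any" (filled == 0) and "all"
 * (filled == 1) semantics. A sketch (hypothetical helper) of the "all"
 * form, which is how check_page_uptodate() below uses it:
 */
static int whole_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end)
{
	/* filled == 1: every extent covering the range must have the bit */
	return test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
}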
| 1381 | |||
| 1382 | /* | ||
| 1383 | * helper function to set a given page up to date if all the | ||
| 1384 | * extents in the tree for that page are up to date | ||
| 1385 | */ | ||
| 1386 | static int check_page_uptodate(struct extent_io_tree *tree, | ||
| 1387 | struct page *page) | ||
| 1388 | { | ||
| 1389 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 1390 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 1391 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) | ||
| 1392 | SetPageUptodate(page); | ||
| 1393 | return 0; | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | /* | ||
| 1397 | * helper function to unlock a page if all the extents in the tree | ||
| 1398 | * for that page are unlocked | ||
| 1399 | */ | ||
| 1400 | static int check_page_locked(struct extent_io_tree *tree, | ||
| 1401 | struct page *page) | ||
| 1402 | { | ||
| 1403 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 1404 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 1405 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) | ||
| 1406 | unlock_page(page); | ||
| 1407 | return 0; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | /* | ||
| 1411 | * helper function to end page writeback if all the extents | ||
| 1412 | * in the tree for that page are done with writeback | ||
| 1413 | */ | ||
| 1414 | static int check_page_writeback(struct extent_io_tree *tree, | ||
| 1415 | struct page *page) | ||
| 1416 | { | ||
| 1417 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 1418 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 1419 | if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) | ||
| 1420 | end_page_writeback(page); | ||
| 1421 | return 0; | ||
| 1422 | } | ||
| 1423 | |||
| 1424 | /* lots and lots of room for performance fixes in the end_bio funcs */ | ||
| 1425 | |||
| 1426 | /* | ||
| 1427 | * after a writepage IO is done, we need to: | ||
| 1428 | * clear the uptodate bits on error | ||
| 1429 | * clear the writeback bits in the extent tree for this IO | ||
| 1430 | * end_page_writeback if the page has no more pending IO | ||
| 1431 | * | ||
| 1432 | * Scheduling is not allowed, so the extent state tree is expected | ||
| 1433 | * to have one and only one object corresponding to this IO. | ||
| 1434 | */ | ||
| 1435 | static void end_bio_extent_writepage(struct bio *bio, int err) | ||
| 1436 | { | ||
| 1437 | int uptodate = err == 0; | ||
| 1438 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
| 1439 | struct extent_io_tree *tree; | ||
| 1440 | u64 start; | ||
| 1441 | u64 end; | ||
| 1442 | int whole_page; | ||
| 1443 | int ret; | ||
| 1444 | |||
| 1445 | do { | ||
| 1446 | struct page *page = bvec->bv_page; | ||
| 1447 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 1448 | |||
| 1449 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | ||
| 1450 | bvec->bv_offset; | ||
| 1451 | end = start + bvec->bv_len - 1; | ||
| 1452 | |||
| 1453 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) | ||
| 1454 | whole_page = 1; | ||
| 1455 | else | ||
| 1456 | whole_page = 0; | ||
| 1457 | |||
| 1458 | if (--bvec >= bio->bi_io_vec) | ||
| 1459 | prefetchw(&bvec->bv_page->flags); | ||
| 1460 | if (tree->ops && tree->ops->writepage_end_io_hook) { | ||
| 1461 | ret = tree->ops->writepage_end_io_hook(page, start, | ||
| 1462 | end, NULL, uptodate); | ||
| 1463 | if (ret) | ||
| 1464 | uptodate = 0; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | if (!uptodate && tree->ops && | ||
| 1468 | tree->ops->writepage_io_failed_hook) { | ||
| 1469 | ret = tree->ops->writepage_io_failed_hook(bio, page, | ||
| 1470 | start, end, NULL); | ||
| 1471 | if (ret == 0) { | ||
| 1472 | uptodate = (err == 0); | ||
| 1473 | continue; | ||
| 1474 | } | ||
| 1475 | } | ||
| 1476 | |||
| 1477 | if (!uptodate) { | ||
| 1478 | clear_extent_uptodate(tree, start, end, GFP_ATOMIC); | ||
| 1479 | ClearPageUptodate(page); | ||
| 1480 | SetPageError(page); | ||
| 1481 | } | ||
| 1482 | |||
| 1483 | clear_extent_writeback(tree, start, end, GFP_ATOMIC); | ||
| 1484 | |||
| 1485 | if (whole_page) | ||
| 1486 | end_page_writeback(page); | ||
| 1487 | else | ||
| 1488 | check_page_writeback(tree, page); | ||
| 1489 | } while (bvec >= bio->bi_io_vec); | ||
| 1490 | |||
| 1491 | bio_put(bio); | ||
| 1492 | } | ||
| 1493 | |||
| 1494 | /* | ||
| 1495 | * after a readpage IO is done, we need to: | ||
| 1496 | * clear the uptodate bits on error | ||
| 1497 | * set the uptodate bits if things worked | ||
| 1498 | * set the page up to date if all extents in the tree are uptodate | ||
| 1499 | * clear the lock bit in the extent tree | ||
| 1500 | * unlock the page if there are no other extents locked for it | ||
| 1501 | * | ||
| 1502 | * Scheduling is not allowed, so the extent state tree is expected | ||
| 1503 | * to have one and only one object corresponding to this IO. | ||
| 1504 | */ | ||
| 1505 | static void end_bio_extent_readpage(struct bio *bio, int err) | ||
| 1506 | { | ||
| 1507 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 1508 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
| 1509 | struct extent_io_tree *tree; | ||
| 1510 | u64 start; | ||
| 1511 | u64 end; | ||
| 1512 | int whole_page; | ||
| 1513 | int ret; | ||
| 1514 | |||
| 1515 | do { | ||
| 1516 | struct page *page = bvec->bv_page; | ||
| 1517 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 1518 | |||
| 1519 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | ||
| 1520 | bvec->bv_offset; | ||
| 1521 | end = start + bvec->bv_len - 1; | ||
| 1522 | |||
| 1523 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) | ||
| 1524 | whole_page = 1; | ||
| 1525 | else | ||
| 1526 | whole_page = 0; | ||
| 1527 | |||
| 1528 | if (--bvec >= bio->bi_io_vec) | ||
| 1529 | prefetchw(&bvec->bv_page->flags); | ||
| 1530 | |||
| 1531 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | ||
| 1532 | ret = tree->ops->readpage_end_io_hook(page, start, end, | ||
| 1533 | NULL); | ||
| 1534 | if (ret) | ||
| 1535 | uptodate = 0; | ||
| 1536 | } | ||
| 1537 | if (!uptodate && tree->ops && | ||
| 1538 | tree->ops->readpage_io_failed_hook) { | ||
| 1539 | ret = tree->ops->readpage_io_failed_hook(bio, page, | ||
| 1540 | start, end, NULL); | ||
| 1541 | if (ret == 0) { | ||
| 1542 | uptodate = | ||
| 1543 | test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 1544 | continue; | ||
| 1545 | } | ||
| 1546 | } | ||
| 1547 | |||
| 1548 | if (uptodate) | ||
| 1549 | set_extent_uptodate(tree, start, end, | ||
| 1550 | GFP_ATOMIC); | ||
| 1551 | unlock_extent(tree, start, end, GFP_ATOMIC); | ||
| 1552 | |||
| 1553 | if (whole_page) { | ||
| 1554 | if (uptodate) { | ||
| 1555 | SetPageUptodate(page); | ||
| 1556 | } else { | ||
| 1557 | ClearPageUptodate(page); | ||
| 1558 | SetPageError(page); | ||
| 1559 | } | ||
| 1560 | unlock_page(page); | ||
| 1561 | } else { | ||
| 1562 | if (uptodate) { | ||
| 1563 | check_page_uptodate(tree, page); | ||
| 1564 | } else { | ||
| 1565 | ClearPageUptodate(page); | ||
| 1566 | SetPageError(page); | ||
| 1567 | } | ||
| 1568 | check_page_locked(tree, page); | ||
| 1569 | } | ||
| 1570 | } while (bvec >= bio->bi_io_vec); | ||
| 1571 | |||
| 1572 | bio_put(bio); | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | /* | ||
| 1576 | * IO done from prepare_write is pretty simple, we just unlock | ||
| 1577 | * the structs in the extent tree when done, and set the uptodate bits | ||
| 1578 | * as appropriate. | ||
| 1579 | */ | ||
| 1580 | static void end_bio_extent_preparewrite(struct bio *bio, int err) | ||
| 1581 | { | ||
| 1582 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 1583 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
| 1584 | struct extent_io_tree *tree; | ||
| 1585 | u64 start; | ||
| 1586 | u64 end; | ||
| 1587 | |||
| 1588 | do { | ||
| 1589 | struct page *page = bvec->bv_page; | ||
| 1590 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 1591 | |||
| 1592 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | ||
| 1593 | bvec->bv_offset; | ||
| 1594 | end = start + bvec->bv_len - 1; | ||
| 1595 | |||
| 1596 | if (--bvec >= bio->bi_io_vec) | ||
| 1597 | prefetchw(&bvec->bv_page->flags); | ||
| 1598 | |||
| 1599 | if (uptodate) { | ||
| 1600 | set_extent_uptodate(tree, start, end, GFP_ATOMIC); | ||
| 1601 | } else { | ||
| 1602 | ClearPageUptodate(page); | ||
| 1603 | SetPageError(page); | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | unlock_extent(tree, start, end, GFP_ATOMIC); | ||
| 1607 | |||
| 1608 | } while (bvec >= bio->bi_io_vec); | ||
| 1609 | |||
| 1610 | bio_put(bio); | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | static struct bio * | ||
| 1614 | extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | ||
| 1615 | gfp_t gfp_flags) | ||
| 1616 | { | ||
| 1617 | struct bio *bio; | ||
| 1618 | |||
| 1619 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
| 1620 | |||
| 1621 | if (bio == NULL && (current->flags & PF_MEMALLOC)) { | ||
| 1622 | while (!bio && (nr_vecs /= 2)) | ||
| 1623 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
| 1624 | } | ||
| 1625 | |||
| 1626 | if (bio) { | ||
| 1627 | bio->bi_size = 0; | ||
| 1628 | bio->bi_bdev = bdev; | ||
| 1629 | bio->bi_sector = first_sector; | ||
| 1630 | } | ||
| 1631 | return bio; | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | static int submit_one_bio(int rw, struct bio *bio, int mirror_num) | ||
| 1635 | { | ||
| 1636 | int ret = 0; | ||
| 1637 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
| 1638 | struct page *page = bvec->bv_page; | ||
| 1639 | struct extent_io_tree *tree = bio->bi_private; | ||
| 1640 | struct rb_node *node; | ||
| 1641 | struct extent_state *state; | ||
| 1642 | u64 start; | ||
| 1643 | u64 end; | ||
| 1644 | |||
| 1645 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; | ||
| 1646 | end = start + bvec->bv_len - 1; | ||
| 1647 | |||
| 1648 | spin_lock_irq(&tree->lock); | ||
| 1649 | node = __etree_search(tree, start, NULL, NULL); | ||
| 1650 | BUG_ON(!node); | ||
| 1651 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1652 | while (state->end < end) { | ||
| 1653 | node = rb_next(node); | ||
| 1654 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1655 | } | ||
| 1656 | BUG_ON(state->end != end); | ||
| 1657 | spin_unlock_irq(&tree->lock); | ||
| 1658 | |||
| 1659 | bio->bi_private = NULL; | ||
| 1660 | |||
| 1661 | bio_get(bio); | ||
| 1662 | |||
| 1663 | if (tree->ops && tree->ops->submit_bio_hook) | ||
| 1664 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | ||
| 1665 | mirror_num); | ||
| 1666 | else | ||
| 1667 | submit_bio(rw, bio); | ||
| 1668 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
| 1669 | ret = -EOPNOTSUPP; | ||
| 1670 | bio_put(bio); | ||
| 1671 | return ret; | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | static int submit_extent_page(int rw, struct extent_io_tree *tree, | ||
| 1675 | struct page *page, sector_t sector, | ||
| 1676 | size_t size, unsigned long offset, | ||
| 1677 | struct block_device *bdev, | ||
| 1678 | struct bio **bio_ret, | ||
| 1679 | unsigned long max_pages, | ||
| 1680 | bio_end_io_t end_io_func, | ||
| 1681 | int mirror_num) | ||
| 1682 | { | ||
| 1683 | int ret = 0; | ||
| 1684 | struct bio *bio; | ||
| 1685 | int nr; | ||
| 1686 | |||
| 1687 | if (bio_ret && *bio_ret) { | ||
| 1688 | bio = *bio_ret; | ||
| 1689 | if (bio->bi_sector + (bio->bi_size >> 9) != sector || | ||
| 1690 | (tree->ops && tree->ops->merge_bio_hook && | ||
| 1691 | tree->ops->merge_bio_hook(page, offset, size, bio)) || | ||
| 1692 | bio_add_page(bio, page, size, offset) < size) { | ||
| 1693 | ret = submit_one_bio(rw, bio, mirror_num); | ||
| 1694 | bio = NULL; | ||
| 1695 | } else { | ||
| 1696 | return 0; | ||
| 1697 | } | ||
| 1698 | } | ||
| 1699 | nr = bio_get_nr_vecs(bdev); | ||
| 1700 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | ||
| 1701 | if (!bio) { | ||
| 1702 | printk("failed to allocate bio nr %d\n", nr); | ||
| 1703 | return -ENOMEM; | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | bio_add_page(bio, page, size, offset); | ||
| 1707 | bio->bi_end_io = end_io_func; | ||
| 1708 | bio->bi_private = tree; | ||
| 1709 | |||
| 1710 | if (bio_ret) { | ||
| 1711 | *bio_ret = bio; | ||
| 1712 | } else { | ||
| 1713 | ret = submit_one_bio(rw, bio, mirror_num); | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | return ret; | ||
| 1717 | } | ||
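/*
 * The **bio_ret argument above implements batching: callers keep a
 * single bio slot alive across many pages so physically contiguous
 * sectors merge into one submission, then flush the leftover bio
 * themselves. A sketch with a hypothetical helper (the in-tree pattern
 * is extent_read_full_page() below):
 */
static void batched_read_example(struct extent_io_tree *tree,
				 struct page **pages, int nr,
				 sector_t sector,
				 struct block_device *bdev)
{
	struct bio *bio = NULL;
	int i;

	for (i = 0; i < nr; i++) {
		submit_extent_page(READ, tree, pages[i], sector,
				   PAGE_CACHE_SIZE, 0, bdev, &bio, nr,
				   end_bio_extent_readpage, 0);
		sector += PAGE_CACHE_SIZE >> 9;
	}
	if (bio)
		submit_one_bio(READ, bio, 0);
}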
| 1718 | |||
| 1719 | void set_page_extent_mapped(struct page *page) | ||
| 1720 | { | ||
| 1721 | if (!PagePrivate(page)) { | ||
| 1722 | SetPagePrivate(page); | ||
| 1723 | page_cache_get(page); | ||
| 1724 | set_page_private(page, EXTENT_PAGE_PRIVATE); | ||
| 1725 | } | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | void set_page_extent_head(struct page *page, unsigned long len) | ||
| 1729 | { | ||
| 1730 | set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); | ||
| 1731 | } | ||
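/*
 * page->private carries either the plain "extent mapped" marker or, on
 * the first page of an extent buffer, the buffer length shifted up by
 * two with the FIRST_PAGE marker in the low bits (constants come from
 * extent_io.h). A decoding sketch, assuming the marker occupies the low
 * two bits:
 */
static unsigned long extent_head_len_example(struct page *page)
{
	unsigned long priv = page_private(page);

	if ((priv & 3) != EXTENT_PAGE_PRIVATE_FIRST_PAGE)
		return 0;
	return priv >> 2;
}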
| 1732 | |||
| 1733 | /* | ||
| 1734 | * basic readpage implementation. Locked extent state structs are inserted | ||
| 1735 | * into the tree and are removed when the IO is done (by the end_io | ||
| 1736 | * handlers) | ||
| 1737 | */ | ||
| 1738 | static int __extent_read_full_page(struct extent_io_tree *tree, | ||
| 1739 | struct page *page, | ||
| 1740 | get_extent_t *get_extent, | ||
| 1741 | struct bio **bio, int mirror_num) | ||
| 1742 | { | ||
| 1743 | struct inode *inode = page->mapping->host; | ||
| 1744 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 1745 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | ||
| 1746 | u64 end; | ||
| 1747 | u64 cur = start; | ||
| 1748 | u64 extent_offset; | ||
| 1749 | u64 last_byte = i_size_read(inode); | ||
| 1750 | u64 block_start; | ||
| 1751 | u64 cur_end; | ||
| 1752 | sector_t sector; | ||
| 1753 | struct extent_map *em; | ||
| 1754 | struct block_device *bdev; | ||
| 1755 | int ret; | ||
| 1756 | int nr = 0; | ||
| 1757 | size_t page_offset = 0; | ||
| 1758 | size_t iosize; | ||
| 1759 | size_t blocksize = inode->i_sb->s_blocksize; | ||
| 1760 | |||
| 1761 | set_page_extent_mapped(page); | ||
| 1762 | |||
| 1763 | end = page_end; | ||
| 1764 | lock_extent(tree, start, end, GFP_NOFS); | ||
| 1765 | |||
| 1766 | while (cur <= end) { | ||
| 1767 | if (cur >= last_byte) { | ||
| 1768 | char *userpage; | ||
| 1769 | iosize = PAGE_CACHE_SIZE - page_offset; | ||
| 1770 | userpage = kmap_atomic(page, KM_USER0); | ||
| 1771 | memset(userpage + page_offset, 0, iosize); | ||
| 1772 | flush_dcache_page(page); | ||
| 1773 | kunmap_atomic(userpage, KM_USER0); | ||
| 1774 | set_extent_uptodate(tree, cur, cur + iosize - 1, | ||
| 1775 | GFP_NOFS); | ||
| 1776 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 1777 | break; | ||
| 1778 | } | ||
| 1779 | em = get_extent(inode, page, page_offset, cur, | ||
| 1780 | end - cur + 1, 0); | ||
| 1781 | if (IS_ERR(em) || !em) { | ||
| 1782 | SetPageError(page); | ||
| 1783 | unlock_extent(tree, cur, end, GFP_NOFS); | ||
| 1784 | break; | ||
| 1785 | } | ||
| 1786 | extent_offset = cur - em->start; | ||
| 1787 | if (extent_map_end(em) <= cur) { | ||
| 1788 | printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur); | ||
| 1789 | } | ||
| 1790 | BUG_ON(extent_map_end(em) <= cur); | ||
| 1791 | if (end < cur) { | ||
| 1792 | printk("2bad mapping end %Lu cur %Lu\n", end, cur); | ||
| 1793 | } | ||
| 1794 | BUG_ON(end < cur); | ||
| 1795 | |||
| 1796 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | ||
| 1797 | cur_end = min(extent_map_end(em) - 1, end); | ||
| 1798 | iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); | ||
| 1799 | sector = (em->block_start + extent_offset) >> 9; | ||
| 1800 | bdev = em->bdev; | ||
| 1801 | block_start = em->block_start; | ||
| 1802 | free_extent_map(em); | ||
| 1803 | em = NULL; | ||
| 1804 | |||
| 1805 | /* we've found a hole, just zero and go on */ | ||
| 1806 | if (block_start == EXTENT_MAP_HOLE) { | ||
| 1807 | char *userpage; | ||
| 1808 | userpage = kmap_atomic(page, KM_USER0); | ||
| 1809 | memset(userpage + page_offset, 0, iosize); | ||
| 1810 | flush_dcache_page(page); | ||
| 1811 | kunmap_atomic(userpage, KM_USER0); | ||
| 1812 | |||
| 1813 | set_extent_uptodate(tree, cur, cur + iosize - 1, | ||
| 1814 | GFP_NOFS); | ||
| 1815 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 1816 | cur = cur + iosize; | ||
| 1817 | page_offset += iosize; | ||
| 1818 | continue; | ||
| 1819 | } | ||
| 1820 | /* the get_extent function already copied into the page */ | ||
| 1821 | if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { | ||
| 1822 | check_page_uptodate(tree, page); | ||
| 1823 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 1824 | cur = cur + iosize; | ||
| 1825 | page_offset += iosize; | ||
| 1826 | continue; | ||
| 1827 | } | ||
| 1828 | /* we have an inline extent but it didn't get marked up | ||
| 1829 | * to date. Error out | ||
| 1830 | */ | ||
| 1831 | if (block_start == EXTENT_MAP_INLINE) { | ||
| 1832 | SetPageError(page); | ||
| 1833 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 1834 | cur = cur + iosize; | ||
| 1835 | page_offset += iosize; | ||
| 1836 | continue; | ||
| 1837 | } | ||
| 1838 | |||
| 1839 | ret = 0; | ||
| 1840 | if (tree->ops && tree->ops->readpage_io_hook) { | ||
| 1841 | ret = tree->ops->readpage_io_hook(page, cur, | ||
| 1842 | cur + iosize - 1); | ||
| 1843 | } | ||
| 1844 | if (!ret) { | ||
| 1845 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | ||
| 1846 | pnr -= page->index; | ||
| 1847 | ret = submit_extent_page(READ, tree, page, | ||
| 1848 | sector, iosize, page_offset, | ||
| 1849 | bdev, bio, pnr, | ||
| 1850 | end_bio_extent_readpage, mirror_num); | ||
| 1851 | nr++; | ||
| 1852 | } | ||
| 1853 | if (ret) | ||
| 1854 | SetPageError(page); | ||
| 1855 | cur = cur + iosize; | ||
| 1856 | page_offset += iosize; | ||
| 1857 | } | ||
| 1858 | if (!nr) { | ||
| 1859 | if (!PageError(page)) | ||
| 1860 | SetPageUptodate(page); | ||
| 1861 | unlock_page(page); | ||
| 1862 | } | ||
| 1863 | return 0; | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | ||
| 1867 | get_extent_t *get_extent) | ||
| 1868 | { | ||
| 1869 | struct bio *bio = NULL; | ||
| 1870 | int ret; | ||
| 1871 | |||
| 1872 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0); | ||
| 1873 | if (bio) | ||
| 1874 | submit_one_bio(READ, bio, 0); | ||
| 1875 | return ret; | ||
| 1876 | } | ||
| 1877 | EXPORT_SYMBOL(extent_read_full_page); | ||
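/*
 * How a filesystem is expected to wire the helper above into its
 * address_space operations, sketched with hypothetical names;
 * btrfs_get_extent stands in for the fs's get_extent callback, which
 * lives elsewhere in this patch series, not in this file:
 */
static int example_readpage(struct file *file, struct page *page)
{
	struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;

	return extent_read_full_page(tree, page, btrfs_get_extent);
}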
| 1878 | |||
| 1879 | /* | ||
| 1880 | * the writepage semantics are similar to regular writepage. extent | ||
| 1881 | * records are inserted to lock ranges in the tree, and as dirty areas | ||
| 1882 | * are found, they are marked writeback. Then the lock bits are removed | ||
| 1883 | * and the end_io handler clears the writeback ranges | ||
| 1884 | */ | ||
| 1885 | static int __extent_writepage(struct page *page, struct writeback_control *wbc, | ||
| 1886 | void *data) | ||
| 1887 | { | ||
| 1888 | struct inode *inode = page->mapping->host; | ||
| 1889 | struct extent_page_data *epd = data; | ||
| 1890 | struct extent_io_tree *tree = epd->tree; | ||
| 1891 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 1892 | u64 delalloc_start; | ||
| 1893 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | ||
| 1894 | u64 end; | ||
| 1895 | u64 cur = start; | ||
| 1896 | u64 extent_offset; | ||
| 1897 | u64 last_byte = i_size_read(inode); | ||
| 1898 | u64 block_start; | ||
| 1899 | u64 iosize; | ||
| 1900 | u64 unlock_start; | ||
| 1901 | sector_t sector; | ||
| 1902 | struct extent_map *em; | ||
| 1903 | struct block_device *bdev; | ||
| 1904 | int ret; | ||
| 1905 | int nr = 0; | ||
| 1906 | size_t pg_offset = 0; | ||
| 1907 | size_t blocksize; | ||
| 1908 | loff_t i_size = i_size_read(inode); | ||
| 1909 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; | ||
| 1910 | u64 nr_delalloc; | ||
| 1911 | u64 delalloc_end; | ||
| 1912 | |||
| 1913 | WARN_ON(!PageLocked(page)); | ||
| 1914 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | ||
| 1915 | if (page->index > end_index || | ||
| 1916 | (page->index == end_index && !pg_offset)) { | ||
| 1917 | page->mapping->a_ops->invalidatepage(page, 0); | ||
| 1918 | unlock_page(page); | ||
| 1919 | return 0; | ||
| 1920 | } | ||
| 1921 | |||
| 1922 | if (page->index == end_index) { | ||
| 1923 | char *userpage; | ||
| 1924 | |||
| 1925 | userpage = kmap_atomic(page, KM_USER0); | ||
| 1926 | memset(userpage + pg_offset, 0, | ||
| 1927 | PAGE_CACHE_SIZE - pg_offset); | ||
| 1928 | kunmap_atomic(userpage, KM_USER0); | ||
| 1929 | flush_dcache_page(page); | ||
| 1930 | } | ||
| 1931 | pg_offset = 0; | ||
| 1932 | |||
| 1933 | set_page_extent_mapped(page); | ||
| 1934 | |||
| 1935 | delalloc_start = start; | ||
| 1936 | delalloc_end = 0; | ||
| 1937 | while (delalloc_end < page_end) { | ||
| 1938 | nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, | ||
| 1939 | &delalloc_end, | ||
| 1940 | 128 * 1024 * 1024); | ||
| 1941 | if (nr_delalloc == 0) { | ||
| 1942 | delalloc_start = delalloc_end + 1; | ||
| 1943 | continue; | ||
| 1944 | } | ||
| 1945 | tree->ops->fill_delalloc(inode, delalloc_start, | ||
| 1946 | delalloc_end); | ||
| 1947 | clear_extent_bit(tree, delalloc_start, | ||
| 1948 | delalloc_end, | ||
| 1949 | EXTENT_LOCKED | EXTENT_DELALLOC, | ||
| 1950 | 1, 0, GFP_NOFS); | ||
| 1951 | delalloc_start = delalloc_end + 1; | ||
| 1952 | } | ||
| 1953 | lock_extent(tree, start, page_end, GFP_NOFS); | ||
| 1954 | unlock_start = start; | ||
| 1955 | |||
| 1956 | if (tree->ops && tree->ops->writepage_start_hook) { | ||
| 1957 | ret = tree->ops->writepage_start_hook(page, start, page_end); | ||
| 1958 | if (ret == -EAGAIN) { | ||
| 1959 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 1960 | redirty_page_for_writepage(wbc, page); | ||
| 1961 | unlock_page(page); | ||
| 1962 | return 0; | ||
| 1963 | } | ||
| 1964 | } | ||
| 1965 | |||
| 1966 | end = page_end; | ||
| 1967 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { | ||
| 1968 | printk("found delalloc bits after lock_extent\n"); | ||
| 1969 | } | ||
| 1970 | |||
| 1971 | if (last_byte <= start) { | ||
| 1972 | clear_extent_dirty(tree, start, page_end, GFP_NOFS); | ||
| 1973 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 1974 | if (tree->ops && tree->ops->writepage_end_io_hook) | ||
| 1975 | tree->ops->writepage_end_io_hook(page, start, | ||
| 1976 | page_end, NULL, 1); | ||
| 1977 | unlock_start = page_end + 1; | ||
| 1978 | goto done; | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | set_extent_uptodate(tree, start, page_end, GFP_NOFS); | ||
| 1982 | blocksize = inode->i_sb->s_blocksize; | ||
| 1983 | |||
| 1984 | while (cur <= end) { | ||
| 1985 | if (cur >= last_byte) { | ||
| 1986 | clear_extent_dirty(tree, cur, page_end, GFP_NOFS); | ||
| 1987 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 1988 | if (tree->ops && tree->ops->writepage_end_io_hook) | ||
| 1989 | tree->ops->writepage_end_io_hook(page, cur, | ||
| 1990 | page_end, NULL, 1); | ||
| 1991 | unlock_start = page_end + 1; | ||
| 1992 | break; | ||
| 1993 | } | ||
| 1994 | em = epd->get_extent(inode, page, pg_offset, cur, | ||
| 1995 | end - cur + 1, 1); | ||
| 1996 | if (IS_ERR(em) || !em) { | ||
| 1997 | SetPageError(page); | ||
| 1998 | break; | ||
| 1999 | } | ||
| 2000 | |||
| 2001 | extent_offset = cur - em->start; | ||
| 2002 | BUG_ON(extent_map_end(em) <= cur); | ||
| 2003 | BUG_ON(end < cur); | ||
| 2004 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | ||
| 2005 | iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); | ||
| 2006 | sector = (em->block_start + extent_offset) >> 9; | ||
| 2007 | bdev = em->bdev; | ||
| 2008 | block_start = em->block_start; | ||
| 2009 | free_extent_map(em); | ||
| 2010 | em = NULL; | ||
| 2011 | |||
| 2012 | if (block_start == EXTENT_MAP_HOLE || | ||
| 2013 | block_start == EXTENT_MAP_INLINE) { | ||
| 2014 | clear_extent_dirty(tree, cur, | ||
| 2015 | cur + iosize - 1, GFP_NOFS); | ||
| 2016 | |||
| 2017 | unlock_extent(tree, unlock_start, cur + iosize -1, | ||
| 2018 | GFP_NOFS); | ||
| 2019 | |||
| 2020 | if (tree->ops && tree->ops->writepage_end_io_hook) | ||
| 2021 | tree->ops->writepage_end_io_hook(page, cur, | ||
| 2022 | cur + iosize - 1, | ||
| 2023 | NULL, 1); | ||
| 2024 | cur = cur + iosize; | ||
| 2025 | pg_offset += iosize; | ||
| 2026 | unlock_start = cur; | ||
| 2027 | continue; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | /* leave this out until we have a page_mkwrite call */ | ||
| 2031 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | ||
| 2032 | EXTENT_DIRTY, 0)) { | ||
| 2033 | cur = cur + iosize; | ||
| 2034 | pg_offset += iosize; | ||
| 2035 | continue; | ||
| 2036 | } | ||
| 2037 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 2038 | if (tree->ops && tree->ops->writepage_io_hook) { | ||
| 2039 | ret = tree->ops->writepage_io_hook(page, cur, | ||
| 2040 | cur + iosize - 1); | ||
| 2041 | } else { | ||
| 2042 | ret = 0; | ||
| 2043 | } | ||
| 2044 | if (ret) { | ||
| 2045 | SetPageError(page); | ||
| 2046 | } else { | ||
| 2047 | unsigned long max_nr = end_index + 1; | ||
| 2048 | |||
| 2049 | set_range_writeback(tree, cur, cur + iosize - 1); | ||
| 2050 | if (!PageWriteback(page)) { | ||
| 2051 | printk("warning page %lu not writeback, " | ||
| 2052 | "cur %llu end %llu\n", page->index, | ||
| 2053 | (unsigned long long)cur, | ||
| 2054 | (unsigned long long)end); | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | ret = submit_extent_page(WRITE, tree, page, sector, | ||
| 2058 | iosize, pg_offset, bdev, | ||
| 2059 | &epd->bio, max_nr, | ||
| 2060 | end_bio_extent_writepage, 0); | ||
| 2061 | if (ret) | ||
| 2062 | SetPageError(page); | ||
| 2063 | } | ||
| 2064 | cur = cur + iosize; | ||
| 2065 | pg_offset += iosize; | ||
| 2066 | nr++; | ||
| 2067 | } | ||
| 2068 | done: | ||
| 2069 | if (nr == 0) { | ||
| 2070 | /* make sure the mapping tag for page dirty gets cleared */ | ||
| 2071 | set_page_writeback(page); | ||
| 2072 | end_page_writeback(page); | ||
| 2073 | } | ||
| 2074 | if (unlock_start <= page_end) | ||
| 2075 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 2076 | unlock_page(page); | ||
| 2077 | return 0; | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | /** | ||
| 2081 | * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them. | ||
| 2082 | * @mapping: address space structure to write | ||
| 2083 | * @wbc: subtract the number of written pages from *@wbc->nr_to_write | ||
| 2084 | * @writepage: function called for each page | ||
| 2085 | * @data: data passed to writepage function | ||
| 2086 | * | ||
| 2087 | * If a page is already under I/O, extent_write_cache_pages() skips it, even | ||
| 2088 | * if it's dirty. This is desirable behaviour for memory-cleaning writeback, | ||
| 2089 | * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() | ||
| 2090 | * and msync() need to guarantee that all the data which was dirty at the time | ||
| 2091 | * the call was made get new I/O started against them. If wbc->sync_mode is | ||
| 2092 | * WB_SYNC_ALL then we were called for data integrity and we must wait for | ||
| 2093 | * existing IO to complete. | ||
| 2094 | */ | ||
| 2095 | int extent_write_cache_pages(struct extent_io_tree *tree, | ||
| 2096 | struct address_space *mapping, | ||
| 2097 | struct writeback_control *wbc, | ||
| 2098 | writepage_t writepage, void *data) | ||
| 2099 | { | ||
| 2100 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
| 2101 | int ret = 0; | ||
| 2102 | int done = 0; | ||
| 2103 | struct pagevec pvec; | ||
| 2104 | int nr_pages; | ||
| 2105 | pgoff_t index; | ||
| 2106 | pgoff_t end; /* Inclusive */ | ||
| 2107 | int scanned = 0; | ||
| 2108 | int range_whole = 0; | ||
| 2109 | |||
| 2110 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
| 2111 | wbc->encountered_congestion = 1; | ||
| 2112 | return 0; | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | pagevec_init(&pvec, 0); | ||
| 2116 | if (wbc->range_cyclic) { | ||
| 2117 | index = mapping->writeback_index; /* Start from prev offset */ | ||
| 2118 | end = -1; | ||
| 2119 | } else { | ||
| 2120 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
| 2121 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
| 2122 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
| 2123 | range_whole = 1; | ||
| 2124 | scanned = 1; | ||
| 2125 | } | ||
| 2126 | retry: | ||
| 2127 | while (!done && (index <= end) && | ||
| 2128 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 2129 | PAGECACHE_TAG_DIRTY, | ||
| 2130 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) { | ||
| 2131 | unsigned i; | ||
| 2132 | |||
| 2133 | scanned = 1; | ||
| 2134 | for (i = 0; i < nr_pages; i++) { | ||
| 2135 | struct page *page = pvec.pages[i]; | ||
| 2136 | |||
| 2137 | /* | ||
| 2138 | * At this point we hold neither mapping->tree_lock nor | ||
| 2139 | * lock on the page itself: the page may be truncated or | ||
| 2140 | * invalidated (changing page->mapping to NULL), or even | ||
| 2141 | * swizzled back from swapper_space to tmpfs file | ||
| 2142 | * mapping | ||
| 2143 | */ | ||
| 2144 | if (tree->ops && tree->ops->write_cache_pages_lock_hook) | ||
| 2145 | tree->ops->write_cache_pages_lock_hook(page); | ||
| 2146 | else | ||
| 2147 | lock_page(page); | ||
| 2148 | |||
| 2149 | if (unlikely(page->mapping != mapping)) { | ||
| 2150 | unlock_page(page); | ||
| 2151 | continue; | ||
| 2152 | } | ||
| 2153 | |||
| 2154 | if (!wbc->range_cyclic && page->index > end) { | ||
| 2155 | done = 1; | ||
| 2156 | unlock_page(page); | ||
| 2157 | continue; | ||
| 2158 | } | ||
| 2159 | |||
| 2160 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
| 2161 | wait_on_page_writeback(page); | ||
| 2162 | |||
| 2163 | if (PageWriteback(page) || | ||
| 2164 | !clear_page_dirty_for_io(page)) { | ||
| 2165 | unlock_page(page); | ||
| 2166 | continue; | ||
| 2167 | } | ||
| 2168 | |||
| 2169 | ret = (*writepage)(page, wbc, data); | ||
| 2170 | |||
| 2171 | if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { | ||
| 2172 | unlock_page(page); | ||
| 2173 | ret = 0; | ||
| 2174 | } | ||
| 2175 | if (ret || (--(wbc->nr_to_write) <= 0)) | ||
| 2176 | done = 1; | ||
| 2177 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
| 2178 | wbc->encountered_congestion = 1; | ||
| 2179 | done = 1; | ||
| 2180 | } | ||
| 2181 | } | ||
| 2182 | pagevec_release(&pvec); | ||
| 2183 | cond_resched(); | ||
| 2184 | } | ||
| 2185 | if (!scanned && !done) { | ||
| 2186 | /* | ||
| 2187 | * We hit the last page and there is more work to be done: wrap | ||
| 2188 | * back to the start of the file | ||
| 2189 | */ | ||
| 2190 | scanned = 1; | ||
| 2191 | index = 0; | ||
| 2192 | goto retry; | ||
| 2193 | } | ||
| 2194 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
| 2195 | mapping->writeback_index = index; | ||
| 2196 | |||
| 2197 | if (wbc->range_cont) | ||
| 2198 | wbc->range_start = (loff_t)index << PAGE_CACHE_SHIFT; | ||
| 2199 | return ret; | ||
| 2200 | } | ||
| 2201 | EXPORT_SYMBOL(extent_write_cache_pages); | ||
| 2202 | |||
| 2203 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | ||
| 2204 | get_extent_t *get_extent, | ||
| 2205 | struct writeback_control *wbc) | ||
| 2206 | { | ||
| 2207 | int ret; | ||
| 2208 | struct address_space *mapping = page->mapping; | ||
| 2209 | struct extent_page_data epd = { | ||
| 2210 | .bio = NULL, | ||
| 2211 | .tree = tree, | ||
| 2212 | .get_extent = get_extent, | ||
| 2213 | }; | ||
| 2214 | struct writeback_control wbc_writepages = { | ||
| 2215 | .bdi = wbc->bdi, | ||
| 2216 | .sync_mode = WB_SYNC_NONE, | ||
| 2217 | .older_than_this = NULL, | ||
| 2218 | .nr_to_write = 64, | ||
| 2219 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | ||
| 2220 | .range_end = (loff_t)-1, | ||
| 2221 | }; | ||
| 2222 | |||
| 2223 | |||
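| | /* | ||
| | * write the passed page now, then use the private wbc above to | ||
| | * opportunistically push out up to 64 dirty pages that follow it | ||
| | */ | ||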
| 2224 | ret = __extent_writepage(page, wbc, &epd); | ||
| 2225 | |||
| 2226 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | ||
| 2227 | __extent_writepage, &epd); | ||
| 2228 | if (epd.bio) | ||
| 2229 | submit_one_bio(WRITE, epd.bio, 0); | ||
| 2231 | return ret; | ||
| 2232 | } | ||
| 2233 | EXPORT_SYMBOL(extent_write_full_page); | ||
| 2234 | |||
| 2235 | |||
| 2236 | int extent_writepages(struct extent_io_tree *tree, | ||
| 2237 | struct address_space *mapping, | ||
| 2238 | get_extent_t *get_extent, | ||
| 2239 | struct writeback_control *wbc) | ||
| 2240 | { | ||
| 2241 | int ret = 0; | ||
| 2242 | struct extent_page_data epd = { | ||
| 2243 | .bio = NULL, | ||
| 2244 | .tree = tree, | ||
| 2245 | .get_extent = get_extent, | ||
| 2246 | }; | ||
| 2247 | |||
| 2248 | ret = extent_write_cache_pages(tree, mapping, wbc, | ||
| 2249 | __extent_writepage, &epd); | ||
| 2250 | if (epd.bio) | ||
| 2251 | submit_one_bio(WRITE, epd.bio, 0); | ||
| 2253 | return ret; | ||
| 2254 | } | ||
| 2255 | EXPORT_SYMBOL(extent_writepages); | ||
| 2256 | |||
| 2257 | int extent_readpages(struct extent_io_tree *tree, | ||
| 2258 | struct address_space *mapping, | ||
| 2259 | struct list_head *pages, unsigned nr_pages, | ||
| 2260 | get_extent_t get_extent) | ||
| 2261 | { | ||
| 2262 | struct bio *bio = NULL; | ||
| 2263 | unsigned page_idx; | ||
| 2264 | struct pagevec pvec; | ||
| 2265 | |||
| 2266 | pagevec_init(&pvec, 0); | ||
| 2267 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | ||
| 2268 | struct page *page = list_entry(pages->prev, struct page, lru); | ||
| 2269 | |||
| 2270 | prefetchw(&page->flags); | ||
| 2271 | list_del(&page->lru); | ||
| 2272 | /* | ||
| 2273 | * what we want to do here is call add_to_page_cache_lru, | ||
| 2274 | * but that isn't exported, so we reproduce it here | ||
| 2275 | */ | ||
| 2276 | if (!add_to_page_cache(page, mapping, | ||
| 2277 | page->index, GFP_KERNEL)) { | ||
| 2278 | |||
| 2279 | /* open coding of lru_cache_add, also not exported */ | ||
| 2280 | page_cache_get(page); | ||
| 2281 | if (!pagevec_add(&pvec, page)) | ||
| 2282 | __pagevec_lru_add(&pvec); | ||
| 2283 | __extent_read_full_page(tree, page, get_extent, | ||
| 2284 | &bio, 0); | ||
| 2285 | } | ||
| 2286 | page_cache_release(page); | ||
| 2287 | } | ||
| 2288 | if (pagevec_count(&pvec)) | ||
| 2289 | __pagevec_lru_add(&pvec); | ||
| 2290 | BUG_ON(!list_empty(pages)); | ||
| 2291 | if (bio) | ||
| 2292 | submit_one_bio(READ, bio, 0); | ||
| 2293 | return 0; | ||
| 2294 | } | ||
| 2295 | EXPORT_SYMBOL(extent_readpages); | ||
| 2296 | |||
| 2297 | /* | ||
| 2298 | * basic invalidatepage code; this waits on any locked or writeback | ||
| 2299 | * ranges corresponding to the page, and then deletes any extent state | ||
| 2300 | * records from the tree | ||
| 2301 | */ | ||
| 2302 | int extent_invalidatepage(struct extent_io_tree *tree, | ||
| 2303 | struct page *page, unsigned long offset) | ||
| 2304 | { | ||
| 2305 | u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); | ||
| 2306 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 2307 | size_t blocksize = page->mapping->host->i_sb->s_blocksize; | ||
| 2308 | |||
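| | /* | ||
| | * round the start up to a block boundary: a block that is only | ||
| | * partially invalidated must keep its extent state | ||
| | */ | ||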
| 2309 | start += (offset + blocksize - 1) & ~(blocksize - 1); | ||
| 2310 | if (start > end) | ||
| 2311 | return 0; | ||
| 2312 | |||
| 2313 | lock_extent(tree, start, end, GFP_NOFS); | ||
| 2314 | wait_on_extent_writeback(tree, start, end); | ||
| 2315 | clear_extent_bit(tree, start, end, | ||
| 2316 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, | ||
| 2317 | 1, 1, GFP_NOFS); | ||
| 2318 | return 0; | ||
| 2319 | } | ||
| 2320 | EXPORT_SYMBOL(extent_invalidatepage); | ||
| 2321 | |||
| 2322 | /* | ||
| 2323 | * simple commit_write call; set_page_dirty is used to mark both | ||
| 2324 | * the page and the extent records as dirty | ||
| 2325 | */ | ||
| 2326 | int extent_commit_write(struct extent_io_tree *tree, | ||
| 2327 | struct inode *inode, struct page *page, | ||
| 2328 | unsigned from, unsigned to) | ||
| 2329 | { | ||
| 2330 | loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | ||
| 2331 | |||
| 2332 | set_page_extent_mapped(page); | ||
| 2333 | set_page_dirty(page); | ||
| 2334 | |||
| 2335 | if (pos > inode->i_size) { | ||
| 2336 | i_size_write(inode, pos); | ||
| 2337 | mark_inode_dirty(inode); | ||
| 2338 | } | ||
| 2339 | return 0; | ||
| 2340 | } | ||
| 2341 | EXPORT_SYMBOL(extent_commit_write); | ||
| 2342 | |||
| 2343 | int extent_prepare_write(struct extent_io_tree *tree, | ||
| 2344 | struct inode *inode, struct page *page, | ||
| 2345 | unsigned from, unsigned to, get_extent_t *get_extent) | ||
| 2346 | { | ||
| 2347 | u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 2348 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
| 2349 | u64 block_start; | ||
| 2350 | u64 orig_block_start; | ||
| 2351 | u64 block_end; | ||
| 2352 | u64 cur_end; | ||
| 2353 | struct extent_map *em; | ||
| 2354 | unsigned blocksize = 1 << inode->i_blkbits; | ||
| 2355 | size_t page_offset = 0; | ||
| 2356 | size_t block_off_start; | ||
| 2357 | size_t block_off_end; | ||
| 2358 | int err = 0; | ||
| 2359 | int iocount = 0; | ||
| 2360 | int ret = 0; | ||
| 2361 | int isnew; | ||
| 2362 | |||
| 2363 | set_page_extent_mapped(page); | ||
| 2364 | |||
| 2365 | block_start = (page_start + from) & ~((u64)blocksize - 1); | ||
| 2366 | block_end = (page_start + to - 1) | (blocksize - 1); | ||
| 2367 | orig_block_start = block_start; | ||
| 2368 | |||
| 2369 | lock_extent(tree, page_start, page_end, GFP_NOFS); | ||
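| | /* | ||
| | * walk the blocks covered by [from, to): newly allocated blocks | ||
| | * get the bytes outside the write zeroed, while old blocks that | ||
| | * we won't fully overwrite are read in from disk | ||
| | */ | ||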
| 2370 | while (block_start <= block_end) { | ||
| 2371 | em = get_extent(inode, page, page_offset, block_start, | ||
| 2372 | block_end - block_start + 1, 1); | ||
| 2373 | if (IS_ERR(em) || !em) | ||
| 2374 | goto err; | ||
| 2376 | cur_end = min(block_end, extent_map_end(em) - 1); | ||
| 2377 | block_off_start = block_start & (PAGE_CACHE_SIZE - 1); | ||
| 2378 | block_off_end = block_off_start + blocksize; | ||
| 2379 | isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); | ||
| 2380 | |||
| 2381 | if (!PageUptodate(page) && isnew && | ||
| 2382 | (block_off_end > to || block_off_start < from)) { | ||
| 2383 | void *kaddr; | ||
| 2384 | |||
| 2385 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 2386 | if (block_off_end > to) | ||
| 2387 | memset(kaddr + to, 0, block_off_end - to); | ||
| 2388 | if (block_off_start < from) | ||
| 2389 | memset(kaddr + block_off_start, 0, | ||
| 2390 | from - block_off_start); | ||
| 2391 | flush_dcache_page(page); | ||
| 2392 | kunmap_atomic(kaddr, KM_USER0); | ||
| 2393 | } | ||
| 2394 | if ((em->block_start != EXTENT_MAP_HOLE && | ||
| 2395 | em->block_start != EXTENT_MAP_INLINE) && | ||
| 2396 | !isnew && !PageUptodate(page) && | ||
| 2397 | (block_off_end > to || block_off_start < from) && | ||
| 2398 | !test_range_bit(tree, block_start, cur_end, | ||
| 2399 | EXTENT_UPTODATE, 1)) { | ||
| 2400 | u64 sector; | ||
| 2401 | u64 extent_offset = block_start - em->start; | ||
| 2402 | size_t iosize; | ||
| 2403 | sector = (em->block_start + extent_offset) >> 9; | ||
| 2404 | iosize = (cur_end - block_start + blocksize) & | ||
| 2405 | ~((u64)blocksize - 1); | ||
| 2406 | /* | ||
| 2407 | * we've already got the extent locked, but we | ||
| 2408 | * need to split the state such that our end_bio | ||
| 2409 | * handler can clear the lock. | ||
| 2410 | */ | ||
| 2411 | set_extent_bit(tree, block_start, | ||
| 2412 | block_start + iosize - 1, | ||
| 2413 | EXTENT_LOCKED, 0, NULL, GFP_NOFS); | ||
| 2414 | ret = submit_extent_page(READ, tree, page, | ||
| 2415 | sector, iosize, page_offset, em->bdev, | ||
| 2416 | NULL, 1, | ||
| 2417 | end_bio_extent_preparewrite, 0); | ||
| 2418 | iocount++; | ||
| 2419 | block_start = block_start + iosize; | ||
| 2420 | } else { | ||
| 2421 | set_extent_uptodate(tree, block_start, cur_end, | ||
| 2422 | GFP_NOFS); | ||
| 2423 | unlock_extent(tree, block_start, cur_end, GFP_NOFS); | ||
| 2424 | block_start = cur_end + 1; | ||
| 2425 | } | ||
| 2426 | page_offset = block_start & (PAGE_CACHE_SIZE - 1); | ||
| 2427 | free_extent_map(em); | ||
| 2428 | } | ||
| 2429 | if (iocount) { | ||
| 2430 | wait_extent_bit(tree, orig_block_start, | ||
| 2431 | block_end, EXTENT_LOCKED); | ||
| 2432 | } | ||
| 2433 | check_page_uptodate(tree, page); | ||
| 2434 | err: | ||
| 2435 | /* FIXME, zero out newly allocated blocks on error */ | ||
| 2436 | return err; | ||
| 2437 | } | ||
| 2438 | EXPORT_SYMBOL(extent_prepare_write); | ||
| 2439 | |||
| 2440 | /* | ||
| 2441 | * a helper for releasepage; this tests for areas of the page that | ||
| 2442 | * are locked or under IO and drops the related state bits if it is safe | ||
| 2443 | * to drop the page. | ||
| 2444 | */ | ||
| 2445 | int try_release_extent_state(struct extent_map_tree *map, | ||
| 2446 | struct extent_io_tree *tree, struct page *page, | ||
| 2447 | gfp_t mask) | ||
| 2448 | { | ||
| 2449 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 2450 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 2451 | int ret = 1; | ||
| 2452 | |||
| 2453 | if (test_range_bit(tree, start, end, | ||
| 2454 | EXTENT_IOBITS | EXTENT_ORDERED, 0)) | ||
| 2455 | ret = 0; | ||
| 2456 | else { | ||
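| | /* | ||
| | * clamp the mask down to plain GFP_NOFS, presumably so the | ||
| | * allocations made while clearing state bits cannot recurse | ||
| | * back into the fs | ||
| | */ | ||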
| 2457 | if ((mask & GFP_NOFS) == GFP_NOFS) | ||
| 2458 | mask = GFP_NOFS; | ||
| 2459 | clear_extent_bit(tree, start, end, EXTENT_UPTODATE, | ||
| 2460 | 1, 1, mask); | ||
| 2461 | } | ||
| 2462 | return ret; | ||
| 2463 | } | ||
| 2464 | EXPORT_SYMBOL(try_release_extent_state); | ||
| 2465 | |||
| 2466 | /* | ||
| 2467 | * a helper for releasepage. As long as there are no locked extents | ||
| 2468 | * in the range corresponding to the page, both state records and extent | ||
| 2469 | * map records are removed | ||
| 2470 | */ | ||
| 2471 | int try_release_extent_mapping(struct extent_map_tree *map, | ||
| 2472 | struct extent_io_tree *tree, struct page *page, | ||
| 2473 | gfp_t mask) | ||
| 2474 | { | ||
| 2475 | struct extent_map *em; | ||
| 2476 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 2477 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 2478 | |||
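| | /* | ||
| | * for larger files it is worth walking the extent maps and | ||
| | * dropping any that are unpinned and fully unlocked, but only | ||
| | * when the caller allows us to block | ||
| | */ | ||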
| 2479 | if ((mask & __GFP_WAIT) && | ||
| 2480 | page->mapping->host->i_size > 16 * 1024 * 1024) { | ||
| 2481 | u64 len; | ||
| 2482 | while (start <= end) { | ||
| 2483 | len = end - start + 1; | ||
| 2484 | spin_lock(&map->lock); | ||
| 2485 | em = lookup_extent_mapping(map, start, len); | ||
| 2486 | if (!em || IS_ERR(em)) { | ||
| 2487 | spin_unlock(&map->lock); | ||
| 2488 | break; | ||
| 2489 | } | ||
| 2490 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || | ||
| 2491 | em->start != start) { | ||
| 2492 | spin_unlock(&map->lock); | ||
| 2493 | free_extent_map(em); | ||
| 2494 | break; | ||
| 2495 | } | ||
| 2496 | if (!test_range_bit(tree, em->start, | ||
| 2497 | extent_map_end(em) - 1, | ||
| 2498 | EXTENT_LOCKED, 0)) { | ||
| 2499 | remove_extent_mapping(map, em); | ||
| 2500 | /* once for the rb tree */ | ||
| 2501 | free_extent_map(em); | ||
| 2502 | } | ||
| 2503 | start = extent_map_end(em); | ||
| 2504 | spin_unlock(&map->lock); | ||
| 2505 | |||
| 2506 | /* once for us */ | ||
| 2507 | free_extent_map(em); | ||
| 2508 | } | ||
| 2509 | } | ||
| 2510 | return try_release_extent_state(map, tree, page, mask); | ||
| 2511 | } | ||
| 2512 | EXPORT_SYMBOL(try_release_extent_mapping); | ||
| 2513 | |||
| 2514 | sector_t extent_bmap(struct address_space *mapping, sector_t iblock, | ||
| 2515 | get_extent_t *get_extent) | ||
| 2516 | { | ||
| 2517 | struct inode *inode = mapping->host; | ||
| 2518 | u64 start = (u64)iblock << inode->i_blkbits; | ||
| 2519 | sector_t sector = 0; | ||
| 2520 | struct extent_map *em; | ||
| 2521 | |||
| 2522 | em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0); | ||
| 2523 | if (!em || IS_ERR(em)) | ||
| 2524 | return 0; | ||
| 2525 | |||
| 2526 | if (em->block_start == EXTENT_MAP_INLINE || | ||
| 2527 | em->block_start == EXTENT_MAP_HOLE) | ||
| 2528 | goto out; | ||
| 2529 | |||
| 2530 | sector = (em->block_start + start - em->start) >> inode->i_blkbits; | ||
| 2531 | out: | ||
| 2532 | free_extent_map(em); | ||
| 2533 | return sector; | ||
| 2534 | } | ||
| 2535 | |||
| 2536 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
| 2537 | unsigned long i) | ||
| 2538 | { | ||
| 2539 | struct page *p; | ||
| 2540 | struct address_space *mapping; | ||
| 2541 | |||
| 2542 | if (i == 0) | ||
| 2543 | return eb->first_page; | ||
| 2544 | i += eb->start >> PAGE_CACHE_SHIFT; | ||
| 2545 | mapping = eb->first_page->mapping; | ||
| 2546 | if (!mapping) | ||
| 2547 | return NULL; | ||
| 2548 | |||
| 2549 | /* | ||
| 2550 | * extent_buffer_page is only called after pinning the page | ||
| 2551 | * by increasing the reference count. So we know the page must | ||
| 2552 | * be in the radix tree. | ||
| 2553 | */ | ||
| 2554 | rcu_read_lock(); | ||
| 2555 | p = radix_tree_lookup(&mapping->page_tree, i); | ||
| 2556 | rcu_read_unlock(); | ||
| 2557 | |||
| 2558 | return p; | ||
| 2559 | } | ||
| 2560 | |||
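| | /* | ||
| | * number of pages covered by [start, start + len); e.g. with 4K | ||
| | * pages, start 1024 and len 8192 touch bytes 1024..9215 and so | ||
| | * span three pages | ||
| | */ | ||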
| 2561 | static inline unsigned long num_extent_pages(u64 start, u64 len) | ||
| 2562 | { | ||
| 2563 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | ||
| 2564 | (start >> PAGE_CACHE_SHIFT); | ||
| 2565 | } | ||
| 2566 | |||
| 2567 | static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | ||
| 2568 | u64 start, | ||
| 2569 | unsigned long len, | ||
| 2570 | gfp_t mask) | ||
| 2571 | { | ||
| 2572 | struct extent_buffer *eb = NULL; | ||
| 2573 | #ifdef LEAK_DEBUG | ||
| 2574 | unsigned long flags; | ||
| 2575 | #endif | ||
| 2576 | |||
| 2577 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | ||
| | if (!eb) | ||
| | return NULL; | ||
| 2578 | eb->start = start; | ||
| 2579 | eb->len = len; | ||
| 2580 | mutex_init(&eb->mutex); | ||
| 2581 | #ifdef LEAK_DEBUG | ||
| 2582 | spin_lock_irqsave(&leak_lock, flags); | ||
| 2583 | list_add(&eb->leak_list, &buffers); | ||
| 2584 | spin_unlock_irqrestore(&leak_lock, flags); | ||
| 2585 | #endif | ||
| 2586 | atomic_set(&eb->refs, 1); | ||
| 2587 | |||
| 2588 | return eb; | ||
| 2589 | } | ||
| 2590 | |||
| 2591 | static void __free_extent_buffer(struct extent_buffer *eb) | ||
| 2592 | { | ||
| 2593 | #ifdef LEAK_DEBUG | ||
| 2594 | unsigned long flags; | ||
| 2595 | spin_lock_irqsave(&leak_lock, flags); | ||
| 2596 | list_del(&eb->leak_list); | ||
| 2597 | spin_unlock_irqrestore(&leak_lock, flags); | ||
| 2598 | #endif | ||
| 2599 | kmem_cache_free(extent_buffer_cache, eb); | ||
| 2600 | } | ||
| 2601 | |||
| 2602 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | ||
| 2603 | u64 start, unsigned long len, | ||
| 2604 | struct page *page0, | ||
| 2605 | gfp_t mask) | ||
| 2606 | { | ||
| 2607 | unsigned long num_pages = num_extent_pages(start, len); | ||
| 2608 | unsigned long i; | ||
| 2609 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 2610 | struct extent_buffer *eb; | ||
| 2611 | struct extent_buffer *exists = NULL; | ||
| 2612 | struct page *p; | ||
| 2613 | struct address_space *mapping = tree->mapping; | ||
| 2614 | int uptodate = 1; | ||
| 2615 | |||
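| | /* fast path: the buffer may already be in the tree */ | ||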
| 2616 | spin_lock(&tree->buffer_lock); | ||
| 2617 | eb = buffer_search(tree, start); | ||
| 2618 | if (eb) { | ||
| 2619 | atomic_inc(&eb->refs); | ||
| 2620 | spin_unlock(&tree->buffer_lock); | ||
| 2621 | mark_page_accessed(eb->first_page); | ||
| 2622 | return eb; | ||
| 2623 | } | ||
| 2624 | spin_unlock(&tree->buffer_lock); | ||
| 2625 | |||
| 2626 | eb = __alloc_extent_buffer(tree, start, len, mask); | ||
| 2627 | if (!eb) | ||
| 2628 | return NULL; | ||
| 2629 | |||
| 2630 | if (page0) { | ||
| 2631 | eb->first_page = page0; | ||
| 2632 | i = 1; | ||
| 2633 | index++; | ||
| 2634 | page_cache_get(page0); | ||
| 2635 | mark_page_accessed(page0); | ||
| 2636 | set_page_extent_mapped(page0); | ||
| 2637 | set_page_extent_head(page0, len); | ||
| 2638 | uptodate = PageUptodate(page0); | ||
| 2639 | } else { | ||
| 2640 | i = 0; | ||
| 2641 | } | ||
| 2642 | for (; i < num_pages; i++, index++) { | ||
| 2643 | p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); | ||
| 2644 | if (!p) { | ||
| 2645 | WARN_ON(1); | ||
| 2646 | goto free_eb; | ||
| 2647 | } | ||
| 2648 | set_page_extent_mapped(p); | ||
| 2649 | mark_page_accessed(p); | ||
| 2650 | if (i == 0) { | ||
| 2651 | eb->first_page = p; | ||
| 2652 | set_page_extent_head(p, len); | ||
| 2653 | } else { | ||
| 2654 | set_page_private(p, EXTENT_PAGE_PRIVATE); | ||
| 2655 | } | ||
| 2656 | if (!PageUptodate(p)) | ||
| 2657 | uptodate = 0; | ||
| 2658 | unlock_page(p); | ||
| 2659 | } | ||
| 2660 | if (uptodate) | ||
| 2661 | eb->flags |= EXTENT_UPTODATE; | ||
| 2662 | eb->flags |= EXTENT_BUFFER_FILLED; | ||
| 2663 | |||
| 2664 | spin_lock(&tree->buffer_lock); | ||
| 2665 | exists = buffer_tree_insert(tree, start, &eb->rb_node); | ||
| 2666 | if (exists) { | ||
| 2667 | /* add one reference for the caller */ | ||
| 2668 | atomic_inc(&exists->refs); | ||
| 2669 | spin_unlock(&tree->buffer_lock); | ||
| 2670 | goto free_eb; | ||
| 2671 | } | ||
| 2672 | spin_unlock(&tree->buffer_lock); | ||
| 2673 | |||
| 2674 | /* add one reference for the tree */ | ||
| 2675 | atomic_inc(&eb->refs); | ||
| 2676 | return eb; | ||
| 2677 | |||
| 2678 | free_eb: | ||
| 2679 | if (!atomic_dec_and_test(&eb->refs)) | ||
| 2680 | return exists; | ||
| 2681 | for (index = 1; index < i; index++) | ||
| 2682 | page_cache_release(extent_buffer_page(eb, index)); | ||
| 2683 | page_cache_release(extent_buffer_page(eb, 0)); | ||
| 2684 | __free_extent_buffer(eb); | ||
| 2685 | return exists; | ||
| 2686 | } | ||
| 2687 | EXPORT_SYMBOL(alloc_extent_buffer); | ||
| 2688 | |||
| 2689 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | ||
| 2690 | u64 start, unsigned long len, | ||
| 2691 | gfp_t mask) | ||
| 2692 | { | ||
| 2693 | struct extent_buffer *eb; | ||
| 2694 | |||
| 2695 | spin_lock(&tree->buffer_lock); | ||
| 2696 | eb = buffer_search(tree, start); | ||
| 2697 | if (eb) | ||
| 2698 | atomic_inc(&eb->refs); | ||
| 2699 | spin_unlock(&tree->buffer_lock); | ||
| 2700 | |||
| 2701 | if (eb) | ||
| 2702 | mark_page_accessed(eb->first_page); | ||
| 2703 | |||
| 2704 | return eb; | ||
| 2705 | } | ||
| 2706 | EXPORT_SYMBOL(find_extent_buffer); | ||
| 2707 | |||
| 2708 | void free_extent_buffer(struct extent_buffer *eb) | ||
| 2709 | { | ||
| 2710 | if (!eb) | ||
| 2711 | return; | ||
| 2712 | |||
| 2713 | if (!atomic_dec_and_test(&eb->refs)) | ||
| 2714 | return; | ||
| 2715 | |||
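| | /* | ||
| | * buffers are normally torn down by try_release_extent_buffer, | ||
| | * which drops them from the tree under the buffer lock, so the | ||
| | * count reaching zero here is unexpected | ||
| | */ | ||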
| 2716 | WARN_ON(1); | ||
| 2717 | } | ||
| 2718 | EXPORT_SYMBOL(free_extent_buffer); | ||
| 2719 | |||
| 2720 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, | ||
| 2721 | struct extent_buffer *eb) | ||
| 2722 | { | ||
| 2723 | int set; | ||
| 2724 | unsigned long i; | ||
| 2725 | unsigned long num_pages; | ||
| 2726 | struct page *page; | ||
| 2727 | |||
| 2728 | u64 start = eb->start; | ||
| 2729 | u64 end = start + eb->len - 1; | ||
| 2730 | |||
| 2731 | set = clear_extent_dirty(tree, start, end, GFP_NOFS); | ||
| 2732 | num_pages = num_extent_pages(eb->start, eb->len); | ||
| 2733 | |||
| 2734 | for (i = 0; i < num_pages; i++) { | ||
| 2735 | page = extent_buffer_page(eb, i); | ||
| 2736 | lock_page(page); | ||
| 2737 | if (i == 0) | ||
| 2738 | set_page_extent_head(page, eb->len); | ||
| 2739 | else | ||
| 2740 | set_page_private(page, EXTENT_PAGE_PRIVATE); | ||
| 2741 | |||
| 2742 | /* | ||
| 2743 | * if we're on the first or last page and the block isn't | ||
| 2744 | * aligned on a page boundary, do extra checks to make sure | ||
| 2745 | * we don't clean a page that is partially dirty | ||
| 2746 | */ | ||
| 2747 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | ||
| 2748 | ((i == num_pages - 1) && | ||
| 2749 | ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { | ||
| 2750 | start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 2751 | end = start + PAGE_CACHE_SIZE - 1; | ||
| 2752 | if (test_range_bit(tree, start, end, | ||
| 2753 | EXTENT_DIRTY, 0)) { | ||
| 2754 | unlock_page(page); | ||
| 2755 | continue; | ||
| 2756 | } | ||
| 2757 | } | ||
| 2758 | clear_page_dirty_for_io(page); | ||
| 2759 | spin_lock_irq(&page->mapping->tree_lock); | ||
| 2760 | if (!PageDirty(page)) { | ||
| 2761 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
| 2762 | page_index(page), | ||
| 2763 | PAGECACHE_TAG_DIRTY); | ||
| 2764 | } | ||
| 2765 | spin_unlock_irq(&page->mapping->tree_lock); | ||
| 2766 | unlock_page(page); | ||
| 2767 | } | ||
| 2768 | return 0; | ||
| 2769 | } | ||
| 2770 | EXPORT_SYMBOL(clear_extent_buffer_dirty); | ||
| 2771 | |||
| 2772 | int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, | ||
| 2773 | struct extent_buffer *eb) | ||
| 2774 | { | ||
| 2775 | return wait_on_extent_writeback(tree, eb->start, | ||
| 2776 | eb->start + eb->len - 1); | ||
| 2777 | } | ||
| 2778 | EXPORT_SYMBOL(wait_on_extent_buffer_writeback); | ||
| 2779 | |||
| 2780 | int set_extent_buffer_dirty(struct extent_io_tree *tree, | ||
| 2781 | struct extent_buffer *eb) | ||
| 2782 | { | ||
| 2783 | unsigned long i; | ||
| 2784 | unsigned long num_pages; | ||
| 2785 | |||
| 2786 | num_pages = num_extent_pages(eb->start, eb->len); | ||
| 2787 | for (i = 0; i < num_pages; i++) { | ||
| 2788 | struct page *page = extent_buffer_page(eb, i); | ||
| 2789 | /* writepage may need to do something special for the | ||
| 2790 | * first page, so we have to make sure page->private is | ||
| 2791 | * properly set. releasepage may drop page->private | ||
| 2792 | * on us if the page isn't already dirty. | ||
| 2793 | */ | ||
| 2794 | lock_page(page); | ||
| 2795 | if (i == 0) { | ||
| 2796 | set_page_extent_head(page, eb->len); | ||
| 2797 | } else if (PagePrivate(page) && | ||
| 2798 | page->private != EXTENT_PAGE_PRIVATE) { | ||
| 2799 | set_page_extent_mapped(page); | ||
| 2800 | } | ||
| 2801 | __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); | ||
| 2802 | set_extent_dirty(tree, page_offset(page), | ||
| 2803 | page_offset(page) + PAGE_CACHE_SIZE - 1, | ||
| 2804 | GFP_NOFS); | ||
| 2805 | unlock_page(page); | ||
| 2806 | } | ||
| 2807 | return 0; | ||
| 2808 | } | ||
| 2809 | EXPORT_SYMBOL(set_extent_buffer_dirty); | ||
| 2810 | |||
| 2811 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | ||
| 2812 | struct extent_buffer *eb) | ||
| 2813 | { | ||
| 2814 | unsigned long i; | ||
| 2815 | struct page *page; | ||
| 2816 | unsigned long num_pages; | ||
| 2817 | |||
| 2818 | num_pages = num_extent_pages(eb->start, eb->len); | ||
| 2819 | eb->flags &= ~EXTENT_UPTODATE; | ||
| 2820 | |||
| 2821 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | ||
| 2822 | GFP_NOFS); | ||
| 2823 | for (i = 0; i < num_pages; i++) { | ||
| 2824 | page = extent_buffer_page(eb, i); | ||
| 2825 | if (page) | ||
| 2826 | ClearPageUptodate(page); | ||
| 2827 | } | ||
| 2828 | return 0; | ||
| 2829 | } | ||
| 2830 | |||
| 2831 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, | ||
| 2832 | struct extent_buffer *eb) | ||
| 2833 | { | ||
| 2834 | unsigned long i; | ||
| 2835 | struct page *page; | ||
| 2836 | unsigned long num_pages; | ||
| 2837 | |||
| 2838 | num_pages = num_extent_pages(eb->start, eb->len); | ||
| 2839 | |||
| 2840 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | ||
| 2841 | GFP_NOFS); | ||
| 2842 | for (i = 0; i < num_pages; i++) { | ||
| 2843 | page = extent_buffer_page(eb, i); | ||
| 2844 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | ||
| 2845 | ((i == num_pages - 1) && | ||
| 2846 | ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { | ||
| 2847 | check_page_uptodate(tree, page); | ||
| 2848 | continue; | ||
| 2849 | } | ||
| 2850 | SetPageUptodate(page); | ||
| 2851 | } | ||
| 2852 | return 0; | ||
| 2853 | } | ||
| 2854 | EXPORT_SYMBOL(set_extent_buffer_uptodate); | ||
| 2855 | |||
| 2856 | int extent_range_uptodate(struct extent_io_tree *tree, | ||
| 2857 | u64 start, u64 end) | ||
| 2858 | { | ||
| 2859 | struct page *page; | ||
| 2860 | int ret; | ||
| 2861 | int pg_uptodate = 1; | ||
| 2862 | int uptodate; | ||
| 2863 | unsigned long index; | ||
| 2864 | |||
| 2865 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); | ||
| 2866 | if (ret) | ||
| 2867 | return 1; | ||
| 2868 | while (start <= end) { | ||
| 2869 | index = start >> PAGE_CACHE_SHIFT; | ||
| 2870 | page = find_get_page(tree->mapping, index); | ||
| | if (!page) { | ||
| | pg_uptodate = 0; | ||
| | break; | ||
| | } | ||
| 2871 | uptodate = PageUptodate(page); | ||
| 2872 | page_cache_release(page); | ||
| 2873 | if (!uptodate) { | ||
| 2874 | pg_uptodate = 0; | ||
| 2875 | break; | ||
| 2876 | } | ||
| 2877 | start += PAGE_CACHE_SIZE; | ||
| 2878 | } | ||
| 2879 | return pg_uptodate; | ||
| 2880 | } | ||
| 2881 | |||
| 2882 | int extent_buffer_uptodate(struct extent_io_tree *tree, | ||
| 2883 | struct extent_buffer *eb) | ||
| 2884 | { | ||
| 2885 | int ret = 0; | ||
| 2886 | unsigned long num_pages; | ||
| 2887 | unsigned long i; | ||
| 2888 | struct page *page; | ||
| 2889 | int pg_uptodate = 1; | ||
| 2890 | |||
| 2891 | if (eb->flags & EXTENT_UPTODATE) | ||
| 2892 | return 1; | ||
| 2893 | |||
| 2894 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | ||
| 2895 | EXTENT_UPTODATE, 1); | ||
| 2896 | if (ret) | ||
| 2897 | return ret; | ||
| 2898 | |||
| 2899 | num_pages = num_extent_pages(eb->start, eb->len); | ||
| 2900 | for (i = 0; i < num_pages; i++) { | ||
| 2901 | page = extent_buffer_page(eb, i); | ||
| 2902 | if (!PageUptodate(page)) { | ||
| 2903 | pg_uptodate = 0; | ||
| 2904 | break; | ||
| 2905 | } | ||
| 2906 | } | ||
| 2907 | return pg_uptodate; | ||
| 2908 | } | ||
| 2909 | EXPORT_SYMBOL(extent_buffer_uptodate); | ||
| 2910 | |||
| 2911 | int read_extent_buffer_pages(struct extent_io_tree *tree, | ||
| 2912 | struct extent_buffer *eb, | ||
| 2913 | u64 start, int wait, | ||
| 2914 | get_extent_t *get_extent, int mirror_num) | ||
| 2915 | { | ||
| 2916 | unsigned long i; | ||
| 2917 | unsigned long start_i; | ||
| 2918 | struct page *page; | ||
| 2919 | int err; | ||
| 2920 | int ret = 0; | ||
| 2921 | int locked_pages = 0; | ||
| 2922 | int all_uptodate = 1; | ||
| 2923 | int inc_all_pages = 0; | ||
| 2924 | unsigned long num_pages; | ||
| 2925 | struct bio *bio = NULL; | ||
| 2926 | |||
| 2927 | if (eb->flags & EXTENT_UPTODATE) | ||
| 2928 | return 0; | ||
| 2929 | |||
| 2930 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | ||
| 2931 | EXTENT_UPTODATE, 1)) { | ||
| 2932 | return 0; | ||
| 2933 | } | ||
| 2934 | |||
| 2935 | if (start) { | ||
| 2936 | WARN_ON(start < eb->start); | ||
| 2937 | start_i = (start >> PAGE_CACHE_SHIFT) - | ||
| 2938 | (eb->start >> PAGE_CACHE_SHIFT); | ||
| 2939 | } else { | ||
| 2940 | start_i = 0; | ||
| 2941 | } | ||
| 2942 | |||
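| | /* | ||
| | * first pass: lock every page (or bail out in the nowait case) | ||
| | * and note whether the whole buffer is already uptodate | ||
| | */ | ||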
| 2943 | num_pages = num_extent_pages(eb->start, eb->len); | ||
| 2944 | for (i = start_i; i < num_pages; i++) { | ||
| 2945 | page = extent_buffer_page(eb, i); | ||
| 2946 | if (!wait) { | ||
| 2947 | if (!trylock_page(page)) | ||
| 2948 | goto unlock_exit; | ||
| 2949 | } else { | ||
| 2950 | lock_page(page); | ||
| 2951 | } | ||
| 2952 | locked_pages++; | ||
| 2953 | if (!PageUptodate(page)) { | ||
| 2954 | all_uptodate = 0; | ||
| 2955 | } | ||
| 2956 | } | ||
| 2957 | if (all_uptodate) { | ||
| 2958 | if (start_i == 0) | ||
| 2959 | eb->flags |= EXTENT_UPTODATE; | ||
| 2960 | if (ret) { | ||
| 2961 | printk("all up to date but ret is %d\n", ret); | ||
| 2962 | } | ||
| 2963 | goto unlock_exit; | ||
| 2964 | } | ||
| 2965 | |||
| 2966 | for (i = start_i; i < num_pages; i++) { | ||
| 2967 | page = extent_buffer_page(eb, i); | ||
| 2968 | if (inc_all_pages) | ||
| 2969 | page_cache_get(page); | ||
| 2970 | if (!PageUptodate(page)) { | ||
| 2971 | if (start_i == 0) | ||
| 2972 | inc_all_pages = 1; | ||
| 2973 | ClearPageError(page); | ||
| 2974 | err = __extent_read_full_page(tree, page, | ||
| 2975 | get_extent, &bio, | ||
| 2976 | mirror_num); | ||
| 2977 | if (err) { | ||
| 2978 | ret = err; | ||
| 2979 | printk("err %d from __extent_read_full_page\n", ret); | ||
| 2980 | } | ||
| 2981 | } else { | ||
| 2982 | unlock_page(page); | ||
| 2983 | } | ||
| 2984 | } | ||
| 2985 | |||
| 2986 | if (bio) | ||
| 2987 | submit_one_bio(READ, bio, mirror_num); | ||
| 2988 | |||
| 2989 | if (ret || !wait) { | ||
| 2990 | if (ret) | ||
| 2991 | printk("ret %d wait %d returning\n", ret, wait); | ||
| 2992 | return ret; | ||
| 2993 | } | ||
| 2994 | for (i = start_i; i < num_pages; i++) { | ||
| 2995 | page = extent_buffer_page(eb, i); | ||
| 2996 | wait_on_page_locked(page); | ||
| 2997 | if (!PageUptodate(page)) { | ||
| 2998 | printk("page not uptodate after wait_on_page_locked\n"); | ||
| 2999 | ret = -EIO; | ||
| 3000 | } | ||
| 3001 | } | ||
| 3002 | if (!ret) | ||
| 3003 | eb->flags |= EXTENT_UPTODATE; | ||
| 3004 | return ret; | ||
| 3005 | |||
| 3006 | unlock_exit: | ||
| 3007 | i = start_i; | ||
| 3008 | while (locked_pages > 0) { | ||
| 3009 | page = extent_buffer_page(eb, i); | ||
| 3010 | i++; | ||
| 3011 | unlock_page(page); | ||
| 3012 | locked_pages--; | ||
| 3013 | } | ||
| 3014 | return ret; | ||
| 3015 | } | ||
| 3016 | EXPORT_SYMBOL(read_extent_buffer_pages); | ||
| 3017 | |||
| 3018 | void read_extent_buffer(struct extent_buffer *eb, void *dstv, | ||
| 3019 | unsigned long start, | ||
| 3020 | unsigned long len) | ||
| 3021 | { | ||
| 3022 | size_t cur; | ||
| 3023 | size_t offset; | ||
| 3024 | struct page *page; | ||
| 3025 | char *kaddr; | ||
| 3026 | char *dst = (char *)dstv; | ||
| 3027 | size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3028 | unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; | ||
| 3029 | |||
| 3030 | WARN_ON(start > eb->len); | ||
| 3031 | WARN_ON(start + len > eb->start + eb->len); | ||
| 3032 | |||
| 3033 | offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3034 | |||
| 3035 | while (len > 0) { | ||
| 3036 | page = extent_buffer_page(eb, i); | ||
| 3037 | |||
| 3038 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | ||
| 3039 | kaddr = kmap_atomic(page, KM_USER1); | ||
| 3040 | memcpy(dst, kaddr + offset, cur); | ||
| 3041 | kunmap_atomic(kaddr, KM_USER1); | ||
| 3042 | |||
| 3043 | dst += cur; | ||
| 3044 | len -= cur; | ||
| 3045 | offset = 0; | ||
| 3046 | i++; | ||
| 3047 | } | ||
| 3048 | } | ||
| 3049 | EXPORT_SYMBOL(read_extent_buffer); | ||
| 3050 | |||
| 3051 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | ||
| 3052 | unsigned long min_len, char **token, char **map, | ||
| 3053 | unsigned long *map_start, | ||
| 3054 | unsigned long *map_len, int km) | ||
| 3055 | { | ||
| 3056 | size_t offset = start & (PAGE_CACHE_SIZE - 1); | ||
| 3057 | char *kaddr; | ||
| 3058 | struct page *p; | ||
| 3059 | size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3060 | unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; | ||
| 3061 | unsigned long end_i = (start_offset + start + min_len - 1) >> | ||
| 3062 | PAGE_CACHE_SHIFT; | ||
| 3063 | |||
| 3064 | if (i != end_i) | ||
| 3065 | return -EINVAL; | ||
| 3066 | |||
| 3067 | if (i == 0) { | ||
| 3068 | offset = start_offset; | ||
| 3069 | *map_start = 0; | ||
| 3070 | } else { | ||
| 3071 | offset = 0; | ||
| 3072 | *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; | ||
| 3073 | } | ||
| 3074 | if (start + min_len > eb->len) { | ||
| 3075 | printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); | ||
| 3076 | WARN_ON(1); | ||
| 3077 | } | ||
| 3078 | |||
| 3079 | p = extent_buffer_page(eb, i); | ||
| 3080 | kaddr = kmap_atomic(p, km); | ||
| 3081 | *token = kaddr; | ||
| 3082 | *map = kaddr + offset; | ||
| 3083 | *map_len = PAGE_CACHE_SIZE - offset; | ||
| 3084 | return 0; | ||
| 3085 | } | ||
| 3086 | EXPORT_SYMBOL(map_private_extent_buffer); | ||
| 3087 | |||
| 3088 | int map_extent_buffer(struct extent_buffer *eb, unsigned long start, | ||
| 3089 | unsigned long min_len, | ||
| 3090 | char **token, char **map, | ||
| 3091 | unsigned long *map_start, | ||
| 3092 | unsigned long *map_len, int km) | ||
| 3093 | { | ||
| 3094 | int err; | ||
| 3095 | int save = 0; | ||
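| | /* | ||
| | * if the buffer already holds a long-lived mapping, drop it and | ||
| | * remember to re-establish it over the new range on success | ||
| | */ | ||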
| 3096 | if (eb->map_token) { | ||
| 3097 | unmap_extent_buffer(eb, eb->map_token, km); | ||
| 3098 | eb->map_token = NULL; | ||
| 3099 | save = 1; | ||
| 3100 | } | ||
| 3101 | err = map_private_extent_buffer(eb, start, min_len, token, map, | ||
| 3102 | map_start, map_len, km); | ||
| 3103 | if (!err && save) { | ||
| 3104 | eb->map_token = *token; | ||
| 3105 | eb->kaddr = *map; | ||
| 3106 | eb->map_start = *map_start; | ||
| 3107 | eb->map_len = *map_len; | ||
| 3108 | } | ||
| 3109 | return err; | ||
| 3110 | } | ||
| 3111 | EXPORT_SYMBOL(map_extent_buffer); | ||
| 3112 | |||
| 3113 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) | ||
| 3114 | { | ||
| 3115 | kunmap_atomic(token, km); | ||
| 3116 | } | ||
| 3117 | EXPORT_SYMBOL(unmap_extent_buffer); | ||
| 3118 | |||
| 3119 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | ||
| 3120 | unsigned long start, | ||
| 3121 | unsigned long len) | ||
| 3122 | { | ||
| 3123 | size_t cur; | ||
| 3124 | size_t offset; | ||
| 3125 | struct page *page; | ||
| 3126 | char *kaddr; | ||
| 3127 | char *ptr = (char *)ptrv; | ||
| 3128 | size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3129 | unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; | ||
| 3130 | int ret = 0; | ||
| 3131 | |||
| 3132 | WARN_ON(start > eb->len); | ||
| 3133 | WARN_ON(start + len > eb->start + eb->len); | ||
| 3134 | |||
| 3135 | offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3136 | |||
| 3137 | while (len > 0) { | ||
| 3138 | page = extent_buffer_page(eb, i); | ||
| 3139 | |||
| 3140 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | ||
| 3141 | |||
| 3142 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 3143 | ret = memcmp(ptr, kaddr + offset, cur); | ||
| 3144 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3145 | if (ret) | ||
| 3146 | break; | ||
| 3147 | |||
| 3148 | ptr += cur; | ||
| 3149 | len -= cur; | ||
| 3150 | offset = 0; | ||
| 3151 | i++; | ||
| 3152 | } | ||
| 3153 | return ret; | ||
| 3154 | } | ||
| 3155 | EXPORT_SYMBOL(memcmp_extent_buffer); | ||
| 3156 | |||
| 3157 | void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | ||
| 3158 | unsigned long start, unsigned long len) | ||
| 3159 | { | ||
| 3160 | size_t cur; | ||
| 3161 | size_t offset; | ||
| 3162 | struct page *page; | ||
| 3163 | char *kaddr; | ||
| 3164 | char *src = (char *)srcv; | ||
| 3165 | size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3166 | unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; | ||
| 3167 | |||
| 3168 | WARN_ON(start > eb->len); | ||
| 3169 | WARN_ON(start + len > eb->start + eb->len); | ||
| 3170 | |||
| 3171 | offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3172 | |||
| 3173 | while (len > 0) { | ||
| 3174 | page = extent_buffer_page(eb, i); | ||
| 3175 | WARN_ON(!PageUptodate(page)); | ||
| 3176 | |||
| 3177 | cur = min(len, PAGE_CACHE_SIZE - offset); | ||
| 3178 | kaddr = kmap_atomic(page, KM_USER1); | ||
| 3179 | memcpy(kaddr + offset, src, cur); | ||
| 3180 | kunmap_atomic(kaddr, KM_USER1); | ||
| 3181 | |||
| 3182 | src += cur; | ||
| 3183 | len -= cur; | ||
| 3184 | offset = 0; | ||
| 3185 | i++; | ||
| 3186 | } | ||
| 3187 | } | ||
| 3188 | EXPORT_SYMBOL(write_extent_buffer); | ||
| 3189 | |||
| 3190 | void memset_extent_buffer(struct extent_buffer *eb, char c, | ||
| 3191 | unsigned long start, unsigned long len) | ||
| 3192 | { | ||
| 3193 | size_t cur; | ||
| 3194 | size_t offset; | ||
| 3195 | struct page *page; | ||
| 3196 | char *kaddr; | ||
| 3197 | size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3198 | unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; | ||
| 3199 | |||
| 3200 | WARN_ON(start > eb->len); | ||
| 3201 | WARN_ON(start + len > eb->start + eb->len); | ||
| 3202 | |||
| 3203 | offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3204 | |||
| 3205 | while (len > 0) { | ||
| 3206 | page = extent_buffer_page(eb, i); | ||
| 3207 | WARN_ON(!PageUptodate(page)); | ||
| 3208 | |||
| 3209 | cur = min(len, PAGE_CACHE_SIZE - offset); | ||
| 3210 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 3211 | memset(kaddr + offset, c, cur); | ||
| 3212 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3213 | |||
| 3214 | len -= cur; | ||
| 3215 | offset = 0; | ||
| 3216 | i++; | ||
| 3217 | } | ||
| 3218 | } | ||
| 3219 | EXPORT_SYMBOL(memset_extent_buffer); | ||
| 3220 | |||
| 3221 | void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | ||
| 3222 | unsigned long dst_offset, unsigned long src_offset, | ||
| 3223 | unsigned long len) | ||
| 3224 | { | ||
| 3225 | u64 dst_len = dst->len; | ||
| 3226 | size_t cur; | ||
| 3227 | size_t offset; | ||
| 3228 | struct page *page; | ||
| 3229 | char *kaddr; | ||
| 3230 | size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3231 | unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; | ||
| 3232 | |||
| 3233 | WARN_ON(src->len != dst_len); | ||
| 3234 | |||
| 3235 | offset = (start_offset + dst_offset) & | ||
| 3236 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3237 | |||
| 3238 | while (len > 0) { | ||
| 3239 | page = extent_buffer_page(dst, i); | ||
| 3240 | WARN_ON(!PageUptodate(page)); | ||
| 3241 | |||
| 3242 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | ||
| 3243 | |||
| 3244 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 3245 | read_extent_buffer(src, kaddr + offset, src_offset, cur); | ||
| 3246 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3247 | |||
| 3248 | src_offset += cur; | ||
| 3249 | len -= cur; | ||
| 3250 | offset = 0; | ||
| 3251 | i++; | ||
| 3252 | } | ||
| 3253 | } | ||
| 3254 | EXPORT_SYMBOL(copy_extent_buffer); | ||
| 3255 | |||
| 3256 | static void move_pages(struct page *dst_page, struct page *src_page, | ||
| 3257 | unsigned long dst_off, unsigned long src_off, | ||
| 3258 | unsigned long len) | ||
| 3259 | { | ||
| 3260 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | ||
| 3261 | if (dst_page == src_page) { | ||
| 3262 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); | ||
| 3263 | } else { | ||
| 3264 | char *src_kaddr = kmap_atomic(src_page, KM_USER1); | ||
| 3265 | char *p = dst_kaddr + dst_off + len; | ||
| 3266 | char *s = src_kaddr + src_off + len; | ||
| 3267 | |||
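| | /* | ||
| | * the overall move runs from the tail toward the front, so | ||
| | * copy these bytes backwards as well | ||
| | */ | ||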
| 3268 | while (len--) | ||
| 3269 | *--p = *--s; | ||
| 3270 | |||
| 3271 | kunmap_atomic(src_kaddr, KM_USER1); | ||
| 3272 | } | ||
| 3273 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
| 3274 | } | ||
| 3275 | |||
| 3276 | static void copy_pages(struct page *dst_page, struct page *src_page, | ||
| 3277 | unsigned long dst_off, unsigned long src_off, | ||
| 3278 | unsigned long len) | ||
| 3279 | { | ||
| 3280 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | ||
| 3281 | char *src_kaddr; | ||
| 3282 | |||
| 3283 | if (dst_page != src_page) | ||
| 3284 | src_kaddr = kmap_atomic(src_page, KM_USER1); | ||
| 3285 | else | ||
| 3286 | src_kaddr = dst_kaddr; | ||
| 3287 | |||
| 3288 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | ||
| 3289 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
| 3290 | if (dst_page != src_page) | ||
| 3291 | kunmap_atomic(src_kaddr, KM_USER1); | ||
| 3292 | } | ||
| 3293 | |||
| 3294 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | ||
| 3295 | unsigned long src_offset, unsigned long len) | ||
| 3296 | { | ||
| 3297 | size_t cur; | ||
| 3298 | size_t dst_off_in_page; | ||
| 3299 | size_t src_off_in_page; | ||
| 3300 | size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3301 | unsigned long dst_i; | ||
| 3302 | unsigned long src_i; | ||
| 3303 | |||
| 3304 | if (src_offset + len > dst->len) { | ||
| 3305 | printk("memmove bogus src_offset %lu move len %lu len %lu\n", | ||
| 3306 | src_offset, len, dst->len); | ||
| 3307 | BUG_ON(1); | ||
| 3308 | } | ||
| 3309 | if (dst_offset + len > dst->len) { | ||
| 3310 | printk("memmove bogus dst_offset %lu move len %lu len %lu\n", | ||
| 3311 | dst_offset, len, dst->len); | ||
| 3312 | BUG_ON(1); | ||
| 3313 | } | ||
| 3314 | |||
| 3315 | while (len > 0) { | ||
| 3316 | dst_off_in_page = (start_offset + dst_offset) & | ||
| 3317 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3318 | src_off_in_page = (start_offset + src_offset) & | ||
| 3319 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3320 | |||
| 3321 | dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; | ||
| 3322 | src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; | ||
| 3323 | |||
| 3324 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - | ||
| 3325 | src_off_in_page)); | ||
| 3326 | cur = min_t(unsigned long, cur, | ||
| 3327 | (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); | ||
| 3328 | |||
| 3329 | copy_pages(extent_buffer_page(dst, dst_i), | ||
| 3330 | extent_buffer_page(dst, src_i), | ||
| 3331 | dst_off_in_page, src_off_in_page, cur); | ||
| 3332 | |||
| 3333 | src_offset += cur; | ||
| 3334 | dst_offset += cur; | ||
| 3335 | len -= cur; | ||
| 3336 | } | ||
| 3337 | } | ||
| 3338 | EXPORT_SYMBOL(memcpy_extent_buffer); | ||
| 3339 | |||
| 3340 | void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | ||
| 3341 | unsigned long src_offset, unsigned long len) | ||
| 3342 | { | ||
| 3343 | size_t cur; | ||
| 3344 | size_t dst_off_in_page; | ||
| 3345 | size_t src_off_in_page; | ||
| 3346 | unsigned long dst_end = dst_offset + len - 1; | ||
| 3347 | unsigned long src_end = src_offset + len - 1; | ||
| 3348 | size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 3349 | unsigned long dst_i; | ||
| 3350 | unsigned long src_i; | ||
| 3351 | |||
| 3352 | if (src_offset + len > dst->len) { | ||
| 3353 | printk("memmove bogus src_offset %lu move len %lu len %lu\n", | ||
| 3354 | src_offset, len, dst->len); | ||
| 3355 | BUG_ON(1); | ||
| 3356 | } | ||
| 3357 | if (dst_offset + len > dst->len) { | ||
| 3358 | printk("memmove bogus dst_offset %lu move len %lu len %lu\n", | ||
| 3359 | dst_offset, len, dst->len); | ||
| 3360 | BUG_ON(1); | ||
| 3361 | } | ||
| 3362 | if (dst_offset < src_offset) { | ||
| 3363 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); | ||
| 3364 | return; | ||
| 3365 | } | ||
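| | /* | ||
| | * overlapping move toward higher offsets: walk back from the end | ||
| | * so no byte is overwritten before it has been copied | ||
| | */ | ||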
| 3366 | while (len > 0) { | ||
| 3367 | dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; | ||
| 3368 | src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; | ||
| 3369 | |||
| 3370 | dst_off_in_page = (start_offset + dst_end) & | ||
| 3371 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3372 | src_off_in_page = (start_offset + src_end) & | ||
| 3373 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 3374 | |||
| 3375 | cur = min_t(unsigned long, len, src_off_in_page + 1); | ||
| 3376 | cur = min(cur, dst_off_in_page + 1); | ||
| 3377 | move_pages(extent_buffer_page(dst, dst_i), | ||
| 3378 | extent_buffer_page(dst, src_i), | ||
| 3379 | dst_off_in_page - cur + 1, | ||
| 3380 | src_off_in_page - cur + 1, cur); | ||
| 3381 | |||
| 3382 | dst_end -= cur; | ||
| 3383 | src_end -= cur; | ||
| 3384 | len -= cur; | ||
| 3385 | } | ||
| 3386 | } | ||
| 3387 | EXPORT_SYMBOL(memmove_extent_buffer); | ||
| 3388 | |||
| 3389 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | ||
| 3390 | { | ||
| 3391 | u64 start = page_offset(page); | ||
| 3392 | struct extent_buffer *eb; | ||
| 3393 | int ret = 1; | ||
| 3394 | unsigned long i; | ||
| 3395 | unsigned long num_pages; | ||
| 3396 | |||
| 3397 | spin_lock(&tree->buffer_lock); | ||
| 3398 | eb = buffer_search(tree, start); | ||
| 3399 | if (!eb) | ||
| 3400 | goto out; | ||
| 3401 | |||
| 3402 | if (atomic_read(&eb->refs) > 1) { | ||
| 3403 | ret = 0; | ||
| 3404 | goto out; | ||
| 3405 | } | ||
| 3406 | /* at this point we can safely release the extent buffer */ | ||
| 3407 | num_pages = num_extent_pages(eb->start, eb->len); | ||
| 3408 | for (i = 0; i < num_pages; i++) | ||
| 3409 | page_cache_release(extent_buffer_page(eb, i)); | ||
| 3410 | rb_erase(&eb->rb_node, &tree->buffer); | ||
| 3411 | __free_extent_buffer(eb); | ||
| 3412 | out: | ||
| 3413 | spin_unlock(&tree->buffer_lock); | ||
| 3414 | return ret; | ||
| 3415 | } | ||
| 3416 | EXPORT_SYMBOL(try_release_extent_buffer); | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h new file mode 100644 index 00000000000..c9d1908a1ae --- /dev/null +++ b/fs/btrfs/extent_io.h | |||
| @@ -0,0 +1,248 @@ | |||
| 1 | #ifndef __EXTENTIO__ | ||
| 2 | #define __EXTENTIO__ | ||
| 3 | |||
| 4 | #include <linux/rbtree.h> | ||
| 5 | |||
| 6 | /* bits for the extent state */ | ||
| 7 | #define EXTENT_DIRTY 1 | ||
| 8 | #define EXTENT_WRITEBACK (1 << 1) | ||
| 9 | #define EXTENT_UPTODATE (1 << 2) | ||
| 10 | #define EXTENT_LOCKED (1 << 3) | ||
| 11 | #define EXTENT_NEW (1 << 4) | ||
| 12 | #define EXTENT_DELALLOC (1 << 5) | ||
| 13 | #define EXTENT_DEFRAG (1 << 6) | ||
| 14 | #define EXTENT_DEFRAG_DONE (1 << 7) | ||
| 15 | #define EXTENT_BUFFER_FILLED (1 << 8) | ||
| 16 | #define EXTENT_ORDERED (1 << 9) | ||
| 17 | #define EXTENT_ORDERED_METADATA (1 << 10) | ||
| 18 | #define EXTENT_BOUNDARY (1 << 11) | ||
| 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | ||
| 20 | |||
| 21 | /* | ||
| 22 | * page->private values. Every page that is controlled by the extent | ||
| 23 | * map has page->private set to EXTENT_PAGE_PRIVATE; head pages of | ||
| 24 | * extent buffers appear to be tagged with _FIRST_PAGE instead. | ||
| | */ | ||
| 25 | #define EXTENT_PAGE_PRIVATE 1 | ||
| 26 | #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 | ||
| 27 | |||
| 28 | struct extent_state; | ||
| 29 | |||
| 30 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | ||
| 31 | struct bio *bio, int mirror_num); | ||
| 32 | struct extent_io_ops { | ||
| 33 | int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); | ||
| 34 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); | ||
| 35 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); | ||
| 36 | extent_submit_bio_hook_t *submit_bio_hook; | ||
| 37 | int (*merge_bio_hook)(struct page *page, unsigned long offset, | ||
| 38 | size_t size, struct bio *bio); | ||
| 39 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | ||
| 40 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, | ||
| 41 | u64 start, u64 end, | ||
| 42 | struct extent_state *state); | ||
| 43 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, | ||
| 44 | u64 start, u64 end, | ||
| 45 | struct extent_state *state); | ||
| 46 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, | ||
| 47 | struct extent_state *state); | ||
| 48 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | ||
| 49 | struct extent_state *state, int uptodate); | ||
| 50 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | ||
| 51 | unsigned long old, unsigned long bits); | ||
| 52 | int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, | ||
| 53 | unsigned long old, unsigned long bits); | ||
| 54 | int (*write_cache_pages_lock_hook)(struct page *page); | ||
| 55 | }; | ||
| 56 | |||
| 57 | struct extent_io_tree { | ||
| 58 | struct rb_root state; | ||
| 59 | struct rb_root buffer; | ||
| 60 | struct address_space *mapping; | ||
| 61 | u64 dirty_bytes; | ||
| 62 | spinlock_t lock; | ||
| 63 | spinlock_t buffer_lock; | ||
| 64 | struct extent_io_ops *ops; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct extent_state { | ||
| 68 | u64 start; | ||
| 69 | u64 end; /* inclusive */ | ||
| 70 | struct rb_node rb_node; | ||
| 71 | struct extent_io_tree *tree; | ||
| 72 | wait_queue_head_t wq; | ||
| 73 | atomic_t refs; | ||
| 74 | unsigned long state; | ||
| 75 | |||
| 76 | /* for use by the FS */ | ||
| 77 | u64 private; | ||
| 78 | |||
| 79 | struct list_head leak_list; | ||
| 80 | }; | ||
| 81 | |||
| 82 | struct extent_buffer { | ||
| 83 | u64 start; | ||
| 84 | unsigned long len; | ||
| 85 | char *map_token; | ||
| 86 | char *kaddr; | ||
| 87 | unsigned long map_start; | ||
| 88 | unsigned long map_len; | ||
| 89 | struct page *first_page; | ||
| 90 | atomic_t refs; | ||
| 91 | int flags; | ||
| 92 | struct list_head leak_list; | ||
| 93 | struct rb_node rb_node; | ||
| 94 | struct mutex mutex; | ||
| 95 | }; | ||
| 96 | |||
| 97 | struct extent_map_tree; | ||
| 98 | |||
| 99 | static inline struct extent_state *extent_state_next(struct extent_state *state) | ||
| 100 | { | ||
| 101 | struct rb_node *node; | ||
| 102 | node = rb_next(&state->rb_node); | ||
| 103 | if (!node) | ||
| 104 | return NULL; | ||
| 105 | return rb_entry(node, struct extent_state, rb_node); | ||
| 106 | } | ||
| 107 | |||
| 108 | typedef struct extent_map *(get_extent_t)(struct inode *inode, | ||
| 109 | struct page *page, | ||
| 110 | size_t page_offset, | ||
| 111 | u64 start, u64 len, | ||
| 112 | int create); | ||
| 113 | |||
| 114 | void extent_io_tree_init(struct extent_io_tree *tree, | ||
| 115 | struct address_space *mapping, gfp_t mask); | ||
| 116 | int try_release_extent_mapping(struct extent_map_tree *map, | ||
| 117 | struct extent_io_tree *tree, struct page *page, | ||
| 118 | gfp_t mask); | ||
| 119 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page); | ||
| 120 | int try_release_extent_state(struct extent_map_tree *map, | ||
| 121 | struct extent_io_tree *tree, struct page *page, | ||
| 122 | gfp_t mask); | ||
| 123 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | ||
| 124 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | ||
| 125 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | ||
| 126 | get_extent_t *get_extent); | ||
| 127 | int __init extent_io_init(void); | ||
| 128 | void extent_io_exit(void); | ||
| 129 | |||
| 130 | u64 count_range_bits(struct extent_io_tree *tree, | ||
| 131 | u64 *start, u64 search_end, | ||
| 132 | u64 max_bytes, unsigned long bits); | ||
| 133 | |||
| 134 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 135 | int bits, int filled); | ||
| 136 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 137 | int bits, gfp_t mask); | ||
| 138 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 139 | int bits, int wake, int delete, gfp_t mask); | ||
| 140 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 141 | int bits, gfp_t mask); | ||
| 142 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 143 | gfp_t mask); | ||
| 144 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 145 | gfp_t mask); | ||
| 146 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 147 | gfp_t mask); | ||
| 148 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 149 | gfp_t mask); | ||
| 150 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 151 | gfp_t mask); | ||
| 152 | int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, | ||
| 153 | u64 end, gfp_t mask); | ||
| 154 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 155 | gfp_t mask); | ||
| 156 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 157 | gfp_t mask); | ||
| 158 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
| 159 | u64 *start_ret, u64 *end_ret, int bits); | ||
| 160 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, | ||
| 161 | u64 start, int bits); | ||
| 162 | int extent_invalidatepage(struct extent_io_tree *tree, | ||
| 163 | struct page *page, unsigned long offset); | ||
| 164 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | ||
| 165 | get_extent_t *get_extent, | ||
| 166 | struct writeback_control *wbc); | ||
| 167 | int extent_writepages(struct extent_io_tree *tree, | ||
| 168 | struct address_space *mapping, | ||
| 169 | get_extent_t *get_extent, | ||
| 170 | struct writeback_control *wbc); | ||
| 171 | int extent_readpages(struct extent_io_tree *tree, | ||
| 172 | struct address_space *mapping, | ||
| 173 | struct list_head *pages, unsigned nr_pages, | ||
| 174 | get_extent_t get_extent); | ||
| 175 | int extent_prepare_write(struct extent_io_tree *tree, | ||
| 176 | struct inode *inode, struct page *page, | ||
| 177 | unsigned from, unsigned to, get_extent_t *get_extent); | ||
| 178 | int extent_commit_write(struct extent_io_tree *tree, | ||
| 179 | struct inode *inode, struct page *page, | ||
| 180 | unsigned from, unsigned to); | ||
| 181 | sector_t extent_bmap(struct address_space *mapping, sector_t iblock, | ||
| 182 | get_extent_t *get_extent); | ||
| 183 | int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); | ||
| 184 | int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); | ||
| 185 | int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); | ||
| 186 | void set_page_extent_mapped(struct page *page); | ||
| 187 | |||
| 188 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | ||
| 189 | u64 start, unsigned long len, | ||
| 190 | struct page *page0, | ||
| 191 | gfp_t mask); | ||
| 192 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | ||
| 193 | u64 start, unsigned long len, | ||
| 194 | gfp_t mask); | ||
| 195 | void free_extent_buffer(struct extent_buffer *eb); | ||
| 196 | int read_extent_buffer_pages(struct extent_io_tree *tree, | ||
| 197 | struct extent_buffer *eb, u64 start, int wait, | ||
| 198 | get_extent_t *get_extent, int mirror_num); | ||
| 199 | |||
| 200 | static inline void extent_buffer_get(struct extent_buffer *eb) | ||
| 201 | { | ||
| 202 | atomic_inc(&eb->refs); | ||
| 203 | } | ||
| 204 | |||
| 205 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | ||
| 206 | unsigned long start, | ||
| 207 | unsigned long len); | ||
| 208 | void read_extent_buffer(struct extent_buffer *eb, void *dst, | ||
| 209 | unsigned long start, | ||
| 210 | unsigned long len); | ||
| 211 | void write_extent_buffer(struct extent_buffer *eb, const void *src, | ||
| 212 | unsigned long start, unsigned long len); | ||
| 213 | void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | ||
| 214 | unsigned long dst_offset, unsigned long src_offset, | ||
| 215 | unsigned long len); | ||
| 216 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | ||
| 217 | unsigned long src_offset, unsigned long len); | ||
| 218 | void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | ||
| 219 | unsigned long src_offset, unsigned long len); | ||
| 220 | void memset_extent_buffer(struct extent_buffer *eb, char c, | ||
| 221 | unsigned long start, unsigned long len); | ||
| 222 | int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, | ||
| 223 | struct extent_buffer *eb); | ||
| 224 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end); | ||
| 225 | int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); | ||
| 226 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, | ||
| 227 | struct extent_buffer *eb); | ||
| 228 | int set_extent_buffer_dirty(struct extent_io_tree *tree, | ||
| 229 | struct extent_buffer *eb); | ||
| 230 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, | ||
| 231 | struct extent_buffer *eb); | ||
| 232 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | ||
| 233 | struct extent_buffer *eb); | ||
| 234 | int extent_buffer_uptodate(struct extent_io_tree *tree, | ||
| 235 | struct extent_buffer *eb); | ||
| 236 | int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, | ||
| 237 | unsigned long min_len, char **token, char **map, | ||
| 238 | unsigned long *map_start, | ||
| 239 | unsigned long *map_len, int km); | ||
| 240 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | ||
| 241 | unsigned long min_len, char **token, char **map, | ||
| 242 | unsigned long *map_start, | ||
| 243 | unsigned long *map_len, int km); | ||
| 244 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | ||
| 245 | int release_extent_buffer_tail_pages(struct extent_buffer *eb); | ||
| 246 | int extent_range_uptodate(struct extent_io_tree *tree, | ||
| 247 | u64 start, u64 end); | ||
| 248 | #endif | ||
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c new file mode 100644 index 00000000000..74b2a29880d --- /dev/null +++ b/fs/btrfs/extent_map.c | |||
| @@ -0,0 +1,342 @@ | |||
| 1 | #include <linux/err.h> | ||
| 2 | #include <linux/gfp.h> | ||
| 3 | #include <linux/slab.h> | ||
| 4 | #include <linux/module.h> | ||
| 5 | #include <linux/spinlock.h> | ||
| 6 | #include <linux/version.h> | ||
| 7 | #include <linux/hardirq.h> | ||
| 8 | #include "extent_map.h" | ||
| 9 | |||
| 10 | /* temporary define until extent_map moves out of btrfs */ | ||
| 11 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
| 12 | unsigned long extra_flags, | ||
| 13 | void (*ctor)(void *, struct kmem_cache *, | ||
| 14 | unsigned long)); | ||
| 15 | |||
| 16 | static struct kmem_cache *extent_map_cache; | ||
| 17 | |||
| 18 | int __init extent_map_init(void) | ||
| 19 | { | ||
| 20 | extent_map_cache = btrfs_cache_create("extent_map", | ||
| 21 | sizeof(struct extent_map), 0, | ||
| 22 | NULL); | ||
| 23 | if (!extent_map_cache) | ||
| 24 | return -ENOMEM; | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | void extent_map_exit(void) | ||
| 29 | { | ||
| 30 | if (extent_map_cache) | ||
| 31 | kmem_cache_destroy(extent_map_cache); | ||
| 32 | } | ||
| 33 | |||
| 34 | /** | ||
| 35 | * extent_map_tree_init - initialize extent map tree | ||
| 36 | * @tree: tree to initialize | ||
| 37 | * @mask: flags for memory allocations during tree operations | ||
| 38 | * | ||
| 39 | * Initialize the extent tree @tree. Should be called for each new inode | ||
| 40 | * or other user of the extent_map interface. | ||
| 41 | */ | ||
| 42 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | ||
| 43 | { | ||
| 44 | tree->map.rb_node = NULL; | ||
| 45 | spin_lock_init(&tree->lock); | ||
| 46 | } | ||
| 47 | EXPORT_SYMBOL(extent_map_tree_init); | ||
| 48 | |||
| 49 | /** | ||
| 50 | * alloc_extent_map - allocate new extent map structure | ||
| 51 | * @mask: memory allocation flags | ||
| 52 | * | ||
| 53 | * Allocate a new extent_map structure. The new structure is | ||
| 54 | * returned with a reference count of one and needs to be | ||
| 55 | * freed using free_extent_map(). | ||
| 56 | */ | ||
| 57 | struct extent_map *alloc_extent_map(gfp_t mask) | ||
| 58 | { | ||
| 59 | struct extent_map *em; | ||
| 60 | em = kmem_cache_alloc(extent_map_cache, mask); | ||
| 61 | if (!em || IS_ERR(em)) | ||
| 62 | return em; | ||
| 63 | em->in_tree = 0; | ||
| 64 | em->flags = 0; | ||
| 65 | atomic_set(&em->refs, 1); | ||
| 66 | return em; | ||
| 67 | } | ||
| 68 | EXPORT_SYMBOL(alloc_extent_map); | ||
| 69 | |||
| 70 | /** | ||
| 71 | * free_extent_map - drop reference count of an extent_map | ||
| 72 | * @em: extent map being released | ||
| 73 | * | ||
| 74 | * Drops the reference count on @em by one and frees the structure | ||
| 75 | * if the reference count hits zero. | ||
| 76 | */ | ||
| 77 | void free_extent_map(struct extent_map *em) | ||
| 78 | { | ||
| 79 | if (!em) | ||
| 80 | return; | ||
| 81 | WARN_ON(atomic_read(&em->refs) == 0); | ||
| 82 | if (atomic_dec_and_test(&em->refs)) { | ||
| 83 | WARN_ON(em->in_tree); | ||
| 84 | kmem_cache_free(extent_map_cache, em); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | EXPORT_SYMBOL(free_extent_map); | ||
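A minimal usage sketch of the allocate/release contract documented above (illustration only, not part of the patch; the field values are arbitrary):

	struct extent_map *em;

	em = alloc_extent_map(GFP_NOFS);	/* refs == 1 */
	if (!em)
		return -ENOMEM;
	em->start = 0;
	em->len = 4096;
	em->block_start = EXTENT_MAP_HOLE;	/* a hole, no disk blocks */
	em->bdev = NULL;
	/* ... hand the map to a tree or use it directly ... */
	free_extent_map(em);			/* refs 1 -> 0, freed */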
| 88 | |||
| 89 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | ||
| 90 | struct rb_node *node) | ||
| 91 | { | ||
| 92 | struct rb_node ** p = &root->rb_node; | ||
| 93 | struct rb_node * parent = NULL; | ||
| 94 | struct extent_map *entry; | ||
| 95 | |||
| 96 | while(*p) { | ||
| 97 | parent = *p; | ||
| 98 | entry = rb_entry(parent, struct extent_map, rb_node); | ||
| 99 | |||
| 100 | WARN_ON(!entry->in_tree); | ||
| 101 | |||
| 102 | if (offset < entry->start) | ||
| 103 | p = &(*p)->rb_left; | ||
| 104 | else if (offset >= extent_map_end(entry)) | ||
| 105 | p = &(*p)->rb_right; | ||
| 106 | else | ||
| 107 | return parent; | ||
| 108 | } | ||
| 109 | |||
| 110 | entry = rb_entry(node, struct extent_map, rb_node); | ||
| 111 | entry->in_tree = 1; | ||
| 112 | rb_link_node(node, parent, p); | ||
| 113 | rb_insert_color(node, root); | ||
| 114 | return NULL; | ||
| 115 | } | ||
| 116 | |||
| 117 | /* | ||
| 118 | * search through the tree for an extent_map with a given offset. If | ||
| 119 | * it can't be found, try to find some neighboring extents | ||
| 120 | */ | ||
| 121 | static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | ||
| 122 | struct rb_node **prev_ret, | ||
| 123 | struct rb_node **next_ret) | ||
| 124 | { | ||
| 125 | struct rb_node * n = root->rb_node; | ||
| 126 | struct rb_node *prev = NULL; | ||
| 127 | struct rb_node *orig_prev = NULL; | ||
| 128 | struct extent_map *entry; | ||
| 129 | struct extent_map *prev_entry = NULL; | ||
| 130 | |||
| 131 | while(n) { | ||
| 132 | entry = rb_entry(n, struct extent_map, rb_node); | ||
| 133 | prev = n; | ||
| 134 | prev_entry = entry; | ||
| 135 | |||
| 136 | WARN_ON(!entry->in_tree); | ||
| 137 | |||
| 138 | if (offset < entry->start) | ||
| 139 | n = n->rb_left; | ||
| 140 | else if (offset >= extent_map_end(entry)) | ||
| 141 | n = n->rb_right; | ||
| 142 | else | ||
| 143 | return n; | ||
| 144 | } | ||
| 145 | |||
| 146 | if (prev_ret) { | ||
| 147 | orig_prev = prev; | ||
| 148 | while(prev && offset >= extent_map_end(prev_entry)) { | ||
| 149 | prev = rb_next(prev); | ||
| 150 | prev_entry = rb_entry(prev, struct extent_map, rb_node); | ||
| 151 | } | ||
| 152 | *prev_ret = prev; | ||
| 153 | prev = orig_prev; | ||
| 154 | } | ||
| 155 | |||
| 156 | if (next_ret) { | ||
| 157 | prev_entry = rb_entry(prev, struct extent_map, rb_node); | ||
| 158 | while(prev && offset < prev_entry->start) { | ||
| 159 | prev = rb_prev(prev); | ||
| 160 | prev_entry = rb_entry(prev, struct extent_map, rb_node); | ||
| 161 | } | ||
| 162 | *next_ret = prev; | ||
| 163 | } | ||
| 164 | return NULL; | ||
| 165 | } | ||
| 166 | |||
| 167 | /* | ||
| 168 | * look for an offset in the tree, and if it can't be found, return | ||
| 169 | * the first offset we can find smaller than 'offset'. | ||
| 170 | */ | ||
| 171 | static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) | ||
| 172 | { | ||
| 173 | struct rb_node *prev; | ||
| 174 | struct rb_node *ret; | ||
| 175 | ret = __tree_search(root, offset, &prev, NULL); | ||
| 176 | if (!ret) | ||
| 177 | return prev; | ||
| 178 | return ret; | ||
| 179 | } | ||
| 180 | |||
| 181 | /* check to see if two extent_map structs are adjacent and safe to merge */ | ||
| 182 | static int mergable_maps(struct extent_map *prev, struct extent_map *next) | ||
| 183 | { | ||
| 184 | if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) | ||
| 185 | return 0; | ||
| 186 | |||
| 187 | if (extent_map_end(prev) == next->start && | ||
| 188 | prev->flags == next->flags && | ||
| 189 | prev->bdev == next->bdev && | ||
| 190 | ((next->block_start == EXTENT_MAP_HOLE && | ||
| 191 | prev->block_start == EXTENT_MAP_HOLE) || | ||
| 192 | (next->block_start == EXTENT_MAP_INLINE && | ||
| 193 | prev->block_start == EXTENT_MAP_INLINE) || | ||
| 194 | (next->block_start == EXTENT_MAP_DELALLOC && | ||
| 195 | prev->block_start == EXTENT_MAP_DELALLOC) || | ||
| 196 | (next->block_start < EXTENT_MAP_LAST_BYTE - 1 && | ||
| 197 | next->block_start == extent_map_block_end(prev)))) { | ||
| 198 | return 1; | ||
| 199 | } | ||
| 200 | return 0; | ||
| 201 | } | ||
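To make the merge test concrete (illustration only): two mappings [0, 4k) at disk block_start B and [4k, 8k) at B + 4k, with identical flags and bdev, satisfy every clause above and merge into [0, 8k) at B; if the first mapping has EXTENT_FLAG_PINNED set, the merge is refused regardless.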
| 202 | |||
| 203 | /** | ||
| 204 | * add_extent_mapping - add new extent map to the extent tree | ||
| 205 | * @tree: tree to insert new map in | ||
| 206 | * @em: map to insert | ||
| 207 | * | ||
| 208 | * Insert @em into @tree or perform a simple forward/backward merge with | ||
| 209 | * existing mappings. The extent_map struct passed in will be inserted | ||
| 210 | * into the tree directly, with an additional reference taken, or a | ||
| 211 | * reference dropped if the merge attempt was successful. | ||
| 212 | */ | ||
| 213 | int add_extent_mapping(struct extent_map_tree *tree, | ||
| 214 | struct extent_map *em) | ||
| 215 | { | ||
| 216 | int ret = 0; | ||
| 217 | struct extent_map *merge = NULL; | ||
| 218 | struct rb_node *rb; | ||
| 219 | struct extent_map *exist; | ||
| 220 | |||
| 221 | exist = lookup_extent_mapping(tree, em->start, em->len); | ||
| 222 | if (exist) { | ||
| 223 | free_extent_map(exist); | ||
| 224 | ret = -EEXIST; | ||
| 225 | goto out; | ||
| 226 | } | ||
| 227 | assert_spin_locked(&tree->lock); | ||
| 228 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | ||
| 229 | if (rb) { | ||
| 230 | ret = -EEXIST; | ||
| 231 | free_extent_map(merge); | ||
| 232 | goto out; | ||
| 233 | } | ||
| 234 | atomic_inc(&em->refs); | ||
| 235 | if (em->start != 0) { | ||
| 236 | rb = rb_prev(&em->rb_node); | ||
| 237 | if (rb) | ||
| 238 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 239 | if (rb && mergable_maps(merge, em)) { | ||
| 240 | em->start = merge->start; | ||
| 241 | em->len += merge->len; | ||
| 242 | em->block_start = merge->block_start; | ||
| 243 | merge->in_tree = 0; | ||
| 244 | rb_erase(&merge->rb_node, &tree->map); | ||
| 245 | free_extent_map(merge); | ||
| 246 | } | ||
| 247 | } | ||
| 248 | rb = rb_next(&em->rb_node); | ||
| 249 | if (rb) | ||
| 250 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 251 | if (rb && mergable_maps(em, merge)) { | ||
| 252 | em->len += merge->len; | ||
| 253 | rb_erase(&merge->rb_node, &tree->map); | ||
| 254 | merge->in_tree = 0; | ||
| 255 | free_extent_map(merge); | ||
| 256 | } | ||
| 257 | out: | ||
| 258 | return ret; | ||
| 259 | } | ||
| 260 | EXPORT_SYMBOL(add_extent_mapping); | ||
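A hedged calling sketch: the caller must hold tree->lock (note the assert_spin_locked() above) and keeps its own reference across the call, since the tree takes an extra reference on a successful insert:

	spin_lock(&tree->lock);
	ret = add_extent_mapping(tree, em);	/* -EEXIST on overlap */
	spin_unlock(&tree->lock);
	free_extent_map(em);			/* drop the caller's ref */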
| 261 | |||
| 262 | /* simple helper to do math around the end of an extent, handling wrap */ | ||
| 263 | static u64 range_end(u64 start, u64 len) | ||
| 264 | { | ||
| 265 | if (start + len < start) | ||
| 266 | return (u64)-1; | ||
| 267 | return start + len; | ||
| 268 | } | ||
| 269 | |||
| 270 | /** | ||
| 271 | * lookup_extent_mapping - lookup extent_map | ||
| 272 | * @tree: tree to lookup in | ||
| 273 | * @start: byte offset to start the search | ||
| 274 | * @len: length of the lookup range | ||
| 275 | * | ||
| 276 | * Find and return the first extent_map struct in @tree that intersects the | ||
| 277 | * [start, start + len) range. There may be additional objects in the tree that | ||
| 278 | * intersect, so check the object returned carefully to make sure that no | ||
| 279 | * additional lookups are needed. | ||
| 280 | */ | ||
| 281 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
| 282 | u64 start, u64 len) | ||
| 283 | { | ||
| 284 | struct extent_map *em; | ||
| 285 | struct rb_node *rb_node; | ||
| 286 | struct rb_node *prev = NULL; | ||
| 287 | struct rb_node *next = NULL; | ||
| 288 | u64 end = range_end(start, len); | ||
| 289 | |||
| 290 | assert_spin_locked(&tree->lock); | ||
| 291 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
| 292 | if (!rb_node && prev) { | ||
| 293 | em = rb_entry(prev, struct extent_map, rb_node); | ||
| 294 | if (end > em->start && start < extent_map_end(em)) | ||
| 295 | goto found; | ||
| 296 | } | ||
| 297 | if (!rb_node && next) { | ||
| 298 | em = rb_entry(next, struct extent_map, rb_node); | ||
| 299 | if (end > em->start && start < extent_map_end(em)) | ||
| 300 | goto found; | ||
| 301 | } | ||
| 302 | if (!rb_node) { | ||
| 303 | em = NULL; | ||
| 304 | goto out; | ||
| 305 | } | ||
| 306 | if (IS_ERR(rb_node)) { | ||
| 307 | em = ERR_PTR(PTR_ERR(rb_node)); | ||
| 308 | goto out; | ||
| 309 | } | ||
| 310 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
| 311 | if (end > em->start && start < extent_map_end(em)) | ||
| 312 | goto found; | ||
| 313 | |||
| 314 | em = NULL; | ||
| 315 | goto out; | ||
| 316 | |||
| 317 | found: | ||
| 318 | atomic_inc(&em->refs); | ||
| 319 | out: | ||
| 320 | return em; | ||
| 321 | } | ||
| 322 | EXPORT_SYMBOL(lookup_extent_mapping); | ||
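As the comment above warns, the returned map is only guaranteed to intersect [start, start + len); a minimal sketch (illustration only) of the usual re-check, again under tree->lock:

	spin_lock(&tree->lock);
	em = lookup_extent_mapping(tree, start, len);
	spin_unlock(&tree->lock);
	if (em) {
		if (em->start > start)
			/* a neighbouring extent, not one covering 'start' */;
		free_extent_map(em);	/* lookup took a reference */
	}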
| 323 | |||
| 324 | /** | ||
| 325 | * remove_extent_mapping - removes an extent_map from the extent tree | ||
| 326 | * @tree: extent tree to remove from | ||
| 327 | * @em: extent map being removed | ||
| 328 | * | ||
| 329 | * Removes @em from @tree. No reference counts are dropped, and no checks | ||
| 330 | * are done to see if the range is in use. | ||
| 331 | */ | ||
| 332 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | ||
| 333 | { | ||
| 334 | int ret = 0; | ||
| 335 | |||
| 336 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | ||
| 337 | assert_spin_locked(&tree->lock); | ||
| 338 | rb_erase(&em->rb_node, &tree->map); | ||
| 339 | em->in_tree = 0; | ||
| 340 | return ret; | ||
| 341 | } | ||
| 342 | EXPORT_SYMBOL(remove_extent_mapping); | ||
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h new file mode 100644 index 00000000000..26ac6fe0b26 --- /dev/null +++ b/fs/btrfs/extent_map.h | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | #ifndef __EXTENTMAP__ | ||
| 2 | #define __EXTENTMAP__ | ||
| 3 | |||
| 4 | #include <linux/rbtree.h> | ||
| 5 | |||
| 6 | #define EXTENT_MAP_LAST_BYTE (u64)-4 | ||
| 7 | #define EXTENT_MAP_HOLE (u64)-3 | ||
| 8 | #define EXTENT_MAP_INLINE (u64)-2 | ||
| 9 | #define EXTENT_MAP_DELALLOC (u64)-1 | ||
| 10 | |||
| 11 | /* bits for the flags field */ | ||
| 12 | #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ | ||
| 13 | |||
| 14 | struct extent_map { | ||
| 15 | struct rb_node rb_node; | ||
| 16 | |||
| 17 | /* all of these are in bytes */ | ||
| 18 | u64 start; | ||
| 19 | u64 len; | ||
| 20 | u64 block_start; | ||
| 21 | unsigned long flags; | ||
| 22 | struct block_device *bdev; | ||
| 23 | atomic_t refs; | ||
| 24 | int in_tree; | ||
| 25 | }; | ||
| 26 | |||
| 27 | struct extent_map_tree { | ||
| 28 | struct rb_root map; | ||
| 29 | spinlock_t lock; | ||
| 30 | }; | ||
| 31 | |||
| 32 | static inline u64 extent_map_end(struct extent_map *em) | ||
| 33 | { | ||
| 34 | if (em->start + em->len < em->start) | ||
| 35 | return (u64)-1; | ||
| 36 | return em->start + em->len; | ||
| 37 | } | ||
| 38 | |||
| 39 | static inline u64 extent_map_block_end(struct extent_map *em) | ||
| 40 | { | ||
| 41 | if (em->block_start + em->len < em->block_start) | ||
| 42 | return (u64)-1; | ||
| 43 | return em->block_start + em->len; | ||
| 44 | } | ||
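Both helpers clamp to (u64)-1 on overflow instead of wrapping; for illustration, a map with start = (u64)-4096 and len = 8192 would wrap start + len around to a small offset, so extent_map_end() reports (u64)-1 rather than a bogus end.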
| 45 | |||
| 46 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); | ||
| 47 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
| 48 | u64 start, u64 len); | ||
| 49 | int add_extent_mapping(struct extent_map_tree *tree, | ||
| 50 | struct extent_map *em); | ||
| 51 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); | ||
| 52 | |||
| 53 | struct extent_map *alloc_extent_map(gfp_t mask); | ||
| 54 | void free_extent_map(struct extent_map *em); | ||
| 55 | int __init extent_map_init(void); | ||
| 56 | void extent_map_exit(void); | ||
| 57 | #endif | ||
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c new file mode 100644 index 00000000000..6dbe88b9d7d --- /dev/null +++ b/fs/btrfs/file-item.c | |||
| @@ -0,0 +1,512 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/bio.h> | ||
| 20 | #include <linux/pagemap.h> | ||
| 21 | #include <linux/highmem.h> | ||
| 22 | #include "ctree.h" | ||
| 23 | #include "disk-io.h" | ||
| 24 | #include "transaction.h" | ||
| 25 | #include "print-tree.h" | ||
| 26 | |||
| 27 | #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ | ||
| 28 | sizeof(struct btrfs_item) * 2) / \ | ||
| 29 | BTRFS_CRC32_SIZE) - 1)) | ||
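Rough arithmetic behind the macro (illustrative, with assumed sizes): a 4 KiB leaf leaves on the order of 4000 bytes of item space after the header; with sizeof(struct btrfs_item) near 25 bytes and a 4-byte crc32, MAX_CSUM_ITEMS comes out around (4000 - 50) / 4 - 1 ≈ 985, i.e. one csum item can cover roughly 985 sectors before it must be split.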
| 30 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | ||
| 31 | struct btrfs_root *root, | ||
| 32 | u64 objectid, u64 pos, | ||
| 33 | u64 disk_offset, u64 disk_num_bytes, | ||
| 34 | u64 num_bytes, u64 offset) | ||
| 35 | { | ||
| 36 | int ret = 0; | ||
| 37 | struct btrfs_file_extent_item *item; | ||
| 38 | struct btrfs_key file_key; | ||
| 39 | struct btrfs_path *path; | ||
| 40 | struct extent_buffer *leaf; | ||
| 41 | |||
| 42 | path = btrfs_alloc_path(); | ||
| 43 | BUG_ON(!path); | ||
| 44 | file_key.objectid = objectid; | ||
| 45 | file_key.offset = pos; | ||
| 46 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | ||
| 47 | |||
| 48 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | ||
| 49 | sizeof(*item)); | ||
| 50 | if (ret < 0) | ||
| 51 | goto out; | ||
| 52 | BUG_ON(ret); | ||
| 53 | leaf = path->nodes[0]; | ||
| 54 | item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 55 | struct btrfs_file_extent_item); | ||
| 56 | btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset); | ||
| 57 | btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); | ||
| 58 | btrfs_set_file_extent_offset(leaf, item, offset); | ||
| 59 | btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); | ||
| 60 | btrfs_set_file_extent_generation(leaf, item, trans->transid); | ||
| 61 | btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); | ||
| 62 | btrfs_mark_buffer_dirty(leaf); | ||
| 63 | out: | ||
| 64 | btrfs_free_path(path); | ||
| 65 | return ret; | ||
| 66 | } | ||
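A hedged usage sketch: passing zero for disk_offset and disk_num_bytes records a hole, which is exactly how the write path in file.c below fills the gap between the old EOF and a new write:

	/* illustration only: insert a hole of hole_size bytes at pos */
	ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
				       pos, 0, 0, hole_size, 0);
	if (ret)
		goto fail;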
| 67 | |||
| 68 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | ||
| 69 | struct btrfs_root *root, | ||
| 70 | struct btrfs_path *path, | ||
| 71 | u64 objectid, u64 offset, | ||
| 72 | int cow) | ||
| 73 | { | ||
| 74 | int ret; | ||
| 75 | struct btrfs_key file_key; | ||
| 76 | struct btrfs_key found_key; | ||
| 77 | struct btrfs_csum_item *item; | ||
| 78 | struct extent_buffer *leaf; | ||
| 79 | u64 csum_offset = 0; | ||
| 80 | int csums_in_item; | ||
| 81 | |||
| 82 | file_key.objectid = objectid; | ||
| 83 | file_key.offset = offset; | ||
| 84 | btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); | ||
| 85 | ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); | ||
| 86 | if (ret < 0) | ||
| 87 | goto fail; | ||
| 88 | leaf = path->nodes[0]; | ||
| 89 | if (ret > 0) { | ||
| 90 | ret = 1; | ||
| 91 | if (path->slots[0] == 0) | ||
| 92 | goto fail; | ||
| 93 | path->slots[0]--; | ||
| 94 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 95 | if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || | ||
| 96 | found_key.objectid != objectid) { | ||
| 97 | goto fail; | ||
| 98 | } | ||
| 99 | csum_offset = (offset - found_key.offset) >> | ||
| 100 | root->fs_info->sb->s_blocksize_bits; | ||
| 101 | csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 102 | csums_in_item /= BTRFS_CRC32_SIZE; | ||
| 103 | |||
| 104 | if (csum_offset >= csums_in_item) { | ||
| 105 | ret = -EFBIG; | ||
| 106 | goto fail; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | ||
| 110 | item = (struct btrfs_csum_item *)((unsigned char *)item + | ||
| 111 | csum_offset * BTRFS_CRC32_SIZE); | ||
| 112 | return item; | ||
| 113 | fail: | ||
| 114 | if (ret > 0) | ||
| 115 | ret = -ENOENT; | ||
| 116 | return ERR_PTR(ret); | ||
| 117 | } | ||
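Callers distinguish the two ERR_PTR() cases, "no csum item" (-ENOENT) and "item present but too small" (-EFBIG); a minimal read-only sketch of the convention, mirroring btrfs_lookup_bio_sums() below:

	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
	if (IS_ERR(item)) {
		ret = PTR_ERR(item);
		if (ret == -ENOENT || ret == -EFBIG)
			ret = 0;	/* treat a missing csum as zero */
		goto out;
	}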
| 118 | |||
| 119 | |||
| 120 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | ||
| 121 | struct btrfs_root *root, | ||
| 122 | struct btrfs_path *path, u64 objectid, | ||
| 123 | u64 offset, int mod) | ||
| 124 | { | ||
| 125 | int ret; | ||
| 126 | struct btrfs_key file_key; | ||
| 127 | int ins_len = mod < 0 ? -1 : 0; | ||
| 128 | int cow = mod != 0; | ||
| 129 | |||
| 130 | file_key.objectid = objectid; | ||
| 131 | file_key.offset = offset; | ||
| 132 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | ||
| 133 | ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); | ||
| 134 | return ret; | ||
| 135 | } | ||
| 136 | |||
| 137 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
| 138 | struct bio *bio) | ||
| 139 | { | ||
| 140 | u32 sum; | ||
| 141 | struct bio_vec *bvec = bio->bi_io_vec; | ||
| 142 | int bio_index = 0; | ||
| 143 | u64 offset; | ||
| 144 | u64 item_start_offset = 0; | ||
| 145 | u64 item_last_offset = 0; | ||
| 146 | u32 diff; | ||
| 147 | int ret; | ||
| 148 | struct btrfs_path *path; | ||
| 149 | struct btrfs_csum_item *item = NULL; | ||
| 150 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 151 | |||
| 152 | path = btrfs_alloc_path(); | ||
| 153 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) | ||
| 154 | path->reada = 2; | ||
| 155 | |||
| 156 | WARN_ON(bio->bi_vcnt <= 0); | ||
| 157 | |||
| 158 | while(bio_index < bio->bi_vcnt) { | ||
| 159 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
| 160 | ret = btrfs_find_ordered_sum(inode, offset, &sum); | ||
| 161 | if (ret == 0) | ||
| 162 | goto found; | ||
| 163 | |||
| 164 | if (!item || offset < item_start_offset || | ||
| 165 | offset >= item_last_offset) { | ||
| 166 | struct btrfs_key found_key; | ||
| 167 | u32 item_size; | ||
| 168 | |||
| 169 | if (item) | ||
| 170 | btrfs_release_path(root, path); | ||
| 171 | item = btrfs_lookup_csum(NULL, root, path, | ||
| 172 | inode->i_ino, offset, 0); | ||
| 173 | if (IS_ERR(item)) { | ||
| 174 | ret = PTR_ERR(item); | ||
| 175 | if (ret == -ENOENT || ret == -EFBIG) | ||
| 176 | ret = 0; | ||
| 177 | sum = 0; | ||
| 178 | printk("no csum found for inode %lu start " | ||
| 179 | "%llu\n", inode->i_ino, | ||
| 180 | (unsigned long long)offset); | ||
| 181 | item = NULL; | ||
| 182 | goto found; | ||
| 183 | } | ||
| 184 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 185 | path->slots[0]); | ||
| 186 | |||
| 187 | item_start_offset = found_key.offset; | ||
| 188 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
| 189 | path->slots[0]); | ||
| 190 | item_last_offset = item_start_offset + | ||
| 191 | (item_size / BTRFS_CRC32_SIZE) * | ||
| 192 | root->sectorsize; | ||
| 193 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 194 | struct btrfs_csum_item); | ||
| 195 | } | ||
| 196 | /* | ||
| 197 | * this byte range must be able to fit inside | ||
| 198 | * a single leaf so it will also fit inside a u32 | ||
| 199 | */ | ||
| 200 | diff = offset - item_start_offset; | ||
| 201 | diff = diff / root->sectorsize; | ||
| 202 | diff = diff * BTRFS_CRC32_SIZE; | ||
| 203 | |||
| 204 | read_extent_buffer(path->nodes[0], &sum, | ||
| 205 | ((unsigned long)item) + diff, | ||
| 206 | BTRFS_CRC32_SIZE); | ||
| 207 | found: | ||
| 208 | set_state_private(io_tree, offset, sum); | ||
| 209 | bio_index++; | ||
| 210 | bvec++; | ||
| 211 | } | ||
| 212 | btrfs_free_path(path); | ||
| 213 | return 0; | ||
| 214 | } | ||
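The stride arithmetic above steps BTRFS_CRC32_SIZE bytes per sector: for illustration, with 4 KiB sectors the checksum for the byte at item_start_offset + 3 * 4096 lives at byte 3 * BTRFS_CRC32_SIZE = 12 inside the csum item.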
| 215 | |||
| 216 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | ||
| 217 | struct bio *bio) | ||
| 218 | { | ||
| 219 | struct btrfs_ordered_sum *sums; | ||
| 220 | struct btrfs_sector_sum *sector_sum; | ||
| 221 | struct btrfs_ordered_extent *ordered; | ||
| 222 | char *data; | ||
| 223 | struct bio_vec *bvec = bio->bi_io_vec; | ||
| 224 | int bio_index = 0; | ||
| 225 | unsigned long total_bytes = 0; | ||
| 226 | unsigned long this_sum_bytes = 0; | ||
| 227 | u64 offset; | ||
| 228 | |||
| 229 | WARN_ON(bio->bi_vcnt <= 0); | ||
| 230 | sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); | ||
| 231 | if (!sums) | ||
| 232 | return -ENOMEM; | ||
| 233 | |||
| 234 | sector_sum = sums->sums; | ||
| 235 | sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
| 236 | sums->len = bio->bi_size; | ||
| 237 | INIT_LIST_HEAD(&sums->list); | ||
| 238 | ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset); | ||
| 239 | BUG_ON(!ordered); | ||
| 240 | |||
| 241 | while(bio_index < bio->bi_vcnt) { | ||
| 242 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
| 243 | if (offset >= ordered->file_offset + ordered->len || | ||
| 244 | offset < ordered->file_offset) { | ||
| 245 | unsigned long bytes_left; | ||
| 246 | sums->len = this_sum_bytes; | ||
| 247 | this_sum_bytes = 0; | ||
| 248 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
| 249 | btrfs_put_ordered_extent(ordered); | ||
| 250 | |||
| 251 | bytes_left = bio->bi_size - total_bytes; | ||
| 252 | |||
| 253 | sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), | ||
| 254 | GFP_NOFS); | ||
| 255 | BUG_ON(!sums); | ||
| 256 | sector_sum = sums->sums; | ||
| 257 | sums->len = bytes_left; | ||
| 258 | sums->file_offset = offset; | ||
| 259 | ordered = btrfs_lookup_ordered_extent(inode, | ||
| 260 | sums->file_offset); | ||
| 261 | BUG_ON(!ordered); | ||
| 262 | } | ||
| 263 | |||
| 264 | data = kmap_atomic(bvec->bv_page, KM_USER0); | ||
| 265 | sector_sum->sum = ~(u32)0; | ||
| 266 | sector_sum->sum = btrfs_csum_data(root, | ||
| 267 | data + bvec->bv_offset, | ||
| 268 | sector_sum->sum, | ||
| 269 | bvec->bv_len); | ||
| 270 | kunmap_atomic(data, KM_USER0); | ||
| 271 | btrfs_csum_final(sector_sum->sum, | ||
| 272 | (char *)§or_sum->sum); | ||
| 273 | sector_sum->offset = page_offset(bvec->bv_page) + | ||
| 274 | bvec->bv_offset; | ||
| 275 | |||
| 276 | sector_sum++; | ||
| 277 | bio_index++; | ||
| 278 | total_bytes += bvec->bv_len; | ||
| 279 | this_sum_bytes += bvec->bv_len; | ||
| 280 | bvec++; | ||
| 281 | } | ||
| 282 | this_sum_bytes = 0; | ||
| 283 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
| 284 | btrfs_put_ordered_extent(ordered); | ||
| 285 | return 0; | ||
| 286 | } | ||
| 287 | |||
| 288 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | ||
| 289 | struct btrfs_root *root, struct inode *inode, | ||
| 290 | struct btrfs_ordered_sum *sums) | ||
| 291 | { | ||
| 292 | u64 objectid = inode->i_ino; | ||
| 293 | u64 offset; | ||
| 294 | int ret; | ||
| 295 | struct btrfs_key file_key; | ||
| 296 | struct btrfs_key found_key; | ||
| 297 | u64 next_offset; | ||
| 298 | u64 total_bytes = 0; | ||
| 299 | int found_next; | ||
| 300 | struct btrfs_path *path; | ||
| 301 | struct btrfs_csum_item *item; | ||
| 302 | struct btrfs_csum_item *item_end; | ||
| 303 | struct extent_buffer *leaf = NULL; | ||
| 304 | u64 csum_offset; | ||
| 305 | struct btrfs_sector_sum *sector_sum; | ||
| 306 | u32 nritems; | ||
| 307 | u32 ins_size; | ||
| 308 | char *eb_map; | ||
| 309 | char *eb_token; | ||
| 310 | unsigned long map_len; | ||
| 311 | unsigned long map_start; | ||
| 312 | |||
| 313 | path = btrfs_alloc_path(); | ||
| 314 | BUG_ON(!path); | ||
| 315 | sector_sum = sums->sums; | ||
| 316 | again: | ||
| 317 | next_offset = (u64)-1; | ||
| 318 | found_next = 0; | ||
| 319 | offset = sector_sum->offset; | ||
| 320 | file_key.objectid = objectid; | ||
| 321 | file_key.offset = offset; | ||
| 322 | btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); | ||
| 323 | |||
| 324 | mutex_lock(&BTRFS_I(inode)->csum_mutex); | ||
| 325 | item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); | ||
| 326 | if (!IS_ERR(item)) { | ||
| 327 | leaf = path->nodes[0]; | ||
| 328 | ret = 0; | ||
| 329 | goto found; | ||
| 330 | } | ||
| 331 | ret = PTR_ERR(item); | ||
| 332 | if (ret == -EFBIG) { | ||
| 333 | u32 item_size; | ||
| 334 | /* we found one, but it isn't big enough yet */ | ||
| 335 | leaf = path->nodes[0]; | ||
| 336 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 337 | if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) { | ||
| 338 | /* already at max size, make a new one */ | ||
| 339 | goto insert; | ||
| 340 | } | ||
| 341 | } else { | ||
| 342 | int slot = path->slots[0] + 1; | ||
| 343 | /* we didn't find a csum item, insert one */ | ||
| 344 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 345 | if (path->slots[0] >= nritems - 1) { | ||
| 346 | ret = btrfs_next_leaf(root, path); | ||
| 347 | if (ret == 1) | ||
| 348 | found_next = 1; | ||
| 349 | if (ret != 0) | ||
| 350 | goto insert; | ||
| 351 | slot = 0; | ||
| 352 | } | ||
| 353 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); | ||
| 354 | if (found_key.objectid != objectid || | ||
| 355 | found_key.type != BTRFS_CSUM_ITEM_KEY) { | ||
| 356 | found_next = 1; | ||
| 357 | goto insert; | ||
| 358 | } | ||
| 359 | next_offset = found_key.offset; | ||
| 360 | found_next = 1; | ||
| 361 | goto insert; | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * at this point, we know the tree has an item, but it isn't big | ||
| 366 | * enough yet to put our csum in. Grow it | ||
| 367 | */ | ||
| 368 | btrfs_release_path(root, path); | ||
| 369 | ret = btrfs_search_slot(trans, root, &file_key, path, | ||
| 370 | BTRFS_CRC32_SIZE, 1); | ||
| 371 | if (ret < 0) | ||
| 372 | goto fail_unlock; | ||
| 373 | if (ret == 0) { | ||
| 374 | BUG(); | ||
| 375 | } | ||
| 376 | if (path->slots[0] == 0) { | ||
| 377 | goto insert; | ||
| 378 | } | ||
| 379 | path->slots[0]--; | ||
| 380 | leaf = path->nodes[0]; | ||
| 381 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 382 | csum_offset = (offset - found_key.offset) >> | ||
| 383 | root->fs_info->sb->s_blocksize_bits; | ||
| 384 | if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || | ||
| 385 | found_key.objectid != objectid || | ||
| 386 | csum_offset >= MAX_CSUM_ITEMS(root)) { | ||
| 387 | goto insert; | ||
| 388 | } | ||
| 389 | if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / | ||
| 390 | BTRFS_CRC32_SIZE) { | ||
| 391 | u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; | ||
| 392 | diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 393 | if (diff != BTRFS_CRC32_SIZE) | ||
| 394 | goto insert; | ||
| 395 | ret = btrfs_extend_item(trans, root, path, diff); | ||
| 396 | BUG_ON(ret); | ||
| 397 | goto csum; | ||
| 398 | } | ||
| 399 | |||
| 400 | insert: | ||
| 401 | btrfs_release_path(root, path); | ||
| 402 | csum_offset = 0; | ||
| 403 | if (found_next) { | ||
| 404 | u64 tmp = min((u64)i_size_read(inode), next_offset); | ||
| 405 | tmp -= offset & ~((u64)root->sectorsize -1); | ||
| 406 | tmp >>= root->fs_info->sb->s_blocksize_bits; | ||
| 407 | tmp = max((u64)1, tmp); | ||
| 408 | tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root)); | ||
| 409 | ins_size = BTRFS_CRC32_SIZE * tmp; | ||
| 410 | } else { | ||
| 411 | ins_size = BTRFS_CRC32_SIZE; | ||
| 412 | } | ||
| 413 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | ||
| 414 | ins_size); | ||
| 415 | if (ret < 0) | ||
| 416 | goto fail_unlock; | ||
| 417 | if (ret != 0) { | ||
| 418 | WARN_ON(1); | ||
| 419 | goto fail_unlock; | ||
| 420 | } | ||
| 421 | csum: | ||
| 422 | leaf = path->nodes[0]; | ||
| 423 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | ||
| 424 | ret = 0; | ||
| 425 | item = (struct btrfs_csum_item *)((unsigned char *)item + | ||
| 426 | csum_offset * BTRFS_CRC32_SIZE); | ||
| 427 | found: | ||
| 428 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | ||
| 429 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | ||
| 430 | btrfs_item_size_nr(leaf, path->slots[0])); | ||
| 431 | eb_token = NULL; | ||
| 432 | mutex_unlock(&BTRFS_I(inode)->csum_mutex); | ||
| 433 | cond_resched(); | ||
| 434 | next_sector: | ||
| 435 | |||
| 436 | if (!eb_token || | ||
| 437 | (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { | ||
| 438 | int err; | ||
| 439 | |||
| 440 | if (eb_token) | ||
| 441 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
| 442 | eb_token = NULL; | ||
| 443 | err = map_private_extent_buffer(leaf, (unsigned long)item, | ||
| 444 | BTRFS_CRC32_SIZE, | ||
| 445 | &eb_token, &eb_map, | ||
| 446 | &map_start, &map_len, KM_USER1); | ||
| 447 | if (err) | ||
| 448 | eb_token = NULL; | ||
| 449 | } | ||
| 450 | if (eb_token) { | ||
| 451 | memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), | ||
| 452 | §or_sum->sum, BTRFS_CRC32_SIZE); | ||
| 453 | } else { | ||
| 454 | write_extent_buffer(leaf, §or_sum->sum, | ||
| 455 | (unsigned long)item, BTRFS_CRC32_SIZE); | ||
| 456 | } | ||
| 457 | |||
| 458 | total_bytes += root->sectorsize; | ||
| 459 | sector_sum++; | ||
| 460 | if (total_bytes < sums->len) { | ||
| 461 | item = (struct btrfs_csum_item *)((char *)item + | ||
| 462 | BTRFS_CRC32_SIZE); | ||
| 463 | if (item < item_end && offset + PAGE_CACHE_SIZE == | ||
| 464 | sector_sum->offset) { | ||
| 465 | offset = sector_sum->offset; | ||
| 466 | goto next_sector; | ||
| 467 | } | ||
| 468 | } | ||
| 469 | if (eb_token) { | ||
| 470 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
| 471 | eb_token = NULL; | ||
| 472 | } | ||
| 473 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 474 | cond_resched(); | ||
| 475 | if (total_bytes < sums->len) { | ||
| 476 | btrfs_release_path(root, path); | ||
| 477 | goto again; | ||
| 478 | } | ||
| 479 | out: | ||
| 480 | btrfs_free_path(path); | ||
| 481 | return ret; | ||
| 482 | |||
| 483 | fail_unlock: | ||
| 484 | mutex_unlock(&BTRFS_I(inode)->csum_mutex); | ||
| 485 | goto out; | ||
| 486 | } | ||
| 487 | |||
| 488 | int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | ||
| 489 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 490 | u64 isize) | ||
| 491 | { | ||
| 492 | struct btrfs_key key; | ||
| 493 | struct extent_buffer *leaf = path->nodes[0]; | ||
| 494 | int slot = path->slots[0]; | ||
| 495 | int ret; | ||
| 496 | u32 new_item_size; | ||
| 497 | u64 new_item_span; | ||
| 498 | u64 blocks; | ||
| 499 | |||
| 500 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 501 | if (isize <= key.offset) | ||
| 502 | return 0; | ||
| 503 | new_item_span = isize - key.offset; | ||
| 504 | blocks = (new_item_span + root->sectorsize - 1) >> | ||
| 505 | root->fs_info->sb->s_blocksize_bits; | ||
| 506 | new_item_size = blocks * BTRFS_CRC32_SIZE; | ||
| 507 | if (new_item_size >= btrfs_item_size_nr(leaf, slot)) | ||
| 508 | return 0; | ||
| 509 | ret = btrfs_truncate_item(trans, root, path, new_item_size, 1); | ||
| 510 | BUG_ON(ret); | ||
| 511 | return ret; | ||
| 512 | } | ||
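A worked example of the truncation math (illustration only): truncating to isize = key.offset + 10000 with 4 KiB sectors keeps blocks = ceil(10000 / 4096) = 3, so the csum item is cut down to 3 * BTRFS_CRC32_SIZE = 12 bytes.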
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c new file mode 100644 index 00000000000..69abbe19add --- /dev/null +++ b/fs/btrfs/file.c | |||
| @@ -0,0 +1,1178 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/fs.h> | ||
| 20 | #include <linux/pagemap.h> | ||
| 21 | #include <linux/highmem.h> | ||
| 22 | #include <linux/time.h> | ||
| 23 | #include <linux/init.h> | ||
| 24 | #include <linux/string.h> | ||
| 25 | #include <linux/smp_lock.h> | ||
| 26 | #include <linux/backing-dev.h> | ||
| 27 | #include <linux/mpage.h> | ||
| 28 | #include <linux/swap.h> | ||
| 29 | #include <linux/writeback.h> | ||
| 30 | #include <linux/statfs.h> | ||
| 31 | #include <linux/compat.h> | ||
| 32 | #include <linux/version.h> | ||
| 33 | #include "ctree.h" | ||
| 34 | #include "disk-io.h" | ||
| 35 | #include "transaction.h" | ||
| 36 | #include "btrfs_inode.h" | ||
| 37 | #include "ioctl.h" | ||
| 38 | #include "print-tree.h" | ||
| 39 | #include "tree-log.h" | ||
| 40 | #include "locking.h" | ||
| 41 | #include "compat.h" | ||
| 42 | |||
| 43 | |||
| 44 | /* simple helper to fault in pages and copy. This should go away | ||
| 45 | * and be replaced with calls into generic code. | ||
| 46 | */ | ||
| 47 | static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, | ||
| 48 | int write_bytes, | ||
| 49 | struct page **prepared_pages, | ||
| 50 | const char __user * buf) | ||
| 51 | { | ||
| 52 | long page_fault = 0; | ||
| 53 | int i; | ||
| 54 | int offset = pos & (PAGE_CACHE_SIZE - 1); | ||
| 55 | |||
| 56 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | ||
| 57 | size_t count = min_t(size_t, | ||
| 58 | PAGE_CACHE_SIZE - offset, write_bytes); | ||
| 59 | struct page *page = prepared_pages[i]; | ||
| 60 | fault_in_pages_readable(buf, count); | ||
| 61 | |||
| 62 | /* Copy data from userspace to the current page */ | ||
| 63 | kmap(page); | ||
| 64 | page_fault = __copy_from_user(page_address(page) + offset, | ||
| 65 | buf, count); | ||
| 66 | /* Flush processor's dcache for this page */ | ||
| 67 | flush_dcache_page(page); | ||
| 68 | kunmap(page); | ||
| 69 | buf += count; | ||
| 70 | write_bytes -= count; | ||
| 71 | |||
| 72 | if (page_fault) | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | return page_fault ? -EFAULT : 0; | ||
| 76 | } | ||
| 77 | |||
| 78 | /* | ||
| 79 | * unlocks pages after btrfs_file_write is done with them | ||
| 80 | */ | ||
| 81 | static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) | ||
| 82 | { | ||
| 83 | size_t i; | ||
| 84 | for (i = 0; i < num_pages; i++) { | ||
| 85 | if (!pages[i]) | ||
| 86 | break; | ||
| 87 | /* PageChecked is some magic around finding pages that | ||
| 88 | * have been modified without going through btrfs_set_page_dirty; | ||
| 89 | * clear it here | ||
| 90 | */ | ||
| 91 | ClearPageChecked(pages[i]); | ||
| 92 | unlock_page(pages[i]); | ||
| 93 | mark_page_accessed(pages[i]); | ||
| 94 | page_cache_release(pages[i]); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 | /* this does all the hard work for inserting an inline extent into | ||
| 99 | * the btree. Any existing inline extent is extended as required to make | ||
| 100 | * room; otherwise a new inline item is inserted into the btree. | ||
| 101 | */ | ||
| 102 | static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, | ||
| 103 | struct btrfs_root *root, struct inode *inode, | ||
| 104 | u64 offset, size_t size, | ||
| 105 | struct page **pages, size_t page_offset, | ||
| 106 | int num_pages) | ||
| 107 | { | ||
| 108 | struct btrfs_key key; | ||
| 109 | struct btrfs_path *path; | ||
| 110 | struct extent_buffer *leaf; | ||
| 111 | char *kaddr; | ||
| 112 | unsigned long ptr; | ||
| 113 | struct btrfs_file_extent_item *ei; | ||
| 114 | struct page *page; | ||
| 115 | u32 datasize; | ||
| 116 | int err = 0; | ||
| 117 | int ret; | ||
| 118 | int i; | ||
| 119 | ssize_t cur_size; | ||
| 120 | |||
| 121 | path = btrfs_alloc_path(); | ||
| 122 | if (!path) | ||
| 123 | return -ENOMEM; | ||
| 124 | |||
| 125 | btrfs_set_trans_block_group(trans, inode); | ||
| 126 | |||
| 127 | key.objectid = inode->i_ino; | ||
| 128 | key.offset = offset; | ||
| 129 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | ||
| 130 | |||
| 131 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 132 | if (ret < 0) { | ||
| 133 | err = ret; | ||
| 134 | goto fail; | ||
| 135 | } | ||
| 136 | if (ret == 1) { | ||
| 137 | struct btrfs_key found_key; | ||
| 138 | |||
| 139 | if (path->slots[0] == 0) | ||
| 140 | goto insert; | ||
| 141 | |||
| 142 | path->slots[0]--; | ||
| 143 | leaf = path->nodes[0]; | ||
| 144 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 145 | |||
| 146 | if (found_key.objectid != inode->i_ino) | ||
| 147 | goto insert; | ||
| 148 | |||
| 149 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 150 | goto insert; | ||
| 151 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 152 | struct btrfs_file_extent_item); | ||
| 153 | |||
| 154 | if (btrfs_file_extent_type(leaf, ei) != | ||
| 155 | BTRFS_FILE_EXTENT_INLINE) { | ||
| 156 | goto insert; | ||
| 157 | } | ||
| 158 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 159 | ret = 0; | ||
| 160 | } | ||
| 161 | if (ret == 0) { | ||
| 162 | u32 found_size; | ||
| 163 | u64 found_end; | ||
| 164 | |||
| 165 | leaf = path->nodes[0]; | ||
| 166 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 167 | struct btrfs_file_extent_item); | ||
| 168 | |||
| 169 | if (btrfs_file_extent_type(leaf, ei) != | ||
| 170 | BTRFS_FILE_EXTENT_INLINE) { | ||
| 171 | err = ret; | ||
| 172 | btrfs_print_leaf(root, leaf); | ||
| 173 | printk("found wasn't inline offset %Lu inode %lu\n", | ||
| 174 | offset, inode->i_ino); | ||
| 175 | goto fail; | ||
| 176 | } | ||
| 177 | found_size = btrfs_file_extent_inline_len(leaf, | ||
| 178 | btrfs_item_nr(leaf, path->slots[0])); | ||
| 179 | found_end = key.offset + found_size; | ||
| 180 | |||
| 181 | if (found_end < offset + size) { | ||
| 182 | btrfs_release_path(root, path); | ||
| 183 | ret = btrfs_search_slot(trans, root, &key, path, | ||
| 184 | offset + size - found_end, 1); | ||
| 185 | BUG_ON(ret != 0); | ||
| 186 | |||
| 187 | ret = btrfs_extend_item(trans, root, path, | ||
| 188 | offset + size - found_end); | ||
| 189 | if (ret) { | ||
| 190 | err = ret; | ||
| 191 | goto fail; | ||
| 192 | } | ||
| 193 | leaf = path->nodes[0]; | ||
| 194 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 195 | struct btrfs_file_extent_item); | ||
| 196 | inode_add_bytes(inode, offset + size - found_end); | ||
| 197 | } | ||
| 198 | if (found_end < offset) { | ||
| 199 | ptr = btrfs_file_extent_inline_start(ei) + found_size; | ||
| 200 | memset_extent_buffer(leaf, 0, ptr, offset - found_end); | ||
| 201 | } | ||
| 202 | } else { | ||
| 203 | insert: | ||
| 204 | btrfs_release_path(root, path); | ||
| 205 | datasize = offset + size - key.offset; | ||
| 206 | inode_add_bytes(inode, datasize); | ||
| 207 | datasize = btrfs_file_extent_calc_inline_size(datasize); | ||
| 208 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 209 | datasize); | ||
| 210 | if (ret) { | ||
| 211 | err = ret; | ||
| 212 | printk("got bad ret %d\n", ret); | ||
| 213 | goto fail; | ||
| 214 | } | ||
| 215 | leaf = path->nodes[0]; | ||
| 216 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 217 | struct btrfs_file_extent_item); | ||
| 218 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | ||
| 219 | btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); | ||
| 220 | } | ||
| 221 | ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; | ||
| 222 | |||
| 223 | cur_size = size; | ||
| 224 | i = 0; | ||
| 225 | while (size > 0) { | ||
| 226 | page = pages[i]; | ||
| 227 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 228 | cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); | ||
| 229 | write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); | ||
| 230 | kunmap_atomic(kaddr, KM_USER0); | ||
| 231 | page_offset = 0; | ||
| 232 | ptr += cur_size; | ||
| 233 | size -= cur_size; | ||
| 234 | if (i >= num_pages) { | ||
| 235 | printk("i %d num_pages %d\n", i, num_pages); | ||
| 236 | } | ||
| 237 | i++; | ||
| 238 | } | ||
| 239 | btrfs_mark_buffer_dirty(leaf); | ||
| 240 | fail: | ||
| 241 | btrfs_free_path(path); | ||
| 242 | return err; | ||
| 243 | } | ||
| 244 | |||
| 245 | /* | ||
| 246 | * after copy_from_user, pages need to be dirtied and we need to make | ||
| 247 | * sure holes are created between the current EOF and the start of | ||
| 248 | * any next extents (if required). | ||
| 249 | * | ||
| 250 | * this also makes the decision about creating an inline extent vs | ||
| 251 | * doing real data extents, marking pages dirty and delalloc as required. | ||
| 252 | */ | ||
| 253 | static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | ||
| 254 | struct btrfs_root *root, | ||
| 255 | struct file *file, | ||
| 256 | struct page **pages, | ||
| 257 | size_t num_pages, | ||
| 258 | loff_t pos, | ||
| 259 | size_t write_bytes) | ||
| 260 | { | ||
| 261 | int err = 0; | ||
| 262 | int i; | ||
| 263 | struct inode *inode = fdentry(file)->d_inode; | ||
| 264 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 265 | u64 hint_byte; | ||
| 266 | u64 num_bytes; | ||
| 267 | u64 start_pos; | ||
| 268 | u64 end_of_last_block; | ||
| 269 | u64 end_pos = pos + write_bytes; | ||
| 270 | u64 inline_size; | ||
| 271 | int did_inline = 0; | ||
| 272 | loff_t isize = i_size_read(inode); | ||
| 273 | |||
| 274 | start_pos = pos & ~((u64)root->sectorsize - 1); | ||
| 275 | num_bytes = (write_bytes + pos - start_pos + | ||
| 276 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | ||
| 277 | |||
| 278 | end_of_last_block = start_pos + num_bytes - 1; | ||
| 279 | |||
| 280 | lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 281 | trans = btrfs_join_transaction(root, 1); | ||
| 282 | if (!trans) { | ||
| 283 | err = -ENOMEM; | ||
| 284 | goto out_unlock; | ||
| 285 | } | ||
| 286 | btrfs_set_trans_block_group(trans, inode); | ||
| 287 | hint_byte = 0; | ||
| 288 | |||
| 289 | if ((end_of_last_block & 4095) == 0) { | ||
| 290 | printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); | ||
| 291 | } | ||
| 292 | set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 293 | |||
| 294 | /* FIXME...EIEIO, ENOSPC and more */ | ||
| 295 | /* insert any holes we need to create */ | ||
| 296 | if (isize < start_pos) { | ||
| 297 | u64 last_pos_in_file; | ||
| 298 | u64 hole_size; | ||
| 299 | u64 mask = root->sectorsize - 1; | ||
| 300 | last_pos_in_file = (isize + mask) & ~mask; | ||
| 301 | hole_size = (start_pos - last_pos_in_file + mask) & ~mask; | ||
| 302 | if (hole_size > 0) { | ||
| 303 | btrfs_wait_ordered_range(inode, last_pos_in_file, | ||
| 304 | last_pos_in_file + hole_size); | ||
| 305 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 306 | err = btrfs_drop_extents(trans, root, inode, | ||
| 307 | last_pos_in_file, | ||
| 308 | last_pos_in_file + hole_size, | ||
| 309 | last_pos_in_file, | ||
| 310 | &hint_byte); | ||
| 311 | if (err) | ||
| 312 | goto failed; | ||
| 313 | |||
| 314 | err = btrfs_insert_file_extent(trans, root, | ||
| 315 | inode->i_ino, | ||
| 316 | last_pos_in_file, | ||
| 317 | 0, 0, hole_size, 0); | ||
| 318 | btrfs_drop_extent_cache(inode, last_pos_in_file, | ||
| 319 | last_pos_in_file + hole_size - 1, 0); | ||
| 320 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 321 | btrfs_check_file(root, inode); | ||
| 322 | } | ||
| 323 | if (err) | ||
| 324 | goto failed; | ||
| 325 | } | ||
| 326 | |||
| 327 | /* | ||
| 328 | * either allocate an extent for the new bytes or setup the key | ||
| 329 | * to show we are doing inline data in the extent | ||
| 330 | */ | ||
| 331 | inline_size = end_pos; | ||
| 332 | if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || | ||
| 333 | inline_size > root->fs_info->max_inline || | ||
| 334 | (inline_size & (root->sectorsize -1)) == 0 || | ||
| 335 | inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { | ||
| 336 | /* check for reserved extents on each page, we don't want | ||
| 337 | * to reset the delalloc bit on things that already have | ||
| 338 | * extents reserved. | ||
| 339 | */ | ||
| 340 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
| 341 | for (i = 0; i < num_pages; i++) { | ||
| 342 | struct page *p = pages[i]; | ||
| 343 | SetPageUptodate(p); | ||
| 344 | ClearPageChecked(p); | ||
| 345 | set_page_dirty(p); | ||
| 346 | } | ||
| 347 | } else { | ||
| 348 | u64 aligned_end; | ||
| 349 | /* step one, delete the existing extents in this range */ | ||
| 350 | aligned_end = (pos + write_bytes + root->sectorsize - 1) & | ||
| 351 | ~((u64)root->sectorsize - 1); | ||
| 352 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 353 | err = btrfs_drop_extents(trans, root, inode, start_pos, | ||
| 354 | aligned_end, aligned_end, &hint_byte); | ||
| 355 | if (err) | ||
| 356 | goto failed; | ||
| 357 | if (isize > inline_size) | ||
| 358 | inline_size = min_t(u64, isize, aligned_end); | ||
| 359 | inline_size -= start_pos; | ||
| 360 | err = insert_inline_extent(trans, root, inode, start_pos, | ||
| 361 | inline_size, pages, 0, num_pages); | ||
| 362 | btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0); | ||
| 363 | BUG_ON(err); | ||
| 364 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 365 | |||
| 366 | /* | ||
| 367 | * an ugly way to do all the prop accounting around | ||
| 368 | * the page bits and mapping tags | ||
| 369 | */ | ||
| 370 | set_page_writeback(pages[0]); | ||
| 371 | end_page_writeback(pages[0]); | ||
| 372 | did_inline = 1; | ||
| 373 | } | ||
| 374 | if (end_pos > isize) { | ||
| 375 | i_size_write(inode, end_pos); | ||
| 376 | if (did_inline) | ||
| 377 | BTRFS_I(inode)->disk_i_size = end_pos; | ||
| 378 | btrfs_update_inode(trans, root, inode); | ||
| 379 | } | ||
| 380 | failed: | ||
| 381 | err = btrfs_end_transaction(trans, root); | ||
| 382 | out_unlock: | ||
| 383 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 384 | return err; | ||
| 385 | } | ||
| 386 | |||
| 387 | /* | ||
| 388 | * this drops all the extents in the cache that intersect the range | ||
| 389 | * [start, end]. Existing extents are split as required. | ||
| 390 | */ | ||
| 391 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | ||
| 392 | int skip_pinned) | ||
| 393 | { | ||
| 394 | struct extent_map *em; | ||
| 395 | struct extent_map *split = NULL; | ||
| 396 | struct extent_map *split2 = NULL; | ||
| 397 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 398 | u64 len = end - start + 1; | ||
| 399 | int ret; | ||
| 400 | int testend = 1; | ||
| 401 | unsigned long flags; | ||
| 402 | |||
| 403 | WARN_ON(end < start); | ||
| 404 | if (end == (u64)-1) { | ||
| 405 | len = (u64)-1; | ||
| 406 | testend = 0; | ||
| 407 | } | ||
| 408 | while(1) { | ||
| 409 | if (!split) | ||
| 410 | split = alloc_extent_map(GFP_NOFS); | ||
| 411 | if (!split2) | ||
| 412 | split2 = alloc_extent_map(GFP_NOFS); | ||
| 413 | |||
| 414 | spin_lock(&em_tree->lock); | ||
| 415 | em = lookup_extent_mapping(em_tree, start, len); | ||
| 416 | if (!em) { | ||
| 417 | spin_unlock(&em_tree->lock); | ||
| 418 | break; | ||
| 419 | } | ||
| 420 | flags = em->flags; | ||
| 421 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | ||
| 422 | spin_unlock(&em_tree->lock); | ||
| 423 | if (em->start <= start && | ||
| 424 | (!testend || em->start + em->len >= start + len)) { | ||
| 425 | free_extent_map(em); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | if (start < em->start) { | ||
| 429 | len = em->start - start; | ||
| 430 | } else { | ||
| 431 | len = start + len - (em->start + em->len); | ||
| 432 | start = em->start + em->len; | ||
| 433 | } | ||
| 434 | free_extent_map(em); | ||
| 435 | continue; | ||
| 436 | } | ||
| 437 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 438 | remove_extent_mapping(em_tree, em); | ||
| 439 | |||
| 440 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | ||
| 441 | em->start < start) { | ||
| 442 | split->start = em->start; | ||
| 443 | split->len = start - em->start; | ||
| 444 | split->block_start = em->block_start; | ||
| 445 | split->bdev = em->bdev; | ||
| 446 | split->flags = flags; | ||
| 447 | ret = add_extent_mapping(em_tree, split); | ||
| 448 | BUG_ON(ret); | ||
| 449 | free_extent_map(split); | ||
| 450 | split = split2; | ||
| 451 | split2 = NULL; | ||
| 452 | } | ||
| 453 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | ||
| 454 | testend && em->start + em->len > start + len) { | ||
| 455 | u64 diff = start + len - em->start; | ||
| 456 | |||
| 457 | split->start = start + len; | ||
| 458 | split->len = em->start + em->len - (start + len); | ||
| 459 | split->bdev = em->bdev; | ||
| 460 | split->flags = flags; | ||
| 461 | |||
| 462 | split->block_start = em->block_start + diff; | ||
| 463 | |||
| 464 | ret = add_extent_mapping(em_tree, split); | ||
| 465 | BUG_ON(ret); | ||
| 466 | free_extent_map(split); | ||
| 467 | split = NULL; | ||
| 468 | } | ||
| 469 | spin_unlock(&em_tree->lock); | ||
| 470 | |||
| 471 | /* once for us */ | ||
| 472 | free_extent_map(em); | ||
| 473 | /* once for the tree */ | ||
| 474 | free_extent_map(em); | ||
| 475 | } | ||
| 476 | if (split) | ||
| 477 | free_extent_map(split); | ||
| 478 | if (split2) | ||
| 479 | free_extent_map(split2); | ||
| 480 | return 0; | ||
| 481 | } | ||
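To make the splitting concrete, a worked example (illustration only): a single cached mapping [0, 100k) at disk offset B, after btrfs_drop_extent_cache(inode, 30k, 60k - 1, 0), becomes two mappings, [0, 30k) at B and [60k, 100k) at B + 60k; the middle [30k, 60k) piece is gone from the cache.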
| 482 | |||
| 483 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode) | ||
| 484 | { | ||
| 485 | return 0; | ||
| 486 | #if 0 | ||
| 487 | struct btrfs_path *path; | ||
| 488 | struct btrfs_key found_key; | ||
| 489 | struct extent_buffer *leaf; | ||
| 490 | struct btrfs_file_extent_item *extent; | ||
| 491 | u64 last_offset = 0; | ||
| 492 | int nritems; | ||
| 493 | int slot; | ||
| 494 | int found_type; | ||
| 495 | int ret; | ||
| 496 | int err = 0; | ||
| 497 | u64 extent_end = 0; | ||
| 498 | |||
| 499 | path = btrfs_alloc_path(); | ||
| 500 | ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, | ||
| 501 | last_offset, 0); | ||
| 502 | while(1) { | ||
| 503 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 504 | if (path->slots[0] >= nritems) { | ||
| 505 | ret = btrfs_next_leaf(root, path); | ||
| 506 | if (ret) | ||
| 507 | goto out; | ||
| 508 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 509 | } | ||
| 510 | slot = path->slots[0]; | ||
| 511 | leaf = path->nodes[0]; | ||
| 512 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 513 | if (found_key.objectid != inode->i_ino) | ||
| 514 | break; | ||
| 515 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 516 | goto out; | ||
| 517 | |||
| 518 | if (found_key.offset < last_offset) { | ||
| 519 | WARN_ON(1); | ||
| 520 | btrfs_print_leaf(root, leaf); | ||
| 521 | printk("inode %lu found offset %Lu expected %Lu\n", | ||
| 522 | inode->i_ino, found_key.offset, last_offset); | ||
| 523 | err = 1; | ||
| 524 | goto out; | ||
| 525 | } | ||
| 526 | extent = btrfs_item_ptr(leaf, slot, | ||
| 527 | struct btrfs_file_extent_item); | ||
| 528 | found_type = btrfs_file_extent_type(leaf, extent); | ||
| 529 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
| 530 | extent_end = found_key.offset + | ||
| 531 | btrfs_file_extent_num_bytes(leaf, extent); | ||
| 532 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 533 | struct btrfs_item *item; | ||
| 534 | item = btrfs_item_nr(leaf, slot); | ||
| 535 | extent_end = found_key.offset + | ||
| 536 | btrfs_file_extent_inline_len(leaf, item); | ||
| 537 | extent_end = (extent_end + root->sectorsize - 1) & | ||
| 538 | ~((u64)root->sectorsize -1 ); | ||
| 539 | } | ||
| 540 | last_offset = extent_end; | ||
| 541 | path->slots[0]++; | ||
| 542 | } | ||
| 543 | if (0 && last_offset < inode->i_size) { | ||
| 544 | WARN_ON(1); | ||
| 545 | btrfs_print_leaf(root, leaf); | ||
| 546 | printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, | ||
| 547 | last_offset, inode->i_size); | ||
| 548 | err = 1; | ||
| 549 | |||
| 550 | } | ||
| 551 | out: | ||
| 552 | btrfs_free_path(path); | ||
| 553 | return err; | ||
| 554 | #endif | ||
| 555 | } | ||
| 556 | |||
| 557 | /* | ||
| 558 | * this is very complex, but the basic idea is to drop all extents | ||
| 559 | * in the range [start, end). hint_byte is filled in with a disk byte | ||
| 560 | * offset that would be a good hint to the block allocator for this file. | ||
| 561 | * | ||
| 562 | * If an extent intersects the range but is not entirely inside the range | ||
| 563 | * it is either truncated or split. Anything entirely inside the range | ||
| 564 | * is deleted from the tree. | ||
| 565 | * | ||
| 566 | * inline_limit is used to tell this code which offsets in the file to keep | ||
| 567 | * if they contain inline extents. | ||
| 568 | */ | ||
| 569 | int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
| 570 | struct btrfs_root *root, struct inode *inode, | ||
| 571 | u64 start, u64 end, u64 inline_limit, u64 *hint_byte) | ||
| 572 | { | ||
| 573 | u64 extent_end = 0; | ||
| 574 | u64 search_start = start; | ||
| 575 | u64 leaf_start; | ||
| 576 | u64 root_gen; | ||
| 577 | u64 root_owner; | ||
| 578 | struct extent_buffer *leaf; | ||
| 579 | struct btrfs_file_extent_item *extent; | ||
| 580 | struct btrfs_path *path; | ||
| 581 | struct btrfs_key key; | ||
| 582 | struct btrfs_file_extent_item old; | ||
| 583 | int keep; | ||
| 584 | int slot; | ||
| 585 | int bookend; | ||
| 586 | int found_type; | ||
| 587 | int found_extent; | ||
| 588 | int found_inline; | ||
| 589 | int recow; | ||
| 590 | int ret; | ||
| 591 | |||
| 592 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
| 593 | |||
| 594 | path = btrfs_alloc_path(); | ||
| 595 | if (!path) | ||
| 596 | return -ENOMEM; | ||
| 597 | while (1) { | ||
| 598 | recow = 0; | ||
| 599 | btrfs_release_path(root, path); | ||
| 600 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | ||
| 601 | search_start, -1); | ||
| 602 | if (ret < 0) | ||
| 603 | goto out; | ||
| 604 | if (ret > 0) { | ||
| 605 | if (path->slots[0] == 0) { | ||
| 606 | ret = 0; | ||
| 607 | goto out; | ||
| 608 | } | ||
| 609 | path->slots[0]--; | ||
| 610 | } | ||
| 611 | next_slot: | ||
| 612 | keep = 0; | ||
| 613 | bookend = 0; | ||
| 614 | found_extent = 0; | ||
| 615 | found_inline = 0; | ||
| 616 | leaf_start = 0; | ||
| 617 | root_gen = 0; | ||
| 618 | root_owner = 0; | ||
| 619 | extent = NULL; | ||
| 620 | leaf = path->nodes[0]; | ||
| 621 | slot = path->slots[0]; | ||
| 622 | ret = 0; | ||
| 623 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 624 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && | ||
| 625 | key.offset >= end) { | ||
| 626 | goto out; | ||
| 627 | } | ||
| 628 | if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || | ||
| 629 | key.objectid != inode->i_ino) { | ||
| 630 | goto out; | ||
| 631 | } | ||
| 632 | if (recow) { | ||
| 633 | search_start = key.offset; | ||
| 634 | continue; | ||
| 635 | } | ||
| 636 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { | ||
| 637 | extent = btrfs_item_ptr(leaf, slot, | ||
| 638 | struct btrfs_file_extent_item); | ||
| 639 | found_type = btrfs_file_extent_type(leaf, extent); | ||
| 640 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
| 641 | extent_end = | ||
| 642 | btrfs_file_extent_disk_bytenr(leaf, | ||
| 643 | extent); | ||
| 644 | if (extent_end) | ||
| 645 | *hint_byte = extent_end; | ||
| 646 | |||
| 647 | extent_end = key.offset + | ||
| 648 | btrfs_file_extent_num_bytes(leaf, extent); | ||
| 649 | found_extent = 1; | ||
| 650 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 651 | struct btrfs_item *item; | ||
| 652 | item = btrfs_item_nr(leaf, slot); | ||
| 653 | found_inline = 1; | ||
| 654 | extent_end = key.offset + | ||
| 655 | btrfs_file_extent_inline_len(leaf, item); | ||
| 656 | } | ||
| 657 | } else { | ||
| 658 | extent_end = search_start; | ||
| 659 | } | ||
| 660 | |||
| 661 | /* we found nothing we can drop */ | ||
| 662 | if ((!found_extent && !found_inline) || | ||
| 663 | search_start >= extent_end) { | ||
| 664 | int nextret; | ||
| 665 | u32 nritems; | ||
| 666 | nritems = btrfs_header_nritems(leaf); | ||
| 667 | if (slot >= nritems - 1) { | ||
| 668 | nextret = btrfs_next_leaf(root, path); | ||
| 669 | if (nextret) | ||
| 670 | goto out; | ||
| 671 | recow = 1; | ||
| 672 | } else { | ||
| 673 | path->slots[0]++; | ||
| 674 | } | ||
| 675 | goto next_slot; | ||
| 676 | } | ||
| 677 | |||
| 678 | if (found_inline) { | ||
| 679 | u64 mask = root->sectorsize - 1; | ||
| 680 | search_start = (extent_end + mask) & ~mask; | ||
| 681 | } else | ||
| 682 | search_start = extent_end; | ||
| 683 | if (end <= extent_end && start >= key.offset && found_inline) { | ||
| 684 | *hint_byte = EXTENT_MAP_INLINE; | ||
| 685 | goto out; | ||
| 686 | } | ||
| 687 | |||
| 688 | if (found_extent) { | ||
| 689 | read_extent_buffer(leaf, &old, (unsigned long)extent, | ||
| 690 | sizeof(old)); | ||
| 691 | root_gen = btrfs_header_generation(leaf); | ||
| 692 | root_owner = btrfs_header_owner(leaf); | ||
| 693 | leaf_start = leaf->start; | ||
| 694 | } | ||
| 695 | |||
| 696 | if (end < extent_end && end >= key.offset) { | ||
| 697 | bookend = 1; | ||
| 698 | if (found_inline && start <= key.offset) | ||
| 699 | keep = 1; | ||
| 700 | } | ||
| 701 | /* truncate existing extent */ | ||
| 702 | if (start > key.offset) { | ||
| 703 | u64 new_num; | ||
| 704 | u64 old_num; | ||
| 705 | keep = 1; | ||
| 706 | WARN_ON(start & (root->sectorsize - 1)); | ||
| 707 | if (found_extent) { | ||
| 708 | new_num = start - key.offset; | ||
| 709 | old_num = btrfs_file_extent_num_bytes(leaf, | ||
| 710 | extent); | ||
| 711 | *hint_byte = | ||
| 712 | btrfs_file_extent_disk_bytenr(leaf, | ||
| 713 | extent); | ||
| 714 | if (btrfs_file_extent_disk_bytenr(leaf, | ||
| 715 | extent)) { | ||
| 716 | inode_sub_bytes(inode, old_num - | ||
| 717 | new_num); | ||
| 718 | } | ||
| 719 | btrfs_set_file_extent_num_bytes(leaf, extent, | ||
| 720 | new_num); | ||
| 721 | btrfs_mark_buffer_dirty(leaf); | ||
| 722 | } else if (key.offset < inline_limit && | ||
| 723 | (end > extent_end) && | ||
| 724 | (inline_limit < extent_end)) { | ||
| 725 | u32 new_size; | ||
| 726 | new_size = btrfs_file_extent_calc_inline_size( | ||
| 727 | inline_limit - key.offset); | ||
| 728 | inode_sub_bytes(inode, extent_end - | ||
| 729 | inline_limit); | ||
| 730 | btrfs_truncate_item(trans, root, path, | ||
| 731 | new_size, 1); | ||
| 732 | } | ||
| 733 | } | ||
| 734 | /* delete the entire extent */ | ||
| 735 | if (!keep) { | ||
| 736 | if (found_inline) | ||
| 737 | inode_sub_bytes(inode, extent_end - | ||
| 738 | key.offset); | ||
| 739 | ret = btrfs_del_item(trans, root, path); | ||
| 740 | /* TODO update progress marker and return */ | ||
| 741 | BUG_ON(ret); | ||
| 742 | extent = NULL; | ||
| 743 | btrfs_release_path(root, path); | ||
| 744 | /* the extent will be freed later */ | ||
| 745 | } | ||
| 746 | if (bookend && found_inline && start <= key.offset) { | ||
| 747 | u32 new_size; | ||
| 748 | new_size = btrfs_file_extent_calc_inline_size( | ||
| 749 | extent_end - end); | ||
| 750 | inode_sub_bytes(inode, end - key.offset); | ||
| 751 | ret = btrfs_truncate_item(trans, root, path, | ||
| 752 | new_size, 0); | ||
| 753 | BUG_ON(ret); | ||
| 754 | } | ||
| 755 | /* create bookend, splitting the extent in two */ | ||
| 756 | if (bookend && found_extent) { | ||
| 757 | u64 disk_bytenr; | ||
| 758 | struct btrfs_key ins; | ||
| 759 | ins.objectid = inode->i_ino; | ||
| 760 | ins.offset = end; | ||
| 761 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); | ||
| 762 | btrfs_release_path(root, path); | ||
| 763 | ret = btrfs_insert_empty_item(trans, root, path, &ins, | ||
| 764 | sizeof(*extent)); | ||
| 765 | BUG_ON(ret); | ||
| 766 | |||
| 767 | leaf = path->nodes[0]; | ||
| 768 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
| 769 | struct btrfs_file_extent_item); | ||
| 770 | write_extent_buffer(leaf, &old, | ||
| 771 | (unsigned long)extent, sizeof(old)); | ||
| 772 | |||
| 773 | btrfs_set_file_extent_offset(leaf, extent, | ||
| 774 | le64_to_cpu(old.offset) + end - key.offset); | ||
| 775 | WARN_ON(le64_to_cpu(old.num_bytes) < | ||
| 776 | (extent_end - end)); | ||
| 777 | btrfs_set_file_extent_num_bytes(leaf, extent, | ||
| 778 | extent_end - end); | ||
| 779 | btrfs_set_file_extent_type(leaf, extent, | ||
| 780 | BTRFS_FILE_EXTENT_REG); | ||
| 781 | |||
| 782 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 783 | |||
| 784 | disk_bytenr = le64_to_cpu(old.disk_bytenr); | ||
| 785 | if (disk_bytenr != 0) { | ||
| 786 | ret = btrfs_inc_extent_ref(trans, root, | ||
| 787 | disk_bytenr, | ||
| 788 | le64_to_cpu(old.disk_num_bytes), | ||
| 789 | leaf->start, | ||
| 790 | root->root_key.objectid, | ||
| 791 | trans->transid, ins.objectid); | ||
| 792 | BUG_ON(ret); | ||
| 793 | } | ||
| 794 | btrfs_release_path(root, path); | ||
| 795 | if (disk_bytenr != 0) { | ||
| 796 | inode_add_bytes(inode, extent_end - end); | ||
| 797 | } | ||
| 798 | } | ||
| 799 | |||
| 800 | if (found_extent && !keep) { | ||
| 801 | u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); | ||
| 802 | |||
| 803 | if (disk_bytenr != 0) { | ||
| 804 | inode_sub_bytes(inode, | ||
| 805 | le64_to_cpu(old.num_bytes)); | ||
| 806 | ret = btrfs_free_extent(trans, root, | ||
| 807 | disk_bytenr, | ||
| 808 | le64_to_cpu(old.disk_num_bytes), | ||
| 809 | leaf_start, root_owner, | ||
| 810 | root_gen, key.objectid, 0); | ||
| 811 | BUG_ON(ret); | ||
| 812 | *hint_byte = disk_bytenr; | ||
| 813 | } | ||
| 814 | } | ||
| 815 | |||
| 816 | if (search_start >= end) { | ||
| 817 | ret = 0; | ||
| 818 | goto out; | ||
| 819 | } | ||
| 820 | } | ||
| 821 | out: | ||
| 822 | btrfs_free_path(path); | ||
| 823 | btrfs_check_file(root, inode); | ||
| 824 | return ret; | ||
| 825 | } | ||
| 826 | |||
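The interval arithmetic behind the truncate and bookend cases above is easy to state on its own. Here is a minimal userspace sketch (struct ext and drop_range are made up for illustration, not btrfs APIs) of what happens to a single extent that intersects the dropped range [start, end):

	#include <stdint.h>
	#include <stdio.h>

	/* hypothetical stand-in for one file extent: [offset, offset + len) */
	struct ext { uint64_t offset, len; };

	/*
	 * Drop [start, end) from *e, assuming the ranges intersect.
	 * Returns how many pieces survive (0, 1 or 2) and writes them to
	 * out[]. Mirrors the keep/bookend decisions above: a head piece
	 * survives when start > e->offset, a tail ("bookend") piece
	 * survives when end < e->offset + e->len.
	 */
	static int drop_range(const struct ext *e, uint64_t start, uint64_t end,
			      struct ext out[2])
	{
		uint64_t e_end = e->offset + e->len;
		int n = 0;

		if (start > e->offset)	/* truncate: keep the head */
			out[n++] = (struct ext){ e->offset, start - e->offset };
		if (end < e_end)	/* bookend: keep the tail */
			out[n++] = (struct ext){ end, e_end - end };
		return n;
	}

	int main(void)
	{
		struct ext e = { 4096, 16384 }, out[2];
		int n = drop_range(&e, 8192, 12288, out);

		for (int i = 0; i < n; i++)	/* [4096, 8192) and [12288, 20480) */
			printf("keep [%llu, %llu)\n",
			       (unsigned long long)out[i].offset,
			       (unsigned long long)(out[i].offset + out[i].len));
		return 0;
	}

The real function additionally maintains extent reference counts, inode byte accounting and the allocator hint; the sketch covers only the geometry.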
| 827 | /* | ||
| 828 | * this gets pages into the page cache and locks them down. It also | ||
| 829 | * waits for data=ordered extents to finish before allowing the pages to be | ||
| 830 | * modified. | ||
| 831 | */ | ||
| 832 | static int noinline prepare_pages(struct btrfs_root *root, struct file *file, | ||
| 833 | struct page **pages, size_t num_pages, | ||
| 834 | loff_t pos, unsigned long first_index, | ||
| 835 | unsigned long last_index, size_t write_bytes) | ||
| 836 | { | ||
| 837 | int i; | ||
| 838 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | ||
| 839 | struct inode *inode = fdentry(file)->d_inode; | ||
| 840 | int err = 0; | ||
| 841 | u64 start_pos; | ||
| 842 | u64 last_pos; | ||
| 843 | |||
| 844 | start_pos = pos & ~((u64)root->sectorsize - 1); | ||
| 845 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | ||
| 846 | |||
| 847 | memset(pages, 0, num_pages * sizeof(struct page *)); | ||
| 848 | again: | ||
| 849 | for (i = 0; i < num_pages; i++) { | ||
| 850 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | ||
| 851 | if (!pages[i]) { | ||
| 852 | err = -ENOMEM; | ||
| 853 | BUG_ON(1); | ||
| 854 | } | ||
| 855 | wait_on_page_writeback(pages[i]); | ||
| 856 | } | ||
| 857 | if (start_pos < inode->i_size) { | ||
| 858 | struct btrfs_ordered_extent *ordered; | ||
| 859 | lock_extent(&BTRFS_I(inode)->io_tree, | ||
| 860 | start_pos, last_pos - 1, GFP_NOFS); | ||
| 861 | ordered = btrfs_lookup_first_ordered_extent(inode, last_pos - 1); | ||
| 862 | if (ordered && | ||
| 863 | ordered->file_offset + ordered->len > start_pos && | ||
| 864 | ordered->file_offset < last_pos) { | ||
| 865 | btrfs_put_ordered_extent(ordered); | ||
| 866 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
| 867 | start_pos, last_pos - 1, GFP_NOFS); | ||
| 868 | for (i = 0; i < num_pages; i++) { | ||
| 869 | unlock_page(pages[i]); | ||
| 870 | page_cache_release(pages[i]); | ||
| 871 | } | ||
| 872 | btrfs_wait_ordered_range(inode, start_pos, | ||
| 873 | last_pos - start_pos); | ||
| 874 | goto again; | ||
| 875 | } | ||
| 876 | if (ordered) | ||
| 877 | btrfs_put_ordered_extent(ordered); | ||
| 878 | |||
| 879 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, | ||
| 880 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, | ||
| 881 | GFP_NOFS); | ||
| 882 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
| 883 | start_pos, last_pos - 1, GFP_NOFS); | ||
| 884 | } | ||
| 885 | for (i = 0; i < num_pages; i++) { | ||
| 886 | clear_page_dirty_for_io(pages[i]); | ||
| 887 | set_page_extent_mapped(pages[i]); | ||
| 888 | WARN_ON(!PageLocked(pages[i])); | ||
| 889 | } | ||
| 890 | return 0; | ||
| 891 | } | ||
| 892 | |||
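The position math at the top of prepare_pages is worth seeing with concrete numbers. A small sketch, assuming 4K pages and a 4K sectorsize (both assumptions, not queried from anything):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12			/* assumed 4K pages */

	int main(void)
	{
		uint64_t sectorsize = 4096;	/* assumed 4K sectors */
		uint64_t pos = 10000;		/* byte offset of the write */
		uint64_t num_pages = 3;		/* pages being prepared */

		uint64_t index = pos >> PAGE_SHIFT;
		/* round the start down to a sector boundary */
		uint64_t start_pos = pos & ~(sectorsize - 1);
		/* first byte past the last prepared page */
		uint64_t last_pos = (index + num_pages) << PAGE_SHIFT;

		/* prints index=2 start_pos=8192 last_pos=20480 */
		printf("index=%llu start_pos=%llu last_pos=%llu\n",
		       (unsigned long long)index,
		       (unsigned long long)start_pos,
		       (unsigned long long)last_pos);
		return 0;
	}

The range [start_pos, last_pos) is what gets locked in the io_tree while checking for conflicting ordered extents.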
| 893 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | ||
| 894 | size_t count, loff_t *ppos) | ||
| 895 | { | ||
| 896 | loff_t pos; | ||
| 897 | loff_t start_pos; | ||
| 898 | ssize_t num_written = 0; | ||
| 899 | ssize_t err = 0; | ||
| 900 | int ret = 0; | ||
| 901 | struct inode *inode = fdentry(file)->d_inode; | ||
| 902 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 903 | struct page **pages = NULL; | ||
| 904 | int nrptrs; | ||
| 905 | struct page *pinned[2]; | ||
| 906 | unsigned long first_index; | ||
| 907 | unsigned long last_index; | ||
| 908 | int will_write; | ||
| 909 | |||
| 910 | will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || | ||
| 911 | (file->f_flags & O_DIRECT)); | ||
| 912 | |||
| 913 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
| 914 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
| 915 | pinned[0] = NULL; | ||
| 916 | pinned[1] = NULL; | ||
| 917 | |||
| 918 | pos = *ppos; | ||
| 919 | start_pos = pos; | ||
| 920 | |||
| 921 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
| 922 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | ||
| 923 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
| 924 | if (err) | ||
| 925 | goto out_nolock; | ||
| 926 | if (count == 0) | ||
| 927 | goto out_nolock; | ||
| 928 | |||
| 929 | err = file_remove_suid(file); | ||
| 930 | if (err) | ||
| 931 | goto out_nolock; | ||
| 932 | file_update_time(file); | ||
| 933 | |||
| 934 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | ||
| 935 | |||
| 936 | mutex_lock(&inode->i_mutex); | ||
| 937 | first_index = pos >> PAGE_CACHE_SHIFT; | ||
| 938 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | ||
| 939 | |||
| 940 | /* | ||
| 941 | * if this is a nodatasum mount, force summing off for the inode | ||
| 942 | * all the time. That way a later mount with summing on won't | ||
| 943 | * get confused | ||
| 944 | */ | ||
| 945 | if (btrfs_test_opt(root, NODATASUM)) | ||
| 946 | btrfs_set_flag(inode, NODATASUM); | ||
| 947 | |||
| 948 | /* | ||
| 949 | * there are lots of better ways to do this, but this code | ||
| 950 | * makes sure the first and last page in the file range are | ||
| 951 | * up to date and ready for cow | ||
| 952 | */ | ||
| 953 | if ((pos & (PAGE_CACHE_SIZE - 1))) { | ||
| 954 | pinned[0] = grab_cache_page(inode->i_mapping, first_index); | ||
| 955 | if (!PageUptodate(pinned[0])) { | ||
| 956 | ret = btrfs_readpage(NULL, pinned[0]); | ||
| 957 | BUG_ON(ret); | ||
| 958 | wait_on_page_locked(pinned[0]); | ||
| 959 | } else { | ||
| 960 | unlock_page(pinned[0]); | ||
| 961 | } | ||
| 962 | } | ||
| 963 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | ||
| 964 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | ||
| 965 | if (!PageUptodate(pinned[1])) { | ||
| 966 | ret = btrfs_readpage(NULL, pinned[1]); | ||
| 967 | BUG_ON(ret); | ||
| 968 | wait_on_page_locked(pinned[1]); | ||
| 969 | } else { | ||
| 970 | unlock_page(pinned[1]); | ||
| 971 | } | ||
| 972 | } | ||
| 973 | |||
| 974 | while (count > 0) { | ||
| 975 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | ||
| 976 | size_t write_bytes = min(count, nrptrs * | ||
| 977 | (size_t)PAGE_CACHE_SIZE - | ||
| 978 | offset); | ||
| 979 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | ||
| 980 | PAGE_CACHE_SHIFT; | ||
| 981 | |||
| 982 | WARN_ON(num_pages > nrptrs); | ||
| 983 | memset(pages, 0, num_pages * sizeof(struct page *)); | ||
| 984 | |||
| 985 | ret = btrfs_check_free_space(root, write_bytes, 0); | ||
| 986 | if (ret) | ||
| 987 | goto out; | ||
| 988 | |||
| 989 | ret = prepare_pages(root, file, pages, num_pages, | ||
| 990 | pos, first_index, last_index, | ||
| 991 | write_bytes); | ||
| 992 | if (ret) | ||
| 993 | goto out; | ||
| 994 | |||
| 995 | ret = btrfs_copy_from_user(pos, num_pages, | ||
| 996 | write_bytes, pages, buf); | ||
| 997 | if (ret) { | ||
| 998 | btrfs_drop_pages(pages, num_pages); | ||
| 999 | goto out; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | ret = dirty_and_release_pages(NULL, root, file, pages, | ||
| 1003 | num_pages, pos, write_bytes); | ||
| 1004 | btrfs_drop_pages(pages, num_pages); | ||
| 1005 | if (ret) | ||
| 1006 | goto out; | ||
| 1007 | |||
| 1008 | if (will_write) { | ||
| 1009 | btrfs_fdatawrite_range(inode->i_mapping, pos, | ||
| 1010 | pos + write_bytes - 1, | ||
| 1011 | WB_SYNC_NONE); | ||
| 1012 | } else { | ||
| 1013 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 1014 | num_pages); | ||
| 1015 | if (num_pages < | ||
| 1016 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
| 1017 | btrfs_btree_balance_dirty(root, 1); | ||
| 1018 | btrfs_throttle(root); | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | buf += write_bytes; | ||
| 1022 | count -= write_bytes; | ||
| 1023 | pos += write_bytes; | ||
| 1024 | num_written += write_bytes; | ||
| 1025 | |||
| 1026 | cond_resched(); | ||
| 1027 | } | ||
| 1028 | out: | ||
| 1029 | mutex_unlock(&inode->i_mutex); | ||
| 1030 | |||
| 1031 | out_nolock: | ||
| 1032 | kfree(pages); | ||
| 1033 | if (pinned[0]) | ||
| 1034 | page_cache_release(pinned[0]); | ||
| 1035 | if (pinned[1]) | ||
| 1036 | page_cache_release(pinned[1]); | ||
| 1037 | *ppos = pos; | ||
| 1038 | |||
| 1039 | if (num_written > 0 && will_write) { | ||
| 1040 | struct btrfs_trans_handle *trans; | ||
| 1041 | |||
| 1042 | err = btrfs_wait_ordered_range(inode, start_pos, num_written); | ||
| 1043 | if (err) | ||
| 1044 | num_written = err; | ||
| 1045 | |||
| 1046 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | ||
| 1047 | trans = btrfs_start_transaction(root, 1); | ||
| 1048 | ret = btrfs_log_dentry_safe(trans, root, | ||
| 1049 | file->f_dentry); | ||
| 1050 | if (ret == 0) { | ||
| 1051 | btrfs_sync_log(trans, root); | ||
| 1052 | btrfs_end_transaction(trans, root); | ||
| 1053 | } else { | ||
| 1054 | btrfs_commit_transaction(trans, root); | ||
| 1055 | } | ||
| 1056 | } | ||
| 1057 | if (file->f_flags & O_DIRECT) { | ||
| 1058 | invalidate_mapping_pages(inode->i_mapping, | ||
| 1059 | start_pos >> PAGE_CACHE_SHIFT, | ||
| 1060 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | ||
| 1061 | } | ||
| 1062 | } | ||
| 1063 | current->backing_dev_info = NULL; | ||
| 1064 | return num_written ? num_written : err; | ||
| 1065 | } | ||
| 1066 | |||
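The write loop above sizes each pass so the struct page array never outgrows a single page. A sketch of that chunking arithmetic, with void * standing in for struct page * so it builds in userspace:

	#include <stdio.h>
	#include <stddef.h>

	#define PAGE_SIZE 4096UL		/* assumed 4K pages */

	int main(void)
	{
		size_t count = 3u << 20;	/* 3 MiB left to write */
		size_t pos = 1000;		/* current file position */

		/* pages the whole write needs, capped at how many page
		 * pointers fit in one page */
		size_t want = (count + PAGE_SIZE - 1) / PAGE_SIZE;
		size_t cap = PAGE_SIZE / sizeof(void *);
		size_t nrptrs = want < cap ? want : cap;

		/* bytes one pass can copy: the pinned pages minus the
		 * unaligned head, the same min() as in the loop above */
		size_t offset = pos & (PAGE_SIZE - 1);
		size_t room = nrptrs * PAGE_SIZE - offset;
		size_t write_bytes = count < room ? count : room;
		size_t num_pages = (write_bytes + PAGE_SIZE - 1) / PAGE_SIZE;

		printf("nrptrs=%zu write_bytes=%zu num_pages=%zu\n",
		       nrptrs, write_bytes, num_pages);
		return 0;
	}

Capping at one page of pointers keeps the kmalloc for the array order-0 no matter how large the write is.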
| 1067 | int btrfs_release_file(struct inode *inode, struct file *filp) | ||
| 1068 | { | ||
| 1069 | if (filp->private_data) | ||
| 1070 | btrfs_ioctl_trans_end(filp); | ||
| 1071 | return 0; | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | /* | ||
| 1075 | * fsync call for both files and directories. This logs the inode into | ||
| 1076 | * the tree log, avoiding full transaction commits whenever possible. | ||
| 1077 | * | ||
| 1078 | * It needs to call filemap_fdatawait so that all the ordered extent updates | ||
| 1079 | * in the metadata btree are up to date for copying to the log. | ||
| 1080 | * | ||
| 1081 | * It drops the inode mutex before doing the tree log commit. This is an | ||
| 1082 | * important optimization for directories because holding the mutex prevents | ||
| 1083 | * new operations on the dir while we write to disk. | ||
| 1084 | */ | ||
| 1085 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | ||
| 1086 | { | ||
| 1087 | struct inode *inode = dentry->d_inode; | ||
| 1088 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1089 | int ret = 0; | ||
| 1090 | struct btrfs_trans_handle *trans; | ||
| 1091 | |||
| 1092 | /* | ||
| 1093 | * check the transaction that last modified this inode | ||
| 1094 | * and see if it's already been committed | ||
| 1095 | */ | ||
| 1096 | if (!BTRFS_I(inode)->last_trans) | ||
| 1097 | goto out; | ||
| 1098 | |||
| 1099 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 1100 | if (BTRFS_I(inode)->last_trans <= | ||
| 1101 | root->fs_info->last_trans_committed) { | ||
| 1102 | BTRFS_I(inode)->last_trans = 0; | ||
| 1103 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1104 | goto out; | ||
| 1105 | } | ||
| 1106 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1107 | |||
| 1108 | root->fs_info->tree_log_batch++; | ||
| 1109 | filemap_fdatawait(inode->i_mapping); | ||
| 1110 | root->fs_info->tree_log_batch++; | ||
| 1111 | |||
| 1112 | /* | ||
| 1113 | * ok we haven't committed the transaction yet, let's do a commit | ||
| 1114 | */ | ||
| 1115 | if (file->private_data) | ||
| 1116 | btrfs_ioctl_trans_end(file); | ||
| 1117 | |||
| 1118 | trans = btrfs_start_transaction(root, 1); | ||
| 1119 | if (!trans) { | ||
| 1120 | ret = -ENOMEM; | ||
| 1121 | goto out; | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); | ||
| 1125 | if (ret < 0) { | ||
| 1126 | goto out; | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | /* we've logged all the items and now have a consistent | ||
| 1130 | * version of the file in the log. It is possible that | ||
| 1131 | * someone will come in and modify the file, but that's | ||
| 1132 | * fine because the log is consistent on disk, and we | ||
| 1133 | * have references to all of the file's extents | ||
| 1134 | * | ||
| 1135 | * It is possible that someone will come in and log the | ||
| 1136 | * file again, but that will end up using the synchronization | ||
| 1137 | * inside btrfs_sync_log to keep things safe. | ||
| 1138 | */ | ||
| 1139 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); | ||
| 1140 | |||
| 1141 | if (ret > 0) { | ||
| 1142 | ret = btrfs_commit_transaction(trans, root); | ||
| 1143 | } else { | ||
| 1144 | btrfs_sync_log(trans, root); | ||
| 1145 | ret = btrfs_end_transaction(trans, root); | ||
| 1146 | } | ||
| 1147 | mutex_lock(&file->f_dentry->d_inode->i_mutex); | ||
| 1148 | out: | ||
| 1149 | return ret > 0 ? -EIO : ret; | ||
| 1150 | } | ||
| 1151 | |||
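The early-out at the top of btrfs_sync_file is a two-field comparison. A userspace sketch of just that check (the struct names here are illustrative stand-ins for fs_info and the btrfs inode, and the locking is omitted):

	#include <stdint.h>
	#include <stdio.h>

	/* illustrative stand-ins for the fields the fast path reads */
	struct fs_state { uint64_t last_trans_committed; };
	struct ino_state { uint64_t last_trans; };

	/* returns 1 when fsync can return early: either the inode was
	 * never modified, or the transaction that modified it has
	 * already committed */
	static int fsync_can_skip(struct fs_state *fs, struct ino_state *ino)
	{
		if (!ino->last_trans)
			return 1;
		if (ino->last_trans <= fs->last_trans_committed) {
			ino->last_trans = 0;	/* remember the answer */
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		struct fs_state fs = { .last_trans_committed = 7 };
		struct ino_state ino = { .last_trans = 5 };

		printf("skip=%d\n", fsync_can_skip(&fs, &ino));	/* skip=1 */
		return 0;
	}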
| 1152 | static struct vm_operations_struct btrfs_file_vm_ops = { | ||
| 1153 | .fault = filemap_fault, | ||
| 1154 | .page_mkwrite = btrfs_page_mkwrite, | ||
| 1155 | }; | ||
| 1156 | |||
| 1157 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | ||
| 1158 | { | ||
| 1159 | vma->vm_ops = &btrfs_file_vm_ops; | ||
| 1160 | file_accessed(filp); | ||
| 1161 | return 0; | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | struct file_operations btrfs_file_operations = { | ||
| 1165 | .llseek = generic_file_llseek, | ||
| 1166 | .read = do_sync_read, | ||
| 1167 | .aio_read = generic_file_aio_read, | ||
| 1168 | .splice_read = generic_file_splice_read, | ||
| 1169 | .write = btrfs_file_write, | ||
| 1170 | .mmap = btrfs_file_mmap, | ||
| 1171 | .open = generic_file_open, | ||
| 1172 | .release = btrfs_release_file, | ||
| 1173 | .fsync = btrfs_sync_file, | ||
| 1174 | .unlocked_ioctl = btrfs_ioctl, | ||
| 1175 | #ifdef CONFIG_COMPAT | ||
| 1176 | .compat_ioctl = btrfs_ioctl, | ||
| 1177 | #endif | ||
| 1178 | }; | ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c new file mode 100644 index 00000000000..96241f01fa0 --- /dev/null +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -0,0 +1,449 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Red Hat. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/sched.h> | ||
| 20 | #include "ctree.h" | ||
| 21 | |||
| 22 | static int tree_insert_offset(struct rb_root *root, u64 offset, | ||
| 23 | struct rb_node *node) | ||
| 24 | { | ||
| 25 | struct rb_node **p = &root->rb_node; | ||
| 26 | struct rb_node *parent = NULL; | ||
| 27 | struct btrfs_free_space *info; | ||
| 28 | |||
| 29 | while (*p) { | ||
| 30 | parent = *p; | ||
| 31 | info = rb_entry(parent, struct btrfs_free_space, offset_index); | ||
| 32 | |||
| 33 | if (offset < info->offset) | ||
| 34 | p = &(*p)->rb_left; | ||
| 35 | else if (offset > info->offset) | ||
| 36 | p = &(*p)->rb_right; | ||
| 37 | else | ||
| 38 | return -EEXIST; | ||
| 39 | } | ||
| 40 | |||
| 41 | rb_link_node(node, parent, p); | ||
| 42 | rb_insert_color(node, root); | ||
| 43 | |||
| 44 | return 0; | ||
| 45 | } | ||
| 46 | |||
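The descent in tree_insert_offset is the textbook keyed-tree insert. A userspace sketch with a plain (unbalanced) binary tree in place of the kernel rb-tree, rejecting duplicate offsets the way the function above returns -EEXIST:

	#include <stdint.h>
	#include <stdio.h>

	/* plain binary tree node standing in for rb_node + btrfs_free_space */
	struct node { uint64_t offset; struct node *left, *right; };

	/* same descent as tree_insert_offset: walk down comparing
	 * offsets, refuse duplicates */
	static int insert_offset(struct node **root, struct node *n)
	{
		struct node **p = root;

		while (*p) {
			if (n->offset < (*p)->offset)
				p = &(*p)->left;
			else if (n->offset > (*p)->offset)
				p = &(*p)->right;
			else
				return -1;	/* duplicate offset */
		}
		*p = n;	/* rb_link_node/rb_insert_color would rebalance here */
		return 0;
	}

	int main(void)
	{
		struct node *root = NULL;
		struct node a = { 4096 }, b = { 0 }, dup = { 4096 };

		printf("%d %d %d\n", insert_offset(&root, &a),
		       insert_offset(&root, &b), insert_offset(&root, &dup));
		return 0;	/* prints: 0 0 -1 */
	}

tree_insert_bytes is the same walk but keyed on size, and it tolerates duplicates by always descending right on a tie.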
| 47 | static int tree_insert_bytes(struct rb_root *root, u64 bytes, | ||
| 48 | struct rb_node *node) | ||
| 49 | { | ||
| 50 | struct rb_node **p = &root->rb_node; | ||
| 51 | struct rb_node *parent = NULL; | ||
| 52 | struct btrfs_free_space *info; | ||
| 53 | |||
| 54 | while (*p) { | ||
| 55 | parent = *p; | ||
| 56 | info = rb_entry(parent, struct btrfs_free_space, bytes_index); | ||
| 57 | |||
| 58 | if (bytes < info->bytes) | ||
| 59 | p = &(*p)->rb_left; | ||
| 60 | else | ||
| 61 | p = &(*p)->rb_right; | ||
| 62 | } | ||
| 63 | |||
| 64 | rb_link_node(node, parent, p); | ||
| 65 | rb_insert_color(node, root); | ||
| 66 | |||
| 67 | return 0; | ||
| 68 | } | ||
| 69 | |||
| 70 | /* | ||
| 71 | * searches the tree for the given offset. If contains is set we will return | ||
| 72 | * the free space that contains the given offset. If contains is not set we | ||
| 73 | * will return the free space that starts at or after the given offset and is | ||
| 74 | * at least bytes long. | ||
| 75 | */ | ||
| 76 | static struct btrfs_free_space *tree_search_offset(struct rb_root *root, | ||
| 77 | u64 offset, u64 bytes, | ||
| 78 | int contains) | ||
| 79 | { | ||
| 80 | struct rb_node *n = root->rb_node; | ||
| 81 | struct btrfs_free_space *entry, *ret = NULL; | ||
| 82 | |||
| 83 | while (n) { | ||
| 84 | entry = rb_entry(n, struct btrfs_free_space, offset_index); | ||
| 85 | |||
| 86 | if (offset < entry->offset) { | ||
| 87 | if (!contains && | ||
| 88 | (!ret || entry->offset < ret->offset) && | ||
| 89 | (bytes <= entry->bytes)) | ||
| 90 | ret = entry; | ||
| 91 | n = n->rb_left; | ||
| 92 | } else if (offset > entry->offset) { | ||
| 93 | if ((entry->offset + entry->bytes - 1) >= offset && | ||
| 94 | bytes <= entry->bytes) { | ||
| 95 | ret = entry; | ||
| 96 | break; | ||
| 97 | } | ||
| 98 | n = n->rb_right; | ||
| 99 | } else { | ||
| 100 | if (bytes > entry->bytes) { | ||
| 101 | n = n->rb_right; | ||
| 102 | continue; | ||
| 103 | } | ||
| 104 | ret = entry; | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | return ret; | ||
| 110 | } | ||
| 111 | |||
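The two lookup modes documented above can be restated over a flat sorted array. A linear-scan sketch of the same contract (illustrative only; the real function walks the rb-tree and has slightly subtler acceptance rules in the contains-unset case):

	#include <stdint.h>
	#include <stdio.h>

	struct space { uint64_t offset, bytes; };

	/* with contains set: the entry covering offset. Without: the
	 * first entry starting at or after offset with at least bytes
	 * of room */
	static const struct space *search_offset(const struct space *e, int n,
						 uint64_t offset, uint64_t bytes,
						 int contains)
	{
		for (int i = 0; i < n; i++) {
			if (contains) {
				if (e[i].offset <= offset &&
				    offset < e[i].offset + e[i].bytes)
					return &e[i];
			} else if (e[i].offset >= offset && e[i].bytes >= bytes) {
				return &e[i];
			}
		}
		return NULL;
	}

	int main(void)
	{
		const struct space map[] = { { 0, 4096 }, { 8192, 16384 } };
		const struct space *s = search_offset(map, 2, 4096, 8192, 0);

		if (s)	/* finds the 16K chunk at 8192 */
			printf("offset=%llu bytes=%llu\n",
			       (unsigned long long)s->offset,
			       (unsigned long long)s->bytes);
		return 0;
	}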
| 112 | /* | ||
| 113 | * return a chunk at least bytes in size, as close to offset as we can get. | ||
| 114 | */ | ||
| 115 | static struct btrfs_free_space *tree_search_bytes(struct rb_root *root, | ||
| 116 | u64 offset, u64 bytes) | ||
| 117 | { | ||
| 118 | struct rb_node *n = root->rb_node; | ||
| 119 | struct btrfs_free_space *entry, *ret = NULL; | ||
| 120 | |||
| 121 | while (n) { | ||
| 122 | entry = rb_entry(n, struct btrfs_free_space, bytes_index); | ||
| 123 | |||
| 124 | if (bytes < entry->bytes) { | ||
| 125 | /* | ||
| 126 | * We prefer to get a hole size as close to the size we | ||
| 127 | * are asking for so we don't take small slivers out of | ||
| 128 | * huge holes, but we also want to get as close to the | ||
| 129 | * offset as possible so we don't have a whole lot of | ||
| 130 | * fragmentation. | ||
| 131 | */ | ||
| 132 | if (offset <= entry->offset) { | ||
| 133 | if (!ret) | ||
| 134 | ret = entry; | ||
| 135 | else if (entry->bytes < ret->bytes) | ||
| 136 | ret = entry; | ||
| 137 | else if (entry->offset < ret->offset) | ||
| 138 | ret = entry; | ||
| 139 | } | ||
| 140 | n = n->rb_left; | ||
| 141 | } else if (bytes > entry->bytes) { | ||
| 142 | n = n->rb_right; | ||
| 143 | } else { | ||
| 144 | /* | ||
| 145 | * Ok, we may have multiple chunks of the wanted size, | ||
| 146 | * so we don't want to take the first one we find. We | ||
| 147 | * want to take the one closest to our given offset, so | ||
| 148 | * keep searching just in case there's a better match. | ||
| 149 | */ | ||
| 150 | n = n->rb_right; | ||
| 151 | if (offset > entry->offset) | ||
| 152 | continue; | ||
| 153 | else if (!ret || entry->offset < ret->offset) | ||
| 154 | ret = entry; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | return ret; | ||
| 159 | } | ||
| 160 | |||
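The best-fit policy in the comments, smallest adequate chunk with ties broken toward the requested offset, also reads well as a linear scan. A sketch under that reading (tie-breaking by absolute distance is a simplification of the tree walk above):

	#include <stdint.h>
	#include <stdio.h>

	struct space { uint64_t offset, bytes; };

	static uint64_t dist(uint64_t a, uint64_t b)
	{
		return a > b ? a - b : b - a;
	}

	/* smallest chunk that fits; among equal sizes, the one whose
	 * offset is nearest the request */
	static const struct space *best_fit(const struct space *e, int n,
					    uint64_t offset, uint64_t bytes)
	{
		const struct space *ret = NULL;

		for (int i = 0; i < n; i++) {
			if (e[i].bytes < bytes)
				continue;
			if (!ret || e[i].bytes < ret->bytes ||
			    (e[i].bytes == ret->bytes &&
			     dist(e[i].offset, offset) < dist(ret->offset, offset)))
				ret = &e[i];
		}
		return ret;
	}

	int main(void)
	{
		const struct space map[] = {
			{ 0, 8192 }, { 65536, 4096 }, { 131072, 4096 },
		};
		const struct space *s = best_fit(map, 3, 131072, 4096);

		if (s)	/* the 4K chunk at 131072, not the bigger one at 0 */
			printf("offset=%llu\n", (unsigned long long)s->offset);
		return 0;
	}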
| 161 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | ||
| 162 | struct btrfs_free_space *info) | ||
| 163 | { | ||
| 164 | rb_erase(&info->offset_index, &block_group->free_space_offset); | ||
| 165 | rb_erase(&info->bytes_index, &block_group->free_space_bytes); | ||
| 166 | } | ||
| 167 | |||
| 168 | static int link_free_space(struct btrfs_block_group_cache *block_group, | ||
| 169 | struct btrfs_free_space *info) | ||
| 170 | { | ||
| 171 | int ret = 0; | ||
| 172 | |||
| 173 | |||
| 174 | ret = tree_insert_offset(&block_group->free_space_offset, info->offset, | ||
| 175 | &info->offset_index); | ||
| 176 | if (ret) | ||
| 177 | return ret; | ||
| 178 | |||
| 179 | ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes, | ||
| 180 | &info->bytes_index); | ||
| 181 | if (ret) | ||
| 182 | return ret; | ||
| 183 | |||
| 184 | return ret; | ||
| 185 | } | ||
| 186 | |||
| 187 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
| 188 | u64 offset, u64 bytes) | ||
| 189 | { | ||
| 190 | struct btrfs_free_space *right_info; | ||
| 191 | struct btrfs_free_space *left_info; | ||
| 192 | struct btrfs_free_space *info = NULL; | ||
| 193 | struct btrfs_free_space *alloc_info; | ||
| 194 | int ret = 0; | ||
| 195 | |||
| 196 | alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | ||
| 197 | if (!alloc_info) | ||
| 198 | return -ENOMEM; | ||
| 199 | |||
| 200 | /* | ||
| 201 | * first we want to see if there is free space adjacent to the range we | ||
| 202 | * are adding. If there is, remove that struct and add a new one to | ||
| 203 | * cover the entire range. | ||
| 204 | */ | ||
| 205 | spin_lock(&block_group->lock); | ||
| 206 | |||
| 207 | right_info = tree_search_offset(&block_group->free_space_offset, | ||
| 208 | offset+bytes, 0, 1); | ||
| 209 | left_info = tree_search_offset(&block_group->free_space_offset, | ||
| 210 | offset-1, 0, 1); | ||
| 211 | |||
| 212 | if (right_info && right_info->offset == offset+bytes) { | ||
| 213 | unlink_free_space(block_group, right_info); | ||
| 214 | info = right_info; | ||
| 215 | info->offset = offset; | ||
| 216 | info->bytes += bytes; | ||
| 217 | } else if (right_info && right_info->offset != offset+bytes) { | ||
| 218 | printk(KERN_ERR "adding space in the middle of an existing " | ||
| 219 | "free space area. existing: offset=%Lu, bytes=%Lu. " | ||
| 220 | "new: offset=%Lu, bytes=%Lu\n", right_info->offset, | ||
| 221 | right_info->bytes, offset, bytes); | ||
| 222 | BUG(); | ||
| 223 | } | ||
| 224 | |||
| 225 | if (left_info) { | ||
| 226 | unlink_free_space(block_group, left_info); | ||
| 227 | |||
| 228 | if (unlikely((left_info->offset + left_info->bytes) != | ||
| 229 | offset)) { | ||
| 230 | printk(KERN_ERR "free space to the left of new free " | ||
| 231 | "space isn't quite right. existing: offset=%Lu," | ||
| 232 | " bytes=%Lu. new: offset=%Lu, bytes=%Lu\n", | ||
| 233 | left_info->offset, left_info->bytes, offset, | ||
| 234 | bytes); | ||
| 235 | BUG(); | ||
| 236 | } | ||
| 237 | |||
| 238 | if (info) { | ||
| 239 | info->offset = left_info->offset; | ||
| 240 | info->bytes += left_info->bytes; | ||
| 241 | kfree(left_info); | ||
| 242 | } else { | ||
| 243 | info = left_info; | ||
| 244 | info->bytes += bytes; | ||
| 245 | } | ||
| 246 | } | ||
| 247 | |||
| 248 | if (info) { | ||
| 249 | ret = link_free_space(block_group, info); | ||
| 250 | if (!ret) | ||
| 251 | info = NULL; | ||
| 252 | goto out; | ||
| 253 | } | ||
| 254 | |||
| 255 | info = alloc_info; | ||
| 256 | alloc_info = NULL; | ||
| 257 | info->offset = offset; | ||
| 258 | info->bytes = bytes; | ||
| 259 | |||
| 260 | ret = link_free_space(block_group, info); | ||
| 261 | if (ret) | ||
| 262 | kfree(info); | ||
| 263 | out: | ||
| 264 | spin_unlock(&block_group->lock); | ||
| 265 | if (ret) { | ||
| 266 | printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); | ||
| 267 | if (ret == -EEXIST) | ||
| 268 | BUG(); | ||
| 269 | } | ||
| 270 | |||
| 271 | if (alloc_info) | ||
| 272 | kfree(alloc_info); | ||
| 273 | |||
| 274 | return ret; | ||
| 275 | } | ||
| 276 | |||
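The neighbour-merging at the heart of btrfs_add_free_space reduces to two adjacency checks. A sketch of that coalescing step in isolation (coalesce is a made-up helper; the real code also relinks the rb-trees and BUGs on mismatched neighbours):

	#include <stdint.h>
	#include <stdio.h>

	struct space { uint64_t offset, bytes; };

	/* grow the new range [offset, offset+bytes) into its neighbours
	 * when they touch it exactly */
	static struct space coalesce(const struct space *left,
				     const struct space *right,
				     uint64_t offset, uint64_t bytes)
	{
		struct space out = { offset, bytes };

		if (right && right->offset == offset + bytes)
			out.bytes += right->bytes;	/* absorb right */
		if (left && left->offset + left->bytes == offset) {
			out.offset = left->offset;	/* absorb left */
			out.bytes += left->bytes;
		}
		return out;
	}

	int main(void)
	{
		struct space l = { 0, 4096 }, r = { 8192, 4096 };
		struct space m = coalesce(&l, &r, 4096, 4096);

		printf("[%llu, %llu)\n", (unsigned long long)m.offset,
		       (unsigned long long)(m.offset + m.bytes));	/* [0, 12288) */
		return 0;
	}

Keeping adjacent free ranges merged is what lets later allocations find one large chunk instead of several small ones.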
| 277 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
| 278 | u64 offset, u64 bytes) | ||
| 279 | { | ||
| 280 | struct btrfs_free_space *info; | ||
| 281 | int ret = 0; | ||
| 282 | |||
| 283 | spin_lock(&block_group->lock); | ||
| 284 | info = tree_search_offset(&block_group->free_space_offset, offset, 0, | ||
| 285 | 1); | ||
| 286 | |||
| 287 | if (info && info->offset == offset) { | ||
| 288 | if (info->bytes < bytes) { | ||
| 289 | printk(KERN_ERR "Found free space at %Lu, size %Lu," | ||
| 290 | "trying to use %Lu\n", | ||
| 291 | info->offset, info->bytes, bytes); | ||
| 292 | WARN_ON(1); | ||
| 293 | ret = -EINVAL; | ||
| 294 | goto out; | ||
| 295 | } | ||
| 296 | |||
| 297 | unlink_free_space(block_group, info); | ||
| 298 | |||
| 299 | if (info->bytes == bytes) { | ||
| 300 | kfree(info); | ||
| 301 | goto out; | ||
| 302 | } | ||
| 303 | |||
| 304 | info->offset += bytes; | ||
| 305 | info->bytes -= bytes; | ||
| 306 | |||
| 307 | ret = link_free_space(block_group, info); | ||
| 308 | BUG_ON(ret); | ||
| 309 | } else if (info && info->offset < offset && | ||
| 310 | info->offset + info->bytes >= offset + bytes) { | ||
| 311 | u64 old_start = info->offset; | ||
| 312 | /* | ||
| 313 | * we're freeing space in the middle of the info, | ||
| 314 | * this can happen during tree log replay | ||
| 315 | * | ||
| 316 | * first unlink the old info and then | ||
| 317 | * insert it again after the hole we're creating | ||
| 318 | */ | ||
| 319 | unlink_free_space(block_group, info); | ||
| 320 | if (offset + bytes < info->offset + info->bytes) { | ||
| 321 | u64 old_end = info->offset + info->bytes; | ||
| 322 | |||
| 323 | info->offset = offset + bytes; | ||
| 324 | info->bytes = old_end - info->offset; | ||
| 325 | ret = link_free_space(block_group, info); | ||
| 326 | BUG_ON(ret); | ||
| 327 | } else { | ||
| 328 | /* the hole we're creating ends at the end | ||
| 329 | * of the info struct, just free the info | ||
| 330 | */ | ||
| 331 | kfree(info); | ||
| 332 | } | ||
| 333 | |||
| 334 | /* step two, insert a new info struct to cover anything | ||
| 335 | * before the hole | ||
| 336 | */ | ||
| 337 | spin_unlock(&block_group->lock); | ||
| 338 | ret = btrfs_add_free_space(block_group, old_start, | ||
| 339 | offset - old_start); | ||
| 340 | BUG_ON(ret); | ||
| 341 | goto out_nolock; | ||
| 342 | } else { | ||
| 343 | WARN_ON(1); | ||
| 344 | } | ||
| 345 | out: | ||
| 346 | spin_unlock(&block_group->lock); | ||
| 347 | out_nolock: | ||
| 348 | return ret; | ||
| 349 | } | ||
| 350 | |||
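The common removal case, where the allocation starts exactly at the chunk's offset, is a front trim. A sketch of just that path (alloc_from_front is hypothetical; freeing the struct when bytes reaches zero is left to the caller, as above):

	#include <stdint.h>
	#include <stdio.h>

	struct space { uint64_t offset, bytes; };

	/* the exact-offset case above: the allocation starts where the
	 * free chunk starts, so just trim the chunk from the front */
	static int alloc_from_front(struct space *info, uint64_t bytes)
	{
		if (info->bytes < bytes)
			return -1;	/* matches the -EINVAL warning above */
		info->offset += bytes;
		info->bytes -= bytes;
		return 0;	/* bytes == 0 now means "free the struct" */
	}

	int main(void)
	{
		struct space info = { 8192, 16384 };

		if (alloc_from_front(&info, 4096) == 0)
			printf("remaining: offset=%llu bytes=%llu\n",
			       (unsigned long long)info.offset,
			       (unsigned long long)info.bytes);
		return 0;	/* remaining: offset=12288 bytes=12288 */
	}

The middle-of-chunk case handled above is the hole-punch variant of the same arithmetic: the tail keeps the old struct and the head is re-added through btrfs_add_free_space.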
| 351 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
| 352 | u64 bytes) | ||
| 353 | { | ||
| 354 | struct btrfs_free_space *info; | ||
| 355 | struct rb_node *n; | ||
| 356 | int count = 0; | ||
| 357 | |||
| 358 | for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { | ||
| 359 | info = rb_entry(n, struct btrfs_free_space, offset_index); | ||
| 360 | if (info->bytes >= bytes) | ||
| 361 | count++; | ||
| 362 | //printk(KERN_INFO "offset=%Lu, bytes=%Lu\n", info->offset, | ||
| 363 | // info->bytes); | ||
| 364 | } | ||
| 365 | printk(KERN_INFO "%d blocks of free space at or bigger " | ||
| 366 | "than the requested size\n", count); | ||
| 367 | } | ||
| 368 | |||
| 369 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) | ||
| 370 | { | ||
| 371 | struct btrfs_free_space *info; | ||
| 372 | struct rb_node *n; | ||
| 373 | u64 ret = 0; | ||
| 374 | |||
| 375 | for (n = rb_first(&block_group->free_space_offset); n; | ||
| 376 | n = rb_next(n)) { | ||
| 377 | info = rb_entry(n, struct btrfs_free_space, offset_index); | ||
| 378 | ret += info->bytes; | ||
| 379 | } | ||
| 380 | |||
| 381 | return ret; | ||
| 382 | } | ||
| 383 | |||
| 384 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | ||
| 385 | { | ||
| 386 | struct btrfs_free_space *info; | ||
| 387 | struct rb_node *node; | ||
| 388 | |||
| 389 | spin_lock(&block_group->lock); | ||
| 390 | while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { | ||
| 391 | info = rb_entry(node, struct btrfs_free_space, bytes_index); | ||
| 392 | unlink_free_space(block_group, info); | ||
| 393 | kfree(info); | ||
| 394 | if (need_resched()) { | ||
| 395 | spin_unlock(&block_group->lock); | ||
| 396 | cond_resched(); | ||
| 397 | spin_lock(&block_group->lock); | ||
| 398 | } | ||
| 399 | } | ||
| 400 | spin_unlock(&block_group->lock); | ||
| 401 | } | ||
| 402 | |||
| 403 | struct btrfs_free_space *btrfs_find_free_space_offset( | ||
| 404 | struct btrfs_block_group_cache *block_group, | ||
| 405 | u64 offset, | ||
| 406 | u64 bytes) | ||
| 407 | { | ||
| 408 | struct btrfs_free_space *ret; | ||
| 409 | |||
| 410 | spin_lock(&block_group->lock); | ||
| 411 | ret = tree_search_offset(&block_group->free_space_offset, offset, | ||
| 412 | bytes, 0); | ||
| 413 | spin_unlock(&block_group->lock); | ||
| 414 | |||
| 415 | return ret; | ||
| 416 | } | ||
| 417 | |||
| 418 | struct btrfs_free_space *btrfs_find_free_space_bytes( | ||
| 419 | struct btrfs_block_group_cache *block_group, | ||
| 420 | u64 offset, | ||
| 421 | u64 bytes) | ||
| 422 | { | ||
| 423 | struct btrfs_free_space *ret; | ||
| 424 | |||
| 425 | spin_lock(&block_group->lock); | ||
| 426 | |||
| 427 | ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); | ||
| 428 | spin_unlock(&block_group->lock); | ||
| 429 | |||
| 430 | return ret; | ||
| 431 | } | ||
| 432 | |||
| 433 | struct btrfs_free_space *btrfs_find_free_space( | ||
| 434 | struct btrfs_block_group_cache *block_group, | ||
| 435 | u64 offset, u64 bytes) | ||
| 436 | { | ||
| 437 | struct btrfs_free_space *ret; | ||
| 438 | |||
| 439 | spin_lock(&block_group->lock); | ||
| 440 | ret = tree_search_offset(&block_group->free_space_offset, offset, | ||
| 441 | bytes, 0); | ||
| 442 | if (!ret) | ||
| 443 | ret = tree_search_bytes(&block_group->free_space_bytes, | ||
| 444 | offset, bytes); | ||
| 445 | |||
| 446 | spin_unlock(&block_group->lock); | ||
| 447 | |||
| 448 | return ret; | ||
| 449 | } | ||
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h new file mode 100644 index 00000000000..2a020b27676 --- /dev/null +++ b/fs/btrfs/hash.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __HASH__ | ||
| 20 | #define __HASH__ | ||
| 21 | |||
| 22 | #include "crc32c.h" | ||
| 23 | static inline u64 btrfs_name_hash(const char *name, int len) | ||
| 24 | { | ||
| 25 | return btrfs_crc32c((u32)~1, name, len); | ||
| 26 | } | ||
| 27 | #endif | ||
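btrfs_name_hash is nothing more than crc32c seeded with ~1. A self-contained userspace sketch, with a bit-at-a-time CRC32C (Castagnoli polynomial, reflected form 0x82F63B78) standing in for the kernel's crc32c library; whether this matches the kernel's exact output depends on the library's conditioning, so treat it as illustrative:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* slow bit-at-a-time CRC32C update, no pre/post inversion */
	static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
	{
		const uint8_t *p = buf;

		while (len--) {
			crc ^= *p++;
			for (int i = 0; i < 8; i++)
				crc = (crc >> 1) ^ (0x82F63B78U & -(crc & 1));
		}
		return crc;
	}

	/* same shape as btrfs_name_hash above: crc32c seeded with ~1 */
	static uint64_t name_hash(const char *name, int len)
	{
		return crc32c((uint32_t)~1, name, len);
	}

	int main(void)
	{
		const char *name = "hello";

		printf("hash=%llu\n",
		       (unsigned long long)name_hash(name, strlen(name)));
		return 0;
	}

The hash keys directory items in the btree, so equal names always land on the same key and collisions are handled by comparing the stored names.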
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c new file mode 100644 index 00000000000..d93451c66ba --- /dev/null +++ b/fs/btrfs/inode-item.c | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "ctree.h" | ||
| 20 | #include "disk-io.h" | ||
| 21 | #include "transaction.h" | ||
| 22 | |||
| 23 | int find_name_in_backref(struct btrfs_path *path, const char *name, | ||
| 24 | int name_len, struct btrfs_inode_ref **ref_ret) | ||
| 25 | { | ||
| 26 | struct extent_buffer *leaf; | ||
| 27 | struct btrfs_inode_ref *ref; | ||
| 28 | unsigned long ptr; | ||
| 29 | unsigned long name_ptr; | ||
| 30 | u32 item_size; | ||
| 31 | u32 cur_offset = 0; | ||
| 32 | int len; | ||
| 33 | |||
| 34 | leaf = path->nodes[0]; | ||
| 35 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 36 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 37 | while (cur_offset < item_size) { | ||
| 38 | ref = (struct btrfs_inode_ref *)(ptr + cur_offset); | ||
| 39 | len = btrfs_inode_ref_name_len(leaf, ref); | ||
| 40 | name_ptr = (unsigned long)(ref + 1); | ||
| 41 | cur_offset += len + sizeof(*ref); | ||
| 42 | if (len != name_len) | ||
| 43 | continue; | ||
| 44 | if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) { | ||
| 45 | *ref_ret = ref; | ||
| 46 | return 1; | ||
| 47 | } | ||
| 48 | } | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
| 52 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | ||
| 53 | struct btrfs_root *root, | ||
| 54 | const char *name, int name_len, | ||
| 55 | u64 inode_objectid, u64 ref_objectid, u64 *index) | ||
| 56 | { | ||
| 57 | struct btrfs_path *path; | ||
| 58 | struct btrfs_key key; | ||
| 59 | struct btrfs_inode_ref *ref; | ||
| 60 | struct extent_buffer *leaf; | ||
| 61 | unsigned long ptr; | ||
| 62 | unsigned long item_start; | ||
| 63 | u32 item_size; | ||
| 64 | u32 sub_item_len; | ||
| 65 | int ret; | ||
| 66 | int del_len = name_len + sizeof(*ref); | ||
| 67 | |||
| 68 | key.objectid = inode_objectid; | ||
| 69 | key.offset = ref_objectid; | ||
| 70 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | ||
| 71 | |||
| 72 | path = btrfs_alloc_path(); | ||
| 73 | if (!path) | ||
| 74 | return -ENOMEM; | ||
| 75 | |||
| 76 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 77 | if (ret > 0) { | ||
| 78 | ret = -ENOENT; | ||
| 79 | goto out; | ||
| 80 | } else if (ret < 0) { | ||
| 81 | goto out; | ||
| 82 | } | ||
| 83 | if (!find_name_in_backref(path, name, name_len, &ref)) { | ||
| 84 | ret = -ENOENT; | ||
| 85 | goto out; | ||
| 86 | } | ||
| 87 | leaf = path->nodes[0]; | ||
| 88 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 89 | |||
| 90 | if (index) | ||
| 91 | *index = btrfs_inode_ref_index(leaf, ref); | ||
| 92 | |||
| 93 | if (del_len == item_size) { | ||
| 94 | ret = btrfs_del_item(trans, root, path); | ||
| 95 | goto out; | ||
| 96 | } | ||
| 97 | ptr = (unsigned long)ref; | ||
| 98 | sub_item_len = name_len + sizeof(*ref); | ||
| 99 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 100 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, | ||
| 101 | item_size - (ptr + sub_item_len - item_start)); | ||
| 102 | ret = btrfs_truncate_item(trans, root, path, | ||
| 103 | item_size - sub_item_len, 1); | ||
| 104 | BUG_ON(ret); | ||
| 105 | out: | ||
| 106 | btrfs_free_path(path); | ||
| 107 | return ret; | ||
| 108 | } | ||
| 109 | |||
| 110 | int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | ||
| 111 | struct btrfs_root *root, | ||
| 112 | const char *name, int name_len, | ||
| 113 | u64 inode_objectid, u64 ref_objectid, u64 index) | ||
| 114 | { | ||
| 115 | struct btrfs_path *path; | ||
| 116 | struct btrfs_key key; | ||
| 117 | struct btrfs_inode_ref *ref; | ||
| 118 | unsigned long ptr; | ||
| 119 | int ret; | ||
| 120 | int ins_len = name_len + sizeof(*ref); | ||
| 121 | |||
| 122 | key.objectid = inode_objectid; | ||
| 123 | key.offset = ref_objectid; | ||
| 124 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | ||
| 125 | |||
| 126 | path = btrfs_alloc_path(); | ||
| 127 | if (!path) | ||
| 128 | return -ENOMEM; | ||
| 129 | |||
| 130 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 131 | ins_len); | ||
| 132 | if (ret == -EEXIST) { | ||
| 133 | u32 old_size; | ||
| 134 | |||
| 135 | if (find_name_in_backref(path, name, name_len, &ref)) | ||
| 136 | goto out; | ||
| 137 | |||
| 138 | old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 139 | ret = btrfs_extend_item(trans, root, path, ins_len); | ||
| 140 | BUG_ON(ret); | ||
| 141 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 142 | struct btrfs_inode_ref); | ||
| 143 | ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); | ||
| 144 | btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); | ||
| 145 | btrfs_set_inode_ref_index(path->nodes[0], ref, index); | ||
| 146 | ptr = (unsigned long)(ref + 1); | ||
| 147 | ret = 0; | ||
| 148 | } else if (ret < 0) { | ||
| 149 | goto out; | ||
| 150 | } else { | ||
| 151 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 152 | struct btrfs_inode_ref); | ||
| 153 | btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); | ||
| 154 | btrfs_set_inode_ref_index(path->nodes[0], ref, index); | ||
| 155 | ptr = (unsigned long)(ref + 1); | ||
| 156 | } | ||
| 157 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | ||
| 158 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 159 | |||
| 160 | out: | ||
| 161 | btrfs_free_path(path); | ||
| 162 | return ret; | ||
| 163 | } | ||
| 164 | |||
| 165 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | ||
| 166 | struct btrfs_root *root, | ||
| 167 | struct btrfs_path *path, u64 objectid) | ||
| 168 | { | ||
| 169 | struct btrfs_key key; | ||
| 170 | int ret; | ||
| 171 | key.objectid = objectid; | ||
| 172 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 173 | key.offset = 0; | ||
| 174 | |||
| 175 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 176 | sizeof(struct btrfs_inode_item)); | ||
| 177 | if (ret == 0 && objectid > root->highest_inode) | ||
| 178 | root->highest_inode = objectid; | ||
| 179 | return ret; | ||
| 180 | } | ||
| 181 | |||
| 182 | int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 183 | *root, struct btrfs_path *path, | ||
| 184 | struct btrfs_key *location, int mod) | ||
| 185 | { | ||
| 186 | int ins_len = mod < 0 ? -1 : 0; | ||
| 187 | int cow = mod != 0; | ||
| 188 | int ret; | ||
| 189 | int slot; | ||
| 190 | struct extent_buffer *leaf; | ||
| 191 | struct btrfs_key found_key; | ||
| 192 | |||
| 193 | ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); | ||
| 194 | if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && | ||
| 195 | location->offset == (u64)-1 && path->slots[0] != 0) { | ||
| 196 | slot = path->slots[0] - 1; | ||
| 197 | leaf = path->nodes[0]; | ||
| 198 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 199 | if (found_key.objectid == location->objectid && | ||
| 200 | btrfs_key_type(&found_key) == btrfs_key_type(location)) { | ||
| 201 | path->slots[0]--; | ||
| 202 | return 0; | ||
| 203 | } | ||
| 204 | } | ||
| 205 | return ret; | ||
| 206 | } | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c new file mode 100644 index 00000000000..80038c5ef7c --- /dev/null +++ b/fs/btrfs/inode-map.c | |||
| @@ -0,0 +1,145 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "ctree.h" | ||
| 20 | #include "disk-io.h" | ||
| 21 | #include "transaction.h" | ||
| 22 | |||
| 23 | int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | ||
| 24 | { | ||
| 25 | struct btrfs_path *path; | ||
| 26 | int ret; | ||
| 27 | struct extent_buffer *l; | ||
| 28 | struct btrfs_key search_key; | ||
| 29 | struct btrfs_key found_key; | ||
| 30 | int slot; | ||
| 31 | |||
| 32 | path = btrfs_alloc_path(); | ||
| 33 | BUG_ON(!path); | ||
| 34 | |||
| 35 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; | ||
| 36 | search_key.type = -1; | ||
| 37 | search_key.offset = (u64)-1; | ||
| 38 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
| 39 | if (ret < 0) | ||
| 40 | goto error; | ||
| 41 | BUG_ON(ret == 0); | ||
| 42 | if (path->slots[0] > 0) { | ||
| 43 | slot = path->slots[0] - 1; | ||
| 44 | l = path->nodes[0]; | ||
| 45 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
| 46 | *objectid = found_key.objectid; | ||
| 47 | } else { | ||
| 48 | *objectid = BTRFS_FIRST_FREE_OBJECTID; | ||
| 49 | } | ||
| 50 | ret = 0; | ||
| 51 | error: | ||
| 52 | btrfs_free_path(path); | ||
| 53 | return ret; | ||
| 54 | } | ||
| 55 | |||
| 56 | /* | ||
| 57 | * walks the btree of allocated inodes and finds a hole. | ||
| 58 | */ | ||
| 59 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | ||
| 60 | struct btrfs_root *root, | ||
| 61 | u64 dirid, u64 *objectid) | ||
| 62 | { | ||
| 63 | struct btrfs_path *path; | ||
| 64 | struct btrfs_key key; | ||
| 65 | int ret; | ||
| 66 | int slot = 0; | ||
| 67 | u64 last_ino = 0; | ||
| 68 | int start_found; | ||
| 69 | struct extent_buffer *l; | ||
| 70 | struct btrfs_key search_key; | ||
| 71 | u64 search_start = dirid; | ||
| 72 | |||
| 73 | mutex_lock(&root->objectid_mutex); | ||
| 74 | if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && | ||
| 75 | root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { | ||
| 76 | *objectid = ++root->last_inode_alloc; | ||
| 77 | mutex_unlock(&root->objectid_mutex); | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | path = btrfs_alloc_path(); | ||
| 81 | BUG_ON(!path); | ||
| 82 | search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); | ||
| 83 | search_key.objectid = search_start; | ||
| 84 | search_key.type = 0; | ||
| 85 | search_key.offset = 0; | ||
| 86 | |||
| 87 | btrfs_init_path(path); | ||
| 88 | start_found = 0; | ||
| 89 | ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); | ||
| 90 | if (ret < 0) | ||
| 91 | goto error; | ||
| 92 | |||
| 93 | while (1) { | ||
| 94 | l = path->nodes[0]; | ||
| 95 | slot = path->slots[0]; | ||
| 96 | if (slot >= btrfs_header_nritems(l)) { | ||
| 97 | ret = btrfs_next_leaf(root, path); | ||
| 98 | if (ret == 0) | ||
| 99 | continue; | ||
| 100 | if (ret < 0) | ||
| 101 | goto error; | ||
| 102 | if (!start_found) { | ||
| 103 | *objectid = search_start; | ||
| 104 | start_found = 1; | ||
| 105 | goto found; | ||
| 106 | } | ||
| 107 | *objectid = last_ino > search_start ? | ||
| 108 | last_ino : search_start; | ||
| 109 | goto found; | ||
| 110 | } | ||
| 111 | btrfs_item_key_to_cpu(l, &key, slot); | ||
| 112 | if (key.objectid >= search_start) { | ||
| 113 | if (start_found) { | ||
| 114 | if (last_ino < search_start) | ||
| 115 | last_ino = search_start; | ||
| 116 | if (key.objectid > last_ino) { | ||
| 117 | *objectid = last_ino; | ||
| 118 | goto found; | ||
| 119 | } | ||
| 120 | } else if (key.objectid > search_start) { | ||
| 121 | *objectid = search_start; | ||
| 122 | goto found; | ||
| 123 | } | ||
| 124 | } | ||
| 125 | if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) | ||
| 126 | break; | ||
| 127 | |||
| 128 | start_found = 1; | ||
| 129 | last_ino = key.objectid + 1; | ||
| 130 | path->slots[0]++; | ||
| 131 | } | ||
| 132 | // FIXME -ENOSPC | ||
| 133 | BUG_ON(1); | ||
| 134 | found: | ||
| 135 | btrfs_release_path(root, path); | ||
| 136 | btrfs_free_path(path); | ||
| 137 | BUG_ON(*objectid < search_start); | ||
| 138 | mutex_unlock(&root->objectid_mutex); | ||
| 139 | return 0; | ||
| 140 | error: | ||
| 141 | btrfs_release_path(root, path); | ||
| 142 | btrfs_free_path(path); | ||
| 143 | mutex_unlock(&root->objectid_mutex); | ||
| 144 | return ret; | ||
| 145 | } | ||
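Stripped of the btree walk, btrfs_find_free_objectid is a gap search over sorted keys. A sketch of the same idea against a plain array (first_free_id is made up; the real function also caches last_inode_alloc and bounds the search by BTRFS_LAST_FREE_OBJECTID):

	#include <stdint.h>
	#include <stdio.h>

	/* ids[] plays the sorted btree keys walked above; return the
	 * first unused objectid at or after start */
	static uint64_t first_free_id(const uint64_t *ids, int n, uint64_t start)
	{
		uint64_t last = start;

		for (int i = 0; i < n; i++) {
			if (ids[i] < start)
				continue;
			if (ids[i] > last)
				return last;	/* found a hole */
			last = ids[i] + 1;
		}
		return last;	/* everything up to here is in use */
	}

	int main(void)
	{
		const uint64_t ids[] = { 256, 257, 259, 260 };

		printf("%llu\n",
		       (unsigned long long)first_free_id(ids, 4, 256));
		return 0;	/* prints 258 */
	}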
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c new file mode 100644 index 00000000000..bf4bed6ca4d --- /dev/null +++ b/fs/btrfs/inode.c | |||
| @@ -0,0 +1,3908 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/kernel.h> | ||
| 20 | #include <linux/bio.h> | ||
| 21 | #include <linux/buffer_head.h> | ||
| 22 | #include <linux/file.h> | ||
| 23 | #include <linux/fs.h> | ||
| 24 | #include <linux/pagemap.h> | ||
| 25 | #include <linux/highmem.h> | ||
| 26 | #include <linux/time.h> | ||
| 27 | #include <linux/init.h> | ||
| 28 | #include <linux/string.h> | ||
| 29 | #include <linux/smp_lock.h> | ||
| 30 | #include <linux/backing-dev.h> | ||
| 31 | #include <linux/mpage.h> | ||
| 32 | #include <linux/swap.h> | ||
| 33 | #include <linux/writeback.h> | ||
| 34 | #include <linux/statfs.h> | ||
| 35 | #include <linux/compat.h> | ||
| 36 | #include <linux/bit_spinlock.h> | ||
| 37 | #include <linux/version.h> | ||
| 38 | #include <linux/xattr.h> | ||
| 39 | #include <linux/posix_acl.h> | ||
| 40 | #include "ctree.h" | ||
| 41 | #include "disk-io.h" | ||
| 42 | #include "transaction.h" | ||
| 43 | #include "btrfs_inode.h" | ||
| 44 | #include "ioctl.h" | ||
| 45 | #include "print-tree.h" | ||
| 46 | #include "volumes.h" | ||
| 47 | #include "ordered-data.h" | ||
| 48 | #include "xattr.h" | ||
| 49 | #include "compat.h" | ||
| 50 | #include "tree-log.h" | ||
| 51 | #include "ref-cache.h" | ||
| 52 | |||
| 53 | struct btrfs_iget_args { | ||
| 54 | u64 ino; | ||
| 55 | struct btrfs_root *root; | ||
| 56 | }; | ||
| 57 | |||
| 58 | static struct inode_operations btrfs_dir_inode_operations; | ||
| 59 | static struct inode_operations btrfs_symlink_inode_operations; | ||
| 60 | static struct inode_operations btrfs_dir_ro_inode_operations; | ||
| 61 | static struct inode_operations btrfs_special_inode_operations; | ||
| 62 | static struct inode_operations btrfs_file_inode_operations; | ||
| 63 | static struct address_space_operations btrfs_aops; | ||
| 64 | static struct address_space_operations btrfs_symlink_aops; | ||
| 65 | static struct file_operations btrfs_dir_file_operations; | ||
| 66 | static struct extent_io_ops btrfs_extent_io_ops; | ||
| 67 | |||
| 68 | static struct kmem_cache *btrfs_inode_cachep; | ||
| 69 | struct kmem_cache *btrfs_trans_handle_cachep; | ||
| 70 | struct kmem_cache *btrfs_transaction_cachep; | ||
| 71 | struct kmem_cache *btrfs_bit_radix_cachep; | ||
| 72 | struct kmem_cache *btrfs_path_cachep; | ||
| 73 | |||
| 74 | #define S_SHIFT 12 | ||
| 75 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | ||
| 76 | [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, | ||
| 77 | [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, | ||
| 78 | [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, | ||
| 79 | [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, | ||
| 80 | [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, | ||
| 81 | [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, | ||
| 82 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, | ||
| 83 | }; | ||
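/*
 * Editor's sketch, not part of the original commit: the table above is
 * indexed by shifting the S_IFMT bits of i_mode down by S_SHIFT, which is
 * how an in-memory file mode is translated to the on-disk BTRFS_FT_* byte
 * stored in directory entries.  The helper name here is hypothetical.
 */
static inline u8 example_btrfs_type_from_mode(umode_t mode)
{
	return btrfs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}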
| 84 | |||
| 85 | static void btrfs_truncate(struct inode *inode); | ||
| 86 | |||
| 87 | /* | ||
| 88 | * a very lame attempt at stopping writes when the FS is 85% full. There | ||
| 89 | * are countless ways this is incorrect, but it is better than nothing. | ||
| 90 | */ | ||
| 91 | int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | ||
| 92 | int for_del) | ||
| 93 | { | ||
| 94 | u64 total; | ||
| 95 | u64 used; | ||
| 96 | u64 thresh; | ||
| 97 | unsigned long flags; | ||
| 98 | int ret = 0; | ||
| 99 | |||
| 100 | spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); | ||
| 101 | total = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
| 102 | used = btrfs_super_bytes_used(&root->fs_info->super_copy); | ||
| 103 | if (for_del) | ||
| 104 | thresh = total * 90; | ||
| 105 | else | ||
| 106 | thresh = total * 85; | ||
| 107 | |||
| 108 | do_div(thresh, 100); | ||
| 109 | |||
| 110 | if (used + root->fs_info->delalloc_bytes + num_required > thresh) | ||
| 111 | ret = -ENOSPC; | ||
| 112 | spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); | ||
| 113 | return ret; | ||
| 114 | } | ||
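/*
 * Editor's worked example for the threshold math above (values are
 * illustrative, not from the commit): with total = 107374182400 (100 GiB),
 * a delete is allowed up to thresh = total * 90 / 100 = 96636764160
 * (90 GiB), while any other allocation uses thresh = total * 85 / 100 =
 * 91268055040 (85 GiB).  Once used + delalloc_bytes + num_required
 * exceeds that figure, the caller gets -ENOSPC.
 */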
| 115 | |||
| 116 | /* | ||
| 117 | * when extent_io.c finds a delayed allocation range in the file, | ||
| 118 | * the callbacks end up in this code. The basic idea is to | ||
| 119 | * allocate extents on disk for the range, and create ordered data structs | ||
| 120 | * in ram to track those extents. | ||
| 121 | */ | ||
| 122 | static int cow_file_range(struct inode *inode, u64 start, u64 end) | ||
| 123 | { | ||
| 124 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 125 | struct btrfs_trans_handle *trans; | ||
| 126 | u64 alloc_hint = 0; | ||
| 127 | u64 num_bytes; | ||
| 128 | u64 cur_alloc_size; | ||
| 129 | u64 blocksize = root->sectorsize; | ||
| 130 | u64 orig_num_bytes; | ||
| 131 | struct btrfs_key ins; | ||
| 132 | struct extent_map *em; | ||
| 133 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 134 | int ret = 0; | ||
| 135 | |||
| 136 | trans = btrfs_join_transaction(root, 1); | ||
| 137 | BUG_ON(!trans); | ||
| 138 | btrfs_set_trans_block_group(trans, inode); | ||
| 139 | |||
| 140 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | ||
| 141 | num_bytes = max(blocksize, num_bytes); | ||
| 142 | orig_num_bytes = num_bytes; | ||
| 143 | |||
| 144 | if (alloc_hint == EXTENT_MAP_INLINE) | ||
| 145 | goto out; | ||
| 146 | |||
| 147 | BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); | ||
| 148 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 149 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | ||
| 150 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 151 | |||
| 152 | while(num_bytes > 0) { | ||
| 153 | cur_alloc_size = min(num_bytes, root->fs_info->max_extent); | ||
| 154 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | ||
| 155 | root->sectorsize, 0, alloc_hint, | ||
| 156 | (u64)-1, &ins, 1); | ||
| 157 | if (ret) { | ||
| 158 | WARN_ON(1); | ||
| 159 | goto out; | ||
| 160 | } | ||
| 161 | em = alloc_extent_map(GFP_NOFS); | ||
| 162 | em->start = start; | ||
| 163 | em->len = ins.offset; | ||
| 164 | em->block_start = ins.objectid; | ||
| 165 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 166 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 167 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 168 | while(1) { | ||
| 169 | spin_lock(&em_tree->lock); | ||
| 170 | ret = add_extent_mapping(em_tree, em); | ||
| 171 | spin_unlock(&em_tree->lock); | ||
| 172 | if (ret != -EEXIST) { | ||
| 173 | free_extent_map(em); | ||
| 174 | break; | ||
| 175 | } | ||
| 176 | btrfs_drop_extent_cache(inode, start, | ||
| 177 | start + ins.offset - 1, 0); | ||
| 178 | } | ||
| 179 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 180 | |||
| 181 | cur_alloc_size = ins.offset; | ||
| 182 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, | ||
| 183 | ins.offset, 0); | ||
| 184 | BUG_ON(ret); | ||
| 185 | if (num_bytes < cur_alloc_size) { | ||
| 186 | printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, | ||
| 187 | cur_alloc_size); | ||
| 188 | break; | ||
| 189 | } | ||
| 190 | num_bytes -= cur_alloc_size; | ||
| 191 | alloc_hint = ins.objectid + ins.offset; | ||
| 192 | start += cur_alloc_size; | ||
| 193 | } | ||
| 194 | out: | ||
| 195 | btrfs_end_transaction(trans, root); | ||
| 196 | return ret; | ||
| 197 | } | ||
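/*
 * Editor's sketch of the rounding above (assumed numbers): start and end
 * are inclusive byte offsets, so with blocksize = 4096 and the range
 * [start, end] = [0, 5000] covering 5001 bytes:
 *
 *	num_bytes = (end - start + blocksize) & ~(blocksize - 1)
 *	          = (5000 + 4096) & ~4095
 *	          = 8192			(two 4 KiB blocks)
 */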
| 198 | |||
| 199 | /* | ||
| 200 | * when the nocow writeback callback runs, this checks for snapshots or COW copies | ||
| 201 | * of the extents that exist in the file, and COWs the file as required. | ||
| 202 | * | ||
| 203 | * If no cow copies or snapshots exist, we write directly to the existing | ||
| 204 | * blocks on disk. | ||
| 205 | */ | ||
| 206 | static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) | ||
| 207 | { | ||
| 208 | u64 extent_start; | ||
| 209 | u64 extent_end; | ||
| 210 | u64 bytenr; | ||
| 211 | u64 loops = 0; | ||
| 212 | u64 total_fs_bytes; | ||
| 213 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 214 | struct btrfs_block_group_cache *block_group; | ||
| 215 | struct btrfs_trans_handle *trans; | ||
| 216 | struct extent_buffer *leaf; | ||
| 217 | int found_type; | ||
| 218 | struct btrfs_path *path; | ||
| 219 | struct btrfs_file_extent_item *item; | ||
| 220 | int ret; | ||
| 221 | int err = 0; | ||
| 222 | struct btrfs_key found_key; | ||
| 223 | |||
| 224 | total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
| 225 | path = btrfs_alloc_path(); | ||
| 226 | BUG_ON(!path); | ||
| 227 | trans = btrfs_join_transaction(root, 1); | ||
| 228 | BUG_ON(!trans); | ||
| 229 | again: | ||
| 230 | ret = btrfs_lookup_file_extent(NULL, root, path, | ||
| 231 | inode->i_ino, start, 0); | ||
| 232 | if (ret < 0) { | ||
| 233 | err = ret; | ||
| 234 | goto out; | ||
| 235 | } | ||
| 236 | |||
| 237 | if (ret != 0) { | ||
| 238 | if (path->slots[0] == 0) | ||
| 239 | goto not_found; | ||
| 240 | path->slots[0]--; | ||
| 241 | } | ||
| 242 | |||
| 243 | leaf = path->nodes[0]; | ||
| 244 | item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 245 | struct btrfs_file_extent_item); | ||
| 246 | |||
| 247 | /* are we inside the extent that was found? */ | ||
| 248 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 249 | found_type = btrfs_key_type(&found_key); | ||
| 250 | if (found_key.objectid != inode->i_ino || | ||
| 251 | found_type != BTRFS_EXTENT_DATA_KEY) | ||
| 252 | goto not_found; | ||
| 253 | |||
| 254 | found_type = btrfs_file_extent_type(leaf, item); | ||
| 255 | extent_start = found_key.offset; | ||
| 256 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
| 257 | u64 extent_num_bytes; | ||
| 258 | |||
| 259 | extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item); | ||
| 260 | extent_end = extent_start + extent_num_bytes; | ||
| 261 | err = 0; | ||
| 262 | |||
| 263 | if (loops && start != extent_start) | ||
| 264 | goto not_found; | ||
| 265 | |||
| 266 | if (start < extent_start || start >= extent_end) | ||
| 267 | goto not_found; | ||
| 268 | |||
| 269 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); | ||
| 270 | if (bytenr == 0) | ||
| 271 | goto not_found; | ||
| 272 | |||
| 273 | if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr)) | ||
| 274 | goto not_found; | ||
| 275 | /* | ||
| 276 | * we may be called by the resizer, make sure we're inside | ||
| 277 | * the limits of the FS | ||
| 278 | */ | ||
| 279 | block_group = btrfs_lookup_block_group(root->fs_info, | ||
| 280 | bytenr); | ||
| 281 | if (!block_group || block_group->ro) | ||
| 282 | goto not_found; | ||
| 283 | |||
| 284 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
| 285 | extent_num_bytes = min(end + 1, extent_end) - start; | ||
| 286 | ret = btrfs_add_ordered_extent(inode, start, bytenr, | ||
| 287 | extent_num_bytes, 1); | ||
| 288 | if (ret) { | ||
| 289 | err = ret; | ||
| 290 | goto out; | ||
| 291 | } | ||
| 292 | |||
| 293 | btrfs_release_path(root, path); | ||
| 294 | start = extent_end; | ||
| 295 | if (start <= end) { | ||
| 296 | loops++; | ||
| 297 | goto again; | ||
| 298 | } | ||
| 299 | } else { | ||
| 300 | not_found: | ||
| 301 | btrfs_end_transaction(trans, root); | ||
| 302 | btrfs_free_path(path); | ||
| 303 | return cow_file_range(inode, start, end); | ||
| 304 | } | ||
| 305 | out: | ||
| 306 | WARN_ON(err); | ||
| 307 | btrfs_end_transaction(trans, root); | ||
| 308 | btrfs_free_path(path); | ||
| 309 | return err; | ||
| 310 | } | ||
| 311 | |||
| 312 | /* | ||
| 313 | * extent_io.c callback to do delayed allocation processing | ||
| 314 | */ | ||
| 315 | static int run_delalloc_range(struct inode *inode, u64 start, u64 end) | ||
| 316 | { | ||
| 317 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 318 | int ret; | ||
| 319 | |||
| 320 | if (btrfs_test_opt(root, NODATACOW) || | ||
| 321 | btrfs_test_flag(inode, NODATACOW)) | ||
| 322 | ret = run_delalloc_nocow(inode, start, end); | ||
| 323 | else | ||
| 324 | ret = cow_file_range(inode, start, end); | ||
| 325 | |||
| 326 | return ret; | ||
| 327 | } | ||
| 328 | |||
| 329 | /* | ||
| 330 | * extent_io.c set_bit_hook, used to track delayed allocation | ||
| 331 | * bytes in this file, and to maintain the list of inodes that | ||
| 332 | * have pending delalloc work to be done. | ||
| 333 | */ | ||
| 334 | int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | ||
| 335 | unsigned long old, unsigned long bits) | ||
| 336 | { | ||
| 337 | unsigned long flags; | ||
| 338 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | ||
| 339 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 340 | spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); | ||
| 341 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | ||
| 342 | root->fs_info->delalloc_bytes += end - start + 1; | ||
| 343 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
| 344 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | ||
| 345 | &root->fs_info->delalloc_inodes); | ||
| 346 | } | ||
| 347 | spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); | ||
| 348 | } | ||
| 349 | return 0; | ||
| 350 | } | ||
| 351 | |||
| 352 | /* | ||
| 353 | * extent_io.c clear_bit_hook, see set_bit_hook for why | ||
| 354 | */ | ||
| 355 | int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | ||
| 356 | unsigned long old, unsigned long bits) | ||
| 357 | { | ||
| 358 | if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | ||
| 359 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 360 | unsigned long flags; | ||
| 361 | |||
| 362 | spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); | ||
| 363 | if (end - start + 1 > root->fs_info->delalloc_bytes) { | ||
| 364 | printk("warning: delalloc account %Lu %Lu\n", | ||
| 365 | end - start + 1, root->fs_info->delalloc_bytes); | ||
| 366 | root->fs_info->delalloc_bytes = 0; | ||
| 367 | BTRFS_I(inode)->delalloc_bytes = 0; | ||
| 368 | } else { | ||
| 369 | root->fs_info->delalloc_bytes -= end - start + 1; | ||
| 370 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; | ||
| 371 | } | ||
| 372 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | ||
| 373 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
| 374 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | ||
| 375 | } | ||
| 376 | spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); | ||
| 377 | } | ||
| 378 | return 0; | ||
| 379 | } | ||
| 380 | |||
| 381 | /* | ||
| 382 | * extent_io.c merge_bio_hook, this must check the chunk tree to make sure | ||
| 383 | * we don't create bios that span stripes or chunks | ||
| 384 | */ | ||
| 385 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | ||
| 386 | size_t size, struct bio *bio) | ||
| 387 | { | ||
| 388 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
| 389 | struct btrfs_mapping_tree *map_tree; | ||
| 390 | u64 logical = (u64)bio->bi_sector << 9; | ||
| 391 | u64 length = 0; | ||
| 392 | u64 map_length; | ||
| 393 | int ret; | ||
| 394 | |||
| 395 | length = bio->bi_size; | ||
| 396 | map_tree = &root->fs_info->mapping_tree; | ||
| 397 | map_length = length; | ||
| 398 | ret = btrfs_map_block(map_tree, READ, logical, | ||
| 399 | &map_length, NULL, 0); | ||
| 400 | |||
| 401 | if (map_length < length + size) { | ||
| 402 | return 1; | ||
| 403 | } | ||
| 404 | return 0; | ||
| 405 | } | ||
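/*
 * Editor's note with illustrative numbers: if the bio already holds
 * length = 61440 bytes (60 KiB) and btrfs_map_block() trims map_length to
 * 65536 bytes of contiguous mapping, then adding a size = 8192 byte page
 * makes map_length < length + size true (65536 < 69632), so the hook
 * returns 1 and the page must start a new bio rather than cross the
 * stripe boundary.
 */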
| 406 | |||
| 407 | /* | ||
| 408 | * in order to insert checksums into the metadata in large chunks, | ||
| 409 | * we wait until bio submission time. All the pages in the bio are | ||
| 410 | * checksummed and sums are attached onto the ordered extent record. | ||
| 411 | * | ||
| 412 | * At IO completion time the sums attached to the ordered extent record | ||
| 413 | * are inserted into the btree | ||
| 414 | */ | ||
| 415 | int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | ||
| 416 | int mirror_num) | ||
| 417 | { | ||
| 418 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 419 | int ret = 0; | ||
| 420 | |||
| 421 | ret = btrfs_csum_one_bio(root, inode, bio); | ||
| 422 | BUG_ON(ret); | ||
| 423 | |||
| 424 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | ||
| 425 | } | ||
| 426 | |||
| 427 | /* | ||
| 428 | * extent_io.c submission hook. This does the right thing for csum calculation on write, | ||
| 429 | * or reading the csums from the tree before a read | ||
| 430 | */ | ||
| 431 | int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | ||
| 432 | int mirror_num) | ||
| 433 | { | ||
| 434 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 435 | int ret = 0; | ||
| 436 | |||
| 437 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
| 438 | BUG_ON(ret); | ||
| 439 | |||
| 440 | if (btrfs_test_opt(root, NODATASUM) || | ||
| 441 | btrfs_test_flag(inode, NODATASUM)) { | ||
| 442 | goto mapit; | ||
| 443 | } | ||
| 444 | |||
| 445 | if (!(rw & (1 << BIO_RW))) { | ||
| 446 | btrfs_lookup_bio_sums(root, inode, bio); | ||
| 447 | goto mapit; | ||
| 448 | } | ||
| 449 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 450 | inode, rw, bio, mirror_num, | ||
| 451 | __btrfs_submit_bio_hook); | ||
| 452 | mapit: | ||
| 453 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); | ||
| 454 | } | ||
| 455 | |||
| 456 | /* | ||
| 457 | * given a list of ordered sums, record them in the inode. This happens | ||
| 458 | * at IO completion time based on sums calculated at bio submission time. | ||
| 459 | */ | ||
| 460 | static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | ||
| 461 | struct inode *inode, u64 file_offset, | ||
| 462 | struct list_head *list) | ||
| 463 | { | ||
| 464 | struct list_head *cur; | ||
| 465 | struct btrfs_ordered_sum *sum; | ||
| 466 | |||
| 467 | btrfs_set_trans_block_group(trans, inode); | ||
| 468 | list_for_each(cur, list) { | ||
| 469 | sum = list_entry(cur, struct btrfs_ordered_sum, list); | ||
| 470 | btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, | ||
| 471 | inode, sum); | ||
| 472 | } | ||
| 473 | return 0; | ||
| 474 | } | ||
| 475 | |||
| 476 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) | ||
| 477 | { | ||
| 478 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, | ||
| 479 | GFP_NOFS); | ||
| 480 | } | ||
| 481 | |||
| 482 | /* see btrfs_writepage_start_hook for details on why this is required */ | ||
| 483 | struct btrfs_writepage_fixup { | ||
| 484 | struct page *page; | ||
| 485 | struct btrfs_work work; | ||
| 486 | }; | ||
| 487 | |||
| 488 | void btrfs_writepage_fixup_worker(struct btrfs_work *work) | ||
| 489 | { | ||
| 490 | struct btrfs_writepage_fixup *fixup; | ||
| 491 | struct btrfs_ordered_extent *ordered; | ||
| 492 | struct page *page; | ||
| 493 | struct inode *inode; | ||
| 494 | u64 page_start; | ||
| 495 | u64 page_end; | ||
| 496 | |||
| 497 | fixup = container_of(work, struct btrfs_writepage_fixup, work); | ||
| 498 | page = fixup->page; | ||
| 499 | again: | ||
| 500 | lock_page(page); | ||
| 501 | if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { | ||
| 502 | ClearPageChecked(page); | ||
| 503 | goto out_page; | ||
| 504 | } | ||
| 505 | |||
| 506 | inode = page->mapping->host; | ||
| 507 | page_start = page_offset(page); | ||
| 508 | page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; | ||
| 509 | |||
| 510 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | ||
| 511 | |||
| 512 | /* already ordered? We're done */ | ||
| 513 | if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
| 514 | EXTENT_ORDERED, 0)) { | ||
| 515 | goto out; | ||
| 516 | } | ||
| 517 | |||
| 518 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
| 519 | if (ordered) { | ||
| 520 | unlock_extent(&BTRFS_I(inode)->io_tree, page_start, | ||
| 521 | page_end, GFP_NOFS); | ||
| 522 | unlock_page(page); | ||
| 523 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 524 | goto again; | ||
| 525 | } | ||
| 526 | |||
| 527 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 528 | ClearPageChecked(page); | ||
| 529 | out: | ||
| 530 | unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | ||
| 531 | out_page: | ||
| 532 | unlock_page(page); | ||
| 533 | page_cache_release(page); | ||
| 534 | } | ||
| 535 | |||
| 536 | /* | ||
| 537 | * There are a few paths in the higher layers of the kernel that directly | ||
| 538 | * set the page dirty bit without asking the filesystem if it is a | ||
| 539 | * good idea. This causes problems because we want to make sure COW | ||
| 540 | * properly happens and the data=ordered rules are followed. | ||
| 541 | * | ||
| 542 | * In our case any range that doesn't have the EXTENT_ORDERED bit set | ||
| 543 | * hasn't been properly setup for IO. We kick off an async process | ||
| 544 | * to fix it up. The async helper will wait for ordered extents, set | ||
| 545 | * the delalloc bit and make it safe to write the page. | ||
| 546 | */ | ||
| 547 | int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | ||
| 548 | { | ||
| 549 | struct inode *inode = page->mapping->host; | ||
| 550 | struct btrfs_writepage_fixup *fixup; | ||
| 551 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 552 | int ret; | ||
| 553 | |||
| 554 | ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | ||
| 555 | EXTENT_ORDERED, 0); | ||
| 556 | if (ret) | ||
| 557 | return 0; | ||
| 558 | |||
| 559 | if (PageChecked(page)) | ||
| 560 | return -EAGAIN; | ||
| 561 | |||
| 562 | fixup = kzalloc(sizeof(*fixup), GFP_NOFS); | ||
| 563 | if (!fixup) | ||
| 564 | return -EAGAIN; | ||
| 565 | |||
| 566 | SetPageChecked(page); | ||
| 567 | page_cache_get(page); | ||
| 568 | fixup->work.func = btrfs_writepage_fixup_worker; | ||
| 569 | fixup->page = page; | ||
| 570 | btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); | ||
| 571 | return -EAGAIN; | ||
| 572 | } | ||
| 573 | |||
| 574 | /* as ordered data IO finishes, this gets called so we can finish | ||
| 575 | * an ordered extent if the range of bytes in the file it covers are | ||
| 576 | * fully written. | ||
| 577 | */ | ||
| 578 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | ||
| 579 | { | ||
| 580 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 581 | struct btrfs_trans_handle *trans; | ||
| 582 | struct btrfs_ordered_extent *ordered_extent; | ||
| 583 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 584 | struct btrfs_file_extent_item *extent_item; | ||
| 585 | struct btrfs_path *path = NULL; | ||
| 586 | struct extent_buffer *leaf; | ||
| 587 | u64 alloc_hint = 0; | ||
| 588 | struct list_head list; | ||
| 589 | struct btrfs_key ins; | ||
| 590 | int ret; | ||
| 591 | |||
| 592 | ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); | ||
| 593 | if (!ret) | ||
| 594 | return 0; | ||
| 595 | |||
| 596 | trans = btrfs_join_transaction(root, 1); | ||
| 597 | |||
| 598 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | ||
| 599 | BUG_ON(!ordered_extent); | ||
| 600 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) | ||
| 601 | goto nocow; | ||
| 602 | |||
| 603 | path = btrfs_alloc_path(); | ||
| 604 | BUG_ON(!path); | ||
| 605 | |||
| 606 | lock_extent(io_tree, ordered_extent->file_offset, | ||
| 607 | ordered_extent->file_offset + ordered_extent->len - 1, | ||
| 608 | GFP_NOFS); | ||
| 609 | |||
| 610 | INIT_LIST_HEAD(&list); | ||
| 611 | |||
| 612 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 613 | |||
| 614 | ret = btrfs_drop_extents(trans, root, inode, | ||
| 615 | ordered_extent->file_offset, | ||
| 616 | ordered_extent->file_offset + | ||
| 617 | ordered_extent->len, | ||
| 618 | ordered_extent->file_offset, &alloc_hint); | ||
| 619 | BUG_ON(ret); | ||
| 620 | |||
| 621 | ins.objectid = inode->i_ino; | ||
| 622 | ins.offset = ordered_extent->file_offset; | ||
| 623 | ins.type = BTRFS_EXTENT_DATA_KEY; | ||
| 624 | ret = btrfs_insert_empty_item(trans, root, path, &ins, | ||
| 625 | sizeof(*extent_item)); | ||
| 626 | BUG_ON(ret); | ||
| 627 | leaf = path->nodes[0]; | ||
| 628 | extent_item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 629 | struct btrfs_file_extent_item); | ||
| 630 | btrfs_set_file_extent_generation(leaf, extent_item, trans->transid); | ||
| 631 | btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG); | ||
| 632 | btrfs_set_file_extent_disk_bytenr(leaf, extent_item, | ||
| 633 | ordered_extent->start); | ||
| 634 | btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, | ||
| 635 | ordered_extent->len); | ||
| 636 | btrfs_set_file_extent_offset(leaf, extent_item, 0); | ||
| 637 | btrfs_set_file_extent_num_bytes(leaf, extent_item, | ||
| 638 | ordered_extent->len); | ||
| 639 | btrfs_mark_buffer_dirty(leaf); | ||
| 640 | |||
| 641 | btrfs_drop_extent_cache(inode, ordered_extent->file_offset, | ||
| 642 | ordered_extent->file_offset + | ||
| 643 | ordered_extent->len - 1, 0); | ||
| 644 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 645 | |||
| 646 | ins.objectid = ordered_extent->start; | ||
| 647 | ins.offset = ordered_extent->len; | ||
| 648 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 649 | ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, | ||
| 650 | root->root_key.objectid, | ||
| 651 | trans->transid, inode->i_ino, &ins); | ||
| 652 | BUG_ON(ret); | ||
| 653 | btrfs_release_path(root, path); | ||
| 654 | |||
| 655 | inode_add_bytes(inode, ordered_extent->len); | ||
| 656 | unlock_extent(io_tree, ordered_extent->file_offset, | ||
| 657 | ordered_extent->file_offset + ordered_extent->len - 1, | ||
| 658 | GFP_NOFS); | ||
| 659 | nocow: | ||
| 660 | add_pending_csums(trans, inode, ordered_extent->file_offset, | ||
| 661 | &ordered_extent->list); | ||
| 662 | |||
| 663 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 664 | btrfs_ordered_update_i_size(inode, ordered_extent); | ||
| 665 | btrfs_update_inode(trans, root, inode); | ||
| 666 | btrfs_remove_ordered_extent(inode, ordered_extent); | ||
| 667 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 668 | |||
| 669 | /* once for us */ | ||
| 670 | btrfs_put_ordered_extent(ordered_extent); | ||
| 671 | /* once for the tree */ | ||
| 672 | btrfs_put_ordered_extent(ordered_extent); | ||
| 673 | |||
| 674 | btrfs_end_transaction(trans, root); | ||
| 675 | if (path) | ||
| 676 | btrfs_free_path(path); | ||
| 677 | return 0; | ||
| 678 | } | ||
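/*
 * Editor's note: two different btree keys are built in the function above.
 * (inode->i_ino, BTRFS_EXTENT_DATA_KEY, file_offset) names the file extent
 * item inserted into the fs tree, while (ordered_extent->start,
 * BTRFS_EXTENT_ITEM_KEY, ordered_extent->len) is the allocation record
 * passed to btrfs_alloc_reserved_extent() for the extent tree.
 */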
| 679 | |||
| 680 | int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | ||
| 681 | struct extent_state *state, int uptodate) | ||
| 682 | { | ||
| 683 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | ||
| 684 | } | ||
| 685 | |||
| 686 | /* | ||
| 687 | * When IO fails, either with EIO or a csum verification failure, we | ||
| 688 | * try other mirrors that might have a good copy of the data. This | ||
| 689 | * io_failure_record is used to record state as we go through all the | ||
| 690 | * mirrors. If another mirror has good data, the page is set up to date | ||
| 691 | * and things continue. If a good mirror can't be found, the original | ||
| 692 | * bio end_io callback is called to indicate things have failed. | ||
| 693 | */ | ||
| 694 | struct io_failure_record { | ||
| 695 | struct page *page; | ||
| 696 | u64 start; | ||
| 697 | u64 len; | ||
| 698 | u64 logical; | ||
| 699 | int last_mirror; | ||
| 700 | }; | ||
| 701 | |||
| 702 | int btrfs_io_failed_hook(struct bio *failed_bio, | ||
| 703 | struct page *page, u64 start, u64 end, | ||
| 704 | struct extent_state *state) | ||
| 705 | { | ||
| 706 | struct io_failure_record *failrec = NULL; | ||
| 707 | u64 private; | ||
| 708 | struct extent_map *em; | ||
| 709 | struct inode *inode = page->mapping->host; | ||
| 710 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
| 711 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 712 | struct bio *bio; | ||
| 713 | int num_copies; | ||
| 714 | int ret; | ||
| 715 | int rw; | ||
| 716 | u64 logical; | ||
| 717 | |||
| 718 | ret = get_state_private(failure_tree, start, &private); | ||
| 719 | if (ret) { | ||
| 720 | failrec = kmalloc(sizeof(*failrec), GFP_NOFS); | ||
| 721 | if (!failrec) | ||
| 722 | return -ENOMEM; | ||
| 723 | failrec->start = start; | ||
| 724 | failrec->len = end - start + 1; | ||
| 725 | failrec->last_mirror = 0; | ||
| 726 | |||
| 727 | spin_lock(&em_tree->lock); | ||
| 728 | em = lookup_extent_mapping(em_tree, start, failrec->len); | ||
| 729 | if (em->start > start || em->start + em->len < start) { | ||
| 730 | free_extent_map(em); | ||
| 731 | em = NULL; | ||
| 732 | } | ||
| 733 | spin_unlock(&em_tree->lock); | ||
| 734 | |||
| 735 | if (!em || IS_ERR(em)) { | ||
| 736 | kfree(failrec); | ||
| 737 | return -EIO; | ||
| 738 | } | ||
| 739 | logical = start - em->start; | ||
| 740 | logical = em->block_start + logical; | ||
| 741 | failrec->logical = logical; | ||
| 742 | free_extent_map(em); | ||
| 743 | set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | | ||
| 744 | EXTENT_DIRTY, GFP_NOFS); | ||
| 745 | set_state_private(failure_tree, start, | ||
| 746 | (u64)(unsigned long)failrec); | ||
| 747 | } else { | ||
| 748 | failrec = (struct io_failure_record *)(unsigned long)private; | ||
| 749 | } | ||
| 750 | num_copies = btrfs_num_copies( | ||
| 751 | &BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
| 752 | failrec->logical, failrec->len); | ||
| 753 | failrec->last_mirror++; | ||
| 754 | if (!state) { | ||
| 755 | spin_lock_irq(&BTRFS_I(inode)->io_tree.lock); | ||
| 756 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, | ||
| 757 | failrec->start, | ||
| 758 | EXTENT_LOCKED); | ||
| 759 | if (state && state->start != failrec->start) | ||
| 760 | state = NULL; | ||
| 761 | spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock); | ||
| 762 | } | ||
| 763 | if (!state || failrec->last_mirror > num_copies) { | ||
| 764 | set_state_private(failure_tree, failrec->start, 0); | ||
| 765 | clear_extent_bits(failure_tree, failrec->start, | ||
| 766 | failrec->start + failrec->len - 1, | ||
| 767 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | ||
| 768 | kfree(failrec); | ||
| 769 | return -EIO; | ||
| 770 | } | ||
| 771 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 772 | bio->bi_private = state; | ||
| 773 | bio->bi_end_io = failed_bio->bi_end_io; | ||
| 774 | bio->bi_sector = failrec->logical >> 9; | ||
| 775 | bio->bi_bdev = failed_bio->bi_bdev; | ||
| 776 | bio->bi_size = 0; | ||
| 777 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | ||
| 778 | if (failed_bio->bi_rw & (1 << BIO_RW)) | ||
| 779 | rw = WRITE; | ||
| 780 | else | ||
| 781 | rw = READ; | ||
| 782 | |||
| 783 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | ||
| 784 | failrec->last_mirror); | ||
| 785 | return 0; | ||
| 786 | } | ||
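/*
 * Editor's note, illustrative flow: on a two-copy profile num_copies == 2,
 * so the hook above resubmits the page with last_mirror = 1 and then 2;
 * once last_mirror > num_copies the failure record is torn down and -EIO
 * propagates so the original bio's end_io sees the failure.
 */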
| 787 | |||
| 788 | /* | ||
| 789 | * each time an IO finishes, we do a fast check in the IO failure tree | ||
| 790 | * to see if we need to process or clean up an io_failure_record | ||
| 791 | */ | ||
| 792 | int btrfs_clean_io_failures(struct inode *inode, u64 start) | ||
| 793 | { | ||
| 794 | u64 private; | ||
| 795 | u64 private_failure; | ||
| 796 | struct io_failure_record *failure; | ||
| 797 | int ret; | ||
| 798 | |||
| 799 | private = 0; | ||
| 800 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | ||
| 801 | (u64)-1, 1, EXTENT_DIRTY)) { | ||
| 802 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
| 803 | start, &private_failure); | ||
| 804 | if (ret == 0) { | ||
| 805 | failure = (struct io_failure_record *)(unsigned long) | ||
| 806 | private_failure; | ||
| 807 | set_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
| 808 | failure->start, 0); | ||
| 809 | clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, | ||
| 810 | failure->start, | ||
| 811 | failure->start + failure->len - 1, | ||
| 812 | EXTENT_DIRTY | EXTENT_LOCKED, | ||
| 813 | GFP_NOFS); | ||
| 814 | kfree(failure); | ||
| 815 | } | ||
| 816 | } | ||
| 817 | return 0; | ||
| 818 | } | ||
| 819 | |||
| 820 | /* | ||
| 821 | * when reads are done, we need to check csums to verify the data is correct. | ||
| 822 | * if there's a match, we allow the bio to finish. If not, we go through | ||
| 823 | * the io_failure_record routines to find good copies | ||
| 824 | */ | ||
| 825 | int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | ||
| 826 | struct extent_state *state) | ||
| 827 | { | ||
| 828 | size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); | ||
| 829 | struct inode *inode = page->mapping->host; | ||
| 830 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 831 | char *kaddr; | ||
| 832 | u64 private = ~(u32)0; | ||
| 833 | int ret; | ||
| 834 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 835 | u32 csum = ~(u32)0; | ||
| 836 | unsigned long flags; | ||
| 837 | |||
| 838 | if (btrfs_test_opt(root, NODATASUM) || | ||
| 839 | btrfs_test_flag(inode, NODATASUM)) | ||
| 840 | return 0; | ||
| 841 | if (state && state->start == start) { | ||
| 842 | private = state->private; | ||
| 843 | ret = 0; | ||
| 844 | } else { | ||
| 845 | ret = get_state_private(io_tree, start, &private); | ||
| 846 | } | ||
| 847 | local_irq_save(flags); | ||
| 848 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
| 849 | if (ret) { | ||
| 850 | goto zeroit; | ||
| 851 | } | ||
| 852 | csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1); | ||
| 853 | btrfs_csum_final(csum, (char *)&csum); | ||
| 854 | if (csum != private) { | ||
| 855 | goto zeroit; | ||
| 856 | } | ||
| 857 | kunmap_atomic(kaddr, KM_IRQ0); | ||
| 858 | local_irq_restore(flags); | ||
| 859 | |||
| 860 | /* if the io failure tree for this inode is non-empty, | ||
| 861 | * check to see if we've recovered from a failed IO | ||
| 862 | */ | ||
| 863 | btrfs_clean_io_failures(inode, start); | ||
| 864 | return 0; | ||
| 865 | |||
| 866 | zeroit: | ||
| 867 | printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n", | ||
| 868 | page->mapping->host->i_ino, (unsigned long long)start, csum, | ||
| 869 | private); | ||
| 870 | memset(kaddr + offset, 1, end - start + 1); | ||
| 871 | flush_dcache_page(page); | ||
| 872 | kunmap_atomic(kaddr, KM_IRQ0); | ||
| 873 | local_irq_restore(flags); | ||
| 874 | if (private == 0) | ||
| 875 | return 0; | ||
| 876 | return -EIO; | ||
| 877 | } | ||
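/*
 * Editor's sketch, assuming the crc32c-based btrfs_csum_data()/
 * btrfs_csum_final() helpers: verifying one block by hand would look
 * roughly like
 *
 *	u32 crc = ~(u32)0;				// seed
 *	crc = crc32c(crc, kaddr + offset, end - start + 1);
 *	crc = ~crc;					// btrfs_csum_final()
 *	ok  = (crc == (u32)private);			// saved at submit time
 *
 * where private holds the checksum stashed in the io_tree when the bio
 * was submitted.
 */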
| 878 | |||
| 879 | /* | ||
| 880 | * This creates an orphan entry for the given inode in case something goes | ||
| 881 | * wrong in the middle of an unlink/truncate. | ||
| 882 | */ | ||
| 883 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | ||
| 884 | { | ||
| 885 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 886 | int ret = 0; | ||
| 887 | |||
| 888 | spin_lock(&root->list_lock); | ||
| 889 | |||
| 890 | /* already on the orphan list, we're good */ | ||
| 891 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
| 892 | spin_unlock(&root->list_lock); | ||
| 893 | return 0; | ||
| 894 | } | ||
| 895 | |||
| 896 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
| 897 | |||
| 898 | spin_unlock(&root->list_lock); | ||
| 899 | |||
| 900 | /* | ||
| 901 | * insert an orphan item to track this unlinked/truncated file | ||
| 902 | */ | ||
| 903 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
| 904 | |||
| 905 | return ret; | ||
| 906 | } | ||
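/*
 * Editor's note: the orphan item inserted above is keyed as
 *
 *	key.objectid = BTRFS_ORPHAN_OBJECTID;
 *	key.type     = BTRFS_ORPHAN_ITEM_KEY;
 *	key.offset   = inode->i_ino;
 *
 * which is exactly the shape btrfs_orphan_cleanup() below searches for,
 * reading the inode number back out of the key offset.
 */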
| 907 | |||
| 908 | /* | ||
| 909 | * We have done the truncate/delete so we can go ahead and remove the orphan | ||
| 910 | * item for this particular inode. | ||
| 911 | */ | ||
| 912 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | ||
| 913 | { | ||
| 914 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 915 | int ret = 0; | ||
| 916 | |||
| 917 | spin_lock(&root->list_lock); | ||
| 918 | |||
| 919 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
| 920 | spin_unlock(&root->list_lock); | ||
| 921 | return 0; | ||
| 922 | } | ||
| 923 | |||
| 924 | list_del_init(&BTRFS_I(inode)->i_orphan); | ||
| 925 | if (!trans) { | ||
| 926 | spin_unlock(&root->list_lock); | ||
| 927 | return 0; | ||
| 928 | } | ||
| 929 | |||
| 930 | spin_unlock(&root->list_lock); | ||
| 931 | |||
| 932 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | ||
| 933 | |||
| 934 | return ret; | ||
| 935 | } | ||
| 936 | |||
| 937 | /* | ||
| 938 | * this cleans up any orphans that may be left on the list from the last use | ||
| 939 | * of this root. | ||
| 940 | */ | ||
| 941 | void btrfs_orphan_cleanup(struct btrfs_root *root) | ||
| 942 | { | ||
| 943 | struct btrfs_path *path; | ||
| 944 | struct extent_buffer *leaf; | ||
| 945 | struct btrfs_item *item; | ||
| 946 | struct btrfs_key key, found_key; | ||
| 947 | struct btrfs_trans_handle *trans; | ||
| 948 | struct inode *inode; | ||
| 949 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | ||
| 950 | |||
| 951 | /* don't do orphan cleanup if the fs is readonly. */ | ||
| 952 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
| 953 | return; | ||
| 954 | |||
| 955 | path = btrfs_alloc_path(); | ||
| 956 | if (!path) | ||
| 957 | return; | ||
| 958 | path->reada = -1; | ||
| 959 | |||
| 960 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
| 961 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | ||
| 962 | key.offset = (u64)-1; | ||
| 963 | |||
| 964 | |||
| 965 | while (1) { | ||
| 966 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 967 | if (ret < 0) { | ||
| 968 | printk(KERN_ERR "Error searching slot for orphan: %d" | ||
| 969 | "\n", ret); | ||
| 970 | break; | ||
| 971 | } | ||
| 972 | |||
| 973 | /* | ||
| 974 | * if ret == 0 we found what we were searching for, which | ||
| 975 | * is weird, but possible, so only screw with the path if we didn't | ||
| 976 | * find the key and see if we have stuff that matches | ||
| 977 | */ | ||
| 978 | if (ret > 0) { | ||
| 979 | if (path->slots[0] == 0) | ||
| 980 | break; | ||
| 981 | path->slots[0]--; | ||
| 982 | } | ||
| 983 | |||
| 984 | /* pull out the item */ | ||
| 985 | leaf = path->nodes[0]; | ||
| 986 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
| 987 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 988 | |||
| 989 | /* make sure the item matches what we want */ | ||
| 990 | if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) | ||
| 991 | break; | ||
| 992 | if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY) | ||
| 993 | break; | ||
| 994 | |||
| 995 | /* release the path since we're done with it */ | ||
| 996 | btrfs_release_path(root, path); | ||
| 997 | |||
| 998 | /* | ||
| 999 | * this is where we are basically btrfs_lookup, without the | ||
| 1000 | * crossing root thing. we store the inode number in the | ||
| 1001 | * offset of the orphan item. | ||
| 1002 | */ | ||
| 1003 | inode = btrfs_iget_locked(root->fs_info->sb, | ||
| 1004 | found_key.offset, root); | ||
| 1005 | if (!inode) | ||
| 1006 | break; | ||
| 1007 | |||
| 1008 | if (inode->i_state & I_NEW) { | ||
| 1009 | BTRFS_I(inode)->root = root; | ||
| 1010 | |||
| 1011 | /* have to set the location manually */ | ||
| 1012 | BTRFS_I(inode)->location.objectid = inode->i_ino; | ||
| 1013 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
| 1014 | BTRFS_I(inode)->location.offset = 0; | ||
| 1015 | |||
| 1016 | btrfs_read_locked_inode(inode); | ||
| 1017 | unlock_new_inode(inode); | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | /* | ||
| 1021 | * add this inode to the orphan list so btrfs_orphan_del does | ||
| 1022 | * the proper thing when we hit it | ||
| 1023 | */ | ||
| 1024 | spin_lock(&root->list_lock); | ||
| 1025 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
| 1026 | spin_unlock(&root->list_lock); | ||
| 1027 | |||
| 1028 | /* | ||
| 1029 | * if this is a bad inode, it means we actually succeeded in | ||
| 1030 | * removing the inode, but not the orphan record, which means | ||
| 1031 | * we need to manually delete the orphan since iput will just | ||
| 1032 | * do a destroy_inode | ||
| 1033 | */ | ||
| 1034 | if (is_bad_inode(inode)) { | ||
| 1035 | trans = btrfs_start_transaction(root, 1); | ||
| 1036 | btrfs_orphan_del(trans, inode); | ||
| 1037 | btrfs_end_transaction(trans, root); | ||
| 1038 | iput(inode); | ||
| 1039 | continue; | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | /* if we have links, this was a truncate, let's do that */ | ||
| 1043 | if (inode->i_nlink) { | ||
| 1044 | nr_truncate++; | ||
| 1045 | btrfs_truncate(inode); | ||
| 1046 | } else { | ||
| 1047 | nr_unlink++; | ||
| 1048 | } | ||
| 1049 | |||
| 1050 | /* this will do delete_inode and everything for us */ | ||
| 1051 | iput(inode); | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | if (nr_unlink) | ||
| 1055 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | ||
| 1056 | if (nr_truncate) | ||
| 1057 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | ||
| 1058 | |||
| 1059 | btrfs_free_path(path); | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | /* | ||
| 1063 | * read an inode from the btree into the in-memory inode | ||
| 1064 | */ | ||
| 1065 | void btrfs_read_locked_inode(struct inode *inode) | ||
| 1066 | { | ||
| 1067 | struct btrfs_path *path; | ||
| 1068 | struct extent_buffer *leaf; | ||
| 1069 | struct btrfs_inode_item *inode_item; | ||
| 1070 | struct btrfs_timespec *tspec; | ||
| 1071 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1072 | struct btrfs_key location; | ||
| 1073 | u64 alloc_group_block; | ||
| 1074 | u32 rdev; | ||
| 1075 | int ret; | ||
| 1076 | |||
| 1077 | path = btrfs_alloc_path(); | ||
| 1078 | BUG_ON(!path); | ||
| 1079 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | ||
| 1080 | |||
| 1081 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); | ||
| 1082 | if (ret) | ||
| 1083 | goto make_bad; | ||
| 1084 | |||
| 1085 | leaf = path->nodes[0]; | ||
| 1086 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1087 | struct btrfs_inode_item); | ||
| 1088 | |||
| 1089 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | ||
| 1090 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | ||
| 1091 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | ||
| 1092 | inode->i_gid = btrfs_inode_gid(leaf, inode_item); | ||
| 1093 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); | ||
| 1094 | |||
| 1095 | tspec = btrfs_inode_atime(inode_item); | ||
| 1096 | inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); | ||
| 1097 | inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); | ||
| 1098 | |||
| 1099 | tspec = btrfs_inode_mtime(inode_item); | ||
| 1100 | inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); | ||
| 1101 | inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); | ||
| 1102 | |||
| 1103 | tspec = btrfs_inode_ctime(inode_item); | ||
| 1104 | inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); | ||
| 1105 | inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); | ||
| 1106 | |||
| 1107 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | ||
| 1108 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | ||
| 1109 | inode->i_generation = BTRFS_I(inode)->generation; | ||
| 1110 | inode->i_rdev = 0; | ||
| 1111 | rdev = btrfs_inode_rdev(leaf, inode_item); | ||
| 1112 | |||
| 1113 | BTRFS_I(inode)->index_cnt = (u64)-1; | ||
| 1114 | |||
| 1115 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | ||
| 1116 | BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, | ||
| 1117 | alloc_group_block); | ||
| 1118 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); | ||
| 1119 | if (!BTRFS_I(inode)->block_group) { | ||
| 1120 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, | ||
| 1121 | NULL, 0, | ||
| 1122 | BTRFS_BLOCK_GROUP_METADATA, 0); | ||
| 1123 | } | ||
| 1124 | btrfs_free_path(path); | ||
| 1125 | inode_item = NULL; | ||
| 1126 | |||
| 1127 | switch (inode->i_mode & S_IFMT) { | ||
| 1128 | case S_IFREG: | ||
| 1129 | inode->i_mapping->a_ops = &btrfs_aops; | ||
| 1130 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
| 1131 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | ||
| 1132 | inode->i_fop = &btrfs_file_operations; | ||
| 1133 | inode->i_op = &btrfs_file_inode_operations; | ||
| 1134 | break; | ||
| 1135 | case S_IFDIR: | ||
| 1136 | inode->i_fop = &btrfs_dir_file_operations; | ||
| 1137 | if (root == root->fs_info->tree_root) | ||
| 1138 | inode->i_op = &btrfs_dir_ro_inode_operations; | ||
| 1139 | else | ||
| 1140 | inode->i_op = &btrfs_dir_inode_operations; | ||
| 1141 | break; | ||
| 1142 | case S_IFLNK: | ||
| 1143 | inode->i_op = &btrfs_symlink_inode_operations; | ||
| 1144 | inode->i_mapping->a_ops = &btrfs_symlink_aops; | ||
| 1145 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
| 1146 | break; | ||
| 1147 | default: | ||
| 1148 | init_special_inode(inode, inode->i_mode, rdev); | ||
| 1149 | break; | ||
| 1150 | } | ||
| 1151 | return; | ||
| 1152 | |||
| 1153 | make_bad: | ||
| 1154 | btrfs_free_path(path); | ||
| 1155 | make_bad_inode(inode); | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | /* | ||
| 1159 | * given a leaf and an inode, copy the inode fields into the leaf | ||
| 1160 | */ | ||
| 1161 | static void fill_inode_item(struct btrfs_trans_handle *trans, | ||
| 1162 | struct extent_buffer *leaf, | ||
| 1163 | struct btrfs_inode_item *item, | ||
| 1164 | struct inode *inode) | ||
| 1165 | { | ||
| 1166 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | ||
| 1167 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | ||
| 1168 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | ||
| 1169 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | ||
| 1170 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | ||
| 1171 | |||
| 1172 | btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), | ||
| 1173 | inode->i_atime.tv_sec); | ||
| 1174 | btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
| 1175 | inode->i_atime.tv_nsec); | ||
| 1176 | |||
| 1177 | btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
| 1178 | inode->i_mtime.tv_sec); | ||
| 1179 | btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
| 1180 | inode->i_mtime.tv_nsec); | ||
| 1181 | |||
| 1182 | btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
| 1183 | inode->i_ctime.tv_sec); | ||
| 1184 | btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
| 1185 | inode->i_ctime.tv_nsec); | ||
| 1186 | |||
| 1187 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | ||
| 1188 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); | ||
| 1189 | btrfs_set_inode_transid(leaf, item, trans->transid); | ||
| 1190 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | ||
| 1191 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | ||
| 1192 | btrfs_set_inode_block_group(leaf, item, | ||
| 1193 | BTRFS_I(inode)->block_group->key.objectid); | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | /* | ||
| 1197 | * copy everything in the in-memory inode into the btree. | ||
| 1198 | */ | ||
| 1199 | int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, | ||
| 1200 | struct btrfs_root *root, | ||
| 1201 | struct inode *inode) | ||
| 1202 | { | ||
| 1203 | struct btrfs_inode_item *inode_item; | ||
| 1204 | struct btrfs_path *path; | ||
| 1205 | struct extent_buffer *leaf; | ||
| 1206 | int ret; | ||
| 1207 | |||
| 1208 | path = btrfs_alloc_path(); | ||
| 1209 | BUG_ON(!path); | ||
| 1210 | ret = btrfs_lookup_inode(trans, root, path, | ||
| 1211 | &BTRFS_I(inode)->location, 1); | ||
| 1212 | if (ret) { | ||
| 1213 | if (ret > 0) | ||
| 1214 | ret = -ENOENT; | ||
| 1215 | goto failed; | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | leaf = path->nodes[0]; | ||
| 1219 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1220 | struct btrfs_inode_item); | ||
| 1221 | |||
| 1222 | fill_inode_item(trans, leaf, inode_item, inode); | ||
| 1223 | btrfs_mark_buffer_dirty(leaf); | ||
| 1224 | btrfs_set_inode_last_trans(trans, inode); | ||
| 1225 | ret = 0; | ||
| 1226 | failed: | ||
| 1227 | btrfs_free_path(path); | ||
| 1228 | return ret; | ||
| 1229 | } | ||
| 1230 | |||
| 1231 | |||
| 1232 | /* | ||
| 1233 | * unlink helper that gets used here in inode.c and in the tree logging | ||
| 1234 | * recovery code. It removes a link in a directory with a given name, and | ||
| 1235 | * also drops the back refs in the inode to the directory | ||
| 1236 | */ | ||
| 1237 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | ||
| 1238 | struct btrfs_root *root, | ||
| 1239 | struct inode *dir, struct inode *inode, | ||
| 1240 | const char *name, int name_len) | ||
| 1241 | { | ||
| 1242 | struct btrfs_path *path; | ||
| 1243 | int ret = 0; | ||
| 1244 | struct extent_buffer *leaf; | ||
| 1245 | struct btrfs_dir_item *di; | ||
| 1246 | struct btrfs_key key; | ||
| 1247 | u64 index; | ||
| 1248 | |||
| 1249 | path = btrfs_alloc_path(); | ||
| 1250 | if (!path) { | ||
| 1251 | ret = -ENOMEM; | ||
| 1252 | goto err; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
| 1256 | name, name_len, -1); | ||
| 1257 | if (IS_ERR(di)) { | ||
| 1258 | ret = PTR_ERR(di); | ||
| 1259 | goto err; | ||
| 1260 | } | ||
| 1261 | if (!di) { | ||
| 1262 | ret = -ENOENT; | ||
| 1263 | goto err; | ||
| 1264 | } | ||
| 1265 | leaf = path->nodes[0]; | ||
| 1266 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
| 1267 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
| 1268 | if (ret) | ||
| 1269 | goto err; | ||
| 1270 | btrfs_release_path(root, path); | ||
| 1271 | |||
| 1272 | ret = btrfs_del_inode_ref(trans, root, name, name_len, | ||
| 1273 | inode->i_ino, | ||
| 1274 | dir->i_ino, &index); | ||
| 1275 | if (ret) { | ||
| 1276 | printk("failed to delete reference to %.*s, " | ||
| 1277 | "inode %lu parent %lu\n", name_len, name, | ||
| 1278 | inode->i_ino, dir->i_ino); | ||
| 1279 | goto err; | ||
| 1280 | } | ||
| 1281 | |||
| 1282 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
| 1283 | index, name, name_len, -1); | ||
| 1284 | if (IS_ERR(di)) { | ||
| 1285 | ret = PTR_ERR(di); | ||
| 1286 | goto err; | ||
| 1287 | } | ||
| 1288 | if (!di) { | ||
| 1289 | ret = -ENOENT; | ||
| 1290 | goto err; | ||
| 1291 | } | ||
| 1292 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
| 1293 | btrfs_release_path(root, path); | ||
| 1294 | |||
| 1295 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, | ||
| 1296 | inode, dir->i_ino); | ||
| 1297 | BUG_ON(ret != 0 && ret != -ENOENT); | ||
| 1298 | if (ret != -ENOENT) | ||
| 1299 | BTRFS_I(dir)->log_dirty_trans = trans->transid; | ||
| 1300 | |||
| 1301 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, | ||
| 1302 | dir, index); | ||
| 1303 | BUG_ON(ret); | ||
| 1304 | err: | ||
| 1305 | btrfs_free_path(path); | ||
| 1306 | if (ret) | ||
| 1307 | goto out; | ||
| 1308 | |||
| 1309 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | ||
| 1310 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
| 1311 | btrfs_update_inode(trans, root, dir); | ||
| 1312 | btrfs_drop_nlink(inode); | ||
| 1313 | ret = btrfs_update_inode(trans, root, inode); | ||
| 1314 | dir->i_sb->s_dirt = 1; | ||
| 1315 | out: | ||
| 1316 | return ret; | ||
| 1317 | } | ||
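/*
 * Editor's note: the dir->i_size - name_len * 2 adjustment above reflects
 * the btrfs convention that a directory's i_size is the total length of
 * the names it holds, counted twice because each name has both a dir item
 * and a dir index entry -- the two entries deleted just before.
 */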
| 1318 | |||
| 1319 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | ||
| 1320 | { | ||
| 1321 | struct btrfs_root *root; | ||
| 1322 | struct btrfs_trans_handle *trans; | ||
| 1323 | struct inode *inode = dentry->d_inode; | ||
| 1324 | int ret; | ||
| 1325 | unsigned long nr = 0; | ||
| 1326 | |||
| 1327 | root = BTRFS_I(dir)->root; | ||
| 1328 | |||
| 1329 | ret = btrfs_check_free_space(root, 1, 1); | ||
| 1330 | if (ret) | ||
| 1331 | goto fail; | ||
| 1332 | |||
| 1333 | trans = btrfs_start_transaction(root, 1); | ||
| 1334 | |||
| 1335 | btrfs_set_trans_block_group(trans, dir); | ||
| 1336 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | ||
| 1337 | dentry->d_name.name, dentry->d_name.len); | ||
| 1338 | |||
| 1339 | if (inode->i_nlink == 0) | ||
| 1340 | ret = btrfs_orphan_add(trans, inode); | ||
| 1341 | |||
| 1342 | nr = trans->blocks_used; | ||
| 1343 | |||
| 1344 | btrfs_end_transaction_throttle(trans, root); | ||
| 1345 | fail: | ||
| 1346 | btrfs_btree_balance_dirty(root, nr); | ||
| 1347 | return ret; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | ||
| 1351 | { | ||
| 1352 | struct inode *inode = dentry->d_inode; | ||
| 1353 | int err = 0; | ||
| 1354 | int ret; | ||
| 1355 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 1356 | struct btrfs_trans_handle *trans; | ||
| 1357 | unsigned long nr = 0; | ||
| 1358 | |||
| 1359 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) { | ||
| 1360 | return -ENOTEMPTY; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | ret = btrfs_check_free_space(root, 1, 1); | ||
| 1364 | if (ret) | ||
| 1365 | goto fail; | ||
| 1366 | |||
| 1367 | trans = btrfs_start_transaction(root, 1); | ||
| 1368 | btrfs_set_trans_block_group(trans, dir); | ||
| 1369 | |||
| 1370 | err = btrfs_orphan_add(trans, inode); | ||
| 1371 | if (err) | ||
| 1372 | goto fail_trans; | ||
| 1373 | |||
| 1374 | /* now the directory is empty */ | ||
| 1375 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | ||
| 1376 | dentry->d_name.name, dentry->d_name.len); | ||
| 1377 | if (!err) { | ||
| 1378 | btrfs_i_size_write(inode, 0); | ||
| 1379 | } | ||
| 1380 | |||
| 1381 | fail_trans: | ||
| 1382 | nr = trans->blocks_used; | ||
| 1383 | ret = btrfs_end_transaction_throttle(trans, root); | ||
| 1384 | fail: | ||
| 1385 | btrfs_btree_balance_dirty(root, nr); | ||
| 1386 | |||
| 1387 | if (ret && !err) | ||
| 1388 | err = ret; | ||
| 1389 | return err; | ||
| 1390 | } | ||
| 1391 | |||
| 1392 | /* | ||
| 1393 | * when truncating bytes in a file, it is possible to avoid reading | ||
| 1394 | * the leaves that contain only checksum items. This can be the | ||
| 1395 | * majority of the IO required to delete a large file, but it must | ||
| 1396 | * be done carefully. | ||
| 1397 | * | ||
| 1398 | * The keys in the level just above the leaves are checked to make sure | ||
| 1399 | * the lowest key in a given leaf is a csum key, and starts at an offset | ||
| 1400 | * after the new size. | ||
| 1401 | * | ||
| 1402 | * Then the key for the next leaf is checked to make sure it also has | ||
| 1403 | * a checksum item for the same file. If it does, we know our target leaf | ||
| 1404 | * contains only checksum items, and it can be safely freed without reading | ||
| 1405 | * it. | ||
| 1406 | * | ||
| 1407 | * This is just an optimization targeted at large files. It may do | ||
| 1408 | * nothing. It will return 0 unless things went badly. | ||
| 1409 | */ | ||
| 1410 | static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, | ||
| 1411 | struct btrfs_root *root, | ||
| 1412 | struct btrfs_path *path, | ||
| 1413 | struct inode *inode, u64 new_size) | ||
| 1414 | { | ||
| 1415 | struct btrfs_key key; | ||
| 1416 | int ret; | ||
| 1417 | int nritems; | ||
| 1418 | struct btrfs_key found_key; | ||
| 1419 | struct btrfs_key other_key; | ||
| 1420 | struct btrfs_leaf_ref *ref; | ||
| 1421 | u64 leaf_gen; | ||
| 1422 | u64 leaf_start; | ||
| 1423 | |||
| 1424 | path->lowest_level = 1; | ||
| 1425 | key.objectid = inode->i_ino; | ||
| 1426 | key.type = BTRFS_CSUM_ITEM_KEY; | ||
| 1427 | key.offset = new_size; | ||
| 1428 | again: | ||
| 1429 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 1430 | if (ret < 0) | ||
| 1431 | goto out; | ||
| 1432 | |||
| 1433 | if (path->nodes[1] == NULL) { | ||
| 1434 | ret = 0; | ||
| 1435 | goto out; | ||
| 1436 | } | ||
| 1437 | ret = 0; | ||
| 1438 | btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]); | ||
| 1439 | nritems = btrfs_header_nritems(path->nodes[1]); | ||
| 1440 | |||
| 1441 | if (!nritems) | ||
| 1442 | goto out; | ||
| 1443 | |||
| 1444 | if (path->slots[1] >= nritems) | ||
| 1445 | goto next_node; | ||
| 1446 | |||
| 1447 | /* did we find a key greater than anything we want to delete? */ | ||
| 1448 | if (found_key.objectid > inode->i_ino || | ||
| 1449 | (found_key.objectid == inode->i_ino && found_key.type > key.type)) | ||
| 1450 | goto out; | ||
| 1451 | |||
| 1452 | /* we check the next key in the node to make sure the leaf contains | ||
| 1453 | * only checksum items. This comparison doesn't work if our | ||
| 1454 | * leaf is the last one in the node | ||
| 1455 | */ | ||
| 1456 | if (path->slots[1] + 1 >= nritems) { | ||
| 1457 | next_node: | ||
| 1458 | /* search forward from the last key in the node, this | ||
| 1459 | * will bring us into the next node in the tree | ||
| 1460 | */ | ||
| 1461 | btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1); | ||
| 1462 | |||
| 1463 | /* unlikely, but we inc below, so check to be safe */ | ||
| 1464 | if (found_key.offset == (u64)-1) | ||
| 1465 | goto out; | ||
| 1466 | |||
| 1467 | /* search_forward needs a path with locks held, do the | ||
| 1468 | * search again for the original key. It is possible | ||
| 1469 | * this will race with a balance and return a path that | ||
| 1470 | * we could modify, but this drop is just an optimization | ||
| 1471 | * and is allowed to miss some leaves. | ||
| 1472 | */ | ||
| 1473 | btrfs_release_path(root, path); | ||
| 1474 | found_key.offset++; | ||
| 1475 | |||
| 1476 | /* setup a max key for search_forward */ | ||
| 1477 | other_key.offset = (u64)-1; | ||
| 1478 | other_key.type = key.type; | ||
| 1479 | other_key.objectid = key.objectid; | ||
| 1480 | |||
| 1481 | path->keep_locks = 1; | ||
| 1482 | ret = btrfs_search_forward(root, &found_key, &other_key, | ||
| 1483 | path, 0, 0); | ||
| 1484 | path->keep_locks = 0; | ||
| 1485 | if (ret || found_key.objectid != key.objectid || | ||
| 1486 | found_key.type != key.type) { | ||
| 1487 | ret = 0; | ||
| 1488 | goto out; | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | key.offset = found_key.offset; | ||
| 1492 | btrfs_release_path(root, path); | ||
| 1493 | cond_resched(); | ||
| 1494 | goto again; | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | /* we know there's one more slot after us in the tree, | ||
| 1498 | * read that key so we can verify it is also a checksum item | ||
| 1499 | */ | ||
| 1500 | btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1); | ||
| 1501 | |||
| 1502 | if (found_key.objectid < inode->i_ino) | ||
| 1503 | goto next_key; | ||
| 1504 | |||
| 1505 | if (found_key.type != key.type || found_key.offset < new_size) | ||
| 1506 | goto next_key; | ||
| 1507 | |||
| 1508 | /* | ||
| 1509 | * if the key for the next leaf isn't a csum key from this objectid, | ||
| 1510 | * we can't be sure there aren't good items inside this leaf. | ||
| 1511 | * Bail out | ||
| 1512 | */ | ||
| 1513 | if (other_key.objectid != inode->i_ino || other_key.type != key.type) | ||
| 1514 | goto out; | ||
| 1515 | |||
| 1516 | leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]); | ||
| 1517 | leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]); | ||
| 1518 | /* | ||
| 1519 | * it is safe to delete this leaf, it contains only | ||
| 1520 | * csum items from this inode at an offset >= new_size | ||
| 1521 | */ | ||
| 1522 | ret = btrfs_del_leaf(trans, root, path, leaf_start); | ||
| 1523 | BUG_ON(ret); | ||
| 1524 | |||
| 1525 | if (root->ref_cows && leaf_gen < trans->transid) { | ||
| 1526 | ref = btrfs_alloc_leaf_ref(root, 0); | ||
| 1527 | if (ref) { | ||
| 1528 | ref->root_gen = root->root_key.offset; | ||
| 1529 | ref->bytenr = leaf_start; | ||
| 1530 | ref->owner = 0; | ||
| 1531 | ref->generation = leaf_gen; | ||
| 1532 | ref->nritems = 0; | ||
| 1533 | |||
| 1534 | ret = btrfs_add_leaf_ref(root, ref, 0); | ||
| 1535 | WARN_ON(ret); | ||
| 1536 | btrfs_free_leaf_ref(root, ref); | ||
| 1537 | } else { | ||
| 1538 | WARN_ON(1); | ||
| 1539 | } | ||
| 1540 | } | ||
| 1541 | next_key: | ||
| 1542 | btrfs_release_path(root, path); | ||
| 1543 | |||
| 1544 | if (other_key.objectid == inode->i_ino && | ||
| 1545 | other_key.type == key.type && other_key.offset > key.offset) { | ||
| 1546 | key.offset = other_key.offset; | ||
| 1547 | cond_resched(); | ||
| 1548 | goto again; | ||
| 1549 | } | ||
| 1550 | ret = 0; | ||
| 1551 | out: | ||
| 1552 | /* fixup any changes we've made to the path */ | ||
| 1553 | path->lowest_level = 0; | ||
| 1554 | path->keep_locks = 0; | ||
| 1555 | btrfs_release_path(root, path); | ||
| 1556 | return ret; | ||
| 1557 | } | ||
| 1558 | |||
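The leaf-dropping shortcut above leans on btrfs key ordering: keys are (objectid, type, offset) triples compared lexicographically, so every checksum item for one inode forms a single contiguous run in the tree, and a leaf whose own key and whose successor key are both csum keys for that inode can hold nothing else. A minimal userspace sketch of that ordering (the numeric type values are illustrative stand-ins, not the real on-disk constants):

#include <stdint.h>
#include <stdio.h>

struct cpu_key {
        uint64_t objectid;
        uint8_t type;
        uint64_t offset;
};

static int comp_keys(const struct cpu_key *a, const struct cpu_key *b)
{
        if (a->objectid != b->objectid)
                return a->objectid < b->objectid ? -1 : 1;
        if (a->type != b->type)
                return a->type < b->type ? -1 : 1;
        if (a->offset != b->offset)
                return a->offset < b->offset ? -1 : 1;
        return 0;
}

int main(void)
{
        /* made-up type numbers; only their relative order matters */
        struct cpu_key extent_data = { 257, 108, 0 };
        struct cpu_key csum        = { 257, 120, 4096 };
        struct cpu_key next_inode  = { 258, 1, 0 };

        /* prints "-1 -1": the csum items of inode 257 sort into one
         * contiguous run, after its file extents and before anything
         * belonging to inode 258 */
        printf("%d %d\n", comp_keys(&extent_data, &csum),
               comp_keys(&csum, &next_inode));
        return 0;
}
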
| 1559 | /* | ||
| 1560 | * this can truncate away extent items, csum items and directory items. | ||
| 1561 | * It starts at a high offset and removes keys until it can't find | ||
| 1562 | * any higher than new_size | ||
| 1563 | * | ||
| 1564 | * csum items that cross the new i_size are truncated to the new size | ||
| 1565 | * as well. | ||
| 1566 | * | ||
| 1567 | * min_type is the minimum key type to truncate down to. If set to 0, this | ||
| 1568 | * will kill all the items on this inode, including the INODE_ITEM_KEY. | ||
| 1569 | */ | ||
| 1570 | noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | ||
| 1571 | struct btrfs_root *root, | ||
| 1572 | struct inode *inode, | ||
| 1573 | u64 new_size, u32 min_type) | ||
| 1574 | { | ||
| 1575 | int ret; | ||
| 1576 | struct btrfs_path *path; | ||
| 1577 | struct btrfs_key key; | ||
| 1578 | struct btrfs_key found_key; | ||
| 1579 | u32 found_type; | ||
| 1580 | struct extent_buffer *leaf; | ||
| 1581 | struct btrfs_file_extent_item *fi; | ||
| 1582 | u64 extent_start = 0; | ||
| 1583 | u64 extent_num_bytes = 0; | ||
| 1584 | u64 item_end = 0; | ||
| 1585 | u64 root_gen = 0; | ||
| 1586 | u64 root_owner = 0; | ||
| 1587 | int found_extent; | ||
| 1588 | int del_item; | ||
| 1589 | int pending_del_nr = 0; | ||
| 1590 | int pending_del_slot = 0; | ||
| 1591 | int extent_type = -1; | ||
| 1592 | u64 mask = root->sectorsize - 1; | ||
| 1593 | |||
| 1594 | if (root->ref_cows) | ||
| 1595 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | ||
| 1596 | path = btrfs_alloc_path(); | ||
| 1597 | BUG_ON(!path); | ||
| 1598 | path->reada = -1; | ||
| 1599 | |||
| 1600 | /* FIXME, add redo link to tree so we don't leak on crash */ | ||
| 1601 | key.objectid = inode->i_ino; | ||
| 1602 | key.offset = (u64)-1; | ||
| 1603 | key.type = (u8)-1; | ||
| 1604 | |||
| 1605 | btrfs_init_path(path); | ||
| 1606 | |||
| 1607 | ret = drop_csum_leaves(trans, root, path, inode, new_size); | ||
| 1608 | BUG_ON(ret); | ||
| 1609 | |||
| 1610 | search_again: | ||
| 1611 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 1612 | if (ret < 0) { | ||
| 1613 | goto error; | ||
| 1614 | } | ||
| 1615 | if (ret > 0) { | ||
| 1616 | /* there are no items in the tree for us to truncate, we're | ||
| 1617 | * done | ||
| 1618 | */ | ||
| 1619 | if (path->slots[0] == 0) { | ||
| 1620 | ret = 0; | ||
| 1621 | goto error; | ||
| 1622 | } | ||
| 1623 | path->slots[0]--; | ||
| 1624 | } | ||
| 1625 | |||
| 1626 | while (1) { | ||
| 1627 | fi = NULL; | ||
| 1628 | leaf = path->nodes[0]; | ||
| 1629 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 1630 | found_type = btrfs_key_type(&found_key); | ||
| 1631 | |||
| 1632 | if (found_key.objectid != inode->i_ino) | ||
| 1633 | break; | ||
| 1634 | |||
| 1635 | if (found_type < min_type) | ||
| 1636 | break; | ||
| 1637 | |||
| 1638 | item_end = found_key.offset; | ||
| 1639 | if (found_type == BTRFS_EXTENT_DATA_KEY) { | ||
| 1640 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1641 | struct btrfs_file_extent_item); | ||
| 1642 | extent_type = btrfs_file_extent_type(leaf, fi); | ||
| 1643 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { | ||
| 1644 | item_end += | ||
| 1645 | btrfs_file_extent_num_bytes(leaf, fi); | ||
| 1646 | } else { /* BTRFS_FILE_EXTENT_INLINE */ | ||
| 1647 | struct btrfs_item *item = btrfs_item_nr(leaf, | ||
| 1648 | path->slots[0]); | ||
| 1649 | item_end += btrfs_file_extent_inline_len(leaf, | ||
| 1650 | item); | ||
| 1651 | } | ||
| 1652 | item_end--; | ||
| 1653 | } | ||
| 1654 | if (found_type == BTRFS_CSUM_ITEM_KEY) { | ||
| 1655 | ret = btrfs_csum_truncate(trans, root, path, | ||
| 1656 | new_size); | ||
| 1657 | BUG_ON(ret); | ||
| 1658 | } | ||
| 1659 | if (item_end < new_size) { | ||
| 1660 | if (found_type == BTRFS_DIR_ITEM_KEY) { | ||
| 1661 | found_type = BTRFS_INODE_ITEM_KEY; | ||
| 1662 | } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { | ||
| 1663 | found_type = BTRFS_CSUM_ITEM_KEY; | ||
| 1664 | } else if (found_type == BTRFS_EXTENT_DATA_KEY) { | ||
| 1665 | found_type = BTRFS_XATTR_ITEM_KEY; | ||
| 1666 | } else if (found_type == BTRFS_XATTR_ITEM_KEY) { | ||
| 1667 | found_type = BTRFS_INODE_REF_KEY; | ||
| 1668 | } else if (found_type) { | ||
| 1669 | found_type--; | ||
| 1670 | } else { | ||
| 1671 | break; | ||
| 1672 | } | ||
| 1673 | btrfs_set_key_type(&key, found_type); | ||
| 1674 | goto next; | ||
| 1675 | } | ||
| 1676 | if (found_key.offset >= new_size) | ||
| 1677 | del_item = 1; | ||
| 1678 | else | ||
| 1679 | del_item = 0; | ||
| 1680 | found_extent = 0; | ||
| 1681 | |||
| 1682 | /* FIXME, shrink the extent if the ref count is only 1 */ | ||
| 1683 | if (found_type != BTRFS_EXTENT_DATA_KEY) | ||
| 1684 | goto delete; | ||
| 1685 | |||
| 1686 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { | ||
| 1687 | u64 num_dec; | ||
| 1688 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 1689 | if (!del_item) { | ||
| 1690 | u64 orig_num_bytes = | ||
| 1691 | btrfs_file_extent_num_bytes(leaf, fi); | ||
| 1692 | extent_num_bytes = new_size - | ||
| 1693 | found_key.offset + root->sectorsize - 1; | ||
| 1694 | extent_num_bytes = extent_num_bytes & | ||
| 1695 | ~((u64)root->sectorsize - 1); | ||
| 1696 | btrfs_set_file_extent_num_bytes(leaf, fi, | ||
| 1697 | extent_num_bytes); | ||
| 1698 | num_dec = (orig_num_bytes - | ||
| 1699 | extent_num_bytes); | ||
| 1700 | if (root->ref_cows && extent_start != 0) | ||
| 1701 | inode_sub_bytes(inode, num_dec); | ||
| 1702 | btrfs_mark_buffer_dirty(leaf); | ||
| 1703 | } else { | ||
| 1704 | extent_num_bytes = | ||
| 1705 | btrfs_file_extent_disk_num_bytes(leaf, | ||
| 1706 | fi); | ||
| 1707 | /* FIXME blocksize != 4096 */ | ||
| 1708 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); | ||
| 1709 | if (extent_start != 0) { | ||
| 1710 | found_extent = 1; | ||
| 1711 | if (root->ref_cows) | ||
| 1712 | inode_sub_bytes(inode, num_dec); | ||
| 1713 | } | ||
| 1714 | root_gen = btrfs_header_generation(leaf); | ||
| 1715 | root_owner = btrfs_header_owner(leaf); | ||
| 1716 | } | ||
| 1717 | } else { /* BTRFS_FILE_EXTENT_INLINE */ | ||
| 1718 | if (!del_item) { | ||
| 1719 | u32 size = new_size - found_key.offset; | ||
| 1720 | |||
| 1721 | if (root->ref_cows) { | ||
| 1722 | inode_sub_bytes(inode, item_end + 1 - | ||
| 1723 | new_size); | ||
| 1724 | } | ||
| 1725 | size = | ||
| 1726 | btrfs_file_extent_calc_inline_size(size); | ||
| 1727 | ret = btrfs_truncate_item(trans, root, path, | ||
| 1728 | size, 1); | ||
| 1729 | BUG_ON(ret); | ||
| 1730 | } else if (root->ref_cows) { | ||
| 1731 | inode_sub_bytes(inode, item_end + 1 - | ||
| 1732 | found_key.offset); | ||
| 1733 | } | ||
| 1734 | } | ||
| 1735 | delete: | ||
| 1736 | if (del_item) { | ||
| 1737 | if (!pending_del_nr) { | ||
| 1738 | /* no pending yet, add ourselves */ | ||
| 1739 | pending_del_slot = path->slots[0]; | ||
| 1740 | pending_del_nr = 1; | ||
| 1741 | } else if (pending_del_nr && | ||
| 1742 | path->slots[0] + 1 == pending_del_slot) { | ||
| 1743 | /* hop on the pending chunk */ | ||
| 1744 | pending_del_nr++; | ||
| 1745 | pending_del_slot = path->slots[0]; | ||
| 1746 | } else { | ||
| 1747 | printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot); | ||
| 1748 | } | ||
| 1749 | } else { | ||
| 1750 | break; | ||
| 1751 | } | ||
| 1752 | if (found_extent) { | ||
| 1753 | ret = btrfs_free_extent(trans, root, extent_start, | ||
| 1754 | extent_num_bytes, | ||
| 1755 | leaf->start, root_owner, | ||
| 1756 | root_gen, inode->i_ino, 0); | ||
| 1757 | BUG_ON(ret); | ||
| 1758 | } | ||
| 1759 | next: | ||
| 1760 | if (path->slots[0] == 0) { | ||
| 1761 | if (pending_del_nr) | ||
| 1762 | goto del_pending; | ||
| 1763 | btrfs_release_path(root, path); | ||
| 1764 | goto search_again; | ||
| 1765 | } | ||
| 1766 | |||
| 1767 | path->slots[0]--; | ||
| 1768 | if (pending_del_nr && | ||
| 1769 | path->slots[0] + 1 != pending_del_slot) { | ||
| 1770 | struct btrfs_key debug; | ||
| 1771 | del_pending: | ||
| 1772 | btrfs_item_key_to_cpu(path->nodes[0], &debug, | ||
| 1773 | pending_del_slot); | ||
| 1774 | ret = btrfs_del_items(trans, root, path, | ||
| 1775 | pending_del_slot, | ||
| 1776 | pending_del_nr); | ||
| 1777 | BUG_ON(ret); | ||
| 1778 | pending_del_nr = 0; | ||
| 1779 | btrfs_release_path(root, path); | ||
| 1780 | goto search_again; | ||
| 1781 | } | ||
| 1782 | } | ||
| 1783 | ret = 0; | ||
| 1784 | error: | ||
| 1785 | if (pending_del_nr) { | ||
| 1786 | ret = btrfs_del_items(trans, root, path, pending_del_slot, | ||
| 1787 | pending_del_nr); | ||
| 1788 | } | ||
| 1789 | btrfs_free_path(path); | ||
| 1790 | inode->i_sb->s_dirt = 1; | ||
| 1791 | return ret; | ||
| 1792 | } | ||
| 1793 | |||
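The pending_del_slot/pending_del_nr pair in btrfs_truncate_inode_items batches deletions: the loop walks a leaf backwards, so each newly doomed item normally sits in the slot just below the current run, and a single btrfs_del_items call can drop the whole run at once. A standalone sketch of that batching pattern (doomed[] is a made-up stand-in for the per-item del_item decision):

#include <stdio.h>

static void del_items(int slot, int nr)
{
        printf("delete %d item(s) starting at slot %d\n", nr, slot);
}

int main(void)
{
        /* 1 marks a slot whose item should be deleted */
        int doomed[] = { 0, 1, 1, 1, 0, 1, 1 };
        int pending_nr = 0, pending_slot = 0;
        int slot;

        for (slot = 6; slot >= 0; slot--) {
                if (doomed[slot]) {
                        if (!pending_nr) {
                                /* no pending yet, start a run */
                                pending_slot = slot;
                                pending_nr = 1;
                        } else if (slot + 1 == pending_slot) {
                                /* hop on the pending chunk */
                                pending_nr++;
                                pending_slot = slot;
                        }
                } else if (pending_nr) {
                        /* run broken: flush it in one call */
                        del_items(pending_slot, pending_nr);
                        pending_nr = 0;
                }
        }
        if (pending_nr)
                del_items(pending_slot, pending_nr);
        return 0;
}
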
| 1794 | /* | ||
| 1795 | * taken from block_truncate_page, but does COW as it zeroes out | ||
| 1796 | * any bytes left in the last page in the file. | ||
| 1797 | */ | ||
| 1798 | static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | ||
| 1799 | { | ||
| 1800 | struct inode *inode = mapping->host; | ||
| 1801 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1802 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 1803 | struct btrfs_ordered_extent *ordered; | ||
| 1804 | char *kaddr; | ||
| 1805 | u32 blocksize = root->sectorsize; | ||
| 1806 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | ||
| 1807 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
| 1808 | struct page *page; | ||
| 1809 | int ret = 0; | ||
| 1810 | u64 page_start; | ||
| 1811 | u64 page_end; | ||
| 1812 | |||
| 1813 | if ((offset & (blocksize - 1)) == 0) | ||
| 1814 | goto out; | ||
| 1815 | |||
| 1816 | ret = -ENOMEM; | ||
| 1817 | again: | ||
| 1818 | page = grab_cache_page(mapping, index); | ||
| 1819 | if (!page) | ||
| 1820 | goto out; | ||
| 1821 | |||
| 1822 | page_start = page_offset(page); | ||
| 1823 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
| 1824 | |||
| 1825 | if (!PageUptodate(page)) { | ||
| 1826 | ret = btrfs_readpage(NULL, page); | ||
| 1827 | lock_page(page); | ||
| 1828 | if (page->mapping != mapping) { | ||
| 1829 | unlock_page(page); | ||
| 1830 | page_cache_release(page); | ||
| 1831 | goto again; | ||
| 1832 | } | ||
| 1833 | if (!PageUptodate(page)) { | ||
| 1834 | ret = -EIO; | ||
| 1835 | goto out_unlock; | ||
| 1836 | } | ||
| 1837 | } | ||
| 1838 | wait_on_page_writeback(page); | ||
| 1839 | |||
| 1840 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 1841 | set_page_extent_mapped(page); | ||
| 1842 | |||
| 1843 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
| 1844 | if (ordered) { | ||
| 1845 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 1846 | unlock_page(page); | ||
| 1847 | page_cache_release(page); | ||
| 1848 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 1849 | btrfs_put_ordered_extent(ordered); | ||
| 1850 | goto again; | ||
| 1851 | } | ||
| 1852 | |||
| 1853 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 1854 | ret = 0; | ||
| 1855 | if (offset != PAGE_CACHE_SIZE) { | ||
| 1856 | kaddr = kmap(page); | ||
| 1857 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | ||
| 1858 | flush_dcache_page(page); | ||
| 1859 | kunmap(page); | ||
| 1860 | } | ||
| 1861 | ClearPageChecked(page); | ||
| 1862 | set_page_dirty(page); | ||
| 1863 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 1864 | |||
| 1865 | out_unlock: | ||
| 1866 | unlock_page(page); | ||
| 1867 | page_cache_release(page); | ||
| 1868 | out: | ||
| 1869 | return ret; | ||
| 1870 | } | ||
| 1871 | |||
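The early exit at the top of btrfs_truncate_page is pure alignment math: only a size that is not block-aligned leaves a partial block, and then everything from the in-page offset to the end of the page gets zeroed. A userspace sketch of the arithmetic, assuming 4K pages and blocks:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t page_size = 4096, blocksize = 4096;
        uint64_t from = 10000;                      /* new i_size */
        uint64_t index = from >> 12;                /* page index */
        uint64_t offset = from & (page_size - 1);   /* offset in page */

        if ((offset & (blocksize - 1)) == 0) {
                printf("block aligned, nothing to zero\n");
        } else {
                /* prints: zero 2288 bytes at offset 1808 of page 2 */
                printf("zero %llu bytes at offset %llu of page %llu\n",
                       (unsigned long long)(page_size - offset),
                       (unsigned long long)offset,
                       (unsigned long long)index);
        }
        return 0;
}
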
| 1872 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | ||
| 1873 | { | ||
| 1874 | struct inode *inode = dentry->d_inode; | ||
| 1875 | int err; | ||
| 1876 | |||
| 1877 | err = inode_change_ok(inode, attr); | ||
| 1878 | if (err) | ||
| 1879 | return err; | ||
| 1880 | |||
| 1881 | if (S_ISREG(inode->i_mode) && | ||
| 1882 | attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { | ||
| 1883 | struct btrfs_trans_handle *trans; | ||
| 1884 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1885 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 1886 | |||
| 1887 | u64 mask = root->sectorsize - 1; | ||
| 1888 | u64 hole_start = (inode->i_size + mask) & ~mask; | ||
| 1889 | u64 block_end = (attr->ia_size + mask) & ~mask; | ||
| 1890 | u64 hole_size; | ||
| 1891 | u64 alloc_hint = 0; | ||
| 1892 | |||
| 1893 | if (attr->ia_size <= hole_start) | ||
| 1894 | goto out; | ||
| 1895 | |||
| 1896 | err = btrfs_check_free_space(root, 1, 0); | ||
| 1897 | if (err) | ||
| 1898 | goto fail; | ||
| 1899 | |||
| 1900 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | ||
| 1901 | |||
| 1902 | hole_size = block_end - hole_start; | ||
| 1903 | while (1) { | ||
| 1904 | struct btrfs_ordered_extent *ordered; | ||
| 1905 | btrfs_wait_ordered_range(inode, hole_start, hole_size); | ||
| 1906 | |||
| 1907 | lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); | ||
| 1908 | ordered = btrfs_lookup_ordered_extent(inode, hole_start); | ||
| 1909 | if (ordered) { | ||
| 1910 | unlock_extent(io_tree, hole_start, | ||
| 1911 | block_end - 1, GFP_NOFS); | ||
| 1912 | btrfs_put_ordered_extent(ordered); | ||
| 1913 | } else { | ||
| 1914 | break; | ||
| 1915 | } | ||
| 1916 | } | ||
| 1917 | |||
| 1918 | trans = btrfs_start_transaction(root, 1); | ||
| 1919 | btrfs_set_trans_block_group(trans, inode); | ||
| 1920 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 1921 | err = btrfs_drop_extents(trans, root, inode, | ||
| 1922 | hole_start, block_end, hole_start, | ||
| 1923 | &alloc_hint); | ||
| 1924 | |||
| 1925 | if (alloc_hint != EXTENT_MAP_INLINE) { | ||
| 1926 | err = btrfs_insert_file_extent(trans, root, | ||
| 1927 | inode->i_ino, | ||
| 1928 | hole_start, 0, 0, | ||
| 1929 | hole_size, 0); | ||
| 1930 | btrfs_drop_extent_cache(inode, hole_start, | ||
| 1931 | (u64)-1, 0); | ||
| 1932 | btrfs_check_file(root, inode); | ||
| 1933 | } | ||
| 1934 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 1935 | btrfs_end_transaction(trans, root); | ||
| 1936 | unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); | ||
| 1937 | if (err) | ||
| 1938 | return err; | ||
| 1939 | } | ||
| 1940 | out: | ||
| 1941 | err = inode_setattr(inode, attr); | ||
| 1942 | |||
| 1943 | if (!err && (attr->ia_valid & ATTR_MODE)) | ||
| 1944 | err = btrfs_acl_chmod(inode); | ||
| 1945 | fail: | ||
| 1946 | return err; | ||
| 1947 | } | ||
| 1948 | |||
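The hole logic in btrfs_setattr rounds both the old and new size up to the sector size and only inserts a hole extent when the new size lands past the aligned old size. A sketch of that computation with made-up sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t sectorsize = 4096, mask = sectorsize - 1;
        uint64_t i_size = 6000;    /* current size */
        uint64_t ia_size = 20000;  /* requested size */

        uint64_t hole_start = (i_size + mask) & ~mask;   /* 8192 */
        uint64_t block_end  = (ia_size + mask) & ~mask;  /* 20480 */

        if (ia_size <= hole_start)
                printf("no hole needed\n");
        else
                /* prints: hole [8192, 20480), 12288 bytes */
                printf("hole [%llu, %llu), %llu bytes\n",
                       (unsigned long long)hole_start,
                       (unsigned long long)block_end,
                       (unsigned long long)(block_end - hole_start));
        return 0;
}
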
| 1949 | void btrfs_delete_inode(struct inode *inode) | ||
| 1950 | { | ||
| 1951 | struct btrfs_trans_handle *trans; | ||
| 1952 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1953 | unsigned long nr; | ||
| 1954 | int ret; | ||
| 1955 | |||
| 1956 | truncate_inode_pages(&inode->i_data, 0); | ||
| 1957 | if (is_bad_inode(inode)) { | ||
| 1958 | btrfs_orphan_del(NULL, inode); | ||
| 1959 | goto no_delete; | ||
| 1960 | } | ||
| 1961 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
| 1962 | |||
| 1963 | btrfs_i_size_write(inode, 0); | ||
| 1964 | trans = btrfs_start_transaction(root, 1); | ||
| 1965 | |||
| 1966 | btrfs_set_trans_block_group(trans, inode); | ||
| 1967 | ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); | ||
| 1968 | if (ret) { | ||
| 1969 | btrfs_orphan_del(NULL, inode); | ||
| 1970 | goto no_delete_lock; | ||
| 1971 | } | ||
| 1972 | |||
| 1973 | btrfs_orphan_del(trans, inode); | ||
| 1974 | |||
| 1975 | nr = trans->blocks_used; | ||
| 1976 | clear_inode(inode); | ||
| 1977 | |||
| 1978 | btrfs_end_transaction(trans, root); | ||
| 1979 | btrfs_btree_balance_dirty(root, nr); | ||
| 1980 | return; | ||
| 1981 | |||
| 1982 | no_delete_lock: | ||
| 1983 | nr = trans->blocks_used; | ||
| 1984 | btrfs_end_transaction(trans, root); | ||
| 1985 | btrfs_btree_balance_dirty(root, nr); | ||
| 1986 | no_delete: | ||
| 1987 | clear_inode(inode); | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | /* | ||
| 1991 | * this returns the key found in the dir entry in the location pointer. | ||
| 1992 | * If no dir entries were found, location->objectid is 0. | ||
| 1993 | */ | ||
| 1994 | static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, | ||
| 1995 | struct btrfs_key *location) | ||
| 1996 | { | ||
| 1997 | const char *name = dentry->d_name.name; | ||
| 1998 | int namelen = dentry->d_name.len; | ||
| 1999 | struct btrfs_dir_item *di; | ||
| 2000 | struct btrfs_path *path; | ||
| 2001 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2002 | int ret = 0; | ||
| 2003 | |||
| 2004 | path = btrfs_alloc_path(); | ||
| 2005 | BUG_ON(!path); | ||
| 2006 | |||
| 2007 | di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, | ||
| 2008 | namelen, 0); | ||
| 2009 | if (IS_ERR(di)) | ||
| 2010 | ret = PTR_ERR(di); | ||
| 2011 | if (!di || IS_ERR(di)) { | ||
| 2012 | goto out_err; | ||
| 2013 | } | ||
| 2014 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); | ||
| 2015 | out: | ||
| 2016 | btrfs_free_path(path); | ||
| 2017 | return ret; | ||
| 2018 | out_err: | ||
| 2019 | location->objectid = 0; | ||
| 2020 | goto out; | ||
| 2021 | } | ||
| 2022 | |||
| 2023 | /* | ||
| 2024 | * when we hit a tree root in a directory, the btrfs part of the inode | ||
| 2025 | * needs to be changed to reflect the root directory of the tree root. This | ||
| 2026 | * is kind of like crossing a mount point. | ||
| 2027 | */ | ||
| 2028 | static int fixup_tree_root_location(struct btrfs_root *root, | ||
| 2029 | struct btrfs_key *location, | ||
| 2030 | struct btrfs_root **sub_root, | ||
| 2031 | struct dentry *dentry) | ||
| 2032 | { | ||
| 2033 | struct btrfs_root_item *ri; | ||
| 2034 | |||
| 2035 | if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) | ||
| 2036 | return 0; | ||
| 2037 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | ||
| 2038 | return 0; | ||
| 2039 | |||
| 2040 | *sub_root = btrfs_read_fs_root(root->fs_info, location, | ||
| 2041 | dentry->d_name.name, | ||
| 2042 | dentry->d_name.len); | ||
| 2043 | if (IS_ERR(*sub_root)) | ||
| 2044 | return PTR_ERR(*sub_root); | ||
| 2045 | |||
| 2046 | ri = &(*sub_root)->root_item; | ||
| 2047 | location->objectid = btrfs_root_dirid(ri); | ||
| 2048 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | ||
| 2049 | location->offset = 0; | ||
| 2050 | |||
| 2051 | return 0; | ||
| 2052 | } | ||
| 2053 | |||
| 2054 | static noinline void init_btrfs_i(struct inode *inode) | ||
| 2055 | { | ||
| 2056 | struct btrfs_inode *bi = BTRFS_I(inode); | ||
| 2057 | |||
| 2058 | bi->i_acl = NULL; | ||
| 2059 | bi->i_default_acl = NULL; | ||
| 2060 | |||
| 2061 | bi->generation = 0; | ||
| 2062 | bi->last_trans = 0; | ||
| 2063 | bi->logged_trans = 0; | ||
| 2064 | bi->delalloc_bytes = 0; | ||
| 2065 | bi->disk_i_size = 0; | ||
| 2066 | bi->flags = 0; | ||
| 2067 | bi->index_cnt = (u64)-1; | ||
| 2068 | bi->log_dirty_trans = 0; | ||
| 2069 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | ||
| 2070 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | ||
| 2071 | inode->i_mapping, GFP_NOFS); | ||
| 2072 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | ||
| 2073 | inode->i_mapping, GFP_NOFS); | ||
| 2074 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | ||
| 2075 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
| 2076 | mutex_init(&BTRFS_I(inode)->csum_mutex); | ||
| 2077 | mutex_init(&BTRFS_I(inode)->extent_mutex); | ||
| 2078 | mutex_init(&BTRFS_I(inode)->log_mutex); | ||
| 2079 | } | ||
| 2080 | |||
| 2081 | static int btrfs_init_locked_inode(struct inode *inode, void *p) | ||
| 2082 | { | ||
| 2083 | struct btrfs_iget_args *args = p; | ||
| 2084 | inode->i_ino = args->ino; | ||
| 2085 | init_btrfs_i(inode); | ||
| 2086 | BTRFS_I(inode)->root = args->root; | ||
| 2087 | return 0; | ||
| 2088 | } | ||
| 2089 | |||
| 2090 | static int btrfs_find_actor(struct inode *inode, void *opaque) | ||
| 2091 | { | ||
| 2092 | struct btrfs_iget_args *args = opaque; | ||
| 2093 | return (args->ino == inode->i_ino && | ||
| 2094 | args->root == BTRFS_I(inode)->root); | ||
| 2095 | } | ||
| 2096 | |||
| 2097 | struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, | ||
| 2098 | struct btrfs_root *root, int wait) | ||
| 2099 | { | ||
| 2100 | struct inode *inode; | ||
| 2101 | struct btrfs_iget_args args; | ||
| 2102 | args.ino = objectid; | ||
| 2103 | args.root = root; | ||
| 2104 | |||
| 2105 | if (wait) { | ||
| 2106 | inode = ilookup5(s, objectid, btrfs_find_actor, | ||
| 2107 | (void *)&args); | ||
| 2108 | } else { | ||
| 2109 | inode = ilookup5_nowait(s, objectid, btrfs_find_actor, | ||
| 2110 | (void *)&args); | ||
| 2111 | } | ||
| 2112 | return inode; | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | ||
| 2116 | struct btrfs_root *root) | ||
| 2117 | { | ||
| 2118 | struct inode *inode; | ||
| 2119 | struct btrfs_iget_args args; | ||
| 2120 | args.ino = objectid; | ||
| 2121 | args.root = root; | ||
| 2122 | |||
| 2123 | inode = iget5_locked(s, objectid, btrfs_find_actor, | ||
| 2124 | btrfs_init_locked_inode, | ||
| 2125 | (void *)&args); | ||
| 2126 | return inode; | ||
| 2127 | } | ||
| 2128 | |||
| 2129 | /* Get an inode object given its location and corresponding root. | ||
| 2130 | * Returns in *is_new if the inode was read from disk | ||
| 2131 | */ | ||
| 2132 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | ||
| 2133 | struct btrfs_root *root, int *is_new) | ||
| 2134 | { | ||
| 2135 | struct inode *inode; | ||
| 2136 | |||
| 2137 | inode = btrfs_iget_locked(s, location->objectid, root); | ||
| 2138 | if (!inode) | ||
| 2139 | return ERR_PTR(-EACCES); | ||
| 2140 | |||
| 2141 | if (inode->i_state & I_NEW) { | ||
| 2142 | BTRFS_I(inode)->root = root; | ||
| 2143 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | ||
| 2144 | btrfs_read_locked_inode(inode); | ||
| 2145 | unlock_new_inode(inode); | ||
| 2146 | if (is_new) | ||
| 2147 | *is_new = 1; | ||
| 2148 | } else { | ||
| 2149 | if (is_new) | ||
| 2150 | *is_new = 0; | ||
| 2151 | } | ||
| 2152 | |||
| 2153 | return inode; | ||
| 2154 | } | ||
| 2155 | |||
| 2156 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | ||
| 2157 | struct nameidata *nd) | ||
| 2158 | { | ||
| 2159 | struct inode *inode; | ||
| 2160 | struct btrfs_inode *bi = BTRFS_I(dir); | ||
| 2161 | struct btrfs_root *root = bi->root; | ||
| 2162 | struct btrfs_root *sub_root = root; | ||
| 2163 | struct btrfs_key location; | ||
| 2164 | int ret, new, do_orphan = 0; | ||
| 2165 | |||
| 2166 | if (dentry->d_name.len > BTRFS_NAME_LEN) | ||
| 2167 | return ERR_PTR(-ENAMETOOLONG); | ||
| 2168 | |||
| 2169 | ret = btrfs_inode_by_name(dir, dentry, &location); | ||
| 2170 | |||
| 2171 | if (ret < 0) | ||
| 2172 | return ERR_PTR(ret); | ||
| 2173 | |||
| 2174 | inode = NULL; | ||
| 2175 | if (location.objectid) { | ||
| 2176 | ret = fixup_tree_root_location(root, &location, &sub_root, | ||
| 2177 | dentry); | ||
| 2178 | if (ret < 0) | ||
| 2179 | return ERR_PTR(ret); | ||
| 2180 | if (ret > 0) | ||
| 2181 | return ERR_PTR(-ENOENT); | ||
| 2182 | inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); | ||
| 2183 | if (IS_ERR(inode)) | ||
| 2184 | return ERR_CAST(inode); | ||
| 2185 | |||
| 2186 | /* the inode and parent dir are two different roots */ | ||
| 2187 | if (new && root != sub_root) { | ||
| 2188 | igrab(inode); | ||
| 2189 | sub_root->inode = inode; | ||
| 2190 | do_orphan = 1; | ||
| 2191 | } | ||
| 2192 | } | ||
| 2193 | |||
| 2194 | if (unlikely(do_orphan)) | ||
| 2195 | btrfs_orphan_cleanup(sub_root); | ||
| 2196 | |||
| 2197 | return d_splice_alias(inode, dentry); | ||
| 2198 | } | ||
| 2199 | |||
| 2200 | static unsigned char btrfs_filetype_table[] = { | ||
| 2201 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | ||
| 2202 | }; | ||
| 2203 | |||
| 2204 | static int btrfs_real_readdir(struct file *filp, void *dirent, | ||
| 2205 | filldir_t filldir) | ||
| 2206 | { | ||
| 2207 | struct inode *inode = filp->f_dentry->d_inode; | ||
| 2208 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2209 | struct btrfs_item *item; | ||
| 2210 | struct btrfs_dir_item *di; | ||
| 2211 | struct btrfs_key key; | ||
| 2212 | struct btrfs_key found_key; | ||
| 2213 | struct btrfs_path *path; | ||
| 2214 | int ret; | ||
| 2215 | u32 nritems; | ||
| 2216 | struct extent_buffer *leaf; | ||
| 2217 | int slot; | ||
| 2218 | int advance; | ||
| 2219 | unsigned char d_type; | ||
| 2220 | int over = 0; | ||
| 2221 | u32 di_cur; | ||
| 2222 | u32 di_total; | ||
| 2223 | u32 di_len; | ||
| 2224 | int key_type = BTRFS_DIR_INDEX_KEY; | ||
| 2225 | char tmp_name[32]; | ||
| 2226 | char *name_ptr; | ||
| 2227 | int name_len; | ||
| 2228 | |||
| 2229 | /* FIXME, use a real flag for deciding about the key type */ | ||
| 2230 | if (root->fs_info->tree_root == root) | ||
| 2231 | key_type = BTRFS_DIR_ITEM_KEY; | ||
| 2232 | |||
| 2233 | /* special case for "." */ | ||
| 2234 | if (filp->f_pos == 0) { | ||
| 2235 | over = filldir(dirent, ".", 1, | ||
| 2236 | 1, inode->i_ino, | ||
| 2237 | DT_DIR); | ||
| 2238 | if (over) | ||
| 2239 | return 0; | ||
| 2240 | filp->f_pos = 1; | ||
| 2241 | } | ||
| 2242 | /* special case for .., just use the back ref */ | ||
| 2243 | if (filp->f_pos == 1) { | ||
| 2244 | u64 pino = parent_ino(filp->f_path.dentry); | ||
| 2245 | over = filldir(dirent, "..", 2, | ||
| 2246 | 2, pino, DT_DIR); | ||
| 2247 | if (over) | ||
| 2248 | return 0; | ||
| 2249 | filp->f_pos = 2; | ||
| 2250 | } | ||
| 2251 | |||
| 2252 | path = btrfs_alloc_path(); | ||
| 2253 | if (!path) | ||
| 2254 | return -ENOMEM; | ||
| 2255 | path->reada = 2; | ||
| 2254 | |||
| 2255 | btrfs_set_key_type(&key, key_type); | ||
| 2256 | key.offset = filp->f_pos; | ||
| 2257 | key.objectid = inode->i_ino; | ||
| 2258 | |||
| 2259 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 2260 | if (ret < 0) | ||
| 2261 | goto err; | ||
| 2262 | advance = 0; | ||
| 2263 | |||
| 2264 | while (1) { | ||
| 2265 | leaf = path->nodes[0]; | ||
| 2266 | nritems = btrfs_header_nritems(leaf); | ||
| 2267 | slot = path->slots[0]; | ||
| 2268 | if (advance || slot >= nritems) { | ||
| 2269 | if (slot >= nritems - 1) { | ||
| 2270 | ret = btrfs_next_leaf(root, path); | ||
| 2271 | if (ret) | ||
| 2272 | break; | ||
| 2273 | leaf = path->nodes[0]; | ||
| 2274 | nritems = btrfs_header_nritems(leaf); | ||
| 2275 | slot = path->slots[0]; | ||
| 2276 | } else { | ||
| 2277 | slot++; | ||
| 2278 | path->slots[0]++; | ||
| 2279 | } | ||
| 2280 | } | ||
| 2281 | advance = 1; | ||
| 2282 | item = btrfs_item_nr(leaf, slot); | ||
| 2283 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 2284 | |||
| 2285 | if (found_key.objectid != key.objectid) | ||
| 2286 | break; | ||
| 2287 | if (btrfs_key_type(&found_key) != key_type) | ||
| 2288 | break; | ||
| 2289 | if (found_key.offset < filp->f_pos) | ||
| 2290 | continue; | ||
| 2291 | |||
| 2292 | filp->f_pos = found_key.offset; | ||
| 2293 | |||
| 2294 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | ||
| 2295 | di_cur = 0; | ||
| 2296 | di_total = btrfs_item_size(leaf, item); | ||
| 2297 | |||
| 2298 | while (di_cur < di_total) { | ||
| 2299 | struct btrfs_key location; | ||
| 2300 | |||
| 2301 | name_len = btrfs_dir_name_len(leaf, di); | ||
| 2302 | if (name_len <= sizeof(tmp_name)) { | ||
| 2303 | name_ptr = tmp_name; | ||
| 2304 | } else { | ||
| 2305 | name_ptr = kmalloc(name_len, GFP_NOFS); | ||
| 2306 | if (!name_ptr) { | ||
| 2307 | ret = -ENOMEM; | ||
| 2308 | goto err; | ||
| 2309 | } | ||
| 2310 | } | ||
| 2311 | read_extent_buffer(leaf, name_ptr, | ||
| 2312 | (unsigned long)(di + 1), name_len); | ||
| 2313 | |||
| 2314 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; | ||
| 2315 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | ||
| 2316 | over = filldir(dirent, name_ptr, name_len, | ||
| 2317 | found_key.offset, location.objectid, | ||
| 2318 | d_type); | ||
| 2319 | |||
| 2320 | if (name_ptr != tmp_name) | ||
| 2321 | kfree(name_ptr); | ||
| 2322 | |||
| 2323 | if (over) | ||
| 2324 | goto nopos; | ||
| 2325 | |||
| 2326 | di_len = btrfs_dir_name_len(leaf, di) + | ||
| 2327 | btrfs_dir_data_len(leaf, di) + sizeof(*di); | ||
| 2328 | di_cur += di_len; | ||
| 2329 | di = (struct btrfs_dir_item *)((char *)di + di_len); | ||
| 2330 | } | ||
| 2331 | } | ||
| 2332 | |||
| 2333 | /* Reached end of directory/root. Bump pos past the last item. */ | ||
| 2334 | if (key_type == BTRFS_DIR_INDEX_KEY) | ||
| 2335 | filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); | ||
| 2336 | else | ||
| 2337 | filp->f_pos++; | ||
| 2338 | nopos: | ||
| 2339 | ret = 0; | ||
| 2340 | err: | ||
| 2341 | btrfs_free_path(path); | ||
| 2342 | return ret; | ||
| 2343 | } | ||
| 2344 | |||
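A single DIR_ITEM leaf item can pack several directory entries back to back, which is why the inner loop above walks a byte cursor forward by header-plus-name-plus-data for each entry. A simplified userspace sketch of that packed layout (the real struct btrfs_dir_item also carries a location key, transid and type; this two-field header is only a stand-in):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct dir_item_hdr {
        uint16_t name_len;
        uint16_t data_len;
} __attribute__((packed));

int main(void)
{
        unsigned char buf[64];
        size_t total = 0, cur;
        struct dir_item_hdr h1 = { 3, 0 }, h2 = { 6, 0 };

        /* pack two entries, "foo" then "barbaz", back to back */
        memcpy(buf + total, &h1, sizeof(h1)); total += sizeof(h1);
        memcpy(buf + total, "foo", 3);        total += 3;
        memcpy(buf + total, &h2, sizeof(h2)); total += sizeof(h2);
        memcpy(buf + total, "barbaz", 6);     total += 6;

        /* the walk mirrors the di_cur/di_len loop above */
        for (cur = 0; cur < total; ) {
                struct dir_item_hdr h;

                memcpy(&h, buf + cur, sizeof(h));
                printf("%.*s\n", h.name_len,
                       (char *)buf + cur + sizeof(h));
                cur += sizeof(h) + h.name_len + h.data_len;
        }
        return 0;
}
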
| 2345 | int btrfs_write_inode(struct inode *inode, int wait) | ||
| 2346 | { | ||
| 2347 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2348 | struct btrfs_trans_handle *trans; | ||
| 2349 | int ret = 0; | ||
| 2350 | |||
| 2351 | if (root->fs_info->closing > 1) | ||
| 2352 | return 0; | ||
| 2353 | |||
| 2354 | if (wait) { | ||
| 2355 | trans = btrfs_join_transaction(root, 1); | ||
| 2356 | btrfs_set_trans_block_group(trans, inode); | ||
| 2357 | ret = btrfs_commit_transaction(trans, root); | ||
| 2358 | } | ||
| 2359 | return ret; | ||
| 2360 | } | ||
| 2361 | |||
| 2362 | /* | ||
| 2363 | * This is somewhat expensive, updating the tree every time the | ||
| 2364 | * inode changes. But it is most likely to find the inode in cache. | ||
| 2365 | * FIXME: needs more benchmarking; there are no reasons other than performance | ||
| 2366 | * to keep or drop this code. | ||
| 2367 | */ | ||
| 2368 | void btrfs_dirty_inode(struct inode *inode) | ||
| 2369 | { | ||
| 2370 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2371 | struct btrfs_trans_handle *trans; | ||
| 2372 | |||
| 2373 | trans = btrfs_join_transaction(root, 1); | ||
| 2374 | btrfs_set_trans_block_group(trans, inode); | ||
| 2375 | btrfs_update_inode(trans, root, inode); | ||
| 2376 | btrfs_end_transaction(trans, root); | ||
| 2377 | } | ||
| 2378 | |||
| 2379 | /* | ||
| 2380 | * find the highest existing sequence number in a directory | ||
| 2381 | * and then set the in-memory index_cnt variable to reflect | ||
| 2382 | * free sequence numbers | ||
| 2383 | */ | ||
| 2384 | static int btrfs_set_inode_index_count(struct inode *inode) | ||
| 2385 | { | ||
| 2386 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2387 | struct btrfs_key key, found_key; | ||
| 2388 | struct btrfs_path *path; | ||
| 2389 | struct extent_buffer *leaf; | ||
| 2390 | int ret; | ||
| 2391 | |||
| 2392 | key.objectid = inode->i_ino; | ||
| 2393 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | ||
| 2394 | key.offset = (u64)-1; | ||
| 2395 | |||
| 2396 | path = btrfs_alloc_path(); | ||
| 2397 | if (!path) | ||
| 2398 | return -ENOMEM; | ||
| 2399 | |||
| 2400 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 2401 | if (ret < 0) | ||
| 2402 | goto out; | ||
| 2403 | /* FIXME: we should be able to handle this */ | ||
| 2404 | if (ret == 0) | ||
| 2405 | goto out; | ||
| 2406 | ret = 0; | ||
| 2407 | |||
| 2408 | /* | ||
| 2409 | * MAGIC NUMBER EXPLANATION: | ||
| 2410 | * directory searches are based on f_pos, and '.' and '..' occupy | ||
| 2411 | * f_pos 0 and 1 respectively, so every real entry has to start | ||
| 2412 | * at index 2 | ||
| 2413 | */ | ||
| 2414 | if (path->slots[0] == 0) { | ||
| 2415 | BTRFS_I(inode)->index_cnt = 2; | ||
| 2416 | goto out; | ||
| 2417 | } | ||
| 2418 | |||
| 2419 | path->slots[0]--; | ||
| 2420 | |||
| 2421 | leaf = path->nodes[0]; | ||
| 2422 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 2423 | |||
| 2424 | if (found_key.objectid != inode->i_ino || | ||
| 2425 | btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { | ||
| 2426 | BTRFS_I(inode)->index_cnt = 2; | ||
| 2427 | goto out; | ||
| 2428 | } | ||
| 2429 | |||
| 2430 | BTRFS_I(inode)->index_cnt = found_key.offset + 1; | ||
| 2431 | out: | ||
| 2432 | btrfs_free_path(path); | ||
| 2433 | return ret; | ||
| 2434 | } | ||
| 2435 | |||
| 2436 | /* | ||
| 2437 | * helper to find a free sequence number in a given directory. The current | ||
| 2438 | * code is very simple; later versions will do smarter things in the btree | ||
| 2439 | */ | ||
| 2440 | static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, | ||
| 2441 | u64 *index) | ||
| 2442 | { | ||
| 2443 | int ret = 0; | ||
| 2444 | |||
| 2445 | if (BTRFS_I(dir)->index_cnt == (u64)-1) { | ||
| 2446 | ret = btrfs_set_inode_index_count(dir); | ||
| 2447 | if (ret) { | ||
| 2448 | return ret; | ||
| 2449 | } | ||
| 2450 | } | ||
| 2451 | |||
| 2452 | *index = BTRFS_I(dir)->index_cnt; | ||
| 2453 | BTRFS_I(dir)->index_cnt++; | ||
| 2454 | |||
| 2455 | return ret; | ||
| 2456 | } | ||
| 2457 | |||
| 2458 | static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | ||
| 2459 | struct btrfs_root *root, | ||
| 2460 | struct inode *dir, | ||
| 2461 | const char *name, int name_len, | ||
| 2462 | u64 ref_objectid, | ||
| 2463 | u64 objectid, | ||
| 2464 | struct btrfs_block_group_cache *group, | ||
| 2465 | int mode, u64 *index) | ||
| 2466 | { | ||
| 2467 | struct inode *inode; | ||
| 2468 | struct btrfs_inode_item *inode_item; | ||
| 2469 | struct btrfs_block_group_cache *new_inode_group; | ||
| 2470 | struct btrfs_key *location; | ||
| 2471 | struct btrfs_path *path; | ||
| 2472 | struct btrfs_inode_ref *ref; | ||
| 2473 | struct btrfs_key key[2]; | ||
| 2474 | u32 sizes[2]; | ||
| 2475 | unsigned long ptr; | ||
| 2476 | int ret; | ||
| 2477 | int owner; | ||
| 2478 | |||
| 2479 | path = btrfs_alloc_path(); | ||
| 2480 | BUG_ON(!path); | ||
| 2481 | |||
| 2482 | inode = new_inode(root->fs_info->sb); | ||
| 2483 | if (!inode) { | ||
| 2484 | btrfs_free_path(path); | ||
| 2485 | return ERR_PTR(-ENOMEM); | ||
| 2486 | } | ||
| 2487 | |||
| 2488 | if (dir) { | ||
| 2489 | ret = btrfs_set_inode_index(dir, inode, index); | ||
| 2490 | if (ret) { | ||
| 2491 | btrfs_free_path(path); | ||
| 2492 | return ERR_PTR(ret); | ||
| 2493 | } | ||
| 2494 | } | ||
| 2491 | /* | ||
| 2492 | * index_cnt is ignored for everything but a dir, | ||
| 2493 | * btrfs_set_inode_index_count has an explanation for the magic | ||
| 2494 | * number | ||
| 2495 | */ | ||
| 2496 | init_btrfs_i(inode); | ||
| 2497 | BTRFS_I(inode)->index_cnt = 2; | ||
| 2498 | BTRFS_I(inode)->root = root; | ||
| 2499 | BTRFS_I(inode)->generation = trans->transid; | ||
| 2500 | |||
| 2501 | if (mode & S_IFDIR) | ||
| 2502 | owner = 0; | ||
| 2503 | else | ||
| 2504 | owner = 1; | ||
| 2505 | new_inode_group = btrfs_find_block_group(root, group, 0, | ||
| 2506 | BTRFS_BLOCK_GROUP_METADATA, owner); | ||
| 2507 | if (!new_inode_group) { | ||
| 2508 | printk("find_block group failed\n"); | ||
| 2509 | new_inode_group = group; | ||
| 2510 | } | ||
| 2511 | BTRFS_I(inode)->block_group = new_inode_group; | ||
| 2512 | |||
| 2513 | key[0].objectid = objectid; | ||
| 2514 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | ||
| 2515 | key[0].offset = 0; | ||
| 2516 | |||
| 2517 | key[1].objectid = objectid; | ||
| 2518 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | ||
| 2519 | key[1].offset = ref_objectid; | ||
| 2520 | |||
| 2521 | sizes[0] = sizeof(struct btrfs_inode_item); | ||
| 2522 | sizes[1] = name_len + sizeof(*ref); | ||
| 2523 | |||
| 2524 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); | ||
| 2525 | if (ret != 0) | ||
| 2526 | goto fail; | ||
| 2527 | |||
| 2528 | if (objectid > root->highest_inode) | ||
| 2529 | root->highest_inode = objectid; | ||
| 2530 | |||
| 2531 | inode->i_uid = current->fsuid; | ||
| 2532 | inode->i_gid = current->fsgid; | ||
| 2533 | inode->i_mode = mode; | ||
| 2534 | inode->i_ino = objectid; | ||
| 2535 | inode_set_bytes(inode, 0); | ||
| 2536 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
| 2537 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 2538 | struct btrfs_inode_item); | ||
| 2539 | fill_inode_item(trans, path->nodes[0], inode_item, inode); | ||
| 2540 | |||
| 2541 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | ||
| 2542 | struct btrfs_inode_ref); | ||
| 2543 | btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); | ||
| 2544 | btrfs_set_inode_ref_index(path->nodes[0], ref, *index); | ||
| 2545 | ptr = (unsigned long)(ref + 1); | ||
| 2546 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | ||
| 2547 | |||
| 2548 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 2549 | btrfs_free_path(path); | ||
| 2550 | |||
| 2551 | location = &BTRFS_I(inode)->location; | ||
| 2552 | location->objectid = objectid; | ||
| 2553 | location->offset = 0; | ||
| 2554 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | ||
| 2555 | |||
| 2556 | insert_inode_hash(inode); | ||
| 2557 | return inode; | ||
| 2558 | fail: | ||
| 2559 | if (dir) | ||
| 2560 | BTRFS_I(dir)->index_cnt--; | ||
| 2561 | btrfs_free_path(path); | ||
| 2562 | return ERR_PTR(ret); | ||
| 2563 | } | ||
| 2564 | |||
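btrfs_new_inode reserves space for the inode item and its back reference in a single btrfs_insert_empty_items call, so both item sizes are computed up front; the ref item is a fixed header followed by the raw name bytes. A toy sketch of that size calculation (the struct layouts are stand-ins, not the on-disk formats):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* stand-ins for the on-disk structs; the sizes are illustrative */
struct fake_inode_item { unsigned char bytes[160]; };
struct fake_inode_ref  { uint64_t index; uint16_t name_len; };

int main(void)
{
        const char *name = "file.txt";
        unsigned int sizes[2];

        sizes[0] = sizeof(struct fake_inode_item);
        /* the ref item is a fixed header plus the raw name bytes */
        sizes[1] = sizeof(struct fake_inode_ref) + strlen(name);

        printf("one leaf insert reserving %u + %u bytes\n",
               sizes[0], sizes[1]);
        return 0;
}
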
| 2565 | static inline u8 btrfs_inode_type(struct inode *inode) | ||
| 2566 | { | ||
| 2567 | return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; | ||
| 2568 | } | ||
| 2569 | |||
| 2570 | /* | ||
| 2571 | * utility function to add 'inode' into 'parent_inode' with | ||
| 2572 | * a given name and a given sequence number. | ||
| 2573 | * if 'add_backref' is true, also insert a backref from the | ||
| 2574 | * inode to the parent directory. | ||
| 2575 | */ | ||
| 2576 | int btrfs_add_link(struct btrfs_trans_handle *trans, | ||
| 2577 | struct inode *parent_inode, struct inode *inode, | ||
| 2578 | const char *name, int name_len, int add_backref, u64 index) | ||
| 2579 | { | ||
| 2580 | int ret; | ||
| 2581 | struct btrfs_key key; | ||
| 2582 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; | ||
| 2583 | |||
| 2584 | key.objectid = inode->i_ino; | ||
| 2585 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 2586 | key.offset = 0; | ||
| 2587 | |||
| 2588 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | ||
| 2589 | parent_inode->i_ino, | ||
| 2590 | &key, btrfs_inode_type(inode), | ||
| 2591 | index); | ||
| 2592 | if (ret == 0) { | ||
| 2593 | if (add_backref) { | ||
| 2594 | ret = btrfs_insert_inode_ref(trans, root, | ||
| 2595 | name, name_len, | ||
| 2596 | inode->i_ino, | ||
| 2597 | parent_inode->i_ino, | ||
| 2598 | index); | ||
| 2599 | } | ||
| 2600 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
| 2601 | name_len * 2); | ||
| 2602 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
| 2603 | ret = btrfs_update_inode(trans, root, parent_inode); | ||
| 2604 | } | ||
| 2605 | return ret; | ||
| 2606 | } | ||
| 2607 | |||
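The name_len * 2 in btrfs_add_link is directory size accounting: each link stores the name twice, once in a DIR_ITEM keyed by name hash for lookups and once in a DIR_INDEX keyed by the sequence number for readdir, so i_size grows by twice the name length. A sketch of the arithmetic:

#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *names[] = { "a", "config", "readme.txt" };
        unsigned long long dir_size = 0;
        int i;

        for (i = 0; i < 3; i++)
                dir_size += 2 * strlen(names[i]);

        /* prints: dir i_size = 34, i.e. 2 * (1 + 6 + 10) */
        printf("dir i_size = %llu\n", dir_size);
        return 0;
}
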
| 2608 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, | ||
| 2609 | struct dentry *dentry, struct inode *inode, | ||
| 2610 | int backref, u64 index) | ||
| 2611 | { | ||
| 2612 | int err = btrfs_add_link(trans, dentry->d_parent->d_inode, | ||
| 2613 | inode, dentry->d_name.name, | ||
| 2614 | dentry->d_name.len, backref, index); | ||
| 2615 | if (!err) { | ||
| 2616 | d_instantiate(dentry, inode); | ||
| 2617 | return 0; | ||
| 2618 | } | ||
| 2619 | if (err > 0) | ||
| 2620 | err = -EEXIST; | ||
| 2621 | return err; | ||
| 2622 | } | ||
| 2623 | |||
| 2624 | static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | ||
| 2625 | int mode, dev_t rdev) | ||
| 2626 | { | ||
| 2627 | struct btrfs_trans_handle *trans; | ||
| 2628 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2629 | struct inode *inode = NULL; | ||
| 2630 | int err; | ||
| 2631 | int drop_inode = 0; | ||
| 2632 | u64 objectid; | ||
| 2633 | unsigned long nr = 0; | ||
| 2634 | u64 index = 0; | ||
| 2635 | |||
| 2636 | if (!new_valid_dev(rdev)) | ||
| 2637 | return -EINVAL; | ||
| 2638 | |||
| 2639 | err = btrfs_check_free_space(root, 1, 0); | ||
| 2640 | if (err) | ||
| 2641 | goto fail; | ||
| 2642 | |||
| 2643 | trans = btrfs_start_transaction(root, 1); | ||
| 2644 | btrfs_set_trans_block_group(trans, dir); | ||
| 2645 | |||
| 2646 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 2647 | if (err) { | ||
| 2648 | err = -ENOSPC; | ||
| 2649 | goto out_unlock; | ||
| 2650 | } | ||
| 2651 | |||
| 2652 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | ||
| 2653 | dentry->d_name.len, | ||
| 2654 | dentry->d_parent->d_inode->i_ino, objectid, | ||
| 2655 | BTRFS_I(dir)->block_group, mode, &index); | ||
| 2656 | err = PTR_ERR(inode); | ||
| 2657 | if (IS_ERR(inode)) | ||
| 2658 | goto out_unlock; | ||
| 2659 | |||
| 2660 | err = btrfs_init_acl(inode, dir); | ||
| 2661 | if (err) { | ||
| 2662 | drop_inode = 1; | ||
| 2663 | goto out_unlock; | ||
| 2664 | } | ||
| 2665 | |||
| 2666 | btrfs_set_trans_block_group(trans, inode); | ||
| 2667 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | ||
| 2668 | if (err) | ||
| 2669 | drop_inode = 1; | ||
| 2670 | else { | ||
| 2671 | inode->i_op = &btrfs_special_inode_operations; | ||
| 2672 | init_special_inode(inode, inode->i_mode, rdev); | ||
| 2673 | btrfs_update_inode(trans, root, inode); | ||
| 2674 | } | ||
| 2675 | dir->i_sb->s_dirt = 1; | ||
| 2676 | btrfs_update_inode_block_group(trans, inode); | ||
| 2677 | btrfs_update_inode_block_group(trans, dir); | ||
| 2678 | out_unlock: | ||
| 2679 | nr = trans->blocks_used; | ||
| 2680 | btrfs_end_transaction_throttle(trans, root); | ||
| 2681 | fail: | ||
| 2682 | if (drop_inode) { | ||
| 2683 | inode_dec_link_count(inode); | ||
| 2684 | iput(inode); | ||
| 2685 | } | ||
| 2686 | btrfs_btree_balance_dirty(root, nr); | ||
| 2687 | return err; | ||
| 2688 | } | ||
| 2689 | |||
| 2690 | static int btrfs_create(struct inode *dir, struct dentry *dentry, | ||
| 2691 | int mode, struct nameidata *nd) | ||
| 2692 | { | ||
| 2693 | struct btrfs_trans_handle *trans; | ||
| 2694 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2695 | struct inode *inode = NULL; | ||
| 2696 | int err; | ||
| 2697 | int drop_inode = 0; | ||
| 2698 | unsigned long nr = 0; | ||
| 2699 | u64 objectid; | ||
| 2700 | u64 index = 0; | ||
| 2701 | |||
| 2702 | err = btrfs_check_free_space(root, 1, 0); | ||
| 2703 | if (err) | ||
| 2704 | goto fail; | ||
| 2705 | trans = btrfs_start_transaction(root, 1); | ||
| 2706 | btrfs_set_trans_block_group(trans, dir); | ||
| 2707 | |||
| 2708 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 2709 | if (err) { | ||
| 2710 | err = -ENOSPC; | ||
| 2711 | goto out_unlock; | ||
| 2712 | } | ||
| 2713 | |||
| 2714 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | ||
| 2715 | dentry->d_name.len, | ||
| 2716 | dentry->d_parent->d_inode->i_ino, | ||
| 2717 | objectid, BTRFS_I(dir)->block_group, mode, | ||
| 2718 | &index); | ||
| 2719 | err = PTR_ERR(inode); | ||
| 2720 | if (IS_ERR(inode)) | ||
| 2721 | goto out_unlock; | ||
| 2722 | |||
| 2723 | err = btrfs_init_acl(inode, dir); | ||
| 2724 | if (err) { | ||
| 2725 | drop_inode = 1; | ||
| 2726 | goto out_unlock; | ||
| 2727 | } | ||
| 2728 | |||
| 2729 | btrfs_set_trans_block_group(trans, inode); | ||
| 2730 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | ||
| 2731 | if (err) | ||
| 2732 | drop_inode = 1; | ||
| 2733 | else { | ||
| 2734 | inode->i_mapping->a_ops = &btrfs_aops; | ||
| 2735 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
| 2736 | inode->i_fop = &btrfs_file_operations; | ||
| 2737 | inode->i_op = &btrfs_file_inode_operations; | ||
| 2738 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | ||
| 2739 | } | ||
| 2740 | dir->i_sb->s_dirt = 1; | ||
| 2741 | btrfs_update_inode_block_group(trans, inode); | ||
| 2742 | btrfs_update_inode_block_group(trans, dir); | ||
| 2743 | out_unlock: | ||
| 2744 | nr = trans->blocks_used; | ||
| 2745 | btrfs_end_transaction_throttle(trans, root); | ||
| 2746 | fail: | ||
| 2747 | if (drop_inode) { | ||
| 2748 | inode_dec_link_count(inode); | ||
| 2749 | iput(inode); | ||
| 2750 | } | ||
| 2751 | btrfs_btree_balance_dirty(root, nr); | ||
| 2752 | return err; | ||
| 2753 | } | ||
| 2754 | |||
| 2755 | static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | ||
| 2756 | struct dentry *dentry) | ||
| 2757 | { | ||
| 2758 | struct btrfs_trans_handle *trans; | ||
| 2759 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2760 | struct inode *inode = old_dentry->d_inode; | ||
| 2761 | u64 index; | ||
| 2762 | unsigned long nr = 0; | ||
| 2763 | int err; | ||
| 2764 | int drop_inode = 0; | ||
| 2765 | |||
| 2766 | if (inode->i_nlink == 0) | ||
| 2767 | return -ENOENT; | ||
| 2768 | |||
| 2769 | btrfs_inc_nlink(inode); | ||
| 2770 | err = btrfs_check_free_space(root, 1, 0); | ||
| 2771 | if (err) | ||
| 2772 | goto fail; | ||
| 2773 | err = btrfs_set_inode_index(dir, inode, &index); | ||
| 2774 | if (err) | ||
| 2775 | goto fail; | ||
| 2776 | |||
| 2777 | trans = btrfs_start_transaction(root, 1); | ||
| 2778 | |||
| 2779 | btrfs_set_trans_block_group(trans, dir); | ||
| 2780 | atomic_inc(&inode->i_count); | ||
| 2781 | |||
| 2782 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | ||
| 2783 | |||
| 2784 | if (err) | ||
| 2785 | drop_inode = 1; | ||
| 2786 | |||
| 2787 | dir->i_sb->s_dirt = 1; | ||
| 2788 | btrfs_update_inode_block_group(trans, dir); | ||
| 2789 | err = btrfs_update_inode(trans, root, inode); | ||
| 2790 | |||
| 2791 | if (err) | ||
| 2792 | drop_inode = 1; | ||
| 2793 | |||
| 2794 | nr = trans->blocks_used; | ||
| 2795 | btrfs_end_transaction_throttle(trans, root); | ||
| 2796 | fail: | ||
| 2797 | if (drop_inode) { | ||
| 2798 | inode_dec_link_count(inode); | ||
| 2799 | iput(inode); | ||
| 2800 | } | ||
| 2801 | btrfs_btree_balance_dirty(root, nr); | ||
| 2802 | return err; | ||
| 2803 | } | ||
| 2804 | |||
| 2805 | static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
| 2806 | { | ||
| 2807 | struct inode *inode = NULL; | ||
| 2808 | struct btrfs_trans_handle *trans; | ||
| 2809 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2810 | int err = 0; | ||
| 2811 | int drop_on_err = 0; | ||
| 2812 | u64 objectid = 0; | ||
| 2813 | u64 index = 0; | ||
| 2814 | unsigned long nr = 1; | ||
| 2815 | |||
| 2816 | err = btrfs_check_free_space(root, 1, 0); | ||
| 2817 | if (err) | ||
| 2818 | goto out_unlock; | ||
| 2819 | |||
| 2820 | trans = btrfs_start_transaction(root, 1); | ||
| 2821 | if (IS_ERR(trans)) { | ||
| 2822 | err = PTR_ERR(trans); | ||
| 2823 | goto out_unlock; | ||
| 2824 | } | ||
| 2825 | |||
| 2826 | btrfs_set_trans_block_group(trans, dir); | ||
| 2827 | |||
| 2828 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 2829 | if (err) { | ||
| 2830 | err = -ENOSPC; | ||
| 2831 | goto out_unlock; | ||
| 2832 | } | ||
| 2833 | |||
| 2834 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | ||
| 2835 | dentry->d_name.len, | ||
| 2836 | dentry->d_parent->d_inode->i_ino, objectid, | ||
| 2837 | BTRFS_I(dir)->block_group, S_IFDIR | mode, | ||
| 2838 | &index); | ||
| 2839 | if (IS_ERR(inode)) { | ||
| 2840 | err = PTR_ERR(inode); | ||
| 2841 | goto out_fail; | ||
| 2842 | } | ||
| 2843 | |||
| 2844 | drop_on_err = 1; | ||
| 2845 | |||
| 2846 | err = btrfs_init_acl(inode, dir); | ||
| 2847 | if (err) | ||
| 2848 | goto out_fail; | ||
| 2849 | |||
| 2850 | inode->i_op = &btrfs_dir_inode_operations; | ||
| 2851 | inode->i_fop = &btrfs_dir_file_operations; | ||
| 2852 | btrfs_set_trans_block_group(trans, inode); | ||
| 2853 | |||
| 2854 | btrfs_i_size_write(inode, 0); | ||
| 2855 | err = btrfs_update_inode(trans, root, inode); | ||
| 2856 | if (err) | ||
| 2857 | goto out_fail; | ||
| 2858 | |||
| 2859 | err = btrfs_add_link(trans, dentry->d_parent->d_inode, | ||
| 2860 | inode, dentry->d_name.name, | ||
| 2861 | dentry->d_name.len, 0, index); | ||
| 2862 | if (err) | ||
| 2863 | goto out_fail; | ||
| 2864 | |||
| 2865 | d_instantiate(dentry, inode); | ||
| 2866 | drop_on_err = 0; | ||
| 2867 | dir->i_sb->s_dirt = 1; | ||
| 2868 | btrfs_update_inode_block_group(trans, inode); | ||
| 2869 | btrfs_update_inode_block_group(trans, dir); | ||
| 2870 | |||
| 2871 | out_fail: | ||
| 2872 | nr = trans->blocks_used; | ||
| 2873 | btrfs_end_transaction_throttle(trans, root); | ||
| 2874 | |||
| 2875 | out_unlock: | ||
| 2876 | if (drop_on_err) | ||
| 2877 | iput(inode); | ||
| 2878 | btrfs_btree_balance_dirty(root, nr); | ||
| 2879 | return err; | ||
| 2880 | } | ||
| 2881 | |||
| 2882 | /* helper for btrfs_get_extent. Given an existing extent in the tree, | ||
| 2883 | * and an extent that you want to insert, deal with overlap and insert | ||
| 2884 | * the new extent into the tree. | ||
| 2885 | */ | ||
| 2886 | static int merge_extent_mapping(struct extent_map_tree *em_tree, | ||
| 2887 | struct extent_map *existing, | ||
| 2888 | struct extent_map *em, | ||
| 2889 | u64 map_start, u64 map_len) | ||
| 2890 | { | ||
| 2891 | u64 start_diff; | ||
| 2892 | |||
| 2893 | BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); | ||
| 2894 | start_diff = map_start - em->start; | ||
| 2895 | em->start = map_start; | ||
| 2896 | em->len = map_len; | ||
| 2897 | if (em->block_start < EXTENT_MAP_LAST_BYTE) | ||
| 2898 | em->block_start += start_diff; | ||
| 2899 | return add_extent_mapping(em_tree, em); | ||
| 2900 | } | ||
| 2901 | |||
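merge_extent_mapping clamps the new map to the uncovered range and shifts its disk start by however far its logical start moved. A userspace sketch with concrete numbers (it omits the EXTENT_MAP_LAST_BYTE check that skips the shift for holes and inline extents):

#include <stdint.h>
#include <stdio.h>

struct ext_map { uint64_t start, len, block_start; };

static void clamp_map(struct ext_map *em, uint64_t map_start,
                      uint64_t map_len)
{
        uint64_t start_diff = map_start - em->start;

        em->start = map_start;
        em->len = map_len;
        em->block_start += start_diff;
}

int main(void)
{
        /* file range [0, 16K) at disk byte 1M, but [0, 8K) is
         * already covered by an existing map */
        struct ext_map em = { 0, 16384, 1048576 };

        clamp_map(&em, 8192, 8192);
        /* prints: logical 8192 len 8192 disk 1056768 */
        printf("logical %llu len %llu disk %llu\n",
               (unsigned long long)em.start,
               (unsigned long long)em.len,
               (unsigned long long)em.block_start);
        return 0;
}
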
| 2902 | /* | ||
| 2903 | * a bit scary, this does extent mapping from logical file offset to the disk. | ||
| 2904 | * the ugly parts come from merging extents from the disk with the | ||
| 2905 | * in-ram representation. This gets more complex because of the data=ordered code, | ||
| 2906 | * where the in-ram extents might be locked pending data=ordered completion. | ||
| 2907 | * | ||
| 2908 | * This also copies inline extents directly into the page. | ||
| 2909 | */ | ||
| 2910 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | ||
| 2911 | size_t pg_offset, u64 start, u64 len, | ||
| 2912 | int create) | ||
| 2913 | { | ||
| 2914 | int ret; | ||
| 2915 | int err = 0; | ||
| 2916 | u64 bytenr; | ||
| 2917 | u64 extent_start = 0; | ||
| 2918 | u64 extent_end = 0; | ||
| 2919 | u64 objectid = inode->i_ino; | ||
| 2920 | u32 found_type; | ||
| 2921 | struct btrfs_path *path = NULL; | ||
| 2922 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2923 | struct btrfs_file_extent_item *item; | ||
| 2924 | struct extent_buffer *leaf; | ||
| 2925 | struct btrfs_key found_key; | ||
| 2926 | struct extent_map *em = NULL; | ||
| 2927 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 2928 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 2929 | struct btrfs_trans_handle *trans = NULL; | ||
| 2930 | |||
| 2931 | again: | ||
| 2932 | spin_lock(&em_tree->lock); | ||
| 2933 | em = lookup_extent_mapping(em_tree, start, len); | ||
| 2934 | if (em) | ||
| 2935 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 2936 | spin_unlock(&em_tree->lock); | ||
| 2937 | |||
| 2938 | if (em) { | ||
| 2939 | if (em->start > start || em->start + em->len <= start) | ||
| 2940 | free_extent_map(em); | ||
| 2941 | else if (em->block_start == EXTENT_MAP_INLINE && page) | ||
| 2942 | free_extent_map(em); | ||
| 2943 | else | ||
| 2944 | goto out; | ||
| 2945 | } | ||
| 2946 | em = alloc_extent_map(GFP_NOFS); | ||
| 2947 | if (!em) { | ||
| 2948 | err = -ENOMEM; | ||
| 2949 | goto out; | ||
| 2950 | } | ||
| 2951 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 2952 | em->start = EXTENT_MAP_HOLE; | ||
| 2953 | em->len = (u64)-1; | ||
| 2954 | |||
| 2955 | if (!path) { | ||
| 2956 | path = btrfs_alloc_path(); | ||
| 2957 | BUG_ON(!path); | ||
| 2958 | } | ||
| 2959 | |||
| 2960 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
| 2961 | objectid, start, trans != NULL); | ||
| 2962 | if (ret < 0) { | ||
| 2963 | err = ret; | ||
| 2964 | goto out; | ||
| 2965 | } | ||
| 2966 | |||
| 2967 | if (ret != 0) { | ||
| 2968 | if (path->slots[0] == 0) | ||
| 2969 | goto not_found; | ||
| 2970 | path->slots[0]--; | ||
| 2971 | } | ||
| 2972 | |||
| 2973 | leaf = path->nodes[0]; | ||
| 2974 | item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 2975 | struct btrfs_file_extent_item); | ||
| 2976 | /* are we inside the extent that was found? */ | ||
| 2977 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 2978 | found_type = btrfs_key_type(&found_key); | ||
| 2979 | if (found_key.objectid != objectid || | ||
| 2980 | found_type != BTRFS_EXTENT_DATA_KEY) { | ||
| 2981 | goto not_found; | ||
| 2982 | } | ||
| 2983 | |||
| 2984 | found_type = btrfs_file_extent_type(leaf, item); | ||
| 2985 | extent_start = found_key.offset; | ||
| 2986 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
| 2987 | extent_end = extent_start + | ||
| 2988 | btrfs_file_extent_num_bytes(leaf, item); | ||
| 2989 | err = 0; | ||
| 2990 | if (start < extent_start || start >= extent_end) { | ||
| 2991 | em->start = start; | ||
| 2992 | if (start < extent_start) { | ||
| 2993 | if (start + len <= extent_start) | ||
| 2994 | goto not_found; | ||
| 2995 | em->len = extent_end - extent_start; | ||
| 2996 | } else { | ||
| 2997 | em->len = len; | ||
| 2998 | } | ||
| 2999 | goto not_found_em; | ||
| 3000 | } | ||
| 3001 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); | ||
| 3002 | if (bytenr == 0) { | ||
| 3003 | em->start = extent_start; | ||
| 3004 | em->len = extent_end - extent_start; | ||
| 3005 | em->block_start = EXTENT_MAP_HOLE; | ||
| 3006 | goto insert; | ||
| 3007 | } | ||
| 3008 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
| 3009 | em->block_start = bytenr; | ||
| 3010 | em->start = extent_start; | ||
| 3011 | em->len = extent_end - extent_start; | ||
| 3012 | goto insert; | ||
| 3013 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 3014 | u64 page_start; | ||
| 3015 | unsigned long ptr; | ||
| 3016 | char *map; | ||
| 3017 | size_t size; | ||
| 3018 | size_t extent_offset; | ||
| 3019 | size_t copy_size; | ||
| 3020 | |||
| 3021 | size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, | ||
| 3022 | path->slots[0])); | ||
| 3023 | extent_end = (extent_start + size + root->sectorsize - 1) & | ||
| 3024 | ~((u64)root->sectorsize - 1); | ||
| 3025 | if (start < extent_start || start >= extent_end) { | ||
| 3026 | em->start = start; | ||
| 3027 | if (start < extent_start) { | ||
| 3028 | if (start + len <= extent_start) | ||
| 3029 | goto not_found; | ||
| 3030 | em->len = extent_end - extent_start; | ||
| 3031 | } else { | ||
| 3032 | em->len = len; | ||
| 3033 | } | ||
| 3034 | goto not_found_em; | ||
| 3035 | } | ||
| 3036 | em->block_start = EXTENT_MAP_INLINE; | ||
| 3037 | |||
| 3038 | if (!page) { | ||
| 3039 | em->start = extent_start; | ||
| 3040 | em->len = size; | ||
| 3041 | goto out; | ||
| 3042 | } | ||
| 3043 | |||
| 3044 | page_start = page_offset(page) + pg_offset; | ||
| 3045 | extent_offset = page_start - extent_start; | ||
| 3046 | copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, | ||
| 3047 | size - extent_offset); | ||
| 3048 | em->start = extent_start + extent_offset; | ||
| 3049 | em->len = (copy_size + root->sectorsize - 1) & | ||
| 3050 | ~((u64)root->sectorsize - 1); | ||
| 3051 | map = kmap(page); | ||
| 3052 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | ||
| 3053 | if (create == 0 && !PageUptodate(page)) { | ||
| 3054 | read_extent_buffer(leaf, map + pg_offset, ptr, | ||
| 3055 | copy_size); | ||
| 3056 | flush_dcache_page(page); | ||
| 3057 | } else if (create && PageUptodate(page)) { | ||
| 3058 | if (!trans) { | ||
| 3059 | kunmap(page); | ||
| 3060 | free_extent_map(em); | ||
| 3061 | em = NULL; | ||
| 3062 | btrfs_release_path(root, path); | ||
| 3063 | trans = btrfs_join_transaction(root, 1); | ||
| 3064 | goto again; | ||
| 3065 | } | ||
| 3066 | write_extent_buffer(leaf, map + pg_offset, ptr, | ||
| 3067 | copy_size); | ||
| 3068 | btrfs_mark_buffer_dirty(leaf); | ||
| 3069 | } | ||
| 3070 | kunmap(page); | ||
| 3071 | set_extent_uptodate(io_tree, em->start, | ||
| 3072 | extent_map_end(em) - 1, GFP_NOFS); | ||
| 3073 | goto insert; | ||
| 3074 | } else { | ||
| 3075 | printk("unkknown found_type %d\n", found_type); | ||
| 3076 | WARN_ON(1); | ||
| 3077 | } | ||
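| | /* nothing usable covers [start, start + len): report a hole */ | ||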
| 3078 | not_found: | ||
| 3079 | em->start = start; | ||
| 3080 | em->len = len; | ||
| 3081 | not_found_em: | ||
| 3082 | em->block_start = EXTENT_MAP_HOLE; | ||
| 3083 | insert: | ||
| 3084 | btrfs_release_path(root, path); | ||
| 3085 | if (em->start > start || extent_map_end(em) <= start) { | ||
| 3086 | printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); | ||
| 3087 | err = -EIO; | ||
| 3088 | goto out; | ||
| 3089 | } | ||
| 3090 | |||
| 3091 | err = 0; | ||
| 3092 | spin_lock(&em_tree->lock); | ||
| 3093 | ret = add_extent_mapping(em_tree, em); | ||
| 3094 | /* it is possible that someone inserted the extent into the tree | ||
| 3095 | * while we had the lock dropped. It is also possible that | ||
| 3096 | * an overlapping map exists in the tree | ||
| 3097 | */ | ||
| 3098 | if (ret == -EEXIST) { | ||
| 3099 | struct extent_map *existing; | ||
| 3100 | |||
| 3101 | ret = 0; | ||
| 3102 | |||
| 3103 | existing = lookup_extent_mapping(em_tree, start, len); | ||
| 3104 | if (existing && (existing->start > start || | ||
| 3105 | existing->start + existing->len <= start)) { | ||
| 3106 | free_extent_map(existing); | ||
| 3107 | existing = NULL; | ||
| 3108 | } | ||
| 3109 | if (!existing) { | ||
| 3110 | existing = lookup_extent_mapping(em_tree, em->start, | ||
| 3111 | em->len); | ||
| 3112 | if (existing) { | ||
| 3113 | err = merge_extent_mapping(em_tree, existing, | ||
| 3114 | em, start, | ||
| 3115 | root->sectorsize); | ||
| 3116 | free_extent_map(existing); | ||
| 3117 | if (err) { | ||
| 3118 | free_extent_map(em); | ||
| 3119 | em = NULL; | ||
| 3120 | } | ||
| 3121 | } else { | ||
| 3122 | err = -EIO; | ||
| 3123 | printk("failing to insert %Lu %Lu\n", | ||
| 3124 | start, len); | ||
| 3125 | free_extent_map(em); | ||
| 3126 | em = NULL; | ||
| 3127 | } | ||
| 3128 | } else { | ||
| 3129 | free_extent_map(em); | ||
| 3130 | em = existing; | ||
| 3131 | err = 0; | ||
| 3132 | } | ||
| 3133 | } | ||
| 3134 | spin_unlock(&em_tree->lock); | ||
| 3135 | out: | ||
| 3136 | if (path) | ||
| 3137 | btrfs_free_path(path); | ||
| 3138 | if (trans) { | ||
| 3139 | ret = btrfs_end_transaction(trans, root); | ||
| 3140 | if (!err) { | ||
| 3141 | err = ret; | ||
| 3142 | } | ||
| 3143 | } | ||
| 3144 | if (err) { | ||
| 3145 | free_extent_map(em); | ||
| 3146 | WARN_ON(1); | ||
| 3147 | return ERR_PTR(err); | ||
| 3148 | } | ||
| 3149 | return em; | ||
| 3150 | } | ||
| 3151 | |||
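| | /* O_DIRECT is not supported yet */ | ||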
| 3152 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | ||
| 3153 | const struct iovec *iov, loff_t offset, | ||
| 3154 | unsigned long nr_segs) | ||
| 3155 | { | ||
| 3156 | return -EINVAL; | ||
| 3157 | } | ||
| 3158 | |||
| 3159 | static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) | ||
| 3160 | { | ||
| 3161 | return extent_bmap(mapping, iblock, btrfs_get_extent); | ||
| 3162 | } | ||
| 3163 | |||
| 3164 | int btrfs_readpage(struct file *file, struct page *page) | ||
| 3165 | { | ||
| 3166 | struct extent_io_tree *tree; | ||
| 3167 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 3168 | return extent_read_full_page(tree, page, btrfs_get_extent); | ||
| 3169 | } | ||
| 3170 | |||
| 3171 | static int btrfs_writepage(struct page *page, struct writeback_control *wbc) | ||
| 3172 | { | ||
| 3173 | struct extent_io_tree *tree; | ||
| 3174 | |||
| 3175 | |||
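| | /* called from reclaim (PF_MEMALLOC): don't risk allocating memory | ||
| | * here, redirty the page and let normal writeback handle it | ||
| | */ | ||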
| 3176 | if (current->flags & PF_MEMALLOC) { | ||
| 3177 | redirty_page_for_writepage(wbc, page); | ||
| 3178 | unlock_page(page); | ||
| 3179 | return 0; | ||
| 3180 | } | ||
| 3181 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 3182 | return extent_write_full_page(tree, page, btrfs_get_extent, wbc); | ||
| 3183 | } | ||
| 3184 | |||
| 3185 | int btrfs_writepages(struct address_space *mapping, | ||
| 3186 | struct writeback_control *wbc) | ||
| 3187 | { | ||
| 3188 | struct extent_io_tree *tree; | ||
| 3189 | tree = &BTRFS_I(mapping->host)->io_tree; | ||
| 3190 | return extent_writepages(tree, mapping, btrfs_get_extent, wbc); | ||
| 3191 | } | ||
| 3192 | |||
| 3193 | static int | ||
| 3194 | btrfs_readpages(struct file *file, struct address_space *mapping, | ||
| 3195 | struct list_head *pages, unsigned nr_pages) | ||
| 3196 | { | ||
| 3197 | struct extent_io_tree *tree; | ||
| 3198 | tree = &BTRFS_I(mapping->host)->io_tree; | ||
| 3199 | return extent_readpages(tree, mapping, pages, nr_pages, | ||
| 3200 | btrfs_get_extent); | ||
| 3201 | } | ||
| 3202 | static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) | ||
| 3203 | { | ||
| 3204 | struct extent_io_tree *tree; | ||
| 3205 | struct extent_map_tree *map; | ||
| 3206 | int ret; | ||
| 3207 | |||
| 3208 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 3209 | map = &BTRFS_I(page->mapping->host)->extent_tree; | ||
| 3210 | ret = try_release_extent_mapping(map, tree, page, gfp_flags); | ||
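| | /* on success, drop PagePrivate and the page reference it held */ | ||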
| 3211 | if (ret == 1) { | ||
| 3212 | ClearPagePrivate(page); | ||
| 3213 | set_page_private(page, 0); | ||
| 3214 | page_cache_release(page); | ||
| 3215 | } | ||
| 3216 | return ret; | ||
| 3217 | } | ||
| 3218 | |||
| 3219 | static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | ||
| 3220 | { | ||
| 3221 | if (PageWriteback(page) || PageDirty(page)) | ||
| 3222 | return 0; | ||
| 3223 | return __btrfs_releasepage(page, gfp_flags); | ||
| 3224 | } | ||
| 3225 | |||
| 3226 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) | ||
| 3227 | { | ||
| 3228 | struct extent_io_tree *tree; | ||
| 3229 | struct btrfs_ordered_extent *ordered; | ||
| 3230 | u64 page_start = page_offset(page); | ||
| 3231 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
| 3232 | |||
| 3233 | wait_on_page_writeback(page); | ||
| 3234 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
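| | /* a partial-page invalidate just tries to release the page; only | ||
| | * a full-page invalidate (offset == 0) tears down extent state | ||
| | */ | ||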
| 3235 | if (offset) { | ||
| 3236 | btrfs_releasepage(page, GFP_NOFS); | ||
| 3237 | return; | ||
| 3238 | } | ||
| 3239 | |||
| 3240 | lock_extent(tree, page_start, page_end, GFP_NOFS); | ||
| 3241 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | ||
| 3242 | page_offset(page)); | ||
| 3243 | if (ordered) { | ||
| 3244 | /* | ||
| 3245 | * IO on this page will never be started, so we need | ||
| 3246 | * to account for any ordered extents now | ||
| 3247 | */ | ||
| 3248 | clear_extent_bit(tree, page_start, page_end, | ||
| 3249 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 3250 | EXTENT_LOCKED, 1, 0, GFP_NOFS); | ||
| 3251 | btrfs_finish_ordered_io(page->mapping->host, | ||
| 3252 | page_start, page_end); | ||
| 3253 | btrfs_put_ordered_extent(ordered); | ||
| 3254 | lock_extent(tree, page_start, page_end, GFP_NOFS); | ||
| 3255 | } | ||
| 3256 | clear_extent_bit(tree, page_start, page_end, | ||
| 3257 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 3258 | EXTENT_ORDERED, | ||
| 3259 | 1, 1, GFP_NOFS); | ||
| 3260 | __btrfs_releasepage(page, GFP_NOFS); | ||
| 3261 | |||
| 3262 | ClearPageChecked(page); | ||
| 3263 | if (PagePrivate(page)) { | ||
| 3264 | ClearPagePrivate(page); | ||
| 3265 | set_page_private(page, 0); | ||
| 3266 | page_cache_release(page); | ||
| 3267 | } | ||
| 3268 | } | ||
| 3269 | |||
| 3270 | /* | ||
| 3271 | * btrfs_page_mkwrite() is not allowed to change the file size as it gets | ||
| 3272 | * called from a page fault handler when a page is first dirtied. Hence we must | ||
| 3273 | * be careful to check for EOF conditions here. We set the page up correctly | ||
| 3274 | * for a written page which means we get ENOSPC checking when writing into | ||
| 3275 | * holes and correct delalloc and unwritten extent mapping on filesystems that | ||
| 3276 | * support these features. | ||
| 3277 | * | ||
| 3278 | * We are not allowed to take the i_mutex here so we have to play games to | ||
| 3279 | * protect against truncate races as the page could now be beyond EOF. Because | ||
| 3280 | * vmtruncate() writes the inode size before removing pages, once we have the | ||
| 3281 | * page lock we can determine safely if the page is beyond EOF. If it is not | ||
| 3282 | * beyond EOF, then the page is guaranteed safe against truncation until we | ||
| 3283 | * unlock the page. | ||
| 3284 | */ | ||
| 3285 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
| 3286 | { | ||
| 3287 | struct inode *inode = fdentry(vma->vm_file)->d_inode; | ||
| 3288 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3289 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 3290 | struct btrfs_ordered_extent *ordered; | ||
| 3291 | char *kaddr; | ||
| 3292 | unsigned long zero_start; | ||
| 3293 | loff_t size; | ||
| 3294 | int ret; | ||
| 3295 | u64 page_start; | ||
| 3296 | u64 page_end; | ||
| 3297 | |||
| 3298 | ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); | ||
| 3299 | if (ret) | ||
| 3300 | goto out; | ||
| 3301 | |||
| 3302 | ret = -EINVAL; | ||
| 3303 | again: | ||
| 3304 | lock_page(page); | ||
| 3305 | size = i_size_read(inode); | ||
| 3306 | page_start = page_offset(page); | ||
| 3307 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
| 3308 | |||
| 3309 | if ((page->mapping != inode->i_mapping) || | ||
| 3310 | (page_start >= size)) { | ||
| 3311 | /* page got truncated out from underneath us */ | ||
| 3312 | goto out_unlock; | ||
| 3313 | } | ||
| 3314 | wait_on_page_writeback(page); | ||
| 3315 | |||
| 3316 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3317 | set_page_extent_mapped(page); | ||
| 3318 | |||
| 3319 | /* | ||
| 3320 | * we can't set the delalloc bits if there are pending ordered | ||
| 3321 | * extents. Drop our locks and wait for them to finish | ||
| 3322 | */ | ||
| 3323 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
| 3324 | if (ordered) { | ||
| 3325 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3326 | unlock_page(page); | ||
| 3327 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 3328 | btrfs_put_ordered_extent(ordered); | ||
| 3329 | goto again; | ||
| 3330 | } | ||
| 3331 | |||
| 3332 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 3333 | ret = 0; | ||
| 3334 | |||
| 3335 | /* page is wholly or partially inside EOF */ | ||
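| | /* e.g. with 4K pages, i_size == 10000 and page_start == 8192 give | ||
| | * zero_start == 10000 & 4095 == 1808, so bytes 1808..4095 are zeroed | ||
| | */ | ||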
| 3336 | if (page_start + PAGE_CACHE_SIZE > size) | ||
| 3337 | zero_start = size & ~PAGE_CACHE_MASK; | ||
| 3338 | else | ||
| 3339 | zero_start = PAGE_CACHE_SIZE; | ||
| 3340 | |||
| 3341 | if (zero_start != PAGE_CACHE_SIZE) { | ||
| 3342 | kaddr = kmap(page); | ||
| 3343 | memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); | ||
| 3344 | flush_dcache_page(page); | ||
| 3345 | kunmap(page); | ||
| 3346 | } | ||
| 3347 | ClearPageChecked(page); | ||
| 3348 | set_page_dirty(page); | ||
| 3349 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3350 | |||
| 3351 | out_unlock: | ||
| 3352 | unlock_page(page); | ||
| 3353 | out: | ||
| 3354 | return ret; | ||
| 3355 | } | ||
| 3356 | |||
| 3357 | static void btrfs_truncate(struct inode *inode) | ||
| 3358 | { | ||
| 3359 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3360 | int ret; | ||
| 3361 | struct btrfs_trans_handle *trans; | ||
| 3362 | unsigned long nr; | ||
| 3363 | u64 mask = root->sectorsize - 1; | ||
| 3364 | |||
| 3365 | if (!S_ISREG(inode->i_mode)) | ||
| 3366 | return; | ||
| 3367 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
| 3368 | return; | ||
| 3369 | |||
| 3370 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | ||
| 3371 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | ||
| 3372 | |||
| 3373 | trans = btrfs_start_transaction(root, 1); | ||
| 3374 | btrfs_set_trans_block_group(trans, inode); | ||
| 3375 | btrfs_i_size_write(inode, inode->i_size); | ||
| 3376 | |||
| 3377 | ret = btrfs_orphan_add(trans, inode); | ||
| 3378 | if (ret) | ||
| 3379 | goto out; | ||
| 3380 | /* FIXME, add redo link to tree so we don't leak on crash */ | ||
| 3381 | ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, | ||
| 3382 | BTRFS_EXTENT_DATA_KEY); | ||
| 3383 | btrfs_update_inode(trans, root, inode); | ||
| 3384 | |||
| 3385 | ret = btrfs_orphan_del(trans, inode); | ||
| 3386 | BUG_ON(ret); | ||
| 3387 | |||
| 3388 | out: | ||
| 3389 | nr = trans->blocks_used; | ||
| 3390 | ret = btrfs_end_transaction_throttle(trans, root); | ||
| 3391 | BUG_ON(ret); | ||
| 3392 | btrfs_btree_balance_dirty(root, nr); | ||
| 3393 | } | ||
| 3394 | |||
| 3395 | /* | ||
| 3396 | * Invalidate a single dcache entry at the root of the filesystem. | ||
| 3397 | * Needed after creation of snapshot or subvolume. | ||
| 3398 | */ | ||
| 3399 | void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, | ||
| 3400 | int namelen) | ||
| 3401 | { | ||
| 3402 | struct dentry *alias, *entry; | ||
| 3403 | struct qstr qstr; | ||
| 3404 | |||
| 3405 | alias = d_find_alias(root->fs_info->sb->s_root->d_inode); | ||
| 3406 | if (alias) { | ||
| 3407 | qstr.name = name; | ||
| 3408 | qstr.len = namelen; | ||
| 3409 | /* change me if btrfs ever gets a d_hash operation */ | ||
| 3410 | qstr.hash = full_name_hash(qstr.name, qstr.len); | ||
| 3411 | entry = d_lookup(alias, &qstr); | ||
| 3412 | dput(alias); | ||
| 3413 | if (entry) { | ||
| 3414 | d_invalidate(entry); | ||
| 3415 | dput(entry); | ||
| 3416 | } | ||
| 3417 | } | ||
| 3418 | } | ||
| 3419 | |||
| 3420 | /* | ||
| 3421 | * create a new subvolume directory/inode (helper for the ioctl). | ||
| 3422 | */ | ||
| 3423 | int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, | ||
| 3424 | struct btrfs_trans_handle *trans, u64 new_dirid, | ||
| 3425 | struct btrfs_block_group_cache *block_group) | ||
| 3426 | { | ||
| 3427 | struct inode *inode; | ||
| 3428 | int error; | ||
| 3429 | u64 index = 0; | ||
| 3430 | |||
| 3431 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | ||
| 3432 | new_dirid, block_group, S_IFDIR | 0700, &index); | ||
| 3433 | if (IS_ERR(inode)) | ||
| 3434 | return PTR_ERR(inode); | ||
| 3435 | inode->i_op = &btrfs_dir_inode_operations; | ||
| 3436 | inode->i_fop = &btrfs_dir_file_operations; | ||
| 3437 | new_root->inode = inode; | ||
| 3438 | |||
| 3439 | inode->i_nlink = 1; | ||
| 3440 | btrfs_i_size_write(inode, 0); | ||
| 3441 | |||
| 3442 | error = btrfs_update_inode(trans, new_root, inode); | ||
| 3443 | if (error) | ||
| 3444 | return error; | ||
| 3445 | |||
| 3446 | d_instantiate(dentry, inode); | ||
| 3447 | return 0; | ||
| 3448 | } | ||
| 3449 | |||
| 3450 | /* helper function for file defrag and space balancing. This | ||
| 3451 | * forces readahead on a given range of bytes in an inode | ||
| 3452 | */ | ||
| 3453 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
| 3454 | struct file_ra_state *ra, struct file *file, | ||
| 3455 | pgoff_t offset, pgoff_t last_index) | ||
| 3456 | { | ||
| 3457 | pgoff_t req_size = last_index - offset + 1; | ||
| 3458 | |||
| 3459 | page_cache_sync_readahead(mapping, ra, file, offset, req_size); | ||
| 3460 | return offset + req_size; | ||
| 3461 | } | ||
| 3462 | |||
| 3463 | struct inode *btrfs_alloc_inode(struct super_block *sb) | ||
| 3464 | { | ||
| 3465 | struct btrfs_inode *ei; | ||
| 3466 | |||
| 3467 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); | ||
| 3468 | if (!ei) | ||
| 3469 | return NULL; | ||
| 3470 | ei->last_trans = 0; | ||
| 3471 | ei->logged_trans = 0; | ||
| 3472 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | ||
| 3473 | ei->i_acl = BTRFS_ACL_NOT_CACHED; | ||
| 3474 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; | ||
| 3475 | INIT_LIST_HEAD(&ei->i_orphan); | ||
| 3476 | return &ei->vfs_inode; | ||
| 3477 | } | ||
| 3478 | |||
| 3479 | void btrfs_destroy_inode(struct inode *inode) | ||
| 3480 | { | ||
| 3481 | struct btrfs_ordered_extent *ordered; | ||
| 3482 | WARN_ON(!list_empty(&inode->i_dentry)); | ||
| 3483 | WARN_ON(inode->i_data.nrpages); | ||
| 3484 | |||
| 3485 | if (BTRFS_I(inode)->i_acl && | ||
| 3486 | BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED) | ||
| 3487 | posix_acl_release(BTRFS_I(inode)->i_acl); | ||
| 3488 | if (BTRFS_I(inode)->i_default_acl && | ||
| 3489 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) | ||
| 3490 | posix_acl_release(BTRFS_I(inode)->i_default_acl); | ||
| 3491 | |||
| 3492 | spin_lock(&BTRFS_I(inode)->root->list_lock); | ||
| 3493 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
| 3494 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" | ||
| 3495 | " list\n", inode->i_ino); | ||
| 3496 | dump_stack(); | ||
| 3497 | } | ||
| 3498 | spin_unlock(&BTRFS_I(inode)->root->list_lock); | ||
| 3499 | |||
| 3500 | while (1) { | ||
| 3501 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | ||
| 3502 | if (!ordered) | ||
| 3503 | break; | ||
| 3504 | else { | ||
| 3505 | printk("found ordered extent %Lu %Lu\n", | ||
| 3506 | ordered->file_offset, ordered->len); | ||
| 3507 | btrfs_remove_ordered_extent(inode, ordered); | ||
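| | /* one ref from the lookup above, one held by the tree */ | ||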
| 3508 | btrfs_put_ordered_extent(ordered); | ||
| 3509 | btrfs_put_ordered_extent(ordered); | ||
| 3510 | } | ||
| 3511 | } | ||
| 3512 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); | ||
| 3513 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | ||
| 3514 | } | ||
| 3515 | |||
| 3516 | static void init_once(void *foo) | ||
| 3517 | { | ||
| 3518 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; | ||
| 3519 | |||
| 3520 | inode_init_once(&ei->vfs_inode); | ||
| 3521 | } | ||
| 3522 | |||
| 3523 | void btrfs_destroy_cachep(void) | ||
| 3524 | { | ||
| 3525 | if (btrfs_inode_cachep) | ||
| 3526 | kmem_cache_destroy(btrfs_inode_cachep); | ||
| 3527 | if (btrfs_trans_handle_cachep) | ||
| 3528 | kmem_cache_destroy(btrfs_trans_handle_cachep); | ||
| 3529 | if (btrfs_transaction_cachep) | ||
| 3530 | kmem_cache_destroy(btrfs_transaction_cachep); | ||
| 3531 | if (btrfs_bit_radix_cachep) | ||
| 3532 | kmem_cache_destroy(btrfs_bit_radix_cachep); | ||
| 3533 | if (btrfs_path_cachep) | ||
| 3534 | kmem_cache_destroy(btrfs_path_cachep); | ||
| 3535 | } | ||
| 3536 | |||
| 3537 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
| 3538 | unsigned long extra_flags, | ||
| 3539 | void (*ctor)(void *)) | ||
| 3540 | { | ||
| 3541 | return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | | ||
| 3542 | SLAB_MEM_SPREAD | extra_flags), ctor); | ||
| 3543 | } | ||
| 3544 | |||
| 3545 | int btrfs_init_cachep(void) | ||
| 3546 | { | ||
| 3547 | btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", | ||
| 3548 | sizeof(struct btrfs_inode), | ||
| 3549 | 0, init_once); | ||
| 3550 | if (!btrfs_inode_cachep) | ||
| 3551 | goto fail; | ||
| 3552 | btrfs_trans_handle_cachep = | ||
| 3553 | btrfs_cache_create("btrfs_trans_handle_cache", | ||
| 3554 | sizeof(struct btrfs_trans_handle), | ||
| 3555 | 0, NULL); | ||
| 3556 | if (!btrfs_trans_handle_cachep) | ||
| 3557 | goto fail; | ||
| 3558 | btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", | ||
| 3559 | sizeof(struct btrfs_transaction), | ||
| 3560 | 0, NULL); | ||
| 3561 | if (!btrfs_transaction_cachep) | ||
| 3562 | goto fail; | ||
| 3563 | btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", | ||
| 3564 | sizeof(struct btrfs_path), | ||
| 3565 | 0, NULL); | ||
| 3566 | if (!btrfs_path_cachep) | ||
| 3567 | goto fail; | ||
| 3568 | btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, | ||
| 3569 | SLAB_DESTROY_BY_RCU, NULL); | ||
| 3570 | if (!btrfs_bit_radix_cachep) | ||
| 3571 | goto fail; | ||
| 3572 | return 0; | ||
| 3573 | fail: | ||
| 3574 | btrfs_destroy_cachep(); | ||
| 3575 | return -ENOMEM; | ||
| 3576 | } | ||
| 3577 | |||
| 3578 | static int btrfs_getattr(struct vfsmount *mnt, | ||
| 3579 | struct dentry *dentry, struct kstat *stat) | ||
| 3580 | { | ||
| 3581 | struct inode *inode = dentry->d_inode; | ||
| 3582 | generic_fillattr(inode, stat); | ||
| 3583 | stat->blksize = PAGE_CACHE_SIZE; | ||
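| | /* blocks are reported in 512-byte units and include delalloc | ||
| | * bytes that haven't been written out yet | ||
| | */ | ||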
| 3584 | stat->blocks = (inode_get_bytes(inode) + | ||
| 3585 | BTRFS_I(inode)->delalloc_bytes) >> 9; | ||
| 3586 | return 0; | ||
| 3587 | } | ||
| 3588 | |||
| 3589 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
| 3590 | struct inode *new_dir, struct dentry *new_dentry) | ||
| 3591 | { | ||
| 3592 | struct btrfs_trans_handle *trans; | ||
| 3593 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | ||
| 3594 | struct inode *new_inode = new_dentry->d_inode; | ||
| 3595 | struct inode *old_inode = old_dentry->d_inode; | ||
| 3596 | struct timespec ctime = CURRENT_TIME; | ||
| 3597 | u64 index = 0; | ||
| 3598 | int ret; | ||
| 3599 | |||
| 3600 | if (S_ISDIR(old_inode->i_mode) && new_inode && | ||
| 3601 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { | ||
| 3602 | return -ENOTEMPTY; | ||
| 3603 | } | ||
| 3604 | |||
| 3605 | ret = btrfs_check_free_space(root, 1, 0); | ||
| 3606 | if (ret) | ||
| 3607 | goto out_unlock; | ||
| 3608 | |||
| 3609 | trans = btrfs_start_transaction(root, 1); | ||
| 3610 | |||
| 3611 | btrfs_set_trans_block_group(trans, new_dir); | ||
| 3612 | |||
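| | /* pin old_inode so the unlink below can't take its link count | ||
| | * to zero in the middle of the rename | ||
| | */ | ||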
| 3613 | btrfs_inc_nlink(old_dentry->d_inode); | ||
| 3614 | old_dir->i_ctime = old_dir->i_mtime = ctime; | ||
| 3615 | new_dir->i_ctime = new_dir->i_mtime = ctime; | ||
| 3616 | old_inode->i_ctime = ctime; | ||
| 3617 | |||
| 3618 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | ||
| 3619 | old_dentry->d_name.name, | ||
| 3620 | old_dentry->d_name.len); | ||
| 3621 | if (ret) | ||
| 3622 | goto out_fail; | ||
| 3623 | |||
| 3624 | if (new_inode) { | ||
| 3625 | new_inode->i_ctime = CURRENT_TIME; | ||
| 3626 | ret = btrfs_unlink_inode(trans, root, new_dir, | ||
| 3627 | new_dentry->d_inode, | ||
| 3628 | new_dentry->d_name.name, | ||
| 3629 | new_dentry->d_name.len); | ||
| 3630 | if (ret) | ||
| 3631 | goto out_fail; | ||
| 3632 | if (new_inode->i_nlink == 0) { | ||
| 3633 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); | ||
| 3634 | if (ret) | ||
| 3635 | goto out_fail; | ||
| 3636 | } | ||
| 3637 | |||
| 3638 | } | ||
| 3639 | ret = btrfs_set_inode_index(new_dir, old_inode, &index); | ||
| 3640 | if (ret) | ||
| 3641 | goto out_fail; | ||
| 3642 | |||
| 3643 | ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, | ||
| 3644 | old_inode, new_dentry->d_name.name, | ||
| 3645 | new_dentry->d_name.len, 1, index); | ||
| 3646 | if (ret) | ||
| 3647 | goto out_fail; | ||
| 3648 | |||
| 3649 | out_fail: | ||
| 3650 | btrfs_end_transaction_throttle(trans, root); | ||
| 3651 | out_unlock: | ||
| 3652 | return ret; | ||
| 3653 | } | ||
| 3654 | |||
| 3655 | /* | ||
| 3656 | * some fairly slow code that needs optimization. This walks the list | ||
| 3657 | * of all the inodes with pending delalloc and forces them to disk. | ||
| 3658 | */ | ||
| 3659 | int btrfs_start_delalloc_inodes(struct btrfs_root *root) | ||
| 3660 | { | ||
| 3661 | struct list_head *head = &root->fs_info->delalloc_inodes; | ||
| 3662 | struct btrfs_inode *binode; | ||
| 3663 | struct inode *inode; | ||
| 3664 | unsigned long flags; | ||
| 3665 | |||
| 3666 | spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); | ||
| 3667 | while (!list_empty(head)) { | ||
| 3668 | binode = list_entry(head->next, struct btrfs_inode, | ||
| 3669 | delalloc_inodes); | ||
| 3670 | inode = igrab(&binode->vfs_inode); | ||
| 3671 | if (!inode) | ||
| 3672 | list_del_init(&binode->delalloc_inodes); | ||
| 3673 | spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); | ||
| 3674 | if (inode) { | ||
| 3675 | filemap_flush(inode->i_mapping); | ||
| 3676 | iput(inode); | ||
| 3677 | } | ||
| 3678 | cond_resched(); | ||
| 3679 | spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); | ||
| 3680 | } | ||
| 3681 | spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); | ||
| 3682 | |||
| 3683 | /* the filemap_flush will queue IO into the worker threads, but | ||
| 3684 | * we have to make sure the IO is actually started and that | ||
| 3685 | * ordered extents get created before we return | ||
| 3686 | */ | ||
| 3687 | atomic_inc(&root->fs_info->async_submit_draining); | ||
| 3688 | while (atomic_read(&root->fs_info->nr_async_submits)) { | ||
| 3689 | wait_event(root->fs_info->async_submit_wait, | ||
| 3690 | (atomic_read(&root->fs_info->nr_async_submits) == 0)); | ||
| 3691 | } | ||
| 3692 | atomic_dec(&root->fs_info->async_submit_draining); | ||
| 3693 | return 0; | ||
| 3694 | } | ||
| 3695 | |||
| 3696 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | ||
| 3697 | const char *symname) | ||
| 3698 | { | ||
| 3699 | struct btrfs_trans_handle *trans; | ||
| 3700 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 3701 | struct btrfs_path *path; | ||
| 3702 | struct btrfs_key key; | ||
| 3703 | struct inode *inode = NULL; | ||
| 3704 | int err; | ||
| 3705 | int drop_inode = 0; | ||
| 3706 | u64 objectid; | ||
| 3707 | u64 index = 0; | ||
| 3708 | int name_len; | ||
| 3709 | int datasize; | ||
| 3710 | unsigned long ptr; | ||
| 3711 | struct btrfs_file_extent_item *ei; | ||
| 3712 | struct extent_buffer *leaf; | ||
| 3713 | unsigned long nr = 0; | ||
| 3714 | |||
| 3715 | name_len = strlen(symname) + 1; | ||
| 3716 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | ||
| 3717 | return -ENAMETOOLONG; | ||
| 3718 | |||
| 3719 | err = btrfs_check_free_space(root, 1, 0); | ||
| 3720 | if (err) | ||
| 3721 | goto out_fail; | ||
| 3722 | |||
| 3723 | trans = btrfs_start_transaction(root, 1); | ||
| 3724 | btrfs_set_trans_block_group(trans, dir); | ||
| 3725 | |||
| 3726 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 3727 | if (err) { | ||
| 3728 | err = -ENOSPC; | ||
| 3729 | goto out_unlock; | ||
| 3730 | } | ||
| 3731 | |||
| 3732 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | ||
| 3733 | dentry->d_name.len, | ||
| 3734 | dentry->d_parent->d_inode->i_ino, objectid, | ||
| 3735 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, | ||
| 3736 | &index); | ||
| 3737 | err = PTR_ERR(inode); | ||
| 3738 | if (IS_ERR(inode)) | ||
| 3739 | goto out_unlock; | ||
| 3740 | |||
| 3741 | err = btrfs_init_acl(inode, dir); | ||
| 3742 | if (err) { | ||
| 3743 | drop_inode = 1; | ||
| 3744 | goto out_unlock; | ||
| 3745 | } | ||
| 3746 | |||
| 3747 | btrfs_set_trans_block_group(trans, inode); | ||
| 3748 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | ||
| 3749 | if (err) | ||
| 3750 | drop_inode = 1; | ||
| 3751 | else { | ||
| 3752 | inode->i_mapping->a_ops = &btrfs_aops; | ||
| 3753 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
| 3754 | inode->i_fop = &btrfs_file_operations; | ||
| 3755 | inode->i_op = &btrfs_file_inode_operations; | ||
| 3756 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | ||
| 3757 | } | ||
| 3758 | dir->i_sb->s_dirt = 1; | ||
| 3759 | btrfs_update_inode_block_group(trans, inode); | ||
| 3760 | btrfs_update_inode_block_group(trans, dir); | ||
| 3761 | if (drop_inode) | ||
| 3762 | goto out_unlock; | ||
| 3763 | |||
| 3764 | path = btrfs_alloc_path(); | ||
| 3765 | BUG_ON(!path); | ||
| 3766 | key.objectid = inode->i_ino; | ||
| 3767 | key.offset = 0; | ||
| 3768 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | ||
| 3769 | datasize = btrfs_file_extent_calc_inline_size(name_len); | ||
| 3770 | err = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 3771 | datasize); | ||
| 3772 | if (err) { | ||
| 3773 | drop_inode = 1; | ||
| 3774 | goto out_unlock; | ||
| 3775 | } | ||
| 3776 | leaf = path->nodes[0]; | ||
| 3777 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3778 | struct btrfs_file_extent_item); | ||
| 3779 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | ||
| 3780 | btrfs_set_file_extent_type(leaf, ei, | ||
| 3781 | BTRFS_FILE_EXTENT_INLINE); | ||
| 3782 | ptr = btrfs_file_extent_inline_start(ei); | ||
| 3783 | write_extent_buffer(leaf, symname, ptr, name_len); | ||
| 3784 | btrfs_mark_buffer_dirty(leaf); | ||
| 3785 | btrfs_free_path(path); | ||
| 3786 | |||
| 3787 | inode->i_op = &btrfs_symlink_inode_operations; | ||
| 3788 | inode->i_mapping->a_ops = &btrfs_symlink_aops; | ||
| 3789 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
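| | /* name_len includes the trailing NUL stored in the inline extent; | ||
| | * i_size does not | ||
| | */ | ||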
| 3790 | btrfs_i_size_write(inode, name_len - 1); | ||
| 3791 | err = btrfs_update_inode(trans, root, inode); | ||
| 3792 | if (err) | ||
| 3793 | drop_inode = 1; | ||
| 3794 | |||
| 3795 | out_unlock: | ||
| 3796 | nr = trans->blocks_used; | ||
| 3797 | btrfs_end_transaction_throttle(trans, root); | ||
| 3798 | out_fail: | ||
| 3799 | if (drop_inode) { | ||
| 3800 | inode_dec_link_count(inode); | ||
| 3801 | iput(inode); | ||
| 3802 | } | ||
| 3803 | btrfs_btree_balance_dirty(root, nr); | ||
| 3804 | return err; | ||
| 3805 | } | ||
| 3806 | |||
| 3807 | static int btrfs_set_page_dirty(struct page *page) | ||
| 3808 | { | ||
| 3809 | return __set_page_dirty_nobuffers(page); | ||
| 3810 | } | ||
| 3811 | |||
| 3812 | static int btrfs_permission(struct inode *inode, int mask) | ||
| 3813 | { | ||
| 3814 | if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) | ||
| 3815 | return -EACCES; | ||
| 3816 | return generic_permission(inode, mask, btrfs_check_acl); | ||
| 3817 | } | ||
| 3818 | |||
| 3819 | static struct inode_operations btrfs_dir_inode_operations = { | ||
| 3820 | .lookup = btrfs_lookup, | ||
| 3821 | .create = btrfs_create, | ||
| 3822 | .unlink = btrfs_unlink, | ||
| 3823 | .link = btrfs_link, | ||
| 3824 | .mkdir = btrfs_mkdir, | ||
| 3825 | .rmdir = btrfs_rmdir, | ||
| 3826 | .rename = btrfs_rename, | ||
| 3827 | .symlink = btrfs_symlink, | ||
| 3828 | .setattr = btrfs_setattr, | ||
| 3829 | .mknod = btrfs_mknod, | ||
| 3830 | .setxattr = btrfs_setxattr, | ||
| 3831 | .getxattr = btrfs_getxattr, | ||
| 3832 | .listxattr = btrfs_listxattr, | ||
| 3833 | .removexattr = btrfs_removexattr, | ||
| 3834 | .permission = btrfs_permission, | ||
| 3835 | }; | ||
| 3836 | static struct inode_operations btrfs_dir_ro_inode_operations = { | ||
| 3837 | .lookup = btrfs_lookup, | ||
| 3838 | .permission = btrfs_permission, | ||
| 3839 | }; | ||
| 3840 | static struct file_operations btrfs_dir_file_operations = { | ||
| 3841 | .llseek = generic_file_llseek, | ||
| 3842 | .read = generic_read_dir, | ||
| 3843 | .readdir = btrfs_real_readdir, | ||
| 3844 | .unlocked_ioctl = btrfs_ioctl, | ||
| 3845 | #ifdef CONFIG_COMPAT | ||
| 3846 | .compat_ioctl = btrfs_ioctl, | ||
| 3847 | #endif | ||
| 3848 | .release = btrfs_release_file, | ||
| 3849 | .fsync = btrfs_sync_file, | ||
| 3850 | }; | ||
| 3851 | |||
| 3852 | static struct extent_io_ops btrfs_extent_io_ops = { | ||
| 3853 | .fill_delalloc = run_delalloc_range, | ||
| 3854 | .submit_bio_hook = btrfs_submit_bio_hook, | ||
| 3855 | .merge_bio_hook = btrfs_merge_bio_hook, | ||
| 3856 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, | ||
| 3857 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, | ||
| 3858 | .writepage_start_hook = btrfs_writepage_start_hook, | ||
| 3859 | .readpage_io_failed_hook = btrfs_io_failed_hook, | ||
| 3860 | .set_bit_hook = btrfs_set_bit_hook, | ||
| 3861 | .clear_bit_hook = btrfs_clear_bit_hook, | ||
| 3862 | }; | ||
| 3863 | |||
| 3864 | static struct address_space_operations btrfs_aops = { | ||
| 3865 | .readpage = btrfs_readpage, | ||
| 3866 | .writepage = btrfs_writepage, | ||
| 3867 | .writepages = btrfs_writepages, | ||
| 3868 | .readpages = btrfs_readpages, | ||
| 3869 | .sync_page = block_sync_page, | ||
| 3870 | .bmap = btrfs_bmap, | ||
| 3871 | .direct_IO = btrfs_direct_IO, | ||
| 3872 | .invalidatepage = btrfs_invalidatepage, | ||
| 3873 | .releasepage = btrfs_releasepage, | ||
| 3874 | .set_page_dirty = btrfs_set_page_dirty, | ||
| 3875 | }; | ||
| 3876 | |||
| 3877 | static struct address_space_operations btrfs_symlink_aops = { | ||
| 3878 | .readpage = btrfs_readpage, | ||
| 3879 | .writepage = btrfs_writepage, | ||
| 3880 | .invalidatepage = btrfs_invalidatepage, | ||
| 3881 | .releasepage = btrfs_releasepage, | ||
| 3882 | }; | ||
| 3883 | |||
| 3884 | static struct inode_operations btrfs_file_inode_operations = { | ||
| 3885 | .truncate = btrfs_truncate, | ||
| 3886 | .getattr = btrfs_getattr, | ||
| 3887 | .setattr = btrfs_setattr, | ||
| 3888 | .setxattr = btrfs_setxattr, | ||
| 3889 | .getxattr = btrfs_getxattr, | ||
| 3890 | .listxattr = btrfs_listxattr, | ||
| 3891 | .removexattr = btrfs_removexattr, | ||
| 3892 | .permission = btrfs_permission, | ||
| 3893 | }; | ||
| 3894 | static struct inode_operations btrfs_special_inode_operations = { | ||
| 3895 | .getattr = btrfs_getattr, | ||
| 3896 | .setattr = btrfs_setattr, | ||
| 3897 | .permission = btrfs_permission, | ||
| 3898 | .setxattr = btrfs_setxattr, | ||
| 3899 | .getxattr = btrfs_getxattr, | ||
| 3900 | .listxattr = btrfs_listxattr, | ||
| 3901 | .removexattr = btrfs_removexattr, | ||
| 3902 | }; | ||
| 3903 | static struct inode_operations btrfs_symlink_inode_operations = { | ||
| 3904 | .readlink = generic_readlink, | ||
| 3905 | .follow_link = page_follow_link_light, | ||
| 3906 | .put_link = page_put_link, | ||
| 3907 | .permission = btrfs_permission, | ||
| 3908 | }; | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c new file mode 100644 index 00000000000..1136ce2febc --- /dev/null +++ b/fs/btrfs/ioctl.c | |||
| @@ -0,0 +1,863 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/kernel.h> | ||
| 20 | #include <linux/bio.h> | ||
| 21 | #include <linux/buffer_head.h> | ||
| 22 | #include <linux/file.h> | ||
| 23 | #include <linux/fs.h> | ||
| 24 | #include <linux/fsnotify.h> | ||
| 25 | #include <linux/pagemap.h> | ||
| 26 | #include <linux/highmem.h> | ||
| 27 | #include <linux/time.h> | ||
| 28 | #include <linux/init.h> | ||
| 29 | #include <linux/string.h> | ||
| 30 | #include <linux/smp_lock.h> | ||
| 31 | #include <linux/backing-dev.h> | ||
| 32 | #include <linux/mount.h> | ||
| 33 | #include <linux/mpage.h> | ||
| 34 | #include <linux/namei.h> | ||
| 35 | #include <linux/swap.h> | ||
| 36 | #include <linux/writeback.h> | ||
| 37 | #include <linux/statfs.h> | ||
| 38 | #include <linux/compat.h> | ||
| 39 | #include <linux/bit_spinlock.h> | ||
| 40 | #include <linux/security.h> | ||
| 41 | #include <linux/version.h> | ||
| 42 | #include <linux/xattr.h> | ||
| 43 | #include <linux/vmalloc.h> | ||
| 44 | #include "ctree.h" | ||
| 45 | #include "disk-io.h" | ||
| 46 | #include "transaction.h" | ||
| 47 | #include "btrfs_inode.h" | ||
| 48 | #include "ioctl.h" | ||
| 49 | #include "print-tree.h" | ||
| 50 | #include "volumes.h" | ||
| 51 | #include "locking.h" | ||
| 52 | |||
| 53 | |||
| 54 | |||
| 55 | static noinline int create_subvol(struct btrfs_root *root, | ||
| 56 | struct dentry *dentry, | ||
| 57 | char *name, int namelen) | ||
| 58 | { | ||
| 59 | struct btrfs_trans_handle *trans; | ||
| 60 | struct btrfs_key key; | ||
| 61 | struct btrfs_root_item root_item; | ||
| 62 | struct btrfs_inode_item *inode_item; | ||
| 63 | struct extent_buffer *leaf; | ||
| 64 | struct btrfs_root *new_root = root; | ||
| 65 | struct inode *dir; | ||
| 66 | int ret; | ||
| 67 | int err; | ||
| 68 | u64 objectid; | ||
| 69 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | ||
| 70 | unsigned long nr = 1; | ||
| 71 | |||
| 72 | ret = btrfs_check_free_space(root, 1, 0); | ||
| 73 | if (ret) | ||
| 74 | goto fail_commit; | ||
| 75 | |||
| 76 | trans = btrfs_start_transaction(root, 1); | ||
| 77 | BUG_ON(!trans); | ||
| 78 | |||
| 79 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
| 80 | 0, &objectid); | ||
| 81 | if (ret) | ||
| 82 | goto fail; | ||
| 83 | |||
| 84 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | ||
| 85 | objectid, trans->transid, 0, 0, 0); | ||
| 86 | if (IS_ERR(leaf)) { | ||
| 87 | ret = PTR_ERR(leaf); | ||
| 88 | goto fail; | ||
| 89 | } | ||
| 90 | |||
| 91 | btrfs_set_header_nritems(leaf, 0); | ||
| 92 | btrfs_set_header_level(leaf, 0); | ||
| 93 | btrfs_set_header_bytenr(leaf, leaf->start); | ||
| 94 | btrfs_set_header_generation(leaf, trans->transid); | ||
| 95 | btrfs_set_header_owner(leaf, objectid); | ||
| 96 | |||
| 97 | write_extent_buffer(leaf, root->fs_info->fsid, | ||
| 98 | (unsigned long)btrfs_header_fsid(leaf), | ||
| 99 | BTRFS_FSID_SIZE); | ||
| 100 | btrfs_mark_buffer_dirty(leaf); | ||
| 101 | |||
| 102 | inode_item = &root_item.inode; | ||
| 103 | memset(inode_item, 0, sizeof(*inode_item)); | ||
| 104 | inode_item->generation = cpu_to_le64(1); | ||
| 105 | inode_item->size = cpu_to_le64(3); | ||
| 106 | inode_item->nlink = cpu_to_le32(1); | ||
| 107 | inode_item->nbytes = cpu_to_le64(root->leafsize); | ||
| 108 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | ||
| 109 | |||
| 110 | btrfs_set_root_bytenr(&root_item, leaf->start); | ||
| 111 | btrfs_set_root_level(&root_item, 0); | ||
| 112 | btrfs_set_root_refs(&root_item, 1); | ||
| 113 | btrfs_set_root_used(&root_item, 0); | ||
| 114 | |||
| 115 | memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); | ||
| 116 | root_item.drop_level = 0; | ||
| 117 | |||
| 118 | btrfs_tree_unlock(leaf); | ||
| 119 | free_extent_buffer(leaf); | ||
| 120 | leaf = NULL; | ||
| 121 | |||
| 122 | btrfs_set_root_dirid(&root_item, new_dirid); | ||
| 123 | |||
| 124 | key.objectid = objectid; | ||
| 125 | key.offset = 1; | ||
| 126 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 127 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | ||
| 128 | &root_item); | ||
| 129 | if (ret) | ||
| 130 | goto fail; | ||
| 131 | |||
| 132 | /* | ||
| 133 | * insert the directory item | ||
| 134 | */ | ||
| 135 | key.offset = (u64)-1; | ||
| 136 | dir = root->fs_info->sb->s_root->d_inode; | ||
| 137 | ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, | ||
| 138 | name, namelen, dir->i_ino, &key, | ||
| 139 | BTRFS_FT_DIR, 0); | ||
| 140 | if (ret) | ||
| 141 | goto fail; | ||
| 142 | |||
| 143 | ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, | ||
| 144 | name, namelen, objectid, | ||
| 145 | root->fs_info->sb->s_root->d_inode->i_ino, 0); | ||
| 146 | if (ret) | ||
| 147 | goto fail; | ||
| 148 | |||
| 149 | ret = btrfs_commit_transaction(trans, root); | ||
| 150 | if (ret) | ||
| 151 | goto fail_commit; | ||
| 152 | |||
| 153 | new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); | ||
| 154 | BUG_ON(!new_root); | ||
| 155 | |||
| 156 | trans = btrfs_start_transaction(new_root, 1); | ||
| 157 | BUG_ON(!trans); | ||
| 158 | |||
| 159 | ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid, | ||
| 160 | BTRFS_I(dir)->block_group); | ||
| 161 | if (ret) | ||
| 162 | goto fail; | ||
| 163 | |||
| 164 | fail: | ||
| 165 | nr = trans->blocks_used; | ||
| 166 | err = btrfs_commit_transaction(trans, new_root); | ||
| 167 | if (err && !ret) | ||
| 168 | ret = err; | ||
| 169 | fail_commit: | ||
| 170 | btrfs_btree_balance_dirty(root, nr); | ||
| 171 | return ret; | ||
| 172 | } | ||
| 173 | |||
| 174 | static int create_snapshot(struct btrfs_root *root, char *name, int namelen) | ||
| 175 | { | ||
| 176 | struct btrfs_pending_snapshot *pending_snapshot; | ||
| 177 | struct btrfs_trans_handle *trans; | ||
| 178 | int ret; | ||
| 179 | int err; | ||
| 180 | unsigned long nr = 0; | ||
| 181 | |||
| 182 | if (!root->ref_cows) | ||
| 183 | return -EINVAL; | ||
| 184 | |||
| 185 | ret = btrfs_check_free_space(root, 1, 0); | ||
| 186 | if (ret) | ||
| 187 | goto fail_unlock; | ||
| 188 | |||
| 189 | pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS); | ||
| 190 | if (!pending_snapshot) { | ||
| 191 | ret = -ENOMEM; | ||
| 192 | goto fail_unlock; | ||
| 193 | } | ||
| 194 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | ||
| 195 | if (!pending_snapshot->name) { | ||
| 196 | ret = -ENOMEM; | ||
| 197 | kfree(pending_snapshot); | ||
| 198 | goto fail_unlock; | ||
| 199 | } | ||
| 200 | memcpy(pending_snapshot->name, name, namelen); | ||
| 201 | pending_snapshot->name[namelen] = '\0'; | ||
| 202 | trans = btrfs_start_transaction(root, 1); | ||
| 203 | BUG_ON(!trans); | ||
| 204 | pending_snapshot->root = root; | ||
| 205 | list_add(&pending_snapshot->list, | ||
| 206 | &trans->transaction->pending_snapshots); | ||
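| | /* the snapshot itself is taken when this transaction commits and | ||
| | * the pending list is processed | ||
| | */ | ||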
| 207 | ret = btrfs_update_inode(trans, root, root->inode); | ||
| 208 | err = btrfs_commit_transaction(trans, root); | ||
| 209 | |||
| 210 | fail_unlock: | ||
| 211 | btrfs_btree_balance_dirty(root, nr); | ||
| 212 | return ret; | ||
| 213 | } | ||
| 214 | |||
| 215 | /* copy of may_create() in fs/namei.c */ | ||
| 216 | static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | ||
| 217 | { | ||
| 218 | if (child->d_inode) | ||
| 219 | return -EEXIST; | ||
| 220 | if (IS_DEADDIR(dir)) | ||
| 221 | return -ENOENT; | ||
| 222 | return inode_permission(dir, MAY_WRITE | MAY_EXEC); | ||
| 223 | } | ||
| 224 | |||
| 225 | /* | ||
| 226 | * Create a new subvolume below @parent. This is largely modeled after | ||
| 227 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup | ||
| 228 | * inside this filesystem so it's quite a bit simpler. | ||
| 229 | */ | ||
| 230 | static noinline int btrfs_mksubvol(struct path *parent, char *name, | ||
| 231 | int mode, int namelen) | ||
| 232 | { | ||
| 233 | struct dentry *dentry; | ||
| 234 | int error; | ||
| 235 | |||
| 236 | mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); | ||
| 237 | |||
| 238 | dentry = lookup_one_len(name, parent->dentry, namelen); | ||
| 239 | error = PTR_ERR(dentry); | ||
| 240 | if (IS_ERR(dentry)) | ||
| 241 | goto out_unlock; | ||
| 242 | |||
| 243 | error = -EEXIST; | ||
| 244 | if (dentry->d_inode) | ||
| 245 | goto out_dput; | ||
| 246 | |||
| 247 | if (!IS_POSIXACL(parent->dentry->d_inode)) | ||
| 248 | mode &= ~current->fs->umask; | ||
| 249 | error = mnt_want_write(parent->mnt); | ||
| 250 | if (error) | ||
| 251 | goto out_dput; | ||
| 252 | |||
| 253 | error = btrfs_may_create(parent->dentry->d_inode, dentry); | ||
| 254 | if (error) | ||
| 255 | goto out_drop_write; | ||
| 256 | |||
| 257 | /* | ||
| 258 | * Actually perform the low-level subvolume creation after all | ||
| 259 | * this VFS fuss. | ||
| 260 | * | ||
| 261 | * Eventually we want to pass in an inode under which we create this | ||
| 262 | * subvolume, but for now all are under the filesystem root. | ||
| 263 | * | ||
| 264 | * Also we should pass on the mode eventually to allow creating new | ||
| 265 | * subvolume with specific mode bits. | ||
| 266 | */ | ||
| 267 | error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, dentry, | ||
| 268 | name, namelen); | ||
| 269 | if (error) | ||
| 270 | goto out_drop_write; | ||
| 271 | |||
| 272 | fsnotify_mkdir(parent->dentry->d_inode, dentry); | ||
| 273 | out_drop_write: | ||
| 274 | mnt_drop_write(parent->mnt); | ||
| 275 | out_dput: | ||
| 276 | dput(dentry); | ||
| 277 | out_unlock: | ||
| 278 | mutex_unlock(&parent->dentry->d_inode->i_mutex); | ||
| 279 | return error; | ||
| 280 | } | ||
| 281 | |||
| 282 | |||
| 283 | int btrfs_defrag_file(struct file *file) | ||
| 284 | { | ||
| 285 | struct inode *inode = fdentry(file)->d_inode; | ||
| 286 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 287 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 288 | struct btrfs_ordered_extent *ordered; | ||
| 289 | struct page *page; | ||
| 290 | unsigned long last_index; | ||
| 291 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | ||
| 292 | unsigned long total_read = 0; | ||
| 293 | u64 page_start; | ||
| 294 | u64 page_end; | ||
| 295 | unsigned long i; | ||
| 296 | int ret; | ||
| 297 | |||
| 298 | ret = btrfs_check_free_space(root, inode->i_size, 0); | ||
| 299 | if (ret) | ||
| 300 | return -ENOSPC; | ||
| 301 | |||
| 302 | mutex_lock(&inode->i_mutex); | ||
| 303 | last_index = inode->i_size >> PAGE_CACHE_SHIFT; | ||
| 304 | for (i = 0; i <= last_index; i++) { | ||
| 305 | if (total_read % ra_pages == 0) { | ||
| 306 | btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, | ||
| 307 | min(last_index, i + ra_pages - 1)); | ||
| 308 | } | ||
| 309 | total_read++; | ||
| 310 | again: | ||
| 311 | page = grab_cache_page(inode->i_mapping, i); | ||
| 312 | if (!page) | ||
| 313 | goto out_unlock; | ||
| 314 | if (!PageUptodate(page)) { | ||
| 315 | btrfs_readpage(NULL, page); | ||
| 316 | lock_page(page); | ||
| 317 | if (!PageUptodate(page)) { | ||
| 318 | unlock_page(page); | ||
| 319 | page_cache_release(page); | ||
| 320 | goto out_unlock; | ||
| 321 | } | ||
| 322 | } | ||
| 323 | |||
| 324 | wait_on_page_writeback(page); | ||
| 325 | |||
| 326 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 327 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
| 328 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 329 | |||
| 330 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
| 331 | if (ordered) { | ||
| 332 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 333 | unlock_page(page); | ||
| 334 | page_cache_release(page); | ||
| 335 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 336 | btrfs_put_ordered_extent(ordered); | ||
| 337 | goto again; | ||
| 338 | } | ||
| 339 | set_page_extent_mapped(page); | ||
| 340 | |||
| 341 | /* | ||
| 342 | * this makes sure page_mkwrite is called on the | ||
| 343 | * page if it is dirtied again later | ||
| 344 | */ | ||
| 345 | clear_page_dirty_for_io(page); | ||
| 346 | |||
| 347 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 348 | |||
| 349 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 350 | set_page_dirty(page); | ||
| 351 | unlock_page(page); | ||
| 352 | page_cache_release(page); | ||
| 353 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | ||
| 354 | } | ||
| 355 | |||
| 356 | out_unlock: | ||
| 357 | mutex_unlock(&inode->i_mutex); | ||
| 358 | return 0; | ||
| 359 | } | ||
| 360 | |||
| 361 | /* | ||
| 362 | * Called inside transaction, so use GFP_NOFS | ||
| 363 | */ | ||
| 364 | |||
| 365 | static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | ||
| 366 | { | ||
| 367 | u64 new_size; | ||
| 368 | u64 old_size; | ||
| 369 | u64 devid = 1; | ||
| 370 | struct btrfs_ioctl_vol_args *vol_args; | ||
| 371 | struct btrfs_trans_handle *trans; | ||
| 372 | struct btrfs_device *device = NULL; | ||
| 373 | char *sizestr; | ||
| 374 | char *devstr = NULL; | ||
| 375 | int ret = 0; | ||
| 376 | int namelen; | ||
| 377 | int mod = 0; | ||
| 378 | |||
| 379 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | ||
| 380 | |||
| 381 | if (!vol_args) | ||
| 382 | return -ENOMEM; | ||
| 383 | |||
| 384 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
| 385 | ret = -EFAULT; | ||
| 386 | goto out; | ||
| 387 | } | ||
| 388 | |||
| 389 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 390 | namelen = strlen(vol_args->name); | ||
| 391 | |||
| 392 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 393 | sizestr = vol_args->name; | ||
| 394 | devstr = strchr(sizestr, ':'); | ||
| 395 | if (devstr) { | ||
| 396 | char *end; | ||
| 397 | sizestr = devstr + 1; | ||
| 398 | *devstr = '\0'; | ||
| 399 | devstr = vol_args->name; | ||
| 400 | devid = simple_strtoull(devstr, &end, 10); | ||
| 401 | printk(KERN_INFO "resizing devid %llu\n", devid); | ||
| 402 | } | ||
| 403 | device = btrfs_find_device(root, devid, NULL); | ||
| 404 | if (!device) { | ||
| 405 | printk(KERN_INFO "resizer unable to find device %llu\n", devid); | ||
| 406 | ret = -EINVAL; | ||
| 407 | goto out_unlock; | ||
| 408 | } | ||
| 409 | if (!strcmp(sizestr, "max")) | ||
| 410 | new_size = device->bdev->bd_inode->i_size; | ||
| 411 | else { | ||
| 412 | if (sizestr[0] == '-') { | ||
| 413 | mod = -1; | ||
| 414 | sizestr++; | ||
| 415 | } else if (sizestr[0] == '+') { | ||
| 416 | mod = 1; | ||
| 417 | sizestr++; | ||
| 418 | } | ||
| 419 | new_size = btrfs_parse_size(sizestr); | ||
| 420 | if (new_size == 0) { | ||
| 421 | ret = -EINVAL; | ||
| 422 | goto out_unlock; | ||
| 423 | } | ||
| 424 | } | ||
| 425 | |||
| 426 | old_size = device->total_bytes; | ||
| 427 | |||
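| | /* e.g. vol_args->name == "2:-65536" selects devid 2 and shrinks | ||
| | * it by 65536 bytes: mod == -1, new_size == old_size - 65536 | ||
| | */ | ||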
| 428 | if (mod < 0) { | ||
| 429 | if (new_size > old_size) { | ||
| 430 | ret = -EINVAL; | ||
| 431 | goto out_unlock; | ||
| 432 | } | ||
| 433 | new_size = old_size - new_size; | ||
| 434 | } else if (mod > 0) { | ||
| 435 | new_size = old_size + new_size; | ||
| 436 | } | ||
| 437 | |||
| 438 | if (new_size < 256 * 1024 * 1024) { | ||
| 439 | ret = -EINVAL; | ||
| 440 | goto out_unlock; | ||
| 441 | } | ||
| 442 | if (new_size > device->bdev->bd_inode->i_size) { | ||
| 443 | ret = -EFBIG; | ||
| 444 | goto out_unlock; | ||
| 445 | } | ||
| 446 | |||
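| | /* round the new size down to a multiple of the sector size */ | ||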
| 447 | do_div(new_size, root->sectorsize); | ||
| 448 | new_size *= root->sectorsize; | ||
| 449 | |||
| 450 | printk(KERN_INFO "new size for %s is %llu\n", | ||
| 451 | device->name, (unsigned long long)new_size); | ||
| 452 | |||
| 453 | if (new_size > old_size) { | ||
| 454 | trans = btrfs_start_transaction(root, 1); | ||
| 455 | ret = btrfs_grow_device(trans, device, new_size); | ||
| 456 | btrfs_commit_transaction(trans, root); | ||
| 457 | } else { | ||
| 458 | ret = btrfs_shrink_device(device, new_size); | ||
| 459 | } | ||
| 460 | |||
| 461 | out_unlock: | ||
| 462 | mutex_unlock(&root->fs_info->volume_mutex); | ||
| 463 | out: | ||
| 464 | kfree(vol_args); | ||
| 465 | return ret; | ||
| 466 | } | ||
| 467 | |||
| 468 | static noinline int btrfs_ioctl_snap_create(struct file *file, | ||
| 469 | void __user *arg) | ||
| 470 | { | ||
| 471 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 472 | struct btrfs_ioctl_vol_args *vol_args; | ||
| 473 | struct btrfs_dir_item *di; | ||
| 474 | struct btrfs_path *path; | ||
| 475 | u64 root_dirid; | ||
| 476 | int namelen; | ||
| 477 | int ret; | ||
| 478 | |||
| 479 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | ||
| 480 | |||
| 481 | if (!vol_args) | ||
| 482 | return -ENOMEM; | ||
| 483 | |||
| 484 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
| 485 | ret = -EFAULT; | ||
| 486 | goto out; | ||
| 487 | } | ||
| 488 | |||
| 489 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 490 | namelen = strlen(vol_args->name); | ||
| 491 | if (strchr(vol_args->name, '/')) { | ||
| 492 | ret = -EINVAL; | ||
| 493 | goto out; | ||
| 494 | } | ||
| 495 | |||
| 496 | path = btrfs_alloc_path(); | ||
| 497 | if (!path) { | ||
| 498 | ret = -ENOMEM; | ||
| 499 | goto out; | ||
| 500 | } | ||
| 501 | |||
| 502 | root_dirid = root->fs_info->sb->s_root->d_inode->i_ino; | ||
| 503 | di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, | ||
| 504 | path, root_dirid, | ||
| 505 | vol_args->name, namelen, 0); | ||
| 506 | btrfs_free_path(path); | ||
| 507 | |||
| 508 | if (di && !IS_ERR(di)) { | ||
| 509 | ret = -EEXIST; | ||
| 510 | goto out; | ||
| 511 | } | ||
| 512 | |||
| 513 | if (IS_ERR(di)) { | ||
| 514 | ret = PTR_ERR(di); | ||
| 515 | goto out; | ||
| 516 | } | ||
| 517 | |||
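| | /* requests against the tree root create a new subvolume; from | ||
| | * within a subvolume we snapshot that subvolume instead | ||
| | */ | ||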
| 518 | if (root == root->fs_info->tree_root) { | ||
| 519 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | ||
| 520 | file->f_path.dentry->d_inode->i_mode, | ||
| 521 | namelen); | ||
| 522 | } else { | ||
| 523 | ret = create_snapshot(root, vol_args->name, namelen); | ||
| 524 | } | ||
| 525 | |||
| 526 | out: | ||
| 527 | kfree(vol_args); | ||
| 528 | return ret; | ||
| 529 | } | ||
| 530 | |||
| 531 | static int btrfs_ioctl_defrag(struct file *file) | ||
| 532 | { | ||
| 533 | struct inode *inode = fdentry(file)->d_inode; | ||
| 534 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 535 | |||
| 536 | switch (inode->i_mode & S_IFMT) { | ||
| 537 | case S_IFDIR: | ||
| 538 | btrfs_defrag_root(root, 0); | ||
| 539 | btrfs_defrag_root(root->fs_info->extent_root, 0); | ||
| 540 | break; | ||
| 541 | case S_IFREG: | ||
| 542 | btrfs_defrag_file(file); | ||
| 543 | break; | ||
| 544 | } | ||
| 545 | |||
| 546 | return 0; | ||
| 547 | } | ||
| 548 | |||
| 549 | long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | ||
| 550 | { | ||
| 551 | struct btrfs_ioctl_vol_args *vol_args; | ||
| 552 | int ret; | ||
| 553 | |||
| 554 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | ||
| 555 | |||
| 556 | if (!vol_args) | ||
| 557 | return -ENOMEM; | ||
| 558 | |||
| 559 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
| 560 | ret = -EFAULT; | ||
| 561 | goto out; | ||
| 562 | } | ||
| 563 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 564 | ret = btrfs_init_new_device(root, vol_args->name); | ||
| 565 | |||
| 566 | out: | ||
| 567 | kfree(vol_args); | ||
| 568 | return ret; | ||
| 569 | } | ||
| 570 | |||
| 571 | long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | ||
| 572 | { | ||
| 573 | struct btrfs_ioctl_vol_args *vol_args; | ||
| 574 | int ret; | ||
| 575 | |||
| 576 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | ||
| 577 | |||
| 578 | if (!vol_args) | ||
| 579 | return -ENOMEM; | ||
| 580 | |||
| 581 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
| 582 | ret = -EFAULT; | ||
| 583 | goto out; | ||
| 584 | } | ||
| 585 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 586 | ret = btrfs_rm_device(root, vol_args->name); | ||
| 587 | |||
| 588 | out: | ||
| 589 | kfree(vol_args); | ||
| 590 | return ret; | ||
| 591 | } | ||
| 592 | |||
| 593 | long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) | ||
| 594 | { | ||
| 595 | struct inode *inode = fdentry(file)->d_inode; | ||
| 596 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 597 | struct file *src_file; | ||
| 598 | struct inode *src; | ||
| 599 | struct btrfs_trans_handle *trans; | ||
| 600 | struct btrfs_path *path; | ||
| 601 | struct extent_buffer *leaf; | ||
| 602 | char *buf; | ||
| 603 | struct btrfs_key key; | ||
| 604 | u32 nritems; | ||
| 605 | int slot; | ||
| 606 | int ret; | ||
| 607 | |||
| 608 | src_file = fget(src_fd); | ||
| 609 | if (!src_file) | ||
| 610 | return -EBADF; | ||
| 611 | src = src_file->f_dentry->d_inode; | ||
| 612 | |||
| 613 | ret = -EISDIR; | ||
| 614 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) | ||
| 615 | goto out_fput; | ||
| 616 | |||
| 617 | ret = -EXDEV; | ||
| 618 | if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root) | ||
| 619 | goto out_fput; | ||
| 620 | |||
| 621 | ret = -ENOMEM; | ||
| 622 | buf = vmalloc(btrfs_level_size(root, 0)); | ||
| 623 | if (!buf) | ||
| 624 | goto out_fput; | ||
| 625 | |||
| 626 | path = btrfs_alloc_path(); | ||
| 627 | if (!path) { | ||
| 628 | vfree(buf); | ||
| 629 | goto out_fput; | ||
| 630 | } | ||
| 631 | path->reada = 2; | ||
| 632 | |||
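| | /* lock the two inodes in address order to avoid ABBA deadlocks */ | ||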
| 633 | if (inode < src) { | ||
| 634 | mutex_lock(&inode->i_mutex); | ||
| 635 | mutex_lock(&src->i_mutex); | ||
| 636 | } else { | ||
| 637 | mutex_lock(&src->i_mutex); | ||
| 638 | mutex_lock(&inode->i_mutex); | ||
| 639 | } | ||
| 640 | |||
| 641 | ret = -ENOTEMPTY; | ||
| 642 | if (inode->i_size) | ||
| 643 | goto out_unlock; | ||
| 644 | |||
| 645 | /* do any pending delalloc/csum calc on src, one way or | ||
| 646 | another, and lock file content */ | ||
| 647 | while (1) { | ||
| 648 | struct btrfs_ordered_extent *ordered; | ||
| 649 | lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); | ||
| 650 | ordered = btrfs_lookup_first_ordered_extent(src, (u64)-1); | ||
| 651 | if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) | ||
| 652 | break; | ||
| 653 | unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); | ||
| 654 | if (ordered) | ||
| 655 | btrfs_put_ordered_extent(ordered); | ||
| 656 | btrfs_wait_ordered_range(src, 0, (u64)-1); | ||
| 657 | } | ||
| 658 | |||
| 659 | trans = btrfs_start_transaction(root, 1); | ||
| 660 | BUG_ON(!trans); | ||
| 661 | |||
| 662 | key.objectid = src->i_ino; | ||
| 663 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 664 | key.offset = 0; | ||
| 665 | |||
| 666 | while (1) { | ||
| 667 | /* | ||
| 668 | * note the key will change type as we walk through the | ||
| 669 | * tree. | ||
| 670 | */ | ||
| 671 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | ||
| 672 | if (ret < 0) | ||
| 673 | goto out; | ||
| 674 | |||
| 675 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 676 | if (path->slots[0] >= nritems) { | ||
| 677 | ret = btrfs_next_leaf(root, path); | ||
| 678 | if (ret < 0) | ||
| 679 | goto out; | ||
| 680 | if (ret > 0) | ||
| 681 | break; | ||
| 682 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 683 | } | ||
| 684 | leaf = path->nodes[0]; | ||
| 685 | slot = path->slots[0]; | ||
| 686 | |||
| 687 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 688 | if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || | ||
| 689 | key.objectid != src->i_ino) | ||
| 690 | break; | ||
| 691 | |||
| 692 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || | ||
| 693 | btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { | ||
| 694 | u32 size; | ||
| 695 | struct btrfs_key new_key; | ||
| 696 | |||
| 697 | size = btrfs_item_size_nr(leaf, slot); | ||
| 698 | read_extent_buffer(leaf, buf, | ||
| 699 | btrfs_item_ptr_offset(leaf, slot), | ||
| 700 | size); | ||
| 701 | btrfs_release_path(root, path); | ||
| 702 | |||
| 703 | memcpy(&new_key, &key, sizeof(new_key)); | ||
| 704 | new_key.objectid = inode->i_ino; | ||
| 705 | ret = btrfs_insert_empty_item(trans, root, path, | ||
| 706 | &new_key, size); | ||
| 707 | if (ret) | ||
| 708 | goto out; | ||
| 709 | |||
| 710 | leaf = path->nodes[0]; | ||
| 711 | slot = path->slots[0]; | ||
| 712 | write_extent_buffer(leaf, buf, | ||
| 713 | btrfs_item_ptr_offset(leaf, slot), | ||
| 714 | size); | ||
| 715 | btrfs_mark_buffer_dirty(leaf); | ||
| 716 | } | ||
| 717 | |||
| 718 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { | ||
| 719 | struct btrfs_file_extent_item *extent; | ||
| 720 | int found_type; | ||
| 721 | |||
| 722 | extent = btrfs_item_ptr(leaf, slot, | ||
| 723 | struct btrfs_file_extent_item); | ||
| 724 | found_type = btrfs_file_extent_type(leaf, extent); | ||
| 725 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
| 726 | u64 ds = btrfs_file_extent_disk_bytenr(leaf, | ||
| 727 | extent); | ||
| 728 | u64 dl = btrfs_file_extent_disk_num_bytes(leaf, | ||
| 729 | extent); | ||
| 730 | /* ds == 0 means there's a hole */ | ||
| 731 | if (ds != 0) { | ||
| 732 | ret = btrfs_inc_extent_ref(trans, root, | ||
| 733 | ds, dl, leaf->start, | ||
| 734 | root->root_key.objectid, | ||
| 735 | trans->transid, | ||
| 736 | inode->i_ino); | ||
| 737 | BUG_ON(ret); | ||
| 738 | } | ||
| 739 | } | ||
| 740 | } | ||
| 741 | btrfs_release_path(root, path); | ||
| 742 | key.offset++; | ||
| 743 | } | ||
| 744 | ret = 0; | ||
| 745 | out: | ||
| 746 | btrfs_release_path(root, path); | ||
| 747 | if (ret == 0) { | ||
| 748 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 749 | inode_set_bytes(inode, inode_get_bytes(src)); | ||
| 750 | btrfs_i_size_write(inode, src->i_size); | ||
| 751 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
| 752 | ret = btrfs_update_inode(trans, root, inode); | ||
| 753 | } | ||
| 754 | btrfs_end_transaction(trans, root); | ||
| 755 | unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); | ||
| 756 | if (ret) | ||
| 757 | vmtruncate(inode, 0); | ||
| 758 | out_unlock: | ||
| 759 | mutex_unlock(&src->i_mutex); | ||
| 760 | mutex_unlock(&inode->i_mutex); | ||
| 761 | vfree(buf); | ||
| 762 | btrfs_free_path(path); | ||
| 763 | out_fput: | ||
| 764 | fput(src_file); | ||
| 765 | return ret; | ||
| 766 | } | ||
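
Unlike the vol_args ioctls, BTRFS_IOC_CLONE passes the source file descriptor itself as the ioctl argument; the destination must be an empty regular file in the same root, and the kernel shares extents by copying the EXTENT_DATA items and bumping extent backrefs rather than copying data. A hedged userspace sketch (file names hypothetical):

	/*
	 * Userspace sketch of BTRFS_IOC_CLONE, assuming the request
	 * code from fs/btrfs/ioctl.h. The argument is the source fd,
	 * not a pointer to a struct.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#define BTRFS_IOCTL_MAGIC 0x94
	#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)

	int main(void)
	{
		int src = open("src.dat", O_RDONLY);
		int dst = open("clone.dat", O_WRONLY | O_CREAT | O_EXCL, 0644);

		if (src < 0 || dst < 0)
			return 1;
		/* shares src's extents with dst; no file data is copied */
		if (ioctl(dst, BTRFS_IOC_CLONE, src) < 0)
			perror("BTRFS_IOC_CLONE");
		close(src);
		close(dst);
		return 0;
	}

Because only backrefs are incremented, the cost of a clone scales with the source's metadata, not its size in bytes.
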
| 767 | |||
| 768 | /* | ||
| 769 | * there are many ways the trans_start and trans_end ioctls can lead | ||
| 770 | * to deadlocks. They should only be used by applications that | ||
| 771 | * basically own the machine, and have a very in depth understanding | ||
| 772 | * of all the possible deadlocks and enospc problems. | ||
| 773 | */ | ||
| 774 | long btrfs_ioctl_trans_start(struct file *file) | ||
| 775 | { | ||
| 776 | struct inode *inode = fdentry(file)->d_inode; | ||
| 777 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 778 | struct btrfs_trans_handle *trans; | ||
| 779 | int ret = 0; | ||
| 780 | |||
| 781 | if (!capable(CAP_SYS_ADMIN)) | ||
| 782 | return -EPERM; | ||
| 783 | |||
| 784 | if (file->private_data) { | ||
| 785 | ret = -EINPROGRESS; | ||
| 786 | goto out; | ||
| 787 | } | ||
| 788 | |||
| 789 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 790 | root->fs_info->open_ioctl_trans++; | ||
| 791 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 792 | |||
| 793 | trans = btrfs_start_ioctl_transaction(root, 0); | ||
| 794 | if (trans) | ||
| 795 | file->private_data = trans; | ||
| 796 | else | ||
| 797 | ret = -ENOMEM; | ||
| 798 | /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ | ||
| 799 | out: | ||
| 800 | return ret; | ||
| 801 | } | ||
| 802 | |||
| 803 | /* | ||
| 804 | * there are many ways the trans_start and trans_end ioctls can lead | ||
| 805 | * to deadlocks. They should only be used by applications that | ||
| 806 | * basically own the machine, and have a very in depth understanding | ||
| 807 | * of all the possible deadlocks and enospc problems. | ||
| 808 | */ | ||
| 809 | long btrfs_ioctl_trans_end(struct file *file) | ||
| 810 | { | ||
| 811 | struct inode *inode = fdentry(file)->d_inode; | ||
| 812 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 813 | struct btrfs_trans_handle *trans; | ||
| 814 | int ret = 0; | ||
| 815 | |||
| 816 | trans = file->private_data; | ||
| 817 | if (!trans) { | ||
| 818 | ret = -EINVAL; | ||
| 819 | goto out; | ||
| 820 | } | ||
| 821 | btrfs_end_transaction(trans, root); | ||
| 822 | file->private_data = NULL; | ||
| 823 | |||
| 824 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 825 | root->fs_info->open_ioctl_trans--; | ||
| 826 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 827 | |||
| 828 | out: | ||
| 829 | return ret; | ||
| 830 | } | ||
| 831 | |||
| 832 | long btrfs_ioctl(struct file *file, unsigned int cmd, | ||
| 833 | unsigned long arg) | ||
| 834 | { | ||
| 835 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 836 | |||
| 837 | switch (cmd) { | ||
| 838 | case BTRFS_IOC_SNAP_CREATE: | ||
| 839 | return btrfs_ioctl_snap_create(file, (void __user *)arg); | ||
| 840 | case BTRFS_IOC_DEFRAG: | ||
| 841 | return btrfs_ioctl_defrag(file); | ||
| 842 | case BTRFS_IOC_RESIZE: | ||
| 843 | return btrfs_ioctl_resize(root, (void __user *)arg); | ||
| 844 | case BTRFS_IOC_ADD_DEV: | ||
| 845 | return btrfs_ioctl_add_dev(root, (void __user *)arg); | ||
| 846 | case BTRFS_IOC_RM_DEV: | ||
| 847 | return btrfs_ioctl_rm_dev(root, (void __user *)arg); | ||
| 848 | case BTRFS_IOC_BALANCE: | ||
| 849 | return btrfs_balance(root->fs_info->dev_root); | ||
| 850 | case BTRFS_IOC_CLONE: | ||
| 851 | return btrfs_ioctl_clone(file, arg); | ||
| 852 | case BTRFS_IOC_TRANS_START: | ||
| 853 | return btrfs_ioctl_trans_start(file); | ||
| 854 | case BTRFS_IOC_TRANS_END: | ||
| 855 | return btrfs_ioctl_trans_end(file); | ||
| 856 | case BTRFS_IOC_SYNC: | ||
| 857 | btrfs_start_delalloc_inodes(root); | ||
| 858 | btrfs_sync_fs(file->f_dentry->d_sb, 1); | ||
| 859 | return 0; | ||
| 860 | } | ||
| 861 | |||
| 862 | return -ENOTTY; | ||
| 863 | } | ||
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h new file mode 100644 index 00000000000..85ed35a775b --- /dev/null +++ b/fs/btrfs/ioctl.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_IOCTL_ | ||
| 20 | #define __BTRFS_IOCTL_ | ||
| 21 | #include <linux/ioctl.h> | ||
| 22 | |||
| 23 | #define BTRFS_IOCTL_MAGIC 0x94 | ||
| 24 | #define BTRFS_VOL_NAME_MAX 255 | ||
| 25 | #define BTRFS_PATH_NAME_MAX 4095 | ||
| 26 | |||
| 27 | struct btrfs_ioctl_vol_args { | ||
| 28 | char name[BTRFS_PATH_NAME_MAX + 1]; | ||
| 29 | }; | ||
| 30 | |||
| 31 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | ||
| 32 | struct btrfs_ioctl_vol_args) | ||
| 33 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | ||
| 34 | struct btrfs_ioctl_vol_args) | ||
| 35 | #define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ | ||
| 36 | struct btrfs_ioctl_vol_args) | ||
| 37 | #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ | ||
| 38 | struct btrfs_ioctl_vol_args) | ||
| 39 | /* trans start and trans end are dangerous, and only for | ||
| 40 | * use by applications that know how to avoid the | ||
| 41 | * resulting deadlocks | ||
| 42 | */ | ||
| 43 | #define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) | ||
| 44 | #define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) | ||
| 45 | #define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) | ||
| 46 | |||
| 47 | #define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) | ||
| 48 | #define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ | ||
| 49 | struct btrfs_ioctl_vol_args) | ||
| 50 | #define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ | ||
| 51 | struct btrfs_ioctl_vol_args) | ||
| 52 | #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ | ||
| 53 | struct btrfs_ioctl_vol_args) | ||
| 54 | |||
| 55 | #endif | ||
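
The request codes above are ordinary _IO()/_IOW() compositions of the 0x94 magic byte, a command number, and the argument size. A small sketch that decodes them with the standard _IOC_* helpers; the field layout is the usual asm-generic encoding and may vary by architecture:

	/* Decodes the ioctl request codes defined above. */
	#include <stdio.h>
	#include <sys/ioctl.h>

	#define BTRFS_IOCTL_MAGIC 0x94
	#define BTRFS_PATH_NAME_MAX 4095

	struct btrfs_ioctl_vol_args {
		char name[BTRFS_PATH_NAME_MAX + 1];
	};

	int main(void)
	{
		unsigned long add_dev = _IOW(BTRFS_IOCTL_MAGIC, 10,
					     struct btrfs_ioctl_vol_args);
		unsigned long sync = _IO(BTRFS_IOCTL_MAGIC, 8);

		/* _IOW packs direction, sizeof(arg), magic and nr */
		printf("ADD_DEV: 0x%lx (nr %u, size %u)\n", add_dev,
		       (unsigned int)_IOC_NR(add_dev),
		       (unsigned int)_IOC_SIZE(add_dev));
		printf("SYNC:    0x%lx\n", sync);
		return 0;
	}

Note that sizeof(struct btrfs_ioctl_vol_args) is 4096 bytes, which still fits in the 14-bit size field of the encoding.
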
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c new file mode 100644 index 00000000000..e30aa6e2958 --- /dev/null +++ b/fs/btrfs/locking.c | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/gfp.h> | ||
| 20 | #include <linux/pagemap.h> | ||
| 21 | #include <linux/spinlock.h> | ||
| 22 | #include <linux/page-flags.h> | ||
| 23 | #include <asm/bug.h> | ||
| 24 | #include "ctree.h" | ||
| 25 | #include "extent_io.h" | ||
| 26 | #include "locking.h" | ||
| 27 | |||
| 28 | /* | ||
| 29 | * locks the per buffer mutex in an extent buffer. This uses adaptive | ||
| 30 | * locking; the spin count is not tuned very extensively. The spinning does | ||
| 31 | * make a big difference in almost every workload, but spinning for the | ||
| 32 | * right amount of time needs some help. | ||
| 33 | * | ||
| 34 | * In general, we want to spin as long as the lock holder is doing btree | ||
| 35 | * searches, and we should give up if they are in more expensive code. | ||
| 36 | */ | ||
| 37 | int btrfs_tree_lock(struct extent_buffer *eb) | ||
| 38 | { | ||
| 39 | int i; | ||
| 40 | |||
| 41 | if (mutex_trylock(&eb->mutex)) | ||
| 42 | return 0; | ||
| 43 | for (i = 0; i < 512; i++) { | ||
| 44 | cpu_relax(); | ||
| 45 | if (mutex_trylock(&eb->mutex)) | ||
| 46 | return 0; | ||
| 47 | } | ||
| 48 | cpu_relax(); | ||
| 49 | mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); | ||
| 50 | return 0; | ||
| 51 | } | ||
| 52 | |||
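
The try/spin/block shape above is easy to lift out of btrfs. A userspace sketch of the same adaptive pattern on a pthread mutex, keeping the untuned 512-iteration budget; cpu_relax() is approximated with a compiler barrier:

	/*
	 * Adaptive lock sketch: try, spin briefly retrying, then fall
	 * back to a blocking acquire, mirroring btrfs_tree_lock().
	 */
	#include <pthread.h>

	static inline void cpu_relax_approx(void)
	{
		__asm__ __volatile__("" ::: "memory");
	}

	int adaptive_lock(pthread_mutex_t *m)
	{
		int i;

		if (pthread_mutex_trylock(m) == 0)
			return 0;
		for (i = 0; i < 512; i++) {
			cpu_relax_approx();
			if (pthread_mutex_trylock(m) == 0)
				return 0;
		}
		/* lock holder is busy; give up the CPU and block */
		return pthread_mutex_lock(m);
	}
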
| 53 | int btrfs_try_tree_lock(struct extent_buffer *eb) | ||
| 54 | { | ||
| 55 | return mutex_trylock(&eb->mutex); | ||
| 56 | } | ||
| 57 | |||
| 58 | int btrfs_tree_unlock(struct extent_buffer *eb) | ||
| 59 | { | ||
| 60 | mutex_unlock(&eb->mutex); | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | int btrfs_tree_locked(struct extent_buffer *eb) | ||
| 65 | { | ||
| 66 | return mutex_is_locked(&eb->mutex); | ||
| 67 | } | ||
| 68 | |||
| 69 | /* | ||
| 70 | * btrfs_search_slot uses this to decide if it should drop its locks | ||
| 71 | * before doing something expensive like allocating free blocks for cow. | ||
| 72 | */ | ||
| 73 | int btrfs_path_lock_waiting(struct btrfs_path *path, int level) | ||
| 74 | { | ||
| 75 | int i; | ||
| 76 | struct extent_buffer *eb; | ||
| 77 | for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { | ||
| 78 | eb = path->nodes[i]; | ||
| 79 | if (!eb) | ||
| 80 | break; | ||
| 81 | smp_mb(); | ||
| 82 | if (!list_empty(&eb->mutex.wait_list)) | ||
| 83 | return 1; | ||
| 84 | } | ||
| 85 | return 0; | ||
| 86 | } | ||
| 87 | |||
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h new file mode 100644 index 00000000000..bc1faef1251 --- /dev/null +++ b/fs/btrfs/locking.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_LOCKING_ | ||
| 20 | #define __BTRFS_LOCKING_ | ||
| 21 | |||
| 22 | int btrfs_tree_lock(struct extent_buffer *eb); | ||
| 23 | int btrfs_tree_unlock(struct extent_buffer *eb); | ||
| 24 | int btrfs_tree_locked(struct extent_buffer *eb); | ||
| 25 | int btrfs_try_tree_lock(struct extent_buffer *eb); | ||
| 26 | int btrfs_path_lock_waiting(struct btrfs_path *path, int level); | ||
| 27 | #endif | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c new file mode 100644 index 00000000000..2eb6caba57c --- /dev/null +++ b/fs/btrfs/ordered-data.c | |||
| @@ -0,0 +1,727 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/gfp.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <linux/blkdev.h> | ||
| 22 | #include <linux/writeback.h> | ||
| 23 | #include <linux/pagevec.h> | ||
| 24 | #include "ctree.h" | ||
| 25 | #include "transaction.h" | ||
| 26 | #include "btrfs_inode.h" | ||
| 27 | #include "extent_io.h" | ||
| 28 | |||
| 29 | static u64 entry_end(struct btrfs_ordered_extent *entry) | ||
| 30 | { | ||
| 31 | if (entry->file_offset + entry->len < entry->file_offset) | ||
| 32 | return (u64)-1; | ||
| 33 | return entry->file_offset + entry->len; | ||
| 34 | } | ||
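
entry_end() saturates instead of wrapping: if file_offset + len overflows u64 it reports (u64)-1, so the range comparisons in the rbtree code below stay monotonic. A tiny standalone demonstration of the guard:

	/* Demonstrates the overflow saturation in entry_end() above. */
	#include <stdint.h>
	#include <stdio.h>

	static uint64_t entry_end(uint64_t file_offset, uint64_t len)
	{
		if (file_offset + len < file_offset)	/* wrapped */
			return (uint64_t)-1;
		return file_offset + len;
	}

	int main(void)
	{
		printf("%llu\n", (unsigned long long)entry_end(4096, 8192));
		/* saturates instead of wrapping to a small value */
		printf("%llu\n",
		       (unsigned long long)entry_end(UINT64_MAX - 10, 20));
		return 0;
	}
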
| 35 | |||
| 36 | /* returns NULL if the insertion worked, or the existing node that | ||
| 37 | * was found in the tree | ||
| 38 | */ | ||
| 39 | static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, | ||
| 40 | struct rb_node *node) | ||
| 41 | { | ||
| 42 | struct rb_node **p = &root->rb_node; | ||
| 43 | struct rb_node *parent = NULL; | ||
| 44 | struct btrfs_ordered_extent *entry; | ||
| 45 | |||
| 46 | while (*p) { | ||
| 47 | parent = *p; | ||
| 48 | entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); | ||
| 49 | |||
| 50 | if (file_offset < entry->file_offset) | ||
| 51 | p = &(*p)->rb_left; | ||
| 52 | else if (file_offset >= entry_end(entry)) | ||
| 53 | p = &(*p)->rb_right; | ||
| 54 | else | ||
| 55 | return parent; | ||
| 56 | } | ||
| 57 | |||
| 58 | rb_link_node(node, parent, p); | ||
| 59 | rb_insert_color(node, root); | ||
| 60 | return NULL; | ||
| 61 | } | ||
| 62 | |||
| 63 | /* | ||
| 64 | * look for a given offset in the tree, and if it can't be found return the | ||
| 65 | * first lesser offset | ||
| 66 | */ | ||
| 67 | static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, | ||
| 68 | struct rb_node **prev_ret) | ||
| 69 | { | ||
| 70 | struct rb_node *n = root->rb_node; | ||
| 71 | struct rb_node *prev = NULL; | ||
| 72 | struct rb_node *test; | ||
| 73 | struct btrfs_ordered_extent *entry; | ||
| 74 | struct btrfs_ordered_extent *prev_entry = NULL; | ||
| 75 | |||
| 76 | while (n) { | ||
| 77 | entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); | ||
| 78 | prev = n; | ||
| 79 | prev_entry = entry; | ||
| 80 | |||
| 81 | if (file_offset < entry->file_offset) | ||
| 82 | n = n->rb_left; | ||
| 83 | else if (file_offset >= entry_end(entry)) | ||
| 84 | n = n->rb_right; | ||
| 85 | else | ||
| 86 | return n; | ||
| 87 | } | ||
| 88 | if (!prev_ret) | ||
| 89 | return NULL; | ||
| 90 | |||
| 91 | while (prev && file_offset >= entry_end(prev_entry)) { | ||
| 92 | test = rb_next(prev); | ||
| 93 | if (!test) | ||
| 94 | break; | ||
| 95 | prev_entry = rb_entry(test, struct btrfs_ordered_extent, | ||
| 96 | rb_node); | ||
| 97 | if (file_offset < entry_end(prev_entry)) | ||
| 98 | break; | ||
| 99 | |||
| 100 | prev = test; | ||
| 101 | } | ||
| 102 | if (prev) | ||
| 103 | prev_entry = rb_entry(prev, struct btrfs_ordered_extent, | ||
| 104 | rb_node); | ||
| 105 | while (prev && file_offset < entry_end(prev_entry)) { | ||
| 106 | test = rb_prev(prev); | ||
| 107 | if (!test) | ||
| 108 | break; | ||
| 109 | prev_entry = rb_entry(test, struct btrfs_ordered_extent, | ||
| 110 | rb_node); | ||
| 111 | prev = test; | ||
| 112 | } | ||
| 113 | *prev_ret = prev; | ||
| 114 | return NULL; | ||
| 115 | } | ||
| 116 | |||
| 117 | /* | ||
| 118 | * helper to check if a given offset is inside a given entry | ||
| 119 | */ | ||
| 120 | static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | ||
| 121 | { | ||
| 122 | if (file_offset < entry->file_offset || | ||
| 123 | entry->file_offset + entry->len <= file_offset) | ||
| 124 | return 0; | ||
| 125 | return 1; | ||
| 126 | } | ||
| 127 | |||
| 128 | /* | ||
| 129 | * find the first ordered struct that has this offset, otherwise | ||
| 130 | * the first one less than this offset | ||
| 131 | */ | ||
| 132 | static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | ||
| 133 | u64 file_offset) | ||
| 134 | { | ||
| 135 | struct rb_root *root = &tree->tree; | ||
| 136 | struct rb_node *prev; | ||
| 137 | struct rb_node *ret; | ||
| 138 | struct btrfs_ordered_extent *entry; | ||
| 139 | |||
| 140 | if (tree->last) { | ||
| 141 | entry = rb_entry(tree->last, struct btrfs_ordered_extent, | ||
| 142 | rb_node); | ||
| 143 | if (offset_in_entry(entry, file_offset)) | ||
| 144 | return tree->last; | ||
| 145 | } | ||
| 146 | ret = __tree_search(root, file_offset, &prev); | ||
| 147 | if (!ret) | ||
| 148 | ret = prev; | ||
| 149 | if (ret) | ||
| 150 | tree->last = ret; | ||
| 151 | return ret; | ||
| 152 | } | ||
| 153 | |||
| 154 | /* allocate and add a new ordered_extent into the per-inode tree. | ||
| 155 | * file_offset is the logical offset in the file | ||
| 156 | * | ||
| 157 | * start is the disk block number of an extent already reserved in the | ||
| 158 | * extent allocation tree | ||
| 159 | * | ||
| 160 | * len is the length of the extent | ||
| 161 | * | ||
| 162 | * This also sets the EXTENT_ORDERED bit on the range in the inode. | ||
| 163 | * | ||
| 164 | * The tree is given a single reference on the ordered extent that was | ||
| 165 | * inserted. | ||
| 166 | */ | ||
| 167 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
| 168 | u64 start, u64 len, int nocow) | ||
| 169 | { | ||
| 170 | struct btrfs_ordered_inode_tree *tree; | ||
| 171 | struct rb_node *node; | ||
| 172 | struct btrfs_ordered_extent *entry; | ||
| 173 | |||
| 174 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 175 | entry = kzalloc(sizeof(*entry), GFP_NOFS); | ||
| 176 | if (!entry) | ||
| 177 | return -ENOMEM; | ||
| 178 | |||
| 179 | mutex_lock(&tree->mutex); | ||
| 180 | entry->file_offset = file_offset; | ||
| 181 | entry->start = start; | ||
| 182 | entry->len = len; | ||
| 183 | entry->inode = inode; | ||
| 184 | if (nocow) | ||
| 185 | set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); | ||
| 186 | |||
| 187 | /* one ref for the tree */ | ||
| 188 | atomic_set(&entry->refs, 1); | ||
| 189 | init_waitqueue_head(&entry->wait); | ||
| 190 | INIT_LIST_HEAD(&entry->list); | ||
| 191 | INIT_LIST_HEAD(&entry->root_extent_list); | ||
| 192 | |||
| 193 | node = tree_insert(&tree->tree, file_offset, | ||
| 194 | &entry->rb_node); | ||
| 195 | if (node) { | ||
| 196 | printk("warning dup entry from add_ordered_extent\n"); | ||
| 197 | BUG(); | ||
| 198 | } | ||
| 199 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | ||
| 200 | entry_end(entry) - 1, GFP_NOFS); | ||
| 201 | |||
| 202 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
| 203 | list_add_tail(&entry->root_extent_list, | ||
| 204 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | ||
| 205 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
| 206 | |||
| 207 | mutex_unlock(&tree->mutex); | ||
| 208 | BUG_ON(node); | ||
| 209 | return 0; | ||
| 210 | } | ||
| 211 | |||
| 212 | /* | ||
| 213 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | ||
| 214 | * when an ordered extent is finished. If the list covers more than one | ||
| 215 | * ordered extent, it is split across multiple ordered extents. | ||
| 216 | */ | ||
| 217 | int btrfs_add_ordered_sum(struct inode *inode, | ||
| 218 | struct btrfs_ordered_extent *entry, | ||
| 219 | struct btrfs_ordered_sum *sum) | ||
| 220 | { | ||
| 221 | struct btrfs_ordered_inode_tree *tree; | ||
| 222 | |||
| 223 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 224 | mutex_lock(&tree->mutex); | ||
| 225 | list_add_tail(&sum->list, &entry->list); | ||
| 226 | mutex_unlock(&tree->mutex); | ||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * this is used to account for finished IO across a given range | ||
| 232 | * of the file. The IO should not span ordered extents. If | ||
| 233 | * a given ordered_extent is completely done, 1 is returned, otherwise | ||
| 234 | * 0. | ||
| 235 | * | ||
| 236 | * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used | ||
| 237 | * to make sure this function only returns 1 once for a given ordered extent. | ||
| 238 | */ | ||
| 239 | int btrfs_dec_test_ordered_pending(struct inode *inode, | ||
| 240 | u64 file_offset, u64 io_size) | ||
| 241 | { | ||
| 242 | struct btrfs_ordered_inode_tree *tree; | ||
| 243 | struct rb_node *node; | ||
| 244 | struct btrfs_ordered_extent *entry; | ||
| 245 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 246 | int ret; | ||
| 247 | |||
| 248 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 249 | mutex_lock(&tree->mutex); | ||
| 250 | clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, | ||
| 251 | GFP_NOFS); | ||
| 252 | node = tree_search(tree, file_offset); | ||
| 253 | if (!node) { | ||
| 254 | ret = 1; | ||
| 255 | goto out; | ||
| 256 | } | ||
| 257 | |||
| 258 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 259 | if (!offset_in_entry(entry, file_offset)) { | ||
| 260 | ret = 1; | ||
| 261 | goto out; | ||
| 262 | } | ||
| 263 | |||
| 264 | ret = test_range_bit(io_tree, entry->file_offset, | ||
| 265 | entry->file_offset + entry->len - 1, | ||
| 266 | EXTENT_ORDERED, 0); | ||
| 267 | if (ret == 0) | ||
| 268 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | ||
| 269 | out: | ||
| 270 | mutex_unlock(&tree->mutex); | ||
| 271 | return ret == 0; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * used to drop a reference on an ordered extent. This will free | ||
| 276 | * the extent if the last reference is dropped | ||
| 277 | */ | ||
| 278 | int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | ||
| 279 | { | ||
| 280 | struct list_head *cur; | ||
| 281 | struct btrfs_ordered_sum *sum; | ||
| 282 | |||
| 283 | if (atomic_dec_and_test(&entry->refs)) { | ||
| 284 | while (!list_empty(&entry->list)) { | ||
| 285 | cur = entry->list.next; | ||
| 286 | sum = list_entry(cur, struct btrfs_ordered_sum, list); | ||
| 287 | list_del(&sum->list); | ||
| 288 | kfree(sum); | ||
| 289 | } | ||
| 290 | kfree(entry); | ||
| 291 | } | ||
| 292 | return 0; | ||
| 293 | } | ||
| 294 | |||
| 295 | /* | ||
| 296 | * remove an ordered extent from the tree. No references are dropped | ||
| 297 | * but anyone waiting on this extent is woken up. | ||
| 298 | */ | ||
| 299 | int btrfs_remove_ordered_extent(struct inode *inode, | ||
| 300 | struct btrfs_ordered_extent *entry) | ||
| 301 | { | ||
| 302 | struct btrfs_ordered_inode_tree *tree; | ||
| 303 | struct rb_node *node; | ||
| 304 | |||
| 305 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 306 | mutex_lock(&tree->mutex); | ||
| 307 | node = &entry->rb_node; | ||
| 308 | rb_erase(node, &tree->tree); | ||
| 309 | tree->last = NULL; | ||
| 310 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | ||
| 311 | |||
| 312 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
| 313 | list_del_init(&entry->root_extent_list); | ||
| 314 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
| 315 | |||
| 316 | mutex_unlock(&tree->mutex); | ||
| 317 | wake_up(&entry->wait); | ||
| 318 | return 0; | ||
| 319 | } | ||
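
Between add and remove, the reference rule is: the tree owns one reference from btrfs_add_ordered_extent(), lookups take extra ones, and the object is freed only when the final reference is put. A minimal userspace approximation with C11 atomics; the names here are stand-ins, not kernel API:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct ordered {
		atomic_int refs;
	};

	static struct ordered *ordered_add(void)
	{
		struct ordered *o = malloc(sizeof(*o));

		if (o)
			atomic_init(&o->refs, 1);	/* one ref for the tree */
		return o;
	}

	static void ordered_put(struct ordered *o)
	{
		if (atomic_fetch_sub(&o->refs, 1) == 1) {
			printf("last ref dropped, freeing\n");
			free(o);
		}
	}

	int main(void)
	{
		struct ordered *o = ordered_add();

		if (!o)
			return 1;
		atomic_fetch_add(&o->refs, 1);	/* a lookup takes a ref */
		ordered_put(o);			/* lookup done */
		ordered_put(o);			/* tree removal: freed */
		return 0;
	}
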
| 320 | |||
| 321 | /* | ||
| 322 | * wait for all the ordered extents in a root. This is done when balancing | ||
| 323 | * space between drives. | ||
| 324 | */ | ||
| 325 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | ||
| 326 | { | ||
| 327 | struct list_head splice; | ||
| 328 | struct list_head *cur; | ||
| 329 | struct btrfs_ordered_extent *ordered; | ||
| 330 | struct inode *inode; | ||
| 331 | |||
| 332 | INIT_LIST_HEAD(&splice); | ||
| 333 | |||
| 334 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
| 335 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
| 336 | while (!list_empty(&splice)) { | ||
| 337 | cur = splice.next; | ||
| 338 | ordered = list_entry(cur, struct btrfs_ordered_extent, | ||
| 339 | root_extent_list); | ||
| 340 | if (nocow_only && | ||
| 341 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
| 342 | list_move(&ordered->root_extent_list, | ||
| 343 | &root->fs_info->ordered_extents); | ||
| 344 | cond_resched_lock(&root->fs_info->ordered_extent_lock); | ||
| 345 | continue; | ||
| 346 | } | ||
| 347 | |||
| 348 | list_del_init(&ordered->root_extent_list); | ||
| 349 | atomic_inc(&ordered->refs); | ||
| 350 | |||
| 351 | /* | ||
| 352 | * the inode may be getting freed (in sys_unlink path). | ||
| 353 | */ | ||
| 354 | inode = igrab(ordered->inode); | ||
| 355 | |||
| 356 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
| 357 | |||
| 358 | if (inode) { | ||
| 359 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 360 | btrfs_put_ordered_extent(ordered); | ||
| 361 | iput(inode); | ||
| 362 | } else { | ||
| 363 | btrfs_put_ordered_extent(ordered); | ||
| 364 | } | ||
| 365 | |||
| 366 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
| 367 | } | ||
| 368 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
| 369 | return 0; | ||
| 370 | } | ||
| 371 | |||
| 372 | /* | ||
| 373 | * Used to start IO or wait for a given ordered extent to finish. | ||
| 374 | * | ||
| 375 | * If wait is one, this effectively waits on page writeback for all the pages | ||
| 376 | * in the extent, and it waits on the io completion code to insert | ||
| 377 | * metadata into the btree corresponding to the extent | ||
| 378 | */ | ||
| 379 | void btrfs_start_ordered_extent(struct inode *inode, | ||
| 380 | struct btrfs_ordered_extent *entry, | ||
| 381 | int wait) | ||
| 382 | { | ||
| 383 | u64 start = entry->file_offset; | ||
| 384 | u64 end = start + entry->len - 1; | ||
| 385 | |||
| 386 | /* | ||
| 387 | * pages in the range can be dirty, clean or writeback. We | ||
| 388 | * start IO on any dirty ones so the wait doesn't stall waiting | ||
| 389 | * for pdflush to find them | ||
| 390 | */ | ||
| 391 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); | ||
| 392 | if (wait) | ||
| 393 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | ||
| 394 | &entry->flags)); | ||
| 395 | } | ||
| 396 | |||
| 397 | /* | ||
| 398 | * Used to wait on ordered extents across a large range of bytes. | ||
| 399 | */ | ||
| 400 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | ||
| 401 | { | ||
| 402 | u64 end; | ||
| 403 | u64 orig_end; | ||
| 404 | u64 wait_end; | ||
| 405 | struct btrfs_ordered_extent *ordered; | ||
| 406 | |||
| 407 | if (start + len < start) { | ||
| 408 | orig_end = INT_LIMIT(loff_t); | ||
| 409 | } else { | ||
| 410 | orig_end = start + len - 1; | ||
| 411 | if (orig_end > INT_LIMIT(loff_t)) | ||
| 412 | orig_end = INT_LIMIT(loff_t); | ||
| 413 | } | ||
| 414 | wait_end = orig_end; | ||
| 415 | again: | ||
| 416 | /* start IO across the range first to instantiate any delalloc | ||
| 417 | * extents | ||
| 418 | */ | ||
| 419 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); | ||
| 420 | |||
| 421 | btrfs_wait_on_page_writeback_range(inode->i_mapping, | ||
| 422 | start >> PAGE_CACHE_SHIFT, | ||
| 423 | orig_end >> PAGE_CACHE_SHIFT); | ||
| 424 | |||
| 425 | end = orig_end; | ||
| 426 | while (1) { | ||
| 427 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | ||
| 428 | if (!ordered) { | ||
| 429 | break; | ||
| 430 | } | ||
| 431 | if (ordered->file_offset > orig_end) { | ||
| 432 | btrfs_put_ordered_extent(ordered); | ||
| 433 | break; | ||
| 434 | } | ||
| 435 | if (ordered->file_offset + ordered->len < start) { | ||
| 436 | btrfs_put_ordered_extent(ordered); | ||
| 437 | break; | ||
| 438 | } | ||
| 439 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 440 | end = ordered->file_offset; | ||
| 441 | btrfs_put_ordered_extent(ordered); | ||
| 442 | if (end == 0 || end == start) | ||
| 443 | break; | ||
| 444 | end--; | ||
| 445 | } | ||
| 446 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | ||
| 447 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | ||
| 448 | printk("inode %lu still ordered or delalloc after wait " | ||
| 449 | "%llu %llu\n", inode->i_ino, | ||
| 450 | (unsigned long long)start, | ||
| 451 | (unsigned long long)orig_end); | ||
| 452 | goto again; | ||
| 453 | } | ||
| 454 | return 0; | ||
| 455 | } | ||
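
The loop above walks the range from the top down: each lookup finds the ordered extent covering or preceding 'end', waits on it, then steps 'end' to just below that extent's start, until it reaches zero or 'start'. A standalone sketch of the walk over two hypothetical extents (a sorted array stands in for the rbtree lookup):

	#include <stdint.h>
	#include <stdio.h>

	struct ext { uint64_t off, len; };

	/* last extent whose start is <= pos, or NULL */
	static struct ext *lookup_first_before(struct ext *e, int n,
					       uint64_t pos)
	{
		int i;

		for (i = n - 1; i >= 0; i--)
			if (e[i].off <= pos)
				return &e[i];
		return NULL;
	}

	int main(void)
	{
		struct ext extents[] = { { 0, 4096 }, { 8192, 4096 } };
		uint64_t start = 0, end = 16383;

		while (1) {
			struct ext *o = lookup_first_before(extents, 2, end);

			if (!o || o->off + o->len < start)
				break;
			printf("wait on [%llu, %llu)\n",
			       (unsigned long long)o->off,
			       (unsigned long long)(o->off + o->len));
			end = o->off;
			if (end == 0 || end == start)
				break;
			end--;
		}
		return 0;
	}
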
| 456 | |||
| 457 | /* | ||
| 458 | * find an ordered extent corresponding to file_offset. return NULL if | ||
| 459 | * nothing is found, otherwise take a reference on the extent and return it | ||
| 460 | */ | ||
| 461 | struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | ||
| 462 | u64 file_offset) | ||
| 463 | { | ||
| 464 | struct btrfs_ordered_inode_tree *tree; | ||
| 465 | struct rb_node *node; | ||
| 466 | struct btrfs_ordered_extent *entry = NULL; | ||
| 467 | |||
| 468 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 469 | mutex_lock(&tree->mutex); | ||
| 470 | node = tree_search(tree, file_offset); | ||
| 471 | if (!node) | ||
| 472 | goto out; | ||
| 473 | |||
| 474 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 475 | if (!offset_in_entry(entry, file_offset)) | ||
| 476 | entry = NULL; | ||
| 477 | if (entry) | ||
| 478 | atomic_inc(&entry->refs); | ||
| 479 | out: | ||
| 480 | mutex_unlock(&tree->mutex); | ||
| 481 | return entry; | ||
| 482 | } | ||
| 483 | |||
| 484 | /* | ||
| 485 | * lookup and return any extent before 'file_offset'. NULL is returned | ||
| 486 | * if none is found | ||
| 487 | */ | ||
| 488 | struct btrfs_ordered_extent * | ||
| 489 | btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | ||
| 490 | { | ||
| 491 | struct btrfs_ordered_inode_tree *tree; | ||
| 492 | struct rb_node *node; | ||
| 493 | struct btrfs_ordered_extent *entry = NULL; | ||
| 494 | |||
| 495 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 496 | mutex_lock(&tree->mutex); | ||
| 497 | node = tree_search(tree, file_offset); | ||
| 498 | if (!node) | ||
| 499 | goto out; | ||
| 500 | |||
| 501 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 502 | atomic_inc(&entry->refs); | ||
| 503 | out: | ||
| 504 | mutex_unlock(&tree->mutex); | ||
| 505 | return entry; | ||
| 506 | } | ||
| 507 | |||
| 508 | /* | ||
| 509 | * After an extent is done, call this to conditionally update the on disk | ||
| 510 | * i_size. i_size is updated to cover any fully written part of the file. | ||
| 511 | */ | ||
| 512 | int btrfs_ordered_update_i_size(struct inode *inode, | ||
| 513 | struct btrfs_ordered_extent *ordered) | ||
| 514 | { | ||
| 515 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | ||
| 516 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 517 | u64 disk_i_size; | ||
| 518 | u64 new_i_size; | ||
| 519 | u64 i_size_test; | ||
| 520 | struct rb_node *node; | ||
| 521 | struct btrfs_ordered_extent *test; | ||
| 522 | |||
| 523 | mutex_lock(&tree->mutex); | ||
| 524 | disk_i_size = BTRFS_I(inode)->disk_i_size; | ||
| 525 | |||
| 526 | /* | ||
| 527 | * if the disk i_size is already at the inode->i_size, or | ||
| 528 | * this ordered extent is inside the disk i_size, we're done | ||
| 529 | */ | ||
| 530 | if (disk_i_size >= inode->i_size || | ||
| 531 | ordered->file_offset + ordered->len <= disk_i_size) { | ||
| 532 | goto out; | ||
| 533 | } | ||
| 534 | |||
| 535 | /* | ||
| 536 | * we can't update the disk_i_size if there are delalloc bytes | ||
| 537 | * between disk_i_size and this ordered extent | ||
| 538 | */ | ||
| 539 | if (test_range_bit(io_tree, disk_i_size, | ||
| 540 | ordered->file_offset + ordered->len - 1, | ||
| 541 | EXTENT_DELALLOC, 0)) { | ||
| 542 | goto out; | ||
| 543 | } | ||
| 544 | /* | ||
| 545 | * walk backward from this ordered extent to disk_i_size. | ||
| 546 | * if we find an ordered extent then we can't update disk i_size | ||
| 547 | * yet | ||
| 548 | */ | ||
| 549 | node = &ordered->rb_node; | ||
| 550 | while (1) { | ||
| 551 | node = rb_prev(node); | ||
| 552 | if (!node) | ||
| 553 | break; | ||
| 554 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 555 | if (test->file_offset + test->len <= disk_i_size) | ||
| 556 | break; | ||
| 557 | if (test->file_offset >= inode->i_size) | ||
| 558 | break; | ||
| 559 | if (test->file_offset >= disk_i_size) | ||
| 560 | goto out; | ||
| 561 | } | ||
| 562 | new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); | ||
| 563 | |||
| 564 | /* | ||
| 565 | * at this point, we know we can safely update i_size to at least | ||
| 566 | * the offset from this ordered extent. But, we need to | ||
| 567 | * walk forward and see if ios from higher up in the file have | ||
| 568 | * finished. | ||
| 569 | */ | ||
| 570 | node = rb_next(&ordered->rb_node); | ||
| 571 | i_size_test = 0; | ||
| 572 | if (node) { | ||
| 573 | /* | ||
| 574 | * do we have an area where IO might have finished | ||
| 575 | * between our ordered extent and the next one. | ||
| 576 | */ | ||
| 577 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 578 | if (test->file_offset > entry_end(ordered)) { | ||
| 579 | i_size_test = test->file_offset; | ||
| 580 | } | ||
| 581 | } else { | ||
| 582 | i_size_test = i_size_read(inode); | ||
| 583 | } | ||
| 584 | |||
| 585 | /* | ||
| 586 | * i_size_test is the end of a region after this ordered | ||
| 587 | * extent where there are no ordered extents. As long as there | ||
| 588 | * are no delalloc bytes in this area, it is safe to update | ||
| 589 | * disk_i_size to the end of the region. | ||
| 590 | */ | ||
| 591 | if (i_size_test > entry_end(ordered) && | ||
| 592 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, | ||
| 593 | EXTENT_DELALLOC, 0)) { | ||
| 594 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | ||
| 595 | } | ||
| 596 | BTRFS_I(inode)->disk_i_size = new_i_size; | ||
| 597 | out: | ||
| 598 | mutex_unlock(&tree->mutex); | ||
| 599 | return 0; | ||
| 600 | } | ||
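
A worked example of the final min_t() step, with hypothetical numbers and the delalloc checks and backward walk assumed to have already passed: if this extent ends at 8K, the next ordered extent starts at 16K, and the in-memory i_size is 20K, disk_i_size can advance to 16K because the clean gap [8K, 16K) is already safe on disk:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t min_u64(uint64_t a, uint64_t b)
	{
		return a < b ? a : b;
	}

	int main(void)
	{
		uint64_t i_size = 20480;	/* in-memory size: 20K */
		uint64_t entry_end = 8192;	/* this extent: [4K, 8K) */
		uint64_t next_start = 16384;	/* next ordered extent */
		uint64_t new_i_size;

		/* start from the end of this ordered extent */
		new_i_size = min_u64(entry_end, i_size);
		/* the gap up to the next extent is clean, so cover it too */
		if (next_start > entry_end)
			new_i_size = min_u64(next_start, i_size);
		printf("disk_i_size -> %llu\n",
		       (unsigned long long)new_i_size);
		return 0;
	}
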
| 601 | |||
| 602 | /* | ||
| 603 | * search the ordered extents for one corresponding to 'offset' and | ||
| 604 | * try to find a checksum. This is used because we allow pages to | ||
| 605 | * be reclaimed before their checksum is actually put into the btree | ||
| 606 | */ | ||
| 607 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) | ||
| 608 | { | ||
| 609 | struct btrfs_ordered_sum *ordered_sum; | ||
| 610 | struct btrfs_sector_sum *sector_sums; | ||
| 611 | struct btrfs_ordered_extent *ordered; | ||
| 612 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | ||
| 613 | struct list_head *cur; | ||
| 614 | unsigned long num_sectors; | ||
| 615 | unsigned long i; | ||
| 616 | u32 sectorsize = BTRFS_I(inode)->root->sectorsize; | ||
| 617 | int ret = 1; | ||
| 618 | |||
| 619 | ordered = btrfs_lookup_ordered_extent(inode, offset); | ||
| 620 | if (!ordered) | ||
| 621 | return 1; | ||
| 622 | |||
| 623 | mutex_lock(&tree->mutex); | ||
| 624 | list_for_each_prev(cur, &ordered->list) { | ||
| 625 | ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); | ||
| 626 | if (offset >= ordered_sum->file_offset) { | ||
| 627 | num_sectors = ordered_sum->len / sectorsize; | ||
| 628 | sector_sums = ordered_sum->sums; | ||
| 629 | for (i = 0; i < num_sectors; i++) { | ||
| 630 | if (sector_sums[i].offset == offset) { | ||
| 631 | *sum = sector_sums[i].sum; | ||
| 632 | ret = 0; | ||
| 633 | goto out; | ||
| 634 | } | ||
| 635 | } | ||
| 636 | } | ||
| 637 | } | ||
| 638 | out: | ||
| 639 | mutex_unlock(&tree->mutex); | ||
| 640 | btrfs_put_ordered_extent(ordered); | ||
| 641 | return ret; | ||
| 642 | } | ||
| 643 | |||
| 644 | |||
| 645 | /** | ||
| 646 | * taken from mm/filemap.c because it isn't exported | ||
| 647 | * | ||
| 648 | * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range | ||
| 649 | * @mapping: address space structure to write | ||
| 650 | * @start: offset in bytes where the range starts | ||
| 651 | * @end: offset in bytes where the range ends (inclusive) | ||
| 652 | * @sync_mode: enable synchronous operation | ||
| 653 | * | ||
| 654 | * Start writeback against all of a mapping's dirty pages that lie | ||
| 655 | * within the byte offsets <start, end> inclusive. | ||
| 656 | * | ||
| 657 | * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as | ||
| 658 | * opposed to a regular memory cleansing writeback. The difference between | ||
| 659 | * these two operations is that if a dirty page/buffer is encountered, it must | ||
| 660 | * be waited upon, and not just skipped over. | ||
| 661 | */ | ||
| 662 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 663 | loff_t end, int sync_mode) | ||
| 664 | { | ||
| 665 | struct writeback_control wbc = { | ||
| 666 | .sync_mode = sync_mode, | ||
| 667 | .nr_to_write = mapping->nrpages * 2, | ||
| 668 | .range_start = start, | ||
| 669 | .range_end = end, | ||
| 670 | .for_writepages = 1, | ||
| 671 | }; | ||
| 672 | return btrfs_writepages(mapping, &wbc); | ||
| 673 | } | ||
| 674 | |||
| 675 | /** | ||
| 676 | * taken from mm/filemap.c because it isn't exported | ||
| 677 | * | ||
| 678 | * wait_on_page_writeback_range - wait for writeback to complete | ||
| 679 | * @mapping: target address_space | ||
| 680 | * @start: beginning page index | ||
| 681 | * @end: ending page index | ||
| 682 | * | ||
| 683 | * Wait for writeback to complete against pages indexed by start->end | ||
| 684 | * inclusive | ||
| 685 | */ | ||
| 686 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 687 | pgoff_t start, pgoff_t end) | ||
| 688 | { | ||
| 689 | struct pagevec pvec; | ||
| 690 | int nr_pages; | ||
| 691 | int ret = 0; | ||
| 692 | pgoff_t index; | ||
| 693 | |||
| 694 | if (end < start) | ||
| 695 | return 0; | ||
| 696 | |||
| 697 | pagevec_init(&pvec, 0); | ||
| 698 | index = start; | ||
| 699 | while ((index <= end) && | ||
| 700 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 701 | PAGECACHE_TAG_WRITEBACK, | ||
| 702 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | ||
| 703 | unsigned i; | ||
| 704 | |||
| 705 | for (i = 0; i < nr_pages; i++) { | ||
| 706 | struct page *page = pvec.pages[i]; | ||
| 707 | |||
| 708 | /* until radix tree lookup accepts end_index */ | ||
| 709 | if (page->index > end) | ||
| 710 | continue; | ||
| 711 | |||
| 712 | wait_on_page_writeback(page); | ||
| 713 | if (PageError(page)) | ||
| 714 | ret = -EIO; | ||
| 715 | } | ||
| 716 | pagevec_release(&pvec); | ||
| 717 | cond_resched(); | ||
| 718 | } | ||
| 719 | |||
| 720 | /* Check for outstanding write errors */ | ||
| 721 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | ||
| 722 | ret = -ENOSPC; | ||
| 723 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | ||
| 724 | ret = -EIO; | ||
| 725 | |||
| 726 | return ret; | ||
| 727 | } | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h new file mode 100644 index 00000000000..f50f8870a14 --- /dev/null +++ b/fs/btrfs/ordered-data.h | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_ORDERED_DATA__ | ||
| 20 | #define __BTRFS_ORDERED_DATA__ | ||
| 21 | |||
| 22 | /* one of these per inode */ | ||
| 23 | struct btrfs_ordered_inode_tree { | ||
| 24 | struct mutex mutex; | ||
| 25 | struct rb_root tree; | ||
| 26 | struct rb_node *last; | ||
| 27 | }; | ||
| 28 | |||
| 29 | /* | ||
| 30 | * these are used to collect checksums done just before bios submission. | ||
| 31 | * They are attached via a list into the ordered extent, and | ||
| 32 | * checksum items are inserted into the tree after all the blocks in | ||
| 33 | * the ordered extent are on disk | ||
| 34 | */ | ||
| 35 | struct btrfs_sector_sum { | ||
| 36 | u64 offset; | ||
| 37 | u32 sum; | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct btrfs_ordered_sum { | ||
| 41 | u64 file_offset; | ||
| 42 | /* | ||
| 43 | * this is the length in bytes covered by the sums array below. | ||
| 44 | * But, the sums array may not be contiguous in the file. | ||
| 45 | */ | ||
| 46 | unsigned long len; | ||
| 47 | struct list_head list; | ||
| 48 | /* last field is a variable length array of btrfs_sector_sums */ | ||
| 49 | struct btrfs_sector_sum sums[]; | ||
| 50 | }; | ||
| 51 | |||
| 52 | /* | ||
| 53 | * bits for the flags field: | ||
| 54 | * | ||
| 55 | * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written. | ||
| 56 | * It is used to make sure metadata is inserted into the tree only once | ||
| 57 | * per extent. | ||
| 58 | * | ||
| 59 | * BTRFS_ORDERED_COMPLETE is set when the extent is removed from the | ||
| 60 | * rbtree, just before waking any waiters. It is used to indicate the | ||
| 61 | * IO is done and any metadata is inserted into the tree. | ||
| 62 | */ | ||
| 63 | #define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ | ||
| 64 | |||
| 65 | #define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ | ||
| 66 | |||
| 67 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | ||
| 68 | |||
| 69 | struct btrfs_ordered_extent { | ||
| 70 | /* logical offset in the file */ | ||
| 71 | u64 file_offset; | ||
| 72 | |||
| 73 | /* disk byte number */ | ||
| 74 | u64 start; | ||
| 75 | |||
| 76 | /* length of the extent in bytes */ | ||
| 77 | u64 len; | ||
| 78 | |||
| 79 | /* flags (described above) */ | ||
| 80 | unsigned long flags; | ||
| 81 | |||
| 82 | /* reference count */ | ||
| 83 | atomic_t refs; | ||
| 84 | |||
| 85 | /* the inode we belong to */ | ||
| 86 | struct inode *inode; | ||
| 87 | |||
| 88 | /* list of checksums for insertion when the extent io is done */ | ||
| 89 | struct list_head list; | ||
| 90 | |||
| 91 | /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ | ||
| 92 | wait_queue_head_t wait; | ||
| 93 | |||
| 94 | /* our friendly rbtree entry */ | ||
| 95 | struct rb_node rb_node; | ||
| 96 | |||
| 97 | /* a per root list of all the pending ordered extents */ | ||
| 98 | struct list_head root_extent_list; | ||
| 99 | }; | ||
| 100 | |||
| 101 | |||
| 102 | /* | ||
| 103 | * calculates the total size you need to allocate for an ordered sum | ||
| 104 | * structure spanning 'bytes' in the file | ||
| 105 | */ | ||
| 106 | static inline int btrfs_ordered_sum_size(struct btrfs_root *root, | ||
| 107 | unsigned long bytes) | ||
| 108 | { | ||
| 109 | unsigned long num_sectors = (bytes + root->sectorsize - 1) / | ||
| 110 | root->sectorsize; | ||
| 111 | num_sectors++; | ||
| 112 | return sizeof(struct btrfs_ordered_sum) + | ||
| 113 | num_sectors * sizeof(struct btrfs_sector_sum); | ||
| 114 | } | ||
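
Arithmetic check for the helper above: 10000 bytes at a hypothetical 4K sectorsize round up to 3 sectors, plus the one extra sector of slack the helper adds, for 4 btrfs_sector_sum slots. The local struct below is a stand-in, so the byte count is illustrative, not the kernel's exact allocation size:

	#include <stdint.h>
	#include <stdio.h>

	struct sector_sum_demo {	/* stand-in for btrfs_sector_sum */
		uint64_t offset;
		uint32_t sum;
	};

	int main(void)
	{
		unsigned long sectorsize = 4096, bytes = 10000;
		unsigned long num_sectors =
			(bytes + sectorsize - 1) / sectorsize;

		num_sectors++;	/* slack sector, as in the helper */
		printf("%lu sector sums, %lu bytes of sums\n", num_sectors,
		       num_sectors *
		       (unsigned long)sizeof(struct sector_sum_demo));
		return 0;
	}
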
| 115 | |||
| 116 | static inline void | ||
| 117 | btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) | ||
| 118 | { | ||
| 119 | mutex_init(&t->mutex); | ||
| 120 | t->tree.rb_node = NULL; | ||
| 121 | t->last = NULL; | ||
| 122 | } | ||
| 123 | |||
| 124 | int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); | ||
| 125 | int btrfs_remove_ordered_extent(struct inode *inode, | ||
| 126 | struct btrfs_ordered_extent *entry); | ||
| 127 | int btrfs_dec_test_ordered_pending(struct inode *inode, | ||
| 128 | u64 file_offset, u64 io_size); | ||
| 129 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
| 130 | u64 start, u64 len, int nocow); | ||
| 131 | int btrfs_add_ordered_sum(struct inode *inode, | ||
| 132 | struct btrfs_ordered_extent *entry, | ||
| 133 | struct btrfs_ordered_sum *sum); | ||
| 134 | struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | ||
| 135 | u64 file_offset); | ||
| 136 | void btrfs_start_ordered_extent(struct inode *inode, | ||
| 137 | struct btrfs_ordered_extent *entry, int wait); | ||
| 138 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | ||
| 139 | struct btrfs_ordered_extent * | ||
| 140 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | ||
| 141 | int btrfs_ordered_update_i_size(struct inode *inode, | ||
| 142 | struct btrfs_ordered_extent *ordered); | ||
| 143 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); | ||
| 144 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 145 | pgoff_t start, pgoff_t end); | ||
| 146 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 147 | loff_t end, int sync_mode); | ||
| 148 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | ||
| 149 | #endif | ||
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c new file mode 100644 index 00000000000..3c0d52af4f8 --- /dev/null +++ b/fs/btrfs/orphan.c | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Red Hat. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "ctree.h" | ||
| 20 | #include "disk-io.h" | ||
| 21 | |||
| 22 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | ||
| 23 | struct btrfs_root *root, u64 offset) | ||
| 24 | { | ||
| 25 | struct btrfs_path *path; | ||
| 26 | struct btrfs_key key; | ||
| 27 | int ret = 0; | ||
| 28 | |||
| 29 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
| 30 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | ||
| 31 | key.offset = offset; | ||
| 32 | |||
| 33 | path = btrfs_alloc_path(); | ||
| 34 | if (!path) | ||
| 35 | return -ENOMEM; | ||
| 36 | |||
| 37 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | ||
| 38 | |||
| 39 | btrfs_free_path(path); | ||
| 40 | return ret; | ||
| 41 | } | ||
| 42 | |||
| 43 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | ||
| 44 | struct btrfs_root *root, u64 offset) | ||
| 45 | { | ||
| 46 | struct btrfs_path *path; | ||
| 47 | struct btrfs_key key; | ||
| 48 | int ret = 0; | ||
| 49 | |||
| 50 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
| 51 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | ||
| 52 | key.offset = offset; | ||
| 53 | |||
| 54 | path = btrfs_alloc_path(); | ||
| 55 | if (!path) | ||
| 56 | return -ENOMEM; | ||
| 57 | |||
| 58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 59 | if (ret) | ||
| 60 | goto out; | ||
| 61 | |||
| 62 | ret = btrfs_del_item(trans, root, path); | ||
| 63 | |||
| 64 | out: | ||
| 65 | btrfs_free_path(path); | ||
| 66 | return ret; | ||
| 67 | } | ||
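
Both helpers address orphan records with the same key shape: the objectid is the shared orphan sentinel, the type is BTRFS_ORPHAN_ITEM_KEY, and the offset carries the per-object value; the item body is empty, so the key's presence is the whole record. A sketch with local stand-ins; the two constants are assumed from ctree.h, which is outside this hunk:

	#include <stdint.h>
	#include <stdio.h>

	#define BTRFS_ORPHAN_OBJECTID ((uint64_t)-5)	/* assumed */
	#define BTRFS_ORPHAN_ITEM_KEY 48		/* assumed */

	struct key_demo {	/* stand-in for btrfs_key */
		uint64_t objectid;
		uint8_t type;
		uint64_t offset;
	};

	int main(void)
	{
		struct key_demo key = {
			.objectid = BTRFS_ORPHAN_OBJECTID,
			.type = BTRFS_ORPHAN_ITEM_KEY,
			.offset = 257,	/* hypothetical inode number */
		};

		printf("key (%llu %u %llu), empty item body\n",
		       (unsigned long long)key.objectid, key.type,
		       (unsigned long long)key.offset);
		return 0;
	}
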
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c new file mode 100644 index 00000000000..bd9ab3e9a7f --- /dev/null +++ b/fs/btrfs/print-tree.c | |||
| @@ -0,0 +1,200 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "ctree.h" | ||
| 20 | #include "disk-io.h" | ||
| 21 | #include "print-tree.h" | ||
| 22 | |||
| 23 | static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) | ||
| 24 | { | ||
| 25 | int num_stripes = btrfs_chunk_num_stripes(eb, chunk); | ||
| 26 | int i; | ||
| 27 | printk("\t\tchunk length %llu owner %llu type %llu num_stripes %d\n", | ||
| 28 | (unsigned long long)btrfs_chunk_length(eb, chunk), | ||
| 29 | (unsigned long long)btrfs_chunk_owner(eb, chunk), | ||
| 30 | (unsigned long long)btrfs_chunk_type(eb, chunk), | ||
| 31 | num_stripes); | ||
| 32 | for (i = 0; i < num_stripes; i++) { | ||
| 33 | printk("\t\t\tstripe %d devid %llu offset %llu\n", i, | ||
| 34 | (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), | ||
| 35 | (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | static void print_dev_item(struct extent_buffer *eb, | ||
| 39 | struct btrfs_dev_item *dev_item) | ||
| 40 | { | ||
| 41 | printk("\t\tdev item devid %llu " | ||
| 42 | "total_bytes %llu bytes used %Lu\n", | ||
| 43 | (unsigned long long)btrfs_device_id(eb, dev_item), | ||
| 44 | (unsigned long long)btrfs_device_total_bytes(eb, dev_item), | ||
| 45 | (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); | ||
| 46 | } | ||
| 47 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | ||
| 48 | { | ||
| 49 | int i; | ||
| 50 | u32 nr = btrfs_header_nritems(l); | ||
| 51 | struct btrfs_item *item; | ||
| 52 | struct btrfs_extent_item *ei; | ||
| 53 | struct btrfs_root_item *ri; | ||
| 54 | struct btrfs_dir_item *di; | ||
| 55 | struct btrfs_inode_item *ii; | ||
| 56 | struct btrfs_block_group_item *bi; | ||
| 57 | struct btrfs_file_extent_item *fi; | ||
| 58 | struct btrfs_key key; | ||
| 59 | struct btrfs_key found_key; | ||
| 60 | struct btrfs_extent_ref *ref; | ||
| 61 | struct btrfs_dev_extent *dev_extent; | ||
| 62 | u32 type; | ||
| 63 | |||
| 64 | printk("leaf %llu total ptrs %d free space %d\n", | ||
| 65 | (unsigned long long)btrfs_header_bytenr(l), nr, | ||
| 66 | btrfs_leaf_free_space(root, l)); | ||
| 67 | for (i = 0; i < nr; i++) { | ||
| 68 | item = btrfs_item_nr(l, i); | ||
| 69 | btrfs_item_key_to_cpu(l, &key, i); | ||
| 70 | type = btrfs_key_type(&key); | ||
| 71 | printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", | ||
| 72 | i, | ||
| 73 | (unsigned long long)key.objectid, type, | ||
| 74 | (unsigned long long)key.offset, | ||
| 75 | btrfs_item_offset(l, item), btrfs_item_size(l, item)); | ||
| 76 | switch (type) { | ||
| 77 | case BTRFS_INODE_ITEM_KEY: | ||
| 78 | ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); | ||
| 79 | printk("\t\tinode generation %llu size %llu mode %o\n", | ||
| 80 | (unsigned long long)btrfs_inode_generation(l, ii), | ||
| 81 | (unsigned long long)btrfs_inode_size(l, ii), | ||
| 82 | btrfs_inode_mode(l, ii)); | ||
| 83 | break; | ||
| 84 | case BTRFS_DIR_ITEM_KEY: | ||
| 85 | di = btrfs_item_ptr(l, i, struct btrfs_dir_item); | ||
| 86 | btrfs_dir_item_key_to_cpu(l, di, &found_key); | ||
| 87 | printk("\t\tdir oid %llu type %u\n", | ||
| 88 | (unsigned long long)found_key.objectid, | ||
| 89 | btrfs_dir_type(l, di)); | ||
| 90 | break; | ||
| 91 | case BTRFS_ROOT_ITEM_KEY: | ||
| 92 | ri = btrfs_item_ptr(l, i, struct btrfs_root_item); | ||
| 93 | printk("\t\troot data bytenr %llu refs %u\n", | ||
| 94 | (unsigned long long)btrfs_disk_root_bytenr(l, ri), | ||
| 95 | btrfs_disk_root_refs(l, ri)); | ||
| 96 | break; | ||
| 97 | case BTRFS_EXTENT_ITEM_KEY: | ||
| 98 | ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); | ||
| 99 | printk("\t\textent data refs %u\n", | ||
| 100 | btrfs_extent_refs(l, ei)); | ||
| 101 | break; | ||
| 102 | case BTRFS_EXTENT_REF_KEY: | ||
| 103 | ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); | ||
| 104 | printk("\t\textent back ref root %llu gen %llu " | ||
| 105 | "owner %llu num_refs %lu\n", | ||
| 106 | (unsigned long long)btrfs_ref_root(l, ref), | ||
| 107 | (unsigned long long)btrfs_ref_generation(l, ref), | ||
| 108 | (unsigned long long)btrfs_ref_objectid(l, ref), | ||
| 109 | (unsigned long)btrfs_ref_num_refs(l, ref)); | ||
| 110 | break; | ||
| 111 | |||
| 112 | case BTRFS_EXTENT_DATA_KEY: | ||
| 113 | fi = btrfs_item_ptr(l, i, | ||
| 114 | struct btrfs_file_extent_item); | ||
| 115 | if (btrfs_file_extent_type(l, fi) == | ||
| 116 | BTRFS_FILE_EXTENT_INLINE) { | ||
| 117 | printk("\t\tinline extent data size %u\n", | ||
| 118 | btrfs_file_extent_inline_len(l, item)); | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | printk("\t\textent data disk bytenr %llu nr %llu\n", | ||
| 122 | (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), | ||
| 123 | (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); | ||
| 124 | printk("\t\textent data offset %llu nr %llu\n", | ||
| 125 | (unsigned long long)btrfs_file_extent_offset(l, fi), | ||
| 126 | (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); | ||
| 127 | break; | ||
| 128 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | ||
| 129 | bi = btrfs_item_ptr(l, i, | ||
| 130 | struct btrfs_block_group_item); | ||
| 131 | printk("\t\tblock group used %llu\n", | ||
| 132 | (unsigned long long)btrfs_disk_block_group_used(l, bi)); | ||
| 133 | break; | ||
| 134 | case BTRFS_CHUNK_ITEM_KEY: | ||
| 135 | print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk)); | ||
| 136 | break; | ||
| 137 | case BTRFS_DEV_ITEM_KEY: | ||
| 138 | print_dev_item(l, btrfs_item_ptr(l, i, | ||
| 139 | struct btrfs_dev_item)); | ||
| 140 | break; | ||
| 141 | case BTRFS_DEV_EXTENT_KEY: | ||
| 142 | dev_extent = btrfs_item_ptr(l, i, | ||
| 143 | struct btrfs_dev_extent); | ||
| 144 | printk("\t\tdev extent chunk_tree %llu\n" | ||
| 145 | "\t\tchunk objectid %llu chunk offset %llu " | ||
| 146 | "length %llu\n", | ||
| 147 | (unsigned long long) | ||
| 148 | btrfs_dev_extent_chunk_tree(l, dev_extent), | ||
| 149 | (unsigned long long) | ||
| 150 | btrfs_dev_extent_chunk_objectid(l, dev_extent), | ||
| 151 | (unsigned long long) | ||
| 152 | btrfs_dev_extent_chunk_offset(l, dev_extent), | ||
| 153 | (unsigned long long) | ||
| 154 | btrfs_dev_extent_length(l, dev_extent)); | ||
| 155 | } | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 159 | void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) | ||
| 160 | { | ||
| 161 | int i; | ||
| 162 | u32 nr; | ||
| 162 | struct btrfs_key key; | ||
| 163 | int level; | ||
| 164 | |||
| 165 | if (!c) | ||
| 166 | return; | ||
| 167 | nr = btrfs_header_nritems(c); | ||
| 168 | level = btrfs_header_level(c); | ||
| 169 | if (level == 0) { | ||
| 170 | btrfs_print_leaf(root, c); | ||
| 171 | return; | ||
| 172 | } | ||
| 173 | printk("node %llu level %d total ptrs %d free spc %u\n", | ||
| 174 | (unsigned long long)btrfs_header_bytenr(c), | ||
| 175 | btrfs_header_level(c), nr, | ||
| 176 | (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); | ||
| 177 | for (i = 0; i < nr; i++) { | ||
| 178 | btrfs_node_key_to_cpu(c, &key, i); | ||
| 179 | printk("\tkey %d (%llu %u %llu) block %llu\n", | ||
| 180 | i, | ||
| 181 | (unsigned long long)key.objectid, | ||
| 182 | key.type, | ||
| 183 | (unsigned long long)key.offset, | ||
| 184 | (unsigned long long)btrfs_node_blockptr(c, i)); | ||
| 185 | } | ||
| 186 | for (i = 0; i < nr; i++) { | ||
| 187 | struct extent_buffer *next = read_tree_block(root, | ||
| 188 | btrfs_node_blockptr(c, i), | ||
| 189 | btrfs_level_size(root, level - 1), | ||
| 190 | btrfs_node_ptr_generation(c, i)); | ||
| 191 | if (btrfs_is_leaf(next) && | ||
| 192 | btrfs_header_level(c) != 1) | ||
| 193 | BUG(); | ||
| 194 | if (btrfs_header_level(next) != | ||
| 195 | btrfs_header_level(c) - 1) | ||
| 196 | BUG(); | ||
| 197 | btrfs_print_tree(root, next); | ||
| 198 | free_extent_buffer(next); | ||
| 199 | } | ||
| 200 | } | ||
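
These printers are debugging aids meant to be called from other btrfs code when a tree operation goes wrong. A minimal sketch of a hypothetical call site (the surrounding trans/root/key/path variables are assumed, as in the callers later in this patch):

    ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    if (ret > 0) {
        /* dump the leaf the failed search landed in */
        btrfs_print_leaf(root, path->nodes[0]);
    }
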
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h new file mode 100644 index 00000000000..da75efe534d --- /dev/null +++ b/fs/btrfs/print-tree.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __PRINT_TREE_ | ||
| 20 | #define __PRINT_TREE_ | ||
| 21 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); | ||
| 22 | void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); | ||
| 23 | #endif | ||
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c new file mode 100644 index 00000000000..a50ebb67055 --- /dev/null +++ b/fs/btrfs/ref-cache.c | |||
| @@ -0,0 +1,230 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/sched.h> | ||
| 20 | #include "ctree.h" | ||
| 21 | #include "ref-cache.h" | ||
| 22 | #include "transaction.h" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * leaf refs are used to cache the information about which extents | ||
| 26 | * a given leaf has references on. This allows us to process that leaf | ||
| 27 | * in btrfs_drop_snapshot without needing to read it back from disk. | ||
| 28 | */ | ||
| 29 | |||
| 30 | /* | ||
| 31 | * kmalloc a leaf reference struct and update the counters for the | ||
| 32 | * total ref cache size | ||
| 33 | */ | ||
| 34 | struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, | ||
| 35 | int nr_extents) | ||
| 36 | { | ||
| 37 | struct btrfs_leaf_ref *ref; | ||
| 38 | size_t size = btrfs_leaf_ref_size(nr_extents); | ||
| 39 | |||
| 40 | ref = kmalloc(size, GFP_NOFS); | ||
| 41 | if (ref) { | ||
| 42 | spin_lock(&root->fs_info->ref_cache_lock); | ||
| 43 | root->fs_info->total_ref_cache_size += size; | ||
| 44 | spin_unlock(&root->fs_info->ref_cache_lock); | ||
| 45 | |||
| 46 | memset(ref, 0, sizeof(*ref)); | ||
| 47 | atomic_set(&ref->usage, 1); | ||
| 48 | INIT_LIST_HEAD(&ref->list); | ||
| 49 | } | ||
| 50 | return ref; | ||
| 51 | } | ||
| 52 | |||
| 53 | /* | ||
| 54 | * free a leaf reference struct and update the counters for the | ||
| 55 | * total ref cache size | ||
| 56 | */ | ||
| 57 | void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) | ||
| 58 | { | ||
| 59 | if (!ref) | ||
| 60 | return; | ||
| 61 | WARN_ON(atomic_read(&ref->usage) == 0); | ||
| 62 | if (atomic_dec_and_test(&ref->usage)) { | ||
| 63 | size_t size = btrfs_leaf_ref_size(ref->nritems); | ||
| 64 | |||
| 65 | BUG_ON(ref->in_tree); | ||
| 66 | kfree(ref); | ||
| 67 | |||
| 68 | spin_lock(&root->fs_info->ref_cache_lock); | ||
| 69 | root->fs_info->total_ref_cache_size -= size; | ||
| 70 | spin_unlock(&root->fs_info->ref_cache_lock); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | ||
| 75 | struct rb_node *node) | ||
| 76 | { | ||
| 77 | struct rb_node **p = &root->rb_node; | ||
| 78 | struct rb_node *parent = NULL; | ||
| 79 | struct btrfs_leaf_ref *entry; | ||
| 80 | |||
| 81 | /* standard rbtree insertion: walk down to find the link point, | ||
| 82 | * or return the existing node if this bytenr is already present */ | ||
| 83 | while (*p) { | ||
| 82 | parent = *p; | ||
| 83 | entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); | ||
| 84 | |||
| 85 | if (bytenr < entry->bytenr) | ||
| 86 | p = &(*p)->rb_left; | ||
| 87 | else if (bytenr > entry->bytenr) | ||
| 88 | p = &(*p)->rb_right; | ||
| 89 | else | ||
| 90 | return parent; | ||
| 91 | } | ||
| 92 | |||
| 93 | entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); | ||
| 94 | rb_link_node(node, parent, p); | ||
| 95 | rb_insert_color(node, root); | ||
| 96 | return NULL; | ||
| 97 | } | ||
| 98 | |||
| 99 | static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) | ||
| 100 | { | ||
| 101 | struct rb_node *n = root->rb_node; | ||
| 102 | struct btrfs_leaf_ref *entry; | ||
| 103 | |||
| 104 | while (n) { | ||
| 105 | entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); | ||
| 106 | WARN_ON(!entry->in_tree); | ||
| 107 | |||
| 108 | if (bytenr < entry->bytenr) | ||
| 109 | n = n->rb_left; | ||
| 110 | else if (bytenr > entry->bytenr) | ||
| 111 | n = n->rb_right; | ||
| 112 | else | ||
| 113 | return n; | ||
| 114 | } | ||
| 115 | return NULL; | ||
| 116 | } | ||
| 117 | |||
| 118 | int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, | ||
| 119 | int shared) | ||
| 120 | { | ||
| 121 | struct btrfs_leaf_ref *ref = NULL; | ||
| 122 | struct btrfs_leaf_ref_tree *tree = root->ref_tree; | ||
| 123 | |||
| 124 | if (shared) | ||
| 125 | tree = &root->fs_info->shared_ref_tree; | ||
| 126 | if (!tree) | ||
| 127 | return 0; | ||
| 128 | |||
| 129 | spin_lock(&tree->lock); | ||
| 130 | while (!list_empty(&tree->list)) { | ||
| 131 | ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); | ||
| 132 | BUG_ON(ref->tree != tree); | ||
| 133 | if (ref->root_gen > max_root_gen) | ||
| 134 | break; | ||
| 135 | /* in_tree already zero means someone else is removing this ref */ | ||
| 136 | if (!xchg(&ref->in_tree, 0)) { | ||
| 136 | cond_resched_lock(&tree->lock); | ||
| 137 | continue; | ||
| 138 | } | ||
| 139 | |||
| 140 | rb_erase(&ref->rb_node, &tree->root); | ||
| 141 | list_del_init(&ref->list); | ||
| 142 | |||
| 143 | spin_unlock(&tree->lock); | ||
| 144 | btrfs_free_leaf_ref(root, ref); | ||
| 145 | cond_resched(); | ||
| 146 | spin_lock(&tree->lock); | ||
| 147 | } | ||
| 148 | spin_unlock(&tree->lock); | ||
| 149 | return 0; | ||
| 150 | } | ||
| 151 | |||
| 152 | /* | ||
| 153 | * find the leaf ref for a given extent. This returns the ref struct with | ||
| 154 | * a usage reference incremented | ||
| 155 | */ | ||
| 156 | struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, | ||
| 157 | u64 bytenr) | ||
| 158 | { | ||
| 159 | struct rb_node *rb; | ||
| 160 | struct btrfs_leaf_ref *ref = NULL; | ||
| 161 | struct btrfs_leaf_ref_tree *tree = root->ref_tree; | ||
| 162 | again: | ||
| 163 | if (tree) { | ||
| 164 | spin_lock(&tree->lock); | ||
| 165 | rb = tree_search(&tree->root, bytenr); | ||
| 166 | if (rb) | ||
| 167 | ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); | ||
| 168 | if (ref) | ||
| 169 | atomic_inc(&ref->usage); | ||
| 170 | spin_unlock(&tree->lock); | ||
| 171 | if (ref) | ||
| 172 | return ref; | ||
| 173 | } | ||
| 174 | if (tree != &root->fs_info->shared_ref_tree) { | ||
| 175 | tree = &root->fs_info->shared_ref_tree; | ||
| 176 | goto again; | ||
| 177 | } | ||
| 178 | return NULL; | ||
| 179 | } | ||
| 180 | |||
| 181 | /* | ||
| 182 | * add a fully filled in leaf ref struct to the cache. Returns -EEXIST | ||
| 183 | * if a ref for the same bytenr is already in the tree. | ||
| 184 | */ | ||
| 185 | int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, | ||
| 186 | int shared) | ||
| 187 | { | ||
| 188 | int ret = 0; | ||
| 189 | struct rb_node *rb; | ||
| 190 | struct btrfs_leaf_ref_tree *tree = root->ref_tree; | ||
| 191 | |||
| 192 | if (shared) | ||
| 193 | tree = &root->fs_info->shared_ref_tree; | ||
| 194 | |||
| 195 | spin_lock(&tree->lock); | ||
| 196 | rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node); | ||
| 197 | if (rb) { | ||
| 198 | ret = -EEXIST; | ||
| 199 | } else { | ||
| 200 | atomic_inc(&ref->usage); | ||
| 201 | ref->tree = tree; | ||
| 202 | ref->in_tree = 1; | ||
| 203 | list_add_tail(&ref->list, &tree->list); | ||
| 204 | } | ||
| 205 | spin_unlock(&tree->lock); | ||
| 206 | return ret; | ||
| 207 | } | ||
| 208 | |||
| 209 | /* | ||
| 210 | * remove a single leaf ref from the tree. This drops the ref held by the tree | ||
| 211 | * only | ||
| 212 | */ | ||
| 213 | int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) | ||
| 214 | { | ||
| 215 | struct btrfs_leaf_ref_tree *tree; | ||
| 216 | |||
| 217 | if (!xchg(&ref->in_tree, 0)) | ||
| 218 | return 0; | ||
| 219 | |||
| 220 | tree = ref->tree; | ||
| 221 | spin_lock(&tree->lock); | ||
| 222 | |||
| 223 | rb_erase(&ref->rb_node, &tree->root); | ||
| 224 | list_del_init(&ref->list); | ||
| 225 | |||
| 226 | spin_unlock(&tree->lock); | ||
| 227 | |||
| 228 | btrfs_free_leaf_ref(root, ref); | ||
| 229 | return 0; | ||
| 230 | } | ||
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h new file mode 100644 index 00000000000..16f3183d7c5 --- /dev/null +++ b/fs/btrfs/ref-cache.h | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | #ifndef __REFCACHE__ | ||
| 19 | #define __REFCACHE__ | ||
| 20 | |||
| 21 | struct btrfs_extent_info { | ||
| 22 | /* bytenr and num_bytes find the extent in the extent allocation tree */ | ||
| 23 | u64 bytenr; | ||
| 24 | u64 num_bytes; | ||
| 25 | |||
| 26 | /* objectid and offset find the back reference for the file */ | ||
| 27 | u64 objectid; | ||
| 28 | u64 offset; | ||
| 29 | }; | ||
| 30 | |||
| 31 | struct btrfs_leaf_ref { | ||
| 32 | struct rb_node rb_node; | ||
| 33 | struct btrfs_leaf_ref_tree *tree; | ||
| 34 | int in_tree; | ||
| 35 | atomic_t usage; | ||
| 36 | |||
| 37 | u64 root_gen; | ||
| 38 | u64 bytenr; | ||
| 39 | u64 owner; | ||
| 40 | u64 generation; | ||
| 41 | int nritems; | ||
| 42 | |||
| 43 | struct list_head list; | ||
| 44 | struct btrfs_extent_info extents[]; | ||
| 45 | }; | ||
| 46 | |||
| 47 | static inline size_t btrfs_leaf_ref_size(int nr_extents) | ||
| 48 | { | ||
| 49 | return sizeof(struct btrfs_leaf_ref) + | ||
| 50 | sizeof(struct btrfs_extent_info) * nr_extents; | ||
| 51 | } | ||
| 52 | |||
| 53 | static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) | ||
| 54 | { | ||
| 55 | tree->root.rb_node = NULL; | ||
| 56 | INIT_LIST_HEAD(&tree->list); | ||
| 57 | spin_lock_init(&tree->lock); | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree) | ||
| 61 | { | ||
| 62 | return RB_EMPTY_ROOT(&tree->root); | ||
| 63 | } | ||
| 64 | |||
| 66 | struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, | ||
| 67 | int nr_extents); | ||
| 68 | void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); | ||
| 69 | struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, | ||
| 70 | u64 bytenr); | ||
| 71 | int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, | ||
| 72 | int shared); | ||
| 73 | int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, | ||
| 74 | int shared); | ||
| 75 | int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); | ||
| 76 | |||
| 77 | #endif | ||
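
Taken together, the API above follows an alloc/fill/add then lookup/use/free lifecycle. A rough sketch of a hypothetical caller (the leaf, root_gen, shared and nr_extents variables are assumptions; the real callers live in the extent tree code):

    struct btrfs_leaf_ref *ref;

    ref = btrfs_alloc_leaf_ref(root, nr_extents);   /* usage == 1 */
    if (!ref)
        return -ENOMEM;
    ref->root_gen = root_gen;
    ref->bytenr = btrfs_header_bytenr(leaf);
    ref->owner = btrfs_header_owner(leaf);
    ref->generation = btrfs_header_generation(leaf);
    ref->nritems = nr_extents;
    /* ... fill ref->extents[0 .. nr_extents - 1] ... */
    btrfs_add_leaf_ref(root, ref, shared);   /* tree takes its own ref */
    btrfs_free_leaf_ref(root, ref);          /* drop the allocation ref */

    /* later: consult the cache instead of re-reading the leaf */
    ref = btrfs_lookup_leaf_ref(root, bytenr);
    if (ref) {
        /* ... walk ref->extents ... */
        btrfs_free_leaf_ref(root, ref);      /* drop the lookup ref */
    }
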
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c new file mode 100644 index 00000000000..eb7f7655e9d --- /dev/null +++ b/fs/btrfs/root-tree.c | |||
| @@ -0,0 +1,277 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "ctree.h" | ||
| 20 | #include "transaction.h" | ||
| 21 | #include "disk-io.h" | ||
| 22 | #include "print-tree.h" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * search forward for a root, starting with objectid 'search_start'. | ||
| 26 | * If a root key is found, its objectid is filled into 'found_objectid' | ||
| 27 | * and 0 is returned. < 0 is returned on error, 1 if there is nothing | ||
| 28 | * left in the tree. | ||
| 29 | */ | ||
| 30 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | ||
| 31 | u64 *found_objectid) | ||
| 32 | { | ||
| 33 | struct btrfs_path *path; | ||
| 34 | struct btrfs_key search_key; | ||
| 35 | int ret; | ||
| 36 | |||
| 37 | root = root->fs_info->tree_root; | ||
| 38 | search_key.objectid = search_start; | ||
| 39 | search_key.type = (u8)-1; | ||
| 40 | search_key.offset = (u64)-1; | ||
| 41 | |||
| 42 | path = btrfs_alloc_path(); | ||
| 43 | BUG_ON(!path); | ||
| 44 | again: | ||
| 45 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
| 46 | if (ret < 0) | ||
| 47 | goto out; | ||
| 48 | if (ret == 0) { | ||
| 49 | ret = 1; | ||
| 50 | goto out; | ||
| 51 | } | ||
| 52 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { | ||
| 53 | ret = btrfs_next_leaf(root, path); | ||
| 54 | if (ret) | ||
| 55 | goto out; | ||
| 56 | } | ||
| 57 | btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]); | ||
| 58 | if (search_key.type != BTRFS_ROOT_ITEM_KEY) { | ||
| 59 | search_key.offset++; | ||
| 60 | btrfs_release_path(root, path); | ||
| 61 | goto again; | ||
| 62 | } | ||
| 63 | ret = 0; | ||
| 64 | *found_objectid = search_key.objectid; | ||
| 65 | |||
| 66 | out: | ||
| 67 | btrfs_free_path(path); | ||
| 68 | return ret; | ||
| 69 | } | ||
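
A sketch of how a caller might enumerate every root objectid with this helper (the loop itself is hypothetical):

    u64 objectid = 0;
    u64 found;

    while (btrfs_search_root(tree_root, objectid, &found) == 0) {
        /* ... process the root with objectid 'found' ... */
        objectid = found + 1;   /* resume just past it */
    }
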
| 70 | |||
| 71 | /* | ||
| 72 | * lookup the root with the highest offset for a given objectid. The key | ||
| 73 | * found is copied into 'key'. Returns 0 if something was found, 1 if | ||
| 74 | * nothing was found, and < 0 on error. | ||
| 75 | */ | ||
| 76 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | ||
| 77 | struct btrfs_root_item *item, struct btrfs_key *key) | ||
| 78 | { | ||
| 79 | struct btrfs_path *path; | ||
| 80 | struct btrfs_key search_key; | ||
| 81 | struct btrfs_key found_key; | ||
| 82 | struct extent_buffer *l; | ||
| 83 | int ret; | ||
| 84 | int slot; | ||
| 85 | |||
| 86 | search_key.objectid = objectid; | ||
| 87 | search_key.type = (u8)-1; | ||
| 88 | search_key.offset = (u64)-1; | ||
| 89 | |||
| 90 | path = btrfs_alloc_path(); | ||
| 91 | BUG_ON(!path); | ||
| 92 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
| 93 | if (ret < 0) | ||
| 94 | goto out; | ||
| 95 | |||
| 96 | BUG_ON(ret == 0); | ||
| 97 | l = path->nodes[0]; | ||
| 98 | BUG_ON(path->slots[0] == 0); | ||
| 99 | slot = path->slots[0] - 1; | ||
| 100 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
| 101 | if (found_key.objectid != objectid) { | ||
| 102 | ret = 1; | ||
| 103 | goto out; | ||
| 104 | } | ||
| 105 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), | ||
| 106 | sizeof(*item)); | ||
| 107 | memcpy(key, &found_key, sizeof(found_key)); | ||
| 108 | ret = 0; | ||
| 109 | out: | ||
| 110 | btrfs_free_path(path); | ||
| 111 | return ret; | ||
| 112 | } | ||
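
The (u8)-1 / (u64)-1 search key works because of btrfs key ordering; a worked illustration with invented keys:

    /* Leaf items are sorted by (objectid, type, offset), e.g.:
     *
     *   (256 ROOT_ITEM 0)  (256 ROOT_ITEM 10)  (257 ROOT_ITEM 4)
     *
     * A search for (256, 0xff, (u64)-1) can never match exactly, so
     * btrfs_search_slot() returns > 0 with slots[0] pointing just past
     * the last key for objectid 256.  Stepping back one slot lands on
     * (256 ROOT_ITEM 10), the highest-offset root item for 256.
     */
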
| 113 | |||
| 114 | /* | ||
| 115 | * copy the data in 'item' into the btree | ||
| 116 | */ | ||
| 117 | int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 118 | *root, struct btrfs_key *key, struct btrfs_root_item | ||
| 119 | *item) | ||
| 120 | { | ||
| 121 | struct btrfs_path *path; | ||
| 122 | struct extent_buffer *l; | ||
| 123 | int ret; | ||
| 124 | int slot; | ||
| 125 | unsigned long ptr; | ||
| 126 | |||
| 127 | path = btrfs_alloc_path(); | ||
| 128 | BUG_ON(!path); | ||
| 129 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
| 130 | if (ret < 0) | ||
| 131 | goto out; | ||
| 132 | |||
| 133 | if (ret != 0) { | ||
| 134 | btrfs_print_leaf(root, path->nodes[0]); | ||
| 135 | printk("unable to update root key %Lu %u %Lu\n", | ||
| 136 | key->objectid, key->type, key->offset); | ||
| 137 | BUG_ON(1); | ||
| 138 | } | ||
| 139 | |||
| 140 | l = path->nodes[0]; | ||
| 141 | slot = path->slots[0]; | ||
| 142 | ptr = btrfs_item_ptr_offset(l, slot); | ||
| 143 | write_extent_buffer(l, item, ptr, sizeof(*item)); | ||
| 144 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 145 | out: | ||
| 146 | btrfs_release_path(root, path); | ||
| 147 | btrfs_free_path(path); | ||
| 148 | return ret; | ||
| 149 | } | ||
| 150 | |||
| 151 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root | ||
| 152 | *root, struct btrfs_key *key, struct btrfs_root_item | ||
| 153 | *item) | ||
| 154 | { | ||
| 155 | int ret; | ||
| 156 | ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); | ||
| 157 | return ret; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * at mount time we want to find all the old transaction snapshots that were | ||
| 162 | * in the process of being deleted when we crashed. This is any root item | ||
| 163 | * with an offset lower than the latest root. They need to be queued for | ||
| 164 | * deletion to finish what was happening when we crashed. | ||
| 165 | */ | ||
| 166 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, | ||
| 167 | struct btrfs_root *latest) | ||
| 168 | { | ||
| 169 | struct btrfs_root *dead_root; | ||
| 171 | struct btrfs_root_item *ri; | ||
| 172 | struct btrfs_key key; | ||
| 173 | struct btrfs_key found_key; | ||
| 174 | struct btrfs_path *path; | ||
| 175 | int ret; | ||
| 176 | u32 nritems; | ||
| 177 | struct extent_buffer *leaf; | ||
| 178 | int slot; | ||
| 179 | |||
| 180 | key.objectid = objectid; | ||
| 181 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 182 | key.offset = 0; | ||
| 183 | path = btrfs_alloc_path(); | ||
| 184 | if (!path) | ||
| 185 | return -ENOMEM; | ||
| 186 | |||
| 187 | again: | ||
| 188 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 189 | if (ret < 0) | ||
| 190 | goto err; | ||
| 191 | while (1) { | ||
| 192 | leaf = path->nodes[0]; | ||
| 193 | nritems = btrfs_header_nritems(leaf); | ||
| 194 | slot = path->slots[0]; | ||
| 195 | if (slot >= nritems) { | ||
| 196 | ret = btrfs_next_leaf(root, path); | ||
| 197 | if (ret) | ||
| 198 | break; | ||
| 199 | leaf = path->nodes[0]; | ||
| 200 | nritems = btrfs_header_nritems(leaf); | ||
| 201 | slot = path->slots[0]; | ||
| 202 | } | ||
| 204 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 205 | if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) | ||
| 206 | goto next; | ||
| 207 | |||
| 208 | if (key.objectid < objectid) | ||
| 209 | goto next; | ||
| 210 | |||
| 211 | if (key.objectid > objectid) | ||
| 212 | break; | ||
| 213 | |||
| 214 | ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); | ||
| 215 | if (btrfs_disk_root_refs(leaf, ri) != 0) | ||
| 216 | goto next; | ||
| 217 | |||
| 218 | memcpy(&found_key, &key, sizeof(key)); | ||
| 219 | key.offset++; | ||
| 220 | btrfs_release_path(root, path); | ||
| 221 | dead_root = | ||
| 222 | btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
| 223 | &found_key); | ||
| 224 | if (IS_ERR(dead_root)) { | ||
| 225 | ret = PTR_ERR(dead_root); | ||
| 226 | goto err; | ||
| 227 | } | ||
| 228 | |||
| 229 | if (objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
| 230 | ret = btrfs_add_dead_reloc_root(dead_root); | ||
| 231 | else | ||
| 232 | ret = btrfs_add_dead_root(dead_root, latest); | ||
| 233 | if (ret) | ||
| 234 | goto err; | ||
| 235 | goto again; | ||
| 236 | next: | ||
| 237 | slot++; | ||
| 238 | path->slots[0]++; | ||
| 239 | } | ||
| 240 | ret = 0; | ||
| 241 | err: | ||
| 242 | btrfs_free_path(path); | ||
| 243 | return ret; | ||
| 244 | } | ||
| 245 | |||
| 246 | /* drop the root item for 'key' from 'root' */ | ||
| 247 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 248 | struct btrfs_key *key) | ||
| 249 | { | ||
| 250 | struct btrfs_path *path; | ||
| 251 | int ret; | ||
| 252 | u32 refs; | ||
| 253 | struct btrfs_root_item *ri; | ||
| 254 | struct extent_buffer *leaf; | ||
| 255 | |||
| 256 | path = btrfs_alloc_path(); | ||
| 257 | BUG_ON(!path); | ||
| 258 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); | ||
| 259 | if (ret < 0) | ||
| 260 | goto out; | ||
| 261 | if (ret) { | ||
| 262 | btrfs_print_leaf(root, path->nodes[0]); | ||
| 263 | printk("failed to del %Lu %u %Lu\n", key->objectid, key->type, key->offset); | ||
| 264 | |||
| 265 | } | ||
| 266 | BUG_ON(ret != 0); | ||
| 267 | leaf = path->nodes[0]; | ||
| 268 | ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); | ||
| 269 | |||
| 270 | refs = btrfs_disk_root_refs(leaf, ri); | ||
| 271 | BUG_ON(refs != 0); | ||
| 272 | ret = btrfs_del_item(trans, root, path); | ||
| 273 | out: | ||
| 274 | btrfs_release_path(root, path); | ||
| 275 | btrfs_free_path(path); | ||
| 276 | return ret; | ||
| 277 | } | ||
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c new file mode 100644 index 00000000000..cdedbe144d4 --- /dev/null +++ b/fs/btrfs/struct-funcs.c | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/highmem.h> | ||
| 20 | |||
| 21 | /* this is some deeply nasty code. ctree.h has a different | ||
| 22 | * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef | ||
| 23 | * | ||
| 24 | * The end result is that anyone who #includes ctree.h gets a | ||
| 25 | * declaration for the btrfs_set_foo functions and btrfs_foo functions | ||
| 26 | * | ||
| 27 | * This file defines the macros and then #includes ctree.h, which results | ||
| 28 | * in cpp creating the functions here based on the template below. | ||
| 29 | * | ||
| 30 | * These setget functions do all the extent_buffer related mapping | ||
| 31 | * required to efficiently read and write specific fields in the extent | ||
| 32 | * buffers. Every pointer to metadata items in btrfs is really just | ||
| 33 | * an unsigned long offset into the extent buffer which has been | ||
| 34 | * cast to a specific type. This gives us all the gcc type checking. | ||
| 35 | * | ||
| 36 | * The extent buffer api is used to do all the kmapping and page | ||
| 37 | * spanning work required to get extent buffers in highmem and have | ||
| 38 | * a metadata blocksize different from the page size. | ||
| 39 | */ | ||
| 40 | |||
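
For illustration, one invocation in ctree.h looks roughly like this (the inode_size accessor is just an example of the pattern):

    /* in ctree.h, behind the #ifndef: */
    BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);

    /* with the macro below in effect, that line emits definitions of: */
    u64 btrfs_inode_size(struct extent_buffer *eb,
                         struct btrfs_inode_item *s);
    void btrfs_set_inode_size(struct extent_buffer *eb,
                              struct btrfs_inode_item *s, u64 val);
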
| 41 | #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ | ||
| 42 | u##bits btrfs_##name(struct extent_buffer *eb, \ | ||
| 43 | type *s) \ | ||
| 44 | { \ | ||
| 45 | unsigned long part_offset = (unsigned long)s; \ | ||
| 46 | unsigned long offset = part_offset + offsetof(type, member); \ | ||
| 47 | type *p; \ | ||
| 48 | /* ugly, but we want the fast path here */ \ | ||
| 49 | if (eb->map_token && offset >= eb->map_start && \ | ||
| 50 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | ||
| 51 | eb->map_len) { \ | ||
| 52 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | ||
| 53 | return le##bits##_to_cpu(p->member); \ | ||
| 54 | } \ | ||
| 55 | { \ | ||
| 56 | int err; \ | ||
| 57 | char *map_token; \ | ||
| 58 | char *kaddr; \ | ||
| 59 | int unmap_on_exit = (eb->map_token == NULL); \ | ||
| 60 | unsigned long map_start; \ | ||
| 61 | unsigned long map_len; \ | ||
| 62 | __le##bits res; \ | ||
| 63 | err = map_extent_buffer(eb, offset, \ | ||
| 64 | sizeof(((type *)0)->member), \ | ||
| 65 | &map_token, &kaddr, \ | ||
| 66 | &map_start, &map_len, KM_USER1); \ | ||
| 67 | if (err) { \ | ||
| 68 | read_eb_member(eb, s, type, member, &res); \ | ||
| 69 | return le##bits##_to_cpu(res); \ | ||
| 70 | } \ | ||
| 71 | p = (type *)(kaddr + part_offset - map_start); \ | ||
| 72 | res = le##bits##_to_cpu(p->member); \ | ||
| 73 | if (unmap_on_exit) \ | ||
| 74 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
| 75 | return res; \ | ||
| 76 | } \ | ||
| 77 | } \ | ||
| 78 | void btrfs_set_##name(struct extent_buffer *eb, \ | ||
| 79 | type *s, u##bits val) \ | ||
| 80 | { \ | ||
| 81 | unsigned long part_offset = (unsigned long)s; \ | ||
| 82 | unsigned long offset = part_offset + offsetof(type, member); \ | ||
| 83 | type *p; \ | ||
| 84 | /* ugly, but we want the fast path here */ \ | ||
| 85 | if (eb->map_token && offset >= eb->map_start && \ | ||
| 86 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | ||
| 87 | eb->map_len) { \ | ||
| 88 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | ||
| 89 | p->member = cpu_to_le##bits(val); \ | ||
| 90 | return; \ | ||
| 91 | } \ | ||
| 92 | { \ | ||
| 93 | int err; \ | ||
| 94 | char *map_token; \ | ||
| 95 | char *kaddr; \ | ||
| 96 | int unmap_on_exit = (eb->map_token == NULL); \ | ||
| 97 | unsigned long map_start; \ | ||
| 98 | unsigned long map_len; \ | ||
| 99 | err = map_extent_buffer(eb, offset, \ | ||
| 100 | sizeof(((type *)0)->member), \ | ||
| 101 | &map_token, &kaddr, \ | ||
| 102 | &map_start, &map_len, KM_USER1); \ | ||
| 103 | if (err) { \ | ||
| 104 | val = cpu_to_le##bits(val); \ | ||
| 105 | write_eb_member(eb, s, type, member, &val); \ | ||
| 106 | return; \ | ||
| 107 | } \ | ||
| 108 | p = (type *)(kaddr + part_offset - map_start); \ | ||
| 109 | p->member = cpu_to_le##bits(val); \ | ||
| 110 | if (unmap_on_exit) \ | ||
| 111 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
| 112 | } \ | ||
| 113 | } | ||
| 114 | |||
| 115 | #include "ctree.h" | ||
| 116 | |||
| 117 | void btrfs_node_key(struct extent_buffer *eb, | ||
| 118 | struct btrfs_disk_key *disk_key, int nr) | ||
| 119 | { | ||
| 120 | unsigned long ptr = btrfs_node_key_ptr_offset(nr); | ||
| 121 | if (eb->map_token && ptr >= eb->map_start && | ||
| 122 | ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) { | ||
| 123 | memcpy(disk_key, eb->kaddr + ptr - eb->map_start, | ||
| 124 | sizeof(*disk_key)); | ||
| 125 | return; | ||
| 126 | } else if (eb->map_token) { | ||
| 127 | unmap_extent_buffer(eb, eb->map_token, KM_USER1); | ||
| 128 | eb->map_token = NULL; | ||
| 129 | } | ||
| 130 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, | ||
| 131 | struct btrfs_key_ptr, key, disk_key); | ||
| 132 | } | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c new file mode 100644 index 00000000000..2e6039825b7 --- /dev/null +++ b/fs/btrfs/super.c | |||
| @@ -0,0 +1,659 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/blkdev.h> | ||
| 20 | #include <linux/module.h> | ||
| 21 | #include <linux/buffer_head.h> | ||
| 22 | #include <linux/fs.h> | ||
| 23 | #include <linux/pagemap.h> | ||
| 24 | #include <linux/highmem.h> | ||
| 25 | #include <linux/time.h> | ||
| 26 | #include <linux/init.h> | ||
| 27 | #include <linux/string.h> | ||
| 28 | #include <linux/smp_lock.h> | ||
| 29 | #include <linux/backing-dev.h> | ||
| 30 | #include <linux/mount.h> | ||
| 31 | #include <linux/mpage.h> | ||
| 32 | #include <linux/swap.h> | ||
| 33 | #include <linux/writeback.h> | ||
| 34 | #include <linux/statfs.h> | ||
| 35 | #include <linux/compat.h> | ||
| 36 | #include <linux/parser.h> | ||
| 37 | #include <linux/ctype.h> | ||
| 38 | #include <linux/namei.h> | ||
| 39 | #include <linux/miscdevice.h> | ||
| 40 | #include "ctree.h" | ||
| 41 | #include "disk-io.h" | ||
| 42 | #include "transaction.h" | ||
| 43 | #include "btrfs_inode.h" | ||
| 44 | #include "ioctl.h" | ||
| 45 | #include "print-tree.h" | ||
| 46 | #include "xattr.h" | ||
| 47 | #include "volumes.h" | ||
| 48 | #include "version.h" | ||
| 49 | #include "export.h" | ||
| 50 | |||
| 51 | #define BTRFS_SUPER_MAGIC 0x9123683E | ||
| 52 | |||
| 53 | static struct super_operations btrfs_super_ops; | ||
| 54 | |||
| 55 | static void btrfs_put_super(struct super_block *sb) | ||
| 56 | { | ||
| 57 | struct btrfs_root *root = btrfs_sb(sb); | ||
| 58 | struct btrfs_fs_info *fs = root->fs_info; | ||
| 59 | int ret; | ||
| 60 | |||
| 61 | ret = close_ctree(root); | ||
| 62 | if (ret) | ||
| 63 | printk(KERN_ERR "close ctree returns %d\n", ret); | ||
| 65 | btrfs_sysfs_del_super(fs); | ||
| 66 | sb->s_fs_info = NULL; | ||
| 67 | } | ||
| 68 | |||
| 69 | enum { | ||
| 70 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | ||
| 71 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | ||
| 72 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_err, | ||
| 73 | }; | ||
| 74 | |||
| 75 | static match_table_t tokens = { | ||
| 76 | {Opt_degraded, "degraded"}, | ||
| 77 | {Opt_subvol, "subvol=%s"}, | ||
| 78 | {Opt_device, "device=%s"}, | ||
| 79 | {Opt_nodatasum, "nodatasum"}, | ||
| 80 | {Opt_nodatacow, "nodatacow"}, | ||
| 81 | {Opt_nobarrier, "nobarrier"}, | ||
| 82 | {Opt_max_extent, "max_extent=%s"}, | ||
| 83 | {Opt_max_inline, "max_inline=%s"}, | ||
| 84 | {Opt_alloc_start, "alloc_start=%s"}, | ||
| 85 | {Opt_thread_pool, "thread_pool=%d"}, | ||
| 86 | {Opt_ssd, "ssd"}, | ||
| 87 | {Opt_noacl, "noacl"}, | ||
| 88 | {Opt_err, NULL}, | ||
| 89 | }; | ||
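
These tokens come straight from the -o string at mount time, e.g. (illustrative command line):

    mount -t btrfs -o device=/dev/sdb,subvol=snap1,ssd /dev/sda /mnt

device= and subvol= are consumed early by btrfs_parse_early_options() below; the rest are handled by btrfs_parse_options().
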
| 90 | |||
| 91 | u64 btrfs_parse_size(char *str) | ||
| 92 | { | ||
| 93 | u64 res; | ||
| 94 | int mult = 1; | ||
| 95 | char *end; | ||
| 96 | char last; | ||
| 97 | |||
| 98 | res = simple_strtoul(str, &end, 10); | ||
| 99 | |||
| 100 | last = end[0]; | ||
| 101 | if (isalpha(last)) { | ||
| 102 | last = tolower(last); | ||
| 103 | switch (last) { | ||
| 104 | case 'g': | ||
| 105 | mult *= 1024; | ||
| 106 | /* fall through */ | ||
| 107 | case 'm': | ||
| 108 | mult *= 1024; | ||
| 109 | /* fall through */ | ||
| 110 | case 'k': | ||
| 111 | mult *= 1024; | ||
| 112 | } | ||
| 111 | res = res * mult; | ||
| 112 | } | ||
| 113 | return res; | ||
| 114 | } | ||
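
The case-insensitive suffixes multiply through the fallthrough cases; illustrative results:

    /* btrfs_parse_size("4096") == 4096
     * btrfs_parse_size("64k")  == 64 * 1024
     * btrfs_parse_size("64K")  == 64 * 1024
     * btrfs_parse_size("1g")   == 1024 * 1024 * 1024
     */
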
| 115 | |||
| 116 | /* | ||
| 117 | * Regular mount options parser. Everything that is needed only when | ||
| 118 | * reading in a new superblock is parsed here. | ||
| 119 | */ | ||
| 120 | int btrfs_parse_options(struct btrfs_root *root, char *options) | ||
| 121 | { | ||
| 122 | struct btrfs_fs_info *info = root->fs_info; | ||
| 123 | substring_t args[MAX_OPT_ARGS]; | ||
| 124 | char *p, *num; | ||
| 125 | int intarg; | ||
| 126 | |||
| 127 | if (!options) | ||
| 128 | return 0; | ||
| 129 | |||
| 130 | /* | ||
| 131 | * strsep changes the string, duplicate it because parse_options | ||
| 132 | * gets called twice | ||
| 133 | */ | ||
| 134 | options = kstrdup(options, GFP_NOFS); | ||
| 135 | if (!options) | ||
| 136 | return -ENOMEM; | ||
| 137 | |||
| 138 | |||
| 139 | while ((p = strsep(&options, ",")) != NULL) { | ||
| 140 | int token; | ||
| 141 | if (!*p) | ||
| 142 | continue; | ||
| 143 | |||
| 144 | token = match_token(p, tokens, args); | ||
| 145 | switch (token) { | ||
| 146 | case Opt_degraded: | ||
| 147 | printk(KERN_INFO "btrfs: allowing degraded mounts\n"); | ||
| 148 | btrfs_set_opt(info->mount_opt, DEGRADED); | ||
| 149 | break; | ||
| 150 | case Opt_subvol: | ||
| 151 | case Opt_device: | ||
| 152 | /* | ||
| 153 | * These are parsed by btrfs_parse_early_options | ||
| 154 | * and can be happily ignored here. | ||
| 155 | */ | ||
| 156 | break; | ||
| 157 | case Opt_nodatasum: | ||
| 158 | printk(KERN_INFO "btrfs: setting nodatacsum\n"); | ||
| 159 | btrfs_set_opt(info->mount_opt, NODATASUM); | ||
| 160 | break; | ||
| 161 | case Opt_nodatacow: | ||
| 162 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | ||
| 163 | btrfs_set_opt(info->mount_opt, NODATACOW); | ||
| 164 | btrfs_set_opt(info->mount_opt, NODATASUM); | ||
| 165 | break; | ||
| 166 | case Opt_ssd: | ||
| 167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | ||
| 168 | btrfs_set_opt(info->mount_opt, SSD); | ||
| 169 | break; | ||
| 170 | case Opt_nobarrier: | ||
| 171 | printk(KERN_INFO "btrfs: turning off barriers\n"); | ||
| 172 | btrfs_set_opt(info->mount_opt, NOBARRIER); | ||
| 173 | break; | ||
| 174 | case Opt_thread_pool: | ||
| 175 | intarg = 0; | ||
| 176 | match_int(&args[0], &intarg); | ||
| 177 | if (intarg) { | ||
| 178 | info->thread_pool_size = intarg; | ||
| 179 | printk(KERN_INFO "btrfs: thread pool %d\n", | ||
| 180 | info->thread_pool_size); | ||
| 181 | } | ||
| 182 | break; | ||
| 183 | case Opt_max_extent: | ||
| 184 | num = match_strdup(&args[0]); | ||
| 185 | if (num) { | ||
| 186 | info->max_extent = btrfs_parse_size(num); | ||
| 187 | kfree(num); | ||
| 188 | |||
| 189 | info->max_extent = max_t(u64, | ||
| 190 | info->max_extent, root->sectorsize); | ||
| 191 | printk(KERN_INFO "btrfs: max_extent at %llu\n", | ||
| 192 | info->max_extent); | ||
| 193 | } | ||
| 194 | break; | ||
| 195 | case Opt_max_inline: | ||
| 196 | num = match_strdup(&args[0]); | ||
| 197 | if (num) { | ||
| 198 | info->max_inline = btrfs_parse_size(num); | ||
| 199 | kfree(num); | ||
| 200 | |||
| 201 | if (info->max_inline) { | ||
| 202 | info->max_inline = max_t(u64, | ||
| 203 | info->max_inline, | ||
| 204 | root->sectorsize); | ||
| 205 | } | ||
| 206 | printk(KERN_INFO "btrfs: max_inline at %llu\n", | ||
| 207 | info->max_inline); | ||
| 208 | } | ||
| 209 | break; | ||
| 210 | case Opt_alloc_start: | ||
| 211 | num = match_strdup(&args[0]); | ||
| 212 | if (num) { | ||
| 213 | info->alloc_start = btrfs_parse_size(num); | ||
| 214 | kfree(num); | ||
| 215 | printk(KERN_INFO | ||
| 216 | "btrfs: allocations start at %llu\n", | ||
| 217 | info->alloc_start); | ||
| 218 | } | ||
| 219 | break; | ||
| 220 | case Opt_noacl: | ||
| 221 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; | ||
| 222 | break; | ||
| 223 | default: | ||
| 224 | break; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | kfree(options); | ||
| 228 | return 0; | ||
| 229 | } | ||
| 230 | |||
| 231 | /* | ||
| 232 | * Parse mount options that are required early in the mount process. | ||
| 233 | * | ||
| 234 | * All other options will be parsed much later in the mount process and | ||
| 235 | * only when we need to allocate a new super block. | ||
| 236 | */ | ||
| 237 | static int btrfs_parse_early_options(const char *options, int flags, | ||
| 238 | void *holder, char **subvol_name, | ||
| 239 | struct btrfs_fs_devices **fs_devices) | ||
| 240 | { | ||
| 241 | substring_t args[MAX_OPT_ARGS]; | ||
| 242 | char *device_name, *opts, *p; | ||
| 243 | int error = 0; | ||
| 244 | |||
| 245 | if (!options) | ||
| 246 | goto out; | ||
| 247 | |||
| 248 | /* | ||
| 249 | * strsep changes the string, duplicate it because parse_options | ||
| 250 | * gets called twice | ||
| 251 | */ | ||
| 252 | opts = kstrdup(options, GFP_KERNEL); | ||
| 253 | if (!opts) | ||
| 254 | return -ENOMEM; | ||
| 255 | |||
| 256 | while ((p = strsep(&opts, ",")) != NULL) { | ||
| 257 | int token; | ||
| 258 | if (!*p) | ||
| 259 | continue; | ||
| 260 | |||
| 261 | token = match_token(p, tokens, args); | ||
| 262 | switch (token) { | ||
| 263 | case Opt_subvol: | ||
| 264 | *subvol_name = match_strdup(&args[0]); | ||
| 265 | break; | ||
| 266 | case Opt_device: | ||
| 267 | device_name = match_strdup(&args[0]); | ||
| 268 | if (!device_name) { | ||
| 269 | error = -ENOMEM; | ||
| 270 | goto out_free_opts; | ||
| 271 | } | ||
| 272 | error = btrfs_scan_one_device(device_name, | ||
| 273 | flags, holder, fs_devices); | ||
| 274 | kfree(device_name); | ||
| 275 | if (error) | ||
| 276 | goto out_free_opts; | ||
| 277 | break; | ||
| 272 | default: | ||
| 273 | break; | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | out_free_opts: | ||
| 278 | kfree(opts); | ||
| 279 | out: | ||
| 280 | /* | ||
| 281 | * If no subvolume name is specified we use the default one. Allocate | ||
| 282 | * a copy of the string "default" here so that code later in the | ||
| 283 | * mount path doesn't care if it's the default volume or another one. | ||
| 284 | */ | ||
| 285 | if (!*subvol_name) { | ||
| 286 | *subvol_name = kstrdup("default", GFP_KERNEL); | ||
| 287 | if (!*subvol_name) | ||
| 288 | return -ENOMEM; | ||
| 289 | } | ||
| 290 | return error; | ||
| 291 | } | ||
| 292 | |||
| 293 | static int btrfs_fill_super(struct super_block *sb, | ||
| 294 | struct btrfs_fs_devices *fs_devices, | ||
| 295 | void *data, int silent) | ||
| 296 | { | ||
| 297 | struct inode *inode; | ||
| 298 | struct dentry *root_dentry; | ||
| 299 | struct btrfs_super_block *disk_super; | ||
| 300 | struct btrfs_root *tree_root; | ||
| 301 | struct btrfs_inode *bi; | ||
| 302 | int err; | ||
| 303 | |||
| 304 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
| 305 | sb->s_magic = BTRFS_SUPER_MAGIC; | ||
| 306 | sb->s_op = &btrfs_super_ops; | ||
| 307 | sb->s_export_op = &btrfs_export_ops; | ||
| 308 | sb->s_xattr = btrfs_xattr_handlers; | ||
| 309 | sb->s_time_gran = 1; | ||
| 310 | sb->s_flags |= MS_POSIXACL; | ||
| 311 | |||
| 312 | tree_root = open_ctree(sb, fs_devices, (char *)data); | ||
| 313 | |||
| 314 | if (IS_ERR(tree_root)) { | ||
| 315 | printk("btrfs: open_ctree failed\n"); | ||
| 316 | return PTR_ERR(tree_root); | ||
| 317 | } | ||
| 318 | sb->s_fs_info = tree_root; | ||
| 319 | disk_super = &tree_root->fs_info->super_copy; | ||
| 320 | inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), | ||
| 321 | tree_root); | ||
| 322 | if (!inode) { | ||
| 323 | err = -ENOMEM; | ||
| 324 | goto fail_close; | ||
| 325 | } | ||
| 326 | bi = BTRFS_I(inode); | ||
| 327 | bi->location.objectid = inode->i_ino; | ||
| 328 | bi->location.offset = 0; | ||
| 329 | bi->root = tree_root; | ||
| 330 | |||
| 331 | btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); | ||
| 332 | |||
| 333 | if (inode->i_state & I_NEW) { | ||
| 334 | btrfs_read_locked_inode(inode); | ||
| 335 | unlock_new_inode(inode); | ||
| 336 | } | ||
| 337 | |||
| 338 | root_dentry = d_alloc_root(inode); | ||
| 339 | if (!root_dentry) { | ||
| 340 | iput(inode); | ||
| 341 | err = -ENOMEM; | ||
| 342 | goto fail_close; | ||
| 343 | } | ||
| 344 | |||
| 345 | /* this does the super kobj at the same time */ | ||
| 346 | err = btrfs_sysfs_add_super(tree_root->fs_info); | ||
| 347 | if (err) | ||
| 348 | goto fail_close; | ||
| 349 | |||
| 350 | sb->s_root = root_dentry; | ||
| 351 | |||
| 352 | save_mount_options(sb, data); | ||
| 353 | return 0; | ||
| 354 | |||
| 355 | fail_close: | ||
| 356 | close_ctree(tree_root); | ||
| 357 | return err; | ||
| 358 | } | ||
| 359 | |||
| 360 | int btrfs_sync_fs(struct super_block *sb, int wait) | ||
| 361 | { | ||
| 362 | struct btrfs_trans_handle *trans; | ||
| 363 | struct btrfs_root *root; | ||
| 364 | int ret; | ||
| 365 | root = btrfs_sb(sb); | ||
| 366 | |||
| 367 | sb->s_dirt = 0; | ||
| 368 | if (!wait) { | ||
| 369 | filemap_flush(root->fs_info->btree_inode->i_mapping); | ||
| 370 | return 0; | ||
| 371 | } | ||
| 372 | btrfs_clean_old_snapshots(root); | ||
| 373 | trans = btrfs_start_transaction(root, 1); | ||
| 374 | ret = btrfs_commit_transaction(trans, root); | ||
| 375 | sb->s_dirt = 0; | ||
| 376 | return ret; | ||
| 377 | } | ||
| 378 | |||
| 379 | static void btrfs_write_super(struct super_block *sb) | ||
| 380 | { | ||
| 381 | sb->s_dirt = 0; | ||
| 382 | } | ||
| 383 | |||
| 384 | static int btrfs_test_super(struct super_block *s, void *data) | ||
| 385 | { | ||
| 386 | struct btrfs_fs_devices *test_fs_devices = data; | ||
| 387 | struct btrfs_root *root = btrfs_sb(s); | ||
| 388 | |||
| 389 | return root->fs_info->fs_devices == test_fs_devices; | ||
| 390 | } | ||
| 391 | |||
| 392 | /* | ||
| 393 | * Find a superblock for the given device / mount point. | ||
| 394 | * | ||
| 395 | * Note: This is based on get_sb_bdev from fs/super.c with a few additions | ||
| 396 | * for multiple device setup. Make sure to keep it in sync. | ||
| 397 | */ | ||
| 398 | static int btrfs_get_sb(struct file_system_type *fs_type, int flags, | ||
| 399 | const char *dev_name, void *data, struct vfsmount *mnt) | ||
| 400 | { | ||
| 401 | char *subvol_name = NULL; | ||
| 402 | struct block_device *bdev = NULL; | ||
| 403 | struct super_block *s; | ||
| 404 | struct dentry *root; | ||
| 405 | struct btrfs_fs_devices *fs_devices = NULL; | ||
| 406 | int error = 0; | ||
| 407 | |||
| 408 | error = btrfs_parse_early_options(data, flags, fs_type, | ||
| 409 | &subvol_name, &fs_devices); | ||
| 410 | if (error) | ||
| 411 | goto error; | ||
| 412 | |||
| 413 | error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices); | ||
| 414 | if (error) | ||
| 415 | goto error_free_subvol_name; | ||
| 416 | |||
| 417 | error = btrfs_open_devices(fs_devices, flags, fs_type); | ||
| 418 | if (error) | ||
| 419 | goto error_free_subvol_name; | ||
| 420 | |||
| 421 | bdev = fs_devices->latest_bdev; | ||
| 422 | s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); | ||
| 423 | if (IS_ERR(s)) | ||
| 424 | goto error_s; | ||
| 425 | |||
| 426 | if (s->s_root) { | ||
| 427 | if ((flags ^ s->s_flags) & MS_RDONLY) { | ||
| 428 | up_write(&s->s_umount); | ||
| 429 | deactivate_super(s); | ||
| 430 | error = -EBUSY; | ||
| 431 | goto error_bdev; | ||
| 432 | } | ||
| 433 | |||
| 434 | } else { | ||
| 435 | char b[BDEVNAME_SIZE]; | ||
| 436 | |||
| 437 | s->s_flags = flags; | ||
| 438 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | ||
| 439 | error = btrfs_fill_super(s, fs_devices, data, | ||
| 440 | flags & MS_SILENT ? 1 : 0); | ||
| 441 | if (error) { | ||
| 442 | up_write(&s->s_umount); | ||
| 443 | deactivate_super(s); | ||
| 444 | goto error; | ||
| 445 | } | ||
| 446 | |||
| 447 | btrfs_sb(s)->fs_info->bdev_holder = fs_type; | ||
| 448 | s->s_flags |= MS_ACTIVE; | ||
| 449 | } | ||
| 450 | |||
| 451 | if (!strcmp(subvol_name, ".")) { | ||
| 452 | root = dget(s->s_root); | ||
| 453 | } else { | ||
| 454 | mutex_lock(&s->s_root->d_inode->i_mutex); | ||
| 455 | root = lookup_one_len(subvol_name, s->s_root, | ||
| 456 | strlen(subvol_name)); | ||
| 456 | mutex_unlock(&s->s_root->d_inode->i_mutex); | ||
| 457 | if (IS_ERR(root)) { | ||
| 458 | up_write(&s->s_umount); | ||
| 459 | deactivate_super(s); | ||
| 460 | error = PTR_ERR(root); | ||
| 461 | goto error; | ||
| 462 | } | ||
| 463 | if (!root->d_inode) { | ||
| 464 | dput(root); | ||
| 465 | up_write(&s->s_umount); | ||
| 466 | deactivate_super(s); | ||
| 467 | error = -ENXIO; | ||
| 468 | goto error; | ||
| 469 | } | ||
| 470 | } | ||
| 471 | |||
| 472 | mnt->mnt_sb = s; | ||
| 473 | mnt->mnt_root = root; | ||
| 474 | |||
| 475 | kfree(subvol_name); | ||
| 476 | return 0; | ||
| 477 | |||
| 478 | error_s: | ||
| 479 | error = PTR_ERR(s); | ||
| 480 | error_bdev: | ||
| 481 | btrfs_close_devices(fs_devices); | ||
| 482 | error_free_subvol_name: | ||
| 483 | kfree(subvol_name); | ||
| 484 | error: | ||
| 485 | return error; | ||
| 486 | } | ||
| 487 | |||
| 488 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
| 489 | { | ||
| 490 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | ||
| 491 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
| 492 | int bits = dentry->d_sb->s_blocksize_bits; | ||
| 493 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | ||
| 494 | |||
| 495 | buf->f_namelen = BTRFS_NAME_LEN; | ||
| 496 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | ||
| 497 | buf->f_bfree = buf->f_blocks - | ||
| 498 | (btrfs_super_bytes_used(disk_super) >> bits); | ||
| 499 | buf->f_bavail = buf->f_bfree; | ||
| 500 | buf->f_bsize = dentry->d_sb->s_blocksize; | ||
| 501 | buf->f_type = BTRFS_SUPER_MAGIC; | ||
| 502 | /* We treat it as constant endianness (it doesn't matter _which_) | ||
| 503 | because we want the fsid to come out the same whether mounted | ||
| 504 | on a big-endian or little-endian host */ | ||
| 505 | buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); | ||
| 506 | buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); | ||
| 507 | /* Mask in the root object ID too, to disambiguate subvols */ | ||
| 508 | buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32; | ||
| 509 | buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid; | ||
| 510 | |||
| 511 | return 0; | ||
| 512 | } | ||
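
A sketch of the fsid folding with invented bytes (only the mixing pattern matters):

    /* fsid = aabbccdd eeff0011 22334455 66778899, read as four
     * big-endian 32-bit words:
     *   val[0] = 0xaabbccdd ^ 0x22334455
     *   val[1] = 0xeeff0011 ^ 0x66778899
     * then the subvolume's root objectid is XORed in, high half
     * into val[0] and low half into val[1].
     */
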
| 513 | |||
| 514 | static struct file_system_type btrfs_fs_type = { | ||
| 515 | .owner = THIS_MODULE, | ||
| 516 | .name = "btrfs", | ||
| 517 | .get_sb = btrfs_get_sb, | ||
| 518 | .kill_sb = kill_anon_super, | ||
| 519 | .fs_flags = FS_REQUIRES_DEV, | ||
| 520 | }; | ||
| 521 | |||
| 522 | /* | ||
| 523 | * used by btrfsctl to scan devices when no FS is mounted | ||
| 524 | */ | ||
| 525 | static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | ||
| 526 | unsigned long arg) | ||
| 527 | { | ||
| 528 | struct btrfs_ioctl_vol_args *vol; | ||
| 529 | struct btrfs_fs_devices *fs_devices; | ||
| 530 | int ret = 0; | ||
| 532 | |||
| 533 | vol = kmalloc(sizeof(*vol), GFP_KERNEL); | ||
| 534 | if (!vol) | ||
| 535 | return -ENOMEM; | ||
| 536 | if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { | ||
| 535 | ret = -EFAULT; | ||
| 536 | goto out; | ||
| 537 | } | ||
| 539 | switch (cmd) { | ||
| 540 | case BTRFS_IOC_SCAN_DEV: | ||
| 541 | ret = btrfs_scan_one_device(vol->name, MS_RDONLY, | ||
| 542 | &btrfs_fs_type, &fs_devices); | ||
| 543 | break; | ||
| 544 | } | ||
| 545 | out: | ||
| 546 | kfree(vol); | ||
| 547 | return ret; | ||
| 548 | } | ||
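
From userspace the control node is driven with a plain ioctl(); a hypothetical sketch of a scan request (this mirrors what a tool like btrfsctl would issue; struct btrfs_ioctl_vol_args and the constants come from ioctl.h):

    /* hypothetical userspace sketch; error handling omitted */
    struct btrfs_ioctl_vol_args args;
    int fd = open("/dev/btrfs-control", O_RDWR);

    strncpy(args.name, "/dev/sdb", BTRFS_PATH_NAME_MAX);
    args.name[BTRFS_PATH_NAME_MAX] = '\0';
    ioctl(fd, BTRFS_IOC_SCAN_DEV, &args);
    close(fd);
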
| 549 | |||
| 550 | static void btrfs_write_super_lockfs(struct super_block *sb) | ||
| 551 | { | ||
| 552 | struct btrfs_root *root = btrfs_sb(sb); | ||
| 553 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
| 554 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
| 555 | } | ||
| 556 | |||
| 557 | static void btrfs_unlockfs(struct super_block *sb) | ||
| 558 | { | ||
| 559 | struct btrfs_root *root = btrfs_sb(sb); | ||
| 560 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 561 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
| 562 | } | ||
| 563 | |||
| 564 | static struct super_operations btrfs_super_ops = { | ||
| 565 | .delete_inode = btrfs_delete_inode, | ||
| 566 | .put_super = btrfs_put_super, | ||
| 567 | .write_super = btrfs_write_super, | ||
| 568 | .sync_fs = btrfs_sync_fs, | ||
| 569 | .show_options = generic_show_options, | ||
| 570 | .write_inode = btrfs_write_inode, | ||
| 571 | .dirty_inode = btrfs_dirty_inode, | ||
| 572 | .alloc_inode = btrfs_alloc_inode, | ||
| 573 | .destroy_inode = btrfs_destroy_inode, | ||
| 574 | .statfs = btrfs_statfs, | ||
| 575 | .write_super_lockfs = btrfs_write_super_lockfs, | ||
| 576 | .unlockfs = btrfs_unlockfs, | ||
| 577 | }; | ||
| 578 | |||
| 579 | static const struct file_operations btrfs_ctl_fops = { | ||
| 580 | .unlocked_ioctl = btrfs_control_ioctl, | ||
| 581 | .compat_ioctl = btrfs_control_ioctl, | ||
| 582 | .owner = THIS_MODULE, | ||
| 583 | }; | ||
| 584 | |||
| 585 | static struct miscdevice btrfs_misc = { | ||
| 586 | .minor = MISC_DYNAMIC_MINOR, | ||
| 587 | .name = "btrfs-control", | ||
| 588 | .fops = &btrfs_ctl_fops | ||
| 589 | }; | ||
| 590 | |||
| 591 | static int btrfs_interface_init(void) | ||
| 592 | { | ||
| 593 | return misc_register(&btrfs_misc); | ||
| 594 | } | ||
| 595 | |||
| 596 | void btrfs_interface_exit(void) | ||
| 597 | { | ||
| 598 | if (misc_deregister(&btrfs_misc) < 0) | ||
| 599 | printk("misc_deregister failed for control device"); | ||
| 600 | } | ||
| 601 | |||
| 602 | static int __init init_btrfs_fs(void) | ||
| 603 | { | ||
| 604 | int err; | ||
| 605 | |||
| 606 | err = btrfs_init_sysfs(); | ||
| 607 | if (err) | ||
| 608 | return err; | ||
| 609 | |||
| 610 | err = btrfs_init_cachep(); | ||
| 611 | if (err) | ||
| 612 | goto free_sysfs; | ||
| 613 | |||
| 614 | err = extent_io_init(); | ||
| 615 | if (err) | ||
| 616 | goto free_cachep; | ||
| 617 | |||
| 618 | err = extent_map_init(); | ||
| 619 | if (err) | ||
| 620 | goto free_extent_io; | ||
| 621 | |||
| 622 | err = btrfs_interface_init(); | ||
| 623 | if (err) | ||
| 624 | goto free_extent_map; | ||
| 625 | err = register_filesystem(&btrfs_fs_type); | ||
| 626 | if (err) | ||
| 627 | goto unregister_ioctl; | ||
| 628 | |||
| 629 | printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); | ||
| 630 | return 0; | ||
| 631 | |||
| 632 | unregister_ioctl: | ||
| 633 | btrfs_interface_exit(); | ||
| 634 | free_extent_map: | ||
| 635 | extent_map_exit(); | ||
| 636 | free_extent_io: | ||
| 637 | extent_io_exit(); | ||
| 638 | free_cachep: | ||
| 639 | btrfs_destroy_cachep(); | ||
| 640 | free_sysfs: | ||
| 641 | btrfs_exit_sysfs(); | ||
| 642 | return err; | ||
| 643 | } | ||
| 644 | |||
| 645 | static void __exit exit_btrfs_fs(void) | ||
| 646 | { | ||
| 647 | btrfs_destroy_cachep(); | ||
| 648 | extent_map_exit(); | ||
| 649 | extent_io_exit(); | ||
| 650 | btrfs_interface_exit(); | ||
| 651 | unregister_filesystem(&btrfs_fs_type); | ||
| 652 | btrfs_exit_sysfs(); | ||
| 653 | btrfs_cleanup_fs_uuids(); | ||
| 654 | } | ||
| 655 | |||
| 656 | module_init(init_btrfs_fs); | ||
| 657 | module_exit(exit_btrfs_fs); | ||
| 658 | |||
| 659 | MODULE_LICENSE("GPL"); | ||
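
The miscdevice registered above shows up as /dev/btrfs-control with a
dynamically assigned minor. A minimal userspace sketch of talking to it
follows; the ioctl number and argument layout here are assumptions for
illustration (the real definitions live in this patch's fs/btrfs/ioctl.h),
and /dev/sdb is just a placeholder device path.

/* sketch: scanning a device via /dev/btrfs-control (hypothetical layout) */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>

#define BTRFS_IOCTL_MAGIC 0x94              /* assumption: see fs/btrfs/ioctl.h */

struct btrfs_ioctl_vol_args {               /* assumption: layout per ioctl.h */
	char name[4096];
};

#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, struct btrfs_ioctl_vol_args)

int main(void)
{
	struct btrfs_ioctl_vol_args args;
	int fd = open("/dev/btrfs-control", O_RDWR);

	if (fd < 0) {
		perror("open /dev/btrfs-control");
		return 1;
	}
	memset(&args, 0, sizeof(args));
	strcpy(args.name, "/dev/sdb");      /* placeholder device path */
	if (ioctl(fd, BTRFS_IOC_SCAN_DEV, &args) < 0)
		perror("BTRFS_IOC_SCAN_DEV");
	close(fd);
	return 0;
}
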
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c new file mode 100644 index 00000000000..300076e6676 --- /dev/null +++ b/fs/btrfs/sysfs.c | |||
| @@ -0,0 +1,268 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/sched.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <linux/spinlock.h> | ||
| 22 | #include <linux/completion.h> | ||
| 23 | #include <linux/buffer_head.h> | ||
| 24 | #include <linux/module.h> | ||
| 25 | #include <linux/kobject.h> | ||
| 26 | |||
| 27 | #include "ctree.h" | ||
| 28 | #include "disk-io.h" | ||
| 29 | #include "transaction.h" | ||
| 30 | |||
| 31 | static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) | ||
| 32 | { | ||
| 33 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
| 34 | (unsigned long long)btrfs_root_used(&root->root_item)); | ||
| 35 | } | ||
| 36 | |||
| 37 | static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) | ||
| 38 | { | ||
| 39 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
| 40 | (unsigned long long)btrfs_root_limit(&root->root_item)); | ||
| 41 | } | ||
| 42 | |||
| 43 | static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) | ||
| 44 | { | ||
| 46 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
| 47 | (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); | ||
| 48 | } | ||
| 49 | |||
| 50 | static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) | ||
| 51 | { | ||
| 52 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
| 53 | (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); | ||
| 54 | } | ||
| 55 | |||
| 56 | static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) | ||
| 57 | { | ||
| 58 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
| 59 | (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); | ||
| 60 | } | ||
| 61 | |||
| 62 | /* this is for root attrs (subvols/snapshots) */ | ||
| 63 | struct btrfs_root_attr { | ||
| 64 | struct attribute attr; | ||
| 65 | ssize_t (*show)(struct btrfs_root *, char *); | ||
| 66 | ssize_t (*store)(struct btrfs_root *, const char *, size_t); | ||
| 67 | }; | ||
| 68 | |||
| 69 | #define ROOT_ATTR(name, mode, show, store) \ | ||
| 70 | static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store) | ||
| 71 | |||
| 72 | ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); | ||
| 73 | ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); | ||
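
For readers unfamiliar with the kernel's __ATTR helper, the first
ROOT_ATTR invocation above expands to roughly the following (a sketch of
the macro expansion, not new code). Note that block_limit is declared
writable (0644) even though no store method exists yet, so writes are
rejected by the dispatch code further down.

/* sketch: what ROOT_ATTR(blocks_used, 0444, ...) expands to */
static struct btrfs_root_attr btrfs_root_attr_blocks_used = {
	.attr  = { .name = "blocks_used", .mode = 0444 },
	.show  = root_blocks_used_show,
	.store = NULL,
};
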
| 74 | |||
| 75 | static struct attribute *btrfs_root_attrs[] = { | ||
| 76 | &btrfs_root_attr_blocks_used.attr, | ||
| 77 | &btrfs_root_attr_block_limit.attr, | ||
| 78 | NULL, | ||
| 79 | }; | ||
| 80 | |||
| 81 | /* this is for super attrs (actual full fs) */ | ||
| 82 | struct btrfs_super_attr { | ||
| 83 | struct attribute attr; | ||
| 84 | ssize_t (*show)(struct btrfs_fs_info *, char *); | ||
| 85 | ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); | ||
| 86 | }; | ||
| 87 | |||
| 88 | #define SUPER_ATTR(name, mode, show, store) \ | ||
| 89 | static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store) | ||
| 90 | |||
| 91 | SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); | ||
| 92 | SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); | ||
| 93 | SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); | ||
| 94 | |||
| 95 | static struct attribute *btrfs_super_attrs[] = { | ||
| 96 | &btrfs_super_attr_blocks_used.attr, | ||
| 97 | &btrfs_super_attr_total_blocks.attr, | ||
| 98 | &btrfs_super_attr_blocksize.attr, | ||
| 99 | NULL, | ||
| 100 | }; | ||
| 101 | |||
| 102 | static ssize_t btrfs_super_attr_show(struct kobject *kobj, | ||
| 103 | struct attribute *attr, char *buf) | ||
| 104 | { | ||
| 105 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
| 106 | super_kobj); | ||
| 107 | struct btrfs_super_attr *a = container_of(attr, | ||
| 108 | struct btrfs_super_attr, | ||
| 109 | attr); | ||
| 110 | |||
| 111 | return a->show ? a->show(fs, buf) : 0; | ||
| 112 | } | ||
| 113 | |||
| 114 | static ssize_t btrfs_super_attr_store(struct kobject *kobj, | ||
| 115 | struct attribute *attr, | ||
| 116 | const char *buf, size_t len) | ||
| 117 | { | ||
| 118 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
| 119 | super_kobj); | ||
| 120 | struct btrfs_super_attr *a = container_of(attr, | ||
| 121 | struct btrfs_super_attr, | ||
| 122 | attr); | ||
| 123 | |||
| 124 | return a->store ? a->store(fs, buf, len) : -EPERM; | ||
| 125 | } | ||
| 126 | |||
| 127 | static ssize_t btrfs_root_attr_show(struct kobject *kobj, | ||
| 128 | struct attribute *attr, char *buf) | ||
| 129 | { | ||
| 130 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
| 131 | root_kobj); | ||
| 132 | struct btrfs_root_attr *a = container_of(attr, | ||
| 133 | struct btrfs_root_attr, | ||
| 134 | attr); | ||
| 135 | |||
| 136 | return a->show ? a->show(root, buf) : 0; | ||
| 137 | } | ||
| 138 | |||
| 139 | static ssize_t btrfs_root_attr_store(struct kobject *kobj, | ||
| 140 | struct attribute *attr, | ||
| 141 | const char *buf, size_t len) | ||
| 142 | { | ||
| 143 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
| 144 | root_kobj); | ||
| 145 | struct btrfs_root_attr *a = container_of(attr, | ||
| 146 | struct btrfs_root_attr, | ||
| 147 | attr); | ||
| 148 | return a->store ? a->store(root, buf, len) : -EPERM; | ||
| 149 | } | ||
| 150 | |||
| 151 | static void btrfs_super_release(struct kobject *kobj) | ||
| 152 | { | ||
| 153 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
| 154 | super_kobj); | ||
| 155 | complete(&fs->kobj_unregister); | ||
| 156 | } | ||
| 157 | |||
| 158 | static void btrfs_root_release(struct kobject *kobj) | ||
| 159 | { | ||
| 160 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
| 161 | root_kobj); | ||
| 162 | complete(&root->kobj_unregister); | ||
| 163 | } | ||
| 164 | |||
| 165 | static struct sysfs_ops btrfs_super_attr_ops = { | ||
| 166 | .show = btrfs_super_attr_show, | ||
| 167 | .store = btrfs_super_attr_store, | ||
| 168 | }; | ||
| 169 | |||
| 170 | static struct sysfs_ops btrfs_root_attr_ops = { | ||
| 171 | .show = btrfs_root_attr_show, | ||
| 172 | .store = btrfs_root_attr_store, | ||
| 173 | }; | ||
| 174 | |||
| 175 | static struct kobj_type btrfs_root_ktype = { | ||
| 176 | .default_attrs = btrfs_root_attrs, | ||
| 177 | .sysfs_ops = &btrfs_root_attr_ops, | ||
| 178 | .release = btrfs_root_release, | ||
| 179 | }; | ||
| 180 | |||
| 181 | static struct kobj_type btrfs_super_ktype = { | ||
| 182 | .default_attrs = btrfs_super_attrs, | ||
| 183 | .sysfs_ops = &btrfs_super_attr_ops, | ||
| 184 | .release = btrfs_super_release, | ||
| 185 | }; | ||
| 186 | |||
| 187 | /* /sys/fs/btrfs/ entry */ | ||
| 188 | static struct kset *btrfs_kset; | ||
| 189 | |||
| 190 | int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) | ||
| 191 | { | ||
| 192 | int error; | ||
| 193 | char *name; | ||
| 194 | char c; | ||
| 195 | int len = strlen(fs->sb->s_id) + 1; | ||
| 196 | int i; | ||
| 197 | |||
| 198 | name = kmalloc(len, GFP_NOFS); | ||
| 199 | if (!name) { | ||
| 200 | error = -ENOMEM; | ||
| 201 | goto fail; | ||
| 202 | } | ||
| 203 | |||
| 204 | for (i = 0; i < len; i++) { | ||
| 205 | c = fs->sb->s_id[i]; | ||
| 206 | if (c == '/' || c == '\\') | ||
| 207 | c = '!'; | ||
| 208 | name[i] = c; | ||
| 209 | } | ||
| 210 | name[len - 1] = '\0'; | ||
| 211 | |||
| 212 | fs->super_kobj.kset = btrfs_kset; | ||
| 213 | error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype, | ||
| 214 | NULL, "%s", name); | ||
| 215 | if (error) | ||
| 216 | goto fail; | ||
| 217 | |||
| 218 | kfree(name); | ||
| 219 | return 0; | ||
| 220 | |||
| 221 | fail: | ||
| 222 | kfree(name); | ||
| 223 | printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); | ||
| 224 | return error; | ||
| 225 | } | ||
| 226 | |||
| 227 | int btrfs_sysfs_add_root(struct btrfs_root *root) | ||
| 228 | { | ||
| 229 | int error; | ||
| 230 | |||
| 231 | error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype, | ||
| 232 | &root->fs_info->super_kobj, | ||
| 233 | "%s", root->name); | ||
| 234 | if (error) | ||
| 235 | goto fail; | ||
| 236 | |||
| 237 | return 0; | ||
| 238 | |||
| 239 | fail: | ||
| 240 | printk(KERN_ERR "btrfs: sysfs creation for root failed\n"); | ||
| 241 | return error; | ||
| 242 | } | ||
| 243 | |||
| 244 | void btrfs_sysfs_del_root(struct btrfs_root *root) | ||
| 245 | { | ||
| 246 | kobject_put(&root->root_kobj); | ||
| 247 | wait_for_completion(&root->kobj_unregister); | ||
| 248 | } | ||
| 249 | |||
| 250 | void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) | ||
| 251 | { | ||
| 252 | kobject_put(&fs->super_kobj); | ||
| 253 | wait_for_completion(&fs->kobj_unregister); | ||
| 254 | } | ||
| 255 | |||
| 256 | int btrfs_init_sysfs(void) | ||
| 257 | { | ||
| 258 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); | ||
| 259 | if (!btrfs_kset) | ||
| 260 | return -ENOMEM; | ||
| 261 | return 0; | ||
| 262 | } | ||
| 263 | |||
| 264 | void btrfs_exit_sysfs(void) | ||
| 265 | { | ||
| 266 | kset_unregister(btrfs_kset); | ||
| 267 | } | ||
| 268 | |||
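
Once a filesystem is mounted, these kobjects surface under
/sys/fs/btrfs/<device>/, with '/' and '\' in the device name rewritten to
'!'. A small runnable sketch of reading one attribute from userspace
follows; the directory name depends on the actual block device, so the
path below is illustrative. Note that despite its name, blocks_used
reports bytes (btrfs_super_bytes_used).

/* sketch: reading a btrfs sysfs attribute (path is illustrative) */
#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/fs/btrfs/sda1/blocks_used", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("bytes used: %s", buf);
	fclose(f);
	return 0;
}
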
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c new file mode 100644 index 00000000000..5ecc24d634a --- /dev/null +++ b/fs/btrfs/transaction.c | |||
| @@ -0,0 +1,1023 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/fs.h> | ||
| 20 | #include <linux/sched.h> | ||
| 21 | #include <linux/writeback.h> | ||
| 22 | #include <linux/pagemap.h> | ||
| 23 | #include "ctree.h" | ||
| 24 | #include "disk-io.h" | ||
| 25 | #include "transaction.h" | ||
| 26 | #include "locking.h" | ||
| 27 | #include "ref-cache.h" | ||
| 28 | #include "tree-log.h" | ||
| 29 | |||
| 30 | static int total_trans; | ||
| 31 | extern struct kmem_cache *btrfs_trans_handle_cachep; | ||
| 32 | extern struct kmem_cache *btrfs_transaction_cachep; | ||
| 33 | |||
| 34 | #define BTRFS_ROOT_TRANS_TAG 0 | ||
| 35 | |||
| 36 | static noinline void put_transaction(struct btrfs_transaction *transaction) | ||
| 37 | { | ||
| 38 | WARN_ON(transaction->use_count == 0); | ||
| 39 | transaction->use_count--; | ||
| 40 | if (transaction->use_count == 0) { | ||
| 41 | WARN_ON(total_trans == 0); | ||
| 42 | total_trans--; | ||
| 43 | list_del_init(&transaction->list); | ||
| 44 | memset(transaction, 0, sizeof(*transaction)); | ||
| 45 | kmem_cache_free(btrfs_transaction_cachep, transaction); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | /* | ||
| 50 | * either allocate a new transaction or hop into the existing one | ||
| 51 | */ | ||
| 52 | static noinline int join_transaction(struct btrfs_root *root) | ||
| 53 | { | ||
| 54 | struct btrfs_transaction *cur_trans; | ||
| 55 | cur_trans = root->fs_info->running_transaction; | ||
| 56 | if (!cur_trans) { | ||
| 57 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | ||
| 58 | GFP_NOFS); | ||
| 59 | BUG_ON(!cur_trans); | ||
| 60 | total_trans++; | ||
| 61 | root->fs_info->generation++; | ||
| 62 | root->fs_info->last_alloc = 0; | ||
| 63 | root->fs_info->last_data_alloc = 0; | ||
| 64 | cur_trans->num_writers = 1; | ||
| 65 | cur_trans->num_joined = 0; | ||
| 66 | cur_trans->transid = root->fs_info->generation; | ||
| 67 | init_waitqueue_head(&cur_trans->writer_wait); | ||
| 68 | init_waitqueue_head(&cur_trans->commit_wait); | ||
| 69 | cur_trans->in_commit = 0; | ||
| 70 | cur_trans->blocked = 0; | ||
| 71 | cur_trans->use_count = 1; | ||
| 72 | cur_trans->commit_done = 0; | ||
| 73 | cur_trans->start_time = get_seconds(); | ||
| 74 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
| 75 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
| 76 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
| 77 | root->fs_info->btree_inode->i_mapping, | ||
| 78 | GFP_NOFS); | ||
| 79 | spin_lock(&root->fs_info->new_trans_lock); | ||
| 80 | root->fs_info->running_transaction = cur_trans; | ||
| 81 | spin_unlock(&root->fs_info->new_trans_lock); | ||
| 82 | } else { | ||
| 83 | cur_trans->num_writers++; | ||
| 84 | cur_trans->num_joined++; | ||
| 85 | } | ||
| 86 | |||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | |||
| 90 | /* | ||
| 91 | * this does all the record keeping required to make sure that a | ||
| 92 | * reference counted root is properly recorded in a given transaction. | ||
| 93 | * This is required to make sure the old root from before we joined | ||
| 94 | * the transaction is deleted when the transaction commits. | ||
| 95 | */ | ||
| 96 | noinline int btrfs_record_root_in_trans(struct btrfs_root *root) | ||
| 97 | { | ||
| 98 | struct btrfs_dirty_root *dirty; | ||
| 99 | u64 running_trans_id = root->fs_info->running_transaction->transid; | ||
| 100 | if (root->ref_cows && root->last_trans < running_trans_id) { | ||
| 101 | WARN_ON(root == root->fs_info->extent_root); | ||
| 102 | if (root->root_item.refs != 0) { | ||
| 103 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | ||
| 104 | (unsigned long)root->root_key.objectid, | ||
| 105 | BTRFS_ROOT_TRANS_TAG); | ||
| 106 | |||
| 107 | dirty = kmalloc(sizeof(*dirty), GFP_NOFS); | ||
| 108 | BUG_ON(!dirty); | ||
| 109 | dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); | ||
| 110 | BUG_ON(!dirty->root); | ||
| 111 | dirty->latest_root = root; | ||
| 112 | INIT_LIST_HEAD(&dirty->list); | ||
| 113 | |||
| 114 | root->commit_root = btrfs_root_node(root); | ||
| 115 | |||
| 116 | memcpy(dirty->root, root, sizeof(*root)); | ||
| 117 | spin_lock_init(&dirty->root->node_lock); | ||
| 118 | spin_lock_init(&dirty->root->list_lock); | ||
| 119 | mutex_init(&dirty->root->objectid_mutex); | ||
| 120 | mutex_init(&dirty->root->log_mutex); | ||
| 121 | INIT_LIST_HEAD(&dirty->root->dead_list); | ||
| 122 | dirty->root->node = root->commit_root; | ||
| 123 | dirty->root->commit_root = NULL; | ||
| 124 | |||
| 125 | spin_lock(&root->list_lock); | ||
| 126 | list_add(&dirty->root->dead_list, &root->dead_list); | ||
| 127 | spin_unlock(&root->list_lock); | ||
| 128 | |||
| 129 | root->dirty_root = dirty; | ||
| 130 | } else { | ||
| 131 | WARN_ON(1); | ||
| 132 | } | ||
| 133 | root->last_trans = running_trans_id; | ||
| 134 | } | ||
| 135 | return 0; | ||
| 136 | } | ||
| 137 | |||
| 138 | /* wait for commit against the current transaction to become unblocked | ||
| 139 | * when this is done, it is safe to start a new transaction, but the current | ||
| 140 | * transaction might not be fully on disk. | ||
| 141 | */ | ||
| 142 | static void wait_current_trans(struct btrfs_root *root) | ||
| 143 | { | ||
| 144 | struct btrfs_transaction *cur_trans; | ||
| 145 | |||
| 146 | cur_trans = root->fs_info->running_transaction; | ||
| 147 | if (cur_trans && cur_trans->blocked) { | ||
| 148 | DEFINE_WAIT(wait); | ||
| 149 | cur_trans->use_count++; | ||
| 150 | while (1) { | ||
| 151 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | ||
| 152 | TASK_UNINTERRUPTIBLE); | ||
| 153 | if (cur_trans->blocked) { | ||
| 154 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 155 | schedule(); | ||
| 156 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 157 | finish_wait(&root->fs_info->transaction_wait, | ||
| 158 | &wait); | ||
| 159 | } else { | ||
| 160 | finish_wait(&root->fs_info->transaction_wait, | ||
| 161 | &wait); | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | put_transaction(cur_trans); | ||
| 166 | } | ||
| 167 | } | ||
| 168 | |||
| 169 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | ||
| 170 | int num_blocks, int wait) | ||
| 171 | { | ||
| 172 | struct btrfs_trans_handle *h = | ||
| 173 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
| 174 | int ret; | ||
| 175 | |||
| | BUG_ON(!h); /* allocation failure is fatal here, as in join_transaction */ | ||
| 176 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 177 | if (!root->fs_info->log_root_recovering && | ||
| 178 | ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) | ||
| 179 | wait_current_trans(root); | ||
| 180 | ret = join_transaction(root); | ||
| 181 | BUG_ON(ret); | ||
| 182 | |||
| 183 | btrfs_record_root_in_trans(root); | ||
| 184 | h->transid = root->fs_info->running_transaction->transid; | ||
| 185 | h->transaction = root->fs_info->running_transaction; | ||
| 186 | h->blocks_reserved = num_blocks; | ||
| 187 | h->blocks_used = 0; | ||
| 188 | h->block_group = NULL; | ||
| 189 | h->alloc_exclude_nr = 0; | ||
| 190 | h->alloc_exclude_start = 0; | ||
| 191 | root->fs_info->running_transaction->use_count++; | ||
| 192 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 193 | return h; | ||
| 194 | } | ||
| 195 | |||
| 196 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | ||
| 197 | int num_blocks) | ||
| 198 | { | ||
| 199 | return start_transaction(root, num_blocks, 1); | ||
| 200 | } | ||
| |||
| 201 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | ||
| 202 | int num_blocks) | ||
| 203 | { | ||
| 204 | return start_transaction(root, num_blocks, 0); | ||
| 205 | } | ||
| 206 | |||
| 207 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | ||
| 208 | int num_blocks) | ||
| 209 | { | ||
| 210 | return start_transaction(r, num_blocks, 2); | ||
| 211 | } | ||
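
Elsewhere in this patch, callers pair these constructors with
btrfs_end_transaction(); a minimal sketch of the expected usage (the
actual tree modification in the middle is elided):

/* sketch: canonical transaction usage by an in-kernel btrfs caller */
static int example_tree_update(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 1);  /* may wait out a blocked commit */
	/* ... insert/update/delete items under this handle ... */
	return btrfs_end_transaction(trans, root); /* drops num_writers, frees handle */
}
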
| 212 | |||
| 213 | /* wait for a transaction commit to be fully complete */ | ||
| 214 | static noinline int wait_for_commit(struct btrfs_root *root, | ||
| 215 | struct btrfs_transaction *commit) | ||
| 216 | { | ||
| 217 | DEFINE_WAIT(wait); | ||
| 218 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 219 | while (!commit->commit_done) { | ||
| 220 | prepare_to_wait(&commit->commit_wait, &wait, | ||
| 221 | TASK_UNINTERRUPTIBLE); | ||
| 222 | if (commit->commit_done) | ||
| 223 | break; | ||
| 224 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 225 | schedule(); | ||
| 226 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 227 | } | ||
| 228 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 229 | finish_wait(&commit->commit_wait, &wait); | ||
| 230 | return 0; | ||
| 231 | } | ||
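
Both wait_current_trans() and wait_for_commit() build on the kernel's
open-coded waitqueue idiom; stripped of the trans_mutex juggling, the
underlying pattern is the sketch below, where wq and done stand in for
commit_wait and commit_done.

/* sketch: the prepare_to_wait/finish_wait idiom used by these helpers */
static void example_wait(wait_queue_head_t *wq, int *done)
{
	DEFINE_WAIT(wait);

	while (!*done) {
		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		if (*done)          /* re-check after queueing to close the race */
			break;
		schedule();         /* sleep until a wake_up(wq) */
	}
	finish_wait(wq, &wait);
}
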
| 232 | |||
| 233 | /* | ||
| 234 | * rate limit against the drop_snapshot code. This helps to slow down | ||
| 235 | * new operations if the drop_snapshot code isn't able to keep up. | ||
| 236 | */ | ||
| 237 | static void throttle_on_drops(struct btrfs_root *root) | ||
| 238 | { | ||
| 239 | struct btrfs_fs_info *info = root->fs_info; | ||
| 240 | int harder_count = 0; | ||
| 241 | |||
| 242 | harder: | ||
| 243 | if (atomic_read(&info->throttles)) { | ||
| 244 | DEFINE_WAIT(wait); | ||
| 245 | int thr; | ||
| 246 | thr = atomic_read(&info->throttle_gen); | ||
| 247 | |||
| 248 | do { | ||
| 249 | prepare_to_wait(&info->transaction_throttle, | ||
| 250 | &wait, TASK_UNINTERRUPTIBLE); | ||
| 251 | if (!atomic_read(&info->throttles)) { | ||
| 252 | finish_wait(&info->transaction_throttle, &wait); | ||
| 253 | break; | ||
| 254 | } | ||
| 255 | schedule(); | ||
| 256 | finish_wait(&info->transaction_throttle, &wait); | ||
| 257 | } while (thr == atomic_read(&info->throttle_gen)); | ||
| 258 | harder_count++; | ||
| 259 | |||
| 260 | if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && | ||
| 261 | harder_count < 2) | ||
| 262 | goto harder; | ||
| 263 | |||
| 264 | if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && | ||
| 265 | harder_count < 10) | ||
| 266 | goto harder; | ||
| 267 | |||
| 268 | if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && | ||
| 269 | harder_count < 20) | ||
| 270 | goto harder; | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | void btrfs_throttle(struct btrfs_root *root) | ||
| 275 | { | ||
| 276 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 277 | if (!root->fs_info->open_ioctl_trans) | ||
| 278 | wait_current_trans(root); | ||
| 279 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 280 | |||
| 281 | throttle_on_drops(root); | ||
| 282 | } | ||
| 283 | |||
| 284 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | ||
| 285 | struct btrfs_root *root, int throttle) | ||
| 286 | { | ||
| 287 | struct btrfs_transaction *cur_trans; | ||
| 288 | struct btrfs_fs_info *info = root->fs_info; | ||
| 289 | |||
| 290 | mutex_lock(&info->trans_mutex); | ||
| 291 | cur_trans = info->running_transaction; | ||
| 292 | WARN_ON(cur_trans != trans->transaction); | ||
| 293 | WARN_ON(cur_trans->num_writers < 1); | ||
| 294 | cur_trans->num_writers--; | ||
| 295 | |||
| 296 | if (waitqueue_active(&cur_trans->writer_wait)) | ||
| 297 | wake_up(&cur_trans->writer_wait); | ||
| 298 | put_transaction(cur_trans); | ||
| 299 | mutex_unlock(&info->trans_mutex); | ||
| 300 | memset(trans, 0, sizeof(*trans)); | ||
| 301 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | ||
| 302 | |||
| 303 | if (throttle) | ||
| 304 | throttle_on_drops(root); | ||
| 305 | |||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | |||
| 309 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | ||
| 310 | struct btrfs_root *root) | ||
| 311 | { | ||
| 312 | return __btrfs_end_transaction(trans, root, 0); | ||
| 313 | } | ||
| 314 | |||
| 315 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | ||
| 316 | struct btrfs_root *root) | ||
| 317 | { | ||
| 318 | return __btrfs_end_transaction(trans, root, 1); | ||
| 319 | } | ||
| 320 | |||
| 321 | /* | ||
| 322 | * when btree blocks are allocated, they have some corresponding bits set for | ||
| 323 | * them in one of two extent_io trees. This is used to make sure all of | ||
| 324 | * those extents are on disk for transaction or log commit | ||
| 325 | */ | ||
| 326 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | ||
| 327 | struct extent_io_tree *dirty_pages) | ||
| 328 | { | ||
| 329 | int ret; | ||
| 330 | int err = 0; | ||
| 331 | int werr = 0; | ||
| 332 | struct page *page; | ||
| 333 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 334 | u64 start = 0; | ||
| 335 | u64 end; | ||
| 336 | unsigned long index; | ||
| 337 | |||
| 338 | while (1) { | ||
| 339 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
| 340 | EXTENT_DIRTY); | ||
| 341 | if (ret) | ||
| 342 | break; | ||
| 343 | while (start <= end) { | ||
| 344 | cond_resched(); | ||
| 345 | |||
| 346 | index = start >> PAGE_CACHE_SHIFT; | ||
| 347 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
| 348 | page = find_get_page(btree_inode->i_mapping, index); | ||
| 349 | if (!page) | ||
| 350 | continue; | ||
| 351 | |||
| 352 | btree_lock_page_hook(page); | ||
| 353 | if (!page->mapping) { | ||
| 354 | unlock_page(page); | ||
| 355 | page_cache_release(page); | ||
| 356 | continue; | ||
| 357 | } | ||
| 358 | |||
| 359 | if (PageWriteback(page)) { | ||
| 360 | if (PageDirty(page)) | ||
| 361 | wait_on_page_writeback(page); | ||
| 362 | else { | ||
| 363 | unlock_page(page); | ||
| 364 | page_cache_release(page); | ||
| 365 | continue; | ||
| 366 | } | ||
| 367 | } | ||
| 368 | err = write_one_page(page, 0); | ||
| 369 | if (err) | ||
| 370 | werr = err; | ||
| 371 | page_cache_release(page); | ||
| 372 | } | ||
| 373 | } | ||
| 374 | while (1) { | ||
| 375 | ret = find_first_extent_bit(dirty_pages, 0, &start, &end, | ||
| 376 | EXTENT_DIRTY); | ||
| 377 | if (ret) | ||
| 378 | break; | ||
| 379 | |||
| 380 | clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); | ||
| 381 | while (start <= end) { | ||
| 382 | index = start >> PAGE_CACHE_SHIFT; | ||
| 383 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
| 384 | page = find_get_page(btree_inode->i_mapping, index); | ||
| 385 | if (!page) | ||
| 386 | continue; | ||
| 387 | if (PageDirty(page)) { | ||
| 388 | btree_lock_page_hook(page); | ||
| 389 | wait_on_page_writeback(page); | ||
| 390 | err = write_one_page(page, 0); | ||
| 391 | if (err) | ||
| 392 | werr = err; | ||
| 393 | } | ||
| 394 | wait_on_page_writeback(page); | ||
| 395 | page_cache_release(page); | ||
| 396 | cond_resched(); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | if (err) | ||
| 400 | werr = err; | ||
| 401 | return werr; | ||
| 402 | } | ||
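
The index arithmetic in both loops converts a byte offset into a page
cache index and then advances start to the first byte of the following
page. A worked example, assuming 4KiB pages (PAGE_CACHE_SHIFT == 12):

/* sketch: byte offset -> page index with 4KiB pages */
u64 start = 0x1800;                     /* byte 6144, inside page 1 */
unsigned long index = start >> 12;      /* index == 1 */
start = (u64)(index + 1) << 12;         /* 0x2000, first byte of page 2 */
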
| 403 | |||
| 404 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | ||
| 405 | struct btrfs_root *root) | ||
| 406 | { | ||
| 407 | if (!trans || !trans->transaction) { | ||
| 408 | struct inode *btree_inode; | ||
| 409 | btree_inode = root->fs_info->btree_inode; | ||
| 410 | return filemap_write_and_wait(btree_inode->i_mapping); | ||
| 411 | } | ||
| 412 | return btrfs_write_and_wait_marked_extents(root, | ||
| 413 | &trans->transaction->dirty_pages); | ||
| 414 | } | ||
| 415 | |||
| 416 | /* | ||
| 417 | * this is used to update the root pointer in the tree of tree roots. | ||
| 418 | * | ||
| 419 | * But, in the case of the extent allocation tree, updating the root | ||
| 420 | * pointer may allocate blocks which may change the root of the extent | ||
| 421 | * allocation tree. | ||
| 422 | * | ||
| 423 | * So, this loops and repeats and makes sure the cowonly root didn't | ||
| 424 | * change while the root pointer was being updated in the metadata. | ||
| 425 | */ | ||
| 426 | static int update_cowonly_root(struct btrfs_trans_handle *trans, | ||
| 427 | struct btrfs_root *root) | ||
| 428 | { | ||
| 429 | int ret; | ||
| 430 | u64 old_root_bytenr; | ||
| 431 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
| 432 | |||
| 433 | btrfs_write_dirty_block_groups(trans, root); | ||
| 434 | while (1) { | ||
| 435 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | ||
| 436 | if (old_root_bytenr == root->node->start) | ||
| 437 | break; | ||
| 438 | btrfs_set_root_bytenr(&root->root_item, | ||
| 439 | root->node->start); | ||
| 440 | btrfs_set_root_level(&root->root_item, | ||
| 441 | btrfs_header_level(root->node)); | ||
| 442 | ret = btrfs_update_root(trans, tree_root, | ||
| 443 | &root->root_key, | ||
| 444 | &root->root_item); | ||
| 445 | BUG_ON(ret); | ||
| 446 | btrfs_write_dirty_block_groups(trans, root); | ||
| 447 | } | ||
| 448 | return 0; | ||
| 449 | } | ||
| 450 | |||
| 451 | /* | ||
| 452 | * update all the cowonly tree roots on disk | ||
| 453 | */ | ||
| 454 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | ||
| 455 | struct btrfs_root *root) | ||
| 456 | { | ||
| 457 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 458 | struct list_head *next; | ||
| 459 | |||
| 460 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | ||
| 461 | next = fs_info->dirty_cowonly_roots.next; | ||
| 462 | list_del_init(next); | ||
| 463 | root = list_entry(next, struct btrfs_root, dirty_list); | ||
| 464 | update_cowonly_root(trans, root); | ||
| 465 | } | ||
| 466 | return 0; | ||
| 467 | } | ||
| 468 | |||
| 469 | /* | ||
| 470 | * dead roots are old snapshots that need to be deleted. This allocates | ||
| 471 | * a dirty root struct and adds it into the list of dead roots that need to | ||
| 472 | * be deleted | ||
| 473 | */ | ||
| 474 | int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) | ||
| 475 | { | ||
| 476 | struct btrfs_dirty_root *dirty; | ||
| 477 | |||
| 478 | dirty = kmalloc(sizeof(*dirty), GFP_NOFS); | ||
| 479 | if (!dirty) | ||
| 480 | return -ENOMEM; | ||
| 481 | dirty->root = root; | ||
| 482 | dirty->latest_root = latest; | ||
| 483 | |||
| 484 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 485 | list_add(&dirty->list, &latest->fs_info->dead_roots); | ||
| 486 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 487 | return 0; | ||
| 488 | } | ||
| 489 | |||
| 490 | /* | ||
| 491 | * at transaction commit time we need to schedule the old roots for | ||
| 492 | * deletion via btrfs_drop_snapshot. This runs through all the | ||
| 493 | * reference counted roots that were modified in the current | ||
| 494 | * transaction and puts them into the drop list | ||
| 495 | */ | ||
| 496 | static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | ||
| 497 | struct radix_tree_root *radix, | ||
| 498 | struct list_head *list) | ||
| 499 | { | ||
| 500 | struct btrfs_dirty_root *dirty; | ||
| 501 | struct btrfs_root *gang[8]; | ||
| 502 | struct btrfs_root *root; | ||
| 503 | int i; | ||
| 504 | int ret; | ||
| 505 | int err = 0; | ||
| 506 | u32 refs; | ||
| 507 | |||
| 508 | while (1) { | ||
| 509 | ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, | ||
| 510 | ARRAY_SIZE(gang), | ||
| 511 | BTRFS_ROOT_TRANS_TAG); | ||
| 512 | if (ret == 0) | ||
| 513 | break; | ||
| 514 | for (i = 0; i < ret; i++) { | ||
| 515 | root = gang[i]; | ||
| 516 | radix_tree_tag_clear(radix, | ||
| 517 | (unsigned long)root->root_key.objectid, | ||
| 518 | BTRFS_ROOT_TRANS_TAG); | ||
| 519 | |||
| 520 | BUG_ON(!root->ref_tree); | ||
| 521 | dirty = root->dirty_root; | ||
| 522 | |||
| 523 | btrfs_free_log(trans, root); | ||
| 524 | btrfs_free_reloc_root(root); | ||
| 525 | |||
| 526 | if (root->commit_root == root->node) { | ||
| 527 | WARN_ON(root->node->start != | ||
| 528 | btrfs_root_bytenr(&root->root_item)); | ||
| 529 | |||
| 530 | free_extent_buffer(root->commit_root); | ||
| 531 | root->commit_root = NULL; | ||
| 532 | root->dirty_root = NULL; | ||
| 533 | |||
| 534 | spin_lock(&root->list_lock); | ||
| 535 | list_del_init(&dirty->root->dead_list); | ||
| 536 | spin_unlock(&root->list_lock); | ||
| 537 | |||
| 538 | kfree(dirty->root); | ||
| 539 | kfree(dirty); | ||
| 540 | |||
| 541 | /* make sure to update the root on disk | ||
| 542 | * so we get any updates to the block used | ||
| 543 | * counts | ||
| 544 | */ | ||
| 545 | err = btrfs_update_root(trans, | ||
| 546 | root->fs_info->tree_root, | ||
| 547 | &root->root_key, | ||
| 548 | &root->root_item); | ||
| 549 | continue; | ||
| 550 | } | ||
| 551 | |||
| 552 | memset(&root->root_item.drop_progress, 0, | ||
| 553 | sizeof(struct btrfs_disk_key)); | ||
| 554 | root->root_item.drop_level = 0; | ||
| 555 | root->commit_root = NULL; | ||
| 556 | root->dirty_root = NULL; | ||
| 557 | root->root_key.offset = root->fs_info->generation; | ||
| 558 | btrfs_set_root_bytenr(&root->root_item, | ||
| 559 | root->node->start); | ||
| 560 | btrfs_set_root_level(&root->root_item, | ||
| 561 | btrfs_header_level(root->node)); | ||
| 562 | err = btrfs_insert_root(trans, root->fs_info->tree_root, | ||
| 563 | &root->root_key, | ||
| 564 | &root->root_item); | ||
| 565 | if (err) | ||
| 566 | break; | ||
| 567 | |||
| 568 | refs = btrfs_root_refs(&dirty->root->root_item); | ||
| 569 | btrfs_set_root_refs(&dirty->root->root_item, refs - 1); | ||
| 570 | err = btrfs_update_root(trans, root->fs_info->tree_root, | ||
| 571 | &dirty->root->root_key, | ||
| 572 | &dirty->root->root_item); | ||
| 573 | |||
| 574 | BUG_ON(err); | ||
| 575 | if (refs == 1) { | ||
| 576 | list_add(&dirty->list, list); | ||
| 577 | } else { | ||
| 578 | WARN_ON(1); | ||
| 579 | free_extent_buffer(dirty->root->node); | ||
| 580 | kfree(dirty->root); | ||
| 581 | kfree(dirty); | ||
| 582 | } | ||
| 583 | } | ||
| 584 | } | ||
| 585 | return err; | ||
| 586 | } | ||
| 587 | |||
| 588 | /* | ||
| 589 | * defrag a given btree. If cacheonly == 1, this won't read from the disk, | ||
| 590 | * otherwise every leaf in the btree is read and defragged. | ||
| 591 | */ | ||
| 592 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | ||
| 593 | { | ||
| 594 | struct btrfs_fs_info *info = root->fs_info; | ||
| 595 | int ret; | ||
| 596 | struct btrfs_trans_handle *trans; | ||
| 597 | unsigned long nr; | ||
| 598 | |||
| 599 | smp_mb(); | ||
| 600 | if (root->defrag_running) | ||
| 601 | return 0; | ||
| 602 | trans = btrfs_start_transaction(root, 1); | ||
| 603 | while (1) { | ||
| 604 | root->defrag_running = 1; | ||
| 605 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | ||
| 606 | nr = trans->blocks_used; | ||
| 607 | btrfs_end_transaction(trans, root); | ||
| 608 | btrfs_btree_balance_dirty(info->tree_root, nr); | ||
| 609 | cond_resched(); | ||
| 610 | |||
| 611 | trans = btrfs_start_transaction(root, 1); | ||
| 612 | if (root->fs_info->closing || ret != -EAGAIN) | ||
| 613 | break; | ||
| 614 | } | ||
| 615 | root->defrag_running = 0; | ||
| 616 | smp_mb(); | ||
| 617 | btrfs_end_transaction(trans, root); | ||
| 618 | return 0; | ||
| 619 | } | ||
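
btrfs_defrag_root() loops internally on btrfs_defrag_leaves()'s -EAGAIN,
so a caller only needs a single invocation per root; one would expect the
defrag ioctl path (outside this hunk) to drive it roughly as in this
sketch:

/* sketch: a hypothetical caller driving root defrag */
static int example_defrag(struct btrfs_root *root)
{
	int ret;

	ret = btrfs_defrag_root(root, 0);  /* reads and defrags every leaf */
	if (!ret)
		ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
	return ret;
}
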
| 620 | |||
| 621 | /* | ||
| 622 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | ||
| 623 | * all of them | ||
| 624 | */ | ||
| 625 | static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | ||
| 626 | struct list_head *list) | ||
| 627 | { | ||
| 628 | struct btrfs_dirty_root *dirty; | ||
| 629 | struct btrfs_trans_handle *trans; | ||
| 630 | unsigned long nr; | ||
| 631 | u64 num_bytes; | ||
| 632 | u64 bytes_used; | ||
| 633 | u64 max_useless; | ||
| 634 | int ret = 0; | ||
| 635 | int err; | ||
| 636 | |||
| 637 | while (!list_empty(list)) { | ||
| 638 | struct btrfs_root *root; | ||
| 639 | |||
| 640 | dirty = list_entry(list->prev, struct btrfs_dirty_root, list); | ||
| 641 | list_del_init(&dirty->list); | ||
| 642 | |||
| 643 | num_bytes = btrfs_root_used(&dirty->root->root_item); | ||
| 644 | root = dirty->latest_root; | ||
| 645 | atomic_inc(&root->fs_info->throttles); | ||
| 646 | |||
| 647 | while (1) { | ||
| 648 | trans = btrfs_start_transaction(tree_root, 1); | ||
| 649 | mutex_lock(&root->fs_info->drop_mutex); | ||
| 650 | ret = btrfs_drop_snapshot(trans, dirty->root); | ||
| 651 | if (ret != -EAGAIN) | ||
| 652 | break; | ||
| 654 | mutex_unlock(&root->fs_info->drop_mutex); | ||
| 655 | |||
| 656 | err = btrfs_update_root(trans, | ||
| 657 | tree_root, | ||
| 658 | &dirty->root->root_key, | ||
| 659 | &dirty->root->root_item); | ||
| 660 | if (err) | ||
| 661 | ret = err; | ||
| 662 | nr = trans->blocks_used; | ||
| 663 | ret = btrfs_end_transaction(trans, tree_root); | ||
| 664 | BUG_ON(ret); | ||
| 665 | |||
| 666 | btrfs_btree_balance_dirty(tree_root, nr); | ||
| 667 | cond_resched(); | ||
| 668 | } | ||
| 669 | BUG_ON(ret); | ||
| 670 | atomic_dec(&root->fs_info->throttles); | ||
| 671 | wake_up(&root->fs_info->transaction_throttle); | ||
| 672 | |||
| 673 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 674 | num_bytes -= btrfs_root_used(&dirty->root->root_item); | ||
| 675 | bytes_used = btrfs_root_used(&root->root_item); | ||
| 676 | if (num_bytes) { | ||
| 677 | btrfs_record_root_in_trans(root); | ||
| 678 | btrfs_set_root_used(&root->root_item, | ||
| 679 | bytes_used - num_bytes); | ||
| 680 | } | ||
| 681 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 682 | |||
| 683 | ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); | ||
| 684 | if (ret) { | ||
| 685 | BUG(); | ||
| 686 | break; | ||
| 687 | } | ||
| 688 | mutex_unlock(&root->fs_info->drop_mutex); | ||
| 689 | |||
| 690 | spin_lock(&root->list_lock); | ||
| 691 | list_del_init(&dirty->root->dead_list); | ||
| 692 | if (!list_empty(&root->dead_list)) { | ||
| 693 | struct btrfs_root *oldest; | ||
| 694 | oldest = list_entry(root->dead_list.prev, | ||
| 695 | struct btrfs_root, dead_list); | ||
| 696 | max_useless = oldest->root_key.offset - 1; | ||
| 697 | } else { | ||
| 698 | max_useless = root->root_key.offset - 1; | ||
| 699 | } | ||
| 700 | spin_unlock(&root->list_lock); | ||
| 701 | |||
| 702 | nr = trans->blocks_used; | ||
| 703 | ret = btrfs_end_transaction(trans, tree_root); | ||
| 704 | BUG_ON(ret); | ||
| 705 | |||
| 706 | ret = btrfs_remove_leaf_refs(root, max_useless, 0); | ||
| 707 | BUG_ON(ret); | ||
| 708 | |||
| 709 | free_extent_buffer(dirty->root->node); | ||
| 710 | kfree(dirty->root); | ||
| 711 | kfree(dirty); | ||
| 712 | |||
| 713 | btrfs_btree_balance_dirty(tree_root, nr); | ||
| 714 | cond_resched(); | ||
| 715 | } | ||
| 716 | return ret; | ||
| 717 | } | ||
| 718 | |||
| 719 | /* | ||
| 720 | * new snapshots need to be created at a very specific time in the | ||
| 721 | * transaction commit. This does the actual creation | ||
| 722 | */ | ||
| 723 | static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | ||
| 724 | struct btrfs_fs_info *fs_info, | ||
| 725 | struct btrfs_pending_snapshot *pending) | ||
| 726 | { | ||
| 727 | struct btrfs_key key; | ||
| 728 | struct btrfs_root_item *new_root_item; | ||
| 729 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
| 730 | struct btrfs_root *root = pending->root; | ||
| 731 | struct extent_buffer *tmp; | ||
| 732 | struct extent_buffer *old; | ||
| 733 | int ret; | ||
| 734 | int namelen; | ||
| 735 | u64 objectid; | ||
| 736 | |||
| 737 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | ||
| 738 | if (!new_root_item) { | ||
| 739 | ret = -ENOMEM; | ||
| 740 | goto fail; | ||
| 741 | } | ||
| 742 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | ||
| 743 | if (ret) | ||
| 744 | goto fail; | ||
| 745 | |||
| 746 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | ||
| 747 | |||
| 748 | key.objectid = objectid; | ||
| 749 | key.offset = trans->transid; | ||
| 750 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 751 | |||
| 752 | old = btrfs_lock_root_node(root); | ||
| 753 | btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); | ||
| 754 | |||
| 755 | btrfs_copy_root(trans, root, old, &tmp, objectid); | ||
| 756 | btrfs_tree_unlock(old); | ||
| 757 | free_extent_buffer(old); | ||
| 758 | |||
| 759 | btrfs_set_root_bytenr(new_root_item, tmp->start); | ||
| 760 | btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); | ||
| 761 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | ||
| 762 | new_root_item); | ||
| 763 | btrfs_tree_unlock(tmp); | ||
| 764 | free_extent_buffer(tmp); | ||
| 765 | if (ret) | ||
| 766 | goto fail; | ||
| 767 | |||
| 768 | /* | ||
| 769 | * insert the directory item | ||
| 770 | */ | ||
| 771 | key.offset = (u64)-1; | ||
| 772 | namelen = strlen(pending->name); | ||
| 773 | ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, | ||
| 774 | pending->name, namelen, | ||
| 775 | root->fs_info->sb->s_root->d_inode->i_ino, | ||
| 776 | &key, BTRFS_FT_DIR, 0); | ||
| 777 | |||
| 778 | if (ret) | ||
| 779 | goto fail; | ||
| 780 | |||
| 781 | ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, | ||
| 782 | pending->name, namelen, objectid, | ||
| 783 | root->fs_info->sb->s_root->d_inode->i_ino, 0); | ||
| 784 | |||
| 785 | /* Invalidate existing dcache entry for new snapshot. */ | ||
| 786 | btrfs_invalidate_dcache_root(root, pending->name, namelen); | ||
| 787 | |||
| 788 | fail: | ||
| 789 | kfree(new_root_item); | ||
| 790 | return ret; | ||
| 791 | } | ||
| 792 | |||
| 793 | /* | ||
| 794 | * create all the snapshots we've scheduled for creation | ||
| 795 | */ | ||
| 796 | static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, | ||
| 797 | struct btrfs_fs_info *fs_info) | ||
| 798 | { | ||
| 799 | struct btrfs_pending_snapshot *pending; | ||
| 800 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
| 801 | int ret; | ||
| 802 | |||
| 803 | while (!list_empty(head)) { | ||
| 804 | pending = list_entry(head->next, | ||
| 805 | struct btrfs_pending_snapshot, list); | ||
| 806 | ret = create_pending_snapshot(trans, fs_info, pending); | ||
| 807 | BUG_ON(ret); | ||
| 808 | list_del(&pending->list); | ||
| 809 | kfree(pending->name); | ||
| 810 | kfree(pending); | ||
| 811 | } | ||
| 812 | return 0; | ||
| 813 | } | ||
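
create_pending_snapshots() only drains the list; the producer side (the
snapshot ioctl, outside this hunk) queues entries under the running
transaction roughly as in this sketch, where the snapshot name and the
missing error handling are purely illustrative:

/* sketch: queueing a snapshot to be created at the next commit */
static void example_queue_snapshot(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_pending_snapshot *pending;

	pending = kmalloc(sizeof(*pending), GFP_NOFS);
	pending->name = kstrdup("snap1", GFP_NOFS); /* illustrative, unchecked */
	pending->root = root;
	list_add(&pending->list, &trans->transaction->pending_snapshots);
	/* create_pending_snapshot() materializes it during the commit */
}
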
| 814 | |||
| 815 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | ||
| 816 | struct btrfs_root *root) | ||
| 817 | { | ||
| 818 | unsigned long joined = 0; | ||
| 819 | unsigned long timeout = 1; | ||
| 820 | struct btrfs_transaction *cur_trans; | ||
| 821 | struct btrfs_transaction *prev_trans = NULL; | ||
| 822 | struct btrfs_root *chunk_root = root->fs_info->chunk_root; | ||
| 823 | struct list_head dirty_fs_roots; | ||
| 824 | struct extent_io_tree *pinned_copy; | ||
| 825 | DEFINE_WAIT(wait); | ||
| 826 | int ret; | ||
| 827 | |||
| 828 | INIT_LIST_HEAD(&dirty_fs_roots); | ||
| 829 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 830 | if (trans->transaction->in_commit) { | ||
| 831 | cur_trans = trans->transaction; | ||
| 832 | trans->transaction->use_count++; | ||
| 833 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 834 | btrfs_end_transaction(trans, root); | ||
| 835 | |||
| 836 | ret = wait_for_commit(root, cur_trans); | ||
| 837 | BUG_ON(ret); | ||
| 838 | |||
| 839 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 840 | put_transaction(cur_trans); | ||
| 841 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 842 | |||
| 843 | return 0; | ||
| 844 | } | ||
| 845 | |||
| 846 | pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); | ||
| 847 | if (!pinned_copy) | ||
| 848 | return -ENOMEM; | ||
| 849 | |||
| 850 | extent_io_tree_init(pinned_copy, | ||
| 851 | root->fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 852 | |||
| 853 | trans->transaction->in_commit = 1; | ||
| 854 | trans->transaction->blocked = 1; | ||
| 855 | cur_trans = trans->transaction; | ||
| 856 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | ||
| 857 | prev_trans = list_entry(cur_trans->list.prev, | ||
| 858 | struct btrfs_transaction, list); | ||
| 859 | if (!prev_trans->commit_done) { | ||
| 860 | prev_trans->use_count++; | ||
| 861 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 862 | |||
| 863 | wait_for_commit(root, prev_trans); | ||
| 864 | |||
| 865 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 866 | put_transaction(prev_trans); | ||
| 867 | } | ||
| 868 | } | ||
| 869 | |||
| 870 | do { | ||
| 871 | int snap_pending = 0; | ||
| 872 | joined = cur_trans->num_joined; | ||
| 873 | if (!list_empty(&trans->transaction->pending_snapshots)) | ||
| 874 | snap_pending = 1; | ||
| 875 | |||
| 876 | WARN_ON(cur_trans != trans->transaction); | ||
| 877 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
| 878 | TASK_UNINTERRUPTIBLE); | ||
| 879 | |||
| 880 | if (cur_trans->num_writers > 1) | ||
| 881 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
| 882 | else | ||
| 883 | timeout = 1; | ||
| 884 | |||
| 885 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 886 | |||
| 887 | if (snap_pending) { | ||
| 888 | ret = btrfs_wait_ordered_extents(root, 1); | ||
| 889 | BUG_ON(ret); | ||
| 890 | } | ||
| 891 | |||
| 892 | schedule_timeout(timeout); | ||
| 893 | |||
| 894 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 895 | finish_wait(&cur_trans->writer_wait, &wait); | ||
| 896 | } while (cur_trans->num_writers > 1 || | ||
| 897 | (cur_trans->num_joined != joined)); | ||
| 898 | |||
| 899 | ret = create_pending_snapshots(trans, root->fs_info); | ||
| 900 | BUG_ON(ret); | ||
| 901 | |||
| 902 | WARN_ON(cur_trans != trans->transaction); | ||
| 903 | |||
| 904 | /* btrfs_commit_tree_roots is responsible for getting the | ||
| 905 | * various roots consistent with each other. Every pointer | ||
| 906 | * in the tree of tree roots has to point to the most up to date | ||
| 907 | * root for every subvolume and other tree. So, we have to keep | ||
| 908 | * the tree logging code from jumping in and changing any | ||
| 909 | * of the trees. | ||
| 910 | * | ||
| 911 | * At this point in the commit, there can't be any tree-log | ||
| 912 | * writers, but a little lower down we drop the trans mutex | ||
| 913 | * and let new people in. By holding the tree_log_mutex | ||
| 914 | * from now until after the super is written, we avoid races | ||
| 915 | * with the tree-log code. | ||
| 916 | */ | ||
| 917 | mutex_lock(&root->fs_info->tree_log_mutex); | ||
| 918 | /* | ||
| 919 | * keep tree reloc code from adding new reloc trees | ||
| 920 | */ | ||
| 921 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
| 922 | |||
| 924 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, | ||
| 925 | &dirty_fs_roots); | ||
| 926 | BUG_ON(ret); | ||
| 927 | |||
| 928 | /* add_dirty_roots gets rid of all the tree log roots; it is now | ||
| 929 | * safe to free the tree of log roots | ||
| 930 | */ | ||
| 931 | btrfs_free_log_root_tree(trans, root->fs_info); | ||
| 932 | |||
| 933 | btrfs_free_reloc_mappings(root); | ||
| 934 | |||
| 935 | ret = btrfs_commit_tree_roots(trans, root); | ||
| 936 | BUG_ON(ret); | ||
| 937 | |||
| 938 | cur_trans = root->fs_info->running_transaction; | ||
| 939 | spin_lock(&root->fs_info->new_trans_lock); | ||
| 940 | root->fs_info->running_transaction = NULL; | ||
| 941 | spin_unlock(&root->fs_info->new_trans_lock); | ||
| 942 | btrfs_set_super_generation(&root->fs_info->super_copy, | ||
| 943 | cur_trans->transid); | ||
| 944 | btrfs_set_super_root(&root->fs_info->super_copy, | ||
| 945 | root->fs_info->tree_root->node->start); | ||
| 946 | btrfs_set_super_root_level(&root->fs_info->super_copy, | ||
| 947 | btrfs_header_level(root->fs_info->tree_root->node)); | ||
| 948 | |||
| 949 | btrfs_set_super_chunk_root(&root->fs_info->super_copy, | ||
| 950 | chunk_root->node->start); | ||
| 951 | btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, | ||
| 952 | btrfs_header_level(chunk_root->node)); | ||
| 953 | |||
| 954 | if (!root->fs_info->log_root_recovering) { | ||
| 955 | btrfs_set_super_log_root(&root->fs_info->super_copy, 0); | ||
| 956 | btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); | ||
| 957 | } | ||
| 958 | |||
| 959 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | ||
| 960 | sizeof(root->fs_info->super_copy)); | ||
| 961 | |||
| 962 | btrfs_copy_pinned(root, pinned_copy); | ||
| 963 | |||
| 964 | trans->transaction->blocked = 0; | ||
| 965 | wake_up(&root->fs_info->transaction_throttle); | ||
| 966 | wake_up(&root->fs_info->transaction_wait); | ||
| 967 | |||
| 968 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 969 | ret = btrfs_write_and_wait_transaction(trans, root); | ||
| 970 | BUG_ON(ret); | ||
| 971 | write_ctree_super(trans, root); | ||
| 972 | |||
| 973 | /* | ||
| 974 | * the super is written, we can safely allow the tree-loggers | ||
| 975 | * to go about their business | ||
| 976 | */ | ||
| 977 | mutex_unlock(&root->fs_info->tree_log_mutex); | ||
| 978 | |||
| 979 | btrfs_finish_extent_commit(trans, root, pinned_copy); | ||
| 980 | kfree(pinned_copy); | ||
| 981 | |||
| 982 | btrfs_drop_dead_reloc_roots(root); | ||
| 983 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
| 984 | |||
| 985 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 986 | |||
| 987 | cur_trans->commit_done = 1; | ||
| 988 | root->fs_info->last_trans_committed = cur_trans->transid; | ||
| 989 | wake_up(&cur_trans->commit_wait); | ||
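| | /* two references die here: the one taken for this handle in | ||
| | * start_transaction(), and the one running_transaction held | ||
| | * until it was cleared above | ||
| | */ | ||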
| 990 | put_transaction(cur_trans); | ||
| 991 | put_transaction(cur_trans); | ||
| 992 | |||
| 993 | list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); | ||
| 994 | if (root->fs_info->closing) | ||
| 995 | list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); | ||
| 996 | |||
| 997 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 998 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | ||
| 999 | |||
| 1000 | if (root->fs_info->closing) | ||
| 1001 | drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); | ||
| 1003 | return ret; | ||
| 1004 | } | ||
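
Unlike btrfs_end_transaction(), this function frees the handle itself
(the kmem_cache_free() above), so callers must not touch trans after it
returns; a sketch:

/* sketch: committing a transaction; the handle is consumed by the call */
static int example_commit(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 1);
	/* ... do work ... */
	return btrfs_commit_transaction(trans, root); /* super on disk on return */
}
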
| 1005 | |||
| 1006 | /* | ||
| 1007 | * interface function to delete all the snapshots we have scheduled | ||
| 1008 | * for deletion | ||
| 1008 | */ | ||
| 1009 | int btrfs_clean_old_snapshots(struct btrfs_root *root) | ||
| 1010 | { | ||
| 1011 | struct list_head dirty_roots; | ||
| 1012 | INIT_LIST_HEAD(&dirty_roots); | ||
| 1013 | again: | ||
| 1014 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 1015 | list_splice_init(&root->fs_info->dead_roots, &dirty_roots); | ||
| 1016 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1017 | |||
| 1018 | if (!list_empty(&dirty_roots)) { | ||
| 1019 | drop_dirty_roots(root, &dirty_roots); | ||
| 1020 | goto again; | ||
| 1021 | } | ||
| 1022 | return 0; | ||
| 1023 | } | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h new file mode 100644 index 00000000000..eef2cb7d7e7 --- /dev/null +++ b/fs/btrfs/transaction.h | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_TRANSACTION__ | ||
| 20 | #define __BTRFS_TRANSACTION__ | ||
| 21 | #include "btrfs_inode.h" | ||
| 22 | |||
| 23 | struct btrfs_transaction { | ||
| 24 | u64 transid; | ||
| 25 | unsigned long num_writers; | ||
| 26 | unsigned long num_joined; | ||
| 27 | int in_commit; | ||
| 28 | int use_count; | ||
| 29 | int commit_done; | ||
| 30 | int blocked; | ||
| 31 | struct list_head list; | ||
| 32 | struct extent_io_tree dirty_pages; | ||
| 33 | unsigned long start_time; | ||
| 34 | wait_queue_head_t writer_wait; | ||
| 35 | wait_queue_head_t commit_wait; | ||
| 36 | struct list_head pending_snapshots; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct btrfs_trans_handle { | ||
| 40 | u64 transid; | ||
| 41 | unsigned long blocks_reserved; | ||
| 42 | unsigned long blocks_used; | ||
| 43 | struct btrfs_transaction *transaction; | ||
| 44 | struct btrfs_block_group_cache *block_group; | ||
| 45 | u64 alloc_exclude_start; | ||
| 46 | u64 alloc_exclude_nr; | ||
| 47 | }; | ||
| 48 | |||
| 49 | struct btrfs_pending_snapshot { | ||
| 50 | struct btrfs_root *root; | ||
| 51 | char *name; | ||
| 52 | struct list_head list; | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct btrfs_dirty_root { | ||
| 56 | struct list_head list; | ||
| 57 | struct btrfs_root *root; | ||
| 58 | struct btrfs_root *latest_root; | ||
| 59 | }; | ||
| 60 | |||
| 61 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, | ||
| 62 | struct inode *inode) | ||
| 63 | { | ||
| 64 | trans->block_group = BTRFS_I(inode)->block_group; | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline void btrfs_update_inode_block_group( | ||
| 68 | struct btrfs_trans_handle *trans, | ||
| 69 | struct inode *inode) | ||
| 70 | { | ||
| 71 | BTRFS_I(inode)->block_group = trans->block_group; | ||
| 72 | } | ||
| 73 | |||
| 74 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | ||
| 75 | struct inode *inode) | ||
| 76 | { | ||
| 77 | BTRFS_I(inode)->last_trans = trans->transaction->transid; | ||
| 78 | } | ||
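
A sketch of how an inode write path is expected to pair these helpers
(the actual item updates in the middle are elided):

/* sketch: block group hints around an inode update */
static int example_inode_update(struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 1);
	btrfs_set_trans_block_group(trans, inode);    /* allocate near this inode */
	/* ... modify the inode's items ... */
	btrfs_update_inode_block_group(trans, inode); /* remember the final hint */
	btrfs_set_inode_last_trans(trans, inode);     /* consulted by the tree log */
	return btrfs_end_transaction(trans, root);
}
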
| 79 | |||
| 80 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | ||
| 81 | struct btrfs_root *root); | ||
| 82 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | ||
| 83 | int num_blocks); | ||
| 84 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | ||
| 85 | int num_blocks); | ||
| 86 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | ||
| 87 | int num_blocks); | ||
| 88 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | ||
| 89 | struct btrfs_root *root); | ||
| 90 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | ||
| 91 | struct btrfs_root *root); | ||
| 92 | |||
| 93 | int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); | ||
| 94 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); | ||
| 95 | int btrfs_clean_old_snapshots(struct btrfs_root *root); | ||
| 96 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | ||
| 97 | struct btrfs_root *root); | ||
| 98 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | ||
| 99 | struct btrfs_root *root); | ||
| 100 | void btrfs_throttle(struct btrfs_root *root); | ||
| 101 | int btrfs_record_root_in_trans(struct btrfs_root *root); | ||
| 102 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | ||
| 103 | struct extent_io_tree *dirty_pages); | ||
| 104 | #endif | ||
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c new file mode 100644 index 00000000000..6f57d0889b1 --- /dev/null +++ b/fs/btrfs/tree-defrag.c | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/sched.h> | ||
| 20 | #include "ctree.h" | ||
| 21 | #include "disk-io.h" | ||
| 22 | #include "print-tree.h" | ||
| 23 | #include "transaction.h" | ||
| 24 | #include "locking.h" | ||
| 25 | |||
| 26 | /* | ||
| 27 | * defrag all the leaves in a given btree. If cache_only == 1, don't | ||
| 28 | * read things from disk, otherwise read all the leaves and try to get | ||
| 29 | * key order to better reflect disk order | ||
| 29 | */ | ||
| 30 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | ||
| 31 | struct btrfs_root *root, int cache_only) | ||
| 32 | { | ||
| 33 | struct btrfs_path *path = NULL; | ||
| 34 | struct btrfs_key key; | ||
| 35 | int ret = 0; | ||
| 36 | int wret; | ||
| 37 | int level; | ||
| 38 | int orig_level; | ||
| 39 | int is_extent = 0; | ||
| 40 | int next_key_ret = 0; | ||
| 41 | u64 last_ret = 0; | ||
| 42 | u64 min_trans = 0; | ||
| 43 | |||
| 44 | if (cache_only) | ||
| 45 | goto out; | ||
| 46 | |||
| 47 | if (root->fs_info->extent_root == root) { | ||
| 48 | /* | ||
| 49 | * there's recursion here right now in the tree locking, | ||
| 50 | * we can't defrag the extent root without deadlock | ||
| 51 | */ | ||
| 52 | goto out; | ||
| 53 | } | ||
| 54 | |||
| 55 | if (root->ref_cows == 0 && !is_extent) | ||
| 56 | goto out; | ||
| 57 | |||
| 58 | if (btrfs_test_opt(root, SSD)) | ||
| 59 | goto out; | ||
| 60 | |||
| 61 | path = btrfs_alloc_path(); | ||
| 62 | if (!path) | ||
| 63 | return -ENOMEM; | ||
| 64 | |||
| 65 | level = btrfs_header_level(root->node); | ||
| 66 | orig_level = level; | ||
| 67 | |||
| 68 | if (level == 0) | ||
| 69 | goto out; | ||
| 70 | |||
| 71 | if (root->defrag_progress.objectid == 0) { | ||
| 72 | struct extent_buffer *root_node; | ||
| 73 | u32 nritems; | ||
| 74 | |||
| 75 | root_node = btrfs_lock_root_node(root); | ||
| 76 | nritems = btrfs_header_nritems(root_node); | ||
| 77 | root->defrag_max.objectid = 0; | ||
| 78 | /* from above we know this is not a leaf */ | ||
| 79 | btrfs_node_key_to_cpu(root_node, &root->defrag_max, | ||
| 80 | nritems - 1); | ||
| 81 | btrfs_tree_unlock(root_node); | ||
| 82 | free_extent_buffer(root_node); | ||
| 83 | memset(&key, 0, sizeof(key)); | ||
| 84 | } else { | ||
| 85 | memcpy(&key, &root->defrag_progress, sizeof(key)); | ||
| 86 | } | ||
| 87 | |||
| 88 | path->keep_locks = 1; | ||
| 89 | if (cache_only) | ||
| 90 | min_trans = root->defrag_trans_start; | ||
| 91 | |||
| 92 | ret = btrfs_search_forward(root, &key, NULL, path, | ||
| 93 | cache_only, min_trans); | ||
| 94 | if (ret < 0) | ||
| 95 | goto out; | ||
| 96 | if (ret > 0) { | ||
| 97 | ret = 0; | ||
| 98 | goto out; | ||
| 99 | } | ||
| 100 | btrfs_release_path(root, path); | ||
| 101 | wret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 102 | |||
| 103 | if (wret < 0) { | ||
| 104 | ret = wret; | ||
| 105 | goto out; | ||
| 106 | } | ||
| 107 | if (!path->nodes[1]) { | ||
| 108 | ret = 0; | ||
| 109 | goto out; | ||
| 110 | } | ||
| 111 | path->slots[1] = btrfs_header_nritems(path->nodes[1]); | ||
| 112 | next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, | ||
| 113 | min_trans); | ||
| 114 | ret = btrfs_realloc_node(trans, root, | ||
| 115 | path->nodes[1], 0, | ||
| 116 | cache_only, &last_ret, | ||
| 117 | &root->defrag_progress); | ||
| 118 | WARN_ON(ret && ret != -EAGAIN); | ||
| 119 | if (next_key_ret == 0) { | ||
| 120 | memcpy(&root->defrag_progress, &key, sizeof(key)); | ||
| 121 | ret = -EAGAIN; | ||
| 122 | } | ||
| 123 | |||
| 124 | btrfs_release_path(root, path); | ||
| 125 | if (is_extent) | ||
| 126 | btrfs_extent_post_op(trans, root); | ||
| 127 | out: | ||
| 128 | if (is_extent) | ||
| 129 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 130 | |||
| 131 | if (path) | ||
| 132 | btrfs_free_path(path); | ||
| 133 | if (ret == -EAGAIN) { | ||
| 134 | if (root->defrag_max.objectid > root->defrag_progress.objectid) | ||
| 135 | goto done; | ||
| 136 | if (root->defrag_max.type > root->defrag_progress.type) | ||
| 137 | goto done; | ||
| 138 | if (root->defrag_max.offset > root->defrag_progress.offset) | ||
| 139 | goto done; | ||
| 140 | ret = 0; | ||
| 141 | } | ||
| 142 | done: | ||
| 143 | if (ret != -EAGAIN) { | ||
| 144 | memset(&root->defrag_progress, 0, | ||
| 145 | sizeof(root->defrag_progress)); | ||
| 146 | root->defrag_trans_start = trans->transid; | ||
| 147 | } | ||
| 148 | return ret; | ||
| 149 | } | ||
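The -EAGAIN protocol above deserves a concrete picture: btrfs_defrag_leaves records where it stopped in root->defrag_progress and returns -EAGAIN, and the caller (btrfs_defrag_root, declared in transaction.h above) is expected to re-enter until the walk passes the key saved in root->defrag_max. A minimal user-space model of that resume loop, with hypothetical names and a one-field key rather than the kernel's three-field btrfs_key:

    #include <stdio.h>

    #define DEFRAG_EAGAIN (-11)              /* stand-in for -EAGAIN */

    static unsigned long long progress;      /* models root->defrag_progress */
    static const unsigned long long max_key = 5; /* models root->defrag_max */

    /* one bounded chunk of work; yields so the transaction can be dropped */
    static int defrag_step(void)
    {
        if (progress > max_key)
            return 0;                        /* walked past the recorded max */
        printf("defrag node at key %llu\n", progress);
        progress++;                          /* remember where to resume */
        return DEFRAG_EAGAIN;
    }

    int main(void)
    {
        int ret;
        while ((ret = defrag_step()) == DEFRAG_EAGAIN)
            ;                                /* the kernel ends and restarts the transaction here */
        return ret;
    }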
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c new file mode 100644 index 00000000000..cf618cc8b34 --- /dev/null +++ b/fs/btrfs/tree-log.c | |||
| @@ -0,0 +1,2890 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/sched.h> | ||
| 20 | #include "ctree.h" | ||
| 21 | #include "transaction.h" | ||
| 22 | #include "disk-io.h" | ||
| 23 | #include "locking.h" | ||
| 24 | #include "print-tree.h" | ||
| 25 | #include "compat.h" | ||
| 26 | |||
| 27 | /* magic values for the inode_only field in btrfs_log_inode: | ||
| 28 | * | ||
| 29 | * LOG_INODE_ALL means to log everything | ||
| 30 | * LOG_INODE_EXISTS means to log just enough to recreate the inode | ||
| 31 | * during log replay | ||
| 32 | */ | ||
| 33 | #define LOG_INODE_ALL 0 | ||
| 34 | #define LOG_INODE_EXISTS 1 | ||
| 35 | |||
| 36 | /* | ||
| 37 | * stages for the tree walking. The first | ||
| 38 | * stage (0) is to only pin down the blocks we find, | ||
| 39 | * the second stage (1) is to make sure that all the inodes | ||
| 40 | * we find in the log are created in the subvolume. | ||
| 41 | * | ||
| 42 | * The last stage is to deal with directories and links and extents | ||
| 43 | * and all the other fun semantics | ||
| 44 | */ | ||
| 45 | #define LOG_WALK_PIN_ONLY 0 | ||
| 46 | #define LOG_WALK_REPLAY_INODES 1 | ||
| 47 | #define LOG_WALK_REPLAY_ALL 2 | ||
| 48 | |||
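The three stage constants drive three separate passes over the same log during recovery, matching the "read three times" note in the overview below. A self-contained sketch of that dispatch; walk_log_tree_stage is a hypothetical stand-in for the real walker, which appears later in this file:

    #include <stdio.h>

    #define LOG_WALK_PIN_ONLY      0
    #define LOG_WALK_REPLAY_INODES 1
    #define LOG_WALK_REPLAY_ALL    2

    /* hypothetical stand-in for one pass over the log tree */
    static void walk_log_tree_stage(int stage)
    {
        printf("walking log tree, stage %d\n", stage);
    }

    int main(void)
    {
        /* recovery re-walks the whole log once per stage, in order */
        for (int s = LOG_WALK_PIN_ONLY; s <= LOG_WALK_REPLAY_ALL; s++)
            walk_log_tree_stage(s);
        return 0;
    }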
| 49 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | ||
| 50 | struct btrfs_root *root, struct inode *inode, | ||
| 51 | int inode_only); | ||
| 52 | |||
| 53 | /* | ||
| 54 | * tree logging is a special write ahead log used to make sure that | ||
| 55 | * fsyncs and O_SYNCs can happen without doing full tree commits. | ||
| 56 | * | ||
| 57 | * Full tree commits are expensive because they require commonly | ||
| 58 | * modified blocks to be recowed, creating many dirty pages in the | ||
| 59 | * extent tree and a 4x-6x higher write load than ext3. | ||
| 60 | * | ||
| 61 | * Instead of doing a tree commit on every fsync, we use the | ||
| 62 | * key ranges and transaction ids to find items for a given file or directory | ||
| 63 | * that have changed in this transaction. Those items are copied into | ||
| 64 | * a special tree (one per subvolume root), that tree is written to disk | ||
| 65 | * and then the fsync is considered complete. | ||
| 66 | * | ||
| 67 | * After a crash, items are copied out of the log-tree back into the | ||
| 68 | * subvolume tree. Any file data extents found are recorded in the extent | ||
| 69 | * allocation tree, and the log-tree freed. | ||
| 70 | * | ||
| 71 | * The log tree is read three times: once to pin down all the extents it is | ||
| 72 | * using in ram, once to create all the inodes logged in the tree | ||
| 73 | * and once to do all the other items. | ||
| 74 | */ | ||
| 75 | |||
| 76 | /* | ||
| 77 | * btrfs_add_log_tree adds a new per-subvolume log tree into the | ||
| 78 | * tree of log tree roots. This must be called with a tree log transaction | ||
| 79 | * running (see start_log_trans). | ||
| 80 | */ | ||
| 81 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | ||
| 82 | struct btrfs_root *root) | ||
| 83 | { | ||
| 84 | struct btrfs_key key; | ||
| 85 | struct btrfs_root_item root_item; | ||
| 86 | struct btrfs_inode_item *inode_item; | ||
| 87 | struct extent_buffer *leaf; | ||
| 88 | struct btrfs_root *new_root = root; | ||
| 89 | int ret; | ||
| 90 | u64 objectid = root->root_key.objectid; | ||
| 91 | |||
| 92 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | ||
| 93 | BTRFS_TREE_LOG_OBJECTID, | ||
| 94 | trans->transid, 0, 0, 0); | ||
| 95 | if (IS_ERR(leaf)) { | ||
| 96 | ret = PTR_ERR(leaf); | ||
| 97 | return ret; | ||
| 98 | } | ||
| 99 | |||
| 100 | btrfs_set_header_nritems(leaf, 0); | ||
| 101 | btrfs_set_header_level(leaf, 0); | ||
| 102 | btrfs_set_header_bytenr(leaf, leaf->start); | ||
| 103 | btrfs_set_header_generation(leaf, trans->transid); | ||
| 104 | btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); | ||
| 105 | |||
| 106 | write_extent_buffer(leaf, root->fs_info->fsid, | ||
| 107 | (unsigned long)btrfs_header_fsid(leaf), | ||
| 108 | BTRFS_FSID_SIZE); | ||
| 109 | btrfs_mark_buffer_dirty(leaf); | ||
| 110 | |||
| 111 | inode_item = &root_item.inode; | ||
| 112 | memset(inode_item, 0, sizeof(*inode_item)); | ||
| 113 | inode_item->generation = cpu_to_le64(1); | ||
| 114 | inode_item->size = cpu_to_le64(3); | ||
| 115 | inode_item->nlink = cpu_to_le32(1); | ||
| 116 | inode_item->nbytes = cpu_to_le64(root->leafsize); | ||
| 117 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | ||
| 118 | |||
| 119 | btrfs_set_root_bytenr(&root_item, leaf->start); | ||
| 120 | btrfs_set_root_level(&root_item, 0); | ||
| 121 | btrfs_set_root_refs(&root_item, 0); | ||
| 122 | btrfs_set_root_used(&root_item, 0); | ||
| 123 | |||
| 124 | memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); | ||
| 125 | root_item.drop_level = 0; | ||
| 126 | |||
| 127 | btrfs_tree_unlock(leaf); | ||
| 128 | free_extent_buffer(leaf); | ||
| 129 | leaf = NULL; | ||
| 130 | |||
| 131 | btrfs_set_root_dirid(&root_item, 0); | ||
| 132 | |||
| 133 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | ||
| 134 | key.offset = objectid; | ||
| 135 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 136 | ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, | ||
| 137 | &root_item); | ||
| 138 | if (ret) | ||
| 139 | goto fail; | ||
| 140 | |||
| 141 | new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, | ||
| 142 | &key); | ||
| 143 | BUG_ON(!new_root); | ||
| 144 | |||
| 145 | WARN_ON(root->log_root); | ||
| 146 | root->log_root = new_root; | ||
| 147 | |||
| 148 | /* | ||
| 149 | * log trees do not get reference counted because they go away | ||
| 150 | * before a real commit is actually done. They do store pointers | ||
| 151 | * to file data extents, and those reference counts still get | ||
| 152 | * updated (along with back refs to the log tree). | ||
| 153 | */ | ||
| 154 | new_root->ref_cows = 0; | ||
| 155 | new_root->last_trans = trans->transid; | ||
| 156 | fail: | ||
| 157 | return ret; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * start a sub transaction and setup the log tree | ||
| 162 | * this increments the log tree writer count to make the people | ||
| 163 | * syncing the tree wait for us to finish | ||
| 164 | */ | ||
| 165 | static int start_log_trans(struct btrfs_trans_handle *trans, | ||
| 166 | struct btrfs_root *root) | ||
| 167 | { | ||
| 168 | int ret; | ||
| 169 | mutex_lock(&root->fs_info->tree_log_mutex); | ||
| 170 | if (!root->fs_info->log_root_tree) { | ||
| 171 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | ||
| 172 | BUG_ON(ret); | ||
| 173 | } | ||
| 174 | if (!root->log_root) { | ||
| 175 | ret = btrfs_add_log_tree(trans, root); | ||
| 176 | BUG_ON(ret); | ||
| 177 | } | ||
| 178 | atomic_inc(&root->fs_info->tree_log_writers); | ||
| 179 | root->fs_info->tree_log_batch++; | ||
| 180 | mutex_unlock(&root->fs_info->tree_log_mutex); | ||
| 181 | return 0; | ||
| 182 | } | ||
| 183 | |||
| 184 | /* | ||
| 185 | * returns 0 if there was a log transaction running and we were able | ||
| 186 | * to join, or returns -ENOENT if there were no transactions | ||
| 187 | * in progress | ||
| 188 | */ | ||
| 189 | static int join_running_log_trans(struct btrfs_root *root) | ||
| 190 | { | ||
| 191 | int ret = -ENOENT; | ||
| 192 | |||
| 193 | smp_mb(); | ||
| 194 | if (!root->log_root) | ||
| 195 | return -ENOENT; | ||
| 196 | |||
| 197 | mutex_lock(&root->fs_info->tree_log_mutex); | ||
| 198 | if (root->log_root) { | ||
| 199 | ret = 0; | ||
| 200 | atomic_inc(&root->fs_info->tree_log_writers); | ||
| 201 | root->fs_info->tree_log_batch++; | ||
| 202 | } | ||
| 203 | mutex_unlock(&root->fs_info->tree_log_mutex); | ||
| 204 | return ret; | ||
| 205 | } | ||
| 206 | |||
| 207 | /* | ||
| 208 | * indicate we're done making changes to the log tree | ||
| 209 | * and wake up anyone waiting to do a sync | ||
| 210 | */ | ||
| 211 | static int end_log_trans(struct btrfs_root *root) | ||
| 212 | { | ||
| 213 | atomic_dec(&root->fs_info->tree_log_writers); | ||
| 214 | smp_mb(); | ||
| 215 | if (waitqueue_active(&root->fs_info->tree_log_wait)) | ||
| 216 | wake_up(&root->fs_info->tree_log_wait); | ||
| 217 | return 0; | ||
| 218 | } | ||
| 219 | |||
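start_log_trans and end_log_trans bracket every fsync's writes into the log, and the only synchronization the sync path needs is for that writer count to drain. A stripped-down user-space model with C11 atomics; the names are hypothetical, the kernel sleeps on tree_log_wait rather than spinning, and the tree_log_batch counter is omitted:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int log_writers;   /* models fs_info->tree_log_writers */

    static void start_log(void) { atomic_fetch_add(&log_writers, 1); }
    static void end_log(void)    { atomic_fetch_sub(&log_writers, 1); }

    /* the sync path must not write the log root while writers are active */
    static void sync_log(void)
    {
        while (atomic_load(&log_writers) > 0)
            ;                        /* kernel: sleeps on tree_log_wait instead */
        puts("log quiesced, safe to write the log root");
    }

    int main(void)
    {
        start_log();
        /* ... copy this inode's items into the log tree ... */
        end_log();
        sync_log();
        return 0;
    }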
| 220 | |||
| 221 | /* | ||
| 222 | * the walk control struct is used to pass state down the chain when | ||
| 223 | * processing the log tree. The stage field tells us which part | ||
| 224 | * of the log tree processing we are currently doing. The others | ||
| 225 | * are state fields used for that specific part | ||
| 226 | */ | ||
| 227 | struct walk_control { | ||
| 228 | /* should we free the extent on disk when done? This is used | ||
| 229 | * at transaction commit time while freeing a log tree | ||
| 230 | */ | ||
| 231 | int free; | ||
| 232 | |||
| 233 | /* should we write out the extent buffer? This is used | ||
| 234 | * while flushing the log tree to disk during a sync | ||
| 235 | */ | ||
| 236 | int write; | ||
| 237 | |||
| 238 | /* should we wait for the extent buffer io to finish? Also used | ||
| 239 | * while flushing the log tree to disk for a sync | ||
| 240 | */ | ||
| 241 | int wait; | ||
| 242 | |||
| 243 | /* pin only walk, we record which extents on disk belong to the | ||
| 244 | * log trees | ||
| 245 | */ | ||
| 246 | int pin; | ||
| 247 | |||
| 248 | /* what stage of the replay code we're currently in */ | ||
| 249 | int stage; | ||
| 250 | |||
| 251 | /* the root we are currently replaying */ | ||
| 252 | struct btrfs_root *replay_dest; | ||
| 253 | |||
| 254 | /* the trans handle for the current replay */ | ||
| 255 | struct btrfs_trans_handle *trans; | ||
| 256 | |||
| 257 | /* the function that gets used to process blocks we find in the | ||
| 258 | * tree. Note the extent_buffer might not be up to date when it is | ||
| 259 | * passed in, and it must be checked or read if you need the data | ||
| 260 | * inside it | ||
| 261 | */ | ||
| 262 | int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, | ||
| 263 | struct walk_control *wc, u64 gen); | ||
| 264 | }; | ||
| 265 | |||
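To make the field comments above concrete, here is a compilable sketch of the three shapes the structure is typically filled with: pin during replay, write plus wait while flushing for a sync, and free when a log tree is discarded at commit. The flag combinations are inferred from the field comments, not copied from the real callers (which appear further down in tree-log.c), and the stub types exist only so the sketch stands alone:

    #include <stdio.h>

    struct btrfs_root;                   /* stubs so the sketch compiles alone */
    struct extent_buffer;

    struct walk_control_model {
        int free, write, wait, pin, stage;
        int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
                            struct walk_control_model *wc,
                            unsigned long long gen);
    };

    static int process_stub(struct btrfs_root *log, struct extent_buffer *eb,
                            struct walk_control_model *wc,
                            unsigned long long gen)
    {
        (void)log; (void)eb; (void)gen;
        printf("pin=%d write=%d wait=%d free=%d\n",
               wc->pin, wc->write, wc->wait, wc->free);
        return 0;
    }

    int main(void)
    {
        struct walk_control_model replay = { .pin = 1,
                                             .process_func = process_stub };
        struct walk_control_model flush  = { .write = 1, .wait = 1,
                                             .process_func = process_stub };
        struct walk_control_model drop   = { .free = 1,
                                             .process_func = process_stub };
        replay.process_func(0, 0, &replay, 0);
        flush.process_func(0, 0, &flush, 0);
        drop.process_func(0, 0, &drop, 0);
        return 0;
    }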
| 266 | /* | ||
| 267 | * process_func used to pin down extents, write them or wait on them | ||
| 268 | */ | ||
| 269 | static int process_one_buffer(struct btrfs_root *log, | ||
| 270 | struct extent_buffer *eb, | ||
| 271 | struct walk_control *wc, u64 gen) | ||
| 272 | { | ||
| 273 | if (wc->pin) { | ||
| 274 | mutex_lock(&log->fs_info->alloc_mutex); | ||
| 275 | btrfs_update_pinned_extents(log->fs_info->extent_root, | ||
| 276 | eb->start, eb->len, 1); | ||
| 277 | mutex_unlock(&log->fs_info->alloc_mutex); | ||
| 278 | } | ||
| 279 | |||
| 280 | if (btrfs_buffer_uptodate(eb, gen)) { | ||
| 281 | if (wc->write) | ||
| 282 | btrfs_write_tree_block(eb); | ||
| 283 | if (wc->wait) | ||
| 284 | btrfs_wait_tree_block_writeback(eb); | ||
| 285 | } | ||
| 286 | return 0; | ||
| 287 | } | ||
| 288 | |||
| 289 | /* | ||
| 290 | * Item overwrite used by replay and tree logging. eb, slot and key all refer | ||
| 291 | * to the src data we are copying out. | ||
| 292 | * | ||
| 293 | * root is the tree we are copying into, and path is a scratch | ||
| 294 | * path for use in this function (it should be released on entry and | ||
| 295 | * will be released on exit). | ||
| 296 | * | ||
| 297 | * If the key is already in the destination tree the existing item is | ||
| 298 | * overwritten. If the existing item isn't big enough, it is extended. | ||
| 299 | * If it is too large, it is truncated. | ||
| 300 | * | ||
| 301 | * If the key isn't in the destination yet, a new item is inserted. | ||
| 302 | */ | ||
| 303 | static noinline int overwrite_item(struct btrfs_trans_handle *trans, | ||
| 304 | struct btrfs_root *root, | ||
| 305 | struct btrfs_path *path, | ||
| 306 | struct extent_buffer *eb, int slot, | ||
| 307 | struct btrfs_key *key) | ||
| 308 | { | ||
| 309 | int ret; | ||
| 310 | u32 item_size; | ||
| 311 | u64 saved_i_size = 0; | ||
| 312 | int save_old_i_size = 0; | ||
| 313 | unsigned long src_ptr; | ||
| 314 | unsigned long dst_ptr; | ||
| 315 | int overwrite_root = 0; | ||
| 316 | |||
| 317 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) | ||
| 318 | overwrite_root = 1; | ||
| 319 | |||
| 320 | item_size = btrfs_item_size_nr(eb, slot); | ||
| 321 | src_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 322 | |||
| 323 | /* look for the key in the destination tree */ | ||
| 324 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
| 325 | if (ret == 0) { | ||
| 326 | char *src_copy; | ||
| 327 | char *dst_copy; | ||
| 328 | u32 dst_size = btrfs_item_size_nr(path->nodes[0], | ||
| 329 | path->slots[0]); | ||
| 330 | if (dst_size != item_size) | ||
| 331 | goto insert; | ||
| 332 | |||
| 333 | if (item_size == 0) { | ||
| 334 | btrfs_release_path(root, path); | ||
| 335 | return 0; | ||
| 336 | } | ||
| 337 | dst_copy = kmalloc(item_size, GFP_NOFS); | ||
| 338 | src_copy = kmalloc(item_size, GFP_NOFS); | ||
| 339 | |||
| 340 | read_extent_buffer(eb, src_copy, src_ptr, item_size); | ||
| 341 | |||
| 342 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | ||
| 343 | read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, | ||
| 344 | item_size); | ||
| 345 | ret = memcmp(dst_copy, src_copy, item_size); | ||
| 346 | |||
| 347 | kfree(dst_copy); | ||
| 348 | kfree(src_copy); | ||
| 349 | /* | ||
| 350 | * they have the same contents, just return, this saves | ||
| 351 | * us from cowing blocks in the destination tree and doing | ||
| 352 | * extra writes that may not have been done by a previous | ||
| 353 | * sync | ||
| 354 | */ | ||
| 355 | if (ret == 0) { | ||
| 356 | btrfs_release_path(root, path); | ||
| 357 | return 0; | ||
| 358 | } | ||
| 359 | |||
| 360 | } | ||
| 361 | insert: | ||
| 362 | btrfs_release_path(root, path); | ||
| 363 | /* try to insert the key into the destination tree */ | ||
| 364 | ret = btrfs_insert_empty_item(trans, root, path, | ||
| 365 | key, item_size); | ||
| 366 | |||
| 367 | /* make sure any existing item is the correct size */ | ||
| 368 | if (ret == -EEXIST) { | ||
| 369 | u32 found_size; | ||
| 370 | found_size = btrfs_item_size_nr(path->nodes[0], | ||
| 371 | path->slots[0]); | ||
| 372 | if (found_size > item_size) { | ||
| 373 | btrfs_truncate_item(trans, root, path, item_size, 1); | ||
| 374 | } else if (found_size < item_size) { | ||
| 375 | ret = btrfs_del_item(trans, root, | ||
| 376 | path); | ||
| 377 | BUG_ON(ret); | ||
| 378 | |||
| 379 | btrfs_release_path(root, path); | ||
| 380 | ret = btrfs_insert_empty_item(trans, | ||
| 381 | root, path, key, item_size); | ||
| 382 | BUG_ON(ret); | ||
| 383 | } | ||
| 384 | } else if (ret) { | ||
| 385 | BUG(); | ||
| 386 | } | ||
| 387 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], | ||
| 388 | path->slots[0]); | ||
| 389 | |||
| 390 | /* don't overwrite an existing inode if the generation number | ||
| 391 | * was logged as zero. This is done when the tree logging code | ||
| 392 | * is just logging an inode to make sure it exists after recovery. | ||
| 393 | * | ||
| 394 | * Also, don't overwrite i_size on directories during replay. | ||
| 395 | * log replay inserts and removes directory items based on the | ||
| 396 | * state of the tree found in the subvolume, and i_size is modified | ||
| 397 | * as it goes | ||
| 398 | */ | ||
| 399 | if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { | ||
| 400 | struct btrfs_inode_item *src_item; | ||
| 401 | struct btrfs_inode_item *dst_item; | ||
| 402 | |||
| 403 | src_item = (struct btrfs_inode_item *)src_ptr; | ||
| 404 | dst_item = (struct btrfs_inode_item *)dst_ptr; | ||
| 405 | |||
| 406 | if (btrfs_inode_generation(eb, src_item) == 0) | ||
| 407 | goto no_copy; | ||
| 408 | |||
| 409 | if (overwrite_root && | ||
| 410 | S_ISDIR(btrfs_inode_mode(eb, src_item)) && | ||
| 411 | S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { | ||
| 412 | save_old_i_size = 1; | ||
| 413 | saved_i_size = btrfs_inode_size(path->nodes[0], | ||
| 414 | dst_item); | ||
| 415 | } | ||
| 416 | } | ||
| 417 | |||
| 418 | copy_extent_buffer(path->nodes[0], eb, dst_ptr, | ||
| 419 | src_ptr, item_size); | ||
| 420 | |||
| 421 | if (save_old_i_size) { | ||
| 422 | struct btrfs_inode_item *dst_item; | ||
| 423 | dst_item = (struct btrfs_inode_item *)dst_ptr; | ||
| 424 | btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); | ||
| 425 | } | ||
| 426 | |||
| 427 | /* make sure the generation is filled in */ | ||
| 428 | if (key->type == BTRFS_INODE_ITEM_KEY) { | ||
| 429 | struct btrfs_inode_item *dst_item; | ||
| 430 | dst_item = (struct btrfs_inode_item *)dst_ptr; | ||
| 431 | if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { | ||
| 432 | btrfs_set_inode_generation(path->nodes[0], dst_item, | ||
| 433 | trans->transid); | ||
| 434 | } | ||
| 435 | } | ||
| 436 | |||
| 437 | if (overwrite_root && | ||
| 438 | key->type == BTRFS_EXTENT_DATA_KEY) { | ||
| 439 | int extent_type; | ||
| 440 | struct btrfs_file_extent_item *fi; | ||
| 441 | |||
| 442 | fi = (struct btrfs_file_extent_item *)dst_ptr; | ||
| 443 | extent_type = btrfs_file_extent_type(path->nodes[0], fi); | ||
| 444 | if (extent_type == BTRFS_FILE_EXTENT_REG) { | ||
| 445 | struct btrfs_key ins; | ||
| 446 | ins.objectid = btrfs_file_extent_disk_bytenr( | ||
| 447 | path->nodes[0], fi); | ||
| 448 | ins.offset = btrfs_file_extent_disk_num_bytes( | ||
| 449 | path->nodes[0], fi); | ||
| 450 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 451 | |||
| 452 | /* | ||
| 453 | * is this extent already allocated in the extent | ||
| 454 | * allocation tree? If so, just add a reference | ||
| 455 | */ | ||
| 456 | ret = btrfs_lookup_extent(root, ins.objectid, | ||
| 457 | ins.offset); | ||
| 458 | if (ret == 0) { | ||
| 459 | ret = btrfs_inc_extent_ref(trans, root, | ||
| 460 | ins.objectid, ins.offset, | ||
| 461 | path->nodes[0]->start, | ||
| 462 | root->root_key.objectid, | ||
| 463 | trans->transid, key->objectid); | ||
| 464 | } else { | ||
| 465 | /* | ||
| 466 | * insert the extent pointer in the extent | ||
| 467 | * allocation tree | ||
| 468 | */ | ||
| 469 | ret = btrfs_alloc_logged_extent(trans, root, | ||
| 470 | path->nodes[0]->start, | ||
| 471 | root->root_key.objectid, | ||
| 472 | trans->transid, key->objectid, | ||
| 473 | &ins); | ||
| 474 | BUG_ON(ret); | ||
| 475 | } | ||
| 476 | } | ||
| 477 | } | ||
| 478 | no_copy: | ||
| 479 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 480 | btrfs_release_path(root, path); | ||
| 481 | return 0; | ||
| 482 | } | ||
| 483 | |||
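The contract of overwrite_item is simpler than the btree plumbing suggests: if an identical item already exists, do nothing (avoiding a needless COW write), otherwise make the destination item exactly item_size bytes, shrinking by truncation or growing by delete-and-reinsert, then copy the payload over it. A toy model over a heap buffer, all names hypothetical:

    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    struct item_model { size_t size; unsigned char *data; };

    static int overwrite_item_model(struct item_model *dst,
                                    const void *src, size_t n)
    {
        if (dst->size == n && memcmp(dst->data, src, n) == 0)
            return 0;                /* same contents: skip the write */
        if (dst->size != n) {        /* resize to match the source item */
            unsigned char *p = realloc(dst->data, n);
            if (!p && n)
                return -1;
            dst->data = p;
            dst->size = n;
        }
        memcpy(dst->data, src, n);   /* copy the logged payload over */
        return 0;
    }

    int main(void)
    {
        struct item_model it = { 0, NULL };
        int ret = overwrite_item_model(&it, "logged-data", 11);
        printf("ret=%d size=%zu\n", ret, it.size);
        free(it.data);
        return ret;
    }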
| 484 | /* | ||
| 485 | * simple helper to read an inode off the disk from a given root. | ||
| 486 | * This can only be called for subvolume roots and not for the log | ||
| 487 | */ | ||
| 488 | static noinline struct inode *read_one_inode(struct btrfs_root *root, | ||
| 489 | u64 objectid) | ||
| 490 | { | ||
| 491 | struct inode *inode; | ||
| 492 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
| 493 | if (inode->i_state & I_NEW) { | ||
| 494 | BTRFS_I(inode)->root = root; | ||
| 495 | BTRFS_I(inode)->location.objectid = objectid; | ||
| 496 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
| 497 | BTRFS_I(inode)->location.offset = 0; | ||
| 498 | btrfs_read_locked_inode(inode); | ||
| 499 | unlock_new_inode(inode); | ||
| 500 | |||
| 501 | } | ||
| 502 | if (is_bad_inode(inode)) { | ||
| 503 | iput(inode); | ||
| 504 | inode = NULL; | ||
| 505 | } | ||
| 506 | return inode; | ||
| 507 | } | ||
| 508 | |||
| 509 | /* replays a single extent in 'eb' at 'slot' with 'key' into the | ||
| 510 | * subvolume 'root'. path is released on entry and should be released | ||
| 511 | * on exit. | ||
| 512 | * | ||
| 513 | * extents in the log tree have not been allocated out of the extent | ||
| 514 | * tree yet. So, this completes the allocation, taking a reference | ||
| 515 | * as required if the extent already exists or creating a new extent | ||
| 516 | * if it isn't in the extent allocation tree yet. | ||
| 517 | * | ||
| 518 | * The extent is inserted into the file, dropping any existing extents | ||
| 519 | * from the file that overlap the new one. | ||
| 520 | */ | ||
| 521 | static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | ||
| 522 | struct btrfs_root *root, | ||
| 523 | struct btrfs_path *path, | ||
| 524 | struct extent_buffer *eb, int slot, | ||
| 525 | struct btrfs_key *key) | ||
| 526 | { | ||
| 527 | int found_type; | ||
| 528 | u64 mask = root->sectorsize - 1; | ||
| 529 | u64 extent_end; | ||
| 530 | u64 alloc_hint; | ||
| 531 | u64 start = key->offset; | ||
| 532 | struct btrfs_file_extent_item *item; | ||
| 533 | struct inode *inode = NULL; | ||
| 534 | unsigned long size; | ||
| 535 | int ret = 0; | ||
| 536 | |||
| 537 | item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
| 538 | found_type = btrfs_file_extent_type(eb, item); | ||
| 539 | |||
| 540 | if (found_type == BTRFS_FILE_EXTENT_REG) | ||
| 541 | extent_end = start + btrfs_file_extent_num_bytes(eb, item); | ||
| 542 | else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 543 | size = btrfs_file_extent_inline_len(eb, | ||
| 544 | btrfs_item_nr(eb, slot)); | ||
| 545 | extent_end = (start + size + mask) & ~mask; | ||
| 546 | } else { | ||
| 547 | ret = 0; | ||
| 548 | goto out; | ||
| 549 | } | ||
| 550 | |||
| 551 | inode = read_one_inode(root, key->objectid); | ||
| 552 | if (!inode) { | ||
| 553 | ret = -EIO; | ||
| 554 | goto out; | ||
| 555 | } | ||
| 556 | |||
| 557 | /* | ||
| 558 | * first check to see if we already have this extent in the | ||
| 559 | * file. This must be done before btrfs_drop_extents runs, | ||
| 560 | * so we don't try to drop this extent. | ||
| 561 | */ | ||
| 562 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | ||
| 563 | start, 0); | ||
| 564 | |||
| 565 | if (ret == 0 && found_type == BTRFS_FILE_EXTENT_REG) { | ||
| 566 | struct btrfs_file_extent_item cmp1; | ||
| 567 | struct btrfs_file_extent_item cmp2; | ||
| 568 | struct btrfs_file_extent_item *existing; | ||
| 569 | struct extent_buffer *leaf; | ||
| 570 | |||
| 571 | leaf = path->nodes[0]; | ||
| 572 | existing = btrfs_item_ptr(leaf, path->slots[0], | ||
| 573 | struct btrfs_file_extent_item); | ||
| 574 | |||
| 575 | read_extent_buffer(eb, &cmp1, (unsigned long)item, | ||
| 576 | sizeof(cmp1)); | ||
| 577 | read_extent_buffer(leaf, &cmp2, (unsigned long)existing, | ||
| 578 | sizeof(cmp2)); | ||
| 579 | |||
| 580 | /* | ||
| 581 | * we already have a pointer to this exact extent, | ||
| 582 | * we don't have to do anything | ||
| 583 | */ | ||
| 584 | if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { | ||
| 585 | btrfs_release_path(root, path); | ||
| 586 | goto out; | ||
| 587 | } | ||
| 588 | } | ||
| 589 | btrfs_release_path(root, path); | ||
| 590 | |||
| 591 | /* drop any overlapping extents */ | ||
| 592 | ret = btrfs_drop_extents(trans, root, inode, | ||
| 593 | start, extent_end, start, &alloc_hint); | ||
| 594 | BUG_ON(ret); | ||
| 595 | |||
| 596 | /* insert the extent */ | ||
| 597 | ret = overwrite_item(trans, root, path, eb, slot, key); | ||
| 598 | BUG_ON(ret); | ||
| 599 | |||
| 600 | /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */ | ||
| 601 | inode_add_bytes(inode, extent_end - start); | ||
| 602 | btrfs_update_inode(trans, root, inode); | ||
| 603 | out: | ||
| 604 | if (inode) | ||
| 605 | iput(inode); | ||
| 606 | return ret; | ||
| 607 | } | ||
| 608 | |||
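The drop-then-insert step above is the essence of extent replay: any file extents overlapping the logged range are removed before the logged item is copied in. A toy interval model of that eviction; note the real btrfs_drop_extents can also split a partially overlapping extent and keep the non-overlapping piece, which this sketch ignores:

    #include <stdio.h>
    #include <stddef.h>

    struct ext_model { unsigned long long start, end; int live; };

    /* evict every live extent overlapping [start, end) */
    static void replay_extent_model(struct ext_model *tab, size_t n,
                                    unsigned long long start,
                                    unsigned long long end)
    {
        for (size_t i = 0; i < n; i++)
            if (tab[i].live && tab[i].start < end && start < tab[i].end)
                tab[i].live = 0;
        /* the real code then inserts the logged extent via overwrite_item
         * and adds (end - start) to the inode's byte count */
    }

    int main(void)
    {
        struct ext_model file[] = { { 0, 4096, 1 }, { 4096, 8192, 1 } };
        replay_extent_model(file, 2, 4096, 12288);
        for (size_t i = 0; i < 2; i++)
            printf("[%llu,%llu) live=%d\n",
                   file[i].start, file[i].end, file[i].live);
        return 0;
    }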
| 609 | /* | ||
| 610 | * when cleaning up conflicts between the directory names in the | ||
| 611 | * subvolume, directory names in the log and directory names in the | ||
| 612 | * inode back references, we may have to unlink inodes from directories. | ||
| 613 | * | ||
| 614 | * This is a helper function to do the unlink of a specific directory | ||
| 615 | * item | ||
| 616 | */ | ||
| 617 | static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, | ||
| 618 | struct btrfs_root *root, | ||
| 619 | struct btrfs_path *path, | ||
| 620 | struct inode *dir, | ||
| 621 | struct btrfs_dir_item *di) | ||
| 622 | { | ||
| 623 | struct inode *inode; | ||
| 624 | char *name; | ||
| 625 | int name_len; | ||
| 626 | struct extent_buffer *leaf; | ||
| 627 | struct btrfs_key location; | ||
| 628 | int ret; | ||
| 629 | |||
| 630 | leaf = path->nodes[0]; | ||
| 631 | |||
| 632 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | ||
| 633 | name_len = btrfs_dir_name_len(leaf, di); | ||
| 634 | name = kmalloc(name_len, GFP_NOFS); | ||
| 635 | read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); | ||
| 636 | btrfs_release_path(root, path); | ||
| 637 | |||
| 638 | inode = read_one_inode(root, location.objectid); | ||
| 639 | BUG_ON(!inode); | ||
| 640 | |||
| 641 | btrfs_inc_nlink(inode); | ||
| 642 | ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | ||
| 643 | kfree(name); | ||
| 644 | |||
| 645 | iput(inode); | ||
| 646 | return ret; | ||
| 647 | } | ||
| 648 | |||
| 649 | /* | ||
| 650 | * helper function to see if a given name and sequence number found | ||
| 651 | * in an inode back reference are already in a directory and correctly | ||
| 652 | * point to this inode | ||
| 653 | */ | ||
| 654 | static noinline int inode_in_dir(struct btrfs_root *root, | ||
| 655 | struct btrfs_path *path, | ||
| 656 | u64 dirid, u64 objectid, u64 index, | ||
| 657 | const char *name, int name_len) | ||
| 658 | { | ||
| 659 | struct btrfs_dir_item *di; | ||
| 660 | struct btrfs_key location; | ||
| 661 | int match = 0; | ||
| 662 | |||
| 663 | di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, | ||
| 664 | index, name, name_len, 0); | ||
| 665 | if (di && !IS_ERR(di)) { | ||
| 666 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); | ||
| 667 | if (location.objectid != objectid) | ||
| 668 | goto out; | ||
| 669 | } else | ||
| 670 | goto out; | ||
| 671 | btrfs_release_path(root, path); | ||
| 672 | |||
| 673 | di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); | ||
| 674 | if (di && !IS_ERR(di)) { | ||
| 675 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); | ||
| 676 | if (location.objectid != objectid) | ||
| 677 | goto out; | ||
| 678 | } else | ||
| 679 | goto out; | ||
| 680 | match = 1; | ||
| 681 | out: | ||
| 682 | btrfs_release_path(root, path); | ||
| 683 | return match; | ||
| 684 | } | ||
| 685 | |||
| 686 | /* | ||
| 687 | * helper function to check a log tree for a named back reference in | ||
| 688 | * an inode. This is used to decide if a back reference that is | ||
| 689 | * found in the subvolume conflicts with what we find in the log. | ||
| 690 | * | ||
| 691 | * inode backreferences may have multiple refs in a single item; | ||
| 692 | * during replay we process one reference at a time, and we don't | ||
| 693 | * want to delete valid links to a file from the subvolume if that | ||
| 694 | * link is also in the log. | ||
| 695 | */ | ||
| 696 | static noinline int backref_in_log(struct btrfs_root *log, | ||
| 697 | struct btrfs_key *key, | ||
| 698 | char *name, int namelen) | ||
| 699 | { | ||
| 700 | struct btrfs_path *path; | ||
| 701 | struct btrfs_inode_ref *ref; | ||
| 702 | unsigned long ptr; | ||
| 703 | unsigned long ptr_end; | ||
| 704 | unsigned long name_ptr; | ||
| 705 | int found_name_len; | ||
| 706 | int item_size; | ||
| 707 | int ret; | ||
| 708 | int match = 0; | ||
| 709 | |||
| 710 | path = btrfs_alloc_path(); | ||
| 711 | ret = btrfs_search_slot(NULL, log, key, path, 0, 0); | ||
| 712 | if (ret != 0) | ||
| 713 | goto out; | ||
| 714 | |||
| 715 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 716 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | ||
| 717 | ptr_end = ptr + item_size; | ||
| 718 | while (ptr < ptr_end) { | ||
| 719 | ref = (struct btrfs_inode_ref *)ptr; | ||
| 720 | found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); | ||
| 721 | if (found_name_len == namelen) { | ||
| 722 | name_ptr = (unsigned long)(ref + 1); | ||
| 723 | ret = memcmp_extent_buffer(path->nodes[0], name, | ||
| 724 | name_ptr, namelen); | ||
| 725 | if (ret == 0) { | ||
| 726 | match = 1; | ||
| 727 | goto out; | ||
| 728 | } | ||
| 729 | } | ||
| 730 | ptr = (unsigned long)(ref + 1) + found_name_len; | ||
| 731 | } | ||
| 732 | out: | ||
| 733 | btrfs_free_path(path); | ||
| 734 | return match; | ||
| 735 | } | ||
| 736 | |||
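backref_in_log depends on the on-disk packing of inode back references: one item is a run of variable-length records, each a small fixed header followed by name_len name bytes. A self-contained model of that scan, using a simplified header (the real struct btrfs_inode_ref also carries a 64-bit directory index, and its fields are little-endian on disk):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    struct ref_hdr { uint16_t name_len; };   /* simplified header */

    static int name_in_item(const unsigned char *item, size_t item_size,
                            const char *name, size_t namelen)
    {
        const unsigned char *p = item, *end = item + item_size;
        while (p < end) {
            struct ref_hdr h;
            memcpy(&h, p, sizeof(h));        /* header may be unaligned */
            const unsigned char *ref_name = p + sizeof(h);
            if (h.name_len == namelen &&
                memcmp(ref_name, name, namelen) == 0)
                return 1;
            p = ref_name + h.name_len;       /* step to the next record */
        }
        return 0;
    }

    int main(void)
    {
        unsigned char item[64];
        struct ref_hdr h = { 3 };
        memcpy(item, &h, sizeof(h));
        memcpy(item + sizeof(h), "foo", 3);
        h.name_len = 4;
        memcpy(item + sizeof(h) + 3, &h, sizeof(h));
        memcpy(item + 2 * sizeof(h) + 3, "barz", 4);
        size_t item_size = 2 * sizeof(h) + 7;
        printf("%d %d\n", name_in_item(item, item_size, "barz", 4),
                          name_in_item(item, item_size, "nope", 4));
        return 0;
    }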
| 737 | |||
| 738 | /* | ||
| 739 | * replay one inode back reference item found in the log tree. | ||
| 740 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
| 741 | * root is the destination we are replaying into, and path is for temp | ||
| 742 | * use by this function. (it should be released on return). | ||
| 743 | */ | ||
| 744 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
| 745 | struct btrfs_root *root, | ||
| 746 | struct btrfs_root *log, | ||
| 747 | struct btrfs_path *path, | ||
| 748 | struct extent_buffer *eb, int slot, | ||
| 749 | struct btrfs_key *key) | ||
| 750 | { | ||
| 751 | struct inode *dir; | ||
| 752 | int ret; | ||
| 753 | struct btrfs_key location; | ||
| 754 | struct btrfs_inode_ref *ref; | ||
| 755 | struct btrfs_dir_item *di; | ||
| 756 | struct inode *inode; | ||
| 757 | char *name; | ||
| 758 | int namelen; | ||
| 759 | unsigned long ref_ptr; | ||
| 760 | unsigned long ref_end; | ||
| 761 | |||
| 762 | location.objectid = key->objectid; | ||
| 763 | location.type = BTRFS_INODE_ITEM_KEY; | ||
| 764 | location.offset = 0; | ||
| 765 | |||
| 766 | /* | ||
| 767 | * it is possible that we didn't log all the parent directories | ||
| 768 | * for a given inode. If we don't find the dir, just don't | ||
| 769 | * copy the back ref in. The link count fixup code will take | ||
| 770 | * care of the rest | ||
| 771 | */ | ||
| 772 | dir = read_one_inode(root, key->offset); | ||
| 773 | if (!dir) | ||
| 774 | return -ENOENT; | ||
| 775 | |||
| 776 | inode = read_one_inode(root, key->objectid); | ||
| 777 | BUG_ON(!inode); | ||
| 778 | |||
| 779 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 780 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
| 781 | |||
| 782 | again: | ||
| 783 | ref = (struct btrfs_inode_ref *)ref_ptr; | ||
| 784 | |||
| 785 | namelen = btrfs_inode_ref_name_len(eb, ref); | ||
| 786 | name = kmalloc(namelen, GFP_NOFS); | ||
| 787 | BUG_ON(!name); | ||
| 788 | |||
| 789 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); | ||
| 790 | |||
| 791 | /* if we already have a perfect match, we're done */ | ||
| 792 | if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, | ||
| 793 | btrfs_inode_ref_index(eb, ref), | ||
| 794 | name, namelen)) { | ||
| 795 | goto out; | ||
| 796 | } | ||
| 797 | |||
| 798 | /* | ||
| 799 | * look for a conflicting back reference in the metadata. | ||
| 800 | * if we find one we have to unlink that name of the file | ||
| 801 | * before we add our new link. Later on, we overwrite any | ||
| 802 | * existing back reference, and we don't want to create | ||
| 803 | * dangling pointers in the directory. | ||
| 804 | */ | ||
| 805 | conflict_again: | ||
| 806 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
| 807 | if (ret == 0) { | ||
| 808 | char *victim_name; | ||
| 809 | int victim_name_len; | ||
| 810 | struct btrfs_inode_ref *victim_ref; | ||
| 811 | unsigned long ptr; | ||
| 812 | unsigned long ptr_end; | ||
| 813 | struct extent_buffer *leaf = path->nodes[0]; | ||
| 814 | |||
| 815 | /* are we trying to overwrite a back ref for the root directory? | ||
| 816 | * if so, just jump out, we're done | ||
| 817 | */ | ||
| 818 | if (key->objectid == key->offset) | ||
| 819 | goto out_nowrite; | ||
| 820 | |||
| 821 | /* check all the names in this back reference to see | ||
| 822 | * if they are in the log. if so, we allow them to stay; | ||
| 823 | * otherwise they must be unlinked as a conflict | ||
| 824 | */ | ||
| 825 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 826 | ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 827 | while (ptr < ptr_end) { | ||
| 828 | victim_ref = (struct btrfs_inode_ref *)ptr; | ||
| 829 | victim_name_len = btrfs_inode_ref_name_len(leaf, | ||
| 830 | victim_ref); | ||
| 831 | victim_name = kmalloc(victim_name_len, GFP_NOFS); | ||
| 832 | BUG_ON(!victim_name); | ||
| 833 | |||
| 834 | read_extent_buffer(leaf, victim_name, | ||
| 835 | (unsigned long)(victim_ref + 1), | ||
| 836 | victim_name_len); | ||
| 837 | |||
| 838 | if (!backref_in_log(log, key, victim_name, | ||
| 839 | victim_name_len)) { | ||
| 840 | btrfs_inc_nlink(inode); | ||
| 841 | btrfs_release_path(root, path); | ||
| 842 | ret = btrfs_unlink_inode(trans, root, dir, | ||
| 843 | inode, victim_name, | ||
| 844 | victim_name_len); | ||
| 845 | kfree(victim_name); | ||
| 846 | btrfs_release_path(root, path); | ||
| 847 | goto conflict_again; | ||
| 848 | } | ||
| 849 | kfree(victim_name); | ||
| 850 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | ||
| 851 | } | ||
| 852 | BUG_ON(ret); | ||
| 853 | } | ||
| 854 | btrfs_release_path(root, path); | ||
| 855 | |||
| 856 | /* look for a conflicting sequence number */ | ||
| 857 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
| 858 | btrfs_inode_ref_index(eb, ref), | ||
| 859 | name, namelen, 0); | ||
| 860 | if (di && !IS_ERR(di)) { | ||
| 861 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
| 862 | BUG_ON(ret); | ||
| 863 | } | ||
| 864 | btrfs_release_path(root, path); | ||
| 865 | |||
| 867 | /* look for a conflicting name */ | ||
| 868 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
| 869 | name, namelen, 0); | ||
| 870 | if (di && !IS_ERR(di)) { | ||
| 871 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
| 872 | BUG_ON(ret); | ||
| 873 | } | ||
| 874 | btrfs_release_path(root, path); | ||
| 875 | |||
| 876 | /* insert our name */ | ||
| 877 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | ||
| 878 | btrfs_inode_ref_index(eb, ref)); | ||
| 879 | BUG_ON(ret); | ||
| 880 | |||
| 881 | btrfs_update_inode(trans, root, inode); | ||
| 882 | |||
| 883 | out: | ||
| 884 | ref_ptr = (unsigned long)(ref + 1) + namelen; | ||
| 885 | kfree(name); | ||
| 886 | if (ref_ptr < ref_end) | ||
| 887 | goto again; | ||
| 888 | |||
| 889 | /* finally write the back reference in the inode */ | ||
| 890 | ret = overwrite_item(trans, root, path, eb, slot, key); | ||
| 891 | BUG_ON(ret); | ||
| 892 | |||
| 893 | out_nowrite: | ||
| 894 | btrfs_release_path(root, path); | ||
| 895 | iput(dir); | ||
| 896 | iput(inode); | ||
| 897 | return 0; | ||
| 898 | } | ||
| 899 | |||
| 900 | /* | ||
| 901 | * replay one csum item from the log tree into the subvolume 'root' | ||
| 902 | * eb, slot and key all refer to the log tree | ||
| 903 | * path is for temp use by this function and should be released on return | ||
| 904 | * | ||
| 905 | * This copies the checksums out of the log tree and inserts them into | ||
| 906 | * the subvolume. Any existing checksums for this range in the file | ||
| 907 | * are overwritten, and new items are added where required. | ||
| 908 | * | ||
| 909 | * We keep this simple by reusing the btrfs_ordered_sum code from | ||
| 910 | * the data=ordered mode. This basically means making a copy | ||
| 911 | * of all the checksums in ram, which we have to do anyway for kmap | ||
| 912 | * rules. | ||
| 913 | * | ||
| 914 | * The copy is then sent down to btrfs_csum_file_blocks, which | ||
| 915 | * does all the hard work of finding existing items in the file | ||
| 916 | * or adding new ones. | ||
| 917 | */ | ||
| 918 | static noinline int replay_one_csum(struct btrfs_trans_handle *trans, | ||
| 919 | struct btrfs_root *root, | ||
| 920 | struct btrfs_path *path, | ||
| 921 | struct extent_buffer *eb, int slot, | ||
| 922 | struct btrfs_key *key) | ||
| 923 | { | ||
| 924 | int ret; | ||
| 925 | u32 item_size = btrfs_item_size_nr(eb, slot); | ||
| 926 | u64 cur_offset; | ||
| 927 | unsigned long file_bytes; | ||
| 928 | struct btrfs_ordered_sum *sums; | ||
| 929 | struct btrfs_sector_sum *sector_sum; | ||
| 930 | struct inode *inode; | ||
| 931 | unsigned long ptr; | ||
| 932 | |||
| 933 | file_bytes = (item_size / BTRFS_CRC32_SIZE) * root->sectorsize; | ||
| 934 | inode = read_one_inode(root, key->objectid); | ||
| 935 | if (!inode) { | ||
| 936 | return -EIO; | ||
| 937 | } | ||
| 938 | |||
| 939 | sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); | ||
| 940 | if (!sums) { | ||
| 941 | iput(inode); | ||
| 942 | return -ENOMEM; | ||
| 943 | } | ||
| 944 | |||
| 945 | INIT_LIST_HEAD(&sums->list); | ||
| 946 | sums->len = file_bytes; | ||
| 947 | sums->file_offset = key->offset; | ||
| 948 | |||
| 949 | /* | ||
| 950 | * copy all the sums into the ordered sum struct | ||
| 951 | */ | ||
| 952 | sector_sum = sums->sums; | ||
| 953 | cur_offset = key->offset; | ||
| 954 | ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 955 | while (item_size > 0) { | ||
| 956 | sector_sum->offset = cur_offset; | ||
| 957 | read_extent_buffer(eb, §or_sum->sum, ptr, BTRFS_CRC32_SIZE); | ||
| 958 | sector_sum++; | ||
| 959 | item_size -= BTRFS_CRC32_SIZE; | ||
| 960 | ptr += BTRFS_CRC32_SIZE; | ||
| 961 | cur_offset += root->sectorsize; | ||
| 962 | } | ||
| 963 | |||
| 964 | /* let btrfs_csum_file_blocks add them into the file */ | ||
| 965 | ret = btrfs_csum_file_blocks(trans, root, inode, sums); | ||
| 966 | BUG_ON(ret); | ||
| 967 | kfree(sums); | ||
| 968 | iput(inode); | ||
| 969 | |||
| 970 | return 0; | ||
| 971 | } | ||
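
The sizing math at the top of replay_one_csum follows from one crc32 per sector: a log csum item holding N four-byte checksums describes N * sectorsize bytes of file data. A short check of that arithmetic; the values are examples, not taken from the hunk:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint32_t CRC32_SIZE = 4;    /* models BTRFS_CRC32_SIZE */
        uint32_t sectorsize = 4096;
        uint32_t item_size = 32;          /* an item holding 8 checksums */
        uint32_t file_bytes = (item_size / CRC32_SIZE) * sectorsize;
        printf("%u checksums cover %u file bytes\n",
               item_size / CRC32_SIZE, file_bytes);  /* 8 cover 32768 */
        return 0;
    }
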
| 972 | /* | ||
| 973 | * There are a few corners where the link count of the file can't | ||
| 974 | * be properly maintained during replay. So, instead of adding | ||
| 975 | * lots of complexity to the log code, we just scan the backrefs | ||
| 976 | * for any file that has been through replay. | ||
| 977 | * | ||
| 978 | * The scan will update the link count on the inode to reflect the | ||
| 979 | * number of back refs found. If it goes down to zero, the iput | ||
| 980 | * will free the inode. | ||
| 981 | */ | ||
| 982 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | ||
| 983 | struct btrfs_root *root, | ||
| 984 | struct inode *inode) | ||
| 985 | { | ||
| 986 | struct btrfs_path *path; | ||
| 987 | int ret; | ||
| 988 | struct btrfs_key key; | ||
| 989 | u64 nlink = 0; | ||
| 990 | unsigned long ptr; | ||
| 991 | unsigned long ptr_end; | ||
| 992 | int name_len; | ||
| 993 | |||
| 994 | key.objectid = inode->i_ino; | ||
| 995 | key.type = BTRFS_INODE_REF_KEY; | ||
| 996 | key.offset = (u64)-1; | ||
| 997 | |||
| 998 | path = btrfs_alloc_path(); | ||
| 999 | |||
| 1000 | while (1) { | ||
| 1001 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 1002 | if (ret < 0) | ||
| 1003 | break; | ||
| 1004 | if (ret > 0) { | ||
| 1005 | if (path->slots[0] == 0) | ||
| 1006 | break; | ||
| 1007 | path->slots[0]--; | ||
| 1008 | } | ||
| 1009 | btrfs_item_key_to_cpu(path->nodes[0], &key, | ||
| 1010 | path->slots[0]); | ||
| 1011 | if (key.objectid != inode->i_ino || | ||
| 1012 | key.type != BTRFS_INODE_REF_KEY) | ||
| 1013 | break; | ||
| 1014 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | ||
| 1015 | ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], | ||
| 1016 | path->slots[0]); | ||
| 1017 | while (ptr < ptr_end) { | ||
| 1018 | struct btrfs_inode_ref *ref; | ||
| 1019 | |||
| 1020 | ref = (struct btrfs_inode_ref *)ptr; | ||
| 1021 | name_len = btrfs_inode_ref_name_len(path->nodes[0], | ||
| 1022 | ref); | ||
| 1023 | ptr = (unsigned long)(ref + 1) + name_len; | ||
| 1024 | nlink++; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | if (key.offset == 0) | ||
| 1028 | break; | ||
| 1029 | key.offset--; | ||
| 1030 | btrfs_release_path(root, path); | ||
| 1031 | } | ||
| 1032 | btrfs_free_path(path); | ||
| 1033 | if (nlink != inode->i_nlink) { | ||
| 1034 | inode->i_nlink = nlink; | ||
| 1035 | btrfs_update_inode(trans, root, inode); | ||
| 1036 | } | ||
| 1037 | BTRFS_I(inode)->index_cnt = (u64)-1; | ||
| 1038 | |||
| 1039 | return 0; | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | ||
| 1043 | struct btrfs_root *root, | ||
| 1044 | struct btrfs_path *path) | ||
| 1045 | { | ||
| 1046 | int ret; | ||
| 1047 | struct btrfs_key key; | ||
| 1048 | struct inode *inode; | ||
| 1049 | |||
| 1050 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; | ||
| 1051 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
| 1052 | key.offset = (u64)-1; | ||
| 1053 | while (1) { | ||
| 1054 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 1055 | if (ret < 0) | ||
| 1056 | break; | ||
| 1057 | |||
| 1058 | if (ret == 1) { | ||
| 1059 | if (path->slots[0] == 0) | ||
| 1060 | break; | ||
| 1061 | path->slots[0]--; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 1065 | if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || | ||
| 1066 | key.type != BTRFS_ORPHAN_ITEM_KEY) | ||
| 1067 | break; | ||
| 1068 | |||
| 1069 | ret = btrfs_del_item(trans, root, path); | ||
| 1070 | BUG_ON(ret); | ||
| 1071 | |||
| 1072 | btrfs_release_path(root, path); | ||
| 1073 | inode = read_one_inode(root, key.offset); | ||
| 1074 | BUG_ON(!inode); | ||
| 1075 | |||
| 1076 | ret = fixup_inode_link_count(trans, root, inode); | ||
| 1077 | BUG_ON(ret); | ||
| 1078 | |||
| 1079 | iput(inode); | ||
| 1080 | |||
| 1081 | if (key.offset == 0) | ||
| 1082 | break; | ||
| 1083 | key.offset--; | ||
| 1084 | } | ||
| 1085 | btrfs_release_path(root, path); | ||
| 1086 | return 0; | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | |||
| 1090 | /* | ||
| 1091 | * record a given inode in the fixup dir so we can check its link | ||
| 1092 | * count when replay is done. The link count is incremented here | ||
| 1093 | * so the inode won't go away until we check it | ||
| 1094 | */ | ||
| 1095 | static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, | ||
| 1096 | struct btrfs_root *root, | ||
| 1097 | struct btrfs_path *path, | ||
| 1098 | u64 objectid) | ||
| 1099 | { | ||
| 1100 | struct btrfs_key key; | ||
| 1101 | int ret = 0; | ||
| 1102 | struct inode *inode; | ||
| 1103 | |||
| 1104 | inode = read_one_inode(root, objectid); | ||
| 1105 | BUG_ON(!inode); | ||
| 1106 | |||
| 1107 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; | ||
| 1108 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | ||
| 1109 | key.offset = objectid; | ||
| 1110 | |||
| 1111 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | ||
| 1112 | |||
| 1113 | btrfs_release_path(root, path); | ||
| 1114 | if (ret == 0) { | ||
| 1115 | btrfs_inc_nlink(inode); | ||
| 1116 | btrfs_update_inode(trans, root, inode); | ||
| 1117 | } else if (ret == -EEXIST) { | ||
| 1118 | ret = 0; | ||
| 1119 | } else { | ||
| 1120 | BUG(); | ||
| 1121 | } | ||
| 1122 | iput(inode); | ||
| 1123 | |||
| 1124 | return ret; | ||
| 1125 | } | ||
| 1126 | |||
| 1127 | /* | ||
| 1128 | * when replaying the log for a directory, we only insert names | ||
| 1129 | * for inodes that actually exist. This means an fsync on a directory | ||
| 1130 | * does not implicitly fsync all the new files in it | ||
| 1131 | */ | ||
| 1132 | static noinline int insert_one_name(struct btrfs_trans_handle *trans, | ||
| 1133 | struct btrfs_root *root, | ||
| 1134 | struct btrfs_path *path, | ||
| 1135 | u64 dirid, u64 index, | ||
| 1136 | char *name, int name_len, u8 type, | ||
| 1137 | struct btrfs_key *location) | ||
| 1138 | { | ||
| 1139 | struct inode *inode; | ||
| 1140 | struct inode *dir; | ||
| 1141 | int ret; | ||
| 1142 | |||
| 1143 | inode = read_one_inode(root, location->objectid); | ||
| 1144 | if (!inode) | ||
| 1145 | return -ENOENT; | ||
| 1146 | |||
| 1147 | dir = read_one_inode(root, dirid); | ||
| 1148 | if (!dir) { | ||
| 1149 | iput(inode); | ||
| 1150 | return -EIO; | ||
| 1151 | } | ||
| 1152 | ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); | ||
| 1153 | |||
| 1154 | /* FIXME, put inode into FIXUP list */ | ||
| 1155 | |||
| 1156 | iput(inode); | ||
| 1157 | iput(dir); | ||
| 1158 | return ret; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | /* | ||
| 1162 | * take a single entry in a log directory item and replay it into | ||
| 1163 | * the subvolume. | ||
| 1164 | * | ||
| 1165 | * if a conflicting item exists in the subdirectory already, | ||
| 1166 | * the inode it points to is unlinked and put into the link count | ||
| 1167 | * fix up tree. | ||
| 1168 | * | ||
| 1169 | * If a name from the log points to a file or directory that does | ||
| 1170 | * not exist in the FS, it is skipped. fsyncs on directories | ||
| 1171 | * do not force down inodes inside that directory, just changes to the | ||
| 1172 | * names or unlinks in a directory. | ||
| 1173 | */ | ||
| 1174 | static noinline int replay_one_name(struct btrfs_trans_handle *trans, | ||
| 1175 | struct btrfs_root *root, | ||
| 1176 | struct btrfs_path *path, | ||
| 1177 | struct extent_buffer *eb, | ||
| 1178 | struct btrfs_dir_item *di, | ||
| 1179 | struct btrfs_key *key) | ||
| 1180 | { | ||
| 1181 | char *name; | ||
| 1182 | int name_len; | ||
| 1183 | struct btrfs_dir_item *dst_di; | ||
| 1184 | struct btrfs_key found_key; | ||
| 1185 | struct btrfs_key log_key; | ||
| 1186 | struct inode *dir; | ||
| 1187 | u8 log_type; | ||
| 1188 | int exists; | ||
| 1189 | int ret; | ||
| 1190 | |||
| 1191 | dir = read_one_inode(root, key->objectid); | ||
| 1192 | BUG_ON(!dir); | ||
| 1193 | |||
| 1194 | name_len = btrfs_dir_name_len(eb, di); | ||
| 1195 | name = kmalloc(name_len, GFP_NOFS); | ||
| 1196 | log_type = btrfs_dir_type(eb, di); | ||
| 1197 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | ||
| 1198 | name_len); | ||
| 1199 | |||
| 1200 | btrfs_dir_item_key_to_cpu(eb, di, &log_key); | ||
| 1201 | exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); | ||
| 1202 | if (exists == 0) | ||
| 1203 | exists = 1; | ||
| 1204 | else | ||
| 1205 | exists = 0; | ||
| 1206 | btrfs_release_path(root, path); | ||
| 1207 | |||
| 1208 | if (key->type == BTRFS_DIR_ITEM_KEY) { | ||
| 1209 | dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, | ||
| 1210 | name, name_len, 1); | ||
| 1211 | } | ||
| 1212 | else if (key->type == BTRFS_DIR_INDEX_KEY) { | ||
| 1213 | dst_di = btrfs_lookup_dir_index_item(trans, root, path, | ||
| 1214 | key->objectid, | ||
| 1215 | key->offset, name, | ||
| 1216 | name_len, 1); | ||
| 1217 | } else { | ||
| 1218 | BUG(); | ||
| 1219 | } | ||
| 1220 | if (!dst_di || IS_ERR(dst_di)) { | ||
| 1221 | /* we need a sequence number to insert, so we only | ||
| 1222 | * do inserts for the BTRFS_DIR_INDEX_KEY types | ||
| 1223 | */ | ||
| 1224 | if (key->type != BTRFS_DIR_INDEX_KEY) | ||
| 1225 | goto out; | ||
| 1226 | goto insert; | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); | ||
| 1230 | /* the existing item matches the logged item */ | ||
| 1231 | if (found_key.objectid == log_key.objectid && | ||
| 1232 | found_key.type == log_key.type && | ||
| 1233 | found_key.offset == log_key.offset && | ||
| 1234 | btrfs_dir_type(path->nodes[0], dst_di) == log_type) { | ||
| 1235 | goto out; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | /* | ||
| 1239 | * don't drop the conflicting directory entry if the inode | ||
| 1240 | * for the new entry doesn't exist | ||
| 1241 | */ | ||
| 1242 | if (!exists) | ||
| 1243 | goto out; | ||
| 1244 | |||
| 1245 | ret = drop_one_dir_item(trans, root, path, dir, dst_di); | ||
| 1246 | BUG_ON(ret); | ||
| 1247 | |||
| 1248 | if (key->type == BTRFS_DIR_INDEX_KEY) | ||
| 1249 | goto insert; | ||
| 1250 | out: | ||
| 1251 | btrfs_release_path(root, path); | ||
| 1252 | kfree(name); | ||
| 1253 | iput(dir); | ||
| 1254 | return 0; | ||
| 1255 | |||
| 1256 | insert: | ||
| 1257 | btrfs_release_path(root, path); | ||
| 1258 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, | ||
| 1259 | name, name_len, log_type, &log_key); | ||
| 1260 | |||
| 1261 | if (ret && ret != -ENOENT) | ||
| 1262 | BUG(); | ||
| 1263 | goto out; | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | /* | ||
| 1267 | * find all the names in a directory item and reconcile them into | ||
| 1268 | * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than | ||
| 1269 | * one name in a directory item, but the same code gets used for | ||
| 1270 | * both directory index types | ||
| 1271 | */ | ||
| 1272 | static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | ||
| 1273 | struct btrfs_root *root, | ||
| 1274 | struct btrfs_path *path, | ||
| 1275 | struct extent_buffer *eb, int slot, | ||
| 1276 | struct btrfs_key *key) | ||
| 1277 | { | ||
| 1278 | int ret; | ||
| 1279 | u32 item_size = btrfs_item_size_nr(eb, slot); | ||
| 1280 | struct btrfs_dir_item *di; | ||
| 1281 | int name_len; | ||
| 1282 | unsigned long ptr; | ||
| 1283 | unsigned long ptr_end; | ||
| 1284 | |||
| 1285 | ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 1286 | ptr_end = ptr + item_size; | ||
| 1287 | while (ptr < ptr_end) { | ||
| 1288 | di = (struct btrfs_dir_item *)ptr; | ||
| 1289 | name_len = btrfs_dir_name_len(eb, di); | ||
| 1290 | ret = replay_one_name(trans, root, path, eb, di, key); | ||
| 1291 | BUG_ON(ret); | ||
| 1292 | ptr = (unsigned long)(di + 1); | ||
| 1293 | ptr += name_len; | ||
| 1294 | } | ||
| 1295 | return 0; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | /* | ||
| 1299 | * directory replay has two parts. There are the standard directory | ||
| 1300 | * items in the log copied from the subvolume, and range items | ||
| 1301 | * created in the log while the subvolume was logged. | ||
| 1302 | * | ||
| 1303 | * The range items tell us which parts of the key space the log | ||
| 1304 | * is authoritative for. During replay, if a key in the subvolume | ||
| 1305 | * directory is in a logged range item but not actually in the log, | ||
| 1306 | * that means it was deleted from the directory before the fsync | ||
| 1307 | * and should be removed. | ||
| 1308 | */ | ||
| 1309 | static noinline int find_dir_range(struct btrfs_root *root, | ||
| 1310 | struct btrfs_path *path, | ||
| 1311 | u64 dirid, int key_type, | ||
| 1312 | u64 *start_ret, u64 *end_ret) | ||
| 1313 | { | ||
| 1314 | struct btrfs_key key; | ||
| 1315 | u64 found_end; | ||
| 1316 | struct btrfs_dir_log_item *item; | ||
| 1317 | int ret; | ||
| 1318 | int nritems; | ||
| 1319 | |||
| 1320 | if (*start_ret == (u64)-1) | ||
| 1321 | return 1; | ||
| 1322 | |||
| 1323 | key.objectid = dirid; | ||
| 1324 | key.type = key_type; | ||
| 1325 | key.offset = *start_ret; | ||
| 1326 | |||
| 1327 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 1328 | if (ret < 0) | ||
| 1329 | goto out; | ||
| 1330 | if (ret > 0) { | ||
| 1331 | if (path->slots[0] == 0) | ||
| 1332 | goto out; | ||
| 1333 | path->slots[0]--; | ||
| 1334 | } | ||
| 1335 | if (ret != 0) | ||
| 1336 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 1337 | |||
| 1338 | if (key.type != key_type || key.objectid != dirid) { | ||
| 1339 | ret = 1; | ||
| 1340 | goto next; | ||
| 1341 | } | ||
| 1342 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 1343 | struct btrfs_dir_log_item); | ||
| 1344 | found_end = btrfs_dir_log_end(path->nodes[0], item); | ||
| 1345 | |||
| 1346 | if (*start_ret >= key.offset && *start_ret <= found_end) { | ||
| 1347 | ret = 0; | ||
| 1348 | *start_ret = key.offset; | ||
| 1349 | *end_ret = found_end; | ||
| 1350 | goto out; | ||
| 1351 | } | ||
| 1352 | ret = 1; | ||
| 1353 | next: | ||
| 1354 | /* check the next slot in the tree to see if it is a valid item */ | ||
| 1355 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 1356 | if (path->slots[0] >= nritems) { | ||
| 1357 | ret = btrfs_next_leaf(root, path); | ||
| 1358 | if (ret) | ||
| 1359 | goto out; | ||
| 1360 | } else { | ||
| 1361 | path->slots[0]++; | ||
| 1362 | } | ||
| 1363 | |||
| 1364 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 1365 | |||
| 1366 | if (key.type != key_type || key.objectid != dirid) { | ||
| 1367 | ret = 1; | ||
| 1368 | goto out; | ||
| 1369 | } | ||
| 1370 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 1371 | struct btrfs_dir_log_item); | ||
| 1372 | found_end = btrfs_dir_log_end(path->nodes[0], item); | ||
| 1373 | *start_ret = key.offset; | ||
| 1374 | *end_ret = found_end; | ||
| 1375 | ret = 0; | ||
| 1376 | out: | ||
| 1377 | btrfs_release_path(root, path); | ||
| 1378 | return ret; | ||
| 1379 | } | ||
| 1380 | |||
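The comment above find_dir_range states the key invariant of directory replay: a dir_log item makes the log authoritative for an index range, so a subvolume entry that falls inside a logged range but has no matching log item must have been deleted before the fsync. A small model of that decision, names hypothetical:

    #include <stdio.h>
    #include <stddef.h>

    struct dir_log_range { unsigned long long start, end; };

    /* is this directory index covered by any logged range? */
    static int in_logged_range(const struct dir_log_range *r, size_t n,
                               unsigned long long index)
    {
        for (size_t i = 0; i < n; i++)
            if (index >= r[i].start && index <= r[i].end)
                return 1;
        return 0;
    }

    int main(void)
    {
        struct dir_log_range logged[] = { { 0, 9 }, { 20, 29 } };
        /* index 5 is covered: if the log has no item for it, the entry
         * is deleted.  index 12 is uncovered: the log says nothing
         * about it, so the entry is kept. */
        printf("covered(5)=%d covered(12)=%d\n",
               in_logged_range(logged, 2, 5),
               in_logged_range(logged, 2, 12));
        return 0;
    }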
| 1381 | /* | ||
| 1382 | * this looks for a given directory item in the log. If the directory | ||
| 1383 | * item is not in the log, the item is removed and the inode it points | ||
| 1384 | * to is unlinked | ||
| 1385 | */ | ||
| 1386 | static noinline int check_item_in_log(struct btrfs_trans_handle *trans, | ||
| 1387 | struct btrfs_root *root, | ||
| 1388 | struct btrfs_root *log, | ||
| 1389 | struct btrfs_path *path, | ||
| 1390 | struct btrfs_path *log_path, | ||
| 1391 | struct inode *dir, | ||
| 1392 | struct btrfs_key *dir_key) | ||
| 1393 | { | ||
| 1394 | int ret; | ||
| 1395 | struct extent_buffer *eb; | ||
| 1396 | int slot; | ||
| 1397 | u32 item_size; | ||
| 1398 | struct btrfs_dir_item *di; | ||
| 1399 | struct btrfs_dir_item *log_di; | ||
| 1400 | int name_len; | ||
| 1401 | unsigned long ptr; | ||
| 1402 | unsigned long ptr_end; | ||
| 1403 | char *name; | ||
| 1404 | struct inode *inode; | ||
| 1405 | struct btrfs_key location; | ||
| 1406 | |||
| 1407 | again: | ||
| 1408 | eb = path->nodes[0]; | ||
| 1409 | slot = path->slots[0]; | ||
| 1410 | item_size = btrfs_item_size_nr(eb, slot); | ||
| 1411 | ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 1412 | ptr_end = ptr + item_size; | ||
| 1413 | while(ptr < ptr_end) { | ||
| 1414 | di = (struct btrfs_dir_item *)ptr; | ||
| 1415 | name_len = btrfs_dir_name_len(eb, di); | ||
| 1416 | name = kmalloc(name_len, GFP_NOFS); | ||
| 1417 | if (!name) { | ||
| 1418 | ret = -ENOMEM; | ||
| 1419 | goto out; | ||
| 1420 | } | ||
| 1421 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | ||
| 1422 | name_len); | ||
| 1423 | log_di = NULL; | ||
| 1424 | if (dir_key->type == BTRFS_DIR_ITEM_KEY) { | ||
| 1425 | log_di = btrfs_lookup_dir_item(trans, log, log_path, | ||
| 1426 | dir_key->objectid, | ||
| 1427 | name, name_len, 0); | ||
| 1428 | } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { | ||
| 1429 | log_di = btrfs_lookup_dir_index_item(trans, log, | ||
| 1430 | log_path, | ||
| 1431 | dir_key->objectid, | ||
| 1432 | dir_key->offset, | ||
| 1433 | name, name_len, 0); | ||
| 1434 | } | ||
| 1435 | if (!log_di || IS_ERR(log_di)) { | ||
| 1436 | btrfs_dir_item_key_to_cpu(eb, di, &location); | ||
| 1437 | btrfs_release_path(root, path); | ||
| 1438 | btrfs_release_path(log, log_path); | ||
| 1439 | inode = read_one_inode(root, location.objectid); | ||
| 1440 | BUG_ON(!inode); | ||
| 1441 | |||
| 1442 | ret = link_to_fixup_dir(trans, root, | ||
| 1443 | path, location.objectid); | ||
| 1444 | BUG_ON(ret); | ||
| 1445 | btrfs_inc_nlink(inode); | ||
| 1446 | ret = btrfs_unlink_inode(trans, root, dir, inode, | ||
| 1447 | name, name_len); | ||
| 1448 | BUG_ON(ret); | ||
| 1449 | kfree(name); | ||
| 1450 | iput(inode); | ||
| 1451 | |||
| 1452 | /* there might still be more names under this key, | ||
| 1453 | * so check and repeat if required | ||
| 1454 | */ | ||
| 1455 | ret = btrfs_search_slot(NULL, root, dir_key, path, | ||
| 1456 | 0, 0); | ||
| 1457 | if (ret == 0) | ||
| 1458 | goto again; | ||
| 1459 | ret = 0; | ||
| 1460 | goto out; | ||
| 1461 | } | ||
| 1462 | btrfs_release_path(log, log_path); | ||
| 1463 | kfree(name); | ||
| 1464 | |||
| 1465 | ptr = (unsigned long)(di + 1); | ||
| 1466 | ptr += name_len; | ||
| 1467 | } | ||
| 1468 | ret = 0; | ||
| 1469 | out: | ||
| 1470 | btrfs_release_path(root, path); | ||
| 1471 | btrfs_release_path(log, log_path); | ||
| 1472 | return ret; | ||
| 1473 | } | ||
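
The per-name decision inside check_item_in_log reduces to a membership test. A hedged model in plain C (invented names; it ignores the DIR_ITEM/DIR_INDEX split and the fixup-dir bookkeeping): a name present in the subvolume directory but absent from the authoritative log was deleted before the fsync and must be unlinked.

```c
#include <string.h>
#include <stdio.h>

/* returns 1 when 'name' must be unlinked during replay: it exists in the
 * subvolume directory but the (authoritative) log has no entry for it */
static int should_unlink(const char *name,
			 const char * const *logged, int nr_logged)
{
	int i;

	for (i = 0; i < nr_logged; i++) {
		if (strcmp(name, logged[i]) == 0)
			return 0;	/* still in the log: keep it */
	}
	return 1;			/* deleted before the fsync */
}

int main(void)
{
	const char * const logged[] = { "a.txt", "b.txt" };

	printf("a.txt: %d\n", should_unlink("a.txt", logged, 2)); /* keep */
	printf("c.txt: %d\n", should_unlink("c.txt", logged, 2)); /* drop */
	return 0;
}
```
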
| 1474 | |||
| 1475 | /* | ||
| 1476 | * deletion replay happens before we copy any new directory items | ||
| 1477 | * out of the log or out of backreferences from inodes. It | ||
| 1478 | * scans the log to find ranges of keys that log is authoritative for, | ||
| 1479 | * and then scans the directory to find items in those ranges that are | ||
| 1480 | * not present in the log. | ||
| 1481 | * | ||
| 1482 | * Anything we don't find in the log is unlinked and removed from the | ||
| 1483 | * directory. | ||
| 1484 | */ | ||
| 1485 | static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | ||
| 1486 | struct btrfs_root *root, | ||
| 1487 | struct btrfs_root *log, | ||
| 1488 | struct btrfs_path *path, | ||
| 1489 | u64 dirid) | ||
| 1490 | { | ||
| 1491 | u64 range_start; | ||
| 1492 | u64 range_end; | ||
| 1493 | int key_type = BTRFS_DIR_LOG_ITEM_KEY; | ||
| 1494 | int ret = 0; | ||
| 1495 | struct btrfs_key dir_key; | ||
| 1496 | struct btrfs_key found_key; | ||
| 1497 | struct btrfs_path *log_path; | ||
| 1498 | struct inode *dir; | ||
| 1499 | |||
| 1500 | dir_key.objectid = dirid; | ||
| 1501 | dir_key.type = BTRFS_DIR_ITEM_KEY; | ||
| 1502 | log_path = btrfs_alloc_path(); | ||
| 1503 | if (!log_path) | ||
| 1504 | return -ENOMEM; | ||
| 1505 | |||
| 1506 | dir = read_one_inode(root, dirid); | ||
| 1507 | /* it isn't an error if the inode isn't there, that can happen | ||
| 1508 | * because we replay the deletes before we copy in the inode item | ||
| 1509 | * from the log | ||
| 1510 | */ | ||
| 1511 | if (!dir) { | ||
| 1512 | btrfs_free_path(log_path); | ||
| 1513 | return 0; | ||
| 1514 | } | ||
| 1515 | again: | ||
| 1516 | range_start = 0; | ||
| 1517 | range_end = 0; | ||
| 1518 | while(1) { | ||
| 1519 | ret = find_dir_range(log, path, dirid, key_type, | ||
| 1520 | &range_start, &range_end); | ||
| 1521 | if (ret != 0) | ||
| 1522 | break; | ||
| 1523 | |||
| 1524 | dir_key.offset = range_start; | ||
| 1525 | while(1) { | ||
| 1526 | int nritems; | ||
| 1527 | ret = btrfs_search_slot(NULL, root, &dir_key, path, | ||
| 1528 | 0, 0); | ||
| 1529 | if (ret < 0) | ||
| 1530 | goto out; | ||
| 1531 | |||
| 1532 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 1533 | if (path->slots[0] >= nritems) { | ||
| 1534 | ret = btrfs_next_leaf(root, path); | ||
| 1535 | if (ret) | ||
| 1536 | break; | ||
| 1537 | } | ||
| 1538 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 1539 | path->slots[0]); | ||
| 1540 | if (found_key.objectid != dirid || | ||
| 1541 | found_key.type != dir_key.type) | ||
| 1542 | goto next_type; | ||
| 1543 | |||
| 1544 | if (found_key.offset > range_end) | ||
| 1545 | break; | ||
| 1546 | |||
| 1547 | ret = check_item_in_log(trans, root, log, path, | ||
| 1548 | log_path, dir, &found_key); | ||
| 1549 | BUG_ON(ret); | ||
| 1550 | if (found_key.offset == (u64)-1) | ||
| 1551 | break; | ||
| 1552 | dir_key.offset = found_key.offset + 1; | ||
| 1553 | } | ||
| 1554 | btrfs_release_path(root, path); | ||
| 1555 | if (range_end == (u64)-1) | ||
| 1556 | break; | ||
| 1557 | range_start = range_end + 1; | ||
| 1558 | } | ||
| 1559 | |||
| 1560 | next_type: | ||
| 1561 | ret = 0; | ||
| 1562 | if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { | ||
| 1563 | key_type = BTRFS_DIR_LOG_INDEX_KEY; | ||
| 1564 | dir_key.type = BTRFS_DIR_INDEX_KEY; | ||
| 1565 | btrfs_release_path(root, path); | ||
| 1566 | goto again; | ||
| 1567 | } | ||
| 1568 | out: | ||
| 1569 | btrfs_release_path(root, path); | ||
| 1570 | btrfs_free_path(log_path); | ||
| 1571 | iput(dir); | ||
| 1572 | return ret; | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | /* | ||
| 1576 | * the process_func used to replay items from the log tree. This | ||
| 1577 | * gets called in two different stages. The first stage just looks | ||
| 1578 | * for inodes and makes sure they are all copied into the subvolume. | ||
| 1579 | * | ||
| 1580 | * The second stage copies all the other item types from the log into | ||
| 1581 | * the subvolume. The two stage approach is slower, but gets rid of | ||
| 1582 | * lots of complexity around inodes referencing other inodes that exist | ||
| 1583 | * only in the log (references come from either directory items or inode | ||
| 1584 | * back refs). | ||
| 1585 | */ | ||
| 1586 | static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | ||
| 1587 | struct walk_control *wc, u64 gen) | ||
| 1588 | { | ||
| 1589 | int nritems; | ||
| 1590 | struct btrfs_path *path; | ||
| 1591 | struct btrfs_root *root = wc->replay_dest; | ||
| 1592 | struct btrfs_key key; | ||
| 1593 | u32 item_size; | ||
| 1594 | int level; | ||
| 1595 | int i; | ||
| 1596 | int ret; | ||
| 1597 | |||
| 1598 | btrfs_read_buffer(eb, gen); | ||
| 1599 | |||
| 1600 | level = btrfs_header_level(eb); | ||
| 1601 | |||
| 1602 | if (level != 0) | ||
| 1603 | return 0; | ||
| 1604 | |||
| 1605 | path = btrfs_alloc_path(); | ||
| 1606 | BUG_ON(!path); | ||
| 1607 | |||
| 1608 | nritems = btrfs_header_nritems(eb); | ||
| 1609 | for (i = 0; i < nritems; i++) { | ||
| 1610 | btrfs_item_key_to_cpu(eb, &key, i); | ||
| 1611 | item_size = btrfs_item_size_nr(eb, i); | ||
| 1612 | |||
| 1613 | /* inode keys are done during the first stage */ | ||
| 1614 | if (key.type == BTRFS_INODE_ITEM_KEY && | ||
| 1615 | wc->stage == LOG_WALK_REPLAY_INODES) { | ||
| 1616 | struct inode *inode; | ||
| 1617 | struct btrfs_inode_item *inode_item; | ||
| 1618 | u32 mode; | ||
| 1619 | |||
| 1620 | inode_item = btrfs_item_ptr(eb, i, | ||
| 1621 | struct btrfs_inode_item); | ||
| 1622 | mode = btrfs_inode_mode(eb, inode_item); | ||
| 1623 | if (S_ISDIR(mode)) { | ||
| 1624 | ret = replay_dir_deletes(wc->trans, | ||
| 1625 | root, log, path, key.objectid); | ||
| 1626 | BUG_ON(ret); | ||
| 1627 | } | ||
| 1628 | ret = overwrite_item(wc->trans, root, path, | ||
| 1629 | eb, i, &key); | ||
| 1630 | BUG_ON(ret); | ||
| 1631 | |||
| 1632 | /* for regular files, truncate away | ||
| 1633 | * extents past the new EOF | ||
| 1634 | */ | ||
| 1635 | if (S_ISREG(mode)) { | ||
| 1636 | inode = read_one_inode(root, | ||
| 1637 | key.objectid); | ||
| 1638 | BUG_ON(!inode); | ||
| 1639 | |||
| 1640 | ret = btrfs_truncate_inode_items(wc->trans, | ||
| 1641 | root, inode, inode->i_size, | ||
| 1642 | BTRFS_EXTENT_DATA_KEY); | ||
| 1643 | BUG_ON(ret); | ||
| 1644 | iput(inode); | ||
| 1645 | } | ||
| 1646 | ret = link_to_fixup_dir(wc->trans, root, | ||
| 1647 | path, key.objectid); | ||
| 1648 | BUG_ON(ret); | ||
| 1649 | } | ||
| 1650 | if (wc->stage < LOG_WALK_REPLAY_ALL) | ||
| 1651 | continue; | ||
| 1652 | |||
| 1653 | /* these keys are simply copied */ | ||
| 1654 | if (key.type == BTRFS_XATTR_ITEM_KEY) { | ||
| 1655 | ret = overwrite_item(wc->trans, root, path, | ||
| 1656 | eb, i, &key); | ||
| 1657 | BUG_ON(ret); | ||
| 1658 | } else if (key.type == BTRFS_INODE_REF_KEY) { | ||
| 1659 | ret = add_inode_ref(wc->trans, root, log, path, | ||
| 1660 | eb, i, &key); | ||
| 1661 | BUG_ON(ret && ret != -ENOENT); | ||
| 1662 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { | ||
| 1663 | ret = replay_one_extent(wc->trans, root, path, | ||
| 1664 | eb, i, &key); | ||
| 1665 | BUG_ON(ret); | ||
| 1666 | } else if (key.type == BTRFS_CSUM_ITEM_KEY) { | ||
| 1667 | ret = replay_one_csum(wc->trans, root, path, | ||
| 1668 | eb, i, &key); | ||
| 1669 | BUG_ON(ret); | ||
| 1670 | } else if (key.type == BTRFS_DIR_ITEM_KEY || | ||
| 1671 | key.type == BTRFS_DIR_INDEX_KEY) { | ||
| 1672 | ret = replay_one_dir_item(wc->trans, root, path, | ||
| 1673 | eb, i, &key); | ||
| 1674 | BUG_ON(ret); | ||
| 1675 | } | ||
| 1676 | } | ||
| 1677 | btrfs_free_path(path); | ||
| 1678 | return 0; | ||
| 1679 | } | ||
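
The two-stage split that replay_one_buffer implements can get lost in the key-type dispatch, so here is a sketch of just the control flow. The enum values are simplified placeholders, not the btrfs constants.

```c
#include <stdio.h>

enum stage { REPLAY_INODES, REPLAY_ALL };
enum item { INODE_ITEM, INODE_REF, DIR_ITEM, EXTENT_DATA };

/* dispatch one leaf item according to the current replay stage */
static void replay_item(enum stage stage, enum item type)
{
	if (type == INODE_ITEM && stage == REPLAY_INODES) {
		puts("copy inode item (and replay dir deletes for dirs)");
		return;
	}
	if (stage < REPLAY_ALL)
		return;		/* everything else waits for stage two */

	switch (type) {
	case INODE_REF:
		puts("add inode backref");
		break;
	case DIR_ITEM:
		puts("replay dir item");
		break;
	case EXTENT_DATA:
		puts("replay file extent");
		break;
	default:
		break;
	}
}

int main(void)
{
	/* first pass creates every inode, second pass links them up */
	replay_item(REPLAY_INODES, INODE_ITEM);
	replay_item(REPLAY_ALL, DIR_ITEM);
	return 0;
}
```
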
| 1680 | |||
| 1681 | static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | ||
| 1682 | struct btrfs_root *root, | ||
| 1683 | struct btrfs_path *path, int *level, | ||
| 1684 | struct walk_control *wc) | ||
| 1685 | { | ||
| 1686 | u64 root_owner; | ||
| 1687 | u64 root_gen; | ||
| 1688 | u64 bytenr; | ||
| 1689 | u64 ptr_gen; | ||
| 1690 | struct extent_buffer *next; | ||
| 1691 | struct extent_buffer *cur; | ||
| 1692 | struct extent_buffer *parent; | ||
| 1693 | u32 blocksize; | ||
| 1694 | int ret = 0; | ||
| 1695 | |||
| 1696 | WARN_ON(*level < 0); | ||
| 1697 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 1698 | |||
| 1699 | while(*level > 0) { | ||
| 1700 | WARN_ON(*level < 0); | ||
| 1701 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 1702 | cur = path->nodes[*level]; | ||
| 1703 | |||
| 1704 | if (btrfs_header_level(cur) != *level) | ||
| 1705 | WARN_ON(1); | ||
| 1706 | |||
| 1707 | if (path->slots[*level] >= | ||
| 1708 | btrfs_header_nritems(cur)) | ||
| 1709 | break; | ||
| 1710 | |||
| 1711 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
| 1712 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
| 1713 | blocksize = btrfs_level_size(root, *level - 1); | ||
| 1714 | |||
| 1715 | parent = path->nodes[*level]; | ||
| 1716 | root_owner = btrfs_header_owner(parent); | ||
| 1717 | root_gen = btrfs_header_generation(parent); | ||
| 1718 | |||
| 1719 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 1720 | |||
| 1721 | wc->process_func(root, next, wc, ptr_gen); | ||
| 1722 | |||
| 1723 | if (*level == 1) { | ||
| 1724 | path->slots[*level]++; | ||
| 1725 | if (wc->free) { | ||
| 1726 | btrfs_read_buffer(next, ptr_gen); | ||
| 1727 | |||
| 1728 | btrfs_tree_lock(next); | ||
| 1729 | clean_tree_block(trans, root, next); | ||
| 1730 | btrfs_wait_tree_block_writeback(next); | ||
| 1731 | btrfs_tree_unlock(next); | ||
| 1732 | |||
| 1733 | ret = btrfs_drop_leaf_ref(trans, root, next); | ||
| 1734 | BUG_ON(ret); | ||
| 1735 | |||
| 1736 | WARN_ON(root_owner != | ||
| 1737 | BTRFS_TREE_LOG_OBJECTID); | ||
| 1738 | ret = btrfs_free_reserved_extent(root, | ||
| 1739 | bytenr, blocksize); | ||
| 1740 | BUG_ON(ret); | ||
| 1741 | } | ||
| 1742 | free_extent_buffer(next); | ||
| 1743 | continue; | ||
| 1744 | } | ||
| 1745 | btrfs_read_buffer(next, ptr_gen); | ||
| 1746 | |||
| 1747 | WARN_ON(*level <= 0); | ||
| 1748 | if (path->nodes[*level-1]) | ||
| 1749 | free_extent_buffer(path->nodes[*level-1]); | ||
| 1750 | path->nodes[*level-1] = next; | ||
| 1751 | *level = btrfs_header_level(next); | ||
| 1752 | path->slots[*level] = 0; | ||
| 1753 | cond_resched(); | ||
| 1754 | } | ||
| 1755 | WARN_ON(*level < 0); | ||
| 1756 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 1757 | |||
| 1758 | if (path->nodes[*level] == root->node) { | ||
| 1759 | parent = path->nodes[*level]; | ||
| 1760 | } else { | ||
| 1761 | parent = path->nodes[*level + 1]; | ||
| 1762 | } | ||
| 1763 | bytenr = path->nodes[*level]->start; | ||
| 1764 | |||
| 1765 | blocksize = btrfs_level_size(root, *level); | ||
| 1766 | root_owner = btrfs_header_owner(parent); | ||
| 1767 | root_gen = btrfs_header_generation(parent); | ||
| 1768 | |||
| 1769 | wc->process_func(root, path->nodes[*level], wc, | ||
| 1770 | btrfs_header_generation(path->nodes[*level])); | ||
| 1771 | |||
| 1772 | if (wc->free) { | ||
| 1773 | next = path->nodes[*level]; | ||
| 1774 | btrfs_tree_lock(next); | ||
| 1775 | clean_tree_block(trans, root, next); | ||
| 1776 | btrfs_wait_tree_block_writeback(next); | ||
| 1777 | btrfs_tree_unlock(next); | ||
| 1778 | |||
| 1779 | if (*level == 0) { | ||
| 1780 | ret = btrfs_drop_leaf_ref(trans, root, next); | ||
| 1781 | BUG_ON(ret); | ||
| 1782 | } | ||
| 1783 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
| 1784 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | ||
| 1785 | BUG_ON(ret); | ||
| 1786 | } | ||
| 1787 | free_extent_buffer(path->nodes[*level]); | ||
| 1788 | path->nodes[*level] = NULL; | ||
| 1789 | *level += 1; | ||
| 1790 | |||
| 1791 | cond_resched(); | ||
| 1792 | return 0; | ||
| 1793 | } | ||
| 1794 | |||
| 1795 | static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | ||
| 1796 | struct btrfs_root *root, | ||
| 1797 | struct btrfs_path *path, int *level, | ||
| 1798 | struct walk_control *wc) | ||
| 1799 | { | ||
| 1800 | u64 root_owner; | ||
| 1801 | u64 root_gen; | ||
| 1802 | int i; | ||
| 1803 | int slot; | ||
| 1804 | int ret; | ||
| 1805 | |||
| 1806 | for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | ||
| 1807 | slot = path->slots[i]; | ||
| 1808 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | ||
| 1809 | struct extent_buffer *node; | ||
| 1810 | node = path->nodes[i]; | ||
| 1811 | path->slots[i]++; | ||
| 1812 | *level = i; | ||
| 1813 | WARN_ON(*level == 0); | ||
| 1814 | return 0; | ||
| 1815 | } else { | ||
| 1816 | struct extent_buffer *parent; | ||
| 1817 | if (path->nodes[*level] == root->node) | ||
| 1818 | parent = path->nodes[*level]; | ||
| 1819 | else | ||
| 1820 | parent = path->nodes[*level + 1]; | ||
| 1821 | |||
| 1822 | root_owner = btrfs_header_owner(parent); | ||
| 1823 | root_gen = btrfs_header_generation(parent); | ||
| 1824 | wc->process_func(root, path->nodes[*level], wc, | ||
| 1825 | btrfs_header_generation(path->nodes[*level])); | ||
| 1826 | if (wc->free) { | ||
| 1827 | struct extent_buffer *next; | ||
| 1828 | |||
| 1829 | next = path->nodes[*level]; | ||
| 1830 | |||
| 1831 | btrfs_tree_lock(next); | ||
| 1832 | clean_tree_block(trans, root, next); | ||
| 1833 | btrfs_wait_tree_block_writeback(next); | ||
| 1834 | btrfs_tree_unlock(next); | ||
| 1835 | |||
| 1836 | if (*level == 0) { | ||
| 1837 | ret = btrfs_drop_leaf_ref(trans, root, | ||
| 1838 | next); | ||
| 1839 | BUG_ON(ret); | ||
| 1840 | } | ||
| 1841 | |||
| 1842 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
| 1843 | ret = btrfs_free_reserved_extent(root, | ||
| 1844 | path->nodes[*level]->start, | ||
| 1845 | path->nodes[*level]->len); | ||
| 1846 | BUG_ON(ret); | ||
| 1847 | } | ||
| 1848 | free_extent_buffer(path->nodes[*level]); | ||
| 1849 | path->nodes[*level] = NULL; | ||
| 1850 | *level = i + 1; | ||
| 1851 | } | ||
| 1852 | } | ||
| 1853 | return 1; | ||
| 1854 | } | ||
| 1855 | |||
| 1856 | /* | ||
| 1857 | * walk the log tree rooted at 'log', handing every block to | ||
| 1858 | * wc->process_func. When wc->free is set, blocks are also cleaned | ||
| 1859 | * and their reserved extents released, children before parents. | ||
| 1860 | */ | ||
| 1861 | static int walk_log_tree(struct btrfs_trans_handle *trans, | ||
| 1862 | struct btrfs_root *log, struct walk_control *wc) | ||
| 1863 | { | ||
| 1864 | int ret = 0; | ||
| 1865 | int wret; | ||
| 1866 | int level; | ||
| 1867 | struct btrfs_path *path; | ||
| 1868 | int i; | ||
| 1869 | int orig_level; | ||
| 1870 | |||
| 1871 | path = btrfs_alloc_path(); | ||
| 1872 | BUG_ON(!path); | ||
| 1873 | |||
| 1874 | level = btrfs_header_level(log->node); | ||
| 1875 | orig_level = level; | ||
| 1876 | path->nodes[level] = log->node; | ||
| 1877 | extent_buffer_get(log->node); | ||
| 1878 | path->slots[level] = 0; | ||
| 1879 | |||
| 1880 | while(1) { | ||
| 1881 | wret = walk_down_log_tree(trans, log, path, &level, wc); | ||
| 1882 | if (wret > 0) | ||
| 1883 | break; | ||
| 1884 | if (wret < 0) | ||
| 1885 | ret = wret; | ||
| 1886 | |||
| 1887 | wret = walk_up_log_tree(trans, log, path, &level, wc); | ||
| 1888 | if (wret > 0) | ||
| 1889 | break; | ||
| 1890 | if (wret < 0) | ||
| 1891 | ret = wret; | ||
| 1892 | } | ||
| 1893 | |||
| 1894 | /* was the root node processed? if not, catch it here */ | ||
| 1895 | if (path->nodes[orig_level]) { | ||
| 1896 | wc->process_func(log, path->nodes[orig_level], wc, | ||
| 1897 | btrfs_header_generation(path->nodes[orig_level])); | ||
| 1898 | if (wc->free) { | ||
| 1899 | struct extent_buffer *next; | ||
| 1900 | |||
| 1901 | next = path->nodes[orig_level]; | ||
| 1902 | |||
| 1903 | btrfs_tree_lock(next); | ||
| 1904 | clean_tree_block(trans, log, next); | ||
| 1905 | btrfs_wait_tree_block_writeback(next); | ||
| 1906 | btrfs_tree_unlock(next); | ||
| 1907 | |||
| 1908 | if (orig_level == 0) { | ||
| 1909 | ret = btrfs_drop_leaf_ref(trans, log, | ||
| 1910 | next); | ||
| 1911 | BUG_ON(ret); | ||
| 1912 | } | ||
| 1913 | WARN_ON(log->root_key.objectid != | ||
| 1914 | BTRFS_TREE_LOG_OBJECTID); | ||
| 1915 | ret = btrfs_free_reserved_extent(log, next->start, | ||
| 1916 | next->len); | ||
| 1917 | BUG_ON(ret); | ||
| 1918 | } | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | for (i = 0; i <= orig_level; i++) { | ||
| 1922 | if (path->nodes[i]) { | ||
| 1923 | free_extent_buffer(path->nodes[i]); | ||
| 1924 | path->nodes[i] = NULL; | ||
| 1925 | } | ||
| 1926 | } | ||
| 1927 | btrfs_free_path(path); | ||
| 1928 | if (wc->free) | ||
| 1929 | free_extent_buffer(log->node); | ||
| 1930 | return ret; | ||
| 1931 | } | ||
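
walk_down_log_tree and walk_up_log_tree together form an iterative depth-first walk: every block is handed to wc->process_func, and when wc->free is set a block is released only after its children. A recursive userspace model of that ordering, with invented types:

```c
#include <stdio.h>
#include <stdlib.h>

struct block {
	struct block *child[2];	/* NULL for leaves */
	int level;
};

typedef void (*process_fn)(struct block *b, void *ctx);

/* post-order: children first, so parents stay readable on the way down
 * and can be freed last on the way back up */
static void walk_log(struct block *b, process_fn fn, void *ctx, int do_free)
{
	int i;

	if (!b)
		return;
	for (i = 0; i < 2; i++)
		walk_log(b->child[i], fn, ctx, do_free);
	fn(b, ctx);
	if (do_free)
		free(b);
}

static void show(struct block *b, void *ctx)
{
	(void)ctx;
	printf("processed block at level %d\n", b->level);
}

int main(void)
{
	struct block *leaf = calloc(1, sizeof(*leaf));
	struct block *root = calloc(1, sizeof(*root));

	root->level = 1;
	root->child[0] = leaf;
	walk_log(root, show, NULL, 1);	/* leaf prints before root */
	return 0;
}
```
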
| 1932 | |||
| 1933 | int wait_log_commit(struct btrfs_root *log) | ||
| 1934 | { | ||
| 1935 | DEFINE_WAIT(wait); | ||
| 1936 | u64 transid = log->fs_info->tree_log_transid; | ||
| 1937 | |||
| 1938 | do { | ||
| 1939 | prepare_to_wait(&log->fs_info->tree_log_wait, &wait, | ||
| 1940 | TASK_UNINTERRUPTIBLE); | ||
| 1941 | mutex_unlock(&log->fs_info->tree_log_mutex); | ||
| 1942 | if (atomic_read(&log->fs_info->tree_log_commit)) | ||
| 1943 | schedule(); | ||
| 1944 | finish_wait(&log->fs_info->tree_log_wait, &wait); | ||
| 1945 | mutex_lock(&log->fs_info->tree_log_mutex); | ||
| 1946 | } while(transid == log->fs_info->tree_log_transid && | ||
| 1947 | atomic_read(&log->fs_info->tree_log_commit)); | ||
| 1948 | return 0; | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | /* | ||
| 1952 | * btrfs_sync_log sends a given tree log down to the disk and | ||
| 1953 | * updates the super blocks to record it. When this call is done, | ||
| 1954 | * you know that any inodes previously logged are safely on disk | ||
| 1955 | */ | ||
| 1956 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | ||
| 1957 | struct btrfs_root *root) | ||
| 1958 | { | ||
| 1959 | int ret; | ||
| 1960 | unsigned long batch; | ||
| 1961 | struct btrfs_root *log = root->log_root; | ||
| 1962 | |||
| 1963 | mutex_lock(&log->fs_info->tree_log_mutex); | ||
| 1964 | if (atomic_read(&log->fs_info->tree_log_commit)) { | ||
| 1965 | wait_log_commit(log); | ||
| 1966 | goto out; | ||
| 1967 | } | ||
| 1968 | atomic_set(&log->fs_info->tree_log_commit, 1); | ||
| 1969 | |||
| 1970 | while(1) { | ||
| 1971 | batch = log->fs_info->tree_log_batch; | ||
| 1972 | mutex_unlock(&log->fs_info->tree_log_mutex); | ||
| 1973 | schedule_timeout_uninterruptible(1); | ||
| 1974 | mutex_lock(&log->fs_info->tree_log_mutex); | ||
| 1975 | |||
| 1976 | while(atomic_read(&log->fs_info->tree_log_writers)) { | ||
| 1977 | DEFINE_WAIT(wait); | ||
| 1978 | prepare_to_wait(&log->fs_info->tree_log_wait, &wait, | ||
| 1979 | TASK_UNINTERRUPTIBLE); | ||
| 1980 | mutex_unlock(&log->fs_info->tree_log_mutex); | ||
| 1981 | if (atomic_read(&log->fs_info->tree_log_writers)) | ||
| 1982 | schedule(); | ||
| 1983 | mutex_lock(&log->fs_info->tree_log_mutex); | ||
| 1984 | finish_wait(&log->fs_info->tree_log_wait, &wait); | ||
| 1985 | } | ||
| 1986 | if (batch == log->fs_info->tree_log_batch) | ||
| 1987 | break; | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); | ||
| 1991 | BUG_ON(ret); | ||
| 1992 | ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, | ||
| 1993 | &root->fs_info->log_root_tree->dirty_log_pages); | ||
| 1994 | BUG_ON(ret); | ||
| 1995 | |||
| 1996 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, | ||
| 1997 | log->fs_info->log_root_tree->node->start); | ||
| 1998 | btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, | ||
| 1999 | btrfs_header_level(log->fs_info->log_root_tree->node)); | ||
| 2000 | |||
| 2001 | write_ctree_super(trans, log->fs_info->tree_root); | ||
| 2002 | log->fs_info->tree_log_transid++; | ||
| 2003 | log->fs_info->tree_log_batch = 0; | ||
| 2004 | atomic_set(&log->fs_info->tree_log_commit, 0); | ||
| 2005 | smp_mb(); | ||
| 2006 | if (waitqueue_active(&log->fs_info->tree_log_wait)) | ||
| 2007 | wake_up(&log->fs_info->tree_log_wait); | ||
| 2008 | out: | ||
| 2009 | mutex_unlock(&log->fs_info->tree_log_mutex); | ||
| 2010 | return 0; | ||
| 2012 | } | ||
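
The commit-batching loop above is the subtle part of btrfs_sync_log: it snapshots tree_log_batch, waits for the writers to drain, and retries if new work arrived in the meantime, so one sync can absorb many concurrent fsyncs. A rough userspace analogue with C11 atomics (the kernel uses a mutex and wait queue instead; all names here are invented):

```c
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong log_batch;		/* bumped by each log writer */
static atomic_int  log_writers;		/* writers currently in the log */

/* returns once no writers remain AND no new batch arrived while waiting,
 * mirroring the "if (batch == tree_log_batch) break;" check above */
static void wait_for_quiet_log(void)
{
	unsigned long batch;

	for (;;) {
		batch = atomic_load(&log_batch);
		while (atomic_load(&log_writers) > 0)
			;	/* the kernel sleeps on a waitqueue here */
		if (batch == atomic_load(&log_batch))
			break;	/* nothing new arrived: safe to commit */
	}
}

int main(void)
{
	atomic_store(&log_batch, 42);
	atomic_store(&log_writers, 0);
	wait_for_quiet_log();
	puts("log quiesced, would write dirty extents + super now");
	return 0;
}
```
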
| 2013 | |||
| 2014 | /* free all the extents used by the tree log. This should be called | ||
| 2015 | * at commit time of the full transaction | ||
| 2016 | */ | ||
| 2017 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
| 2018 | { | ||
| 2019 | int ret; | ||
| 2020 | struct btrfs_root *log; | ||
| 2022 | u64 start; | ||
| 2023 | u64 end; | ||
| 2024 | struct walk_control wc = { | ||
| 2025 | .free = 1, | ||
| 2026 | .process_func = process_one_buffer | ||
| 2027 | }; | ||
| 2028 | |||
| 2029 | if (!root->log_root) | ||
| 2030 | return 0; | ||
| 2031 | |||
| 2032 | log = root->log_root; | ||
| 2033 | ret = walk_log_tree(trans, log, &wc); | ||
| 2034 | BUG_ON(ret); | ||
| 2035 | |||
| 2036 | while(1) { | ||
| 2037 | ret = find_first_extent_bit(&log->dirty_log_pages, | ||
| 2038 | 0, &start, &end, EXTENT_DIRTY); | ||
| 2039 | if (ret) | ||
| 2040 | break; | ||
| 2041 | |||
| 2042 | clear_extent_dirty(&log->dirty_log_pages, | ||
| 2043 | start, end, GFP_NOFS); | ||
| 2044 | } | ||
| 2045 | |||
| 2047 | ret = btrfs_del_root(trans, root->fs_info->log_root_tree, | ||
| 2048 | &log->root_key); | ||
| 2049 | BUG_ON(ret); | ||
| 2050 | root->log_root = NULL; | ||
| 2051 | kfree(log); | ||
| 2052 | return 0; | ||
| 2053 | } | ||
| 2054 | |||
| 2055 | /* | ||
| 2056 | * helper function to update the item for a given subvolume's log root | ||
| 2057 | * in the tree of log roots | ||
| 2058 | */ | ||
| 2059 | static int update_log_root(struct btrfs_trans_handle *trans, | ||
| 2060 | struct btrfs_root *log) | ||
| 2061 | { | ||
| 2062 | u64 bytenr = btrfs_root_bytenr(&log->root_item); | ||
| 2063 | int ret; | ||
| 2064 | |||
| 2065 | if (log->node->start == bytenr) | ||
| 2066 | return 0; | ||
| 2067 | |||
| 2068 | btrfs_set_root_bytenr(&log->root_item, log->node->start); | ||
| 2069 | btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); | ||
| 2070 | ret = btrfs_update_root(trans, log->fs_info->log_root_tree, | ||
| 2071 | &log->root_key, &log->root_item); | ||
| 2072 | BUG_ON(ret); | ||
| 2073 | return ret; | ||
| 2074 | } | ||
| 2075 | |||
| 2076 | /* | ||
| 2077 | * If both a file and directory are logged, and unlinks or renames are | ||
| 2078 | * mixed in, we have a few interesting corners: | ||
| 2079 | * | ||
| 2080 | * create file X in dir Y | ||
| 2081 | * link file X to X.link in dir Y | ||
| 2082 | * fsync file X | ||
| 2083 | * unlink file X but leave X.link | ||
| 2084 | * fsync dir Y | ||
| 2085 | * | ||
| 2086 | * After a crash we would expect only X.link to exist. But file X | ||
| 2087 | * didn't get fsync'd again so the log has back refs for X and X.link. | ||
| 2088 | * | ||
| 2089 | * We solve this by removing directory entries and inode backrefs from the | ||
| 2090 | * log when a file that was logged in the current transaction is | ||
| 2091 | * unlinked. Any later fsync will include the updated log entries, and | ||
| 2092 | * we'll be able to reconstruct the proper directory items from backrefs. | ||
| 2093 | * | ||
| 2094 | * This optimization allows us to avoid relogging the entire inode | ||
| 2095 | * or the entire directory. | ||
| 2096 | */ | ||
| 2097 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | ||
| 2098 | struct btrfs_root *root, | ||
| 2099 | const char *name, int name_len, | ||
| 2100 | struct inode *dir, u64 index) | ||
| 2101 | { | ||
| 2102 | struct btrfs_root *log; | ||
| 2103 | struct btrfs_dir_item *di; | ||
| 2104 | struct btrfs_path *path; | ||
| 2105 | int ret; | ||
| 2106 | int bytes_del = 0; | ||
| 2107 | |||
| 2108 | if (BTRFS_I(dir)->logged_trans < trans->transid) | ||
| 2109 | return 0; | ||
| 2110 | |||
| 2111 | ret = join_running_log_trans(root); | ||
| 2112 | if (ret) | ||
| 2113 | return 0; | ||
| 2114 | |||
| 2115 | mutex_lock(&BTRFS_I(dir)->log_mutex); | ||
| 2116 | |||
| 2117 | log = root->log_root; | ||
| 2118 | path = btrfs_alloc_path(); | ||
| | BUG_ON(!path); | ||
| 2119 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | ||
| 2120 | name, name_len, -1); | ||
| 2121 | if (di && !IS_ERR(di)) { | ||
| 2122 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | ||
| 2123 | bytes_del += name_len; | ||
| 2124 | BUG_ON(ret); | ||
| 2125 | } | ||
| 2126 | btrfs_release_path(log, path); | ||
| 2127 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | ||
| 2128 | index, name, name_len, -1); | ||
| 2129 | if (di && !IS_ERR(di)) { | ||
| 2130 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | ||
| 2131 | bytes_del += name_len; | ||
| 2132 | BUG_ON(ret); | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | /* update the directory size in the log to reflect the names | ||
| 2136 | * we have removed | ||
| 2137 | */ | ||
| 2138 | if (bytes_del) { | ||
| 2139 | struct btrfs_key key; | ||
| 2140 | |||
| 2141 | key.objectid = dir->i_ino; | ||
| 2142 | key.offset = 0; | ||
| 2143 | key.type = BTRFS_INODE_ITEM_KEY; | ||
| 2144 | btrfs_release_path(log, path); | ||
| 2145 | |||
| 2146 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | ||
| 2147 | if (ret == 0) { | ||
| 2148 | struct btrfs_inode_item *item; | ||
| 2149 | u64 i_size; | ||
| 2150 | |||
| 2151 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 2152 | struct btrfs_inode_item); | ||
| 2153 | i_size = btrfs_inode_size(path->nodes[0], item); | ||
| 2154 | if (i_size > bytes_del) | ||
| 2155 | i_size -= bytes_del; | ||
| 2156 | else | ||
| 2157 | i_size = 0; | ||
| 2158 | btrfs_set_inode_size(path->nodes[0], item, i_size); | ||
| 2159 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 2160 | } else | ||
| 2161 | ret = 0; | ||
| 2162 | btrfs_release_path(log, path); | ||
| 2163 | } | ||
| 2164 | |||
| 2165 | btrfs_free_path(path); | ||
| 2166 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | ||
| 2167 | end_log_trans(root); | ||
| 2168 | |||
| 2169 | return 0; | ||
| 2170 | } | ||
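
The directory-size adjustment at the end of btrfs_del_dir_entries_in_log is a clamped subtraction. Spelled out as a hypothetical helper:

```c
#include <stdint.h>
#include <stdio.h>

/* shrink the logged directory i_size by the bytes of removed names,
 * never letting it wrap below zero */
static uint64_t shrink_dir_size(uint64_t i_size, uint64_t bytes_del)
{
	return i_size > bytes_del ? i_size - bytes_del : 0;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)shrink_dir_size(100, 30)); /* 70 */
	printf("%llu\n", (unsigned long long)shrink_dir_size(10, 30));  /* 0 */
	return 0;
}
```
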
| 2171 | |||
| 2172 | /* see comments for btrfs_del_dir_entries_in_log */ | ||
| 2173 | int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | ||
| 2174 | struct btrfs_root *root, | ||
| 2175 | const char *name, int name_len, | ||
| 2176 | struct inode *inode, u64 dirid) | ||
| 2177 | { | ||
| 2178 | struct btrfs_root *log; | ||
| 2179 | u64 index; | ||
| 2180 | int ret; | ||
| 2181 | |||
| 2182 | if (BTRFS_I(inode)->logged_trans < trans->transid) | ||
| 2183 | return 0; | ||
| 2184 | |||
| 2185 | ret = join_running_log_trans(root); | ||
| 2186 | if (ret) | ||
| 2187 | return 0; | ||
| 2188 | log = root->log_root; | ||
| 2189 | mutex_lock(&BTRFS_I(inode)->log_mutex); | ||
| 2190 | |||
| 2191 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | ||
| 2192 | dirid, &index); | ||
| 2193 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | ||
| 2194 | end_log_trans(root); | ||
| 2195 | |||
| 2196 | return ret; | ||
| 2197 | } | ||
| 2198 | |||
| 2199 | /* | ||
| 2200 | * creates a range item in the log for 'dirid'. first_offset and | ||
| 2201 | * last_offset tell us which parts of the key space the log should | ||
| 2202 | * be considered authoritative for. | ||
| 2203 | */ | ||
| 2204 | static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | ||
| 2205 | struct btrfs_root *log, | ||
| 2206 | struct btrfs_path *path, | ||
| 2207 | int key_type, u64 dirid, | ||
| 2208 | u64 first_offset, u64 last_offset) | ||
| 2209 | { | ||
| 2210 | int ret; | ||
| 2211 | struct btrfs_key key; | ||
| 2212 | struct btrfs_dir_log_item *item; | ||
| 2213 | |||
| 2214 | key.objectid = dirid; | ||
| 2215 | key.offset = first_offset; | ||
| 2216 | if (key_type == BTRFS_DIR_ITEM_KEY) | ||
| 2217 | key.type = BTRFS_DIR_LOG_ITEM_KEY; | ||
| 2218 | else | ||
| 2219 | key.type = BTRFS_DIR_LOG_INDEX_KEY; | ||
| 2220 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); | ||
| 2221 | BUG_ON(ret); | ||
| 2222 | |||
| 2223 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 2224 | struct btrfs_dir_log_item); | ||
| 2225 | btrfs_set_dir_log_end(path->nodes[0], item, last_offset); | ||
| 2226 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 2227 | btrfs_release_path(log, path); | ||
| 2228 | return 0; | ||
| 2229 | } | ||
| 2230 | |||
| 2231 | /* | ||
| 2232 | * log all the items included in the current transaction for a given | ||
| 2233 | * directory. This also creates the range items in the log tree required | ||
| 2234 | * to replay anything deleted before the fsync | ||
| 2235 | */ | ||
| 2236 | static noinline int log_dir_items(struct btrfs_trans_handle *trans, | ||
| 2237 | struct btrfs_root *root, struct inode *inode, | ||
| 2238 | struct btrfs_path *path, | ||
| 2239 | struct btrfs_path *dst_path, int key_type, | ||
| 2240 | u64 min_offset, u64 *last_offset_ret) | ||
| 2241 | { | ||
| 2242 | struct btrfs_key min_key; | ||
| 2243 | struct btrfs_key max_key; | ||
| 2244 | struct btrfs_root *log = root->log_root; | ||
| 2245 | struct extent_buffer *src; | ||
| 2246 | int ret; | ||
| 2247 | int i; | ||
| 2248 | int nritems; | ||
| 2249 | u64 first_offset = min_offset; | ||
| 2250 | u64 last_offset = (u64)-1; | ||
| 2251 | |||
| 2253 | max_key.objectid = inode->i_ino; | ||
| 2254 | max_key.offset = (u64)-1; | ||
| 2255 | max_key.type = key_type; | ||
| 2256 | |||
| 2257 | min_key.objectid = inode->i_ino; | ||
| 2258 | min_key.type = key_type; | ||
| 2259 | min_key.offset = min_offset; | ||
| 2260 | |||
| 2261 | path->keep_locks = 1; | ||
| 2262 | |||
| 2263 | ret = btrfs_search_forward(root, &min_key, &max_key, | ||
| 2264 | path, 0, trans->transid); | ||
| 2265 | |||
| 2266 | /* | ||
| 2267 | * we didn't find anything from this transaction, see if there | ||
| 2268 | * is anything at all | ||
| 2269 | */ | ||
| 2270 | if (ret != 0 || min_key.objectid != inode->i_ino || | ||
| 2271 | min_key.type != key_type) { | ||
| 2272 | min_key.objectid = inode->i_ino; | ||
| 2273 | min_key.type = key_type; | ||
| 2274 | min_key.offset = (u64)-1; | ||
| 2275 | btrfs_release_path(root, path); | ||
| 2276 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); | ||
| 2277 | if (ret < 0) { | ||
| 2278 | btrfs_release_path(root, path); | ||
| 2279 | return ret; | ||
| 2280 | } | ||
| 2281 | ret = btrfs_previous_item(root, path, inode->i_ino, key_type); | ||
| 2282 | |||
| 2283 | /* if ret == 0 there are items for this type, | ||
| 2284 | * create a range to tell us the last key of this type. | ||
| 2285 | * otherwise, there are no items in this directory after | ||
| 2286 | * *min_offset, and we create a range to indicate that. | ||
| 2287 | */ | ||
| 2288 | if (ret == 0) { | ||
| 2289 | struct btrfs_key tmp; | ||
| 2290 | btrfs_item_key_to_cpu(path->nodes[0], &tmp, | ||
| 2291 | path->slots[0]); | ||
| 2292 | if (key_type == tmp.type) { | ||
| 2293 | first_offset = max(min_offset, tmp.offset) + 1; | ||
| 2294 | } | ||
| 2295 | } | ||
| 2296 | goto done; | ||
| 2297 | } | ||
| 2298 | |||
| 2299 | /* go backward to find any previous key */ | ||
| 2300 | ret = btrfs_previous_item(root, path, inode->i_ino, key_type); | ||
| 2301 | if (ret == 0) { | ||
| 2302 | struct btrfs_key tmp; | ||
| 2303 | btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); | ||
| 2304 | if (key_type == tmp.type) { | ||
| 2305 | first_offset = tmp.offset; | ||
| 2306 | ret = overwrite_item(trans, log, dst_path, | ||
| 2307 | path->nodes[0], path->slots[0], | ||
| 2308 | &tmp); | ||
| 2309 | } | ||
| 2310 | } | ||
| 2311 | btrfs_release_path(root, path); | ||
| 2312 | |||
| 2313 | /* find the first key from this transaction again */ | ||
| 2314 | ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); | ||
| 2315 | if (ret != 0) { | ||
| 2316 | WARN_ON(1); | ||
| 2317 | goto done; | ||
| 2318 | } | ||
| 2319 | |||
| 2320 | /* | ||
| 2321 | * we have a block from this transaction, log every item in it | ||
| 2322 | * from our directory | ||
| 2323 | */ | ||
| 2324 | while(1) { | ||
| 2325 | struct btrfs_key tmp; | ||
| 2326 | src = path->nodes[0]; | ||
| 2327 | nritems = btrfs_header_nritems(src); | ||
| 2328 | for (i = path->slots[0]; i < nritems; i++) { | ||
| 2329 | btrfs_item_key_to_cpu(src, &min_key, i); | ||
| 2330 | |||
| 2331 | if (min_key.objectid != inode->i_ino || | ||
| 2332 | min_key.type != key_type) | ||
| 2333 | goto done; | ||
| 2334 | ret = overwrite_item(trans, log, dst_path, src, i, | ||
| 2335 | &min_key); | ||
| 2336 | BUG_ON(ret); | ||
| 2337 | } | ||
| 2338 | path->slots[0] = nritems; | ||
| 2339 | |||
| 2340 | /* | ||
| 2341 | * look ahead to the next item and see if it is also | ||
| 2342 | * from this directory and from this transaction | ||
| 2343 | */ | ||
| 2344 | ret = btrfs_next_leaf(root, path); | ||
| 2345 | if (ret == 1) { | ||
| 2346 | last_offset = (u64)-1; | ||
| 2347 | goto done; | ||
| 2348 | } | ||
| 2349 | btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); | ||
| 2350 | if (tmp.objectid != inode->i_ino || tmp.type != key_type) { | ||
| 2351 | last_offset = (u64)-1; | ||
| 2352 | goto done; | ||
| 2353 | } | ||
| 2354 | if (btrfs_header_generation(path->nodes[0]) != trans->transid) { | ||
| 2355 | ret = overwrite_item(trans, log, dst_path, | ||
| 2356 | path->nodes[0], path->slots[0], | ||
| 2357 | &tmp); | ||
| 2358 | |||
| 2359 | BUG_ON(ret); | ||
| 2360 | last_offset = tmp.offset; | ||
| 2361 | goto done; | ||
| 2362 | } | ||
| 2363 | } | ||
| 2364 | done: | ||
| 2365 | *last_offset_ret = last_offset; | ||
| 2366 | btrfs_release_path(root, path); | ||
| 2367 | btrfs_release_path(log, dst_path); | ||
| 2368 | |||
| 2369 | /* insert the log range keys to indicate where the log is valid */ | ||
| 2370 | ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, | ||
| 2371 | first_offset, last_offset); | ||
| 2372 | BUG_ON(ret); | ||
| 2373 | return 0; | ||
| 2374 | } | ||
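
The range item left behind by log_dir_items can be read as a summary: authoritative from first_offset through the last offset handled, or through (u64)-1 when the directory tail was fully logged. A loose model of that bookkeeping (invented names; it ignores the case where logging stops at an item from an older transaction):

```c
#include <stdint.h>
#include <stdio.h>

struct dir_range { uint64_t first; uint64_t last; };

/* summarize one pass of directory logging: the log is authoritative from
 * 'first_offset' through the last logged offset, or through (u64)-1 when
 * the whole tail of the directory was covered */
static struct dir_range close_range(uint64_t first_offset,
				    const uint64_t *logged, int nr,
				    int hit_dir_end)
{
	struct dir_range r = { first_offset, (uint64_t)-1 };

	if (!hit_dir_end && nr > 0)
		r.last = logged[nr - 1];
	return r;
}

int main(void)
{
	uint64_t offs[] = { 5, 7, 9 };
	struct dir_range r = close_range(3, offs, 3, 0);

	printf("[%llu, %llu]\n", (unsigned long long)r.first,
	       (unsigned long long)r.last);	/* [3, 9] */
	return 0;
}
```
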
| 2375 | |||
| 2376 | /* | ||
| 2377 | * logging directories is very similar to logging inodes. We find all the items | ||
| 2378 | * from the current transaction and write them to the log. | ||
| 2379 | * | ||
| 2380 | * The recovery code scans the directory in the subvolume, and if it finds a | ||
| 2381 | * key in the range logged that is not present in the log tree, then it means | ||
| 2382 | * that dir entry was unlinked during the transaction. | ||
| 2383 | * | ||
| 2384 | * In order for that scan to work, we must include one key smaller than | ||
| 2385 | * the smallest logged by this transaction and one key larger than the largest | ||
| 2386 | * key logged by this transaction. | ||
| 2387 | */ | ||
| 2388 | static noinline int log_directory_changes(struct btrfs_trans_handle *trans, | ||
| 2389 | struct btrfs_root *root, struct inode *inode, | ||
| 2390 | struct btrfs_path *path, | ||
| 2391 | struct btrfs_path *dst_path) | ||
| 2392 | { | ||
| 2393 | u64 min_key; | ||
| 2394 | u64 max_key; | ||
| 2395 | int ret; | ||
| 2396 | int key_type = BTRFS_DIR_ITEM_KEY; | ||
| 2397 | |||
| 2398 | again: | ||
| 2399 | min_key = 0; | ||
| 2400 | max_key = 0; | ||
| 2401 | while(1) { | ||
| 2402 | ret = log_dir_items(trans, root, inode, path, | ||
| 2403 | dst_path, key_type, min_key, | ||
| 2404 | &max_key); | ||
| 2405 | BUG_ON(ret); | ||
| 2406 | if (max_key == (u64)-1) | ||
| 2407 | break; | ||
| 2408 | min_key = max_key + 1; | ||
| 2409 | } | ||
| 2410 | |||
| 2411 | if (key_type == BTRFS_DIR_ITEM_KEY) { | ||
| 2412 | key_type = BTRFS_DIR_INDEX_KEY; | ||
| 2413 | goto again; | ||
| 2414 | } | ||
| 2415 | return 0; | ||
| 2416 | } | ||
| 2417 | |||
| 2418 | /* | ||
| 2419 | * a helper function to drop items from the log before we relog an | ||
| 2420 | * inode. max_key_type indicates the highest item type to remove. | ||
| 2421 | * This cannot be run for file data extents because it does not | ||
| 2422 | * free the extents they point to. | ||
| 2423 | */ | ||
| 2424 | static int drop_objectid_items(struct btrfs_trans_handle *trans, | ||
| 2425 | struct btrfs_root *log, | ||
| 2426 | struct btrfs_path *path, | ||
| 2427 | u64 objectid, int max_key_type) | ||
| 2428 | { | ||
| 2429 | int ret; | ||
| 2430 | struct btrfs_key key; | ||
| 2431 | struct btrfs_key found_key; | ||
| 2432 | |||
| 2433 | key.objectid = objectid; | ||
| 2434 | key.type = max_key_type; | ||
| 2435 | key.offset = (u64)-1; | ||
| 2436 | |||
| 2437 | while(1) { | ||
| 2438 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); | ||
| 2439 | |||
| 2440 | if (ret != 1) | ||
| 2441 | break; | ||
| 2442 | |||
| 2443 | if (path->slots[0] == 0) | ||
| 2444 | break; | ||
| 2445 | |||
| 2446 | path->slots[0]--; | ||
| 2447 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 2448 | path->slots[0]); | ||
| 2449 | |||
| 2450 | if (found_key.objectid != objectid) | ||
| 2451 | break; | ||
| 2452 | |||
| 2453 | ret = btrfs_del_item(trans, log, path); | ||
| 2454 | BUG_ON(ret); | ||
| 2455 | btrfs_release_path(log, path); | ||
| 2456 | } | ||
| 2457 | btrfs_release_path(log, path); | ||
| 2458 | return 0; | ||
| 2459 | } | ||
| 2460 | |||
| 2461 | static noinline int copy_items(struct btrfs_trans_handle *trans, | ||
| 2462 | struct btrfs_root *log, | ||
| 2463 | struct btrfs_path *dst_path, | ||
| 2464 | struct extent_buffer *src, | ||
| 2465 | int start_slot, int nr, int inode_only) | ||
| 2466 | { | ||
| 2467 | unsigned long src_offset; | ||
| 2468 | unsigned long dst_offset; | ||
| 2469 | struct btrfs_file_extent_item *extent; | ||
| 2470 | struct btrfs_inode_item *inode_item; | ||
| 2471 | int ret; | ||
| 2472 | struct btrfs_key *ins_keys; | ||
| 2473 | u32 *ins_sizes; | ||
| 2474 | char *ins_data; | ||
| 2475 | int i; | ||
| 2476 | |||
| 2477 | ins_data = kmalloc(nr * sizeof(struct btrfs_key) + | ||
| 2478 | nr * sizeof(u32), GFP_NOFS); | ||
| 2479 | ins_sizes = (u32 *)ins_data; | ||
| 2480 | ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); | ||
| 2481 | |||
| 2482 | for (i = 0; i < nr; i++) { | ||
| 2483 | ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot); | ||
| 2484 | btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot); | ||
| 2485 | } | ||
| 2486 | ret = btrfs_insert_empty_items(trans, log, dst_path, | ||
| 2487 | ins_keys, ins_sizes, nr); | ||
| 2488 | BUG_ON(ret); | ||
| 2489 | |||
| 2490 | for (i = 0; i < nr; i++) { | ||
| 2491 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | ||
| 2492 | dst_path->slots[0]); | ||
| 2493 | |||
| 2494 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); | ||
| 2495 | |||
| 2496 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | ||
| 2497 | src_offset, ins_sizes[i]); | ||
| 2498 | |||
| 2499 | if (inode_only == LOG_INODE_EXISTS && | ||
| 2500 | ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { | ||
| 2501 | inode_item = btrfs_item_ptr(dst_path->nodes[0], | ||
| 2502 | dst_path->slots[0], | ||
| 2503 | struct btrfs_inode_item); | ||
| 2504 | btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); | ||
| 2505 | |||
| 2506 | /* set the generation to zero so the recovery code | ||
| 2507 | * can tell the difference between logging | ||
| 2508 | * just to say 'this inode exists' and logging | ||
| 2509 | * to say 'update this inode with these values' | ||
| 2510 | */ | ||
| 2511 | btrfs_set_inode_generation(dst_path->nodes[0], | ||
| 2512 | inode_item, 0); | ||
| 2513 | } | ||
| 2514 | /* take a reference on file data extents so that truncates | ||
| 2515 | * or deletes of this inode don't have to relog the inode | ||
| 2516 | * again | ||
| 2517 | */ | ||
| 2518 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { | ||
| 2519 | int found_type; | ||
| 2520 | extent = btrfs_item_ptr(src, start_slot + i, | ||
| 2521 | struct btrfs_file_extent_item); | ||
| 2522 | |||
| 2523 | found_type = btrfs_file_extent_type(src, extent); | ||
| 2524 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
| 2525 | u64 ds = btrfs_file_extent_disk_bytenr(src, | ||
| 2526 | extent); | ||
| 2527 | u64 dl = btrfs_file_extent_disk_num_bytes(src, | ||
| 2528 | extent); | ||
| 2529 | /* ds == 0 is a hole */ | ||
| 2530 | if (ds != 0) { | ||
| 2531 | ret = btrfs_inc_extent_ref(trans, log, | ||
| 2532 | ds, dl, | ||
| 2533 | dst_path->nodes[0]->start, | ||
| 2534 | BTRFS_TREE_LOG_OBJECTID, | ||
| 2535 | trans->transid, | ||
| 2536 | ins_keys[i].objectid); | ||
| 2537 | BUG_ON(ret); | ||
| 2538 | } | ||
| 2539 | } | ||
| 2540 | } | ||
| 2541 | dst_path->slots[0]++; | ||
| 2542 | } | ||
| 2543 | |||
| 2544 | btrfs_mark_buffer_dirty(dst_path->nodes[0]); | ||
| 2545 | btrfs_release_path(log, dst_path); | ||
| 2546 | kfree(ins_data); | ||
| 2547 | return 0; | ||
| 2548 | } | ||
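
copy_items batches its work: gather every key and size first, reserve space for all of them with a single insert, then copy the item bodies. The same two-pass pattern reduced to plain C, with separate arrays instead of the single kmalloc the kernel code packs both into:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct item { uint64_t key; uint32_t size; const char *data; };

int main(void)
{
	struct item src[] = {
		{ 1, 3, "abc" }, { 2, 2, "xy" }, { 3, 4, "wxyz" },
	};
	int nr = 3, i;
	uint64_t *keys = malloc(nr * sizeof(*keys));
	uint32_t *sizes = malloc(nr * sizeof(*sizes));
	size_t total = 0;
	char *dst, *p;

	/* pass 1: gather every key and size so space can be reserved once */
	for (i = 0; i < nr; i++) {
		keys[i] = src[i].key;
		sizes[i] = src[i].size;
		total += sizes[i];
	}
	dst = malloc(total);	/* one reservation covers all nr items */

	/* pass 2: copy the item bodies into the reserved space */
	for (p = dst, i = 0; i < nr; i++) {
		memcpy(p, src[i].data, sizes[i]);
		p += sizes[i];
	}
	printf("copied %d items, %zu bytes\n", nr, total);
	free(keys);
	free(sizes);
	free(dst);
	return 0;
}
```
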
| 2549 | |||
| 2550 | /* log a single inode in the tree log. | ||
| 2551 | * At least one parent directory for this inode must exist in the tree | ||
| 2552 | * or be logged already. | ||
| 2553 | * | ||
| 2554 | * Any items from this inode changed by the current transaction are copied | ||
| 2555 | * to the log tree. An extra reference is taken on any extents in this | ||
| 2556 | * file, allowing us to avoid a whole pile of corner cases around logging | ||
| 2557 | * blocks that have been removed from the tree. | ||
| 2558 | * | ||
| 2559 | * See LOG_INODE_ALL and related defines for a description of what inode_only | ||
| 2560 | * does. | ||
| 2561 | * | ||
| 2562 | * This handles both files and directories. | ||
| 2563 | */ | ||
| 2564 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | ||
| 2565 | struct btrfs_root *root, struct inode *inode, | ||
| 2566 | int inode_only) | ||
| 2567 | { | ||
| 2568 | struct btrfs_path *path; | ||
| 2569 | struct btrfs_path *dst_path; | ||
| 2570 | struct btrfs_key min_key; | ||
| 2571 | struct btrfs_key max_key; | ||
| 2572 | struct btrfs_root *log = root->log_root; | ||
| 2573 | struct extent_buffer *src = NULL; | ||
| 2574 | u32 size; | ||
| 2575 | int ret; | ||
| 2576 | int nritems; | ||
| 2577 | int ins_start_slot = 0; | ||
| 2578 | int ins_nr; | ||
| 2579 | |||
| 2582 | path = btrfs_alloc_path(); | ||
| | BUG_ON(!path); | ||
| 2583 | dst_path = btrfs_alloc_path(); | ||
| | BUG_ON(!dst_path); | ||
| 2584 | |||
| 2585 | min_key.objectid = inode->i_ino; | ||
| 2586 | min_key.type = BTRFS_INODE_ITEM_KEY; | ||
| 2587 | min_key.offset = 0; | ||
| 2588 | |||
| 2589 | max_key.objectid = inode->i_ino; | ||
| 2590 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | ||
| 2591 | max_key.type = BTRFS_XATTR_ITEM_KEY; | ||
| 2592 | else | ||
| 2593 | max_key.type = (u8)-1; | ||
| 2594 | max_key.offset = (u64)-1; | ||
| 2595 | |||
| 2596 | /* | ||
| 2597 | * if this inode has already been logged and we're in inode_only | ||
| 2598 | * mode, we don't want to delete the things that have already | ||
| 2599 | * been written to the log. | ||
| 2600 | * | ||
| 2601 | * But, if the inode has been through an inode_only log, | ||
| 2602 | * the logged_trans field is not set. This allows us to catch | ||
| 2603 | * any new names for this inode in the backrefs by logging it | ||
| 2604 | * again | ||
| 2605 | */ | ||
| 2606 | if (inode_only == LOG_INODE_EXISTS && | ||
| 2607 | BTRFS_I(inode)->logged_trans == trans->transid) { | ||
| 2608 | btrfs_free_path(path); | ||
| 2609 | btrfs_free_path(dst_path); | ||
| 2610 | goto out; | ||
| 2611 | } | ||
| 2612 | mutex_lock(&BTRFS_I(inode)->log_mutex); | ||
| 2613 | |||
| 2614 | /* | ||
| 2615 | * a brute force approach to making sure we get the most uptodate | ||
| 2616 | * copies of everything. | ||
| 2617 | */ | ||
| 2618 | if (S_ISDIR(inode->i_mode)) { | ||
| 2619 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; | ||
| 2620 | |||
| 2621 | if (inode_only == LOG_INODE_EXISTS) | ||
| 2622 | max_key_type = BTRFS_XATTR_ITEM_KEY; | ||
| 2623 | ret = drop_objectid_items(trans, log, path, | ||
| 2624 | inode->i_ino, max_key_type); | ||
| 2625 | } else { | ||
| 2626 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | ||
| 2627 | } | ||
| 2628 | BUG_ON(ret); | ||
| 2629 | path->keep_locks = 1; | ||
| 2630 | |||
| 2631 | while(1) { | ||
| 2632 | ins_nr = 0; | ||
| 2633 | ret = btrfs_search_forward(root, &min_key, &max_key, | ||
| 2634 | path, 0, trans->transid); | ||
| 2635 | if (ret != 0) | ||
| 2636 | break; | ||
| 2637 | again: | ||
| 2638 | /* note, ins_nr might be > 0 here, cleanup outside the loop */ | ||
| 2639 | if (min_key.objectid != inode->i_ino) | ||
| 2640 | break; | ||
| 2641 | if (min_key.type > max_key.type) | ||
| 2642 | break; | ||
| 2643 | |||
| 2644 | src = path->nodes[0]; | ||
| 2645 | size = btrfs_item_size_nr(src, path->slots[0]); | ||
| 2646 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { | ||
| 2647 | ins_nr++; | ||
| 2648 | goto next_slot; | ||
| 2649 | } else if (!ins_nr) { | ||
| 2650 | ins_start_slot = path->slots[0]; | ||
| 2651 | ins_nr = 1; | ||
| 2652 | goto next_slot; | ||
| 2653 | } | ||
| 2654 | |||
| 2655 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | ||
| 2656 | ins_nr, inode_only); | ||
| 2657 | BUG_ON(ret); | ||
| 2658 | ins_nr = 1; | ||
| 2659 | ins_start_slot = path->slots[0]; | ||
| 2660 | next_slot: | ||
| 2661 | |||
| 2662 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 2663 | path->slots[0]++; | ||
| 2664 | if (path->slots[0] < nritems) { | ||
| 2665 | btrfs_item_key_to_cpu(path->nodes[0], &min_key, | ||
| 2666 | path->slots[0]); | ||
| 2667 | goto again; | ||
| 2668 | } | ||
| 2669 | if (ins_nr) { | ||
| 2670 | ret = copy_items(trans, log, dst_path, src, | ||
| 2671 | ins_start_slot, | ||
| 2672 | ins_nr, inode_only); | ||
| 2673 | BUG_ON(ret); | ||
| 2674 | ins_nr = 0; | ||
| 2675 | } | ||
| 2676 | btrfs_release_path(root, path); | ||
| 2677 | |||
| 2678 | if (min_key.offset < (u64)-1) | ||
| 2679 | min_key.offset++; | ||
| 2680 | else if (min_key.type < (u8)-1) | ||
| 2681 | min_key.type++; | ||
| 2682 | else if (min_key.objectid < (u64)-1) | ||
| 2683 | min_key.objectid++; | ||
| 2684 | else | ||
| 2685 | break; | ||
| 2686 | } | ||
| 2687 | if (ins_nr) { | ||
| 2688 | ret = copy_items(trans, log, dst_path, src, | ||
| 2689 | ins_start_slot, | ||
| 2690 | ins_nr, inode_only); | ||
| 2691 | BUG_ON(ret); | ||
| 2692 | ins_nr = 0; | ||
| 2693 | } | ||
| 2694 | WARN_ON(ins_nr); | ||
| 2695 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | ||
| 2696 | btrfs_release_path(root, path); | ||
| 2697 | btrfs_release_path(log, dst_path); | ||
| 2698 | BTRFS_I(inode)->log_dirty_trans = 0; | ||
| 2699 | ret = log_directory_changes(trans, root, inode, path, dst_path); | ||
| 2700 | BUG_ON(ret); | ||
| 2701 | } | ||
| 2702 | BTRFS_I(inode)->logged_trans = trans->transid; | ||
| 2703 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | ||
| 2704 | |||
| 2705 | btrfs_free_path(path); | ||
| 2706 | btrfs_free_path(dst_path); | ||
| 2707 | |||
| 2708 | mutex_lock(&root->fs_info->tree_log_mutex); | ||
| 2709 | ret = update_log_root(trans, log); | ||
| 2710 | BUG_ON(ret); | ||
| 2711 | mutex_unlock(&root->fs_info->tree_log_mutex); | ||
| 2712 | out: | ||
| 2713 | return 0; | ||
| 2714 | } | ||
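
The ins_start_slot/ins_nr bookkeeping in the loop above is run-length batching: contiguous leaf slots are accumulated and flushed with one copy_items call whenever the run breaks or the leaf ends. Isolated into a sketch (flush is a hypothetical stand-in for copy_items):

```c
#include <stdio.h>

static void flush(int start, int nr)
{
	printf("copy_items(slot %d, count %d)\n", start, nr);
}

int main(void)
{
	/* slots in this leaf that belong to the inode being logged */
	int slots[] = { 2, 3, 4, 7, 8 };
	int n = 5, i;
	int ins_start = slots[0], ins_nr = 1;

	for (i = 1; i < n; i++) {
		if (slots[i] == ins_start + ins_nr) {
			ins_nr++;	/* still contiguous: extend the run */
			continue;
		}
		flush(ins_start, ins_nr);	/* run broke: flush it */
		ins_start = slots[i];
		ins_nr = 1;
	}
	if (ins_nr)
		flush(ins_start, ins_nr);	/* final partial run */
	return 0;
}
```
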
| 2715 | |||
| 2716 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | ||
| 2717 | struct btrfs_root *root, struct inode *inode, | ||
| 2718 | int inode_only) | ||
| 2719 | { | ||
| 2720 | int ret; | ||
| 2721 | |||
| 2722 | start_log_trans(trans, root); | ||
| 2723 | ret = __btrfs_log_inode(trans, root, inode, inode_only); | ||
| 2724 | end_log_trans(root); | ||
| 2725 | return ret; | ||
| 2726 | } | ||
| 2727 | |||
| 2728 | /* | ||
| 2729 | * helper function around btrfs_log_inode to make sure newly created | ||
| 2730 | * parent directories also end up in the log. A minimal, inode-and- | ||
| 2731 | * backref-only logging is done for any parent directory that is newer | ||
| 2732 | * than the last committed transaction | ||
| 2733 | */ | ||
| 2734 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | ||
| 2735 | struct btrfs_root *root, struct dentry *dentry) | ||
| 2736 | { | ||
| 2737 | int inode_only = LOG_INODE_ALL; | ||
| 2738 | struct super_block *sb; | ||
| 2739 | int ret; | ||
| 2740 | |||
| 2741 | start_log_trans(trans, root); | ||
| 2742 | sb = dentry->d_inode->i_sb; | ||
| 2743 | while(1) { | ||
| 2744 | ret = __btrfs_log_inode(trans, root, dentry->d_inode, | ||
| 2745 | inode_only); | ||
| 2746 | BUG_ON(ret); | ||
| 2747 | inode_only = LOG_INODE_EXISTS; | ||
| 2748 | |||
| 2749 | dentry = dentry->d_parent; | ||
| 2750 | if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) | ||
| 2751 | break; | ||
| 2752 | |||
| 2753 | if (BTRFS_I(dentry->d_inode)->generation <= | ||
| 2754 | root->fs_info->last_trans_committed) | ||
| 2755 | break; | ||
| 2756 | } | ||
| 2757 | end_log_trans(root); | ||
| 2758 | return 0; | ||
| 2759 | } | ||
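
The ancestor walk in btrfs_log_dentry is a linked-list climb: the target inode gets a full log, and each parent created after the last committed transaction gets only an existence record. A sketch with invented types:

```c
#include <stdint.h>
#include <stdio.h>

struct dent {
	const char *name;
	uint64_t generation;
	struct dent *parent;	/* NULL at the fs root */
};

static void log_ancestors(struct dent *d, uint64_t last_committed)
{
	printf("full log of %s\n", d->name);
	for (d = d->parent; d; d = d->parent) {
		if (d->generation <= last_committed)
			break;	/* already safe on disk: stop climbing */
		printf("existence-only log of %s\n", d->name);
	}
}

int main(void)
{
	struct dent root = { "/", 1, NULL };
	struct dent dir = { "newdir", 9, &root };
	struct dent file = { "file", 9, &dir };

	/* last committed transaction is 5: newdir must be logged too */
	log_ancestors(&file, 5);
	return 0;
}
```
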
| 2760 | |||
| 2761 | /* | ||
| 2762 | * it is not safe to log a dentry if the chunk root has added new | ||
| 2763 | * chunks. This returns 0 if the dentry was logged, and 1 otherwise. | ||
| 2764 | * If this returns 1, you must commit the transaction to safely get your | ||
| 2765 | * data on disk. | ||
| 2766 | */ | ||
| 2767 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | ||
| 2768 | struct btrfs_root *root, struct dentry *dentry) | ||
| 2769 | { | ||
| 2770 | u64 gen; | ||
| 2771 | gen = root->fs_info->last_trans_new_blockgroup; | ||
| 2772 | if (gen > root->fs_info->last_trans_committed) | ||
| 2773 | return 1; | ||
| 2774 | else | ||
| 2775 | return btrfs_log_dentry(trans, root, dentry); | ||
| 2776 | } | ||
| 2777 | |||
| 2778 | /* | ||
| 2779 | * should be called during mount to recover and replay any log trees | ||
| 2780 | * from the FS | ||
| 2781 | */ | ||
| 2782 | int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | ||
| 2783 | { | ||
| 2784 | int ret; | ||
| 2785 | struct btrfs_path *path; | ||
| 2786 | struct btrfs_trans_handle *trans; | ||
| 2787 | struct btrfs_key key; | ||
| 2788 | struct btrfs_key found_key; | ||
| 2789 | struct btrfs_key tmp_key; | ||
| 2790 | struct btrfs_root *log; | ||
| 2791 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; | ||
| 2792 | u64 highest_inode; | ||
| 2793 | struct walk_control wc = { | ||
| 2794 | .process_func = process_one_buffer, | ||
| 2795 | .stage = 0, | ||
| 2796 | }; | ||
| 2797 | |||
| 2798 | fs_info->log_root_recovering = 1; | ||
| 2799 | path = btrfs_alloc_path(); | ||
| 2800 | BUG_ON(!path); | ||
| 2801 | |||
| 2802 | trans = btrfs_start_transaction(fs_info->tree_root, 1); | ||
| 2803 | |||
| 2804 | wc.trans = trans; | ||
| 2805 | wc.pin = 1; | ||
| 2806 | |||
| 2807 | walk_log_tree(trans, log_root_tree, &wc); | ||
| 2808 | |||
| 2809 | again: | ||
| 2810 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | ||
| 2811 | key.offset = (u64)-1; | ||
| 2812 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 2813 | |||
| 2814 | while(1) { | ||
| 2815 | ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); | ||
| 2816 | if (ret < 0) | ||
| 2817 | break; | ||
| 2818 | if (ret > 0) { | ||
| 2819 | if (path->slots[0] == 0) | ||
| 2820 | break; | ||
| 2821 | path->slots[0]--; | ||
| 2822 | } | ||
| 2823 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 2824 | path->slots[0]); | ||
| 2825 | btrfs_release_path(log_root_tree, path); | ||
| 2826 | if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) | ||
| 2827 | break; | ||
| 2828 | |||
| 2829 | log = btrfs_read_fs_root_no_radix(log_root_tree, | ||
| 2830 | &found_key); | ||
| 2831 | BUG_ON(!log); | ||
| 2832 | |||
| 2834 | tmp_key.objectid = found_key.offset; | ||
| 2835 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 2836 | tmp_key.offset = (u64)-1; | ||
| 2837 | |||
| 2838 | wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); | ||
| 2839 | |||
| 2840 | BUG_ON(!wc.replay_dest); | ||
| 2841 | |||
| 2842 | btrfs_record_root_in_trans(wc.replay_dest); | ||
| 2843 | ret = walk_log_tree(trans, log, &wc); | ||
| 2844 | BUG_ON(ret); | ||
| 2845 | |||
| 2846 | if (wc.stage == LOG_WALK_REPLAY_ALL) { | ||
| 2847 | ret = fixup_inode_link_counts(trans, wc.replay_dest, | ||
| 2848 | path); | ||
| 2849 | BUG_ON(ret); | ||
| 2850 | } | ||
| 2851 | ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); | ||
| 2852 | if (ret == 0) { | ||
| 2853 | wc.replay_dest->highest_inode = highest_inode; | ||
| 2854 | wc.replay_dest->last_inode_alloc = highest_inode; | ||
| 2855 | } | ||
| 2856 | |||
| 2857 | key.offset = found_key.offset - 1; | ||
| 2858 | free_extent_buffer(log->node); | ||
| 2859 | kfree(log); | ||
| 2860 | |||
| 2861 | if (found_key.offset == 0) | ||
| 2862 | break; | ||
| 2863 | } | ||
| 2864 | btrfs_release_path(log_root_tree, path); | ||
| 2865 | |||
| 2866 | /* step one is to pin it all, step two is to replay just inodes */ | ||
| 2867 | if (wc.pin) { | ||
| 2868 | wc.pin = 0; | ||
| 2869 | wc.process_func = replay_one_buffer; | ||
| 2870 | wc.stage = LOG_WALK_REPLAY_INODES; | ||
| 2871 | goto again; | ||
| 2872 | } | ||
| 2873 | /* step three is to replay everything */ | ||
| 2874 | if (wc.stage < LOG_WALK_REPLAY_ALL) { | ||
| 2875 | wc.stage++; | ||
| 2876 | goto again; | ||
| 2877 | } | ||
| 2878 | |||
| 2879 | btrfs_free_path(path); | ||
| 2880 | |||
| 2881 | free_extent_buffer(log_root_tree->node); | ||
| 2882 | log_root_tree->log_root = NULL; | ||
| 2883 | fs_info->log_root_recovering = 0; | ||
| 2884 | |||
| 2885 | /* step 4: commit the transaction, which also unpins the blocks */ | ||
| 2886 | btrfs_commit_transaction(trans, fs_info->tree_root); | ||
| 2887 | |||
| 2888 | kfree(log_root_tree); | ||
| 2889 | return 0; | ||
| 2890 | } | ||
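
Taken together, the mount-time recovery above makes three passes over every log tree: pin all referenced blocks, replay inode items, then replay everything else, followed by a commit that unpins. The goto-driven control flow reduces to a staged loop (sketch only, with shortened stage names):

```c
#include <stdio.h>

enum { WALK_PIN, WALK_REPLAY_INODES, WALK_REPLAY_ALL };

static void walk_all_log_trees(int stage)
{
	static const char * const names[] = {
		"pin blocks", "replay inodes", "replay everything",
	};
	printf("pass %d: %s\n", stage, names[stage]);
}

int main(void)
{
	int stage;

	/* equivalent of the "goto again" that re-runs the root scan */
	for (stage = WALK_PIN; stage <= WALK_REPLAY_ALL; stage++)
		walk_all_log_trees(stage);
	/* then commit the transaction, which also unpins the blocks */
	return 0;
}
```
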
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h new file mode 100644 index 00000000000..b9409b32ed0 --- /dev/null +++ b/fs/btrfs/tree-log.h | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __TREE_LOG_ | ||
| 20 | #define __TREE_LOG_ | ||
| 21 | |||
| 22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | ||
| 23 | struct btrfs_root *root); | ||
| 24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | ||
| 25 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | ||
| 26 | struct btrfs_root *root, struct dentry *dentry); | ||
| 27 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | ||
| 28 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | ||
| 29 | struct btrfs_root *root, struct dentry *dentry); | ||
| 30 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | ||
| 31 | struct btrfs_root *root, struct inode *inode, | ||
| 32 | int inode_only); | ||
| 33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | ||
| 34 | struct btrfs_root *root, | ||
| 35 | const char *name, int name_len, | ||
| 36 | struct inode *dir, u64 index); | ||
| 37 | int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | ||
| 38 | struct btrfs_root *root, | ||
| 39 | const char *name, int name_len, | ||
| 40 | struct inode *inode, u64 dirid); | ||
| 41 | #endif | ||
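
A sketch of how a caller might drive this API from an fsync-style path, in the order the declarations suggest: log the dentry's dirty items into the per-root log tree, then force just the log tree to disk. The wrapper function and the fallback comment are assumptions for illustration, not the literal kernel fsync body.

/* sketch only: plausible caller of the tree-log API declared above */
struct btrfs_trans_handle;
struct btrfs_root;
struct dentry;

int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct dentry *dentry);
int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);

static int fsync_via_tree_log(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, struct dentry *dentry)
{
	int ret;

	/* copy this dentry's dirty items into the per-root log tree */
	ret = btrfs_log_dentry_safe(trans, root, dentry);
	if (ret)
		return ret;	/* caller would fall back to a full commit */

	/* write and wait on just the log tree blocks */
	return btrfs_sync_log(trans, root);
}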
diff --git a/fs/btrfs/version.h b/fs/btrfs/version.h new file mode 100644 index 00000000000..9bf3946d5ef --- /dev/null +++ b/fs/btrfs/version.h | |||
| @@ -0,0 +1,4 @@ | |||
| 1 | #ifndef __BTRFS_VERSION_H | ||
| 2 | #define __BTRFS_VERSION_H | ||
| 3 | #define BTRFS_BUILD_VERSION "Btrfs" | ||
| 4 | #endif | ||
diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh new file mode 100644 index 00000000000..0f57f24404d --- /dev/null +++ b/fs/btrfs/version.sh | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | # | ||
| 3 | # determine-version -- report a useful version for releases | ||
| 4 | # | ||
| 5 | # Copyright 2008, Aron Griffis <agriffis@n01se.net> | ||
| 6 | # Copyright 2008, Oracle | ||
| 7 | # Released under the GNU GPLv2 | ||
| 8 | |||
| 9 | v="v0.16" | ||
| 10 | |||
| 11 | which hg > /dev/null 2>&1 | ||
| 12 | if [ $? == 0 ] && [ -d .hg ]; then | ||
| 13 | last=$(hg tags | grep -m1 -o '^v[0-9.]\+') | ||
| 14 | |||
| 15 | # now check if the repo has commits since then... | ||
| 16 | if [[ $(hg id -t) == $last || \ | ||
| 17 | $(hg di -r "$last:." | awk '/^diff/{print $NF}' | sort -u) == .hgtags ]] | ||
| 18 | then | ||
| 19 | # check if it's dirty | ||
| 20 | if [[ $(hg id | cut -d' ' -f1) == *+ ]]; then | ||
| 21 | v=$last+ | ||
| 22 | else | ||
| 23 | v=$last | ||
| 24 | fi | ||
| 25 | else | ||
| 26 | # includes dirty flag | ||
| 27 | v=$last+$(hg id -i) | ||
| 28 | fi | ||
| 29 | fi | ||
| 30 | |||
| 31 | echo "#ifndef __BUILD_VERSION" > .build-version.h | ||
| 32 | echo "#define __BUILD_VERSION" >> .build-version.h | ||
| 33 | echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h | ||
| 34 | echo "#endif" >> .build-version.h | ||
| 35 | |||
| 36 | diff -q version.h .build-version.h >& /dev/null | ||
| 37 | |||
| 38 | if [ $? == 0 ]; then | ||
| 39 | rm .build-version.h | ||
| 40 | exit 0 | ||
| 41 | fi | ||
| 42 | |||
| 43 | mv .build-version.h version.h | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c new file mode 100644 index 00000000000..2eed7f91f51 --- /dev/null +++ b/fs/btrfs/volumes.c | |||
| @@ -0,0 +1,2549 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/bio.h> | ||
| 20 | #include <linux/buffer_head.h> | ||
| 21 | #include <linux/blkdev.h> | ||
| 22 | #include <linux/random.h> | ||
| 23 | #include <asm/div64.h> | ||
| 24 | #include "ctree.h" | ||
| 25 | #include "extent_map.h" | ||
| 26 | #include "disk-io.h" | ||
| 27 | #include "transaction.h" | ||
| 28 | #include "print-tree.h" | ||
| 29 | #include "volumes.h" | ||
| 30 | #include "async-thread.h" | ||
| 31 | |||
| 32 | struct map_lookup { | ||
| 33 | u64 type; | ||
| 34 | int io_align; | ||
| 35 | int io_width; | ||
| 36 | int stripe_len; | ||
| 37 | int sector_size; | ||
| 38 | int num_stripes; | ||
| 39 | int sub_stripes; | ||
| 40 | struct btrfs_bio_stripe stripes[]; | ||
| 41 | }; | ||
| 42 | |||
| 43 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ | ||
| 44 | (sizeof(struct btrfs_bio_stripe) * (n))) | ||
| 45 | |||
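
map_lookup ends in a C99 flexible array member, so the header and all of its stripes live in one allocation and map_lookup_size() computes the total. The same sizing trick in a runnable user-space form; the stub stripe struct and demo names are assumptions standing in for btrfs_bio_stripe and the kernel struct.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct bio_stripe { unsigned long long dev_offset; };	/* stub */

struct map_lookup_demo {
	unsigned long long type;
	int num_stripes;
	struct bio_stripe stripes[];	/* flexible array member */
};

#define map_lookup_size(n) (sizeof(struct map_lookup_demo) + \
			    (sizeof(struct bio_stripe) * (n)))

int main(void)
{
	int n = 4;
	struct map_lookup_demo *map = malloc(map_lookup_size(n));

	if (!map)
		return 1;
	memset(map, 0, map_lookup_size(n));
	map->num_stripes = n;
	map->stripes[n - 1].dev_offset = 4096;	/* last stripe is valid memory */
	printf("one allocation of %zu bytes for %d stripes\n",
	       map_lookup_size(n), n);
	free(map);
	return 0;
}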
| 46 | static DEFINE_MUTEX(uuid_mutex); | ||
| 47 | static LIST_HEAD(fs_uuids); | ||
| 48 | |||
| 49 | void btrfs_lock_volumes(void) | ||
| 50 | { | ||
| 51 | mutex_lock(&uuid_mutex); | ||
| 52 | } | ||
| 53 | |||
| 54 | void btrfs_unlock_volumes(void) | ||
| 55 | { | ||
| 56 | mutex_unlock(&uuid_mutex); | ||
| 57 | } | ||
| 58 | |||
| 59 | static void lock_chunks(struct btrfs_root *root) | ||
| 60 | { | ||
| 61 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 62 | mutex_lock(&root->fs_info->chunk_mutex); | ||
| 63 | } | ||
| 64 | |||
| 65 | static void unlock_chunks(struct btrfs_root *root) | ||
| 66 | { | ||
| 67 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 68 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 69 | } | ||
| 70 | |||
| 71 | int btrfs_cleanup_fs_uuids(void) | ||
| 72 | { | ||
| 73 | struct btrfs_fs_devices *fs_devices; | ||
| 74 | struct list_head *uuid_cur; | ||
| 75 | struct list_head *devices_cur; | ||
| 76 | struct btrfs_device *dev; | ||
| 77 | |||
| 78 | list_for_each(uuid_cur, &fs_uuids) { | ||
| 79 | fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices, | ||
| 80 | list); | ||
| 81 | while(!list_empty(&fs_devices->devices)) { | ||
| 82 | devices_cur = fs_devices->devices.next; | ||
| 83 | dev = list_entry(devices_cur, struct btrfs_device, | ||
| 84 | dev_list); | ||
| 85 | if (dev->bdev) { | ||
| 86 | close_bdev_excl(dev->bdev); | ||
| 87 | fs_devices->open_devices--; | ||
| 88 | } | ||
| 89 | list_del(&dev->dev_list); | ||
| 90 | kfree(dev->name); | ||
| 91 | kfree(dev); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | return 0; | ||
| 95 | } | ||
| 96 | |||
| 97 | static noinline struct btrfs_device *__find_device(struct list_head *head, | ||
| 98 | u64 devid, u8 *uuid) | ||
| 99 | { | ||
| 100 | struct btrfs_device *dev; | ||
| 101 | struct list_head *cur; | ||
| 102 | |||
| 103 | list_for_each(cur, head) { | ||
| 104 | dev = list_entry(cur, struct btrfs_device, dev_list); | ||
| 105 | if (dev->devid == devid && | ||
| 106 | (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { | ||
| 107 | return dev; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | return NULL; | ||
| 111 | } | ||
| 112 | |||
| 113 | static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) | ||
| 114 | { | ||
| 115 | struct list_head *cur; | ||
| 116 | struct btrfs_fs_devices *fs_devices; | ||
| 117 | |||
| 118 | list_for_each(cur, &fs_uuids) { | ||
| 119 | fs_devices = list_entry(cur, struct btrfs_fs_devices, list); | ||
| 120 | if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) | ||
| 121 | return fs_devices; | ||
| 122 | } | ||
| 123 | return NULL; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * we try to collect pending bios for a device so we don't get a large | ||
| 128 | * number of procs sending bios down to the same device. This greatly | ||
| 129 | * improves the scheduler's ability to collect and merge the bios. | ||
| 130 | * | ||
| 131 | * But, it also turns into a long list of bios to process and that is sure | ||
| 132 | * to eventually make the worker thread block. The solution here is to | ||
| 133 | * make some progress and then put this work struct back at the end of | ||
| 134 | * the list if the block device is congested. This way, multiple devices | ||
| 135 | * can make progress from a single worker thread. | ||
| 136 | */ | ||
| 137 | static int noinline run_scheduled_bios(struct btrfs_device *device) | ||
| 138 | { | ||
| 139 | struct bio *pending; | ||
| 140 | struct backing_dev_info *bdi; | ||
| 141 | struct btrfs_fs_info *fs_info; | ||
| 142 | struct bio *tail; | ||
| 143 | struct bio *cur; | ||
| 144 | int again = 0; | ||
| 145 | unsigned long num_run = 0; | ||
| 146 | unsigned long limit; | ||
| 147 | |||
| 148 | bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; | ||
| 149 | fs_info = device->dev_root->fs_info; | ||
| 150 | limit = btrfs_async_submit_limit(fs_info); | ||
| 151 | limit = limit * 2 / 3; | ||
| 152 | |||
| 153 | loop: | ||
| 154 | spin_lock(&device->io_lock); | ||
| 155 | |||
| 156 | /* take all the bios off the list at once and process them | ||
| 157 | * later on (without the lock held). But, remember the | ||
| 158 | * tail and other pointers so the bios can be properly reinserted | ||
| 159 | * into the list if we hit congestion | ||
| 160 | */ | ||
| 161 | pending = device->pending_bios; | ||
| 162 | tail = device->pending_bio_tail; | ||
| 163 | WARN_ON(pending && !tail); | ||
| 164 | device->pending_bios = NULL; | ||
| 165 | device->pending_bio_tail = NULL; | ||
| 166 | |||
| 167 | /* | ||
| 168 | * if pending was null this time around, no bios need processing | ||
| 169 | * at all and we can stop. Otherwise it'll loop back up again | ||
| 170 | * and do an additional check so no bios are missed. | ||
| 171 | * | ||
| 172 | * device->running_pending is used to synchronize with the | ||
| 173 | * schedule_bio code. | ||
| 174 | */ | ||
| 175 | if (pending) { | ||
| 176 | again = 1; | ||
| 177 | device->running_pending = 1; | ||
| 178 | } else { | ||
| 179 | again = 0; | ||
| 180 | device->running_pending = 0; | ||
| 181 | } | ||
| 182 | spin_unlock(&device->io_lock); | ||
| 183 | |||
| 184 | while(pending) { | ||
| 185 | cur = pending; | ||
| 186 | pending = pending->bi_next; | ||
| 187 | cur->bi_next = NULL; | ||
| 188 | atomic_dec(&fs_info->nr_async_bios); | ||
| 189 | |||
| 190 | if (atomic_read(&fs_info->nr_async_bios) < limit && | ||
| 191 | waitqueue_active(&fs_info->async_submit_wait)) | ||
| 192 | wake_up(&fs_info->async_submit_wait); | ||
| 193 | |||
| 194 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | ||
| 195 | bio_get(cur); | ||
| 196 | submit_bio(cur->bi_rw, cur); | ||
| 197 | bio_put(cur); | ||
| 198 | num_run++; | ||
| 199 | |||
| 200 | /* | ||
| 201 | * we made progress, there is more work to do and the bdi | ||
| 202 | * is now congested. Back off and let other work structs | ||
| 203 | * run instead | ||
| 204 | */ | ||
| 205 | if (pending && bdi_write_congested(bdi)) { | ||
| 206 | struct bio *old_head; | ||
| 207 | |||
| 208 | spin_lock(&device->io_lock); | ||
| 209 | |||
| 210 | old_head = device->pending_bios; | ||
| 211 | device->pending_bios = pending; | ||
| 212 | if (device->pending_bio_tail) | ||
| 213 | tail->bi_next = old_head; | ||
| 214 | else | ||
| 215 | device->pending_bio_tail = tail; | ||
| 216 | |||
| 217 | spin_unlock(&device->io_lock); | ||
| 218 | btrfs_requeue_work(&device->work); | ||
| 219 | goto done; | ||
| 220 | } | ||
| 221 | } | ||
| 222 | if (again) | ||
| 223 | goto loop; | ||
| 224 | done: | ||
| 225 | return 0; | ||
| 226 | } | ||
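
Concretely, run_scheduled_bios() empties the whole pending list while holding io_lock, submits with the lock dropped, and on congestion splices the unsubmitted tail back in front of whatever schedule_bio queued in the meantime. Below is a runnable user-space model of that take-all/splice-back pattern; a pthread mutex stands in for the spinlock, a fixed threshold stands in for bdi_write_congested(), and where the kernel requeues the work struct this model simply returns.

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct node { int id; struct node *next; };

static struct node *pending_head, *pending_tail;
static pthread_mutex_t io_lock = PTHREAD_MUTEX_INITIALIZER;

static void enqueue(int id)		/* plays the role of schedule_bio */
{
	struct node *n = malloc(sizeof(*n));

	n->id = id;
	n->next = NULL;
	pthread_mutex_lock(&io_lock);
	if (pending_tail)
		pending_tail->next = n;
	else
		pending_head = n;
	pending_tail = n;
	pthread_mutex_unlock(&io_lock);
}

static int congested(int num_run)	/* stands in for bdi_write_congested */
{
	return num_run >= 2;
}

static void run_scheduled(void)
{
	struct node *list, *tail, *cur;
	int num_run = 0;

	pthread_mutex_lock(&io_lock);
	list = pending_head;		/* take the whole list at once */
	tail = pending_tail;
	pending_head = pending_tail = NULL;
	pthread_mutex_unlock(&io_lock);

	while (list) {
		cur = list;
		list = list->next;
		printf("submit %d\n", cur->id);
		free(cur);
		num_run++;

		if (list && congested(num_run)) {
			/* splice the remainder in front of new arrivals */
			pthread_mutex_lock(&io_lock);
			tail->next = pending_head;
			if (!pending_tail)
				pending_tail = tail;
			pending_head = list;
			pthread_mutex_unlock(&io_lock);
			return;		/* the kernel requeues the work here */
		}
	}
}

int main(void)
{
	int i;

	for (i = 1; i <= 5; i++)
		enqueue(i);
	run_scheduled();		/* submits 1, 2 then backs off */
	run_scheduled();		/* submits 3, 4 then backs off */
	run_scheduled();		/* submits 5 */
	return 0;
}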
| 227 | |||
| 228 | void pending_bios_fn(struct btrfs_work *work) | ||
| 229 | { | ||
| 230 | struct btrfs_device *device; | ||
| 231 | |||
| 232 | device = container_of(work, struct btrfs_device, work); | ||
| 233 | run_scheduled_bios(device); | ||
| 234 | } | ||
| 235 | |||
| 236 | static noinline int device_list_add(const char *path, | ||
| 237 | struct btrfs_super_block *disk_super, | ||
| 238 | u64 devid, struct btrfs_fs_devices **fs_devices_ret) | ||
| 239 | { | ||
| 240 | struct btrfs_device *device; | ||
| 241 | struct btrfs_fs_devices *fs_devices; | ||
| 242 | u64 found_transid = btrfs_super_generation(disk_super); | ||
| 243 | |||
| 244 | fs_devices = find_fsid(disk_super->fsid); | ||
| 245 | if (!fs_devices) { | ||
| 246 | fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); | ||
| 247 | if (!fs_devices) | ||
| 248 | return -ENOMEM; | ||
| 249 | INIT_LIST_HEAD(&fs_devices->devices); | ||
| 250 | INIT_LIST_HEAD(&fs_devices->alloc_list); | ||
| 251 | list_add(&fs_devices->list, &fs_uuids); | ||
| 252 | memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); | ||
| 253 | fs_devices->latest_devid = devid; | ||
| 254 | fs_devices->latest_trans = found_transid; | ||
| 255 | device = NULL; | ||
| 256 | } else { | ||
| 257 | device = __find_device(&fs_devices->devices, devid, | ||
| 258 | disk_super->dev_item.uuid); | ||
| 259 | } | ||
| 260 | if (!device) { | ||
| 261 | device = kzalloc(sizeof(*device), GFP_NOFS); | ||
| 262 | if (!device) { | ||
| 263 | /* we can safely leave the fs_devices entry around */ | ||
| 264 | return -ENOMEM; | ||
| 265 | } | ||
| 266 | device->devid = devid; | ||
| 267 | device->work.func = pending_bios_fn; | ||
| 268 | memcpy(device->uuid, disk_super->dev_item.uuid, | ||
| 269 | BTRFS_UUID_SIZE); | ||
| 270 | device->barriers = 1; | ||
| 271 | spin_lock_init(&device->io_lock); | ||
| 272 | device->name = kstrdup(path, GFP_NOFS); | ||
| 273 | if (!device->name) { | ||
| 274 | kfree(device); | ||
| 275 | return -ENOMEM; | ||
| 276 | } | ||
| 277 | list_add(&device->dev_list, &fs_devices->devices); | ||
| 278 | list_add(&device->dev_alloc_list, &fs_devices->alloc_list); | ||
| 279 | fs_devices->num_devices++; | ||
| 280 | } | ||
| 281 | |||
| 282 | if (found_transid > fs_devices->latest_trans) { | ||
| 283 | fs_devices->latest_devid = devid; | ||
| 284 | fs_devices->latest_trans = found_transid; | ||
| 285 | } | ||
| 286 | *fs_devices_ret = fs_devices; | ||
| 287 | return 0; | ||
| 288 | } | ||
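
device_list_add() registers each scanned device under its filesystem UUID and keeps latest_devid/latest_trans pointing at the device whose superblock carries the highest generation; the mount path later trusts that device's super as the freshest copy. The update rule in isolation, as a runnable model (struct and function names here are illustrative, not kernel types):

#include <stdio.h>

struct fs_devices_demo {
	unsigned long long latest_devid;
	unsigned long long latest_trans;
};

/* mirror of the update at the end of device_list_add() */
static void note_device(struct fs_devices_demo *fs,
			unsigned long long devid,
			unsigned long long found_transid)
{
	if (found_transid > fs->latest_trans) {
		fs->latest_devid = devid;
		fs->latest_trans = found_transid;
	}
}

int main(void)
{
	struct fs_devices_demo fs = { 0, 0 };

	note_device(&fs, 1, 100);	/* first scan */
	note_device(&fs, 2, 97);	/* stale mirror: ignored */
	note_device(&fs, 3, 103);	/* newest super wins */
	printf("latest devid %llu, generation %llu\n",
	       fs.latest_devid, fs.latest_trans);
	return 0;
}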
| 289 | |||
| 290 | int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) | ||
| 291 | { | ||
| 292 | struct list_head *head = &fs_devices->devices; | ||
| 293 | struct list_head *cur; | ||
| 294 | struct btrfs_device *device; | ||
| 295 | |||
| 296 | mutex_lock(&uuid_mutex); | ||
| 297 | again: | ||
| 298 | list_for_each(cur, head) { | ||
| 299 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 300 | if (!device->in_fs_metadata) { | ||
| 301 | struct block_device *bdev; | ||
| 302 | list_del(&device->dev_list); | ||
| 303 | list_del(&device->dev_alloc_list); | ||
| 304 | fs_devices->num_devices--; | ||
| 305 | if (device->bdev) { | ||
| 306 | bdev = device->bdev; | ||
| 307 | fs_devices->open_devices--; | ||
| 308 | mutex_unlock(&uuid_mutex); | ||
| 309 | close_bdev_excl(bdev); | ||
| 310 | mutex_lock(&uuid_mutex); | ||
| 311 | } | ||
| 312 | kfree(device->name); | ||
| 313 | kfree(device); | ||
| 314 | goto again; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | mutex_unlock(&uuid_mutex); | ||
| 318 | return 0; | ||
| 319 | } | ||
| 320 | |||
| 321 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | ||
| 322 | { | ||
| 323 | struct list_head *head = &fs_devices->devices; | ||
| 324 | struct list_head *cur; | ||
| 325 | struct btrfs_device *device; | ||
| 326 | |||
| 327 | mutex_lock(&uuid_mutex); | ||
| 328 | list_for_each(cur, head) { | ||
| 329 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 330 | if (device->bdev) { | ||
| 331 | close_bdev_excl(device->bdev); | ||
| 332 | fs_devices->open_devices--; | ||
| 333 | } | ||
| 334 | device->bdev = NULL; | ||
| 335 | device->in_fs_metadata = 0; | ||
| 336 | } | ||
| 337 | fs_devices->mounted = 0; | ||
| 338 | mutex_unlock(&uuid_mutex); | ||
| 339 | return 0; | ||
| 340 | } | ||
| 341 | |||
| 342 | int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | ||
| 343 | int flags, void *holder) | ||
| 344 | { | ||
| 345 | struct block_device *bdev; | ||
| 346 | struct list_head *head = &fs_devices->devices; | ||
| 347 | struct list_head *cur; | ||
| 348 | struct btrfs_device *device; | ||
| 349 | struct block_device *latest_bdev = NULL; | ||
| 350 | struct buffer_head *bh; | ||
| 351 | struct btrfs_super_block *disk_super; | ||
| 352 | u64 latest_devid = 0; | ||
| 353 | u64 latest_transid = 0; | ||
| 354 | u64 transid; | ||
| 355 | u64 devid; | ||
| 356 | int ret = 0; | ||
| 357 | |||
| 358 | mutex_lock(&uuid_mutex); | ||
| 359 | if (fs_devices->mounted) | ||
| 360 | goto out; | ||
| 361 | |||
| 362 | list_for_each(cur, head) { | ||
| 363 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 364 | if (device->bdev) | ||
| 365 | continue; | ||
| 366 | |||
| 367 | if (!device->name) | ||
| 368 | continue; | ||
| 369 | |||
| 370 | bdev = open_bdev_excl(device->name, flags, holder); | ||
| 371 | |||
| 372 | if (IS_ERR(bdev)) { | ||
| 373 | printk("open %s failed\n", device->name); | ||
| 374 | goto error; | ||
| 375 | } | ||
| 376 | set_blocksize(bdev, 4096); | ||
| 377 | |||
| 378 | bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); | ||
| 379 | if (!bh) | ||
| 380 | goto error_close; | ||
| 381 | |||
| 382 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 383 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | ||
| 384 | sizeof(disk_super->magic))) | ||
| 385 | goto error_brelse; | ||
| 386 | |||
| 387 | devid = le64_to_cpu(disk_super->dev_item.devid); | ||
| 388 | if (devid != device->devid) | ||
| 389 | goto error_brelse; | ||
| 390 | |||
| 391 | transid = btrfs_super_generation(disk_super); | ||
| 392 | if (!latest_transid || transid > latest_transid) { | ||
| 393 | latest_devid = devid; | ||
| 394 | latest_transid = transid; | ||
| 395 | latest_bdev = bdev; | ||
| 396 | } | ||
| 397 | |||
| 398 | device->bdev = bdev; | ||
| 399 | device->in_fs_metadata = 0; | ||
| 400 | fs_devices->open_devices++; | ||
| 401 | continue; | ||
| 402 | |||
| 403 | error_brelse: | ||
| 404 | brelse(bh); | ||
| 405 | error_close: | ||
| 406 | close_bdev_excl(bdev); | ||
| 407 | error: | ||
| 408 | continue; | ||
| 409 | } | ||
| 410 | if (fs_devices->open_devices == 0) { | ||
| 411 | ret = -EIO; | ||
| 412 | goto out; | ||
| 413 | } | ||
| 414 | fs_devices->mounted = 1; | ||
| 415 | fs_devices->latest_bdev = latest_bdev; | ||
| 416 | fs_devices->latest_devid = latest_devid; | ||
| 417 | fs_devices->latest_trans = latest_transid; | ||
| 418 | out: | ||
| 419 | mutex_unlock(&uuid_mutex); | ||
| 420 | return ret; | ||
| 421 | } | ||
| 422 | |||
| 423 | int btrfs_scan_one_device(const char *path, int flags, void *holder, | ||
| 424 | struct btrfs_fs_devices **fs_devices_ret) | ||
| 425 | { | ||
| 426 | struct btrfs_super_block *disk_super; | ||
| 427 | struct block_device *bdev; | ||
| 428 | struct buffer_head *bh; | ||
| 429 | int ret; | ||
| 430 | u64 devid; | ||
| 431 | u64 transid; | ||
| 432 | |||
| 433 | mutex_lock(&uuid_mutex); | ||
| 434 | |||
| 435 | bdev = open_bdev_excl(path, flags, holder); | ||
| 436 | |||
| 437 | if (IS_ERR(bdev)) { | ||
| 438 | ret = PTR_ERR(bdev); | ||
| 439 | goto error; | ||
| 440 | } | ||
| 441 | |||
| 442 | ret = set_blocksize(bdev, 4096); | ||
| 443 | if (ret) | ||
| 444 | goto error_close; | ||
| 445 | bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); | ||
| 446 | if (!bh) { | ||
| 447 | ret = -EIO; | ||
| 448 | goto error_close; | ||
| 449 | } | ||
| 450 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 451 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | ||
| 452 | sizeof(disk_super->magic))) { | ||
| 453 | ret = -EINVAL; | ||
| 454 | goto error_brelse; | ||
| 455 | } | ||
| 456 | devid = le64_to_cpu(disk_super->dev_item.devid); | ||
| 457 | transid = btrfs_super_generation(disk_super); | ||
| 458 | if (disk_super->label[0]) | ||
| 459 | printk("device label %s ", disk_super->label); | ||
| 460 | else { | ||
| 461 | /* FIXME, make a real uuid parser */ | ||
| 462 | printk("device fsid %llx-%llx ", | ||
| 463 | *(unsigned long long *)disk_super->fsid, | ||
| 464 | *(unsigned long long *)(disk_super->fsid + 8)); | ||
| 465 | } | ||
| 466 | printk("devid %Lu transid %Lu %s\n", devid, transid, path); | ||
| 467 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | ||
| 468 | |||
| 469 | error_brelse: | ||
| 470 | brelse(bh); | ||
| 471 | error_close: | ||
| 472 | close_bdev_excl(bdev); | ||
| 473 | error: | ||
| 474 | mutex_unlock(&uuid_mutex); | ||
| 475 | return ret; | ||
| 476 | } | ||
| 477 | |||
| 478 | /* | ||
| 479 | * this uses a pretty simple search, the expectation is that it is | ||
| 480 | * called very infrequently and that a given device has a small number | ||
| 481 | * of extents | ||
| 482 | */ | ||
| 483 | static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, | ||
| 484 | struct btrfs_device *device, | ||
| 485 | struct btrfs_path *path, | ||
| 486 | u64 num_bytes, u64 *start) | ||
| 487 | { | ||
| 488 | struct btrfs_key key; | ||
| 489 | struct btrfs_root *root = device->dev_root; | ||
| 490 | struct btrfs_dev_extent *dev_extent = NULL; | ||
| 491 | u64 hole_size = 0; | ||
| 492 | u64 last_byte = 0; | ||
| 493 | u64 search_start = 0; | ||
| 494 | u64 search_end = device->total_bytes; | ||
| 495 | int ret; | ||
| 496 | int slot = 0; | ||
| 497 | int start_found; | ||
| 498 | struct extent_buffer *l; | ||
| 499 | |||
| 500 | start_found = 0; | ||
| 501 | path->reada = 2; | ||
| 502 | |||
| 503 | /* FIXME use last free of some kind */ | ||
| 504 | |||
| 505 | /* we don't want to overwrite the superblock on the drive, | ||
| 506 | * so we make sure to start at an offset of at least 1MB | ||
| 507 | */ | ||
| 508 | search_start = max((u64)1024 * 1024, search_start); | ||
| 509 | |||
| 510 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | ||
| 511 | search_start = max(root->fs_info->alloc_start, search_start); | ||
| 512 | |||
| 513 | key.objectid = device->devid; | ||
| 514 | key.offset = search_start; | ||
| 515 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
| 516 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | ||
| 517 | if (ret < 0) | ||
| 518 | goto error; | ||
| 519 | ret = btrfs_previous_item(root, path, 0, key.type); | ||
| 520 | if (ret < 0) | ||
| 521 | goto error; | ||
| 522 | l = path->nodes[0]; | ||
| 523 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
| 524 | while (1) { | ||
| 525 | l = path->nodes[0]; | ||
| 526 | slot = path->slots[0]; | ||
| 527 | if (slot >= btrfs_header_nritems(l)) { | ||
| 528 | ret = btrfs_next_leaf(root, path); | ||
| 529 | if (ret == 0) | ||
| 530 | continue; | ||
| 531 | if (ret < 0) | ||
| 532 | goto error; | ||
| 533 | no_more_items: | ||
| 534 | if (!start_found) { | ||
| 535 | if (search_start >= search_end) { | ||
| 536 | ret = -ENOSPC; | ||
| 537 | goto error; | ||
| 538 | } | ||
| 539 | *start = search_start; | ||
| 540 | start_found = 1; | ||
| 541 | goto check_pending; | ||
| 542 | } | ||
| 543 | *start = last_byte > search_start ? | ||
| 544 | last_byte : search_start; | ||
| 545 | if (search_end <= *start) { | ||
| 546 | ret = -ENOSPC; | ||
| 547 | goto error; | ||
| 548 | } | ||
| 549 | goto check_pending; | ||
| 550 | } | ||
| 551 | btrfs_item_key_to_cpu(l, &key, slot); | ||
| 552 | |||
| 553 | if (key.objectid < device->devid) | ||
| 554 | goto next; | ||
| 555 | |||
| 556 | if (key.objectid > device->devid) | ||
| 557 | goto no_more_items; | ||
| 558 | |||
| 559 | if (key.offset >= search_start && key.offset > last_byte && | ||
| 560 | start_found) { | ||
| 561 | if (last_byte < search_start) | ||
| 562 | last_byte = search_start; | ||
| 563 | hole_size = key.offset - last_byte; | ||
| 564 | if (key.offset > last_byte && | ||
| 565 | hole_size >= num_bytes) { | ||
| 566 | *start = last_byte; | ||
| 567 | goto check_pending; | ||
| 568 | } | ||
| 569 | } | ||
| 570 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { | ||
| 571 | goto next; | ||
| 572 | } | ||
| 573 | |||
| 574 | start_found = 1; | ||
| 575 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
| 576 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | ||
| 577 | next: | ||
| 578 | path->slots[0]++; | ||
| 579 | cond_resched(); | ||
| 580 | } | ||
| 581 | check_pending: | ||
| 582 | /* we have to make sure we didn't find an extent that has already | ||
| 583 | * been allocated by the map tree or the original allocation | ||
| 584 | */ | ||
| 585 | btrfs_release_path(root, path); | ||
| 586 | BUG_ON(*start < search_start); | ||
| 587 | |||
| 588 | if (*start + num_bytes > search_end) { | ||
| 589 | ret = -ENOSPC; | ||
| 590 | goto error; | ||
| 591 | } | ||
| 592 | /* check for pending inserts here */ | ||
| 593 | return 0; | ||
| 594 | |||
| 595 | error: | ||
| 596 | btrfs_release_path(root, path); | ||
| 597 | return ret; | ||
| 598 | } | ||
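
find_free_dev_extent() is a first-fit scan: walk the device's extent items in key order, track last_byte past the end of each one, and take the first gap of at least num_bytes, never starting below 1MB so the superblock cannot be overwritten. The same scan over a sorted in-memory list, as illustrative user-space code rather than the kernel helper:

#include <stdio.h>

struct extent { unsigned long long start, len; };

/* first-fit hole >= num_bytes in a list sorted by start;
 * search_start mimics the 1MB superblock keep-out */
static int find_free(const struct extent *ext, int n,
		     unsigned long long dev_size,
		     unsigned long long num_bytes,
		     unsigned long long search_start,
		     unsigned long long *result)
{
	unsigned long long last_byte = search_start;

	for (int i = 0; i < n; i++) {
		if (ext[i].start + ext[i].len <= last_byte)
			continue;	/* entirely below the window */
		if (ext[i].start > last_byte &&
		    ext[i].start - last_byte >= num_bytes) {
			*result = last_byte;
			return 0;
		}
		last_byte = ext[i].start + ext[i].len;
	}
	if (dev_size > last_byte && dev_size - last_byte >= num_bytes) {
		*result = last_byte;	/* hole after the final extent */
		return 0;
	}
	return -1;			/* -ENOSPC in the kernel version */
}

int main(void)
{
	struct extent ext[] = { { 1 << 20, 4 << 20 }, { 8 << 20, 2 << 20 } };
	unsigned long long where;

	if (!find_free(ext, 2, 64ULL << 20, 2 << 20, 1 << 20, &where))
		printf("free extent at %llu\n", where);	/* the 5MB mark */
	return 0;
}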
| 599 | |||
| 600 | int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | ||
| 601 | struct btrfs_device *device, | ||
| 602 | u64 start) | ||
| 603 | { | ||
| 604 | int ret; | ||
| 605 | struct btrfs_path *path; | ||
| 606 | struct btrfs_root *root = device->dev_root; | ||
| 607 | struct btrfs_key key; | ||
| 608 | struct btrfs_key found_key; | ||
| 609 | struct extent_buffer *leaf = NULL; | ||
| 610 | struct btrfs_dev_extent *extent = NULL; | ||
| 611 | |||
| 612 | path = btrfs_alloc_path(); | ||
| 613 | if (!path) | ||
| 614 | return -ENOMEM; | ||
| 615 | |||
| 616 | key.objectid = device->devid; | ||
| 617 | key.offset = start; | ||
| 618 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
| 619 | |||
| 620 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 621 | if (ret > 0) { | ||
| 622 | ret = btrfs_previous_item(root, path, key.objectid, | ||
| 623 | BTRFS_DEV_EXTENT_KEY); | ||
| 624 | BUG_ON(ret); | ||
| 625 | leaf = path->nodes[0]; | ||
| 626 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 627 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
| 628 | struct btrfs_dev_extent); | ||
| 629 | BUG_ON(found_key.offset > start || found_key.offset + | ||
| 630 | btrfs_dev_extent_length(leaf, extent) < start); | ||
| 631 | ret = 0; | ||
| 632 | } else if (ret == 0) { | ||
| 633 | leaf = path->nodes[0]; | ||
| 634 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
| 635 | struct btrfs_dev_extent); | ||
| 636 | } | ||
| 637 | BUG_ON(ret); | ||
| 638 | |||
| 639 | if (device->bytes_used > 0) | ||
| 640 | device->bytes_used -= btrfs_dev_extent_length(leaf, extent); | ||
| 641 | ret = btrfs_del_item(trans, root, path); | ||
| 642 | BUG_ON(ret); | ||
| 643 | |||
| 644 | btrfs_free_path(path); | ||
| 645 | return ret; | ||
| 646 | } | ||
| 647 | |||
| 648 | int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | ||
| 649 | struct btrfs_device *device, | ||
| 650 | u64 chunk_tree, u64 chunk_objectid, | ||
| 651 | u64 chunk_offset, | ||
| 652 | u64 num_bytes, u64 *start) | ||
| 653 | { | ||
| 654 | int ret; | ||
| 655 | struct btrfs_path *path; | ||
| 656 | struct btrfs_root *root = device->dev_root; | ||
| 657 | struct btrfs_dev_extent *extent; | ||
| 658 | struct extent_buffer *leaf; | ||
| 659 | struct btrfs_key key; | ||
| 660 | |||
| 661 | WARN_ON(!device->in_fs_metadata); | ||
| 662 | path = btrfs_alloc_path(); | ||
| 663 | if (!path) | ||
| 664 | return -ENOMEM; | ||
| 665 | |||
| 666 | ret = find_free_dev_extent(trans, device, path, num_bytes, start); | ||
| 667 | if (ret) { | ||
| 668 | goto err; | ||
| 669 | } | ||
| 670 | |||
| 671 | key.objectid = device->devid; | ||
| 672 | key.offset = *start; | ||
| 673 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
| 674 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 675 | sizeof(*extent)); | ||
| 676 | BUG_ON(ret); | ||
| 677 | |||
| 678 | leaf = path->nodes[0]; | ||
| 679 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
| 680 | struct btrfs_dev_extent); | ||
| 681 | btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree); | ||
| 682 | btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid); | ||
| 683 | btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); | ||
| 684 | |||
| 685 | write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, | ||
| 686 | (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), | ||
| 687 | BTRFS_UUID_SIZE); | ||
| 688 | |||
| 689 | btrfs_set_dev_extent_length(leaf, extent, num_bytes); | ||
| 690 | btrfs_mark_buffer_dirty(leaf); | ||
| 691 | err: | ||
| 692 | btrfs_free_path(path); | ||
| 693 | return ret; | ||
| 694 | } | ||
| 695 | |||
| 696 | static noinline int find_next_chunk(struct btrfs_root *root, | ||
| 697 | u64 objectid, u64 *offset) | ||
| 698 | { | ||
| 699 | struct btrfs_path *path; | ||
| 700 | int ret; | ||
| 701 | struct btrfs_key key; | ||
| 702 | struct btrfs_chunk *chunk; | ||
| 703 | struct btrfs_key found_key; | ||
| 704 | |||
| 705 | path = btrfs_alloc_path(); | ||
| 706 | BUG_ON(!path); | ||
| 707 | |||
| 708 | key.objectid = objectid; | ||
| 709 | key.offset = (u64)-1; | ||
| 710 | key.type = BTRFS_CHUNK_ITEM_KEY; | ||
| 711 | |||
| 712 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 713 | if (ret < 0) | ||
| 714 | goto error; | ||
| 715 | |||
| 716 | BUG_ON(ret == 0); | ||
| 717 | |||
| 718 | ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); | ||
| 719 | if (ret) { | ||
| 720 | *offset = 0; | ||
| 721 | } else { | ||
| 722 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 723 | path->slots[0]); | ||
| 724 | if (found_key.objectid != objectid) | ||
| 725 | *offset = 0; | ||
| 726 | else { | ||
| 727 | chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 728 | struct btrfs_chunk); | ||
| 729 | *offset = found_key.offset + | ||
| 730 | btrfs_chunk_length(path->nodes[0], chunk); | ||
| 731 | } | ||
| 732 | } | ||
| 733 | ret = 0; | ||
| 734 | error: | ||
| 735 | btrfs_free_path(path); | ||
| 736 | return ret; | ||
| 737 | } | ||
| 738 | |||
| 739 | static noinline int find_next_devid(struct btrfs_root *root, | ||
| 740 | struct btrfs_path *path, u64 *objectid) | ||
| 741 | { | ||
| 742 | int ret; | ||
| 743 | struct btrfs_key key; | ||
| 744 | struct btrfs_key found_key; | ||
| 745 | |||
| 746 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | ||
| 747 | key.type = BTRFS_DEV_ITEM_KEY; | ||
| 748 | key.offset = (u64)-1; | ||
| 749 | |||
| 750 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 751 | if (ret < 0) | ||
| 752 | goto error; | ||
| 753 | |||
| 754 | BUG_ON(ret == 0); | ||
| 755 | |||
| 756 | ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, | ||
| 757 | BTRFS_DEV_ITEM_KEY); | ||
| 758 | if (ret) { | ||
| 759 | *objectid = 1; | ||
| 760 | } else { | ||
| 761 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 762 | path->slots[0]); | ||
| 763 | *objectid = found_key.offset + 1; | ||
| 764 | } | ||
| 765 | ret = 0; | ||
| 766 | error: | ||
| 767 | btrfs_release_path(root, path); | ||
| 768 | return ret; | ||
| 769 | } | ||
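
find_next_chunk() and find_next_devid() share one btree idiom: search for (objectid, (u64)-1), a key that can never match, so the search lands just past the last real key; btrfs_previous_item() then steps back to it, and the next free id is that key's offset plus one (or plus the chunk length, for chunks). The array analogue of the idiom, for illustration:

#include <stdio.h>

/* keys sorted ascending; returns lowest unused id, the array-based
 * analogue of search for (u64)-1 followed by previous_item */
static unsigned long long find_next_devid_demo(const unsigned long long *used,
					       int n)
{
	/* searching for (u64)-1 would land at index n; step back one */
	if (n == 0)
		return 1;		/* empty tree: start at 1 */
	return used[n - 1] + 1;		/* last existing key, plus one */
}

int main(void)
{
	unsigned long long devids[] = { 1, 2, 5 };

	/* holes (3, 4) are not reused; allocation strictly appends */
	printf("next devid: %llu\n", find_next_devid_demo(devids, 3));
	return 0;
}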
| 770 | |||
| 771 | /* | ||
| 772 | * the device information is stored in the chunk root | ||
| 773 | * the btrfs_device struct should be fully filled in | ||
| 774 | */ | ||
| 775 | int btrfs_add_device(struct btrfs_trans_handle *trans, | ||
| 776 | struct btrfs_root *root, | ||
| 777 | struct btrfs_device *device) | ||
| 778 | { | ||
| 779 | int ret; | ||
| 780 | struct btrfs_path *path; | ||
| 781 | struct btrfs_dev_item *dev_item; | ||
| 782 | struct extent_buffer *leaf; | ||
| 783 | struct btrfs_key key; | ||
| 784 | unsigned long ptr; | ||
| 785 | u64 free_devid = 0; | ||
| 786 | |||
| 787 | root = root->fs_info->chunk_root; | ||
| 788 | |||
| 789 | path = btrfs_alloc_path(); | ||
| 790 | if (!path) | ||
| 791 | return -ENOMEM; | ||
| 792 | |||
| 793 | ret = find_next_devid(root, path, &free_devid); | ||
| 794 | if (ret) | ||
| 795 | goto out; | ||
| 796 | |||
| 797 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | ||
| 798 | key.type = BTRFS_DEV_ITEM_KEY; | ||
| 799 | key.offset = free_devid; | ||
| 800 | |||
| 801 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 802 | sizeof(*dev_item)); | ||
| 803 | if (ret) | ||
| 804 | goto out; | ||
| 805 | |||
| 806 | leaf = path->nodes[0]; | ||
| 807 | dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); | ||
| 808 | |||
| 809 | device->devid = free_devid; | ||
| 810 | btrfs_set_device_id(leaf, dev_item, device->devid); | ||
| 811 | btrfs_set_device_type(leaf, dev_item, device->type); | ||
| 812 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | ||
| 813 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | ||
| 814 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | ||
| 815 | btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); | ||
| 816 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | ||
| 817 | btrfs_set_device_group(leaf, dev_item, 0); | ||
| 818 | btrfs_set_device_seek_speed(leaf, dev_item, 0); | ||
| 819 | btrfs_set_device_bandwidth(leaf, dev_item, 0); | ||
| 820 | |||
| 821 | ptr = (unsigned long)btrfs_device_uuid(dev_item); | ||
| 822 | write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | ||
| 823 | btrfs_mark_buffer_dirty(leaf); | ||
| 824 | ret = 0; | ||
| 825 | |||
| 826 | out: | ||
| 827 | btrfs_free_path(path); | ||
| 828 | return ret; | ||
| 829 | } | ||
| 830 | |||
| 831 | static int btrfs_rm_dev_item(struct btrfs_root *root, | ||
| 832 | struct btrfs_device *device) | ||
| 833 | { | ||
| 834 | int ret; | ||
| 835 | struct btrfs_path *path; | ||
| 836 | struct block_device *bdev = device->bdev; | ||
| 837 | struct btrfs_device *next_dev; | ||
| 838 | struct btrfs_key key; | ||
| 839 | u64 total_bytes; | ||
| 840 | struct btrfs_fs_devices *fs_devices; | ||
| 841 | struct btrfs_trans_handle *trans; | ||
| 842 | |||
| 843 | root = root->fs_info->chunk_root; | ||
| 844 | |||
| 845 | path = btrfs_alloc_path(); | ||
| 846 | if (!path) | ||
| 847 | return -ENOMEM; | ||
| 848 | |||
| 849 | trans = btrfs_start_transaction(root, 1); | ||
| 850 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | ||
| 851 | key.type = BTRFS_DEV_ITEM_KEY; | ||
| 852 | key.offset = device->devid; | ||
| 853 | lock_chunks(root); | ||
| 854 | |||
| 855 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 856 | if (ret < 0) | ||
| 857 | goto out; | ||
| 858 | |||
| 859 | if (ret > 0) { | ||
| 860 | ret = -ENOENT; | ||
| 861 | goto out; | ||
| 862 | } | ||
| 863 | |||
| 864 | ret = btrfs_del_item(trans, root, path); | ||
| 865 | if (ret) | ||
| 866 | goto out; | ||
| 867 | |||
| 868 | /* | ||
| 869 | * at this point, the device is zero sized. We want to | ||
| 870 | * remove it from the devices list and zero out the old super | ||
| 871 | */ | ||
| 872 | list_del_init(&device->dev_list); | ||
| 873 | list_del_init(&device->dev_alloc_list); | ||
| 874 | fs_devices = root->fs_info->fs_devices; | ||
| 875 | |||
| 876 | next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, | ||
| 877 | dev_list); | ||
| 878 | if (bdev == root->fs_info->sb->s_bdev) | ||
| 879 | root->fs_info->sb->s_bdev = next_dev->bdev; | ||
| 880 | if (bdev == fs_devices->latest_bdev) | ||
| 881 | fs_devices->latest_bdev = next_dev->bdev; | ||
| 882 | |||
| 883 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); | ||
| 884 | btrfs_set_super_num_devices(&root->fs_info->super_copy, | ||
| 885 | total_bytes - 1); | ||
| 886 | out: | ||
| 887 | btrfs_free_path(path); | ||
| 888 | unlock_chunks(root); | ||
| 889 | btrfs_commit_transaction(trans, root); | ||
| 890 | return ret; | ||
| 891 | } | ||
| 892 | |||
| 893 | int btrfs_rm_device(struct btrfs_root *root, char *device_path) | ||
| 894 | { | ||
| 895 | struct btrfs_device *device; | ||
| 896 | struct block_device *bdev; | ||
| 897 | struct buffer_head *bh = NULL; | ||
| 898 | struct btrfs_super_block *disk_super; | ||
| 899 | u64 all_avail; | ||
| 900 | u64 devid; | ||
| 901 | int ret = 0; | ||
| 902 | |||
| 903 | mutex_lock(&uuid_mutex); | ||
| 904 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 905 | |||
| 906 | all_avail = root->fs_info->avail_data_alloc_bits | | ||
| 907 | root->fs_info->avail_system_alloc_bits | | ||
| 908 | root->fs_info->avail_metadata_alloc_bits; | ||
| 909 | |||
| 910 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && | ||
| 911 | btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) { | ||
| 912 | printk("btrfs: unable to go below four devices on raid10\n"); | ||
| 913 | ret = -EINVAL; | ||
| 914 | goto out; | ||
| 915 | } | ||
| 916 | |||
| 917 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && | ||
| 918 | btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) { | ||
| 919 | printk("btrfs: unable to go below two devices on raid1\n"); | ||
| 920 | ret = -EINVAL; | ||
| 921 | goto out; | ||
| 922 | } | ||
| 923 | |||
| 924 | if (strcmp(device_path, "missing") == 0) { | ||
| 925 | struct list_head *cur; | ||
| 926 | struct list_head *devices; | ||
| 927 | struct btrfs_device *tmp; | ||
| 928 | |||
| 929 | device = NULL; | ||
| 930 | devices = &root->fs_info->fs_devices->devices; | ||
| 931 | list_for_each(cur, devices) { | ||
| 932 | tmp = list_entry(cur, struct btrfs_device, dev_list); | ||
| 933 | if (tmp->in_fs_metadata && !tmp->bdev) { | ||
| 934 | device = tmp; | ||
| 935 | break; | ||
| 936 | } | ||
| 937 | } | ||
| 938 | bdev = NULL; | ||
| 939 | bh = NULL; | ||
| 940 | disk_super = NULL; | ||
| 941 | if (!device) { | ||
| 942 | printk("btrfs: no missing devices found to remove\n"); | ||
| 943 | ret = -ENOENT; | ||
| 944 | goto out; | ||
| 945 | } | ||
| 946 | } else { | ||
| 947 | bdev = open_bdev_excl(device_path, 0, | ||
| 948 | root->fs_info->bdev_holder); | ||
| 949 | if (IS_ERR(bdev)) { | ||
| 950 | ret = PTR_ERR(bdev); | ||
| 951 | goto out; | ||
| 952 | } | ||
| 953 | |||
| 954 | bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); | ||
| 955 | if (!bh) { | ||
| 956 | ret = -EIO; | ||
| 957 | goto error_close; | ||
| 958 | } | ||
| 959 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 960 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | ||
| 961 | sizeof(disk_super->magic))) { | ||
| 962 | ret = -ENOENT; | ||
| 963 | goto error_brelse; | ||
| 964 | } | ||
| 965 | if (memcmp(disk_super->fsid, root->fs_info->fsid, | ||
| 966 | BTRFS_FSID_SIZE)) { | ||
| 967 | ret = -ENOENT; | ||
| 968 | goto error_brelse; | ||
| 969 | } | ||
| 970 | devid = le64_to_cpu(disk_super->dev_item.devid); | ||
| 971 | device = btrfs_find_device(root, devid, NULL); | ||
| 972 | if (!device) { | ||
| 973 | ret = -ENOENT; | ||
| 974 | goto error_brelse; | ||
| 975 | } | ||
| 976 | |||
| 977 | } | ||
| 978 | root->fs_info->fs_devices->num_devices--; | ||
| 979 | root->fs_info->fs_devices->open_devices--; | ||
| 980 | |||
| 981 | ret = btrfs_shrink_device(device, 0); | ||
| 982 | if (ret) | ||
| 983 | goto error_brelse; | ||
| 984 | |||
| 985 | |||
| 986 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); | ||
| 987 | if (ret) | ||
| 988 | goto error_brelse; | ||
| 989 | |||
| 990 | if (bh) { | ||
| 991 | /* make sure this device isn't detected as part of | ||
| 992 | * the FS anymore | ||
| 993 | */ | ||
| 994 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | ||
| 995 | set_buffer_dirty(bh); | ||
| 996 | sync_dirty_buffer(bh); | ||
| 997 | |||
| 998 | brelse(bh); | ||
| 999 | } | ||
| 1000 | |||
| 1001 | if (device->bdev) { | ||
| 1002 | /* one close for the device struct or super_block */ | ||
| 1003 | close_bdev_excl(device->bdev); | ||
| 1004 | } | ||
| 1005 | if (bdev) { | ||
| 1006 | /* one close for us */ | ||
| 1007 | close_bdev_excl(bdev); | ||
| 1008 | } | ||
| 1009 | kfree(device->name); | ||
| 1010 | kfree(device); | ||
| 1011 | ret = 0; | ||
| 1012 | goto out; | ||
| 1013 | |||
| 1014 | error_brelse: | ||
| 1015 | brelse(bh); | ||
| 1016 | error_close: | ||
| 1017 | if (bdev) | ||
| 1018 | close_bdev_excl(bdev); | ||
| 1019 | out: | ||
| 1020 | mutex_unlock(&root->fs_info->volume_mutex); | ||
| 1021 | mutex_unlock(&uuid_mutex); | ||
| 1022 | return ret; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | ||
| 1026 | { | ||
| 1027 | struct btrfs_trans_handle *trans; | ||
| 1028 | struct btrfs_device *device; | ||
| 1029 | struct block_device *bdev; | ||
| 1030 | struct list_head *cur; | ||
| 1031 | struct list_head *devices; | ||
| 1032 | u64 total_bytes; | ||
| 1033 | int ret = 0; | ||
| 1034 | |||
| 1035 | |||
| 1036 | bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); | ||
| 1037 | if (IS_ERR(bdev)) { | ||
| 1038 | return PTR_ERR(bdev); | ||
| 1039 | } | ||
| 1040 | |||
| 1041 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | ||
| 1042 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 1043 | |||
| 1044 | trans = btrfs_start_transaction(root, 1); | ||
| 1045 | lock_chunks(root); | ||
| 1046 | devices = &root->fs_info->fs_devices->devices; | ||
| 1047 | list_for_each(cur, devices) { | ||
| 1048 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1049 | if (device->bdev == bdev) { | ||
| 1050 | ret = -EEXIST; | ||
| 1051 | goto out; | ||
| 1052 | } | ||
| 1053 | } | ||
| 1054 | |||
| 1055 | device = kzalloc(sizeof(*device), GFP_NOFS); | ||
| 1056 | if (!device) { | ||
| 1057 | /* we can safely leave the fs_devices entry around */ | ||
| 1058 | ret = -ENOMEM; | ||
| 1059 | goto out_close_bdev; | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | device->barriers = 1; | ||
| 1063 | device->work.func = pending_bios_fn; | ||
| 1064 | generate_random_uuid(device->uuid); | ||
| 1065 | spin_lock_init(&device->io_lock); | ||
| 1066 | device->name = kstrdup(device_path, GFP_NOFS); | ||
| 1067 | if (!device->name) { | ||
| 1068 | kfree(device); | ||
| 1069 | ret = -ENOMEM; goto out_close_bdev; | ||
| 1070 | } | ||
| 1071 | device->io_width = root->sectorsize; | ||
| 1072 | device->io_align = root->sectorsize; | ||
| 1073 | device->sector_size = root->sectorsize; | ||
| 1074 | device->total_bytes = i_size_read(bdev->bd_inode); | ||
| 1075 | device->dev_root = root->fs_info->dev_root; | ||
| 1076 | device->bdev = bdev; | ||
| 1077 | device->in_fs_metadata = 1; | ||
| 1078 | |||
| 1079 | ret = btrfs_add_device(trans, root, device); | ||
| 1080 | if (ret) | ||
| 1081 | goto out_close_bdev; | ||
| 1082 | |||
| 1083 | set_blocksize(device->bdev, 4096); | ||
| 1084 | |||
| 1085 | total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
| 1086 | btrfs_set_super_total_bytes(&root->fs_info->super_copy, | ||
| 1087 | total_bytes + device->total_bytes); | ||
| 1088 | |||
| 1089 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); | ||
| 1090 | btrfs_set_super_num_devices(&root->fs_info->super_copy, | ||
| 1091 | total_bytes + 1); | ||
| 1092 | |||
| 1093 | list_add(&device->dev_list, &root->fs_info->fs_devices->devices); | ||
| 1094 | list_add(&device->dev_alloc_list, | ||
| 1095 | &root->fs_info->fs_devices->alloc_list); | ||
| 1096 | root->fs_info->fs_devices->num_devices++; | ||
| 1097 | root->fs_info->fs_devices->open_devices++; | ||
| 1098 | out: | ||
| 1099 | unlock_chunks(root); | ||
| 1100 | btrfs_end_transaction(trans, root); | ||
| 1101 | mutex_unlock(&root->fs_info->volume_mutex); | ||
| 1102 | |||
| 1103 | return ret; | ||
| 1104 | |||
| 1105 | out_close_bdev: | ||
| 1106 | close_bdev_excl(bdev); | ||
| 1107 | goto out; | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | int noinline btrfs_update_device(struct btrfs_trans_handle *trans, | ||
| 1111 | struct btrfs_device *device) | ||
| 1112 | { | ||
| 1113 | int ret; | ||
| 1114 | struct btrfs_path *path; | ||
| 1115 | struct btrfs_root *root; | ||
| 1116 | struct btrfs_dev_item *dev_item; | ||
| 1117 | struct extent_buffer *leaf; | ||
| 1118 | struct btrfs_key key; | ||
| 1119 | |||
| 1120 | root = device->dev_root->fs_info->chunk_root; | ||
| 1121 | |||
| 1122 | path = btrfs_alloc_path(); | ||
| 1123 | if (!path) | ||
| 1124 | return -ENOMEM; | ||
| 1125 | |||
| 1126 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | ||
| 1127 | key.type = BTRFS_DEV_ITEM_KEY; | ||
| 1128 | key.offset = device->devid; | ||
| 1129 | |||
| 1130 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 1131 | if (ret < 0) | ||
| 1132 | goto out; | ||
| 1133 | |||
| 1134 | if (ret > 0) { | ||
| 1135 | ret = -ENOENT; | ||
| 1136 | goto out; | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | leaf = path->nodes[0]; | ||
| 1140 | dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); | ||
| 1141 | |||
| 1142 | btrfs_set_device_id(leaf, dev_item, device->devid); | ||
| 1143 | btrfs_set_device_type(leaf, dev_item, device->type); | ||
| 1144 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | ||
| 1145 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | ||
| 1146 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | ||
| 1147 | btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); | ||
| 1148 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | ||
| 1149 | btrfs_mark_buffer_dirty(leaf); | ||
| 1150 | |||
| 1151 | out: | ||
| 1152 | btrfs_free_path(path); | ||
| 1153 | return ret; | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | ||
| 1157 | struct btrfs_device *device, u64 new_size) | ||
| 1158 | { | ||
| 1159 | struct btrfs_super_block *super_copy = | ||
| 1160 | &device->dev_root->fs_info->super_copy; | ||
| 1161 | u64 old_total = btrfs_super_total_bytes(super_copy); | ||
| 1162 | u64 diff = new_size - device->total_bytes; | ||
| 1163 | |||
| 1164 | btrfs_set_super_total_bytes(super_copy, old_total + diff); | ||
| 1165 | return btrfs_update_device(trans, device); | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | ||
| 1169 | struct btrfs_device *device, u64 new_size) | ||
| 1170 | { | ||
| 1171 | int ret; | ||
| 1172 | lock_chunks(device->dev_root); | ||
| 1173 | ret = __btrfs_grow_device(trans, device, new_size); | ||
| 1174 | unlock_chunks(device->dev_root); | ||
| 1175 | return ret; | ||
| 1176 | } | ||
| 1177 | |||
| 1178 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | ||
| 1179 | struct btrfs_root *root, | ||
| 1180 | u64 chunk_tree, u64 chunk_objectid, | ||
| 1181 | u64 chunk_offset) | ||
| 1182 | { | ||
| 1183 | int ret; | ||
| 1184 | struct btrfs_path *path; | ||
| 1185 | struct btrfs_key key; | ||
| 1186 | |||
| 1187 | root = root->fs_info->chunk_root; | ||
| 1188 | path = btrfs_alloc_path(); | ||
| 1189 | if (!path) | ||
| 1190 | return -ENOMEM; | ||
| 1191 | |||
| 1192 | key.objectid = chunk_objectid; | ||
| 1193 | key.offset = chunk_offset; | ||
| 1194 | key.type = BTRFS_CHUNK_ITEM_KEY; | ||
| 1195 | |||
| 1196 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 1197 | BUG_ON(ret); | ||
| 1198 | |||
| 1199 | ret = btrfs_del_item(trans, root, path); | ||
| 1200 | BUG_ON(ret); | ||
| 1201 | |||
| 1202 | btrfs_free_path(path); | ||
| 1203 | return 0; | ||
| 1204 | } | ||
| 1205 | |||
| 1206 | int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, | ||
| 1207 | u64 chunk_offset) | ||
| 1208 | { | ||
| 1209 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | ||
| 1210 | struct btrfs_disk_key *disk_key; | ||
| 1211 | struct btrfs_chunk *chunk; | ||
| 1212 | u8 *ptr; | ||
| 1213 | int ret = 0; | ||
| 1214 | u32 num_stripes; | ||
| 1215 | u32 array_size; | ||
| 1216 | u32 len = 0; | ||
| 1217 | u32 cur; | ||
| 1218 | struct btrfs_key key; | ||
| 1219 | |||
| 1220 | array_size = btrfs_super_sys_array_size(super_copy); | ||
| 1221 | |||
| 1222 | ptr = super_copy->sys_chunk_array; | ||
| 1223 | cur = 0; | ||
| 1224 | |||
| 1225 | while (cur < array_size) { | ||
| 1226 | disk_key = (struct btrfs_disk_key *)ptr; | ||
| 1227 | btrfs_disk_key_to_cpu(&key, disk_key); | ||
| 1228 | |||
| 1229 | len = sizeof(*disk_key); | ||
| 1230 | |||
| 1231 | if (key.type == BTRFS_CHUNK_ITEM_KEY) { | ||
| 1232 | chunk = (struct btrfs_chunk *)(ptr + len); | ||
| 1233 | num_stripes = btrfs_stack_chunk_num_stripes(chunk); | ||
| 1234 | len += btrfs_chunk_item_size(num_stripes); | ||
| 1235 | } else { | ||
| 1236 | ret = -EIO; | ||
| 1237 | break; | ||
| 1238 | } | ||
| 1239 | if (key.objectid == chunk_objectid && | ||
| 1240 | key.offset == chunk_offset) { | ||
| 1241 | memmove(ptr, ptr + len, array_size - (cur + len)); | ||
| 1242 | array_size -= len; | ||
| 1243 | btrfs_set_super_sys_array_size(super_copy, array_size); | ||
| 1244 | } else { | ||
| 1245 | ptr += len; | ||
| 1246 | cur += len; | ||
| 1247 | } | ||
| 1248 | } | ||
| 1249 | return ret; | ||
| 1250 | } | ||
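
sys_chunk_array is a packed byte buffer of variable-length (disk_key, chunk) records, so deletion is a memmove of the tail down over the doomed record followed by shrinking the recorded size, exactly the move btrfs_del_sys_chunk() makes above. The same compaction in isolation, as illustrative user-space code:

#include <stdio.h>
#include <string.h>

/* delete len bytes at offset cur from a packed array of array_size bytes,
 * the way btrfs_del_sys_chunk() compacts sys_chunk_array */
static unsigned int del_record(unsigned char *buf, unsigned int array_size,
			       unsigned int cur, unsigned int len)
{
	memmove(buf + cur, buf + cur + len, array_size - (cur + len));
	return array_size - len;	/* new array size */
}

int main(void)
{
	unsigned char buf[] = "AAAABBBBBBCC";	/* records of 4, 6, 2 bytes */
	unsigned int size = 12;

	size = del_record(buf, size, 4, 6);	/* drop the middle record */
	buf[size] = '\0';
	printf("%s (size %u)\n", buf, size);	/* AAAACC (size 6) */
	return 0;
}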
| 1251 | |||
| 1252 | |||
| 1253 | int btrfs_relocate_chunk(struct btrfs_root *root, | ||
| 1254 | u64 chunk_tree, u64 chunk_objectid, | ||
| 1255 | u64 chunk_offset) | ||
| 1256 | { | ||
| 1257 | struct extent_map_tree *em_tree; | ||
| 1258 | struct btrfs_root *extent_root; | ||
| 1259 | struct btrfs_trans_handle *trans; | ||
| 1260 | struct extent_map *em; | ||
| 1261 | struct map_lookup *map; | ||
| 1262 | int ret; | ||
| 1263 | int i; | ||
| 1264 | |||
| 1265 | printk("btrfs relocating chunk %llu\n", | ||
| 1266 | (unsigned long long)chunk_offset); | ||
| 1267 | root = root->fs_info->chunk_root; | ||
| 1268 | extent_root = root->fs_info->extent_root; | ||
| 1269 | em_tree = &root->fs_info->mapping_tree.map_tree; | ||
| 1270 | |||
| 1271 | /* step one, relocate all the extents inside this chunk */ | ||
| 1272 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | ||
| 1273 | BUG_ON(ret); | ||
| 1274 | |||
| 1275 | trans = btrfs_start_transaction(root, 1); | ||
| 1276 | BUG_ON(!trans); | ||
| 1277 | |||
| 1278 | lock_chunks(root); | ||
| 1279 | |||
| 1280 | /* | ||
| 1281 | * step two, delete the device extents and the | ||
| 1282 | * chunk tree entries | ||
| 1283 | */ | ||
| 1284 | spin_lock(&em_tree->lock); | ||
| 1285 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | ||
| 1286 | spin_unlock(&em_tree->lock); | ||
| 1287 | |||
| 1288 | BUG_ON(em->start > chunk_offset || | ||
| 1289 | em->start + em->len < chunk_offset); | ||
| 1290 | map = (struct map_lookup *)em->bdev; | ||
| 1291 | |||
| 1292 | for (i = 0; i < map->num_stripes; i++) { | ||
| 1293 | ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, | ||
| 1294 | map->stripes[i].physical); | ||
| 1295 | BUG_ON(ret); | ||
| 1296 | |||
| 1297 | if (map->stripes[i].dev) { | ||
| 1298 | ret = btrfs_update_device(trans, map->stripes[i].dev); | ||
| 1299 | BUG_ON(ret); | ||
| 1300 | } | ||
| 1301 | } | ||
| 1302 | ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, | ||
| 1303 | chunk_offset); | ||
| 1304 | |||
| 1305 | BUG_ON(ret); | ||
| 1306 | |||
| 1307 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | ||
| 1308 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | ||
| 1309 | BUG_ON(ret); | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | ||
| 1313 | BUG_ON(ret); | ||
| 1314 | |||
| 1315 | spin_lock(&em_tree->lock); | ||
| 1316 | remove_extent_mapping(em_tree, em); | ||
| 1317 | spin_unlock(&em_tree->lock); | ||
| 1318 | |||
| 1319 | kfree(map); | ||
| 1320 | em->bdev = NULL; | ||
| 1321 | |||
| 1322 | /* once for the tree */ | ||
| 1323 | free_extent_map(em); | ||
| 1324 | /* once for us */ | ||
| 1325 | free_extent_map(em); | ||
| 1326 | |||
| 1327 | unlock_chunks(root); | ||
| 1328 | btrfs_end_transaction(trans, root); | ||
| 1329 | return 0; | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | static u64 div_factor(u64 num, int factor) | ||
| 1333 | { | ||
| 1334 | if (factor == 10) | ||
| 1335 | return num; | ||
| 1336 | num *= factor; | ||
| 1337 | do_div(num, 10); | ||
| 1338 | return num; | ||
| 1339 | } | ||
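
div_factor() returns num * factor / 10; it goes through do_div() because do_div divides a u64 in place and returns the remainder, avoiding the 64-bit division helpers that 32-bit kernels may lack. A plain user-space equivalent (the demo value is illustrative):

#include <stdio.h>

/* user-space stand-in for the kernel's div_factor(): num * factor / 10.
 * The kernel uses do_div(num, 10), which divides in place, because a
 * plain 64-bit '/' may need a libgcc helper on 32-bit targets. */
static unsigned long long div_factor_demo(unsigned long long num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	return num / 10;
}

int main(void)
{
	/* btrfs_balance() asks for factor 1, i.e. 10% of the device size
	 * (which it then caps at 1MB before shrinking) */
	printf("%llu\n", div_factor_demo(100ULL << 30, 1));
	return 0;
}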
| 1340 | |||
| 1341 | |||
| 1342 | int btrfs_balance(struct btrfs_root *dev_root) | ||
| 1343 | { | ||
| 1344 | int ret; | ||
| 1345 | struct list_head *cur; | ||
| 1346 | struct list_head *devices = &dev_root->fs_info->fs_devices->devices; | ||
| 1347 | struct btrfs_device *device; | ||
| 1348 | u64 old_size; | ||
| 1349 | u64 size_to_free; | ||
| 1350 | struct btrfs_path *path; | ||
| 1351 | struct btrfs_key key; | ||
| 1352 | struct btrfs_chunk *chunk; | ||
| 1353 | struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; | ||
| 1354 | struct btrfs_trans_handle *trans; | ||
| 1355 | struct btrfs_key found_key; | ||
| 1356 | |||
| 1357 | |||
| 1358 | mutex_lock(&dev_root->fs_info->volume_mutex); | ||
| 1359 | dev_root = dev_root->fs_info->dev_root; | ||
| 1360 | |||
| 1361 | /* step one, make some room on all the devices */ | ||
| 1362 | list_for_each(cur, devices) { | ||
| 1363 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1364 | old_size = device->total_bytes; | ||
| 1365 | size_to_free = div_factor(old_size, 1); | ||
| 1366 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); | ||
| 1367 | if (device->total_bytes - device->bytes_used > size_to_free) | ||
| 1368 | continue; | ||
| 1369 | |||
| 1370 | ret = btrfs_shrink_device(device, old_size - size_to_free); | ||
| 1371 | BUG_ON(ret); | ||
| 1372 | |||
| 1373 | trans = btrfs_start_transaction(dev_root, 1); | ||
| 1374 | BUG_ON(!trans); | ||
| 1375 | |||
| 1376 | ret = btrfs_grow_device(trans, device, old_size); | ||
| 1377 | BUG_ON(ret); | ||
| 1378 | |||
| 1379 | btrfs_end_transaction(trans, dev_root); | ||
| 1380 | } | ||
| 1381 | |||
| 1382 | /* step two, relocate all the chunks */ | ||
| 1383 | path = btrfs_alloc_path(); | ||
| 1384 | BUG_ON(!path); | ||
| 1385 | |||
| 1386 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | ||
| 1387 | key.offset = (u64)-1; | ||
| 1388 | key.type = BTRFS_CHUNK_ITEM_KEY; | ||
| 1389 | |||
| 1390 | while(1) { | ||
| 1391 | ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); | ||
| 1392 | if (ret < 0) | ||
| 1393 | goto error; | ||
| 1394 | |||
| 1395 | /* | ||
| 1396 | * this shouldn't happen, it means the last relocate | ||
| 1397 | * failed | ||
| 1398 | */ | ||
| 1399 | if (ret == 0) | ||
| 1400 | break; | ||
| 1401 | |||
| 1402 | ret = btrfs_previous_item(chunk_root, path, 0, | ||
| 1403 | BTRFS_CHUNK_ITEM_KEY); | ||
| 1404 | if (ret) | ||
| 1405 | break; | ||
| 1406 | |||
| 1407 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
| 1408 | path->slots[0]); | ||
| 1409 | if (found_key.objectid != key.objectid) | ||
| 1410 | break; | ||
| 1411 | |||
| 1412 | chunk = btrfs_item_ptr(path->nodes[0], | ||
| 1413 | path->slots[0], | ||
| 1414 | struct btrfs_chunk); | ||
| 1415 | key.offset = found_key.offset; | ||
| 1416 | /* chunk zero is special */ | ||
| 1417 | if (key.offset == 0) | ||
| 1418 | break; | ||
| 1419 | |||
| 1420 | btrfs_release_path(chunk_root, path); | ||
| 1421 | ret = btrfs_relocate_chunk(chunk_root, | ||
| 1422 | chunk_root->root_key.objectid, | ||
| 1423 | found_key.objectid, | ||
| 1424 | found_key.offset); | ||
| 1425 | BUG_ON(ret); | ||
| 1426 | } | ||
| 1427 | ret = 0; | ||
| 1428 | error: | ||
| 1429 | btrfs_free_path(path); | ||
| 1430 | mutex_unlock(&dev_root->fs_info->volume_mutex); | ||
| 1431 | return ret; | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | /* | ||
| 1435 | * shrinking a device means finding all of the device extents past | ||
| 1436 | * the new size, and then following the back refs to the chunks. | ||
| 1437 | * The chunk relocation code actually frees the device extent | ||
| 1438 | */ | ||
| 1439 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | ||
| 1440 | { | ||
| 1441 | struct btrfs_trans_handle *trans; | ||
| 1442 | struct btrfs_root *root = device->dev_root; | ||
| 1443 | struct btrfs_dev_extent *dev_extent = NULL; | ||
| 1444 | struct btrfs_path *path; | ||
| 1445 | u64 length; | ||
| 1446 | u64 chunk_tree; | ||
| 1447 | u64 chunk_objectid; | ||
| 1448 | u64 chunk_offset; | ||
| 1449 | int ret; | ||
| 1450 | int slot; | ||
| 1451 | struct extent_buffer *l; | ||
| 1452 | struct btrfs_key key; | ||
| 1453 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | ||
| 1454 | u64 old_total = btrfs_super_total_bytes(super_copy); | ||
| 1455 | u64 diff = device->total_bytes - new_size; | ||
| 1456 | |||
| 1457 | |||
| 1458 | path = btrfs_alloc_path(); | ||
| 1459 | if (!path) | ||
| 1460 | return -ENOMEM; | ||
| 1461 | |||
| 1462 | trans = btrfs_start_transaction(root, 1); | ||
| 1463 | if (!trans) { | ||
| 1464 | ret = -ENOMEM; | ||
| 1465 | goto done; | ||
| 1466 | } | ||
| 1467 | |||
| 1468 | path->reada = 2; | ||
| 1469 | |||
| 1470 | lock_chunks(root); | ||
| 1471 | |||
| 1472 | device->total_bytes = new_size; | ||
| 1473 | ret = btrfs_update_device(trans, device); | ||
| 1474 | if (ret) { | ||
| 1475 | unlock_chunks(root); | ||
| 1476 | btrfs_end_transaction(trans, root); | ||
| 1477 | goto done; | ||
| 1478 | } | ||
| 1479 | WARN_ON(diff > old_total); | ||
| 1480 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
| 1481 | unlock_chunks(root); | ||
| 1482 | btrfs_end_transaction(trans, root); | ||
| 1483 | |||
| 1484 | key.objectid = device->devid; | ||
| 1485 | key.offset = (u64)-1; | ||
| 1486 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
| 1487 | |||
| 1488 | while (1) { | ||
| 1489 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 1490 | if (ret < 0) | ||
| 1491 | goto done; | ||
| 1492 | |||
| 1493 | ret = btrfs_previous_item(root, path, 0, key.type); | ||
| 1494 | if (ret < 0) | ||
| 1495 | goto done; | ||
| 1496 | if (ret) { | ||
| 1497 | ret = 0; | ||
| 1498 | goto done; | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | l = path->nodes[0]; | ||
| 1502 | slot = path->slots[0]; | ||
| 1503 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
| 1504 | |||
| 1505 | if (key.objectid != device->devid) | ||
| 1506 | goto done; | ||
| 1507 | |||
| 1508 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
| 1509 | length = btrfs_dev_extent_length(l, dev_extent); | ||
| 1510 | |||
| 1511 | if (key.offset + length <= new_size) | ||
| 1512 | goto done; | ||
| 1513 | |||
| 1514 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | ||
| 1515 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | ||
| 1516 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); | ||
| 1517 | btrfs_release_path(root, path); | ||
| 1518 | |||
| 1519 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | ||
| 1520 | chunk_offset); | ||
| 1521 | if (ret) | ||
| 1522 | goto done; | ||
| 1523 | } | ||
| 1524 | |||
| 1525 | done: | ||
| 1526 | btrfs_free_path(path); | ||
| 1527 | return ret; | ||
| 1528 | } | ||
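The (devid, BTRFS_DEV_EXTENT_KEY, (u64)-1) key plus btrfs_previous_item() walks the device's extents backwards from the end of the tree, so chunks are relocated from the highest device offsets down. An illustrative caller — the target size is made up:

/* Illustrative only: shrink a device to 80GB. Extents that end beyond
 * the new size get their owning chunks relocated; the rest are untouched.
 */
ret = btrfs_shrink_device(device, 80ULL * 1024 * 1024 * 1024);
if (ret)
	printk("shrink to 80GB failed: %d\n", ret);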
| 1529 | |||
| 1530 | int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, | ||
| 1531 | struct btrfs_root *root, | ||
| 1532 | struct btrfs_key *key, | ||
| 1533 | struct btrfs_chunk *chunk, int item_size) | ||
| 1534 | { | ||
| 1535 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | ||
| 1536 | struct btrfs_disk_key disk_key; | ||
| 1537 | u32 array_size; | ||
| 1538 | u8 *ptr; | ||
| 1539 | |||
| 1540 | array_size = btrfs_super_sys_array_size(super_copy); | ||
| 1541 | if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) | ||
| 1542 | return -EFBIG; | ||
| 1543 | |||
| 1544 | ptr = super_copy->sys_chunk_array + array_size; | ||
| 1545 | btrfs_cpu_key_to_disk(&disk_key, key); | ||
| 1546 | memcpy(ptr, &disk_key, sizeof(disk_key)); | ||
| 1547 | ptr += sizeof(disk_key); | ||
| 1548 | memcpy(ptr, chunk, item_size); | ||
| 1549 | item_size += sizeof(disk_key); | ||
| 1550 | btrfs_set_super_sys_array_size(super_copy, array_size + item_size); | ||
| 1551 | return 0; | ||
| 1552 | } | ||
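The superblock's sys_chunk_array is a packed byte array of (disk key, chunk item) pairs with no padding, which is why item_size is grown by sizeof(disk_key) before the stored array size is updated. Schematically — a conceptual sketch, not disk-format documentation:

/* sys_chunk_array after two insertions:
 *
 *   +----------------+---------------------+----------------+-----
 *   | btrfs_disk_key | btrfs_chunk+stripes | btrfs_disk_key | ...
 *   +----------------+---------------------+----------------+-----
 *
 * Each entry takes sizeof(disk_key) + item_size bytes; the total is
 * capped at BTRFS_SYSTEM_CHUNK_ARRAY_SIZE, hence the -EFBIG above.
 */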
| 1553 | |||
| 1554 | static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size, | ||
| 1555 | int num_stripes, int sub_stripes) | ||
| 1556 | { | ||
| 1557 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) | ||
| 1558 | return calc_size; | ||
| 1559 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
| 1560 | return calc_size * (num_stripes / sub_stripes); | ||
| 1561 | else | ||
| 1562 | return calc_size * num_stripes; | ||
| 1563 | } | ||
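A quick worked example with calc_size = 1GB (values assumed): RAID1 and DUP expose one copy's worth of logical space, RAID10 one copy per mirror pair, RAID0 everything:

/* RAID1/DUP, any num_stripes:           1GB (mirrors of one logical GB)
 * RAID10, num_stripes=4, sub_stripes=2: 1GB * (4 / 2) = 2GB
 * RAID0,  num_stripes=4:                1GB * 4       = 4GB
 */
u64 bytes = chunk_bytes_by_type(BTRFS_BLOCK_GROUP_RAID10,
				1024ULL * 1024 * 1024, 4, 2); /* 2GB */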
| 1564 | |||
| 1565 | |||
| 1566 | int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | ||
| 1567 | struct btrfs_root *extent_root, u64 *start, | ||
| 1568 | u64 *num_bytes, u64 type) | ||
| 1569 | { | ||
| 1570 | u64 dev_offset; | ||
| 1571 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
| 1572 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; | ||
| 1573 | struct btrfs_path *path; | ||
| 1574 | struct btrfs_stripe *stripes; | ||
| 1575 | struct btrfs_device *device = NULL; | ||
| 1576 | struct btrfs_chunk *chunk; | ||
| 1577 | struct list_head private_devs; | ||
| 1578 | struct list_head *dev_list; | ||
| 1579 | struct list_head *cur; | ||
| 1580 | struct extent_map_tree *em_tree; | ||
| 1581 | struct map_lookup *map; | ||
| 1582 | struct extent_map *em; | ||
| 1583 | int min_stripe_size = 1 * 1024 * 1024; | ||
| 1584 | u64 physical; | ||
| 1585 | u64 calc_size = 1024 * 1024 * 1024; | ||
| 1586 | u64 max_chunk_size = calc_size; | ||
| 1587 | u64 min_free; | ||
| 1588 | u64 avail; | ||
| 1589 | u64 max_avail = 0; | ||
| 1590 | u64 percent_max; | ||
| 1591 | int num_stripes = 1; | ||
| 1592 | int min_stripes = 1; | ||
| 1593 | int sub_stripes = 0; | ||
| 1594 | int looped = 0; | ||
| 1595 | int ret; | ||
| 1596 | int index; | ||
| 1597 | int stripe_len = 64 * 1024; | ||
| 1598 | struct btrfs_key key; | ||
| 1599 | |||
| 1600 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | ||
| 1601 | (type & BTRFS_BLOCK_GROUP_DUP)) { | ||
| 1602 | WARN_ON(1); | ||
| 1603 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
| 1604 | } | ||
| 1605 | dev_list = &extent_root->fs_info->fs_devices->alloc_list; | ||
| 1606 | if (list_empty(dev_list)) | ||
| 1607 | return -ENOSPC; | ||
| 1608 | |||
| 1609 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | ||
| 1610 | num_stripes = extent_root->fs_info->fs_devices->open_devices; | ||
| 1611 | min_stripes = 2; | ||
| 1612 | } | ||
| 1613 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | ||
| 1614 | num_stripes = 2; | ||
| 1615 | min_stripes = 2; | ||
| 1616 | } | ||
| 1617 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 1618 | num_stripes = min_t(u64, 2, | ||
| 1619 | extent_root->fs_info->fs_devices->open_devices); | ||
| 1620 | if (num_stripes < 2) | ||
| 1621 | return -ENOSPC; | ||
| 1622 | min_stripes = 2; | ||
| 1623 | } | ||
| 1624 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 1625 | num_stripes = extent_root->fs_info->fs_devices->open_devices; | ||
| 1626 | if (num_stripes < 4) | ||
| 1627 | return -ENOSPC; | ||
| 1628 | num_stripes &= ~(u32)1; | ||
| 1629 | sub_stripes = 2; | ||
| 1630 | min_stripes = 4; | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | if (type & BTRFS_BLOCK_GROUP_DATA) { | ||
| 1634 | max_chunk_size = 10 * calc_size; | ||
| 1635 | min_stripe_size = 64 * 1024 * 1024; | ||
| 1636 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { | ||
| 1637 | max_chunk_size = 4 * calc_size; | ||
| 1638 | min_stripe_size = 32 * 1024 * 1024; | ||
| 1639 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | ||
| 1640 | calc_size = 8 * 1024 * 1024; | ||
| 1641 | max_chunk_size = calc_size * 2; | ||
| 1642 | min_stripe_size = 1 * 1024 * 1024; | ||
| 1643 | } | ||
| 1644 | |||
| 1645 | path = btrfs_alloc_path(); | ||
| 1646 | if (!path) | ||
| 1647 | return -ENOMEM; | ||
| 1648 | |||
| 1649 | /* we don't want a chunk larger than 10% of the FS */ | ||
| 1650 | percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); | ||
| 1651 | max_chunk_size = min(percent_max, max_chunk_size); | ||
| 1652 | |||
| 1653 | again: | ||
| 1654 | if (calc_size * num_stripes > max_chunk_size) { | ||
| 1655 | calc_size = max_chunk_size; | ||
| 1656 | do_div(calc_size, num_stripes); | ||
| 1657 | do_div(calc_size, stripe_len); | ||
| 1658 | calc_size *= stripe_len; | ||
| 1659 | } | ||
| 1660 | /* we don't want tiny stripes */ | ||
| 1661 | calc_size = max_t(u64, min_stripe_size, calc_size); | ||
| 1662 | |||
| 1663 | do_div(calc_size, stripe_len); | ||
| 1664 | calc_size *= stripe_len; | ||
| 1665 | |||
| 1666 | INIT_LIST_HEAD(&private_devs); | ||
| 1667 | cur = dev_list->next; | ||
| 1668 | index = 0; | ||
| 1669 | |||
| 1670 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
| 1671 | min_free = calc_size * 2; | ||
| 1672 | else | ||
| 1673 | min_free = calc_size; | ||
| 1674 | |||
| 1675 | /* | ||
| 1676 | * we add 1MB because we never use the first 1MB of the device, unless | ||
| 1677 | * we've looped, then we are likely allocating the maximum amount of | ||
| 1678 | * space left already | ||
| 1679 | */ | ||
| 1680 | if (!looped) | ||
| 1681 | min_free += 1024 * 1024; | ||
| 1682 | |||
| 1683 | /* build a private list of devices we will allocate from */ | ||
| 1684 | while (index < num_stripes) { | ||
| 1685 | device = list_entry(cur, struct btrfs_device, dev_alloc_list); | ||
| 1686 | |||
| 1687 | if (device->total_bytes > device->bytes_used) | ||
| 1688 | avail = device->total_bytes - device->bytes_used; | ||
| 1689 | else | ||
| 1690 | avail = 0; | ||
| 1691 | cur = cur->next; | ||
| 1692 | |||
| 1693 | if (device->in_fs_metadata && avail >= min_free) { | ||
| 1694 | u64 ignored_start = 0; | ||
| 1695 | ret = find_free_dev_extent(trans, device, path, | ||
| 1696 | min_free, | ||
| 1697 | &ignored_start); | ||
| 1698 | if (ret == 0) { | ||
| 1699 | list_move_tail(&device->dev_alloc_list, | ||
| 1700 | &private_devs); | ||
| 1701 | index++; | ||
| 1702 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
| 1703 | index++; | ||
| 1704 | } | ||
| 1705 | } else if (device->in_fs_metadata && avail > max_avail) | ||
| 1706 | max_avail = avail; | ||
| 1707 | if (cur == dev_list) | ||
| 1708 | break; | ||
| 1709 | } | ||
| 1710 | if (index < num_stripes) { | ||
| 1711 | list_splice(&private_devs, dev_list); | ||
| 1712 | if (index >= min_stripes) { | ||
| 1713 | num_stripes = index; | ||
| 1714 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 1715 | num_stripes /= sub_stripes; | ||
| 1716 | num_stripes *= sub_stripes; | ||
| 1717 | } | ||
| 1718 | looped = 1; | ||
| 1719 | goto again; | ||
| 1720 | } | ||
| 1721 | if (!looped && max_avail > 0) { | ||
| 1722 | looped = 1; | ||
| 1723 | calc_size = max_avail; | ||
| 1724 | goto again; | ||
| 1725 | } | ||
| 1726 | btrfs_free_path(path); | ||
| 1727 | return -ENOSPC; | ||
| 1728 | } | ||
| 1729 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | ||
| 1730 | key.type = BTRFS_CHUNK_ITEM_KEY; | ||
| 1731 | ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
| 1732 | &key.offset); | ||
| 1733 | if (ret) { | ||
| 1734 | btrfs_free_path(path); | ||
| 1735 | return ret; | ||
| 1736 | } | ||
| 1737 | |||
| 1738 | chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); | ||
| 1739 | if (!chunk) { | ||
| 1740 | btrfs_free_path(path); | ||
| 1741 | return -ENOMEM; | ||
| 1742 | } | ||
| 1743 | |||
| 1744 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
| 1745 | if (!map) { | ||
| 1746 | kfree(chunk); | ||
| 1747 | btrfs_free_path(path); | ||
| 1748 | return -ENOMEM; | ||
| 1749 | } | ||
| 1750 | btrfs_free_path(path); | ||
| 1751 | path = NULL; | ||
| 1752 | |||
| 1753 | stripes = &chunk->stripe; | ||
| 1754 | *num_bytes = chunk_bytes_by_type(type, calc_size, | ||
| 1755 | num_stripes, sub_stripes); | ||
| 1756 | |||
| 1757 | index = 0; | ||
| 1758 | while (index < num_stripes) { | ||
| 1759 | struct btrfs_stripe *stripe; | ||
| 1760 | BUG_ON(list_empty(&private_devs)); | ||
| 1761 | cur = private_devs.next; | ||
| 1762 | device = list_entry(cur, struct btrfs_device, dev_alloc_list); | ||
| 1763 | |||
| 1764 | /* loop over this device again if we're doing a dup group */ | ||
| 1765 | if (!(type & BTRFS_BLOCK_GROUP_DUP) || | ||
| 1766 | (index == num_stripes - 1)) | ||
| 1767 | list_move_tail(&device->dev_alloc_list, dev_list); | ||
| 1768 | |||
| 1769 | ret = btrfs_alloc_dev_extent(trans, device, | ||
| 1770 | info->chunk_root->root_key.objectid, | ||
| 1771 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, | ||
| 1772 | calc_size, &dev_offset); | ||
| 1773 | BUG_ON(ret); | ||
| 1774 | device->bytes_used += calc_size; | ||
| 1775 | ret = btrfs_update_device(trans, device); | ||
| 1776 | BUG_ON(ret); | ||
| 1777 | |||
| 1778 | map->stripes[index].dev = device; | ||
| 1779 | map->stripes[index].physical = dev_offset; | ||
| 1780 | stripe = stripes + index; | ||
| 1781 | btrfs_set_stack_stripe_devid(stripe, device->devid); | ||
| 1782 | btrfs_set_stack_stripe_offset(stripe, dev_offset); | ||
| 1783 | memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); | ||
| 1784 | physical = dev_offset; | ||
| 1785 | index++; | ||
| 1786 | } | ||
| 1787 | BUG_ON(!list_empty(&private_devs)); | ||
| 1788 | |||
| 1789 | /* key was set above */ | ||
| 1790 | btrfs_set_stack_chunk_length(chunk, *num_bytes); | ||
| 1791 | btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); | ||
| 1792 | btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); | ||
| 1793 | btrfs_set_stack_chunk_type(chunk, type); | ||
| 1794 | btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); | ||
| 1795 | btrfs_set_stack_chunk_io_align(chunk, stripe_len); | ||
| 1796 | btrfs_set_stack_chunk_io_width(chunk, stripe_len); | ||
| 1797 | btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); | ||
| 1798 | btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes); | ||
| 1799 | map->sector_size = extent_root->sectorsize; | ||
| 1800 | map->stripe_len = stripe_len; | ||
| 1801 | map->io_align = stripe_len; | ||
| 1802 | map->io_width = stripe_len; | ||
| 1803 | map->type = type; | ||
| 1804 | map->num_stripes = num_stripes; | ||
| 1805 | map->sub_stripes = sub_stripes; | ||
| 1806 | |||
| 1807 | ret = btrfs_insert_item(trans, chunk_root, &key, chunk, | ||
| 1808 | btrfs_chunk_item_size(num_stripes)); | ||
| 1809 | BUG_ON(ret); | ||
| 1810 | *start = key.offset; | ||
| 1811 | |||
| 1812 | em = alloc_extent_map(GFP_NOFS); | ||
| 1813 | if (!em) { | ||
| 1814 | kfree(map); | ||
| 1815 | kfree(chunk); | ||
| 1816 | return -ENOMEM; | ||
| 1817 | } | ||
| 1815 | em->bdev = (struct block_device *)map; | ||
| 1816 | em->start = key.offset; | ||
| 1817 | em->len = *num_bytes; | ||
| 1818 | em->block_start = 0; | ||
| 1819 | |||
| 1820 | if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | ||
| 1821 | ret = btrfs_add_system_chunk(trans, chunk_root, &key, | ||
| 1822 | chunk, btrfs_chunk_item_size(num_stripes)); | ||
| 1823 | BUG_ON(ret); | ||
| 1824 | } | ||
| 1825 | kfree(chunk); | ||
| 1826 | |||
| 1827 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | ||
| 1828 | spin_lock(&em_tree->lock); | ||
| 1829 | ret = add_extent_mapping(em_tree, em); | ||
| 1830 | spin_unlock(&em_tree->lock); | ||
| 1831 | BUG_ON(ret); | ||
| 1832 | free_extent_map(em); | ||
| 1833 | return ret; | ||
| 1834 | } | ||
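The paired do_div()/multiply sequences in btrfs_alloc_chunk() are the 32-bit-safe idiom for rounding a u64 down to a stripe_len boundary, since the kernel cannot use '%' or '/' directly on 64-bit values on 32-bit hosts. As a standalone sketch:

/* Round size down to a multiple of stripe_len (64K in this file).
 * do_div(x, y) divides the u64 x in place and returns x % y.
 */
static u64 round_down_to_stripe(u64 size, int stripe_len)
{
	do_div(size, stripe_len);	/* size = size / stripe_len */
	return size * stripe_len;
}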
| 1835 | |||
| 1836 | void btrfs_mapping_init(struct btrfs_mapping_tree *tree) | ||
| 1837 | { | ||
| 1838 | extent_map_tree_init(&tree->map_tree, GFP_NOFS); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | ||
| 1842 | { | ||
| 1843 | struct extent_map *em; | ||
| 1844 | |||
| 1845 | while (1) { | ||
| 1846 | spin_lock(&tree->map_tree.lock); | ||
| 1847 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); | ||
| 1848 | if (em) | ||
| 1849 | remove_extent_mapping(&tree->map_tree, em); | ||
| 1850 | spin_unlock(&tree->map_tree.lock); | ||
| 1851 | if (!em) | ||
| 1852 | break; | ||
| 1853 | kfree(em->bdev); | ||
| 1854 | /* once for us */ | ||
| 1855 | free_extent_map(em); | ||
| 1856 | /* once for the tree */ | ||
| 1857 | free_extent_map(em); | ||
| 1858 | } | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | ||
| 1862 | { | ||
| 1863 | struct extent_map *em; | ||
| 1864 | struct map_lookup *map; | ||
| 1865 | struct extent_map_tree *em_tree = &map_tree->map_tree; | ||
| 1866 | int ret; | ||
| 1867 | |||
| 1868 | spin_lock(&em_tree->lock); | ||
| 1869 | em = lookup_extent_mapping(em_tree, logical, len); | ||
| 1870 | spin_unlock(&em_tree->lock); | ||
| 1871 | BUG_ON(!em); | ||
| 1872 | |||
| 1873 | BUG_ON(em->start > logical || em->start + em->len < logical); | ||
| 1874 | map = (struct map_lookup *)em->bdev; | ||
| 1875 | if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) | ||
| 1876 | ret = map->num_stripes; | ||
| 1877 | else if (map->type & BTRFS_BLOCK_GROUP_RAID10) | ||
| 1878 | ret = map->sub_stripes; | ||
| 1879 | else | ||
| 1880 | ret = 1; | ||
| 1881 | free_extent_map(em); | ||
| 1882 | return ret; | ||
| 1883 | } | ||
| 1884 | |||
| 1885 | static int find_live_mirror(struct map_lookup *map, int first, int num, | ||
| 1886 | int optimal) | ||
| 1887 | { | ||
| 1888 | int i; | ||
| 1889 | if (map->stripes[optimal].dev->bdev) | ||
| 1890 | return optimal; | ||
| 1891 | for (i = first; i < first + num; i++) { | ||
| 1892 | if (map->stripes[i].dev->bdev) | ||
| 1893 | return i; | ||
| 1894 | } | ||
| 1895 | /* we couldn't find one that doesn't fail. Just return something | ||
| 1896 | * and the io error handling code will clean up eventually | ||
| 1897 | */ | ||
| 1898 | return optimal; | ||
| 1899 | } | ||
| 1900 | |||
| 1901 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | ||
| 1902 | u64 logical, u64 *length, | ||
| 1903 | struct btrfs_multi_bio **multi_ret, | ||
| 1904 | int mirror_num, struct page *unplug_page) | ||
| 1905 | { | ||
| 1906 | struct extent_map *em; | ||
| 1907 | struct map_lookup *map; | ||
| 1908 | struct extent_map_tree *em_tree = &map_tree->map_tree; | ||
| 1909 | u64 offset; | ||
| 1910 | u64 stripe_offset; | ||
| 1911 | u64 stripe_nr; | ||
| 1912 | int stripes_allocated = 8; | ||
| 1913 | int stripes_required = 1; | ||
| 1914 | int stripe_index; | ||
| 1915 | int i; | ||
| 1916 | int num_stripes; | ||
| 1917 | int max_errors = 0; | ||
| 1918 | struct btrfs_multi_bio *multi = NULL; | ||
| 1919 | |||
| 1920 | if (multi_ret && !(rw & (1 << BIO_RW))) { | ||
| 1921 | stripes_allocated = 1; | ||
| 1922 | } | ||
| 1923 | again: | ||
| 1924 | if (multi_ret) { | ||
| 1925 | multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), | ||
| 1926 | GFP_NOFS); | ||
| 1927 | if (!multi) | ||
| 1928 | return -ENOMEM; | ||
| 1929 | |||
| 1930 | atomic_set(&multi->error, 0); | ||
| 1931 | } | ||
| 1932 | |||
| 1933 | spin_lock(&em_tree->lock); | ||
| 1934 | em = lookup_extent_mapping(em_tree, logical, *length); | ||
| 1935 | spin_unlock(&em_tree->lock); | ||
| 1936 | |||
| 1937 | if (!em && unplug_page) | ||
| 1938 | return 0; | ||
| 1939 | |||
| 1940 | if (!em) { | ||
| 1941 | printk("unable to find logical %Lu len %Lu\n", logical, *length); | ||
| 1942 | BUG(); | ||
| 1943 | } | ||
| 1944 | |||
| 1945 | BUG_ON(em->start > logical || em->start + em->len < logical); | ||
| 1946 | map = (struct map_lookup *)em->bdev; | ||
| 1947 | offset = logical - em->start; | ||
| 1948 | |||
| 1949 | if (mirror_num > map->num_stripes) | ||
| 1950 | mirror_num = 0; | ||
| 1951 | |||
| 1952 | /* if our multi bio struct is too small, back off and try again */ | ||
| 1953 | if (rw & (1 << BIO_RW)) { | ||
| 1954 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 1955 | BTRFS_BLOCK_GROUP_DUP)) { | ||
| 1956 | stripes_required = map->num_stripes; | ||
| 1957 | max_errors = 1; | ||
| 1958 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
| 1959 | stripes_required = map->sub_stripes; | ||
| 1960 | max_errors = 1; | ||
| 1961 | } | ||
| 1962 | } | ||
| 1963 | if (multi_ret && (rw & (1 << BIO_RW)) && | ||
| 1964 | stripes_allocated < stripes_required) { | ||
| 1965 | stripes_allocated = map->num_stripes; | ||
| 1966 | free_extent_map(em); | ||
| 1967 | kfree(multi); | ||
| 1968 | goto again; | ||
| 1969 | } | ||
| 1970 | stripe_nr = offset; | ||
| 1971 | /* | ||
| 1972 | * stripe_nr counts the total number of stripes we have to stride | ||
| 1973 | * to get to this block | ||
| 1974 | */ | ||
| 1975 | do_div(stripe_nr, map->stripe_len); | ||
| 1976 | |||
| 1977 | stripe_offset = stripe_nr * map->stripe_len; | ||
| 1978 | BUG_ON(offset < stripe_offset); | ||
| 1979 | |||
| 1980 | /* stripe_offset is the offset of this block in its stripe */ | ||
| 1981 | stripe_offset = offset - stripe_offset; | ||
| 1982 | |||
| 1983 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 1984 | BTRFS_BLOCK_GROUP_RAID10 | | ||
| 1985 | BTRFS_BLOCK_GROUP_DUP)) { | ||
| 1986 | /* we limit the length of each bio to what fits in a stripe */ | ||
| 1987 | *length = min_t(u64, em->len - offset, | ||
| 1988 | map->stripe_len - stripe_offset); | ||
| 1989 | } else { | ||
| 1990 | *length = em->len - offset; | ||
| 1991 | } | ||
| 1992 | |||
| 1993 | if (!multi_ret && !unplug_page) | ||
| 1994 | goto out; | ||
| 1995 | |||
| 1996 | num_stripes = 1; | ||
| 1997 | stripe_index = 0; | ||
| 1998 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
| 1999 | if (unplug_page || (rw & (1 << BIO_RW))) | ||
| 2000 | num_stripes = map->num_stripes; | ||
| 2001 | else if (mirror_num) | ||
| 2002 | stripe_index = mirror_num - 1; | ||
| 2003 | else { | ||
| 2004 | stripe_index = find_live_mirror(map, 0, | ||
| 2005 | map->num_stripes, | ||
| 2006 | current->pid % map->num_stripes); | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | ||
| 2010 | if (rw & (1 << BIO_RW)) | ||
| 2011 | num_stripes = map->num_stripes; | ||
| 2012 | else if (mirror_num) | ||
| 2013 | stripe_index = mirror_num - 1; | ||
| 2014 | |||
| 2015 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
| 2016 | int factor = map->num_stripes / map->sub_stripes; | ||
| 2017 | |||
| 2018 | stripe_index = do_div(stripe_nr, factor); | ||
| 2019 | stripe_index *= map->sub_stripes; | ||
| 2020 | |||
| 2021 | if (unplug_page || (rw & (1 << BIO_RW))) | ||
| 2022 | num_stripes = map->sub_stripes; | ||
| 2023 | else if (mirror_num) | ||
| 2024 | stripe_index += mirror_num - 1; | ||
| 2025 | else { | ||
| 2026 | stripe_index = find_live_mirror(map, stripe_index, | ||
| 2027 | map->sub_stripes, stripe_index + | ||
| 2028 | current->pid % map->sub_stripes); | ||
| 2029 | } | ||
| 2030 | } else { | ||
| 2031 | /* | ||
| 2032 | * after this do_div call, stripe_nr is the number of stripes | ||
| 2033 | * on this device we have to walk to find the data, and | ||
| 2034 | * stripe_index is the number of our device in the stripe array | ||
| 2035 | */ | ||
| 2036 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
| 2037 | } | ||
| 2038 | BUG_ON(stripe_index >= map->num_stripes); | ||
| 2039 | |||
| 2040 | for (i = 0; i < num_stripes; i++) { | ||
| 2041 | if (unplug_page) { | ||
| 2042 | struct btrfs_device *device; | ||
| 2043 | struct backing_dev_info *bdi; | ||
| 2044 | |||
| 2045 | device = map->stripes[stripe_index].dev; | ||
| 2046 | if (device->bdev) { | ||
| 2047 | bdi = blk_get_backing_dev_info(device->bdev); | ||
| 2048 | if (bdi->unplug_io_fn) { | ||
| 2049 | bdi->unplug_io_fn(bdi, unplug_page); | ||
| 2050 | } | ||
| 2051 | } | ||
| 2052 | } else { | ||
| 2053 | multi->stripes[i].physical = | ||
| 2054 | map->stripes[stripe_index].physical + | ||
| 2055 | stripe_offset + stripe_nr * map->stripe_len; | ||
| 2056 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | ||
| 2057 | } | ||
| 2058 | stripe_index++; | ||
| 2059 | } | ||
| 2060 | if (multi_ret) { | ||
| 2061 | *multi_ret = multi; | ||
| 2062 | multi->num_stripes = num_stripes; | ||
| 2063 | multi->max_errors = max_errors; | ||
| 2064 | } | ||
| 2065 | out: | ||
| 2066 | free_extent_map(em); | ||
| 2067 | return 0; | ||
| 2068 | } | ||
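To make the stripe arithmetic concrete, assume a RAID0 chunk over three devices with the default 64K stripe_len and a read at chunk-relative offset 200K (all values illustrative):

/* offset        = 200K               (logical - em->start)
 * stripe_nr     = 200K / 64K = 3     global stripe number
 * stripe_offset = 200K - 3*64K = 8K  offset within that stripe
 * stripe_index  = 3 % 3 = 0          which device in the chunk
 * stripe_nr     = 3 / 3 = 1          stripe number on that device
 * physical      = map->stripes[0].physical + 1*64K + 8K
 */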
| 2069 | |||
| 2070 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | ||
| 2071 | u64 logical, u64 *length, | ||
| 2072 | struct btrfs_multi_bio **multi_ret, int mirror_num) | ||
| 2073 | { | ||
| 2074 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, | ||
| 2075 | mirror_num, NULL); | ||
| 2076 | } | ||
| 2077 | |||
| 2078 | int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, | ||
| 2079 | u64 logical, struct page *page) | ||
| 2080 | { | ||
| 2081 | u64 length = PAGE_CACHE_SIZE; | ||
| 2082 | return __btrfs_map_block(map_tree, READ, logical, &length, | ||
| 2083 | NULL, 0, page); | ||
| 2084 | } | ||
| 2085 | |||
| 2086 | |||
| 2087 | static void end_bio_multi_stripe(struct bio *bio, int err) | ||
| 2088 | { | ||
| 2089 | struct btrfs_multi_bio *multi = bio->bi_private; | ||
| 2090 | int is_orig_bio = 0; | ||
| 2091 | |||
| 2092 | if (err) | ||
| 2093 | atomic_inc(&multi->error); | ||
| 2094 | |||
| 2095 | if (bio == multi->orig_bio) | ||
| 2096 | is_orig_bio = 1; | ||
| 2097 | |||
| 2098 | if (atomic_dec_and_test(&multi->stripes_pending)) { | ||
| 2099 | if (!is_orig_bio) { | ||
| 2100 | bio_put(bio); | ||
| 2101 | bio = multi->orig_bio; | ||
| 2102 | } | ||
| 2103 | bio->bi_private = multi->private; | ||
| 2104 | bio->bi_end_io = multi->end_io; | ||
| 2105 | /* only send an error to the higher layers if it is | ||
| 2106 | * beyond the tolerance of the multi-bio | ||
| 2107 | */ | ||
| 2108 | if (atomic_read(&multi->error) > multi->max_errors) { | ||
| 2109 | err = -EIO; | ||
| 2110 | } else if (err) { | ||
| 2111 | /* | ||
| 2112 | * this bio is actually up to date, we didn't | ||
| 2113 | * go over the max number of errors | ||
| 2114 | */ | ||
| 2115 | set_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 2116 | err = 0; | ||
| 2117 | } | ||
| 2118 | kfree(multi); | ||
| 2119 | |||
| 2120 | bio_endio(bio, err); | ||
| 2121 | } else if (!is_orig_bio) { | ||
| 2122 | bio_put(bio); | ||
| 2123 | } | ||
| 2124 | } | ||
| 2125 | |||
| 2126 | struct async_sched { | ||
| 2127 | struct bio *bio; | ||
| 2128 | int rw; | ||
| 2129 | struct btrfs_fs_info *info; | ||
| 2130 | struct btrfs_work work; | ||
| 2131 | }; | ||
| 2132 | |||
| 2133 | /* | ||
| 2134 | * see run_scheduled_bios for a description of why bios are collected for | ||
| 2135 | * async submit. | ||
| 2136 | * | ||
| 2137 | * This will add one bio to the pending list for a device and make sure | ||
| 2138 | * the work struct is scheduled. | ||
| 2139 | */ | ||
| 2140 | static int noinline schedule_bio(struct btrfs_root *root, | ||
| 2141 | struct btrfs_device *device, | ||
| 2142 | int rw, struct bio *bio) | ||
| 2143 | { | ||
| 2144 | int should_queue = 1; | ||
| 2145 | |||
| 2146 | /* don't bother with additional async steps for reads, right now */ | ||
| 2147 | if (!(rw & (1 << BIO_RW))) { | ||
| 2148 | bio_get(bio); | ||
| 2149 | submit_bio(rw, bio); | ||
| 2150 | bio_put(bio); | ||
| 2151 | return 0; | ||
| 2152 | } | ||
| 2153 | |||
| 2154 | /* | ||
| 2155 | * nr_async_bios allows us to reliably return congestion to the | ||
| 2156 | * higher layers. Otherwise, the async bio makes it appear we have | ||
| 2157 | * made progress against dirty pages when we've really just put it | ||
| 2158 | * on a queue for later | ||
| 2159 | */ | ||
| 2160 | atomic_inc(&root->fs_info->nr_async_bios); | ||
| 2161 | WARN_ON(bio->bi_next); | ||
| 2162 | bio->bi_next = NULL; | ||
| 2163 | bio->bi_rw |= rw; | ||
| 2164 | |||
| 2165 | spin_lock(&device->io_lock); | ||
| 2166 | |||
| 2167 | if (device->pending_bio_tail) | ||
| 2168 | device->pending_bio_tail->bi_next = bio; | ||
| 2169 | |||
| 2170 | device->pending_bio_tail = bio; | ||
| 2171 | if (!device->pending_bios) | ||
| 2172 | device->pending_bios = bio; | ||
| 2173 | if (device->running_pending) | ||
| 2174 | should_queue = 0; | ||
| 2175 | |||
| 2176 | spin_unlock(&device->io_lock); | ||
| 2177 | |||
| 2178 | if (should_queue) | ||
| 2179 | btrfs_queue_worker(&root->fs_info->submit_workers, | ||
| 2180 | &device->work); | ||
| 2181 | return 0; | ||
| 2182 | } | ||
| 2183 | |||
| 2184 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | ||
| 2185 | int mirror_num, int async_submit) | ||
| 2186 | { | ||
| 2187 | struct btrfs_mapping_tree *map_tree; | ||
| 2188 | struct btrfs_device *dev; | ||
| 2189 | struct bio *first_bio = bio; | ||
| 2190 | u64 logical = (u64)bio->bi_sector << 9; | ||
| 2191 | u64 length = 0; | ||
| 2192 | u64 map_length; | ||
| 2193 | struct btrfs_multi_bio *multi = NULL; | ||
| 2194 | int ret; | ||
| 2195 | int dev_nr = 0; | ||
| 2196 | int total_devs = 1; | ||
| 2197 | |||
| 2198 | length = bio->bi_size; | ||
| 2199 | map_tree = &root->fs_info->mapping_tree; | ||
| 2200 | map_length = length; | ||
| 2201 | |||
| 2202 | ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi, | ||
| 2203 | mirror_num); | ||
| 2204 | BUG_ON(ret); | ||
| 2205 | |||
| 2206 | total_devs = multi->num_stripes; | ||
| 2207 | if (map_length < length) { | ||
| 2208 | printk("mapping failed logical %Lu bio len %Lu " | ||
| 2209 | "len %Lu\n", logical, length, map_length); | ||
| 2210 | BUG(); | ||
| 2211 | } | ||
| 2212 | multi->end_io = first_bio->bi_end_io; | ||
| 2213 | multi->private = first_bio->bi_private; | ||
| 2214 | multi->orig_bio = first_bio; | ||
| 2215 | atomic_set(&multi->stripes_pending, multi->num_stripes); | ||
| 2216 | |||
| 2217 | while (dev_nr < total_devs) { | ||
| 2218 | if (total_devs > 1) { | ||
| 2219 | if (dev_nr < total_devs - 1) { | ||
| 2220 | bio = bio_clone(first_bio, GFP_NOFS); | ||
| 2221 | BUG_ON(!bio); | ||
| 2222 | } else { | ||
| 2223 | bio = first_bio; | ||
| 2224 | } | ||
| 2225 | bio->bi_private = multi; | ||
| 2226 | bio->bi_end_io = end_bio_multi_stripe; | ||
| 2227 | } | ||
| 2228 | bio->bi_sector = multi->stripes[dev_nr].physical >> 9; | ||
| 2229 | dev = multi->stripes[dev_nr].dev; | ||
| 2230 | if (dev && dev->bdev) { | ||
| 2231 | bio->bi_bdev = dev->bdev; | ||
| 2232 | if (async_submit) | ||
| 2233 | schedule_bio(root, dev, rw, bio); | ||
| 2234 | else | ||
| 2235 | submit_bio(rw, bio); | ||
| 2236 | } else { | ||
| 2237 | bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 2238 | bio->bi_sector = logical >> 9; | ||
| 2239 | bio_endio(bio, -EIO); | ||
| 2240 | } | ||
| 2241 | dev_nr++; | ||
| 2242 | } | ||
| 2243 | if (total_devs == 1) | ||
| 2244 | kfree(multi); | ||
| 2245 | return 0; | ||
| 2246 | } | ||
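For a RAID1 write the loop above fans out as follows (a descriptive walk-through with total_devs == 2, not new code):

/* RAID1 write, total_devs = 2:
 *   dev_nr 0: bio_clone(first_bio) -> stripe 0's device
 *   dev_nr 1: first_bio itself     -> stripe 1's device
 * stripes_pending starts at 2; end_bio_multi_stripe() completes the
 * caller's bio once both finish, returning -EIO only if the error
 * count exceeds multi->max_errors (1 for RAID1).
 */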
| 2247 | |||
| 2248 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | ||
| 2249 | u8 *uuid) | ||
| 2250 | { | ||
| 2251 | struct list_head *head = &root->fs_info->fs_devices->devices; | ||
| 2252 | |||
| 2253 | return __find_device(head, devid, uuid); | ||
| 2254 | } | ||
| 2255 | |||
| 2256 | static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | ||
| 2257 | u64 devid, u8 *dev_uuid) | ||
| 2258 | { | ||
| 2259 | struct btrfs_device *device; | ||
| 2260 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
| 2261 | |||
| 2262 | device = kzalloc(sizeof(*device), GFP_NOFS); | ||
| 2263 | if (!device) | ||
| 2264 | return NULL; | ||
| 2265 | list_add(&device->dev_list, &fs_devices->devices); | ||
| 2265 | list_add(&device->dev_alloc_list, | ||
| 2266 | &fs_devices->alloc_list); | ||
| 2267 | device->barriers = 1; | ||
| 2268 | device->dev_root = root->fs_info->dev_root; | ||
| 2269 | device->devid = devid; | ||
| 2270 | device->work.func = pending_bios_fn; | ||
| 2271 | fs_devices->num_devices++; | ||
| 2272 | spin_lock_init(&device->io_lock); | ||
| 2273 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); | ||
| 2274 | return device; | ||
| 2275 | } | ||
| 2276 | |||
| 2277 | |||
| 2278 | static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | ||
| 2279 | struct extent_buffer *leaf, | ||
| 2280 | struct btrfs_chunk *chunk) | ||
| 2281 | { | ||
| 2282 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
| 2283 | struct map_lookup *map; | ||
| 2284 | struct extent_map *em; | ||
| 2285 | u64 logical; | ||
| 2286 | u64 length; | ||
| 2287 | u64 devid; | ||
| 2288 | u8 uuid[BTRFS_UUID_SIZE]; | ||
| 2289 | int num_stripes; | ||
| 2290 | int ret; | ||
| 2291 | int i; | ||
| 2292 | |||
| 2293 | logical = key->offset; | ||
| 2294 | length = btrfs_chunk_length(leaf, chunk); | ||
| 2295 | |||
| 2296 | spin_lock(&map_tree->map_tree.lock); | ||
| 2297 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); | ||
| 2298 | spin_unlock(&map_tree->map_tree.lock); | ||
| 2299 | |||
| 2300 | /* already mapped? */ | ||
| 2301 | if (em && em->start <= logical && em->start + em->len > logical) { | ||
| 2302 | free_extent_map(em); | ||
| 2303 | return 0; | ||
| 2304 | } else if (em) { | ||
| 2305 | free_extent_map(em); | ||
| 2306 | } | ||
| 2307 | |||
| 2312 | em = alloc_extent_map(GFP_NOFS); | ||
| 2313 | if (!em) | ||
| 2314 | return -ENOMEM; | ||
| 2315 | num_stripes = btrfs_chunk_num_stripes(leaf, chunk); | ||
| 2316 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
| 2317 | if (!map) { | ||
| 2318 | free_extent_map(em); | ||
| 2319 | return -ENOMEM; | ||
| 2320 | } | ||
| 2321 | |||
| 2322 | em->bdev = (struct block_device *)map; | ||
| 2323 | em->start = logical; | ||
| 2324 | em->len = length; | ||
| 2325 | em->block_start = 0; | ||
| 2326 | |||
| 2327 | map->num_stripes = num_stripes; | ||
| 2328 | map->io_width = btrfs_chunk_io_width(leaf, chunk); | ||
| 2329 | map->io_align = btrfs_chunk_io_align(leaf, chunk); | ||
| 2330 | map->sector_size = btrfs_chunk_sector_size(leaf, chunk); | ||
| 2331 | map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); | ||
| 2332 | map->type = btrfs_chunk_type(leaf, chunk); | ||
| 2333 | map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); | ||
| 2334 | for (i = 0; i < num_stripes; i++) { | ||
| 2335 | map->stripes[i].physical = | ||
| 2336 | btrfs_stripe_offset_nr(leaf, chunk, i); | ||
| 2337 | devid = btrfs_stripe_devid_nr(leaf, chunk, i); | ||
| 2338 | read_extent_buffer(leaf, uuid, (unsigned long) | ||
| 2339 | btrfs_stripe_dev_uuid_nr(chunk, i), | ||
| 2340 | BTRFS_UUID_SIZE); | ||
| 2341 | map->stripes[i].dev = btrfs_find_device(root, devid, uuid); | ||
| 2342 | |||
| 2343 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { | ||
| 2344 | kfree(map); | ||
| 2345 | free_extent_map(em); | ||
| 2346 | return -EIO; | ||
| 2347 | } | ||
| 2348 | if (!map->stripes[i].dev) { | ||
| 2349 | map->stripes[i].dev = | ||
| 2350 | add_missing_dev(root, devid, uuid); | ||
| 2351 | if (!map->stripes[i].dev) { | ||
| 2352 | kfree(map); | ||
| 2353 | free_extent_map(em); | ||
| 2354 | return -EIO; | ||
| 2355 | } | ||
| 2356 | } | ||
| 2357 | map->stripes[i].dev->in_fs_metadata = 1; | ||
| 2358 | } | ||
| 2359 | |||
| 2360 | spin_lock(&map_tree->map_tree.lock); | ||
| 2361 | ret = add_extent_mapping(&map_tree->map_tree, em); | ||
| 2362 | spin_unlock(&map_tree->map_tree.lock); | ||
| 2363 | BUG_ON(ret); | ||
| 2364 | free_extent_map(em); | ||
| 2365 | |||
| 2366 | return 0; | ||
| 2367 | } | ||
| 2368 | |||
| 2369 | static int fill_device_from_item(struct extent_buffer *leaf, | ||
| 2370 | struct btrfs_dev_item *dev_item, | ||
| 2371 | struct btrfs_device *device) | ||
| 2372 | { | ||
| 2373 | unsigned long ptr; | ||
| 2374 | |||
| 2375 | device->devid = btrfs_device_id(leaf, dev_item); | ||
| 2376 | device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); | ||
| 2377 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); | ||
| 2378 | device->type = btrfs_device_type(leaf, dev_item); | ||
| 2379 | device->io_align = btrfs_device_io_align(leaf, dev_item); | ||
| 2380 | device->io_width = btrfs_device_io_width(leaf, dev_item); | ||
| 2381 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); | ||
| 2382 | |||
| 2383 | ptr = (unsigned long)btrfs_device_uuid(dev_item); | ||
| 2384 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | ||
| 2385 | |||
| 2386 | return 0; | ||
| 2387 | } | ||
| 2388 | |||
| 2389 | static int read_one_dev(struct btrfs_root *root, | ||
| 2390 | struct extent_buffer *leaf, | ||
| 2391 | struct btrfs_dev_item *dev_item) | ||
| 2392 | { | ||
| 2393 | struct btrfs_device *device; | ||
| 2394 | u64 devid; | ||
| 2395 | int ret; | ||
| 2396 | u8 dev_uuid[BTRFS_UUID_SIZE]; | ||
| 2397 | |||
| 2398 | devid = btrfs_device_id(leaf, dev_item); | ||
| 2399 | read_extent_buffer(leaf, dev_uuid, | ||
| 2400 | (unsigned long)btrfs_device_uuid(dev_item), | ||
| 2401 | BTRFS_UUID_SIZE); | ||
| 2402 | device = btrfs_find_device(root, devid, dev_uuid); | ||
| 2403 | if (!device) { | ||
| 2404 | printk("warning devid %Lu missing\n", devid); | ||
| 2405 | device = add_missing_dev(root, devid, dev_uuid); | ||
| 2406 | if (!device) | ||
| 2407 | return -ENOMEM; | ||
| 2408 | } | ||
| 2409 | |||
| 2410 | fill_device_from_item(leaf, dev_item, device); | ||
| 2411 | device->dev_root = root->fs_info->dev_root; | ||
| 2412 | device->in_fs_metadata = 1; | ||
| 2413 | ret = 0; | ||
| 2414 | #if 0 | ||
| 2415 | ret = btrfs_open_device(device); | ||
| 2416 | if (ret) { | ||
| 2417 | kfree(device); | ||
| 2418 | } | ||
| 2419 | #endif | ||
| 2420 | return ret; | ||
| 2421 | } | ||
| 2422 | |||
| 2423 | int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) | ||
| 2424 | { | ||
| 2425 | struct btrfs_dev_item *dev_item; | ||
| 2426 | |||
| 2427 | dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, | ||
| 2428 | dev_item); | ||
| 2429 | return read_one_dev(root, buf, dev_item); | ||
| 2430 | } | ||
| 2431 | |||
| 2432 | int btrfs_read_sys_array(struct btrfs_root *root) | ||
| 2433 | { | ||
| 2434 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | ||
| 2435 | struct extent_buffer *sb; | ||
| 2436 | struct btrfs_disk_key *disk_key; | ||
| 2437 | struct btrfs_chunk *chunk; | ||
| 2438 | u8 *ptr; | ||
| 2439 | unsigned long sb_ptr; | ||
| 2440 | int ret = 0; | ||
| 2441 | u32 num_stripes; | ||
| 2442 | u32 array_size; | ||
| 2443 | u32 len = 0; | ||
| 2444 | u32 cur; | ||
| 2445 | struct btrfs_key key; | ||
| 2446 | |||
| 2447 | sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, | ||
| 2448 | BTRFS_SUPER_INFO_SIZE); | ||
| 2449 | if (!sb) | ||
| 2450 | return -ENOMEM; | ||
| 2451 | btrfs_set_buffer_uptodate(sb); | ||
| 2452 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); | ||
| 2453 | array_size = btrfs_super_sys_array_size(super_copy); | ||
| 2454 | |||
| 2455 | ptr = super_copy->sys_chunk_array; | ||
| 2456 | sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); | ||
| 2457 | cur = 0; | ||
| 2458 | |||
| 2459 | while (cur < array_size) { | ||
| 2460 | disk_key = (struct btrfs_disk_key *)ptr; | ||
| 2461 | btrfs_disk_key_to_cpu(&key, disk_key); | ||
| 2462 | |||
| 2463 | len = sizeof(*disk_key); ptr += len; | ||
| 2464 | sb_ptr += len; | ||
| 2465 | cur += len; | ||
| 2466 | |||
| 2467 | if (key.type == BTRFS_CHUNK_ITEM_KEY) { | ||
| 2468 | chunk = (struct btrfs_chunk *)sb_ptr; | ||
| 2469 | ret = read_one_chunk(root, &key, sb, chunk); | ||
| 2470 | if (ret) | ||
| 2471 | break; | ||
| 2472 | num_stripes = btrfs_chunk_num_stripes(sb, chunk); | ||
| 2473 | len = btrfs_chunk_item_size(num_stripes); | ||
| 2474 | } else { | ||
| 2475 | ret = -EIO; | ||
| 2476 | break; | ||
| 2477 | } | ||
| 2478 | ptr += len; | ||
| 2479 | sb_ptr += len; | ||
| 2480 | cur += len; | ||
| 2481 | } | ||
| 2482 | free_extent_buffer(sb); | ||
| 2483 | return ret; | ||
| 2484 | } | ||
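The cursor advances by sizeof(disk key) plus btrfs_chunk_item_size(num_stripes) per entry. Since struct btrfs_chunk embeds its first stripe, the item-size helper is presumably the following — an assumption consistent with the struct layout, not a quote from ctree.h:

static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
	/* struct btrfs_chunk already contains one struct btrfs_stripe */
	return sizeof(struct btrfs_chunk) +
		sizeof(struct btrfs_stripe) * (num_stripes - 1);
}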
| 2485 | |||
| 2486 | int btrfs_read_chunk_tree(struct btrfs_root *root) | ||
| 2487 | { | ||
| 2488 | struct btrfs_path *path; | ||
| 2489 | struct extent_buffer *leaf; | ||
| 2490 | struct btrfs_key key; | ||
| 2491 | struct btrfs_key found_key; | ||
| 2492 | int ret; | ||
| 2493 | int slot; | ||
| 2494 | |||
| 2495 | root = root->fs_info->chunk_root; | ||
| 2496 | |||
| 2497 | path = btrfs_alloc_path(); | ||
| 2498 | if (!path) | ||
| 2499 | return -ENOMEM; | ||
| 2500 | |||
| 2501 | /* first we search for all of the device items, and then we | ||
| 2502 | * read in all of the chunk items. This way we can create chunk | ||
| 2503 | * mappings that reference all of the devices that are found | ||
| 2504 | */ | ||
| 2505 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | ||
| 2506 | key.offset = 0; | ||
| 2507 | key.type = 0; | ||
| 2508 | again: | ||
| 2509 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 2510 | while (1) { | ||
| 2511 | leaf = path->nodes[0]; | ||
| 2512 | slot = path->slots[0]; | ||
| 2513 | if (slot >= btrfs_header_nritems(leaf)) { | ||
| 2514 | ret = btrfs_next_leaf(root, path); | ||
| 2515 | if (ret == 0) | ||
| 2516 | continue; | ||
| 2517 | if (ret < 0) | ||
| 2518 | goto error; | ||
| 2519 | break; | ||
| 2520 | } | ||
| 2521 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 2522 | if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { | ||
| 2523 | if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) | ||
| 2524 | break; | ||
| 2525 | if (found_key.type == BTRFS_DEV_ITEM_KEY) { | ||
| 2526 | struct btrfs_dev_item *dev_item; | ||
| 2527 | dev_item = btrfs_item_ptr(leaf, slot, | ||
| 2528 | struct btrfs_dev_item); | ||
| 2529 | ret = read_one_dev(root, leaf, dev_item); | ||
| 2530 | BUG_ON(ret); | ||
| 2531 | } | ||
| 2532 | } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { | ||
| 2533 | struct btrfs_chunk *chunk; | ||
| 2534 | chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); | ||
| 2535 | ret = read_one_chunk(root, &found_key, leaf, chunk); | ||
| 2536 | } | ||
| 2537 | path->slots[0]++; | ||
| 2538 | } | ||
| 2539 | if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { | ||
| 2540 | key.objectid = 0; | ||
| 2541 | btrfs_release_path(root, path); | ||
| 2542 | goto again; | ||
| 2543 | } | ||
| 2544 | |||
| 2545 | ret = 0; | ||
| 2546 | error: | ||
| 2547 | btrfs_free_path(path); | ||
| 2548 | return ret; | ||
| 2549 | } | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h new file mode 100644 index 00000000000..c50e50580b5 --- /dev/null +++ b/fs/btrfs/volumes.h | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_VOLUMES_ | ||
| 20 | #define __BTRFS_VOLUMES_ | ||
| 21 | |||
| 22 | #include <linux/bio.h> | ||
| 23 | #include "async-thread.h" | ||
| 24 | |||
| 25 | struct buffer_head; | ||
| 26 | struct btrfs_device { | ||
| 27 | struct list_head dev_list; | ||
| 28 | struct list_head dev_alloc_list; | ||
| 29 | struct btrfs_root *dev_root; | ||
| 30 | struct buffer_head *pending_io; | ||
| 31 | struct bio *pending_bios; | ||
| 32 | struct bio *pending_bio_tail; | ||
| 33 | int running_pending; | ||
| 34 | u64 generation; | ||
| 35 | |||
| 36 | int barriers; | ||
| 37 | int in_fs_metadata; | ||
| 38 | |||
| 39 | spinlock_t io_lock; | ||
| 40 | |||
| 41 | struct block_device *bdev; | ||
| 42 | |||
| 43 | char *name; | ||
| 44 | |||
| 45 | /* the internal btrfs device id */ | ||
| 46 | u64 devid; | ||
| 47 | |||
| 48 | /* size of the device */ | ||
| 49 | u64 total_bytes; | ||
| 50 | |||
| 51 | /* bytes used */ | ||
| 52 | u64 bytes_used; | ||
| 53 | |||
| 54 | /* optimal io alignment for this device */ | ||
| 55 | u32 io_align; | ||
| 56 | |||
| 57 | /* optimal io width for this device */ | ||
| 58 | u32 io_width; | ||
| 59 | |||
| 60 | /* minimal io size for this device */ | ||
| 61 | u32 sector_size; | ||
| 62 | |||
| 63 | /* type and info about this device */ | ||
| 64 | u64 type; | ||
| 65 | |||
| 66 | /* physical drive uuid (or lvm uuid) */ | ||
| 67 | u8 uuid[BTRFS_UUID_SIZE]; | ||
| 68 | |||
| 69 | struct btrfs_work work; | ||
| 70 | }; | ||
| 71 | |||
| 72 | struct btrfs_fs_devices { | ||
| 73 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ | ||
| 74 | |||
| 75 | /* the device with this id has the most recent copy of the super */ | ||
| 76 | u64 latest_devid; | ||
| 77 | u64 latest_trans; | ||
| 78 | u64 num_devices; | ||
| 79 | u64 open_devices; | ||
| 80 | struct block_device *latest_bdev; | ||
| 81 | /* all of the devices in the FS */ | ||
| 82 | struct list_head devices; | ||
| 83 | |||
| 84 | /* devices not currently being allocated */ | ||
| 85 | struct list_head alloc_list; | ||
| 86 | struct list_head list; | ||
| 87 | int mounted; | ||
| 88 | }; | ||
| 89 | |||
| 90 | struct btrfs_bio_stripe { | ||
| 91 | struct btrfs_device *dev; | ||
| 92 | u64 physical; | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct btrfs_multi_bio { | ||
| 96 | atomic_t stripes_pending; | ||
| 97 | bio_end_io_t *end_io; | ||
| 98 | struct bio *orig_bio; | ||
| 99 | void *private; | ||
| 100 | atomic_t error; | ||
| 101 | int max_errors; | ||
| 102 | int num_stripes; | ||
| 103 | struct btrfs_bio_stripe stripes[]; | ||
| 104 | }; | ||
| 105 | |||
| 106 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | ||
| 107 | (sizeof(struct btrfs_bio_stripe) * (n))) | ||
| 108 | |||
| 109 | int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | ||
| 110 | struct btrfs_device *device, | ||
| 111 | u64 chunk_tree, u64 chunk_objectid, | ||
| 112 | u64 chunk_offset, | ||
| 113 | u64 num_bytes, u64 *start); | ||
| 114 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | ||
| 115 | u64 logical, u64 *length, | ||
| 116 | struct btrfs_multi_bio **multi_ret, int mirror_num); | ||
| 117 | int btrfs_read_sys_array(struct btrfs_root *root); | ||
| 118 | int btrfs_read_chunk_tree(struct btrfs_root *root); | ||
| 119 | int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | ||
| 120 | struct btrfs_root *extent_root, u64 *start, | ||
| 121 | u64 *num_bytes, u64 type); | ||
| 122 | void btrfs_mapping_init(struct btrfs_mapping_tree *tree); | ||
| 123 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); | ||
| 124 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | ||
| 125 | int mirror_num, int async_submit); | ||
| 126 | int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); | ||
| 127 | int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | ||
| 128 | int flags, void *holder); | ||
| 129 | int btrfs_scan_one_device(const char *path, int flags, void *holder, | ||
| 130 | struct btrfs_fs_devices **fs_devices_ret); | ||
| 131 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); | ||
| 132 | int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); | ||
| 133 | int btrfs_add_device(struct btrfs_trans_handle *trans, | ||
| 134 | struct btrfs_root *root, | ||
| 135 | struct btrfs_device *device); | ||
| 136 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); | ||
| 137 | int btrfs_cleanup_fs_uuids(void); | ||
| 138 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); | ||
| 139 | int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, | ||
| 140 | u64 logical, struct page *page); | ||
| 141 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | ||
| 142 | struct btrfs_device *device, u64 new_size); | ||
| 143 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | ||
| 144 | u8 *uuid); | ||
| 145 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); | ||
| 146 | int btrfs_init_new_device(struct btrfs_root *root, char *path); | ||
| 147 | int btrfs_balance(struct btrfs_root *dev_root); | ||
| 148 | void btrfs_unlock_volumes(void); | ||
| 149 | void btrfs_lock_volumes(void); | ||
| 150 | #endif | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c new file mode 100644 index 00000000000..adb4b32a9d5 --- /dev/null +++ b/fs/btrfs/xattr.c | |||
| @@ -0,0 +1,321 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Red Hat. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/fs.h> | ||
| 21 | #include <linux/slab.h> | ||
| 22 | #include <linux/rwsem.h> | ||
| 23 | #include <linux/xattr.h> | ||
| 24 | #include "ctree.h" | ||
| 25 | #include "btrfs_inode.h" | ||
| 26 | #include "transaction.h" | ||
| 27 | #include "xattr.h" | ||
| 28 | #include "disk-io.h" | ||
| 29 | |||
| 30 | |||
| 31 | ssize_t __btrfs_getxattr(struct inode *inode, const char *name, | ||
| 32 | void *buffer, size_t size) | ||
| 33 | { | ||
| 34 | struct btrfs_dir_item *di; | ||
| 35 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 36 | struct btrfs_path *path; | ||
| 37 | struct extent_buffer *leaf; | ||
| 38 | int ret = 0; | ||
| 39 | unsigned long data_ptr; | ||
| 40 | |||
| 41 | path = btrfs_alloc_path(); | ||
| 42 | if (!path) | ||
| 43 | return -ENOMEM; | ||
| 44 | |||
| 45 | /* lookup the xattr by name */ | ||
| 46 | di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, | ||
| 47 | strlen(name), 0); | ||
| 48 | if (!di || IS_ERR(di)) { | ||
| 49 | ret = -ENODATA; | ||
| 50 | goto out; | ||
| 51 | } | ||
| 52 | |||
| 53 | leaf = path->nodes[0]; | ||
| 54 | /* if size is 0, that means we want the size of the attr */ | ||
| 55 | if (!size) { | ||
| 56 | ret = btrfs_dir_data_len(leaf, di); | ||
| 57 | goto out; | ||
| 58 | } | ||
| 59 | |||
| 60 | /* now get the data out of our dir_item */ | ||
| 61 | if (btrfs_dir_data_len(leaf, di) > size) { | ||
| 62 | ret = -ERANGE; | ||
| 63 | goto out; | ||
| 64 | } | ||
| 65 | data_ptr = (unsigned long)((char *)(di + 1) + | ||
| 66 | btrfs_dir_name_len(leaf, di)); | ||
| 67 | read_extent_buffer(leaf, buffer, data_ptr, | ||
| 68 | btrfs_dir_data_len(leaf, di)); | ||
| 69 | ret = btrfs_dir_data_len(leaf, di); | ||
| 70 | |||
| 71 | out: | ||
| 72 | btrfs_free_path(path); | ||
| 73 | return ret; | ||
| 74 | } | ||
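As with the getxattr(2) contract, size == 0 is a size probe. A hypothetical in-kernel caller (the attribute name is made up) would fetch in two steps:

/* Hypothetical two-step fetch: probe the size, then read the value. */
ssize_t len = __btrfs_getxattr(inode, "user.comment", NULL, 0);
if (len > 0) {
	char *buf = kmalloc(len, GFP_NOFS);
	if (buf) {
		len = __btrfs_getxattr(inode, "user.comment", buf, len);
		/* ... consume buf ... */
		kfree(buf);
	}
}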
| 75 | |||
| 76 | int __btrfs_setxattr(struct inode *inode, const char *name, | ||
| 77 | const void *value, size_t size, int flags) | ||
| 78 | { | ||
| 79 | struct btrfs_dir_item *di; | ||
| 80 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 81 | struct btrfs_trans_handle *trans; | ||
| 82 | struct btrfs_path *path; | ||
| 83 | int ret = 0, mod = 0; | ||
| 84 | |||
| 85 | path = btrfs_alloc_path(); | ||
| 86 | if (!path) | ||
| 87 | return -ENOMEM; | ||
| 88 | |||
| 89 | trans = btrfs_start_transaction(root, 1); | ||
| 90 | btrfs_set_trans_block_group(trans, inode); | ||
| 91 | |||
| 92 | /* first let's see if we already have this xattr */ | ||
| 93 | di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, | ||
| 94 | strlen(name), -1); | ||
| 95 | if (IS_ERR(di)) { | ||
| 96 | ret = PTR_ERR(di); | ||
| 97 | goto out; | ||
| 98 | } | ||
| 99 | |||
| 100 | /* ok we already have this xattr, let's remove it */ | ||
| 101 | if (di) { | ||
| 102 | /* if we want create only, exit */ | ||
| 103 | if (flags & XATTR_CREATE) { | ||
| 104 | ret = -EEXIST; | ||
| 105 | goto out; | ||
| 106 | } | ||
| 107 | |||
| 108 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
| 109 | if (ret) | ||
| 110 | goto out; | ||
| 111 | btrfs_release_path(root, path); | ||
| 112 | |||
| 113 | /* if we don't have a value then we are removing the xattr */ | ||
| 114 | if (!value) { | ||
| 115 | mod = 1; | ||
| 116 | goto out; | ||
| 117 | } | ||
| 118 | } else { | ||
| 119 | btrfs_release_path(root, path); | ||
| 120 | |||
| 121 | if (flags & XATTR_REPLACE) { | ||
| 122 | /* we couldn't find the attr to replace */ | ||
| 123 | ret = -ENODATA; | ||
| 124 | goto out; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | /* ok we have to create a completely new xattr */ | ||
| 129 | ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), | ||
| 130 | value, size, inode->i_ino); | ||
| 131 | if (ret) | ||
| 132 | goto out; | ||
| 133 | mod = 1; | ||
| 134 | |||
| 135 | out: | ||
| 136 | if (mod) { | ||
| 137 | inode->i_ctime = CURRENT_TIME; | ||
| 138 | ret = btrfs_update_inode(trans, root, inode); | ||
| 139 | } | ||
| 140 | |||
| 141 | btrfs_end_transaction(trans, root); | ||
| 142 | btrfs_free_path(path); | ||
| 143 | return ret; | ||
| 144 | } | ||
| 145 | |||
| 146 | ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | ||
| 147 | { | ||
| 148 | struct btrfs_key key, found_key; | ||
| 149 | struct inode *inode = dentry->d_inode; | ||
| 150 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 151 | struct btrfs_path *path; | ||
| 152 | struct btrfs_item *item; | ||
| 153 | struct extent_buffer *leaf; | ||
| 154 | struct btrfs_dir_item *di; | ||
| 155 | int ret = 0, slot, advance; | ||
| 156 | size_t total_size = 0, size_left = size; | ||
| 157 | unsigned long name_ptr; | ||
| 158 | size_t name_len; | ||
| 159 | u32 nritems; | ||
| 160 | |||
| 161 | /* | ||
| 162 | * ok we want all objects associated with this id. | ||
| 163 | * NOTE: we set key.offset = 0; because we want to start with the | ||
| 164 | * first xattr that we find and walk forward | ||
| 165 | */ | ||
| 166 | key.objectid = inode->i_ino; | ||
| 167 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | ||
| 168 | key.offset = 0; | ||
| 169 | |||
| 170 | path = btrfs_alloc_path(); | ||
| 171 | if (!path) | ||
| 172 | return -ENOMEM; | ||
| 173 | path->reada = 2; | ||
| 174 | |||
| 175 | /* search for our xattrs */ | ||
| 176 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 177 | if (ret < 0) | ||
| 178 | goto err; | ||
| 179 | ret = 0; | ||
| 180 | advance = 0; | ||
| 181 | while (1) { | ||
| 182 | leaf = path->nodes[0]; | ||
| 183 | nritems = btrfs_header_nritems(leaf); | ||
| 184 | slot = path->slots[0]; | ||
| 185 | |||
| 186 | /* this is where we start walking through the path */ | ||
| 187 | if (advance || slot >= nritems) { | ||
| 188 | /* | ||
| 189 | * if we've reached the last slot in this leaf we need | ||
| 190 | * to go to the next leaf and reset everything | ||
| 191 | */ | ||
| 192 | if (slot >= nritems - 1) { | ||
| 193 | ret = btrfs_next_leaf(root, path); | ||
| 194 | if (ret) | ||
| 195 | break; | ||
| 196 | leaf = path->nodes[0]; | ||
| 197 | nritems = btrfs_header_nritems(leaf); | ||
| 198 | slot = path->slots[0]; | ||
| 199 | } else { | ||
| 200 | /* | ||
| 201 | * just walking through the slots on this leaf | ||
| 202 | */ | ||
| 203 | slot++; | ||
| 204 | path->slots[0]++; | ||
| 205 | } | ||
| 206 | } | ||
| 207 | advance = 1; | ||
| 208 | |||
| 209 | item = btrfs_item_nr(leaf, slot); | ||
| 210 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 211 | |||
| 212 | /* check to make sure this item is what we want */ | ||
| 213 | if (found_key.objectid != key.objectid) | ||
| 214 | break; | ||
| 215 | if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) | ||
| 216 | break; | ||
| 217 | |||
| 218 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | ||
| 219 | |||
| 220 | name_len = btrfs_dir_name_len(leaf, di); | ||
| 221 | total_size += name_len + 1; | ||
| 222 | |||
| 223 | /* we are just looking for how big our buffer needs to be */ | ||
| 224 | if (!size) | ||
| 225 | continue; | ||
| 226 | |||
| 227 | if (!buffer || (name_len + 1) > size_left) { | ||
| 228 | ret = -ERANGE; | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | |||
| 232 | name_ptr = (unsigned long)(di + 1); | ||
| 233 | read_extent_buffer(leaf, buffer, name_ptr, name_len); | ||
| 234 | buffer[name_len] = '\0'; | ||
| 235 | |||
| 236 | size_left -= name_len + 1; | ||
| 237 | buffer += name_len + 1; | ||
| 238 | } | ||
| 239 | ret = total_size; | ||
| 240 | |||
| 241 | err: | ||
| 242 | btrfs_free_path(path); | ||
| 243 | |||
| 244 | return ret; | ||
| 245 | } | ||
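The size == 0 branch above exists so callers can size their buffer first: userspace typically drives this with the standard two-call listxattr(2) pattern. A minimal sketch follows (hypothetical helper, not part of this patch); note that the attribute list can change between the two calls, so a robust caller would retry on ERANGE:

/* Hypothetical userspace sketch of the two-call sizing pattern. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

static void dump_xattr_names(const char *path)
{
	/* First call: size == 0 only asks how big the buffer must be. */
	ssize_t len = listxattr(path, NULL, 0);
	if (len <= 0)
		return;

	char *buf = malloc(len);
	if (!buf)
		return;

	/* Second call: fill the buffer with NUL-separated names. */
	len = listxattr(path, buf, len);
	if (len > 0) {
		for (char *p = buf; p < buf + len; p += strlen(p) + 1)
			printf("%s\n", p);
	}
	free(buf);
}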
| 246 | |||
| 247 | /* | ||
| 248 | * List of handlers for synthetic system.* attributes. All real on-disk | ||
| 249 | * attributes are handled directly. | ||
| 250 | */ | ||
| 251 | struct xattr_handler *btrfs_xattr_handlers[] = { | ||
| 252 | #ifdef CONFIG_FS_POSIX_ACL | ||
| 253 | &btrfs_xattr_acl_access_handler, | ||
| 254 | &btrfs_xattr_acl_default_handler, | ||
| 255 | #endif | ||
| 256 | NULL, | ||
| 257 | }; | ||
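For context, the NULL terminator matters: the generic_*xattr() helpers used below walk this table until they hit it, matching each handler's prefix against the attribute name; the table is installed on the superblock (sb->s_xattr) during mount elsewhere in this series. A rough sketch of that dispatch, offered as an assumption about the generic code's shape rather than a quote of it:

/*
 * Hypothetical, simplified sketch of the generic dispatch: walk the
 * NULL-terminated table, match each handler's ->prefix against the
 * attribute name, and hand the request to the first match.
 */
static struct xattr_handler *find_handler(struct xattr_handler **handlers,
					  const char *name)
{
	struct xattr_handler *h;

	for (; (h = *handlers) != NULL; handlers++) {
		if (!strncmp(name, h->prefix, strlen(h->prefix)))
			return h;
	}
	return NULL;
}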
| 258 | |||
| 259 | /* | ||
| 260 | * Check if the attribute is in a supported namespace. | ||
| 261 | * | ||
| 262 | * This is applied after the check for the synthetic attributes in the system | ||
| 263 | * namespace. | ||
| 264 | */ | ||
| 265 | static bool btrfs_is_valid_xattr(const char *name) | ||
| 266 | { | ||
| 267 | return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || | ||
| 268 | !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || | ||
| 269 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | ||
| 270 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | ||
| 271 | } | ||
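In other words, only the four standard namespaces (security, system, trusted, user) ever reach the filesystem's own xattr code. A hypothetical userspace illustration (path and attribute names invented for the example):

/* Hypothetical illustration of the namespace filter. */
#include <errno.h>
#include <stdio.h>
#include <sys/xattr.h>

static void try_namespaces(const char *path)
{
	/* These prefixes pass btrfs_is_valid_xattr() and reach the fs: */
	setxattr(path, "user.comment", "hi", 2, 0);
	setxattr(path, "trusted.example", "x", 1, 0); /* needs CAP_SYS_ADMIN */

	/* With this code, an unknown namespace comes back as EOPNOTSUPP: */
	if (setxattr(path, "bogus.name", "x", 1, 0) < 0 && errno == EOPNOTSUPP)
		fprintf(stderr, "unsupported namespace, as expected\n");
}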
| 272 | |||
| 273 | ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | ||
| 274 | void *buffer, size_t size) | ||
| 275 | { | ||
| 276 | /* | ||
| 277 | * If this is a request for a synthetic attribute in the system.* | ||
| 278 | * namespace use the generic infrastructure to resolve a handler | ||
| 279 | * for it via sb->s_xattr. | ||
| 280 | */ | ||
| 281 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
| 282 | return generic_getxattr(dentry, name, buffer, size); | ||
| 283 | |||
| 284 | if (!btrfs_is_valid_xattr(name)) | ||
| 285 | return -EOPNOTSUPP; | ||
| 286 | return __btrfs_getxattr(dentry->d_inode, name, buffer, size); | ||
| 287 | } | ||
| 288 | |||
| 289 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
| 290 | size_t size, int flags) | ||
| 291 | { | ||
| 292 | /* | ||
| 293 | * If this is a request for a synthetic attribute in the system.* | ||
| 294 | * namespace use the generic infrastructure to resolve a handler | ||
| 295 | * for it via sb->s_xattr. | ||
| 296 | */ | ||
| 297 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
| 298 | return generic_setxattr(dentry, name, value, size, flags); | ||
| 299 | |||
| 300 | if (!btrfs_is_valid_xattr(name)) | ||
| 301 | return -EOPNOTSUPP; | ||
| 302 | |||
| 303 | if (size == 0) | ||
| 304 | value = ""; /* empty EA, do not remove */ | ||
| 305 | return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); | ||
| 306 | } | ||
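One subtlety worth noting: because of the empty-string substitution above, setting an attribute with size == 0 stores a zero-length EA instead of deleting it; deletion only happens through btrfs_removexattr() below, which passes a NULL value with XATTR_REPLACE. A hypothetical demonstration from userspace:

/* Hypothetical demo: empty set vs. removal (path and name invented). */
#include <sys/xattr.h>

static void empty_vs_remove(const char *path)
{
	/* Stores a zero-length EA; the attribute still exists ... */
	setxattr(path, "user.flag", NULL, 0, 0);

	/* ... so getxattr() reports length 0 rather than failing. */
	ssize_t n = getxattr(path, "user.flag", NULL, 0); /* n == 0 */

	/* Deletion goes through removexattr(), i.e. a NULL value with
	 * XATTR_REPLACE inside the filesystem. */
	removexattr(path, "user.flag");
	(void)n;
}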
| 307 | |||
| 308 | int btrfs_removexattr(struct dentry *dentry, const char *name) | ||
| 309 | { | ||
| 310 | /* | ||
| 311 | * If this is a request for a synthetic attribute in the system.* | ||
| 312 | * namespace use the generic infrastructure to resolve a handler | ||
| 313 | * for it via sb->s_xattr. | ||
| 314 | */ | ||
| 315 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
| 316 | return generic_removexattr(dentry, name); | ||
| 317 | |||
| 318 | if (!btrfs_is_valid_xattr(name)) | ||
| 319 | return -EOPNOTSUPP; | ||
| 320 | return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); | ||
| 321 | } | ||
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h new file mode 100644 index 00000000000..5b1d08f8e68 --- /dev/null +++ b/fs/btrfs/xattr.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Red Hat. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __XATTR__ | ||
| 20 | #define __XATTR__ | ||
| 21 | |||
| 22 | #include <linux/xattr.h> | ||
| 23 | |||
| 24 | extern struct xattr_handler btrfs_xattr_acl_access_handler; | ||
| 25 | extern struct xattr_handler btrfs_xattr_acl_default_handler; | ||
| 26 | extern struct xattr_handler *btrfs_xattr_handlers[]; | ||
| 27 | |||
| 28 | extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, | ||
| 29 | void *buffer, size_t size); | ||
| 30 | extern int __btrfs_setxattr(struct inode *inode, const char *name, | ||
| 31 | const void *value, size_t size, int flags); | ||
| 32 | |||
| 33 | extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | ||
| 34 | void *buffer, size_t size); | ||
| 35 | extern int btrfs_setxattr(struct dentry *dentry, const char *name, | ||
| 36 | const void *value, size_t size, int flags); | ||
| 37 | extern int btrfs_removexattr(struct dentry *dentry, const char *name); | ||
| 38 | |||
| 39 | #endif /* __XATTR__ */ | ||
