diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 218 |
1 files changed, 58 insertions, 160 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 4cd46abe8434..6fe21d2b8847 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -1,46 +1,20 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | ||
3 | * | 4 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 5 | * This program is free software; you can redistribute it and/or |
5 | * under the terms of version 2 of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
6 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
7 | * | 8 | * |
8 | * This program is distributed in the hope that it would be useful, but | 9 | * This program is distributed in the hope that it would be useful, |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * | 12 | * GNU General Public License for more details. |
12 | * Further, this software is distributed without any warranty that it is | ||
13 | * free of the rightful claim of any third person regarding infringement | ||
14 | * or the like. Any license provided herein, whether implied or | ||
15 | * otherwise, applies only to this software file. Patent licenses, if | ||
16 | * any, provided herein do not apply to combinations of this program with | ||
17 | * other software, or any other product whatsoever. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
22 | * | ||
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | ||
24 | * Mountain View, CA 94043, or: | ||
25 | * | ||
26 | * http://www.sgi.com | ||
27 | * | ||
28 | * For further information regarding this notice, see: | ||
29 | * | ||
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * The xfs_buf.c code provides an abstract buffer cache model on top | ||
35 | * of the Linux page cache. Cached metadata blocks for a file system | ||
36 | * are hashed to the inode for the block device. xfs_buf.c assembles | ||
37 | * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O. | ||
38 | * | ||
39 | * Written by Steve Lord, Jim Mostek, Russell Cattelan | ||
40 | * and Rajagopal Ananthanarayanan ("ananth") at SGI. | ||
41 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
42 | */ | 17 | */ |
43 | |||
44 | #include <linux/stddef.h> | 18 | #include <linux/stddef.h> |
45 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
46 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
@@ -55,13 +29,8 @@ | |||
55 | #include <linux/blkdev.h> | 29 | #include <linux/blkdev.h> |
56 | #include <linux/hash.h> | 30 | #include <linux/hash.h> |
57 | #include <linux/kthread.h> | 31 | #include <linux/kthread.h> |
58 | |||
59 | #include "xfs_linux.h" | 32 | #include "xfs_linux.h" |
60 | 33 | ||
61 | /* | ||
62 | * File wide globals | ||
63 | */ | ||
64 | |||
65 | STATIC kmem_cache_t *pagebuf_zone; | 34 | STATIC kmem_cache_t *pagebuf_zone; |
66 | STATIC kmem_shaker_t pagebuf_shake; | 35 | STATIC kmem_shaker_t pagebuf_shake; |
67 | STATIC int xfsbufd_wakeup(int, gfp_t); | 36 | STATIC int xfsbufd_wakeup(int, gfp_t); |
@@ -70,10 +39,6 @@ STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); | |||
70 | STATIC struct workqueue_struct *xfslogd_workqueue; | 39 | STATIC struct workqueue_struct *xfslogd_workqueue; |
71 | struct workqueue_struct *xfsdatad_workqueue; | 40 | struct workqueue_struct *xfsdatad_workqueue; |
72 | 41 | ||
73 | /* | ||
74 | * Pagebuf debugging | ||
75 | */ | ||
76 | |||
77 | #ifdef PAGEBUF_TRACE | 42 | #ifdef PAGEBUF_TRACE |
78 | void | 43 | void |
79 | pagebuf_trace( | 44 | pagebuf_trace( |
@@ -112,10 +77,6 @@ ktrace_t *pagebuf_trace_buf; | |||
112 | # define PB_GET_OWNER(pb) do { } while (0) | 77 | # define PB_GET_OWNER(pb) do { } while (0) |
113 | #endif | 78 | #endif |
114 | 79 | ||
115 | /* | ||
116 | * Pagebuf allocation / freeing. | ||
117 | */ | ||
118 | |||
119 | #define pb_to_gfp(flags) \ | 80 | #define pb_to_gfp(flags) \ |
120 | ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ | 81 | ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ |
121 | ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) | 82 | ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) |
@@ -123,7 +84,6 @@ ktrace_t *pagebuf_trace_buf; | |||
123 | #define pb_to_km(flags) \ | 84 | #define pb_to_km(flags) \ |
124 | (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) | 85 | (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) |
125 | 86 | ||
126 | |||
127 | #define pagebuf_allocate(flags) \ | 87 | #define pagebuf_allocate(flags) \ |
128 | kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) | 88 | kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) |
129 | #define pagebuf_deallocate(pb) \ | 89 | #define pagebuf_deallocate(pb) \ |
@@ -286,7 +246,7 @@ _pagebuf_initialize( | |||
286 | * most cases but may be reset (e.g. XFS recovery). | 246 | * most cases but may be reset (e.g. XFS recovery). |
287 | */ | 247 | */ |
288 | pb->pb_buffer_length = pb->pb_count_desired = range_length; | 248 | pb->pb_buffer_length = pb->pb_count_desired = range_length; |
289 | pb->pb_flags = flags | PBF_NONE; | 249 | pb->pb_flags = flags; |
290 | pb->pb_bn = XFS_BUF_DADDR_NULL; | 250 | pb->pb_bn = XFS_BUF_DADDR_NULL; |
291 | atomic_set(&pb->pb_pin_count, 0); | 251 | atomic_set(&pb->pb_pin_count, 0); |
292 | init_waitqueue_head(&pb->pb_waiters); | 252 | init_waitqueue_head(&pb->pb_waiters); |
@@ -458,14 +418,8 @@ _pagebuf_lookup_pages( | |||
458 | unlock_page(bp->pb_pages[i]); | 418 | unlock_page(bp->pb_pages[i]); |
459 | } | 419 | } |
460 | 420 | ||
461 | if (page_count) { | 421 | if (page_count == bp->pb_page_count) |
462 | /* if we have any uptodate pages, mark that in the buffer */ | 422 | bp->pb_flags |= PBF_DONE; |
463 | bp->pb_flags &= ~PBF_NONE; | ||
464 | |||
465 | /* if some pages aren't uptodate, mark that in the buffer */ | ||
466 | if (page_count != bp->pb_page_count) | ||
467 | bp->pb_flags |= PBF_PARTIAL; | ||
468 | } | ||
469 | 423 | ||
470 | PB_TRACE(bp, "lookup_pages", (long)page_count); | 424 | PB_TRACE(bp, "lookup_pages", (long)page_count); |
471 | return error; | 425 | return error; |
@@ -676,7 +630,7 @@ xfs_buf_read_flags( | |||
676 | 630 | ||
677 | pb = xfs_buf_get_flags(target, ioff, isize, flags); | 631 | pb = xfs_buf_get_flags(target, ioff, isize, flags); |
678 | if (pb) { | 632 | if (pb) { |
679 | if (PBF_NOT_DONE(pb)) { | 633 | if (!XFS_BUF_ISDONE(pb)) { |
680 | PB_TRACE(pb, "read", (unsigned long)flags); | 634 | PB_TRACE(pb, "read", (unsigned long)flags); |
681 | XFS_STATS_INC(pb_get_read); | 635 | XFS_STATS_INC(pb_get_read); |
682 | pagebuf_iostart(pb, flags); | 636 | pagebuf_iostart(pb, flags); |
@@ -813,7 +767,7 @@ pagebuf_get_no_daddr( | |||
813 | bp = pagebuf_allocate(0); | 767 | bp = pagebuf_allocate(0); |
814 | if (unlikely(bp == NULL)) | 768 | if (unlikely(bp == NULL)) |
815 | goto fail; | 769 | goto fail; |
816 | _pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO); | 770 | _pagebuf_initialize(bp, target, 0, len, 0); |
817 | 771 | ||
818 | try_again: | 772 | try_again: |
819 | data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); | 773 | data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); |
@@ -876,39 +830,18 @@ pagebuf_rele( | |||
876 | 830 | ||
877 | PB_TRACE(pb, "rele", pb->pb_relse); | 831 | PB_TRACE(pb, "rele", pb->pb_relse); |
878 | 832 | ||
879 | /* | ||
880 | * pagebuf_lookup buffers are not hashed, not delayed write, | ||
881 | * and don't have their own release routines. Special case. | ||
882 | */ | ||
883 | if (unlikely(!hash)) { | ||
884 | ASSERT(!pb->pb_relse); | ||
885 | if (atomic_dec_and_test(&pb->pb_hold)) | ||
886 | xfs_buf_free(pb); | ||
887 | return; | ||
888 | } | ||
889 | |||
890 | if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { | 833 | if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { |
891 | int do_free = 1; | ||
892 | |||
893 | if (pb->pb_relse) { | 834 | if (pb->pb_relse) { |
894 | atomic_inc(&pb->pb_hold); | 835 | atomic_inc(&pb->pb_hold); |
895 | spin_unlock(&hash->bh_lock); | 836 | spin_unlock(&hash->bh_lock); |
896 | (*(pb->pb_relse)) (pb); | 837 | (*(pb->pb_relse)) (pb); |
897 | spin_lock(&hash->bh_lock); | 838 | } else if (pb->pb_flags & PBF_FS_MANAGED) { |
898 | do_free = 0; | ||
899 | } | ||
900 | |||
901 | if (pb->pb_flags & PBF_FS_MANAGED) { | ||
902 | do_free = 0; | ||
903 | } | ||
904 | |||
905 | if (do_free) { | ||
906 | ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0); | ||
907 | list_del_init(&pb->pb_hash_list); | ||
908 | spin_unlock(&hash->bh_lock); | 839 | spin_unlock(&hash->bh_lock); |
909 | pagebuf_free(pb); | ||
910 | } else { | 840 | } else { |
841 | ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q))); | ||
842 | list_del_init(&pb->pb_hash_list); | ||
911 | spin_unlock(&hash->bh_lock); | 843 | spin_unlock(&hash->bh_lock); |
844 | pagebuf_free(pb); | ||
912 | } | 845 | } |
913 | } else { | 846 | } else { |
914 | /* | 847 | /* |
@@ -1121,21 +1054,18 @@ pagebuf_iodone_work( | |||
1121 | void | 1054 | void |
1122 | pagebuf_iodone( | 1055 | pagebuf_iodone( |
1123 | xfs_buf_t *pb, | 1056 | xfs_buf_t *pb, |
1124 | int dataio, | ||
1125 | int schedule) | 1057 | int schedule) |
1126 | { | 1058 | { |
1127 | pb->pb_flags &= ~(PBF_READ | PBF_WRITE); | 1059 | pb->pb_flags &= ~(PBF_READ | PBF_WRITE); |
1128 | if (pb->pb_error == 0) { | 1060 | if (pb->pb_error == 0) |
1129 | pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE); | 1061 | pb->pb_flags |= PBF_DONE; |
1130 | } | ||
1131 | 1062 | ||
1132 | PB_TRACE(pb, "iodone", pb->pb_iodone); | 1063 | PB_TRACE(pb, "iodone", pb->pb_iodone); |
1133 | 1064 | ||
1134 | if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { | 1065 | if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { |
1135 | if (schedule) { | 1066 | if (schedule) { |
1136 | INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); | 1067 | INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); |
1137 | queue_work(dataio ? xfsdatad_workqueue : | 1068 | queue_work(xfslogd_workqueue, &pb->pb_iodone_work); |
1138 | xfslogd_workqueue, &pb->pb_iodone_work); | ||
1139 | } else { | 1069 | } else { |
1140 | pagebuf_iodone_work(pb); | 1070 | pagebuf_iodone_work(pb); |
1141 | } | 1071 | } |
@@ -1235,7 +1165,7 @@ _pagebuf_iodone( | |||
1235 | { | 1165 | { |
1236 | if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { | 1166 | if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { |
1237 | pb->pb_locked = 0; | 1167 | pb->pb_locked = 0; |
1238 | pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule); | 1168 | pagebuf_iodone(pb, schedule); |
1239 | } | 1169 | } |
1240 | } | 1170 | } |
1241 | 1171 | ||
@@ -1304,6 +1234,11 @@ _pagebuf_ioapply( | |||
1304 | rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; | 1234 | rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; |
1305 | } | 1235 | } |
1306 | 1236 | ||
1237 | if (pb->pb_flags & PBF_ORDERED) { | ||
1238 | ASSERT(!(pb->pb_flags & PBF_READ)); | ||
1239 | rw = WRITE_BARRIER; | ||
1240 | } | ||
1241 | |||
1307 | /* Special code path for reading a sub page size pagebuf in -- | 1242 | /* Special code path for reading a sub page size pagebuf in -- |
1308 | * we populate up the whole page, and hence the other metadata | 1243 | * we populate up the whole page, and hence the other metadata |
1309 | * in the same page. This optimization is only valid when the | 1244 | * in the same page. This optimization is only valid when the |
@@ -1750,8 +1685,8 @@ STATIC int xfsbufd_force_sleep; | |||
1750 | 1685 | ||
1751 | STATIC int | 1686 | STATIC int |
1752 | xfsbufd_wakeup( | 1687 | xfsbufd_wakeup( |
1753 | int priority, | 1688 | int priority, |
1754 | gfp_t mask) | 1689 | gfp_t mask) |
1755 | { | 1690 | { |
1756 | if (xfsbufd_force_sleep) | 1691 | if (xfsbufd_force_sleep) |
1757 | return 0; | 1692 | return 0; |
@@ -1781,8 +1716,8 @@ xfsbufd( | |||
1781 | xfsbufd_force_sleep = 0; | 1716 | xfsbufd_force_sleep = 0; |
1782 | } | 1717 | } |
1783 | 1718 | ||
1784 | schedule_timeout_interruptible | 1719 | schedule_timeout_interruptible( |
1785 | (xfs_buf_timer_centisecs * msecs_to_jiffies(10)); | 1720 | xfs_buf_timer_centisecs * msecs_to_jiffies(10)); |
1786 | 1721 | ||
1787 | age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1722 | age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1788 | spin_lock(&pbd_delwrite_lock); | 1723 | spin_lock(&pbd_delwrite_lock); |
@@ -1891,14 +1826,22 @@ xfs_flush_buftarg( | |||
1891 | return pincount; | 1826 | return pincount; |
1892 | } | 1827 | } |
1893 | 1828 | ||
1894 | STATIC int | 1829 | int __init |
1895 | xfs_buf_daemons_start(void) | 1830 | pagebuf_init(void) |
1896 | { | 1831 | { |
1897 | int error = -ENOMEM; | 1832 | int error = -ENOMEM; |
1898 | 1833 | ||
1834 | #ifdef PAGEBUF_TRACE | ||
1835 | pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); | ||
1836 | #endif | ||
1837 | |||
1838 | pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); | ||
1839 | if (!pagebuf_zone) | ||
1840 | goto out_free_trace_buf; | ||
1841 | |||
1899 | xfslogd_workqueue = create_workqueue("xfslogd"); | 1842 | xfslogd_workqueue = create_workqueue("xfslogd"); |
1900 | if (!xfslogd_workqueue) | 1843 | if (!xfslogd_workqueue) |
1901 | goto out; | 1844 | goto out_free_buf_zone; |
1902 | 1845 | ||
1903 | xfsdatad_workqueue = create_workqueue("xfsdatad"); | 1846 | xfsdatad_workqueue = create_workqueue("xfsdatad"); |
1904 | if (!xfsdatad_workqueue) | 1847 | if (!xfsdatad_workqueue) |
@@ -1909,82 +1852,37 @@ xfs_buf_daemons_start(void) | |||
1909 | error = PTR_ERR(xfsbufd_task); | 1852 | error = PTR_ERR(xfsbufd_task); |
1910 | goto out_destroy_xfsdatad_workqueue; | 1853 | goto out_destroy_xfsdatad_workqueue; |
1911 | } | 1854 | } |
1855 | |||
1856 | pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); | ||
1857 | if (!pagebuf_shake) | ||
1858 | goto out_stop_xfsbufd; | ||
1859 | |||
1912 | return 0; | 1860 | return 0; |
1913 | 1861 | ||
1862 | out_stop_xfsbufd: | ||
1863 | kthread_stop(xfsbufd_task); | ||
1914 | out_destroy_xfsdatad_workqueue: | 1864 | out_destroy_xfsdatad_workqueue: |
1915 | destroy_workqueue(xfsdatad_workqueue); | 1865 | destroy_workqueue(xfsdatad_workqueue); |
1916 | out_destroy_xfslogd_workqueue: | 1866 | out_destroy_xfslogd_workqueue: |
1917 | destroy_workqueue(xfslogd_workqueue); | 1867 | destroy_workqueue(xfslogd_workqueue); |
1918 | out: | ||
1919 | return error; | ||
1920 | } | ||
1921 | |||
1922 | /* | ||
1923 | * Note: do not mark as __exit, it is called from pagebuf_terminate. | ||
1924 | */ | ||
1925 | STATIC void | ||
1926 | xfs_buf_daemons_stop(void) | ||
1927 | { | ||
1928 | kthread_stop(xfsbufd_task); | ||
1929 | destroy_workqueue(xfslogd_workqueue); | ||
1930 | destroy_workqueue(xfsdatad_workqueue); | ||
1931 | } | ||
1932 | |||
1933 | /* | ||
1934 | * Initialization and Termination | ||
1935 | */ | ||
1936 | |||
1937 | int __init | ||
1938 | pagebuf_init(void) | ||
1939 | { | ||
1940 | int error = -ENOMEM; | ||
1941 | |||
1942 | pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); | ||
1943 | if (!pagebuf_zone) | ||
1944 | goto out; | ||
1945 | |||
1946 | #ifdef PAGEBUF_TRACE | ||
1947 | pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); | ||
1948 | #endif | ||
1949 | |||
1950 | error = xfs_buf_daemons_start(); | ||
1951 | if (error) | ||
1952 | goto out_free_buf_zone; | ||
1953 | |||
1954 | pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); | ||
1955 | if (!pagebuf_shake) { | ||
1956 | error = -ENOMEM; | ||
1957 | goto out_stop_daemons; | ||
1958 | } | ||
1959 | |||
1960 | return 0; | ||
1961 | |||
1962 | out_stop_daemons: | ||
1963 | xfs_buf_daemons_stop(); | ||
1964 | out_free_buf_zone: | 1868 | out_free_buf_zone: |
1869 | kmem_zone_destroy(pagebuf_zone); | ||
1870 | out_free_trace_buf: | ||
1965 | #ifdef PAGEBUF_TRACE | 1871 | #ifdef PAGEBUF_TRACE |
1966 | ktrace_free(pagebuf_trace_buf); | 1872 | ktrace_free(pagebuf_trace_buf); |
1967 | #endif | 1873 | #endif |
1968 | kmem_zone_destroy(pagebuf_zone); | ||
1969 | out: | ||
1970 | return error; | 1874 | return error; |
1971 | } | 1875 | } |
1972 | 1876 | ||
1973 | |||
1974 | /* | ||
1975 | * pagebuf_terminate. | ||
1976 | * | ||
1977 | * Note: do not mark as __exit, this is also called from the __init code. | ||
1978 | */ | ||
1979 | void | 1877 | void |
1980 | pagebuf_terminate(void) | 1878 | pagebuf_terminate(void) |
1981 | { | 1879 | { |
1982 | xfs_buf_daemons_stop(); | 1880 | kmem_shake_deregister(pagebuf_shake); |
1983 | 1881 | kthread_stop(xfsbufd_task); | |
1882 | destroy_workqueue(xfsdatad_workqueue); | ||
1883 | destroy_workqueue(xfslogd_workqueue); | ||
1884 | kmem_zone_destroy(pagebuf_zone); | ||
1984 | #ifdef PAGEBUF_TRACE | 1885 | #ifdef PAGEBUF_TRACE |
1985 | ktrace_free(pagebuf_trace_buf); | 1886 | ktrace_free(pagebuf_trace_buf); |
1986 | #endif | 1887 | #endif |
1987 | |||
1988 | kmem_zone_destroy(pagebuf_zone); | ||
1989 | kmem_shake_deregister(pagebuf_shake); | ||
1990 | } | 1888 | } |