about summary refs log tree commit diff stats
path: root/fs/xfs/linux-2.6/xfs_buf.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 218
1 files changed, 58 insertions, 160 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4cd46abe8434..6fe21d2b8847 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1,46 +1,20 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify it 5 * This program is free software; you can redistribute it and/or
5 * under the terms of version 2 of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
7 * 8 *
8 * This program is distributed in the hope that it would be useful, but 9 * This program is distributed in the hope that it would be useful,
9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * 12 * GNU General Public License for more details.
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33/*
34 * The xfs_buf.c code provides an abstract buffer cache model on top
35 * of the Linux page cache. Cached metadata blocks for a file system
36 * are hashed to the inode for the block device. xfs_buf.c assembles
37 * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
38 *
39 * Written by Steve Lord, Jim Mostek, Russell Cattelan
40 * and Rajagopal Ananthanarayanan ("ananth") at SGI.
41 * 13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
42 */ 17 */
43
44#include <linux/stddef.h> 18#include <linux/stddef.h>
45#include <linux/errno.h> 19#include <linux/errno.h>
46#include <linux/slab.h> 20#include <linux/slab.h>
@@ -55,13 +29,8 @@
55#include <linux/blkdev.h> 29#include <linux/blkdev.h>
56#include <linux/hash.h> 30#include <linux/hash.h>
57#include <linux/kthread.h> 31#include <linux/kthread.h>
58
59#include "xfs_linux.h" 32#include "xfs_linux.h"
60 33
61/*
62 * File wide globals
63 */
64
65STATIC kmem_cache_t *pagebuf_zone; 34STATIC kmem_cache_t *pagebuf_zone;
66STATIC kmem_shaker_t pagebuf_shake; 35STATIC kmem_shaker_t pagebuf_shake;
67STATIC int xfsbufd_wakeup(int, gfp_t); 36STATIC int xfsbufd_wakeup(int, gfp_t);
@@ -70,10 +39,6 @@ STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
70STATIC struct workqueue_struct *xfslogd_workqueue; 39STATIC struct workqueue_struct *xfslogd_workqueue;
71struct workqueue_struct *xfsdatad_workqueue; 40struct workqueue_struct *xfsdatad_workqueue;
72 41
73/*
74 * Pagebuf debugging
75 */
76
77#ifdef PAGEBUF_TRACE 42#ifdef PAGEBUF_TRACE
78void 43void
79pagebuf_trace( 44pagebuf_trace(
@@ -112,10 +77,6 @@ ktrace_t *pagebuf_trace_buf;
112# define PB_GET_OWNER(pb) do { } while (0) 77# define PB_GET_OWNER(pb) do { } while (0)
113#endif 78#endif
114 79
115/*
116 * Pagebuf allocation / freeing.
117 */
118
119#define pb_to_gfp(flags) \ 80#define pb_to_gfp(flags) \
120 ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ 81 ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
121 ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) 82 ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
@@ -123,7 +84,6 @@ ktrace_t *pagebuf_trace_buf;
123#define pb_to_km(flags) \ 84#define pb_to_km(flags) \
124 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) 85 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
125 86
126
127#define pagebuf_allocate(flags) \ 87#define pagebuf_allocate(flags) \
128 kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) 88 kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
129#define pagebuf_deallocate(pb) \ 89#define pagebuf_deallocate(pb) \
@@ -286,7 +246,7 @@ _pagebuf_initialize(
286 * most cases but may be reset (e.g. XFS recovery). 246 * most cases but may be reset (e.g. XFS recovery).
287 */ 247 */
288 pb->pb_buffer_length = pb->pb_count_desired = range_length; 248 pb->pb_buffer_length = pb->pb_count_desired = range_length;
289 pb->pb_flags = flags | PBF_NONE; 249 pb->pb_flags = flags;
290 pb->pb_bn = XFS_BUF_DADDR_NULL; 250 pb->pb_bn = XFS_BUF_DADDR_NULL;
291 atomic_set(&pb->pb_pin_count, 0); 251 atomic_set(&pb->pb_pin_count, 0);
292 init_waitqueue_head(&pb->pb_waiters); 252 init_waitqueue_head(&pb->pb_waiters);
@@ -458,14 +418,8 @@ _pagebuf_lookup_pages(
458 unlock_page(bp->pb_pages[i]); 418 unlock_page(bp->pb_pages[i]);
459 } 419 }
460 420
461 if (page_count) { 421 if (page_count == bp->pb_page_count)
462 /* if we have any uptodate pages, mark that in the buffer */ 422 bp->pb_flags |= PBF_DONE;
463 bp->pb_flags &= ~PBF_NONE;
464
465 /* if some pages aren't uptodate, mark that in the buffer */
466 if (page_count != bp->pb_page_count)
467 bp->pb_flags |= PBF_PARTIAL;
468 }
469 423
470 PB_TRACE(bp, "lookup_pages", (long)page_count); 424 PB_TRACE(bp, "lookup_pages", (long)page_count);
471 return error; 425 return error;
@@ -676,7 +630,7 @@ xfs_buf_read_flags(
676 630
677 pb = xfs_buf_get_flags(target, ioff, isize, flags); 631 pb = xfs_buf_get_flags(target, ioff, isize, flags);
678 if (pb) { 632 if (pb) {
679 if (PBF_NOT_DONE(pb)) { 633 if (!XFS_BUF_ISDONE(pb)) {
680 PB_TRACE(pb, "read", (unsigned long)flags); 634 PB_TRACE(pb, "read", (unsigned long)flags);
681 XFS_STATS_INC(pb_get_read); 635 XFS_STATS_INC(pb_get_read);
682 pagebuf_iostart(pb, flags); 636 pagebuf_iostart(pb, flags);
@@ -813,7 +767,7 @@ pagebuf_get_no_daddr(
813 bp = pagebuf_allocate(0); 767 bp = pagebuf_allocate(0);
814 if (unlikely(bp == NULL)) 768 if (unlikely(bp == NULL))
815 goto fail; 769 goto fail;
816 _pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO); 770 _pagebuf_initialize(bp, target, 0, len, 0);
817 771
818 try_again: 772 try_again:
819 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); 773 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -876,39 +830,18 @@ pagebuf_rele(
876 830
877 PB_TRACE(pb, "rele", pb->pb_relse); 831 PB_TRACE(pb, "rele", pb->pb_relse);
878 832
879 /*
880 * pagebuf_lookup buffers are not hashed, not delayed write,
881 * and don't have their own release routines. Special case.
882 */
883 if (unlikely(!hash)) {
884 ASSERT(!pb->pb_relse);
885 if (atomic_dec_and_test(&pb->pb_hold))
886 xfs_buf_free(pb);
887 return;
888 }
889
890 if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { 833 if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
891 int do_free = 1;
892
893 if (pb->pb_relse) { 834 if (pb->pb_relse) {
894 atomic_inc(&pb->pb_hold); 835 atomic_inc(&pb->pb_hold);
895 spin_unlock(&hash->bh_lock); 836 spin_unlock(&hash->bh_lock);
896 (*(pb->pb_relse)) (pb); 837 (*(pb->pb_relse)) (pb);
897 spin_lock(&hash->bh_lock); 838 } else if (pb->pb_flags & PBF_FS_MANAGED) {
898 do_free = 0;
899 }
900
901 if (pb->pb_flags & PBF_FS_MANAGED) {
902 do_free = 0;
903 }
904
905 if (do_free) {
906 ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0);
907 list_del_init(&pb->pb_hash_list);
908 spin_unlock(&hash->bh_lock); 839 spin_unlock(&hash->bh_lock);
909 pagebuf_free(pb);
910 } else { 840 } else {
841 ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));
842 list_del_init(&pb->pb_hash_list);
911 spin_unlock(&hash->bh_lock); 843 spin_unlock(&hash->bh_lock);
844 pagebuf_free(pb);
912 } 845 }
913 } else { 846 } else {
914 /* 847 /*
@@ -1121,21 +1054,18 @@ pagebuf_iodone_work(
1121void 1054void
1122pagebuf_iodone( 1055pagebuf_iodone(
1123 xfs_buf_t *pb, 1056 xfs_buf_t *pb,
1124 int dataio,
1125 int schedule) 1057 int schedule)
1126{ 1058{
1127 pb->pb_flags &= ~(PBF_READ | PBF_WRITE); 1059 pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
1128 if (pb->pb_error == 0) { 1060 if (pb->pb_error == 0)
1129 pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE); 1061 pb->pb_flags |= PBF_DONE;
1130 }
1131 1062
1132 PB_TRACE(pb, "iodone", pb->pb_iodone); 1063 PB_TRACE(pb, "iodone", pb->pb_iodone);
1133 1064
1134 if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { 1065 if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
1135 if (schedule) { 1066 if (schedule) {
1136 INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); 1067 INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
1137 queue_work(dataio ? xfsdatad_workqueue : 1068 queue_work(xfslogd_workqueue, &pb->pb_iodone_work);
1138 xfslogd_workqueue, &pb->pb_iodone_work);
1139 } else { 1069 } else {
1140 pagebuf_iodone_work(pb); 1070 pagebuf_iodone_work(pb);
1141 } 1071 }
@@ -1235,7 +1165,7 @@ _pagebuf_iodone(
1235{ 1165{
1236 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { 1166 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
1237 pb->pb_locked = 0; 1167 pb->pb_locked = 0;
1238 pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule); 1168 pagebuf_iodone(pb, schedule);
1239 } 1169 }
1240} 1170}
1241 1171
@@ -1304,6 +1234,11 @@ _pagebuf_ioapply(
1304 rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; 1234 rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
1305 } 1235 }
1306 1236
1237 if (pb->pb_flags & PBF_ORDERED) {
1238 ASSERT(!(pb->pb_flags & PBF_READ));
1239 rw = WRITE_BARRIER;
1240 }
1241
1307 /* Special code path for reading a sub page size pagebuf in -- 1242 /* Special code path for reading a sub page size pagebuf in --
1308 * we populate up the whole page, and hence the other metadata 1243 * we populate up the whole page, and hence the other metadata
1309 * in the same page. This optimization is only valid when the 1244 * in the same page. This optimization is only valid when the
@@ -1750,8 +1685,8 @@ STATIC int xfsbufd_force_sleep;
1750 1685
1751STATIC int 1686STATIC int
1752xfsbufd_wakeup( 1687xfsbufd_wakeup(
1753 int priority, 1688 int priority,
1754 gfp_t mask) 1689 gfp_t mask)
1755{ 1690{
1756 if (xfsbufd_force_sleep) 1691 if (xfsbufd_force_sleep)
1757 return 0; 1692 return 0;
@@ -1781,8 +1716,8 @@ xfsbufd(
1781 xfsbufd_force_sleep = 0; 1716 xfsbufd_force_sleep = 0;
1782 } 1717 }
1783 1718
1784 schedule_timeout_interruptible 1719 schedule_timeout_interruptible(
1785 (xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1720 xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1786 1721
1787 age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1722 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1788 spin_lock(&pbd_delwrite_lock); 1723 spin_lock(&pbd_delwrite_lock);
@@ -1891,14 +1826,22 @@ xfs_flush_buftarg(
1891 return pincount; 1826 return pincount;
1892} 1827}
1893 1828
1894STATIC int 1829int __init
1895xfs_buf_daemons_start(void) 1830pagebuf_init(void)
1896{ 1831{
1897 int error = -ENOMEM; 1832 int error = -ENOMEM;
1898 1833
1834#ifdef PAGEBUF_TRACE
1835 pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
1836#endif
1837
1838 pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
1839 if (!pagebuf_zone)
1840 goto out_free_trace_buf;
1841
1899 xfslogd_workqueue = create_workqueue("xfslogd"); 1842 xfslogd_workqueue = create_workqueue("xfslogd");
1900 if (!xfslogd_workqueue) 1843 if (!xfslogd_workqueue)
1901 goto out; 1844 goto out_free_buf_zone;
1902 1845
1903 xfsdatad_workqueue = create_workqueue("xfsdatad"); 1846 xfsdatad_workqueue = create_workqueue("xfsdatad");
1904 if (!xfsdatad_workqueue) 1847 if (!xfsdatad_workqueue)
@@ -1909,82 +1852,37 @@ xfs_buf_daemons_start(void)
1909 error = PTR_ERR(xfsbufd_task); 1852 error = PTR_ERR(xfsbufd_task);
1910 goto out_destroy_xfsdatad_workqueue; 1853 goto out_destroy_xfsdatad_workqueue;
1911 } 1854 }
1855
1856 pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
1857 if (!pagebuf_shake)
1858 goto out_stop_xfsbufd;
1859
1912 return 0; 1860 return 0;
1913 1861
1862 out_stop_xfsbufd:
1863 kthread_stop(xfsbufd_task);
1914 out_destroy_xfsdatad_workqueue: 1864 out_destroy_xfsdatad_workqueue:
1915 destroy_workqueue(xfsdatad_workqueue); 1865 destroy_workqueue(xfsdatad_workqueue);
1916 out_destroy_xfslogd_workqueue: 1866 out_destroy_xfslogd_workqueue:
1917 destroy_workqueue(xfslogd_workqueue); 1867 destroy_workqueue(xfslogd_workqueue);
1918 out:
1919 return error;
1920}
1921
1922/*
1923 * Note: do not mark as __exit, it is called from pagebuf_terminate.
1924 */
1925STATIC void
1926xfs_buf_daemons_stop(void)
1927{
1928 kthread_stop(xfsbufd_task);
1929 destroy_workqueue(xfslogd_workqueue);
1930 destroy_workqueue(xfsdatad_workqueue);
1931}
1932
1933/*
1934 * Initialization and Termination
1935 */
1936
1937int __init
1938pagebuf_init(void)
1939{
1940 int error = -ENOMEM;
1941
1942 pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
1943 if (!pagebuf_zone)
1944 goto out;
1945
1946#ifdef PAGEBUF_TRACE
1947 pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
1948#endif
1949
1950 error = xfs_buf_daemons_start();
1951 if (error)
1952 goto out_free_buf_zone;
1953
1954 pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
1955 if (!pagebuf_shake) {
1956 error = -ENOMEM;
1957 goto out_stop_daemons;
1958 }
1959
1960 return 0;
1961
1962 out_stop_daemons:
1963 xfs_buf_daemons_stop();
1964 out_free_buf_zone: 1868 out_free_buf_zone:
1869 kmem_zone_destroy(pagebuf_zone);
1870 out_free_trace_buf:
1965#ifdef PAGEBUF_TRACE 1871#ifdef PAGEBUF_TRACE
1966 ktrace_free(pagebuf_trace_buf); 1872 ktrace_free(pagebuf_trace_buf);
1967#endif 1873#endif
1968 kmem_zone_destroy(pagebuf_zone);
1969 out:
1970 return error; 1874 return error;
1971} 1875}
1972 1876
1973
1974/*
1975 * pagebuf_terminate.
1976 *
1977 * Note: do not mark as __exit, this is also called from the __init code.
1978 */
1979void 1877void
1980pagebuf_terminate(void) 1878pagebuf_terminate(void)
1981{ 1879{
1982 xfs_buf_daemons_stop(); 1880 kmem_shake_deregister(pagebuf_shake);
1983 1881 kthread_stop(xfsbufd_task);
1882 destroy_workqueue(xfsdatad_workqueue);
1883 destroy_workqueue(xfslogd_workqueue);
1884 kmem_zone_destroy(pagebuf_zone);
1984#ifdef PAGEBUF_TRACE 1885#ifdef PAGEBUF_TRACE
1985 ktrace_free(pagebuf_trace_buf); 1886 ktrace_free(pagebuf_trace_buf);
1986#endif 1887#endif
1987
1988 kmem_zone_destroy(pagebuf_zone);
1989 kmem_shake_deregister(pagebuf_shake);
1990} 1888}