diff options
author | Dave Chinner <david@fromorbit.com> | 2014-05-14 19:36:59 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2014-05-14 19:36:59 -0400 |
commit | 232c2f5c65dd80055e7163a5c82e3816119330e6 (patch) | |
tree | e103eaf14b6c9d4b24f9a33a0d3d218b3a76bbac | |
parent | fdd3a2ae2e48310491e605c46201c95629dd450b (diff) | |
parent | b94acd4786dce4379e986e6d58bdd74f8986af2f (diff) |
Merge branch 'xfs-filestreams-lookup' into for-next
-rw-r--r-- | fs/xfs/xfs_bmap.c | 202 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.c | 684 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.h | 34 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 35 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_mru_cache.c | 151 | ||||
-rw-r--r-- | fs/xfs/xfs_mru_cache.h | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 58 |
10 files changed, 403 insertions, 806 deletions
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 37b8d3c66745..1ff0da6e2bf9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -3515,6 +3515,67 @@ xfs_bmap_adjacent( | |||
3515 | #undef ISVALID | 3515 | #undef ISVALID |
3516 | } | 3516 | } |
3517 | 3517 | ||
3518 | static int | ||
3519 | xfs_bmap_longest_free_extent( | ||
3520 | struct xfs_trans *tp, | ||
3521 | xfs_agnumber_t ag, | ||
3522 | xfs_extlen_t *blen, | ||
3523 | int *notinit) | ||
3524 | { | ||
3525 | struct xfs_mount *mp = tp->t_mountp; | ||
3526 | struct xfs_perag *pag; | ||
3527 | xfs_extlen_t longest; | ||
3528 | int error = 0; | ||
3529 | |||
3530 | pag = xfs_perag_get(mp, ag); | ||
3531 | if (!pag->pagf_init) { | ||
3532 | error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); | ||
3533 | if (error) | ||
3534 | goto out; | ||
3535 | |||
3536 | if (!pag->pagf_init) { | ||
3537 | *notinit = 1; | ||
3538 | goto out; | ||
3539 | } | ||
3540 | } | ||
3541 | |||
3542 | longest = xfs_alloc_longest_free_extent(mp, pag); | ||
3543 | if (*blen < longest) | ||
3544 | *blen = longest; | ||
3545 | |||
3546 | out: | ||
3547 | xfs_perag_put(pag); | ||
3548 | return error; | ||
3549 | } | ||
3550 | |||
3551 | static void | ||
3552 | xfs_bmap_select_minlen( | ||
3553 | struct xfs_bmalloca *ap, | ||
3554 | struct xfs_alloc_arg *args, | ||
3555 | xfs_extlen_t *blen, | ||
3556 | int notinit) | ||
3557 | { | ||
3558 | if (notinit || *blen < ap->minlen) { | ||
3559 | /* | ||
3560 | * Since we did a BUF_TRYLOCK above, it is possible that | ||
3561 | * there is space for this request. | ||
3562 | */ | ||
3563 | args->minlen = ap->minlen; | ||
3564 | } else if (*blen < args->maxlen) { | ||
3565 | /* | ||
3566 | * If the best seen length is less than the request length, | ||
3567 | * use the best as the minimum. | ||
3568 | */ | ||
3569 | args->minlen = *blen; | ||
3570 | } else { | ||
3571 | /* | ||
3572 | * Otherwise we've seen an extent as big as maxlen, use that | ||
3573 | * as the minimum. | ||
3574 | */ | ||
3575 | args->minlen = args->maxlen; | ||
3576 | } | ||
3577 | } | ||
3578 | |||
3518 | STATIC int | 3579 | STATIC int |
3519 | xfs_bmap_btalloc_nullfb( | 3580 | xfs_bmap_btalloc_nullfb( |
3520 | struct xfs_bmalloca *ap, | 3581 | struct xfs_bmalloca *ap, |
@@ -3522,111 +3583,74 @@ xfs_bmap_btalloc_nullfb( | |||
3522 | xfs_extlen_t *blen) | 3583 | xfs_extlen_t *blen) |
3523 | { | 3584 | { |
3524 | struct xfs_mount *mp = ap->ip->i_mount; | 3585 | struct xfs_mount *mp = ap->ip->i_mount; |
3525 | struct xfs_perag *pag; | ||
3526 | xfs_agnumber_t ag, startag; | 3586 | xfs_agnumber_t ag, startag; |
3527 | int notinit = 0; | 3587 | int notinit = 0; |
3528 | int error; | 3588 | int error; |
3529 | 3589 | ||
3530 | if (ap->userdata && xfs_inode_is_filestream(ap->ip)) | 3590 | args->type = XFS_ALLOCTYPE_START_BNO; |
3531 | args->type = XFS_ALLOCTYPE_NEAR_BNO; | ||
3532 | else | ||
3533 | args->type = XFS_ALLOCTYPE_START_BNO; | ||
3534 | args->total = ap->total; | 3591 | args->total = ap->total; |
3535 | 3592 | ||
3536 | /* | ||
3537 | * Search for an allocation group with a single extent large enough | ||
3538 | * for the request. If one isn't found, then adjust the minimum | ||
3539 | * allocation size to the largest space found. | ||
3540 | */ | ||
3541 | startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); | 3593 | startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); |
3542 | if (startag == NULLAGNUMBER) | 3594 | if (startag == NULLAGNUMBER) |
3543 | startag = ag = 0; | 3595 | startag = ag = 0; |
3544 | 3596 | ||
3545 | pag = xfs_perag_get(mp, ag); | ||
3546 | while (*blen < args->maxlen) { | 3597 | while (*blen < args->maxlen) { |
3547 | if (!pag->pagf_init) { | 3598 | error = xfs_bmap_longest_free_extent(args->tp, ag, blen, |
3548 | error = xfs_alloc_pagf_init(mp, args->tp, ag, | 3599 | &notinit); |
3549 | XFS_ALLOC_FLAG_TRYLOCK); | 3600 | if (error) |
3550 | if (error) { | 3601 | return error; |
3551 | xfs_perag_put(pag); | ||
3552 | return error; | ||
3553 | } | ||
3554 | } | ||
3555 | |||
3556 | /* | ||
3557 | * See xfs_alloc_fix_freelist... | ||
3558 | */ | ||
3559 | if (pag->pagf_init) { | ||
3560 | xfs_extlen_t longest; | ||
3561 | longest = xfs_alloc_longest_free_extent(mp, pag); | ||
3562 | if (*blen < longest) | ||
3563 | *blen = longest; | ||
3564 | } else | ||
3565 | notinit = 1; | ||
3566 | |||
3567 | if (xfs_inode_is_filestream(ap->ip)) { | ||
3568 | if (*blen >= args->maxlen) | ||
3569 | break; | ||
3570 | |||
3571 | if (ap->userdata) { | ||
3572 | /* | ||
3573 | * If startag is an invalid AG, we've | ||
3574 | * come here once before and | ||
3575 | * xfs_filestream_new_ag picked the | ||
3576 | * best currently available. | ||
3577 | * | ||
3578 | * Don't continue looping, since we | ||
3579 | * could loop forever. | ||
3580 | */ | ||
3581 | if (startag == NULLAGNUMBER) | ||
3582 | break; | ||
3583 | |||
3584 | error = xfs_filestream_new_ag(ap, &ag); | ||
3585 | xfs_perag_put(pag); | ||
3586 | if (error) | ||
3587 | return error; | ||
3588 | 3602 | ||
3589 | /* loop again to set 'blen'*/ | ||
3590 | startag = NULLAGNUMBER; | ||
3591 | pag = xfs_perag_get(mp, ag); | ||
3592 | continue; | ||
3593 | } | ||
3594 | } | ||
3595 | if (++ag == mp->m_sb.sb_agcount) | 3603 | if (++ag == mp->m_sb.sb_agcount) |
3596 | ag = 0; | 3604 | ag = 0; |
3597 | if (ag == startag) | 3605 | if (ag == startag) |
3598 | break; | 3606 | break; |
3599 | xfs_perag_put(pag); | ||
3600 | pag = xfs_perag_get(mp, ag); | ||
3601 | } | 3607 | } |
3602 | xfs_perag_put(pag); | ||
3603 | 3608 | ||
3604 | /* | 3609 | xfs_bmap_select_minlen(ap, args, blen, notinit); |
3605 | * Since the above loop did a BUF_TRYLOCK, it is | 3610 | return 0; |
3606 | * possible that there is space for this request. | 3611 | } |
3607 | */ | 3612 | |
3608 | if (notinit || *blen < ap->minlen) | 3613 | STATIC int |
3609 | args->minlen = ap->minlen; | 3614 | xfs_bmap_btalloc_filestreams( |
3610 | /* | 3615 | struct xfs_bmalloca *ap, |
3611 | * If the best seen length is less than the request | 3616 | struct xfs_alloc_arg *args, |
3612 | * length, use the best as the minimum. | 3617 | xfs_extlen_t *blen) |
3613 | */ | 3618 | { |
3614 | else if (*blen < args->maxlen) | 3619 | struct xfs_mount *mp = ap->ip->i_mount; |
3615 | args->minlen = *blen; | 3620 | xfs_agnumber_t ag; |
3616 | /* | 3621 | int notinit = 0; |
3617 | * Otherwise we've seen an extent as big as maxlen, | 3622 | int error; |
3618 | * use that as the minimum. | 3623 | |
3619 | */ | 3624 | args->type = XFS_ALLOCTYPE_NEAR_BNO; |
3620 | else | 3625 | args->total = ap->total; |
3621 | args->minlen = args->maxlen; | 3626 | |
3627 | ag = XFS_FSB_TO_AGNO(mp, args->fsbno); | ||
3628 | if (ag == NULLAGNUMBER) | ||
3629 | ag = 0; | ||
3630 | |||
3631 | error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit); | ||
3632 | if (error) | ||
3633 | return error; | ||
3634 | |||
3635 | if (*blen < args->maxlen) { | ||
3636 | error = xfs_filestream_new_ag(ap, &ag); | ||
3637 | if (error) | ||
3638 | return error; | ||
3639 | |||
3640 | error = xfs_bmap_longest_free_extent(args->tp, ag, blen, | ||
3641 | &notinit); | ||
3642 | if (error) | ||
3643 | return error; | ||
3644 | |||
3645 | } | ||
3646 | |||
3647 | xfs_bmap_select_minlen(ap, args, blen, notinit); | ||
3622 | 3648 | ||
3623 | /* | 3649 | /* |
3624 | * set the failure fallback case to look in the selected | 3650 | * Set the failure fallback case to look in the selected AG as stream |
3625 | * AG as the stream may have moved. | 3651 | * may have moved. |
3626 | */ | 3652 | */ |
3627 | if (xfs_inode_is_filestream(ap->ip)) | 3653 | ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); |
3628 | ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); | ||
3629 | |||
3630 | return 0; | 3654 | return 0; |
3631 | } | 3655 | } |
3632 | 3656 | ||
@@ -3706,7 +3730,15 @@ xfs_bmap_btalloc( | |||
3706 | args.firstblock = *ap->firstblock; | 3730 | args.firstblock = *ap->firstblock; |
3707 | blen = 0; | 3731 | blen = 0; |
3708 | if (nullfb) { | 3732 | if (nullfb) { |
3709 | error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); | 3733 | /* |
3734 | * Search for an allocation group with a single extent large | ||
3735 | * enough for the request. If one isn't found, then adjust | ||
3736 | * the minimum allocation size to the largest space found. | ||
3737 | */ | ||
3738 | if (ap->userdata && xfs_inode_is_filestream(ap->ip)) | ||
3739 | error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); | ||
3740 | else | ||
3741 | error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); | ||
3710 | if (error) | 3742 | if (error) |
3711 | return error; | 3743 | return error; |
3712 | } else if (ap->flist->xbf_low) { | 3744 | } else if (ap->flist->xbf_low) { |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 12b6e7701985..8ec81bed7992 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006-2007 Silicon Graphics, Inc. | 2 | * Copyright (c) 2006-2007 Silicon Graphics, Inc. |
3 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | * All Rights Reserved. | 4 | * All Rights Reserved. |
4 | * | 5 | * |
5 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -32,100 +33,20 @@ | |||
32 | #include "xfs_filestream.h" | 33 | #include "xfs_filestream.h" |
33 | #include "xfs_trace.h" | 34 | #include "xfs_trace.h" |
34 | 35 | ||
35 | #ifdef XFS_FILESTREAMS_TRACE | 36 | struct xfs_fstrm_item { |
36 | 37 | struct xfs_mru_cache_elem mru; | |
37 | ktrace_t *xfs_filestreams_trace_buf; | 38 | struct xfs_inode *ip; |
38 | 39 | xfs_agnumber_t ag; /* AG in use for this directory */ | |
39 | STATIC void | 40 | }; |
40 | xfs_filestreams_trace( | ||
41 | xfs_mount_t *mp, /* mount point */ | ||
42 | int type, /* type of trace */ | ||
43 | const char *func, /* source function */ | ||
44 | int line, /* source line number */ | ||
45 | __psunsigned_t arg0, | ||
46 | __psunsigned_t arg1, | ||
47 | __psunsigned_t arg2, | ||
48 | __psunsigned_t arg3, | ||
49 | __psunsigned_t arg4, | ||
50 | __psunsigned_t arg5) | ||
51 | { | ||
52 | ktrace_enter(xfs_filestreams_trace_buf, | ||
53 | (void *)(__psint_t)(type | (line << 16)), | ||
54 | (void *)func, | ||
55 | (void *)(__psunsigned_t)current_pid(), | ||
56 | (void *)mp, | ||
57 | (void *)(__psunsigned_t)arg0, | ||
58 | (void *)(__psunsigned_t)arg1, | ||
59 | (void *)(__psunsigned_t)arg2, | ||
60 | (void *)(__psunsigned_t)arg3, | ||
61 | (void *)(__psunsigned_t)arg4, | ||
62 | (void *)(__psunsigned_t)arg5, | ||
63 | NULL, NULL, NULL, NULL, NULL, NULL); | ||
64 | } | ||
65 | |||
66 | #define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0) | ||
67 | #define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0) | ||
68 | #define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0) | ||
69 | #define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0) | ||
70 | #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) | ||
71 | #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) | ||
72 | #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ | ||
73 | xfs_filestreams_trace(mp, t, __func__, __LINE__, \ | ||
74 | (__psunsigned_t)a0, (__psunsigned_t)a1, \ | ||
75 | (__psunsigned_t)a2, (__psunsigned_t)a3, \ | ||
76 | (__psunsigned_t)a4, (__psunsigned_t)a5) | ||
77 | |||
78 | #define TRACE_AG_SCAN(mp, ag, ag2) \ | ||
79 | TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); | ||
80 | #define TRACE_AG_PICK1(mp, max_ag, maxfree) \ | ||
81 | TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); | ||
82 | #define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ | ||
83 | TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ | ||
84 | cnt, free, scan, flag) | ||
85 | #define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ | ||
86 | TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) | ||
87 | #define TRACE_FREE(mp, ip, pip, ag, cnt) \ | ||
88 | TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) | ||
89 | #define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ | ||
90 | TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) | ||
91 | #define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ | ||
92 | TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) | ||
93 | #define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ | ||
94 | TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) | ||
95 | #define TRACE_ORPHAN(mp, ip, ag) \ | ||
96 | TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); | ||
97 | |||
98 | |||
99 | #else | ||
100 | #define TRACE_AG_SCAN(mp, ag, ag2) | ||
101 | #define TRACE_AG_PICK1(mp, max_ag, maxfree) | ||
102 | #define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) | ||
103 | #define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) | ||
104 | #define TRACE_FREE(mp, ip, pip, ag, cnt) | ||
105 | #define TRACE_LOOKUP(mp, ip, pip, ag, cnt) | ||
106 | #define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) | ||
107 | #define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) | ||
108 | #define TRACE_ORPHAN(mp, ip, ag) | ||
109 | #endif | ||
110 | |||
111 | static kmem_zone_t *item_zone; | ||
112 | 41 | ||
113 | /* | 42 | enum xfs_fstrm_alloc { |
114 | * Structure for associating a file or a directory with an allocation group. | 43 | XFS_PICK_USERDATA = 1, |
115 | * The parent directory pointer is only needed for files, but since there will | 44 | XFS_PICK_LOWSPACE = 2, |
116 | * generally be vastly more files than directories in the cache, using the same | 45 | }; |
117 | * data structure simplifies the code with very little memory overhead. | ||
118 | */ | ||
119 | typedef struct fstrm_item | ||
120 | { | ||
121 | xfs_agnumber_t ag; /* AG currently in use for the file/directory. */ | ||
122 | xfs_inode_t *ip; /* inode self-pointer. */ | ||
123 | xfs_inode_t *pip; /* Parent directory inode pointer. */ | ||
124 | } fstrm_item_t; | ||
125 | 46 | ||
126 | /* | 47 | /* |
127 | * Allocation group filestream associations are tracked with per-ag atomic | 48 | * Allocation group filestream associations are tracked with per-ag atomic |
128 | * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a | 49 | * counters. These counters allow xfs_filestream_pick_ag() to tell whether a |
129 | * particular AG already has active filestreams associated with it. The mount | 50 | * particular AG already has active filestreams associated with it. The mount |
130 | * point's m_peraglock is used to protect these counters from per-ag array | 51 | * point's m_peraglock is used to protect these counters from per-ag array |
131 | * re-allocation during a growfs operation. When xfs_growfs_data_private() is | 52 | * re-allocation during a growfs operation. When xfs_growfs_data_private() is |
@@ -160,7 +81,7 @@ typedef struct fstrm_item | |||
160 | * the cache that reference per-ag array elements that have since been | 81 | * the cache that reference per-ag array elements that have since been |
161 | * reallocated. | 82 | * reallocated. |
162 | */ | 83 | */ |
163 | static int | 84 | int |
164 | xfs_filestream_peek_ag( | 85 | xfs_filestream_peek_ag( |
165 | xfs_mount_t *mp, | 86 | xfs_mount_t *mp, |
166 | xfs_agnumber_t agno) | 87 | xfs_agnumber_t agno) |
@@ -200,23 +121,40 @@ xfs_filestream_put_ag( | |||
200 | xfs_perag_put(pag); | 121 | xfs_perag_put(pag); |
201 | } | 122 | } |
202 | 123 | ||
124 | static void | ||
125 | xfs_fstrm_free_func( | ||
126 | struct xfs_mru_cache_elem *mru) | ||
127 | { | ||
128 | struct xfs_fstrm_item *item = | ||
129 | container_of(mru, struct xfs_fstrm_item, mru); | ||
130 | |||
131 | xfs_filestream_put_ag(item->ip->i_mount, item->ag); | ||
132 | |||
133 | trace_xfs_filestream_free(item->ip, item->ag); | ||
134 | |||
135 | kmem_free(item); | ||
136 | } | ||
137 | |||
203 | /* | 138 | /* |
204 | * Scan the AGs starting at startag looking for an AG that isn't in use and has | 139 | * Scan the AGs starting at startag looking for an AG that isn't in use and has |
205 | * at least minlen blocks free. | 140 | * at least minlen blocks free. |
206 | */ | 141 | */ |
207 | static int | 142 | static int |
208 | _xfs_filestream_pick_ag( | 143 | xfs_filestream_pick_ag( |
209 | xfs_mount_t *mp, | 144 | struct xfs_inode *ip, |
210 | xfs_agnumber_t startag, | 145 | xfs_agnumber_t startag, |
211 | xfs_agnumber_t *agp, | 146 | xfs_agnumber_t *agp, |
212 | int flags, | 147 | int flags, |
213 | xfs_extlen_t minlen) | 148 | xfs_extlen_t minlen) |
214 | { | 149 | { |
215 | int streams, max_streams; | 150 | struct xfs_mount *mp = ip->i_mount; |
216 | int err, trylock, nscan; | 151 | struct xfs_fstrm_item *item; |
217 | xfs_extlen_t longest, free, minfree, maxfree = 0; | 152 | struct xfs_perag *pag; |
218 | xfs_agnumber_t ag, max_ag = NULLAGNUMBER; | 153 | xfs_extlen_t longest, free = 0, minfree, maxfree = 0; |
219 | struct xfs_perag *pag; | 154 | xfs_agnumber_t ag, max_ag = NULLAGNUMBER; |
155 | int err, trylock, nscan; | ||
156 | |||
157 | ASSERT(S_ISDIR(ip->i_d.di_mode)); | ||
220 | 158 | ||
221 | /* 2% of an AG's blocks must be free for it to be chosen. */ | 159 | /* 2% of an AG's blocks must be free for it to be chosen. */ |
222 | minfree = mp->m_sb.sb_agblocks / 50; | 160 | minfree = mp->m_sb.sb_agblocks / 50; |
@@ -228,8 +166,9 @@ _xfs_filestream_pick_ag( | |||
228 | trylock = XFS_ALLOC_FLAG_TRYLOCK; | 166 | trylock = XFS_ALLOC_FLAG_TRYLOCK; |
229 | 167 | ||
230 | for (nscan = 0; 1; nscan++) { | 168 | for (nscan = 0; 1; nscan++) { |
169 | trace_xfs_filestream_scan(ip, ag); | ||
170 | |||
231 | pag = xfs_perag_get(mp, ag); | 171 | pag = xfs_perag_get(mp, ag); |
232 | TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms)); | ||
233 | 172 | ||
234 | if (!pag->pagf_init) { | 173 | if (!pag->pagf_init) { |
235 | err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); | 174 | err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); |
@@ -246,7 +185,6 @@ _xfs_filestream_pick_ag( | |||
246 | /* Keep track of the AG with the most free blocks. */ | 185 | /* Keep track of the AG with the most free blocks. */ |
247 | if (pag->pagf_freeblks > maxfree) { | 186 | if (pag->pagf_freeblks > maxfree) { |
248 | maxfree = pag->pagf_freeblks; | 187 | maxfree = pag->pagf_freeblks; |
249 | max_streams = atomic_read(&pag->pagf_fstrms); | ||
250 | max_ag = ag; | 188 | max_ag = ag; |
251 | } | 189 | } |
252 | 190 | ||
@@ -269,7 +207,6 @@ _xfs_filestream_pick_ag( | |||
269 | 207 | ||
270 | /* Break out, retaining the reference on the AG. */ | 208 | /* Break out, retaining the reference on the AG. */ |
271 | free = pag->pagf_freeblks; | 209 | free = pag->pagf_freeblks; |
272 | streams = atomic_read(&pag->pagf_fstrms); | ||
273 | xfs_perag_put(pag); | 210 | xfs_perag_put(pag); |
274 | *agp = ag; | 211 | *agp = ag; |
275 | break; | 212 | break; |
@@ -305,317 +242,98 @@ next_ag: | |||
305 | */ | 242 | */ |
306 | if (max_ag != NULLAGNUMBER) { | 243 | if (max_ag != NULLAGNUMBER) { |
307 | xfs_filestream_get_ag(mp, max_ag); | 244 | xfs_filestream_get_ag(mp, max_ag); |
308 | TRACE_AG_PICK1(mp, max_ag, maxfree); | ||
309 | streams = max_streams; | ||
310 | free = maxfree; | 245 | free = maxfree; |
311 | *agp = max_ag; | 246 | *agp = max_ag; |
312 | break; | 247 | break; |
313 | } | 248 | } |
314 | 249 | ||
315 | /* take AG 0 if none matched */ | 250 | /* take AG 0 if none matched */ |
316 | TRACE_AG_PICK1(mp, max_ag, maxfree); | 251 | trace_xfs_filestream_pick(ip, *agp, free, nscan); |
317 | *agp = 0; | 252 | *agp = 0; |
318 | return 0; | 253 | return 0; |
319 | } | 254 | } |
320 | 255 | ||
321 | TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); | 256 | trace_xfs_filestream_pick(ip, *agp, free, nscan); |
322 | |||
323 | return 0; | ||
324 | } | ||
325 | 257 | ||
326 | /* | 258 | if (*agp == NULLAGNUMBER) |
327 | * Set the allocation group number for a file or a directory, updating inode | ||
328 | * references and per-AG references as appropriate. | ||
329 | */ | ||
330 | static int | ||
331 | _xfs_filestream_update_ag( | ||
332 | xfs_inode_t *ip, | ||
333 | xfs_inode_t *pip, | ||
334 | xfs_agnumber_t ag) | ||
335 | { | ||
336 | int err = 0; | ||
337 | xfs_mount_t *mp; | ||
338 | xfs_mru_cache_t *cache; | ||
339 | fstrm_item_t *item; | ||
340 | xfs_agnumber_t old_ag; | ||
341 | xfs_inode_t *old_pip; | ||
342 | |||
343 | /* | ||
344 | * Either ip is a regular file and pip is a directory, or ip is a | ||
345 | * directory and pip is NULL. | ||
346 | */ | ||
347 | ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && | ||
348 | S_ISDIR(pip->i_d.di_mode)) || | ||
349 | (S_ISDIR(ip->i_d.di_mode) && !pip))); | ||
350 | |||
351 | mp = ip->i_mount; | ||
352 | cache = mp->m_filestream; | ||
353 | |||
354 | item = xfs_mru_cache_lookup(cache, ip->i_ino); | ||
355 | if (item) { | ||
356 | ASSERT(item->ip == ip); | ||
357 | old_ag = item->ag; | ||
358 | item->ag = ag; | ||
359 | old_pip = item->pip; | ||
360 | item->pip = pip; | ||
361 | xfs_mru_cache_done(cache); | ||
362 | |||
363 | /* | ||
364 | * If the AG has changed, drop the old ref and take a new one, | ||
365 | * effectively transferring the reference from old to new AG. | ||
366 | */ | ||
367 | if (ag != old_ag) { | ||
368 | xfs_filestream_put_ag(mp, old_ag); | ||
369 | xfs_filestream_get_ag(mp, ag); | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * If ip is a file and its pip has changed, drop the old ref and | ||
374 | * take a new one. | ||
375 | */ | ||
376 | if (pip && pip != old_pip) { | ||
377 | IRELE(old_pip); | ||
378 | IHOLD(pip); | ||
379 | } | ||
380 | |||
381 | TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), | ||
382 | ag, xfs_filestream_peek_ag(mp, ag)); | ||
383 | return 0; | 259 | return 0; |
384 | } | ||
385 | 260 | ||
386 | item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); | 261 | err = ENOMEM; |
262 | item = kmem_alloc(sizeof(*item), KM_MAYFAIL); | ||
387 | if (!item) | 263 | if (!item) |
388 | return ENOMEM; | 264 | goto out_put_ag; |
389 | 265 | ||
390 | item->ag = ag; | 266 | item->ag = *agp; |
391 | item->ip = ip; | 267 | item->ip = ip; |
392 | item->pip = pip; | ||
393 | 268 | ||
394 | err = xfs_mru_cache_insert(cache, ip->i_ino, item); | 269 | err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); |
395 | if (err) { | 270 | if (err) { |
396 | kmem_zone_free(item_zone, item); | 271 | if (err == EEXIST) |
397 | return err; | 272 | err = 0; |
273 | goto out_free_item; | ||
398 | } | 274 | } |
399 | 275 | ||
400 | /* Take a reference on the AG. */ | ||
401 | xfs_filestream_get_ag(mp, ag); | ||
402 | |||
403 | /* | ||
404 | * Take a reference on the inode itself regardless of whether it's a | ||
405 | * regular file or a directory. | ||
406 | */ | ||
407 | IHOLD(ip); | ||
408 | |||
409 | /* | ||
410 | * In the case of a regular file, take a reference on the parent inode | ||
411 | * as well to ensure it remains in-core. | ||
412 | */ | ||
413 | if (pip) | ||
414 | IHOLD(pip); | ||
415 | |||
416 | TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), | ||
417 | ag, xfs_filestream_peek_ag(mp, ag)); | ||
418 | |||
419 | return 0; | 276 | return 0; |
420 | } | ||
421 | |||
422 | /* xfs_fstrm_free_func(): callback for freeing cached stream items. */ | ||
423 | STATIC void | ||
424 | xfs_fstrm_free_func( | ||
425 | unsigned long ino, | ||
426 | void *data) | ||
427 | { | ||
428 | fstrm_item_t *item = (fstrm_item_t *)data; | ||
429 | xfs_inode_t *ip = item->ip; | ||
430 | |||
431 | ASSERT(ip->i_ino == ino); | ||
432 | |||
433 | xfs_iflags_clear(ip, XFS_IFILESTREAM); | ||
434 | |||
435 | /* Drop the reference taken on the AG when the item was added. */ | ||
436 | xfs_filestream_put_ag(ip->i_mount, item->ag); | ||
437 | |||
438 | TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, | ||
439 | xfs_filestream_peek_ag(ip->i_mount, item->ag)); | ||
440 | |||
441 | /* | ||
442 | * _xfs_filestream_update_ag() always takes a reference on the inode | ||
443 | * itself, whether it's a file or a directory. Release it here. | ||
444 | * This can result in the inode being freed and so we must | ||
445 | * not hold any inode locks when freeing filesstreams objects | ||
446 | * otherwise we can deadlock here. | ||
447 | */ | ||
448 | IRELE(ip); | ||
449 | |||
450 | /* | ||
451 | * In the case of a regular file, _xfs_filestream_update_ag() also | ||
452 | * takes a ref on the parent inode to keep it in-core. Release that | ||
453 | * too. | ||
454 | */ | ||
455 | if (item->pip) | ||
456 | IRELE(item->pip); | ||
457 | |||
458 | /* Finally, free the memory allocated for the item. */ | ||
459 | kmem_zone_free(item_zone, item); | ||
460 | } | ||
461 | |||
462 | /* | ||
463 | * xfs_filestream_init() is called at xfs initialisation time to set up the | ||
464 | * memory zone that will be used for filestream data structure allocation. | ||
465 | */ | ||
466 | int | ||
467 | xfs_filestream_init(void) | ||
468 | { | ||
469 | item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); | ||
470 | if (!item_zone) | ||
471 | return -ENOMEM; | ||
472 | |||
473 | return 0; | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * xfs_filestream_uninit() is called at xfs termination time to destroy the | ||
478 | * memory zone that was used for filestream data structure allocation. | ||
479 | */ | ||
480 | void | ||
481 | xfs_filestream_uninit(void) | ||
482 | { | ||
483 | kmem_zone_destroy(item_zone); | ||
484 | } | ||
485 | |||
486 | /* | ||
487 | * xfs_filestream_mount() is called when a file system is mounted with the | ||
488 | * filestream option. It is responsible for allocating the data structures | ||
489 | * needed to track the new file system's file streams. | ||
490 | */ | ||
491 | int | ||
492 | xfs_filestream_mount( | ||
493 | xfs_mount_t *mp) | ||
494 | { | ||
495 | int err; | ||
496 | unsigned int lifetime, grp_count; | ||
497 | |||
498 | /* | ||
499 | * The filestream timer tunable is currently fixed within the range of | ||
500 | * one second to four minutes, with five seconds being the default. The | ||
501 | * group count is somewhat arbitrary, but it'd be nice to adhere to the | ||
502 | * timer tunable to within about 10 percent. This requires at least 10 | ||
503 | * groups. | ||
504 | */ | ||
505 | lifetime = xfs_fstrm_centisecs * 10; | ||
506 | grp_count = 10; | ||
507 | |||
508 | err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, | ||
509 | xfs_fstrm_free_func); | ||
510 | 277 | ||
278 | out_free_item: | ||
279 | kmem_free(item); | ||
280 | out_put_ag: | ||
281 | xfs_filestream_put_ag(mp, *agp); | ||
511 | return err; | 282 | return err; |
512 | } | 283 | } |
513 | 284 | ||
514 | /* | 285 | static struct xfs_inode * |
515 | * xfs_filestream_unmount() is called when a file system that was mounted with | 286 | xfs_filestream_get_parent( |
516 | * the filestream option is unmounted. It drains the data structures created | 287 | struct xfs_inode *ip) |
517 | * to track the file system's file streams and frees all the memory that was | ||
518 | * allocated. | ||
519 | */ | ||
520 | void | ||
521 | xfs_filestream_unmount( | ||
522 | xfs_mount_t *mp) | ||
523 | { | 288 | { |
524 | xfs_mru_cache_destroy(mp->m_filestream); | 289 | struct inode *inode = VFS_I(ip), *dir = NULL; |
525 | } | 290 | struct dentry *dentry, *parent; |
526 | 291 | ||
527 | /* | 292 | dentry = d_find_alias(inode); |
528 | * Return the AG of the filestream the file or directory belongs to, or | 293 | if (!dentry) |
529 | * NULLAGNUMBER otherwise. | 294 | goto out; |
530 | */ | ||
531 | xfs_agnumber_t | ||
532 | xfs_filestream_lookup_ag( | ||
533 | xfs_inode_t *ip) | ||
534 | { | ||
535 | xfs_mru_cache_t *cache; | ||
536 | fstrm_item_t *item; | ||
537 | xfs_agnumber_t ag; | ||
538 | int ref; | ||
539 | |||
540 | if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { | ||
541 | ASSERT(0); | ||
542 | return NULLAGNUMBER; | ||
543 | } | ||
544 | 295 | ||
545 | cache = ip->i_mount->m_filestream; | 296 | parent = dget_parent(dentry); |
546 | item = xfs_mru_cache_lookup(cache, ip->i_ino); | 297 | if (!parent) |
547 | if (!item) { | 298 | goto out_dput; |
548 | TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); | ||
549 | return NULLAGNUMBER; | ||
550 | } | ||
551 | 299 | ||
552 | ASSERT(ip == item->ip); | 300 | dir = igrab(parent->d_inode); |
553 | ag = item->ag; | 301 | dput(parent); |
554 | ref = xfs_filestream_peek_ag(ip->i_mount, ag); | ||
555 | xfs_mru_cache_done(cache); | ||
556 | 302 | ||
557 | TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); | 303 | out_dput: |
558 | return ag; | 304 | dput(dentry); |
305 | out: | ||
306 | return dir ? XFS_I(dir) : NULL; | ||
559 | } | 307 | } |
560 | 308 | ||
561 | /* | 309 | /* |
562 | * xfs_filestream_associate() should only be called to associate a regular file | 310 | * Find the right allocation group for a file, either by finding an |
563 | * with its parent directory. Calling it with a child directory isn't | 311 | * existing file stream or creating a new one. |
564 | * appropriate because filestreams don't apply to entire directory hierarchies. | ||
565 | * Creating a file in a child directory of an existing filestream directory | ||
566 | * starts a new filestream with its own allocation group association. | ||
567 | * | 312 | * |
568 | * Returns < 0 on error, 0 if successful association occurred, > 0 if | 313 | * Returns NULLAGNUMBER in case of an error. |
569 | * we failed to get an association because of locking issues. | ||
570 | */ | 314 | */ |
571 | int | 315 | xfs_agnumber_t |
572 | xfs_filestream_associate( | 316 | xfs_filestream_lookup_ag( |
573 | xfs_inode_t *pip, | 317 | struct xfs_inode *ip) |
574 | xfs_inode_t *ip) | ||
575 | { | 318 | { |
576 | xfs_mount_t *mp; | 319 | struct xfs_mount *mp = ip->i_mount; |
577 | xfs_mru_cache_t *cache; | 320 | struct xfs_inode *pip = NULL; |
578 | fstrm_item_t *item; | 321 | xfs_agnumber_t startag, ag = NULLAGNUMBER; |
579 | xfs_agnumber_t ag, rotorstep, startag; | 322 | struct xfs_mru_cache_elem *mru; |
580 | int err = 0; | ||
581 | 323 | ||
582 | ASSERT(S_ISDIR(pip->i_d.di_mode)); | ||
583 | ASSERT(S_ISREG(ip->i_d.di_mode)); | 324 | ASSERT(S_ISREG(ip->i_d.di_mode)); |
584 | if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) | ||
585 | return -EINVAL; | ||
586 | 325 | ||
587 | mp = pip->i_mount; | 326 | pip = xfs_filestream_get_parent(ip); |
588 | cache = mp->m_filestream; | 327 | if (!pip) |
328 | goto out; | ||
589 | 329 | ||
590 | /* | 330 | mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); |
591 | * We have a problem, Houston. | 331 | if (mru) { |
592 | * | 332 | ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; |
593 | * Taking the iolock here violates inode locking order - we already | 333 | xfs_mru_cache_done(mp->m_filestream); |
594 | * hold the ilock. Hence if we block getting this lock we may never | ||
595 | * wake. Unfortunately, that means if we can't get the lock, we're | ||
596 | * screwed in terms of getting a stream association - we can't spin | ||
597 | * waiting for the lock because someone else is waiting on the lock we | ||
598 | * hold and we cannot drop that as we are in a transaction here. | ||
599 | * | ||
600 | * Lucky for us, this inversion is not a problem because it's a | ||
601 | * directory inode that we are trying to lock here. | ||
602 | * | ||
603 | * So, if we can't get the iolock without sleeping then just give up | ||
604 | */ | ||
605 | if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) | ||
606 | return 1; | ||
607 | |||
608 | /* If the parent directory is already in the cache, use its AG. */ | ||
609 | item = xfs_mru_cache_lookup(cache, pip->i_ino); | ||
610 | if (item) { | ||
611 | ASSERT(item->ip == pip); | ||
612 | ag = item->ag; | ||
613 | xfs_mru_cache_done(cache); | ||
614 | |||
615 | TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); | ||
616 | err = _xfs_filestream_update_ag(ip, pip, ag); | ||
617 | 334 | ||
618 | goto exit; | 335 | trace_xfs_filestream_lookup(ip, ag); |
336 | goto out; | ||
619 | } | 337 | } |
620 | 338 | ||
621 | /* | 339 | /* |
@@ -623,202 +341,94 @@ xfs_filestream_associate( | |||
623 | * use the directory inode's AG. | 341 | * use the directory inode's AG. |
624 | */ | 342 | */ |
625 | if (mp->m_flags & XFS_MOUNT_32BITINODES) { | 343 | if (mp->m_flags & XFS_MOUNT_32BITINODES) { |
626 | rotorstep = xfs_rotorstep; | 344 | xfs_agnumber_t rotorstep = xfs_rotorstep; |
627 | startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; | 345 | startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; |
628 | mp->m_agfrotor = (mp->m_agfrotor + 1) % | 346 | mp->m_agfrotor = (mp->m_agfrotor + 1) % |
629 | (mp->m_sb.sb_agcount * rotorstep); | 347 | (mp->m_sb.sb_agcount * rotorstep); |
630 | } else | 348 | } else |
631 | startag = XFS_INO_TO_AGNO(mp, pip->i_ino); | 349 | startag = XFS_INO_TO_AGNO(mp, pip->i_ino); |
632 | 350 | ||
633 | /* Pick a new AG for the parent inode starting at startag. */ | 351 | if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0)) |
634 | err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); | 352 | ag = NULLAGNUMBER; |
635 | if (err || ag == NULLAGNUMBER) | 353 | out: |
636 | goto exit_did_pick; | 354 | IRELE(pip); |
637 | 355 | return ag; | |
638 | /* Associate the parent inode with the AG. */ | ||
639 | err = _xfs_filestream_update_ag(pip, NULL, ag); | ||
640 | if (err) | ||
641 | goto exit_did_pick; | ||
642 | |||
643 | /* Associate the file inode with the AG. */ | ||
644 | err = _xfs_filestream_update_ag(ip, pip, ag); | ||
645 | if (err) | ||
646 | goto exit_did_pick; | ||
647 | |||
648 | TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); | ||
649 | |||
650 | exit_did_pick: | ||
651 | /* | ||
652 | * If _xfs_filestream_pick_ag() returned a valid AG, remove the | ||
653 | * reference it took on it, since the file and directory will have taken | ||
654 | * their own now if they were successfully cached. | ||
655 | */ | ||
656 | if (ag != NULLAGNUMBER) | ||
657 | xfs_filestream_put_ag(mp, ag); | ||
658 | |||
659 | exit: | ||
660 | xfs_iunlock(pip, XFS_IOLOCK_EXCL); | ||
661 | return -err; | ||
662 | } | 356 | } |
663 | 357 | ||
664 | /* | 358 | /* |
665 | * Pick a new allocation group for the current file and its file stream. This | 359 | * Pick a new allocation group for the current file and its file stream. |
666 | * function is called by xfs_bmap_filestreams() with the mount point's per-ag | 360 | * |
667 | * lock held. | 361 | * This is called when the allocator can't find a suitable extent in the |
362 | * current AG, and we have to move the stream into a new AG with more space. | ||
668 | */ | 363 | */ |
669 | int | 364 | int |
670 | xfs_filestream_new_ag( | 365 | xfs_filestream_new_ag( |
671 | struct xfs_bmalloca *ap, | 366 | struct xfs_bmalloca *ap, |
672 | xfs_agnumber_t *agp) | 367 | xfs_agnumber_t *agp) |
673 | { | 368 | { |
674 | int flags, err; | 369 | struct xfs_inode *ip = ap->ip, *pip; |
675 | xfs_inode_t *ip, *pip = NULL; | 370 | struct xfs_mount *mp = ip->i_mount; |
676 | xfs_mount_t *mp; | 371 | xfs_extlen_t minlen = ap->length; |
677 | xfs_mru_cache_t *cache; | 372 | xfs_agnumber_t startag = 0; |
678 | xfs_extlen_t minlen; | 373 | int flags, err = 0; |
679 | fstrm_item_t *dir, *file; | 374 | struct xfs_mru_cache_elem *mru; |
680 | xfs_agnumber_t ag = NULLAGNUMBER; | ||
681 | |||
682 | ip = ap->ip; | ||
683 | mp = ip->i_mount; | ||
684 | cache = mp->m_filestream; | ||
685 | minlen = ap->length; | ||
686 | *agp = NULLAGNUMBER; | ||
687 | 375 | ||
688 | /* | 376 | *agp = NULLAGNUMBER; |
689 | * Look for the file in the cache, removing it if it's found. Doing | ||
690 | * this allows it to be held across the dir lookup that follows. | ||
691 | */ | ||
692 | file = xfs_mru_cache_remove(cache, ip->i_ino); | ||
693 | if (file) { | ||
694 | ASSERT(ip == file->ip); | ||
695 | |||
696 | /* Save the file's parent inode and old AG number for later. */ | ||
697 | pip = file->pip; | ||
698 | ag = file->ag; | ||
699 | |||
700 | /* Look for the file's directory in the cache. */ | ||
701 | dir = xfs_mru_cache_lookup(cache, pip->i_ino); | ||
702 | if (dir) { | ||
703 | ASSERT(pip == dir->ip); | ||
704 | |||
705 | /* | ||
706 | * If the directory has already moved on to a new AG, | ||
707 | * use that AG as the new AG for the file. Don't | ||
708 | * forget to twiddle the AG refcounts to match the | ||
709 | * movement. | ||
710 | */ | ||
711 | if (dir->ag != file->ag) { | ||
712 | xfs_filestream_put_ag(mp, file->ag); | ||
713 | xfs_filestream_get_ag(mp, dir->ag); | ||
714 | *agp = file->ag = dir->ag; | ||
715 | } | ||
716 | |||
717 | xfs_mru_cache_done(cache); | ||
718 | } | ||
719 | 377 | ||
720 | /* | 378 | pip = xfs_filestream_get_parent(ip); |
721 | * Put the file back in the cache. If this fails, the free | 379 | if (!pip) |
722 | * function needs to be called to tidy up in the same way as if | 380 | goto exit; |
723 | * the item had simply expired from the cache. | ||
724 | */ | ||
725 | err = xfs_mru_cache_insert(cache, ip->i_ino, file); | ||
726 | if (err) { | ||
727 | xfs_fstrm_free_func(ip->i_ino, file); | ||
728 | return err; | ||
729 | } | ||
730 | 381 | ||
731 | /* | 382 | mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); |
732 | * If the file's AG was moved to the directory's new AG, there's | 383 | if (mru) { |
733 | * nothing more to be done. | 384 | struct xfs_fstrm_item *item = |
734 | */ | 385 | container_of(mru, struct xfs_fstrm_item, mru); |
735 | if (*agp != NULLAGNUMBER) { | 386 | startag = (item->ag + 1) % mp->m_sb.sb_agcount; |
736 | TRACE_MOVEAG(mp, ip, pip, | ||
737 | ag, xfs_filestream_peek_ag(mp, ag), | ||
738 | *agp, xfs_filestream_peek_ag(mp, *agp)); | ||
739 | return 0; | ||
740 | } | ||
741 | } | 387 | } |
742 | 388 | ||
743 | /* | ||
744 | * If the file's parent directory is known, take its iolock in exclusive | ||
745 | * mode to prevent two sibling files from racing each other to migrate | ||
746 | * themselves and their parent to different AGs. | ||
747 | * | ||
748 | * Note that we lock the parent directory iolock inside the child | ||
749 | * iolock here. That's fine as we never hold both parent and child | ||
750 | * iolock in any other place. This is different from the ilock, | ||
751 | * which requires locking of the child after the parent for namespace | ||
752 | * operations. | ||
753 | */ | ||
754 | if (pip) | ||
755 | xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
756 | |||
757 | /* | ||
758 | * A new AG needs to be found for the file. If the file's parent | ||
759 | * directory is also known, it will be moved to the new AG as well to | ||
760 | * ensure that files created inside it in future use the new AG. | ||
761 | */ | ||
762 | ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; | ||
763 | flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | | 389 | flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | |
764 | (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); | 390 | (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); |
765 | 391 | ||
766 | err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); | 392 | err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); |
767 | if (err || *agp == NULLAGNUMBER) | ||
768 | goto exit; | ||
769 | 393 | ||
770 | /* | 394 | /* |
771 | * If the file wasn't found in the file cache, then its parent directory | 395 | * Only free the item here so we skip over the old AG earlier. |
772 | * inode isn't known. For this to have happened, the file must either | ||
773 | * be pre-existing, or it was created long enough ago that its cache | ||
774 | * entry has expired. This isn't the sort of usage that the filestreams | ||
775 | * allocator is trying to optimise, so there's no point trying to track | ||
776 | * its new AG somehow in the filestream data structures. | ||
777 | */ | 396 | */ |
778 | if (!pip) { | 397 | if (mru) |
779 | TRACE_ORPHAN(mp, ip, *agp); | 398 | xfs_fstrm_free_func(mru); |
780 | goto exit; | ||
781 | } | ||
782 | |||
783 | /* Associate the parent inode with the AG. */ | ||
784 | err = _xfs_filestream_update_ag(pip, NULL, *agp); | ||
785 | if (err) | ||
786 | goto exit; | ||
787 | |||
788 | /* Associate the file inode with the AG. */ | ||
789 | err = _xfs_filestream_update_ag(ip, pip, *agp); | ||
790 | if (err) | ||
791 | goto exit; | ||
792 | |||
793 | TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, | ||
794 | *agp, xfs_filestream_peek_ag(mp, *agp)); | ||
795 | 399 | ||
400 | IRELE(pip); | ||
796 | exit: | 401 | exit: |
797 | /* | 402 | if (*agp == NULLAGNUMBER) |
798 | * If _xfs_filestream_pick_ag() returned a valid AG, remove the | ||
799 | * reference it took on it, since the file and directory will have taken | ||
800 | * their own now if they were successfully cached. | ||
801 | */ | ||
802 | if (*agp != NULLAGNUMBER) | ||
803 | xfs_filestream_put_ag(mp, *agp); | ||
804 | else | ||
805 | *agp = 0; | 403 | *agp = 0; |
806 | |||
807 | if (pip) | ||
808 | xfs_iunlock(pip, XFS_IOLOCK_EXCL); | ||
809 | |||
810 | return err; | 404 | return err; |
811 | } | 405 | } |
812 | 406 | ||
813 | /* | ||
814 | * Remove an association between an inode and a filestream object. | ||
815 | * Typically this is done on last close of an unlinked file. | ||
816 | */ | ||
817 | void | 407 | void |
818 | xfs_filestream_deassociate( | 408 | xfs_filestream_deassociate( |
819 | xfs_inode_t *ip) | 409 | struct xfs_inode *ip) |
820 | { | 410 | { |
821 | xfs_mru_cache_t *cache = ip->i_mount->m_filestream; | 411 | xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); |
412 | } | ||
413 | |||
414 | int | ||
415 | xfs_filestream_mount( | ||
416 | xfs_mount_t *mp) | ||
417 | { | ||
418 | /* | ||
419 | * The filestream timer tunable is currently fixed within the range of | ||
420 | * one second to four minutes, with five seconds being the default. The | ||
421 | * group count is somewhat arbitrary, but it'd be nice to adhere to the | ||
422 | * timer tunable to within about 10 percent. This requires at least 10 | ||
423 | * groups. | ||
424 | */ | ||
425 | return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10, | ||
426 | 10, xfs_fstrm_free_func); | ||
427 | } | ||
822 | 428 | ||
823 | xfs_mru_cache_delete(cache, ip->i_ino); | 429 | void |
430 | xfs_filestream_unmount( | ||
431 | xfs_mount_t *mp) | ||
432 | { | ||
433 | xfs_mru_cache_destroy(mp->m_filestream); | ||
824 | } | 434 | } |
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 6d61dbee8564..2ef43406e53b 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h | |||
@@ -20,50 +20,20 @@ | |||
20 | 20 | ||
21 | struct xfs_mount; | 21 | struct xfs_mount; |
22 | struct xfs_inode; | 22 | struct xfs_inode; |
23 | struct xfs_perag; | ||
24 | struct xfs_bmalloca; | 23 | struct xfs_bmalloca; |
25 | 24 | ||
26 | #ifdef XFS_FILESTREAMS_TRACE | ||
27 | #define XFS_FSTRM_KTRACE_INFO 1 | ||
28 | #define XFS_FSTRM_KTRACE_AGSCAN 2 | ||
29 | #define XFS_FSTRM_KTRACE_AGPICK1 3 | ||
30 | #define XFS_FSTRM_KTRACE_AGPICK2 4 | ||
31 | #define XFS_FSTRM_KTRACE_UPDATE 5 | ||
32 | #define XFS_FSTRM_KTRACE_FREE 6 | ||
33 | #define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7 | ||
34 | #define XFS_FSTRM_KTRACE_ASSOCIATE 8 | ||
35 | #define XFS_FSTRM_KTRACE_MOVEAG 9 | ||
36 | #define XFS_FSTRM_KTRACE_ORPHAN 10 | ||
37 | |||
38 | #define XFS_FSTRM_KTRACE_SIZE 16384 | ||
39 | extern ktrace_t *xfs_filestreams_trace_buf; | ||
40 | |||
41 | #endif | ||
42 | |||
43 | /* allocation selection flags */ | ||
44 | typedef enum xfs_fstrm_alloc { | ||
45 | XFS_PICK_USERDATA = 1, | ||
46 | XFS_PICK_LOWSPACE = 2, | ||
47 | } xfs_fstrm_alloc_t; | ||
48 | |||
49 | /* prototypes for filestream.c */ | ||
50 | int xfs_filestream_init(void); | ||
51 | void xfs_filestream_uninit(void); | ||
52 | int xfs_filestream_mount(struct xfs_mount *mp); | 25 | int xfs_filestream_mount(struct xfs_mount *mp); |
53 | void xfs_filestream_unmount(struct xfs_mount *mp); | 26 | void xfs_filestream_unmount(struct xfs_mount *mp); |
54 | xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); | ||
55 | int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); | ||
56 | void xfs_filestream_deassociate(struct xfs_inode *ip); | 27 | void xfs_filestream_deassociate(struct xfs_inode *ip); |
28 | xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); | ||
57 | int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); | 29 | int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); |
30 | int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno); | ||
58 | 31 | ||
59 | |||
60 | /* filestreams for the inode? */ | ||
61 | static inline int | 32 | static inline int |
62 | xfs_inode_is_filestream( | 33 | xfs_inode_is_filestream( |
63 | struct xfs_inode *ip) | 34 | struct xfs_inode *ip) |
64 | { | 35 | { |
65 | return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || | 36 | return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || |
66 | xfs_iflags_test(ip, XFS_IFILESTREAM) || | ||
67 | (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM); | 37 | (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM); |
68 | } | 38 | } |
69 | 39 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e3b7ad7ac8f..6bbfcf0b3bb2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -655,7 +655,6 @@ xfs_ialloc( | |||
655 | uint flags; | 655 | uint flags; |
656 | int error; | 656 | int error; |
657 | timespec_t tv; | 657 | timespec_t tv; |
658 | int filestreams = 0; | ||
659 | 658 | ||
660 | /* | 659 | /* |
661 | * Call the space management code to pick | 660 | * Call the space management code to pick |
@@ -772,13 +771,6 @@ xfs_ialloc( | |||
772 | flags |= XFS_ILOG_DEV; | 771 | flags |= XFS_ILOG_DEV; |
773 | break; | 772 | break; |
774 | case S_IFREG: | 773 | case S_IFREG: |
775 | /* | ||
776 | * we can't set up filestreams until after the VFS inode | ||
777 | * is set up properly. | ||
778 | */ | ||
779 | if (pip && xfs_inode_is_filestream(pip)) | ||
780 | filestreams = 1; | ||
781 | /* fall through */ | ||
782 | case S_IFDIR: | 774 | case S_IFDIR: |
783 | if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { | 775 | if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { |
784 | uint di_flags = 0; | 776 | uint di_flags = 0; |
@@ -844,15 +836,6 @@ xfs_ialloc( | |||
844 | /* now that we have an i_mode we can setup inode ops and unlock */ | 836 | /* now that we have an i_mode we can setup inode ops and unlock */ |
845 | xfs_setup_inode(ip); | 837 | xfs_setup_inode(ip); |
846 | 838 | ||
847 | /* now we have set up the vfs inode we can associate the filestream */ | ||
848 | if (filestreams) { | ||
849 | error = xfs_filestream_associate(pip, ip); | ||
850 | if (error < 0) | ||
851 | return -error; | ||
852 | if (!error) | ||
853 | xfs_iflags_set(ip, XFS_IFILESTREAM); | ||
854 | } | ||
855 | |||
856 | *ipp = ip; | 839 | *ipp = ip; |
857 | return 0; | 840 | return 0; |
858 | } | 841 | } |
@@ -1699,16 +1682,6 @@ xfs_release( | |||
1699 | int truncated; | 1682 | int truncated; |
1700 | 1683 | ||
1701 | /* | 1684 | /* |
1702 | * If we are using filestreams, and we have an unlinked | ||
1703 | * file that we are processing the last close on, then nothing | ||
1704 | * will be able to reopen and write to this file. Purge this | ||
1705 | * inode from the filestreams cache so that it doesn't delay | ||
1706 | * teardown of the inode. | ||
1707 | */ | ||
1708 | if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) | ||
1709 | xfs_filestream_deassociate(ip); | ||
1710 | |||
1711 | /* | ||
1712 | * If we previously truncated this file and removed old data | 1685 | * If we previously truncated this file and removed old data |
1713 | * in the process, we want to initiate "early" writeout on | 1686 | * in the process, we want to initiate "early" writeout on |
1714 | * the last close. This is an attempt to combat the notorious | 1687 | * the last close. This is an attempt to combat the notorious |
@@ -2664,13 +2637,7 @@ xfs_remove( | |||
2664 | if (error) | 2637 | if (error) |
2665 | goto std_return; | 2638 | goto std_return; |
2666 | 2639 | ||
2667 | /* | 2640 | if (is_dir && xfs_inode_is_filestream(ip)) |
2668 | * If we are using filestreams, kill the stream association. | ||
2669 | * If the file is still open it may get a new one but that | ||
2670 | * will get killed on last close in xfs_close() so we don't | ||
2671 | * have to worry about that. | ||
2672 | */ | ||
2673 | if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) | ||
2674 | xfs_filestream_deassociate(ip); | 2641 | xfs_filestream_deassociate(ip); |
2675 | 2642 | ||
2676 | return 0; | 2643 | return 0; |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f2fcde52b66d..13aea548206c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -209,7 +209,6 @@ xfs_get_initial_prid(struct xfs_inode *dp) | |||
209 | #define XFS_ISTALE (1 << 1) /* inode has been staled */ | 209 | #define XFS_ISTALE (1 << 1) /* inode has been staled */ |
210 | #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ | 210 | #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ |
211 | #define XFS_INEW (1 << 3) /* inode has just been allocated */ | 211 | #define XFS_INEW (1 << 3) /* inode has just been allocated */ |
212 | #define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */ | ||
213 | #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ | 212 | #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ |
214 | #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ | 213 | #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ |
215 | #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ | 214 | #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ |
@@ -225,8 +224,7 @@ xfs_get_initial_prid(struct xfs_inode *dp) | |||
225 | */ | 224 | */ |
226 | #define XFS_IRECLAIM_RESET_FLAGS \ | 225 | #define XFS_IRECLAIM_RESET_FLAGS \ |
227 | (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ | 226 | (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ |
228 | XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ | 227 | XFS_IDIRTY_RELEASE | XFS_ITRUNCATED) |
229 | XFS_IFILESTREAM); | ||
230 | 228 | ||
231 | /* | 229 | /* |
232 | * Synchronize processes attempting to flush the in-core inode back to disk. | 230 | * Synchronize processes attempting to flush the in-core inode back to disk. |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 4aff56395732..f99b4933dc22 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -100,14 +100,20 @@ | |||
100 | * likely result in a loop in one of the lists. That's a sure-fire recipe for | 100 | * likely result in a loop in one of the lists. That's a sure-fire recipe for |
101 | * an infinite loop in the code. | 101 | * an infinite loop in the code. |
102 | */ | 102 | */ |
103 | typedef struct xfs_mru_cache_elem | 103 | struct xfs_mru_cache { |
104 | { | 104 | struct radix_tree_root store; /* Core storage data structure. */ |
105 | struct list_head list_node; | 105 | struct list_head *lists; /* Array of lists, one per grp. */ |
106 | unsigned long key; | 106 | struct list_head reap_list; /* Elements overdue for reaping. */ |
107 | void *value; | 107 | spinlock_t lock; /* Lock to protect this struct. */ |
108 | } xfs_mru_cache_elem_t; | 108 | unsigned int grp_count; /* Number of discrete groups. */ |
109 | unsigned int grp_time; /* Time period spanned by grps. */ | ||
110 | unsigned int lru_grp; /* Group containing time zero. */ | ||
111 | unsigned long time_zero; /* Time first element was added. */ | ||
112 | xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ | ||
113 | struct delayed_work work; /* Workqueue data for reaping. */ | ||
114 | unsigned int queued; /* work has been queued */ | ||
115 | }; | ||
109 | 116 | ||
110 | static kmem_zone_t *xfs_mru_elem_zone; | ||
111 | static struct workqueue_struct *xfs_mru_reap_wq; | 117 | static struct workqueue_struct *xfs_mru_reap_wq; |
112 | 118 | ||
113 | /* | 119 | /* |
@@ -129,12 +135,12 @@ static struct workqueue_struct *xfs_mru_reap_wq; | |||
129 | */ | 135 | */ |
130 | STATIC unsigned long | 136 | STATIC unsigned long |
131 | _xfs_mru_cache_migrate( | 137 | _xfs_mru_cache_migrate( |
132 | xfs_mru_cache_t *mru, | 138 | struct xfs_mru_cache *mru, |
133 | unsigned long now) | 139 | unsigned long now) |
134 | { | 140 | { |
135 | unsigned int grp; | 141 | unsigned int grp; |
136 | unsigned int migrated = 0; | 142 | unsigned int migrated = 0; |
137 | struct list_head *lru_list; | 143 | struct list_head *lru_list; |
138 | 144 | ||
139 | /* Nothing to do if the data store is empty. */ | 145 | /* Nothing to do if the data store is empty. */ |
140 | if (!mru->time_zero) | 146 | if (!mru->time_zero) |
@@ -193,11 +199,11 @@ _xfs_mru_cache_migrate( | |||
193 | */ | 199 | */ |
194 | STATIC void | 200 | STATIC void |
195 | _xfs_mru_cache_list_insert( | 201 | _xfs_mru_cache_list_insert( |
196 | xfs_mru_cache_t *mru, | 202 | struct xfs_mru_cache *mru, |
197 | xfs_mru_cache_elem_t *elem) | 203 | struct xfs_mru_cache_elem *elem) |
198 | { | 204 | { |
199 | unsigned int grp = 0; | 205 | unsigned int grp = 0; |
200 | unsigned long now = jiffies; | 206 | unsigned long now = jiffies; |
201 | 207 | ||
202 | /* | 208 | /* |
203 | * If the data store is empty, initialise time zero, leave grp set to | 209 | * If the data store is empty, initialise time zero, leave grp set to |
@@ -231,10 +237,10 @@ _xfs_mru_cache_list_insert( | |||
231 | */ | 237 | */ |
232 | STATIC void | 238 | STATIC void |
233 | _xfs_mru_cache_clear_reap_list( | 239 | _xfs_mru_cache_clear_reap_list( |
234 | xfs_mru_cache_t *mru) __releases(mru->lock) __acquires(mru->lock) | 240 | struct xfs_mru_cache *mru) |
235 | 241 | __releases(mru->lock) __acquires(mru->lock) | |
236 | { | 242 | { |
237 | xfs_mru_cache_elem_t *elem, *next; | 243 | struct xfs_mru_cache_elem *elem, *next; |
238 | struct list_head tmp; | 244 | struct list_head tmp; |
239 | 245 | ||
240 | INIT_LIST_HEAD(&tmp); | 246 | INIT_LIST_HEAD(&tmp); |
@@ -252,15 +258,8 @@ _xfs_mru_cache_clear_reap_list( | |||
252 | spin_unlock(&mru->lock); | 258 | spin_unlock(&mru->lock); |
253 | 259 | ||
254 | list_for_each_entry_safe(elem, next, &tmp, list_node) { | 260 | list_for_each_entry_safe(elem, next, &tmp, list_node) { |
255 | |||
256 | /* Remove the element from the reap list. */ | ||
257 | list_del_init(&elem->list_node); | 261 | list_del_init(&elem->list_node); |
258 | 262 | mru->free_func(elem); | |
259 | /* Call the client's free function with the key and value pointer. */ | ||
260 | mru->free_func(elem->key, elem->value); | ||
261 | |||
262 | /* Free the element structure. */ | ||
263 | kmem_zone_free(xfs_mru_elem_zone, elem); | ||
264 | } | 263 | } |
265 | 264 | ||
266 | spin_lock(&mru->lock); | 265 | spin_lock(&mru->lock); |
@@ -277,7 +276,8 @@ STATIC void | |||
277 | _xfs_mru_cache_reap( | 276 | _xfs_mru_cache_reap( |
278 | struct work_struct *work) | 277 | struct work_struct *work) |
279 | { | 278 | { |
280 | xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); | 279 | struct xfs_mru_cache *mru = |
280 | container_of(work, struct xfs_mru_cache, work.work); | ||
281 | unsigned long now, next; | 281 | unsigned long now, next; |
282 | 282 | ||
283 | ASSERT(mru && mru->lists); | 283 | ASSERT(mru && mru->lists); |
@@ -304,28 +304,16 @@ _xfs_mru_cache_reap( | |||
304 | int | 304 | int |
305 | xfs_mru_cache_init(void) | 305 | xfs_mru_cache_init(void) |
306 | { | 306 | { |
307 | xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), | ||
308 | "xfs_mru_cache_elem"); | ||
309 | if (!xfs_mru_elem_zone) | ||
310 | goto out; | ||
311 | |||
312 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); | 307 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); |
313 | if (!xfs_mru_reap_wq) | 308 | if (!xfs_mru_reap_wq) |
314 | goto out_destroy_mru_elem_zone; | 309 | return -ENOMEM; |
315 | |||
316 | return 0; | 310 | return 0; |
317 | |||
318 | out_destroy_mru_elem_zone: | ||
319 | kmem_zone_destroy(xfs_mru_elem_zone); | ||
320 | out: | ||
321 | return -ENOMEM; | ||
322 | } | 311 | } |
323 | 312 | ||
324 | void | 313 | void |
325 | xfs_mru_cache_uninit(void) | 314 | xfs_mru_cache_uninit(void) |
326 | { | 315 | { |
327 | destroy_workqueue(xfs_mru_reap_wq); | 316 | destroy_workqueue(xfs_mru_reap_wq); |
328 | kmem_zone_destroy(xfs_mru_elem_zone); | ||
329 | } | 317 | } |
330 | 318 | ||
331 | /* | 319 | /* |
@@ -336,14 +324,14 @@ xfs_mru_cache_uninit(void) | |||
336 | */ | 324 | */ |
337 | int | 325 | int |
338 | xfs_mru_cache_create( | 326 | xfs_mru_cache_create( |
339 | xfs_mru_cache_t **mrup, | 327 | struct xfs_mru_cache **mrup, |
340 | unsigned int lifetime_ms, | 328 | unsigned int lifetime_ms, |
341 | unsigned int grp_count, | 329 | unsigned int grp_count, |
342 | xfs_mru_cache_free_func_t free_func) | 330 | xfs_mru_cache_free_func_t free_func) |
343 | { | 331 | { |
344 | xfs_mru_cache_t *mru = NULL; | 332 | struct xfs_mru_cache *mru = NULL; |
345 | int err = 0, grp; | 333 | int err = 0, grp; |
346 | unsigned int grp_time; | 334 | unsigned int grp_time; |
347 | 335 | ||
348 | if (mrup) | 336 | if (mrup) |
349 | *mrup = NULL; | 337 | *mrup = NULL; |
@@ -400,7 +388,7 @@ exit: | |||
400 | */ | 388 | */ |
401 | static void | 389 | static void |
402 | xfs_mru_cache_flush( | 390 | xfs_mru_cache_flush( |
403 | xfs_mru_cache_t *mru) | 391 | struct xfs_mru_cache *mru) |
404 | { | 392 | { |
405 | if (!mru || !mru->lists) | 393 | if (!mru || !mru->lists) |
406 | return; | 394 | return; |
@@ -420,7 +408,7 @@ xfs_mru_cache_flush( | |||
420 | 408 | ||
421 | void | 409 | void |
422 | xfs_mru_cache_destroy( | 410 | xfs_mru_cache_destroy( |
423 | xfs_mru_cache_t *mru) | 411 | struct xfs_mru_cache *mru) |
424 | { | 412 | { |
425 | if (!mru || !mru->lists) | 413 | if (!mru || !mru->lists) |
426 | return; | 414 | return; |
@@ -438,38 +426,30 @@ xfs_mru_cache_destroy( | |||
438 | */ | 426 | */ |
439 | int | 427 | int |
440 | xfs_mru_cache_insert( | 428 | xfs_mru_cache_insert( |
441 | xfs_mru_cache_t *mru, | 429 | struct xfs_mru_cache *mru, |
442 | unsigned long key, | 430 | unsigned long key, |
443 | void *value) | 431 | struct xfs_mru_cache_elem *elem) |
444 | { | 432 | { |
445 | xfs_mru_cache_elem_t *elem; | 433 | int error; |
446 | 434 | ||
447 | ASSERT(mru && mru->lists); | 435 | ASSERT(mru && mru->lists); |
448 | if (!mru || !mru->lists) | 436 | if (!mru || !mru->lists) |
449 | return EINVAL; | 437 | return EINVAL; |
450 | 438 | ||
451 | elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP); | 439 | if (radix_tree_preload(GFP_KERNEL)) |
452 | if (!elem) | ||
453 | return ENOMEM; | 440 | return ENOMEM; |
454 | 441 | ||
455 | if (radix_tree_preload(GFP_KERNEL)) { | ||
456 | kmem_zone_free(xfs_mru_elem_zone, elem); | ||
457 | return ENOMEM; | ||
458 | } | ||
459 | |||
460 | INIT_LIST_HEAD(&elem->list_node); | 442 | INIT_LIST_HEAD(&elem->list_node); |
461 | elem->key = key; | 443 | elem->key = key; |
462 | elem->value = value; | ||
463 | 444 | ||
464 | spin_lock(&mru->lock); | 445 | spin_lock(&mru->lock); |
465 | 446 | error = -radix_tree_insert(&mru->store, key, elem); | |
466 | radix_tree_insert(&mru->store, key, elem); | ||
467 | radix_tree_preload_end(); | 447 | radix_tree_preload_end(); |
468 | _xfs_mru_cache_list_insert(mru, elem); | 448 | if (!error) |
469 | 449 | _xfs_mru_cache_list_insert(mru, elem); | |
470 | spin_unlock(&mru->lock); | 450 | spin_unlock(&mru->lock); |
471 | 451 | ||
472 | return 0; | 452 | return error; |
473 | } | 453 | } |
474 | 454 | ||
475 | /* | 455 | /* |
@@ -478,13 +458,12 @@ xfs_mru_cache_insert( | |||
478 | * the client data pointer for the removed element is returned, otherwise this | 458 | * the client data pointer for the removed element is returned, otherwise this |
479 | * function will return a NULL pointer. | 459 | * function will return a NULL pointer. |
480 | */ | 460 | */ |
481 | void * | 461 | struct xfs_mru_cache_elem * |
482 | xfs_mru_cache_remove( | 462 | xfs_mru_cache_remove( |
483 | xfs_mru_cache_t *mru, | 463 | struct xfs_mru_cache *mru, |
484 | unsigned long key) | 464 | unsigned long key) |
485 | { | 465 | { |
486 | xfs_mru_cache_elem_t *elem; | 466 | struct xfs_mru_cache_elem *elem; |
487 | void *value = NULL; | ||
488 | 467 | ||
489 | ASSERT(mru && mru->lists); | 468 | ASSERT(mru && mru->lists); |
490 | if (!mru || !mru->lists) | 469 | if (!mru || !mru->lists) |
@@ -492,17 +471,11 @@ xfs_mru_cache_remove( | |||
492 | 471 | ||
493 | spin_lock(&mru->lock); | 472 | spin_lock(&mru->lock); |
494 | elem = radix_tree_delete(&mru->store, key); | 473 | elem = radix_tree_delete(&mru->store, key); |
495 | if (elem) { | 474 | if (elem) |
496 | value = elem->value; | ||
497 | list_del(&elem->list_node); | 475 | list_del(&elem->list_node); |
498 | } | ||
499 | |||
500 | spin_unlock(&mru->lock); | 476 | spin_unlock(&mru->lock); |
501 | 477 | ||
502 | if (elem) | 478 | return elem; |
503 | kmem_zone_free(xfs_mru_elem_zone, elem); | ||
504 | |||
505 | return value; | ||
506 | } | 479 | } |
507 | 480 | ||
508 | /* | 481 | /* |
@@ -511,13 +484,14 @@ xfs_mru_cache_remove( | |||
511 | */ | 484 | */ |
512 | void | 485 | void |
513 | xfs_mru_cache_delete( | 486 | xfs_mru_cache_delete( |
514 | xfs_mru_cache_t *mru, | 487 | struct xfs_mru_cache *mru, |
515 | unsigned long key) | 488 | unsigned long key) |
516 | { | 489 | { |
517 | void *value = xfs_mru_cache_remove(mru, key); | 490 | struct xfs_mru_cache_elem *elem; |
518 | 491 | ||
519 | if (value) | 492 | elem = xfs_mru_cache_remove(mru, key); |
520 | mru->free_func(key, value); | 493 | if (elem) |
494 | mru->free_func(elem); | ||
521 | } | 495 | } |
522 | 496 | ||
523 | /* | 497 | /* |
@@ -540,12 +514,12 @@ xfs_mru_cache_delete( | |||
540 | * status, we need to help it get it right by annotating the path that does | 514 | * status, we need to help it get it right by annotating the path that does |
541 | * not release the lock. | 515 | * not release the lock. |
542 | */ | 516 | */ |
543 | void * | 517 | struct xfs_mru_cache_elem * |
544 | xfs_mru_cache_lookup( | 518 | xfs_mru_cache_lookup( |
545 | xfs_mru_cache_t *mru, | 519 | struct xfs_mru_cache *mru, |
546 | unsigned long key) | 520 | unsigned long key) |
547 | { | 521 | { |
548 | xfs_mru_cache_elem_t *elem; | 522 | struct xfs_mru_cache_elem *elem; |
549 | 523 | ||
550 | ASSERT(mru && mru->lists); | 524 | ASSERT(mru && mru->lists); |
551 | if (!mru || !mru->lists) | 525 | if (!mru || !mru->lists) |
@@ -560,7 +534,7 @@ xfs_mru_cache_lookup( | |||
560 | } else | 534 | } else |
561 | spin_unlock(&mru->lock); | 535 | spin_unlock(&mru->lock); |
562 | 536 | ||
563 | return elem ? elem->value : NULL; | 537 | return elem; |
564 | } | 538 | } |
565 | 539 | ||
566 | /* | 540 | /* |
@@ -570,7 +544,8 @@ xfs_mru_cache_lookup( | |||
570 | */ | 544 | */ |
571 | void | 545 | void |
572 | xfs_mru_cache_done( | 546 | xfs_mru_cache_done( |
573 | xfs_mru_cache_t *mru) __releases(mru->lock) | 547 | struct xfs_mru_cache *mru) |
548 | __releases(mru->lock) | ||
574 | { | 549 | { |
575 | spin_unlock(&mru->lock); | 550 | spin_unlock(&mru->lock); |
576 | } | 551 | } |
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 36dd3ec8b4eb..fb5245ba5ff7 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h | |||
@@ -18,24 +18,15 @@ | |||
18 | #ifndef __XFS_MRU_CACHE_H__ | 18 | #ifndef __XFS_MRU_CACHE_H__ |
19 | #define __XFS_MRU_CACHE_H__ | 19 | #define __XFS_MRU_CACHE_H__ |
20 | 20 | ||
21 | struct xfs_mru_cache; | ||
21 | 22 | ||
22 | /* Function pointer type for callback to free a client's data pointer. */ | 23 | struct xfs_mru_cache_elem { |
23 | typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*); | 24 | struct list_head list_node; |
25 | unsigned long key; | ||
26 | }; | ||
24 | 27 | ||
25 | typedef struct xfs_mru_cache | 28 | /* Function pointer type for callback to free a client's data pointer. */ |
26 | { | 29 | typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem); |
27 | struct radix_tree_root store; /* Core storage data structure. */ | ||
28 | struct list_head *lists; /* Array of lists, one per grp. */ | ||
29 | struct list_head reap_list; /* Elements overdue for reaping. */ | ||
30 | spinlock_t lock; /* Lock to protect this struct. */ | ||
31 | unsigned int grp_count; /* Number of discrete groups. */ | ||
32 | unsigned int grp_time; /* Time period spanned by grps. */ | ||
33 | unsigned int lru_grp; /* Group containing time zero. */ | ||
34 | unsigned long time_zero; /* Time first element was added. */ | ||
35 | xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ | ||
36 | struct delayed_work work; /* Workqueue data for reaping. */ | ||
37 | unsigned int queued; /* work has been queued */ | ||
38 | } xfs_mru_cache_t; | ||
39 | 30 | ||
40 | int xfs_mru_cache_init(void); | 31 | int xfs_mru_cache_init(void); |
41 | void xfs_mru_cache_uninit(void); | 32 | void xfs_mru_cache_uninit(void); |
@@ -44,10 +35,12 @@ int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, | |||
44 | xfs_mru_cache_free_func_t free_func); | 35 | xfs_mru_cache_free_func_t free_func); |
45 | void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); | 36 | void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); |
46 | int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, | 37 | int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, |
47 | void *value); | 38 | struct xfs_mru_cache_elem *elem); |
48 | void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); | 39 | struct xfs_mru_cache_elem * |
40 | xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); | ||
49 | void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); | 41 | void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); |
50 | void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); | 42 | struct xfs_mru_cache_elem * |
43 | xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); | ||
51 | void xfs_mru_cache_done(struct xfs_mru_cache *mru); | 44 | void xfs_mru_cache_done(struct xfs_mru_cache *mru); |
52 | 45 | ||
53 | #endif /* __XFS_MRU_CACHE_H__ */ | 46 | #endif /* __XFS_MRU_CACHE_H__ */ |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 6e6673543777..8f0333b3f7a0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1749,13 +1749,9 @@ init_xfs_fs(void) | |||
1749 | if (error) | 1749 | if (error) |
1750 | goto out_destroy_wq; | 1750 | goto out_destroy_wq; |
1751 | 1751 | ||
1752 | error = xfs_filestream_init(); | ||
1753 | if (error) | ||
1754 | goto out_mru_cache_uninit; | ||
1755 | |||
1756 | error = xfs_buf_init(); | 1752 | error = xfs_buf_init(); |
1757 | if (error) | 1753 | if (error) |
1758 | goto out_filestream_uninit; | 1754 | goto out_mru_cache_uninit; |
1759 | 1755 | ||
1760 | error = xfs_init_procfs(); | 1756 | error = xfs_init_procfs(); |
1761 | if (error) | 1757 | if (error) |
@@ -1782,8 +1778,6 @@ init_xfs_fs(void) | |||
1782 | xfs_cleanup_procfs(); | 1778 | xfs_cleanup_procfs(); |
1783 | out_buf_terminate: | 1779 | out_buf_terminate: |
1784 | xfs_buf_terminate(); | 1780 | xfs_buf_terminate(); |
1785 | out_filestream_uninit: | ||
1786 | xfs_filestream_uninit(); | ||
1787 | out_mru_cache_uninit: | 1781 | out_mru_cache_uninit: |
1788 | xfs_mru_cache_uninit(); | 1782 | xfs_mru_cache_uninit(); |
1789 | out_destroy_wq: | 1783 | out_destroy_wq: |
@@ -1802,7 +1796,6 @@ exit_xfs_fs(void) | |||
1802 | xfs_sysctl_unregister(); | 1796 | xfs_sysctl_unregister(); |
1803 | xfs_cleanup_procfs(); | 1797 | xfs_cleanup_procfs(); |
1804 | xfs_buf_terminate(); | 1798 | xfs_buf_terminate(); |
1805 | xfs_filestream_uninit(); | ||
1806 | xfs_mru_cache_uninit(); | 1799 | xfs_mru_cache_uninit(); |
1807 | xfs_destroy_workqueues(); | 1800 | xfs_destroy_workqueues(); |
1808 | xfs_destroy_zones(); | 1801 | xfs_destroy_zones(); |
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index dee3279c095e..1e85bcd0e418 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "xfs_log_recover.h" | 46 | #include "xfs_log_recover.h" |
47 | #include "xfs_inode_item.h" | 47 | #include "xfs_inode_item.h" |
48 | #include "xfs_bmap_btree.h" | 48 | #include "xfs_bmap_btree.h" |
49 | #include "xfs_filestream.h" | ||
49 | 50 | ||
50 | /* | 51 | /* |
51 | * We include this last to have the helpers above available for the trace | 52 | * We include this last to have the helpers above available for the trace |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 65d8c793a25c..6910458915cf 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -538,6 +538,64 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); | |||
538 | DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); | 538 | DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); |
539 | DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); | 539 | DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); |
540 | 540 | ||
541 | DECLARE_EVENT_CLASS(xfs_filestream_class, | ||
542 | TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), | ||
543 | TP_ARGS(ip, agno), | ||
544 | TP_STRUCT__entry( | ||
545 | __field(dev_t, dev) | ||
546 | __field(xfs_ino_t, ino) | ||
547 | __field(xfs_agnumber_t, agno) | ||
548 | __field(int, streams) | ||
549 | ), | ||
550 | TP_fast_assign( | ||
551 | __entry->dev = VFS_I(ip)->i_sb->s_dev; | ||
552 | __entry->ino = ip->i_ino; | ||
553 | __entry->agno = agno; | ||
554 | __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); | ||
555 | ), | ||
556 | TP_printk("dev %d:%d ino 0x%llx agno %u streams %d", | ||
557 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
558 | __entry->ino, | ||
559 | __entry->agno, | ||
560 | __entry->streams) | ||
561 | ) | ||
562 | #define DEFINE_FILESTREAM_EVENT(name) \ | ||
563 | DEFINE_EVENT(xfs_filestream_class, name, \ | ||
564 | TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \ | ||
565 | TP_ARGS(ip, agno)) | ||
566 | DEFINE_FILESTREAM_EVENT(xfs_filestream_free); | ||
567 | DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); | ||
568 | DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); | ||
569 | |||
570 | TRACE_EVENT(xfs_filestream_pick, | ||
571 | TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno, | ||
572 | xfs_extlen_t free, int nscan), | ||
573 | TP_ARGS(ip, agno, free, nscan), | ||
574 | TP_STRUCT__entry( | ||
575 | __field(dev_t, dev) | ||
576 | __field(xfs_ino_t, ino) | ||
577 | __field(xfs_agnumber_t, agno) | ||
578 | __field(int, streams) | ||
579 | __field(xfs_extlen_t, free) | ||
580 | __field(int, nscan) | ||
581 | ), | ||
582 | TP_fast_assign( | ||
583 | __entry->dev = VFS_I(ip)->i_sb->s_dev; | ||
584 | __entry->ino = ip->i_ino; | ||
585 | __entry->agno = agno; | ||
586 | __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); | ||
587 | __entry->free = free; | ||
588 | __entry->nscan = nscan; | ||
589 | ), | ||
590 | TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d", | ||
591 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
592 | __entry->ino, | ||
593 | __entry->agno, | ||
594 | __entry->streams, | ||
595 | __entry->free, | ||
596 | __entry->nscan) | ||
597 | ); | ||
598 | |||
541 | DECLARE_EVENT_CLASS(xfs_lock_class, | 599 | DECLARE_EVENT_CLASS(xfs_lock_class, |
542 | TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, | 600 | TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, |
543 | unsigned long caller_ip), | 601 | unsigned long caller_ip), |