aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-10-02 23:42:58 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-10-02 23:42:58 -0400
commit: 60c7b4df82d0ec44fe10487eadec737abea55b34 (patch)
tree: 35c5b3cf5f9c9169e018a3527a14c558a10611bc /fs
parent: aab174f0df5d72d31caccf281af5f614fa254578 (diff)
parent: 2ea0392983a82f7dc3055568ae0f2558724d119b (diff)
Merge tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs
Pull xfs update from Ben Myers:
 "Several enhancements and cleanups:
   - make inode32 and inode64 remountable options
   - SEEK_HOLE/SEEK_DATA enhancements
   - cleanup struct declarations in xfs_mount.h"

* tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs:
  xfs: Make inode32 a remountable option
  xfs: add inode64->inode32 transition into xfs_set_inode32()
  xfs: Fix mp->m_maxagi update during inode64 remount
  xfs: reduce code duplication handling inode32/64 options
  xfs: make inode64 as the default allocation mode
  xfs: Fix m_agirotor reset during AG selection
  Make inode64 a remountable option
  xfs: stop the sync worker before xfs_unmountfs
  xfs: xfs_seek_hole() refinement with hole searching from page cache for unwritten extents
  xfs: xfs_seek_data() refinement with unwritten extents check up from page cache
  xfs: Introduce a helper routine to probe data or hole offset from page cache
  xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole()
  xfs: fix race while discarding buffers [V4]
  xfs: check for possible overflow in xfs_ioc_trim
  xfs: unlock the AGI buffer when looping in xfs_dialloc
  xfs: kill struct declarations in xfs_mount.h
  xfs: fix uninitialised variable in xfs_rtbuf_get()
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_file.c379
-rw-r--r--fs/xfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/xfs_mount.c43
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_super.c95
-rw-r--r--fs/xfs/xfs_super.h2
-rw-r--r--fs/xfs/xfs_trace.h1
7 files changed, 447 insertions, 80 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 56afcdb2377d..1eaeb8be3aae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -36,6 +36,7 @@
36 36
37#include <linux/dcache.h> 37#include <linux/dcache.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/pagevec.h>
39 40
40static const struct vm_operations_struct xfs_file_vm_ops; 41static const struct vm_operations_struct xfs_file_vm_ops;
41 42
@@ -959,17 +960,232 @@ xfs_vm_page_mkwrite(
959 return block_page_mkwrite(vma, vmf, xfs_get_blocks); 960 return block_page_mkwrite(vma, vmf, xfs_get_blocks);
960} 961}
961 962
/*
 * Selects which kind of offset the page cache probe should look for on
 * behalf of xfs_seek_data() or xfs_seek_hole().
 */
enum {
	HOLE_OFF = 0,	/* search for the start of a hole */
	DATA_OFF = 1,	/* search for the start of data */
};
971
972/*
973 * Lookup the desired type of offset from the given page.
974 *
975 * On success, return true and the offset argument will point to the
976 * start of the region that was found. Otherwise this function will
977 * return false and keep the offset argument unchanged.
978 */
979STATIC bool
980xfs_lookup_buffer_offset(
981 struct page *page,
982 loff_t *offset,
983 unsigned int type)
984{
985 loff_t lastoff = page_offset(page);
986 bool found = false;
987 struct buffer_head *bh, *head;
988
989 bh = head = page_buffers(page);
990 do {
991 /*
992 * Unwritten extents that have data in the page
993 * cache covering them can be identified by the
994 * BH_Unwritten state flag. Pages with multiple
995 * buffers might have a mix of holes, data and
996 * unwritten extents - any buffer with valid
997 * data in it should have BH_Uptodate flag set
998 * on it.
999 */
1000 if (buffer_unwritten(bh) ||
1001 buffer_uptodate(bh)) {
1002 if (type == DATA_OFF)
1003 found = true;
1004 } else {
1005 if (type == HOLE_OFF)
1006 found = true;
1007 }
1008
1009 if (found) {
1010 *offset = lastoff;
1011 break;
1012 }
1013 lastoff += bh->b_size;
1014 } while ((bh = bh->b_this_page) != head);
1015
1016 return found;
1017}
1018
1019/*
1020 * This routine is called to find out and return a data or hole offset
1021 * from the page cache for unwritten extents according to the desired
1022 * type for xfs_seek_data() or xfs_seek_hole().
1023 *
1024 * The argument offset is used to tell where we start to search from the
1025 * page cache. Map is used to figure out the end points of the range to
1026 * lookup pages.
1027 *
1028 * Return true if the desired type of offset was found, and the argument
1029 * offset is filled with that address. Otherwise, return false and keep
1030 * offset unchanged.
1031 */
1032STATIC bool
1033xfs_find_get_desired_pgoff(
1034 struct inode *inode,
1035 struct xfs_bmbt_irec *map,
1036 unsigned int type,
1037 loff_t *offset)
1038{
1039 struct xfs_inode *ip = XFS_I(inode);
1040 struct xfs_mount *mp = ip->i_mount;
1041 struct pagevec pvec;
1042 pgoff_t index;
1043 pgoff_t end;
1044 loff_t endoff;
1045 loff_t startoff = *offset;
1046 loff_t lastoff = startoff;
1047 bool found = false;
1048
1049 pagevec_init(&pvec, 0);
1050
1051 index = startoff >> PAGE_CACHE_SHIFT;
1052 endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
1053 end = endoff >> PAGE_CACHE_SHIFT;
1054 do {
1055 int want;
1056 unsigned nr_pages;
1057 unsigned int i;
1058
1059 want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
1060 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
1061 want);
1062 /*
1063 * No page mapped into given range. If we are searching holes
1064 * and if this is the first time we got into the loop, it means
1065 * that the given offset is landed in a hole, return it.
1066 *
1067 * If we have already stepped through some block buffers to find
1068 * holes but they all contains data. In this case, the last
1069 * offset is already updated and pointed to the end of the last
1070 * mapped page, if it does not reach the endpoint to search,
1071 * that means there should be a hole between them.
1072 */
1073 if (nr_pages == 0) {
1074 /* Data search found nothing */
1075 if (type == DATA_OFF)
1076 break;
1077
1078 ASSERT(type == HOLE_OFF);
1079 if (lastoff == startoff || lastoff < endoff) {
1080 found = true;
1081 *offset = lastoff;
1082 }
1083 break;
1084 }
1085
1086 /*
1087 * At lease we found one page. If this is the first time we
1088 * step into the loop, and if the first page index offset is
1089 * greater than the given search offset, a hole was found.
1090 */
1091 if (type == HOLE_OFF && lastoff == startoff &&
1092 lastoff < page_offset(pvec.pages[0])) {
1093 found = true;
1094 break;
1095 }
1096
1097 for (i = 0; i < nr_pages; i++) {
1098 struct page *page = pvec.pages[i];
1099 loff_t b_offset;
1100
1101 /*
1102 * At this point, the page may be truncated or
1103 * invalidated (changing page->mapping to NULL),
1104 * or even swizzled back from swapper_space to tmpfs
1105 * file mapping. However, page->index will not change
1106 * because we have a reference on the page.
1107 *
1108 * Searching done if the page index is out of range.
1109 * If the current offset is not reaches the end of
1110 * the specified search range, there should be a hole
1111 * between them.
1112 */
1113 if (page->index > end) {
1114 if (type == HOLE_OFF && lastoff < endoff) {
1115 *offset = lastoff;
1116 found = true;
1117 }
1118 goto out;
1119 }
1120
1121 lock_page(page);
1122 /*
1123 * Page truncated or invalidated(page->mapping == NULL).
1124 * We can freely skip it and proceed to check the next
1125 * page.
1126 */
1127 if (unlikely(page->mapping != inode->i_mapping)) {
1128 unlock_page(page);
1129 continue;
1130 }
1131
1132 if (!page_has_buffers(page)) {
1133 unlock_page(page);
1134 continue;
1135 }
1136
1137 found = xfs_lookup_buffer_offset(page, &b_offset, type);
1138 if (found) {
1139 /*
1140 * The found offset may be less than the start
1141 * point to search if this is the first time to
1142 * come here.
1143 */
1144 *offset = max_t(loff_t, startoff, b_offset);
1145 unlock_page(page);
1146 goto out;
1147 }
1148
1149 /*
1150 * We either searching data but nothing was found, or
1151 * searching hole but found a data buffer. In either
1152 * case, probably the next page contains the desired
1153 * things, update the last offset to it so.
1154 */
1155 lastoff = page_offset(page) + PAGE_SIZE;
1156 unlock_page(page);
1157 }
1158
1159 /*
1160 * The number of returned pages less than our desired, search
1161 * done. In this case, nothing was found for searching data,
1162 * but we found a hole behind the last offset.
1163 */
1164 if (nr_pages < want) {
1165 if (type == HOLE_OFF) {
1166 *offset = lastoff;
1167 found = true;
1168 }
1169 break;
1170 }
1171
1172 index = pvec.pages[i - 1]->index + 1;
1173 pagevec_release(&pvec);
1174 } while (index <= end);
1175
1176out:
1177 pagevec_release(&pvec);
1178 return found;
1179}
1180
962STATIC loff_t 1181STATIC loff_t
963xfs_seek_data( 1182xfs_seek_data(
964 struct file *file, 1183 struct file *file,
965 loff_t start, 1184 loff_t start)
966 u32 type)
967{ 1185{
968 struct inode *inode = file->f_mapping->host; 1186 struct inode *inode = file->f_mapping->host;
969 struct xfs_inode *ip = XFS_I(inode); 1187 struct xfs_inode *ip = XFS_I(inode);
970 struct xfs_mount *mp = ip->i_mount; 1188 struct xfs_mount *mp = ip->i_mount;
971 struct xfs_bmbt_irec map[2];
972 int nmap = 2;
973 loff_t uninitialized_var(offset); 1189 loff_t uninitialized_var(offset);
974 xfs_fsize_t isize; 1190 xfs_fsize_t isize;
975 xfs_fileoff_t fsbno; 1191 xfs_fileoff_t fsbno;
@@ -985,36 +1201,74 @@ xfs_seek_data(
985 goto out_unlock; 1201 goto out_unlock;
986 } 1202 }
987 1203
988 fsbno = XFS_B_TO_FSBT(mp, start);
989
990 /* 1204 /*
991 * Try to read extents from the first block indicated 1205 * Try to read extents from the first block indicated
992 * by fsbno to the end block of the file. 1206 * by fsbno to the end block of the file.
993 */ 1207 */
1208 fsbno = XFS_B_TO_FSBT(mp, start);
994 end = XFS_B_TO_FSB(mp, isize); 1209 end = XFS_B_TO_FSB(mp, isize);
1210 for (;;) {
1211 struct xfs_bmbt_irec map[2];
1212 int nmap = 2;
1213 unsigned int i;
995 1214
996 error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, 1215 error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
997 XFS_BMAPI_ENTIRE); 1216 XFS_BMAPI_ENTIRE);
998 if (error) 1217 if (error)
999 goto out_unlock; 1218 goto out_unlock;
1000 1219
1001 /* 1220 /* No extents at given offset, must be beyond EOF */
1002 * Treat unwritten extent as data extent since it might 1221 if (nmap == 0) {
1003 * contains dirty data in page cache. 1222 error = ENXIO;
1004 */ 1223 goto out_unlock;
1005 if (map[0].br_startblock != HOLESTARTBLOCK) { 1224 }
1006 offset = max_t(loff_t, start, 1225
1007 XFS_FSB_TO_B(mp, map[0].br_startoff)); 1226 for (i = 0; i < nmap; i++) {
1008 } else { 1227 offset = max_t(loff_t, start,
1228 XFS_FSB_TO_B(mp, map[i].br_startoff));
1229
1230 /* Landed in a data extent */
1231 if (map[i].br_startblock == DELAYSTARTBLOCK ||
1232 (map[i].br_state == XFS_EXT_NORM &&
1233 !isnullstartblock(map[i].br_startblock)))
1234 goto out;
1235
1236 /*
1237 * Landed in an unwritten extent, try to search data
1238 * from page cache.
1239 */
1240 if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1241 if (xfs_find_get_desired_pgoff(inode, &map[i],
1242 DATA_OFF, &offset))
1243 goto out;
1244 }
1245 }
1246
1247 /*
1248 * map[0] is hole or its an unwritten extent but
1249 * without data in page cache. Probably means that
1250 * we are reading after EOF if nothing in map[1].
1251 */
1009 if (nmap == 1) { 1252 if (nmap == 1) {
1010 error = ENXIO; 1253 error = ENXIO;
1011 goto out_unlock; 1254 goto out_unlock;
1012 } 1255 }
1013 1256
1014 offset = max_t(loff_t, start, 1257 ASSERT(i > 1);
1015 XFS_FSB_TO_B(mp, map[1].br_startoff)); 1258
1259 /*
1260 * Nothing was found, proceed to the next round of search
1261 * if reading offset not beyond or hit EOF.
1262 */
1263 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1264 start = XFS_FSB_TO_B(mp, fsbno);
1265 if (start >= isize) {
1266 error = ENXIO;
1267 goto out_unlock;
1268 }
1016 } 1269 }
1017 1270
1271out:
1018 if (offset != file->f_pos) 1272 if (offset != file->f_pos)
1019 file->f_pos = offset; 1273 file->f_pos = offset;
1020 1274
@@ -1029,16 +1283,15 @@ out_unlock:
1029STATIC loff_t 1283STATIC loff_t
1030xfs_seek_hole( 1284xfs_seek_hole(
1031 struct file *file, 1285 struct file *file,
1032 loff_t start, 1286 loff_t start)
1033 u32 type)
1034{ 1287{
1035 struct inode *inode = file->f_mapping->host; 1288 struct inode *inode = file->f_mapping->host;
1036 struct xfs_inode *ip = XFS_I(inode); 1289 struct xfs_inode *ip = XFS_I(inode);
1037 struct xfs_mount *mp = ip->i_mount; 1290 struct xfs_mount *mp = ip->i_mount;
1038 loff_t uninitialized_var(offset); 1291 loff_t uninitialized_var(offset);
1039 loff_t holeoff;
1040 xfs_fsize_t isize; 1292 xfs_fsize_t isize;
1041 xfs_fileoff_t fsbno; 1293 xfs_fileoff_t fsbno;
1294 xfs_filblks_t end;
1042 uint lock; 1295 uint lock;
1043 int error; 1296 int error;
1044 1297
@@ -1054,21 +1307,77 @@ xfs_seek_hole(
1054 } 1307 }
1055 1308
1056 fsbno = XFS_B_TO_FSBT(mp, start); 1309 fsbno = XFS_B_TO_FSBT(mp, start);
1057 error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK); 1310 end = XFS_B_TO_FSB(mp, isize);
1058 if (error) 1311
1059 goto out_unlock; 1312 for (;;) {
1313 struct xfs_bmbt_irec map[2];
1314 int nmap = 2;
1315 unsigned int i;
1316
1317 error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
1318 XFS_BMAPI_ENTIRE);
1319 if (error)
1320 goto out_unlock;
1321
1322 /* No extents at given offset, must be beyond EOF */
1323 if (nmap == 0) {
1324 error = ENXIO;
1325 goto out_unlock;
1326 }
1327
1328 for (i = 0; i < nmap; i++) {
1329 offset = max_t(loff_t, start,
1330 XFS_FSB_TO_B(mp, map[i].br_startoff));
1331
1332 /* Landed in a hole */
1333 if (map[i].br_startblock == HOLESTARTBLOCK)
1334 goto out;
1335
1336 /*
1337 * Landed in an unwritten extent, try to search hole
1338 * from page cache.
1339 */
1340 if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1341 if (xfs_find_get_desired_pgoff(inode, &map[i],
1342 HOLE_OFF, &offset))
1343 goto out;
1344 }
1345 }
1346
1347 /*
1348 * map[0] contains data or its unwritten but contains
1349 * data in page cache, probably means that we are
1350 * reading after EOF. We should fix offset to point
1351 * to the end of the file(i.e., there is an implicit
1352 * hole at the end of any file).
1353 */
1354 if (nmap == 1) {
1355 offset = isize;
1356 break;
1357 }
1358
1359 ASSERT(i > 1);
1060 1360
1061 holeoff = XFS_FSB_TO_B(mp, fsbno);
1062 if (holeoff <= start)
1063 offset = start;
1064 else {
1065 /* 1361 /*
1066 * xfs_bmap_first_unused() could return a value bigger than 1362 * Both mappings contains data, proceed to the next round of
1067 * isize if there are no more holes past the supplied offset. 1363 * search if the current reading offset not beyond or hit EOF.
1068 */ 1364 */
1069 offset = min_t(loff_t, holeoff, isize); 1365 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1366 start = XFS_FSB_TO_B(mp, fsbno);
1367 if (start >= isize) {
1368 offset = isize;
1369 break;
1370 }
1070 } 1371 }
1071 1372
1373out:
1374 /*
1375 * At this point, we must have found a hole. However, the returned
1376 * offset may be bigger than the file size as it may be aligned to
1377 * page boundary for unwritten extents, we need to deal with this
1378 * situation in particular.
1379 */
1380 offset = min_t(loff_t, offset, isize);
1072 if (offset != file->f_pos) 1381 if (offset != file->f_pos)
1073 file->f_pos = offset; 1382 file->f_pos = offset;
1074 1383
@@ -1092,9 +1401,9 @@ xfs_file_llseek(
1092 case SEEK_SET: 1401 case SEEK_SET:
1093 return generic_file_llseek(file, offset, origin); 1402 return generic_file_llseek(file, offset, origin);
1094 case SEEK_DATA: 1403 case SEEK_DATA:
1095 return xfs_seek_data(file, offset, origin); 1404 return xfs_seek_data(file, offset);
1096 case SEEK_HOLE: 1405 case SEEK_HOLE:
1097 return xfs_seek_hole(file, offset, origin); 1406 return xfs_seek_hole(file, offset);
1098 default: 1407 default:
1099 return -EINVAL; 1408 return -EINVAL;
1100 } 1409 }
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5aceb3f8ecd6..445bf1aef31c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -431,7 +431,7 @@ xfs_ialloc_next_ag(
431 431
432 spin_lock(&mp->m_agirotor_lock); 432 spin_lock(&mp->m_agirotor_lock);
433 agno = mp->m_agirotor; 433 agno = mp->m_agirotor;
434 if (++mp->m_agirotor == mp->m_maxagi) 434 if (++mp->m_agirotor >= mp->m_maxagi)
435 mp->m_agirotor = 0; 435 mp->m_agirotor = 0;
436 spin_unlock(&mp->m_agirotor_lock); 436 spin_unlock(&mp->m_agirotor_lock);
437 437
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 29c2f83d4147..b2bd3a0e6376 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -440,7 +440,7 @@ xfs_initialize_perag(
440 xfs_agnumber_t agcount, 440 xfs_agnumber_t agcount,
441 xfs_agnumber_t *maxagi) 441 xfs_agnumber_t *maxagi)
442{ 442{
443 xfs_agnumber_t index, max_metadata; 443 xfs_agnumber_t index;
444 xfs_agnumber_t first_initialised = 0; 444 xfs_agnumber_t first_initialised = 0;
445 xfs_perag_t *pag; 445 xfs_perag_t *pag;
446 xfs_agino_t agino; 446 xfs_agino_t agino;
@@ -500,43 +500,10 @@ xfs_initialize_perag(
500 else 500 else
501 mp->m_flags &= ~XFS_MOUNT_32BITINODES; 501 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
502 502
503 if (mp->m_flags & XFS_MOUNT_32BITINODES) { 503 if (mp->m_flags & XFS_MOUNT_32BITINODES)
504 /* 504 index = xfs_set_inode32(mp);
505 * Calculate how much should be reserved for inodes to meet 505 else
506 * the max inode percentage. 506 index = xfs_set_inode64(mp);
507 */
508 if (mp->m_maxicount) {
509 __uint64_t icount;
510
511 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
512 do_div(icount, 100);
513 icount += sbp->sb_agblocks - 1;
514 do_div(icount, sbp->sb_agblocks);
515 max_metadata = icount;
516 } else {
517 max_metadata = agcount;
518 }
519
520 for (index = 0; index < agcount; index++) {
521 ino = XFS_AGINO_TO_INO(mp, index, agino);
522 if (ino > XFS_MAXINUMBER_32) {
523 index++;
524 break;
525 }
526
527 pag = xfs_perag_get(mp, index);
528 pag->pagi_inodeok = 1;
529 if (index < max_metadata)
530 pag->pagf_metadata = 1;
531 xfs_perag_put(pag);
532 }
533 } else {
534 for (index = 0; index < agcount; index++) {
535 pag = xfs_perag_get(mp, index);
536 pag->pagi_inodeok = 1;
537 xfs_perag_put(pag);
538 }
539 }
540 507
541 if (maxagi) 508 if (maxagi)
542 *maxagi = index; 509 *maxagi = index;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 05a05a7b6119..deee09e534dc 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations {
54#include "xfs_sync.h" 54#include "xfs_sync.h"
55 55
56struct xlog; 56struct xlog;
57struct xfs_mount_args;
58struct xfs_inode; 57struct xfs_inode;
59struct xfs_bmbt_irec;
60struct xfs_bmap_free;
61struct xfs_extdelta;
62struct xfs_swapext;
63struct xfs_mru_cache; 58struct xfs_mru_cache;
64struct xfs_nameops; 59struct xfs_nameops;
65struct xfs_ail; 60struct xfs_ail;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e0fd2734189e..26a09bd7f975 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool;
88 * unwritten extent conversion */ 88 * unwritten extent conversion */
89#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ 89#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
90#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 90#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
91#define MNTOPT_32BITINODE "inode32" /* inode allocation limited to
92 * XFS_MAXINUMBER_32 */
91#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 93#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
92#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ 94#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
93#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ 95#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
@@ -120,12 +122,18 @@ mempool_t *xfs_ioend_pool;
120 * in the future, too. 122 * in the future, too.
121 */ 123 */
122enum { 124enum {
123 Opt_barrier, Opt_nobarrier, Opt_err 125 Opt_barrier,
126 Opt_nobarrier,
127 Opt_inode64,
128 Opt_inode32,
129 Opt_err
124}; 130};
125 131
126static const match_table_t tokens = { 132static const match_table_t tokens = {
127 {Opt_barrier, "barrier"}, 133 {Opt_barrier, "barrier"},
128 {Opt_nobarrier, "nobarrier"}, 134 {Opt_nobarrier, "nobarrier"},
135 {Opt_inode64, "inode64"},
136 {Opt_inode32, "inode32"},
129 {Opt_err, NULL} 137 {Opt_err, NULL}
130}; 138};
131 139
@@ -197,7 +205,9 @@ xfs_parseargs(
197 */ 205 */
198 mp->m_flags |= XFS_MOUNT_BARRIER; 206 mp->m_flags |= XFS_MOUNT_BARRIER;
199 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 207 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
208#if !XFS_BIG_INUMS
200 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 209 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
210#endif
201 211
202 /* 212 /*
203 * These can be overridden by the mount option parsing. 213 * These can be overridden by the mount option parsing.
@@ -294,6 +304,8 @@ xfs_parseargs(
294 return EINVAL; 304 return EINVAL;
295 } 305 }
296 dswidth = simple_strtoul(value, &eov, 10); 306 dswidth = simple_strtoul(value, &eov, 10);
307 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
308 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
297 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 309 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
298 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; 310 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
299#if !XFS_BIG_INUMS 311#if !XFS_BIG_INUMS
@@ -492,6 +504,7 @@ xfs_showargs(
492 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 504 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
493 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 505 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
494 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, 506 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
507 { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
495 { 0, NULL } 508 { 0, NULL }
496 }; 509 };
497 static struct proc_xfs_info xfs_info_unset[] = { 510 static struct proc_xfs_info xfs_info_unset[] = {
@@ -591,6 +604,80 @@ xfs_max_file_offset(
591 return (((__uint64_t)pagefactor) << bitshift) - 1; 604 return (((__uint64_t)pagefactor) << bitshift) - 1;
592} 605}
593 606
607xfs_agnumber_t
608xfs_set_inode32(struct xfs_mount *mp)
609{
610 xfs_agnumber_t index = 0;
611 xfs_agnumber_t maxagi = 0;
612 xfs_sb_t *sbp = &mp->m_sb;
613 xfs_agnumber_t max_metadata;
614 xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
615 xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
616 xfs_perag_t *pag;
617
618 /* Calculate how much should be reserved for inodes to meet
619 * the max inode percentage.
620 */
621 if (mp->m_maxicount) {
622 __uint64_t icount;
623
624 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
625 do_div(icount, 100);
626 icount += sbp->sb_agblocks - 1;
627 do_div(icount, sbp->sb_agblocks);
628 max_metadata = icount;
629 } else {
630 max_metadata = sbp->sb_agcount;
631 }
632
633 for (index = 0; index < sbp->sb_agcount; index++) {
634 ino = XFS_AGINO_TO_INO(mp, index, agino);
635
636 if (ino > XFS_MAXINUMBER_32) {
637 pag = xfs_perag_get(mp, index);
638 pag->pagi_inodeok = 0;
639 pag->pagf_metadata = 0;
640 xfs_perag_put(pag);
641 continue;
642 }
643
644 pag = xfs_perag_get(mp, index);
645 pag->pagi_inodeok = 1;
646 maxagi++;
647 if (index < max_metadata)
648 pag->pagf_metadata = 1;
649 xfs_perag_put(pag);
650 }
651 mp->m_flags |= (XFS_MOUNT_32BITINODES |
652 XFS_MOUNT_SMALL_INUMS);
653
654 return maxagi;
655}
656
657xfs_agnumber_t
658xfs_set_inode64(struct xfs_mount *mp)
659{
660 xfs_agnumber_t index = 0;
661
662 for (index = 0; index < mp->m_sb.sb_agcount; index++) {
663 struct xfs_perag *pag;
664
665 pag = xfs_perag_get(mp, index);
666 pag->pagi_inodeok = 1;
667 pag->pagf_metadata = 0;
668 xfs_perag_put(pag);
669 }
670
671 /* There is no need for lock protection on m_flags,
672 * the rw_semaphore of the VFS superblock is locked
673 * during mount/umount/remount operations, so this is
674 * enough to avoid concurency on the m_flags field
675 */
676 mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
677 XFS_MOUNT_SMALL_INUMS);
678 return index;
679}
680
594STATIC int 681STATIC int
595xfs_blkdev_get( 682xfs_blkdev_get(
596 xfs_mount_t *mp, 683 xfs_mount_t *mp,
@@ -1056,6 +1143,12 @@ xfs_fs_remount(
1056 case Opt_nobarrier: 1143 case Opt_nobarrier:
1057 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1144 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1058 break; 1145 break;
1146 case Opt_inode64:
1147 mp->m_maxagi = xfs_set_inode64(mp);
1148 break;
1149 case Opt_inode32:
1150 mp->m_maxagi = xfs_set_inode32(mp);
1151 break;
1059 default: 1152 default:
1060 /* 1153 /*
1061 * Logically we would return an error here to prevent 1154 * Logically we would return an error here to prevent
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 09b0c26b2245..9de4a920ba05 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -75,6 +75,8 @@ struct block_device;
75extern __uint64_t xfs_max_file_offset(unsigned int); 75extern __uint64_t xfs_max_file_offset(unsigned int);
76 76
77extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 77extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
78extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
79extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
78 80
79extern const struct export_operations xfs_export_operations; 81extern const struct export_operations xfs_export_operations;
80extern const struct xattr_handler *xfs_xattr_handlers[]; 82extern const struct xattr_handler *xfs_xattr_handlers[];
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e5795dd6013a..7d36ccf57f93 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -37,6 +37,7 @@ struct xlog_recover;
37struct xlog_recover_item; 37struct xlog_recover_item;
38struct xfs_buf_log_format; 38struct xfs_buf_log_format;
39struct xfs_inode_log_format; 39struct xfs_inode_log_format;
40struct xfs_bmbt_irec;
40 41
41DECLARE_EVENT_CLASS(xfs_attr_list_class, 42DECLARE_EVENT_CLASS(xfs_attr_list_class,
42 TP_PROTO(struct xfs_attr_list_context *ctx), 43 TP_PROTO(struct xfs_attr_list_context *ctx),