Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 607 |
1 file changed, 440 insertions(+), 167 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 080b09a57a8f..fcedf5464eb7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -99,6 +99,13 @@ static struct vfsmount *shm_mnt; | |||
99 | /* Pretend that each entry is of this size in directory's i_size */ | 99 | /* Pretend that each entry is of this size in directory's i_size */ |
100 | #define BOGO_DIRENT_SIZE 20 | 100 | #define BOGO_DIRENT_SIZE 20 |
101 | 101 | ||
102 | struct shmem_xattr { | ||
103 | struct list_head list; /* anchored by shmem_inode_info->xattr_list */ | ||
104 | char *name; /* xattr name */ | ||
105 | size_t size; | ||
106 | char value[0]; | ||
107 | }; | ||
108 | |||
102 | /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ | 109 | /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ |
103 | enum sgp_type { | 110 | enum sgp_type { |
104 | SGP_READ, /* don't exceed i_size, don't allocate page */ | 111 | SGP_READ, /* don't exceed i_size, don't allocate page */ |
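Note on the hunk above: each extended attribute is kept as a list node whose payload lives in a trailing flexible array ("char value[0]", the pre-C99 spelling), so header and value come from a single allocation. A minimal user-space sketch of the same idiom (hypothetical names, not code from this patch):

    #include <stdlib.h>
    #include <string.h>

    struct xattr_node {
            char   *name;           /* attribute name, separately allocated */
            size_t  size;           /* length of value[] */
            char    value[];        /* flexible array member (value[0] in older C) */
    };

    static struct xattr_node *xattr_node_new(const char *name,
                                             const void *value, size_t size)
    {
            struct xattr_node *x = malloc(sizeof(*x) + size);

            if (!x)
                    return NULL;
            x->name = strdup(name);
            if (!x->name) {
                    free(x);
                    return NULL;
            }
            x->size = size;
            memcpy(x->value, value, size);
            return x;
    }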
@@ -224,7 +231,6 @@ static const struct vm_operations_struct shmem_vm_ops; | |||
224 | static struct backing_dev_info shmem_backing_dev_info __read_mostly = { | 231 | static struct backing_dev_info shmem_backing_dev_info __read_mostly = { |
225 | .ra_pages = 0, /* No readahead */ | 232 | .ra_pages = 0, /* No readahead */ |
226 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, | 233 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, |
227 | .unplug_io_fn = default_unplug_io_fn, | ||
228 | }; | 234 | }; |
229 | 235 | ||
230 | static LIST_HEAD(shmem_swaplist); | 236 | static LIST_HEAD(shmem_swaplist); |
@@ -422,7 +428,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long | |||
422 | * a waste to allocate index if we cannot allocate data. | 428 | * a waste to allocate index if we cannot allocate data. |
423 | */ | 429 | */ |
424 | if (sbinfo->max_blocks) { | 430 | if (sbinfo->max_blocks) { |
425 | if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0) | 431 | if (percpu_counter_compare(&sbinfo->used_blocks, |
432 | sbinfo->max_blocks - 1) >= 0) | ||
426 | return ERR_PTR(-ENOSPC); | 433 | return ERR_PTR(-ENOSPC); |
427 | percpu_counter_inc(&sbinfo->used_blocks); | 434 | percpu_counter_inc(&sbinfo->used_blocks); |
428 | spin_lock(&inode->i_lock); | 435 | spin_lock(&inode->i_lock); |
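Note on the comparison change: percpu_counter_compare() returns the sign of (counter value minus rhs), so the test is now inclusive. With max_blocks = 4, for example, used_blocks = 3 already yields -ENOSPC here, where the old "> 0" test would still have allowed the increment. The same ">= 0" tightening is applied to the block check in shmem_getpage() further down. Condensed, with names as in this file:

    if (sbinfo->max_blocks) {
            /* refuse once the counter has reached the limit, not one past it */
            if (percpu_counter_compare(&sbinfo->used_blocks,
                                       sbinfo->max_blocks - 1) >= 0)
                    return ERR_PTR(-ENOSPC);
            percpu_counter_inc(&sbinfo->used_blocks);
    }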
@@ -532,7 +539,7 @@ static void shmem_free_pages(struct list_head *next) | |||
532 | } while (next); | 539 | } while (next); |
533 | } | 540 | } |
534 | 541 | ||
535 | static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) | 542 | void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) |
536 | { | 543 | { |
537 | struct shmem_inode_info *info = SHMEM_I(inode); | 544 | struct shmem_inode_info *info = SHMEM_I(inode); |
538 | unsigned long idx; | 545 | unsigned long idx; |
@@ -555,6 +562,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) | |||
555 | spinlock_t *punch_lock; | 562 | spinlock_t *punch_lock; |
556 | unsigned long upper_limit; | 563 | unsigned long upper_limit; |
557 | 564 | ||
565 | truncate_inode_pages_range(inode->i_mapping, start, end); | ||
566 | |||
558 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 567 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
559 | idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 568 | idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
560 | if (idx >= info->next_index) | 569 | if (idx >= info->next_index) |
@@ -731,16 +740,8 @@ done2: | |||
731 | * lowered next_index. Also, though shmem_getpage checks | 740 | * lowered next_index. Also, though shmem_getpage checks |
732 | * i_size before adding to cache, no recheck after: so fix the | 741 | * i_size before adding to cache, no recheck after: so fix the |
733 | * narrow window there too. | 742 | * narrow window there too. |
734 | * | ||
735 | * Recalling truncate_inode_pages_range and unmap_mapping_range | ||
736 | * every time for punch_hole (which never got a chance to clear | ||
737 | * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive, | ||
738 | * yet hardly ever necessary: try to optimize them out later. | ||
739 | */ | 743 | */ |
740 | truncate_inode_pages_range(inode->i_mapping, start, end); | 744 | truncate_inode_pages_range(inode->i_mapping, start, end); |
741 | if (punch_hole) | ||
742 | unmap_mapping_range(inode->i_mapping, start, | ||
743 | end - start, 1); | ||
744 | } | 745 | } |
745 | 746 | ||
746 | spin_lock(&info->lock); | 747 | spin_lock(&info->lock); |
@@ -759,27 +760,28 @@ done2: | |||
759 | shmem_free_pages(pages_to_free.next); | 760 | shmem_free_pages(pages_to_free.next); |
760 | } | 761 | } |
761 | } | 762 | } |
763 | EXPORT_SYMBOL_GPL(shmem_truncate_range); | ||
762 | 764 | ||
763 | static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) | 765 | static int shmem_setattr(struct dentry *dentry, struct iattr *attr) |
764 | { | 766 | { |
765 | struct inode *inode = dentry->d_inode; | 767 | struct inode *inode = dentry->d_inode; |
766 | loff_t newsize = attr->ia_size; | ||
767 | int error; | 768 | int error; |
768 | 769 | ||
769 | error = inode_change_ok(inode, attr); | 770 | error = inode_change_ok(inode, attr); |
770 | if (error) | 771 | if (error) |
771 | return error; | 772 | return error; |
772 | 773 | ||
773 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE) | 774 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
774 | && newsize != inode->i_size) { | 775 | loff_t oldsize = inode->i_size; |
776 | loff_t newsize = attr->ia_size; | ||
775 | struct page *page = NULL; | 777 | struct page *page = NULL; |
776 | 778 | ||
777 | if (newsize < inode->i_size) { | 779 | if (newsize < oldsize) { |
778 | /* | 780 | /* |
779 | * If truncating down to a partial page, then | 781 | * If truncating down to a partial page, then |
780 | * if that page is already allocated, hold it | 782 | * if that page is already allocated, hold it |
781 | * in memory until the truncation is over, so | 783 | * in memory until the truncation is over, so |
782 | * truncate_partial_page cannnot miss it were | 784 | * truncate_partial_page cannot miss it were |
783 | * it assigned to swap. | 785 | * it assigned to swap. |
784 | */ | 786 | */ |
785 | if (newsize & (PAGE_CACHE_SIZE-1)) { | 787 | if (newsize & (PAGE_CACHE_SIZE-1)) { |
@@ -803,12 +805,19 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) | |||
803 | spin_unlock(&info->lock); | 805 | spin_unlock(&info->lock); |
804 | } | 806 | } |
805 | } | 807 | } |
806 | 808 | if (newsize != oldsize) { | |
807 | /* XXX(truncate): truncate_setsize should be called last */ | 809 | i_size_write(inode, newsize); |
808 | truncate_setsize(inode, newsize); | 810 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
811 | } | ||
812 | if (newsize < oldsize) { | ||
813 | loff_t holebegin = round_up(newsize, PAGE_SIZE); | ||
814 | unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); | ||
815 | shmem_truncate_range(inode, newsize, (loff_t)-1); | ||
816 | /* unmap again to remove racily COWed private pages */ | ||
817 | unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); | ||
818 | } | ||
809 | if (page) | 819 | if (page) |
810 | page_cache_release(page); | 820 | page_cache_release(page); |
811 | shmem_truncate_range(inode, newsize, (loff_t)-1); | ||
812 | } | 821 | } |
813 | 822 | ||
814 | setattr_copy(inode, attr); | 823 | setattr_copy(inode, attr); |
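Note on the new shrink path: the size is published first with i_size_write(), mappings beyond it are zapped, the pages and swap entries are truncated, and then the mappings are zapped a second time, because a fault racing with the truncation can copy a stale page into a private (COW) mapping that only the second pass catches. Condensed, with names as in this function:

    loff_t holebegin = round_up(newsize, PAGE_SIZE);

    i_size_write(inode, newsize);                           /* new size visible first */
    unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); /* zap ptes beyond it */
    shmem_truncate_range(inode, newsize, (loff_t)-1);       /* drop pagecache and swap */
    /* a racing fault may have COWed a stale page into a private mapping */
    unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);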
@@ -822,9 +831,9 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) | |||
822 | static void shmem_evict_inode(struct inode *inode) | 831 | static void shmem_evict_inode(struct inode *inode) |
823 | { | 832 | { |
824 | struct shmem_inode_info *info = SHMEM_I(inode); | 833 | struct shmem_inode_info *info = SHMEM_I(inode); |
834 | struct shmem_xattr *xattr, *nxattr; | ||
825 | 835 | ||
826 | if (inode->i_mapping->a_ops == &shmem_aops) { | 836 | if (inode->i_mapping->a_ops == &shmem_aops) { |
827 | truncate_inode_pages(inode->i_mapping, 0); | ||
828 | shmem_unacct_size(info->flags, inode->i_size); | 837 | shmem_unacct_size(info->flags, inode->i_size); |
829 | inode->i_size = 0; | 838 | inode->i_size = 0; |
830 | shmem_truncate_range(inode, 0, (loff_t)-1); | 839 | shmem_truncate_range(inode, 0, (loff_t)-1); |
@@ -834,6 +843,11 @@ static void shmem_evict_inode(struct inode *inode) | |||
834 | mutex_unlock(&shmem_swaplist_mutex); | 843 | mutex_unlock(&shmem_swaplist_mutex); |
835 | } | 844 | } |
836 | } | 845 | } |
846 | |||
847 | list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { | ||
848 | kfree(xattr->name); | ||
849 | kfree(xattr); | ||
850 | } | ||
837 | BUG_ON(inode->i_blocks); | 851 | BUG_ON(inode->i_blocks); |
838 | shmem_free_inode(inode->i_sb); | 852 | shmem_free_inode(inode->i_sb); |
839 | end_writeback(inode); | 853 | end_writeback(inode); |
@@ -852,7 +866,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_ | |||
852 | 866 | ||
853 | static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) | 867 | static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) |
854 | { | 868 | { |
855 | struct inode *inode; | 869 | struct address_space *mapping; |
856 | unsigned long idx; | 870 | unsigned long idx; |
857 | unsigned long size; | 871 | unsigned long size; |
858 | unsigned long limit; | 872 | unsigned long limit; |
@@ -875,8 +889,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s | |||
875 | if (size > SHMEM_NR_DIRECT) | 889 | if (size > SHMEM_NR_DIRECT) |
876 | size = SHMEM_NR_DIRECT; | 890 | size = SHMEM_NR_DIRECT; |
877 | offset = shmem_find_swp(entry, ptr, ptr+size); | 891 | offset = shmem_find_swp(entry, ptr, ptr+size); |
878 | if (offset >= 0) | 892 | if (offset >= 0) { |
893 | shmem_swp_balance_unmap(); | ||
879 | goto found; | 894 | goto found; |
895 | } | ||
880 | if (!info->i_indirect) | 896 | if (!info->i_indirect) |
881 | goto lost2; | 897 | goto lost2; |
882 | 898 | ||
@@ -917,6 +933,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s | |||
917 | shmem_swp_unmap(ptr); | 933 | shmem_swp_unmap(ptr); |
918 | if (offset >= 0) { | 934 | if (offset >= 0) { |
919 | shmem_dir_unmap(dir); | 935 | shmem_dir_unmap(dir); |
936 | ptr = shmem_swp_map(subdir); | ||
920 | goto found; | 937 | goto found; |
921 | } | 938 | } |
922 | } | 939 | } |
@@ -928,8 +945,7 @@ lost2: | |||
928 | return 0; | 945 | return 0; |
929 | found: | 946 | found: |
930 | idx += offset; | 947 | idx += offset; |
931 | inode = igrab(&info->vfs_inode); | 948 | ptr += offset; |
932 | spin_unlock(&info->lock); | ||
933 | 949 | ||
934 | /* | 950 | /* |
935 | * Move _head_ to start search for next from here. | 951 | * Move _head_ to start search for next from here. |
@@ -940,37 +956,18 @@ found: | |||
940 | */ | 956 | */ |
941 | if (shmem_swaplist.next != &info->swaplist) | 957 | if (shmem_swaplist.next != &info->swaplist) |
942 | list_move_tail(&shmem_swaplist, &info->swaplist); | 958 | list_move_tail(&shmem_swaplist, &info->swaplist); |
943 | mutex_unlock(&shmem_swaplist_mutex); | ||
944 | 959 | ||
945 | error = 1; | ||
946 | if (!inode) | ||
947 | goto out; | ||
948 | /* | 960 | /* |
949 | * Charge page using GFP_KERNEL while we can wait. | 961 | * We rely on shmem_swaplist_mutex, not only to protect the swaplist, |
950 | * Charged back to the user(not to caller) when swap account is used. | 962 | * but also to hold up shmem_evict_inode(): so inode cannot be freed |
951 | * add_to_page_cache() will be called with GFP_NOWAIT. | 963 | * beneath us (pagelock doesn't help until the page is in pagecache). |
952 | */ | 964 | */ |
953 | error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); | 965 | mapping = info->vfs_inode.i_mapping; |
954 | if (error) | 966 | error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); |
955 | goto out; | 967 | /* which does mem_cgroup_uncharge_cache_page on error */ |
956 | error = radix_tree_preload(GFP_KERNEL); | ||
957 | if (error) { | ||
958 | mem_cgroup_uncharge_cache_page(page); | ||
959 | goto out; | ||
960 | } | ||
961 | error = 1; | ||
962 | |||
963 | spin_lock(&info->lock); | ||
964 | ptr = shmem_swp_entry(info, idx, NULL); | ||
965 | if (ptr && ptr->val == entry.val) { | ||
966 | error = add_to_page_cache_locked(page, inode->i_mapping, | ||
967 | idx, GFP_NOWAIT); | ||
968 | /* does mem_cgroup_uncharge_cache_page on error */ | ||
969 | } else /* we must compensate for our precharge above */ | ||
970 | mem_cgroup_uncharge_cache_page(page); | ||
971 | 968 | ||
972 | if (error == -EEXIST) { | 969 | if (error == -EEXIST) { |
973 | struct page *filepage = find_get_page(inode->i_mapping, idx); | 970 | struct page *filepage = find_get_page(mapping, idx); |
974 | error = 1; | 971 | error = 1; |
975 | if (filepage) { | 972 | if (filepage) { |
976 | /* | 973 | /* |
@@ -990,14 +987,8 @@ found: | |||
990 | swap_free(entry); | 987 | swap_free(entry); |
991 | error = 1; /* not an error, but entry was found */ | 988 | error = 1; /* not an error, but entry was found */ |
992 | } | 989 | } |
993 | if (ptr) | 990 | shmem_swp_unmap(ptr); |
994 | shmem_swp_unmap(ptr); | ||
995 | spin_unlock(&info->lock); | 991 | spin_unlock(&info->lock); |
996 | radix_tree_preload_end(); | ||
997 | out: | ||
998 | unlock_page(page); | ||
999 | page_cache_release(page); | ||
1000 | iput(inode); /* allows for NULL */ | ||
1001 | return error; | 992 | return error; |
1002 | } | 993 | } |
1003 | 994 | ||
@@ -1009,6 +1000,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
1009 | struct list_head *p, *next; | 1000 | struct list_head *p, *next; |
1010 | struct shmem_inode_info *info; | 1001 | struct shmem_inode_info *info; |
1011 | int found = 0; | 1002 | int found = 0; |
1003 | int error; | ||
1004 | |||
1005 | /* | ||
1006 | * Charge page using GFP_KERNEL while we can wait, before taking | ||
1007 | * the shmem_swaplist_mutex which might hold up shmem_writepage(). | ||
1008 | * Charged back to the user (not to caller) when swap account is used. | ||
1009 | * add_to_page_cache() will be called with GFP_NOWAIT. | ||
1010 | */ | ||
1011 | error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); | ||
1012 | if (error) | ||
1013 | goto out; | ||
1014 | /* | ||
1015 | * Try to preload while we can wait, to not make a habit of | ||
1016 | * draining atomic reserves; but don't latch on to this cpu, | ||
1017 | * it's okay if sometimes we get rescheduled after this. | ||
1018 | */ | ||
1019 | error = radix_tree_preload(GFP_KERNEL); | ||
1020 | if (error) | ||
1021 | goto uncharge; | ||
1022 | radix_tree_preload_end(); | ||
1012 | 1023 | ||
1013 | mutex_lock(&shmem_swaplist_mutex); | 1024 | mutex_lock(&shmem_swaplist_mutex); |
1014 | list_for_each_safe(p, next, &shmem_swaplist) { | 1025 | list_for_each_safe(p, next, &shmem_swaplist) { |
@@ -1016,17 +1027,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
1016 | found = shmem_unuse_inode(info, entry, page); | 1027 | found = shmem_unuse_inode(info, entry, page); |
1017 | cond_resched(); | 1028 | cond_resched(); |
1018 | if (found) | 1029 | if (found) |
1019 | goto out; | 1030 | break; |
1020 | } | 1031 | } |
1021 | mutex_unlock(&shmem_swaplist_mutex); | 1032 | mutex_unlock(&shmem_swaplist_mutex); |
1022 | /* | 1033 | |
1023 | * Can some race bring us here? We've been holding page lock, | 1034 | uncharge: |
1024 | * so I think not; but would rather try again later than BUG() | 1035 | if (!found) |
1025 | */ | 1036 | mem_cgroup_uncharge_cache_page(page); |
1037 | if (found < 0) | ||
1038 | error = found; | ||
1039 | out: | ||
1026 | unlock_page(page); | 1040 | unlock_page(page); |
1027 | page_cache_release(page); | 1041 | page_cache_release(page); |
1028 | out: | 1042 | return error; |
1029 | return (found < 0) ? found : 0; | ||
1030 | } | 1043 | } |
1031 | 1044 | ||
1032 | /* | 1045 | /* |
@@ -1064,7 +1077,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1064 | else | 1077 | else |
1065 | swap.val = 0; | 1078 | swap.val = 0; |
1066 | 1079 | ||
1080 | /* | ||
1081 | * Add inode to shmem_unuse()'s list of swapped-out inodes, | ||
1082 | * if it's not already there. Do it now because we cannot take | ||
1083 | * mutex while holding spinlock, and must do so before the page | ||
1084 | * is moved to swap cache, when its pagelock no longer protects | ||
1085 | * the inode from eviction. But don't unlock the mutex until | ||
1086 | * we've taken the spinlock, because shmem_unuse_inode() will | ||
1087 | * prune a !swapped inode from the swaplist under both locks. | ||
1088 | */ | ||
1089 | if (swap.val) { | ||
1090 | mutex_lock(&shmem_swaplist_mutex); | ||
1091 | if (list_empty(&info->swaplist)) | ||
1092 | list_add_tail(&info->swaplist, &shmem_swaplist); | ||
1093 | } | ||
1094 | |||
1067 | spin_lock(&info->lock); | 1095 | spin_lock(&info->lock); |
1096 | if (swap.val) | ||
1097 | mutex_unlock(&shmem_swaplist_mutex); | ||
1098 | |||
1068 | if (index >= info->next_index) { | 1099 | if (index >= info->next_index) { |
1069 | BUG_ON(!(info->flags & SHMEM_TRUNCATE)); | 1100 | BUG_ON(!(info->flags & SHMEM_TRUNCATE)); |
1070 | goto unlock; | 1101 | goto unlock; |
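Note on the locking in the hunk above: shmem_swaplist_mutex is taken before info->lock and released only after info->lock is held. shmem_unuse_inode() prunes a not-yet-swapped inode from the swaplist only while holding both locks, so holding the mutex across the spinlock acquisition keeps the freshly added entry from being pruned before the swap entry is recorded. Condensed:

    if (swap.val) {
            mutex_lock(&shmem_swaplist_mutex);
            if (list_empty(&info->swaplist))
                    list_add_tail(&info->swaplist, &shmem_swaplist);
    }

    spin_lock(&info->lock);
    if (swap.val)
            mutex_unlock(&shmem_swaplist_mutex);    /* only after the spinlock is held */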
@@ -1081,25 +1112,13 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1081 | shmem_recalc_inode(inode); | 1112 | shmem_recalc_inode(inode); |
1082 | 1113 | ||
1083 | if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { | 1114 | if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { |
1084 | remove_from_page_cache(page); | 1115 | delete_from_page_cache(page); |
1085 | shmem_swp_set(info, entry, swap.val); | 1116 | shmem_swp_set(info, entry, swap.val); |
1086 | shmem_swp_unmap(entry); | 1117 | shmem_swp_unmap(entry); |
1087 | if (list_empty(&info->swaplist)) | ||
1088 | inode = igrab(inode); | ||
1089 | else | ||
1090 | inode = NULL; | ||
1091 | spin_unlock(&info->lock); | ||
1092 | swap_shmem_alloc(swap); | 1118 | swap_shmem_alloc(swap); |
1119 | spin_unlock(&info->lock); | ||
1093 | BUG_ON(page_mapped(page)); | 1120 | BUG_ON(page_mapped(page)); |
1094 | page_cache_release(page); /* pagecache ref */ | ||
1095 | swap_writepage(page, wbc); | 1121 | swap_writepage(page, wbc); |
1096 | if (inode) { | ||
1097 | mutex_lock(&shmem_swaplist_mutex); | ||
1098 | /* move instead of add in case we're racing */ | ||
1099 | list_move_tail(&info->swaplist, &shmem_swaplist); | ||
1100 | mutex_unlock(&shmem_swaplist_mutex); | ||
1101 | iput(inode); | ||
1102 | } | ||
1103 | return 0; | 1122 | return 0; |
1104 | } | 1123 | } |
1105 | 1124 | ||
@@ -1287,12 +1306,10 @@ repeat: | |||
1287 | swappage = lookup_swap_cache(swap); | 1306 | swappage = lookup_swap_cache(swap); |
1288 | if (!swappage) { | 1307 | if (!swappage) { |
1289 | shmem_swp_unmap(entry); | 1308 | shmem_swp_unmap(entry); |
1309 | spin_unlock(&info->lock); | ||
1290 | /* here we actually do the io */ | 1310 | /* here we actually do the io */ |
1291 | if (type && !(*type & VM_FAULT_MAJOR)) { | 1311 | if (type) |
1292 | __count_vm_event(PGMAJFAULT); | ||
1293 | *type |= VM_FAULT_MAJOR; | 1312 | *type |= VM_FAULT_MAJOR; |
1294 | } | ||
1295 | spin_unlock(&info->lock); | ||
1296 | swappage = shmem_swapin(swap, gfp, info, idx); | 1313 | swappage = shmem_swapin(swap, gfp, info, idx); |
1297 | if (!swappage) { | 1314 | if (!swappage) { |
1298 | spin_lock(&info->lock); | 1315 | spin_lock(&info->lock); |
@@ -1399,21 +1416,16 @@ repeat: | |||
1399 | shmem_swp_unmap(entry); | 1416 | shmem_swp_unmap(entry); |
1400 | sbinfo = SHMEM_SB(inode->i_sb); | 1417 | sbinfo = SHMEM_SB(inode->i_sb); |
1401 | if (sbinfo->max_blocks) { | 1418 | if (sbinfo->max_blocks) { |
1402 | if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) || | 1419 | if (percpu_counter_compare(&sbinfo->used_blocks, |
1403 | shmem_acct_block(info->flags)) { | 1420 | sbinfo->max_blocks) >= 0 || |
1404 | spin_unlock(&info->lock); | 1421 | shmem_acct_block(info->flags)) |
1405 | error = -ENOSPC; | 1422 | goto nospace; |
1406 | goto failed; | ||
1407 | } | ||
1408 | percpu_counter_inc(&sbinfo->used_blocks); | 1423 | percpu_counter_inc(&sbinfo->used_blocks); |
1409 | spin_lock(&inode->i_lock); | 1424 | spin_lock(&inode->i_lock); |
1410 | inode->i_blocks += BLOCKS_PER_PAGE; | 1425 | inode->i_blocks += BLOCKS_PER_PAGE; |
1411 | spin_unlock(&inode->i_lock); | 1426 | spin_unlock(&inode->i_lock); |
1412 | } else if (shmem_acct_block(info->flags)) { | 1427 | } else if (shmem_acct_block(info->flags)) |
1413 | spin_unlock(&info->lock); | 1428 | goto nospace; |
1414 | error = -ENOSPC; | ||
1415 | goto failed; | ||
1416 | } | ||
1417 | 1429 | ||
1418 | if (!filepage) { | 1430 | if (!filepage) { |
1419 | int ret; | 1431 | int ret; |
@@ -1493,6 +1505,24 @@ done: | |||
1493 | error = 0; | 1505 | error = 0; |
1494 | goto out; | 1506 | goto out; |
1495 | 1507 | ||
1508 | nospace: | ||
1509 | /* | ||
1510 | * Perhaps the page was brought in from swap between find_lock_page | ||
1511 | * and taking info->lock? We allow for that at add_to_page_cache_lru, | ||
1512 | * but must also avoid reporting a spurious ENOSPC while working on a | ||
1513 | * full tmpfs. (When filepage has been passed in to shmem_getpage, it | ||
1514 | * is already in page cache, which prevents this race from occurring.) | ||
1515 | */ | ||
1516 | if (!filepage) { | ||
1517 | struct page *page = find_get_page(mapping, idx); | ||
1518 | if (page) { | ||
1519 | spin_unlock(&info->lock); | ||
1520 | page_cache_release(page); | ||
1521 | goto repeat; | ||
1522 | } | ||
1523 | } | ||
1524 | spin_unlock(&info->lock); | ||
1525 | error = -ENOSPC; | ||
1496 | failed: | 1526 | failed: |
1497 | if (*pagep != filepage) { | 1527 | if (*pagep != filepage) { |
1498 | unlock_page(filepage); | 1528 | unlock_page(filepage); |
@@ -1518,7 +1548,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1518 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); | 1548 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); |
1519 | if (error) | 1549 | if (error) |
1520 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); | 1550 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); |
1521 | 1551 | if (ret & VM_FAULT_MAJOR) { | |
1552 | count_vm_event(PGMAJFAULT); | ||
1553 | mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); | ||
1554 | } | ||
1522 | return ret | VM_FAULT_LOCKED; | 1555 | return ret | VM_FAULT_LOCKED; |
1523 | } | 1556 | } |
1524 | 1557 | ||
@@ -1586,6 +1619,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode | |||
1586 | 1619 | ||
1587 | inode = new_inode(sb); | 1620 | inode = new_inode(sb); |
1588 | if (inode) { | 1621 | if (inode) { |
1622 | inode->i_ino = get_next_ino(); | ||
1589 | inode_init_owner(inode, dir, mode); | 1623 | inode_init_owner(inode, dir, mode); |
1590 | inode->i_blocks = 0; | 1624 | inode->i_blocks = 0; |
1591 | inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; | 1625 | inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; |
@@ -1596,6 +1630,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode | |||
1596 | spin_lock_init(&info->lock); | 1630 | spin_lock_init(&info->lock); |
1597 | info->flags = flags & VM_NORESERVE; | 1631 | info->flags = flags & VM_NORESERVE; |
1598 | INIT_LIST_HEAD(&info->swaplist); | 1632 | INIT_LIST_HEAD(&info->swaplist); |
1633 | INIT_LIST_HEAD(&info->xattr_list); | ||
1599 | cache_no_acl(inode); | 1634 | cache_no_acl(inode); |
1600 | 1635 | ||
1601 | switch (mode & S_IFMT) { | 1636 | switch (mode & S_IFMT) { |
@@ -1842,8 +1877,9 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
1842 | 1877 | ||
1843 | inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE); | 1878 | inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE); |
1844 | if (inode) { | 1879 | if (inode) { |
1845 | error = security_inode_init_security(inode, dir, NULL, NULL, | 1880 | error = security_inode_init_security(inode, dir, |
1846 | NULL); | 1881 | &dentry->d_name, NULL, |
1882 | NULL, NULL); | ||
1847 | if (error) { | 1883 | if (error) { |
1848 | if (error != -EOPNOTSUPP) { | 1884 | if (error != -EOPNOTSUPP) { |
1849 | iput(inode); | 1885 | iput(inode); |
@@ -1903,7 +1939,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr | |||
1903 | dir->i_size += BOGO_DIRENT_SIZE; | 1939 | dir->i_size += BOGO_DIRENT_SIZE; |
1904 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 1940 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
1905 | inc_nlink(inode); | 1941 | inc_nlink(inode); |
1906 | atomic_inc(&inode->i_count); /* New dentry reference */ | 1942 | ihold(inode); /* New dentry reference */ |
1907 | dget(dentry); /* Extra pinning count for the created dentry */ | 1943 | dget(dentry); /* Extra pinning count for the created dentry */ |
1908 | d_instantiate(dentry, inode); | 1944 | d_instantiate(dentry, inode); |
1909 | out: | 1945 | out: |
@@ -1982,8 +2018,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
1982 | if (!inode) | 2018 | if (!inode) |
1983 | return -ENOSPC; | 2019 | return -ENOSPC; |
1984 | 2020 | ||
1985 | error = security_inode_init_security(inode, dir, NULL, NULL, | 2021 | error = security_inode_init_security(inode, dir, &dentry->d_name, NULL, |
1986 | NULL); | 2022 | NULL, NULL); |
1987 | if (error) { | 2023 | if (error) { |
1988 | if (error != -EOPNOTSUPP) { | 2024 | if (error != -EOPNOTSUPP) { |
1989 | iput(inode); | 2025 | iput(inode); |
@@ -1994,9 +2030,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
1994 | 2030 | ||
1995 | info = SHMEM_I(inode); | 2031 | info = SHMEM_I(inode); |
1996 | inode->i_size = len-1; | 2032 | inode->i_size = len-1; |
1997 | if (len <= (char *)inode - (char *)info) { | 2033 | if (len <= SHMEM_SYMLINK_INLINE_LEN) { |
1998 | /* do it inline */ | 2034 | /* do it inline */ |
1999 | memcpy(info, symname, len); | 2035 | memcpy(info->inline_symlink, symname, len); |
2000 | inode->i_op = &shmem_symlink_inline_operations; | 2036 | inode->i_op = &shmem_symlink_inline_operations; |
2001 | } else { | 2037 | } else { |
2002 | error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); | 2038 | error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); |
@@ -2022,7 +2058,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
2022 | 2058 | ||
2023 | static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) | 2059 | static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) |
2024 | { | 2060 | { |
2025 | nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); | 2061 | nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); |
2026 | return NULL; | 2062 | return NULL; |
2027 | } | 2063 | } |
2028 | 2064 | ||
@@ -2046,63 +2082,253 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co | |||
2046 | } | 2082 | } |
2047 | } | 2083 | } |
2048 | 2084 | ||
2049 | static const struct inode_operations shmem_symlink_inline_operations = { | 2085 | #ifdef CONFIG_TMPFS_XATTR |
2050 | .readlink = generic_readlink, | ||
2051 | .follow_link = shmem_follow_link_inline, | ||
2052 | }; | ||
2053 | |||
2054 | static const struct inode_operations shmem_symlink_inode_operations = { | ||
2055 | .readlink = generic_readlink, | ||
2056 | .follow_link = shmem_follow_link, | ||
2057 | .put_link = shmem_put_link, | ||
2058 | }; | ||
2059 | |||
2060 | #ifdef CONFIG_TMPFS_POSIX_ACL | ||
2061 | /* | 2086 | /* |
2062 | * Superblocks without xattr inode operations will get security.* xattr | 2087 | * Superblocks without xattr inode operations may get some security.* xattr |
2063 | * support from the VFS "for free". As soon as we have any other xattrs | 2088 | * support from the LSM "for free". As soon as we have any other xattrs |
2064 | * like ACLs, we also need to implement the security.* handlers at | 2089 | * like ACLs, we also need to implement the security.* handlers at |
2065 | * filesystem level, though. | 2090 | * filesystem level, though. |
2066 | */ | 2091 | */ |
2067 | 2092 | ||
2068 | static size_t shmem_xattr_security_list(struct dentry *dentry, char *list, | 2093 | static int shmem_xattr_get(struct dentry *dentry, const char *name, |
2069 | size_t list_len, const char *name, | 2094 | void *buffer, size_t size) |
2070 | size_t name_len, int handler_flags) | ||
2071 | { | 2095 | { |
2072 | return security_inode_listsecurity(dentry->d_inode, list, list_len); | 2096 | struct shmem_inode_info *info; |
2073 | } | 2097 | struct shmem_xattr *xattr; |
2098 | int ret = -ENODATA; | ||
2074 | 2099 | ||
2075 | static int shmem_xattr_security_get(struct dentry *dentry, const char *name, | 2100 | info = SHMEM_I(dentry->d_inode); |
2076 | void *buffer, size_t size, int handler_flags) | 2101 | |
2077 | { | 2102 | spin_lock(&info->lock); |
2078 | if (strcmp(name, "") == 0) | 2103 | list_for_each_entry(xattr, &info->xattr_list, list) { |
2079 | return -EINVAL; | 2104 | if (strcmp(name, xattr->name)) |
2080 | return xattr_getsecurity(dentry->d_inode, name, buffer, size); | 2105 | continue; |
2106 | |||
2107 | ret = xattr->size; | ||
2108 | if (buffer) { | ||
2109 | if (size < xattr->size) | ||
2110 | ret = -ERANGE; | ||
2111 | else | ||
2112 | memcpy(buffer, xattr->value, xattr->size); | ||
2113 | } | ||
2114 | break; | ||
2115 | } | ||
2116 | spin_unlock(&info->lock); | ||
2117 | return ret; | ||
2081 | } | 2118 | } |
2082 | 2119 | ||
2083 | static int shmem_xattr_security_set(struct dentry *dentry, const char *name, | 2120 | static int shmem_xattr_set(struct dentry *dentry, const char *name, |
2084 | const void *value, size_t size, int flags, int handler_flags) | 2121 | const void *value, size_t size, int flags) |
2085 | { | 2122 | { |
2086 | if (strcmp(name, "") == 0) | 2123 | struct inode *inode = dentry->d_inode; |
2087 | return -EINVAL; | 2124 | struct shmem_inode_info *info = SHMEM_I(inode); |
2088 | return security_inode_setsecurity(dentry->d_inode, name, value, | 2125 | struct shmem_xattr *xattr; |
2089 | size, flags); | 2126 | struct shmem_xattr *new_xattr = NULL; |
2127 | size_t len; | ||
2128 | int err = 0; | ||
2129 | |||
2130 | /* value == NULL means remove */ | ||
2131 | if (value) { | ||
2132 | /* wrap around? */ | ||
2133 | len = sizeof(*new_xattr) + size; | ||
2134 | if (len <= sizeof(*new_xattr)) | ||
2135 | return -ENOMEM; | ||
2136 | |||
2137 | new_xattr = kmalloc(len, GFP_KERNEL); | ||
2138 | if (!new_xattr) | ||
2139 | return -ENOMEM; | ||
2140 | |||
2141 | new_xattr->name = kstrdup(name, GFP_KERNEL); | ||
2142 | if (!new_xattr->name) { | ||
2143 | kfree(new_xattr); | ||
2144 | return -ENOMEM; | ||
2145 | } | ||
2146 | |||
2147 | new_xattr->size = size; | ||
2148 | memcpy(new_xattr->value, value, size); | ||
2149 | } | ||
2150 | |||
2151 | spin_lock(&info->lock); | ||
2152 | list_for_each_entry(xattr, &info->xattr_list, list) { | ||
2153 | if (!strcmp(name, xattr->name)) { | ||
2154 | if (flags & XATTR_CREATE) { | ||
2155 | xattr = new_xattr; | ||
2156 | err = -EEXIST; | ||
2157 | } else if (new_xattr) { | ||
2158 | list_replace(&xattr->list, &new_xattr->list); | ||
2159 | } else { | ||
2160 | list_del(&xattr->list); | ||
2161 | } | ||
2162 | goto out; | ||
2163 | } | ||
2164 | } | ||
2165 | if (flags & XATTR_REPLACE) { | ||
2166 | xattr = new_xattr; | ||
2167 | err = -ENODATA; | ||
2168 | } else { | ||
2169 | list_add(&new_xattr->list, &info->xattr_list); | ||
2170 | xattr = NULL; | ||
2171 | } | ||
2172 | out: | ||
2173 | spin_unlock(&info->lock); | ||
2174 | if (xattr) | ||
2175 | kfree(xattr->name); | ||
2176 | kfree(xattr); | ||
2177 | return err; | ||
2090 | } | 2178 | } |
2091 | 2179 | ||
2092 | static const struct xattr_handler shmem_xattr_security_handler = { | ||
2093 | .prefix = XATTR_SECURITY_PREFIX, | ||
2094 | .list = shmem_xattr_security_list, | ||
2095 | .get = shmem_xattr_security_get, | ||
2096 | .set = shmem_xattr_security_set, | ||
2097 | }; | ||
2098 | 2180 | ||
2099 | static const struct xattr_handler *shmem_xattr_handlers[] = { | 2181 | static const struct xattr_handler *shmem_xattr_handlers[] = { |
2182 | #ifdef CONFIG_TMPFS_POSIX_ACL | ||
2100 | &generic_acl_access_handler, | 2183 | &generic_acl_access_handler, |
2101 | &generic_acl_default_handler, | 2184 | &generic_acl_default_handler, |
2102 | &shmem_xattr_security_handler, | 2185 | #endif |
2103 | NULL | 2186 | NULL |
2104 | }; | 2187 | }; |
2188 | |||
2189 | static int shmem_xattr_validate(const char *name) | ||
2190 | { | ||
2191 | struct { const char *prefix; size_t len; } arr[] = { | ||
2192 | { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN }, | ||
2193 | { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN } | ||
2194 | }; | ||
2195 | int i; | ||
2196 | |||
2197 | for (i = 0; i < ARRAY_SIZE(arr); i++) { | ||
2198 | size_t preflen = arr[i].len; | ||
2199 | if (strncmp(name, arr[i].prefix, preflen) == 0) { | ||
2200 | if (!name[preflen]) | ||
2201 | return -EINVAL; | ||
2202 | return 0; | ||
2203 | } | ||
2204 | } | ||
2205 | return -EOPNOTSUPP; | ||
2206 | } | ||
2207 | |||
2208 | static ssize_t shmem_getxattr(struct dentry *dentry, const char *name, | ||
2209 | void *buffer, size_t size) | ||
2210 | { | ||
2211 | int err; | ||
2212 | |||
2213 | /* | ||
2214 | * If this is a request for a synthetic attribute in the system.* | ||
2215 | * namespace use the generic infrastructure to resolve a handler | ||
2216 | * for it via sb->s_xattr. | ||
2217 | */ | ||
2218 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
2219 | return generic_getxattr(dentry, name, buffer, size); | ||
2220 | |||
2221 | err = shmem_xattr_validate(name); | ||
2222 | if (err) | ||
2223 | return err; | ||
2224 | |||
2225 | return shmem_xattr_get(dentry, name, buffer, size); | ||
2226 | } | ||
2227 | |||
2228 | static int shmem_setxattr(struct dentry *dentry, const char *name, | ||
2229 | const void *value, size_t size, int flags) | ||
2230 | { | ||
2231 | int err; | ||
2232 | |||
2233 | /* | ||
2234 | * If this is a request for a synthetic attribute in the system.* | ||
2235 | * namespace use the generic infrastructure to resolve a handler | ||
2236 | * for it via sb->s_xattr. | ||
2237 | */ | ||
2238 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
2239 | return generic_setxattr(dentry, name, value, size, flags); | ||
2240 | |||
2241 | err = shmem_xattr_validate(name); | ||
2242 | if (err) | ||
2243 | return err; | ||
2244 | |||
2245 | if (size == 0) | ||
2246 | value = ""; /* empty EA, do not remove */ | ||
2247 | |||
2248 | return shmem_xattr_set(dentry, name, value, size, flags); | ||
2249 | |||
2250 | } | ||
2251 | |||
2252 | static int shmem_removexattr(struct dentry *dentry, const char *name) | ||
2253 | { | ||
2254 | int err; | ||
2255 | |||
2256 | /* | ||
2257 | * If this is a request for a synthetic attribute in the system.* | ||
2258 | * namespace use the generic infrastructure to resolve a handler | ||
2259 | * for it via sb->s_xattr. | ||
2260 | */ | ||
2261 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
2262 | return generic_removexattr(dentry, name); | ||
2263 | |||
2264 | err = shmem_xattr_validate(name); | ||
2265 | if (err) | ||
2266 | return err; | ||
2267 | |||
2268 | return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE); | ||
2269 | } | ||
2270 | |||
2271 | static bool xattr_is_trusted(const char *name) | ||
2272 | { | ||
2273 | return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); | ||
2274 | } | ||
2275 | |||
2276 | static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) | ||
2277 | { | ||
2278 | bool trusted = capable(CAP_SYS_ADMIN); | ||
2279 | struct shmem_xattr *xattr; | ||
2280 | struct shmem_inode_info *info; | ||
2281 | size_t used = 0; | ||
2282 | |||
2283 | info = SHMEM_I(dentry->d_inode); | ||
2284 | |||
2285 | spin_lock(&info->lock); | ||
2286 | list_for_each_entry(xattr, &info->xattr_list, list) { | ||
2287 | size_t len; | ||
2288 | |||
2289 | /* skip "trusted." attributes for unprivileged callers */ | ||
2290 | if (!trusted && xattr_is_trusted(xattr->name)) | ||
2291 | continue; | ||
2292 | |||
2293 | len = strlen(xattr->name) + 1; | ||
2294 | used += len; | ||
2295 | if (buffer) { | ||
2296 | if (size < used) { | ||
2297 | used = -ERANGE; | ||
2298 | break; | ||
2299 | } | ||
2300 | memcpy(buffer, xattr->name, len); | ||
2301 | buffer += len; | ||
2302 | } | ||
2303 | } | ||
2304 | spin_unlock(&info->lock); | ||
2305 | |||
2306 | return used; | ||
2307 | } | ||
2308 | #endif /* CONFIG_TMPFS_XATTR */ | ||
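The new handlers can be exercised from user space with the ordinary xattr system calls; a small hedged demo (assumes the path below is on a tmpfs mount, and that "trusted." names need CAP_SYS_ADMIN):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/xattr.h>
    #include <unistd.h>

    int main(void)
    {
            const char *path = "/dev/shm/xattr-demo";       /* assumed tmpfs file */
            char buf[256];
            ssize_t len;

            close(open(path, O_CREAT | O_RDWR, 0600));      /* make sure it exists */

            if (setxattr(path, "trusted.example", "hello", 5, XATTR_CREATE) != 0)
                    perror("setxattr");

            len = getxattr(path, "trusted.example", buf, sizeof(buf));
            if (len >= 0)
                    printf("trusted.example = %.*s\n", (int)len, buf);

            len = listxattr(path, buf, sizeof(buf));        /* names, NUL-separated */
            if (len < 0)
                    perror("listxattr");

            return 0;
    }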
2309 | |||
2310 | static const struct inode_operations shmem_symlink_inline_operations = { | ||
2311 | .readlink = generic_readlink, | ||
2312 | .follow_link = shmem_follow_link_inline, | ||
2313 | #ifdef CONFIG_TMPFS_XATTR | ||
2314 | .setxattr = shmem_setxattr, | ||
2315 | .getxattr = shmem_getxattr, | ||
2316 | .listxattr = shmem_listxattr, | ||
2317 | .removexattr = shmem_removexattr, | ||
2318 | #endif | ||
2319 | }; | ||
2320 | |||
2321 | static const struct inode_operations shmem_symlink_inode_operations = { | ||
2322 | .readlink = generic_readlink, | ||
2323 | .follow_link = shmem_follow_link, | ||
2324 | .put_link = shmem_put_link, | ||
2325 | #ifdef CONFIG_TMPFS_XATTR | ||
2326 | .setxattr = shmem_setxattr, | ||
2327 | .getxattr = shmem_getxattr, | ||
2328 | .listxattr = shmem_listxattr, | ||
2329 | .removexattr = shmem_removexattr, | ||
2105 | #endif | 2330 | #endif |
2331 | }; | ||
2106 | 2332 | ||
2107 | static struct dentry *shmem_get_parent(struct dentry *child) | 2333 | static struct dentry *shmem_get_parent(struct dentry *child) |
2108 | { | 2334 | { |
@@ -2143,10 +2369,12 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | |||
2143 | { | 2369 | { |
2144 | struct inode *inode = dentry->d_inode; | 2370 | struct inode *inode = dentry->d_inode; |
2145 | 2371 | ||
2146 | if (*len < 3) | 2372 | if (*len < 3) { |
2373 | *len = 3; | ||
2147 | return 255; | 2374 | return 255; |
2375 | } | ||
2148 | 2376 | ||
2149 | if (hlist_unhashed(&inode->i_hash)) { | 2377 | if (inode_unhashed(inode)) { |
2150 | /* Unfortunately insert_inode_hash is not idempotent, | 2378 | /* Unfortunately insert_inode_hash is not idempotent, |
2151 | * so as we hash inodes here rather than at creation | 2379 | * so as we hash inodes here rather than at creation |
2152 | * time, we need a lock to ensure we only try | 2380 | * time, we need a lock to ensure we only try |
@@ -2154,7 +2382,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | |||
2154 | */ | 2382 | */ |
2155 | static DEFINE_SPINLOCK(lock); | 2383 | static DEFINE_SPINLOCK(lock); |
2156 | spin_lock(&lock); | 2384 | spin_lock(&lock); |
2157 | if (hlist_unhashed(&inode->i_hash)) | 2385 | if (inode_unhashed(inode)) |
2158 | __insert_inode_hash(inode, | 2386 | __insert_inode_hash(inode, |
2159 | inode->i_ino + inode->i_generation); | 2387 | inode->i_ino + inode->i_generation); |
2160 | spin_unlock(&lock); | 2388 | spin_unlock(&lock); |
@@ -2380,8 +2608,10 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) | |||
2380 | sb->s_magic = TMPFS_MAGIC; | 2608 | sb->s_magic = TMPFS_MAGIC; |
2381 | sb->s_op = &shmem_ops; | 2609 | sb->s_op = &shmem_ops; |
2382 | sb->s_time_gran = 1; | 2610 | sb->s_time_gran = 1; |
2383 | #ifdef CONFIG_TMPFS_POSIX_ACL | 2611 | #ifdef CONFIG_TMPFS_XATTR |
2384 | sb->s_xattr = shmem_xattr_handlers; | 2612 | sb->s_xattr = shmem_xattr_handlers; |
2613 | #endif | ||
2614 | #ifdef CONFIG_TMPFS_POSIX_ACL | ||
2385 | sb->s_flags |= MS_POSIXACL; | 2615 | sb->s_flags |= MS_POSIXACL; |
2386 | #endif | 2616 | #endif |
2387 | 2617 | ||
@@ -2414,13 +2644,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) | |||
2414 | return &p->vfs_inode; | 2644 | return &p->vfs_inode; |
2415 | } | 2645 | } |
2416 | 2646 | ||
2647 | static void shmem_i_callback(struct rcu_head *head) | ||
2648 | { | ||
2649 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
2650 | INIT_LIST_HEAD(&inode->i_dentry); | ||
2651 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); | ||
2652 | } | ||
2653 | |||
2417 | static void shmem_destroy_inode(struct inode *inode) | 2654 | static void shmem_destroy_inode(struct inode *inode) |
2418 | { | 2655 | { |
2419 | if ((inode->i_mode & S_IFMT) == S_IFREG) { | 2656 | if ((inode->i_mode & S_IFMT) == S_IFREG) { |
2420 | /* only struct inode is valid if it's an inline symlink */ | 2657 | /* only struct inode is valid if it's an inline symlink */ |
2421 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); | 2658 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); |
2422 | } | 2659 | } |
2423 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); | 2660 | call_rcu(&inode->i_rcu, shmem_i_callback); |
2424 | } | 2661 | } |
2425 | 2662 | ||
2426 | static void init_once(void *foo) | 2663 | static void init_once(void *foo) |
@@ -2470,13 +2707,15 @@ static const struct file_operations shmem_file_operations = { | |||
2470 | }; | 2707 | }; |
2471 | 2708 | ||
2472 | static const struct inode_operations shmem_inode_operations = { | 2709 | static const struct inode_operations shmem_inode_operations = { |
2473 | .setattr = shmem_notify_change, | 2710 | .setattr = shmem_setattr, |
2474 | .truncate_range = shmem_truncate_range, | 2711 | .truncate_range = shmem_truncate_range, |
2712 | #ifdef CONFIG_TMPFS_XATTR | ||
2713 | .setxattr = shmem_setxattr, | ||
2714 | .getxattr = shmem_getxattr, | ||
2715 | .listxattr = shmem_listxattr, | ||
2716 | .removexattr = shmem_removexattr, | ||
2717 | #endif | ||
2475 | #ifdef CONFIG_TMPFS_POSIX_ACL | 2718 | #ifdef CONFIG_TMPFS_POSIX_ACL |
2476 | .setxattr = generic_setxattr, | ||
2477 | .getxattr = generic_getxattr, | ||
2478 | .listxattr = generic_listxattr, | ||
2479 | .removexattr = generic_removexattr, | ||
2480 | .check_acl = generic_check_acl, | 2719 | .check_acl = generic_check_acl, |
2481 | #endif | 2720 | #endif |
2482 | 2721 | ||
@@ -2494,23 +2733,27 @@ static const struct inode_operations shmem_dir_inode_operations = { | |||
2494 | .mknod = shmem_mknod, | 2733 | .mknod = shmem_mknod, |
2495 | .rename = shmem_rename, | 2734 | .rename = shmem_rename, |
2496 | #endif | 2735 | #endif |
2736 | #ifdef CONFIG_TMPFS_XATTR | ||
2737 | .setxattr = shmem_setxattr, | ||
2738 | .getxattr = shmem_getxattr, | ||
2739 | .listxattr = shmem_listxattr, | ||
2740 | .removexattr = shmem_removexattr, | ||
2741 | #endif | ||
2497 | #ifdef CONFIG_TMPFS_POSIX_ACL | 2742 | #ifdef CONFIG_TMPFS_POSIX_ACL |
2498 | .setattr = shmem_notify_change, | 2743 | .setattr = shmem_setattr, |
2499 | .setxattr = generic_setxattr, | ||
2500 | .getxattr = generic_getxattr, | ||
2501 | .listxattr = generic_listxattr, | ||
2502 | .removexattr = generic_removexattr, | ||
2503 | .check_acl = generic_check_acl, | 2744 | .check_acl = generic_check_acl, |
2504 | #endif | 2745 | #endif |
2505 | }; | 2746 | }; |
2506 | 2747 | ||
2507 | static const struct inode_operations shmem_special_inode_operations = { | 2748 | static const struct inode_operations shmem_special_inode_operations = { |
2749 | #ifdef CONFIG_TMPFS_XATTR | ||
2750 | .setxattr = shmem_setxattr, | ||
2751 | .getxattr = shmem_getxattr, | ||
2752 | .listxattr = shmem_listxattr, | ||
2753 | .removexattr = shmem_removexattr, | ||
2754 | #endif | ||
2508 | #ifdef CONFIG_TMPFS_POSIX_ACL | 2755 | #ifdef CONFIG_TMPFS_POSIX_ACL |
2509 | .setattr = shmem_notify_change, | 2756 | .setattr = shmem_setattr, |
2510 | .setxattr = generic_setxattr, | ||
2511 | .getxattr = generic_getxattr, | ||
2512 | .listxattr = generic_listxattr, | ||
2513 | .removexattr = generic_removexattr, | ||
2514 | .check_acl = generic_check_acl, | 2757 | .check_acl = generic_check_acl, |
2515 | #endif | 2758 | #endif |
2516 | }; | 2759 | }; |
@@ -2537,16 +2780,16 @@ static const struct vm_operations_struct shmem_vm_ops = { | |||
2537 | }; | 2780 | }; |
2538 | 2781 | ||
2539 | 2782 | ||
2540 | static int shmem_get_sb(struct file_system_type *fs_type, | 2783 | static struct dentry *shmem_mount(struct file_system_type *fs_type, |
2541 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 2784 | int flags, const char *dev_name, void *data) |
2542 | { | 2785 | { |
2543 | return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt); | 2786 | return mount_nodev(fs_type, flags, data, shmem_fill_super); |
2544 | } | 2787 | } |
2545 | 2788 | ||
2546 | static struct file_system_type tmpfs_fs_type = { | 2789 | static struct file_system_type tmpfs_fs_type = { |
2547 | .owner = THIS_MODULE, | 2790 | .owner = THIS_MODULE, |
2548 | .name = "tmpfs", | 2791 | .name = "tmpfs", |
2549 | .get_sb = shmem_get_sb, | 2792 | .mount = shmem_mount, |
2550 | .kill_sb = kill_litter_super, | 2793 | .kill_sb = kill_litter_super, |
2551 | }; | 2794 | }; |
2552 | 2795 | ||
@@ -2642,7 +2885,7 @@ out: | |||
2642 | 2885 | ||
2643 | static struct file_system_type tmpfs_fs_type = { | 2886 | static struct file_system_type tmpfs_fs_type = { |
2644 | .name = "tmpfs", | 2887 | .name = "tmpfs", |
2645 | .get_sb = ramfs_get_sb, | 2888 | .mount = ramfs_mount, |
2646 | .kill_sb = kill_litter_super, | 2889 | .kill_sb = kill_litter_super, |
2647 | }; | 2890 | }; |
2648 | 2891 | ||
@@ -2666,6 +2909,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) | |||
2666 | return 0; | 2909 | return 0; |
2667 | } | 2910 | } |
2668 | 2911 | ||
2912 | void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) | ||
2913 | { | ||
2914 | truncate_inode_pages_range(inode->i_mapping, start, end); | ||
2915 | } | ||
2916 | EXPORT_SYMBOL_GPL(shmem_truncate_range); | ||
2917 | |||
2669 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 2918 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
2670 | /** | 2919 | /** |
2671 | * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file | 2920 | * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file |
@@ -2783,5 +3032,29 @@ int shmem_zero_setup(struct vm_area_struct *vma) | |||
2783 | fput(vma->vm_file); | 3032 | fput(vma->vm_file); |
2784 | vma->vm_file = file; | 3033 | vma->vm_file = file; |
2785 | vma->vm_ops = &shmem_vm_ops; | 3034 | vma->vm_ops = &shmem_vm_ops; |
3035 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
2786 | return 0; | 3036 | return 0; |
2787 | } | 3037 | } |
3038 | |||
3039 | /** | ||
3040 | * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags. | ||
3041 | * @mapping: the page's address_space | ||
3042 | * @index: the page index | ||
3043 | * @gfp: the page allocator flags to use if allocating | ||
3044 | * | ||
3045 | * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", | ||
3046 | * with any new page allocations done using the specified allocation flags. | ||
3047 | * But read_cache_page_gfp() uses the ->readpage() method: which does not | ||
3048 | * suit tmpfs, since it may have pages in swapcache, and needs to find those | ||
3049 | * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. | ||
3050 | * | ||
3051 | * Provide a stub for those callers to start using now, then later | ||
3052 | * flesh it out to call shmem_getpage() with additional gfp mask, when | ||
3053 | * shmem_file_splice_read() is added and shmem_readpage() is removed. | ||
3054 | */ | ||
3055 | struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, | ||
3056 | pgoff_t index, gfp_t gfp) | ||
3057 | { | ||
3058 | return read_cache_page_gfp(mapping, index, gfp); | ||
3059 | } | ||
3060 | EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); | ||
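A hedged usage sketch for the new helper (an assumed caller such as a GPU driver that keeps its backing store in shmem; not part of this patch):

    /* read or allocate the page at @index using the mapping's own gfp mask */
    static struct page *grab_backing_page(struct address_space *mapping,
                                          pgoff_t index)
    {
            return shmem_read_mapping_page_gfp(mapping, index,
                                               mapping_gfp_mask(mapping));
    }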