diff options
Diffstat (limited to 'fs')
47 files changed, 3158 insertions, 1047 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 397d3057d33..1bffbe0ed77 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
| @@ -820,6 +820,8 @@ static int load_flat_shared_library(int id, struct lib_info *libs) | |||
| 820 | int res; | 820 | int res; |
| 821 | char buf[16]; | 821 | char buf[16]; |
| 822 | 822 | ||
| 823 | memset(&bprm, 0, sizeof(bprm)); | ||
| 824 | |||
| 823 | /* Create the file name */ | 825 | /* Create the file name */ |
| 824 | sprintf(buf, "/lib/lib%d.so", id); | 826 | sprintf(buf, "/lib/lib%d.so", id); |
| 825 | 827 | ||
| @@ -835,6 +837,12 @@ static int load_flat_shared_library(int id, struct lib_info *libs) | |||
| 835 | if (!bprm.cred) | 837 | if (!bprm.cred) |
| 836 | goto out; | 838 | goto out; |
| 837 | 839 | ||
| 840 | /* We don't really care about recalculating credentials at this point | ||
| 841 | * as we're past the point of no return and are dealing with shared | ||
| 842 | * libraries. | ||
| 843 | */ | ||
| 844 | bprm.cred_prepared = 1; | ||
| 845 | |||
| 838 | res = prepare_binprm(&bprm); | 846 | res = prepare_binprm(&bprm); |
| 839 | 847 | ||
| 840 | if (!IS_ERR_VALUE(res)) | 848 | if (!IS_ERR_VALUE(res)) |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 0d329ff8ed4..9b026ea8baa 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
| @@ -100,6 +100,7 @@ struct dlm_cluster { | |||
| 100 | unsigned int cl_log_debug; | 100 | unsigned int cl_log_debug; |
| 101 | unsigned int cl_protocol; | 101 | unsigned int cl_protocol; |
| 102 | unsigned int cl_timewarn_cs; | 102 | unsigned int cl_timewarn_cs; |
| 103 | unsigned int cl_waitwarn_us; | ||
| 103 | }; | 104 | }; |
| 104 | 105 | ||
| 105 | enum { | 106 | enum { |
| @@ -114,6 +115,7 @@ enum { | |||
| 114 | CLUSTER_ATTR_LOG_DEBUG, | 115 | CLUSTER_ATTR_LOG_DEBUG, |
| 115 | CLUSTER_ATTR_PROTOCOL, | 116 | CLUSTER_ATTR_PROTOCOL, |
| 116 | CLUSTER_ATTR_TIMEWARN_CS, | 117 | CLUSTER_ATTR_TIMEWARN_CS, |
| 118 | CLUSTER_ATTR_WAITWARN_US, | ||
| 117 | }; | 119 | }; |
| 118 | 120 | ||
| 119 | struct cluster_attribute { | 121 | struct cluster_attribute { |
| @@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1); | |||
| 166 | CLUSTER_ATTR(log_debug, 0); | 168 | CLUSTER_ATTR(log_debug, 0); |
| 167 | CLUSTER_ATTR(protocol, 0); | 169 | CLUSTER_ATTR(protocol, 0); |
| 168 | CLUSTER_ATTR(timewarn_cs, 1); | 170 | CLUSTER_ATTR(timewarn_cs, 1); |
| 171 | CLUSTER_ATTR(waitwarn_us, 0); | ||
| 169 | 172 | ||
| 170 | static struct configfs_attribute *cluster_attrs[] = { | 173 | static struct configfs_attribute *cluster_attrs[] = { |
| 171 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, | 174 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, |
| @@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = { | |||
| 179 | [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, | 182 | [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, |
| 180 | [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, | 183 | [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, |
| 181 | [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, | 184 | [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, |
| 185 | [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, | ||
| 182 | NULL, | 186 | NULL, |
| 183 | }; | 187 | }; |
| 184 | 188 | ||
| @@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g, | |||
| 439 | cl->cl_log_debug = dlm_config.ci_log_debug; | 443 | cl->cl_log_debug = dlm_config.ci_log_debug; |
| 440 | cl->cl_protocol = dlm_config.ci_protocol; | 444 | cl->cl_protocol = dlm_config.ci_protocol; |
| 441 | cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; | 445 | cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; |
| 446 | cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; | ||
| 442 | 447 | ||
| 443 | space_list = &sps->ss_group; | 448 | space_list = &sps->ss_group; |
| 444 | comm_list = &cms->cs_group; | 449 | comm_list = &cms->cs_group; |
| @@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) | |||
| 986 | #define DEFAULT_LOG_DEBUG 0 | 991 | #define DEFAULT_LOG_DEBUG 0 |
| 987 | #define DEFAULT_PROTOCOL 0 | 992 | #define DEFAULT_PROTOCOL 0 |
| 988 | #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ | 993 | #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ |
| 994 | #define DEFAULT_WAITWARN_US 0 | ||
| 989 | 995 | ||
| 990 | struct dlm_config_info dlm_config = { | 996 | struct dlm_config_info dlm_config = { |
| 991 | .ci_tcp_port = DEFAULT_TCP_PORT, | 997 | .ci_tcp_port = DEFAULT_TCP_PORT, |
| @@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = { | |||
| 998 | .ci_scan_secs = DEFAULT_SCAN_SECS, | 1004 | .ci_scan_secs = DEFAULT_SCAN_SECS, |
| 999 | .ci_log_debug = DEFAULT_LOG_DEBUG, | 1005 | .ci_log_debug = DEFAULT_LOG_DEBUG, |
| 1000 | .ci_protocol = DEFAULT_PROTOCOL, | 1006 | .ci_protocol = DEFAULT_PROTOCOL, |
| 1001 | .ci_timewarn_cs = DEFAULT_TIMEWARN_CS | 1007 | .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, |
| 1008 | .ci_waitwarn_us = DEFAULT_WAITWARN_US | ||
| 1002 | }; | 1009 | }; |
| 1003 | 1010 | ||
diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 4f1d6fce58c..dd0ce24d5a8 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h | |||
| @@ -28,6 +28,7 @@ struct dlm_config_info { | |||
| 28 | int ci_log_debug; | 28 | int ci_log_debug; |
| 29 | int ci_protocol; | 29 | int ci_protocol; |
| 30 | int ci_timewarn_cs; | 30 | int ci_timewarn_cs; |
| 31 | int ci_waitwarn_us; | ||
| 31 | }; | 32 | }; |
| 32 | 33 | ||
| 33 | extern struct dlm_config_info dlm_config; | 34 | extern struct dlm_config_info dlm_config; |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index b9420491301..0262451eb9c 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
| @@ -209,6 +209,7 @@ struct dlm_args { | |||
| 209 | #define DLM_IFL_WATCH_TIMEWARN 0x00400000 | 209 | #define DLM_IFL_WATCH_TIMEWARN 0x00400000 |
| 210 | #define DLM_IFL_TIMEOUT_CANCEL 0x00800000 | 210 | #define DLM_IFL_TIMEOUT_CANCEL 0x00800000 |
| 211 | #define DLM_IFL_DEADLOCK_CANCEL 0x01000000 | 211 | #define DLM_IFL_DEADLOCK_CANCEL 0x01000000 |
| 212 | #define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */ | ||
| 212 | #define DLM_IFL_USER 0x00000001 | 213 | #define DLM_IFL_USER 0x00000001 |
| 213 | #define DLM_IFL_ORPHAN 0x00000002 | 214 | #define DLM_IFL_ORPHAN 0x00000002 |
| 214 | 215 | ||
| @@ -245,6 +246,7 @@ struct dlm_lkb { | |||
| 245 | 246 | ||
| 246 | int8_t lkb_wait_type; /* type of reply waiting for */ | 247 | int8_t lkb_wait_type; /* type of reply waiting for */ |
| 247 | int8_t lkb_wait_count; | 248 | int8_t lkb_wait_count; |
| 249 | int lkb_wait_nodeid; /* for debugging */ | ||
| 248 | 250 | ||
| 249 | struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ | 251 | struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ |
| 250 | struct list_head lkb_statequeue; /* rsb g/c/w list */ | 252 | struct list_head lkb_statequeue; /* rsb g/c/w list */ |
| @@ -254,6 +256,7 @@ struct dlm_lkb { | |||
| 254 | struct list_head lkb_ownqueue; /* list of locks for a process */ | 256 | struct list_head lkb_ownqueue; /* list of locks for a process */ |
| 255 | struct list_head lkb_time_list; | 257 | struct list_head lkb_time_list; |
| 256 | ktime_t lkb_timestamp; | 258 | ktime_t lkb_timestamp; |
| 259 | ktime_t lkb_wait_time; | ||
| 257 | unsigned long lkb_timeout_cs; | 260 | unsigned long lkb_timeout_cs; |
| 258 | 261 | ||
| 259 | struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; | 262 | struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 56d6bfcc1e4..f71d0b5abd9 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
| @@ -799,10 +799,84 @@ static int msg_reply_type(int mstype) | |||
| 799 | return -1; | 799 | return -1; |
| 800 | } | 800 | } |
| 801 | 801 | ||
| 802 | static int nodeid_warned(int nodeid, int num_nodes, int *warned) | ||
| 803 | { | ||
| 804 | int i; | ||
| 805 | |||
| 806 | for (i = 0; i < num_nodes; i++) { | ||
| 807 | if (!warned[i]) { | ||
| 808 | warned[i] = nodeid; | ||
| 809 | return 0; | ||
| 810 | } | ||
| 811 | if (warned[i] == nodeid) | ||
| 812 | return 1; | ||
| 813 | } | ||
| 814 | return 0; | ||
| 815 | } | ||
| 816 | |||
| 817 | void dlm_scan_waiters(struct dlm_ls *ls) | ||
| 818 | { | ||
| 819 | struct dlm_lkb *lkb; | ||
| 820 | ktime_t zero = ktime_set(0, 0); | ||
| 821 | s64 us; | ||
| 822 | s64 debug_maxus = 0; | ||
| 823 | u32 debug_scanned = 0; | ||
| 824 | u32 debug_expired = 0; | ||
| 825 | int num_nodes = 0; | ||
| 826 | int *warned = NULL; | ||
| 827 | |||
| 828 | if (!dlm_config.ci_waitwarn_us) | ||
| 829 | return; | ||
| 830 | |||
| 831 | mutex_lock(&ls->ls_waiters_mutex); | ||
| 832 | |||
| 833 | list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { | ||
| 834 | if (ktime_equal(lkb->lkb_wait_time, zero)) | ||
| 835 | continue; | ||
| 836 | |||
| 837 | debug_scanned++; | ||
| 838 | |||
| 839 | us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time)); | ||
| 840 | |||
| 841 | if (us < dlm_config.ci_waitwarn_us) | ||
| 842 | continue; | ||
| 843 | |||
| 844 | lkb->lkb_wait_time = zero; | ||
| 845 | |||
| 846 | debug_expired++; | ||
| 847 | if (us > debug_maxus) | ||
| 848 | debug_maxus = us; | ||
| 849 | |||
| 850 | if (!num_nodes) { | ||
| 851 | num_nodes = ls->ls_num_nodes; | ||
| 852 | warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); | ||
| 853 | if (warned) | ||
| 854 | memset(warned, 0, num_nodes * sizeof(int)); | ||
| 855 | } | ||
| 856 | if (!warned) | ||
| 857 | continue; | ||
| 858 | if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned)) | ||
| 859 | continue; | ||
| 860 | |||
| 861 | log_error(ls, "waitwarn %x %lld %d us check connection to " | ||
| 862 | "node %d", lkb->lkb_id, (long long)us, | ||
| 863 | dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); | ||
| 864 | } | ||
| 865 | mutex_unlock(&ls->ls_waiters_mutex); | ||
| 866 | |||
| 867 | if (warned) | ||
| 868 | kfree(warned); | ||
| 869 | |||
| 870 | if (debug_expired) | ||
| 871 | log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", | ||
| 872 | debug_scanned, debug_expired, | ||
| 873 | dlm_config.ci_waitwarn_us, (long long)debug_maxus); | ||
| 874 | } | ||
| 875 | |||
| 802 | /* add/remove lkb from global waiters list of lkb's waiting for | 876 | /* add/remove lkb from global waiters list of lkb's waiting for |
| 803 | a reply from a remote node */ | 877 | a reply from a remote node */ |
| 804 | 878 | ||
| 805 | static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | 879 | static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid) |
| 806 | { | 880 | { |
| 807 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 881 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; |
| 808 | int error = 0; | 882 | int error = 0; |
| @@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 842 | 916 | ||
| 843 | lkb->lkb_wait_count++; | 917 | lkb->lkb_wait_count++; |
| 844 | lkb->lkb_wait_type = mstype; | 918 | lkb->lkb_wait_type = mstype; |
| 919 | lkb->lkb_wait_time = ktime_get(); | ||
| 920 | lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */ | ||
| 845 | hold_lkb(lkb); | 921 | hold_lkb(lkb); |
| 846 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); | 922 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); |
| 847 | out: | 923 | out: |
| @@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
| 961 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 1037 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; |
| 962 | int error; | 1038 | int error; |
| 963 | 1039 | ||
| 964 | if (ms != &ls->ls_stub_ms) | 1040 | if (ms->m_flags != DLM_IFL_STUB_MS) |
| 965 | mutex_lock(&ls->ls_waiters_mutex); | 1041 | mutex_lock(&ls->ls_waiters_mutex); |
| 966 | error = _remove_from_waiters(lkb, ms->m_type, ms); | 1042 | error = _remove_from_waiters(lkb, ms->m_type, ms); |
| 967 | if (ms != &ls->ls_stub_ms) | 1043 | if (ms->m_flags != DLM_IFL_STUB_MS) |
| 968 | mutex_unlock(&ls->ls_waiters_mutex); | 1044 | mutex_unlock(&ls->ls_waiters_mutex); |
| 969 | return error; | 1045 | return error; |
| 970 | } | 1046 | } |
| @@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls) | |||
| 1157 | list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) | 1233 | list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) |
| 1158 | lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); | 1234 | lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); |
| 1159 | mutex_unlock(&ls->ls_timeout_mutex); | 1235 | mutex_unlock(&ls->ls_timeout_mutex); |
| 1236 | |||
| 1237 | if (!dlm_config.ci_waitwarn_us) | ||
| 1238 | return; | ||
| 1239 | |||
| 1240 | mutex_lock(&ls->ls_waiters_mutex); | ||
| 1241 | list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { | ||
| 1242 | if (ktime_to_us(lkb->lkb_wait_time)) | ||
| 1243 | lkb->lkb_wait_time = ktime_get(); | ||
| 1244 | } | ||
| 1245 | mutex_unlock(&ls->ls_waiters_mutex); | ||
| 1160 | } | 1246 | } |
| 1161 | 1247 | ||
| 1162 | /* lkb is master or local copy */ | 1248 | /* lkb is master or local copy */ |
| @@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 1376 | ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become | 1462 | ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become |
| 1377 | compatible with other granted locks */ | 1463 | compatible with other granted locks */ |
| 1378 | 1464 | ||
| 1379 | static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) | 1465 | static void munge_demoted(struct dlm_lkb *lkb) |
| 1380 | { | 1466 | { |
| 1381 | if (ms->m_type != DLM_MSG_CONVERT_REPLY) { | ||
| 1382 | log_print("munge_demoted %x invalid reply type %d", | ||
| 1383 | lkb->lkb_id, ms->m_type); | ||
| 1384 | return; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { | 1467 | if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { |
| 1388 | log_print("munge_demoted %x invalid modes gr %d rq %d", | 1468 | log_print("munge_demoted %x invalid modes gr %d rq %d", |
| 1389 | lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); | 1469 | lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); |
| @@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) | |||
| 2844 | struct dlm_mhandle *mh; | 2924 | struct dlm_mhandle *mh; |
| 2845 | int to_nodeid, error; | 2925 | int to_nodeid, error; |
| 2846 | 2926 | ||
| 2847 | error = add_to_waiters(lkb, mstype); | 2927 | to_nodeid = r->res_nodeid; |
| 2928 | |||
| 2929 | error = add_to_waiters(lkb, mstype, to_nodeid); | ||
| 2848 | if (error) | 2930 | if (error) |
| 2849 | return error; | 2931 | return error; |
| 2850 | 2932 | ||
| 2851 | to_nodeid = r->res_nodeid; | ||
| 2852 | |||
| 2853 | error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); | 2933 | error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); |
| 2854 | if (error) | 2934 | if (error) |
| 2855 | goto fail; | 2935 | goto fail; |
| @@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 2880 | /* down conversions go without a reply from the master */ | 2960 | /* down conversions go without a reply from the master */ |
| 2881 | if (!error && down_conversion(lkb)) { | 2961 | if (!error && down_conversion(lkb)) { |
| 2882 | remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); | 2962 | remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); |
| 2963 | r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS; | ||
| 2883 | r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; | 2964 | r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; |
| 2884 | r->res_ls->ls_stub_ms.m_result = 0; | 2965 | r->res_ls->ls_stub_ms.m_result = 0; |
| 2885 | r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags; | ||
| 2886 | __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); | 2966 | __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); |
| 2887 | } | 2967 | } |
| 2888 | 2968 | ||
| @@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 2951 | struct dlm_mhandle *mh; | 3031 | struct dlm_mhandle *mh; |
| 2952 | int to_nodeid, error; | 3032 | int to_nodeid, error; |
| 2953 | 3033 | ||
| 2954 | error = add_to_waiters(lkb, DLM_MSG_LOOKUP); | 3034 | to_nodeid = dlm_dir_nodeid(r); |
| 3035 | |||
| 3036 | error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid); | ||
| 2955 | if (error) | 3037 | if (error) |
| 2956 | return error; | 3038 | return error; |
| 2957 | 3039 | ||
| 2958 | to_nodeid = dlm_dir_nodeid(r); | ||
| 2959 | |||
| 2960 | error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); | 3040 | error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); |
| 2961 | if (error) | 3041 | if (error) |
| 2962 | goto fail; | 3042 | goto fail; |
| @@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
| 3070 | 3150 | ||
| 3071 | static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 3151 | static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) |
| 3072 | { | 3152 | { |
| 3153 | if (ms->m_flags == DLM_IFL_STUB_MS) | ||
| 3154 | return; | ||
| 3155 | |||
| 3073 | lkb->lkb_sbflags = ms->m_sbflags; | 3156 | lkb->lkb_sbflags = ms->m_sbflags; |
| 3074 | lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | | 3157 | lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | |
| 3075 | (ms->m_flags & 0x0000FFFF); | 3158 | (ms->m_flags & 0x0000FFFF); |
| @@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
| 3612 | /* convert was queued on remote master */ | 3695 | /* convert was queued on remote master */ |
| 3613 | receive_flags_reply(lkb, ms); | 3696 | receive_flags_reply(lkb, ms); |
| 3614 | if (is_demoted(lkb)) | 3697 | if (is_demoted(lkb)) |
| 3615 | munge_demoted(lkb, ms); | 3698 | munge_demoted(lkb); |
| 3616 | del_lkb(r, lkb); | 3699 | del_lkb(r, lkb); |
| 3617 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); | 3700 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); |
| 3618 | add_timeout(lkb); | 3701 | add_timeout(lkb); |
| @@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
| 3622 | /* convert was granted on remote master */ | 3705 | /* convert was granted on remote master */ |
| 3623 | receive_flags_reply(lkb, ms); | 3706 | receive_flags_reply(lkb, ms); |
| 3624 | if (is_demoted(lkb)) | 3707 | if (is_demoted(lkb)) |
| 3625 | munge_demoted(lkb, ms); | 3708 | munge_demoted(lkb); |
| 3626 | grant_lock_pc(r, lkb, ms); | 3709 | grant_lock_pc(r, lkb, ms); |
| 3627 | queue_cast(r, lkb, 0); | 3710 | queue_cast(r, lkb, 0); |
| 3628 | break; | 3711 | break; |
| @@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid) | |||
| 3996 | dlm_put_lockspace(ls); | 4079 | dlm_put_lockspace(ls); |
| 3997 | } | 4080 | } |
| 3998 | 4081 | ||
| 3999 | static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) | 4082 | static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb, |
| 4083 | struct dlm_message *ms_stub) | ||
| 4000 | { | 4084 | { |
| 4001 | if (middle_conversion(lkb)) { | 4085 | if (middle_conversion(lkb)) { |
| 4002 | hold_lkb(lkb); | 4086 | hold_lkb(lkb); |
| 4003 | ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; | 4087 | memset(ms_stub, 0, sizeof(struct dlm_message)); |
| 4004 | ls->ls_stub_ms.m_result = -EINPROGRESS; | 4088 | ms_stub->m_flags = DLM_IFL_STUB_MS; |
| 4005 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 4089 | ms_stub->m_type = DLM_MSG_CONVERT_REPLY; |
| 4006 | ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; | 4090 | ms_stub->m_result = -EINPROGRESS; |
| 4007 | _receive_convert_reply(lkb, &ls->ls_stub_ms); | 4091 | ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; |
| 4092 | _receive_convert_reply(lkb, ms_stub); | ||
| 4008 | 4093 | ||
| 4009 | /* Same special case as in receive_rcom_lock_args() */ | 4094 | /* Same special case as in receive_rcom_lock_args() */ |
| 4010 | lkb->lkb_grmode = DLM_LOCK_IV; | 4095 | lkb->lkb_grmode = DLM_LOCK_IV; |
| @@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
| 4045 | void dlm_recover_waiters_pre(struct dlm_ls *ls) | 4130 | void dlm_recover_waiters_pre(struct dlm_ls *ls) |
| 4046 | { | 4131 | { |
| 4047 | struct dlm_lkb *lkb, *safe; | 4132 | struct dlm_lkb *lkb, *safe; |
| 4133 | struct dlm_message *ms_stub; | ||
| 4048 | int wait_type, stub_unlock_result, stub_cancel_result; | 4134 | int wait_type, stub_unlock_result, stub_cancel_result; |
| 4049 | 4135 | ||
| 4136 | ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); | ||
| 4137 | if (!ms_stub) { | ||
| 4138 | log_error(ls, "dlm_recover_waiters_pre no mem"); | ||
| 4139 | return; | ||
| 4140 | } | ||
| 4141 | |||
| 4050 | mutex_lock(&ls->ls_waiters_mutex); | 4142 | mutex_lock(&ls->ls_waiters_mutex); |
| 4051 | 4143 | ||
| 4052 | list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { | 4144 | list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { |
| 4053 | log_debug(ls, "pre recover waiter lkid %x type %d flags %x", | 4145 | |
| 4054 | lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); | 4146 | /* exclude debug messages about unlocks because there can be so |
| 4147 | many and they aren't very interesting */ | ||
| 4148 | |||
| 4149 | if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) { | ||
| 4150 | log_debug(ls, "recover_waiter %x nodeid %d " | ||
| 4151 | "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid, | ||
| 4152 | lkb->lkb_wait_type, lkb->lkb_wait_nodeid); | ||
| 4153 | } | ||
| 4055 | 4154 | ||
| 4056 | /* all outstanding lookups, regardless of destination will be | 4155 | /* all outstanding lookups, regardless of destination will be |
| 4057 | resent after recovery is done */ | 4156 | resent after recovery is done */ |
| @@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
| 4097 | break; | 4196 | break; |
| 4098 | 4197 | ||
| 4099 | case DLM_MSG_CONVERT: | 4198 | case DLM_MSG_CONVERT: |
| 4100 | recover_convert_waiter(ls, lkb); | 4199 | recover_convert_waiter(ls, lkb, ms_stub); |
| 4101 | break; | 4200 | break; |
| 4102 | 4201 | ||
| 4103 | case DLM_MSG_UNLOCK: | 4202 | case DLM_MSG_UNLOCK: |
| 4104 | hold_lkb(lkb); | 4203 | hold_lkb(lkb); |
| 4105 | ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; | 4204 | memset(ms_stub, 0, sizeof(struct dlm_message)); |
| 4106 | ls->ls_stub_ms.m_result = stub_unlock_result; | 4205 | ms_stub->m_flags = DLM_IFL_STUB_MS; |
| 4107 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 4206 | ms_stub->m_type = DLM_MSG_UNLOCK_REPLY; |
| 4108 | ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; | 4207 | ms_stub->m_result = stub_unlock_result; |
| 4109 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); | 4208 | ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; |
| 4209 | _receive_unlock_reply(lkb, ms_stub); | ||
| 4110 | dlm_put_lkb(lkb); | 4210 | dlm_put_lkb(lkb); |
| 4111 | break; | 4211 | break; |
| 4112 | 4212 | ||
| 4113 | case DLM_MSG_CANCEL: | 4213 | case DLM_MSG_CANCEL: |
| 4114 | hold_lkb(lkb); | 4214 | hold_lkb(lkb); |
| 4115 | ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; | 4215 | memset(ms_stub, 0, sizeof(struct dlm_message)); |
| 4116 | ls->ls_stub_ms.m_result = stub_cancel_result; | 4216 | ms_stub->m_flags = DLM_IFL_STUB_MS; |
| 4117 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 4217 | ms_stub->m_type = DLM_MSG_CANCEL_REPLY; |
| 4118 | ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; | 4218 | ms_stub->m_result = stub_cancel_result; |
| 4119 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); | 4219 | ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; |
| 4220 | _receive_cancel_reply(lkb, ms_stub); | ||
| 4120 | dlm_put_lkb(lkb); | 4221 | dlm_put_lkb(lkb); |
| 4121 | break; | 4222 | break; |
| 4122 | 4223 | ||
| @@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
| 4127 | schedule(); | 4228 | schedule(); |
| 4128 | } | 4229 | } |
| 4129 | mutex_unlock(&ls->ls_waiters_mutex); | 4230 | mutex_unlock(&ls->ls_waiters_mutex); |
| 4231 | kfree(ms_stub); | ||
| 4130 | } | 4232 | } |
| 4131 | 4233 | ||
| 4132 | static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) | 4234 | static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) |
| @@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) | |||
| 4191 | ou = is_overlap_unlock(lkb); | 4293 | ou = is_overlap_unlock(lkb); |
| 4192 | err = 0; | 4294 | err = 0; |
| 4193 | 4295 | ||
| 4194 | log_debug(ls, "recover_waiters_post %x type %d flags %x %s", | 4296 | log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d", |
| 4195 | lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); | 4297 | lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid); |
| 4196 | 4298 | ||
| 4197 | /* At this point we assume that we won't get a reply to any | 4299 | /* At this point we assume that we won't get a reply to any |
| 4198 | previous op or overlap op on this lock. First, do a big | 4300 | previous op or overlap op on this lock. First, do a big |
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 88e93c80cc2..265017a7c3e 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h | |||
| @@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb); | |||
| 24 | void dlm_scan_rsbs(struct dlm_ls *ls); | 24 | void dlm_scan_rsbs(struct dlm_ls *ls); |
| 25 | int dlm_lock_recovery_try(struct dlm_ls *ls); | 25 | int dlm_lock_recovery_try(struct dlm_ls *ls); |
| 26 | void dlm_unlock_recovery(struct dlm_ls *ls); | 26 | void dlm_unlock_recovery(struct dlm_ls *ls); |
| 27 | void dlm_scan_waiters(struct dlm_ls *ls); | ||
| 27 | void dlm_scan_timeout(struct dlm_ls *ls); | 28 | void dlm_scan_timeout(struct dlm_ls *ls); |
| 28 | void dlm_adjust_timeouts(struct dlm_ls *ls); | 29 | void dlm_adjust_timeouts(struct dlm_ls *ls); |
| 29 | 30 | ||
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index f994a7dfda8..14cbf409975 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
| @@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void) | |||
| 243 | static int dlm_scand(void *data) | 243 | static int dlm_scand(void *data) |
| 244 | { | 244 | { |
| 245 | struct dlm_ls *ls; | 245 | struct dlm_ls *ls; |
| 246 | int timeout_jiffies = dlm_config.ci_scan_secs * HZ; | ||
| 247 | 246 | ||
| 248 | while (!kthread_should_stop()) { | 247 | while (!kthread_should_stop()) { |
| 249 | ls = find_ls_to_scan(); | 248 | ls = find_ls_to_scan(); |
| @@ -252,13 +251,14 @@ static int dlm_scand(void *data) | |||
| 252 | ls->ls_scan_time = jiffies; | 251 | ls->ls_scan_time = jiffies; |
| 253 | dlm_scan_rsbs(ls); | 252 | dlm_scan_rsbs(ls); |
| 254 | dlm_scan_timeout(ls); | 253 | dlm_scan_timeout(ls); |
| 254 | dlm_scan_waiters(ls); | ||
| 255 | dlm_unlock_recovery(ls); | 255 | dlm_unlock_recovery(ls); |
| 256 | } else { | 256 | } else { |
| 257 | ls->ls_scan_time += HZ; | 257 | ls->ls_scan_time += HZ; |
| 258 | } | 258 | } |
| 259 | } else { | 259 | continue; |
| 260 | schedule_timeout_interruptible(timeout_jiffies); | ||
| 261 | } | 260 | } |
| 261 | schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); | ||
| 262 | } | 262 | } |
| 263 | return 0; | 263 | return 0; |
| 264 | } | 264 | } |
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 30d8b85febb..e2b87800436 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c | |||
| @@ -71,6 +71,36 @@ static void send_op(struct plock_op *op) | |||
| 71 | wake_up(&send_wq); | 71 | wake_up(&send_wq); |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | /* If a process was killed while waiting for the only plock on a file, | ||
| 75 | locks_remove_posix will not see any lock on the file so it won't | ||
| 76 | send an unlock-close to us to pass on to userspace to clean up the | ||
| 77 | abandoned waiter. So, we have to insert the unlock-close when the | ||
| 78 | lock call is interrupted. */ | ||
| 79 | |||
| 80 | static void do_unlock_close(struct dlm_ls *ls, u64 number, | ||
| 81 | struct file *file, struct file_lock *fl) | ||
| 82 | { | ||
| 83 | struct plock_op *op; | ||
| 84 | |||
| 85 | op = kzalloc(sizeof(*op), GFP_NOFS); | ||
| 86 | if (!op) | ||
| 87 | return; | ||
| 88 | |||
| 89 | op->info.optype = DLM_PLOCK_OP_UNLOCK; | ||
| 90 | op->info.pid = fl->fl_pid; | ||
| 91 | op->info.fsid = ls->ls_global_id; | ||
| 92 | op->info.number = number; | ||
| 93 | op->info.start = 0; | ||
| 94 | op->info.end = OFFSET_MAX; | ||
| 95 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) | ||
| 96 | op->info.owner = (__u64) fl->fl_pid; | ||
| 97 | else | ||
| 98 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
| 99 | |||
| 100 | op->info.flags |= DLM_PLOCK_FL_CLOSE; | ||
| 101 | send_op(op); | ||
| 102 | } | ||
| 103 | |||
| 74 | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | 104 | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
| 75 | int cmd, struct file_lock *fl) | 105 | int cmd, struct file_lock *fl) |
| 76 | { | 106 | { |
| @@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | |||
| 114 | 144 | ||
| 115 | send_op(op); | 145 | send_op(op); |
| 116 | 146 | ||
| 117 | if (xop->callback == NULL) | 147 | if (xop->callback == NULL) { |
| 118 | wait_event(recv_wq, (op->done != 0)); | 148 | rv = wait_event_killable(recv_wq, (op->done != 0)); |
| 119 | else { | 149 | if (rv == -ERESTARTSYS) { |
| 150 | log_debug(ls, "dlm_posix_lock: wait killed %llx", | ||
| 151 | (unsigned long long)number); | ||
| 152 | spin_lock(&ops_lock); | ||
| 153 | list_del(&op->list); | ||
| 154 | spin_unlock(&ops_lock); | ||
| 155 | kfree(xop); | ||
| 156 | do_unlock_close(ls, number, file, fl); | ||
| 157 | goto out; | ||
| 158 | } | ||
| 159 | } else { | ||
| 120 | rv = FILE_LOCK_DEFERRED; | 160 | rv = FILE_LOCK_DEFERRED; |
| 121 | goto out; | 161 | goto out; |
| 122 | } | 162 | } |
| @@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | |||
| 233 | else | 273 | else |
| 234 | op->info.owner = (__u64)(long) fl->fl_owner; | 274 | op->info.owner = (__u64)(long) fl->fl_owner; |
| 235 | 275 | ||
| 276 | if (fl->fl_flags & FL_CLOSE) { | ||
| 277 | op->info.flags |= DLM_PLOCK_FL_CLOSE; | ||
| 278 | send_op(op); | ||
| 279 | rv = 0; | ||
| 280 | goto out; | ||
| 281 | } | ||
| 282 | |||
| 236 | send_op(op); | 283 | send_op(op); |
| 237 | wait_event(recv_wq, (op->done != 0)); | 284 | wait_event(recv_wq, (op->done != 0)); |
| 238 | 285 | ||
| @@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, | |||
| 334 | spin_lock(&ops_lock); | 381 | spin_lock(&ops_lock); |
| 335 | if (!list_empty(&send_list)) { | 382 | if (!list_empty(&send_list)) { |
| 336 | op = list_entry(send_list.next, struct plock_op, list); | 383 | op = list_entry(send_list.next, struct plock_op, list); |
| 337 | list_move(&op->list, &recv_list); | 384 | if (op->info.flags & DLM_PLOCK_FL_CLOSE) |
| 385 | list_del(&op->list); | ||
| 386 | else | ||
| 387 | list_move(&op->list, &recv_list); | ||
| 338 | memcpy(&info, &op->info, sizeof(info)); | 388 | memcpy(&info, &op->info, sizeof(info)); |
| 339 | } | 389 | } |
| 340 | spin_unlock(&ops_lock); | 390 | spin_unlock(&ops_lock); |
| @@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, | |||
| 342 | if (!op) | 392 | if (!op) |
| 343 | return -EAGAIN; | 393 | return -EAGAIN; |
| 344 | 394 | ||
| 395 | /* there is no need to get a reply from userspace for unlocks | ||
| 396 | that were generated by the vfs cleaning up for a close | ||
| 397 | (the process did not make an unlock call). */ | ||
| 398 | |||
| 399 | if (op->info.flags & DLM_PLOCK_FL_CLOSE) | ||
| 400 | kfree(op); | ||
| 401 | |||
| 345 | if (copy_to_user(u, &info, sizeof(info))) | 402 | if (copy_to_user(u, &info, sizeof(info))) |
| 346 | return -EFAULT; | 403 | return -EFAULT; |
| 347 | return sizeof(info); | 404 | return sizeof(info); |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index d5ab3fe7c19..e96bf3e9be8 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
| @@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
| 611 | 611 | ||
| 612 | out_sig: | 612 | out_sig: |
| 613 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); | 613 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); |
| 614 | recalc_sigpending(); | ||
| 615 | out_free: | 614 | out_free: |
| 616 | kfree(kbuf); | 615 | kfree(kbuf); |
| 617 | return error; | 616 | return error; |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 0a78dae7e2c..1dd62ed35b8 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
| @@ -898,7 +898,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 898 | brelse(bh); | 898 | brelse(bh); |
| 899 | 899 | ||
| 900 | if (!sb_set_blocksize(sb, blocksize)) { | 900 | if (!sb_set_blocksize(sb, blocksize)) { |
| 901 | ext2_msg(sb, KERN_ERR, "error: blocksize is too small"); | 901 | ext2_msg(sb, KERN_ERR, |
| 902 | "error: bad blocksize %d", blocksize); | ||
| 902 | goto failed_sbi; | 903 | goto failed_sbi; |
| 903 | } | 904 | } |
| 904 | 905 | ||
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 32f3b869585..34b6d9bfc48 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
| @@ -1416,10 +1416,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1416 | frame->at = entries; | 1416 | frame->at = entries; |
| 1417 | frame->bh = bh; | 1417 | frame->bh = bh; |
| 1418 | bh = bh2; | 1418 | bh = bh2; |
| 1419 | /* | ||
| 1420 | * Mark buffers dirty here so that if do_split() fails we write a | ||
| 1421 | * consistent set of buffers to disk. | ||
| 1422 | */ | ||
| 1423 | ext3_journal_dirty_metadata(handle, frame->bh); | ||
| 1424 | ext3_journal_dirty_metadata(handle, bh); | ||
| 1419 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); | 1425 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); |
| 1420 | dx_release (frames); | 1426 | if (!de) { |
| 1421 | if (!(de)) | 1427 | ext3_mark_inode_dirty(handle, dir); |
| 1428 | dx_release(frames); | ||
| 1422 | return retval; | 1429 | return retval; |
| 1430 | } | ||
| 1431 | dx_release(frames); | ||
| 1423 | 1432 | ||
| 1424 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1433 | return add_dirent_to_buf(handle, dentry, inode, de, bh); |
| 1425 | } | 1434 | } |
| @@ -2189,6 +2198,7 @@ static int ext3_symlink (struct inode * dir, | |||
| 2189 | handle_t *handle; | 2198 | handle_t *handle; |
| 2190 | struct inode * inode; | 2199 | struct inode * inode; |
| 2191 | int l, err, retries = 0; | 2200 | int l, err, retries = 0; |
| 2201 | int credits; | ||
| 2192 | 2202 | ||
| 2193 | l = strlen(symname)+1; | 2203 | l = strlen(symname)+1; |
| 2194 | if (l > dir->i_sb->s_blocksize) | 2204 | if (l > dir->i_sb->s_blocksize) |
| @@ -2196,10 +2206,26 @@ static int ext3_symlink (struct inode * dir, | |||
| 2196 | 2206 | ||
| 2197 | dquot_initialize(dir); | 2207 | dquot_initialize(dir); |
| 2198 | 2208 | ||
| 2209 | if (l > EXT3_N_BLOCKS * 4) { | ||
| 2210 | /* | ||
| 2211 | * For non-fast symlinks, we just allocate inode and put it on | ||
| 2212 | * orphan list in the first transaction => we need bitmap, | ||
| 2213 | * group descriptor, sb, inode block, quota blocks. | ||
| 2214 | */ | ||
| 2215 | credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
| 2216 | } else { | ||
| 2217 | /* | ||
| 2218 | * Fast symlink. We have to add entry to directory | ||
| 2219 | * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS), | ||
| 2220 | * allocate new inode (bitmap, group descriptor, inode block, | ||
| 2221 | * quota blocks, sb is already counted in previous macros). | ||
| 2222 | */ | ||
| 2223 | credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
| 2224 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + | ||
| 2225 | EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
| 2226 | } | ||
| 2199 | retry: | 2227 | retry: |
| 2200 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | 2228 | handle = ext3_journal_start(dir, credits); |
| 2201 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + | ||
| 2202 | EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); | ||
| 2203 | if (IS_ERR(handle)) | 2229 | if (IS_ERR(handle)) |
| 2204 | return PTR_ERR(handle); | 2230 | return PTR_ERR(handle); |
| 2205 | 2231 | ||
| @@ -2211,21 +2237,45 @@ retry: | |||
| 2211 | if (IS_ERR(inode)) | 2237 | if (IS_ERR(inode)) |
| 2212 | goto out_stop; | 2238 | goto out_stop; |
| 2213 | 2239 | ||
| 2214 | if (l > sizeof (EXT3_I(inode)->i_data)) { | 2240 | if (l > EXT3_N_BLOCKS * 4) { |
| 2215 | inode->i_op = &ext3_symlink_inode_operations; | 2241 | inode->i_op = &ext3_symlink_inode_operations; |
| 2216 | ext3_set_aops(inode); | 2242 | ext3_set_aops(inode); |
| 2217 | /* | 2243 | /* |
| 2218 | * page_symlink() calls into ext3_prepare/commit_write. | 2244 | * We cannot call page_symlink() with transaction started |
| 2219 | * We have a transaction open. All is sweetness. It also sets | 2245 | * because it calls into ext3_write_begin() which acquires page |
| 2220 | * i_size in generic_commit_write(). | 2246 | * lock which ranks below transaction start (and it can also |
| 2247 | * wait for journal commit if we are running out of space). So | ||
| 2248 | * we have to stop transaction now and restart it when symlink | ||
| 2249 | * contents is written. | ||
| 2250 | * | ||
| 2251 | * To keep fs consistent in case of crash, we have to put inode | ||
| 2252 | * to orphan list in the mean time. | ||
| 2221 | */ | 2253 | */ |
| 2254 | drop_nlink(inode); | ||
| 2255 | err = ext3_orphan_add(handle, inode); | ||
| 2256 | ext3_journal_stop(handle); | ||
| 2257 | if (err) | ||
| 2258 | goto err_drop_inode; | ||
| 2222 | err = __page_symlink(inode, symname, l, 1); | 2259 | err = __page_symlink(inode, symname, l, 1); |
| 2260 | if (err) | ||
| 2261 | goto err_drop_inode; | ||
| 2262 | /* | ||
| 2263 | * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS | ||
| 2264 | * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified | ||
| 2265 | */ | ||
| 2266 | handle = ext3_journal_start(dir, | ||
| 2267 | EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
| 2268 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); | ||
| 2269 | if (IS_ERR(handle)) { | ||
| 2270 | err = PTR_ERR(handle); | ||
| 2271 | goto err_drop_inode; | ||
| 2272 | } | ||
| 2273 | inc_nlink(inode); | ||
| 2274 | err = ext3_orphan_del(handle, inode); | ||
| 2223 | if (err) { | 2275 | if (err) { |
| 2276 | ext3_journal_stop(handle); | ||
| 2224 | drop_nlink(inode); | 2277 | drop_nlink(inode); |
| 2225 | unlock_new_inode(inode); | 2278 | goto err_drop_inode; |
| 2226 | ext3_mark_inode_dirty(handle, inode); | ||
| 2227 | iput (inode); | ||
| 2228 | goto out_stop; | ||
| 2229 | } | 2279 | } |
| 2230 | } else { | 2280 | } else { |
| 2231 | inode->i_op = &ext3_fast_symlink_inode_operations; | 2281 | inode->i_op = &ext3_fast_symlink_inode_operations; |
| @@ -2239,6 +2289,10 @@ out_stop: | |||
| 2239 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) | 2289 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) |
| 2240 | goto retry; | 2290 | goto retry; |
| 2241 | return err; | 2291 | return err; |
| 2292 | err_drop_inode: | ||
| 2293 | unlock_new_inode(inode); | ||
| 2294 | iput(inode); | ||
| 2295 | return err; | ||
| 2242 | } | 2296 | } |
| 2243 | 2297 | ||
| 2244 | static int ext3_link (struct dentry * old_dentry, | 2298 | static int ext3_link (struct dentry * old_dentry, |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 69b18045946..72ffa974b0b 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
| @@ -302,12 +302,6 @@ void journal_commit_transaction(journal_t *journal) | |||
| 302 | * all outstanding updates to complete. | 302 | * all outstanding updates to complete. |
| 303 | */ | 303 | */ |
| 304 | 304 | ||
| 305 | #ifdef COMMIT_STATS | ||
| 306 | spin_lock(&journal->j_list_lock); | ||
| 307 | summarise_journal_usage(journal); | ||
| 308 | spin_unlock(&journal->j_list_lock); | ||
| 309 | #endif | ||
| 310 | |||
| 311 | /* Do we need to erase the effects of a prior journal_flush? */ | 305 | /* Do we need to erase the effects of a prior journal_flush? */ |
| 312 | if (journal->j_flags & JFS_FLUSHED) { | 306 | if (journal->j_flags & JFS_FLUSHED) { |
| 313 | jbd_debug(3, "super block updated\n"); | 307 | jbd_debug(3, "super block updated\n"); |
| @@ -722,8 +716,13 @@ wait_for_iobuf: | |||
| 722 | required. */ | 716 | required. */ |
| 723 | JBUFFER_TRACE(jh, "file as BJ_Forget"); | 717 | JBUFFER_TRACE(jh, "file as BJ_Forget"); |
| 724 | journal_file_buffer(jh, commit_transaction, BJ_Forget); | 718 | journal_file_buffer(jh, commit_transaction, BJ_Forget); |
| 725 | /* Wake up any transactions which were waiting for this | 719 | /* |
| 726 | IO to complete */ | 720 | * Wake up any transactions which were waiting for this |
| 721 | * IO to complete. The barrier must be here so that changes | ||
| 722 | * by journal_file_buffer() take effect before wake_up_bit() | ||
| 723 | * does the waitqueue check. | ||
| 724 | */ | ||
| 725 | smp_mb(); | ||
| 727 | wake_up_bit(&bh->b_state, BH_Unshadow); | 726 | wake_up_bit(&bh->b_state, BH_Unshadow); |
| 728 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); | 727 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); |
| 729 | __brelse(bh); | 728 | __brelse(bh); |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b3713afaaa9..e2d4285fbe9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
| @@ -437,9 +437,12 @@ int __log_space_left(journal_t *journal) | |||
| 437 | int __log_start_commit(journal_t *journal, tid_t target) | 437 | int __log_start_commit(journal_t *journal, tid_t target) |
| 438 | { | 438 | { |
| 439 | /* | 439 | /* |
| 440 | * Are we already doing a recent enough commit? | 440 | * The only transaction we can possibly wait upon is the |
| 441 | * currently running transaction (if it exists). Otherwise, | ||
| 442 | * the target tid must be an old one. | ||
| 441 | */ | 443 | */ |
| 442 | if (!tid_geq(journal->j_commit_request, target)) { | 444 | if (journal->j_running_transaction && |
| 445 | journal->j_running_transaction->t_tid == target) { | ||
| 443 | /* | 446 | /* |
| 444 | * We want a new commit: OK, mark the request and wakeup the | 447 | * We want a new commit: OK, mark the request and wakeup the |
| 445 | * commit thread. We do _not_ do the commit ourselves. | 448 | * commit thread. We do _not_ do the commit ourselves. |
| @@ -451,7 +454,14 @@ int __log_start_commit(journal_t *journal, tid_t target) | |||
| 451 | journal->j_commit_sequence); | 454 | journal->j_commit_sequence); |
| 452 | wake_up(&journal->j_wait_commit); | 455 | wake_up(&journal->j_wait_commit); |
| 453 | return 1; | 456 | return 1; |
| 454 | } | 457 | } else if (!tid_geq(journal->j_commit_request, target)) |
| 458 | /* This should never happen, but if it does, preserve | ||
| 459 | the evidence before kjournald goes into a loop and | ||
| 460 | increments j_commit_sequence beyond all recognition. */ | ||
| 461 | WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", | ||
| 462 | journal->j_commit_request, journal->j_commit_sequence, | ||
| 463 | target, journal->j_running_transaction ? | ||
| 464 | journal->j_running_transaction->t_tid : 0); | ||
| 455 | return 0; | 465 | return 0; |
| 456 | } | 466 | } |
| 457 | 467 | ||
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 60d2319651b..f7ee81a065d 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
| @@ -266,7 +266,8 @@ static handle_t *new_handle(int nblocks) | |||
| 266 | * This function is visible to journal users (like ext3fs), so is not | 266 | * This function is visible to journal users (like ext3fs), so is not |
| 267 | * called with the journal already locked. | 267 | * called with the journal already locked. |
| 268 | * | 268 | * |
| 269 | * Return a pointer to a newly allocated handle, or NULL on failure | 269 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
| 270 | * on failure. | ||
| 270 | */ | 271 | */ |
| 271 | handle_t *journal_start(journal_t *journal, int nblocks) | 272 | handle_t *journal_start(journal_t *journal, int nblocks) |
| 272 | { | 273 | { |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6e28000a4b2..29148a81c78 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -338,12 +338,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 338 | * all outstanding updates to complete. | 338 | * all outstanding updates to complete. |
| 339 | */ | 339 | */ |
| 340 | 340 | ||
| 341 | #ifdef COMMIT_STATS | ||
| 342 | spin_lock(&journal->j_list_lock); | ||
| 343 | summarise_journal_usage(journal); | ||
| 344 | spin_unlock(&journal->j_list_lock); | ||
| 345 | #endif | ||
| 346 | |||
| 347 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ | 341 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ |
| 348 | if (journal->j_flags & JBD2_FLUSHED) { | 342 | if (journal->j_flags & JBD2_FLUSHED) { |
| 349 | jbd_debug(3, "super block updated\n"); | 343 | jbd_debug(3, "super block updated\n"); |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index d8a0313e99e..f17e58b3298 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
| @@ -30,6 +30,7 @@ ocfs2-objs := \ | |||
| 30 | namei.o \ | 30 | namei.o \ |
| 31 | refcounttree.o \ | 31 | refcounttree.o \ |
| 32 | reservations.o \ | 32 | reservations.o \ |
| 33 | move_extents.o \ | ||
| 33 | resize.o \ | 34 | resize.o \ |
| 34 | slot_map.o \ | 35 | slot_map.o \ |
| 35 | suballoc.o \ | 36 | suballoc.o \ |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 312a28f433a..bc91072b721 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -22,6 +22,11 @@ | |||
| 22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
| 23 | #include "resize.h" | 23 | #include "resize.h" |
| 24 | #include "refcounttree.h" | 24 | #include "refcounttree.h" |
| 25 | #include "sysfile.h" | ||
| 26 | #include "dir.h" | ||
| 27 | #include "buffer_head_io.h" | ||
| 28 | #include "suballoc.h" | ||
| 29 | #include "move_extents.h" | ||
| 25 | 30 | ||
| 26 | #include <linux/ext2_fs.h> | 31 | #include <linux/ext2_fs.h> |
| 27 | 32 | ||
| @@ -35,31 +40,27 @@ | |||
| 35 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | 40 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's |
| 36 | * just a best-effort to tell userspace that this request caused the error. | 41 | * just a best-effort to tell userspace that this request caused the error. |
| 37 | */ | 42 | */ |
| 38 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | 43 | static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, |
| 39 | struct ocfs2_info_request __user *req) | 44 | struct ocfs2_info_request __user *req) |
| 40 | { | 45 | { |
| 41 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | 46 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; |
| 42 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | 47 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); |
| 43 | } | 48 | } |
| 44 | 49 | ||
| 45 | #define o2info_set_request_error(a, b) \ | 50 | static inline void o2info_set_request_filled(struct ocfs2_info_request *req) |
| 46 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
| 47 | |||
| 48 | static inline void __o2info_set_request_filled(struct ocfs2_info_request *req) | ||
| 49 | { | 51 | { |
| 50 | req->ir_flags |= OCFS2_INFO_FL_FILLED; | 52 | req->ir_flags |= OCFS2_INFO_FL_FILLED; |
| 51 | } | 53 | } |
| 52 | 54 | ||
| 53 | #define o2info_set_request_filled(a) \ | 55 | static inline void o2info_clear_request_filled(struct ocfs2_info_request *req) |
| 54 | __o2info_set_request_filled((struct ocfs2_info_request *)&(a)) | ||
| 55 | |||
| 56 | static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req) | ||
| 57 | { | 56 | { |
| 58 | req->ir_flags &= ~OCFS2_INFO_FL_FILLED; | 57 | req->ir_flags &= ~OCFS2_INFO_FL_FILLED; |
| 59 | } | 58 | } |
| 60 | 59 | ||
| 61 | #define o2info_clear_request_filled(a) \ | 60 | static inline int o2info_coherent(struct ocfs2_info_request *req) |
| 62 | __o2info_clear_request_filled((struct ocfs2_info_request *)&(a)) | 61 | { |
| 62 | return (!(req->ir_flags & OCFS2_INFO_FL_NON_COHERENT)); | ||
| 63 | } | ||
| 63 | 64 | ||
| 64 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 65 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
| 65 | { | 66 | { |
| @@ -153,7 +154,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, | |||
| 153 | 154 | ||
| 154 | oib.ib_blocksize = inode->i_sb->s_blocksize; | 155 | oib.ib_blocksize = inode->i_sb->s_blocksize; |
| 155 | 156 | ||
| 156 | o2info_set_request_filled(oib); | 157 | o2info_set_request_filled(&oib.ib_req); |
| 157 | 158 | ||
| 158 | if (o2info_to_user(oib, req)) | 159 | if (o2info_to_user(oib, req)) |
| 159 | goto bail; | 160 | goto bail; |
| @@ -161,7 +162,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, | |||
| 161 | status = 0; | 162 | status = 0; |
| 162 | bail: | 163 | bail: |
| 163 | if (status) | 164 | if (status) |
| 164 | o2info_set_request_error(oib, req); | 165 | o2info_set_request_error(&oib.ib_req, req); |
| 165 | 166 | ||
| 166 | return status; | 167 | return status; |
| 167 | } | 168 | } |
| @@ -178,7 +179,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, | |||
| 178 | 179 | ||
| 179 | oic.ic_clustersize = osb->s_clustersize; | 180 | oic.ic_clustersize = osb->s_clustersize; |
| 180 | 181 | ||
| 181 | o2info_set_request_filled(oic); | 182 | o2info_set_request_filled(&oic.ic_req); |
| 182 | 183 | ||
| 183 | if (o2info_to_user(oic, req)) | 184 | if (o2info_to_user(oic, req)) |
| 184 | goto bail; | 185 | goto bail; |
| @@ -186,7 +187,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, | |||
| 186 | status = 0; | 187 | status = 0; |
| 187 | bail: | 188 | bail: |
| 188 | if (status) | 189 | if (status) |
| 189 | o2info_set_request_error(oic, req); | 190 | o2info_set_request_error(&oic.ic_req, req); |
| 190 | 191 | ||
| 191 | return status; | 192 | return status; |
| 192 | } | 193 | } |
| @@ -203,7 +204,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, | |||
| 203 | 204 | ||
| 204 | oim.im_max_slots = osb->max_slots; | 205 | oim.im_max_slots = osb->max_slots; |
| 205 | 206 | ||
| 206 | o2info_set_request_filled(oim); | 207 | o2info_set_request_filled(&oim.im_req); |
| 207 | 208 | ||
| 208 | if (o2info_to_user(oim, req)) | 209 | if (o2info_to_user(oim, req)) |
| 209 | goto bail; | 210 | goto bail; |
| @@ -211,7 +212,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, | |||
| 211 | status = 0; | 212 | status = 0; |
| 212 | bail: | 213 | bail: |
| 213 | if (status) | 214 | if (status) |
| 214 | o2info_set_request_error(oim, req); | 215 | o2info_set_request_error(&oim.im_req, req); |
| 215 | 216 | ||
| 216 | return status; | 217 | return status; |
| 217 | } | 218 | } |
| @@ -228,7 +229,7 @@ int ocfs2_info_handle_label(struct inode *inode, | |||
| 228 | 229 | ||
| 229 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | 230 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); |
| 230 | 231 | ||
| 231 | o2info_set_request_filled(oil); | 232 | o2info_set_request_filled(&oil.il_req); |
| 232 | 233 | ||
| 233 | if (o2info_to_user(oil, req)) | 234 | if (o2info_to_user(oil, req)) |
| 234 | goto bail; | 235 | goto bail; |
| @@ -236,7 +237,7 @@ int ocfs2_info_handle_label(struct inode *inode, | |||
| 236 | status = 0; | 237 | status = 0; |
| 237 | bail: | 238 | bail: |
| 238 | if (status) | 239 | if (status) |
| 239 | o2info_set_request_error(oil, req); | 240 | o2info_set_request_error(&oil.il_req, req); |
| 240 | 241 | ||
| 241 | return status; | 242 | return status; |
| 242 | } | 243 | } |
| @@ -253,7 +254,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, | |||
| 253 | 254 | ||
| 254 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | 255 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); |
| 255 | 256 | ||
| 256 | o2info_set_request_filled(oiu); | 257 | o2info_set_request_filled(&oiu.iu_req); |
| 257 | 258 | ||
| 258 | if (o2info_to_user(oiu, req)) | 259 | if (o2info_to_user(oiu, req)) |
| 259 | goto bail; | 260 | goto bail; |
| @@ -261,7 +262,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, | |||
| 261 | status = 0; | 262 | status = 0; |
| 262 | bail: | 263 | bail: |
| 263 | if (status) | 264 | if (status) |
| 264 | o2info_set_request_error(oiu, req); | 265 | o2info_set_request_error(&oiu.iu_req, req); |
| 265 | 266 | ||
| 266 | return status; | 267 | return status; |
| 267 | } | 268 | } |
| @@ -280,7 +281,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, | |||
| 280 | oif.if_incompat_features = osb->s_feature_incompat; | 281 | oif.if_incompat_features = osb->s_feature_incompat; |
| 281 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | 282 | oif.if_ro_compat_features = osb->s_feature_ro_compat; |
| 282 | 283 | ||
| 283 | o2info_set_request_filled(oif); | 284 | o2info_set_request_filled(&oif.if_req); |
| 284 | 285 | ||
| 285 | if (o2info_to_user(oif, req)) | 286 | if (o2info_to_user(oif, req)) |
| 286 | goto bail; | 287 | goto bail; |
| @@ -288,7 +289,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, | |||
| 288 | status = 0; | 289 | status = 0; |
| 289 | bail: | 290 | bail: |
| 290 | if (status) | 291 | if (status) |
| 291 | o2info_set_request_error(oif, req); | 292 | o2info_set_request_error(&oif.if_req, req); |
| 292 | 293 | ||
| 293 | return status; | 294 | return status; |
| 294 | } | 295 | } |
| @@ -305,7 +306,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, | |||
| 305 | 306 | ||
| 306 | oij.ij_journal_size = osb->journal->j_inode->i_size; | 307 | oij.ij_journal_size = osb->journal->j_inode->i_size; |
| 307 | 308 | ||
| 308 | o2info_set_request_filled(oij); | 309 | o2info_set_request_filled(&oij.ij_req); |
| 309 | 310 | ||
| 310 | if (o2info_to_user(oij, req)) | 311 | if (o2info_to_user(oij, req)) |
| 311 | goto bail; | 312 | goto bail; |
| @@ -313,7 +314,408 @@ int ocfs2_info_handle_journal_size(struct inode *inode, | |||
| 313 | status = 0; | 314 | status = 0; |
| 314 | bail: | 315 | bail: |
| 315 | if (status) | 316 | if (status) |
| 316 | o2info_set_request_error(oij, req); | 317 | o2info_set_request_error(&oij.ij_req, req); |
| 318 | |||
| 319 | return status; | ||
| 320 | } | ||
| 321 | |||
| 322 | int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, | ||
| 323 | struct inode *inode_alloc, u64 blkno, | ||
| 324 | struct ocfs2_info_freeinode *fi, u32 slot) | ||
| 325 | { | ||
| 326 | int status = 0, unlock = 0; | ||
| 327 | |||
| 328 | struct buffer_head *bh = NULL; | ||
| 329 | struct ocfs2_dinode *dinode_alloc = NULL; | ||
| 330 | |||
| 331 | if (inode_alloc) | ||
| 332 | mutex_lock(&inode_alloc->i_mutex); | ||
| 333 | |||
| 334 | if (o2info_coherent(&fi->ifi_req)) { | ||
| 335 | status = ocfs2_inode_lock(inode_alloc, &bh, 0); | ||
| 336 | if (status < 0) { | ||
| 337 | mlog_errno(status); | ||
| 338 | goto bail; | ||
| 339 | } | ||
| 340 | unlock = 1; | ||
| 341 | } else { | ||
| 342 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
| 343 | if (status < 0) { | ||
| 344 | mlog_errno(status); | ||
| 345 | goto bail; | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | dinode_alloc = (struct ocfs2_dinode *)bh->b_data; | ||
| 350 | |||
| 351 | fi->ifi_stat[slot].lfi_total = | ||
| 352 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_total); | ||
| 353 | fi->ifi_stat[slot].lfi_free = | ||
| 354 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_total) - | ||
| 355 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_used); | ||
| 356 | |||
| 357 | bail: | ||
| 358 | if (unlock) | ||
| 359 | ocfs2_inode_unlock(inode_alloc, 0); | ||
| 360 | |||
| 361 | if (inode_alloc) | ||
| 362 | mutex_unlock(&inode_alloc->i_mutex); | ||
| 363 | |||
| 364 | brelse(bh); | ||
| 365 | |||
| 366 | return status; | ||
| 367 | } | ||
| 368 | |||
| 369 | int ocfs2_info_handle_freeinode(struct inode *inode, | ||
| 370 | struct ocfs2_info_request __user *req) | ||
| 371 | { | ||
| 372 | u32 i; | ||
| 373 | u64 blkno = -1; | ||
| 374 | char namebuf[40]; | ||
| 375 | int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; | ||
| 376 | struct ocfs2_info_freeinode *oifi = NULL; | ||
| 377 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 378 | struct inode *inode_alloc = NULL; | ||
| 379 | |||
| 380 | oifi = kzalloc(sizeof(struct ocfs2_info_freeinode), GFP_KERNEL); | ||
| 381 | if (!oifi) { | ||
| 382 | status = -ENOMEM; | ||
| 383 | mlog_errno(status); | ||
| 384 | goto bail; | ||
| 385 | } | ||
| 386 | |||
| 387 | if (o2info_from_user(*oifi, req)) | ||
| 388 | goto bail; | ||
| 389 | |||
| 390 | oifi->ifi_slotnum = osb->max_slots; | ||
| 391 | |||
| 392 | for (i = 0; i < oifi->ifi_slotnum; i++) { | ||
| 393 | if (o2info_coherent(&oifi->ifi_req)) { | ||
| 394 | inode_alloc = ocfs2_get_system_file_inode(osb, type, i); | ||
| 395 | if (!inode_alloc) { | ||
| 396 | mlog(ML_ERROR, "unable to get alloc inode in " | ||
| 397 | "slot %u\n", i); | ||
| 398 | status = -EIO; | ||
| 399 | goto bail; | ||
| 400 | } | ||
| 401 | } else { | ||
| 402 | ocfs2_sprintf_system_inode_name(namebuf, | ||
| 403 | sizeof(namebuf), | ||
| 404 | type, i); | ||
| 405 | status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, | ||
| 406 | namebuf, | ||
| 407 | strlen(namebuf), | ||
| 408 | &blkno); | ||
| 409 | if (status < 0) { | ||
| 410 | status = -ENOENT; | ||
| 411 | goto bail; | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); | ||
| 416 | if (status < 0) | ||
| 417 | goto bail; | ||
| 418 | |||
| 419 | iput(inode_alloc); | ||
| 420 | inode_alloc = NULL; | ||
| 421 | } | ||
| 422 | |||
| 423 | o2info_set_request_filled(&oifi->ifi_req); | ||
| 424 | |||
| 425 | if (o2info_to_user(*oifi, req)) | ||
| 426 | goto bail; | ||
| 427 | |||
| 428 | status = 0; | ||
| 429 | bail: | ||
| 430 | if (status) | ||
| 431 | o2info_set_request_error(&oifi->ifi_req, req); | ||
| 432 | |||
| 433 | kfree(oifi); | ||
| 434 | |||
| 435 | return status; | ||
| 436 | } | ||
| 437 | |||
| 438 | static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, | ||
| 439 | unsigned int chunksize) | ||
| 440 | { | ||
| 441 | int index; | ||
| 442 | |||
| 443 | index = __ilog2_u32(chunksize); | ||
| 444 | if (index >= OCFS2_INFO_MAX_HIST) | ||
| 445 | index = OCFS2_INFO_MAX_HIST - 1; | ||
| 446 | |||
| 447 | hist->fc_chunks[index]++; | ||
| 448 | hist->fc_clusters[index] += chunksize; | ||
| 449 | } | ||
| 450 | |||
| 451 | static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats, | ||
| 452 | unsigned int chunksize) | ||
| 453 | { | ||
| 454 | if (chunksize > stats->ffs_max) | ||
| 455 | stats->ffs_max = chunksize; | ||
| 456 | |||
| 457 | if (chunksize < stats->ffs_min) | ||
| 458 | stats->ffs_min = chunksize; | ||
| 459 | |||
| 460 | stats->ffs_avg += chunksize; | ||
| 461 | stats->ffs_free_chunks_real++; | ||
| 462 | } | ||
| 463 | |||
| 464 | void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg, | ||
| 465 | unsigned int chunksize) | ||
| 466 | { | ||
| 467 | o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize); | ||
| 468 | o2ffg_update_stats(&(ffg->iff_ffs), chunksize); | ||
| 469 | } | ||
| 470 | |||
| 471 | int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, | ||
| 472 | struct inode *gb_inode, | ||
| 473 | struct ocfs2_dinode *gb_dinode, | ||
| 474 | struct ocfs2_chain_rec *rec, | ||
| 475 | struct ocfs2_info_freefrag *ffg, | ||
| 476 | u32 chunks_in_group) | ||
| 477 | { | ||
| 478 | int status = 0, used; | ||
| 479 | u64 blkno; | ||
| 480 | |||
| 481 | struct buffer_head *bh = NULL; | ||
| 482 | struct ocfs2_group_desc *bg = NULL; | ||
| 483 | |||
| 484 | unsigned int max_bits, num_clusters; | ||
| 485 | unsigned int offset = 0, cluster, chunk; | ||
| 486 | unsigned int chunk_free, last_chunksize = 0; | ||
| 487 | |||
| 488 | if (!le32_to_cpu(rec->c_free)) | ||
| 489 | goto bail; | ||
| 490 | |||
| 491 | do { | ||
| 492 | if (!bg) | ||
| 493 | blkno = le64_to_cpu(rec->c_blkno); | ||
| 494 | else | ||
| 495 | blkno = le64_to_cpu(bg->bg_next_group); | ||
| 496 | |||
| 497 | if (bh) { | ||
| 498 | brelse(bh); | ||
| 499 | bh = NULL; | ||
| 500 | } | ||
| 501 | |||
| 502 | if (o2info_coherent(&ffg->iff_req)) | ||
| 503 | status = ocfs2_read_group_descriptor(gb_inode, | ||
| 504 | gb_dinode, | ||
| 505 | blkno, &bh); | ||
| 506 | else | ||
| 507 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
| 508 | |||
| 509 | if (status < 0) { | ||
| 510 | mlog(ML_ERROR, "Can't read the group descriptor # " | ||
| 511 | "%llu from device.", (unsigned long long)blkno); | ||
| 512 | status = -EIO; | ||
| 513 | goto bail; | ||
| 514 | } | ||
| 515 | |||
| 516 | bg = (struct ocfs2_group_desc *)bh->b_data; | ||
| 517 | |||
| 518 | if (!le16_to_cpu(bg->bg_free_bits_count)) | ||
| 519 | continue; | ||
| 520 | |||
| 521 | max_bits = le16_to_cpu(bg->bg_bits); | ||
| 522 | offset = 0; | ||
| 523 | |||
| 524 | for (chunk = 0; chunk < chunks_in_group; chunk++) { | ||
| 525 | /* | ||
| 526 | * last chunk may be not an entire one. | ||
| 527 | */ | ||
| 528 | if ((offset + ffg->iff_chunksize) > max_bits) | ||
| 529 | num_clusters = max_bits - offset; | ||
| 530 | else | ||
| 531 | num_clusters = ffg->iff_chunksize; | ||
| 532 | |||
| 533 | chunk_free = 0; | ||
| 534 | for (cluster = 0; cluster < num_clusters; cluster++) { | ||
| 535 | used = ocfs2_test_bit(offset, | ||
| 536 | (unsigned long *)bg->bg_bitmap); | ||
| 537 | /* | ||
| 538 | * - chunk_free counts free clusters in #N chunk. | ||
| 539 | * - last_chunksize records the size(in) clusters | ||
| 540 | * for the last real free chunk being counted. | ||
| 541 | */ | ||
| 542 | if (!used) { | ||
| 543 | last_chunksize++; | ||
| 544 | chunk_free++; | ||
| 545 | } | ||
| 546 | |||
| 547 | if (used && last_chunksize) { | ||
| 548 | ocfs2_info_update_ffg(ffg, | ||
| 549 | last_chunksize); | ||
| 550 | last_chunksize = 0; | ||
| 551 | } | ||
| 552 | |||
| 553 | offset++; | ||
| 554 | } | ||
| 555 | |||
| 556 | if (chunk_free == ffg->iff_chunksize) | ||
| 557 | ffg->iff_ffs.ffs_free_chunks++; | ||
| 558 | } | ||
| 559 | |||
| 560 | /* | ||
| 561 | * need to update the info for last free chunk. | ||
| 562 | */ | ||
| 563 | if (last_chunksize) | ||
| 564 | ocfs2_info_update_ffg(ffg, last_chunksize); | ||
| 565 | |||
| 566 | } while (le64_to_cpu(bg->bg_next_group)); | ||
| 567 | |||
| 568 | bail: | ||
| 569 | brelse(bh); | ||
| 570 | |||
| 571 | return status; | ||
| 572 | } | ||
| 573 | |||
| 574 | int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb, | ||
| 575 | struct inode *gb_inode, u64 blkno, | ||
| 576 | struct ocfs2_info_freefrag *ffg) | ||
| 577 | { | ||
| 578 | u32 chunks_in_group; | ||
| 579 | int status = 0, unlock = 0, i; | ||
| 580 | |||
| 581 | struct buffer_head *bh = NULL; | ||
| 582 | struct ocfs2_chain_list *cl = NULL; | ||
| 583 | struct ocfs2_chain_rec *rec = NULL; | ||
| 584 | struct ocfs2_dinode *gb_dinode = NULL; | ||
| 585 | |||
| 586 | if (gb_inode) | ||
| 587 | mutex_lock(&gb_inode->i_mutex); | ||
| 588 | |||
| 589 | if (o2info_coherent(&ffg->iff_req)) { | ||
| 590 | status = ocfs2_inode_lock(gb_inode, &bh, 0); | ||
| 591 | if (status < 0) { | ||
| 592 | mlog_errno(status); | ||
| 593 | goto bail; | ||
| 594 | } | ||
| 595 | unlock = 1; | ||
| 596 | } else { | ||
| 597 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
| 598 | if (status < 0) { | ||
| 599 | mlog_errno(status); | ||
| 600 | goto bail; | ||
| 601 | } | ||
| 602 | } | ||
| 603 | |||
| 604 | gb_dinode = (struct ocfs2_dinode *)bh->b_data; | ||
| 605 | cl = &(gb_dinode->id2.i_chain); | ||
| 606 | |||
| 607 | /* | ||
| 608 | * Chunksize(in) clusters from userspace should be | ||
| 609 | * less than clusters in a group. | ||
| 610 | */ | ||
| 611 | if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) { | ||
| 612 | status = -EINVAL; | ||
| 613 | goto bail; | ||
| 614 | } | ||
| 615 | |||
| 616 | memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats)); | ||
| 617 | |||
| 618 | ffg->iff_ffs.ffs_min = ~0U; | ||
| 619 | ffg->iff_ffs.ffs_clusters = | ||
| 620 | le32_to_cpu(gb_dinode->id1.bitmap1.i_total); | ||
| 621 | ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters - | ||
| 622 | le32_to_cpu(gb_dinode->id1.bitmap1.i_used); | ||
| 623 | |||
| 624 | chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1; | ||
| 625 | |||
| 626 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { | ||
| 627 | rec = &(cl->cl_recs[i]); | ||
| 628 | status = ocfs2_info_freefrag_scan_chain(osb, gb_inode, | ||
| 629 | gb_dinode, | ||
| 630 | rec, ffg, | ||
| 631 | chunks_in_group); | ||
| 632 | if (status) | ||
| 633 | goto bail; | ||
| 634 | } | ||
| 635 | |||
| 636 | if (ffg->iff_ffs.ffs_free_chunks_real) | ||
| 637 | ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg / | ||
| 638 | ffg->iff_ffs.ffs_free_chunks_real); | ||
| 639 | bail: | ||
| 640 | if (unlock) | ||
| 641 | ocfs2_inode_unlock(gb_inode, 0); | ||
| 642 | |||
| 643 | if (gb_inode) | ||
| 644 | mutex_unlock(&gb_inode->i_mutex); | ||
| 645 | |||
| 646 | if (gb_inode) | ||
| 647 | iput(gb_inode); | ||
| 648 | |||
| 649 | brelse(bh); | ||
| 650 | |||
| 651 | return status; | ||
| 652 | } | ||
| 653 | |||
| 654 | int ocfs2_info_handle_freefrag(struct inode *inode, | ||
| 655 | struct ocfs2_info_request __user *req) | ||
| 656 | { | ||
| 657 | u64 blkno = -1; | ||
| 658 | char namebuf[40]; | ||
| 659 | int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; | ||
| 660 | |||
| 661 | struct ocfs2_info_freefrag *oiff; | ||
| 662 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 663 | struct inode *gb_inode = NULL; | ||
| 664 | |||
| 665 | oiff = kzalloc(sizeof(struct ocfs2_info_freefrag), GFP_KERNEL); | ||
| 666 | if (!oiff) { | ||
| 667 | status = -ENOMEM; | ||
| 668 | mlog_errno(status); | ||
| 669 | goto bail; | ||
| 670 | } | ||
| 671 | |||
| 672 | if (o2info_from_user(*oiff, req)) | ||
| 673 | goto bail; | ||
| 674 | /* | ||
| 675 | * chunksize from userspace should be power of 2. | ||
| 676 | */ | ||
| 677 | if ((oiff->iff_chunksize & (oiff->iff_chunksize - 1)) || | ||
| 678 | (!oiff->iff_chunksize)) { | ||
| 679 | status = -EINVAL; | ||
| 680 | goto bail; | ||
| 681 | } | ||
| 682 | |||
| 683 | if (o2info_coherent(&oiff->iff_req)) { | ||
| 684 | gb_inode = ocfs2_get_system_file_inode(osb, type, | ||
| 685 | OCFS2_INVALID_SLOT); | ||
| 686 | if (!gb_inode) { | ||
| 687 | mlog(ML_ERROR, "unable to get global_bitmap inode\n"); | ||
| 688 | status = -EIO; | ||
| 689 | goto bail; | ||
| 690 | } | ||
| 691 | } else { | ||
| 692 | ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, | ||
| 693 | OCFS2_INVALID_SLOT); | ||
| 694 | status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, | ||
| 695 | namebuf, | ||
| 696 | strlen(namebuf), | ||
| 697 | &blkno); | ||
| 698 | if (status < 0) { | ||
| 699 | status = -ENOENT; | ||
| 700 | goto bail; | ||
| 701 | } | ||
| 702 | } | ||
| 703 | |||
| 704 | status = ocfs2_info_freefrag_scan_bitmap(osb, gb_inode, blkno, oiff); | ||
| 705 | if (status < 0) | ||
| 706 | goto bail; | ||
| 707 | |||
| 708 | o2info_set_request_filled(&oiff->iff_req); | ||
| 709 | |||
| 710 | if (o2info_to_user(*oiff, req)) | ||
| 711 | goto bail; | ||
| 712 | |||
| 713 | status = 0; | ||
| 714 | bail: | ||
| 715 | if (status) | ||
| 716 | o2info_set_request_error(&oiff->iff_req, req); | ||
| 717 | |||
| 718 | kfree(oiff); | ||
| 317 | 719 | ||
| 318 | return status; | 720 | return status; |
| 319 | } | 721 | } |
| @@ -327,7 +729,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, | |||
| 327 | if (o2info_from_user(oir, req)) | 729 | if (o2info_from_user(oir, req)) |
| 328 | goto bail; | 730 | goto bail; |
| 329 | 731 | ||
| 330 | o2info_clear_request_filled(oir); | 732 | o2info_clear_request_filled(&oir); |
| 331 | 733 | ||
| 332 | if (o2info_to_user(oir, req)) | 734 | if (o2info_to_user(oir, req)) |
| 333 | goto bail; | 735 | goto bail; |
| @@ -335,7 +737,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, | |||
| 335 | status = 0; | 737 | status = 0; |
| 336 | bail: | 738 | bail: |
| 337 | if (status) | 739 | if (status) |
| 338 | o2info_set_request_error(oir, req); | 740 | o2info_set_request_error(&oir, req); |
| 339 | 741 | ||
| 340 | return status; | 742 | return status; |
| 341 | } | 743 | } |
| @@ -389,6 +791,14 @@ int ocfs2_info_handle_request(struct inode *inode, | |||
| 389 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | 791 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) |
| 390 | status = ocfs2_info_handle_journal_size(inode, req); | 792 | status = ocfs2_info_handle_journal_size(inode, req); |
| 391 | break; | 793 | break; |
| 794 | case OCFS2_INFO_FREEINODE: | ||
| 795 | if (oir.ir_size == sizeof(struct ocfs2_info_freeinode)) | ||
| 796 | status = ocfs2_info_handle_freeinode(inode, req); | ||
| 797 | break; | ||
| 798 | case OCFS2_INFO_FREEFRAG: | ||
| 799 | if (oir.ir_size == sizeof(struct ocfs2_info_freefrag)) | ||
| 800 | status = ocfs2_info_handle_freefrag(inode, req); | ||
| 801 | break; | ||
| 392 | default: | 802 | default: |
| 393 | status = ocfs2_info_handle_unknown(inode, req); | 803 | status = ocfs2_info_handle_unknown(inode, req); |
| 394 | break; | 804 | break; |
| @@ -565,6 +975,8 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 565 | 975 | ||
| 566 | return 0; | 976 | return 0; |
| 567 | } | 977 | } |
| 978 | case OCFS2_IOC_MOVE_EXT: | ||
| 979 | return ocfs2_ioctl_move_extents(filp, (void __user *)arg); | ||
| 568 | default: | 980 | default: |
| 569 | return -ENOTTY; | 981 | return -ENOTTY; |
| 570 | } | 982 | } |
| @@ -608,6 +1020,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 608 | return -EFAULT; | 1020 | return -EFAULT; |
| 609 | 1021 | ||
| 610 | return ocfs2_info_handle(inode, &info, 1); | 1022 | return ocfs2_info_handle(inode, &info, 1); |
| 1023 | case OCFS2_IOC_MOVE_EXT: | ||
| 1024 | break; | ||
| 611 | default: | 1025 | default: |
| 612 | return -ENOIOCTLCMD; | 1026 | return -ENOIOCTLCMD; |
| 613 | } | 1027 | } |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c new file mode 100644 index 00000000000..4c5488468c1 --- /dev/null +++ b/fs/ocfs2/move_extents.c | |||
| @@ -0,0 +1,1153 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * move_extents.c | ||
| 5 | * | ||
| 6 | * Copyright (C) 2011 Oracle. All rights reserved. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public | ||
| 10 | * License version 2 as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * General Public License for more details. | ||
| 16 | */ | ||
| 17 | #include <linux/fs.h> | ||
| 18 | #include <linux/types.h> | ||
| 19 | #include <linux/mount.h> | ||
| 20 | #include <linux/swap.h> | ||
| 21 | |||
| 22 | #include <cluster/masklog.h> | ||
| 23 | |||
| 24 | #include "ocfs2.h" | ||
| 25 | #include "ocfs2_ioctl.h" | ||
| 26 | |||
| 27 | #include "alloc.h" | ||
| 28 | #include "aops.h" | ||
| 29 | #include "dlmglue.h" | ||
| 30 | #include "extent_map.h" | ||
| 31 | #include "inode.h" | ||
| 32 | #include "journal.h" | ||
| 33 | #include "suballoc.h" | ||
| 34 | #include "uptodate.h" | ||
| 35 | #include "super.h" | ||
| 36 | #include "dir.h" | ||
| 37 | #include "buffer_head_io.h" | ||
| 38 | #include "sysfile.h" | ||
| 39 | #include "suballoc.h" | ||
| 40 | #include "refcounttree.h" | ||
| 41 | #include "move_extents.h" | ||
| 42 | |||
| 43 | struct ocfs2_move_extents_context { | ||
| 44 | struct inode *inode; | ||
| 45 | struct file *file; | ||
| 46 | int auto_defrag; | ||
| 47 | int partial; | ||
| 48 | int credits; | ||
| 49 | u32 new_phys_cpos; | ||
| 50 | u32 clusters_moved; | ||
| 51 | u64 refcount_loc; | ||
| 52 | struct ocfs2_move_extents *range; | ||
| 53 | struct ocfs2_extent_tree et; | ||
| 54 | struct ocfs2_alloc_context *meta_ac; | ||
| 55 | struct ocfs2_alloc_context *data_ac; | ||
| 56 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
| 57 | }; | ||
| 58 | |||
| 59 | static int __ocfs2_move_extent(handle_t *handle, | ||
| 60 | struct ocfs2_move_extents_context *context, | ||
| 61 | u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos, | ||
| 62 | int ext_flags) | ||
| 63 | { | ||
| 64 | int ret = 0, index; | ||
| 65 | struct inode *inode = context->inode; | ||
| 66 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 67 | struct ocfs2_extent_rec *rec, replace_rec; | ||
| 68 | struct ocfs2_path *path = NULL; | ||
| 69 | struct ocfs2_extent_list *el; | ||
| 70 | u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); | ||
| 71 | u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); | ||
| 72 | |||
| 73 | ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, | ||
| 74 | p_cpos, new_p_cpos, len); | ||
| 75 | if (ret) { | ||
| 76 | mlog_errno(ret); | ||
| 77 | goto out; | ||
| 78 | } | ||
| 79 | |||
| 80 | memset(&replace_rec, 0, sizeof(replace_rec)); | ||
| 81 | replace_rec.e_cpos = cpu_to_le32(cpos); | ||
| 82 | replace_rec.e_leaf_clusters = cpu_to_le16(len); | ||
| 83 | replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, | ||
| 84 | new_p_cpos)); | ||
| 85 | |||
| 86 | path = ocfs2_new_path_from_et(&context->et); | ||
| 87 | if (!path) { | ||
| 88 | ret = -ENOMEM; | ||
| 89 | mlog_errno(ret); | ||
| 90 | goto out; | ||
| 91 | } | ||
| 92 | |||
| 93 | ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos); | ||
| 94 | if (ret) { | ||
| 95 | mlog_errno(ret); | ||
| 96 | goto out; | ||
| 97 | } | ||
| 98 | |||
| 99 | el = path_leaf_el(path); | ||
| 100 | |||
| 101 | index = ocfs2_search_extent_list(el, cpos); | ||
| 102 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
| 103 | ocfs2_error(inode->i_sb, | ||
| 104 | "Inode %llu has an extent at cpos %u which can no " | ||
| 105 | "longer be found.\n", | ||
| 106 | (unsigned long long)ino, cpos); | ||
| 107 | ret = -EROFS; | ||
| 108 | goto out; | ||
| 109 | } | ||
| 110 | |||
| 111 | rec = &el->l_recs[index]; | ||
| 112 | |||
| 113 | BUG_ON(ext_flags != rec->e_flags); | ||
| 114 | /* | ||
| 115 | * after moving/defragging to new location, the extent is not going | ||
| 116 | * to be refcounted anymore. | ||
| 117 | */ | ||
| 118 | replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; | ||
| 119 | |||
| 120 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), | ||
| 121 | context->et.et_root_bh, | ||
| 122 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 123 | if (ret) { | ||
| 124 | mlog_errno(ret); | ||
| 125 | goto out; | ||
| 126 | } | ||
| 127 | |||
| 128 | ret = ocfs2_split_extent(handle, &context->et, path, index, | ||
| 129 | &replace_rec, context->meta_ac, | ||
| 130 | &context->dealloc); | ||
| 131 | if (ret) { | ||
| 132 | mlog_errno(ret); | ||
| 133 | goto out; | ||
| 134 | } | ||
| 135 | |||
| 136 | ocfs2_journal_dirty(handle, context->et.et_root_bh); | ||
| 137 | |||
| 138 | context->new_phys_cpos = new_p_cpos; | ||
| 139 | |||
| 140 | /* | ||
| 141 | * do we need to append the truncate log for the old clusters? | ||
| 142 | */ | ||
| 143 | if (old_blkno) { | ||
| 144 | if (ext_flags & OCFS2_EXT_REFCOUNTED) | ||
| 145 | ret = ocfs2_decrease_refcount(inode, handle, | ||
| 146 | ocfs2_blocks_to_clusters(osb->sb, | ||
| 147 | old_blkno), | ||
| 148 | len, context->meta_ac, | ||
| 149 | &context->dealloc, 1); | ||
| 150 | else | ||
| 151 | ret = ocfs2_truncate_log_append(osb, handle, | ||
| 152 | old_blkno, len); | ||
| 153 | } | ||
| 154 | |||
| 155 | out: | ||
| 156 | return ret; | ||
| 157 | } | ||
| 158 | |||
| 159 | /* | ||
| 160 | * lock allocators, and reserving appropriate number of bits for | ||
| 161 | * meta blocks and data clusters. | ||
| 162 | * | ||
| 163 | * in some cases, we don't need to reserve clusters, just let data_ac | ||
| 164 | * be NULL. | ||
| 165 | */ | ||
| 166 | static int ocfs2_lock_allocators_move_extents(struct inode *inode, | ||
| 167 | struct ocfs2_extent_tree *et, | ||
| 168 | u32 clusters_to_move, | ||
| 169 | u32 extents_to_split, | ||
| 170 | struct ocfs2_alloc_context **meta_ac, | ||
| 171 | struct ocfs2_alloc_context **data_ac, | ||
| 172 | int extra_blocks, | ||
| 173 | int *credits) | ||
| 174 | { | ||
| 175 | int ret, num_free_extents; | ||
| 176 | unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; | ||
| 177 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 178 | |||
| 179 | num_free_extents = ocfs2_num_free_extents(osb, et); | ||
| 180 | if (num_free_extents < 0) { | ||
| 181 | ret = num_free_extents; | ||
| 182 | mlog_errno(ret); | ||
| 183 | goto out; | ||
| 184 | } | ||
| 185 | |||
| 186 | if (!num_free_extents || | ||
| 187 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) | ||
| 188 | extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); | ||
| 189 | |||
| 190 | ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac); | ||
| 191 | if (ret) { | ||
| 192 | mlog_errno(ret); | ||
| 193 | goto out; | ||
| 194 | } | ||
| 195 | |||
| 196 | if (data_ac) { | ||
| 197 | ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); | ||
| 198 | if (ret) { | ||
| 199 | mlog_errno(ret); | ||
| 200 | goto out; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el, | ||
| 205 | clusters_to_move + 2); | ||
| 206 | |||
| 207 | mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", | ||
| 208 | extra_blocks, clusters_to_move, *credits); | ||
| 209 | out: | ||
| 210 | if (ret) { | ||
| 211 | if (*meta_ac) { | ||
| 212 | ocfs2_free_alloc_context(*meta_ac); | ||
| 213 | *meta_ac = NULL; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | return ret; | ||
| 218 | } | ||
| 219 | |||
| 220 | /* | ||
| 221 | * Using one journal handle to guarantee the data consistency in case | ||
| 222 | * crash happens anywhere. | ||
| 223 | * | ||
| 224 | * XXX: defrag can end up with finishing partial extent as requested, | ||
| 225 | * due to not enough contiguous clusters can be found in allocator. | ||
| 226 | */ | ||
| 227 | static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | ||
| 228 | u32 cpos, u32 phys_cpos, u32 *len, int ext_flags) | ||
| 229 | { | ||
| 230 | int ret, credits = 0, extra_blocks = 0, partial = context->partial; | ||
| 231 | handle_t *handle; | ||
| 232 | struct inode *inode = context->inode; | ||
| 233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 234 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 235 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
| 236 | u32 new_phys_cpos, new_len; | ||
| 237 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 238 | |||
| 239 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { | ||
| 240 | |||
| 241 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
| 242 | OCFS2_HAS_REFCOUNT_FL)); | ||
| 243 | |||
| 244 | BUG_ON(!context->refcount_loc); | ||
| 245 | |||
| 246 | ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, | ||
| 247 | &ref_tree, NULL); | ||
| 248 | if (ret) { | ||
| 249 | mlog_errno(ret); | ||
| 250 | return ret; | ||
| 251 | } | ||
| 252 | |||
| 253 | ret = ocfs2_prepare_refcount_change_for_del(inode, | ||
| 254 | context->refcount_loc, | ||
| 255 | phys_blkno, | ||
| 256 | *len, | ||
| 257 | &credits, | ||
| 258 | &extra_blocks); | ||
| 259 | if (ret) { | ||
| 260 | mlog_errno(ret); | ||
| 261 | goto out; | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, | ||
| 266 | &context->meta_ac, | ||
| 267 | &context->data_ac, | ||
| 268 | extra_blocks, &credits); | ||
| 269 | if (ret) { | ||
| 270 | mlog_errno(ret); | ||
| 271 | goto out; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * should be using allocation reservation strategy there? | ||
| 276 | * | ||
| 277 | * if (context->data_ac) | ||
| 278 | * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; | ||
| 279 | */ | ||
| 280 | |||
| 281 | mutex_lock(&tl_inode->i_mutex); | ||
| 282 | |||
| 283 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
| 284 | ret = __ocfs2_flush_truncate_log(osb); | ||
| 285 | if (ret < 0) { | ||
| 286 | mlog_errno(ret); | ||
| 287 | goto out_unlock_mutex; | ||
| 288 | } | ||
| 289 | } | ||
| 290 | |||
| 291 | handle = ocfs2_start_trans(osb, credits); | ||
| 292 | if (IS_ERR(handle)) { | ||
| 293 | ret = PTR_ERR(handle); | ||
| 294 | mlog_errno(ret); | ||
| 295 | goto out_unlock_mutex; | ||
| 296 | } | ||
| 297 | |||
| 298 | ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len, | ||
| 299 | &new_phys_cpos, &new_len); | ||
| 300 | if (ret) { | ||
| 301 | mlog_errno(ret); | ||
| 302 | goto out_commit; | ||
| 303 | } | ||
| 304 | |||
| 305 | /* | ||
| 306 | * allowing partial extent moving is kind of 'pros and cons', it makes | ||
| 307 | * whole defragmentation less likely to fail, on the contrary, the bad | ||
| 308 | * thing is it may make the fs even more fragmented after moving, let | ||
| 309 | * userspace make a good decision here. | ||
| 310 | */ | ||
| 311 | if (new_len != *len) { | ||
| 312 | mlog(0, "len_claimed: %u, len: %u\n", new_len, *len); | ||
| 313 | if (!partial) { | ||
| 314 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; | ||
| 315 | ret = -ENOSPC; | ||
| 316 | goto out_commit; | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, | ||
| 321 | phys_cpos, new_phys_cpos); | ||
| 322 | |||
| 323 | ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos, | ||
| 324 | new_phys_cpos, ext_flags); | ||
| 325 | if (ret) | ||
| 326 | mlog_errno(ret); | ||
| 327 | |||
| 328 | if (partial && (new_len != *len)) | ||
| 329 | *len = new_len; | ||
| 330 | |||
| 331 | /* | ||
| 332 | * Here we should write the new page out first if we are | ||
| 333 | * in write-back mode. | ||
| 334 | */ | ||
| 335 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len); | ||
| 336 | if (ret) | ||
| 337 | mlog_errno(ret); | ||
| 338 | |||
| 339 | out_commit: | ||
| 340 | ocfs2_commit_trans(osb, handle); | ||
| 341 | |||
| 342 | out_unlock_mutex: | ||
| 343 | mutex_unlock(&tl_inode->i_mutex); | ||
| 344 | |||
| 345 | if (context->data_ac) { | ||
| 346 | ocfs2_free_alloc_context(context->data_ac); | ||
| 347 | context->data_ac = NULL; | ||
| 348 | } | ||
| 349 | |||
| 350 | if (context->meta_ac) { | ||
| 351 | ocfs2_free_alloc_context(context->meta_ac); | ||
| 352 | context->meta_ac = NULL; | ||
| 353 | } | ||
| 354 | |||
| 355 | out: | ||
| 356 | if (ref_tree) | ||
| 357 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
| 358 | |||
| 359 | return ret; | ||
| 360 | } | ||
| 361 | |||
| 362 | /* | ||
| 363 | * find the victim alloc group, where #blkno fits. | ||
| 364 | */ | ||
| 365 | static int ocfs2_find_victim_alloc_group(struct inode *inode, | ||
| 366 | u64 vict_blkno, | ||
| 367 | int type, int slot, | ||
| 368 | int *vict_bit, | ||
| 369 | struct buffer_head **ret_bh) | ||
| 370 | { | ||
| 371 | int ret, i, blocks_per_unit = 1; | ||
| 372 | u64 blkno; | ||
| 373 | char namebuf[40]; | ||
| 374 | |||
| 375 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 376 | struct buffer_head *ac_bh = NULL, *gd_bh = NULL; | ||
| 377 | struct ocfs2_chain_list *cl; | ||
| 378 | struct ocfs2_chain_rec *rec; | ||
| 379 | struct ocfs2_dinode *ac_dinode; | ||
| 380 | struct ocfs2_group_desc *bg; | ||
| 381 | |||
| 382 | ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); | ||
| 383 | ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, | ||
| 384 | strlen(namebuf), &blkno); | ||
| 385 | if (ret) { | ||
| 386 | ret = -ENOENT; | ||
| 387 | goto out; | ||
| 388 | } | ||
| 389 | |||
| 390 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh); | ||
| 391 | if (ret) { | ||
| 392 | mlog_errno(ret); | ||
| 393 | goto out; | ||
| 394 | } | ||
| 395 | |||
| 396 | ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data; | ||
| 397 | cl = &(ac_dinode->id2.i_chain); | ||
| 398 | rec = &(cl->cl_recs[0]); | ||
| 399 | |||
| 400 | if (type == GLOBAL_BITMAP_SYSTEM_INODE) | ||
| 401 | blocks_per_unit <<= (osb->s_clustersize_bits - | ||
| 402 | inode->i_sb->s_blocksize_bits); | ||
| 403 | /* | ||
| 404 | * 'vict_blkno' was out of the valid range. | ||
| 405 | */ | ||
| 406 | if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || | ||
| 407 | (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) * | ||
| 408 | blocks_per_unit))) { | ||
| 409 | ret = -EINVAL; | ||
| 410 | goto out; | ||
| 411 | } | ||
| 412 | |||
| 413 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { | ||
| 414 | |||
| 415 | rec = &(cl->cl_recs[i]); | ||
| 416 | if (!rec) | ||
| 417 | continue; | ||
| 418 | |||
| 419 | bg = NULL; | ||
| 420 | |||
| 421 | do { | ||
| 422 | if (!bg) | ||
| 423 | blkno = le64_to_cpu(rec->c_blkno); | ||
| 424 | else | ||
| 425 | blkno = le64_to_cpu(bg->bg_next_group); | ||
| 426 | |||
| 427 | if (gd_bh) { | ||
| 428 | brelse(gd_bh); | ||
| 429 | gd_bh = NULL; | ||
| 430 | } | ||
| 431 | |||
| 432 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh); | ||
| 433 | if (ret) { | ||
| 434 | mlog_errno(ret); | ||
| 435 | goto out; | ||
| 436 | } | ||
| 437 | |||
| 438 | bg = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 439 | |||
| 440 | if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + | ||
| 441 | le16_to_cpu(bg->bg_bits))) { | ||
| 442 | |||
| 443 | *ret_bh = gd_bh; | ||
| 444 | *vict_bit = (vict_blkno - blkno) / | ||
| 445 | blocks_per_unit; | ||
| 446 | mlog(0, "find the victim group: #%llu, " | ||
| 447 | "total_bits: %u, vict_bit: %u\n", | ||
| 448 | blkno, le16_to_cpu(bg->bg_bits), | ||
| 449 | *vict_bit); | ||
| 450 | goto out; | ||
| 451 | } | ||
| 452 | |||
| 453 | } while (le64_to_cpu(bg->bg_next_group)); | ||
| 454 | } | ||
| 455 | |||
| 456 | ret = -EINVAL; | ||
| 457 | out: | ||
| 458 | brelse(ac_bh); | ||
| 459 | |||
| 460 | /* | ||
| 461 | * caller has to release the gd_bh properly. | ||
| 462 | */ | ||
| 463 | return ret; | ||
| 464 | } | ||
| 465 | |||
| 466 | /* | ||
| 467 | * XXX: helper to validate and adjust moving goal. | ||
| 468 | */ | ||
| 469 | static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, | ||
| 470 | struct ocfs2_move_extents *range) | ||
| 471 | { | ||
| 472 | int ret, goal_bit = 0; | ||
| 473 | |||
| 474 | struct buffer_head *gd_bh = NULL; | ||
| 475 | struct ocfs2_group_desc *bg; | ||
| 476 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 477 | int c_to_b = 1 << (osb->s_clustersize_bits - | ||
| 478 | inode->i_sb->s_blocksize_bits); | ||
| 479 | |||
| 480 | /* | ||
| 481 | * validate goal sits within global_bitmap, and return the victim | ||
| 482 | * group desc | ||
| 483 | */ | ||
| 484 | ret = ocfs2_find_victim_alloc_group(inode, range->me_goal, | ||
| 485 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 486 | OCFS2_INVALID_SLOT, | ||
| 487 | &goal_bit, &gd_bh); | ||
| 488 | if (ret) | ||
| 489 | goto out; | ||
| 490 | |||
| 491 | bg = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 492 | |||
| 493 | /* | ||
| 494 | * make goal become cluster aligned. | ||
| 495 | */ | ||
| 496 | if (range->me_goal % c_to_b) | ||
| 497 | range->me_goal = range->me_goal / c_to_b * c_to_b; | ||
| 498 | |||
| 499 | /* | ||
| 500 | * moving goal is not allowed to start with a group desc block (#0 blk), | ||
| 501 | * let's compromise to the latter cluster. | ||
| 502 | */ | ||
| 503 | if (range->me_goal == le64_to_cpu(bg->bg_blkno)) | ||
| 504 | range->me_goal += c_to_b; | ||
| 505 | |||
| 506 | /* | ||
| 507 | * movement is not going to cross two groups. | ||
| 508 | */ | ||
| 509 | if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < | ||
| 510 | range->me_len) { | ||
| 511 | ret = -EINVAL; | ||
| 512 | goto out; | ||
| 513 | } | ||
| 514 | /* | ||
| 515 | * more exact validations/adjustments will be performed later during | ||
| 516 | * moving operation for each extent range. | ||
| 517 | */ | ||
| 518 | mlog(0, "extents get ready to be moved to #%llu block\n", | ||
| 519 | range->me_goal); | ||
| 520 | |||
| 521 | out: | ||
| 522 | brelse(gd_bh); | ||
| 523 | |||
| 524 | return ret; | ||
| 525 | } | ||
| 526 | |||
| 527 | static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, | ||
| 528 | int *goal_bit, u32 move_len, u32 max_hop, | ||
| 529 | u32 *phys_cpos) | ||
| 530 | { | ||
| 531 | int i, used, last_free_bits = 0, base_bit = *goal_bit; | ||
| 532 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; | ||
| 533 | u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb, | ||
| 534 | le64_to_cpu(gd->bg_blkno)); | ||
| 535 | |||
| 536 | for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) { | ||
| 537 | |||
| 538 | used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap); | ||
| 539 | if (used) { | ||
| 540 | /* | ||
| 541 | * we even tried searching the free chunk by jumping | ||
| 542 | * a 'max_hop' distance, but still failed. | ||
| 543 | */ | ||
| 544 | if ((i - base_bit) > max_hop) { | ||
| 545 | *phys_cpos = 0; | ||
| 546 | break; | ||
| 547 | } | ||
| 548 | |||
| 549 | if (last_free_bits) | ||
| 550 | last_free_bits = 0; | ||
| 551 | |||
| 552 | continue; | ||
| 553 | } else | ||
| 554 | last_free_bits++; | ||
| 555 | |||
| 556 | if (last_free_bits == move_len) { | ||
| 557 | *goal_bit = i; | ||
| 558 | *phys_cpos = base_cpos + i; | ||
| 559 | break; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); | ||
| 564 | } | ||
| 565 | |||
| 566 | static int ocfs2_alloc_dinode_update_counts(struct inode *inode, | ||
| 567 | handle_t *handle, | ||
| 568 | struct buffer_head *di_bh, | ||
| 569 | u32 num_bits, | ||
| 570 | u16 chain) | ||
| 571 | { | ||
| 572 | int ret; | ||
| 573 | u32 tmp_used; | ||
| 574 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
| 575 | struct ocfs2_chain_list *cl = | ||
| 576 | (struct ocfs2_chain_list *) &di->id2.i_chain; | ||
| 577 | |||
| 578 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
| 579 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 580 | if (ret < 0) { | ||
| 581 | mlog_errno(ret); | ||
| 582 | goto out; | ||
| 583 | } | ||
| 584 | |||
| 585 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | ||
| 586 | di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); | ||
| 587 | le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); | ||
| 588 | ocfs2_journal_dirty(handle, di_bh); | ||
| 589 | |||
| 590 | out: | ||
| 591 | return ret; | ||
| 592 | } | ||
| 593 | |||
| 594 | static inline int ocfs2_block_group_set_bits(handle_t *handle, | ||
| 595 | struct inode *alloc_inode, | ||
| 596 | struct ocfs2_group_desc *bg, | ||
| 597 | struct buffer_head *group_bh, | ||
| 598 | unsigned int bit_off, | ||
| 599 | unsigned int num_bits) | ||
| 600 | { | ||
| 601 | int status; | ||
| 602 | void *bitmap = bg->bg_bitmap; | ||
| 603 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | ||
| 604 | |||
| 605 | /* All callers get the descriptor via | ||
| 606 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ | ||
| 607 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); | ||
| 608 | BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); | ||
| 609 | |||
| 610 | mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, | ||
| 611 | num_bits); | ||
| 612 | |||
| 613 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | ||
| 614 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | ||
| 615 | |||
| 616 | status = ocfs2_journal_access_gd(handle, | ||
| 617 | INODE_CACHE(alloc_inode), | ||
| 618 | group_bh, | ||
| 619 | journal_type); | ||
| 620 | if (status < 0) { | ||
| 621 | mlog_errno(status); | ||
| 622 | goto bail; | ||
| 623 | } | ||
| 624 | |||
| 625 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | ||
| 626 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
| 627 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
| 628 | " count %u but claims %u are freed. num_bits %d", | ||
| 629 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
| 630 | le16_to_cpu(bg->bg_bits), | ||
| 631 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
| 632 | return -EROFS; | ||
| 633 | } | ||
| 634 | while (num_bits--) | ||
| 635 | ocfs2_set_bit(bit_off++, bitmap); | ||
| 636 | |||
| 637 | ocfs2_journal_dirty(handle, group_bh); | ||
| 638 | |||
| 639 | bail: | ||
| 640 | return status; | ||
| 641 | } | ||
| 642 | |||
| 643 | static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, | ||
| 644 | u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, | ||
| 645 | u32 len, int ext_flags) | ||
| 646 | { | ||
| 647 | int ret, credits = 0, extra_blocks = 0, goal_bit = 0; | ||
| 648 | handle_t *handle; | ||
| 649 | struct inode *inode = context->inode; | ||
| 650 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 651 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 652 | struct inode *gb_inode = NULL; | ||
| 653 | struct buffer_head *gb_bh = NULL; | ||
| 654 | struct buffer_head *gd_bh = NULL; | ||
| 655 | struct ocfs2_group_desc *gd; | ||
| 656 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
| 657 | u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb, | ||
| 658 | context->range->me_threshold); | ||
| 659 | u64 phys_blkno, new_phys_blkno; | ||
| 660 | |||
| 661 | phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 662 | |||
| 663 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { | ||
| 664 | |||
| 665 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
| 666 | OCFS2_HAS_REFCOUNT_FL)); | ||
| 667 | |||
| 668 | BUG_ON(!context->refcount_loc); | ||
| 669 | |||
| 670 | ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, | ||
| 671 | &ref_tree, NULL); | ||
| 672 | if (ret) { | ||
| 673 | mlog_errno(ret); | ||
| 674 | return ret; | ||
| 675 | } | ||
| 676 | |||
| 677 | ret = ocfs2_prepare_refcount_change_for_del(inode, | ||
| 678 | context->refcount_loc, | ||
| 679 | phys_blkno, | ||
| 680 | len, | ||
| 681 | &credits, | ||
| 682 | &extra_blocks); | ||
| 683 | if (ret) { | ||
| 684 | mlog_errno(ret); | ||
| 685 | goto out; | ||
| 686 | } | ||
| 687 | } | ||
| 688 | |||
| 689 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, | ||
| 690 | &context->meta_ac, | ||
| 691 | NULL, extra_blocks, &credits); | ||
| 692 | if (ret) { | ||
| 693 | mlog_errno(ret); | ||
| 694 | goto out; | ||
| 695 | } | ||
| 696 | |||
| 697 | /* | ||
| 698 | * need to count 2 extra credits for global_bitmap inode and | ||
| 699 | * group descriptor. | ||
| 700 | */ | ||
| 701 | credits += OCFS2_INODE_UPDATE_CREDITS + 1; | ||
| 702 | |||
| 703 | /* | ||
| 704 | * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() | ||
| 705 | * logic, while we still need to lock the global_bitmap. | ||
| 706 | */ | ||
| 707 | gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 708 | OCFS2_INVALID_SLOT); | ||
| 709 | if (!gb_inode) { | ||
| 710 | mlog(ML_ERROR, "unable to get global_bitmap inode\n"); | ||
| 711 | ret = -EIO; | ||
| 712 | goto out; | ||
| 713 | } | ||
| 714 | |||
| 715 | mutex_lock(&gb_inode->i_mutex); | ||
| 716 | |||
| 717 | ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); | ||
| 718 | if (ret) { | ||
| 719 | mlog_errno(ret); | ||
| 720 | goto out_unlock_gb_mutex; | ||
| 721 | } | ||
| 722 | |||
| 723 | mutex_lock(&tl_inode->i_mutex); | ||
| 724 | |||
| 725 | handle = ocfs2_start_trans(osb, credits); | ||
| 726 | if (IS_ERR(handle)) { | ||
| 727 | ret = PTR_ERR(handle); | ||
| 728 | mlog_errno(ret); | ||
| 729 | goto out_unlock_tl_inode; | ||
| 730 | } | ||
| 731 | |||
| 732 | new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); | ||
| 733 | ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno, | ||
| 734 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 735 | OCFS2_INVALID_SLOT, | ||
| 736 | &goal_bit, &gd_bh); | ||
| 737 | if (ret) { | ||
| 738 | mlog_errno(ret); | ||
| 739 | goto out_commit; | ||
| 740 | } | ||
| 741 | |||
| 742 | /* | ||
| 743 | * probe the victim cluster group to find a proper | ||
| 744 | * region to fit wanted movement, it even will perfrom | ||
| 745 | * a best-effort attempt by compromising to a threshold | ||
| 746 | * around the goal. | ||
| 747 | */ | ||
| 748 | ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, | ||
| 749 | new_phys_cpos); | ||
| 750 | if (!new_phys_cpos) { | ||
| 751 | ret = -ENOSPC; | ||
| 752 | goto out_commit; | ||
| 753 | } | ||
| 754 | |||
| 755 | ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, | ||
| 756 | *new_phys_cpos, ext_flags); | ||
| 757 | if (ret) { | ||
| 758 | mlog_errno(ret); | ||
| 759 | goto out_commit; | ||
| 760 | } | ||
| 761 | |||
| 762 | gd = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 763 | ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, | ||
| 764 | le16_to_cpu(gd->bg_chain)); | ||
| 765 | if (ret) { | ||
| 766 | mlog_errno(ret); | ||
| 767 | goto out_commit; | ||
| 768 | } | ||
| 769 | |||
| 770 | ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, | ||
| 771 | goal_bit, len); | ||
| 772 | if (ret) | ||
| 773 | mlog_errno(ret); | ||
| 774 | |||
| 775 | /* | ||
| 776 | * Here we should write the new page out first if we are | ||
| 777 | * in write-back mode. | ||
| 778 | */ | ||
| 779 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); | ||
| 780 | if (ret) | ||
| 781 | mlog_errno(ret); | ||
| 782 | |||
| 783 | out_commit: | ||
| 784 | ocfs2_commit_trans(osb, handle); | ||
| 785 | brelse(gd_bh); | ||
| 786 | |||
| 787 | out_unlock_tl_inode: | ||
| 788 | mutex_unlock(&tl_inode->i_mutex); | ||
| 789 | |||
| 790 | ocfs2_inode_unlock(gb_inode, 1); | ||
| 791 | out_unlock_gb_mutex: | ||
| 792 | mutex_unlock(&gb_inode->i_mutex); | ||
| 793 | brelse(gb_bh); | ||
| 794 | iput(gb_inode); | ||
| 795 | |||
| 796 | out: | ||
| 797 | if (context->meta_ac) { | ||
| 798 | ocfs2_free_alloc_context(context->meta_ac); | ||
| 799 | context->meta_ac = NULL; | ||
| 800 | } | ||
| 801 | |||
| 802 | if (ref_tree) | ||
| 803 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
| 804 | |||
| 805 | return ret; | ||
| 806 | } | ||
| 807 | |||
| 808 | /* | ||
| 809 | * Helper to calculate the defraging length in one run according to threshold. | ||
| 810 | */ | ||
| 811 | static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged, | ||
| 812 | u32 threshold, int *skip) | ||
| 813 | { | ||
| 814 | if ((*alloc_size + *len_defraged) < threshold) { | ||
| 815 | /* | ||
| 816 | * proceed defragmentation until we meet the thresh | ||
| 817 | */ | ||
| 818 | *len_defraged += *alloc_size; | ||
| 819 | } else if (*len_defraged == 0) { | ||
| 820 | /* | ||
| 821 | * XXX: skip a large extent. | ||
| 822 | */ | ||
| 823 | *skip = 1; | ||
| 824 | } else { | ||
| 825 | /* | ||
| 826 | * split this extent to coalesce with former pieces as | ||
| 827 | * to reach the threshold. | ||
| 828 | * | ||
| 829 | * we're done here with one cycle of defragmentation | ||
| 830 | * in a size of 'thresh', resetting 'len_defraged' | ||
| 831 | * forces a new defragmentation. | ||
| 832 | */ | ||
| 833 | *alloc_size = threshold - *len_defraged; | ||
| 834 | *len_defraged = 0; | ||
| 835 | } | ||
| 836 | } | ||
| 837 | |||
| 838 | static int __ocfs2_move_extents_range(struct buffer_head *di_bh, | ||
| 839 | struct ocfs2_move_extents_context *context) | ||
| 840 | { | ||
| 841 | int ret = 0, flags, do_defrag, skip = 0; | ||
| 842 | u32 cpos, phys_cpos, move_start, len_to_move, alloc_size; | ||
| 843 | u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0; | ||
| 844 | |||
| 845 | struct inode *inode = context->inode; | ||
| 846 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 847 | struct ocfs2_move_extents *range = context->range; | ||
| 848 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 849 | |||
| 850 | if ((inode->i_size == 0) || (range->me_len == 0)) | ||
| 851 | return 0; | ||
| 852 | |||
| 853 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
| 854 | return 0; | ||
| 855 | |||
| 856 | context->refcount_loc = le64_to_cpu(di->i_refcount_loc); | ||
| 857 | |||
| 858 | ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh); | ||
| 859 | ocfs2_init_dealloc_ctxt(&context->dealloc); | ||
| 860 | |||
| 861 | /* | ||
| 862 | * TO-DO XXX: | ||
| 863 | * | ||
| 864 | * - xattr extents. | ||
| 865 | */ | ||
| 866 | |||
| 867 | do_defrag = context->auto_defrag; | ||
| 868 | |||
| 869 | /* | ||
| 870 | * extents moving happens in unit of clusters, for the sake | ||
| 871 | * of simplicity, we may ignore two clusters where 'byte_start' | ||
| 872 | * and 'byte_start + len' were within. | ||
| 873 | */ | ||
| 874 | move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start); | ||
| 875 | len_to_move = (range->me_start + range->me_len) >> | ||
| 876 | osb->s_clustersize_bits; | ||
| 877 | if (len_to_move >= move_start) | ||
| 878 | len_to_move -= move_start; | ||
| 879 | else | ||
| 880 | len_to_move = 0; | ||
| 881 | |||
| 882 | if (do_defrag) { | ||
| 883 | defrag_thresh = range->me_threshold >> osb->s_clustersize_bits; | ||
| 884 | if (defrag_thresh <= 1) | ||
| 885 | goto done; | ||
| 886 | } else | ||
| 887 | new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, | ||
| 888 | range->me_goal); | ||
| 889 | |||
| 890 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, " | ||
| 891 | "thresh: %u\n", | ||
| 892 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 893 | (unsigned long long)range->me_start, | ||
| 894 | (unsigned long long)range->me_len, | ||
| 895 | move_start, len_to_move, defrag_thresh); | ||
| 896 | |||
| 897 | cpos = move_start; | ||
| 898 | while (len_to_move) { | ||
| 899 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size, | ||
| 900 | &flags); | ||
| 901 | if (ret) { | ||
| 902 | mlog_errno(ret); | ||
| 903 | goto out; | ||
| 904 | } | ||
| 905 | |||
| 906 | if (alloc_size > len_to_move) | ||
| 907 | alloc_size = len_to_move; | ||
| 908 | |||
| 909 | /* | ||
| 910 | * XXX: how to deal with a hole: | ||
| 911 | * | ||
| 912 | * - skip the hole of course | ||
| 913 | * - force a new defragmentation | ||
| 914 | */ | ||
| 915 | if (!phys_cpos) { | ||
| 916 | if (do_defrag) | ||
| 917 | len_defraged = 0; | ||
| 918 | |||
| 919 | goto next; | ||
| 920 | } | ||
| 921 | |||
| 922 | if (do_defrag) { | ||
| 923 | ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged, | ||
| 924 | defrag_thresh, &skip); | ||
| 925 | /* | ||
| 926 | * skip large extents | ||
| 927 | */ | ||
| 928 | if (skip) { | ||
| 929 | skip = 0; | ||
| 930 | goto next; | ||
| 931 | } | ||
| 932 | |||
| 933 | mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, " | ||
| 934 | "alloc_size: %u, len_defraged: %u\n", | ||
| 935 | cpos, phys_cpos, alloc_size, len_defraged); | ||
| 936 | |||
| 937 | ret = ocfs2_defrag_extent(context, cpos, phys_cpos, | ||
| 938 | &alloc_size, flags); | ||
| 939 | } else { | ||
| 940 | ret = ocfs2_move_extent(context, cpos, phys_cpos, | ||
| 941 | &new_phys_cpos, alloc_size, | ||
| 942 | flags); | ||
| 943 | |||
| 944 | new_phys_cpos += alloc_size; | ||
| 945 | } | ||
| 946 | |||
| 947 | if (ret < 0) { | ||
| 948 | mlog_errno(ret); | ||
| 949 | goto out; | ||
| 950 | } | ||
| 951 | |||
| 952 | context->clusters_moved += alloc_size; | ||
| 953 | next: | ||
| 954 | cpos += alloc_size; | ||
| 955 | len_to_move -= alloc_size; | ||
| 956 | } | ||
| 957 | |||
| 958 | done: | ||
| 959 | range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE; | ||
| 960 | |||
| 961 | out: | ||
| 962 | range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb, | ||
| 963 | context->clusters_moved); | ||
| 964 | range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb, | ||
| 965 | context->new_phys_cpos); | ||
| 966 | |||
| 967 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
| 968 | ocfs2_run_deallocs(osb, &context->dealloc); | ||
| 969 | |||
| 970 | return ret; | ||
| 971 | } | ||
| 972 | |||
| 973 | static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) | ||
| 974 | { | ||
| 975 | int status; | ||
| 976 | handle_t *handle; | ||
| 977 | struct inode *inode = context->inode; | ||
| 978 | struct ocfs2_dinode *di; | ||
| 979 | struct buffer_head *di_bh = NULL; | ||
| 980 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 981 | |||
| 982 | if (!inode) | ||
| 983 | return -ENOENT; | ||
| 984 | |||
| 985 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
| 986 | return -EROFS; | ||
| 987 | |||
| 988 | mutex_lock(&inode->i_mutex); | ||
| 989 | |||
| 990 | /* | ||
| 991 | * This prevents concurrent writes from other nodes | ||
| 992 | */ | ||
| 993 | status = ocfs2_rw_lock(inode, 1); | ||
| 994 | if (status) { | ||
| 995 | mlog_errno(status); | ||
| 996 | goto out; | ||
| 997 | } | ||
| 998 | |||
| 999 | status = ocfs2_inode_lock(inode, &di_bh, 1); | ||
| 1000 | if (status) { | ||
| 1001 | mlog_errno(status); | ||
| 1002 | goto out_rw_unlock; | ||
| 1003 | } | ||
| 1004 | |||
| 1005 | /* | ||
| 1006 | * rememer ip_xattr_sem also needs to be held if necessary | ||
| 1007 | */ | ||
| 1008 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1009 | |||
| 1010 | status = __ocfs2_move_extents_range(di_bh, context); | ||
| 1011 | |||
| 1012 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1013 | if (status) { | ||
| 1014 | mlog_errno(status); | ||
| 1015 | goto out_inode_unlock; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | /* | ||
| 1019 | * We update ctime for these changes | ||
| 1020 | */ | ||
| 1021 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
| 1022 | if (IS_ERR(handle)) { | ||
| 1023 | status = PTR_ERR(handle); | ||
| 1024 | mlog_errno(status); | ||
| 1025 | goto out_inode_unlock; | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
| 1029 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1030 | if (status) { | ||
| 1031 | mlog_errno(status); | ||
| 1032 | goto out_commit; | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1036 | inode->i_ctime = CURRENT_TIME; | ||
| 1037 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
| 1038 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
| 1039 | |||
| 1040 | ocfs2_journal_dirty(handle, di_bh); | ||
| 1041 | |||
| 1042 | out_commit: | ||
| 1043 | ocfs2_commit_trans(osb, handle); | ||
| 1044 | |||
| 1045 | out_inode_unlock: | ||
| 1046 | brelse(di_bh); | ||
| 1047 | ocfs2_inode_unlock(inode, 1); | ||
| 1048 | out_rw_unlock: | ||
| 1049 | ocfs2_rw_unlock(inode, 1); | ||
| 1050 | out: | ||
| 1051 | mutex_unlock(&inode->i_mutex); | ||
| 1052 | |||
| 1053 | return status; | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) | ||
| 1057 | { | ||
| 1058 | int status; | ||
| 1059 | |||
| 1060 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 1061 | struct ocfs2_move_extents range; | ||
| 1062 | struct ocfs2_move_extents_context *context = NULL; | ||
| 1063 | |||
| 1064 | status = mnt_want_write(filp->f_path.mnt); | ||
| 1065 | if (status) | ||
| 1066 | return status; | ||
| 1067 | |||
| 1068 | if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) | ||
| 1069 | goto out; | ||
| 1070 | |||
| 1071 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | ||
| 1072 | status = -EPERM; | ||
| 1073 | goto out; | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); | ||
| 1077 | if (!context) { | ||
| 1078 | status = -ENOMEM; | ||
| 1079 | mlog_errno(status); | ||
| 1080 | goto out; | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | context->inode = inode; | ||
| 1084 | context->file = filp; | ||
| 1085 | |||
| 1086 | if (argp) { | ||
| 1087 | if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, | ||
| 1088 | sizeof(range))) { | ||
| 1089 | status = -EFAULT; | ||
| 1090 | goto out; | ||
| 1091 | } | ||
| 1092 | } else { | ||
| 1093 | status = -EINVAL; | ||
| 1094 | goto out; | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | if (range.me_start > i_size_read(inode)) | ||
| 1098 | goto out; | ||
| 1099 | |||
| 1100 | if (range.me_start + range.me_len > i_size_read(inode)) | ||
| 1101 | range.me_len = i_size_read(inode) - range.me_start; | ||
| 1102 | |||
| 1103 | context->range = ⦥ | ||
| 1104 | |||
| 1105 | if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { | ||
| 1106 | context->auto_defrag = 1; | ||
| 1107 | /* | ||
| 1108 | * ok, the default theshold for the defragmentation | ||
| 1109 | * is 1M, since our maximum clustersize was 1M also. | ||
| 1110 | * any thought? | ||
| 1111 | */ | ||
| 1112 | if (!range.me_threshold) | ||
| 1113 | range.me_threshold = 1024 * 1024; | ||
| 1114 | |||
| 1115 | if (range.me_threshold > i_size_read(inode)) | ||
| 1116 | range.me_threshold = i_size_read(inode); | ||
| 1117 | |||
| 1118 | if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) | ||
| 1119 | context->partial = 1; | ||
| 1120 | } else { | ||
| 1121 | /* | ||
| 1122 | * first best-effort attempt to validate and adjust the goal | ||
| 1123 | * (physical address in block), while it can't guarantee later | ||
| 1124 | * operation can succeed all the time since global_bitmap may | ||
| 1125 | * change a bit over time. | ||
| 1126 | */ | ||
| 1127 | |||
| 1128 | status = ocfs2_validate_and_adjust_move_goal(inode, &range); | ||
| 1129 | if (status) | ||
| 1130 | goto out; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | status = ocfs2_move_extents(context); | ||
| 1134 | if (status) | ||
| 1135 | mlog_errno(status); | ||
| 1136 | out: | ||
| 1137 | /* | ||
| 1138 | * movement/defragmentation may end up being partially completed, | ||
| 1139 | * that's the reason why we need to return userspace the finished | ||
| 1140 | * length and new_offset even if failure happens somewhere. | ||
| 1141 | */ | ||
| 1142 | if (argp) { | ||
| 1143 | if (copy_to_user((struct ocfs2_move_extents *)argp, &range, | ||
| 1144 | sizeof(range))) | ||
| 1145 | status = -EFAULT; | ||
| 1146 | } | ||
| 1147 | |||
| 1148 | kfree(context); | ||
| 1149 | |||
| 1150 | mnt_drop_write(filp->f_path.mnt); | ||
| 1151 | |||
| 1152 | return status; | ||
| 1153 | } | ||
diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h new file mode 100644 index 00000000000..4e143e81144 --- /dev/null +++ b/fs/ocfs2/move_extents.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * move_extents.h | ||
| 5 | * | ||
| 6 | * Copyright (C) 2011 Oracle. All rights reserved. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public | ||
| 10 | * License version 2 as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * General Public License for more details. | ||
| 16 | */ | ||
| 17 | #ifndef OCFS2_MOVE_EXTENTS_H | ||
| 18 | #define OCFS2_MOVE_EXTENTS_H | ||
| 19 | |||
| 20 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp); | ||
| 21 | |||
| 22 | #endif /* OCFS2_MOVE_EXTENTS_H */ | ||
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index b46f39bf743..5b27ff1fa57 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
| @@ -142,6 +142,38 @@ struct ocfs2_info_journal_size { | |||
| 142 | __u64 ij_journal_size; | 142 | __u64 ij_journal_size; |
| 143 | }; | 143 | }; |
| 144 | 144 | ||
| 145 | struct ocfs2_info_freeinode { | ||
| 146 | struct ocfs2_info_request ifi_req; | ||
| 147 | struct ocfs2_info_local_freeinode { | ||
| 148 | __u64 lfi_total; | ||
| 149 | __u64 lfi_free; | ||
| 150 | } ifi_stat[OCFS2_MAX_SLOTS]; | ||
| 151 | __u32 ifi_slotnum; /* out */ | ||
| 152 | __u32 ifi_pad; | ||
| 153 | }; | ||
| 154 | |||
| 155 | #define OCFS2_INFO_MAX_HIST (32) | ||
| 156 | |||
| 157 | struct ocfs2_info_freefrag { | ||
| 158 | struct ocfs2_info_request iff_req; | ||
| 159 | struct ocfs2_info_freefrag_stats { /* (out) */ | ||
| 160 | struct ocfs2_info_free_chunk_list { | ||
| 161 | __u32 fc_chunks[OCFS2_INFO_MAX_HIST]; | ||
| 162 | __u32 fc_clusters[OCFS2_INFO_MAX_HIST]; | ||
| 163 | } ffs_fc_hist; | ||
| 164 | __u32 ffs_clusters; | ||
| 165 | __u32 ffs_free_clusters; | ||
| 166 | __u32 ffs_free_chunks; | ||
| 167 | __u32 ffs_free_chunks_real; | ||
| 168 | __u32 ffs_min; /* Minimum free chunksize in clusters */ | ||
| 169 | __u32 ffs_max; | ||
| 170 | __u32 ffs_avg; | ||
| 171 | __u32 ffs_pad; | ||
| 172 | } iff_ffs; | ||
| 173 | __u32 iff_chunksize; /* chunksize in clusters(in) */ | ||
| 174 | __u32 iff_pad; | ||
| 175 | }; | ||
| 176 | |||
| 145 | /* Codes for ocfs2_info_request */ | 177 | /* Codes for ocfs2_info_request */ |
| 146 | enum ocfs2_info_type { | 178 | enum ocfs2_info_type { |
| 147 | OCFS2_INFO_CLUSTERSIZE = 1, | 179 | OCFS2_INFO_CLUSTERSIZE = 1, |
| @@ -151,6 +183,8 @@ enum ocfs2_info_type { | |||
| 151 | OCFS2_INFO_UUID, | 183 | OCFS2_INFO_UUID, |
| 152 | OCFS2_INFO_FS_FEATURES, | 184 | OCFS2_INFO_FS_FEATURES, |
| 153 | OCFS2_INFO_JOURNAL_SIZE, | 185 | OCFS2_INFO_JOURNAL_SIZE, |
| 186 | OCFS2_INFO_FREEINODE, | ||
| 187 | OCFS2_INFO_FREEFRAG, | ||
| 154 | OCFS2_INFO_NUM_TYPES | 188 | OCFS2_INFO_NUM_TYPES |
| 155 | }; | 189 | }; |
| 156 | 190 | ||
| @@ -171,4 +205,38 @@ enum ocfs2_info_type { | |||
| 171 | 205 | ||
| 172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | 206 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) |
| 173 | 207 | ||
| 208 | struct ocfs2_move_extents { | ||
| 209 | /* All values are in bytes */ | ||
| 210 | /* in */ | ||
| 211 | __u64 me_start; /* Virtual start in the file to move */ | ||
| 212 | __u64 me_len; /* Length of the extents to be moved */ | ||
| 213 | __u64 me_goal; /* Physical offset of the goal, | ||
| 214 | it's in block unit */ | ||
| 215 | __u64 me_threshold; /* Maximum distance from goal or threshold | ||
| 216 | for auto defragmentation */ | ||
| 217 | __u64 me_flags; /* Flags for the operation: | ||
| 218 | * - auto defragmentation. | ||
| 219 | * - refcount,xattr cases. | ||
| 220 | */ | ||
| 221 | /* out */ | ||
| 222 | __u64 me_moved_len; /* Moved/defraged length */ | ||
| 223 | __u64 me_new_offset; /* Resulting physical location */ | ||
| 224 | __u32 me_reserved[2]; /* Reserved for futhure */ | ||
| 225 | }; | ||
| 226 | |||
| 227 | #define OCFS2_MOVE_EXT_FL_AUTO_DEFRAG (0x00000001) /* Kernel manages to | ||
| 228 | claim new clusters | ||
| 229 | as the goal place | ||
| 230 | for extents moving */ | ||
| 231 | #define OCFS2_MOVE_EXT_FL_PART_DEFRAG (0x00000002) /* Allow partial extent | ||
| 232 | moving, is to make | ||
| 233 | movement less likely | ||
| 234 | to fail, may make fs | ||
| 235 | even more fragmented */ | ||
| 236 | #define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmenation | ||
| 237 | completely gets done. | ||
| 238 | */ | ||
| 239 | |||
| 240 | #define OCFS2_IOC_MOVE_EXT _IOW('o', 6, struct ocfs2_move_extents) | ||
| 241 | |||
| 174 | #endif /* OCFS2_IOCTL_H */ | 242 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3c7606cff1a..ebfd3825f12 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -66,7 +66,7 @@ struct ocfs2_cow_context { | |||
| 66 | u32 *num_clusters, | 66 | u32 *num_clusters, |
| 67 | unsigned int *extent_flags); | 67 | unsigned int *extent_flags); |
| 68 | int (*cow_duplicate_clusters)(handle_t *handle, | 68 | int (*cow_duplicate_clusters)(handle_t *handle, |
| 69 | struct ocfs2_cow_context *context, | 69 | struct file *file, |
| 70 | u32 cpos, u32 old_cluster, | 70 | u32 cpos, u32 old_cluster, |
| 71 | u32 new_cluster, u32 new_len); | 71 | u32 new_cluster, u32 new_len); |
| 72 | }; | 72 | }; |
| @@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) | |||
| 2921 | return 0; | 2921 | return 0; |
| 2922 | } | 2922 | } |
| 2923 | 2923 | ||
| 2924 | static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | 2924 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, |
| 2925 | struct ocfs2_cow_context *context, | 2925 | struct file *file, |
| 2926 | u32 cpos, u32 old_cluster, | 2926 | u32 cpos, u32 old_cluster, |
| 2927 | u32 new_cluster, u32 new_len) | 2927 | u32 new_cluster, u32 new_len) |
| 2928 | { | 2928 | { |
| 2929 | int ret = 0, partial; | 2929 | int ret = 0, partial; |
| 2930 | struct ocfs2_caching_info *ci = context->data_et.et_ci; | 2930 | struct inode *inode = file->f_path.dentry->d_inode; |
| 2931 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
| 2931 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | 2932 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
| 2932 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
| 2933 | struct page *page; | 2934 | struct page *page; |
| 2934 | pgoff_t page_index; | 2935 | pgoff_t page_index; |
| 2935 | unsigned int from, to, readahead_pages; | 2936 | unsigned int from, to, readahead_pages; |
| 2936 | loff_t offset, end, map_end; | 2937 | loff_t offset, end, map_end; |
| 2937 | struct address_space *mapping = context->inode->i_mapping; | 2938 | struct address_space *mapping = inode->i_mapping; |
| 2938 | 2939 | ||
| 2939 | trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, | 2940 | trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, |
| 2940 | new_cluster, new_len); | 2941 | new_cluster, new_len); |
| @@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2948 | * We only duplicate pages until we reach the page contains i_size - 1. | 2949 | * We only duplicate pages until we reach the page contains i_size - 1. |
| 2949 | * So trim 'end' to i_size. | 2950 | * So trim 'end' to i_size. |
| 2950 | */ | 2951 | */ |
| 2951 | if (end > i_size_read(context->inode)) | 2952 | if (end > i_size_read(inode)) |
| 2952 | end = i_size_read(context->inode); | 2953 | end = i_size_read(inode); |
| 2953 | 2954 | ||
| 2954 | while (offset < end) { | 2955 | while (offset < end) { |
| 2955 | page_index = offset >> PAGE_CACHE_SHIFT; | 2956 | page_index = offset >> PAGE_CACHE_SHIFT; |
| @@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2972 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
| 2973 | BUG_ON(PageDirty(page)); | 2974 | BUG_ON(PageDirty(page)); |
| 2974 | 2975 | ||
| 2975 | if (PageReadahead(page) && context->file) { | 2976 | if (PageReadahead(page)) { |
| 2976 | page_cache_async_readahead(mapping, | 2977 | page_cache_async_readahead(mapping, |
| 2977 | &context->file->f_ra, | 2978 | &file->f_ra, file, |
| 2978 | context->file, | ||
| 2979 | page, page_index, | 2979 | page, page_index, |
| 2980 | readahead_pages); | 2980 | readahead_pages); |
| 2981 | } | 2981 | } |
| @@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2999 | } | 2999 | } |
| 3000 | } | 3000 | } |
| 3001 | 3001 | ||
| 3002 | ocfs2_map_and_dirty_page(context->inode, | 3002 | ocfs2_map_and_dirty_page(inode, handle, from, to, |
| 3003 | handle, from, to, | ||
| 3004 | page, 0, &new_block); | 3003 | page, 0, &new_block); |
| 3005 | mark_page_accessed(page); | 3004 | mark_page_accessed(page); |
| 3006 | unlock: | 3005 | unlock: |
| @@ -3015,14 +3014,15 @@ unlock: | |||
| 3015 | return ret; | 3014 | return ret; |
| 3016 | } | 3015 | } |
| 3017 | 3016 | ||
| 3018 | static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | 3017 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, |
| 3019 | struct ocfs2_cow_context *context, | 3018 | struct file *file, |
| 3020 | u32 cpos, u32 old_cluster, | 3019 | u32 cpos, u32 old_cluster, |
| 3021 | u32 new_cluster, u32 new_len) | 3020 | u32 new_cluster, u32 new_len) |
| 3022 | { | 3021 | { |
| 3023 | int ret = 0; | 3022 | int ret = 0; |
| 3024 | struct super_block *sb = context->inode->i_sb; | 3023 | struct inode *inode = file->f_path.dentry->d_inode; |
| 3025 | struct ocfs2_caching_info *ci = context->data_et.et_ci; | 3024 | struct super_block *sb = inode->i_sb; |
| 3025 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
| 3026 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); | 3026 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); |
| 3027 | u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); | 3027 | u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); |
| 3028 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 3028 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
| @@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle, | |||
| 3145 | 3145 | ||
| 3146 | /*If the old clusters is unwritten, no need to duplicate. */ | 3146 | /*If the old clusters is unwritten, no need to duplicate. */ |
| 3147 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { | 3147 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { |
| 3148 | ret = context->cow_duplicate_clusters(handle, context, cpos, | 3148 | ret = context->cow_duplicate_clusters(handle, context->file, |
| 3149 | old, new, len); | 3149 | cpos, old, new, len); |
| 3150 | if (ret) { | 3150 | if (ret) { |
| 3151 | mlog_errno(ret); | 3151 | mlog_errno(ret); |
| 3152 | goto out; | 3152 | goto out; |
| @@ -3162,22 +3162,22 @@ out: | |||
| 3162 | return ret; | 3162 | return ret; |
| 3163 | } | 3163 | } |
| 3164 | 3164 | ||
| 3165 | static int ocfs2_cow_sync_writeback(struct super_block *sb, | 3165 | int ocfs2_cow_sync_writeback(struct super_block *sb, |
| 3166 | struct ocfs2_cow_context *context, | 3166 | struct inode *inode, |
| 3167 | u32 cpos, u32 num_clusters) | 3167 | u32 cpos, u32 num_clusters) |
| 3168 | { | 3168 | { |
| 3169 | int ret = 0; | 3169 | int ret = 0; |
| 3170 | loff_t offset, end, map_end; | 3170 | loff_t offset, end, map_end; |
| 3171 | pgoff_t page_index; | 3171 | pgoff_t page_index; |
| 3172 | struct page *page; | 3172 | struct page *page; |
| 3173 | 3173 | ||
| 3174 | if (ocfs2_should_order_data(context->inode)) | 3174 | if (ocfs2_should_order_data(inode)) |
| 3175 | return 0; | 3175 | return 0; |
| 3176 | 3176 | ||
| 3177 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 3177 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
| 3178 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); | 3178 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); |
| 3179 | 3179 | ||
| 3180 | ret = filemap_fdatawrite_range(context->inode->i_mapping, | 3180 | ret = filemap_fdatawrite_range(inode->i_mapping, |
| 3181 | offset, end - 1); | 3181 | offset, end - 1); |
| 3182 | if (ret < 0) { | 3182 | if (ret < 0) { |
| 3183 | mlog_errno(ret); | 3183 | mlog_errno(ret); |
| @@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, | |||
| 3190 | if (map_end > end) | 3190 | if (map_end > end) |
| 3191 | map_end = end; | 3191 | map_end = end; |
| 3192 | 3192 | ||
| 3193 | page = find_or_create_page(context->inode->i_mapping, | 3193 | page = find_or_create_page(inode->i_mapping, |
| 3194 | page_index, GFP_NOFS); | 3194 | page_index, GFP_NOFS); |
| 3195 | BUG_ON(!page); | 3195 | BUG_ON(!page); |
| 3196 | 3196 | ||
| @@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, | |||
| 3349 | * in write-back mode. | 3349 | * in write-back mode. |
| 3350 | */ | 3350 | */ |
| 3351 | if (context->get_clusters == ocfs2_di_get_clusters) { | 3351 | if (context->get_clusters == ocfs2_di_get_clusters) { |
| 3352 | ret = ocfs2_cow_sync_writeback(sb, context, cpos, | 3352 | ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos, |
| 3353 | orig_num_clusters); | 3353 | orig_num_clusters); |
| 3354 | if (ret) | 3354 | if (ret) |
| 3355 | mlog_errno(ret); | 3355 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index c8ce46f7d8e..7754608c83a 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
| @@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, | |||
| 84 | struct buffer_head *ref_root_bh, | 84 | struct buffer_head *ref_root_bh, |
| 85 | u32 cpos, u32 write_len, | 85 | u32 cpos, u32 write_len, |
| 86 | struct ocfs2_post_refcount *post); | 86 | struct ocfs2_post_refcount *post); |
| 87 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, | ||
| 88 | struct file *file, | ||
| 89 | u32 cpos, u32 old_cluster, | ||
| 90 | u32 new_cluster, u32 new_len); | ||
| 91 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | ||
| 92 | struct file *file, | ||
| 93 | u32 cpos, u32 old_cluster, | ||
| 94 | u32 new_cluster, u32 new_len); | ||
| 95 | int ocfs2_cow_sync_writeback(struct super_block *sb, | ||
| 96 | struct inode *inode, | ||
| 97 | u32 cpos, u32 num_clusters); | ||
| 87 | int ocfs2_add_refcount_flag(struct inode *inode, | 98 | int ocfs2_add_refcount_flag(struct inode *inode, |
| 88 | struct ocfs2_extent_tree *data_et, | 99 | struct ocfs2_extent_tree *data_et, |
| 89 | struct ocfs2_caching_info *ref_ci, | 100 | struct ocfs2_caching_info *ref_ci, |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 8b3a7da531e..315de66e52b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
| @@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c) | |||
| 106 | long long liab; | 106 | long long liab; |
| 107 | 107 | ||
| 108 | spin_lock(&c->space_lock); | 108 | spin_lock(&c->space_lock); |
| 109 | liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; | 109 | liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; |
| 110 | spin_unlock(&c->space_lock); | 110 | spin_unlock(&c->space_lock); |
| 111 | return liab; | 111 | return liab; |
| 112 | } | 112 | } |
| @@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | |||
| 180 | int idx_lebs; | 180 | int idx_lebs; |
| 181 | long long idx_size; | 181 | long long idx_size; |
| 182 | 182 | ||
| 183 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; | 183 | idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; |
| 184 | /* And make sure we have thrice the index size of space reserved */ | 184 | /* And make sure we have thrice the index size of space reserved */ |
| 185 | idx_size += idx_size << 1; | 185 | idx_size += idx_size << 1; |
| 186 | /* | 186 | /* |
| @@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c) | |||
| 292 | * budgeted index space to the size of the current index, multiplies this by 3, | 292 | * budgeted index space to the size of the current index, multiplies this by 3, |
| 293 | * and makes sure this does not exceed the amount of free LEBs. | 293 | * and makes sure this does not exceed the amount of free LEBs. |
| 294 | * | 294 | * |
| 295 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: | 295 | * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: |
| 296 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might | 296 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might |
| 297 | * be large, because UBIFS does not do any index consolidation as long as | 297 | * be large, because UBIFS does not do any index consolidation as long as |
| 298 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs | 298 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs |
| 299 | * will contain a lot of dirt. | 299 | * will contain a lot of dirt. |
| 300 | * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, | 300 | * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, |
| 301 | * the index may be consolidated to take up to @c->min_idx_lebs LEBs. | 301 | * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. |
| 302 | * | 302 | * |
| 303 | * This function returns zero in case of success, and %-ENOSPC in case of | 303 | * This function returns zero in case of success, and %-ENOSPC in case of |
| 304 | * failure. | 304 | * failure. |
| @@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c) | |||
| 343 | c->lst.taken_empty_lebs; | 343 | c->lst.taken_empty_lebs; |
| 344 | if (unlikely(rsvd_idx_lebs > lebs)) { | 344 | if (unlikely(rsvd_idx_lebs > lebs)) { |
| 345 | dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " | 345 | dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " |
| 346 | "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, | 346 | "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, |
| 347 | rsvd_idx_lebs); | 347 | rsvd_idx_lebs); |
| 348 | return -ENOSPC; | 348 | return -ENOSPC; |
| 349 | } | 349 | } |
| 350 | 350 | ||
| 351 | available = ubifs_calc_available(c, min_idx_lebs); | 351 | available = ubifs_calc_available(c, min_idx_lebs); |
| 352 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 352 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
| 353 | 353 | ||
| 354 | if (unlikely(available < outstanding)) { | 354 | if (unlikely(available < outstanding)) { |
| 355 | dbg_budg("out of data space: available %lld, outstanding %lld", | 355 | dbg_budg("out of data space: available %lld, outstanding %lld", |
| @@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c) | |||
| 360 | if (available - outstanding <= c->rp_size && !can_use_rp(c)) | 360 | if (available - outstanding <= c->rp_size && !can_use_rp(c)) |
| 361 | return -ENOSPC; | 361 | return -ENOSPC; |
| 362 | 362 | ||
| 363 | c->min_idx_lebs = min_idx_lebs; | 363 | c->bi.min_idx_lebs = min_idx_lebs; |
| 364 | return 0; | 364 | return 0; |
| 365 | } | 365 | } |
| 366 | 366 | ||
| @@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c, | |||
| 393 | { | 393 | { |
| 394 | int data_growth; | 394 | int data_growth; |
| 395 | 395 | ||
| 396 | data_growth = req->new_ino ? c->inode_budget : 0; | 396 | data_growth = req->new_ino ? c->bi.inode_budget : 0; |
| 397 | if (req->new_page) | 397 | if (req->new_page) |
| 398 | data_growth += c->page_budget; | 398 | data_growth += c->bi.page_budget; |
| 399 | if (req->new_dent) | 399 | if (req->new_dent) |
| 400 | data_growth += c->dent_budget; | 400 | data_growth += c->bi.dent_budget; |
| 401 | data_growth += req->new_ino_d; | 401 | data_growth += req->new_ino_d; |
| 402 | return data_growth; | 402 | return data_growth; |
| 403 | } | 403 | } |
| @@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c, | |||
| 413 | { | 413 | { |
| 414 | int dd_growth; | 414 | int dd_growth; |
| 415 | 415 | ||
| 416 | dd_growth = req->dirtied_page ? c->page_budget : 0; | 416 | dd_growth = req->dirtied_page ? c->bi.page_budget : 0; |
| 417 | 417 | ||
| 418 | if (req->dirtied_ino) | 418 | if (req->dirtied_ino) |
| 419 | dd_growth += c->inode_budget << (req->dirtied_ino - 1); | 419 | dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); |
| 420 | if (req->mod_dent) | 420 | if (req->mod_dent) |
| 421 | dd_growth += c->dent_budget; | 421 | dd_growth += c->bi.dent_budget; |
| 422 | dd_growth += req->dirtied_ino_d; | 422 | dd_growth += req->dirtied_ino_d; |
| 423 | return dd_growth; | 423 | return dd_growth; |
| 424 | } | 424 | } |
| @@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
| 460 | 460 | ||
| 461 | again: | 461 | again: |
| 462 | spin_lock(&c->space_lock); | 462 | spin_lock(&c->space_lock); |
| 463 | ubifs_assert(c->budg_idx_growth >= 0); | 463 | ubifs_assert(c->bi.idx_growth >= 0); |
| 464 | ubifs_assert(c->budg_data_growth >= 0); | 464 | ubifs_assert(c->bi.data_growth >= 0); |
| 465 | ubifs_assert(c->budg_dd_growth >= 0); | 465 | ubifs_assert(c->bi.dd_growth >= 0); |
| 466 | 466 | ||
| 467 | if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { | 467 | if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { |
| 468 | dbg_budg("no space"); | 468 | dbg_budg("no space"); |
| 469 | spin_unlock(&c->space_lock); | 469 | spin_unlock(&c->space_lock); |
| 470 | return -ENOSPC; | 470 | return -ENOSPC; |
| 471 | } | 471 | } |
| 472 | 472 | ||
| 473 | c->budg_idx_growth += idx_growth; | 473 | c->bi.idx_growth += idx_growth; |
| 474 | c->budg_data_growth += data_growth; | 474 | c->bi.data_growth += data_growth; |
| 475 | c->budg_dd_growth += dd_growth; | 475 | c->bi.dd_growth += dd_growth; |
| 476 | 476 | ||
| 477 | err = do_budget_space(c); | 477 | err = do_budget_space(c); |
| 478 | if (likely(!err)) { | 478 | if (likely(!err)) { |
| @@ -484,9 +484,9 @@ again: | |||
| 484 | } | 484 | } |
| 485 | 485 | ||
| 486 | /* Restore the old values */ | 486 | /* Restore the old values */ |
| 487 | c->budg_idx_growth -= idx_growth; | 487 | c->bi.idx_growth -= idx_growth; |
| 488 | c->budg_data_growth -= data_growth; | 488 | c->bi.data_growth -= data_growth; |
| 489 | c->budg_dd_growth -= dd_growth; | 489 | c->bi.dd_growth -= dd_growth; |
| 490 | spin_unlock(&c->space_lock); | 490 | spin_unlock(&c->space_lock); |
| 491 | 491 | ||
| 492 | if (req->fast) { | 492 | if (req->fast) { |
| @@ -506,9 +506,9 @@ again: | |||
| 506 | goto again; | 506 | goto again; |
| 507 | } | 507 | } |
| 508 | dbg_budg("FS is full, -ENOSPC"); | 508 | dbg_budg("FS is full, -ENOSPC"); |
| 509 | c->nospace = 1; | 509 | c->bi.nospace = 1; |
| 510 | if (can_use_rp(c) || c->rp_size == 0) | 510 | if (can_use_rp(c) || c->rp_size == 0) |
| 511 | c->nospace_rp = 1; | 511 | c->bi.nospace_rp = 1; |
| 512 | smp_wmb(); | 512 | smp_wmb(); |
| 513 | } else | 513 | } else |
| 514 | ubifs_err("cannot budget space, error %d", err); | 514 | ubifs_err("cannot budget space, error %d", err); |
| @@ -523,8 +523,8 @@ again: | |||
| 523 | * This function releases the space budgeted by 'ubifs_budget_space()'. Note, | 523 | * This function releases the space budgeted by 'ubifs_budget_space()'. Note, |
| 524 | * since the index changes (which were budgeted for in @req->idx_growth) will | 524 | * since the index changes (which were budgeted for in @req->idx_growth) will |
| 525 | * only be written to the media on commit, this function moves the index budget | 525 | * only be written to the media on commit, this function moves the index budget |
| 526 | * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be | 526 | * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed |
| 527 | * zeroed by the commit operation. | 527 | * by the commit operation. |
| 528 | */ | 528 | */ |
| 529 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | 529 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) |
| 530 | { | 530 | { |
| @@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
| 553 | if (!req->data_growth && !req->dd_growth) | 553 | if (!req->data_growth && !req->dd_growth) |
| 554 | return; | 554 | return; |
| 555 | 555 | ||
| 556 | c->nospace = c->nospace_rp = 0; | 556 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 557 | smp_wmb(); | 557 | smp_wmb(); |
| 558 | 558 | ||
| 559 | spin_lock(&c->space_lock); | 559 | spin_lock(&c->space_lock); |
| 560 | c->budg_idx_growth -= req->idx_growth; | 560 | c->bi.idx_growth -= req->idx_growth; |
| 561 | c->budg_uncommitted_idx += req->idx_growth; | 561 | c->bi.uncommitted_idx += req->idx_growth; |
| 562 | c->budg_data_growth -= req->data_growth; | 562 | c->bi.data_growth -= req->data_growth; |
| 563 | c->budg_dd_growth -= req->dd_growth; | 563 | c->bi.dd_growth -= req->dd_growth; |
| 564 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 564 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 565 | 565 | ||
| 566 | ubifs_assert(c->budg_idx_growth >= 0); | 566 | ubifs_assert(c->bi.idx_growth >= 0); |
| 567 | ubifs_assert(c->budg_data_growth >= 0); | 567 | ubifs_assert(c->bi.data_growth >= 0); |
| 568 | ubifs_assert(c->budg_dd_growth >= 0); | 568 | ubifs_assert(c->bi.dd_growth >= 0); |
| 569 | ubifs_assert(c->min_idx_lebs < c->main_lebs); | 569 | ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); |
| 570 | ubifs_assert(!(c->budg_idx_growth & 7)); | 570 | ubifs_assert(!(c->bi.idx_growth & 7)); |
| 571 | ubifs_assert(!(c->budg_data_growth & 7)); | 571 | ubifs_assert(!(c->bi.data_growth & 7)); |
| 572 | ubifs_assert(!(c->budg_dd_growth & 7)); | 572 | ubifs_assert(!(c->bi.dd_growth & 7)); |
| 573 | spin_unlock(&c->space_lock); | 573 | spin_unlock(&c->space_lock); |
| 574 | } | 574 | } |
| 575 | 575 | ||
| @@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c) | |||
| 586 | { | 586 | { |
| 587 | spin_lock(&c->space_lock); | 587 | spin_lock(&c->space_lock); |
| 588 | /* Release the index growth reservation */ | 588 | /* Release the index growth reservation */ |
| 589 | c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; | 589 | c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; |
| 590 | /* Release the data growth reservation */ | 590 | /* Release the data growth reservation */ |
| 591 | c->budg_data_growth -= c->page_budget; | 591 | c->bi.data_growth -= c->bi.page_budget; |
| 592 | /* Increase the dirty data growth reservation instead */ | 592 | /* Increase the dirty data growth reservation instead */ |
| 593 | c->budg_dd_growth += c->page_budget; | 593 | c->bi.dd_growth += c->bi.page_budget; |
| 594 | /* And re-calculate the indexing space reservation */ | 594 | /* And re-calculate the indexing space reservation */ |
| 595 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 595 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 596 | spin_unlock(&c->space_lock); | 596 | spin_unlock(&c->space_lock); |
| 597 | } | 597 | } |
| 598 | 598 | ||
| @@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
| 612 | 612 | ||
| 613 | memset(&req, 0, sizeof(struct ubifs_budget_req)); | 613 | memset(&req, 0, sizeof(struct ubifs_budget_req)); |
| 614 | /* The "no space" flags will be cleared because dd_growth is > 0 */ | 614 | /* The "no space" flags will be cleared because dd_growth is > 0 */ |
| 615 | req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); | 615 | req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); |
| 616 | ubifs_release_budget(c, &req); | 616 | ubifs_release_budget(c, &req); |
| 617 | } | 617 | } |
| 618 | 618 | ||
| @@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) | |||
| 682 | int rsvd_idx_lebs, lebs; | 682 | int rsvd_idx_lebs, lebs; |
| 683 | long long available, outstanding, free; | 683 | long long available, outstanding, free; |
| 684 | 684 | ||
| 685 | ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); | 685 | ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); |
| 686 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 686 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
| 687 | available = ubifs_calc_available(c, c->min_idx_lebs); | 687 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); |
| 688 | 688 | ||
| 689 | /* | 689 | /* |
| 690 | * When reporting free space to user-space, UBIFS guarantees that it is | 690 | * When reporting free space to user-space, UBIFS guarantees that it is |
| @@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) | |||
| 697 | * Note, the calculations below are similar to what we have in | 697 | * Note, the calculations below are similar to what we have in |
| 698 | * 'do_budget_space()', so refer there for comments. | 698 | * 'do_budget_space()', so refer there for comments. |
| 699 | */ | 699 | */ |
| 700 | if (c->min_idx_lebs > c->lst.idx_lebs) | 700 | if (c->bi.min_idx_lebs > c->lst.idx_lebs) |
| 701 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | 701 | rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; |
| 702 | else | 702 | else |
| 703 | rsvd_idx_lebs = 0; | 703 | rsvd_idx_lebs = 0; |
| 704 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 704 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 1bd01ded712..87cd0ead863 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c | |||
| @@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c) | |||
| 182 | c->mst_node->root_len = cpu_to_le32(zroot.len); | 182 | c->mst_node->root_len = cpu_to_le32(zroot.len); |
| 183 | c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); | 183 | c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); |
| 184 | c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); | 184 | c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); |
| 185 | c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); | 185 | c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); |
| 186 | c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); | 186 | c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); |
| 187 | c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); | 187 | c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); |
| 188 | c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); | 188 | c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 004d3745dc4..0bb2bcef0de 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
| @@ -34,7 +34,6 @@ | |||
| 34 | #include <linux/moduleparam.h> | 34 | #include <linux/moduleparam.h> |
| 35 | #include <linux/debugfs.h> | 35 | #include <linux/debugfs.h> |
| 36 | #include <linux/math64.h> | 36 | #include <linux/math64.h> |
| 37 | #include <linux/slab.h> | ||
| 38 | 37 | ||
| 39 | #ifdef CONFIG_UBIFS_FS_DEBUG | 38 | #ifdef CONFIG_UBIFS_FS_DEBUG |
| 40 | 39 | ||
| @@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock); | |||
| 43 | static char dbg_key_buf0[128]; | 42 | static char dbg_key_buf0[128]; |
| 44 | static char dbg_key_buf1[128]; | 43 | static char dbg_key_buf1[128]; |
| 45 | 44 | ||
| 46 | unsigned int ubifs_msg_flags; | ||
| 47 | unsigned int ubifs_chk_flags; | 45 | unsigned int ubifs_chk_flags; |
| 48 | unsigned int ubifs_tst_flags; | 46 | unsigned int ubifs_tst_flags; |
| 49 | 47 | ||
| 50 | module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); | ||
| 51 | module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); | 48 | module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); |
| 52 | module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); | 49 | module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); |
| 53 | 50 | ||
| 54 | MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); | ||
| 55 | MODULE_PARM_DESC(debug_chks, "Debug check flags"); | 51 | MODULE_PARM_DESC(debug_chks, "Debug check flags"); |
| 56 | MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); | 52 | MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); |
| 57 | 53 | ||
| @@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
| 317 | printk(KERN_DEBUG "\tflags %#x\n", sup_flags); | 313 | printk(KERN_DEBUG "\tflags %#x\n", sup_flags); |
| 318 | printk(KERN_DEBUG "\t big_lpt %u\n", | 314 | printk(KERN_DEBUG "\t big_lpt %u\n", |
| 319 | !!(sup_flags & UBIFS_FLG_BIGLPT)); | 315 | !!(sup_flags & UBIFS_FLG_BIGLPT)); |
| 316 | printk(KERN_DEBUG "\t space_fixup %u\n", | ||
| 317 | !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); | ||
| 320 | printk(KERN_DEBUG "\tmin_io_size %u\n", | 318 | printk(KERN_DEBUG "\tmin_io_size %u\n", |
| 321 | le32_to_cpu(sup->min_io_size)); | 319 | le32_to_cpu(sup->min_io_size)); |
| 322 | printk(KERN_DEBUG "\tleb_size %u\n", | 320 | printk(KERN_DEBUG "\tleb_size %u\n", |
| @@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst) | |||
| 602 | spin_unlock(&dbg_lock); | 600 | spin_unlock(&dbg_lock); |
| 603 | } | 601 | } |
| 604 | 602 | ||
| 605 | void dbg_dump_budg(struct ubifs_info *c) | 603 | void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) |
| 606 | { | 604 | { |
| 607 | int i; | 605 | int i; |
| 608 | struct rb_node *rb; | 606 | struct rb_node *rb; |
| @@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c) | |||
| 610 | struct ubifs_gced_idx_leb *idx_gc; | 608 | struct ubifs_gced_idx_leb *idx_gc; |
| 611 | long long available, outstanding, free; | 609 | long long available, outstanding, free; |
| 612 | 610 | ||
| 613 | ubifs_assert(spin_is_locked(&c->space_lock)); | 611 | spin_lock(&c->space_lock); |
| 614 | spin_lock(&dbg_lock); | 612 | spin_lock(&dbg_lock); |
| 615 | printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " | 613 | printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " |
| 616 | "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, | 614 | "total budget sum %lld\n", current->pid, |
| 617 | c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); | 615 | bi->data_growth + bi->dd_growth, |
| 618 | printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " | 616 | bi->data_growth + bi->dd_growth + bi->idx_growth); |
| 619 | "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, | 617 | printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " |
| 620 | c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, | 618 | "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, |
| 621 | c->freeable_cnt); | 619 | bi->idx_growth); |
| 622 | printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " | 620 | printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " |
| 623 | "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, | 621 | "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, |
| 624 | c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); | 622 | bi->uncommitted_idx); |
| 623 | printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", | ||
| 624 | bi->page_budget, bi->inode_budget, bi->dent_budget); | ||
| 625 | printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", | ||
| 626 | bi->nospace, bi->nospace_rp); | ||
| 627 | printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", | ||
| 628 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); | ||
| 629 | |||
| 630 | if (bi != &c->bi) | ||
| 631 | /* | ||
| 632 | * If we are dumping saved budgeting data, do not print | ||
| 633 | * additional information which is about the current state, not | ||
| 634 | * the old one which corresponded to the saved budgeting data. | ||
| 635 | */ | ||
| 636 | goto out_unlock; | ||
| 637 | |||
| 638 | printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", | ||
| 639 | c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); | ||
| 625 | printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " | 640 | printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " |
| 626 | "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), | 641 | "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), |
| 627 | atomic_long_read(&c->dirty_zn_cnt), | 642 | atomic_long_read(&c->dirty_zn_cnt), |
| 628 | atomic_long_read(&c->clean_zn_cnt)); | 643 | atomic_long_read(&c->clean_zn_cnt)); |
| 629 | printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", | ||
| 630 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); | ||
| 631 | printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", | 644 | printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", |
| 632 | c->gc_lnum, c->ihead_lnum); | 645 | c->gc_lnum, c->ihead_lnum); |
| 646 | |||
| 633 | /* If we are in R/O mode, journal heads do not exist */ | 647 | /* If we are in R/O mode, journal heads do not exist */ |
| 634 | if (c->jheads) | 648 | if (c->jheads) |
| 635 | for (i = 0; i < c->jhead_cnt; i++) | 649 | for (i = 0; i < c->jhead_cnt; i++) |
| @@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c) | |||
| 648 | printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); | 662 | printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); |
| 649 | 663 | ||
| 650 | /* Print budgeting predictions */ | 664 | /* Print budgeting predictions */ |
| 651 | available = ubifs_calc_available(c, c->min_idx_lebs); | 665 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); |
| 652 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 666 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
| 653 | free = ubifs_get_free_space_nolock(c); | 667 | free = ubifs_get_free_space_nolock(c); |
| 654 | printk(KERN_DEBUG "Budgeting predictions:\n"); | 668 | printk(KERN_DEBUG "Budgeting predictions:\n"); |
| 655 | printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", | 669 | printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", |
| 656 | available, outstanding, free); | 670 | available, outstanding, free); |
| 671 | out_unlock: | ||
| 657 | spin_unlock(&dbg_lock); | 672 | spin_unlock(&dbg_lock); |
| 673 | spin_unlock(&c->space_lock); | ||
| 658 | } | 674 | } |
| 659 | 675 | ||
| 660 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | 676 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) |
| @@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | |||
| 729 | if (bud->lnum == lp->lnum) { | 745 | if (bud->lnum == lp->lnum) { |
| 730 | int head = 0; | 746 | int head = 0; |
| 731 | for (i = 0; i < c->jhead_cnt; i++) { | 747 | for (i = 0; i < c->jhead_cnt; i++) { |
| 732 | if (lp->lnum == c->jheads[i].wbuf.lnum) { | 748 | /* |
| 749 | * Note, if we are in R/O mode or in the middle | ||
| 750 | * of mounting/re-mounting, the write-buffers do | ||
| 751 | * not exist. | ||
| 752 | */ | ||
| 753 | if (c->jheads && | ||
| 754 | lp->lnum == c->jheads[i].wbuf.lnum) { | ||
| 733 | printk(KERN_CONT ", jhead %s", | 755 | printk(KERN_CONT ", jhead %s", |
| 734 | dbg_jhead(i)); | 756 | dbg_jhead(i)); |
| 735 | head = 1; | 757 | head = 1; |
| @@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c) | |||
| 976 | 998 | ||
| 977 | spin_lock(&c->space_lock); | 999 | spin_lock(&c->space_lock); |
| 978 | memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); | 1000 | memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); |
| 1001 | memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); | ||
| 1002 | d->saved_idx_gc_cnt = c->idx_gc_cnt; | ||
| 979 | 1003 | ||
| 980 | /* | 1004 | /* |
| 981 | * We use a dirty hack here and zero out @c->freeable_cnt, because it | 1005 | * We use a dirty hack here and zero out @c->freeable_cnt, because it |
| @@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c) | |||
| 1042 | out: | 1066 | out: |
| 1043 | ubifs_msg("saved lprops statistics dump"); | 1067 | ubifs_msg("saved lprops statistics dump"); |
| 1044 | dbg_dump_lstats(&d->saved_lst); | 1068 | dbg_dump_lstats(&d->saved_lst); |
| 1045 | ubifs_get_lp_stats(c, &lst); | 1069 | ubifs_msg("saved budgeting info dump"); |
| 1046 | 1070 | dbg_dump_budg(c, &d->saved_bi); | |
| 1071 | ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); | ||
| 1047 | ubifs_msg("current lprops statistics dump"); | 1072 | ubifs_msg("current lprops statistics dump"); |
| 1073 | ubifs_get_lp_stats(c, &lst); | ||
| 1048 | dbg_dump_lstats(&lst); | 1074 | dbg_dump_lstats(&lst); |
| 1049 | 1075 | ubifs_msg("current budgeting info dump"); | |
| 1050 | spin_lock(&c->space_lock); | 1076 | dbg_dump_budg(c, &c->bi); |
| 1051 | dbg_dump_budg(c); | ||
| 1052 | spin_unlock(&c->space_lock); | ||
| 1053 | dump_stack(); | 1077 | dump_stack(); |
| 1054 | return -EINVAL; | 1078 | return -EINVAL; |
| 1055 | } | 1079 | } |
| @@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, | |||
| 1793 | struct rb_node **p, *parent = NULL; | 1817 | struct rb_node **p, *parent = NULL; |
| 1794 | struct fsck_inode *fscki; | 1818 | struct fsck_inode *fscki; |
| 1795 | ino_t inum = key_inum_flash(c, &ino->key); | 1819 | ino_t inum = key_inum_flash(c, &ino->key); |
| 1820 | struct inode *inode; | ||
| 1821 | struct ubifs_inode *ui; | ||
| 1796 | 1822 | ||
| 1797 | p = &fsckd->inodes.rb_node; | 1823 | p = &fsckd->inodes.rb_node; |
| 1798 | while (*p) { | 1824 | while (*p) { |
| @@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, | |||
| 1816 | if (!fscki) | 1842 | if (!fscki) |
| 1817 | return ERR_PTR(-ENOMEM); | 1843 | return ERR_PTR(-ENOMEM); |
| 1818 | 1844 | ||
| 1845 | inode = ilookup(c->vfs_sb, inum); | ||
| 1846 | |||
| 1819 | fscki->inum = inum; | 1847 | fscki->inum = inum; |
| 1820 | fscki->nlink = le32_to_cpu(ino->nlink); | 1848 | /* |
| 1821 | fscki->size = le64_to_cpu(ino->size); | 1849 | * If the inode is present in the VFS inode cache, use it instead of |
| 1822 | fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); | 1850 | * the on-flash inode which might be out-of-date. E.g., the size might |
| 1823 | fscki->xattr_sz = le32_to_cpu(ino->xattr_size); | 1851 | * be out-of-date. If we do not do this, the following may happen, for |
| 1824 | fscki->xattr_nms = le32_to_cpu(ino->xattr_names); | 1852 | * example: |
| 1825 | fscki->mode = le32_to_cpu(ino->mode); | 1853 | * 1. A power cut happens |
| 1854 | * 2. We mount the file-system R/O, the replay process fixes up the | ||
| 1855 | * inode size in the VFS cache, but on on-flash. | ||
| 1856 | * 3. 'check_leaf()' fails because it hits a data node beyond inode | ||
| 1857 | * size. | ||
| 1858 | */ | ||
| 1859 | if (!inode) { | ||
| 1860 | fscki->nlink = le32_to_cpu(ino->nlink); | ||
| 1861 | fscki->size = le64_to_cpu(ino->size); | ||
| 1862 | fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); | ||
| 1863 | fscki->xattr_sz = le32_to_cpu(ino->xattr_size); | ||
| 1864 | fscki->xattr_nms = le32_to_cpu(ino->xattr_names); | ||
| 1865 | fscki->mode = le32_to_cpu(ino->mode); | ||
| 1866 | } else { | ||
| 1867 | ui = ubifs_inode(inode); | ||
| 1868 | fscki->nlink = inode->i_nlink; | ||
| 1869 | fscki->size = inode->i_size; | ||
| 1870 | fscki->xattr_cnt = ui->xattr_cnt; | ||
| 1871 | fscki->xattr_sz = ui->xattr_size; | ||
| 1872 | fscki->xattr_nms = ui->xattr_names; | ||
| 1873 | fscki->mode = inode->i_mode; | ||
| 1874 | iput(inode); | ||
| 1875 | } | ||
| 1876 | |||
| 1826 | if (S_ISDIR(fscki->mode)) { | 1877 | if (S_ISDIR(fscki->mode)) { |
| 1827 | fscki->calc_sz = UBIFS_INO_NODE_SZ; | 1878 | fscki->calc_sz = UBIFS_INO_NODE_SZ; |
| 1828 | fscki->calc_cnt = 2; | 1879 | fscki->calc_cnt = 2; |
| 1829 | } | 1880 | } |
| 1881 | |||
| 1830 | rb_link_node(&fscki->rb, parent, p); | 1882 | rb_link_node(&fscki->rb, parent, p); |
| 1831 | rb_insert_color(&fscki->rb, &fsckd->inodes); | 1883 | rb_insert_color(&fscki->rb, &fsckd->inodes); |
| 1884 | |||
| 1832 | return fscki; | 1885 | return fscki; |
| 1833 | } | 1886 | } |
| 1834 | 1887 | ||
| @@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) | |||
| 2421 | hashb = key_block(c, &sb->key); | 2474 | hashb = key_block(c, &sb->key); |
| 2422 | 2475 | ||
| 2423 | if (hasha > hashb) { | 2476 | if (hasha > hashb) { |
| 2424 | ubifs_err("larger hash %u goes before %u", hasha, hashb); | 2477 | ubifs_err("larger hash %u goes before %u", |
| 2478 | hasha, hashb); | ||
| 2425 | goto error_dump; | 2479 | goto error_dump; |
| 2426 | } | 2480 | } |
| 2427 | } | 2481 | } |
| @@ -2437,14 +2491,12 @@ error_dump: | |||
| 2437 | return 0; | 2491 | return 0; |
| 2438 | } | 2492 | } |
| 2439 | 2493 | ||
| 2440 | static int invocation_cnt; | ||
| 2441 | |||
| 2442 | int dbg_force_in_the_gaps(void) | 2494 | int dbg_force_in_the_gaps(void) |
| 2443 | { | 2495 | { |
| 2444 | if (!dbg_force_in_the_gaps_enabled) | 2496 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) |
| 2445 | return 0; | 2497 | return 0; |
| 2446 | /* Force in-the-gaps every 8th commit */ | 2498 | |
| 2447 | return !((invocation_cnt++) & 0x7); | 2499 | return !(random32() & 7); |
| 2448 | } | 2500 | } |
| 2449 | 2501 | ||
| 2450 | /* Failure mode for recovery testing */ | 2502 | /* Failure mode for recovery testing */ |
| @@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, | |||
| 2632 | int len, int check) | 2684 | int len, int check) |
| 2633 | { | 2685 | { |
| 2634 | if (in_failure_mode(desc)) | 2686 | if (in_failure_mode(desc)) |
| 2635 | return -EIO; | 2687 | return -EROFS; |
| 2636 | return ubi_leb_read(desc, lnum, buf, offset, len, check); | 2688 | return ubi_leb_read(desc, lnum, buf, offset, len, check); |
| 2637 | } | 2689 | } |
| 2638 | 2690 | ||
| @@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, | |||
| 2642 | int err, failing; | 2694 | int err, failing; |
| 2643 | 2695 | ||
| 2644 | if (in_failure_mode(desc)) | 2696 | if (in_failure_mode(desc)) |
| 2645 | return -EIO; | 2697 | return -EROFS; |
| 2646 | failing = do_fail(desc, lnum, 1); | 2698 | failing = do_fail(desc, lnum, 1); |
| 2647 | if (failing) | 2699 | if (failing) |
| 2648 | cut_data(buf, len); | 2700 | cut_data(buf, len); |
| @@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, | |||
| 2650 | if (err) | 2702 | if (err) |
| 2651 | return err; | 2703 | return err; |
| 2652 | if (failing) | 2704 | if (failing) |
| 2653 | return -EIO; | 2705 | return -EROFS; |
| 2654 | return 0; | 2706 | return 0; |
| 2655 | } | 2707 | } |
| 2656 | 2708 | ||
| @@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, | |||
| 2660 | int err; | 2712 | int err; |
| 2661 | 2713 | ||
| 2662 | if (do_fail(desc, lnum, 1)) | 2714 | if (do_fail(desc, lnum, 1)) |
| 2663 | return -EIO; | 2715 | return -EROFS; |
| 2664 | err = ubi_leb_change(desc, lnum, buf, len, dtype); | 2716 | err = ubi_leb_change(desc, lnum, buf, len, dtype); |
| 2665 | if (err) | 2717 | if (err) |
| 2666 | return err; | 2718 | return err; |
| 2667 | if (do_fail(desc, lnum, 1)) | 2719 | if (do_fail(desc, lnum, 1)) |
| 2668 | return -EIO; | 2720 | return -EROFS; |
| 2669 | return 0; | 2721 | return 0; |
| 2670 | } | 2722 | } |
| 2671 | 2723 | ||
| @@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) | |||
| 2674 | int err; | 2726 | int err; |
| 2675 | 2727 | ||
| 2676 | if (do_fail(desc, lnum, 0)) | 2728 | if (do_fail(desc, lnum, 0)) |
| 2677 | return -EIO; | 2729 | return -EROFS; |
| 2678 | err = ubi_leb_erase(desc, lnum); | 2730 | err = ubi_leb_erase(desc, lnum); |
| 2679 | if (err) | 2731 | if (err) |
| 2680 | return err; | 2732 | return err; |
| 2681 | if (do_fail(desc, lnum, 0)) | 2733 | if (do_fail(desc, lnum, 0)) |
| 2682 | return -EIO; | 2734 | return -EROFS; |
| 2683 | return 0; | 2735 | return 0; |
| 2684 | } | 2736 | } |
| 2685 | 2737 | ||
| @@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) | |||
| 2688 | int err; | 2740 | int err; |
| 2689 | 2741 | ||
| 2690 | if (do_fail(desc, lnum, 0)) | 2742 | if (do_fail(desc, lnum, 0)) |
| 2691 | return -EIO; | 2743 | return -EROFS; |
| 2692 | err = ubi_leb_unmap(desc, lnum); | 2744 | err = ubi_leb_unmap(desc, lnum); |
| 2693 | if (err) | 2745 | if (err) |
| 2694 | return err; | 2746 | return err; |
| 2695 | if (do_fail(desc, lnum, 0)) | 2747 | if (do_fail(desc, lnum, 0)) |
| 2696 | return -EIO; | 2748 | return -EROFS; |
| 2697 | return 0; | 2749 | return 0; |
| 2698 | } | 2750 | } |
| 2699 | 2751 | ||
| 2700 | int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) | 2752 | int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) |
| 2701 | { | 2753 | { |
| 2702 | if (in_failure_mode(desc)) | 2754 | if (in_failure_mode(desc)) |
| 2703 | return -EIO; | 2755 | return -EROFS; |
| 2704 | return ubi_is_mapped(desc, lnum); | 2756 | return ubi_is_mapped(desc, lnum); |
| 2705 | } | 2757 | } |
| 2706 | 2758 | ||
| @@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) | |||
| 2709 | int err; | 2761 | int err; |
| 2710 | 2762 | ||
| 2711 | if (do_fail(desc, lnum, 0)) | 2763 | if (do_fail(desc, lnum, 0)) |
| 2712 | return -EIO; | 2764 | return -EROFS; |
| 2713 | err = ubi_leb_map(desc, lnum, dtype); | 2765 | err = ubi_leb_map(desc, lnum, dtype); |
| 2714 | if (err) | 2766 | if (err) |
| 2715 | return err; | 2767 | return err; |
| 2716 | if (do_fail(desc, lnum, 0)) | 2768 | if (do_fail(desc, lnum, 0)) |
| 2717 | return -EIO; | 2769 | return -EROFS; |
| 2718 | return 0; | 2770 | return 0; |
| 2719 | } | 2771 | } |
| 2720 | 2772 | ||
| @@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void) | |||
| 2784 | static int open_debugfs_file(struct inode *inode, struct file *file) | 2836 | static int open_debugfs_file(struct inode *inode, struct file *file) |
| 2785 | { | 2837 | { |
| 2786 | file->private_data = inode->i_private; | 2838 | file->private_data = inode->i_private; |
| 2787 | return 0; | 2839 | return nonseekable_open(inode, file); |
| 2788 | } | 2840 | } |
| 2789 | 2841 | ||
| 2790 | static ssize_t write_debugfs_file(struct file *file, const char __user *buf, | 2842 | static ssize_t write_debugfs_file(struct file *file, const char __user *buf, |
| @@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, | |||
| 2795 | 2847 | ||
| 2796 | if (file->f_path.dentry == d->dfs_dump_lprops) | 2848 | if (file->f_path.dentry == d->dfs_dump_lprops) |
| 2797 | dbg_dump_lprops(c); | 2849 | dbg_dump_lprops(c); |
| 2798 | else if (file->f_path.dentry == d->dfs_dump_budg) { | 2850 | else if (file->f_path.dentry == d->dfs_dump_budg) |
| 2799 | spin_lock(&c->space_lock); | 2851 | dbg_dump_budg(c, &c->bi); |
| 2800 | dbg_dump_budg(c); | 2852 | else if (file->f_path.dentry == d->dfs_dump_tnc) { |
| 2801 | spin_unlock(&c->space_lock); | ||
| 2802 | } else if (file->f_path.dentry == d->dfs_dump_tnc) { | ||
| 2803 | mutex_lock(&c->tnc_mutex); | 2853 | mutex_lock(&c->tnc_mutex); |
| 2804 | dbg_dump_tnc(c); | 2854 | dbg_dump_tnc(c); |
| 2805 | mutex_unlock(&c->tnc_mutex); | 2855 | mutex_unlock(&c->tnc_mutex); |
| 2806 | } else | 2856 | } else |
| 2807 | return -EINVAL; | 2857 | return -EINVAL; |
| 2808 | 2858 | ||
| 2809 | *ppos += count; | ||
| 2810 | return count; | 2859 | return count; |
| 2811 | } | 2860 | } |
| 2812 | 2861 | ||
| @@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = { | |||
| 2814 | .open = open_debugfs_file, | 2863 | .open = open_debugfs_file, |
| 2815 | .write = write_debugfs_file, | 2864 | .write = write_debugfs_file, |
| 2816 | .owner = THIS_MODULE, | 2865 | .owner = THIS_MODULE, |
| 2817 | .llseek = default_llseek, | 2866 | .llseek = no_llseek, |
| 2818 | }; | 2867 | }; |
| 2819 | 2868 | ||
| 2820 | /** | 2869 | /** |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index e6493cac193..a811ac4a26b 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
| @@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, | |||
| 31 | 31 | ||
| 32 | #ifdef CONFIG_UBIFS_FS_DEBUG | 32 | #ifdef CONFIG_UBIFS_FS_DEBUG |
| 33 | 33 | ||
| 34 | #include <linux/random.h> | ||
| 35 | |||
| 34 | /** | 36 | /** |
| 35 | * ubifs_debug_info - per-FS debugging information. | 37 | * ubifs_debug_info - per-FS debugging information. |
| 36 | * @old_zroot: old index root - used by 'dbg_check_old_index()' | 38 | * @old_zroot: old index root - used by 'dbg_check_old_index()' |
| @@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, | |||
| 50 | * @new_ihead_offs: used by debugging to check @c->ihead_offs | 52 | * @new_ihead_offs: used by debugging to check @c->ihead_offs |
| 51 | * | 53 | * |
| 52 | * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') | 54 | * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') |
| 53 | * @saved_free: saved free space (used by 'dbg_save_space_info()') | 55 | * @saved_bi: saved budgeting information |
| 56 | * @saved_free: saved amount of free space | ||
| 57 | * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt | ||
| 54 | * | 58 | * |
| 55 | * dfs_dir_name: name of debugfs directory containing this file-system's files | 59 | * @dfs_dir_name: name of debugfs directory containing this file-system's files |
| 56 | * dfs_dir: direntry object of the file-system debugfs directory | 60 | * @dfs_dir: direntry object of the file-system debugfs directory |
| 57 | * dfs_dump_lprops: "dump lprops" debugfs knob | 61 | * @dfs_dump_lprops: "dump lprops" debugfs knob |
| 58 | * dfs_dump_budg: "dump budgeting information" debugfs knob | 62 | * @dfs_dump_budg: "dump budgeting information" debugfs knob |
| 59 | * dfs_dump_tnc: "dump TNC" debugfs knob | 63 | * @dfs_dump_tnc: "dump TNC" debugfs knob |
| 60 | */ | 64 | */ |
| 61 | struct ubifs_debug_info { | 65 | struct ubifs_debug_info { |
| 62 | struct ubifs_zbranch old_zroot; | 66 | struct ubifs_zbranch old_zroot; |
| @@ -76,7 +80,9 @@ struct ubifs_debug_info { | |||
| 76 | int new_ihead_offs; | 80 | int new_ihead_offs; |
| 77 | 81 | ||
| 78 | struct ubifs_lp_stats saved_lst; | 82 | struct ubifs_lp_stats saved_lst; |
| 83 | struct ubifs_budg_info saved_bi; | ||
| 79 | long long saved_free; | 84 | long long saved_free; |
| 85 | int saved_idx_gc_cnt; | ||
| 80 | 86 | ||
| 81 | char dfs_dir_name[100]; | 87 | char dfs_dir_name[100]; |
| 82 | struct dentry *dfs_dir; | 88 | struct dentry *dfs_dir; |
| @@ -101,23 +107,7 @@ struct ubifs_debug_info { | |||
| 101 | } \ | 107 | } \ |
| 102 | } while (0) | 108 | } while (0) |
| 103 | 109 | ||
| 104 | #define dbg_dump_stack() do { \ | 110 | #define dbg_dump_stack() dump_stack() |
| 105 | if (!dbg_failure_mode) \ | ||
| 106 | dump_stack(); \ | ||
| 107 | } while (0) | ||
| 108 | |||
| 109 | /* Generic debugging messages */ | ||
| 110 | #define dbg_msg(fmt, ...) do { \ | ||
| 111 | spin_lock(&dbg_lock); \ | ||
| 112 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ | ||
| 113 | __func__, ##__VA_ARGS__); \ | ||
| 114 | spin_unlock(&dbg_lock); \ | ||
| 115 | } while (0) | ||
| 116 | |||
| 117 | #define dbg_do_msg(typ, fmt, ...) do { \ | ||
| 118 | if (ubifs_msg_flags & typ) \ | ||
| 119 | dbg_msg(fmt, ##__VA_ARGS__); \ | ||
| 120 | } while (0) | ||
| 121 | 111 | ||
| 122 | #define dbg_err(fmt, ...) do { \ | 112 | #define dbg_err(fmt, ...) do { \ |
| 123 | spin_lock(&dbg_lock); \ | 113 | spin_lock(&dbg_lock); \ |
| @@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c, | |||
| 137 | #define DBGKEY(key) dbg_key_str0(c, (key)) | 127 | #define DBGKEY(key) dbg_key_str0(c, (key)) |
| 138 | #define DBGKEY1(key) dbg_key_str1(c, (key)) | 128 | #define DBGKEY1(key) dbg_key_str1(c, (key)) |
| 139 | 129 | ||
| 140 | /* General messages */ | 130 | #define ubifs_dbg_msg(type, fmt, ...) do { \ |
| 141 | #define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) | 131 | spin_lock(&dbg_lock); \ |
| 132 | pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ | ||
| 133 | spin_unlock(&dbg_lock); \ | ||
| 134 | } while (0) | ||
| 142 | 135 | ||
| 136 | /* Just a debugging messages not related to any specific UBIFS subsystem */ | ||
| 137 | #define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) | ||
| 138 | /* General messages */ | ||
| 139 | #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) | ||
| 143 | /* Additional journal messages */ | 140 | /* Additional journal messages */ |
| 144 | #define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) | 141 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) |
| 145 | |||
| 146 | /* Additional TNC messages */ | 142 | /* Additional TNC messages */ |
| 147 | #define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) | 143 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) |
| 148 | |||
| 149 | /* Additional lprops messages */ | 144 | /* Additional lprops messages */ |
| 150 | #define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) | 145 | #define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) |
| 151 | |||
| 152 | /* Additional LEB find messages */ | 146 | /* Additional LEB find messages */ |
| 153 | #define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) | 147 | #define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) |
| 154 | |||
| 155 | /* Additional mount messages */ | 148 | /* Additional mount messages */ |
| 156 | #define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) | 149 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) |
| 157 | |||
| 158 | /* Additional I/O messages */ | 150 | /* Additional I/O messages */ |
| 159 | #define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) | 151 | #define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) |
| 160 | |||
| 161 | /* Additional commit messages */ | 152 | /* Additional commit messages */ |
| 162 | #define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) | 153 | #define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) |
| 163 | |||
| 164 | /* Additional budgeting messages */ | 154 | /* Additional budgeting messages */ |
| 165 | #define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) | 155 | #define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) |
| 166 | |||
| 167 | /* Additional log messages */ | 156 | /* Additional log messages */ |
| 168 | #define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) | 157 | #define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) |
| 169 | |||
| 170 | /* Additional gc messages */ | 158 | /* Additional gc messages */ |
| 171 | #define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) | 159 | #define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) |
| 172 | |||
| 173 | /* Additional scan messages */ | 160 | /* Additional scan messages */ |
| 174 | #define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) | 161 | #define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) |
| 175 | |||
| 176 | /* Additional recovery messages */ | 162 | /* Additional recovery messages */ |
| 177 | #define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) | 163 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) |
| 178 | |||
| 179 | /* | ||
| 180 | * Debugging message type flags. | ||
| 181 | * | ||
| 182 | * UBIFS_MSG_GEN: general messages | ||
| 183 | * UBIFS_MSG_JNL: journal messages | ||
| 184 | * UBIFS_MSG_MNT: mount messages | ||
| 185 | * UBIFS_MSG_CMT: commit messages | ||
| 186 | * UBIFS_MSG_FIND: LEB find messages | ||
| 187 | * UBIFS_MSG_BUDG: budgeting messages | ||
| 188 | * UBIFS_MSG_GC: garbage collection messages | ||
| 189 | * UBIFS_MSG_TNC: TNC messages | ||
| 190 | * UBIFS_MSG_LP: lprops messages | ||
| 191 | * UBIFS_MSG_IO: I/O messages | ||
| 192 | * UBIFS_MSG_LOG: log messages | ||
| 193 | * UBIFS_MSG_SCAN: scan messages | ||
| 194 | * UBIFS_MSG_RCVRY: recovery messages | ||
| 195 | */ | ||
| 196 | enum { | ||
| 197 | UBIFS_MSG_GEN = 0x1, | ||
| 198 | UBIFS_MSG_JNL = 0x2, | ||
| 199 | UBIFS_MSG_MNT = 0x4, | ||
| 200 | UBIFS_MSG_CMT = 0x8, | ||
| 201 | UBIFS_MSG_FIND = 0x10, | ||
| 202 | UBIFS_MSG_BUDG = 0x20, | ||
| 203 | UBIFS_MSG_GC = 0x40, | ||
| 204 | UBIFS_MSG_TNC = 0x80, | ||
| 205 | UBIFS_MSG_LP = 0x100, | ||
| 206 | UBIFS_MSG_IO = 0x200, | ||
| 207 | UBIFS_MSG_LOG = 0x400, | ||
| 208 | UBIFS_MSG_SCAN = 0x800, | ||
| 209 | UBIFS_MSG_RCVRY = 0x1000, | ||
| 210 | }; | ||
| 211 | 164 | ||
| 212 | /* | 165 | /* |
| 213 | * Debugging check flags. | 166 | * Debugging check flags. |
| @@ -233,11 +186,9 @@ enum { | |||
| 233 | /* | 186 | /* |
| 234 | * Special testing flags. | 187 | * Special testing flags. |
| 235 | * | 188 | * |
| 236 | * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method | ||
| 237 | * UBIFS_TST_RCVRY: failure mode for recovery testing | 189 | * UBIFS_TST_RCVRY: failure mode for recovery testing |
| 238 | */ | 190 | */ |
| 239 | enum { | 191 | enum { |
| 240 | UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, | ||
| 241 | UBIFS_TST_RCVRY = 0x4, | 192 | UBIFS_TST_RCVRY = 0x4, |
| 242 | }; | 193 | }; |
| 243 | 194 | ||
| @@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, | |||
| 262 | int offs); | 213 | int offs); |
| 263 | void dbg_dump_budget_req(const struct ubifs_budget_req *req); | 214 | void dbg_dump_budget_req(const struct ubifs_budget_req *req); |
| 264 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst); | 215 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst); |
| 265 | void dbg_dump_budg(struct ubifs_info *c); | 216 | void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); |
| 266 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); | 217 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); |
| 267 | void dbg_dump_lprops(struct ubifs_info *c); | 218 | void dbg_dump_lprops(struct ubifs_info *c); |
| 268 | void dbg_dump_lpt_info(struct ubifs_info *c); | 219 | void dbg_dump_lpt_info(struct ubifs_info *c); |
| @@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); | |||
| 304 | int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); | 255 | int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); |
| 305 | 256 | ||
| 306 | /* Force the use of in-the-gaps method for testing */ | 257 | /* Force the use of in-the-gaps method for testing */ |
| 307 | 258 | static inline int dbg_force_in_the_gaps_enabled(void) | |
| 308 | #define dbg_force_in_the_gaps_enabled \ | 259 | { |
| 309 | (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) | 260 | return ubifs_chk_flags & UBIFS_CHK_GEN; |
| 310 | 261 | } | |
| 311 | int dbg_force_in_the_gaps(void); | 262 | int dbg_force_in_the_gaps(void); |
| 312 | 263 | ||
| 313 | /* Failure mode for recovery testing */ | 264 | /* Failure mode for recovery testing */ |
| 314 | |||
| 315 | #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) | 265 | #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) |
| 316 | 266 | ||
| 317 | #ifndef UBIFS_DBG_PRESERVE_UBI | 267 | #ifndef UBIFS_DBG_PRESERVE_UBI |
| 318 | |||
| 319 | #define ubi_leb_read dbg_leb_read | 268 | #define ubi_leb_read dbg_leb_read |
| 320 | #define ubi_leb_write dbg_leb_write | 269 | #define ubi_leb_write dbg_leb_write |
| 321 | #define ubi_leb_change dbg_leb_change | 270 | #define ubi_leb_change dbg_leb_change |
| @@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void); | |||
| 323 | #define ubi_leb_unmap dbg_leb_unmap | 272 | #define ubi_leb_unmap dbg_leb_unmap |
| 324 | #define ubi_is_mapped dbg_is_mapped | 273 | #define ubi_is_mapped dbg_is_mapped |
| 325 | #define ubi_leb_map dbg_leb_map | 274 | #define ubi_leb_map dbg_leb_map |
| 326 | |||
| 327 | #endif | 275 | #endif |
| 328 | 276 | ||
| 329 | int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, | 277 | int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, |
| @@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
| 370 | __func__, __LINE__, current->pid); \ | 318 | __func__, __LINE__, current->pid); \ |
| 371 | } while (0) | 319 | } while (0) |
| 372 | 320 | ||
| 373 | #define dbg_err(fmt, ...) do { \ | 321 | #define dbg_err(fmt, ...) do { \ |
| 374 | if (0) \ | 322 | if (0) \ |
| 375 | ubifs_err(fmt, ##__VA_ARGS__); \ | 323 | ubifs_err(fmt, ##__VA_ARGS__); \ |
| 376 | } while (0) | 324 | } while (0) |
| 377 | 325 | ||
| 378 | #define dbg_msg(fmt, ...) do { \ | 326 | #define ubifs_dbg_msg(fmt, ...) do { \ |
| 379 | if (0) \ | 327 | if (0) \ |
| 380 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ | 328 | pr_debug(fmt "\n", ##__VA_ARGS__); \ |
| 381 | current->pid, __func__, ##__VA_ARGS__); \ | ||
| 382 | } while (0) | 329 | } while (0) |
| 383 | 330 | ||
| 384 | #define dbg_dump_stack() | 331 | #define dbg_dump_stack() |
| 385 | #define ubifs_assert_cmt_locked(c) | 332 | #define ubifs_assert_cmt_locked(c) |
| 386 | 333 | ||
| 387 | #define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 334 | #define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 388 | #define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 335 | #define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 389 | #define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 336 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 390 | #define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 337 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 391 | #define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 338 | #define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 392 | #define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 339 | #define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 393 | #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 340 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 394 | #define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 341 | #define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 395 | #define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 342 | #define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 396 | #define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 343 | #define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 397 | #define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 344 | #define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 398 | #define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 345 | #define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 399 | #define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 346 | #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 347 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | ||
| 400 | 348 | ||
| 401 | #define DBGKEY(key) ((char *)(key)) | 349 | #define DBGKEY(key) ((char *)(key)) |
| 402 | #define DBGKEY1(key) ((char *)(key)) | 350 | #define DBGKEY1(key) ((char *)(key)) |
| @@ -420,7 +368,9 @@ static inline void | |||
| 420 | dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } | 368 | dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } |
| 421 | static inline void | 369 | static inline void |
| 422 | dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } | 370 | dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } |
| 423 | static inline void dbg_dump_budg(struct ubifs_info *c) { return; } | 371 | static inline void |
| 372 | dbg_dump_budg(struct ubifs_info *c, | ||
| 373 | const struct ubifs_budg_info *bi) { return; } | ||
| 424 | static inline void dbg_dump_lprop(const struct ubifs_info *c, | 374 | static inline void dbg_dump_lprop(const struct ubifs_info *c, |
| 425 | const struct ubifs_lprops *lp) { return; } | 375 | const struct ubifs_lprops *lp) { return; } |
| 426 | static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } | 376 | static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } |
| @@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c, | |||
| 482 | struct list_head *head) { return 0; } | 432 | struct list_head *head) { return 0; } |
| 483 | 433 | ||
| 484 | static inline int dbg_force_in_the_gaps(void) { return 0; } | 434 | static inline int dbg_force_in_the_gaps(void) { return 0; } |
| 485 | #define dbg_force_in_the_gaps_enabled 0 | 435 | #define dbg_force_in_the_gaps_enabled() 0 |
| 486 | #define dbg_failure_mode 0 | 436 | #define dbg_failure_mode 0 |
| 487 | 437 | ||
| 488 | static inline int dbg_debugfs_init(void) { return 0; } | 438 | static inline int dbg_debugfs_init(void) { return 0; } |
| 489 | static inline void dbg_debugfs_exit(void) { return; } | 439 | static inline void dbg_debugfs_exit(void) { return; } |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7217d67a80a..ef5abd38f0b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
| @@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 603 | ubifs_release_budget(c, &req); | 603 | ubifs_release_budget(c, &req); |
| 604 | else { | 604 | else { |
| 605 | /* We've deleted something - clean the "no space" flags */ | 605 | /* We've deleted something - clean the "no space" flags */ |
| 606 | c->nospace = c->nospace_rp = 0; | 606 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 607 | smp_wmb(); | 607 | smp_wmb(); |
| 608 | } | 608 | } |
| 609 | return 0; | 609 | return 0; |
| @@ -693,7 +693,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 693 | ubifs_release_budget(c, &req); | 693 | ubifs_release_budget(c, &req); |
| 694 | else { | 694 | else { |
| 695 | /* We've deleted something - clean the "no space" flags */ | 695 | /* We've deleted something - clean the "no space" flags */ |
| 696 | c->nospace = c->nospace_rp = 0; | 696 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 697 | smp_wmb(); | 697 | smp_wmb(); |
| 698 | } | 698 | } |
| 699 | return 0; | 699 | return 0; |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b286db79c68..5e7fccfc4b2 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
| @@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c) | |||
| 212 | */ | 212 | */ |
| 213 | static void release_existing_page_budget(struct ubifs_info *c) | 213 | static void release_existing_page_budget(struct ubifs_info *c) |
| 214 | { | 214 | { |
| 215 | struct ubifs_budget_req req = { .dd_growth = c->page_budget}; | 215 | struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; |
| 216 | 216 | ||
| 217 | ubifs_release_budget(c, &req); | 217 | ubifs_release_budget(c, &req); |
| 218 | } | 218 | } |
| @@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len) | |||
| 971 | * the page locked, and it locks @ui_mutex. However, write-back does take inode | 971 | * the page locked, and it locks @ui_mutex. However, write-back does take inode |
| 972 | * @i_mutex, which means other VFS operations may be run on this inode at the | 972 | * @i_mutex, which means other VFS operations may be run on this inode at the |
| 973 | * same time. And the problematic one is truncation to smaller size, from where | 973 | * same time. And the problematic one is truncation to smaller size, from where |
| 974 | * we have to call 'truncate_setsize()', which first changes @inode->i_size, then | 974 | * we have to call 'truncate_setsize()', which first changes @inode->i_size, |
| 975 | * drops the truncated pages. And while dropping the pages, it takes the page | 975 | * then drops the truncated pages. And while dropping the pages, it takes the |
| 976 | * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with | 976 | * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' |
| 977 | * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This | 977 | * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. |
| 978 | * means that @inode->i_size is changed while @ui_mutex is unlocked. | 978 | * This means that @inode->i_size is changed while @ui_mutex is unlocked. |
| 979 | * | 979 | * |
| 980 | * XXX(truncate): with the new truncate sequence this is not true anymore, | 980 | * XXX(truncate): with the new truncate sequence this is not true anymore, |
| 981 | * and the calls to truncate_setsize can be move around freely. They should | 981 | * and the calls to truncate_setsize can be move around freely. They should |
| @@ -1189,7 +1189,7 @@ out_budg: | |||
| 1189 | if (budgeted) | 1189 | if (budgeted) |
| 1190 | ubifs_release_budget(c, &req); | 1190 | ubifs_release_budget(c, &req); |
| 1191 | else { | 1191 | else { |
| 1192 | c->nospace = c->nospace_rp = 0; | 1192 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 1193 | smp_wmb(); | 1193 | smp_wmb(); |
| 1194 | } | 1194 | } |
| 1195 | return err; | 1195 | return err; |
| @@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync) | |||
| 1312 | 1312 | ||
| 1313 | dbg_gen("syncing inode %lu", inode->i_ino); | 1313 | dbg_gen("syncing inode %lu", inode->i_ino); |
| 1314 | 1314 | ||
| 1315 | if (inode->i_sb->s_flags & MS_RDONLY) | 1315 | if (c->ro_mount) |
| 1316 | /* | ||
| 1317 | * For some really strange reasons VFS does not filter out | ||
| 1318 | * 'fsync()' for R/O mounted file-systems as per 2.6.39. | ||
| 1319 | */ | ||
| 1316 | return 0; | 1320 | return 0; |
| 1317 | 1321 | ||
| 1318 | /* | 1322 | /* |
| @@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) | |||
| 1432 | } | 1436 | } |
| 1433 | 1437 | ||
| 1434 | /* | 1438 | /* |
| 1435 | * mmap()d file has taken write protection fault and is being made | 1439 | * mmap()d file has taken write protection fault and is being made writable. |
| 1436 | * writable. UBIFS must ensure page is budgeted for. | 1440 | * UBIFS must ensure page is budgeted for. |
| 1437 | */ | 1441 | */ |
| 1438 | static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 1442 | static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, |
| 1443 | struct vm_fault *vmf) | ||
| 1439 | { | 1444 | { |
| 1440 | struct page *page = vmf->page; | 1445 | struct page *page = vmf->page; |
| 1441 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 1446 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
| @@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 1536 | { | 1541 | { |
| 1537 | int err; | 1542 | int err; |
| 1538 | 1543 | ||
| 1539 | /* 'generic_file_mmap()' takes care of NOMMU case */ | ||
| 1540 | err = generic_file_mmap(file, vma); | 1544 | err = generic_file_mmap(file, vma); |
| 1541 | if (err) | 1545 | if (err) |
| 1542 | return err; | 1546 | return err; |
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 1d54383d126..2559d174e00 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
| @@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
| 252 | * But if the index takes fewer LEBs than it is reserved for it, | 252 | * But if the index takes fewer LEBs than it is reserved for it, |
| 253 | * this function must avoid picking those reserved LEBs. | 253 | * this function must avoid picking those reserved LEBs. |
| 254 | */ | 254 | */ |
| 255 | if (c->min_idx_lebs >= c->lst.idx_lebs) { | 255 | if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { |
| 256 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | 256 | rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; |
| 257 | exclude_index = 1; | 257 | exclude_index = 1; |
| 258 | } | 258 | } |
| 259 | spin_unlock(&c->space_lock); | 259 | spin_unlock(&c->space_lock); |
| @@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
| 276 | pick_free = 0; | 276 | pick_free = 0; |
| 277 | } else { | 277 | } else { |
| 278 | spin_lock(&c->space_lock); | 278 | spin_lock(&c->space_lock); |
| 279 | exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); | 279 | exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); |
| 280 | spin_unlock(&c->space_lock); | 280 | spin_unlock(&c->space_lock); |
| 281 | } | 281 | } |
| 282 | 282 | ||
| @@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, | |||
| 501 | 501 | ||
| 502 | /* Check if there are enough empty LEBs for commit */ | 502 | /* Check if there are enough empty LEBs for commit */ |
| 503 | spin_lock(&c->space_lock); | 503 | spin_lock(&c->space_lock); |
| 504 | if (c->min_idx_lebs > c->lst.idx_lebs) | 504 | if (c->bi.min_idx_lebs > c->lst.idx_lebs) |
| 505 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | 505 | rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; |
| 506 | else | 506 | else |
| 507 | rsvd_idx_lebs = 0; | 507 | rsvd_idx_lebs = 0; |
| 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 151f1088282..ded29f6224c 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
| @@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c) | |||
| 100 | if (err) | 100 | if (err) |
| 101 | return err; | 101 | return err; |
| 102 | 102 | ||
| 103 | err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 104 | if (err) | ||
| 105 | return err; | ||
| 106 | |||
| 103 | err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); | 107 | err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); |
| 104 | if (err) | 108 | if (err) |
| 105 | return err; | 109 | return err; |
| @@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c) | |||
| 118 | * This function compares data nodes @a and @b. Returns %1 if @a has greater | 122 | * This function compares data nodes @a and @b. Returns %1 if @a has greater |
| 119 | * inode or block number, and %-1 otherwise. | 123 | * inode or block number, and %-1 otherwise. |
| 120 | */ | 124 | */ |
| 121 | int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | 125 | static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) |
| 122 | { | 126 | { |
| 123 | ino_t inuma, inumb; | 127 | ino_t inuma, inumb; |
| 124 | struct ubifs_info *c = priv; | 128 | struct ubifs_info *c = priv; |
| @@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
| 161 | * first and sorted by length in descending order. Directory entry nodes go | 165 | * first and sorted by length in descending order. Directory entry nodes go |
| 162 | * after inode nodes and are sorted in ascending hash valuer order. | 166 | * after inode nodes and are sorted in ascending hash valuer order. |
| 163 | */ | 167 | */ |
| 164 | int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | 168 | static int nondata_nodes_cmp(void *priv, struct list_head *a, |
| 169 | struct list_head *b) | ||
| 165 | { | 170 | { |
| 166 | ino_t inuma, inumb; | 171 | ino_t inuma, inumb; |
| 167 | struct ubifs_info *c = priv; | 172 | struct ubifs_info *c = priv; |
| @@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
| 473 | ubifs_assert(c->gc_lnum != lnum); | 478 | ubifs_assert(c->gc_lnum != lnum); |
| 474 | ubifs_assert(wbuf->lnum != lnum); | 479 | ubifs_assert(wbuf->lnum != lnum); |
| 475 | 480 | ||
| 481 | if (lp->free + lp->dirty == c->leb_size) { | ||
| 482 | /* Special case - a free LEB */ | ||
| 483 | dbg_gc("LEB %d is free, return it", lp->lnum); | ||
| 484 | ubifs_assert(!(lp->flags & LPROPS_INDEX)); | ||
| 485 | |||
| 486 | if (lp->free != c->leb_size) { | ||
| 487 | /* | ||
| 488 | * Write buffers must be sync'd before unmapping | ||
| 489 | * freeable LEBs, because one of them may contain data | ||
| 490 | * which obsoletes something in 'lp->pnum'. | ||
| 491 | */ | ||
| 492 | err = gc_sync_wbufs(c); | ||
| 493 | if (err) | ||
| 494 | return err; | ||
| 495 | err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, | ||
| 496 | 0, 0, 0, 0); | ||
| 497 | if (err) | ||
| 498 | return err; | ||
| 499 | } | ||
| 500 | err = ubifs_leb_unmap(c, lp->lnum); | ||
| 501 | if (err) | ||
| 502 | return err; | ||
| 503 | |||
| 504 | if (c->gc_lnum == -1) { | ||
| 505 | c->gc_lnum = lnum; | ||
| 506 | return LEB_RETAINED; | ||
| 507 | } | ||
| 508 | |||
| 509 | return LEB_FREED; | ||
| 510 | } | ||
| 511 | |||
| 476 | /* | 512 | /* |
| 477 | * We scan the entire LEB even though we only really need to scan up to | 513 | * We scan the entire LEB even though we only really need to scan up to |
| 478 | * (c->leb_size - lp->free). | 514 | * (c->leb_size - lp->free). |
| @@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) | |||
| 682 | "(min. space %d)", lp.lnum, lp.free, lp.dirty, | 718 | "(min. space %d)", lp.lnum, lp.free, lp.dirty, |
| 683 | lp.free + lp.dirty, min_space); | 719 | lp.free + lp.dirty, min_space); |
| 684 | 720 | ||
| 685 | if (lp.free + lp.dirty == c->leb_size) { | ||
| 686 | /* An empty LEB was returned */ | ||
| 687 | dbg_gc("LEB %d is free, return it", lp.lnum); | ||
| 688 | /* | ||
| 689 | * ubifs_find_dirty_leb() doesn't return freeable index | ||
| 690 | * LEBs. | ||
| 691 | */ | ||
| 692 | ubifs_assert(!(lp.flags & LPROPS_INDEX)); | ||
| 693 | if (lp.free != c->leb_size) { | ||
| 694 | /* | ||
| 695 | * Write buffers must be sync'd before | ||
| 696 | * unmapping freeable LEBs, because one of them | ||
| 697 | * may contain data which obsoletes something | ||
| 698 | * in 'lp.pnum'. | ||
| 699 | */ | ||
| 700 | ret = gc_sync_wbufs(c); | ||
| 701 | if (ret) | ||
| 702 | goto out; | ||
| 703 | ret = ubifs_change_one_lp(c, lp.lnum, | ||
| 704 | c->leb_size, 0, 0, 0, | ||
| 705 | 0); | ||
| 706 | if (ret) | ||
| 707 | goto out; | ||
| 708 | } | ||
| 709 | ret = ubifs_leb_unmap(c, lp.lnum); | ||
| 710 | if (ret) | ||
| 711 | goto out; | ||
| 712 | ret = lp.lnum; | ||
| 713 | break; | ||
| 714 | } | ||
| 715 | |||
| 716 | space_before = c->leb_size - wbuf->offs - wbuf->used; | 721 | space_before = c->leb_size - wbuf->offs - wbuf->used; |
| 717 | if (wbuf->lnum == -1) | 722 | if (wbuf->lnum == -1) |
| 718 | space_before = 0; | 723 | space_before = 0; |
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index dfd168b7807..166951e0dcd 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
| @@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | |||
| 393 | ubifs_assert(wbuf->size % c->min_io_size == 0); | 393 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
| 394 | ubifs_assert(!c->ro_media && !c->ro_mount); | 394 | ubifs_assert(!c->ro_media && !c->ro_mount); |
| 395 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 395 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
| 396 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); | 396 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
| 397 | 397 | ||
| 398 | if (c->ro_error) | 398 | if (c->ro_error) |
| 399 | return -EROFS; | 399 | return -EROFS; |
| @@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | |||
| 452 | * @dtype: data type | 452 | * @dtype: data type |
| 453 | * | 453 | * |
| 454 | * This function targets the write-buffer to logical eraseblock @lnum:@offs. | 454 | * This function targets the write-buffer to logical eraseblock @lnum:@offs. |
| 455 | * The write-buffer is synchronized if it is not empty. Returns zero in case of | 455 | * The write-buffer has to be empty. Returns zero in case of success and a |
| 456 | * success and a negative error code in case of failure. | 456 | * negative error code in case of failure. |
| 457 | */ | 457 | */ |
| 458 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | 458 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, |
| 459 | int dtype) | 459 | int dtype) |
| @@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | |||
| 465 | ubifs_assert(offs >= 0 && offs <= c->leb_size); | 465 | ubifs_assert(offs >= 0 && offs <= c->leb_size); |
| 466 | ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); | 466 | ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); |
| 467 | ubifs_assert(lnum != wbuf->lnum); | 467 | ubifs_assert(lnum != wbuf->lnum); |
| 468 | 468 | ubifs_assert(wbuf->used == 0); | |
| 469 | if (wbuf->used > 0) { | ||
| 470 | int err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 471 | |||
| 472 | if (err) | ||
| 473 | return err; | ||
| 474 | } | ||
| 475 | 469 | ||
| 476 | spin_lock(&wbuf->lock); | 470 | spin_lock(&wbuf->lock); |
| 477 | wbuf->lnum = lnum; | 471 | wbuf->lnum = lnum; |
| @@ -573,7 +567,7 @@ out_timers: | |||
| 573 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | 567 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) |
| 574 | { | 568 | { |
| 575 | struct ubifs_info *c = wbuf->c; | 569 | struct ubifs_info *c = wbuf->c; |
| 576 | int err, written, n, aligned_len = ALIGN(len, 8), offs; | 570 | int err, written, n, aligned_len = ALIGN(len, 8); |
| 577 | 571 | ||
| 578 | dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, | 572 | dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, |
| 579 | dbg_ntype(((struct ubifs_ch *)buf)->node_type), | 573 | dbg_ntype(((struct ubifs_ch *)buf)->node_type), |
| @@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 588 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); | 582 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); |
| 589 | ubifs_assert(!c->ro_media && !c->ro_mount); | 583 | ubifs_assert(!c->ro_media && !c->ro_mount); |
| 590 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 584 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
| 591 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); | 585 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
| 592 | 586 | ||
| 593 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { | 587 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { |
| 594 | err = -ENOSPC; | 588 | err = -ENOSPC; |
| @@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 636 | goto exit; | 630 | goto exit; |
| 637 | } | 631 | } |
| 638 | 632 | ||
| 639 | offs = wbuf->offs; | ||
| 640 | written = 0; | 633 | written = 0; |
| 641 | 634 | ||
| 642 | if (wbuf->used) { | 635 | if (wbuf->used) { |
| @@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 653 | if (err) | 646 | if (err) |
| 654 | goto out; | 647 | goto out; |
| 655 | 648 | ||
| 656 | offs += wbuf->size; | 649 | wbuf->offs += wbuf->size; |
| 657 | len -= wbuf->avail; | 650 | len -= wbuf->avail; |
| 658 | aligned_len -= wbuf->avail; | 651 | aligned_len -= wbuf->avail; |
| 659 | written += wbuf->avail; | 652 | written += wbuf->avail; |
| @@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 672 | if (err) | 665 | if (err) |
| 673 | goto out; | 666 | goto out; |
| 674 | 667 | ||
| 675 | offs += wbuf->size; | 668 | wbuf->offs += wbuf->size; |
| 676 | len -= wbuf->size; | 669 | len -= wbuf->size; |
| 677 | aligned_len -= wbuf->size; | 670 | aligned_len -= wbuf->size; |
| 678 | written += wbuf->size; | 671 | written += wbuf->size; |
| @@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 687 | n = aligned_len >> c->max_write_shift; | 680 | n = aligned_len >> c->max_write_shift; |
| 688 | if (n) { | 681 | if (n) { |
| 689 | n <<= c->max_write_shift; | 682 | n <<= c->max_write_shift; |
| 690 | dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); | 683 | dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, |
| 691 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, | 684 | wbuf->offs); |
| 692 | wbuf->dtype); | 685 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, |
| 686 | wbuf->offs, n, wbuf->dtype); | ||
| 693 | if (err) | 687 | if (err) |
| 694 | goto out; | 688 | goto out; |
| 695 | offs += n; | 689 | wbuf->offs += n; |
| 696 | aligned_len -= n; | 690 | aligned_len -= n; |
| 697 | len -= n; | 691 | len -= n; |
| 698 | written += n; | 692 | written += n; |
| @@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 707 | */ | 701 | */ |
| 708 | memcpy(wbuf->buf, buf + written, len); | 702 | memcpy(wbuf->buf, buf + written, len); |
| 709 | 703 | ||
| 710 | wbuf->offs = offs; | ||
| 711 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 704 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
| 712 | wbuf->size = c->max_write_size; | 705 | wbuf->size = c->max_write_size; |
| 713 | else | 706 | else |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index aed25e86422..34b1679e6e3 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
| @@ -141,14 +141,8 @@ again: | |||
| 141 | * LEB with some empty space. | 141 | * LEB with some empty space. |
| 142 | */ | 142 | */ |
| 143 | lnum = ubifs_find_free_space(c, len, &offs, squeeze); | 143 | lnum = ubifs_find_free_space(c, len, &offs, squeeze); |
| 144 | if (lnum >= 0) { | 144 | if (lnum >= 0) |
| 145 | /* Found an LEB, add it to the journal head */ | ||
| 146 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); | ||
| 147 | if (err) | ||
| 148 | goto out_return; | ||
| 149 | /* A new bud was successfully allocated and added to the log */ | ||
| 150 | goto out; | 145 | goto out; |
| 151 | } | ||
| 152 | 146 | ||
| 153 | err = lnum; | 147 | err = lnum; |
| 154 | if (err != -ENOSPC) | 148 | if (err != -ENOSPC) |
| @@ -203,12 +197,23 @@ again: | |||
| 203 | return 0; | 197 | return 0; |
| 204 | } | 198 | } |
| 205 | 199 | ||
| 206 | err = ubifs_add_bud_to_log(c, jhead, lnum, 0); | ||
| 207 | if (err) | ||
| 208 | goto out_return; | ||
| 209 | offs = 0; | 200 | offs = 0; |
| 210 | 201 | ||
| 211 | out: | 202 | out: |
| 203 | /* | ||
| 204 | * Make sure we synchronize the write-buffer before we add the new bud | ||
| 205 | * to the log. Otherwise we may have a power cut after the log | ||
| 206 | * reference node for the last bud (@lnum) is written but before the | ||
| 207 | * write-buffer data are written to the next-to-last bud | ||
| 208 | * (@wbuf->lnum). And the effect would be that the recovery would see | ||
| 209 | * that there is corruption in the next-to-last bud. | ||
| 210 | */ | ||
| 211 | err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 212 | if (err) | ||
| 213 | goto out_return; | ||
| 214 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); | ||
| 215 | if (err) | ||
| 216 | goto out_return; | ||
| 212 | err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); | 217 | err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); |
| 213 | if (err) | 218 | if (err) |
| 214 | goto out_unlock; | 219 | goto out_unlock; |
| @@ -380,10 +385,8 @@ out: | |||
| 380 | if (err == -ENOSPC) { | 385 | if (err == -ENOSPC) { |
| 381 | /* This are some budgeting problems, print useful information */ | 386 | /* This are some budgeting problems, print useful information */ |
| 382 | down_write(&c->commit_sem); | 387 | down_write(&c->commit_sem); |
| 383 | spin_lock(&c->space_lock); | ||
| 384 | dbg_dump_stack(); | 388 | dbg_dump_stack(); |
| 385 | dbg_dump_budg(c); | 389 | dbg_dump_budg(c, &c->bi); |
| 386 | spin_unlock(&c->space_lock); | ||
| 387 | dbg_dump_lprops(c); | 390 | dbg_dump_lprops(c); |
| 388 | cmt_retries = dbg_check_lprops(c); | 391 | cmt_retries = dbg_check_lprops(c); |
| 389 | up_write(&c->commit_sem); | 392 | up_write(&c->commit_sem); |
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 40fa780ebea..affea9494ae 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
| @@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) | |||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | /** | 102 | /** |
| 103 | * next_log_lnum - switch to the next log LEB. | ||
| 104 | * @c: UBIFS file-system description object | ||
| 105 | * @lnum: current log LEB | ||
| 106 | */ | ||
| 107 | static inline int next_log_lnum(const struct ubifs_info *c, int lnum) | ||
| 108 | { | ||
| 109 | lnum += 1; | ||
| 110 | if (lnum > c->log_last) | ||
| 111 | lnum = UBIFS_LOG_LNUM; | ||
| 112 | |||
| 113 | return lnum; | ||
| 114 | } | ||
| 115 | |||
| 116 | /** | ||
| 117 | * empty_log_bytes - calculate amount of empty space in the log. | 103 | * empty_log_bytes - calculate amount of empty space in the log. |
| 118 | * @c: UBIFS file-system description object | 104 | * @c: UBIFS file-system description object |
| 119 | */ | 105 | */ |
| @@ -257,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) | |||
| 257 | ref->jhead = cpu_to_le32(jhead); | 243 | ref->jhead = cpu_to_le32(jhead); |
| 258 | 244 | ||
| 259 | if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { | 245 | if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { |
| 260 | c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); | 246 | c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); |
| 261 | c->lhead_offs = 0; | 247 | c->lhead_offs = 0; |
| 262 | } | 248 | } |
| 263 | 249 | ||
| @@ -425,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) | |||
| 425 | 411 | ||
| 426 | /* Switch to the next log LEB */ | 412 | /* Switch to the next log LEB */ |
| 427 | if (c->lhead_offs) { | 413 | if (c->lhead_offs) { |
| 428 | c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); | 414 | c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); |
| 429 | c->lhead_offs = 0; | 415 | c->lhead_offs = 0; |
| 430 | } | 416 | } |
| 431 | 417 | ||
| @@ -446,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) | |||
| 446 | 432 | ||
| 447 | c->lhead_offs += len; | 433 | c->lhead_offs += len; |
| 448 | if (c->lhead_offs == c->leb_size) { | 434 | if (c->lhead_offs == c->leb_size) { |
| 449 | c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); | 435 | c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); |
| 450 | c->lhead_offs = 0; | 436 | c->lhead_offs = 0; |
| 451 | } | 437 | } |
| 452 | 438 | ||
| @@ -533,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) | |||
| 533 | } | 519 | } |
| 534 | mutex_lock(&c->log_mutex); | 520 | mutex_lock(&c->log_mutex); |
| 535 | for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; | 521 | for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; |
| 536 | lnum = next_log_lnum(c, lnum)) { | 522 | lnum = ubifs_next_log_lnum(c, lnum)) { |
| 537 | dbg_log("unmap log LEB %d", lnum); | 523 | dbg_log("unmap log LEB %d", lnum); |
| 538 | err = ubifs_leb_unmap(c, lnum); | 524 | err = ubifs_leb_unmap(c, lnum); |
| 539 | if (err) | 525 | if (err) |
| @@ -642,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, | |||
| 642 | err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); | 628 | err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); |
| 643 | if (err) | 629 | if (err) |
| 644 | return err; | 630 | return err; |
| 645 | *lnum = next_log_lnum(c, *lnum); | 631 | *lnum = ubifs_next_log_lnum(c, *lnum); |
| 646 | *offs = 0; | 632 | *offs = 0; |
| 647 | } | 633 | } |
| 648 | memcpy(buf + *offs, node, len); | 634 | memcpy(buf + *offs, node, len); |
| @@ -712,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) | |||
| 712 | ubifs_scan_destroy(sleb); | 698 | ubifs_scan_destroy(sleb); |
| 713 | if (lnum == c->lhead_lnum) | 699 | if (lnum == c->lhead_lnum) |
| 714 | break; | 700 | break; |
| 715 | lnum = next_log_lnum(c, lnum); | 701 | lnum = ubifs_next_log_lnum(c, lnum); |
| 716 | } | 702 | } |
| 717 | if (offs) { | 703 | if (offs) { |
| 718 | int sz = ALIGN(offs, c->min_io_size); | 704 | int sz = ALIGN(offs, c->min_io_size); |
| @@ -732,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) | |||
| 732 | /* Unmap remaining LEBs */ | 718 | /* Unmap remaining LEBs */ |
| 733 | lnum = write_lnum; | 719 | lnum = write_lnum; |
| 734 | do { | 720 | do { |
| 735 | lnum = next_log_lnum(c, lnum); | 721 | lnum = ubifs_next_log_lnum(c, lnum); |
| 736 | err = ubifs_leb_unmap(c, lnum); | 722 | err = ubifs_leb_unmap(c, lnum); |
| 737 | if (err) | 723 | if (err) |
| 738 | return err; | 724 | return err; |
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 0ee0847f242..667884f4a61 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
| @@ -1007,21 +1007,11 @@ out: | |||
| 1007 | } | 1007 | } |
| 1008 | 1008 | ||
| 1009 | /** | 1009 | /** |
| 1010 | * struct scan_check_data - data provided to scan callback function. | ||
| 1011 | * @lst: LEB properties statistics | ||
| 1012 | * @err: error code | ||
| 1013 | */ | ||
| 1014 | struct scan_check_data { | ||
| 1015 | struct ubifs_lp_stats lst; | ||
| 1016 | int err; | ||
| 1017 | }; | ||
| 1018 | |||
| 1019 | /** | ||
| 1020 | * scan_check_cb - scan callback. | 1010 | * scan_check_cb - scan callback. |
| 1021 | * @c: the UBIFS file-system description object | 1011 | * @c: the UBIFS file-system description object |
| 1022 | * @lp: LEB properties to scan | 1012 | * @lp: LEB properties to scan |
| 1023 | * @in_tree: whether the LEB properties are in main memory | 1013 | * @in_tree: whether the LEB properties are in main memory |
| 1024 | * @data: information passed to and from the caller of the scan | 1014 | * @lst: lprops statistics to update |
| 1025 | * | 1015 | * |
| 1026 | * This function returns a code that indicates whether the scan should continue | 1016 | * This function returns a code that indicates whether the scan should continue |
| 1027 | * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree | 1017 | * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree |
| @@ -1030,11 +1020,10 @@ struct scan_check_data { | |||
| 1030 | */ | 1020 | */ |
| 1031 | static int scan_check_cb(struct ubifs_info *c, | 1021 | static int scan_check_cb(struct ubifs_info *c, |
| 1032 | const struct ubifs_lprops *lp, int in_tree, | 1022 | const struct ubifs_lprops *lp, int in_tree, |
| 1033 | struct scan_check_data *data) | 1023 | struct ubifs_lp_stats *lst) |
| 1034 | { | 1024 | { |
| 1035 | struct ubifs_scan_leb *sleb; | 1025 | struct ubifs_scan_leb *sleb; |
| 1036 | struct ubifs_scan_node *snod; | 1026 | struct ubifs_scan_node *snod; |
| 1037 | struct ubifs_lp_stats *lst = &data->lst; | ||
| 1038 | int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; | 1027 | int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; |
| 1039 | void *buf = NULL; | 1028 | void *buf = NULL; |
| 1040 | 1029 | ||
| @@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1044 | if (cat != (lp->flags & LPROPS_CAT_MASK)) { | 1033 | if (cat != (lp->flags & LPROPS_CAT_MASK)) { |
| 1045 | ubifs_err("bad LEB category %d expected %d", | 1034 | ubifs_err("bad LEB category %d expected %d", |
| 1046 | (lp->flags & LPROPS_CAT_MASK), cat); | 1035 | (lp->flags & LPROPS_CAT_MASK), cat); |
| 1047 | goto out; | 1036 | return -EINVAL; |
| 1048 | } | 1037 | } |
| 1049 | } | 1038 | } |
| 1050 | 1039 | ||
| @@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1078 | } | 1067 | } |
| 1079 | if (!found) { | 1068 | if (!found) { |
| 1080 | ubifs_err("bad LPT list (category %d)", cat); | 1069 | ubifs_err("bad LPT list (category %d)", cat); |
| 1081 | goto out; | 1070 | return -EINVAL; |
| 1082 | } | 1071 | } |
| 1083 | } | 1072 | } |
| 1084 | } | 1073 | } |
| @@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1090 | if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || | 1079 | if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || |
| 1091 | lp != heap->arr[lp->hpos]) { | 1080 | lp != heap->arr[lp->hpos]) { |
| 1092 | ubifs_err("bad LPT heap (category %d)", cat); | 1081 | ubifs_err("bad LPT heap (category %d)", cat); |
| 1093 | goto out; | 1082 | return -EINVAL; |
| 1094 | } | 1083 | } |
| 1095 | } | 1084 | } |
| 1096 | 1085 | ||
| 1097 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); | 1086 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
| 1098 | if (!buf) { | 1087 | if (!buf) |
| 1099 | ubifs_err("cannot allocate memory to scan LEB %d", lnum); | 1088 | return -ENOMEM; |
| 1100 | goto out; | 1089 | |
| 1090 | /* | ||
| 1091 | * After an unclean unmount, empty and freeable LEBs | ||
| 1092 | * may contain garbage - do not scan them. | ||
| 1093 | */ | ||
| 1094 | if (lp->free == c->leb_size) { | ||
| 1095 | lst->empty_lebs += 1; | ||
| 1096 | lst->total_free += c->leb_size; | ||
| 1097 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1098 | return LPT_SCAN_CONTINUE; | ||
| 1099 | } | ||
| 1100 | if (lp->free + lp->dirty == c->leb_size && | ||
| 1101 | !(lp->flags & LPROPS_INDEX)) { | ||
| 1102 | lst->total_free += lp->free; | ||
| 1103 | lst->total_dirty += lp->dirty; | ||
| 1104 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1105 | return LPT_SCAN_CONTINUE; | ||
| 1101 | } | 1106 | } |
| 1102 | 1107 | ||
| 1103 | sleb = ubifs_scan(c, lnum, 0, buf, 0); | 1108 | sleb = ubifs_scan(c, lnum, 0, buf, 0); |
| 1104 | if (IS_ERR(sleb)) { | 1109 | if (IS_ERR(sleb)) { |
| 1105 | /* | 1110 | ret = PTR_ERR(sleb); |
| 1106 | * After an unclean unmount, empty and freeable LEBs | 1111 | if (ret == -EUCLEAN) { |
| 1107 | * may contain garbage. | 1112 | dbg_dump_lprops(c); |
| 1108 | */ | 1113 | dbg_dump_budg(c, &c->bi); |
| 1109 | if (lp->free == c->leb_size) { | ||
| 1110 | ubifs_err("scan errors were in empty LEB " | ||
| 1111 | "- continuing checking"); | ||
| 1112 | lst->empty_lebs += 1; | ||
| 1113 | lst->total_free += c->leb_size; | ||
| 1114 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1115 | ret = LPT_SCAN_CONTINUE; | ||
| 1116 | goto exit; | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | if (lp->free + lp->dirty == c->leb_size && | ||
| 1120 | !(lp->flags & LPROPS_INDEX)) { | ||
| 1121 | ubifs_err("scan errors were in freeable LEB " | ||
| 1122 | "- continuing checking"); | ||
| 1123 | lst->total_free += lp->free; | ||
| 1124 | lst->total_dirty += lp->dirty; | ||
| 1125 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1126 | ret = LPT_SCAN_CONTINUE; | ||
| 1127 | goto exit; | ||
| 1128 | } | 1114 | } |
| 1129 | data->err = PTR_ERR(sleb); | 1115 | goto out; |
| 1130 | ret = LPT_SCAN_STOP; | ||
| 1131 | goto exit; | ||
| 1132 | } | 1116 | } |
| 1133 | 1117 | ||
| 1134 | is_idx = -1; | 1118 | is_idx = -1; |
| @@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1246 | } | 1230 | } |
| 1247 | 1231 | ||
| 1248 | ubifs_scan_destroy(sleb); | 1232 | ubifs_scan_destroy(sleb); |
| 1249 | ret = LPT_SCAN_CONTINUE; | ||
| 1250 | exit: | ||
| 1251 | vfree(buf); | 1233 | vfree(buf); |
| 1252 | return ret; | 1234 | return LPT_SCAN_CONTINUE; |
| 1253 | 1235 | ||
| 1254 | out_print: | 1236 | out_print: |
| 1255 | ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " | 1237 | ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " |
| @@ -1258,10 +1240,10 @@ out_print: | |||
| 1258 | dbg_dump_leb(c, lnum); | 1240 | dbg_dump_leb(c, lnum); |
| 1259 | out_destroy: | 1241 | out_destroy: |
| 1260 | ubifs_scan_destroy(sleb); | 1242 | ubifs_scan_destroy(sleb); |
| 1243 | ret = -EINVAL; | ||
| 1261 | out: | 1244 | out: |
| 1262 | vfree(buf); | 1245 | vfree(buf); |
| 1263 | data->err = -EINVAL; | 1246 | return ret; |
| 1264 | return LPT_SCAN_STOP; | ||
| 1265 | } | 1247 | } |
| 1266 | 1248 | ||
| 1267 | /** | 1249 | /** |
| @@ -1278,8 +1260,7 @@ out: | |||
| 1278 | int dbg_check_lprops(struct ubifs_info *c) | 1260 | int dbg_check_lprops(struct ubifs_info *c) |
| 1279 | { | 1261 | { |
| 1280 | int i, err; | 1262 | int i, err; |
| 1281 | struct scan_check_data data; | 1263 | struct ubifs_lp_stats lst; |
| 1282 | struct ubifs_lp_stats *lst = &data.lst; | ||
| 1283 | 1264 | ||
| 1284 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) | 1265 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) |
| 1285 | return 0; | 1266 | return 0; |
| @@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c) | |||
| 1294 | return err; | 1275 | return err; |
| 1295 | } | 1276 | } |
| 1296 | 1277 | ||
| 1297 | memset(lst, 0, sizeof(struct ubifs_lp_stats)); | 1278 | memset(&lst, 0, sizeof(struct ubifs_lp_stats)); |
| 1298 | |||
| 1299 | data.err = 0; | ||
| 1300 | err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, | 1279 | err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, |
| 1301 | (ubifs_lpt_scan_callback)scan_check_cb, | 1280 | (ubifs_lpt_scan_callback)scan_check_cb, |
| 1302 | &data); | 1281 | &lst); |
| 1303 | if (err && err != -ENOSPC) | 1282 | if (err && err != -ENOSPC) |
| 1304 | goto out; | 1283 | goto out; |
| 1305 | if (data.err) { | ||
| 1306 | err = data.err; | ||
| 1307 | goto out; | ||
| 1308 | } | ||
| 1309 | 1284 | ||
| 1310 | if (lst->empty_lebs != c->lst.empty_lebs || | 1285 | if (lst.empty_lebs != c->lst.empty_lebs || |
| 1311 | lst->idx_lebs != c->lst.idx_lebs || | 1286 | lst.idx_lebs != c->lst.idx_lebs || |
| 1312 | lst->total_free != c->lst.total_free || | 1287 | lst.total_free != c->lst.total_free || |
| 1313 | lst->total_dirty != c->lst.total_dirty || | 1288 | lst.total_dirty != c->lst.total_dirty || |
| 1314 | lst->total_used != c->lst.total_used) { | 1289 | lst.total_used != c->lst.total_used) { |
| 1315 | ubifs_err("bad overall accounting"); | 1290 | ubifs_err("bad overall accounting"); |
| 1316 | ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " | 1291 | ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " |
| 1317 | "total_free %lld, total_dirty %lld, total_used %lld", | 1292 | "total_free %lld, total_dirty %lld, total_used %lld", |
| 1318 | lst->empty_lebs, lst->idx_lebs, lst->total_free, | 1293 | lst.empty_lebs, lst.idx_lebs, lst.total_free, |
| 1319 | lst->total_dirty, lst->total_used); | 1294 | lst.total_dirty, lst.total_used); |
| 1320 | ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " | 1295 | ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " |
| 1321 | "total_free %lld, total_dirty %lld, total_used %lld", | 1296 | "total_free %lld, total_dirty %lld, total_used %lld", |
| 1322 | c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, | 1297 | c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, |
| @@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c) | |||
| 1325 | goto out; | 1300 | goto out; |
| 1326 | } | 1301 | } |
| 1327 | 1302 | ||
| 1328 | if (lst->total_dead != c->lst.total_dead || | 1303 | if (lst.total_dead != c->lst.total_dead || |
| 1329 | lst->total_dark != c->lst.total_dark) { | 1304 | lst.total_dark != c->lst.total_dark) { |
| 1330 | ubifs_err("bad dead/dark space accounting"); | 1305 | ubifs_err("bad dead/dark space accounting"); |
| 1331 | ubifs_err("calculated: total_dead %lld, total_dark %lld", | 1306 | ubifs_err("calculated: total_dead %lld, total_dark %lld", |
| 1332 | lst->total_dead, lst->total_dark); | 1307 | lst.total_dead, lst.total_dark); |
| 1333 | ubifs_err("read from lprops: total_dead %lld, total_dark %lld", | 1308 | ubifs_err("read from lprops: total_dead %lld, total_dark %lld", |
| 1334 | c->lst.total_dead, c->lst.total_dark); | 1309 | c->lst.total_dead, c->lst.total_dark); |
| 1335 | err = -EINVAL; | 1310 | err = -EINVAL; |
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 0c9c69bd983..dfcb5748a7d 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
| @@ -29,6 +29,12 @@ | |||
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include "ubifs.h" | 30 | #include "ubifs.h" |
| 31 | 31 | ||
| 32 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 33 | static int dbg_populate_lsave(struct ubifs_info *c); | ||
| 34 | #else | ||
| 35 | #define dbg_populate_lsave(c) 0 | ||
| 36 | #endif | ||
| 37 | |||
| 32 | /** | 38 | /** |
| 33 | * first_dirty_cnode - find first dirty cnode. | 39 | * first_dirty_cnode - find first dirty cnode. |
| 34 | * @c: UBIFS file-system description object | 40 | * @c: UBIFS file-system description object |
| @@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, | |||
| 586 | if (nnode->nbranch[iip].lnum) | 592 | if (nnode->nbranch[iip].lnum) |
| 587 | break; | 593 | break; |
| 588 | } | 594 | } |
| 589 | } while (iip >= UBIFS_LPT_FANOUT); | 595 | } while (iip >= UBIFS_LPT_FANOUT); |
| 590 | 596 | ||
| 591 | /* Go right */ | 597 | /* Go right */ |
| 592 | nnode = ubifs_get_nnode(c, nnode, iip); | 598 | nnode = ubifs_get_nnode(c, nnode, iip); |
| @@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c) | |||
| 815 | c->lpt_drty_flgs |= LSAVE_DIRTY; | 821 | c->lpt_drty_flgs |= LSAVE_DIRTY; |
| 816 | ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); | 822 | ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); |
| 817 | } | 823 | } |
| 824 | |||
| 825 | if (dbg_populate_lsave(c)) | ||
| 826 | return; | ||
| 827 | |||
| 818 | list_for_each_entry(lprops, &c->empty_list, list) { | 828 | list_for_each_entry(lprops, &c->empty_list, list) { |
| 819 | c->lsave[cnt++] = lprops->lnum; | 829 | c->lsave[cnt++] = lprops->lnum; |
| 820 | if (cnt >= c->lsave_cnt) | 830 | if (cnt >= c->lsave_cnt) |
| @@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c) | |||
| 1994 | current->pid); | 2004 | current->pid); |
| 1995 | } | 2005 | } |
| 1996 | 2006 | ||
| 2007 | /** | ||
| 2008 | * dbg_populate_lsave - debugging version of 'populate_lsave()' | ||
| 2009 | * @c: UBIFS file-system description object | ||
| 2010 | * | ||
| 2011 | * This is a debugging version for 'populate_lsave()' which populates lsave | ||
| 2012 | * with random LEBs instead of useful LEBs, which is good for test coverage. | ||
| 2013 | * Returns zero if lsave has not been populated (this debugging feature is | ||
| 2014 | * disabled) an non-zero if lsave has been populated. | ||
| 2015 | */ | ||
| 2016 | static int dbg_populate_lsave(struct ubifs_info *c) | ||
| 2017 | { | ||
| 2018 | struct ubifs_lprops *lprops; | ||
| 2019 | struct ubifs_lpt_heap *heap; | ||
| 2020 | int i; | ||
| 2021 | |||
| 2022 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) | ||
| 2023 | return 0; | ||
| 2024 | if (random32() & 3) | ||
| 2025 | return 0; | ||
| 2026 | |||
| 2027 | for (i = 0; i < c->lsave_cnt; i++) | ||
| 2028 | c->lsave[i] = c->main_first; | ||
| 2029 | |||
| 2030 | list_for_each_entry(lprops, &c->empty_list, list) | ||
| 2031 | c->lsave[random32() % c->lsave_cnt] = lprops->lnum; | ||
| 2032 | list_for_each_entry(lprops, &c->freeable_list, list) | ||
| 2033 | c->lsave[random32() % c->lsave_cnt] = lprops->lnum; | ||
| 2034 | list_for_each_entry(lprops, &c->frdi_idx_list, list) | ||
| 2035 | c->lsave[random32() % c->lsave_cnt] = lprops->lnum; | ||
| 2036 | |||
| 2037 | heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; | ||
| 2038 | for (i = 0; i < heap->cnt; i++) | ||
| 2039 | c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; | ||
| 2040 | heap = &c->lpt_heap[LPROPS_DIRTY - 1]; | ||
| 2041 | for (i = 0; i < heap->cnt; i++) | ||
| 2042 | c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; | ||
| 2043 | heap = &c->lpt_heap[LPROPS_FREE - 1]; | ||
| 2044 | for (i = 0; i < heap->cnt; i++) | ||
| 2045 | c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; | ||
| 2046 | |||
| 2047 | return 1; | ||
| 2048 | } | ||
| 2049 | |||
| 1997 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | 2050 | #endif /* CONFIG_UBIFS_FS_DEBUG */ |
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 21f47afdacf..278c2382e8c 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c | |||
| @@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c) | |||
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | main_sz = (long long)c->main_lebs * c->leb_size; | 150 | main_sz = (long long)c->main_lebs * c->leb_size; |
| 151 | if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { | 151 | if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { |
| 152 | err = 9; | 152 | err = 9; |
| 153 | goto out; | 153 | goto out; |
| 154 | } | 154 | } |
| @@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c) | |||
| 218 | } | 218 | } |
| 219 | 219 | ||
| 220 | if (c->lst.total_dead + c->lst.total_dark + | 220 | if (c->lst.total_dead + c->lst.total_dark + |
| 221 | c->lst.total_used + c->old_idx_sz > main_sz) { | 221 | c->lst.total_used + c->bi.old_idx_sz > main_sz) { |
| 222 | err = 21; | 222 | err = 21; |
| 223 | goto out; | 223 | goto out; |
| 224 | } | 224 | } |
| @@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c) | |||
| 286 | c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); | 286 | c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); |
| 287 | c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); | 287 | c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); |
| 288 | c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); | 288 | c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); |
| 289 | c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); | 289 | c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); |
| 290 | c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); | 290 | c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); |
| 291 | c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); | 291 | c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); |
| 292 | c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); | 292 | c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); |
| @@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c) | |||
| 305 | c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); | 305 | c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); |
| 306 | c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); | 306 | c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); |
| 307 | 307 | ||
| 308 | c->calc_idx_sz = c->old_idx_sz; | 308 | c->calc_idx_sz = c->bi.old_idx_sz; |
| 309 | 309 | ||
| 310 | if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) | 310 | if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) |
| 311 | c->no_orphs = 1; | 311 | c->no_orphs = 1; |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index c3de04dc952..0b5296a9a4c 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
| @@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c) | |||
| 340 | mutex_unlock(&c->lp_mutex); | 340 | mutex_unlock(&c->lp_mutex); |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | /** | ||
| 344 | * ubifs_next_log_lnum - switch to the next log LEB. | ||
| 345 | * @c: UBIFS file-system description object | ||
| 346 | * @lnum: current log LEB | ||
| 347 | * | ||
| 348 | * This helper function returns the log LEB number which goes next after LEB | ||
| 349 | * 'lnum'. | ||
| 350 | */ | ||
| 351 | static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) | ||
| 352 | { | ||
| 353 | lnum += 1; | ||
| 354 | if (lnum > c->log_last) | ||
| 355 | lnum = UBIFS_LOG_LNUM; | ||
| 356 | |||
| 357 | return lnum; | ||
| 358 | } | ||
| 359 | |||
| 343 | #endif /* __UBIFS_MISC_H__ */ | 360 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 09df318e368..bd644bf587a 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
| @@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c) | |||
| 673 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); | 673 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); |
| 674 | if (IS_ERR(sleb)) { | 674 | if (IS_ERR(sleb)) { |
| 675 | if (PTR_ERR(sleb) == -EUCLEAN) | 675 | if (PTR_ERR(sleb) == -EUCLEAN) |
| 676 | sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); | 676 | sleb = ubifs_recover_leb(c, lnum, 0, |
| 677 | c->sbuf, 0); | ||
| 677 | if (IS_ERR(sleb)) { | 678 | if (IS_ERR(sleb)) { |
| 678 | err = PTR_ERR(sleb); | 679 | err = PTR_ERR(sleb); |
| 679 | break; | 680 | break; |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 3dbad6fbd1e..731d9e2e7b5 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
| @@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
| 564 | } | 564 | } |
| 565 | 565 | ||
| 566 | /** | 566 | /** |
| 567 | * drop_incomplete_group - drop nodes from an incomplete group. | 567 | * drop_last_node - drop the last node or group of nodes. |
| 568 | * @sleb: scanned LEB information | 568 | * @sleb: scanned LEB information |
| 569 | * @offs: offset of dropped nodes is returned here | 569 | * @offs: offset of dropped nodes is returned here |
| 570 | * @grouped: non-zero if whole group of nodes have to be dropped | ||
| 570 | * | 571 | * |
| 571 | * This function returns %1 if nodes are dropped and %0 otherwise. | 572 | * This is a helper function for 'ubifs_recover_leb()' which drops the last |
| 573 | * node of the scanned LEB or the last group of nodes if @grouped is not zero. | ||
| 574 | * This function returns %1 if a node was dropped and %0 otherwise. | ||
| 572 | */ | 575 | */ |
| 573 | static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | 576 | static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) |
| 574 | { | 577 | { |
| 575 | int dropped = 0; | 578 | int dropped = 0; |
| 576 | 579 | ||
| @@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | |||
| 589 | kfree(snod); | 592 | kfree(snod); |
| 590 | sleb->nodes_cnt -= 1; | 593 | sleb->nodes_cnt -= 1; |
| 591 | dropped = 1; | 594 | dropped = 1; |
| 595 | if (!grouped) | ||
| 596 | break; | ||
| 592 | } | 597 | } |
| 593 | return dropped; | 598 | return dropped; |
| 594 | } | 599 | } |
| @@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | |||
| 609 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | 614 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, |
| 610 | int offs, void *sbuf, int grouped) | 615 | int offs, void *sbuf, int grouped) |
| 611 | { | 616 | { |
| 612 | int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; | 617 | int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; |
| 613 | int empty_chkd = 0, start = offs; | ||
| 614 | struct ubifs_scan_leb *sleb; | 618 | struct ubifs_scan_leb *sleb; |
| 615 | void *buf = sbuf + offs; | 619 | void *buf = sbuf + offs; |
| 616 | 620 | ||
| @@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 620 | if (IS_ERR(sleb)) | 624 | if (IS_ERR(sleb)) |
| 621 | return sleb; | 625 | return sleb; |
| 622 | 626 | ||
| 623 | if (sleb->ecc) | 627 | ubifs_assert(len >= 8); |
| 624 | need_clean = 1; | ||
| 625 | |||
| 626 | while (len >= 8) { | 628 | while (len >= 8) { |
| 627 | int ret; | ||
| 628 | |||
| 629 | dbg_scan("look at LEB %d:%d (%d bytes left)", | 629 | dbg_scan("look at LEB %d:%d (%d bytes left)", |
| 630 | lnum, offs, len); | 630 | lnum, offs, len); |
| 631 | 631 | ||
| @@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 635 | * Scan quietly until there is an error from which we cannot | 635 | * Scan quietly until there is an error from which we cannot |
| 636 | * recover | 636 | * recover |
| 637 | */ | 637 | */ |
| 638 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); | 638 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); |
| 639 | |||
| 640 | if (ret == SCANNED_A_NODE) { | 639 | if (ret == SCANNED_A_NODE) { |
| 641 | /* A valid node, and not a padding node */ | 640 | /* A valid node, and not a padding node */ |
| 642 | struct ubifs_ch *ch = buf; | 641 | struct ubifs_ch *ch = buf; |
| @@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 649 | offs += node_len; | 648 | offs += node_len; |
| 650 | buf += node_len; | 649 | buf += node_len; |
| 651 | len -= node_len; | 650 | len -= node_len; |
| 652 | continue; | 651 | } else if (ret > 0) { |
| 653 | } | ||
| 654 | |||
| 655 | if (ret > 0) { | ||
| 656 | /* Padding bytes or a valid padding node */ | 652 | /* Padding bytes or a valid padding node */ |
| 657 | offs += ret; | 653 | offs += ret; |
| 658 | buf += ret; | 654 | buf += ret; |
| 659 | len -= ret; | 655 | len -= ret; |
| 660 | continue; | 656 | } else if (ret == SCANNED_EMPTY_SPACE || |
| 661 | } | 657 | ret == SCANNED_GARBAGE || |
| 662 | 658 | ret == SCANNED_A_BAD_PAD_NODE || | |
| 663 | if (ret == SCANNED_EMPTY_SPACE) { | 659 | ret == SCANNED_A_CORRUPT_NODE) { |
| 664 | if (!is_empty(buf, len)) { | 660 | dbg_rcvry("found corruption - %d", ret); |
| 665 | if (!is_last_write(c, buf, offs)) | ||
| 666 | break; | ||
| 667 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 668 | need_clean = 1; | ||
| 669 | } | ||
| 670 | empty_chkd = 1; | ||
| 671 | break; | 661 | break; |
| 672 | } | 662 | } else { |
| 673 | 663 | dbg_err("unexpected return value %d", ret); | |
| 674 | if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) | ||
| 675 | if (is_last_write(c, buf, offs)) { | ||
| 676 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 677 | need_clean = 1; | ||
| 678 | empty_chkd = 1; | ||
| 679 | break; | ||
| 680 | } | ||
| 681 | |||
| 682 | if (ret == SCANNED_A_CORRUPT_NODE) | ||
| 683 | if (no_more_nodes(c, buf, len, lnum, offs)) { | ||
| 684 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 685 | need_clean = 1; | ||
| 686 | empty_chkd = 1; | ||
| 687 | break; | ||
| 688 | } | ||
| 689 | |||
| 690 | if (quiet) { | ||
| 691 | /* Redo the last scan but noisily */ | ||
| 692 | quiet = 0; | ||
| 693 | continue; | ||
| 694 | } | ||
| 695 | |||
| 696 | switch (ret) { | ||
| 697 | case SCANNED_GARBAGE: | ||
| 698 | dbg_err("garbage"); | ||
| 699 | goto corrupted; | ||
| 700 | case SCANNED_A_CORRUPT_NODE: | ||
| 701 | case SCANNED_A_BAD_PAD_NODE: | ||
| 702 | dbg_err("bad node"); | ||
| 703 | goto corrupted; | ||
| 704 | default: | ||
| 705 | dbg_err("unknown"); | ||
| 706 | err = -EINVAL; | 664 | err = -EINVAL; |
| 707 | goto error; | 665 | goto error; |
| 708 | } | 666 | } |
| 709 | } | 667 | } |
| 710 | 668 | ||
| 711 | if (!empty_chkd && !is_empty(buf, len)) { | 669 | if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { |
| 712 | if (is_last_write(c, buf, offs)) { | 670 | if (!is_last_write(c, buf, offs)) |
| 713 | clean_buf(c, &buf, lnum, &offs, &len); | 671 | goto corrupted_rescan; |
| 714 | need_clean = 1; | 672 | } else if (ret == SCANNED_A_CORRUPT_NODE) { |
| 715 | } else { | 673 | if (!no_more_nodes(c, buf, len, lnum, offs)) |
| 674 | goto corrupted_rescan; | ||
| 675 | } else if (!is_empty(buf, len)) { | ||
| 676 | if (!is_last_write(c, buf, offs)) { | ||
| 716 | int corruption = first_non_ff(buf, len); | 677 | int corruption = first_non_ff(buf, len); |
| 717 | 678 | ||
| 718 | /* | 679 | /* |
| @@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 728 | } | 689 | } |
| 729 | } | 690 | } |
| 730 | 691 | ||
| 731 | /* Drop nodes from incomplete group */ | 692 | min_io_unit = round_down(offs, c->min_io_size); |
| 732 | if (grouped && drop_incomplete_group(sleb, &offs)) { | 693 | if (grouped) |
| 733 | buf = sbuf + offs; | 694 | /* |
| 734 | len = c->leb_size - offs; | 695 | * If nodes are grouped, always drop the incomplete group at |
| 735 | clean_buf(c, &buf, lnum, &offs, &len); | 696 | * the end. |
| 736 | need_clean = 1; | 697 | */ |
| 737 | } | 698 | drop_last_node(sleb, &offs, 1); |
| 738 | 699 | ||
| 739 | if (offs % c->min_io_size) { | 700 | /* |
| 740 | clean_buf(c, &buf, lnum, &offs, &len); | 701 | * While we are in the middle of the same min. I/O unit keep dropping |
| 741 | need_clean = 1; | 702 | * nodes. So basically, what we want is to make sure that the last min. |
| 742 | } | 703 | * I/O unit where we saw the corruption is dropped completely with all |
| 704 | * the uncorrupted node which may possibly sit there. | ||
| 705 | * | ||
| 706 | * In other words, let's name the min. I/O unit where the corruption | ||
| 707 | * starts B, and the previous min. I/O unit A. The below code tries to | ||
| 708 | * deal with a situation when half of B contains valid nodes or the end | ||
| 709 | * of a valid node, and the second half of B contains corrupted data or | ||
| 710 | * garbage. This means that UBIFS had been writing to B just before the | ||
| 711 | * power cut happened. I do not know how realistic is this scenario | ||
| 712 | * that half of the min. I/O unit had been written successfully and the | ||
| 713 | * other half not, but this is possible in our 'failure mode emulation' | ||
| 714 | * infrastructure at least. | ||
| 715 | * | ||
| 716 | * So what is the problem, why we need to drop those nodes? Whey can't | ||
| 717 | * we just clean-up the second half of B by putting a padding node | ||
| 718 | * there? We can, and this works fine with one exception which was | ||
| 719 | * reproduced with power cut emulation testing and happens extremely | ||
| 720 | * rarely. The description follows, but it is worth noting that that is | ||
| 721 | * only about the GC head, so we could do this trick only if the bud | ||
| 722 | * belongs to the GC head, but it does not seem to be worth an | ||
| 723 | * additional "if" statement. | ||
| 724 | * | ||
| 725 | * So, imagine the file-system is full, we run GC which is moving valid | ||
| 726 | * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head | ||
| 727 | * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X | ||
| 728 | * and will try to continue. Imagine that LEB X is currently the | ||
| 729 | * dirtiest LEB, and the amount of used space in LEB Y is exactly the | ||
| 730 | * same as amount of free space in LEB X. | ||
| 731 | * | ||
| 732 | * And a power cut happens when nodes are moved from LEB X to LEB Y. We | ||
| 733 | * are here trying to recover LEB Y which is the GC head LEB. We find | ||
| 734 | * the min. I/O unit B as described above. Then we clean-up LEB Y by | ||
| 735 | * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function | ||
| 736 | * fails, because it cannot find a dirty LEB which could be GC'd into | ||
| 737 | * LEB Y! Even LEB X does not match because the amount of valid nodes | ||
| 738 | * there does not fit the free space in LEB Y any more! And this is | ||
| 739 | * because of the padding node which we added to LEB Y. The | ||
| 740 | * user-visible effect of this which I once observed and analysed is | ||
| 741 | * that we cannot mount the file-system with -ENOSPC error. | ||
| 742 | * | ||
| 743 | * So obviously, to make sure that situation does not happen we should | ||
| 744 | * free min. I/O unit B in LEB Y completely and the last used min. I/O | ||
| 745 | * unit in LEB Y should be A. This is basically what the below code | ||
| 746 | * tries to do. | ||
| 747 | */ | ||
| 748 | while (min_io_unit == round_down(offs, c->min_io_size) && | ||
| 749 | min_io_unit != offs && | ||
| 750 | drop_last_node(sleb, &offs, grouped)); | ||
| 751 | |||
| 752 | buf = sbuf + offs; | ||
| 753 | len = c->leb_size - offs; | ||
| 743 | 754 | ||
| 755 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 744 | ubifs_end_scan(c, sleb, lnum, offs); | 756 | ubifs_end_scan(c, sleb, lnum, offs); |
| 745 | 757 | ||
| 746 | if (need_clean) { | 758 | err = fix_unclean_leb(c, sleb, start); |
| 747 | err = fix_unclean_leb(c, sleb, start); | 759 | if (err) |
| 748 | if (err) | 760 | goto error; |
| 749 | goto error; | ||
| 750 | } | ||
| 751 | 761 | ||
| 752 | return sleb; | 762 | return sleb; |
| 753 | 763 | ||
| 764 | corrupted_rescan: | ||
| 765 | /* Re-scan the corrupted data with verbose messages */ | ||
| 766 | dbg_err("corruptio %d", ret); | ||
| 767 | ubifs_scan_a_node(c, buf, len, lnum, offs, 1); | ||
| 754 | corrupted: | 768 | corrupted: |
| 755 | ubifs_scanned_corruption(c, lnum, offs, buf); | 769 | ubifs_scanned_corruption(c, lnum, offs, buf); |
| 756 | err = -EUCLEAN; | 770 | err = -EUCLEAN; |
| @@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) | |||
| 1070 | } | 1084 | } |
| 1071 | 1085 | ||
| 1072 | /** | 1086 | /** |
| 1087 | * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. | ||
| 1088 | * @c: UBIFS file-system description object | ||
| 1089 | * | ||
| 1090 | * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty | ||
| 1091 | * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns | ||
| 1092 | * zero in case of success and a negative error code in case of failure. | ||
| 1093 | */ | ||
| 1094 | static int grab_empty_leb(struct ubifs_info *c) | ||
| 1095 | { | ||
| 1096 | int lnum, err; | ||
| 1097 | |||
| 1098 | /* | ||
| 1099 | * Note, it is very important to first search for an empty LEB and then | ||
| 1100 | * run the commit, not vice-versa. The reason is that there might be | ||
| 1101 | * only one empty LEB at the moment, the one which has been the | ||
| 1102 | * @c->gc_lnum just before the power cut happened. During the regular | ||
| 1103 | * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no | ||
| 1104 | * one but GC can grab it. But at this moment this single empty LEB is | ||
| 1105 | * not marked as taken, so if we run commit - what happens? Right, the | ||
| 1106 | * commit will grab it and write the index there. Remember that the | ||
| 1107 | * index always expands as long as there is free space, and it only | ||
| 1108 | * starts consolidating when we run out of space. | ||
| 1109 | * | ||
| 1110 | * IOW, if we run commit now, we might not be able to find a free LEB | ||
| 1111 | * after this. | ||
| 1112 | */ | ||
| 1113 | lnum = ubifs_find_free_leb_for_idx(c); | ||
| 1114 | if (lnum < 0) { | ||
| 1115 | dbg_err("could not find an empty LEB"); | ||
| 1116 | dbg_dump_lprops(c); | ||
| 1117 | dbg_dump_budg(c, &c->bi); | ||
| 1118 | return lnum; | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | /* Reset the index flag */ | ||
| 1122 | err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, | ||
| 1123 | LPROPS_INDEX, 0); | ||
| 1124 | if (err) | ||
| 1125 | return err; | ||
| 1126 | |||
| 1127 | c->gc_lnum = lnum; | ||
| 1128 | dbg_rcvry("found empty LEB %d, run commit", lnum); | ||
| 1129 | |||
| 1130 | return ubifs_run_commit(c); | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | /** | ||
| 1073 | * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. | 1134 | * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. |
| 1074 | * @c: UBIFS file-system description object | 1135 | * @c: UBIFS file-system description object |
| 1075 | * | 1136 | * |
| @@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) | |||
| 1091 | { | 1152 | { |
| 1092 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | 1153 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; |
| 1093 | struct ubifs_lprops lp; | 1154 | struct ubifs_lprops lp; |
| 1094 | int lnum, err; | 1155 | int err; |
| 1156 | |||
| 1157 | dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); | ||
| 1095 | 1158 | ||
| 1096 | c->gc_lnum = -1; | 1159 | c->gc_lnum = -1; |
| 1097 | if (wbuf->lnum == -1) { | 1160 | if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) |
| 1098 | dbg_rcvry("no GC head LEB"); | 1161 | return grab_empty_leb(c); |
| 1099 | goto find_free; | 1162 | |
| 1100 | } | ||
| 1101 | /* | ||
| 1102 | * See whether the used space in the dirtiest LEB fits in the GC head | ||
| 1103 | * LEB. | ||
| 1104 | */ | ||
| 1105 | if (wbuf->offs == c->leb_size) { | ||
| 1106 | dbg_rcvry("no room in GC head LEB"); | ||
| 1107 | goto find_free; | ||
| 1108 | } | ||
| 1109 | err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); | 1163 | err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); |
| 1110 | if (err) { | 1164 | if (err) { |
| 1111 | /* | 1165 | if (err != -ENOSPC) |
| 1112 | * There are no dirty or empty LEBs subject to here being | ||
| 1113 | * enough for the index. Try to use | ||
| 1114 | * 'ubifs_find_free_leb_for_idx()', which will return any empty | ||
| 1115 | * LEBs (ignoring index requirements). If the index then | ||
| 1116 | * doesn't have enough LEBs the recovery commit will fail - | ||
| 1117 | * which is the same result anyway i.e. recovery fails. So | ||
| 1118 | * there is no problem ignoring index requirements and just | ||
| 1119 | * grabbing a free LEB since we have already established there | ||
| 1120 | * is not a dirty LEB we could have used instead. | ||
| 1121 | */ | ||
| 1122 | if (err == -ENOSPC) { | ||
| 1123 | dbg_rcvry("could not find a dirty LEB"); | ||
| 1124 | goto find_free; | ||
| 1125 | } | ||
| 1126 | return err; | ||
| 1127 | } | ||
| 1128 | ubifs_assert(!(lp.flags & LPROPS_INDEX)); | ||
| 1129 | lnum = lp.lnum; | ||
| 1130 | if (lp.free + lp.dirty == c->leb_size) { | ||
| 1131 | /* An empty LEB was returned */ | ||
| 1132 | if (lp.free != c->leb_size) { | ||
| 1133 | err = ubifs_change_one_lp(c, lnum, c->leb_size, | ||
| 1134 | 0, 0, 0, 0); | ||
| 1135 | if (err) | ||
| 1136 | return err; | ||
| 1137 | } | ||
| 1138 | err = ubifs_leb_unmap(c, lnum); | ||
| 1139 | if (err) | ||
| 1140 | return err; | 1166 | return err; |
| 1141 | c->gc_lnum = lnum; | 1167 | |
| 1142 | dbg_rcvry("allocated LEB %d for GC", lnum); | 1168 | dbg_rcvry("could not find a dirty LEB"); |
| 1143 | /* Run the commit */ | 1169 | return grab_empty_leb(c); |
| 1144 | dbg_rcvry("committing"); | ||
| 1145 | return ubifs_run_commit(c); | ||
| 1146 | } | ||
| 1147 | /* | ||
| 1148 | * There was no empty LEB so the used space in the dirtiest LEB must fit | ||
| 1149 | * in the GC head LEB. | ||
| 1150 | */ | ||
| 1151 | if (lp.free + lp.dirty < wbuf->offs) { | ||
| 1152 | dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", | ||
| 1153 | lnum, wbuf->lnum, wbuf->offs); | ||
| 1154 | err = ubifs_return_leb(c, lnum); | ||
| 1155 | if (err) | ||
| 1156 | return err; | ||
| 1157 | goto find_free; | ||
| 1158 | } | 1170 | } |
| 1171 | |||
| 1172 | ubifs_assert(!(lp.flags & LPROPS_INDEX)); | ||
| 1173 | ubifs_assert(lp.free + lp.dirty >= wbuf->offs); | ||
| 1174 | |||
| 1159 | /* | 1175 | /* |
| 1160 | * We run the commit before garbage collection otherwise subsequent | 1176 | * We run the commit before garbage collection otherwise subsequent |
| 1161 | * mounts will see the GC and orphan deletion in a different order. | 1177 | * mounts will see the GC and orphan deletion in a different order. |
| @@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) | |||
| 1164 | err = ubifs_run_commit(c); | 1180 | err = ubifs_run_commit(c); |
| 1165 | if (err) | 1181 | if (err) |
| 1166 | return err; | 1182 | return err; |
| 1167 | /* | 1183 | |
| 1168 | * The data in the dirtiest LEB fits in the GC head LEB, so do the GC | 1184 | dbg_rcvry("GC'ing LEB %d", lp.lnum); |
| 1169 | * - use locking to keep 'ubifs_assert()' happy. | ||
| 1170 | */ | ||
| 1171 | dbg_rcvry("GC'ing LEB %d", lnum); | ||
| 1172 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); | 1185 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
| 1173 | err = ubifs_garbage_collect_leb(c, &lp); | 1186 | err = ubifs_garbage_collect_leb(c, &lp); |
| 1174 | if (err >= 0) { | 1187 | if (err >= 0) { |
| @@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) | |||
| 1184 | err = -EINVAL; | 1197 | err = -EINVAL; |
| 1185 | return err; | 1198 | return err; |
| 1186 | } | 1199 | } |
| 1187 | if (err != LEB_RETAINED) { | 1200 | |
| 1188 | dbg_err("GC returned %d", err); | 1201 | ubifs_assert(err == LEB_RETAINED); |
| 1202 | if (err != LEB_RETAINED) | ||
| 1189 | return -EINVAL; | 1203 | return -EINVAL; |
| 1190 | } | 1204 | |
| 1191 | err = ubifs_leb_unmap(c, c->gc_lnum); | 1205 | err = ubifs_leb_unmap(c, c->gc_lnum); |
| 1192 | if (err) | 1206 | if (err) |
| 1193 | return err; | 1207 | return err; |
| 1194 | dbg_rcvry("allocated LEB %d for GC", lnum); | ||
| 1195 | return 0; | ||
| 1196 | 1208 | ||
| 1197 | find_free: | 1209 | dbg_rcvry("allocated LEB %d for GC", lp.lnum); |
| 1198 | /* | 1210 | return 0; |
| 1199 | * There is no GC head LEB or the free space in the GC head LEB is too | ||
| 1200 | * small, or there are not dirty LEBs. Allocate gc_lnum by calling | ||
| 1201 | * 'ubifs_find_free_leb_for_idx()' so GC is not run. | ||
| 1202 | */ | ||
| 1203 | lnum = ubifs_find_free_leb_for_idx(c); | ||
| 1204 | if (lnum < 0) { | ||
| 1205 | dbg_err("could not find an empty LEB"); | ||
| 1206 | return lnum; | ||
| 1207 | } | ||
| 1208 | /* And reset the index flag */ | ||
| 1209 | err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, | ||
| 1210 | LPROPS_INDEX, 0); | ||
| 1211 | if (err) | ||
| 1212 | return err; | ||
| 1213 | c->gc_lnum = lnum; | ||
| 1214 | dbg_rcvry("allocated LEB %d for GC", lnum); | ||
| 1215 | /* Run the commit */ | ||
| 1216 | dbg_rcvry("committing"); | ||
| 1217 | return ubifs_run_commit(c); | ||
| 1218 | } | 1211 | } |
| 1219 | 1212 | ||
| 1220 | /** | 1213 | /** |
| @@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) | |||
| 1456 | err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); | 1449 | err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); |
| 1457 | if (err) | 1450 | if (err) |
| 1458 | goto out; | 1451 | goto out; |
| 1459 | dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", | 1452 | dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", |
| 1460 | (unsigned long)e->inum, lnum, offs, i_size, e->d_size); | 1453 | (unsigned long)e->inum, lnum, offs, i_size, e->d_size); |
| 1461 | return 0; | 1454 | return 0; |
| 1462 | 1455 | ||
| @@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c) | |||
| 1505 | e->i_size = le64_to_cpu(ino->size); | 1498 | e->i_size = le64_to_cpu(ino->size); |
| 1506 | } | 1499 | } |
| 1507 | } | 1500 | } |
| 1501 | |||
| 1508 | if (e->exists && e->i_size < e->d_size) { | 1502 | if (e->exists && e->i_size < e->d_size) { |
| 1509 | if (!e->inode && c->ro_mount) { | 1503 | if (c->ro_mount) { |
| 1510 | /* Fix the inode size and pin it in memory */ | 1504 | /* Fix the inode size and pin it in memory */ |
| 1511 | struct inode *inode; | 1505 | struct inode *inode; |
| 1506 | struct ubifs_inode *ui; | ||
| 1507 | |||
| 1508 | ubifs_assert(!e->inode); | ||
| 1512 | 1509 | ||
| 1513 | inode = ubifs_iget(c->vfs_sb, e->inum); | 1510 | inode = ubifs_iget(c->vfs_sb, e->inum); |
| 1514 | if (IS_ERR(inode)) | 1511 | if (IS_ERR(inode)) |
| 1515 | return PTR_ERR(inode); | 1512 | return PTR_ERR(inode); |
| 1513 | |||
| 1514 | ui = ubifs_inode(inode); | ||
| 1516 | if (inode->i_size < e->d_size) { | 1515 | if (inode->i_size < e->d_size) { |
| 1517 | dbg_rcvry("ino %lu size %lld -> %lld", | 1516 | dbg_rcvry("ino %lu size %lld -> %lld", |
| 1518 | (unsigned long)e->inum, | 1517 | (unsigned long)e->inum, |
| 1519 | e->d_size, inode->i_size); | 1518 | inode->i_size, e->d_size); |
| 1520 | inode->i_size = e->d_size; | 1519 | inode->i_size = e->d_size; |
| 1521 | ubifs_inode(inode)->ui_size = e->d_size; | 1520 | ui->ui_size = e->d_size; |
| 1521 | ui->synced_i_size = e->d_size; | ||
| 1522 | e->inode = inode; | 1522 | e->inode = inode; |
| 1523 | this = rb_next(this); | 1523 | this = rb_next(this); |
| 1524 | continue; | 1524 | continue; |
| @@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c) | |||
| 1533 | iput(e->inode); | 1533 | iput(e->inode); |
| 1534 | } | 1534 | } |
| 1535 | } | 1535 | } |
| 1536 | |||
| 1536 | this = rb_next(this); | 1537 | this = rb_next(this); |
| 1537 | rb_erase(&e->rb, &c->size_tree); | 1538 | rb_erase(&e->rb, &c->size_tree); |
| 1538 | kfree(e); | 1539 | kfree(e); |
| 1539 | } | 1540 | } |
| 1541 | |||
| 1540 | return 0; | 1542 | return 0; |
| 1541 | } | 1543 | } |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index d3d6d365bfc..6617280d167 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
| @@ -33,44 +33,32 @@ | |||
| 33 | */ | 33 | */ |
| 34 | 34 | ||
| 35 | #include "ubifs.h" | 35 | #include "ubifs.h" |
| 36 | 36 | #include <linux/list_sort.h> | |
| 37 | /* | ||
| 38 | * Replay flags. | ||
| 39 | * | ||
| 40 | * REPLAY_DELETION: node was deleted | ||
| 41 | * REPLAY_REF: node is a reference node | ||
| 42 | */ | ||
| 43 | enum { | ||
| 44 | REPLAY_DELETION = 1, | ||
| 45 | REPLAY_REF = 2, | ||
| 46 | }; | ||
| 47 | 37 | ||
| 48 | /** | 38 | /** |
| 49 | * struct replay_entry - replay tree entry. | 39 | * struct replay_entry - replay list entry. |
| 50 | * @lnum: logical eraseblock number of the node | 40 | * @lnum: logical eraseblock number of the node |
| 51 | * @offs: node offset | 41 | * @offs: node offset |
| 52 | * @len: node length | 42 | * @len: node length |
| 43 | * @deletion: non-zero if this entry corresponds to a node deletion | ||
| 53 | * @sqnum: node sequence number | 44 | * @sqnum: node sequence number |
| 54 | * @flags: replay flags | 45 | * @list: links the replay list |
| 55 | * @rb: links the replay tree | ||
| 56 | * @key: node key | 46 | * @key: node key |
| 57 | * @nm: directory entry name | 47 | * @nm: directory entry name |
| 58 | * @old_size: truncation old size | 48 | * @old_size: truncation old size |
| 59 | * @new_size: truncation new size | 49 | * @new_size: truncation new size |
| 60 | * @free: amount of free space in a bud | ||
| 61 | * @dirty: amount of dirty space in a bud from padding and deletion nodes | ||
| 62 | * @jhead: journal head number of the bud | ||
| 63 | * | 50 | * |
| 64 | * UBIFS journal replay must compare node sequence numbers, which means it must | 51 | * The replay process first scans all buds and builds the replay list, then |
| 65 | * build a tree of node information to insert into the TNC. | 52 | * sorts the replay list in nodes sequence number order, and then inserts all |
| 53 | * the replay entries to the TNC. | ||
| 66 | */ | 54 | */ |
| 67 | struct replay_entry { | 55 | struct replay_entry { |
| 68 | int lnum; | 56 | int lnum; |
| 69 | int offs; | 57 | int offs; |
| 70 | int len; | 58 | int len; |
| 59 | unsigned int deletion:1; | ||
| 71 | unsigned long long sqnum; | 60 | unsigned long long sqnum; |
| 72 | int flags; | 61 | struct list_head list; |
| 73 | struct rb_node rb; | ||
| 74 | union ubifs_key key; | 62 | union ubifs_key key; |
| 75 | union { | 63 | union { |
| 76 | struct qstr nm; | 64 | struct qstr nm; |
| @@ -78,11 +66,6 @@ struct replay_entry { | |||
| 78 | loff_t old_size; | 66 | loff_t old_size; |
| 79 | loff_t new_size; | 67 | loff_t new_size; |
| 80 | }; | 68 | }; |
| 81 | struct { | ||
| 82 | int free; | ||
| 83 | int dirty; | ||
| 84 | int jhead; | ||
| 85 | }; | ||
| 86 | }; | 69 | }; |
| 87 | }; | 70 | }; |
| 88 | 71 | ||
| @@ -90,57 +73,64 @@ struct replay_entry { | |||
| 90 | * struct bud_entry - entry in the list of buds to replay. | 73 | * struct bud_entry - entry in the list of buds to replay. |
| 91 | * @list: next bud in the list | 74 | * @list: next bud in the list |
| 92 | * @bud: bud description object | 75 | * @bud: bud description object |
| 93 | * @free: free bytes in the bud | ||
| 94 | * @sqnum: reference node sequence number | 76 | * @sqnum: reference node sequence number |
| 77 | * @free: free bytes in the bud | ||
| 78 | * @dirty: dirty bytes in the bud | ||
| 95 | */ | 79 | */ |
| 96 | struct bud_entry { | 80 | struct bud_entry { |
| 97 | struct list_head list; | 81 | struct list_head list; |
| 98 | struct ubifs_bud *bud; | 82 | struct ubifs_bud *bud; |
| 99 | int free; | ||
| 100 | unsigned long long sqnum; | 83 | unsigned long long sqnum; |
| 84 | int free; | ||
| 85 | int dirty; | ||
| 101 | }; | 86 | }; |
| 102 | 87 | ||
| 103 | /** | 88 | /** |
| 104 | * set_bud_lprops - set free and dirty space used by a bud. | 89 | * set_bud_lprops - set free and dirty space used by a bud. |
| 105 | * @c: UBIFS file-system description object | 90 | * @c: UBIFS file-system description object |
| 106 | * @r: replay entry of bud | 91 | * @b: bud entry which describes the bud |
| 92 | * | ||
| 93 | * This function makes sure the LEB properties of bud @b are set correctly | ||
| 94 | * after the replay. Returns zero in case of success and a negative error code | ||
| 95 | * in case of failure. | ||
| 107 | */ | 96 | */ |
| 108 | static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | 97 | static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) |
| 109 | { | 98 | { |
| 110 | const struct ubifs_lprops *lp; | 99 | const struct ubifs_lprops *lp; |
| 111 | int err = 0, dirty; | 100 | int err = 0, dirty; |
| 112 | 101 | ||
| 113 | ubifs_get_lprops(c); | 102 | ubifs_get_lprops(c); |
| 114 | 103 | ||
| 115 | lp = ubifs_lpt_lookup_dirty(c, r->lnum); | 104 | lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); |
| 116 | if (IS_ERR(lp)) { | 105 | if (IS_ERR(lp)) { |
| 117 | err = PTR_ERR(lp); | 106 | err = PTR_ERR(lp); |
| 118 | goto out; | 107 | goto out; |
| 119 | } | 108 | } |
| 120 | 109 | ||
| 121 | dirty = lp->dirty; | 110 | dirty = lp->dirty; |
| 122 | if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { | 111 | if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { |
| 123 | /* | 112 | /* |
| 124 | * The LEB was added to the journal with a starting offset of | 113 | * The LEB was added to the journal with a starting offset of |
| 125 | * zero which means the LEB must have been empty. The LEB | 114 | * zero which means the LEB must have been empty. The LEB |
| 126 | * property values should be lp->free == c->leb_size and | 115 | * property values should be @lp->free == @c->leb_size and |
| 127 | * lp->dirty == 0, but that is not the case. The reason is that | 116 | * @lp->dirty == 0, but that is not the case. The reason is that |
| 128 | * the LEB was garbage collected. The garbage collector resets | 117 | * the LEB had been garbage collected before it became the bud, |
| 129 | * the free and dirty space without recording it anywhere except | 118 | * and there was no commit in between. The garbage collector |
| 130 | * lprops, so if there is not a commit then lprops does not have | 119 | * resets the free and dirty space without recording it |
| 131 | * that information next time the file system is mounted. | 120 | * anywhere except lprops, so if there was no commit then |
| 121 | * lprops does not have that information. | ||
| 132 | * | 122 | * |
| 133 | * We do not need to adjust free space because the scan has told | 123 | * We do not need to adjust free space because the scan has told |
| 134 | * us the exact value which is recorded in the replay entry as | 124 | * us the exact value which is recorded in the replay entry as |
| 135 | * r->free. | 125 | * @b->free. |
| 136 | * | 126 | * |
| 137 | * However we do need to subtract from the dirty space the | 127 | * However we do need to subtract from the dirty space the |
| 138 | * amount of space that the garbage collector reclaimed, which | 128 | * amount of space that the garbage collector reclaimed, which |
| 139 | * is the whole LEB minus the amount of space that was free. | 129 | * is the whole LEB minus the amount of space that was free. |
| 140 | */ | 130 | */ |
| 141 | dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, | 131 | dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, |
| 142 | lp->free, lp->dirty); | 132 | lp->free, lp->dirty); |
| 143 | dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, | 133 | dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, |
| 144 | lp->free, lp->dirty); | 134 | lp->free, lp->dirty); |
| 145 | dirty -= c->leb_size - lp->free; | 135 | dirty -= c->leb_size - lp->free; |
| 146 | /* | 136 | /* |
| @@ -152,10 +142,10 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | |||
| 152 | */ | 142 | */ |
| 153 | if (dirty != 0) | 143 | if (dirty != 0) |
| 154 | dbg_msg("LEB %d lp: %d free %d dirty " | 144 | dbg_msg("LEB %d lp: %d free %d dirty " |
| 155 | "replay: %d free %d dirty", r->lnum, lp->free, | 145 | "replay: %d free %d dirty", b->bud->lnum, |
| 156 | lp->dirty, r->free, r->dirty); | 146 | lp->free, lp->dirty, b->free, b->dirty); |
| 157 | } | 147 | } |
| 158 | lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, | 148 | lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, |
| 159 | lp->flags | LPROPS_TAKEN, 0); | 149 | lp->flags | LPROPS_TAKEN, 0); |
| 160 | if (IS_ERR(lp)) { | 150 | if (IS_ERR(lp)) { |
| 161 | err = PTR_ERR(lp); | 151 | err = PTR_ERR(lp); |
| @@ -163,8 +153,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | |||
| 163 | } | 153 | } |
| 164 | 154 | ||
| 165 | /* Make sure the journal head points to the latest bud */ | 155 | /* Make sure the journal head points to the latest bud */ |
| 166 | err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum, | 156 | err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, |
| 167 | c->leb_size - r->free, UBI_SHORTTERM); | 157 | b->bud->lnum, c->leb_size - b->free, |
| 158 | UBI_SHORTTERM); | ||
| 168 | 159 | ||
| 169 | out: | 160 | out: |
| 170 | ubifs_release_lprops(c); | 161 | ubifs_release_lprops(c); |
| @@ -172,6 +163,27 @@ out: | |||
| 172 | } | 163 | } |
| 173 | 164 | ||
| 174 | /** | 165 | /** |
| 166 | * set_buds_lprops - set free and dirty space for all replayed buds. | ||
| 167 | * @c: UBIFS file-system description object | ||
| 168 | * | ||
| 169 | * This function sets LEB properties for all replayed buds. Returns zero in | ||
| 170 | * case of success and a negative error code in case of failure. | ||
| 171 | */ | ||
| 172 | static int set_buds_lprops(struct ubifs_info *c) | ||
| 173 | { | ||
| 174 | struct bud_entry *b; | ||
| 175 | int err; | ||
| 176 | |||
| 177 | list_for_each_entry(b, &c->replay_buds, list) { | ||
| 178 | err = set_bud_lprops(c, b); | ||
| 179 | if (err) | ||
| 180 | return err; | ||
| 181 | } | ||
| 182 | |||
| 183 | return 0; | ||
| 184 | } | ||
| 185 | |||
| 186 | /** | ||
| 175 | * trun_remove_range - apply a replay entry for a truncation to the TNC. | 187 | * trun_remove_range - apply a replay entry for a truncation to the TNC. |
| 176 | * @c: UBIFS file-system description object | 188 | * @c: UBIFS file-system description object |
| 177 | * @r: replay entry of truncation | 189 | * @r: replay entry of truncation |
| @@ -207,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) | |||
| 207 | */ | 219 | */ |
| 208 | static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | 220 | static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) |
| 209 | { | 221 | { |
| 210 | int err, deletion = ((r->flags & REPLAY_DELETION) != 0); | 222 | int err; |
| 211 | 223 | ||
| 212 | dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, | 224 | dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, |
| 213 | r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); | 225 | r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); |
| 214 | 226 | ||
| 215 | /* Set c->replay_sqnum to help deal with dangling branches. */ | 227 | /* Set c->replay_sqnum to help deal with dangling branches. */ |
| 216 | c->replay_sqnum = r->sqnum; | 228 | c->replay_sqnum = r->sqnum; |
| 217 | 229 | ||
| 218 | if (r->flags & REPLAY_REF) | 230 | if (is_hash_key(c, &r->key)) { |
| 219 | err = set_bud_lprops(c, r); | 231 | if (r->deletion) |
| 220 | else if (is_hash_key(c, &r->key)) { | ||
| 221 | if (deletion) | ||
| 222 | err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); | 232 | err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); |
| 223 | else | 233 | else |
| 224 | err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, | 234 | err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, |
| 225 | r->len, &r->nm); | 235 | r->len, &r->nm); |
| 226 | } else { | 236 | } else { |
| 227 | if (deletion) | 237 | if (r->deletion) |
| 228 | switch (key_type(c, &r->key)) { | 238 | switch (key_type(c, &r->key)) { |
| 229 | case UBIFS_INO_KEY: | 239 | case UBIFS_INO_KEY: |
| 230 | { | 240 | { |
| @@ -247,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | |||
| 247 | return err; | 257 | return err; |
| 248 | 258 | ||
| 249 | if (c->need_recovery) | 259 | if (c->need_recovery) |
| 250 | err = ubifs_recover_size_accum(c, &r->key, deletion, | 260 | err = ubifs_recover_size_accum(c, &r->key, r->deletion, |
| 251 | r->new_size); | 261 | r->new_size); |
| 252 | } | 262 | } |
| 253 | 263 | ||
| @@ -255,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | |||
| 255 | } | 265 | } |
| 256 | 266 | ||
| 257 | /** | 267 | /** |
| 258 | * destroy_replay_tree - destroy the replay. | 268 | * replay_entries_cmp - compare 2 replay entries. |
| 259 | * @c: UBIFS file-system description object | 269 | * @priv: UBIFS file-system description object |
| 270 | * @a: first replay entry | ||
| 271 | * @b: second replay entry | ||
| 260 | * | 272 | * |
| 261 | * Destroy the replay tree. | 273 | * This is a comparison function for 'list_sort()' which compares 2 replay |
| 274 | * entries @a and @b by comparing their sequence number. Returns %1 if @a has | ||
| 275 | * greater sequence number and %-1 otherwise. | ||
| 262 | */ | 276 | */ |
| 263 | static void destroy_replay_tree(struct ubifs_info *c) | 277 | static int replay_entries_cmp(void *priv, struct list_head *a, |
| 278 | struct list_head *b) | ||
| 264 | { | 279 | { |
| 265 | struct rb_node *this = c->replay_tree.rb_node; | 280 | struct replay_entry *ra, *rb; |
| 266 | struct replay_entry *r; | 281 | |
| 267 | 282 | cond_resched(); | |
| 268 | while (this) { | 283 | if (a == b) |
| 269 | if (this->rb_left) { | 284 | return 0; |
| 270 | this = this->rb_left; | 285 | |
| 271 | continue; | 286 | ra = list_entry(a, struct replay_entry, list); |
| 272 | } else if (this->rb_right) { | 287 | rb = list_entry(b, struct replay_entry, list); |
| 273 | this = this->rb_right; | 288 | ubifs_assert(ra->sqnum != rb->sqnum); |
| 274 | continue; | 289 | if (ra->sqnum > rb->sqnum) |
| 275 | } | 290 | return 1; |
| 276 | r = rb_entry(this, struct replay_entry, rb); | 291 | return -1; |
| 277 | this = rb_parent(this); | ||
| 278 | if (this) { | ||
| 279 | if (this->rb_left == &r->rb) | ||
| 280 | this->rb_left = NULL; | ||
| 281 | else | ||
| 282 | this->rb_right = NULL; | ||
| 283 | } | ||
| 284 | if (is_hash_key(c, &r->key)) | ||
| 285 | kfree(r->nm.name); | ||
| 286 | kfree(r); | ||
| 287 | } | ||
| 288 | c->replay_tree = RB_ROOT; | ||
| 289 | } | 292 | } |
| 290 | 293 | ||
| 291 | /** | 294 | /** |
| 292 | * apply_replay_tree - apply the replay tree to the TNC. | 295 | * apply_replay_list - apply the replay list to the TNC. |
| 293 | * @c: UBIFS file-system description object | 296 | * @c: UBIFS file-system description object |
| 294 | * | 297 | * |
| 295 | * Apply the replay tree. | 298 | * Apply all entries in the replay list to the TNC. Returns zero in case of |
| 296 | * Returns zero in case of success and a negative error code in case of | 299 | * success and a negative error code in case of failure. |
| 297 | * failure. | ||
| 298 | */ | 300 | */ |
| 299 | static int apply_replay_tree(struct ubifs_info *c) | 301 | static int apply_replay_list(struct ubifs_info *c) |
| 300 | { | 302 | { |
| 301 | struct rb_node *this = rb_first(&c->replay_tree); | 303 | struct replay_entry *r; |
| 304 | int err; | ||
| 302 | 305 | ||
| 303 | while (this) { | 306 | list_sort(c, &c->replay_list, &replay_entries_cmp); |
| 304 | struct replay_entry *r; | ||
| 305 | int err; | ||
| 306 | 307 | ||
| 308 | list_for_each_entry(r, &c->replay_list, list) { | ||
| 307 | cond_resched(); | 309 | cond_resched(); |
| 308 | 310 | ||
| 309 | r = rb_entry(this, struct replay_entry, rb); | ||
| 310 | err = apply_replay_entry(c, r); | 311 | err = apply_replay_entry(c, r); |
| 311 | if (err) | 312 | if (err) |
| 312 | return err; | 313 | return err; |
| 313 | this = rb_next(this); | ||
| 314 | } | 314 | } |
| 315 | |||
| 315 | return 0; | 316 | return 0; |
| 316 | } | 317 | } |
| 317 | 318 | ||
| 318 | /** | 319 | /** |
| 319 | * insert_node - insert a node to the replay tree. | 320 | * destroy_replay_list - destroy the replay. |
| 321 | * @c: UBIFS file-system description object | ||
| 322 | * | ||
| 323 | * Destroy the replay list. | ||
| 324 | */ | ||
| 325 | static void destroy_replay_list(struct ubifs_info *c) | ||
| 326 | { | ||
| 327 | struct replay_entry *r, *tmp; | ||
| 328 | |||
| 329 | list_for_each_entry_safe(r, tmp, &c->replay_list, list) { | ||
| 330 | if (is_hash_key(c, &r->key)) | ||
| 331 | kfree(r->nm.name); | ||
| 332 | list_del(&r->list); | ||
| 333 | kfree(r); | ||
| 334 | } | ||
| 335 | } | ||
| 336 | |||
| 337 | /** | ||
| 338 | * insert_node - insert a node to the replay list | ||
| 320 | * @c: UBIFS file-system description object | 339 | * @c: UBIFS file-system description object |
| 321 | * @lnum: node logical eraseblock number | 340 | * @lnum: node logical eraseblock number |
| 322 | * @offs: node offset | 341 | * @offs: node offset |
| @@ -328,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c) | |||
| 328 | * @old_size: truncation old size | 347 | * @old_size: truncation old size |
| 329 | * @new_size: truncation new size | 348 | * @new_size: truncation new size |
| 330 | * | 349 | * |
| 331 | * This function inserts a scanned non-direntry node to the replay tree. The | 350 | * This function inserts a scanned non-direntry node to the replay list. The |
| 332 | * replay tree is an RB-tree containing @struct replay_entry elements which are | 351 | * replay list contains @struct replay_entry elements, and we sort this list in |
| 333 | * indexed by the sequence number. The replay tree is applied at the very end | 352 | * sequence number order before applying it. The replay list is applied at the |
| 334 | * of the replay process. Since the tree is sorted in sequence number order, | 353 | * very end of the replay process. Since the list is sorted in sequence number |
| 335 | * the older modifications are applied first. This function returns zero in | 354 | * order, the older modifications are applied first. This function returns zero |
| 336 | * case of success and a negative error code in case of failure. | 355 | * in case of success and a negative error code in case of failure. |
| 337 | */ | 356 | */ |
| 338 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | 357 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, |
| 339 | union ubifs_key *key, unsigned long long sqnum, | 358 | union ubifs_key *key, unsigned long long sqnum, |
| 340 | int deletion, int *used, loff_t old_size, | 359 | int deletion, int *used, loff_t old_size, |
| 341 | loff_t new_size) | 360 | loff_t new_size) |
| 342 | { | 361 | { |
| 343 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 344 | struct replay_entry *r; | 362 | struct replay_entry *r; |
| 345 | 363 | ||
| 364 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 365 | |||
| 346 | if (key_inum(c, key) >= c->highest_inum) | 366 | if (key_inum(c, key) >= c->highest_inum) |
| 347 | c->highest_inum = key_inum(c, key); | 367 | c->highest_inum = key_inum(c, key); |
| 348 | 368 | ||
| 349 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 350 | while (*p) { | ||
| 351 | parent = *p; | ||
| 352 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 353 | if (sqnum < r->sqnum) { | ||
| 354 | p = &(*p)->rb_left; | ||
| 355 | continue; | ||
| 356 | } else if (sqnum > r->sqnum) { | ||
| 357 | p = &(*p)->rb_right; | ||
| 358 | continue; | ||
| 359 | } | ||
| 360 | ubifs_err("duplicate sqnum in replay"); | ||
| 361 | return -EINVAL; | ||
| 362 | } | ||
| 363 | |||
| 364 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | 369 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); |
| 365 | if (!r) | 370 | if (!r) |
| 366 | return -ENOMEM; | 371 | return -ENOMEM; |
| @@ -370,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | |||
| 370 | r->lnum = lnum; | 375 | r->lnum = lnum; |
| 371 | r->offs = offs; | 376 | r->offs = offs; |
| 372 | r->len = len; | 377 | r->len = len; |
| 378 | r->deletion = !!deletion; | ||
| 373 | r->sqnum = sqnum; | 379 | r->sqnum = sqnum; |
| 374 | r->flags = (deletion ? REPLAY_DELETION : 0); | 380 | key_copy(c, key, &r->key); |
| 375 | r->old_size = old_size; | 381 | r->old_size = old_size; |
| 376 | r->new_size = new_size; | 382 | r->new_size = new_size; |
| 377 | key_copy(c, key, &r->key); | ||
| 378 | 383 | ||
| 379 | rb_link_node(&r->rb, parent, p); | 384 | list_add_tail(&r->list, &c->replay_list); |
| 380 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 381 | return 0; | 385 | return 0; |
| 382 | } | 386 | } |
| 383 | 387 | ||
| 384 | /** | 388 | /** |
| 385 | * insert_dent - insert a directory entry node into the replay tree. | 389 | * insert_dent - insert a directory entry node into the replay list. |
| 386 | * @c: UBIFS file-system description object | 390 | * @c: UBIFS file-system description object |
| 387 | * @lnum: node logical eraseblock number | 391 | * @lnum: node logical eraseblock number |
| 388 | * @offs: node offset | 392 | * @offs: node offset |
| @@ -394,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | |||
| 394 | * @deletion: non-zero if this is a deletion | 398 | * @deletion: non-zero if this is a deletion |
| 395 | * @used: number of bytes in use in a LEB | 399 | * @used: number of bytes in use in a LEB |
| 396 | * | 400 | * |
| 397 | * This function inserts a scanned directory entry node to the replay tree. | 401 | * This function inserts a scanned directory entry node or an extended |
| 398 | * Returns zero in case of success and a negative error code in case of | 402 | * attribute entry to the replay list. Returns zero in case of success and a |
| 399 | * failure. | 403 | * negative error code in case of failure. |
| 400 | * | ||
| 401 | * This function is also used for extended attribute entries because they are | ||
| 402 | * implemented as directory entry nodes. | ||
| 403 | */ | 404 | */ |
| 404 | static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | 405 | static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, |
| 405 | union ubifs_key *key, const char *name, int nlen, | 406 | union ubifs_key *key, const char *name, int nlen, |
| 406 | unsigned long long sqnum, int deletion, int *used) | 407 | unsigned long long sqnum, int deletion, int *used) |
| 407 | { | 408 | { |
| 408 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 409 | struct replay_entry *r; | 409 | struct replay_entry *r; |
| 410 | char *nbuf; | 410 | char *nbuf; |
| 411 | 411 | ||
| 412 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 412 | if (key_inum(c, key) >= c->highest_inum) | 413 | if (key_inum(c, key) >= c->highest_inum) |
| 413 | c->highest_inum = key_inum(c, key); | 414 | c->highest_inum = key_inum(c, key); |
| 414 | 415 | ||
| 415 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 416 | while (*p) { | ||
| 417 | parent = *p; | ||
| 418 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 419 | if (sqnum < r->sqnum) { | ||
| 420 | p = &(*p)->rb_left; | ||
| 421 | continue; | ||
| 422 | } | ||
| 423 | if (sqnum > r->sqnum) { | ||
| 424 | p = &(*p)->rb_right; | ||
| 425 | continue; | ||
| 426 | } | ||
| 427 | ubifs_err("duplicate sqnum in replay"); | ||
| 428 | return -EINVAL; | ||
| 429 | } | ||
| 430 | |||
| 431 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | 416 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); |
| 432 | if (!r) | 417 | if (!r) |
| 433 | return -ENOMEM; | 418 | return -ENOMEM; |
| 419 | |||
| 434 | nbuf = kmalloc(nlen + 1, GFP_KERNEL); | 420 | nbuf = kmalloc(nlen + 1, GFP_KERNEL); |
| 435 | if (!nbuf) { | 421 | if (!nbuf) { |
| 436 | kfree(r); | 422 | kfree(r); |
| @@ -442,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | |||
| 442 | r->lnum = lnum; | 428 | r->lnum = lnum; |
| 443 | r->offs = offs; | 429 | r->offs = offs; |
| 444 | r->len = len; | 430 | r->len = len; |
| 431 | r->deletion = !!deletion; | ||
| 445 | r->sqnum = sqnum; | 432 | r->sqnum = sqnum; |
| 433 | key_copy(c, key, &r->key); | ||
| 446 | r->nm.len = nlen; | 434 | r->nm.len = nlen; |
| 447 | memcpy(nbuf, name, nlen); | 435 | memcpy(nbuf, name, nlen); |
| 448 | nbuf[nlen] = '\0'; | 436 | nbuf[nlen] = '\0'; |
| 449 | r->nm.name = nbuf; | 437 | r->nm.name = nbuf; |
| 450 | r->flags = (deletion ? REPLAY_DELETION : 0); | ||
| 451 | key_copy(c, key, &r->key); | ||
| 452 | 438 | ||
| 453 | ubifs_assert(!*p); | 439 | list_add_tail(&r->list, &c->replay_list); |
| 454 | rb_link_node(&r->rb, parent, p); | ||
| 455 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 456 | return 0; | 440 | return 0; |
| 457 | } | 441 | } |
| 458 | 442 | ||
| @@ -489,29 +473,92 @@ int ubifs_validate_entry(struct ubifs_info *c, | |||
| 489 | } | 473 | } |
| 490 | 474 | ||
| 491 | /** | 475 | /** |
| 476 | * is_last_bud - check if the bud is the last in the journal head. | ||
| 477 | * @c: UBIFS file-system description object | ||
| 478 | * @bud: bud description object | ||
| 479 | * | ||
| 480 | * This function checks if bud @bud is the last bud in its journal head. This | ||
| 481 | * information is then used by 'replay_bud()' to decide whether the bud can | ||
| 482 | * have corruptions or not. Indeed, only last buds can be corrupted by power | ||
| 483 | * cuts. Returns %1 if this is the last bud, and %0 if not. | ||
| 484 | */ | ||
| 485 | static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) | ||
| 486 | { | ||
| 487 | struct ubifs_jhead *jh = &c->jheads[bud->jhead]; | ||
| 488 | struct ubifs_bud *next; | ||
| 489 | uint32_t data; | ||
| 490 | int err; | ||
| 491 | |||
| 492 | if (list_is_last(&bud->list, &jh->buds_list)) | ||
| 493 | return 1; | ||
| 494 | |||
| 495 | /* | ||
| 496 | * The following is a quirk to make sure we work correctly with UBIFS | ||
| 497 | * images used with older UBIFS. | ||
| 498 | * | ||
| 499 | * Normally, the last bud will be the last in the journal head's list | ||
| 500 | * of buds. However, there is one exception if the UBIFS image belongs | ||
| 501 | * to older UBIFS. This is fairly unlikely: one would need to use old | ||
| 502 | * UBIFS, then have a power cut exactly at the right point, and then | ||
| 503 | * try to mount this image with new UBIFS. | ||
| 504 | * | ||
| 505 | * The exception is: it is possible to have 2 buds A and B, A goes | ||
| 506 | * before B, and B is the last, bud B contains no data, and bud A is | ||
| 507 | * corrupted at the end. The reason is that in older versions when the | ||
| 508 | * journal code switched the next bud (from A to B), it first added a | ||
| 509 | * log reference node for the new bud (B), and only after this it | ||
| 510 | * synchronized the write-buffer of current bud (A). But later this was | ||
| 511 | * changed and UBIFS started to always synchronize the write-buffer of | ||
| 512 | * the bud (A) before writing the log reference for the new bud (B). | ||
| 513 | * | ||
| 514 | * But because older UBIFS always synchronized A's write-buffer before | ||
| 515 | * writing to B, we can recognize this exceptional situation by | ||
| 516 | * checking the contents of bud B - if it is empty, then A can be | ||
| 517 | * treated as the last and we can recover it. | ||
| 518 | * | ||
| 519 | * TODO: remove this piece of code in a couple of years (today it is | ||
| 520 | * 16.05.2011). | ||
| 521 | */ | ||
| 522 | next = list_entry(bud->list.next, struct ubifs_bud, list); | ||
| 523 | if (!list_is_last(&next->list, &jh->buds_list)) | ||
| 524 | return 0; | ||
| 525 | |||
| 526 | err = ubi_read(c->ubi, next->lnum, (char *)&data, | ||
| 527 | next->start, 4); | ||
| 528 | if (err) | ||
| 529 | return 0; | ||
| 530 | |||
| 531 | return data == 0xFFFFFFFF; | ||
| 532 | } | ||
| 533 | |||
| 534 | /** | ||
| 492 | * replay_bud - replay a bud logical eraseblock. | 535 | * replay_bud - replay a bud logical eraseblock. |
| 493 | * @c: UBIFS file-system description object | 536 | * @c: UBIFS file-system description object |
| 494 | * @lnum: bud logical eraseblock number to replay | 537 | * @b: bud entry which describes the bud |
| 495 | * @offs: bud start offset | ||
| 496 | * @jhead: journal head to which this bud belongs | ||
| 497 | * @free: amount of free space in the bud is returned here | ||
| 498 | * @dirty: amount of dirty space from padding and deletion nodes is returned | ||
| 499 | * here | ||
| 500 | * | 538 | * |
| 501 | * This function returns zero in case of success and a negative error code in | 539 | * This function replays bud @bud, recovers it if needed, and adds all nodes |
| 502 | * case of failure. | 540 | * from this bud to the replay list. Returns zero in case of success and a |
| 541 | * negative error code in case of failure. | ||
| 503 | */ | 542 | */ |
| 504 | static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | 543 | static int replay_bud(struct ubifs_info *c, struct bud_entry *b) |
| 505 | int *free, int *dirty) | ||
| 506 | { | 544 | { |
| 507 | int err = 0, used = 0; | 545 | int is_last = is_last_bud(c, b->bud); |
| 546 | int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; | ||
| 508 | struct ubifs_scan_leb *sleb; | 547 | struct ubifs_scan_leb *sleb; |
| 509 | struct ubifs_scan_node *snod; | 548 | struct ubifs_scan_node *snod; |
| 510 | struct ubifs_bud *bud; | ||
| 511 | 549 | ||
| 512 | dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); | 550 | dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", |
| 513 | if (c->need_recovery) | 551 | lnum, b->bud->jhead, offs, is_last); |
| 514 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); | 552 | |
| 553 | if (c->need_recovery && is_last) | ||
| 554 | /* | ||
| 555 | * Recover only last LEBs in the journal heads, because power | ||
| 556 | * cuts may cause corruptions only in these LEBs, because only | ||
| 557 | * these LEBs could possibly be written to at the power cut | ||
| 558 | * time. | ||
| 559 | */ | ||
| 560 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, | ||
| 561 | b->bud->jhead != GCHD); | ||
| 515 | else | 562 | else |
| 516 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); | 563 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); |
| 517 | if (IS_ERR(sleb)) | 564 | if (IS_ERR(sleb)) |
| @@ -627,15 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | |||
| 627 | goto out; | 674 | goto out; |
| 628 | } | 675 | } |
| 629 | 676 | ||
| 630 | bud = ubifs_search_bud(c, lnum); | 677 | ubifs_assert(ubifs_search_bud(c, lnum)); |
| 631 | if (!bud) | ||
| 632 | BUG(); | ||
| 633 | |||
| 634 | ubifs_assert(sleb->endpt - offs >= used); | 678 | ubifs_assert(sleb->endpt - offs >= used); |
| 635 | ubifs_assert(sleb->endpt % c->min_io_size == 0); | 679 | ubifs_assert(sleb->endpt % c->min_io_size == 0); |
| 636 | 680 | ||
| 637 | *dirty = sleb->endpt - offs - used; | 681 | b->dirty = sleb->endpt - offs - used; |
| 638 | *free = c->leb_size - sleb->endpt; | 682 | b->free = c->leb_size - sleb->endpt; |
| 683 | dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); | ||
| 639 | 684 | ||
| 640 | out: | 685 | out: |
| 641 | ubifs_scan_destroy(sleb); | 686 | ubifs_scan_destroy(sleb); |
| @@ -649,58 +694,6 @@ out_dump: | |||
| 649 | } | 694 | } |
| 650 | 695 | ||
| 651 | /** | 696 | /** |
| 652 | * insert_ref_node - insert a reference node to the replay tree. | ||
| 653 | * @c: UBIFS file-system description object | ||
| 654 | * @lnum: node logical eraseblock number | ||
| 655 | * @offs: node offset | ||
| 656 | * @sqnum: sequence number | ||
| 657 | * @free: amount of free space in bud | ||
| 658 | * @dirty: amount of dirty space from padding and deletion nodes | ||
| 659 | * @jhead: journal head number for the bud | ||
| 660 | * | ||
| 661 | * This function inserts a reference node to the replay tree and returns zero | ||
| 662 | * in case of success or a negative error code in case of failure. | ||
| 663 | */ | ||
| 664 | static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, | ||
| 665 | unsigned long long sqnum, int free, int dirty, | ||
| 666 | int jhead) | ||
| 667 | { | ||
| 668 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 669 | struct replay_entry *r; | ||
| 670 | |||
| 671 | dbg_mnt("add ref LEB %d:%d", lnum, offs); | ||
| 672 | while (*p) { | ||
| 673 | parent = *p; | ||
| 674 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 675 | if (sqnum < r->sqnum) { | ||
| 676 | p = &(*p)->rb_left; | ||
| 677 | continue; | ||
| 678 | } else if (sqnum > r->sqnum) { | ||
| 679 | p = &(*p)->rb_right; | ||
| 680 | continue; | ||
| 681 | } | ||
| 682 | ubifs_err("duplicate sqnum in replay tree"); | ||
| 683 | return -EINVAL; | ||
| 684 | } | ||
| 685 | |||
| 686 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | ||
| 687 | if (!r) | ||
| 688 | return -ENOMEM; | ||
| 689 | |||
| 690 | r->lnum = lnum; | ||
| 691 | r->offs = offs; | ||
| 692 | r->sqnum = sqnum; | ||
| 693 | r->flags = REPLAY_REF; | ||
| 694 | r->free = free; | ||
| 695 | r->dirty = dirty; | ||
| 696 | r->jhead = jhead; | ||
| 697 | |||
| 698 | rb_link_node(&r->rb, parent, p); | ||
| 699 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 700 | return 0; | ||
| 701 | } | ||
| 702 | |||
| 703 | /** | ||
| 704 | * replay_buds - replay all buds. | 697 | * replay_buds - replay all buds. |
| 705 | * @c: UBIFS file-system description object | 698 | * @c: UBIFS file-system description object |
| 706 | * | 699 | * |
| @@ -710,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, | |||
| 710 | static int replay_buds(struct ubifs_info *c) | 703 | static int replay_buds(struct ubifs_info *c) |
| 711 | { | 704 | { |
| 712 | struct bud_entry *b; | 705 | struct bud_entry *b; |
| 713 | int err, uninitialized_var(free), uninitialized_var(dirty); | 706 | int err; |
| 707 | unsigned long long prev_sqnum = 0; | ||
| 714 | 708 | ||
| 715 | list_for_each_entry(b, &c->replay_buds, list) { | 709 | list_for_each_entry(b, &c->replay_buds, list) { |
| 716 | err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, | 710 | err = replay_bud(c, b); |
| 717 | &free, &dirty); | ||
| 718 | if (err) | ||
| 719 | return err; | ||
| 720 | err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, | ||
| 721 | free, dirty, b->bud->jhead); | ||
| 722 | if (err) | 711 | if (err) |
| 723 | return err; | 712 | return err; |
| 713 | |||
| 714 | ubifs_assert(b->sqnum > prev_sqnum); | ||
| 715 | prev_sqnum = b->sqnum; | ||
| 724 | } | 716 | } |
| 725 | 717 | ||
| 726 | return 0; | 718 | return 0; |
| @@ -1060,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
| 1060 | if (err) | 1052 | if (err) |
| 1061 | goto out; | 1053 | goto out; |
| 1062 | 1054 | ||
| 1063 | err = apply_replay_tree(c); | 1055 | err = apply_replay_list(c); |
| 1056 | if (err) | ||
| 1057 | goto out; | ||
| 1058 | |||
| 1059 | err = set_buds_lprops(c); | ||
| 1064 | if (err) | 1060 | if (err) |
| 1065 | goto out; | 1061 | goto out; |
| 1066 | 1062 | ||
| 1067 | /* | 1063 | /* |
| 1068 | * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable | 1064 | * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable |
| 1069 | * to roughly estimate index growth. Things like @c->min_idx_lebs | 1065 | * to roughly estimate index growth. Things like @c->bi.min_idx_lebs |
| 1070 | * depend on it. This means we have to initialize it to make sure | 1066 | * depend on it. This means we have to initialize it to make sure |
| 1071 | * budgeting works properly. | 1067 | * budgeting works properly. |
| 1072 | */ | 1068 | */ |
| 1073 | c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); | 1069 | c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); |
| 1074 | c->budg_uncommitted_idx *= c->max_idx_node_sz; | 1070 | c->bi.uncommitted_idx *= c->max_idx_node_sz; |
| 1075 | 1071 | ||
| 1076 | ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); | 1072 | ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); |
| 1077 | dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " | 1073 | dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " |
| 1078 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, | 1074 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, |
| 1079 | (unsigned long)c->highest_inum); | 1075 | (unsigned long)c->highest_inum); |
| 1080 | out: | 1076 | out: |
| 1081 | destroy_replay_tree(c); | 1077 | destroy_replay_list(c); |
| 1082 | destroy_bud_list(c); | 1078 | destroy_bud_list(c); |
| 1083 | c->replaying = 0; | 1079 | c->replaying = 0; |
| 1084 | return err; | 1080 | return err; |
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index bf31b4729e5..c606f010e8d 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
| @@ -475,7 +475,8 @@ failed: | |||
| 475 | * @c: UBIFS file-system description object | 475 | * @c: UBIFS file-system description object |
| 476 | * | 476 | * |
| 477 | * This function returns a pointer to the superblock node or a negative error | 477 | * This function returns a pointer to the superblock node or a negative error |
| 478 | * code. | 478 | * code. Note, the user of this function is responsible for kfree()'ing the |
| 479 | * returned superblock buffer. | ||
| 479 | */ | 480 | */ |
| 480 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) | 481 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) |
| 481 | { | 482 | { |
| @@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
| 616 | c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); | 617 | c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); |
| 617 | memcpy(&c->uuid, &sup->uuid, 16); | 618 | memcpy(&c->uuid, &sup->uuid, 16); |
| 618 | c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); | 619 | c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); |
| 620 | c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); | ||
| 619 | 621 | ||
| 620 | /* Automatically increase file system size to the maximum size */ | 622 | /* Automatically increase file system size to the maximum size */ |
| 621 | c->old_leb_cnt = c->leb_cnt; | 623 | c->old_leb_cnt = c->leb_cnt; |
| @@ -650,3 +652,152 @@ out: | |||
| 650 | kfree(sup); | 652 | kfree(sup); |
| 651 | return err; | 653 | return err; |
| 652 | } | 654 | } |
| 655 | |||
| 656 | /** | ||
| 657 | * fixup_leb - fixup/unmap an LEB containing free space. | ||
| 658 | * @c: UBIFS file-system description object | ||
| 659 | * @lnum: the LEB number to fix up | ||
| 660 | * @len: number of used bytes in LEB (starting at offset 0) | ||
| 661 | * | ||
| 662 | * This function reads the contents of the given LEB number @lnum, then fixes | ||
| 663 | * it up, so that empty min. I/O units at the end of the LEB are actually erased on | ||
| 664 | * flash (rather than being just all-0xff real data). If the LEB is completely | ||
| 665 | * empty, it is simply unmapped. | ||
| 666 | */ | ||
| 667 | static int fixup_leb(struct ubifs_info *c, int lnum, int len) | ||
| 668 | { | ||
| 669 | int err; | ||
| 670 | |||
| 671 | ubifs_assert(len >= 0); | ||
| 672 | ubifs_assert(len % c->min_io_size == 0); | ||
| 673 | ubifs_assert(len < c->leb_size); | ||
| 674 | |||
| 675 | if (len == 0) { | ||
| 676 | dbg_mnt("unmap empty LEB %d", lnum); | ||
| 677 | return ubi_leb_unmap(c->ubi, lnum); | ||
| 678 | } | ||
| 679 | |||
| 680 | dbg_mnt("fixup LEB %d, data len %d", lnum, len); | ||
| 681 | err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); | ||
| 682 | if (err) | ||
| 683 | return err; | ||
| 684 | |||
| 685 | return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); | ||
| 686 | } | ||
| 687 | |||
| 688 | /** | ||
| 689 | * fixup_free_space - find & remap all LEBs containing free space. | ||
| 690 | * @c: UBIFS file-system description object | ||
| 691 | * | ||
| 692 | * This function walks through all LEBs in the filesystem and fixes up those | ||
| 693 | * containing free/empty space. | ||
| 694 | */ | ||
| 695 | static int fixup_free_space(struct ubifs_info *c) | ||
| 696 | { | ||
| 697 | int lnum, err = 0; | ||
| 698 | struct ubifs_lprops *lprops; | ||
| 699 | |||
| 700 | ubifs_get_lprops(c); | ||
| 701 | |||
| 702 | /* Fixup LEBs in the master area */ | ||
| 703 | for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { | ||
| 704 | err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); | ||
| 705 | if (err) | ||
| 706 | goto out; | ||
| 707 | } | ||
| 708 | |||
| 709 | /* Unmap unused log LEBs */ | ||
| 710 | lnum = ubifs_next_log_lnum(c, c->lhead_lnum); | ||
| 711 | while (lnum != c->ltail_lnum) { | ||
| 712 | err = fixup_leb(c, lnum, 0); | ||
| 713 | if (err) | ||
| 714 | goto out; | ||
| 715 | lnum = ubifs_next_log_lnum(c, lnum); | ||
| 716 | } | ||
| 717 | |||
| 718 | /* Fixup the current log head */ | ||
| 719 | err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); | ||
| 720 | if (err) | ||
| 721 | goto out; | ||
| 722 | |||
| 723 | /* Fixup LEBs in the LPT area */ | ||
| 724 | for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { | ||
| 725 | int free = c->ltab[lnum - c->lpt_first].free; | ||
| 726 | |||
| 727 | if (free > 0) { | ||
| 728 | err = fixup_leb(c, lnum, c->leb_size - free); | ||
| 729 | if (err) | ||
| 730 | goto out; | ||
| 731 | } | ||
| 732 | } | ||
| 733 | |||
| 734 | /* Unmap LEBs in the orphans area */ | ||
| 735 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { | ||
| 736 | err = fixup_leb(c, lnum, 0); | ||
| 737 | if (err) | ||
| 738 | goto out; | ||
| 739 | } | ||
| 740 | |||
| 741 | /* Fixup LEBs in the main area */ | ||
| 742 | for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { | ||
| 743 | lprops = ubifs_lpt_lookup(c, lnum); | ||
| 744 | if (IS_ERR(lprops)) { | ||
| 745 | err = PTR_ERR(lprops); | ||
| 746 | goto out; | ||
| 747 | } | ||
| 748 | |||
| 749 | if (lprops->free > 0) { | ||
| 750 | err = fixup_leb(c, lnum, c->leb_size - lprops->free); | ||
| 751 | if (err) | ||
| 752 | goto out; | ||
| 753 | } | ||
| 754 | } | ||
| 755 | |||
| 756 | out: | ||
| 757 | ubifs_release_lprops(c); | ||
| 758 | return err; | ||
| 759 | } | ||
| 760 | |||
| 761 | /** | ||
| 762 | * ubifs_fixup_free_space - find & fix all LEBs with free space. | ||
| 763 | * @c: UBIFS file-system description object | ||
| 764 | * | ||
| 765 | * This function fixes up LEBs containing free space on first mount, if the | ||
| 766 | * appropriate flag was set when the FS was created. Each LEB with one or more | ||
| 767 | * empty min. I/O units (i.e. free-space-count > 0) is re-written, to make sure | ||
| 768 | * the free space is actually erased. E.g., this is necessary for some NAND | ||
| 769 | * chips, since the free space may have been programmed like real "0xff" data | ||
| 770 | * (generating a non-0xff ECC), causing future writes to the not-really-erased | ||
| 771 | * NAND pages to behave badly. After the space is fixed up, the superblock flag | ||
| 772 | * is cleared, so that this is skipped for all future mounts. | ||
| 773 | */ | ||
| 774 | int ubifs_fixup_free_space(struct ubifs_info *c) | ||
| 775 | { | ||
| 776 | int err; | ||
| 777 | struct ubifs_sb_node *sup; | ||
| 778 | |||
| 779 | ubifs_assert(c->space_fixup); | ||
| 780 | ubifs_assert(!c->ro_mount); | ||
| 781 | |||
| 782 | ubifs_msg("start fixing up free space"); | ||
| 783 | |||
| 784 | err = fixup_free_space(c); | ||
| 785 | if (err) | ||
| 786 | return err; | ||
| 787 | |||
| 788 | sup = ubifs_read_sb_node(c); | ||
| 789 | if (IS_ERR(sup)) | ||
| 790 | return PTR_ERR(sup); | ||
| 791 | |||
| 792 | /* Free-space fixup is no longer required */ | ||
| 793 | c->space_fixup = 0; | ||
| 794 | sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); | ||
| 795 | |||
| 796 | err = ubifs_write_sb_node(c, sup); | ||
| 797 | kfree(sup); | ||
| 798 | if (err) | ||
| 799 | return err; | ||
| 800 | |||
| 801 | ubifs_msg("free space fixup complete"); | ||
| 802 | return err; | ||
| 803 | } | ||
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 04ad07f4fcc..6db0bdaa9f7 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -375,7 +375,7 @@ out: | |||
| 375 | ubifs_release_dirty_inode_budget(c, ui); | 375 | ubifs_release_dirty_inode_budget(c, ui); |
| 376 | else { | 376 | else { |
| 377 | /* We've deleted something - clean the "no space" flags */ | 377 | /* We've deleted something - clean the "no space" flags */ |
| 378 | c->nospace = c->nospace_rp = 0; | 378 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 379 | smp_wmb(); | 379 | smp_wmb(); |
| 380 | } | 380 | } |
| 381 | done: | 381 | done: |
| @@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c) | |||
| 694 | * be compressed and direntries are of the maximum size. | 694 | * be compressed and direntries are of the maximum size. |
| 695 | * | 695 | * |
| 696 | * Note, data, which may be stored in inodes is budgeted separately, so | 696 | * Note, data, which may be stored in inodes is budgeted separately, so |
| 697 | * it is not included into 'c->inode_budget'. | 697 | * it is not included into 'c->bi.inode_budget'. |
| 698 | */ | 698 | */ |
| 699 | c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; | 699 | c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; |
| 700 | c->inode_budget = UBIFS_INO_NODE_SZ; | 700 | c->bi.inode_budget = UBIFS_INO_NODE_SZ; |
| 701 | c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; | 701 | c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; |
| 702 | 702 | ||
| 703 | /* | 703 | /* |
| 704 | * When the amount of flash space used by buds becomes | 704 | * When the amount of flash space used by buds becomes |
| @@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c) | |||
| 742 | { | 742 | { |
| 743 | long long tmp64; | 743 | long long tmp64; |
| 744 | 744 | ||
| 745 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 745 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 746 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); | 746 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); |
| 747 | 747 | ||
| 748 | /* | 748 | /* |
| @@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c) | |||
| 1144 | { | 1144 | { |
| 1145 | ubifs_assert(c->dark_wm > 0); | 1145 | ubifs_assert(c->dark_wm > 0); |
| 1146 | if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { | 1146 | if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { |
| 1147 | ubifs_err("insufficient free space to mount in read/write mode"); | 1147 | ubifs_err("insufficient free space to mount in R/W mode"); |
| 1148 | dbg_dump_budg(c); | 1148 | dbg_dump_budg(c, &c->bi); |
| 1149 | dbg_dump_lprops(c); | 1149 | dbg_dump_lprops(c); |
| 1150 | return -ENOSPC; | 1150 | return -ENOSPC; |
| 1151 | } | 1151 | } |
| @@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1304 | if (err) | 1304 | if (err) |
| 1305 | goto out_lpt; | 1305 | goto out_lpt; |
| 1306 | 1306 | ||
| 1307 | err = dbg_check_idx_size(c, c->old_idx_sz); | 1307 | err = dbg_check_idx_size(c, c->bi.old_idx_sz); |
| 1308 | if (err) | 1308 | if (err) |
| 1309 | goto out_lpt; | 1309 | goto out_lpt; |
| 1310 | 1310 | ||
| @@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1313 | goto out_journal; | 1313 | goto out_journal; |
| 1314 | 1314 | ||
| 1315 | /* Calculate 'min_idx_lebs' after journal replay */ | 1315 | /* Calculate 'min_idx_lebs' after journal replay */ |
| 1316 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 1316 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 1317 | 1317 | ||
| 1318 | err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); | 1318 | err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); |
| 1319 | if (err) | 1319 | if (err) |
| @@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1396 | } else | 1396 | } else |
| 1397 | ubifs_assert(c->lst.taken_empty_lebs > 0); | 1397 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
| 1398 | 1398 | ||
| 1399 | if (!c->ro_mount && c->space_fixup) { | ||
| 1400 | err = ubifs_fixup_free_space(c); | ||
| 1401 | if (err) | ||
| 1402 | goto out_infos; | ||
| 1403 | } | ||
| 1404 | |||
| 1399 | err = dbg_check_filesystem(c); | 1405 | err = dbg_check_filesystem(c); |
| 1400 | if (err) | 1406 | if (err) |
| 1401 | goto out_infos; | 1407 | goto out_infos; |
| @@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1442 | c->main_lebs, c->main_first, c->leb_cnt - 1); | 1448 | c->main_lebs, c->main_first, c->leb_cnt - 1); |
| 1443 | dbg_msg("index LEBs: %d", c->lst.idx_lebs); | 1449 | dbg_msg("index LEBs: %d", c->lst.idx_lebs); |
| 1444 | dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", | 1450 | dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", |
| 1445 | c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); | 1451 | c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, |
| 1452 | c->bi.old_idx_sz >> 20); | ||
| 1446 | dbg_msg("key hash type: %d", c->key_hash_type); | 1453 | dbg_msg("key hash type: %d", c->key_hash_type); |
| 1447 | dbg_msg("tree fanout: %d", c->fanout); | 1454 | dbg_msg("tree fanout: %d", c->fanout); |
| 1448 | dbg_msg("reserved GC LEB: %d", c->gc_lnum); | 1455 | dbg_msg("reserved GC LEB: %d", c->gc_lnum); |
| @@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1456 | dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", | 1463 | dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", |
| 1457 | UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); | 1464 | UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); |
| 1458 | dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", | 1465 | dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", |
| 1459 | UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, | 1466 | UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, |
| 1460 | UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); | 1467 | UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); |
| 1461 | dbg_msg("dead watermark: %d", c->dead_wm); | 1468 | dbg_msg("dead watermark: %d", c->dead_wm); |
| 1462 | dbg_msg("dark watermark: %d", c->dark_wm); | 1469 | dbg_msg("dark watermark: %d", c->dark_wm); |
| @@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1584 | } | 1591 | } |
| 1585 | sup->leb_cnt = cpu_to_le32(c->leb_cnt); | 1592 | sup->leb_cnt = cpu_to_le32(c->leb_cnt); |
| 1586 | err = ubifs_write_sb_node(c, sup); | 1593 | err = ubifs_write_sb_node(c, sup); |
| 1594 | kfree(sup); | ||
| 1587 | if (err) | 1595 | if (err) |
| 1588 | goto out; | 1596 | goto out; |
| 1589 | } | 1597 | } |
| @@ -1684,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1684 | */ | 1692 | */ |
| 1685 | err = dbg_check_space_info(c); | 1693 | err = dbg_check_space_info(c); |
| 1686 | } | 1694 | } |
| 1695 | |||
| 1696 | if (c->space_fixup) { | ||
| 1697 | err = ubifs_fixup_free_space(c); | ||
| 1698 | if (err) | ||
| 1699 | goto out; | ||
| 1700 | } | ||
| 1701 | |||
| 1687 | mutex_unlock(&c->umount_mutex); | 1702 | mutex_unlock(&c->umount_mutex); |
| 1688 | return err; | 1703 | return err; |
| 1689 | 1704 | ||
| @@ -1766,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb) | |||
| 1766 | * to write them back because of I/O errors. | 1781 | * to write them back because of I/O errors. |
| 1767 | */ | 1782 | */ |
| 1768 | if (!c->ro_error) { | 1783 | if (!c->ro_error) { |
| 1769 | ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); | 1784 | ubifs_assert(c->bi.idx_growth == 0); |
| 1770 | ubifs_assert(c->budg_idx_growth == 0); | 1785 | ubifs_assert(c->bi.dd_growth == 0); |
| 1771 | ubifs_assert(c->budg_dd_growth == 0); | 1786 | ubifs_assert(c->bi.data_growth == 0); |
| 1772 | ubifs_assert(c->budg_data_growth == 0); | ||
| 1773 | } | 1787 | } |
| 1774 | 1788 | ||
| 1775 | /* | 1789 | /* |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index de485979ca3..8119b1fd8d9 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
| @@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
| 2557 | if (err) { | 2557 | if (err) { |
| 2558 | /* Ensure the znode is dirtied */ | 2558 | /* Ensure the znode is dirtied */ |
| 2559 | if (znode->cnext || !ubifs_zn_dirty(znode)) { | 2559 | if (znode->cnext || !ubifs_zn_dirty(znode)) { |
| 2560 | znode = dirty_cow_bottom_up(c, znode); | 2560 | znode = dirty_cow_bottom_up(c, znode); |
| 2561 | if (IS_ERR(znode)) { | 2561 | if (IS_ERR(znode)) { |
| 2562 | err = PTR_ERR(znode); | 2562 | err = PTR_ERR(znode); |
| 2563 | goto out_unlock; | 2563 | goto out_unlock; |
| 2564 | } | 2564 | } |
| 2565 | } | 2565 | } |
| 2566 | err = tnc_delete(c, znode, n); | 2566 | err = tnc_delete(c, znode, n); |
| 2567 | } | 2567 | } |
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 53288e5d604..41920f357bb 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c | |||
| @@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) | |||
| 377 | c->gap_lebs = NULL; | 377 | c->gap_lebs = NULL; |
| 378 | return err; | 378 | return err; |
| 379 | } | 379 | } |
| 380 | if (!dbg_force_in_the_gaps_enabled) { | 380 | if (dbg_force_in_the_gaps_enabled()) { |
| 381 | /* | 381 | /* |
| 382 | * Do not print scary warnings if the debugging | 382 | * Do not print scary warnings if the debugging |
| 383 | * option which forces in-the-gaps is enabled. | 383 | * option which forces in-the-gaps is enabled. |
| 384 | */ | 384 | */ |
| 385 | ubifs_err("out of space"); | 385 | ubifs_warn("out of space"); |
| 386 | spin_lock(&c->space_lock); | 386 | dbg_dump_budg(c, &c->bi); |
| 387 | dbg_dump_budg(c); | ||
| 388 | spin_unlock(&c->space_lock); | ||
| 389 | dbg_dump_lprops(c); | 387 | dbg_dump_lprops(c); |
| 390 | } | 388 | } |
| 391 | /* Try to commit anyway */ | 389 | /* Try to commit anyway */ |
| @@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) | |||
| 796 | spin_lock(&c->space_lock); | 794 | spin_lock(&c->space_lock); |
| 797 | /* | 795 | /* |
| 798 | * Although we have not finished committing yet, update size of the | 796 | * Although we have not finished committing yet, update size of the |
| 799 | * committed index ('c->old_idx_sz') and zero out the index growth | 797 | * committed index ('c->bi.old_idx_sz') and zero out the index growth |
| 800 | * budget. It is OK to do this now, because we've reserved all the | 798 | * space which is needed to commit the index, and it is safe for the |
| 801 | * space which is needed to commit the index, and it is safe for the | 799 | * budgeting subsystem to assume the index is already committed, |
| 802 | * budgeting subsystem to assume the index is already committed, | 800 | * budgeting subsystem to assume the index is already committed, |
| 803 | * even though it is not. | 801 | * even though it is not. |
| 804 | */ | 802 | */ |
| 805 | ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); | 803 | ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); |
| 806 | c->old_idx_sz = c->calc_idx_sz; | 804 | c->bi.old_idx_sz = c->calc_idx_sz; |
| 807 | c->budg_uncommitted_idx = 0; | 805 | c->bi.uncommitted_idx = 0; |
| 808 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 806 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 809 | spin_unlock(&c->space_lock); | 807 | spin_unlock(&c->space_lock); |
| 810 | mutex_unlock(&c->tnc_mutex); | 808 | mutex_unlock(&c->tnc_mutex); |
| 811 | 809 | ||
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 191ca7863fe..e24380cf46e 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
| @@ -408,9 +408,11 @@ enum { | |||
| 408 | * Superblock flags. | 408 | * Superblock flags. |
| 409 | * | 409 | * |
| 410 | * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set | 410 | * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set |
| 411 | * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed | ||
| 411 | */ | 412 | */ |
| 412 | enum { | 413 | enum { |
| 413 | UBIFS_FLG_BIGLPT = 0x02, | 414 | UBIFS_FLG_BIGLPT = 0x02, |
| 415 | UBIFS_FLG_SPACE_FIXUP = 0x04, | ||
| 414 | }; | 416 | }; |
| 415 | 417 | ||
| 416 | /** | 418 | /** |
| @@ -434,7 +436,7 @@ struct ubifs_ch { | |||
| 434 | __u8 node_type; | 436 | __u8 node_type; |
| 435 | __u8 group_type; | 437 | __u8 group_type; |
| 436 | __u8 padding[2]; | 438 | __u8 padding[2]; |
| 437 | } __attribute__ ((packed)); | 439 | } __packed; |
| 438 | 440 | ||
| 439 | /** | 441 | /** |
| 440 | * union ubifs_dev_desc - device node descriptor. | 442 | * union ubifs_dev_desc - device node descriptor. |
| @@ -448,7 +450,7 @@ struct ubifs_ch { | |||
| 448 | union ubifs_dev_desc { | 450 | union ubifs_dev_desc { |
| 449 | __le32 new; | 451 | __le32 new; |
| 450 | __le64 huge; | 452 | __le64 huge; |
| 451 | } __attribute__ ((packed)); | 453 | } __packed; |
| 452 | 454 | ||
| 453 | /** | 455 | /** |
| 454 | * struct ubifs_ino_node - inode node. | 456 | * struct ubifs_ino_node - inode node. |
| @@ -509,7 +511,7 @@ struct ubifs_ino_node { | |||
| 509 | __le16 compr_type; | 511 | __le16 compr_type; |
| 510 | __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ | 512 | __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ |
| 511 | __u8 data[]; | 513 | __u8 data[]; |
| 512 | } __attribute__ ((packed)); | 514 | } __packed; |
| 513 | 515 | ||
| 514 | /** | 516 | /** |
| 515 | * struct ubifs_dent_node - directory entry node. | 517 | * struct ubifs_dent_node - directory entry node. |
| @@ -534,7 +536,7 @@ struct ubifs_dent_node { | |||
| 534 | __le16 nlen; | 536 | __le16 nlen; |
| 535 | __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ | 537 | __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ |
| 536 | __u8 name[]; | 538 | __u8 name[]; |
| 537 | } __attribute__ ((packed)); | 539 | } __packed; |
| 538 | 540 | ||
| 539 | /** | 541 | /** |
| 540 | * struct ubifs_data_node - data node. | 542 | * struct ubifs_data_node - data node. |
| @@ -555,7 +557,7 @@ struct ubifs_data_node { | |||
| 555 | __le16 compr_type; | 557 | __le16 compr_type; |
| 556 | __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ | 558 | __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ |
| 557 | __u8 data[]; | 559 | __u8 data[]; |
| 558 | } __attribute__ ((packed)); | 560 | } __packed; |
| 559 | 561 | ||
| 560 | /** | 562 | /** |
| 561 | * struct ubifs_trun_node - truncation node. | 563 | * struct ubifs_trun_node - truncation node. |
| @@ -575,7 +577,7 @@ struct ubifs_trun_node { | |||
| 575 | __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ | 577 | __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ |
| 576 | __le64 old_size; | 578 | __le64 old_size; |
| 577 | __le64 new_size; | 579 | __le64 new_size; |
| 578 | } __attribute__ ((packed)); | 580 | } __packed; |
| 579 | 581 | ||
| 580 | /** | 582 | /** |
| 581 | * struct ubifs_pad_node - padding node. | 583 | * struct ubifs_pad_node - padding node. |
| @@ -586,7 +588,7 @@ struct ubifs_trun_node { | |||
| 586 | struct ubifs_pad_node { | 588 | struct ubifs_pad_node { |
| 587 | struct ubifs_ch ch; | 589 | struct ubifs_ch ch; |
| 588 | __le32 pad_len; | 590 | __le32 pad_len; |
| 589 | } __attribute__ ((packed)); | 591 | } __packed; |
| 590 | 592 | ||
| 591 | /** | 593 | /** |
| 592 | * struct ubifs_sb_node - superblock node. | 594 | * struct ubifs_sb_node - superblock node. |
| @@ -644,7 +646,7 @@ struct ubifs_sb_node { | |||
| 644 | __u8 uuid[16]; | 646 | __u8 uuid[16]; |
| 645 | __le32 ro_compat_version; | 647 | __le32 ro_compat_version; |
| 646 | __u8 padding2[3968]; | 648 | __u8 padding2[3968]; |
| 647 | } __attribute__ ((packed)); | 649 | } __packed; |
| 648 | 650 | ||
| 649 | /** | 651 | /** |
| 650 | * struct ubifs_mst_node - master node. | 652 | * struct ubifs_mst_node - master node. |
| @@ -711,7 +713,7 @@ struct ubifs_mst_node { | |||
| 711 | __le32 idx_lebs; | 713 | __le32 idx_lebs; |
| 712 | __le32 leb_cnt; | 714 | __le32 leb_cnt; |
| 713 | __u8 padding[344]; | 715 | __u8 padding[344]; |
| 714 | } __attribute__ ((packed)); | 716 | } __packed; |
| 715 | 717 | ||
| 716 | /** | 718 | /** |
| 717 | * struct ubifs_ref_node - logical eraseblock reference node. | 719 | * struct ubifs_ref_node - logical eraseblock reference node. |
| @@ -727,7 +729,7 @@ struct ubifs_ref_node { | |||
| 727 | __le32 offs; | 729 | __le32 offs; |
| 728 | __le32 jhead; | 730 | __le32 jhead; |
| 729 | __u8 padding[28]; | 731 | __u8 padding[28]; |
| 730 | } __attribute__ ((packed)); | 732 | } __packed; |
| 731 | 733 | ||
| 732 | /** | 734 | /** |
| 733 | * struct ubifs_branch - key/reference/length branch | 735 | * struct ubifs_branch - key/reference/length branch |
| @@ -741,7 +743,7 @@ struct ubifs_branch { | |||
| 741 | __le32 offs; | 743 | __le32 offs; |
| 742 | __le32 len; | 744 | __le32 len; |
| 743 | __u8 key[]; | 745 | __u8 key[]; |
| 744 | } __attribute__ ((packed)); | 746 | } __packed; |
| 745 | 747 | ||
| 746 | /** | 748 | /** |
| 747 | * struct ubifs_idx_node - indexing node. | 749 | * struct ubifs_idx_node - indexing node. |
| @@ -755,7 +757,7 @@ struct ubifs_idx_node { | |||
| 755 | __le16 child_cnt; | 757 | __le16 child_cnt; |
| 756 | __le16 level; | 758 | __le16 level; |
| 757 | __u8 branches[]; | 759 | __u8 branches[]; |
| 758 | } __attribute__ ((packed)); | 760 | } __packed; |
| 759 | 761 | ||
| 760 | /** | 762 | /** |
| 761 | * struct ubifs_cs_node - commit start node. | 763 | * struct ubifs_cs_node - commit start node. |
| @@ -765,7 +767,7 @@ struct ubifs_idx_node { | |||
| 765 | struct ubifs_cs_node { | 767 | struct ubifs_cs_node { |
| 766 | struct ubifs_ch ch; | 768 | struct ubifs_ch ch; |
| 767 | __le64 cmt_no; | 769 | __le64 cmt_no; |
| 768 | } __attribute__ ((packed)); | 770 | } __packed; |
| 769 | 771 | ||
| 770 | /** | 772 | /** |
| 771 | * struct ubifs_orph_node - orphan node. | 773 | * struct ubifs_orph_node - orphan node. |
| @@ -777,6 +779,6 @@ struct ubifs_orph_node { | |||
| 777 | struct ubifs_ch ch; | 779 | struct ubifs_ch ch; |
| 778 | __le64 cmt_no; | 780 | __le64 cmt_no; |
| 779 | __le64 inos[]; | 781 | __le64 inos[]; |
| 780 | } __attribute__ ((packed)); | 782 | } __packed; |
| 781 | 783 | ||
| 782 | #endif /* __UBIFS_MEDIA_H__ */ | 784 | #endif /* __UBIFS_MEDIA_H__ */ |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 8c40ad3c672..93d1412a06f 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
| @@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb { | |||
| 389 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses | 389 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses |
| 390 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot | 390 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot |
| 391 | * make sure @inode->i_size is always changed under @ui_mutex, because it | 391 | * make sure @inode->i_size is always changed under @ui_mutex, because it |
| 392 | * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock | 392 | * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would |
| 393 | * with 'ubifs_writepage()' (see file.c). All the other inode fields are | 393 | * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields |
| 394 | * changed under @ui_mutex, so they do not need "shadow" fields. Note, one | 394 | * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one |
| 395 | * could consider to rework locking and base it on "shadow" fields. | 395 | * could consider to rework locking and base it on "shadow" fields. |
| 396 | */ | 396 | */ |
| 397 | struct ubifs_inode { | 397 | struct ubifs_inode { |
| @@ -937,6 +937,40 @@ struct ubifs_mount_opts { | |||
| 937 | unsigned int compr_type:2; | 937 | unsigned int compr_type:2; |
| 938 | }; | 938 | }; |
| 939 | 939 | ||
| 940 | /** | ||
| 941 | * struct ubifs_budg_info - UBIFS budgeting information. | ||
| 942 | * @idx_growth: amount of bytes budgeted for index growth | ||
| 943 | * @data_growth: amount of bytes budgeted for cached data | ||
| 944 | * @dd_growth: amount of bytes budgeted for cached data that will make | ||
| 945 | * other data dirty | ||
| 946 | * @uncommitted_idx: amount of bytes that were budgeted for growth of the index, but | ||
| 947 | * which still have to be taken into account because the index | ||
| 948 | * has not been committed so far | ||
| 949 | * @old_idx_sz: size of index on flash | ||
| 950 | * @min_idx_lebs: minimum number of LEBs required for the index | ||
| 951 | * @nospace: non-zero if the file-system does not have flash space (used as | ||
| 952 | * optimization) | ||
| 953 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
| 954 | * pool is full | ||
| 955 | * @page_budget: budget for a page (constant, never changed after mount) | ||
| 956 | * @inode_budget: budget for an inode (constant, never changed after mount) | ||
| 957 | * @dent_budget: budget for a directory entry (constant, never changed after | ||
| 958 | * mount) | ||
| 959 | */ | ||
| 960 | struct ubifs_budg_info { | ||
| 961 | long long idx_growth; | ||
| 962 | long long data_growth; | ||
| 963 | long long dd_growth; | ||
| 964 | long long uncommitted_idx; | ||
| 965 | unsigned long long old_idx_sz; | ||
| 966 | int min_idx_lebs; | ||
| 967 | unsigned int nospace:1; | ||
| 968 | unsigned int nospace_rp:1; | ||
| 969 | int page_budget; | ||
| 970 | int inode_budget; | ||
| 971 | int dent_budget; | ||
| 972 | }; | ||
| 973 | |||
| 940 | struct ubifs_debug_info; | 974 | struct ubifs_debug_info; |
| 941 | 975 | ||
| 942 | /** | 976 | /** |
| @@ -980,6 +1014,7 @@ struct ubifs_debug_info; | |||
| 980 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running | 1014 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running |
| 981 | * | 1015 | * |
| 982 | * @big_lpt: flag that LPT is too big to write whole during commit | 1016 | * @big_lpt: flag that LPT is too big to write whole during commit |
| 1017 | * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up | ||
| 983 | * @no_chk_data_crc: do not check CRCs when reading data nodes (except during | 1018 | * @no_chk_data_crc: do not check CRCs when reading data nodes (except during |
| 984 | * recovery) | 1019 | * recovery) |
| 985 | * @bulk_read: enable bulk-reads | 1020 | * @bulk_read: enable bulk-reads |
| @@ -1057,32 +1092,14 @@ struct ubifs_debug_info; | |||
| 1057 | * @dirty_zn_cnt: number of dirty znodes | 1092 | * @dirty_zn_cnt: number of dirty znodes |
| 1058 | * @clean_zn_cnt: number of clean znodes | 1093 | * @clean_zn_cnt: number of clean znodes |
| 1059 | * | 1094 | * |
| 1060 | * @budg_idx_growth: amount of bytes budgeted for index growth | 1095 | * @space_lock: protects @bi and @lst |
| 1061 | * @budg_data_growth: amount of bytes budgeted for cached data | 1096 | * @lst: lprops statistics |
| 1062 | * @budg_dd_growth: amount of bytes budgeted for cached data that will make | 1097 | * @bi: budgeting information |
| 1063 | * other data dirty | ||
| 1064 | * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, | ||
| 1065 | * but which still have to be taken into account because | ||
| 1066 | * the index has not been committed so far | ||
| 1067 | * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, | ||
| 1068 | * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, | ||
| 1069 | * @nospace, and @nospace_rp; | ||
| 1070 | * @min_idx_lebs: minimum number of LEBs required for the index | ||
| 1071 | * @old_idx_sz: size of index on flash | ||
| 1072 | * @calc_idx_sz: temporary variable which is used to calculate new index size | 1098 | * @calc_idx_sz: temporary variable which is used to calculate new index size |
| 1073 | * (contains accurate new index size at end of TNC commit start) | 1099 | * (contains accurate new index size at end of TNC commit start) |
| 1074 | * @lst: lprops statistics | ||
| 1075 | * @nospace: non-zero if the file-system does not have flash space (used as | ||
| 1076 | * optimization) | ||
| 1077 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
| 1078 | * pool is full | ||
| 1079 | * | ||
| 1080 | * @page_budget: budget for a page | ||
| 1081 | * @inode_budget: budget for an inode | ||
| 1082 | * @dent_budget: budget for a directory entry | ||
| 1083 | * | 1100 | * |
| 1084 | * @ref_node_alsz: size of the LEB reference node aligned to the min. flash | 1101 | * @ref_node_alsz: size of the LEB reference node aligned to the min. flash |
| 1085 | * I/O unit | 1102 | * I/O unit |
| 1086 | * @mst_node_alsz: master node aligned size | 1103 | * @mst_node_alsz: master node aligned size |
| 1087 | * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary | 1104 | * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary |
| 1088 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 1105 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
| @@ -1189,7 +1206,6 @@ struct ubifs_debug_info; | |||
| 1189 | * @replaying: %1 during journal replay | 1206 | * @replaying: %1 during journal replay |
| 1190 | * @mounting: %1 while mounting | 1207 | * @mounting: %1 while mounting |
| 1191 | * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode | 1208 | * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode |
| 1192 | * @replay_tree: temporary tree used during journal replay | ||
| 1193 | * @replay_list: temporary list used during journal replay | 1209 | * @replay_list: temporary list used during journal replay |
| 1194 | * @replay_buds: list of buds to replay | 1210 | * @replay_buds: list of buds to replay |
| 1195 | * @cs_sqnum: sequence number of first node in the log (commit start node) | 1211 | * @cs_sqnum: sequence number of first node in the log (commit start node) |
| @@ -1238,6 +1254,7 @@ struct ubifs_info { | |||
| 1238 | wait_queue_head_t cmt_wq; | 1254 | wait_queue_head_t cmt_wq; |
| 1239 | 1255 | ||
| 1240 | unsigned int big_lpt:1; | 1256 | unsigned int big_lpt:1; |
| 1257 | unsigned int space_fixup:1; | ||
| 1241 | unsigned int no_chk_data_crc:1; | 1258 | unsigned int no_chk_data_crc:1; |
| 1242 | unsigned int bulk_read:1; | 1259 | unsigned int bulk_read:1; |
| 1243 | unsigned int default_compr:2; | 1260 | unsigned int default_compr:2; |
| @@ -1308,21 +1325,10 @@ struct ubifs_info { | |||
| 1308 | atomic_long_t dirty_zn_cnt; | 1325 | atomic_long_t dirty_zn_cnt; |
| 1309 | atomic_long_t clean_zn_cnt; | 1326 | atomic_long_t clean_zn_cnt; |
| 1310 | 1327 | ||
| 1311 | long long budg_idx_growth; | ||
| 1312 | long long budg_data_growth; | ||
| 1313 | long long budg_dd_growth; | ||
| 1314 | long long budg_uncommitted_idx; | ||
| 1315 | spinlock_t space_lock; | 1328 | spinlock_t space_lock; |
| 1316 | int min_idx_lebs; | ||
| 1317 | unsigned long long old_idx_sz; | ||
| 1318 | unsigned long long calc_idx_sz; | ||
| 1319 | struct ubifs_lp_stats lst; | 1329 | struct ubifs_lp_stats lst; |
| 1320 | unsigned int nospace:1; | 1330 | struct ubifs_budg_info bi; |
| 1321 | unsigned int nospace_rp:1; | 1331 | unsigned long long calc_idx_sz; |
| 1322 | |||
| 1323 | int page_budget; | ||
| 1324 | int inode_budget; | ||
| 1325 | int dent_budget; | ||
| 1326 | 1332 | ||
| 1327 | int ref_node_alsz; | 1333 | int ref_node_alsz; |
| 1328 | int mst_node_alsz; | 1334 | int mst_node_alsz; |
| @@ -1430,7 +1436,6 @@ struct ubifs_info { | |||
| 1430 | unsigned int replaying:1; | 1436 | unsigned int replaying:1; |
| 1431 | unsigned int mounting:1; | 1437 | unsigned int mounting:1; |
| 1432 | unsigned int remounting_rw:1; | 1438 | unsigned int remounting_rw:1; |
| 1433 | struct rb_root replay_tree; | ||
| 1434 | struct list_head replay_list; | 1439 | struct list_head replay_list; |
| 1435 | struct list_head replay_buds; | 1440 | struct list_head replay_buds; |
| 1436 | unsigned long long cs_sqnum; | 1441 | unsigned long long cs_sqnum; |
| @@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c); | |||
| 1628 | int ubifs_read_superblock(struct ubifs_info *c); | 1633 | int ubifs_read_superblock(struct ubifs_info *c); |
| 1629 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); | 1634 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); |
| 1630 | int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); | 1635 | int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); |
| 1636 | int ubifs_fixup_free_space(struct ubifs_info *c); | ||
| 1631 | 1637 | ||
| 1632 | /* replay.c */ | 1638 | /* replay.c */ |
| 1633 | int ubifs_validate_entry(struct ubifs_info *c, | 1639 | int ubifs_validate_entry(struct ubifs_info *c, |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 3299f469e71..16f19f55e63 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
| @@ -80,8 +80,8 @@ enum { | |||
| 80 | SECURITY_XATTR, | 80 | SECURITY_XATTR, |
| 81 | }; | 81 | }; |
| 82 | 82 | ||
| 83 | static const struct inode_operations none_inode_operations; | 83 | static const struct inode_operations empty_iops; |
| 84 | static const struct file_operations none_file_operations; | 84 | static const struct file_operations empty_fops; |
| 85 | 85 | ||
| 86 | /** | 86 | /** |
| 87 | * create_xattr - create an extended attribute. | 87 | * create_xattr - create an extended attribute. |
| @@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, | |||
| 131 | 131 | ||
| 132 | /* Re-define all operations to be "nothing" */ | 132 | /* Re-define all operations to be "nothing" */ |
| 133 | inode->i_mapping->a_ops = &empty_aops; | 133 | inode->i_mapping->a_ops = &empty_aops; |
| 134 | inode->i_op = &none_inode_operations; | 134 | inode->i_op = &empty_iops; |
| 135 | inode->i_fop = &none_file_operations; | 135 | inode->i_fop = &empty_fops; |
| 136 | 136 | ||
| 137 | inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; | 137 | inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; |
| 138 | ui = ubifs_inode(inode); | 138 | ui = ubifs_inode(inode); |
