13944814749

Committed 19 Mar 2025 10:54AM UTC coverage: 38.0% (-0.1%) from 38.102%

Build # 13944814749

Build Type

push

github

Committed by

bohlinger

Commit Message

another bugfix in plotting projections

Run Details

0 of 3 new or added lines in 1 file covered. (0.0%)

96 existing lines in 3 files now uncovered.

2098 of 5521 relevant lines covered (38.0%)

0.38 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

9.55

/wavy/validationmod.py

"""
    Module to organize the validation procedure
    Consists mostly of functions computing validation metrics
"""
import numpy as np
from scipy import stats

# define global functions

def calc_model_activity_ratio(a, b):
    """
    Model activity ratio: standard deviation of input a (model)
    divided by the standard deviation of input b (obs).
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: np.arrays with np.nan for invalids
    """
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    positions = np.array(range(len(a)))[valid]
    model_std = np.std(a[positions])
    obs_std = np.std(b[positions])
    return model_std/obs_std

def calc_rmsd(a, b):
    '''
    Mean square deviation and root mean square deviation of a and b.
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: np.arrays with np.nan for invalids
    return: tuple (msd, rmsd)
    '''
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    positions = np.array(range(len(a)))[valid]
    first, second = a[positions], b[positions]
    count = len(first)
    squared_dev = (first - second)**2
    msd = squared_dev.sum()/count
    return msd, np.sqrt(msd)

def calc_nrmsd(a, b):
    '''
    Normalized (root) mean square deviation: the sum of squared
    deviations between a and b divided by the sum of squares of b.
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: np.arrays with np.nan for invalids
    return: tuple (normalized msd, normalized rmsd)
    '''
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    positions = np.array(range(len(a)))[valid]
    first, second = a[positions], b[positions]
    squared_dev = (first - second)**2
    nmsd = squared_dev.sum()/np.sum(second**2)
    return nmsd, np.sqrt(nmsd)

def calc_drmsd(a, b):
    '''
    Debiased (root) mean square deviation of a and b, i.e. the mean
    square deviation after the mean difference (bias) is removed.
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: array-likes (lists or np.arrays) with np.nan for invalids
    return: tuple (dmsd, drmsd); both are nan if no valid pair remains
    '''
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    diff = a[valid] - b[valid]
    # msd - bias**2 equals the variance of the differences. Computing the
    # variance directly avoids the cancellation that can push the
    # subtraction slightly below zero (and hence sqrt to nan) when the
    # deviation is almost a constant offset.
    dmsd = np.mean((diff - np.mean(diff))**2)
    drmsd = np.sqrt(dmsd)
    return dmsd, drmsd

def calc_scatter_index(model, obs):
    '''
    Scatter index in percent, in two flavours: the rmsd and the
    standard deviation of (obs - model), each divided by the nan-mean
    of obs and multiplied by 100.
    return: tuple (SIrmse, SIstd)
    '''
    rmsd = calc_rmsd(obs, model)[1]
    stddiff = np.nanstd(obs - model)
    return tuple(spread/np.nanmean(obs)*100. for spread in (rmsd, stddiff))

def calc_corrcoef(a, b):
    '''
    Correlation coefficient between a and b, taken from the
    off-diagonal entry of np.corrcoef.
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: np.arrays with np.nan for invalids
    '''
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    positions = np.array(range(len(a)))[valid]
    corr_matrix = np.corrcoef(a[positions], b[positions])
    return corr_matrix[1, 0]

def calc_bias(a, b):
    """
    Bias: mean of (a - b).
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: np.arrays with np.nan for invalids
    """
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    positions = np.array(range(len(a)))[valid]
    first, second = a[positions], b[positions]
    count = len(first)
    return np.sum(first - second)/count

def calc_nbias(a, b):
    """
    Normalized Bias [dimensionless]: sum of (a - b) divided by the
    sum of b.
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: np.arrays with np.nan for invalids
    """
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    positions = np.array(range(len(a)))[valid]
    first, second = a[positions], b[positions]
    total_diff = np.sum(first - second)
    return total_diff/np.sum(second)

def calc_mad(a, b):
    """
    Mean absolute deviation: mean of |a - b|.
    Only positions where neither a nor b is nan are used
    (principle of marginalization).
    input: np.arrays with np.nan for invalids
    """
    # a nan in either input propagates into the sum
    valid = ~np.isnan(a + b)
    positions = np.array(range(len(a)))[valid]
    first, second = a[positions], b[positions]
    count = len(first)
    abs_dev = np.abs(first - second)
    return np.sum(abs_dev)/count

def disp_validation(valid_dict):
    '''
    Print the validation scores stored in valid_dict to screen.
    Reads the keys corr, mad, rmsd, nrmsd, drmsd, bias, nbias,
    SI (second element is shown), mar, mop, mor and nov.
    '''
    def two_decimals(value):
        return '{:0.2f}'.format(value)

    print('\n')
    for header_line in ('# ---', 'Validation stats', '# ---'):
        print(header_line)
    for label, key in (
            ('Correlation Coefficient: ', 'corr'),
            ('Mean Absolute Difference: ', 'mad'),
            ('Root Mean Squared Difference: ', 'rmsd'),
            ('Normalized Root Mean Squared Difference: ', 'nrmsd'),
            ('Debiased Root Mean Squared Difference: ', 'drmsd'),
            ('Bias: ', 'bias'),
            ('Normalized Bias: ', 'nbias')):
        print(label + two_decimals(valid_dict[key]))
    # SI holds two values; the second one is displayed
    print('Scatter Index: ' + two_decimals(valid_dict['SI'][1]))
    for label, key in (
            ('Model Activity Ratio: ', 'mar'),
            ('Mean of Model: ', 'mop'),
            ('Mean of Observations: ', 'mor')):
        print(label + two_decimals(valid_dict[key]))
    print('Number of Collocated Values: ' + str(valid_dict['nov']))
    print('\n')

def validate(results_dict, boot=None):
    """
    Compute validation metrics for collocated model and observation values.

    param results_dict: dict with the entries 'model_values' and
                        'obs_values' (lists or np.arrays with np.nan
                        for invalids)
    param boot: if None/False all metrics are computed once on the
                input; if true the metrics rmsd, mad, bias and corr
                are computed for 1000 bootstrap replicates

    return: dict of metrics; scalars without bootstrap, np.arrays of
            length 1000 (one value per replicate) with bootstrap

    produced metrics (no bootstrap):
    mean of product --> mop
    mean of reference --> mor
    mean square difference --> msd
    number of data values --> nov
    scatter index --> SI
    plus rmsd, nrmsd, drmsd, corr, mad, bias, nbias, mar
    """
    model_matches = results_dict['model_values']
    if isinstance(model_matches, list):
        model_matches = np.array(model_matches)
    obs_matches = results_dict['obs_values']
    if isinstance(obs_matches, list):
        obs_matches = np.array(obs_matches)

    if not boot:
        msd, rmsd = calc_rmsd(model_matches, obs_matches)
        _, nrmsd = calc_nrmsd(model_matches, obs_matches)
        _, drmsd = calc_drmsd(model_matches, obs_matches)
        return {
            'mop': np.nanmean(model_matches),
            'mor': np.nanmean(obs_matches),
            'msd': msd,
            # nov is the length of the input, nan entries included
            'nov': len(obs_matches),
            'rmsd': rmsd,
            'nrmsd': nrmsd,
            'drmsd': drmsd,
            'corr': calc_corrcoef(model_matches, obs_matches),
            'mad': calc_mad(model_matches, obs_matches),
            'bias': calc_bias(model_matches, obs_matches),
            'nbias': calc_nbias(model_matches, obs_matches),
            'SI': calc_scatter_index(model_matches, obs_matches),
            'mar': calc_model_activity_ratio(model_matches, obs_matches),
        }

    from wavy.utils import bootstr, marginalize
    reps = 1000
    # NOTE(review): presumably marginalize drops pairs containing nan and
    # bootstr returns resampled values plus an index array with one
    # column per replicate -- confirm against wavy.utils
    newmodel, newobs, _ = marginalize(model_matches, obs_matches)
    obs_boot, boot_idx = bootstr(newobs, reps)
    print(len(obs_boot[np.isnan(obs_boot)]))
    RMSD = np.full(reps, np.nan)
    MAD = np.full(reps, np.nan)
    BIAS = np.full(reps, np.nan)
    CORR = np.full(reps, np.nan)
    for i in range(reps):
        # the replicate must use the same keys that validate reads
        sample = {
            'model_values': newmodel[boot_idx[:, i]],
            'obs_values': newobs[boot_idx[:, i]]}
        try:
            # one evaluation per replicate supplies all four metrics
            scores = validate(sample)
        except IndexError as e:
            # failed replicates keep their nan placeholders
            print(e)
            continue
        RMSD[i] = scores['rmsd']
        MAD[i] = scores['mad']
        BIAS[i] = scores['bias']
        CORR[i] = scores['corr']
    return {'rmsd': RMSD, 'mad': MAD, 'bias': BIAS, 'corr': CORR}

def linreg_evm(x, y, **kwargs):
    """
    Linear regression by the maximum likelihood effective variance method.

    K.K.Kahma 1991. Iterative solution replaced by explicit solution 1998.
    J.-V. Björkqvist 2020. From MATLAB to Python

    Reference: Orear,J 1982: Least squares when both variables have
               uncertainties J.Am Phys 50(10)

    param x: array-like of x values
    param y: array-like of y values
    kwargs: stdx - standard deviation assigned to x (default 1)
            stdy - standard deviation assigned to y (default 1)

    return: np.array([slope, intercept])
    """
    # accept plain lists as well as arrays
    x = np.asanyarray(x)
    y = np.asanyarray(y)

    stdx = kwargs.get('stdx', 1)
    stdy = kwargs.get('stdy', 1)

    x0 = np.mean(x)
    y0 = np.mean(y)

    sx2 = stdx**2
    sy2 = stdy**2
    # np.sum instead of the builtin sum keeps the reductions vectorized
    Sx2 = np.sum((x-x0)**2)
    Sy2 = np.sum((y-y0)**2)
    Sxy = np.sum((x-x0)*(y-y0))

    if sx2 == 0 or Sxy == 0:
        # the general expression would divide by zero here;
        # use the ratio Sxy/Sx2 as slope instead
        slope = Sxy/Sx2
    else:
        root_arg = ((sy2*Sx2)**2
                    - 2*Sx2*sy2*sx2*Sy2
                    + (sx2*Sy2)**2
                    + 4*Sxy**2*sx2*sy2)
        slope = (sx2*Sy2 - sy2*Sx2 + np.sqrt(root_arg))/(2*Sxy*sx2)

    # the fitted line passes through the point of means (x0, y0)
    return np.array([slope, y0 - slope*x0])

def linreg_std(x, y, **kwargs):
    """
    Least-squares linear regression of y on x using
    scipy.stats.linregress.
    Additional keyword arguments are accepted but not used.
    return: dict with the entries 'slope' and 'intercept'
    """
    fit = stats.linregress(x, y)
    return {'slope': fit.slope, 'intercept': fit.intercept}

1	"""
2	Module to organize the validation procedure
3	Consists mostly of functions computing validation metrics
4	"""
5	import numpy as np	1✔
6	from scipy import stats	1✔
7
8	# define global functions
9
10	def calc_model_activity_ratio(a, b):	1✔
11	"""
12	computes the model activity ratio of input a (mode) and input b (obs)
13	if nans exist the prinziple of marginalization is applied
14	input: np.arrays with np.nan for invalids
15	"""
16	comb = a + b	×
17	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
18	a1 = a[idx]	×
19	b1 = b[idx]	×
20	mar = np.std(a1)/np.std(b1)	×
UNCOV 21	return mar	×
22
23	def calc_rmsd(a, b):	1✔
24	'''
25	root mean square deviation
26	if nans exist the prinziple of marginalization is applied
27	input: np.arrays with np.nan for invalids
28	'''
29	comb = a + b	×
30	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
31	a1 = a[idx]	×
32	b1 = b[idx]	×
33	n = len(a1)	×
34	diff2 = (a1-b1)**2	×
35	msd = diff2.sum()/n	×
36	rmsd = np.sqrt(msd)	×
UNCOV 37	return msd, rmsd	×
38
39	def calc_nrmsd(a, b):	1✔
40	'''
41	Normalized root mean square deviation
42	if nans exist the prinziple of marginalization is applied
43	input: np.arrays with np.nan for invalids
44	'''
45	comb = a + b	×
46	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
47	a1 = a[idx]	×
48	b1 = b[idx]	×
49	diff2 = (a1-b1)**2	×
50	msd = diff2.sum()/np.sum(b1**2)	×
51	rmsd = np.sqrt(msd)	×
UNCOV 52	return msd, rmsd	×
53
54	def calc_drmsd(a, b):	1✔
55	'''
56	debiased root mean square deviation
57	if nans exist the prinziple of marginalization is applied
58	'''
59	a, b = np.array(a), np.array(b)	×
60	comb = a + b	×
61	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
62	a1 = a[idx]	×
63	b1 = b[idx]	×
64	n = len(a1)	×
65	diff2 = (a1-b1)**2	×
66	msd = diff2.sum()/n	×
67	dmsd = msd - calc_bias(a, b)**2	×
68	drmsd = np.sqrt(dmsd)	×
UNCOV 69	return dmsd, drmsd	×
70
71	def calc_scatter_index(model, obs):	1✔
72	'''
73	Scatter index based on rmse and on std of diff
74	'''
75	_, rmsd = calc_rmsd(obs, model)	×
76	stddiff = np.nanstd(obs-model)	×
77	SIrmse = rmsd/np.nanmean(obs)*100.	×
78	SIstd = stddiff/np.nanmean(obs)*100.	×
UNCOV 79	return SIrmse, SIstd	×
80
81	def calc_corrcoef(a, b):	1✔
82	'''
83	if nans exist the prinziple of marginalization is applied
84	input: np.arrays with np.nan for invalids
85	'''
86	comb = a + b	×
87	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
88	a1 = a[idx]	×
89	b1 = b[idx]	×
90	corr = np.corrcoef(a1, b1)[1, 0]	×
UNCOV 91	return corr	×
92
93	def calc_bias(a, b):	1✔
94	"""
95	Bias
96	if nans exist the prinziple of marginalization is applied
97	input: np.arrays with np.nan for invalids
98	"""
99	comb = a + b	×
100	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
101	a1 = a[idx]	×
102	b1 = b[idx]	×
103	N = len(a1)	×
104	bias = np.sum(a1-b1)/N	×
UNCOV 105	return bias	×
106
107	def calc_nbias(a, b):	1✔
108	"""
109	Normalized Bias [dimensionless]
110	if nans exist the prinziple of marginalization is applied
111	input: np.arrays with np.nan for invalids
112	"""
113	comb = a + b	×
114	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
115	a1 = a[idx]	×
116	b1 = b[idx]	×
117	nbias = np.sum(a1-b1)/np.sum(b1)	×
UNCOV 118	return nbias	×
119
120	def calc_mad(a, b):	1✔
121	"""
122	mean absolute deviation
123	if nans exist the prinziple of marginalization is applied
124	input: np.arrays with np.nan for invalids
125	"""
126	comb = a + b	×
127	idx = np.array(range(len(a)))[~np.isnan(comb)]	×
128	a1 = a[idx]	×
129	b1 = b[idx]	×
130	N = len(a1)	×
131	mad = np.sum(np.abs(a1-b1))/N	×
UNCOV 132	return mad	×
133
134	def disp_validation(valid_dict):	1✔
135	'''
136	Print to screen validation scores.
137	'''
138	print('\n')	×
139	print('# ---')	×
140	print('Validation stats')	×
141	print('# ---')	×
UNCOV 142	print('Correlation Coefficient: '	×
143	+ '{:0.2f}'.format(valid_dict['corr']))
144	print('Mean Absolute Difference: ' + '{:0.2f}'.format(valid_dict['mad']))	×
UNCOV 145	print('Root Mean Squared Difference: '	×
146	+ '{:0.2f}'.format(valid_dict['rmsd']))
UNCOV 147	print('Normalized Root Mean Squared Difference: '	×
148	+ '{:0.2f}'.format(valid_dict['nrmsd']))
UNCOV 149	print('Debiased Root Mean Squared Difference: '	×
150	+ '{:0.2f}'.format(valid_dict['drmsd']))
151	print('Bias: ' + '{:0.2f}'.format(valid_dict['bias']))	×
152	print('Normalized Bias: ' + '{:0.2f}'.format(valid_dict['nbias']))	×
153	print('Scatter Index: ' + '{:0.2f}'.format(valid_dict['SI'][1]))	×
154	print('Model Activity Ratio: ' + '{:0.2f}'.format(valid_dict['mar']))	×
155	print('Mean of Model: ' + '{:0.2f}'.format(valid_dict['mop']))	×
156	print('Mean of Observations: ' + '{:0.2f}'.format(valid_dict['mor']))	×
157	print('Number of Collocated Values: ' + str(valid_dict['nov']))	×
158	print('\n')	×
UNCOV 159	pass	×
160
161	def validate(results_dict, boot=None):	1✔
162	import numpy as np	×
UNCOV 163	"""	×
164	vars in dict: np.arrays with np.nan for invalids
165
166	produced metrics:
167	mean of product --> mop
168	mean of reference --> mor
169	mean square difference --> msd
170	number of data values --> nov
171	scatter index --> SI
172	"""
173	# date_matches = results_dict['datetime']
174	if isinstance(results_dict['model_values'], list):	×
UNCOV 175	model_matches = np.array(results_dict['model_values'])	×
176	else:
177	model_matches = results_dict['model_values']	×
178	if isinstance(results_dict['obs_values'], list):	×
UNCOV 179	obs_matches = np.array(results_dict['obs_values'])	×
180	else:
181	obs_matches = results_dict['obs_values']	×
182	if (boot is None or boot is False):	×
183	mop = np.nanmean(model_matches)	×
184	mor = np.nanmean(obs_matches)	×
185	msd, rmsd = calc_rmsd(model_matches, obs_matches)	×
186	_, nrmsd = calc_nrmsd(model_matches, obs_matches)	×
187	_, drmsd = calc_drmsd(model_matches, obs_matches)	×
188	nov = len(obs_matches)	×
189	mad = calc_mad(model_matches, obs_matches)	×
190	corr = calc_corrcoef(model_matches, obs_matches)	×
191	bias = calc_bias(model_matches, obs_matches)	×
192	nbias = calc_nbias(model_matches, obs_matches)	×
193	SI = calc_scatter_index(model_matches, obs_matches)	×
194	mar = calc_model_activity_ratio(model_matches, obs_matches)	×
UNCOV 195	validation_dict = {	×
196	'mop': mop,
197	'mor': mor,
198	'msd': msd,
199	'nov': nov,
200	'rmsd': rmsd,
201	'nrmsd': nrmsd,
202	'drmsd': drmsd,
203	'corr': corr,
204	'mad': mad,
205	'bias': bias,
206	'nbias': nbias,
207	'SI': SI,
208	'mar': mar}
209	elif boot is True:	×
210	from wavy.utils import bootstr, marginalize	×
211	reps = 1000	×
212	newmodel, newobs, _ = marginalize(model_matches, obs_matches)	×
213	obs_boot, boot_idx = bootstr(newobs, reps)	×
214	print(len(obs_boot[np.isnan(obs_boot)]))	×
215	RMSD = np.zeros(reps)*np.nan	×
216	MSD = np.zeros(reps)*np.nan	×
217	BIAS = np.zeros(reps)*np.nan	×
218	CORR = np.zeros(reps)*np.nan	×
219	for i in range(reps):	×
UNCOV 220	results_dict = {	×
221	#'date_matches':date_matches[newidx[boot_idx[:,i]]],
222	'model_matches': newmodel[boot_idx[:, i]],
223	'sat_matches': newobs[boot_idx[:, i]]}
224	try:	×
225	RMSD[i] = validate(results_dict)['rmsd']	×
226	MSD[i] = validate(results_dict)['mad']	×
227	BIAS[i] = validate(results_dict)['bias']	×
228	CORR[i] = validate(results_dict)['corr']	×
229	except IndexError as e:	×
230	print(e)	×
231	validation_dict = {'rmsd': RMSD, 'mad': MSD, 'bias': BIAS, 'corr': CORR}	×
UNCOV 232	return validation_dict	×
233
234	def linreg_evm(x, y, **kwargs):	1✔
235	# Linear regression by the maximum likelihood effective variance method.
236	#
237	# K.K.Kahma 1991. Iterative solution replaced by explicit solution 1998.
238	# J.-V. Björkqvist 2020. From MATLAB to Python
239	#
240	# Reference: Orear,J 1982: Least squares when both variables have
241	# uncertanties J.Am Phys 50(10)
242
UNCOV 243	stdx = kwargs.get('stdx', 1)	×
244	stdy = kwargs.get('stdy', 1)	×
245
246	x0 = np.mean(x)	×
247	y0 = np.mean(y)	×
248
UNCOV 249	sx2 = stdx**2	×
250	sy2 = stdy**2	×
251	Sx2 = sum((x-x0)**2)	×
UNCOV 252	Sy2 = sum((y-y0)**2)	×
253	Sxy = sum((x-x0)*(y-y0))	×
254
UNCOV 255	if sx2 == 0 or Sxy == 0:	×
UNCOV 256	P = np.array([Sxy/Sx2])	×
257	else:
UNCOV 258	P = np.array([(sx2*Sy2-sy2*Sx2	×
259	+ np.sqrt((sy2*Sx2)**2
260	- 2*Sx2*sy2*sx2*Sy2
261	+ (sx2*Sy2)**2+4*Sxy**2*sx2*sy2)
262	)/(2*Sxy*sx2)])
263
UNCOV 264	P = np.append(P, y0-P[0]*x0)	×
UNCOV 265	return P	×
266
267	def linreg_std(x, y, **kwargs):	1✔
UNCOV 268	slope, intercept, r, p, std_err = stats.linregress(x, y)	×
UNCOV 269	return {'slope': slope, 'intercept': intercept}	×

bohlinger / wavy / 13944814749

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous