igmhub / picca / build 9971080695
17 Jul 2024 08:45AM UTC. Coverage: 66.931% (+0.2%) from 66.751%.

Pull Request #1071: Fast desihealpix [bump minor]
Merge 982a0f12c into fb832aeda (github / web-flow)

1824 of 3127 branches covered (58.33%); branch coverage is included in the aggregate %.
53 of 83 new or added lines in 2 files covered (63.86%).
209 existing lines in 1 file are now uncovered.
6278 of 8978 relevant lines covered (69.93%), 2.1 hits per line.

Source file: /py/picca/pk1d/postproc_pk1d.py (31.93% covered)

"""Module defining a set of functions to postprocess files produced by compute_pk1d.py.

This module provides three main functions:
    - read_pk1d:
        Reads all HDUs in an individual "P1D" FITS file and stacks
        all data in one table
    - compute_mean_pk1d:
        Computes the mean P1D in a given (z,k) grid of bins, from the
        "P1Ds" of individual chunks
    - run_postproc_pk1d:
        Main function; runs read_pk1d in parallel, then runs compute_mean_pk1d
See the respective documentation for details.
"""

import glob
import os
from functools import partial
from multiprocessing import Pool

import fitsio
import numpy as np
from astropy.stats import bootstrap
from astropy.table import Table, vstack
from scipy.optimize import curve_fit
from scipy.stats import binned_statistic
from picca.constants import ABSORBER_IGM, SPEED_LIGHT
from picca.pk1d.utils import MEANPK_FITRANGE_SNR, fitfunc_variance_pk1d
from picca.utils import userprint


def read_pk1d(filename, kbin_edges, snrcut=None, zbins_snrcut=None):
    """Read Pk1D data from a single file.

    Arguments
    ---------
    filename: string
    FITS file containing an individual "P1D" for each chunk

    kbin_edges: array of floats
    Edges of the wavenumber bins to be later used, in Angstrom^-1

    snrcut: array of floats or None
    Chunks with mean SNR below snrcut are discarded. If len(snrcut)>1,
    zbins_snrcut must be set, so that the cut is made redshift dependent.

    zbins_snrcut: array of floats or None
    Required if len(snrcut)>1. List of redshifts
    associated to the list of snr cuts.

    Return
    ------
    p1d_table: Table
    One entry per mode (k) per chunk

    z_array: array of floats
    Nchunks entries.
    If no chunk is selected, None will be returned instead
    """
    p1d_table = []
    z_array = []
    with fitsio.FITS(filename, "r") as hdus:
        for i, hdu in enumerate(hdus[1:]):
            data = hdu.read()
            chunk_header = hdu.read_header()
            chunk_table = Table(data)
            for colname in [
                "k",
                "Pk",
                "Pk_raw",
                "Pk_noise",
                "Pk_diff",
                "cor_reso",
                "Pk_noise_miss",
                "Delta_k",
                "Delta_noise_k",
                "Delta_diff_k",
            ]:
                try:
                    chunk_table.rename_column(colname.upper(), colname)
                except KeyError:
                    pass

            if np.nansum(chunk_table["Pk"]) == 0:
                chunk_table["Pk"] = (
                    chunk_table["Pk_raw"] - chunk_table["Pk_noise"]
                ) / chunk_table["cor_reso"]

            chunk_table["forest_z"] = float(chunk_header["MEANZ"])
            chunk_table["forest_snr"] = float(chunk_header["MEANSNR"])
            chunk_table["forest_id"] = int(chunk_header["LOS_ID"])
            if "CHUNK_ID" in chunk_header:
                chunk_table["sub_forest_id"] = (
                    f"{chunk_header['LOS_ID']}_{chunk_header['CHUNK_ID']}"
                )

            if snrcut is not None:
                if len(snrcut) > 1:
                    if (zbins_snrcut is None) or (len(zbins_snrcut) != len(snrcut)):
                        raise ValueError(
                            "zbins_snrcut and snrcut arrays must have the same size"
                        )
                    zbin_index = np.argmin(np.abs(zbins_snrcut - chunk_header["MEANZ"]))
                    snrcut_chunk = snrcut[zbin_index]
                else:
                    snrcut_chunk = snrcut[0]
                if chunk_header["MEANSNR"] < snrcut_chunk:
                    continue

            # Empirically remove very noisy chunks
            (wk,) = np.where(chunk_table["k"] < kbin_edges[-1])
            if (
                np.abs(chunk_table["Pk_noise"][wk])
                > 1000000 * np.abs(chunk_table["Pk_raw"][wk])
            ).any():
                userprint(
                    f"file {filename} hdu {i+1} has very high noise power: discarded"
                )
                continue

            p1d_table.append(chunk_table)
            z_array.append(float(chunk_header["MEANZ"]))

    if len(p1d_table) == 0:  # No chunk was selected
        return None

    p1d_table = vstack(p1d_table)
    p1d_table["Delta2"] = p1d_table["k"] * p1d_table["Pk"] / np.pi
    p1d_table["Pk_norescor"] = p1d_table["Pk_raw"] - p1d_table["Pk_noise"]
    p1d_table["Pk_nonoise"] = p1d_table["Pk_raw"] / p1d_table["cor_reso"]
    p1d_table["Pk_noraw"] = p1d_table["Pk_noise"] / p1d_table["cor_reso"]
    try:
        p1d_table["Pk_noraw_miss"] = p1d_table["Pk_noise_miss"] / p1d_table["cor_reso"]
    except KeyError:
        pass

    z_array = np.array(z_array)

    return p1d_table, z_array
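
# Example: reading a single P1D file (an illustrative sketch; the file name
# and bin edges are hypothetical, not taken from the picca documentation):
#
#     kbin_edges = np.arange(0.001, 0.1, 0.005)  # in Angstrom^-1
#     result = read_pk1d("Pk1D-0.fits.gz", kbin_edges, snrcut=np.array([2.0]))
#     if result is not None:
#         p1d_table, z_array = result  # one row per (chunk, k); one z per chunk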


def mean_p1d_table_regular_slice(izbin, nbins_k):
    """Return the arguments of a slice of mean P1D table for a given redshift.

    Arguments
    ---------
    izbin (int):
    Current redshift bin being considered.

    nbins_k (int):
    Number of k bins.

    Return
    ------
    Arguments of a slice of mean P1D table.
    """
    return izbin * nbins_k, (izbin + 1) * nbins_k


def cov_table_regular_slice(izbin, nbins_k):
    """Return the arguments of a slice of covariance table for a given redshift.

    Arguments
    ---------
    izbin (int):
    Current redshift bin being considered.

    nbins_k (int):
    Number of k bins.

    Return
    ------
    Arguments of a slice of covariance table.
    """
    return izbin * nbins_k * nbins_k, (izbin + 1) * nbins_k * nbins_k
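
# Example of the index arithmetic of the two helpers above: with nbins_k = 3,
# mean_p1d_table_regular_slice(0, 3) == (0, 3) and
# mean_p1d_table_regular_slice(2, 3) == (6, 9), i.e. redshift bin izbin owns
# rows [izbin * nbins_k, (izbin + 1) * nbins_k) of mean_p1d_table, while
# cov_table_regular_slice(1, 3) == (9, 18) selects the nbins_k * nbins_k = 9
# covariance rows of redshift bin 1.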


def compute_mean_pk1d(
    p1d_table,
    z_array,
    zbin_edges,
    kbin_edges,
    weight_method,
    apply_z_weights=False,
    nomedians=False,
    velunits=False,
    output_snrfit=None,
    compute_covariance=False,
    compute_bootstrap=False,
    number_bootstrap=50,
    number_worker=8,
):
    """Compute mean P1D in a set of given (z,k) bins, from individual chunks P1Ds.

    Arguments
    ---------
    p1d_table: astropy.table.Table
    Individual Pk1Ds of the contributing forest chunks, stacked in one table using "read_pk1d".
    Contains 'k', 'Pk_raw', 'Pk_noise', 'Pk_diff', 'cor_reso', 'Pk', 'forest_z', 'forest_snr',
            'Delta2', 'Pk_norescor', 'Pk_nonoise', 'Pk_noraw'

    z_array: Array of float
    Mean z of each contributing chunk, stacked in one array using "read_pk1d"

    zbin_edges: Array of float
    Edges of the redshift bins we want to use

    kbin_edges: Array of float
    Edges of the wavenumber bins we want to use, either in Angstrom^-1 or s/km

    weight_method: string
    Two possible options:
        'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR
        'no_weights': Compute mean P1D without weights

    apply_z_weights: Bool
    If True, each chunk contributes to the two nearest redshift bins with a linear
    weighting scheme.

    nomedians: Bool
    Skip computation of median quantities

    velunits: Bool
    Compute P1D in velocity units by converting k on-the-fly from AA^-1 to s/km

    output_snrfit: string
    If weight_method='fit_snr', the results of the fit can be saved to an ASCII file.
    The file contains (z k a b standard_dev_points) for the "Pk" variable, for each (z,k) point

    compute_covariance: Bool
    If True, compute statistical covariance matrix of the mean P1D.
    Requires p1d_table to contain 'sub_forest_id', since correlations are computed
    within individual forests.

    compute_bootstrap: Bool
    If True, compute statistical covariance using a simple bootstrap method.

    number_bootstrap: int
    Number of bootstrap samples used if compute_bootstrap is True.

    number_worker: int
    Calculations of mean P1Ds and covariances are run in parallel over redshift bins.

    Return
    ------
    mean_p1d_table: astropy.table.Table
    One row per (z,k) bin; one column per statistic (e.g. meanPk, errorPk_noise, ...).
    Other columns: 'N' (number of chunks used), 'index_zbin' (index of associated
    row in metadata_table), 'zbin'

    metadata_table: astropy.table.Table
    One row per z bin; column values z_min/max, k_min/max, N_chunks

    cov_table: astropy.table.Table or None
    One row per (zbin, k1, k2) bin; filled only if compute_covariance or
    compute_bootstrap is set, None otherwise
    """
    # Initializing stats we want to compute on data
    stats_array = ["mean", "error", "min", "max"]
    if not nomedians:
        stats_array += ["median"]

    p1d_table_cols = p1d_table.colnames
    p1d_table_cols.remove("forest_id")
    if "sub_forest_id" in p1d_table_cols:
        p1d_table_cols.remove("sub_forest_id")

    p1d_table_cols = [col for col in p1d_table_cols if "Delta_" not in col]

    # Convert data into velocity units
    if velunits:
        conversion_factor = (
            ABSORBER_IGM["LYA"] * (1.0 + p1d_table["forest_z"])
        ) / SPEED_LIGHT
        p1d_table["k"] *= conversion_factor
        for col in p1d_table_cols:
            if "Pk" in col:
                p1d_table[col] /= conversion_factor

    # Initialize mean_p1d_table of len = (nzbins * nkbins) corresponding to hdu[1] in final output
    mean_p1d_table = Table()
    nbins_z, nbins_k = len(zbin_edges) - 1, len(kbin_edges) - 1
    mean_p1d_table["zbin"] = np.zeros(nbins_z * nbins_k)
    mean_p1d_table["index_zbin"] = np.zeros(nbins_z * nbins_k, dtype=int)
    mean_p1d_table["N"] = np.zeros(nbins_z * nbins_k, dtype=int)
    for col in p1d_table_cols:
        for stats in stats_array:
            mean_p1d_table[stats + col] = np.ones(nbins_z * nbins_k) * np.nan

    # Initialize metadata_table of len = nbins_z corresponding to hdu[2] in final output
    metadata_table = Table()
    metadata_table["z_min"] = zbin_edges[:-1]
    metadata_table["z_max"] = zbin_edges[1:]
    metadata_table["k_min"] = kbin_edges[0] * np.ones(nbins_z)
    metadata_table["k_max"] = kbin_edges[-1] * np.ones(nbins_z)

    if compute_covariance or compute_bootstrap:
        if "sub_forest_id" not in p1d_table.columns:
            userprint(
                """sub_forest_id cannot be computed from individual pk files,
                but is necessary to compute covariance. Skipping calculation"""
            )
            compute_covariance, compute_bootstrap = False, False
            cov_table = None

        elif apply_z_weights:
            userprint(
                """Covariance calculations are not compatible with redshift weighting.
                Skipping calculation"""
            )
            compute_covariance, compute_bootstrap = False, False
            cov_table = None

        else:
            # Initialize cov_table of len = (nzbins * nkbins * nkbins)
            # corresponding to hdu[3] in final output
            cov_table = Table()
            cov_table["zbin"] = np.zeros(nbins_z * nbins_k * nbins_k)
            cov_table["index_zbin"] = np.zeros(nbins_z * nbins_k * nbins_k, dtype=int)
            cov_table["N"] = np.zeros(nbins_z * nbins_k * nbins_k, dtype=int)
            cov_table["covariance"] = np.zeros(nbins_z * nbins_k * nbins_k)
            cov_table["k1"] = np.zeros(nbins_z * nbins_k * nbins_k)
            cov_table["k2"] = np.zeros(nbins_z * nbins_k * nbins_k)

            if compute_bootstrap:
                cov_table["boot_covariance"] = np.zeros(nbins_z * nbins_k * nbins_k)
                cov_table["error_boot_covariance"] = np.zeros(
                    nbins_z * nbins_k * nbins_k
                )

            k_index = np.full(len(p1d_table["k"]), -1, dtype=int)
            for ikbin, _ in enumerate(kbin_edges[:-1]):  # First loop 1) k bins
                select = (p1d_table["k"] < kbin_edges[ikbin + 1]) & (
                    p1d_table["k"] > kbin_edges[ikbin]
                )  # select a specific k bin
                k_index[select] = ikbin
    else:
        cov_table = None

    # Number of chunks in each redshift bin
    n_chunks, _, _ = binned_statistic(
        z_array, z_array, statistic="count", bins=zbin_edges
    )
    metadata_table["N_chunks"] = n_chunks

    zbin_centers = np.around((zbin_edges[1:] + zbin_edges[:-1]) / 2, 5)
    if weight_method == "fit_snr":
        snrfit_table = np.zeros(
            (nbins_z * nbins_k, 13)
        )  # 13 entries: z k a b + 9 SNR bins used for the fit
    else:
        snrfit_table = None

    userprint("Computing average p1d")
    # Main loop 1) z bins
    params_pool = [[izbin] for izbin, _ in enumerate(zbin_edges[:-1])]

    func = partial(
        compute_average_pk_redshift,
        p1d_table,
        p1d_table_cols,
        weight_method,
        apply_z_weights,
        nomedians,
        nbins_z,
        zbin_centers,
        zbin_edges,
        n_chunks,
        nbins_k,
        kbin_edges,
    )
    if number_worker == 1:
        output_mean = [func(p[0]) for p in params_pool]
    else:
        with Pool(number_worker) as pool:
            output_mean = pool.starmap(func, params_pool)

    fill_average_pk(
        nbins_z,
        nbins_k,
        output_mean,
        mean_p1d_table,
        p1d_table_cols,
        weight_method,
        snrfit_table,
        nomedians,
    )

    if compute_covariance or compute_bootstrap:
        userprint("Computation of p1d groups for covariance matrix calculation")

        p1d_groups = []
        for izbin in range(nbins_z):
            zbin_array = np.full(nbins_k * nbins_k, zbin_centers[izbin])
            index_zbin_array = np.full(nbins_k * nbins_k, izbin, dtype=int)
            kbin_centers = (kbin_edges[1:] + kbin_edges[:-1]) / 2
            k1_array, k2_array = np.meshgrid(kbin_centers, kbin_centers, indexing="ij")
            k1_array = np.ravel(k1_array)
            k2_array = np.ravel(k2_array)
            select = mean_p1d_table["index_zbin"] == izbin
            n_array = np.ravel(
                np.outer(mean_p1d_table["N"][select], mean_p1d_table["N"][select])
            )
            index_cov = cov_table_regular_slice(izbin, nbins_k)
            cov_table["zbin"][index_cov[0]:index_cov[1]] = zbin_array
            cov_table["index_zbin"][index_cov[0]:index_cov[1]] = index_zbin_array
            cov_table["k1"][index_cov[0]:index_cov[1]] = k1_array
            cov_table["k2"][index_cov[0]:index_cov[1]] = k2_array
            cov_table["N"][index_cov[0]:index_cov[1]] = n_array
            index_mean = mean_p1d_table_regular_slice(izbin, nbins_k)
            mean_pk = mean_p1d_table["meanPk"][index_mean[0]:index_mean[1]]
            error_pk = mean_p1d_table["errorPk"][index_mean[0]:index_mean[1]]

            if n_chunks[izbin] == 0:
                p1d_weights_z, covariance_weights_z, p1d_groups_z = [], [], []
            else:
                p1d_weights_z, covariance_weights_z, p1d_groups_z = compute_p1d_groups(
                    weight_method,
                    nbins_k,
                    zbin_edges,
                    izbin,
                    p1d_table,
                    k_index,
                    snrfit_table,
                    number_worker,
                )

            p1d_groups.append(
                [
                    mean_pk,
                    error_pk,
                    p1d_weights_z,
                    covariance_weights_z,
                    p1d_groups_z,
                ]
            )

        compute_and_fill_covariance(
            compute_covariance,
            compute_bootstrap,
            weight_method,
            nbins_k,
            nbins_z,
            p1d_groups,
            number_worker,
            number_bootstrap,
            cov_table,
        )

    if output_snrfit is not None:
        np.savetxt(
            output_snrfit,
            snrfit_table,
            fmt="%.5e",
            header="Result of fit: Variance(Pks) vs SNR\n"
            "SNR bin edges used: 1,  2,  3,  4,  5,  6,  7,  8,  9, 10\n"
            "z k a b standard_dev_points",
        )

    return mean_p1d_table, metadata_table, cov_table
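
# Example call (an illustrative sketch; the bin edges are hypothetical):
#
#     zbin_edges = np.arange(2.1, 3.3, 0.2)
#     kbin_edges = np.arange(0.001, 0.1, 0.005)
#     mean_p1d_table, metadata_table, cov_table = compute_mean_pk1d(
#         p1d_table, z_array, zbin_edges, kbin_edges, weight_method="no_weights"
#     )
#
# mean_p1d_table then has (len(zbin_edges) - 1) * (len(kbin_edges) - 1) rows,
# ordered z bin by z bin as defined by mean_p1d_table_regular_slice.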


def fill_average_pk(
    nbins_z,
    nbins_k,
    output_mean,
    mean_p1d_table,
    p1d_table_cols,
    weight_method,
    snrfit_table,
    nomedians,
):
    """Fill the average P1D table for all redshift and k bins.

    Arguments
    ---------
    nbins_z: int,
    Number of redshift bins.

    nbins_k: int,
    Number of k bins.

    output_mean: tuple of numpy ndarray,
    Result of the mean calculation

    mean_p1d_table: astropy.table.Table,
    Table to fill, modified in place.

    p1d_table_cols: List of str,
    Column names in the input table to be averaged.

    weight_method: string
    Two possible options:
        'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR
        'no_weights': Compute mean P1D without weights

    snrfit_table: numpy ndarray,
    Table containing SNR fit info

    nomedians: bool,
    If True, do not use median values in the fit to the SNR.

    Return
    ------
    None
    """
    for izbin in range(nbins_z):  # Main loop 1) z bins
        (
            zbin_array,
            index_zbin_array,
            n_array,
            mean_array,
            error_array,
            min_array,
            max_array,
            median_array,
            snrfit_array,
        ) = output_mean[izbin]
        index_mean = mean_p1d_table_regular_slice(izbin, nbins_k)

        mean_p1d_table["zbin"][index_mean[0]:index_mean[1]] = zbin_array
        mean_p1d_table["index_zbin"][index_mean[0]:index_mean[1]] = index_zbin_array
        mean_p1d_table["N"][index_mean[0]:index_mean[1]] = n_array
        for icol, col in enumerate(p1d_table_cols):
            mean_p1d_table["mean" + col][index_mean[0]:index_mean[1]] = mean_array[icol]
            mean_p1d_table["error" + col][index_mean[0]:index_mean[1]] = error_array[icol]
            mean_p1d_table["min" + col][index_mean[0]:index_mean[1]] = min_array[icol]
            mean_p1d_table["max" + col][index_mean[0]:index_mean[1]] = max_array[icol]
            if not nomedians:
                mean_p1d_table["median" + col][index_mean[0]:index_mean[1]] = median_array[icol]
        if weight_method == "fit_snr":
            snrfit_table[index_mean[0]:index_mean[1], :] = snrfit_array
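
# Note on the output_mean argument above: it is a list with one entry per
# redshift bin, each entry being the 9-tuple returned by
# compute_average_pk_redshift: (zbin_array, index_zbin_array, n_array,
# mean_array, error_array, min_array, max_array, median_array, snrfit_array),
# where the per-column entries are lists of len(p1d_table_cols) arrays of
# nbins_k values each.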


def compute_average_pk_redshift(
    p1d_table,
    p1d_table_cols,
    weight_method,
    apply_z_weights,
    nomedians,
    nbins_z,
    zbin_centers,
    zbin_edges,
    n_chunks,
    nbins_k,
    kbin_edges,
    izbin,
):
    """Compute the average P1D table for the given redshift and k bins.

    The function computes the mean P1D table for each redshift and k bin.
    If there are no chunks in a given bin, the rows in the
    table for that bin will be filled with NaNs.
    The mean value for each bin is calculated using a weighting method,
    either a fit to the SNR or using weights based on the redshift.

    Arguments
    ---------
    p1d_table: astropy.table.Table,
    Table containing the data to be averaged.

    p1d_table_cols: List of str,
    Column names in the input table to be averaged.

    weight_method: str,
    Method to weight the data.

    apply_z_weights: bool,
    If True, apply redshift weights.

    nomedians: bool,
    If True, do not use median values in the fit to the SNR.

    nbins_z: int,
    Number of redshift bins.

    zbin_centers: numpy ndarray,
    Centers of the redshift bins.

    zbin_edges: numpy ndarray,
    Edges of the redshift bins.

    n_chunks: numpy ndarray,
    Number of chunks in each redshift bin.

    nbins_k: int,
    Number of k bins.

    kbin_edges: numpy ndarray,
    Edges of the k bins.

    izbin: int,
    Index of the current redshift bin.

    Return
    ------
    zbin_array
    index_zbin_array
    n_array
    mean_array
    error_array
    min_array
    max_array
    median_array
    snrfit_array
    """
    n_array = np.zeros(nbins_k, dtype=int)
    zbin_array = np.zeros(nbins_k)
    index_zbin_array = np.zeros(nbins_k, dtype=int)
    mean_array = []
    error_array = []
    min_array = []
    max_array = []
    if not nomedians:
        median_array = []
    else:
        median_array = None
    if weight_method == "fit_snr":
        snrfit_array = np.zeros((nbins_k, 13))
    else:
        snrfit_array = None

    for col in p1d_table_cols:
        mean_array.append(np.zeros(nbins_k))
        error_array.append(np.zeros(nbins_k))
        min_array.append(np.zeros(nbins_k))
        max_array.append(np.zeros(nbins_k))
        if not nomedians:
            median_array.append(np.zeros(nbins_k))

    if n_chunks[izbin] == 0:  # Fill rows with NaNs
        zbin_array[:] = zbin_centers[izbin]
        index_zbin_array[:] = izbin
        n_array[:] = 0
        for icol, col in enumerate(p1d_table_cols):
            mean_array[icol][:] = np.nan
            error_array[icol][:] = np.nan
            min_array[icol][:] = np.nan
            max_array[icol][:] = np.nan
            if not nomedians:
                median_array[icol][:] = np.nan
        if weight_method == "fit_snr":
            snrfit_array[:] = np.nan

        return (
            zbin_array,
            index_zbin_array,
            n_array,
            mean_array,
            error_array,
            min_array,
            max_array,
            median_array,
            snrfit_array,
        )

    for ikbin, kbin in enumerate(kbin_edges[:-1]):  # Main loop 2) k bins
        if apply_z_weights:  # special chunk selection in that case
            delta_z = zbin_centers[1:] - zbin_centers[:-1]
            if not np.allclose(delta_z, delta_z[0], atol=1.0e-3):
                raise ValueError(
                    "z bins should have equal widths with apply_z_weights."
                )
            delta_z = delta_z[0]

            select = (p1d_table["k"] < kbin_edges[ikbin + 1]) & (
                p1d_table["k"] > kbin_edges[ikbin]
            )
            if izbin in (0, nbins_z - 1):
                # First and last bin: in order to avoid edge effects,
                #    use only chunks within the bin
                select = (
                    select
                    & (p1d_table["forest_z"] > zbin_edges[izbin])
                    & (p1d_table["forest_z"] < zbin_edges[izbin + 1])
                )
            else:
                select = (
                    select
                    & (p1d_table["forest_z"] < zbin_centers[izbin + 1])
                    & (p1d_table["forest_z"] > zbin_centers[izbin - 1])
                )

            redshift_weights = (
                1.0
                - np.abs(p1d_table["forest_z"][select] - zbin_centers[izbin]) / delta_z
            )

        else:
            select = (
                (p1d_table["forest_z"] < zbin_edges[izbin + 1])
                & (p1d_table["forest_z"] > zbin_edges[izbin])
                & (p1d_table["k"] < kbin_edges[ikbin + 1])
                & (p1d_table["k"] > kbin_edges[ikbin])
            )  # select a specific (z,k) bin

        zbin_array[ikbin] = zbin_centers[izbin]
        index_zbin_array[ikbin] = izbin

        # Count the number of chunks in each (z,k) bin
        num_chunks = np.ma.count(p1d_table["k"][select])

        n_array[ikbin] = num_chunks

        if weight_method == "fit_snr":
            if num_chunks == 0:
                userprint(
                    f"Warning: 0 chunks found in bin {zbin_edges[izbin]}"
                    f"<z<{zbin_edges[izbin + 1]}, "
                    f"{kbin_edges[ikbin]}<k<{kbin_edges[ikbin + 1]}"
                )
                continue
            snr_bin_edges = np.arange(
                MEANPK_FITRANGE_SNR[0], MEANPK_FITRANGE_SNR[1] + 1, 1
            )
            snr_bins = (snr_bin_edges[:-1] + snr_bin_edges[1:]) / 2

            p1d_values = p1d_table["Pk"][select]
            data_snr = p1d_table["forest_snr"][select]
            mask_nan_p1d_values = (~np.isnan(p1d_values)) & (~np.isnan(data_snr))
            data_snr, p1d_values = (
                data_snr[mask_nan_p1d_values],
                p1d_values[mask_nan_p1d_values],
            )
            if len(p1d_values) == 0:
                continue
            standard_dev, _, _ = binned_statistic(
                data_snr, p1d_values, statistic="std", bins=snr_bin_edges
            )
            standard_dev_full = np.copy(standard_dev)
            standard_dev, snr_bins = (
                standard_dev[~np.isnan(standard_dev)],
                snr_bins[~np.isnan(standard_dev)],
            )
            if len(standard_dev) == 0:
                continue
            coef, *_ = curve_fit(
                fitfunc_variance_pk1d,
                snr_bins,
                standard_dev**2,
                bounds=(0, np.inf),
            )
            data_snr[data_snr > MEANPK_FITRANGE_SNR[1]] = MEANPK_FITRANGE_SNR[1]
            data_snr[data_snr < 1.01] = 1.01
            variance_estimated = fitfunc_variance_pk1d(data_snr, *coef)
            weights = 1.0 / variance_estimated

        for icol, col in enumerate(p1d_table_cols):
            if num_chunks == 0:
                userprint(
                    f"Warning: 0 chunks found in bin {zbin_edges[izbin]}"
                    f"<z<{zbin_edges[izbin + 1]}, "
                    f"{kbin_edges[ikbin]}<k<{kbin_edges[ikbin + 1]}"
                )
                continue

            if weight_method == "fit_snr":
                snr_bin_edges = np.arange(
                    MEANPK_FITRANGE_SNR[0], MEANPK_FITRANGE_SNR[1] + 1, 1
                )
                snr_bins = (snr_bin_edges[:-1] + snr_bin_edges[1:]) / 2

                data_values = p1d_table[col][select]
                data_values = data_values[mask_nan_p1d_values]
                # Fit function to observed dispersion in col:
                standard_dev_col, _, _ = binned_statistic(
                    data_snr, data_values, statistic="std", bins=snr_bin_edges
                )
                standard_dev_col, snr_bins = (
                    standard_dev_col[~np.isnan(standard_dev_col)],
                    snr_bins[~np.isnan(standard_dev_col)],
                )

                coef_col, *_ = curve_fit(
                    fitfunc_variance_pk1d,
                    snr_bins,
                    standard_dev_col**2,
                    bounds=(0, np.inf),
                )

                # Model variance from fit function
                data_snr[data_snr > MEANPK_FITRANGE_SNR[1]] = MEANPK_FITRANGE_SNR[1]
                data_snr[data_snr < 1.01] = 1.01
                variance_estimated_col = fitfunc_variance_pk1d(data_snr, *coef_col)
                weights_col = 1.0 / variance_estimated_col
                if apply_z_weights:
                    # Apply the redshift weights without modifying `weights` in
                    # place: it is shared by all columns of this k bin.
                    mean = np.average(data_values, weights=weights * redshift_weights)
                else:
                    mean = np.average(data_values, weights=weights)
                if apply_z_weights:
                    # Analytic expression for the re-weighted average:
                    error = np.sqrt(np.sum(weights_col * redshift_weights)) / np.sum(
                        weights_col
                    )
                else:
                    error = np.sqrt(1.0 / np.sum(weights_col))
                if col == "Pk":
                    standard_dev = np.concatenate(
                        [
                            standard_dev,
                            np.full(len(standard_dev[np.isnan(standard_dev)]), np.nan),
                        ]
                    )
                    snrfit_array[ikbin, 0:4] = [
                        zbin_centers[izbin],
                        (kbin + kbin_edges[ikbin + 1]) / 2.0,
                        coef[0],
                        coef[1],
                    ]
                    snrfit_array[ikbin, 4:] = standard_dev_full  # also save nan values

            elif weight_method == "no_weights":
                if apply_z_weights:
                    mean = np.average(p1d_table[col][select], weights=redshift_weights)
                    # simple analytic expression:
                    error = np.std(p1d_table[col][select]) * (
                        np.sqrt(np.sum(redshift_weights**2)) / np.sum(redshift_weights)
                    )
                else:
                    mean = np.mean(p1d_table[col][select])
                    # unbiased estimate: num_chunks-1
                    error = np.std(p1d_table[col][select]) / np.sqrt(num_chunks - 1)

            else:
                raise ValueError("Option for 'weight_method' argument not found")

            minimum = np.min(p1d_table[col][select])
            maximum = np.max(p1d_table[col][select])
            mean_array[icol][ikbin] = mean
            error_array[icol][ikbin] = error
            min_array[icol][ikbin] = minimum
            max_array[icol][ikbin] = maximum
            if not nomedians:
                median = np.median(p1d_table[col][select])
                median_array[icol][ikbin] = median
    return (
        zbin_array,
        index_zbin_array,
        n_array,
        mean_array,
        error_array,
        min_array,
        max_array,
        median_array,
        snrfit_array,
    )
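
# Sketch of the 'fit_snr' weighting used above: the dispersion of individual
# values is measured in SNR bins, fitfunc_variance_pk1d(snr, a, b) is fitted
# to the binned variance, and each chunk gets weight
# w = 1 / fitfunc_variance_pk1d(snr_chunk, a, b); without redshift weights the
# error on the weighted mean is then np.sqrt(1.0 / np.sum(weights_col)).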


def compute_and_fill_covariance(
    compute_covariance,
    compute_bootstrap,
    weight_method,
    nbins_k,
    nbins_z,
    p1d_groups,
    number_worker,
    number_bootstrap,
    cov_table,
):
    """Compute the covariance and bootstrap covariance and fill the
    corresponding cov_table variable.

    Arguments
    ---------
    compute_covariance: Bool
    If True, compute statistical covariance matrix of the mean P1D.
    Requires p1d_table to contain 'sub_forest_id', since correlations are computed
    within individual forests.

    compute_bootstrap: Bool
    If True, compute statistical covariance using a simple bootstrap method.

    weight_method: str,
    Method to weight the data.

    nbins_k (int):
    Number of k bins.

    nbins_z (int):
    Number of z bins.

    p1d_groups (array-like):
    Individual p1d pixels grouped in the same wavenumber binning for all subforests

    number_worker: int
    Calculations of mean P1Ds and covariances are run in parallel over redshift bins.

    number_bootstrap: int
    Number of bootstrap samples used if compute_bootstrap is True.

    cov_table (array-like):
    Covariance table to fill.

    Return
    ------
    None
    """
    if compute_covariance:
        userprint("Computation of covariance matrix")

        func = partial(
            compute_cov,
            weight_method,
            nbins_k,
        )
        if number_worker == 1:
            output_cov = [func(*p1d_group) for p1d_group in p1d_groups]
        else:
            with Pool(number_worker) as pool:
                output_cov = pool.starmap(func, p1d_groups)

        for izbin in range(nbins_z):
            covariance_array = output_cov[izbin]
            index_cov = cov_table_regular_slice(izbin, nbins_k)
            cov_table["covariance"][index_cov[0]:index_cov[1]] = covariance_array

    if compute_bootstrap:
        userprint("Computing covariance matrix with bootstrap method")
        p1d_groups_bootstrap = []
        for izbin in range(nbins_z):
            number_sub_forests = len(p1d_groups[izbin][1])
            if number_sub_forests > 0:
                bootid = np.array(
                    bootstrap(np.arange(number_sub_forests), number_bootstrap)
                ).astype(int)
            else:
                bootid = np.full(number_bootstrap, None)

            for iboot in range(number_bootstrap):
                if bootid[iboot] is None:
                    (
                        mean_pk,
                        error_pk,
                        p1d_weights_z,
                        covariance_weights_z,
                        p1d_groups_z,
                    ) = ([], [], [], [], [])
                else:
                    mean_pk = p1d_groups[izbin][0][bootid[iboot]]
                    error_pk = p1d_groups[izbin][1][bootid[iboot]]
                    p1d_weights_z = p1d_groups[izbin][2][bootid[iboot]]
                    covariance_weights_z = p1d_groups[izbin][3][bootid[iboot]]
                    p1d_groups_z = p1d_groups[izbin][4][bootid[iboot]]
                p1d_groups_bootstrap.append(
                    [
                        mean_pk,
                        error_pk,
                        p1d_weights_z,
                        covariance_weights_z,
                        p1d_groups_z,
                    ]
                )

        func = partial(
            compute_cov,
            weight_method,
            nbins_k,
        )
        if number_worker == 1:
            output_cov = [func(*p) for p in p1d_groups_bootstrap]
        else:
            with Pool(number_worker) as pool:
                output_cov = pool.starmap(func, p1d_groups_bootstrap)

        for izbin in range(nbins_z):
            boot_cov = []
            for iboot in range(number_bootstrap):
                boot_cov.append(output_cov[izbin * number_bootstrap + iboot])

            index_cov = cov_table_regular_slice(izbin, nbins_k)
            cov_table["boot_covariance"][index_cov[0]:index_cov[1]] = np.mean(
                boot_cov, axis=0
            )
            cov_table["error_boot_covariance"][index_cov[0]:index_cov[1]] = np.std(
                boot_cov, axis=0
            )
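
# Note on the bootstrap branch above: whole sub-forest groups are resampled.
# For each redshift bin, number_bootstrap index sets are drawn with
# astropy.stats.bootstrap, compute_cov is re-run on every resampled group,
# and 'boot_covariance' / 'error_boot_covariance' are the mean and standard
# deviation over the bootstrap realizations.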


def compute_p1d_groups(
    weight_method,
    nbins_k,
    zbin_edges,
    izbin,
    p1d_table,
    k_index,
    snrfit_table,
    number_worker,
):
    """Compute the P1D groups before the covariance matrix calculation.
    Put all the P1D on the same k grid.

    Arguments
    ---------
    weight_method: str,
    Method to weight the data.

    nbins_k (int):
    Number of k bins.

    zbin_edges (array-like):
    All redshift bins.

    izbin (int):
    Current redshift bin being considered.

    p1d_table (array-like):
    Table of 1D power spectra, with columns 'Pk' and 'sub_forest_id'.

    k_index (array-like):
    Array of indices for k-values, with -1 indicating values outside of the k bins.

    snrfit_table: numpy ndarray,
    Table containing SNR fit info

    number_worker: int
    Number of workers for the parallelization

    Return
    ------
    p1d_weights (array-like):
    Weights associated with p1d pixels for all subforests, used in the
    calculation of the covariance.

    covariance_weights (array-like):
    Weights for all subforests used inside the main covariance sum.

    p1d_groups (array-like):
    Individual p1d pixels grouped in the same wavenumber binning for all subforests
    """
    select_z = (p1d_table["forest_z"] < zbin_edges[izbin + 1]) & (
        p1d_table["forest_z"] > zbin_edges[izbin]
    )
    p1d_sub_table = Table(
        [
            p1d_table["sub_forest_id"][select_z],
            k_index[select_z],
            p1d_table["Pk"][select_z],
        ],
        names=("sub_forest_id", "k_index", "pk"),
    )
    if weight_method == "fit_snr":
        index_slice = mean_p1d_table_regular_slice(izbin, nbins_k)
        snrfit_z = snrfit_table[index_slice[0]:index_slice[1], :]
        p1d_sub_table["weight"] = 1 / fitfunc_variance_pk1d(
            p1d_table["forest_snr"][select_z],
            snrfit_z[k_index[select_z], 2],
            snrfit_z[k_index[select_z], 3],
        )
    else:
        p1d_sub_table["weight"] = 1

    # Remove pixels that are not associated with any wavenumber bin.
    p1d_sub_table = p1d_sub_table[p1d_sub_table["k_index"] >= 0]

    grouped_table = p1d_sub_table.group_by("sub_forest_id")
    p1d_los_table = [
        group[["k_index", "pk", "weight"]] for group in grouped_table.groups
    ]

    del grouped_table

    func = partial(
        compute_groups_for_one_forest,
        nbins_k,
    )
    if number_worker == 1:
        output_cov = np.array([func(p1d_los) for p1d_los in p1d_los_table])
    else:
        with Pool(number_worker) as pool:
            output_cov = np.array(pool.map(func, p1d_los_table))

    del p1d_los_table

    p1d_weights, covariance_weights, p1d_groups = (
        output_cov[:, 0, :],
        output_cov[:, 1, :],
        output_cov[:, 2, :],
    )
    return p1d_weights, covariance_weights, p1d_groups
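
# Note on the shapes above: grouped_table.groups yields one sub-table per
# sub_forest_id, and compute_groups_for_one_forest reduces each to three
# arrays of nbins_k values, so np.array(pool.map(...)) has shape
# (n_subforests, 3, nbins_k) and each of the three unpacked outputs has
# shape (n_subforests, nbins_k).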


def compute_groups_for_one_forest(nbins_k, p1d_los):
    """Compute the P1D groups for one subforest.

    Arguments
    ---------
    nbins_k (int):
    Number of k bins.

    p1d_los (array-like):
    Table containing all p1d pixels, unordered, for one subforest

    Return
    ------
    p1d_weights_id (array-like):
    Weights associated with p1d pixels for one subforest, used in the
    calculation of the covariance.

    covariance_weights_id (array-like):
    Weights for one subforest used inside the main covariance sum.

    p1d_groups_id (array-like):
    Individual p1d pixels grouped in the same wavenumber binning for one subforest
    """
    p1d_weights_id = np.zeros(nbins_k)
    covariance_weights_id = np.zeros(nbins_k)
    p1d_groups_id = np.zeros(nbins_k)

    for ikbin in range(nbins_k):
        mask_ikbin = p1d_los["k_index"] == ikbin
        number_in_bins = np.count_nonzero(mask_ikbin)
        if number_in_bins != 0:
            weight = p1d_los["weight"][mask_ikbin][0]
            p1d_weights_id[ikbin] = weight
            covariance_weights_id[ikbin] = np.sqrt(weight / number_in_bins)
            p1d_groups_id[ikbin] = np.nansum(
                p1d_los["pk"][mask_ikbin] * covariance_weights_id[ikbin]
            )
    return p1d_weights_id, covariance_weights_id, p1d_groups_id
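
# Worked toy example for the function above (illustrative numbers): for
# nbins_k = 4, a sub-forest whose selected pixels have k_index = [0, 0, 2]
# and a uniform weight of 0.5 yields
#     p1d_weights_id        = [0.5, 0.0, 0.5, 0.0]
#     covariance_weights_id = [np.sqrt(0.5 / 2), 0.0, np.sqrt(0.5 / 1), 0.0]
# and p1d_groups_id[ikbin] = np.nansum(pk in bin) * covariance_weights_id[ikbin].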


def compute_cov(
    weight_method,
    nbins_k,
    mean_pk,
    error_pk,
    p1d_weights,
    covariance_weights,
    p1d_groups,
):
    """Compute the covariance of a set of 1D power spectra.

    This is a new version of the covariance calculation. It requires the
    input data to be expressed on the same wavenumber grid; the calculation
    is then performed in a fully vectorized way.

    Arguments
    ---------
    weight_method: str,
    Method to weight the data.

    nbins_k (int):
    Number of k bins.

    mean_pk (array-like):
    Mean 1D power spectra, for the considered redshift bin.

    error_pk (array-like):
    Standard deviation of the 1D power spectra, for the considered redshift bin.

    p1d_weights (array-like):
    Weights associated with p1d pixels for all subforests, used in the
    calculation of the covariance.

    covariance_weights (array-like):
    Weights for all subforests used inside the main covariance sum.

    p1d_groups (array-like):
    Individual p1d pixels grouped in the same wavenumber binning for all subforests

    Return
    ------
    covariance_array (array-like):
    Array of covariance coefficients.
    """
    if len(p1d_groups) == 0:
        return np.full(nbins_k * nbins_k, np.nan)

    mean_pk_product = np.outer(mean_pk, mean_pk)

    sum_p1d_weights = np.nansum(p1d_weights, axis=0)
    weights_sum_of_product = np.outer(sum_p1d_weights, sum_p1d_weights)

    p1d_groups_product_sum = np.zeros((nbins_k, nbins_k))
    covariance_weights_product_of_sum = np.zeros((nbins_k, nbins_k))
    weights_product_of_sum = np.zeros((nbins_k, nbins_k))

    for i, p1d_group in enumerate(p1d_groups):
        # The summation is done with np.nansum instead of simple addition so as
        # not to include the NaNs that are present in the individual p1d.
        # The summation is not done at the end, to prevent memory overhead.
        p1d_groups_product_sum = np.nansum(
            [p1d_groups_product_sum, np.outer(p1d_group, p1d_group)], axis=0
        )
        covariance_weights_product_of_sum = np.nansum(
            [
                covariance_weights_product_of_sum,
                np.outer(covariance_weights[i], covariance_weights[i]),
            ],
            axis=0,
        )
        weights_product_of_sum = np.nansum(
            [weights_product_of_sum, np.outer(p1d_weights[i], p1d_weights[i])], axis=0
        )

    del p1d_groups, covariance_weights, p1d_weights

    covariance_matrix = (weights_product_of_sum / weights_sum_of_product) * (
        (p1d_groups_product_sum / covariance_weights_product_of_sum) - mean_pk_product
    )

    # For the fit_snr method, due to the SNR fitting scheme used for weighting,
    # the diagonal of the weighted sample covariance matrix is not equal
    # to the error in the mean P1D. This is tested on Ohio mocks and data.
    # We choose to renormalize the whole covariance matrix.
    if weight_method == "fit_snr":
        covariance_diag = np.diag(covariance_matrix)
        covariance_matrix = (
            covariance_matrix
            * np.outer(error_pk, error_pk)
            / np.sqrt(np.outer(covariance_diag, covariance_diag))
        )

    covariance_array = np.ravel(covariance_matrix)

    return covariance_array
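
# In index form, the estimator above computes, for each pair (k1, k2):
#
#     cov(k1, k2) = [ sum_i w_i(k1) w_i(k2) / (sum_i w_i(k1) * sum_i w_i(k2)) ]
#                 * [ sum_i P_i(k1) P_i(k2) / sum_i c_i(k1) c_i(k2)
#                     - meanPk(k1) meanPk(k2) ]
#
# where i runs over sub-forests, w_i are the p1d_weights, c_i the
# covariance_weights and P_i the weighted, k-binned p1d_groups.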


def compute_cov_not_vectorized(
    p1d_table,
    mean_p1d_table,
    n_chunks,
    k_index,
    nbins_k,
    weight_method,
    snrfit_table,
    izbin,
    select_z,
    sub_forest_ids,
):
    """Compute the covariance of a set of 1D power spectra.

    This is an old implementation that sums over each individual mode every
    time; it is very slow for large data samples.

    Arguments
    ---------
    p1d_table (array-like):
    Table of 1D power spectra, with columns 'Pk' and 'sub_forest_id'.

    mean_p1d_table (array-like):
    Table of mean 1D power spectra, with column 'meanPk'.

    n_chunks (array-like):
    Array of the number of chunks in each redshift bin.

    k_index (array-like):
    Array of indices for k-values, with -1 indicating values outside of the k bins.

    nbins_k (int):
    Number of k bins.

    weight_method: str,
    Method to weight the data.

    snrfit_table: numpy ndarray,
    Table containing SNR fit info

    izbin (int):
    Current redshift bin being considered.

    select_z (array-like):
    Boolean array for selecting data points based on redshift.

    sub_forest_ids (array-like):
    Array of chunk ids.

    Return
    ------
    covariance_array (array-like):
    Array of covariance coefficients.
    """
    n_array = np.zeros(nbins_k * nbins_k)
    covariance_array = np.zeros(nbins_k * nbins_k)
    if weight_method == "fit_snr":
        weight_array = np.zeros(nbins_k * nbins_k)

    if n_chunks[izbin] == 0:  # Fill rows with NaNs
        covariance_array[:] = np.nan
        return covariance_array

    # First loop 1) id sub-forest bins
    for sub_forest_id in sub_forest_ids:
        select_id = select_z & (p1d_table["sub_forest_id"] == sub_forest_id)
        selected_pk = p1d_table["Pk"][select_id]
        selected_ikbin = k_index[select_id]

        if weight_method == "fit_snr":
            # Definition of weighted unbiased sample covariance taken from
            # "George R. Price, Ann. Hum. Genet., Lond, pp485-490,
            # Extension of covariance selection mathematics, 1972"
            # adapted with weights w_i = np.sqrt(w_ipk * w_ipk2) for covariance
            # to obtain the right definition of variance on the diagonal
            selected_snr = p1d_table["forest_snr"][select_id]
            index_slice = mean_p1d_table_regular_slice(izbin, nbins_k)
            snrfit_z = snrfit_table[index_slice[0]:index_slice[1], :]
            selected_variance_estimated = fitfunc_variance_pk1d(
                selected_snr, snrfit_z[selected_ikbin, 2], snrfit_z[selected_ikbin, 3]
            )
            # First loop 2) selected pk
            for ipk, _ in enumerate(selected_pk):
                ikbin = selected_ikbin[ipk]
                # First loop 3) selected pk
                for ipk2 in range(ipk, len(selected_pk)):
                    ikbin2 = selected_ikbin[ipk2]
                    if (ikbin2 != -1) & (ikbin != -1):
                        # index of the (ikbin,ikbin2) coefficient on the top of the matrix
                        index = (nbins_k * ikbin) + ikbin2
                        weight = 1 / selected_variance_estimated[ipk]
                        weight2 = 1 / selected_variance_estimated[ipk2]
                        covariance_array[index] = covariance_array[index] + selected_pk[
                            ipk
                        ] * selected_pk[ipk2] * np.sqrt(weight * weight2)
                        n_array[index] = n_array[index] + 1
                        weight_array[index] = weight_array[index] + np.sqrt(
                            weight * weight2
                        )
        else:
            # First loop 2) selected pk
            for ipk, _ in enumerate(selected_pk):
                ikbin = selected_ikbin[ipk]
                # First loop 3) selected pk
                for ipk2 in range(ipk, len(selected_pk)):
                    ikbin2 = selected_ikbin[ipk2]
                    if (ikbin2 != -1) & (ikbin != -1):
                        # index of the (ikbin,ikbin2) coefficient on the top of the matrix
                        index = (nbins_k * ikbin) + ikbin2
                        covariance_array[index] = (
                            covariance_array[index]
                            + selected_pk[ipk] * selected_pk[ipk2]
                        )
                        n_array[index] = n_array[index] + 1
    # Second loop 1) k bins
    for ikbin in range(nbins_k):
        mean_ikbin = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin]

        # Second loop 2) k bins
        for ikbin2 in range(ikbin, nbins_k):
            mean_ikbin2 = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin2]

            # index of the (ikbin,ikbin2) coefficient on the top of the matrix
            index = (nbins_k * ikbin) + ikbin2
            if weight_method == "fit_snr":
                covariance_array[index] = (
                    covariance_array[index] / weight_array[index]
                ) - mean_ikbin * mean_ikbin2
            else:
                covariance_array[index] = (
                    covariance_array[index] / n_array[index]
                ) - mean_ikbin * mean_ikbin2

            # Same normalization for fit_snr, equivalent to dividing by
            # weight_array[index] if weights are normalized to give
            # sum(weight_array[index]) = n_array[index]
            covariance_array[index] = covariance_array[index] / n_array[index]

            if ikbin2 != ikbin:
                # index of the (ikbin,ikbin2) coefficient on the bottom of the matrix
                index_2 = (nbins_k * ikbin2) + ikbin
                covariance_array[index_2] = covariance_array[index]
                n_array[index_2] = n_array[index]

    # For the fit_snr method, due to the SNR fitting scheme used for weighting,
    # the diagonal of the weighted sample covariance matrix is not equal
    # to the error in the mean P1D. This is tested on Ohio mocks.
    # We choose to renormalize the whole covariance matrix.
    if weight_method == "fit_snr":
        # Third loop 1) k bins
        for ikbin in range(nbins_k):
            std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin]
            # Third loop 2) k bins
            for ikbin2 in range(ikbin, nbins_k):
                std_ikbin2 = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin2]

                # index of the (ikbin,ikbin2) coefficient on the top of the matrix
                index = (nbins_k * ikbin) + ikbin2
                covariance_array[index] = (
                    covariance_array[index]
                    * (std_ikbin * std_ikbin2)
                    / np.sqrt(
                        covariance_array[(nbins_k * ikbin) + ikbin]
                        * covariance_array[(nbins_k * ikbin2) + ikbin2]
                    )
                )

                if ikbin2 != ikbin:
                    # index of the (ikbin,ikbin2) coefficient on the bottom of the matrix
                    index_2 = (nbins_k * ikbin2) + ikbin
                    covariance_array[index_2] = (
                        covariance_array[index_2]
                        * (std_ikbin * std_ikbin2)
                        / np.sqrt(
                            covariance_array[(nbins_k * ikbin) + ikbin]
                            * covariance_array[(nbins_k * ikbin2) + ikbin2]
                        )
                    )

    return covariance_array


def run_postproc_pk1d(
    data_dir,
    output_file,
    zbin_edges,
    kbin_edges,
    weight_method="no_weights",
    apply_z_weights=False,
    snrcut=None,
    zbins_snrcut=None,
    output_snrfit=None,
    nomedians=False,
    velunits=False,
    overwrite=False,
    ncpu=8,
    compute_covariance=False,
    compute_bootstrap=False,
    number_bootstrap=50,
):
    """Read individual Pk1D data from a set of files and compute P1D statistics.

    Arguments
    ---------
    data_dir: string
    Directory where individual P1D FITS files are located

    output_file: string
    Output file name

    overwrite: Bool
    Overwrite output file if existing

    ncpu: int
    The I/O function read_pk1d() is run in parallel over ncpu workers

    Other arguments are as defined
    in compute_mean_pk1d() or read_pk1d()
    """
    if os.path.exists(output_file) and not overwrite:
        raise RuntimeError("Output file already exists: " + output_file)

    searchstr = "*"
    files = glob.glob(os.path.join(data_dir, f"Pk1D{searchstr}.fits.gz"))

    with Pool(ncpu) as pool:
        output_readpk1d = pool.starmap(
            read_pk1d, [[f, kbin_edges, snrcut, zbins_snrcut] for f in files]
        )

    output_readpk1d = [x for x in output_readpk1d if x is not None]
    p1d_table = vstack([output_readpk1d[i][0] for i in range(len(output_readpk1d))])
    z_array = np.concatenate(
        tuple(output_readpk1d[i][1] for i in range(len(output_readpk1d)))
    )

    userprint("Individual P1Ds read, now computing statistics.")

    mean_p1d_table, metadata_table, cov_table = compute_mean_pk1d(
        p1d_table,
        z_array,
        zbin_edges,
        kbin_edges,
        weight_method,
        apply_z_weights,
        nomedians=nomedians,
        velunits=velunits,
        output_snrfit=output_snrfit,
        compute_covariance=compute_covariance,
        compute_bootstrap=compute_bootstrap,
        number_bootstrap=number_bootstrap,
        number_worker=ncpu,
    )

    result = fitsio.FITS(output_file, "rw", clobber=True)
    result.write(mean_p1d_table.as_array())
    result.write(
        metadata_table.as_array(),
        header={"VELUNITS": velunits, "NQSO": len(np.unique(p1d_table["forest_id"]))},
    )
    if cov_table is not None:
        result.write(cov_table.as_array())
    result.close()
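
# Example end-to-end call (an illustrative sketch; the directory, output file
# and bin edges are hypothetical):
#
#     run_postproc_pk1d(
#         "/path/to/pk1d/files",
#         "mean_p1d.fits.gz",
#         zbin_edges=np.arange(2.1, 3.3, 0.2),
#         kbin_edges=np.arange(0.001, 0.1, 0.005),
#         weight_method="fit_snr",
#         compute_covariance=True,
#         overwrite=True,
#     )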