• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / miranda / 1970579325

pending completion
1970579325

Pull #24

github

GitHub
Merge 76c2593f6 into 250bc4dd4
Pull Request #24: Add CMIP file structure - WIP

206 of 949 new or added lines in 41 files covered. (21.71%)

10 existing lines in 3 files now uncovered.

702 of 3058 relevant lines covered (22.96%)

0.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

21.18
/miranda/archive/groupings.py
1
import logging
3✔
2
import re
3✔
3
from logging.config import dictConfig
3✔
4
from pathlib import Path
3✔
5
from types import GeneratorType
3✔
6
from typing import Dict, List, Union
3✔
7

8
from miranda.scripting import LOGGING_CONFIG
3✔
9
from miranda.storage import report_file_size
3✔
10
from miranda.utils import ingest
3✔
11

12
dictConfig(LOGGING_CONFIG)
3✔
13
Nested_List = List[List[Path]]
3✔
14
PathDict = Dict[str, List[Path]]
3✔
15

16

17
GiB = int(pow(2, 30))
3✔
18

19
__all__ = [
3✔
20
    "group_by_deciphered_date",
21
    "group_by_length",
22
    "group_by_size",
23
    "group_by_subdirectories",
24
]
25

26

27
def group_by_length(
    files: Union[GeneratorType, List], size: int = 10
) -> List[List[Path]]:
    """Group files into lists of at most `size` entries.

    Parameters
    ----------
    files : GeneratorType or List
        File paths to group; normalized via `ingest` before grouping.
    size : int
        Maximum number of files per group (default: 10).

    Returns
    -------
    List[List[Path]]
        The groups, in input order; the final group may hold fewer
        than `size` entries.
    """
    logging.info("Creating groups of %s files", size)
    files = ingest(files)
    grouped_list = list()
    group = list()
    for i, f in enumerate(files):
        group.append(f)
        # Close out a group every `size` entries.
        if (i + 1) % size == 0:
            grouped_list.append(group)
            # Rebind instead of copy()+clear(): the appended list must not
            # be mutated by subsequent iterations.
            group = list()
    # Keep any trailing partial group.
    if group:
        grouped_list.append(group)
    logging.info("Divided files into %s groups." % len(grouped_list))
    return grouped_list
49

50

51
def group_by_deciphered_date(
    files: Union[GeneratorType, List]
) -> Dict[str, List[Path]]:
    """Attempt to group files by a date deciphered from their filenames.

    Filenames are matched against a ``YYYY-MM[-DD]...nc`` pattern
    (dashes optional). Files with a full date are keyed by
    ``"YYYY-MM"``; files with only year/month are keyed by ``"YYYY"``.

    Parameters
    ----------
    files : GeneratorType or List
        File paths to group; normalized via `ingest`.

    Returns
    -------
    Dict[str, List[Path]]
        Date-keyed groups when every file matched; otherwise a single
        ``{"data": files}`` entry (grouping aborted).
    """
    logging.warning("This function doesn't work well with multi-thread processing!")
    logging.info("Creating files from deciphered dates.")

    year_month_day = re.compile(
        r"(?P<year>[0-9]{4})-?(?P<month>[0-9]{2})-?(?P<day>[0-9]{2})?.*\.(?P<suffix>nc)$"
    )

    files = ingest(files)
    dates = dict()
    total = 0
    for f in files:
        match = year_month_day.search(str(Path(f).name))
        # BUG FIX: a non-matching filename previously raised
        # AttributeError (`match` was None); skip it instead so the
        # "grouping aborted" path below is reached.
        if match is None:
            continue
        if match.group("day"):
            # Full date found: group at month granularity.
            key = "-".join([match.group("year"), match.group("month")])
        else:
            # Day is optional in the pattern; month is mandatory, so a
            # match without a day still yields a yearly grouping.
            key = match.group("year")
        dates.setdefault(key, list()).append(Path(f))
        total += 1

    if dates and total == len(files):
        logging.info(
            "All files have been grouped by date. %s groups created.", len(dates)
        )
        return dict(dates)

    if dates and total != len(files):
        logging.info(
            "Not all files were successfully grouped by date. Grouping aborted."
        )
    else:
        logging.info("No matches for dates found. Grouping aborted.")
    return dict(data=files)
93

94

95
def group_by_size(
    files: Union[GeneratorType, List], size: int = 10 * GiB
) -> List[List[Path]]:
    """Group files so each group's cumulative on-disk size stays near `size`.

    A group is closed as soon as its running total *exceeds* `size`, so
    each group (except possibly the last) is the smallest prefix whose
    total passes the threshold.

    Parameters
    ----------
    files : GeneratorType or List
        File paths to group; normalized via `ingest`. Paths must exist
        on disk (their sizes are read via `stat`).
    size : int
        Size threshold in bytes (default: 10 GiB).

    Returns
    -------
    List[List[Path]]
        The groups, in input order.
    """
    logging.info(
        "Creating groups of files based on size not exceeding %s.",
        report_file_size(size),
    )

    files = ingest(files)
    grouped_list = list()
    group = list()
    total = 0
    for f in files:
        # BUG FIX: `Path.stat(f)` only worked when `f` was already a
        # Path instance; `Path(f).stat()` also accepts plain strings.
        total += Path(f).stat().st_size
        group.append(f)
        if total > size:
            grouped_list.append(group)
            # Rebind rather than copy()+clear(); start a fresh group.
            group = list()
            total = 0

    if not group:
        logging.info("The final group is empty. Skipping this set...")
    else:
        grouped_list.append(group)
    return grouped_list
127

128

129
def group_by_subdirectories(
    files: Union[GeneratorType, List], within: Union[str, Path] = None
) -> Dict[str, List[Path]]:
    """Group files by their parent folder relative to `within`.

    Parameters
    ----------
    files : GeneratorType or List
        File paths to group; normalized via `ingest`. Every path must
        lie under `within` (otherwise `Path.relative_to` raises
        ValueError).
    within : str or Path, optional
        Base directory; defaults to the current working directory.

    Returns
    -------
    Dict[str, List[Path]]
        Mapping of relative parent directory to its files.
        NOTE(review): keys are actually `Path` objects, not `str`, as
        the `str(key)` conversion in the log line suggests — the
        annotation is kept for interface compatibility; confirm against
        callers before tightening.
    """
    # BUG FIX: the annotation was `str or Path`, which evaluates to just
    # `str` at runtime (truthy short-circuit) — `Union` is the correct form.
    if not within:
        within = Path.cwd()

    files = ingest(files)
    groups = dict()
    for f in files:
        group_name = Path(f).relative_to(within).parent
        groups.setdefault(group_name, list()).append(f)

    logging.info(
        "File subdirectories found. Proceeding with %s.",
        str([str(key) for key in groups.keys()]),
    )
    return groups
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2024 Coveralls, Inc