• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jyablonski / python_docker / 14430722668

13 Apr 2025 03:12PM UTC coverage: 87.512%. First build
14430722668

Pull #94

github

web-flow
Merge e3289599b into 265e9c2b5
Pull Request #94: Ingestion v2.0.0

512 of 605 new or added lines in 13 files covered. (84.63%)

890 of 1017 relevant lines covered (87.51%)

0.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.88
/src/utils.py
1
from datetime import date, datetime
1✔
2
import json
1✔
3
import logging
1✔
4
import os
1✔
5
import re
1✔
6

7
import awswrangler as wr
1✔
8
from nltk.sentiment import SentimentIntensityAnalyzer
1✔
9
import numpy as np
1✔
10
import pandas as pd
1✔
11
import requests
1✔
12

13

14
def filter_spread(value: str) -> str:
    """
    Filter out 3-digit values from the `spread` column
    in the Scrape Odds Function such as `-108` or `-112`

    Parameters:
        value (str): The original value from the spread column.

    Returns:
        The spread value without any 3-digit values present
    """

    def _is_spread(part: str) -> bool:
        # keep signed values like "+3.5" / "-8.5" whose magnitude is a
        # plausible spread (<= 25); drop juice / odds values like "-110"
        if part[0] in ("+", "-"):
            try:
                return float(part[1:]) <= 25
            except ValueError:
                # e.g. a bare "+" or "+abc" — the old code crashed here
                return False
        return part.isdigit() and int(part) <= 25

    filtered_parts = [part if _is_spread(part) else "" for part in value.split()]
    result = " ".join(filtered_parts).strip()

    # this last part strips out a couple extra white spaces
    return re.sub(r"\s+", " ", result)
39

40

41
def get_season_type(todays_date: date | None = None) -> str:
1✔
42
    """
43
    Function to generate Season Type for a given Date.
44
    **2025-03-16 NOTE** this has been deprecated as this logic
45
    belongs in the dbt project
46

47
    Args:
48
        todays_date (date): The Date to generate a Season Type for.  Defaults to
49
            today's date.
50

51
    Returns:
52
        The Season Type for Given Date
53
    """
54
    if todays_date is None:
1✔
55
        todays_date = datetime.now().date()
×
56

57
    if todays_date < datetime(2025, 4, 15).date():
1✔
58
        season_type = "Regular Season"
1✔
59
    elif (todays_date >= datetime(2025, 4, 16).date()) & (
1✔
60
        todays_date < datetime(2025, 4, 21).date()
61
    ):
62
        season_type = "Play-In"
1✔
63
    else:
64
        season_type = "Playoffs"
1✔
65

66
    return season_type
1✔
67

68

69
def check_schedule(date: datetime.date) -> bool:
    """
    Small Function used in Boxscores + PBP Functions to check if
    there are any games scheduled for a given date.

    Args:
        date (datetime.date): The Date to check for games on.

    Returns:
        Boolean: True if there are games scheduled, False if not.
    """
    schedule_endpoint = f"https://api.jyablonski.dev/schedule?date={date}"
    # timeout so a stalled API call can't hang the pipeline indefinitely
    schedule_data = requests.get(schedule_endpoint, timeout=30).json()

    # a non-empty payload means games are scheduled that day
    return len(schedule_data) > 0
84

85

86
def add_sentiment_analysis(df: pd.DataFrame, sentiment_col: str) -> pd.DataFrame:
    """
    Function to add Sentiment Analysis columns to a DataFrame via nltk Vader Lexicon.

    Args:
        df (pd.DataFrame): The Pandas DataFrame

        sentiment_col (str): The Column in the DataFrame to run Sentiment Analysis on
            (comments / tweets etc).

    Returns:
        The same DataFrame but with the Sentiment Analysis columns attached
        (`compound`, `neg`, `neu`, `pos`, and binary `sentiment`).
    """
    try:
        analyzer = SentimentIntensityAnalyzer()
        # score each row ONCE; the old code called polarity_scores 4x per row
        scores = [analyzer.polarity_scores(text) for text in df[sentiment_col]]
        df["compound"] = [score["compound"] for score in scores]
        df["neg"] = [score["neg"] for score in scores]
        df["neu"] = [score["neu"] for score in scores]
        df["pos"] = [score["pos"] for score in scores]
        # 1 = positive overall tone, 0 = neutral-or-negative
        df["sentiment"] = np.where(df["compound"] > 0, 1, 0)
        return df
    except Exception as e:
        logging.error(f"Error Occurred while adding Sentiment Analysis, {e}")
        raise
112

113

114
def get_leading_zeroes(value: int) -> str:
    """
    Function to add leading zeroes to a month (1 (January) -> "01").
    Used in the `write_to_s3` function.

    Args:
        value (int): The value integer (created from `datetime.now().month`)

    Returns:
        The value as a string with a leading 0 if it is less than 10
            (Nov/Dec aka 11/12 unaffected).
    """
    # str.zfill pads to a minimum width of 2, replacing the manual
    # len()-check-and-prepend; multi-digit values pass through untouched
    return str(value).zfill(2)
130

131

132
def clean_player_names(name: str) -> str:
    """
    Strip generational suffixes (Jr., Sr., II, III, IV) from a player name.

    Args:
        name (str): The raw player name you wish to alter.

    Returns:
        str: Cleaned Name w/ no suffix bs
    """
    try:
        # Order matters: " III" must be removed before " II", or else
        # "Robert Williams III" would end up as "Robert WilliamsI"
        cleaned_name = name
        for suffix in (" Jr.", " Sr.", " III", " II", " IV"):
            cleaned_name = cleaned_name.replace(suffix, "")
        return cleaned_name
    except Exception as e:
        logging.error(f"Error Occurred with Clean Player Names, {e}")
        raise
154

155

156
def write_to_s3(
    file_name: str,
    df: pd.DataFrame,
    date: "datetime.date | None" = None,
    bucket: "str | None" = None,
) -> None:
    """
    S3 Function using awswrangler to write file.  Only supports parquet right now.

    Args:
        file_name (str): The base name of the file (boxscores, opp_stats)

        df (pd.DataFrame): The Pandas DataFrame to write to S3

        date (datetime.date): Date to partition the data by.
            Defaults to `datetime.now().date()`, resolved at call time.

        bucket (str): The Bucket to write to.  Defaults to
            `os.environ.get('S3_BUCKET')`, resolved at call time.

    Returns:
        None, but writes the Pandas DataFrame to an S3 File.

    """
    # resolve defaults at CALL time; the old signature evaluated
    # `datetime.now().date()` and the env var once at module import, which
    # goes stale in any long-running process
    if date is None:
        date = datetime.now().date()
    if bucket is None:
        bucket = os.environ.get("S3_BUCKET")

    try:
        if len(df) == 0:
            logging.info(f"Not storing {file_name} to s3 because it's empty.")
            return None

        year_partition = date.year
        month_partition = get_leading_zeroes(value=date.month)
        file_name_jn = f"{file_name}-{date}"
        s3_path = (
            f"s3://{bucket}/{file_name}/validated/year={year_partition}"
            f"/month={month_partition}/{file_name_jn}.parquet"
        )
        wr.s3.to_parquet(
            df=df,
            path=s3_path,
            index=False,
        )
        # log the exact path that was written (the old message dropped the
        # month partition and the year=/month= key prefixes)
        logging.info(f"Storing {len(df)} {file_name} rows to S3 ({s3_path})")
        return None
    except Exception as error:
        # best-effort: log and swallow so an S3 outage doesn't kill the run
        logging.error(f"S3 Storage Function Failed {file_name}, {error}")
        return None
199

200

201
def write_to_sql(con, table_name: str, df: pd.DataFrame, table_type: str) -> None:
    """
    Simple Wrapper Function to write a Pandas DataFrame to SQL

    Args:
        con (SQL Connection): The connection to the SQL DB.

        table_name (str): The Table name to write to SQL as.

        df (DataFrame): The Pandas DataFrame to store in SQL

        table_type (str): Whether the table should replace or append to an
            existing SQL Table under that name

    Returns:
        Writes the Pandas DataFrame to a Table in the Schema we connected to.

    """
    try:
        row_count = len(df)

        # guard clause: nothing to write, bail out early
        if row_count == 0:
            logging.info(f"{table_name} is empty, not writing to SQL")
            return None

        df.to_sql(
            name=table_name,
            con=con,
            if_exists=table_type,
            index=False,
        )
        logging.info(
            f"Writing {row_count} {table_name} rows to aws_{table_name}_source to SQL"
        )
        return None
    except Exception as error:
        # best-effort: log the failure instead of raising
        logging.error(f"SQL Write Script Failed, {error}")
        return None
237

238

239
def query_logs(log_file: str = "logs/example.log") -> list[str]:
    """
    Small Function to read Logs CSV File and grab Errors

    Args:
        log_file (str): Optional String of the Log File Name

    Returns:
        list of Error Messages to be passed into Slack Function
    """
    # NOTE(review): sep=r"\\t" is a regex that matches a literal
    # backslash-then-t two-char sequence, NOT a tab — in practice each log
    # line lands in a single column.  Confirm that is intended before
    # "fixing" it to a real tab separator.
    raw_logs = pd.read_csv(log_file, sep=r"\\t", engine="python", header=None)
    renamed = raw_logs.rename(columns={0: "errors"})
    failures = renamed.query("errors.str.contains('Failed')", engine="python")
    failed_messages = failures["errors"].to_list()

    logging.info(f"Returning {len(failed_messages)} Failed Logs")
    return failed_messages
256

257

258
def write_to_slack(
1✔
259
    errors: list, webhook_url: str = os.environ.get("WEBHOOK_URL", default="default")
260
) -> int | None:
261
    """ "
262
    Function to write Errors out to Slack.  Requires a pre-configured `webhook_url`
263
    to be setup.
264

265
    Args:
266
        errors (list): The list of Failed Tasks + their associated errors
267

268
        webhook_url (str): Optional Parameter to specify the Webhook to send the
269
            errors to.  Defaults to `os.environ.get("WEBHOOK_URL")`
270

271
    Returns:
272
        None, but writes the Errors to Slack if there are any
273
    """
274
    try:
1✔
275
        date = datetime.now().date()
1✔
276
        num_errors = len(errors)
1✔
277
        str_dump = "\n".join(errors)
1✔
278

279
        if num_errors > 0:
1✔
280
            response = requests.post(
1✔
281
                webhook_url,
282
                data=json.dumps(
283
                    {
284
                        "text": (
285
                            f"\U0001f6d1 {num_errors} Errors during NBA ELT "
286
                            f"Ingestion on {date}: \n {str_dump}"
287
                        )
288
                    }
289
                ),
290
                headers={"Content-Type": "application/json"},
291
            )
292
            logging.info(
1✔
293
                f"Wrote Errors to Slack, Reponse Code {response.status_code}. "
294
                "Exiting ..."
295
            )
296
            return response.status_code
1✔
297
        else:
298
            logging.info("No Error Logs, not writing to Slack.  Exiting out ...")
1✔
299
            return None
1✔
300
    except Exception as e:
×
301
        logging.error(f"Error Writing to Slack, {e}")
×
302
        raise
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc