• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jyablonski / python_docker / 14430807964

13 Apr 2025 03:24PM UTC coverage: 88.616%. First build
14430807964

Pull #94

github

web-flow
Merge 843f0a03c into 265e9c2b5
Pull Request #94: Ingestion v2.0.0

526 of 619 new or added lines in 13 files covered. (84.98%)

903 of 1019 relevant lines covered (88.62%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.93
/src/utils.py
1
from datetime import date, datetime
1✔
2
import json
1✔
3
import logging
1✔
4
import os
1✔
5
import re
1✔
6

7
import awswrangler as wr
1✔
8
from nltk.sentiment import SentimentIntensityAnalyzer
1✔
9
import numpy as np
1✔
10
import pandas as pd
1✔
11
import requests
1✔
12

13

14
def filter_spread(value: str) -> str:
    """
    Filter out 3-digit values from the `spread` column
    in the Scrape Odds Function such as `-108` or `-112`

    The input is split on whitespace and a token is kept only when it is
    either a `+`/`-` prefixed number whose remainder is at most 25, or an
    unsigned whole number that is at most 25.  Every other token is dropped.
    Sign-prefixed tokens that are not numeric (for example a lone `-`) are
    dropped as well instead of raising a `ValueError`.

    Parameters:
        value (str): The original value from the spread column.

    Returns:
        The kept tokens joined by single spaces (an empty string when
        nothing is kept).
    """
    max_spread = 25

    def _is_spread(part: str) -> bool:
        # tokens come from str.split(), so they are never empty
        try:
            if part[0] in ("+", "-"):
                return float(part[1:]) <= max_spread
            return part.isdigit() and int(part) <= max_spread
        except ValueError:
            # not parseable as a number -> treat it as noise and drop it
            return False

    # joining only the kept tokens yields the same text as blanking the
    # rejected ones and collapsing the leftover whitespace afterwards
    return " ".join(part for part in value.split() if _is_spread(part))
1✔
39

40

41
def get_season_type(todays_date: date | None = None) -> str:
1✔
42
    """
43
    Function to generate Season Type for a given Date.
44
    **2025-03-16 NOTE** this has been deprecated as this logic
45
    belongs in the dbt project
46

47
    Args:
48
        todays_date (date): The Date to generate a Season Type for.  Defaults to
49
            today's date.
50

51
    Returns:
52
        The Season Type for Given Date
53
    """
54
    if todays_date is None:
1✔
55
        todays_date = datetime.now().date()
×
56

57
    if todays_date < datetime(2025, 4, 15).date():
1✔
58
        season_type = "Regular Season"
1✔
59
    elif (todays_date >= datetime(2025, 4, 16).date()) & (
1✔
60
        todays_date < datetime(2025, 4, 21).date()
61
    ):
62
        season_type = "Play-In"
1✔
63
    else:
64
        season_type = "Playoffs"
1✔
65

66
    return season_type
1✔
67

68

69
def check_schedule(date: date) -> bool:
    """
    Small Function used in Boxscores + PBP Functions to check if
    there are any games scheduled for a given date.

    Sends a GET request to the schedule endpoint with the date formatted
    into the `date` query parameter and inspects the decoded JSON body.

    Args:
        date (datetime.date): The Date to check for games on.

    Returns:
        Boolean: True if the JSON response contains at least one element,
        False if it is empty.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    schedule_endpoint = f"https://api.jyablonski.dev/schedule?date={date}"

    # without a timeout a stalled connection would hang the caller forever
    schedule_data = requests.get(schedule_endpoint, timeout=30).json()

    return len(schedule_data) > 0
1✔
84

85

86
def add_sentiment_analysis(df: pd.DataFrame, sentiment_col: str) -> pd.DataFrame:
    """
    Function to add Sentiment Analysis columns to a DataFrame via nltk Vader Lexicon.

    Adds the `compound`, `neg`, `neu` and `pos` score columns plus a binary
    `sentiment` column (1 when `compound` is greater than 0, otherwise 0).
    The columns are assigned onto the DataFrame that was passed in.

    Args:
        df (pd.DataFrame): The Pandas DataFrame

        sentiment_col (str): The Column in the DataFrame to run Sentiment Analysis on
            (comments / tweets etc).

    Returns:
        The same DataFrame but with the Sentiment Analysis columns attached.

    Raises:
        Exception: Any error raised while scoring is logged and re-raised.
    """
    try:
        analyzer = SentimentIntensityAnalyzer()

        # score every row once and reuse the result for all four columns
        scores = [analyzer.polarity_scores(text) for text in df[sentiment_col]]
        for key in ("compound", "neg", "neu", "pos"):
            df[key] = [score[key] for score in scores]

        df["sentiment"] = np.where(df["compound"] > 0, 1, 0)
        return df
    except Exception as e:
        logging.error(f"Error Occurred while adding Sentiment Analysis, {e}")
        raise
×
112

113

114
def get_leading_zeroes(value: int) -> str:
    """
    Left-pad a month number to two characters (1 (January) -> 01).
    Used in the the `write_to_s3` function.

    Args:
        value (int): The month as an integer (created from `datetime.now().month`)

    Returns:
        The value as a string, prefixed with a `0` when it is a single
            digit (10, 11 and 12 are returned unchanged).
    """
    return str(value).zfill(2)
1✔
130

131

132
def clean_player_names(name: str) -> str:
    """
    Strip generational suffixes (Jr., Sr., II, III, IV) out of a player name.

    Args:
        name (str): The raw player name you wish to alter.

    Returns:
        str: The name with every occurrence of those suffixes removed.

    Raises:
        Exception: Any error raised while cleaning is logged and re-raised.
    """
    # order matters: " III" must be removed before " II", otherwise
    # "Robert Williams III" would turn into "Robert WilliamsI"
    suffixes = (" Jr.", " Sr.", " III", " II", " IV")

    try:
        stripped = name
        for suffix in suffixes:
            stripped = stripped.replace(suffix, "")
    except Exception as e:
        logging.error(f"Error Occurred with Clean Player Names, {e}")
        raise

    return stripped
×
154

155

156
def write_to_sql(con, table_name: str, df: pd.DataFrame, table_type: str) -> None:
    """
    Thin wrapper around `DataFrame.to_sql` that skips empty DataFrames.

    Failures are logged and swallowed rather than raised, so this function
    always returns None.

    Args:
        con (SQL Connection): The connection to the SQL DB.

        table_name (str): The Table name to write to SQL as.

        df (DataFrame): The Pandas DataFrame to store in SQL

        table_type (str): Passed to `to_sql` as `if_exists` - whether the
            table should replace or append to an existing SQL Table under
            that name

    Returns:
        None.  Writes the Pandas DataFrame to a Table in the Schema we
        connected to when it has at least one row.

    """
    try:
        row_count = len(df)

        # nothing to persist - log it and bail out early
        if row_count == 0:
            logging.info(f"{table_name} is empty, not writing to SQL")
            return None

        df.to_sql(name=table_name, con=con, if_exists=table_type, index=False)
        logging.info(
            f"Writing {row_count} {table_name} rows to aws_{table_name}_source to SQL"
        )
        return None
    except Exception as error:
        # best effort: the error is only logged, never propagated
        logging.error(f"SQL Write Script Failed, {error}")
        return None
×
192

193

194
def query_logs(log_file: str = "logs/example.log") -> list[str]:
    """
    Read the log file and collect the entries that mention a failure.

    Args:
        log_file (str): Optional String of the Log File Name

    Returns:
        list of the first-column values containing the text `Failed`,
        to be passed into Slack Function
    """
    # NOTE(review): the separator pattern matches a literal backslash followed
    # by `t`, not a tab character, so a typical log line is presumably never
    # split and lands whole in column 0 - confirm that this is intended.
    raw = pd.read_csv(log_file, sep=r"\\t", engine="python", header=None)

    messages = raw[0]
    failed = messages[messages.str.contains("Failed")].to_list()

    logging.info(f"Returning {len(failed)} Failed Logs")
    return failed
1✔
211

212

213
def write_to_slack(
1✔
214
    errors: list, webhook_url: str = os.environ.get("WEBHOOK_URL", default="default")
215
) -> int | None:
216
    """ "
217
    Function to write Errors out to Slack.  Requires a pre-configured `webhook_url`
218
    to be setup.
219

220
    Args:
221
        errors (list): The list of Failed Tasks + their associated errors
222

223
        webhook_url (str): Optional Parameter to specify the Webhook to send the
224
            errors to.  Defaults to `os.environ.get("WEBHOOK_URL")`
225

226
    Returns:
227
        None, but writes the Errors to Slack if there are any
228
    """
229
    try:
1✔
230
        date = datetime.now().date()
1✔
231
        num_errors = len(errors)
1✔
232
        str_dump = "\n".join(errors)
1✔
233

234
        if num_errors > 0:
1✔
235
            response = requests.post(
1✔
236
                webhook_url,
237
                data=json.dumps(
238
                    {
239
                        "text": (
240
                            f"\U0001f6d1 {num_errors} Errors during NBA ELT "
241
                            f"Ingestion on {date}: \n {str_dump}"
242
                        )
243
                    }
244
                ),
245
                headers={"Content-Type": "application/json"},
246
            )
247
            logging.info(
1✔
248
                f"Wrote Errors to Slack, Reponse Code {response.status_code}. "
249
                "Exiting ..."
250
            )
251
            return response.status_code
1✔
252
        else:
253
            logging.info("No Error Logs, not writing to Slack.  Exiting out ...")
1✔
254
            return None
1✔
255
    except Exception as e:
×
256
        logging.error(f"Error Writing to Slack, {e}")
×
257
        raise
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc