• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jyablonski / python_docker / 16378074149

18 Jul 2025 06:48PM UTC coverage: 83.993% (+2.1%) from 81.898%
16378074149

push

github

web-flow
Ingestion v2.0.8 (#103)

### Description
Ruff Linting Updates

## Added
- New Ruff Linting Rules

## Updated
- Docstrings to accommodate new Linting Rules
- `return` cleanup & optimizations

## Deleted
- None

15 of 29 new or added lines in 5 files covered. (51.72%)

1 existing line in 1 file now uncovered.

467 of 556 relevant lines covered (83.99%)

1.68 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.33
/src/utils.py
1
import json
2✔
2
import logging
2✔
3
import os
2✔
4
import re
2✔
5
from datetime import date, datetime
2✔
6

7
import numpy as np
2✔
8
import pandas as pd
2✔
9
import requests
2✔
10
from nltk.sentiment import SentimentIntensityAnalyzer
2✔
11

12

13
def filter_spread(value: str) -> str:
    """Helper Function for filtering Odds Spread

    Splits the raw value on whitespace and keeps only the parts that look like
    a spread: a ``+`` / ``-`` prefixed number no greater than 25, or an
    unsigned whole number no greater than 25.  Every other part (for example
    a 3-digit value such as ``-110``) is dropped.

    Parameters:
        value (str): The original value from the spread column.

    Returns:
        The kept parts joined by single spaces, or an empty string when no
        part qualifies.
    """

    def _is_spread(part: str) -> bool:
        """Return True when a single whitespace-free token should be kept."""
        if part[0] in ("+", "-"):
            try:
                return float(part[1:]) <= 25
            except ValueError:
                # a bare sign or a signed non-number ("+", "-PK") is not a
                # spread; drop it instead of crashing the whole row
                return False
        return part.isdigit() and int(part) <= 25

    # filtering (rather than substituting "") means no extra whitespace is
    # ever produced, so no regex clean-up pass is needed afterwards
    return " ".join(part for part in value.split() if _is_spread(part))
36

37

38
def get_season_type(todays_date: date | None = None) -> str:
2✔
39
    """Function to generate Season Type for a given Date.
40

41
    **2025-03-16 NOTE** this has been deprecated as this logic
42
    belongs in the dbt project
43

44
    Args:
45
        todays_date (date): The Date to generate a Season Type for.  Defaults to
46
            today's date.
47

48
    Returns:
49
        The Season Type for Given Date
50
    """
51
    if todays_date is None:
2✔
52
        todays_date = datetime.now().date()
×
53

54
    if todays_date < datetime(2025, 4, 15).date():
2✔
55
        season_type = "Regular Season"
2✔
56
    elif (todays_date >= datetime(2025, 4, 16).date()) & (
2✔
57
        todays_date < datetime(2025, 4, 21).date()
58
    ):
59
        season_type = "Play-In"
2✔
60
    else:
61
        season_type = "Playoffs"
2✔
62

63
    return season_type
2✔
64

65

66
def check_schedule(date: date) -> bool:
    """Schedule Checker used in Boxscores + PBP

    Sends a GET request to the schedule endpoint for the given date and
    reports whether the decoded JSON payload is non-empty.

    Args:
        date (date): The Date to check for games on.  It is interpolated into
            the query string as-is.

    Returns:
        Boolean: True if the schedule payload has at least one entry, False if
            it is empty.
    """
    # NOTE: the parameter name shadows the imported `date` class inside this
    # function body; the annotation above is still evaluated against the class.
    schedule_endpoint = f"https://api.jyablonski.dev/schedule?date={date}"

    # requests has no default timeout; without one a stalled API would block
    # the caller forever
    schedule_data = requests.get(schedule_endpoint, timeout=30).json()

    return len(schedule_data) > 0
79

80

81
def add_sentiment_analysis(df: pd.DataFrame, sentiment_col: str) -> pd.DataFrame:
    """Function to add Sentiment Analysis columns to a DataFrame via nltk Vader Lexicon.

    Adds the ``compound``, ``neg``, ``neu`` and ``pos`` score columns plus a
    ``sentiment`` flag (1 when ``compound`` is greater than 0, otherwise 0).
    The columns are assigned on the DataFrame that was passed in.

    Args:
        df (pd.DataFrame): The Pandas DataFrame

        sentiment_col (str): The Column in the DataFrame to run Sentiment Analysis on
            (comments / tweets etc).

    Returns:
        The same DataFrame but with the Sentiment Analysis columns attached.

    Raises:
        Exception: any error raised while scoring is logged and re-raised.
    """
    try:
        analyzer = SentimentIntensityAnalyzer()

        # score every row once and reuse the result for all four columns
        # instead of re-running the analyzer separately for each column
        scores = [analyzer.polarity_scores(text) for text in df[sentiment_col]]
        for score_name in ("compound", "neg", "neu", "pos"):
            df[score_name] = [score[score_name] for score in scores]

        df["sentiment"] = np.where(df["compound"] > 0, 1, 0)
        return df
    except Exception as e:
        logging.error(f"Error Occurred while adding Sentiment Analysis, {e}")
        raise
106

107

108
def get_leading_zeroes(value: int) -> str:
    """Adds leading zeroes to integers

    Args:
        value (int): The value integer (created from `datetime.now().month`)

    Returns:
        The value as a string, left-padded with a 0 when its string form is a
            single character (Nov/Dec aka 11/12 unaffected).
    """
    # str.zfill pads only when the text is shorter than the requested width
    return str(value).zfill(2)
121

122

123
def clean_player_names(name: str) -> str:
    """Strip generational suffixes out of a player name.

    Every occurrence of " Jr.", " Sr.", " III", " II" and " IV" is removed
    from the name, in that order.

    Args:
        name (str): The raw player name you wish to alter.

    Returns:
        str: The name with those suffixes removed.

    Raises:
        Exception: any error raised while replacing is logged and re-raised.
    """
    # order matters: " III" must be handled before " II", otherwise
    # "Robert Williams III" would be left as "Robert WilliamsI"
    suffixes = (" Jr.", " Sr.", " III", " II", " IV")
    try:
        cleaned = name
        for suffix in suffixes:
            cleaned = cleaned.replace(suffix, "")
        return cleaned
    except Exception as e:
        logging.error(f"Error Occurred with Clean Player Names, {e}")
        raise
143

144

145
def write_to_sql(con, table_name: str, df: pd.DataFrame, table_type: str) -> None:
    """Thin wrapper that stores a Pandas DataFrame in a SQL table.

    An empty DataFrame is not written; that is only logged.  Any exception
    raised along the way is logged as an error and swallowed, so this function
    never raises to its caller.

    Args:
        con (SQL Connection): The connection to the SQL DB.

        table_name (str): The Table name to write to SQL as.

        df (DataFrame): The Pandas DataFrame to store in SQL

        table_type (str): Passed to `DataFrame.to_sql` as `if_exists`, i.e.
            whether the table should replace or append to an existing SQL
            Table under that name

    Returns:
        None.  The DataFrame is written through the supplied connection.
    """
    try:
        row_count = len(df)
        if row_count == 0:
            logging.info(f"{table_name} is empty, not writing to SQL")
            return

        df.to_sql(name=table_name, con=con, if_exists=table_type, index=False)
        logging.info(
            f"Writing {row_count} {table_name} rows to aws_{table_name}_source to SQL"
        )
    except Exception as error:
        # failures are logged rather than raised
        logging.error(f"SQL Write Script Failed, {error}")
180

181

182
def query_logs(log_file: str = "logs/example.log") -> list[str]:
    """Small Function to read the Log File and grab Errors

    Scans the file line by line and keeps every line containing the text
    "Failed".  An empty log file yields an empty list.

    Args:
        log_file (str): Optional String of the Log File Name

    Returns:
        list of Error Messages to be passed into Slack Function

    Raises:
        FileNotFoundError: if `log_file` does not exist.
    """
    # The log is plain text, not CSV.  The previous implementation fed it to
    # the CSV parser with sep=r"\\t" - a regex for a literal backslash + "t",
    # not a tab - so it only worked because that separator almost never
    # matched, and it raised on an empty file.  A direct line scan returns the
    # same whole lines without those pitfalls.
    with open(log_file, encoding="utf-8") as log_handle:
        logs = [line.rstrip("\n") for line in log_handle if "Failed" in line]

    logging.info(f"Returning {len(logs)} Failed Logs")
    return logs
198

199

200
def write_to_slack(
2✔
201
    errors: list, webhook_url: str = os.environ.get("WEBHOOK_URL", default="default")
202
) -> int | None:
203
    """Function to write Errors out to Slack.
204

205
    Requires a pre-configured `webhook_url` to be setup.
206

207
    Args:
208
        errors (list): The list of Failed Tasks + their associated errors
209

210
        webhook_url (str): Optional Parameter to specify the Webhook to send the
211
            errors to.  Defaults to `os.environ.get("WEBHOOK_URL")`
212

213
    Returns:
214
        None, but writes the Errors to Slack if there are any
215
    """
216
    try:
2✔
217
        date = datetime.now().date()
2✔
218
        num_errors = len(errors)
2✔
219
        str_dump = "\n".join(errors)
2✔
220

221
        if num_errors > 0:
2✔
222
            response = requests.post(
2✔
223
                webhook_url,
224
                data=json.dumps(
225
                    {
226
                        "text": (
227
                            f"\U0001f6d1 {num_errors} Errors during NBA ELT "
228
                            f"Ingestion on {date}: \n {str_dump}"
229
                        )
230
                    }
231
                ),
232
                headers={"Content-Type": "application/json"},
233
            )
234
            logging.info(
2✔
235
                f"Wrote Errors to Slack, Reponse Code {response.status_code}. "
236
                "Exiting ..."
237
            )
238
            return response.status_code
2✔
239
        logging.info("No Error Logs, not writing to Slack.  Exiting out ...")
2✔
240
        return None
2✔
241
    except Exception as e:
×
242
        logging.error(f"Error Writing to Slack, {e}")
×
243
        raise
×
244

245

246
def generate_schedule_pull_type(season_type: int, playoff_type: int) -> list[str]:
    """Build the list of month names the schedule scraper should pull.

    Args:
        season_type (int): When falsy (e.g. 0) no months are returned at all.

        playoff_type (int): When truthy (e.g. 1) the playoff months are
            returned, otherwise the regular season months.

    Returns:
        list: An empty list, the months october through april, or the months
            april through june, as lowercase month names.
    """
    if not season_type:
        return []

    if playoff_type:
        return ["april", "may", "june"]

    return [
        "october",
        "november",
        "december",
        "january",
        "february",
        "march",
        "april",
    ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc