• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sun770311 / bizsurvival515 / 22927895645

10 Mar 2026 10:52PM UTC coverage: 93.683% (+10.4%) from 83.333%
22927895645

push

github

web-flow
Merge pull request #8 from sun770311/mapbox

Adding .py scripts with unittests and correct CI integration

1320 of 1409 new or added lines in 14 files covered. (93.68%)

1320 of 1409 relevant lines covered (93.68%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.3
/tests/test_mapbox.py
1
"""Tests for the Mapbox GeoJSON pipeline module."""
2

3
from pathlib import Path
1✔
4
import json
1✔
5
import tempfile
1✔
6
import unittest
1✔
7

8
import pandas as pd
1✔
9

10
from pipeline.utils import load_joined_dataset, CUTOFF_DATE, VALID_BOROUGHS, NYC_BBOX
1✔
11
from pipeline.mapbox import (
1✔
12
    GeoJSONConfig,
13
    build_business_license_metadata,
14
    build_business_summary,
15
    build_feature,
16
    build_full_address,
17
    clean_joined_business_ids,
18
    clean_license_fields,
19
    filter_valid_boroughs,
20
    load_licenses_dataset,
21
    merge_business_summary_with_license_metadata,
22
    run_geojson_pipeline,
23
    validate_licenses_dataset,
24
)
25

26
TEST_DATA_DIR = Path(__file__).parent / "data"
1✔
27

28

29
def flatten_geojson_features(geojson: dict) -> pd.DataFrame:
    """Turn a GeoJSON mapping into a flat pandas DataFrame for assertions.

    Each feature contributes one row carrying selected properties plus the
    geometry type and point coordinates. GeoJSON stores coordinates in
    ``[longitude, latitude]`` order, which is why index 0 maps to longitude.
    The ``last_month`` column is parsed to datetimes when present.
    """
    records = []

    for feature in geojson.get("features", []):
        props = feature.get("properties", {})
        geom = feature.get("geometry", {})
        coords = geom.get("coordinates", [None, None])

        # Guard against malformed/short coordinate arrays.
        lng = coords[0] if len(coords) > 0 else None
        lat = coords[1] if len(coords) > 1 else None

        record = {
            "business_id": props.get("business_id"),
            "active": props.get("active"),
            "last_month": props.get("last_month"),
            "complaint_sum": props.get("complaint_sum"),
            "license_count": props.get("license_count"),
            "license_records": props.get("license_records"),
            "geometry_type": geom.get("type"),
            "latitude": lat,
            "longitude": lng,
        }
        records.append(record)

    frame = pd.DataFrame(records)
    if not frame.empty and "last_month" in frame.columns:
        frame["last_month"] = pd.to_datetime(frame["last_month"], errors="coerce")
    return frame
59

60

61
def prepare_clean_inputs() -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load the test fixtures and return cleaned (joined, licenses) DataFrames.

    The licenses frame is restricted to rows with a business ID and
    coordinates that fall inside the NYC bounding box.
    """
    joined = clean_joined_business_ids(
        load_joined_dataset(TEST_DATA_DIR / "joined_dataset.csv")
    )

    licenses = load_licenses_dataset(TEST_DATA_DIR / "licenses_sample.csv")
    licenses = clean_license_fields(licenses)
    licenses = build_full_address(licenses)
    licenses = filter_valid_boroughs(licenses)

    # Keep only rows that have an ID plus coordinates inside the NYC bbox.
    with_coords = licenses.dropna(
        subset=["Business Unique ID", "Latitude", "Longitude"]
    ).copy()
    inside_bbox = with_coords["Latitude"].between(
        NYC_BBOX.lat_min, NYC_BBOX.lat_max
    ) & with_coords["Longitude"].between(NYC_BBOX.lng_min, NYC_BBOX.lng_max)
    filtered = with_coords[inside_bbox].copy()

    return joined, filtered
81

82

83
class TestMapbox(unittest.TestCase):
    """Test suite for the Mapbox GeoJSON generation pipeline."""

    def _run_pipeline_and_load(self) -> dict:
        """Run the full GeoJSON pipeline into a temp directory and return the parsed output.

        Shared by the output-verification tests to avoid repeating the
        config/run/load boilerplate in each one.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "businesses.geojson"
            config = GeoJSONConfig(
                joined_data_path=TEST_DATA_DIR / "joined_dataset.csv",
                licenses_path=TEST_DATA_DIR / "licenses_sample.csv",
                output_path=output_path,
            )
            run_geojson_pipeline(config)

            with output_path.open("r", encoding="utf-8") as file_obj:
                return json.load(file_obj)

    def test_validate_licenses_dataset_accepts_valid_data(self):
        """Test that validation succeeds on a properly formatted licenses dataset."""
        licenses = load_licenses_dataset(TEST_DATA_DIR / "licenses_sample.csv")
        validate_licenses_dataset(licenses)

    def test_validate_licenses_dataset_rejects_missing_required_columns(self):
        """Test that validation fails when the licenses dataset misses required columns."""
        bad = pd.DataFrame(
            {
                "Business Unique ID": ["A"],
                "Latitude": [40.7],
                "Longitude": [-73.9],
            }
        )

        with self.assertRaisesRegex(ValueError, "Missing required license columns"):
            validate_licenses_dataset(bad)

    def test_filter_valid_boroughs_keeps_only_five_boroughs(self):
        """Test that invalid boroughs are correctly filtered out."""
        licenses = pd.DataFrame(
            {
                "Business Unique ID": ["1", "2", "3"],
                "Borough": ["Queens", "Boston", "Bronx"],
                "Latitude": [40.7, 40.7, 40.8],
                "Longitude": [-73.9, -73.9, -73.8],
            }
        )

        filtered = filter_valid_boroughs(licenses)
        self.assertTrue(set(filtered["Borough"].unique()).issubset(VALID_BOROUGHS))
        self.assertNotIn("Boston", filtered["Borough"].tolist())

    def test_build_business_summary_has_expected_columns(self):
        """Test that the business summary aggregates expected panel metrics."""
        joined, _licenses = prepare_clean_inputs()
        summary = build_business_summary(joined, cutoff_date=CUTOFF_DATE)

        self.assertFalse(summary.empty)
        self.assertIn("business_id", summary.columns)
        self.assertIn("complaint_sum", summary.columns)
        self.assertIn("last_month", summary.columns)
        self.assertIn("active", summary.columns)
        # The active flag must be strictly binary.
        self.assertTrue(set(summary["active"].unique()).issubset({0, 1}))

    def test_build_business_license_metadata_has_expected_columns(self):
        """Test that license metadata aggregation forms expected structured lists."""
        _joined, licenses = prepare_clean_inputs()
        metadata = build_business_license_metadata(licenses)

        self.assertFalse(metadata.empty)
        self.assertIn("business_id", metadata.columns)
        self.assertIn("latitude", metadata.columns)
        self.assertIn("longitude", metadata.columns)
        self.assertIn("license_count", metadata.columns)
        self.assertIn("license_records", metadata.columns)

        # Every license_records cell must be a list so it serializes to JSON.
        is_list = metadata["license_records"].apply(lambda x: isinstance(x, list))
        self.assertTrue(is_list.all())

    def test_merge_business_summary_with_license_metadata_returns_business_rows(self):
        """Test that the summary and metadata dataframes map correctly to one another."""
        joined, licenses = prepare_clean_inputs()
        summary = build_business_summary(joined, cutoff_date=CUTOFF_DATE)
        metadata = build_business_license_metadata(licenses)

        merged = merge_business_summary_with_license_metadata(summary, metadata)

        self.assertFalse(merged.empty)
        self.assertIn("business_id", merged.columns)
        self.assertIn("license_records", merged.columns)

    def test_build_feature_returns_valid_geojson_feature(self):
        """Test that a single series row maps to a properly formed GeoJSON feature dict."""
        row = pd.Series(
            {
                "business_id": "B1",
                "active": 1,
                "last_month": pd.Timestamp("2026-02-01"),
                "complaint_sum": 5.0,
                "license_count": 2,
                "license_records": [],
                "latitude": 40.75,
                "longitude": -73.90,
            }
        )

        feature = build_feature(row)

        self.assertIsNotNone(feature)
        self.assertEqual(feature["type"], "Feature")
        self.assertEqual(feature["geometry"]["type"], "Point")
        # GeoJSON coordinate order is [longitude, latitude].
        self.assertEqual(feature["geometry"]["coordinates"], [-73.90, 40.75])
        self.assertEqual(feature["properties"]["business_id"], "B1")

    def test_run_geojson_pipeline_writes_valid_geojson(self):
        """Test that the end-to-end GeoJSON pipeline properly dumps out a valid file."""
        # Not routed through _run_pipeline_and_load: this test also asserts the
        # returned path and the file's existence on disk.
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = Path(tmpdir) / "businesses.geojson"

            config = GeoJSONConfig(
                joined_data_path=TEST_DATA_DIR / "joined_dataset.csv",
                licenses_path=TEST_DATA_DIR / "licenses_sample.csv",
                output_path=output_path,
            )

            returned_path = run_geojson_pipeline(config)

            self.assertEqual(returned_path, output_path)
            self.assertTrue(output_path.exists())

            with output_path.open("r", encoding="utf-8") as file_obj:
                geojson = json.load(file_obj)

            self.assertEqual(geojson["type"], "FeatureCollection")
            self.assertIsInstance(geojson["features"], list)
            self.assertGreater(len(geojson["features"]), 0)

    def test_geojson_output_matches_active_status(self):
        """Test that the generated GeoJSON maps to the expected active status metrics."""
        geojson = self._run_pipeline_and_load()

        features_df = flatten_geojson_features(geojson)
        joined, _ = prepare_clean_inputs()

        expected_summary = build_business_summary(joined, cutoff_date=CUTOFF_DATE)
        expected_subset = expected_summary[["business_id", "active", "last_month"]]

        merged = features_df.merge(
            expected_subset, on="business_id", how="left", suffixes=("_geojson", "_expected")
        )

        # Every feature in the output must have a matching summary row.
        self.assertTrue(merged["active_expected"].notna().all())

        active_geojson = merged["active_geojson"].astype(int)
        active_expected = merged["active_expected"].astype(int)
        self.assertTrue((active_geojson == active_expected).all())
        self.assertTrue((merged["last_month_geojson"] == merged["last_month_expected"]).all())

    def test_geojson_output_matches_complaints(self):
        """Test that the generated GeoJSON maps to the expected complaint sum metrics."""
        geojson = self._run_pipeline_and_load()

        features_df = flatten_geojson_features(geojson)
        joined, _ = prepare_clean_inputs()

        expected_summary = build_business_summary(joined, cutoff_date=CUTOFF_DATE)
        expected_subset = expected_summary[["business_id", "complaint_sum"]]

        merged = features_df.merge(
            expected_subset, on="business_id", how="left", suffixes=("_geojson", "_expected")
        )

        complaint_geojson = pd.to_numeric(merged["complaint_sum_geojson"], errors="coerce")
        complaint_expected = pd.to_numeric(merged["complaint_sum_expected"], errors="coerce")
        self.assertTrue((complaint_geojson == complaint_expected).all())

    def test_geojson_output_matches_coordinates(self):
        """Test that the generated GeoJSON maps to the expected coordinates."""
        geojson = self._run_pipeline_and_load()

        features_df = flatten_geojson_features(geojson)
        _, licenses = prepare_clean_inputs()

        expected_coords = (
            licenses.sort_values(["Business Unique ID"])
            .drop_duplicates(subset=["Business Unique ID"], keep="first")
            [["Business Unique ID", "Latitude", "Longitude"]]
            .rename(
                columns={
                    "Business Unique ID": "business_id",
                    "Latitude": "latitude_expected",
                    "Longitude": "longitude_expected",
                }
            )
            .reset_index(drop=True)
        )

        merged_coords = features_df.merge(
            expected_coords,
            on="business_id",
            how="left",
        )

        self.assertTrue(merged_coords["latitude_expected"].notna().all())
        self.assertTrue(merged_coords["longitude_expected"].notna().all())

        # Round before comparing to avoid spurious float round-trip mismatches
        # from JSON serialization.
        lat_geojson = merged_coords["latitude"].round(8)
        lat_expected = merged_coords["latitude_expected"].round(8)
        self.assertTrue((lat_geojson == lat_expected).all())

        # BUGFIX: the original computed longitude_expected but never asserted
        # longitude equality, so a swapped or wrong longitude went undetected.
        lng_geojson = merged_coords["longitude"].round(8)
        lng_expected = merged_coords["longitude_expected"].round(8)
        self.assertTrue((lng_geojson == lng_expected).all())
# Allow running this test module directly, e.g. `python tests/test_mapbox.py`.
if __name__ == "__main__":
    unittest.main()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc