• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

gcivil-nyu-org / team4-wed-fall25 / 63

10 Nov 2025 07:18PM UTC coverage: 75.054% (+11.0%) from 64.094%
63

push

travis-pro

web-flow
Merge pull request #77 from gcivil-nyu-org/feature/run_test_models_locally

fixed some UI changes and test cases

403 of 431 new or added lines in 11 files covered. (93.5%)

269 existing lines in 4 files now uncovered.

1047 of 1395 relevant lines covered (75.05%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

20.28
/note2webapp/utils.py
1
# note2webapp/utils.py
2
import hashlib
import importlib.util
import inspect
import json
import logging
import os
import shutil
import traceback

import torch
from django.conf import settings
12

13
# primitive types we can strictly validate for "custom" schemas
14
# NOTE: these classes are used with isinstance(); because bool is a subclass
# of int in Python, a bool value also satisfies the "int" entry.
TYPE_MAP = {"float": float, "int": int, "str": str, "bool": bool}
15

16

17
# ---------------------------------------------------------------------
18
# 1. HASHING + MATERIALIZING + DELETING DIRECTORIES
19
# ---------------------------------------------------------------------
20
def sha256_uploaded_file(django_file):
    """
    Compute the SHA-256 hex digest of an uploaded file by streaming its chunks.

    Used in views to detect duplicate uploads.

    Args:
        django_file: Object exposing ``chunks()`` that yields ``bytes``
            (an InMemory/Temporary uploaded file).

    Returns:
        str: Hexadecimal SHA-256 digest of all chunks, in order.
    """
    digest = hashlib.sha256()
    for chunk in django_file.chunks():
        digest.update(chunk)
    return digest.hexdigest()
29

30

31
def sha256_file_path(path):
    """
    Compute the SHA-256 hex digest of an existing file on disk.

    The file is read in fixed-size binary chunks so it is never loaded
    into memory as a whole.

    Args:
        path: Filesystem path of the file to hash.

    Returns:
        str: Hexadecimal SHA-256 digest of the file contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        # iter(callable, sentinel) stops at the first empty read, i.e. EOF.
        for chunk in iter(lambda: fh.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
40

41

42
def materialize_version_to_media(version):
    """
    Copy a version's uploaded files into its media folder.

    Meant to run after a version PASSes validation. Files are copied to:
      media/<category>/<model-name>/v<version_number>/
    under the fixed names ``model.pt``, ``predict.py`` and ``schema.json``.
    A file is skipped when its field is empty or its path is not an
    existing file.

    Args:
        version: Object exposing ``category``, ``upload.name``,
            ``version_number`` and the file fields ``model_file``,
            ``predict_file`` and ``schema_file``.
    """
    target_dir = os.path.join(
        settings.MEDIA_ROOT,
        version.category,
        version.upload.name,
        f"v{version.version_number}",
    )
    os.makedirs(target_dir, exist_ok=True)

    # (source field, consistent filename inside the version folder)
    canonical_names = (
        (version.model_file, "model.pt"),
        (version.predict_file, "predict.py"),
        (version.schema_file, "schema.json"),
    )
    for field_file, filename in canonical_names:
        if field_file and os.path.isfile(field_file.path):
            shutil.copy(field_file.path, os.path.join(target_dir, filename))
67

68

69
def delete_version_files_and_dir(version):
    """
    Delete a version's uploaded files and its materialized media folder.

    Removes the files stored by the file fields (``model_file``,
    ``predict_file``, ``schema_file``) AND the folder
    media/<category>/<model-name>/vX/ for this version.

    Deletion is best-effort: a failure is logged and never raised, so one
    undeletable file does not abort the rest of the cleanup.

    Args:
        version: Object exposing ``category``, ``upload.name``,
            ``version_number`` and the three file fields.
    """
    log = logging.getLogger(__name__)

    # 1) delete uploaded files
    for field_file in (version.model_file, version.predict_file, version.schema_file):
        if not field_file or not getattr(field_file, "path", None):
            continue
        try:
            if os.path.isfile(field_file.path):
                os.remove(field_file.path)
        except Exception:
            # don't crash deletion, but leave a trace of what was left behind
            log.warning("Could not delete uploaded file %s", field_file, exc_info=True)

    # 2) delete materialized dir
    version_dir = os.path.join(
        settings.MEDIA_ROOT,
        version.category,
        version.upload.name,
        f"v{version.version_number}",
    )
    if os.path.isdir(version_dir):
        try:
            shutil.rmtree(version_dir)
        except Exception:
            log.warning("Could not delete version folder %s", version_dir, exc_info=True)
96

97

98
def delete_model_media_tree(model_upload):
    """
    Delete the whole media folder for this model:
        media/<category>/<model-name>/

    The category is not stored on the model itself, so the folder is looked
    for under the category of one existing version (if any) and under the
    three fixed categories. Deletion is best-effort: failures are logged,
    never raised.

    Args:
        model_upload: Object exposing ``name`` and a ``versions`` manager
            whose ``first()`` returns a version (or a falsy value).
    """
    log = logging.getLogger(__name__)

    # An empty name would make os.path.join() resolve to the category folder
    # itself, and rmtree would then remove every model in that category.
    if not model_upload.name:
        return

    # try to read category from an existing version
    possible_categories = []
    any_version = model_upload.versions.first()
    if any_version:
        possible_categories.append(any_version.category)

    # also try the 3 fixed categories
    possible_categories.extend(["sentiment", "recommendation", "text-classification"])

    # dict.fromkeys() drops duplicates while keeping the original order
    for cat in dict.fromkeys(possible_categories):
        candidate = os.path.join(settings.MEDIA_ROOT, cat, model_upload.name)
        if not os.path.isdir(candidate):
            continue
        try:
            shutil.rmtree(candidate)
        except Exception:
            log.warning("Could not delete model folder %s", candidate, exc_info=True)
120

121

122
# ---------------------------------------------------------------------
123
# 2. SCHEMA BUILDERS
124
# ---------------------------------------------------------------------
125
def _make_value_from_simple_type(typ: str):
    """
    Return a sample value for a type name of the old/custom schema style.

    Args:
        typ: One of "float", "int", "str", "bool" or "object".

    Returns:
        1.0, 42, "example", True or a new empty dict respectively;
        ``None`` for any other value.
    """
    if typ == "object":
        # Build a new dict on every call so callers never share one instance.
        return {}
    if not isinstance(typ, str):
        # Non-string specs have no sample value (and may be unhashable).
        return None
    samples = {"float": 1.0, "int": 42, "str": "example", "bool": True}
    return samples.get(typ)
138

139

140
def _build_from_custom_schema(schema: dict):
    """
    Build a dummy input from the custom schema style:
    {
      "input": { "text": "str", "age": "int" },
      "output": { "prediction": "float" }
    }

    String type names are turned into sample values. A dict value is treated
    as one level of nested fields; anything else (including deeper nesting)
    becomes ``None``.

    Args:
        schema: Parsed schema dict. A missing or ``null`` "input" is treated
            as an empty input.

    Returns:
        tuple: ``(dummy_input, output_schema)`` where ``output_schema`` is
        ``schema["output"]`` or ``{}`` when absent.

    Raises:
        ValueError: If "input" is present but is not an object.
    """
    input_schema = schema.get("input")
    if input_schema is None:
        input_schema = {}
    if not isinstance(input_schema, dict):
        raise ValueError(
            'Schema "input" must be an object mapping field names to types, '
            f"got {type(input_schema).__name__}"
        )

    dummy = {}
    for key, typ in input_schema.items():
        if isinstance(typ, str):
            dummy[key] = _make_value_from_simple_type(typ)
        elif isinstance(typ, dict):
            dummy[key] = {
                sub_key: (
                    _make_value_from_simple_type(sub_typ)
                    if isinstance(sub_typ, str)
                    else None
                )
                for sub_key, sub_typ in typ.items()
            }
        else:
            dummy[key] = None
    return dummy, schema.get("output", {})
164

165

166
def _build_from_json_schema(schema: dict):
    """
    Build a dummy input from a real JSON Schema style document:
    {
      "type": "object",
      "required": ["text"],
      "properties": {
        "text": { "type": "string", "example": "This is great!" }
      }
    }

    For each property:
      - use "example" if present and not null
      - else fill a default from "type" (a list of types such as
        ["string", "null"] uses the first type that has a default)
      - else fall back to the string "example"

    Args:
        schema: Parsed JSON Schema dict; only "properties" is read.

    Returns:
        tuple: ``(input_data, None)`` - this style carries no output schema.
    """
    # Factories rather than values, so every call gets fresh {} / [] objects.
    defaults_by_type = {
        "string": lambda: "example text",
        "number": lambda: 1.0,
        "integer": lambda: 1,
        "boolean": lambda: True,
        "object": dict,
        "array": list,
    }

    props = schema.get("properties", {}) or {}
    data = {}
    for name, prop in props.items():
        if not isinstance(prop, dict):
            data[name] = "example"
            continue

        if prop.get("example") is not None:
            data[name] = prop["example"]
            continue

        ptype = prop.get("type")
        if isinstance(ptype, list):
            ptype = next(
                (t for t in ptype if isinstance(t, str) and t in defaults_by_type),
                None,
            )
        factory = defaults_by_type.get(ptype) if isinstance(ptype, str) else None
        data[name] = factory() if factory is not None else "example"

    return data, None
208

209

210
def generate_input_and_output_schema(schema_path: str):
    """
    Decide which schema style a file uses and build an input dict from it.

    Supported styles:
      1) wrapped: { "input": {...}, "output": {...} } - "input" may itself
         be a JSON Schema (has "properties") or the simple custom style
      2) direct JSON Schema (has "properties" or "type": "object")
      3) anything else, including a top-level value that is not an object:
         empty input, no output schema

    Args:
        schema_path: Path of the schema JSON file (read as UTF-8).

    Returns:
        tuple: ``(input_data: dict, output_schema: dict|None)``

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(schema_path, "r", encoding="utf-8") as f:
        schema = json.load(f)

    # Valid JSON need not be an object; lists/strings/numbers have no schema
    # keys to inspect, so they take the fallback.
    if not isinstance(schema, dict):
        return {}, None

    # 1) wrapped format: { "input": {...}, "output": {...} }
    if "input" in schema:
        input_schema = schema["input"]
        # maybe they put JSON Schema INSIDE "input"
        if isinstance(input_schema, dict) and "properties" in input_schema:
            return _build_from_json_schema(input_schema)
        return _build_from_custom_schema(schema)

    # 2) direct JSON Schema
    if "properties" in schema or schema.get("type") == "object":
        return _build_from_json_schema(schema)

    # 3) fallback
    return {}, None
234

235

236
# ---------------------------------------------------------------------
237
# 3. MODEL LOADING HELPERS
238
# ---------------------------------------------------------------------
239
def _load_model_for_version(module, model_path):
    """
    Best-effort model loader used during validation/testing.

    Tries, in order:
      1) ``torch.load(model_path, map_location="cpu")``
      2) the user's ``module._load_model``: called with no arguments when it
         declares none, otherwise with ``model_path`` when it can be called
         with exactly one positional argument (extra parameters must have
         defaults)

    Args:
        module: The imported predict module (any object; ``_load_model`` is
            optional).
        model_path: Path of the model file.

    Returns:
        The loaded object, or ``None`` when every attempt failed. Failures
        are logged at debug level and never raised.
    """
    log = logging.getLogger(__name__)

    # 1) try torch.load
    # SECURITY NOTE(review): torch.load relies on unpickling, which can run
    # arbitrary code from a crafted file. Left unchanged here - confirm the
    # upload trust model, or consider weights_only=True.
    try:
        return torch.load(model_path, map_location="cpu")
    except Exception:
        log.debug("torch.load failed for %s; trying _load_model", model_path, exc_info=True)

    # 2) try user's loader
    loader = getattr(module, "_load_model", None)
    if not callable(loader):
        return None

    try:
        sig = inspect.signature(loader)
        if not sig.parameters:
            return loader()
        try:
            sig.bind(model_path)
        except TypeError:
            # The loader cannot be called with exactly one positional argument.
            return None
        return loader(model_path)
    except Exception:
        log.debug("User _load_model failed for %s", model_path, exc_info=True)

    return None
264

265

266
def _is_seek_error(out):
    """
    Tell whether a predict() result reports the "no attribute 'seek'" error.

    Per the original note, this is the message PyTorch produces for
    "dict has no attribute 'seek'".

    Args:
        out: Value returned by predict(); any type is accepted.

    Returns:
        bool: True only when ``out`` is a dict with an "error" entry whose
        text contains ``no attribute 'seek'``.
    """
    if not isinstance(out, dict) or "error" not in out:
        return False
    return "no attribute 'seek'" in str(out["error"])
275

276

277
# ---------------------------------------------------------------------
278
# 4. VALIDATION
279
# ---------------------------------------------------------------------
280
def validate_model(version):
    """
    Validate an uploaded model version by running its predict() once.

    Steps:
      1. import the version's predict.py
      2. build an input from the uploaded schema (custom or JSON Schema)
      3. call predict(...) with 1 or 2 arguments, matching its signature
      4. if PASS: materialize to media/<cat>/<model>/vX/
      5. if FAIL: store the traceback in ``version.log``

    Any ``Exception`` raised along the way is turned into a FAIL. The working
    directory is restored afterwards and ``version.save()`` is called for
    both outcomes.

    Args:
        version: Object exposing ``model_file``, ``predict_file`` and
            ``schema_file`` (each with ``.path``), writable ``status`` and
            ``log`` attributes, and ``save()``.

    Returns:
        The same ``version`` object with ``status`` ("PASS"/"FAIL") and
        ``log`` updated.
    """
    original_cwd = os.getcwd()
    try:
        # Run from the model's folder - presumably so relative paths used by
        # predict.py resolve. NOTE(review): os.chdir() is process-wide, so
        # concurrent validations in one process would interfere.
        model_dir = os.path.dirname(version.model_file.path)
        os.chdir(model_dir)

        # import predict.py
        spec = importlib.util.spec_from_file_location(
            "predict", version.predict_file.path
        )
        if spec is None or spec.loader is None:
            # Happens when the file does not have a Python source suffix.
            raise Exception("Could not load predict.py as a Python module")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if not hasattr(module, "predict"):
            raise Exception("predict() function missing in predict.py")

        if not version.schema_file:
            raise Exception("No schema file provided")

        # build input from schema
        input_data, output_schema = generate_input_and_output_schema(
            version.schema_file.path
        )

        # inspect signature
        num_params = len(inspect.signature(module.predict).parameters)

        # call predict
        if num_params == 1:
            result = module.predict(input_data)
        elif num_params == 2:
            result = module.predict(version.model_file.path, input_data)
        else:
            raise Exception(f"predict() has {num_params} parameters, expected 1 or 2.")

        # try to fix common torch.load seek error
        if _is_seek_error(result):
            try:
                if num_params == 1:
                    result = module.predict(version.model_file.path)
                elif num_params == 2:
                    model_obj = _load_model_for_version(module, version.model_file.path)
                    if model_obj is not None:
                        result = module.predict(model_obj, input_data)
            except Exception:
                # Keep the original error result; it is reported below.
                logging.getLogger(__name__).debug(
                    "Retry after seek error failed", exc_info=True
                )

        if not isinstance(result, dict):
            raise Exception("predict() must return a dict")

        # If result says error, we mark FAIL
        if "error" in result and result.get("prediction") is None:
            raise Exception(f"Prediction error: {result['error']}")

        # Strict output checking only for simple custom schema
        do_strict = (
            isinstance(output_schema, dict)
            and output_schema
            and all(
                isinstance(v, str) and v in TYPE_MAP for v in output_schema.values()
            )
        )
        if do_strict:
            for key, typ in output_schema.items():
                if key not in result:
                    raise Exception(f"Missing key in output: {key}")
                if not isinstance(result[key], TYPE_MAP[typ]):
                    raise Exception(
                        f"Wrong type for '{key}': expected {typ}, got {type(result[key]).__name__}"
                    )

        # success
        # default=str keeps log rendering from failing (and flipping a PASS to
        # FAIL) when the result holds values json cannot encode natively.
        version.status = "PASS"
        version.log = (
            "✅ Validation Successful\n\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
            "INPUT (from schema):\n"
            f"{json.dumps(input_data, indent=2, default=str)}\n\n"
            "OUTPUT (from predict()):\n"
            f"{json.dumps(result, indent=2, default=str)}\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        )

        # materialize now
        materialize_version_to_media(version)

    except Exception:
        version.status = "FAIL"
        version.log = (
            "❌ Validation Failed\n\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
            f"{traceback.format_exc()}\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        )
    finally:
        os.chdir(original_cwd)

    version.save()
    return version
387

388

389
# ---------------------------------------------------------------------
390
# 5. TEST MODEL ON CPU (manual testing from UI)
391
# ---------------------------------------------------------------------
392
def test_model_on_cpu(version, input_data):
    """
    Run a version's predict() on caller-supplied input (used by the test page).

    Handles both ``predict(input)`` and ``predict(model_path, input)``. When
    the result reports the "no attribute 'seek'" error, one retry is made:
    a 1-parameter predict is called with the model path, a 2-parameter
    predict is called with a loaded model object (if loading succeeds).

    Args:
        version: Object exposing ``model_file.path`` and ``predict_file.path``.
        input_data: Value passed to predict() as its input.

    Returns:
        dict: ``{"status": "ok", "output": <predict result>}`` on success, or
        ``{"status": "error", "error": <message>, "trace": <traceback>}`` when
        any ``Exception`` is raised. The working directory is restored in
        both cases.
    """
    original_cwd = os.getcwd()
    try:
        # NOTE(review): os.chdir() is process-wide, so concurrent calls in one
        # process would interfere with each other.
        model_dir = os.path.dirname(version.model_file.path)
        os.chdir(model_dir)

        predict_path = version.predict_file.path
        model_path = version.model_file.path

        spec = importlib.util.spec_from_file_location("predict_module", predict_path)
        if spec is None or spec.loader is None:
            # Happens when the file does not have a Python source suffix.
            raise Exception("Could not load predict.py as a Python module")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if not hasattr(module, "predict"):
            raise Exception("predict() function missing in predict.py")

        num_params = len(inspect.signature(module.predict).parameters)

        if num_params == 1:
            output = module.predict(input_data)
        elif num_params == 2:
            output = module.predict(model_path, input_data)
        else:
            raise Exception(f"predict() has {num_params} parameters, expected 1 or 2")

        if _is_seek_error(output):
            if num_params == 1:
                output = module.predict(model_path)
            else:
                model_obj = _load_model_for_version(module, model_path)
                # Compare with None: truth-testing a loaded model is wrong for
                # an empty state dict and raises for multi-element tensors.
                if model_obj is not None:
                    output = module.predict(model_obj, input_data)

        return {"status": "ok", "output": output}

    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
            "trace": traceback.format_exc(),
        }
    finally:
        os.chdir(original_cwd)


# The name starts with "test_", so pytest would try to collect this helper as
# a test whenever it is imported into a test module; opt out explicitly.
test_model_on_cpu.__test__ = False
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc