• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

gcivil-nyu-org / team4-wed-fall25 / 65

11 Nov 2025 04:07PM UTC coverage: 75.054% (+18.5%) from 56.582%
65

push

travis-pro

web-flow
Merge pull request #78 from gcivil-nyu-org/develop

Syncing main with develop

608 of 823 new or added lines in 15 files covered. (73.88%)

37 existing lines in 3 files now uncovered.

1047 of 1395 relevant lines covered (75.05%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

20.28
/note2webapp/utils.py
1
# note2webapp/utils.py
2
import os
1✔
3
import json
1✔
4
import shutil
1✔
5
import hashlib
1✔
6
import traceback
1✔
7
import importlib.util
1✔
8
import inspect
1✔
9

10
import torch
1✔
11
from django.conf import settings
1✔
12

13
# primitive types we can strictly validate for "custom" schemas
14
TYPE_MAP = {"float": float, "int": int, "str": str, "bool": bool}
1✔
15

16

17
# ---------------------------------------------------------------------
18
# 1. HASHING + MATERIALIZING + DELETING DIRECTORIES
19
# ---------------------------------------------------------------------
20
def sha256_uploaded_file(django_file):
    """
    Return the hex SHA-256 digest of an uploaded file.

    The content is consumed through ``django_file.chunks()``, so the digest
    is built piece by piece rather than from one large read. The original
    note says views use this to detect duplicate uploads.
    """
    digest = hashlib.sha256()
    feed = digest.update
    for piece in django_file.chunks():
        feed(piece)
    return digest.hexdigest()
29

30

31
def sha256_file_path(path):
    """
    Return the hex SHA-256 digest of a file that already exists on disk.

    The file is opened in binary mode and read in 8192-byte blocks until
    a read comes back empty.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        while True:
            block = handle.read(8192)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
40

41

42
def materialize_version_to_media(version):
    """
    Copy a version's uploaded files into its media folder under fixed names.

    Destination folder (created when missing):
      <MEDIA_ROOT>/<category>/<model-name>/v<version_number>/

    model_file, predict_file and schema_file are copied as model.pt,
    predict.py and schema.json respectively. A field is skipped when it is
    unset or its path is not an existing file.
    """
    destination = os.path.join(
        settings.MEDIA_ROOT,
        version.category,
        version.upload.name,
        f"v{version.version_number}",
    )
    os.makedirs(destination, exist_ok=True)

    # (FileField attribute on the version, canonical filename in the folder)
    canonical_names = (
        ("model_file", "model.pt"),
        ("predict_file", "predict.py"),
        ("schema_file", "schema.json"),
    )
    for field_name, filename in canonical_names:
        stored = getattr(version, field_name)
        if stored and os.path.isfile(stored.path):
            shutil.copy(stored.path, os.path.join(destination, filename))
67

68

69
def delete_version_files_and_dir(version):
    """
    Best-effort removal of everything stored on disk for one version.

    Two things are removed:
      1. the files behind model_file, predict_file and schema_file
      2. the folder <MEDIA_ROOT>/<category>/<model-name>/v<version_number>/

    Any exception raised while removing a file or the folder is swallowed
    so that a failed cleanup never aborts the caller's deletion.
    """
    # 1) files stored by the FileFields
    stored_files = (version.model_file, version.predict_file, version.schema_file)
    for stored in stored_files:
        if not stored or not getattr(stored, "path", None):
            continue
        try:
            if os.path.isfile(stored.path):
                os.remove(stored.path)
        except Exception:
            # deliberately ignored: keep going with the remaining files
            pass

    # 2) materialized folder for this version
    materialized = os.path.join(
        settings.MEDIA_ROOT,
        version.category,
        version.upload.name,
        f"v{version.version_number}",
    )
    if not os.path.isdir(materialized):
        return
    try:
        shutil.rmtree(materialized)
    except Exception:
        pass
96

97

98
def delete_model_media_tree(model_upload):
    """
    Delete the whole media folder of a model:
        <MEDIA_ROOT>/<category>/<model-name>/

    The category is not stored on the model itself, so every plausible
    category is tried: the category of the first existing version (if any)
    plus the three fixed ones.

    Safety: a folder is only removed when it lies strictly inside its
    category directory. Without this check an empty name (or "." / "..")
    would resolve to the category directory or MEDIA_ROOT itself and
    ``rmtree`` would wipe unrelated models.

    Removal is best-effort: errors from ``shutil.rmtree`` are ignored.
    """
    name = model_upload.name
    if not name:
        # Joining an empty name yields the category directory itself.
        return

    possible_categories = []
    any_version = model_upload.versions.first()
    if any_version:
        possible_categories.append(any_version.category)
    possible_categories.extend(["sentiment", "recommendation", "text-classification"])

    # dict.fromkeys drops duplicates while keeping first-seen order.
    for cat in dict.fromkeys(possible_categories):
        category_root = os.path.abspath(os.path.join(settings.MEDIA_ROOT, cat))
        candidate = os.path.abspath(os.path.join(category_root, name))

        try:
            is_inside = (
                candidate != category_root
                and os.path.commonpath([category_root, candidate]) == category_root
            )
        except ValueError:
            # commonpath refuses paths on different drives: treat as outside.
            is_inside = False
        if not is_inside:
            continue

        if os.path.isdir(candidate):
            try:
                shutil.rmtree(candidate)
            except Exception:
                # best-effort cleanup: never crash the caller's delete flow
                pass
120

121

122
# ---------------------------------------------------------------------
123
# 2. SCHEMA BUILDERS
124
# ---------------------------------------------------------------------
125
def _make_value_from_simple_type(typ: str):
    """
    Return a sample value for a type name of the old/custom schema style.

    "float" -> 1.0, "int" -> 42, "str" -> "example", "bool" -> True,
    "object" -> a new empty dict. Any other name gives None.
    """
    # The table is rebuilt on every call so "object" always gets a fresh dict.
    samples = (
        ("float", 1.0),
        ("int", 42),
        ("str", "example"),
        ("bool", True),
        ("object", {}),
    )
    for type_name, sample in samples:
        if typ == type_name:
            return sample
    return None
138

139

140
def _build_from_custom_schema(schema: dict):
    """
    Build a dummy input from the simple/custom schema style:
    {
      "input": { "text": "str", "age": "int" },
      "output": { "prediction": "float" }
    }

    For every key under "input":
      - a type-name string becomes a sample value of that type
      - a dict is expanded one level deep the same way (non-string
        entries inside it become None)
      - anything else becomes None

    A missing or null "input" is treated as an empty object.

    Returns:
        (dummy_input, output_schema) where output_schema is whatever is
        stored under "output" ({} when the key is absent).

    Raises:
        ValueError: if "input" is present but is not an object.
    """
    input_schema = schema.get("input")
    if input_schema is None:
        # "input": null behaves like a missing key instead of crashing on .items()
        input_schema = {}
    if not isinstance(input_schema, dict):
        raise ValueError(
            "Schema 'input' must be an object mapping field names to types, "
            f"got {type(input_schema).__name__}"
        )

    dummy = {}
    for key, typ in input_schema.items():
        if isinstance(typ, str):
            dummy[key] = _make_value_from_simple_type(typ)
        elif isinstance(typ, dict):
            dummy[key] = {
                k2: _make_value_from_simple_type(t2) if isinstance(t2, str) else None
                for k2, t2 in typ.items()
            }
        else:
            dummy[key] = None
    return dummy, schema.get("output", {})
164

165

166
def _build_from_json_schema(schema: dict):
    """
    Build a sample input from a real JSON Schema object, e.g.:
    {
      "type": "object",
      "required": ["text"],
      "properties": {
        "text": { "type": "string", "example": "This is great!" }
      }
    }

    For each property:
      - a non-dict definition yields "example"
      - a non-null "example" is used as-is
      - otherwise a default is chosen from "type"; unknown or missing
        types fall back to "example"

    Returns (sample_input, None): this style carries no output schema.
    """
    properties = schema.get("properties", {}) or {}
    sample = {}
    for field, spec in properties.items():
        if not isinstance(spec, dict):
            sample[field] = "example"
            continue

        example = spec.get("example")
        if example is not None:
            sample[field] = example
            continue

        declared = spec.get("type")
        # Rebuilt per property so "object"/"array" get fresh containers.
        type_defaults = (
            ("string", "example text"),
            ("number", 1.0),
            ("integer", 1),
            ("boolean", True),
            ("object", {}),
            ("array", []),
        )
        for type_name, default in type_defaults:
            if declared == type_name:
                sample[field] = default
                break
        else:
            sample[field] = "example"

    return sample, None
208

209

210
def generate_input_and_output_schema(schema_path: str):
    """
    Read a schema file, detect its style and build a sample input from it.

    Recognized styles, checked in this order:
      1. wrapped:  { "input": {...}, "output": {...} }
         - if "input" is itself a JSON Schema (has "properties"), it is
           handled as JSON Schema
         - otherwise it is handled as the simple custom style
      2. direct JSON Schema (has "properties" or "type" == "object")
      3. anything else: empty input, no output schema

    Returns:
        (input_data: dict, output_schema: dict | None)

    Raises:
        ValueError: if the file's top-level JSON value is not an object
            (json.JSONDecodeError, raised for malformed JSON, is also a
            ValueError).
        OSError: if the file cannot be opened.
    """
    # Binary mode lets the json module detect UTF-8/16/32 (including a BOM)
    # itself instead of relying on the platform's default text encoding.
    with open(schema_path, "rb") as f:
        schema = json.load(f)

    if not isinstance(schema, dict):
        raise ValueError(
            "Schema must be a JSON object at the top level, "
            f"got {type(schema).__name__}"
        )

    # 1) wrapped format: { "input": {...}, "output": {...} }
    if "input" in schema:
        input_schema = schema["input"]

        # maybe they put JSON Schema INSIDE "input"
        if isinstance(input_schema, dict) and "properties" in input_schema:
            return _build_from_json_schema(input_schema)
        return _build_from_custom_schema(schema)

    # 2) direct JSON Schema
    if "properties" in schema or schema.get("type") == "object":
        return _build_from_json_schema(schema)

    # 3) fallback
    return {}, None
234

235

236
# ---------------------------------------------------------------------
237
# 3. MODEL LOADING HELPERS
238
# ---------------------------------------------------------------------
239
def _load_model_for_version(module, model_path):
    """
    Best-effort model loader used while validating/testing a version.

    Strategy:
      1. ``torch.load(model_path, map_location="cpu")``
      2. if that raises, the module's own ``_load_model`` when it is
         callable and takes zero parameters or exactly one (the path)

    Returns the loaded object, or None when neither strategy works. All
    exceptions from either strategy are swallowed.
    """
    # NOTE(review): torch.load unpickles the file; model_path appears to come
    # from a user upload - confirm this is acceptable or restrict loading.
    try:
        return torch.load(model_path, map_location="cpu")
    except Exception:
        pass

    loader = getattr(module, "_load_model", None)
    if not callable(loader):
        return None

    try:
        arity = len(inspect.signature(loader).parameters)
        if arity == 0:
            return loader()
        if arity == 1:
            return loader(model_path)
    except Exception:
        pass

    # loader failed, or takes a number of parameters we do not know how to fill
    return None
264

265

266
def _is_seek_error(out):
    """
    Tell whether a predict() result reports PyTorch's
    "... has no attribute 'seek'" failure.

    True only when ``out`` is a dict with an "error" entry whose string
    form contains "no attribute 'seek'".
    """
    if not isinstance(out, dict) or "error" not in out:
        return False
    return "no attribute 'seek'" in str(out["error"])
275

276

277
# ---------------------------------------------------------------------
278
# 4. VALIDATION
279
# ---------------------------------------------------------------------
280
def validate_model(version):
    """
    Run a smoke-test prediction for a version and record the outcome on it.

    Steps:
      1. import the version's predict.py (with the working directory set
         to the folder holding the model file)
      2. build a sample input from the uploaded schema
      3. call predict(input) or predict(model_path, input) depending on
         how many parameters predict() declares
      4. on success: status "PASS", log the input/output, and materialize
         the files to media/<cat>/<model>/vX/
      5. on any exception: status "FAIL" with the traceback in the log

    The working directory is always restored, the version is saved, and
    the same version object is returned.
    """
    starting_dir = os.getcwd()
    try:
        os.chdir(os.path.dirname(version.model_file.path))

        # import predict.py
        spec = importlib.util.spec_from_file_location(
            "predict", version.predict_file.path
        )
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if not hasattr(module, "predict"):
            raise Exception("predict() function missing in predict.py")
        if not version.schema_file:
            raise Exception("No schema file provided")

        # build input from schema
        input_data, output_schema = generate_input_and_output_schema(
            version.schema_file.path
        )

        # pick the call form from predict()'s parameter count
        num_params = len(inspect.signature(module.predict).parameters)
        if num_params not in (1, 2):
            raise Exception(f"predict() has {num_params} parameters, expected 1 or 2.")
        if num_params == 1:
            result = module.predict(input_data)
        else:
            result = module.predict(version.model_file.path, input_data)

        # retry once for the common torch.load "seek" error; a failing retry
        # keeps the first result
        if _is_seek_error(result):
            try:
                if num_params == 1:
                    result = module.predict(version.model_file.path)
                else:
                    model_obj = _load_model_for_version(module, version.model_file.path)
                    if model_obj is not None:
                        result = module.predict(model_obj, input_data)
            except Exception:
                pass

        if not isinstance(result, dict):
            raise Exception("predict() must return a dict")

        # an "error" entry without a usable "prediction" means FAIL
        if "error" in result and result.get("prediction") is None:
            raise Exception(f"Prediction error: {result['error']}")

        # strict output checking only when every declared output type is a
        # primitive name known to TYPE_MAP
        if isinstance(output_schema, dict) and output_schema:
            if all(isinstance(v, str) and v in TYPE_MAP for v in output_schema.values()):
                for key, typ in output_schema.items():
                    if key not in result:
                        raise Exception(f"Missing key in output: {key}")
                    if not isinstance(result[key], TYPE_MAP[typ]):
                        raise Exception(
                            f"Wrong type for '{key}': expected {typ}, got {type(result[key]).__name__}"
                        )

        # success
        version.status = "PASS"
        version.log = (
            "✅ Validation Successful\n\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
            "INPUT (from schema):\n"
            f"{json.dumps(input_data, indent=2)}\n\n"
            "OUTPUT (from predict()):\n"
            f"{json.dumps(result, indent=2)}\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        )

        # materialize now
        materialize_version_to_media(version)

    except Exception:
        version.status = "FAIL"
        version.log = (
            "❌ Validation Failed\n\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
            f"{traceback.format_exc()}\n"
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        )
    finally:
        os.chdir(starting_dir)

    version.save()
    return version
387

388

389
# ---------------------------------------------------------------------
390
# 5. TEST MODEL ON CPU (manual testing from UI)
391
# ---------------------------------------------------------------------
392
def test_model_on_cpu(version, input_data):
    """
    Run a version's predict() on caller-supplied input (used by the test page).

    predict.py is imported with the working directory set to the folder
    holding the model file, then called as predict(input) or
    predict(model_path, input) depending on its parameter count. If the
    result reports the torch "no attribute 'seek'" error, one retry is
    made: predict(model_path) for the 1-parameter form, or
    predict(loaded_model, input) for the 2-parameter form when a model
    object could be loaded.

    Returns:
        {"status": "ok", "output": <predict result>} on success, or
        {"status": "error", "error": <message>, "trace": <traceback>}
        when anything raises. The working directory is always restored.
    """
    original_cwd = os.getcwd()
    try:
        model_dir = os.path.dirname(version.model_file.path)
        os.chdir(model_dir)

        predict_path = version.predict_file.path
        model_path = version.model_file.path

        spec = importlib.util.spec_from_file_location("predict_module", predict_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if not hasattr(module, "predict"):
            raise Exception("predict() function missing in predict.py")

        sig = inspect.signature(module.predict)
        num_params = len(sig.parameters)

        if num_params == 1:
            output = module.predict(input_data)
        elif num_params == 2:
            output = module.predict(model_path, input_data)
        else:
            raise Exception(f"predict() has {num_params} parameters, expected 1 or 2")

        if _is_seek_error(output):
            if num_params == 1:
                output = module.predict(model_path)
            else:
                model_obj = _load_model_for_version(module, model_path)
                # Compare against None rather than truthiness: a loaded object
                # may be an empty mapping (falsy) or a tensor whose bool() raises.
                if model_obj is not None:
                    output = module.predict(model_obj, input_data)

        return {"status": "ok", "output": output}

    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
            "trace": traceback.format_exc(),
        }
    finally:
        os.chdir(original_cwd)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc