• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

peteretelej / md-server / 17193676866

24 Aug 2025 08:56PM UTC coverage: 91.351% (-8.0%) from 99.312%
17193676866

Pull #7

github

web-flow
Merge 8ac64f79f into e594bd672
Pull Request #7: WIP: Python SDK

99 of 109 branches covered (90.83%)

Branch coverage included in aggregate %.

384 of 436 new or added lines in 13 files covered. (88.07%)

8 existing lines in 2 files now uncovered.

746 of 816 relevant lines covered (91.42%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.94
/src/md_server/core/validation.py
1
from urllib.parse import urlparse
1✔
2
from typing import Optional
1✔
3

4

5
class ValidationError(Exception):
    """Error raised by the validators when an input is rejected.

    The message is forwarded to ``Exception``; ``details`` carries optional
    structured context about the failure.
    """

    def __init__(self, message: str, details: Optional[dict] = None):
        """Store the message and the optional details mapping.

        Args:
            message: Human-readable description of the failure.
            details: Optional mapping with extra context. A falsy value
                (``None`` or an empty dict) is replaced by a new empty dict.
        """
        super().__init__(message)
        self.details = details or {}
9

10

11
class URLValidator:
    """Check that a string is an absolute HTTP or HTTPS URL."""

    # Schemes accepted by validate_url; the parsed scheme is lowercased
    # before it is compared against this tuple.
    _ALLOWED_SCHEMES = ("http", "https")

    @classmethod
    def validate_url(cls, url: str) -> str:
        """Validate *url* and return it with surrounding whitespace removed.

        Args:
            url: The candidate URL.

        Returns:
            The stripped URL string (otherwise unmodified).

        Raises:
            ValidationError: If the URL is empty or whitespace-only, cannot
                be parsed, has no scheme, uses a scheme other than
                http/https, or has no network location.
        """
        if not url or not url.strip():
            raise ValidationError("URL cannot be empty")

        url = url.strip()
        try:
            parsed = urlparse(url)
        except ValueError as err:
            # urlparse raises ValueError for malformed network locations
            # (for example unmatched IPv6 brackets); report it through the
            # same exception type as every other rejection.
            raise ValidationError("Invalid URL format") from err

        if not parsed.scheme:
            raise ValidationError("Invalid URL format")

        if parsed.scheme.lower() not in cls._ALLOWED_SCHEMES:
            raise ValidationError("Only HTTP/HTTPS URLs allowed")

        if not parsed.netloc:
            raise ValidationError("Invalid URL format")

        return url
30

31

32
class FileSizeValidator:
    """Check a content size in bytes against per-content-type limits."""

    # Bytes per megabyte, used to build the limits and to format messages.
    _MB = 1024 * 1024

    DEFAULT_MAX_SIZE = 50 * _MB  # 50MB default

    # Maximum size in bytes, keyed by lowercase media type without parameters.
    FORMAT_LIMITS = {
        "application/pdf": 50 * _MB,  # 50MB for PDFs
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": 25
        * _MB,  # 25MB for DOCX
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": 25
        * _MB,  # 25MB for PPTX
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": 25
        * _MB,  # 25MB for XLSX
        "text/plain": 10 * _MB,  # 10MB for text
        "text/html": 10 * _MB,  # 10MB for HTML
        "text/markdown": 10 * _MB,  # 10MB for markdown
        "application/json": 5 * _MB,  # 5MB for JSON
        "image/png": 20 * _MB,  # 20MB for images
        "image/jpeg": 20 * _MB,  # 20MB for images
        "image/jpg": 20 * _MB,  # 20MB for images
    }

    @classmethod
    def validate_size(
        cls,
        content_size: int,
        content_type: Optional[str] = None,
        max_size_mb: Optional[int] = None,
    ) -> None:
        """Raise if *content_size* exceeds the applicable limit.

        Args:
            content_size: Size in bytes. Zero or negative sizes are accepted
                without any check.
            content_type: Media type used to pick a limit from
                ``FORMAT_LIMITS``. Parameters (``; charset=...``), surrounding
                whitespace and letter case are ignored for the lookup; an
                unknown or missing type uses ``DEFAULT_MAX_SIZE``.
            max_size_mb: Custom limit in megabytes. When truthy it overrides
                the per-type limit; ``None`` and ``0`` both mean "no custom
                limit".

        Raises:
            ValidationError: If the size exceeds the limit. ``details`` holds
                ``file_size``, ``limit`` (both in bytes) and ``content_type``
                (as passed in).
        """
        if content_size <= 0:
            return

        # Use custom limit if provided, otherwise use format-specific limit
        if max_size_mb:
            limit = max_size_mb * cls._MB
        else:
            # Match on the bare media type so that values such as
            # "text/html; charset=utf-8" or "Text/HTML" still get the
            # text/html limit instead of silently using the default.
            media_type = (content_type or "").split(";", 1)[0].strip().lower()
            limit = cls.FORMAT_LIMITS.get(media_type, cls.DEFAULT_MAX_SIZE)

        if content_size > limit:
            limit_mb = limit / cls._MB
            actual_mb = content_size / cls._MB
            raise ValidationError(
                f"File size {actual_mb:.1f}MB exceeds limit of {limit_mb:.0f}MB for {content_type or 'this format'}",
                {
                    "file_size": content_size,
                    "limit": limit,
                    "content_type": content_type,
                },
            )
82

83

84
class MimeTypeValidator:
    """Sanity-check and normalize a MIME type string."""

    # Maximum accepted length, measured after surrounding whitespace is removed.
    _MAX_LENGTH = 100

    @classmethod
    def validate_mime_type(cls, mime_type: str) -> str:
        """Validate *mime_type* and return it stripped and lowercased.

        Surrounding whitespace is removed before any check runs, so the
        checks apply to the same text that is returned.

        Args:
            mime_type: The candidate MIME type.

        Returns:
            The MIME type with surrounding whitespace removed, lowercased.

        Raises:
            ValidationError: If the value is empty or whitespace-only, longer
                than 100 characters, contains ``..`` or a backslash, or does
                not contain exactly one ``/``.
        """
        if not mime_type or not mime_type.strip():
            raise ValidationError("MIME type cannot be empty")

        candidate = mime_type.strip()

        if len(candidate) > cls._MAX_LENGTH:
            raise ValidationError("MIME type too long (max 100 characters)")

        if "/" not in candidate:
            raise ValidationError("MIME type must contain '/' separator")

        if ".." in candidate or "\\" in candidate:
            raise ValidationError("Invalid characters in MIME type")

        if candidate.count("/") != 1:
            raise ValidationError("MIME type must contain exactly one '/' separator")

        return candidate.lower()
103

104

105
class ContentValidator:
    """Detect a content type from leading bytes and reconcile it with a declared type."""

    # Magic byte signatures for file type detection
    MAGIC_BYTES = {
        b"\x25\x50\x44\x46": "application/pdf",  # PDF
        b"\x50\x4b\x03\x04": "application/zip",  # ZIP (includes DOCX, XLSX, PPTX)
        b"\x50\x4b\x05\x06": "application/zip",  # Empty ZIP
        b"\x50\x4b\x07\x08": "application/zip",  # ZIP
        b"\x89\x50\x4e\x47": "image/png",  # PNG
        b"\xff\xd8\xff": "image/jpeg",  # JPEG
        b"\x47\x49\x46\x38": "image/gif",  # GIF
        b"\x52\x49\x46\x46": "audio/wav",  # WAV (RIFF)
        b"\x49\x44\x33": "audio/mp3",  # MP3 with ID3
        b"\xff\xfb": "audio/mp3",  # MP3
        b"\x3c\x3f\x78\x6d\x6c": "application/xml",  # XML <?xml
        b"\x3c\x68\x74\x6d\x6c": "text/html",  # HTML <html
        b"\x3c\x21\x44\x4f\x43\x54\x59\x50\x45": "text/html",  # HTML <!DOCTYPE
    }

    # Number of leading bytes decoded when probing for UTF-8 text.
    _TEXT_SAMPLE_SIZE = 1024

    # ZIP-based Office formats: a detected ZIP container is accepted for these.
    _OFFICE_TYPES = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    # Declared types whose detected signature must match exactly.
    _SECURITY_SENSITIVE = ("application/pdf", "image/png", "image/jpeg")

    # Non-canonical spellings mapped to the type the magic-byte table reports.
    _DECLARED_ALIASES = {"image/jpg": "image/jpeg"}

    @classmethod
    def detect_content_type(cls, content: bytes) -> str:
        """Guess the content type of *content* from its leading bytes.

        Args:
            content: Raw bytes to inspect.

        Returns:
            The type of the first ``MAGIC_BYTES`` signature that *content*
            starts with; otherwise ``"text/plain"`` if the first 1024 bytes
            are valid UTF-8; otherwise ``"application/octet-stream"`` (also
            returned for empty content).
        """
        import codecs

        if not content:
            return "application/octet-stream"

        for magic, content_type in cls.MAGIC_BYTES.items():
            if content.startswith(magic):
                return content_type

        sample = content[: cls._TEXT_SAMPLE_SIZE]
        # When the sample is a strict prefix of the content, the cut may land
        # in the middle of a multibyte UTF-8 character. Decoding with
        # final=False tolerates such an incomplete trailing sequence while
        # still rejecting invalid bytes anywhere else in the sample.
        is_whole_content = len(content) <= cls._TEXT_SAMPLE_SIZE
        decoder = codecs.getincrementaldecoder("utf-8")()
        try:
            decoder.decode(sample, final=is_whole_content)
        except UnicodeDecodeError:
            return "application/octet-stream"
        return "text/plain"

    @classmethod
    def validate_content_type(
        cls, content: bytes, declared_type: Optional[str] = None
    ) -> str:
        """Reconcile the declared type with the type detected from *content*.

        Args:
            content: Raw bytes to inspect.
            declared_type: Type claimed by the caller, if any.

        Returns:
            The detected type when no type is declared; otherwise the
            declared type, unchanged.

        Raises:
            ValidationError: If the declared type is security-sensitive
                (PDF, PNG, JPEG, including the ``image/jpg`` spelling) and
                detection identified a different type. ``details`` holds
                ``declared`` and ``detected``.
        """
        detected_type = cls.detect_content_type(content)

        if not declared_type:
            return detected_type

        # Handle Office documents (ZIP-based formats)
        if detected_type == "application/zip" and declared_type in cls._OFFICE_TYPES:
            return declared_type

        # Detection found nothing recognizable, so the declaration stands.
        if detected_type == "application/octet-stream":
            return declared_type

        # For text types, be more permissive as detection can be inaccurate
        if declared_type.startswith("text/") and detected_type == "text/plain":
            return declared_type

        # Strict matching for security-sensitive binary types only. The
        # declared type is canonicalized first so that "image/jpg" is held to
        # the same check as "image/jpeg".
        canonical_type = cls._DECLARED_ALIASES.get(declared_type, declared_type)
        if canonical_type in cls._SECURITY_SENSITIVE and detected_type != canonical_type:
            raise ValidationError(
                f"Content type mismatch: declared {declared_type} but detected {detected_type}",
                {"declared": declared_type, "detected": detected_type},
            )

        return declared_type
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc