• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tcalmant / python-javaobj / 26725838468

31 May 2026 10:06PM UTC coverage: 78.709% (+0.008%) from 78.701%
26725838468

Pull #63

github

web-flow
Merge ad1ebc8ca into 519fc2167
Pull Request #63: Addition of a v3 package

808 of 1023 new or added lines in 7 files covered. (78.98%)

2403 of 3053 relevant lines covered (78.71%)

4.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.88
/javaobj/v3/reader.py
1
#!/usr/bin/env python3
2
"""
3
Low-level typed binary reader for the Java Object Serialization stream format
4

5
:authors: Thomas Calmant
6
:license: Apache License 2.0
7
:version: 0.5.0
8
:status: Alpha
9

10
..
11

12
    Copyright 2026 Thomas Calmant
13

14
    Licensed under the Apache License, Version 2.0 (the "License");
15
    you may not use this file except in compliance with the License.
16
    You may obtain a copy of the License at
17

18
        http://www.apache.org/licenses/LICENSE-2.0
19

20
    Unless required by applicable law or agreed to in writing, software
21
    distributed under the License is distributed on an "AS IS" BASIS,
22
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23
    See the License for the specific language governing permissions and
24
    limitations under the License.
25
"""
26

27
# Standard library
28
import struct
3✔
29
from typing import IO
3✔
30

31
# Javaobj
32
from ..modifiedutf8 import decode_modified_utf8
3✔
33
from .exceptions import ParseError
3✔
34

35
# ------------------------------------------------------------------------------
36

37
# Module version
38
__version_info__ = (0, 5, 0)
3✔
39
__version__ = ".".join(str(x) for x in __version_info__)
3✔
40

41
# Documentation strings format
42
__docformat__ = "restructuredtext en"
3✔
43

44
# ------------------------------------------------------------------------------
45

46
__all__ = ["DataReader"]
3✔
47

48

49
class DataReader:
    """
    Typed binary stream reader for the Java Object Serialization protocol.

    The reader wraps a readable binary file-like object and counts the bytes
    it has successfully consumed, so that error messages can report where in
    the stream a problem was found.

    Two size limits are configured on the reader:

    * ``max_array_size`` – maximum number of bytes that a single array or
      bulk-read operation may allocate (default 100 MiB).  The value is only
      stored by this class; none of its own methods check it.
      NOTE(review): presumably enforced by the calling parser – confirm.
    * ``max_string_size`` – maximum byte length accepted for TC_LONGSTRING
      payloads (default 100 MiB), checked by :meth:`read_long_utf`.  Normal
      TC_STRING payloads are limited to 65 535 bytes by the 2-byte length
      field.
    """

    __slots__ = ("_fd", "_offset", "_max_array_size", "_max_string_size")

    #: Default limit on a single array allocation (100 MiB).
    DEFAULT_MAX_ARRAY_SIZE: int = 100 * 1024 * 1024

    #: Default recursion depth limit for the parser (not enforced here but
    #: stored as a convenience constant used by :class:`JavaStreamParser`).
    DEFAULT_MAX_DEPTH: int = 500

    #: Default limit for TC_LONGSTRING payloads (100 MiB).
    DEFAULT_MAX_STRING_SIZE: int = 100 * 1024 * 1024

    def __init__(
        self,
        fd: IO[bytes],
        *,
        max_array_size: int = DEFAULT_MAX_ARRAY_SIZE,
        max_string_size: int = DEFAULT_MAX_STRING_SIZE,
    ) -> None:
        """
        Sets up the reader on top of an already opened stream.

        :param fd: A readable binary file-like object.
        :param max_array_size: Maximum bytes for bulk array reads.
        :param max_string_size: Maximum bytes for TC_LONGSTRING payloads.
        """
        self._fd = fd
        # Number of bytes successfully consumed through this reader
        self._offset: int = 0
        self._max_array_size = max_array_size
        self._max_string_size = max_string_size

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def offset(self) -> int:
        """Current byte offset in the stream (read-only)."""
        return self._offset

    # ------------------------------------------------------------------
    # Raw I/O
    # ------------------------------------------------------------------

    def read_bytes(self, n: int) -> bytes:
        """
        Reads exactly *n* bytes from the stream.

        The underlying ``read()`` is called repeatedly until *n* bytes have
        been gathered: raw or unbuffered streams (sockets, pipes, ...) are
        allowed to return fewer bytes than requested before the end of the
        stream, and a single call would wrongly report such a short read as
        an end of stream.

        The offset is only advanced when the whole read succeeds; on failure
        it still designates the position where the read started.

        :param n: Number of bytes to read. ``0`` returns ``b""`` without
                  touching the stream. A negative value can never be
                  satisfied: it raises :class:`EOFError` without reading
                  anything (instead of draining the stream with ``read(-1)``).
        :return: The *n* bytes read.
        :raises EOFError: If fewer than *n* bytes are available.
        """
        chunks = []
        remaining = n
        while remaining > 0:
            chunk = self._fd.read(remaining)
            if not chunk:
                # b"" means end of stream; None (non-blocking stream without
                # data) is treated the same way: nothing more can be read now
                break
            chunks.append(chunk)
            remaining -= len(chunk)

        data = b"".join(chunks)
        if len(data) != n:
            raise EOFError(
                f"Unexpected end of stream: expected {n} bytes, "
                f"got {len(data)} at offset 0x{self._offset:x}"
            )
        self._offset += n
        return data

    def read_struct(self, fmt: str) -> tuple:
        """
        Reads and unpacks a :mod:`struct` format string.

        The number of bytes consumed is the size computed by
        :func:`struct.calcsize` for *fmt*.

        :param fmt: A struct format string (e.g. ``">i"`` for big-endian int).
        :return: The unpacked tuple of values.
        :raises EOFError: If the stream ends before the structure is complete.
        """
        size = struct.calcsize(fmt)
        data = self.read_bytes(size)
        return struct.unpack(fmt, data)

    # ------------------------------------------------------------------
    # Java primitive types
    # ------------------------------------------------------------------

    def read_bool(self) -> bool:
        """Reads a Java ``boolean`` (1 byte); any non-zero byte is ``True``."""
        return bool(self.read_struct(">B")[0])

    def read_byte(self) -> int:
        """Reads a Java signed ``byte`` (1 byte, -128 … 127)."""
        return self.read_struct(">b")[0]

    def read_ubyte(self) -> int:
        """Reads an unsigned byte (1 byte, 0 … 255)."""
        return self.read_struct(">B")[0]

    def read_short(self) -> int:
        """Reads a Java ``short`` (2 bytes, signed, big-endian)."""
        return self.read_struct(">h")[0]

    def read_ushort(self) -> int:
        """Reads an unsigned ``short`` (2 bytes, big-endian)."""
        return self.read_struct(">H")[0]

    def read_int(self) -> int:
        """Reads a Java ``int`` (4 bytes, signed, big-endian)."""
        return self.read_struct(">i")[0]

    def read_long(self) -> int:
        """Reads a Java ``long`` (8 bytes, signed, big-endian)."""
        return self.read_struct(">q")[0]

    def read_float(self) -> float:
        """Reads a Java ``float`` (4 bytes, IEEE 754 single-precision)."""
        return self.read_struct(">f")[0]

    def read_double(self) -> float:
        """Reads a Java ``double`` (8 bytes, IEEE 754 double-precision)."""
        return self.read_struct(">d")[0]

    def read_char(self) -> str:
        """
        Reads a Java ``char`` (2 bytes, unsigned UTF-16 code unit) and returns
        the corresponding Python :class:`str` character.

        Each call converts a single code unit with :func:`chr`: surrogate
        pairs are not combined, so a surrogate code unit yields a lone
        surrogate character.
        """
        return chr(self.read_struct(">H")[0])

    # ------------------------------------------------------------------
    # Java string types (Modified UTF-8)
    # ------------------------------------------------------------------

    def read_utf(self) -> str:
        """
        Reads a Java ``UTF`` string: 2-byte unsigned length followed by
        Modified UTF-8 encoded bytes.

        :return: The decoded string.
        :raises EOFError: If the stream ends before the string is complete.
        :raises ParseError: If the payload is not valid Modified UTF-8.
        """
        length = self.read_ushort()
        return self._read_mutf8(length)

    def read_long_utf(self) -> str:
        """
        Reads a Java long ``UTF`` string: 8-byte signed length followed by
        Modified UTF-8 encoded bytes.

        The declared length is validated before any payload byte is read, so
        an oversized declaration never triggers the allocation.

        :return: The decoded string.
        :raises EOFError: If the stream ends before the string is complete.
        :raises ParseError: If the declared length exceeds ``max_string_size``
                            or is negative. The reported offset is the reader
                            position just after the length field.
        """
        length = self.read_long()
        if length < 0 or length > self._max_string_size:
            raise ParseError(
                f"TC_LONGSTRING: invalid length {length} (limit is {self._max_string_size} bytes)",
                self._offset,
            )
        return self._read_mutf8(length)

    def _read_mutf8(self, length: int) -> str:
        """
        Decodes *length* raw bytes as Modified UTF-8.

        :param length: Number of bytes to read from the stream.
        :return: The decoded Python :class:`str`.
        :raises EOFError: If fewer than *length* bytes are available.
        :raises ParseError: If the bytes cannot be decoded. The reported
                            offset is the position of the first payload byte.
        """
        data = self.read_bytes(length)
        try:
            # The decoder returns a pair; only its first item (the decoded
            # text) is used here
            value, _ = decode_modified_utf8(data)
        except UnicodeDecodeError as exc:
            raise ParseError(
                f"Modified UTF-8 decoding failed: {exc}",
                # read_bytes() already advanced the offset past the payload
                self._offset - length,
            ) from exc
        return value
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc