• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tcalmant / python-javaobj / 26727166173

31 May 2026 11:08PM UTC coverage: 78.962% (+0.5%) from 78.48%
26727166173

push

github

web-flow
Merge pull request #64 from tcalmant/v3-writer

Initial version of v3 marshaller

187 of 226 new or added lines in 2 files covered. (82.74%)

2586 of 3275 relevant lines covered (78.96%)

4.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.67
/javaobj/v3/writer.py
1
#!/usr/bin/env python3
2
"""
3
Serializer for the Java Object Serialization stream format (v3)
4

5
Produces a byte stream readable by Java's ``ObjectInputStream`` from v3 bean
6
objects (:class:`~javaobj.v3.beans.JavaInstance`, :class:`~javaobj.v3.beans.JavaArray`,
7
etc.).
8

9
:authors: Thomas Calmant
10
:license: Apache License 2.0
11
:version: 0.5.0
12
:status: Alpha
13

14
..
15

16
    Copyright 2026 Thomas Calmant
17

18
    Licensed under the Apache License, Version 2.0 (the "License");
19
    you may not use this file except in compliance with the License.
20
    You may obtain a copy of the License at
21

22
        http://www.apache.org/licenses/LICENSE-2.0
23

24
    Unless required by applicable law or agreed to in writing, software
25
    distributed under the License is distributed on an "AS IS" BASIS,
26
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
27
    See the License for the specific language governing permissions and
28
    limitations under the License.
29
"""
30

31
# Standard library
32
import logging
3✔
33
import struct
3✔
34
from io import BytesIO
3✔
35
from typing import IO, Any
3✔
36

37
# Javaobj
38
from ..constants import StreamConstants, TerminalCode
3✔
39
from .beans import (
3✔
40
    BlockData,
41
    ClassDataType,
42
    ClassDescType,
43
    FieldType,
44
    JavaArray,
45
    JavaClass,
46
    JavaClassDesc,
47
    JavaEnum,
48
    JavaInstance,
49
    JavaString,
50
    ParsedContent,
51
)
52
from .exceptions import UnsupportedFeatureError
3✔
53

54
# ------------------------------------------------------------------------------
55

56
# Module version
57
# Module version, kept as a tuple and as the matching dotted string
__version_info__ = (0, 5, 0)
__version__ = ".".join(map(str, __version_info__))

# Markup used by the docstrings of this module
__docformat__ = "restructuredtext en"

# ------------------------------------------------------------------------------

# Names exported by ``from ... import *``
__all__ = ["JavaStreamWriter", "dump", "dumps"]

# Logger shared by everything in this module
_log = logging.getLogger("javaobj.v3.writer")
3✔
68

69
# ------------------------------------------------------------------------------
70
# Modified UTF-8 encoder
71
# ------------------------------------------------------------------------------
72

73

74
def _encode_mutf8(string: str) -> bytes:
    """
    Encodes a Unicode string to Java Modified UTF-8 bytes.

    Differences from standard UTF-8:

    * The null character (U+0000) is encoded as two bytes ``\\xC0\\x80``
      instead of a single zero byte.
    * Supplementary characters (U+10000–U+10FFFF) are encoded as two
      three-byte surrogate-pair sequences (six bytes total) instead of the
      standard four-byte encoding.

    :param string: The text to encode.
    :return: The Modified UTF-8 representation of *string*.
    """
    # Fast path: for pure-ASCII text without NUL, Modified UTF-8 and ASCII
    # produce the same bytes, so the per-character loop can be skipped.
    if string.isascii() and "\x00" not in string:
        return string.encode("ascii")

    out = bytearray()
    for char in string:
        cp = ord(char)
        if cp == 0x0000:
            # Modified UTF-8: null → 0xC0 0x80
            out += b"\xc0\x80"
        elif cp <= 0x007F:
            out.append(cp)
        elif cp <= 0x07FF:
            # Two-byte sequence, same as standard UTF-8
            out.extend((0xC0 | (cp >> 6), 0x80 | (cp & 0x3F)))
        elif cp <= 0xFFFF:
            # Three-byte sequence; lone surrogates in the input also end up
            # here and are encoded as-is.
            out.extend(
                (
                    0xE0 | (cp >> 12),
                    0x80 | ((cp >> 6) & 0x3F),
                    0x80 | (cp & 0x3F),
                )
            )
        else:
            # Supplementary character: encode as surrogate pair, each as a
            # 3-byte modified-UTF-8 sequence (6 bytes total).
            cp -= 0x10000
            high = 0xD800 | (cp >> 10)
            low = 0xDC00 | (cp & 0x3FF)
            out.extend(
                (
                    0xED,
                    0xA0 | ((high >> 6) & 0x0F),
                    0x80 | (high & 0x3F),
                    0xED,
                    0xB0 | ((low >> 6) & 0x0F),
                    0x80 | (low & 0x3F),
                )
            )
    return bytes(out)
3✔
121

122

123
# ------------------------------------------------------------------------------
124
# Writer
125
# ------------------------------------------------------------------------------
126

127

128
class JavaStreamWriter:
3✔
129
    """
130
    Serializes v3 bean objects to the Java Object Serialization stream format.
131

132
    The generated stream is fully compatible with Java's ``ObjectInputStream``.
133

134
    Usage::
135

136
        with open("out.ser", "wb") as fd:
137
            writer = JavaStreamWriter(fd)
138
            writer.write_stream(my_instance)
139

140
    Or using the module-level helpers::
141

142
        data = javaobj.v3.dumps(my_instance)
143
        javaobj.v3.dump(fd, my_instance)
144
    """
145

146
    def __init__(self, fd: IO[bytes]) -> None:
3✔
147
        self._fd = fd
3✔
148
        # Maps id(obj) → allocated handle (int, starting at BASE_REFERENCE_IDX)
149
        self._handle_map: dict[int, int] = {}
3✔
150
        self._next_handle: int = int(StreamConstants.BASE_REFERENCE_IDX)
3✔
151
        # Cached JavaString wrappers for class-name strings found inside
152
        # JavaField descriptors.  Keyed by the string value so that identical
153
        # class names (e.g. "Ljava/lang/String;") are written only once and
154
        # referenced thereafter.
155
        self._classname_strings: dict[str, JavaString] = {}
3✔
156

157
    # ------------------------------------------------------------------
158
    # Public entry points
159
    # ------------------------------------------------------------------
160

161
    def write_stream(self, *objects: ParsedContent) -> None:
3✔
162
        """
163
        Writes the Java serialization magic header followed by one or more
164
        top-level content objects.
165

166
        Call this exactly once to produce a complete, self-contained stream.
167

168
        :param objects: Top-level objects to write.  Pass several to create
169
                        a stream that requires multiple ``readObject()`` calls
170
                        on the Java side.
171
        :raises UnsupportedFeatureError: If an object type cannot be
172
                                         serialized (e.g. externalizable
173
                                         Protocol-v1 classes).
174
        """
175
        self._write_header()
3✔
176
        for obj in objects:
3✔
177
            self._write_content(obj)
3✔
178

179
    # ------------------------------------------------------------------
180
    # Stream header
181
    # ------------------------------------------------------------------
182

183
    def _write_header(self) -> None:
3✔
184
        self._fd.write(
3✔
185
            struct.pack(
186
                ">HH",
187
                int(StreamConstants.STREAM_MAGIC),
188
                int(StreamConstants.STREAM_VERSION),
189
            )
190
        )
191

192
    # ------------------------------------------------------------------
193
    # Handle management
194
    # ------------------------------------------------------------------
195

196
    def _alloc_handle(self, obj: Any) -> int:
3✔
197
        """Allocates and records the next handle for *obj*."""
198
        h = self._next_handle
3✔
199
        self._next_handle += 1
3✔
200
        self._handle_map[id(obj)] = h
3✔
201
        _log.debug("Allocated handle 0x%x for %s", h, type(obj).__name__)
3✔
202
        return h
3✔
203

204
    def _try_reference(self, obj: Any) -> bool:
3✔
205
        """
206
        Emits ``TC_REFERENCE`` for *obj* if it was already written.
207

208
        :return: ``True`` when a reference was written and the caller must
209
                 **not** write the object again; ``False`` otherwise.
210
        """
211
        h = self._handle_map.get(id(obj))
3✔
212
        if h is None:
3✔
213
            return False
3✔
214
        _log.debug("TC_REFERENCE 0x%x for %s", h, type(obj).__name__)
3✔
215
        self._fd.write(struct.pack(">Bi", int(TerminalCode.TC_REFERENCE), h))
3✔
216
        return True
3✔
217

218
    # ------------------------------------------------------------------
219
    # Content dispatcher
220
    # ------------------------------------------------------------------
221

222
    def _write_content(self, obj: ParsedContent) -> None:
3✔
223
        """Writes a single content item — any valid v3 bean or ``None``."""
224
        match obj:
3✔
225
            case None:
3✔
226
                self._write_null()
3✔
227
            case JavaInstance():
3✔
228
                self._write_instance(obj)
3✔
229
            case JavaArray():
3✔
230
                self._write_array(obj)
3✔
231
            case JavaString():
3✔
232
                self._write_string_obj(obj)
3✔
233
            case JavaEnum():
3✔
234
                self._write_enum(obj)
3✔
235
            case JavaClass():
3✔
236
                self._write_class(obj)
3✔
237
            case BlockData():
3✔
238
                self._write_blockdata(obj)
3✔
NEW
239
            case JavaClassDesc():
×
240
                # A bare class descriptor written directly to the stream
241
                # (rare but valid at the top level).
NEW
242
                self._write_classdesc(obj)
×
NEW
243
            case _:
×
NEW
244
                raise UnsupportedFeatureError(f"Cannot serialize object of type {type(obj).__name__!r}")
×
245

246
    # ------------------------------------------------------------------
247
    # TC_NULL
248
    # ------------------------------------------------------------------
249

250
    def _write_null(self) -> None:
3✔
251
        self._fd.write(bytes([int(TerminalCode.TC_NULL)]))
3✔
252

253
    # ------------------------------------------------------------------
254
    # TC_OBJECT
255
    # ------------------------------------------------------------------
256

257
    def _write_instance(self, instance: JavaInstance) -> None:
3✔
258
        if self._try_reference(instance):
3✔
NEW
259
            return
×
260
        self._fd.write(bytes([int(TerminalCode.TC_OBJECT)]))
3✔
261
        self._write_classdesc(instance.classdesc)
3✔
262
        self._alloc_handle(instance)
3✔
263
        self._write_class_data(instance)
3✔
264

265
    # ------------------------------------------------------------------
266
    # TC_ARRAY
267
    # ------------------------------------------------------------------
268

269
    def _write_array(self, array: JavaArray) -> None:
3✔
270
        if self._try_reference(array):
3✔
NEW
271
            return
×
272
        self._fd.write(bytes([int(TerminalCode.TC_ARRAY)]))
3✔
273
        self._write_classdesc(array.classdesc)
3✔
274
        self._alloc_handle(array)
3✔
275

276
        data = array.data
3✔
277
        self._fd.write(struct.pack(">i", len(data)))
3✔
278

279
        et = array.element_type
3✔
280
        if et == FieldType.BYTE:
3✔
281
            # Bulk write: data is already bytes (or bytearray)
NEW
282
            self._fd.write(data if isinstance(data, (bytes, bytearray)) else bytes(data))  # type: ignore[arg-type]
×
283
        else:
284
            for item in data:  # type: ignore[union-attr]
3✔
285
                self._write_field_value(et, item)
3✔
286

287
    # ------------------------------------------------------------------
288
    # TC_STRING / TC_LONGSTRING
289
    # ------------------------------------------------------------------
290

291
    def _write_string_obj(self, s: JavaString) -> None:
3✔
292
        if self._try_reference(s):
3✔
293
            return
3✔
294
        encoded = _encode_mutf8(s.value)
3✔
295
        n = len(encoded)
3✔
296
        if n <= 0xFFFF:
3✔
297
            self._fd.write(bytes([int(TerminalCode.TC_STRING)]))
3✔
298
            self._alloc_handle(s)
3✔
299
            self._fd.write(struct.pack(">H", n) + encoded)
3✔
300
        else:
NEW
301
            self._fd.write(bytes([int(TerminalCode.TC_LONGSTRING)]))
×
NEW
302
            self._alloc_handle(s)
×
NEW
303
            self._fd.write(struct.pack(">q", n) + encoded)
×
304

305
    # ------------------------------------------------------------------
306
    # TC_ENUM
307
    # ------------------------------------------------------------------
308

309
    def _write_enum(self, enum: JavaEnum) -> None:
3✔
310
        if self._try_reference(enum):
3✔
311
            return
3✔
312
        self._fd.write(bytes([int(TerminalCode.TC_ENUM)]))
3✔
313
        self._write_classdesc(enum.classdesc)
3✔
314
        self._alloc_handle(enum)
3✔
315
        self._write_string_obj(enum.constant)
3✔
316

317
    # ------------------------------------------------------------------
318
    # TC_CLASS
319
    # ------------------------------------------------------------------
320

321
    def _write_class(self, cls: JavaClass) -> None:
3✔
322
        if self._try_reference(cls):
3✔
NEW
323
            return
×
324
        self._fd.write(bytes([int(TerminalCode.TC_CLASS)]))
3✔
325
        self._write_classdesc(cls.classdesc)
3✔
326
        self._alloc_handle(cls)
3✔
327

328
    # ------------------------------------------------------------------
329
    # TC_CLASSDESC / TC_PROXYCLASSDESC
330
    # ------------------------------------------------------------------
331

332
    def _write_classdesc(self, cd: JavaClassDesc | None) -> None:
3✔
333
        if cd is None:
3✔
334
            self._write_null()
3✔
335
            return
3✔
336
        if self._try_reference(cd):
3✔
337
            return
3✔
338
        match cd.class_type:
3✔
339
            case ClassDescType.NORMALCLASS:
3✔
340
                self._write_normal_classdesc(cd)
3✔
NEW
341
            case ClassDescType.PROXYCLASS:
×
NEW
342
                self._write_proxy_classdesc(cd)
×
343

344
    def _write_normal_classdesc(self, cd: JavaClassDesc) -> None:
3✔
345
        """
346
        Serializes a normal (non-proxy) class descriptor.
347

348
        Wire layout::
349

350
            TC_CLASSDESC utf(className) long(serialVersionUID)
351
            newHandle byte(classDescFlags) short(fieldCount)
352
            [byte(typeCode) utf(fieldName) [string(className2)]] ...
353
            classAnnotation superClassDesc
354
        """
355
        self._fd.write(bytes([int(TerminalCode.TC_CLASSDESC)]))
3✔
356
        self._write_utf(cd.name)
3✔
357
        self._fd.write(struct.pack(">q", cd.serial_version_uid))
3✔
358
        self._alloc_handle(cd)
3✔
359
        self._fd.write(struct.pack(">Bh", cd.desc_flags, len(cd.fields)))
3✔
360

361
        for f in cd.fields:
3✔
362
            # type byte + field name
363
            self._fd.write(bytes([f.type.value]))
3✔
364
            self._write_utf(f.name)
3✔
365
            # Object/array fields carry a second string: the class name
366
            if f.type in (FieldType.OBJECT, FieldType.ARRAY):
3✔
367
                cn = f.class_name or ""
3✔
368
                # Reuse the same JavaString object for identical class names
369
                # so that TC_REFERENCE is written on subsequent occurrences.
370
                if cn not in self._classname_strings:
3✔
371
                    self._classname_strings[cn] = JavaString(handle=0, value=cn)
3✔
372
                self._write_string_obj(self._classname_strings[cn])
3✔
373

374
        # Class annotations written by annotateClass() (usually empty)
375
        for ann in cd.annotations:
3✔
NEW
376
            self._write_content(ann)
×
377
        self._fd.write(bytes([int(TerminalCode.TC_ENDBLOCKDATA)]))
3✔
378

379
        # Super-class descriptor (or TC_NULL)
380
        self._write_classdesc(cd.super_class)
3✔
381

382
    def _write_proxy_classdesc(self, cd: JavaClassDesc) -> None:
3✔
383
        """
384
        Serializes a dynamic proxy class descriptor.
385

386
        Wire layout::
387

388
            TC_PROXYCLASSDESC int(interfaceCount)
389
            [utf(interfaceName)] ...
390
            newHandle classAnnotation superClassDesc
391
        """
NEW
392
        self._fd.write(bytes([int(TerminalCode.TC_PROXYCLASSDESC)]))
×
NEW
393
        self._fd.write(struct.pack(">i", len(cd.interfaces)))
×
NEW
394
        for iface in cd.interfaces:
×
NEW
395
            self._write_utf(iface)
×
NEW
396
        self._alloc_handle(cd)
×
397

NEW
398
        for ann in cd.annotations:
×
NEW
399
            self._write_content(ann)
×
NEW
400
        self._fd.write(bytes([int(TerminalCode.TC_ENDBLOCKDATA)]))
×
401

NEW
402
        self._write_classdesc(cd.super_class)
×
403

404
    # ------------------------------------------------------------------
405
    # TC_BLOCKDATA / TC_BLOCKDATALONG
406
    # ------------------------------------------------------------------
407

408
    def _write_blockdata(self, bd: BlockData) -> None:
3✔
409
        n = len(bd.data)
3✔
410
        if n <= 255:
3✔
411
            self._fd.write(struct.pack(">BB", int(TerminalCode.TC_BLOCKDATA), n))
3✔
412
        else:
NEW
413
            self._fd.write(struct.pack(">Bi", int(TerminalCode.TC_BLOCKDATALONG), n))
×
414
        self._fd.write(bd.data)
3✔
415

416
    # ------------------------------------------------------------------
417
    # classdata — instance field values + annotations per hierarchy class
418
    # ------------------------------------------------------------------
419

420
    def _write_class_data(self, instance: JavaInstance) -> None:
3✔
421
        """
422
        Writes all field values and object annotations for *instance*,
423
        walking the class hierarchy from topmost ancestor to concrete class
424
        (the same order as ``ObjectOutputStream`` on the Java side).
425
        """
426
        if instance.classdesc is None:
3✔
NEW
427
            return
×
428

429
        for cd in instance.classdesc.get_hierarchy():
3✔
430
            try:
3✔
431
                data_type = cd.data_type
3✔
NEW
432
            except ValueError:
×
433
                # No SC_SERIALIZABLE / SC_EXTERNALIZABLE flags — skip.
NEW
434
                continue
×
435

436
            cd_fields = instance.field_data.get(cd, {})
3✔
437

438
            match data_type:
3✔
439
                case ClassDataType.NOWRCLASS:
3✔
440
                    # Plain serializable class: write fields only.
441
                    for f in cd.fields:
3✔
442
                        self._write_field_value(f.type, cd_fields.get(f))
3✔
443

444
                case ClassDataType.WRCLASS:
3✔
445
                    # Serializable class with writeObject():
446
                    # fields first, then the custom annotation block.
447
                    for f in cd.fields:
3✔
448
                        self._write_field_value(f.type, cd_fields.get(f))
3✔
449
                    for ann in instance.annotations.get(cd, []):
3✔
450
                        self._write_content(ann)
3✔
451
                    self._fd.write(bytes([int(TerminalCode.TC_ENDBLOCKDATA)]))
3✔
452

NEW
453
                case ClassDataType.OBJECT_ANNOTATION:
×
454
                    # Externalizable + SC_BLOCK_DATA:
455
                    # all data lives in the annotation block.
NEW
456
                    for ann in instance.annotations.get(cd, []):
×
NEW
457
                        self._write_content(ann)
×
NEW
458
                    self._fd.write(bytes([int(TerminalCode.TC_ENDBLOCKDATA)]))
×
459

NEW
460
                case ClassDataType.EXTERNAL_CONTENTS:
×
NEW
461
                    raise UnsupportedFeatureError(
×
462
                        f"SC_EXTERNALIZABLE without SC_BLOCK_DATA "
463
                        f"(Protocol v1) is not supported for class {cd.name!r}"
464
                    )
465

466
    # ------------------------------------------------------------------
467
    # Field value writer
468
    # ------------------------------------------------------------------
469

470
    def _write_field_value(self, field_type: FieldType, value: Any) -> None:
3✔
471
        """Writes a single field value according to *field_type*."""
472
        match field_type:
3✔
473
            case FieldType.BYTE:
3✔
NEW
474
                self._fd.write(struct.pack(">b", int(value) if value is not None else 0))
×
475
            case FieldType.CHAR:
3✔
476
                cp = ord(value) if isinstance(value, str) else int(value)
3✔
477
                self._fd.write(struct.pack(">H", cp & 0xFFFF))
3✔
478
            case FieldType.SHORT:
3✔
NEW
479
                self._fd.write(struct.pack(">h", int(value) if value is not None else 0))
×
480
            case FieldType.INTEGER:
3✔
481
                self._fd.write(struct.pack(">i", int(value) if value is not None else 0))
3✔
482
            case FieldType.LONG:
3✔
NEW
483
                self._fd.write(struct.pack(">q", int(value) if value is not None else 0))
×
484
            case FieldType.FLOAT:
3✔
485
                self._fd.write(struct.pack(">f", float(value) if value is not None else 0.0))
3✔
486
            case FieldType.DOUBLE:
3✔
NEW
487
                self._fd.write(struct.pack(">d", float(value) if value is not None else 0.0))
×
488
            case FieldType.BOOLEAN:
3✔
489
                self._fd.write(bytes([1 if value else 0]))
3✔
490
            case FieldType.OBJECT | FieldType.ARRAY:
3✔
491
                self._write_content(value)
3✔
492

493
    # ------------------------------------------------------------------
494
    # Short-length UTF helper
495
    # ------------------------------------------------------------------
496

497
    def _write_utf(self, s: str) -> None:
3✔
498
        """
499
        Writes a "short" UTF entry: 2-byte unsigned length + Modified UTF-8
500
        bytes.
501

502
        Used for class names, field names, and interface names *inside* class
503
        descriptor records.  These strings do **not** receive handles and are
504
        **not** written as ``TC_STRING`` objects.
505

506
        :raises ValueError: If the encoded byte length exceeds 65535.
507
        """
508
        encoded = _encode_mutf8(s)
3✔
509
        n = len(encoded)
3✔
510
        if n > 0xFFFF:
3✔
NEW
511
            raise ValueError(f"String too long for short-length UTF field: {n} bytes (max 65535)")
×
512
        self._fd.write(struct.pack(">H", n) + encoded)
3✔
513

514

515
# ------------------------------------------------------------------------------
516
# Module-level convenience functions
517
# ------------------------------------------------------------------------------
518

519

520
def dump(fd: IO[bytes], *objects: ParsedContent) -> None:
    """
    Writes a complete serialization stream for the given v3 beans to *fd*.

    :param fd: A writable binary stream (opened in ``"wb"`` mode).
    :param objects: Top-level objects to serialize, in order.  Each one needs
                    its own ``readObject()`` call on the Java side.
    :raises UnsupportedFeatureError: If an object type cannot be serialized.
    """
    # A fresh writer per call: handle allocation starts over for each stream
    JavaStreamWriter(fd).write_stream(*objects)
3✔
532

533

534
def dumps(*objects: ParsedContent) -> bytes:
    """
    Returns a complete serialization stream for the given v3 beans.

    :param objects: Top-level objects to serialize (see :func:`dump`).
    :return: A complete Java Object Serialization stream as :class:`bytes`.
    :raises UnsupportedFeatureError: If an object type cannot be serialized.
    """
    # Serialize into an in-memory buffer and hand back its content
    with BytesIO() as buffer:
        dump(buffer, *objects)
        return buffer.getvalue()
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc