diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..a0c19b2
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,5 @@
+[run]
+source = javaobj/
+
+[report]
+include = javaobj/*
diff --git a/.coveralls.yml b/.coveralls.yml
deleted file mode 100644
index 9160059..0000000
--- a/.coveralls.yml
+++ /dev/null
@@ -1 +0,0 @@
-service_name: travis-ci
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..b1493ad
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,17 @@
+root=true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+indent_style = space
+trim_trailing_whitespace = true
+
+[*.py]
+indent_size = 4
+
+[*.rst]
+indent_size = 3
+
+[*.{yml,yaml,toml}]
+indent_size = 2
diff --git a/.github/workflows/build-20.04.yml b/.github/workflows/build-20.04.yml
new file mode 100644
index 0000000..2d8a2bc
--- /dev/null
+++ b/.github/workflows/build-20.04.yml
@@ -0,0 +1,49 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: CI Build - Python 3.5-3.7
+
+on:
+ push:
+ branches: [ "master" ]
+ tags: '**'
+ pull_request:
+ branches: [ "master" ]
+
+jobs:
+ build:
+ timeout-minutes: 10
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.5", "3.6", "3.7"]
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ env:
+ PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install flake8 pytest coverage
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+ - name: Lint with flake8
+ run: |
+ # stop the build if there are Python syntax errors or undefined names
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+ - name: Test
+ run: |
+ coverage run -m pytest
+ - name: Coveralls
+ env:
+ COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
+ run: |
+ pip install coveralls
+ coveralls
diff --git a/.github/workflows/build-24.04.yml b/.github/workflows/build-24.04.yml
new file mode 100644
index 0000000..4c25cf3
--- /dev/null
+++ b/.github/workflows/build-24.04.yml
@@ -0,0 +1,47 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: CI Build - Python 3.8+
+
+on:
+ push:
+ branches: [ "master" ]
+ tags: '**'
+ pull_request:
+ branches: [ "master" ]
+
+jobs:
+ build:
+ timeout-minutes: 10
+ runs-on: ubuntu-24.04
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14-dev"]
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install flake8 pytest coverage
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+ - name: Lint with flake8
+ run: |
+ # stop the build if there are Python syntax errors or undefined names
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+ - name: Test
+ run: |
+ coverage run -m pytest
+ - name: Coveralls
+ env:
+ COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
+ run: |
+ pip install coveralls
+ coveralls
diff --git a/.gitignore b/.gitignore
index 02b65dc..9711698 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
*.so
# Packages
+.eggs/
*.egg
*.egg-info
dist
@@ -35,3 +36,13 @@ nosetests.xml
.project
.pydevproject
.idea/
+.vscode/
+.*cache/
+
+# Log files
+*.log
+
+# Folders and scripts used to reproduce issues
+/issue*/
+/repro*.py
+/test*.py
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index e9564e2..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-language: python
-python: 3.5
-
-env:
- - TOX_ENV=py27
- - TOX_ENV=py33
- - TOX_ENV=py34
- - TOX_ENV=py35
- - TOX_ENV=pypy
- - TOX_ENV=pypy3
-
-install:
- - pip install tox
- - pip install coverage
- - pip install coveralls
- - pip install pytest>=2.7.3 --upgrade
-
-script:
- - tox -e $TOX_ENV
-
-after_success:
- - coverage combine
- - coveralls
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..bbc1a99
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,15 @@
+The python-javaobj package was originally developed by
+* Volodymyr Buell (@vbuell)
+
+This fork, javaobj-py3, is maintained by
+* Thomas Calmant (@tcalmant)
+
+Many thanks to the contributors:
+* Patrick J. McNerthney (@iciclespider)
+* @voetsjoeba
+* Vadim Markovtsev (@vmarkovtsev)
+* Jason Spencer, Google LLC (@j8spencer)
+* @guywithface
+* Chris van Marle (@qistoph)
+* Federico Alves (@UruDev)
+* @sarimak
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4385a0b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,482 @@
+# javaobj-py3
+
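+[![Latest Version](https://img.shields.io/pypi/v/javaobj-py3.svg)](https://pypi.python.org/pypi/javaobj-py3/)
+[![License](https://img.shields.io/pypi/l/javaobj-py3.svg)](https://pypi.python.org/pypi/javaobj-py3/)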
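+[![CI Build](https://github.com/tcalmant/python-javaobj/actions/workflows/build-24.04.yml/badge.svg?branch=master)](https://github.com/tcalmant/python-javaobj/actions/workflows/build-24.04.yml)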
+[![Coveralls status](https://coveralls.io/repos/tcalmant/python-javaobj/badge.svg?branch=master)](https://coveralls.io/r/tcalmant/python-javaobj?branch=master)
+
+*python-javaobj* is a Python library that provides functions for reading and
+writing (writing is currently a work in progress) Java objects serialized by
+`ObjectOutputStream` or to be deserialized by `ObjectInputStream`. This form of
+object representation is a standard data interchange format in the Java world.
+
+The `javaobj` module exposes an API familiar to users of the standard library
+`marshal`, `pickle` and `json` modules.
+
+## About this repository
+
+This project is a fork of *python-javaobj* by Volodymyr Buell, originally from
+[Google Code](http://code.google.com/p/python-javaobj/) and now hosted on
+[GitHub](https://github.com/vbuell/python-javaobj).
+
+This fork intends to work both on Python 2.7 and Python 3.4+.
+
+## Compatibility Warnings
+
+### New implementation of the parser
+
+| Implementations | Version |
+|-----------------|----------|
+| `v1`, `v2` | `0.4.0+` |
+
+Since version 0.4.0, two implementations of the parser are available:
+
+* `v1`: the *classic* implementation of `javaobj`, with a work in progress
+ implementation of a writer.
+* `v2`: the *new* implementation, which is a port of the Java project
+ [`jdeserialize`](https://github.com/frohoff/jdeserialize/),
+ with support of the object transformer (with a new API) and of the `numpy`
+ arrays loading.
+
+You can use the `v1` parser to ensure that the behaviour of your scripts
+doesn't change and to keep the ability to write files.
+
+You can use the `v2` parser for new developments
+*which won't require marshalling* and as a *fallback* if the `v1`
+fails to parse a file.
+
+### Object transformers V1
+
+| Implementations | Version |
+|-----------------|----------|
+| `v1` | `0.2.0+` |
+
+As of version 0.2.0, the notion of *object transformer* from the original
+project has been replaced by an *object creator*.
+
+The *object creator* is called before the deserialization.
+This allows storing the reference of the converted object before deserializing
+it, and avoids a mismatch between the referenced object and the transformed one.
+
+### Object transformers V2
+
+| Implementations | Version |
+|-----------------|----------|
+| `v2` | `0.4.0+` |
+
+The `v2` implementation provides a new API for the object transformers.
+Please look at the *Usage (V2 implementation)* section in this file.
+
+### Bytes arrays
+
+| Implementations | Version |
+|-----------------|----------|
+| `v1` | `0.2.3+` |
+
+As of version 0.2.3, bytes arrays are loaded as a `bytes` object instead of
+an array of integers.
+
+### Custom Transformer
+
+| Implementations | Version |
+|-----------------|----------|
+| `v2` | `0.4.2+` |
+
+A new transformer API has been proposed to handle objects written with a custom
+Java writer.
+You can find a sample usage in the *Custom Object Transformer* section in this file.
+
+## Features
+
+* Java object instance un-marshalling
+* Java classes un-marshalling
+* Primitive values un-marshalling
+* Automatic conversion of Java Collections to python ones
+ (`HashMap` => `dict`, `ArrayList` => `list`, etc.)
+* Basic marshalling of simple Java objects (`v1` implementation only)
+* Automatically uncompresses GZipped files
+
+## Requirements
+
+* Python >= 2.7 or Python >= 3.4
+* `enum34` and `typing` when using Python <= 3.4 (installable with `pip`)
+* Maven 2+ (for building test data of serialized objects.
+ You can skip it if you do not plan to run `tests.py`)
+
+## Usage (V1 implementation)
+
+Un-marshalling of Java serialised object:
+
+```python
+import javaobj
+
+with open("obj5.ser", "rb") as fd:
+ jobj = fd.read()
+
+pobj = javaobj.loads(jobj)
+print(pobj)
+```
+
+Or, you can use `JavaObjectUnmarshaller` object directly:
+
+```python
+import javaobj
+
+with open("objCollections.ser", "rb") as fd:
+ marshaller = javaobj.JavaObjectUnmarshaller(fd)
+ pobj = marshaller.readObject()
+
+ print(pobj.value, "should be", 17)
+ print(pobj.next, "should be", True)
+
+ pobj = marshaller.readObject()
+```
+
+**Note:** The objects and methods provided by the `javaobj` module are shortcuts
+to the `javaobj.v1` package, for compatibility purposes.
+It is **recommended** to explicitly import methods and classes from the `v1`
+(or `v2`) package when writing new code, in order to be sure that your code
+won't need import updates in the future.
+
+
+## Usage (V2 implementation)
+
+The following methods are provided by the `javaobj.v2` package:
+
+* `load(fd, *transformers, use_numpy_arrays=False)`:
+ Parses the content of the given file descriptor, opened in binary mode (`rb`).
+ The method accepts a list of custom object transformers. The default object
+ transformer is always added to the list.
+
+  The `use_numpy_arrays` flag indicates that the arrays of primitive type
+  elements must be loaded using `numpy` (if available) instead of using the
+  standard parsing technique.
+
+* `loads(bytes, *transformers, use_numpy_arrays=False)`:
+  This is a shortcut to the `load()` method, providing it the binary data
+  using a `BytesIO` object.
+
+**Note:** The V2 parser doesn't have the marshalling capability.
+
+Sample usage:
+
+```python
+import javaobj.v2 as javaobj
+
+with open("obj5.ser", "rb") as fd:
+ pobj = javaobj.load(fd)
+
+print(pobj.dump())
+```
+
+### Object Transformer
+
+An object transformer can be called during the parsing of a Java object
+instance or while loading an array.
+
+The Java object instance parsing works in two main steps:
+
+1. The transformer is called to create an instance of a bean that inherits
+ `JavaInstance`.
+1. The latter bean is then called:
+
+ * When the object is written with a custom block data
+ * After the fields and annotations have been parsed, to update the content
+ of the Python bean.
+
+Here is an example for a Java `HashMap` object. You can look at the code of
+the `javaobj.v2.transformers` module to see the whole implementation.
+
+```python
+class JavaMap(dict, javaobj.v2.beans.JavaInstance):
+ """
+ Inherits from dict for Python usage, JavaInstance for parsing purpose
+ """
+ def __init__(self):
+ # Don't forget to call both constructors
+ dict.__init__(self)
+ JavaInstance.__init__(self)
+
+ def load_from_blockdata(self, parser, reader, indent=0):
+ """
+ Reads content stored in a block data.
+
+ This method is called only if the class description has both the
+ `SC_EXTERNALIZABLE` and `SC_BLOCK_DATA` flags set.
+
+ The stream parsing will stop and fail if this method returns False.
+
+ :param parser: The JavaStreamParser in use
+ :param reader: The underlying data stream reader
+ :param indent: Indentation to use in logs
+ :return: True on success, False on error
+ """
+ # This kind of class is not supposed to have the SC_BLOCK_DATA flag set
+ return False
+
+ def load_from_instance(self, indent=0):
+ # type: (int) -> bool
+ """
+ Load content from the parsed instance object.
+
+ This method is called after the block data (if any), the fields and
+ the annotations have been loaded.
+
+ :param indent: Indentation to use while logging
+ :return: True on success (currently ignored)
+ """
+ # Maps have their content in their annotations
+ for cd, annotations in self.annotations.items():
+ # Annotations are associated to their definition class
+ if cd.name == "java.util.HashMap":
+ # We are in the annotation created by the handled class
+ # Group annotation elements 2 by 2
+ # (storage is: key, value, key, value, ...)
+ args = [iter(annotations[1:])] * 2
+ for key, value in zip(*args):
+ self[key] = value
+
+ # Job done
+ return True
+
+ # Couldn't load the data
+ return False
+
+class MapObjectTransformer(javaobj.v2.api.ObjectTransformer):
+ """
+ Creates a JavaInstance object with custom loading methods for the
+ classes it can handle
+ """
+ def create_instance(self, classdesc):
+ # type: (JavaClassDesc) -> Optional[JavaInstance]
+ """
+ Transforms a parsed Java object into a Python object
+
+ :param classdesc: The description of a Java class
+ :return: The Python form of the object, or the original JavaObject
+ """
+ if classdesc.name == "java.util.HashMap":
+ # We can handle this class description
+ return JavaMap()
+ else:
+ # Return None if the class is not handled
+ return None
+```
+
+### Custom Object Transformer
+
+The custom transformer is called when the class is not handled by the default
+object transformer.
+A custom object transformer still inherits from the `ObjectTransformer` class,
+but it also implements the `load_custom_writeObject` method.
+
+The sample given here is used in the unit tests.
+
+#### Java sample
+
+On the Java side, we create various classes and write them as we wish:
+
+```java
+class CustomClass implements Serializable {
+
+ private static final long serialVersionUID = 1;
+
+ public void start(ObjectOutputStream out) throws Exception {
+ this.writeObject(out);
+ }
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ CustomWriter custom = new CustomWriter(42);
+ out.writeObject(custom);
+ out.flush();
+ }
+}
+
+class RandomChild extends Random {
+
+ private static final long serialVersionUID = 1;
+ private int num = 1;
+ private double doub = 4.5;
+
+ RandomChild(int seed) {
+ super(seed);
+ }
+}
+
+class CustomWriter implements Serializable {
+ protected RandomChild custom_obj;
+
+ CustomWriter(int seed) {
+ custom_obj = new RandomChild(seed);
+ }
+
+ private static final long serialVersionUID = 1;
+ private static final int CURRENT_SERIAL_VERSION = 0;
+
+ private void writeObject(ObjectOutputStream out) throws IOException {
+ out.writeInt(CURRENT_SERIAL_VERSION);
+ out.writeObject(custom_obj);
+ }
+}
+```
+
+And here is a sample that writes that kind of object:
+
+```java
+ObjectOutputStream oos = new ObjectOutputStream(
+ new FileOutputStream("custom_objects.ser"));
+CustomClass writer = new CustomClass();
+writer.start(oos);
+oos.flush();
+oos.close();
+```
+
+#### Python sample
+
+On the Python side, the first step is to define the custom transformers.
+They are children of the `javaobj.v2.transformers.ObjectTransformer` class.
+
+```python
+class BaseTransformer(javaobj.v2.transformers.ObjectTransformer):
+ """
+ Creates a JavaInstance object with custom loading methods for the
+ classes it can handle
+ """
+
+ def __init__(self, handled_classes=None):
+ self.instance = None
+ self.handled_classes = handled_classes or {}
+
+ def create_instance(self, classdesc):
+ """
+ Transforms a parsed Java object into a Python object
+
+ :param classdesc: The description of a Java class
+ :return: The Python form of the object, or the original JavaObject
+ """
+ if classdesc.name in self.handled_classes:
+ self.instance = self.handled_classes[classdesc.name]()
+ return self.instance
+
+ return None
+
+class RandomChildTransformer(BaseTransformer):
+ def __init__(self):
+ super(RandomChildTransformer, self).__init__(
+ {"RandomChild": RandomChildInstance}
+ )
+
+class CustomWriterTransformer(BaseTransformer):
+ def __init__(self):
+ super(CustomWriterTransformer, self).__init__(
+ {"CustomWriter": CustomWriterInstance}
+ )
+
+class JavaRandomTransformer(BaseTransformer):
+ def __init__(self):
+ super(JavaRandomTransformer, self).__init__()
+ self.name = "java.util.Random"
+ self.field_names = ["haveNextNextGaussian", "nextNextGaussian", "seed"]
+ self.field_types = [
+ javaobj.v2.beans.FieldType.BOOLEAN,
+ javaobj.v2.beans.FieldType.DOUBLE,
+ javaobj.v2.beans.FieldType.LONG,
+ ]
+
+ def load_custom_writeObject(self, parser, reader, name):
+ if name != self.name:
+ return None
+
+ fields = []
+ values = []
+ for f_name, f_type in zip(self.field_names, self.field_types):
+ values.append(parser._read_field_value(f_type))
+ fields.append(javaobj.beans.JavaField(f_type, f_name))
+
+ class_desc = javaobj.beans.JavaClassDesc(
+ javaobj.beans.ClassDescType.NORMALCLASS
+ )
+ class_desc.name = self.name
+ class_desc.desc_flags = javaobj.beans.ClassDataType.EXTERNAL_CONTENTS
+ class_desc.fields = fields
+ class_desc.field_data = values
+ return class_desc
+```
+
+Second step is defining the representation of the instances, where the real
+object loading occurs. Those classes inherit from
+`javaobj.v2.beans.JavaInstance`.
+
+```python
+class CustomWriterInstance(javaobj.v2.beans.JavaInstance):
+ def __init__(self):
+ javaobj.v2.beans.JavaInstance.__init__(self)
+
+ def load_from_instance(self):
+ """
+ Updates the content of this instance
+ from its parsed fields and annotations
+ :return: True on success, False on error
+ """
+ if self.classdesc and self.classdesc in self.annotations:
+            # Here, we know there is something written before the fields,
+ # even if it's not declared in the class description
+ fields = ["int_not_in_fields"] + self.classdesc.fields_names
+ raw_data = self.annotations[self.classdesc]
+ int_not_in_fields = struct.unpack(
+ ">i", BytesIO(raw_data[0].data).read(4)
+ )[0]
+ custom_obj = raw_data[1]
+ values = [int_not_in_fields, custom_obj]
+ self.field_data = dict(zip(fields, values))
+ return True
+
+ return False
+
+
+class RandomChildInstance(javaobj.v2.beans.JavaInstance):
+ def load_from_instance(self):
+ """
+ Updates the content of this instance
+ from its parsed fields and annotations
+ :return: True on success, False on error
+ """
+ if self.classdesc and self.classdesc in self.field_data:
+ fields = self.classdesc.fields_names
+ values = [
+ self.field_data[self.classdesc][self.classdesc.fields[i]]
+ for i in range(len(fields))
+ ]
+ self.field_data = dict(zip(fields, values))
+ if (
+ self.classdesc.super_class
+ and self.classdesc.super_class in self.annotations
+ ):
+ super_class = self.annotations[self.classdesc.super_class][0]
+ self.annotations = dict(
+ zip(super_class.fields_names, super_class.field_data)
+ )
+ return True
+
+ return False
+```
+
+Finally, we can use the transformers in the loading process.
+Note that even if it is not explicitly given, the `DefaultObjectTransformer`
+will also be used, as it is added automatically by `javaobj` if it is
+missing from the given list.
+
+```python
+# Load the object using those transformers
+transformers = [
+    CustomWriterTransformer(), RandomChildTransformer(), JavaRandomTransformer()
+]
+
+with open("custom_objects.ser", "rb") as fd:
+    pobj = javaobj.load(fd, *transformers)
+
+# Here we show a field that isn't visible from the class description
+# The field belongs to the class but it's not serialized by default because
+# it's static. See: https://stackoverflow.com/a/16477421/12621168
+print(pobj.field_data["int_not_in_fields"])
+```
diff --git a/README.rst b/README.rst
deleted file mode 100644
index dd35cf4..0000000
--- a/README.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-javaobj-py3
-###########
-
-.. image:: https://img.shields.io/pypi/v/javaobj-py3.svg
- :target: https://pypi.python.org/pypi/javaobj-py3/
- :alt: Latest Version
-
-.. image:: https://img.shields.io/pypi/l/javaobj-py3.svg
- :target: https://pypi.python.org/pypi/javaobj-py3/
- :alt: License
-
-.. image:: https://travis-ci.org/tcalmant/python-javaobj.svg?branch=master
- :target: https://travis-ci.org/tcalmant/python-javaobj
- :alt: Travis-CI status
-
-.. image:: https://coveralls.io/repos/tcalmant/python-javaobj/badge.svg?branch=master
- :target: https://coveralls.io/r/tcalmant/python-javaobj?branch=master
- :alt: Coveralls status
-
-python-javaobj is a python library that provides functions for reading and
-writing (writing is WIP currently) Java objects serialized or will be
-deserialized by _ObjectOutputStream_. This form of object representation is a
-standard data interchange format in Java world.
-
-javaobj module exposes an API familiar to users of the standard library
-marshal, pickle and json modules.
-
-About this repository
-=====================
-
-This project is a fork of python-javaobj by Volodymyr Buell, originally from
-`Google Code <http://code.google.com/p/python-javaobj/>`_ and now hosted on
-`GitHub <https://github.com/vbuell/python-javaobj>`_.
-
-This fork intends to work both on Python 2.7 and Python 3.2+.
-
-Features
-========
-
-* Java object instance unmarshaling
-* Java classes unmarshaling
-* Primitive values unmarshaling
-* Automatic conversion of Java Collections to python ones
- (_HashMap_ => dict, _ArrayList_ => list, etc)
-
-Requirements
-============
-
-* Python >= 2.7 or Python >= 3.2
-* Maven 2+ (for building test data of serialized objects.
- You can skip it if you do not plan to run tests.py)
-
-Usage
-=====
-
-Unmarshalling of Java serialised object:
-
-.. code-block:: python
-
- import javaobj
-
- jobj = self.read_file("obj5.ser")
- pobj = javaobj.loads(jobj)
- print(pobj)
-
-Or, you can use Unmarshaller object directly:
-
-.. code-block:: python
-
- import javaobj
-
- marshaller = javaobj.JavaObjectUnmarshaller(open("objCollections.ser"))
- pobj = marshaller.readObject()
-
- self.assertEqual(pobj.value, 17)
- self.assertTrue(pobj.next)
-
- pobj = marshaller.readObject()
diff --git a/javaobj.py b/javaobj.py
deleted file mode 100644
index ef4a3d8..0000000
--- a/javaobj.py
+++ /dev/null
@@ -1,1440 +0,0 @@
-#!/usr/bin/python
-# -- Content-Encoding: UTF-8 --
-"""
-Provides functions for reading and writing (writing is WIP currently) Java
-objects serialized or will be deserialized by ObjectOutputStream. This form of
-object representation is a standard data interchange format in Java world.
-
-javaobj module exposes an API familiar to users of the standard library marshal,
-pickle and json modules.
-
-See:
-http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
-
-:authors: Volodymyr Buell, Thomas Calmant
-:license: Apache License 2.0
-:version: 0.1.4
-:status: Alpha
-
-..
-
- Copyright 2016 Thomas Calmant
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-# Standard library
-import logging
-import os
-import struct
-import sys
-
-try:
- # Python 2
- from StringIO import StringIO as BytesIO
-except ImportError:
- # Python 3+
- from io import BytesIO
-
-# ------------------------------------------------------------------------------
-
-# Module version
-__version_info__ = (0, 1, 4)
-__version__ = ".".join(str(x) for x in __version_info__)
-
-# Documentation strings format
-__docformat__ = "restructuredtext en"
-
-# ------------------------------------------------------------------------------
-
-# Setup the logger
-_log = logging.getLogger(__name__)
-
-
-def log_debug(message, ident=0):
- """
- Logs a message at debug level
-
- :param message: Message to log
- :param ident: Number of indentation spaces
- """
- _log.debug(" " * (ident * 2) + str(message))
-
-
-def log_error(message, ident=0):
- """
- Logs a message at error level
-
- :param message: Message to log
- :param ident: Number of indentation spaces
- """
- _log.error(" " * (ident * 2) + str(message))
-
-# ------------------------------------------------------------------------------
-
-if sys.version_info[0] >= 3:
- # Python 3 interpreter : bytes & str
- def to_bytes(data, encoding="UTF-8"):
- """
- Converts the given string to an array of bytes.
- Returns the first parameter if it is already an array of bytes.
-
- :param data: A unicode string
- :param encoding: The encoding of data
- :return: The corresponding array of bytes
- """
- if type(data) is bytes:
- # Nothing to do
- return data
- return data.encode(encoding)
-
- def to_str(data, encoding="UTF-8"):
- """
- Converts the given parameter to a string.
- Returns the first parameter if it is already an instance of ``str``.
-
- :param data: A string
- :param encoding: The encoding of data
- :return: The corresponding string
- """
- if type(data) is str:
- # Nothing to do
- return data
- return str(data, encoding)
-
- def read_to_str(data):
- """
- Concats all bytes into a string
- """
- return ''.join(chr(char) for char in data)
-
-else:
- # Python 2 interpreter : str & unicode
- def to_str(data, encoding="UTF-8"):
- """
- Converts the given parameter to a string.
- Returns the first parameter if it is already an instance of ``str``.
-
- :param data: A string
- :param encoding: The encoding of data
- :return: The corresponding string
- """
- if type(data) is str:
- # Nothing to do
- return data
- return data.encode(encoding)
-
- # Same operation
- to_bytes = to_str
-
- def read_to_str(data):
- """
- Nothing to do in Python 2
- """
- return data
-
-# ------------------------------------------------------------------------------
-
-
-def load(file_object, *transformers, **kwargs):
- """
- Deserializes Java primitive data and objects serialized using
- ObjectOutputStream from a file-like object.
-
- :param file_object: A file-like object
- :param transformers: Custom transformers to use
- :param ignore_remaining_data: If True, don't log an error when unused
- trailing bytes are remaining
- :return: The deserialized object
- """
- # Read keyword argument
- ignore_remaining_data = kwargs.get('ignore_remaining_data', False)
-
- marshaller = JavaObjectUnmarshaller(file_object)
-
- # Add custom transformers first
- for transformer in transformers:
- marshaller.add_transformer(transformer)
- marshaller.add_transformer(DefaultObjectTransformer())
-
- # Read the file object
- return marshaller.readObject(ignore_remaining_data=ignore_remaining_data)
-
-
-def loads(string, *transformers, **kwargs):
- """
- Deserializes Java objects and primitive data serialized using
- ObjectOutputStream from a string.
-
- :param string: A Java data string
- :param transformers: Custom transformers to use
- :param ignore_remaining_data: If True, don't log an error when unused
- trailing bytes are remaining
- :return: The deserialized object
- """
- # Read keyword argument
- ignore_remaining_data = kwargs.get('ignore_remaining_data', False)
-
- # Reuse the load method (avoid code duplication)
- return load(BytesIO(string), *transformers,
- ignore_remaining_data=ignore_remaining_data)
-
-
-def dumps(obj, *transformers):
- """
- Serializes Java primitive data and objects unmarshaled by load(s) before
- into string.
-
- :param obj: A Python primitive object, or one loaded using load(s)
- :param transformers: Custom transformers to use
- :return: The serialized data as a string
- """
- marshaller = JavaObjectMarshaller()
- # Add custom transformers
- for transformer in transformers:
- marshaller.add_transformer(transformer)
-
- return marshaller.dump(obj)
-
-# ------------------------------------------------------------------------------
-
-
-class JavaClass(object):
- """
- Represents a class in the Java world
- """
- def __init__(self):
- """
- Sets up members
- """
- self.name = None
- self.serialVersionUID = None
- self.flags = None
- self.fields_names = []
- self.fields_types = []
- self.superclass = None
-
- def __str__(self):
- """
- String representation of the Java class
- """
- return self.__repr__()
-
- def __repr__(self):
- """
- String representation of the Java class
- """
- return "[{0:s}:0x{1:X}]".format(self.name, self.serialVersionUID)
-
- def __eq__(self, other):
- """
- Equality test between two Java classes
-
- :param other: Other JavaClass to test
- :return: True if both classes share the same fields and name
- """
- if not isinstance(other, type(self)):
- return False
-
- return (self.name == other.name and
- self.serialVersionUID == other.serialVersionUID and
- self.flags == other.flags and
- self.fields_names == other.fields_names and
- self.fields_types == other.fields_types and
- self.superclass == other.superclass)
-
-
-class JavaObject(object):
- """
- Represents a deserialized non-primitive Java object
- """
- def __init__(self):
- """
- Sets up members
- """
- self.classdesc = None
- self.annotations = []
-
- def get_class(self):
- """
- Returns the JavaClass that defines the type of this object
- """
- return self.classdesc
-
- def __str__(self):
- """
- String representation
- """
- return self.__repr__()
-
- def __repr__(self):
- """
- String representation
- """
- name = "UNKNOWN"
- if self.classdesc:
- name = self.classdesc.name
- return "".format(name)
-
- def __eq__(self, other):
- """
- Equality test between two Java classes
-
- :param other: Other JavaClass to test
- :return: True if both classes share the same fields and name
- """
- if not isinstance(other, type(self)):
- return False
-
- res = (self.classdesc == other.classdesc and
- self.annotations == other.annotations)
- if not res:
- return False
-
- for name in self.classdesc.fields_names:
- if not (getattr(self, name) == getattr(other, name)):
- return False
- return True
-
-
-class JavaString(str):
- """
- Represents a Java String
- """
- def __hash__(self):
- return str.__hash__(self)
-
- def __eq__(self, other):
- if not isinstance(other, str):
- return False
- return str.__eq__(self, other)
-
-
-class JavaEnum(JavaObject):
- """
- Represents a Java enumeration
- """
- def __init__(self, constant=None):
- super(JavaEnum, self).__init__()
- self.constant = constant
-
-
-class JavaArray(list, JavaObject):
- """
- Represents a Java Array
- """
- def __init__(self, classdesc=None):
- list.__init__(self)
- JavaObject.__init__(self)
- self.classdesc = classdesc
-
-# ------------------------------------------------------------------------------
-
-
-class JavaObjectConstants(object):
- """
- Defines the constants of the Java serialization format
- """
- STREAM_MAGIC = 0xaced
- STREAM_VERSION = 0x05
-
- TC_NULL = 0x70
- TC_REFERENCE = 0x71
- TC_CLASSDESC = 0x72
- TC_OBJECT = 0x73
- TC_STRING = 0x74
- TC_ARRAY = 0x75
- TC_CLASS = 0x76
- TC_BLOCKDATA = 0x77
- TC_ENDBLOCKDATA = 0x78
- TC_RESET = 0x79
- TC_BLOCKDATALONG = 0x7A
- TC_EXCEPTION = 0x7B
- TC_LONGSTRING = 0x7C
- TC_PROXYCLASSDESC = 0x7D
- TC_ENUM = 0x7E
- TC_MAX = 0x7E
-
- # classDescFlags
- SC_WRITE_METHOD = 0x01 # if SC_SERIALIZABLE
- SC_BLOCK_DATA = 0x08 # if SC_EXTERNALIZABLE
- SC_SERIALIZABLE = 0x02
- SC_EXTERNALIZABLE = 0x04
- SC_ENUM = 0x10
-
- # type definition chars (typecode)
- TYPE_BYTE = 'B' # 0x42
- TYPE_CHAR = 'C' # 0x43
- TYPE_DOUBLE = 'D' # 0x44
- TYPE_FLOAT = 'F' # 0x46
- TYPE_INTEGER = 'I' # 0x49
- TYPE_LONG = 'J' # 0x4A
- TYPE_SHORT = 'S' # 0x53
- TYPE_BOOLEAN = 'Z' # 0x5A
- TYPE_OBJECT = 'L' # 0x4C
- TYPE_ARRAY = '[' # 0x5B
-
- # list of supported typecodes listed above
- TYPECODES_LIST = [
- # primitive types
- TYPE_BYTE,
- TYPE_CHAR,
- TYPE_DOUBLE,
- TYPE_FLOAT,
- TYPE_INTEGER,
- TYPE_LONG,
- TYPE_SHORT,
- TYPE_BOOLEAN,
- # object types
- TYPE_OBJECT,
- TYPE_ARRAY]
-
- BASE_REFERENCE_IDX = 0x7E0000
-
-
-class OpCodeDebug(object):
- # Type codes
- OP_CODE = {getattr(JavaObjectConstants, key): key
- for key in dir(JavaObjectConstants) if key.startswith("TC_")}
-
- TYPE = {getattr(JavaObjectConstants, key): key
- for key in dir(JavaObjectConstants) if key.startswith("TYPE_")}
-
- STREAM_CONSTANT = {getattr(JavaObjectConstants, key): key
- for key in dir(JavaObjectConstants)
- if key.startswith("SC_")}
-
- @staticmethod
- def op_id(op_id):
- return OpCodeDebug.OP_CODE.get(
- op_id, "".format(op_id))
-
- @staticmethod
- def type_code(type_id):
- return OpCodeDebug.TYPE.get(
- type_id, "".format(type_id))
-
- @staticmethod
- def flags(flags):
- names = sorted(
- descr for key, descr in OpCodeDebug.STREAM_CONSTANT.items()
- if key & flags)
- return ', '.join(names)
-
-
-# ------------------------------------------------------------------------------
-
-
-class JavaObjectUnmarshaller(JavaObjectConstants):
- """
- Deserializes a Java serialization stream
- """
- def __init__(self, stream):
- """
- Sets up members
-
- :param stream: An input stream (opened in binary/bytes mode)
- :raise IOError: Invalid input stream
- """
- # Check stream
- if stream is None:
- raise IOError("No input stream given")
-
- # Prepare the association Terminal Symbol -> Reading method
- self.opmap = {
- self.TC_NULL: self.do_null,
- self.TC_CLASSDESC: self.do_classdesc,
- self.TC_OBJECT: self.do_object,
- self.TC_STRING: self.do_string,
- self.TC_LONGSTRING: self.do_string_long,
- self.TC_ARRAY: self.do_array,
- self.TC_CLASS: self.do_class,
- self.TC_BLOCKDATA: self.do_blockdata,
- self.TC_BLOCKDATALONG: self.do_blockdata_long,
- self.TC_REFERENCE: self.do_reference,
- self.TC_ENUM: self.do_enum,
- # note that we are reusing do_null:
- self.TC_ENDBLOCKDATA: self.do_null,
- }
-
- # Set up members
- self.current_object = None
- self.reference_counter = 0
- self.references = []
- self.object_transformers = []
- self.object_stream = stream
-
- # Read the stream header (magic & version)
- self._readStreamHeader()
-
- def readObject(self, ignore_remaining_data=False):
- """
- Reads an object from the input stream
-
- :param ignore_remaining_data: If True, don't log an error when
- unused trailing bytes are remaining
- :return: The unmarshalled object
- :raise Exception: Any exception that occurred during unmarshalling
- """
- try:
- # TODO: add expects
- _, res = self._read_and_exec_opcode(ident=0)
-
- position_bak = self.object_stream.tell()
- the_rest = self.object_stream.read()
- if not ignore_remaining_data and len(the_rest):
- log_error("Warning!!!!: Stream still has {0} bytes left. "
- "Enable debug mode of logging to see the hexdump."
- .format(len(the_rest)))
- log_debug(self._create_hexdump(the_rest))
- else:
- log_debug("Java Object unmarshalled successfully!")
-
- self.object_stream.seek(position_bak)
- return res
- except Exception:
- self._oops_dump_state(ignore_remaining_data)
- raise
-
- def add_transformer(self, transformer):
- """
- Appends an object transformer to the deserialization process
-
- :param transformer: An object with a transform(obj) method
- """
- self.object_transformers.append(transformer)
-
- def _readStreamHeader(self):
- """
- Reads the magic header of a Java serialization stream
-
- :raise IOError: Invalid magic header (not a Java stream)
- """
- (magic, version) = self._readStruct(">HH")
- if magic != self.STREAM_MAGIC or version != self.STREAM_VERSION:
- raise IOError("The stream is not java serialized object. "
- "Invalid stream header: {0:04X}{1:04X}"
- .format(magic, version))
-
- def _read_and_exec_opcode(self, ident=0, expect=None):
- """
- Reads the next opcode, and executes its handler
-
- :param ident: Log identation level
- :param expect: A list of expected opcodes
- :return: A tuple: (opcode, result of the handler)
- :raise IOError: Read opcode is not one of the expected ones
- :raise RuntimeError: Unknown opcode
- """
- position = self.object_stream.tell()
- (opid,) = self._readStruct(">B")
- log_debug("OpCode: 0x{0:X} -- {1} (at offset 0x{2:X})"
- .format(opid, OpCodeDebug.op_id(opid), position), ident)
-
- if expect and opid not in expect:
- raise IOError(
- "Unexpected opcode 0x{0:X} -- {1} (at offset 0x{2:X})"
- .format(opid, OpCodeDebug.op_id(opid), position))
-
- try:
- handler = self.opmap[opid]
- except KeyError:
- raise RuntimeError(
- "Unknown OpCode in the stream: 0x{0:X} (at offset 0x{1:X})"
- .format(opid, position))
- else:
- return opid, handler(ident=ident)
-
- def _readStruct(self, unpack):
- """
- Reads from the input stream, using struct
-
- :param unpack: An unpack format string
- :return: The result of struct.unpack (tuple)
- :raise RuntimeError: End of stream reached during unpacking
- """
- length = struct.calcsize(unpack)
- ba = self.object_stream.read(length)
-
- if len(ba) != length:
- raise RuntimeError("Stream has been ended unexpectedly while "
- "unmarshaling.")
-
- return struct.unpack(unpack, ba)
-
- def _readString(self, length_fmt="H"):
- """
- Reads a serialized string
-
- :param length_fmt: Structure format of the string length (H or Q)
- :return: The deserialized string
- :raise RuntimeError: Unexpected end of stream
- """
- (length,) = self._readStruct(">{0}".format(length_fmt))
- ba = self.object_stream.read(length)
- return to_str(ba)
-
- def do_classdesc(self, parent=None, ident=0):
- """
- Handles a TC_CLASSDESC opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaClass object
- """
- # TC_CLASSDESC className serialVersionUID newHandle classDescInfo
- # classDescInfo:
- # classDescFlags fields classAnnotation superClassDesc
- # classDescFlags:
- # (byte) // Defined in Terminal Symbols and Constants
- # fields:
- # (short) fieldDesc[count]
-
- # fieldDesc:
- # primitiveDesc
- # objectDesc
- # primitiveDesc:
- # prim_typecode fieldName
- # objectDesc:
- # obj_typecode fieldName className1
- clazz = JavaClass()
- log_debug("[classdesc]", ident)
- class_name = self._readString()
- clazz.name = class_name
- log_debug("Class name: %s" % class_name, ident)
-
- # serialVersionUID is a Java (signed) long => 8 bytes
- serialVersionUID, classDescFlags = self._readStruct(">qB")
- clazz.serialVersionUID = serialVersionUID
- clazz.flags = classDescFlags
-
- self._add_reference(clazz, ident)
-
- log_debug("Serial: 0x{0:X} / {0:d} - classDescFlags: 0x{1:X} {2}"
- .format(serialVersionUID, classDescFlags,
- OpCodeDebug.flags(classDescFlags)), ident)
- (length,) = self._readStruct(">H")
- log_debug("Fields num: 0x{0:X}".format(length), ident)
-
- clazz.fields_names = []
- clazz.fields_types = []
- for fieldId in range(length):
- (typecode,) = self._readStruct(">B")
- field_name = self._readString()
- field_type = self._convert_char_to_type(typecode)
-
- log_debug("> Reading field {0}".format(field_name), ident)
-
- if field_type == self.TYPE_ARRAY:
- _, field_type = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_STRING, self.TC_REFERENCE))
-
- if type(field_type) is not JavaString:
- raise AssertionError("Field type must be a JavaString, "
- "not {0}".format(type(field_type)))
-
- elif field_type == self.TYPE_OBJECT:
- _, field_type = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_STRING, self.TC_REFERENCE))
-
- if type(field_type) is not JavaString:
- raise AssertionError("Field type must be a JavaString, "
- "not {0}".format(type(field_type)))
-
- log_debug("< FieldName: 0x{0:X} Name:{1} Type:{2} ID:{3}"
- .format(typecode, field_name, field_type, fieldId),
- ident)
- assert field_name is not None
- assert field_type is not None
-
- clazz.fields_names.append(field_name)
- clazz.fields_types.append(field_type)
-
- if parent:
- parent.__fields = clazz.fields_names
- parent.__types = clazz.fields_types
-
- # classAnnotation
- (opid,) = self._readStruct(">B")
- log_debug("OpCode: 0x{0:X} -- {1} (classAnnotation)"
- .format(opid, OpCodeDebug.op_id(opid)), ident)
- if opid != self.TC_ENDBLOCKDATA:
- raise NotImplementedError("classAnnotation isn't implemented yet")
-
- # superClassDesc
- log_debug("Reading Super Class of {0}".format(clazz.name), ident)
- _, superclassdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_NULL, self.TC_REFERENCE))
- log_debug("Super Class for {0}: {1}"
- .format(clazz.name, str(superclassdesc)), ident)
- clazz.superclass = superclassdesc
- return clazz
-
- def do_blockdata(self, parent=None, ident=0):
- """
- Handles TC_BLOCKDATA opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string containing the block data
- """
- # TC_BLOCKDATA (unsigned byte) (byte)[size]
- log_debug("[blockdata]", ident)
- (length,) = self._readStruct(">B")
- ba = self.object_stream.read(length)
-
- # Ensure we have an str
- return read_to_str(ba)
-
- def do_blockdata_long(self, parent=None, ident=0):
- """
- Handles TC_BLOCKDATALONG opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string containing the block data
- """
- # TC_BLOCKDATALONG (int) (byte)[size]
- log_debug("[blockdatalong]", ident)
- (length,) = self._readStruct(">I")
- ba = self.object_stream.read(length)
-
- # Ensure we have an str
- return read_to_str(ba)
-
- def do_class(self, parent=None, ident=0):
- """
- Handles TC_CLASS opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaClass object
- """
- # TC_CLASS classDesc newHandle
- log_debug("[class]", ident)
-
- # TODO: what to do with "(ClassDesc)prevObject".
- # (see 3rd line for classDesc:)
- _, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
- log_debug("Classdesc: {0}".format(classdesc), ident)
- self._add_reference(classdesc, ident)
- return classdesc
-
- def do_object(self, parent=None, ident=0):
- """
- Handles a TC_OBJECT opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaClass object
- """
- # TC_OBJECT classDesc newHandle classdata[] // data for each class
- java_object = JavaObject()
- log_debug("[object]", ident)
- log_debug("java_object.annotations just after instantiation: {0}"
- .format(java_object.annotations), ident)
-
- # TODO: what to do with "(ClassDesc)prevObject".
- # (see 3rd line for classDesc:)
- opcode, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
- # self.TC_REFERENCE hasn't shown in spec, but actually is here
-
- # Create object
- for transformer in self.object_transformers:
- java_object = transformer.create(classdesc)
- if java_object:
- break
-
- # Store classdesc of this object
- java_object.classdesc = classdesc
-
- # Store the reference
- self._add_reference(java_object, ident)
-
- # classdata[]
-
- if classdesc.flags & self.SC_EXTERNALIZABLE \
- and not classdesc.flags & self.SC_BLOCK_DATA:
- # TODO:
- raise NotImplementedError("externalContents isn't implemented yet")
-
- if classdesc.flags & self.SC_SERIALIZABLE:
- # TODO: look at ObjectInputStream.readSerialData()
- # FIXME: Handle the SC_WRITE_METHOD flag
-
- # create megalist
- tempclass = classdesc
- megalist = []
- megatypes = []
- log_debug("Constructing class...", ident)
- while tempclass:
- log_debug("Class: {0}".format(tempclass.name), ident + 1)
- class_fields_str = ' - '.join(
- ' '.join((field_type, field_name))
- for field_type, field_name
- in zip(tempclass.fields_types, tempclass.fields_names))
- if class_fields_str:
- log_debug(class_fields_str, ident + 2)
-
- fieldscopy = tempclass.fields_names[:]
- fieldscopy.extend(megalist)
- megalist = fieldscopy
-
- fieldscopy = tempclass.fields_types[:]
- fieldscopy.extend(megatypes)
- megatypes = fieldscopy
-
- tempclass = tempclass.superclass
-
- log_debug("Values count: {0}".format(len(megalist)), ident)
- log_debug("Prepared list of values: {0}".format(megalist), ident)
- log_debug("Prepared list of types: {0}".format(megatypes), ident)
-
- for field_name, field_type in zip(megalist, megatypes):
- log_debug("Reading field: {0} - {1}"
- .format(field_type, field_name))
- res = self._read_value(field_type, ident, name=field_name)
- java_object.__setattr__(field_name, res)
-
- if classdesc.flags & self.SC_SERIALIZABLE \
- and classdesc.flags & self.SC_WRITE_METHOD \
- or classdesc.flags & self.SC_EXTERNALIZABLE \
- and classdesc.flags & self.SC_BLOCK_DATA:
- # objectAnnotation
- log_debug("java_object.annotations before: {0}"
- .format(java_object.annotations), ident)
-
- while opcode != self.TC_ENDBLOCKDATA:
- opcode, obj = self._read_and_exec_opcode(ident=ident + 1)
- # , expect=[self.TC_ENDBLOCKDATA, self.TC_BLOCKDATA,
- # self.TC_OBJECT, self.TC_NULL, self.TC_REFERENCE])
- if opcode != self.TC_ENDBLOCKDATA:
- java_object.annotations.append(obj)
-
- log_debug("objectAnnotation value: {0}".format(obj), ident)
-
- log_debug("java_object.annotations after: {0}"
- .format(java_object.annotations), ident)
-
- log_debug(">>> java_object: {0}".format(java_object), ident)
- return java_object
-
- def do_string(self, parent=None, ident=0):
- """
- Handles a TC_STRING opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string
- """
- log_debug("[string]", ident)
- ba = JavaString(self._readString())
- self._add_reference(ba, ident)
- return ba
-
- def do_string_long(self, parent=None, ident=0):
- """
- Handles a TC_LONGSTRING opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A string
- """
- log_debug("[long string]", ident)
- ba = JavaString(self._readString("Q"))
- self._add_reference(ba, ident)
- return ba
-
- def do_array(self, parent=None, ident=0):
- """
- Handles a TC_ARRAY opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A list of deserialized objects
- """
- # TC_ARRAY classDesc newHandle (int) values[size]
- log_debug("[array]", ident)
- _, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
-
- array = JavaArray(classdesc)
-
- self._add_reference(array, ident)
-
- (size,) = self._readStruct(">i")
- log_debug("size: {0}".format(size), ident)
-
- type_char = classdesc.name[0]
- assert type_char == self.TYPE_ARRAY
- type_char = classdesc.name[1]
-
- if type_char == self.TYPE_OBJECT or type_char == self.TYPE_ARRAY:
- for _ in range(size):
- _, res = self._read_and_exec_opcode(ident=ident + 1)
- log_debug("Object value: {0}".format(res), ident)
- array.append(res)
- else:
- for _ in range(size):
- res = self._read_value(type_char, ident)
- log_debug("Native value: {0}".format(res), ident)
- array.append(res)
-
- return array
-
- def do_reference(self, parent=None, ident=0):
- """
- Handles a TC_REFERENCE opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: The referenced object
- """
- (handle,) = self._readStruct(">L")
- log_debug("## Reference handle: 0x{0:X}".format(handle), ident)
- ref = self.references[handle - self.BASE_REFERENCE_IDX]
- log_debug("###-> Type: {0} - Value: {1}".format(type(ref), ref), ident)
- return ref
-
- @staticmethod
- def do_null(parent=None, ident=0):
- """
- Handles a TC_NULL opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: Always None
- """
- return None
-
- def do_enum(self, parent=None, ident=0):
- """
- Handles a TC_ENUM opcode
-
- :param parent:
- :param ident: Log indentation level
- :return: A JavaEnum object
- """
- # TC_ENUM classDesc newHandle enumConstantName
- enum = JavaEnum()
- _, classdesc = self._read_and_exec_opcode(
- ident=ident + 1,
- expect=(self.TC_CLASSDESC, self.TC_PROXYCLASSDESC,
- self.TC_NULL, self.TC_REFERENCE))
- enum.classdesc = classdesc
- self._add_reference(enum, ident)
- _, enumConstantName = self._read_and_exec_opcode(
- ident=ident + 1, expect=(self.TC_STRING, self.TC_REFERENCE))
- enum.constant = enumConstantName
- return enum
-
- @staticmethod
- def _create_hexdump(src, start_offset=0, length=16):
- """
- Prepares an hexadecimal dump string
-
- :param src: A string containing binary data
- :param start_offset: The start offset of the source
- :param length: Length of a dump line
- :return: A dump string
- """
- FILTER = ''.join((len(repr(chr(x))) == 3) and chr(x) or '.'
- for x in range(256))
- pattern = "{{0:04X}} {{1:<{0}}} {{2}}\n".format(length * 3)
-
- # Convert raw data to str (Python 3 compatibility)
- src = to_str(src, 'latin-1')
-
- result = []
- for i in range(0, len(src), length):
- s = src[i:i + length]
- hexa = ' '.join("{0:02X}".format(ord(x)) for x in s)
- printable = s.translate(FILTER)
- result.append(pattern.format(i + start_offset, hexa, printable))
-
- return ''.join(result)
-
- def _read_value(self, field_type, ident, name=""):
- """
- Reads the next value, of the given type
-
- :param field_type: A serialization typecode
- :param ident: Log indentation
- :param name: Field name (for logs)
- :return: The read value
- :raise RuntimeError: Unknown field type
- """
- if len(field_type) > 1:
- # We don't need details for arrays and objects
- field_type = field_type[0]
-
- if field_type == self.TYPE_BOOLEAN:
- (val,) = self._readStruct(">B")
- res = bool(val)
- elif field_type == self.TYPE_BYTE:
- (res,) = self._readStruct(">b")
- elif field_type == self.TYPE_SHORT:
- (res,) = self._readStruct(">h")
- elif field_type == self.TYPE_INTEGER:
- (res,) = self._readStruct(">i")
- elif field_type == self.TYPE_LONG:
- (res,) = self._readStruct(">q")
- elif field_type == self.TYPE_FLOAT:
- (res,) = self._readStruct(">f")
- elif field_type == self.TYPE_DOUBLE:
- (res,) = self._readStruct(">d")
- elif field_type == self.TYPE_OBJECT or field_type == self.TYPE_ARRAY:
- _, res = self._read_and_exec_opcode(ident=ident + 1)
- else:
- raise RuntimeError("Unknown typecode: {0}".format(field_type))
-
- log_debug("* {0} {1}: {2}".format(field_type, name, res), ident)
- return res
-
- def _convert_char_to_type(self, type_char):
- """
- Ensures a read character is a typecode.
-
- :param type_char: Read typecode
- :return: The typecode as a string (using chr)
- :raise RuntimeError: Unknown typecode
- """
- typecode = type_char
- if type(type_char) is int:
- typecode = chr(type_char)
-
- if typecode in self.TYPECODES_LIST:
- return typecode
- else:
- raise RuntimeError("Typecode {0} ({1}) isn't supported."
- .format(type_char, typecode))
-
- def _add_reference(self, obj, ident=0):
- """
- Adds a read reference to the marshaler storage
-
- :param obj: Reference to add
- :param ident: Log indentation level
- """
- log_debug("## New reference handle 0x{0:X}: {1} -> {2}"
- .format(len(self.references) + self.BASE_REFERENCE_IDX,
- type(obj).__name__, obj), ident)
- self.references.append(obj)
-
- def _oops_dump_state(self, ignore_remaining_data=False):
- """
- Log a deserialization error
-
- :param ignore_remaining_data: If True, don't log an error when
- unused trailing bytes are remaining
- """
- log_error("==Oops state dump" + "=" * (30 - 17))
- log_error("References: {0}".format(self.references))
- log_error("Stream seeking back at -16 byte (2nd line is an actual "
- "position!):")
-
- # Do not use a keyword argument
- self.object_stream.seek(-16, os.SEEK_CUR)
- position = self.object_stream.tell()
- the_rest = self.object_stream.read()
-
- if not ignore_remaining_data and len(the_rest):
- log_error("Warning!!!!: Stream still has {0} bytes left."
- .format(len(the_rest)))
- log_error(self._create_hexdump(the_rest, position))
-
- log_error("=" * 30)
-
-# ------------------------------------------------------------------------------
-
-
-class JavaObjectMarshaller(JavaObjectConstants):
- """
- Serializes objects into Java serialization format
- """
- def __init__(self, stream=None):
- """
- Sets up members
-
- :param stream: An output stream
- """
- self.object_stream = stream
- self.object_obj = None
- self.object_transformers = []
-
- def add_transformer(self, transformer):
- """
- Appends an object transformer to the serialization process
-
- :param transformer: An object with a transform(obj) method
- """
- self.object_transformers.append(transformer)
-
- def dump(self, obj):
- """
- Dumps the given object in the Java serialization format
- """
- self.object_obj = obj
- self.object_stream = BytesIO()
- self._writeStreamHeader()
- self.writeObject(obj)
- return self.object_stream.getvalue()
-
- def _writeStreamHeader(self):
- """
- Writes the Java serialization magic header in the serialization stream
- """
- self._writeStruct(">HH", 4, (self.STREAM_MAGIC, self.STREAM_VERSION))
-
- def writeObject(self, obj):
- """
- Appends an object to the serialization stream
-
- :param obj: A string or a deserialized Java object
- :raise RuntimeError: Unsupported type
- """
- log_debug("Writing object of type {0}".format(type(obj).__name__))
- if isinstance(obj, JavaArray):
- # Deserialized Java array
- self.write_array(obj)
- elif isinstance(obj, JavaEnum):
- # Deserialized Java Enum
- self.write_enum(obj)
- elif isinstance(obj, JavaObject):
- # Deserialized Java object
- self.write_object(obj)
- elif isinstance(obj, JavaString):
- # Deserialized String
- self.write_string(obj)
- elif isinstance(obj, JavaClass):
- # Java class
- self.write_class(obj)
- elif obj is None:
- # Null
- self.write_null()
- elif type(obj) is str:
- # String value
- self.write_blockdata(obj)
- else:
- # Unhandled type
- raise RuntimeError("Object serialization of type {0} is not "
- "supported.".format(type(obj)))
-
- def _writeStruct(self, unpack, length, args):
- """
- Appends data to the serialization stream
-
- :param unpack: Struct format string
- :param length: Unused
- :param args: Struct arguments
- """
- ba = struct.pack(unpack, *args)
- self.object_stream.write(ba)
-
- def _writeString(self, string):
- """
- Appends a string to the serialization stream
-
- :param string: String to serialize
- """
- # TODO: Convert to "modified UTF-8"
- # http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8
- string = to_bytes(string, "utf-8")
-
- self._writeStruct(">H", 2, (len(string),))
- self.object_stream.write(string)
-
- def write_string(self, obj):
- """
- Writes a Java string with the TC_STRING type marker
-
- :param obj: The string to print
- """
- self._writeStruct(">B", 1, (self.TC_STRING,))
- self._writeString(obj)
-
- def write_enum(self, obj):
- """
- Writes an Enum value
-
- :param obj: A JavaEnum object
- """
- self._writeStruct(">B", 1, (self.TC_ENUM,))
- self.write_classdesc(obj.get_class())
- self.write_string(obj.constant)
-
- def write_blockdata(self, obj, parent=None):
- """
- Appends a block of data to the serialization stream
-
- :param obj: String form of the data block
- """
- if type(obj) is str:
- # Latin-1: keep bytes as is
- obj = to_bytes(obj, "latin-1")
-
- length = len(obj)
- if length <= 256:
- # Small block data
- # TC_BLOCKDATA (unsigned byte) (byte)[size]
- self._writeStruct(">B", 1, (self.TC_BLOCKDATA,))
- self._writeStruct(">B", 1, (length,))
- else:
- # Large block data
- # TC_BLOCKDATALONG (unsigned int) (byte)[size]
- self._writeStruct(">B", 1, (self.TC_BLOCKDATALONG,))
- self._writeStruct(">I", 1, (length,))
-
- self.object_stream.write(obj)
-
- def write_null(self):
- """
- Writes a "null" value
- """
- self._writeStruct(">B", 1, (self.TC_NULL,))
-
- def write_object(self, obj, parent=None):
- """
- Writes an object header to the serialization stream
-
- :param obj: Not yet used
- :param parent: Not yet used
- """
- # Transform object
- for transformer in self.object_transformers:
- tmp_object = transformer.transform(obj)
- if tmp_object is not obj:
- obj = tmp_object
- break
-
- self._writeStruct(">B", 1, (self.TC_OBJECT,))
- cls = obj.get_class()
- self.write_classdesc(cls)
-
- all_names = []
- all_types = []
- tmpcls = cls
- while tmpcls:
- all_names.extend(tmpcls.fields_names)
- all_types.extend(tmpcls.fields_types)
- tmpcls = tmpcls.superclass
- del tmpcls
-
- for field_name, field_type in zip(all_names, all_types):
- try:
- self._write_value(field_type, getattr(obj, field_name))
- except AttributeError as ex:
- log_error("No attribute {0} for object {1}\nDir: {2}"
- .format(ex, repr(obj), dir(obj)))
- raise
- del all_names, all_types
-
- if cls.flags & self.SC_SERIALIZABLE \
- and cls.flags & self.SC_WRITE_METHOD \
- or cls.flags & self.SC_EXTERNALIZABLE \
- and cls.flags & self.SC_BLOCK_DATA:
- for annotation in obj.annotations:
- log_debug("Write annotation {0} for {1}"
- .format(repr(annotation), repr(obj)))
- if annotation is None:
- self.write_null()
- else:
- self.writeObject(annotation)
- self._writeStruct('>B', 1, (self.TC_ENDBLOCKDATA,))
-
- def write_class(self, obj, parent=None):
- """
- Writes a class to the stream
-
- :param obj: A JavaClass object
- :param parent:
- """
- self._writeStruct(">B", 1, (self.TC_CLASS,))
- self.write_classdesc(obj)
-
- def write_classdesc(self, obj, parent=None):
- """
- Writes a class description
-
- :param obj: Class description to write
- :param parent:
- """
- self._writeStruct(">B", 1, (self.TC_CLASSDESC,))
- self._writeString(obj.name)
- self._writeStruct(">qB", 1, (obj.serialVersionUID, obj.flags))
- self._writeStruct(">H", 1, (len(obj.fields_names),))
-
- for field_name, field_type in zip(obj.fields_names, obj.fields_types):
- self._writeStruct(
- ">B", 1, (self._convert_type_to_char(field_type),))
- self._writeString(field_name)
- if field_type[0] in (self.TYPE_OBJECT, self.TYPE_ARRAY):
- self.write_string(field_type)
-
- self._writeStruct(">B", 1, (self.TC_ENDBLOCKDATA,))
- if obj.superclass:
- self.write_classdesc(obj.superclass)
- else:
- self.write_null()
-
- def write_array(self, obj):
- """
- Writes a JavaArray
-
- :param obj: A JavaArray object
- """
- self._writeStruct(">B", 1, (self.TC_ARRAY,))
- self.write_classdesc(obj.get_class())
- self._writeStruct(">i", 1, (len(obj),))
-
- classdesc = obj.get_class()
-
- type_char = classdesc.name[0]
- assert type_char == self.TYPE_ARRAY
- type_char = classdesc.name[1]
-
- if type_char == self.TYPE_OBJECT:
- for o in obj:
- self.write_object(o)
- elif type_char == self.TYPE_ARRAY:
- for a in obj:
- self.write_array(a)
- else:
- log_debug("Write array of type %s" % type_char)
- for v in obj:
- self._write_value(type_char, v)
-
- def _write_value(self, field_type, value):
- """
- Writes an item of an array
-
- :param field_type: Value type
- :param value: The value itself
- """
- if len(field_type) > 1:
- # We don't need details for arrays and objects
- field_type = field_type[0]
-
- if field_type == self.TYPE_BOOLEAN:
- self._writeStruct(">B", 1, (1 if value else 0,))
- elif field_type == self.TYPE_BYTE:
- self._writeStruct(">B", 1, (value,))
- elif field_type == self.TYPE_SHORT:
- self._writeStruct(">h", 1, (value,))
- elif field_type == self.TYPE_INTEGER:
- self._writeStruct(">i", 1, (value,))
- elif field_type == self.TYPE_LONG:
- self._writeStruct(">q", 1, (value,))
- elif field_type == self.TYPE_FLOAT:
- self._writeStruct(">f", 1, (value,))
- elif field_type == self.TYPE_DOUBLE:
- self._writeStruct(">d", 1, (value,))
- elif field_type == self.TYPE_OBJECT or field_type == self.TYPE_ARRAY:
- if value is None:
- self.write_null()
- elif isinstance(value, JavaEnum):
- self.write_enum(value)
- elif isinstance(value, JavaObject):
- self.write_object(value)
- elif isinstance(value, JavaString):
- self.write_string(value)
- elif isinstance(value, str):
- self.write_blockdata(value)
- else:
- raise RuntimeError("Unknown typecode: {0}".format(field_type))
- else:
- raise RuntimeError("Unknown typecode: {0}".format(field_type))
-
- def _convert_type_to_char(self, type_char):
- """
- Converts the given type code to an int
-
- :param type_char: A type code character
- """
- typecode = type_char
- if type(type_char) is int:
- typecode = chr(type_char)
-
- if typecode in self.TYPECODES_LIST:
- return ord(typecode)
- elif len(typecode) > 1:
- if typecode[0] == 'L':
- return ord(self.TYPE_OBJECT)
- elif typecode[0] == '[':
- return ord(self.TYPE_ARRAY)
-
- raise RuntimeError("Typecode {0} ({1}) isn't supported."
- .format(type_char, typecode))
-
-# ------------------------------------------------------------------------------
-
-
-class DefaultObjectTransformer(object):
- """
- Default transformer for the deserialized objects.
- Converts JavaObject objects to Python types (maps, lists, ...)
- """
- class JavaList(list, JavaObject):
- def __init__(self, *args, **kwargs):
- list.__init__(self, *args, **kwargs)
- JavaObject.__init__(self)
-
- class JavaMap(dict, JavaObject):
- def __init__(self, *args, **kwargs):
- dict.__init__(self, *args, **kwargs)
- JavaObject.__init__(self)
-
- def create(self, classdesc):
- """
- Transforms a deserialized Java object into a Python object
-
- :param java_object: A JavaObject instance
- :return: The Python form of the object, or the original JavaObject
- """
-
- if classdesc.name in ("java.util.ArrayList", "java.util.LinkedList"):
- # @serialData The length of the array backing the ArrayList
- # instance is emitted (int), followed by all of its
- # elements (each an Object) in the proper order
- log_debug("---")
- log_debug(classdesc.name)
- log_debug("---")
-
- java_object = self.JavaList()
-
- log_debug(">>> java_object: {0}".format(java_object))
- return java_object
-
- if classdesc.name == "java.util.HashMap":
- log_debug("---")
- log_debug("java.util.HashMap")
- log_debug("---")
-
- java_object = self.JavaMap()
-
- log_debug(">>> java_object: {0}".format(java_object))
- return java_object
-
- # Return a JavaObject by default
- return JavaObject()
diff --git a/javaobj/__init__.py b/javaobj/__init__.py
new file mode 100644
index 0000000..d1b146d
--- /dev/null
+++ b/javaobj/__init__.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for reading and writing (writing is currently WIP) Java
+objects in the serialization format used by ObjectOutputStream. This form of
+object representation is a standard data interchange format in the Java world.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+# Imports giving access to what the javaobj module provides
+from javaobj.v1.beans import * # noqa: F401,F403
+from javaobj.v1.core import * # noqa: F401,F403
+from javaobj.v1.transformers import * # noqa: F401,F403
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
diff --git a/javaobj/constants.py b/javaobj/constants.py
new file mode 100644
index 0000000..d4dd1cb
--- /dev/null
+++ b/javaobj/constants.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""
+Definition of the constants used in the deserialization process
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import enum
+
+# ------------------------------------------------------------------------------
+
+__all__ = (
+ "PRIMITIVE_TYPES",
+ "StreamConstants",
+ "TerminalCode",
+ "ClassDescFlags",
+ "TypeCode",
+ "StreamCodeDebug",
+)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class StreamConstants(enum.IntEnum):
+ """
+ Basic constants of the stream protocol.
+
+ Members are ``IntEnum`` values, so they compare equal to plain integers.
+ """
+
+ # Magic bytes of any serialized files
+ STREAM_MAGIC = 0xACED
+
+ # Only protocol version supported by javaobj
+ STREAM_VERSION = 0x05
+
+ # Base index for handles
+ BASE_REFERENCE_IDX = 0x7E0000
+
+
+class TerminalCode(enum.IntEnum):
+ """
+ Stream type codes (``TC_*`` markers).
+
+ Values are contiguous, from 0x70 (``TC_NULL``) to 0x7E (``TC_ENUM``).
+ """
+
+ TC_NULL = 0x70
+ TC_REFERENCE = 0x71
+ TC_CLASSDESC = 0x72
+ TC_OBJECT = 0x73
+ TC_STRING = 0x74
+ TC_ARRAY = 0x75
+ TC_CLASS = 0x76
+ TC_BLOCKDATA = 0x77
+ TC_ENDBLOCKDATA = 0x78
+ TC_RESET = 0x79
+ TC_BLOCKDATALONG = 0x7A
+ TC_EXCEPTION = 0x7B
+ TC_LONGSTRING = 0x7C
+ TC_PROXYCLASSDESC = 0x7D
+ TC_ENUM = 0x7E
+ # Ignore TC_MAX: we don't use it and it messes with TC_ENUM
+ # (it has the same value, 0x7E, and an enum member sharing the value of an
+ # earlier member is only an alias of that member)
+ # TC_MAX = 0x7E
+
+
+class ClassDescFlags(enum.IntEnum):
+ """
+ Class description flags.
+
+ Each member is a distinct single bit, so several flags can be combined
+ in one integer and tested with a bitwise AND.
+ """
+
+ SC_WRITE_METHOD = 0x01 # if SC_SERIALIZABLE
+ SC_BLOCK_DATA = 0x08 # if SC_EXTERNALIZABLE
+ SC_SERIALIZABLE = 0x02
+ SC_EXTERNALIZABLE = 0x04
+ SC_ENUM = 0x10
+
+
+class TypeCode(enum.IntEnum):
+ """
+ Type definition chars (typecode).
+
+ The value of each member is the ordinal of a single ASCII character;
+ the trailing comments give that ordinal in hexadecimal.
+ """
+
+ # Primitive types
+ TYPE_BYTE = ord("B") # 0x42
+ TYPE_CHAR = ord("C") # 0x43
+ TYPE_DOUBLE = ord("D") # 0x44
+ TYPE_FLOAT = ord("F") # 0x46
+ TYPE_INTEGER = ord("I") # 0x49
+ TYPE_LONG = ord("J") # 0x4A
+ TYPE_SHORT = ord("S") # 0x53
+ TYPE_BOOLEAN = ord("Z") # 0x5A
+ # Object types
+ TYPE_OBJECT = ord("L") # 0x4C
+ TYPE_ARRAY = ord("[") # 0x5B
+
+
+# List of the types defined as primitive: every TypeCode member except
+# TYPE_OBJECT and TYPE_ARRAY, in declaration order
+PRIMITIVE_TYPES = (
+ TypeCode.TYPE_BYTE,
+ TypeCode.TYPE_CHAR,
+ TypeCode.TYPE_DOUBLE,
+ TypeCode.TYPE_FLOAT,
+ TypeCode.TYPE_INTEGER,
+ TypeCode.TYPE_LONG,
+ TypeCode.TYPE_SHORT,
+ TypeCode.TYPE_BOOLEAN,
+)
+
+
+class StreamCodeDebug:
+    """
+    Utility methods converting numeric stream codes to readable names,
+    for debugging and logging purposes
+    """
+
+    @staticmethod
+    def op_id(op_id):
+        # type: (int) -> str
+        """
+        Returns the name of the given OP Code
+
+        :param op_id: OP Code
+        :return: Name of the OP Code, or a ``<unknown TC:...>`` placeholder
+                 holding the raw value if it is not a known terminal code
+        """
+        try:
+            return TerminalCode(op_id).name
+        except ValueError:
+            # Keep the offending value visible instead of an empty string
+            return "<unknown TC:{0}>".format(op_id)
+
+    @staticmethod
+    def type_code(type_id):
+        # type: (int) -> str
+        """
+        Returns the name of the given Type Code
+
+        :param type_id: Type code
+        :return: Name of the type code, or a ``<unknown TypeCode:...>``
+                 placeholder holding the raw value if it is not a known one
+        """
+        try:
+            return TypeCode(type_id).name
+        except ValueError:
+            # Keep the offending value visible instead of an empty string
+            return "<unknown TypeCode:{0}>".format(type_id)
+
+    @staticmethod
+    def flags(flags):
+        # type: (int) -> str
+        """
+        Returns the names of the class description flags found in the given
+        integer
+
+        :param flags: A class description flag entry
+        :return: The flags names as a single string (sorted, comma-separated;
+                 empty if no known flag is set)
+        """
+        names = sorted(key.name for key in ClassDescFlags if key & flags)
+        return ", ".join(names)
diff --git a/javaobj/modifiedutf8.py b/javaobj/modifiedutf8.py
new file mode 100644
index 0000000..ac29ce5
--- /dev/null
+++ b/javaobj/modifiedutf8.py
@@ -0,0 +1,255 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Implements the support of the Java-specific kind of UTF-8 encoding.
+
+This module is a modified version of ``py2jdbc.mutf8`` provided by
+`@guywithface <https://github.com/guywithface>`_.
+
+The project the original file comes from is available at:
+https://github.com/swstephe/py2jdbc/
+
+:authors: Scott Stephens (@swstephe), @guywithface
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+"""
+
+from __future__ import unicode_literals
+
+import sys
+
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# Encoding name: not cesu-8, which uses a different zero-byte
+NAME = "mutf8"
+
+# ------------------------------------------------------------------------------
+
+if sys.version_info[0] >= 3:
+    unicode_char = chr  # pylint:disable=C0103
+
+    def byte_to_int(data):
+        # type: (bytes) -> int
+        """
+        Returns the integer value of the first byte of the given data.
+        Integers are returned unchanged.
+
+        :param data: An integer or a non-empty ``bytes`` object
+        :return: The value of the first byte, as an integer
+        :raise ValueError: Input is neither an integer nor ``bytes``
+        """
+        if isinstance(data, bytes):
+            # Indexing bytes already gives an integer in Python 3
+            return data[0]
+
+        if not isinstance(data, int):
+            raise ValueError(
+                "Expected byte or int as input, got: {0}".format(
+                    type(data).__name__
+                )
+            )
+
+        return data
+
+
+else:
+    unicode_char = (
+        unichr  # pylint:disable=C0103,undefined-variable # noqa: F821
+    )
+
+    def byte_to_int(data):
+        # type: (bytes) -> int
+        """
+        Returns the integer value of the first byte of the given data.
+        Integers are returned unchanged.
+
+        :param data: An integer or a non-empty ``str`` (Python 2 bytes)
+        :return: The value of the first byte, as an integer
+        :raise ValueError: Input is neither an integer nor a ``str``
+        """
+        if isinstance(data, str):
+            # Indexing a Python 2 str gives a 1-char string: convert it
+            return ord(data[0])
+
+        if not isinstance(data, int):
+            raise ValueError(
+                "Expected byte or int as input, got: {0}".format(
+                    type(data).__name__
+                )
+            )
+
+        return data
+
+
+# ------------------------------------------------------------------------------
+
+
+class DecodeMap(object):  # pylint:disable=R0205
+    """
+    Describes how one continuation byte of a multi-byte sequence is checked
+    (mask and expected value) and how many of its low bits are merged in the
+    decoded value.
+    A byte failing the check raises UnicodeDecodeError, so that encode and
+    decode correctly handle bad characters.
+    """
+
+    def __init__(self, count, mask, value, bits):
+        """
+        Stores the description of a byte, taken from the module static table.
+        The mask selecting the payload bits (``mask2``) is computed from
+        ``bits``: the payload is always the low bits of the byte.
+
+        :param count: The number of bytes in this entire sequence.
+        :param mask: The mask to apply to the byte at this position.
+        :param value: The value of masked bits, (without shifting).
+        :param bits: The number of bits.
+        """
+        self.count, self.mask = count, mask
+        self.value, self.bits = value, bits
+        self.mask2 = (1 << bits) - 1
+
+    def apply(self, byte, value, data, i, count):
+        """
+        Checks the marker bits of the byte then appends its payload bits to
+        the accumulated value.
+
+        :param byte: The byte to compare
+        :param value: The currently accumulated value.
+        :param data: The data buffer, (array of bytes).
+        :param i: The position within the data buffer.
+        :param count: The position of this comparison.
+        :return: A new value with the bits merged in.
+        :raises UnicodeDecodeError: if marked bits don't match.
+        """
+        if byte & self.mask != self.value:
+            raise UnicodeDecodeError(
+                NAME,
+                data,
+                i,
+                i + count,
+                "invalid {}-byte sequence".format(self.count),
+            )
+
+        # Make room for the payload bits, then merge them
+        return (value << self.bits) | (byte & self.mask2)
+
+    def __repr__(self):
+        fields = []
+        for attr in ("count", "mask", "value", "bits", "mask2"):
+            fields.append("{}=0x{:02x}".format(attr, getattr(self, attr)))
+        return "DecodeMap({})".format(", ".join(fields))
+
+
+# Checks applied to the bytes following the lead byte of a sequence, indexed
+# by the total length of the sequence.
+# Each entry is (mask, expected masked value, number of payload bits).
+DECODER_MAP = {
+    2: ((0xC0, 0x80, 6),),
+    3: (
+        (0xC0, 0x80, 6),
+        (0xC0, 0x80, 6),
+    ),
+    6: (
+        (0xF0, 0xA0, 4),
+        (0xC0, 0x80, 6),
+        (0xFF, 0xED, 0),
+        (0xF0, 0xB0, 4),
+        (0xC0, 0x80, 6),
+    ),
+}
+
+# Same table, with each description converted to a DecodeMap object
+DECODE_MAP = {
+    size: tuple(DecodeMap(size, *rule) for rule in rules)
+    for size, rules in DECODER_MAP.items()
+}
+
+
+def decoder(data):
+    """
+    This generator processes a sequence of bytes in Modified UTF-8 encoding
+    and produces a sequence of unicode string characters.
+
+    It takes bits from the byte until it matches one of the known encoding
+    sequences.
+    It uses ``DecodeMap`` to mask, compare and generate values.
+
+    The lead byte ``0xED`` is ambiguous and is resolved with the next byte:
+
+    * ``0x80..0x9F``: regular 3-byte character (U+D000 to U+D7FF)
+    * anything else: surrogate pair stored on 6 bytes, merged into a single
+      supplementary character (U+10000 and above)
+
+    :param data: a string of bytes in Modified UTF-8 encoding (iterating over
+                 it must yield integers)
+    :return: a generator producing a string of unicode characters
+    :raises UnicodeDecodeError: unrecognised byte in sequence encountered.
+    """
+
+    def next_byte(_it, start, count):
+        try:
+            return next(_it)[1]
+        except StopIteration:
+            raise UnicodeDecodeError(
+                NAME, data, start, start + count, "incomplete byte sequence"
+            )
+
+    def read_tail(_it, start, value, maps, already_read=0):
+        # Reads one byte per entry of ``maps`` and merges it into ``value``.
+        # ``already_read`` is the number of continuation bytes consumed by
+        # the caller, to keep the positions reported in errors accurate.
+        for position, dm in enumerate(maps, already_read + 1):
+            byte = next_byte(_it, start, position)
+            value = dm.apply(byte, value, data, start, position)
+        return value
+
+    it = iter(enumerate(data))
+    for i, d in it:
+        if d == 0x00:  # 00000000
+            raise UnicodeDecodeError(
+                NAME, data, i, i + 1, "embedded zero-byte not allowed"
+            )
+
+        if not d & 0x80:  # 0xxxxxxx
+            value = d
+        elif not d & 0x40:  # 10xxxxxx
+            raise UnicodeDecodeError(
+                NAME, data, i, i + 1, "misplaced continuation character"
+            )
+        elif not d & 0x20:  # 110xxxxx
+            value = read_tail(it, i, d & 0x1F, DECODE_MAP[2])
+        elif d & 0x10:  # 1111xxxx
+            raise UnicodeDecodeError(
+                NAME, data, i, i + 1, "invalid encoding character"
+            )
+        elif d == 0xED:
+            d1 = next_byte(it, i, 1)
+            if d1 & 0xE0 == 0x80:
+                # 0x80..0x9F: plain 3-byte character (U+D000..U+D7FF),
+                # not the first half of a surrogate pair
+                value = ((d & 0x0F) << 6) | (d1 & 0x3F)
+                value = read_tail(it, i, value, DECODE_MAP[3][1:], 1)
+            else:
+                # Surrogate pair: 2 x 10 bits of payload
+                pair = DECODE_MAP[6]
+                value = pair[0].apply(d1, 0, data, i, 1)
+                value = read_tail(it, i, value, pair[1:], 1)
+                # The pair stores (code point - 0x10000)
+                value += 0x10000
+        else:  # 1110xxxx
+            value = read_tail(it, i, d & 0x0F, DECODE_MAP[3])
+
+        # noinspection PyCompatibility
+        yield mutf8_unichr(value)
+
+
+def decode_modified_utf8(data, errors="strict"):
+    """
+    Decodes a sequence of bytes to a unicode text and length using
+    Modified UTF-8.
+    This function is designed to be used with Python ``codecs`` module.
+
+    The input is copied to a ``bytearray`` before decoding: the decoder then
+    iterates over integers on both Python 2 and 3, and the
+    ``UnicodeDecodeError`` it raises is built from a bytes-like object
+    (a generator is rejected with a ``TypeError`` on Python 3).
+
+    :param data: a string of bytes in Modified UTF-8
+    :param errors: handle decoding errors: ``strict`` re-raises the error,
+                   ``replace`` inserts U+FFFD in place of the invalid
+                   sequence, any other value skips the invalid sequence
+    :return: unicode text and length
+    :raises UnicodeDecodeError: sequence is invalid.
+    """
+    buffer = bytearray(byte_to_int(d) for d in data)
+    chars = []
+    offset = 0
+
+    while True:
+        try:
+            # The generator is dead once it has raised: each pass decodes
+            # what follows the last invalid sequence with a new one
+            for char in decoder(buffer[offset:]):
+                chars.append(char)
+            break
+        except UnicodeDecodeError as ex:
+            if errors == "strict":
+                raise
+
+            if errors == "replace":
+                chars.append("\uFFFD")
+
+            # Positions are relative to the decoded slice; the end of the
+            # faulty range is always at least 1, so the loop terminates
+            offset += max(ex.end, 1)
+
+    return "".join(chars), len(chars)
+
+
+def mutf8_unichr(value):
+ """
+ Mimics Python 2 unichr() and Python 3 chr(): delegates to the
+ ``unicode_char`` alias selected when this module is imported
+
+ :param value: Code point to convert, as an integer
+ :return: The result of ``unicode_char(value)``
+ """
+ return unicode_char(value)
diff --git a/javaobj/utils.py b/javaobj/utils.py
new file mode 100644
index 0000000..2d6f761
--- /dev/null
+++ b/javaobj/utils.py
@@ -0,0 +1,276 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides utility methods used by the core implementation of javaobj.
+
+Namely: logging methods, bytes/str/unicode converters
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+from typing import IO, Tuple # noqa: F401
+import gzip
+import logging
+import os
+import struct
+import sys
+
+# Modified UTF-8 parser
+from .modifiedutf8 import byte_to_int, decode_modified_utf8
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+# Setup the logger
+_log = logging.getLogger("javaobj")
+
+
+def log_debug(message, ident=0):
+    """
+    Writes the message to the ``javaobj`` logger, at debug level
+
+    :param message: Message to log
+    :param ident: Indentation level (two spaces are written per level)
+    """
+    padding = " " * (ident * 2)
+    _log.debug("%s%s", padding, message)
+
+
+def log_error(message, ident=0):
+    """
+    Writes the message to the ``javaobj`` logger, at error level
+
+    :param message: Message to log
+    :param ident: Indentation level (two spaces are written per level)
+    """
+    padding = " " * (ident * 2)
+    _log.error("%s%s", padding, message)
+
+
+# ------------------------------------------------------------------------------
+
+
+def read_struct(data, fmt_str):
+    # type: (bytes, str) -> Tuple
+    """
+    Unpacks the structure found at the beginning of the given bytes, and
+    gives back the bytes which follow it
+
+    :param data: Data as bytes
+    :param fmt_str: Struct unpack format string
+    :return: A tuple (results as tuple, remaining data)
+    """
+    cut = struct.calcsize(fmt_str)
+    head, tail = data[:cut], data[cut:]
+    return struct.unpack(fmt_str, head), tail
+
+
+def read_string(data, length_fmt="H"):
+    # type: (bytes, str) -> Tuple[UNICODE_TYPE, bytes]
+    """
+    Reads a length-prefixed serialized string: a big-endian length, then that
+    many bytes of content
+
+    :param data: Bytes where to read the string from
+    :param length_fmt: Structure format of the string length (H or Q)
+    :return: A tuple (deserialized string, remaining data)
+    """
+    header_fmt = ">{0}".format(length_fmt)
+    (size,), remaining = read_struct(data, header_fmt)
+    return to_unicode(remaining[:size]), remaining[size:]
+
+
+# ------------------------------------------------------------------------------
+
+
+def java_data_fd(original_df):
+    # type: (IO[bytes]) -> IO[bytes]
+    """
+    Ensures that the input file descriptor contains a Java serialized content.
+    Automatically uncompresses GZipped data
+
+    The first two bytes are read to detect the GZip magic number, then the
+    descriptor is moved back to its initial position. Anything which is not
+    GZipped (raw serialized stream, unknown content, or a stream shorter than
+    two bytes) is returned as is: the parser is in charge of rejecting it.
+
+    :param original_df: Input file descriptor (must support tell and seek)
+    :return: Input file descriptor or a fake one to access uncompressed data
+    :raise IOError: Error reading input file
+    """
+    # Read the first bytes, then get back to where we started
+    start_idx = original_df.tell()
+    # A bytearray gives integers on both Python 2 and 3, and can be shorter
+    # than 2 bytes without failing
+    magic_header = bytearray(original_df.read(2))
+    original_df.seek(start_idx, os.SEEK_SET)
+
+    if (
+        len(magic_header) == 2
+        and magic_header[0] == 0x1F
+        and magic_header[1] == 0x8B
+    ):
+        # Open the GZip file
+        return gzip.GzipFile(fileobj=original_df, mode="rb")  # type: ignore
+
+    # Raw serialized stream (0xAC...): use it directly.
+    # Anything else: let the parser raise the error
+    return original_df
+
+
+# ------------------------------------------------------------------------------
+
+
+def hexdump(src, start_offset=0, length=16):
+    # type: (str, int, int) -> str
+    """
+    Formats binary data as an hexadecimal dump: one line per group of
+    ``length`` bytes, with the offset of the group, the value of its bytes in
+    hexadecimal and their printable representation
+
+    :param src: A string containing binary data
+    :param start_offset: The start offset of the source
+    :param length: Length of a dump line
+    :return: A dump string
+    """
+    # Translation table: characters without a 1-char repr() are shown as "."
+    printable_table = "".join(
+        chr(code) if len(repr(chr(code))) == 3 else "."
+        for code in range(256)
+    )
+    line_format = "{{0:04X}} {{1:<{0}}} {{2}}\n".format(length * 3)
+
+    # Convert raw data to str (Python 3 compatibility)
+    text = to_str(src, "latin-1")
+
+    lines = []
+    for offset in range(0, len(text), length):
+        chunk = text[offset : offset + length]
+        hex_values = " ".join("{0:02X}".format(ord(char)) for char in chunk)
+        visible = chunk.translate(printable_table)
+        lines.append(
+            line_format.format(offset + start_offset, hex_values, visible)
+        )
+
+    return "".join(lines)
+
+
+# ------------------------------------------------------------------------------
+
+
+# Conversion helpers: the names defined are the same for both branches, only
+# the implementation depends on the major version of the interpreter
+if sys.version_info[0] >= 3:
+ BYTES_TYPE = bytes # pylint:disable=C0103
+ UNICODE_TYPE = str # pylint:disable=C0103
+ unicode_char = chr # pylint:disable=C0103
+
+ def bytes_char(c):
+ """
+ Converts the given byte value to a bytes string of length 1
+
+ :param c: Value of the byte, as an integer
+ :return: A ``bytes`` object holding that single byte
+ """
+ return bytes((c,))
+
+ # Python 3 interpreter : bytes & str
+ def to_bytes(data, encoding="UTF-8"):
+ """
+ Converts the given string to an array of bytes.
+ Returns the first parameter if it is already an array of bytes.
+
+ :param data: A unicode string
+ :param encoding: The encoding of data
+ :return: The corresponding array of bytes
+ """
+ if type(data) is bytes: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ return data.encode(encoding)
+
+ def to_str(data, encoding="UTF-8"):
+ """
+ Converts the given parameter to a string.
+ Returns the first parameter if it is already an instance of ``str``.
+ If the data can't be decoded with the given encoding, it is decoded
+ as Modified UTF-8 instead.
+
+ :param data: A string
+ :param encoding: The encoding of data
+ :return: The corresponding string
+ """
+ if type(data) is str: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ try:
+ return str(data, encoding)
+ except UnicodeDecodeError:
+ # Fall back to the Java-specific decoder (text part of its result)
+ return decode_modified_utf8(data)[0]
+
+ # Same operation
+ to_unicode = to_str # pylint:disable=C0103
+
+ def read_to_str(data):
+ """
+ Concats all bytes into a string: each byte value becomes the
+ character with the same ordinal
+ """
+ return "".join(chr(char) for char in data)
+
+
+else:
+ BYTES_TYPE = str # pylint:disable=C0103
+ UNICODE_TYPE = (
+ unicode # pylint:disable=C0103,undefined-variable # noqa: F821
+ )
+ unicode_char = (
+ unichr # pylint:disable=C0103,undefined-variable # noqa: F821
+ )
+ bytes_char = chr # pylint:disable=C0103
+
+ # Python 2 interpreter : str & unicode
+ def to_str(data, encoding="UTF-8"):
+ """
+ Converts the given parameter to a string.
+ Returns the first parameter if it is already an instance of ``str``,
+ else encodes it with the given encoding.
+
+ :param data: A string
+ :param encoding: The encoding of data
+ :return: The corresponding string
+ """
+ if type(data) is str: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ return data.encode(encoding)
+
+ # Same operation
+ to_bytes = to_str # pylint:disable=C0103
+
+ # Python 2 interpreter : str & unicode
+ def to_unicode(data, encoding="UTF-8"):
+ """
+ Converts the given parameter to a unicode string.
+ Returns the first parameter if it is already an instance of
+ ``unicode``.
+ If the data can't be decoded with the given encoding, it is decoded
+ as Modified UTF-8 instead.
+
+ :param data: A string
+ :param encoding: The encoding of data
+ :return: The corresponding unicode string
+ """
+ if type(data) is UNICODE_TYPE: # pylint:disable=C0123
+ # Nothing to do
+ return data
+ try:
+ return data.decode(encoding)
+ except UnicodeDecodeError:
+ # Fall back to the Java-specific decoder (text part of its result)
+ return decode_modified_utf8(data)[0]
+
+ def read_to_str(data):
+ """
+ Nothing to do in Python 2
+ """
+ return data
diff --git a/javaobj/v1/__init__.py b/javaobj/v1/__init__.py
new file mode 100644
index 0000000..cc4aaaa
--- /dev/null
+++ b/javaobj/v1/__init__.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+"""
+First version of the un-marshalling process of javaobj.
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from . import beans, core, transformers # noqa: F401
+from .core import ( # noqa: F401
+ load,
+ loads,
+ dumps,
+ JavaObjectMarshaller,
+ JavaObjectUnmarshaller,
+)
+from .transformers import DefaultObjectTransformer # noqa: F401
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
diff --git a/javaobj/v1/beans.py b/javaobj/v1/beans.py
new file mode 100644
index 0000000..bf867bb
--- /dev/null
+++ b/javaobj/v1/beans.py
@@ -0,0 +1,225 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Definition of the beans of the v1 parser
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+from typing import List
+import struct
+
+from ..utils import UNICODE_TYPE
+
+# ------------------------------------------------------------------------------
+
+__all__ = (
+ "JavaArray",
+ "JavaByteArray",
+ "JavaClass",
+ "JavaEnum",
+ "JavaObject",
+ "JavaString",
+)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaClass(object):  # pylint:disable=R0205
+    """
+    Description of a Java class, as found in a serialization stream
+    """
+
+    def __init__(self):
+        """
+        Creates an empty description, to be filled by the parser
+        """
+        self.name = None  # type: str
+        self.serialVersionUID = None  # type: int  # pylint:disable=C0103
+        self.flags = None  # type: int
+        self.superclass = None  # type: JavaClass
+        self.fields_names = []  # type: List[str]
+        self.fields_types = []  # type: List[JavaString]
+
+    def __str__(self):
+        """
+        Same as the ``__repr__`` representation
+        """
+        return self.__repr__()
+
+    def __repr__(self):
+        """
+        Class name and serial version UID (in hexadecimal), in brackets
+        """
+        return "[{0:s}:0x{1:X}]".format(self.name, self.serialVersionUID)
+
+    def __eq__(self, other):
+        """
+        Equality test between two Java classes
+
+        :param other: Other JavaClass to test
+        :return: True if both classes share the same name, serial version
+                 UID, flags, fields and super class
+        """
+        if not isinstance(other, type(self)):
+            return False
+
+        compared = (
+            "name",
+            "serialVersionUID",
+            "flags",
+            "fields_names",
+            "fields_types",
+            "superclass",
+        )
+        for attribute in compared:
+            if not getattr(self, attribute) == getattr(other, attribute):
+                return False
+        return True
+
+
+class JavaObject(object):  # pylint:disable=R0205
+    """
+    Represents a deserialized non-primitive Java object
+    """
+
+    def __init__(self):
+        """
+        Sets up members
+        """
+        self.classdesc = None  # type: JavaClass
+        self.annotations = []
+
+    def get_class(self):
+        """
+        Returns the JavaClass that defines the type of this object
+        """
+        return self.classdesc
+
+    def __str__(self):
+        """
+        String representation
+        """
+        return self.__repr__()
+
+    def __repr__(self):
+        """
+        String representation: ``<javaobj:NAME>``, where NAME is the name of
+        the class of the object or UNKNOWN if it has no class description
+        """
+        name = "UNKNOWN"
+        if self.classdesc:
+            name = self.classdesc.name
+        return "<javaobj:{0}>".format(name)
+
+    def __hash__(self):
+        """
+        Each JavaObject we load must have a hash method to be accepted in sets
+        and alike. The default hash is the memory address of the object.
+        """
+        return id(self)
+
+    def __eq__(self, other):
+        """
+        Equality test between two Java objects
+
+        :param other: Other JavaObject to test
+        :return: True if both objects have equal class descriptions,
+                 annotations and values for each field named in the class
+                 description
+        """
+        if not isinstance(other, type(self)):
+            return False
+
+        res = (
+            self.classdesc == other.classdesc
+            and self.annotations == other.annotations
+        )
+        if not res:
+            return False
+
+        if self.classdesc is None:
+            # No class description (e.g. freshly created objects): there is
+            # no declared field left to compare
+            return True
+
+        for name in self.classdesc.fields_names:
+            if not getattr(self, name) == getattr(other, name):
+                return False
+        return True
+
+
+class JavaString(UNICODE_TYPE):
+    """
+    Unicode string flagged as coming from a Java String
+    """
+
+    def __hash__(self):
+        # Defining __eq__ disables the inherited hash: restore the string one
+        return UNICODE_TYPE.__hash__(self)
+
+    def __eq__(self, other):
+        # Only unicode strings can be equal to a Java string
+        if isinstance(other, UNICODE_TYPE):
+            return UNICODE_TYPE.__eq__(self, other)
+        return False
+
+
+class JavaEnum(JavaObject):
+ """
+ Represents a Java enumeration
+ """
+
+ def __init__(self, constant=None):
+ """
+ Sets up members
+
+ :param constant: Value stored as is in the ``constant`` member
+ (presumably the name of the enum constant -- TODO confirm with the
+ code creating the instances)
+ """
+ super(JavaEnum, self).__init__()
+ self.constant = constant
+
+
+class JavaArray(list, JavaObject):
+ """
+ Represents a Java Array: a Python list which also carries the members of
+ a JavaObject
+ """
+
+ def __init__(self, classdesc=None):
+ """
+ Sets up members
+
+ :param classdesc: Class description of the array
+ """
+ # Both bases are initialized explicitly, the list part first
+ list.__init__(self)
+ JavaObject.__init__(self)
+ # Must be set after JavaObject.__init__, which resets it to None
+ self.classdesc = classdesc
+
+ def __hash__(self):
+ # NOTE(review): ``list.__hash__`` is None on Python 3, so hashing an
+ # instance raises TypeError -- confirm that arrays are meant to be
+ # unhashable, unlike other JavaObject instances
+ return list.__hash__(self)
+
+
+class JavaByteArray(JavaObject):
+    """
+    Java array of bytes, kept as an immutable sequence of signed integers
+    """
+
+    def __init__(self, data, classdesc=None):
+        """
+        Sets up members
+
+        :param data: Raw content of the array, as bytes
+        :param classdesc: Class description of the array
+        """
+        JavaObject.__init__(self)
+        self.classdesc = classdesc
+        # One signed char ("b") per input byte
+        self._data = struct.unpack("{0}b".format(len(data)), data)
+
+    def __str__(self):
+        return "JavaByteArray({0})".format(self._data)
+
+    def __len__(self):
+        return len(self._data)
+
+    def __iter__(self):
+        return iter(self._data)
+
+    def __getitem__(self, item):
+        return self._data[item]
diff --git a/javaobj/v1/core.py b/javaobj/v1/core.py
new file mode 100644
index 0000000..ae5eeb5
--- /dev/null
+++ b/javaobj/v1/core.py
@@ -0,0 +1,133 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for reading and writing (writing is WIP currently) Java
+objects in the ObjectOutputStream/ObjectInputStream serialization format,
+which is a standard data interchange format in the Java world.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+try:
+ # Python 2
+ from StringIO import StringIO as BytesIO
+except ImportError:
+ # Python 3+
+ from io import BytesIO
+
+# Javaobj modules
+from .marshaller import JavaObjectMarshaller
+from .unmarshaller import JavaObjectUnmarshaller
+from .transformers import DefaultObjectTransformer
+from ..utils import java_data_fd
+
+# ------------------------------------------------------------------------------
+
+__all__ = (
+ "__version_info__",
+ "__version__",
+ "JavaObjectMarshaller",
+ "JavaObjectUnmarshaller",
+ "dumps",
+ "load",
+ "loads",
+)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+def load(file_object, *transformers, **kwargs):
+    """
+    Deserializes Java primitive data and objects serialized using
+    ObjectOutputStream from a file-like object.
+
+    :param file_object: A file-like object
+    :param transformers: Custom transformers to use
+    :param ignore_remaining_data: If True, don't log an error when unused
+                                  trailing bytes are remaining
+    :param use_numpy_arrays: Flag given as second argument to the
+                             unmarshaller (False by default)
+    :return: The deserialized object
+    """
+    # Read keyword arguments
+    skip_trailing = kwargs.get("ignore_remaining_data", False)
+    numpy_arrays = kwargs.get("use_numpy_arrays", False)
+
+    # Check file format (uncompress if necessary)
+    stream = java_data_fd(file_object)
+    unmarshaller = JavaObjectUnmarshaller(stream, numpy_arrays)
+
+    # Custom transformers are registered before the default one
+    for custom in transformers:
+        unmarshaller.add_transformer(custom)
+    unmarshaller.add_transformer(DefaultObjectTransformer())
+
+    # Read the file object
+    return unmarshaller.readObject(ignore_remaining_data=skip_trailing)
+
+
+def loads(string, *transformers, **kwargs):
+    """
+    Deserializes Java objects and primitive data serialized using
+    ObjectOutputStream from a string.
+
+    The string is wrapped in an in-memory file and handed to ``load``, which
+    receives the transformers and keyword arguments unchanged.
+
+    :param string: A Java data string
+    :param transformers: Custom transformers to use
+    :param ignore_remaining_data: If True, don't log an error when unused
+                                  trailing bytes are remaining
+    :return: The deserialized object
+    """
+    in_memory_file = BytesIO(string)
+    return load(in_memory_file, *transformers, **kwargs)
+
+
+def dumps(obj, *transformers):
+    """
+    Serializes Java primitive data and objects unmarshaled by load(s) before
+    into string.
+
+    :param obj: A Python primitive object, or one loaded using load(s)
+    :param transformers: Custom transformers to use
+    :return: The serialized data as a string
+    """
+    writer = JavaObjectMarshaller()
+
+    # Register the custom transformers, in the given order
+    for custom in transformers:
+        writer.add_transformer(custom)
+
+    serialized = writer.dump(obj)
+    return serialized
diff --git a/javaobj/v1/marshaller.py b/javaobj/v1/marshaller.py
new file mode 100644
index 0000000..9e5bdeb
--- /dev/null
+++ b/javaobj/v1/marshaller.py
@@ -0,0 +1,574 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for writing (writing is WIP currently) Java
+objects that will be deserialized by ObjectInputStream. This form of
+object representation is a standard data interchange format in Java world.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+import collections
+import logging
+import struct
+
+try:
+ # Python 2
+ from StringIO import StringIO as BytesIO
+except ImportError:
+ # Python 3+
+ from io import BytesIO
+
+# Javaobj modules
+from .beans import (
+ JavaClass,
+ JavaString,
+ JavaObject,
+ JavaByteArray,
+ JavaEnum,
+ JavaArray,
+)
+from ..constants import (
+ StreamConstants,
+ ClassDescFlags,
+ TerminalCode,
+ TypeCode,
+)
+from ..utils import (
+ log_debug,
+ log_error,
+ to_bytes,
+ BYTES_TYPE,
+ UNICODE_TYPE,
+)
+
+# ------------------------------------------------------------------------------
+
+__all__ = ("JavaObjectMarshaller",)
+
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaObjectMarshaller:
+ """
+ Serializes objects into Java serialization format
+ """
+
+    def __init__(self, stream=None):
+        """
+        Prepares the state of the marshaller
+
+        :param stream: An output stream (dump() replaces it by a new buffer)
+        """
+        self.references = []  # Elements already written, in handle order
+        self.object_transformers = []  # Consulted by write_object()
+        self.object_obj = None  # Object given to the last call to dump()
+        self.object_stream = stream
+
+    def add_transformer(self, transformer):
+        """
+        Registers an object transformer, after the already known ones
+        :param transformer: An object with a transform(obj) method
+        """
+        known_transformers = self.object_transformers
+        known_transformers.append(transformer)
+
+    def dump(self, obj):
+        """
+        Dumps the given object in the Java serialization format
+        :return: The whole content written to a new serialization buffer
+        """
+        self.references, self.object_obj = [], obj  # Forget previous dumps
+        output = self.object_stream = BytesIO()
+        self._writeStreamHeader()
+        self.writeObject(obj)
+        return output.getvalue()
+
+    def _writeStreamHeader(self):  # pylint:disable=C0103
+        """
+        Starts the stream with the Java serialization header: the magic
+        number followed by the stream version
+        """
+        # ">HH": two big-endian unsigned shorts
+        magic = StreamConstants.STREAM_MAGIC
+        version = StreamConstants.STREAM_VERSION
+        self._writeStruct(">HH", 4, (magic, version))
+
+    def writeObject(self, obj):  # pylint:disable=C0103
+        """
+        Appends an object to the serialization stream, using the writer
+        matching its type
+
+        :param obj: A string or a deserialized Java object
+        :raise RuntimeError: Unsupported type
+        """
+        log_debug("Writing object of type {0}".format(type(obj).__name__))
+
+        # Pairs of (bean type(s), writer method), checked in this order:
+        # the first matching entry handles the object
+        bean_writers = (
+            ((JavaArray, JavaByteArray), self.write_array),
+            (JavaEnum, self.write_enum),
+            (JavaObject, self.write_object),
+            (JavaString, self.write_string),
+            (JavaClass, self.write_class),
+        )
+
+        for bean_types, writer in bean_writers:
+            if isinstance(obj, bean_types):
+                writer(obj)
+                return
+
+        # Not a deserialized bean: only None and exact str are accepted
+        if obj is None:
+            # Null
+            self.write_null()
+        elif type(obj) is str:  # pylint:disable=C0123
+            # String value
+            self.write_blockdata(obj)
+        else:
+            # Unhandled type
+            raise RuntimeError(
+                "Object serialization of type {0} is not "
+                "supported.".format(type(obj))
+            )
+
+    def _writeStruct(self, unpack, length, args):  # pylint:disable=C0103
+        """
+        Packs the given values and appends them to the serialization stream
+
+        :param unpack: Struct format string
+        :param length: Unused
+        :param args: Struct arguments
+        """
+        packer = struct.Struct(unpack)
+        self.object_stream.write(packer.pack(*args))
+
+    def _writeString(self, obj, use_reference=True):  # pylint:disable=C0103
+        """
+        Appends a string (length, then UTF-8 content) or a reference to it
+
+        :param obj: String to serialize
+        :param use_reference: If True, allow writing a reference
+        """
+        # TODO: Convert to "modified UTF-8"
+        # http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8
+        string = to_bytes(obj, "utf-8")
+
+        if use_reference and isinstance(obj, JavaString):
+            try:
+                idx = self.references.index(obj)
+            except ValueError:
+                idx = None
+
+            if idx is not None:
+                # Already written: only write a reference to it
+                logging.debug(
+                    "*** Reusing ref 0x%X for string: %s",
+                    idx + StreamConstants.BASE_REFERENCE_IDX,
+                    obj,
+                )
+                self.write_reference(idx)
+                return
+
+            # First appearance of the string
+            self.references.append(obj)
+            logging.debug(
+                "*** Adding ref 0x%X for string: %s",
+                len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+                obj,
+            )
+
+        # Content: length as a big-endian unsigned short, then the bytes
+        self._writeStruct(">H", 2, (len(string),))
+        self.object_stream.write(string)
+
+    def write_string(self, obj, use_reference=True):
+        """
+        Writes a Java string with the TC_STRING type marker
+
+        :param obj: The string to print
+        :param use_reference: If True, allow writing a reference
+        """
+        known_idx = None
+        if use_reference and isinstance(obj, JavaString):
+            try:
+                known_idx = self.references.index(obj)
+            except ValueError:
+                # String is not referenced: let _writeString store it
+                pass
+
+        if known_idx is not None:
+            # Reuse the referenced string
+            logging.debug(
+                "*** Reusing ref 0x%X for String: %s",
+                known_idx + StreamConstants.BASE_REFERENCE_IDX,
+                obj,
+            )
+            self.write_reference(known_idx)
+        else:
+            self._writeStruct(">B", 1, (TerminalCode.TC_STRING,))
+            self._writeString(obj, use_reference)
+
+    def write_enum(self, obj):
+        """
+        Writes an Enum value: marker, class description or reference, constant
+
+        :param obj: A JavaEnum object
+        """
+        # FIXME: the output doesn't have the same references as the real
+        # serializable form
+        self._writeStruct(">B", 1, (TerminalCode.TC_ENUM,))
+
+        try:
+            idx = self.references.index(obj)
+        except ValueError:
+            # First occurrence: register the enum, then write its class
+            self.references.append(obj)
+            logging.debug(
+                "*** Adding ref 0x%X for enum: %s",
+                len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+                obj,
+            )
+
+            self.write_classdesc(obj.get_class())
+        else:
+            self.write_reference(idx)  # Known enum: reference instead of class
+
+        self.write_string(obj.constant)  # Written in both cases
+
+    def write_blockdata(self, obj, parent=None):  # pylint:disable=W0613
+        """
+        Appends a block of data to the serialization stream
+
+        :param obj: String form of the data block
+        :param parent: Unused
+        """
+        if isinstance(obj, UNICODE_TYPE):
+            # Latin-1: keep bytes as is
+            obj = to_bytes(obj, "latin-1")
+
+        length = len(obj)
+        if length <= 0xFF:
+            # Small block: TC_BLOCKDATA (unsigned byte) (byte)[size]
+            # The size is a single unsigned byte, i.e. 255 at most
+            self._writeStruct(">B", 1, (TerminalCode.TC_BLOCKDATA,))
+            self._writeStruct(">B", 1, (length,))
+        else:
+            # Large block: TC_BLOCKDATALONG (unsigned int) (byte)[size]
+            self._writeStruct(">B", 1, (TerminalCode.TC_BLOCKDATALONG,))
+            self._writeStruct(">I", 1, (length,))
+
+        self.object_stream.write(obj)
+
+    def write_null(self):
+        """Writes a "null" value"""
+        # A null is only made of its one-byte type marker
+        marker = TerminalCode.TC_NULL
+        self._writeStruct(">B", 1, (marker,))
+
+    def write_object(self, obj, parent=None):
+        """
+        Writes an object: marker, class description, fields, then annotations
+
+        :param obj: The object to write (a transformer may replace it)
+        :param parent: Not yet used
+        """
+        # The first transformer returning another object replaces "obj"
+        for transformer in self.object_transformers:
+            tmp_object = transformer.transform(obj)
+            if tmp_object is not obj:
+                obj = tmp_object
+                break
+
+        self._writeStruct(">B", 1, (TerminalCode.TC_OBJECT,))
+        cls = obj.get_class()
+        self.write_classdesc(cls)
+
+        # Add reference: an empty list is stored, not the object itself
+        self.references.append([])
+        logging.debug(
+            "*** Adding ref 0x%X for object %s",
+            len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+            obj,
+        )
+
+        all_names = collections.deque()
+        all_types = collections.deque()
+        tmpcls = cls
+        while tmpcls:  # Fields of the parent classes are prepended
+            all_names.extendleft(reversed(tmpcls.fields_names))
+            all_types.extendleft(reversed(tmpcls.fields_types))
+            tmpcls = tmpcls.superclass
+        del tmpcls
+
+        logging.debug("<=> Field names: %s", all_names)
+        logging.debug("<=> Field types: %s", all_types)
+
+        for field_name, field_type in zip(all_names, all_types):
+            try:
+                logging.debug(
+                    "Writing field %s (%s): %s",
+                    field_name,
+                    field_type,
+                    getattr(obj, field_name),
+                )
+                self._write_value(field_type, getattr(obj, field_name))
+            except AttributeError as ex:
+                log_error(
+                    "No attribute {0} for object {1}\nDir: {2}".format(
+                        ex, repr(obj), dir(obj)
+                    )
+                )
+                raise
+        del all_names, all_types
+
+        if (
+            cls.flags & ClassDescFlags.SC_SERIALIZABLE
+            and cls.flags & ClassDescFlags.SC_WRITE_METHOD
+            or cls.flags & ClassDescFlags.SC_EXTERNALIZABLE
+            and cls.flags & ClassDescFlags.SC_BLOCK_DATA
+        ):
+            for annotation in obj.annotations:
+                log_debug(
+                    "Write annotation {0} for {1}".format(
+                        repr(annotation), repr(obj)
+                    )
+                )
+                if annotation is None:
+                    self.write_null()
+                else:
+                    self.writeObject(annotation)
+            self._writeStruct(">B", 1, (TerminalCode.TC_ENDBLOCKDATA,))
+
+    def write_class(self, obj, parent=None):  # pylint:disable=W0613
+        """
+        Writes a class to the stream: TC_CLASS marker, then its description
+
+        :param obj: A JavaClass object
+        :param parent: Unused
+        """
+        self._writeStruct(">B", 1, (TerminalCode.TC_CLASS,))
+        self.write_classdesc(obj)
+
+    def write_classdesc(self, obj, parent=None):  # pylint:disable=W0613
+        """
+        Writes a class description, or a reference to it if already written
+
+        :param obj: Class description to write
+        :param parent: Unused
+        """
+        if obj in self.references:
+            # Use reference
+            self.write_reference(self.references.index(obj))
+            return
+
+        # Add reference
+        self.references.append(obj)
+        logging.debug(
+            "*** Adding ref 0x%X for classdesc %s",
+            len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+            obj.name,
+        )
+
+        self._writeStruct(">B", 1, (TerminalCode.TC_CLASSDESC,))
+        self._writeString(obj.name)
+        self._writeStruct(">qB", 1, (obj.serialVersionUID, obj.flags))
+        self._writeStruct(">H", 1, (len(obj.fields_names),))
+
+        complex_types = (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY)
+        for field_name, field_type in zip(obj.fields_names, obj.fields_types):
+            type_char = self._convert_type_to_char(field_type)
+            self._writeStruct(">B", 1, (type_char,))
+            self._writeString(field_name)
+            if ord(field_type[0]) not in complex_types:
+                # Nothing more is written for the other kinds of fields
+                continue
+
+            try:
+                idx = self.references.index(field_type)
+            except ValueError:
+                # First appearance of the type
+                self.references.append(field_type)
+                logging.debug(
+                    "*** Adding ref 0x%X for field type %s",
+                    len(self.references)
+                    - 1
+                    + StreamConstants.BASE_REFERENCE_IDX,
+                    field_type,
+                )
+
+                self.write_string(field_type, False)
+            else:
+                # Write a reference to the previous type
+                logging.debug(
+                    "*** Reusing ref 0x%X for %s (%s)",
+                    idx + StreamConstants.BASE_REFERENCE_IDX,
+                    field_type,
+                    field_name,
+                )
+                self.write_reference(idx)
+
+        # End marker after the fields, then the parent class (or null)
+        self._writeStruct(">B", 1, (TerminalCode.TC_ENDBLOCKDATA,))
+        if obj.superclass:
+            self.write_classdesc(obj.superclass)
+        else:
+            self.write_null()
+
+    def write_reference(self, ref_index):
+        """
+        Writes a reference to an element already written in the stream:
+        the TC_REFERENCE marker followed by the handle of the element
+
+        :param ref_index: Local index (0-based) to the reference
+        """
+        # Handles written in the stream are shifted by BASE_REFERENCE_IDX
+        handle = ref_index + StreamConstants.BASE_REFERENCE_IDX
+
+        # ">BL": unsigned byte marker, then big-endian unsigned 32-bit handle
+        reference = (TerminalCode.TC_REFERENCE, handle)
+        self._writeStruct(">BL", 1, reference)
+
+    def write_array(self, obj):
+        """
+        Writes a JavaArray: marker, class description, size, then the items
+
+        :param obj: A JavaArray object
+        """
+        classdesc = obj.get_class()
+        self._writeStruct(">B", 1, (TerminalCode.TC_ARRAY,))
+        self.write_classdesc(classdesc)
+        self._writeStruct(">i", 1, (len(obj),))
+
+        # Add reference
+        self.references.append(obj)
+        logging.debug(
+            "*** Adding ref 0x%X for array []",
+            len(self.references) - 1 + StreamConstants.BASE_REFERENCE_IDX,
+        )
+
+        array_type_code = TypeCode(ord(classdesc.name[0]))
+        assert array_type_code == TypeCode.TYPE_ARRAY
+        type_code = TypeCode(ord(classdesc.name[1]))
+
+        if type_code == TypeCode.TYPE_ARRAY:
+            for sub_array in obj:
+                self.write_array(sub_array)
+        elif type_code == TypeCode.TYPE_OBJECT:
+            for item in obj:
+                self._write_value(classdesc.name[1:], item)
+        else:
+            log_debug("Write array of type {0}".format(chr(type_code.value)))
+            for item in obj:
+                log_debug("Writing: %s" % item)
+                self._write_value(type_code, item)
+
+    def _write_value(self, raw_field_type, value):
+        """
+        Writes the value of a field or of an array item
+
+        :param raw_field_type: Value type
+        :param value: The value itself
+        :raise RuntimeError: Unknown type code or unsupported kind of value
+        """
+        field_type = raw_field_type
+        if not isinstance(field_type, (TypeCode, int)):
+            # We don't need details for arrays and objects
+            field_type = TypeCode(ord(raw_field_type[0]))
+
+        if field_type == TypeCode.TYPE_BOOLEAN:
+            self._writeStruct(">B", 1, (1 if value else 0,))
+        elif field_type == TypeCode.TYPE_BYTE:
+            self._writeStruct(">b", 1, (value,))
+        elif field_type == TypeCode.TYPE_CHAR:
+            self._writeStruct(">H", 1, (ord(value),))
+        elif field_type == TypeCode.TYPE_SHORT:
+            self._writeStruct(">h", 1, (value,))
+        elif field_type == TypeCode.TYPE_INTEGER:
+            self._writeStruct(">i", 1, (value,))
+        elif field_type == TypeCode.TYPE_LONG:
+            self._writeStruct(">q", 1, (value,))
+        elif field_type == TypeCode.TYPE_FLOAT:
+            self._writeStruct(">f", 1, (value,))
+        elif field_type == TypeCode.TYPE_DOUBLE:
+            self._writeStruct(">d", 1, (value,))
+        elif field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+            if value is None:
+                self.write_null()
+            elif isinstance(value, JavaEnum):
+                self.write_enum(value)
+            elif isinstance(value, (JavaArray, JavaByteArray)):
+                self.write_array(value)
+            elif isinstance(value, JavaObject):
+                self.write_object(value)
+            elif isinstance(value, JavaString):
+                self.write_string(value)
+            elif isinstance(value, JavaClass):
+                self.write_class(value)
+            elif isinstance(value, (BYTES_TYPE, UNICODE_TYPE)):
+                self.write_blockdata(value)
+            else:  # The type code is valid here: the value is the problem
+                raise RuntimeError("Unsupported type: {0}".format(type(value)))
+        else:
+            raise RuntimeError("Unknown typecode: {0}".format(field_type))
+
+    @staticmethod
+    def _convert_type_to_char(type_char):
+        """
+        Converts the given type code to an int
+
+        :param type_char: A TypeCode, an int or a string starting by a code
+        :return: The integer value of the type code
+        :raise RuntimeError: Unsupported kind of argument
+        """
+        if isinstance(type_char, TypeCode):
+            return type_char.value
+        if isinstance(type_char, int):
+            return type_char
+
+        if isinstance(type_char, (BYTES_TYPE, UNICODE_TYPE)):
+            # Indexing bytes gives an int in Python 3: only call ord() on
+            # characters. TypeCode() raises an error if the type is invalid
+            first = type_char[0]
+            code = first if isinstance(first, int) else ord(first)
+            return TypeCode(code).value
+
+        details = (type_char, type(type_char).__name__)
+        raise RuntimeError("Typecode {0} ({1}) isn't supported.".format(*details))
diff --git a/javaobj/v1/transformers.py b/javaobj/v1/transformers.py
new file mode 100644
index 0000000..c581125
--- /dev/null
+++ b/javaobj/v1/transformers.py
@@ -0,0 +1,392 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Implementation of the object transformers in v1 parser
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+from typing import Callable, Dict
+import functools
+
+from .beans import JavaClass, JavaObject
+from .unmarshaller import JavaObjectUnmarshaller
+from ..constants import ClassDescFlags, TerminalCode, TypeCode
+from ..utils import (
+ log_debug,
+ log_error,
+ to_bytes,
+ read_struct,
+ read_string,
+)
+
+
+__all__ = ("DefaultObjectTransformer",)
+
+
+class DefaultObjectTransformer(object): # pylint:disable=R0205
+ """
+ Default transformer for the deserialized objects.
+ Converts JavaObject objects to Python types (maps, lists, ...)
+ """
+
+    class JavaList(list, JavaObject):
+        """
+        Python-Java list bridge type
+        """
+
+        def __init__(self, unmarshaller):
+            # type: (JavaObjectUnmarshaller) -> None
+            list.__init__(self)
+            JavaObject.__init__(self)
+
+        def __hash__(self):
+            return list.__hash__(self)  # NOTE(review): list.__hash__ is None
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Fills the list with the annotations of the object, but the first
+            """
+            # Lists have their content in their annotations
+            self.extend(self.annotations[1:])
+
+    @functools.total_ordering
+    class JavaPrimitiveClass(JavaObject):
+        """
+        Parent of Java classes matching a primitive (Bool, Integer, Long, ...)
+        """
+
+        def __init__(self, unmarshaller):  # "unmarshaller" is not used here
+            JavaObject.__init__(self)
+            self.value = None
+
+        def __str__(self):
+            return str(self.value)
+
+        def __repr__(self):
+            return repr(self.value)
+
+        def __hash__(self):
+            return hash(self.value)
+
+        def __eq__(self, other):  # "other" is compared to the wrapped value
+            return self.value == other
+
+        def __lt__(self, other):  # Other orderings come from total_ordering
+            return self.value < other
+
+    class JavaBool(JavaPrimitiveClass):
+        def __bool__(self):  # Must return a real bool, whatever the value is
+            return bool(self.value)
+
+    class JavaInt(JavaPrimitiveClass):
+        def __int__(self):  # int(obj) gives the wrapped value
+            return self.value
+
+    class JavaMap(dict, JavaObject):
+        """
+        Python-Java dictionary/map bridge type
+        """
+
+        def __init__(self, unmarshaller):
+            # type: (JavaObjectUnmarshaller) -> None
+            dict.__init__(self)
+            JavaObject.__init__(self)
+
+        def __hash__(self):
+            return dict.__hash__(self)  # NOTE(review): dict.__hash__ is None
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Fills the map with the key/value pairs found in the annotations
+            """
+            # Group annotation elements 2 by 2 (the first one is skipped)
+            args = [iter(self.annotations[1:])] * 2  # One iterator, used twice
+            for key, value in zip(*args):
+                self[key] = value
+
+    class JavaLinkedHashMap(JavaMap):
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Reads the entries of the map directly through the unmarshaller
+            """
+            # Blockdata opid (NOTE(review): why a ClassDescFlags? confirm)
+            (opid,) = unmarshaller._readStruct(">B")
+            if opid != ClassDescFlags.SC_BLOCK_DATA:
+                raise ValueError("Start of block data not found")
+
+            # Read HashMap fields
+            self.buckets = unmarshaller._read_value(
+                TypeCode.TYPE_INTEGER, ident
+            )
+            self.size = unmarshaller._read_value(TypeCode.TYPE_INTEGER, ident)
+
+            # Read entries: "size" pairs of key then value
+            for _ in range(self.size):
+                key = unmarshaller._read_and_exec_opcode()[1]
+                value = unmarshaller._read_and_exec_opcode()[1]
+                self[key] = value
+
+            # Ignore the end of the blockdata
+            unmarshaller._read_and_exec_opcode(
+                ident, [TerminalCode.TC_ENDBLOCKDATA]
+            )
+
+            # Ignore the trailing 0
+            (opid,) = unmarshaller._readStruct(">B")
+            if opid != 0:
+                raise ValueError("Should find 0x0, got {0:x}".format(opid))
+
+    class JavaSet(set, JavaObject):
+        """
+        Python-Java set bridge type
+        """
+
+        def __init__(self, unmarshaller):
+            # type: (JavaObjectUnmarshaller) -> None
+            set.__init__(self)
+            JavaObject.__init__(self)
+
+        def __hash__(self):
+            return set.__hash__(self)  # NOTE(review): set.__hash__ is None
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Fills the set with the annotations of the object, but the first
+            """
+            self.update(self.annotations[1:])
+
+    class JavaTreeSet(JavaSet):
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Fills the set with the annotations following the size of the set
+            """
+            # Annotation[1] == size of the set
+            self.update(self.annotations[2:])
+
+    class JavaTime(JavaObject):
+        """
+        Represents the classes found in the java.time package
+
+        The semantic of the fields depends on the type of time that has been
+        parsed
+        """
+
+        DURATION_TYPE = 1
+        INSTANT_TYPE = 2
+        LOCAL_DATE_TYPE = 3
+        LOCAL_TIME_TYPE = 4
+        LOCAL_DATE_TIME_TYPE = 5
+        ZONE_DATE_TIME_TYPE = 6
+        ZONE_REGION_TYPE = 7
+        ZONE_OFFSET_TYPE = 8
+        OFFSET_TIME_TYPE = 9
+        OFFSET_DATE_TIME_TYPE = 10
+        YEAR_TYPE = 11
+        YEAR_MONTH_TYPE = 12
+        MONTH_DAY_TYPE = 13
+        PERIOD_TYPE = 14
+
+        def __init__(self, unmarshaller):
+            # type: (JavaObjectUnmarshaller) -> None
+            JavaObject.__init__(self)
+            self.type = -1  # Replaced by a *_TYPE value in __extra_loading__
+            self.year = None
+            self.month = None
+            self.day = None
+            self.hour = None
+            self.minute = None
+            self.second = None
+            self.nano = None
+            self.offset = None
+            self.zone = None
+
+            self.time_handlers = {  # Kind of time -> parser of its content
+                self.DURATION_TYPE: self.do_duration,
+                self.INSTANT_TYPE: self.do_instant,
+                self.LOCAL_DATE_TYPE: self.do_local_date,
+                self.LOCAL_DATE_TIME_TYPE: self.do_local_date_time,
+                self.LOCAL_TIME_TYPE: self.do_local_time,
+                self.ZONE_DATE_TIME_TYPE: self.do_zoned_date_time,
+                self.ZONE_OFFSET_TYPE: self.do_zone_offset,
+                self.ZONE_REGION_TYPE: self.do_zone_region,
+                self.OFFSET_TIME_TYPE: self.do_offset_time,
+                self.OFFSET_DATE_TIME_TYPE: self.do_offset_date_time,
+                self.YEAR_TYPE: self.do_year,
+                self.YEAR_MONTH_TYPE: self.do_year_month,
+                self.MONTH_DAY_TYPE: self.do_month_day,
+                self.PERIOD_TYPE: self.do_period,
+            }
+
+        def __str__(self):  # The type is printed in decimal, like *_TYPE
+            return (
+                "JavaTime(type={s.type}, "
+                "year={s.year}, month={s.month}, day={s.day}, "
+                "hour={s.hour}, minute={s.minute}, second={s.second}, "
+                "nano={s.nano}, offset={s.offset}, zone={s.zone})"
+            ).format(s=self)
+
+        def __extra_loading__(self, unmarshaller, ident=0):
+            # type: (JavaObjectUnmarshaller, int) -> None
+            """
+            Parses the first annotation: a type byte, then the time content
+            """
+            # Convert back annotations to bytes
+            # latin-1 is used to ensure that bytes are kept as is
+            content = to_bytes(self.annotations[0], "latin1")
+            (self.type,), content = read_struct(content, ">b")
+
+            try:
+                self.time_handlers[self.type](unmarshaller, content)
+            except KeyError as ex:  # No handler: logged, fields are kept
+                log_error("Unhandled kind of time: {}".format(ex))
+
+        def do_duration(self, unmarshaller, data):  # 8-byte then 4-byte int
+            (self.second, self.nano), data = read_struct(data, ">qi")
+            return data
+
+        def do_instant(self, unmarshaller, data):  # Same layout as duration
+            (self.second, self.nano), data = read_struct(data, ">qi")
+            return data
+
+        def do_local_date(self, unmarshaller, data):  # 4-byte int, 2 bytes
+            (self.year, self.month, self.day), data = read_struct(data, ">ibb")
+            return data
+
+        def do_local_time(self, unmarshaller, data):
+            (hour,), data = read_struct(data, ">b")
+            minute = 0
+            second = 0
+            nano = 0
+
+            if hour < 0:  # Complemented byte: the next components stay at 0
+                hour = ~hour
+            else:
+                (minute,), data = read_struct(data, ">b")
+                if minute < 0:
+                    minute = ~minute
+                else:
+                    (second,), data = read_struct(data, ">b")
+                    if second < 0:
+                        second = ~second
+                    else:
+                        (nano,), data = read_struct(data, ">i")
+
+            self.hour = hour
+            self.minute = minute
+            self.second = second
+            self.nano = nano
+            return data
+
+        def do_local_date_time(self, unmarshaller, data):  # Date, then time
+            data = self.do_local_date(unmarshaller, data)
+            data = self.do_local_time(unmarshaller, data)
+            return data
+
+        def do_zoned_date_time(self, unmarshaller, data):
+            data = self.do_local_date_time(unmarshaller, data)
+            data = self.do_zone_offset(unmarshaller, data)
+            data = self.do_zone_region(unmarshaller, data)
+            return data
+
+        def do_zone_offset(self, unmarshaller, data):
+            (offset_byte,), data = read_struct(data, ">b")
+            if offset_byte == 127:  # Marker: the offset follows, as an int
+                (self.offset,), data = read_struct(data, ">i")
+            else:
+                self.offset = offset_byte * 900  # Byte counts 900-unit steps
+            return data
+
+        def do_zone_region(self, unmarshaller, data):
+            self.zone, data = read_string(data)
+            return data
+
+        def do_offset_time(self, unmarshaller, data):  # Time, then offset
+            data = self.do_local_time(unmarshaller, data)
+            data = self.do_zone_offset(unmarshaller, data)
+            return data
+
+        def do_offset_date_time(self, unmarshaller, data):
+            data = self.do_local_date_time(unmarshaller, data)
+            data = self.do_zone_offset(unmarshaller, data)
+            return data
+
+        def do_year(self, unmarshaller, data):  # 4-byte int
+            (self.year,), data = read_struct(data, ">i")
+            return data
+
+        def do_year_month(self, unmarshaller, data):  # 4-byte int, 1 byte
+            (self.year, self.month), data = read_struct(data, ">ib")
+            return data
+
+        def do_month_day(self, unmarshaller, data):  # 2 bytes
+            (self.month, self.day), data = read_struct(data, ">bb")
+            return data
+
+        def do_period(self, unmarshaller, data):  # Three 4-byte ints
+            (self.year, self.month, self.day), data = read_struct(data, ">iii")
+            return data
+
+    TYPE_MAPPER = {  # Java class name -> bridge type, looked up in create()
+        "java.util.ArrayList": JavaList,
+        "java.util.LinkedList": JavaList,
+        "java.util.HashMap": JavaMap,
+        "java.util.LinkedHashMap": JavaLinkedHashMap,
+        "java.util.TreeMap": JavaMap,
+        "java.util.HashSet": JavaSet,
+        "java.util.LinkedHashSet": JavaSet,
+        "java.util.TreeSet": JavaTreeSet,
+        "java.time.Ser": JavaTime,
+        "java.lang.Boolean": JavaBool,
+        "java.lang.Integer": JavaInt,
+        "java.lang.Long": JavaInt,
+    }  # type: Dict[str, Callable[[JavaObjectUnmarshaller], JavaObject]]
+
+    def create(self, classdesc, unmarshaller):
+        # type: (JavaClass, JavaObjectUnmarshaller) -> JavaObject
+        """
+        Creates the Python object matching the given Java class
+
+        :param classdesc: The description of a Java class
+        :param unmarshaller: Given to the constructor of the mapped type
+        :return: An instance of the mapped type, or a plain JavaObject
+        """
+        mapped_type = self.TYPE_MAPPER.get(classdesc.name)
+        if mapped_type is None:
+            # Return a JavaObject by default
+            return JavaObject()
+
+        # Trace the name of the handled class
+        for trace in ("---", classdesc.name, "---"):
+            log_debug(trace)
+
+        # The mapped types take the unmarshaller as constructor argument
+        java_object = mapped_type(unmarshaller)
+        log_debug(">>> java_object: {0}".format(java_object))
+        return java_object
diff --git a/javaobj/v1/unmarshaller.py b/javaobj/v1/unmarshaller.py
new file mode 100644
index 0000000..c3c7709
--- /dev/null
+++ b/javaobj/v1/unmarshaller.py
@@ -0,0 +1,853 @@
+#!/usr/bin/python
+# -- Content-Encoding: utf-8 --
+"""
+Provides functions for reading Java objects serialized by ObjectOutputStream.
+This form of object representation is a standard data interchange format in
+Java world.
+
+javaobj module exposes an API familiar to users of the standard library
+marshal, pickle and json modules.
+
+See:
+http://download.oracle.com/javase/6/docs/platform/serialization/spec/protocol.html
+
+:authors: Volodymyr Buell, Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+# Standard library
+from typing import Any, Union
+import os
+import struct
+
+# Javaobj modules
+from .beans import (
+ JavaClass,
+ JavaString,
+ JavaObject,
+ JavaByteArray,
+ JavaEnum,
+ JavaArray,
+)
+from ..constants import (
+ StreamConstants,
+ ClassDescFlags,
+ TerminalCode,
+ TypeCode,
+ StreamCodeDebug,
+)
+from ..utils import (
+ log_debug,
+ log_error,
+ read_to_str,
+ to_unicode,
+ unicode_char,
+ hexdump,
+)
+
+numpy = None # Imported only when really used
+
+# ------------------------------------------------------------------------------
+
+__all__ = ("JavaObjectUnmarshaller",)
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+# Convertion of a Java type char to its NumPy equivalent
+NUMPY_TYPE_MAP = {
+ TypeCode.TYPE_BYTE: "B",
+ TypeCode.TYPE_CHAR: "b",
+ TypeCode.TYPE_DOUBLE: ">d",
+ TypeCode.TYPE_FLOAT: ">f",
+ TypeCode.TYPE_INTEGER: ">i",
+ TypeCode.TYPE_LONG: ">l",
+ TypeCode.TYPE_SHORT: ">h",
+ TypeCode.TYPE_BOOLEAN: ">B",
+}
+
+# ------------------------------------------------------------------------------
+
+
+class JavaObjectUnmarshaller:
+ """
+ Deserializes a Java serialization stream
+ """
+
+ def __init__(self, stream, use_numpy_arrays=False):
+ """
+ Sets up members and reads the stream header
+
+ :param stream: An input stream (opened in binary/bytes mode)
+ :raise IOError: Invalid input stream
+ """
+ self.use_numpy_arrays = use_numpy_arrays  # only effective if numpy imports
+
+ # Numpy array support
+ if self.use_numpy_arrays:
+ try:
+ global numpy
+ import numpy as np
+
+ numpy = np
+ except ImportError:
+ pass  # numpy stays None: do_array() then fills a plain JavaArray
+
+ # Check stream
+ if stream is None:
+ raise IOError("No input stream given")
+
+ # Prepare the association Terminal Symbol -> Reading method
+ self.opmap = {
+ TerminalCode.TC_NULL: self.do_null,
+ TerminalCode.TC_CLASSDESC: self.do_classdesc,
+ TerminalCode.TC_OBJECT: self.do_object,
+ TerminalCode.TC_STRING: self.do_string,
+ TerminalCode.TC_LONGSTRING: self.do_string_long,
+ TerminalCode.TC_ARRAY: self.do_array,
+ TerminalCode.TC_CLASS: self.do_class,
+ TerminalCode.TC_BLOCKDATA: self.do_blockdata,
+ TerminalCode.TC_BLOCKDATALONG: self.do_blockdata_long,
+ TerminalCode.TC_REFERENCE: self.do_reference,
+ TerminalCode.TC_ENUM: self.do_enum,
+ # note that we are reusing do_null:
+ TerminalCode.TC_ENDBLOCKDATA: self.do_null,
+ }
+
+ # Set up members
+ self.current_object = None
+ self.reference_counter = 0
+ self.references = []
+ self.object_transformers = []
+ self.object_stream = stream
+
+ # Read the stream header (magic & version)
+ self._readStreamHeader()
+
+ def readObject(self, ignore_remaining_data=False):
+ """
+ Reads an object from the input stream
+
+ :param ignore_remaining_data: If True, don't log an error when
+ unused trailing bytes are remaining
+ :return: The unmarshalled object
+ :raise Exception: Any exception that occurred during unmarshalling
+ """
+ try:
+ # TODO: add expects
+ _, res = self._read_and_exec_opcode(ident=0)
+
+ position_bak = self.object_stream.tell()  # position right after the object
+ the_rest = self.object_stream.read()  # peek at everything that is left
+ if not ignore_remaining_data and len(the_rest) != 0:
+ log_error(
+ "Warning!!!!: Stream still has {0} bytes left. "
+ "Enable debug mode of logging to see the hexdump.".format(
+ len(the_rest)
+ )
+ )
+ log_debug("\n{0}".format(hexdump(the_rest)))
+ else:
+ log_debug("Java Object unmarshalled successfully!")
+
+ self.object_stream.seek(position_bak)  # undo the peek
+ return res
+ except Exception:
+ self._oops_dump_state(ignore_remaining_data)  # log the state, then re-raise
+ raise
+
+ def add_transformer(self, transformer):
+ """
+ Appends an object transformer to the deserialization process
+
+ :param transformer: An object with a create(classdesc, unmarshaller) method
+ """
+ self.object_transformers.append(transformer)
+
+ def _readStreamHeader(self):
+ """
+ Reads and checks the magic number and version of a Java serialization stream
+
+ :raise IOError: Invalid magic header (not a Java stream)
+ """
+ (magic, version) = self._readStruct(">HH")  # two big-endian unsigned shorts
+ if (
+ magic != StreamConstants.STREAM_MAGIC
+ or version != StreamConstants.STREAM_VERSION
+ ):
+ raise IOError(
+ "The stream is not java serialized object. "
+ "Invalid stream header: {0:04X}{1:04X}".format(magic, version)
+ )
+
+ def _read_and_exec_opcode(self, ident=0, expect=None):
+ """
+ Reads the next opcode, and executes its handler
+
+ :param ident: Log indentation level
+ :param expect: A list of expected opcodes (None or empty: accept any)
+ :return: A tuple: (opcode, result of the handler)
+ :raise IOError: Read opcode is not one of the expected ones
+ :raise RuntimeError: Unknown opcode
+ """
+ position = self.object_stream.tell()  # kept for log and error messages
+ (opid,) = self._readStruct(">B")
+ log_debug(
+ "OpCode: 0x{0:X} -- {1} (at offset 0x{2:X})".format(
+ opid, StreamCodeDebug.op_id(opid), position
+ ),
+ ident,
+ )
+
+ if expect and opid not in expect:
+ raise IOError(
+ "Unexpected opcode 0x{0:X} -- {1} "
+ "(at offset 0x{2:X})".format(
+ opid, StreamCodeDebug.op_id(opid), position
+ )
+ )
+
+ try:
+ handler = self.opmap[opid]
+ except KeyError:
+ raise RuntimeError(
+ "Unknown OpCode in the stream: 0x{0:X} "
+ "(at offset 0x{1:X})".format(opid, position)
+ )
+ else:
+ return opid, handler(ident=ident)  # handlers keep their default parent
+
+ def _readStruct(self, unpack):
+ """
+ Reads from the input stream, using struct
+
+ :param unpack: A struct format string; its size is the number of bytes read
+ :return: The result of struct.unpack (tuple)
+ :raise RuntimeError: End of stream reached during unpacking
+ """
+ length = struct.calcsize(unpack)
+ ba = self.object_stream.read(length)
+
+ if len(ba) != length:  # short read: the stream ended too early
+ raise RuntimeError(
+ "Stream has been ended unexpectedly while unmarshaling."
+ )
+
+ return struct.unpack(unpack, ba)
+
+ def _readString(self, length_fmt="H"):
+ """
+ Reads a serialized string
+
+ :param length_fmt: Structure format of the string length (H or Q)
+ :return: The deserialized string
+ :raise RuntimeError: Unexpected end of stream
+ """
+ (length,) = self._readStruct(">{0}".format(length_fmt))
+ ba = self.object_stream.read(length)
+ return to_unicode(ba)
+
+ def do_classdesc(self, parent=None, ident=0):
+ """
+ Handles a TC_CLASSDESC opcode
+
+ :param parent: Optional object that receives the field names and types
+ :param ident: Log indentation level
+ :return: A JavaClass object
+ """
+ # TC_CLASSDESC className serialVersionUID newHandle classDescInfo
+ # classDescInfo:
+ # classDescFlags fields classAnnotation superClassDesc
+ # classDescFlags:
+ # (byte) // Defined in Terminal Symbols and Constants
+ # fields:
+ # (short) fieldDesc[count]
+
+ # fieldDesc:
+ # primitiveDesc
+ # objectDesc
+ # primitiveDesc:
+ # prim_typecode fieldName
+ # objectDesc:
+ # obj_typecode fieldName className1
+ clazz = JavaClass()
+ log_debug("[classdesc]", ident)
+ class_name = self._readString()
+ clazz.name = class_name
+ log_debug("Class name: %s" % class_name, ident)
+
+ # serialVersionUID is a Java (signed) long => 8 bytes
+ serialVersionUID, classDescFlags = self._readStruct(">qB")
+ clazz.serialVersionUID = serialVersionUID
+ clazz.flags = classDescFlags
+
+ self._add_reference(clazz, ident)
+
+ log_debug(
+ "Serial: 0x{0:X} / {0:d} - classDescFlags: 0x{1:X} {2}".format(
+ serialVersionUID,
+ classDescFlags,
+ StreamCodeDebug.flags(classDescFlags),
+ ),
+ ident,
+ )
+ (length,) = self._readStruct(">H")
+ log_debug("Fields num: 0x{0:X}".format(length), ident)
+
+ clazz.fields_names = []
+ clazz.fields_types = []
+ for fieldId in range(length):
+ (typecode,) = self._readStruct(">B")
+ field_name = self._readString()
+ base_field_type = self._convert_char_to_type(typecode)
+
+ log_debug("> Reading field {0}".format(field_name), ident)
+
+ if base_field_type == TypeCode.TYPE_ARRAY:
+ _, field_type = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE),
+ )
+
+ if type(field_type) is not JavaString: # pylint:disable=C0123
+ raise AssertionError(
+ "Field type must be a JavaString, "
+ "not {0}".format(type(field_type))
+ )
+
+ elif base_field_type == TypeCode.TYPE_OBJECT:
+ _, field_type = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE),
+ )
+
+ if isinstance(field_type, JavaClass):
+ # FIXME: ugly trick
+ field_type = JavaString(field_type.name)
+
+ if type(field_type) is not JavaString: # pylint:disable=C0123
+ raise AssertionError(
+ "Field type must be a JavaString, "
+ "not {0}".format(type(field_type))
+ )
+ else:
+ # Convert the TypeCode to its char value
+ field_type = JavaString(str(chr(base_field_type.value)))
+
+ log_debug(
+ "< FieldName: 0x{0:X} Name:{1} Type:{2} ID:{3}".format(
+ typecode, field_name, field_type, fieldId
+ ),
+ ident,
+ )
+ assert field_name is not None
+ assert field_type is not None
+
+ clazz.fields_names.append(field_name)
+ clazz.fields_types.append(field_type)
+
+ if parent:  # not set by _read_and_exec_opcode, which only passes ident
+ parent.__fields = clazz.fields_names # pylint:disable=W0212
+ parent.__types = clazz.fields_types # pylint:disable=W0212
+
+ # classAnnotation
+ (opid,) = self._readStruct(">B")
+ log_debug(
+ "OpCode: 0x{0:X} -- {1} (classAnnotation)".format(
+ opid, StreamCodeDebug.op_id(opid)
+ ),
+ ident,
+ )
+ if opid != TerminalCode.TC_ENDBLOCKDATA:
+ raise NotImplementedError("classAnnotation isn't implemented yet")
+
+ # superClassDesc
+ log_debug("Reading Super Class of {0}".format(clazz.name), ident)
+ _, superclassdesc = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(
+ TerminalCode.TC_CLASSDESC,
+ TerminalCode.TC_NULL,
+ TerminalCode.TC_REFERENCE,
+ ),
+ )
+ log_debug(
+ "Super Class for {0}: {1}".format(clazz.name, str(superclassdesc)),
+ ident,
+ )
+ clazz.superclass = superclassdesc
+ return clazz
+
+ def do_blockdata(self, parent=None, ident=0):
+ """
+ Handles TC_BLOCKDATA opcode
+
+ :param parent: Unused
+ :param ident: Log indentation level
+ :return: The block data, as converted by read_to_str
+ """
+ # TC_BLOCKDATA (unsigned byte) (byte)[size]
+ log_debug("[blockdata]", ident)
+ (length,) = self._readStruct(">B")  # size on one unsigned byte
+ ba = self.object_stream.read(length)
+
+ # Ensure we have an str
+ return read_to_str(ba)
+
+ def do_blockdata_long(self, parent=None, ident=0):
+ """
+ Handles TC_BLOCKDATALONG opcode
+
+ :param parent: Unused
+ :param ident: Log indentation level
+ :return: The block data, as converted by read_to_str
+ """
+ # TC_BLOCKDATALONG (int) (byte)[size]
+ log_debug("[blockdatalong]", ident)
+ (length,) = self._readStruct(">I")  # size on a big-endian unsigned 32-bit int
+ ba = self.object_stream.read(length)
+
+ # Ensure we have an str
+ return read_to_str(ba)
+
+ def do_class(self, parent=None, ident=0):
+ """
+ Handles TC_CLASS opcode
+
+ :param parent: Unused
+ :param ident: Log indentation level
+ :return: The result of the class description opcode (may be None)
+ """
+ # TC_CLASS classDesc newHandle
+ log_debug("[class]", ident)
+
+ # TODO: what to do with "(ClassDesc)prevObject".
+ # (see 3rd line for classDesc:)
+ _, classdesc = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(
+ TerminalCode.TC_CLASSDESC,
+ TerminalCode.TC_PROXYCLASSDESC,
+ TerminalCode.TC_NULL,
+ TerminalCode.TC_REFERENCE,
+ ),
+ )
+ log_debug("Classdesc: {0}".format(classdesc), ident)
+ self._add_reference(classdesc, ident)  # newHandle of the TC_CLASS entry
+ return classdesc
+
+ def do_object(self, parent=None, ident=0):
+ """
+ Handles a TC_OBJECT opcode
+
+ :param parent:
+ :param ident: Log indentation level
+ :return: A JavaClass object
+ """
+ # TC_OBJECT classDesc newHandle classdata[] // data for each class
+ java_object = JavaObject()
+ log_debug("[object]", ident)
+ log_debug(
+ "java_object.annotations just after instantiation: {0}".format(
+ java_object.annotations
+ ),
+ ident,
+ )
+
+ # TODO: what to do with "(ClassDesc)prevObject".
+ # (see 3rd line for classDesc:)
+ opcode, classdesc = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(
+ TerminalCode.TC_CLASSDESC,
+ TerminalCode.TC_PROXYCLASSDESC,
+ TerminalCode.TC_NULL,
+ TerminalCode.TC_REFERENCE,
+ ),
+ )
+ # self.TC_REFERENCE hasn't shown in spec, but actually is here
+
+ # Create object
+ for transformer in self.object_transformers:
+ java_object = transformer.create(classdesc, self)
+ if java_object is not None:
+ break
+
+ # Store classdesc of this object
+ java_object.classdesc = classdesc
+
+ # Store the reference
+ self._add_reference(java_object, ident)
+
+ # classdata[]
+
+ if (
+ classdesc.flags & ClassDescFlags.SC_EXTERNALIZABLE
+ and not classdesc.flags & ClassDescFlags.SC_BLOCK_DATA
+ ):
+ # TODO:
+ raise NotImplementedError("externalContents isn't implemented yet")
+
+ if classdesc.flags & ClassDescFlags.SC_SERIALIZABLE:
+ # TODO: look at ObjectInputStream.readSerialData()
+ # FIXME: Handle the SC_WRITE_METHOD flag
+
+ # create megalist
+ tempclass = classdesc
+ megalist = []
+ megatypes = []
+ log_debug("Constructing class...", ident)
+ while tempclass:
+ log_debug("Class: {0}".format(tempclass.name), ident + 1)
+ class_fields_str = " - ".join(
+ " ".join((str(field_type), field_name))
+ for field_type, field_name in zip(
+ tempclass.fields_types, tempclass.fields_names
+ )
+ )
+ if class_fields_str:
+ log_debug(class_fields_str, ident + 2)
+
+ fieldscopy = tempclass.fields_names[:]
+ fieldscopy.extend(megalist)
+ megalist = fieldscopy
+
+ fieldscopy = tempclass.fields_types[:]
+ fieldscopy.extend(megatypes)
+ megatypes = fieldscopy
+
+ tempclass = tempclass.superclass
+
+ log_debug("Values count: {0}".format(len(megalist)), ident)
+ log_debug("Prepared list of values: {0}".format(megalist), ident)
+ log_debug("Prepared list of types: {0}".format(megatypes), ident)
+
+ for field_name, field_type in zip(megalist, megatypes):
+ log_debug(
+ "Reading field: {0} - {1}".format(field_type, field_name)
+ )
+ res = self._read_value(field_type, ident, name=field_name)
+ java_object.__setattr__(field_name, res)
+
+ if (
+ classdesc.flags & ClassDescFlags.SC_SERIALIZABLE
+ and classdesc.flags & ClassDescFlags.SC_WRITE_METHOD
+ or classdesc.flags & ClassDescFlags.SC_EXTERNALIZABLE
+ and classdesc.flags & ClassDescFlags.SC_BLOCK_DATA
+ or classdesc.superclass is not None
+ and classdesc.superclass.flags & ClassDescFlags.SC_SERIALIZABLE
+ and classdesc.superclass.flags & ClassDescFlags.SC_WRITE_METHOD
+ ):
+ # objectAnnotation
+ log_debug(
+ "java_object.annotations before: {0}".format(
+ java_object.annotations
+ ),
+ ident,
+ )
+
+ while opcode != TerminalCode.TC_ENDBLOCKDATA:
+ opcode, obj = self._read_and_exec_opcode(ident=ident + 1)
+ # , expect=[self.TC_ENDBLOCKDATA, self.TC_BLOCKDATA,
+ # self.TC_OBJECT, self.TC_NULL, self.TC_REFERENCE])
+ if opcode != TerminalCode.TC_ENDBLOCKDATA:
+ java_object.annotations.append(obj)
+
+ log_debug("objectAnnotation value: {0}".format(obj), ident)
+
+ log_debug(
+ "java_object.annotations after: {0}".format(
+ java_object.annotations
+ ),
+ ident,
+ )
+
+ # Allow extra loading operations
+ if hasattr(java_object, "__extra_loading__"):
+ log_debug("Java object has extra loading capability.")
+ java_object.__extra_loading__(self, ident)
+
+ log_debug(">>> java_object: {0}".format(java_object), ident)
+ return java_object
+
+ def do_string(self, parent=None, ident=0):
+ """
+ Handles a TC_STRING opcode
+
+ :param parent: Unused
+ :param ident: Log indentation level
+ :return: A JavaString, also stored in the references list
+ """
+ log_debug("[string]", ident)
+ ba = JavaString(self._readString())  # default "H" length format
+ self._add_reference(ba, ident)
+ return ba
+
+ def do_string_long(self, parent=None, ident=0):
+ """
+ Handles a TC_LONGSTRING opcode
+
+ :param parent: Unused
+ :param ident: Log indentation level
+ :return: A JavaString, also stored in the references list
+ """
+ log_debug("[long string]", ident)
+ ba = JavaString(self._readString("Q"))  # length read with the "Q" format
+ self._add_reference(ba, ident)
+ return ba
+
+ def do_array(self, parent=None, ident=0):
+ """
+ Handles a TC_ARRAY opcode
+
+ :param parent:
+ :param ident: Log indentation level
+ :return: A list of deserialized objects
+ """
+ # TC_ARRAY classDesc newHandle (int) values[size]
+ log_debug("[array]", ident)
+ _, classdesc = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(
+ TerminalCode.TC_CLASSDESC,
+ TerminalCode.TC_PROXYCLASSDESC,
+ TerminalCode.TC_NULL,
+ TerminalCode.TC_REFERENCE,
+ ),
+ )
+
+ array = JavaArray(classdesc)
+
+ self._add_reference(array, ident)
+
+ (size,) = self._readStruct(">i")
+ log_debug("size: {0}".format(size), ident)
+
+ array_type_code = TypeCode(ord(classdesc.name[0]))
+ assert array_type_code == TypeCode.TYPE_ARRAY
+ type_code = TypeCode(ord(classdesc.name[1]))
+
+ if type_code in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+ for _ in range(size):
+ _, res = self._read_and_exec_opcode(ident=ident + 1)
+ log_debug("Object value: {0}".format(res), ident)
+ array.append(res)
+ elif type_code == TypeCode.TYPE_BYTE:
+ array = JavaByteArray(self.object_stream.read(size), classdesc)
+ elif self.use_numpy_arrays and numpy is not None:
+ array = numpy.fromfile(
+ self.object_stream,
+ dtype=NUMPY_TYPE_MAP[type_code],
+ count=size,
+ )
+ else:
+ for _ in range(size):
+ res = self._read_value(type_code, ident)
+ log_debug("Native value: {0}".format(repr(res)), ident)
+ array.append(res)
+
+ return array
+
+ def do_reference(self, parent=None, ident=0):
+ """
+ Handles a TC_REFERENCE opcode
+
+ :param parent: Unused
+ :param ident: Log indentation level
+ :return: The object previously stored in the references list
+ """
+ (handle,) = self._readStruct(">L")
+ log_debug("## Reference handle: 0x{0:X}".format(handle), ident)
+ ref = self.references[handle - StreamConstants.BASE_REFERENCE_IDX]  # to list index
+ log_debug("###-> Type: {0} - Value: {1}".format(type(ref), ref), ident)
+ return ref
+
+ @staticmethod
+ def do_null(parent=None, ident=0):
+ """
+ Handles a TC_NULL opcode (also registered for TC_ENDBLOCKDATA in opmap)
+
+ :param parent: Unused
+ :param ident: Log indentation level (unused)
+ :return: Always None
+ """
+ return None
+
+ def do_enum(self, parent=None, ident=0):
+ """
+ Handles a TC_ENUM opcode
+
+ :param parent: Unused
+ :param ident: Log indentation level
+ :return: A JavaEnum object
+ """
+ # TC_ENUM classDesc newHandle enumConstantName
+ enum = JavaEnum()
+ _, classdesc = self._read_and_exec_opcode(
+ ident=ident + 1,
+ expect=(
+ TerminalCode.TC_CLASSDESC,
+ TerminalCode.TC_PROXYCLASSDESC,
+ TerminalCode.TC_NULL,
+ TerminalCode.TC_REFERENCE,
+ ),
+ )
+ enum.classdesc = classdesc
+ self._add_reference(enum, ident)  # registered before its constant name is read
+ (
+ _,
+ enumConstantName,
+ ) = self._read_and_exec_opcode( # pylint:disable=C0103
+ ident=ident + 1,
+ expect=(TerminalCode.TC_STRING, TerminalCode.TC_REFERENCE),
+ )
+ enum.constant = enumConstantName
+ return enum
+
+ def _read_value(self, raw_field_type, ident, name=""):
+ # type: (Union[bytes, int, TypeCode], int, str) -> Any
+ """
+ Reads the next value, of the given type
+
+ :param raw_field_type: A TypeCode, its integer value, or a sequence starting with it
+ :param ident: Log indentation
+ :param name: Field name (for logs)
+ :return: The read value
+ :raise RuntimeError: Unknown field type
+ """
+ if isinstance(raw_field_type, TypeCode):
+ field_type = raw_field_type
+ elif isinstance(raw_field_type, int):
+ field_type = TypeCode(raw_field_type)
+ else:
+ # We don't need details for arrays and objects
+ raw_code = raw_field_type[0]
+ if isinstance(raw_code, int):
+ field_type = TypeCode(raw_code)
+ else:
+ field_type = TypeCode(ord(raw_code))
+
+ if field_type == TypeCode.TYPE_BOOLEAN:
+ (val,) = self._readStruct(">B")
+ res = bool(val) # type: Any
+ elif field_type == TypeCode.TYPE_BYTE:
+ (res,) = self._readStruct(">b")
+ elif field_type == TypeCode.TYPE_CHAR:
+ # TYPE_CHAR is defined by the serialization specification
+ # but not used in the implementation, so this is
+ # a hypothetical code
+ res = unicode_char(self._readStruct(">H")[0])
+ elif field_type == TypeCode.TYPE_SHORT:
+ (res,) = self._readStruct(">h")
+ elif field_type == TypeCode.TYPE_INTEGER:
+ (res,) = self._readStruct(">i")
+ elif field_type == TypeCode.TYPE_LONG:
+ (res,) = self._readStruct(">q")
+ elif field_type == TypeCode.TYPE_FLOAT:
+ (res,) = self._readStruct(">f")
+ elif field_type == TypeCode.TYPE_DOUBLE:
+ (res,) = self._readStruct(">d")
+ elif field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+ _, res = self._read_and_exec_opcode(ident=ident + 1)  # nested content
+ else:
+ raise RuntimeError("Unknown typecode: {0}".format(field_type))
+
+ log_debug(
+ "* {0} {1}: {2}".format(chr(field_type.value), name, repr(res)),
+ ident,
+ )
+ return res
+
+ @staticmethod
+ def _convert_char_to_type(type_char):
+ # type: (Any) -> TypeCode
+ """
+ Ensures a read character is a typecode.
+
+ :param type_char: Read typecode (an int, or a character converted with ord)
+ :return: The matching TypeCode enumeration member
+ :raise RuntimeError: Unknown typecode
+ """
+ typecode = type_char
+ if not isinstance(type_char, int):
+ typecode = ord(type_char)
+
+ try:
+ return TypeCode(typecode)
+ except ValueError:  # value not accepted by TypeCode
+ raise RuntimeError(
+ "Typecode {0} ({1}) isn't supported.".format(
+ type_char, typecode
+ )
+ )
+
+ def _add_reference(self, obj, ident=0):
+ """
+ Appends a read object to the references list (handle = index + base index)
+
+ :param obj: Reference to add
+ :param ident: Log indentation level
+ """
+ log_debug(
+ "## New reference handle 0x{0:X}: {1} -> {2}".format(
+ len(self.references) + StreamConstants.BASE_REFERENCE_IDX,
+ type(obj).__name__,
+ repr(obj),
+ ),
+ ident,
+ )
+ self.references.append(obj)
+
+ def _oops_dump_state(self, ignore_remaining_data=False):
+ """
+ Log a deserialization error
+
+ :param ignore_remaining_data: If True, don't log an error when
+ unused trailing bytes are remaining
+ """
+ log_error("==Oops state dump" + "=" * (30 - 17))
+ log_error("References: {0}".format(self.references))
+ log_error(
+ "Stream seeking back at -16 byte "
+ "(2nd line is an actual position!):"
+ )
+
+ # Do not use a keyword argument
+ self.object_stream.seek(-16, os.SEEK_CUR)
+ position = self.object_stream.tell()
+ the_rest = self.object_stream.read()
+
+ if not ignore_remaining_data and len(the_rest) != 0:
+ log_error(
+ "Warning!!!!: Stream still has {0} bytes left:\n{1}".format(
+ len(the_rest), hexdump(the_rest, position)
+ )
+ )
+
+ log_error("=" * 30)
diff --git a/javaobj/v2/__init__.py b/javaobj/v2/__init__.py
new file mode 100644
index 0000000..e9745ea
--- /dev/null
+++ b/javaobj/v2/__init__.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""
+Rewritten version of the un-marshalling process of javaobj.
+
+The previous process had issues in some cases.
+
+This package is based on the approach of the jdeserialize project (in Java)
+See: https://github.com/frohoff/jdeserialize
+
+The object transformer concept of javaobj has been adapted to work with this
+approach.
+
+This package should handle more files than before, in read-only mode.
+The writing mode should be handled by the "classic" javaobj code.
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from . import api, beans, core, main, stream, transformers # noqa: 401
+from .main import load, loads # noqa: 401
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
diff --git a/javaobj/v2/api.py b/javaobj/v2/api.py
new file mode 100644
index 0000000..8d9cd0d
--- /dev/null
+++ b/javaobj/v2/api.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Definition of the object transformer API
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+from typing import List, Optional
+
+from ..constants import TypeCode # pylint:disable=W0611
+from .beans import ( # pylint:disable=W0611
+ JavaClassDesc,
+ JavaInstance,
+ ParsedJavaContent,
+)
+from .stream import DataStreamReader # pylint:disable=W0611
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class IJavaStreamParser:
+ """
+ API of the Java stream parser
+ """
+
+ def run(self):
+ # type: () -> List[ParsedJavaContent]
+ """
+ Parses the input stream
+ """
+ raise NotImplementedError
+
+ def dump(self, content):
+ # type: (List[ParsedJavaContent]) -> str
+ """
+ Dumps to a string the given objects
+ """
+ raise NotImplementedError
+
+ def _read_content(self, type_code, block_data, class_desc=None):
+ # type: (int, bool, Optional[JavaClassDesc]) -> ParsedJavaContent
+ """
+ Parses the next content. Use with care (use only in a transformer)
+ """
+
+
+class ObjectTransformer(object): # pylint:disable=R0205
+ """
+ Representation of an object transformer (default methods all return None)
+ """
+
+ def create_instance(self, classdesc): # pylint:disable=W0613,R0201
+ # type: (JavaClassDesc) -> Optional[JavaInstance]
+ """
+ Transforms a parsed Java object into a Python object.
+
+ The result must be a JavaInstance bean, or None if the transformer
+ doesn't support this kind of instance.
+
+ :param classdesc: The description of a Java class
+ :return: A JavaInstance bean, or None if the class isn't handled
+ """
+ return None
+
+ def load_array(
+ self, reader, type_code, size
+ ): # pylint:disable=W0613,R0201
+ # type: (DataStreamReader, TypeCode, int) -> Optional[list]
+ """
+ Loads and returns the content of a Java array, if possible.
+
+ The result of this method must be the content of the array, i.e. a list
+ or an array. It will be stored in a JavaArray bean created by the
+ parser.
+
+ This method must return None if it can't handle the array.
+
+ :param reader: The data stream reader
+ :param type_code: Type of the elements of the array
+ :param size: Number of elements in the array
+ """
+ return None
+
+ def load_custom_writeObject(
+ self, parser, reader, name
+ ): # pylint:disable=W0613,R0201
+ # type: (IJavaStreamParser, DataStreamReader, str) -> Optional[JavaClassDesc]
+ """
+ Reads content stored from a custom writeObject.
+
+ This method is called only if the class description has both the
+ ``SC_SERIALIZABLE`` and ``SC_WRITE_METHOD`` flags set.
+
+ The stream parsing will stop and fail if this method returns None.
+
+ :param parser: The JavaStreamParser in use
+ :param reader: The data stream reader
+ :param name: The class description name
+ :return: A Java class description, if handled, else None
+ """
+ return None
diff --git a/javaobj/v2/beans.py b/javaobj/v2/beans.py
new file mode 100644
index 0000000..0b81f16
--- /dev/null
+++ b/javaobj/v2/beans.py
@@ -0,0 +1,641 @@
+#!/usr/bin/env python3
+"""
+Definition of the beans used to represent the parsed objects
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+import logging
+from enum import IntEnum
+from typing import Any, Dict, List, Optional, Set
+
+from ..constants import ClassDescFlags, TypeCode
+from ..modifiedutf8 import byte_to_int, decode_modified_utf8
+from ..utils import UNICODE_TYPE
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class ContentType(IntEnum):
+    """
+    Kinds of content a parsed bean can represent
+    """
+
+    # Beans describing Java objects and classes
+    INSTANCE = 0
+    CLASS = 1
+    ARRAY = 2
+    STRING = 3
+    ENUM = 4
+    CLASSDESC = 5
+
+    # Raw data and parsing state
+    BLOCKDATA = 6
+    EXCEPTIONSTATE = 7
+
+
+class ClassDataType(IntEnum):
+    """
+    Ways the data of a class can be stored in the stream
+    """
+
+    # Serializable classes, without / with the write method flag
+    NOWRCLASS = 0
+    WRCLASS = 1
+
+    # Externalizable classes
+    EXTERNAL_CONTENTS = 2
+    OBJECT_ANNOTATION = 3
+
+
+class ClassDescType(IntEnum):
+    """
+    Kinds of class descriptions: normal class or proxy class
+    """
+
+    NORMALCLASS = 0
+    PROXYCLASS = 1
+
+
+class FieldType(IntEnum):
+    """
+    Types of class fields; each member reuses the value of the TypeCode
+    member of the same kind
+    """
+
+    # Primitive types
+    BYTE = TypeCode.TYPE_BYTE.value
+    CHAR = TypeCode.TYPE_CHAR.value
+    DOUBLE = TypeCode.TYPE_DOUBLE.value
+    FLOAT = TypeCode.TYPE_FLOAT.value
+    INTEGER = TypeCode.TYPE_INTEGER.value
+    LONG = TypeCode.TYPE_LONG.value
+    SHORT = TypeCode.TYPE_SHORT.value
+    BOOLEAN = TypeCode.TYPE_BOOLEAN.value
+
+    # Reference types
+    ARRAY = TypeCode.TYPE_ARRAY.value
+    OBJECT = TypeCode.TYPE_OBJECT.value
+
+    def type_code(self):
+        # type: () -> TypeCode
+        """
+        Returns the TypeCode member sharing the value of this field type
+        """
+        raw_value = self.value
+        return TypeCode(raw_value)
+
+
+class ParsedJavaContent(object):  # pylint:disable=R0205
+    """
+    Common base of all the beans produced while parsing the stream
+    """
+
+    def __init__(self, content_type):
+        # type: (ContentType) -> None
+        # Handle of the content, 0 until one is assigned
+        self.handle = 0  # type: int
+        # Kind of content held by this bean
+        self.type = content_type  # type: ContentType
+        # Flag for contents representing an exception
+        self.is_exception = False  # type: bool
+
+    def __str__(self):
+        template = "[ParseJavaObject 0x{0:x} - {1}]"
+        return template.format(self.handle, self.type)
+
+    __repr__ = __str__
+
+    def dump(self, indent=0):
+        # type: (int) -> str
+        """
+        Basic dump: the string form of the bean, preceded by ``indent``
+        tabulations
+        """
+        tabs = "\t" * indent
+        return tabs + str(self)
+
+    def validate(self):
+        """
+        Validity check on the object. Nothing is checked at this level.
+        """
+        return None
+
+
+class ExceptionState(ParsedJavaContent):
+    """
+    Bean wrapping an exception object found in the stream, along with the
+    raw data it was read from
+    """
+
+    def __init__(self, exception_object, data):
+        # type: (ParsedJavaContent, bytes) -> None
+        super(ExceptionState, self).__init__(ContentType.EXCEPTIONSTATE)
+        # The state shares the handle of the exception object it wraps
+        self.handle = exception_object.handle
+        self.exception_object = exception_object
+        self.stream_data = data
+
+    def dump(self, indent=0):
+        # type: (int) -> str
+        """
+        Returns a one-line dump of the exception state
+        """
+        return "{0}[ExceptionState {1:x}]".format("\t" * indent, self.handle)
+
+
+class ExceptionRead(Exception):
+    """
+    Raised to signal that the parsed content is an exception object.
+    The parsed object is available in ``exception_object``.
+    """
+
+    def __init__(self, content):
+        # type: (ParsedJavaContent) -> None
+        # Keep the parsed object so that the handler can return it
+        self.exception_object = content
+
+
+class JavaString(ParsedJavaContent):
+    """
+    Represents a Java string.
+
+    The bean hashes and compares like its decoded value, so it can be
+    compared directly with a Python string.
+    """
+
+    def __init__(self, handle, data):
+        # type: (int, bytes) -> None
+        """
+        :param handle: Handle assigned to the string
+        :param data: Raw bytes of the string, decoded with
+                     decode_modified_utf8
+        """
+        super(JavaString, self).__init__(ContentType.STRING)
+        self.handle = handle
+        value, length = decode_modified_utf8(data)
+        self.value = value  # type: str
+        self.length = length  # type: int
+
+    def __repr__(self):
+        return repr(self.value)
+
+    def __str__(self):
+        return self.value
+
+    def dump(self, indent=0):
+        # type: (int) -> str
+        """
+        Returns a dump representation of the string
+        """
+        return "\t" * indent + "[String {0:x}: {1}]".format(
+            self.handle, repr(self.value)
+        )
+
+    def __hash__(self):
+        return hash(self.value)
+
+    def __eq__(self, other):
+        return self.value == other
+
+    def __ne__(self, other):
+        # Python 2 doesn't derive "!=" from __eq__: without this method,
+        # "!=" would fall back to an identity comparison there
+        return not self == other
+
+
+class JavaField:
+    """
+    Description of a field declared in a Java class description
+    """
+
+    def __init__(self, field_type, name, class_name=None):
+        # type: (FieldType, str, Optional[JavaString]) -> None
+        self.name = name
+        self.type = field_type
+        self.class_name = class_name
+        self.is_inner_class_reference = False
+
+        # The class name, when given, is checked right away
+        if class_name:
+            self.validate(class_name.value)
+
+    def validate(self, java_type):
+        # type: (str) -> None
+        """
+        Validates the type given as parameter. Only fields of type OBJECT
+        are checked: their type must look like ``L...;``.
+
+        :param java_type: The Java type name to check
+        :raise ValueError: Empty or malformed object type name
+        """
+        if self.type != FieldType.OBJECT:
+            return
+
+        if not java_type:
+            raise ValueError("Class name can't be empty")
+
+        well_formed = java_type[0] == "L" and java_type[-1] == ";"
+        if not well_formed:
+            raise ValueError(
+                "Invalid object field type: {0}".format(java_type)
+            )
+
+
+class JavaClassDesc(ParsedJavaContent):
+    """
+    Bean holding the description of a class (normal or proxy)
+    """
+
+    def __init__(self, class_desc_type):
+        # type: (ClassDescType) -> None
+        super(JavaClassDesc, self).__init__(ContentType.CLASSDESC)
+
+        # Kind of class description
+        self.class_type = class_desc_type  # type: ClassDescType
+
+        # Identity of the class: name and serial version UID
+        self.name = None  # type: Optional[str]
+        self.serial_version_uid = 0  # type: int
+
+        # Description flags byte
+        self.desc_flags = 0  # type: int
+
+        # Members: fields, inner classes and annotation objects
+        self.fields = []  # type: List[JavaField]
+        self.inner_classes = []  # type: List[JavaClassDesc]
+        self.annotations = []  # type: List[ParsedJavaContent]
+
+        # Hierarchy: the super class (if any), a flag telling if this
+        # description is itself a super class, and the interfaces
+        self.super_class = None  # type: Optional[JavaClassDesc]
+        self.is_super_class = False
+        self.interfaces = []  # type: List[str]
+
+        # Set of enum constants
+        self.enum_constants = set()  # type: Set[str]
+
+        # Flags: inner class, local inner class, static member class
+        self.is_inner_class = False  # type: bool
+        self.is_local_inner_class = False  # type: bool
+        self.is_static_member_class = False  # type: bool
+
+    def __str__(self):
+        template = "[classdesc 0x{0:x}: name {1}, uid {2}]"
+        return template.format(
+            self.handle, self.name, self.serial_version_uid
+        )
+
+    __repr__ = __str__
+
+    def dump(self, indent=0):
+        # type: (int) -> str
+        """
+        Returns a one-line dump of the class description, preceded by
+        ``indent`` tabulations
+        """
+        description = "[classdesc 0x{0:x}: name {1}, uid {2}]".format(
+            self.handle, self.name, self.serial_version_uid
+        )
+        return "\t" * indent + description
+
+    @property
+    def serialVersionUID(self):  # pylint:disable=C0103
+        """
+        Alias of ``serial_version_uid`` (mimics the javaobj API)
+        """
+        return self.serial_version_uid
+
+    @property
+    def flags(self):
+        """
+        Alias of ``desc_flags`` (mimics the javaobj API)
+        """
+        return self.desc_flags
+
+    @property
+    def fields_names(self):
+        """
+        Names of the fields, in declaration order (mimics the javaobj API)
+        """
+        names = []
+        for field in self.fields:
+            names.append(field.name)
+        return names
+
+    @property
+    def fields_types(self):
+        """
+        Types of the fields, in declaration order (mimics the javaobj API)
+        """
+        types = []
+        for field in self.fields:
+            types.append(field.type)
+        return types
+
+    @property
+    def data_type(self):
+        """
+        Computes the data type of this class from its description flags
+
+        :raise ValueError: Neither serializable nor externalizable
+        """
+        if ClassDescFlags.SC_SERIALIZABLE & self.desc_flags:
+            if ClassDescFlags.SC_WRITE_METHOD & self.desc_flags:
+                return ClassDataType.WRCLASS
+            return ClassDataType.NOWRCLASS
+
+        if ClassDescFlags.SC_EXTERNALIZABLE & self.desc_flags:
+            if ClassDescFlags.SC_WRITE_METHOD & self.desc_flags:
+                return ClassDataType.OBJECT_ANNOTATION
+            return ClassDataType.EXTERNAL_CONTENTS
+
+        raise ValueError("Unhandled Class Data Type")
+
+    def is_array_class(self):
+        # type: () -> bool
+        """
+        Tells if the class name is the one of an array type (starts
+        with "[")
+        """
+        if not self.name:
+            return False
+
+        return self.name.startswith("[")
+
+    def get_hierarchy(self, classes):
+        # type: (List["JavaClassDesc"]) -> None
+        """
+        Appends to the given list the class descriptions of the hierarchy
+        of this class, super classes first, in the order described by the
+        Object Stream Serialization Protocol. This is the order in which
+        fields are read from the stream.
+
+        The walk stops (with a warning) at a proxy super class.
+
+        :param classes: A list to be filled in with the hierarchy
+        """
+        parent = self.super_class
+        if parent is not None:
+            if parent.class_type != ClassDescType.PROXYCLASS:
+                parent.get_hierarchy(classes)
+            else:
+                logging.warning("Hit a proxy class in super class hierarchy")
+
+        classes.append(self)
+
+    def validate(self):
+        """
+        Checks the consistency between the description flags and the
+        content of this class description
+
+        :raise ValueError: Inconsistent class description
+        """
+        serial_or_extern = (
+            ClassDescFlags.SC_SERIALIZABLE | ClassDescFlags.SC_EXTERNALIZABLE
+        )
+        kind_bits = self.desc_flags & serial_or_extern
+
+        if kind_bits == 0 and self.fields:
+            raise ValueError(
+                "Non-serializable, non-externalizable class has fields"
+            )
+
+        if kind_bits == serial_or_extern:
+            raise ValueError("Class is both serializable and externalizable")
+
+        if self.desc_flags & ClassDescFlags.SC_ENUM:
+            if self.fields or self.interfaces:
+                raise ValueError(
+                    "Enums shouldn't implement interfaces "
+                    "or have non-constant fields"
+                )
+        elif self.enum_constants:
+            raise ValueError("Non-enum classes shouldn't have enum constants")
+
+
+class JavaInstance(ParsedJavaContent):
+    """
+    Represents an instance of Java object.
+
+    The values of the parsed fields are stored in ``field_data``, grouped
+    by class description, and are also reachable as attributes of the
+    bean (see ``__getattr__``).
+    """
+
+    def __init__(self):
+        super(JavaInstance, self).__init__(ContentType.INSTANCE)
+        self.classdesc = None  # type: JavaClassDesc
+        self.field_data = {}  # type: Dict[JavaClassDesc, Dict[JavaField, Any]]
+        self.annotations = (
+            {}
+        )  # type: Dict[JavaClassDesc, List[ParsedJavaContent]]
+        self.is_external_instance = False
+
+    def __str__(self):
+        return "[instance 0x{0:x}: type {1}]".format(
+            self.handle, self.classdesc.name
+        )
+
+    __repr__ = __str__
+
+    def dump(self, indent=0):
+        # type: (int) -> str
+        """
+        Returns a multi-line dump of the instance: its annotations and its
+        fields, grouped by class description
+
+        :param indent: Number of tabulations to prefix the lines with
+        """
+        prefix = "\t" * indent
+        sub_prefix = "\t" * (indent + 1)
+
+        dump = [
+            prefix
+            + "[instance 0x{0:x}: {1:x} / {2}]".format(
+                self.handle, self.classdesc.handle, self.classdesc.name
+            )
+        ]
+
+        for cd, annotations in self.annotations.items():
+            dump.append(
+                "{0}{1} -- {2} annotations".format(
+                    prefix, cd.name, len(annotations)
+                )
+            )
+            for ann in annotations:
+                dump.append(sub_prefix + repr(ann))
+
+        for cd, fields in self.field_data.items():
+            dump.append(
+                "{0}{1} -- {2} fields".format(prefix, cd.name, len(fields))
+            )
+            for field, value in fields.items():
+                if isinstance(value, ParsedJavaContent):
+                    if self.handle != 0 and value.handle == self.handle:
+                        # Self reference: don't recurse forever
+                        value_str = "this"
+                    else:
+                        value_str = "\n" + value.dump(indent + 2)
+                else:
+                    value_str = repr(value)
+
+                dump.append(
+                    "{0}{1} {2}: {3}".format(
+                        sub_prefix, field.type.name, field.name, value_str
+                    )
+                )
+
+        dump.append(prefix + "[/instance 0x{0:x}]".format(self.handle))
+        return "\n".join(dump)
+
+    def __getattr__(self, name):
+        """
+        Returns the value of the first parsed field with the given name.
+
+        Python calls this method only when the normal attribute lookup
+        fails.
+
+        :param name: Name of a field
+        :raise AttributeError: No field with that name
+        """
+        # Read field_data from the instance dictionary: going through
+        # "self.field_data" would call this method again, and recurse
+        # without end, when the attribute isn't set yet (e.g. when the
+        # copy or pickle modules probe an instance created without
+        # calling __init__)
+        field_data = self.__dict__.get("field_data")
+        if field_data:
+            for cd_fields in field_data.values():
+                for field, value in cd_fields.items():
+                    if field.name == name:
+                        return value
+
+        raise AttributeError(name)
+
+    def get_class(self):
+        """
+        Returns the class of this instance
+        """
+        return self.classdesc
+
+    def load_from_blockdata(
+        self, parser, reader, indent=0
+    ):  # pylint:disable=W0613,R0201
+        """
+        Reads content stored in a block data.
+
+        This method is called only if the class description has both the
+        ``SC_EXTERNALIZABLE`` and ``SC_BLOCK_DATA`` flags set.
+
+        The stream parsing will stop and fail if this method returns False.
+        This base implementation reads nothing and returns False.
+
+        :param parser: The JavaStreamParser in use
+        :param reader: The underlying data stream reader
+        :param indent: Indentation to use in logs
+        :return: True on success, False on error
+        """
+        return False
+
+    def load_from_instance(self, indent=0):  # pylint:disable=W0613,R0201
+        # type: (int) -> bool
+        """
+        Updates the content of this instance from its parsed fields and
+        annotations. This base implementation does nothing and returns
+        False.
+
+        :param indent: Indentation to use in logs
+        :return: True on success, False on error (currently ignored)
+        """
+        return False
+
+
+class JavaClass(ParsedJavaContent):
+    """
+    Bean representing a Java class stored in the stream
+    """
+
+    def __init__(self, handle, class_desc):
+        # type: (int, JavaClassDesc) -> None
+        super(JavaClass, self).__init__(ContentType.CLASS)
+        self.classdesc = class_desc
+        self.handle = handle
+
+    def __str__(self):
+        template = "[class 0x{0:x}: {1}]"
+        return template.format(self.handle, self.classdesc)
+
+    __repr__ = __str__
+
+    @property
+    def name(self):
+        """
+        Name of the class, taken from its description
+        (mimics the javaobj API)
+        """
+        return self.classdesc.name
+
+
+class JavaEnum(ParsedJavaContent):
+    """
+    Bean representing a value of a Java enumeration
+    """
+
+    def __init__(self, handle, class_desc, value):
+        # type: (int, JavaClassDesc, JavaString) -> None
+        super(JavaEnum, self).__init__(ContentType.ENUM)
+        self.classdesc = class_desc
+        self.value = value
+        self.handle = handle
+
+    def __str__(self):
+        template = "[Enum 0x{0:x}: {1}]"
+        return template.format(self.handle, self.value)
+
+    __repr__ = __str__
+
+    @property
+    def constant(self):
+        """
+        Alias of ``value`` (mimics the javaobj API)
+        """
+        return self.value
+
+
+class JavaArray(ParsedJavaContent, list):
+    """
+    Bean representing a Java array; it is also a Python list of the items
+    """
+
+    def __init__(self, handle, class_desc, field_type, content):
+        # type: (int, JavaClassDesc, FieldType, List[Any]) -> None
+        # Both parents are initialized explicitly: the list part is
+        # filled with the items, the bean part gets the content type
+        list.__init__(self, content)
+        ParsedJavaContent.__init__(self, ContentType.ARRAY)
+        self.handle = handle
+        self.classdesc = class_desc
+        self.field_type = field_type
+        # The content is also kept in the form it was given
+        self.data = content
+
+    def __str__(self):
+        items = [repr(item) for item in self]
+        return "[" + ", ".join(items) + "]"
+
+    __repr__ = __str__
+
+    def dump(self, indent=0):
+        # type: (int) -> str
+        """
+        Returns a multi-line dump of the array: a header, one entry per
+        item and a footer
+
+        :param indent: Number of tabulations to prefix the lines with
+        """
+        prefix = "\t" * indent
+        sub_prefix = "\t" * (indent + 1)
+
+        header = "{0}[array 0x{1:x}: {2} items - stored as {3}]".format(
+            prefix, self.handle, len(self), type(self.data).__name__
+        )
+        lines = [header]
+
+        for item in self:
+            if not isinstance(item, ParsedJavaContent):
+                lines.append(sub_prefix + repr(item) + ",")
+            elif self.handle != 0 and item.handle == self.handle:
+                # The array contains itself
+                lines.append("this,")
+            else:
+                lines.append(item.dump(indent + 1) + ",")
+
+        lines.append(prefix + "[/array 0x{0:x}]".format(self.handle))
+        return "\n".join(lines)
+
+    @property
+    def _data(self):
+        """
+        Items of the array, as a tuple (mimics the javaobj API)
+        """
+        return tuple(self)
+
+
+class BlockData(ParsedJavaContent):
+    """
+    Represents a data block.
+
+    A block can be compared with a string, with bytes or with another
+    block: the comparison is made on the values of the bytes.
+    """
+
+    def __init__(self, data):
+        # type: (bytes) -> None
+        super(BlockData, self).__init__(ContentType.BLOCKDATA)
+        self.data = data
+
+    def __str__(self):
+        return "[blockdata 0x{0:x}: {1} bytes]".format(
+            self.handle, len(self.data)
+        )
+
+    def __repr__(self):
+        return repr(self.data)
+
+    def __eq__(self, other):
+        if isinstance(other, BlockData):
+            # Compare the content of the blocks: without this, a block
+            # would not even be equal to itself
+            other = other.data
+
+        if isinstance(other, (str, UNICODE_TYPE)):
+            other_data = tuple(ord(x) for x in other)
+        elif isinstance(other, bytes):
+            other_data = tuple(byte_to_int(x) for x in other)
+        else:
+            # Can't compare
+            return False
+
+        return other_data == tuple(byte_to_int(x) for x in self.data)
+
+    def __ne__(self, other):
+        # Python 2 doesn't derive "!=" from __eq__
+        return not self == other
diff --git a/javaobj/v2/core.py b/javaobj/v2/core.py
new file mode 100644
index 0000000..8e018a6
--- /dev/null
+++ b/javaobj/v2/core.py
@@ -0,0 +1,766 @@
+#!/usr/bin/env python3
+"""
+Second parsing approach for javaobj, using the same approach as jdeserialize
+See: https://github.com/frohoff/jdeserialize
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+import logging
+import os
+from typing import ( # pylint:disable=W0611
+ IO,
+ Any,
+ Callable,
+ Dict,
+ List,
+ Optional,
+)
+
+from ..constants import (
+ PRIMITIVE_TYPES,
+ StreamConstants,
+ TerminalCode,
+ TypeCode,
+)
+from ..modifiedutf8 import ( # pylint:disable=W0611 # noqa: F401
+ decode_modified_utf8,
+)
+from . import api # pylint:disable=W0611
+from .beans import (
+ BlockData,
+ ClassDataType,
+ ClassDescType,
+ ExceptionRead,
+ ExceptionState,
+ FieldType,
+ JavaArray,
+ JavaClass,
+ JavaClassDesc,
+ JavaEnum,
+ JavaField,
+ JavaInstance,
+ JavaString,
+ ParsedJavaContent,
+)
+from .stream import DataStreamReader
+from .transformers import DefaultObjectTransformer
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaStreamParser(api.IJavaStreamParser):
+ """
+ Parses a Java stream
+ """
+
+ def __init__(self, fd, transformers):
+     # type: (IO[bytes], List[api.ObjectTransformer]) -> None
+     """
+     Sets up the parser state
+
+     :param fd: File-object to read from
+     :param transformers: Custom object transformers
+     """
+     # Logger
+     self._log = logging.getLogger("javaobj.parser")
+
+     # Input stream, raw and wrapped in a data reader
+     self.__fd = fd
+     self.__reader = DataStreamReader(fd)
+
+     # Object transformers (copied: the given list is left untouched)
+     self.__transformers = list(transformers)
+
+     # Handles: snapshots of previous maps, current map, next value
+     self.__handle_maps = []  # type: List[Dict[int, ParsedJavaContent]]
+     self.__handles = {}  # type: Dict[int, ParsedJavaContent]
+     self.__current_handle = StreamConstants.BASE_REFERENCE_IDX.value
+
+     # Type code handlers: each one is called with the type code
+     handlers = {
+         TerminalCode.TC_NULL: self._do_null,
+         TerminalCode.TC_REFERENCE: self._do_reference,
+         TerminalCode.TC_OBJECT: self._do_object,
+         TerminalCode.TC_CLASS: self._do_class,
+         TerminalCode.TC_ARRAY: self._do_array,
+         TerminalCode.TC_ENUM: self._do_enum,
+         TerminalCode.TC_EXCEPTION: self._do_exception,
+     }  # type: Dict[int, Callable[[int], ParsedJavaContent]]
+
+     # Short and long variants share their handler
+     for code in (TerminalCode.TC_STRING, TerminalCode.TC_LONGSTRING):
+         handlers[code] = self._read_new_string
+
+     for code in (TerminalCode.TC_CLASSDESC, TerminalCode.TC_PROXYCLASSDESC):
+         handlers[code] = self._do_classdesc
+
+     for code in (TerminalCode.TC_BLOCKDATA, TerminalCode.TC_BLOCKDATALONG):
+         handlers[code] = self._do_block_data
+
+     self.__type_code_handlers = handlers
+
+ def run(self):
+     # type: () -> List[ParsedJavaContent]
+     """
+     Parses the input stream: checks its header, then reads contents
+     until the end of the stream
+
+     :return: The list of parsed contents
+     :raise ValueError: Invalid stream header
+     """
+     stream = self.__fd
+     reader = self.__reader
+
+     # Header: magic number, then stream version
+     magic = reader.read_ushort()
+     if magic != StreamConstants.STREAM_MAGIC:
+         raise ValueError("Invalid file magic: 0x{0:x}".format(magic))
+
+     version = reader.read_ushort()
+     if version != StreamConstants.STREAM_VERSION:
+         raise ValueError("Invalid file version: 0x{0:x}".format(version))
+
+     # Start from a clean state
+     self._reset()
+
+     contents = []  # type: List[ParsedJavaContent]
+     while True:
+         self._log.info("Reading next content")
+
+         # Remember where the content starts, in case it is an exception
+         start = stream.tell()
+         try:
+             type_code = reader.read_byte()
+         except EOFError:
+             # Nothing left to read
+             break
+
+         if type_code == TerminalCode.TC_RESET:
+             # Explicit reset requested by the stream
+             self._reset()
+             continue
+
+         parsed_content = self._read_content(type_code, True)
+         self._log.debug("Read: %s", parsed_content)
+
+         if parsed_content is not None and parsed_content.is_exception:
+             # Re-read the raw data of the object, from its start to
+             # the current position, and wrap everything in a state bean
+             end = stream.tell()
+             stream.seek(start, os.SEEK_SET)
+             raw_data = stream.read(end - start)
+             parsed_content = ExceptionState(parsed_content, raw_data)
+
+         contents.append(parsed_content)
+
+     # Check everything that got a handle
+     for content in self.__handles.values():
+         content.validate()
+
+     # TODO: connect member classes ? (see jdeserialize @ 864)
+
+     # Keep a snapshot of the final handles map
+     if self.__handles:
+         self.__handle_maps.append(self.__handles.copy())
+
+     return contents
+
+ def dump(self, content):
+     # type: (List[ParsedJavaContent]) -> str
+     """
+     Dumps to a string the given objects, followed by the instances
+     currently known by their handle
+
+     :param content: The contents to dump (e.g. the result of run())
+     :return: The dump, as a multi-line string
+     """
+     # First section: the given contents
+     lines = ["//// BEGIN stream content output"]  # type: List[str]
+     lines += [str(c) for c in content]
+     lines += ["//// END stream content output", ""]
+
+     # Second section: the instances of the current handles map
+     lines.append("//// BEGIN instance dump")
+     for parsed in self.__handles.values():
+         if isinstance(parsed, JavaInstance):
+             lines += self._dump_instance(parsed)
+     lines += ["//// END instance dump", ""]
+
+     return "\n".join(lines)
+
+ @staticmethod
+ def _dump_instance(instance):
+     # type: (JavaInstance) -> List[str]
+     """
+     Dumps an instance to a set of lines: a header, the annotations and
+     the field values (both grouped by class description), then a
+     closing bracket
+
+     :param instance: The instance to dump
+     :return: The lines of the dump
+     """
+     lines = []  # type: List[str]
+     lines.append(
+         "[instance 0x{0:x}: 0x{1:x} / {2}".format(
+             instance.handle,
+             instance.classdesc.handle,
+             instance.classdesc.name,
+         )
+     )
+
+     if instance.annotations:
+         lines.append("\tobject annotations:")
+         for cd, annotation in instance.annotations.items():
+             lines.append("\t" + (cd.name or "null"))
+             for c in annotation:
+                 lines.append("\t\t" + str(c))
+
+     if instance.field_data:
+         lines.append("\tfield data:")
+         # field_data maps each class description of the hierarchy to
+         # its own {field: value} dictionary: walk both levels
+         for cd, fields in instance.field_data.items():
+             lines.append("\t" + (cd.name or "null"))
+             for field, obj in fields.items():
+                 line = "\t\t" + (field.name or "null") + ": "
+                 if isinstance(obj, ParsedJavaContent):
+                     h = obj.handle
+                     if h == instance.handle:
+                         # Self reference
+                         line += "this"
+                     else:
+                         line += "r0x{0:x}".format(h)
+
+                     line += ": " + str(obj)
+                 else:
+                     line += str(obj)
+
+                 lines.append(line)
+
+     lines.append("]")
+     return lines
+
+ def _reset(self):
+     """
+     Resets the internal state of the parser: the current handles map is
+     archived (if not empty) then cleared, and the handle counter goes
+     back to its base value
+     """
+     if self.__handles:
+         # Keep a snapshot of the handles known before the reset
+         self.__handle_maps.append(self.__handles.copy())
+
+     self.__handles.clear()
+
+     # Reset the handle index to the plain integer value, as done in
+     # the constructor: without ".value", the first handle given after
+     # a reset would be the enumeration member itself
+     self.__current_handle = StreamConstants.BASE_REFERENCE_IDX.value
+
+ def _new_handle(self):
+     # type: () -> int
+     """
+     Returns the next free handle value and moves the counter forward
+     """
+     allocated = self.__current_handle
+     self.__current_handle = allocated + 1
+     return allocated
+
+ def _set_handle(self, handle, content):
+     # type: (int, ParsedJavaContent) -> None
+     """
+     Associates a parsed content to its handle
+
+     :param handle: Handle of the content
+     :param content: The parsed content
+     :raise ValueError: The handle is already associated to a content
+     """
+     known_handles = self.__handles
+     if handle in known_handles:
+         raise ValueError("Trying to reset handle {0:x}".format(handle))
+
+     known_handles[handle] = content
+
+ @staticmethod
+ def _do_null(_):
+     """
+     Handler of the null reference: the type code is ignored and the
+     result is always None
+     """
+     return None
+
+ def _read_content(self, type_code, block_data, class_desc=None):
+     # type: (int, bool, Optional[JavaClassDesc]) -> ParsedJavaContent
+     """
+     Parses the content introduced by the given type code
+
+     :param type_code: The type code read from the stream
+     :param block_data: If True, block data is allowed here
+     :param class_desc: Description of the class being read, used to
+                        look for a custom reader on unknown type codes
+     :return: The parsed content; an exception object found while
+              parsing is returned, not raised
+     :raise ValueError: Unexpected block data or unknown type code
+     """
+     is_block = type_code in (
+         TerminalCode.TC_BLOCKDATA,
+         TerminalCode.TC_BLOCKDATALONG,
+     )
+     if is_block and not block_data:
+         raise ValueError("Got a block data, but not allowed here.")
+
+     try:
+         # Look for a handler for that type code
+         handler = self.__type_code_handlers[type_code]
+     except KeyError:
+         # No handler: only a class with a custom writer can go on
+         has_custom_reader = (
+             class_desc
+             and class_desc.name
+             and class_desc.data_type == ClassDataType.WRCLASS
+         )
+         if not has_custom_reader:
+             # No valid custom reader: abandon
+             raise ValueError("Unknown type code: 0x{0:x}".format(type_code))
+
+         # Return the result of the custom reader immediately
+         return self._custom_readObject(class_desc.name)
+
+     try:
+         return handler(type_code)
+     except ExceptionRead as ex:
+         # We found an exception object: return it (raise later)
+         return ex.exception_object
+
+ def _read_new_string(self, type_code):
+     # type: (int) -> JavaString
+     """
+     Reads a Java String, or resolves a reference to a string which has
+     already been read
+
+     :param type_code: TC_STRING, TC_LONGSTRING or TC_REFERENCE
+     :return: The string bean
+     :raise ValueError: Invalid type code, reference or string length
+     """
+     if type_code == TerminalCode.TC_REFERENCE:
+         # Got a reference
+         previous = self._do_reference()
+         if not isinstance(previous, JavaString):
+             raise ValueError("Invalid reference to a Java string")
+         return previous
+
+     # Reject other type codes before a handle is consumed: the length
+     # can only be read for the two kinds of strings
+     if type_code not in (
+         TerminalCode.TC_STRING,
+         TerminalCode.TC_LONGSTRING,
+     ):
+         raise ValueError(
+             "Invalid string type code: 0x{0:x}".format(type_code)
+         )
+
+     # Assign a new handle
+     handle = self._new_handle()
+
+     # Read the length
+     if type_code == TerminalCode.TC_STRING:
+         length = self.__reader.read_ushort()
+     else:
+         length = self.__reader.read_long()
+         if length < 0 or length > 2147483647:
+             raise ValueError("Invalid string length: {0}".format(length))
+
+         if length < 65536:
+             self._log.warning("Small string stored as a long one")
+
+     # Parse the content
+     data = self.__fd.read(length)
+     java_str = JavaString(handle, data)
+
+     # Store the reference to the string
+     self._set_handle(handle, java_str)
+     return java_str
+
+ def _read_classdesc(self):
+     # type: () -> JavaClassDesc
+     """
+     Reads a type code from the stream, then the class description it
+     introduces
+     """
+     return self._do_classdesc(self.__reader.read_byte())
+
+ def _do_classdesc(self, type_code):
+     # type: (int) -> JavaClassDesc
+     """
+     Parses a class description
+
+     :param type_code: TC_CLASSDESC, TC_PROXYCLASSDESC, TC_REFERENCE or
+                       TC_NULL
+     :return: The class description (None for TC_NULL)
+     :raise ValueError: Invalid type code, reference or description
+     """
+     if type_code == TerminalCode.TC_NULL:
+         # Null reference
+         return None
+
+     if type_code == TerminalCode.TC_REFERENCE:
+         # Reference to an already loading class description
+         previous = self._do_reference()
+         if isinstance(previous, JavaClassDesc):
+             return previous
+
+         raise ValueError("Referenced object is not a class description")
+
+     reader = self.__reader
+     if type_code == TerminalCode.TC_PROXYCLASSDESC:
+         # Proxy class description: a handle, then the interfaces
+         handle = self._new_handle()
+         nb_interfaces = reader.read_int()
+         interfaces = []
+         for _ in range(nb_interfaces):
+             interfaces.append(reader.read_UTF())
+
+         class_desc = JavaClassDesc(ClassDescType.PROXYCLASS)
+         class_desc.handle = handle
+         class_desc.interfaces = interfaces
+         class_desc.annotations = self._read_class_annotations()
+     elif type_code == TerminalCode.TC_CLASSDESC:
+         # Normal class description; the handle is taken between the
+         # serial version UID and the flags
+         name = reader.read_UTF()
+         serial_version_uid = reader.read_long()
+         handle = self._new_handle()
+         desc_flags = reader.read_byte()
+
+         nb_fields = reader.read_short()
+         if nb_fields < 0:
+             raise ValueError("Invalid field count: {0}".format(nb_fields))
+
+         fields = []  # type: List[JavaField]
+         for _ in range(nb_fields):
+             field_type = reader.read_byte()
+             field_name = reader.read_UTF()
+
+             if field_type in (TypeCode.TYPE_OBJECT, TypeCode.TYPE_ARRAY):
+                 # Objects and arrays come with a class name, stored
+                 # as a string preceded by its own type code
+                 class_name = self._read_new_string(reader.read_byte())
+             elif field_type in PRIMITIVE_TYPES:
+                 class_name = None
+             else:
+                 raise ValueError(
+                     "Invalid field type char: 0x{0:x}".format(field_type)
+                 )
+
+             fields.append(
+                 JavaField(FieldType(field_type), field_name, class_name)
+             )
+
+         # Setup the class description bean
+         class_desc = JavaClassDesc(ClassDescType.NORMALCLASS)
+         class_desc.name = name
+         class_desc.serial_version_uid = serial_version_uid
+         class_desc.handle = handle
+         class_desc.desc_flags = desc_flags
+         class_desc.fields = fields
+         class_desc.annotations = self._read_class_annotations(class_desc)
+     else:
+         raise ValueError("Expected a valid class description starter")
+
+     # Both kinds of description end with the super class
+     class_desc.super_class = self._read_classdesc()
+     if class_desc.super_class:
+         class_desc.super_class.is_super_class = True
+
+     # Store the reference to the parsed bean
+     self._set_handle(handle, class_desc)
+     return class_desc
+
+ def _custom_readObject(self, class_name):
+     # type: (str) -> ParsedJavaContent
+     """
+     Reads an object with a custom serialization process: the first
+     transformer returning a result wins
+
+     :param class_name: Name of the class to load
+     :return: The parsed object
+     :raise ValueError: Unknown kind of class
+     """
+     # Step one byte back in the stream before handing it over
+     self.__fd.seek(-1, os.SEEK_CUR)
+
+     reader = self.__reader
+     for transformer in self.__transformers:
+         class_data = transformer.load_custom_writeObject(
+             self, reader, class_name
+         )
+         if class_data:
+             return class_data
+
+     raise ValueError("Custom readObject can not be processed")
+
+ def _read_class_annotations(self, class_desc=None):
+     # type: (Optional[JavaClassDesc]) -> List[ParsedJavaContent]
+     """
+     Reads the annotations associated to a class: contents are read
+     until the end-of-block marker is found
+
+     :param class_desc: Description of the annotated class, if any
+     :return: The list of parsed annotation contents
+     :raise ExceptionRead: An exception object was found
+     """
+     contents = []  # type: List[ParsedJavaContent]
+     # The loop only ends by returning on TC_ENDBLOCKDATA or by raising
+     # an error: no statement can be reached after it
+     while True:
+         type_code = self.__reader.read_byte()
+         if type_code == TerminalCode.TC_ENDBLOCKDATA:
+             # We're done here
+             return contents
+
+         if type_code == TerminalCode.TC_RESET:
+             # Reset references
+             self._reset()
+             continue
+
+         java_object = self._read_content(type_code, True, class_desc)
+
+         if java_object is not None and java_object.is_exception:
+             # Found an exception: raise it
+             raise ExceptionRead(java_object)
+
+         contents.append(java_object)
+
+ def _create_instance(self, class_desc):
+     # type: (JavaClassDesc) -> JavaInstance
+     """
+     Creates the bean of an instance: the result of the first
+     transformer handling the class, else a plain JavaInstance
+
+     :param class_desc: Description of the class of the instance
+     """
+     for transformer in self.__transformers:
+         instance = transformer.create_instance(class_desc)
+         if instance is None:
+             # Not handled by this transformer: try the next one
+             continue
+
+         if class_desc.name:
+             # Instances of classes unknown to the default transformer
+             # are flagged as external
+             supported = self._is_default_supported(class_desc.name)
+             instance.is_external_instance = not supported
+
+         return instance
+
+     return JavaInstance()
+
+ def _do_object(self, type_code=0):
+     # type: (int) -> JavaInstance
+     """
+     Parses an object
+
+     :param type_code: Ignored (handler signature)
+     :return: The parsed instance
+     :raise ValueError: The object has no class description
+     """
+     # Parse the object class description
+     class_desc = self._read_classdesc()
+     if class_desc is None:
+         # Same policy as for enumerations: an instance can't be read
+         # without its class description (it used to end in an
+         # AttributeError further down)
+         raise ValueError("Object class description can't be null")
+
+     # Assign a new handle
+     handle = self._new_handle()
+     self._log.debug(
+         "Reading new object: handle %x, classdesc %s", handle, class_desc
+     )
+
+     # Prepare the instance object
+     instance = self._create_instance(class_desc)
+     instance.classdesc = class_desc
+     instance.handle = handle
+
+     # Store the instance before reading its content, so that the
+     # content can refer to it
+     self._set_handle(handle, instance)
+
+     # Read the instance content
+     self._read_class_data(instance)
+     self._log.debug("Done reading object handle %x", handle)
+     return instance
+
+ def _is_default_supported(self, class_name):
+     # type: (str) -> bool
+     """
+     Checks if the class is known by the type mapper of the default
+     object transformer
+
+     :param class_name: Name of a Java class
+     :return: True if the first default transformer knows the class,
+              False if it doesn't or if there is no default transformer
+     """
+     for transformer in self.__transformers:
+         if isinstance(transformer, DefaultObjectTransformer):
+             # Only the first default transformer is consulted
+             return class_name in transformer._type_mapper
+
+     return False
+
+ def _read_class_data(self, instance):
+     # type: (JavaInstance) -> None
+     """
+     Reads the content of an instance: the field values and annotations
+     of each class of its hierarchy, then lets the instance update
+     itself from them
+
+     :param instance: The instance to fill
+     :raise ValueError: Invalid class description or unreadable data
+     """
+     # Classes of the hierarchy, in reading order
+     hierarchy = []  # type: List[JavaClassDesc]
+     instance.classdesc.get_hierarchy(hierarchy)
+
+     all_data = {}  # type: Dict[JavaClassDesc, Dict[JavaField, Any]]
+     annotations = {}  # type: Dict[JavaClassDesc, List[ParsedJavaContent]]
+
+     for cd in hierarchy:
+         cd.validate()
+         kind = cd.data_type
+
+         if kind in (ClassDataType.NOWRCLASS, ClassDataType.WRCLASS):
+             custom_write = kind == ClassDataType.WRCLASS
+             if custom_write and instance.is_external_instance:
+                 # External instance with a custom writer: only the
+                 # annotations are read
+                 annotations[cd] = self._read_class_annotations(cd)
+                 continue
+
+             # Field values first, in declaration order...
+             all_data[cd] = {
+                 field: self._read_field_value(field.type)
+                 for field in cd.fields
+             }
+
+             # ... then the annotations, for classes with a writer
+             if custom_write:
+                 annotations[cd] = self._read_class_annotations(cd)
+             continue
+
+         if kind == ClassDataType.OBJECT_ANNOTATION:
+             # Let the instance read its block data, if it can
+             if not instance.load_from_blockdata(self, self.__reader):
+                 # Can't read :/
+                 raise ValueError(
+                     "hit externalizable with nonzero SC_BLOCK_DATA; "
+                     "can't interpret data"
+                 )
+
+         annotations[cd] = self._read_class_annotations(cd)
+
+     # Fill the instance object
+     instance.annotations = annotations
+     instance.field_data = all_data
+
+     # Load transformation from the fields and annotations
+     instance.load_from_instance()
+
+ def _read_field_value(self, field_type):
+     # type: (FieldType) -> Any
+     """
+     Reads the value of an instance field
+
+     :param field_type: Type of the field to read
+     :return: The value read with the reader method matching the type
+              for primitive fields, else the parsed content (or None)
+     :raise ValueError: Unknown field type or invalid array content
+     :raise ExceptionRead: The content read is an exception object
+     """
+     if field_type == FieldType.BYTE:
+         return self.__reader.read_byte()
+     if field_type == FieldType.CHAR:
+         return self.__reader.read_char()
+     if field_type == FieldType.DOUBLE:
+         return self.__reader.read_double()
+     if field_type == FieldType.FLOAT:
+         return self.__reader.read_float()
+     if field_type == FieldType.INTEGER:
+         return self.__reader.read_int()
+     if field_type == FieldType.LONG:
+         return self.__reader.read_long()
+     if field_type == FieldType.SHORT:
+         return self.__reader.read_short()
+     if field_type == FieldType.BOOLEAN:
+         return self.__reader.read_bool()
+     if field_type in (FieldType.OBJECT, FieldType.ARRAY):
+         sub_type_code = self.__reader.read_byte()
+         if field_type == FieldType.ARRAY:
+             if sub_type_code == TerminalCode.TC_NULL:
+                 # Seems required, according to issue #46
+                 return None
+             if sub_type_code == TerminalCode.TC_REFERENCE:
+                 # Back-reference to a content which has already been
+                 # read. It must not go through _do_classdesc, which
+                 # rejects everything but class descriptions and
+                 # therefore any reference to an array
+                 return self._do_reference()
+             if sub_type_code != TerminalCode.TC_ARRAY:
+                 raise ValueError(
+                     "Array type listed, but type code != TC_ARRAY"
+                 )
+
+         content = self._read_content(sub_type_code, False)
+         if content is not None and content.is_exception:
+             raise ExceptionRead(content)
+
+         return content
+
+     raise ValueError("Can't process type: {0}".format(field_type))
+
+ def _do_reference(self, type_code=0):
+ # type: (int) -> ParsedJavaContent
+ """
+ Returns an object already parsed
+
+ Reads a handle (int) from the stream and looks it up in the handles
+ registry.
+
+ :param type_code: Not used by this method
+ :return: The content previously stored for the handle
+ :raise ValueError: Unknown handle
+ """
+ handle = self.__reader.read_int()
+ try:
+ return self.__handles[handle]
+ except KeyError:
+ raise ValueError("Invalid reference handle: {0:x}".format(handle))
+
+ def _do_enum(self, type_code):
+ # type: (int) -> JavaEnum
+ """
+ Parses an enumeration
+
+ :param type_code: Not used by this method
+ :return: The parsed enumeration value, registered under a new handle
+ :raise ValueError: The class description of the enumeration is null
+ """
+ cd = self._read_classdesc()
+ if cd is None:
+ raise ValueError("Enum description can't be null")
+
+ # The handle is allocated before the constant name is read
+ handle = self._new_handle()
+
+ # Read the enum string
+ sub_type_code = self.__reader.read_byte()
+ enum_str = self._read_new_string(sub_type_code)
+ # Keep track of the constants seen for this enumeration class
+ cd.enum_constants.add(enum_str.value)
+
+ # Store the object
+ enum_obj = JavaEnum(handle, cd, enum_str)
+ self._set_handle(handle, enum_obj)
+ return enum_obj
+
+ def _do_class(self, type_code):
+ # type: (int) -> JavaClass
+ """
+ Parses a class
+
+ :param type_code: Not used by this method
+ :return: The parsed class object, registered under a new handle
+ """
+ # Read the class description first, then allocate the handle
+ cd = self._read_classdesc()
+ handle = self._new_handle()
+ class_obj = JavaClass(handle, cd)
+
+ # Store the class object
+ self._set_handle(handle, class_obj)
+ return class_obj
+
+    def _do_array(self, type_code):
+        # type: (int) -> JavaArray
+        """
+        Parses an array
+
+        The type of the elements is given by the second character of the
+        name of the array class. The transformers are tried in order to load
+        the whole content at once; if none of them handles it, the elements
+        are read one by one.
+
+        :param type_code: Not used by this method
+        :return: The parsed array
+        :raise ValueError: Null or invalid class description, or negative
+                           array size
+        """
+        cd = self._read_classdesc()
+        handle = self._new_handle()
+        # NOTE(review): unlike _do_enum/_do_class, this method does not call
+        # self._set_handle() for the new handle -- confirm it is registered
+        # elsewhere, else back-references to arrays can't be resolved
+
+        # A null description is rejected like an invalid one, instead of
+        # failing with an AttributeError on "cd.name"
+        if cd is None or not cd.name or len(cd.name) < 2:
+            raise ValueError("Invalid name in array class description")
+
+        # ParsedJavaContent type
+        content_type_byte = ord(cd.name[1].encode("latin1"))
+        field_type = FieldType(content_type_byte)
+
+        # Array size
+        size = self.__reader.read_int()
+        if size < 0:
+            raise ValueError("Invalid array size")
+
+        # Array content: the first transformer returning something wins
+        for transformer in self.__transformers:
+            content = transformer.load_array(
+                self.__reader, field_type.type_code(), size
+            )
+            if content is not None:
+                break
+        else:
+            # No transformer handled the array: read it element by element
+            content = [self._read_field_value(field_type) for _ in range(size)]
+
+        return JavaArray(handle, cd, field_type, content)
+
+ def _do_exception(self, type_code):
+ # type: (int) -> ParsedJavaContent
+ """
+ Read the content of a thrown exception
+
+ The parser state is reset before and after reading the exception
+ object.
+
+ :param type_code: Ignored: overwritten by the type code read from the
+ stream
+ :return: The parsed instance, flagged as an exception
+ :raise ValueError: TC_RESET found, or the content is null or is not a
+ JavaInstance
+ :raise ExceptionRead: The content read is already flagged as an
+ exception
+ """
+ # Start by resetting current state
+ self._reset()
+
+ type_code = self.__reader.read_byte()
+ if type_code == TerminalCode.TC_RESET:
+ raise ValueError("TC_RESET read while reading exception")
+
+ content = self._read_content(type_code, False)
+ if content is None:
+ raise ValueError("Null exception object")
+
+ if not isinstance(content, JavaInstance):
+ raise ValueError("Exception object is not an instance")
+
+ if content.is_exception:
+ raise ExceptionRead(content)
+
+ # Strange object ?
+ content.is_exception = True
+ self._reset()
+ return content
+
+    def _do_block_data(self, type_code):
+        # type: (int) -> BlockData
+        """
+        Reads a block data
+
+        :param type_code: TC_BLOCKDATA (size stored on an unsigned byte) or
+                          TC_BLOCKDATALONG (size stored on an int)
+        :return: The block data read from the stream
+        :raise ValueError: Invalid type code or negative block size
+        :raise EOFError: The stream ended before the end of the block
+        """
+        # Parse the size
+        if type_code == TerminalCode.TC_BLOCKDATA:
+            size = self.__reader.read_ubyte()
+        elif type_code == TerminalCode.TC_BLOCKDATALONG:
+            size = self.__reader.read_int()
+        else:
+            raise ValueError("Invalid type code for blockdata")
+
+        if size < 0:
+            raise ValueError("Invalid value for block data size")
+
+        # Read the block
+        data = self.__fd.read(size)
+        if len(data) != size:
+            # Truncated stream: fail like the stream reader does instead of
+            # returning a partial block
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        return BlockData(data)
diff --git a/javaobj/v2/main.py b/javaobj/v2/main.py
new file mode 100644
index 0000000..24b51b0
--- /dev/null
+++ b/javaobj/v2/main.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Mimics the core API with the new deserializer
+"""
+
+from __future__ import absolute_import
+
+from typing import IO, Any # pylint:disable=W0611
+
+try:
+ # Python 2
+ from StringIO import StringIO as BytesIO
+except ImportError:
+ # Python 3+
+ from io import BytesIO
+
+from ..utils import java_data_fd
+from .api import ObjectTransformer # pylint:disable=W0611
+from .core import JavaStreamParser
+from .transformers import DefaultObjectTransformer, NumpyArrayTransformer
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+def load(file_object, *transformers, **kwargs):
+    # type: (IO[bytes], ObjectTransformer, Any) -> Any
+    """
+    Deserializes Java primitive data and objects serialized using
+    ObjectOutputStream from a file-like object.
+
+    :param file_object: A file-like object
+    :param transformers: Custom transformers to use
+    :param use_numpy_arrays: If True, a NumpyArrayTransformer is added to
+                             the transformers
+    :return: None if nothing was parsed, the object itself if only one was
+             parsed, else the list of all parsed objects
+    """
+    # Check file format (uncompress if necessary)
+    stream = java_data_fd(file_object)
+
+    # The default object transformer is added only if the caller gave none
+    chain = list(transformers)
+    has_default = any(
+        isinstance(transformer, DefaultObjectTransformer)
+        for transformer in chain
+    )
+    if not has_default:
+        chain.append(DefaultObjectTransformer())
+
+    # Use the numpy array transformer if requested
+    if kwargs.get("use_numpy_arrays", False):
+        chain.append(NumpyArrayTransformer())
+
+    # Parse the object(s)
+    contents = JavaStreamParser(stream, chain).run()
+
+    count = len(contents)
+    if count == 0:
+        # Nothing was parsed, but no error
+        return None
+
+    if count == 1:
+        # Return the only object as is
+        return contents[0]
+
+    # Returns all objects if they are more than one
+    return contents
+
+
+def loads(data, *transformers, **kwargs):
+ # type: (bytes, ObjectTransformer, Any) -> Any
+ """
+ Deserializes Java objects and primitive data serialized using
+ ObjectOutputStream from bytes.
+
+ The data is wrapped in an in-memory stream and given to ``load()``,
+ along with the transformers and keyword arguments.
+
+ :param data: A Java data string
+ :param transformers: Custom transformers to use
+ :param ignore_remaining_data: If True, don't log an error when unused
+ trailing bytes are remaining
+ (NOTE(review): load() as written in this
+ module doesn't read this option -- confirm
+ it is still supported)
+ :return: The deserialized object
+ """
+ return load(BytesIO(data), *transformers, **kwargs)
diff --git a/javaobj/v2/stream.py b/javaobj/v2/stream.py
new file mode 100644
index 0000000..7cb8a9f
--- /dev/null
+++ b/javaobj/v2/stream.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""
+Utility module to handle streams like in Java
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from __future__ import absolute_import
+
+import struct
+from typing import IO, Any, Tuple # pylint:disable=W0611
+
+from ..modifiedutf8 import decode_modified_utf8
+from ..utils import UNICODE_TYPE, unicode_char # pylint:disable=W0611
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class DataStreamReader(object):
+    """
+    Reads the given file object with object input stream-like methods
+
+    All multi-byte values are read in big-endian order (``>`` struct prefix).
+    """
+
+    def __init__(self, fd):
+        # type: (IO[bytes]) -> None
+        """
+        :param fd: The input stream
+        """
+        self.__fd = fd
+
+    @property
+    def file_descriptor(self):
+        # type: () -> IO[bytes]
+        """
+        The underlying file descriptor
+        """
+        return self.__fd
+
+    def read(self, struct_format):
+        # type: (str) -> Tuple[Any, ...]
+        """
+        Reads from the input stream, using struct
+
+        :param struct_format: An unpack format string
+        :return: The result of struct.unpack (tuple)
+        :raise EOFError: End of stream reached during unpacking
+        """
+        length = struct.calcsize(struct_format)
+        bytes_array = self.__fd.read(length)
+
+        if len(bytes_array) != length:
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        return struct.unpack(struct_format, bytes_array)
+
+    def read_bool(self):
+        # type: () -> bool
+        """
+        Shortcut to read a single `boolean` (1 byte)
+        """
+        return bool(self.read(">B")[0])
+
+    def read_byte(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `byte` (1 byte)
+        """
+        return self.read(">b")[0]
+
+    def read_ubyte(self):
+        # type: () -> int
+        """
+        Shortcut to read an unsigned `byte` (1 byte)
+        """
+        return self.read(">B")[0]
+
+    def read_char(self):
+        # type: () -> UNICODE_TYPE
+        """
+        Shortcut to read a single `char` (2 bytes)
+        """
+        return unicode_char(self.read(">H")[0])
+
+    def read_short(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `short` (2 bytes)
+        """
+        return self.read(">h")[0]
+
+    def read_ushort(self):
+        # type: () -> int
+        """
+        Shortcut to read an unsigned `short` (2 bytes)
+        """
+        return self.read(">H")[0]
+
+    def read_int(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `int` (4 bytes)
+        """
+        return self.read(">i")[0]
+
+    def read_float(self):
+        # type: () -> float
+        """
+        Shortcut to read a single `float` (4 bytes)
+        """
+        return self.read(">f")[0]
+
+    def read_long(self):
+        # type: () -> int
+        """
+        Shortcut to read a single `long` (8 bytes)
+        """
+        return self.read(">q")[0]
+
+    def read_double(self):
+        # type: () -> float
+        """
+        Shortcut to read a single `double` (8 bytes)
+        """
+        return self.read(">d")[0]
+
+    def read_UTF(self):  # pylint:disable=C0103
+        # type: () -> str
+        """
+        Reads a Java string: an unsigned short giving the length in bytes,
+        followed by the string content, decoded as modified UTF-8
+
+        :return: The decoded string
+        :raise EOFError: End of stream reached before the end of the string
+        """
+        length = self.read_ushort()
+        ba = self.__fd.read(length)
+        if len(ba) != length:
+            # Same behaviour as read(): don't decode a truncated string
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        return decode_modified_utf8(ba)[0]
diff --git a/javaobj/v2/transformers.py b/javaobj/v2/transformers.py
new file mode 100644
index 0000000..087eea9
--- /dev/null
+++ b/javaobj/v2/transformers.py
@@ -0,0 +1,534 @@
+#!/usr/bin/env python3
+"""
+Defines the default object transformers
+
+:authors: Thomas Calmant
+:license: Apache License 2.0
+:version: 0.4.4
+:status: Alpha
+
+..
+
+ Copyright 2024 Thomas Calmant
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+# Standard library
+import functools
+from typing import List, Optional, Tuple
+
+# Numpy (optional)
+try:
+ import numpy
+except ImportError:
+ numpy = None # type: ignore
+
+# Javaobj
+from ..constants import TerminalCode, TypeCode
+from ..utils import log_debug, log_error, read_string, read_struct, to_bytes
+from .api import IJavaStreamParser, ObjectTransformer
+from .beans import ( # pylint:disable=W0611
+ BlockData,
+ JavaClassDesc,
+ JavaInstance,
+)
+from .stream import DataStreamReader
+
+# ------------------------------------------------------------------------------
+
+# Module version
+__version_info__ = (0, 4, 4)
+__version__ = ".".join(str(x) for x in __version_info__)
+
+# Documentation strings format
+__docformat__ = "restructuredtext en"
+
+# ------------------------------------------------------------------------------
+
+
+class JavaList(list, JavaInstance):
+    """
+    Python list which is also the parsed form of a Java list
+    """
+
+    HANDLED_CLASSES = ("java.util.ArrayList", "java.util.LinkedList")
+
+    def __init__(self):
+        list.__init__(self)
+        JavaInstance.__init__(self)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Fills the list with the content of the parsed instance
+
+        :param indent: Not used by this method
+        :return: True if the annotations of a handled class were found
+        """
+        # The content of a list is stored in the annotations of its class
+        for class_desc, content in self.annotations.items():
+            if class_desc.name not in self.HANDLED_CLASSES:
+                continue
+
+            # The first annotation is not an element of the list
+            self.extend(content[1:])
+            return True
+
+        return False
+
+
+@functools.total_ordering
+class JavaPrimitiveClass(JavaInstance):
+    """
+    Base of the Java classes wrapping a primitive value (Bool, Integer,
+    Long, ...): string conversion, hashing and comparisons are delegated to
+    the wrapped value
+    """
+
+    def __init__(self):
+        JavaInstance.__init__(self)
+        # Wrapped value, set by load_from_instance()
+        self.value = None
+
+    def __str__(self):
+        return str(self.value)
+
+    def __repr__(self):
+        return repr(self.value)
+
+    def __hash__(self):
+        return hash(self.value)
+
+    def __eq__(self, other):
+        return self.value == other
+
+    def __lt__(self, other):
+        return self.value < other
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Looks for the field named "value" in the parsed instance
+
+        :param indent: Not used by this method
+        :return: True if the field has been found and stored
+        """
+        found_values = (
+            field_value
+            for class_fields in self.field_data.values()
+            for field, field_value in class_fields.items()
+            if field.name == "value"
+        )
+
+        # Only the first match is used
+        for field_value in found_values:
+            self.value = field_value
+            return True
+
+        return False
+
+
+class JavaBool(JavaPrimitiveClass):
+    """
+    Represents a Java Boolean object
+    """
+
+    HANDLED_CLASSES = "java.lang.Boolean"
+
+    def __bool__(self):
+        # __bool__ must return a real bool: the value is None until it has
+        # been loaded from the parsed instance
+        return bool(self.value)
+
+    # Python 2 calls __nonzero__ instead of __bool__
+    __nonzero__ = __bool__
+
+
+class JavaInt(JavaPrimitiveClass):
+ """
+ Represents a Java Integer or Long object
+
+ The conversion to int returns the wrapped value as is.
+ """
+
+ # Both Java classes are mapped to this Python class
+ HANDLED_CLASSES = ("java.lang.Integer", "java.lang.Long")
+
+ def __int__(self):
+ return self.value
+
+
+class JavaMap(dict, JavaInstance):
+    """
+    Python dictionary which is also the parsed form of a Java map
+    """
+
+    HANDLED_CLASSES = (
+        "java.util.HashMap",
+        "java.util.TreeMap",
+    )  # type: Tuple[str, ...]
+
+    def __init__(self):
+        dict.__init__(self)
+        JavaInstance.__init__(self)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Fills the dictionary with the content of the parsed instance
+
+        :param indent: Not used by this method
+        :return: True if the annotations of a handled class were found
+        """
+        # The content of a map is stored in the annotations of its class.
+        # The names are those of JavaMap, even when called on a subclass.
+        for class_desc, content in self.annotations.items():
+            if class_desc.name not in JavaMap.HANDLED_CLASSES:
+                continue
+
+            # After the first annotation, keys and values alternate: the
+            # same iterator is consumed twice per step to pair them up
+            flat = iter(content[1:])
+            for key, value in zip(flat, flat):
+                self[key] = value
+
+            return True
+
+        return False
+
+
+class JavaLinkedHashMap(JavaMap):
+ """
+ Linked hash maps are handled with a specific block data
+ """
+
+ HANDLED_CLASSES = ("java.util.LinkedHashMap",)
+
+ def load_from_blockdata(self, parser, reader, indent=0):
+ # type: (IJavaStreamParser, DataStreamReader, int) -> bool
+ """
+ Loads the content of the map, written with a custom implementation
+
+ :param parser: The stream parser, used to read keys and values
+ :param reader: The reader of the underlying stream
+ :param indent: Not used by this method
+ :return: Always True
+ :raise ValueError: The entries are not followed by TC_ENDBLOCKDATA
+ and a null byte
+ """
+ # Read HashMap fields
+ self.buckets = reader.read_int()
+ self.size = reader.read_int()
+
+ # Read entries
+ for _ in range(self.size):
+ # Each key and each value is introduced by its own type code
+ key_code = reader.read_byte()
+ key = parser._read_content(key_code, True)
+
+ value_code = reader.read_byte()
+ value = parser._read_content(value_code, True)
+ self[key] = value
+
+ # Ignore the end of the blockdata
+ type_code = reader.read_byte()
+ if type_code != TerminalCode.TC_ENDBLOCKDATA:
+ raise ValueError("Didn't find the end of block data")
+
+ # Ignore the trailing 0
+ final_byte = reader.read_byte()
+ if final_byte != 0:
+ raise ValueError("Should find 0x0, got {0:x}".format(final_byte))
+
+ return True
+
+
+class JavaSet(set, JavaInstance):
+    """
+    Python set which is also the parsed form of a Java set
+    """
+
+    HANDLED_CLASSES = (
+        "java.util.HashSet",
+        "java.util.LinkedHashSet",
+    )  # type: Tuple[str, ...]
+
+    def __init__(self):
+        set.__init__(self)
+        JavaInstance.__init__(self)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Fills the set with the content of the parsed instance
+
+        :param indent: Not used by this method
+        :return: True if the annotations of a handled class were found
+        """
+        # The content of a set is stored in the annotations of its class
+        for class_desc, content in self.annotations.items():
+            if class_desc.name not in self.HANDLED_CLASSES:
+                continue
+
+            # The first annotation is not an element of the set
+            self.update(content[1:])
+            return True
+
+        return False
+
+
+class JavaTreeSet(JavaSet):
+    """
+    Tree sets are handled a bit differently: their elements start one
+    annotation later than those of the other sets
+    """
+
+    HANDLED_CLASSES = ("java.util.TreeSet",)
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Fills the set with the content of the parsed instance
+
+        :param indent: Not used by this method
+        :return: True if the annotations of a handled class were found
+        """
+        # The content of a set is stored in the annotations of its class
+        for class_desc, content in self.annotations.items():
+            if class_desc.name not in self.HANDLED_CLASSES:
+                continue
+
+            # Annotation[1] == size of the set
+            self.update(content[2:])
+            return True
+
+        return False
+
+
+class JavaTime(JavaInstance):
+    """
+    Represents the classes found in the java.time package
+
+    The semantic of the fields depends on the type of time that has been
+    parsed
+    """
+
+    HANDLED_CLASSES = ("java.time.Ser",)  # type: Tuple[str, ...]
+
+    # Kinds of time: value of the first byte of the block data
+    DURATION_TYPE = 1
+    INSTANT_TYPE = 2
+    LOCAL_DATE_TYPE = 3
+    LOCAL_TIME_TYPE = 4
+    LOCAL_DATE_TIME_TYPE = 5
+    ZONE_DATE_TIME_TYPE = 6
+    ZONE_REGION_TYPE = 7
+    ZONE_OFFSET_TYPE = 8
+    OFFSET_TIME_TYPE = 9
+    OFFSET_DATE_TIME_TYPE = 10
+    YEAR_TYPE = 11
+    YEAR_MONTH_TYPE = 12
+    MONTH_DAY_TYPE = 13
+    PERIOD_TYPE = 14
+
+    def __init__(self):
+        JavaInstance.__init__(self)
+        # -1 until a block data has been parsed
+        self.type = -1
+        self.year = None
+        self.month = None
+        self.day = None
+        self.hour = None
+        self.minute = None
+        self.second = None
+        self.nano = None
+        self.offset = None
+        self.zone = None
+
+        # Kind of time -> method parsing the matching payload
+        self.time_handlers = {
+            self.DURATION_TYPE: self.do_duration,
+            self.INSTANT_TYPE: self.do_instant,
+            self.LOCAL_DATE_TYPE: self.do_local_date,
+            self.LOCAL_DATE_TIME_TYPE: self.do_local_date_time,
+            self.LOCAL_TIME_TYPE: self.do_local_time,
+            self.ZONE_DATE_TIME_TYPE: self.do_zoned_date_time,
+            self.ZONE_OFFSET_TYPE: self.do_zone_offset,
+            self.ZONE_REGION_TYPE: self.do_zone_region,
+            self.OFFSET_TIME_TYPE: self.do_offset_time,
+            self.OFFSET_DATE_TIME_TYPE: self.do_offset_date_time,
+            self.YEAR_TYPE: self.do_year,
+            self.YEAR_MONTH_TYPE: self.do_year_month,
+            self.MONTH_DAY_TYPE: self.do_month_day,
+            self.PERIOD_TYPE: self.do_period,
+        }
+
+    def __str__(self):
+        # The type follows a "0x" prefix: print it in hexadecimal
+        return (
+            "JavaTime(type=0x{s.type:x}, "
+            "year={s.year}, month={s.month}, day={s.day}, "
+            "hour={s.hour}, minute={s.minute}, second={s.second}, "
+            "nano={s.nano}, offset={s.offset}, zone={s.zone})"
+        ).format(s=self)
+
+    def load_from_blockdata(self, parser, reader, indent=0):
+        """
+        Ignore the SC_BLOCK_DATA flag
+
+        :return: Always True, without reading anything
+        """
+        return True
+
+    def load_from_instance(self, indent=0):
+        # type: (int) -> bool
+        """
+        Load content from a parsed instance object
+
+        The first byte of the block data gives the kind of time, which
+        selects the method parsing the rest of the block. An unknown kind is
+        logged as an error, but is not a failure.
+
+        :param indent: Not used by this method
+        :return: True if the annotations of a handled class were found
+        :raise ValueError: The first annotation is not a block data
+        """
+        # Times have their content in their annotations
+        for cd, annotations in self.annotations.items():
+            if cd.name in self.HANDLED_CLASSES:
+                if not isinstance(annotations[0], BlockData):
+                    raise ValueError("Require a BlockData as annotation")
+
+                # Convert back annotations to bytes
+                # latin-1 is used to ensure that bytes are kept as is
+                content = to_bytes(annotations[0].data, "latin1")
+                (self.type,), content = read_struct(content, ">b")
+
+                # Look the handler up before calling it, so that a KeyError
+                # raised by a handler is not reported as an unknown kind
+                handler = self.time_handlers.get(self.type)
+                if handler is None:
+                    log_error(
+                        "Unhandled kind of time: {}".format(self.type)
+                    )
+                else:
+                    handler(content)
+
+                return True
+
+        return False
+
+    def do_duration(self, data):
+        """
+        Reads the seconds (long) and nanoseconds (int) of a duration
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (self.second, self.nano), data = read_struct(data, ">qi")
+        return data
+
+    def do_instant(self, data):
+        """
+        Reads the seconds (long) and nanoseconds (int) of an instant
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (self.second, self.nano), data = read_struct(data, ">qi")
+        return data
+
+    def do_local_date(self, data):
+        """
+        Reads the year (int), month (byte) and day (byte) of a date
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (self.year, self.month, self.day), data = read_struct(data, ">ibb")
+        return data
+
+    def do_local_time(self, data):
+        """
+        Reads a time stored in a compact form: a negative hour, minute or
+        second is the bitwise complement of the real value and ends the
+        record, the following fields being left to 0
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (hour,), data = read_struct(data, ">b")
+        minute = 0
+        second = 0
+        nano = 0
+
+        if hour < 0:
+            hour = ~hour
+        else:
+            (minute,), data = read_struct(data, ">b")
+            if minute < 0:
+                minute = ~minute
+            else:
+                (second,), data = read_struct(data, ">b")
+                if second < 0:
+                    second = ~second
+                else:
+                    (nano,), data = read_struct(data, ">i")
+
+        self.hour = hour
+        self.minute = minute
+        self.second = second
+        self.nano = nano
+        return data
+
+    def do_local_date_time(self, data):
+        """
+        Reads a date followed by a time
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        data = self.do_local_date(data)
+        data = self.do_local_time(data)
+        return data
+
+    def do_zoned_date_time(self, data):
+        """
+        Reads a date and time, followed by a zone offset and a zone region
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        data = self.do_local_date_time(data)
+        data = self.do_zone_offset(data)
+        data = self.do_zone_region(data)
+        return data
+
+    def do_zone_offset(self, data):
+        """
+        Reads a zone offset: a single byte, counted in units of 900
+        (presumably seconds, i.e. quarters of an hour -- TODO confirm),
+        unless it is 127, in which case the offset is the following int
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (offset_byte,), data = read_struct(data, ">b")
+        if offset_byte == 127:
+            (self.offset,), data = read_struct(data, ">i")
+        else:
+            self.offset = offset_byte * 900
+        return data
+
+    def do_zone_region(self, data):
+        """
+        Reads the zone region, stored as a string
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        self.zone, data = read_string(data)
+        return data
+
+    def do_offset_time(self, data):
+        """
+        Reads a time followed by a zone offset
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        data = self.do_local_time(data)
+        data = self.do_zone_offset(data)
+        return data
+
+    def do_offset_date_time(self, data):
+        """
+        Reads a date and time followed by a zone offset
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        data = self.do_local_date_time(data)
+        data = self.do_zone_offset(data)
+        return data
+
+    def do_year(self, data):
+        """
+        Reads a year (int)
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (self.year,), data = read_struct(data, ">i")
+        return data
+
+    def do_year_month(self, data):
+        """
+        Reads a year (int) and a month (byte)
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (self.year, self.month), data = read_struct(data, ">ib")
+        return data
+
+    def do_month_day(self, data):
+        """
+        Reads a month (byte) and a day (byte)
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (self.month, self.day), data = read_struct(data, ">bb")
+        return data
+
+    def do_period(self, data):
+        """
+        Reads the years, months and days (3 ints) of a period
+
+        :param data: The bytes to parse
+        :return: The remaining bytes
+        """
+        (self.year, self.month, self.day), data = read_struct(data, ">iii")
+        return data
+
+
+class DefaultObjectTransformer(ObjectTransformer):
+    """
+    Provider of the default object transformers
+    """
+
+    KNOWN_TRANSFORMERS = (
+        JavaBool,
+        JavaInt,
+        JavaList,
+        JavaMap,
+        JavaLinkedHashMap,
+        JavaSet,
+        JavaTreeSet,
+        JavaTime,
+    )
+
+    def __init__(self):
+        # Java class name -> Python class to instantiate
+        self._type_mapper = {}
+        for python_class in self.KNOWN_TRANSFORMERS:
+            handled = python_class.HANDLED_CLASSES
+            # A single name can be given as a string instead of a tuple
+            java_names = (handled,) if isinstance(handled, str) else handled
+            for java_name in java_names:
+                self._type_mapper[java_name] = python_class
+
+    def create_instance(self, classdesc):
+        # type: (JavaClassDesc) -> Optional[JavaInstance]
+        """
+        Creates the Python object matching the given Java class, if known
+
+        :param classdesc: The description of a Java class
+        :return: A new, empty instance of the matching Python class, or None
+                 if the Java class isn't handled
+        """
+        mapped_type = self._type_mapper.get(classdesc.name)
+        if mapped_type is None:
+            # Return None if not handled
+            return None
+
+        log_debug("---")
+        log_debug(classdesc.name)
+        log_debug("---")
+
+        java_object = mapped_type()
+        java_object.classdesc = classdesc
+
+        log_debug(">>> java_object: {0}".format(java_object))
+        return java_object
+
+
+class NumpyArrayTransformer(ObjectTransformer):
+    """
+    Loads arrays as numpy arrays if possible
+    """
+
+    # Conversion of a Java type char to its NumPy equivalent.
+    # Explicit sizes are used where the NumPy character code is either wrong
+    # or platform-dependent:
+    # - a Java char is stored on 2 bytes (see DataStreamReader.read_char)
+    # - a Java long is stored on 8 bytes, whereas "l" is a C long
+    # NOTE(review): bytes are loaded as unsigned ("B") although
+    # DataStreamReader.read_byte() reads them signed -- kept as is to avoid
+    # changing the values seen by existing callers; confirm it is intended
+    NUMPY_TYPE_MAP = {
+        TypeCode.TYPE_BYTE: "B",
+        TypeCode.TYPE_CHAR: ">u2",
+        TypeCode.TYPE_DOUBLE: ">d",
+        TypeCode.TYPE_FLOAT: ">f",
+        TypeCode.TYPE_INTEGER: ">i",
+        TypeCode.TYPE_LONG: ">i8",
+        TypeCode.TYPE_SHORT: ">h",
+        TypeCode.TYPE_BOOLEAN: ">B",
+    }
+
+    def load_array(self, reader, type_code, size):
+        # type: (DataStreamReader, TypeCode, int) -> Optional[list]
+        """
+        Loads a Java array, if possible
+
+        :param reader: The reader of the underlying stream
+        :param type_code: The type code of the elements of the array
+        :param size: The number of elements to read
+        :return: A numpy array, or None if numpy is not available or if the
+                 type of the elements is not handled
+        :raise EOFError: The stream ended before the end of the array
+        """
+        if numpy is None:
+            return None
+
+        try:
+            dtype = numpy.dtype(self.NUMPY_TYPE_MAP[type_code])
+        except KeyError:
+            # Unhandled data type
+            return None
+
+        if size == 0:
+            return numpy.zeros(0, dtype=dtype)
+
+        # Read through the file object instead of numpy.fromfile(), which
+        # requires a real file and fails on in-memory streams (BytesIO)
+        expected = dtype.itemsize * size
+        raw = reader.file_descriptor.read(expected)
+        if len(raw) != expected:
+            raise EOFError("Stream has ended unexpectedly while parsing.")
+
+        # Copy the result to return a writable array, like fromfile() did
+        return numpy.frombuffer(raw, dtype=dtype, count=size).copy()
diff --git a/manifest.in b/manifest.in
new file mode 100644
index 0000000..cf4e570
--- /dev/null
+++ b/manifest.in
@@ -0,0 +1,8 @@
+# Include the README
+include README.md
+
+# Include the authors file
+include AUTHORS
+
+# Include the license file
+include LICENSE
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..8789351
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,58 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["javaobj"]
+
+[project]
+name = "javaobj-py3"
+version = "0.4.4"
+description = "Module for serializing and de-serializing Java objects."
+readme = "README.md"
+license = "Apache-2.0"
+authors = [
+ { name = "Volodymyr Buell", email = "vbuell@gmail.com" }
+]
+maintainers = [
+ { name = "Thomas Calmant", email = "thomas.calmant@gmail.com" }
+]
+keywords = ["python", "java", "marshalling", "serialization"]
+classifiers = [
+ "Development Status :: 3 - Alpha",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Software Development :: Libraries :: Python Modules"
+]
+
+dependencies = [
+ "enum34; python_version<='3.4'",
+ "typing; python_version<='3.4'"
+]
+
+[project.optional-dependencies]
+test = ["pytest"]
+
+[project.urls]
+Homepage = "https://github.com/tcalmant/python-javaobj"
+Issues = "http://github.com/tcalmant/python-javaobj/issues"
+Source = "http://github.com/tcalmant/python-javaobj/"
+
+[tool.hatch.envs.test]
+dependencies = ["pytest"]
+
+[tool.hatch.envs.test.scripts]
+run = "pytest tests"
+
+[tool.black]
+line-length = 79
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..17b0412
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+enum34;python_version<="3.4"
+typing;python_version<="3.4"
diff --git a/setup.py b/setup.py
index 67c190e..cf93fb8 100644
--- a/setup.py
+++ b/setup.py
@@ -7,12 +7,12 @@
:authors: Volodymyr Buell, Thomas Calmant
:license: Apache License 2.0
-:version: 0.1.4
+:version: 0.4.4
:status: Alpha
..
- Copyright 2016 Thomas Calmant
+ Copyright 2024 Thomas Calmant
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@
# ------------------------------------------------------------------------------
# Module version
-__version_info__ = (0, 1, 4)
+__version_info__ = (0, 4, 4)
__version__ = ".".join(str(x) for x in __version_info__)
# Documentation strings format
@@ -53,8 +53,10 @@ def read(fname):
with open(os.path.join(os.path.dirname(__file__), fname)) as fd:
return fd.read()
+
# ------------------------------------------------------------------------------
+
setup(
name="javaobj-py3",
version=__version__,
@@ -64,18 +66,31 @@ def read(fname):
maintainer_email="thomas.calmant@gmail.com",
url="https://github.com/tcalmant/python-javaobj",
description="Module for serializing and de-serializing Java objects.",
- license='Apache License 2.0',
+ license="Apache License 2.0",
+ license_file="LICENSE",
keywords="python java marshalling serialization",
- py_modules=['javaobj'],
- test_suite="tests.tests",
- long_description=read('README.rst'),
+ packages=["javaobj", "javaobj.v1", "javaobj.v2"],
+ test_suite="tests",
+ install_requires=[
+ 'enum34;python_version<="3.4"',
+ 'typing;python_version<="3.4"',
+ ],
+ long_description=read("README.md"),
+ long_description_content_type="text/markdown",
classifiers=[
- "Development Status :: 3 - Alpha",
- "License :: OSI Approved :: Apache Software License",
- 'Operating System :: OS Independent',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3.3',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- "Topic :: Software Development :: Libraries :: Python Modules",
- ])
+ "Development Status :: 3 - Alpha",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+ ],
+)
diff --git a/tests/java/.classpath b/tests/java/.classpath
deleted file mode 100644
index 61c3fab..0000000
--- a/tests/java/.classpath
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
-
-
-
-
diff --git a/tests/java/.gitignore b/tests/java/.gitignore
index 073063f..562f411 100644
--- a/tests/java/.gitignore
+++ b/tests/java/.gitignore
@@ -4,3 +4,8 @@ target/
# Generated files
*.ser
+# Project files
+.idea/
+.classpath
+.project
+.settings/
diff --git a/tests/java/pom.xml b/tests/java/pom.xml
index 12f7250..d4cc75d 100644
--- a/tests/java/pom.xml
+++ b/tests/java/pom.xml
@@ -9,14 +9,16 @@
UTF-8
+ 1.7
+ 1.7
junit
junit
- 4.9
+ 4.13.1
test
-
\ No newline at end of file
+
diff --git a/tests/java/src/test/java/OneTest.java b/tests/java/src/test/java/OneTest.java
index ad29b0e..7ffb10a 100644
--- a/tests/java/src/test/java/OneTest.java
+++ b/tests/java/src/test/java/OneTest.java
@@ -1,13 +1,29 @@
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZonedDateTime;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.Hashtable;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
import java.util.Vector;
+import java.util.Random;
+import java.util.zip.GZIPOutputStream;
import javax.swing.JScrollPane;
import javax.swing.SwingUtilities;
@@ -19,13 +35,18 @@
class ClassWithEnum implements Serializable {
/**
- *
+ *
*/
private static final long serialVersionUID = 1L;
public Color color = Color.GREEN;
public Color[] colors = { Color.GREEN, Color.BLUE, Color.RED };
}
+class ClassWithByteArray implements Serializable {
+ private static final long serialVersionUID = 1L;
+ public byte[] myArray = new byte[]{1,3,7,11};
+}
+
enum Color {
BLUE("BLUE"), GREEN("GREEN"), RED("RED"), UNKNOWN("UNKNOWN");
private final String value;
@@ -43,13 +64,13 @@ class MyExceptionWhenDumping implements java.io.Serializable {
protected static class MyException extends java.io.IOException {
/**
- *
+ *
*/
private static final long serialVersionUID = 1L;
}
/**
- *
+ *
*/
private static final long serialVersionUID = 1L;;
@@ -81,7 +102,7 @@ public static class A1 implements Serializable {
public static class B1 implements Serializable {
/**
- *
+ *
*/
private static final long serialVersionUID = 1L;
Hashtable