From c580643b3309de75ad83cc8ad58f0c90347160f8 Mon Sep 17 00:00:00 2001 From: domfournier Date: Tue, 2 Jun 2026 10:43:52 -0400 Subject: [PATCH 1/7] Allow to set page_size for outgoing workspace. Pass compression to entity creation --- omf/fileio/fileio.py | 26 +++-- omf/fileio/geoh5.py | 168 ++++++++++++++++++++------------- tests/conftest.py | 14 +-- tests/convert_curve_test.py | 2 +- tests/convert_grid2d_test.py | 2 +- tests/convert_pointset_test.py | 2 +- tests/convert_project_test.py | 10 +- tests/convert_surface_test.py | 2 +- tests/convert_volume_test.py | 4 +- 9 files changed, 133 insertions(+), 97 deletions(-) diff --git a/omf/fileio/fileio.py b/omf/fileio/fileio.py index 5d6763a..69e22e4 100644 --- a/omf/fileio/fileio.py +++ b/omf/fileio/fileio.py @@ -18,7 +18,7 @@ from omf.base import UidModel from omf.fileio.geoh5 import GeoH5Writer - +from geoh5py.shared.utils import DEFAULT_PAGE_SIZE __version__ = b"OMF-v0.9.0" @@ -47,24 +47,20 @@ class OMFWriter: in the binary blob. """ - def __init__(self, project: UidModel, fname: str, compression: int = 5): + def __init__(self, project: UidModel, fname: str): """Project serialization is performed on OMFWriter init Binary data is written during project serialization """ - - if fname.endswith(".geoh5"): - GeoH5Writer(project, fname, compression=compression) - else: - if not fname.endswith(".omf"): - fname = fname + ".omf" - - self.fname = fname - with open(fname, "wb") as fopen: - self.initialize_header(fopen, project.uid) - self.project_json = project.serialize(open_file=fopen) - self.update_header(fopen) - fopen.write(json.dumps(self.project_json).encode("utf-8")) + if not fname.endswith(".omf"): + fname = fname + ".omf" + + self.fname = fname + with open(fname, "wb") as fopen: + self.initialize_header(fopen, project.uid) + self.project_json = project.serialize(open_file=fopen) + self.update_header(fopen) + fopen.write(json.dumps(self.project_json).encode("utf-8")) @staticmethod def initialize_header(fopen, uid): 
diff --git a/omf/fileio/geoh5.py b/omf/fileio/geoh5.py index 82d49fb..71dc7db 100644 --- a/omf/fileio/geoh5.py +++ b/omf/fileio/geoh5.py @@ -31,9 +31,10 @@ ReferencedData, VisualParameters, ) -from geoh5py.groups import ContainerGroup, PropertyGroup, RootGroup +from geoh5py.groups import ContainerGroup, RootGroup from geoh5py.objects import BlockModel, Curve, Grid2D, ObjectBase, Points, Surface from geoh5py.shared import FLOAT_NDV, INTEGER_NDV, Entity +from geoh5py.shared.utils import DEFAULT_PAGE_SIZE from geoh5py.workspace import Workspace from omf.base import ContentModel, Project, UidModel @@ -84,14 +85,16 @@ class GeoH5Writer: # pylint: disable=too-few-public-methods def __init__( self, element: UidModel, - file_name: str | Path, + file_name: str | Path | Workspace, compression: int = 5, + page_size: int = DEFAULT_PAGE_SIZE, ): if not isinstance(file_name, str | Path): raise TypeError("Input 'file' must be of str or Path.") - self.file = file_name self.compression = compression + self.page_size = page_size + self.file = self.validate_geoh5_file(file_name) self.entity = element self.element = element @@ -102,16 +105,35 @@ def entity(self) -> Entity: @entity.setter def entity(self, element: UidModel): - converter = get_conversion_map(element, self.file, self.compression) - self._entity = converter.from_omf(element) + with fetch_active_workspace(self.file) as workspace: + if isinstance(element, Project): + converter = ProjectConversion(element, workspace, self.compression) + else: + converter = get_conversion_map(element, workspace, self.compression) + + self._entity = converter.from_omf(element) def __call__(self): return self.entity.workspace + def validate_geoh5_file(self, file: str | Path) -> Path: + if not isinstance(file, str | Path): + raise TypeError("Input 'file' must be of str or Path.") + + file_path = Path(file) + if not file_path.exists(): + h5file = Workspace.create(file, page_size=self.page_size) + h5file.close() + + if file_path.suffix != 
".geoh5": + raise ValueError("Input 'file' must be a '.geoh5' file.") + + return file_path + def get_conversion_map( element: UidModel | Entity, - workspace: str | Path | Workspace, + workspace: Workspace, compression: int = 5, parent=None, ): @@ -119,7 +141,7 @@ def get_conversion_map( Utility method to get the appropriate conversion class is it exists. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param compression: Compression level for data. :param parent: Optional parent object used for conversion. @@ -149,9 +171,9 @@ class GeoH5Reader: # pylint: disable=too-few-public-methods """ def __init__(self, file_name: str | Path): + self.file = file_name with Workspace(file_name, mode="r") as workspace: - self.file = workspace - converter = ProjectConversion(workspace.root, self.file) + converter = ProjectConversion(workspace.root, workspace) self.project = converter.from_geoh5(workspace.root) def __call__(self) -> Project: @@ -163,7 +185,7 @@ class BaseConversion(ABC): Base conversion between OMF and geoh5 format. :param element: Either an omf or geoh5 class. - :param geoh5: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param geoh5: An active :obj:`geoh5py.workspace.Workspace`. :param compression: Compression level for data. 
:param parent: (Optional) Parental object """ @@ -178,27 +200,28 @@ class BaseConversion(ABC): def __init__( self, element: UidModel | Entity, - geoh5: str | Path | Workspace, + geoh5: Workspace, compression: int = 5, parent=None, ): if element is None: raise ValueError("Input 'element' cannot be None.") - self._geoh5 = None self.geoh5 = geoh5 self.compression = compression self._parent = parent @property - def geoh5(self) -> Workspace | str | Path: + def geoh5(self) -> Workspace: if self._geoh5 is None: raise ValueError("Input 'geoh5' cannot be None.") return self._geoh5 @geoh5.setter def geoh5(self, val): + if not isinstance(val, Workspace): + raise ValueError("Input 'geoh5' must be a Workspace.") self._geoh5 = val @abstractmethod @@ -213,7 +236,7 @@ def from_geoh5(self, entity, **kwargs) -> dict: def process_dependents( element: UidModel | Entity, parent: Entity | None, - workspace: str | Path | Workspace, + workspace: Workspace, compression: int, ) -> list: """ @@ -221,7 +244,7 @@ def process_dependents( :param element: Either an omf or geoh5 class. :param parent: Parental omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param compression: Compression level for data. :return: List of children UiDModel or Entity objects. @@ -275,20 +298,20 @@ def process_dependents( def collect_attributes( self, element: UidModel | Entity, - workspace: str | Workspace | Path, + workspace: Workspace, **kwargs, ) -> dict: """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace` class. :param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. 
""" - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): for key, alias in self._attribute_map.items(): if inspect.isclass(alias) and issubclass(alias, BaseConversion): conversion = alias( # pylint: disable=not-callable @@ -344,7 +367,7 @@ def from_omf(self, element: Entity, **kwargs) -> Data | list[Data]: # type: ign :returns: :obj:`geoh5.data.Data` entity. """ - with fetch_h5_handle(self.geoh5) as workspace: + with fetch_active_workspace(self.geoh5) as workspace: kwargs = self.collect_attributes(element, workspace, **kwargs) parent = kwargs.pop("parent", None) @@ -378,7 +401,7 @@ def from_geoh5(self, entity: Data, **kwargs) -> UidModel | list: # type: ignore :returns: OMF data object. """ - with fetch_h5_handle(self.geoh5) as workspace: + with fetch_active_workspace(self.geoh5) as workspace: kwargs = self.collect_attributes(entity, workspace, **kwargs) uid = kwargs.pop("uid") @@ -403,7 +426,7 @@ class ContainerGroupConversion(BaseConversion): OMF project. :param obj: Either an omf or geoh5 class. - :param geoh5: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param geoh5: An active :obj:`geoh5py.workspace.Workspace`. :param compression: Compression level for data. """ @@ -415,7 +438,7 @@ class ContainerGroupConversion(BaseConversion): def __init__( self, obj: UidModel | Entity, - geoh5: str | Path | Workspace, + geoh5: Workspace, compression: int, **kwargs, ): @@ -433,7 +456,7 @@ def from_geoh5(self, entity: ObjectBase, **kwargs) -> UidModel: # type: ignore :returns: An OMF Element. """ - with fetch_h5_handle(self.geoh5) as workspace: + with fetch_active_workspace(self.geoh5) as workspace: return self.process_dependents( entity, None, @@ -448,7 +471,7 @@ class ElementConversion(BaseConversion): :obj:`geoh5py.objects.Points` :param obj: Either an omf or geoh5 class. - :param geoh5: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param geoh5: An active :obj:`geoh5py.workspace.Workspace`. 
:param compression: Compression level for data. """ @@ -460,7 +483,7 @@ class ElementConversion(BaseConversion): def __init__( self, obj: UidModel | Entity, - geoh5: str | Path | Workspace, + geoh5: Workspace, compression: int, **kwargs, ): @@ -478,14 +501,16 @@ def from_omf(self, element: PointSetElement, **kwargs) -> Entity | None: # type :returns: :obj:`geoh5.objects` class. """ - with fetch_h5_handle(self.geoh5) as workspace: + with fetch_active_workspace(self.geoh5) as workspace: try: kwargs = self.collect_attributes(element, workspace, **kwargs) except OMFtoGeoh5NotImplemented as error: _logger.warning(str(error)) return None - entity = workspace.create_entity(self.geoh5_type, **{"entity": kwargs}) # type: ignore + entity = workspace.create_entity( + self.geoh5_type, compression=self.compression, entity=kwargs + ) if entity is not None: self.process_dependents(element, entity, workspace, self.compression) @@ -500,7 +525,7 @@ def from_geoh5(self, entity: ObjectBase, **kwargs) -> UidModel: # type: ignore :returns: An OMF Element. """ - with fetch_h5_handle(self.geoh5) as workspace: + with fetch_active_workspace(self.geoh5) as workspace: kwargs = self.collect_attributes(entity, workspace, **kwargs) uid = kwargs.pop("uid") element = self.omf_type(**kwargs) @@ -532,7 +557,19 @@ class ProjectConversion(BaseConversion): "revision": "version", } - def from_omf(self, element: Project, **kwargs) -> Entity | PropertyGroup | None: # type: ignore + def __init__( + self, + element: UidModel | Entity, + geoh5: Workspace, + compression: int = 5, + parent=None, + page_size: int = DEFAULT_PAGE_SIZE, + ): + + super().__init__(element, geoh5, compression, parent=parent) + self.page_size = page_size + + def from_omf(self, element: Project, **kwargs) -> Entity: # type: ignore """ Convert omf project to geoh5 root. @@ -541,7 +578,7 @@ def from_omf(self, element: Project, **kwargs) -> Entity | PropertyGroup | None: :return: Workspace root group. 
""" - with fetch_h5_handle(self.geoh5) as workspace: + with fetch_active_workspace(self.geoh5, page_size=self.page_size) as workspace: kwargs = self.collect_attributes(element, workspace, **kwargs) root = workspace.root @@ -561,7 +598,7 @@ def from_geoh5(self, entity: RootGroup, **kwargs) -> Project: # type: ignore :return: OMF project. """ - with fetch_h5_handle(self.geoh5) as workspace: + with fetch_active_workspace(self.geoh5) as workspace: kwargs = self.collect_attributes(entity, workspace, **kwargs) uid = kwargs.pop("uid") project = self.omf_type(**kwargs) @@ -613,18 +650,18 @@ def from_geoh5(self, entity: Entity, **kwargs) -> dict: return kwargs def collect_attributes( - self, element: UidModel | Entity, workspace: str | Workspace | Path, **kwargs + self, element: UidModel | Entity, workspace: Workspace, **kwargs ) -> dict: """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. 
""" - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): if isinstance(element, UidModel): values = element.array.array ndvs = np.isnan(values) @@ -642,7 +679,7 @@ def collect_attributes( if values is None and isinstance(element, NumericData): dtype = DataTypeEnum[element.entity_type.primitive_type.name].value - values = np.ones(element.n_values, dtype=dtype) * element.ndv + values = np.full(element.n_values, element.ndv, dtype=dtype) if np.issubdtype(values.dtype, np.floating): values[np.isclose(values, FLOAT_NDV, atol=2e-45)] = np.nan @@ -674,26 +711,26 @@ class IndicesConversion(ArrayConversion): def collect_attributes( self, element: UidModel | Entity, - workspace: str | Workspace | Path, + workspace: Workspace, **kwargs, ) -> dict: """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. """ - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): if isinstance(element, UidModel): values = element.array.array else: values = getattr(element, "values", None) if values is None and isinstance(element, NumericData): - values = np.ones(element.n_values, dtype=np.int32) * INTEGER_NDV + values = np.full(element.n_values, INTEGER_NDV, dtype=np.int32) values[np.isclose(values, INTEGER_NDV)] = 0 @@ -731,14 +768,14 @@ class ReferenceMapConversion(ArrayConversion): def collect_attributes( # type: ignore self, element: MappedData | ReferencedData, - workspace: str | Workspace | Path, + workspace: Workspace, **kwargs, ) -> dict: """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. 
+ :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. @@ -782,9 +819,9 @@ def collect_omf_attributes(element: MappedData, **kwargs) -> dict: @staticmethod def collect_h5_attributes( - element: ReferencedData, workspace: str | Workspace | Path, **kwargs + element: ReferencedData, workspace: Workspace, **kwargs ) -> dict: - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): if element.value_map is None: return kwargs @@ -842,13 +879,13 @@ class ColormapConversion(ArrayConversion): _attribute_map: dict = {"colormap": "color_map"} def collect_attributes( - self, element: UidModel | Entity, workspace: str | Workspace | Path, **kwargs + self, element: UidModel | Entity, workspace: Workspace, **kwargs ): """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. 
@@ -873,9 +910,9 @@ def collect_omf_attributes(element: UidModel, **kwargs) -> dict: @staticmethod def collect_h5_attributes( - element: UidModel | Entity, workspace: str | Workspace | Path, **kwargs + element: UidModel | Entity, workspace: Workspace, **kwargs ) -> dict: - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): if getattr(element.entity_type, "color_map", None) is not None: cmap = element.entity_type.color_map # type: ignore ind = np.argsort(cmap.values[0, :]) @@ -943,13 +980,13 @@ def from_geoh5(self, entity: Entity, **kwargs) -> dict: return kwargs def collect_attributes( - self, element: UidModel | Entity, workspace: str | Workspace | Path, **kwargs + self, element: UidModel | Entity, workspace: Workspace, **kwargs ) -> dict: """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. @@ -958,7 +995,7 @@ def collect_attributes( for key, alias in self._attribute_map.items(): kwargs[alias] = np.vstack(getattr(element.geometry, key)) else: - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): geometry = self.omf_type( **{ key: getattr(element, alias) @@ -1019,14 +1056,14 @@ class SurfaceGridGeometryConversion(BaseGeometryConversion): def collect_attributes( # type: ignore self, element: SurfaceGridGeometry | Grid2D, - workspace: str | Workspace | Path, + workspace: Workspace, **kwargs, ) -> dict: """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. 
:param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. @@ -1084,16 +1121,16 @@ def collect_omf_attributes(cls, element: SurfaceGridGeometry, **kwargs) -> dict: @classmethod def collect_h5_attributes( - cls, entity: Grid2D, workspace: str | Workspace | Path, **kwargs + cls, entity: Grid2D, workspace: Workspace, **kwargs ) -> dict: - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): geometry = {} for key, alias in cls._attribute_map.items(): cell_size, count = ( getattr(entity, f"{alias}_cell_size"), getattr(entity, f"{alias}_count"), ) - tensor = np.ones(count) * np.abs(cell_size) + tensor = np.full(count, np.abs(cell_size)) geometry.update({f"tensor_{key}": tensor}) if entity.rotation is not None or entity.dip is not None: @@ -1121,7 +1158,7 @@ class VolumeGridGeometryConversion(BaseGeometryConversion): :obj:`geoh5py.objects.BlockModel` attributes. :param obj: Either an omf or geoh5 class. - :param geoh5: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param geoh5: An active :obj:`geoh5py.workspace.Workspace`. :param compression: Compression level for data. :param parent: (Optional) Parental object """ @@ -1133,7 +1170,7 @@ class VolumeGridGeometryConversion(BaseGeometryConversion): def __init__( self, obj: UidModel | Entity, - geoh5: str | Path | Workspace, + geoh5: Workspace, compression: int, parent=None, ): @@ -1142,14 +1179,14 @@ def __init__( def collect_attributes( # type: ignore self, element: VolumeGridGeometry | BlockModel, - workspace: str | Workspace | Path, + workspace: Workspace, **kwargs, ): """ Collect and convert attributes needed to construct an omf or geoh5 object. :param element: Either an omf or geoh5 class. - :param workspace: Path to a geoh5 or active :obj:`geoh5py.workspace.Workspace`. + :param workspace: An active :obj:`geoh5py.workspace.Workspace`. :param kwargs: Input dictionary of attributes to be appended. :return: Updated arguments. 
@@ -1183,9 +1220,9 @@ def collect_omf_attributes(cls, element: VolumeElement, **kwargs) -> dict: @classmethod def collect_h5_attributes( - cls, entity: Entity, workspace: str | Workspace | Path, **kwargs + cls, entity: Entity, workspace: Workspace, **kwargs ) -> dict: - with fetch_h5_handle(workspace): + with fetch_active_workspace(workspace): geometry = {} axis = [] for key, alias in cls._attribute_map.items(): @@ -1294,8 +1331,10 @@ def rotation_opt(azimuth: float, dip: float): @contextmanager -def fetch_h5_handle( - file: str | Workspace | Path, mode: str = "a" +def fetch_active_workspace( + file: str | Workspace | Path, + mode: str = "a", + page_size: int = DEFAULT_PAGE_SIZE, ) -> Generator[Workspace, None, None]: """ Open in read+ mode a geoh5 file from string. @@ -1303,6 +1342,7 @@ def fetch_h5_handle( :param file: Name or handle to a geoh5 file. :param mode: Set the h5 read/write mode + :param page_size: Set the h5 page buffer size in bytes. :return h5py.File: Handle to an opened h5py file. 
""" @@ -1317,7 +1357,7 @@ def fetch_h5_handle( raise ValueError("Input h5 file must have a 'geoh5' extension.") if not file_path.exists(): - h5file = Workspace.create(file) + h5file = Workspace.create(file, page_size=page_size) else: h5file = Workspace(file, mode=mode) diff --git a/tests/conftest.py b/tests/conftest.py index 7f5f5e8..b7da979 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,14 +28,14 @@ def random_project() -> omf.Project: pts = omf.PointSetElement( name="Random Points", description="Just random points", - geometry=omf.PointSetGeometry(vertices=np.random.rand(100, 3)), + geometry=omf.PointSetGeometry(vertices=np.random.rand(50000, 3)), data=[ omf.ScalarData( - name="rand data", array=np.random.rand(100), location="vertices" + name="rand data", array=np.random.rand(50000), location="vertices" ), omf.ScalarData( name="More rand data", - array=np.random.rand(100), + array=np.random.rand(50000), location="vertices", ), ], @@ -61,13 +61,13 @@ def random_project() -> omf.Project: lin = omf.LineSetElement( name="Random Line", geometry=omf.LineSetGeometry( - vertices=np.random.rand(100, 3), + vertices=np.random.rand(50000, 3), segments=np.floor(np.random.rand(50, 2) * 100).astype(int), ), data=[ omf.ScalarData( name="rand vert data", - array=np.random.rand(100), + array=np.random.rand(50000), location="vertices", ), omf.ScalarData( @@ -82,13 +82,13 @@ def random_project() -> omf.Project: surf = omf.SurfaceElement( name="trisurf", geometry=omf.SurfaceGeometry( - vertices=np.random.rand(100, 3), + vertices=np.random.rand(50000, 3), triangles=np.floor(np.random.rand(50, 3) * 100).astype(int), ), data=[ omf.ScalarData( name="rand vert data", - array=np.random.rand(100), + array=np.random.rand(50000), location="vertices", ), omf.ScalarData( diff --git a/tests/convert_curve_test.py b/tests/convert_curve_test.py index 883e5bc..9212861 100644 --- a/tests/convert_curve_test.py +++ b/tests/convert_curve_test.py @@ -37,7 +37,7 @@ def 
test_lineset_to_geoh5(tmp_path: Path): color="#0000FF", ) file = str(tmp_path / "lineset.geoh5") - omf.OMFWriter(line, file) + omf.GeoH5Writer(line, file) with Workspace(file) as workspace: curve = workspace.get_entity("Random Line")[0] diff --git a/tests/convert_grid2d_test.py b/tests/convert_grid2d_test.py index 2d0fd81..a13fa0f 100644 --- a/tests/convert_grid2d_test.py +++ b/tests/convert_grid2d_test.py @@ -50,7 +50,7 @@ def test_grid2d_to_geoh5(tmp_path: Path, caplog): ) file = str(tmp_path / "grid2d.geoh5") - omf.OMFWriter(grid, file) + omf.GeoH5Writer(grid, file) warning_records = [r for r in caplog.records if r.levelno == logging.WARNING] assert len(warning_records) == 1 diff --git a/tests/convert_pointset_test.py b/tests/convert_pointset_test.py index 4a2d048..4786a73 100644 --- a/tests/convert_pointset_test.py +++ b/tests/convert_pointset_test.py @@ -47,7 +47,7 @@ def test_pointset_to_geoh5(tmp_path: Path, caplog): ) file = str(tmp_path / "pointset.geoh5") - omf.OMFWriter(orig_pts, file) + omf.GeoH5Writer(orig_pts, file) # Check that the file was created with Workspace(file) as workspace: diff --git a/tests/convert_project_test.py b/tests/convert_project_test.py index 9802b64..ff2ea1e 100644 --- a/tests/convert_project_test.py +++ b/tests/convert_project_test.py @@ -24,7 +24,7 @@ def test_project_to_geoh5(random_project: omf.Project, tmp_path: Path, caplog): """Test pointset geometry validation""" file = str(tmp_path / "project.geoh5") - omf.OMFWriter(random_project, file) + omf.GeoH5Writer(random_project, file) warning_records = [r for r in caplog.records if r.levelno == logging.WARNING] assert len(warning_records) == 1 @@ -42,9 +42,9 @@ def test_project_compression(random_project: omf.Project, tmp_path: Path): file_med_comp = str(tmp_path / "project_med_comp.geoh5") file_high_comp = str(tmp_path / "project_high_comp.geoh5") - omf.OMFWriter(random_project, file_low_comp, compression=1) - omf.OMFWriter(random_project, file_med_comp, compression=5) - 
omf.OMFWriter(random_project, file_high_comp, compression=9) + omf.GeoH5Writer(random_project, file_low_comp, compression=1, page_size=512) + omf.GeoH5Writer(random_project, file_med_comp, compression=5, page_size=512) + omf.GeoH5Writer(random_project, file_high_comp, compression=9, page_size=512) size_low_comp = os.stat(file_low_comp).st_size size_med_comp = os.stat(file_med_comp).st_size @@ -59,7 +59,7 @@ def test_container_group(random_project: omf.Project, tmp_path: Path): """Test that a container group is flatten in the omf file.""" file = str(tmp_path / f"{__name__}.geoh5") - omf.OMFWriter(random_project, file) + omf.GeoH5Writer(random_project, file) with Workspace(tmp_path / f"{__name__}.geoh5") as ws: group = ContainerGroup.create(ws, name="Test Group") for obj in ws.objects: diff --git a/tests/convert_surface_test.py b/tests/convert_surface_test.py index d978f05..156318a 100644 --- a/tests/convert_surface_test.py +++ b/tests/convert_surface_test.py @@ -37,7 +37,7 @@ def test_surface_to_geoh5(tmp_path: Path): color=[100, 200, 200], ) file = str(tmp_path / "surface.geoh5") - omf.OMFWriter(surf, file) + omf.GeoH5Writer(surf, file) with Workspace(file) as workspace: geoh5_surf = workspace.get_entity("trisurf")[0] diff --git a/tests/convert_volume_test.py b/tests/convert_volume_test.py index e0d04dc..9e3f230 100644 --- a/tests/convert_volume_test.py +++ b/tests/convert_volume_test.py @@ -85,7 +85,7 @@ def test_volume_to_geoh5(tmp_path: Path): ) file = str(tmp_path / f"{__name__}.geoh5") - omf.OMFWriter(vol, file) + omf.GeoH5Writer(vol, file) with Workspace(file) as workspace: block_model = workspace.get_entity("vol")[0] @@ -131,7 +131,7 @@ def test_volume_flip_origin_z(tmp_path): ) file = str(tmp_path / f"{__name__}.geoh5") - omf.OMFWriter(vol, file) + omf.GeoH5Writer(vol, file) with Workspace(file) as workspace: block_model = workspace.get_entity("vol")[0] From 5810feed39f55bc4e890d818968ecd994ca389e8 Mon Sep 17 00:00:00 2001 From: domfournier Date: Tue, 2 Jun 
2026 10:45:28 -0400 Subject: [PATCH 2/7] Temp relocking on geoh5py branch --- poetry.lock | 30 +++++++++++++++++------------- pyproject.toml | 4 ++-- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/poetry.lock b/poetry.lock index e484dce..355d704 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "alabaster" @@ -364,22 +364,26 @@ files = [ [[package]] name = "geoh5py" -version = "0.13.0b1" +version = "0.0.0.dev0" description = "Python API for geoh5, an open file format for geoscientific data" optional = false -python-versions = "<3.15,>=3.12" +python-versions = ">=3.12,<3.15" groups = ["main"] -files = [ - {file = "geoh5py-0.13.0b1-py3-none-any.whl", hash = "sha256:d747f3da35d08d51a809736103832c477b242758341815904f603a4886a83397"}, - {file = "geoh5py-0.13.0b1.tar.gz", hash = "sha256:43c12da0e0a294b4d4e8c0dda47d6f842349be01d2fcf269e8dc4024ab9823f6"}, -] +files = [] +develop = false [package.dependencies] -h5py = ">=3.15.0,<3.16.0" -numpy = ">=2.4.0,<2.5.0" -Pillow = ">=12.2.0,<12.3.0" -psutil = ">=7.2.2,<7.3.0" -pydantic = ">=2.12.0,<2.13.0" +h5py = "~3.15.0" +numpy = "~2.4.0" +Pillow = "~12.2.0" +psutil = "~7.2.2" +pydantic = "~2.12.0" + +[package.source] +type = "git" +url = "https://github.com/MiraGeoscience/geoh5py.git" +reference = "GEOPY-2893" +resolved_reference = "319ff3a06c8bdae7fe043b03e74f63fb59c3f799" [[package]] name = "h5py" @@ -1502,4 +1506,4 @@ numpy = ">=1.7" [metadata] lock-version = "2.1" python-versions = ">=3.12,<3.15" -content-hash = "fb4c4c983079faee394d9002c6aa3d25ef5d4ddff9cf302edda7e0c9284949a5" +content-hash = "2d19c8ffcefe5d3b8d152591d3dd41e16d016f5df6ea52de03d03ba90fef0fc0" diff --git a/pyproject.toml b/pyproject.toml index 49d1711..4899546 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,8 +73,8 @@ 
standard-imghdr = "3.13.*" ## dependencies from Git repositories #------------------------------------ -geoh5py = {version = ">=0.13.0b1, 0.13.*", allow-prereleases = true} -#geoh5py = {git = "https://github.com/MiraGeoscience/geoh5py.git", rev = "develop"} +#geoh5py = {version = ">=0.13.0b1, 0.13.*", allow-prereleases = true} +geoh5py = {git = "https://github.com/MiraGeoscience/geoh5py.git", rev = "GEOPY-2893"} #geoh5py = {path = "../geoh5py", develop = true} [tool.poetry.group.dev.dependencies] From 537a702c2c37f18da4ae0f22bf7166cd27ef3869 Mon Sep 17 00:00:00 2001 From: domfournier Date: Tue, 2 Jun 2026 12:58:26 -0400 Subject: [PATCH 3/7] Remove unused imports --- omf/fileio/fileio.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/omf/fileio/fileio.py b/omf/fileio/fileio.py index 69e22e4..b59cbed 100644 --- a/omf/fileio/fileio.py +++ b/omf/fileio/fileio.py @@ -17,8 +17,6 @@ import uuid from omf.base import UidModel -from omf.fileio.geoh5 import GeoH5Writer -from geoh5py.shared.utils import DEFAULT_PAGE_SIZE __version__ = b"OMF-v0.9.0" From 1fd4a9716b5b369b01de8860ebeba6362dd001f1 Mon Sep 17 00:00:00 2001 From: domfournier Date: Tue, 2 Jun 2026 15:38:08 -0400 Subject: [PATCH 4/7] Copilot warnings --- omf/fileio/fileio.py | 14 +++++++++----- omf/fileio/geoh5.py | 9 +++++---- tests/conftest.py | 18 +++++++++--------- tests/convert_project_test.py | 2 +- tests/script_geoh5_to_omf_test.py | 2 +- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/omf/fileio/fileio.py b/omf/fileio/fileio.py index b59cbed..d939543 100644 --- a/omf/fileio/fileio.py +++ b/omf/fileio/fileio.py @@ -11,7 +11,7 @@ # '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' from __future__ import annotations - +from pathlib import Path import json import struct import uuid @@ -50,11 +50,15 @@ def __init__(self, project: UidModel, fname: str): Binary data is written during project serialization """ - if not fname.endswith(".omf"): - fname = fname + ".omf" + 
file_name = Path(fname) + if not file_name.suffix: + file_name = file_name.with_suffix(".omf") + + if file_name.suffix != ".omf": + raise ValueError("OMFWriter only supports .omf file extensions.") - self.fname = fname - with open(fname, "wb") as fopen: + self.fname = file_name + with open(file_name, "wb") as fopen: self.initialize_header(fopen, project.uid) self.project_json = project.serialize(open_file=fopen) self.update_header(fopen) diff --git a/omf/fileio/geoh5.py b/omf/fileio/geoh5.py index 71dc7db..48214f7 100644 --- a/omf/fileio/geoh5.py +++ b/omf/fileio/geoh5.py @@ -85,7 +85,7 @@ class GeoH5Writer: # pylint: disable=too-few-public-methods def __init__( self, element: UidModel, - file_name: str | Path | Workspace, + file_name: str | Path, compression: int = 5, page_size: int = DEFAULT_PAGE_SIZE, ): @@ -121,13 +121,14 @@ def validate_geoh5_file(self, file: str | Path) -> Path: raise TypeError("Input 'file' must be of str or Path.") file_path = Path(file) - if not file_path.exists(): - h5file = Workspace.create(file, page_size=self.page_size) - h5file.close() if file_path.suffix != ".geoh5": raise ValueError("Input 'file' must be a '.geoh5' file.") + if not file_path.exists(): + h5file = Workspace.create(file, page_size=self.page_size) + h5file.close() + return file_path diff --git a/tests/conftest.py b/tests/conftest.py index b7da979..eb13a51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,17 +25,17 @@ def random_project() -> omf.Project: png_file_path = tests_dir.parent / "docs" / "images" / "PointSetGeometry.png" proj = omf.Project(name="Test project", description="Just some assorted elements") + vertices = np.repeat(np.arange(1000).reshape(-1, 1), 3, axis=1).astype(float) + values = np.arange(1000).astype(float) pts = omf.PointSetElement( name="Random Points", description="Just random points", - geometry=omf.PointSetGeometry(vertices=np.random.rand(50000, 3)), + geometry=omf.PointSetGeometry(vertices=vertices), data=[ - omf.ScalarData( - name="rand
data", array=np.random.rand(50000), location="vertices" - ), + omf.ScalarData(name="rand data", array=values, location="vertices"), omf.ScalarData( name="More rand data", - array=np.random.rand(50000), + array=values, location="vertices", ), ], @@ -61,13 +61,13 @@ def random_project() -> omf.Project: lin = omf.LineSetElement( name="Random Line", geometry=omf.LineSetGeometry( - vertices=np.random.rand(50000, 3), + vertices=vertices, segments=np.floor(np.random.rand(50, 2) * 100).astype(int), ), data=[ omf.ScalarData( name="rand vert data", - array=np.random.rand(50000), + array=values, location="vertices", ), omf.ScalarData( @@ -82,13 +82,13 @@ def random_project() -> omf.Project: surf = omf.SurfaceElement( name="trisurf", geometry=omf.SurfaceGeometry( - vertices=np.random.rand(50000, 3), + vertices=vertices, triangles=np.floor(np.random.rand(50, 3) * 100).astype(int), ), data=[ omf.ScalarData( name="rand vert data", - array=np.random.rand(50000), + array=values, location="vertices", ), omf.ScalarData( diff --git a/tests/convert_project_test.py b/tests/convert_project_test.py index ff2ea1e..77fbeac 100644 --- a/tests/convert_project_test.py +++ b/tests/convert_project_test.py @@ -43,7 +43,7 @@ def test_project_compression(random_project: omf.Project, tmp_path: Path): file_high_comp = str(tmp_path / "project_high_comp.geoh5") omf.GeoH5Writer(random_project, file_low_comp, compression=1, page_size=512) - omf.GeoH5Writer(random_project, file_med_comp, compression=5, page_size=512) + omf.GeoH5Writer(random_project, file_med_comp, compression=3, page_size=512) omf.GeoH5Writer(random_project, file_high_comp, compression=9, page_size=512) size_low_comp = os.stat(file_low_comp).st_size diff --git a/tests/script_geoh5_to_omf_test.py b/tests/script_geoh5_to_omf_test.py index 429683c..bc0f1ba 100644 --- a/tests/script_geoh5_to_omf_test.py +++ b/tests/script_geoh5_to_omf_test.py @@ -39,7 +39,7 @@ def geoh5_input_path_fixture(request, tmp_path_factory) -> Path: ) file_path = 
tmp_path_factory.mktemp("input") / request.param - omf.OMFWriter(points, str(file_path)) + omf.GeoH5Writer(points, str(file_path)) return file_path From 8e9f1d81503b694febc326adc383451fcdaf5d9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hensgen?= <24550538+sebhmg@users.noreply.github.com> Date: Wed, 3 Jun 2026 08:12:39 -0400 Subject: [PATCH 5/7] [GEOPY-2893] poetry update --- poetry.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 355d704..e74c5e8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand. [[package]] name = "alabaster" @@ -364,7 +364,7 @@ files = [ [[package]] name = "geoh5py" -version = "0.0.0.dev0" +version = "0.13.0b2.dev13+b31f4f21" description = "Python API for geoh5, an open file format for geoscientific data" optional = false python-versions = ">=3.12,<3.15" @@ -383,7 +383,7 @@ pydantic = "~2.12.0" type = "git" url = "https://github.com/MiraGeoscience/geoh5py.git" reference = "GEOPY-2893" -resolved_reference = "319ff3a06c8bdae7fe043b03e74f63fb59c3f799" +resolved_reference = "b31f4f213367dee322687f69f76ce33c97acee08" [[package]] name = "h5py" @@ -440,14 +440,14 @@ numpy = ">=1.21.2" [[package]] name = "idna" -version = "3.17" +version = "3.18" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "idna-3.17-py3-none-any.whl", hash = "sha256:466e48829084efe2548012b855df21540b96f2e20e51bd124c851536556a592c"}, - {file = "idna-3.17.tar.gz", hash = "sha256:5eb0cb53bc467c12eadcf6de83163ad8527cec9416f44b9b61b19caedad2b87f"}, + {file = "idna-3.18-py3-none-any.whl", hash = "sha256:7f952cbe720b688055e3f87de14f5c3e5fdaa8bc3928985c4077ca689de849a2"}, + {file = "idna-3.18.tar.gz", hash = 
"sha256:ffb385a7e039654cef1ab9ef32c6fafe283c0c0467bba1d9029738ce4a14a848"}, ] [package.extras] @@ -1275,14 +1275,14 @@ files = [ [[package]] name = "snowballstemmer" -version = "3.1.0" +version = "3.1.1" description = "This package provides 36 stemmers for 34 languages generated from Snowball algorithms." optional = false python-versions = ">=3.3" groups = ["dev"] files = [ - {file = "snowballstemmer-3.1.0-py3-none-any.whl", hash = "sha256:17e6d1da216aa07db6dad37139ea70cf13c4b2e9a096f6e64a9648fc657d3154"}, - {file = "snowballstemmer-3.1.0.tar.gz", hash = "sha256:fd9e34526b23340cd23ffea6c9f9760974ecc2c2ac9e1d81401443ccdb2a801f"}, + {file = "snowballstemmer-3.1.1-py3-none-any.whl", hash = "sha256:7e207fa178741da09cdee59d3ecec3827ad5f92b1fc5c9ff3755b639f71f5752"}, + {file = "snowballstemmer-3.1.1.tar.gz", hash = "sha256:e07bbc54a0d798fe6010a12398422e62a8bfbba95c394fd0956ef58cb4d3e260"}, ] [[package]] From 3533d75c0839f84d3e610c4c96608816809e57c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hensgen?= <24550538+sebhmg@users.noreply.github.com> Date: Wed, 3 Jun 2026 08:32:13 -0400 Subject: [PATCH 6/7] [GEOPY-2893] fix too many positional arguments --- omf/fileio/geoh5.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/omf/fileio/geoh5.py b/omf/fileio/geoh5.py index 48214f7..84d857d 100644 --- a/omf/fileio/geoh5.py +++ b/omf/fileio/geoh5.py @@ -107,9 +107,13 @@ def entity(self) -> Entity: def entity(self, element: UidModel): with fetch_active_workspace(self.file) as workspace: if isinstance(element, Project): - converter = ProjectConversion(element, workspace, self.compression) + converter = ProjectConversion( + element, workspace, compression=self.compression + ) else: - converter = get_conversion_map(element, workspace, self.compression) + converter = get_conversion_map( + element, workspace, compression=self.compression + ) self._entity = converter.from_omf(element) @@ -562,6 +566,7 @@ def __init__( self, element: UidModel | 
Entity, geoh5: Workspace, + *, compression: int = 5, parent=None, page_size: int = DEFAULT_PAGE_SIZE, From 1da76785290bdfce708912d8344e91ef74b5e4b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hensgen?= <24550538+sebhmg@users.noreply.github.com> Date: Wed, 3 Jun 2026 22:25:23 -0400 Subject: [PATCH 7/7] [GEOPY-2893] poetry relock on geoh5py 0.13.0b2 --- poetry.lock | 28 ++++++++++++---------------- pyproject.toml | 6 +++--- recipe.yaml | 2 +- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/poetry.lock b/poetry.lock index e74c5e8..329cac7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -364,26 +364,22 @@ files = [ [[package]] name = "geoh5py" -version = "0.13.0b2.dev13+b31f4f21" +version = "0.13.0b2" description = "Python API for geoh5, an open file format for geoscientific data" optional = false -python-versions = ">=3.12,<3.15" +python-versions = "<3.15,>=3.12" groups = ["main"] -files = [] -develop = false +files = [ + {file = "geoh5py-0.13.0b2-py3-none-any.whl", hash = "sha256:89359d8d71027ea4a55b978ed8c2d6145fe67c4092afa2bbe7d90e6a0ceb19b7"}, + {file = "geoh5py-0.13.0b2.tar.gz", hash = "sha256:65942baea8e575331d6cc71398c6960edbcc10a696f51325af9bfe7de8735f00"}, +] [package.dependencies] -h5py = "~3.15.0" -numpy = "~2.4.0" -Pillow = "~12.2.0" -psutil = "~7.2.2" -pydantic = "~2.12.0" - -[package.source] -type = "git" -url = "https://github.com/MiraGeoscience/geoh5py.git" -reference = "GEOPY-2893" -resolved_reference = "b31f4f213367dee322687f69f76ce33c97acee08" +h5py = ">=3.15.0,<3.16.0" +numpy = ">=2.4.0,<2.5.0" +Pillow = ">=12.2.0,<12.3.0" +psutil = ">=7.2.2,<7.3.0" +pydantic = ">=2.12.0,<2.13.0" [[package]] name = "h5py" @@ -1506,4 +1502,4 @@ numpy = ">=1.7" [metadata] lock-version = "2.1" python-versions = ">=3.12,<3.15" -content-hash = "2d19c8ffcefe5d3b8d152591d3dd41e16d016f5df6ea52de03d03ba90fef0fc0" +content-hash = "5e9d990c173cca94105b0358a91d6d8f16b2fd4dfa41f0ff38f18b7d4277afde" diff --git a/pyproject.toml b/pyproject.toml index 
4899546..4747b3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,9 +73,9 @@ standard-imghdr = "3.13.*" ## dependencies from Git repositories #------------------------------------ -#geoh5py = {version = ">=0.13.0b1, 0.13.*", allow-prereleases = true} -geoh5py = {git = "https://github.com/MiraGeoscience/geoh5py.git", rev = "GEOPY-2893"} -#geoh5py = {path = "../geoh5py", develop = true} +geoh5py = {version = ">=0.13.0b2, 0.13.*", allow-prereleases = true} +# geoh5py = {git = "https://github.com/MiraGeoscience/geoh5py.git", rev = "GEOPY-2893"} +# geoh5py = {path = "../geoh5py", develop = true} [tool.poetry.group.dev.dependencies] Pygments = "*" diff --git a/recipe.yaml b/recipe.yaml index 0749da8..01af070 100644 --- a/recipe.yaml +++ b/recipe.yaml @@ -36,7 +36,7 @@ requirements: run: - python >=${{ python_min }} # Mira packages - - geoh5py >=0.13.0b1, 0.13.* + - geoh5py >=0.13.0b2, 0.13.* # direct dependencies - numpy 2.4.* - properties 0.6.*