diff --git a/lib/src/rdswrapper.cpp b/lib/src/rdswrapper.cpp index a7f2ab8..28131d9 100644 --- a/lib/src/rdswrapper.cpp +++ b/lib/src/rdswrapper.cpp @@ -20,7 +20,6 @@ class RdsReader { std::string get_rtype() const { if (!ptr) throw std::runtime_error("Null pointer in 'get_rtype'."); - // py::print("arg::", static_cast(ptr->type())); switch (ptr->type()) { case rds2cpp::SEXPType::S4: return "S4"; case rds2cpp::SEXPType::INT: return "integer"; @@ -239,29 +238,220 @@ class RdaObject { } }; +// ---- writers ---- + +std::unique_ptr py_to_robject(const py::object& obj, std::vector& symbols); + +void add_names_attribute( + std::vector& attributes, + const py::list& names, + std::vector& symbols) +{ + auto svec = std::make_unique(); + for (size_t i = 0; i < py::len(names); ++i) { + auto item = names[i]; + if (item.is_none()) { + svec->data.emplace_back(); + } else { + svec->data.emplace_back(item.cast(), rds2cpp::StringEncoding::UTF8); + } + } + attributes.emplace_back( + rds2cpp::register_symbol("names", rds2cpp::StringEncoding::UTF8, symbols), + std::move(svec) + ); +} + +std::unique_ptr py_to_robject(const py::object& obj, std::vector& symbols) { + // None -> Null + if (obj.is_none()) { + return std::make_unique(); + } + + // numpy array + if (py::isinstance(obj)) { + auto arr = obj.cast(); + auto dtype = arr.dtype(); + + // bool arrays + if (dtype.is(py::dtype::of())) { + auto buf = arr.cast>(); + auto r = buf.unchecked<1>(); + auto vec = std::make_unique(); + + vec->data.reserve(r.shape(0)); + for (ssize_t i = 0; i < r.shape(0); ++i) { + vec->data.push_back(r(i) ? 1 : 0); + } + + return vec; + } + + // integer arrays + if (py::isinstance>(arr) || + py::isinstance>(arr) || + py::isinstance>(arr) || + py::isinstance>(arr)) { + auto buf = arr.cast>(); + auto r = buf.unchecked<1>(); + auto vec = std::make_unique(); + + vec->data.reserve(r.shape(0)); + for (ssize_t i = 0; i < r.shape(0); ++i) { + vec->data.push_back(r(i)); + } + + return vec; + } + + // float arrays + if (py::isinstance>(arr) || + py::isinstance>(arr)) { + auto buf = arr.cast>(); + auto r = buf.unchecked<1>(); + auto vec = std::make_unique(); + + vec->data.reserve(r.shape(0)); + for (ssize_t i = 0; i < r.shape(0); ++i) { + vec->data.push_back(r(i)); + } + return vec; + } + + throw std::runtime_error("Unsupported numpy dtype for RDS writing"); + } + + // dict -> GenericVector with names attribute + if (py::isinstance(obj)) { + auto d = obj.cast(); + auto gvec = std::make_unique(); + + py::list keys; + for (auto& item : d) { + keys.append(item.first); + gvec->data.push_back(py_to_robject(py::reinterpret_borrow(item.second), symbols)); + } + add_names_attribute(gvec->attributes, keys, symbols); + + return gvec; + } + + // list + if (py::isinstance(obj)) { + auto lst = obj.cast(); + if (py::len(lst) == 0) { + return std::make_unique(); + } + + // Check if all elements are strings (or None) -> StringVector + bool all_strings = true; + for (size_t i = 0; i < py::len(lst); ++i) { + auto item = lst[i]; + if (!item.is_none() && !py::isinstance(item)) { + all_strings = false; + break; + } + } + + if (all_strings) { + auto svec = std::make_unique(); + for (size_t i = 0; i < py::len(lst); ++i) { + auto item = lst[i]; + if (item.is_none()) { + svec->data.emplace_back(); + } else { + svec->data.emplace_back(item.cast(), rds2cpp::StringEncoding::UTF8); + } + } + + return svec; + } + + // Otherwise -> GenericVector + auto gvec = std::make_unique(); + for (size_t i = 0; i < py::len(lst); ++i) { + gvec->data.push_back(py_to_robject(lst[i].cast(), symbols)); + } + + return gvec; + } + + // bool check before int, since bool is a subclass of int + if (py::isinstance(obj)) { + auto vec = std::make_unique(); + vec->data.push_back(obj.cast() ? 1 : 0); + return vec; + } + + if (py::isinstance(obj)) { + auto vec = std::make_unique(); + vec->data.push_back(obj.cast()); + return vec; + } + + if (py::isinstance(obj)) { + auto vec = std::make_unique(); + vec->data.push_back(obj.cast()); + return vec; + } + + if (py::isinstance(obj)) { + auto svec = std::make_unique(); + svec->data.emplace_back(obj.cast(), rds2cpp::StringEncoding::UTF8); + return svec; + } + + throw std::runtime_error("Unsupported Python type for RDS writing: " + std::string(py::str(obj.get_type()))); +} + +void write_rds_file(const py::object& obj, const std::string& path) { + rds2cpp::RdsFile file_info; + file_info.object = py_to_robject(obj, file_info.symbols); + rds2cpp::WriteRdsOptions options; + rds2cpp::write_rds(file_info, path, options); +} + +void write_rda_file(const py::dict& objects, const std::string& path) { + rds2cpp::RdaFile file_info; + for (auto& item : objects) { + auto name = item.first.cast(); + auto sym = rds2cpp::register_symbol(name, rds2cpp::StringEncoding::UTF8, file_info.symbols); + auto value = py_to_robject(py::reinterpret_borrow(item.second), file_info.symbols); + file_info.objects.emplace_back(std::move(sym), std::move(value)); + } + rds2cpp::WriteRdaOptions options; + rds2cpp::write_rda(file_info, path, options); +} + PYBIND11_MODULE(lib_rds_parser, m) { py::register_exception(m, "RdsParserError"); py::class_(m, "RdsObject") - .def(py::init()) - .def("get_robject", &RdsObject::get_robject, py::return_value_policy::reference_internal); + .def(py::init()) + .def("get_robject", &RdsObject::get_robject, py::return_value_policy::reference_internal); py::class_(m, "RdaObject") - .def(py::init()) - .def("get_object_names", &RdaObject::get_object_names) - .def("get_object_count", &RdaObject::get_object_count) - .def("get_object_by_index", &RdaObject::get_object_by_index, py::return_value_policy::take_ownership, py::keep_alive<0, 1>()) - .def("get_object_by_name", &RdaObject::get_object_by_name, py::return_value_policy::take_ownership, py::keep_alive<0, 1>()); + .def(py::init()) + .def("get_object_names", &RdaObject::get_object_names) + .def("get_object_count", &RdaObject::get_object_count) + .def("get_object_by_index", &RdaObject::get_object_by_index, py::return_value_policy::take_ownership, py::keep_alive<0, 1>()) + .def("get_object_by_name", &RdaObject::get_object_by_name, py::return_value_policy::take_ownership, py::keep_alive<0, 1>()); py::class_(m, "RdsReader") - .def("get_rtype", &RdsReader::get_rtype) - .def("get_rsize", &RdsReader::get_rsize) - .def("get_numeric_data", &RdsReader::get_numeric_data) - .def("get_string_arr", &RdsReader::get_string_arr) - .def("get_attribute_names", &RdsReader::get_attribute_names) - .def("load_attribute_by_name", &RdsReader::load_attribute_by_name) - .def("load_vec_element", &RdsReader::load_vec_element) - .def("get_package_name", &RdsReader::get_package_name) - .def("get_class_name", &RdsReader::get_class_name) - .def("get_dimensions", &RdsReader::get_dimensions); + .def("get_rtype", &RdsReader::get_rtype) + .def("get_rsize", &RdsReader::get_rsize) + .def("get_numeric_data", &RdsReader::get_numeric_data) + .def("get_string_arr", &RdsReader::get_string_arr) + .def("get_attribute_names", &RdsReader::get_attribute_names) + .def("load_attribute_by_name", &RdsReader::load_attribute_by_name) + .def("load_vec_element", &RdsReader::load_vec_element) + .def("get_package_name", &RdsReader::get_package_name) + .def("get_class_name", &RdsReader::get_class_name) + .def("get_dimensions", &RdsReader::get_dimensions); + + m.def("write_rds", &write_rds_file, "Write a Python object to an RDS file", + py::arg("obj"), py::arg("path")); + + m.def("write_rda", &write_rda_file, "Write named Python objects to an RData file", + py::arg("objects"), py::arg("path")); } diff --git a/setup.cfg b/setup.cfg index 6de566b..4a9a6c8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,7 +41,7 @@ package_dir = =src # Require a min/specific Python version (comma-separated conditions) -python_requires = >=3.9 +python_requires = >=3.10 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0. # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in @@ -50,7 +50,7 @@ python_requires = >=3.9 install_requires = importlib-metadata; python_version<"3.8" numpy - biocutils>=0.1.5 + biocutils>=0.4.1 [options.packages.find] where = src diff --git a/src/rds2py/__init__.py b/src/rds2py/__init__.py index 58dee0f..63be5d3 100644 --- a/src/rds2py/__init__.py +++ b/src/rds2py/__init__.py @@ -16,5 +16,5 @@ del version, PackageNotFoundError -from .generics import read_rds, read_rda -from .rdsutils import parse_rds, parse_rda +from .generics import read_rds, read_rda, save_rds +from .rdsutils import parse_rds, parse_rda, write_rds, write_rda diff --git a/src/rds2py/generics.py b/src/rds2py/generics.py index a448f26..1d86081 100644 --- a/src/rds2py/generics.py +++ b/src/rds2py/generics.py @@ -15,8 +15,9 @@ print(type(data)) """ +from functools import singledispatch from importlib import import_module -from typing import List, Optional +from typing import Any, List, Optional from warnings import warn from .rdsutils import get_class, parse_rda, parse_rds @@ -72,22 +73,6 @@ } -# @singledispatch -# def save_rds(x, path: str): -# """Save a Python object as RDS file. - -# Args: -# x: -# Object to save. - -# path: -# Path to save the object. -# """ -# raise NotImplementedError( -# f"No `save_rds` method implemented for '{type(x).__name__}' objects." -# ) - - def read_rds(path: str, **kwargs): """Read an RDS file and convert it to an appropriate Python object. @@ -177,3 +162,34 @@ def _dispatcher(robject: dict, **kwargs): ) return robject + + +@singledispatch +def save_rds(x: Any, path: Optional[str] = None): + """Save a Python object as RDS file. + + Args: + x: + Object to save. + + path: + Path to save the object. If ``None``, returns the converted representation. + """ + raise NotImplementedError(f"No `save_rds` method implemented for '{type(x).__name__}' objects.") + + +# Import all modules with save_rds registrations to ensure they are loaded +from . import ( # noqa: E402 + save_atomic, # noqa: F401 + save_compressed_list, # noqa: F401 + save_delayed_matrix, # noqa: F401 + save_dict, # noqa: F401 + save_factor, # noqa: F401 + save_frame, # noqa: F401 + save_granges, # noqa: F401 + save_mae, # noqa: F401 + save_matrix, # noqa: F401 + save_rle, # noqa: F401 + save_sce, # noqa: F401 + save_se, # noqa: F401 +) diff --git a/src/rds2py/rdsutils.py b/src/rds2py/rdsutils.py index 2359b43..08f58a0 100644 --- a/src/rds2py/rdsutils.py +++ b/src/rds2py/rdsutils.py @@ -4,8 +4,9 @@ information from parsed objects. """ -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional +from .lib_rds_parser import write_rda as _write_rda_native from .PyRdaReader import PyRdaParser from .PyRdsReader import PyRdsParser @@ -57,6 +58,39 @@ def parse_rda(path: str, objects: Optional[List[str]] = None) -> Dict[str, dict] return result +def write_rds(obj: Any, path: str) -> None: + """Write a Python object to RDS file. + + Args: + obj: + The Python object to write. + + path: + Output file path. + """ + from .generics import save_rds + + save_rds(obj, path) + + +def write_rda(objects: Dict[str, Any], path: str) -> None: + """Write multiple named Python objects to a gzip-compressed RData file. + + Each value is converted using :py:func:`~.write_rds`. + + Args: + objects: + Dictionary mapping variable names to Python objects. + + path: + Output file path. + """ + from .generics import save_rds + + converted = {str(k): save_rds(v) for k, v in objects.items()} + _write_rda_native(converted, path) + + def get_class(robj: dict) -> str: """Infer the R class name from a parsed RDS object. diff --git a/src/rds2py/save_atomic.py b/src/rds2py/save_atomic.py new file mode 100644 index 0000000..b170a5e --- /dev/null +++ b/src/rds2py/save_atomic.py @@ -0,0 +1,70 @@ +"""Functions for saving atomic R vector types from Python objects.""" + +from typing import Optional + +import numpy as np +from biocutils import BooleanList, FloatList, IntegerList, StringList + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +@save_rds.register(bool) +@save_rds.register(int) +@save_rds.register(float) +@save_rds.register(str) +@save_rds.register(type(None)) +def _save_rds_primitives(x, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + if path is not None: + _write_rds_native(x, path) + + return x + + +@save_rds.register(BooleanList) +def _save_rds_booleanlist(x: BooleanList, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = np.array(list(x), dtype=bool) + if path is not None: + _write_rds_native(converted, path) + + return converted + + +@save_rds.register(IntegerList) +def _save_rds_integerlist(x: IntegerList, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = np.array(list(x), dtype=np.int32) + if path is not None: + _write_rds_native(converted, path) + + return converted + + +@save_rds.register(FloatList) +def _save_rds_floatlist(x: FloatList, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = np.array(list(x), dtype=np.float64) + if path is not None: + _write_rds_native(converted, path) + + return converted + + +@save_rds.register(StringList) +def _save_rds_stringlist(x: StringList, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = list(x) + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_compressed_list.py b/src/rds2py/save_compressed_list.py new file mode 100644 index 0000000..fcc1666 --- /dev/null +++ b/src/rds2py/save_compressed_list.py @@ -0,0 +1,53 @@ +"""Functions and classes for parsing Compressed List data structures.""" + +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + +if is_package_installed("compressed_lists", verbose=True): + from compressed_lists import CompressedList, Partitioning + + @save_rds.register(CompressedList) + def _save_rds_compressedlist(x: CompressedList, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "unlist_data": save_rds(_get(x, "unlist_data")), + "partitioning": save_rds(_get(x, "partitioning")), + "metadata": save_rds(_get(x, "metadata")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted + + @save_rds.register(Partitioning) + def _save_rds_partitioning(x: Partitioning, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "ends": save_rds(_get(x, "ends")), + "names": save_rds(_get(x, "names")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_delayed_matrix.py b/src/rds2py/save_delayed_matrix.py new file mode 100644 index 0000000..7e2874a --- /dev/null +++ b/src/rds2py/save_delayed_matrix.py @@ -0,0 +1,32 @@ +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +if is_package_installed("delayedarray", verbose=True): + from delayedarray import DelayedArray + + @save_rds.register(DelayedArray) + def _save_rds_delayedarray(x: DelayedArray, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + + return getattr(obj, name, None) + + converted = { + "seed": save_rds(_get(x, "seed")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_dict.py b/src/rds2py/save_dict.py new file mode 100644 index 0000000..ffe2891 --- /dev/null +++ b/src/rds2py/save_dict.py @@ -0,0 +1,43 @@ +from typing import Optional + +from biocutils import NamedList + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +@save_rds.register(dict) +def _save_rds_dict(x: dict, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = {str(k): save_rds(v) for k, v in x.items()} + if path is not None: + _write_rds_native(converted, path) + + return converted + + +@save_rds.register(list) +@save_rds.register(tuple) +def _save_rds_list(x, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = [save_rds(v) for v in x] + if path is not None: + _write_rds_native(converted, path) + + return converted + + +@save_rds.register(NamedList) +def _save_rds_namedlist(x: NamedList, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = {str(k): save_rds(v) for k, v in x.items()} + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_factor.py b/src/rds2py/save_factor.py new file mode 100644 index 0000000..4cdf428 --- /dev/null +++ b/src/rds2py/save_factor.py @@ -0,0 +1,23 @@ +from typing import Optional + +from biocutils import Factor + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +@save_rds.register(Factor) +def _save_rds_factor(x: Factor, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = { + "levels": save_rds(x.get_levels()), + "data": save_rds(x.get_codes() + 1), + } + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_frame.py b/src/rds2py/save_frame.py new file mode 100644 index 0000000..68e8979 --- /dev/null +++ b/src/rds2py/save_frame.py @@ -0,0 +1,26 @@ +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +if is_package_installed("biocframe", verbose=True): + from biocframe import BiocFrame + + @save_rds.register(BiocFrame) + def _save_rds_biocframe(x: BiocFrame, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + converted = {} + for col_name in x.column_names: + converted[col_name] = save_rds(x.column(col_name)) + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_granges.py b/src/rds2py/save_granges.py new file mode 100644 index 0000000..660674c --- /dev/null +++ b/src/rds2py/save_granges.py @@ -0,0 +1,77 @@ +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +if is_package_installed("genomicranges", verbose=True): + from genomicranges import CompressedGenomicRangesList, GenomicRanges, SeqInfo + + @save_rds.register(SeqInfo) + def _save_rds_seqinfo(x: SeqInfo, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "seqnames": save_rds(_get(x, "seqnames")), + "seqlengths": save_rds(_get(x, "seqlengths")), + "is_circular": save_rds(_get(x, "is_circular")), + "genome": save_rds(_get(x, "genome")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted + + @save_rds.register(GenomicRanges) + def _save_rds_genomicranges(x: GenomicRanges, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "seqnames": save_rds(_get(x, "seqnames")), + "ranges": save_rds(_get(x, "ranges")), + "strand": save_rds(_get(x, "strand")), + "seqinfo": save_rds(_get(x, "seqinfo")), + "mcols": save_rds(_get(x, "mcols")), + "metadata": save_rds(_get(x, "metadata")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted + + @save_rds.register(CompressedGenomicRangesList) + def _save_rds_cgrl(x: CompressedGenomicRangesList, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "unlist_data": save_rds(_get(x, "unlist_data")), + "partitioning": save_rds(_get(x, "partitioning")), + "metadata": save_rds(_get(x, "metadata")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_mae.py b/src/rds2py/save_mae.py new file mode 100644 index 0000000..14fd1ae --- /dev/null +++ b/src/rds2py/save_mae.py @@ -0,0 +1,34 @@ +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +if is_package_installed("multiassayexperiment", verbose=True): + from multiassayexperiment import MultiAssayExperiment + + @save_rds.register(MultiAssayExperiment) + def _save_rds_mae(x: MultiAssayExperiment, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "experiments": save_rds(_get(x, "experiments")), + "col_data": save_rds(_get(x, "col_data")), + "sample_map": save_rds(_get(x, "sample_map")), + "metadata": save_rds(_get(x, "metadata")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_matrix.py b/src/rds2py/save_matrix.py new file mode 100644 index 0000000..4a76bc1 --- /dev/null +++ b/src/rds2py/save_matrix.py @@ -0,0 +1,20 @@ +from typing import Optional + +from numpy import ndarray + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +@save_rds.register(ndarray) +def _save_rds_ndarray(x: ndarray, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + x_flat = x.flatten(order="F") if x.ndim > 1 else x + if path is not None: + _write_rds_native(x_flat, path) + + return x_flat diff --git a/src/rds2py/save_rle.py b/src/rds2py/save_rle.py new file mode 100644 index 0000000..8195ee7 --- /dev/null +++ b/src/rds2py/save_rle.py @@ -0,0 +1,35 @@ +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +if is_package_installed("iranges", verbose=True): + from iranges import IRanges + + @save_rds.register(IRanges) + def _save_rds_iranges(x: IRanges, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "start": save_rds(_get(x, "start")), + "width": save_rds(_get(x, "width")), + "names": save_rds(_get(x, "names")), + "mcols": save_rds(_get(x, "mcols")), + "metadata": save_rds(_get(x, "metadata")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_sce.py b/src/rds2py/save_sce.py new file mode 100644 index 0000000..0e28122 --- /dev/null +++ b/src/rds2py/save_sce.py @@ -0,0 +1,40 @@ +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +if is_package_installed("singlecellexperiment", verbose=True): + from singlecellexperiment import SingleCellExperiment + + @save_rds.register(SingleCellExperiment) + def _save_rds_sce(x: SingleCellExperiment, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "assays": save_rds(_get(x, "assays")), + "row_data": save_rds(_get(x, "row_data")), + "column_data": save_rds(_get(x, "column_data")), + "row_ranges": save_rds(_get(x, "row_ranges")), + "metadata": save_rds(_get(x, "metadata")), + "reduced_dims": save_rds(_get(x, "reduced_dims")), + "main_experiment_name": save_rds(_get(x, "main_experiment_name")), + "alternative_experiments": save_rds(_get(x, "alternative_experiments")), + "row_pairs": save_rds(_get(x, "row_pairs")), + "column_pairs": save_rds(_get(x, "column_pairs")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/src/rds2py/save_se.py b/src/rds2py/save_se.py new file mode 100644 index 0000000..e743f27 --- /dev/null +++ b/src/rds2py/save_se.py @@ -0,0 +1,56 @@ +from typing import Optional + +from biocutils.package_utils import is_package_installed + +from .generics import save_rds + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +if is_package_installed("summarizedexperiment", verbose=True): + from summarizedexperiment import RangedSummarizedExperiment, SummarizedExperiment + + @save_rds.register(SummarizedExperiment) + def _save_rds_se(x: SummarizedExperiment, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "assays": save_rds(_get(x, "assays")), + "row_data": save_rds(_get(x, "row_data")), + "column_data": save_rds(_get(x, "column_data")), + "metadata": save_rds(_get(x, "metadata")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted + + @save_rds.register(RangedSummarizedExperiment) + def _save_rds_rse(x: RangedSummarizedExperiment, path: Optional[str] = None): + from .lib_rds_parser import write_rds as _write_rds_native + + def _get(obj, name): + if hasattr(obj, f"get_{name}"): + return getattr(obj, f"get_{name}")() + return getattr(obj, name, None) + + converted = { + "assays": save_rds(_get(x, "assays")), + "row_data": save_rds(_get(x, "row_data")), + "column_data": save_rds(_get(x, "column_data")), + "row_ranges": save_rds(_get(x, "row_ranges")), + "metadata": save_rds(_get(x, "metadata")), + } + + if path is not None: + _write_rds_native(converted, path) + + return converted diff --git a/tests/test_save_rds_complex.py b/tests/test_save_rds_complex.py new file mode 100644 index 0000000..b9c8675 --- /dev/null +++ b/tests/test_save_rds_complex.py @@ -0,0 +1,108 @@ +import os +import tempfile + +import numpy as np +from biocframe import BiocFrame +from genomicranges import GenomicRanges +from iranges import IRanges + +from rds2py import read_rds, save_rds, write_rds + + +def test_save_rds_genomicranges(): + try: + from iranges import IRanges + + ir = IRanges(start=[1, 2], width=[10, 20]) + except ImportError: + ir = None + if ir is None: + return + + gr = GenomicRanges(seqnames=["chr1", "chr2"], ranges=ir, strand=["+", "-"], mcols=BiocFrame({"score": [1.0, 2.0]})) + + res = save_rds(gr) + assert isinstance(res, dict) + assert "seqnames" in res + assert "ranges" in res + assert "strand" in res + assert "mcols" in res + + +from summarizedexperiment import SummarizedExperiment + + +def test_save_rds_summarizedexperiment(): + se = SummarizedExperiment( + assays={"counts": np.array([[1, 2], [3, 4]])}, + row_data=BiocFrame({"gene": ["g1", "g2"]}), + column_data=BiocFrame({"cell": ["c1", "c2"]}), + ) + + res = save_rds(se) + assert isinstance(res, dict) + assert "assays" in res + assert "row_data" in res + assert "column_data" in res + + +from singlecellexperiment import SingleCellExperiment + + +def test_save_rds_singlecellexperiment(): + sce = SingleCellExperiment( + assays={"counts": np.array([[1, 2], [3, 4]])}, reduced_dims={"PCA": np.array([[0.1, 0.2], [0.3, 0.4]])} + ) + + res = save_rds(sce) + assert isinstance(res, dict) + assert "reduced_dims" in res + assert "assays" in res + + +def test_roundtrip_genomicranges(): + gr = GenomicRanges( + seqnames=["chr1", "chr2"], + ranges=IRanges(start=[1, 2], width=[10, 20]), + strand=["+", "-"], + mcols=BiocFrame({"score": [1.0, 2.0]}), + ) + + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + + try: + write_rds(gr, path) + result = read_rds(path) + + # Complex objects are saved as GenericVectors with names, + # so they should be read back as dictionaries. + assert isinstance(result, dict) + assert "seqnames" in result + assert "ranges" in result + assert "strand" in result + assert "mcols" in result + finally: + os.unlink(path) + + +def test_roundtrip_summarizedexperiment(): + se = SummarizedExperiment( + assays={"counts": np.array([[1, 2], [3, 4]])}, + row_data=BiocFrame({"gene": ["g1", "g2"]}), + column_data=BiocFrame({"cell": ["c1", "c2"]}), + ) + + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + + try: + write_rds(se, path) + result = read_rds(path) + + assert isinstance(result, dict) + assert "assays" in result + assert "row_data" in result + assert "column_data" in result + finally: + os.unlink(path) diff --git a/tests/test_write.py b/tests/test_write.py new file mode 100644 index 0000000..dfca4f4 --- /dev/null +++ b/tests/test_write.py @@ -0,0 +1,337 @@ +import os +import tempfile + +import numpy as np + +from rds2py import read_rda, read_rds, write_rda, write_rds + + +class TestWriteRdsIntegers: + def test_integer_array(self): + data = np.array([1, 2, 3, 4, 5], dtype=np.int32) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + np.testing.assert_array_equal(np.array(list(result)), data) + finally: + os.unlink(path) + + def test_integer_scalar(self): + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(42, path) + result = read_rds(path) + assert list(result) == [42] + finally: + os.unlink(path) + + def test_int64_array(self): + data = np.array([10, 20, 30], dtype=np.int64) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + np.testing.assert_array_equal(np.array(list(result)), data.astype(np.int32)) + finally: + os.unlink(path) + + +class TestWriteRdsDoubles: + def test_double_array(self): + data = np.array([1.1, 2.2, 3.3], dtype=np.float64) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + np.testing.assert_array_almost_equal(np.array(list(result)), data) + finally: + os.unlink(path) + + def test_float_scalar(self): + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(3.14, path) + result = read_rds(path) + assert abs(list(result)[0] - 3.14) < 1e-10 + finally: + os.unlink(path) + + def test_float32_array(self): + data = np.array([1.5, 2.5], dtype=np.float32) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + np.testing.assert_array_almost_equal(np.array(list(result)), data, decimal=5) + finally: + os.unlink(path) + + +class TestWriteRdsBooleans: + def test_bool_array(self): + data = np.array([True, False, True], dtype=bool) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert list(result) == [True, False, True] + finally: + os.unlink(path) + + def test_bool_scalar(self): + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(True, path) + result = read_rds(path) + assert list(result) == [True] + finally: + os.unlink(path) + + +class TestWriteRdsStrings: + def test_string_list(self): + data = ["hello", "world", "foo"] + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert list(result) == data + finally: + os.unlink(path) + + def test_string_scalar(self): + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds("test", path) + result = read_rds(path) + assert list(result) == ["test"] + finally: + os.unlink(path) + + def test_unicode_strings(self): + data = ["α-globin", "fußball"] + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert list(result) == data + finally: + os.unlink(path) + + +class TestWriteRdsNull: + def test_none(self): + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(None, path) + result = read_rds(path) + assert result is None + finally: + os.unlink(path) + + +class TestWriteRdsDict: + def test_named_list(self): + data = {"a": np.array([1, 2, 3], dtype=np.int32), "b": ["x", "y"]} + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert isinstance(result, dict) + assert set(result.keys()) == {"a", "b"} + finally: + os.unlink(path) + + def test_nested_dict(self): + data = {"outer": {"inner_a": 1, "inner_b": 2.5}} + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert isinstance(result, dict) + assert "outer" in result + finally: + os.unlink(path) + + +class TestWriteRdsList: + def test_generic_list(self): + data = [np.array([1, 2], dtype=np.int32), ["a", "b"], 3.14] + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + # read back as raw parsed dict to verify structure + from rds2py.rdsutils import parse_rds + + result = parse_rds(path) + assert result["type"] == "vector" + assert len(result["data"]) == 3 + finally: + os.unlink(path) + + +class TestWriteRdsBiocUtilsLists: + def test_integer_list(self): + from biocutils import IntegerList + + data = IntegerList([1, 2, 3, 4, 5]) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert list(result) == [1, 2, 3, 4, 5] + finally: + os.unlink(path) + + def test_float_list(self): + from biocutils import FloatList + + data = FloatList([1.1, 2.2, 3.3]) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + np.testing.assert_array_almost_equal(list(result), [1.1, 2.2, 3.3]) + finally: + os.unlink(path) + + def test_string_list(self): + from biocutils import StringList + + data = StringList(["hello", "world"]) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert list(result) == ["hello", "world"] + finally: + os.unlink(path) + + def test_boolean_list(self): + from biocutils import BooleanList + + data = BooleanList([True, False, True]) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert list(result) == [True, False, True] + finally: + os.unlink(path) + + +class TestWriteRda: + def test_write_multiple_objects(self): + objects = { + "int_vec": np.array([1, 2, 3], dtype=np.int32), + "dbl_vec": np.array([1.1, 2.2, 3.3]), + "str_vec": ["a", "b", "c"], + "bool_vec": np.array([True, False], dtype=bool), + } + with tempfile.NamedTemporaryFile(suffix=".RData", delete=False) as f: + path = f.name + try: + write_rda(objects, path) + result = read_rda(path) + assert set(result.keys()) == {"int_vec", "dbl_vec", "str_vec", "bool_vec"} + finally: + os.unlink(path) + + def test_write_single_object(self): + objects = {"my_data": np.array([10, 20, 30], dtype=np.int32)} + with tempfile.NamedTemporaryFile(suffix=".RData", delete=False) as f: + path = f.name + try: + write_rda(objects, path) + result = read_rda(path) + assert "my_data" in result + np.testing.assert_array_equal(np.array(list(result["my_data"])), [10, 20, 30]) + finally: + os.unlink(path) + + def test_write_dict_object(self): + objects = {"my_list": {"x": 1, "y": 2.5, "z": ["a", "b"]}} + with tempfile.NamedTemporaryFile(suffix=".RData", delete=False) as f: + path = f.name + try: + write_rda(objects, path) + result = read_rda(path) + assert "my_list" in result + assert isinstance(result["my_list"], dict) + finally: + os.unlink(path) + + def test_write_none_object(self): + objects = {"empty": None, "data": np.array([1, 2], dtype=np.int32)} + with tempfile.NamedTemporaryFile(suffix=".RData", delete=False) as f: + path = f.name + try: + write_rda(objects, path) + result = read_rda(path) + assert "empty" in result + assert result["empty"] is None + assert "data" in result + finally: + os.unlink(path) + + +class TestWriteRdsRoundtripWithR: + """Verify written files can be read again.""" + + def test_roundtrip_integers(self): + data = np.array([10, 20, 30, 40, 50], dtype=np.int32) + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + values = [int(v) for v in result] + assert values == [10, 20, 30, 40, 50] + finally: + os.unlink(path) + + def test_roundtrip_strings(self): + data = ["hello", "world"] + with tempfile.NamedTemporaryFile(suffix=".rds", delete=False) as f: + path = f.name + try: + write_rds(data, path) + result = read_rds(path) + assert list(result) == ["hello", "world"] + finally: + os.unlink(path) + + def test_roundtrip_rda(self): + objects = { + "nums": np.array([1.5, 2.5, 3.5]), + "words": ["alpha", "beta"], + } + with tempfile.NamedTemporaryFile(suffix=".RData", delete=False) as f: + path = f.name + try: + write_rda(objects, path) + result = read_rda(path) + np.testing.assert_array_almost_equal(list(result["nums"]), [1.5, 2.5, 3.5]) + assert list(result["words"]) == ["alpha", "beta"] + finally: + os.unlink(path)