Source code for mth5.groups.reports

# -*- coding: utf-8 -*-
from __future__ import annotations


"""Reports group utilities for storing report and image artifacts in MTH5."""

from pathlib import Path
from typing import Any

import h5py

# =============================================================================
# Imports
# =============================================================================
import numpy as np
from PIL import Image

from mth5.groups.base import BaseGroup


# =============================================================================
# Reports Group
# =============================================================================
class ReportsGroup(BaseGroup):
    """Store report files (PDF/text) and images under ``/Survey/Reports``.

    Report files are embedded byte-for-byte as ``uint8`` datasets and images
    are embedded as their decoded pixel arrays.  The original file name and
    the lower-cased extension are kept as dataset attributes so the file can
    be written back to disk later.

    Examples
    --------
    >>> reports = survey.reports_group
    >>> _ = reports.add_report("site_report", filename="/tmp/report.pdf")
    >>> _ = reports.get_report("site_report")  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        """Wrap ``group`` and register the supported file extensions.

        Parameters
        ----------
        group : h5py.Group
            HDF5 group that holds the report datasets.
        **kwargs : Any
            Forwarded unchanged to ``BaseGroup.__init__``.
        """
        super().__init__(group, **kwargs)

        # Extensions are compared lower-case and without the leading dot.
        self._accepted_reports: list[str] = ["pdf", "txt", "md"]
        self._accepted_images: list[str] = [
            "png",
            "jpg",
            "jpeg",
            "tif",
            "tiff",
            "bmp",
        ]

        # summary of reports
        self._defaults_summary_attrs = {
            "name": "summary",
            "max_shape": (1000,),
            "dtype": np.dtype(
                [
                    ("name", "S5"),
                    ("type", "S32"),
                    ("summary", "S200"),
                    ("hdf5_reference", h5py.ref_dtype),
                ]
            ),
        }

    @staticmethod
    def _as_text(value: Any) -> str:
        """Return an HDF5 attribute value as ``str``, decoding bytes if needed."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return str(value)

    def add_report(
        self,
        report_name: str,
        report_metadata: dict[str, Any] | None = None,
        filename: str | Path | None = None,
    ) -> None:
        """Add a report or image file to the group.

        Nothing is stored when ``filename`` is ``None``.  A file whose
        extension is not supported is not stored either; an error is logged
        instead of raising.

        Parameters
        ----------
        report_name : str
            Dataset name to store the file under.
        report_metadata : dict, optional
            Additional attributes to attach to the dataset.  When omitted, a
            default ``description`` attribute is written.  The ``filename``
            and ``file_type`` attributes are always written last because
            :meth:`get_report` needs them to rebuild the file.
        filename : str or Path, optional
            Path to the file to embed; supported types: PDF/TXT/MD and common
            images.

        Raises
        ------
        FileNotFoundError
            If ``filename`` does not exist.

        Examples
        --------
        >>> reports.add_report("manual", filename="docs/manual.pdf")  # doctest: +SKIP
        """
        if filename is None:
            return

        filename = Path(filename)
        if not filename.exists():
            raise FileNotFoundError(f"{filename} does not exist")

        extension = filename.suffix.lower()[1:]
        if extension in self._accepted_reports:
            # Store the raw bytes as uint8 rather than as a Python ``bytes``
            # object: h5py maps ``bytes`` to an HDF5 string, which cannot
            # safely hold arbitrary binary content such as PDF streams.
            payload = np.frombuffer(filename.read_bytes(), dtype=np.uint8)
            kind = "report"
        elif extension in self._accepted_images:
            # The context manager closes the underlying file handle as soon
            # as the pixels have been copied into the array.
            with Image.open(filename) as img:
                payload = np.array(img)
            kind = "image"
        else:
            self.logger.error(
                f"Adding files of type {extension} is not implemented yet"
            )
            return

        dataset = self.hdf5_group.create_dataset(report_name, data=payload)

        if report_metadata is not None:
            for key, value in report_metadata.items():
                dataset.attrs[key] = value
        else:
            dataset.attrs["description"] = f"{extension.upper()} {kind} file"

        # Bookkeeping attributes required by ``get_report``; written after the
        # user metadata so they cannot be overridden by accident.
        dataset.attrs["filename"] = filename.name
        dataset.attrs["file_type"] = extension

    def get_report(self, report_name: str) -> Path:
        """Extract a stored report or image to the current working directory.

        Parameters
        ----------
        report_name : str
            Name of the stored dataset.

        Returns
        -------
        pathlib.Path
            Path to the materialized file on disk.

        Raises
        ------
        ValueError
            If the stored file type is unsupported.

        Examples
        --------
        >>> path = reports.get_report("site_report")  # doctest: +SKIP
        >>> path.exists()
        True
        """
        dataset = self.hdf5_group[report_name]
        file_type = self._as_text(dataset.attrs["file_type"])

        # Keep only the final path component so a stored name such as
        # "../../x" cannot place the output outside the working directory.
        stored_name = Path(self._as_text(dataset.attrs["filename"])).name
        fn_path = Path.cwd().joinpath(stored_name)

        if file_type in self._accepted_reports:
            raw = dataset[()]
            if isinstance(raw, (bytes, bytearray)):
                # Dataset written as a string scalar (older layout).
                report_data = bytes(raw)
            else:
                # uint8 array or opaque scalar: take the underlying buffer.
                report_data = np.asarray(raw).tobytes()
            fn_path.write_bytes(report_data)
            self.logger.info(f"Report written to {fn_path}")
            return fn_path

        if file_type in self._accepted_images:
            img_data = np.array(dataset[()])
            img = Image.fromarray(img_data)
            img.save(fn_path)
            self.logger.info(f"Image report written to {fn_path}")
            return fn_path

        raise ValueError(f"Unsupported file type '{file_type}' for {report_name}")