diff --git a/src/murfey/client/analyser.py b/src/murfey/client/analyser.py index 88ee0721c..44e80a8ed 100644 --- a/src/murfey/client/analyser.py +++ b/src/murfey/client/analyser.py @@ -14,7 +14,7 @@ import threading from importlib.metadata import entry_points from pathlib import Path -from typing import Type +from typing import OrderedDict, Type from murfey.client.context import Context from murfey.client.destinations import find_longest_data_directory @@ -56,23 +56,14 @@ def __init__( ): super().__init__() self._basepath = basepath_local.absolute() + self._token = token + self._environment = environment self._limited = limited self._experiment_type = "" self._acquisition_software = "" - self._extension: str = "" - self._unseen_xml: list = [] self._context: Context | None = None - self._batch_store: dict = {} - self._environment = environment - self._force_mdoc_metadata = force_mdoc_metadata - self._token = token - self._serialem = serialem - self.parameters_model: ( - Type[ProcessingParametersSPA] | Type[ProcessingParametersTomo] | None - ) = None - self.queue: queue.Queue = queue.Queue() - self.thread = threading.Thread(name="Analyser", target=self._analyse) + self.thread = threading.Thread(name="Analyser", target=self._analyse_in_thread) self._stopping = False self._halt_thread = False self._murfey_config = ( @@ -85,6 +76,17 @@ def __init__( else {} ) + # SPA & Tomo-specific attributes + self._extension: str = "" + self._unseen_xml: list = [] + self._batch_store: dict = {} + self._force_mdoc_metadata = force_mdoc_metadata + self._mdoc_for_reading: Path | None = None + self._serialem = serialem + self.parameters_model: ( + Type[ProcessingParametersSPA] | Type[ProcessingParametersTomo] | None + ) = None + def __repr__(self) -> str: return f"" @@ -334,9 +336,8 @@ def post_transfer(self, transferred_file: Path): f"An exception was encountered post transfer: {e}", exc_info=True ) - def _analyse(self): + def _analyse_in_thread(self): logger.info("Analyser thread started") - mdoc_for_reading = None while not self._halt_thread: transferred_file = self.queue.get() transferred_file = ( @@ -347,185 +348,150 @@ def _analyse(self): if not transferred_file: self._halt_thread = True continue - if self._limited: - if ( - "Metadata" in transferred_file.parts - or transferred_file.name == "EpuSession.dm" - and not self._context - ): - if not (context := _get_context("SPAMetadataContext")): - continue - self._context = context.load()( - "epu", - self._basepath, - self._murfey_config, - self._token, - ) - elif ( - "Batch" in transferred_file.parts - or "SearchMaps" in transferred_file.parts - or transferred_file.name == "Session.dm" - and not self._context - ): - if not (context := _get_context("TomographyMetadataContext")): - continue - self._context = context.load()( - "tomo", - self._basepath, - self._murfey_config, - self._token, - ) + self._analyse(transferred_file) + self.queue.task_done() + logger.debug("Analyser thread has stopped analysing incoming files") + self.notify(final=True) + + def _analyse(self, transferred_file: Path): + if self._limited: + if ( + "Metadata" in transferred_file.parts + or transferred_file.name == "EpuSession.dm" + and not self._context + ): + if not (context := _get_context("SPAMetadataContext")): + return + self._context = context.load()( + "epu", + self._basepath, + self._murfey_config, + self._token, + ) + elif ( + "Batch" in transferred_file.parts + or "SearchMaps" in transferred_file.parts + or transferred_file.name == "Session.dm" + and not self._context + ): + if not (context := _get_context("TomographyMetadataContext")): + return + self._context = context.load()( + "tomo", + self._basepath, + self._murfey_config, + self._token, + ) + self.post_transfer(transferred_file) + else: + # Logic that doesn't require context determination + if not self._serialem and ( + self._force_mdoc_metadata and transferred_file.suffix == ".mdoc" + ): + self._mdoc_for_reading = transferred_file + + # Try and determine context, and notify once when context is found + if self._context is None: + # Exit early if the file can't be used to determine the context + if not self._find_context(transferred_file): + logger.debug(f"Couldn't find context for {str(transferred_file)!r}") + return + else: + logger.info(f"Context found successfully using {transferred_file}") + + # Trigger processing or metadata parsing according to the context + # Go through the straightforward ones first + if "CLEMContext" in str(self._context): + logger.debug(f"File {transferred_file.name!r} is part of CLEM workflow") + self.post_transfer(transferred_file) + elif "FIBContext" in str(self._context): + logger.debug( + f"File {transferred_file.name!r} is part of the FIB workflow" + ) self.post_transfer(transferred_file) - else: - dc_metadata = {} + elif "SXTContext" in str(self._context): + logger.debug(f"File {transferred_file.name!r} is an SXT file") + self.post_transfer(transferred_file) + elif "AtlasContext" in str(self._context): + logger.debug(f"File {transferred_file.name!r} is part of the atlas") + self.post_transfer(transferred_file) + + # Handle files with tomography and SPA context differently + elif ( + any( + context in str(self._context) + for context in ( + "SPAContext", + "SPAMetadataContext", + "TomographyContext", + "TomographyMetadataContext", + ) + ) + and self._context is not None + ): + context = str(self._context).split(" ")[0].split(".")[-1] + + dc_metadata: OrderedDict | None = None if not self._serialem and ( self._force_mdoc_metadata and transferred_file.suffix == ".mdoc" - or mdoc_for_reading + or self._mdoc_for_reading ): - if self._context: - try: - dc_metadata = self._context.gather_metadata( - mdoc_for_reading or transferred_file, - environment=self._environment, - ) - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - if not dc_metadata: - mdoc_for_reading = None - elif transferred_file.suffix == ".mdoc": - mdoc_for_reading = transferred_file - if not self._context: - if not self._find_extension(transferred_file): - logger.debug(f"No extension found for {transferred_file}") - continue - if not self._find_context(transferred_file): - logger.debug( - f"Couldn't find context for {str(transferred_file)!r}" + try: + dc_metadata = self._context.gather_metadata( + self._mdoc_for_reading or transferred_file, + environment=self._environment, ) - self.queue.task_done() - continue - elif self._extension: - logger.info( - f"Context found successfully for {transferred_file}" + except KeyError as e: + logger.error( + f"Metadata gathering failed with a key error for key: " + f"{e.args[0]}" ) - try: - self._context.post_first_transfer( - transferred_file, - environment=self._environment, - ) - except Exception as e: - logger.error(f"Exception encountered: {e}") - if "AtlasContext" not in str(self._context): - if not dc_metadata: - try: - dc_metadata = self._context.gather_metadata( - self._xml_file(transferred_file), - environment=self._environment, - ) - except NotImplementedError: - dc_metadata = {} - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - except ValueError as e: - logger.error( - f"Metadata gathering failed with a value error: {e}" - ) - if not dc_metadata or not self._force_mdoc_metadata: - self._unseen_xml.append(transferred_file) - else: - self._unseen_xml = [] - if dc_metadata.get("file_extension"): - self._extension = dc_metadata["file_extension"] - else: - dc_metadata["file_extension"] = self._extension - dc_metadata["acquisition_software"] = ( - self._context._acquisition_software - ) - self.notify(dc_metadata) - - # Contexts that can be immediately posted without additional work - elif "CLEMContext" in str(self._context): - logger.debug( - f"File {transferred_file.name!r} is part of CLEM workflow" - ) - self.post_transfer(transferred_file) - elif "FIBContext" in str(self._context): - logger.debug( - f"File {transferred_file.name!r} is part of the FIB workflow" - ) - self.post_transfer(transferred_file) - elif "SXTContext" in str(self._context): - logger.debug(f"File {transferred_file.name!r} is an SXT file") - self.post_transfer(transferred_file) - elif "AtlasContext" in str(self._context): - logger.debug(f"File {transferred_file.name!r} is part of the atlas") - self.post_transfer(transferred_file) - - # Handle files with tomography and SPA context differently - elif not self._extension or self._unseen_xml: + raise e + # Set the mdoc field to None if no metadata was found + if not dc_metadata: + self._mdoc_for_reading = None + + if not self._extension or self._unseen_xml: + # Early return if no extension was found if not self._find_extension(transferred_file): - logger.error(f"No extension found for {transferred_file}") - continue - if self._extension: + logger.warning(f"No extension found for {transferred_file}") + return + else: logger.info( f"Extension found successfully for {transferred_file}" ) - try: - self._context.post_first_transfer( - transferred_file, - environment=self._environment, - ) - except Exception as e: - logger.error(f"Exception encountered: {e}") - if not dc_metadata: - try: - dc_metadata = self._context.gather_metadata( - mdoc_for_reading - or self._xml_file(transferred_file), - environment=self._environment, - ) - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - if not dc_metadata or not self._force_mdoc_metadata: - mdoc_for_reading = None - self._unseen_xml.append(transferred_file) - if dc_metadata: - self._unseen_xml = [] - if dc_metadata.get("file_extension"): - self._extension = dc_metadata["file_extension"] - else: - dc_metadata["file_extension"] = self._extension - dc_metadata["acquisition_software"] = ( - self._context._acquisition_software - ) - self.notify(dc_metadata) - elif any( - context in str(self._context) - for context in ( - "SPAContext", - "SPAMetadataContext", - "TomographyContext", - "TomographyMetadataContext", - ) - ): - context = str(self._context).split(" ")[0].split(".")[-1] - logger.debug( - f"Transferring file {str(transferred_file)} with context {context!r}" + + logger.debug( + f"Transferring file {str(transferred_file)} with context {context!r}" + ) + self.post_transfer(transferred_file) + + if not dc_metadata and transferred_file.suffix != ".mdoc": + try: + dc_metadata = self._context.gather_metadata( + self._mdoc_for_reading or self._xml_file(transferred_file), + environment=self._environment, + ) + except KeyError as e: + logger.error( + f"Metadata gathering failed with a key error for key: {e.args[0]}" + ) + raise e + if not dc_metadata or not self._force_mdoc_metadata: + self._mdoc_for_reading = None + self._unseen_xml.append(transferred_file) + if dc_metadata: + self._unseen_xml = [] + if dc_metadata.get("file_extension"): + self._extension = dc_metadata["file_extension"] + else: + dc_metadata["file_extension"] = self._extension + dc_metadata["acquisition_software"] = ( + self._context._acquisition_software ) - self.post_transfer(transferred_file) - self.queue.task_done() - logger.debug("Analyer thread has stopped analysing incoming files") - self.notify(final=True) + self.notify(dc_metadata) + return def _xml_file(self, data_file: Path) -> Path: if not self._environment: diff --git a/src/murfey/client/context.py b/src/murfey/client/context.py index c64f713c8..73172e62c 100644 --- a/src/murfey/client/context.py +++ b/src/murfey/client/context.py @@ -3,7 +3,7 @@ import logging from importlib.metadata import entry_points from pathlib import Path -from typing import Any, List, NamedTuple +from typing import Any, NamedTuple, OrderedDict import xmltodict @@ -209,12 +209,6 @@ def ensure_dcg_exists( return dcg_tag -class ProcessingParameter(NamedTuple): - name: str - label: str - default: Any = None - - def detect_acquisition_software(dir_for_transfer: Path) -> str: glob = dir_for_transfer.glob("*") for f in glob: @@ -225,9 +219,15 @@ def detect_acquisition_software(dir_for_transfer: Path) -> str: return "" +class ProcessingParameter(NamedTuple): + name: str + label: str + default: Any = None + + class Context: - user_params: List[ProcessingParameter] = [] - metadata_params: List[ProcessingParameter] = [] + user_params: list[ProcessingParameter] = [] + metadata_params: list[ProcessingParameter] = [] def __init__(self, name: str, acquisition_software: str, token: str): self._acquisition_software = acquisition_software @@ -256,7 +256,7 @@ def post_first_transfer( def gather_metadata( self, metadata_file: Path, environment: MurfeyInstanceEnvironment | None = None - ): + ) -> OrderedDict | None: raise NotImplementedError( f"gather_metadata must be declared in derived class to be used: {self}" ) diff --git a/tests/client/test_analyser.py b/tests/client/test_analyser.py index 3c21b3805..65d57c447 100644 --- a/tests/client/test_analyser.py +++ b/tests/client/test_analyser.py @@ -1,107 +1,83 @@ from __future__ import annotations +from pathlib import Path + import pytest +from pytest_mock import MockerFixture from murfey.client.analyser import Analyser -from murfey.client.contexts.atlas import AtlasContext -from murfey.client.contexts.clem import CLEMContext -from murfey.client.contexts.fib import FIBContext from murfey.client.contexts.spa import SPAContext -from murfey.client.contexts.spa_metadata import SPAMetadataContext -from murfey.client.contexts.sxt import SXTContext from murfey.client.contexts.tomo import TomographyContext -from murfey.client.contexts.tomo_metadata import TomographyMetadataContext from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo -example_files = [ - # Tomography - ["visit/Position_1_001_0.0_20250715_012434_fractions.tiff", TomographyContext], - ["visit/Position_1_2_002_3.0_20250715_012434_Fractions.mrc", TomographyContext], - ["visit/Position_1_2_003_6.0_20250715_012434_EER.eer", TomographyContext], - ["visit/name1_004_9.0_20250715_012434_fractions.tiff", TomographyContext], - ["visit/Position_1_[30.0].tiff", TomographyContext], - ["visit/Position_1.mdoc", TomographyContext], - ["visit/name1_2.mdoc", TomographyContext], - # Tomography metadata - ["visit/Session.dm", TomographyMetadataContext], - ["visit/SearchMaps/SearchMap.xml", TomographyMetadataContext], - ["visit/Batch/BatchPositionsList.xml", TomographyMetadataContext], - ["visit/Thumbnails/file.mrc", TomographyMetadataContext], - # SPA - ["visit/FoilHole_01234_fractions.tiff", SPAContext], - ["visit/FoilHole_01234_EER.eer", SPAContext], - # SPA metadata - ["atlas/atlas.mrc", AtlasContext], - ["visit/EpuSession.dm", SPAMetadataContext], - ["visit/Metadata/GridSquare.dm", SPAMetadataContext], - # CLEM LIF file - ["visit/images/test_file.lif", CLEMContext], - # CLEM TIFF files - [ +example_files = { + "CLEMContext": [ + # CLEM LIF file + "visit/images/test_file.lif", + # CLEM TIFF files "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12--Z02--C01.tif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12_Lng_LVCC--Z02--C01.tif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001--Z00--C00.tif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001_Lng_LVCC--Z00--C00.tif", - CLEMContext, - ], - # CLEM TIFF file accompanying metadata - [ + # CLEM TIFF file accompanying metadata "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12_Lng_LVCC.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_histo.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_Lng_LVCC_histo.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001_Lng_LVCC.xlif", - CLEMContext, ], - # FIB Autotem files - ["visit/autotem/visit/ProjectData.dat", FIBContext], - ["visit/autotem/visit/Sites/Lamella/SetupImages/Preparation.tif", FIBContext], - [ + "FIBContext": [ + # FIB Autotem files + "visit/autotem/visit/ProjectData.dat", + "visit/autotem/visit/Sites/Lamella/SetupImages/Preparation.tif", "visit/autotem/visit/Sites/Lamella (2)//DCImages/DCM_2026-03-09-23-45-40.926/2026-03-09-23-48-43-Finer-Milling-dc_rescan-image-.png", - FIBContext, - ], - # FIB Maps files - ["visit/maps/visit/EMproject.emxml", FIBContext], - [ + # FIB Maps files + "visit/maps/visit/EMproject.emxml", "visit/maps/visit/LayersData/Layer/Electron Snapshot/Electron Snapshot.tiff", - FIBContext, - ], - [ "visit/maps/visit/LayersData/Layer/Electron Snapshot (2)/Electron Snapshot (2).tiff", - FIBContext, ], - # Soft x-ray tomography - ["visit/tomo__tag_ROI10_area1_angle-60to60@1.5_1sec_251p.txrm", SXTContext], - ["visit/X-ray_mosaic_ROI2.xrm", SXTContext], -] + "SXTContext": [ + "visit/tomo__tag_ROI10_area1_angle-60to60@1.5_1sec_251p.txrm", + "visit/X-ray_mosaic_ROI2.xrm", + ], + "AtlasContext": [ + "atlas/atlas.mrc", + ], + "TomographyContext": [ + "visit/Position_1_001_0.0_20250715_012434_fractions.tiff", + "visit/Position_1_2_002_3.0_20250715_012434_Fractions.mrc", + "visit/Position_1_2_003_6.0_20250715_012434_EER.eer", + "visit/name1_004_9.0_20250715_012434_fractions.tiff", + "visit/Position_1_[30.0].tiff", + "visit/Position_1.mdoc", + "visit/name1_2.mdoc", + ], + "TomographyMetadataContext": [ + "visit/Session.dm", + "visit/SearchMaps/SearchMap.xml", + "visit/Batch/BatchPositionsList.xml", + "visit/Thumbnails/file.mrc", + ], + "SPAContext": [ + "visit/FoilHole_01234_fractions.tiff", + "visit/FoilHole_01234_EER.eer", + ], + "SPAMetadataContext": [ + "visit/EpuSession.dm", + "visit/Metadata/GridSquare.dm", + ], +} -@pytest.mark.parametrize("file_and_context", example_files) +@pytest.mark.parametrize( + "file_and_context", + [ + [file, context] + for context, file_list in example_files.items() + for file in file_list + ], +) def test_find_context(file_and_context, tmp_path): # Unpack parametrised variables file_name, context = file_and_context @@ -111,7 +87,7 @@ def test_find_context(file_and_context, tmp_path): # Check that the results are as expected assert analyser._find_context(tmp_path / file_name) - assert isinstance(analyser._context, context) + assert analyser._context is not None and context in str(analyser._context) # Checks for the specific workflow contexts if isinstance(analyser._context, TomographyContext): @@ -168,3 +144,162 @@ def test_analyser_epu_determination(tmp_path): analyser.queue.put(tomo_file) analyser.stop() assert analyser._context._acquisition_software == "epu" + + +@pytest.mark.parametrize("test_file", contextless_files) +def test_analyse_no_context( + test_file: str, + mocker: MockerFixture, + tmp_path: Path, +): + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + analyser._analyse(tmp_path / test_file) + + # "_find_context" should be called + assert spy_find_context.call_count == 1 + + # "post_transfer" should not be called + mock_post_transfer.assert_not_called() + + +def test_analyse_clem( + mocker: MockerFixture, + tmp_path: Path, +): + # Gather example files related to the CLEM workflow + test_files = [ + file + for context, file_list in example_files.items() + for file in file_list + if context == "CLEMContext" and not file.endswith(".lif") + ] + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert spy_find_context.call_count == 1 + assert analyser._context is not None and "CLEMContext" in str(analyser._context) + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) + + +@pytest.mark.parametrize( + "test_params", + # Test the "autotem" and "maps" workflows separately + [ + [software, [file for file in file_list if software in file]] + for software in ("autotem", "maps") + for context, file_list in example_files.items() + if context == "FIBContext" + ], +) +def test_analyse_fib( + mocker: MockerFixture, + test_params: tuple[str, list[str]], + tmp_path: Path, +): + # Unpack test params + software, test_files = test_params + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert spy_find_context.call_count == 1 + assert analyser._context is not None and "FIBContext" in str(analyser._context) + assert analyser._context._acquisition_software == software + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) + + +def test_analyse_sxt( + mocker: MockerFixture, + tmp_path: Path, +): + # Load the example files corresponding to the SXT workflow + test_files = [ + file + for context, file_list in example_files.items() + for file in file_list + if context == "SXTContext" + ] + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert spy_find_context.call_count == 1 + assert analyser._context is not None and "SXTContext" in str(analyser._context) + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) + + +@pytest.mark.parametrize( + "test_params", + [ + ["SPAMetadataContext", ["AtlasContext", "SPAContext"]], + ["TomographyMetadataContext", ["AtlasContext", "SPAContext"]], + ], +) +def test_analyse_limited( + mocker: MockerFixture, tmp_path: Path, test_params: tuple[str, list[str]] +): + # Unpack test params + expected_context, other_contexts = test_params + + # Load example files related to the CLEM + test_files = [ + file + for context, file_list in example_files.items() + for file in file_list + if context in (expected_context, *other_contexts) + ] + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "", limited=True) + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert analyser._context is not None and expected_context in str(analyser._context) + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) + + +def test_analyse_spa(): + pass + + +def test_analyse_tomo(): + pass