diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 27eeaac22..2b80023fd 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -12,6 +12,7 @@ from typing import Any, cast from urllib.parse import urlencode, urljoin, urlparse +import arff import requests import xmltodict from requests import Response @@ -98,16 +99,32 @@ def _get_body_filename_from_response(self, response: Response) -> str: if "text/xml" in content_type: return "body.xml" + if response.content.startswith(b"PK\x03\x04"): + return "body.zip" + + try: + arff.loads(response.text) + return "body.arff" + except arff.ArffException: + pass + return "body.txt" def _get_body_filename_from_path(self, path: Path) -> str: - if (path / "body.json").exists(): - return "body.json" + candidates = [] + for p in path.iterdir(): + if p.name.startswith("body.") and len(p.suffixes) == 1: + candidates.append(p) - if (path / "body.xml").exists(): - return "body.xml" + if not candidates: + raise FileNotFoundError(f"No body file found in path: {path}") - return "body.txt" + if len(candidates) > 1: + raise FileNotFoundError( + f"Multiple body files found in path: {path} ({[p.name for p in candidates]})" + ) + + return candidates[0].name def load(self, key: str) -> Response: """ @@ -132,6 +149,9 @@ def load(self, key: str) -> Response: """ path = self._key_to_path(key) + if not path.exists(): + raise FileNotFoundError(f"Cache path not found: {path}") + meta_path = path / "meta.json" meta_raw = meta_path.read_bytes() if meta_path.exists() else "{}" meta = json.loads(meta_raw) @@ -141,8 +161,6 @@ def load(self, key: str) -> Response: headers = json.loads(headers_raw) body_path = path / self._get_body_filename_from_path(path) - if not body_path.exists(): - raise FileNotFoundError(f"Incomplete cache at {body_path}") body = body_path.read_bytes() response = Response() @@ -825,3 +843,9 @@ def write_to_file(response: Response, path: Path, encoding: str) -> None: handler = handler or 
@abstractmethod
def list(  # type: ignore[valid-type]  # noqa: PLR0913
    self,
    limit: int,
    offset: int,
    *,
    ids: builtins.list[int] | None = None,
    task: builtins.list[int] | None = None,
    setup: builtins.list[int] | None = None,
    flow: builtins.list[int] | None = None,
    uploader: builtins.list[int] | None = None,
    study: int | None = None,
    tag: str | None = None,
    display_errors: bool = False,
    task_type: TaskType | int | None = None,
) -> pd.DataFrame:
    """List runs, optionally filtered, as a DataFrame.

    Declared abstract, like the other run operations on this interface, so
    that a concrete API version which forgets to implement it fails at
    instantiation instead of silently returning ``None``.

    Parameters
    ----------
    limit : int
        Maximum number of runs to return.
    offset : int
        Starting position for pagination.
    ids, task, setup, flow, uploader : list of int, optional
        Run, task, setup, flow and uploader IDs to filter by.
    study : int, optional
        Study ID to filter by.
    tag : str, optional
        Tag to filter by.
    display_errors : bool, default=False
        Whether to include runs that carry an error message.
    task_type : TaskType or int, optional
        Task type to filter by.

    Returns
    -------
    pd.DataFrame
        One row per run.
    """
def _get_endpoint_name(self) -> str:
    """Return the URL path segment used for this resource type.

    Datasets are addressed as ``data``; every other resource type uses the
    string value of its ``ResourceType`` member.

    Raises
    ------
    TypeError
        If the resource type's value is not a string.
    """
    resource = self.resource_type
    if resource == ResourceType.DATASET:
        return "data"

    name = resource.value
    if isinstance(name, str):
        return name
    raise TypeError(f"Unexpected endpoint type: {type(name)}")
def get(
    self,
    run_id: int,
    *,
    reset_cache: bool = False,
) -> OpenMLRun:  # type: ignore[override]
    """Fetch a single run and build an ``OpenMLRun`` from its XML.

    The request goes to ``run/{run_id}`` through the HTTP client with
    caching enabled.

    Parameters
    ----------
    run_id : int
        The ID of the run to fetch.
    reset_cache : bool, default=False
        Forwarded to the HTTP client as ``refresh_cache``.

    Returns
    -------
    OpenMLRun
        The run parsed from the response body.
    """
    response = self._http.get(
        f"run/{run_id}",
        enable_cache=True,
        refresh_cache=reset_cache,
    )
    return openml.runs.functions._create_run_from_xml(response.text)
def _parse_list_xml(self, xml_string: str) -> pd.DataFrame:
    """Convert the XML body of a ``run/list`` response into a DataFrame.

    Parameters
    ----------
    xml_string : str
        Raw XML text returned by the server.

    Returns
    -------
    pd.DataFrame
        Indexed by run ID, with columns ``run_id``, ``task_id``,
        ``setup_id``, ``flow_id``, ``uploader``, ``task_type``,
        ``upload_time`` and ``error_message``.

    Raises
    ------
    ValueError
        If the ``oml:runs`` root element or its ``xmlns:oml`` attribute is
        missing, or the namespace is not ``http://openml.org/openml``.
    TypeError
        If the ``oml:run`` entries are not a list.
    """
    # force_list makes a single <oml:run> element parse as a one-item list.
    runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",))
    # Minimalistic check if the XML is useful
    if "oml:runs" not in runs_dict:
        raise ValueError(f'Error in return XML, does not contain "oml:runs": {runs_dict}')

    runs_root = runs_dict["oml:runs"]
    if "@xmlns:oml" not in runs_root:
        raise ValueError(
            f'Error in return XML, does not contain "oml:runs"/@xmlns:oml: {runs_dict}'
        )

    if runs_root["@xmlns:oml"] != "http://openml.org/openml":
        raise ValueError(
            "Error in return XML, value of "
            '"oml:runs"/@xmlns:oml is not '
            f'"http://openml.org/openml": {runs_dict}',
        )

    run_entries = runs_root["oml:run"]
    # Explicit check instead of ``assert``: asserts are stripped under ``-O``.
    if not isinstance(run_entries, list):
        raise TypeError(
            f"Expected runs_dict['oml:runs']['oml:run'] to be a list, "
            f"got {type(run_entries).__name__}"
        )

    runs = {}
    for entry in run_entries:
        run_id = int(entry["oml:run_id"])
        runs[run_id] = {
            "run_id": run_id,
            "task_id": int(entry["oml:task_id"]),
            "setup_id": int(entry["oml:setup_id"]),
            "flow_id": int(entry["oml:flow_id"]),
            "uploader": int(entry["oml:uploader"]),
            "task_type": TaskType(int(entry["oml:task_type_id"])),
            "upload_time": str(entry["oml:upload_time"]),
            # A missing/empty error element becomes an empty string.
            "error_message": str((entry["oml:error_message"]) or ""),
        }
    return pd.DataFrame.from_dict(runs, orient="index")
+ """ + self._not_supported(method="get") + + def list( # type: ignore[valid-type] # noqa: PLR0913 + self, + limit: int, # noqa: ARG002 + offset: int, # noqa: ARG002 + *, + ids: builtins.list[int] | None = None, # noqa: ARG002 + task: builtins.list[int] | None = None, # noqa: ARG002 + setup: builtins.list[int] | None = None, # noqa: ARG002 + flow: builtins.list[int] | None = None, # noqa: ARG002 + uploader: builtins.list[int] | None = None, # noqa: ARG002 + study: int | None = None, # noqa: ARG002 + tag: str | None = None, # noqa: ARG002 + display_errors: bool = False, # noqa: ARG002 + task_type: TaskType | int | None = None, # noqa: ARG002 + ) -> pd.DataFrame: + """List runs from the V2 server. + + Raises + ------ + OpenMLNotSupportedError + V2 server API not yet available for this operation. + """ + self._not_supported(method="list") + + def download_text_file( + self, + source: str, # noqa: ARG002 + *, + md5_checksum: str | None = None, # noqa: ARG002 + ) -> str: + self._not_supported(method="download_text_file") + + def file_id_to_url( + self, + file_id: int, # noqa: ARG002 + filename: str | None = None, # noqa: ARG002 + ) -> str: + self._not_supported(method="file_id_to_url") diff --git a/openml/runs/functions.py b/openml/runs/functions.py index d87bd3e18..54b972e85 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -6,7 +6,6 @@ import warnings from collections import OrderedDict from functools import partial -from pathlib import Path from typing import TYPE_CHECKING, Any import numpy as np @@ -19,7 +18,6 @@ import openml._api_calls import openml.utils from openml.exceptions import ( - OpenMLCacheException, OpenMLRunsExistError, OpenMLServerException, PyOpenMLError, @@ -49,7 +47,6 @@ # get_dict is in run.py to avoid circular imports -RUNS_CACHE_DIR_NAME = "runs" ERROR_CODE = 512 @@ -819,30 +816,15 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun: # noqa: FBT0 Whether to ignore the cache. 
If ``true`` this will download and overwrite the run xml even if the requested run is already cached. - ignore_cache - Returns ------- run : OpenMLRun Run corresponding to ID, fetched from the server. """ - run_dir = Path(openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id)) - run_file = run_dir / "description.xml" - - run_dir.mkdir(parents=True, exist_ok=True) - - try: - if not ignore_cache: - return _get_cached_run(run_id) - - raise OpenMLCacheException(message="dummy") - - except OpenMLCacheException: - run_xml = openml._api_calls._perform_api_call(f"run/{run_id}", "get") - with run_file.open("w", encoding="utf8") as fh: - fh.write(run_xml) - - return _create_run_from_xml(run_xml) + return openml._backend.run.get( + run_id, + reset_cache=ignore_cache, + ) def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun: # noqa: PLR0915, PLR0912, C901, FBT002 @@ -1032,17 +1014,6 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): # type: ignore ) -def _get_cached_run(run_id: int) -> OpenMLRun: - """Load a run from the cache.""" - run_cache_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id) - run_file = run_cache_dir / "description.xml" - try: - with run_file.open(encoding="utf8") as fh: - return _create_run_from_xml(xml=fh.read()) - except OSError as e: - raise OpenMLCacheException(f"Run file for run id {run_id} not cached") from e - - def list_runs( # noqa: PLR0913 offset: int | None = None, size: int | None = None, @@ -1103,8 +1074,8 @@ def list_runs( # noqa: PLR0913 raise TypeError("uploader must be of type list.") listing_call = partial( - _list_runs, - id=id, + openml._backend.run.list, + ids=id, task=task, setup=setup, flow=flow, @@ -1121,125 +1092,6 @@ def list_runs( # noqa: PLR0913 return pd.concat(batches) -def _list_runs( # noqa: PLR0913, C901 - limit: int, - offset: int, - *, - id: list | None = None, # noqa: A002 - task: list | None = None, - setup: list | None = None, - flow: list 
| None = None, - uploader: list | None = None, - study: int | None = None, - tag: str | None = None, - display_errors: bool = False, - task_type: TaskType | int | None = None, -) -> pd.DataFrame: - """ - Perform API call `/run/list/{filters}' - ` - - Parameters - ---------- - The arguments that are lists are separated from the single value - ones which are put into the kwargs. - display_errors is also separated from the kwargs since it has a - default value. - - id : list, optional - - task : list, optional - - setup: list, optional - - flow : list, optional - - tag: str, optional - - uploader : list, optional - - study : int, optional - - display_errors : bool, optional (default=None) - Whether to list runs which have an error (for example a missing - prediction file). - - task_type : str, optional - - Returns - ------- - dict, or dataframe - List of found runs. - """ - api_call = "run/list" - if limit is not None: - api_call += f"/limit/{limit}" - if offset is not None: - api_call += f"/offset/{offset}" - if id is not None: - api_call += f"/run/{','.join([str(int(i)) for i in id])}" - if task is not None: - api_call += f"/task/{','.join([str(int(i)) for i in task])}" - if setup is not None: - api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" - if flow is not None: - api_call += f"/flow/{','.join([str(int(i)) for i in flow])}" - if uploader is not None: - api_call += f"/uploader/{','.join([str(int(i)) for i in uploader])}" - if study is not None: - api_call += f"/study/{study}" - if display_errors: - api_call += "/show_errors/true" - if tag is not None: - api_call += f"/tag/{tag}" - if task_type is not None: - tvalue = task_type.value if isinstance(task_type, TaskType) else task_type - api_call += f"/task_type/{tvalue}" - return __list_runs(api_call=api_call) - - -def __list_runs(api_call: str) -> pd.DataFrame: - """Helper function to parse API calls which are lists of runs""" - xml_string = openml._api_calls._perform_api_call(api_call, "get") - 
runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",)) - # Minimalistic check if the XML is useful - if "oml:runs" not in runs_dict: - raise ValueError(f'Error in return XML, does not contain "oml:runs": {runs_dict}') - - if "@xmlns:oml" not in runs_dict["oml:runs"]: - raise ValueError( - f'Error in return XML, does not contain "oml:runs"/@xmlns:oml: {runs_dict}' - ) - - if runs_dict["oml:runs"]["@xmlns:oml"] != "http://openml.org/openml": - raise ValueError( - "Error in return XML, value of " - '"oml:runs"/@xmlns:oml is not ' - f'"http://openml.org/openml": {runs_dict}', - ) - - if not isinstance(runs_dict["oml:runs"]["oml:run"], list): - raise TypeError( - f"Expected runs_dict['oml:runs']['oml:run'] to be a list, " - f"got {type(runs_dict['oml:runs']['oml:run']).__name__}" - ) - - runs = { - int(r["oml:run_id"]): { - "run_id": int(r["oml:run_id"]), - "task_id": int(r["oml:task_id"]), - "setup_id": int(r["oml:setup_id"]), - "flow_id": int(r["oml:flow_id"]), - "uploader": int(r["oml:uploader"]), - "task_type": TaskType(int(r["oml:task_type_id"])), - "upload_time": str(r["oml:upload_time"]), - "error_message": str((r["oml:error_message"]) or ""), - } - for r in runs_dict["oml:runs"]["oml:run"] - } - return pd.DataFrame.from_dict(runs, orient="index") - - def format_prediction( # noqa: PLR0913 task: OpenMLSupervisedTask, repeat: int, @@ -1325,4 +1177,4 @@ def delete_run(run_id: int) -> bool: bool True if the deletion was successful. False otherwise. 
""" - return openml.utils._delete_entity("run", run_id) + return openml._backend.run.delete(run_id) diff --git a/openml/runs/run.py b/openml/runs/run.py index 086e9c046..5b3d5572c 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -16,7 +16,6 @@ import pandas as pd import openml -import openml._api_calls from openml.base import OpenMLBase from openml.exceptions import PyOpenMLError from openml.flows import OpenMLFlow, get_flow @@ -154,10 +153,11 @@ def __init__( # noqa: PLR0913 def predictions(self) -> pd.DataFrame: """Return a DataFrame with predictions for this run""" if self._predictions is None: + arff_dict: dict[str, Any] if self.data_content: arff_dict = self._generate_arff_dict() elif self.predictions_url: - arff_text = openml._api_calls._download_text_file(self.predictions_url) + arff_text = openml._backend.run.download_text_file(self.predictions_url) arff_dict = arff.loads(arff_text) else: raise RuntimeError("Run has no predictions.") @@ -343,6 +343,37 @@ def from_filesystem(cls, directory: str | Path, expect_model: bool = True) -> Op return run + def publish(self) -> OpenMLRun: + """Publish the run object on the OpenML server.""" + file_elements = self._get_file_elements() + + if "description" not in file_elements: + file_elements["description"] = self._to_xml() + + result = openml._backend.run.publish(path="run", files=file_elements) + self.run_id = result + return self + + def push_tag(self, tag: str) -> None: + """Push a tag for this run on the OpenML server.""" + if self.run_id is None: + raise openml.exceptions.ObjectNotPublishedError( + "Cannot tag a run that has not been published yet." + " Please publish the run first before being able to tag it.", + ) + + openml._backend.run.tag(self.run_id, tag) + + def remove_tag(self, tag: str) -> None: + """Remove a tag for this run on the OpenML server.""" + if self.run_id is None: + raise openml.exceptions.ObjectNotPublishedError( + "Cannot untag a run that has not been published yet." 
+ " Please publish the run first before being able to untag it.", + ) + + openml._backend.run.untag(self.run_id, tag) + def to_filesystem( self, directory: str | Path, @@ -494,15 +525,16 @@ def get_metric_fn(self, sklearn_fn: Callable, kwargs: dict | None = None) -> np. metric results """ kwargs = kwargs if kwargs else {} + predictions_arff: dict[str, Any] if self.data_content is not None and self.task_id is not None: predictions_arff = self._generate_arff_dict() elif (self.output_files is not None) and ("predictions" in self.output_files): - predictions_file_url = openml._api_calls._file_id_to_url( + predictions_file_url = openml._backend.run.file_id_to_url( self.output_files["predictions"], "predictions.arff", ) - response = openml._api_calls._download_text_file(predictions_file_url) - predictions_arff = arff.loads(response) + predictions_text = openml._backend.run.download_text_file(predictions_file_url) + predictions_arff = arff.loads(predictions_text) # TODO: make this a stream reader else: raise ValueError( diff --git a/tests/conftest.py b/tests/conftest.py index 1359e6247..580b26f64 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -205,7 +205,7 @@ def _expected_static_cache_state(root_dir: Path) -> list[Path]: _c_root_dir = root_dir / "org" / "openml" / "test" res_paths = [root_dir, _c_root_dir] - for _d in ["datasets", "tasks", "runs"]: + for _d in ["datasets", "tasks"]: res_paths.append(_c_root_dir / _d) for _id in ["-1", "2"]: @@ -220,7 +220,6 @@ def _expected_static_cache_state(root_dir: Path) -> list[Path]: ) res_paths.append(_c_root_dir / "datasets" / "30" / "dataset_30.pq") - res_paths.append(_c_root_dir / "runs" / "1" / "description.xml") for _id in ["1", "3", "1882"]: tmp_p = _c_root_dir / "tasks" / _id @@ -235,6 +234,9 @@ def _expected_static_cache_state(root_dir: Path) -> list[Path]: _c_root_dir / "api" / "v1" / "xml" / "setup", _c_root_dir / "api" / "v1" / "xml" / "setup" / "1", _c_root_dir / "api" / "v1" / "xml" / "setup" / "1" / 
"body.xml", + _c_root_dir / "api" / "v1" / "xml" / "run", + _c_root_dir / "api" / "v1" / "xml" / "run" / "1", + _c_root_dir / "api" / "v1" / "xml" / "run" / "1" / "body.xml", ]) return res_paths diff --git a/tests/files/org/openml/test/runs/1/description.xml b/tests/files/org/openml/test/api/v1/xml/run/1/body.xml similarity index 100% rename from tests/files/org/openml/test/runs/1/description.xml rename to tests/files/org/openml/test/api/v1/xml/run/1/body.xml diff --git a/tests/test_api/test_run.py b/tests/test_api/test_run.py new file mode 100644 index 000000000..45edce4f3 --- /dev/null +++ b/tests/test_api/test_run.py @@ -0,0 +1,122 @@ +# License: BSD 3-Clause +from __future__ import annotations + +from unittest.mock import patch + +import pytest +from requests import Response, Session + +import openml +from openml._api import RunV1API, RunV2API +from openml.exceptions import OpenMLNotSupportedError +from openml.runs.run import OpenMLRun + + +@pytest.fixture +def run_v1(http_client_v1, minio_client) -> RunV1API: + return RunV1API(http=http_client_v1, minio=minio_client) + + +@pytest.fixture +def run_v2(http_client_v2, minio_client) -> RunV2API: + return RunV2API(http=http_client_v2, minio=minio_client) + + +def _assert_run_shape(run: OpenMLRun) -> None: + assert isinstance(run, OpenMLRun) + assert isinstance(run.run_id, int) + assert run.run_id > 0 + assert isinstance(run.task_id, int) + + +@pytest.mark.test_server() +def test_run_v1_get(run_v1, test_files_directory): + openml.config.set_root_cache_directory(test_files_directory) + run = run_v1.get(run_id=1) + _assert_run_shape(run) + + +@pytest.mark.test_server() +def test_run_v1_list(run_v1): + limit = 5 + runs_df = run_v1.list(limit=limit, offset=0) + + assert len(runs_df) == limit + assert "run_id" in runs_df.columns + assert "task_id" in runs_df.columns + assert "setup_id" in runs_df.columns + assert "flow_id" in runs_df.columns + + +def test_run_v1_publish_mocked(run_v1, test_apikey_v1): + files = 
{"description": ""} + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + '\n' + " 456\n" + "\n" + ).encode("utf-8") + + result = run_v1.publish(path="run", files=files) + + assert result == 456 + mock_request.assert_called_once_with( + method="POST", + url=openml.config.server + "run", + params={}, + data={"api_key": test_apikey_v1}, + headers=openml.config._HEADERS, + files=files, + ) + + +def test_run_v1_delete_mocked(run_v1, test_apikey_v1): + run_id = 456 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + '\n' + " 456\n" + "\n" + ).encode("utf-8") + + result = run_v1.delete(run_id) + + assert result is True + mock_request.assert_called_once_with( + method="DELETE", + url=openml.config.server + f"run/{run_id}", + params={"api_key": test_apikey_v1}, + data={}, + headers=openml.config._HEADERS, + files=None, + ) + + +def test_run_v2_get_not_supported(run_v2): + with pytest.raises( + OpenMLNotSupportedError, + match="RunV2API: v2 API does not support `get` for resource `run`", + ): + run_v2.get(run_id=1) + + +def test_run_v2_list_not_supported(run_v2): + with pytest.raises( + OpenMLNotSupportedError, + match="RunV2API: v2 API does not support `list` for resource `run`", + ): + run_v2.list(limit=5, offset=0) + + +def test_run_v2_publish_not_supported(run_v2): + with pytest.raises( + OpenMLNotSupportedError, + match="RunV2API: v2 API does not support `publish` for resource `run`", + ): + run_v2.publish(path="run", files={"description": ""}) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 3728e0d78..e497f303c 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -40,7 +40,6 @@ OpenMLNotAuthorizedError, 
OpenMLServerException, ) -#from openml.extensions.sklearn import cat, cont from openml.runs.functions import ( _run_task_get_arffcontent, delete_run, @@ -1659,14 +1658,19 @@ def test_run_on_dataset_with_missing_labels_array(self): assert len(row) == 12 @pytest.mark.test_server() - def test_get_cached_run(self): + @mock.patch.object(requests.Session, "request") + def test_get_cached_run(self, mock_request): openml.config.set_root_cache_directory(self.static_cache_dir) - openml.runs.functions._get_cached_run(1) + mock_request.side_effect = Exception("Mocked Exception") + openml.runs.get_run(1) - def test_get_uncached_run(self): + @pytest.mark.test_server() + @mock.patch.object(requests.Session, "request") + def test_get_uncached_run(self, mock_request): openml.config.set_root_cache_directory(self.static_cache_dir) - with pytest.raises(openml.exceptions.OpenMLCacheException): - openml.runs.functions._get_cached_run(10) + mock_request.side_effect = Exception("Mocked Exception") + with pytest.raises(Exception, match="Mocked Exception"): + openml.runs.get_run(10) @pytest.mark.sklearn() @pytest.mark.test_server() @@ -1812,10 +1816,10 @@ def test_initialize_model_from_run_nonstrict(self): _ = openml.runs.initialize_model_from_run(run_id=1, strict_version=False) -@mock.patch.object(requests.Session, "delete") -def test_delete_run_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): +@mock.patch.object(requests.Session, "request") +def test_delete_run_not_owned(mock_request, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml" - mock_delete.return_value = create_request_response( + mock_request.return_value = create_request_response( status_code=412, content_filepath=content_file, ) @@ -1827,14 +1831,15 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_server_v1, openml.runs.delete_run(40_000) run_url = test_server_v1 + "run/40000" - 
assert run_url == mock_delete.call_args.args[0] - assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert run_url == mock_request.call_args.kwargs.get("url") + assert "DELETE" == mock_request.call_args.kwargs.get("method") + assert test_apikey_v1 == mock_request.call_args.kwargs.get("params", {}).get("api_key") -@mock.patch.object(requests.Session, "delete") -def test_delete_run_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): +@mock.patch.object(requests.Session, "request") +def test_delete_run_success(mock_request, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml" - mock_delete.return_value = create_request_response( + mock_request.return_value = create_request_response( status_code=200, content_filepath=content_file, ) @@ -1843,14 +1848,15 @@ def test_delete_run_success(mock_delete, test_files_directory, test_server_v1, t assert success run_url = test_server_v1 + "run/10591880" - assert run_url == mock_delete.call_args.args[0] - assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert run_url == mock_request.call_args.kwargs.get("url") + assert "DELETE" == mock_request.call_args.kwargs.get("method") + assert test_apikey_v1 == mock_request.call_args.kwargs.get("params", {}).get("api_key") -@mock.patch.object(requests.Session, "delete") -def test_delete_unknown_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): +@mock.patch.object(requests.Session, "request") +def test_delete_unknown_run(mock_request, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml" - mock_delete.return_value = create_request_response( + mock_request.return_value = create_request_response( status_code=412, content_filepath=content_file, ) @@ -1862,8 +1868,9 @@ def 
test_delete_unknown_run(mock_delete, test_files_directory, test_server_v1, t openml.runs.delete_run(9_999_999) run_url = test_server_v1 + "run/9999999" - assert run_url == mock_delete.call_args.args[0] - assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert run_url == mock_request.call_args.kwargs.get("url") + assert "DELETE" == mock_request.call_args.kwargs.get("method") + assert test_apikey_v1 == mock_request.call_args.kwargs.get("params", {}).get("api_key") @pytest.mark.sklearn()