From 36f539d3481ba1ee628cb1727edf9f684c7679b6 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Wed, 15 Apr 2026 16:34:13 +0500 Subject: [PATCH 1/4] Replace AWS list_service_quotas with get_service_quota --- .../_internal/core/backends/aws/compute.py | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/dstack/_internal/core/backends/aws/compute.py b/src/dstack/_internal/core/backends/aws/compute.py index cd472ae11..32fd221a3 100644 --- a/src/dstack/_internal/core/backends/aws/compute.py +++ b/src/dstack/_internal/core/backends/aws/compute.py @@ -1129,23 +1129,31 @@ def _get_vpc_id_subnets_ids_by_vpc_name_or_error( ) +_ON_DEMAND_QUOTA_CODES = { + "L-1216C47A": "Standard/OnDemand", + "L-417A185B": "P/OnDemand", + "L-DB2E81BA": "G/OnDemand", +} + + def _get_regions_to_quotas( session: boto3.Session, regions: List[str] ) -> Dict[str, Dict[str, int]]: - def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]: + def get_region_quotas(region_name: str, client: botocore.client.BaseClient) -> Dict[str, int]: region_quotas = {} - try: - for page in client.get_paginator("list_service_quotas").paginate(ServiceCode="ec2"): - for q in page["Quotas"]: - if "On-Demand" in q["QuotaName"]: - region_quotas[q["UsageMetric"]["MetricDimensions"]["Class"]] = q["Value"] - except botocore.exceptions.ClientError as e: - if len(e.args) > 0 and "TooManyRequestsException" in e.args[0]: - logger.warning( - "Failed to get quotas due to rate limits. Quotas won't be accounted for." - ) - else: - logger.exception(e) + for quota_code, quota_class in _ON_DEMAND_QUOTA_CODES.items(): + try: + resp = client.get_service_quota(ServiceCode="ec2", QuotaCode=quota_code) + region_quotas[quota_class] = resp["Quota"]["Value"] + except botocore.exceptions.ClientError as e: + if "TooManyRequestsException" in str(e): + logger.warning( + "Failed to get quota %s in %s due to rate limits", + quota_code, + region_name, + ) + else: + logger.exception(e) return region_quotas regions_to_quotas = {} @@ -1153,7 +1161,7 @@ def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]: future_to_region = {} for region in regions: future = executor.submit( - get_region_quotas, session.client("service-quotas", region_name=region) + get_region_quotas, region, session.client("service-quotas", region_name=region) ) future_to_region[future] = region for future in as_completed(future_to_region): From 2b942c62f8d89dff517656d9f27fa3c10a26895b Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Wed, 15 Apr 2026 16:43:41 +0500 Subject: [PATCH 2/4] Add preload_catalog task --- .../server/background/scheduled_tasks/__init__.py | 6 +++++- .../background/scheduled_tasks/offers_catalog.py | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py diff --git a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py index 53e4a46b0..85f55e447 100644 --- a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py +++ b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py @@ -18,6 +18,9 @@ collect_metrics, delete_metrics, ) +from dstack._internal.server.background.scheduled_tasks.offers_catalog import ( + preload_offers_catalog, +) from dstack._internal.server.background.scheduled_tasks.probes import process_probes from dstack._internal.server.background.scheduled_tasks.prometheus_metrics import ( collect_prometheus_metrics, @@ -36,8 +39,9 @@ def start_scheduled_tasks() -> AsyncIOScheduler: Start periodic tasks triggered by `apscheduler` at specific times/intervals. Suitable for tasks that run infrequently and don't need to lock rows for a long time. """ - # DateTrigger() to init gateways immediately. + # DateTrigger() to run one-time init tasks immediately. _scheduler.add_job(init_gateways_in_background, DateTrigger(), max_instances=1) + _scheduler.add_job(preload_offers_catalog, DateTrigger(), max_instances=1) _scheduler.add_job(process_probes, IntervalTrigger(seconds=3, jitter=1)) _scheduler.add_job(collect_metrics, IntervalTrigger(seconds=10), max_instances=1) _scheduler.add_job(delete_metrics, IntervalTrigger(minutes=5), max_instances=1) diff --git a/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py new file mode 100644 index 000000000..8d9d35266 --- /dev/null +++ b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py @@ -0,0 +1,13 @@ +import gpuhunt + +from dstack._internal.utils.common import run_async +from dstack._internal.utils.logging import get_logger + +logger = get_logger(__name__) + + +async def preload_offers_catalog(): + """Pre-load the `gpuhunt` offers catalog so the first offer request doesn't pay the S3 download cost.""" + logger.debug("Pre-loading `gpuhunt` offers catalog") + await run_async(gpuhunt.default_catalog) + logger.debug("`gpuhunt` offers catalog pre-loaded") From d4aff1a5806ce565f790fa3f369364007f279f37 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 16 Apr 2026 10:10:38 +0500 Subject: [PATCH 3/4] Pre-load offers catalog periodically --- .../server/background/scheduled_tasks/__init__.py | 2 ++ .../server/background/scheduled_tasks/offers_catalog.py | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py index 85f55e447..0e929811e 100644 --- a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py +++ b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py @@ -41,7 +41,9 @@ def start_scheduled_tasks() -> AsyncIOScheduler: """ # DateTrigger() to run one-time init tasks immediately. _scheduler.add_job(init_gateways_in_background, DateTrigger(), max_instances=1) + # Pre-load catalog offers both on server start and before catalog needs reload (15m). _scheduler.add_job(preload_offers_catalog, DateTrigger(), max_instances=1) + _scheduler.add_job(preload_offers_catalog, IntervalTrigger(minutes=10), max_instances=1) _scheduler.add_job(process_probes, IntervalTrigger(seconds=3, jitter=1)) _scheduler.add_job(collect_metrics, IntervalTrigger(seconds=10), max_instances=1) _scheduler.add_job(delete_metrics, IntervalTrigger(minutes=5), max_instances=1) diff --git a/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py index 8d9d35266..9fafd21a3 100644 --- a/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py +++ b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py @@ -7,7 +7,8 @@ async def preload_offers_catalog(): - """Pre-load the `gpuhunt` offers catalog so the first offer request doesn't pay the S3 download cost.""" - logger.debug("Pre-loading `gpuhunt` offers catalog") - await run_async(gpuhunt.default_catalog) - logger.debug("`gpuhunt` offers catalog pre-loaded") + """Pre-load the `gpuhunt` offers catalog so the get offer requests do not pay the catalog download cost.""" + logger.debug("Pre-loading offers catalog") + catalog = gpuhunt.default_catalog() + await run_async(catalog.load) + logger.debug("Pre-loaded offers catalog") From dfa2f6fca46178bf8dfff57f3908c57837d524d1 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 16 Apr 2026 11:24:25 +0500 Subject: [PATCH 4/4] Bump gpuhunt --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 33d463030..aa92ed618 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-multipart>=0.0.16", "filelock", "psutil", - "gpuhunt==0.1.19", + "gpuhunt==0.1.20", "argcomplete>=3.5.0", "ignore-python>=0.2.0", "orjson",