diff --git a/pyproject.toml b/pyproject.toml index 33d463030..aa92ed618 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-multipart>=0.0.16", "filelock", "psutil", - "gpuhunt==0.1.19", + "gpuhunt==0.1.20", "argcomplete>=3.5.0", "ignore-python>=0.2.0", "orjson", diff --git a/src/dstack/_internal/core/backends/aws/compute.py b/src/dstack/_internal/core/backends/aws/compute.py index cd472ae11..32fd221a3 100644 --- a/src/dstack/_internal/core/backends/aws/compute.py +++ b/src/dstack/_internal/core/backends/aws/compute.py @@ -1129,23 +1129,31 @@ def _get_vpc_id_subnets_ids_by_vpc_name_or_error( ) +_ON_DEMAND_QUOTA_CODES = { + "L-1216C47A": "Standard/OnDemand", + "L-417A185B": "P/OnDemand", + "L-DB2E81BA": "G/OnDemand", +} + + def _get_regions_to_quotas( session: boto3.Session, regions: List[str] ) -> Dict[str, Dict[str, int]]: - def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]: + def get_region_quotas(region_name: str, client: botocore.client.BaseClient) -> Dict[str, int]: region_quotas = {} - try: - for page in client.get_paginator("list_service_quotas").paginate(ServiceCode="ec2"): - for q in page["Quotas"]: - if "On-Demand" in q["QuotaName"]: - region_quotas[q["UsageMetric"]["MetricDimensions"]["Class"]] = q["Value"] - except botocore.exceptions.ClientError as e: - if len(e.args) > 0 and "TooManyRequestsException" in e.args[0]: - logger.warning( - "Failed to get quotas due to rate limits. Quotas won't be accounted for." 
- ) - else: - logger.exception(e) + for quota_code, quota_class in _ON_DEMAND_QUOTA_CODES.items(): + try: + resp = client.get_service_quota(ServiceCode="ec2", QuotaCode=quota_code) + region_quotas[quota_class] = resp["Quota"]["Value"] + except botocore.exceptions.ClientError as e: + if "TooManyRequestsException" in str(e): + logger.warning( + "Failed to get quota %s in %s due to rate limits", + quota_code, + region_name, + ) + else: + logger.exception(e) return region_quotas regions_to_quotas = {} @@ -1153,7 +1161,7 @@ def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]: future_to_region = {} for region in regions: future = executor.submit( - get_region_quotas, session.client("service-quotas", region_name=region) + get_region_quotas, region, session.client("service-quotas", region_name=region) ) future_to_region[future] = region for future in as_completed(future_to_region): diff --git a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py index 53e4a46b0..0e929811e 100644 --- a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py +++ b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py @@ -18,6 +18,9 @@ collect_metrics, delete_metrics, ) +from dstack._internal.server.background.scheduled_tasks.offers_catalog import ( + preload_offers_catalog, +) from dstack._internal.server.background.scheduled_tasks.probes import process_probes from dstack._internal.server.background.scheduled_tasks.prometheus_metrics import ( collect_prometheus_metrics, @@ -36,8 +39,11 @@ def start_scheduled_tasks() -> AsyncIOScheduler: Start periodic tasks triggered by `apscheduler` at specific times/intervals. Suitable for tasks that run infrequently and don't need to lock rows for a long time. """ - # DateTrigger() to init gateways immediately. + # DateTrigger() to run one-time init tasks immediately. 
_scheduler.add_job(init_gateways_in_background, DateTrigger(), max_instances=1) + # Pre-load catalog offers both on server start and before catalog needs reload (15m). + _scheduler.add_job(preload_offers_catalog, DateTrigger(), max_instances=1) + _scheduler.add_job(preload_offers_catalog, IntervalTrigger(minutes=10), max_instances=1) _scheduler.add_job(process_probes, IntervalTrigger(seconds=3, jitter=1)) _scheduler.add_job(collect_metrics, IntervalTrigger(seconds=10), max_instances=1) _scheduler.add_job(delete_metrics, IntervalTrigger(minutes=5), max_instances=1) diff --git a/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py new file mode 100644 index 000000000..9fafd21a3 --- /dev/null +++ b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py @@ -0,0 +1,14 @@ +import gpuhunt + +from dstack._internal.utils.common import run_async +from dstack._internal.utils.logging import get_logger + +logger = get_logger(__name__) + + +async def preload_offers_catalog(): + """Pre-load the `gpuhunt` offers catalog so that get-offers requests do not pay the catalog download cost.""" + logger.debug("Pre-loading offers catalog") + catalog = gpuhunt.default_catalog() + await run_async(catalog.load) + logger.debug("Pre-loaded offers catalog")