diff --git a/pyproject.toml b/pyproject.toml index 33d463030..aa92ed618 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-multipart>=0.0.16", "filelock", "psutil", - "gpuhunt==0.1.19", + "gpuhunt==0.1.20", "argcomplete>=3.5.0", "ignore-python>=0.2.0", "orjson", diff --git a/src/dstack/_internal/core/backends/aws/compute.py b/src/dstack/_internal/core/backends/aws/compute.py index cd472ae11..32fd221a3 100644 --- a/src/dstack/_internal/core/backends/aws/compute.py +++ b/src/dstack/_internal/core/backends/aws/compute.py @@ -1129,23 +1129,31 @@ def _get_vpc_id_subnets_ids_by_vpc_name_or_error( ) +_ON_DEMAND_QUOTA_CODES = { + "L-1216C47A": "Standard/OnDemand", + "L-417A185B": "P/OnDemand", + "L-DB2E81BA": "G/OnDemand", +} + + def _get_regions_to_quotas( session: boto3.Session, regions: List[str] ) -> Dict[str, Dict[str, int]]: - def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]: + def get_region_quotas(region_name: str, client: botocore.client.BaseClient) -> Dict[str, int]: region_quotas = {} - try: - for page in client.get_paginator("list_service_quotas").paginate(ServiceCode="ec2"): - for q in page["Quotas"]: - if "On-Demand" in q["QuotaName"]: - region_quotas[q["UsageMetric"]["MetricDimensions"]["Class"]] = q["Value"] - except botocore.exceptions.ClientError as e: - if len(e.args) > 0 and "TooManyRequestsException" in e.args[0]: - logger.warning( - "Failed to get quotas due to rate limits. Quotas won't be accounted for." 
- ) - else: - logger.exception(e) + for quota_code, quota_class in _ON_DEMAND_QUOTA_CODES.items(): + try: + resp = client.get_service_quota(ServiceCode="ec2", QuotaCode=quota_code) + region_quotas[quota_class] = resp["Quota"]["Value"] + except botocore.exceptions.ClientError as e: + if "TooManyRequestsException" in str(e): + logger.warning( + "Failed to get quota %s in %s due to rate limits", + quota_code, + region_name, + ) + else: + logger.exception(e) return region_quotas regions_to_quotas = {} @@ -1153,7 +1161,7 @@ def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]: future_to_region = {} for region in regions: future = executor.submit( - get_region_quotas, session.client("service-quotas", region_name=region) + get_region_quotas, region, session.client("service-quotas", region_name=region) ) future_to_region[future] = region for future in as_completed(future_to_region): diff --git a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py index 53e4a46b0..0e929811e 100644 --- a/src/dstack/_internal/server/background/scheduled_tasks/__init__.py +++ b/src/dstack/_internal/server/background/scheduled_tasks/__init__.py @@ -18,6 +18,9 @@ collect_metrics, delete_metrics, ) +from dstack._internal.server.background.scheduled_tasks.offers_catalog import ( + preload_offers_catalog, +) from dstack._internal.server.background.scheduled_tasks.probes import process_probes from dstack._internal.server.background.scheduled_tasks.prometheus_metrics import ( collect_prometheus_metrics, @@ -36,8 +39,11 @@ def start_scheduled_tasks() -> AsyncIOScheduler: Start periodic tasks triggered by `apscheduler` at specific times/intervals. Suitable for tasks that run infrequently and don't need to lock rows for a long time. """ - # DateTrigger() to init gateways immediately. + # DateTrigger() to run one-time init tasks immediately. 
_scheduler.add_job(init_gateways_in_background, DateTrigger(), max_instances=1) + # Pre-load catalog offers both on server start and before catalog needs reload (15m). + _scheduler.add_job(preload_offers_catalog, DateTrigger(), max_instances=1) + _scheduler.add_job(preload_offers_catalog, IntervalTrigger(minutes=10), max_instances=1) _scheduler.add_job(process_probes, IntervalTrigger(seconds=3, jitter=1)) _scheduler.add_job(collect_metrics, IntervalTrigger(seconds=10), max_instances=1) _scheduler.add_job(delete_metrics, IntervalTrigger(minutes=5), max_instances=1) diff --git a/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py new file mode 100644 index 000000000..9fafd21a3 --- /dev/null +++ b/src/dstack/_internal/server/background/scheduled_tasks/offers_catalog.py @@ -0,0 +1,14 @@ +import gpuhunt + +from dstack._internal.utils.common import run_async +from dstack._internal.utils.logging import get_logger + +logger = get_logger(__name__) + + +async def preload_offers_catalog(): + """Pre-load the `gpuhunt` offers catalog so that get-offers requests do not pay the catalog download cost.""" + logger.debug("Pre-loading offers catalog") + catalog = gpuhunt.default_catalog() + await run_async(catalog.load) + logger.debug("Pre-loaded offers catalog")