dstackai · r4victor · Apr 24, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/contributing/RUNS-AND-JOBS.md b/contributing/RUNS-AND-JOBS.md
@@ -61,7 +61,9 @@ Services' run lifecycle has some modifications:
 ## Job's Lifecycle
 
 - STEP 1: A newly submitted job has status `SUBMITTED`. It is not assigned to any instance yet.
-- STEP 2: `JobSubmittedPipeline` tries to assign an existing instance or provision new capacity.
+- STEP 2: `JobSubmittedPipeline` assigns the job in two phases:
+  - Assignment: claim an existing instance or reserve a *placeholder* `InstanceModel`. A placeholder is a `PENDING` instance with `provisioning_job_id` set; it reserves an `instance_num` and a `nodes.max` slot. `InstancePipeline` skips placeholders.
+  - Provisioning: reuse the claimed existing instance, or provision a new instance in the cloud and promote the placeholder to `PROVISIONING`.
   - On success, the job becomes `PROVISIONING`.
   - On failure, the job becomes `TERMINATING`. `JobTerminatingPipeline` later assigns the final failed status.
 - STEP 3: `JobRunningPipeline` processes `PROVISIONING`, `PULLING`, and `RUNNING` jobs.

diff --git a/src/dstack/_internal/server/background/pipeline_tasks/fleets.py b/src/dstack/_internal/server/background/pipeline_tasks/fleets.py
@@ -49,7 +49,10 @@
     is_fleet_empty,
     is_fleet_in_use,
 )
-from dstack._internal.server.services.instances import instance_matches_constraints
+from dstack._internal.server.services.instances import (
+    instance_matches_constraints,
+    is_placeholder_instance,
+)
 from dstack._internal.server.services.locking import get_locker
 from dstack._internal.server.services.pipelines import PipelineHinterProtocol
 from dstack._internal.server.utils import sentry_utils
@@ -935,8 +938,12 @@ def _select_current_master_instance_id(
             return instance_model.id
 
     # Prefer existing surviving instances over freshly planned replacements to
-    # avoid election churn during min-nodes backfill.
+    # avoid election churn during min-nodes backfill. Skip placeholders —
+    # they have no job provisioning data (JPD) and cannot anchor cluster
+    # placement, so electing one just defers the real master decision.
     for instance_model in surviving_instance_models:
+        if is_placeholder_instance(instance_model):
+            continue
         if (
             _get_effective_instance_status(
                 instance_model,

diff --git a/src/dstack/_internal/server/background/pipeline_tasks/instances/__init__.py b/src/dstack/_internal/server/background/pipeline_tasks/instances/__init__.py
@@ -179,6 +179,13 @@ async def fetch(self, limit: int) -> list[InstancePipelineItem]:
                                 InstanceModel.compute_group_id.is_not(None),
                             )
                         ),
+                        # Skip placeholder instances (PENDING with provisioning_job_id set):
+                        # they are managed by JobSubmittedPipeline, not by this pipeline.
+                        not_(
+                            and_(
+                                InstanceModel.status == InstanceStatus.PENDING,
+                                InstanceModel.provisioning_job_id.is_not(None),
+                            )
+                        ),
                         InstanceModel.deleted == False,
                         or_(
                             # Process fast-moving instances (pending, provisioning, terminating)