diff --git a/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java b/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java index ffc67b628a7e..9e8282b16a80 100644 --- a/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java +++ b/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java @@ -29,6 +29,17 @@ public class BackupAnswer extends Answer { private Long virtualSize; private Map volumes; Boolean needsCleanup; + // Set by the NAS backup provider after a checkpoint/bitmap was created during this backup. + // The provider persists it in backup_details under NASBackupChainKeys.BITMAP_NAME. + private String bitmapCreated; + // Set when an incremental was requested but the agent had to fall back to a full + // (e.g. VM was stopped). Provider should record this backup as type=full. + private Boolean incrementalFallback; + // Set when the agent had to recreate the parent bitmap before this incremental + // (e.g. CloudStack rebuilt the domain XML on the previous VM start, losing bitmaps). + // The first incremental after a recreate is larger than usual; subsequent + // incrementals return to normal size. Informational — recorded in backup_details. + private String bitmapRecreated; public BackupAnswer(final Command command, final boolean success, final String details) { super(command, success, details); @@ -68,4 +79,28 @@ public Boolean getNeedsCleanup() { public void setNeedsCleanup(Boolean needsCleanup) { this.needsCleanup = needsCleanup; } + + public String getBitmapCreated() { + return bitmapCreated; + } + + public void setBitmapCreated(String bitmapCreated) { + this.bitmapCreated = bitmapCreated; + } + + public Boolean getIncrementalFallback() { + return incrementalFallback != null && incrementalFallback; + } + + public void setIncrementalFallback(Boolean incrementalFallback) { + this.incrementalFallback = incrementalFallback; + } + + public String getBitmapRecreated() { + return bitmapRecreated; + } + + public void setBitmapRecreated(String bitmapRecreated) { + this.bitmapRecreated = bitmapRecreated; + } } diff --git a/core/src/main/java/org/apache/cloudstack/backup/RebaseBackupCommand.java b/core/src/main/java/org/apache/cloudstack/backup/RebaseBackupCommand.java new file mode 100644 index 000000000000..e31114461263 --- /dev/null +++ b/core/src/main/java/org/apache/cloudstack/backup/RebaseBackupCommand.java @@ -0,0 +1,73 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package org.apache.cloudstack.backup; + +import com.cloud.agent.api.Command; +import com.cloud.agent.api.LogLevel; + +/** + * Tells the KVM agent to rebase a NAS backup qcow2 onto a new backing parent. 
Used by the + * NAS backup provider during chain repair when a middle incremental is being deleted: the + * immediate child must absorb the soon-to-be-deleted parent's blocks and then re-link to + * the grandparent. Both target and new-backing paths are NAS-mount-relative. + */ +public class RebaseBackupCommand extends Command { + private String targetPath; // mount-relative path of the qcow2 to repoint + private String newBackingPath; // mount-relative path of the new backing parent + private String backupRepoType; + private String backupRepoAddress; + @LogLevel(LogLevel.Log4jLevel.Off) + private String mountOptions; + + public RebaseBackupCommand(String targetPath, String newBackingPath, + String backupRepoType, String backupRepoAddress, String mountOptions) { + super(); + this.targetPath = targetPath; + this.newBackingPath = newBackingPath; + this.backupRepoType = backupRepoType; + this.backupRepoAddress = backupRepoAddress; + this.mountOptions = mountOptions; + } + + public String getTargetPath() { + return targetPath; + } + + public String getNewBackingPath() { + return newBackingPath; + } + + public String getBackupRepoType() { + return backupRepoType; + } + + public String getBackupRepoAddress() { + return backupRepoAddress; + } + + public String getMountOptions() { + return mountOptions == null ? "" : mountOptions; + } + + @Override + public boolean executeInSequence() { + return true; + } +} diff --git a/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java b/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java index 5402b6b24760..3f5b911bdb6e 100644 --- a/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java +++ b/core/src/main/java/org/apache/cloudstack/backup/TakeBackupCommand.java @@ -36,6 +36,12 @@ public class TakeBackupCommand extends Command { @LogLevel(LogLevel.Log4jLevel.Off) private String mountOptions; + // Incremental backup fields (NAS provider; null/empty for legacy full-only callers). 
+ private String mode; // "full" or "incremental"; null => legacy behaviour (script default) + private String bitmapNew; // Checkpoint/bitmap name to create with this backup (timestamp-based) + private String bitmapParent; // Incremental: parent bitmap to read changes since + private String parentPath; // Incremental: parent backup file path on the mounted NAS (for qemu-img rebase) + public TakeBackupCommand(String vmName, String backupPath) { super(); this.vmName = vmName; @@ -106,6 +112,38 @@ public void setQuiesce(Boolean quiesce) { this.quiesce = quiesce; } + public String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public String getBitmapNew() { + return bitmapNew; + } + + public void setBitmapNew(String bitmapNew) { + this.bitmapNew = bitmapNew; + } + + public String getBitmapParent() { + return bitmapParent; + } + + public void setBitmapParent(String bitmapParent) { + this.bitmapParent = bitmapParent; + } + + public String getParentPath() { + return parentPath; + } + + public void setParentPath(String parentPath) { + this.parentPath = parentPath; + } + @Override public boolean executeInSequence() { return true; diff --git a/docs/rfcs/incremental-nas-backup.md b/docs/rfcs/incremental-nas-backup.md new file mode 100644 index 000000000000..aa2e55ff55bf --- /dev/null +++ b/docs/rfcs/incremental-nas-backup.md @@ -0,0 +1,406 @@ +# RFC: Incremental NAS Backup Support for KVM Hypervisor + +| Field | Value | +|---------------|--------------------------------------------| +| **Author** | James Peru, Xcobean Systems Limited | +| **Status** | Draft | +| **Created** | 2026-03-27 | +| **Target** | Apache CloudStack 4.23+ | +| **Component** | Backup & Recovery (NAS Backup Provider) | + +--- + +## Summary + +This RFC proposes adding incremental backup support to CloudStack's NAS backup provider for KVM hypervisors. By leveraging QEMU dirty bitmaps and libvirt's `backup-begin` API, CloudStack can track changed disk blocks between backups and export only the delta, reducing daily backup storage consumption by 80--95% and shortening backup windows from hours to minutes for large VMs. The feature is opt-in at the zone level, backward-compatible with existing full-backup behavior, and gracefully degrades on older QEMU/libvirt versions. + +--- + +## Motivation + +CloudStack's NAS backup provider currently performs a full disk copy every time a backup is taken. For a 500 GB VM with five daily backups retained, that amounts to 2.5 TB of storage consumed. At scale -- tens or hundreds of VMs -- this becomes a serious operational and financial burden. + +**Problems with the current approach:** + +1. **Storage waste.** Every backup is a full copy of the entire virtual disk, regardless of how little data actually changed since the last backup. +2. **Long backup windows.** Copying hundreds of gigabytes over NFS or SMB takes hours, increasing the risk of I/O contention on production workloads. +3. **Network bandwidth pressure.** Full-disk transfers saturate the storage network during backup windows, impacting other VMs on the same host. +4. **Uncompetitive feature set.** VMware (Changed Block Tracking / VADP), Proxmox Backup Server, and Veeam all support incremental backups natively. CloudStack's lack of incremental backup is a common complaint on the users@ mailing list and a blocker for adoption in environments with large VMs. + +**What incremental backup achieves:** + +- Only changed blocks are transferred and stored after the initial full backup. 
+- A typical daily incremental for a 500 GB VM with moderate write activity is 5--15 GB, a reduction of 97--99% compared to a full copy. +- Backup completes in minutes rather than hours. +- Retention of 30+ daily restore points becomes economically feasible. + +--- + +## Proposed Design + +### Backup Chain Model + +Incremental backups form a chain anchored by a periodic full backup: + +``` +Full (Day 0) -> Inc 1 (Day 1) -> Inc 2 (Day 2) -> ... -> Inc 6 (Day 6) -> Full (Day 7) -> ... +``` + +Restoring to any point in time requires the full backup plus every incremental up to the desired restore point. To bound restore complexity and protect against chain corruption, a new full backup is forced at a configurable interval. + +**Global settings (zone scope):** + +| Setting | Type | Default | Description | +|--------------------------------------|---------|---------|------------------------------------------------------| +| `nas.backup.incremental.enabled` | Boolean | `false` | Enable incremental backup for the zone | +| `nas.backup.full.interval` | Integer | `7` | Days between full backups | +| `nas.backup.incremental.max.chain` | Integer | `6` | Max incremental backups before forcing a new full | + +When `nas.backup.incremental.enabled` is `false` (the default), behavior is identical to today -- every backup is a full copy. Existing deployments are unaffected. + +--- + +### Technical Approach + +#### 1. Dirty Bitmap Tracking (QEMU Layer) + +QEMU 4.0 introduced persistent dirty bitmaps: per-disk bitmaps that record which blocks have been written since the bitmap was created. These bitmaps survive QEMU restarts (they are stored in the qcow2 image header) and are the foundation for incremental backup. + +**Lifecycle:** + +1. When incremental backup is enabled for a VM, the agent creates a persistent dirty bitmap on each virtual disk via QMP: + ```json + { + "execute": "block-dirty-bitmap-add", + "arguments": { + "node": "drive-virtio-disk0", + "name": "backup-20260327", + "persistent": true + } + } + ``` +2. QEMU automatically sets bits in this bitmap whenever the guest writes to a block. +3. At backup time, the bitmap tells the backup process exactly which blocks to read. +4. After a successful backup, a new bitmap is created for the next cycle and the old bitmap is optionally removed. + +#### 2. Backup Flow + +**Full backup (Day 0 or every `nas.backup.full.interval` days):** + +```bash +# 1. Export the entire disk to the NAS mount +qemu-img convert -f qcow2 -O qcow2 \ + /var/lib/libvirt/images/vm-disk.qcow2 \ + /mnt/nas/backups/vm-uuid/backup-full-20260327.qcow2 + +# 2. Create a new dirty bitmap to track changes from this point +virsh qemu-monitor-command $DOMAIN --hmp \ + 'block-dirty-bitmap-add drive-virtio-disk0 backup-20260327 persistent=true' +``` + +**Incremental backup (Day 1 through Day N):** + +```bash +# 1. Use libvirt backup-begin with incremental mode +# This exports only blocks dirty since bitmap "backup-20260327" +cat > /tmp/backup.xml <<'XML' + + + + + + + + backup-20260327 + +XML + +virsh backup-begin $DOMAIN /tmp/backup.xml + +# 2. Wait for completion +virsh domjobinfo $DOMAIN --completed + +# 3. 
Rotate bitmaps: remove old, create new +virsh qemu-monitor-command $DOMAIN --hmp \ + 'block-dirty-bitmap-remove drive-virtio-disk0 backup-20260327' +virsh qemu-monitor-command $DOMAIN --hmp \ + 'block-dirty-bitmap-add drive-virtio-disk0 backup-20260328 persistent=true' +``` + +**New full backup cycle (Day 7):** + +```bash +# Remove all existing bitmaps +virsh qemu-monitor-command $DOMAIN --hmp \ + 'block-dirty-bitmap-remove drive-virtio-disk0 backup-20260327' + +# Take a full backup (same as Day 0) +# Optionally prune expired chains from NAS +``` + +#### 3. Restore Flow + +Restoring from an incremental chain requires replaying the full backup plus all incrementals up to the target restore point. This is handled entirely within `nasbackup.sh` and is transparent to the management server and the end user. + +**Example: Restore to Day 3 (full + 3 incrementals):** + +```bash +# 1. Create a working copy from the full backup +cp /mnt/nas/backups/vm-uuid/backup-full-20260327.qcow2 /tmp/restored.qcow2 + +# 2. Apply each incremental in order using qemu-img rebase +# Each incremental is a thin qcow2 containing only changed blocks. +# Rebasing merges the incremental's blocks into the chain. +qemu-img rebase -u -b /tmp/restored.qcow2 \ + /mnt/nas/backups/vm-uuid/backup-inc-20260328.qcow2 + +qemu-img rebase -u -b /mnt/nas/backups/vm-uuid/backup-inc-20260328.qcow2 \ + /mnt/nas/backups/vm-uuid/backup-inc-20260329.qcow2 + +qemu-img rebase -u -b /mnt/nas/backups/vm-uuid/backup-inc-20260329.qcow2 \ + /mnt/nas/backups/vm-uuid/backup-inc-20260330.qcow2 + +# 3. Flatten the chain into a single image +qemu-img convert -f qcow2 -O qcow2 \ + /mnt/nas/backups/vm-uuid/backup-inc-20260330.qcow2 \ + /tmp/vm-restored-final.qcow2 + +# 4. Return the flattened image for CloudStack to import +``` + +An alternative approach uses `qemu-img commit` to merge each layer down. The implementation will benchmark both methods and choose the faster one for large images. + +#### 4. Database Schema Changes + +**Modified table: `backups`** + +| Column | Type | Description | +|--------------------|--------------|------------------------------------------------| +| `backup_type` | VARCHAR(16) | `FULL` or `INCREMENTAL` | +| `parent_backup_id` | BIGINT (FK) | For incremental: ID of the previous backup | +| `bitmap_name` | VARCHAR(128) | QEMU dirty bitmap identifier for this backup | +| `chain_id` | BIGINT (FK) | Links to the backup chain this backup belongs to | + +**New table: `backup_chains`** + +| Column | Type | Description | +|-------------------|-------------|------------------------------------------------| +| `id` | BIGINT (PK) | Auto-increment primary key | +| `vm_instance_id` | BIGINT (FK) | The VM this chain belongs to | +| `full_backup_id` | BIGINT (FK) | The full backup anchoring this chain | +| `state` | VARCHAR(16) | `ACTIVE`, `SEALED`, `EXPIRED` | +| `created` | DATETIME | When the chain was started | + +**Schema migration** will be provided as a Liquibase changeset, consistent with CloudStack's existing migration framework. The new columns are nullable to maintain backward compatibility with existing backup records. + +#### 5. Management Server Changes + +**`BackupManagerImpl` (orchestration):** + +- Before taking a backup, query the active chain for the VM. +- If no active chain exists, or the chain has reached `nas.backup.incremental.max.chain` incrementals, or `nas.backup.full.interval` days have elapsed since the last full backup: schedule a full backup and start a new chain. 
+- Otherwise: schedule an incremental backup linked to the previous backup in the chain. +- On backup failure: if the bitmap is suspected corrupt, mark the chain as `SEALED` and force a full backup on the next run. + +**`NASBackupProvider.takeBackup()`:** + +- Accept a new parameter `BackupType` (FULL or INCREMENTAL). +- For incremental: pass the parent backup's bitmap name and NAS path to the agent command. + +**`TakeBackupCommand` / `TakeBackupAnswer`:** + +- Add fields: `backupType` (FULL/INCREMENTAL), `parentBackupId`, `bitmapName`, `parentBackupPath`. +- The answer includes the actual size of the backup (important for incrementals, which are much smaller than the disk size). + +**`RestoreBackupCommand`:** + +- Add field: `backupChain` (ordered list of backup paths from full through the target incremental). +- The agent reconstructs the full image from the chain before importing. + +#### 6. KVM Agent Changes + +**`LibvirtTakeBackupCommandWrapper`:** + +- For `FULL` backups: existing behavior (qemu-img convert), plus create the initial dirty bitmap. +- For `INCREMENTAL` backups: use `virsh backup-begin` with `` XML, then rotate bitmaps. +- Pre-flight check: verify QEMU version >= 4.0 and libvirt version >= 6.0. If not met, fall back to full backup and log a warning. + +**`nasbackup.sh` enhancements:** + +- New flag `-i` for incremental mode. +- New flag `-p ` to specify the parent backup on the NAS. +- New flag `-b ` to specify which dirty bitmap to use. +- New subcommand `restore-chain` that accepts an ordered list of backup paths and produces a flattened image. + +**`LibvirtRestoreBackupCommandWrapper`:** + +- If the restore target is an incremental backup, request the full chain from the management server and pass it to `nasbackup.sh restore-chain`. + +#### 7. API Changes + +**Existing API: `createBackup`** + +No change to the API signature. The management server automatically decides full vs. incremental based on the zone configuration and the current chain state. Callers do not need to specify the backup type. + +**Existing API: `listBackups`** + +Response gains two new fields: +- `backuptype` (string): `Full` or `Incremental` +- `parentbackupid` (string): UUID of the parent backup (null for full backups) + +**Existing API: `restoreBackup`** + +No change. The management server resolves the full chain internally. + +#### 8. UI Changes + +- **Backup list view:** Add a "Type" column showing `Full` or `Incremental`, with a visual indicator (e.g., a small chain icon for incrementals). +- **Backup detail view:** Show the backup chain as a vertical timeline: full backup at the top, incrementals branching down, with sizes and timestamps. +- **Restore dialog:** When the user selects an incremental restore point, display a note: "This restore will replay N backups (total chain size: X GB)." +- **Backup schedule settings** (zone-level): Toggle for incremental backup, full backup interval slider, max chain length input. + +--- + +### Storage Savings Projections + +The following estimates assume a moderate write workload (2--5% of disk blocks changed per day), which is typical for application servers, databases with WAL, and file servers. 
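As a back-of-the-envelope check of how the first row below is derived (assuming a 2% daily change rate, about 10 GB of changed blocks per day for the 500 GB VM; this is an illustrative figure at the low end of the stated range, not a measurement):

```bash
# 7 retained daily backups, 500 GB disk, assumed ~10 GB of changed blocks per day
full_only=$((7 * 500))               # 3500 GB = 3.5 TB
with_incremental=$((500 + 6 * 10))   # 560 GB, i.e. roughly the ~550 GB shown in the table
echo "savings: $(( (full_only - with_incremental) * 100 / full_only ))%"   # prints: savings: 84%
```
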
+ +| Scenario | Full Backups Only | With Incremental | Savings | +|-----------------------------------|-------------------|--------------------|------------| +| 500 GB VM, 7 daily backups | 3.5 TB | ~550 GB | **84%** | +| 1 TB VM, 30 daily backups | 30 TB | ~1.3 TB | **96%** | +| 100 VMs x 100 GB, weekly cycle | 70 TB/week | ~12 TB/week | **83%** | +| 50 VMs x 200 GB, 30-day retain | 300 TB | ~18 TB | **94%** | + +For environments with higher change rates (e.g., heavy database writes), incremental sizes will be larger, but savings still typically exceed 60%. + +--- + +### Requirements + +| Requirement | Minimum Version | Notes | +|-------------------------|----------------|-------------------------------------------------| +| QEMU | 4.0+ | Dirty bitmap support. Ubuntu 20.04+, RHEL 8+. | +| libvirt | 6.0+ | `virsh backup-begin` support. Ubuntu 22.04+, RHEL 8.3+. | +| CloudStack | 4.19+ | NAS backup provider must already be present. | +| NAS storage | NFS or SMB | No special requirements beyond existing NAS backup support. | + +**Graceful degradation:** If a KVM host runs QEMU < 4.0 or libvirt < 6.0, the agent will detect this at startup and report `incrementalBackupCapable=false` to the management server. Backups for VMs on that host will remain full-only, with a warning logged. No manual intervention is required. + +--- + +### Risks and Mitigations + +| Risk | Impact | Mitigation | +|------|--------|------------| +| **Bitmap corruption** (host crash during backup, QEMU bug) | Incremental backup produces an incomplete or incorrect image | Detect bitmap inconsistency via QMP query; force a new full backup and start a fresh chain. Data in previous full backup is unaffected. | +| **Chain too long** (missed full backup schedule) | Restore time increases; single corrupt link breaks the chain | Enforce `nas.backup.incremental.max.chain` hard limit. If exceeded, the next backup is automatically a full. | +| **Restore complexity** | User confusion about which backup to pick; longer restore for deep chains | Restore logic is fully automated in `nasbackup.sh`. The UI shows a single "Restore" button per restore point, with the chain replayed transparently. | +| **VM live migration during backup** | Dirty bitmap may be lost if migrated mid-backup | Check VM state before backup; abort and retry if migration is in progress. Bitmaps persist across clean shutdowns and restarts but not across live migration in older QEMU versions. For QEMU 6.2+, bitmaps survive migration. | +| **Backward compatibility** | Existing full-backup users should not be affected | Feature is disabled by default. No schema changes affect existing rows (new columns are nullable). Full-backup code path is unchanged. | +| **Disk space during restore** | Flattening a chain requires temporary disk space equal to the full disk size | Use the same scratch space already used for full backup restores. Document the requirement. | + +--- + +### Implementation Plan + +| Phase | Scope | Estimated Effort | +|-------|-------|------------------| +| **Phase 1** | Core incremental backup and restore in `nasbackup.sh` and KVM agent wrappers. Dirty bitmap lifecycle management. Manual testing with `virsh` and `qemu-img`. | 2--3 weeks | +| **Phase 2** | Management server changes: chain management, scheduling logic, global settings, database schema migration, API response changes. | 2 weeks | +| **Phase 3** | UI changes: backup type column, chain visualization, restore dialog enhancements, zone-level settings. 
| 1 week | +| **Phase 4** | Integration testing (full cycle: enable, backup, restore, disable, upgrade from older version). Edge case testing (host crash, bitmap loss, migration, mixed QEMU versions). Documentation. | 2 weeks | + +**Total estimated effort: 7--8 weeks.** + +We (Xcobean Systems) intend to implement this and submit PRs against the `main` branch. We would appreciate early design feedback before starting implementation to avoid rework. + +--- + +### Prior Art + +- **VMware VADP / Changed Block Tracking (CBT):** VMware's CBT is the industry-standard approach. A change tracking driver inside the hypervisor records changed blocks, and backup vendors query the CBT via the vSphere API. This RFC's approach is analogous, using QEMU dirty bitmaps as the CBT equivalent. + +- **Proxmox Backup Server (PBS):** PBS uses QEMU dirty bitmaps to implement incremental backups natively. Their implementation validates that the dirty bitmap approach is production-ready for KVM/QEMU environments. PBS has been stable since Proxmox VE 6.4 (2020). + +- **Veeam Backup & Replication:** Veeam uses a "reverse incremental" model where the most recent backup is always a synthetic full, and older backups are stored as reverse deltas. This simplifies restore (always restore from the latest full) at the cost of more I/O during backup. We chose the forward-incremental model for simplicity and because it aligns with how QEMU dirty bitmaps work natively. + +- **libvirt backup API:** The `virsh backup-begin` command and its underlying `virDomainBackupBegin()` API were specifically designed for this use case. The libvirt documentation includes examples of incremental backup using dirty bitmaps. See: https://libvirt.org/kbase/incremental-backup.html + +--- + +### About the Author + +Xcobean Systems Limited operates a production Apache CloudStack deployment providing IaaS to 50+ client VMs. We use the NAS backup provider daily and have contributed several improvements to it: + +- PR [#12805](https://github.com/apache/cloudstack/pull/12805) -- NAS backup NPE fix +- PR [#12822](https://github.com/apache/cloudstack/pull/12822) -- Backup restore improvements +- PR [#12826](https://github.com/apache/cloudstack/pull/12826) -- NAS backup script hardening +- PRs [#12843](https://github.com/apache/cloudstack/pull/12843)--[#12848](https://github.com/apache/cloudstack/pull/12848) -- Various NAS backup fixes +- PR [#12872](https://github.com/apache/cloudstack/pull/12872) -- Additional backup provider fixes + +We experience the storage and bandwidth cost of full-only backups firsthand and are motivated to solve this problem upstream rather than maintaining a fork. + +--- + +## Open Questions for Discussion + +We welcome feedback from the community on the following: + +1. **Interest level.** Is there sufficient demand for this feature to justify the implementation effort? We believe so based on mailing list threads, but would like confirmation. + +2. **Dirty bitmaps vs. alternatives.** Are there concerns about relying on QEMU dirty bitmaps? Alternative approaches include file-level deduplication on the NAS (less efficient, not hypervisor-aware) or `qemu-img compare` (slower, requires reading both images). + +3. **Target release.** Should this target CloudStack 4.23, or is a later release more appropriate given the scope? + +4. **Chain model.** We proposed forward-incremental with periodic full backups. 
Would the community prefer a different model (e.g., reverse-incremental like Veeam, or forever-incremental with periodic synthetic fulls)? + +5. **Scope of first PR.** Should we submit the entire feature as one PR, or break it into smaller PRs (e.g., nasbackup.sh changes first, then agent, then management server, then UI)? + +6. **Testing infrastructure.** We can test against our production environment (Ubuntu 22.04, QEMU 6.2, libvirt 8.0). Are there CI environments or community test labs available for broader testing (RHEL, Rocky, older QEMU versions)? + +--- + +*This RFC is posted as a GitHub Discussion to gather community feedback before implementation begins. Please share your thoughts, concerns, and suggestions.* + +--- + +## Appendix: Related Proposal — CloudStack Infrastructure Backup to NAS + +### Problem +CloudStack's NAS backup provider only backs up VM disks. The management server database, agent configurations, SSL certificates, and global settings are not backed up. If the management server fails, all metadata is lost unless someone manually configured mysqldump. + +### Proposed Solution +Add a new scheduled task that automatically backs up CloudStack infrastructure to the same NAS backup storage. + +**What gets backed up:** +| Component | Method | Size | +|-----------|--------|------| +| CloudStack database (`cloud`, `cloud_usage`) | mysqldump | ~50-500MB | +| Management server config (`/etc/cloudstack/management/`) | tar | <1MB | +| Agent configs (`/etc/cloudstack/agent/`) | tar | <1MB | +| SSL certificates and keystores | tar | <1MB | +| Global settings export | SQL dump | <1MB | + +**Configuration:** +- `nas.infra.backup.enabled` (global, default: false) +- `nas.infra.backup.schedule` (cron expression, default: `0 2 * * *` — daily at 2am) +- `nas.infra.backup.retention` (number of backups to keep, default: 7) + +**Implementation:** +- New class: `InfrastructureBackupTask` extending `ManagedContextRunnable` +- Runs on management server (not KVM agent) +- Uses existing NAS mount point from backup storage pool +- Creates timestamped directory: `infra-backup/2026-03-27/` +- Runs `mysqldump --single-transaction` for hot backup +- Tars config directories +- Manages retention (delete backups older than N days) +- Logs to CloudStack events for audit trail + +**Restore:** +- Manual via CLI: `mysql cloud < backup.sql` + extract config tars +- Future: one-click restore from UI + +This is a much simpler change (~200 lines of Java) that addresses a real operational gap. Could target 4.22.1 or 4.23. + diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java new file mode 100644 index 000000000000..a3e811889114 --- /dev/null +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.cloudstack.backup; + +/** + * Keys used by the NAS backup provider when storing incremental-chain metadata + * in the existing {@code backup_details} key/value table. Stored here (not on + * the {@code backups} table) so other providers do not need a schema change to + * support their own incremental implementations. + */ +public final class NASBackupChainKeys { + + /** UUID of the parent backup (full or previous incremental). Empty for full backups. */ + public static final String PARENT_BACKUP_ID = "nas.parent_backup_id"; + + /** QEMU dirty-bitmap name created by this backup, used as the {@code } reference for the next one. */ + public static final String BITMAP_NAME = "nas.bitmap_name"; + + /** Identifier shared by every backup in the same chain (the full anchors a chain; its incrementals inherit the id). */ + public static final String CHAIN_ID = "nas.chain_id"; + + /** Position within the chain: 0 for the full, 1 for the first incremental, and so on. */ + public static final String CHAIN_POSITION = "nas.chain_position"; + + /** Backup type marker: {@value #TYPE_FULL} or {@value #TYPE_INCREMENTAL}. Mirrors {@code backups.type} for fast lookup without a join. */ + public static final String TYPE = "nas.type"; + + public static final String TYPE_FULL = "full"; + public static final String TYPE_INCREMENTAL = "incremental"; + + /** Set to the bitmap name when this incremental had to recreate its parent bitmap on the host (informational; this incremental is larger than usual). */ + public static final String BITMAP_RECREATED = "nas.bitmap_recreated"; + + private NASBackupChainKeys() { + } +} diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java index df9336026f4d..aca3dc108f27 100644 --- a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java @@ -48,6 +48,7 @@ import org.apache.cloudstack.backup.dao.BackupDao; +import org.apache.cloudstack.backup.dao.BackupDetailsDao; import org.apache.cloudstack.backup.dao.BackupRepositoryDao; import org.apache.cloudstack.engine.subsystem.api.storage.DataStore; import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; @@ -85,6 +86,16 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co true, BackupFrameworkEnabled.key()); + ConfigKey NASBackupFullEvery = new ConfigKey<>("Advanced", Integer.class, + "nas.backup.full.every", + "10", + "Take a full NAS backup every Nth backup; remaining backups in between are incremental. " + + "Counts backups, not days, so it works for hourly, daily, and ad-hoc schedules. 
" + + "Set to 1 to disable incrementals (every backup is full).", + true, + ConfigKey.Scope.Zone, + BackupFrameworkEnabled.key()); + @Inject private BackupDao backupDao; @@ -130,6 +141,9 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co @Inject private DiskOfferingDao diskOfferingDao; + @Inject + private BackupDetailsDao backupDetailsDao; + private Long getClusterIdFromRootVolume(VirtualMachine vm) { VolumeVO rootVolume = volumeDao.getInstanceRootVolume(vm.getId()); StoragePoolVO rootDiskPool = primaryDataStoreDao.findById(rootVolume.getPoolId()); @@ -168,6 +182,168 @@ protected Host getVMHypervisorHost(VirtualMachine vm) { return resourceManager.findOneRandomRunningHostByHypervisor(Hypervisor.HypervisorType.KVM, vm.getDataCenterId()); } + /** + * Returned by {@link #decideChain(VirtualMachine)} to describe the next backup's place in + * the chain: full vs incremental, the bitmap name to create, and (for incrementals) the + * parent bitmap and parent file path. + */ + static final class ChainDecision { + final String mode; // "full" or "incremental" + final String bitmapNew; + final String bitmapParent; // null for full + final String parentPath; // null for full + final String chainId; // chain identifier this backup belongs to + final int chainPosition; // 0 for full, N for the Nth incremental in the chain + + private ChainDecision(String mode, String bitmapNew, String bitmapParent, String parentPath, + String chainId, int chainPosition) { + this.mode = mode; + this.bitmapNew = bitmapNew; + this.bitmapParent = bitmapParent; + this.parentPath = parentPath; + this.chainId = chainId; + this.chainPosition = chainPosition; + } + + static ChainDecision fullStart(String bitmapName) { + return new ChainDecision(NASBackupChainKeys.TYPE_FULL, bitmapName, null, null, + UUID.randomUUID().toString(), 0); + } + + static ChainDecision incremental(String bitmapNew, String bitmapParent, String parentPath, + String chainId, int chainPosition) { + return new ChainDecision(NASBackupChainKeys.TYPE_INCREMENTAL, bitmapNew, bitmapParent, + parentPath, chainId, chainPosition); + } + + boolean isIncremental() { + return NASBackupChainKeys.TYPE_INCREMENTAL.equals(mode); + } + } + + /** + * Decides whether the next backup for {@code vm} should be a fresh full or an incremental + * appended to the existing chain. Stopped VMs are always full (libvirt {@code backup-begin} + * requires a running QEMU process). The {@code nas.backup.full.every} ConfigKey controls + * how many backups (full + incrementals) form one chain before a new full is forced. + */ + protected ChainDecision decideChain(VirtualMachine vm) { + final String newBitmap = "backup-" + System.currentTimeMillis() / 1000L; + + // Stopped VMs cannot do incrementals — script will also fall back, but we make the + // decision here so we register the right type up-front. + if (VirtualMachine.State.Stopped.equals(vm.getState())) { + return ChainDecision.fullStart(newBitmap); + } + + Integer fullEvery = NASBackupFullEvery.valueIn(vm.getDataCenterId()); + if (fullEvery == null || fullEvery <= 1) { + // Disabled or every-backup-is-full mode. + return ChainDecision.fullStart(newBitmap); + } + + // Walk this VM's backups newest→oldest, find the most recent BackedUp backup that has a + // bitmap stored. If we don't find one, this is the first backup in a chain — start full. 
+ List history = backupDao.listByVmId(vm.getDataCenterId(), vm.getId()); + if (history == null || history.isEmpty()) { + return ChainDecision.fullStart(newBitmap); + } + history.sort(Comparator.comparing(Backup::getDate).reversed()); + + Backup parent = null; + String parentBitmap = null; + String parentChainId = null; + int parentChainPosition = -1; + for (Backup b : history) { + if (!Backup.Status.BackedUp.equals(b.getStatus())) { + continue; + } + String bm = readDetail(b, NASBackupChainKeys.BITMAP_NAME); + if (bm == null) { + continue; + } + parent = b; + parentBitmap = bm; + parentChainId = readDetail(b, NASBackupChainKeys.CHAIN_ID); + String posStr = readDetail(b, NASBackupChainKeys.CHAIN_POSITION); + try { + parentChainPosition = posStr == null ? 0 : Integer.parseInt(posStr); + } catch (NumberFormatException e) { + parentChainPosition = 0; + } + break; + } + if (parent == null || parentBitmap == null || parentChainId == null) { + return ChainDecision.fullStart(newBitmap); + } + + // Force a fresh full when the chain has reached the configured length. + if (parentChainPosition + 1 >= fullEvery) { + return ChainDecision.fullStart(newBitmap); + } + + // The script needs the parent backup's on-NAS file path so it can rebase the new + // qcow2 onto it. The path is stored relative to the NAS mount point — the script + // resolves it inside its mount session. + String parentPath = composeParentBackupPath(parent); + return ChainDecision.incremental(newBitmap, parentBitmap, parentPath, + parentChainId, parentChainPosition + 1); + } + + private String readDetail(Backup backup, String key) { + BackupDetailVO d = backupDetailsDao.findDetail(backup.getId(), key); + return d == null ? null : d.getValue(); + } + + /** + * Compose the on-NAS path of a parent backup's root-disk qcow2. Relative to the NAS mount, + * matches the layout written by {@code nasbackup.sh} ({@code /root..qcow2}). + */ + private String composeParentBackupPath(Backup parent) { + // backupPath is stored as externalId by createBackupObject — e.g. "i-2-1234-VM/2026.04.27.13.45.00". + // Volume UUID for the root volume is what the script keys backup files on. + VolumeVO rootVolume = volumeDao.getInstanceRootVolume(parent.getVmId()); + String volUuid = rootVolume == null ? "root" : rootVolume.getUuid(); + return parent.getExternalId() + "/root." + volUuid + ".qcow2"; + } + + /** + * Persist chain metadata under backup_details. Stored here (not on the backups table) so + * other providers can implement their own chain semantics without schema changes. + */ + private void persistChainMetadata(Backup backup, ChainDecision decision, String bitmapFromAgent) { + // Prefer the bitmap name confirmed by the agent (BITMAP_CREATED= line). Fall back to + // what we asked it to create — they should match. + String bitmap = bitmapFromAgent != null ? bitmapFromAgent : decision.bitmapNew; + if (bitmap != null) { + backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.BITMAP_NAME, bitmap, true)); + } + backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.CHAIN_ID, decision.chainId, true)); + backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.CHAIN_POSITION, + String.valueOf(decision.chainPosition), true)); + backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.TYPE, decision.mode, true)); + if (decision.isIncremental()) { + // Resolve the parent backup's UUID so restore can walk the chain by id, not by path. 
+ String parentUuid = lookupParentBackupUuid(backup.getVmId(), decision.bitmapParent); + if (parentUuid != null) { + backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.PARENT_BACKUP_ID, parentUuid, true)); + } + } + } + + private String lookupParentBackupUuid(long vmId, String parentBitmap) { + if (parentBitmap == null) { + return null; + } + for (Backup b : backupDao.listByVmId(null, vmId)) { + String bm = readDetail(b, NASBackupChainKeys.BITMAP_NAME); + if (parentBitmap.equals(bm)) { + return b.getUuid(); + } + } + return null; + } + protected Host getVMHypervisorHostForBackup(VirtualMachine vm) { Long hostId = vm.getHostId(); if (hostId == null && VirtualMachine.State.Running.equals(vm.getState())) { @@ -205,12 +381,20 @@ public Pair takeBackup(final VirtualMachine vm, Boolean quiesce final String backupPath = String.format("%s/%s", vm.getInstanceName(), new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(creationDate)); - BackupVO backupVO = createBackupObject(vm, backupPath); + // Decide full vs incremental for this backup. Stopped VMs are always full + // (libvirt backup-begin requires a running QEMU process). + ChainDecision decision = decideChain(vm); + + BackupVO backupVO = createBackupObject(vm, backupPath, decision.isIncremental() ? "INCREMENTAL" : "FULL"); TakeBackupCommand command = new TakeBackupCommand(vm.getInstanceName(), backupPath); command.setBackupRepoType(backupRepository.getType()); command.setBackupRepoAddress(backupRepository.getAddress()); command.setMountOptions(backupRepository.getMountOptions()); command.setQuiesce(quiesceVM); + command.setMode(decision.mode); + command.setBitmapNew(decision.bitmapNew); + command.setBitmapParent(decision.bitmapParent); + command.setParentPath(decision.parentPath); if (VirtualMachine.State.Stopped.equals(vm.getState())) { List vmVolumes = volumeDao.findByInstance(vm.getId()); @@ -239,9 +423,23 @@ public Pair takeBackup(final VirtualMachine vm, Boolean quiesce backupVO.setDate(new Date()); backupVO.setSize(answer.getSize()); backupVO.setStatus(Backup.Status.BackedUp); + // If the agent fell back to full (stopped VM mid-incremental cycle), record this + // backup as a full and start a new chain. 
+ ChainDecision effective = decision; + if (answer.getIncrementalFallback()) { + effective = ChainDecision.fullStart(decision.bitmapNew); + backupVO.setType("FULL"); + } List volumes = new ArrayList<>(volumeDao.findByInstance(vm.getId())); backupVO.setBackedUpVolumes(backupManager.createVolumeInfoFromVolumes(volumes)); if (backupDao.update(backupVO.getId(), backupVO)) { + persistChainMetadata(backupVO, effective, answer.getBitmapCreated()); + if (answer.getBitmapRecreated() != null) { + backupDetailsDao.persist(new BackupDetailVO(backupVO.getId(), + NASBackupChainKeys.BITMAP_RECREATED, answer.getBitmapRecreated(), true)); + logger.info("NAS incremental for VM {} recreated parent bitmap {} (likely VM was restarted since last backup)", + vm.getInstanceName(), answer.getBitmapRecreated()); + } return new Pair<>(true, backupVO); } else { throw new CloudRuntimeException("Failed to update backup"); @@ -260,11 +458,11 @@ public Pair takeBackup(final VirtualMachine vm, Boolean quiesce } } - private BackupVO createBackupObject(VirtualMachine vm, String backupPath) { + private BackupVO createBackupObject(VirtualMachine vm, String backupPath, String type) { BackupVO backup = new BackupVO(); backup.setVmId(vm.getId()); backup.setExternalId(backupPath); - backup.setType("FULL"); + backup.setType(type); backup.setDate(new Date()); long virtualSize = 0L; for (final Volume volume: volumeDao.findByInstance(vm.getId())) { @@ -495,24 +693,244 @@ public boolean deleteBackup(Backup backup, boolean forced) { throw new CloudRuntimeException(String.format("Unable to find a running KVM host in zone %d to delete backup %s", backup.getZoneId(), backup.getUuid())); } - DeleteBackupCommand command = new DeleteBackupCommand(backup.getExternalId(), backupRepository.getType(), - backupRepository.getAddress(), backupRepository.getMountOptions()); + // Repair the chain (if any) before removing the backup file. For chained backups, + // children that point at this backup must be re-pointed at this backup's parent + // (with their blocks merged via qemu-img rebase). For a full at the head of a chain + // with surviving children, refuse unless forced — `forced=true` then deletes the + // full plus every descendant. + ChainRepairPlan plan = computeChainRepair(backup, forced); + if (!plan.proceed) { + throw new CloudRuntimeException(plan.reason); + } - BackupAnswer answer; - try { - answer = (BackupAnswer) agentManager.send(host.getId(), command); - } catch (AgentUnavailableException e) { - throw new CloudRuntimeException("Unable to contact backend control plane to initiate backup"); - } catch (OperationTimedoutException e) { - throw new CloudRuntimeException("Operation to delete backup timed out, please try again"); + // Issue rebase commands for each child that needs re-pointing (ordered so each rebase + // operates on a chain that still resolves: children first if there are nested ones). 
+ for (RebaseStep step : plan.rebaseSteps) { + RebaseBackupCommand rebase = new RebaseBackupCommand(step.targetMountRelativePath, + step.newBackingMountRelativePath, backupRepository.getType(), + backupRepository.getAddress(), backupRepository.getMountOptions()); + BackupAnswer rebaseAnswer; + try { + rebaseAnswer = (BackupAnswer) agentManager.send(host.getId(), rebase); + } catch (AgentUnavailableException e) { + throw new CloudRuntimeException("Unable to contact backend control plane to repair backup chain"); + } catch (OperationTimedoutException e) { + throw new CloudRuntimeException("Backup chain repair (rebase) timed out, please try again"); + } + if (rebaseAnswer == null || !rebaseAnswer.getResult()) { + throw new CloudRuntimeException(String.format( + "Backup chain repair failed: rebase of %s onto %s returned %s", + step.targetMountRelativePath, step.newBackingMountRelativePath, + rebaseAnswer == null ? "no answer" : rebaseAnswer.getDetails())); + } + // Update the rebased child's parent reference + position in backup_details. + BackupDetailVO parentDetail = backupDetailsDao.findDetail(step.childBackupId, NASBackupChainKeys.PARENT_BACKUP_ID); + if (parentDetail != null) { + parentDetail.setValue(step.newParentUuid == null ? "" : step.newParentUuid); + backupDetailsDao.update(parentDetail.getId(), parentDetail); + } else if (step.newParentUuid != null) { + backupDetailsDao.persist(new BackupDetailVO(step.childBackupId, + NASBackupChainKeys.PARENT_BACKUP_ID, step.newParentUuid, true)); + } + BackupDetailVO posDetail = backupDetailsDao.findDetail(step.childBackupId, NASBackupChainKeys.CHAIN_POSITION); + if (posDetail != null) { + posDetail.setValue(String.valueOf(step.newChainPosition)); + backupDetailsDao.update(posDetail.getId(), posDetail); + } } - if (answer != null && answer.getResult()) { - return backupDao.remove(backup.getId()); + // Now delete this backup's files. For a forced delete of a full with descendants we + // also delete all descendants' files (newest first so each rm targets a leaf). + for (Backup victim : plan.toDelete) { + DeleteBackupCommand command = new DeleteBackupCommand(victim.getExternalId(), backupRepository.getType(), + backupRepository.getAddress(), backupRepository.getMountOptions()); + BackupAnswer answer; + try { + answer = (BackupAnswer) agentManager.send(host.getId(), command); + } catch (AgentUnavailableException e) { + throw new CloudRuntimeException("Unable to contact backend control plane to initiate backup"); + } catch (OperationTimedoutException e) { + throw new CloudRuntimeException("Operation to delete backup timed out, please try again"); + } + if (answer == null || !answer.getResult()) { + logger.warn("Failed to delete backup file for {} ({}); leaving DB row intact", victim.getUuid(), victim.getExternalId()); + return false; + } + backupDao.remove(victim.getId()); } - logger.debug("There was an error removing the backup with id {}", backup.getId()); - return false; + // Shift chain_position down by 1 for any survivors deeper in the chain than the + // backup we just removed (their direct parent reference is unchanged, but their + // numeric position needs to stay consistent so future full-every cadence math works). 
+ if (plan.shiftPositionsBelow != null) { + for (Backup b : backupDao.listByVmId(null, backup.getVmId())) { + if (!plan.shiftPositionsBelow.chainId.equals(readDetail(b, NASBackupChainKeys.CHAIN_ID))) { + continue; + } + int pos = chainPosition(b); + if (pos > plan.shiftPositionsBelow.afterPosition && pos != Integer.MAX_VALUE) { + BackupDetailVO posDetail = backupDetailsDao.findDetail(b.getId(), NASBackupChainKeys.CHAIN_POSITION); + if (posDetail != null) { + posDetail.setValue(String.valueOf(pos - 1)); + backupDetailsDao.update(posDetail.getId(), posDetail); + } + } + } + } + + return true; + } + + private static final class PositionShift { + final String chainId; + final int afterPosition; // shift positions strictly greater than this by -1 + PositionShift(String chainId, int afterPosition) { + this.chainId = chainId; + this.afterPosition = afterPosition; + } + } + + /** + * Result of {@link #computeChainRepair}: whether to proceed, what to rebase, what to delete. + */ + private static final class ChainRepairPlan { + final boolean proceed; + final String reason; + final List rebaseSteps; + final List toDelete; + final PositionShift shiftPositionsBelow; + + private ChainRepairPlan(boolean proceed, String reason, List rebaseSteps, List toDelete, + PositionShift shiftPositionsBelow) { + this.proceed = proceed; + this.reason = reason; + this.rebaseSteps = rebaseSteps; + this.toDelete = toDelete; + this.shiftPositionsBelow = shiftPositionsBelow; + } + + static ChainRepairPlan refuse(String reason) { + return new ChainRepairPlan(false, reason, Collections.emptyList(), Collections.emptyList(), null); + } + + static ChainRepairPlan proceed(List rebaseSteps, List toDelete) { + return new ChainRepairPlan(true, null, rebaseSteps, toDelete, null); + } + + static ChainRepairPlan proceed(List rebaseSteps, List toDelete, PositionShift shift) { + return new ChainRepairPlan(true, null, rebaseSteps, toDelete, shift); + } + } + + private static final class RebaseStep { + final long childBackupId; + final String targetMountRelativePath; + final String newBackingMountRelativePath; + final String newParentUuid; // null when re-pointed onto an existing full's UUID is desired but unavailable + final int newChainPosition; + + RebaseStep(long childBackupId, String targetMountRelativePath, String newBackingMountRelativePath, + String newParentUuid, int newChainPosition) { + this.childBackupId = childBackupId; + this.targetMountRelativePath = targetMountRelativePath; + this.newBackingMountRelativePath = newBackingMountRelativePath; + this.newParentUuid = newParentUuid; + this.newChainPosition = newChainPosition; + } + } + + /** + * Compute the chain-repair plan for deleting {@code backup}. Conservative semantics: + * - Backups outside any tracked chain (no NAS chain metadata) are deleted as-is. + * - A standalone backup with no children is deleted as-is. + * - A middle incremental: rebase its immediate child onto its own parent, then delete it. + * Descendants of that child are unaffected (their backing chain still resolves). + * - A full with surviving descendants: refuse unless {@code forced=true}; then delete + * full + every descendant (newest first). + */ + private ChainRepairPlan computeChainRepair(Backup backup, boolean forced) { + String chainId = readDetail(backup, NASBackupChainKeys.CHAIN_ID); + if (chainId == null) { + // Pre-incremental backups (or callers that never wrote chain metadata) — single delete. 
+ return ChainRepairPlan.proceed(Collections.emptyList(), Collections.singletonList(backup)); + } + + // Gather every backup in the same chain for this VM. + List chain = new ArrayList<>(); + for (Backup b : backupDao.listByVmId(null, backup.getVmId())) { + if (chainId.equals(readDetail(b, NASBackupChainKeys.CHAIN_ID))) { + chain.add(b); + } + } + chain.sort(Comparator.comparingInt(b -> chainPosition(b))); + + int targetPos = chainPosition(backup); + boolean isFull = targetPos == 0; + List descendants = chain.stream() + .filter(b -> chainPosition(b) > targetPos) + .collect(Collectors.toList()); + + if (isFull) { + if (descendants.isEmpty()) { + return ChainRepairPlan.proceed(Collections.emptyList(), Collections.singletonList(backup)); + } + if (!forced) { + return ChainRepairPlan.refuse(String.format( + "Backup %s is the full anchor of a chain with %d incremental(s). Delete the incrementals first, " + + "or pass forced=true to remove the entire chain.", + backup.getUuid(), descendants.size())); + } + // Forced delete: remove descendants newest first, then the full. + List victims = new ArrayList<>(descendants); + victims.sort(Comparator.comparingInt((Backup b) -> chainPosition(b)).reversed()); + victims.add(backup); + return ChainRepairPlan.proceed(Collections.emptyList(), victims); + } + + // Middle (or tail) incremental. + if (descendants.isEmpty()) { + // Tail: nothing to rebase, just delete. + return ChainRepairPlan.proceed(Collections.emptyList(), Collections.singletonList(backup)); + } + + // Middle: only the immediate child needs to absorb our blocks and rebase onto our parent. + Backup immediateChild = descendants.stream() + .min(Comparator.comparingInt(b -> chainPosition(b))) + .orElseThrow(() -> new CloudRuntimeException("Internal error: no immediate child found for chain repair")); + Backup ourParent = chain.stream() + .filter(b -> chainPosition(b) == targetPos - 1) + .findFirst() + .orElseThrow(() -> new CloudRuntimeException(String.format( + "Cannot delete %s: its parent (chain_position=%d) is missing from the chain", + backup.getUuid(), targetPos - 1))); + + VolumeVO rootVolume = volumeDao.getInstanceRootVolume(backup.getVmId()); + String volUuid = rootVolume == null ? "root" : rootVolume.getUuid(); + String childPath = immediateChild.getExternalId() + "/root." + volUuid + ".qcow2"; + String parentPath = ourParent.getExternalId() + "/root." + volUuid + ".qcow2"; + + RebaseStep step = new RebaseStep(immediateChild.getId(), childPath, parentPath, + ourParent.getUuid(), chainPosition(immediateChild) - 1); + + // After we delete the middle backup, every descendant's numeric chain_position + // becomes stale (off by one). Their backing-file pointers don't need re-writing + // (only the immediate child changed parents) but their position metadata does. 
+ return ChainRepairPlan.proceed( + Collections.singletonList(step), + Collections.singletonList(backup), + new PositionShift(chainId, targetPos)); + } + + private int chainPosition(Backup b) { + String s = readDetail(b, NASBackupChainKeys.CHAIN_POSITION); + if (s == null) { + return Integer.MAX_VALUE; // no metadata => sort to end + } + try { + return Integer.parseInt(s); + } catch (NumberFormatException e) { + return Integer.MAX_VALUE; + } } public void syncBackupMetrics(Long zoneId) { @@ -629,7 +1047,8 @@ public Boolean crossZoneInstanceCreationEnabled(BackupOffering backupOffering) { @Override public ConfigKey[] getConfigKeys() { return new ConfigKey[]{ - NASBackupRestoreMountTimeout + NASBackupRestoreMountTimeout, + NASBackupFullEvery }; } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRebaseBackupCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRebaseBackupCommandWrapper.java new file mode 100644 index 000000000000..3238e0393c94 --- /dev/null +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRebaseBackupCommandWrapper.java @@ -0,0 +1,59 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// + +package com.cloud.hypervisor.kvm.resource.wrapper; + +import com.cloud.agent.api.Answer; +import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; +import com.cloud.resource.CommandWrapper; +import com.cloud.resource.ResourceWrapper; +import com.cloud.utils.Pair; +import com.cloud.utils.script.Script; +import org.apache.cloudstack.backup.BackupAnswer; +import org.apache.cloudstack.backup.RebaseBackupCommand; + +import java.util.ArrayList; +import java.util.List; + +@ResourceWrapper(handles = RebaseBackupCommand.class) +public class LibvirtRebaseBackupCommandWrapper extends CommandWrapper { + @Override + public Answer execute(RebaseBackupCommand command, LibvirtComputingResource libvirtComputingResource) { + List commands = new ArrayList<>(); + commands.add(new String[]{ + libvirtComputingResource.getNasBackupPath(), + "-o", "rebase", + "-t", command.getBackupRepoType(), + "-s", command.getBackupRepoAddress(), + "-m", command.getMountOptions(), + "--rebase-target", command.getTargetPath(), + "--rebase-new-backing", command.getNewBackingPath() + }); + + Pair result = Script.executePipedCommands(commands, libvirtComputingResource.getCmdsTimeout()); + logger.debug("Backup rebase result: {} , exit code: {}", result.second(), result.first()); + + if (result.first() != 0) { + logger.warn("Failed to rebase backup file {} onto {}: {}", + command.getTargetPath(), command.getNewBackingPath(), result.second()); + return new BackupAnswer(command, false, result.second()); + } + return new BackupAnswer(command, true, null); + } +} diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapper.java index 22dbfbdd67a2..cc2a0868fe17 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapper.java @@ -60,6 +60,15 @@ public class LibvirtRestoreBackupCommandWrapper extends CommandWrapper/dev/null | grep -q '\"backing-filename\"'"; private String getVolumeUuidFromPath(String volumePath, PrimaryDataStoreTO volumePool) { if (Storage.StoragePoolType.Linstor.equals(volumePool.getPoolType())) { @@ -270,10 +279,27 @@ private boolean replaceVolumeWithBackup(KVMStoragePoolManager storagePoolMgr, Pr return replaceBlockDeviceWithBackup(storagePoolMgr, volumePool, volumePath, backupPath, timeout, createTargetVolume, size); } + // For NAS-backed incremental backups, the source qcow2 has a backing-file + // reference to its parent (set by nasbackup.sh's qemu-img rebase). A plain + // rsync would copy only the differential blocks, leaving a volume that + // depends on a backing file the primary storage doesn't have. Flatten the + // chain via qemu-img convert, which follows the backing-file links and + // produces a single self-contained qcow2. 
+ if (hasBackingChain(backupPath)) { + int flattenExit = Script.runSimpleBashScriptForExitValue( + String.format(QEMU_IMG_FLATTEN_COMMAND, backupPath, volumePath), timeout, false); + return flattenExit == 0; + } + int exitValue = Script.runSimpleBashScriptForExitValue(String.format(RSYNC_COMMAND, backupPath, volumePath), timeout, false); return exitValue == 0; } + private boolean hasBackingChain(String qcow2Path) { + return Script.runSimpleBashScriptForExitValue( + String.format(QEMU_IMG_HAS_BACKING_COMMAND, qcow2Path)) == 0; + } + private boolean replaceBlockDeviceWithBackup(KVMStoragePoolManager storagePoolMgr, PrimaryDataStoreTO volumePool, String volumePath, String backupPath, int timeout, boolean createTargetVolume, Long size) { KVMStoragePool volumeStoragePool = storagePoolMgr.getStoragePool(volumePool.getPoolType(), volumePool.getUuid()); QemuImg qemu; diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java index 42953aa9f835..3654116869c5 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java @@ -69,8 +69,7 @@ public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvir } } - List commands = new ArrayList<>(); - commands.add(new String[]{ + List argv = new ArrayList<>(Arrays.asList( libvirtComputingResource.getNasBackupPath(), "-o", "backup", "-v", vmName, @@ -80,7 +79,27 @@ public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvir "-p", backupPath, "-q", command.getQuiesce() != null && command.getQuiesce() ? "true" : "false", "-d", diskPaths.isEmpty() ? "" : String.join(",", diskPaths) - }); + )); + // Incremental NAS backup args (only added when the orchestrator asked for full/inc mode). + if (command.getMode() != null && !command.getMode().isEmpty()) { + argv.add("-M"); + argv.add(command.getMode()); + } + if (command.getBitmapNew() != null && !command.getBitmapNew().isEmpty()) { + argv.add("--bitmap-new"); + argv.add(command.getBitmapNew()); + } + if (command.getBitmapParent() != null && !command.getBitmapParent().isEmpty()) { + argv.add("--bitmap-parent"); + argv.add(command.getBitmapParent()); + } + if (command.getParentPath() != null && !command.getParentPath().isEmpty()) { + argv.add("--parent-path"); + argv.add(command.getParentPath()); + } + + List commands = new ArrayList<>(); + commands.add(argv.toArray(new String[0])); Pair result = Script.executePipedCommands(commands, timeout); @@ -94,21 +113,52 @@ public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvir return answer; } + // Strip out our incremental marker lines before parsing size, so the legacy + // numeric-suffix parser keeps working. 
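The marker lines mentioned above are plain KEY=value lines that nasbackup.sh prints alongside its normal size output; the hunk below strips them before size parsing. Here is a self-contained sketch of that stdout contract, with illustrative values (the size-line format shown is only an example, the real lines come from the script):

// Sketch of the stdout contract between nasbackup.sh and the wrapper: marker lines are
// captured into fields, everything else is left for the legacy size parser.
import java.util.ArrayList;
import java.util.List;

public class BackupMarkerParsing {
    public static void main(String[] args) {
        // Illustrative output of an incremental run that had to recreate the parent bitmap.
        String stdout = String.join("\n",
                "BITMAP_RECREATED=backup-1711500000",
                "1048576 root.qcow2",
                "524288 datadisk.qcow2",
                "BITMAP_CREATED=backup-1711586400");

        String bitmapCreated = null, bitmapRecreated = null;
        boolean incrementalFallback = false;
        List<String> sizeLines = new ArrayList<>();
        for (String line : stdout.split("\n")) {
            String t = line.trim();
            if (t.startsWith("BITMAP_CREATED=")) {
                bitmapCreated = t.substring("BITMAP_CREATED=".length());
            } else if (t.startsWith("BITMAP_RECREATED=")) {
                bitmapRecreated = t.substring("BITMAP_RECREATED=".length());
            } else if (t.startsWith("INCREMENTAL_FALLBACK=")) {
                incrementalFallback = true;
            } else {
                sizeLines.add(line);   // numeric size lines, parsed as before (illustrative format)
            }
        }
        long totalBytes = sizeLines.stream().mapToLong(l -> Long.parseLong(l.split(" ")[0])).sum();
        System.out.printf("created=%s recreated=%s fallback=%s size=%d%n",
                bitmapCreated, bitmapRecreated, incrementalFallback, totalBytes);
    }
}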
+ String stdout = result.second().trim(); + String bitmapCreated = null; + String bitmapRecreated = null; + boolean incrementalFallback = false; + StringBuilder filtered = new StringBuilder(); + for (String line : stdout.split("\n")) { + String trimmed = line.trim(); + if (trimmed.startsWith("BITMAP_CREATED=")) { + bitmapCreated = trimmed.substring("BITMAP_CREATED=".length()); + continue; + } + if (trimmed.startsWith("BITMAP_RECREATED=")) { + bitmapRecreated = trimmed.substring("BITMAP_RECREATED=".length()); + continue; + } + if (trimmed.startsWith("INCREMENTAL_FALLBACK=")) { + incrementalFallback = true; + continue; + } + if (filtered.length() > 0) { + filtered.append("\n"); + } + filtered.append(line); + } + String numericOutput = filtered.toString().trim(); + long backupSize = 0L; if (CollectionUtils.isNullOrEmpty(diskPaths)) { - List outputLines = Arrays.asList(result.second().trim().split("\n")); + List outputLines = Arrays.asList(numericOutput.split("\n")); if (!outputLines.isEmpty()) { backupSize = Long.parseLong(outputLines.get(outputLines.size() - 1).trim()); } } else { - String[] outputLines = result.second().trim().split("\n"); + String[] outputLines = numericOutput.split("\n"); for(String line : outputLines) { backupSize = backupSize + Long.parseLong(line.split(" ")[0].trim()); } } - BackupAnswer answer = new BackupAnswer(command, true, result.second().trim()); + BackupAnswer answer = new BackupAnswer(command, true, stdout); answer.setSize(backupSize); + answer.setBitmapCreated(bitmapCreated); + answer.setBitmapRecreated(bitmapRecreated); + answer.setIncrementalFallback(incrementalFallback); return answer; } } diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapperTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapperTest.java index ef6b5c08189d..fd8a3b02e0a0 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapperTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtRestoreBackupCommandWrapperTest.java @@ -407,6 +407,8 @@ public void testExecuteWithRsyncFailure() throws Exception { return 0; // File exists } else if (command.contains("qemu-img check")) { return 0; // File is valid + } else if (command.contains("qemu-img info") && command.contains("backing-filename")) { + return 1; // No backing chain — exercise the rsync path (full backups) } return 0; // Other commands success }); diff --git a/scripts/vm/hypervisor/kvm/nasbackup.sh b/scripts/vm/hypervisor/kvm/nasbackup.sh index 441312f35e86..9058a6a0fc98 100755 --- a/scripts/vm/hypervisor/kvm/nasbackup.sh +++ b/scripts/vm/hypervisor/kvm/nasbackup.sh @@ -33,9 +33,18 @@ MOUNT_OPTS="" BACKUP_DIR="" DISK_PATHS="" QUIESCE="" +# Incremental backup parameters (all optional; legacy callers omit them) +MODE="" # "full" or "incremental"; empty => legacy full-only behavior (no checkpoint created) +BITMAP_NEW="" # Bitmap/checkpoint name to create with this backup (e.g. 
"backup-1711586400") +BITMAP_PARENT="" # For incremental: parent bitmap name to read changes since +PARENT_PATH="" # For incremental: parent backup file path (used as backing for qemu-img rebase) +# Rebase operation parameters (used only with -o rebase, for chain repair on delete-middle) +REBASE_TARGET="" # The qcow2 file to repoint at a new backing (mount-relative path) +REBASE_NEW_BACKING="" # The new backing parent file (mount-relative path) logFile="/var/log/cloudstack/agent/agent.log" EXIT_CLEANUP_FAILED=20 +EXIT_INCREMENTAL_UNSUPPORTED=21 log() { [[ "$verb" -eq 1 ]] && builtin echo "$@" @@ -113,20 +122,93 @@ backup_running_vm() { mount_operation mkdir -p "$dest" || { echo "Failed to create backup directory $dest"; exit 1; } + # Determine effective mode for this run. + # Legacy callers (no -M argument) get the original full-only behavior with no checkpoint. + local effective_mode="${MODE:-legacy-full}" + local make_checkpoint=0 + case "$effective_mode" in + incremental) + if [[ -z "$BITMAP_PARENT" || -z "$BITMAP_NEW" || -z "$PARENT_PATH" ]]; then + echo "incremental mode requires --bitmap-parent, --bitmap-new, and --parent-path" + cleanup + exit 1 + fi + make_checkpoint=1 + ;; + full) + if [[ -z "$BITMAP_NEW" ]]; then + echo "full mode requires --bitmap-new (the bitmap to create for the next incremental)" + cleanup + exit 1 + fi + make_checkpoint=1 + ;; + legacy-full) + make_checkpoint=0 + ;; + *) + echo "Unknown mode: $effective_mode" + cleanup + exit 1 + ;; + esac + + # When incremental, verify the parent bitmap still exists on the running domain. + # CloudStack rebuilds the libvirt domain XML on every VM start, so persistent bitmaps + # are lost across stop/start. If the parent is missing, recreate it as a fresh bitmap + # so libvirt accepts the reference. The first backup after a recreate + # captures all writes since the recreate point — slightly larger than ideal, but correct. + if [[ "$effective_mode" == "incremental" ]]; then + if ! virsh -c qemu:///system checkpoint-list "$VM" --name 2>/dev/null | grep -qx "$BITMAP_PARENT"; then + cat > $dest/recreate-checkpoint.xml <$BITMAP_PARENT +$(virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{printf "\n", $3}') + +XML + if ! virsh -c qemu:///system checkpoint-create "$VM" --xmlfile $dest/recreate-checkpoint.xml > /dev/null 2>&1; then + echo "Failed to recreate parent bitmap $BITMAP_PARENT for $VM" + cleanup + exit 1 + fi + # Marker for the orchestrator: this incremental is larger because the bitmap was rebuilt. + echo "BITMAP_RECREATED=$BITMAP_PARENT" + rm -f $dest/recreate-checkpoint.xml + fi + fi + + # Build backup XML (and matching checkpoint XML when applicable). name="root" - echo "" > $dest/backup.xml + echo "" > $dest/backup.xml + if [[ "$effective_mode" == "incremental" ]]; then + echo "$BITMAP_PARENT" >> $dest/backup.xml + fi + echo "" >> $dest/backup.xml + if [[ $make_checkpoint -eq 1 ]]; then + echo "$BITMAP_NEW" > $dest/checkpoint.xml + fi while read -r disk fullpath; do if [[ "$fullpath" == /dev/drbd/by-res/* ]]; then volUuid=$(get_linstor_uuid_from_path "$fullpath") else volUuid="${fullpath##*/}" fi - echo "" >> $dest/backup.xml + if [[ "$effective_mode" == "incremental" ]]; then + # Incremental disk entry — no backupmode attr, libvirt picks it up from . 
+ echo "" >> $dest/backup.xml + else + echo "" >> $dest/backup.xml + fi + if [[ $make_checkpoint -eq 1 ]]; then + echo "" >> $dest/checkpoint.xml + fi name="datadisk" done < <( virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{print $3, $4}' ) echo "" >> $dest/backup.xml + if [[ $make_checkpoint -eq 1 ]]; then + echo "" >> $dest/checkpoint.xml + fi local thaw=0 if [[ ${QUIESCE} == "true" ]]; then @@ -135,10 +217,16 @@ backup_running_vm() { fi fi - # Start push backup + # Start push backup, atomically registering the new checkpoint when applicable. local backup_begin=0 - if virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest/backup.xml 2>&1 > /dev/null; then - backup_begin=1; + if [[ $make_checkpoint -eq 1 ]]; then + if virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest/backup.xml --checkpointxml $dest/checkpoint.xml 2>&1 > /dev/null; then + backup_begin=1; + fi + else + if virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest/backup.xml 2>&1 > /dev/null; then + backup_begin=1; + fi fi if [[ $thaw -eq 1 ]]; then @@ -172,9 +260,37 @@ backup_running_vm() { sleep 5 done - # Use qemu-img convert to sparsify linstor backups which get bloated due to virsh backup-begin. + # Sparsify behavior: + # - For LINSTOR backups (existing): qemu-img convert sparsifies the bloated output. + # - For INCREMENTAL: rebase the resulting thin qcow2 onto its parent so the chain is self-describing + # (so a future restore can flatten without external chain metadata). name="root" while read -r disk fullpath; do + if [[ "$effective_mode" == "incremental" ]]; then + volUuid="${fullpath##*/}" + if [[ "$fullpath" == /dev/drbd/by-res/* ]]; then + volUuid=$(get_linstor_uuid_from_path "$fullpath") + fi + # PARENT_PATH from the orchestrator is the parent backup's path relative to the + # NAS mount root (e.g. "i-2-X/2026.04.27.12.00.00/root.UUID.qcow2"). Convert it to + # a path relative to THIS new qcow2's directory so the backing reference resolves + # correctly the next time the NAS is mounted (mount points are ephemeral). + local parent_abs="$mount_point/$PARENT_PATH" + if [[ ! -f "$parent_abs" ]]; then + echo "Parent backup file does not exist on NAS: $parent_abs" + cleanup + exit 1 + fi + local parent_rel + parent_rel=$(realpath --relative-to="$dest" "$parent_abs") + if ! qemu-img rebase -u -b "$parent_rel" -F qcow2 "$dest/$name.$volUuid.qcow2" >> "$logFile" 2> >(cat >&2); then + echo "qemu-img rebase failed for $dest/$name.$volUuid.qcow2 onto $parent_rel" + cleanup + exit 1 + fi + name="datadisk" + continue + fi if [[ "$fullpath" != /dev/drbd/by-res/* ]]; then continue fi @@ -191,18 +307,30 @@ backup_running_vm() { virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{print $3, $4}' ) - rm -f $dest/backup.xml + rm -f $dest/backup.xml $dest/checkpoint.xml sync # Print statistics virsh -c qemu:///system domjobinfo $VM --completed du -sb $dest | cut -f1 + if [[ -n "$BITMAP_NEW" ]]; then + # Echo the bitmap name on its own line so the Java caller can capture it for backup_details. + echo "BITMAP_CREATED=$BITMAP_NEW" + fi umount $mount_point rmdir $mount_point } backup_stopped_vm() { + # Stopped VMs cannot use libvirt's backup-begin (no QEMU process). Take a full + # backup via qemu-img convert. If the caller asked for incremental, fall back + # to full and signal the fallback so the orchestrator can record it as a full + # in the chain. 
+ if [[ "$MODE" == "incremental" ]]; then + echo "INCREMENTAL_FALLBACK=full (VM stopped — incremental requires running VM)" >&2 + fi + mount_operation mkdir -p "$dest" || { echo "Failed to create backup directory $dest"; exit 1; } @@ -238,6 +366,51 @@ delete_backup() { rmdir $mount_point } +# Rebase an existing backup qcow2 (e.g. a chain child) onto a new backing parent so the chain +# stays valid after a middle backup is deleted. Both --target and --new-backing are passed as +# paths relative to the NAS mount root; we resolve them under $mount_point and write the new +# backing reference relative to the target file's directory (mount points are ephemeral). +rebase_backup() { + mount_operation + + if [[ -z "$REBASE_TARGET" || -z "$REBASE_NEW_BACKING" ]]; then + echo "rebase requires --rebase-target and --rebase-new-backing" + cleanup + exit 1 + fi + + local target_abs="$mount_point/$REBASE_TARGET" + local backing_abs="$mount_point/$REBASE_NEW_BACKING" + if [[ ! -f "$target_abs" ]]; then + echo "Rebase target file does not exist: $target_abs" + cleanup + exit 1 + fi + if [[ ! -f "$backing_abs" ]]; then + echo "New backing file does not exist: $backing_abs" + cleanup + exit 1 + fi + local target_dir + target_dir=$(dirname "$target_abs") + local backing_rel + backing_rel=$(realpath --relative-to="$target_dir" "$backing_abs") + + # SAFE rebase (no -u): qemu-img reads blocks from the old chain and writes them into + # the target where the new chain doesn't cover them. This is the "merge into" semantic + # required when we're about to delete the old immediate parent — the target needs to + # absorb the to-be-deleted parent's blocks so the chain remains consistent against the + # new (further-back) backing. + if ! qemu-img rebase -b "$backing_rel" -F qcow2 "$target_abs" >> "$logFile" 2> >(cat >&2); then + echo "qemu-img rebase failed for $target_abs onto $backing_rel" + cleanup + exit 1 + fi + sync + umount $mount_point + rmdir $mount_point +} + get_backup_stats() { mount_operation @@ -278,6 +451,13 @@ cleanup() { function usage { echo "" echo "Usage: $0 -o -v|--vm -t -s -m -p -d -q|--quiesce " + echo " [-M|--mode ] [--bitmap-new ] [--bitmap-parent ] [--parent-path ]" + echo "" + echo "Incremental backup options (running VMs only; requires QEMU >= 4.2 and libvirt >= 7.2):" + echo " -M|--mode full Take a full backup AND create a checkpoint (--bitmap-new required) for future incrementals." + echo " -M|--mode incremental Take an incremental backup since --bitmap-parent and create new checkpoint --bitmap-new." + echo " Requires --bitmap-parent, --bitmap-new, and --parent-path (parent backup file for rebase)." + echo " Without -M, behaves as legacy full-only backup with no checkpoint creation." 
echo "" exit 1 } @@ -324,6 +504,36 @@ while [[ $# -gt 0 ]]; do shift shift ;; + -M|--mode) + MODE="$2" + shift + shift + ;; + --bitmap-new) + BITMAP_NEW="$2" + shift + shift + ;; + --bitmap-parent) + BITMAP_PARENT="$2" + shift + shift + ;; + --parent-path) + PARENT_PATH="$2" + shift + shift + ;; + --rebase-target) + REBASE_TARGET="$2" + shift + shift + ;; + --rebase-new-backing) + REBASE_NEW_BACKING="$2" + shift + shift + ;; -h|--help) usage shift @@ -347,6 +557,8 @@ if [ "$OP" = "backup" ]; then fi elif [ "$OP" = "delete" ]; then delete_backup +elif [ "$OP" = "rebase" ]; then + rebase_backup elif [ "$OP" = "stats" ]; then get_backup_stats fi diff --git a/test/integration/smoke/test_backup_recovery_nas.py b/test/integration/smoke/test_backup_recovery_nas.py index 409a08acc9f0..3284e566f95f 100644 --- a/test/integration/smoke/test_backup_recovery_nas.py +++ b/test/integration/smoke/test_backup_recovery_nas.py @@ -265,3 +265,222 @@ def test_vm_backup_create_vm_from_backup_in_another_zone(self): self.assertEqual(backup_repository.crosszoneinstancecreation, True, "Cross-Zone Instance Creation could not be enabled on the backup repository") self.vm_backup_create_vm_from_backup_int(template.id, [network.id]) + + # ------------------------------------------------------------------ + # Incremental backup tests (RFC #12899 / PR #13074) + # ------------------------------------------------------------------ + # These tests exercise the incremental NAS backup chain semantics: + # full -> incN cadence, restore-from-incremental, delete-middle chain + # repair, refuse-delete-full-with-children, and stopped-VM fallback. + # + # All tests set nas.backup.full.every to a small value (3) so a chain + # forms quickly without needing many backup iterations. They restore + # the original value at teardown. + + def _set_full_every(self, value): + Configurations.update(self.apiclient, name='nas.backup.full.every', + value=str(value), zoneid=self.zone.id) + + def _backup_type(self, backup): + # Backup objects expose `type`; for chained backups it's "INCREMENTAL", else "FULL". + return getattr(backup, 'type', 'FULL') or 'FULL' + + @attr(tags=["advanced", "backup"], required_hardware="true") + def test_incremental_chain_cadence(self): + """ + With nas.backup.full.every=3, the sequence of backups should be + FULL, INCREMENTAL, INCREMENTAL, FULL, INCREMENTAL, ... 
+ """ + self.backup_offering.assignOffering(self.apiclient, self.vm.id) + self._set_full_every(3) + try: + ssh_client_vm = self.vm.get_ssh_client(reconnect=True) + ssh_client_vm.execute("touch /root/incremental_marker_1.txt") + + created = [] + for i in range(5): + Backup.create(self.apiclient, self.vm.id, "inc_chain_%d" % i) + # write a small change so each incremental has something to capture + ssh_client_vm.execute("dd if=/dev/urandom of=/root/delta_%d bs=64k count=4 2>/dev/null" % i) + time.sleep(2) + created = Backup.list(self.apiclient, self.vm.id) + + self.assertEqual(len(created), 5, "Expected 5 backups after 5 Backup.create calls") + # Sort oldest-first by date + created.sort(key=lambda b: b.created) + + expected = ['FULL', 'INCREMENTAL', 'INCREMENTAL', 'FULL', 'INCREMENTAL'] + actual = [self._backup_type(b).upper() for b in created] + self.assertEqual(actual, expected, + "With nas.backup.full.every=3, chain pattern should be %s but was %s" % (expected, actual)) + + # Cleanup all backups (newest first to satisfy chain rules without forced=true) + for b in reversed(created): + Backup.delete(self.apiclient, b.id) + finally: + self._set_full_every(10) + self.backup_offering.removeOffering(self.apiclient, self.vm.id) + + @attr(tags=["advanced", "backup"], required_hardware="true") + def test_restore_from_incremental(self): + """ + Take FULL + 2 INCREMENTAL backups, each with a marker file. Restore from the + latest incremental and verify all three markers are present (chain flatten). + """ + self.backup_offering.assignOffering(self.apiclient, self.vm.id) + self._set_full_every(5) + try: + ssh_client_vm = self.vm.get_ssh_client(reconnect=True) + ssh_client_vm.execute("touch /root/marker_full.txt") + Backup.create(self.apiclient, self.vm.id, "rfi_full") + time.sleep(3) + + ssh_client_vm.execute("touch /root/marker_inc1.txt") + Backup.create(self.apiclient, self.vm.id, "rfi_inc1") + time.sleep(3) + + ssh_client_vm.execute("touch /root/marker_inc2.txt") + Backup.create(self.apiclient, self.vm.id, "rfi_inc2") + time.sleep(3) + + backups = Backup.list(self.apiclient, self.vm.id) + backups.sort(key=lambda b: b.created) + self.assertEqual(len(backups), 3) + self.assertEqual(self._backup_type(backups[0]).upper(), 'FULL') + self.assertEqual(self._backup_type(backups[2]).upper(), 'INCREMENTAL') + + new_vm_name = "vm-from-inc-" + str(int(time.time())) + new_vm = Backup.createVMFromBackup(self.apiclient, self.services["small"], + mode=self.services["mode"], backupid=backups[2].id, vmname=new_vm_name, + accountname=self.account.name, domainid=self.account.domainid, + zoneid=self.zone.id) + self.cleanup.append(new_vm) + + ssh_new = new_vm.get_ssh_client(reconnect=True) + for marker in ("marker_full.txt", "marker_inc1.txt", "marker_inc2.txt"): + result = ssh_new.execute("ls /root/%s" % marker) + self.assertIn(marker, result[0], + "Restored VM should have %s (chain flattened correctly)" % marker) + + for b in reversed(backups): + Backup.delete(self.apiclient, b.id) + finally: + self._set_full_every(10) + self.backup_offering.removeOffering(self.apiclient, self.vm.id) + + @attr(tags=["advanced", "backup"], required_hardware="true") + def test_delete_middle_incremental_repairs_chain(self): + """ + Delete a MIDDLE incremental from a FULL -> INC1 -> INC2 chain. + The chain repair should rebase INC2 onto FULL, and the final restore + should still produce a working VM with all expected blocks. 
+ """ + self.backup_offering.assignOffering(self.apiclient, self.vm.id) + self._set_full_every(5) + try: + ssh_client_vm = self.vm.get_ssh_client(reconnect=True) + ssh_client_vm.execute("touch /root/dmi_full.txt") + Backup.create(self.apiclient, self.vm.id, "dmi_full") + time.sleep(3) + ssh_client_vm.execute("touch /root/dmi_inc1.txt") + Backup.create(self.apiclient, self.vm.id, "dmi_inc1") + time.sleep(3) + ssh_client_vm.execute("touch /root/dmi_inc2.txt") + Backup.create(self.apiclient, self.vm.id, "dmi_inc2") + time.sleep(3) + + backups = Backup.list(self.apiclient, self.vm.id) + backups.sort(key=lambda b: b.created) + full, inc1, inc2 = backups[0], backups[1], backups[2] + + # Delete the middle incremental — should succeed via chain repair (no force needed) + Backup.delete(self.apiclient, inc1.id) + remaining = Backup.list(self.apiclient, self.vm.id) + self.assertEqual(len(remaining), 2, "After deleting middle inc, two backups should remain") + + # Restore from the remaining tail (formerly inc2) — must still produce a usable VM + new_vm_name = "vm-after-mid-del-" + str(int(time.time())) + new_vm = Backup.createVMFromBackup(self.apiclient, self.services["small"], + mode=self.services["mode"], backupid=inc2.id, vmname=new_vm_name, + accountname=self.account.name, domainid=self.account.domainid, + zoneid=self.zone.id) + self.cleanup.append(new_vm) + ssh_new = new_vm.get_ssh_client(reconnect=True) + # Both the FULL marker and (importantly) the deleted-INC1 marker should still + # be present, because the rebase merged INC1's blocks into INC2. + for marker in ("dmi_full.txt", "dmi_inc1.txt", "dmi_inc2.txt"): + result = ssh_new.execute("ls /root/%s" % marker) + self.assertIn(marker, result[0], + "After mid-incremental delete and rebase, %s should still be restorable" % marker) + + Backup.delete(self.apiclient, inc2.id) + Backup.delete(self.apiclient, full.id) + finally: + self._set_full_every(10) + self.backup_offering.removeOffering(self.apiclient, self.vm.id) + + @attr(tags=["advanced", "backup"], required_hardware="true") + def test_refuse_delete_full_with_children(self): + """ + Deleting a FULL that has surviving incrementals must fail without forced=true. + With forced=true it must succeed and remove the entire chain. + """ + self.backup_offering.assignOffering(self.apiclient, self.vm.id) + self._set_full_every(5) + try: + Backup.create(self.apiclient, self.vm.id, "rdc_full") + time.sleep(3) + Backup.create(self.apiclient, self.vm.id, "rdc_inc") + time.sleep(3) + + backups = Backup.list(self.apiclient, self.vm.id) + backups.sort(key=lambda b: b.created) + full = backups[0] + + failed = False + try: + Backup.delete(self.apiclient, full.id) + except Exception: + failed = True + self.assertTrue(failed, "Deleting a FULL with children should be refused without forced=true") + + # Forced delete should succeed and clear the whole chain + Backup.delete(self.apiclient, full.id, forced=True) + remaining = Backup.list(self.apiclient, self.vm.id) + self.assertIsNone(remaining, "Forced delete of FULL should remove the entire chain") + finally: + self._set_full_every(10) + self.backup_offering.removeOffering(self.apiclient, self.vm.id) + + @attr(tags=["advanced", "backup"], required_hardware="true") + def test_stopped_vm_falls_back_to_full(self): + """ + When a backup is requested while the VM is stopped, even if the chain cadence + would call for an incremental, the agent must fall back to a full and start a + new chain. The incrementalFallback flag should be reflected in backup.type=FULL. 
+ """ + self.backup_offering.assignOffering(self.apiclient, self.vm.id) + self._set_full_every(2) # next backup after the first should be incremental + try: + Backup.create(self.apiclient, self.vm.id, "svf_first") + time.sleep(3) + + # Stop the VM and trigger another backup — should fall back to FULL + self.vm.stop(self.apiclient) + time.sleep(5) + Backup.create(self.apiclient, self.vm.id, "svf_second") + time.sleep(3) + + backups = Backup.list(self.apiclient, self.vm.id) + backups.sort(key=lambda b: b.created) + self.assertEqual(len(backups), 2) + self.assertEqual(self._backup_type(backups[0]).upper(), 'FULL') + self.assertEqual(self._backup_type(backups[1]).upper(), 'FULL', + "Stopped-VM backup must be a FULL even when cadence would have asked for an INCREMENTAL") + + self.vm.start(self.apiclient) + for b in reversed(backups): + Backup.delete(self.apiclient, b.id) + finally: + self._set_full_every(10) + self.backup_offering.removeOffering(self.apiclient, self.vm.id)