Enhance provisioning documentation and scripts for improved network boot and DNS management

Add new documentation files for device DNS management via DHCP and dnsmasq configuration. Update cloud-init scripts to ensure proper handling of /etc/resolv.conf and DNS settings, allowing for seamless integration with file.server. Modify existing scripts to support dynamic LAN subnet configuration and improve overall network boot functionality. These changes enhance user experience and streamline the setup process for the CM4 eMMC provisioning service.
This commit is contained in:
nearxos
2026-03-04 19:15:38 +02:00
parent b5134098c0
commit 031e1c3415
16 changed files with 658 additions and 60 deletions

View File

@@ -27,6 +27,7 @@
# DEPLOY_LXC_WAN_IP=dhcp — WAN address: dhcp (default) or static e.g. 192.168.1.10/24
# DEPLOY_LXC_LAN_BRIDGE=vmbr1 — If set, add eth1 as LAN on this bridge (e.g. provisioning / network-boot)
# DEPLOY_LXC_LAN_SUBNET=10.20.50.1/24 — LXC IP on LAN (gateway); used only if DEPLOY_LXC_LAN_BRIDGE is set; default 10.20.50.1/24
# DEPLOY_EMMC_SIZE_GB=32 — optional: eMMC size in GB (used only when multiple new devices appear; default 32). Detection is dynamic — single new device is used regardless of size.
#
# Legacy: DEPLOY_LXC_NET1="name=eth1,bridge=vmbr1,ip=10.20.50.1/24" still works; overridden by DEPLOY_LXC_LAN_BRIDGE + DEPLOY_LXC_LAN_SUBNET if both are set.
#
@@ -150,8 +151,10 @@ rsync -a "$REPO_DIR/" "$PROXMOX:/tmp/emmc-provisioning-deploy/" --exclude='.git'
log "[4/5] Running remote install (host + LXC) ..."
# Pass optional LXC SSH vars (base64), selected storage, and network (WAN/LAN bridge + subnet)
ssh "$PROXMOX" "ROOTFS_STORAGE='$ROOTFS_STORAGE' CM4_BACKUPS_HOST_PATH='${CM4_BACKUPS_HOST_PATH:-}' DEPLOY_SSH_KEY_B64='${DEPLOY_SSH_KEY_B64:-}' DEPLOY_LXC_PWD_B64='${DEPLOY_LXC_PWD_B64:-}' DEPLOY_LXC_WAN_BRIDGE='${DEPLOY_LXC_WAN_BRIDGE:-}' DEPLOY_LXC_WAN_IP='${DEPLOY_LXC_WAN_IP:-}' DEPLOY_LXC_LAN_BRIDGE='${DEPLOY_LXC_LAN_BRIDGE:-}' DEPLOY_LXC_LAN_SUBNET='${DEPLOY_LXC_LAN_SUBNET:-}' DEPLOY_LXC_NET1='${DEPLOY_LXC_NET1:-}'" bash -s << 'REMOTE'
# Pass optional LXC SSH vars (base64), selected storage, network (WAN/LAN), and eMMC size
EMMC_GB="${DEPLOY_EMMC_SIZE_GB:-32}"
EMMC_SIZE_BYTES=$(( EMMC_GB * 1024 * 1024 * 1024 ))
ssh "$PROXMOX" "ROOTFS_STORAGE='$ROOTFS_STORAGE' CM4_BACKUPS_HOST_PATH='${CM4_BACKUPS_HOST_PATH:-}' DEPLOY_SSH_KEY_B64='${DEPLOY_SSH_KEY_B64:-}' DEPLOY_LXC_PWD_B64='${DEPLOY_LXC_PWD_B64:-}' DEPLOY_LXC_WAN_BRIDGE='${DEPLOY_LXC_WAN_BRIDGE:-}' DEPLOY_LXC_WAN_IP='${DEPLOY_LXC_WAN_IP:-}' DEPLOY_LXC_LAN_BRIDGE='${DEPLOY_LXC_LAN_BRIDGE:-}' DEPLOY_LXC_LAN_SUBNET='${DEPLOY_LXC_LAN_SUBNET:-}' DEPLOY_LXC_NET1='${DEPLOY_LXC_NET1:-}' EMMC_SIZE_BYTES='$EMMC_SIZE_BYTES' EMMC_GB='$EMMC_GB'" bash -s << 'REMOTE'
set -e
DEPLOY=/tmp/emmc-provisioning-deploy
ROOTFS_STORAGE="${ROOTFS_STORAGE:?ROOTFS_STORAGE not set}"
@@ -177,6 +180,7 @@ for id in $(pct list 2>/dev/null | awk 'NR>1 {print $1}'); do
done
if [[ -n "$CTID" ]]; then
log "Found existing LXC $CTID (hostname: $LXC_HOSTNAME)."
pct set "$CTID" -nameserver 8.8.8.8
else
MAX_ID=$(pct list 2>/dev/null | awk 'NR>1 {print $1}' | sort -n | tail -1)
[[ -z "$MAX_ID" ]] && MAX_ID=0
@@ -210,9 +214,10 @@ else
--hostname "$LXC_HOSTNAME" --memory 1024 --swap 0 --cores 1 \
--rootfs "${ROOTFS_STORAGE}:8" --net0 name=eth0,bridge="$WAN_BRIDGE",ip="$WAN_IP" $NET1_OPT \
--unprivileged 0 --features nesting=1 -tag cm4-provisioning
pct set "$CTID" -nameserver 8.8.8.8
mkdir -p /var/lib/cm4-provisioning
pct set "$CTID" -mp0 /var/lib/cm4-provisioning,mp=/var/lib/cm4-provisioning
log "LXC $CTID created and mount configured."
log "LXC $CTID created and mount configured (DNS 8.8.8.8)."
fi
# Optional: bind-mount host directory for backup images (skip if already mounted with same path)
@@ -265,11 +270,11 @@ cp "$DEPLOY/host/89-cm4-boot-mode-permissions.rules" /etc/udev/rules.d/ 2>/dev/n
cp "$DEPLOY/host/90-cm4-boot-mode.rules" /etc/udev/rules.d/
udevadm control --reload-rules 2>/dev/null || true
log "Host: env and dirs ..."
cat > /opt/cm4-provisioning/env << 'ENV'
log "Host: env and dirs (EMMC ${EMMC_GB:-32}GB = $EMMC_SIZE_BYTES bytes) ..."
cat > /opt/cm4-provisioning/env << ENV
GOLDEN_IMAGE=/var/lib/cm4-provisioning/golden.img
RPIBOOT_DIR=/opt/usbboot
EMMC_SIZE_BYTES=8589934592
EMMC_SIZE_BYTES=${EMMC_SIZE_BYTES:-34359738368}
ENV
[[ -n "$BACKUPS_HOST_PATH" ]] && echo "BACKUPS_DIR=$BACKUPS_HOST_PATH" >> /opt/cm4-provisioning/env
touch /etc/cm4-provisioning/enabled
@@ -330,7 +335,8 @@ if [[ -n "$LAN_SUBNET_FOR_CONF" ]]; then
DHCP_RANGE_START="${BASE_3}.100"
DHCP_RANGE_END="${BASE_3}.200"
pct exec "$CTID" -- bash -c "mkdir -p /opt/cm4-provisioning && echo 'LAN_GW=$LAN_GW' > /opt/cm4-provisioning/lan-subnet.conf && echo 'LAN_CIDR=$LAN_CIDR' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_START=$DHCP_RANGE_START' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_END=$DHCP_RANGE_END' >> /opt/cm4-provisioning/lan-subnet.conf"
log "LXC: wrote /opt/cm4-provisioning/lan-subnet.conf (LAN_GW=$LAN_GW, LAN_CIDR=$LAN_CIDR, DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END})"
echo "$LAN_GW" > "$DEPLOY/lxc_lan_ip.txt"
log "LXC: wrote /opt/cm4-provisioning/lan-subnet.conf (LAN_GW=$LAN_GW); dashboard will be reachable on LAN at http://${LAN_GW}:5000"
else
log "Warning: DEPLOY_LXC_LAN_SUBNET=$LAN_SUBNET_FOR_CONF not in form A.B.C.D/PREFIX; skipping lan-subnet.conf"
fi
@@ -343,7 +349,7 @@ pct push "$CTID" "$DEPLOY/host/flash-emmc-on-connect.sh" /opt/cm4-provisioning/f
pct exec "$CTID" -- chmod +x /opt/cm4-provisioning/flash-emmc-on-connect.sh
pct push "$CTID" "$DEPLOY/host/cm4-flash-trigger.sh" /usr/local/bin/cm4-flash-trigger.sh
pct exec "$CTID" -- chmod +x /usr/local/bin/cm4-flash-trigger.sh
pct exec "$CTID" -- bash -c 'echo -e "GOLDEN_IMAGE=/var/lib/cm4-provisioning/golden.img\nRPIBOOT_DIR=/opt/usbboot\nEMMC_SIZE_BYTES=8589934592" > /opt/cm4-provisioning/env'
pct exec "$CTID" -- bash -c "echo -e 'GOLDEN_IMAGE=/var/lib/cm4-provisioning/golden.img\nRPIBOOT_DIR=/opt/usbboot\nEMMC_SIZE_BYTES=$EMMC_SIZE_BYTES' > /opt/cm4-provisioning/env"
# --- LXC: dashboard (all files) ---
log "LXC: installing dashboard ..."
@@ -352,6 +358,8 @@ pct push "$CTID" "$DEPLOY/dashboard/app.py" /opt/cm4-provisioning/dashboard/app.
pct push "$CTID" "$DEPLOY/dashboard/templates/home.html" /opt/cm4-provisioning/dashboard/templates/home.html
pct push "$CTID" "$DEPLOY/dashboard/templates/login.html" /opt/cm4-provisioning/dashboard/templates/login.html
pct push "$CTID" "$DEPLOY/dashboard/templates/admin.html" /opt/cm4-provisioning/dashboard/templates/admin.html
pct push "$CTID" "$DEPLOY/dashboard/templates/portal_files.html" /opt/cm4-provisioning/dashboard/templates/portal_files.html
pct push "$CTID" "$DEPLOY/dashboard/templates/cloudinit_build.html" /opt/cm4-provisioning/dashboard/templates/cloudinit_build.html
pct push "$CTID" "$DEPLOY/dashboard/cm4-dashboard.service" /opt/cm4-provisioning/dashboard/cm4-dashboard.service
# Dashboard secret for sessions (create once so logins persist across restarts)
pct exec "$CTID" -- bash -c '[[ -f /opt/cm4-provisioning/dashboard.env ]] || echo "CM4_DASHBOARD_SECRET_KEY=$(openssl rand -hex 24 2>/dev/null || head -c 24 /dev/urandom | xxd -p)" > /opt/cm4-provisioning/dashboard.env'
@@ -397,14 +405,16 @@ log "Deploy done on remote. LXC ID: $CTID"
# Heredoc terminator (must be at column 1, no leading space/tab)
REMOTE
# Read LXC IP written by remote (container hostname -I)
# Read LXC IP and optional LAN IP written by remote
LXC_IP=$(ssh "$PROXMOX" "cat /tmp/emmc-provisioning-deploy/lxc_ip.txt 2>/dev/null" | tr -d '\n\r')
LXC_LAN_IP=$(ssh "$PROXMOX" "cat /tmp/emmc-provisioning-deploy/lxc_lan_ip.txt 2>/dev/null" | tr -d '\n\r')
log "[5/5] Deploy finished."
echo ""
echo "=== Deploy complete ==="
echo "Host and LXC are fully set up: usbboot (rpiboot), PiShrink, dashboard, systemd, udev."
[[ -n "$LXC_IP" ]] && echo " LXC IP: $LXC_IP"
[[ -n "$LXC_IP" ]] && echo " LXC IP (WAN): $LXC_IP"
[[ -n "$LXC_LAN_IP" ]] && echo " LXC IP (LAN): $LXC_LAN_IP"
echo ""
echo "--- Only remaining step (manual) ---"
echo " Add a golden image for Deploy (writing image to device):"
@@ -413,7 +423,8 @@ echo " • Or copy your image: scp your-image.img $PROXMOX:/var/lib/cm4-provi
echo " Backup (read from device) works without golden.img."
echo ""
echo "--- You have ---"
echo " - Dashboard: http://${LXC_IP:-<LXC-IP>}:5000"
echo " - Dashboard (WAN): http://${LXC_IP:-<LXC-IP>}:5000"
[[ -n "$LXC_LAN_IP" ]] && echo " - Dashboard (LAN): http://${LXC_LAN_IP}:5000 (use from devices on provisioning LAN)"
[[ -n "${DEPLOY_LXC_ROOT_PASSWORD:-}" || -n "${DEPLOY_SSH_KEY_B64:-}" ]] && [[ -n "$LXC_IP" ]] && echo " - LXC SSH: ssh root@$LXC_IP (password and/or key were set)"
[[ -n "${DEPLOY_LXC_ROOT_PASSWORD:-}" || -n "${DEPLOY_SSH_KEY_B64:-}" ]] && [[ -z "$LXC_IP" ]] && echo " - LXC SSH: ssh root@<LXC-IP> (password and/or key were set)"
[[ -n "${CM4_BACKUPS_HOST_PATH:-}" ]] && echo " - Backups on host: $CM4_BACKUPS_HOST_PATH"

View File

@@ -3,6 +3,9 @@
# Cause: mass-storage-gadget64 has no real boot files (broken symlinks or Git LFS not pulled).
# This script removes broken symlinks and extracts bootcode4.bin from the installed rpiboot binary.
#
# Does NOT fix: "libusb_bulk_transfer returned -7" / "Failed to write correct length" — that is a USB
# transfer/timing issue (try USB 2.0 port, or rpiboot -m 2000). See PROXMOX-LXC-DEPLOYMENT.md.
#
# On host: bash fix-gadget-bootcode-on-host.sh
# From your machine: ssh root@HOST 'bash -s' < emmc-provisioning/scripts/fix-gadget-bootcode-on-host.sh

View File

@@ -1,10 +1,12 @@
#!/usr/bin/env bash
# Setup network boot on the provisioning LXC: DHCP + TFTP on eth1, NAT so LAN uses eth0 for internet.
# Run inside the LXC (as root), or from your machine: ./setup-network-boot-on-lxc.sh root@10.130.60.141
# When run with ssh target, rsyncs lxc/ and runs this script inside the container.
# Run inside the LXC (as root), or from your machine: ./setup-network-boot-on-lxc.sh root@10.130.60.141 [SUBNET]
# SUBNET optional: A.B.C.D/PREFIX (e.g. 10.100.1.1/24). When run with ssh target, writes lan-subnet.conf on LXC if SUBNET given.
# When run with ssh target, rsyncs lxc/ and runs this script inside the container. Subnet is read from /opt/cm4-provisioning/lan-subnet.conf.
set -e
TARGET="${1:-}"
SUBNET_ARG="${2:-}"
if [[ -n "$TARGET" ]]; then
# Run remotely: sync lxc/ and script, then execute inside LXC
@@ -19,21 +21,51 @@ if [[ -n "$TARGET" ]]; then
echo "Note: network-boot-initramfs/initrd.img not found (run build.sh first); skipping."
fi
scp "$SCRIPT_DIR/setup-network-boot-on-lxc.sh" "$TARGET:/tmp/cm4-network-boot-lxc/setup.sh"
# Remote mode only: if a SUBNET argument (A.B.C.D/PREFIX) was given, derive the LAN
# settings from it and write /opt/cm4-provisioning/lan-subnet.conf on the target over ssh,
# before the setup script is executed there. A malformed SUBNET only prints a warning and
# is ignored; no file is written in that case.
if [[ -n "$SUBNET_ARG" ]]; then
# The pattern only checks the digits/dots/slash shape; octets (0-255) and the prefix
# length (0-32) are not range-checked, so e.g. 999.1.1.1/99 is accepted.
if [[ "$SUBNET_ARG" =~ ^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/([0-9]+)$ ]]; then
# Gateway address = the address part of SUBNET; PREFIX = the part after the slash.
LAN_GW="${BASH_REMATCH[1]}"
PREFIX="${BASH_REMATCH[2]}"
# First three octets of the gateway (everything before the last dot).
BASE_3="${LAN_GW%.*}"
# Network address is formed by replacing the last octet with 0. This equals the real
# network address for a /24; for other prefix lengths it can differ.
LAN_CIDR="${BASE_3}.0/${PREFIX}"
# DHCP pool is fixed at .100-.200 within the gateway's first three octets, regardless of PREFIX.
DHCP_RANGE_START="${BASE_3}.100"
DHCP_RANGE_END="${BASE_3}.200"
# Values are expanded locally and sent as literal single-quoted strings; the first echo
# truncates the file (>), the following ones append (>>).
ssh "$TARGET" "mkdir -p /opt/cm4-provisioning && echo 'LAN_GW=$LAN_GW' > /opt/cm4-provisioning/lan-subnet.conf && echo 'LAN_CIDR=$LAN_CIDR' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_START=$DHCP_RANGE_START' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_END=$DHCP_RANGE_END' >> /opt/cm4-provisioning/lan-subnet.conf"
echo "Wrote lan-subnet.conf on LXC (LAN_GW=$LAN_GW, DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END})."
else
echo "Warning: SUBNET must be A.B.C.D/PREFIX (e.g. 10.100.1.1/24); ignoring '$SUBNET_ARG'."
fi
fi
ssh "$TARGET" "bash /tmp/cm4-network-boot-lxc/setup.sh"
echo "Done."
exit 0
fi
# --- Running inside the LXC from here ---
# LAN subnet: use /opt/cm4-provisioning/lan-subnet.conf (written by deploy-to-proxmox.sh when DEPLOY_LXC_LAN_SUBNET is set)
# LAN subnet: use /opt/cm4-provisioning/lan-subnet.conf (written by deploy-to-proxmox.sh or passed as SUBNET when running remotely)
# Optional first arg when running locally: A.B.C.D/PREFIX to set/write lan-subnet.conf
LAN_CONF="/opt/cm4-provisioning/lan-subnet.conf"
if [[ -f "$LAN_CONF" ]]; then
# Choose the LAN settings used for the rest of the setup, in priority order:
#   1) $1 in the form A.B.C.D/PREFIX -> derive values and (re)write $LAN_CONF
#   2) existing $LAN_CONF            -> source it
#   3) neither                       -> built-in 10.20.50.0/24 defaults
# NOTE(review): earlier in this script TARGET is read from the same $1, and a non-empty
# TARGET takes the remote branch that ends with `exit 0`; the remote invocation of this
# script passes no arguments. From the visible code, branch 1 therefore looks unreachable
# — confirm, and consider a separate flag/variable for the local subnet.
if [[ "$1" =~ ^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/([0-9]+)$ ]]; then
# Gateway = address part, PREFIX = part after the slash (shape-checked only, not range-checked).
LAN_GW="${BASH_REMATCH[1]}"
PREFIX="${BASH_REMATCH[2]}"
# First three octets of the gateway (everything before the last dot).
BASE_3="${LAN_GW%.*}"
# Network address built by forcing the last octet to 0; matches the real network for a /24.
LAN_CIDR="${BASE_3}.0/${PREFIX}"
# DHCP pool is fixed at .100-.200 within the gateway's first three octets.
DHCP_RANGE_START="${BASE_3}.100"
DHCP_RANGE_END="${BASE_3}.200"
mkdir -p /opt/cm4-provisioning
# First write truncates the file, the rest append.
echo "LAN_GW=$LAN_GW" > "$LAN_CONF"
echo "LAN_CIDR=$LAN_CIDR" >> "$LAN_CONF"
echo "DHCP_RANGE_START=$DHCP_RANGE_START" >> "$LAN_CONF"
echo "DHCP_RANGE_END=$DHCP_RANGE_END" >> "$LAN_CONF"
echo "Using set subnet: $LAN_CIDR (gateway $LAN_GW), DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END}."
elif [[ -f "$LAN_CONF" ]]; then
# The file is executed as shell code; the echo after this block reads LAN_GW, LAN_CIDR,
# DHCP_RANGE_START and DHCP_RANGE_END, so the file is expected to define them.
source "$LAN_CONF"
else
# Fallback values when no configuration file exists and no subnet was passed.
LAN_GW="10.20.50.1"
LAN_CIDR="10.20.50.0/24"
DHCP_RANGE_START="10.20.50.100"
DHCP_RANGE_END="10.20.50.200"
echo "No lan-subnet.conf and no SUBNET argument; using defaults: $LAN_CIDR."
fi
echo "Configuring network boot (DHCP + TFTP on eth1, NAT via eth0) — LAN $LAN_CIDR (gateway $LAN_GW), DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END} ..."
@@ -42,17 +74,21 @@ if ! command -v dnsmasq >/dev/null 2>&1; then
apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dnsmasq
fi
# 2) dnsmasq config for eth1 only (DHCP + TFTP); PXE options in network-boot-pxe.conf (toggle with toggle-network-boot-dhcp.sh)
# 2) dnsmasq config for eth1 only (DHCP + TFTP + DNS); PXE options in network-boot-pxe.conf (toggle with toggle-network-boot-dhcp.sh)
mkdir -p /etc/dnsmasq.d
cat > /etc/dnsmasq.d/network-boot.conf << DNSMASQ
# DHCP on eth1 only (provisioning LAN)
# TFTP and PXE options are in network-boot-pxe.conf, controlled by toggle-network-boot-dhcp.sh
# DHCP + DNS on eth1 only (provisioning LAN)
# TFTP and PXE options in network-boot-pxe.conf, controlled by toggle-network-boot-dhcp.sh
interface=eth1
bind-interfaces
dhcp-range=${DHCP_RANGE_START},${DHCP_RANGE_END},12h
# DNS: file.server resolves to this host (eth1) so scripts can use http://file.server/...
address=/file.server/${LAN_GW}
# Explicitly send this host as DNS server to DHCP clients (option 6) so they use LXC DNS and resolve file.server
dhcp-option=6,${LAN_GW}
# Other DNS queries forwarded via LXC's resolv.conf
log-dhcp
log-queries
port=0
DNSMASQ
mkdir -p /opt/cm4-provisioning
if [ -f /tmp/cm4-network-boot-lxc/toggle-network-boot-dhcp.sh ]; then

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Manually test rpiboot on the Proxmox host (device in boot mode must be connected).
# Usage:
# From your machine: ./test-usbboot-on-host.sh [proxmox_host]
# On the host: ./test-usbboot-on-host.sh
# With timeout (e.g. 60s): TIMEOUT=60 ./test-usbboot-on-host.sh root@100.106.128.36
# If you see "Failed to write correct length, returned -7", try USB 2.0 port or add delay:
# RPIBOOT_EXTRA_OPTS='-m 2000' ./test-usbboot-on-host.sh root@100.106.128.36
#
# Replace proxmox_host with your host, e.g. root@100.106.128.36
set -e
# Optional ssh target (first positional argument); empty means "run on this machine".
HOST=${1-}
# Tunables: keep any non-empty value from the environment, otherwise fall back to the default.
: "${RPIBOOT:=/opt/usbboot/rpiboot}"
: "${GADGET:=/opt/usbboot/mass-storage-gadget64}"
# 0 means "no timeout".
: "${TIMEOUT:=0}"
# Extra rpiboot flags (e.g. '-m 2000'); guaranteed to be set, possibly empty.
: "${RPIBOOT_EXTRA_OPTS:=}"
#######################################
# Run a shell command line on the target machine.
# Globals:   HOST (read) - ssh target; empty means "run on this machine"
# Arguments: one or more words forming a shell command line; callers pass a
#            single string that may contain pipes, ||, redirections and globs
# Outputs:   whatever the command writes to stdout/stderr
# Returns:   exit status of the command (or of ssh)
#######################################
run_on_host() {
  if [[ -n "$HOST" ]]; then
    ssh "$HOST" "$@"
  else
    # ssh joins its command arguments with spaces and lets the remote shell
    # parse the result. Executing "$@" directly would instead treat the whole
    # string (e.g. "test -x /opt/usbboot/rpiboot") as a single command name and
    # fail with "command not found", so mirror the ssh behavior locally.
    local IFS=' '
    bash -c "$*"
  fi
}
# --- Pre-flight: rpiboot binary, gadget directory and at least one boot file must exist ---
echo "=== Checking usbboot and gadget on ${HOST:-localhost} ==="
if ! run_on_host "test -x $RPIBOOT"; then
  echo "Error: $RPIBOOT not found or not executable"
  exit 1
fi
if ! run_on_host "test -d $GADGET"; then
  echo "Error: $GADGET not found"
  exit 1
fi
if ! run_on_host "test -f $GADGET/bootcode4.bin || test -f $GADGET/boot.img || test -f $GADGET/bootfiles.bin"; then
  echo "Error: no boot file in $GADGET"
  exit 1
fi
printf '%s\n' \
  " rpiboot: $RPIBOOT" \
  " gadget: $GADGET" \
  ""

# --- Show USB devices matching the IDs named in the heading below ---
echo "=== USB devices (2b8e / 0a5c:2711 = CM4 boot mode) ==="
run_on_host "lsusb | grep -E '2b8e|0a5c' || echo ' None. Connect reTerminal with eMMC disable jumper and USB slave port.'"
printf '%s\n' \
  "" \
  "=== Tip: if rpiboot fails with 'Failed to write correct length, returned -7', use a USB 2.0 port, or run: RPIBOOT_EXTRA_OPTS='-m 2000' $0 $* ===" \
  "" \
  "=== Running rpiboot (verbose) — connect device now if not already ===" \
  " When the device switches to mass storage, rpiboot will exit and a new /dev/sdX may appear." \
  " Use Ctrl+C to stop, or wait for exit." \
  ""

# --- Run rpiboot: remote or local, with or without a time limit ---
# The command is kept as one string: it is sent verbatim over ssh, and is
# deliberately word-split when executed locally.
RPIBOOT_CMD="$RPIBOOT -v -d $GADGET $RPIBOOT_EXTRA_OPTS"
if [[ "$TIMEOUT" -gt 0 ]]; then
  rpiboot_bounded=yes
else
  rpiboot_bounded=no
fi
# A non-zero exit from rpiboot/timeout/ssh is tolerated so the block-device
# listing below still runs.
case "${HOST:+remote}/${rpiboot_bounded}" in
  remote/yes)
    ssh "$HOST" "timeout $TIMEOUT $RPIBOOT_CMD" || true
    ;;
  remote/no)
    # -t allocates a terminal for the interactive (unbounded) run.
    ssh -t "$HOST" "$RPIBOOT_CMD" || true
    ;;
  /yes)
    timeout "$TIMEOUT" $RPIBOOT_CMD || true
    ;;
  *)
    $RPIBOOT_CMD || true
    ;;
esac

# --- List /dev/sdX disks so a newly appeared device can be spotted ---
printf '%s\n' \
  "" \
  "=== Block devices now (check for new /dev/sdX) ==="
run_on_host "lsblk -nd -o NAME,SIZE,TYPE /dev/sd[a-z] 2>/dev/null || true"