Enhance provisioning documentation and scripts for improved network boot and DNS management</message>

<message>Add new documentation files for device DNS management via DHCP and dnsmasq configuration. Update cloud-init scripts to ensure proper handling of /etc/resolv.conf and DNS settings, allowing for seamless integration with file.server. Modify existing scripts to support dynamic LAN subnet configuration and improve overall network boot functionality. These changes enhance user experience and streamline the setup process for the CM4 eMMC provisioning service.
This commit is contained in:
nearxos
2026-03-04 19:15:38 +02:00
parent b5134098c0
commit 031e1c3415
16 changed files with 658 additions and 60 deletions

View File

@@ -16,6 +16,8 @@ emmc-provisioning/
│ ├── DEPLOY-NEW-PROXMOX.md Step-by-step: deploy to a new Proxmox instance │ ├── DEPLOY-NEW-PROXMOX.md Step-by-step: deploy to a new Proxmox instance
│ ├── EMMC-PROVISIONING-GUIDE.md Full setup and usage │ ├── EMMC-PROVISIONING-GUIDE.md Full setup and usage
│ ├── NETWORK-BOOT-LXC.md Network boot (PXE/dnsmasq) and LXC │ ├── NETWORK-BOOT-LXC.md Network boot (PXE/dnsmasq) and LXC
│ ├── DEVICE-DNS-DHCP-RESOLVCONF.md Device DNS from DHCP, resolv.conf, cloud-init
│ ├── DNSMASQ-DNS-FILESERVER.md dnsmasq DNS and file.server on LXC
│ ├── PROXMOX-LXC-DEPLOYMENT.md Proxmox LXC + host setup (reference) │ ├── PROXMOX-LXC-DEPLOYMENT.md Proxmox LXC + host setup (reference)
│ └── PORTAL_STYLING_GUIDE.md Dashboard UI styling reference │ └── PORTAL_STYLING_GUIDE.md Dashboard UI styling reference
├── host/ Scripts that run on the provisioning host (Proxmox host) ├── host/ Scripts that run on the provisioning host (Proxmox host)

View File

@@ -6,4 +6,124 @@ set -e
H="$(hostname)" H="$(hostname)"
grep -q "127.0.1.1.*$H" /etc/hosts || echo "127.0.1.1 $H" >> /etc/hosts grep -q "127.0.1.1.*$H" /etc/hosts || echo "127.0.1.1 $H" >> /etc/hosts
# Do not overwrite /etc/resolv.conf: use DNS from DHCP so file.server and LXC DNS work.
# --- Chromium kiosk autostart (same behaviour as gnss-guard start-chromium.sh) ---
# Installs, in order:
#   1. an optional launcher icon downloaded from the file server,
#   2. /usr/local/bin/start-chromium.sh (the kiosk launcher script),
#   3. a system-wide autostart entry so Chromium starts at desktop login,
#   4. a "GNSS Guard" Desktop shortcut and application-menu entry.
# Environment overrides: PI_USER, FILE_SERVER, DESKTOP_ICON.
PI_USER="${PI_USER:-pi}"
SCRIPT_DEST="/usr/local/bin/start-chromium.sh"
AUTOSTART_SYSTEM="/etc/xdg/autostart"
PI_HOME="/home/$PI_USER"
LIBFM_CONF="$PI_HOME/.config/libfm/libfm.conf"
# Icon: download start-here.png from file server, or set DESKTOP_ICON to override
FILE_SERVER="${FILE_SERVER:-http://file.server:5000/files/first-boot}"
ICON_DEST="/usr/share/pixmaps/tm.png"
DESKTOP_ICON="${DESKTOP_ICON:-chromium-browser}"

# Only fetch the icon when the caller did not pick one explicitly.
if [ "$DESKTOP_ICON" = "chromium-browser" ]; then
  mkdir -p /usr/share/pixmaps
  icon_url="${FILE_SERVER}/start-here.png"
  if ! curl -fsSL "$icon_url" -o "$ICON_DEST" 2>/dev/null; then
    # Fallback: use gateway IP (LXC on provisioning LAN) when DNS not ready yet at first boot
    gw="$(ip -4 route show default 2>/dev/null | awk '{print $3; exit}')"
    if [ -n "$gw" ]; then
      # Best effort: a missing icon must not abort first boot.
      curl -fsSL "http://${gw}:5000/files/first-boot/start-here.png" -o "$ICON_DEST" 2>/dev/null || true
    fi
  fi
  # Use the downloaded file only if it is non-empty; otherwise keep the themed icon name.
  if [ -s "$ICON_DEST" ]; then
    chmod 644 "$ICON_DEST"
    DESKTOP_ICON="$ICON_DEST"
  fi
fi

# Install start-chromium.sh system-wide so it works regardless of user home.
# Quoted delimiter: nothing inside is expanded while writing the file.
cat << 'START_CHROMIUM_EOF' > "$SCRIPT_DEST"
#!/bin/bash
# Disable keyring prompts
export GNOME_KEYRING_CONTROL=""
export DISPLAY=:0
# Force X11 instead of Wayland for better fullscreen support
export GDK_BACKEND=x11
unset WAYLAND_DISPLAY
# Wait for display and desktop environment to be ready
for i in {1..60}; do
  if xset q >/dev/null 2>&1 || [ -n "$DISPLAY" ]; then
    if pgrep -x pcmanfm >/dev/null 2>&1 || pgrep -x lxsession >/dev/null 2>&1 || pgrep -x xfdesktop >/dev/null 2>&1; then
      break
    fi
  fi
  sleep 0.5
done
sleep 5
/usr/bin/chromium --start-fullscreen --noerrdialogs --disable-infobars --disable-session-crashed-bubble --disable-restore-session-state --no-first-run --password-store=basic --use-mock-keychain --ozone-platform=x11 --disable-features=UseChromeOSDirectVideoDecoder --app=http://127.0.0.1:8080 &
sleep 3
for i in {1..10}; do
  WINDOW_ID=$(wmctrl -l 2>/dev/null | grep -i chromium | head -1 | awk '{print $1}')
  if [ -n "$WINDOW_ID" ]; then
    wmctrl -i -r "$WINDOW_ID" -b add,fullscreen 2>/dev/null
    break
  fi
  sleep 0.5
done
wait
START_CHROMIUM_EOF
chmod 755 "$SCRIPT_DEST"

# Autostart entry (runs Chromium at desktop login)
mkdir -p "$AUTOSTART_SYSTEM"
cat > "$AUTOSTART_SYSTEM/chromium-kiosk.desktop" << DESKTOP_EOF
[Desktop Entry]
Type=Application
Name=Chromium Fullscreen
Exec=/usr/local/bin/start-chromium.sh
Icon=$DESKTOP_ICON
Hidden=false
NoDisplay=false
X-GNOME-Autostart-enabled=true
DESKTOP_EOF
chmod 644 "$AUTOSTART_SYSTEM/chromium-kiosk.desktop"

# Write the "GNSS Guard" launcher to the path given as $1 (mode 644).
# Terminal=false: the launcher starts a GUI kiosk and needs no terminal window.
write_gnss_guard_launcher() {
  cat > "$1" << DESKTOP_SHORTCUT_EOF
[Desktop Entry]
Type=Application
Name=GNSS Guard
Comment=GNSS Guard Dashboard (e.g. if closed)
Exec=/usr/local/bin/start-chromium.sh
Icon=$DESKTOP_ICON
Terminal=false
Categories=Utility;
DESKTOP_SHORTCUT_EOF
  chmod 644 "$1"
}

# Desktop shortcut: real .desktop file on Desktop so the file manager treats it as a launcher (not a script).
# Symlink with no extension was shown as "executable script" and prompted; .desktop runs directly with quick_exec=1.
if getent passwd "$PI_USER" >/dev/null 2>&1; then
  mkdir -p "$PI_HOME/Desktop" "$PI_HOME/.config/libfm"

  # quick_exec must be a key of the [config] group in libfm.conf.
  if [ -f "$LIBFM_CONF" ] && grep -q '^quick_exec=' "$LIBFM_CONF"; then
    sed -i 's/^quick_exec=.*/quick_exec=1/' "$LIBFM_CONF"
  elif [ -f "$LIBFM_CONF" ] && grep -q '^\[config\]' "$LIBFM_CONF"; then
    # Group exists without the key: add the key directly under the header.
    sed -i '/^\[config\]/a quick_exec=1' "$LIBFM_CONF"
  else
    # New file, or no [config] group yet: write the group header with the key.
    printf '[config]\nquick_exec=1\n' >> "$LIBFM_CONF"
  fi

  # mkdir -p ran as root: hand any directories it created back to the user,
  # otherwise a root-owned ~/.config or ~/Desktop blocks the user's own writes.
  chown "$PI_USER:$PI_USER" "$PI_HOME/Desktop" "$PI_HOME/.config" 2>/dev/null || true
  chown -R "$PI_USER:$PI_USER" "$PI_HOME/.config/libfm" 2>/dev/null || true

  DESKTOP_FILE="$PI_HOME/Desktop/GNSS Guard.desktop"
  write_gnss_guard_launcher "$DESKTOP_FILE"
  chown "$PI_USER:$PI_USER" "$DESKTOP_FILE"
  # Remove old symlink if present
  rm -f "$PI_HOME/Desktop/GNSS Guard"

  # Application menu/panel entry (same content)
  SHORTCUT_FILE="/usr/share/applications/gnss-guard.desktop"
  write_gnss_guard_launcher "$SHORTCUT_FILE"
fi
echo "[$(date -Iseconds)] test completed" | tee -a /var/log/cloud-init-bootstrap.log echo "[$(date -Iseconds)] test completed" | tee -a /var/log/cloud-init-bootstrap.log

View File

@@ -10,10 +10,15 @@
# as first-boot.conf; then add a runcmd line to download it to /tmp/first-boot.conf before # as first-boot.conf; then add a runcmd line to download it to /tmp/first-boot.conf before
# running first-boot.sh so the script loads your config. # running first-boot.sh so the script loads your config.
# 4. To use a different username than "pi", set PI_USER in first-boot.conf and create that user below. # 4. To use a different username than "pi", set PI_USER in first-boot.conf and create that user below.
# 5. DNS: manage_resolv_conf: false and NM rc-manager=symlink so the device uses DNS from DHCP
# (LXC option 6) and file.server resolves. See docs/DEVICE-DNS-DHCP-RESOLVCONF.md.
package_update: true package_update: true
package_upgrade: false package_upgrade: false
# Do not overwrite /etc/resolv.conf; device will use DNS from DHCP (LXC sends option 6).
manage_resolv_conf: false
packages: packages:
- curl - curl
@@ -30,7 +35,17 @@ write_files:
PasswordAuthentication yes PasswordAuthentication yes
PermitRootLogin no PermitRootLogin no
# NetworkManager: manage resolv.conf via symlink so it gets DNS from DHCP (option 6 from LXC).
- path: /etc/NetworkManager/conf.d/99-resolv-dhcp.conf
content: |
[main]
rc-manager=symlink
permissions: '0644'
runcmd: runcmd:
# Allow NM to manage resolv.conf with DHCP DNS (remove static file if present).
- rm -f /etc/resolv.conf
- systemctl restart NetworkManager || true
- systemctl enable ssh - systemctl enable ssh
- systemctl start ssh - systemctl start ssh
- curl -fsSL "http://10.20.50.1:5000/files/first-boot.sh" -o /tmp/first-boot.sh - curl -fsSL "http://10.20.50.1:5000/files/first-boot.sh" -o /tmp/first-boot.sh

View File

@@ -5,6 +5,9 @@
# provisioning portal or file server). Example: http://10.20.50.1:5000/files/bootstrap.sh # provisioning portal or file server). Example: http://10.20.50.1:5000/files/bootstrap.sh
# 2. Copy this file to the boot partition as "user-data" (with meta-data and optional network-config). # 2. Copy this file to the boot partition as "user-data" (with meta-data and optional network-config).
# 3. Edit BOOTSTRAP_URL below to match your server (or set it once in the runcmd section). # 3. Edit BOOTSTRAP_URL below to match your server (or set it once in the runcmd section).
#
# DNS: This config uses systemd-resolved; /etc/resolv.conf is a stub and DNS comes from DHCP
# (LXC option 6). Ensure bootstrap.sh does not overwrite /etc/resolv.conf. See docs/DEVICE-DNS-DHCP-RESOLVCONF.md.
package_update: true package_update: true
package_upgrade: false package_upgrade: false
@@ -99,7 +102,7 @@ runcmd:
- systemctl start ssh - systemctl start ssh
# Download and run bootstrap script (edit URL to match your file server) # Download and run bootstrap script (edit URL to match your file server)
- | - |
BOOTSTRAP_URL="http://10.20.50.1:5000/files/bootstrap.sh" BOOTSTRAP_URL="http://file.server:5000/files/bootstrap.sh"
LOG="/var/log/cloud-init-bootstrap.log" LOG="/var/log/cloud-init-bootstrap.log"
if ! curl -fsSL "$BOOTSTRAP_URL" -o /tmp/bootstrap.sh 2>>"$LOG" || [ ! -s /tmp/bootstrap.sh ]; then if ! curl -fsSL "$BOOTSTRAP_URL" -o /tmp/bootstrap.sh 2>>"$LOG" || [ ! -s /tmp/bootstrap.sh ]; then
echo "$(date -Iseconds) ERROR: Failed to download bootstrap.sh from $BOOTSTRAP_URL (file missing or empty)" >> "$LOG" echo "$(date -Iseconds) ERROR: Failed to download bootstrap.sh from $BOOTSTRAP_URL (file missing or empty)" >> "$LOG"

View File

@@ -21,7 +21,7 @@ Step-by-step guide to deploy the provisioning service (host + LXC) on a **new**
- `CM4_BACKUPS_HOST_PATH=/mnt/storage/cm4-backups` — Store backups on this host path (create the directory on the host if needed). - `CM4_BACKUPS_HOST_PATH=/mnt/storage/cm4-backups` — Store backups on this host path (create the directory on the host if needed).
- **Network (WAN/LAN):** - **Network (WAN/LAN):**
`DEPLOY_LXC_WAN_BRIDGE=vmbr0` (default), `DEPLOY_LXC_WAN_IP=dhcp` (default), `DEPLOY_LXC_WAN_BRIDGE=vmbr0` (default), `DEPLOY_LXC_WAN_IP=dhcp` (default),
`DEPLOY_LXC_LAN_BRIDGE=vmbr1`, `DEPLOY_LXC_LAN_SUBNET=10.20.50.1/24` — To add eth1 as provisioning LAN with a custom subnet. `DEPLOY_LXC_LAN_BRIDGE=vmbr1`, `DEPLOY_LXC_LAN_SUBNET=10.20.50.1/24` — To add eth1 as provisioning LAN. **Set these if you want the portal reachable from the LAN** (e.g. http://10.20.50.1:5000); the dashboard listens on all interfaces.
--- ---
@@ -48,8 +48,7 @@ DEPLOY_LXC_LAN_SUBNET=10.20.50.1/24 \
``` ```
- On **first run**, the script will ask you to choose LXC rootfs storage (unless `DEPLOY_ROOTFS_STORAGE` is set). It then creates the LXC, installs host scripts, udev, systemd units, and the dashboard in the LXC. - On **first run**, the script will ask you to choose LXC rootfs storage (unless `DEPLOY_ROOTFS_STORAGE` is set). It then creates the LXC, installs host scripts, udev, systemd units, and the dashboard in the LXC.
- The script prints the **LXC IP** at the end. Note it for the next steps (or get it with: - The script prints **LXC IP (WAN)** and, if you set `DEPLOY_LXC_LAN_BRIDGE`, **LXC IP (LAN)**. The portal is reachable at `http://<IP>:5000` on both; use the LAN IP from devices on the provisioning LAN.
`ssh root@YOUR_PROXMOX_HOST "pct exec \$(pct list -no-header -output vmid,name | awk '\''\$2==\"cm4-provisioning\"{print \$1}'\'') -- hostname -I"`).
--- ---
@@ -90,6 +89,16 @@ scp /path/to/your-golden.img root@YOUR_PROXMOX_HOST:/var/lib/cm4-provisioning/go
--- ---
## Accessing the portal from the LAN
The dashboard listens on **all interfaces** (`0.0.0.0:5000`), so it is reachable on both WAN and LAN IPs when the LXC has two networks.
- **Deploy with a LAN interface:** set `DEPLOY_LXC_LAN_BRIDGE=vmbr1` (and optionally `DEPLOY_LXC_LAN_SUBNET=10.20.50.1/24`) when running the deploy script. The LXC will get eth1 with the LAN IP (e.g. 10.20.50.1).
- **From the provisioning LAN:** open **http://&lt;LAN-IP&gt;:5000** (e.g. http://10.20.50.1:5000). Devices on that subnet can use the portal without going through WAN.
- If you did not set a LAN bridge at deploy time, you only have one IP (WAN); use that for the portal. To add LAN later you would need to add eth1 to the container and reconfigure (see PROXMOX-LXC-DEPLOYMENT.md).
---
## Step 4: (Optional) SSH into the LXC ## Step 4: (Optional) SSH into the LXC
If you set `DEPLOY_LXC_ROOT_PASSWORD` or had a default SSH key, you can already run: If you set `DEPLOY_LXC_ROOT_PASSWORD` or had a default SSH key, you can already run:
@@ -144,9 +153,11 @@ Or from your machine (stream the script): use the same pattern as in [PROXMOX-LX
**After deployment:** **After deployment:**
- **Dashboard:** http://&lt;LXC-IP&gt;:5000 - **Dashboard:** http://&lt;LXC-IP&gt;:5000 (WAN). If you set `DEPLOY_LXC_LAN_BRIDGE`, also **http://&lt;LAN-IP&gt;:5000** (e.g. http://10.20.50.1:5000) from the LAN.
- **Golden image path (host and LXC):** `/var/lib/cm4-provisioning/golden.img` - **Golden image path (host and LXC):** `/var/lib/cm4-provisioning/golden.img`
- **Disable auto-flash:** `ssh root@YOUR_PROXMOX_HOST "rm /etc/cm4-provisioning/enabled"` - **Disable auto-flash:** `ssh root@YOUR_PROXMOX_HOST "rm /etc/cm4-provisioning/enabled"`
- **Enable again:** `ssh root@YOUR_PROXMOX_HOST "touch /etc/cm4-provisioning/enabled"` - **Enable again:** `ssh root@YOUR_PROXMOX_HOST "touch /etc/cm4-provisioning/enabled"`
**If you see "rpiboot failed or no device connected":** The error is from the **Proxmox host** (where USB is connected). On the host run: `tail -50 /var/lib/cm4-provisioning/flash.log` to see the real rpiboot message. Ensure the reTerminal is in **boot mode** (eMMC disable jumper, USB slave port), then unplug/replug. See [PROXMOX-LXC-DEPLOYMENT.md](PROXMOX-LXC-DEPLOYMENT.md) § "If rpiboot fails" for full steps.
Full reference: [PROXMOX-LXC-DEPLOYMENT.md](PROXMOX-LXC-DEPLOYMENT.md). Full reference: [PROXMOX-LXC-DEPLOYMENT.md](PROXMOX-LXC-DEPLOYMENT.md).

View File

@@ -0,0 +1,90 @@
# Device DNS from DHCP and /etc/resolv.conf
This document describes how to configure provisioned devices (e.g. Raspberry Pi / reTerminal) so they **use DNS from DHCP** and do **not** have a fixed nameserver in `/etc/resolv.conf`. That way the LXC's dnsmasq (option 6) is used, **file.server** resolves, and scripts can use `http://file.server/...` without hardcoding IPs.
## Summary of changes (what we did on the Pi)
1. **Do not overwrite `/etc/resolv.conf`**
No script (e.g. bootstrap or first-boot) should write a fixed nameserver (e.g. `8.8.8.8`) into `/etc/resolv.conf`. DNS should come from DHCP.
2. **LXC sends DHCP option 6 (DNS server)**
dnsmasq on the LXC must send the LXC's eth1 IP as the DNS server so clients use it and get **file.server** resolution. See [DNSMASQ-DNS-FILESERVER.md](DNSMASQ-DNS-FILESERVER.md) and `scripts/setup-network-boot-on-lxc.sh` (`dhcp-option=6,${LAN_GW}`).
3. **Let NetworkManager manage `/etc/resolv.conf`**
So that the nameserver in `/etc/resolv.conf` is the one from DHCP (option 6), either:
- **Option A (recommended for full cloud-init):** Use **systemd-resolved** and make `/etc/resolv.conf` a symlink to the resolved stub; resolved gets DNS from NetworkManager.
- **Option B (minimal cloud-init):** Use **NetworkManager** to manage `/etc/resolv.conf` via a symlink: add `rc-manager=symlink` in NetworkManager config so `/etc/resolv.conf` points to `/run/NetworkManager/resolv.conf`, which NM fills with the DHCP DNS.
4. **Ensure connection uses DHCP DNS**
The NetworkManager connection should have `ipv4.ignore-auto-dns: no` (default) so it accepts option 6 from DHCP. No fixed `ipv4.dns` in the connection.
## What to change in cloud-init
### Option A: user-data.bootstrap (systemd-resolved)
**File:** `cloud-init/user-data.bootstrap`
- **manage_resolv_conf: false** — already set; cloud-init must not overwrite resolv.conf.
- **systemd-resolved** — runcmd enables/starts resolved and makes `/etc/resolv.conf` a symlink to `stub-resolv.conf`. Resolved gets DNS from NetworkManager (and from the hooks in write_files).
- **NetworkManager** — `99-use-resolved.conf` has `dns=systemd-resolved` and `rc-manager=unmanaged` so NM doesn't write resolv.conf; resolved does.
- **Bootstrap script** — must **not** write `nameserver 8.8.8.8` (or any fixed server) into `/etc/resolv.conf`. Our `bootstrap.sh` no longer does that.
No extra changes needed if you use `user-data.bootstrap` as-is; just ensure your bootstrap script does not touch resolv.conf.
### Option B: Minimal user-data (first-boot or bootstrap-only, no systemd-resolved)
If your user-data only runs a remote script (e.g. `first-boot.sh` or `bootstrap.sh`) and does **not** enable systemd-resolved, add the following so the device uses DNS from DHCP and NM manages resolv.conf:
1. **Set in user-data (cloud-config):**
```yaml
manage_resolv_conf: false
```
2. **Add a write_files entry** so NetworkManager manages resolv.conf with the DHCP-provided DNS:
```yaml
write_files:
# ... your other write_files ...
- path: /etc/NetworkManager/conf.d/99-resolv-dhcp.conf
content: |
[main]
rc-manager=symlink
permissions: '0644'
```
3. **In your bootstrap/first-boot script:**
Do **not** write a fixed nameserver to `/etc/resolv.conf` (e.g. remove any line like `echo "nameserver 8.8.8.8" > /etc/resolv.conf`).
4. **Optional runcmd** (if you want a clean state on first boot):
Remove any existing static resolv.conf so NM can create its symlink and write DHCP DNS:
```yaml
runcmd:
- rm -f /etc/resolv.conf
- systemctl restart NetworkManager
# ... then your download and run of bootstrap.sh or first-boot.sh ...
```
After first boot, devices will get DNS from DHCP (LXC option 6), and **file.server** will resolve to the LXC's eth1 IP.
## Verification on the device
```bash
# Should show the LXC as nameserver (e.g. 10.20.40.1), not 8.8.8.8
cat /etc/resolv.conf
# Should resolve to LXC eth1
getent hosts file.server
```
## Reference: manual fix on an already-provisioned device
If a device was provisioned before these changes and still has a fixed DNS (e.g. 8.8.8.8):
1. **LXC:** Ensure dnsmasq sends option 6 (see [DNSMASQ-DNS-FILESERVER.md](DNSMASQ-DNS-FILESERVER.md)); re-run `setup-network-boot-on-lxc.sh` if needed.
2. **On the device:**
- Add NetworkManager config:
`echo -e '[main]\nrc-manager=symlink' | sudo tee /etc/NetworkManager/conf.d/99-resolv-dhcp.conf`
- Remove static resolv.conf and restart NM:
`sudo rm -f /etc/resolv.conf && sudo systemctl restart NetworkManager`
- Renew DHCP so the device gets option 6:
`sudo nmcli con down "Wired connection 1"; sudo nmcli con up "Wired connection 1"`
3. Check: `cat /etc/resolv.conf` and `getent hosts file.server`.

View File

@@ -0,0 +1,101 @@
# dnsmasq DNS and file.server on the LXC
This document describes the dnsmasq DNS configuration on the provisioning LXC and the static hostname **file.server** used for the fileserver.
## What was changed
### 1. dnsmasq now provides DNS on eth1
Previously, dnsmasq on the LXC was configured with **`port=0`**, which disabled DNS and provided only DHCP and TFTP on the provisioning interface (eth1).
**Change:** `port=0` was removed so dnsmasq also acts as a DNS server on eth1 (port 53). Clients that receive DHCP from dnsmasq will use the LXC as their DNS server for the provisioning LAN.
- **DHCP** on eth1 — unchanged (range from `lan-subnet.conf`, e.g. 10.20.40.100–10.20.40.200).
- **TFTP/PXE** on eth1 — unchanged (toggle with `/opt/cm4-provisioning/toggle-network-boot-dhcp.sh`).
- **DNS** on eth1 — **new**: local static records (e.g. `file.server`) plus forwarding of other queries via the LXC's `/etc/resolv.conf`.
### 2. Static DNS record: file.server → eth1 IP
A static A record was added so the hostname **file.server** resolves to the LXC's eth1 address (the provisioning LAN gateway). That IP is taken from **`/opt/cm4-provisioning/lan-subnet.conf`** as **`LAN_GW`** (e.g. `10.20.40.1`).
**dnsmasq config (written by `setup-network-boot-on-lxc.sh`):**
```text
address=/file.server/${LAN_GW}
```
So scripts and devices on the provisioning LAN can use **`http://file.server/...`** (or `file.server` in general) without hardcoding the LXC's IP. The IP stays correct even if the LAN subnet is changed and the setup script is re-run.
### 3. Files modified in the repo
| File | Change |
|------|--------|
| **scripts/setup-network-boot-on-lxc.sh** | Removed `port=0`; added `address=/file.server/${LAN_GW}` and comments in the generated `/etc/dnsmasq.d/network-boot.conf`. |
| **lxc/dnsmasq-network-boot.conf** | Template updated: removed `port=0`, added comment for DNS and `file.server` (commented example). |
### 4. Applied on LXC (root@10.20.40.1)
On **2025-03-04** the setup script was run against **root@10.20.40.1**:
```bash
./emmc-provisioning/scripts/setup-network-boot-on-lxc.sh root@10.20.40.1
```
Result on that LXC:
- **LAN:** 10.20.40.0/24, gateway 10.20.40.1 (from existing `lan-subnet.conf`).
- **DHCP:** 10.20.40.100–10.20.40.200 on eth1.
- **DNS:** Enabled on eth1; **file.server** → **10.20.40.1**.
- dnsmasq and NAT were (re)configured; TFTP root and network boot toggle unchanged.
So on the provisioning LAN, **file.server** resolves to **10.20.40.1** (the LXC's eth1).
## How to use file.server in scripts
On devices that get DHCP (and thus DNS) from the LXC on the provisioning LAN:
- Use **`http://file.server/...`** (or `file.server` as hostname) instead of `http://10.20.40.1/...`.
- No need to hardcode the LXC IP; if you change the subnet and re-run the setup script, **file.server** will still point at the correct gateway.
Example:
```bash
curl -O http://file.server/cloud-init/seed.img
```
## Adding more static DNS entries
To add more names (e.g. `fileserver` or another hostname), add more **`address=/name/${LAN_GW}`** lines in the heredoc in **scripts/setup-network-boot-on-lxc.sh** (where `network-boot.conf` is generated), or add a separate file under `/etc/dnsmasq.d/` on the LXC with the same format. Then restart dnsmasq:
```bash
systemctl restart dnsmasq
```
## Re-applying on another or existing LXC
To apply or refresh this configuration on any LXC:
```bash
./emmc-provisioning/scripts/setup-network-boot-on-lxc.sh root@<LXC-IP> [SUBNET]
```
Example with explicit subnet:
```bash
./emmc-provisioning/scripts/setup-network-boot-on-lxc.sh root@10.20.40.1 10.20.40.1/24
```
This rewrites `/etc/dnsmasq.d/network-boot.conf` (including `address=/file.server/${LAN_GW}`) and restarts dnsmasq.
## Verification on the LXC
```bash
# DNS and file.server
grep -E 'address=|port=' /etc/dnsmasq.d/network-boot.conf
# Resolve file.server (from a client on the provisioning LAN, or from LXC with server 127.0.0.1)
getent hosts file.server
# or: dig @10.20.40.1 file.server
```
Expected: **file.server** resolves to the LAN gateway (e.g. 10.20.40.1).

View File

@@ -18,8 +18,9 @@ Devices plugged into the same network as **eth1** (e.g. reTerminals with network
## What you need on the LXC ## What you need on the LXC
1. **DHCP server** on eth1 only (e.g. **dnsmasq**), handing out addresses in e.g. `10.20.50.100``10.20.50.200` and advertising the TFTP server (next-server = LXCs eth1 IP). 1. **DHCP server** on eth1 only (e.g. **dnsmasq**), handing out addresses in e.g. `10.20.50.100``10.20.50.200` and advertising the TFTP server (next-server = LXCs eth1 IP).
2. **TFTP server** (dnsmasq can provide this) with **TFTP root** containing Raspberry Pi 4 / CM4 boot files. 2. **DNS server** on eth1 (dnsmasq): static name **file.server** → eth1 IP so scripts can use `http://file.server/...`; other queries forwarded upstream. See [DNSMASQ-DNS-FILESERVER.md](DNSMASQ-DNS-FILESERVER.md).
3. **IP forwarding** and **NAT** (nftables or iptables) so traffic from `10.20.50.0/24` is masqueraded out **eth0**. 3. **TFTP server** (dnsmasq can provide this) with **TFTP root** containing Raspberry Pi 4 / CM4 boot files.
4. **IP forwarding** and **NAT** (nftables or iptables) so traffic from `10.20.50.0/24` is masqueraded out **eth0**.
## One-time setup (inside the LXC) ## One-time setup (inside the LXC)
@@ -40,8 +41,8 @@ bash /path/to/setup-network-boot-on-lxc.sh
The script will: The script will:
- Install **dnsmasq** (DHCP + TFTP). - Install **dnsmasq** (DHCP + TFTP + DNS).
- Configure dnsmasq to listen only on **eth1**, with a DHCP range and TFTP root. - Configure dnsmasq to listen only on **eth1**, with a DHCP range, TFTP root, and DNS (including **file.server** → eth1).
- Create `/srv/tftpboot` and **fetch Raspberry Pi 4 boot files from GitHub** (raspberrypi/firmware, `boot/` folder) if not already present. - Create `/srv/tftpboot` and **fetch Raspberry Pi 4 boot files from GitHub** (raspberrypi/firmware, `boot/` folder) if not already present.
- Enable **IPv4 forwarding** and **NAT** (nftables) so clients on eth1 use eth0 for internet. - Enable **IPv4 forwarding** and **NAT** (nftables) so clients on eth1 use eth0 for internet.
- Enable and start the **dnsmasq** service. - Enable and start the **dnsmasq** service.

View File

@@ -0,0 +1,63 @@
# Proxmox host comparison: working vs new
Quick reference from comparing **working** (10.130.60.224) and **new** (100.106.128.36) Proxmox hosts.
## Same on both
| Item | Status |
|------|--------|
| `/opt/cm4-provisioning/env` | Same (GOLDEN_IMAGE, RPIBOOT_DIR, EMMC_SIZE_BYTES=8589934592, SHRINK_BACKUP, etc.) |
| `/etc/cm4-provisioning/enabled` | Present (provisioning enabled) |
| udev rules `89-cm4-boot-mode-permissions.rules` | Same (MODE="0666" for 2b8e, 0a5c:2711) |
| udev rules `90-cm4-boot-mode.rules` | Same (trigger cm4-flash-trigger.sh on add) |
| `/usr/local/bin/cm4-flash-trigger.sh` | Same (starts cm4-flash.service) |
| `cm4-flash.service` | Same (ExecStartPre=sleep 5, TimeoutStartSec=7200) |
| `cm4-build-cloudinit.path`, `cm4-shrink.path` + services | Same |
| `build-cloudinit-image.sh`, `run-shrink-on-host.sh`, `fix-gadget-bootcode-on-host.sh` | Same (md5 match) |
## Differences
### 1. `/opt/usbboot/mass-storage-gadget64/bootcode4.bin` — **fixed**
- **Working:** Present (105984 bytes). rpiboot needs this or it prints "No 'bootcode' files found".
- **New:** Was missing; fixed by copying from working host. Verify: `ls -la /opt/usbboot/mass-storage-gadget64/bootcode4.bin` on new.
### 2. `/opt/cm4-provisioning/flash-emmc-on-connect.sh`
- **Working:** Older version (14421 bytes, md5 `dbac0bc2...`).
- **New:** Newer version (15449 bytes, md5 `6081bda7...`) with longer device wait (60s/90s), relaxed size check (50–120%), and extra diagnostics on "No suitable block device".
**Recommendation:** Copy the repo script to the **working** host so both use the same version:
```bash
scp emmc-provisioning/host/flash-emmc-on-connect.sh root@10.130.60.224:/opt/cm4-provisioning/
```
### 3. `/var/lib/cm4-provisioning/golden.img`
- **Working:** Present (symlink to a backup image). Required for **Deploy** (writing image to device).
- **New:** Missing. Backup works; Deploy will show "Golden image not found" until you set one (dashboard “Set as golden” or `scp` an image to the host as `golden.img`).
### 4. `/opt/cm4-provisioning/dashboard/` (host only)
- **Working:** Directory exists (owner 1000:1000). Dashboard normally runs in the **LXC**, so this may be leftover or unused on the host.
- **New:** No dashboard dir on host. No action needed unless you run the dashboard on the host.
### 5. Extra files in `/var/lib/cm4-provisioning/` on working
- **Working:** `build_cloudinit_status.json`, `cloudinit_templates.json`, `first_boot_status.json` (written by dashboard/LXC when using cloud-init build).
- **New:** Not present yet. They appear when you use the dashboard from the LXC; not required for flash/backup.
## Checklist for new host (100.106.128.36)
- [x] `bootcode4.bin` in `/opt/usbboot/mass-storage-gadget64/` (copied from working)
- [x] Same udev rules, trigger, and systemd units
- [x] Same env. `EMMC_SIZE_BYTES` is optional; device detection is dynamic (any new block device after rpiboot is accepted for 8/16/32 GB CM4).
- [ ] Set `golden.img` for Deploy (copy image or use dashboard “Set as golden” from a backup)
- [x] `flash-emmc-on-connect.sh` is the updated version (longer wait, diagnostics)
## If flash still fails on new host
1. Check flash.log: `ssh root@100.106.128.36 'tail -80 /var/lib/cm4-provisioning/flash.log'` — the script now logs "Current block devices" and sizes when no device is found.
2. Ensure eMMC disable jumper is set and you use the USB slave port; unplug and replug.
3. Optional: increase udev/systemd delay (e.g. `ExecStartPre=/bin/sleep 10` in `cm4-flash.service`) if the device is slow to enumerate.

View File

@@ -210,7 +210,50 @@ Or copy `scripts/monitor-from-host.sh` to the host and run `./monitor-from-host.
2. **Unplug and replug the USB** udev runs the trigger only when the device is *added*. Unplug the reTerminal USB (keep it in boot mode), then plug it back in. The trigger will run the script and rpiboot; when the eMMC is exposed, the portal shows "Device connected" with Backup/Deploy. 2. **Unplug and replug the USB** udev runs the trigger only when the device is *added*. Unplug the reTerminal USB (keep it in boot mode), then plug it back in. The trigger will run the script and rpiboot; when the eMMC is exposed, the portal shows "Device connected" with Backup/Deploy.
3. **If rpiboot fails** Check on the host: `ssh root@10.130.60.224 'tail -30 /var/lib/cm4-provisioning/flash.log'` (rpiboot stderr is appended there). Try unplug/replug again. To see the exact rpiboot error: `ssh root@10.130.60.224 '/opt/usbboot/rpiboot -d /opt/usbboot/mass-storage-gadget64'` (device connected; Ctrl+C to stop). Run `scripts/monitor-from-host.sh` for a full snapshot. 3. **If rpiboot fails** ("rpiboot failed or no device connected"):
- **Check flash.log on the host** (rpiboot runs there; the log has the real error):
`ssh root@YOUR_PROXMOX_HOST 'tail -50 /var/lib/cm4-provisioning/flash.log'`
- **Quick diagnostic:**
`ssh root@YOUR_PROXMOX_HOST 'bash -s' < emmc-provisioning/scripts/check-usb-on-host.sh`
This shows whether the device is seen (lsusb), status, and last lines of flash.log.
- **Ensure device is in boot mode:** eMMC disable jumper set, use the **USB slave** port (not host port). Unplug and replug after setting jumper.
- **Run rpiboot manually** (device connected; Ctrl+C to stop):
`ssh root@YOUR_PROXMOX_HOST '/opt/usbboot/rpiboot -v -d /opt/usbboot/mass-storage-gadget64'`
The last line before exit is usually the error (e.g. "No device found", "Unable to open device").
- Run `scripts/monitor-from-host.sh root@YOUR_PROXMOX_HOST` for a full snapshot.
**Manual usbboot test** (to verify rpiboot and the CM4 without the full flash flow):
1. **On the Proxmox host** (where USB is connected), with the reTerminal in **boot mode** (eMMC disable jumper set, USB slave port connected):
```bash
/opt/usbboot/rpiboot -v -d /opt/usbboot/mass-storage-gadget64
```
2. You should see lines like: `Device located successfully`, `Loading: .../bootcode4.bin`, `Sending bootcode.bin`. When the device switches to mass storage, rpiboot exits and a new `/dev/sdX` may appear (check with `lsblk` in another terminal). Press **Ctrl+C** to stop rpiboot at any time.
3. **From your machine** (with device already connected to the host):
```bash
./emmc-provisioning/scripts/test-usbboot-on-host.sh root@YOUR_PROXMOX_HOST
```
Optional: run with a timeout so it doesn't wait forever: `TIMEOUT=60 ./emmc-provisioning/scripts/test-usbboot-on-host.sh root@YOUR_PROXMOX_HOST`
**"libusb_bulk_transfer sent 0 bytes; returned -7" / "Failed to write correct length"** (device found, then transfer fails in a loop):
This is a known USB timing/controller issue with CM4 and rpiboot ([raspberrypi/usbboot#36](https://github.com/raspberrypi/usbboot/issues/36)). Try in order:
1. **Use a USB 2.0 port** on the Proxmox host (not USB 3.0). Many reports say USB 2.0 is more reliable for rpiboot.
2. **Avoid udev fighting with manual rpiboot:** Temporarily disable the provisioning trigger so only your manual rpiboot runs:
`ssh root@HOST 'mv /etc/cm4-provisioning/enabled /etc/cm4-provisioning/enabled.bak'`
Run rpiboot, then re-enable:
`ssh root@HOST 'mv /etc/cm4-provisioning/enabled.bak /etc/cm4-provisioning/enabled'`
3. **Try a different USB port and cable** on the host; unplug/replug and retry.
4. **Add a delay** so the device is ready before transfer:
`rpiboot -v -d /opt/usbboot/mass-storage-gadget64 -m 2000`
(`-m` is microseconds between device checks; 2000 = 2 ms.)
5. **Reboot the Proxmox host** and try again (USB controller state can get stuck).
6. **Find a USB 2.0 port:** On the host run `lsusb -t` — look for the BCM2711 device; the tree shows which controller (e.g. "xHCI" = USB 3, "ehci" = USB 2). Try a port that is under an **ehci** or **ohci** controller, or a black (non-blue) physical port.
7. **Try a powered USB 2.0 hub** between host and reTerminal (some hosts work only through a hub).
8. If you have the **working host** (10.130.60.224), try the same reTerminal there; if it works there, the difference is host USB controller or port.
3b. **"No suitable block device after rpiboot"** — rpiboot ran but no new block device was seen. Detection is **dynamic**: any block device that appears after rpiboot (not present before) is used, so 8/16/32 GB CM4 work without setting eMMC size. **Check on host:** `tail -80 /var/lib/cm4-provisioning/flash.log` — at the end you'll see "Current block devices" and each `/dev/sdX` with size. **Causes:** (1) Device didn't switch to mass storage (try unplug/replug, keep eMMC disable jumper set). (2) udev slow — try again; the script waits up to 90s for the device.
4. **"No 'bootcode' files found in mass-storage-gadget64"** Usually because `bootfiles.bin` is a **broken symlink** (e.g. `-> ../firmware/bootfiles.bin`) and that target doesnt exist. **Fix on host:** run `scripts/fix-gadget-bootcode-on-host.sh` on the host (it removes the symlink and extracts `bootcode4.bin` from the installed rpiboot binary). From your machine: `ssh root@10.130.60.224 'bash -s' < scripts/fix-gadget-bootcode-on-host.sh`. **Alternative:** repopulate the gadget dir with `./scripts/populate-gadget-on-host.sh root@10.130.60.224`, or full reinstall with `./scripts/build-and-deploy-usbboot-to-host.sh root@10.130.60.224`. Then verify: `ls -la /opt/usbboot/mass-storage-gadget64/` (should list a real `bootcode4.bin` or `bootfiles.bin`, plus `boot.img`, `config.txt`). 4. **"No 'bootcode' files found in mass-storage-gadget64"** Usually because `bootfiles.bin` is a **broken symlink** (e.g. `-> ../firmware/bootfiles.bin`) and that target doesnt exist. **Fix on host:** run `scripts/fix-gadget-bootcode-on-host.sh` on the host (it removes the symlink and extracts `bootcode4.bin` from the installed rpiboot binary). From your machine: `ssh root@10.130.60.224 'bash -s' < scripts/fix-gadget-bootcode-on-host.sh`. **Alternative:** repopulate the gadget dir with `./scripts/populate-gadget-on-host.sh root@10.130.60.224`, or full reinstall with `./scripts/build-and-deploy-usbboot-to-host.sh root@10.130.60.224`. Then verify: `ls -la /opt/usbboot/mass-storage-gadget64/` (should list a real `bootcode4.bin` or `bootfiles.bin`, plus `boot.img`, `config.txt`).

View File

@@ -17,8 +17,8 @@ exec >> "$LOG_FILE" 2>&1
# Configuration - adjust paths and size for your setup # Configuration - adjust paths and size for your setup
RPIBOOT_DIR="${RPIBOOT_DIR:-/opt/usbboot}" RPIBOOT_DIR="${RPIBOOT_DIR:-/opt/usbboot}"
GOLDEN_IMAGE="${GOLDEN_IMAGE:-/var/lib/cm4-provisioning/golden.img}" GOLDEN_IMAGE="${GOLDEN_IMAGE:-/var/lib/cm4-provisioning/golden.img}"
# Expected eMMC size in bytes. reTerminal DM (CM4) has 32 GB eMMC (~31268536320 bytes). # Expected eMMC size in bytes (optional). If set, used to prefer among multiple new devices; if unset, any new block device after rpiboot is accepted (works for 8/16/32 GB CM4).
EMMC_SIZE_BYTES="${EMMC_SIZE_BYTES:-$(( 32 * 1024 * 1024 * 1024 ))}" EMMC_SIZE_BYTES="${EMMC_SIZE_BYTES:-}"
LOG_TAG="cm4-flash" LOG_TAG="cm4-flash"
STATUS_FILE="${STATUS_FILE:-/var/lib/cm4-provisioning/status.json}" STATUS_FILE="${STATUS_FILE:-/var/lib/cm4-provisioning/status.json}"
LOG_FILE="${LOG_FILE:-/var/lib/cm4-provisioning/flash.log}" LOG_FILE="${LOG_FILE:-/var/lib/cm4-provisioning/flash.log}"
@@ -51,7 +51,7 @@ trap 'rm -f "$LOCK_FILE" "$CURRENT_DEVICE_FILE" "$DEVICE_SOURCE_FILE" 2>/dev/nul
ENABLE_FILE="${ENABLE_FILE:-/etc/cm4-provisioning/enabled}" ENABLE_FILE="${ENABLE_FILE:-/etc/cm4-provisioning/enabled}"
if [[ -n "$ENABLE_FILE" && ! -f "$ENABLE_FILE" ]]; then if [[ -n "$ENABLE_FILE" && ! -f "$ENABLE_FILE" ]]; then
log "Skipping: $ENABLE_FILE not present" log "Skipping: $ENABLE_FILE not present"
write_status "idle" "Provisioning disabled (remove /etc/cm4-provisioning/enabled to enable)" "null" 2>/dev/null || true write_status "idle" "Provisioning disabled (touch /etc/cm4-provisioning/enabled to enable)" "null" 2>/dev/null || true
exit 0 exit 0
fi fi
@@ -90,8 +90,8 @@ if [[ -z "$RPIBOOT_GADGET" ]]; then
write_status "error" "rpiboot gadget missing" "null" "Copy mass-storage-gadget(64) to $RPIBOOT_DIR" write_status "error" "rpiboot gadget missing" "null" "Copy mass-storage-gadget(64) to $RPIBOOT_DIR"
exit 1 exit 1
fi fi
# rpiboot requires bootfiles.bin or one of bootcode*.bin in the gadget dir; empty dir causes "No 'bootcode' files found" # rpiboot requires bootfiles.bin, bootcode*.bin, or boot.img in the gadget dir; empty dir causes "No 'bootcode' files found"
if [[ ! -f "$RPIBOOT_GADGET/bootfiles.bin" && ! -f "$RPIBOOT_GADGET/bootcode.bin" && ! -f "$RPIBOOT_GADGET/bootcode4.bin" && ! -f "$RPIBOOT_GADGET/bootcode5.bin" ]]; then if [[ ! -f "$RPIBOOT_GADGET/bootfiles.bin" && ! -f "$RPIBOOT_GADGET/bootcode.bin" && ! -f "$RPIBOOT_GADGET/bootcode4.bin" && ! -f "$RPIBOOT_GADGET/bootcode5.bin" && ! -f "$RPIBOOT_GADGET/boot.img" ]]; then
log "rpiboot gadget dir has no boot files: $RPIBOOT_GADGET (reinstall usbboot)" log "rpiboot gadget dir has no boot files: $RPIBOOT_GADGET (reinstall usbboot)"
write_status "error" "rpiboot gadget empty" "null" "No boot files in $RPIBOOT_GADGET. On the host run: fix-gadget-bootcode-on-host.sh (or from your machine: ssh root@HOST 'bash -s' < scripts/fix-gadget-bootcode-on-host.sh). See docs troubleshooting." write_status "error" "rpiboot gadget empty" "null" "No boot files in $RPIBOOT_GADGET. On the host run: fix-gadget-bootcode-on-host.sh (or from your machine: ssh root@HOST 'bash -s' < scripts/fix-gadget-bootcode-on-host.sh). See docs troubleshooting."
exit 1 exit 1
@@ -104,51 +104,86 @@ write_status "rpiboot" "Connecting to CM4 in boot mode…" "0"
# Block devices before rpiboot (so we can detect new one after) # Block devices before rpiboot (so we can detect new one after)
before_devs=$(lsblk -nd -o NAME 2>/dev/null | sort) before_devs=$(lsblk -nd -o NAME 2>/dev/null | sort)
log "Starting rpiboot to expose CM4 eMMC as mass storage..." log "Starting rpiboot to expose CM4 eMMC as mass storage (gadget: $RPIBOOT_GADGET)..."
# Run rpiboot with 90s timeout so we don't hang if it doesn't exit cleanly when device switches to mass storage # Run rpiboot with 180s timeout so device has time to receive bootcode and switch to mass storage; -v for verbose
rpiboot_exit=0 rpiboot_exit=0
timeout 90 "$RPIBOOT_BIN" -d "$RPIBOOT_GADGET" || rpiboot_exit=$? timeout 180 "$RPIBOOT_BIN" -v -d "$RPIBOOT_GADGET" || rpiboot_exit=$?
# timeout returns 124 if killed by timeout; 0 or other if rpiboot exited on its own # timeout returns 124 if killed by timeout; 0 or other if rpiboot exited on its own
if [[ "$rpiboot_exit" -eq 124 ]]; then if [[ "$rpiboot_exit" -eq 124 ]]; then
log "rpiboot timed out after 90s (device may have switched to mass storage)" log "rpiboot timed out after 180s (device may have switched to mass storage)"
elif [[ "$rpiboot_exit" -ne 0 ]]; then elif [[ "$rpiboot_exit" -ne 0 ]]; then
log "rpiboot exited with code $rpiboot_exit" log "rpiboot exited with code $rpiboot_exit"
log "Common causes: (1) No device in USB boot mode — set eMMC disable jumper and use USB slave port. (2) Wrong USB port or cable. (3) Run on host: tail -50 /var/lib/cm4-provisioning/flash.log"
write_status "error" "rpiboot failed" "null" "rpiboot failed or no device connected. Check flash.log on host. Try unplug/replug USB." write_status "error" "rpiboot failed" "null" "rpiboot failed or no device connected. Check flash.log on host. Try unplug/replug USB."
exit 1 exit 1
fi fi
echo "[$(date -Iseconds)] rpiboot finished (exit=$rpiboot_exit); starting device scan" echo "[$(date -Iseconds)] rpiboot finished (exit=$rpiboot_exit); starting device scan"
log "rpiboot completed; waiting for block device..." log "rpiboot completed; waiting for new block device (any size — 8/16/32 GB CM4 supported)..."
write_status "rpiboot" "rpiboot done, waiting for block device…" "10" write_status "rpiboot" "rpiboot done, waiting for block device…" "10"
# rpiboot exits when device switches to mass storage; udev may need several seconds to create /dev/sdX # rpiboot exits when device switches to mass storage; udev may need many seconds to create /dev/sdX
# Poll for new block device for up to 30s (device switch can be slow) # Dynamic detection: accept any NEW block device (not present before rpiboot). No fixed eMMC size required.
# When rpiboot timed out (124), device may still be switching — wait longer in that case
max_wait=60
[[ "$rpiboot_exit" -eq 124 ]] && max_wait=90
target_dev="" target_dev=""
for wait_sec in $(seq 2 2 10) $(seq 12 2 30); do new_devs=""
for wait_sec in $(seq 2 2 20) $(seq 22 2 $max_wait); do
sleep 2 sleep 2
new_devs=""
for dev in /dev/sd[a-z] /dev/sd[a-z][a-z]; do for dev in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
[[ -b "$dev" ]] || continue [[ -b "$dev" ]] || continue
[[ "$dev" =~ [0-9]$ ]] && continue [[ "$dev" =~ [0-9]$ ]] && continue
# Only consider devices that appeared after rpiboot (the CM4 eMMC)
if [[ "$before_devs" != *"${dev#/dev/}"* ]]; then
size=$(blockdev --getsize64 "$dev" 2>/dev/null || true) size=$(blockdev --getsize64 "$dev" 2>/dev/null || true)
if [[ -n "$size" ]]; then [[ -n "$size" ]] && new_devs="$new_devs $dev:$size"
if (( size >= EMMC_SIZE_BYTES * 95 / 100 && size <= EMMC_SIZE_BYTES * 105 / 100 )); then
target_dev=$dev
break 2
fi
if [[ -z "$target_dev" && "$before_devs" != *"${dev#/dev/}"* ]]; then
target_dev=$dev
fi
fi fi
done done
[[ -n "$target_dev" ]] && break new_devs="${new_devs# }"
if [[ -n "$new_devs" ]]; then
# One new device: use it (dynamic — works for any eMMC size)
# Multiple new devices: prefer one matching EMMC_SIZE_BYTES if set, else largest
if [[ "$new_devs" != *" "* ]]; then
target_dev="${new_devs%%:*}"
break
else
best_dev="" best_size=0 best_delta=999999999999
for entry in $new_devs; do
dev="${entry%%:*}"
size="${entry##*:}"
if [[ -z "$EMMC_SIZE_BYTES" || "$EMMC_SIZE_BYTES" -eq 0 ]]; then
# No size hint: take largest new device
[[ "$size" -gt "$best_size" ]] && { best_dev="$dev"; best_size="$size"; }
else
delta=$(( size - EMMC_SIZE_BYTES )); [[ "$delta" -lt 0 ]] && delta=$(( -delta ))
[[ "$delta" -lt "$best_delta" ]] && { best_dev="$dev"; best_delta="$delta"; }
fi
done
[[ -n "$best_dev" ]] && target_dev="$best_dev" && break
fi
fi
log "Waiting for block device... ${wait_sec}s" log "Waiting for block device... ${wait_sec}s"
write_status "rpiboot" "Waiting for eMMC block device… (${wait_sec}s)" "10" write_status "rpiboot" "Waiting for eMMC block device… (${wait_sec}s)" "10"
done done
log "Device scan complete. before_devs=[$before_devs] target_dev=[$target_dev]" log "Device scan complete. before_devs=[$before_devs] target_dev=[$target_dev]"
if [[ -n "$target_dev" ]]; then
detected_size=$(blockdev --getsize64 "$target_dev" 2>/dev/null || true)
log "Using $target_dev (size=${detected_size:-?} bytes, $(( ${detected_size:-0} / 1024 / 1024 / 1024 )) GB)"
fi
if [[ -z "$target_dev" ]]; then if [[ -z "$target_dev" ]]; then
log "No suitable block device found after rpiboot (expected ~${EMMC_SIZE_BYTES} bytes)" log "No new block device found after rpiboot"
write_status "error" "No eMMC device found" "null" "No suitable block device after rpiboot" log "Current block devices (for debugging):"
lsblk -nd -o NAME,SIZE,TYPE 2>/dev/null | while read -r line; do log " $line"; done
for d in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
[[ -b "$d" ]] || continue
[[ "$d" =~ [0-9]$ ]] && continue
s=$(blockdev --getsize64 "$d" 2>/dev/null || true)
log " $d size=$s ($((${s:-0} / 1024 / 1024 / 1024)) GB)"
done
write_status "error" "No eMMC device found" "null" "No suitable block device after rpiboot. Check flash.log on host; unplug/replug and ensure eMMC disable jumper is set."
exit 1 exit 1
fi fi

View File

@@ -1,8 +1,8 @@
# dnsmasq: DHCP + TFTP on eth1 only (provisioning LAN). # dnsmasq: DHCP + TFTP + DNS on eth1 only (provisioning LAN).
# Install to /etc/dnsmasq.d/network-boot.conf on the LXC. # Install to /etc/dnsmasq.d/network-boot.conf on the LXC.
# Restrict to eth1 so we don't interfere with host/other DHCP. # Restrict to eth1 so we don't interfere with host/other DHCP.
# When using setup-network-boot-on-lxc.sh, the actual subnet and DHCP range # When using setup-network-boot-on-lxc.sh, the actual subnet, DHCP range, and
# come from /opt/cm4-provisioning/lan-subnet.conf (written by deploy-to-proxmox.sh). # file.server address come from /opt/cm4-provisioning/lan-subnet.conf (written by deploy-to-proxmox.sh).
# Listen only on eth1 (provisioning LAN) # Listen only on eth1 (provisioning LAN)
interface=eth1 interface=eth1
@@ -11,6 +11,9 @@ bind-interfaces
# DHCP range for devices on eth1 (template; setup script uses lan-subnet.conf) # DHCP range for devices on eth1 (template; setup script uses lan-subnet.conf)
dhcp-range=10.20.50.100,10.20.50.200,12h dhcp-range=10.20.50.100,10.20.50.200,12h
# DNS: file.server -> eth1 IP (LAN_GW) so scripts can use http://file.server/... (setup script writes this)
# address=/file.server/10.20.50.1
# TFTP for Raspberry Pi / CM4 network boot # TFTP for Raspberry Pi / CM4 network boot
enable-tftp enable-tftp
tftp-root=/srv/tftpboot tftp-root=/srv/tftpboot
@@ -19,6 +22,3 @@ tftp-root=/srv/tftpboot
# Logging (optional; disable in production if too noisy) # Logging (optional; disable in production if too noisy)
log-dhcp log-dhcp
log-queries log-queries
# Do not use /etc/resolv.conf or act as DNS if you only want DHCP+TFTP
port=0

View File

@@ -27,6 +27,7 @@
# DEPLOY_LXC_WAN_IP=dhcp — WAN address: dhcp (default) or static e.g. 192.168.1.10/24 # DEPLOY_LXC_WAN_IP=dhcp — WAN address: dhcp (default) or static e.g. 192.168.1.10/24
# DEPLOY_LXC_LAN_BRIDGE=vmbr1 — If set, add eth1 as LAN on this bridge (e.g. provisioning / network-boot) # DEPLOY_LXC_LAN_BRIDGE=vmbr1 — If set, add eth1 as LAN on this bridge (e.g. provisioning / network-boot)
# DEPLOY_LXC_LAN_SUBNET=10.20.50.1/24 — LXC IP on LAN (gateway); used only if DEPLOY_LXC_LAN_BRIDGE is set; default 10.20.50.1/24 # DEPLOY_LXC_LAN_SUBNET=10.20.50.1/24 — LXC IP on LAN (gateway); used only if DEPLOY_LXC_LAN_BRIDGE is set; default 10.20.50.1/24
# DEPLOY_EMMC_SIZE_GB=32 — optional: eMMC size in GB (used only when multiple new devices appear; default 32). Detection is dynamic — single new device is used regardless of size.
# #
# Legacy: DEPLOY_LXC_NET1="name=eth1,bridge=vmbr1,ip=10.20.50.1/24" still works; overridden by DEPLOY_LXC_LAN_BRIDGE + DEPLOY_LXC_LAN_SUBNET if both are set. # Legacy: DEPLOY_LXC_NET1="name=eth1,bridge=vmbr1,ip=10.20.50.1/24" still works; overridden by DEPLOY_LXC_LAN_BRIDGE + DEPLOY_LXC_LAN_SUBNET if both are set.
# #
@@ -150,8 +151,10 @@ rsync -a "$REPO_DIR/" "$PROXMOX:/tmp/emmc-provisioning-deploy/" --exclude='.git'
log "[4/5] Running remote install (host + LXC) ..." log "[4/5] Running remote install (host + LXC) ..."
# Pass optional LXC SSH vars (base64), selected storage, and network (WAN/LAN bridge + subnet) # Pass optional LXC SSH vars (base64), selected storage, network (WAN/LAN), and eMMC size
ssh "$PROXMOX" "ROOTFS_STORAGE='$ROOTFS_STORAGE' CM4_BACKUPS_HOST_PATH='${CM4_BACKUPS_HOST_PATH:-}' DEPLOY_SSH_KEY_B64='${DEPLOY_SSH_KEY_B64:-}' DEPLOY_LXC_PWD_B64='${DEPLOY_LXC_PWD_B64:-}' DEPLOY_LXC_WAN_BRIDGE='${DEPLOY_LXC_WAN_BRIDGE:-}' DEPLOY_LXC_WAN_IP='${DEPLOY_LXC_WAN_IP:-}' DEPLOY_LXC_LAN_BRIDGE='${DEPLOY_LXC_LAN_BRIDGE:-}' DEPLOY_LXC_LAN_SUBNET='${DEPLOY_LXC_LAN_SUBNET:-}' DEPLOY_LXC_NET1='${DEPLOY_LXC_NET1:-}'" bash -s << 'REMOTE' EMMC_GB="${DEPLOY_EMMC_SIZE_GB:-32}"
EMMC_SIZE_BYTES=$(( EMMC_GB * 1024 * 1024 * 1024 ))
ssh "$PROXMOX" "ROOTFS_STORAGE='$ROOTFS_STORAGE' CM4_BACKUPS_HOST_PATH='${CM4_BACKUPS_HOST_PATH:-}' DEPLOY_SSH_KEY_B64='${DEPLOY_SSH_KEY_B64:-}' DEPLOY_LXC_PWD_B64='${DEPLOY_LXC_PWD_B64:-}' DEPLOY_LXC_WAN_BRIDGE='${DEPLOY_LXC_WAN_BRIDGE:-}' DEPLOY_LXC_WAN_IP='${DEPLOY_LXC_WAN_IP:-}' DEPLOY_LXC_LAN_BRIDGE='${DEPLOY_LXC_LAN_BRIDGE:-}' DEPLOY_LXC_LAN_SUBNET='${DEPLOY_LXC_LAN_SUBNET:-}' DEPLOY_LXC_NET1='${DEPLOY_LXC_NET1:-}' EMMC_SIZE_BYTES='$EMMC_SIZE_BYTES' EMMC_GB='$EMMC_GB'" bash -s << 'REMOTE'
set -e set -e
DEPLOY=/tmp/emmc-provisioning-deploy DEPLOY=/tmp/emmc-provisioning-deploy
ROOTFS_STORAGE="${ROOTFS_STORAGE:?ROOTFS_STORAGE not set}" ROOTFS_STORAGE="${ROOTFS_STORAGE:?ROOTFS_STORAGE not set}"
@@ -177,6 +180,7 @@ for id in $(pct list 2>/dev/null | awk 'NR>1 {print $1}'); do
done done
if [[ -n "$CTID" ]]; then if [[ -n "$CTID" ]]; then
log "Found existing LXC $CTID (hostname: $LXC_HOSTNAME)." log "Found existing LXC $CTID (hostname: $LXC_HOSTNAME)."
pct set "$CTID" -nameserver 8.8.8.8
else else
MAX_ID=$(pct list 2>/dev/null | awk 'NR>1 {print $1}' | sort -n | tail -1) MAX_ID=$(pct list 2>/dev/null | awk 'NR>1 {print $1}' | sort -n | tail -1)
[[ -z "$MAX_ID" ]] && MAX_ID=0 [[ -z "$MAX_ID" ]] && MAX_ID=0
@@ -210,9 +214,10 @@ else
--hostname "$LXC_HOSTNAME" --memory 1024 --swap 0 --cores 1 \ --hostname "$LXC_HOSTNAME" --memory 1024 --swap 0 --cores 1 \
--rootfs "${ROOTFS_STORAGE}:8" --net0 name=eth0,bridge="$WAN_BRIDGE",ip="$WAN_IP" $NET1_OPT \ --rootfs "${ROOTFS_STORAGE}:8" --net0 name=eth0,bridge="$WAN_BRIDGE",ip="$WAN_IP" $NET1_OPT \
--unprivileged 0 --features nesting=1 -tag cm4-provisioning --unprivileged 0 --features nesting=1 -tag cm4-provisioning
pct set "$CTID" -nameserver 8.8.8.8
mkdir -p /var/lib/cm4-provisioning mkdir -p /var/lib/cm4-provisioning
pct set "$CTID" -mp0 /var/lib/cm4-provisioning,mp=/var/lib/cm4-provisioning pct set "$CTID" -mp0 /var/lib/cm4-provisioning,mp=/var/lib/cm4-provisioning
log "LXC $CTID created and mount configured." log "LXC $CTID created and mount configured (DNS 8.8.8.8)."
fi fi
# Optional: bind-mount host directory for backup images (skip if already mounted with same path) # Optional: bind-mount host directory for backup images (skip if already mounted with same path)
@@ -265,11 +270,11 @@ cp "$DEPLOY/host/89-cm4-boot-mode-permissions.rules" /etc/udev/rules.d/ 2>/dev/n
cp "$DEPLOY/host/90-cm4-boot-mode.rules" /etc/udev/rules.d/ cp "$DEPLOY/host/90-cm4-boot-mode.rules" /etc/udev/rules.d/
udevadm control --reload-rules 2>/dev/null || true udevadm control --reload-rules 2>/dev/null || true
log "Host: env and dirs ..." log "Host: env and dirs (EMMC ${EMMC_GB:-32}GB = $EMMC_SIZE_BYTES bytes) ..."
cat > /opt/cm4-provisioning/env << 'ENV' cat > /opt/cm4-provisioning/env << ENV
GOLDEN_IMAGE=/var/lib/cm4-provisioning/golden.img GOLDEN_IMAGE=/var/lib/cm4-provisioning/golden.img
RPIBOOT_DIR=/opt/usbboot RPIBOOT_DIR=/opt/usbboot
EMMC_SIZE_BYTES=8589934592 EMMC_SIZE_BYTES=${EMMC_SIZE_BYTES:-34359738368}
ENV ENV
[[ -n "$BACKUPS_HOST_PATH" ]] && echo "BACKUPS_DIR=$BACKUPS_HOST_PATH" >> /opt/cm4-provisioning/env [[ -n "$BACKUPS_HOST_PATH" ]] && echo "BACKUPS_DIR=$BACKUPS_HOST_PATH" >> /opt/cm4-provisioning/env
touch /etc/cm4-provisioning/enabled touch /etc/cm4-provisioning/enabled
@@ -330,7 +335,8 @@ if [[ -n "$LAN_SUBNET_FOR_CONF" ]]; then
DHCP_RANGE_START="${BASE_3}.100" DHCP_RANGE_START="${BASE_3}.100"
DHCP_RANGE_END="${BASE_3}.200" DHCP_RANGE_END="${BASE_3}.200"
pct exec "$CTID" -- bash -c "mkdir -p /opt/cm4-provisioning && echo 'LAN_GW=$LAN_GW' > /opt/cm4-provisioning/lan-subnet.conf && echo 'LAN_CIDR=$LAN_CIDR' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_START=$DHCP_RANGE_START' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_END=$DHCP_RANGE_END' >> /opt/cm4-provisioning/lan-subnet.conf" pct exec "$CTID" -- bash -c "mkdir -p /opt/cm4-provisioning && echo 'LAN_GW=$LAN_GW' > /opt/cm4-provisioning/lan-subnet.conf && echo 'LAN_CIDR=$LAN_CIDR' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_START=$DHCP_RANGE_START' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_END=$DHCP_RANGE_END' >> /opt/cm4-provisioning/lan-subnet.conf"
log "LXC: wrote /opt/cm4-provisioning/lan-subnet.conf (LAN_GW=$LAN_GW, LAN_CIDR=$LAN_CIDR, DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END})" echo "$LAN_GW" > "$DEPLOY/lxc_lan_ip.txt"
log "LXC: wrote /opt/cm4-provisioning/lan-subnet.conf (LAN_GW=$LAN_GW); dashboard will be reachable on LAN at http://${LAN_GW}:5000"
else else
log "Warning: DEPLOY_LXC_LAN_SUBNET=$LAN_SUBNET_FOR_CONF not in form A.B.C.D/PREFIX; skipping lan-subnet.conf" log "Warning: DEPLOY_LXC_LAN_SUBNET=$LAN_SUBNET_FOR_CONF not in form A.B.C.D/PREFIX; skipping lan-subnet.conf"
fi fi
@@ -343,7 +349,7 @@ pct push "$CTID" "$DEPLOY/host/flash-emmc-on-connect.sh" /opt/cm4-provisioning/f
pct exec "$CTID" -- chmod +x /opt/cm4-provisioning/flash-emmc-on-connect.sh pct exec "$CTID" -- chmod +x /opt/cm4-provisioning/flash-emmc-on-connect.sh
pct push "$CTID" "$DEPLOY/host/cm4-flash-trigger.sh" /usr/local/bin/cm4-flash-trigger.sh pct push "$CTID" "$DEPLOY/host/cm4-flash-trigger.sh" /usr/local/bin/cm4-flash-trigger.sh
pct exec "$CTID" -- chmod +x /usr/local/bin/cm4-flash-trigger.sh pct exec "$CTID" -- chmod +x /usr/local/bin/cm4-flash-trigger.sh
pct exec "$CTID" -- bash -c 'echo -e "GOLDEN_IMAGE=/var/lib/cm4-provisioning/golden.img\nRPIBOOT_DIR=/opt/usbboot\nEMMC_SIZE_BYTES=8589934592" > /opt/cm4-provisioning/env' pct exec "$CTID" -- bash -c "echo -e 'GOLDEN_IMAGE=/var/lib/cm4-provisioning/golden.img\nRPIBOOT_DIR=/opt/usbboot\nEMMC_SIZE_BYTES=$EMMC_SIZE_BYTES' > /opt/cm4-provisioning/env"
# --- LXC: dashboard (all files) --- # --- LXC: dashboard (all files) ---
log "LXC: installing dashboard ..." log "LXC: installing dashboard ..."
@@ -352,6 +358,8 @@ pct push "$CTID" "$DEPLOY/dashboard/app.py" /opt/cm4-provisioning/dashboard/app.
pct push "$CTID" "$DEPLOY/dashboard/templates/home.html" /opt/cm4-provisioning/dashboard/templates/home.html pct push "$CTID" "$DEPLOY/dashboard/templates/home.html" /opt/cm4-provisioning/dashboard/templates/home.html
pct push "$CTID" "$DEPLOY/dashboard/templates/login.html" /opt/cm4-provisioning/dashboard/templates/login.html pct push "$CTID" "$DEPLOY/dashboard/templates/login.html" /opt/cm4-provisioning/dashboard/templates/login.html
pct push "$CTID" "$DEPLOY/dashboard/templates/admin.html" /opt/cm4-provisioning/dashboard/templates/admin.html pct push "$CTID" "$DEPLOY/dashboard/templates/admin.html" /opt/cm4-provisioning/dashboard/templates/admin.html
pct push "$CTID" "$DEPLOY/dashboard/templates/portal_files.html" /opt/cm4-provisioning/dashboard/templates/portal_files.html
pct push "$CTID" "$DEPLOY/dashboard/templates/cloudinit_build.html" /opt/cm4-provisioning/dashboard/templates/cloudinit_build.html
pct push "$CTID" "$DEPLOY/dashboard/cm4-dashboard.service" /opt/cm4-provisioning/dashboard/cm4-dashboard.service pct push "$CTID" "$DEPLOY/dashboard/cm4-dashboard.service" /opt/cm4-provisioning/dashboard/cm4-dashboard.service
# Dashboard secret for sessions (create once so logins persist across restarts) # Dashboard secret for sessions (create once so logins persist across restarts)
pct exec "$CTID" -- bash -c '[[ -f /opt/cm4-provisioning/dashboard.env ]] || echo "CM4_DASHBOARD_SECRET_KEY=$(openssl rand -hex 24 2>/dev/null || head -c 24 /dev/urandom | xxd -p)" > /opt/cm4-provisioning/dashboard.env' pct exec "$CTID" -- bash -c '[[ -f /opt/cm4-provisioning/dashboard.env ]] || echo "CM4_DASHBOARD_SECRET_KEY=$(openssl rand -hex 24 2>/dev/null || head -c 24 /dev/urandom | xxd -p)" > /opt/cm4-provisioning/dashboard.env'
@@ -397,14 +405,16 @@ log "Deploy done on remote. LXC ID: $CTID"
# Heredoc terminator (must be at column 1, no leading space/tab) # Heredoc terminator (must be at column 1, no leading space/tab)
REMOTE REMOTE
# Read LXC IP written by remote (container hostname -I) # Read LXC IP and optional LAN IP written by remote
LXC_IP=$(ssh "$PROXMOX" "cat /tmp/emmc-provisioning-deploy/lxc_ip.txt 2>/dev/null" | tr -d '\n\r') LXC_IP=$(ssh "$PROXMOX" "cat /tmp/emmc-provisioning-deploy/lxc_ip.txt 2>/dev/null" | tr -d '\n\r')
LXC_LAN_IP=$(ssh "$PROXMOX" "cat /tmp/emmc-provisioning-deploy/lxc_lan_ip.txt 2>/dev/null" | tr -d '\n\r')
log "[5/5] Deploy finished." log "[5/5] Deploy finished."
echo "" echo ""
echo "=== Deploy complete ===" echo "=== Deploy complete ==="
echo "Host and LXC are fully set up: usbboot (rpiboot), PiShrink, dashboard, systemd, udev." echo "Host and LXC are fully set up: usbboot (rpiboot), PiShrink, dashboard, systemd, udev."
[[ -n "$LXC_IP" ]] && echo " LXC IP: $LXC_IP" [[ -n "$LXC_IP" ]] && echo " LXC IP (WAN): $LXC_IP"
[[ -n "$LXC_LAN_IP" ]] && echo " LXC IP (LAN): $LXC_LAN_IP"
echo "" echo ""
echo "--- Only remaining step (manual) ---" echo "--- Only remaining step (manual) ---"
echo " Add a golden image for Deploy (writing image to device):" echo " Add a golden image for Deploy (writing image to device):"
@@ -413,7 +423,8 @@ echo " • Or copy your image: scp your-image.img $PROXMOX:/var/lib/cm4-provi
echo " Backup (read from device) works without golden.img." echo " Backup (read from device) works without golden.img."
echo "" echo ""
echo "--- You have ---" echo "--- You have ---"
echo " - Dashboard: http://${LXC_IP:-<LXC-IP>}:5000" echo " - Dashboard (WAN): http://${LXC_IP:-<LXC-IP>}:5000"
[[ -n "$LXC_LAN_IP" ]] && echo " - Dashboard (LAN): http://${LXC_LAN_IP}:5000 (use from devices on provisioning LAN)"
[[ -n "${DEPLOY_LXC_ROOT_PASSWORD:-}" || -n "${DEPLOY_SSH_KEY_B64:-}" ]] && [[ -n "$LXC_IP" ]] && echo " - LXC SSH: ssh root@$LXC_IP (password and/or key were set)" [[ -n "${DEPLOY_LXC_ROOT_PASSWORD:-}" || -n "${DEPLOY_SSH_KEY_B64:-}" ]] && [[ -n "$LXC_IP" ]] && echo " - LXC SSH: ssh root@$LXC_IP (password and/or key were set)"
[[ -n "${DEPLOY_LXC_ROOT_PASSWORD:-}" || -n "${DEPLOY_SSH_KEY_B64:-}" ]] && [[ -z "$LXC_IP" ]] && echo " - LXC SSH: ssh root@<LXC-IP> (password and/or key were set)" [[ -n "${DEPLOY_LXC_ROOT_PASSWORD:-}" || -n "${DEPLOY_SSH_KEY_B64:-}" ]] && [[ -z "$LXC_IP" ]] && echo " - LXC SSH: ssh root@<LXC-IP> (password and/or key were set)"
[[ -n "${CM4_BACKUPS_HOST_PATH:-}" ]] && echo " - Backups on host: $CM4_BACKUPS_HOST_PATH" [[ -n "${CM4_BACKUPS_HOST_PATH:-}" ]] && echo " - Backups on host: $CM4_BACKUPS_HOST_PATH"

View File

@@ -3,6 +3,9 @@
# Cause: mass-storage-gadget64 has no real boot files (broken symlinks or Git LFS not pulled). # Cause: mass-storage-gadget64 has no real boot files (broken symlinks or Git LFS not pulled).
# This script removes broken symlinks and extracts bootcode4.bin from the installed rpiboot binary. # This script removes broken symlinks and extracts bootcode4.bin from the installed rpiboot binary.
# #
# Does NOT fix: "libusb_bulk_transfer returned -7" / "Failed to write correct length" — that is a USB
# transfer/timing issue (try USB 2.0 port, or rpiboot -m 2000). See PROXMOX-LXC-DEPLOYMENT.md.
#
# On host: bash fix-gadget-bootcode-on-host.sh # On host: bash fix-gadget-bootcode-on-host.sh
# From your machine: ssh root@HOST 'bash -s' < emmc-provisioning/scripts/fix-gadget-bootcode-on-host.sh # From your machine: ssh root@HOST 'bash -s' < emmc-provisioning/scripts/fix-gadget-bootcode-on-host.sh

View File

@@ -1,10 +1,12 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Setup network boot on the provisioning LXC: DHCP + TFTP on eth1, NAT so LAN uses eth0 for internet. # Setup network boot on the provisioning LXC: DHCP + TFTP on eth1, NAT so LAN uses eth0 for internet.
# Run inside the LXC (as root), or from your machine: ./setup-network-boot-on-lxc.sh root@10.130.60.141 # Run inside the LXC (as root), or from your machine: ./setup-network-boot-on-lxc.sh root@10.130.60.141 [SUBNET]
# When run with ssh target, rsyncs lxc/ and runs this script inside the container. # SUBNET optional: A.B.C.D/PREFIX (e.g. 10.100.1.1/24). When run with ssh target, writes lan-subnet.conf on LXC if SUBNET given.
# When run with ssh target, rsyncs lxc/ and runs this script inside the container. Subnet is read from /opt/cm4-provisioning/lan-subnet.conf.
set -e set -e
TARGET="${1:-}" TARGET="${1:-}"
SUBNET_ARG="${2:-}"
if [[ -n "$TARGET" ]]; then if [[ -n "$TARGET" ]]; then
# Run remotely: sync lxc/ and script, then execute inside LXC # Run remotely: sync lxc/ and script, then execute inside LXC
@@ -19,21 +21,51 @@ if [[ -n "$TARGET" ]]; then
echo "Note: network-boot-initramfs/initrd.img not found (run build.sh first); skipping." echo "Note: network-boot-initramfs/initrd.img not found (run build.sh first); skipping."
fi fi
scp "$SCRIPT_DIR/setup-network-boot-on-lxc.sh" "$TARGET:/tmp/cm4-network-boot-lxc/setup.sh" scp "$SCRIPT_DIR/setup-network-boot-on-lxc.sh" "$TARGET:/tmp/cm4-network-boot-lxc/setup.sh"
# If SUBNET_ARG given, write lan-subnet.conf on LXC so inner script uses the set subnet
if [[ -n "$SUBNET_ARG" ]]; then
if [[ "$SUBNET_ARG" =~ ^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/([0-9]+)$ ]]; then
LAN_GW="${BASH_REMATCH[1]}"
PREFIX="${BASH_REMATCH[2]}"
BASE_3="${LAN_GW%.*}"
LAN_CIDR="${BASE_3}.0/${PREFIX}"
DHCP_RANGE_START="${BASE_3}.100"
DHCP_RANGE_END="${BASE_3}.200"
ssh "$TARGET" "mkdir -p /opt/cm4-provisioning && echo 'LAN_GW=$LAN_GW' > /opt/cm4-provisioning/lan-subnet.conf && echo 'LAN_CIDR=$LAN_CIDR' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_START=$DHCP_RANGE_START' >> /opt/cm4-provisioning/lan-subnet.conf && echo 'DHCP_RANGE_END=$DHCP_RANGE_END' >> /opt/cm4-provisioning/lan-subnet.conf"
echo "Wrote lan-subnet.conf on LXC (LAN_GW=$LAN_GW, DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END})."
else
echo "Warning: SUBNET must be A.B.C.D/PREFIX (e.g. 10.100.1.1/24); ignoring '$SUBNET_ARG'."
fi
fi
ssh "$TARGET" "bash /tmp/cm4-network-boot-lxc/setup.sh" ssh "$TARGET" "bash /tmp/cm4-network-boot-lxc/setup.sh"
echo "Done." echo "Done."
exit 0 exit 0
fi fi
# --- Running inside the LXC from here --- # --- Running inside the LXC from here ---
# LAN subnet: use /opt/cm4-provisioning/lan-subnet.conf (written by deploy-to-proxmox.sh when DEPLOY_LXC_LAN_SUBNET is set) # LAN subnet: use /opt/cm4-provisioning/lan-subnet.conf (written by deploy-to-proxmox.sh or passed as SUBNET when running remotely)
# Optional first arg when running locally: A.B.C.D/PREFIX to set/write lan-subnet.conf
LAN_CONF="/opt/cm4-provisioning/lan-subnet.conf" LAN_CONF="/opt/cm4-provisioning/lan-subnet.conf"
if [[ -f "$LAN_CONF" ]]; then if [[ "$1" =~ ^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/([0-9]+)$ ]]; then
LAN_GW="${BASH_REMATCH[1]}"
PREFIX="${BASH_REMATCH[2]}"
BASE_3="${LAN_GW%.*}"
LAN_CIDR="${BASE_3}.0/${PREFIX}"
DHCP_RANGE_START="${BASE_3}.100"
DHCP_RANGE_END="${BASE_3}.200"
mkdir -p /opt/cm4-provisioning
echo "LAN_GW=$LAN_GW" > "$LAN_CONF"
echo "LAN_CIDR=$LAN_CIDR" >> "$LAN_CONF"
echo "DHCP_RANGE_START=$DHCP_RANGE_START" >> "$LAN_CONF"
echo "DHCP_RANGE_END=$DHCP_RANGE_END" >> "$LAN_CONF"
echo "Using set subnet: $LAN_CIDR (gateway $LAN_GW), DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END}."
elif [[ -f "$LAN_CONF" ]]; then
source "$LAN_CONF" source "$LAN_CONF"
else else
LAN_GW="10.20.50.1" LAN_GW="10.20.50.1"
LAN_CIDR="10.20.50.0/24" LAN_CIDR="10.20.50.0/24"
DHCP_RANGE_START="10.20.50.100" DHCP_RANGE_START="10.20.50.100"
DHCP_RANGE_END="10.20.50.200" DHCP_RANGE_END="10.20.50.200"
echo "No lan-subnet.conf and no SUBNET argument; using defaults: $LAN_CIDR."
fi fi
echo "Configuring network boot (DHCP + TFTP on eth1, NAT via eth0) — LAN $LAN_CIDR (gateway $LAN_GW), DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END} ..." echo "Configuring network boot (DHCP + TFTP on eth1, NAT via eth0) — LAN $LAN_CIDR (gateway $LAN_GW), DHCP ${DHCP_RANGE_START}-${DHCP_RANGE_END} ..."
@@ -42,17 +74,21 @@ if ! command -v dnsmasq >/dev/null 2>&1; then
apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dnsmasq apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dnsmasq
fi fi
# 2) dnsmasq config for eth1 only (DHCP + TFTP); PXE options in network-boot-pxe.conf (toggle with toggle-network-boot-dhcp.sh) # 2) dnsmasq config for eth1 only (DHCP + TFTP + DNS); PXE options in network-boot-pxe.conf (toggle with toggle-network-boot-dhcp.sh)
mkdir -p /etc/dnsmasq.d mkdir -p /etc/dnsmasq.d
cat > /etc/dnsmasq.d/network-boot.conf << DNSMASQ cat > /etc/dnsmasq.d/network-boot.conf << DNSMASQ
# DHCP on eth1 only (provisioning LAN) # DHCP + DNS on eth1 only (provisioning LAN)
# TFTP and PXE options are in network-boot-pxe.conf, controlled by toggle-network-boot-dhcp.sh # TFTP and PXE options in network-boot-pxe.conf, controlled by toggle-network-boot-dhcp.sh
interface=eth1 interface=eth1
bind-interfaces bind-interfaces
dhcp-range=${DHCP_RANGE_START},${DHCP_RANGE_END},12h dhcp-range=${DHCP_RANGE_START},${DHCP_RANGE_END},12h
# DNS: file.server resolves to this host (eth1) so scripts can use http://file.server/...
address=/file.server/${LAN_GW}
# Explicitly send this host as DNS server to DHCP clients (option 6) so they use LXC DNS and resolve file.server
dhcp-option=6,${LAN_GW}
# Other DNS queries forwarded via LXC's resolv.conf
log-dhcp log-dhcp
log-queries log-queries
port=0
DNSMASQ DNSMASQ
mkdir -p /opt/cm4-provisioning mkdir -p /opt/cm4-provisioning
if [ -f /tmp/cm4-network-boot-lxc/toggle-network-boot-dhcp.sh ]; then if [ -f /tmp/cm4-network-boot-lxc/toggle-network-boot-dhcp.sh ]; then

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Manually test rpiboot on the Proxmox host (device in boot mode must be connected).
# Usage:
# From your machine: ./test-usbboot-on-host.sh [proxmox_host]
# On the host: ./test-usbboot-on-host.sh
# With timeout (e.g. 60s): TIMEOUT=60 ./test-usbboot-on-host.sh root@100.106.128.36
# If you see "Failed to write correct length, returned -7", try USB 2.0 port or add delay:
# RPIBOOT_EXTRA_OPTS='-m 2000' ./test-usbboot-on-host.sh root@100.106.128.36
#
# Replace proxmox_host with your host, e.g. root@100.106.128.36
# Abort on the first unhandled command failure.
set -e

# Optional ssh destination (first positional argument); empty means "run locally".
HOST=${1-}

# Tunables: each keeps a non-empty value already present in the environment,
# otherwise falls back to the default given here.
#   RPIBOOT            - path to the rpiboot executable
#   GADGET             - directory holding the mass-storage gadget boot files
#   TIMEOUT            - seconds to let rpiboot run; 0 means no timeout
#   RPIBOOT_EXTRA_OPTS - extra options appended to the rpiboot command line
: "${RPIBOOT:=/opt/usbboot/rpiboot}"
: "${GADGET:=/opt/usbboot/mass-storage-gadget64}"
: "${TIMEOUT:=0}"
: "${RPIBOOT_EXTRA_OPTS:=}"
#######################################
# Run a shell command line on the target machine.
# Globals:
#   HOST (read) - ssh destination; when empty the command runs locally.
# Arguments:
#   The command line, normally passed as ONE string
#   (e.g. "test -x /path" or "lsusb | grep foo || echo none").
#   Several arguments are joined with single spaces, which is what ssh
#   does before handing the line to the remote shell.
# Returns:
#   Exit status of the command (or of ssh itself).
#######################################
run_on_host() {
  if [[ -n "$HOST" ]]; then
    ssh "$HOST" "$@"
  else
    # Callers pass full shell command lines (with pipes, ||, redirections).
    # Executing "$@" directly would treat the whole string as a single command
    # name and fail with "command not found", so let a shell parse it locally
    # exactly as the remote shell would over ssh.
    local IFS=' '
    bash -c "$*"
  fi
}
# --- Preflight: rpiboot must be executable and the gadget directory must ---
# --- contain at least one known boot file on the target machine.         ---
printf '%s\n' "=== Checking usbboot and gadget on ${HOST:-localhost} ==="
if ! run_on_host "test -x $RPIBOOT"; then
  echo "Error: $RPIBOOT not found or not executable"
  exit 1
fi
if ! run_on_host "test -d $GADGET"; then
  echo "Error: $GADGET not found"
  exit 1
fi
if ! run_on_host "test -f $GADGET/bootcode4.bin || test -f $GADGET/boot.img || test -f $GADGET/bootfiles.bin"; then
  echo "Error: no boot file in $GADGET"
  exit 1
fi
printf '%s\n' \
  " rpiboot: $RPIBOOT" \
  " gadget: $GADGET" \
  "" \
  "=== USB devices (2b8e / 0a5c:2711 = CM4 boot mode) ==="

# List USB devices matching the IDs named in the banner, or print a hint when
# none is attached.
run_on_host "lsusb | grep -E '2b8e|0a5c' || echo ' None. Connect reTerminal with eMMC disable jumper and USB slave port.'"

printf '%s\n' \
  "" \
  "=== Tip: if rpiboot fails with 'Failed to write correct length, returned -7', use a USB 2.0 port, or run: RPIBOOT_EXTRA_OPTS='-m 2000' $0 $* ===" \
  "" \
  "=== Running rpiboot (verbose) — connect device now if not already ===" \
  " When the device switches to mass storage, rpiboot will exit and a new /dev/sdX may appear." \
  " Use Ctrl+C to stop, or wait for exit." \
  ""

# --- Run rpiboot ---
# The command is kept as a plain string: it is sent verbatim over ssh, and
# locally it is expanded unquoted on purpose so RPIBOOT_EXTRA_OPTS splits into
# separate options. A non-zero exit (including a timeout) is tolerated so the
# block-device listing below always runs.
RPIBOOT_CMD="$RPIBOOT -v -d $GADGET $RPIBOOT_EXTRA_OPTS"
if [[ -n "$HOST" && "$TIMEOUT" -gt 0 ]]; then
  # Remote, bounded by timeout(1) on the remote side.
  ssh "$HOST" "timeout $TIMEOUT $RPIBOOT_CMD" || true
elif [[ -n "$HOST" ]]; then
  # Remote, unbounded: ssh -t requests a terminal on the remote side.
  ssh -t "$HOST" "$RPIBOOT_CMD" || true
elif [[ "$TIMEOUT" -gt 0 ]]; then
  # Local, bounded by timeout(1).
  timeout "$TIMEOUT" $RPIBOOT_CMD || true
else
  # Local, unbounded.
  $RPIBOOT_CMD || true
fi

# --- Afterwards: show sdX block devices so a newly exposed disk can be spotted ---
printf '%s\n' \
  "" \
  "=== Block devices now (check for new /dev/sdX) ==="
run_on_host "lsblk -nd -o NAME,SIZE,TYPE /dev/sd[a-z] 2>/dev/null || true"