diff --git a/emmc-provisioning/docs/NETWORK-BOOT-TROUBLESHOOTING.md b/emmc-provisioning/docs/NETWORK-BOOT-TROUBLESHOOTING.md index bd93888..34da304 100644 --- a/emmc-provisioning/docs/NETWORK-BOOT-TROUBLESHOOTING.md +++ b/emmc-provisioning/docs/NETWORK-BOOT-TROUBLESHOOTING.md @@ -244,10 +244,17 @@ If you set **BOOT_ORDER=0x2** (network only) for testing, the device will never ``` The bootloader will apply the EEPROM update and on the next boot use the new order (eMMC only with 0x1, or network then eMMC with 0x21). -5. **On the LXC**, restore normal cmdline for the device so the next network boot runs the provisioning client, not rescue: +5. **Reboot and apply the update** — The EEPROM update is only applied when the bootloader **boots from the same storage** where the update file was written. You wrote it to **eMMC**, so the bootloader must **boot from eMMC** once to apply it. With **BOOT_ORDER=0x2** (network only) the next reboot netboots again, so the bootloader never reads eMMC and the update is never applied. Do this **before** rebooting from the rescue shell: + - **On the LXC**, disable PXE so the next boot does not advertise TFTP: + `ssh root@<LXC_IP> '/opt/cm4-provisioning/toggle-network-boot-dhcp.sh disable'` + - Then **power cycle** the reTerminal (or run `reboot -f` / `echo b > /proc/sysrq-trigger` in the rescue shell). The bootloader will get DHCP without option 66/67; it may then try eMMC (depending on firmware) and apply the update. If it still netboots (e.g. cached TFTP), unplug the Ethernet cable and power cycle so it has no choice but eMMC. + +6. 
**After you are back in Raspbian**, restore normal cmdline for the device so the next network boot runs the provisioning client, not rescue: ```bash - rm -f /srv/tftpboot/0d1ddbda/cmdline.txt - ln -s ../cmdline.txt /srv/tftpboot/0d1ddbda/cmdline.txt + ./emmc-provisioning/scripts/disable-rescue-cmdline-on-lxc.sh root@<LXC_IP> 0d1ddbda ``` + Or on the LXC: `rm -f /srv/tftpboot/0d1ddbda/cmdline.txt && ln -s ../cmdline.txt /srv/tftpboot/0d1ddbda/cmdline.txt` + +**Why did my boot order not change?** The update file was written to the **eMMC** boot partition. The bootloader applies it only when it **boots from that partition**. When you rebooted, the device netbooted again (TFTP), so the bootloader read the “boot” files from the network, not from eMMC, and never saw or applied the update. Disable PXE (and optionally unplug Ethernet) before rebooting so the next boot is from eMMC and the update is applied. See also **NETWORK-BOOT-LXC.md** for setup and monitoring. diff --git a/emmc-provisioning/network-boot-initramfs/build.sh b/emmc-provisioning/network-boot-initramfs/build.sh index 430715e..86cf431 100755 --- a/emmc-provisioning/network-boot-initramfs/build.sh +++ b/emmc-provisioning/network-boot-initramfs/build.sh @@ -14,12 +14,17 @@ trap "rm -rf $BUILD_DIR" EXIT echo "Build dir: $BUILD_DIR" -# Layout: /init, /provisioning-client.sh, /bin/busybox, /bin/sh, /usr/bin/curl, /lib/*.so -mkdir -p "$BUILD_DIR"/{bin,usr/bin,proc,sys,dev,dev/pts,lib,mnt} +# Layout: /init, /provisioning-client.sh, /revision.txt, /bin/busybox, ... 
+mkdir -p "$BUILD_DIR"/{bin,usr/bin,proc,sys,dev,dev/pts,lib,mnt,etc,usr/share/udhcpc} cp "$SCRIPT_DIR/init" "$BUILD_DIR/init" cp "$SCRIPT_DIR/provisioning-client.sh" "$BUILD_DIR/provisioning-client.sh" cp "$SCRIPT_DIR/rescue-eeprom.sh" "$BUILD_DIR/rescue-eeprom.sh" -chmod +x "$BUILD_DIR/init" "$BUILD_DIR/provisioning-client.sh" "$BUILD_DIR/rescue-eeprom.sh" +cp "$SCRIPT_DIR/udhcpc.script" "$BUILD_DIR/usr/share/udhcpc/default.script" +chmod +x "$BUILD_DIR/init" "$BUILD_DIR/provisioning-client.sh" "$BUILD_DIR/rescue-eeprom.sh" "$BUILD_DIR/usr/share/udhcpc/default.script" +# Revision shown on serial so you can confirm the device is running the latest initrd +REV=$(date +%Y%m%d-%H%M 2>/dev/null || echo "unknown") +[ -d "$SCRIPT_DIR/../.git" ] && REV="${REV}-$(git -C "$SCRIPT_DIR" rev-parse --short HEAD 2>/dev/null)" || true +echo "$REV" > "$BUILD_DIR/revision.txt" ARCH=$(uname -m 2>/dev/null) if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ] || [ "$ARCH" = "armv8l" ]; then diff --git a/emmc-provisioning/network-boot-initramfs/init b/emmc-provisioning/network-boot-initramfs/init index e0660bb..f46d97a 100644 --- a/emmc-provisioning/network-boot-initramfs/init +++ b/emmc-provisioning/network-boot-initramfs/init @@ -7,6 +7,8 @@ export PATH=/bin:/usr/bin export LD_LIBRARY_PATH=/lib echo "=== CM4 provisioning initramfs ===" +# Revision is set at build time; cat /revision.txt to confirm you have the latest initrd on TFTP +[ -f /revision.txt ] && echo "Revision: $(cat /revision.txt)" || echo "Revision: (none)" # Minimal filesystem mount -t proc none /proc @@ -15,13 +17,28 @@ mount -t devtmpfs none /dev mkdir -p /dev/pts mount -t devpts none /dev/pts -# Kernel might have brought up eth0 via ip=dhcp; ensure we have an IP (run in background with timeout so we don't block rescue shell) -if ! ip addr show | grep -q 'inet .* scope global'; then +# Bring up eth0 (bootloader used it for TFTP but kernel starts with it down) +echo "Bringing up eth0..." 
+ip link set lo up 2>/dev/null || true +ip link set eth0 up 2>/dev/null || true + +# Wait for link (PHY negotiation takes a few seconds after ip link set up) +echo "Waiting for link..." +for _ in 1 2 3 4 5 6 7 8 9 10; do + ip link show eth0 2>/dev/null | grep -q 'LOWER_UP' && break + sleep 1 +done + +# Get DHCP lease (foreground with retries; -q exits after obtaining lease) +if ! ip addr show eth0 2>/dev/null | grep -q 'inet [0-9]'; then echo "Getting DHCP lease..." - ( udhcpc -f -q -i eth0 -n -T 5 2>/dev/null || true ) & - sleep 6 + udhcpc -i eth0 -q -T 5 -t 5 -n -s /usr/share/udhcpc/default.script 2>&1 || echo "udhcpc failed (will retry)" fi +# /tmp for client_ip (so client can read IP without running ip/awk) +mkdir -p /tmp +mount -t tmpfs none /tmp 2>/dev/null || true + # Allow kernel cmdline to override: provisioning_server=... and rescue mode RESCUE=0 for arg in $(cat /proc/cmdline); do @@ -42,5 +59,13 @@ if [ "$RESCUE" -eq 1 ]; then fi echo "Provisioning server: $PROVISIONING_SERVER" +# Capture eth0 IP; retry in case DHCP is still completing +for _ in 1 2 3 4 5; do + ip addr show dev eth0 2>/dev/null | awk '/inet [0-9]/ { print $2; exit }' | cut -d/ -f1 > /tmp/client_ip 2>/dev/null || true + [ -s /tmp/client_ip ] && break + echo "Waiting for IP on eth0..." + sleep 2 +done +echo "Client IP: $(cat /tmp/client_ip 2>/dev/null || echo '(none)')" echo "Running provisioning client..." 
exec /bin/sh /provisioning-client.sh diff --git a/emmc-provisioning/network-boot-initramfs/initrd.img b/emmc-provisioning/network-boot-initramfs/initrd.img index a764ce8..8b93538 100644 Binary files a/emmc-provisioning/network-boot-initramfs/initrd.img and b/emmc-provisioning/network-boot-initramfs/initrd.img differ diff --git a/emmc-provisioning/network-boot-initramfs/provisioning-client.sh b/emmc-provisioning/network-boot-initramfs/provisioning-client.sh index 9dc0a7e..c38d380 100644 --- a/emmc-provisioning/network-boot-initramfs/provisioning-client.sh +++ b/emmc-provisioning/network-boot-initramfs/provisioning-client.sh @@ -13,7 +13,12 @@ get_mac() { } get_ip() { - hostname -I 2>/dev/null | awk '{print $1}' || echo "" + # Prefer IP captured by init; fallback to ip (match "inet 1.2.3.4/..." to skip inet6) + if [ -f /tmp/client_ip ] && [ -s /tmp/client_ip ]; then + cat /tmp/client_ip + return + fi + ip addr show dev eth0 2>/dev/null | awk '/inet [0-9]/ { print $2; exit }' | cut -d/ -f1 } MAC=$(get_mac) @@ -37,10 +42,14 @@ while true; do sleep 10 continue fi - curl -sL "$url" | dd of="$EMMC_DEV" bs=4M status=progress conv=fsync + curl -sL "$url" | dd of="$EMMC_DEV" bs=4M conv=fsync 2>&1 + sync echo "Deploy done. Disabling network boot on server so device boots from eMMC next time." curl -s -X POST "$BASE_URL/api/action-done?mac=$MAC" || true - exit 0 + echo "Rebooting in 3 seconds..." + sleep 3 + reboot -f 2>/dev/null || echo b > /proc/sysrq-trigger + sleep 60 fi if [ "$action" = "backup" ] && [ -n "$upload_url" ]; then @@ -50,16 +59,20 @@ while true; do sleep 10 continue fi - dd if="$EMMC_DEV" bs=4M status=progress 2>/dev/null | curl -s -X POST -T - "$upload_url" + dd if="$EMMC_DEV" bs=4M 2>/dev/null | curl -s -X POST -T - "$upload_url" + sync echo "Backup done. Disabling network boot on server." curl -s -X POST "$BASE_URL/api/action-done?mac=$MAC" || true - exit 0 + echo "Rebooting in 3 seconds..." 
+ sleep 3 + reboot -f 2>/dev/null || echo b > /proc/sysrq-trigger + sleep 60 fi if [ "$action" = "reboot" ]; then - echo "Boot normally: rebooting..." - reboot -f 2>/dev/null || exec reboot 2>/dev/null || true - exit 0 + echo "Rebooting..." + reboot -f 2>/dev/null || echo b > /proc/sysrq-trigger + sleep 60 fi sleep 5 diff --git a/emmc-provisioning/network-boot-initramfs/udhcpc.script b/emmc-provisioning/network-boot-initramfs/udhcpc.script new file mode 100755 index 0000000..4602ffa --- /dev/null +++ b/emmc-provisioning/network-boot-initramfs/udhcpc.script @@ -0,0 +1,38 @@ +#!/bin/sh +# Minimal udhcpc script: apply IP and default route when lease is obtained. +# udhcpc sets: $1=bound|renew|deconfig, $ip, $subnet (dotted), $router, $dns, $interface + +mask2cidr() { + # Convert dotted subnet (e.g. 255.255.255.0) to CIDR prefix (e.g. 24) + _bits=0 + for _octet in $(echo "$1" | cut -d. -f1) $(echo "$1" | cut -d. -f2) $(echo "$1" | cut -d. -f3) $(echo "$1" | cut -d. -f4); do + case "$_octet" in + 255) _bits=$((_bits+8)) ;; 254) _bits=$((_bits+7)) ;; 252) _bits=$((_bits+6)) ;; + 248) _bits=$((_bits+5)) ;; 240) _bits=$((_bits+4)) ;; 224) _bits=$((_bits+3)) ;; + 192) _bits=$((_bits+2)) ;; 128) _bits=$((_bits+1)) ;; 0) ;; + esac + done + echo "$_bits" +} + +case "$1" in + deconfig) + ip addr flush dev "$interface" 2>/dev/null + ;; + bound|renew) + CIDR=$(mask2cidr "${subnet:-255.255.255.0}") + ip addr flush dev "$interface" 2>/dev/null + ip addr add "$ip/$CIDR" dev "$interface" + if [ -n "$router" ]; then + for r in $router; do + ip route add default via "$r" dev "$interface" 2>/dev/null + done + fi + if [ -n "$dns" ]; then + : > /etc/resolv.conf + for d in $dns; do + echo "nameserver $d" >> /etc/resolv.conf + done + fi + ;; +esac