Enhance network boot troubleshooting documentation and scripts

Update NETWORK-BOOT-TROUBLESHOOTING.md to clarify the boot process after start4.elf, emphasizing the importance of config.txt settings for kernel and initramfs. Introduce checks for GPU logging and ensure proper configuration for UART. Modify initramfs scripts to improve DHCP lease acquisition and ensure shell output is directed to the serial console. Update ensure-tftpboot-config-kernel-initrd.sh to enforce necessary config settings and link DTB files in serial-prefix directories for better device compatibility.
This commit is contained in:
nearxos
2026-02-21 02:27:48 +02:00
parent 4d5909904c
commit a6e27219f4
6 changed files with 108 additions and 21 deletions

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Check whether DHCP network-boot options (66/67) are enabled on the LXC.
#
# Usage:   ./check-dhcp-network-boot-on-lxc.sh [LXC_HOST]
# Example: ./check-dhcp-network-boot-on-lxc.sh root@10.20.30.153
#
# The check itself runs on the remote host: network boot is reported as
# ENABLED when the dnsmasq drop-in /etc/dnsmasq.d/network-boot-pxe.conf
# exists there, DISABLED otherwise.
# Exit status: the exit status of ssh (255 when the connection itself fails).
LXC="${1:-root@10.20.30.153}"

echo "Checking DHCP network-boot status on $LXC ..."

# "--" stops ssh option parsing so a host argument that starts with "-" can
# never be interpreted as an ssh option.
# The heredoc delimiter is quoted: nothing below is expanded locally, the text
# is sent verbatim to the remote bash, which is why PXE_CONF is defined inside.
ssh -- "$LXC" "bash -s" << 'REMOTE'
PXE_CONF="/etc/dnsmasq.d/network-boot-pxe.conf"
if [ -f "$PXE_CONF" ]; then
  echo "Status: ENABLED (option 66/67 are advertised - devices will try network boot)"
  echo "Content of $PXE_CONF:"
  cat "$PXE_CONF"
else
  echo "Status: DISABLED (no PXE options - devices get DHCP only and boot from local storage)"
fi
# Also show toggle script status if present
if [ -x /opt/cm4-provisioning/toggle-network-boot-dhcp.sh ]; then
  echo ""
  echo "Toggle script output: $(/opt/cm4-provisioning/toggle-network-boot-dhcp.sh status 2>/dev/null)"
fi
REMOTE
rc=$?

# Make a failed connection / remote failure explicit instead of silently
# ending with no status line; keep ssh's exit status for callers.
if [[ "$rc" -ne 0 ]]; then
  echo "Error: status check on $LXC failed (ssh exit status $rc)" >&2
fi
exit "$rc"

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
# Ensure TFTP config.txt on the LXC has kernel=kernel8.img and initramfs initrd.img followkernel
# so the bootloader loads the kernel and initrd (otherwise boot stops after start4.elf).
# Ensure TFTP config.txt on the LXC has kernel=kernel8.img, initramfs initrd.img followkernel,
# and uart_2ndstage=1 (GPU firmware logs to UART for netboot debugging).
# Run on LXC: bash ensure-tftpboot-config-kernel-initrd.sh
# Or: ssh root@10.20.30.153 'bash -s' < emmc-provisioning/scripts/ensure-tftpboot-config-kernel-initrd.sh
@@ -14,6 +14,12 @@ if [[ ! -f "$CONFIG" ]]; then
fi
CHANGED=0
# enable_uart=1 must be present (and within first 4KB of config) so netboot firmware sets 8250.nr_uarts=1; else kernel has no serial console (Pi firmware #1575).
#######################################
# Ensure an active "enable_uart=1" line exists in a config.txt.
# The match is anchored to the whole line so commented-out entries
# ("#enable_uart=1") and other values ("enable_uart=10") do not count.
# When missing, the setting is inserted as the FIRST line of the file rather
# than appended: that keeps it inside the first 4KB mentioned above and ahead
# of any [section] filter that may appear later in the file.
# NOTE(review): an existing active line that sits beyond the first 4KB or
# under a section filter is still treated as present - confirm if that matters.
# Globals:   CHANGED (set to 1 when the file was modified)
# Arguments: $1 - path to config.txt
# Outputs:   progress message on stdout, errors on stderr
# Returns:   0 if the line is present afterwards, 1 on error
#######################################
ensure_enable_uart() {
  local cfg=$1
  local tmp

  if [[ ! -f "$cfg" ]]; then
    echo "ensure_enable_uart: config file not found: $cfg" >&2
    return 1
  fi

  if grep -qE '^enable_uart=1[[:space:]]*$' -- "$cfg" 2>/dev/null; then
    return 0
  fi

  echo "Adding enable_uart=1 to $cfg (required for kernel serial on netboot)"
  tmp=$(mktemp) || return 1
  # Build the new content in a temp file, then write it back with cat (not mv)
  # so the original file keeps its inode, owner and permissions.
  if { printf '%s\n' 'enable_uart=1'; cat -- "$cfg"; } > "$tmp" \
    && cat -- "$tmp" > "$cfg"; then
    rm -f -- "$tmp"
    CHANGED=1
    return 0
  fi
  rm -f -- "$tmp"
  echo "ensure_enable_uart: failed to update $cfg" >&2
  return 1
}
ensure_enable_uart "$CONFIG"
if ! grep -qE '^kernel=kernel8\.img' "$CONFIG" 2>/dev/null; then
echo "Adding kernel=kernel8.img to $CONFIG"
echo "kernel=kernel8.img" >> "$CONFIG"
@@ -26,20 +32,34 @@ if ! grep -qE 'initramfs initrd\.img' "$CONFIG" 2>/dev/null; then
echo "initramfs initrd.img followkernel" >> "$CONFIG"
CHANGED=1
fi
#######################################
# Ensure an active "uart_2ndstage=1" line exists in a config.txt so the GPU
# firmware logs to the UART (useful for netboot debugging).
# The match is anchored to the whole line so a commented-out entry
# ("#uart_2ndstage=1") or a different value is not mistaken for the setting.
# When missing, a blank line, an explanatory comment and the setting are
# appended in a single write.
# Globals:   CHANGED (set to 1 when the file was modified)
# Arguments: $1 - path to config.txt
# Outputs:   progress message on stdout, errors on stderr
# Returns:   0 if the line is present afterwards, 1 on error
#######################################
ensure_uart_2ndstage() {
  local cfg=$1

  if [[ ! -f "$cfg" ]]; then
    echo "ensure_uart_2ndstage: config file not found: $cfg" >&2
    return 1
  fi

  if grep -qE '^uart_2ndstage=1[[:space:]]*$' -- "$cfg" 2>/dev/null; then
    return 0
  fi

  echo "Adding uart_2ndstage=1 to $cfg (GPU firmware logs to UART for netboot debug)"
  # One grouped redirect: the file is opened once and the three lines land
  # together or not at all.
  {
    echo ""
    echo "# GPU firmware logs to UART (see MESS: lines after PCI0 reset)"
    echo "uart_2ndstage=1"
  } >> "$cfg" || return 1
  CHANGED=1
}
ensure_uart_2ndstage "$CONFIG"
if [[ "$CHANGED" -eq 1 ]]; then
echo "Config updated. Ensure $TFTP_ROOT has kernel8.img and initrd.img."
else
echo "Config already has kernel and initramfs lines."
echo "Config already has kernel, initramfs and uart_2ndstage lines."
fi
grep -E 'kernel|initramfs' "$CONFIG" 2>/dev/null || true
grep -E 'enable_uart|kernel|initramfs|uart_2ndstage' "$CONFIG" 2>/dev/null || true
# Ensure serial-prefix dir gets a real copy of config (some TFTP servers don't follow symlinks)
# Ensure serial-prefix dirs get a real copy of config and symlinks to DTB files.
# GPU loads kernel/initrd/dtb from the serial prefix; missing DTBs cause "Failed to load Device Tree file '?'" and the kernel can hang.
# Visit every directory directly under $TFTP_ROOT whose name starts with a
# lowercase hex digit; the trailing "/" in the glob restricts matches to
# directories (so $serial_dir always ends with "/").
# NOTE(review): the glob only constrains the FIRST character, so a directory
# such as "boot/" or "backup/" would also match - confirm that is acceptable.
for serial_dir in "$TFTP_ROOT"/[0-9a-f]*/; do
# Skips the literal pattern when the glob matched nothing and was left unexpanded.
[[ -d "$serial_dir" ]] || continue
# NOTE(review): the conditional copy here and the unconditional copy right
# after it do the same rm/cp/echo; this looks like the removed and added sides
# of a diff shown together - confirm which one the real script contains.
# As written, config.txt is replaced with a real copy of $CONFIG when it is a
# symlink or is missing ...
if [[ -L "$serial_dir/config.txt" ]] || [[ ! -f "$serial_dir/config.txt" ]]; then
rm -f "$serial_dir/config.txt"
cp "$CONFIG" "$serial_dir/config.txt"
echo "Copied config.txt into $(basename "$serial_dir")/ (real file) so device gets full config."
fi
# ... and then replaced again unconditionally, so the per-serial copy always
# matches the current $CONFIG.
rm -f "$serial_dir/config.txt"
cp "$CONFIG" "$serial_dir/config.txt"
echo "Copied config.txt into $(basename "$serial_dir")/ (real file) so device gets full config."
# Link every .dtb that sits directly in $TFTP_ROOT into this directory.
for dtb in "$TFTP_ROOT"/*.dtb; do
# Skips the literal "*.dtb" pattern when no DTB files exist.
[[ -f "$dtb" ]] || continue
base=$(basename "$dtb")
# -e follows symlinks: an existing file or valid link is left alone, while a
# dangling link counts as missing and is replaced by ln -sf.
if [[ ! -e "$serial_dir/$base" ]]; then
# Relative target "../<name>" resolves to the DTB in the parent directory,
# i.e. $TFTP_ROOT.
ln -sf "../$base" "$serial_dir/$base"
echo "Linked $base into $(basename "$serial_dir")/"
fi
done
done