Update autopilot workflow and documentation for project cycle completion

- Modified the existing-code workflow to automatically loop back to New Task after project completion without user confirmation.
- Updated the autopilot state to reflect the current step as `done` and status as `completed`.
- Clarified the deployment status report by specifying non-deployed services and their purposes.

These changes enhance the automation of task management and improve documentation clarity.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-29 05:02:22 +03:00
parent 0bf3894e03
commit aeb7f8ca8c
20 changed files with 1360 additions and 12 deletions
+105
View File
@@ -0,0 +1,105 @@
#!/bin/bash
# Deployment orchestrator.
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Absolute directory containing this script, and the directory one level above
# it, which the rest of the script uses as the project root.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Print the command-line help (options and recognised environment variables)
# to stdout, then terminate the script with exit status 0.
# The heredoc delimiter is unquoted, so $(basename "$0") is expanded and the
# help shows the name the script was invoked with.
usage() {
cat <<EOF
Usage: $(basename "$0") [OPTIONS]
Azaion AI Training — Deployment orchestrator.
Options:
--rollback Rollback to previous image tags
--local Run locally (skip SSH, default if DEPLOY_HOST is unset)
--help Show this help message
Environment:
DEPLOY_HOST Target server for remote deployment (optional)
DEPLOY_USER SSH user (default: deploy)
EOF
# Exits the whole script, not just the function.
exit 0
}
# Flags selected on the command line; both default to off.
ROLLBACK=false
LOCAL=false

#######################################
# Parse command-line flags.
# Globals:   ROLLBACK, LOCAL (written)
# Arguments: the script's positional parameters
# Outputs:   help text on stdout for --help; for an unrecognised option the
#            diagnostic and the help text go to stderr
# Returns:   does not return for --help (usage exits 0) or for an unknown
#            option (exits 2)
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --rollback) ROLLBACK=true ;;
      --local) LOCAL=true ;;
      --help) usage ;;
      *)
        # A mistyped flag is a caller error: report it on stderr and exit
        # non-zero so wrappers and CI do not mistake it for a finished deploy.
        # usage() itself calls `exit 0`, so it runs in a subshell and only its
        # text is reused.
        echo "Unknown option: $arg" >&2
        (usage) >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"

# Load project settings if present; `set -a` exports everything the file
# assigns so that child processes inherit it.
if [[ -f "$PROJECT_ROOT/.env" ]]; then
  set -a
  source "$PROJECT_ROOT/.env"
  set +a
fi
DEPLOY_HOST="${DEPLOY_HOST:-}"
DEPLOY_USER="${DEPLOY_USER:-deploy}"
# With no target host there is nothing to SSH into, so force local mode.
if [[ -z "$DEPLOY_HOST" ]]; then
  LOCAL=true
fi
#######################################
# Execute a shell command line either on this machine or on the deploy host.
# Globals:   LOCAL, DEPLOY_USER, DEPLOY_HOST (read)
# Arguments: $1 - command line, passed as one string to `bash -c` or to ssh
# Returns:   exit status of the executed command
#######################################
run_cmd() {
  local command_line="$1"
  if [[ "$LOCAL" != true ]]; then
    ssh "${DEPLOY_USER}@${DEPLOY_HOST}" "$command_line"
    return
  fi
  bash -c "$command_line"
}
#######################################
# Run one of the deployment helper scripts, locally or on the deploy host.
# Globals:   LOCAL, SCRIPT_DIR, DEPLOY_USER, DEPLOY_HOST (read)
# Arguments: $1   - script file name (e.g. pull-images.sh)
#            $2.. - arguments forwarded to that script
# Returns:   exit status of the script (or of ssh)
#######################################
run_script() {
  local script="$1"
  shift
  if [[ "$LOCAL" == true ]]; then
    bash "$SCRIPT_DIR/$script" "$@"
  else
    # ssh hands the remote shell a single string, so every word is quoted with
    # %q; otherwise arguments containing spaces or shell metacharacters would
    # be re-split or interpreted on the remote side.
    local remote_cmd arg
    printf -v remote_cmd 'cd /opt/azaion-training && bash %q' "scripts/$script"
    for arg in "$@"; do
      remote_cmd+=" $(printf '%q' "$arg")"
    done
    ssh "${DEPLOY_USER}@${DEPLOY_HOST}" "$remote_cmd"
  fi
}
# --- Banner: report the selected mode and action -----------------------------
echo "=== Azaion AI Training — Deploy ==="
if $LOCAL; then
  mode_label='local'
else
  mode_label="remote ($DEPLOY_HOST)"
fi
echo "Mode: $mode_label"
if $ROLLBACK; then
  action_label='rollback'
else
  action_label='deploy'
fi
echo "Action: $action_label"
echo ""

# The config generator is invoked directly, i.e. on this machine in both modes.
# NOTE(review): confirm that is intended for remote deployments.
"$SCRIPT_DIR/generate-config.sh"

# --- Rollback: load the previously recorded image tags -----------------------
if [[ "$ROLLBACK" == true ]]; then
  PREV_TAGS="$SCRIPT_DIR/.previous-tags"
  [[ -f "$PREV_TAGS" ]] || {
    echo "ERROR: No previous tags found at $PREV_TAGS — cannot rollback"
    exit 1
  }
  echo "Rolling back to previous image tags..."
  # NOTE(review): the tags file is read from the local scripts directory and
  # its values are exported only into this process; in remote mode the steps
  # below run over SSH — confirm the values reach the remote host.
  set -o allexport
  . "$PREV_TAGS"
  set +o allexport
fi

# --- Steps 1-3: any failure aborts the script (errexit) ----------------------
step_names=("Pulling images" "Stopping services" "Starting services")
step_scripts=(pull-images.sh stop-services.sh start-services.sh)
for step_idx in "${!step_scripts[@]}"; do
  echo "[$((step_idx + 1))/4] ${step_names[step_idx]}..."
  run_script "${step_scripts[step_idx]}"
done

# --- Step 4: health check decides the final exit status ----------------------
echo "[4/4] Checking health..."
if ! run_script health-check.sh; then
  echo ""
  echo "=== Health check FAILED ==="
  echo "Run: $0 --rollback"
  exit 1
fi
echo ""
echo "=== Deploy successful ==="
+77
View File
@@ -0,0 +1,77 @@
#!/bin/bash
# Generates the project's config.yaml from environment variables.
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Absolute directory of this script and its parent (used as the project root).
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Output file written by this script.
CONFIG_FILE="${PROJECT_ROOT}/config.yaml"
# Print the help text to stdout, then terminate the script with exit status 0.
# The heredoc delimiter is unquoted, so $(basename "$0") is expanded to the
# name the script was invoked with.
usage() {
cat <<EOF
Usage: $(basename "$0") [--help]
Generate config.yaml from environment variables.
Sources .env from project root if present.
EOF
# Exits the whole script, not just the function.
exit 0
}
# Help is only recognised as the first argument.
if [[ "${1:-}" == "--help" ]]; then
  usage
fi

# Pull in project settings when a .env file exists; allexport makes every
# assignment in that file an exported variable.
if [[ -f "$PROJECT_ROOT/.env" ]]; then
  set -o allexport
  . "$PROJECT_ROOT/.env"
  set +o allexport
fi

# Variables that must be set and non-empty before a config can be generated.
required_vars=(
  AZAION_API_URL AZAION_API_EMAIL AZAION_API_PASSWORD
  RABBITMQ_HOST RABBITMQ_PORT RABBITMQ_USER RABBITMQ_PASSWORD RABBITMQ_QUEUE_NAME
  AZAION_ROOT_DIR
)

# Collect every missing name first so that all of them are reported at once.
missing=()
for var in "${required_vars[@]}"; do
  # ${!var:-} reads the variable whose name is stored in $var.
  [[ -n "${!var:-}" ]] || missing+=("$var")
done

if (( ${#missing[@]} > 0 )); then
  echo "ERROR: Missing required environment variables:"
  printf ' %s\n' "${missing[@]}"
  echo "Set them in .env or export them before running."
  exit 1
fi
#######################################
# Render a value as a YAML single-quoted scalar.
# Inside single quotes the only YAML escape is a doubled quote (''), so every
# embedded ' is doubled. Without this a value such as a password containing '
# would terminate the scalar early and make the file unparsable.
# Arguments: $1 - raw value
# Outputs:   the quoted scalar on stdout (no trailing newline)
#######################################
yaml_squote() {
  local value="$1"
  local sq="'"
  printf "'%s'" "${value//$sq/$sq$sq}"
}

#######################################
# Write the configuration document to stdout.
# Settings are nested under their section key (api, queue, dirs, training,
# export). String values go through yaml_squote; numeric settings are emitted
# bare. Optional settings fall back to the defaults shown inline.
# Globals:   AZAION_*, RABBITMQ_*, TRAINING_*, EXPORT_ONNX_IMGSZ (read)
#######################################
render_config() {
  cat <<YAML
api:
  url: $(yaml_squote "${AZAION_API_URL}")
  email: $(yaml_squote "${AZAION_API_EMAIL}")
  password: $(yaml_squote "${AZAION_API_PASSWORD}")
queue:
  host: $(yaml_squote "${RABBITMQ_HOST}")
  port: ${RABBITMQ_PORT}
  consumer_user: $(yaml_squote "${RABBITMQ_USER}")
  consumer_pw: $(yaml_squote "${RABBITMQ_PASSWORD}")
  name: $(yaml_squote "${RABBITMQ_QUEUE_NAME}")
dirs:
  root: $(yaml_squote "${AZAION_ROOT_DIR}")
  data: $(yaml_squote "${AZAION_DATA_DIR:-data}")
  data_seed: $(yaml_squote "${AZAION_DATA_SEED_DIR:-data-seed}")
  data_deleted: $(yaml_squote "${AZAION_DATA_DELETED_DIR:-data_deleted}")
training:
  model: $(yaml_squote "${TRAINING_MODEL:-yolo26m.pt}")
  epochs: ${TRAINING_EPOCHS:-120}
  batch: ${TRAINING_BATCH_SIZE:-11}
  imgsz: ${TRAINING_IMGSZ:-1280}
  save_period: ${TRAINING_SAVE_PERIOD:-1}
  workers: ${TRAINING_WORKERS:-24}
export:
  onnx_imgsz: ${EXPORT_ONNX_IMGSZ:-1280}
YAML
}

render_config > "$CONFIG_FILE"
echo "Generated $CONFIG_FILE"
+119
View File
@@ -0,0 +1,119 @@
#!/bin/bash
# Health check for the deployment.
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Absolute directory of this script and its parent (used as the project root).
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Print the help text to stdout, then terminate the script with exit status 0.
# The heredoc delimiter is unquoted, so $(basename "$0") is expanded to the
# name the script was invoked with.
usage() {
cat <<EOF
Usage: $(basename "$0") [--help]
Check health of Azaion AI Training deployment.
Checks: container status, GPU availability, disk usage, queue offset.
Exit code 0 = healthy, 1 = unhealthy.
EOF
# Exits the whole script, not just the function.
exit 0
}
# Help is only recognised as the first argument.
if [[ "${1:-}" == "--help" ]]; then
  usage
fi

# Pull in project settings when a .env file exists; allexport makes every
# assignment in that file an exported variable.
if [[ -f "$PROJECT_ROOT/.env" ]]; then
  set -o allexport
  . "$PROJECT_ROOT/.env"
  set +o allexport
fi

# Data root to inspect; falls back to /azaion when unset or empty.
: "${AZAION_ROOT_DIR:=/azaion}"

# Overall verdict; check() flips it to false on the first failing check.
HEALTHY=true
#######################################
# Report the outcome of one health check as a fixed-width line.
# Globals:   HEALTHY (set to false for any outcome other than "OK")
# Arguments: $1 - label shown in the left column
#            $2 - "OK", or a description of the problem
# Outputs:   one line on stdout, tagged [OK] or [FAIL] <description>
#######################################
check() {
  local label="$1"
  local outcome="$2"
  local verdict="[OK]"
  if [[ "$outcome" != "OK" ]]; then
    verdict="[FAIL] $outcome"
    HEALTHY=false
  fi
  printf " %-30s %s\n" "$label" "$verdict"
}
echo "=== Azaion AI Training — Health Check ==="
echo ""
echo "Containers:"
# For each expected service: resolve its container id through Compose, then
# require the container state to be "running".
for svc in annotation-queue rabbitmq; do
  # A failing or silent `ps` is treated as "no container" rather than an error.
  cid=$(docker compose -f "$PROJECT_ROOT/docker-compose.yml" ps -q "$svc" 2>/dev/null || true)
  if [[ -z "$cid" ]]; then
    check "$svc" "container not found"
    continue
  fi
  state=$(docker inspect --format='{{.State.Status}}' "$cid" 2>/dev/null || echo "unknown")
  case "$state" in
    running) check "$svc" "OK" ;;
    *) check "$svc" "state=$state" ;;
  esac
done
echo ""
echo "GPU:"
if command -v nvidia-smi &>/dev/null; then
  # Each query keeps only the first output line; a failing pipeline collapses
  # to the literal "N/A".
  gpu_temp=$(nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader,nounits 2>/dev/null | head -1 || echo "N/A")
  gpu_mem=$(nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits 2>/dev/null | head -1 || echo "N/A")
  gpu_util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits 2>/dev/null | head -1 || echo "N/A")
  # An empty reading means nvidia-smi reported nothing, which is not a usable
  # GPU either, so it is handled like a failure.
  if [[ -n "$gpu_temp" && "$gpu_temp" != "N/A" ]]; then
    check "GPU available" "OK"
    printf " %-30s %s°C\n" " Temperature" "$gpu_temp"
    printf " %-30s %s MiB\n" " Memory (used/total)" "$gpu_mem"
    printf " %-30s %s%%\n" " Utilization" "$gpu_util"
    # Compare only when the reading is a plain integer: a non-numeric
    # placeholder from the tool would otherwise raise an arithmetic error.
    # 10# forces base 10 so a leading zero is never read as octal.
    if [[ "$gpu_temp" =~ ^[0-9]+$ ]] && (( 10#$gpu_temp > 90 )); then
      check "GPU temperature" "CRITICAL: ${gpu_temp}°C > 90°C"
    fi
  else
    check "GPU" "nvidia-smi failed"
  fi
else
  check "GPU (nvidia-smi)" "not installed"
fi
echo ""
echo "Disk:"
if [[ -d "$AZAION_ROOT_DIR" ]]; then
  # Usage percentage of the filesystem holding the data root, digits only;
  # a failing pipeline collapses to the literal "N/A".
  disk_pct=$(df --output=pcent -- "$AZAION_ROOT_DIR" 2>/dev/null | tail -1 | tr -d ' %' || echo "N/A")
  # Evaluate thresholds only for a plain integer. Anything else ("N/A", or a
  # "-" placeholder) used to reach the numeric comparison, fail it with an
  # arithmetic error, and fall through to "OK".
  if [[ "$disk_pct" =~ ^[0-9]+$ ]]; then
    # NOTE(review): check() treats every result other than "OK" as a failure,
    # so the WARNING tier also marks the run unhealthy — confirm intended.
    if (( 10#$disk_pct > 95 )); then
      check "Disk usage ($AZAION_ROOT_DIR)" "CRITICAL: ${disk_pct}%"
    elif (( 10#$disk_pct > 80 )); then
      check "Disk usage ($AZAION_ROOT_DIR)" "WARNING: ${disk_pct}%"
    else
      check "Disk usage ($AZAION_ROOT_DIR)" "OK"
    fi
    printf " %-30s %s%%\n" " Usage" "$disk_pct"
  else
    # Informational only: an unreadable percentage does not fail the check,
    # matching the previous behaviour for "N/A".
    printf " %-30s %s\n" " Usage" "unknown"
  fi
  azaion_size=$(du -sh "$AZAION_ROOT_DIR" 2>/dev/null | cut -f1 || echo "N/A")
  printf " %-30s %s\n" " Total size" "$azaion_size"
else
  check "Data directory ($AZAION_ROOT_DIR)" "does not exist"
fi
echo ""
echo "Queue:"
# The check passes when the offset file exists; the offset value itself is
# printed for information only.
OFFSET_FILE="$PROJECT_ROOT/src/annotation-queue/offset.yaml"
if [[ ! -f "$OFFSET_FILE" ]]; then
  check "Offset file" "not found at $OFFSET_FILE"
else
  # Second whitespace-separated field of the line(s) mentioning offset_queue;
  # "N/A" when the pipeline fails (e.g. no such line).
  offset=$(grep 'offset_queue' "$OFFSET_FILE" 2>/dev/null | awk '{print $2}' || echo "N/A")
  printf " %-30s %s\n" "Last queue offset" "$offset"
  check "Offset file" "OK"
fi
echo ""
# Translate the accumulated verdict into the summary line and the exit status
# (0 = healthy, 1 = unhealthy).
if $HEALTHY; then
  verdict_label='HEALTHY'
else
  verdict_label='UNHEALTHY'
fi
echo "=== Result: $verdict_label ==="
if ! $HEALTHY; then
  exit 1
fi
exit 0
+48
View File
@@ -0,0 +1,48 @@
#!/bin/bash
# Pulls the deployment's Docker images.
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Absolute directory of this script and its parent (used as the project root).
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Print the help text (including the environment variables the script reads)
# to stdout, then terminate the script with exit status 0.
# The heredoc delimiter is unquoted, so $(basename "$0") is expanded to the
# name the script was invoked with.
usage() {
cat <<EOF
Usage: $(basename "$0") [--help]
Pull Azaion AI Training Docker images from the container registry.
Environment:
DOCKER_REGISTRY Registry URL (required)
DOCKER_IMAGE_TAG Image tag to pull (default: latest)
EOF
# Exits the whole script, not just the function.
exit 0
}
# Help is only recognised as the first argument.
if [[ "${1:-}" == "--help" ]]; then
  usage
fi

# Pull in project settings when a .env file exists; allexport makes every
# assignment in that file an exported variable.
if [[ -f "$PROJECT_ROOT/.env" ]]; then
  set -o allexport
  . "$PROJECT_ROOT/.env"
  set +o allexport
fi

# The registry is mandatory (the script aborts with this message when it is
# unset or empty); the tag falls back to "latest".
: "${DOCKER_REGISTRY:?DOCKER_REGISTRY is required}"
: "${DOCKER_IMAGE_TAG:=latest}"

# Fully qualified references of the images to pull.
IMAGES=()
for repo in training annotation-queue; do
  IMAGES+=("${DOCKER_REGISTRY}/azaion/${repo}:${DOCKER_IMAGE_TAG}")
done

echo "Pulling images (tag: ${DOCKER_IMAGE_TAG})..."
for image in "${IMAGES[@]}"; do
  echo " Pulling $image ..."
  # Stop at the first image that cannot be pulled.
  docker pull "$image" || {
    echo " FAILED: $image"
    exit 1
  }
  echo " OK: $image"
done
echo "All images pulled successfully."
+54
View File
@@ -0,0 +1,54 @@
#!/bin/bash
# Starts the deployment's services with Docker Compose.
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Absolute directory of this script and its parent (used as the project root).
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Print the help text (including the environment variable the script reads)
# to stdout, then terminate the script with exit status 0.
# The heredoc delimiter is unquoted, so $(basename "$0") is expanded to the
# name the script was invoked with.
usage() {
cat <<EOF
Usage: $(basename "$0") [--help]
Start Azaion AI Training services via Docker Compose.
Environment:
AZAION_ROOT_DIR Root data directory (default: /azaion)
EOF
# Exits the whole script, not just the function.
exit 0
}
# Help is only recognised as the first argument.
if [[ "${1:-}" == "--help" ]]; then
  usage
fi

# Project settings are optional: load and export them only when the file exists.
env_file="$PROJECT_ROOT/.env"
if [[ -f "$env_file" ]]; then
  set -a
  source "$env_file"
  set +a
fi

AZAION_ROOT_DIR="${AZAION_ROOT_DIR:-/azaion}"

# Directory layout created under the data root before the services start.
dirs=(
  "$AZAION_ROOT_DIR"
  "$AZAION_ROOT_DIR/${AZAION_DATA_DIR:-data}/images"
  "$AZAION_ROOT_DIR/${AZAION_DATA_DIR:-data}/labels"
  "$AZAION_ROOT_DIR/${AZAION_DATA_SEED_DIR:-data-seed}/images"
  "$AZAION_ROOT_DIR/${AZAION_DATA_SEED_DIR:-data-seed}/labels"
  "$AZAION_ROOT_DIR/${AZAION_DATA_DELETED_DIR:-data_deleted}/images"
  "$AZAION_ROOT_DIR/${AZAION_DATA_DELETED_DIR:-data_deleted}/labels"
  "$AZAION_ROOT_DIR/datasets"
  "$AZAION_ROOT_DIR/models"
)
echo "Ensuring directory structure..."
mkdir -p -- "${dirs[@]}"

# Pass --env-file only when the file is really there. The script treats .env as
# optional above, but Compose aborts when an explicitly named env file is
# missing, so the unconditional flag made .env mandatory after all.
compose_args=(-f "$PROJECT_ROOT/docker-compose.yml")
if [[ -f "$env_file" ]]; then
  compose_args+=(--env-file "$env_file")
fi

echo "Starting services..."
docker compose "${compose_args[@]}" up -d
echo "Waiting for containers to start..."
# Fixed pause before listing container status; it is not a readiness probe.
sleep 5
docker compose "${compose_args[@]}" ps
echo "Services started."
+44
View File
@@ -0,0 +1,44 @@
#!/bin/bash
# Stops the deployment's services and records image tags for rollback.
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Absolute directory of this script and its parent (used as the project root).
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Print the help text to stdout, then terminate the script with exit status 0.
# The heredoc delimiter is unquoted, so $(basename "$0") is expanded to the
# name the script was invoked with.
usage() {
cat <<EOF
Usage: $(basename "$0") [--help]
Gracefully stop Azaion AI Training services.
Saves current image tags for rollback.
EOF
# Exits the whole script, not just the function.
exit 0
}
# Help is only recognised as the first argument.
if [[ "${1:-}" == "--help" ]]; then
  usage
fi

# Project settings are optional: load and export them only when the file exists.
if [[ -f "$PROJECT_ROOT/.env" ]]; then
  set -a
  source "$PROJECT_ROOT/.env"
  set +a
fi

# File that records the image each service was running before it was stopped.
PREV_TAGS="$SCRIPT_DIR/.previous-tags"

echo "Saving current image tags for rollback..."
# Gather the entries in memory first. Redirecting the loop straight into the
# file truncated it even when no container was found, which wiped the only
# rollback record (e.g. when stopping twice in a row or after a failed start).
tag_lines=()
for svc in annotation-queue; do
  cid=$(docker compose -f "$PROJECT_ROOT/docker-compose.yml" ps -q "$svc" 2>/dev/null || true)
  [[ -n "$cid" ]] || continue
  # Record the image only when inspect succeeds; a placeholder such as
  # "unknown" would be useless as a rollback target.
  if img=$(docker inspect --format='{{.Config.Image}}' "$cid" 2>/dev/null) && [[ -n "$img" ]]; then
    # Dashes are not valid in variable names, so they become underscores.
    tag_lines+=("PREV_IMAGE_${svc//-/_}=$img")
  fi
done

if (( ${#tag_lines[@]} > 0 )); then
  printf '%s\n' "${tag_lines[@]}" > "$PREV_TAGS"
  tags_note="Previous tags saved to $PREV_TAGS"
else
  tags_note="No service images found; $PREV_TAGS left unchanged"
fi

echo "Stopping services (30s grace period)..."
docker compose -f "$PROJECT_ROOT/docker-compose.yml" stop -t 30
echo "Removing containers..."
docker compose -f "$PROJECT_ROOT/docker-compose.yml" down --remove-orphans
echo "Services stopped. $tags_note"