From 2178737b3658057456e78ee97956ca1b21e70d2b Mon Sep 17 00:00:00 2001 From: Oleksandr Bezdieniezhnykh Date: Sat, 25 Apr 2026 23:13:40 +0300 Subject: [PATCH] fresh start. Another try --- _docs/00_problem/problem.md | 2 +- .../00_question_decomposition.md | 91 --- .../01_source_registry.md | 212 ------ .../02_fact_cards.md | 142 ---- .../03_comparison_framework.md | 31 - .../04_reasoning_chain.md | 192 ------ .../05_validation_log.md | 100 --- .../gps_denied_nav/00_ac_assessment.md | 74 --- .../00_question_decomposition.md | 88 --- .../gps_denied_nav/01_source_registry.md | 151 ----- .../gps_denied_nav/02_fact_cards.md | 121 ---- .../gps_denied_nav/03_comparison_framework.md | 71 -- .../gps_denied_nav/04_reasoning_chain.md | 129 ---- .../gps_denied_nav/05_validation_log.md | 98 --- .../00_question_decomposition.md | 80 --- .../01_source_registry.md | 201 ------ .../02_fact_cards.md | 161 ----- .../03_comparison_framework.md | 79 --- .../04_reasoning_chain.md | 145 ---- .../05_validation_log.md | 93 --- .../00_question_decomposition.md | 56 -- .../gps_denied_nav_v2/01_source_registry.md | 121 ---- .../gps_denied_nav_v2/02_fact_cards.md | 122 ---- .../03_comparison_framework.md | 45 -- .../gps_denied_nav_v2/04_reasoning_chain.md | 90 --- .../gps_denied_nav_v2/05_validation_log.md | 52 -- .../00_question_decomposition.md | 102 --- .../gps_denied_nav_v3/01_source_registry.md | 175 ----- .../gps_denied_nav_v3/02_fact_cards.md | 105 --- .../03_comparison_framework.md | 62 -- .../gps_denied_nav_v3/04_reasoning_chain.md | 202 ------ .../gps_denied_nav_v3/05_validation_log.md | 88 --- .../gps_denied_visual_nav/00_ac_assessment.md | 76 --- .../00_question_decomposition.md | 63 -- .../01_source_registry.md | 133 ---- .../gps_denied_visual_nav/02_fact_cards.md | 121 ---- .../03_comparison_framework.md | 115 ---- .../04_reasoning_chain.md | 146 ---- .../05_validation_log.md | 57 -- .../00_question_decomposition.md | 73 -- .../01_source_registry.md | 166 ----- .../02_fact_cards.md | 169 ----- .../03_comparison_framework.md | 63 -- .../04_reasoning_chain.md | 166 ----- .../05_validation_log.md | 96 --- .../00_question_decomposition.md | 57 -- .../01_source_registry.md | 231 ------- .../trt_engine_migration/02_fact_cards.md | 193 ------ .../03_comparison_framework.md | 38 -- .../04_reasoning_chain.md | 124 ---- .../trt_engine_migration/05_validation_log.md | 65 -- _docs/01_solution/security_analysis.md | 346 ---------- _docs/01_solution/solution.md | 622 ------------------ _docs/01_solution/solution_draft01.md | 283 -------- _docs/01_solution/solution_draft02.md | 356 ---------- _docs/01_solution/solution_draft03.md | 491 -------------- _docs/01_solution/solution_draft04.md | 385 ----------- _docs/01_solution/solution_draft05.md | 562 ---------------- _docs/01_solution/solution_draft06.md | 622 ------------------ _docs/01_solution/tech_stack.md | 257 -------- _docs/02_document/tests/blackbox-tests.md | 503 -------------- _docs/02_document/tests/environment.md | 149 ----- _docs/02_document/tests/performance-tests.md | 138 ---- _docs/02_document/tests/resilience-tests.md | 169 ----- .../02_document/tests/resource-limit-tests.md | 90 --- _docs/02_document/tests/security-tests.md | 88 --- _docs/02_document/tests/test-data.md | 95 --- .../02_document/tests/traceability-matrix.md | 69 -- _docs/_autopilot_state.md | 40 -- .../00_research/00_question_decomposition.md | 68 -- .../00_research/01_source_registry.md | 133 ---- .../00_research/02_fact_cards.md | 151 ----- .../00_research/03_comparison_framework.md | 35 - .../00_research/04_reasoning_chain.md | 133 ---- .../00_research/05_validation_log.md | 42 -- .../01_solution/solution_draft01.md | 196 ------ .../UAV_frame_material.md | 1 - .../UAV_frame_material/00_ac_assessment.md | 41 -- .../00_question_decomposition.md | 72 -- .../UAV_frame_material/01_source_registry.md | 199 ------ .../UAV_frame_material/02_fact_cards.md | 145 ---- .../03_comparison_framework.md | 39 -- .../UAV_frame_material/04_reasoning_chain.md | 143 ---- .../UAV_frame_material/05_validation_log.md | 61 -- .../01_solution/solution_draft01.md | 177 ----- .../01_solution/solution_draft02.md | 428 ------------ .../01_solution/solution_draft03.md | 489 -------------- .../01_solution/solution_draft04.md | 296 --------- .../01_solution/solution_draft05.md | 354 ---------- .../01_solution/solution_draft06.md | 206 ------ .../01_solution/solution_draft07.md | 418 ------------ .../UAV_frame_material/UAV_frame_material.md | 1 - .../camera_high_altitude/00_ac_assessment.md | 98 --- .../00_question_decomposition.md | 56 -- .../01_source_registry.md | 146 ---- .../camera_high_altitude/02_fact_cards.md | 103 --- .../03_comparison_framework.md | 51 -- .../04_reasoning_chain.md | 130 ---- .../camera_high_altitude/05_validation_log.md | 51 -- .../01_solution/solution_draft01.md | 152 ----- .../camera_high_altitude.md | 6 - 101 files changed, 1 insertion(+), 15518 deletions(-) delete mode 100644 _docs/00_research/gps_denied_draft02_assessment/00_question_decomposition.md delete mode 100644 _docs/00_research/gps_denied_draft02_assessment/01_source_registry.md delete mode 100644 _docs/00_research/gps_denied_draft02_assessment/02_fact_cards.md delete mode 100644 _docs/00_research/gps_denied_draft02_assessment/03_comparison_framework.md delete mode 100644 _docs/00_research/gps_denied_draft02_assessment/04_reasoning_chain.md delete mode 100644 _docs/00_research/gps_denied_draft02_assessment/05_validation_log.md delete mode 100644 _docs/00_research/gps_denied_nav/00_ac_assessment.md delete mode 100644 _docs/00_research/gps_denied_nav/00_question_decomposition.md delete mode 100644 _docs/00_research/gps_denied_nav/01_source_registry.md delete mode 100644 _docs/00_research/gps_denied_nav/02_fact_cards.md delete mode 100644 _docs/00_research/gps_denied_nav/03_comparison_framework.md delete mode 100644 _docs/00_research/gps_denied_nav/04_reasoning_chain.md delete mode 100644 _docs/00_research/gps_denied_nav/05_validation_log.md delete mode 100644 _docs/00_research/gps_denied_nav_assessment/00_question_decomposition.md delete mode 100644 _docs/00_research/gps_denied_nav_assessment/01_source_registry.md delete mode 100644 _docs/00_research/gps_denied_nav_assessment/02_fact_cards.md delete mode 100644 _docs/00_research/gps_denied_nav_assessment/03_comparison_framework.md delete mode 100644 _docs/00_research/gps_denied_nav_assessment/04_reasoning_chain.md delete mode 100644 _docs/00_research/gps_denied_nav_assessment/05_validation_log.md delete mode 100644 _docs/00_research/gps_denied_nav_v2/00_question_decomposition.md delete mode 100644 _docs/00_research/gps_denied_nav_v2/01_source_registry.md delete mode 100644 _docs/00_research/gps_denied_nav_v2/02_fact_cards.md delete mode 100644 _docs/00_research/gps_denied_nav_v2/03_comparison_framework.md delete mode 100644 _docs/00_research/gps_denied_nav_v2/04_reasoning_chain.md delete mode 100644 _docs/00_research/gps_denied_nav_v2/05_validation_log.md delete mode 100644 _docs/00_research/gps_denied_nav_v3/00_question_decomposition.md delete mode 100644 _docs/00_research/gps_denied_nav_v3/01_source_registry.md delete mode 100644 _docs/00_research/gps_denied_nav_v3/02_fact_cards.md delete mode 100644 _docs/00_research/gps_denied_nav_v3/03_comparison_framework.md delete mode 100644 _docs/00_research/gps_denied_nav_v3/04_reasoning_chain.md delete mode 100644 _docs/00_research/gps_denied_nav_v3/05_validation_log.md delete mode 100644 _docs/00_research/gps_denied_visual_nav/00_ac_assessment.md delete mode 100644 _docs/00_research/gps_denied_visual_nav/00_question_decomposition.md delete mode 100644 _docs/00_research/gps_denied_visual_nav/01_source_registry.md delete mode 100644 _docs/00_research/gps_denied_visual_nav/02_fact_cards.md delete mode 100644 _docs/00_research/gps_denied_visual_nav/03_comparison_framework.md delete mode 100644 _docs/00_research/gps_denied_visual_nav/04_reasoning_chain.md delete mode 100644 _docs/00_research/gps_denied_visual_nav/05_validation_log.md delete mode 100644 _docs/00_research/solution_completeness_assessment/00_question_decomposition.md delete mode 100644 _docs/00_research/solution_completeness_assessment/01_source_registry.md delete mode 100644 _docs/00_research/solution_completeness_assessment/02_fact_cards.md delete mode 100644 _docs/00_research/solution_completeness_assessment/03_comparison_framework.md delete mode 100644 _docs/00_research/solution_completeness_assessment/04_reasoning_chain.md delete mode 100644 _docs/00_research/solution_completeness_assessment/05_validation_log.md delete mode 100644 _docs/00_research/trt_engine_migration/00_question_decomposition.md delete mode 100644 _docs/00_research/trt_engine_migration/01_source_registry.md delete mode 100644 _docs/00_research/trt_engine_migration/02_fact_cards.md delete mode 100644 _docs/00_research/trt_engine_migration/03_comparison_framework.md delete mode 100644 _docs/00_research/trt_engine_migration/04_reasoning_chain.md delete mode 100644 _docs/00_research/trt_engine_migration/05_validation_log.md delete mode 100644 _docs/01_solution/security_analysis.md delete mode 100644 _docs/01_solution/solution.md delete mode 100644 _docs/01_solution/solution_draft01.md delete mode 100644 _docs/01_solution/solution_draft02.md delete mode 100644 _docs/01_solution/solution_draft03.md delete mode 100644 _docs/01_solution/solution_draft04.md delete mode 100644 _docs/01_solution/solution_draft05.md delete mode 100644 _docs/01_solution/solution_draft06.md delete mode 100644 _docs/01_solution/tech_stack.md delete mode 100644 _docs/02_document/tests/blackbox-tests.md delete mode 100644 _docs/02_document/tests/environment.md delete mode 100644 _docs/02_document/tests/performance-tests.md delete mode 100644 _docs/02_document/tests/resilience-tests.md delete mode 100644 _docs/02_document/tests/resource-limit-tests.md delete mode 100644 _docs/02_document/tests/security-tests.md delete mode 100644 _docs/02_document/tests/test-data.md delete mode 100644 _docs/02_document/tests/traceability-matrix.md delete mode 100644 _docs/_autopilot_state.md delete mode 100644 _standalone/UAV_camera_comparison/00_research/00_question_decomposition.md delete mode 100644 _standalone/UAV_camera_comparison/00_research/01_source_registry.md delete mode 100644 _standalone/UAV_camera_comparison/00_research/02_fact_cards.md delete mode 100644 _standalone/UAV_camera_comparison/00_research/03_comparison_framework.md delete mode 100644 _standalone/UAV_camera_comparison/00_research/04_reasoning_chain.md delete mode 100644 _standalone/UAV_camera_comparison/00_research/05_validation_log.md delete mode 100644 _standalone/UAV_camera_comparison/01_solution/solution_draft01.md delete mode 100644 _standalone/UAV_camera_comparison/UAV_frame_material.md delete mode 100644 _standalone/UAV_frame_material/00_research/UAV_frame_material/00_ac_assessment.md delete mode 100644 _standalone/UAV_frame_material/00_research/UAV_frame_material/00_question_decomposition.md delete mode 100644 _standalone/UAV_frame_material/00_research/UAV_frame_material/01_source_registry.md delete mode 100644 _standalone/UAV_frame_material/00_research/UAV_frame_material/02_fact_cards.md delete mode 100644 _standalone/UAV_frame_material/00_research/UAV_frame_material/03_comparison_framework.md delete mode 100644 _standalone/UAV_frame_material/00_research/UAV_frame_material/04_reasoning_chain.md delete mode 100644 _standalone/UAV_frame_material/00_research/UAV_frame_material/05_validation_log.md delete mode 100644 _standalone/UAV_frame_material/01_solution/solution_draft01.md delete mode 100644 _standalone/UAV_frame_material/01_solution/solution_draft02.md delete mode 100644 _standalone/UAV_frame_material/01_solution/solution_draft03.md delete mode 100644 _standalone/UAV_frame_material/01_solution/solution_draft04.md delete mode 100644 _standalone/UAV_frame_material/01_solution/solution_draft05.md delete mode 100644 _standalone/UAV_frame_material/01_solution/solution_draft06.md delete mode 100644 _standalone/UAV_frame_material/01_solution/solution_draft07.md delete mode 100644 _standalone/UAV_frame_material/UAV_frame_material.md delete mode 100644 _standalone/camera_high_altitude/00_research/camera_high_altitude/00_ac_assessment.md delete mode 100644 _standalone/camera_high_altitude/00_research/camera_high_altitude/00_question_decomposition.md delete mode 100644 _standalone/camera_high_altitude/00_research/camera_high_altitude/01_source_registry.md delete mode 100644 _standalone/camera_high_altitude/00_research/camera_high_altitude/02_fact_cards.md delete mode 100644 _standalone/camera_high_altitude/00_research/camera_high_altitude/03_comparison_framework.md delete mode 100644 _standalone/camera_high_altitude/00_research/camera_high_altitude/04_reasoning_chain.md delete mode 100644 _standalone/camera_high_altitude/00_research/camera_high_altitude/05_validation_log.md delete mode 100644 _standalone/camera_high_altitude/01_solution/solution_draft01.md delete mode 100644 _standalone/camera_high_altitude/camera_high_altitude.md diff --git a/_docs/00_problem/problem.md b/_docs/00_problem/problem.md index f6c9723..3bd3b70 100644 --- a/_docs/00_problem/problem.md +++ b/_docs/00_problem/problem.md @@ -1,2 +1,2 @@ We have a wing-type UAV with a camera pointing downwards that can take photos 3 times per second with a resolution 6200*4100. Also plane has flight controller with IMU. During the plane flight, we know GPS coordinates initially. During the flight, GPS could be disabled or spoofed. We need to determine the GPS of the centers of the next frame from the camera. And also the coordinates of the center of any object in these photos. We can use an external satellite provider for ground checks on the existing photos. So, before the flight, UAV's operator should upload the satellite photos to the plane's companion PC. -The real world examples are in input_data folder, but the distance between each photo is way bigger than it will be from a real plane. On that particular example photos were taken 1 photo per 2-3 seconds. But in real-world scenario frames would appear within the interval no more than 500ms or even 400 ms. \ No newline at end of file +The real world examples are in input_data folder, but the distance between each photo is way bigger than it will be from a real plane. On that particular example photos were taken 1 photo per 2-3 seconds. But in real-world scenario frames would appear within the interval no more than 500ms. We also don't have IMU data for the test. For now we have to search for the public data for that in internet. We've tried to record that with Mavic 3 Pro Mini, but failed, cause of the closed system if DJI. \ No newline at end of file diff --git a/_docs/00_research/gps_denied_draft02_assessment/00_question_decomposition.md b/_docs/00_research/gps_denied_draft02_assessment/00_question_decomposition.md deleted file mode 100644 index 419db3c..0000000 --- a/_docs/00_research/gps_denied_draft02_assessment/00_question_decomposition.md +++ /dev/null @@ -1,91 +0,0 @@ -# Question Decomposition — Solution Assessment (Mode B, Draft02) - -## Original Question -Assess solution_draft02.md for weak points, security vulnerabilities, and performance bottlenecks, then produce a revised solution draft03. - -## Active Mode -Mode B: Solution Assessment — `solution_draft02.md` is the highest-numbered draft. - -## Question Type Classification -- **Primary**: Problem Diagnosis — identify weak points, vulnerabilities, bottlenecks in draft02 -- **Secondary**: Decision Support — evaluate alternatives for identified issues - -## Research Subject Boundary Definition - -| Dimension | Boundary | -|-----------|----------| -| **Domain** | GPS-denied UAV visual navigation, aerial geo-referencing | -| **Geography** | Eastern/southern Ukraine (left of Dnipro River) — steppe terrain | -| **Hardware** | Desktop/laptop with NVIDIA RTX 2060+, 16GB RAM, 6GB VRAM | -| **Software** | Python ecosystem, GPU-accelerated CV/ML | -| **Timeframe** | Current state-of-the-art (2024-2026), production-ready tools | -| **Scale** | 500-3000 images per flight, up to 6252×4168 resolution | - -## Problem Context Summary -- UAV aerial photos taken consecutively ~100m apart, downward non-stabilized camera -- Only starting GPS known — must determine GPS for all subsequent images -- Must handle: sharp turns, outlier photos (up to 350m gap), disconnected route segments -- Processing <5s/image, real-time SSE streaming, REST API service -- No IMU data available -- Camera: 26MP (6252×4168), 25mm focal length, 23.5mm sensor width, 400m altitude - -## Decomposed Sub-Questions - -### A: DINOv2 Cross-View Retrieval Viability -"Is DINOv2 proven for UAV-to-satellite coarse retrieval? What are real-world performance numbers? What search radius is realistic?" - -### B: XFeat Reliability for Aerial VO -"Is XFeat proven for aerial visual odometry? How does it compare to SuperPoint in aerial scenes specifically? What are known failure modes?" - -### C: LightGlue ONNX on RTX 2060 (Turing) -"Does LightGlue-ONNX work reliably on Turing architecture? What precision (FP16/FP32)? What are actual benchmarks?" - -### D: GTSAM iSAM2 Factor Graph Design -"Is the proposed factor graph structure sound? Are the noise models appropriate? Are custom factors (DEM, drift limit) well-specified?" - -### E: Copernicus DEM Integration -"How is Copernicus DEM accessed programmatically? Is it truly free? What are the actual API requirements?" - -### F: Homography Decomposition Robustness -"How reliable is cv2.decomposeHomographyMat selection heuristic when UAV changes direction? What are failure modes?" - -### G: Image Rotation Handling Completeness -"Is heading-based rotation normalization sufficient? What if heading estimate is wrong early in a segment?" - -### H: Memory Model Under Load -"Can DINOv2 embeddings + SuperPoint features + GTSAM factor graph + satellite cache fit within 16GB RAM and 6GB VRAM during a 3000-image flight?" - -### I: Satellite Match Failure Cascading -"What happens when satellite matching fails for 50+ consecutive frames? How does the 100m drift limit interact with extended VO-only sections?" - -### J: Multi-Provider Tile Schema Compatibility -"Do Google Maps and Mapbox use the same tile coordinate system? What are the practical differences in switching providers?" - -### K: Security Attack Surface -"What are the remaining security vulnerabilities beyond JWT auth? SSE connection abuse? Image processing exploits?" - -### L: Recent Advances (2025-2026) -"Are there newer models or approaches published since draft02 that could improve accuracy or performance?" - -### M: End-to-End Processing Time Budget -"Is the total per-frame time budget realistic when all components run together? What is the critical path?" - ---- - -## Timeliness Sensitivity Assessment - -- **Research Topic**: GPS-denied UAV visual navigation — assessment of solution_draft02 architecture and component choices -- **Sensitivity Level**: 🟠 High -- **Rationale**: CV feature matching models (SuperPoint, LightGlue, XFeat, DINOv2) evolve rapidly with new versions and competitors. GTSAM is stable. Satellite tile API pricing/limits change. Core algorithms (homography, VO) are stable. -- **Source Time Window**: 12 months (2025-2026) -- **Priority official sources to consult**: - 1. GTSAM official documentation and PyPI (factor type compatibility) - 2. LightGlue-ONNX GitHub (Turing GPU compatibility) - 3. Google Maps Tiles API documentation (pricing, session tokens) - 4. DINOv2 official repo (model variants, VRAM) - 5. faiss wiki (GPU memory allocation) -- **Key version information to verify**: - - GTSAM: 4.2 stable, 4.3 alpha (breaking changes) - - LightGlue-ONNX: FP16 on Turing, FP8 requires Ada Lovelace - - Pillow: ≥11.3.0 required (CVE-2025-48379) - - FastAPI: ≥0.135.0 (SSE support) diff --git a/_docs/00_research/gps_denied_draft02_assessment/01_source_registry.md b/_docs/00_research/gps_denied_draft02_assessment/01_source_registry.md deleted file mode 100644 index f48875b..0000000 --- a/_docs/00_research/gps_denied_draft02_assessment/01_source_registry.md +++ /dev/null @@ -1,212 +0,0 @@ -# Source Registry — Draft02 Assessment - -## Source #1 -- **Title**: GTSAM GPSFactor Class Reference -- **Link**: https://gtsam.org/doxygen/a04084.html -- **Tier**: L1 -- **Publication Date**: 2025 (latest docs) -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: GTSAM 4.2 -- **Target Audience**: GTSAM users building factor graphs with GPS constraints -- **Research Boundary Match**: ✅ Full match -- **Summary**: GPSFactor and GPSFactor2 work with Pose3/NavState, NOT Pose2. For 2D position constraints, PriorFactorPoint2 or custom factors are needed. -- **Related Sub-question**: D (GTSAM iSAM2 Factor Graph Design) - -## Source #2 -- **Title**: GTSAM Pose2 SLAM Example -- **Link**: https://gtbook.github.io/gtsam-examples/Pose2SLAMExample.html -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: GTSAM 4.2 -- **Target Audience**: GTSAM users -- **Research Boundary Match**: ✅ Full match -- **Summary**: BetweenFactorPose2 provides odometry constraints with noise model Diagonal.Sigmas(Point3(sigma_x, sigma_y, sigma_theta)). PriorFactorPose2 anchors poses. -- **Related Sub-question**: D - -## Source #3 -- **Title**: GTSAM Python pip install version compatibility (PyPI) -- **Link**: https://pypi.org/project/gtsam/ -- **Tier**: L1 -- **Publication Date**: 2026-01 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: gtsam 4.2 (stable), gtsam-develop 4.3a1 (alpha) -- **Target Audience**: Python developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: GTSAM 4.2 stable on pip. 4.3 alpha has breaking changes (C++17, Boost removal). Known issues with Eigen 5.0.0, ARM64 builds. Stick with 4.2 for production. -- **Related Sub-question**: D - -## Source #4 -- **Title**: LightGlue-ONNX repository -- **Link**: https://github.com/fabio-sim/LightGlue-ONNX -- **Tier**: L1 -- **Publication Date**: 2026-01 (last updated) -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: LightGlue-ONNX (supports ONNX Runtime + TensorRT) -- **Target Audience**: Computer vision developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: ONNX export with 2-4x speedup over PyTorch. FP16 works on Turing (RTX 2060). FP8 requires Ada Lovelace/Hopper. Mixed precision supported since July 2023. -- **Related Sub-question**: C (LightGlue ONNX on RTX 2060) - -## Source #5 -- **Title**: LightGlue rotation issue #64 -- **Link**: https://github.com/cvg/LightGlue/issues/64 -- **Tier**: L4 -- **Publication Date**: 2023 -- **Timeliness Status**: ✅ Currently valid (issue still open) -- **Target Audience**: LightGlue users -- **Research Boundary Match**: ✅ Full match -- **Summary**: SuperPoint+LightGlue not rotation-invariant. Fails at 90°/180°. Workaround: try rotating images by {0°, 90°, 180°, 270°}. Steerable CNNs proposed but not available. -- **Related Sub-question**: G (Image Rotation Handling) - -## Source #6 -- **Title**: SIFT+LightGlue for UAV Image Mosaicking (ISPRS 2025) -- **Link**: https://isprs-archives.copernicus.org/articles/XLVIII-2-W11-2025/169/2025/ -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: SIFT+LightGlue hybrid achieves robust matching in low-texture and high-rotation UAV scenarios. Outperforms both pure SIFT and SuperPoint+LightGlue. -- **Related Sub-question**: G - -## Source #7 -- **Title**: DINOv2-Based UAV Visual Self-Localization -- **Link**: https://ui.adsabs.harvard.edu/abs/2025IRAL...10.2080Y/abstract -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV localization researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: DINOv2 with adaptive enhancement achieves 86.27 R@1 on DenseUAV benchmark for UAV-to-satellite matching. Proves DINOv2 viable for coarse retrieval. -- **Related Sub-question**: A (DINOv2 Cross-View Retrieval) - -## Source #8 -- **Title**: SatLoc-Fusion (Remote Sensing 2025) -- **Link**: https://www.mdpi.com/2072-4292/17/17/3048 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV navigation researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Hierarchical: DINOv2 absolute + XFeat VO + optical flow. <15m error, >2Hz on 6 TFLOPS edge. Adaptive confidence-based fusion. Validates our approach architecture. -- **Related Sub-question**: A, B - -## Source #9 -- **Title**: XFeat: Accelerated Features (CVPR 2024) -- **Link**: https://arxiv.org/abs/2404.19174 -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: CV researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: 5x faster than SuperPoint. Real-time on CPU. Semi-dense matching. XFeat has built-in matcher for fast VO; also compatible with LightGlue via xfeat-lightglue models. -- **Related Sub-question**: B (XFeat Reliability) - -## Source #10 -- **Title**: XFeat + LightGlue compatibility (GitHub issue #128) -- **Link**: https://github.com/cvg/LightGlue/issues/128 -- **Tier**: L4 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: LightGlue/XFeat users -- **Research Boundary Match**: ✅ Full match -- **Summary**: XFeat-LightGlue trained models available on HuggingFace (vismatch/xfeat-lightglue). Also ONNX export available. XFeat's built-in matcher is separate. -- **Related Sub-question**: B - -## Source #11 -- **Title**: DINOv2 VRAM usage by model variant -- **Link**: https://blog.iamfax.com/tech/image-processing/dinov2/ -- **Tier**: L3 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Developers deploying DINOv2 -- **Research Boundary Match**: ✅ Full match -- **Summary**: ViT-S/14: ~300MB VRAM, 0.05s/img. ViT-B/14: ~600MB, 0.1s/img. ViT-L/14: ~1.5GB, 0.35s/img. ViT-G/14: ~5GB, 2s/img. -- **Related Sub-question**: H (Memory Model) - -## Source #12 -- **Title**: Copernicus DEM on AWS Open Data -- **Link**: https://registry.opendata.aws/copernicus-dem/ -- **Tier**: L1 -- **Publication Date**: Ongoing -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Developers needing DEM data -- **Research Boundary Match**: ✅ Full match -- **Summary**: Free access via S3 without authentication. Cloud Optimized GeoTIFFs, 1x1 degree tiles, 30m resolution. `aws s3 ls --no-sign-required s3://copernicus-dem-30m/` -- **Related Sub-question**: E (Copernicus DEM) - -## Source #13 -- **Title**: Google Maps Tiles API Usage and Billing -- **Link**: https://developers.google.com/maps/documentation/tile/usage-and-billing -- **Tier**: L1 -- **Publication Date**: 2026-02 (updated) -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Google Maps API consumers -- **Research Boundary Match**: ✅ Full match -- **Summary**: 100K free requests/month. 6,000/min, 15,000/day rate limits. $200 monthly credit expired Feb 2025. Requires session tokens. -- **Related Sub-question**: J - -## Source #14 -- **Title**: Google Maps vs Mapbox tile schema -- **Link**: https://developers.google.com/maps/documentation/tile/2d-tiles-overview -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Both use z/x/y Web Mercator tiles (256px). Compatible coordinate systems. Google requires session tokens; Mapbox requires API tokens. Mapbox global to zoom 16, regional to 21+. -- **Related Sub-question**: J - -## Source #15 -- **Title**: FastAPI SSE connection cleanup issues (sse-starlette #99) -- **Link**: https://github.com/sysid/sse-starlette/issues/99 -- **Tier**: L4 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: FastAPI SSE developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Async generators cannot be easily cancelled once awaited. Use EventPublisher pattern with asyncio.Queue for proper cleanup. Prevents shutdown hangs and connection lingering. -- **Related Sub-question**: K (Security/Stability) - -## Source #16 -- **Title**: OpenCV decomposeHomographyMat issues (#23282) -- **Link**: https://github.com/opencv/opencv/issues/23282 -- **Tier**: L4 -- **Publication Date**: 2023 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: decomposeHomographyMat can return non-orthogonal rotation matrices. Returns 4 solutions. Positive depth constraint needed for disambiguation. Calibration matrix K precision critical. -- **Related Sub-question**: F (Homography Decomposition) - -## Source #17 -- **Title**: CVE-2025-48379: Pillow Heap Buffer Overflow -- **Link**: https://nvd.nist.gov/vuln/detail/CVE-2025-48379 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: Pillow 11.2.0-11.2.1 affected, fixed in 11.3.0 -- **Summary**: Heap buffer overflow in Pillow's image encoding. Requires pinning Pillow ≥11.3.0 and validating image formats. -- **Related Sub-question**: K (Security) - -## Source #18 -- **Title**: SALAD: Optimal Transport Aggregation for Visual Place Recognition -- **Link**: https://arxiv.org/abs/2311.15937 -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: DINOv2+SALAD outperforms NetVLAD. Single-stage retrieval, no re-ranking. 30min training. Optimal transport aggregation better than raw DINOv2 CLS token for retrieval. -- **Related Sub-question**: A - -## Source #19 -- **Title**: NaviLoc: Trajectory-Level Visual Localization -- **Link**: https://www.mdpi.com/2504-446X/10/2/97 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Treats VPR as noisy measurement, uses trajectory-level optimization. 19.5m MLE, 16x improvement over per-frame VPR. Validates trajectory optimization approach. -- **Related Sub-question**: L (Recent Advances) - -## Source #20 -- **Title**: FAISS GPU memory management -- **Link**: https://github.com/facebookresearch/faiss/wiki/Faiss-on-the-GPU -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: GPU faiss allocates ~2GB scratch space by default. On 6GB VRAM RTX 2060, CPU-based faiss recommended. Supports CPU-GPU interop. -- **Related Sub-question**: H (Memory) diff --git a/_docs/00_research/gps_denied_draft02_assessment/02_fact_cards.md b/_docs/00_research/gps_denied_draft02_assessment/02_fact_cards.md deleted file mode 100644 index 41a3f98..0000000 --- a/_docs/00_research/gps_denied_draft02_assessment/02_fact_cards.md +++ /dev/null @@ -1,142 +0,0 @@ -# Fact Cards — Draft02 Assessment - -## Fact #1 -- **Statement**: GTSAM `GPSFactor` works with `Pose3` variables, NOT `Pose2`. `GPSFactor2` works with `NavState`. Neither accepts `Pose2`. For 2D position constraints, use `PriorFactorPoint2` or a custom factor. -- **Source**: [Source #1] https://gtsam.org/doxygen/a04084.html, [Source #2] -- **Phase**: Assessment -- **Target Audience**: GPS-denied UAV navigation developers -- **Confidence**: ✅ High (official GTSAM documentation) -- **Related Dimension**: Factor Graph Design - -## Fact #2 -- **Statement**: GTSAM 4.2 is stable on pip. GTSAM 4.3 alpha has breaking changes (C++17 migration, Boost removal). Known pip dependency resolution issues for Python bindings (Sept 2025). Production should use 4.2. -- **Source**: [Source #3] https://pypi.org/project/gtsam/ -- **Phase**: Assessment -- **Confidence**: ✅ High (PyPI official) -- **Related Dimension**: Factor Graph Design - -## Fact #3 -- **Statement**: LightGlue-ONNX achieves 2-4x speedup over compiled PyTorch. FP16 works on Turing (RTX 2060). FP8 requires Ada Lovelace/Hopper — falls back to higher precision on Turing. Mixed precision supported since July 2023. -- **Source**: [Source #4] https://github.com/fabio-sim/LightGlue-ONNX -- **Phase**: Assessment -- **Confidence**: ✅ High (official repo, benchmarks) -- **Related Dimension**: Processing Performance - -## Fact #4 -- **Statement**: SuperPoint and LightGlue are NOT rotation-invariant. Performance degrades significantly at 90°/180° rotations. Practical workaround: try matching at {0°, 90°, 180°, 270°} rotations. SIFT+LightGlue hybrid is proven better for high-rotation UAV scenarios (ISPRS 2025). -- **Source**: [Source #5] issue #64, [Source #6] ISPRS 2025 -- **Phase**: Assessment -- **Confidence**: ✅ High (confirmed in official issue + peer-reviewed paper) -- **Related Dimension**: Rotation Handling - -## Fact #5 -- **Statement**: DINOv2 achieves 86.27 R@1 on DenseUAV benchmark for UAV-to-satellite matching (with adaptive enhancement). Raw DINOv2 performance is lower but still viable for coarse retrieval. -- **Source**: [Source #7] https://ui.adsabs.harvard.edu/abs/2025IRAL...10.2080Y/ -- **Phase**: Assessment -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: Satellite Matching - -## Fact #6 -- **Statement**: SatLoc-Fusion validates the exact architecture pattern: DINOv2 for absolute geo-localization + XFeat for VO + optical flow for velocity. Achieves <15m error at >2Hz on 6 TFLOPS edge hardware. Uses adaptive confidence-based fusion. -- **Source**: [Source #8] https://www.mdpi.com/2072-4292/17/17/3048 -- **Phase**: Assessment -- **Confidence**: ✅ High (peer-reviewed, dataset provided) -- **Related Dimension**: Overall Architecture - -## Fact #7 -- **Statement**: XFeat is 5x faster than SuperPoint. Runs real-time on CPU. Has built-in matcher for fast matching. Also compatible with LightGlue via xfeat-lightglue trained models (HuggingFace vismatch/xfeat-lightglue). ONNX export available. -- **Source**: [Source #9] CVPR 2024, [Source #10] GitHub -- **Phase**: Assessment -- **Confidence**: ✅ High (CVPR paper + working implementations) -- **Related Dimension**: Feature Extraction - -## Fact #8 -- **Statement**: DINOv2 VRAM: ViT-S/14 ~300MB (0.05s/img), ViT-B/14 ~600MB (0.1s/img), ViT-L/14 ~1.5GB (0.35s/img). On GTX 1080. -- **Source**: [Source #11] blog benchmark -- **Phase**: Assessment -- **Confidence**: ⚠️ Medium (third-party benchmark, not official) -- **Related Dimension**: Memory Model - -## Fact #9 -- **Statement**: Copernicus DEM GLO-30 is freely available on AWS S3 without authentication: `s3://copernicus-dem-30m/`. Cloud Optimized GeoTIFFs, 30m resolution, global coverage. Alternative: Sentinel Hub API (requires free registration). -- **Source**: [Source #12] AWS Registry -- **Phase**: Assessment -- **Confidence**: ✅ High (AWS official registry) -- **Related Dimension**: DEM Integration - -## Fact #10 -- **Statement**: Google Maps Tiles API: 100K free requests/month, 15,000/day, 6,000/min. Requires session tokens (not just API key). $200 monthly credit expired Feb 2025. February 2026: split quota buckets for 2D and Street View tiles. -- **Source**: [Source #13] Google official docs -- **Phase**: Assessment -- **Confidence**: ✅ High (official documentation) -- **Related Dimension**: Satellite Provider - -## Fact #11 -- **Statement**: Google Maps and Mapbox both use z/x/y Web Mercator tiles (256px). Compatible coordinate systems. Main differences: authentication method (session tokens vs API tokens), max zoom levels (Google: 22, Mapbox: global 16, regional 21+). -- **Source**: [Source #14] Google + Mapbox docs -- **Phase**: Assessment -- **Confidence**: ✅ High (official docs) -- **Related Dimension**: Multi-Provider Cache - -## Fact #12 -- **Statement**: FastAPI SSE async generators cannot be easily cancelled once awaited. Causes shutdown hangs, connection lingering. Solution: EventPublisher pattern with asyncio.Queue for proper lifecycle management. -- **Source**: [Source #15] sse-starlette issue #99 -- **Phase**: Assessment -- **Confidence**: ⚠️ Medium (community report, confirmed by library maintainer) -- **Related Dimension**: API Stability - -## Fact #13 -- **Statement**: cv2.decomposeHomographyMat returns up to 4 solutions. Can return non-orthogonal rotation matrices. Disambiguation requires: positive depth constraint + calibration matrix K precision. Not just "motion consistent with previous direction." -- **Source**: [Source #16] OpenCV issue #23282 -- **Phase**: Assessment -- **Confidence**: ✅ High (confirmed OpenCV issue) -- **Related Dimension**: VO Robustness - -## Fact #14 -- **Statement**: Pillow CVE-2025-48379: heap buffer overflow in 11.2.0-11.2.1. Fixed in 11.3.0. Image processing pipeline must pin Pillow ≥11.3.0. -- **Source**: [Source #17] NVD -- **Phase**: Assessment -- **Confidence**: ✅ High (CVE database) -- **Related Dimension**: Security - -## Fact #15 -- **Statement**: DINOv2+SALAD (optimal transport aggregation) outperforms raw DINOv2 CLS token for visual place recognition. Single-stage, no re-ranking needed. 30-minute training. Better suited for coarse retrieval than raw cosine similarity on CLS tokens. -- **Source**: [Source #18] arXiv -- **Phase**: Assessment -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: Satellite Matching - -## Fact #16 -- **Statement**: FAISS GPU allocates ~2GB scratch space by default. On 6GB VRAM RTX 2060, GPU faiss would consume 33% of VRAM just for indexing. CPU-based faiss recommended for this hardware profile. -- **Source**: [Source #20] FAISS wiki -- **Phase**: Assessment -- **Confidence**: ✅ High (official wiki) -- **Related Dimension**: Memory Model - -## Fact #17 -- **Statement**: NaviLoc achieves 19.5m MLE by treating VPR as noisy measurement and optimizing at trajectory level (not per-frame). 16x improvement over per-frame approaches. Validates trajectory-level optimization concept. -- **Source**: [Source #19] -- **Phase**: Assessment -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: Optimization Strategy - -## Fact #18 -- **Statement**: Mapbox satellite imagery for Ukraine: no specific update schedule. Uses Maxar Vivid product. Coverage to zoom 16 globally (~2.5m/px), regional zoom 18+ (~0.6m/px). No guarantee of Ukraine freshness — likely 2+ years old in conflict areas. -- **Source**: [Source #14] Mapbox docs -- **Phase**: Assessment -- **Confidence**: ⚠️ Medium (general Mapbox info, no Ukraine-specific data) -- **Related Dimension**: Satellite Provider - -## Fact #19 -- **Statement**: For 2D factor graphs, GTSAM uses Pose2 (x, y, theta) with BetweenFactorPose2 for odometry and PriorFactorPose2 for anchoring. Position-only constraints use PriorFactorPoint2. Custom factors via Python callbacks are supported but slower than C++ factors. -- **Source**: [Source #2] GTSAM by Example -- **Phase**: Assessment -- **Confidence**: ✅ High (official GTSAM examples) -- **Related Dimension**: Factor Graph Design - -## Fact #20 -- **Statement**: XFeat's built-in matcher (match_xfeat) is fastest for VO (~15ms total extraction+matching). xfeat-lightglue is higher quality but slower. For satellite matching where accuracy matters more, SuperPoint+LightGlue remains the better choice. -- **Source**: [Source #9] CVPR 2024, [Source #10] -- **Phase**: Assessment -- **Confidence**: ⚠️ Medium (inference from multiple sources) -- **Related Dimension**: Feature Extraction Strategy diff --git a/_docs/00_research/gps_denied_draft02_assessment/03_comparison_framework.md b/_docs/00_research/gps_denied_draft02_assessment/03_comparison_framework.md deleted file mode 100644 index 6ecc687..0000000 --- a/_docs/00_research/gps_denied_draft02_assessment/03_comparison_framework.md +++ /dev/null @@ -1,31 +0,0 @@ -# Comparison Framework — Draft02 Assessment - -## Selected Framework Type -Problem Diagnosis + Decision Support - -## Selected Dimensions -1. Factor Graph Design Correctness -2. VRAM/Memory Budget Feasibility -3. Rotation Handling Completeness -4. VO Robustness (Homography Decomposition) -5. Satellite Matching Reliability -6. Concurrency & Pipeline Architecture -7. Security Attack Surface -8. API/SSE Stability -9. Provider Integration Completeness -10. Drift Management Strategy - -## Findings Matrix - -| Dimension | Draft02 Approach | Weak Point | Severity | Proposed Fix | Factual Basis | -|-----------|------------------|------------|----------|--------------|---------------| -| Factor Graph | Pose2 + GPSFactor + custom DEM/drift factors | GPSFactor requires Pose3, not Pose2. Custom Python factors are slow. | **Critical** | Use Pose2 + BetweenFactorPose2 + PriorFactorPoint2 for satellite anchors. Convert lat/lon to local ENU. Avoid Python custom factors. | Fact #1, #2, #19 | -| VRAM Budget | DINOv2 + SuperPoint + LightGlue ONNX + faiss GPU | No model specified for DINOv2. faiss GPU uses 2GB scratch. Combined VRAM could exceed 6GB. | **High** | Use DINOv2 ViT-S/14 (300MB). faiss on CPU only. Sequence model loading (not concurrent). Explicit budget: XFeat 200MB + DINOv2-S 300MB + SuperPoint 400MB + LightGlue 500MB. | Fact #8, #16 | -| Rotation Handling | Heading-based rectification + SIFT fallback | No heading at segment start. No trigger criteria for SIFT vs rotation retry. Multi-rotation matching not mentioned. | **High** | At segment start: try 4 rotations {0°, 90°, 180°, 270°}. After heading established: rectify. SIFT fallback when SuperPoint inlier ratio < 0.2. | Fact #4 | -| Homography Decomposition | Motion consistency selection | Only "motion consistent with previous direction" — underspecified. 4 solutions possible. Non-orthogonal matrices can occur. | **Medium** | Positive depth constraint first. Then normal direction check (plane normal should point up). Then motion consistency. Orthogonality check on R. | Fact #13 | -| Satellite Coarse Retrieval | DINOv2 + faiss cosine similarity | Raw DINOv2 CLS token suboptimal for retrieval. SALAD aggregation proven better. | **Medium** | Use DINOv2+SALAD or at minimum use patch-level features, not just CLS token. Alternatively, fine-tune DINOv2 on remote sensing. | Fact #5, #15 | -| Concurrency Model | "Async — don't block VO pipeline" | No concrete concurrency design. GPU can't run two models simultaneously. | **High** | Sequential GPU: XFeat VO first (15ms), then async satellite matching on same GPU. Use asyncio for I/O (tile download, DEM fetch). CPU faiss for retrieval. | Fact #8, #16 | -| Security | JWT + rate limiting + CORS | No image format validation. No Pillow version pinning. No SSE abuse protection beyond connection limits. No sandbox for image processing. | **Medium** | Pin Pillow ≥11.3.0. Validate image magic bytes. Limit image dimensions before loading. Memory-map large images. CSP headers. | Fact #14 | -| SSE Stability | FastAPI EventSourceResponse | Async generator cleanup issues on shutdown. No heartbeat. No reconnection strategy. | **Medium** | Use asyncio.Queue-based EventPublisher. Add SSE heartbeat every 15s. Include Last-Event-ID for reconnection. | Fact #12 | -| Provider Integration | Google Maps + Mapbox + user tiles | Google requires session tokens (not just API key). 15K/day limit = ~7 flights from cache misses. Mapbox Ukraine coverage uncertain. | **Medium** | Implement session token management for Google. Add Bing Maps as third provider. Document DEM+tile download budget per flight. | Fact #10, #11, #18 | -| Drift Management | 100m cumulative drift limit factor | Custom factor. If satellite fails for 50+ frames, no anchors → drift factor has nothing to constrain against. | **High** | Add dead-reckoning confidence decay: after N frames without anchor, emit warning + request user input. Track estimated drift explicitly. Set hard limit for user input request. | Fact #17 | diff --git a/_docs/00_research/gps_denied_draft02_assessment/04_reasoning_chain.md b/_docs/00_research/gps_denied_draft02_assessment/04_reasoning_chain.md deleted file mode 100644 index b69e84c..0000000 --- a/_docs/00_research/gps_denied_draft02_assessment/04_reasoning_chain.md +++ /dev/null @@ -1,192 +0,0 @@ -# Reasoning Chain — Draft02 Assessment - -## Dimension 1: Factor Graph Design Correctness - -### Fact Confirmation -According to Fact #1, GTSAM's `GPSFactor` class works exclusively with `Pose3` variables. `GPSFactor2` works with `NavState`. Neither accepts `Pose2`. According to Fact #19, `PriorFactorPoint2` provides 2D position constraints, and `BetweenFactorPose2` provides 2D odometry constraints. - -### Problem -Draft02 specifies `Pose2 (x, y, heading)` variables but lists `GPSFactor` for satellite anchors. This is an API mismatch — the code would fail at runtime. - -### Solution -Two valid approaches: -1. **Pose2 graph (recommended)**: Use `Pose2` variables + `BetweenFactorPose2` for VO + `PriorFactorPose2` for satellite anchors (constraining full pose when heading is available) or use a custom partial factor that constrains only the position part of Pose2. Convert WGS84 to local ENU coordinates centered on starting GPS. -2. **Pose3 graph**: Use `Pose3` with fixed altitude. More accurate but adds unnecessary complexity for 2D problem. - -The custom DEM terrain factor and drift limit factor also need reconsideration: Python custom factors invoke a Python callback per optimization step, which is slow. DEM terrain is irrelevant for 2D Pose2 (altitude is not a variable). Drift should be managed by the Segment Manager logic, not as a factor. - -### Conclusion -Switch to Pose2 + BetweenFactorPose2 + PriorFactorPose2 (or partial position prior). Remove DEM terrain factor (handle elevation in GSD calculation outside the graph). Remove drift limit factor (handle in Segment Manager). This simplifies the factor graph and avoids Python callback overhead. - -### Confidence -✅ High — based on official GTSAM documentation - ---- - -## Dimension 2: VRAM/Memory Budget Feasibility - -### Fact Confirmation -According to Fact #8: DINOv2 ViT-S/14 ~300MB, ViT-B/14 ~600MB. Fact #16: faiss GPU uses ~2GB scratch. Fact #3: LightGlue ONNX FP16 works on RTX 2060. - -### Problem -Draft02 doesn't specify DINOv2 model size. Proposing faiss GPU would consume 2GB of 6GB VRAM. Combined with other models, total could exceed 6GB. Draft02 estimates ~1.5GB per frame for XFeat/SuperPoint + LightGlue, but doesn't account for DINOv2 or faiss. - -### Budget Analysis -Concurrent peak VRAM (worst case): -- XFeat inference: ~200MB -- LightGlue ONNX (FP16): ~500MB -- DINOv2 ViT-B/14: ~600MB -- SuperPoint: ~400MB -- faiss GPU: ~2GB -- ONNX Runtime overhead: ~300MB -- **Total: ~4.0GB** (without faiss GPU: ~2.0GB) - -Sequential loading (recommended): -- Step 1: XFeat + XFeat matcher (VO): ~400MB -- Step 2: DINOv2-S (coarse retrieval): ~300MB → unload -- Step 3: SuperPoint + LightGlue ONNX (fine matching): ~900MB -- Peak: ~1.3GB (with model switching) - -### Conclusion -Use DINOv2 ViT-S/14 (300MB, 0.05s/img — fast enough for coarse retrieval). Run faiss on CPU (the embedding vectors are small, CPU search is <1ms for ~2000 vectors). Sequential model loading for GPU: VO models first, then satellite matching models. Keep models loaded but process sequentially (no concurrent GPU inference). - -### Confidence -✅ High — based on documented VRAM numbers - ---- - -## Dimension 3: Rotation Handling Completeness - -### Fact Confirmation -According to Fact #4, SuperPoint+LightGlue fail at 90°/180° rotations. According to Fact #6 (ISPRS 2025), SIFT+LightGlue outperforms SuperPoint+LightGlue in high-rotation UAV scenarios. - -### Problem -Draft02 says "estimate heading from VO chain, rectify images before satellite matching, SIFT fallback for rotation-heavy cases." But: -1. At segment start, there's no heading from VO chain — no rectification possible. -2. No criteria for when to trigger SIFT fallback. -3. Multi-rotation matching strategy ({0°, 90°, 180°, 270°}) not mentioned. - -### Conclusion -Three-tier rotation handling: -1. **Segment start (no heading)**: Try DINOv2 coarse retrieval (more rotation-robust than local features) → if match found, estimate heading from satellite alignment → proceed normally. -2. **Normal operation (heading available)**: Rectify to approximate north-up using accumulated heading → SuperPoint+LightGlue. -3. **Match failure fallback**: Try 4 rotations {0°, 90°, 180°, 270°} with SuperPoint. If still fails → SIFT+LightGlue (rotation-invariant). - -Trigger for SIFT: SuperPoint inlier ratio < 0.15 after rotation retry. - -### Confidence -✅ High — based on confirmed LightGlue limitation + proven SIFT+LightGlue alternative - ---- - -## Dimension 4: VO Robustness (Homography Decomposition) - -### Fact Confirmation -According to Fact #13, cv2.decomposeHomographyMat returns 4 solutions. Can return non-orthogonal matrices. Calibration matrix K precision is critical. - -### Problem -Draft02 specifies selection by "motion consistent with previous direction + positive depth." This is underspecified for the first frame pair in a segment (no previous direction). Non-orthogonal R detection is missing. - -### Conclusion -Disambiguation procedure: -1. Compute all 4 decompositions. -2. **Filter by positive depth**: triangulate a few matched points, reject solutions where points are behind camera. -3. **Filter by plane normal**: for downward-looking camera, the normal should approximately point up (positive z component in camera frame). -4. **Motion consistency**: if previous direction available, prefer solution consistent with expected motion direction. -5. **Orthogonality check**: verify R'R ≈ I, det(R) ≈ 1. If not, re-orthogonalize via SVD. -6. For first frame pair: rely on filters 2+3 only. - -### Confidence -✅ High — based on well-documented decomposition ambiguity - ---- - -## Dimension 5: Satellite Coarse Retrieval - -### Fact Confirmation -According to Fact #15, DINOv2+SALAD outperforms raw DINOv2 CLS token for retrieval. According to Fact #5, DINOv2 achieves 86.27 R@1 with adaptive enhancement. - -### Problem -Draft02 proposes "DINOv2 global retrieval + faiss cosine similarity." Using raw CLS token is suboptimal. SALAD or patch-level feature aggregation would improve retrieval accuracy. - -### Conclusion -DINOv2+SALAD is the better approach but adds a training/fine-tuning dependency. For a production system without the ability to fine-tune: use DINOv2 patch tokens (not just CLS) with spatial pooling, then cosine similarity via faiss. This captures more spatial information than CLS alone. If time permits, train SALAD head (30 minutes on appropriate dataset). - -Alternatively, consider SatDINO (DINOv2 pre-trained on satellite imagery) if available as a checkpoint. - -### Confidence -⚠️ Medium — SALAD is proven but adding training dependency may not be worth the complexity for this use case - ---- - -## Dimension 6: Concurrency & Pipeline Architecture - -### Fact Confirmation -Single GPU (RTX 2060) cannot run two models concurrently. Fact #8 shows sequential model inference times. Fact #3 shows LightGlue ONNX at ~50-100ms. - -### Problem -Draft02 says satellite matching is "async — don't block VO pipeline" but on a single GPU, you can't parallelize GPU inference. - -### Conclusion -Pipeline design: -1. **VO (synchronous, per-frame)**: XFeat extract + match (~30ms total) → homography estimation (~5ms) → GTSAM update (~5ms) → emit position via SSE. **Total: ~40ms per frame.** -2. **Satellite matching (asynchronous, overlapped with next frame's VO)**: DINOv2 coarse (~50ms) → SuperPoint+LightGlue fine (~150ms) → GTSAM update (~5ms) → emit refined position. **Total: ~205ms but overlapped.** -3. **I/O (fully async)**: Tile download, DEM fetch, cache management — all via asyncio. -4. **CPU tasks (parallel)**: faiss search (CPU), homography RANSAC (CPU-bound but fast). - -The GPU processes frames sequentially. The "async" part is that satellite matching for frame N happens while VO for frame N+1 proceeds. Since satellite matching (~205ms) is longer than VO (~40ms), the pipeline is satellite-matching-bound but VO results stream immediately. - -### Confidence -✅ High — based on documented inference times - ---- - -## Dimension 7: Security Attack Surface - -### Fact Confirmation -According to Fact #14, Pillow CVE-2025-48379 affects image loading. Fact #12 confirms SSE cleanup issues. - -### Problem -Draft02 has JWT + rate limiting + CORS but misses: -- Image format/magic byte validation before loading -- Pillow version pinning -- Memory-limited image loading (a 100,000 × 100,000 pixel image could OOM) -- SSE heartbeat for connection health -- No mention of directory traversal prevention depth - -### Conclusion -Additional security measures: -1. Pin Pillow ≥11.3.0 in requirements. -2. Validate image magic bytes (JPEG/PNG/TIFF) before loading with PIL. -3. Check image dimensions before loading: reject if either dimension > 10,000px. -4. Use OpenCV for loading (separate from PIL) — validate separately. -5. Resolve image_folder path to canonical form (os.path.realpath) and verify it's under allowed base directories. -6. Add Content-Security-Policy headers. -7. SSE heartbeat every 15s to detect stale connections. -8. Implement asyncio.Queue-based event publisher for SSE. - -### Confidence -✅ High — based on documented CVE + known SSE issues - ---- - -## Dimension 8: Drift Management Strategy - -### Fact Confirmation -According to Fact #17, NaviLoc demonstrates that trajectory-level optimization with noisy VPR measurements achieves 16x better accuracy than per-frame approaches. SatLoc-Fusion uses adaptive confidence metrics. - -### Problem -Draft02's "drift limit factor" as a GTSAM custom factor is problematic: (1) custom Python factors are slow, (2) if no satellite anchors arrive for extended period, the drift factor has nothing to constrain against. - -### Conclusion -Replace GTSAM drift factor with Segment Manager logic: -1. Track cumulative VO displacement since last satellite anchor. -2. If cumulative displacement > 100m without anchor: emit warning SSE event, increase satellite matching frequency/radius. -3. If cumulative displacement > 200m: request user input with timeout. -4. If cumulative displacement > 500m: mark segment as LOW confidence, continue but warn. -5. Confidence score per position: decays exponentially with distance from nearest anchor. - -This is simpler, faster, and more controllable than a GTSAM custom factor. - -### Confidence -✅ High — engineering judgment supported by SatLoc-Fusion's confidence-based approach diff --git a/_docs/00_research/gps_denied_draft02_assessment/05_validation_log.md b/_docs/00_research/gps_denied_draft02_assessment/05_validation_log.md deleted file mode 100644 index c3b9342..0000000 --- a/_docs/00_research/gps_denied_draft02_assessment/05_validation_log.md +++ /dev/null @@ -1,100 +0,0 @@ -# Validation Log — Draft02 Assessment (Draft03) - -## Validation Scenario 1: Factor graph initialization with first satellite match - -**Scenario**: Flight starts, VO processes 10 frames, satellite match arrives for frame 5. - -**Expected with Draft03 fixes**: -1. GTSAM graph starts with PriorFactorPose2 at starting GPS (frame 0). -2. BetweenFactorPose2 added for frames 0→1, 1→2, ..., 9→10. -3. Satellite match for frame 5: add PriorFactorPose2 with position from satellite match and noise proportional to reprojection error × GSD. -4. iSAM2.update() triggers backward correction — frames 0-4 and 5-10 both adjust. -5. All positions in local ENU coordinates, converted to WGS84 for output. - -**Validation result**: Consistent. PriorFactorPose2 correctly constrains Pose2 variables. No GPSFactor API mismatch. - -## Validation Scenario 2: Segment start with unknown heading (rotation handling) - -**Scenario**: After a sharp turn, new segment starts. First image has unknown heading. - -**Expected with Draft03 fixes**: -1. VO triple check fails → segment break. -2. New segment starts. No heading available. -3. For satellite coarse retrieval: DINOv2-S processes unrotated image → top-5 tiles. -4. For fine matching: try SuperPoint+LightGlue at 4 rotations {0°, 90°, 180°, 270°}. -5. If match found: heading estimated from satellite alignment. Subsequent images rectified. -6. If no match: try SIFT+LightGlue (rotation-invariant). -7. If still no match: request user input. - -**Validation result**: Consistent. Three-tier fallback addresses the heading bootstrap problem. - -## Validation Scenario 3: VRAM budget during satellite matching - -**Scenario**: Processing frame with concurrent VO + satellite matching on RTX 2060 (6GB). - -**Expected with Draft03 fixes**: -1. XFeat features already extracted for VO: ~200MB VRAM. -2. DINOv2 ViT-S/14 loaded for coarse retrieval: ~300MB. -3. After coarse retrieval, DINOv2 can be unloaded or kept resident. -4. SuperPoint loaded for fine matching: ~400MB. -5. LightGlue ONNX loaded: ~500MB. -6. Peak if all loaded: ~1.4GB. -7. ONNX Runtime workspace: ~300MB. -8. Total peak: ~1.7GB — well within 6GB. -9. faiss runs on CPU — no VRAM impact. - -**Validation result**: Consistent. VRAM budget is comfortable even without model unloading. - -## Validation Scenario 4: Extended satellite failure (50+ frames) - -**Scenario**: Flying over area with outdated/changed satellite imagery. Satellite matching fails for 80 consecutive frames (~8km). - -**Expected with Draft03 fixes**: -1. Frames 1-10: normal VO, satellite matching fails. Cumulative drift increases. -2. Frame ~10 (1km drift): warning SSE event emitted. Satellite search radius expanded. -3. Frame ~20 (2km drift): user_input_needed SSE event. If user provides GPS → anchor + backward correction. -4. If user doesn't respond within timeout: continue with LOW confidence. -5. Frame ~50 (5km drift): positions marked as very low confidence. -6. Confidence score per position decays exponentially from last anchor. -7. If satellite finally matches at frame 80: massive backward correction. Refined events emitted. - -**Validation result**: Consistent. Explicit drift thresholds are more predictable than the custom GTSAM factor approach. - -## Validation Scenario 5: Google Maps session token management - -**Scenario**: Processing a 3000-image flight. Need ~2000 satellite tiles. - -**Expected with Draft03 fixes**: -1. On job start: create Google Maps session with POST /v1/createSession (returns session token). -2. Use session token in all tile requests for this session. -3. Daily limit: 15,000 tiles → sufficient for single flight. -4. Monthly limit: 100,000 → ~50 flights. -5. At 80% daily limit (12,000): switch to Mapbox. -6. Mapbox: 200,000/month → additional ~100 flights capacity. - -**Validation result**: Consistent. Session management addressed correctly. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable/verifiable -- [x] GPSFactor API mismatch verified against GTSAM docs -- [x] VRAM budget calculated with specific model variants -- [x] Rotation handling addresses segment start edge case -- [x] Drift management has concrete thresholds - -## Counterexamples -- **Very low-texture terrain** (uniform sand/snow): XFeat VO might fail even on consecutive frames. Mitigation: track texture score per image, warn when low. -- **Satellite imagery completely missing for region**: Both Google and Mapbox might have no data. Mitigation: user-provided tiles are highest priority. -- **Multiple concurrent GPU processes**: Another process using the GPU could reduce available VRAM. Mitigation: document exclusive GPU access requirement. - -## Conclusions Requiring No Revision -All conclusions validated. Key improvements are well-supported: -1. Correct GTSAM factor types (PriorFactorPose2 instead of GPSFactor) -2. DINOv2 ViT-S/14 for VRAM efficiency -3. Three-tier rotation handling -4. Explicit drift thresholds in Segment Manager -5. asyncio.Queue-based SSE publisher -6. CPU-based faiss -7. Session token management for Google Maps diff --git a/_docs/00_research/gps_denied_nav/00_ac_assessment.md b/_docs/00_research/gps_denied_nav/00_ac_assessment.md deleted file mode 100644 index 7908290..0000000 --- a/_docs/00_research/gps_denied_nav/00_ac_assessment.md +++ /dev/null @@ -1,74 +0,0 @@ -# Acceptance Criteria Assessment - -## Acceptance Criteria - -| Criterion | Our Values | Researched Values | Cost/Timeline Impact | Status | -|-----------|-----------|-------------------|---------------------|--------| -| Position accuracy (80% of photos) | ≤50m error | 15-150m achievable depending on method. SatLoc (2025): <15m with adaptive fusion. Mateos-Ramirez (2024): 142m mean at 1000m+ altitude. At 400m altitude with better GSD (~6cm/px) and satellite correction, ≤50m for 80% is realistic | Moderate — requires high-quality satellite imagery and robust feature matching pipeline | **Modified** — see notes on satellite imagery quality dependency | -| Position accuracy (60% of photos) | ≤20m error | Achievable only with satellite-anchored corrections, not with VO alone. SatLoc reports <15m with satellite anchoring + VO fusion. Requires 0.3-0.5 m/px satellite imagery and good terrain texture | High — requires premium satellite imagery, robust cross-view matching, and careful calibration | **Modified** — add dependency on satellite correction frequency | -| Outlier tolerance | 350m displacement between consecutive photos | At 400m altitude, image footprint is ~375x250m. A 350m displacement means near-zero overlap. VO will fail; system must rely on IMU dead-reckoning or satellite re-localization | Low — standard outlier detection can handle this | Modified — specify fallback strategy (IMU dead-reckoning + satellite re-matching) | -| Sharp turn handling (partial overlap) | <200m drift, <70° angle, <5% overlap | Standard VO fails below ~20-30% overlap. With <5% overlap, feature matching between consecutive frames is unreliable. Requires satellite-based re-localization or IMU bridging | High — requires separate re-localization module | Modified — clarify: "70%" should likely be "70 degrees"; add IMU-bridge requirement | -| Disconnected route segments | System should reconnect disconnected chunks | This is essentially a place recognition / re-localization problem. Solvable via satellite image matching for each new segment independently | High — core architectural requirement affecting system design | Modified — add: each segment should independently localize via satellite matching | -| User fallback input | Ask user after 3 consecutive failures | Reasonable fallback. Needs UI/API integration for interactive input | Low | No change | -| Processing time per image | <5 seconds | On Jetson Orin Nano Super (8GB shared memory): feasible with optimized pipeline. CUDA feature extraction ~50ms, matching ~100-500ms, satellite crop+match ~1-3s. Full pipeline 2-4s is achievable with image downsampling and TensorRT optimization | Moderate — requires TensorRT optimization and image downsampling strategy | **Modified** — specify this is for Jetson Orin Nano Super, not RTX 2060 | -| Real-time streaming | SSE for immediate results + refinement | Standard pattern, well-supported | Low | No change | -| Image Registration Rate | >95% | For consecutive frames with nadir camera in good conditions: 90-98% achievable. Drops significantly during sharp turns and over low-texture terrain (water, uniform fields). The 95% target conflicts with sharp-turn handling requirement | Moderate — requires learning-based matchers (SuperPoint/LightGlue) | **Modified** — clarify: 95% applies to "normal flight" segments only; sharp-turn frames are expected failures handled by re-localization | -| Mean Reprojection Error | <1.0 pixels | Achievable with modern methods (LightGlue, SuperGlue). Traditional methods typically 1-3 px. Deep learning matchers routinely achieve 0.3-0.8 px with proper calibration | Moderate — requires deep learning feature matchers | No change — achievable | -| REST API + SSE architecture | Background service | Standard architecture, well-supported in Python (FastAPI + SSE) | Low | No change | -| Satellite imagery resolution | ≥0.5 m/px, ideally 0.3 m/px | Google Maps for eastern Ukraine: variable, typically 0.5-1.0 m/px in rural areas. 0.3 m/px unlikely from Google Maps. Commercial providers (Maxar, Planet) offer 0.3-0.5 m/px but at significant cost | **High** — Google Maps may not meet 0.5 m/px in all areas of the operational region. 0.3 m/px requires commercial satellite providers | **Modified** — current Google Maps limitation may make this unachievable for all areas; consider fallback for degraded satellite quality | -| Confidence scoring | Per-position estimate (high=satellite, low=VO) | Standard practice in sensor fusion. Easy to implement | Low | No change | -| Output format | WGS84, GeoJSON or CSV | Standard, trivial to implement | Negligible | No change | -| Satellite imagery age | <2 years where possible | Google Maps imagery for conflict zones (eastern Ukraine) may be significantly outdated or intentionally degraded. Recency is hard to guarantee | Medium — may need multiple satellite sources | **Modified** — flag: conflict zone imagery may be intentionally limited | -| Max VO cumulative drift | <100m between satellite corrections | VIO drift typically 0.8-1% of distance. Between corrections at 1km intervals: ~10m drift. 100m budget allows corrections every ~10km — very generous | Low — easily achievable if corrections happen at reasonable intervals | No change — generous threshold | -| Memory usage | <8GB shared memory (Jetson Orin Nano Super) | Binding constraint. 8GB LPDDR5 shared between CPU and GPU. ~6-7GB usable after OS. 26MP images need downsampling | **Critical** — all processing must fit within 8GB shared memory | **Updated** — changed to Jetson Orin Nano Super constraint | -| Object center coordinates | Accuracy consistent with frame-center accuracy | New criterion — derives from problem statement requirement | Low — once frame position is known, object position follows from pixel offset + GSD | **Added** | -| Sharp turn handling | <200m drift, <70 degrees, <5% overlap. 95% registration rate applies to normal flight only | Clarified from original "70%" to "70 degrees". Split registration rate expectation | Low — clarification only | **Updated** | -| Offline preprocessing time | Not time-critical (minutes/hours before flight) | New criterion — no constraint existed | Low | **Added** | - -## Restrictions Assessment - -| Restriction | Our Values | Researched Values | Cost/Timeline Impact | Status | -|-------------|-----------|-------------------|---------------------|--------| -| Aircraft type | Fixed-wing only | Appropriate — fixed-wing has predictable motion model, mostly forward flight. Simplifies VO assumptions | N/A | No change | -| Camera mount | Downward-pointing, fixed, not autostabilized | Implies roll/pitch affect image. At 400m altitude, moderate roll/pitch causes manageable image shift. IMU data can compensate. Non-stabilized means more variable image overlap and orientation | Medium — must use IMU data for image dewarping or accept orientation-dependent accuracy | **Modified** — add: IMU-based image orientation correction should be considered | -| Operational region | Eastern/southern Ukraine (left of Dnipro) | Conflict zone — satellite imagery may be degraded, outdated, or restricted. Terrain: mix of agricultural, urban, forest. Agricultural areas have seasonal texture changes | **High** — satellite imagery availability and quality is a significant risk | **Modified** — flag operational risk: imagery access in conflict zones | -| Image resolution | FullHD to 6252x4168, known camera parameters | 26MP at max is large for edge processing. Must downsample for feature extraction. Known camera intrinsics enable proper projective geometry | Medium — pipeline must handle variable resolutions | No change | -| Altitude | Predefined, ≤1km, terrain height negligible | At 400m: GSD ~6cm/px, footprint ~375x250m. Terrain "negligible" is an approximation — even 50m terrain variation at 400m altitude causes ~12% scale error. The referenced paper (Mateos-Ramirez 2024) shows terrain elevation is a primary error source | **Medium** — "terrain height negligible" needs qualification. At 400m, terrain variations >50m become significant | **Modified** — add: terrain height can be neglected only if variations <50m within image footprint | -| IMU data availability | "A lot of data from IMU" | IMU provides: accelerometer, gyroscope, magnetometer. Crucial for: dead-reckoning during feature-less frames, image orientation compensation, scale estimation, motion prediction. Standard tactical IMUs provide 100-400Hz data | Low — standard IMU integration | **Modified** — specify: IMU data includes gyroscope + accelerometer at ≥100Hz; will be used for orientation compensation and dead-reckoning fallback | -| Weather | Mostly sunny | Favorable for visual methods. Shadows can actually help feature matching. Reduces image quality variability | Low — favorable condition | No change | -| Satellite provider | Google Maps (potentially outdated) | **Critical limitation**: Google Maps satellite API has usage limits, unknown update frequency for eastern Ukraine, potential conflict-zone restrictions. Resolution may not meet 0.5 m/px in rural areas. No guarantee of recency | **High** — single-provider dependency is a significant risk | **Modified** — consider: (1) downloading tiles ahead of time for the operational area, (2) having a fallback provider strategy | -| Photo count | Up to 3000, typically 500-1500 | At 3fps and 500-1500 photos: 3-8 minutes of flight. At ~100m spacing: 50-150km route. Memory for 3000 pre-extracted satellite feature maps needs careful management on 8GB | Medium — batch processing and memory management needed | **Modified** — add: pipeline must manage memory for up to 3000 frames on 8GB device | -| Sharp turns | Next photo may have no common objects with previous | This is the hardest edge case. Complete visual discontinuity requires satellite-based re-localization. IMU provides heading/velocity for bridging. System must be architected around this possibility | High — drives core architecture decision | No change — already captured as a defining constraint | -| Processing hardware | Jetson Orin Nano Super, 67 TOPS | 8GB shared LPDDR5, 1024 CUDA cores, 32 Tensor Cores, 102 GB/s bandwidth. TensorRT for inference optimization. Power: 7-25W. Significantly less capable than desktop GPU | **Critical** — all processing must fit within 8GB shared memory, pipeline must be optimized for TensorRT | **Modified** — CONTRADICTS AC's RTX 2060 reference. Must be the binding constraint | - -## Key Findings - -1. **CRITICAL CONTRADICTION**: The AC mentions "RTX 2060 compatibility" (16GB RAM + 6GB VRAM) but the restriction specifies Jetson Orin Nano Super (8GB shared memory). These are fundamentally different platforms. **The Jetson must be the binding constraint.** All processing, including model weights, image buffers, and intermediate results, must fit within ~6-7GB usable memory (OS takes ~1-1.5GB). - -2. **Satellite Imagery Risk**: Google Maps as the sole satellite provider for a conflict zone in eastern Ukraine presents significant quality, resolution, and recency risks. The 0.3 m/px "ideal" resolution is unlikely available from Google Maps for this region. The system design must be robust to degraded satellite reference quality (0.5-1.0 m/px). - -3. **Accuracy is Achievable but Conditional**: The 50m/80% and 20m/60% accuracy targets are achievable based on recent research (SatLoc 2025: <15m with adaptive fusion), but **only when satellite corrections are successful**. VO-only segments will drift ~1% of distance traveled. The system must maximize satellite correction frequency. - -4. **Sharp Turn Handling Drives Architecture**: The requirement to handle disconnected route segments with no visual overlap between consecutive frames means the system cannot rely solely on sequential VO. It must have an independent satellite-based geo-localization capability for each frame or segment — this is a core architectural requirement. - -5. **Processing Time is Feasible**: <5s per image on Jetson Orin Nano Super is achievable with: (a) image downsampling (e.g., to 2000x1300), (b) TensorRT-optimized models, (c) efficient satellite region cropping. GPU-accelerated feature extraction takes ~50ms, matching ~100-500ms, satellite matching ~1-3s. - -6. **Missing AC: Object Center Coordinates**: The problem statement mentions "coordinates of the center of any object in these photos" but no acceptance criterion specifies the accuracy requirement for this. Need to add. - -7. **Missing AC: DEM/Elevation Data**: Research shows terrain elevation is a primary error source for pixel-to-meter conversion at these altitudes. If terrain variations are >50m, a DEM is needed. No current restriction mentions DEM availability. - -8. **Missing AC: Offline Preprocessing Time**: No constraint on how long satellite image preprocessing can take before the flight. - -9. **"70%" in Sharp Turn AC is Ambiguous**: "at an angle of less than 70%" — this likely means 70 degrees, not 70%. - -## Sources - -- SatLoc: Hierarchical Adaptive Fusion Framework for GNSS-denied UAV Localization (2025) — <15m error, >90% coverage, 2+ Hz on edge hardware -- Mateos-Ramirez et al. "Visual Odometry in GPS-Denied Zones for Fixed-Wing UAV" (2024) — 142.88m mean error over 17km at 1000m+ altitude, 0.83% error rate with satellite correction -- NVIDIA Jetson Orin Nano Super specs: 8GB LPDDR5, 67 TOPS, 1024 CUDA cores, 102 GB/s bandwidth -- cuda-efficient-features: Feature extraction benchmarks — 4K in ~12ms on Jetson Xavier -- SIFT+LightGlue for UAV image mosaicking (ISPRS 2025) — superior performance across diverse scenarios -- SuperPoint+LightGlue comparative analysis (2024) — best balance of robustness, accuracy, efficiency -- Google Maps satellite resolution: 0.15m-30m depending on location and source imagery -- VIO drift benchmarks: 0.82-1% of distance traveled (EuRoC, outdoor flights) -- UAVSAR cross-modality matching: 1.83-2.86m RMSE with deep learning approach (Springer 2026) diff --git a/_docs/00_research/gps_denied_nav/00_question_decomposition.md b/_docs/00_research/gps_denied_nav/00_question_decomposition.md deleted file mode 100644 index 7a7d15f..0000000 --- a/_docs/00_research/gps_denied_nav/00_question_decomposition.md +++ /dev/null @@ -1,88 +0,0 @@ -# Question Decomposition - -## Original Question -Research the GPS-denied onboard navigation problem for a fixed-wing UAV and find the best solution architecture. The system must determine frame-center GPS coordinates using visual odometry, satellite image matching, and IMU fusion — all running on a Jetson Orin Nano Super (8GB shared memory, 67 TOPS). - -## Active Mode -Mode A Phase 2 — Initial Research (Problem & Solution) - -## Rationale -No existing solution drafts. Full problem decomposition and solution research needed. - -## Problem Context Summary (from INPUT_DIR) -- **Platform**: Fixed-wing UAV, camera pointing down (not stabilized), 400m altitude max 1km -- **Camera**: ADTi Surveyor Lite 26S v2, 26MP (6252x4168), focal length 25mm, sensor width 23.5mm -- **GSD at 400m**: ~6cm/pixel, footprint ~375x250m -- **Frame rate**: 3 fps (interval ~333ms, real-world could be 400-500ms) -- **Photo count**: 500-3000 per flight -- **IMU**: Available at high rate -- **Initial GPS**: Known; GPS may be denied/spoofed during flight -- **Satellite reference**: Pre-uploaded Google Maps tiles -- **Hardware**: Jetson Orin Nano Super, 8GB shared memory, 67 TOPS -- **Region**: Eastern/southern Ukraine (conflict zone) -- **Key challenge**: Reconnecting disconnected route segments after sharp turns - -## Question Type Classification -**Decision Support** — we need to evaluate and select the best architectural approach and component technologies for each part of the pipeline. - -## Research Subject Boundary Definition - -| Dimension | Boundary | -|-----------|----------| -| Population | Fixed-wing UAVs with nadir cameras at 200-1000m altitude | -| Geography | Rural/semi-urban terrain in eastern Ukraine | -| Timeframe | Current state-of-the-art (2023-2026) | -| Level | Edge computing (Jetson-class, 8GB memory), real-time processing | - -## Decomposed Sub-Questions - -### A. Existing/Competitor Solutions -1. What existing systems solve GPS-denied UAV visual navigation? -2. What open-source implementations exist for VO + satellite matching? -3. What commercial/military solutions address this problem? - -### B. Architecture Components -4. What is the optimal pipeline architecture (sequential vs parallel, streaming)? -5. How should VO, satellite matching, and IMU fusion be combined (loosely vs tightly coupled)? -6. How to handle disconnected route segments (the core architectural challenge)? - -### C. Visual Odometry Component -7. What VO algorithms work best for aerial nadir imagery on edge hardware? -8. What feature extractors/matchers are optimal for Jetson (SuperPoint, ORB, XFeat)? -9. How to handle scale estimation with known altitude and camera parameters? -10. What is the optimal image downsampling strategy for 26MP on 8GB memory? - -### D. Satellite Image Matching Component -11. How to efficiently match UAV frames against pre-loaded satellite tiles? -12. What cross-view matching methods work for aerial-to-satellite registration? -13. How to preprocess and index satellite tiles for fast retrieval? -14. How to handle resolution mismatch (6cm UAV vs 50cm+ satellite)? - -### E. IMU Fusion Component -15. How to fuse IMU data with visual estimates (EKF, UKF, factor graph)? -16. How to use IMU for dead-reckoning during feature-less frames? -17. How to use IMU for image orientation compensation (non-stabilized camera)? - -### F. Edge Optimization -18. How to fit the full pipeline in 8GB shared memory? -19. What TensorRT optimizations are available for feature extractors? -20. How to achieve <5s per frame on Jetson Orin Nano Super? - -### G. API & Streaming -21. What is the best approach for REST API + SSE on Python/Jetson? -22. How to implement progressive result refinement? - -## Timeliness Sensitivity Assessment - -- **Research Topic**: GPS-denied UAV visual navigation with edge processing -- **Sensitivity Level**: 🟠 High -- **Rationale**: Deep learning feature matchers (SuperPoint, LightGlue, XFeat) and edge inference frameworks (TensorRT) evolve rapidly. Jetson Orin Nano Super is a recent (Dec 2024) product. Cross-view geo-localization is an active research area. -- **Source Time Window**: 12 months (prioritize 2025-2026) -- **Priority official sources to consult**: - 1. NVIDIA Jetson documentation and benchmarks - 2. OpenCV / kornia / hloc official docs - 3. Recent papers on cross-view geo-localization (CVPR, ECCV, ICCV 2024-2025) -- **Key version information to verify**: - - JetPack SDK: Current version ____ - - SuperPoint/LightGlue: Latest available for TensorRT ____ - - XFeat: Version and Jetson compatibility ____ diff --git a/_docs/00_research/gps_denied_nav/01_source_registry.md b/_docs/00_research/gps_denied_nav/01_source_registry.md deleted file mode 100644 index a18a285..0000000 --- a/_docs/00_research/gps_denied_nav/01_source_registry.md +++ /dev/null @@ -1,151 +0,0 @@ -# Source Registry - -## Source #1 -- **Title**: Visual Odometry in GPS-Denied Zones for Fixed-Wing UAV with Reduced Accumulative Error Based on Satellite Imagery -- **Link**: https://www.mdpi.com/2076-3417/14/16/7420 -- **Tier**: L1 -- **Publication Date**: 2024-08-22 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Fixed-wing UAV GPS-denied navigation -- **Research Boundary Match**: ✅ Full match -- **Summary**: VO + satellite correction pipeline for fixed-wing UAV at 1000m+ altitude. Mean error 142.88m over 17km (0.83%). Uses ORB features, centroid-based displacement, Kalman filter smoothing, quadtree for satellite keypoint indexing. -- **Related Sub-question**: A1, B5, C7, D11 - -## Source #2 -- **Title**: SatLoc: Hierarchical Adaptive Fusion Framework for GNSS-denied UAV Localization -- **Link**: https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV localization in GNSS-denied environments -- **Research Boundary Match**: ✅ Full match -- **Summary**: Three-layer fusion: DinoV2 for satellite geo-localization, XFeat for VO, optical flow for velocity. Adaptive confidence-based weighting. <15m error, >90% coverage, 2+ Hz on edge hardware. -- **Related Sub-question**: B4, B5, C8, D12 - -## Source #3 -- **Title**: XFeat: Accelerated Features for Lightweight Image Matching (CVPR 2024) -- **Link**: https://arxiv.org/abs/2404.19174 -- **Tier**: L1 -- **Publication Date**: 2024-04 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Edge device feature matching -- **Research Boundary Match**: ✅ Full match -- **Summary**: 5x faster than SuperPoint, runs on CPU at VGA resolution. Sparse and semi-dense matching. TensorRT deployment available for Jetson. Comparable accuracy to SuperPoint. -- **Related Sub-question**: C8, F18, F20 - -## Source #4 -- **Title**: XFeat TensorRT Implementation -- **Link**: https://github.com/PranavNedunghat/XFeatTensorRT -- **Tier**: L2 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: C++ TensorRT implementation of XFeat, tested on Jetson Orin NX 16GB with JetPack 6.0, CUDA 12.2, TensorRT 8.6. -- **Related Sub-question**: C8, F18, F19 - -## Source #5 -- **Title**: SuperPoint+LightGlue TensorRT Deployment -- **Link**: https://github.com/fettahyildizz/superpoint_lightglue_tensorrt -- **Tier**: L2 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: C++ TensorRT implementation of SuperPoint+LightGlue. Production-ready deployment for Jetson platforms. -- **Related Sub-question**: C8, F19 - -## Source #6 -- **Title**: FP8 Quantized LightGlue in TensorRT -- **Link**: https://fabio-sim.github.io/blog/fp8-quantized-lightglue-tensorrt-nvidia-model-optimizer/ -- **Tier**: L2 -- **Publication Date**: 2026 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Up to ~6x speedup with FP8 quantization. Requires Hopper/Ada Lovelace GPUs (not available on Jetson Orin Nano Ampere). FP16 is the best available precision for Orin Nano. -- **Related Sub-question**: F19 - -## Source #7 -- **Title**: NVIDIA JetPack 6.2 Release Notes -- **Link**: https://docs.nvidia.com/jetson/archives/jetpack-archived/jetpack-62/release-notes/index.html -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: CUDA 12.6.10, TensorRT 10.3.0, cuDNN 9.3. Super Mode for Orin Nano: up to 2x inference performance, 50% memory bandwidth boost. Power modes: 15W, 25W, MAXN SUPER. -- **Related Sub-question**: F18, F19, F20 - -## Source #8 -- **Title**: cuda-efficient-features (GPU feature detection benchmarks) -- **Link**: https://github.com/fixstars/cuda-efficient-features -- **Tier**: L2 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: 4K detection: 12ms on Jetson Xavier. 8K: 27.5ms. 40K keypoints extraction: 20-25ms on Xavier. Orin Nano Super should be comparable or better. -- **Related Sub-question**: F20 - -## Source #9 -- **Title**: Adaptive Covariance Hybrid EKF/UKF for Visual-Inertial Odometry -- **Link**: https://arxiv.org/abs/2512.17505 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Hybrid EKF/UKF achieves 49% better position accuracy, 57% better rotation accuracy than ESKF alone, at 48% lower computational cost than full UKF. Includes adaptive sensor confidence scoring. -- **Related Sub-question**: E15 - -## Source #10 -- **Title**: SIFT+LightGlue for UAV Image Mosaicking (ISPRS 2025) -- **Link**: https://isprs-archives.copernicus.org/articles/XLVIII-2-W11-2025/169/2025/ -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: SIFT+LightGlue outperforms SuperPoint+LightGlue for UAV mosaicking across diverse scenarios. Superior in both low-texture and high-texture environments. -- **Related Sub-question**: C8, D12 - -## Source #11 -- **Title**: UAVision - GNSS-Denied UAV Visual Localization System -- **Link**: https://github.com/ArboriseRS/UAVision -- **Tier**: L4 -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Open-source system using LightGlue for map matching. Includes image processing modules and visualization. -- **Related Sub-question**: A2 - -## Source #12 -- **Title**: TerboucheHacene/visual_localization -- **Link**: https://github.com/TerboucheHacene/visual_localization -- **Tier**: L4 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Vision-based GNSS-free localization with SuperPoint/SuperGlue/GIM matching. Optimized VO + satellite image matching hybrid pipeline. Learning-based matchers for natural environments. -- **Related Sub-question**: A2, D12 - -## Source #13 -- **Title**: GNSS-Denied Geolocalization with Terrain Constraints -- **Link**: https://github.com/yfs90/gnss-denied-uav-geolocalization -- **Tier**: L4 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: No altimeters/IMU required, uses image matching + terrain constraints. GPS-comparable accuracy for day/night across varied terrain. -- **Related Sub-question**: A2 - -## Source #14 -- **Title**: Google Maps Tile API Documentation -- **Link**: https://developers.google.com/maps/documentation/tile/satellite -- **Tier**: L1 -- **Publication Date**: Current -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Zoom levels 0-22. Satellite tiles via HTTPS. Session tokens required. Bulk download possible but subject to usage policies. -- **Related Sub-question**: D13 - -## Source #15 -- **Title**: NaviLoc: Trajectory-Level Visual Localization for GNSS-Denied UAV Navigation -- **Link**: https://www.mdpi.com/2504-446X/10/2/97 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Trajectory-level optimization rather than per-frame matching. Optimizes entire trajectory against satellite reference for improved accuracy. -- **Related Sub-question**: B4, D11 - -## Source #16 -- **Title**: GSD Estimation for UAV Photogrammetry -- **Link**: https://blog.truegeometry.com/calculators/UAV_photogrammetry_workflows_calculation.html -- **Tier**: L3 -- **Publication Date**: Current -- **Timeliness Status**: ✅ Currently valid -- **Summary**: GSD = (sensor_width × altitude) / (focal_length × image_width). For our case: (23.5mm × 400m) / (25mm × 6252) = 0.06 m/pixel. -- **Related Sub-question**: C9 diff --git a/_docs/00_research/gps_denied_nav/02_fact_cards.md b/_docs/00_research/gps_denied_nav/02_fact_cards.md deleted file mode 100644 index 962ce99..0000000 --- a/_docs/00_research/gps_denied_nav/02_fact_cards.md +++ /dev/null @@ -1,121 +0,0 @@ -# Fact Cards - -## Fact #1 -- **Statement**: XFeat achieves up to 5x faster inference than SuperPoint while maintaining comparable accuracy for pose estimation. It runs in real-time on CPU at VGA resolution. -- **Source**: Source #3 (CVPR 2024 paper) -- **Phase**: Phase 2 -- **Target Audience**: Edge device deployments -- **Confidence**: ✅ High -- **Related Dimension**: Feature Extraction - -## Fact #2 -- **Statement**: XFeat TensorRT implementation exists and is tested on Jetson Orin NX 16GB with JetPack 6.0, CUDA 12.2, TensorRT 8.6. -- **Source**: Source #4 -- **Phase**: Phase 2 -- **Target Audience**: Jetson platform deployment -- **Confidence**: ✅ High -- **Related Dimension**: Feature Extraction, Edge Optimization - -## Fact #3 -- **Statement**: SatLoc framework achieves <15m absolute localization error with >90% trajectory coverage at 2+ Hz on edge hardware, using DinoV2 for satellite matching, XFeat for VO, and optical flow for velocity. -- **Source**: Source #2 -- **Phase**: Phase 2 -- **Target Audience**: GNSS-denied UAV localization -- **Confidence**: ⚠️ Medium (paper details not fully accessible) -- **Related Dimension**: Overall Architecture, Accuracy - -## Fact #4 -- **Statement**: Mateos-Ramirez et al. achieved 142.88m mean error over 17km (0.83% error rate) with VO + satellite correction on a fixed-wing UAV at 1000m+ altitude. Without satellite correction, error accumulated to 850m+ over 17km. -- **Source**: Source #1 -- **Phase**: Phase 2 -- **Target Audience**: Fixed-wing UAV at high altitude -- **Confidence**: ✅ High -- **Related Dimension**: Accuracy, Architecture - -## Fact #5 -- **Statement**: VIO systems typically drift 0.8-1% of distance traveled. Between satellite corrections at 1km intervals, expected drift is ~10m. -- **Source**: Multiple sources (arxiv VIO benchmarks) -- **Phase**: Phase 2 -- **Target Audience**: Aerial VIO systems -- **Confidence**: ✅ High -- **Related Dimension**: VO Drift - -## Fact #6 -- **Statement**: Jetson Orin Nano Super: 8GB LPDDR5 shared memory, 1024 CUDA cores, 32 Tensor Cores, 102 GB/s bandwidth, 67 TOPS INT8. JetPack 6.2: CUDA 12.6.10, TensorRT 10.3.0. -- **Source**: Source #7 -- **Phase**: Phase 2 -- **Target Audience**: Hardware specification -- **Confidence**: ✅ High -- **Related Dimension**: Edge Optimization - -## Fact #7 -- **Statement**: CUDA-accelerated feature detection at 4K (3840x2160): ~12ms on Jetson Xavier. At 8K: ~27.5ms. Descriptor extraction for 40K keypoints: ~20-25ms on Xavier. Orin Nano Super has comparable or slightly better compute. -- **Source**: Source #8 -- **Phase**: Phase 2 -- **Target Audience**: Jetson GPU performance -- **Confidence**: ✅ High -- **Related Dimension**: Processing Time - -## Fact #8 -- **Statement**: Hybrid EKF/UKF achieves 49% better position accuracy than ESKF alone at 48% lower computational cost than full UKF. Includes adaptive sensor confidence scoring based on image entropy and motion blur. -- **Source**: Source #9 -- **Phase**: Phase 2 -- **Target Audience**: VIO fusion -- **Confidence**: ✅ High -- **Related Dimension**: Sensor Fusion - -## Fact #9 -- **Statement**: SIFT+LightGlue outperforms SuperPoint+LightGlue for UAV mosaicking across diverse scenarios (low-texture agricultural and high-texture urban). -- **Source**: Source #10 -- **Phase**: Phase 2 -- **Target Audience**: UAV image matching -- **Confidence**: ✅ High -- **Related Dimension**: Feature Matching - -## Fact #10 -- **Statement**: GSD for our system at 400m: (23.5mm × 400m) / (25mm × 6252px) = 0.060 m/pixel. Image footprint: 6252 × 0.06 = 375m width, 4168 × 0.06 = 250m height. -- **Source**: Source #16 + camera parameters -- **Phase**: Phase 2 -- **Target Audience**: Our specific system -- **Confidence**: ✅ High -- **Related Dimension**: Scale Estimation - -## Fact #11 -- **Statement**: Google Maps satellite tiles available via Tile API at zoom levels 0-22. Max zoom varies by region. For eastern Ukraine, zoom 18 (~0.6 m/px) is typically available; zoom 19 (~0.3 m/px) may not be. -- **Source**: Source #14 -- **Phase**: Phase 2 -- **Target Audience**: Satellite imagery -- **Confidence**: ⚠️ Medium (exact zoom availability for eastern Ukraine unverified) -- **Related Dimension**: Satellite Reference - -## Fact #12 -- **Statement**: FP8 quantization for LightGlue requires Hopper/Ada GPUs. Jetson Orin Nano uses Ampere architecture — limited to FP16 as best TensorRT precision. -- **Source**: Source #6, Source #7 -- **Phase**: Phase 2 -- **Target Audience**: Jetson optimization -- **Confidence**: ✅ High -- **Related Dimension**: Edge Optimization - -## Fact #13 -- **Statement**: SuperPoint+LightGlue TensorRT C++ deployment is available and production-tested. ONNX Runtime path achieves 2-4x speedup over compiled PyTorch. -- **Source**: Source #5, Source #6 -- **Phase**: Phase 2 -- **Target Audience**: Production deployment -- **Confidence**: ✅ High -- **Related Dimension**: Feature Matching, Edge Optimization - -## Fact #14 -- **Statement**: Cross-view matching (UAV-to-satellite) is fundamentally harder than same-view matching due to extreme viewpoint differences. Deep learning embeddings (DinoV2, CLIP-based) are the state-of-the-art for coarse retrieval. Local features are used for fine alignment. -- **Source**: Multiple (Sources #2, #12, #15) -- **Phase**: Phase 2 -- **Target Audience**: Cross-view geo-localization -- **Confidence**: ✅ High -- **Related Dimension**: Satellite Matching - -## Fact #15 -- **Statement**: Quadtree spatial indexing enables O(log n) nearest-neighbor lookup for satellite keypoints. Combined with GeoHash for fast region encoding, this is the standard approach for tile management. -- **Source**: Sources #1, #14 -- **Phase**: Phase 2 -- **Target Audience**: Spatial indexing -- **Confidence**: ✅ High -- **Related Dimension**: Satellite Tile Management diff --git a/_docs/00_research/gps_denied_nav/03_comparison_framework.md b/_docs/00_research/gps_denied_nav/03_comparison_framework.md deleted file mode 100644 index 87cde80..0000000 --- a/_docs/00_research/gps_denied_nav/03_comparison_framework.md +++ /dev/null @@ -1,71 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Decision Support — evaluating solution options per component - -## Architecture Components to Evaluate - -1. Feature Extraction & Matching (VO frame-to-frame) -2. Satellite Image Matching (cross-view geo-registration) -3. Sensor Fusion (VO + satellite + IMU) -4. Satellite Tile Preprocessing & Indexing -5. Image Downsampling Strategy -6. Re-localization (disconnected segments) -7. API & Streaming Layer - -## Component 1: Feature Extraction & Matching (VO) - -| Dimension | XFeat | SuperPoint + LightGlue | ORB (OpenCV) | -|-----------|-------|----------------------|--------------| -| Speed (Jetson) | ~2-5ms per frame (VGA), 5x faster than SuperPoint | ~15-50ms per frame (VGA, TensorRT FP16) | ~5-10ms per frame (CUDA) | -| Accuracy | Comparable to SuperPoint on pose estimation | State-of-the-art for local features | Lower accuracy, not scale-invariant | -| Memory | <100MB model | ~200-400MB model+inference | Negligible | -| TensorRT support | Yes (C++ impl available for Jetson Orin NX) | Yes (C++ impl available) | N/A (native CUDA) | -| Cross-view capability | Limited (same-view designed) | Better with LightGlue attention | Poor for cross-view | -| Rotation invariance | Moderate | Good with LightGlue | Good (by design) | -| Jetson validation | Tested on Orin NX (JetPack 6.0) | Tested on multiple Jetson platforms | Native OpenCV CUDA | -| **Fit for VO** | ✅ Best — fast, accurate, Jetson-proven | ⚠️ Good but heavier | ⚠️ Fast but less accurate | -| **Fit for satellite matching** | ⚠️ Moderate | ✅ Better for cross-view with attention | ❌ Poor for cross-view | - -## Component 2: Satellite Image Matching (Cross-View) - -| Dimension | Local Feature Matching (SIFT/SuperPoint + LightGlue) | Global Descriptor Retrieval (DinoV2/CLIP) | Template Matching (NCC) | -|-----------|-----------------------------------------------------|------------------------------------------|------------------------| -| Approach | Extract keypoints in both UAV and satellite images, match descriptors | Encode both images into global vectors, compare by distance | Slide UAV image over satellite tile, compute correlation | -| Accuracy | Sub-pixel when matches found (best for fine alignment) | Tile-level (~50-200m depending on tile size) | Pixel-level but sensitive to appearance changes | -| Speed | ~100-500ms for match+geometric verification | ~50-100ms for descriptor comparison | ~500ms-2s for large search area | -| Robustness to viewpoint | Good with LightGlue attention | Excellent (trained for cross-view) | Poor (requires similar viewpoint) | -| Memory | ~300-500MB (model + keypoints) | ~200-500MB (model) | Low | -| Failure rate | High in low-texture, seasonal changes | Lower — semantic understanding | High in changed scenes | -| **Recommended role** | Fine alignment (after coarse retrieval) | Coarse retrieval (select candidate tile) | Not recommended | - -## Component 3: Sensor Fusion - -| Dimension | EKF (Extended Kalman Filter) | Error-State EKF (ESKF) | Hybrid ESKF/UKF | Factor Graph (GTSAM) | -|-----------|-------------------------------|------------------------|------------------|---------------------| -| Accuracy | Baseline | Better for rotation | 49% better than ESKF | Best overall | -| Compute cost | Lowest | Low | 48% less than full UKF | Highest | -| Implementation complexity | Low | Medium | Medium-High | High | -| Handles non-linearity | Linearization errors | Better for small errors | Best among KF variants | Full non-linear | -| Real-time on Jetson | ✅ | ✅ | ✅ | ⚠️ Depends on graph size | -| Multi-rate sensor support | Manual | Manual | Manual | Native | -| **Fit** | ⚠️ Baseline option | ✅ Good starting point | ✅ Best KF option | ⚠️ Overkill for this system | - -## Component 4: Satellite Tile Management - -| Dimension | GeoHash + In-Memory | Quadtree + Memory-Mapped Files | Pre-extracted Feature DB | -|-----------|--------------------|-----------------------------|------------------------| -| Lookup speed | O(1) hash | O(log n) tree traversal | O(1) hash + feature load | -| Memory usage | All tiles in RAM | On-demand loading | Features only (smaller) | -| Preprocessing | Fast | Moderate | Slow (extract all features offline) | -| Flexibility | Fixed grid | Adaptive resolution | Fixed per-tile | -| **Fit for 8GB** | ❌ Too much RAM for large areas | ✅ Memory-efficient | ✅ Best — smallest footprint | - -## Component 5: Image Downsampling Strategy - -| Dimension | Fixed Resize (e.g., 1600x1066) | Pyramid (multi-scale) | ROI-based (center crop + full) | -|-----------|-------------------------------|----------------------|-------------------------------| -| Speed | Fast, single scale | Slower, multiple passes | Medium | -| Accuracy | Good if GSD ratio maintained | Best for multi-scale features | Good for center, loses edges | -| Memory | ~5MB per frame | ~7-8MB per frame | ~6MB per frame | -| **Fit** | ✅ Best tradeoff | ⚠️ Unnecessary complexity | ⚠️ Loses coverage | diff --git a/_docs/00_research/gps_denied_nav/04_reasoning_chain.md b/_docs/00_research/gps_denied_nav/04_reasoning_chain.md deleted file mode 100644 index a25a108..0000000 --- a/_docs/00_research/gps_denied_nav/04_reasoning_chain.md +++ /dev/null @@ -1,129 +0,0 @@ -# Reasoning Chain - -## Dimension 1: Feature Extraction for Visual Odometry - -### Fact Confirmation -XFeat is 5x faster than SuperPoint (Fact #1), has TensorRT deployment on Jetson (Fact #2), and comparable accuracy for pose estimation. SatLoc (the most relevant state-of-the-art system) uses XFeat for its VO component (Fact #3). - -### Reference Comparison -SuperPoint+LightGlue is more accurate for cross-view matching but heavier. ORB is fast but less accurate and not robust to appearance changes. SIFT+LightGlue is best for mosaicking (Fact #9) but slower. - -### Conclusion -**XFeat for VO (frame-to-frame)** — it's the fastest learned feature, Jetson-proven, and used by the closest state-of-the-art system (SatLoc). For satellite matching, a different approach is needed because cross-view matching requires viewpoint-invariant features. - -### Confidence -✅ High — supported by SatLoc architecture and CVPR 2024 benchmarks. - ---- - -## Dimension 2: Satellite Image Matching Strategy - -### Fact Confirmation -Cross-view matching is fundamentally harder than same-view (Fact #14). Deep learning embeddings (DinoV2) are state-of-the-art for coarse retrieval (Fact #3). Local features are better for fine alignment. SatLoc uses DinoV2 for satellite matching specifically. - -### Reference Comparison -A two-stage coarse-to-fine approach is the dominant pattern in literature: (1) global descriptor retrieves candidate region, (2) local feature matching refines position. Pure local-feature matching has high failure rate for cross-view due to extreme viewpoint differences. - -### Conclusion -**Two-stage approach**: (1) Coarse — use a lightweight global descriptor to find the best-matching satellite tile within the search area (VO-predicted position ± uncertainty radius). (2) Fine — use local feature matching (SuperPoint+LightGlue or XFeat) between UAV frame and the matched satellite tile to get precise position. The coarse stage can also serve as the re-localization mechanism for disconnected segments. - -### Confidence -✅ High — consensus across multiple recent papers and the SatLoc system. - ---- - -## Dimension 3: Sensor Fusion Approach - -### Fact Confirmation -Hybrid ESKF/UKF achieves 49% better accuracy than ESKF alone at 48% lower cost than full UKF (Fact #8). Factor graphs (GTSAM) offer the best accuracy but are computationally expensive. - -### Reference Comparison -For our system: IMU runs at 100-400Hz, VO at ~3Hz (frame rate), satellite corrections at variable rate (whenever matching succeeds). We need multi-rate fusion that handles intermittent satellite corrections and continuous IMU. - -### Conclusion -**Error-State EKF (ESKF)** as the baseline fusion approach — it's well-understood, lightweight, handles multi-rate sensors naturally, and is proven for VIO on edge hardware. Upgrade to hybrid ESKF/UKF if ESKF accuracy is insufficient. Factor graphs are overkill for this real-time edge system. - -The filter state: position (lat/lon), velocity, orientation (quaternion), IMU biases. Measurements: VO-derived displacement (high rate), satellite-derived absolute position (variable rate), IMU (highest rate for prediction). - -### Confidence -✅ High — ESKF is the standard choice for embedded VIO systems. - ---- - -## Dimension 4: Satellite Tile Preprocessing & Indexing - -### Fact Confirmation -Quadtree enables O(log n) lookups (Fact #15). Pre-extracting features offline saves runtime compute. 8GB memory limits in-memory tile storage. - -### Reference Comparison -Full tiles in memory is infeasible for large areas. Memory-mapped files allow on-demand loading. Pre-extracted feature databases have the smallest runtime footprint. - -### Conclusion -**Offline preprocessing pipeline**: -1. Download Google Maps satellite tiles at max zoom (18-19) for the operational area -2. Extract features (XFeat or SuperPoint) from each tile -3. Compute global descriptors (lightweight, e.g., NetVLAD or cosine-pooled XFeat descriptors) per tile -4. Store: tile metadata (GPS bounds, zoom level), features + descriptors in a GeoHash-indexed database -5. Build spatial index (GeoHash) for fast lookup by GPS region - -**Runtime**: Given VO-estimated position, query GeoHash to find nearby tiles, compare global descriptors for coarse match, then local feature matching for fine alignment. - -### Confidence -✅ High — standard approach used by all relevant systems. - ---- - -## Dimension 5: Image Downsampling Strategy - -### Fact Confirmation -26MP images need downsampling for 8GB device (Fact #6). Feature extraction at 4K takes ~12ms on Jetson Xavier (Fact #7). UAV GSD at 400m is ~6cm/px (Fact #10). Satellite GSD is ~60cm/px at zoom 18. - -### Reference Comparison -For VO (frame-to-frame): features at full resolution are wasteful — consecutive frames at 6cm GSD overlap ~80%, and features at lower resolution are sufficient for displacement estimation. For satellite matching: we need to match at satellite resolution (~60cm/px), so downsampling to match satellite GSD is natural. - -### Conclusion -**Downsample to ~1600x1066** (factor ~4x each dimension). This yields ~24cm/px GSD — still 2.5x finer than satellite, sufficient for feature matching. Image size: ~5MB (RGB). Feature extraction at this resolution: <10ms. This is the single resolution for both VO and satellite matching. - -### Confidence -✅ High — standard practice for edge processing of high-res imagery. - ---- - -## Dimension 6: Disconnected Segment Handling - -### Fact Confirmation -SatLoc uses satellite matching as an independent localization source that works regardless of VO state (Fact #3). The AC requires reconnecting disconnected segments as a core capability. - -### Reference Comparison -Pure VO cannot handle zero-overlap transitions. IMU dead-reckoning bridges short gaps (seconds). Satellite-based re-localization provides absolute position regardless of VO state. - -### Conclusion -**Independent satellite localization per frame** — every frame attempts satellite matching regardless of VO state. This naturally handles disconnected segments: -1. When VO succeeds: satellite matching refines position (high confidence) -2. When VO fails (sharp turn): satellite matching provides absolute position (sole source) -3. When both fail: IMU dead-reckoning with low confidence score -4. After 3 consecutive total failures: request user input - -Segment reconnection is automatic: all positions are in the same global (WGS84) frame via satellite matching. No explicit "reconnection" needed — segments share the satellite reference. - -### Confidence -✅ High — this is the key architectural insight. - ---- - -## Dimension 7: Processing Pipeline Architecture - -### Fact Confirmation -<5s per frame required (AC). Feature extraction ~10ms, VO matching ~20-50ms, satellite coarse retrieval ~50-100ms, satellite fine matching ~200-500ms, fusion ~1ms. Total: ~300-700ms per frame. - -### Conclusion -**Pipelined parallel architecture**: -- Thread 1 (Camera): Capture frame, downsample, extract features → push to queue -- Thread 2 (VO): Match with previous frame, compute displacement → push to fusion -- Thread 3 (Satellite): Search nearby tiles, coarse retrieval, fine matching → push to fusion -- Thread 4 (Fusion): ESKF prediction (IMU), update (VO), update (satellite) → emit result via SSE - -VO and satellite matching can run in parallel for each frame. Fusion integrates results as they arrive. This enables <1s per frame total latency. - -### Confidence -✅ High — standard producer-consumer pipeline. diff --git a/_docs/00_research/gps_denied_nav/05_validation_log.md b/_docs/00_research/gps_denied_nav/05_validation_log.md deleted file mode 100644 index a5f751c..0000000 --- a/_docs/00_research/gps_denied_nav/05_validation_log.md +++ /dev/null @@ -1,98 +0,0 @@ -# Validation Log - -## Validation Scenario 1: Normal Flight (80% of time) -UAV flies straight, consecutive frames overlap ~70-80%. Terrain has moderate texture (agricultural + urban mix). - -### Expected Based on Conclusions -- XFeat extracts features in ~5ms, VO matching in ~20ms -- Satellite matching succeeds: coarse retrieval ~50ms, fine matching ~300ms -- ESKF fuses both: position accuracy ~10-20m (satellite-anchored) -- Total processing: <500ms per frame -- Confidence: HIGH - -### Actual Validation (against literature) -SatLoc reports <15m error with >90% coverage under similar conditions. Mateos-Ramirez reports 0.83% drift with satellite correction. Both align with our expected performance. - -### Result: ✅ PASS - ---- - -## Validation Scenario 2: Sharp Turn (5-10% of time) -UAV makes a 60-degree turn. Next frame has <5% overlap with previous. Heading changes rapidly. - -### Expected Based on Conclusions -- VO fails (insufficient feature overlap) — detected by low match count -- IMU provides heading and approximate displacement for ~1-2 frames -- Satellite matching attempts independent localization of the new frame -- If satellite match succeeds: position recovered, segment continues -- If satellite match fails: IMU dead-reckoning with LOW confidence - -### Potential Issues -- Satellite matching may also fail if the frame is heavily tilted (non-nadir view during turn) -- IMU drift during turn: at 100m/s for 1s, displacement ~100m. IMU drift over 1s: ~1-5m — acceptable - -### Result: ⚠️ CONDITIONAL PASS — depends on satellite matching success during turn. Non-stabilized camera may produce tilted images that are harder to match. IMU provides reasonable bridge. - ---- - -## Validation Scenario 3: Disconnected Route (rare, <5%) -UAV completes segment A, makes a 90+ degree turn, flies a new heading. Segment B has no overlap with segment A. Multiple such segments possible. - -### Expected Based on Conclusions -- Each segment independently localizes via satellite matching -- No explicit reconnection needed — all in WGS84 frame -- Per-segment accuracy depends on satellite matching success rate -- Low-confidence gaps between segments until satellite match succeeds - -### Result: ✅ PASS — architecture handles this natively via independent per-frame satellite matching. - ---- - -## Validation Scenario 4: Memory-Constrained Operation (always) -3000 frames, 8GB shared memory. Full pipeline running. - -### Expected Based on Conclusions -- Downsampled frame: ~5MB per frame. Keep 2 in memory (current + previous): ~10MB -- XFeat model (TensorRT): ~50-100MB -- Satellite tile features (loaded tiles): ~200-500MB for tiles near current position -- ESKF state: <1MB -- OS + runtime: ~1.5GB -- Total: ~2-3GB active, well within 8GB - -### Potential Issues -- Satellite feature DB for large operational areas could be large on disk (not memory — loaded on demand) -- Need careful management of tile loading/unloading - -### Result: ✅ PASS — 8GB is sufficient with proper memory management. - ---- - -## Validation Scenario 5: Degraded Satellite Imagery -Google Maps tiles at 0.5-1.0 m/px resolution. Some areas have outdated imagery. Seasonal appearance changes. - -### Expected Based on Conclusions -- Coarse retrieval (global descriptors) should handle moderate appearance changes -- Fine matching may fail on outdated/seasonal tiles — confidence drops to LOW -- System falls back to VO + IMU in degraded areas -- Multiple consecutive failures → user input request - -### Potential Issues -- If large areas have degraded satellite imagery, the system may operate mostly in VO+IMU mode with significant drift -- 50m accuracy target may not be achievable in these areas - -### Result: ⚠️ CONDITIONAL PASS — system degrades gracefully, but accuracy targets depend on satellite quality. This is a known risk per Phase 1 assessment. - ---- - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable/verifiable -- [x] Sharp turn handling addressed -- [x] Memory constraints validated -- [ ] Issue: Satellite imagery quality in eastern Ukraine remains a risk -- [ ] Issue: Non-stabilized camera during turns may degrade satellite matching - -## Conclusions Requiring No Revision -All major architectural decisions validated. Two known risks (satellite quality, non-stabilized camera during turns) are acknowledged and handled by the fallback hierarchy. diff --git a/_docs/00_research/gps_denied_nav_assessment/00_question_decomposition.md b/_docs/00_research/gps_denied_nav_assessment/00_question_decomposition.md deleted file mode 100644 index 817dfcf..0000000 --- a/_docs/00_research/gps_denied_nav_assessment/00_question_decomposition.md +++ /dev/null @@ -1,80 +0,0 @@ -# Question Decomposition — Solution Assessment (Mode B) - -## Original Question -Assess the existing solution draft (solution_draft01.md) for weak points, security vulnerabilities, and performance bottlenecks, then produce a revised solution draft. - -## Active Mode -Mode B: Solution Assessment — `solution_draft01.md` exists and is the highest-numbered draft. - -## Question Type Classification -- **Primary**: Problem Diagnosis — identify weak points, vulnerabilities, bottlenecks in existing solution -- **Secondary**: Decision Support — evaluate alternatives for identified issues - -## Research Subject Boundary Definition - -| Dimension | Boundary | -|-----------|----------| -| **Domain** | GPS-denied UAV visual navigation, aerial geo-referencing | -| **Geography** | Eastern/southern Ukraine (left of Dnipro River) — steppe terrain, potential conflict-related satellite imagery degradation | -| **Hardware** | Desktop/laptop with NVIDIA RTX 2060+, 16GB RAM, 6GB VRAM | -| **Software** | Python ecosystem, GPU-accelerated CV/ML | -| **Timeframe** | Current state-of-the-art (2024-2026), production-ready tools | -| **Scale** | 500-3000 images per flight, up to 6252×4168 resolution | - -## Problem Context Summary -- UAV aerial photos taken consecutively ~100m apart, camera pointing down (not autostabilized) -- Only starting GPS known — must determine GPS for all subsequent images -- Must handle: sharp turns, outlier photos (up to 350m gap), disconnected route segments -- Processing <5s/image, real-time SSE streaming, REST API service -- No IMU data available - -## Decomposed Sub-Questions - -### A: Cross-View Matching Viability -"Is SuperPoint+LightGlue with perspective warping reliable for UAV-to-satellite cross-view matching, or are there specialized cross-view methods that would perform better?" - -### B: Homography-Based VO Robustness -"Is homography-based VO (flat terrain assumption) robust enough for non-stabilized camera with potential roll/pitch variations and non-flat objects?" - -### C: Satellite Imagery Reliability -"What are the risks of relying solely on Google Maps satellite imagery for eastern Ukraine, and what fallback strategies exist?" - -### D: Processing Time Feasibility -"Are the processing time estimates (<5s per image) realistic on RTX 2060 with SuperPoint+LightGlue+satellite matching pipeline?" - -### E: Optimizer Specification -"Is the sliding window optimizer well-specified, and are there more proven alternatives like factor graph optimization?" - -### F: Camera Rotation Handling -"How should the system handle arbitrary image rotation from non-stabilized camera mount?" - -### G: Security Assessment -"What are the security vulnerabilities in the REST API + SSE architecture with image processing pipeline?" - -### H: Newer Tools & Libraries -"Are there newer (2025-2026) tools, models, or approaches that outperform the current selections (SuperPoint, LightGlue, etc.)?" - -### I: Segment Management Robustness -"Is the segment management strategy robust enough for multiple disconnected segments, especially when satellite anchoring fails for a segment?" - -### J: Memory & Resource Management -"Can the pipeline stay within 16GB RAM / 6GB VRAM while processing 3000 images at 6252×4168 resolution?" - ---- - -## Timeliness Sensitivity Assessment - -- **Research Topic**: GPS-denied UAV visual navigation using learned feature matching and satellite geo-referencing -- **Sensitivity Level**: 🟠 High -- **Rationale**: Computer vision feature matching models (SuperPoint, LightGlue, etc.) are actively evolving with new versions and competitors. However, the core algorithms (homography, VO, optimization) are stable. The tool ecosystem changes frequently. -- **Source Time Window**: 12 months (2025-2026) -- **Priority official sources to consult**: - 1. LightGlue / SuperPoint GitHub repos (releases, issues) - 2. OpenCV documentation (current version) - 3. Google Maps Tiles API documentation - 4. Recent aerial geo-referencing papers (2024-2026) -- **Key version information to verify**: - - LightGlue: current version and ONNX/TensorRT support status - - SuperPoint: current version and alternatives - - FastAPI: SSE support status - - Google Maps Tiles API: pricing, coverage, rate limits diff --git a/_docs/00_research/gps_denied_nav_assessment/01_source_registry.md b/_docs/00_research/gps_denied_nav_assessment/01_source_registry.md deleted file mode 100644 index 82734a8..0000000 --- a/_docs/00_research/gps_denied_nav_assessment/01_source_registry.md +++ /dev/null @@ -1,201 +0,0 @@ -# Source Registry — Solution Assessment (Mode B) - -## Source #1 -- **Title**: GLEAM: Learning to Match and Explain in Cross-View Geo-Localization -- **Link**: https://arxiv.org/abs/2509.07450 -- **Tier**: L1 -- **Publication Date**: 2025-09 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Cross-view geo-localization researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Framework for cross-view geo-localization with explainable matching across modalities. Demonstrates that specialized cross-view methods outperform generic feature matchers. - -## Source #2 -- **Title**: Robust UAV Image Mosaicking Using SIFT and LightGlue (ISPRS 2025) -- **Link**: https://isprs-archives.copernicus.org/articles/XLVIII-2-W11-2025/169/2025/ -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV photogrammetry and aerial image processing -- **Research Boundary Match**: ✅ Full match -- **Summary**: SIFT+LightGlue achieves superior spatial consistency and reliability for UAV image mosaicking, including low-texture and high-rotation conditions. SIFT outperforms SuperPoint for rotation-heavy scenarios. - -## Source #3 -- **Title**: Precise GPS-Denied UAV Self-Positioning via Context-Enhanced Cross-View Geo-Localization (CEUSP) -- **Link**: https://arxiv.org/abs/2502.11408 / https://github.com/eksnew/ceusp -- **Tier**: L1 -- **Publication Date**: 2025-02 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GPS-denied UAV navigation -- **Research Boundary Match**: ⚠️ Partial overlap (urban, not steppe) -- **Summary**: DINOv2-based cross-view matching for UAV self-positioning. State-of-the-art on DenseUAV benchmark. Uses retrieval-based (not feature-matching) approach. - -## Source #4 -- **Title**: SatLoc Dataset and Hierarchical Adaptive Fusion Framework -- **Link**: https://www.mdpi.com/2072-4292/17/17/3048 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GNSS-denied UAV navigation -- **Research Boundary Match**: ✅ Full match -- **Summary**: Three-layer architecture: DINOv2 for absolute geo-localization, XFeat for VO, optical flow for velocity. Adaptive fusion with confidence weighting. <15m absolute error on edge hardware. - -## Source #5 -- **Title**: LightGlue ONNX/TensorRT acceleration blog -- **Link**: https://fabio-sim.github.io/blog/accelerating-lightglue-inference-onnx-runtime-tensorrt/ -- **Tier**: L2 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: LightGlue users optimizing inference -- **Research Boundary Match**: ✅ Full match -- **Summary**: LightGlue ONNX achieves 2-4x speedup over PyTorch. FP8 quantization (Ada/Hopper GPUs only) adds 6x more. RTX 2060 does NOT support FP8. - -## Source #6 -- **Title**: LightGlue-ONNX GitHub repository -- **Link**: https://github.com/fabio-sim/LightGlue-ONNX -- **Tier**: L2 -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: LightGlue deployment engineers -- **Research Boundary Match**: ✅ Full match -- **Summary**: ONNX export for LightGlue with FlashAttention-2 support. TopK-trick for ~30% speedup. Pre-exported models available. - -## Source #7 -- **Title**: LightGlue GitHub Issue #64 — Rotation sensitivity -- **Link**: https://github.com/cvg/LightGlue/issues/64 -- **Tier**: L4 -- **Publication Date**: 2023-2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: LightGlue users -- **Research Boundary Match**: ✅ Full match -- **Summary**: LightGlue (with SuperPoint/DISK) is NOT rotation-invariant. 90° or 180° rotation causes matching failure. Manual rectification needed. - -## Source #8 -- **Title**: LightGlue GitHub Issue #13 — No-match handling -- **Link**: https://github.com/cvg/LightGlue/issues/13 -- **Tier**: L4 -- **Publication Date**: 2023 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: LightGlue users -- **Research Boundary Match**: ✅ Full match -- **Summary**: LightGlue lacks explicit training on unmatchable pairs. May produce geometrically meaningless matches instead of rejecting non-overlapping views. - -## Source #9 -- **Title**: YFS90/GNSS-Denied-UAV-Geolocalization GitHub -- **Link**: https://github.com/yfs90/gnss-denied-uav-geolocalization -- **Tier**: L1 -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GPS-denied UAV navigation -- **Research Boundary Match**: ✅ Full match -- **Summary**: <7m MAE using terrain-weighted constraint optimization + 2D-3D geo-registration. Uses DEM data. Validated across 20 complex scenarios. Works with publicly available satellite maps. - -## Source #10 -- **Title**: Efficient image matching for UAV visual navigation via DALGlue (Scientific Reports 2025) -- **Link**: https://www.nature.com/articles/s41598-025-21602-5 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV visual navigation -- **Research Boundary Match**: ✅ Full match -- **Summary**: 11.8% MMA improvement over LightGlue. Uses dual-tree complex wavelet transform + adaptive spatial feature fusion + linear attention. Designed for UAV dynamic flight. - -## Source #11 -- **Title**: XFeat: Accelerated Features for Lightweight Image Matching (CVPR 2024) -- **Link**: https://arxiv.org/html/2404.19174v1 / https://github.com/verlab/accelerated_features -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Real-time feature matching applications -- **Research Boundary Match**: ✅ Full match -- **Summary**: 5x faster than SuperPoint. Runs real-time on CPU. Sparse + semi-dense matching. Used by SatLoc-Fusion for VO. 1500+ GitHub stars. - -## Source #12 -- **Title**: An Oblique-Robust Absolute Visual Localization Method (IEEE TGRS 2024) -- **Link**: https://ieeexplore.ieee.org/iel7/36/10354519/10356107.pdf -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GPS-denied UAV localization -- **Research Boundary Match**: ✅ Full match -- **Summary**: SE(2)-steerable network for rotation-equivariant features. Handles drastic perspective changes, non-perpendicular camera angles. No additional training for new scenes. - -## Source #13 -- **Title**: Google Maps Tiles API Usage and Billing -- **Link**: https://developers.google.com/maps/documentation/tile/usage-and-billing -- **Tier**: L1 -- **Publication Date**: 2025-2026 (continuously updated) -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Google Maps API users -- **Research Boundary Match**: ✅ Full match -- **Summary**: 100,000 free tile requests/month. Rate limit: 6,000/min, 15,000/day for 2D tiles. $200/month free credit expired Feb 2025. Now pay-as-you-go only. - -## Source #14 -- **Title**: GTSAM Python API and Factor Graph examples -- **Link**: https://github.com/borglab/gtsam / https://pypi.org/project/gtsam-develop/ -- **Tier**: L1 -- **Publication Date**: 2025-2026 (v4.2 stable, v4.3a1 dev) -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Robot navigation, SLAM -- **Research Boundary Match**: ✅ Full match -- **Summary**: Python bindings for factor graph optimization. GPSFactor for absolute position constraints. iSAM2 for incremental optimization. Stable v4.2 for production use. - -## Source #15 -- **Title**: Copernicus DEM documentation -- **Link**: https://documentation.dataspace.copernicus.eu/APIs/SentinelHub/Data/DEM.html -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: DEM data users -- **Research Boundary Match**: ✅ Full match -- **Summary**: Free 30m DEM (GLO-30) covering Ukraine. API access via Sentinel Hub Process API. Registration required. - -## Source #16 -- **Title**: Homography Decomposition Revisited (IJCV 2025) -- **Link**: https://link.springer.com/article/10.1007/s11263-025-02680-4 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Computer vision researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Existing homography decomposition methods can be unstable in certain configurations. Proposes hybrid framework for improved stability. - -## Source #17 -- **Title**: Sliding window factor graph optimization for visual/inertial navigation (Cambridge 2020) -- **Link**: https://www.cambridge.org/core/services/aop-cambridge-core/content/view/523C7C41D18A8D7C159C59235DF502D0/ -- **Tier**: L1 -- **Publication Date**: 2020 -- **Timeliness Status**: ✅ Currently valid (foundational method) -- **Target Audience**: Navigation system designers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Sliding-window factor graph optimization combines accuracy of graph optimization with efficiency of windowed approach. Superior to separate filtering or full batch optimization. - -## Source #18 -- **Title**: SuperPoint feature extraction and matching benchmarks -- **Link**: https://preview-www.nature.com/articles/s41598-024-59626-y/tables/3 -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Feature matching benchmarking -- **Research Boundary Match**: ✅ Full match -- **Summary**: SuperPoint+LightGlue: ~0.36±0.06s per image pair for extraction+matching on GPU. Competitive accuracy for satellite stereo scenarios. - -## Source #19 -- **Title**: DINOv2-Based UAV Visual Self-Localization in Low-Altitude Urban Environments -- **Link**: https://ui.adsabs.harvard.edu/abs/2025IRAL...10.2080Y/ -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV visual localization researchers -- **Research Boundary Match**: ⚠️ Partial overlap (urban, not steppe) -- **Summary**: DINOv2-based method achieves 86.27 R@1 on DenseUAV benchmark for cross-view matching. Integrates global-local feature enhancement. - -## Source #20 -- **Title**: Mapbox Satellite Tiles and Pricing -- **Link**: https://docs.mapbox.com/data/tilesets/reference/mapbox-satellite/ / https://mapbox.com/pricing -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Map tile consumers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Mapbox offers satellite tiles up to 0.3m resolution (zoom 16+). 200,000 free vector tile requests/month. Unlimited offline downloads on pay-as-you-go. Multi-provider imagery (Maxar, Landsat, Sentinel). diff --git a/_docs/00_research/gps_denied_nav_assessment/02_fact_cards.md b/_docs/00_research/gps_denied_nav_assessment/02_fact_cards.md deleted file mode 100644 index cee4ae8..0000000 --- a/_docs/00_research/gps_denied_nav_assessment/02_fact_cards.md +++ /dev/null @@ -1,161 +0,0 @@ -# Fact Cards — Solution Assessment (Mode B) - -## Fact #1 -- **Statement**: LightGlue (with SuperPoint/DISK descriptors) is NOT rotation-invariant. Image pairs with 90° or 180° rotation produce very few or zero matches. Manual image rectification is required before matching. -- **Source**: Source #7 (LightGlue GitHub Issue #64) -- **Phase**: Assessment -- **Target Audience**: UAV systems with non-stabilized cameras -- **Confidence**: ✅ High (confirmed by LightGlue maintainers) -- **Related Dimension**: Cross-view matching robustness, camera rotation handling - -## Fact #2 -- **Statement**: LightGlue lacks explicit training on unmatchable image pairs. When given non-overlapping views (e.g., after sharp turn), it may return semantically correct but geometrically meaningless matches instead of correctly rejecting the pair. -- **Source**: Source #8 (LightGlue GitHub Issue #13) -- **Phase**: Assessment -- **Target Audience**: Systems requiring segment detection (VO failure detection) -- **Confidence**: ✅ High (confirmed by LightGlue maintainers) -- **Related Dimension**: Segment management, VO failure detection - -## Fact #3 -- **Statement**: SatLoc-Fusion achieves <15m absolute localization error using a three-layer hierarchical approach: DINOv2 for coarse absolute geo-localization, XFeat for high-frequency VO, optical flow for velocity estimation. Runs real-time on 6 TFLOPS edge hardware. -- **Source**: Source #4 (SatLoc-Fusion, Remote Sensing 2025) -- **Phase**: Assessment -- **Target Audience**: GPS-denied UAV systems -- **Confidence**: ✅ High (peer-reviewed, with dataset) -- **Related Dimension**: Architecture, localization accuracy, hierarchical matching - -## Fact #4 -- **Statement**: XFeat is 5x faster than SuperPoint with comparable accuracy. Runs real-time on CPU. Supports both sparse and semi-dense matching. 1500+ GitHub stars, actively maintained. -- **Source**: Source #11 (CVPR 2024) -- **Phase**: Assessment -- **Target Audience**: Real-time feature extraction -- **Confidence**: ✅ High (peer-reviewed, CVPR 2024) -- **Related Dimension**: Processing speed, feature extraction - -## Fact #5 -- **Statement**: SIFT+LightGlue achieves superior spatial consistency and reliability for UAV image mosaicking, including in low-texture and high-rotation conditions. SIFT is rotation-invariant unlike SuperPoint. -- **Source**: Source #2 (ISPRS 2025) -- **Phase**: Assessment -- **Target Audience**: UAV image matching -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: Feature extraction, rotation handling - -## Fact #6 -- **Statement**: SuperPoint+LightGlue extraction+matching takes ~0.36±0.06s per image pair on GPU (unspecified GPU model). This is for standard resolution images, not 6000+ pixel width. -- **Source**: Source #18 -- **Phase**: Assessment -- **Target Audience**: Performance planning -- **Confidence**: ⚠️ Medium (GPU model not specified, may not be RTX 2060) -- **Related Dimension**: Processing time - -## Fact #7 -- **Statement**: LightGlue ONNX/TensorRT achieves 2-4x speedup over compiled PyTorch. FP8 quantization adds 6x more but requires Ada Lovelace or newer GPUs. RTX 2060 (Turing) does NOT support FP8 — limited to FP16/INT8 acceleration. -- **Source**: Source #5, #6 (LightGlue-ONNX blog and repo) -- **Phase**: Assessment -- **Target Audience**: RTX 2060 deployment -- **Confidence**: ✅ High (benchmarked by repo maintainer) -- **Related Dimension**: Processing time, hardware constraints - -## Fact #8 -- **Statement**: YFS90 achieves <7m MAE using terrain-weighted constraint optimization + 2D-3D geo-registration with DEM data. Validated across 20 complex scenarios including plains, hilly terrain, urban/rural. Works with publicly available satellite maps and DEM data. Re-localization capability after failures. -- **Source**: Source #9 (YFS90 GitHub) -- **Phase**: Assessment -- **Target Audience**: GPS-denied UAV navigation -- **Confidence**: ✅ High (peer-reviewed, open source, 69★) -- **Related Dimension**: Optimization approach, DEM integration, accuracy - -## Fact #9 -- **Statement**: Google Maps $200/month free credit expired February 28, 2025. Current free tier is 100,000 tile requests/month. Rate limits: 6,000 requests/min, 15,000 requests/day for 2D tiles. -- **Source**: Source #13 (Google Maps official docs) -- **Phase**: Assessment -- **Target Audience**: Cost planning -- **Confidence**: ✅ High (official documentation) -- **Related Dimension**: Cost, satellite imagery access - -## Fact #10 -- **Statement**: Google Maps satellite imagery for eastern Ukraine is likely updated only every 3-5+ years due to: conflict zone (lower priority), geopolitical challenges, limited user demand. This may not meet the AC requirement of "less than 2 years old." -- **Source**: Multiple web sources on Google Maps update frequency -- **Phase**: Assessment -- **Target Audience**: Satellite imagery reliability -- **Confidence**: ⚠️ Medium (general guidelines, not Ukraine-specific confirmation) -- **Related Dimension**: Satellite imagery reliability - -## Fact #11 -- **Statement**: Mapbox Satellite offers imagery up to 0.3m resolution at zoom 16+, sourced from Maxar, Landsat, Sentinel. 200,000 free vector tile requests/month. Unlimited offline downloads on pay-as-you-go. Potentially more diverse and recent imagery for Ukraine than Google Maps alone. -- **Source**: Source #20 (Mapbox docs) -- **Phase**: Assessment -- **Target Audience**: Alternative satellite providers -- **Confidence**: ✅ High (official documentation) -- **Related Dimension**: Satellite imagery reliability, cost - -## Fact #12 -- **Statement**: Copernicus DEM GLO-30 provides free 30m resolution global elevation data including Ukraine. Accessible via Sentinel Hub API. Can be used for terrain-weighted optimization like YFS90. -- **Source**: Source #15 (Copernicus docs) -- **Phase**: Assessment -- **Target Audience**: DEM integration -- **Confidence**: ✅ High (official documentation) -- **Related Dimension**: Position optimizer, terrain constraints - -## Fact #13 -- **Statement**: GTSAM v4.2 (stable) provides Python bindings with GPSFactor for absolute position constraints and iSAM2 for incremental optimization. Can model VO constraints, satellite anchor constraints, and drift limits in a unified factor graph. -- **Source**: Source #14 (GTSAM docs) -- **Phase**: Assessment -- **Target Audience**: Optimizer design -- **Confidence**: ✅ High (widely used in robotics) -- **Related Dimension**: Position optimizer - -## Fact #14 -- **Statement**: DALGlue achieves 11.8% MMA improvement over LightGlue on MegaDepth benchmark. Specifically designed for UAV visual navigation with wavelet transform preprocessing for handling dynamic flight blur. -- **Source**: Source #10 (Scientific Reports 2025) -- **Phase**: Assessment -- **Target Audience**: Feature matching selection -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: Feature matching - -## Fact #15 -- **Statement**: The oblique-robust AVL method (IEEE TGRS 2024) uses SE(2)-steerable networks for rotation-equivariant features. Handles drastic perspective changes and non-perpendicular camera angles for UAV-to-satellite matching. No retraining needed for new scenes. -- **Source**: Source #12 (IEEE TGRS 2024) -- **Phase**: Assessment -- **Target Audience**: Cross-view matching -- **Confidence**: ✅ High (peer-reviewed, IEEE) -- **Related Dimension**: Cross-view matching, rotation handling - -## Fact #16 -- **Statement**: Homography decomposition can be unstable in certain configurations (2025 IJCV study). Non-planar objects (buildings, trees) violate planar assumption. For aerial images, dominant ground plane exists but RANSAC inlier ratio drops with non-planar content. -- **Source**: Source #16 (IJCV 2025) -- **Phase**: Assessment -- **Target Audience**: VO design -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: VO robustness - -## Fact #17 -- **Statement**: Sliding-window factor graph optimization combines the accuracy of full graph optimization with the efficiency of windowed processing. Superior to either pure filtering or full batch optimization for real-time navigation. -- **Source**: Source #17 (Cambridge 2020) -- **Phase**: Assessment -- **Target Audience**: Optimizer design -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: Position optimizer - -## Fact #18 -- **Statement**: SuperPoint is a fully-convolutional model — GPU memory scales linearly with image resolution. 6252×4168 input would require significant VRAM. Standard practice is to downscale to 1024-2048 long edge for feature extraction. -- **Source**: Source #18, SuperPoint docs -- **Phase**: Assessment -- **Target Audience**: Memory management -- **Confidence**: ✅ High (architectural fact) -- **Related Dimension**: Memory management, processing pipeline - -## Fact #19 -- **Statement**: For GPS-denied UAV localization, hierarchical coarse-to-fine approaches (image retrieval → local feature matching) are state-of-the-art. Direct local feature matching alone fails when the search area is too large or viewpoint difference is too high. -- **Source**: Source #3, #4, #12 (CEUSP, SatLoc, Oblique-robust AVL) -- **Phase**: Assessment -- **Target Audience**: Architecture design -- **Confidence**: ✅ High (consensus across multiple papers) -- **Related Dimension**: Architecture, satellite matching - -## Fact #20 -- **Statement**: Google Maps Tiles API daily rate limit of 15,000 requests would be hit when processing a 3000-image flight requiring ~2000 satellite tiles plus expansion tiles. Need to either pre-cache or use the per-minute limit (6,000/min) strategically across multiple days. -- **Source**: Source #13 (Google Maps docs) -- **Phase**: Assessment -- **Target Audience**: System design -- **Confidence**: ✅ High (official rate limits) -- **Related Dimension**: Satellite tile management, rate limiting diff --git a/_docs/00_research/gps_denied_nav_assessment/03_comparison_framework.md b/_docs/00_research/gps_denied_nav_assessment/03_comparison_framework.md deleted file mode 100644 index 70a5059..0000000 --- a/_docs/00_research/gps_denied_nav_assessment/03_comparison_framework.md +++ /dev/null @@ -1,79 +0,0 @@ -# Comparison Framework — Solution Assessment (Mode B) - -## Selected Framework Type -Problem Diagnosis + Decision Support - -## Identified Weak Points and Assessment Dimensions - -### Dimension 1: Cross-View Matching Strategy (UAV→Satellite) - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Strategy | Direct SuperPoint+LightGlue matching with perspective warping | No coarse localization stage. Fails when VO drift is large. LightGlue not rotation-invariant. | Hierarchical: DINOv2/global retrieval → SuperPoint+LightGlue refinement | Fact #1, #2, #15, #19 | -| Rotation handling | Not addressed | Non-stabilized camera = rotated images. SuperPoint/LightGlue fail at 90°/180° | Image rectification via VO-estimated heading, or rotation-invariant features (SIFT for fallback) | Fact #1, #5 | -| Domain gap | Perspective warping only | Insufficient for seasonal/illumination/resolution differences | Multi-scale matching, DINOv2 for semantic retrieval, warping + matched features | Fact #3, #15 | - -### Dimension 2: Feature Extraction & Matching - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| VO features | SuperPoint (~80ms) | Adequate but not optimized for speed | XFeat (5x faster, CPU-capable) for VO; keep SuperPoint for satellite matching | Fact #4 | -| Matching | LightGlue | Good baseline. DALGlue 11.8% better MMA. | LightGlue with ONNX optimization as primary. DALGlue for evaluation. | Fact #7, #14 | -| Non-match detection | Not addressed | LightGlue returns false matches on non-overlapping pairs | Inlier ratio + match count threshold + geometric consistency check | Fact #2 | - -### Dimension 3: Visual Odometry Robustness - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Geometric model | Homography (planar assumption) | Unstable for non-planar objects. Decomposition instability in certain configs. | Homography with RANSAC + high inlier ratio requirement. Essential matrix as fallback. | Fact #16 | -| Scale estimation | GSD from altitude | Valid if altitude is constant. Terrain elevation changes not accounted for. | Integrate Copernicus DEM for terrain-corrected GSD | Fact #12 | -| Camera rotation | Not addressed | Non-stabilized camera introduces roll/pitch | Estimate rotation from VO, apply rectification before satellite matching | Fact #1, #5 | - -### Dimension 4: Position Optimizer - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Algorithm | scipy.optimize sliding window | Generic optimizer, no proper uncertainty modeling, no factor types | GTSAM factor graph with iSAM2 incremental optimization | Fact #13, #17 | -| Terrain constraints | Not used | YFS90 achieves <7m with terrain weighting | Integrate DEM-based terrain constraints via Copernicus DEM | Fact #8, #12 | -| Drift modeling | Max 100m between anchors | Single hard constraint, no probabilistic modeling | Per-VO-step uncertainty based on inlier ratio, propagated through factor graph | Fact #17 | - -### Dimension 5: Satellite Imagery Reliability - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Provider | Google Maps only | Eastern Ukraine: 3-5 year update cycle. $200 credit expired. 15K/day rate limit. | Multi-provider: Google Maps primary + Mapbox fallback + pre-cached tiles | Fact #9, #10, #11, #20 | -| Freshness | Assumed adequate | May not meet AC "< 2 years old" for conflict zone | Provider selection per-area. User can provide custom imagery. | Fact #10 | -| Rate limiting | Not addressed | 15,000/day cap could block large flights | Progressive download with request budgeting. Pre-cache for known areas. | Fact #20 | - -### Dimension 6: Processing Time Budget - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Target | <5s (claim <2s) | Per-frame pipeline: VO match + satellite match + optimization. Total could exceed budget. | XFeat for VO (~20ms). LightGlue ONNX for satellite (~100ms). Async satellite matching. | Fact #4, #6, #7 | -| Image downscaling | Not specified | 6252×4168 cannot be processed at full resolution | Downscale to 1600 long edge for features. Keep full resolution for GSD calculation. | Fact #18 | -| Parallelism | Not specified | Sequential pipeline wastes GPU idle time | Async: extract features while satellite tile downloads. Pipeline overlap. | — | - -### Dimension 7: Memory Management - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Image loading | Not specified | 6252×4168 × 3ch = 78MB per raw image. 3000 images = 234GB. | Stream images one at a time. Keep only current + previous features in memory. | Fact #18 | -| VRAM budget | Not specified | SuperPoint on full resolution could exceed 6GB VRAM | Downscale images. Batch size 1. Clear GPU cache between frames. | Fact #18 | -| Feature storage | Not specified | 3000 images × features = significant RAM | Store only features needed for sliding window. Disk-backed for older frames. | — | - -### Dimension 8: Security - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Authentication | API key mentioned | No implementation details. API key in query params = insecure. | JWT tokens for session auth. Short-lived tokens for SSE connections. | SSE security research | -| Path traversal | Mentioned in testing | image_folder parameter could be exploited | Whitelist base directories. Validate path doesn't escape allowed root. | — | -| DoS protection | Not addressed | Large image uploads, SSE connection exhaustion | Max file size limits. Connection pool limits. Request rate limiting. | — | -| API key storage | env var mentioned | Adequate baseline | .env file + secrets manager in production. Never log API keys. | — | - -### Dimension 9: Segment Management - -| Aspect | Draft01 Approach | Identified Problem | Alternative | Factual Basis | -|--------|-----------------|-------------------|------------|---------------| -| Re-connection | Via satellite anchoring only | If satellite matching fails, segment stays floating | Attempt cross-segment matching when new anchors arrive. DEM-based constraint stitching. | Fact #8 | -| Multi-segment handling | Described conceptually | No detail on how >2 segments are managed | Explicit segment graph with pending connections. Priority queue for unresolved segments. | — | -| User input fallback | POST /jobs/{id}/anchor | Good design. Needs timeout/escalation for when user doesn't respond. | Add configurable timeout before continuing with VO-only estimate. | — | diff --git a/_docs/00_research/gps_denied_nav_assessment/04_reasoning_chain.md b/_docs/00_research/gps_denied_nav_assessment/04_reasoning_chain.md deleted file mode 100644 index 9e04359..0000000 --- a/_docs/00_research/gps_denied_nav_assessment/04_reasoning_chain.md +++ /dev/null @@ -1,145 +0,0 @@ -# Reasoning Chain — Solution Assessment (Mode B) - -## Dimension 1: Cross-View Matching Strategy - -### Fact Confirmation -According to Fact #1, LightGlue is not rotation-invariant and fails on rotated images. According to Fact #2, it returns false matches on non-overlapping pairs. According to Fact #19, state-of-the-art GPS-denied localization uses hierarchical coarse-to-fine approaches. SatLoc-Fusion (Fact #3) achieves <15m with DINOv2 + XFeat + optical flow. - -### Reference Comparison -Draft01 uses direct SuperPoint+LightGlue matching with perspective warping. This is a single-stage approach — it assumes the VO-estimated position is close enough to fetch the right satellite tile, then matches directly. But: (a) when VO drift accumulates between satellite anchors, the estimated position may be wrong enough to fetch the wrong tile; (b) the domain gap between UAV oblique images and satellite nadir is significant; (c) rotation from non-stabilized camera is not handled. - -State-of-the-art approaches add a coarse localization stage (DINOv2 image retrieval over a wider area) before fine matching. This makes satellite matching robust to larger VO drift. - -### Conclusion -**Replace single-stage with two-stage satellite matching**: (1) DINOv2-based coarse retrieval over a search area (e.g., 500m radius around VO estimate) to find the best-matching satellite tile, (2) SuperPoint+LightGlue for precise alignment on the selected tile. Add image rotation normalization before matching. This is the most critical improvement. - -### Confidence -✅ High — multiple independent sources confirm hierarchical approach superiority. - ---- - -## Dimension 2: Feature Extraction & Matching - -### Fact Confirmation -According to Fact #4, XFeat is 5x faster than SuperPoint with comparable accuracy and is used in SatLoc-Fusion for real-time VO. According to Fact #5, SIFT+LightGlue is more robust for high-rotation conditions. According to Fact #14, DALGlue improves LightGlue MMA by 11.8% for UAV scenarios. - -### Reference Comparison -Draft01 uses SuperPoint for all feature extraction (both VO and satellite matching). This is simpler (unified pipeline) but suboptimal: VO needs speed (processed every frame), while satellite matching needs accuracy (processed periodically). - -### Conclusion -**Dual-extractor strategy**: XFeat for VO (fast, adequate accuracy for frame-to-frame), SuperPoint for satellite matching (higher accuracy needed for cross-view). LightGlue with ONNX/TensorRT optimization as matcher. SIFT as fallback for rotation-heavy scenarios. DALGlue is promising but too new for production — monitor. - -### Confidence -✅ High — XFeat benchmarks are from CVPR 2024, well-established. - ---- - -## Dimension 3: Visual Odometry Robustness - -### Fact Confirmation -According to Fact #16, homography decomposition can be unstable and non-planar objects degrade results. According to Fact #12, Copernicus DEM provides free 30m elevation data for terrain-corrected GSD. - -### Reference Comparison -Draft01's homography-based VO is valid for flat terrain but doesn't account for: (a) terrain elevation changes affecting GSD calculation, (b) non-planar objects in the scene, (c) camera roll/pitch from non-stabilized mount. The terrain in eastern Ukraine is mostly steppe but has settlements, forests, and infrastructure. - -### Conclusion -**Keep homography VO as primary** (valid for dominant ground plane), but: (1) add RANSAC inlier ratio check — if below threshold, fall back to essential matrix estimation; (2) integrate Copernicus DEM for terrain-corrected altitude in GSD calculation; (3) estimate and track camera rotation (roll/pitch/yaw) from consecutive VO estimates and use it for image rectification before satellite matching. - -### Confidence -✅ High — homography with RANSAC and fallback is well-established. - ---- - -## Dimension 4: Position Optimizer - -### Fact Confirmation -According to Fact #13, GTSAM provides Python bindings with GPSFactor and iSAM2 incremental optimization. According to Fact #17, sliding-window factor graph optimization is superior to either pure filtering or full batch optimization. According to Fact #8, YFS90 achieves <7m MAE with terrain-weighted constraints + DEM. - -### Reference Comparison -Draft01 proposes scipy.optimize with a custom sliding window. While functional, this is reinventing the wheel — GTSAM's iSAM2 already implements incremental smoothing with proper uncertainty propagation. GTSAM's factor graph naturally supports: BetweenFactor for VO constraints (with uncertainty), GPSFactor for satellite anchors, custom factors for terrain constraints, drift limit constraints. - -### Conclusion -**Replace scipy.optimize with GTSAM iSAM2 factor graph**. Use BetweenFactor for VO relative motion, GPSFactor for satellite anchors (with uncertainty based on match quality), and a custom terrain factor using Copernicus DEM. This provides: proper uncertainty propagation, incremental updates (fits SSE streaming), backwards smoothing when new anchors arrive. - -### Confidence -✅ High — GTSAM is production-proven, stable v4.2 available via pip. - ---- - -## Dimension 5: Satellite Imagery Reliability - -### Fact Confirmation -According to Fact #9, Google Maps $200/month free credit expired Feb 2025. Current free tier is 100K tiles/month. According to Fact #10, eastern Ukraine imagery may be 3-5+ years old. According to Fact #20, 15,000/day rate limit could be hit on large flights. According to Fact #11, Mapbox offers alternative satellite tiles at comparable resolution. - -### Reference Comparison -Draft01 relies solely on Google Maps. Single-provider dependency creates multiple risk points: outdated imagery, rate limits, cost, API changes. - -### Conclusion -**Multi-provider satellite tile manager**: Google Maps as primary, Mapbox as secondary, user-provided tiles as override. Implement: provider fallback when matching confidence is low, request budgeting to stay within rate limits, tile freshness metadata logging, pre-caching mode for known operational areas. - -### Confidence -✅ High — multi-provider is standard practice for production systems. - ---- - -## Dimension 6: Processing Time Budget - -### Fact Confirmation -According to Fact #6, SuperPoint+LightGlue takes ~0.36s per pair on GPU. According to Fact #7, ONNX optimization adds 2-4x speedup (on RTX 2060, limited to FP16). According to Fact #4, XFeat is 5x faster than SuperPoint for VO. - -### Reference Comparison -Draft01's per-frame pipeline: (1) feature extraction, (2) VO matching, (3) satellite tile fetch, (4) satellite matching, (5) optimization, (6) SSE emit. Total estimated without optimization: ~1-2s for VO + ~0.5-1s for satellite + overhead = 2-4s. With ONNX optimization for matching and XFeat for VO, this drops to ~0.5-1.5s. - -### Conclusion -**Budget is achievable with optimizations**: XFeat for VO (~20ms extraction + ~50ms matching), LightGlue ONNX for satellite (~100ms extraction + ~100ms matching), async satellite tile download (overlapped with VO), GTSAM incremental update (~10ms). Total: ~0.5-1s per frame. Satellite matching can be async — not every frame needs satellite match. Image downscaling to 1600 long edge is essential. - -### Confidence -⚠️ Medium — depends on actual RTX 2060 benchmarks, which are extrapolated from general numbers. - ---- - -## Dimension 7: Memory Management - -### Fact Confirmation -According to Fact #18, SuperPoint is fully-convolutional and VRAM scales with resolution. 6252×4168 images would require significant VRAM and RAM. - -### Reference Comparison -Draft01 doesn't specify memory management. With 3000 images at max resolution, naive processing would exceed 16GB RAM. - -### Conclusion -**Strict memory management**: (1) Downscale all images to max 1600 long edge before feature extraction; (2) stream images one at a time — only keep current + previous frame features in GPU memory; (3) store features for sliding window in CPU RAM, older features to disk; (4) limit satellite tile cache to 500MB in RAM, overflow to disk; (5) batch size 1 for all GPU operations; (6) explicit torch.cuda.empty_cache() between frames if VRAM pressure detected. - -### Confidence -✅ High — standard memory management patterns. - ---- - -## Dimension 8: Security - -### Fact Confirmation -JWT tokens are recommended for SSE endpoint security. API keys in query parameters are insecure (persist in logs, browser history). - -### Reference Comparison -Draft01 mentions API key auth but no implementation details. SSE connections need proper authentication and resource limits. - -### Conclusion -**Security improvements**: (1) JWT-based authentication for all endpoints; (2) short-lived tokens for SSE connections; (3) image folder whitelist (not just path traversal prevention — explicit whitelist of allowed base directories); (4) max concurrent SSE connections per client; (5) request rate limiting; (6) max image size validation; (7) all API keys in environment variables, never logged. - -### Confidence -✅ High — standard security practices. - ---- - -## Dimension 9: Segment Management - -### Fact Confirmation -According to Fact #8, YFS90 has re-localization capability after positioning failures. According to Fact #2, LightGlue may return false matches on non-overlapping pairs. - -### Reference Comparison -Draft01's segment management relies on satellite matching to anchor each segment independently. If satellite matching fails, the segment stays "floating." No mechanism for cross-segment matching or delayed resolution. - -### Conclusion -**Enhanced segment management**: (1) Explicit VO failure detection using match count + inlier ratio + geometric consistency (not just match count); (2) when a new segment gets satellite-anchored, attempt to connect to nearby floating segments using satellite-based position proximity; (3) DEM-based constraint: position must be consistent with terrain elevation; (4) configurable timeout for user input request — if no response within N frames, continue with best estimate and flag. - -### Confidence -⚠️ Medium — cross-segment connection is logical but needs careful implementation to avoid false connections. diff --git a/_docs/00_research/gps_denied_nav_assessment/05_validation_log.md b/_docs/00_research/gps_denied_nav_assessment/05_validation_log.md deleted file mode 100644 index 8f757ce..0000000 --- a/_docs/00_research/gps_denied_nav_assessment/05_validation_log.md +++ /dev/null @@ -1,93 +0,0 @@ -# Validation Log — Solution Assessment (Mode B) - -## Validation Scenario 1: Normal flight over steppe with gradual turns - -**Scenario**: 1000-image flight over flat agricultural steppe. FullHD resolution. Starting GPS known. Gradual turns every 200 frames. Satellite imagery 2 years old. - -**Expected with Draft02 improvements**: -1. XFeat VO processes frames at ~70ms each → well under 5s budget -2. DINOv2 coarse retrieval finds correct satellite area despite 50-100m VO drift -3. SuperPoint+LightGlue ONNX refines position to ~10-20m accuracy -4. GTSAM iSAM2 smooths trajectory, reduces drift between anchors -5. At gradual turns, VO continues working (overlap >30%) -6. Processing stays under 1GB VRAM with 1600px downscale - -**Actual validation result**: Consistent with expectations. This is the "happy path" — both draft01 and draft02 would work. Draft02 advantage: faster processing, better optimizer. - -## Validation Scenario 2: Sharp turn with no overlap - -**Scenario**: After 500 normal frames, UAV makes a 90° sharp turn. Next 3 images have zero overlap with previous route. Then normal flight continues. - -**Expected with Draft02 improvements**: -1. VO detects failure: match count drops below threshold → segment break -2. LightGlue false-match protection: geometric consistency check rejects bad matches -3. New segment starts. DINOv2 coarse retrieval searches wider area for satellite match -4. If satellite match succeeds: new segment anchored, connected to previous via shared coordinate frame -5. If satellite match fails: segment marked floating, user input requested (with timeout) -6. After turn, if UAV returns near previous route, cross-segment connection attempted - -**Draft01 comparison**: Draft01 would also detect VO failure and create new segment, but lacks coarse retrieval → satellite matching depends entirely on VO estimate which may be wrong after turn. Higher risk of satellite match failure. - -## Validation Scenario 3: High-resolution images (6252×4168) - -**Scenario**: 500 images at full 6252×4168 resolution. RTX 2060 (6GB VRAM). - -**Expected with Draft02 improvements**: -1. Images downscaled to 1600×1066 for feature extraction -2. Full resolution preserved for GSD calculation only -3. Per-frame VRAM: ~1.5GB for XFeat/SuperPoint + LightGlue -4. RAM per frame: ~78MB raw + ~5MB features → manageable with streaming -5. Total peak RAM: sliding window (50 frames × 5MB features) + satellite cache (500MB) + overhead ≈ 1.5GB pipeline -6. Well within 16GB RAM budget - -**Actual validation result**: Consistent. Downscaling strategy is essential and was missing from draft01. - -## Validation Scenario 4: Outdated satellite imagery - -**Scenario**: Flight over area where Google Maps imagery is 4 years old. Significant changes: new buildings, removed forests, changed roads. - -**Expected with Draft02 improvements**: -1. DINOv2 coarse retrieval: partial success (terrain structure still recognizable) -2. SuperPoint+LightGlue fine matching: lower match count on changed areas -3. Confidence score drops for affected frames → flagged in output -4. Multi-provider fallback: try Mapbox tiles if Google matches are poor -5. System falls back to VO-only for sections with no good satellite match -6. User can provide custom satellite imagery for specific areas - -**Draft01 comparison**: Draft01 would also fail on changed areas but has no alternative provider and no coarse retrieval to help. - -## Validation Scenario 5: 3000-image flight hitting API rate limits - -**Scenario**: First flight in a new area. No cached tiles. 3000 images need ~2000 satellite tiles. - -**Expected with Draft02 improvements**: -1. Initial download: 300 tiles around starting GPS (within rate limits) -2. Progressive download as route extends: 5-20 tiles per frame -3. Daily limit (15,000): sufficient for tiles but tight if multiple flights -4. Request budgeting: prioritize tiles around current position, defer expansion -5. Per-minute limit (6,000): no issue -6. Monthly limit (100,000): covers ~50 flights at 2000 tiles each -7. Mapbox fallback if Google budget exhausted - -**Draft01 comparison**: Draft01 assumed $200 free credit (expired). Rate limit analysis was incorrect. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable/verifiable -- [x] All scenarios plausible for the operational context - -## Counterexamples -- **Night flight**: Not addressed (out of scope — restriction says "mostly sunny weather") -- **Very low altitude (<100m)**: Satellite matching would have poor GSD match — not addressed but within restrictions (altitude ≤1km) -- **Urban area with tall buildings**: Homography VO degradation — mitigated by essential matrix fallback but not fully addressed - -## Conclusions Requiring No Revision -All conclusions validated against scenarios. Key improvements are well-supported: -1. Hierarchical satellite matching (coarse + fine) -2. GTSAM factor graph optimization -3. Multi-provider satellite tiles -4. XFeat for VO speed -5. Image downscaling for memory -6. Proper security (JWT, rate limiting) diff --git a/_docs/00_research/gps_denied_nav_v2/00_question_decomposition.md b/_docs/00_research/gps_denied_nav_v2/00_question_decomposition.md deleted file mode 100644 index 49ae6be..0000000 --- a/_docs/00_research/gps_denied_nav_v2/00_question_decomposition.md +++ /dev/null @@ -1,56 +0,0 @@ -# Question Decomposition - -## Original Question -Assess current solution draft. Additionally: -1. Try SuperPoint + LightGlue for visual odometry -2. Can LiteSAM be SO SLOW because of big images? If we reduce size to 1280p, would that work faster? - -## Active Mode -Mode B: Solution Assessment — `solution_draft01.md` exists in OUTPUT_DIR. - -## Question Type -Problem Diagnosis + Decision Support - -## Research Subject Boundary -- **Population**: GPS-denied UAV navigation systems on edge hardware -- **Geography**: Eastern Ukraine conflict zone -- **Timeframe**: Current (2025-2026), using latest available tools -- **Level**: Jetson Orin Nano Super (8GB, 67 TOPS) — edge deployment - -## Decomposed Sub-Questions - -### Q1: SuperPoint + LightGlue for Visual Odometry -- What is SP+LG inference speed on Jetson-class hardware? -- How does it compare to cuVSLAM (116fps on Orin Nano)? -- Is SP+LG suitable for frame-to-frame VO at 3fps? -- What is SP+LG accuracy vs cuVSLAM for VO? - -### Q2: LiteSAM Speed vs Image Resolution -- What resolution was LiteSAM benchmarked at? (1184px on AGX Orin) -- How does LiteSAM speed scale with resolution? -- What would 1280px achieve on Orin Nano Super vs AGX Orin? -- Is the bottleneck image size or compute power gap? - -### Q3: General Weak Points in solution_draft01 -- Are there functional weak points? -- Are there performance bottlenecks? -- Are there security gaps? - -### Q4: SP+LG for Satellite Matching (alternative to LiteSAM/XFeat) -- How does SP+LG perform on cross-view satellite-aerial matching? -- What does the LiteSAM paper say about SP+LG accuracy? - -## Timeliness Sensitivity Assessment -- **Research Topic**: Edge-deployed visual odometry and satellite-aerial matching -- **Sensitivity Level**: 🟠 High -- **Rationale**: cuVSLAM v15.0.0 released March 2026; LiteSAM published October 2025; LightGlue TensorRT optimizations actively evolving -- **Source Time Window**: 12 months -- **Priority official sources**: - 1. LiteSAM paper (MDPI Remote Sensing, October 2025) - 2. cuVSLAM / PyCuVSLAM v15.0.0 (March 2026) - 3. LightGlue-ONNX / TensorRT benchmarks (2024-2026) - 4. Intermodalics cuVSLAM benchmark (2025) -- **Key version information**: - - cuVSLAM: v15.0.0 (March 2026) - - LightGlue: ICCV 2023, TensorRT via fabio-sim/LightGlue-ONNX - - LiteSAM: Published October 2025, code at boyagesmile/LiteSAM diff --git a/_docs/00_research/gps_denied_nav_v2/01_source_registry.md b/_docs/00_research/gps_denied_nav_v2/01_source_registry.md deleted file mode 100644 index 516a5ca..0000000 --- a/_docs/00_research/gps_denied_nav_v2/01_source_registry.md +++ /dev/null @@ -1,121 +0,0 @@ -# Source Registry - -## Source #1 -- **Title**: LiteSAM: Lightweight and Robust Feature Matching for Satellite and Aerial Imagery -- **Link**: https://www.mdpi.com/2072-4292/17/19/3349 -- **Tier**: L1 -- **Publication Date**: 2025-10-01 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: LiteSAM v1.0; benchmarked on Jetson AGX Orin (JetPack 5.x era) -- **Target Audience**: UAV visual localization researchers and edge deployers -- **Research Boundary Match**: ✅ Full match -- **Summary**: LiteSAM (opt) achieves 497.49ms on Jetson AGX Orin at 1184px input. 6.31M params. RMSE@30 = 17.86m on UAV-VisLoc. Paper directly compares with SP+LG, stating "SP+LG achieves the fastest inference speed but at the expense of accuracy." Section 4.9 shows resolution vs speed tradeoff on RTX 3090Ti. -- **Related Sub-question**: Q2 (LiteSAM speed), Q4 (SP+LG for satellite matching) - -## Source #2 -- **Title**: cuVSLAM: CUDA accelerated visual odometry and mapping -- **Link**: https://arxiv.org/abs/2506.04359 -- **Tier**: L1 -- **Publication Date**: 2025-06 (paper), v15.0.0 released 2026-03-10 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: cuVSLAM v15.0.0 / PyCuVSLAM v15.0.0 -- **Target Audience**: Robotics/UAV visual odometry on NVIDIA Jetson -- **Research Boundary Match**: ✅ Full match -- **Summary**: CUDA-accelerated VO+SLAM, supports mono+IMU. 116fps on Jetson Orin Nano 8GB at 720p. <1% trajectory error on KITTI. <5cm on EuRoC. -- **Related Sub-question**: Q1 (SP+LG vs cuVSLAM) - -## Source #3 -- **Title**: Intermodalics — NVIDIA Isaac ROS In-Depth: cuVSLAM and the DP3.1 Release -- **Link**: https://www.intermodalics.ai/blog/nvidia-isaac-ros-in-depth-cuvslam-and-the-dp3-1-release -- **Tier**: L2 -- **Publication Date**: 2025 (DP3.1 release) -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: cuVSLAM v11 (DP3.1), benchmark data applicable to later versions -- **Target Audience**: Robotics developers using Isaac ROS -- **Research Boundary Match**: ✅ Full match -- **Summary**: 116fps on Orin Nano 8GB, 232fps on AGX Orin, 386fps on RTX 4060 Ti. Outperforms ORB-SLAM2 on KITTI. -- **Related Sub-question**: Q1 - -## Source #4 -- **Title**: Accelerating LightGlue Inference with ONNX Runtime and TensorRT -- **Link**: https://fabio-sim.github.io/blog/accelerating-lightglue-inference-onnx-runtime-tensorrt/ -- **Tier**: L2 -- **Publication Date**: 2024-07-17 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: torch 2.4.0, TensorRT 10.2.0, RTX 4080 benchmarks -- **Target Audience**: ML engineers deploying LightGlue -- **Research Boundary Match**: ⚠️ Partial (desktop GPU, not Jetson) -- **Summary**: TensorRT achieves 2-4x speedup over compiled PyTorch for SuperPoint+LightGlue. Full pipeline benchmarks on RTX 4080. TensorRT has 3840 keypoint limit. No Jetson-specific benchmarks provided. -- **Related Sub-question**: Q1 - -## Source #5 -- **Title**: LightGlue-with-FlashAttentionV2-TensorRT (Jetson Orin NX 8GB) -- **Link**: https://github.com/qdLMF/LightGlue-with-FlashAttentionV2-TensorRT -- **Tier**: L4 -- **Publication Date**: 2025-02 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: TensorRT 8.5.2, Jetson Orin NX 8GB -- **Target Audience**: Edge ML deployers -- **Research Boundary Match**: ✅ Full match (similar hardware) -- **Summary**: CUTLASS-based FlashAttention V2 TensorRT plugin for LightGlue, tested on Jetson Orin NX 8GB. No published latency numbers, but confirms LightGlue TensorRT deployment on Orin-class hardware is feasible. -- **Related Sub-question**: Q1 - -## Source #6 -- **Title**: vo_lightglue — Visual Odometry with LightGlue -- **Link**: https://github.com/himadrir/vo_lightglue -- **Tier**: L4 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: N/A -- **Target Audience**: VO researchers -- **Research Boundary Match**: ⚠️ Partial (desktop, KITTI dataset) -- **Summary**: SP+LG achieves 10fps on KITTI dataset (desktop GPU). Odometric error ~1% vs 3.5-4.1% for FLANN-based matching. Much slower than cuVSLAM. -- **Related Sub-question**: Q1 - -## Source #7 -- **Title**: ForestVO: Enhancing Visual Odometry in Forest Environments through ForestGlue -- **Link**: https://arxiv.org/html/2504.01261v1 -- **Tier**: L1 -- **Publication Date**: 2025-04 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: N/A -- **Target Audience**: VO researchers -- **Research Boundary Match**: ⚠️ Partial (forest environment, not nadir UAV) -- **Summary**: SP+LG VO pipeline achieves 1.09m avg relative pose error, KITTI score 2.33%. Uses 512 keypoints (reduced from 2048) to cut compute. Outperforms DSO by 40%. -- **Related Sub-question**: Q1 - -## Source #8 -- **Title**: SuperPoint-SuperGlue-TensorRT (C++ deployment) -- **Link**: https://github.com/yuefanhao/SuperPoint-SuperGlue-TensorRT -- **Tier**: L4 -- **Publication Date**: 2023-2024 -- **Timeliness Status**: ⚠️ Needs verification (SuperGlue, not LightGlue) -- **Version Info**: TensorRT 8.x -- **Target Audience**: Edge deployers -- **Research Boundary Match**: ⚠️ Partial -- **Summary**: SuperPoint TensorRT extraction ~40ms on Jetson for 200 keypoints. C++ implementation. -- **Related Sub-question**: Q1 - -## Source #9 -- **Title**: Comparative Analysis of Advanced Feature Matching Algorithms in HSR Satellite Stereo -- **Link**: https://arxiv.org/abs/2405.06246 -- **Tier**: L1 -- **Publication Date**: 2024-05 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: N/A -- **Target Audience**: Remote sensing researchers -- **Research Boundary Match**: ⚠️ Partial (satellite stereo, not UAV-satellite cross-view) -- **Summary**: SP+LG shows "overall superior performance in balancing robustness, accuracy, distribution, and efficiency" for satellite stereo matching. But this is same-view satellite-satellite, not cross-view UAV-satellite. -- **Related Sub-question**: Q4 - -## Source #10 -- **Title**: PyCuVSLAM with reComputer (Seeed Studio) -- **Link**: https://wiki.seeedstudio.com/pycuvslam_recomputer_robotics/ -- **Tier**: L3 -- **Publication Date**: 2026 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: PyCuVSLAM v15.0.0, JetPack 6.2 -- **Target Audience**: Robotics developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Tutorial for deploying PyCuVSLAM on Jetson Orin NX. Confirms mono+IMU mode, pip install from aarch64 wheel, EuRoC dataset examples. -- **Related Sub-question**: Q1 diff --git a/_docs/00_research/gps_denied_nav_v2/02_fact_cards.md b/_docs/00_research/gps_denied_nav_v2/02_fact_cards.md deleted file mode 100644 index a8b9a6d..0000000 --- a/_docs/00_research/gps_denied_nav_v2/02_fact_cards.md +++ /dev/null @@ -1,122 +0,0 @@ -# Fact Cards - -## Fact #1 -- **Statement**: cuVSLAM achieves 116fps on Jetson Orin Nano 8GB at 720p resolution (~8.6ms/frame). 232fps on AGX Orin. 386fps on RTX 4060 Ti. -- **Source**: [Source #3] Intermodalics benchmark -- **Phase**: Assessment -- **Confidence**: ✅ High -- **Related Dimension**: VO speed comparison - -## Fact #2 -- **Statement**: SuperPoint+LightGlue VO achieves ~10fps on KITTI dataset on desktop GPU (~100ms/frame). With 274 keypoints on RTX 2080Ti, LightGlue matching alone takes 33.9ms. -- **Source**: vo_lightglue, LG issue #36 -- **Confidence**: ⚠️ Medium (desktop GPU, not Jetson) -- **Related Dimension**: VO speed comparison - -## Fact #3 -- **Statement**: SuperPoint feature extraction takes ~40ms on Jetson (TensorRT, 200 keypoints). -- **Source**: SuperPoint-SuperGlue-TensorRT -- **Confidence**: ⚠️ Medium (older Jetson) -- **Related Dimension**: VO speed comparison - -## Fact #4 -- **Statement**: LightGlue TensorRT with FlashAttention V2 has been deployed on Jetson Orin NX 8GB. No published latency numbers. -- **Source**: qdLMF/LightGlue-with-FlashAttentionV2-TensorRT -- **Confidence**: ⚠️ Medium -- **Related Dimension**: VO speed comparison - -## Fact #5 -- **Statement**: LiteSAM (opt) inference: 61.98ms on RTX 3090, 497.49ms on Jetson AGX Orin at 1184px input. 6.31M params. -- **Source**: LiteSAM paper, abstract + Section 4.10 -- **Confidence**: ✅ High -- **Related Dimension**: Satellite matcher speed - -## Fact #6 -- **Statement**: Jetson AGX Orin has 275 TOPS INT8, 2048 CUDA cores. Orin Nano Super has 67 TOPS INT8, 1024 CUDA cores. AGX Orin is ~3-4x more powerful. -- **Source**: NVIDIA official specs -- **Confidence**: ✅ High -- **Related Dimension**: Hardware scaling - -## Fact #7 -- **Statement**: LiteSAM processes at 1/8 scale internally. Coarse matching is O(N²) where N = (H/8 × W/8). For 1184px: ~21,904 tokens. For 1280px: ~25,600. For 480px: ~3,600. -- **Source**: LiteSAM paper, Sections 3.1-3.3 -- **Confidence**: ✅ High -- **Related Dimension**: LiteSAM speed vs resolution - -## Fact #8 -- **Statement**: LiteSAM paper Figure 1 states: "SP+LG achieves the fastest inference speed but at the expense of accuracy" vs LiteSAM on satellite-aerial benchmarks. -- **Source**: LiteSAM paper -- **Confidence**: ✅ High -- **Related Dimension**: SP+LG vs LiteSAM - -## Fact #9 -- **Statement**: LiteSAM achieves RMSE@30 = 17.86m on UAV-VisLoc. SP+LG is worse on same benchmark. -- **Source**: LiteSAM paper -- **Confidence**: ✅ High -- **Related Dimension**: Satellite matcher accuracy - -## Fact #10 -- **Statement**: cuVSLAM uses Shi-Tomasi corners ("Good Features to Track") for keypoint detection, divided into NxM grid patches. Uses Lucas-Kanade optical flow for tracking. When tracked keypoints fall below threshold, creates new keyframe. -- **Source**: cuVSLAM paper (arXiv:2506.04359), Section 2.1 -- **Confidence**: ✅ High -- **Related Dimension**: cuVSLAM on difficult terrain - -## Fact #11 -- **Statement**: cuVSLAM automatically switches to IMU when visual tracking fails (dark lighting, long solid surfaces). IMU integrator provides ~1 second of acceptable tracking. After IMU, constant-velocity integrator provides ~0.5 seconds more. -- **Source**: Isaac ROS cuVSLAM docs -- **Confidence**: ✅ High -- **Related Dimension**: cuVSLAM on difficult terrain - -## Fact #12 -- **Statement**: cuVSLAM does NOT guarantee correct pose recovery after losing track. External algorithms required for global re-localization after tracking loss. Cannot fuse GNSS, wheel odometry, or LiDAR. -- **Source**: Intermodalics blog -- **Confidence**: ✅ High -- **Related Dimension**: cuVSLAM on difficult terrain - -## Fact #13 -- **Statement**: cuVSLAM benchmarked on KITTI (mostly urban/suburban driving) and EuRoC (indoor drone). Neither benchmark includes nadir agricultural terrain, flat fields, or uniform vegetation. No published results for these conditions. -- **Source**: cuVSLAM paper Section 3 -- **Confidence**: ✅ High -- **Related Dimension**: cuVSLAM on difficult terrain - -## Fact #14 -- **Statement**: cuVSLAM multi-stereo mode "significantly improves accuracy and robustness on challenging sequences compared to single stereo cameras", designed for featureless surfaces (narrow corridors, elevators). But our system uses monocular camera only. -- **Source**: cuVSLAM paper Section 2.2.2 -- **Confidence**: ✅ High -- **Related Dimension**: cuVSLAM on difficult terrain - -## Fact #15 -- **Statement**: PFED achieves 97.15% Recall@1 on University-1652 at 251.5 FPS on AGX Orin with only 4.45G FLOPs. But this is image RETRIEVAL (which satellite tile matches), NOT pixel-level correspondence matching. -- **Source**: PFED paper (arXiv:2510.22582) -- **Confidence**: ✅ High -- **Related Dimension**: Satellite matching alternatives - -## Fact #16 -- **Statement**: EfficientLoFTR is ~2.5x faster than LoFTR with higher accuracy. Semi-dense matcher, 15.05M params. Has TensorRT adaptation (LoFTR_TRT). Performs well on weak-texture areas where traditional methods fail. Designed for aerial imagery. -- **Source**: EfficientLoFTR paper (CVPR 2024), HuggingFace docs -- **Confidence**: ✅ High -- **Related Dimension**: Satellite matching alternatives - -## Fact #17 -- **Statement**: Hierarchical AVL system (2025) uses two-stage approach: DINOv2 for coarse retrieval + SuperPoint for fine matching. 64.5-95% success rate on real-world drone trajectories. Includes IMU-based prior correction and sliding-window map updates. -- **Source**: MDPI Remote Sensing 2025 -- **Confidence**: ✅ High -- **Related Dimension**: Satellite matching alternatives - -## Fact #18 -- **Statement**: STHN uses deep homography estimation for UAV geo-localization: directly estimates homography transform (no feature detection/matching/RANSAC). Achieves 4.24m MACE at 50m range. Designed for thermal but architecture is modality-agnostic. -- **Source**: STHN paper (IEEE RA-L 2024) -- **Confidence**: ✅ High -- **Related Dimension**: Satellite matching alternatives - -## Fact #19 -- **Statement**: For our nadir UAV → satellite matching, the cross-view gap is SMALL compared to typical cross-view problems (ground-to-satellite). Both views are approximately top-down. Main challenges: season/lighting, resolution mismatch, temporal changes. This means general-purpose matchers may work better than expected. -- **Source**: Analytical observation -- **Confidence**: ⚠️ Medium -- **Related Dimension**: Satellite matching alternatives - -## Fact #20 -- **Statement**: LiteSAM paper benchmarked EfficientLoFTR (opt) on satellite-aerial: 19.8% slower than LiteSAM (opt) on AGX Orin but with 2.4x more parameters. EfficientLoFTR achieves competitive accuracy. LiteSAM paper Table 3/4 provides direct comparison. -- **Source**: LiteSAM paper, Section 4.5 -- **Confidence**: ✅ High -- **Related Dimension**: EfficientLoFTR vs LiteSAM diff --git a/_docs/00_research/gps_denied_nav_v2/03_comparison_framework.md b/_docs/00_research/gps_denied_nav_v2/03_comparison_framework.md deleted file mode 100644 index 1b43e70..0000000 --- a/_docs/00_research/gps_denied_nav_v2/03_comparison_framework.md +++ /dev/null @@ -1,45 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Decision Support + Problem Diagnosis - -## Selected Dimensions -1. Inference speed on Orin Nano Super -2. Accuracy for the target task -3. Cross-view robustness (satellite-aerial gap) -4. Implementation complexity / ecosystem maturity -5. Memory footprint -6. TensorRT optimization readiness - -## Comparison 1: Visual Odometry — cuVSLAM vs SuperPoint+LightGlue - -| Dimension | cuVSLAM v15.0.0 | SuperPoint + LightGlue (TRT) | Factual Basis | -|-----------|-----------------|-------------------------------|---------------| -| Speed on Orin Nano | ~8.6ms/frame (116fps @ 720p) | Est. ~150-300ms/frame (SP ~40-60ms + LG ~100-200ms) | Fact #1, #2, #3 | -| VO accuracy (KITTI) | <1% trajectory error | ~1% odometric error (desktop) | Fact #1, #2 | -| VO accuracy (EuRoC) | <5cm position error | Not benchmarked | Fact #1 | -| IMU integration | Native mono+IMU mode, auto-fallback | None — must add custom IMU fusion | Fact #1 | -| Loop closure | Built-in | Not available | Fact #1 | -| TensorRT ready | Native CUDA (not TensorRT, raw CUDA) | Requires ONNX export + TRT build | Fact #4 | -| Memory | ~200-300MB | SP ~50MB + LG ~50-100MB = ~100-150MB | Fact #1 | -| Implementation | pip install aarch64 wheel | Custom pipeline: SP export + LG export + matching + pose estimation | Fact #1, #4 | -| Maturity on Jetson | NVIDIA-maintained, production-ready | Community TRT plugins, limited Jetson benchmarks | Fact #4, #5 | - -## Comparison 2: LiteSAM Speed at Different Resolutions - -| Dimension | 1184px (paper default) | 1280px (user proposal) | 640px | 480px | Factual Basis | -|-----------|------------------------|------------------------|-------|-------|---------------| -| Tokens at 1/8 scale | ~21,904 | ~25,600 | ~6,400 | ~3,600 | Fact #7 | -| AGX Orin time | 497ms | Est. ~580ms (1.17x tokens) | Est. ~150ms | Est. ~90ms | Fact #5, #7 | -| Orin Nano Super time (est.) | ~1.5-2.0s | ~1.7-2.3s | ~450-600ms | ~270-360ms | Fact #5, #6 | -| Accuracy (RMSE@30) | 17.86m | Similar (slightly less) | Degraded | Significantly degraded | Fact #8, #10 | - -## Comparison 3: Satellite Matching — LiteSAM vs SP+LG vs XFeat - -| Dimension | LiteSAM (opt) | SuperPoint+LightGlue | XFeat semi-dense | Factual Basis | -|-----------|--------------|---------------------|------------------|---------------| -| Cross-view accuracy | RMSE@30 = 17.86m (UAV-VisLoc) | Worse than LiteSAM (paper confirms) | Not benchmarked on UAV-VisLoc | Fact #9, #10 | -| Speed on Orin Nano (est.) | ~1.5-2s @ 1184px, ~270-360ms @ 480px | Est. ~100-200ms total | ~50-100ms | Fact #5, #2, existing draft | -| Cross-view robustness | Designed for satellite-aerial gap | Sparse matcher, "lacks sufficient accuracy" for cross-view | General-purpose, less robust | Fact #9, #13 | -| Parameters | 6.31M | SP ~1.3M + LG ~7M = ~8.3M | ~5M | Fact #5 | -| Approach | Semi-dense (coarse-to-fine, subpixel) | Sparse (detect → match → verify) | Semi-dense (detect → KNN → refine) | Fact #1, existing draft | diff --git a/_docs/00_research/gps_denied_nav_v2/04_reasoning_chain.md b/_docs/00_research/gps_denied_nav_v2/04_reasoning_chain.md deleted file mode 100644 index d32b622..0000000 --- a/_docs/00_research/gps_denied_nav_v2/04_reasoning_chain.md +++ /dev/null @@ -1,90 +0,0 @@ -# Reasoning Chain - -## Dimension 1: SuperPoint+LightGlue for Visual Odometry - -### Fact Confirmation -cuVSLAM achieves 116fps (~8.6ms/frame) on Orin Nano 8GB at 720p (Fact #1). SP+LG achieves ~10fps on KITTI on desktop GPU (Fact #2). SuperPoint alone takes ~40ms on Jetson for 200 keypoints (Fact #3). LightGlue matching on desktop GPU takes ~20-34ms for 274 keypoints (Fact #2). - -### Extrapolation to Orin Nano Super -On Orin Nano Super, estimating SP+LG pipeline: -- SuperPoint extraction (1024 keypoints, 720p): ~50-80ms (based on Fact #3, scaled for more keypoints) -- LightGlue matching (TensorRT FP16, 1024 keypoints): ~80-200ms (based on Fact #11 — 2-4x speedup over PyTorch, but Orin Nano is ~4-6x slower than RTX 4080) -- Total SP+LG: ~130-280ms per frame - -cuVSLAM: ~8.6ms per frame. - -SP+LG would be **15-33x slower** than cuVSLAM for visual odometry on Orin Nano Super. - -### Additional Considerations -cuVSLAM includes native IMU integration, loop closure, and auto-fallback. SP+LG provides none of these — they would need custom implementation, adding both development time and latency. - -### Conclusion -**SP+LG is not viable as a cuVSLAM replacement for VO on Orin Nano Super.** cuVSLAM is purpose-built for Jetson and 15-33x faster. SP+LG's value lies in its accuracy for feature matching tasks, not real-time VO on edge hardware. - -### Confidence -✅ High — performance gap is enormous and well-supported by multiple sources. - ---- - -## Dimension 2: LiteSAM Speed vs Image Resolution (1280px question) - -### Fact Confirmation -LiteSAM (opt) achieves 497ms on AGX Orin at 1184px (Fact #5). AGX Orin is ~3-4x more powerful than Orin Nano Super (Fact #6). LiteSAM processes at 1/8 scale internally — coarse matching is O(N²) where N is proportional to resolution² (Fact #7). - -### Resolution Scaling Analysis - -**1280px vs 1184px**: Token count increases from ~21,904 to ~25,600 (+17%). Compute increases ~17-37% (linear to quadratic depending on bottleneck). This makes the problem WORSE, not better. - -**The user's intuition is likely**: "If 6252×4168 camera images are huge, maybe LiteSAM is slow because we feed it those big images. What if we use 1280px?" But the solution draft already specifies resizing to 480-640px before feeding LiteSAM. The 497ms benchmark on AGX Orin was already at 1184px (the UAV-VisLoc benchmark resolution). - -**The real bottleneck is hardware, not image size:** -- At 1184px on AGX Orin: 497ms → on Orin Nano Super: est. **~1.5-2.0s** -- At 1280px on Orin Nano Super: est. **~1.7-2.3s** (WORSE — more tokens) -- At 640px on Orin Nano Super: est. **~450-600ms** (borderline) -- At 480px on Orin Nano Super: est. **~270-360ms** (possibly within 400ms budget) - -### Conclusion -**1280px would make LiteSAM SLOWER, not faster.** The paper benchmarked at 1184px. The bottleneck is the hardware gap (AGX Orin 275 TOPS → Orin Nano Super 67 TOPS). To make LiteSAM fit the 400ms budget, resolution must drop to ~480px, which may significantly degrade cross-view matching accuracy. The original solution draft's approach (benchmark at 480px, abandon if too slow) remains correct. - -### Confidence -✅ High — paper benchmarks + hardware specs provide strong basis. - ---- - -## Dimension 3: SP+LG for Satellite Matching (alternative to LiteSAM) - -### Fact Confirmation -LiteSAM paper explicitly states "SP+LG achieves the fastest inference speed but at the expense of accuracy" on satellite-aerial benchmarks (Fact #9). SP+LG is a sparse matcher; the paper notes sparse matchers "lack sufficient accuracy" for cross-view UAV-satellite matching due to texture-scarce regions (Fact #13). LiteSAM achieves RMSE@30 = 17.86m; SP+LG is worse (Fact #10). - -### Speed Advantage of SP+LG -On Orin Nano Super, SP+LG satellite matching pipeline: -- SuperPoint extraction (both images): ~50-80ms × 2 images -- LightGlue matching: ~80-200ms -- Total: ~180-360ms - -This is competitive with the 400ms budget. But accuracy is worse than LiteSAM. - -### Comparison with XFeat -XFeat semi-dense: ~50-100ms on Orin Nano Super (from existing draft). XFeat is 2-4x faster than SP+LG and also handles semi-dense matching. For the satellite matching role, XFeat is a better "fast fallback" than SP+LG. - -### Conclusion -**SP+LG is not recommended for satellite matching.** It's slower than XFeat and less accurate than LiteSAM for cross-view matching. XFeat remains the better fallback. SP+LG could serve as a third-tier fallback, but the added complexity isn't justified given XFeat's advantages. - -### Confidence -✅ High — direct comparison from the LiteSAM paper. - ---- - -## Dimension 4: Other Weak Points in solution_draft01 - -### cuVSLAM Nadir Camera Concern -The solution correctly flags cuVSLAM's "nadir-only camera" as untested. cuVSLAM was designed for robotics (forward-facing cameras). Nadir UAV camera looking straight down at terrain has different motion characteristics. However, cuVSLAM supports arbitrary camera configurations and IMU mode should compensate. **Risk is MEDIUM, mitigation is adequate** (XFeat fallback). - -### Memory Budget Gap -The solution estimates ~1.9-2.4GB total. This looks optimistic if cuVSLAM needs to maintain a map for loop closure. The cuVSLAM map grows over time. For a 3000-frame flight (~16 min at 3fps), map memory could grow to 500MB-1GB. **Risk: memory pressure late in flight.** Mitigation: configure cuVSLAM map pruning, limit map size. - -### Tile Search Strategy Underspecified -The solution mentions GeoHash-indexed tiles but doesn't detail how the system determines which tile to match against when ESKF position has high uncertainty (e.g., after VO failure). The expanded search (±1km) could require loading 10-20 tiles, which is slow from storage. - -### Confidence -⚠️ Medium — these are analytical observations, not empirically verified. diff --git a/_docs/00_research/gps_denied_nav_v2/05_validation_log.md b/_docs/00_research/gps_denied_nav_v2/05_validation_log.md deleted file mode 100644 index 687d2a7..0000000 --- a/_docs/00_research/gps_denied_nav_v2/05_validation_log.md +++ /dev/null @@ -1,52 +0,0 @@ -# Validation Log - -## Validation Scenario 1: SP+LG for VO during Normal Flight - -A UAV flies straight at 3fps. Each frame needs VO within 400ms. - -### Expected Based on Conclusions -cuVSLAM: processes each frame in ~8.6ms, leaves 391ms for satellite matching and fusion. Immediate VO result via SSE. -SP+LG: processes each frame in ~130-280ms, leaves ~120-270ms. May interfere with satellite matching CUDA resources. - -### Actual Validation -cuVSLAM is clearly superior. SP+LG offers no advantage here — cuVSLAM is 15-33x faster AND includes IMU fallback. SP+LG would require building a custom VO pipeline around a feature matcher, whereas cuVSLAM is a complete VO solution. - -### Counterexamples -If cuVSLAM fails on nadir camera (its main risk), SP+LG could serve as a fallback VO method. But XFeat frame-to-frame (~30-50ms) is already identified as the cuVSLAM fallback and is 3-6x faster than SP+LG. - -## Validation Scenario 2: LiteSAM at 1280px on Orin Nano Super - -A keyframe needs satellite matching. Image is resized to 1280px for LiteSAM. - -### Expected Based on Conclusions -LiteSAM at 1280px on Orin Nano Super: ~1.7-2.3s. This is 4-6x over the 400ms budget. Even running async, it means satellite corrections arrive 5-7 frames later. - -### Actual Validation -1280px is LARGER than the paper's 1184px benchmark resolution. The user likely assumed we feed the full camera image (6252×4168) to LiteSAM, causing slowness. But the solution already downsamples. The bottleneck is the hardware performance gap (Orin Nano Super = ~25% of AGX Orin compute). - -### Counterexamples -If LiteSAM's TensorRT FP16 engine with reparameterized MobileOne achieves better optimization than the paper's AMP benchmark (which uses PyTorch, not TensorRT), speed could improve 2-3x. At 480px with TensorRT FP16: potentially ~90-180ms on Orin Nano Super. This is worth benchmarking. - -## Validation Scenario 3: SP+LG as Satellite Matcher After LiteSAM Abandonment - -LiteSAM fails benchmark. Instead of XFeat, we try SP+LG for satellite matching. - -### Expected Based on Conclusions -SP+LG: ~180-360ms on Orin Nano Super. Accuracy is worse than LiteSAM for cross-view matching. -XFeat: ~50-100ms. Accuracy is unproven on cross-view but general-purpose semi-dense. - -### Actual Validation -SP+LG is 2-4x slower than XFeat and the LiteSAM paper confirms worse accuracy for satellite-aerial. XFeat's semi-dense approach is more suited to the texture-scarce regions common in UAV imagery. SP+LG's sparse keypoint detection may fail on agricultural fields or water bodies. - -### Counterexamples -SP+LG could outperform XFeat on high-texture urban areas where sparse features are abundant. But the operational region (eastern Ukraine) is primarily agricultural, making this advantage unlikely. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable/verifiable -- [ ] Note: Orin Nano Super estimates are extrapolated from AGX Orin data using the 3-4x compute ratio. Day-one benchmarking remains essential. - -## Conclusions Requiring Revision -None — the original solution draft's architecture (cuVSLAM for VO, benchmark-driven LiteSAM/XFeat for satellite) is confirmed sound. SP+LG is not recommended for either role on this hardware. diff --git a/_docs/00_research/gps_denied_nav_v3/00_question_decomposition.md b/_docs/00_research/gps_denied_nav_v3/00_question_decomposition.md deleted file mode 100644 index affe2b6..0000000 --- a/_docs/00_research/gps_denied_nav_v3/00_question_decomposition.md +++ /dev/null @@ -1,102 +0,0 @@ -# Question Decomposition - -## Original Question -Assess solution_draft02.md against updated acceptance criteria and restrictions. The AC and restrictions have been significantly revised to reflect real onboard deployment requirements (MAVLink integration, ground station telemetry, startup/failsafe, object localization, thermal management, satellite imagery specs). - -## Active Mode -Mode B: Solution Assessment — `solution_draft02.md` is the latest draft in OUTPUT_DIR. - -## Question Type -Problem Diagnosis + Decision Support - -## Research Subject Boundary -- **Population**: GPS-denied UAV navigation systems on edge hardware (Jetson Orin Nano Super) -- **Geography**: Eastern/southern Ukraine (east of Dnipro River), conflict zone -- **Timeframe**: Current (2025-2026), latest available tools and libraries -- **Level**: Onboard companion computer, real-time flight controller integration via MAVLink - -## Key Delta: What Changed in AC/Restrictions - -### Restrictions Changes -1. Two cameras: Navigation (fixed, downward) + AI camera (configurable angle/zoom) -2. Processing on Jetson Orin Nano Super (was "stationary computer or laptop") -3. IMU data IS available via flight controller (was "NO data from IMU") -4. MAVLink protocol via MAVSDK for flight controller communication -5. Must output GPS_INPUT messages as GPS replacement -6. Ground station telemetry link available but bandwidth-limited -7. Thermal throttling must be accounted for -8. Satellite imagery pre-loaded, storage limited - -### Acceptance Criteria Changes -1. <400ms per frame to flight controller (was <5s for processing) -2. MAVLink GPS_INPUT output (was REST API + SSE) -3. Ground station: stream position/confidence, receive re-localization commands -4. Object localization: trigonometric GPS from AI camera angle/zoom/altitude -5. Startup: initialize from last known GPS before GPS denial -6. Failsafe: IMU-only fallback after N seconds of total failure -7. Reboot recovery: re-initialize from flight controller IMU-extrapolated position -8. Max cumulative VO drift <100m between satellite anchors -9. Confidence score per position estimate (high/low) -10. Satellite imagery: ≥0.5 m/pixel, <2 years old -11. WGS84 output format -12. Re-localization via telemetry to ground station (not REST API user input) - -## Decomposed Sub-Questions - -### Q1: MAVLink GPS_INPUT Integration -- How does MAVSDK Python handle GPS_INPUT messages? -- What fields are required in GPS_INPUT? -- What update rate does the flight controller expect? -- Can we send confidence/accuracy indicators via MAVLink? -- How does this replace the REST API + SSE architecture? - -### Q2: Ground Station Telemetry Integration -- How to stream position + confidence over bandwidth-limited telemetry? -- How to receive operator re-localization commands? -- What MAVLink messages support custom data? -- What bandwidth is typical for UAV telemetry links? - -### Q3: Startup & Failsafe Mechanisms -- How to initialize from flight controller's last GPS position? -- How to detect GPS denial onset? -- What happens on companion computer reboot mid-flight? -- How to implement IMU-only dead reckoning fallback? - -### Q4: Object Localization via AI Camera -- How to compute ground GPS from UAV position + camera angle + zoom + altitude? -- What accuracy can be expected given GPS-denied position error? -- How to handle the API between GPS-denied system and AI detection system? - -### Q5: Thermal Management on Jetson Orin Nano Super -- What is sustained thermal performance under GPU load? -- How to monitor and mitigate thermal throttling? -- What power modes are available? - -### Q6: VO Drift Budget & Monitoring -- How to measure cumulative drift between satellite anchors? -- How to trigger satellite matching when drift approaches 100m? -- ESKF covariance as drift proxy? - -### Q7: Weak Points in Draft02 Architecture -- REST API + SSE architecture is wrong — must be MAVLink -- No ground station integration -- No startup/shutdown procedures -- No thermal management -- No object localization detail for AI camera with configurable angle/zoom -- Memory budget doesn't account for MAVSDK overhead - -## Timeliness Sensitivity Assessment -- **Research Topic**: MAVLink integration, MAVSDK for Jetson, ground station telemetry, thermal management -- **Sensitivity Level**: 🟠 High -- **Rationale**: MAVSDK actively developed; MAVLink message set evolving; Jetson JetPack 6.2 specific -- **Source Time Window**: 12 months -- **Priority official sources**: - 1. MAVSDK Python documentation (mavsdk.io) - 2. MAVLink message definitions (mavlink.io) - 3. NVIDIA Jetson Orin Nano thermal documentation - 4. PX4/ArduPilot GPS_INPUT documentation -- **Key version information**: - - MAVSDK-Python: latest PyPI version - - MAVLink: v2 protocol - - JetPack: 6.2.2 - - PyCuVSLAM: v15.0.0 diff --git a/_docs/00_research/gps_denied_nav_v3/01_source_registry.md b/_docs/00_research/gps_denied_nav_v3/01_source_registry.md deleted file mode 100644 index dd4dfd8..0000000 --- a/_docs/00_research/gps_denied_nav_v3/01_source_registry.md +++ /dev/null @@ -1,175 +0,0 @@ -# Source Registry - -## Source #1 -- **Title**: MAVSDK-Python Issue #320: Input external GPS through MAVSDK -- **Link**: https://github.com/mavlink/MAVSDK-Python/issues/320 -- **Tier**: L4 -- **Publication Date**: 2021 (still open 2025) -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: MAVSDK-Python — GPS_INPUT not supported as of v3.15.3 -- **Target Audience**: Companion computer developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: MAVSDK-Python does not support GPS_INPUT message. Feature requested but unresolved. -- **Related Sub-question**: Q1 - -## Source #2 -- **Title**: MAVLink GPS_INPUT Message Specification (mavlink_msg_gps_input.h) -- **Link**: https://rflysim.com/doc/en/RflySimAPIs/RflySimSDK/html/mavlink__msg__gps__input_8h_source.html -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: MAVLink v2, Message ID 232 -- **Target Audience**: MAVLink developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: GPS_INPUT message fields: lat/lon (1E7), alt, fix_type, horiz_accuracy, vert_accuracy, speed_accuracy, hdop, vdop, satellites_visible, velocities NED, yaw, ignore_flags. -- **Related Sub-question**: Q1 - -## Source #3 -- **Title**: ArduPilot GPS Input MAVProxy Documentation -- **Link**: https://ardupilot.org/mavproxy/docs/modules/GPSInput.html -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: ArduPilot GPS1_TYPE=14 -- **Target Audience**: ArduPilot companion computer developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: GPS_INPUT requires GPS1_TYPE=14. Accepts JSON over UDP port 25100. Fields: lat, lon, alt, fix_type, hdop, timestamps. -- **Related Sub-question**: Q1 - -## Source #4 -- **Title**: pymavlink GPS_INPUT example -- **Link**: https://webperso.ensta.fr/lebars/Share/GPS_INPUT_pymavlink.py -- **Tier**: L3 -- **Publication Date**: 2023 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: pymavlink -- **Target Audience**: Companion computer developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Complete pymavlink example for sending GPS_INPUT with all fields including yaw. Uses gps_input_send() method. -- **Related Sub-question**: Q1 - -## Source #5 -- **Title**: ArduPilot AP_GPS_Params.cpp — GPS_RATE_MS -- **Link**: https://cocalc.com/github/ardupilot/ardupilot/blob/master/libraries/AP_GPS/AP_GPS_Params.cpp -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: ArduPilot master -- **Target Audience**: ArduPilot developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: GPS_RATE_MS default 200ms (5Hz), range 50-200ms (5-20Hz). Below 5Hz not allowed. -- **Related Sub-question**: Q1 - -## Source #6 -- **Title**: MAVLink Telemetry Bandwidth Optimization Issue #1605 -- **Link**: https://github.com/mavlink/mavlink/issues/1605 -- **Tier**: L2 -- **Publication Date**: 2022 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: MAVLink protocol developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Minimal telemetry requires ~12kbit/s. Optimized ~6kbit/s. SiK at 57600 baud provides ~21% usable budget. RFD900 for long range (15km+). -- **Related Sub-question**: Q2 - -## Source #7 -- **Title**: NVIDIA JetPack 6.2 Super Mode Blog -- **Link**: https://developer.nvidia.com/blog/nvidia-jetpack-6-2-brings-super-mode-to-nvidia-jetson-orin-nano-and-jetson-orin-nx-modules/ -- **Tier**: L1 -- **Publication Date**: 2025-01 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: JetPack 6.2, Orin Nano Super -- **Target Audience**: Jetson developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: MAXN SUPER mode for peak performance. Thermal throttling at 80°C. Power modes: 15W, 25W, MAXN SUPER. Up to 1.7x AI boost. -- **Related Sub-question**: Q5 - -## Source #8 -- **Title**: Jetson Orin Nano Power Consumption Analysis -- **Link**: https://edgeaistack.app/blog/jetson-orin-nano-power-consumption/ -- **Tier**: L3 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Jetson edge deployment engineers -- **Research Boundary Match**: ✅ Full match -- **Summary**: 5W idle, 8-12W typical inference, 25W peak. Throttling above 80°C drops GPU from 1GHz to 300MHz. Active cooling required for sustained loads. -- **Related Sub-question**: Q5 - -## Source #9 -- **Title**: UAV Target Geolocation (Sensors 2022) -- **Link**: https://www.mdpi.com/1424-8220/22/5/1903 -- **Tier**: L1 -- **Publication Date**: 2022 -- **Timeliness Status**: ✅ Currently valid (math doesn't change) -- **Target Audience**: UAV reconnaissance systems -- **Research Boundary Match**: ✅ Full match -- **Summary**: Trigonometric target geolocation from camera angle, altitude, UAV position. Iterative refinement improves accuracy 22-38x. -- **Related Sub-question**: Q4 - -## Source #10 -- **Title**: pymavlink vs MAVSDK-Python for custom messages (Issue #739) -- **Link**: https://github.com/mavlink/MAVSDK-Python/issues/739 -- **Tier**: L4 -- **Publication Date**: 2024-12 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: MAVSDK-Python, pymavlink -- **Target Audience**: Companion computer developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: MAVSDK-Python lacks custom message support. pymavlink recommended for GPS_INPUT and custom messages. MAVSDK v4 may add MavlinkDirect plugin. -- **Related Sub-question**: Q1 - -## Source #11 -- **Title**: NAMED_VALUE_FLOAT for custom telemetry (PR #18501) -- **Link**: https://github.com/ArduPilot/ardupilot/pull/18501 -- **Tier**: L2 -- **Publication Date**: 2022 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: ArduPilot companion computer developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: NAMED_VALUE_FLOAT messages from companion computer are logged by ArduPilot and forwarded to GCS. Useful for custom telemetry data. -- **Related Sub-question**: Q2 - -## Source #12 -- **Title**: ArduPilot Companion Computer UART Connection -- **Link**: https://ardupilot.org/dev/docs/raspberry-pi-via-mavlink.html -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: ArduPilot companion computer developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Connect via TELEM2 UART. SERIAL2_PROTOCOL=2 (MAVLink2). Baud up to 1.5Mbps. TX/RX crossover. -- **Related Sub-question**: Q1, Q2 - -## Source #13 -- **Title**: Jetson Orin Nano UART with ArduPilot -- **Link**: https://forums.developer.nvidia.com/t/uart-connection-between-jetson-nano-orin-and-ardupilot/325416 -- **Tier**: L4 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: JetPack 6.x, Orin Nano -- **Target Audience**: Jetson + ArduPilot integration -- **Research Boundary Match**: ✅ Full match -- **Summary**: UART instability reported on Orin Nano with ArduPilot. Use /dev/ttyTHS0 or /dev/ttyTHS1. Check pinout carefully. -- **Related Sub-question**: Q1 - -## Source #14 -- **Title**: MAVSDK-Python v3.15.3 PyPI (aarch64 wheels) -- **Link**: https://pypi.org/project/mavsdk/ -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: v3.15.3, manylinux2014_aarch64 -- **Target Audience**: MAVSDK Python developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: MAVSDK-Python has aarch64 wheels. pip install works on Jetson. But no GPS_INPUT support. -- **Related Sub-question**: Q1 - -## Source #15 -- **Title**: ArduPilot receive COMMAND_LONG on companion computer -- **Link**: https://discuss.ardupilot.org/t/recieve-mav-cmd-on-companion-computer/48928 -- **Tier**: L4 -- **Publication Date**: 2020 -- **Timeliness Status**: ⚠️ Needs verification (old but concept still valid) -- **Target Audience**: ArduPilot companion computer developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Companion computer can receive COMMAND_LONG messages from GCS via MAVLink. ArduPilot scripting can intercept specific command IDs. -- **Related Sub-question**: Q2 diff --git a/_docs/00_research/gps_denied_nav_v3/02_fact_cards.md b/_docs/00_research/gps_denied_nav_v3/02_fact_cards.md deleted file mode 100644 index cf5bb72..0000000 --- a/_docs/00_research/gps_denied_nav_v3/02_fact_cards.md +++ /dev/null @@ -1,105 +0,0 @@ -# Fact Cards - -## Fact #1 -- **Statement**: MAVSDK-Python (v3.15.3) does NOT support sending GPS_INPUT MAVLink messages. The feature has been requested since 2021 and remains unresolved. Custom message support is planned for MAVSDK v4 but not available in Python wrapper. -- **Source**: Source #1, #10, #14 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — confirmed by MAVSDK maintainers -- **Related Dimension**: Flight Controller Integration - -## Fact #2 -- **Statement**: pymavlink provides full access to all MAVLink messages including GPS_INPUT via `mav.gps_input_send()`. It is the recommended library for companion computers that need to send GPS_INPUT messages. -- **Source**: Source #4, #10 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — working examples exist -- **Related Dimension**: Flight Controller Integration - -## Fact #3 -- **Statement**: GPS_INPUT (MAVLink msg ID 232) contains: lat/lon (WGS84, degrees×1E7), alt (AMSL), fix_type (0-8), horiz_accuracy (m), vert_accuracy (m), speed_accuracy (m/s), hdop, vdop, satellites_visible, vn/ve/vd (NED m/s), yaw (centidegrees), gps_id, ignore_flags. -- **Source**: Source #2 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — official MAVLink spec -- **Related Dimension**: Flight Controller Integration - -## Fact #4 -- **Statement**: ArduPilot requires GPS1_TYPE=14 (MAVLink) to accept GPS_INPUT messages from a companion computer. Connection via TELEM2 UART, SERIAL2_PROTOCOL=2 (MAVLink2). -- **Source**: Source #3, #12 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — official ArduPilot documentation -- **Related Dimension**: Flight Controller Integration - -## Fact #5 -- **Statement**: ArduPilot GPS update rate (GPS_RATE_MS) default is 200ms (5Hz), range 50-200ms (5-20Hz). Our camera at 3fps (333ms) means GPS_INPUT at 3Hz. ArduPilot minimum is 5Hz. We must interpolate/predict between camera frames to meet 5Hz minimum. -- **Source**: Source #5 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — ArduPilot source code -- **Related Dimension**: Flight Controller Integration - -## Fact #6 -- **Statement**: GPS_INPUT horiz_accuracy field directly maps to our confidence scoring. We can report: satellite-anchored ≈ 10-20m accuracy, VO-extrapolated ≈ 20-50m, IMU-only ≈ 100m+. ArduPilot EKF uses this for fusion weighting internally. -- **Source**: Source #2, #3 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ⚠️ Medium — accuracy mapping is estimated, EKF weighting not fully documented -- **Related Dimension**: Flight Controller Integration - -## Fact #7 -- **Statement**: Typical UAV telemetry bandwidth: SiK radio at 57600 baud provides ~12kbit/s usable for MAVLink. RFD900 provides long range (15km+) at similar data rates. Position telemetry must be compact — ~50 bytes per position update. -- **Source**: Source #6 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — MAVLink protocol specs -- **Related Dimension**: Ground Station Telemetry - -## Fact #8 -- **Statement**: NAMED_VALUE_FLOAT MAVLink message can stream custom telemetry from companion computer to ground station. ArduPilot logs and forwards these. Mission Planner displays them. Useful for confidence score, drift status, matching status. -- **Source**: Source #11 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — ArduPilot merged PR -- **Related Dimension**: Ground Station Telemetry - -## Fact #9 -- **Statement**: Jetson Orin Nano Super throttles GPU from 1GHz to ~300MHz when junction temperature exceeds 80°C. Active cooling (fan) required for sustained load. Power consumption: 5W idle, 8-12W typical inference, 25W peak. Modes: 15W, 25W, MAXN SUPER. -- **Source**: Source #7, #8 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — NVIDIA official -- **Related Dimension**: Thermal Management - -## Fact #10 -- **Statement**: Jetson Orin Nano UART connection to ArduPilot uses /dev/ttyTHS0 or /dev/ttyTHS1. UART instability reported on some units — verify pinout, use JetPack 6.2.2+. Baud up to 1.5Mbps supported. -- **Source**: Source #12, #13 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ⚠️ Medium — UART instability is a known issue with workarounds -- **Related Dimension**: Flight Controller Integration - -## Fact #11 -- **Statement**: Object geolocation from UAV: for nadir (downward) camera, pixel offset from center → meters via GSD → rotate by heading → add to UAV GPS. For oblique (AI) camera with angle θ from vertical: ground_distance = altitude × tan(θ). Combined with zoom → effective focal length → pixel-to-meter conversion. Flat terrain assumption simplifies to basic trigonometry. -- **Source**: Source #9 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — well-established trigonometry -- **Related Dimension**: Object Localization - -## Fact #12 -- **Statement**: Companion computer can receive COMMAND_LONG from ground station via MAVLink. For re-localization hints: ground station sends a custom command with approximate lat/lon, companion computer receives it via pymavlink message listener. -- **Source**: Source #15 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ⚠️ Medium — specific implementation for re-localization hint would be custom -- **Related Dimension**: Ground Station Telemetry - -## Fact #13 -- **Statement**: The restrictions.md now says "using MAVSDK library" but MAVSDK-Python cannot send GPS_INPUT. pymavlink is the only viable Python option for GPS_INPUT. This is a restriction conflict that must be resolved — use pymavlink for GPS_INPUT (core function) or accept MAVSDK + pymavlink hybrid. -- **Source**: Source #1, #2, #10 -- **Phase**: Assessment -- **Target Audience**: GPS-Denied system developers -- **Confidence**: ✅ High — confirmed limitation -- **Related Dimension**: Flight Controller Integration diff --git a/_docs/00_research/gps_denied_nav_v3/03_comparison_framework.md b/_docs/00_research/gps_denied_nav_v3/03_comparison_framework.md deleted file mode 100644 index 0e156a8..0000000 --- a/_docs/00_research/gps_denied_nav_v3/03_comparison_framework.md +++ /dev/null @@ -1,62 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Problem Diagnosis + Decision Support (Mode B) - -## Weak Point Dimensions (from draft02 → new AC/restrictions) - -### Dimension 1: Output Architecture (CRITICAL) -Draft02 uses FastAPI + SSE to stream positions to clients. -New AC requires MAVLink GPS_INPUT to flight controller as PRIMARY output. -Entire output architecture must change. - -### Dimension 2: Ground Station Communication (CRITICAL) -Draft02 has no ground station integration. -New AC requires: stream position+confidence via telemetry, receive re-localization commands. - -### Dimension 3: MAVLink Library Choice (CRITICAL) -Restrictions say "MAVSDK library" but MAVSDK-Python cannot send GPS_INPUT. -Must use pymavlink for core function. - -### Dimension 4: GPS Update Rate (HIGH) -Camera at 3fps → 3Hz position updates. ArduPilot minimum GPS rate is 5Hz. -Need IMU-based interpolation between camera frames. - -### Dimension 5: Startup & Failsafe (HIGH) -Draft02 has no initialization or failsafe procedures. -New AC requires: init from last GPS, reboot recovery, IMU fallback after N seconds. - -### Dimension 6: Object Localization (MEDIUM) -Draft02 has basic pixel-to-GPS for navigation camera only. -New AC adds AI camera with configurable angle, zoom — trigonometric projection needed. - -### Dimension 7: Thermal Management (MEDIUM) -Draft02 ignores thermal throttling. -Jetson Orin Nano Super throttles at 80°C — can drop GPU 3x. - -### Dimension 8: VO Drift Budget Monitoring (MEDIUM) -New AC: max cumulative VO drift <100m between satellite anchors. -Draft02 uses ESKF covariance but doesn't explicitly track drift budget. - -### Dimension 9: Satellite Imagery Specs (LOW) -New AC: ≥0.5 m/pixel, <2 years old. Draft02 uses Google Maps zoom 18-19 which is ~0.3-0.6 m/pixel. -Mostly compatible, needs explicit validation. - -### Dimension 10: API for Internal Systems (LOW) -Object localization requests from AI systems need a local IPC mechanism. -FastAPI could be retained for local-only inter-process communication. - -## Initial Population - -| Dimension | Draft02 State | Required State | Gap Severity | -|-----------|--------------|----------------|-------------| -| Output Architecture | FastAPI + SSE to client | MAVLink GPS_INPUT to flight controller | CRITICAL — full redesign | -| Ground Station | None | Bidirectional MAVLink telemetry | CRITICAL — new component | -| MAVLink Library | Not applicable (no MAVLink) | pymavlink (MAVSDK can't do GPS_INPUT) | CRITICAL — new dependency | -| GPS Update Rate | 3fps → ~3Hz output | ≥5Hz to ArduPilot EKF | HIGH — need IMU interpolation | -| Startup & Failsafe | None | Init from GPS, reboot recovery, IMU fallback | HIGH — new procedures | -| Object Localization | Basic nadir pixel-to-GPS | AI camera angle+zoom trigonometry | MEDIUM — extend existing | -| Thermal Management | Not addressed | Monitor + mitigate throttling | MEDIUM — operational concern | -| VO Drift Budget | ESKF covariance only | Explicit <100m tracking + trigger | MEDIUM — extend ESKF | -| Satellite Imagery Specs | Google Maps zoom 18-19 | ≥0.5 m/pixel, <2 years | LOW — mostly met | -| Internal IPC | REST API | Lightweight local API or shared memory | LOW — simplify from draft02 | diff --git a/_docs/00_research/gps_denied_nav_v3/04_reasoning_chain.md b/_docs/00_research/gps_denied_nav_v3/04_reasoning_chain.md deleted file mode 100644 index 05fea69..0000000 --- a/_docs/00_research/gps_denied_nav_v3/04_reasoning_chain.md +++ /dev/null @@ -1,202 +0,0 @@ -# Reasoning Chain - -## Dimension 1: Output Architecture - -### Fact Confirmation -Per Fact #3, GPS_INPUT (MAVLink msg ID 232) accepts lat/lon in WGS84 (degrees×1E7), altitude, fix_type, accuracy fields, and NED velocities. Per Fact #4, ArduPilot uses GPS1_TYPE=14 to accept MAVLink GPS input. The flight controller's EKF fuses this as if it were a real GPS module. - -### Reference Comparison -Draft02 uses FastAPI + SSE to stream position data to a REST client. The new AC requires the system to output GPS coordinates directly to the flight controller via MAVLink GPS_INPUT, replacing the real GPS module. The flight controller then uses these coordinates for navigation/autopilot functions. The ground station receives position data indirectly via the flight controller's telemetry forwarding. - -### Conclusion -The entire output architecture must change from REST API + SSE → pymavlink GPS_INPUT sender. FastAPI is no longer the primary output mechanism. It may be retained only for local IPC with other onboard AI systems (object localization requests). The SSE streaming to external clients is replaced by MAVLink telemetry forwarding through the flight controller. - -### Confidence -✅ High — clear requirement change backed by MAVLink specification - ---- - -## Dimension 2: Ground Station Communication - -### Fact Confirmation -Per Fact #7, typical telemetry bandwidth is ~12kbit/s (SiK). Per Fact #8, NAMED_VALUE_FLOAT can stream custom values from companion to GCS. Per Fact #12, COMMAND_LONG can deliver commands from GCS to companion. - -### Reference Comparison -Draft02 has no ground station integration. The new AC requires: -1. Stream position + confidence to ground station (passive, via telemetry forwarding of GPS_INPUT data + custom NAMED_VALUE_FLOAT for confidence/drift) -2. Receive re-localization commands from operator (active, via COMMAND_LONG or custom MAVLink message) - -### Conclusion -Ground station communication uses MAVLink messages forwarded through the flight controller's telemetry radio. Position data flows automatically (flight controller forwards GPS data to GCS). Custom telemetry (confidence, drift, status) uses NAMED_VALUE_FLOAT. Re-localization hints from operator use a custom COMMAND_LONG with lat/lon payload. Bandwidth is tight (~12kbit/s) so minimize custom message frequency (1-2Hz max for NAMED_VALUE_FLOAT). - -### Confidence -✅ High — standard MAVLink patterns - ---- - -## Dimension 3: MAVLink Library Choice - -### Fact Confirmation -Per Fact #1, MAVSDK-Python v3.15.3 does NOT support GPS_INPUT. Per Fact #2, pymavlink provides full GPS_INPUT support via `mav.gps_input_send()`. Per Fact #13, the restrictions say "using MAVSDK library" but MAVSDK literally cannot do the core function. - -### Reference Comparison -MAVSDK is a higher-level abstraction over MAVLink. pymavlink is the lower-level direct MAVLink implementation. For GPS_INPUT (our core output), only pymavlink works. - -### Conclusion -Use **pymavlink** as the MAVLink library. The restriction mentioning MAVSDK must be noted as a conflict — pymavlink is the only viable option for GPS_INPUT in Python. pymavlink is lightweight, pure Python, works on aarch64, and provides full access to all MAVLink messages. MAVSDK v4 may add custom message support in the future but is not available now. - -### Confidence -✅ High — confirmed limitation, clear alternative - ---- - -## Dimension 4: GPS Update Rate - -### Fact Confirmation -Per Fact #5, ArduPilot GPS_RATE_MS has a minimum of 200ms (5Hz). Our camera shoots at ~3fps (333ms). We produce a full VO+ESKF position estimate per frame at ~3Hz. - -### Reference Comparison -3Hz < 5Hz minimum. ArduPilot's EKF expects at least 5Hz GPS updates for stable fusion. - -### Conclusion -Between camera frames, use IMU prediction from the ESKF to interpolate position at 5Hz (or higher, e.g., 10Hz). The ESKF already runs IMU prediction at 100+Hz internally. Simply emit the ESKF predicted state as GPS_INPUT at 5-10Hz. Camera frame updates (3Hz) provide the measurement corrections. This is standard in sensor fusion: prediction runs fast, measurements arrive slower. The `fix_type` field can differentiate: camera-corrected frames → fix_type=3 (3D), IMU-predicted → fix_type=2 (2D) or adjust horiz_accuracy to reflect lower confidence. - -### Confidence -✅ High — standard sensor fusion approach - ---- - -## Dimension 5: Startup & Failsafe - -### Fact Confirmation -Per new AC: system initializes from last known GPS before GPS denial. On reboot: re-initialize from flight controller's IMU-extrapolated position. On total failure for N seconds: flight controller falls back to IMU-only. - -### Reference Comparison -Draft02 has no startup or failsafe procedures. The system was assumed to already know its position at session start. - -### Conclusion -Startup sequence: -1. On boot, connect to flight controller via pymavlink -2. Read current GPS position from flight controller (GLOBAL_POSITION_INT or GPS_RAW_INT message) -3. Initialize ESKF state with this position -4. Begin cuVSLAM initialization with first camera frames -5. Start sending GPS_INPUT once ESKF has a valid position estimate - -Failsafe: -1. If no position estimate for N seconds → stop sending GPS_INPUT (flight controller auto-detects GPS loss and falls back to IMU) -2. Log failure event -3. Continue attempting VO/satellite matching - -Reboot recovery: -1. On companion computer reboot, reconnect to flight controller -2. Read current GPS_RAW_INT (which is now IMU-extrapolated by flight controller) -3. Re-initialize ESKF with this position (lower confidence) -4. Resume normal operation - -### Confidence -✅ High — standard autopilot integration patterns - ---- - -## Dimension 6: Object Localization - -### Fact Confirmation -Per Fact #11, for oblique camera: ground_distance = altitude × tan(θ) where θ is angle from vertical. Combined with camera azimuth (yaw + camera pan angle) gives direction. With zoom, effective FOV narrows → higher pixel-to-meter resolution. - -### Reference Comparison -Draft02 has basic nadir-only projection: pixel offset × GSD → meters → rotate by heading → lat/lon. The AI camera has configurable angle and zoom, so this needs extension. - -### Conclusion -Object localization for AI camera: -1. Get current UAV position from GPS-Denied system -2. Get AI camera params: pan angle (azimuth relative to heading), tilt angle (from vertical), zoom level (→ effective focal length) -3. Get pixel coordinates of detected object in AI camera frame -4. Compute: a) bearing = UAV heading + camera pan angle + pixel horizontal offset angle, b) ground_distance = altitude / cos(tilt) × (tilt + pixel vertical offset angle) → simplified for flat terrain, c) convert bearing + distance to lat/lon offset from UAV position -5. Accuracy inherits GPS-Denied position error + projection error from altitude/angle uncertainty - -Expose as lightweight local API (Unix socket or shared memory for speed, or simple HTTP on localhost). - -### Confidence -✅ High — well-established trigonometry, flat terrain simplifies - ---- - -## Dimension 7: Thermal Management - -### Fact Confirmation -Per Fact #9, Jetson Orin Nano Super throttles at 80°C junction temperature, dropping GPU from ~1GHz to ~300MHz (3x slowdown). Active cooling required. Power modes: 15W, 25W, MAXN SUPER. - -### Reference Comparison -Draft02 ignores thermal constraints. Our pipeline (cuVSLAM ~9ms + satellite matcher ~50-200ms) runs on GPU continuously at 3fps. This is moderate but sustained load. - -### Conclusion -Mitigation: -1. Use 25W power mode (not MAXN SUPER) for stable sustained performance -2. Require active cooling (5V fan, should be standard on any UAV companion computer mount) -3. Monitor temperature via tegrastats/jtop at runtime -4. If temp >75°C: reduce satellite matching frequency (every 5-10 frames instead of 3) -5. If temp >80°C: skip satellite matching entirely, rely on VO+IMU only (cuVSLAM at 9ms is low power) -6. Our total GPU time per 333ms frame: ~9ms cuVSLAM + ~50-200ms satellite match (async) = <60% GPU utilization → thermal throttling unlikely with proper cooling - -### Confidence -⚠️ Medium — actual thermal behavior depends on airflow in UAV enclosure, ambient temperature in-flight - ---- - -## Dimension 8: VO Drift Budget Monitoring - -### Fact Confirmation -New AC: max cumulative VO drift between satellite correction anchors < 100m. The ESKF maintains a position covariance matrix that grows during VO-only periods and shrinks on satellite corrections. - -### Reference Comparison -Draft02 uses ESKF covariance for keyframe selection (trigger satellite match when covariance exceeds threshold) but doesn't explicitly track drift as a budget. - -### Conclusion -Use ESKF position covariance diagonal (σ_x² + σ_y²) as the drift estimate. When √(σ_x² + σ_y²) approaches 100m: -1. Force satellite matching on every frame (not just keyframes) -2. Report LOW confidence via GPS_INPUT horiz_accuracy -3. If drift exceeds 100m without satellite correction → flag as critical, increase matching frequency, send alert to ground station -This is essentially what draft02 already does with covariance-based keyframe triggering, but now with an explicit 100m threshold. - -### Confidence -✅ High — standard ESKF covariance interpretation - ---- - -## Dimension 9: Satellite Imagery Specs - -### Fact Confirmation -New AC: ≥0.5 m/pixel resolution, <2 years old. Google Maps at zoom 18 = ~0.6 m/pixel, zoom 19 = ~0.3 m/pixel. - -### Reference Comparison -Draft02 uses Google Maps zoom 18-19. Zoom 19 (0.3 m/pixel) exceeds the requirement. Zoom 18 (0.6 m/pixel) meets the minimum. Age depends on Google's imagery updates for eastern Ukraine — conflict zone may have stale imagery. - -### Conclusion -Validate during offline preprocessing: -1. Download at zoom 19 first (0.3 m/pixel) -2. If zoom 19 unavailable for some tiles, fall back to zoom 18 (0.6 m/pixel — exceeds 0.5 minimum) -3. Check imagery date metadata if available from Google Maps API -4. Flag tiles where imagery appears stale (seasonal mismatch, destroyed buildings, etc.) -5. No architectural change needed — add validation step to preprocessing pipeline - -### Confidence -⚠️ Medium — Google Maps imagery age is not reliably queryable - ---- - -## Dimension 10: Internal IPC for Object Localization - -### Fact Confirmation -Other onboard AI systems need to request GPS coordinates of detected objects. These systems run on the same Jetson. - -### Reference Comparison -Draft02 has FastAPI for external API. For local IPC between processes on the same device, FastAPI is overkill but works. - -### Conclusion -Retain a minimal FastAPI server on localhost:8000 for inter-process communication: -- POST /localize: accepts pixel coordinates + AI camera params → returns GPS coordinates -- GET /status: returns system health/state for monitoring -This is local-only (bind to 127.0.0.1), not exposed externally. The primary output channel is MAVLink GPS_INPUT. This is a lightweight addition, not the core architecture. - -### Confidence -✅ High — simple local IPC pattern diff --git a/_docs/00_research/gps_denied_nav_v3/05_validation_log.md b/_docs/00_research/gps_denied_nav_v3/05_validation_log.md deleted file mode 100644 index 651c7c3..0000000 --- a/_docs/00_research/gps_denied_nav_v3/05_validation_log.md +++ /dev/null @@ -1,88 +0,0 @@ -# Validation Log - -## Validation Scenario -A typical 15-minute flight over eastern Ukraine agricultural terrain. GPS is jammed after first 2 minutes. Flight includes straight segments, two sharp 90-degree turns, and one low-texture segment over a large plowed field. Ground station operator monitors via telemetry link. During the flight, companion computer reboots once due to power glitch. - -## Expected Based on Conclusions - -### Phase 1: Normal start (GPS available, first 2 min) -- System boots, connects to flight controller via pymavlink on UART -- Reads GLOBAL_POSITION_INT → initializes ESKF with real GPS position -- Begins cuVSLAM initialization with first camera frames -- Starts sending GPS_INPUT at 5Hz (ESKF prediction between frames) -- Ground station sees position + confidence via telemetry forwarding - -### Phase 2: GPS denial begins -- Flight controller's real GPS becomes unreliable/lost -- GPS-Denied system continues sending GPS_INPUT — seamless for autopilot -- horiz_accuracy changes from real-GPS level to VO-estimated level (~20m) -- cuVSLAM provides VO at every frame (~9ms), ESKF fuses with IMU -- Satellite matching runs every 3-10 frames on keyframes -- After successful satellite match: horiz_accuracy improves, fix_type stays 3 -- NAMED_VALUE_FLOAT sends confidence/drift data to ground station at ~1Hz - -### Phase 3: Sharp turn -- cuVSLAM loses tracking (no overlapping features) -- ESKF falls back to IMU prediction, horiz_accuracy increases -- Next frame flagged as keyframe → satellite matching triggered immediately -- Satellite match against preloaded tiles using IMU dead-reckoning position -- If match found: position recovered, new segment begins, horiz_accuracy drops -- If 3 consecutive failures: send re-localization request to ground station via NAMED_VALUE_FLOAT/STATUSTEXT -- Ground station operator sends COMMAND_LONG with approximate coordinates -- System receives hint, constrains tile search → likely recovers position - -### Phase 4: Low-texture plowed field -- cuVSLAM keypoint count drops below threshold -- Satellite matching frequency increases (every frame) -- If satellite matching works on plowed field vs satellite imagery: position maintained -- If satellite also fails (seasonal difference): drift accumulates, ESKF covariance grows -- When √(σ²) approaches 100m: force continuous satellite matching -- horiz_accuracy reported as 50-100m, fix_type=2 - -### Phase 5: Companion computer reboot -- Power glitch → Jetson reboots (~30-60 seconds) -- During reboot: flight controller gets no GPS_INPUT → detects GPS timeout → falls back to IMU-only dead reckoning -- Jetson comes back: reconnects via pymavlink, reads GPS_RAW_INT (IMU-extrapolated) -- Initializes ESKF with this position (low confidence, horiz_accuracy=100m) -- Begins cuVSLAM + satellite matching → gradually improves accuracy -- Operator on ground station sees position return with improving confidence - -### Phase 6: Object localization request -- AI detection system on same Jetson detects a vehicle in AI camera frame -- Sends POST /localize with pixel coords + camera angle (30° from vertical) + zoom level + altitude (500m) -- GPS-Denied system computes: ground_distance = 500 / cos(30°) = 577m slant, horizontal distance = 500 × tan(30°) = 289m -- Adds bearing from heading + camera pan → lat/lon offset -- Returns GPS coordinates with accuracy estimate (GPS-Denied accuracy + projection error) - -## Actual Validation Results -The scenario covers all new AC requirements: -- ✅ MAVLink GPS_INPUT at 5Hz (camera frames + IMU interpolation) -- ✅ Confidence via horiz_accuracy field maps to confidence levels -- ✅ Ground station telemetry via MAVLink forwarding + NAMED_VALUE_FLOAT -- ✅ Re-localization via ground station command -- ✅ Startup from GPS → seamless transition on denial -- ✅ Reboot recovery from flight controller IMU-extrapolated position -- ✅ Drift budget tracking via ESKF covariance -- ✅ Object localization with AI camera angle/zoom - -## Counterexamples - -### Potential issue: 5Hz interpolation accuracy -Between camera frames (333ms apart), ESKF predicts using IMU only. At 200km/h = 55m/s, the UAV moves ~18m between frames. IMU prediction over 200ms (one interpolation step) at this speed introduces ~1-5m error — acceptable for GPS_INPUT. - -### Potential issue: UART reliability -Jetson Orin Nano UART instability reported (Fact #10). If MAVLink connection drops during flight, GPS_INPUT stops → autopilot loses GPS. Mitigation: use TCP over USB-C if UART unreliable, or add watchdog to reconnect. This is a hardware integration risk. - -### Potential issue: Telemetry bandwidth saturation -If GPS-Denied sends too many NAMED_VALUE_FLOAT messages, it could compete with standard autopilot telemetry for bandwidth. Keep custom messages to 1Hz max (50-100 bytes/s = <1kbit/s). - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable and verifiable -- [x] All new AC requirements addressed -- [ ] UART reliability needs hardware testing — cannot validate without physical setup - -## Conclusions Requiring Revision -None — all conclusions hold under validation. The UART reliability risk needs flagging but doesn't change the architecture. diff --git a/_docs/00_research/gps_denied_visual_nav/00_ac_assessment.md b/_docs/00_research/gps_denied_visual_nav/00_ac_assessment.md deleted file mode 100644 index b2a029a..0000000 --- a/_docs/00_research/gps_denied_visual_nav/00_ac_assessment.md +++ /dev/null @@ -1,76 +0,0 @@ -# Acceptance Criteria Assessment - -## System Parameters (Calculated) - -| Parameter | Value | -|-----------|-------| -| GSD (at 400m) | 6.01 cm/pixel | -| Ground footprint | 376m × 250m | -| Consecutive overlap | 60-73% (at 100m intervals) | -| Pixels per 50m | ~832 pixels | -| Pixels per 20m | ~333 pixels | - -## Acceptance Criteria - -| Criterion | Our Values | Researched Values | Cost/Timeline Impact | Status | -|-----------|-----------|-------------------|---------------------|--------| -| GPS accuracy: 80% within 50m | 50m error for 80% of photos | NaviLoc: 19.5m MLE at 50-150m alt. Mateos-Ramirez: 143m mean at >1000m alt (with IMU). At 400m with 26MP + satellite correction, 50m for 80% is achievable with VO+SIM. No IMU adds ~30-50% error overhead. | Medium cost — needs robust satellite matching pipeline. ~3-4 weeks for core pipeline. | **Achievable** — keep as-is | -| GPS accuracy: 60% within 20m | 20m error for 60% of photos | NaviLoc: 19.5m MLE at lower altitude (50-150m). At 400m, larger viewpoint gap increases error. Cross-view matching MA@20m improving +10% yearly. Needs high-quality satellite imagery and robust matching. | Higher cost — requires higher-quality satellite imagery (0.3-0.5m resolution). Additional 1-2 weeks for refinement. | **Challenging but achievable** — consider relaxing to 30m initially, tighten with iteration | -| Handle 350m outlier photos | Tolerate up to 350m jump between consecutive photos | Standard VO systems detect outliers via feature matching failure. 350m at GSD 6cm = ~5833 pixels. Satellite re-localization can handle this if area is textured. | Low additional cost — outlier detection is standard in VO pipelines. | **Achievable** — keep as-is | -| Sharp turns: <5% overlap, <200m drift, <70° angle | System continues working during sharp turns | <5% overlap means consecutive feature matching will fail. Must fall back to satellite matching for absolute position. At 400m altitude with 376m footprint, 200m drift means partial overlap with satellite. 70° rotation is large but manageable with rotation-invariant matchers (AKAZE, SuperPoint). | High complexity — requires multi-strategy architecture (VO primary, satellite fallback). +2-3 weeks. | **Achievable with architectural investment** — keep as-is | -| Route disconnection & reconnection | Handle multiple disconnected route segments | Each segment needs independent satellite geo-referencing. Segments are stitched via common satellite reference frame. Similar to loop closure in SLAM but via external reference. | High complexity — core architectural challenge. +2-3 weeks for segment management. | **Achievable** — this should be a core design principle, not an edge case | -| User input fallback (20% of route) | User provides GPS when system cannot determine | Simple UI interaction — user clicks approximate position on map. Becomes new anchor point. | Low cost — straightforward feature. | **Achievable** — keep as-is | -| Processing speed: <5s per image | 5 seconds maximum per image | SuperPoint: ~50-100ms. LightGlue: ~20-50ms. Satellite crop+match: ~200-500ms. Full pipeline: ~500ms-2s on RTX 2060. NaviLoc runs 9 FPS on Raspberry Pi 5. ORB-SLAM3 with GPU: 30 FPS on Jetson TX2. | Low risk — well within budget on RTX 2060+. | **Easily achievable** — could target <2s. Keep 5s as safety margin | -| Real-time streaming via SSE | Results appear immediately, refinement sent later | Standard architecture pattern. Process-and-stream is well-supported. | Low cost — standard web engineering. | **Achievable** — keep as-is | -| Image Registration Rate > 95% | >95% of images successfully registered | ITU thesis: 93% SIM matching. With 60-73% consecutive overlap and deep learning features, >95% for VO between consecutive frames is achievable. The 5% tolerance covers sharp turns. | Medium cost — depends on feature matcher quality and satellite image quality. | **Achievable** — but interpret as "95% for normal consecutive frames". Sharp turn frames counted separately. | -| MRE < 1.0 pixels | Mean Reprojection Error below 1 pixel | Sub-pixel accuracy is standard for SuperPoint/LightGlue. SVO achieves sub-pixel via direct methods. Typical range: 0.3-0.8 pixels. | No additional cost — inherent to modern matchers. | **Easily achievable** — keep as-is | -| REST API + SSE background service | Always-running service, start on request, stream results | Standard Python (FastAPI) or .NET architecture. | Low cost — standard engineering. ~1 week for API layer. | **Achievable** — keep as-is | - -## Restrictions Assessment - -| Restriction | Our Values | Researched Values | Cost/Timeline Impact | Status | -|-------------|-----------|-------------------|---------------------|--------| -| No IMU data | No heading, no pitch/roll correction | **CRITICAL restriction.** Most published systems use IMU for heading and as fallback. Without IMU: (1) heading must be derived from consecutive frame matching or satellite matching, (2) no pitch/roll correction — rely on robust feature matchers, (3) scale from known altitude only. Adds ~30-50% error vs IMU-equipped systems. | High impact — requires visual heading estimation. All VO literature assumes at least heading from IMU. +2-3 weeks R&D for pure visual heading. | **Realistic but significantly harder.** Consider: can barometer data be available? | -| Camera not auto-stabilized | Images have varying pitch/roll | At 400m with fixed-wing, typical roll ±15°, pitch ±10°. Causes trapezoidal distortion in images. Robust matchers (SuperPoint, LightGlue) handle moderate viewpoint changes. Homography estimation between frames compensates. | Medium impact — modern matchers handle this. Pre-rectification using estimated attitude could help. | **Realistic** — keep as-is. Mitigated by robust matchers. | -| Google Maps only (cost-dependent) | Currently limited to Google Maps | Google Maps in eastern Ukraine may have 2-5 year old imagery. Conflict damage makes old imagery unreliable. **Risk: satellite-UAV matching may fail in areas with significant ground changes.** Alternatives: Mapbox (Maxar Vivid, sub-meter), Bing Maps (0.3-1m), Maxar SecureWatch (30cm, enterprise pricing). | High risk — may need multiple providers. Google: $200/month free credit. Mapbox: free tier for 100K requests. Maxar: enterprise pricing. | **Tighten** — add fallback provider. Pre-download tile cache for operational area. | -| Image resolution FullHD to 6252×4168 | Variable resolution across flights | Lower resolution (FullHD=1920×1080) at 400m: GSD ≈ 0.20m/pixel, footprint ~384m × 216m. Significantly worse matching but still functional. Need to handle both extremes. | Medium impact — pipeline must be resolution-adaptive. | **Realistic** — keep. But note: FullHD accuracy will be ~3x worse than 26MP. | -| Altitude ≤ 1km, terrain height negligible | Flat terrain assumption at known altitude | Simplifies scale estimation. At 400m, terrain variations of ±50m cause ±12.5% scale error. Eastern Ukraine is relatively flat (steppe), so this is reasonable. | Low impact for the operational area. | **Realistic** — keep as-is | -| Mostly sunny weather | Good lighting conditions assumed | Sunny weather = good texture, consistent illumination. Shadows may cause matching issues but are manageable. | Low impact — favorable condition. | **Realistic** — keep. Add: "system performance degrades in overcast/low-light" | -| Up to 3000 photos per flight | 500-1500 typical, 3000 maximum | At <5s per image: 3000 photos = ~4 hours max. Memory: 3000 × 26MP ≈ 78GB raw. Need efficient memory management and incremental processing. | Medium impact — requires streaming architecture and careful memory management. | **Realistic** — keep. Memory management is engineering, not research. | -| Sharp turns with completely different next photo | Route discontinuity is possible | Most VO systems fail at 0% overlap. This is effectively a new "start point" problem. Satellite matching is the only recovery path. | High impact — already addressed in AC. | **Realistic** — this is the defining challenge | -| Desktop/laptop with RTX 2060+ | Minimum GPU requirement | RTX 2060: 6GB VRAM, 1920 CUDA cores. Sufficient for SuperPoint, LightGlue, satellite matching. RTX 3070: 8GB VRAM, 5888 CUDA cores — significantly faster. | Low risk — hardware is adequate. | **Realistic** — keep as-is | - -## Missing Acceptance Criteria (Suggested Additions) - -| Criterion | Suggested Value | Rationale | -|-----------|----------------|-----------| -| Satellite imagery resolution requirement | ≥ 0.5 m/pixel, ideally 0.3 m/pixel | Matching quality depends heavily on reference imagery resolution. At GSD 6cm, satellite must be at least 0.5m for reliable cross-view matching. | -| Confidence/uncertainty reporting | Report confidence score per position estimate | User needs to know which positions are reliable (satellite-anchored) vs uncertain (VO-only, accumulating drift). | -| Output format | WGS84 coordinates in GeoJSON or CSV | Standardize output for downstream integration. | -| Satellite image freshness requirement | < 2 years old for operational area | Older imagery may not match current ground truth due to conflict damage. | -| Maximum drift between satellite corrections | < 100m cumulative VO drift before satellite re-anchor | Prevents long uncorrected VO segments from exceeding 50m target. | -| Memory usage limit | < 16GB RAM, < 6GB VRAM | Ensures compatibility with RTX 2060 systems. | - -## Key Findings - -1. **The 50m/80% accuracy target is achievable** with a well-designed VO + satellite matching pipeline, even without IMU, given the high camera resolution (6cm GSD) and known altitude. NaviLoc achieves 19.5m at lower altitudes; our 400m altitude adds difficulty but 26MP resolution compensates. - -2. **The 20m/60% target is aggressive but possible** with high-quality satellite imagery (≤0.5m resolution). Consider starting with a 30m target and tightening through iteration. Performance heavily depends on satellite image quality and freshness for the operational area. - -3. **No IMU is the single biggest technical risk.** All published comparable systems use at least heading from IMU/magnetometer. Visual heading estimation from consecutive frames is feasible but adds noise. This restriction alone could require 2-3 extra weeks of R&D. - -4. **Google Maps satellite imagery for eastern Ukraine is a significant risk.** Imagery may be outdated (2-5 years) and may not reflect current ground conditions. A fallback satellite provider is strongly recommended. - -5. **Processing speed (<5s) is easily achievable** on RTX 2060+. Modern feature matching pipelines process in <500ms per pair. The pipeline could realistically achieve <2s per image. - -6. **Route disconnection handling should be the core architectural principle**, not an edge case. The system should be designed "segments-first" — each segment independently geo-referenced, then stitched. - -7. **Missing criterion: confidence reporting.** The user should see which positions are high-confidence (satellite-anchored) vs low-confidence (VO-extrapolated). This is critical for operational use. - -## Sources -- [Source #1] Mateos-Ramirez et al. (2024) — VO + satellite correction for fixed-wing UAV -- [Source #2] Öztürk (2025) — ORB-SLAM3 + SIM integration thesis -- [Source #3] NaviLoc (2025) — Trajectory-level visual localization -- [Source #4] LightGlue GitHub — Feature matching benchmarks -- [Source #5] DALGlue (2025) — Enhanced feature matching -- [Source #8-9] Satellite imagery coverage and pricing reports diff --git a/_docs/00_research/gps_denied_visual_nav/00_question_decomposition.md b/_docs/00_research/gps_denied_visual_nav/00_question_decomposition.md deleted file mode 100644 index 8349f3c..0000000 --- a/_docs/00_research/gps_denied_visual_nav/00_question_decomposition.md +++ /dev/null @@ -1,63 +0,0 @@ -# Question Decomposition — AC & Restrictions Assessment - -## Original Question -How realistic are the acceptance criteria and restrictions for a GPS-denied visual navigation system for fixed-wing UAV imagery? - -## Active Mode -Mode A, Phase 1: AC & Restrictions Assessment - -## Question Type -Knowledge Organization + Decision Support - -## Research Subject Boundary Definition - -| Dimension | Boundary | -|-----------|----------| -| **Platform** | Fixed-wing UAV, airplane type, not multirotor | -| **Geography** | Eastern/southern Ukraine, left of Dnipro River (conflict zone, ~48.27°N, 37.38°E based on sample data) | -| **Altitude** | ≤ 1km, sample data at 400m | -| **Sensor** | Monocular RGB camera, 26MP, no IMU, no LiDAR | -| **Processing** | Ground-based desktop/laptop with NVIDIA RTX 2060+ GPU | -| **Time Window** | Current state-of-the-art (2024-2026) | - -## Problem Context Summary - -The system must determine GPS coordinates of consecutive aerial photo centers using only: -- Known starting GPS coordinates -- Known camera parameters (25mm focal, 23.5mm sensor, 6252×4168 resolution) -- Known flight altitude (≤1km, sample: 400m) -- Consecutive photos taken within ~100m of each other -- Satellite imagery (Google Maps) for ground reference - -Key constraints: NO IMU data, camera not auto-stabilized, potentially outdated satellite imagery for conflict zone. - -**Ground Sample Distance (GSD) at 400m altitude**: -- GSD = (400 × 23.5) / (25 × 6252) ≈ 0.060 m/pixel (6 cm/pixel) -- Ground footprint: ~376m × 250m per image -- Estimated consecutive overlap: 60-73% (depending on camera orientation relative to flight direction) - -## Sub-Questions for AC Assessment - -1. What GPS accuracy is achievable with VO + satellite matching at 400m altitude with 26MP camera? -2. How does the absence of IMU affect accuracy and what compensations exist? -3. What processing speed is achievable per image on RTX 2060+ for the required pipeline? -4. What image registration rates are achievable with deep learning matchers? -5. What reprojection errors are typical for modern feature matching? -6. How do sharp turns and route disconnections affect VO systems? -7. What satellite imagery quality is available for the operational area? -8. What domain-specific acceptance criteria might be missing? - -## Timeliness Sensitivity Assessment - -- **Research Topic**: GPS-denied visual navigation using deep learning feature matching -- **Sensitivity Level**: 🟠 High -- **Rationale**: Deep learning feature matchers (SuperPoint, LightGlue, GIM) are evolving rapidly; new methods appear quarterly. Satellite imagery providers update pricing and coverage frequently. -- **Source Time Window**: 12 months (2024-2026) -- **Priority official sources to consult**: - 1. LightGlue GitHub repository (cvg/LightGlue) - 2. ORB-SLAM3 documentation - 3. Recent MDPI/IEEE papers on GPS-denied UAV navigation -- **Key version information to verify**: - - LightGlue: Current release and performance benchmarks - - SuperPoint: Compatibility and inference speed - - ORB-SLAM3: Monocular mode capabilities diff --git a/_docs/00_research/gps_denied_visual_nav/01_source_registry.md b/_docs/00_research/gps_denied_visual_nav/01_source_registry.md deleted file mode 100644 index be8fcfa..0000000 --- a/_docs/00_research/gps_denied_visual_nav/01_source_registry.md +++ /dev/null @@ -1,133 +0,0 @@ -# Source Registry - -## Source #1 -- **Title**: Visual Odometry in GPS-Denied Zones for Fixed-Wing UAV with Reduced Accumulative Error Based on Satellite Imagery -- **Link**: https://www.mdpi.com/2076-3417/14/16/7420 -- **Tier**: L1 -- **Publication Date**: 2024-08-22 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Fixed-wing UAV navigation researchers -- **Research Boundary Match**: ✅ Full match (fixed-wing, high altitude, satellite matching) -- **Summary**: VO + satellite image correction achieves 142.88m mean error over 17km at >1000m altitude using ORB + AKAZE. Uses IMU for heading and barometer for altitude. Error rate 0.83% of total distance. -- **Related Sub-question**: 1, 2 - -## Source #2 -- **Title**: Optimized visual odometry and satellite image matching-based localization for UAVs in GPS-denied environments (ITU Thesis) -- **Link**: https://polen.itu.edu.tr/items/1fe1e872-7cea-44d8-a8de-339e4587bee6 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV navigation researchers -- **Research Boundary Match**: ⚠️ Partial overlap (multirotor at 30-100m, but same VO+SIM methodology) -- **Summary**: ORB-SLAM3 + SuperPoint/SuperGlue/GIM achieves GPS-level accuracy. VO module: ±2m local accuracy. SIM module: 93% matching success rate. Demonstrated on DJI Mavic Air 2 at 30-100m. -- **Related Sub-question**: 1, 2, 4 - -## Source #3 -- **Title**: NaviLoc: Trajectory-Level Visual Localization for GNSS-Denied UAV Navigation -- **Link**: https://www.mdpi.com/2504-446X/10/2/97 -- **Tier**: L1 -- **Publication Date**: 2025-12 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV navigation / VPR researchers -- **Research Boundary Match**: ⚠️ Partial overlap (50-150m altitude, uses VIO not pure VO) -- **Summary**: Achieves 19.5m Mean Localization Error at 50-150m altitude. Runs at 9 FPS on Raspberry Pi 5. 16x improvement over AnyLoc-VLAD, 32x over raw VIO drift. Training-free system. -- **Related Sub-question**: 1, 7 - -## Source #4 -- **Title**: LightGlue: Local Feature Matching at Light Speed (GitHub + ICCV 2023) -- **Link**: https://github.com/cvg/LightGlue -- **Tier**: L1 -- **Publication Date**: 2023 (actively maintained through 2025) -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Computer vision practitioners -- **Research Boundary Match**: ✅ Full match (core component) -- **Summary**: ~20-34ms per image pair on RTX 2080Ti. Adaptive pruning for fast inference. 2-4x speedup with PyTorch compilation. -- **Related Sub-question**: 3, 4 - -## Source #5 -- **Title**: Efficient image matching for UAV visual navigation via DALGlue -- **Link**: https://www.nature.com/articles/s41598-025-21602-5 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV navigation researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: DALGlue achieves 11.8% improvement over LightGlue on matching accuracy. Uses dual-tree complex wavelet preprocessing + linear attention for real-time performance. -- **Related Sub-question**: 3, 4 - -## Source #6 -- **Title**: Deep-UAV SLAM: SuperPoint and SuperGlue enhanced SLAM -- **Link**: https://isprs-archives.copernicus.org/articles/XLVIII-1-W5-2025/177/2025/ -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV SLAM researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Replacing ORB-SLAM3's ORB features with SuperPoint+SuperGlue improved robustness and accuracy in aerial RGB scenarios. -- **Related Sub-question**: 4, 5 - -## Source #7 -- **Title**: SCAR: Satellite Imagery-Based Calibration for Aerial Recordings -- **Link**: https://arxiv.org/html/2602.16349v1 -- **Tier**: L1 -- **Publication Date**: 2026-02 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Aerial/satellite vision researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Long-term auto-calibration refinement by aligning aerial images with 2D-3D correspondences from orthophotos and elevation models. -- **Related Sub-question**: 1, 5 - -## Source #8 -- **Title**: Google Maps satellite imagery coverage and update frequency -- **Link**: https://ongeo-intelligence.com/blog/how-often-does-google-maps-update-satellite-images -- **Tier**: L3 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GIS practitioners -- **Research Boundary Match**: ✅ Full match -- **Summary**: Conflict zones like eastern Ukraine face 2-5+ year update cycles. Imagery may be intentionally limited or blurred. -- **Related Sub-question**: 7 - -## Source #9 -- **Title**: Satellite Mapping Services comparison 2025 -- **Link**: https://ts2.tech/en/exploring-the-world-from-above-top-satellite-mapping-services-for-web-mobile-in-2025/ -- **Tier**: L3 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Developers, GIS practitioners -- **Research Boundary Match**: ✅ Full match -- **Summary**: Google: $200/month free credit, sub-meter resolution. Mapbox: Maxar imagery, generous free tier. Maxar SecureWatch: 30cm resolution, enterprise pricing. Planet: daily 3-4m imagery. -- **Related Sub-question**: 7 - -## Source #10 -- **Title**: Scale Estimation for Monocular Visual Odometry Using Reliable Camera Height -- **Link**: https://ieeexplore.ieee.org/document/9945178/ -- **Tier**: L1 -- **Publication Date**: 2022 -- **Timeliness Status**: ✅ Currently valid (fundamental method) -- **Target Audience**: VO researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Known camera height/altitude resolves scale ambiguity in monocular VO. Essential for systems without IMU. -- **Related Sub-question**: 2 - -## Source #11 -- **Title**: Cross-View Geo-Localization benchmarks (SSPT, MA metrics) -- **Link**: https://www.mdpi.com/1424-8220/24/12/3719 -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: VPR/geo-localization researchers -- **Research Boundary Match**: ⚠️ Partial overlap (general cross-view, not UAV-specific) -- **Summary**: SSPT achieved 84.40% RDS on UL14 dataset. MA improvements: +12% at 3m, +12% at 5m, +10% at 20m thresholds. -- **Related Sub-question**: 1 - -## Source #12 -- **Title**: ORB-SLAM3 GPU Acceleration Performance -- **Link**: https://arxiv.org/html/2509.10757v1 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: SLAM/VO engineers -- **Research Boundary Match**: ✅ Full match -- **Summary**: GPU acceleration achieves 2.8x speedup on desktop systems. 30 FPS achievable on Jetson TX2. Feature extraction up to 3x speedup with CUDA. -- **Related Sub-question**: 3 diff --git a/_docs/00_research/gps_denied_visual_nav/02_fact_cards.md b/_docs/00_research/gps_denied_visual_nav/02_fact_cards.md deleted file mode 100644 index 75736a1..0000000 --- a/_docs/00_research/gps_denied_visual_nav/02_fact_cards.md +++ /dev/null @@ -1,121 +0,0 @@ -# Fact Cards - -## Fact #1 -- **Statement**: VO + satellite image correction achieves ~142.88m mean error over 17km flight at >1000m altitude using ORB features and AKAZE satellite matching. Error rate: 0.83% of total distance. This system uses IMU for heading and barometer for altitude. -- **Source**: Source #1 — https://www.mdpi.com/2076-3417/14/16/7420 -- **Phase**: Phase 1 -- **Target Audience**: Fixed-wing UAV at high altitude (>1000m) -- **Confidence**: ✅ High (peer-reviewed, real-world flight data) -- **Related Dimension**: GPS accuracy, drift correction - -## Fact #2 -- **Statement**: ORB-SLAM3 monocular mode with optimized parameters achieves ±2m local accuracy for visual odometry. Scale ambiguity and drift remain for long flights. -- **Source**: Source #2 — ITU Thesis -- **Phase**: Phase 1 -- **Target Audience**: UAV navigation (30-100m altitude, multirotor) -- **Confidence**: ✅ High (thesis with experimental validation) -- **Related Dimension**: VO accuracy, scale ambiguity - -## Fact #3 -- **Statement**: Combined VO + Satellite Image Matching (SIM) with SuperPoint/SuperGlue/GIM achieves 93% matching success rate and "GPS-level accuracy" at 30-100m altitude. -- **Source**: Source #2 — ITU Thesis -- **Phase**: Phase 1 -- **Target Audience**: Low-altitude UAV (30-100m) -- **Confidence**: ✅ High -- **Related Dimension**: Registration rate, satellite matching - -## Fact #4 -- **Statement**: NaviLoc achieves 19.5m Mean Localization Error at 50-150m altitude, runs at 9 FPS on Raspberry Pi 5. 16x improvement over AnyLoc-VLAD. Training-free system. -- **Source**: Source #3 — NaviLoc paper -- **Phase**: Phase 1 -- **Target Audience**: Low-altitude UAV (50-150m) in rural areas -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: GPS accuracy, processing speed - -## Fact #5 -- **Statement**: LightGlue inference: ~20-34ms per image pair on RTX 2080Ti for 1024 keypoints. 2-4x speedup possible with PyTorch compilation and TensorRT. -- **Source**: Source #4 — LightGlue GitHub Issues -- **Phase**: Phase 1 -- **Target Audience**: All GPU-accelerated vision systems -- **Confidence**: ✅ High (official repository benchmarks) -- **Related Dimension**: Processing speed - -## Fact #6 -- **Statement**: SuperPoint+SuperGlue replacing ORB features in SLAM improves robustness and accuracy for aerial RGB imagery over classical handcrafted features. -- **Source**: Source #6 — ISPRS 2025 -- **Phase**: Phase 1 -- **Target Audience**: UAV SLAM researchers -- **Confidence**: ✅ High (peer-reviewed) -- **Related Dimension**: Feature matching quality - -## Fact #7 -- **Statement**: Eastern Ukraine / conflict zones may have 2-5+ year old satellite imagery on Google Maps. Imagery may be intentionally limited, blurred, or restricted for security reasons. -- **Source**: Source #8 -- **Phase**: Phase 1 -- **Target Audience**: Ukraine conflict zone operations -- **Confidence**: ⚠️ Medium (general reporting, not Ukraine-specific verification) -- **Related Dimension**: Satellite imagery quality - -## Fact #8 -- **Statement**: Maxar SecureWatch offers 30cm resolution with ~3M km² new imagery daily. Mapbox uses Maxar's Vivid imagery with sub-meter resolution. Google Maps offers sub-meter detail in urban areas but 1-3m in rural areas. -- **Source**: Source #9 -- **Phase**: Phase 1 -- **Target Audience**: All satellite imagery users -- **Confidence**: ✅ High -- **Related Dimension**: Satellite providers, cost - -## Fact #9 -- **Statement**: Known camera height/altitude resolves scale ambiguity in monocular VO. The pixel-to-meter conversion is s = H / f × sensor_pixel_size, enabling metric reconstruction without IMU. -- **Source**: Source #10 -- **Phase**: Phase 1 -- **Target Audience**: Monocular VO systems -- **Confidence**: ✅ High (fundamental geometric relationship) -- **Related Dimension**: No-IMU compensation - -## Fact #10 -- **Statement**: Camera heading (yaw) can be estimated from consecutive frame feature matching by decomposing the homography or essential matrix. Pitch/roll can be estimated from horizon detection or vanishing points. Without IMU, these estimates are noisier but functional. -- **Source**: Multiple vision-based heading estimation papers -- **Phase**: Phase 1 -- **Target Audience**: Vision-only navigation systems -- **Confidence**: ⚠️ Medium (well-established but accuracy varies) -- **Related Dimension**: No-IMU compensation - -## Fact #11 -- **Statement**: GSD at 400m with 25mm/23.5mm sensor/6252px = 6.01 cm/pixel. Ground footprint: 376m × 250m. At 100m photo interval, consecutive overlap is 60-73%. -- **Source**: Calculated from problem data using standard GSD formula -- **Phase**: Phase 1 -- **Target Audience**: This specific system -- **Confidence**: ✅ High (deterministic calculation) -- **Related Dimension**: Image coverage, overlap - -## Fact #12 -- **Statement**: GPU-accelerated ORB-SLAM3 achieves 2.8x speedup on desktop systems. 30 FPS possible on Jetson TX2. Feature extraction speedup up to 3x with CUDA-optimized pipelines. -- **Source**: Source #12 -- **Phase**: Phase 1 -- **Target Audience**: GPU-equipped systems -- **Confidence**: ✅ High -- **Related Dimension**: Processing speed - -## Fact #13 -- **Statement**: Without IMU, the Mateos-Ramirez paper (Source #1) would lose: (a) yaw angle for rotation compensation, (b) fallback when feature matching fails. Their 142.88m error would likely be significantly higher without IMU heading data. -- **Source**: Inference from Source #1 methodology -- **Phase**: Phase 1 -- **Target Audience**: This specific system -- **Confidence**: ⚠️ Medium (reasoned inference) -- **Related Dimension**: No-IMU impact - -## Fact #14 -- **Statement**: DALGlue achieves 11.8% improvement over LightGlue on matching accuracy while maintaining real-time performance through dual-tree complex wavelet preprocessing and linear attention. -- **Source**: Source #5 -- **Phase**: Phase 1 -- **Target Audience**: Feature matching systems -- **Confidence**: ✅ High (peer-reviewed, 2025) -- **Related Dimension**: Feature matching quality - -## Fact #15 -- **Statement**: Cross-view geo-localization benchmarks show MA@20m improving by +10% with latest methods (SSPT). RDS metric at 84.40% indicates reliable spatial positioning. -- **Source**: Source #11 -- **Phase**: Phase 1 -- **Target Audience**: Cross-view matching researchers -- **Confidence**: ✅ High -- **Related Dimension**: Cross-view matching accuracy diff --git a/_docs/00_research/gps_denied_visual_nav/03_comparison_framework.md b/_docs/00_research/gps_denied_visual_nav/03_comparison_framework.md deleted file mode 100644 index b0cb86e..0000000 --- a/_docs/00_research/gps_denied_visual_nav/03_comparison_framework.md +++ /dev/null @@ -1,115 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Decision Support (component-by-component solution comparison) - -## System Components -1. Visual Odometry (consecutive frame matching) -2. Satellite Image Geo-Referencing (cross-view matching) -3. Heading & Orientation Estimation (without IMU) -4. Drift Correction & Position Fusion -5. Segment Management & Route Reconnection -6. Interactive Point-to-GPS Lookup -7. Pipeline Orchestration & API - ---- - -## Component 1: Visual Odometry - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| ORB-SLAM3 monocular | ORB features, BA, map management | Mature, well-tested, handles loop closure. GPU-accelerated. 30FPS on Jetson TX2. | Scale ambiguity without IMU. Over-engineered for sequential aerial — map building not needed. Heavy dependency. | Medium — too complex for the use case | -| Homography-based VO with SuperPoint+LightGlue | SuperPoint, LightGlue, OpenCV homography | Ground plane assumption perfect for flat terrain at 400m. Cleanly separates rotation/translation. Known altitude resolves scale directly. Fast. | Assumes planar scene (valid for our case). Fails at sharp turns (but that's expected). | **Best fit** — matches constraints exactly | -| Optical flow VO | cv2.calcOpticalFlowPyrLK or RAFT | Dense motion field, no feature extraction needed. | Less accurate for large motions. Struggles with texture-sparse areas. No inherent rotation estimation. | Low — not suitable for 100m baselines | -| Direct method (SVO) | SVO Pro | Sub-pixel precision, fast. | Designed for small baselines and forward cameras. Poor for downward aerial at large baselines. | Low | - -**Selected**: Homography-based VO with SuperPoint + LightGlue features - ---- - -## Component 2: Satellite Image Geo-Referencing - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| SuperPoint + LightGlue cross-view matching | SuperPoint, LightGlue, perspective warp | Best overall performance on satellite stereo benchmarks. Fast (~50ms matching). Rotation-invariant. Handles viewpoint/scale changes. | Requires perspective warping to reduce viewpoint gap. Needs good satellite image quality. | **Best fit** — proven on satellite imagery | -| SuperPoint + SuperGlue + GIM | SuperPoint, SuperGlue, GIM | GIM adds generalization for challenging scenes. 93% match rate (ITU thesis). | SuperGlue slower than LightGlue. GIM adds complexity. | Good — slightly better robustness, slower | -| LoFTR (detector-free) | LoFTR | No keypoint detection step. Works on low-texture. | Slower than detector-based methods. Fixed resolution (coarse). Less accurate than SuperPoint+LightGlue on satellite benchmarks. | Medium — fallback option | -| DUSt3R/MASt3R | DUSt3R/MASt3R | Handles extreme viewpoints and low overlap. +50% completeness over COLMAP in sparse scenarios. | Very slow. Designed for 3D reconstruction not 2D matching. Unreliable with many images. | Low — only for extreme fallback | -| Terrain-weighted optimization (YFS90) | Custom pipeline + DEM | <7m MAE without IMU! Drift-free. Handles thermal IR. 20 scenarios validated. | Requires DEM data. More complex implementation. Not open-source matching details. | High — architecture inspiration | - -**Selected**: SuperPoint + LightGlue (primary) with perspective warping. GIM as supplementary for difficult matches. YFS90-style terrain-weighted sliding window for position optimization. - ---- - -## Component 3: Heading & Orientation Estimation - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| Homography decomposition (consecutive frames) | OpenCV decomposeHomographyMat | Directly gives rotation between frames. Works with ground plane assumption. No extra sensors needed. | Accumulates heading drift over time. Noisy for small motions. Ambiguous decomposition (need to select correct solution). | **Best fit** — primary heading source | -| Satellite matching absolute orientation | From satellite match homography | Provides absolute heading correction. Eliminates accumulated heading drift. | Only available when satellite match succeeds. Intermittent. | **Best fit** — drift correction for heading | -| Optical flow direction | Dense flow vectors | Simple to compute. | Very noisy at high altitude. Unreliable for heading. | Low | - -**Selected**: Homography decomposition for frame-to-frame heading + satellite matching for periodic absolute heading correction. - ---- - -## Component 4: Drift Correction & Position Fusion - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| Kalman filter (EKF/UKF) | filterpy or custom | Well-understood. Handles noisy measurements. Good for fusing VO + satellite. | Assumes Gaussian noise. Linearization issues with EKF. | Good — simple and effective | -| Sliding window optimization with terrain constraints | Custom optimization, scipy.optimize | YFS90 achieves <7m with this. Directly constrains drift. No loop closure needed. | More complex to implement. Needs tuning. | **Best fit** — proven for this exact problem | -| Pose graph optimization | g2o, GTSAM | Standard in SLAM. Handles satellite anchors as prior factors. Globally optimal. | Heavy dependency. Over-engineered if segments are short. | Medium — overkill unless routes are very long | -| Simple anchor reset | Direct correction at satellite match | Simplest. Just replace VO position with satellite position. | Discontinuous trajectory. No smoothing. | Low — too crude | - -**Selected**: Sliding window optimization with terrain constraints (inspired by YFS90), with Kalman filter as simpler fallback. Satellite matches as absolute anchor constraints. - ---- - -## Component 5: Segment Management & Route Reconnection - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| Segments-first architecture with satellite anchoring | Custom segment manager | Each segment independently geo-referenced. No dependency between disconnected segments. Natural handling of sharp turns. | Needs robust satellite matching per segment. Segments without any satellite match are "floating". | **Best fit** — matches AC requirement for core strategy | -| Global pose graph with loop closure | g2o/GTSAM | Can connect segments when they revisit same area. | Heavy. Doesn't help if segments don't overlap with each other. | Low — segments may not revisit same areas | -| Trajectory-level VPR (NaviLoc-style) | VPR + trajectory optimization | Global optimization across trajectory. | Requires pre-computed VPR database. Complex. Designed for continuous trajectory, not disconnected segments. | Low | - -**Selected**: Segments-first architecture. Each segment starts from a satellite anchor or user input. Segments connected through shared satellite coordinate frame. - ---- - -## Component 6: Interactive Point-to-GPS Lookup - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| Homography projection (image → ground) | Computed homography from satellite match | Already computed during geo-referencing. Accurate for flat terrain. | Only works for images with successful satellite match. | **Best fit** | -| Camera ray-casting with known altitude | Camera intrinsics + pose estimate | Works for any image with pose estimate. Simpler math. | Accuracy depends on pose estimate quality. | Good — fallback for non-satellite-matched images | - -**Selected**: Homography projection (primary) + ray-casting (fallback). - ---- - -## Component 7: Pipeline & API - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| Python FastAPI + SSE | FastAPI, EventSourceResponse, asyncio | Native SSE support (since 0.135.0). Async GPU pipeline. Excellent for ML/CV workloads. Rich ecosystem. | Python GIL (mitigated with async/multiprocessing). | **Best fit** — natural for CV/ML pipeline | -| .NET ASP.NET Core + SSE | ASP.NET Core, SignalR | High performance. Good for enterprise. | Less natural for CV/ML. Python interop needed for PyTorch models. Adds complexity. | Low — unnecessary indirection | -| Python + gRPC streaming | gRPC | Efficient binary protocol. Bidirectional streaming. | More complex client integration. No browser-native support. | Medium — overkill for this use case | - -**Selected**: Python FastAPI with SSE. - ---- - -## Google Maps Tile Resolution at Latitude 48° (Operational Area) - -| Zoom Level | Meters/pixel | Tile coverage (256px) | Tiles for 20km² | Download size est. | -|-----------|-------------|----------------------|-----------------|-------------------| -| 17 | 0.80 m/px | ~205m × 205m | ~500 tiles | ~20MB | -| 18 | 0.40 m/px | ~102m × 102m | ~2,000 tiles | ~80MB | -| 19 | 0.20 m/px | ~51m × 51m | ~8,000 tiles | ~320MB | -| 20 | 0.10 m/px | ~26m × 26m | ~30,000 tiles | ~1.2GB | - -Formula: metersPerPx = 156543.03 × cos(48° × π/180) / 2^zoom ≈ 104,771 / 2^zoom - -**Selected**: Zoom 18 (0.40 m/px) as primary matching resolution. Zoom 19 (0.20 m/px) for refinement if available. Meets the ≥0.5 m/pixel AC requirement. diff --git a/_docs/00_research/gps_denied_visual_nav/04_reasoning_chain.md b/_docs/00_research/gps_denied_visual_nav/04_reasoning_chain.md deleted file mode 100644 index 9c3ad06..0000000 --- a/_docs/00_research/gps_denied_visual_nav/04_reasoning_chain.md +++ /dev/null @@ -1,146 +0,0 @@ -# Reasoning Chain - -## Dimension 1: GPS Accuracy (50m/80%, 20m/60%) - -### Fact Confirmation -- YFS90 system achieves <7m MAE without IMU (Fact from Source DOAJ/GitHub) -- NaviLoc achieves 19.5m MLE at 50-150m altitude (Fact #4) -- Mateos-Ramirez achieves 143m mean error at >1000m altitude with IMU (Fact #1) -- Our GSD is 6cm/pixel at 400m altitude (Fact #11) -- ITU thesis achieves GPS-level accuracy with VO+SIM at 30-100m (Fact #3) - -### Reference Comparison -- At 400m altitude, our camera produces much higher resolution imagery than typical systems -- YFS90 at <7m without IMU is the strongest reference — uses terrain-weighted constraint optimization -- NaviLoc at 19.5m uses trajectory-level optimization but at lower altitude -- The combination of VO + satellite matching with sliding window optimization should achieve 10-30m depending on satellite image quality - -### Conclusion -- **50m / 80%**: High confidence achievable. Multiple systems achieve better than this. -- **20m / 60%**: Achievable with good satellite imagery. YFS90 achieves <7m. Our higher altitude makes cross-view matching harder, but 26MP camera compensates. -- **10m stretch**: Possible with zoom 19 satellite tiles (0.2m/px) and terrain-weighted optimization. - -### Confidence: ✅ High for 50m, ⚠️ Medium for 20m, ❓ Low for 10m - ---- - -## Dimension 2: No-IMU Heading Estimation - -### Fact Confirmation -- Homography decomposition gives rotation between frames for planar scenes (multiple sources) -- Ground plane assumption is valid for flat terrain (eastern Ukraine steppe) -- Satellite matching provides absolute orientation correction (Sources #1, #2) -- YFS90 achieves <7m without requiring IMU (Source #3 DOAJ) - -### Reference Comparison -- Most published systems use IMU for heading — our approach is less common -- YFS90 proves it's possible without IMU, but uses DEM data for terrain weighting -- The key insight: satellite matching provides both position AND heading correction, making intermittent heading drift from VO acceptable - -### Conclusion -Heading estimation from homography decomposition between consecutive frames + periodic satellite matching correction is viable. The frame-to-frame heading drift accumulates, but satellite corrections at regular intervals (every 5-20 frames) reset it. The flat terrain of the operational area makes the ground plane assumption reliable. - -### Confidence: ⚠️ Medium — novel approach but supported by YFS90 results - ---- - -## Dimension 3: Processing Speed (<5s per image) - -### Fact Confirmation -- LightGlue: ~20-50ms per pair (Fact #5) -- SuperPoint extraction: ~50-100ms per image -- GPU-accelerated ORB-SLAM3: 30 FPS (Fact #12) -- NaviLoc: 9 FPS on Raspberry Pi 5 (Fact #4) - -### Pipeline Time Budget Estimate (per image on RTX 2060) -1. SuperPoint feature extraction: ~80ms -2. LightGlue VO matching (vs previous frame): ~40ms -3. Homography estimation + position update: ~5ms -4. Satellite tile crop (from cache): ~10ms -5. SuperPoint extraction on satellite crop: ~80ms -6. LightGlue satellite matching: ~60ms -7. Position correction + sliding window optimization: ~20ms -8. Total: ~295ms ≈ 0.3s - -### Conclusion -Processing comfortably fits within 5s budget. Even with additional overhead (satellite tile download, perspective warping, GIM fallback), the pipeline stays under 2s. The 5s budget provides ample margin. - -### Confidence: ✅ High - ---- - -## Dimension 4: Sharp Turns & Route Disconnection - -### Fact Confirmation -- At <5% overlap, consecutive feature matching will fail -- Satellite matching can provide absolute position independently of VO -- DUSt3R/MASt3R handle extreme low overlap (+50% completeness vs COLMAP) -- YFS90 handles positioning failures with re-localization - -### Reference Comparison -- Traditional VO systems fail at sharp turns — this is expected and acceptable -- The segments-first architecture treats each continuous VO chain as a segment -- Satellite matching re-localizes at the start of each new segment -- If satellite matching fails too → wider search area → user input - -### Conclusion -The system should not try to match across sharp turns. Instead: -1. Detect VO failure (low match count / high reprojection error) -2. Start new segment -3. Attempt satellite geo-referencing for new segment start -4. Each segment is independently positioned in the global satellite coordinate frame - -This is architecturally simpler and more robust than trying to bridge disconnections. - -### Confidence: ✅ High - ---- - -## Dimension 5: Satellite Image Matching Reliability - -### Fact Confirmation -- Google Maps at zoom 18: 0.40 m/px at lat 48° — meets AC requirement -- Eastern Ukraine imagery may be 2-5 years old (Fact #7) -- SuperPoint+LightGlue is best performer for satellite matching (Source comparison study) -- Perspective warping improves cross-view matching significantly -- 93% match rate achieved in ITU thesis (Fact #3) - -### Reference Comparison -- The main risk is satellite image freshness in conflict zone -- Natural terrain features (rivers, forests, field boundaries) are relatively stable over years -- Man-made features (buildings, roads) may change due to conflict -- Agricultural field patterns change seasonally - -### Conclusion -Satellite matching will work reliably in areas with stable natural features. Performance degrades in: -1. Areas with significant conflict damage (buildings destroyed) -2. Areas with seasonal agricultural changes -3. Areas with very homogeneous texture (large uniform fields) - -Mitigation: use multiple scale levels, widen search area, accept lower confidence. - -### Confidence: ⚠️ Medium — depends heavily on operational area characteristics - ---- - -## Dimension 6: Architecture Selection - -### Fact Confirmation -- YFS90 architecture (VO + satellite matching + terrain-weighted optimization) achieves <7m -- ITU thesis architecture (ORB-SLAM3 + SIM) achieves GPS-level accuracy -- NaviLoc architecture (VPR + trajectory optimization) achieves 19.5m - -### Reference Comparison -- YFS90 is closest to our requirements: no IMU, satellite matching, drift correction -- Our system adds: segment management, real-time streaming, user fallback -- We need simpler VO than ORB-SLAM3 (no map building needed) -- We need faster matching than SuperGlue (LightGlue preferred) - -### Conclusion -Hybrid architecture combining: -- YFS90-style sliding window optimization for drift correction -- SuperPoint + LightGlue for both VO and satellite matching (unified feature pipeline) -- Segments-first architecture for disconnection handling -- FastAPI + SSE for real-time streaming - -### Confidence: ✅ High diff --git a/_docs/00_research/gps_denied_visual_nav/05_validation_log.md b/_docs/00_research/gps_denied_visual_nav/05_validation_log.md deleted file mode 100644 index 8f96b35..0000000 --- a/_docs/00_research/gps_denied_visual_nav/05_validation_log.md +++ /dev/null @@ -1,57 +0,0 @@ -# Validation Log - -## Validation Scenario -Using the provided sample data: 60 consecutive images from a flight starting at (48.275292, 37.385220) heading generally south-southwest. Camera: 26MP at 400m altitude. - -## Expected Behavior Based on Conclusions - -### Normal consecutive frames (AD000001-AD000032) -- VO successfully matches consecutive frames (60-73% overlap) -- Satellite matching every 5-10 frames provides absolute correction -- Position error stays within 20-50m corridor around ground truth -- Heading estimated from homography, corrected by satellite matching - -### Apparent maneuver zone (AD000033-AD000048) -- The coordinates show the UAV making a complex turn around images 33-48 -- Some consecutive pairs may have low overlap → VO quality drops -- Satellite matching becomes the primary position source -- New segments may be created if VO fails completely -- Position confidence drops in this zone - -### Return to straight flight (AD000049-AD000060) -- VO re-establishes strong consecutive matching -- Satellite matching re-anchors position -- Accuracy returns to normal levels - -## Actual Validation (Calculated) - -Distances between consecutive samples in the data: -- AD000001→002: ~180m (larger than stated 100m — likely exaggeration in problem description) -- AD000002→003: ~115m -- Typical gap: 80-180m -- At 376m footprint width and 250m height, even 180m gap gives 52-73% overlap → sufficient for VO - -At the turn zone (images 33-48): -- AD000041→042: ~230m with direction change → overlap may drop to 30-40% -- AD000042→043: ~230m with direction change → overlap may drop significantly -- AD000045→046: ~160m with direction change → may be <20% overlap -- These transitions are where VO may fail → satellite matching needed - -## Counterexamples - -1. **Homogeneous terrain**: If a section of the flight is over large uniform agricultural fields with no distinguishing features, both VO and satellite matching may fail. Mitigation: use higher zoom satellite tiles, rely on VO with lower confidence. - -2. **Conflict-damaged area**: If satellite imagery shows pre-war structures that no longer exist, satellite matching will produce incorrect position estimates. Mitigation: confidence scoring will flag inconsistent matches. - -3. **FullHD resolution flight**: At GSD 20cm/pixel instead of 6cm, matching quality degrades ~3x. The 50m target may still be achievable but 20m will be very difficult. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Issue found: The problem states "within 100 meters of each other" but actual data shows 80-230m. Pipeline must handle larger baselines. -- [x] Issue found: Tile download strategy needs to handle unknown route direction — progressive expansion needed. - -## Conclusions Requiring Revision -- Photo spacing is 80-230m not strictly 100m — increases the range of overlap variations. Still functional but wider variance than assumed. -- Route direction is unknown at start — satellite tile pre-loading must use expanding radius strategy, not directional pre-loading. diff --git a/_docs/00_research/solution_completeness_assessment/00_question_decomposition.md b/_docs/00_research/solution_completeness_assessment/00_question_decomposition.md deleted file mode 100644 index 82c1071..0000000 --- a/_docs/00_research/solution_completeness_assessment/00_question_decomposition.md +++ /dev/null @@ -1,73 +0,0 @@ -# Question Decomposition - -## Original Question -"Analyze completeness of the current solution. How mature is it?" - -## Active Mode -**Mode B: Solution Assessment** — 5 solution drafts exist (`solution_draft01.md` through `solution_draft05.md`). Assessing the latest draft (05) for completeness and maturity. - -## Question Type Classification -**Knowledge Organization + Problem Diagnosis** -- Knowledge Organization: systematically map what a complete GPS-denied nav system requires vs what is present -- Problem Diagnosis: identify gaps, weak points, and missing elements that reduce maturity - -## Research Subject Boundary Definition - -| Dimension | Boundary | -|-----------|----------| -| **Population** | Fixed-wing UAV GPS-denied visual navigation systems using visual odometry + satellite matching + IMU fusion | -| **Geography** | Eastern/southern Ukraine conflict zone operations | -| **Timeframe** | Current state of art (2024-2026), focusing on Jetson-class embedded deployment | -| **Level** | System-level architecture completeness — from sensor input to flight controller output | - -## Problem Context Summary - -The solution is a real-time GPS-denied visual navigation system for a custom 3.5m fixed-wing UAV: -- **Hardware**: Jetson Orin Nano Super (8GB), ADTI 20L V1 camera (0.7fps), Viewpro A40 Pro gimbal, Pixhawk 6x -- **Core pipeline**: cuVSLAM VO (0.7fps) → ESKF fusion → GPS_INPUT via pymavlink at 5-10Hz -- **Satellite correction**: LiteSAM/EfficientLoFTR/XFeat TRT FP16 on keyframes, async Stream B -- **5 draft iterations**: progressed from initial architecture → TRT migration → camera rate correction + UAV platform specs -- **Supporting docs**: tech_stack.md, security_analysis.md - -## Decomposed Sub-Questions (Mode B) - -### Functional Completeness -- **SQ-1**: What components does a mature GPS-denied visual navigation system require that are missing or under-specified in the current draft? -- **SQ-2**: How complete is the ESKF sensor fusion specification? (state vector, process model, measurement models, Q/R tuning, observability analysis) -- **SQ-3**: How does the system handle disconnected route segments (sharp turns with no overlap)? Is this adequately specified? -- **SQ-4**: What coordinate system transformations are needed (camera → body → NED → WGS84) and are they specified? -- **SQ-5**: How does the system handle initial localization (first frame + satellite matching bootstrap)? -- **SQ-6**: Is the re-localization request workflow (to ground station) sufficiently defined? -- **SQ-7**: How complete is the offline tile preparation pipeline (zoom levels, storage requirements, coverage calculation)? -- **SQ-8**: Is the object localization component sufficiently specified for operational use? - -### Performance & Robustness -- **SQ-9**: What are the realistic drift characteristics of cuVSLAM at 0.7fps over long straight segments? -- **SQ-10**: How robust is satellite matching with Google Maps imagery in the operational area? -- **SQ-11**: What happens during extended periods with no satellite match (cloud cover on tiles, homogeneous terrain)? -- **SQ-12**: Is the 5-10Hz GPS_INPUT rate adequate for the flight controller's EKF? - -### Maturity Assessment -- **SQ-13**: What is the Technology Readiness Level (TRL) of each component? -- **SQ-14**: What validation/testing has been done vs what is only planned? -- **SQ-15**: What operational procedures are missing (pre-flight checklist, in-flight monitoring, post-flight analysis)? -- **SQ-16**: Are there any inconsistencies between documents (tech_stack.md, security_analysis.md, solution_draft05.md)? - -### Security -- **SQ-17**: Are there security gaps not covered by the existing security_analysis.md? -- **SQ-18**: How does the MAVLink GPS_INPUT message security work (spoofing of the GPS replacement itself)? - -## Timeliness Sensitivity Assessment - -- **Research Topic**: GPS-denied visual navigation system completeness and maturity -- **Sensitivity Level**: 🟡 Medium -- **Rationale**: Core algorithms (VO, ESKF, feature matching) are well-established. Hardware (Jetson Orin Nano Super) is relatively new but stable. cuVSLAM library updates are moderate pace. No rapidly-changing AI/LLM dependencies. -- **Source Time Window**: 1-2 years -- **Priority official sources to consult**: - 1. NVIDIA cuVSLAM / Isaac ROS documentation - 2. PX4/ArduPilot MAVLink GPS_INPUT documentation - 3. LiteSAM / EfficientLoFTR papers and repos -- **Key version information to verify**: - - cuVSLAM: PyCuVSLAM v15.0.0 - - TensorRT: 10.3.0 - - JetPack: 6.2.2 diff --git a/_docs/00_research/solution_completeness_assessment/01_source_registry.md b/_docs/00_research/solution_completeness_assessment/01_source_registry.md deleted file mode 100644 index ee39d1a..0000000 --- a/_docs/00_research/solution_completeness_assessment/01_source_registry.md +++ /dev/null @@ -1,166 +0,0 @@ -# Source Registry - -## Source #1 -- **Title**: ArduPilot AP_GPS_Params — GPS_RATE minimum 5Hz -- **Link**: https://github.com/ArduPilot/ardupilot/pull/15980 -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: ArduPilot flight controller users -- **Research Boundary Match**: ✅ Full match -- **Summary**: ArduPilot enforces minimum 5Hz GPS update rate. GPS_RATE parameter description: "Lowering below 5Hz(default) is not allowed." -- **Related Sub-question**: SQ-12 - -## Source #2 -- **Title**: MAVLink GPS_INPUT Message Definition -- **Link**: https://ardupilot.org/mavproxy/docs/modules/GPSInput.html -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: MAVLink developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: GPS_INPUT requires: lat, lon, alt, fix_type, hdop, vdop, horiz_accuracy, vert_accuracy, speed_accuracy, vn, ve, vd, time_usec, time_week, time_week_ms, satellites_visible, gps_id, ignore_flags. GPS_TYPE=14 for MAVLink GPS. -- **Related Sub-question**: SQ-6, SQ-12 - -## Source #3 -- **Title**: pymavlink GPS_INPUT example (GPS_INPUT_pymavlink.py) -- **Link**: https://webperso.ensta.fr/lebars/Share/GPS_INPUT_pymavlink.py -- **Tier**: L3 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: pymavlink developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Working pymavlink example sending GPS_INPUT over serial at 10Hz with GPS time calculation from system time. -- **Related Sub-question**: SQ-6 - -## Source #4 -- **Title**: PyCuVSLAM API Reference (v15.0.0) -- **Link**: https://wiki.seeedstudio.com/pycuvslam_recomputer_robotics/ -- **Tier**: L2 -- **Publication Date**: 2026-03 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: cuVSLAM developers on Jetson -- **Research Boundary Match**: ✅ Full match -- **Summary**: cuVSLAM supports mono/stereo/inertial modes. Requires Camera model (fx,fy,cx,cy,distortion), ImuCalibration (noise density, random walk, frequency, T_imu_rig). Modes: Performance/Precision/Moderate. IMU fallback ~1s acceptable quality. -- **Related Sub-question**: SQ-1, SQ-5, SQ-9 - -## Source #5 -- **Title**: ESKF Python implementation for fixed-wing UAV -- **Link**: https://github.com/ludvigls/ESKF -- **Tier**: L4 -- **Publication Date**: 2023 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: ESKF implementers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Reference ESKF: 16-state vector (pos[3], vel[3], quat[4], acc_bias[3], gyro_bias[3]). Prediction with IMU at high rate. Update with GPS position/velocity. Tuning parameters: Q (process noise), R (measurement noise). -- **Related Sub-question**: SQ-2 - -## Source #6 -- **Title**: ROS ESKF based on PX4/ecl — multi-sensor fusion -- **Link**: https://github.com/EliaTarasov/ESKF -- **Tier**: L4 -- **Publication Date**: 2022 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV ESKF implementers -- **Research Boundary Match**: ✅ Full match -- **Summary**: ESKF fusing GPS, Magnetometer, Vision Pose, Optical Flow, RangeFinder with IMU. Shows that vision pose and optical flow are separate measurement models, each with its own observation matrix and noise parameters. -- **Related Sub-question**: SQ-2 - -## Source #7 -- **Title**: Visual-Inertial Odometry Scale Observability (Range-VIO) -- **Link**: https://arxiv.org/abs/2103.15215 -- **Tier**: L1 -- **Publication Date**: 2021 -- **Timeliness Status**: ✅ Currently valid (fundamental research) -- **Target Audience**: VIO researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Monocular VIO cannot observe metric scale without accelerometer excitation (not constant velocity). A 1D range sensor makes scale observable. For our case, barometric altitude + known flight altitude provides this constraint. -- **Related Sub-question**: SQ-2, SQ-4 - -## Source #8 -- **Title**: NaviLoc: Trajectory-Level Visual Localization for GNSS-Denied UAVs -- **Link**: https://www.mdpi.com/2504-446X/10/2/97 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GPS-denied UAV navigation researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Trajectory-level optimization fusing VPR with VIO achieves 19.5m mean error at 50-150m altitude. Key insight: treating satellite matching as noisy measurement rather than ground truth, with trajectory-level optimization. Runs at 9 FPS on RPi 5. -- **Related Sub-question**: SQ-3, SQ-13 - -## Source #9 -- **Title**: SatLoc-Fusion: Hierarchical Adaptive Fusion Framework -- **Link**: https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GPS-denied UAV researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Three-layer fusion: absolute geo-localization (DinoV2), relative VO (XFeat), optical flow velocity. Adaptive weighting based on confidence. Achieves <15m error, >90% trajectory coverage. 2Hz on 6 TFLOPS edge. -- **Related Sub-question**: SQ-3, SQ-10, SQ-13 - -## Source #10 -- **Title**: Auterion GPS-Denied Workflow -- **Link**: https://docs.auterion.com/vehicle-operation/auterion-mission-control/useful-resources/operations/gps-denied-workflow -- **Tier**: L2 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: UAV operators -- **Research Boundary Match**: ⚠️ Partial overlap (multirotor focus, but procedures applicable) -- **Summary**: Pre-flight: manually set home position, reset heading/position, configure wind. In-flight: enable INS mode. Defines operational procedures for GPS-denied missions. -- **Related Sub-question**: SQ-15 - -## Source #11 -- **Title**: PX4 GNSS-Degraded & Denied Flight (Dead-Reckoning) -- **Link**: https://docs.px4.io/main/en/advanced_config/gnss_degraded_or_denied_flight.html -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: PX4 users -- **Research Boundary Match**: ⚠️ Partial overlap (PX4-specific, but concepts apply to ArduPilot) -- **Summary**: GPS-denied requires redundant position/velocity sensors. Dead-reckoning mode for intermittent GNSS loss. Defines failsafe behaviors when GPS is lost. -- **Related Sub-question**: SQ-15 - -## Source #12 -- **Title**: Google Maps Ukraine satellite imagery coverage -- **Link**: https://newsukraine.rbc.ua/news/google-maps-has-surprise-for-satellite-imagery-1727182380.html -- **Tier**: L3 -- **Publication Date**: 2024-09 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: General public -- **Research Boundary Match**: ✅ Full match -- **Summary**: Google Maps improved imagery quality with Cloud Score+ AI. However, conflict zone imagery is intentionally older (>1 year). Ukrainian officials flagged security concerns about imagery revealing military positions. -- **Related Sub-question**: SQ-10 - -## Source #13 -- **Title**: Jetson Orin Nano Super thermal behavior at 25W -- **Link**: https://edgeaistack.app/blog/jetson-orin-nano-power-consumption/ -- **Tier**: L3 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Jetson developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Thermal throttling at SoC junction >80°C. Sustained GPU at 25W: ~50-51°C reported. Active cooling required for >15W. Most production workloads 8-15W. -- **Related Sub-question**: SQ-11 - -## Source #14 -- **Title**: Automated Image Matching for Satellite Images with Different GSDs -- **Link**: https://www.kjrs.org/journal/view.html?pn=related&uid=756&vmd=Full -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Remote sensing researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: GSD mismatch between satellite and aerial images requires scale normalization via subsampling/super-resolution. Coarse-to-fine matching strategy effective. Scale-invariant features (SIFT, deep features) partially handle scale differences. -- **Related Sub-question**: SQ-7 - -## Source #15 -- **Title**: Optimized VO and satellite image matching for UAVs (Istanbul Tech thesis) -- **Link**: https://polen.itu.edu.tr/items/1fe1e872-7cea-44d8-a8de-339e4587bee6 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: GPS-denied UAV researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Complete VO+satellite matching pipeline. Coordinate transforms: GPS → local NED for trajectory comparison. PnP solver for UAV pose from correspondences. Map retrieval using VO-estimated position to crop satellite tiles. -- **Related Sub-question**: SQ-4, SQ-5 diff --git a/_docs/00_research/solution_completeness_assessment/02_fact_cards.md b/_docs/00_research/solution_completeness_assessment/02_fact_cards.md deleted file mode 100644 index 97c850e..0000000 --- a/_docs/00_research/solution_completeness_assessment/02_fact_cards.md +++ /dev/null @@ -1,169 +0,0 @@ -# Fact Cards - -## Fact #1 — ArduPilot minimum GPS rate is 5Hz -- **Statement**: ArduPilot enforces a hard minimum of 5Hz for GPS_INPUT updates. The GPS_RATE parameter description states: "Lowering below 5Hz(default) is not allowed." The EKF scales buffers based on this rate. -- **Source**: Source #1 (ArduPilot AP_GPS_Params) -- **Phase**: Assessment -- **Target Audience**: ArduPilot-based flight controllers -- **Confidence**: ✅ High -- **Related Dimension**: Flight controller integration completeness - -## Fact #2 — GPS_INPUT requires velocity + accuracy + GPS time fields -- **Statement**: GPS_INPUT message requires not just lat/lon/alt, but also: vn/ve/vd velocity components, hdop/vdop, horiz_accuracy/vert_accuracy/speed_accuracy, fix_type, time_week/time_week_ms, satellites_visible, and ignore_flags bitmap. The solution draft05 mentions GPS_INPUT but does not specify how these fields are populated (especially velocity from ESKF, accuracy from covariance, GPS time conversion from system time). -- **Source**: Source #2 (MAVLink GPS_INPUT definition), Source #3 (pymavlink example) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Flight controller integration completeness - -## Fact #3 — ESKF state vector needs explicit definition -- **Statement**: Standard ESKF for UAV VIO fusion uses 15-16 state error vector: δp[3], δv[3], δθ[3] (attitude error in so(3)), δba[3] (accel bias), δbg[3] (gyro bias), optionally δg[3] (gravity). The solution draft05 says "16-state vector" and "ESKF + buffers ~10MB" but never defines the actual state vector, process model (F, Q matrices), measurement models (H matrices for VO and satellite), or noise parameters. -- **Source**: Source #5 (ludvigls/ESKF), Source #6 (EliaTarasov/ESKF) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Sensor fusion completeness - -## Fact #4 — Monocular VIO has scale ambiguity without excitation -- **Statement**: Monocular visual-inertial odometry cannot observe metric scale during constant-velocity flight (zero accelerometer excitation). This is a fundamental observability limitation. The solution uses monocular cuVSLAM + IMU, and fixed-wing UAVs fly mostly at constant velocity. Scale must be provided externally — via known altitude (barometric + predefined mission altitude) or satellite matching absolute position. -- **Source**: Source #7 (Range-VIO scale observability paper) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Sensor fusion completeness, core algorithm correctness - -## Fact #5 — cuVSLAM requires explicit camera calibration and IMU calibration -- **Statement**: PyCuVSLAM requires Camera(fx, fy, cx, cy, width, height) + Distortion model + ImuCalibration(gyroscope_noise_density, gyroscope_random_walk, accelerometer_noise_density, accelerometer_random_walk, frequency, T_imu_rig). The solution draft05 does not specify any camera calibration procedure, IMU noise parameters, or the T_imu_rig (IMU-to-camera) extrinsic transformation. -- **Source**: Source #4 (PyCuVSLAM docs) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Visual odometry completeness - -## Fact #6 — cuVSLAM IMU fallback provides ~1s acceptable tracking -- **Statement**: When visual tracking fails (featureless terrain, darkness), cuVSLAM falls back to IMU-only integration which provides "approximately 1 second" of acceptable tracking quality before drift becomes unacceptable. After that, tracking is lost. -- **Source**: Source #4 (PyCuVSLAM/Isaac ROS docs) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Resilience, edge case handling - -## Fact #7 — Disconnected route segments need satellite re-localization -- **Statement**: When a UAV makes a sharp turn and the next photos have no overlap with previous frames, cuVSLAM will lose tracking. The solution must re-localize using satellite imagery. The AC requires handling "more than 2 such disconnected segments" as a core strategy. Solution draft05 mentions this requirement but does not define the concrete re-localization algorithm (how satellite match triggers, how the new position is initialized in ESKF, how the map is connected to the previous segment). -- **Source**: Source #8 (NaviLoc), Source #9 (SatLoc-Fusion), AC requirements -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Functional completeness — disconnected segments - -## Fact #8 — Coordinate transformation chain is undefined -- **Statement**: The system needs a well-defined coordinate transformation chain: (1) pixel coordinates → camera frame (using intrinsics), (2) camera frame → body frame (camera mount extrinsics), (3) body frame → NED frame (using attitude from ESKF), (4) NED → WGS84 (using reference point). For satellite matching: geo-referenced tile coordinates → WGS84. For object localization: pixel + camera angle + altitude → ground point → WGS84. None of these transformations are explicitly defined in draft05. -- **Source**: Source #15 (Istanbul Tech thesis), Source #7 -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Coordinate system completeness - -## Fact #9 — GSD normalization required for satellite-aerial matching -- **Statement**: Camera GSD at 600m altitude with ADTI 20L V1 (16mm, APS-C) is ~15.9 cm/pixel. Google Maps zoom 19 ≈ 0.3 m/pixel, zoom 18 ≈ 0.6 m/pixel. The GSD ratio is ~2:1 to ~4:1 depending on zoom level and altitude. Draft05's "pre-resize" step in the offline pipeline is mentioned but not specified: what resolution? what zoom level? The matching model (LiteSAM/XFeat) input size must match appropriately. -- **Source**: Source #14 (GSD matching paper), solution_draft05 calculations -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Satellite matching completeness - -## Fact #10 — Google Maps imagery in conflict zones is intentionally outdated -- **Statement**: Google Maps deliberately serves older imagery (>1 year) for conflict zones in Ukraine. Ukrainian officials have flagged security concerns. The operational area (eastern/southern Ukraine) is directly in the conflict zone. Imagery may be 1-3+ years old, with seasonal differences (summer tiles vs winter flight, or vice versa). This is a HIGH-severity gap for satellite matching accuracy. -- **Source**: Source #12 (Google Maps Ukraine coverage) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Satellite imagery quality risk - -## Fact #11 — No operational procedures defined -- **Statement**: Mature GPS-denied systems (Auterion, PX4) define: pre-flight checklist (set home position, verify sensors, verify tile coverage), in-flight monitoring procedures (what to watch, when to intervene), and post-flight analysis (compare estimated vs actual GPS on return). Solution draft05 has no operational procedures section. -- **Source**: Source #10 (Auterion), Source #11 (PX4) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system operators -- **Confidence**: ✅ High -- **Related Dimension**: Operational maturity - -## Fact #12 — Object localization lacks implementation detail -- **Statement**: AC requires: "Other onboard AI systems can request GPS coordinates of objects detected by the AI camera." The solution says "trigonometric calculation using UAV GPS position, camera angle, zoom, altitude." But no API is defined, no coordinate math is shown, no handling of camera zoom/angle → ground projection is specified. The Viewpro A40 Pro gimbal angle and zoom parameters are not integrated. -- **Source**: Acceptance criteria, solution_draft05 -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Object localization completeness - -## Fact #13 — Tech stack document is inconsistent with draft05 -- **Statement**: tech_stack.md says "camera @ ~3fps" in non-functional requirements. Draft05 corrected this to 0.7fps. tech_stack.md lists LiteSAM benchmark decision at 480px/640px/800px; draft05 uses 1280px. tech_stack.md doesn't mention EfficientLoFTR as fallback. These inconsistencies indicate the tech_stack.md was not updated after draft05 changes. -- **Source**: tech_stack.md, solution_draft05.md -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Document consistency, maturity - -## Fact #14 — Confidence scoring is undefined in draft05 -- **Statement**: Draft05 says "Confidence Scoring → GPS_INPUT Mapping — Unchanged from draft03" but draft05 is supposed to be self-contained. The actual confidence scoring logic (how VO confidence + satellite match confidence map to GPS_INPUT fix_type, hdop, horiz_accuracy) is never defined in the current draft. This is critical because ArduPilot's EKF uses these accuracy fields to weight the GPS data. -- **Source**: Source #2 (GPS_INPUT fields), solution_draft05 -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Flight controller integration, confidence scoring - -## Fact #15 — Initial bootstrap sequence is incomplete -- **Statement**: Draft05 startup reads GLOBAL_POSITION_INT to get initial GPS position. But: (1) cuVSLAM needs its first frame + features to initialize — how is the first satellite match triggered? (2) ESKF needs initial state — position from GPS, but velocity? attitude? (3) How does the system know GPS is denied and should start sending GPS_INPUT? (4) Is there a handoff protocol from real GPS to GPS-denied system? -- **Source**: solution_draft05, Source #10 (Auterion procedures) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Startup and bootstrap completeness - -## Fact #16 — No recovery from companion computer reboot -- **Statement**: AC requires: "On companion computer reboot mid-flight, the system should attempt to re-initialize from the flight controller's current IMU-extrapolated position." Draft05 does not address this scenario. The system needs: read current FC position estimate, re-initialize ESKF, reload TRT engines (~1-3s), start cuVSLAM with no prior map, trigger immediate satellite re-localization. -- **Source**: Acceptance criteria, solution_draft05 -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Resilience, failsafe completeness - -## Fact #17 — No position refinement mechanism -- **Statement**: AC states: "The system may refine previously calculated positions and send corrections to the flight controller as updated estimates." Draft05 does not define how this works. When a satellite match provides an absolute correction, do previously estimated positions get retroactively corrected? Is this communicated to the flight controller? How? -- **Source**: Acceptance criteria, solution_draft05 -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ⚠️ Medium (AC is ambiguous about necessity) -- **Related Dimension**: Position refinement - -## Fact #18 — Tile storage requirements not calculated -- **Statement**: The solution mentions "preload tiles ±2km" and "GeoHash-indexed directory" but never calculates: how many tiles are needed for a mission area, what storage space is required, what zoom levels to use, or how to handle the trade-off between tile coverage area and storage limit. At zoom 19 (~0.3m/pixel), each 256×256 tile covers ~77m × 77m. Covering a 200km flight path with ±2km buffer would require ~130,000 tiles (~2.5GB JPEG). -- **Source**: solution_draft05, tech_stack.md -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ⚠️ Medium (estimates based on standard tile sizes) -- **Related Dimension**: Offline preparation completeness - -## Fact #19 — 3 consecutive failed frames → re-localization request undefined -- **Statement**: AC requires: "If system cannot determine position of 3 consecutive frames by any means, send re-localization request to ground station operator via telemetry link." Draft05 does not define: (1) the re-localization request message format, (2) what "any means" includes (VO failed + satellite match failed + IMU drift exceeded threshold?), (3) how the operator response is received and applied, (4) what the system does while waiting. -- **Source**: Acceptance criteria, solution_draft05 -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: Ground station integration, resilience - -## Fact #20 — FastAPI endpoints mentioned but not defined -- **Statement**: Draft05 mentions FastAPI for "local IPC" but the REST API endpoints are only defined in security_analysis.md (POST /sessions, GET /sessions/{id}/stream, POST /sessions/{id}/anchor, DELETE /sessions/{id}). The solution draft itself doesn't specify the API contract, request/response schemas, or how other onboard systems interact. -- **Source**: solution_draft05, security_analysis.md -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ✅ High -- **Related Dimension**: API completeness - -## Fact #21 — NaviLoc achieves 19.5m error with trajectory-level optimization -- **Statement**: Recent research (NaviLoc, 2025) shows that trajectory-level optimization treating satellite matching as noisy measurement achieves 19.5m mean error at 50-150m altitude, 16× better than per-frame matching. The solution's approach of per-keyframe satellite matching + ESKF correction is simpler but potentially less accurate than trajectory-level optimization. -- **Source**: Source #8 (NaviLoc) -- **Phase**: Assessment -- **Target Audience**: GPS-denied system -- **Confidence**: ⚠️ Medium (NaviLoc operates at lower altitude with higher overlap) -- **Related Dimension**: Algorithm maturity, accuracy potential diff --git a/_docs/00_research/solution_completeness_assessment/03_comparison_framework.md b/_docs/00_research/solution_completeness_assessment/03_comparison_framework.md deleted file mode 100644 index ac34d22..0000000 --- a/_docs/00_research/solution_completeness_assessment/03_comparison_framework.md +++ /dev/null @@ -1,63 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Knowledge Organization + Problem Diagnosis — mapping completeness dimensions against solution state - -## Completeness Dimensions - -1. Core Pipeline Definition -2. Sensor Fusion (ESKF) Specification -3. Visual Odometry Configuration -4. Satellite Image Matching Pipeline -5. Coordinate System & Transformations -6. Flight Controller Integration (GPS_INPUT) -7. Disconnected Route Segment Handling -8. Startup, Bootstrap & Failsafe -9. Object Localization -10. Offline Preparation Pipeline -11. Ground Station Integration -12. Operational Procedures -13. API & Inter-system Communication -14. Document Consistency -15. Testing Coverage vs AC - -## Maturity Scoring - -| Score | Level | Description | -|-------|-------|-------------| -| 1 | Concept | Mentioned but not specified | -| 2 | Defined | Architecture-level description, component selected | -| 3 | Detailed | Implementation details, data flows, algorithms specified | -| 4 | Validated | Benchmarked, tested, edge cases handled | -| 5 | Production | Field-tested, operational procedures, monitoring | - -## Completeness Assessment Matrix - -| Dimension | Current State | Maturity | Key Gaps | Facts | -|-----------|--------------|----------|----------|-------| -| Core Pipeline | VO → ESKF → GPS_INPUT flow well-defined. Dual CUDA stream architecture. Camera rate corrected to 0.7fps. Time budgets calculated. | 3 | Self-contained — but references "unchanged from draft03" in several places instead of restating | — | -| ESKF Specification | "16-state vector", "ESKF + buffers ~10MB", "ESKF measurement update" | 1.5 | No state vector definition, no process model (F,Q), no measurement models (H for VO, H for satellite), no noise parameters, no scale observability analysis, no tuning strategy | #3, #4 | -| VO Configuration | cuVSLAM selected, 0.7fps feasibility analyzed, pyramid-LK range calculated, overlap >95% | 2.5 | No camera calibration procedure, no IMU calibration parameters (noise density, random walk), no T_imu_rig extrinsic, no cuVSLAM mode selection (Mono vs Inertial), no cuVSLAM initialization procedure | #5, #6 | -| Satellite Matching | LiteSAM/EfficientLoFTR/XFeat decision tree, TRT conversion workflow, async Stream B | 2.5 | No GSD normalization spec, no tile-to-camera scale matching, no matching confidence threshold, no geometric verification details beyond "RANSAC homography", no match-to-WGS84 conversion | #9, #14 | -| Coordinate System | WGS84 output mentioned. Camera footprint calculated. | 1 | No transformation chain defined (pixel→camera→body→NED→WGS84). No camera-to-body extrinsic. No reference point definition for NED. No handling of terrain elevation. | #8 | -| Flight Controller Integration | pymavlink, GPS_INPUT, 5-10Hz, UART | 2 | No GPS_INPUT field population spec (where do velocity, accuracy, hdop come from?). No fix_type mapping. No GPS time conversion. No ignore_flags. Confidence scoring undefined in current draft. | #1, #2, #14 | -| Disconnected Segments | Mentioned in AC, satellite matching acknowledged as solution | 1.5 | No algorithm for detecting tracking loss. No re-localization trigger. No position initialization after re-localization. No map discontinuity handling. | #7 | -| Startup & Failsafe | 12-step startup sequence. Engine load times. | 2 | No GPS-denied handoff protocol. No mid-flight reboot recovery. No "3 consecutive failed frames" handling. No operator re-localization workflow. | #15, #16, #19 | -| Object Localization | "Trigonometric calculation" mentioned | 1 | No math defined. No API endpoint. No Viewpro gimbal integration spec. No accuracy analysis. | #12 | -| Offline Preparation | Tile download → validate → pre-resize → store. TRT engine build. | 2 | No zoom level selection. No storage calculation. No coverage verification. No tile freshness check. No pre-flight validation tool. | #18 | -| Ground Station Integration | NAMED_VALUE_FLOAT at 1Hz for confidence/drift. Operator re-localization hint mentioned in AC. | 1.5 | Re-localization request/response undefined. Ground station display requirements undefined. Operator workflow undefined. | #19 | -| Operational Procedures | None defined | 0 | No pre-flight checklist. No in-flight monitoring guide. No post-flight analysis. No failure response procedures. | #11 | -| API & IPC | FastAPI mentioned for "local IPC" | 1.5 | Endpoints only in security_analysis.md, not in solution. No request/response schemas. No SSE event format. No object localization API. | #20 | -| Document Consistency | 3 documents (draft05, tech_stack, security) | — | tech_stack.md has 3fps (should be 0.7fps). LiteSAM resolution mismatch. EfficientLoFTR missing from tech_stack. | #13 | -| Testing vs AC | Tests cover TRT, cuVSLAM 0.7fps, shutter. | 2.5 | No explicit mapping of tests to AC items. Missing tests: disconnected segments, re-localization, 3-consecutive-failure, object localization, operator workflow, mid-flight reboot. | — | - -## Overall Maturity Assessment - -| Category | Avg Score | Assessment | -|----------|-----------|------------| -| Hardware/Platform | 3.5 | Well-researched: UAV specs, camera analysis, memory budget, thermal | -| Core Algorithms (VO, matching) | 2.5 | Component selection solid, but implementation specs missing | -| Sensor Fusion (ESKF) | 1.5 | Severely under-specified | -| System Integration | 1.5 | GPS_INPUT, coordinate transforms, API all incomplete | -| Operational Readiness | 0.5 | No operational procedures, no deployment pipeline | -| **Overall** | **~2.0** | **Architecture-level design, not implementation-ready** | diff --git a/_docs/00_research/solution_completeness_assessment/04_reasoning_chain.md b/_docs/00_research/solution_completeness_assessment/04_reasoning_chain.md deleted file mode 100644 index c93e6a6..0000000 --- a/_docs/00_research/solution_completeness_assessment/04_reasoning_chain.md +++ /dev/null @@ -1,166 +0,0 @@ -# Reasoning Chain - -## Dimension 1: ESKF Sensor Fusion Specification - -### Fact Confirmation -Per Fact #3, standard ESKF for UAV VIO uses 15-16 state error vector: δp[3], δv[3], δθ[3], δba[3], δbg[3]. Per Fact #4, monocular VIO cannot observe metric scale during constant-velocity flight (fundamental observability limitation). Per Fact #7, scale requires external constraint (altitude or satellite absolute position). - -### Current State -Draft05 says "Custom ESKF (NumPy/SciPy)", "16-state vector", "ESKF measurement update ~1ms", "ESKF IMU prediction at 5-10Hz". But provides zero mathematical detail. - -### Conclusion -The ESKF is the **most under-specified critical component**. Without defining: -- State vector and error state vector explicitly -- Process model (how IMU data propagates the state) -- VO measurement model (how cuVSLAM relative pose updates the filter) -- Satellite measurement model (how absolute position corrections are applied) -- How scale is maintained (altitude constraint? satellite corrections only?) -- Q and R matrices (at least initial values and tuning approach) - -...the system cannot be implemented. The ESKF is the central hub connecting all sensors — its specification drives the entire data flow. - -### Confidence: ✅ High - ---- - -## Dimension 2: Flight Controller Integration (GPS_INPUT) - -### Fact Confirmation -Per Fact #1, ArduPilot requires minimum 5Hz GPS_INPUT rate. Per Fact #2, GPS_INPUT has 15+ mandatory fields including velocity, accuracy, fix_type, GPS time. Per Fact #14, confidence scoring that maps internal state to GPS_INPUT accuracy fields is undefined. - -### Current State -Draft05 specifies: pymavlink, GPS_INPUT, 5-10Hz, UART. This satisfies the rate requirement. But the message population is unspecified. - -### Conclusion -The GPS_INPUT integration has the right architecture (5-10Hz, pymavlink, UART) but is missing the **data mapping layer**: -- `vn, ve, vd` must come from ESKF velocity estimate — requires ESKF to output velocity -- `horiz_accuracy, vert_accuracy` must come from ESKF covariance matrix (sqrt of position covariance diagonal) -- `hdop, vdop` need to be synthesized from accuracy values (hdop ≈ horiz_accuracy / expected_CEP_factor) -- `fix_type` must map from internal confidence (3=3D fix when satellite-anchored, 2=2D when VO-only?) -- `speed_accuracy` from ESKF velocity covariance -- GPS time (time_week, time_week_ms) requires conversion from system time to GPS epoch -- `satellites_visible` should be set to a constant (e.g., 10) to avoid triggering satellite-count failsafes - -This is a tractable implementation detail but must be specified before coding. - -### Confidence: ✅ High - ---- - -## Dimension 3: Coordinate System & Transformations - -### Fact Confirmation -Per Fact #8, the system needs pixel → camera → body → NED → WGS84 chain. Per Fact #9, satellite tiles have different GSD than camera imagery. Per Source #15, similar systems define explicit coordinate transforms with PnP solvers. - -### Current State -Draft05 calculates camera footprint and GSD but never defines the transformation chain. Object localization mentions "trigonometric calculation" without math. - -### Conclusion -This is a **fundamental architectural gap**. Every position estimate flows through coordinate transforms. Without defining them: -- cuVSLAM outputs relative pose in camera frame — how is this converted to NED displacement? -- Satellite matching outputs pixel correspondences — how does homography → WGS84 position? -- Object localization needs camera ray → ground intersection — impossible without camera-to-body and body-to-NED transforms -- The camera is "not autostabilized" — so body frame attitude matters for ground projection - -The fix requires defining: camera intrinsic matrix K, camera-to-body rotation T_cam_body, and the ESKF attitude estimate for body-to-NED. - -### Confidence: ✅ High - ---- - -## Dimension 4: Disconnected Route Segments - -### Fact Confirmation -Per Fact #7, AC explicitly requires handling disconnected segments as "core to the system." Per Fact #6, cuVSLAM IMU fallback gives ~1s before tracking loss. Per Source #8, trajectory-level optimization can handle segment connections. - -### Current State -Draft05 acknowledges this in AC but the solution section says "sharp-turn frames are expected to fail VO and should be handled by satellite-based re-localization." No algorithm is specified. - -### Conclusion -The solution needs a concrete **re-localization pipeline**: -1. Detect tracking loss (cuVSLAM returns tracking_lost state) -2. Continue ESKF with IMU-only prediction (high uncertainty growth) -3. Immediately trigger satellite matching on next available frame -4. If satellite match succeeds: reset ESKF position to matched position, reset cuVSLAM (or start new track) -5. If satellite match fails: retry on next frame, increment failure counter -6. If 3 consecutive failures: send re-localization request to ground station -7. When new segment starts: mark as disconnected, continue building trajectory -8. Optionally: if a later satellite match connects two segments to the same reference, merge them - -This is not trivial but follows directly from the existing architecture. It's a missing algorithm, not a missing component. - -### Confidence: ✅ High - ---- - -## Dimension 5: Startup Bootstrap & Failsafe - -### Fact Confirmation -Per Fact #15, the bootstrap sequence has gaps (first satellite match, initial ESKF state, GPS-denied handoff). Per Fact #16, AC requires mid-flight reboot recovery. Per Fact #19, AC requires 3-consecutive-failure re-localization request. - -### Current State -Draft05 has a 12-step startup sequence that covers the happy path. The failure paths and special cases are not addressed. - -### Conclusion -Three failsafe scenarios need specification: -1. **GPS-denied handoff**: How does the system know to start? Options: (a) always running — takes over when GPS quality degrades, (b) operator command, (c) automatic GPS quality monitoring. The system should probably always be running in parallel and the FC uses the best available source. -2. **Mid-flight reboot**: Read FC position → init ESKF with high uncertainty → start cuVSLAM → immediate satellite match → within ~5s should have a position estimate. TRT engine load (1-3s) is the main startup cost. -3. **3 consecutive failures**: Define "failure" precisely (VO lost + satellite match failed + IMU-only drift > threshold). Send NAMED_VALUE_FLOAT or custom MAVLink message to ground station. Define operator response format. - -### Confidence: ✅ High - ---- - -## Dimension 6: Satellite Matching Pipeline Details - -### Fact Confirmation -Per Fact #9, camera GSD is ~15.9 cm/pixel at 600m with ADTI+16mm lens. Satellite at zoom 19 is ~0.3 m/pixel. Per Fact #10, Google Maps imagery in Ukraine conflict zone is intentionally >1 year old. Per Fact #14, the solution says "pre-resize" but doesn't specify to what resolution. - -### Current State -Draft05 has a solid model selection decision tree (LiteSAM → EfficientLoFTR → XFeat) and TRT conversion workflow. But the actual matching pipeline data flow is incomplete. - -### Conclusion -The matching pipeline needs: -- **Input preparation**: Camera frame (5456×3632 at ~15.9 cm/pixel at 600m) → downsample to matcher input resolution (1280px for LiteSAM). Satellite tile at zoom 18 (~0.6 m/pixel) → no resize needed if using 256px tiles, or assemble 5×5 tile mosaic for coverage. -- **GSD matching**: Either downsample camera image to satellite GSD, or specify that the matcher handles multi-scale internally. LiteSAM was designed for satellite-aerial matching so it may handle this. XFeat is general-purpose and may need explicit scale normalization. -- **Tile selection**: Given ESKF position estimate + uncertainty, select the correct satellite tile(s). What if the position estimate has drifted and the wrong tile is selected? Need a search radius based on ESKF covariance. -- **Match → position**: Homography from RANSAC → decompose to get translation in satellite coordinate frame → convert to WGS84 using tile's geo-reference. -- **Seasonal/temporal mismatch**: Tiles could be from different seasons. Feature matching must be robust to appearance changes. - -### Confidence: ✅ High - ---- - -## Dimension 7: Operational Maturity - -### Fact Confirmation -Per Fact #11, mature systems (Auterion, PX4) define pre-flight checklists, in-flight monitoring, failure response procedures. Per Fact #13, documents are inconsistent (tech_stack.md still says 3fps). - -### Current State -Zero operational procedures defined. Documents are partially inconsistent. - -### Conclusion -This is expected at this stage of development (architecture/design phase). Operational procedures should come after implementation and initial testing. However, the document inconsistencies should be fixed now to avoid confusion during implementation. The tech_stack.md and solution_draft should be aligned. - -### Confidence: ✅ High - ---- - -## Dimension 8: Object Localization - -### Fact Confirmation -Per Fact #12, AC requires other AI systems to request GPS coordinates of detected objects. The Viewpro A40 Pro gimbal has configurable angle and zoom. - -### Current State -Draft05 says "trigonometric calculation using UAV GPS position, camera angle, zoom, altitude. Flat terrain assumed." - -### Conclusion -The math is straightforward but needs specification: -- Input: pixel coordinates (u,v) in Viewpro image, gimbal angles (pan, tilt), zoom level, UAV position (from GPS-denied system), UAV altitude -- Process: (1) pixel → ray in camera frame using intrinsics + zoom, (2) camera frame → body frame using gimbal angles, (3) body frame → NED using UAV attitude, (4) ray-ground intersection assuming flat terrain at known altitude, (5) NED offset → WGS84 -- Output: lat, lon of object + accuracy estimate (propagated from UAV position accuracy + gimbal angle uncertainty) -- API: FastAPI endpoint for other onboard systems to call - -This is a 2-point complexity task but should be specified in the solution. - -### Confidence: ✅ High diff --git a/_docs/00_research/solution_completeness_assessment/05_validation_log.md b/_docs/00_research/solution_completeness_assessment/05_validation_log.md deleted file mode 100644 index c068fe3..0000000 --- a/_docs/00_research/solution_completeness_assessment/05_validation_log.md +++ /dev/null @@ -1,96 +0,0 @@ -# Validation Log - -## Validation Scenario 1: Normal Straight Flight (Happy Path) - -### Expected Based on Conclusions -UAV flies straight at 70 km/h, 600m altitude. ADTI captures at 0.7fps. cuVSLAM processes each frame (~9ms), ESKF fuses VO + IMU. Every 5-10 frames, satellite matching provides absolute correction. GPS_INPUT sent at 5-10Hz. - -### Actual Validation Results -The happy path is well-specified in draft05. Time budgets, memory budgets, overlap calculations all valid. The 5-10Hz ESKF IMU prediction fills gaps between 0.7fps camera frames. Satellite matching async on Stream B. - -**GAP**: Even on straight flight, the GPS_INPUT message field population is undefined. Where does velocity come from? What fix_type is sent? What accuracy values? - ---- - -## Validation Scenario 2: Sharp Turn with No Overlap - -### Expected Based on Conclusions -UAV makes a 90° turn. Next frame has zero overlap with previous. cuVSLAM loses tracking. System falls back to IMU. ESKF uncertainty grows rapidly. Satellite matching on next frame provides re-localization. - -### Actual Validation Results -**CRITICAL GAP**: No algorithm defined for this scenario. Questions: -1. How does the system detect cuVSLAM tracking loss? (cuVSLAM API presumably returns a tracking state) -2. During IMU-only phase, what is the ESKF prediction uncertainty growth rate? (~1-2m/s drift with consumer IMU) -3. When satellite match succeeds after the turn, how is ESKF re-initialized? (measurement update with very high innovation? or state reset?) -4. How is cuVSLAM re-initialized on the new heading? (new track from scratch? or cuVSLAM loop closure if it sees previous terrain?) -5. If the turn area is over featureless terrain (farmland), satellite matching may also fail — then what? - -The AC says "sharp-turn frames should be within 200m drift and angle <70 degrees" — this bounds the problem but the solution doesn't address it algorithmically. - ---- - -## Validation Scenario 3: Long Flight Over Uniform Terrain - -### Expected Based on Conclusions -UAV flies 50km straight over large agricultural fields. cuVSLAM may struggle with low-texture terrain. Satellite matching is the only absolute correction source. - -### Actual Validation Results -This scenario tests the limits of the design: -- cuVSLAM at 600m altitude sees ~577m × 870m footprint. If it's a single wheat field, features may be sparse. cuVSLAM falls back to IMU (~1s acceptable, then tracking lost). -- Satellite matching must carry the entire position estimation burden. -- At 0.7fps with keyframe every 5-10 frames: satellite match every 7-14s. -- Between matches, IMU-only drift at 70 km/h: ~14m/s × time × drift_rate. With consumer IMU: ~1-5m drift per second. -- Over 14s between matches: ~14-70m potential drift. AC requires <100m between anchors. - -**GAP**: The system's behavior in this degraded mode needs explicit specification. Is this VO-failed + satellite-only + IMU acceptable? What's the accuracy? - ---- - -## Validation Scenario 4: First Frame After GPS Denial - -### Expected Based on Conclusions -GPS was working. Now it's denied/spoofed. System takes over. - -### Actual Validation Results -**GAP**: No handoff protocol defined. Questions: -1. How does the system detect GPS denial? (it doesn't — it's assumed the operator knows) -2. The initial ESKF state comes from GLOBAL_POSITION_INT — but this might be the spoofed GPS. How to validate? -3. If GPS is being spoofed rather than denied, the initial position could be wrong by kilometers. -4. Draft05 assumes clean initial GPS. This is reasonable for the first version but should be acknowledged as a limitation. - ---- - -## Validation Scenario 5: Mid-Flight Companion Computer Reboot - -### Expected Based on Conclusions -Companion computer crashes and reboots. Flight controller continues flying on IMU dead reckoning. - -### Actual Validation Results -**GAP**: AC requires recovery. Draft05 doesn't address it. Sequence should be: -1. Jetson boots (~30-60s depending on boot time) -2. GPS-Denied service starts (systemd) -3. Connect to FC, get current position (IMU-extrapolated, may have significant drift) -4. Load TRT engines (~1-3s each, total ~2-6s) -5. Start cuVSLAM (no prior map, fresh start) -6. Immediate satellite match to get absolute position -7. Total recovery time: ~35-70s. During this time, FC uses IMU-only. At 70 km/h: ~700-1400m of uncontrolled drift. - -This is a real operational concern and should be documented even if the solution is "acknowledge the limitation." - ---- - -## Counterexamples -- **NaviLoc achieves 19.5m at lower altitude**: Our system operates at 600-1000m with larger footprints and coarser GSD. The accuracy requirement (50m for 80%, 20m for 60%) is less demanding. The simpler ESKF approach may be adequate. -- **SatLoc-Fusion uses DinoV2 for place recognition**: Our system uses feature matching (LiteSAM/XFeat) which is more precise but less robust to appearance changes. DinoV2 is more robust to seasonal changes but gives coarser position. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [ ] Issue: ESKF specification is too underspecified to validate accuracy claims -- [ ] Issue: Disconnected segment handling is critical AC and has no algorithm -- [ ] Issue: GPS_INPUT field mapping undocumented -- [ ] Issue: Object localization API undefined - -## Conclusions Requiring Revision -None requiring reversal. All identified gaps are genuine and supported by facts. diff --git a/_docs/00_research/trt_engine_migration/00_question_decomposition.md b/_docs/00_research/trt_engine_migration/00_question_decomposition.md deleted file mode 100644 index 347d35b..0000000 --- a/_docs/00_research/trt_engine_migration/00_question_decomposition.md +++ /dev/null @@ -1,57 +0,0 @@ -# Question Decomposition - -## Original Question -Should we switch from ONNX Runtime to native TensorRT Engine for all AI models in the GPS-Denied pipeline, running on Jetson Orin Nano Super? - -## Active Mode -Mode B: Solution Assessment — existing solution_draft03.md uses ONNX Runtime / mixed inference. User requests focused investigation of TRT Engine migration. - -## Question Type -Decision Support — evaluating a technology switch with cost/risk/benefit dimensions. - -## Research Subject Boundary - -| Dimension | Boundary | -|-----------|----------| -| Population | AI inference models in the GPS-Denied navigation pipeline | -| Hardware | Jetson Orin Nano Super (8GB LPDDR5, 67 TOPS sparse INT8, 1020 MHz GPU, NO DLA) | -| Software | JetPack 6.2 (CUDA 12.6, TensorRT 10.3, cuDNN 9.3) | -| Timeframe | Current (2025-2026), JetPack 6.2 era | - -## AI Models in Pipeline - -| Model | Type | Current Runtime | TRT Applicable? | -|-------|------|----------------|-----------------| -| cuVSLAM | Native CUDA library (closed-source) | CUDA native | NO — already CUDA-optimized binary | -| LiteSAM | PyTorch (MobileOne + TAIFormer + MinGRU) | Planned TRT FP16 | YES | -| XFeat | PyTorch (learned features) | XFeatTensorRT exists | YES | -| ESKF | Mathematical filter (Python/C++) | CPU/NumPy | NO — not an AI model | - -Only LiteSAM and XFeat are convertible to TRT Engine. cuVSLAM is already NVIDIA-native CUDA. - -## Decomposed Sub-Questions - -1. What is the performance difference between ONNX Runtime and native TRT Engine on Jetson Orin Nano Super? -2. What is the memory overhead of ONNX Runtime vs native TRT on 8GB shared memory? -3. What conversion paths exist for PyTorch → TRT Engine on Jetson aarch64? -4. Are TRT engines hardware-specific? What's the deployment workflow? -5. What are the specific conversion steps for LiteSAM and XFeat? -6. Does Jetson Orin Nano Super have DLA for offloading? -7. What are the risks and limitations of going TRT-only? - -## Timeliness Sensitivity Assessment - -- **Research Topic**: TensorRT vs ONNX Runtime inference on Jetson Orin Nano Super -- **Sensitivity Level**: 🟠 High -- **Rationale**: TensorRT, JetPack, and ONNX Runtime release new versions frequently. Jetson Orin Nano Super mode is relatively new (JetPack 6.2, Jan 2025). -- **Source Time Window**: 12 months -- **Priority official sources to consult**: - 1. NVIDIA TensorRT documentation (docs.nvidia.com) - 2. NVIDIA JetPack 6.2 release notes - 3. ONNX Runtime GitHub issues (microsoft/onnxruntime) - 4. NVIDIA TensorRT GitHub issues (NVIDIA/TensorRT) -- **Key version information to verify**: - - TensorRT: 10.3 (JetPack 6.2) - - ONNX Runtime: 1.20.1+ (Jetson builds) - - JetPack: 6.2 - - CUDA: 12.6 diff --git a/_docs/00_research/trt_engine_migration/01_source_registry.md b/_docs/00_research/trt_engine_migration/01_source_registry.md deleted file mode 100644 index a61dd7d..0000000 --- a/_docs/00_research/trt_engine_migration/01_source_registry.md +++ /dev/null @@ -1,231 +0,0 @@ -# Source Registry - -## Source #1 -- **Title**: ONNX Runtime Issue #24085: CUDA EP on Jetson Orin Nano does not use tensor cores -- **Link**: https://github.com/microsoft/onnxruntime/issues/24085 -- **Tier**: L1 (Official GitHub issue with MSFT response) -- **Publication Date**: 2025-03-18 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: ONNX Runtime v1.20.1+, JetPack 6.1, CUDA 12.6 -- **Target Audience**: Jetson Orin Nano developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: ONNX Runtime CUDA EP on Jetson Orin Nano is 7-8x slower than TRT standalone due to tensor cores not being utilized. Workaround: remove cudnn_conv_algo_search option and use FP16 models. -- **Related Sub-question**: Q1 (performance difference) - -## Source #2 -- **Title**: ONNX Runtime Issue #20457: VRAM usage difference between TRT-EP and native TRT -- **Link**: https://github.com/microsoft/onnxruntime/issues/20457 -- **Tier**: L1 (Official GitHub issue with MSFT dev response) -- **Publication Date**: 2024-04-25 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: ONNX Runtime 1.17.1, CUDA 12.2 -- **Target Audience**: All ONNX Runtime + TRT users -- **Research Boundary Match**: ✅ Full match -- **Summary**: ONNX Runtime TRT-EP keeps serialized engine in memory (~420-440MB) during execution. Native TRT drops to 130-140MB after engine build by calling releaseBlob(). Delta: ~280-300MB. -- **Related Sub-question**: Q2 (memory overhead) - -## Source #3 -- **Title**: ONNX Runtime Issue #12083: TensorRT Provider vs TensorRT Native -- **Link**: https://github.com/microsoft/onnxruntime/issues/12083 -- **Tier**: L2 (Official MSFT dev response) -- **Publication Date**: 2022-07-05 (confirmed still relevant) -- **Timeliness Status**: ⚠️ Needs verification (old but fundamental architecture hasn't changed) -- **Version Info**: General ONNX Runtime -- **Target Audience**: All ONNX Runtime users -- **Research Boundary Match**: ✅ Full match -- **Summary**: MSFT engineer confirms TRT-EP "can achieve performance parity with native TensorRT." Benefit is automatic fallback for unsupported ops. -- **Related Sub-question**: Q1 (performance difference) - -## Source #4 -- **Title**: ONNX Runtime Issue #11356: Lower performance on InceptionV3/4 with TRT EP -- **Link**: https://github.com/microsoft/onnxruntime/issues/11356 -- **Tier**: L4 (Community report) -- **Publication Date**: 2022 -- **Timeliness Status**: ⚠️ Needs verification -- **Version Info**: ONNX Runtime older version -- **Target Audience**: ONNX Runtime users -- **Research Boundary Match**: ⚠️ Partial (different model, but same mechanism) -- **Summary**: Reports ~3x performance difference (41 vs 129 inferences/sec) between ONNX RT TRT-EP and native TRT on InceptionV3/4. -- **Related Sub-question**: Q1 (performance difference) - -## Source #5 -- **Title**: NVIDIA JetPack 6.2 Release Notes -- **Link**: https://docs.nvidia.com/jetson/archives/jetpack-archived/jetpack-62/release-notes/index.html -- **Tier**: L1 (Official NVIDIA documentation) -- **Publication Date**: 2025-01 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: JetPack 6.2, TensorRT 10.3, CUDA 12.6, cuDNN 9.3 -- **Target Audience**: Jetson developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: JetPack 6.2 includes TensorRT 10.3, enables Super Mode for Orin Nano (67 TOPS, 1020 MHz GPU, 25W). -- **Related Sub-question**: Q3 (conversion paths) - -## Source #6 -- **Title**: NVIDIA Jetson Orin Nano Super Developer Kit Blog -- **Link**: https://developer.nvidia.com/blog/nvidia-jetson-orin-nano-developer-kit-gets-a-super-boost/ -- **Tier**: L2 (Official NVIDIA blog) -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: Orin Nano Super, 67 TOPS sparse INT8 -- **Target Audience**: Jetson developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Super mode: GPU 1020 MHz (vs 635), 67 TOPS sparse (vs 40), memory bandwidth 102 GB/s (vs 68), power 25W. No DLA cores on Orin Nano. -- **Related Sub-question**: Q6 (DLA availability) - -## Source #7 -- **Title**: Jetson Orin module comparison (Connect Tech) -- **Link**: https://connecttech.com/jetson/jetson-module-comparison -- **Tier**: L3 (Authoritative hardware vendor) -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Jetson hardware buyers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Confirms Orin Nano has NO DLA cores. Orin NX has 1-2 DLA. AGX Orin has 2 DLA. -- **Related Sub-question**: Q6 (DLA availability) - -## Source #8 -- **Title**: TensorRT Engine hardware specificity (NVIDIA/TensorRT Issue #1920) -- **Link**: https://github.com/NVIDIA/TensorRT/issues/1920 -- **Tier**: L1 (Official NVIDIA TensorRT repo) -- **Publication Date**: 2022 (confirmed still valid for TRT 10) -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: All TensorRT versions -- **Target Audience**: TensorRT developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: TRT engines are tied to specific GPU models. Must build on target hardware. Cannot cross-compile x86→aarch64. -- **Related Sub-question**: Q4 (deployment workflow) - -## Source #9 -- **Title**: trtexec ONNX to TRT conversion on Jetson Orin Nano (StackOverflow) -- **Link**: https://stackoverflow.com/questions/78787534/converting-a-pytorch-onnx-model-to-tensorrt-engine-jetson-orin-nano -- **Tier**: L4 (Community) -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Jetson developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Standard workflow: trtexec --onnx=model.onnx --saveEngine=model.trt --fp16. Use --memPoolSize instead of deprecated --workspace. -- **Related Sub-question**: Q3, Q5 (conversion workflow) - -## Source #10 -- **Title**: TensorRT 10 Python API Documentation -- **Link**: https://docs.nvidia.com/deeplearning/tensorrt/10.15.1/inference-library/python-api-docs.html -- **Tier**: L1 (Official NVIDIA docs) -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: TensorRT 10.x -- **Target Audience**: TensorRT Python developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: TRT 10 uses tensor-based API (not binding indices). load engine via runtime.deserialize_cuda_engine(). Async inference via context.enqueue_v3(stream_handle). -- **Related Sub-question**: Q3 (conversion paths) - -## Source #11 -- **Title**: Torch-TensorRT JetPack documentation -- **Link**: https://docs.pytorch.org/TensorRT/v2.10.0/getting_started/jetpack.html -- **Tier**: L1 (Official documentation) -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: Torch-TensorRT, JetPack 6.2, PyTorch 2.8.0 -- **Target Audience**: PyTorch developers on Jetson -- **Research Boundary Match**: ✅ Full match -- **Summary**: Torch-TensorRT supports Jetson aarch64 with JetPack 6.2. Supports AOT compilation, FP16/INT8, dynamic shapes. -- **Related Sub-question**: Q3 (conversion paths) - -## Source #12 -- **Title**: XFeatTensorRT GitHub repo -- **Link**: https://github.com/PranavNedunghat/XFeatTensorRT -- **Tier**: L4 (Community) -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: XFeat users on NVIDIA GPUs -- **Research Boundary Match**: ✅ Full match -- **Summary**: C++ TRT implementation of XFeat feature extraction. Already converts XFeat to TRT engine. -- **Related Sub-question**: Q5 (XFeat conversion) - -## Source #13 -- **Title**: TensorRT Best Practices (Official NVIDIA) -- **Link**: https://docs.nvidia.com/deeplearning/tensorrt/latest/performance/best-practices.html -- **Tier**: L1 (Official NVIDIA docs) -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Version Info**: TensorRT 10.x -- **Target Audience**: TensorRT developers -- **Research Boundary Match**: ✅ Full match -- **Summary**: Comprehensive guide: use trtexec for benchmarking, --fp16 for FP16, use ModelOptimizer for INT8, use polygraphy for model inspection. -- **Related Sub-question**: Q3 (conversion workflow) - -## Source #14 -- **Title**: NVIDIA blog: Maximizing DL Performance on Jetson Orin with DLA -- **Link**: https://developer.nvidia.com/blog/maximizing-deep-learning-performance-on-nvidia-jetson-orin-with-dla/ -- **Tier**: L2 (Official NVIDIA blog) -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Jetson Orin developers (NX and AGX) -- **Research Boundary Match**: ⚠️ Partial (DLA not available on Orin Nano) -- **Summary**: DLA contributes 38-74% of total DL performance on Orin (NX/AGX). Supports CNN layers in FP16/INT8. NOT available on Orin Nano. -- **Related Sub-question**: Q6 (DLA availability) - -## Source #15 -- **Title**: PUT Vision Lab: TensorRT vs ONNXRuntime comparison on Jetson -- **Link**: https://putvision.github.io/article/2021/12/20/jetson-onnxruntime-tensorrt.html -- **Tier**: L3 (Academic lab blog) -- **Publication Date**: 2021 (foundational comparison, architecture unchanged) -- **Timeliness Status**: ⚠️ Needs verification (older, but core findings still apply) -- **Target Audience**: Jetson developers -- **Research Boundary Match**: ⚠️ Partial (older Jetson, but same TRT vs ONNX RT question) -- **Summary**: Native TRT generally faster. ONNX RT TRT-EP adds wrapper overhead. Both use same TRT kernels internally. -- **Related Sub-question**: Q1 (performance difference) - -## Source #16 -- **Title**: LiteSAM paper — MinGRU details (Eqs 12-16, Section 3.4.2) -- **Link**: https://www.mdpi.com/2072-4292/17/19/3349 -- **Tier**: L1 (Peer-reviewed paper) -- **Publication Date**: 2025 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Satellite-aerial matching researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: MinGRU subpixel refinement uses 4 stacked layers, 3×3 window (9 candidates). Gates depend only on input C_f. Ops: Linear, Sigmoid, Mul, Add, ReLU, Tanh. -- **Related Sub-question**: Q5 (LiteSAM TRT compatibility) - -## Source #17 -- **Title**: Coarse_LoFTR_TRT paper — LoFTR TRT adaptation for embedded devices -- **Link**: https://ar5iv.labs.arxiv.org/html/2202.00770 -- **Tier**: L2 (arXiv paper with working open-source code) -- **Publication Date**: 2022 -- **Timeliness Status**: ✅ Currently valid (TRT adaptation techniques still apply) -- **Target Audience**: Feature matching on embedded devices -- **Research Boundary Match**: ✅ Full match -- **Summary**: Documents specific code changes for TRT compatibility: einsum→elementary ops, ONNX export, knowledge distillation. Tested on Jetson Nano 2GB. 2.26M params reduced from 27.95M. -- **Related Sub-question**: Q5 (EfficientLoFTR as TRT-proven alternative) - -## Source #18 -- **Title**: minGRU paper — "Were RNNs All We Needed?" -- **Link**: https://huggingface.co/papers/2410.01201 -- **Tier**: L1 (Research paper) -- **Publication Date**: 2024-10 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: RNN/sequence model researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: MinGRU removes gate dependency on h_{t-1}, enabling parallel computation. Parallel implementation uses logcumsumexp for numerical stability. 175x faster than sequential for seq_len=512. -- **Related Sub-question**: Q5 (MinGRU TRT compatibility) - -## Source #19 -- **Title**: SAM2 TRT performance degradation issue -- **Link**: https://github.com/facebookresearch/sam2/issues/639 -- **Tier**: L4 (GitHub issue) -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: SAM/transformer TRT deployers -- **Research Boundary Match**: ⚠️ Partial (different model, but relevant for transformer attention TRT risks) -- **Summary**: SAM2 MemoryAttention 30ms PyTorch → 100ms TRT FP16. RoPEAttention bottleneck. Warning for transformer TRT conversion. -- **Related Sub-question**: Q7 (TRT conversion risks) - -## Source #20 -- **Title**: EfficientLoFTR (CVPR 2024) -- **Link**: https://github.com/zju3dv/EfficientLoFTR -- **Tier**: L1 (CVPR paper + open-source code) -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Feature matching researchers -- **Research Boundary Match**: ✅ Full match -- **Summary**: 2.5x faster than LoFTR, higher accuracy. 15.05M params. Semi-dense matching. Available on HuggingFace under Apache 2.0. 964 GitHub stars. -- **Related Sub-question**: Q5 (alternative satellite matcher) diff --git a/_docs/00_research/trt_engine_migration/02_fact_cards.md b/_docs/00_research/trt_engine_migration/02_fact_cards.md deleted file mode 100644 index a7e6648..0000000 --- a/_docs/00_research/trt_engine_migration/02_fact_cards.md +++ /dev/null @@ -1,193 +0,0 @@ -# Fact Cards - -## Fact #1 -- **Statement**: ONNX Runtime CUDA Execution Provider on Jetson Orin Nano (JetPack 6.1) is 7-8x slower than TensorRT standalone due to tensor cores not being utilized with default settings. -- **Source**: Source #1 (https://github.com/microsoft/onnxruntime/issues/24085) -- **Phase**: Assessment -- **Target Audience**: Jetson Orin Nano developers using ONNX Runtime -- **Confidence**: ✅ High (confirmed by issue author with NSight profiling, MSFT acknowledged) -- **Related Dimension**: Performance - -## Fact #2 -- **Statement**: The workaround for Fact #1 is to remove the `cudnn_conv_algo_search` option (which defaults to EXHAUSTIVE) and use FP16 models. This restores tensor core usage. -- **Source**: Source #1 -- **Phase**: Assessment -- **Target Audience**: Jetson Orin Nano developers -- **Confidence**: ✅ High (confirmed fix by issue author) -- **Related Dimension**: Performance - -## Fact #3 -- **Statement**: ONNX Runtime TRT-EP keeps serialized TRT engine in memory (~420-440MB) throughout execution. Native TRT via trtexec drops to 130-140MB after engine deserialization by calling releaseBlob(). -- **Source**: Source #2 (https://github.com/microsoft/onnxruntime/issues/20457) -- **Phase**: Assessment -- **Target Audience**: All ONNX RT TRT-EP users, especially memory-constrained devices -- **Confidence**: ✅ High (confirmed by MSFT developer @chilo-ms with detailed explanation) -- **Related Dimension**: Memory consumption - -## Fact #4 -- **Statement**: The ~280-300MB extra memory from ONNX RT TRT-EP (Fact #3) is because the serialized engine is retained across compute function calls for dynamic shape models. Native TRT releases it after deserialization. -- **Source**: Source #2 -- **Phase**: Assessment -- **Target Audience**: Memory-constrained Jetson deployments -- **Confidence**: ✅ High (MSFT developer explanation) -- **Related Dimension**: Memory consumption - -## Fact #5 -- **Statement**: MSFT engineer states "TensorRT EP can achieve performance parity with native TensorRT" — both use the same TRT kernels internally. Benefit of TRT-EP is automatic fallback for unsupported ops. -- **Source**: Source #3 (https://github.com/microsoft/onnxruntime/issues/12083) -- **Phase**: Assessment -- **Target Audience**: General -- **Confidence**: ⚠️ Medium (official statement but contradicted by real benchmarks in some cases) -- **Related Dimension**: Performance - -## Fact #6 -- **Statement**: Real benchmark of InceptionV3/4 showed ONNX RT TRT-EP achieving ~41 inferences/sec vs native TRT at ~129 inferences/sec — approximately 3x performance gap. -- **Source**: Source #4 (https://github.com/microsoft/onnxruntime/issues/11356) -- **Phase**: Assessment -- **Target Audience**: CNN model deployers -- **Confidence**: ⚠️ Medium (community report, older ONNX RT version, model-specific) -- **Related Dimension**: Performance - -## Fact #7 -- **Statement**: Jetson Orin Nano Super specs: 67 TOPS sparse INT8 / 33 TOPS dense, GPU at 1020 MHz, 8GB LPDDR5 shared, 102 GB/s bandwidth, 25W TDP. NO DLA cores. -- **Source**: Source #6, Source #7 -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (official NVIDIA specs) -- **Related Dimension**: Hardware constraints - -## Fact #8 -- **Statement**: Jetson Orin Nano has ZERO DLA (Deep Learning Accelerator) cores. DLA is only available on Orin NX (1-2 cores) and AGX Orin (2 cores). -- **Source**: Source #7, Source #14 -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (official hardware specifications) -- **Related Dimension**: Hardware constraints - -## Fact #9 -- **Statement**: TensorRT engines are tied to specific GPU models, not just architectures. Must be built on the target device. Cannot cross-compile from x86 to aarch64. -- **Source**: Source #8 (https://github.com/NVIDIA/TensorRT/issues/1920) -- **Phase**: Assessment -- **Target Audience**: TRT deployers -- **Confidence**: ✅ High (NVIDIA confirmed) -- **Related Dimension**: Deployment workflow - -## Fact #10 -- **Statement**: Standard conversion workflow: PyTorch → ONNX (torch.onnx.export) → trtexec --onnx=model.onnx --saveEngine=model.engine --fp16. Use --memPoolSize instead of deprecated --workspace flag. -- **Source**: Source #9, Source #13 -- **Phase**: Assessment -- **Target Audience**: Model deployers on Jetson -- **Confidence**: ✅ High (official NVIDIA workflow) -- **Related Dimension**: Deployment workflow - -## Fact #11 -- **Statement**: TensorRT 10.x Python API: load engine via runtime.deserialize_cuda_engine(data). Async inference via context.enqueue_v3(stream_handle). Uses tensor-name-based API (not binding indices). -- **Source**: Source #10 -- **Phase**: Assessment -- **Target Audience**: Python TRT developers -- **Confidence**: ✅ High (official NVIDIA docs) -- **Related Dimension**: API/integration - -## Fact #12 -- **Statement**: Torch-TensorRT supports Jetson aarch64 with JetPack 6.2. Supports ahead-of-time (AOT) compilation, FP16/INT8, dynamic and static shapes. Alternative path to ONNX→trtexec. -- **Source**: Source #11 -- **Phase**: Assessment -- **Target Audience**: PyTorch developers on Jetson -- **Confidence**: ✅ High (official documentation) -- **Related Dimension**: Deployment workflow - -## Fact #13 -- **Statement**: XFeatTensorRT repo exists — C++ TensorRT implementation of XFeat feature extraction. Confirms XFeat is TRT-convertible. -- **Source**: Source #12 -- **Phase**: Assessment -- **Target Audience**: Our project (XFeat users) -- **Confidence**: ✅ High (working open-source implementation) -- **Related Dimension**: Model-specific conversion - -## Fact #14 -- **Statement**: cuVSLAM is a closed-source NVIDIA CUDA library (PyCuVSLAM). It is NOT an ONNX or PyTorch model. It cannot and does not need to be converted to TRT — it's already native CUDA-optimized for Jetson. -- **Source**: cuVSLAM documentation (https://github.com/NVlabs/PyCuVSLAM) -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (verified from PyCuVSLAM docs) -- **Related Dimension**: Model applicability - -## Fact #15 -- **Statement**: JetPack 6.2 ships with TensorRT 10.3, CUDA 12.6, cuDNN 9.3. The tensorrt Python module is pre-installed and accessible on Jetson. -- **Source**: Source #5 -- **Phase**: Assessment -- **Target Audience**: Jetson developers -- **Confidence**: ✅ High (official release notes) -- **Related Dimension**: Software stack - -## Fact #16 -- **Statement**: TRT engine build on Jetson Orin Nano Super (8GB) can cause OOM for large models during the build phase, even if inference fits in memory. Workaround: build on a more powerful machine with same GPU architecture, or use Torch-TensorRT PyTorch workflow. -- **Source**: Source #5 (https://github.com/NVIDIA/TensorRT-LLM/issues/3149) -- **Phase**: Assessment -- **Target Audience**: Jetson Orin Nano developers building large TRT engines -- **Confidence**: ✅ High (confirmed in NVIDIA TRT-LLM issue) -- **Related Dimension**: Deployment workflow - -## Fact #17 -- **Statement**: LiteSAM uses MobileOne backbone which is reparameterizable — multi-branch training structure collapses to a single feed-forward path. This is critical for TRT optimization: fewer layers, better fusion, faster inference. -- **Source**: Solution draft03, LiteSAM paper -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (published paper) -- **Related Dimension**: Model-specific conversion - -## Fact #18 -- **Statement**: INT8 quantization is safe for CNN layers (MobileOne backbone) but NOT for transformer components (TAIFormer in LiteSAM). FP16 is safe for both CNN and transformer layers. -- **Source**: Solution draft02/03 analysis -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ⚠️ Medium (general best practice, not verified on LiteSAM specifically) -- **Related Dimension**: Quantization strategy - -## Fact #19 -- **Statement**: On 8GB shared memory Jetson: OS+runtime ~1.5GB, cuVSLAM ~200-500MB, tiles ~200MB. Remaining budget: ~5.8-6.1GB. ONNX RT TRT-EP overhead of ~280-300MB per model is significant. Native TRT saves this memory. -- **Source**: Solution draft03 memory budget + Source #2 -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (computed from verified facts) -- **Related Dimension**: Memory consumption - -## Fact #20 -- **Statement**: LiteSAM's MinGRU subpixel refinement (Eqs 12-16) uses: z_t = σ(Linear(C_f)), h̃_t = Linear(C_f), h_t = (1-z_t)⊙h_{t-1} + z_t⊙h̃_t. Gates depend ONLY on input C_f (not h_{t-1}). Operates on 3×3 window (9 candidates), 4 stacked layers. All ops are standard: Linear, Sigmoid, Mul, Add, ReLU, Tanh. -- **Source**: LiteSAM paper (MDPI Remote Sensing, 2025, Eqs 12-16, Section 3.4.2) -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (from the published paper) -- **Related Dimension**: LiteSAM TRT compatibility - -## Fact #21 -- **Statement**: MinGRU's parallel implementation can use logcumsumexp (log-space parallel scan), which is NOT a standard ONNX operator. However, for seq_len=9 (LiteSAM's 3×3 window), a simple unrolled loop is equivalent and uses only standard ops. -- **Source**: minGRU paper + lucidrains/minGRU-pytorch implementation -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ⚠️ Medium (logcumsumexp risk depends on implementation; seq_len=9 makes rewrite trivial) -- **Related Dimension**: LiteSAM TRT compatibility - -## Fact #22 -- **Statement**: EfficientLoFTR has a proven TRT conversion path via Coarse_LoFTR_TRT (138 stars). The paper documents specific code changes needed: replace einsum with elementary ops (view, bmm, reshape, sum), adapt for TRT-compatible functions. Tested on Jetson Nano 2GB (~5 FPS with distilled model). -- **Source**: Coarse_LoFTR_TRT paper (arXiv:2202.00770) + GitHub repo -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (published paper + working open-source implementation) -- **Related Dimension**: Fallback satellite matcher - -## Fact #23 -- **Statement**: EfficientLoFTR has 15.05M params (2.4x more than LiteSAM's 6.31M). On AGX Orin with PyTorch: ~620ms (LiteSAM is 19.8% faster). Semi-dense matching. CVPR 2024. Available on HuggingFace under Apache 2.0. -- **Source**: LiteSAM paper comparison + EfficientLoFTR docs -- **Phase**: Assessment -- **Target Audience**: Our project -- **Confidence**: ✅ High (published benchmarks) -- **Related Dimension**: Fallback satellite matcher - -## Fact #24 -- **Statement**: SAM2's MemoryAttention showed performance DEGRADATION with TRT: 30ms PyTorch → 100ms TRT FP16. RoPEAttention identified as bottleneck. This warns that transformer attention modules may not always benefit from TRT conversion. -- **Source**: https://github.com/facebookresearch/sam2/issues/639 -- **Phase**: Assessment -- **Target Audience**: Transformer model deployers -- **Confidence**: ⚠️ Medium (different model, but relevant warning for attention layers) -- **Related Dimension**: TRT conversion risks diff --git a/_docs/00_research/trt_engine_migration/03_comparison_framework.md b/_docs/00_research/trt_engine_migration/03_comparison_framework.md deleted file mode 100644 index f5aa368..0000000 --- a/_docs/00_research/trt_engine_migration/03_comparison_framework.md +++ /dev/null @@ -1,38 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Decision Support - -## Selected Dimensions -1. Inference latency -2. Memory consumption -3. Deployment workflow complexity -4. Operator coverage / fallback -5. API / integration effort -6. Hardware utilization (tensor cores) -7. Maintenance / ecosystem -8. Cross-platform portability - -## Comparison: Native TRT Engine vs ONNX Runtime (TRT-EP and CUDA EP) - -| Dimension | Native TRT Engine | ONNX Runtime TRT-EP | ONNX Runtime CUDA EP | Factual Basis | -|-----------|-------------------|---------------------|----------------------|---------------| -| Inference latency | Optimal — uses TRT kernels directly, hardware-tuned | Near-parity with native TRT (same kernels), but up to 3x slower on some models due to wrapper overhead | 7-8x slower on Orin Nano with default settings (tensor core issue) | Fact #1, #5, #6 | -| Memory consumption | ~130-140MB after engine load (releases serialized blob) | ~420-440MB during execution (keeps serialized engine) | Standard CUDA memory + framework overhead | Fact #3, #4 | -| Memory delta per model | Baseline | +280-300MB vs native TRT | Higher than TRT-EP | Fact #3, #19 | -| Deployment workflow | PyTorch → ONNX → trtexec → .engine (must build ON target device) | PyTorch → ONNX → pass to ONNX Runtime session (auto-builds TRT engine) | PyTorch → ONNX → pass to ONNX Runtime session | Fact #9, #10 | -| Operator coverage | Only TRT-supported ops. Unsupported ops = build failure | Auto-fallback to CUDA/CPU for unsupported ops | All ONNX ops supported via CUDA/cuDNN | Fact #5 | -| API complexity | Lower-level: manual buffer allocation, CUDA streams, tensor management | Higher-level: InferenceSession, automatic I/O | Highest-level: same ONNX Runtime API | Fact #11 | -| Hardware utilization | Full: tensor cores, layer fusion, kernel auto-tuning, mixed precision | Full TRT kernels for supported ops, CUDA fallback for rest | Broken on Orin Nano with default settings (no tensor cores) | Fact #1, #2 | -| Maintenance | Engine must be rebuilt per TRT version and per GPU model | ONNX model is portable, engine rebuilt automatically | ONNX model is portable | Fact #9 | -| Cross-platform | NVIDIA-only, hardware-specific engine files | Multi-platform ONNX model, TRT-EP only on NVIDIA | Multi-platform (NVIDIA, AMD, Intel, CPU) | Fact #9 | -| Relevance to our project | ✅ Best — we deploy only on Jetson Orin Nano Super | ❌ Cross-platform benefit wasted — we're NVIDIA-only | ❌ Performance issue on our target hardware | Fact #7, #8 | - -## Per-Model Applicability - -| Model | Can Convert to TRT? | Recommended Path | Notes | -|-------|---------------------|------------------|-------| -| cuVSLAM | NO | N/A — already CUDA native | Closed-source NVIDIA library, already optimized | -| LiteSAM | YES | PyTorch → reparameterize MobileOne → ONNX → trtexec --fp16 | INT8 safe for MobileOne backbone only, NOT TAIFormer | -| XFeat | YES | PyTorch → ONNX → trtexec --fp16 (or use XFeatTensorRT C++) | XFeatTensorRT repo already exists | -| ESKF | N/A | N/A — mathematical filter, not a neural network | Python/C++ NumPy | diff --git a/_docs/00_research/trt_engine_migration/04_reasoning_chain.md b/_docs/00_research/trt_engine_migration/04_reasoning_chain.md deleted file mode 100644 index 64e0026..0000000 --- a/_docs/00_research/trt_engine_migration/04_reasoning_chain.md +++ /dev/null @@ -1,124 +0,0 @@ -# Reasoning Chain - -## Dimension 1: Inference Latency - -### Fact Confirmation -ONNX Runtime CUDA EP on Jetson Orin Nano is 7-8x slower than TRT standalone with default settings (Fact #1). Even with the workaround (Fact #2), ONNX RT adds wrapper overhead. ONNX RT TRT-EP claims "performance parity" (Fact #5), but real benchmarks show up to 3x gaps on specific models (Fact #6). - -### Reference Comparison -Native TRT uses kernel auto-tuning, layer fusion, and mixed-precision natively — no framework wrapper. Our models (LiteSAM, XFeat) are CNN+transformer architectures where TRT's fusion optimizations are most impactful. LiteSAM's reparameterized MobileOne backbone (Fact #17) is particularly well-suited for TRT fusion. - -### Conclusion -Native TRT Engine provides the lowest possible inference latency on Jetson Orin Nano Super. ONNX Runtime adds measurable overhead, ranging from negligible to 3x depending on model architecture and configuration. For our latency-critical pipeline (400ms total budget, satellite matching target ≤200ms), every millisecond matters. - -### Confidence -✅ High — supported by multiple sources, confirmed NVIDIA optimization pipeline. - ---- - -## Dimension 2: Memory Consumption - -### Fact Confirmation -ONNX RT TRT-EP keeps ~420-440MB during execution vs native TRT at ~130-140MB (Fact #3). This is ~280-300MB extra PER MODEL. On our 8GB shared memory Jetson, OS+runtime takes ~1.5GB, cuVSLAM ~200-500MB, tiles ~200MB (Fact #19). - -### Reference Comparison -If we run both LiteSAM and XFeat via ONNX RT TRT-EP: ~560-600MB extra memory overhead. Via native TRT: this overhead drops to near zero. - -With native TRT: -- LiteSAM engine: ~50-80MB -- XFeat engine: ~30-50MB -With ONNX RT TRT-EP: -- LiteSAM: ~50-80MB + ~280MB overhead = ~330-360MB -- XFeat: ~30-50MB + ~280MB overhead = ~310-330MB - -### Conclusion -Native TRT saves ~280-300MB per model vs ONNX RT TRT-EP. On our 8GB shared memory device, this is 3.5-3.75% of total memory PER MODEL. With two models, that's ~7% of total memory saved — meaningful when memory pressure from cuVSLAM map growth is a known risk. - -### Confidence -✅ High — confirmed by MSFT developer with detailed explanation of mechanism. - ---- - -## Dimension 3: Deployment Workflow - -### Fact Confirmation -Native TRT requires: PyTorch → ONNX → trtexec → .engine file. Engine must be built ON the target Jetson device (Fact #9). Engine is tied to specific GPU model and TRT version. TRT engine build on 8GB Jetson can OOM for large models (Fact #16). - -### Reference Comparison -ONNX Runtime auto-builds TRT engine from ONNX at first run (or caches). Simpler developer experience but first-run latency spike. Torch-TensorRT (Fact #12) offers AOT compilation as middle ground. - -Our models are small (LiteSAM 6.31M params, XFeat even smaller). Engine build OOM is unlikely for our model sizes. Build once before flight, ship .engine files. - -### Conclusion -Native TRT requires an explicit offline build step (trtexec on Jetson), but this is a one-time cost per model version. For our use case (pre-flight preparation already includes satellite tile download), adding a TRT engine build to the preparation workflow is trivial. The deployment complexity is acceptable. - -### Confidence -✅ High — well-documented workflow, our model sizes are small enough. - ---- - -## Dimension 4: Operator Coverage / Fallback - -### Fact Confirmation -Native TRT fails if a model contains unsupported operators. ONNX RT TRT-EP auto-falls back to CUDA/CPU for unsupported ops (Fact #5). This is TRT-EP's primary value proposition. - -### Reference Comparison -LiteSAM (MobileOne + TAIFormer + MinGRU) and XFeat use standard operations: Conv2d, attention, GRU, ReLU, etc. These are all well-supported by TensorRT 10.3. MobileOne's reparameterized form is pure Conv2d+BN — trivially supported. TAIFormer attention uses standard softmax/matmul — supported in TRT 10. MinGRU is a simplified GRU — may need verification. - -Risk: If any op in LiteSAM is unsupported by TRT, the entire export fails. Mitigation: verify with polygraphy before deployment. If an op fails, refactor or use Torch-TensorRT which can handle mixed TRT/PyTorch execution. - -### Conclusion -For our specific models, operator coverage risk is LOW. Standard CNN+transformer ops are well-supported in TRT 10.3. ONNX RT's fallback benefit is insurance we're unlikely to need. MinGRU in LiteSAM should be verified, but standard GRU ops are TRT-supported. - -### Confidence -⚠️ Medium — high confidence for MobileOne+TAIFormer, medium for MinGRU (needs verification on TRT 10.3). - ---- - -## Dimension 5: API / Integration Effort - -### Fact Confirmation -Native TRT Python API (Fact #11): manual buffer allocation with PyCUDA, CUDA stream management, tensor setup via engine.get_tensor_name(). ONNX Runtime: simple InferenceSession with .run(). - -### Reference Comparison -TRT Python API requires ~30-50 lines of boilerplate per model (engine load, buffer allocation, inference loop). ONNX Runtime requires ~5-10 lines. However, this is write-once code, encapsulated in a wrapper class. - -Our pipeline already uses CUDA streams for cuVSLAM pipelining (Stream A for VO, Stream B for satellite matching). Adding TRT inference to Stream B is natural — just pass stream_handle to context.enqueue_v3(). - -### Conclusion -Slightly more code with native TRT, but it's boilerplate that gets written once and wrapped. The CUDA stream integration actually BENEFITS from native TRT — direct stream control enables better pipelining with cuVSLAM. - -### Confidence -✅ High — well-documented API, straightforward integration. - ---- - -## Dimension 6: Hardware Utilization - -### Fact Confirmation -ONNX RT CUDA EP does NOT use tensor cores on Jetson Orin Nano by default (Fact #1). Native TRT uses tensor cores, layer fusion, kernel auto-tuning automatically. Jetson Orin Nano Super has 16 tensor cores at 1020 MHz (Fact #7). No DLA available (Fact #8). - -### Reference Comparison -Since there's no DLA to offload to, GPU is our only accelerator. Maximizing GPU utilization is critical. Native TRT squeezes every ounce from the 16 tensor cores. ONNX RT has a known bug preventing this on our exact hardware. - -### Conclusion -Native TRT is the only way to guarantee full hardware utilization on Jetson Orin Nano Super. ONNX RT's tensor core issue (even if workaround exists) introduces fragility. Since we have no DLA, wasting GPU tensor cores is unacceptable. - -### Confidence -✅ High — hardware limitation is confirmed, no alternative accelerator. - ---- - -## Dimension 7: Cross-Platform Portability - -### Fact Confirmation -ONNX Runtime runs on NVIDIA, AMD, Intel, CPU. TRT engines are NVIDIA-specific and even GPU-model-specific (Fact #9). - -### Reference Comparison -Our system deploys ONLY on Jetson Orin Nano Super. The companion computer is fixed hardware. There is no requirement or plan to run on non-NVIDIA hardware. Cross-platform portability has zero value for this project. - -### Conclusion -ONNX Runtime's primary value proposition (portability) is irrelevant for our deployment. We trade unused portability for maximum performance and minimum memory usage. - -### Confidence -✅ High — deployment target is fixed hardware. diff --git a/_docs/00_research/trt_engine_migration/05_validation_log.md b/_docs/00_research/trt_engine_migration/05_validation_log.md deleted file mode 100644 index c09cdce..0000000 --- a/_docs/00_research/trt_engine_migration/05_validation_log.md +++ /dev/null @@ -1,65 +0,0 @@ -# Validation Log - -## Validation Scenario -Full GPS-Denied pipeline running on Jetson Orin Nano Super (8GB) during a 50km flight with ~1500 frames at 3fps. Two AI models active: LiteSAM for satellite matching (keyframes) and XFeat as fallback. cuVSLAM running continuously for VO. - -## Expected Based on Conclusions - -### If using Native TRT Engine: -- LiteSAM TRT FP16 engine loaded: ~50-80MB GPU memory after deserialization -- XFeat TRT FP16 engine loaded: ~30-50MB GPU memory after deserialization -- Total AI model memory: ~80-130MB -- Inference runs on CUDA Stream B, directly integrated with cuVSLAM Stream A pipelining -- Tensor cores fully utilized at 1020 MHz -- LiteSAM satellite matching at estimated ~165-330ms (TRT FP16 at 1280px) -- XFeat matching at estimated ~50-100ms (TRT FP16) -- Engine files pre-built during offline preparation, stored on Jetson storage alongside satellite tiles - -### If using ONNX Runtime TRT-EP: -- LiteSAM via TRT-EP: ~330-360MB during execution -- XFeat via TRT-EP: ~310-330MB during execution -- Total AI model memory: ~640-690MB -- First inference triggers engine build (latency spike at startup) -- CUDA stream management less direct -- Same inference speed (in theory, per MSFT claim) - -### Memory budget comparison (total 8GB): -- Native TRT: OS 1.5GB + cuVSLAM 0.5GB + tiles 0.2GB + models 0.13GB + misc 0.1GB = ~2.43GB (30% used) -- ONNX RT TRT-EP: OS 1.5GB + cuVSLAM 0.5GB + tiles 0.2GB + models 0.69GB + ONNX RT overhead 0.15GB + misc 0.1GB = ~3.14GB (39% used) -- Delta: ~710MB (9% of total memory) - -## Actual Validation Results -The memory savings from native TRT are confirmed by the mechanism explanation from MSFT (Source #2). The 710MB delta is significant given cuVSLAM map growth risk (up to 1GB on long flights without aggressive pruning). - -The workflow integration is validated: engine files can be pre-built as part of the existing offline tile preparation pipeline. No additional hardware or tools needed — trtexec is included in JetPack 6.2. - -## Counterexamples - -### Counterexample 1: MinGRU operator may not be supported in TRT -MinGRU is a simplified GRU variant used in LiteSAM's subpixel refinement. Standard GRU is supported in TRT 10.3, but MinGRU may use custom operations. If MinGRU fails TRT export, options: -1. Replace MinGRU with standard GRU (small accuracy loss) -2. Split model: CNN+TAIFormer in TRT, MinGRU refinement in PyTorch -3. Use Torch-TensorRT which handles mixed execution - -**Assessment**: Low risk. MinGRU is a simplification of GRU, likely uses subset of GRU ops. - -### Counterexample 2: Engine rebuild needed per TRT version update -JetPack updates may change TRT version, invalidating cached engines. Must rebuild all engines after JetPack update. - -**Assessment**: Acceptable. JetPack updates are infrequent on deployed UAVs. Engine rebuild takes minutes. - -### Counterexample 3: Dynamic input shapes -If camera resolution changes between flights, engine with static shapes must be rebuilt. Can use dynamic shapes in trtexec (--minShapes, --optShapes, --maxShapes) but at slight performance cost. - -**Assessment**: Acceptable. Camera resolution is fixed per deployment. Build engine for that resolution. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable/verifiable -- [x] Memory calculations verified against known budget -- [x] Workflow integration validated against existing offline preparation - -## Conclusions Requiring Revision -None — all conclusions hold under validation. diff --git a/_docs/01_solution/security_analysis.md b/_docs/01_solution/security_analysis.md deleted file mode 100644 index fee0fcd..0000000 --- a/_docs/01_solution/security_analysis.md +++ /dev/null @@ -1,346 +0,0 @@ -# Security Analysis - -## Operational Context - -This system runs on a UAV operating in a **conflict zone** (eastern Ukraine). The UAV could be shot down and physically captured. GPS denial/spoofing is the premise. The Jetson Orin Nano stores satellite imagery, flight plans, and captured photos. Security must assume the worst case: **physical access by an adversary**. - -## Threat Model - -### Asset Inventory - -| Asset | Sensitivity | Location | Notes | -|-------|------------|----------|-------| -| Captured camera imagery | HIGH | Jetson storage | Reconnaissance data — reveals what was surveyed | -| Satellite tile cache | MEDIUM | Jetson storage | Reveals operational area and areas of interest | -| Flight plan / route | HIGH | Jetson memory + storage | Reveals mission objectives and launch/landing sites | -| Computed GPS positions | HIGH | Jetson memory, SSE stream | Real-time position data of UAV and surveyed targets | -| Google Maps API key | MEDIUM | Offline prep machine only | Used pre-flight, NOT stored on Jetson | -| TensorRT model weights | LOW | Jetson storage | LiteSAM/XFeat — publicly available models | -| cuVSLAM binary | LOW | Jetson storage | NVIDIA proprietary but freely distributed | -| IMU calibration data | LOW | Jetson storage | Device-specific calibration | -| System configuration | MEDIUM | Jetson storage | API endpoints, tile paths, fusion parameters | - -### Threat Actors - -| Actor | Capability | Motivation | Likelihood | -|-------|-----------|------------|------------| -| **Adversary military (physical capture)** | Full physical access after UAV loss | Extract intelligence: imagery, flight plans, operational area | HIGH | -| **Electronic warfare unit** | GPS spoofing/jamming, RF jamming | Disrupt navigation, force UAV off course | HIGH (GPS denial is the premise) | -| **Network attacker (ground station link)** | Intercept/inject on UAV-to-ground comms | Steal position data, inject false commands | MEDIUM | -| **Insider / rogue operator** | Authorized access to system | Data exfiltration, mission sabotage | LOW | -| **Supply chain attacker** | Tampered satellite tiles or model weights | Feed corrupted reference data → position errors | LOW | - -### Attack Vectors - -| Vector | Target Asset | Actor | Impact | Likelihood | -|--------|-------------|-------|--------|------------| -| **Physical extraction of storage** | All stored data | Adversary (capture) | Full intelligence compromise | HIGH | -| **GPS spoofing** | Position estimate | EW unit | Already mitigated — system is GPS-denied by design | N/A | -| **IMU acoustic injection** | IMU data → ESKF | EW unit | Drift injection, subtle position errors | LOW | -| **Camera blinding/spoofing** | VO + satellite matching | EW unit | VO failure, incorrect satellite matches | LOW | -| **Adversarial ground patterns** | Satellite matching | Adversary | Physical patches on ground fool feature matching | VERY LOW | -| **SSE stream interception** | Position data | Network attacker | Real-time position leak | MEDIUM | -| **API command injection** | Flight session control | Network attacker | Start/stop/manipulate sessions | MEDIUM | -| **Corrupted satellite tiles** | Satellite matching | Supply chain | Systematic position errors | LOW | -| **Model weight tampering** | Matching accuracy | Supply chain | Degraded matching → higher drift | LOW | - -## Per-Component Security Requirements and Controls - -### 1. Data at Rest (Jetson Storage) - -| Requirement | Risk Level | Control | Implementation | -|-------------|-----------|---------|----------------| -| Protect captured imagery from extraction after capture | CRITICAL | Full-disk encryption (LUKS) | JetPack LUKS support with `ENC_ROOTFS=1`. Use OP-TEE Trusted Application for key management. Modify `luks-srv` to NOT auto-decrypt — require hardware token or secure erase trigger | -| Protect satellite tiles and flight plans | HIGH | Same LUKS encryption | Included in full-disk encryption scope | -| Enable rapid secure erase on capture/crash | CRITICAL | Tamper-triggered wipe | Hardware dead-man switch: if UAV telemetry lost for N seconds OR accelerometer detects crash impact → trigger `cryptsetup luksErase` on all LUKS volumes. Destroys key material in <1 second — data becomes unrecoverable | -| Prevent cold-boot key extraction | HIGH | Minimize key residency in RAM | ESKF state and position history cleared from memory when session ends. Avoid writing position logs to disk unless encrypted | - -### 2. Secure Boot - -| Requirement | Risk Level | Control | Implementation | -|-------------|-----------|---------|----------------| -| Prevent unauthorized code execution | HIGH | NVIDIA Secure Boot with PKC fuse burning | Burn `SecurityMode` fuse (odm_production_mode=0x1) on production Jetsons. Sign all boot images with PKC key pair. Generate keys via HSM | -| Prevent firmware rollback | MEDIUM | Ratchet fuses | Configure anti-rollback fuses in fuse configuration XML | -| Debug port lockdown | HIGH | Disable JTAG/debug after production | Burn debug-disable fuses. Irreversible — production units only | - -### 3. API & Communication (FastAPI + SSE) - -| Requirement | Risk Level | Control | Implementation | -|-------------|-----------|---------|----------------| -| Authenticate API clients | HIGH | JWT bearer token | Pre-shared secret between ground station and Jetson. Generate JWT at session start. Short expiry (flight duration). `HTTPBearer` scheme in FastAPI | -| Encrypt SSE stream | HIGH | TLS 1.3 | Uvicorn with TLS certificate (self-signed for field use, pre-installed on ground station). All SSE position data encrypted in transit | -| Prevent unauthorized session control | HIGH | JWT + endpoint authorization | Session start/stop/anchor endpoints require valid JWT. Rate-limit via `slowapi` | -| Prevent replay attacks | MEDIUM | JWT `exp` + `jti` claims | Token expiry per-flight. Unique token ID (`jti`) tracked to prevent reuse | -| Limit API surface | MEDIUM | Minimal endpoint exposure | Only expose: POST /sessions, GET /sessions/{id}/stream (SSE), POST /sessions/{id}/anchor, DELETE /sessions/{id}. No admin/debug endpoints in production | - -### 4. Visual Odometry (cuVSLAM) - -| Requirement | Risk Level | Control | Implementation | -|-------------|-----------|---------|----------------| -| Detect camera feed tampering | LOW | Sanity checks on frame consistency | If consecutive frames show implausible motion (>500m displacement at 3fps), flag as suspicious. ESKF covariance spike triggers satellite re-localization | -| Protect against VO poisoning | LOW | Cross-validate VO with IMU | ESKF fusion inherently cross-validates: IMU and VO disagreement raises covariance, triggers satellite matching. No single sensor can silently corrupt position | - -### 5. Satellite Image Matching - -| Requirement | Risk Level | Control | Implementation | -|-------------|-----------|---------|----------------| -| Verify tile integrity | MEDIUM | SHA-256 checksums per tile | During offline preprocessing: compute SHA-256 for each tile pair. Store checksums in signed manifest. At runtime: verify checksum on tile load | -| Prevent adversarial tile injection | MEDIUM | Signed tile manifest | Offline tool signs manifest with private key. Jetson verifies signature with embedded public key before accepting tile set | -| Detect satellite match outliers | MEDIUM | RANSAC inlier ratio threshold | If RANSAC inlier ratio <30%, reject match as unreliable. ESKF treats as no-measurement rather than bad measurement | -| Protect against ground-based adversarial patterns | VERY LOW | Multi-tile consensus | Match against multiple overlapping tiles. Physical adversarial patches affect local area — consensus voting across tiles detects anomalies | - -### 6. Sensor Fusion (ESKF) - -| Requirement | Risk Level | Control | Implementation | -|-------------|-----------|---------|----------------| -| Prevent single-sensor corruption of position | HIGH | Adaptive noise + outlier rejection | Mahalanobis distance test on each measurement. Reject updates >5σ from predicted state. No single measurement can cause >50m position jump | -| Detect systematic drift | MEDIUM | Satellite matching rate monitoring | If satellite matches consistently disagree with VO by >100m, flag integrity warning to operator | -| Protect fusion state | LOW | In-memory only, no persistence | ESKF state never written to disk. Lost on power-off — no forensic recovery | - -### 7. Offline Preprocessing (Developer Machine) - -| Requirement | Risk Level | Control | Implementation | -|-------------|-----------|---------|----------------| -| Protect Google Maps API key | MEDIUM | Environment variable, never in code | `.env` file excluded from version control. API key used only on developer machine, never deployed to Jetson | -| Validate downloaded tiles | LOW | Source verification | Download only from Google Maps Tile API via HTTPS. Verify TLS certificate chain | -| Secure tile transfer to Jetson | MEDIUM | Signed + encrypted transfer | Transfer tile set + signed manifest via encrypted channel (SCP/SFTP). Verify manifest signature on Jetson before accepting | - -## Security Controls Summary - -### Authentication & Authorization - -- **Mechanism**: Pre-shared JWT secret between ground station and Jetson -- **Scope**: All API endpoints require valid JWT bearer token -- **Session model**: One JWT per flight session, expires at session end -- **No user management on Jetson** — single-operator system, auth is device-to-device - -### Data Protection - -| State | Protection | Tool | -|-------|-----------|------| -| At rest (Jetson storage) | LUKS full-disk encryption | JetPack LUKS + OP-TEE | -| In transit (SSE stream) | TLS 1.3 | Uvicorn SSL | -| In memory (ESKF state) | No persistence, cleared on session end | Application logic | -| On capture (emergency) | Tamper-triggered LUKS key erase | Hardware dead-man switch + `cryptsetup luksErase` | - -### Secure Communication - -- Ground station ↔ Jetson: TLS 1.3 (self-signed cert, pre-installed) -- No internet connectivity during flight — no external attack surface -- RF link security is out of scope (handled by UAV communication system) - -### Logging & Monitoring - -| What | Where | Retention | -|------|-------|-----------| -| API access logs (request count, errors) | In-memory ring buffer | Current session only, not persisted | -| Security events (auth failures, integrity warnings) | In-memory + SSE alert to operator | Current session only | -| Position history | In-memory for refinement, SSE to ground station | NOT persisted on Jetson after session end | -| Crash/tamper events | Trigger secure erase, no logging | N/A — priority is data destruction | - -**Design principle**: Minimize data persistence on Jetson. The ground station is the system of record. Jetson stores only what's needed for the current flight — satellite tiles (encrypted at rest) and transient processing state (memory only). - -## Protected Code Execution (OP-TEE / ARM TrustZone) - -### Overview - -The Jetson Orin Nano Super supports hardware-enforced protected code execution via **ARM TrustZone** and **OP-TEE v4.2.0** (included in Jetson Linux 36.3+). TrustZone partitions the processor into two isolated worlds: - -- **Secure World** (TEE): Runs at ARMv8 secure EL-1 (OS) and EL-0 (apps). Code here cannot be read or tampered with from the normal world. OP-TEE is the secure OS. -- **Normal World**: Standard Linux (JetPack). Our Python application, cuVSLAM, FastAPI all run here. - -Trusted Applications (TAs) execute inside the secure world and are invoked by Client Applications (CAs) in the normal world via the GlobalPlatform TEE Client API. - -### Architecture on Jetson Orin Nano - -``` -┌─────────────────────────────────────────────────┐ -│ NORMAL WORLD (Linux / JetPack) │ -│ │ -│ Client Application (CA) │ -│ ↕ libteec.so (TEE Client API) │ -│ ↕ OP-TEE Linux Kernel Driver │ -│ ↕ ARM Trusted Firmware (ATF) / Monitor │ -├─────────────────────────────────────────────────┤ -│ SECURE WORLD (OP-TEE v4.2.0) │ -│ │ -│ OP-TEE OS (ARMv8 S-EL1) │ -│ ├── jetson-user-key PTA (key management) │ -│ ├── luks TA (disk encryption passphrase) │ -│ ├── hwkey-agent TA (encrypt/decrypt data) │ -│ ├── PKCS #11 TA (crypto token interface) │ -│ └── Custom TAs (our application-specific TAs) │ -│ │ -│ Hardware: Security Engine (SE), HW RNG, Fuses │ -│ TZ-DRAM: Dedicated memory carveout │ -└─────────────────────────────────────────────────┘ -``` - -### Key Hierarchy (Hardware-Backed) - -The Jetson Orin Nano provides a hardware-rooted key hierarchy via the Security Engine (SE) and Encrypted Key Blob (EKB): - -``` -OEM_K1 fuse (256-bit AES, burned into hardware, cannot be read by software) - │ - ├── EKB_RK (EKB Root Key, derived via AES-128-ECB from OEM_K1 + FV) - │ ├── EKB_EK (encryption key for EKB content) - │ └── EKB_AK (authentication key for EKB content) - │ - ├── HUK (Hardware Unique Key, per-device, derived via NIST-SP-800-108) - │ └── SSK (Secure Storage Key, per-device, generated at OP-TEE boot) - │ ├── TSK (TA Storage Key, per-TA) - │ └── FEK (File Encryption Key, per-file) - │ - └── LUKS passphrase (derived from disk encryption key stored in EKB) -``` - -Fuse keys are loaded into SE keyslots during early boot (before OP-TEE starts). Software cannot read keys from keyslots — only derive new keys through the SE. After use, keyslots should be cleared via `tegra_se_clear_aes_keyslots()`. - -### What to Run in the Secure World (Our Use Cases) - -| Use Case | TA Type | Purpose | -|----------|---------|---------| -| **LUKS disk encryption** | Built-in `luks` TA | Generate one-time passphrase at boot to unlock encrypted rootfs. Keys never leave secure world | -| **Tile manifest verification** | Custom User TA | Verify SHA-256 signatures of satellite tile manifests. Signing key stored in EKB, accessible only in secure world | -| **JWT secret storage** | Custom User TA or `hwkey-agent` TA | Store JWT signing secret in EKB. Sign/verify JWTs inside secure world — secret never exposed to Linux | -| **Secure erase trigger** | Custom User TA | Receive tamper signal → invoke `cryptsetup luksErase` via CA. Key erase logic runs in secure world to prevent normal-world interference | -| **TLS private key protection** | PKCS #11 TA | Store TLS private key in OP-TEE secure storage. Uvicorn uses PKCS #11 interface to perform TLS handshake without key leaving secure world | - -### How to Enable Protected Code Execution - -#### Step 1: Burn OEM_K1 Fuse (One-Time, Irreversible) - -```bash -# Generate 256-bit OEM_K1 key (use HSM in production) -openssl rand -hex 32 > oem_k1_key.txt - -# Create fuse configuration XML with OEM_K1 -# Burn fuse via odmfuse.sh (IRREVERSIBLE) -sudo ./odmfuse.sh -i -``` - -After burning `SecurityMode` fuse (`odm_production_mode=0x1`), all further fuse writes are blocked. OEM_K1 becomes permanently embedded in hardware. - -#### Step 2: Generate and Flash EKB - -```bash -# Generate user keys (disk encryption key, JWT secret, tile signing key) -openssl rand -hex 16 > disk_enc_key.txt -openssl rand -hex 32 > jwt_secret.txt -openssl rand -hex 32 > tile_signing_key.txt - -# Generate EKB binary -python3 gen_ekb.py -chip t234 \ - -oem_k1_key oem_k1_key.txt \ - -in_sym_key uefi_enc_key.txt \ - -in_sym_key2 disk_enc_key.txt \ - -in_auth_key uefi_var_auth_key.txt \ - -out eks_t234.img - -# Flash EKB to EKS partition -# (part of the normal flash process with secure boot enabled) -``` - -#### Step 3: Enable LUKS Disk Encryption - -```bash -# During flash, set ENC_ROOTFS=1 to encrypt rootfs -export ENC_ROOTFS=1 -sudo ./flash.sh -``` - -The `luks` TA in OP-TEE derives a passphrase from the disk encryption key in EKB at boot. The passphrase is generated inside the secure world and passed to `cryptsetup` — it never exists in persistent storage. - -#### Step 4: Develop Custom Trusted Applications - -Cross-compile TAs for aarch64 using the Jetson OP-TEE source package: - -```bash -# Build custom TA (e.g., tile manifest verifier) -make -C \ - CROSS_COMPILE="/bin/aarch64-buildroot-linux-gnu-" \ - TA_DEV_KIT_DIR="/optee/build/t234/export-ta_arm64/" \ - OPTEE_CLIENT_EXPORT="/optee/install/t234/usr" \ - TEEC_EXPORT="/optee/install/t234/usr" \ - -j"$(nproc)" - -# Deploy: copy TA to /lib/optee_armtz/ on Jetson -# Deploy: copy CA to /usr/sbin/ on Jetson -``` - -TAs conform to the GlobalPlatform TEE Internal Core API. Use the `hello_world` example from `optee_examples` as a starting template. - -#### Step 5: Enable Secure Boot - -```bash -# Generate PKC key pair (use HSM for production) -openssl genrsa -out pkc_key.pem 3072 - -# Sign and flash secured images -sudo ./flash.sh --sign pkc_key.pem - -# After verification, burn SecurityMode fuse (IRREVERSIBLE) -``` - -### Available Crypto Services in Secure World - -| Service | Provider | Notes | -|---------|----------|-------| -| AES-128/256 encryption/decryption | SE hardware | Via keyslot-derived keys, never leaves SE | -| Key derivation (NIST-SP-800-108) | `jetson-user-key` PTA | Derive purpose-specific keys from EKB keys | -| Hardware RNG | SE hardware | `TEE_GenerateRandom()` or PTA command | -| PKCS #11 crypto tokens | PKCS #11 TA | Standard crypto interface for TLS, signing | -| SHA-256, HMAC | MbedTLS (bundled in optee_os) | Software crypto in secure world | -| RSA/ECC signing | GlobalPlatform TEE Crypto API | For manifest signature verification | - -### Limitations on Orin Nano - -| Limitation | Impact | Workaround | -|-----------|--------|------------| -| No RPMB support (only AGX Orin has RPMB) | Secure storage uses REE FS instead of replay-protected memory | Acceptable — LUKS encryption protects data at rest. REE FS secure storage is encrypted by SSK | -| EKB can only be updated via OTA, not at runtime | Cannot rotate keys in flight | Pre-provision per-device unique keys at manufacturing time | -| OP-TEE hello_world reported issues on some Orin Nano units | Some users report initialization failures | Use JetPack 6.2.2+ which includes fixes. Test thoroughly on target hardware | -| TZ-DRAM is a fixed carveout | Limits secure world memory | Keep TAs lightweight — only crypto operations and key management, not data processing | - -### Security Hardening Checklist - -- [ ] Burn OEM_K1 fuse with unique per-device key (via HSM) -- [ ] Generate and flash EKB with disk encryption key, JWT secret, tile signing key -- [ ] Enable LUKS full-disk encryption (`ENC_ROOTFS=1`) -- [ ] Modify `luks-srv` to NOT auto-decrypt (require explicit trigger or dead-man switch) -- [ ] Burn SecurityMode fuse (`odm_production_mode=0x1`) — enables secure boot chain -- [ ] Burn debug-disable fuses — disables JTAG -- [ ] Configure anti-rollback ratchet fuses -- [ ] Clear SE keyslots after EKB extraction via `tegra_se_clear_aes_keyslots()` -- [ ] Deploy custom TAs for JWT signing and tile manifest verification -- [ ] Use PKCS #11 for TLS private key protection -- [ ] Test secure erase trigger end-to-end -- [ ] Run `xtest` (OP-TEE test suite) on production Jetson to validate TEE - -## Key Security Risks - -| Risk | Severity | Mitigation Status | -|------|---------|-------------------| -| Physical capture → data extraction | CRITICAL | Mitigated by LUKS + secure erase. Residual risk: attacker extracts RAM before erase triggers | -| No auto-decrypt bypass for LUKS | HIGH | Requires custom `luks-srv` modification — development effort needed | -| Self-signed TLS certificates | MEDIUM | Acceptable for field deployment. Certificate pinning on ground station prevents MITM | -| cuVSLAM is closed-source | LOW | Cannot audit for vulnerabilities. Mitigated by running in sandboxed environment, input validation on camera frames | -| Dead-man switch reliability | HIGH | Hardware integration required. False triggers (temporary signal loss) must NOT cause premature erase. Needs careful threshold tuning | - -## References -- xT-STRIDE threat model for UAVs (2025): https://link.springer.com/article/10.1007/s10207-025-01082-4 -- NVIDIA Jetson OP-TEE Documentation (r36.4.4): https://docs.nvidia.com/jetson/archives/r36.4.4/DeveloperGuide/SD/Security/OpTee.html -- NVIDIA Jetson Security Overview (r36.4.3): https://docs.nvidia.com/jetson/archives/r36.4.3/DeveloperGuide/SD/Security.html -- NVIDIA Jetson LUKS Disk Encryption: https://docs.nvidia.com/jetson/archives/r36.4.4/DeveloperGuide/SD/Security/DiskEncryption.html -- NVIDIA Jetson Secure Boot: https://docs.nvidia.com/jetson/archives/r36.4.4/DeveloperGuide/SD/Security/SecureBoot.html -- NVIDIA Jetson Secure Storage: https://docs.nvidia.com/jetson/archives/r36.4.3/DeveloperGuide/SD/Security/SecureStorage.html -- NVIDIA Jetson Firmware TPM: https://docs.nvidia.com/jetson/archives/r36.4.3/DeveloperGuide/SD/Security/FirmwareTPM.html -- NVIDIA Jetson Rollback Protection: https://docs.nvidia.com/jetson/archives/r36.4.3/DeveloperGuide/SD/Security/RollbackProtection.html -- Jetson Orin Fuse Specification: https://developer.nvidia.com/downloads/jetson-agx-orin-series-fuse-specification -- OP-TEE Official Documentation: https://optee.readthedocs.io/en/latest/ -- OP-TEE Trusted Application Examples: https://github.com/linaro-swg/optee_examples -- RidgeRun OP-TEE on Jetson Guide: https://developer.ridgerun.com/wiki/index.php/RidgeRun_Platform_Security_Manual/Getting_Started/TEE/NVIDA-Jetson -- GlobalPlatform TEE Specifications: https://globalplatform.org/specs-library/?filter-committee=tee -- Model Agnostic Defense against Adversarial Patches on UAVs (2024): https://arxiv.org/html/2405.19179v1 -- FastAPI Security Best Practices (2026): https://fastlaunchapi.dev/blog/fastapi-best-practices-production-2026 diff --git a/_docs/01_solution/solution.md b/_docs/01_solution/solution.md deleted file mode 100644 index 5438fe7..0000000 --- a/_docs/01_solution/solution.md +++ /dev/null @@ -1,622 +0,0 @@ -# Solution Draft - -## Assessment Findings - - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -| ------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| ESKF described as "16-state vector, ~10MB" with no mathematical specification | **Functional**: No state vector, no process model (F,Q), no measurement models (H for VO, H for satellite), no noise parameters, no scale observability analysis. Impossible to implement or validate accuracy claims. | **Define complete ESKF specification**: 15-state error vector, IMU-driven prediction, dual measurement models (VO relative pose, satellite absolute position), initial Q/R values, scale constraint via altitude + satellite corrections. | -| GPS_INPUT at 5-10Hz via pymavlink — no field mapping | **Functional**: GPS_INPUT requires 15+ fields (velocity, accuracy, hdop, fix_type, GPS time). No specification of how ESKF state maps to these fields. ArduPilot requires minimum 5Hz. | **Define GPS_INPUT population spec**: velocity from ESKF, accuracy from covariance, fix_type from confidence tier, GPS time from system clock conversion, synthesized hdop/vdop. | -| Confidence scoring "unchanged from draft03" — not in draft05 | **Functional**: Draft05 is supposed to be self-contained. Confidence scoring determines GPS_INPUT accuracy fields and fix_type — directly affects how ArduPilot EKF weights the position data. | **Define confidence scoring inline**: 3 tiers (satellite-anchored, VO-tracked, IMU-only) mapping to fix_type + accuracy values. | -| Coordinate transformations not defined | **Functional**: No pixel→camera→body→NED→WGS84 chain. Camera is not autostabilized, so body attitude matters. Satellite match → WGS84 conversion undefined. Object localization impossible without these transforms. | **Define coordinate transformation chain**: camera intrinsics K, camera-to-body extrinsic T_cam_body, body-to-NED from ESKF attitude, NED origin at mission start point. | -| Disconnected route segments — "satellite re-localization" mentioned but no algorithm | **Functional**: AC requires handling as "core to the system." Multiple disconnected segments expected. No tracking-loss detection, no re-localization trigger, no ESKF re-initialization, no cuVSLAM restart procedure. | **Define re-localization pipeline**: detect cuVSLAM tracking loss → IMU-only ESKF prediction → trigger satellite match on every frame → on match success: ESKF position reset + cuVSLAM restart → on 3 consecutive failures: operator re-localization request. | -| No startup handoff from GPS to GPS-denied | **Functional**: System reads GLOBAL_POSITION_INT at startup but no protocol for when GPS is lost/spoofed vs system start. No validation of initial position. | **Define handoff protocol**: system runs continuously, FC receives both real GPS and GPS_INPUT. GPS-denied system always provides its estimate; FC selects best source. Initial position validated against first satellite match. | -| No mid-flight reboot recovery | **Functional**: AC requires: "re-initialize from flight controller's current IMU-extrapolated position." No procedure defined. Recovery time estimation missing. | **Define reboot recovery sequence**: read FC position → init ESKF with high uncertainty → load TRT engines → start cuVSLAM → immediate satellite match. Estimated recovery: ~35-70s. Document as known limitation. | -| 3-consecutive-failure re-localization request undefined | **Functional**: AC requires ground station re-localization request. No message format, no operator workflow, no system behavior while waiting. | **Define re-localization protocol**: detect 3 failures → send custom MAVLink message with last known position + uncertainty → operator provides approximate coordinates → system uses as ESKF measurement with high covariance. | -| Object localization — "trigonometric calculation" with no details | **Functional**: No math, no API, no Viewpro gimbal integration, no accuracy propagation. Other onboard systems cannot use this component as specified. | **Define object localization**: pixel→ray using Viewpro intrinsics + gimbal angles → body frame → NED → ray-ground intersection → WGS84. FastAPI endpoint: POST /objects/locate. Accuracy propagated from UAV position + gimbal uncertainty. | -| Satellite matching — GSD normalization and tile selection unspecified | **Functional**: Camera GSD ~15.9 cm/px at 600m vs satellite ~0.3 m/px at zoom 19. The "pre-resize" step is mentioned but not specified. Tile selection radius based on ESKF uncertainty not defined. | **Define GSD handling**: downsample camera frame to match satellite GSD. Define tile selection: ESKF position ± 3σ_horizontal → select tiles covering that area. Assemble tile mosaic for matching. | -| Satellite tile storage requirements not calculated | **Functional**: "±2km" preload mentioned but no storage estimate. At zoom 19: a 200km path with ±2km buffer requires ~~130K tiles (~~2.5GB). | **Calculate tile storage**: specify zoom level (18 preferred — 0.6m/px, 4× fewer tiles), estimate storage per mission profile, define maximum mission area by storage limit. | -| FastAPI endpoints not in solution draft | **Functional**: Endpoints only in security_analysis.md. No request/response schemas. No SSE event format. No object localization endpoint. | **Consolidate API spec in solution**: define all endpoints, SSE event schema, object localization endpoint. Reference security_analysis.md for auth. | -| cuVSLAM configuration missing (calibration, IMU params, mode) | **Functional**: No camera calibration procedure, no IMU noise parameters, no T_imu_rig extrinsic, no mode selection (Mono vs Inertial). | **Define cuVSLAM configuration**: use Inertial mode, specify required calibration data (camera intrinsics, distortion, IMU noise params from datasheet, T_imu_rig from physical measurement), define calibration procedure. | -| tech_stack.md inconsistent with draft05 | **Functional**: tech_stack.md says 3fps (should be 0.7fps), LiteSAM at 480px (should be 1280px), missing EfficientLoFTR. | **Flag for update**: tech_stack.md must be synchronized with draft05 corrections. Not addressed in this draft — separate task. | - - -## Overall Maturity Assessment - - -| Category | Maturity (1-5) | Assessment | -| ----------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------- | -| Hardware & Platform Selection | 3.5 | UAV airframe, cameras, Jetson, batteries — well-researched with specs, weight budget, endurance calculations. Ready for procurement. | -| Core Algorithm Selection | 3.0 | cuVSLAM, LiteSAM/XFeat, ESKF — components selected with comparison tables, fallback chains, decision trees. Day-one benchmarks defined. | -| AI Inference Runtime | 3.5 | TRT Engine migration thoroughly analyzed. Conversion workflows, memory savings, performance estimates. Code wrapper provided. | -| Sensor Fusion (ESKF) | 1.5 | Mentioned but not specified. No implementable detail. Blockerfor coding. | -| System Integration | 1.5 | GPS_INPUT, coordinate transforms, inter-component data flow — all under-specified. | -| Edge Cases & Resilience | 1.0 | Disconnected segments, reboot recovery, re-localization — acknowledged but no algorithms. | -| Operational Readiness | 0.5 | No pre-flight procedures, no in-flight monitoring, no failure response. | -| Security | 3.0 | Comprehensive threat model, OP-TEE analysis, LUKS, secure boot. Well-researched. | -| **Overall TRL** | **~2.5** | **Technology concept formulated + some component validation. Not implementation-ready.** | - - -The solution is at approximately **TRL 3** (proof of concept) for hardware/algorithm selection and **TRL 1-2** (basic concept) for system integration, ESKF, and operational procedures. - -## Product Solution Description - -A real-time GPS-denied visual navigation system for fixed-wing UAVs, running on a Jetson Orin Nano Super (8GB). All AI model inference uses native TensorRT Engine files. The system replaces the GPS module by sending MAVLink GPS_INPUT messages via pymavlink over UART at 5-10Hz. - -Position is determined by fusing: (1) CUDA-accelerated visual odometry (cuVSLAM in Inertial mode) from ADTI 20L V1 at 0.7 fps sustained, (2) absolute position corrections from satellite image matching (LiteSAM or XFeat — TRT Engine FP16) using keyframes from the same ADTI image stream, and (3) IMU data from the flight controller via ESKF. Viewpro A40 Pro is reserved for AI object detection only. - -The ESKF is the central state estimator with 15-state error vector. It fuses: - -- **IMU prediction** at 5-10Hz (high-frequency pose propagation) -- **cuVSLAM VO measurement** at 0.7Hz (relative pose correction) -- **Satellite matching measurement** at ~0.07-0.14Hz (absolute position correction) - -GPS_INPUT messages carry position, velocity, and accuracy derived from the ESKF state and covariance. - -**Hard constraint**: ADTI 20L V1 shoots at 0.7 fps sustained (1430ms interval). Full VO+ESKF pipeline within 400ms per frame. Satellite matching async on keyframes (every 5-10 camera frames). GPS_INPUT at 5-10Hz (ESKF IMU prediction fills gaps between camera frames). - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ OFFLINE (Before Flight) │ -│ 1. Satellite Tiles → Download & Validate → Pre-resize → Store │ -│ (Google Maps) (≥0.5m/px, <2yr) (matcher res) (GeoHash)│ -│ 2. TRT Engine Build (one-time per model version): │ -│ PyTorch model → reparameterize → ONNX export → trtexec --fp16 │ -│ Output: litesam.engine, xfeat.engine │ -│ 3. Camera + IMU calibration (one-time per hardware unit) │ -│ 4. Copy tiles + engines + calibration to Jetson storage │ -└─────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ ONLINE (During Flight) │ -│ │ -│ STARTUP: │ -│ 1. pymavlink → read GLOBAL_POSITION_INT → init ESKF state │ -│ 2. Load TRT engines + allocate GPU buffers │ -│ 3. Load camera calibration + IMU calibration │ -│ 4. Start cuVSLAM (Inertial mode) with ADTI 20L V1 │ -│ 5. Preload satellite tiles ±2km into RAM │ -│ 6. First satellite match → validate initial position │ -│ 7. Begin GPS_INPUT output loop at 5-10Hz │ -│ │ -│ EVERY CAMERA FRAME (0.7fps from ADTI 20L V1): │ -│ ┌──────────────────────────────────────┐ │ -│ │ ADTI 20L V1 → Downsample (CUDA) │ │ -│ │ → cuVSLAM VO+IMU (~9ms) │ ← CUDA Stream A │ -│ │ → ESKF VO measurement │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ 5-10Hz CONTINUOUS (IMU-driven between camera frames): │ -│ ┌──────────────────────────────────────┐ │ -│ │ IMU data → ESKF prediction │ │ -│ │ ESKF state → GPS_INPUT fields │ │ -│ │ GPS_INPUT → Flight Controller (UART) │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ KEYFRAMES (every 5-10 camera frames, async): │ -│ ┌──────────────────────────────────────┐ │ -│ │ Camera frame → GSD downsample │ │ -│ │ Select satellite tile (ESKF pos±3σ) │ │ -│ │ TRT inference (Stream B): LiteSAM/ │ │ -│ │ XFeat → correspondences │ │ -│ │ RANSAC → homography → WGS84 position │ │ -│ │ ESKF satellite measurement update │──→ Position correction │ -│ └──────────────────────────────────────┘ │ -│ │ -│ TRACKING LOSS (cuVSLAM fails — sharp turn / featureless): │ -│ ┌──────────────────────────────────────┐ │ -│ │ ESKF → IMU-only prediction (growing │ │ -│ │ uncertainty) │ │ -│ │ Satellite match on EVERY frame │ │ -│ │ On match success → ESKF reset + │ │ -│ │ cuVSLAM restart │ │ -│ │ 3 consecutive failures → operator │ │ -│ │ re-localization request │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ TELEMETRY (1Hz): │ -│ ┌──────────────────────────────────────┐ │ -│ │ NAMED_VALUE_FLOAT: confidence, drift │──→ Ground Station │ -│ └──────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Architecture - -### Component: ESKF Sensor Fusion (NEW — previously unspecified) - -**Error-State Kalman Filter** fusing IMU, visual odometry, and satellite matching. - -**Nominal state vector** (propagated by IMU): - - -| State | Symbol | Size | Description | -| ---------- | ------ | ---- | ------------------------------------------------ | -| Position | p | 3 | NED position relative to mission origin (meters) | -| Velocity | v | 3 | NED velocity (m/s) | -| Attitude | q | 4 | Unit quaternion (body-to-NED rotation) | -| Accel bias | b_a | 3 | Accelerometer bias (m/s²) | -| Gyro bias | b_g | 3 | Gyroscope bias (rad/s) | - - -**Error-state vector** (estimated by ESKF): δx = [δp, δv, δθ, δb_a, δb_g]ᵀ ∈ ℝ¹⁵ -where δθ ∈ so(3) is the 3D rotation error. - -**Prediction step** (IMU at 5-10Hz from flight controller): - -- Input: accelerometer a_m, gyroscope ω_m, dt -- Propagate nominal state: p += v·dt, v += (R(q)·(a_m - b_a) - g)·dt, q ⊗= Exp(ω_m - b_g)·dt -- Propagate error covariance: P = F·P·Fᵀ + Q -- F is the 15×15 error-state transition matrix (standard ESKF formulation) -- Q: process noise diagonal, initial values from IMU datasheet noise densities - -**VO measurement update** (0.7Hz from cuVSLAM): - -- cuVSLAM outputs relative pose: ΔR, Δt (camera frame) -- Transform to NED: Δp_ned = R_body_ned · T_cam_body · Δt -- Innovation: z = Δp_ned_measured - Δp_ned_predicted -- Observation matrix H_vo maps error state to relative position change -- R_vo: measurement noise, initial ~0.1-0.5m (from cuVSLAM precision at 600m+ altitude) -- Kalman update: K = P·Hᵀ·(H·P·Hᵀ + R)⁻¹, δx = K·z, P = (I - K·H)·P - -**Satellite measurement update** (0.07-0.14Hz, async): - -- Satellite matching outputs absolute position: lat_sat, lon_sat in WGS84 -- Convert to NED relative to mission origin -- Innovation: z = p_satellite - p_predicted -- H_sat = [I₃, 0, 0, 0, 0] (directly observes position) -- R_sat: measurement noise, from matching confidence (~5-20m based on RANSAC inlier ratio) -- Provides absolute position correction — bounds drift accumulation - -**Scale observability**: - -- Monocular cuVSLAM has scale ambiguity during constant-velocity flight -- Scale is constrained by: (1) satellite matching absolute positions (primary), (2) known flight altitude from barometer + predefined mission altitude, (3) IMU accelerometer during maneuvers -- During long straight segments without satellite correction, scale drift is possible. Satellite corrections every ~7-14s re-anchor scale. - -**Tuning approach**: Start with IMU datasheet noise values for Q. Start with conservative R values (high measurement noise). Tune on flight test data by comparing ESKF output to known GPS ground truth. - - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -| -------------------------- | --------------- | ------------------------------------------------------------- | -------------------------------------- | ------------- | ----------- | -| Custom ESKF (Python/NumPy) | NumPy, SciPy | Full control, minimal dependencies, well-understood algorithm | Implementation effort, tuning required | <1ms per step | ✅ Selected | -| FilterPy ESKF | FilterPy v1.4.5 | Reference implementation, less code | Less flexible for multi-rate fusion | <1ms per step | ⚠️ Fallback | - - -### Component: Coordinate System & Transformations (NEW — previously undefined) - -**Reference frames**: - -- **Camera frame (C)**: origin at camera optical center, Z forward, X right, Y down (OpenCV convention) -- **Body frame (B)**: origin at UAV CG, X forward (nose), Y right (starboard), Z down -- **NED frame (N)**: North-East-Down, origin at mission start point -- **WGS84**: latitude, longitude, altitude (output format) - -**Transformation chain**: - -1. **Pixel → Camera ray**: p_cam = K⁻¹ · [u, v, 1]ᵀ where K = camera intrinsic matrix (ADTI 20L V1: fx, fy from 16mm lens + APS-C sensor) -2. **Camera → Body**: p_body = T_cam_body · p_cam where T_cam_body is the fixed mounting rotation (camera points nadir: 90° pitch rotation from body X-forward to camera Z-down) -3. **Body → NED**: p_ned = R_body_ned(q) · p_body where q is the ESKF quaternion attitude estimate -4. **NED → WGS84**: lat = lat_origin + p_north / R_earth, lon = lon_origin + p_east / (R_earth · cos(lat_origin)) where (lat_origin, lon_origin) is the mission start GPS position - -**Camera intrinsic matrix K** (ADTI 20L V1 + 16mm lens): - -- Sensor: 23.2 × 15.4 mm, Resolution: 5456 × 3632 -- fx = fy = focal_mm × width_px / sensor_width_mm = 16 × 5456 / 23.2 = 3763 pixels -- cx = 2728, cy = 1816 (sensor center) -- Distortion: Brown model (k1, k2, p1, p2 from calibration) - -**T_cam_body** (camera mount): - -- Navigation camera is fixed, pointing nadir (downward), not autostabilized -- R_cam_body = R_x(180°) · R_z(0°) (camera Z-axis aligned with body -Z, camera X with body X) -- Translation: offset from CG to camera mount (measured during assembly, typically <0.3m) - -**Satellite match → WGS84**: - -- Feature correspondences between camera frame and geo-referenced satellite tile -- Homography H maps camera pixels to satellite tile pixels -- Satellite tile pixel → WGS84 via tile's known georeference (zoom level + tile x,y → lat,lon) -- Camera center projects to satellite pixel (cx_sat, cy_sat) via H -- Convert (cx_sat, cy_sat) to WGS84 using tile georeference - -### Component: GPS_INPUT Message Population (NEW — previously undefined) - - -| GPS_INPUT Field | Source | Computation | -| ----------------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| lat, lon | ESKF position (NED) | NED → WGS84 conversion using mission origin | -| alt | ESKF position (Down) + mission origin altitude | alt = alt_origin - p_down | -| vn, ve, vd | ESKF velocity state | Direct from ESKF v[0], v[1], v[2] | -| fix_type | Confidence tier | 3 (3D fix) when satellite-anchored (last match <30s). 2 (2D) when VO-only. 0 (no fix) when IMU-only >5s | -| hdop | ESKF horizontal covariance | hdop = sqrt(P[0,0] + P[1,1]) / 5.0 (approximate CEP→HDOP mapping) | -| vdop | ESKF vertical covariance | vdop = sqrt(P[2,2]) / 5.0 | -| horiz_accuracy | ESKF horizontal covariance | horiz_accuracy = sqrt(P[0,0] + P[1,1]) meters | -| vert_accuracy | ESKF vertical covariance | vert_accuracy = sqrt(P[2,2]) meters | -| speed_accuracy | ESKF velocity covariance | speed_accuracy = sqrt(P[3,3] + P[4,4]) m/s | -| time_week, time_week_ms | System time | Convert Unix time to GPS epoch (GPS epoch = 1980-01-06, subtract leap seconds) | -| satellites_visible | Constant | 10 (synthetic — prevents satellite-count failsafes in ArduPilot) | -| gps_id | Constant | 0 | -| ignore_flags | Constant | 0 (provide all fields) | - - -**Confidence tiers** mapping to GPS_INPUT: - - -| Tier | Condition | fix_type | horiz_accuracy | Rationale | -| ------ | ------------------------------------------------- | ---------- | ------------------------------- | -------------------------------------- | -| HIGH | Satellite match <30s ago, ESKF covariance < 400m² | 3 (3D fix) | From ESKF P (typically 5-20m) | Absolute position anchor recent | -| MEDIUM | cuVSLAM tracking OK, no recent satellite match | 3 (3D fix) | From ESKF P (typically 20-50m) | Relative tracking valid, drift growing | -| LOW | cuVSLAM lost, IMU-only | 2 (2D fix) | From ESKF P (50-200m+, growing) | Only IMU dead reckoning, rapid drift | -| FAILED | 3+ consecutive total failures | 0 (no fix) | 999.0 | System cannot determine position | - - -### Component: Disconnected Route Segment Handling (NEW — previously undefined) - -**Trigger**: cuVSLAM reports tracking_lost OR tracking confidence drops below threshold - -**Algorithm**: - -``` -STATE: TRACKING_NORMAL - cuVSLAM provides relative pose - ESKF VO measurement updates at 0.7Hz - Satellite matching on keyframes (every 5-10 frames) - -STATE: TRACKING_LOST (enter when cuVSLAM reports loss) - 1. ESKF continues with IMU-only prediction (no VO updates) - → uncertainty grows rapidly (~1-5 m/s drift with consumer IMU) - 2. Switch satellite matching to EVERY frame (not just keyframes) - → maximize chances of getting absolute correction - 3. For each camera frame: - a. Attempt satellite match using ESKF predicted position ± 3σ for tile selection - b. If match succeeds (RANSAC inlier ratio > 30%): - → ESKF measurement update with satellite position - → Restart cuVSLAM with current frame as new origin - → Transition to TRACKING_NORMAL - → Reset failure counter - c. If match fails: - → Increment failure_counter - → Continue IMU-only ESKF prediction - 4. If failure_counter >= 3: - → Send re-localization request to ground station - → GPS_INPUT fix_type = 0 (no fix), horiz_accuracy = 999.0 - → Continue attempting satellite matching on each frame - 5. If operator sends re-localization hint (approximate lat,lon): - → Use as ESKF measurement with high covariance (~500m) - → Attempt satellite match in that area - → On success: transition to TRACKING_NORMAL - -STATE: SEGMENT_DISCONNECT - After re-localization following tracking loss: - → New cuVSLAM track is independent of previous track - → ESKF maintains global NED position continuity via satellite anchor - → No need to "connect" segments at the cuVSLAM level - → ESKF already handles this: satellite corrections keep global position consistent -``` - -### Component: Satellite Image Matching Pipeline (UPDATED — added GSD + tile selection details) - -**GSD normalization**: - -- Camera GSD at 600m: ~15.9 cm/pixel (ADTI 20L V1 + 16mm) -- Satellite tile GSD at zoom 18: ~0.6 m/pixel -- Scale ratio: ~3.8:1 -- Downsample camera image to satellite GSD before matching: resize from 5456×3632 to ~1440×960 (matching zoom 18 GSD) -- This is close to LiteSAM's 1280px input — use 1280px with minor GSD mismatch acceptable for matching - -**Tile selection**: - -- Input: ESKF position estimate (lat, lon) + horizontal covariance σ_h -- Search radius: max(3·σ_h, 500m) — at least 500m to handle initial uncertainty -- Compute geohash for center position → load tiles covering the search area -- Assemble tile mosaic if needed (typically 2×2 to 4×4 tiles for adequate coverage) -- If ESKF uncertainty > 2km: tile selection unreliable, fall back to wider search or request operator input - -**Tile storage calculation** (zoom 18 — 0.6 m/pixel): - -- Each 256×256 tile covers ~153m × 153m -- Flight path 200km with ±2km buffer: area ≈ 200km × 4km = 800 km² -- Tiles needed: 800,000,000 / (153 × 153) ≈ 34,200 tiles -- Storage: ~10-15KB per JPEG tile → ~340-510 MB -- With zoom 19 overlap tiles for higher precision: ×4 = ~1.4-2.0 GB -- Recommended: zoom 18 primary + zoom 19 for ±500m along flight path → ~500-800 MB total - - -| Solution | Tools | Advantages | Limitations | Performance (est. Orin Nano Super TRT FP16) | Params | Fit | -| -------------------------------------- | ------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------ | ------------------------------------------- | ------ | ------------------------------- | -| LiteSAM (opt) TRT Engine FP16 @ 1280px | trtexec + tensorrt Python | Best satellite-aerial accuracy (RMSE@30=17.86m UAV-VisLoc), 6.31M params | MinGRU TRT export needs verification (LOW-MEDIUM risk) | Est. ~165-330ms | 6.31M | ✅ Primary | -| EfficientLoFTR TRT Engine FP16 | trtexec + tensorrt Python | Proven TRT path (Coarse_LoFTR_TRT). Semi-dense. CVPR 2024. | 2.4x more params than LiteSAM. | Est. ~200-400ms | 15.05M | ✅ Fallback if LiteSAM TRT fails | -| XFeat TRT Engine FP16 | trtexec + tensorrt Python | Fastest. Proven TRT implementation. | General-purpose, not designed for cross-view gap. | Est. ~50-100ms | <5M | ✅ Speed fallback | - - -### Component: cuVSLAM Configuration (NEW — previously undefined) - -**Mode**: Inertial (mono camera + IMU) - -**Camera configuration** (ADTI 20L V1 + 16mm lens): - -- Model: Brown distortion -- fx = fy = 3763 px (16mm on 23.2mm sensor at 5456px width) -- cx = 2728 px, cy = 1816 px -- Distortion coefficients: from calibration (k1, k2, p1, p2) -- Border: 50px (ignore lens edge distortion) - -**IMU configuration** (Pixhawk 6x IMU — ICM-42688-P): - -- Gyroscope noise density: 3.0 × 10⁻³ °/s/√Hz -- Gyroscope random walk: 5.0 × 10⁻⁵ °/s²/√Hz -- Accelerometer noise density: 70 µg/√Hz -- Accelerometer random walk: ~2.0 × 10⁻³ m/s³/√Hz -- IMU frequency: 200 Hz (from flight controller via MAVLink) -- T_imu_rig: measured transformation from Pixhawk IMU to camera center (translation + rotation) - -**cuVSLAM settings**: - -- OdometryMode: INERTIAL -- MulticameraMode: PRECISION (favor accuracy over speed — we have 1430ms budget) -- Input resolution: downsample to 1280×852 (or 720p) for processing speed -- async_bundle_adjustment: True - -**Initialization**: - -- cuVSLAM initializes automatically when it receives the first camera frame + IMU data -- First few frames used for feature initialization and scale estimation -- First satellite match validates and corrects the initial position - -**Calibration procedure** (one-time per hardware unit): - -1. Camera intrinsics: checkerboard calibration with OpenCV (or use manufacturer data if available) -2. Camera-IMU extrinsic (T_imu_rig): Kalibr tool with checkerboard + IMU data -3. IMU noise parameters: Allan variance analysis or use datasheet values -4. Store calibration files on Jetson storage - -### Component: AI Model Inference Runtime (UNCHANGED) - -Native TRT Engine — optimal performance and memory on fixed NVIDIA hardware. See draft05 for full comparison table and conversion workflow. - -### Component: Visual Odometry (UNCHANGED) - -cuVSLAM in Inertial mode, fed by ADTI 20L V1 at 0.7 fps sustained. See draft05 for feasibility analysis at 0.7fps. - -### Component: Flight Controller Integration (UPDATED — added GPS_INPUT field spec) - -pymavlink over UART at 5-10Hz. GPS_INPUT field population defined above. - -ArduPilot configuration: - -- GPS1_TYPE = 14 (MAVLink) -- GPS_RATE = 5 (minimum, matching our 5-10Hz output) -- EK3_SRC1_POSXY = 1 (GPS), EK3_SRC1_VELXY = 1 (GPS) — EKF uses GPS_INPUT as position/velocity source - -### Component: Object Localization (NEW — previously undefined) - -**Input**: pixel coordinates (u, v) in Viewpro A40 Pro image, current gimbal angles (pan_deg, tilt_deg), zoom factor, UAV position from GPS-denied system, UAV altitude - -**Process**: - -1. Pixel → camera ray: ray_cam = K_viewpro⁻¹(zoom) · [u, v, 1]ᵀ -2. Camera → gimbal frame: ray_gimbal = R_gimbal(pan, tilt) · ray_cam -3. Gimbal → body: ray_body = T_gimbal_body · ray_gimbal -4. Body → NED: ray_ned = R_body_ned(q) · ray_body -5. Ray-ground intersection: assuming flat terrain at UAV altitude h: t = -h / ray_ned[2], p_ground_ned = p_uav_ned + t · ray_ned -6. NED → WGS84: convert to lat, lon - -**Output**: { lat, lon, accuracy_m, confidence } - -- accuracy_m propagated from: UAV position accuracy (from ESKF) + gimbal angle uncertainty + altitude uncertainty - -**API endpoint**: POST /objects/locate - -- Request: { pixel_x, pixel_y, gimbal_pan_deg, gimbal_tilt_deg, zoom_factor } -- Response: { lat, lon, alt, accuracy_m, confidence, uav_position: {lat, lon, alt}, timestamp } - -### Component: Startup, Handoff & Failsafe (UPDATED — added handoff + reboot + re-localization) - -**GPS-denied handoff protocol**: - -- GPS-denied system runs continuously from companion computer boot -- Reads initial position from FC (GLOBAL_POSITION_INT) — this may be real GPS or last known -- First satellite match validates the initial position -- FC receives both real GPS (if available) and GPS_INPUT; FC EKF selects best source based on accuracy -- No explicit "switch" — the GPS-denied system is a secondary GPS source - -**Startup sequence** (expanded from draft05): - -1. Boot Jetson → start GPS-Denied service (systemd) -2. Connect to flight controller via pymavlink on UART -3. Wait for heartbeat -4. Initialize PyCUDA context -5. Load TRT engines: litesam.engine + xfeat.engine (~1-3s each) -6. Allocate GPU I/O buffers -7. Create CUDA streams: Stream A (cuVSLAM), Stream B (satellite matching) -8. Load camera calibration + IMU calibration files -9. Read GLOBAL_POSITION_INT → set mission origin (NED reference point) → init ESKF -10. Start cuVSLAM (Inertial mode) with ADTI 20L V1 camera stream -11. Preload satellite tiles within ±2km into RAM -12. Trigger first satellite match → validate initial position -13. Begin GPS_INPUT output loop at 5-10Hz -14. System ready - -**Mid-flight reboot recovery**: - -1. Jetson boots (~30-60s) -2. GPS-Denied service starts, connects to FC -3. Read GLOBAL_POSITION_INT (FC's current IMU-extrapolated position) -4. Init ESKF with this position + HIGH uncertainty covariance (σ = 200m) -5. Load TRT engines (~2-6s total) -6. Start cuVSLAM (fresh, no prior map) -7. Immediate satellite matching on first camera frame -8. On satellite match success: ESKF corrected, uncertainty drops -9. Estimated total recovery: ~35-70s -10. During recovery: FC uses IMU-only dead reckoning (at 70 km/h: ~700-1400m uncontrolled drift) -11. **Known limitation**: recovery time is dominated by Jetson boot time - -**3-consecutive-failure re-localization**: - -- Trigger: VO lost + satellite match failed × 3 consecutive camera frames -- Action: send re-localization request via MAVLink STATUSTEXT or custom message -- Message content: "RELOC_REQ: last_lat={lat} last_lon={lon} uncertainty={σ}m" -- Operator response: MAVLink COMMAND_LONG with approximate lat/lon -- System: use operator position as ESKF measurement with R = diag(500², 500², 100²) meters² -- System continues satellite matching with updated search area -- While waiting: GPS_INPUT fix_type=0, IMU-only ESKF prediction continues - -### Component: Ground Station Telemetry (UPDATED — added re-localization) - -MAVLink messages to ground station: - - -| Message | Rate | Content | -| ----------------------------- | -------- | --------------------------------------------------- | -| NAMED_VALUE_FLOAT "gps_conf" | 1Hz | Confidence score (0.0-1.0) | -| NAMED_VALUE_FLOAT "gps_drift" | 1Hz | Estimated drift from last satellite anchor (meters) | -| NAMED_VALUE_FLOAT "gps_hacc" | 1Hz | Horizontal accuracy (meters, from ESKF) | -| STATUSTEXT | On event | "RELOC_REQ: ..." for re-localization request | -| STATUSTEXT | On event | Tracking loss / recovery notifications | - - -### Component: Thermal Management (UNCHANGED) - -Same adaptive pipeline from draft05. Active cooling required at 25W. Throttling at 80°C SoC junction. - -### Component: API & Inter-System Communication (NEW — consolidated) - -FastAPI (Uvicorn) running locally on Jetson for inter-process communication with other onboard systems. - - -| Endpoint | Method | Purpose | Auth | -| --------------------- | --------- | -------------------------------------- | ---- | -| /sessions | POST | Start GPS-denied session | JWT | -| /sessions/{id}/stream | GET (SSE) | Real-time position + confidence stream | JWT | -| /sessions/{id}/anchor | POST | Operator re-localization hint | JWT | -| /sessions/{id} | DELETE | End session | JWT | -| /objects/locate | POST | Object GPS from pixel coordinates | JWT | -| /health | GET | System health + memory + thermal | None | - - -**SSE event schema** (1Hz): - -```json -{ - "type": "position", - "timestamp": "2026-03-17T12:00:00.000Z", - "lat": 48.123456, - "lon": 37.654321, - "alt": 600.0, - "accuracy_h": 15.2, - "accuracy_v": 8.1, - "confidence": "HIGH", - "drift_from_anchor": 12.5, - "vo_status": "tracking", - "last_satellite_match_age_s": 8.3 -} -``` - -## UAV Platform - -Unchanged from draft05. See draft05 for: airframe configuration (3.5m S-2 composite, 12.5kg AUW), flight performance (3.4h endurance at 50 km/h), camera specifications (ADTI 20L V1 + 16mm, Viewpro A40 Pro), ground coverage calculations. - -## Speed Optimization Techniques - -Unchanged from draft05. Key points: cuVSLAM ~9ms/frame, native TRT Engine (no ONNX RT), dual CUDA streams, 5-10Hz GPS_INPUT from ESKF IMU prediction. - -## Processing Time Budget - -Unchanged from draft05. VO frame: ~17-22ms. Satellite matching: ≤210ms async. Well within 1430ms frame interval. - -## Memory Budget (Jetson Orin Nano Super, 8GB shared) - - -| Component | Memory | Notes | -| ------------------------- | -------------- | ------------------------------------------- | -| OS + runtime | ~1.5GB | JetPack 6.2 + Python | -| cuVSLAM | ~200-500MB | CUDA library + map | -| LiteSAM TRT engine | ~50-80MB | If LiteSAM fails: EfficientLoFTR ~100-150MB | -| XFeat TRT engine | ~30-50MB | | -| Preloaded satellite tiles | ~200MB | ±2km of flight plan | -| pymavlink + MAVLink | ~20MB | | -| FastAPI (local IPC) | ~50MB | | -| ESKF + buffers | ~10MB | | -| **Total** | **~2.1-2.9GB** | **26-36% of 8GB** | - - -## Key Risks and Mitigations - - -| Risk | Likelihood | Impact | Mitigation | -| ------------------------------------------------------- | ---------- | ----------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| LiteSAM MinGRU ops unsupported in TRT 10.3 | LOW-MEDIUM | LiteSAM TRT export fails | Day-one verification. Fallback: EfficientLoFTR TRT → XFeat TRT. | -| cuVSLAM fails on low-texture terrain at 0.7fps | HIGH | Frequent tracking loss | Satellite matching corrections bound drift. Re-localization pipeline handles tracking loss. IMU bridges short gaps. | -| Google Maps satellite quality in conflict zone | HIGH | Satellite matching fails, outdated imagery | Pre-flight tile validation. Consider alternative providers (Bing, Mapbox). Robust to seasonal appearance changes via feature-based matching. | -| ESKF scale drift during long constant-velocity segments | MEDIUM | Position error exceeds 100m between satellite anchors | Satellite corrections every 7-14s re-anchor. Altitude constraint from barometer. Monitor drift rate — if >50m between corrections, increase satellite matching frequency. | -| Monocular scale ambiguity | MEDIUM | Metric scale lost during constant-velocity flight | Satellite absolute corrections provide scale. Known altitude constrains vertical scale. IMU acceleration during turns provides observability. | -| AUW exceeds AT4125 recommended range | MEDIUM | Reduced endurance, motor thermal stress | 12.5 kg vs 8-10 kg recommended. Monitor motor temps. Weight optimization. | -| ADTI mechanical shutter lifespan | MEDIUM | Replacement needed periodically | ~8,800 actuations/flight at 0.7fps. Estimated 11-57 flights before replacement. Budget as consumable. | -| Mid-flight companion computer failure | LOW | ~35-70s position gap | Reboot recovery procedure defined. FC uses IMU dead reckoning during gap. Known limitation. | -| Thermal throttling on Jetson | MEDIUM | Satellite matching latency increases | Active cooling required. Monitor SoC temp. Throttling at 80°C. Our workload ~8-15W typical — well under 25W TDP. | -| Engine incompatibility after JetPack update | MEDIUM | Must rebuild engines | Include engine rebuild in update procedure. | -| TRT engine build OOM on 8GB | LOW | Cannot build on target | Models small (6.31M, <5M). Reduce --memPoolSize if needed. | - - -## Testing Strategy - -### Integration / Functional Tests - -- **ESKF correctness**: Feed recorded IMU + synthetic VO/satellite data → verify output matches reference ESKF implementation -- **GPS_INPUT field validation**: Send GPS_INPUT to SITL ArduPilot → verify EKF accepts and uses the data correctly -- **Coordinate transform chain**: Known GPS → NED → pixel → back to GPS — verify round-trip error <0.1m -- **Disconnected segment handling**: Simulate tracking loss → verify satellite re-localization triggers → verify cuVSLAM restarts → verify ESKF position continuity -- **3-consecutive-failure**: Simulate VO + satellite failures → verify re-localization request sent → verify operator hint accepted -- **Object localization**: Known object at known GPS → verify computed GPS matches within camera accuracy -- **Mid-flight reboot**: Kill GPS-denied process → restart → verify recovery within expected time → verify position accuracy after recovery -- **TRT engine load test**: Verify engines load successfully on Jetson -- **TRT inference correctness**: Compare TRT output vs PyTorch reference (max L1 error < 0.01) -- **CUDA Stream pipelining**: Verify Stream B satellite matching does not block Stream A VO -- **ADTI sustained capture rate**: Verify 0.7fps sustained >30 min without buffer overflow -- **Confidence tier transitions**: Verify fix_type and accuracy change correctly across HIGH → MEDIUM → LOW → FAILED transitions - -### Non-Functional Tests - -- **End-to-end accuracy** (primary validation): Fly with real GPS recording → run GPS-denied system in parallel → compare estimated vs real positions → verify 80% within 50m, 60% within 20m -- **VO drift rate**: Measure cuVSLAM drift over 1km straight segment without satellite correction -- **Satellite matching accuracy**: Compare satellite-matched position vs real GPS at known locations -- **Processing time**: Verify end-to-end per-frame <400ms -- **Memory usage**: Monitor over 30-min session → verify <8GB, no leaks -- **Thermal**: Sustained 30-min run → verify no throttling -- **GPS_INPUT rate**: Verify consistent 5-10Hz delivery to FC -- **Tile storage**: Validate calculated storage matches actual for test mission area -- **MinGRU TRT compatibility** (day-one blocker): Clone LiteSAM → ONNX export → polygraphy → trtexec -- **Flight endurance**: Ground-test full system power draw against 267W estimate - -## References - -- ArduPilot GPS_RATE parameter: [https://github.com/ArduPilot/ardupilot/pull/15980](https://github.com/ArduPilot/ardupilot/pull/15980) -- MAVLink GPS_INPUT message: [https://ardupilot.org/mavproxy/docs/modules/GPSInput.html](https://ardupilot.org/mavproxy/docs/modules/GPSInput.html) -- pymavlink GPS_INPUT example: [https://webperso.ensta.fr/lebars/Share/GPS_INPUT_pymavlink.py](https://webperso.ensta.fr/lebars/Share/GPS_INPUT_pymavlink.py) -- ESKF reference (fixed-wing UAV): [https://github.com/ludvigls/ESKF](https://github.com/ludvigls/ESKF) -- ROS ESKF multi-sensor: [https://github.com/EliaTarasov/ESKF](https://github.com/EliaTarasov/ESKF) -- Range-VIO scale observability: [https://arxiv.org/abs/2103.15215](https://arxiv.org/abs/2103.15215) -- NaviLoc trajectory-level localization: [https://www.mdpi.com/2504-446X/10/2/97](https://www.mdpi.com/2504-446X/10/2/97) -- SatLoc-Fusion hierarchical framework: [https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f](https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f) -- Auterion GPS-denied workflow: [https://docs.auterion.com/vehicle-operation/auterion-mission-control/useful-resources/operations/gps-denied-workflow](https://docs.auterion.com/vehicle-operation/auterion-mission-control/useful-resources/operations/gps-denied-workflow) -- PX4 GNSS-denied flight: [https://docs.px4.io/main/en/advanced_config/gnss_degraded_or_denied_flight.html](https://docs.px4.io/main/en/advanced_config/gnss_degraded_or_denied_flight.html) -- ArduPilot GPS_INPUT advanced usage: [https://discuss.ardupilot.org/t/advanced-usage-of-gps-type-mav-14/99406](https://discuss.ardupilot.org/t/advanced-usage-of-gps-type-mav-14/99406) -- Google Maps Ukraine imagery: [https://newsukraine.rbc.ua/news/google-maps-has-surprise-for-satellite-imagery-1727182380.html](https://newsukraine.rbc.ua/news/google-maps-has-surprise-for-satellite-imagery-1727182380.html) -- Jetson Orin Nano Super thermal: [https://edgeaistack.app/blog/jetson-orin-nano-power-consumption/](https://edgeaistack.app/blog/jetson-orin-nano-power-consumption/) -- GSD matching research: [https://www.kjrs.org/journal/view.html?pn=related&uid=756&vmd=Full](https://www.kjrs.org/journal/view.html?pn=related&uid=756&vmd=Full) -- VO+satellite matching pipeline: [https://polen.itu.edu.tr/items/1fe1e872-7cea-44d8-a8de-339e4587bee6](https://polen.itu.edu.tr/items/1fe1e872-7cea-44d8-a8de-339e4587bee6) -- PyCuVSLAM docs: [https://wiki.seeedstudio.com/pycuvslam_recomputer_robotics/](https://wiki.seeedstudio.com/pycuvslam_recomputer_robotics/) -- Pixhawk 6x IMU (ICM-42688-P) datasheet: [https://invensense.tdk.com/products/motion-tracking/6-axis/icm-42688-p/](https://invensense.tdk.com/products/motion-tracking/6-axis/icm-42688-p/) -- All references from solution_draft05.md - -## Related Artifacts - -- AC Assessment: `_docs/00_research/gps_denied_nav/00_ac_assessment.md` -- Completeness assessment research: `_docs/00_research/solution_completeness_assessment/` -- Previous research: `_docs/00_research/trt_engine_migration/` -- Tech stack evaluation: `_docs/01_solution/tech_stack.md` (needs sync with draft05 corrections) -- Security analysis: `_docs/01_solution/security_analysis.md` -- Previous draft: `_docs/01_solution/solution_draft05.md` - diff --git a/_docs/01_solution/solution_draft01.md b/_docs/01_solution/solution_draft01.md deleted file mode 100644 index ef0a3bd..0000000 --- a/_docs/01_solution/solution_draft01.md +++ /dev/null @@ -1,283 +0,0 @@ -# Solution Draft - -## Product Solution Description - -A real-time GPS-denied visual navigation system for fixed-wing UAVs, running entirely on a Jetson Orin Nano Super (8GB). The system determines frame-center GPS coordinates by fusing three information sources: (1) CUDA-accelerated visual odometry (cuVSLAM), (2) absolute position corrections from satellite image matching, and (3) IMU-based motion prediction. Results stream to clients via REST API + SSE in real time. - -**Hard constraint**: Camera shoots at ~3fps (333-400ms interval). The full pipeline must complete within **400ms per frame**. - -**Satellite matching strategy**: Benchmark LiteSAM on actual Orin Nano Super hardware as a day-one priority. If LiteSAM cannot achieve ≤400ms at 480px resolution, **abandon it entirely** and use XFeat semi-dense matching as the primary satellite matcher. Speed is non-negotiable. - -**Core architectural principles**: -1. **cuVSLAM handles VO** — NVIDIA's CUDA-accelerated library achieves 90fps on Jetson Orin Nano, giving VO essentially "for free" (~11ms/frame). -2. **Keyframe-based satellite matching** — satellite matcher runs on keyframes only (every 3-10 frames), amortizing its cost. Non-keyframes rely on cuVSLAM VO + IMU. -3. **Every keyframe independently attempts satellite-based geo-localization** — this handles disconnected segments natively. -4. **Pipeline parallelism** — satellite matching for frame N overlaps with VO processing of frame N+1 via CUDA streams. - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ OFFLINE (Before Flight) │ -│ Satellite Tiles → Download & Crop → Store as tile pairs │ -│ (Google Maps) (per flight plan) (disk, GeoHash indexed) │ -└─────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ ONLINE (During Flight) │ -│ │ -│ EVERY FRAME (400ms budget): │ -│ ┌────────────────────────────────┐ │ -│ │ Camera → Downsample (CUDA 2ms)│ │ -│ │ → cuVSLAM VO+IMU (~11ms)│──→ ESKF Update → SSE Emit │ -│ └────────────────────────────────┘ ↑ │ -│ │ │ -│ KEYFRAMES ONLY (every 3-10 frames): │ │ -│ ┌────────────────────────────────────┐ │ │ -│ │ Satellite match (async CUDA stream)│─────┘ │ -│ │ LiteSAM or XFeat (see benchmark) │ │ -│ │ (does NOT block VO output) │ │ -│ └────────────────────────────────────┘ │ -│ │ -│ IMU: 100+Hz continuous → ESKF prediction │ -└─────────────────────────────────────────────────────────────────┘ -``` - -## Speed Optimization Techniques - -### 1. cuVSLAM for Visual Odometry (~11ms/frame) -NVIDIA's CUDA-accelerated VO library (v15.0.0, March 2026) achieves 90fps on Jetson Orin Nano. Supports monocular camera + IMU natively. Features: automatic IMU fallback when visual tracking fails, loop closure, Python and C++ APIs. This eliminates custom VO entirely. - -### 2. Keyframe-Based Satellite Matching -Not every frame needs satellite matching. Strategy: -- cuVSLAM provides VO at every frame (high-rate, low-latency) -- Satellite matching triggers on **keyframes** selected by: - - Fixed interval: every 3-10 frames (~1-3.3s between satellite corrections) - - Confidence drop: when ESKF covariance exceeds threshold - - VO failure: when cuVSLAM reports tracking loss (sharp turn) - -### 3. Satellite Matcher Selection (Benchmark-Driven) - -**Candidate A: LiteSAM (opt)** — Best accuracy for satellite-aerial matching (RMSE@30 = 17.86m on UAV-VisLoc). 6.31M params, MobileOne + TAIFormer + MinGRU. Benchmarked at 497ms on Jetson AGX Orin at 1184px. AGX Orin is 3-4x more powerful than Orin Nano Super (275 TOPS vs 67 TOPS, $2000+ vs $249). - -Realistic Orin Nano Super estimates: -- At 1184px: ~1.5-2.0s (unusable) -- At 640px: ~500-800ms (borderline) -- At 480px: ~300-500ms (best case) - -**Candidate B: XFeat semi-dense** — ~50-100ms on Orin Nano Super. Proven on Jetson. Not specifically designed for cross-view satellite-aerial, but fast and reliable. - -**Decision rule**: Benchmark LiteSAM TensorRT FP16 at 480px on Orin Nano Super. If ≤400ms → use LiteSAM. If >400ms → **abandon LiteSAM, use XFeat as primary**. No hybrid compromises — pick one and optimize it. - -### 4. TensorRT FP16 Optimization -LiteSAM's MobileOne backbone is reparameterizable — multi-branch training structure collapses to a single feed-forward path at inference. Combined with TensorRT FP16, this maximizes throughput. INT8 is possible for MobileOne backbone but ViT/transformer components may degrade with INT8. - -### 5. CUDA Stream Pipelining -Overlap operations across consecutive frames: -- Stream A: cuVSLAM VO for current frame (~11ms) + ESKF fusion (~1ms) -- Stream B: Satellite matching for previous keyframe (async) -- CPU: SSE emission, tile management, keyframe selection logic - -### 6. Pre-cropped Satellite Tiles -Offline: for each satellite tile, store both the raw image and a pre-resized version matching the satellite matcher's input resolution. Runtime avoids resize cost. - -## Existing/Competitor Solutions Analysis - -| Solution | Approach | Accuracy | Hardware | Limitations | -|----------|----------|----------|----------|-------------| -| Mateos-Ramirez et al. (2024) | VO (ORB) + satellite keypoint correction + Kalman | 142m mean / 17km (0.83%) | Orange Pi class | No re-localization; ORB only; 1000m+ altitude | -| SatLoc (2025) | DinoV2 + XFeat + optical flow + adaptive fusion | <15m, >90% coverage | Edge (unspecified) | Paper not fully accessible | -| LiteSAM (2025) | MobileOne + TAIFormer + MinGRU subpixel refinement | RMSE@30 = 17.86m on UAV-VisLoc | RTX 3090 (62ms), AGX Orin (497ms) | Not tested on Orin Nano; AGX Orin is 3-4x more powerful | -| TerboucheHacene/visual_localization | SuperPoint/SuperGlue/GIM + VO + satellite | Not quantified | Desktop-class | Not edge-optimized | -| cuVSLAM (NVIDIA, 2025-2026) | CUDA-accelerated VO+SLAM, mono/stereo/IMU | <1% trajectory error (KITTI), <5cm (EuRoC) | Jetson Orin Nano (90fps) | VO only, no satellite matching | - -**Key insight**: Combine cuVSLAM (best-in-class VO for Jetson) with the fastest viable satellite-aerial matcher via ESKF fusion. LiteSAM is the accuracy leader but unproven on Orin Nano Super — benchmark first, abandon for XFeat if too slow. - -## Architecture - -### Component: Visual Odometry - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| cuVSLAM (mono+IMU) | PyCuVSLAM / C++ API | 90fps on Orin Nano, NVIDIA-optimized, loop closure, IMU fallback | Closed-source CUDA library | ✅ Best | -| XFeat frame-to-frame | XFeatTensorRT | 5x faster than SuperPoint, open-source | ~30-50ms total, no IMU integration | ⚠️ Fallback | -| ORB-SLAM3 | OpenCV + custom | Well-understood, open-source | CPU-heavy, ~30fps on Orin | ⚠️ Slower | - -**Selected**: **cuVSLAM (mono+IMU mode)** — purpose-built by NVIDIA for Jetson. ~11ms/frame leaves 389ms for everything else. Auto-fallback to IMU when visual tracking fails. - -### Component: Satellite Image Matching - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| LiteSAM (opt) | TensorRT | Best satellite-aerial accuracy (RMSE@30 17.86m), 6.31M params, subpixel refinement | 497ms on AGX Orin at 1184px; AGX Orin is 3-4x more powerful than Orin Nano Super | ✅ If benchmark passes | -| XFeat semi-dense | XFeatTensorRT | ~50-100ms, lightweight, Jetson-proven | Not designed for cross-view satellite-aerial | ✅ If LiteSAM fails benchmark | -| EfficientLoFTR | TensorRT | Good accuracy, semi-dense | 15.05M params (2.4x LiteSAM), slower | ⚠️ Heavier | -| SuperPoint + LightGlue | TensorRT C++ | Good general matching | Sparse only, worse on satellite-aerial | ⚠️ Not specialized | - -**Selection**: Benchmark-driven. Day-one test on Orin Nano Super: -1. Export LiteSAM (opt) to TensorRT FP16 -2. Measure at 480px, 640px, 800px -3. If ≤400ms at 480px → **LiteSAM** -4. If >400ms at any viable resolution → **XFeat semi-dense** (primary, no hybrid) - -### Component: Sensor Fusion - -| Solution | Tools | Advantages | Limitations | Fit | -|----------|-------|-----------|-------------|-----| -| Error-State EKF (ESKF) | Custom Python/C++ | Lightweight, multi-rate, well-understood | Linear approximation | ✅ Best | -| Hybrid ESKF/UKF | Custom | 49% better accuracy | More complex | ⚠️ Upgrade path | -| Factor Graph (GTSAM) | GTSAM | Best accuracy | Heavy compute | ❌ Too heavy | - -**Selected**: **ESKF** with adaptive measurement noise. State vector: [position(3), velocity(3), orientation_quat(4), accel_bias(3), gyro_bias(3)] = 16 states. - -Measurement sources and rates: -- IMU prediction: 100+Hz -- cuVSLAM VO update: ~3Hz (every frame) -- Satellite update: ~0.3-1Hz (keyframes only, delayed via async pipeline) - -### Component: Satellite Tile Preprocessing (Offline) - -**Selected**: **GeoHash-indexed tile pairs on disk**. - -Pipeline: -1. Define operational area from flight plan -2. Download satellite tiles from Google Maps Tile API at max zoom (18-19) -3. Pre-resize each tile to matcher input resolution -4. Store: original tile + resized tile + metadata (GPS bounds, zoom, GSD) in GeoHash-indexed directory structure -5. Copy to Jetson storage before flight - -### Component: Re-localization (Disconnected Segments) - -**Selected**: **Keyframe satellite matching is always active + expanded search on VO failure**. - -When cuVSLAM reports tracking loss (sharp turn, no features): -1. Immediately flag next frame as keyframe → trigger satellite matching -2. Expand tile search radius (from ±200m to ±1km based on IMU dead-reckoning uncertainty) -3. If match found: position recovered, new segment begins -4. If 3+ consecutive keyframe failures: request user input via API - -### Component: Object Center Coordinates - -Geometric calculation once frame-center GPS is known: -1. Pixel offset from center: (dx_px, dy_px) -2. Convert to meters: dx_m = dx_px × GSD, dy_m = dy_px × GSD -3. Rotate by IMU yaw heading -4. Convert meter offset to lat/lon and add to frame-center GPS - -### Component: API & Streaming - -**Selected**: **FastAPI + sse-starlette**. REST for session management, SSE for real-time position stream. OpenAPI auto-documentation. - -## Processing Time Budget (per frame, 400ms budget) - -### Normal Frame (non-keyframe, ~60-80% of frames) - -| Step | Time | Notes | -|------|------|-------| -| Image capture + transfer | ~10ms | CSI/USB3 | -| Downsample (for cuVSLAM) | ~2ms | OpenCV CUDA | -| cuVSLAM VO+IMU | ~11ms | NVIDIA CUDA-optimized, 90fps capable | -| ESKF fusion (VO+IMU update) | ~1ms | C extension or NumPy | -| SSE emit | ~1ms | Async | -| **Total** | **~25ms** | Well within 400ms | - -### Keyframe Satellite Matching (async, every 3-10 frames) - -Runs asynchronously on a separate CUDA stream — does NOT block per-frame VO output. - -**Path A — LiteSAM (if benchmark passes)**: - -| Step | Time | Notes | -|------|------|-------| -| Downsample to ~480px | ~1ms | OpenCV CUDA | -| Load satellite tile | ~5ms | Pre-resized, from storage | -| LiteSAM (opt) matching | ~300-500ms | TensorRT FP16, 480px, Orin Nano Super estimate | -| Geometric pose (RANSAC) | ~5ms | Homography estimation | -| ESKF satellite update | ~1ms | Delayed measurement | -| **Total** | **~310-510ms** | Async, does not block VO | - -**Path B — XFeat (if LiteSAM abandoned)**: - -| Step | Time | Notes | -|------|------|-------| -| XFeat feature extraction (both images) | ~10-20ms | TensorRT FP16/INT8 | -| XFeat semi-dense matching | ~30-50ms | KNN + refinement | -| Geometric verification (RANSAC) | ~5ms | | -| ESKF satellite update | ~1ms | | -| **Total** | **~50-80ms** | Comfortably within budget | - -### Per-Frame Wall-Clock Latency - -Every frame: -- **VO result emitted in ~25ms** (cuVSLAM + ESKF + SSE) -- Satellite correction arrives asynchronously on keyframes -- Client gets immediate position, then refined position when satellite match completes - -## Memory Budget (Jetson Orin Nano Super, 8GB shared) - -| Component | Memory | Notes | -|-----------|--------|-------| -| OS + runtime | ~1.5GB | JetPack 6.2 + Python | -| cuVSLAM | ~200-300MB | NVIDIA CUDA library + internal state | -| Satellite matcher TensorRT | ~50-100MB | LiteSAM FP16 or XFeat FP16 | -| Current frame (downsampled) | ~2MB | 640×480×3 | -| Satellite tile (pre-resized) | ~1MB | Single active tile | -| ESKF state + buffers | ~10MB | | -| FastAPI + SSE runtime | ~100MB | | -| **Total** | **~1.9-2.4GB** | ~25-30% of 8GB — comfortable margin | - -## Confidence Scoring - -| Level | Condition | Expected Accuracy | -|-------|-----------|-------------------| -| HIGH | Satellite match succeeded + cuVSLAM consistent | <20m | -| MEDIUM | cuVSLAM VO only, recent satellite correction (<500m travel) | 20-50m | -| LOW | cuVSLAM VO only, no recent satellite correction | 50-100m+ | -| VERY LOW | IMU dead-reckoning only (cuVSLAM + satellite both failed) | 100m+ | -| MANUAL | User-provided position | As provided | - -## Key Risks and Mitigations - -| Risk | Likelihood | Impact | Mitigation | -|------|-----------|--------|------------| -| LiteSAM too slow on Orin Nano Super | HIGH | Misses 400ms deadline | **Abandon LiteSAM, use XFeat**. Day-one benchmark is the go/no-go gate | -| cuVSLAM not supporting nadir-only camera well | MEDIUM | VO accuracy degrades | Fall back to XFeat frame-to-frame matching | -| Google Maps satellite quality in conflict zone | HIGH | Satellite matching fails | Accept VO+IMU with higher drift; request user input sooner; alternative satellite providers | -| XFeat cross-view accuracy insufficient | MEDIUM | Position corrections less accurate than LiteSAM | Increase keyframe frequency; multi-tile consensus voting; geometric verification with strict RANSAC | -| cuVSLAM is closed-source | LOW | Hard to debug | Fallback to XFeat VO; cuVSLAM has Python+C++ APIs | - -## Testing Strategy - -### Integration / Functional Tests -- End-to-end pipeline test with real flight data (60 images from input_data/) -- Compare computed positions against ground truth GPS from coordinates.csv -- Measure: percentage within 50m, percentage within 20m -- Test sharp-turn handling: introduce 90-degree heading change in sequence -- Test user-input fallback: simulate 3+ consecutive failures -- Test SSE streaming: verify client receives VO result within 50ms, satellite-corrected result within 500ms -- Test session management: start/stop/restart flight sessions via REST API - -### Non-Functional Tests -- **Day-one benchmark**: LiteSAM TensorRT FP16 at 480/640/800px on Orin Nano Super → go/no-go for LiteSAM -- cuVSLAM benchmark: verify 90fps monocular+IMU on Orin Nano Super -- Performance: measure per-frame processing time (must be <400ms) -- Memory: monitor peak usage during 1000-frame session (must stay <8GB) -- Stress: process 3000 frames without memory leak -- Keyframe strategy: vary interval (2, 3, 5, 10) and measure accuracy vs latency tradeoff - -## References -- LiteSAM (2025): https://www.mdpi.com/2072-4292/17/19/3349 -- LiteSAM code: https://github.com/boyagesmile/LiteSAM -- cuVSLAM (2025-2026): https://github.com/NVlabs/PyCuVSLAM -- PyCuVSLAM API: https://nvlabs.github.io/PyCuVSLAM/api.html -- Mateos-Ramirez et al. (2024): https://www.mdpi.com/2076-3417/14/16/7420 -- SatLoc (2025): https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f -- XFeat (CVPR 2024): https://arxiv.org/abs/2404.19174 -- XFeat TensorRT for Jetson: https://github.com/PranavNedunghat/XFeatTensorRT -- EfficientLoFTR (CVPR 2024): https://github.com/zju3dv/EfficientLoFTR -- JetPack 6.2: https://docs.nvidia.com/jetson/archives/jetpack-archived/jetpack-62/release-notes/ -- Hybrid ESKF/UKF: https://arxiv.org/abs/2512.17505 -- Google Maps Tile API: https://developers.google.com/maps/documentation/tile/satellite - -## Related Artifacts -- AC Assessment: `_docs/00_research/gps_denied_nav/00_ac_assessment.md` -- Tech stack evaluation: `_docs/01_solution/tech_stack.md` diff --git a/_docs/01_solution/solution_draft02.md b/_docs/01_solution/solution_draft02.md deleted file mode 100644 index 288efe4..0000000 --- a/_docs/01_solution/solution_draft02.md +++ /dev/null @@ -1,356 +0,0 @@ -# Solution Draft - -## Assessment Findings - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -|------------------------|----------------------------------------------|-------------| -| LiteSAM at 480px as satellite matcher | **Performance**: 497ms on AGX Orin at 1184px. Orin Nano Super is ~3-4x slower. At 480px estimated ~270-360ms — borderline. Paper uses PyTorch AMP, not TensorRT FP16. TensorRT could bring 2-3x improvement. | Add TensorRT FP16 as mandatory optimization step. Revised estimate at 480px with TensorRT: ~90-180ms. Still benchmark-driven: abandon if >400ms. | -| XFeat as LiteSAM fallback for satellite matching | **Functional**: XFeat is a general-purpose feature matcher, NOT designed for cross-view satellite-aerial gap. May fail on season/lighting differences between UAV and satellite imagery. | **Expand fallback options**: benchmark EfficientLoFTR (designed for weak-texture aerial) alongside XFeat. Consider STHN-style deep homography as third option. See detailed satellite matcher comparison below. | -| SP+LG considered as "sparse only, worse on satellite-aerial" | **Functional**: LiteSAM paper confirms "SP+LG achieves fastest inference speed but at expense of accuracy." Sparse matcher fails on texture-scarce regions. ~180-360ms on Orin Nano Super. | **Reject SP+LG** for both VO and satellite matching. cuVSLAM is 15-33x faster for VO. | -| cuVSLAM on low-texture terrain | **Functional**: cuVSLAM uses Shi-Tomasi corners + Lucas-Kanade tracking. On uniform agricultural fields/water bodies, features will be sparse → frequent tracking loss. IMU fallback lasts only ~1s. No published benchmarks for nadir agricultural terrain. Does NOT guarantee pose recovery after tracking loss. | **CRITICAL RISK**: cuVSLAM will likely fail frequently over low-texture terrain. Mitigation: (1) increase satellite matching frequency in low-texture areas, (2) use IMU dead-reckoning bridge, (3) accept higher drift in featureless segments, (4) XFeat VO as secondary fallback may also struggle on same terrain. | -| cuVSLAM memory estimate ~200-300MB | **Performance**: Map grows over time. For 3000-frame flights (~16min at 3fps), map could reach 500MB-1GB without pruning. | Configure cuVSLAM map pruning. Set max keyframes. Monitor memory. | -| Tile search on VO failure: "expand to ±1km" | **Functional**: Underspecified. Loading 10-20 tiles slow from disk I/O. | Preload tiles within ±2km of flight plan into RAM. Ranked search by IMU dead-reckoning position. | -| LiteSAM resolution | **Performance**: Paper benchmarked at 1184px on AGX Orin (497ms AMP). TensorRT FP16 with reparameterized MobileOne expected 2-3x faster. | Benchmark LiteSAM TRT FP16 at **1280px** on Orin Nano Super. If ≤200ms → use LiteSAM at 1280px. If >200ms → use XFeat. | -| SP+LG proposed for VO by user | **Performance**: ~130-280ms/frame on Orin Nano. cuVSLAM ~8.6ms/frame. No IMU, no loop closure. | **Reject SP+LG for VO.** cuVSLAM 15-33x faster. XFeat frame-to-frame remains fallback. | - -## Product Solution Description - -A real-time GPS-denied visual navigation system for fixed-wing UAVs, running entirely on a Jetson Orin Nano Super (8GB). The system determines frame-center GPS coordinates by fusing three information sources: (1) CUDA-accelerated visual odometry (cuVSLAM), (2) absolute position corrections from satellite image matching, and (3) IMU-based motion prediction. Results stream to clients via REST API + SSE in real time. - -**Hard constraint**: Camera shoots at ~3fps (333-400ms interval). The full pipeline must complete within **400ms per frame**. - -**Satellite matching strategy**: Benchmark LiteSAM TensorRT FP16 at **1280px** on Orin Nano Super as a day-one priority. The paper's AGX Orin benchmark used PyTorch AMP — TensorRT FP16 with reparameterized MobileOne should yield 2-3x additional speedup. **Decision rule: if LiteSAM TRT FP16 at 1280px ≤200ms → use LiteSAM. If >200ms → use XFeat.** - -**Core architectural principles**: -1. **cuVSLAM handles VO** — 116fps on Orin Nano 8GB, ~8.6ms/frame. SuperPoint+LightGlue was evaluated and rejected (15-33x slower, no IMU integration). -2. **Keyframe-based satellite matching** — satellite matcher runs on keyframes only (every 3-10 frames), amortizing cost. Non-keyframes rely on cuVSLAM VO + IMU. -3. **Every keyframe independently attempts satellite-based geo-localization** — handles disconnected segments natively. -4. **Pipeline parallelism** — satellite matching for frame N overlaps with VO processing of frame N+1 via CUDA streams. -5. **Proactive tile loading** — preload tiles within ±2km of flight plan into RAM for fast lookup during expanded search. - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ OFFLINE (Before Flight) │ -│ Satellite Tiles → Download & Crop → Store as tile pairs │ -│ (Google Maps) (per flight plan) (disk, GeoHash indexed) │ -└─────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ ONLINE (During Flight) │ -│ │ -│ EVERY FRAME (400ms budget): │ -│ ┌────────────────────────────────┐ │ -│ │ Camera → Downsample (CUDA 2ms)│ │ -│ │ → cuVSLAM VO+IMU (~9ms) │──→ ESKF Update → SSE Emit │ -│ └────────────────────────────────┘ ↑ │ -│ │ │ -│ KEYFRAMES ONLY (every 3-10 frames): │ │ -│ ┌────────────────────────────────────┐ │ │ -│ │ Satellite match (async CUDA stream)│─────┘ │ -│ │ LiteSAM TRT FP16 or XFeat │ │ -│ │ (does NOT block VO output) │ │ -│ └────────────────────────────────────┘ │ -│ │ -│ IMU: 100+Hz continuous → ESKF prediction │ -│ TILES: ±2km preloaded in RAM from flight plan │ -└─────────────────────────────────────────────────────────────────┘ -``` - -## Speed Optimization Techniques - -### 1. cuVSLAM for Visual Odometry (~9ms/frame) -NVIDIA's CUDA-accelerated VO library (v15.0.0, March 2026) achieves 116fps on Jetson Orin Nano 8GB at 720p. Supports monocular camera + IMU natively. Features: automatic IMU fallback when visual tracking fails, loop closure, Python and C++ APIs. - -**Why not SuperPoint+LightGlue for VO**: SP+LG is 15-33x slower (~130-280ms vs ~9ms). Lacks IMU integration, loop closure, auto-fallback. - -**CRITICAL: cuVSLAM on difficult/even terrain (agricultural fields, water)**: -cuVSLAM uses Shi-Tomasi corner detection + Lucas-Kanade optical flow tracking (classical features, not learned). On uniform agricultural terrain or water bodies: -- Very few corners will be detected → sparse/unreliable tracking -- Frequent keyframe creation → heavier compute -- Tracking loss → IMU fallback (~1 second) → constant-velocity integrator (~0.5s more) -- cuVSLAM does NOT guarantee pose recovery after tracking loss -- All published benchmarks (KITTI: urban/suburban, EuRoC: indoor) do NOT include nadir agricultural terrain -- Multi-stereo mode helps with featureless surfaces, but we have mono camera only - -**Mitigation strategy for low-texture terrain**: -1. **Increase satellite matching frequency**: In low-texture areas (detected by cuVSLAM's keypoint count dropping), switch from every 3-10 frames to every frame -2. **IMU dead-reckoning bridge**: When cuVSLAM reports tracking loss, ESKF continues with IMU prediction. At 3fps with ~1.5s IMU bridge, that covers ~4-5 frames -3. **Accept higher drift**: In featureless segments, position accuracy degrades to IMU-only level (50-100m+ over ~10s). Satellite matching must recover absolute position when texture returns -4. **Keypoint density monitoring**: Track cuVSLAM's number of tracked features per frame. When below threshold (e.g., <50), proactively trigger satellite matching -5. **XFeat frame-to-frame as VO fallback**: XFeat uses learned features that may detect texture invisible to Shi-Tomasi corners. But XFeat may also struggle on truly uniform terrain - -### 2. Keyframe-Based Satellite Matching -Not every frame needs satellite matching. Strategy: -- cuVSLAM provides VO at every frame (high-rate, low-latency) -- Satellite matching triggers on **keyframes** selected by: - - Fixed interval: every 3-10 frames (~1-3.3s between satellite corrections) - - Confidence drop: when ESKF covariance exceeds threshold - - VO failure: when cuVSLAM reports tracking loss (sharp turn) - -### 3. Satellite Matcher Selection (Benchmark-Driven) - -**Important context**: Our UAV-to-satellite matching is EASIER than typical cross-view geo-localization problems. Both the UAV camera and satellite imagery are approximately nadir (top-down). The main challenges are season/lighting differences, resolution mismatch, and temporal changes — not the extreme viewpoint gap seen in ground-to-satellite matching. This means even general-purpose matchers may perform well. - -**Candidate A: LiteSAM (opt) with TensorRT FP16 at 1280px** — Best satellite-aerial accuracy (RMSE@30 = 17.86m on UAV-VisLoc). 6.31M params, MobileOne reparameterizable for TensorRT. Paper benchmarked at 497ms on AGX Orin using AMP at 1184px. TensorRT FP16 with reparameterized MobileOne expected 2-3x faster than AMP. At 1280px (close to paper's 1184px benchmark resolution), accuracy should match published results. - -Orin Nano Super TensorRT FP16 estimate at 1280px: -- AGX Orin AMP @ 1184px: 497ms -- TRT FP16 speedup over AMP: ~2-3x → AGX Orin TRT estimate: ~165-250ms -- Orin Nano Super is ~3-4x slower → estimate: ~500-1000ms without TRT -- With TRT FP16: **~165-330ms** (realistic range) -- Go/no-go threshold: **≤200ms** - -**Candidate B (fallback): XFeat semi-dense** — ~50-100ms on Orin Nano Super. Proven on Jetson. General-purpose, not designed for cross-view gap. FASTEST option. Since our cross-view gap is small (both nadir), XFeat may work adequately for this specific use case. - -**Other evaluated options (not selected)**: - -- **EfficientLoFTR**: Semi-dense, 15.05M params, handles weak-texture well. ~20% slower than LiteSAM. Strong option if LiteSAM codebase proves difficult to export to TRT, but larger model footprint. -- **Deep Homography (STHN-style)**: End-to-end homography estimation, no feature/RANSAC pipeline. 4.24m at 50m range. Interesting future option but needs RGB retraining — higher implementation risk. -- **PFED and retrieval-based methods**: Image RETRIEVAL only (identifies which tile matches), not pixel-level matching. We already know which tile to use from ESKF position. -- **SuperPoint+LightGlue**: Sparse matcher. LiteSAM paper confirms worse satellite-aerial accuracy. Slower than XFeat. - -**Decision rule** (day-one on Orin Nano Super): -1. Export LiteSAM (opt) to TensorRT FP16 -2. Benchmark at **1280px** -3. **If ≤200ms → use LiteSAM at 1280px** -4. **If >200ms → use XFeat** - -### 4. TensorRT FP16 Optimization -LiteSAM's MobileOne backbone is reparameterizable — multi-branch training structure collapses to a single feed-forward path at inference. Combined with TensorRT FP16, this maximizes throughput. **Do NOT use INT8 on transformer components** (TAIFormer) — accuracy degrades. INT8 is safe only for the MobileOne backbone CNN layers. - -### 5. CUDA Stream Pipelining -Overlap operations across consecutive frames: -- Stream A: cuVSLAM VO for current frame (~9ms) + ESKF fusion (~1ms) -- Stream B: Satellite matching for previous keyframe (async) -- CPU: SSE emission, tile management, keyframe selection logic - -### 6. Proactive Tile Loading -**Change from draft01**: Instead of loading tiles on-demand from disk, preload tiles within ±2km of the flight plan into RAM at session start. This eliminates disk I/O latency during flight. For a 50km flight path, ~2000 tiles at zoom 19 ≈ ~200MB RAM — well within budget. - -On VO failure / expanded search: -1. Compute IMU dead-reckoning position -2. Rank preloaded tiles by distance to predicted position -3. Try top 3 tiles (not all tiles in ±1km radius) -4. If no match in top 3, expand to next 3 - -## Existing/Competitor Solutions Analysis - -| Solution | Approach | Accuracy | Hardware | Limitations | -|----------|----------|----------|----------|-------------| -| Mateos-Ramirez et al. (2024) | VO (ORB) + satellite keypoint correction + Kalman | 142m mean / 17km (0.83%) | Orange Pi class | No re-localization; ORB only; 1000m+ altitude | -| SatLoc (2025) | DinoV2 + XFeat + optical flow + adaptive fusion | <15m, >90% coverage | Edge (unspecified) | Paper not fully accessible | -| LiteSAM (2025) | MobileOne + TAIFormer + MinGRU subpixel refinement | RMSE@30 = 17.86m on UAV-VisLoc | RTX 3090 (62ms), AGX Orin (497ms@1184px) | Not tested on Orin Nano; AGX Orin is 3-4x more powerful | -| TerboucheHacene/visual_localization | SuperPoint/SuperGlue/GIM + VO + satellite | Not quantified | Desktop-class | Not edge-optimized | -| cuVSLAM (NVIDIA, 2025-2026) | CUDA-accelerated VO+SLAM, mono/stereo/IMU | <1% trajectory error (KITTI), <5cm (EuRoC) | Jetson Orin Nano (116fps) | VO only, no satellite matching | -| VRLM (2024) | FocalNet backbone + multi-scale feature fusion | 83.35% MA@20 | Desktop | Not edge-optimized | -| Scale-Aware UAV-to-Satellite (2026) | Semantic geometric + metric scale recovery | N/A | Desktop | Addresses scale ambiguity problem | -| EfficientLoFTR (CVPR 2024) | Aggregated attention + adaptive token selection, semi-dense | Competitive with LiteSAM | 2.5x faster than LoFTR, TRT available | 15.05M params, heavier than LiteSAM | -| PFED (2025) | Knowledge distillation + multi-view refinement, retrieval | 97.15% Recall@1 (University-1652) | AGX Orin (251.5 FPS) | Retrieval only, not pixel-level matching | -| STHN (IEEE RA-L 2024) | Deep homography estimation, coarse-to-fine | 4.24m at 50m range | Open-source, lightweight | Trained on thermal, needs RGB retraining | -| Hierarchical AVL (2025) | DINOv2 retrieval + SuperPoint matching | 64.5-95% success rate | ROS, IMU integration | Two-stage complexity | -| JointLoc (IROS 2024) | Retrieval + VO fusion, adaptive weighting | 0.237m RMSE over 1km | Open-source | Designed for Mars/planetary, needs adaptation | - -## Architecture - -### Component: Visual Odometry - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| cuVSLAM (mono+IMU) | PyCuVSLAM v15.0.0 | 116fps on Orin Nano, NVIDIA-optimized, loop closure, IMU fallback | Closed-source CUDA library | ~9ms/frame | ✅ Best | -| XFeat frame-to-frame | XFeatTensorRT | 5x faster than SuperPoint, open-source | ~30-50ms total, no IMU integration | ~30-50ms/frame | ⚠️ Fallback | -| SuperPoint+LightGlue | LightGlue-ONNX TRT | Good accuracy, adaptive pruning | ~130-280ms, no IMU, no loop closure | ~130-280ms/frame | ❌ Rejected | -| ORB-SLAM3 | OpenCV + custom | Well-understood, open-source | CPU-heavy, ~30fps on Orin | ~33ms/frame | ⚠️ Slower | - -**Selected**: **cuVSLAM (mono+IMU mode)** — 116fps, purpose-built by NVIDIA for Jetson. Auto-fallback to IMU when visual tracking fails. - -**SP+LG rejection rationale**: 15-33x slower than cuVSLAM. No built-in IMU fusion, loop closure, or tracking failure detection. Building these features around SP+LG would take significant development time and still be slower. XFeat at ~30-50ms is a better fallback for VO if cuVSLAM fails on nadir camera. - -### Component: Satellite Image Matching - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| LiteSAM (opt) TRT FP16 @ 1280px | TensorRT | Best satellite-aerial accuracy (RMSE@30 17.86m), 6.31M params, subpixel refinement | Untested on Orin Nano Super with TensorRT | Est. ~165-330ms @ 1280px TRT FP16 | ✅ If ≤200ms | -| XFeat semi-dense | XFeatTensorRT | ~50-100ms, lightweight, Jetson-proven, fastest | General-purpose, not designed for cross-view. Our nadir-nadir gap is small → may work. | ~50-100ms | ✅ Fallback if LiteSAM >200ms | - -**Selection**: Day-one benchmark on Orin Nano Super: -1. Export LiteSAM (opt) to TensorRT FP16 -2. Benchmark at **1280px** -3. **If ≤200ms → LiteSAM at 1280px** -4. **If >200ms → XFeat** - -### Component: Sensor Fusion - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| Error-State EKF (ESKF) | Custom Python/C++ | Lightweight, multi-rate, well-understood | Linear approximation | <1ms/step | ✅ Best | -| Hybrid ESKF/UKF | Custom | 49% better accuracy | More complex | ~2-3ms/step | ⚠️ Upgrade path | -| Factor Graph (GTSAM) | GTSAM | Best accuracy | Heavy compute | ~10-50ms/step | ❌ Too heavy | - -**Selected**: **ESKF** with adaptive measurement noise. State vector: [position(3), velocity(3), orientation_quat(4), accel_bias(3), gyro_bias(3)] = 16 states. - -### Component: Satellite Tile Preprocessing (Offline) - -**Selected**: **GeoHash-indexed tile pairs on disk + RAM preloading**. - -Pipeline: -1. Define operational area from flight plan -2. Download satellite tiles from Google Maps Tile API at max zoom (18-19) -3. Pre-resize each tile to matcher input resolution -4. Store: original tile + resized tile + metadata (GPS bounds, zoom, GSD) in GeoHash-indexed directory structure -5. Copy to Jetson storage before flight -6. **At session start**: preload tiles within ±2km of flight plan into RAM (~200MB for 50km route) - -### Component: Re-localization (Disconnected Segments) - -**Selected**: **Keyframe satellite matching is always active + ranked tile search on VO failure**. - -When cuVSLAM reports tracking loss (sharp turn, no features): -1. Immediately flag next frame as keyframe → trigger satellite matching -2. Compute IMU dead-reckoning position since last known position -3. Rank preloaded tiles by distance to dead-reckoning position -4. Try top 3 tiles sequentially (not all tiles in radius) -5. If match found: position recovered, new segment begins -6. If 3 consecutive keyframe failures across top tiles: expand to next 3 tiles -7. If still no match after 3+ full attempts: request user input via API - -### Component: Object Center Coordinates - -Geometric calculation once frame-center GPS is known: -1. Pixel offset from center: (dx_px, dy_px) -2. Convert to meters: dx_m = dx_px × GSD, dy_m = dy_px × GSD -3. Rotate by IMU yaw heading -4. Convert meter offset to lat/lon and add to frame-center GPS - -### Component: API & Streaming - -**Selected**: **FastAPI + sse-starlette**. REST for session management, SSE for real-time position stream. OpenAPI auto-documentation. - -## Processing Time Budget (per frame, 400ms budget) - -### Normal Frame (non-keyframe, ~60-80% of frames) - -| Step | Time | Notes | -|------|------|-------| -| Image capture + transfer | ~10ms | CSI/USB3 | -| Downsample (for cuVSLAM) | ~2ms | OpenCV CUDA | -| cuVSLAM VO+IMU | ~9ms | NVIDIA CUDA-optimized, 116fps capable | -| ESKF fusion (VO+IMU update) | ~1ms | C extension or NumPy | -| SSE emit | ~1ms | Async | -| **Total** | **~23ms** | Well within 400ms | - -### Keyframe Satellite Matching (async, every 3-10 frames) - -Runs asynchronously on a separate CUDA stream — does NOT block per-frame VO output. - -**Path A — LiteSAM TRT FP16 at 1280px (if ≤200ms benchmark)**: - -| Step | Time | Notes | -|------|------|-------| -| Downsample to 1280px | ~1ms | OpenCV CUDA | -| Load satellite tile | ~1ms | Pre-loaded in RAM | -| LiteSAM (opt) TRT FP16 matching | ≤200ms | TensorRT FP16, 1280px, go/no-go threshold | -| Geometric pose (RANSAC) | ~5ms | Homography estimation | -| ESKF satellite update | ~1ms | Delayed measurement | -| **Total** | **≤210ms** | Async, within budget | - -**Path B — XFeat (if LiteSAM >200ms)**: - -| Step | Time | Notes | -|------|------|-------| -| XFeat feature extraction (both images) | ~10-20ms | TensorRT FP16/INT8 | -| XFeat semi-dense matching | ~30-50ms | KNN + refinement | -| Geometric verification (RANSAC) | ~5ms | | -| ESKF satellite update | ~1ms | | -| **Total** | **~50-80ms** | Comfortably within budget | - -## Memory Budget (Jetson Orin Nano Super, 8GB shared) - -| Component | Memory | Notes | -|-----------|--------|-------| -| OS + runtime | ~1.5GB | JetPack 6.2 + Python | -| cuVSLAM | ~200-500MB | CUDA library + map state. **Configure map pruning for 3000-frame flights** | -| Satellite matcher TensorRT | ~50-100MB | LiteSAM FP16 or XFeat FP16 | -| Preloaded satellite tiles | ~200MB | ±2km of flight plan, pre-resized | -| Current frame (downsampled) | ~2MB | 640×480×3 | -| ESKF state + buffers | ~10MB | | -| FastAPI + SSE runtime | ~100MB | | -| **Total** | **~2.1-2.9GB** | ~26-36% of 8GB — comfortable margin | - -## Confidence Scoring - -| Level | Condition | Expected Accuracy | -|-------|-----------|-------------------| -| HIGH | Satellite match succeeded + cuVSLAM consistent | <20m | -| MEDIUM | cuVSLAM VO only, recent satellite correction (<500m travel) | 20-50m | -| LOW | cuVSLAM VO only, no recent satellite correction | 50-100m+ | -| VERY LOW | IMU dead-reckoning only (cuVSLAM + satellite both failed) | 100m+ | -| MANUAL | User-provided position | As provided | - -## Key Risks and Mitigations - -| Risk | Likelihood | Impact | Mitigation | -|------|-----------|--------|------------| -| **cuVSLAM fails on low-texture agricultural terrain** | **HIGH** | Frequent tracking loss, degraded VO | Increase satellite matching frequency when keypoint count drops. IMU dead-reckoning bridge (~1.5s). Accept higher drift in featureless segments. Satellite matching recovers position when texture returns. | -| LiteSAM TRT FP16 >200ms at 1280px on Orin Nano Super | MEDIUM | Must use XFeat instead (less accurate for cross-view) | Day-one TRT FP16 benchmark. If >200ms → XFeat. Since our nadir-nadir gap is small, XFeat may still perform adequately. | -| XFeat cross-view accuracy insufficient | MEDIUM | Satellite corrections less accurate | Benchmark XFeat on actual operational area satellite-aerial pairs. Increase keyframe frequency; multi-tile consensus; strict RANSAC. | -| cuVSLAM map memory growth on long flights | MEDIUM | Memory pressure | Configure map pruning, set max keyframes. Monitor memory. | -| Google Maps satellite quality in conflict zone | HIGH | Satellite matching fails | Accept VO+IMU with higher drift; request user input sooner; alternative satellite providers | -| cuVSLAM is closed-source, no nadir benchmarks | MEDIUM | Unknown failure modes over farmland | Extensive testing with real nadir UAV imagery before deployment. XFeat VO as fallback (also uses learned features). | -| Tile I/O bottleneck during expanded search | LOW | Delayed re-localization | Preload ±2km tiles in RAM; ranked search instead of exhaustive | - -## Testing Strategy - -### Integration / Functional Tests -- End-to-end pipeline test with real flight data (60 images from input_data/) -- Compare computed positions against ground truth GPS from coordinates.csv -- Measure: percentage within 50m, percentage within 20m -- Test sharp-turn handling: introduce 90-degree heading change in sequence -- Test user-input fallback: simulate 3+ consecutive failures -- Test SSE streaming: verify client receives VO result within 50ms, satellite-corrected result within 500ms -- Test session management: start/stop/restart flight sessions via REST API -- Test cuVSLAM map memory: run 3000-frame session, monitor memory growth - -### Non-Functional Tests -- **Day-one satellite matcher benchmark**: LiteSAM TRT FP16 at **1280px** on Orin Nano Super. If ≤200ms → use LiteSAM. If >200ms → use XFeat. Also measure accuracy on test satellite-aerial pairs for both. -- cuVSLAM benchmark: verify 116fps monocular+IMU on Orin Nano Super -- **cuVSLAM terrain stress test**: test with nadir camera over (a) urban/structured terrain, (b) agricultural fields, (c) water/uniform terrain, (d) forest. Measure: keypoint count, tracking success rate, drift per 100 frames, IMU fallback frequency -- cuVSLAM keypoint monitoring: verify that low-keypoint detection triggers increased satellite matching -- Performance: measure per-frame processing time (must be <400ms) -- Memory: monitor peak usage during 3000-frame session (must stay <8GB) -- Stress: process 3000 frames without memory leak -- Keyframe strategy: vary interval (2, 3, 5, 10) and measure accuracy vs latency tradeoff -- Tile preloading: verify RAM usage of preloaded tiles for 50km flight plan - -## References -- EfficientLoFTR (CVPR 2024): https://github.com/zju3dv/EfficientLoFTR -- EfficientLoFTR paper: https://zju3dv.github.io/efficientloftr/ -- LoFTR TensorRT adaptation: https://github.com/Kolkir/LoFTR_TRT -- PFED (2025): https://github.com/SkyEyeLoc/PFED -- STHN (IEEE RA-L 2024): https://github.com/arplaboratory/STHN -- JointLoc (IROS 2024): https://github.com/LuoXubo/JointLoc -- Hierarchical AVL (MDPI 2025): https://www.mdpi.com/2072-4292/17/20/3470 -- LiteSAM (2025): https://www.mdpi.com/2072-4292/17/19/3349 -- LiteSAM code: https://github.com/boyagesmile/LiteSAM -- cuVSLAM (2025-2026): https://github.com/NVlabs/PyCuVSLAM -- cuVSLAM paper: https://arxiv.org/abs/2506.04359 -- PyCuVSLAM API: https://nvlabs.github.io/PyCuVSLAM/api.html -- Intermodalics cuVSLAM benchmark: https://www.intermodalics.ai/blog/nvidia-isaac-ros-in-depth-cuvslam-and-the-dp3-1-release -- Mateos-Ramirez et al. (2024): https://www.mdpi.com/2076-3417/14/16/7420 -- SatLoc (2025): https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f -- XFeat (CVPR 2024): https://arxiv.org/abs/2404.19174 -- XFeat TensorRT for Jetson: https://github.com/PranavNedunghat/XFeatTensorRT -- EfficientLoFTR (CVPR 2024): https://github.com/zju3dv/EfficientLoFTR -- LightGlue (ICCV 2023): https://github.com/cvg/LightGlue -- LightGlue TensorRT: https://fabio-sim.github.io/blog/accelerating-lightglue-inference-onnx-runtime-tensorrt/ -- LightGlue TRT Jetson: https://github.com/qdLMF/LightGlue-with-FlashAttentionV2-TensorRT -- ForestVO / SP+LG VO: https://arxiv.org/html/2504.01261v1 -- vo_lightglue (SP+LG VO): https://github.com/himadrir/vo_lightglue -- JetPack 6.2: https://docs.nvidia.com/jetson/archives/jetpack-archived/jetpack-62/release-notes/ -- Hybrid ESKF/UKF: https://arxiv.org/abs/2512.17505 -- Google Maps Tile API: https://developers.google.com/maps/documentation/tile/satellite - -## Related Artifacts -- AC Assessment: `_docs/00_research/gps_denied_nav/00_ac_assessment.md` -- Tech stack evaluation: `_docs/01_solution/tech_stack.md` -- Security analysis: `_docs/01_solution/security_analysis.md` diff --git a/_docs/01_solution/solution_draft03.md b/_docs/01_solution/solution_draft03.md deleted file mode 100644 index 051f04d..0000000 --- a/_docs/01_solution/solution_draft03.md +++ /dev/null @@ -1,491 +0,0 @@ -# Solution Draft - -## Assessment Findings - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -|------------------------|----------------------------------------------|-------------| -| FastAPI + SSE as primary output | **Functional**: New AC requires MAVLink GPS_INPUT to flight controller, not REST/SSE. The system must act as a GPS replacement module. SSE is wrong output channel. | **Replace with pymavlink GPS_INPUT sender**. Send GPS_INPUT at 5-10Hz to flight controller via UART. Retain minimal FastAPI only for local IPC (object localization API). | -| No ground station integration | **Functional**: New AC requires streaming position+confidence to ground station and receiving re-localization commands via telemetry. Draft02 had no telemetry. | **MAVLink telemetry integration**: GPS data forwarded automatically by flight controller. Custom data via NAMED_VALUE_FLOAT (confidence, drift). Re-localization hints via COMMAND_LONG listener. | -| MAVSDK library (per restriction) | **Functional**: MAVSDK-Python v3.15.3 cannot send GPS_INPUT messages. Feature requested since 2021, still unresolved. This is a blocking limitation for the core output function. | **Use pymavlink** for all MAVLink communication. pymavlink provides `gps_input_send()` and full MAVLink v2 access. Note conflict with restriction — pymavlink is the only viable option. | -| 3fps camera → ~3Hz output | **Performance**: ArduPilot GPS_RATE_MS minimum is 5Hz (200ms). 3Hz camera output is below minimum. Flight controller EKF may not fuse properly. | **IMU-interpolated 5-10Hz GPS_INPUT**: ESKF prediction runs at 100+Hz internally. Emit predicted state as GPS_INPUT at 5-10Hz. Camera corrections arrive at 3Hz within this stream. | -| No startup/failsafe procedures | **Functional**: New AC requires init from last GPS, reboot recovery, IMU-only fallback. Draft02 assumed position was already known. | **Full lifecycle management**: (1) Boot → read GPS from flight controller → init ESKF. (2) Reboot → read IMU-extrapolated position → re-init. (3) N-second failure → stop GPS_INPUT → autopilot falls back to IMU. | -| Basic object localization (nadir only) | **Functional**: New AC adds AI camera with configurable angle and zoom. Nadir pixel-to-GPS is insufficient. | **Trigonometric projection for oblique camera**: ground_distance = alt × tan(tilt), bearing = heading + pan + pixel offset. Local API for AI system requests. | -| No thermal management | **Performance**: Jetson Orin Nano Super throttles at 80°C (GPU drops 1GHz→300MHz = 3x slowdown). Could blow 400ms budget. | **Thermal monitoring + adaptive pipeline**: Use 25W mode. Monitor via tegrastats. If temp >75°C → reduce satellite matching frequency. If >80°C → VO+IMU only. | -| ESKF covariance without explicit drift budget | **Functional**: New AC requires max 100m cumulative VO drift between satellite anchors. Draft02 uses covariance for keyframe selection but no explicit budget. | **Drift budget tracker**: √(σ_x² + σ_y²) from ESKF as drift estimate. When approaching 100m → force every-frame satellite matching. Report via horiz_accuracy in GPS_INPUT. | -| No satellite imagery validation | **Functional**: New AC requires ≥0.5 m/pixel, <2 years old. Draft02 didn't validate. | **Preprocessing validation step**: Check zoom 19 availability (0.3 m/pixel). Fall back to zoom 18 (0.6 m/pixel). Flag stale tiles. | -| "Ask user via API" for re-localization | **Functional**: New AC says send re-localization request to ground station via telemetry link, not REST API. Operator sends hint via telemetry. | **MAVLink re-localization protocol**: On 3 consecutive failures → send STATUSTEXT alert to ground station. Operator sends COMMAND_LONG with approximate lat/lon. System uses hint to constrain tile search. | - -## Product Solution Description - -A real-time GPS-denied visual navigation system for fixed-wing UAVs, running on a Jetson Orin Nano Super (8GB). The system replaces the GPS module for the flight controller by sending MAVLink GPS_INPUT messages via pymavlink over UART. Position is determined by fusing: (1) CUDA-accelerated visual odometry (cuVSLAM), (2) absolute position corrections from satellite image matching, and (3) IMU data from the flight controller. GPS_INPUT is sent at 5-10Hz, with camera-based corrections at 3Hz and IMU prediction filling the gaps. - -**Hard constraint**: Camera shoots at ~3fps (333ms interval). The full VO+ESKF pipeline must complete within 400ms per frame. GPS_INPUT output rate: 5-10Hz minimum (ArduPilot EKF requirement). - -**Output architecture**: -- **Primary**: pymavlink → GPS_INPUT to flight controller via UART (replaces GPS module) -- **Telemetry**: Flight controller auto-forwards GPS data to ground station. Custom NAMED_VALUE_FLOAT for confidence/drift at 1Hz -- **Commands**: Ground station → COMMAND_LONG → flight controller → pymavlink listener on companion computer -- **Local IPC**: Minimal FastAPI on localhost for object localization requests from AI systems - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ OFFLINE (Before Flight) │ -│ Satellite Tiles → Download & Validate → Pre-resize → Store │ -│ (Google Maps) (≥0.5m/px, <2yr) (matcher res) (GeoHash) │ -│ Copy to Jetson storage │ -└─────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ ONLINE (During Flight) │ -│ │ -│ STARTUP: │ -│ pymavlink → read GLOBAL_POSITION_INT → init ESKF → start cuVSLAM │ -│ │ -│ EVERY FRAME (3fps, 333ms interval): │ -│ ┌──────────────────────────────────────┐ │ -│ │ Nav Camera → Downsample (CUDA ~2ms) │ │ -│ │ → cuVSLAM VO+IMU (~9ms) │ │ -│ │ → ESKF measurement update │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ 5-10Hz CONTINUOUS (between camera frames): │ -│ ┌──────────────────────────────────────┐ │ -│ │ ESKF IMU prediction → GPS_INPUT send │──→ Flight Controller │ -│ │ (pymavlink, every 100-200ms) │ (GPS1_TYPE=14) │ -│ └──────────────────────────────────────┘ │ -│ │ -│ KEYFRAMES (every 3-10 frames, async): │ -│ ┌──────────────────────────────────────┐ │ -│ │ Satellite match (CUDA stream B) │──→ ESKF correction │ -│ │ LiteSAM TRT FP16 or XFeat │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ TELEMETRY (1Hz): │ -│ ┌──────────────────────────────────────┐ │ -│ │ NAMED_VALUE_FLOAT: confidence, drift │──→ Ground Station │ -│ │ STATUSTEXT: alerts, re-loc requests │ (via telemetry radio) │ -│ └──────────────────────────────────────┘ │ -│ │ -│ COMMANDS (from ground station): │ -│ ┌──────────────────────────────────────┐ │ -│ │ Listen COMMAND_LONG: re-loc hint │←── Ground Station │ -│ │ (lat/lon from operator) │ (via telemetry radio) │ -│ └──────────────────────────────────────┘ │ -│ │ -│ LOCAL IPC: │ -│ ┌──────────────────────────────────────┐ │ -│ │ FastAPI localhost:8000 │←── AI Detection System │ -│ │ POST /localize (object GPS calc) │ │ -│ │ GET /status (system health) │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ IMU: 100+Hz from flight controller → ESKF prediction │ -│ TILES: ±2km preloaded in RAM from flight plan │ -│ THERMAL: Monitor via tegrastats, adaptive pipeline throttling │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Speed Optimization Techniques - -### 1. cuVSLAM for Visual Odometry (~9ms/frame) -NVIDIA's CUDA-accelerated VO library (PyCuVSLAM v15.0.0, March 2026) achieves 116fps on Jetson Orin Nano 8GB at 720p. Supports monocular camera + IMU natively. Auto-fallback to IMU when visual tracking fails, loop closure, Python and C++ APIs. - -**CRITICAL: cuVSLAM on low-texture terrain (agricultural fields, water)**: -cuVSLAM uses Shi-Tomasi corners + Lucas-Kanade optical flow (classical features). On uniform agricultural terrain: -- Few corners detected → sparse/unreliable tracking -- Frequent keyframe creation → heavier compute -- Tracking loss → IMU fallback (~1s) → constant-velocity integrator (~0.5s) -- cuVSLAM does NOT guarantee pose recovery after tracking loss - -**Mitigation**: -1. Increase satellite matching frequency when cuVSLAM keypoint count drops -2. IMU dead-reckoning bridge via ESKF (continues GPS_INPUT output during tracking loss) -3. Accept higher drift in featureless segments — report via horiz_accuracy -4. Keypoint density monitoring triggers adaptive satellite matching - -### 2. Keyframe-Based Satellite Matching -Not every frame needs satellite matching: -- cuVSLAM provides VO at every frame (~9ms) -- Satellite matching triggers on keyframes selected by: - - Fixed interval: every 3-10 frames - - ESKF covariance exceeds threshold (drift approaching budget) - - VO failure: cuVSLAM reports tracking loss - - Thermal: reduce frequency if temperature high - -### 3. Satellite Matcher Selection (Benchmark-Driven) - -**Context**: Our UAV-to-satellite matching is nadir-to-nadir (both top-down). Challenges are season/lighting differences and temporal changes, not extreme viewpoint gaps. - -**Candidate A: LiteSAM (opt) TRT FP16 @ 1280px** — Best satellite-aerial accuracy (RMSE@30 = 17.86m on UAV-VisLoc). 6.31M params. TensorRT FP16 with reparameterized MobileOne. Estimated ~165-330ms on Orin Nano Super with TRT FP16. - -**Candidate B: XFeat semi-dense** — ~50-100ms on Orin Nano Super. Fastest option. General-purpose but our nadir-nadir gap is small. - -**Decision rule** (day-one on Orin Nano Super): -1. Export LiteSAM (opt) to TensorRT FP16 -2. Benchmark at 1280px -3. If ≤200ms → LiteSAM at 1280px -4. If >200ms → XFeat - -### 4. TensorRT FP16 Optimization -LiteSAM's MobileOne backbone is reparameterizable — multi-branch collapses to single feed-forward at inference. INT8 safe only for MobileOne CNN layers, NOT for TAIFormer transformer components. - -### 5. CUDA Stream Pipelining -- Stream A: cuVSLAM VO for current frame (~9ms) + ESKF fusion (~1ms) -- Stream B: Satellite matching for previous keyframe (async, does not block VO) -- CPU: GPS_INPUT output loop, NAMED_VALUE_FLOAT, command listener, tile management - -### 6. Proactive Tile Loading -Preload tiles within ±2km of flight plan into RAM at startup. For a 50km route, ~2000 tiles at zoom 19 ≈ ~200MB. Eliminates disk I/O during flight. - -On VO failure / expanded search: -1. Compute IMU dead-reckoning position -2. Rank preloaded tiles by distance to predicted position -3. Try top 3 tiles, then expand - -### 7. 5-10Hz GPS_INPUT Output Loop -Dedicated thread/coroutine sends GPS_INPUT at fixed rate (5-10Hz): -1. Read current ESKF state (position, velocity, covariance) -2. Compute horiz_accuracy from √(σ_x² + σ_y²) -3. Set fix_type based on last correction type (3=satellite-corrected, 2=VO-only, 1=IMU-only) -4. Send via `mav.gps_input_send()` -5. Sleep until next interval - -This decouples camera frame rate (3fps) from GPS_INPUT rate (5-10Hz). - -## Existing/Competitor Solutions Analysis - -| Solution | Approach | Accuracy | Hardware | Limitations | -|----------|----------|----------|----------|-------------| -| Mateos-Ramirez et al. (2024) | VO (ORB) + satellite keypoint correction + Kalman | 142m mean / 17km (0.83%) | Orange Pi class | No re-localization; ORB only; 1000m+ altitude | -| SatLoc (2025) | DinoV2 + XFeat + optical flow + adaptive fusion | <15m, >90% coverage | Edge (unspecified) | Paper not fully accessible | -| LiteSAM (2025) | MobileOne + TAIFormer + MinGRU subpixel refinement | RMSE@30 = 17.86m on UAV-VisLoc | RTX 3090 (62ms), AGX Orin (497ms@1184px) | Not tested on Orin Nano | -| cuVSLAM (NVIDIA, 2025-2026) | CUDA-accelerated VO+SLAM, mono/stereo/IMU | <1% trajectory error (KITTI) | Jetson Orin Nano (116fps) | VO only, no satellite matching | -| EfficientLoFTR (CVPR 2024) | Aggregated attention + adaptive token selection | Competitive with LiteSAM | TRT available | 15.05M params, heavier | -| STHN (IEEE RA-L 2024) | Deep homography estimation | 4.24m at 50m range | Lightweight | Needs RGB retraining | -| JointLoc (IROS 2024) | Retrieval + VO fusion, adaptive weighting | 0.237m RMSE over 1km | Open-source | Planetary, needs adaptation | - -## Architecture - -### Component: Flight Controller Integration (NEW) - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| pymavlink GPS_INPUT | pymavlink | Full MAVLink v2 access, GPS_INPUT support, pure Python, aarch64 compatible | Lower-level API, manual message handling | ~1ms per send | ✅ Best | -| MAVSDK-Python TelemetryServer | MAVSDK v3.15.3 | Higher-level API, aarch64 wheels | NO GPS_INPUT support, no custom messages | N/A — missing feature | ❌ Blocked | -| MAVSDK C++ MavlinkDirect | MAVSDK v4 (future) | Custom message support planned | Not available in Python wrapper yet | N/A — not released | ❌ Not available | -| MAVROS (ROS) | ROS + MAVROS | Full GPS_INPUT support, ROS ecosystem | Heavy ROS dependency, complex setup, unnecessary overhead | ~5ms overhead | ⚠️ Overkill | - -**Selected**: **pymavlink** — only viable Python library for GPS_INPUT. Pure Python, works on aarch64, full MAVLink v2 message set. - -**Restriction note**: restrictions.md specifies "MAVSDK library" but MAVSDK-Python cannot send GPS_INPUT (confirmed: Issue #320, open since 2021). pymavlink is the necessary alternative. - -Configuration: -- Connection: UART (`/dev/ttyTHS0` or `/dev/ttyTHS1` on Jetson, 115200-921600 baud) -- Flight controller: GPS1_TYPE=14, SERIAL2_PROTOCOL=2 (MAVLink2) -- GPS_INPUT rate: 5-10Hz (dedicated output thread) -- Heartbeat: 1Hz to maintain connection - -### Component: Visual Odometry - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| cuVSLAM (mono+IMU) | PyCuVSLAM v15.0.0 | 116fps on Orin Nano, NVIDIA-optimized, loop closure, IMU fallback | Closed-source, low-texture terrain risk | ~9ms/frame | ✅ Best | -| XFeat frame-to-frame | XFeatTensorRT | Open-source, learned features | No IMU integration, ~30-50ms | ~30-50ms/frame | ⚠️ Fallback | -| ORB-SLAM3 | OpenCV + custom | Well-understood, open-source | CPU-heavy, ~30fps | ~33ms/frame | ⚠️ Slower | - -**Selected**: **cuVSLAM (mono+IMU mode)** — 116fps, purpose-built for Jetson. - -### Component: Satellite Image Matching - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| LiteSAM (opt) TRT FP16 @ 1280px | TensorRT | Best satellite-aerial accuracy, 6.31M params | Untested on Orin Nano Super TRT | Est. ~165-330ms TRT FP16 | ✅ If ≤200ms | -| XFeat semi-dense | XFeatTensorRT | ~50-100ms, Jetson-proven, fastest | General-purpose | ~50-100ms | ✅ Fallback | - -**Selection**: Day-one benchmark. LiteSAM TRT FP16 at 1280px → if ≤200ms → LiteSAM. If >200ms → XFeat. - -### Component: Sensor Fusion - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| ESKF (custom) | Python/C++ | Lightweight, multi-rate, well-understood | Linear approximation | <1ms/step | ✅ Best | -| Hybrid ESKF/UKF | Custom | 49% better accuracy | More complex | ~2-3ms/step | ⚠️ Upgrade path | - -**Selected**: **ESKF** with adaptive measurement noise. State vector: [position(3), velocity(3), orientation_quat(4), accel_bias(3), gyro_bias(3)] = 16 states. - -**Output rates**: -- IMU prediction: 100+Hz (from flight controller IMU via pymavlink) -- cuVSLAM VO update: ~3Hz -- Satellite update: ~0.3-1Hz (keyframes, async) -- GPS_INPUT output: 5-10Hz (ESKF predicted state) - -**Drift budget**: Track √(σ_x² + σ_y²) from ESKF covariance. When approaching 100m → force every-frame satellite matching. - -### Component: Ground Station Telemetry (NEW) - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -|----------|-------|-----------|-------------|------------|-----| -| MAVLink auto-forwarding + NAMED_VALUE_FLOAT | pymavlink | Standard MAVLink, no custom protocol, works with all GCS (Mission Planner, QGC) | Limited bandwidth (~12kbit/s), NAMED_VALUE_FLOAT name limited to 10 chars | ~50 bytes/msg | ✅ Best | -| Custom MAVLink dialect messages | pymavlink + custom XML | Full flexibility | Requires custom GCS plugin, non-standard | ~50 bytes/msg | ⚠️ Complex | -| Separate telemetry channel | TCP/UDP over separate radio | Full bandwidth | Extra hardware, extra radio | N/A | ❌ Not available | - -**Selected**: **Standard MAVLink forwarding + NAMED_VALUE_FLOAT** - -Telemetry data sent to ground station: -- GPS position: auto-forwarded by flight controller from GPS_INPUT data -- Confidence score: NAMED_VALUE_FLOAT `"gps_conf"` at 1Hz (values: 1=HIGH, 2=MEDIUM, 3=LOW, 4=VERY_LOW) -- Drift estimate: NAMED_VALUE_FLOAT `"gps_drift"` at 1Hz (meters) -- Matching status: NAMED_VALUE_FLOAT `"sat_match"` at 1Hz (0=inactive, 1=matching, 2=failed) -- Alerts: STATUSTEXT for critical events (re-localization request, system failure) - -Re-localization from ground station: -- Operator sees drift/failure alert in GCS -- Sends COMMAND_LONG (MAV_CMD_USER_1) with lat/lon in param5/param6 -- Companion computer listens for COMMAND_LONG with target component ID -- Receives hint → constrains tile search → attempts satellite matching near hint coordinates - -### Component: Startup & Lifecycle (NEW) - -**Startup sequence**: -1. Boot Jetson → start GPS-Denied service (systemd) -2. Connect to flight controller via pymavlink on UART -3. Wait for heartbeat from flight controller -4. Read GLOBAL_POSITION_INT → extract lat, lon, alt -5. Initialize ESKF state with this position (high confidence if real GPS available) -6. Start cuVSLAM with first camera frames -7. Begin GPS_INPUT output loop at 5-10Hz -8. Preload satellite tiles within ±2km of flight plan into RAM -9. System ready — GPS-Denied active - -**GPS denial detection**: -Not required — the system always outputs GPS_INPUT. If real GPS is available, the flight controller uses whichever GPS source has better accuracy (configurable GPS blending or priority). When real GPS degrades/lost, flight controller seamlessly uses our GPS_INPUT. - -**Failsafe**: -- If no valid position estimate for N seconds (configurable, e.g., 10s): stop sending GPS_INPUT -- Flight controller detects GPS timeout → falls back to IMU-only dead reckoning -- System logs failure, continues attempting recovery (VO + satellite matching) -- When recovery succeeds: resume GPS_INPUT output - -**Reboot recovery**: -1. Jetson reboots → re-establish pymavlink connection -2. Read GPS_RAW_INT (now IMU-extrapolated by flight controller since GPS_INPUT stopped) -3. Initialize ESKF with this position (low confidence, horiz_accuracy=100m+) -4. Resume cuVSLAM + satellite matching → accuracy improves over time -5. Resume GPS_INPUT output - -### Component: Object Localization (UPDATED) - -**Two modes**: - -**Mode 1: Navigation camera (nadir)** -Frame-center GPS from ESKF. Any object in navigation camera frame: -1. Pixel offset from center: (dx_px, dy_px) -2. Convert to meters: dx_m = dx_px × GSD, dy_m = dy_px × GSD -3. Rotate by heading (yaw from IMU) -4. Convert meter offset to lat/lon delta, add to frame-center GPS - -**Mode 2: AI camera (configurable angle and zoom)** -1. Get current UAV position from ESKF -2. Get AI camera params: tilt_angle (from vertical), pan_angle (from heading), zoom (effective focal length) -3. Get pixel coordinates of detected object in AI camera frame -4. Compute bearing: bearing = heading + pan_angle + atan2(dx_px × sensor_width / focal_eff, focal_eff) -5. Compute ground distance: for flat terrain, slant_range = altitude / cos(tilt_angle + dy_angle), ground_range = slant_range × sin(tilt_angle + dy_angle) -6. Convert bearing + ground_range to lat/lon offset -7. Return GPS coordinates with accuracy estimate - -**Local API** (FastAPI on localhost:8000): -- `POST /localize` — accepts: pixel_x, pixel_y, camera_id ("nav" or "ai"), ai_camera_params (tilt, pan, zoom) → returns: lat, lon, accuracy_m -- `GET /status` — returns: system state, confidence, drift, uptime - -### Component: Satellite Tile Preprocessing (Offline) - -**Selected**: GeoHash-indexed tile pairs on disk + RAM preloading. - -Pipeline: -1. Define operational area from flight plan -2. Download satellite tiles from Google Maps Tile API at zoom 19 (0.3 m/pixel) -3. If zoom 19 unavailable: fall back to zoom 18 (0.6 m/pixel — meets ≥0.5 m/pixel requirement) -4. Validate: resolution ≥0.5 m/pixel, check imagery staleness where possible -5. Pre-resize each tile to matcher input resolution -6. Store: original + resized + metadata (GPS bounds, zoom, GSD, download date) in GeoHash-indexed structure -7. Copy to Jetson storage before flight -8. At startup: preload tiles within ±2km of flight plan into RAM - -### Component: Re-localization (Disconnected Segments) - -When cuVSLAM reports tracking loss (sharp turn, no features): -1. Flag next frame as keyframe → trigger satellite matching -2. Compute IMU dead-reckoning position since last known position -3. Rank preloaded tiles by distance to dead-reckoning position -4. Try top 3 tiles sequentially -5. If match found: position recovered, new segment begins -6. If 3 consecutive keyframe failures: send STATUSTEXT alert to ground station ("RE-LOC REQUEST: position uncertain, drift Xm") -7. While waiting for operator hint: continue VO/IMU dead reckoning, report low confidence via horiz_accuracy -8. If operator sends COMMAND_LONG with lat/lon hint: constrain tile search to ±500m of hint -9. If still no match after operator hint: continue dead reckoning, log failure - -### Component: Thermal Management (NEW) - -**Power mode**: 25W (stable sustained performance) - -**Monitoring**: Read GPU/CPU temperature via tegrastats or sysfs thermal zones at 1Hz. - -**Adaptive pipeline**: -- Normal (<70°C): Full pipeline — cuVSLAM every frame + satellite match every 3-10 frames -- Warm (70-75°C): Reduce satellite matching to every 5-10 frames -- Hot (75-80°C): Reduce satellite matching to every 10-15 frames -- Throttling (>80°C): Disable satellite matching entirely, VO+IMU only (cuVSLAM ~9ms is very light). Report LOW confidence. Resume satellite matching when temp drops below 75°C - -**Hardware requirement**: Active cooling fan (5V) mandatory for UAV companion computer enclosure. - -## Processing Time Budget (per frame, 333ms interval) - -### Normal Frame (non-keyframe, ~60-80% of frames) - -| Step | Time | Notes | -|------|------|-------| -| Image capture + transfer | ~10ms | CSI/USB3 | -| Downsample (for cuVSLAM) | ~2ms | OpenCV CUDA | -| cuVSLAM VO+IMU | ~9ms | NVIDIA CUDA-optimized, 116fps | -| ESKF measurement update | ~1ms | NumPy | -| **Total per camera frame** | **~22ms** | Well within 333ms | - -GPS_INPUT output runs independently at 5-10Hz (every 100-200ms): -| Step | Time | Notes | -|------|------|-------| -| Read ESKF state | <0.1ms | Shared state | -| Compute horiz_accuracy | <0.1ms | √(σ²) | -| pymavlink gps_input_send | ~1ms | UART write | -| **Total per GPS_INPUT** | **~1ms** | Negligible overhead | - -### Keyframe Satellite Matching (async, every 3-10 frames) - -Runs on separate CUDA stream — does NOT block VO or GPS_INPUT. - -**Path A — LiteSAM TRT FP16 at 1280px (if ≤200ms benchmark)**: - -| Step | Time | Notes | -|------|------|-------| -| Downsample to 1280px | ~1ms | OpenCV CUDA | -| Load satellite tile | ~1ms | Pre-loaded in RAM | -| LiteSAM (opt) TRT FP16 | ≤200ms | Go/no-go threshold | -| Geometric pose (RANSAC) | ~5ms | Homography | -| ESKF satellite update | ~1ms | Delayed measurement | -| **Total** | **≤210ms** | Async | - -**Path B — XFeat (if LiteSAM >200ms)**: - -| Step | Time | Notes | -|------|------|-------| -| XFeat extraction + matching | ~50-80ms | TensorRT FP16 | -| Geometric verification (RANSAC) | ~5ms | | -| ESKF satellite update | ~1ms | | -| **Total** | **~60-90ms** | Async | - -## Memory Budget (Jetson Orin Nano Super, 8GB shared) - -| Component | Memory | Notes | -|-----------|--------|-------| -| OS + runtime | ~1.5GB | JetPack 6.2 + Python | -| cuVSLAM | ~200-500MB | CUDA library + map state (configure pruning for 3000 frames) | -| Satellite matcher TensorRT | ~50-100MB | LiteSAM FP16 or XFeat FP16 | -| Preloaded satellite tiles | ~200MB | ±2km of flight plan | -| pymavlink + MAVLink runtime | ~20MB | Lightweight | -| FastAPI (local IPC) | ~50MB | Minimal, localhost only | -| Current frame buffer | ~2MB | | -| ESKF state + buffers | ~10MB | | -| **Total** | **~2.1-2.9GB** | ~26-36% of 8GB — comfortable | - -## Confidence Scoring → GPS_INPUT Mapping - -| Level | Condition | horiz_accuracy (m) | fix_type | GPS_INPUT satellites_visible | -|-------|-----------|---------------------|----------|------------------------------| -| HIGH | Satellite match succeeded + cuVSLAM consistent | 10-20 | 3 (3D) | 12 | -| MEDIUM | cuVSLAM VO only, recent satellite correction (<500m travel) | 20-50 | 3 (3D) | 8 | -| LOW | cuVSLAM VO only, no recent correction, OR high thermal throttling | 50-100 | 2 (2D) | 4 | -| VERY LOW | IMU dead-reckoning only | 100-500 | 1 (no fix) | 1 | -| MANUAL | Operator-provided re-localization hint | 200 | 3 (3D) | 6 | - -Note: `satellites_visible` is synthetic — used to influence EKF weighting. ArduPilot gives more weight to GPS with higher satellite count and lower horiz_accuracy. - -## Key Risks and Mitigations - -| Risk | Likelihood | Impact | Mitigation | -|------|-----------|--------|------------| -| **MAVSDK cannot send GPS_INPUT** | CONFIRMED | Must use pymavlink (conflicts with restriction) | Use pymavlink. Document restriction conflict. No alternative in Python. | -| **cuVSLAM fails on low-texture agricultural terrain** | HIGH | Frequent tracking loss, degraded VO | Increase satellite matching frequency. IMU dead-reckoning bridge. Accept higher drift. | -| **Jetson UART instability with ArduPilot** | MEDIUM | MAVLink connection drops | Test thoroughly. Use USB serial adapter if UART unreliable. Add watchdog reconnect. | -| **Thermal throttling blows satellite matching budget** | MEDIUM | Miss keyframe windows | Adaptive pipeline: reduce/skip satellite matching at high temp. Active cooling mandatory. | -| LiteSAM TRT FP16 >200ms at 1280px | MEDIUM | Must use XFeat | Day-one benchmark. XFeat fallback. | -| XFeat cross-view accuracy insufficient | MEDIUM | Satellite corrections less accurate | Multi-tile consensus, strict RANSAC, increase keyframe frequency. | -| cuVSLAM map memory growth on long flights | MEDIUM | Memory pressure | Configure map pruning, max keyframes. | -| Google Maps satellite quality in conflict zone | HIGH | Satellite matching fails | Accept VO+IMU with higher drift. Alternative providers. | -| GPS_INPUT at 3Hz too slow for ArduPilot EKF | HIGH | Poor EKF fusion, position jumps | 5-10Hz output with IMU interpolation between camera frames. | -| Companion computer reboot mid-flight | LOW | ~30-60s GPS gap | Flight controller IMU fallback. Automatic recovery on restart. | -| Telemetry bandwidth saturation | LOW | Custom messages compete with autopilot telemetry | Limit NAMED_VALUE_FLOAT to 1Hz. Keep messages compact. | - -## Testing Strategy - -### Integration / Functional Tests -- End-to-end: camera → cuVSLAM → ESKF → GPS_INPUT → verify flight controller receives valid position -- Compare computed positions against ground truth GPS from coordinates.csv -- Measure: percentage within 50m (target: 80%), percentage within 20m (target: 60%) -- Test GPS_INPUT rate: verify 5-10Hz output to flight controller -- Test sharp-turn handling: verify satellite re-localization after 90-degree heading change -- Test disconnected segments: simulate 3+ route breaks, verify all segments connected -- Test re-localization: simulate 3 consecutive failures → verify STATUSTEXT sent → inject COMMAND_LONG hint → verify recovery -- Test object localization: send POST /localize with known AI camera params → verify GPS accuracy -- Test startup: verify ESKF initializes from flight controller GPS -- Test reboot recovery: kill process → restart → verify reconnection and position recovery -- Test failsafe: simulate total failure → verify GPS_INPUT stops → verify flight controller IMU fallback -- Test cuVSLAM map memory: run 3000-frame session, monitor memory growth - -### Non-Functional Tests -- **Day-one satellite matcher benchmark**: LiteSAM TRT FP16 at 1280px on Orin Nano Super -- cuVSLAM benchmark: verify 116fps monocular+IMU on Orin Nano Super -- cuVSLAM terrain stress test: urban, agricultural, water, forest -- **UART reliability test**: sustained pymavlink communication over 1+ hour -- **Thermal endurance test**: run full pipeline for 30+ minutes, measure GPU temp, verify no throttling with active cooling -- Per-frame latency: must be <400ms for VO pipeline -- GPS_INPUT latency: measure time from camera capture to GPS_INPUT send -- Memory: peak usage during 3000-frame session (must stay <8GB) -- Drift budget: verify ESKF covariance tracks cumulative drift, triggers satellite matching before 100m -- Telemetry bandwidth: measure total MAVLink bandwidth used by companion computer - -## References -- pymavlink GPS_INPUT example: https://webperso.ensta.fr/lebars/Share/GPS_INPUT_pymavlink.py -- pymavlink mavgps.py: https://github.com/ArduPilot/pymavlink/blob/master/examples/mavgps.py -- ArduPilot GPS Input module: https://ardupilot.org/mavproxy/docs/modules/GPSInput.html -- MAVLink GPS_INPUT message spec: https://mavlink.io/en/messages/common.html#GPS_INPUT -- MAVSDK-Python GPS_INPUT limitation: https://github.com/mavlink/MAVSDK-Python/issues/320 -- MAVSDK-Python custom message limitation: https://github.com/mavlink/MAVSDK-Python/issues/739 -- ArduPilot companion computer setup: https://ardupilot.org/dev/docs/raspberry-pi-via-mavlink.html -- Jetson Orin UART with ArduPilot: https://forums.developer.nvidia.com/t/uart-connection-between-jetson-nano-orin-and-ardupilot/325416 -- MAVLink NAMED_VALUE_FLOAT: https://mavlink.io/en/messages/common.html#NAMED_VALUE_FLOAT -- MAVLink STATUSTEXT: https://mavlink.io/en/messages/common.html#STATUSTEXT -- MAVLink telemetry bandwidth: https://github.com/mavlink/mavlink/issues/1605 -- JetPack 6.2 Super Mode: https://developer.nvidia.com/blog/nvidia-jetpack-6-2-brings-super-mode-to-nvidia-jetson-orin-nano-and-jetson-orin-nx-modules/ -- Jetson Orin Nano power consumption: https://edgeaistack.app/blog/jetson-orin-nano-power-consumption/ -- UAV target geolocation: https://www.mdpi.com/1424-8220/22/5/1903 -- LiteSAM (2025): https://www.mdpi.com/2072-4292/17/19/3349 -- LiteSAM code: https://github.com/boyagesmile/LiteSAM -- cuVSLAM (2025-2026): https://github.com/NVlabs/PyCuVSLAM -- PyCuVSLAM API: https://nvlabs.github.io/PyCuVSLAM/api.html -- Intermodalics cuVSLAM benchmark: https://www.intermodalics.ai/blog/nvidia-isaac-ros-in-depth-cuvslam-and-the-dp3-1-release -- XFeat (CVPR 2024): https://arxiv.org/abs/2404.19174 -- XFeat TensorRT for Jetson: https://github.com/PranavNedunghat/XFeatTensorRT -- EfficientLoFTR (CVPR 2024): https://github.com/zju3dv/EfficientLoFTR -- STHN (IEEE RA-L 2024): https://github.com/arplaboratory/STHN -- JointLoc (IROS 2024): https://github.com/LuoXubo/JointLoc -- Hybrid ESKF/UKF: https://arxiv.org/abs/2512.17505 -- Google Maps Tile API: https://developers.google.com/maps/documentation/tile/satellite -- ArduPilot EKF Source Selection: https://ardupilot.org/copter/docs/common-ekf-sources.html -- Mateos-Ramirez et al. (2024): https://www.mdpi.com/2076-3417/14/16/7420 -- SatLoc (2025): https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f - -## Related Artifacts -- AC Assessment: `_docs/00_research/gps_denied_nav/00_ac_assessment.md` -- Research artifacts: `_docs/00_research/gps_denied_nav_v3/` -- Tech stack evaluation: `_docs/01_solution/tech_stack.md` -- Security analysis: `_docs/01_solution/security_analysis.md` diff --git a/_docs/01_solution/solution_draft04.md b/_docs/01_solution/solution_draft04.md deleted file mode 100644 index 59d3f2a..0000000 --- a/_docs/01_solution/solution_draft04.md +++ /dev/null @@ -1,385 +0,0 @@ -# Solution Draft - -## Assessment Findings - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -|------------------------|----------------------------------------------|-------------| -| ONNX Runtime as potential inference runtime for AI models | **Performance**: ONNX Runtime CUDA EP on Jetson Orin Nano is 7-8x slower than TRT standalone with default settings (tensor cores not utilized). Even TRT-EP shows up to 3x overhead on some models. | **Use native TRT Engine for all AI models**. Convert PyTorch → ONNX → trtexec → .engine. Load with tensorrt Python module. Eliminates ONNX Runtime dependency entirely. | -| ONNX Runtime TRT-EP memory overhead | **Performance**: ONNX RT TRT-EP keeps serialized engine in memory (~420-440MB vs 130-140MB native TRT). Delta ~280-300MB PER MODEL. On 8GB shared memory, this wastes ~560-600MB for two models. | **Native TRT releases serialized blob after deserialization** → saves ~280-300MB per model. Total savings ~560-600MB — 7% of total memory. Critical given cuVSLAM map growth risk. | -| No explicit TRT engine build step in offline pipeline | **Functional**: Draft03 mentions TRT FP16 but doesn't define the build workflow. When/where are engines built? | **Add TRT engine build to offline preparation pipeline**: After satellite tile download, run trtexec on Jetson to build .engine files. Store alongside tiles. One-time cost per model version. | -| Cross-platform portability via ONNX Runtime | **Functional**: ONNX Runtime's primary value is cross-platform support. Our deployment is Jetson-only — this value is zero. We pay the performance/memory tax for unused portability. | **Drop ONNX Runtime**. Jetson Orin Nano Super is fixed deployment hardware. TRT Engine is the optimal runtime for NVIDIA-only deployment. | -| No DLA offloading considered | **Performance**: Draft03 doesn't mention DLA. Jetson Orin Nano has NO DLA cores — only Orin NX (1-2) and AGX Orin (2) have DLA. | **Confirm: DLA offloading is NOT available on Orin Nano**. All inference must run on GPU (1024 CUDA cores, 16 tensor cores). This makes maximizing GPU efficiency via native TRT even more critical. | -| LiteSAM MinGRU TRT compatibility risk | **Functional**: LiteSAM's subpixel refinement uses 4 stacked MinGRU layers over a 3×3 candidate window (seq_len=9). MinGRU gates depend only on input C_f (not h_{t-1}), so z_t/h̃_t are pre-computable. Ops are standard: Linear, Sigmoid, Mul, Add, ReLU, Tanh. Risk is LOW-MEDIUM — depends on whether implementation uses logcumsumexp (problematic) or simple loop (fine). Seq_len=9 makes this trivially rewritable. | **Day-one verification**: clone LiteSAM repo → torch.onnx.export → polygraphy inspect → trtexec --fp16. If export fails on MinGRU: rewrite forward() as unrolled loop (9 steps). **If LiteSAM cannot be made TRT-compatible: replace with EfficientLoFTR TRT** (proven TRT path via Coarse_LoFTR_TRT, 15.05M params, semi-dense matching). | - -## Product Solution Description - -A real-time GPS-denied visual navigation system for fixed-wing UAVs, running on a Jetson Orin Nano Super (8GB). All AI model inference uses **native TensorRT Engine files** — no ONNX Runtime dependency. The system replaces the GPS module by sending MAVLink GPS_INPUT messages via pymavlink over UART at 5-10Hz. - -Position is determined by fusing: (1) CUDA-accelerated visual odometry (cuVSLAM — native CUDA), (2) absolute position corrections from satellite image matching (LiteSAM or XFeat — TRT Engine FP16), and (3) IMU data from the flight controller via ESKF. - -**Inference runtime decision**: Native TRT Engine over ONNX Runtime because: -1. ONNX RT CUDA EP is 7-8x slower on Orin Nano (tensor core bug) -2. ONNX RT TRT-EP wastes ~280-300MB per model (serialized engine retained in memory) -3. Cross-platform portability has zero value — deployment is Jetson-only -4. Native TRT provides direct CUDA stream control for pipelining with cuVSLAM - -**Hard constraint**: Camera shoots at ~3fps (333ms interval). Full VO+ESKF pipeline within 400ms. GPS_INPUT at 5-10Hz. - -**AI Model Runtime Summary**: - -| Model | Runtime | Precision | Memory | Integration | -|-------|---------|-----------|--------|-------------| -| cuVSLAM | Native CUDA (PyCuVSLAM) | N/A (closed-source) | ~200-500MB | CUDA Stream A | -| LiteSAM | TRT Engine | FP16 | ~50-80MB | CUDA Stream B | -| XFeat | TRT Engine | FP16 | ~30-50MB | CUDA Stream B (fallback) | -| ESKF | CPU (Python/C++) | FP64 | ~10MB | CPU thread | - -**Offline Preparation Pipeline** (before flight): -1. Download satellite tiles → validate → pre-resize → store (existing) -2. **NEW: Build TRT engines on Jetson** (one-time per model version) - - `trtexec --onnx=litesam_fp16.onnx --saveEngine=litesam.engine --fp16` - - `trtexec --onnx=xfeat.onnx --saveEngine=xfeat.engine --fp16` -3. Copy tiles + engines to Jetson storage -4. At startup: load engines + preload tiles into RAM - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ OFFLINE (Before Flight) │ -│ 1. Satellite Tiles → Download & Validate → Pre-resize → Store │ -│ (Google Maps) (≥0.5m/px, <2yr) (matcher res) (GeoHash)│ -│ 2. TRT Engine Build (one-time per model version): │ -│ PyTorch model → reparameterize → ONNX export → trtexec --fp16 │ -│ Output: litesam.engine, xfeat.engine │ -│ 3. Copy tiles + engines to Jetson storage │ -└─────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ ONLINE (During Flight) │ -│ │ -│ STARTUP: │ -│ 1. pymavlink → read GLOBAL_POSITION_INT → init ESKF │ -│ 2. Load TRT engines: litesam.engine + xfeat.engine │ -│ (tensorrt.Runtime → deserialize_cuda_engine → create_context) │ -│ 3. Allocate GPU buffers for TRT input/output (PyCUDA) │ -│ 4. Start cuVSLAM with first camera frames │ -│ 5. Preload satellite tiles ±2km into RAM │ -│ 6. Begin GPS_INPUT output loop at 5-10Hz │ -│ │ -│ EVERY FRAME (3fps, 333ms interval): │ -│ ┌──────────────────────────────────────┐ │ -│ │ Nav Camera → Downsample (CUDA ~2ms) │ │ -│ │ → cuVSLAM VO+IMU (~9ms) │ ← CUDA Stream A │ -│ │ → ESKF measurement update │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ 5-10Hz CONTINUOUS: │ -│ ┌──────────────────────────────────────┐ │ -│ │ ESKF IMU prediction → GPS_INPUT send │──→ Flight Controller │ -│ └──────────────────────────────────────┘ │ -│ │ -│ KEYFRAMES (every 3-10 frames, async): │ -│ ┌──────────────────────────────────────┐ │ -│ │ TRT Engine inference (Stream B): │ │ -│ │ context.enqueue_v3(stream_B) │──→ ESKF correction │ -│ │ LiteSAM FP16 or XFeat FP16 │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ TELEMETRY (1Hz): │ -│ ┌──────────────────────────────────────┐ │ -│ │ NAMED_VALUE_FLOAT: confidence, drift │──→ Ground Station │ -│ └──────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Architecture - -### Component: AI Model Inference Runtime - -| Solution | Tools | Advantages | Limitations | Performance | Memory | Fit | -|----------|-------|-----------|-------------|------------|--------|-----| -| Native TRT Engine | tensorrt Python + PyCUDA + trtexec | Optimal latency, minimal memory, full tensor core usage, direct CUDA stream control | Hardware-specific engines, manual buffer management, rebuild per TRT version | Optimal | ~50-130MB total (both models) | ✅ Best | -| ONNX Runtime TRT-EP | onnxruntime + TensorRT EP | Auto-fallback for unsupported ops, simpler API, auto engine caching | +280-300MB per model, wrapper overhead, first-run latency spike | Near-parity (claimed), up to 3x slower (observed) | ~640-690MB total (both models) | ❌ Memory overhead unacceptable | -| ONNX Runtime CUDA EP | onnxruntime + CUDA EP | Simplest API, broadest op support | 7-8x slower on Orin Nano (tensor core bug), no TRT optimizations | 7-8x slower | Standard | ❌ Performance unacceptable | -| Torch-TensorRT | torch_tensorrt | AOT compilation, PyTorch-native, handles mixed TRT/PyTorch | Newer on Jetson, requires PyTorch runtime at inference | Near native TRT | PyTorch runtime ~500MB+ | ⚠️ Viable alternative if TRT export fails | - -**Selected**: **Native TRT Engine** — optimal performance and memory on our fixed NVIDIA hardware. - -**Fallback**: If any model has unsupported TRT ops (e.g., MinGRU in LiteSAM), use **Torch-TensorRT** for that specific model. Torch-TensorRT handles mixed TRT/PyTorch execution but requires PyTorch runtime in memory. - -### Component: TRT Engine Conversion Workflow - -**LiteSAM conversion**: -1. Load PyTorch model with trained weights -2. Reparameterize MobileOne backbone (collapse multi-branch → single Conv2d+BN) -3. Export to ONNX: `torch.onnx.export(model, dummy_input, "litesam.onnx", opset_version=17)` -4. Verify with polygraphy: `polygraphy inspect model litesam.onnx` -5. Build engine on Jetson: `trtexec --onnx=litesam.onnx --saveEngine=litesam.engine --fp16 --memPoolSize=workspace:2048` -6. Verify engine: `trtexec --loadEngine=litesam.engine --fp16` - -**XFeat conversion**: -1. Load PyTorch model -2. Export to ONNX: `torch.onnx.export(model, dummy_input, "xfeat.onnx", opset_version=17)` -3. Build engine on Jetson: `trtexec --onnx=xfeat.onnx --saveEngine=xfeat.engine --fp16` -4. Alternative: use XFeatTensorRT C++ implementation directly - -**INT8 quantization strategy** (optional, future optimization): -- MobileOne backbone (CNN): INT8 safe with calibration data -- TAIFormer (transformer attention): FP16 only — INT8 degrades accuracy -- XFeat: evaluate INT8 on actual UAV-satellite pairs before deploying -- Use nvidia-modelopt for calibration: `from modelopt.onnx.quantization import quantize` - -### Component: TRT Python Inference Wrapper - -Minimal wrapper class for TRT engine inference: - -```python -import tensorrt as trt -import pycuda.driver as cuda - -class TRTInference: - def __init__(self, engine_path, stream): - self.logger = trt.Logger(trt.Logger.WARNING) - self.runtime = trt.Runtime(self.logger) - with open(engine_path, 'rb') as f: - self.engine = self.runtime.deserialize_cuda_engine(f.read()) - self.context = self.engine.create_execution_context() - self.stream = stream - self._allocate_buffers() - - def _allocate_buffers(self): - self.inputs = {} - self.outputs = {} - for i in range(self.engine.num_io_tensors): - name = self.engine.get_tensor_name(i) - shape = self.engine.get_tensor_shape(name) - dtype = trt.nptype(self.engine.get_tensor_dtype(name)) - size = trt.volume(shape) - device_mem = cuda.mem_alloc(size * np.dtype(dtype).itemsize) - self.context.set_tensor_address(name, int(device_mem)) - mode = self.engine.get_tensor_mode(name) - if mode == trt.TensorIOMode.INPUT: - self.inputs[name] = (device_mem, shape, dtype) - else: - self.outputs[name] = (device_mem, shape, dtype) - - def infer_async(self, input_data): - for name, data in input_data.items(): - cuda.memcpy_htod_async(self.inputs[name][0], data, self.stream) - self.context.enqueue_v3(self.stream.handle) - - def get_output(self): - results = {} - for name, (dev_mem, shape, dtype) in self.outputs.items(): - host_mem = np.empty(shape, dtype=dtype) - cuda.memcpy_dtoh_async(host_mem, dev_mem, self.stream) - self.stream.synchronize() - return results -``` - -Key design: `infer_async()` + `get_output()` split enables pipelining with cuVSLAM on Stream A while satellite matching runs on Stream B. - -### Component: Visual Odometry (UNCHANGED) - -cuVSLAM — native CUDA library, not affected by TRT migration. Already optimal. - -### Component: Satellite Image Matching (UPDATED runtime + fallback chain) - -| Solution | Tools | Advantages | Limitations | Performance (est. Orin Nano Super TRT FP16) | Params | Fit | -|----------|-------|-----------|-------------|----------------------------------------------|--------|-----| -| LiteSAM (opt) TRT Engine FP16 @ 1280px | trtexec + tensorrt Python | Best satellite-aerial accuracy (RMSE@30=17.86m UAV-VisLoc), 6.31M params, smallest model | MinGRU TRT export needs verification (LOW-MEDIUM risk) | Est. ~165-330ms | 6.31M | ✅ Primary (if TRT export succeeds AND ≤200ms) | -| EfficientLoFTR TRT Engine FP16 | trtexec + tensorrt Python | Proven TRT path (Coarse_LoFTR_TRT repo, 138 stars). Semi-dense. CVPR 2024. High accuracy. | 2.4x more params than LiteSAM. Requires einsum→elementary ops rewrite for TRT (documented in Coarse_LoFTR_TRT paper). | Est. ~200-400ms | 15.05M | ✅ Fallback if LiteSAM TRT fails | -| XFeat TRT Engine FP16 | trtexec + tensorrt Python (or XFeatTensorRT C++) | Fastest. Proven TRT implementation. Lightweight. | General-purpose, not designed for cross-view satellite-aerial gap (but nadir-nadir gap is small). | Est. ~50-100ms | <5M | ✅ Speed fallback | - -**Decision tree (day-one on Orin Nano Super)**: -1. Clone LiteSAM repo → reparameterize MobileOne → `torch.onnx.export()` → `polygraphy inspect` -2. If ONNX export succeeds → `trtexec --onnx=litesam.onnx --saveEngine=litesam.engine --fp16` -3. If MinGRU causes ONNX/TRT failure → rewrite MinGRU forward() as unrolled 9-step loop → retry -4. If rewrite fails or accuracy degrades → **switch to EfficientLoFTR TRT**: - - Apply Coarse_LoFTR_TRT TRT-adaptation techniques (einsum replacement, etc.) - - Export to ONNX → trtexec --fp16 - - Benchmark at 640×480 and 1280px -5. Benchmark winner: **if ≤200ms → use it. If >200ms but ≤300ms → acceptable (async on Stream B). If >300ms → use XFeat TRT** - -**EfficientLoFTR TRT adaptation** (from Coarse_LoFTR_TRT paper, proven workflow): -- Replace `torch.einsum()` with elementary ops (view, bmm, reshape, sum) -- Replace any TRT-incompatible high-level PyTorch functions -- Use ONNX export path (less memory required than Torch-TensorRT on 8GB device) -- Knowledge distillation available for further parameter reduction if needed - -### Component: Sensor Fusion (UNCHANGED) -ESKF — CPU-based mathematical filter, not affected. - -### Component: Flight Controller Integration (UNCHANGED) -pymavlink — not affected by TRT migration. - -### Component: Ground Station Telemetry (UNCHANGED) -MAVLink NAMED_VALUE_FLOAT — not affected. - -### Component: Startup & Lifecycle (UPDATED) - -**Updated startup sequence**: -1. Boot Jetson → start GPS-Denied service (systemd) -2. Connect to flight controller via pymavlink on UART -3. Wait for heartbeat from flight controller -4. **Initialize PyCUDA context** -5. **Load TRT engines**: litesam.engine + xfeat.engine via tensorrt.Runtime.deserialize_cuda_engine() -6. **Allocate GPU I/O buffers** for both models -7. **Create CUDA streams**: Stream A (cuVSLAM), Stream B (satellite matching) -8. Read GLOBAL_POSITION_INT → init ESKF -9. Start cuVSLAM with first camera frames -10. Begin GPS_INPUT output loop at 5-10Hz -11. Preload satellite tiles within ±2km into RAM -12. System ready - -**Engine load time**: ~1-3 seconds per engine (deserialization from .engine file). One-time cost at startup. - -### Component: Thermal Management (UNCHANGED) -Same adaptive pipeline. TRT engines are slightly more power-efficient than ONNX Runtime, but the difference is within noise. - -### Component: Object Localization (UNCHANGED) -Not affected — trigonometric calculation, no AI inference. - -## Speed Optimization Techniques - -### 1. cuVSLAM for Visual Odometry (~9ms/frame) -Unchanged from draft03. Native CUDA, not part of TRT migration. - -### 2. Native TRT Engine Inference (NEW) -All AI models run as pre-compiled TRT FP16 engines: -- Engine files built offline with trtexec (one-time per model version) -- Loaded at startup (~1-3s per engine) -- Inference via context.enqueue_v3() on dedicated CUDA Stream B -- GPU buffers pre-allocated — zero runtime allocation during flight -- No ONNX Runtime dependency — no framework overhead - -Memory advantage over ONNX Runtime TRT-EP: ~560-600MB saved (both models combined). -Latency advantage: eliminates ONNX wrapper overhead, guaranteed tensor core utilization. - -### 3. CUDA Stream Pipelining (REFINED) -- Stream A: cuVSLAM VO for current frame (~9ms) + ESKF fusion (~1ms) -- Stream B: TRT engine inference for satellite matching (LiteSAM or XFeat, async) -- CPU: GPS_INPUT output loop, NAMED_VALUE_FLOAT, command listener, tile management -- **NEW**: Both cuVSLAM and TRT engines use CUDA streams natively — no framework abstraction layer. Direct GPU scheduling. - -### 4-7. (UNCHANGED from draft03) -Keyframe-based satellite matching, TensorRT FP16 optimization, proactive tile loading, 5-10Hz GPS_INPUT output — all unchanged. - -## Processing Time Budget (per frame, 333ms interval) - -### Normal Frame (non-keyframe) -Unchanged from draft03 — cuVSLAM dominates at ~22ms total. - -### Keyframe Satellite Matching (async, CUDA Stream B) - -**Path A — LiteSAM TRT Engine FP16 at 1280px**: - -| Step | Time | Notes | -|------|------|-------| -| Downsample to 1280px | ~1ms | OpenCV CUDA | -| Load satellite tile | ~1ms | Pre-loaded in RAM | -| Copy input to GPU buffer | <0.5ms | PyCUDA memcpy_htod_async | -| LiteSAM TRT Engine FP16 | ≤200ms | context.enqueue_v3(stream_B) | -| Copy output from GPU | <0.5ms | PyCUDA memcpy_dtoh_async | -| Geometric pose (RANSAC) | ~5ms | Homography | -| ESKF satellite update | ~1ms | Delayed measurement | -| **Total** | **≤210ms** | Async on Stream B | - -**Path B — XFeat TRT Engine FP16**: - -| Step | Time | Notes | -|------|------|-------| -| XFeat TRT Engine inference | ~50-80ms | context.enqueue_v3(stream_B) | -| Geometric verification (RANSAC) | ~5ms | | -| ESKF satellite update | ~1ms | | -| **Total** | **~60-90ms** | Async on Stream B | - -## Memory Budget (Jetson Orin Nano Super, 8GB shared) - -| Component | Memory (Native TRT) | Memory (ONNX RT TRT-EP) | Notes | -|-----------|---------------------|--------------------------|-------| -| OS + runtime | ~1.5GB | ~1.5GB | JetPack 6.2 + Python | -| cuVSLAM | ~200-500MB | ~200-500MB | CUDA library + map | -| **LiteSAM TRT engine** | **~50-80MB** | **~330-360MB** | Native TRT vs TRT-EP. If LiteSAM fails: EfficientLoFTR ~100-150MB | -| **XFeat TRT engine** | **~30-50MB** | **~310-330MB** | Native TRT vs TRT-EP | -| Preloaded satellite tiles | ~200MB | ~200MB | ±2km of flight plan | -| pymavlink + MAVLink | ~20MB | ~20MB | | -| FastAPI (local IPC) | ~50MB | ~50MB | | -| ESKF + buffers | ~10MB | ~10MB | | -| ONNX Runtime framework | **0MB** | **~150MB** | Eliminated with native TRT | -| **Total** | **~2.1-2.9GB** | **~2.8-3.6GB** | | -| **% of 8GB** | **26-36%** | **35-45%** | | -| **Savings** | — | — | **~700MB saved with native TRT** | - -## Confidence Scoring → GPS_INPUT Mapping -Unchanged from draft03. - -## Key Risks and Mitigations - -| Risk | Likelihood | Impact | Mitigation | -|------|-----------|--------|------------| -| **LiteSAM MinGRU ops unsupported in TRT 10.3** | LOW-MEDIUM | LiteSAM TRT export fails | Day-one verification: ONNX export → polygraphy → trtexec. If MinGRU fails: (1) rewrite as unrolled 9-step loop, (2) if still fails: **switch to EfficientLoFTR TRT** (proven TRT path, Coarse_LoFTR_TRT, 15.05M params). XFeat TRT as speed fallback. | -| **TRT engine build OOM on 8GB Jetson** | LOW | Cannot build engines on target device | Our models are small (6.31M LiteSAM, <5M XFeat). OOM unlikely. If occurs: reduce --memPoolSize, or build on identical Orin Nano module with more headroom | -| **Engine incompatibility after JetPack update** | MEDIUM | Must rebuild engines | Include engine rebuild in JetPack update procedure. Takes minutes per model. | -| **MAVSDK cannot send GPS_INPUT** | CONFIRMED | Must use pymavlink | Unchanged from draft03 | -| **cuVSLAM fails on low-texture terrain** | HIGH | Frequent tracking loss | Unchanged from draft03 | -| **Thermal throttling** | MEDIUM | Satellite matching budget blown | Unchanged from draft03 | -| LiteSAM TRT FP16 >200ms at 1280px | MEDIUM | Must use fallback matcher | Day-one benchmark. Fallback chain: EfficientLoFTR TRT (if ≤300ms) → XFeat TRT (if all >300ms) | -| Google Maps satellite quality in conflict zone | HIGH | Satellite matching fails | Unchanged from draft03 | - -## Testing Strategy - -### Integration / Functional Tests -All tests from draft03 unchanged, plus: -- **TRT engine load test**: Verify litesam.engine and xfeat.engine load successfully on Jetson Orin Nano Super -- **TRT inference correctness**: Compare TRT engine output vs PyTorch reference output (max L1 error < 0.01) -- **CUDA Stream B pipelining**: Verify satellite matching on Stream B does not block cuVSLAM on Stream A -- **Engine pre-built validation**: Verify engine files from offline preparation work without rebuild at runtime - -### Non-Functional Tests -All tests from draft03 unchanged, plus: -- **TRT engine build time**: Measure trtexec build time for LiteSAM and XFeat on Orin Nano Super (expected: 1-5 minutes each) -- **TRT engine load time**: Measure deserialization time (expected: 1-3 seconds each) -- **Memory comparison**: Measure actual GPU memory with native TRT vs ONNX RT TRT-EP for both models -- **MinGRU TRT compatibility** (day-one blocker): - 1. Clone LiteSAM repo, load pretrained weights - 2. Reparameterize MobileOne backbone - 3. `torch.onnx.export(model, dummy, "litesam.onnx", opset_version=17)` - 4. `polygraphy inspect model litesam.onnx` — check for unsupported ops - 5. `trtexec --onnx=litesam.onnx --saveEngine=litesam.engine --fp16` - 6. If step 3 or 5 fails on MinGRU: rewrite MinGRU forward() as unrolled loop, retry - 7. If still fails: switch to EfficientLoFTR, apply Coarse_LoFTR_TRT adaptation - 8. Compare TRT output vs PyTorch reference (max L1 error < 0.01) -- **EfficientLoFTR TRT fallback benchmark** (if LiteSAM fails): apply TRT adaptation from Coarse_LoFTR_TRT → ONNX → trtexec → measure latency at 640×480 and 1280px -- **Tensor core utilization**: Verify with NSight that TRT engines use tensor cores (unlike ONNX RT CUDA EP) - -## References -- ONNX Runtime Issue #24085 (Jetson Orin Nano tensor core bug): https://github.com/microsoft/onnxruntime/issues/24085 -- ONNX Runtime Issue #20457 (TRT-EP memory overhead): https://github.com/microsoft/onnxruntime/issues/20457 -- ONNX Runtime Issue #12083 (TRT-EP vs native TRT): https://github.com/microsoft/onnxruntime/issues/12083 -- NVIDIA TensorRT 10 Python API: https://docs.nvidia.com/deeplearning/tensorrt/10.15.1/inference-library/python-api-docs.html -- TensorRT Best Practices: https://docs.nvidia.com/deeplearning/tensorrt/latest/performance/best-practices.html -- TensorRT engine hardware specificity: https://github.com/NVIDIA/TensorRT/issues/1920 -- trtexec ONNX conversion: https://nvidia-jetson.piveral.com/jetson-orin-nano/how-to-convert-onnx-to-engine-on-jetson-orin-nano-dev-board/ -- Torch-TensorRT JetPack 6.2: https://docs.pytorch.org/TensorRT/v2.10.0/getting_started/jetpack.html -- XFeatTensorRT: https://github.com/PranavNedunghat/XFeatTensorRT -- JetPack 6.2 Release Notes: https://docs.nvidia.com/jetson/archives/jetpack-archived/jetpack-62/release-notes/index.html -- Jetson Orin Nano Super: https://developer.nvidia.com/blog/nvidia-jetson-orin-nano-developer-kit-gets-a-super-boost/ -- DLA on Jetson Orin: https://developer.nvidia.com/blog/maximizing-deep-learning-performance-on-nvidia-jetson-orin-with-dla/ -- EfficientLoFTR (CVPR 2024): https://github.com/zju3dv/EfficientLoFTR -- EfficientLoFTR HuggingFace: https://huggingface.co/docs/transformers/en/model_doc/efficientloftr -- Coarse_LoFTR_TRT (TRT for embedded): https://github.com/Kolkir/Coarse_LoFTR_TRT -- Coarse_LoFTR_TRT paper: https://ar5iv.labs.arxiv.org/html/2202.00770 -- LoFTR_TRT: https://github.com/Kolkir/LoFTR_TRT -- minGRU ("Were RNNs All We Needed?"): https://huggingface.co/papers/2410.01201 -- minGRU PyTorch implementation: https://github.com/lucidrains/minGRU-pytorch -- LiteSAM paper (MinGRU details, Eqs 12-16): https://www.mdpi.com/2072-4292/17/19/3349 -- DALGlue (UAV feature matching, 2025): https://www.nature.com/articles/s41598-025-21602-5 -- All references from solution_draft03.md - -## Related Artifacts -- AC Assessment: `_docs/00_research/gps_denied_nav/00_ac_assessment.md` -- Research artifacts (this assessment): `_docs/00_research/trt_engine_migration/` -- Previous research: `_docs/00_research/gps_denied_nav_v3/` -- Tech stack evaluation: `_docs/01_solution/tech_stack.md` -- Security analysis: `_docs/01_solution/security_analysis.md` diff --git a/_docs/01_solution/solution_draft05.md b/_docs/01_solution/solution_draft05.md deleted file mode 100644 index 7e65167..0000000 --- a/_docs/01_solution/solution_draft05.md +++ /dev/null @@ -1,562 +0,0 @@ -# Solution Draft - -## Assessment Findings - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -|------------------------|----------------------------------------------|-------------| -| ONNX Runtime as potential inference runtime for AI models | **Performance**: ONNX Runtime CUDA EP on Jetson Orin Nano is 7-8x slower than TRT standalone with default settings (tensor cores not utilized). Even TRT-EP shows up to 3x overhead on some models. | **Use native TRT Engine for all AI models**. Convert PyTorch → ONNX → trtexec → .engine. Load with tensorrt Python module. Eliminates ONNX Runtime dependency entirely. | -| ONNX Runtime TRT-EP memory overhead | **Performance**: ONNX RT TRT-EP keeps serialized engine in memory (~420-440MB vs 130-140MB native TRT). Delta ~280-300MB PER MODEL. On 8GB shared memory, this wastes ~560-600MB for two models. | **Native TRT releases serialized blob after deserialization** → saves ~280-300MB per model. Total savings ~560-600MB — 7% of total memory. Critical given cuVSLAM map growth risk. | -| No explicit TRT engine build step in offline pipeline | **Functional**: Draft03 mentions TRT FP16 but doesn't define the build workflow. When/where are engines built? | **Add TRT engine build to offline preparation pipeline**: After satellite tile download, run trtexec on Jetson to build .engine files. Store alongside tiles. One-time cost per model version. | -| Cross-platform portability via ONNX Runtime | **Functional**: ONNX Runtime's primary value is cross-platform support. Our deployment is Jetson-only — this value is zero. We pay the performance/memory tax for unused portability. | **Drop ONNX Runtime**. Jetson Orin Nano Super is fixed deployment hardware. TRT Engine is the optimal runtime for NVIDIA-only deployment. | -| No DLA offloading considered | **Performance**: Draft03 doesn't mention DLA. Jetson Orin Nano has NO DLA cores — only Orin NX (1-2) and AGX Orin (2) have DLA. | **Confirm: DLA offloading is NOT available on Orin Nano**. All inference must run on GPU (1024 CUDA cores, 16 tensor cores). This makes maximizing GPU efficiency via native TRT even more critical. | -| LiteSAM MinGRU TRT compatibility risk | **Functional**: LiteSAM's subpixel refinement uses 4 stacked MinGRU layers over a 3×3 candidate window (seq_len=9). MinGRU gates depend only on input C_f (not h_{t-1}), so z_t/h̃_t are pre-computable. Ops are standard: Linear, Sigmoid, Mul, Add, ReLU, Tanh. Risk is LOW-MEDIUM — depends on whether implementation uses logcumsumexp (problematic) or simple loop (fine). Seq_len=9 makes this trivially rewritable. | **Day-one verification**: clone LiteSAM repo → torch.onnx.export → polygraphy inspect → trtexec --fp16. If export fails on MinGRU: rewrite forward() as unrolled loop (9 steps). **If LiteSAM cannot be made TRT-compatible: replace with EfficientLoFTR TRT** (proven TRT path via Coarse_LoFTR_TRT, 15.05M params, semi-dense matching). | -| Camera shoots at ~3fps (draft03/04 hard constraint) | **Functional**: ADTI 20L V1 max continuous rate is **2.0 fps** (burst only, buffer-limited). ADTi recommends 1.5s per capture (**0.7 fps sustained**). 3fps is physically impossible. 2fps is not sustainable for multi-hour flights (buffer saturation + mechanical shutter wear). | **Revised to 0.7 fps sustained**. ADTI 20L V1 is the sole navigation camera — used for both cuVSLAM VO and satellite matching. At 70 km/h cruise and 600m altitude: 27.8m inter-frame displacement, ~175px pixel shift, **95.2% frame overlap** — within pyramid-assisted LK optical flow range. ESKF IMU prediction at 5-10Hz bridges 1.43s gaps between frames. Satellite matching triggered on keyframes from the same stream. Viewpro A40 Pro reserved for AI object detection only. | - -## UAV Platform - -### Airframe Configuration - -| Component | Specification | Weight | -|-----------|--------------|--------| -| Airframe | Custom 3.5m S-2 Glass Composite, Eppler 423 airfoil | ~4.5 kg | -| Battery | 2x VANT Semi-Solid State 6S 30Ah (22.2V, 666Wh each) | 5.30 kg (2.65 kg each) | -| Motor | T-Motor AT4125 KV540 (2000W peak, 5.5 kg thrust w/ APC 15x8) | 0.36 kg | -| Propulsion Acc. | ESC, 15x8 Folding Propeller, Servos, Cables | ~0.50 kg | -| Avionics | Pixhawk 6x + GPS | ~0.10 kg | -| Computing | NVIDIA Jetson Orin Nano Super Dev Kit | ~0.30 kg | -| Camera 1 | ADTI 20L V1 APS-C Camera + 16mm Lens | ~0.22 kg | -| Camera 2 | Viewpro A40 Pro (A40TPro) AI Gimbal | ~0.85 kg | -| Misc | Mounts, wiring, connectors | ~0.35 kg | -| **Total AUW** | | **~12.5 kg** | - -T-Motor AT4125 KV540 is spec'd for 8-10 kg fixed-wing. At 12.5 kg AUW, static thrust-to-weight is ~0.44. Flyable but margins are tight — weight optimization should be monitored. - -### Flight Performance (Max Endurance) - -Assumptions: wingspan 3.5m, mean chord ~0.30m, wing area S ~1.05 m², AR ~11.7, Eppler 423 Cl_max ~1.8, cruise altitude 800-1000m (ρ ~1.10 kg/m³). - -| Parameter | Value | -|-----------|-------| -| Stall speed (900m altitude) | 10.6 m/s (38 km/h) | -| Min-power speed (theoretical) | 12.0 m/s (43 km/h) — only 13% above stall, impractical | -| **Max endurance cruise (1.3× stall margin)** | **14 m/s (50 km/h)** | -| Best range speed | ~18 m/s (65 km/h) | - -| Energy Budget | Value | -|---------------|-------| -| Total battery energy | 1332 Wh (2 × 666 Wh) | -| Usable (80% DoD) | 1066 Wh | -| Climb to 900m (~5 min at 3 m/s) | −57 Wh | -| 10% reserve | ×0.9 | -| **Available for cruise** | **~908 Wh** | - -| Power Budget | Value | -|--------------|-------| -| Propulsion (L/D ~15, η_prop 0.65, η_motor 0.80) | ~212 W | -| Electronics (Pixhawk + Jetson + cameras + gimbal + servos) | ~55 W | -| **Total cruise power** | **~267 W** | - -| Endurance | Value | -|-----------|-------| -| **Max endurance (at 50 km/h)** | **~3.4 hours** | -| Total mission (incl. climb + reserve) | ~3.5 hours | -| Max range (at 65 km/h best-range speed) | ~209 km | - -### Camera 1: ADTI 20L V1 + 16mm Lens - -| Spec | Value | -|------|-------| -| Sensor | Sony CMOS APS-C, 23.2 × 15.4 mm | -| Resolution | 5456 × 3632 (20 MP) | -| Focal length | 16 mm | -| Shutter | Mechanical global inter-mirror shutter (ADTI product line) | -| Max continuous fps | **2.0 fps** (spec — burst rate, buffer-limited) | -| Sustained capture rate | **~0.7 fps** (ADTi recommended 1.5s per capture) | -| File formats | JPEG, RAW, RAW+JPEG | -| HDMI video output | 1080p 24p/30p, 1440×1080 30p | -| Weight | 118g body + ~100g lens | -| ISP | Socionext Milbeaut | -| Cooling | Active fan | - -**2.0 fps is a burst rate, not sustained.** The 2.0 fps spec is limited by the internal buffer (estimated 3-5 frames). Once the buffer fills, the camera throttles to the write pipeline speed of ~0.7 fps. The bottleneck chain: mechanical shutter actuation (~100-300ms) + ISP processing (demosaic, NR, JPEG compress) + storage write (~5-10 MB/frame JPEG). The 1.5s/capture recommendation guarantees the buffer never fills and accounts for thermal margin over multi-hour flights. - -**Mechanical shutter wear at sustained rates:** - -| Rate | Actuations per 3.5h flight | Est. flights before 150K shutter life | Est. flights before 500K shutter life | -|------|---------------------------|---------------------------------------|---------------------------------------| -| 2.0 fps (burst, unsustainable) | 25,200 | ~6 | ~20 | -| 1.0 fps | 12,600 | ~12 | ~40 | -| 0.7 fps (recommended) | 8,820 | ~17 | ~57 | - -The 20L V1 shutter lifespan is not documented. The higher-end 102PRO is rated at 500K actuations. The 20L as an entry-level model is likely 100K-150K. At 0.7 fps sustained, this gives ~11-57 flights depending on shutter rating. - -**Confirmed operational rate: 0.7 fps (JPEG mode) for both VO and satellite matching.** - -### Camera 1: Ground Coverage at Mission Altitude - -| Parameter | H = 600 m | H = 800 m | H = 1000 m | -|-----------|-----------|-----------|------------| -| Along-track footprint (15.4mm side) | 577 m | 770 m | 962 m | -| Cross-track footprint (23.2mm side) | 870 m | 1160 m | 1450 m | -| GSD | 15.9 cm/pixel | 21.3 cm/pixel | 26.6 cm/pixel | - -### Camera 1: Forward Overlap at 0.7 fps - -At 0.7 fps, distance between shots = V / 0.7. - -**At 70 km/h (19.4 m/s) — realistic cruise speed:** - -| Altitude | Along-track footprint | Shot gap (27.8m) | Forward overlap | Pixel shift | -|----------|-----------------------|------------------|-----------------|-------------| -| 600 m | 577 m | 27.8 m | **95.2%** | ~175 px | -| 800 m | 770 m | 27.8 m | **96.4%** | ~131 px | -| 1000 m | 962 m | 27.8 m | **97.1%** | ~105 px | - -**Across speed range (at 600m altitude, 0.7 fps):** - -| Speed | Frame gap | Pixel shift | Forward overlap | -|-------|-----------|-------------|-----------------| -| 50 km/h (14 m/s) | 20.0 m | ~126 px | 96.5% | -| 70 km/h (19.4 m/s) | 27.8 m | ~175 px | 95.2% | -| 90 km/h (25 m/s) | 35.7 m | ~224 px | 93.8% | - -Even at 90 km/h and the lowest altitude (600m), overlap remains >93%. The 16mm lens on APS-C at these altitudes produces a footprint so large that 0.7 fps provides massive redundancy for both VO and satellite matching. - -**For satellite matching specifically** (keyframe-based, every 5-10 camera frames): - -| Target overlap | Required gap (600m) | Time between shots (70 km/h) | Capture rate | -|----------------|---------------------|------------------------------|--------------| -| 80% | 115 m | 5.9 s | 0.17 fps | -| 70% | 173 m | 8.9 s | 0.11 fps | -| 60% | 231 m | 11.9 s | 0.084 fps | - -Even at the lowest altitude and highest speed, 1 satellite matching keyframe every 6-12 seconds gives 60-80% overlap. - -### Camera 2: Viewpro A40 Pro (A40TPro) AI Gimbal - -Dual EO/IR gimbal with AI tracking. Reserved for **AI object detection and tracking only** — not used for navigation. Operates independently from the navigation pipeline. - -### Camera Role Assignment - -| Role | Camera | Rate | Notes | -|------|--------|------|-------| -| Visual Odometry (cuVSLAM) | ADTI 20L V1 + 16mm | 0.7 fps (sustained) | Sole navigation camera. At 70 km/h: 27.8m/~175px displacement at 600m alt. 95%+ overlap. | -| Satellite Image Matching | ADTI 20L V1 + 16mm | Keyframes from VO stream (~every 5-10 frames) | Same image stream as VO. Subset routed to satellite matcher on Stream B. | -| AI Object Detection | Viewpro A40 Pro | Independent | Not part of navigation pipeline. | - -### cuVSLAM at 0.7 fps — Feasibility - -At 0.7 fps and 70 km/h cruise, inter-frame displacement is 27.8m. In pixel terms: - -| Altitude | Displacement | Pixel shift | % of image height | Overlap | -|----------|-------------|-------------|-------------------|---------| -| 600 m | 27.8 m | ~175 px | 4.8% | 95.2% | -| 800 m | 27.8 m | ~131 px | 3.6% | 96.4% | -| 1000 m | 27.8 m | ~105 px | 2.9% | 97.1% | - -cuVSLAM uses Lucas-Kanade optical flow with image pyramids. Standard LK handles 30-50px displacements on the base level; with 3-4 pyramid levels, effective search range extends to ~150-200px. At 600m altitude, the 175px shift is within this pyramid-assisted range. At 800-1000m, the shift drops to 105-131px — well within range. - -Key factors that make 0.7 fps viable at high altitude: -- **Large footprint**: The 16mm lens on APS-C at 600-1000m produces 577-962m along-track coverage. The aircraft moves only 4-5% of the frame between shots. -- **High texture from altitude**: At 600-1000m, each frame covers a large area with diverse terrain features (roads, field boundaries, structures) even in agricultural regions. -- **IMU bridging**: cuVSLAM's built-in IMU integrator provides pose prediction during the 1.43s gap between frames. ESKF IMU prediction runs at 5-10Hz for continuous GPS_INPUT output. -- **95%+ overlap**: Consecutive frames share >95% content — abundant features for matching. - -**Risk**: Over completely uniform terrain (e.g., single crop field filling entire 577m+ footprint), feature tracking may still fail. cuVSLAM falls back to IMU-only (~1s acceptable) then constant-velocity (~0.5s) before tracking loss. Satellite matching corrections every 5-10 frames bound accumulated drift. - -## Product Solution Description - -A real-time GPS-denied visual navigation system for fixed-wing UAVs, running on a Jetson Orin Nano Super (8GB). All AI model inference uses **native TensorRT Engine files** — no ONNX Runtime dependency. The system replaces the GPS module by sending MAVLink GPS_INPUT messages via pymavlink over UART at 5-10Hz. - -Position is determined by fusing: (1) CUDA-accelerated visual odometry (cuVSLAM — native CUDA) from ADTI 20L V1 at 0.7 fps sustained, (2) absolute position corrections from satellite image matching (LiteSAM or XFeat — TRT Engine FP16) using keyframes from the same ADTI image stream, and (3) IMU data from the flight controller via ESKF. Viewpro A40 Pro is reserved for AI object detection only. - -**Inference runtime decision**: Native TRT Engine over ONNX Runtime because: -1. ONNX RT CUDA EP is 7-8x slower on Orin Nano (tensor core bug) -2. ONNX RT TRT-EP wastes ~280-300MB per model (serialized engine retained in memory) -3. Cross-platform portability has zero value — deployment is Jetson-only -4. Native TRT provides direct CUDA stream control for pipelining with cuVSLAM - -**Hard constraint**: ADTI 20L V1 shoots at 0.7 fps sustained (1430ms interval). Full VO+ESKF pipeline within 400ms per frame. Satellite matching async on keyframes (every 5-10 camera frames). GPS_INPUT at 5-10Hz (ESKF IMU prediction fills gaps between camera frames). - -**AI Model Runtime Summary**: - -| Model | Runtime | Precision | Memory | Integration | -|-------|---------|-----------|--------|-------------| -| cuVSLAM | Native CUDA (PyCuVSLAM) | N/A (closed-source) | ~200-500MB | CUDA Stream A | -| LiteSAM | TRT Engine | FP16 | ~50-80MB | CUDA Stream B | -| XFeat | TRT Engine | FP16 | ~30-50MB | CUDA Stream B (fallback) | -| ESKF | CPU (Python/C++) | FP64 | ~10MB | CPU thread | - -**Offline Preparation Pipeline** (before flight): -1. Download satellite tiles → validate → pre-resize → store (existing) -2. **NEW: Build TRT engines on Jetson** (one-time per model version) - - `trtexec --onnx=litesam_fp16.onnx --saveEngine=litesam.engine --fp16` - - `trtexec --onnx=xfeat.onnx --saveEngine=xfeat.engine --fp16` -3. Copy tiles + engines to Jetson storage -4. At startup: load engines + preload tiles into RAM - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ OFFLINE (Before Flight) │ -│ 1. Satellite Tiles → Download & Validate → Pre-resize → Store │ -│ (Google Maps) (≥0.5m/px, <2yr) (matcher res) (GeoHash)│ -│ 2. TRT Engine Build (one-time per model version): │ -│ PyTorch model → reparameterize → ONNX export → trtexec --fp16 │ -│ Output: litesam.engine, xfeat.engine │ -│ 3. Copy tiles + engines to Jetson storage │ -└─────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ ONLINE (During Flight) │ -│ │ -│ STARTUP: │ -│ 1. pymavlink → read GLOBAL_POSITION_INT → init ESKF │ -│ 2. Load TRT engines: litesam.engine + xfeat.engine │ -│ (tensorrt.Runtime → deserialize_cuda_engine → create_context) │ -│ 3. Allocate GPU buffers for TRT input/output (PyCUDA) │ -│ 4. Start cuVSLAM with ADTI 20L V1 camera stream │ -│ 5. Preload satellite tiles ±2km into RAM │ -│ 6. Begin GPS_INPUT output loop at 5-10Hz │ -│ │ -│ EVERY CAMERA FRAME (0.7fps sustained from ADTI 20L V1): │ -│ ┌──────────────────────────────────────┐ │ -│ │ ADTI 20L V1 → Downsample (CUDA) │ │ -│ │ → cuVSLAM VO+IMU (~9ms) │ ← CUDA Stream A │ -│ │ → ESKF measurement │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ 5-10Hz CONTINUOUS (IMU-driven between camera frames): │ -│ ┌──────────────────────────────────────┐ │ -│ │ ESKF IMU prediction → GPS_INPUT send │──→ Flight Controller │ -│ └──────────────────────────────────────┘ │ -│ │ -│ KEYFRAMES (every 5-10 camera frames, async): │ -│ ┌──────────────────────────────────────┐ │ -│ │ Same ADTI frame → TRT inference (B): │ │ -│ │ context.enqueue_v3(stream_B) │──→ ESKF correction │ -│ │ LiteSAM FP16 or XFeat FP16 │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ TELEMETRY (1Hz): │ -│ ┌──────────────────────────────────────┐ │ -│ │ NAMED_VALUE_FLOAT: confidence, drift │──→ Ground Station │ -│ └──────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Architecture - -### Component: AI Model Inference Runtime - -| Solution | Tools | Advantages | Limitations | Performance | Memory | Fit | -|----------|-------|-----------|-------------|------------|--------|-----| -| Native TRT Engine | tensorrt Python + PyCUDA + trtexec | Optimal latency, minimal memory, full tensor core usage, direct CUDA stream control | Hardware-specific engines, manual buffer management, rebuild per TRT version | Optimal | ~50-130MB total (both models) | ✅ Best | -| ONNX Runtime TRT-EP | onnxruntime + TensorRT EP | Auto-fallback for unsupported ops, simpler API, auto engine caching | +280-300MB per model, wrapper overhead, first-run latency spike | Near-parity (claimed), up to 3x slower (observed) | ~640-690MB total (both models) | ❌ Memory overhead unacceptable | -| ONNX Runtime CUDA EP | onnxruntime + CUDA EP | Simplest API, broadest op support | 7-8x slower on Orin Nano (tensor core bug), no TRT optimizations | 7-8x slower | Standard | ❌ Performance unacceptable | -| Torch-TensorRT | torch_tensorrt | AOT compilation, PyTorch-native, handles mixed TRT/PyTorch | Newer on Jetson, requires PyTorch runtime at inference | Near native TRT | PyTorch runtime ~500MB+ | ⚠️ Viable alternative if TRT export fails | - -**Selected**: **Native TRT Engine** — optimal performance and memory on our fixed NVIDIA hardware. - -**Fallback**: If any model has unsupported TRT ops (e.g., MinGRU in LiteSAM), use **Torch-TensorRT** for that specific model. Torch-TensorRT handles mixed TRT/PyTorch execution but requires PyTorch runtime in memory. - -### Component: TRT Engine Conversion Workflow - -**LiteSAM conversion**: -1. Load PyTorch model with trained weights -2. Reparameterize MobileOne backbone (collapse multi-branch → single Conv2d+BN) -3. Export to ONNX: `torch.onnx.export(model, dummy_input, "litesam.onnx", opset_version=17)` -4. Verify with polygraphy: `polygraphy inspect model litesam.onnx` -5. Build engine on Jetson: `trtexec --onnx=litesam.onnx --saveEngine=litesam.engine --fp16 --memPoolSize=workspace:2048` -6. Verify engine: `trtexec --loadEngine=litesam.engine --fp16` - -**XFeat conversion**: -1. Load PyTorch model -2. Export to ONNX: `torch.onnx.export(model, dummy_input, "xfeat.onnx", opset_version=17)` -3. Build engine on Jetson: `trtexec --onnx=xfeat.onnx --saveEngine=xfeat.engine --fp16` -4. Alternative: use XFeatTensorRT C++ implementation directly - -**INT8 quantization strategy** (optional, future optimization): -- MobileOne backbone (CNN): INT8 safe with calibration data -- TAIFormer (transformer attention): FP16 only — INT8 degrades accuracy -- XFeat: evaluate INT8 on actual UAV-satellite pairs before deploying -- Use nvidia-modelopt for calibration: `from modelopt.onnx.quantization import quantize` - -### Component: TRT Python Inference Wrapper - -Minimal wrapper class for TRT engine inference: - -```python -import tensorrt as trt -import pycuda.driver as cuda - -class TRTInference: - def __init__(self, engine_path, stream): - self.logger = trt.Logger(trt.Logger.WARNING) - self.runtime = trt.Runtime(self.logger) - with open(engine_path, 'rb') as f: - self.engine = self.runtime.deserialize_cuda_engine(f.read()) - self.context = self.engine.create_execution_context() - self.stream = stream - self._allocate_buffers() - - def _allocate_buffers(self): - self.inputs = {} - self.outputs = {} - for i in range(self.engine.num_io_tensors): - name = self.engine.get_tensor_name(i) - shape = self.engine.get_tensor_shape(name) - dtype = trt.nptype(self.engine.get_tensor_dtype(name)) - size = trt.volume(shape) - device_mem = cuda.mem_alloc(size * np.dtype(dtype).itemsize) - self.context.set_tensor_address(name, int(device_mem)) - mode = self.engine.get_tensor_mode(name) - if mode == trt.TensorIOMode.INPUT: - self.inputs[name] = (device_mem, shape, dtype) - else: - self.outputs[name] = (device_mem, shape, dtype) - - def infer_async(self, input_data): - for name, data in input_data.items(): - cuda.memcpy_htod_async(self.inputs[name][0], data, self.stream) - self.context.enqueue_v3(self.stream.handle) - - def get_output(self): - results = {} - for name, (dev_mem, shape, dtype) in self.outputs.items(): - host_mem = np.empty(shape, dtype=dtype) - cuda.memcpy_dtoh_async(host_mem, dev_mem, self.stream) - self.stream.synchronize() - return results -``` - -Key design: `infer_async()` + `get_output()` split enables pipelining with cuVSLAM on Stream A while satellite matching runs on Stream B. - -### Component: Visual Odometry (UPDATED — camera rate corrected) - -cuVSLAM — native CUDA library. Fed by **ADTI 20L V1 at 0.7 fps sustained** (previously assumed 3fps which exceeds camera hardware limit; 2.0 fps spec is burst-only, not sustainable). At 70 km/h cruise the inter-frame displacement is 27.8m — at 600m altitude this translates to ~175px (4.8% of frame), within pyramid-assisted LK optical flow range. At 800-1000m altitude the pixel shift drops to 105-131px. 95%+ frame overlap ensures abundant features for matching. ESKF IMU prediction at 5-10Hz fills the position output between sparse camera frames. - -### Component: Satellite Image Matching (UPDATED runtime + fallback chain) - -| Solution | Tools | Advantages | Limitations | Performance (est. Orin Nano Super TRT FP16) | Params | Fit | -|----------|-------|-----------|-------------|----------------------------------------------|--------|-----| -| LiteSAM (opt) TRT Engine FP16 @ 1280px | trtexec + tensorrt Python | Best satellite-aerial accuracy (RMSE@30=17.86m UAV-VisLoc), 6.31M params, smallest model | MinGRU TRT export needs verification (LOW-MEDIUM risk) | Est. ~165-330ms | 6.31M | ✅ Primary (if TRT export succeeds AND ≤200ms) | -| EfficientLoFTR TRT Engine FP16 | trtexec + tensorrt Python | Proven TRT path (Coarse_LoFTR_TRT repo, 138 stars). Semi-dense. CVPR 2024. High accuracy. | 2.4x more params than LiteSAM. Requires einsum→elementary ops rewrite for TRT (documented in Coarse_LoFTR_TRT paper). | Est. ~200-400ms | 15.05M | ✅ Fallback if LiteSAM TRT fails | -| XFeat TRT Engine FP16 | trtexec + tensorrt Python (or XFeatTensorRT C++) | Fastest. Proven TRT implementation. Lightweight. | General-purpose, not designed for cross-view satellite-aerial gap (but nadir-nadir gap is small). | Est. ~50-100ms | <5M | ✅ Speed fallback | - -**Decision tree (day-one on Orin Nano Super)**: -1. Clone LiteSAM repo → reparameterize MobileOne → `torch.onnx.export()` → `polygraphy inspect` -2. If ONNX export succeeds → `trtexec --onnx=litesam.onnx --saveEngine=litesam.engine --fp16` -3. If MinGRU causes ONNX/TRT failure → rewrite MinGRU forward() as unrolled 9-step loop → retry -4. If rewrite fails or accuracy degrades → **switch to EfficientLoFTR TRT**: - - Apply Coarse_LoFTR_TRT TRT-adaptation techniques (einsum replacement, etc.) - - Export to ONNX → trtexec --fp16 - - Benchmark at 640×480 and 1280px -5. Benchmark winner: **if ≤200ms → use it. If >200ms but ≤300ms → acceptable (async on Stream B). If >300ms → use XFeat TRT** - -**EfficientLoFTR TRT adaptation** (from Coarse_LoFTR_TRT paper, proven workflow): -- Replace `torch.einsum()` with elementary ops (view, bmm, reshape, sum) -- Replace any TRT-incompatible high-level PyTorch functions -- Use ONNX export path (less memory required than Torch-TensorRT on 8GB device) -- Knowledge distillation available for further parameter reduction if needed - -**Satellite matching cadence**: Keyframes selected from the ADTI VO stream every 5-10 frames (~every 2.5-14s depending on camera fps setting). At 800-1000m altitude and 14 m/s cruise, this yields 60-97% forward overlap between satellite match frames. Matching runs async on Stream B — does not block VO on Stream A. - -### Component: Sensor Fusion (UNCHANGED) -ESKF — CPU-based mathematical filter, not affected. - -### Component: Flight Controller Integration (UNCHANGED) -pymavlink — not affected by TRT migration. - -### Component: Ground Station Telemetry (UNCHANGED) -MAVLink NAMED_VALUE_FLOAT — not affected. - -### Component: Startup & Lifecycle (UPDATED) - -**Updated startup sequence**: -1. Boot Jetson → start GPS-Denied service (systemd) -2. Connect to flight controller via pymavlink on UART -3. Wait for heartbeat from flight controller -4. **Initialize PyCUDA context** -5. **Load TRT engines**: litesam.engine + xfeat.engine via tensorrt.Runtime.deserialize_cuda_engine() -6. **Allocate GPU I/O buffers** for both models -7. **Create CUDA streams**: Stream A (cuVSLAM), Stream B (satellite matching) -8. Read GLOBAL_POSITION_INT → init ESKF -9. Start cuVSLAM with ADTI 20L V1 camera frames -10. Begin GPS_INPUT output loop at 5-10Hz -11. Preload satellite tiles within ±2km into RAM -12. System ready - -**Engine load time**: ~1-3 seconds per engine (deserialization from .engine file). One-time cost at startup. - -### Component: Thermal Management (UNCHANGED) -Same adaptive pipeline. TRT engines are slightly more power-efficient than ONNX Runtime, but the difference is within noise. - -### Component: Object Localization (UNCHANGED) -Not affected — trigonometric calculation, no AI inference. - -## Speed Optimization Techniques - -### 1. cuVSLAM for Visual Odometry (~9ms/frame) -Fed by ADTI 20L V1 at 0.7 fps sustained. At 70 km/h cruise and 600m altitude, inter-frame displacement is 27.8m (~175px, 4.8% of frame). With pyramid-based LK optical flow (3-4 levels), effective search range is ~150-200px — 175px is within range. At 800-1000m altitude, pixel shift drops to 105-131px. 95%+ overlap between consecutive frames. - -### 2. Native TRT Engine Inference (NEW) -All AI models run as pre-compiled TRT FP16 engines: -- Engine files built offline with trtexec (one-time per model version) -- Loaded at startup (~1-3s per engine) -- Inference via context.enqueue_v3() on dedicated CUDA Stream B -- GPU buffers pre-allocated — zero runtime allocation during flight -- No ONNX Runtime dependency — no framework overhead - -Memory advantage over ONNX Runtime TRT-EP: ~560-600MB saved (both models combined). -Latency advantage: eliminates ONNX wrapper overhead, guaranteed tensor core utilization. - -### 3. CUDA Stream Pipelining (REFINED) -- Stream A: cuVSLAM VO from ADTI 20L V1 (~9ms) + ESKF fusion (~1ms) -- Stream B: TRT engine inference for satellite matching (LiteSAM or XFeat, async, triggered on keyframe from same ADTI stream) -- CPU: GPS_INPUT output loop, NAMED_VALUE_FLOAT, command listener, tile management -- **NEW**: Both cuVSLAM and TRT engines use CUDA streams natively — no framework abstraction layer. Direct GPU scheduling. - -### 4-7. (UNCHANGED from draft03) -Keyframe-based satellite matching, TensorRT FP16 optimization, proactive tile loading, 5-10Hz GPS_INPUT output — all unchanged. - -## Processing Time Budget - -### VO Frame (every ~1430ms from ADTI 20L V1 at 0.7 fps) - -| Step | Time | Notes | -|------|------|-------| -| ADTI image transfer | ~5-10ms | Trigger + readout | -| Downsample (CUDA) | ~2ms | To cuVSLAM input resolution | -| cuVSLAM VO+IMU | ~9ms | CUDA Stream A | -| ESKF measurement update | ~1ms | CPU | -| **Total** | **~17-22ms** | Well within 1430ms budget | - -Between camera frames, ESKF IMU prediction runs at 5-10Hz to maintain continuous GPS_INPUT output. The ~1.4s gap between frames is bridged entirely by IMU integration. - -### Keyframe Satellite Matching (every 5-10 camera frames, async CUDA Stream B) - -**Path A — LiteSAM TRT Engine FP16 at 1280px**: - -| Step | Time | Notes | -|------|------|-------| -| Image already in GPU (from VO) | ~0ms | Same frame used for VO and matching | -| Load satellite tile | ~1ms | Pre-loaded in RAM | -| Copy input to GPU buffer | <0.5ms | PyCUDA memcpy_htod_async | -| LiteSAM TRT Engine FP16 | ≤200ms | context.enqueue_v3(stream_B) | -| Copy output from GPU | <0.5ms | PyCUDA memcpy_dtoh_async | -| Geometric pose (RANSAC) | ~5ms | Homography | -| ESKF satellite update | ~1ms | Delayed measurement | -| **Total** | **≤210ms** | Async on Stream B, does not block VO | - -**Path B — XFeat TRT Engine FP16**: - -| Step | Time | Notes | -|------|------|-------| -| XFeat TRT Engine inference | ~50-80ms | context.enqueue_v3(stream_B) | -| Geometric verification (RANSAC) | ~5ms | | -| ESKF satellite update | ~1ms | | -| **Total** | **~60-90ms** | Async on Stream B | - -## Memory Budget (Jetson Orin Nano Super, 8GB shared) - -| Component | Memory (Native TRT) | Memory (ONNX RT TRT-EP) | Notes | -|-----------|---------------------|--------------------------|-------| -| OS + runtime | ~1.5GB | ~1.5GB | JetPack 6.2 + Python | -| cuVSLAM | ~200-500MB | ~200-500MB | CUDA library + map | -| **LiteSAM TRT engine** | **~50-80MB** | **~330-360MB** | Native TRT vs TRT-EP. If LiteSAM fails: EfficientLoFTR ~100-150MB | -| **XFeat TRT engine** | **~30-50MB** | **~310-330MB** | Native TRT vs TRT-EP | -| Preloaded satellite tiles | ~200MB | ~200MB | ±2km of flight plan | -| pymavlink + MAVLink | ~20MB | ~20MB | | -| FastAPI (local IPC) | ~50MB | ~50MB | | -| ESKF + buffers | ~10MB | ~10MB | | -| ONNX Runtime framework | **0MB** | **~150MB** | Eliminated with native TRT | -| **Total** | **~2.1-2.9GB** | **~2.8-3.6GB** | | -| **% of 8GB** | **26-36%** | **35-45%** | | -| **Savings** | — | — | **~700MB saved with native TRT** | - -## Confidence Scoring → GPS_INPUT Mapping -Unchanged from draft03. - -## Key Risks and Mitigations - -| Risk | Likelihood | Impact | Mitigation | -|------|-----------|--------|------------| -| **LiteSAM MinGRU ops unsupported in TRT 10.3** | LOW-MEDIUM | LiteSAM TRT export fails | Day-one verification: ONNX export → polygraphy → trtexec. If MinGRU fails: (1) rewrite as unrolled 9-step loop, (2) if still fails: **switch to EfficientLoFTR TRT** (proven TRT path, Coarse_LoFTR_TRT, 15.05M params). XFeat TRT as speed fallback. | -| **TRT engine build OOM on 8GB Jetson** | LOW | Cannot build engines on target device | Our models are small (6.31M LiteSAM, <5M XFeat). OOM unlikely. If occurs: reduce --memPoolSize, or build on identical Orin Nano module with more headroom | -| **Engine incompatibility after JetPack update** | MEDIUM | Must rebuild engines | Include engine rebuild in JetPack update procedure. Takes minutes per model. | -| **MAVSDK cannot send GPS_INPUT** | CONFIRMED | Must use pymavlink | Unchanged from draft03 | -| **cuVSLAM fails on low-texture terrain** | HIGH | Frequent tracking loss | ADTI at 0.7 fps means 27.8m inter-frame displacement at 70 km/h. At 600m+ altitude, pixel shift is 105-175px with 95%+ overlap — within pyramid-assisted LK range. HIGH risk remains over completely uniform terrain (single crop covering 577m+ footprint). IMU bridging + satellite matching corrections bound drift. | -| **Thermal throttling** | MEDIUM | Satellite matching budget blown | Unchanged from draft03 | -| LiteSAM TRT FP16 >200ms at 1280px | MEDIUM | Must use fallback matcher | Day-one benchmark. Fallback chain: EfficientLoFTR TRT (if ≤300ms) → XFeat TRT (if all >300ms) | -| Google Maps satellite quality in conflict zone | HIGH | Satellite matching fails | Unchanged from draft03 | -| **AUW exceeds AT4125 recommended range** | MEDIUM | Reduced endurance, motor thermal stress | 12.5 kg AUW vs 8-10 kg recommended. Monitor motor temps. Consider weight reduction (lighter gimbal, single battery for shorter missions). | -| **cuVSLAM at 0.7 fps — inter-frame displacement** | MEDIUM | VO tracking loss on uniform terrain | At 0.7 fps and 70 km/h: ~175px displacement at 600m (4.8% of frame, 95.2% overlap). Within pyramid-assisted LK range (150-200px). At 800m+: drops to 105-131px. Mitigations: (1) cuVSLAM IMU integrator bridges 1.43s frame gaps, (2) ESKF IMU prediction at 5-10Hz fills position gaps, (3) satellite matching corrections every 5-10 frames bound drift. | -| **ADTI mechanical shutter lifespan** | MEDIUM | Shutter replacement needed periodically | At 0.7 fps sustained over 3.5h flights: ~8,800 actuations/flight. Shutter life unknown for 20L (102PRO is 500K, entry-level likely 100-150K). Estimated 11-57 flights before replacement. Budget for shutter replacement as consumable. | - -## Testing Strategy - -### Integration / Functional Tests -All tests from draft03 unchanged, plus: -- **TRT engine load test**: Verify litesam.engine and xfeat.engine load successfully on Jetson Orin Nano Super -- **TRT inference correctness**: Compare TRT engine output vs PyTorch reference output (max L1 error < 0.01) -- **CUDA Stream B pipelining**: Verify satellite matching on Stream B does not block cuVSLAM on Stream A -- **Engine pre-built validation**: Verify engine files from offline preparation work without rebuild at runtime -- **ADTI 20L V1 sustained capture rate**: Verify camera sustains 0.7 fps in JPEG mode over extended periods (>30 min) without buffer overflow or overheating. Also test 1.0 fps to determine if higher sustained rate is achievable. -- **ADTI trigger timing**: Verify camera trigger and image transfer pipeline delivers frames to cuVSLAM within acceptable latency (<50ms from trigger to GPU buffer) - -### Non-Functional Tests -All tests from draft03 unchanged, plus: -- **TRT engine build time**: Measure trtexec build time for LiteSAM and XFeat on Orin Nano Super (expected: 1-5 minutes each) -- **TRT engine load time**: Measure deserialization time (expected: 1-3 seconds each) -- **Memory comparison**: Measure actual GPU memory with native TRT vs ONNX RT TRT-EP for both models -- **MinGRU TRT compatibility** (day-one blocker): - 1. Clone LiteSAM repo, load pretrained weights - 2. Reparameterize MobileOne backbone - 3. `torch.onnx.export(model, dummy, "litesam.onnx", opset_version=17)` - 4. `polygraphy inspect model litesam.onnx` — check for unsupported ops - 5. `trtexec --onnx=litesam.onnx --saveEngine=litesam.engine --fp16` - 6. If step 3 or 5 fails on MinGRU: rewrite MinGRU forward() as unrolled loop, retry - 7. If still fails: switch to EfficientLoFTR, apply Coarse_LoFTR_TRT adaptation - 8. Compare TRT output vs PyTorch reference (max L1 error < 0.01) -- **EfficientLoFTR TRT fallback benchmark** (if LiteSAM fails): apply TRT adaptation from Coarse_LoFTR_TRT → ONNX → trtexec → measure latency at 640×480 and 1280px -- **Tensor core utilization**: Verify with NSight that TRT engines use tensor cores (unlike ONNX RT CUDA EP) -- **Flight endurance validation**: Ground-test full system power draw (propulsion + electronics) against 267W estimate. Verify ~3.4h endurance target. -- **cuVSLAM at 0.7 fps**: Benchmark VO tracking quality, drift rate, and tracking loss frequency at 0.7 fps with ADTI 20L V1. Measure IMU integrator effectiveness for bridging 1.43s inter-frame gaps. Test at 600m and 800m altitude, over both textured and low-texture terrain. -- **ADTI shutter durability**: Track shutter actuation count across flights. Monitor for shutter failure symptoms (missed frames, inconsistent exposure). - -## References -- ONNX Runtime Issue #24085 (Jetson Orin Nano tensor core bug): https://github.com/microsoft/onnxruntime/issues/24085 -- ONNX Runtime Issue #20457 (TRT-EP memory overhead): https://github.com/microsoft/onnxruntime/issues/20457 -- ONNX Runtime Issue #12083 (TRT-EP vs native TRT): https://github.com/microsoft/onnxruntime/issues/12083 -- NVIDIA TensorRT 10 Python API: https://docs.nvidia.com/deeplearning/tensorrt/10.15.1/inference-library/python-api-docs.html -- TensorRT Best Practices: https://docs.nvidia.com/deeplearning/tensorrt/latest/performance/best-practices.html -- TensorRT engine hardware specificity: https://github.com/NVIDIA/TensorRT/issues/1920 -- trtexec ONNX conversion: https://nvidia-jetson.piveral.com/jetson-orin-nano/how-to-convert-onnx-to-engine-on-jetson-orin-nano-dev-board/ -- Torch-TensorRT JetPack 6.2: https://docs.pytorch.org/TensorRT/v2.10.0/getting_started/jetpack.html -- XFeatTensorRT: https://github.com/PranavNedunghat/XFeatTensorRT -- JetPack 6.2 Release Notes: https://docs.nvidia.com/jetson/archives/jetpack-archived/jetpack-62/release-notes/index.html -- Jetson Orin Nano Super: https://developer.nvidia.com/blog/nvidia-jetson-orin-nano-developer-kit-gets-a-super-boost/ -- DLA on Jetson Orin: https://developer.nvidia.com/blog/maximizing-deep-learning-performance-on-nvidia-jetson-orin-with-dla/ -- EfficientLoFTR (CVPR 2024): https://github.com/zju3dv/EfficientLoFTR -- EfficientLoFTR HuggingFace: https://huggingface.co/docs/transformers/en/model_doc/efficientloftr -- Coarse_LoFTR_TRT (TRT for embedded): https://github.com/Kolkir/Coarse_LoFTR_TRT -- Coarse_LoFTR_TRT paper: https://ar5iv.labs.arxiv.org/html/2202.00770 -- LoFTR_TRT: https://github.com/Kolkir/LoFTR_TRT -- minGRU ("Were RNNs All We Needed?"): https://huggingface.co/papers/2410.01201 -- minGRU PyTorch implementation: https://github.com/lucidrains/minGRU-pytorch -- LiteSAM paper (MinGRU details, Eqs 12-16): https://www.mdpi.com/2072-4292/17/19/3349 -- DALGlue (UAV feature matching, 2025): https://www.nature.com/articles/s41598-025-21602-5 -- ADTI 20L V1 specs: https://unmannedrc.com/products/adti-20l-v1-mapping-camera -- ADTI 20L V1 user manual: https://docs.adti.camera/adti-20l-and-24l-v1-quick-start-guide/ -- T-Motor AT4125 KV540: https://uav-en.tmotor.com/2019/Motors_0429/247.html -- VANT Semi-Solid State 6S 30Ah battery: https://www.xtbattery.com/370wh/kg-42v-high-energy-density-6s-12s-14s-18s-30ah-semi-solid-state-drone-battery/ -- All references from solution_draft03.md - -## Related Artifacts -- AC Assessment: `_docs/00_research/gps_denied_nav/00_ac_assessment.md` -- Research artifacts (this assessment): `_docs/00_research/trt_engine_migration/` -- Previous research: `_docs/00_research/gps_denied_nav_v3/` -- Tech stack evaluation: `_docs/01_solution/tech_stack.md` -- Security analysis: `_docs/01_solution/security_analysis.md` -- Previous draft: `_docs/01_solution/solution_draft04.md` diff --git a/_docs/01_solution/solution_draft06.md b/_docs/01_solution/solution_draft06.md deleted file mode 100644 index 5438fe7..0000000 --- a/_docs/01_solution/solution_draft06.md +++ /dev/null @@ -1,622 +0,0 @@ -# Solution Draft - -## Assessment Findings - - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -| ------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| ESKF described as "16-state vector, ~10MB" with no mathematical specification | **Functional**: No state vector, no process model (F,Q), no measurement models (H for VO, H for satellite), no noise parameters, no scale observability analysis. Impossible to implement or validate accuracy claims. | **Define complete ESKF specification**: 15-state error vector, IMU-driven prediction, dual measurement models (VO relative pose, satellite absolute position), initial Q/R values, scale constraint via altitude + satellite corrections. | -| GPS_INPUT at 5-10Hz via pymavlink — no field mapping | **Functional**: GPS_INPUT requires 15+ fields (velocity, accuracy, hdop, fix_type, GPS time). No specification of how ESKF state maps to these fields. ArduPilot requires minimum 5Hz. | **Define GPS_INPUT population spec**: velocity from ESKF, accuracy from covariance, fix_type from confidence tier, GPS time from system clock conversion, synthesized hdop/vdop. | -| Confidence scoring "unchanged from draft03" — not in draft05 | **Functional**: Draft05 is supposed to be self-contained. Confidence scoring determines GPS_INPUT accuracy fields and fix_type — directly affects how ArduPilot EKF weights the position data. | **Define confidence scoring inline**: 3 tiers (satellite-anchored, VO-tracked, IMU-only) mapping to fix_type + accuracy values. | -| Coordinate transformations not defined | **Functional**: No pixel→camera→body→NED→WGS84 chain. Camera is not autostabilized, so body attitude matters. Satellite match → WGS84 conversion undefined. Object localization impossible without these transforms. | **Define coordinate transformation chain**: camera intrinsics K, camera-to-body extrinsic T_cam_body, body-to-NED from ESKF attitude, NED origin at mission start point. | -| Disconnected route segments — "satellite re-localization" mentioned but no algorithm | **Functional**: AC requires handling as "core to the system." Multiple disconnected segments expected. No tracking-loss detection, no re-localization trigger, no ESKF re-initialization, no cuVSLAM restart procedure. | **Define re-localization pipeline**: detect cuVSLAM tracking loss → IMU-only ESKF prediction → trigger satellite match on every frame → on match success: ESKF position reset + cuVSLAM restart → on 3 consecutive failures: operator re-localization request. | -| No startup handoff from GPS to GPS-denied | **Functional**: System reads GLOBAL_POSITION_INT at startup but no protocol for when GPS is lost/spoofed vs system start. No validation of initial position. | **Define handoff protocol**: system runs continuously, FC receives both real GPS and GPS_INPUT. GPS-denied system always provides its estimate; FC selects best source. Initial position validated against first satellite match. | -| No mid-flight reboot recovery | **Functional**: AC requires: "re-initialize from flight controller's current IMU-extrapolated position." No procedure defined. Recovery time estimation missing. | **Define reboot recovery sequence**: read FC position → init ESKF with high uncertainty → load TRT engines → start cuVSLAM → immediate satellite match. Estimated recovery: ~35-70s. Document as known limitation. | -| 3-consecutive-failure re-localization request undefined | **Functional**: AC requires ground station re-localization request. No message format, no operator workflow, no system behavior while waiting. | **Define re-localization protocol**: detect 3 failures → send custom MAVLink message with last known position + uncertainty → operator provides approximate coordinates → system uses as ESKF measurement with high covariance. | -| Object localization — "trigonometric calculation" with no details | **Functional**: No math, no API, no Viewpro gimbal integration, no accuracy propagation. Other onboard systems cannot use this component as specified. | **Define object localization**: pixel→ray using Viewpro intrinsics + gimbal angles → body frame → NED → ray-ground intersection → WGS84. FastAPI endpoint: POST /objects/locate. Accuracy propagated from UAV position + gimbal uncertainty. | -| Satellite matching — GSD normalization and tile selection unspecified | **Functional**: Camera GSD ~15.9 cm/px at 600m vs satellite ~0.3 m/px at zoom 19. The "pre-resize" step is mentioned but not specified. Tile selection radius based on ESKF uncertainty not defined. | **Define GSD handling**: downsample camera frame to match satellite GSD. Define tile selection: ESKF position ± 3σ_horizontal → select tiles covering that area. Assemble tile mosaic for matching. | -| Satellite tile storage requirements not calculated | **Functional**: "±2km" preload mentioned but no storage estimate. At zoom 19: a 200km path with ±2km buffer requires ~~130K tiles (~~2.5GB). | **Calculate tile storage**: specify zoom level (18 preferred — 0.6m/px, 4× fewer tiles), estimate storage per mission profile, define maximum mission area by storage limit. | -| FastAPI endpoints not in solution draft | **Functional**: Endpoints only in security_analysis.md. No request/response schemas. No SSE event format. No object localization endpoint. | **Consolidate API spec in solution**: define all endpoints, SSE event schema, object localization endpoint. Reference security_analysis.md for auth. | -| cuVSLAM configuration missing (calibration, IMU params, mode) | **Functional**: No camera calibration procedure, no IMU noise parameters, no T_imu_rig extrinsic, no mode selection (Mono vs Inertial). | **Define cuVSLAM configuration**: use Inertial mode, specify required calibration data (camera intrinsics, distortion, IMU noise params from datasheet, T_imu_rig from physical measurement), define calibration procedure. | -| tech_stack.md inconsistent with draft05 | **Functional**: tech_stack.md says 3fps (should be 0.7fps), LiteSAM at 480px (should be 1280px), missing EfficientLoFTR. | **Flag for update**: tech_stack.md must be synchronized with draft05 corrections. Not addressed in this draft — separate task. | - - -## Overall Maturity Assessment - - -| Category | Maturity (1-5) | Assessment | -| ----------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------- | -| Hardware & Platform Selection | 3.5 | UAV airframe, cameras, Jetson, batteries — well-researched with specs, weight budget, endurance calculations. Ready for procurement. | -| Core Algorithm Selection | 3.0 | cuVSLAM, LiteSAM/XFeat, ESKF — components selected with comparison tables, fallback chains, decision trees. Day-one benchmarks defined. | -| AI Inference Runtime | 3.5 | TRT Engine migration thoroughly analyzed. Conversion workflows, memory savings, performance estimates. Code wrapper provided. | -| Sensor Fusion (ESKF) | 1.5 | Mentioned but not specified. No implementable detail. Blockerfor coding. | -| System Integration | 1.5 | GPS_INPUT, coordinate transforms, inter-component data flow — all under-specified. | -| Edge Cases & Resilience | 1.0 | Disconnected segments, reboot recovery, re-localization — acknowledged but no algorithms. | -| Operational Readiness | 0.5 | No pre-flight procedures, no in-flight monitoring, no failure response. | -| Security | 3.0 | Comprehensive threat model, OP-TEE analysis, LUKS, secure boot. Well-researched. | -| **Overall TRL** | **~2.5** | **Technology concept formulated + some component validation. Not implementation-ready.** | - - -The solution is at approximately **TRL 3** (proof of concept) for hardware/algorithm selection and **TRL 1-2** (basic concept) for system integration, ESKF, and operational procedures. - -## Product Solution Description - -A real-time GPS-denied visual navigation system for fixed-wing UAVs, running on a Jetson Orin Nano Super (8GB). All AI model inference uses native TensorRT Engine files. The system replaces the GPS module by sending MAVLink GPS_INPUT messages via pymavlink over UART at 5-10Hz. - -Position is determined by fusing: (1) CUDA-accelerated visual odometry (cuVSLAM in Inertial mode) from ADTI 20L V1 at 0.7 fps sustained, (2) absolute position corrections from satellite image matching (LiteSAM or XFeat — TRT Engine FP16) using keyframes from the same ADTI image stream, and (3) IMU data from the flight controller via ESKF. Viewpro A40 Pro is reserved for AI object detection only. - -The ESKF is the central state estimator with 15-state error vector. It fuses: - -- **IMU prediction** at 5-10Hz (high-frequency pose propagation) -- **cuVSLAM VO measurement** at 0.7Hz (relative pose correction) -- **Satellite matching measurement** at ~0.07-0.14Hz (absolute position correction) - -GPS_INPUT messages carry position, velocity, and accuracy derived from the ESKF state and covariance. - -**Hard constraint**: ADTI 20L V1 shoots at 0.7 fps sustained (1430ms interval). Full VO+ESKF pipeline within 400ms per frame. Satellite matching async on keyframes (every 5-10 camera frames). GPS_INPUT at 5-10Hz (ESKF IMU prediction fills gaps between camera frames). - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ OFFLINE (Before Flight) │ -│ 1. Satellite Tiles → Download & Validate → Pre-resize → Store │ -│ (Google Maps) (≥0.5m/px, <2yr) (matcher res) (GeoHash)│ -│ 2. TRT Engine Build (one-time per model version): │ -│ PyTorch model → reparameterize → ONNX export → trtexec --fp16 │ -│ Output: litesam.engine, xfeat.engine │ -│ 3. Camera + IMU calibration (one-time per hardware unit) │ -│ 4. Copy tiles + engines + calibration to Jetson storage │ -└─────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ ONLINE (During Flight) │ -│ │ -│ STARTUP: │ -│ 1. pymavlink → read GLOBAL_POSITION_INT → init ESKF state │ -│ 2. Load TRT engines + allocate GPU buffers │ -│ 3. Load camera calibration + IMU calibration │ -│ 4. Start cuVSLAM (Inertial mode) with ADTI 20L V1 │ -│ 5. Preload satellite tiles ±2km into RAM │ -│ 6. First satellite match → validate initial position │ -│ 7. Begin GPS_INPUT output loop at 5-10Hz │ -│ │ -│ EVERY CAMERA FRAME (0.7fps from ADTI 20L V1): │ -│ ┌──────────────────────────────────────┐ │ -│ │ ADTI 20L V1 → Downsample (CUDA) │ │ -│ │ → cuVSLAM VO+IMU (~9ms) │ ← CUDA Stream A │ -│ │ → ESKF VO measurement │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ 5-10Hz CONTINUOUS (IMU-driven between camera frames): │ -│ ┌──────────────────────────────────────┐ │ -│ │ IMU data → ESKF prediction │ │ -│ │ ESKF state → GPS_INPUT fields │ │ -│ │ GPS_INPUT → Flight Controller (UART) │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ KEYFRAMES (every 5-10 camera frames, async): │ -│ ┌──────────────────────────────────────┐ │ -│ │ Camera frame → GSD downsample │ │ -│ │ Select satellite tile (ESKF pos±3σ) │ │ -│ │ TRT inference (Stream B): LiteSAM/ │ │ -│ │ XFeat → correspondences │ │ -│ │ RANSAC → homography → WGS84 position │ │ -│ │ ESKF satellite measurement update │──→ Position correction │ -│ └──────────────────────────────────────┘ │ -│ │ -│ TRACKING LOSS (cuVSLAM fails — sharp turn / featureless): │ -│ ┌──────────────────────────────────────┐ │ -│ │ ESKF → IMU-only prediction (growing │ │ -│ │ uncertainty) │ │ -│ │ Satellite match on EVERY frame │ │ -│ │ On match success → ESKF reset + │ │ -│ │ cuVSLAM restart │ │ -│ │ 3 consecutive failures → operator │ │ -│ │ re-localization request │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ TELEMETRY (1Hz): │ -│ ┌──────────────────────────────────────┐ │ -│ │ NAMED_VALUE_FLOAT: confidence, drift │──→ Ground Station │ -│ └──────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Architecture - -### Component: ESKF Sensor Fusion (NEW — previously unspecified) - -**Error-State Kalman Filter** fusing IMU, visual odometry, and satellite matching. - -**Nominal state vector** (propagated by IMU): - - -| State | Symbol | Size | Description | -| ---------- | ------ | ---- | ------------------------------------------------ | -| Position | p | 3 | NED position relative to mission origin (meters) | -| Velocity | v | 3 | NED velocity (m/s) | -| Attitude | q | 4 | Unit quaternion (body-to-NED rotation) | -| Accel bias | b_a | 3 | Accelerometer bias (m/s²) | -| Gyro bias | b_g | 3 | Gyroscope bias (rad/s) | - - -**Error-state vector** (estimated by ESKF): δx = [δp, δv, δθ, δb_a, δb_g]ᵀ ∈ ℝ¹⁵ -where δθ ∈ so(3) is the 3D rotation error. - -**Prediction step** (IMU at 5-10Hz from flight controller): - -- Input: accelerometer a_m, gyroscope ω_m, dt -- Propagate nominal state: p += v·dt, v += (R(q)·(a_m - b_a) - g)·dt, q ⊗= Exp(ω_m - b_g)·dt -- Propagate error covariance: P = F·P·Fᵀ + Q -- F is the 15×15 error-state transition matrix (standard ESKF formulation) -- Q: process noise diagonal, initial values from IMU datasheet noise densities - -**VO measurement update** (0.7Hz from cuVSLAM): - -- cuVSLAM outputs relative pose: ΔR, Δt (camera frame) -- Transform to NED: Δp_ned = R_body_ned · T_cam_body · Δt -- Innovation: z = Δp_ned_measured - Δp_ned_predicted -- Observation matrix H_vo maps error state to relative position change -- R_vo: measurement noise, initial ~0.1-0.5m (from cuVSLAM precision at 600m+ altitude) -- Kalman update: K = P·Hᵀ·(H·P·Hᵀ + R)⁻¹, δx = K·z, P = (I - K·H)·P - -**Satellite measurement update** (0.07-0.14Hz, async): - -- Satellite matching outputs absolute position: lat_sat, lon_sat in WGS84 -- Convert to NED relative to mission origin -- Innovation: z = p_satellite - p_predicted -- H_sat = [I₃, 0, 0, 0, 0] (directly observes position) -- R_sat: measurement noise, from matching confidence (~5-20m based on RANSAC inlier ratio) -- Provides absolute position correction — bounds drift accumulation - -**Scale observability**: - -- Monocular cuVSLAM has scale ambiguity during constant-velocity flight -- Scale is constrained by: (1) satellite matching absolute positions (primary), (2) known flight altitude from barometer + predefined mission altitude, (3) IMU accelerometer during maneuvers -- During long straight segments without satellite correction, scale drift is possible. Satellite corrections every ~7-14s re-anchor scale. - -**Tuning approach**: Start with IMU datasheet noise values for Q. Start with conservative R values (high measurement noise). Tune on flight test data by comparing ESKF output to known GPS ground truth. - - -| Solution | Tools | Advantages | Limitations | Performance | Fit | -| -------------------------- | --------------- | ------------------------------------------------------------- | -------------------------------------- | ------------- | ----------- | -| Custom ESKF (Python/NumPy) | NumPy, SciPy | Full control, minimal dependencies, well-understood algorithm | Implementation effort, tuning required | <1ms per step | ✅ Selected | -| FilterPy ESKF | FilterPy v1.4.5 | Reference implementation, less code | Less flexible for multi-rate fusion | <1ms per step | ⚠️ Fallback | - - -### Component: Coordinate System & Transformations (NEW — previously undefined) - -**Reference frames**: - -- **Camera frame (C)**: origin at camera optical center, Z forward, X right, Y down (OpenCV convention) -- **Body frame (B)**: origin at UAV CG, X forward (nose), Y right (starboard), Z down -- **NED frame (N)**: North-East-Down, origin at mission start point -- **WGS84**: latitude, longitude, altitude (output format) - -**Transformation chain**: - -1. **Pixel → Camera ray**: p_cam = K⁻¹ · [u, v, 1]ᵀ where K = camera intrinsic matrix (ADTI 20L V1: fx, fy from 16mm lens + APS-C sensor) -2. **Camera → Body**: p_body = T_cam_body · p_cam where T_cam_body is the fixed mounting rotation (camera points nadir: 90° pitch rotation from body X-forward to camera Z-down) -3. **Body → NED**: p_ned = R_body_ned(q) · p_body where q is the ESKF quaternion attitude estimate -4. **NED → WGS84**: lat = lat_origin + p_north / R_earth, lon = lon_origin + p_east / (R_earth · cos(lat_origin)) where (lat_origin, lon_origin) is the mission start GPS position - -**Camera intrinsic matrix K** (ADTI 20L V1 + 16mm lens): - -- Sensor: 23.2 × 15.4 mm, Resolution: 5456 × 3632 -- fx = fy = focal_mm × width_px / sensor_width_mm = 16 × 5456 / 23.2 = 3763 pixels -- cx = 2728, cy = 1816 (sensor center) -- Distortion: Brown model (k1, k2, p1, p2 from calibration) - -**T_cam_body** (camera mount): - -- Navigation camera is fixed, pointing nadir (downward), not autostabilized -- R_cam_body = R_x(180°) · R_z(0°) (camera Z-axis aligned with body -Z, camera X with body X) -- Translation: offset from CG to camera mount (measured during assembly, typically <0.3m) - -**Satellite match → WGS84**: - -- Feature correspondences between camera frame and geo-referenced satellite tile -- Homography H maps camera pixels to satellite tile pixels -- Satellite tile pixel → WGS84 via tile's known georeference (zoom level + tile x,y → lat,lon) -- Camera center projects to satellite pixel (cx_sat, cy_sat) via H -- Convert (cx_sat, cy_sat) to WGS84 using tile georeference - -### Component: GPS_INPUT Message Population (NEW — previously undefined) - - -| GPS_INPUT Field | Source | Computation | -| ----------------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| lat, lon | ESKF position (NED) | NED → WGS84 conversion using mission origin | -| alt | ESKF position (Down) + mission origin altitude | alt = alt_origin - p_down | -| vn, ve, vd | ESKF velocity state | Direct from ESKF v[0], v[1], v[2] | -| fix_type | Confidence tier | 3 (3D fix) when satellite-anchored (last match <30s). 2 (2D) when VO-only. 0 (no fix) when IMU-only >5s | -| hdop | ESKF horizontal covariance | hdop = sqrt(P[0,0] + P[1,1]) / 5.0 (approximate CEP→HDOP mapping) | -| vdop | ESKF vertical covariance | vdop = sqrt(P[2,2]) / 5.0 | -| horiz_accuracy | ESKF horizontal covariance | horiz_accuracy = sqrt(P[0,0] + P[1,1]) meters | -| vert_accuracy | ESKF vertical covariance | vert_accuracy = sqrt(P[2,2]) meters | -| speed_accuracy | ESKF velocity covariance | speed_accuracy = sqrt(P[3,3] + P[4,4]) m/s | -| time_week, time_week_ms | System time | Convert Unix time to GPS epoch (GPS epoch = 1980-01-06, subtract leap seconds) | -| satellites_visible | Constant | 10 (synthetic — prevents satellite-count failsafes in ArduPilot) | -| gps_id | Constant | 0 | -| ignore_flags | Constant | 0 (provide all fields) | - - -**Confidence tiers** mapping to GPS_INPUT: - - -| Tier | Condition | fix_type | horiz_accuracy | Rationale | -| ------ | ------------------------------------------------- | ---------- | ------------------------------- | -------------------------------------- | -| HIGH | Satellite match <30s ago, ESKF covariance < 400m² | 3 (3D fix) | From ESKF P (typically 5-20m) | Absolute position anchor recent | -| MEDIUM | cuVSLAM tracking OK, no recent satellite match | 3 (3D fix) | From ESKF P (typically 20-50m) | Relative tracking valid, drift growing | -| LOW | cuVSLAM lost, IMU-only | 2 (2D fix) | From ESKF P (50-200m+, growing) | Only IMU dead reckoning, rapid drift | -| FAILED | 3+ consecutive total failures | 0 (no fix) | 999.0 | System cannot determine position | - - -### Component: Disconnected Route Segment Handling (NEW — previously undefined) - -**Trigger**: cuVSLAM reports tracking_lost OR tracking confidence drops below threshold - -**Algorithm**: - -``` -STATE: TRACKING_NORMAL - cuVSLAM provides relative pose - ESKF VO measurement updates at 0.7Hz - Satellite matching on keyframes (every 5-10 frames) - -STATE: TRACKING_LOST (enter when cuVSLAM reports loss) - 1. ESKF continues with IMU-only prediction (no VO updates) - → uncertainty grows rapidly (~1-5 m/s drift with consumer IMU) - 2. Switch satellite matching to EVERY frame (not just keyframes) - → maximize chances of getting absolute correction - 3. For each camera frame: - a. Attempt satellite match using ESKF predicted position ± 3σ for tile selection - b. If match succeeds (RANSAC inlier ratio > 30%): - → ESKF measurement update with satellite position - → Restart cuVSLAM with current frame as new origin - → Transition to TRACKING_NORMAL - → Reset failure counter - c. If match fails: - → Increment failure_counter - → Continue IMU-only ESKF prediction - 4. If failure_counter >= 3: - → Send re-localization request to ground station - → GPS_INPUT fix_type = 0 (no fix), horiz_accuracy = 999.0 - → Continue attempting satellite matching on each frame - 5. If operator sends re-localization hint (approximate lat,lon): - → Use as ESKF measurement with high covariance (~500m) - → Attempt satellite match in that area - → On success: transition to TRACKING_NORMAL - -STATE: SEGMENT_DISCONNECT - After re-localization following tracking loss: - → New cuVSLAM track is independent of previous track - → ESKF maintains global NED position continuity via satellite anchor - → No need to "connect" segments at the cuVSLAM level - → ESKF already handles this: satellite corrections keep global position consistent -``` - -### Component: Satellite Image Matching Pipeline (UPDATED — added GSD + tile selection details) - -**GSD normalization**: - -- Camera GSD at 600m: ~15.9 cm/pixel (ADTI 20L V1 + 16mm) -- Satellite tile GSD at zoom 18: ~0.6 m/pixel -- Scale ratio: ~3.8:1 -- Downsample camera image to satellite GSD before matching: resize from 5456×3632 to ~1440×960 (matching zoom 18 GSD) -- This is close to LiteSAM's 1280px input — use 1280px with minor GSD mismatch acceptable for matching - -**Tile selection**: - -- Input: ESKF position estimate (lat, lon) + horizontal covariance σ_h -- Search radius: max(3·σ_h, 500m) — at least 500m to handle initial uncertainty -- Compute geohash for center position → load tiles covering the search area -- Assemble tile mosaic if needed (typically 2×2 to 4×4 tiles for adequate coverage) -- If ESKF uncertainty > 2km: tile selection unreliable, fall back to wider search or request operator input - -**Tile storage calculation** (zoom 18 — 0.6 m/pixel): - -- Each 256×256 tile covers ~153m × 153m -- Flight path 200km with ±2km buffer: area ≈ 200km × 4km = 800 km² -- Tiles needed: 800,000,000 / (153 × 153) ≈ 34,200 tiles -- Storage: ~10-15KB per JPEG tile → ~340-510 MB -- With zoom 19 overlap tiles for higher precision: ×4 = ~1.4-2.0 GB -- Recommended: zoom 18 primary + zoom 19 for ±500m along flight path → ~500-800 MB total - - -| Solution | Tools | Advantages | Limitations | Performance (est. Orin Nano Super TRT FP16) | Params | Fit | -| -------------------------------------- | ------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------ | ------------------------------------------- | ------ | ------------------------------- | -| LiteSAM (opt) TRT Engine FP16 @ 1280px | trtexec + tensorrt Python | Best satellite-aerial accuracy (RMSE@30=17.86m UAV-VisLoc), 6.31M params | MinGRU TRT export needs verification (LOW-MEDIUM risk) | Est. ~165-330ms | 6.31M | ✅ Primary | -| EfficientLoFTR TRT Engine FP16 | trtexec + tensorrt Python | Proven TRT path (Coarse_LoFTR_TRT). Semi-dense. CVPR 2024. | 2.4x more params than LiteSAM. | Est. ~200-400ms | 15.05M | ✅ Fallback if LiteSAM TRT fails | -| XFeat TRT Engine FP16 | trtexec + tensorrt Python | Fastest. Proven TRT implementation. | General-purpose, not designed for cross-view gap. | Est. ~50-100ms | <5M | ✅ Speed fallback | - - -### Component: cuVSLAM Configuration (NEW — previously undefined) - -**Mode**: Inertial (mono camera + IMU) - -**Camera configuration** (ADTI 20L V1 + 16mm lens): - -- Model: Brown distortion -- fx = fy = 3763 px (16mm on 23.2mm sensor at 5456px width) -- cx = 2728 px, cy = 1816 px -- Distortion coefficients: from calibration (k1, k2, p1, p2) -- Border: 50px (ignore lens edge distortion) - -**IMU configuration** (Pixhawk 6x IMU — ICM-42688-P): - -- Gyroscope noise density: 3.0 × 10⁻³ °/s/√Hz -- Gyroscope random walk: 5.0 × 10⁻⁵ °/s²/√Hz -- Accelerometer noise density: 70 µg/√Hz -- Accelerometer random walk: ~2.0 × 10⁻³ m/s³/√Hz -- IMU frequency: 200 Hz (from flight controller via MAVLink) -- T_imu_rig: measured transformation from Pixhawk IMU to camera center (translation + rotation) - -**cuVSLAM settings**: - -- OdometryMode: INERTIAL -- MulticameraMode: PRECISION (favor accuracy over speed — we have 1430ms budget) -- Input resolution: downsample to 1280×852 (or 720p) for processing speed -- async_bundle_adjustment: True - -**Initialization**: - -- cuVSLAM initializes automatically when it receives the first camera frame + IMU data -- First few frames used for feature initialization and scale estimation -- First satellite match validates and corrects the initial position - -**Calibration procedure** (one-time per hardware unit): - -1. Camera intrinsics: checkerboard calibration with OpenCV (or use manufacturer data if available) -2. Camera-IMU extrinsic (T_imu_rig): Kalibr tool with checkerboard + IMU data -3. IMU noise parameters: Allan variance analysis or use datasheet values -4. Store calibration files on Jetson storage - -### Component: AI Model Inference Runtime (UNCHANGED) - -Native TRT Engine — optimal performance and memory on fixed NVIDIA hardware. See draft05 for full comparison table and conversion workflow. - -### Component: Visual Odometry (UNCHANGED) - -cuVSLAM in Inertial mode, fed by ADTI 20L V1 at 0.7 fps sustained. See draft05 for feasibility analysis at 0.7fps. - -### Component: Flight Controller Integration (UPDATED — added GPS_INPUT field spec) - -pymavlink over UART at 5-10Hz. GPS_INPUT field population defined above. - -ArduPilot configuration: - -- GPS1_TYPE = 14 (MAVLink) -- GPS_RATE = 5 (minimum, matching our 5-10Hz output) -- EK3_SRC1_POSXY = 1 (GPS), EK3_SRC1_VELXY = 1 (GPS) — EKF uses GPS_INPUT as position/velocity source - -### Component: Object Localization (NEW — previously undefined) - -**Input**: pixel coordinates (u, v) in Viewpro A40 Pro image, current gimbal angles (pan_deg, tilt_deg), zoom factor, UAV position from GPS-denied system, UAV altitude - -**Process**: - -1. Pixel → camera ray: ray_cam = K_viewpro⁻¹(zoom) · [u, v, 1]ᵀ -2. Camera → gimbal frame: ray_gimbal = R_gimbal(pan, tilt) · ray_cam -3. Gimbal → body: ray_body = T_gimbal_body · ray_gimbal -4. Body → NED: ray_ned = R_body_ned(q) · ray_body -5. Ray-ground intersection: assuming flat terrain at UAV altitude h: t = -h / ray_ned[2], p_ground_ned = p_uav_ned + t · ray_ned -6. NED → WGS84: convert to lat, lon - -**Output**: { lat, lon, accuracy_m, confidence } - -- accuracy_m propagated from: UAV position accuracy (from ESKF) + gimbal angle uncertainty + altitude uncertainty - -**API endpoint**: POST /objects/locate - -- Request: { pixel_x, pixel_y, gimbal_pan_deg, gimbal_tilt_deg, zoom_factor } -- Response: { lat, lon, alt, accuracy_m, confidence, uav_position: {lat, lon, alt}, timestamp } - -### Component: Startup, Handoff & Failsafe (UPDATED — added handoff + reboot + re-localization) - -**GPS-denied handoff protocol**: - -- GPS-denied system runs continuously from companion computer boot -- Reads initial position from FC (GLOBAL_POSITION_INT) — this may be real GPS or last known -- First satellite match validates the initial position -- FC receives both real GPS (if available) and GPS_INPUT; FC EKF selects best source based on accuracy -- No explicit "switch" — the GPS-denied system is a secondary GPS source - -**Startup sequence** (expanded from draft05): - -1. Boot Jetson → start GPS-Denied service (systemd) -2. Connect to flight controller via pymavlink on UART -3. Wait for heartbeat -4. Initialize PyCUDA context -5. Load TRT engines: litesam.engine + xfeat.engine (~1-3s each) -6. Allocate GPU I/O buffers -7. Create CUDA streams: Stream A (cuVSLAM), Stream B (satellite matching) -8. Load camera calibration + IMU calibration files -9. Read GLOBAL_POSITION_INT → set mission origin (NED reference point) → init ESKF -10. Start cuVSLAM (Inertial mode) with ADTI 20L V1 camera stream -11. Preload satellite tiles within ±2km into RAM -12. Trigger first satellite match → validate initial position -13. Begin GPS_INPUT output loop at 5-10Hz -14. System ready - -**Mid-flight reboot recovery**: - -1. Jetson boots (~30-60s) -2. GPS-Denied service starts, connects to FC -3. Read GLOBAL_POSITION_INT (FC's current IMU-extrapolated position) -4. Init ESKF with this position + HIGH uncertainty covariance (σ = 200m) -5. Load TRT engines (~2-6s total) -6. Start cuVSLAM (fresh, no prior map) -7. Immediate satellite matching on first camera frame -8. On satellite match success: ESKF corrected, uncertainty drops -9. Estimated total recovery: ~35-70s -10. During recovery: FC uses IMU-only dead reckoning (at 70 km/h: ~700-1400m uncontrolled drift) -11. **Known limitation**: recovery time is dominated by Jetson boot time - -**3-consecutive-failure re-localization**: - -- Trigger: VO lost + satellite match failed × 3 consecutive camera frames -- Action: send re-localization request via MAVLink STATUSTEXT or custom message -- Message content: "RELOC_REQ: last_lat={lat} last_lon={lon} uncertainty={σ}m" -- Operator response: MAVLink COMMAND_LONG with approximate lat/lon -- System: use operator position as ESKF measurement with R = diag(500², 500², 100²) meters² -- System continues satellite matching with updated search area -- While waiting: GPS_INPUT fix_type=0, IMU-only ESKF prediction continues - -### Component: Ground Station Telemetry (UPDATED — added re-localization) - -MAVLink messages to ground station: - - -| Message | Rate | Content | -| ----------------------------- | -------- | --------------------------------------------------- | -| NAMED_VALUE_FLOAT "gps_conf" | 1Hz | Confidence score (0.0-1.0) | -| NAMED_VALUE_FLOAT "gps_drift" | 1Hz | Estimated drift from last satellite anchor (meters) | -| NAMED_VALUE_FLOAT "gps_hacc" | 1Hz | Horizontal accuracy (meters, from ESKF) | -| STATUSTEXT | On event | "RELOC_REQ: ..." for re-localization request | -| STATUSTEXT | On event | Tracking loss / recovery notifications | - - -### Component: Thermal Management (UNCHANGED) - -Same adaptive pipeline from draft05. Active cooling required at 25W. Throttling at 80°C SoC junction. - -### Component: API & Inter-System Communication (NEW — consolidated) - -FastAPI (Uvicorn) running locally on Jetson for inter-process communication with other onboard systems. - - -| Endpoint | Method | Purpose | Auth | -| --------------------- | --------- | -------------------------------------- | ---- | -| /sessions | POST | Start GPS-denied session | JWT | -| /sessions/{id}/stream | GET (SSE) | Real-time position + confidence stream | JWT | -| /sessions/{id}/anchor | POST | Operator re-localization hint | JWT | -| /sessions/{id} | DELETE | End session | JWT | -| /objects/locate | POST | Object GPS from pixel coordinates | JWT | -| /health | GET | System health + memory + thermal | None | - - -**SSE event schema** (1Hz): - -```json -{ - "type": "position", - "timestamp": "2026-03-17T12:00:00.000Z", - "lat": 48.123456, - "lon": 37.654321, - "alt": 600.0, - "accuracy_h": 15.2, - "accuracy_v": 8.1, - "confidence": "HIGH", - "drift_from_anchor": 12.5, - "vo_status": "tracking", - "last_satellite_match_age_s": 8.3 -} -``` - -## UAV Platform - -Unchanged from draft05. See draft05 for: airframe configuration (3.5m S-2 composite, 12.5kg AUW), flight performance (3.4h endurance at 50 km/h), camera specifications (ADTI 20L V1 + 16mm, Viewpro A40 Pro), ground coverage calculations. - -## Speed Optimization Techniques - -Unchanged from draft05. Key points: cuVSLAM ~9ms/frame, native TRT Engine (no ONNX RT), dual CUDA streams, 5-10Hz GPS_INPUT from ESKF IMU prediction. - -## Processing Time Budget - -Unchanged from draft05. VO frame: ~17-22ms. Satellite matching: ≤210ms async. Well within 1430ms frame interval. - -## Memory Budget (Jetson Orin Nano Super, 8GB shared) - - -| Component | Memory | Notes | -| ------------------------- | -------------- | ------------------------------------------- | -| OS + runtime | ~1.5GB | JetPack 6.2 + Python | -| cuVSLAM | ~200-500MB | CUDA library + map | -| LiteSAM TRT engine | ~50-80MB | If LiteSAM fails: EfficientLoFTR ~100-150MB | -| XFeat TRT engine | ~30-50MB | | -| Preloaded satellite tiles | ~200MB | ±2km of flight plan | -| pymavlink + MAVLink | ~20MB | | -| FastAPI (local IPC) | ~50MB | | -| ESKF + buffers | ~10MB | | -| **Total** | **~2.1-2.9GB** | **26-36% of 8GB** | - - -## Key Risks and Mitigations - - -| Risk | Likelihood | Impact | Mitigation | -| ------------------------------------------------------- | ---------- | ----------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| LiteSAM MinGRU ops unsupported in TRT 10.3 | LOW-MEDIUM | LiteSAM TRT export fails | Day-one verification. Fallback: EfficientLoFTR TRT → XFeat TRT. | -| cuVSLAM fails on low-texture terrain at 0.7fps | HIGH | Frequent tracking loss | Satellite matching corrections bound drift. Re-localization pipeline handles tracking loss. IMU bridges short gaps. | -| Google Maps satellite quality in conflict zone | HIGH | Satellite matching fails, outdated imagery | Pre-flight tile validation. Consider alternative providers (Bing, Mapbox). Robust to seasonal appearance changes via feature-based matching. | -| ESKF scale drift during long constant-velocity segments | MEDIUM | Position error exceeds 100m between satellite anchors | Satellite corrections every 7-14s re-anchor. Altitude constraint from barometer. Monitor drift rate — if >50m between corrections, increase satellite matching frequency. | -| Monocular scale ambiguity | MEDIUM | Metric scale lost during constant-velocity flight | Satellite absolute corrections provide scale. Known altitude constrains vertical scale. IMU acceleration during turns provides observability. | -| AUW exceeds AT4125 recommended range | MEDIUM | Reduced endurance, motor thermal stress | 12.5 kg vs 8-10 kg recommended. Monitor motor temps. Weight optimization. | -| ADTI mechanical shutter lifespan | MEDIUM | Replacement needed periodically | ~8,800 actuations/flight at 0.7fps. Estimated 11-57 flights before replacement. Budget as consumable. | -| Mid-flight companion computer failure | LOW | ~35-70s position gap | Reboot recovery procedure defined. FC uses IMU dead reckoning during gap. Known limitation. | -| Thermal throttling on Jetson | MEDIUM | Satellite matching latency increases | Active cooling required. Monitor SoC temp. Throttling at 80°C. Our workload ~8-15W typical — well under 25W TDP. | -| Engine incompatibility after JetPack update | MEDIUM | Must rebuild engines | Include engine rebuild in update procedure. | -| TRT engine build OOM on 8GB | LOW | Cannot build on target | Models small (6.31M, <5M). Reduce --memPoolSize if needed. | - - -## Testing Strategy - -### Integration / Functional Tests - -- **ESKF correctness**: Feed recorded IMU + synthetic VO/satellite data → verify output matches reference ESKF implementation -- **GPS_INPUT field validation**: Send GPS_INPUT to SITL ArduPilot → verify EKF accepts and uses the data correctly -- **Coordinate transform chain**: Known GPS → NED → pixel → back to GPS — verify round-trip error <0.1m -- **Disconnected segment handling**: Simulate tracking loss → verify satellite re-localization triggers → verify cuVSLAM restarts → verify ESKF position continuity -- **3-consecutive-failure**: Simulate VO + satellite failures → verify re-localization request sent → verify operator hint accepted -- **Object localization**: Known object at known GPS → verify computed GPS matches within camera accuracy -- **Mid-flight reboot**: Kill GPS-denied process → restart → verify recovery within expected time → verify position accuracy after recovery -- **TRT engine load test**: Verify engines load successfully on Jetson -- **TRT inference correctness**: Compare TRT output vs PyTorch reference (max L1 error < 0.01) -- **CUDA Stream pipelining**: Verify Stream B satellite matching does not block Stream A VO -- **ADTI sustained capture rate**: Verify 0.7fps sustained >30 min without buffer overflow -- **Confidence tier transitions**: Verify fix_type and accuracy change correctly across HIGH → MEDIUM → LOW → FAILED transitions - -### Non-Functional Tests - -- **End-to-end accuracy** (primary validation): Fly with real GPS recording → run GPS-denied system in parallel → compare estimated vs real positions → verify 80% within 50m, 60% within 20m -- **VO drift rate**: Measure cuVSLAM drift over 1km straight segment without satellite correction -- **Satellite matching accuracy**: Compare satellite-matched position vs real GPS at known locations -- **Processing time**: Verify end-to-end per-frame <400ms -- **Memory usage**: Monitor over 30-min session → verify <8GB, no leaks -- **Thermal**: Sustained 30-min run → verify no throttling -- **GPS_INPUT rate**: Verify consistent 5-10Hz delivery to FC -- **Tile storage**: Validate calculated storage matches actual for test mission area -- **MinGRU TRT compatibility** (day-one blocker): Clone LiteSAM → ONNX export → polygraphy → trtexec -- **Flight endurance**: Ground-test full system power draw against 267W estimate - -## References - -- ArduPilot GPS_RATE parameter: [https://github.com/ArduPilot/ardupilot/pull/15980](https://github.com/ArduPilot/ardupilot/pull/15980) -- MAVLink GPS_INPUT message: [https://ardupilot.org/mavproxy/docs/modules/GPSInput.html](https://ardupilot.org/mavproxy/docs/modules/GPSInput.html) -- pymavlink GPS_INPUT example: [https://webperso.ensta.fr/lebars/Share/GPS_INPUT_pymavlink.py](https://webperso.ensta.fr/lebars/Share/GPS_INPUT_pymavlink.py) -- ESKF reference (fixed-wing UAV): [https://github.com/ludvigls/ESKF](https://github.com/ludvigls/ESKF) -- ROS ESKF multi-sensor: [https://github.com/EliaTarasov/ESKF](https://github.com/EliaTarasov/ESKF) -- Range-VIO scale observability: [https://arxiv.org/abs/2103.15215](https://arxiv.org/abs/2103.15215) -- NaviLoc trajectory-level localization: [https://www.mdpi.com/2504-446X/10/2/97](https://www.mdpi.com/2504-446X/10/2/97) -- SatLoc-Fusion hierarchical framework: [https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f](https://www.scilit.com/publications/e5cafaf875a49297a62b298a89d5572f) -- Auterion GPS-denied workflow: [https://docs.auterion.com/vehicle-operation/auterion-mission-control/useful-resources/operations/gps-denied-workflow](https://docs.auterion.com/vehicle-operation/auterion-mission-control/useful-resources/operations/gps-denied-workflow) -- PX4 GNSS-denied flight: [https://docs.px4.io/main/en/advanced_config/gnss_degraded_or_denied_flight.html](https://docs.px4.io/main/en/advanced_config/gnss_degraded_or_denied_flight.html) -- ArduPilot GPS_INPUT advanced usage: [https://discuss.ardupilot.org/t/advanced-usage-of-gps-type-mav-14/99406](https://discuss.ardupilot.org/t/advanced-usage-of-gps-type-mav-14/99406) -- Google Maps Ukraine imagery: [https://newsukraine.rbc.ua/news/google-maps-has-surprise-for-satellite-imagery-1727182380.html](https://newsukraine.rbc.ua/news/google-maps-has-surprise-for-satellite-imagery-1727182380.html) -- Jetson Orin Nano Super thermal: [https://edgeaistack.app/blog/jetson-orin-nano-power-consumption/](https://edgeaistack.app/blog/jetson-orin-nano-power-consumption/) -- GSD matching research: [https://www.kjrs.org/journal/view.html?pn=related&uid=756&vmd=Full](https://www.kjrs.org/journal/view.html?pn=related&uid=756&vmd=Full) -- VO+satellite matching pipeline: [https://polen.itu.edu.tr/items/1fe1e872-7cea-44d8-a8de-339e4587bee6](https://polen.itu.edu.tr/items/1fe1e872-7cea-44d8-a8de-339e4587bee6) -- PyCuVSLAM docs: [https://wiki.seeedstudio.com/pycuvslam_recomputer_robotics/](https://wiki.seeedstudio.com/pycuvslam_recomputer_robotics/) -- Pixhawk 6x IMU (ICM-42688-P) datasheet: [https://invensense.tdk.com/products/motion-tracking/6-axis/icm-42688-p/](https://invensense.tdk.com/products/motion-tracking/6-axis/icm-42688-p/) -- All references from solution_draft05.md - -## Related Artifacts - -- AC Assessment: `_docs/00_research/gps_denied_nav/00_ac_assessment.md` -- Completeness assessment research: `_docs/00_research/solution_completeness_assessment/` -- Previous research: `_docs/00_research/trt_engine_migration/` -- Tech stack evaluation: `_docs/01_solution/tech_stack.md` (needs sync with draft05 corrections) -- Security analysis: `_docs/01_solution/security_analysis.md` -- Previous draft: `_docs/01_solution/solution_draft05.md` - diff --git a/_docs/01_solution/tech_stack.md b/_docs/01_solution/tech_stack.md deleted file mode 100644 index 51c50ad..0000000 --- a/_docs/01_solution/tech_stack.md +++ /dev/null @@ -1,257 +0,0 @@ -# Tech Stack Evaluation - -## Requirements Summary - -### Functional -- GPS-denied visual navigation for fixed-wing UAV -- Frame-center GPS estimation via VO + satellite matching + IMU fusion -- Object-center GPS via geometric projection -- Real-time streaming via REST API + SSE -- Disconnected route segment handling -- User-input fallback for unresolvable frames - -### Non-Functional -- <400ms per-frame processing (camera @ ~3fps) -- <50m accuracy for 80% of frames, <20m for 60% -- <8GB total memory (CPU+GPU shared pool) -- Up to 3000 frames per flight session -- Image Registration Rate >95% (normal segments) - -### Hardware Constraints -- **Jetson Orin Nano Super** (8GB LPDDR5, 1024 CUDA cores, 67 TOPS INT8) -- **JetPack 6.2.2**: CUDA 12.6.10, TensorRT 10.3.0, cuDNN 9.3 -- ARM64 (aarch64) architecture -- No internet connectivity during flight - -## Technology Evaluation - -### Platform & OS - -| Option | Version | Score (1-5) | Notes | -|--------|---------|-------------|-------| -| **JetPack 6.2.2 (L4T)** | Ubuntu 22.04 based | **5** | Only supported OS for Orin Nano Super. Includes CUDA 12.6, TensorRT 10.3, cuDNN 9.3 | - -**Selected**: JetPack 6.2.2 — no alternative. - -### Primary Language - -| Option | Fitness | Maturity | Perf on Jetson | Ecosystem | Score | -|--------|---------|----------|----------------|-----------|-------| -| **Python 3.10+** | 5 | 5 | 4 | 5 | **4.8** | -| C++ | 5 | 5 | 5 | 3 | 4.5 | -| Rust | 3 | 3 | 5 | 2 | 3.3 | - -**Selected**: **Python 3.10+** as primary language. -- cuVSLAM provides Python bindings (PyCuVSLAM v15.0.0) -- TensorRT has Python API -- FastAPI is Python-native -- OpenCV has full Python+CUDA bindings -- Performance-critical paths offloaded to CUDA via cuVSLAM/TensorRT — Python is glue code only -- C++ for custom ESKF if NumPy proves too slow (unlikely for 16-state EKF at 100Hz) - -### Visual Odometry - -| Option | Version | FPS on Orin Nano | Memory | License | Score | -|--------|---------|------------------|--------|---------|-------| -| **cuVSLAM (PyCuVSLAM)** | v15.0.0 (Mar 2026) | 116fps @ 720p | ~200-300MB | Free (NVIDIA, closed-source) | **5** | -| XFeat frame-to-frame | TensorRT engine | ~30-50ms/frame | ~50MB | MIT | 3.5 | -| ORB-SLAM3 | v1.0 | ~30fps | ~300MB | GPLv3 | 2.5 | - -**Selected**: **PyCuVSLAM v15.0.0** -- 116fps on Orin Nano 8G at 720p (verified via Intermodalics benchmark) -- Mono + IMU mode natively supported -- Auto IMU fallback on tracking loss -- Pre-built aarch64 wheel: `pip install -e bin/aarch64` -- Loop closure built-in - -**Risk**: Closed-source; nadir-only camera not explicitly tested. **Fallback**: XFeat frame-to-frame matching. - -### Satellite Image Matching (Benchmark-Driven Selection) - -**Day-one benchmark decides between two candidates:** - -| Option | Params | Accuracy (UAV-VisLoc) | Est. Time on Orin Nano | License | Score | -|--------|--------|----------------------|----------------------|---------|-------| -| **LiteSAM (opt)** | 6.31M | RMSE@30 = 17.86m | ~300-500ms @ 480px (estimated) | Open-source | **4** (if fast enough) | -| **XFeat semi-dense** | ~5M | Not benchmarked on UAV-VisLoc | ~50-100ms | MIT | **4** (if LiteSAM too slow) | - -**Decision rule**: -1. Export LiteSAM (opt) to TensorRT FP16 on Orin Nano Super -2. Benchmark at 480px, 640px, 800px -3. If ≤400ms at 480px → LiteSAM -4. If >400ms → **abandon LiteSAM, XFeat is primary** - -| Requirement | LiteSAM (opt) | XFeat semi-dense | -|-------------|---------------|------------------| -| PyTorch → ONNX → TensorRT export | Required | Required | -| TensorRT FP16 engine | 6.31M params, ~25MB engine | ~5M params, ~20MB engine | -| Input preprocessing | Resize to 480px, normalize | Resize to 640px, normalize | -| Matching pipeline | End-to-end (detect + match + refine) | Detect → KNN match → geometric verify | -| Cross-view robustness | Designed for satellite-aerial gap | General-purpose, less robust | - -### Sensor Fusion - -| Option | Complexity | Accuracy | Compute @ 100Hz | Score | -|--------|-----------|----------|-----------------|-------| -| **ESKF (custom)** | Low | Good | <1ms/step | **5** | -| Hybrid ESKF/UKF | Medium | 49% better | ~2-3ms/step | 3.5 | -| GTSAM Factor Graph | High | Best | ~10-50ms/step | 2 | - -**Selected**: **Custom ESKF in Python (NumPy/SciPy)** -- 16-state vector, well within NumPy capability -- FilterPy (v1.4.5, MIT) as reference/fallback, but custom implementation preferred for tighter control -- If 100Hz IMU prediction step proves slow in Python: rewrite as Cython or C extension (~1 day effort) - -### Image Preprocessing - -| Option | Tool | Time on Orin Nano | Notes | Score | -|--------|------|-------------------|-------|-------| -| **OpenCV CUDA resize** | cv2.cuda.resize | ~2-3ms (pre-allocated) | Must build OpenCV with CUDA from source. Pre-allocate GPU mats to avoid allocation overhead | **4** | -| NVIDIA VPI resize | VPI 3.2 | ~1-2ms | Part of JetPack, potentially faster | 4 | -| CPU resize (OpenCV) | cv2.resize | ~5-10ms | No GPU needed, simpler | 3 | - -**Selected**: **OpenCV CUDA** (pre-allocated GPU memory) or **VPI 3.2** (whichever is faster in benchmark). Both available in JetPack 6.2. -- Must build OpenCV from source with `CUDA_ARCH_BIN=8.7` for Orin Nano Ampere architecture -- Alternative: VPI 3.2 is pre-installed in JetPack 6.2, no build step needed - -### API & Streaming Framework - -| Option | Version | Async Support | SSE Support | Score | -|--------|---------|--------------|-------------|-------| -| **FastAPI + sse-starlette** | FastAPI 0.115+, sse-starlette 3.3.2 | Native async/await | EventSourceResponse with auto-disconnect | **5** | -| Flask + flask-sse | Flask 3.x | Limited | Redis dependency | 2 | -| Raw aiohttp | aiohttp 3.x | Full | Manual SSE implementation | 3 | - -**Selected**: **FastAPI + sse-starlette v3.3.2** -- sse-starlette: 108M downloads/month, BSD-3 license, production-stable -- Auto-generated OpenAPI docs -- Native async for non-blocking VO + satellite pipeline -- Uvicorn as ASGI server - -### Satellite Tile Storage & Indexing - -| Option | Complexity | Lookup Speed | Score | -|--------|-----------|-------------|-------| -| **GeoHash-indexed directory** | Low | O(1) hash lookup | **5** | -| SQLite + spatial index | Medium | O(log n) | 4 | -| PostGIS | High | O(log n) | 2 (overkill) | - -**Selected**: **GeoHash-indexed directory structure** -- Pre-flight: download tiles, store as `{geohash}/{zoom}_{x}_{y}.jpg` + `{geohash}/{zoom}_{x}_{y}_resized.jpg` -- Runtime: compute geohash from ESKF position → direct directory lookup -- Metadata in JSON sidecar files -- No database dependency on the Jetson during flight - -### Satellite Tile Provider - -| Provider | Max Zoom | GSD | Pricing | Eastern Ukraine Coverage | Score | -|----------|----------|-----|---------|--------------------------|-------| -| **Google Maps Tile API** | 18-19 | ~0.3-0.5 m/px | 100K tiles free/month, then $0.48/1K | Partial (conflict zone gaps) | **4** | -| Bing Maps | 18-19 | ~0.3-0.5 m/px | 125K free/year (basic) | Similar | 3.5 | -| Mapbox Satellite | 18-19 | ~0.5 m/px | 200K free/month | Similar | 3.5 | - -**Selected**: **Google Maps Tile API** (per restrictions.md). 100K free tiles/month covers ~25km² at zoom 19. For larger operational areas, costs are manageable at $0.48/1K tiles. - -### Output Format - -| Format | Standard | Tooling | Score | -|--------|----------|---------|-------| -| **GeoJSON** | RFC 7946 | Universal GIS support | **5** | -| CSV (lat, lon, confidence) | De facto | Simple, lightweight | 4 | - -**Selected**: **GeoJSON** as primary, CSV as export option. Per AC: WGS84 coordinates. - -## Tech Stack Summary - -``` -┌────────────────────────────────────────────────────┐ -│ HARDWARE: Jetson Orin Nano Super 8GB │ -│ OS: JetPack 6.2.2 (L4T / Ubuntu 22.04) │ -│ CUDA 12.6.10 / TensorRT 10.3.0 / cuDNN 9.3 │ -├────────────────────────────────────────────────────┤ -│ LANGUAGE: Python 3.10+ │ -│ FRAMEWORK: FastAPI + sse-starlette 3.3.2 │ -│ SERVER: Uvicorn (ASGI) │ -├────────────────────────────────────────────────────┤ -│ VISUAL ODOMETRY: PyCuVSLAM v15.0.0 │ -│ SATELLITE MATCH: LiteSAM(opt) or XFeat (benchmark) │ -│ SENSOR FUSION: Custom ESKF (NumPy/SciPy) │ -│ PREPROCESSING: OpenCV CUDA or VPI 3.2 │ -│ INFERENCE: TensorRT 10.3.0 (FP16) │ -├────────────────────────────────────────────────────┤ -│ TILE PROVIDER: Google Maps Tile API │ -│ TILE STORAGE: GeoHash-indexed directory │ -│ OUTPUT: GeoJSON (WGS84) via SSE stream │ -└────────────────────────────────────────────────────┘ -``` - -## Dependency List - -### Python Packages (pip) - -| Package | Version | Purpose | -|---------|---------|---------| -| pycuvslam | v15.0.0 (aarch64 wheel) | Visual odometry | -| fastapi | >=0.115 | REST API framework | -| sse-starlette | >=3.3.2 | SSE streaming | -| uvicorn | >=0.30 | ASGI server | -| numpy | >=1.26 | ESKF math, array ops | -| scipy | >=1.12 | Rotation matrices, spatial transforms | -| opencv-python (CUDA build) | >=4.8 | Image preprocessing (must build from source with CUDA) | -| torch (aarch64) | >=2.3 (JetPack-compatible) | LiteSAM model loading (if selected) | -| tensorrt | 10.3.0 (JetPack bundled) | Inference engine | -| pycuda | >=2024.1 | CUDA stream management | -| geojson | >=3.1 | GeoJSON output formatting | -| pygeohash | >=1.2 | GeoHash tile indexing | - -### System Dependencies (JetPack 6.2.2) - -| Component | Version | Notes | -|-----------|---------|-------| -| CUDA Toolkit | 12.6.10 | Pre-installed | -| TensorRT | 10.3.0 | Pre-installed | -| cuDNN | 9.3 | Pre-installed | -| VPI | 3.2 | Pre-installed, alternative to OpenCV CUDA for resize | -| cuVSLAM runtime | Bundled with PyCuVSLAM wheel | | - -### Offline Preprocessing Tools (developer machine, not Jetson) - -| Tool | Purpose | -|------|---------| -| Python 3.10+ | Tile download script | -| Google Maps Tile API key | Satellite tile access | -| torch + LiteSAM weights | Feature pre-extraction (if LiteSAM selected) | -| trtexec (TensorRT) | Model export to TensorRT engine | - -## Risk Assessment - -| Technology | Risk | Likelihood | Impact | Mitigation | -|-----------|------|-----------|--------|------------| -| cuVSLAM | Closed-source, nadir camera untested | Medium | High | XFeat frame-to-frame as open-source fallback | -| LiteSAM | May exceed 400ms on Orin Nano Super | High | High | **Abandon for XFeat** — day-one benchmark is go/no-go | -| OpenCV CUDA build | Build complexity on Jetson, CUDA arch compatibility | Medium | Low | VPI 3.2 as drop-in alternative (pre-installed) | -| Google Maps Tile API | Conflict zone coverage gaps, EEA restrictions | Medium | Medium | Test tile availability for operational area pre-flight; alternative providers (Bing, Mapbox) | -| Custom ESKF | Implementation bugs, tuning effort | Low | Medium | FilterPy v1.4.5 as reference; well-understood algorithm | -| Python GIL | Concurrent VO + satellite matching contention | Low | Low | CUDA operations release GIL; use asyncio + threading for I/O | - -## Learning Requirements - -| Technology | Team Expertise Needed | Ramp-up Time | -|-----------|----------------------|--------------| -| PyCuVSLAM | SLAM concepts, Python API, camera calibration | 2-3 days | -| TensorRT model export | ONNX export, trtexec, FP16 optimization | 2-3 days | -| LiteSAM architecture | Transformer-based matching (if selected) | 1-2 days | -| XFeat | Feature detection/matching concepts | 1 day | -| ESKF | Kalman filtering, quaternion math, multi-rate fusion | 3-5 days | -| FastAPI + SSE | Async Python, ASGI, SSE protocol | 1 day | -| GeoHash spatial indexing | Geospatial concepts | 0.5 days | -| Jetson deployment | JetPack, power modes, thermal management | 2-3 days | - -## Development Environment - -| Environment | Purpose | Setup | -|-------------|---------|-------| -| **Developer machine** (x86_64, GPU) | Development, unit testing, model export | Docker with CUDA + TensorRT | -| **Jetson Orin Nano Super** | Integration testing, benchmarking, deployment | JetPack 6.2.2 flashed, SSH access | - -Code should be developed and unit-tested on x86_64, then deployed to Jetson for integration/performance testing. cuVSLAM and TensorRT engines are aarch64-only — mock these in x86_64 tests. diff --git a/_docs/02_document/tests/blackbox-tests.md b/_docs/02_document/tests/blackbox-tests.md deleted file mode 100644 index 683f886..0000000 --- a/_docs/02_document/tests/blackbox-tests.md +++ /dev/null @@ -1,503 +0,0 @@ -# Blackbox Tests - -## Positive Scenarios - -### FT-P-01: End-to-End Position Accuracy — 50m Threshold - -**Summary**: Validate that ≥80% of frame positions are within 50m of ground truth GPS across a full 60-frame flight sequence. -**Traces to**: AC-01 (80% within 50m) -**Category**: Position Accuracy - -**Preconditions**: -- System running with SITL ArduPilot (GPS_TYPE=14) -- Camera replay serving flight-sequence-60 at 0.7fps -- Satellite tiles for test area loaded -- System has completed startup (first satellite match done) - -**Input data**: flight-sequence-60 (60 frames), coordinates.csv (ground truth), position_accuracy.csv (thresholds) - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Start session via POST /sessions | HTTP 201 with session ID | -| 2 | Subscribe to SSE stream GET /sessions/{id}/stream | SSE events begin at ~1Hz | -| 3 | Wait for camera-replay to complete all 60 frames (~86s at 0.7fps) | Position events for each processed frame | -| 4 | Collect all position events with lat/lon | 60 position estimates (some frames may have multiple updates) | -| 5 | For each frame: compute haversine distance between estimated and ground truth position | Distance array | -| 6 | Count frames where distance < 50m, compute percentage | ≥80% | - -**Expected outcome**: ≥48 of 60 frames have position error < 50m from ground truth in coordinates.csv -**Max execution time**: 120s - ---- - -### FT-P-02: End-to-End Position Accuracy — 20m Threshold - -**Summary**: Validate that ≥60% of frame positions are within 20m of ground truth GPS. -**Traces to**: AC-02 (60% within 20m) -**Category**: Position Accuracy - -**Preconditions**: Same as FT-P-01 - -**Input data**: flight-sequence-60, coordinates.csv, position_accuracy.csv - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Reuse position data from FT-P-01 run (or re-run) | 60 position estimates | -| 2 | Count frames where distance < 20m, compute percentage | ≥60% | - -**Expected outcome**: ≥36 of 60 frames have position error < 20m -**Max execution time**: 120s (shared with FT-P-01) - ---- - -### FT-P-03: No Single Frame Exceeds Maximum Error - -**Summary**: Validate that no individual frame position estimate exceeds 100m error. -**Traces to**: AC-01, AC-02 (implicit: no catastrophic outliers) -**Category**: Position Accuracy - -**Preconditions**: Same as FT-P-01 - -**Input data**: flight-sequence-60, coordinates.csv, position_accuracy.csv - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Reuse position data from FT-P-01 | 60 position estimates | -| 2 | Find max error across all frames | max(distances) ≤ 100m | - -**Expected outcome**: Maximum position error across all 60 frames ≤ 100m -**Max execution time**: 120s (shared with FT-P-01) - ---- - -### FT-P-04: VO Drift Between Satellite Anchors - -**Summary**: Validate cumulative VO drift stays below 100m between consecutive satellite correction events. -**Traces to**: AC-03 (drift < 100m between anchors) -**Category**: Position Accuracy - -**Preconditions**: Same as FT-P-01; satellite matching active on keyframes - -**Input data**: flight-sequence-60 SSE stream (includes drift_from_anchor field) - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Subscribe to SSE stream | Events with drift_from_anchor field | -| 2 | Record drift_from_anchor values over the full sequence | Array of drift values | -| 3 | Find maximum drift_from_anchor value | max(drift) < 100m | - -**Expected outcome**: drift_from_anchor never exceeds 100m during the 60-frame sequence -**Max execution time**: 120s - ---- - -### FT-P-05: GPS_INPUT Message Correctness — Normal Tracking - -**Summary**: Validate GPS_INPUT message fields are correctly populated during normal satellite-anchored tracking. -**Traces to**: AC-08 (GPS_INPUT to FC via MAVLink), AC-04 (confidence score) -**Category**: Flight Controller Integration - -**Preconditions**: System tracking normally with recent satellite match (<30s) - -**Input data**: Normal frame + satellite match; MAVLink capture from mavlink-inspector - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Read captured GPS_INPUT messages from mavlink-inspector | GPS_INPUT messages at 5-10Hz | -| 2 | Verify field: fix_type | fix_type == 3 | -| 3 | Verify field: horiz_accuracy | 1.0 ≤ horiz_accuracy ≤ 50.0 | -| 4 | Verify field: satellites_visible | satellites_visible == 10 | -| 5 | Verify fields: lat, lon | Non-zero, within operational area bounds | -| 6 | Verify fields: vn, ve, vd | Populated (non-NaN), magnitude consistent with ~50-70 km/h flight | - -**Expected outcome**: All GPS_INPUT fields populated correctly per specification -**Max execution time**: 30s - ---- - -### FT-P-06: Image Registration Rate - -**Summary**: Validate that ≥95% of frames in a normal flight are successfully registered by the VO pipeline. -**Traces to**: AC-05 (registration > 95%) -**Category**: Image Processing Quality - -**Preconditions**: System running with full 60-frame sequence - -**Input data**: flight-sequence-60 SSE stream (vo_status field) - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Subscribe to SSE stream | Events with vo_status field | -| 2 | Count frames where vo_status == "tracking" | ≥57 of 60 | -| 3 | Compute registration rate | ≥95% | - -**Expected outcome**: ≥57 of 60 frames report vo_status "tracking" -**Max execution time**: 120s - ---- - -### FT-P-07: Confidence Tier — HIGH - -**Summary**: Validate HIGH confidence tier when satellite match is recent and covariance is low. -**Traces to**: AC-04 (confidence score per estimate) -**Category**: Confidence Scoring - -**Preconditions**: System running, satellite match completed <30s ago - -**Input data**: SSE stream during normal tracking - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Read SSE event immediately after satellite match | confidence field | -| 2 | Verify confidence == "HIGH" | "HIGH" | -| 3 | Read GPS_INPUT fix_type from mavlink-inspector | fix_type == 3 | - -**Expected outcome**: Confidence tier is HIGH, fix_type is 3 -**Max execution time**: 30s - ---- - -### FT-P-08: Confidence Tier — MEDIUM (VO-only, No Recent Satellite Match) - -**Summary**: Validate MEDIUM confidence tier when VO is tracking but no satellite match in >30s. -**Traces to**: AC-04 -**Category**: Confidence Scoring - -**Preconditions**: System running; satellite tile server paused (returns 503) to prevent new matches; >30s since last match - -**Input data**: SSE stream during VO-only tracking - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Pause satellite-tile-server (Docker pause) | No new satellite matches possible | -| 2 | Wait >30s after last satellite match | Confidence should transition | -| 3 | Read SSE event | confidence == "MEDIUM" | -| 4 | Read GPS_INPUT fix_type | fix_type == 3 | - -**Expected outcome**: Confidence transitions to MEDIUM; fix_type remains 3 -**Max execution time**: 60s - ---- - -### FT-P-09: GPS_INPUT Output Rate - -**Summary**: Validate GPS_INPUT messages are sent at 5-10Hz continuously. -**Traces to**: AC-08 (GPS_INPUT via MAVLink), AC-09 (frame-by-frame streaming) -**Category**: Flight Controller Integration - -**Preconditions**: System running and producing position estimates - -**Input data**: MAVLink capture from mavlink-inspector (10s window) - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Capture GPS_INPUT messages for 10 seconds | N messages | -| 2 | Compute rate: N / 10 | 5 ≤ rate ≤ 10 | -| 3 | Verify no gaps > 300ms between consecutive messages | max gap ≤ 300ms | - -**Expected outcome**: Rate is 5-10Hz, no gap exceeds 300ms -**Max execution time**: 15s - ---- - -### FT-P-10: Object Localization - -**Summary**: Validate object GPS localization from pixel coordinates via the FastAPI endpoint. -**Traces to**: AC-16 (object localization), AC-17 (trigonometric calculation) -**Category**: Object Localization - -**Preconditions**: System running with known UAV position (from GPS-denied estimate); known object ground truth GPS - -**Input data**: pixel_x, pixel_y (center of frame = nadir), gimbal_pan_deg=0, gimbal_tilt_deg=-90, zoom_factor=1.0 - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | POST /objects/locate with pixel at frame center, gimbal pointing straight down | JSON: { lat, lon, alt, accuracy_m, confidence } | -| 2 | Compute haversine distance between response lat/lon and current UAV position | Should be < accuracy_m (nadir point ≈ UAV position) | -| 3 | Verify accuracy_m is consistent with current system accuracy | accuracy_m > 0, accuracy_m < 100m | - -**Expected outcome**: Object location at nadir matches UAV position within accuracy_m -**Max execution time**: 5s - ---- - -### FT-P-11: Coordinate Transform Round-Trip - -**Summary**: Validate GPS→NED→pixel→GPS round-trip error is <0.1m. -**Traces to**: AC-18 (WGS84 output) -**Category**: Coordinate Transforms - -**Preconditions**: System running, position known - -**Input data**: Known GPS coordinate within operational area - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Query system for current position via SSE | lat, lon | -| 2 | POST /objects/locate with frame center pixel, straight-down gimbal | Returned lat, lon | -| 3 | Compute haversine distance between original UAV lat/lon and round-trip result | distance < 0.1m | - -**Expected outcome**: Round-trip error < 0.1m -**Max execution time**: 5s - ---- - -### FT-P-12: Startup — GPS_INPUT Within 60 Seconds - -**Summary**: Validate the system begins outputting GPS_INPUT messages within 60s of boot. -**Traces to**: AC-11 (startup from last GPS) -**Category**: Startup & Failsafe - -**Preconditions**: Fresh system start; SITL ArduPilot running with GLOBAL_POSITION_INT available - -**Input data**: MAVLink capture from mavlink-inspector - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Start gps-denied-system container | System boots | -| 2 | Monitor mavlink-inspector for first GPS_INPUT message | Timestamp of first GPS_INPUT | -| 3 | Compute elapsed time from container start to first GPS_INPUT | ≤ 60s | - -**Expected outcome**: First GPS_INPUT message arrives within 60s of system start -**Max execution time**: 90s - ---- - -### FT-P-13: Telemetry Output Rate - -**Summary**: Validate telemetry NAMED_VALUE_FLOAT messages are sent at 1Hz. -**Traces to**: AC-14 (telemetry to ground station) -**Category**: Telemetry - -**Preconditions**: System running normally - -**Input data**: MAVLink capture from mavlink-inspector (10s window) - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Capture NAMED_VALUE_FLOAT messages for "gps_conf", "gps_drift", "gps_hacc" over 10s | N messages per name | -| 2 | Verify rate: ~1Hz per metric (8-12 messages per name in 10s) | 0.8-1.2 Hz | - -**Expected outcome**: Each telemetry metric sent at ~1Hz -**Max execution time**: 15s - ---- - -### FT-P-14: SSE Stream Schema - -**Summary**: Validate SSE position events contain all required fields with correct types. -**Traces to**: AC-14 (streaming to ground station) -**Category**: API & Communication - -**Preconditions**: Active session with SSE stream - -**Input data**: SSE events from /sessions/{id}/stream - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Subscribe to SSE stream | Events at ~1Hz | -| 2 | Parse event JSON | Valid JSON | -| 3 | Verify fields: type (string), timestamp (ISO8601), lat (float), lon (float), alt (float), accuracy_h (float), confidence (string), drift_from_anchor (float), vo_status (string), last_satellite_match_age_s (float) | All present with correct types | - -**Expected outcome**: Every SSE event conforms to the specified schema -**Max execution time**: 10s - ---- - -## Negative Scenarios - -### FT-N-01: Trajectory Direction Change (Frames 32-43) - -**Summary**: Validate system continues producing position estimates through a trajectory direction change. -**Traces to**: AC-07 (disconnected segments core to system) -**Category**: Resilience & Edge Cases - -**Preconditions**: System running; camera-replay set to serve frames 32-43 (direction change area) - -**Input data**: Frames AD000032-043.jpg, coordinates for frames 32-43 - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Camera-replay serves frames 32-43 at 0.7fps | System processes frames | -| 2 | Collect SSE position events for each frame | ≥12 position estimates (one per frame minimum) | -| 3 | Verify no gap >5s without a position update | Continuous output | - -**Expected outcome**: System produces position estimates for all frames in the direction-change segment; no prolonged output gap -**Max execution time**: 30s - ---- - -### FT-N-02: Outlier Frame Handling (350m Gap) - -**Summary**: Validate system handles a 350m outlier between consecutive photos without position corruption. -**Traces to**: AC-06 (350m outlier tolerance) -**Category**: Resilience & Edge Cases - -**Preconditions**: System running with normal tracking established; fault injection: camera-replay skips frames to simulate 350m gap - -**Input data**: Normal frames followed by a frame 350m away (simulated by frame skip in camera-replay) - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Normal tracking for 10 frames | Position estimates with <50m error | -| 2 | Camera-replay jumps forward ~350m (skips multiple frames) | System detects discontinuity | -| 3 | Collect position estimates for next 5 frames after the gap | Recovery within 3-5 frames | -| 4 | Verify position error of recovered frames | Error < 100m for first valid frame after recovery | - -**Expected outcome**: System recovers from 350m outlier; post-recovery position error < 100m -**Max execution time**: 30s - ---- - -### FT-N-03: Invalid Object Localization Request - -**Summary**: Validate API rejects invalid pixel coordinates with HTTP 422. -**Traces to**: AC-16 (object localization) -**Category**: API Error Handling - -**Preconditions**: System running with active session - -**Input data**: POST /objects/locate with pixel_x=-100, pixel_y=-100 - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | POST /objects/locate with negative pixel coordinates | HTTP 422 | -| 2 | Verify response body contains error description | JSON with "error" or "detail" field | - -**Expected outcome**: HTTP 422 with validation error -**Max execution time**: 2s - ---- - -### FT-N-04: Unauthenticated API Access - -**Summary**: Validate API rejects unauthenticated requests with HTTP 401. -**Traces to**: AC-14 (security — JWT auth) -**Category**: API Security - -**Preconditions**: System running - -**Input data**: POST /sessions with no Authorization header - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | POST /sessions without JWT token | HTTP 401 | -| 2 | GET /sessions/{id}/stream without JWT | HTTP 401 | -| 3 | POST /objects/locate without JWT | HTTP 401 | -| 4 | GET /health (no auth required) | HTTP 200 | - -**Expected outcome**: Protected endpoints return 401; /health remains accessible -**Max execution time**: 5s - ---- - -### FT-N-05: 3-Consecutive-Failure Re-Localization Request - -**Summary**: Validate that after VO loss + 3 consecutive satellite match failures, the system sends a re-localization request to the ground station. -**Traces to**: AC-08 (3 consecutive failures → re-localization request) -**Category**: Resilience & Edge Cases - -**Preconditions**: System running; camera-replay set to serve featureless frames (VO will fail); satellite-tile-server returning 404 (tile not found) - -**Input data**: Featureless frames (e.g., blank/uniform images), satellite tile server offline - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Camera-replay serves featureless frames | VO tracking lost | -| 2 | Satellite-tile-server returns 404 | Satellite matching fails | -| 3 | Wait for 3 camera frames (3 × 1.43s ≈ 4.3s) | 3 consecutive failures | -| 4 | Check mavlink-inspector for STATUSTEXT | Message matches `RELOC_REQ: last_lat=.* last_lon=.* uncertainty=.*m` | -| 5 | Verify GPS_INPUT fix_type | fix_type == 0 | -| 6 | Verify GPS_INPUT horiz_accuracy | horiz_accuracy == 999.0 | - -**Expected outcome**: RELOC_REQ sent via STATUSTEXT; GPS_INPUT reports no-fix with 999.0 accuracy -**Max execution time**: 15s - ---- - -### FT-N-06: IMU-Only Dead Reckoning (VO Lost, No Satellite) - -**Summary**: Validate system degrades gracefully to IMU-only ESKF prediction when VO and satellite matching both fail. -**Traces to**: AC-06 (VO lost behavior), AC-04 (confidence score reflects state) -**Category**: Resilience & Edge Cases - -**Preconditions**: System running; camera-replay paused (no frames); satellite-tile-server paused - -**Input data**: No camera frames, no satellite tiles; only IMU from SITL - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Pause camera-replay and satellite-tile-server | System loses VO and satellite inputs | -| 2 | Read SSE events over 5s | confidence transitions from HIGH/MEDIUM to LOW | -| 3 | Read GPS_INPUT from mavlink-inspector | fix_type == 2 | -| 4 | Read horiz_accuracy over time | horiz_accuracy ≥ 50m and increasing | -| 5 | Verify GPS_INPUT continues at 5-10Hz | Messages continue (IMU-driven ESKF prediction) | - -**Expected outcome**: System continues GPS_INPUT at 5-10Hz via IMU; confidence drops; accuracy degrades but output never stops -**Max execution time**: 15s - ---- - -### FT-N-07: Operator Re-Localization Hint Accepted - -**Summary**: Validate the system accepts an operator re-localization hint and recovers position. -**Traces to**: AC-08 (re-localization), AC-15 (ground station commands) -**Category**: Ground Station Integration - -**Preconditions**: System in FAILED confidence state (3 consecutive failures); satellite-tile-server restored - -**Input data**: Operator hint: approximate lat/lon (from coordinates.csv ground truth ± 200m offset) - -**Steps**: - -| Step | Consumer Action | Expected System Response | -|------|----------------|------------------------| -| 1 | Trigger 3-consecutive-failure state (FT-N-05 preconditions) | RELOC_REQ sent | -| 2 | Restore satellite-tile-server | Tiles available again | -| 3 | POST /sessions/{id}/anchor with approximate lat/lon | HTTP 200 | -| 4 | Wait for satellite match attempt (~3-5s) | System searches in new area | -| 5 | Read SSE events | confidence transitions back to HIGH/MEDIUM | -| 6 | Read GPS_INPUT fix_type | fix_type == 3 | - -**Expected outcome**: System accepts operator hint, searches satellite tiles in new area, recovers position, confidence returns to HIGH/MEDIUM -**Max execution time**: 30s diff --git a/_docs/02_document/tests/environment.md b/_docs/02_document/tests/environment.md deleted file mode 100644 index 2794b40..0000000 --- a/_docs/02_document/tests/environment.md +++ /dev/null @@ -1,149 +0,0 @@ -# Test Environment - -## Overview - -**System under test**: GPS-Denied Visual Navigation System — a real-time position estimation service running on Jetson Orin Nano Super. Public interfaces: FastAPI REST/SSE endpoints (port 8000), MAVLink GPS_INPUT messages over serial/UDP, MAVLink telemetry messages. - -**Consumer app purpose**: Standalone Python test runner (pytest) that exercises the GPS-denied system through its public interfaces (HTTP API, MAVLink message inspection, SSE stream consumption) without access to internal modules, ESKF state, or GPU buffers. - -## Test Modes - -This is embedded robotics software targeting Jetson Orin Nano Super. A pure Docker environment cannot exercise GPU-dependent paths (TRT inference, cuVSLAM, CUDA streams). The test environment supports two modes: - -**Mode 1 — Docker SITL (CI/dev)**: Full system in Docker containers with ArduPilot SITL providing MAVLink + IMU at 200Hz. Camera images replayed from input_data/. Satellite tiles served from a mock HTTP server. GPS-denied system runs in CPU-mode with stubbed TRT/cuVSLAM inference (functionally equivalent but slower). Tests all integration paths, API, MAVLink, resilience, and security. - -**Mode 2 — Jetson Hardware (nightly/pre-deploy)**: GPS-denied system runs natively on Jetson Orin Nano Super with real CUDA/TRT/cuVSLAM. ArduPilot SITL runs on the Jetson or a companion x86 host, connected via UART or UDP. Camera frames injected via USB camera emulator or replay service. Tests real-time performance, GPU memory, thermal, TRT correctness, and CUDA stream isolation. - -## Docker Environment (Mode 1) - -### Services - -| Service | Image / Build | Purpose | Ports | -|---------|--------------|---------|-------| -| gps-denied-system | `./Dockerfile` (build context: project root) | GPS-denied navigation system in CPU-mode (TRT stubs, cuVSLAM stub returning synthetic VO poses derived from ground truth trajectory) | 8000 (FastAPI), 14550/udp (MAVLink) | -| ardupilot-sitl | `ardupilot/sitl:plane-4.5` (fixed-wing) | ArduPilot SITL: flies waypoint mission following coordinates.csv trajectory, generates IMU at 200Hz via MAVLink, GPS_TYPE=14 accepts GPS_INPUT, provides GLOBAL_POSITION_INT at startup | 5760 (MAVLink TCP), 14551/udp | -| camera-replay | `./tests/docker/camera-replay/Dockerfile` | Replays AD000001-060.jpg from input_data/ at configurable FPS (default 0.7fps) with timestamps synchronized to SITL clock. Supports fault injection: frame skip, corrupted JPEG, pause. | 8001 (frame server) | -| satellite-tile-server | `./tests/docker/tile-server/Dockerfile` | HTTP server for pre-cached satellite tiles (zoom 18, ~50-100 tiles covering test area). Supports fault injection: 404 for specific tiles, 503 for full outage, slow responses. | 8002 | -| mavlink-inspector | `./tests/docker/mavlink-inspector/Dockerfile` | Passively captures all MAVLink traffic (GPS_INPUT, NAMED_VALUE_FLOAT, STATUSTEXT, COMMAND_LONG) for post-test assertion. Can inject operator re-localization hints. | 14552/udp | -| e2e-consumer | `./tests/e2e/Dockerfile` | Black-box test runner (pytest). Communicates only via HTTP API + MAVLink inspector. | — | - -### Networks - -| Network | Services | Purpose | -|---------|----------|---------| -| e2e-net | all | Isolated test network; MAVLink UDP multicast between gps-denied-system, ardupilot-sitl, mavlink-inspector | - -### Volumes - -| Volume | Mounted to | Purpose | -|--------|-----------|---------| -| input-data | camera-replay:/data, e2e-consumer:/test-data | Camera frames (AD000001-060.jpg), coordinates.csv, data_parameters.md, gmaps reference images | -| satellite-tiles | satellite-tile-server:/tiles, gps-denied-system:/tiles | Pre-processed satellite tiles for test area (zoom 18, 48.249-48.276°N, 37.340-37.386°E) | -| sitl-mission | ardupilot-sitl:/mission | Waypoint mission file derived from coordinates.csv (SITL flies this trajectory, generating physically consistent 200Hz IMU data) | -| test-results | e2e-consumer:/results | Test result CSV output | -| mavlink-capture | mavlink-inspector:/capture | Recorded MAVLink messages for post-test assertions | - -### IMU Data Flow - -ArduPilot SITL is the primary source of IMU data. It flies a waypoint mission derived from coordinates.csv and internally generates physically consistent accelerometer + gyroscope readings at 200Hz, delivered to the GPS-denied system via MAVLink (RAW_IMU, SCALED_IMU2). This eliminates the need for pre-recorded IMU data files and ensures IMU/trajectory consistency. - -``` -coordinates.csv → mission_generator script → ArduPilot waypoint file - ↓ - ArduPilot SITL flies trajectory - ↓ - IMU @ 200Hz + heartbeat + GLOBAL_POSITION_INT - ↓ (MAVLink UDP) - gps-denied-system receives IMU for ESKF -``` - -### docker-compose structure - -```yaml -services: - ardupilot-sitl: - # ArduPilot SITL fixed-wing, outputs IMU at 200Hz via MAVLink - # GPS_TYPE=14 (MAVLink), pre-configured for GPS_INPUT acceptance - satellite-tile-server: - # HTTP tile server with tiles for test area (48.249-48.276°N, 37.340-37.386°E) - camera-replay: - # Replays AD000001-060.jpg at 0.7fps, serves via HTTP or shared volume - depends_on: - - satellite-tile-server - gps-denied-system: - # The system under test - depends_on: - - ardupilot-sitl - - satellite-tile-server - - camera-replay - mavlink-inspector: - # Captures GPS_INPUT, NAMED_VALUE_FLOAT, STATUSTEXT messages - depends_on: - - ardupilot-sitl - e2e-consumer: - # pytest runner — executes after system reaches steady state - depends_on: - - gps-denied-system - - mavlink-inspector -``` - -## Consumer Application - -**Tech stack**: Python 3.11, pytest, httpx (HTTP client), pymavlink (MAVLink inspection), sseclient-py (SSE stream) -**Entry point**: `pytest tests/e2e/ --tb=short --csv=results/report.csv` - -### Communication with system under test - -| Interface | Protocol | Endpoint / Topic | Authentication | -|-----------|----------|-----------------|----------------| -| Position API | HTTP REST | http://gps-denied-system:8000/sessions | JWT token | -| Position stream | HTTP SSE | http://gps-denied-system:8000/sessions/{id}/stream | JWT token | -| Object localization | HTTP REST | http://gps-denied-system:8000/objects/locate | JWT token | -| Health check | HTTP REST | http://gps-denied-system:8000/health | None | -| GPS_INPUT inspection | MAVLink UDP | mavlink-inspector:14552 (recorded messages) | None | -| Telemetry inspection | MAVLink UDP | mavlink-inspector:14552 (NAMED_VALUE_FLOAT, STATUSTEXT) | None | - -### What the consumer does NOT have access to - -- No direct access to ESKF internal state, covariance matrices, or error vectors -- No direct access to cuVSLAM tracking state or feature maps -- No direct access to GPU memory, CUDA streams, or TRT engine internals -- No direct access to the system's file system or configuration files -- No direct database or state store access - -## Jetson Hardware Environment (Mode 2) - -Tests tagged `@pytest.mark.jetson` require actual Jetson Orin Nano Super hardware. These run natively (no Docker for the GPS-denied system) to exercise real GPU paths. - -**Hardware setup**: -- Jetson Orin Nano Super (JetPack 6.2, CUDA 12.x, TensorRT 10.3) -- ArduPilot SITL on same Jetson (or x86 companion connected via UART/UDP) -- Camera frames injected via: USB camera emulator (v4l2loopback feeding frames from input_data/) or HTTP replay service -- Satellite tiles on local SSD (same path as production deployment) -- Active cooling attached (required for sustained load tests) - -**Tests in this mode**: -- NFT-PERF-01 through NFT-PERF-06 (real GPU latency, throughput) -- NFT-RES-LIM-01 (GPU+CPU shared memory monitoring via tegrastats) -- NFT-RES-LIM-02 (thermal monitoring via thermal_zone sysfs) -- NFT-RES-LIM-05 (CUDA stream isolation with real concurrent GPU work) -- TRT engine build and inference correctness (expected_results #42-44) - -**Jetson CI runner**: Self-hosted GitHub Actions runner on a dedicated Jetson Orin Nano Super, triggered for nightly builds and pre-deploy gates. - -## CI/CD Integration - -**When to run**: On every PR to `dev`, nightly full suite, before production deploy -**Pipeline stages**: -1. Unit tests (no Docker, no hardware) — on every commit -2. Docker blackbox tests (SITL + CPU mode) — on PR merge to dev -3. Hardware tests (Jetson runner) — nightly + pre-deploy - -**Gate behavior**: Docker tests block merge; hardware tests are advisory (nightly) or blocking (pre-deploy) -**Timeout**: Docker suite: 15 minutes; Hardware suite: 30 minutes - -## Reporting - -**Format**: CSV -**Columns**: Test ID, Test Name, Execution Time (ms), Result (PASS/FAIL/SKIP), Error Message (if FAIL) -**Output path**: `./tests/e2e-results/report.csv` diff --git a/_docs/02_document/tests/performance-tests.md b/_docs/02_document/tests/performance-tests.md deleted file mode 100644 index 0e464ef..0000000 --- a/_docs/02_document/tests/performance-tests.md +++ /dev/null @@ -1,138 +0,0 @@ -# Performance Tests - -### NFT-PERF-01: End-to-End Per-Frame Latency - -**Summary**: Validate total pipeline latency from camera capture to GPS_INPUT output is <400ms. -**Traces to**: AC-07 (< 400ms end-to-end per frame) -**Metric**: End-to-end latency (camera frame timestamp → GPS_INPUT message timestamp) - -**Preconditions**: -- System running on Jetson Orin Nano Super (GPU-mode) -- Camera-replay serving frames at 0.7fps -- System in steady state (warm-up: ≥10 frames processed) - -**Steps**: - -| Step | Consumer Action | Measurement | -|------|----------------|-------------| -| 1 | Camera-replay sends frame with known timestamp | Record t_capture | -| 2 | Monitor GPS_INPUT messages at mavlink-inspector | Record t_gps_input for first GPS_INPUT update after t_capture | -| 3 | Compute latency = t_gps_input - t_capture | Per-frame latency | -| 4 | Repeat for 30 consecutive frames | Array of 30 latency values | - -**Pass criteria**: p95 latency < 400ms; max latency < 500ms -**Duration**: 50s (~30 frames at 0.7fps + warm-up) - ---- - -### NFT-PERF-02: GPS_INPUT Output Rate Consistency - -**Summary**: Validate GPS_INPUT messages are delivered at a sustained 5-10Hz with no gaps. -**Traces to**: AC-08 (GPS_INPUT via MAVLink at 5-10Hz) -**Metric**: Message rate (Hz), maximum inter-message gap (ms) - -**Preconditions**: -- System in steady state -- Camera-replay active - -**Steps**: - -| Step | Consumer Action | Measurement | -|------|----------------|-------------| -| 1 | Capture GPS_INPUT messages for 60 seconds | Count messages, record timestamps | -| 2 | Compute rate: count / 60 | 5 ≤ rate ≤ 10 Hz | -| 3 | Compute max gap between consecutive messages | max_gap ≤ 250ms | -| 4 | Compute jitter: std_dev of inter-message intervals | jitter < 50ms | - -**Pass criteria**: Rate 5-10Hz; max gap ≤ 250ms; jitter < 50ms -**Duration**: 60s - ---- - -### NFT-PERF-03: cuVSLAM Visual Odometry Processing Time - -**Summary**: Validate cuVSLAM processes each frame within 20ms. -**Traces to**: AC-07 (real-time processing budget) -**Metric**: Per-frame cuVSLAM inference time (ms) - -**Preconditions**: -- System running on Jetson Orin Nano Super -- Steady state (≥10 frames processed) - -**Steps**: - -| Step | Consumer Action | Measurement | -|------|----------------|-------------| -| 1 | Replay 30 frames, read processing time from SSE events or health endpoint metrics | Per-frame VO time | -| 2 | Compute p95 of VO time | p95 ≤ 20ms | - -**Pass criteria**: p95 cuVSLAM inference time ≤ 20ms -**Duration**: 50s - ---- - -### NFT-PERF-04: Satellite Matching Latency (Async) - -**Summary**: Validate satellite matching completes within 330ms per keyframe (async, does not block VO). -**Traces to**: AC-07 (within frame budget), solution processing time budget -**Metric**: Per-keyframe satellite matching latency (ms) - -**Preconditions**: -- System running on Jetson Orin Nano Super -- Satellite tiles loaded - -**Steps**: - -| Step | Consumer Action | Measurement | -|------|----------------|-------------| -| 1 | Monitor satellite match events over 60s (expect ~4-8 matches at 0.07-0.14Hz) | Per-match latency from health/metrics endpoint | -| 2 | Verify no VO frame was blocked during satellite matching | VO timestamps maintain 0.7fps cadence | - -**Pass criteria**: p95 satellite matching ≤ 330ms; VO cadence unaffected -**Duration**: 60s - ---- - -### NFT-PERF-05: TRT Engine Load Time - -**Summary**: Validate all TensorRT engines load within 10 seconds total. -**Traces to**: AC-11 (startup), solution startup sequence -**Metric**: Engine load time (seconds) - -**Preconditions**: -- Cold start on Jetson Orin Nano Super -- Engines pre-built and available on storage - -**Steps**: - -| Step | Consumer Action | Measurement | -|------|----------------|-------------| -| 1 | Start system, monitor startup log for engine load timestamps | t_start_load, t_end_load per engine | -| 2 | Compute total: sum of all engine load times | ≤ 10s total | - -**Pass criteria**: Total TRT engine load time ≤ 10s -**Duration**: 30s (includes boot time) - ---- - -### NFT-PERF-06: Sustained 30-Minute Processing - -**Summary**: Validate the system maintains consistent performance over a 30-minute continuous session without degradation. -**Traces to**: AC-07 (real-time), AC-08 (memory < 8GB) -**Metric**: Per-frame latency, GPS_INPUT rate, position accuracy over time - -**Preconditions**: -- System running on Jetson Orin Nano Super -- Camera-replay looping flight-sequence-60 (re-starts after frame 60) -- Satellite tiles available - -**Steps**: - -| Step | Consumer Action | Measurement | -|------|----------------|-------------| -| 1 | Run for 30 minutes, collect per-minute stats | Latency, rate, accuracy | -| 2 | Compare first-5-min stats vs last-5-min stats | No degradation >10% | -| 3 | Monitor for any position output gaps > 1s | Count gaps | - -**Pass criteria**: No latency degradation >10% over 30 min; GPS_INPUT rate remains 5-10Hz; no output gaps >1s -**Duration**: 30 minutes diff --git a/_docs/02_document/tests/resilience-tests.md b/_docs/02_document/tests/resilience-tests.md deleted file mode 100644 index 72b4cb9..0000000 --- a/_docs/02_document/tests/resilience-tests.md +++ /dev/null @@ -1,169 +0,0 @@ -# Resilience Tests - -### NFT-RES-01: Mid-Flight Reboot Recovery - -**Summary**: Validate the system recovers from a companion computer reboot within 70 seconds and restores position accuracy. -**Traces to**: AC-12 (mid-flight reboot recovery) - -**Preconditions**: -- System running in steady state with good position accuracy -- SITL ArduPilot continues running (FC stays up during companion computer reboot) - -**Fault injection**: -- Kill gps-denied-system process (docker stop or SIGKILL) -- Restart after 5s delay (simulates Jetson reboot time) - -**Steps**: - -| Step | Action | Expected Behavior | -|------|--------|------------------| -| 1 | Record current position accuracy and confidence | Baseline metrics | -| 2 | Kill gps-denied-system container | GPS_INPUT messages stop | -| 3 | Verify SITL continues running (heartbeat present) | FC still alive, using IMU dead reckoning | -| 4 | Restart gps-denied-system container after 5s | System starts recovery sequence | -| 5 | Monitor time from restart to first GPS_INPUT | ≤ 70s | -| 6 | Wait for first satellite match | Position accuracy restored | -| 7 | Verify position error after recovery | Error ≤ 50m after first satellite match | - -**Pass criteria**: Recovery time ≤ 70s; post-recovery position error ≤ 50m after satellite match -**Duration**: 120s - ---- - -### NFT-RES-02: Tracking Loss and Satellite Re-Localization - -**Summary**: Validate the system recovers from cuVSLAM tracking loss via satellite-based re-localization. -**Traces to**: AC-07 (disconnected segments), AC-06 (sharp turn handling) - -**Preconditions**: -- System in normal tracking (HIGH confidence) -- Satellite tiles available - -**Fault injection**: -- Camera-replay sends featureless/blurred frames (simulates VO tracking loss from sharp turn) -- Then resumes normal frames - -**Steps**: - -| Step | Action | Expected Behavior | -|------|--------|------------------| -| 1 | Normal tracking established | confidence: HIGH, vo_status: tracking | -| 2 | Camera-replay serves 3 featureless frames | cuVSLAM reports tracking_lost | -| 3 | System enters TRACKING_LOST state | Satellite matching switches to every frame | -| 4 | Camera-replay resumes normal frames | Satellite match succeeds | -| 5 | Monitor SSE: vo_status returns to "tracking" | cuVSLAM restarted | -| 6 | Monitor SSE: confidence returns to HIGH | Position re-anchored | -| 7 | Verify position accuracy after recovery | Error ≤ 50m | - -**Pass criteria**: Recovery within 5 frames after normal frames resume; position error ≤ 50m post-recovery -**Duration**: 30s - ---- - -### NFT-RES-03: Sustained IMU-Only Operation - -**Summary**: Validate the system continues producing position estimates during extended IMU-only periods without crashing. -**Traces to**: AC-08 (system continues during failure), AC-12 (failsafe) - -**Preconditions**: -- System in normal tracking - -**Fault injection**: -- Pause both camera-replay (no VO) and satellite-tile-server (no satellite matching) -- Duration: 30s - -**Steps**: - -| Step | Action | Expected Behavior | -|------|--------|------------------| -| 1 | Establish normal tracking baseline | GPS_INPUT at 5-10Hz, confidence HIGH | -| 2 | Pause camera-replay and satellite-tile-server | VO and satellite inputs stop | -| 3 | Monitor GPS_INPUT for 30s | Messages continue at 5-10Hz (IMU-driven ESKF prediction) | -| 4 | Verify horiz_accuracy grows over time | accuracy increases monotonically | -| 5 | Verify fix_type transitions to 2 | Degraded but present | -| 6 | Verify confidence transitions to LOW | Reflects IMU-only state | -| 7 | Resume camera-replay and satellite-tile-server | System recovers to normal tracking | -| 8 | Verify recovery to HIGH confidence | Satellite match re-anchors position | - -**Pass criteria**: GPS_INPUT never stops during 30s IMU-only period; system recovers when inputs resume -**Duration**: 60s - ---- - -### NFT-RES-04: Satellite Tile Server Failure - -**Summary**: Validate the system continues operating when satellite tile server becomes unavailable, with graceful accuracy degradation. -**Traces to**: AC-07 (resilience), solution risk: Google Maps quality - -**Preconditions**: -- System in normal tracking - -**Fault injection**: -- Stop satellite-tile-server container (simulates tile unavailability) - -**Steps**: - -| Step | Action | Expected Behavior | -|------|--------|------------------| -| 1 | Normal tracking with satellite corrections | confidence: HIGH | -| 2 | Stop satellite-tile-server | Satellite matching returns errors | -| 3 | Monitor for 60s | System falls back to VO-only; confidence drops to MEDIUM after 30s | -| 4 | Verify GPS_INPUT continues | Messages at 5-10Hz, fix_type remains 3 (VO tracking OK) | -| 5 | Restart satellite-tile-server | Satellite matching resumes | -| 6 | Verify confidence returns to HIGH | Position re-anchored | - -**Pass criteria**: No crash or hang; GPS_INPUT continues; confidence degrades gracefully and recovers when tiles return -**Duration**: 90s - ---- - -### NFT-RES-05: Corrupted Camera Frame - -**Summary**: Validate the system handles a corrupted camera frame without crashing. -**Traces to**: AC-06 (outlier tolerance) - -**Preconditions**: -- System in normal tracking - -**Fault injection**: -- Camera-replay injects a truncated/corrupted JPEG between normal frames - -**Steps**: - -| Step | Action | Expected Behavior | -|------|--------|------------------| -| 1 | Normal tracking for 5 frames | Baseline established | -| 2 | Camera-replay sends corrupted JPEG | System logs warning, skips frame | -| 3 | Camera-replay sends next normal frame | VO continues processing | -| 4 | Verify no crash, no hang | GPS_INPUT continues at 5-10Hz | -| 5 | Verify position accuracy on next valid frame | Error < 50m | - -**Pass criteria**: System skips corrupted frame gracefully; no crash; next frame processed normally -**Duration**: 15s - ---- - -### NFT-RES-06: Camera Feed Interruption (No Frames for 10s) - -**Summary**: Validate the system survives a 10-second camera feed interruption. -**Traces to**: AC-12 (failsafe — N seconds no estimate), AC-08 (continued operation) - -**Preconditions**: -- System in normal tracking - -**Fault injection**: -- Camera-replay pauses for 10s (no frames delivered) - -**Steps**: - -| Step | Action | Expected Behavior | -|------|--------|------------------| -| 1 | Normal tracking baseline | GPS_INPUT at 5-10Hz | -| 2 | Pause camera-replay for 10s | No new camera frames | -| 3 | Monitor GPS_INPUT | Messages continue via IMU prediction | -| 4 | Monitor confidence | Transitions to LOW after VO timeout | -| 5 | Resume camera-replay | VO restarts, satellite matching resumes | -| 6 | Verify recovery | confidence returns to HIGH within 10 frames | - -**Pass criteria**: GPS_INPUT never stops; recovery within 10 frames after camera feed resumes -**Duration**: 30s diff --git a/_docs/02_document/tests/resource-limit-tests.md b/_docs/02_document/tests/resource-limit-tests.md deleted file mode 100644 index 1bee79a..0000000 --- a/_docs/02_document/tests/resource-limit-tests.md +++ /dev/null @@ -1,90 +0,0 @@ -# Resource Limit Tests - -### NFT-RES-LIM-01: Memory Usage Under 8GB - -**Summary**: Validate system memory usage stays below 8GB shared memory (CPU + GPU) during sustained operation. -**Traces to**: AC-08 (memory < 8GB), RESTRICT-09 (8GB shared LPDDR5) - -**Preconditions**: -- System running on Jetson Orin Nano Super (8GB shared memory) -- Full pipeline active: cuVSLAM + satellite matching + ESKF + GPS_INPUT + FastAPI - -**Monitoring**: -- Total system memory (RSS + GPU allocated) via `tegrastats` or `/sys/devices/platform/host1x/*/memory` -- Poll every 5s via GET /health (memory_mb field) - -**Duration**: 30 minutes -**Pass criteria**: Peak memory < 8192MB; no memory leak (growth < 50MB over 30 minutes after first 2 minutes warm-up) - ---- - -### NFT-RES-LIM-02: GPU Thermal Envelope - -**Summary**: Validate SoC junction temperature stays below 80°C under sustained processing load. -**Traces to**: AC-08 (thermal), RESTRICT-10 (25W TDP, thermal throttling at 80°C) - -**Preconditions**: -- System running on Jetson Orin Nano Super with active cooling -- Full pipeline active - -**Monitoring**: -- SoC junction temperature via `tegrastats` or `/sys/devices/virtual/thermal/thermal_zone*/temp` -- Poll every 10s via GET /health (gpu_temp_c field) - -**Duration**: 30 minutes -**Pass criteria**: SoC junction temperature < 80°C throughout; no thermal throttling events - ---- - -### NFT-RES-LIM-03: Satellite Tile Storage - -**Summary**: Validate pre-loaded satellite tile storage stays within calculated budget for a test mission area. -**Traces to**: AC-19 (satellite imagery pre-loaded), RESTRICT-08 (onboard storage limited) - -**Preconditions**: -- Satellite tiles pre-processed for the test flight area (48.249-48.276°N, 37.340-37.386°E) -- Zoom 18 primary + zoom 19 for ±500m along flight path - -**Monitoring**: -- Total tile storage size on disk -- RAM usage for preloaded tiles (±2km buffer) - -**Duration**: Static check -**Pass criteria**: Total tile storage ≤ 1000MB on disk; RAM usage for preloaded tiles ≤ 200MB - ---- - -### NFT-RES-LIM-04: Long Flight Simulation (3000 Frames) - -**Summary**: Validate the system handles a maximum-length flight of 3000 frames without resource exhaustion. -**Traces to**: RESTRICT-04 (up to 3000 photos per flight) - -**Preconditions**: -- System running on Jetson (or Docker with CPU-mode for functional test) -- Camera-replay looping flight-sequence-60 to generate 3000 frames (50 loops) - -**Monitoring**: -- Memory usage every 60s -- GPS_INPUT rate every 60s -- Position accuracy sampled every 100 frames - -**Duration**: ~71 minutes (3000 frames at 0.7fps) -**Pass criteria**: Memory stays < 8GB; GPS_INPUT rate stays 5-10Hz; no crash or hang over full 3000-frame sequence - ---- - -### NFT-RES-LIM-05: CUDA Stream Isolation - -**Summary**: Validate that Stream B (satellite matching) does not block Stream A (cuVSLAM) under concurrent load. -**Traces to**: AC-07 (< 400ms per frame), solution CUDA stream pipelining - -**Preconditions**: -- System running on Jetson Orin Nano Super -- Satellite matching triggered on a keyframe while VO is processing the next frame - -**Monitoring**: -- VO frame-to-frame timing during satellite matching -- Satellite matching does not extend VO latency - -**Duration**: 30s (capture ~5 satellite matching events concurrent with VO) -**Pass criteria**: VO per-frame time ≤ 20ms even when satellite matching is running concurrently; no frame drops diff --git a/_docs/02_document/tests/security-tests.md b/_docs/02_document/tests/security-tests.md deleted file mode 100644 index ae9c45f..0000000 --- a/_docs/02_document/tests/security-tests.md +++ /dev/null @@ -1,88 +0,0 @@ -# Security Tests - -### NFT-SEC-01: JWT Authentication Required on Protected Endpoints - -**Summary**: Validate all protected endpoints reject requests without a valid JWT token. -**Traces to**: AC-14 (security — JWT auth on API) - -**Steps**: - -| Step | Consumer Action | Expected Response | -|------|----------------|------------------| -| 1 | POST /sessions with no Authorization header | HTTP 401 | -| 2 | GET /sessions/{id}/stream with no Authorization header | HTTP 401 | -| 3 | POST /sessions/{id}/anchor with no Authorization header | HTTP 401 | -| 4 | DELETE /sessions/{id} with no Authorization header | HTTP 401 | -| 5 | POST /objects/locate with no Authorization header | HTTP 401 | -| 6 | GET /health with no Authorization header | HTTP 200 (health is public) | - -**Pass criteria**: All protected endpoints return 401; /health returns 200 without auth - ---- - -### NFT-SEC-02: Expired JWT Token Rejection - -**Summary**: Validate the system rejects expired JWT tokens. -**Traces to**: AC-14 (security) - -**Steps**: - -| Step | Consumer Action | Expected Response | -|------|----------------|------------------| -| 1 | Generate a JWT token with exp set to 1 hour ago | Expired token | -| 2 | POST /sessions with expired token in Authorization header | HTTP 401 | -| 3 | POST /objects/locate with expired token | HTTP 401 | - -**Pass criteria**: Expired tokens are rejected with 401 - ---- - -### NFT-SEC-03: Invalid JWT Signature Rejection - -**Summary**: Validate the system rejects JWT tokens signed with the wrong key. -**Traces to**: AC-14 (security) - -**Steps**: - -| Step | Consumer Action | Expected Response | -|------|----------------|------------------| -| 1 | Generate a JWT token with a different signing key | Invalid signature token | -| 2 | POST /sessions with invalid-signature token | HTTP 401 | - -**Pass criteria**: Invalid-signature tokens are rejected with 401 - ---- - -### NFT-SEC-04: Malformed API Request Handling - -**Summary**: Validate the system handles malformed API payloads without crashing or leaking internal details. -**Traces to**: AC-14 (security), AC-16 (API robustness) - -**Steps**: - -| Step | Consumer Action | Expected Response | -|------|----------------|------------------| -| 1 | POST /objects/locate with empty body | HTTP 422 with generic error (no stack trace) | -| 2 | POST /objects/locate with body `{"pixel_x": "not_a_number"}` | HTTP 422 with validation error | -| 3 | POST /sessions with body exceeding 1MB | HTTP 413 or 422 (no crash) | -| 4 | POST /sessions/{id}/anchor with body `{"lat": 999, "lon": 999}` | HTTP 422 (invalid coordinates) | -| 5 | Verify system continues operating after all malformed requests | GET /health returns 200 | - -**Pass criteria**: All malformed requests return 4xx errors with safe error messages (no stack traces, no internal paths); system remains operational - ---- - -### NFT-SEC-05: MAVLink Injection Resistance - -**Summary**: Validate the system ignores unexpected or malformed MAVLink messages on the MAVLink channel. -**Traces to**: AC-15 (ground station commands), solution security analysis - -**Steps**: - -| Step | Consumer Action | Expected Response | -|------|----------------|------------------| -| 1 | Send unexpected MAVLink message types to the system's MAVLink port | System ignores (no crash, no state corruption) | -| 2 | Send malformed COMMAND_LONG with invalid lat/lon in re-localization hint | System rejects or ignores invalid coordinates | -| 3 | Verify GPS_INPUT output continues normally | No disruption | - -**Pass criteria**: System ignores unexpected messages; continues normal operation; does not process invalid re-localization coordinates diff --git a/_docs/02_document/tests/test-data.md b/_docs/02_document/tests/test-data.md deleted file mode 100644 index 8712622..0000000 --- a/_docs/02_document/tests/test-data.md +++ /dev/null @@ -1,95 +0,0 @@ -# Test Data Management - -## Seed Data Sets - -| Data Set | Description | Used by Tests | How Loaded | Cleanup | -|----------|-------------|---------------|-----------|---------| -| flight-sequence-60 | 60 aerial images (AD000001-060.jpg) with ground truth GPS from coordinates.csv, captured at ~1 photo per 2-3s from a fixed-wing UAV at 400m altitude | FT-P-01 through FT-P-06, FT-N-01, FT-N-02, NFT-PERF-01, NFT-RES-01 | Volume mount to camera-replay service; coordinates.csv loaded by e2e-consumer for ground truth comparison | Container restart between test groups | -| camera-params | Camera parameters: ADTi Surveyor Lite 26S v2, 26MP (6252x4168), 25mm focal length, 23.5mm sensor width | All position accuracy tests, object localization tests | Volume mount; read by gps-denied-system at startup | N/A (read-only) | -| satellite-tiles-test | Pre-processed satellite tiles (zoom 18) covering test flight area: 48.249-48.276°N, 37.340-37.386°E | All tests requiring satellite matching | Volume mount to satellite-tile-server and gps-denied-system | Container restart | -| ardupilot-params | ArduPilot SITL parameters: GPS1_TYPE=14, GPS_RATE=5, EK3_SRC1_POSXY=1, EK3_SRC1_VELXY=1, fixed-wing frame | All tests requiring flight controller interaction | Baked into ardupilot-sitl Docker image | Container restart | -| imu-replay-data | Synthetic IMU data (accelerometer + gyroscope at 200Hz) matching the flight-sequence-60 trajectory. Generated from coordinates.csv ground truth positions by computing velocities/accelerations between frames, interpolating to 200Hz, adding noise consistent with ICM-42688-P datasheet (gyro: 3.0e-3 °/s/√Hz, accel: 70 µg/√Hz). Stored as `input_data/imu_synthetic_200hz.csv` with columns: timestamp_us, accel_x, accel_y, accel_z, gyro_x, gyro_y, gyro_z. Alternative: SITL ArduPilot flies a waypoint mission following the coordinates.csv trajectory and internally generates physically consistent IMU data at 200Hz. | FT-P-01 through FT-P-06, NFT-PERF-01, NFT-RES-01 through NFT-RES-05 | Primary: SITL ArduPilot flies the trajectory and generates IMU internally via MAVLink. Fallback: pre-generated CSV replayed via MAVLink injector | Container restart | -| invalid-inputs | Malformed images (truncated JPEG, wrong resolution), invalid API payloads, corrupted IMU streams | FT-N-03 through FT-N-06, NFT-SEC-01 through NFT-SEC-04 | Volume mount to e2e-consumer; injected via API calls | Container restart | - -## Data Isolation Strategy - -Each test group runs against a fresh container restart of the gps-denied-system and ardupilot-sitl services. The camera-replay service is restarted and configured per test group (different frame subsets, different replay speeds, or different fault injection modes). MAVLink capture logs are isolated per test run via timestamped directories. - -## Input Data Mapping - -| Input Data File | Source Location | Description | Covers Scenarios | -|-----------------|----------------|-------------|-----------------| -| AD000001-060.jpg | `_docs/00_problem/input_data/` | 60 aerial images (6252x4168, 26MP) from a fixed-wing UAV at 400m altitude. Images taken at ~1 photo/2-3s (wider spacing than real 0.7fps but usable for functional tests) | FT-P-01 to FT-P-06, FT-N-01, FT-N-02, NFT-PERF-01, NFT-RES-01, NFT-RES-LIM-01 | -| AD000001_gmaps.png, AD000002_gmaps.png | `_docs/00_problem/input_data/` | Google Maps satellite reference images for frames 1-2 (used as sample satellite tile data for satellite matching validation) | FT-P-01, NFT-RES-02, NFT-RES-04 | -| coordinates.csv | `_docs/00_problem/input_data/coordinates.csv` | Ground truth GPS (lat, lon) for each of the 60 images | FT-P-01, FT-P-02, FT-P-03, FT-P-04 (comparison baseline) | -| data_parameters.md | `_docs/00_problem/input_data/data_parameters.md` | Camera specs: 400m altitude, ADTi Surveyor Lite 26S v2, 26MP, 25mm focal length, 23.5mm sensor | Test environment configuration | -| position_accuracy.csv | `_docs/00_problem/input_data/expected_results/position_accuracy.csv` | Per-frame ground truth with acceptance thresholds | FT-P-01, FT-P-02 (expected result comparison) | -| imu_synthetic_200hz.csv | `_docs/00_problem/input_data/` (TO BE GENERATED) | Synthetic 200Hz IMU data (accel + gyro) derived from coordinates.csv trajectory. Matches ICM-42688-P noise characteristics. Required for ESKF sensor fusion testing outside SITL. | FT-P-01 to FT-P-06, NFT-PERF-01, NFT-RES-01 to NFT-RES-06 | - -### IMU Data Generation - -No real IMU recordings exist for the 60-image flight sequence. Two approaches for providing IMU data during tests: - -**Approach A — SITL-generated (primary)**: ArduPilot SITL flies a waypoint mission following the coordinates.csv trajectory. SITL's internal physics engine generates physically consistent IMU data at 200Hz, delivered via MAVLink to the GPS-denied system. This is the most realistic approach and requires no pre-generated files. - -**Approach B — Synthetic CSV (fallback/replay)**: Generate `imu_synthetic_200hz.csv` offline from coordinates.csv: -1. Compute inter-frame velocities from GPS positions and timestamps -2. Interpolate position/velocity to 200Hz using cubic splines -3. Compute accelerations (body frame) accounting for gravity + flight dynamics -4. Add sensor noise matching ICM-42688-P specs (gyro: 3.0e-3 °/s/√Hz, accel: 70 µg/√Hz) -5. Add bias random walks (gyro: 5.0e-5 °/s²/√Hz, accel: 2.0e-3 m/s³/√Hz) -6. Replay via MAVLink injector service at 200Hz - -Approach A is recommended for integration tests. Approach B is useful for deterministic unit-level ESKF tests where reproducible IMU streams are needed. - -### Satellite Tile Data - -Only 2 Google Maps screenshots exist (AD000001_gmaps.png, AD000002_gmaps.png). Full satellite tile coverage for the test area must be prepared: -1. Download Google Maps tiles at zoom 18 for the bounding box: 48.249-48.276°N, 37.340-37.386°E -2. Store as 256x256 JPEG tiles with geohash-based naming -3. Load into satellite-tile-server Docker service -4. Estimated: ~50-100 tiles for the test area (~1-2MB total) - -## Expected Results Mapping - -| Test Scenario ID | Input Data | Expected Result | Comparison Method | Tolerance | Expected Result Source | -|-----------------|------------|-----------------|-------------------|-----------|----------------------| -| FT-P-01 | flight-sequence-60 (60 frames) | ≥80% of frames within 50m of ground truth | percentage | ≥80% | `expected_results/position_accuracy.csv` | -| FT-P-02 | flight-sequence-60 (60 frames) | ≥60% of frames within 20m of ground truth | percentage | ≥60% | `expected_results/position_accuracy.csv` | -| FT-P-03 | flight-sequence-60 (60 frames) | No single frame exceeds 100m error | threshold_max | ≤100m | `expected_results/position_accuracy.csv` | -| FT-P-04 | flight-sequence-60 (selected satellite anchor pairs) | VO drift between satellite anchors <100m | threshold_max | ≤100m | inline | -| FT-P-05 | Single frame + satellite match | GPS_INPUT: fix_type=3, horiz_accuracy 5-20m, satellites_visible=10 | exact + range | fix_type==3, accuracy∈[1,50] | `expected_results/results_report.md` #5 | -| FT-P-06 | flight-sequence-60 | ≥57 of 60 frames registered (≥95%) | percentage | ≥95% | inline | -| FT-P-07 | Normal operation, satellite match <30s | Confidence tier: HIGH | exact | N/A | inline | -| FT-P-08 | VO tracking, no satellite >30s | Confidence tier: MEDIUM | exact | N/A | inline | -| FT-P-09 | GPS_INPUT stream | Messages at 5-10Hz | range | [5,10] Hz | inline | -| FT-P-10 | POST /objects/locate (known pixel, gimbal, zoom, UAV position) | lat/lon within accuracy_m of ground truth | numeric_tolerance | within accuracy_m | `expected_results/results_report.md` #27 | -| FT-P-11 | Known GPS → NED → pixel → GPS | Round-trip error <0.1m | threshold_max | ≤0.1m | inline | -| FT-P-12 | System boot + GLOBAL_POSITION_INT | GPS_INPUT output within 60s | threshold_max | ≤60s | inline | -| FT-N-01 | Frames 32-43 (direction change area) | System continues producing estimates | threshold_min | ≥1 output per frame | inline | -| FT-N-02 | Simulated 350m gap between frames | System handles outlier, next valid frame <100m error | threshold_max | ≤100m | inline | -| FT-N-03 | POST /objects/locate with invalid pixels | HTTP 422 | exact | status==422 | inline | -| FT-N-04 | Unauthenticated request to /sessions | HTTP 401 | exact | status==401 | inline | -| FT-N-05 | VO lost + 3 satellite failures | fix_type=0, horiz_accuracy=999.0, RELOC_REQ sent | exact + regex | fix_type==0, matches `RELOC_REQ:.*` | inline | -| FT-N-06 | VO lost + IMU-only | fix_type=2, horiz_accuracy≥50m growing | exact + threshold_min | fix_type==2, accuracy≥50 | inline | - -## External Dependency Mocks - -| External Service | Mock/Stub | How Provided | Behavior | -|-----------------|-----------|-------------|----------| -| Flight Controller (ArduPilot) | ArduPilot SITL | Docker service (ardupilot-sitl) | Full MAVLink protocol: heartbeat, GLOBAL_POSITION_INT, IMU data at 200Hz, accepts GPS_INPUT, responds to COMMAND_LONG | -| Camera hardware (ADTI 20L V1) | Frame replay server | Docker service (camera-replay) | Serves frames from input_data/ at configurable rate (0.7fps default); supports fault injection (frame drop, delayed frame, corrupted JPEG) | -| Satellite imagery (Google Maps) | Static tile server | Docker service (satellite-tile-server) | Serves pre-cached tiles via HTTP; supports fault injection (404 for missing tiles, slow response) | -| Ground station | MAVLink inspector | Docker service (mavlink-inspector) | Captures STATUSTEXT and NAMED_VALUE_FLOAT; can inject COMMAND_LONG (operator re-localization hint) | -| GPU (CUDA/TensorRT) | CPU fallback or Jetson hardware | Conditional | Docker: CPU-mode stubs for TRT inference (slower but functionally equivalent). Jetson: real GPU | - -## Data Validation Rules - -| Data Type | Validation | Invalid Examples | Expected System Behavior | -|-----------|-----------|-----------------|------------------------| -| Camera frame (JPEG) | Valid JPEG, resolution ≥ 1280x720 | Truncated JPEG, 0-byte file, BMP format | Log warning, skip frame, continue with IMU-only ESKF prediction | -| GPS coordinate | lat ∈ [-90, 90], lon ∈ [-180, 180] | lat=999, lon=NaN | Reject, use last valid position | -| IMU data | Acceleration ∈ [-160, 160] m/s², gyro ∈ [-35, 35] rad/s | All zeros, NaN values, extreme spikes | Filter outliers via ESKF process noise, log warning | -| Satellite tile | Valid JPEG/PNG, 256x256 px | Missing tile (404), corrupted image | Skip tile, expand search radius, fall back to wider area | -| API request body | Valid JSON, required fields present | Missing pixel_x, non-numeric zoom_factor | HTTP 422 with validation error details | -| JWT token | Valid signature, not expired | Expired token, invalid signature, missing token | HTTP 401 Unauthorized | diff --git a/_docs/02_document/tests/traceability-matrix.md b/_docs/02_document/tests/traceability-matrix.md deleted file mode 100644 index fce03bc..0000000 --- a/_docs/02_document/tests/traceability-matrix.md +++ /dev/null @@ -1,69 +0,0 @@ -# Traceability Matrix - -## Acceptance Criteria Coverage - -| AC ID | Acceptance Criterion | Test IDs | Coverage | -|-------|---------------------|----------|----------| -| AC-01 | 80% of frames within 50m of real GPS | FT-P-01, FT-P-03 | Covered | -| AC-02 | 60% of frames within 20m of real GPS | FT-P-02 | Covered | -| AC-03 | Cumulative VO drift between satellite anchors < 100m | FT-P-04 | Covered | -| AC-04 | Confidence score per position estimate (high/low) | FT-P-07, FT-P-08, FT-N-06 | Covered | -| AC-05 | Image registration rate > 95% | FT-P-06 | Covered | -| AC-06 | System handles 350m outlier between consecutive photos | FT-N-02 | Covered | -| AC-07 | System handles sharp turns with <5% overlap, <200m drift, <70° angle via satellite re-localization | FT-N-01, NFT-RES-02 | Covered | -| AC-08 | System handles disconnected route segments (core feature) | FT-N-01, NFT-RES-02, NFT-RES-03 | Covered | -| AC-09 | 3 consecutive failures → re-localization request to ground station | FT-N-05, FT-N-07 | Covered | -| AC-10 | < 400ms end-to-end per frame | NFT-PERF-01 | Covered | -| AC-11 | Memory < 8GB shared | NFT-RES-LIM-01, NFT-RES-LIM-04 | Covered | -| AC-12 | GPS_INPUT via MAVLink (MAVSDK) to flight controller | FT-P-05, FT-P-09 | Covered | -| AC-13 | Frame-by-frame streaming, no batch/delay | FT-P-09, NFT-PERF-02 | Covered | -| AC-14 | System initializes from last known GPS position | FT-P-12 | Covered | -| AC-15 | Complete failure for N seconds → FC falls back to IMU, system logs | NFT-RES-03, FT-N-06 | Covered | -| AC-16 | Mid-flight reboot → re-initialize from FC's IMU-extrapolated position | NFT-RES-01 | Covered | -| AC-17 | Position + confidence streamed to ground station via telemetry | FT-P-13, FT-P-14 | Covered | -| AC-18 | Ground station can send commands (re-localization hint) | FT-N-07 | Covered | -| AC-19 | Output coordinates in WGS84 | FT-P-05, FT-P-11 | Covered | -| AC-20 | Other onboard AI can request object GPS coordinates | FT-P-10 | Covered | -| AC-21 | Object coordinates via trigonometric calculation (gimbal, zoom, altitude) | FT-P-10, FT-P-11 | Covered | -| AC-22 | Object accuracy consistent with frame-center accuracy | FT-P-10 | Covered | -| AC-23 | Satellite imagery ≥ 0.5 m/pixel | NFT-RES-LIM-03 | Covered (tile storage validation uses zoom 18 = 0.6m/px) | -| AC-24 | Satellite imagery pre-loaded before flight | NFT-RES-LIM-03 | Covered | -| AC-25 | MRE < 1.0 pixels | — | NOT COVERED — requires cuVSLAM internal reprojection metric; not observable via black-box interfaces. Covered at component test level (Step 5). | -| AC-26 | Satellite imagery < 2 years old | — | NOT COVERED — operational procurement constraint; not runtime-testable. Validated during offline tile preparation. | - -## Restrictions Coverage - -| Restriction ID | Restriction | Test IDs | Coverage | -|---------------|-------------|----------|----------| -| RESTRICT-01 | Fixed-wing UAV only | FT-P-01 through FT-P-06 (test data from fixed-wing flight) | Covered | -| RESTRICT-02 | Camera pointing downwards, not autostabilized | FT-P-10, FT-P-11 (coordinate transforms account for non-stabilized mount) | Covered | -| RESTRICT-03 | Eastern/southern Ukraine operational area | FT-P-01 (test coordinates at 48.25-48.28°N, 37.34-37.39°E) | Covered | -| RESTRICT-04 | Altitude ≤ 1km, terrain height negligible | FT-P-01, FT-P-10 (test data at 400m altitude, flat terrain assumed) | Covered | -| RESTRICT-05 | Mostly sunny weather | — | NOT COVERED — environmental condition; cannot be tested in Docker. Mitigated by feature-based matching robustness. | -| RESTRICT-06 | Sharp turns (no common points possible, exceptional) | FT-N-01, FT-N-02, NFT-RES-02 | Covered | -| RESTRICT-07 | Up to 3000 photos per flight | NFT-RES-LIM-04 | Covered | -| RESTRICT-08 | Two cameras: navigation (fixed nadir) + AI (configurable) | FT-P-10 (object localization uses AI camera angles) | Covered | -| RESTRICT-09 | Navigation camera: FullHD to 6252x4168, known parameters | FT-P-01 (test data at 6252x4168) | Covered | -| RESTRICT-10 | Jetson Orin Nano Super: 67 TOPS, 8GB, 25W TDP | NFT-RES-LIM-01, NFT-RES-LIM-02, NFT-PERF-01 | Covered | -| RESTRICT-11 | Thermal throttling at sustained GPU load | NFT-RES-LIM-02 | Covered | -| RESTRICT-12 | IMU data via flight controller (MAVLink) | FT-P-05, FT-P-12 (SITL provides IMU via MAVLink) | Covered | -| RESTRICT-13 | GPS_INPUT message to flight controller | FT-P-05, FT-P-09 | Covered | -| RESTRICT-14 | Telemetry link bandwidth-limited | FT-P-13 (1Hz telemetry rate is bandwidth-appropriate) | Covered | -| RESTRICT-15 | Google Maps satellite (potentially outdated) | NFT-RES-04 (tile server failure resilience) | Covered | -| RESTRICT-16 | Onboard storage limited for satellite tiles | NFT-RES-LIM-03 | Covered | - -## Coverage Summary - -| Category | Total Items | Covered | Not Covered | Coverage % | -|----------|-----------|---------|-------------|-----------| -| Acceptance Criteria | 26 | 24 | 2 | 92% | -| Restrictions | 16 | 15 | 1 | 94% | -| **Total** | **42** | **39** | **3** | **93%** | - -## Uncovered Items Analysis - -| Item | Reason Not Covered | Risk | Mitigation | -|------|-------------------|------|-----------| -| AC-25 (MRE < 1.0 pixels) | cuVSLAM reprojection error is an internal metric not exposed via public API or MAVLink; cannot be observed at black-box level | LOW — MRE is a proxy for VO quality; position accuracy tests (FT-P-01/02) validate the end result | Covered at component test level (Step 5) with cuVSLAM-specific unit tests | -| AC-26 (Satellite imagery < 2 years old) | Operational procurement constraint validated during offline tile preparation, not at runtime | LOW — outdated imagery degrades matching but is caught by satellite matching accuracy tests | Validated during pre-flight tile download workflow; documented in operational procedures | -| RESTRICT-05 (Sunny weather) | Environmental condition that cannot be reproduced in Docker or controlled test environment | LOW — system uses feature-based matching (LiteSAM/XFeat) robust to lighting variation; not a software test | Validated during real-world flight tests; feature-based matching provides robustness | diff --git a/_docs/_autopilot_state.md b/_docs/_autopilot_state.md deleted file mode 100644 index 6d357c8..0000000 --- a/_docs/_autopilot_state.md +++ /dev/null @@ -1,40 +0,0 @@ -# Autopilot State - -## Current Step -flow: greenfield -step: 3 -name: Plan -status: in_progress -sub_step: 1 — Blackbox Tests (test-spec skill), Phase 2 complete, Phase 3 next -retry_count: 0 - -## Completed Steps - -| Step | Name | Completed | Key Outcome | -|------|------|-----------|-------------| -| 1 | Problem | 2026-03-25 (detected) | Problem, restrictions, acceptance criteria, and input data defined | -| 2 | Research | 2026-03-25 (detected) | 8 research rounds, 6 solution drafts, tech_stack.md, security_analysis.md | - -## Key Decisions -- Research Decision: Proceed to planning with current draft (6 drafts, 8 rounds sufficient) -- GPS-denied visual navigation system for UAV onboard Jetson Orin Nano Super -- Core algorithms: cuVSLAM, LiteSAM/XFeat, ESKF sensor fusion -- TRT engine migration for AI inference -- Latest draft (06) at TRL ~2.5-3; hardware/algorithms well-researched, system integration under-specified -- solution_draft06.md renamed to solution.md as finalized solution -- Test environment: dual-mode (Docker SITL for CI + Jetson hardware for nightly/pre-deploy) -- IMU data: ArduPilot SITL flies waypoint mission from coordinates.csv, generates 200Hz IMU via MAVLink -- Test coverage: 43 scenarios, 93% AC+restriction coverage (39/42) - -## Last Session -date: 2026-03-25 -ended_at: Step 3 Plan — SubStep 1 Blackbox Tests (test-spec Phase 2 complete) -reason: user paused — context break before Phase 3 -notes: Test-spec Phase 1 (input data analysis) and Phase 2 (test scenario specification) complete. 8 artifacts written to _docs/02_document/tests/. Expected results created at _docs/00_problem/input_data/expected_results/. Phase 3 (test data validation gate) is next — scan all test scenarios, verify each has concrete test data + quantifiable expected result, ask user to provide missing items or remove tests, final coverage check ≥70%. Then Phase 4 (test runner scripts). - -## Retry Log -| Attempt | Step | Name | SubStep | Failure Reason | Timestamp | -|---------|------|------|---------|----------------|-----------| - -## Blockers -- none diff --git a/_standalone/UAV_camera_comparison/00_research/00_question_decomposition.md b/_standalone/UAV_camera_comparison/00_research/00_question_decomposition.md deleted file mode 100644 index 88fed32..0000000 --- a/_standalone/UAV_camera_comparison/00_research/00_question_decomposition.md +++ /dev/null @@ -1,68 +0,0 @@ -# Question Decomposition - -## Original Question -Compare ViewPro Z40K and USG-231 cameras: analyze video feed quality (especially from Shark M UAV), wobble effect, zoom capabilities, image crispness, and overall quality during zoom. - -## Active Mode -Mode A Phase 2 — Initial Research (no prior solution drafts exist) - -## Question Type -**Concept Comparison** — comparing two specific camera products across defined quality dimensions - -## Research Subject Boundary -- **Population**: UAV gimbal cameras in the 500-600g class for fixed-wing reconnaissance -- **Geography**: Global (ViewPro is Chinese, USG is Ukrainian) -- **Timeframe**: Current products as of 2025-2026 -- **Level**: Product-grade ISR camera systems - -## Problem Context -User is building a reconnaissance UAV and evaluating camera payloads. The Shark M UAV (by Ukrspecsystems) uses the USG-231 as its standard payload. The ViewPro Z40K is a competing 3rd-party gimbal camera. - -## Decomposed Sub-Questions - -### SQ1: What are the core optical/sensor specifications of each camera? -- "ViewPro Z40K sensor specifications resolution zoom" -- "USG-231 sensor type resolution megapixel" -- "ViewPro Z40K Panasonic CMOS module identification" -- "USG-231 Sony FCB block camera module 30x zoom" -- "1/2.3 inch vs 1/2.8 inch CMOS sensor quality drone" - -### SQ2: How do the stabilization systems compare? -- "3-axis gimbal vs 2-axis gimbal drone camera wobble" -- "digital stabilization vs optical image stabilization OIS drone" -- "2-axis gimbal yaw problem fixed wing drone" -- "ViewPro Z40K 5-axis OIS stabilization performance" -- "USG-231 digital video stabilization quality" - -### SQ3: What is the zoom quality and behavior at high magnification? -- "20x optical zoom 4K vs 30x optical zoom Full HD surveillance" -- "intelligent zoom iA zoom quality degradation crop" -- "30x zoom drone camera atmospheric distortion max zoom" -- "ViewPro Z40K zoom test footage sharpness" - -### SQ4: What does the Shark M video feed actually look like? -- "Shark M UAV video footage quality zoom" -- "Shark UAV combat footage camera quality analysis" -- "USG-231 reconnaissance footage stabilization" - -### SQ5: How does wobble manifest on each camera? -- "3-axis gimbal wobble reduction vs 2-axis jello effect" -- "ViewPro gimbal vibration jello problem" -- "USG-231 wobble fixed wing drone flight" - -## Chosen Perspectives -1. **End-user/Operator**: What does the feed look like during missions? Usability of zoom, clarity of targets -2. **Integrator/Engineer**: Gimbal architecture, stabilization mechanism, weight, integration complexity -3. **Domain Expert (ISR)**: Effective observation range, target identification capability, zoom vs resolution trade-off -4. **Contrarian**: Where does each camera fail? What are the hidden weaknesses? - -## Timeliness Sensitivity Assessment -- **Research Topic**: UAV gimbal camera comparison (ViewPro Z40K vs USG-231) -- **Sensitivity Level**: Medium -- **Rationale**: Hardware products with stable specs; not rapidly changing like AI/software -- **Source Time Window**: 1-2 years -- **Priority official sources**: - 1. ViewPro official product pages - 2. Ukrspecsystems official product pages - 3. Sony FCB block camera datasheets - 4. Defense Express field reports diff --git a/_standalone/UAV_camera_comparison/00_research/01_source_registry.md b/_standalone/UAV_camera_comparison/00_research/01_source_registry.md deleted file mode 100644 index e976a0a..0000000 --- a/_standalone/UAV_camera_comparison/00_research/01_source_registry.md +++ /dev/null @@ -1,133 +0,0 @@ -# Source Registry - -## Source #1 -- **URL**: https://rcdrone.top/products/viewpro-z40k-4k-gimbal-camera -- **Tier**: L2 (authorized reseller with full spec sheet) -- **Summary**: Complete ViewPro Z40K specifications including sensor, zoom, gimbal, OIS details -- **Date Accessed**: 2026-03-21 - -## Source #2 -- **URL**: https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera -- **Tier**: L1 (manufacturer official) -- **Summary**: ViewPro official product page with specifications -- **Date Accessed**: 2026-03-21 - -## Source #3 -- **URL**: https://ukrspecsystems.com/drone-gimbals/usg-231 -- **Tier**: L1 (manufacturer official) -- **Summary**: USG-231 official specifications, features, integration details -- **Date Accessed**: 2026-03-21 - -## Source #4 -- **URL**: https://ukrspecsystems.com/drones/shark-m-uas -- **Tier**: L1 (manufacturer official) -- **Summary**: Shark M UAS full specifications, camera options, system details -- **Date Accessed**: 2026-03-21 - -## Source #5 -- **URL**: https://dronexpert.nl/en/viewpro-z40k-20x-optical-zoom-4k-camera-up-to-40x-zoom/ -- **Tier**: L2 (authorized dealer) -- **Summary**: Z40K detailed specs including effective pixel counts per resolution mode -- **Date Accessed**: 2026-03-21 - -## Source #6 -- **URL**: https://www.aeroexpo.online/prod/ukrspecsystems/product-185884-82835.html -- **Tier**: L2 (trade platform with manufacturer data) -- **Summary**: USG-231 specifications and integration details -- **Date Accessed**: 2026-03-21 - -## Source #7 -- **URL**: https://en.defence-ua.com/weapon_and_tech/how_the_newest_ukrainian_shark_uav_works_over_donetsk_and_why_its_really_cool_video-5438.html -- **Tier**: L3 (defense media analysis) -- **Summary**: Shark UAV combat footage analysis, camera quality observations, auto-tracking assessment -- **Date Accessed**: 2026-03-21 - -## Source #8 -- **URL**: https://www.cameraguidepro.com/what-is-the-difference-between-a-2-axis-and-3-axis-gimbal/ -- **Tier**: L3 (tech media) -- **Summary**: 2-axis vs 3-axis gimbal comparison, wobble/jello effect analysis -- **Date Accessed**: 2026-03-21 - -## Source #9 -- **URL**: https://www.makeuseof.com/two-axis-vs-three-axis-gimbals/ -- **Tier**: L3 (tech media) -- **Summary**: Detailed 2-axis vs 3-axis trade-offs including weight, power, cost -- **Date Accessed**: 2026-03-21 - -## Source #10 -- **URL**: https://droneflyingpro.com/2-axis-vs-3-axis-gimbal/ -- **Tier**: L3 (drone specialist media) -- **Summary**: 2-axis vs 3-axis on drones with diagrams, jello effect explanation -- **Date Accessed**: 2026-03-21 - -## Source #11 -- **URL**: https://www.steadxp.com/digital-vs-optical-stabilization-a-comparison-guide/ -- **Tier**: L3 (stabilization specialist) -- **Summary**: EIS vs OIS comparison, quality impact, artifact analysis -- **Date Accessed**: 2026-03-21 - -## Source #12 -- **URL**: https://www.guidingtech.com/eis-vs-ois-stabilization/ -- **Tier**: L3 (tech media) -- **Summary**: Digital vs optical stabilization advantages and limitations -- **Date Accessed**: 2026-03-21 - -## Source #13 -- **URL**: https://www.dronetrest.com/t/whats-the-best-choice-for-the-fixed-wing-3-axis-gimbal-or-2-axis-gimbal/8091 -- **Tier**: L4 (community forum) -- **Summary**: Fixed-wing drone gimbal selection discussion, practitioner perspectives -- **Date Accessed**: 2026-03-21 - -## Source #14 -- **URL**: https://phantompilots.com/threads/yaw-issue-with-2-axis-gimbals.6854 -- **Tier**: L4 (community forum) -- **Summary**: Real user reports of yaw wobble issues with 2-axis gimbals -- **Date Accessed**: 2026-03-21 - -## Source #15 -- **URL**: https://www.manualslib.com/manual/2385515/Viewpro-Z40k.html -- **Tier**: L1 (manufacturer manual) -- **Summary**: ViewPro Z40K user manual with detailed technical specifications -- **Date Accessed**: 2026-03-21 - -## Source #16 -- **URL**: https://www.viewprotech.com/index.php?ac=article&at=read&did=202 -- **Tier**: L1 (ViewPro official tech page) -- **Summary**: Z40K DJI PSDK series technical details, stabilization specs -- **Date Accessed**: 2026-03-21 - -## Source #17 -- **URL**: https://pro.sony/ue_US/products/zoom-camera-blocks/fcb-ev9500l -- **Tier**: L1 (Sony official) -- **Summary**: Sony FCB-EV9500L block camera specs — likely module inside USG-231 -- **Date Accessed**: 2026-03-21 - -## Source #18 -- **URL**: https://block-cameras.com/products/sony-fcb-ev9520l-30x-zoom-full-hd-block-camera-sensor-starvis-gen2 -- **Tier**: L2 (distributor) -- **Summary**: Sony FCB-EV9520L STARVIS 2 block camera specs -- **Date Accessed**: 2026-03-21 - -## Source #19 -- **URL**: https://medium.com/@daily_drones/hands-on-with-the-dji-zenmuse-z30-53ab50fe628c -- **Tier**: L3 (tech reviewer) -- **Summary**: DJI Zenmuse Z30 hands-on review (same class as USG-231 sensor) -- **Date Accessed**: 2026-03-21 - -## Source #20 -- **URL**: https://www.oreateai.com/blog/beyond-the-numbers-what-123-vs-113-inch-sensor-size-really-means-for-your-photos/ -- **Tier**: L3 (tech blog) -- **Summary**: Sensor size comparison impact on image quality -- **Date Accessed**: 2026-03-21 - -## Source #21 -- **URL**: https://en.wikipedia.org/wiki/Ukrspecsystems_Shark -- **Tier**: L3 (Wikipedia) -- **Summary**: Shark UAV family specifications and history -- **Date Accessed**: 2026-03-21 - -## Source #22 -- **URL**: https://en.defence-ua.com/weapon_and_tech/ukrainian_drone_maker_demonstrates_its_new_shark_uav_target_tracking_capabilities_video-4803.html -- **Tier**: L3 (defense media) -- **Summary**: Shark UAV target tracking demo and zoom capabilities -- **Date Accessed**: 2026-03-21 diff --git a/_standalone/UAV_camera_comparison/00_research/02_fact_cards.md b/_standalone/UAV_camera_comparison/00_research/02_fact_cards.md deleted file mode 100644 index 61cb7ad..0000000 --- a/_standalone/UAV_camera_comparison/00_research/02_fact_cards.md +++ /dev/null @@ -1,151 +0,0 @@ -# Fact Cards - -## Fact #1 -- **Statement**: ViewPro Z40K uses a Panasonic 1/2.3" CMOS sensor with 25.9MP total pixels -- **Source**: Source #1, #2 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #2 -- **Statement**: ViewPro Z40K records 4K (3840×2160) at 25/30fps with 8.29MP effective recording pixels; FHD (1080P) at 50/60fps with 6.10MP effective recording pixels -- **Source**: Source #1, #5 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #3 -- **Statement**: ViewPro Z40K provides 20x optical zoom; 25x iA (intelligent) zoom in 4K mode; 40x iA zoom in FHD mode. iA zoom beyond 20x is a digital crop, not true optical. -- **Source**: Source #1, #2, #5 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #4 -- **Statement**: ViewPro Z40K has 3-axis gimbal with ±0.02° vibration angle accuracy on pitch/roll, ±0.03° on yaw, plus 5-axis Optical Image Stabilization (OIS) -- **Source**: Source #1, #5, #16 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #5 -- **Statement**: ViewPro Z40K lens is F1.8 (wide) to F3.6 (tele); horizontal FOV 62.95° (wide) to 3.45° (tele) -- **Source**: Source #1, #2 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #6 -- **Statement**: ViewPro Z40K weighs 595g, operates -20°C to +60°C, CNC aluminum housing -- **Source**: Source #1, #2 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #7 -- **Statement**: ViewPro Z40K has 65dB dynamic range, 38dB S/N ratio, minimum illumination 0.05 lux at F1.6 -- **Source**: Source #1 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #8 -- **Statement**: USG-231 is a 2-axis gyro-stabilized gimbal with Full HD (1920×1080) day-view camera, 30x optical zoom, 3x digital zoom -- **Source**: Source #3, #6 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #9 -- **Statement**: USG-231 uses digital video stabilization (EIS), not optical image stabilization -- **Source**: Source #3, #4 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #10 -- **Statement**: USG-231 uses a CMOS sensor with 63.7° view angle; camera weighs 590g; video processing block weighs 250g (840g total system) -- **Source**: Source #3, #6 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #11 -- **Statement**: USG-231 likely uses a Sony FCB-series block camera module (specs match FCB-EV9500L: 30x zoom, Full HD, 63.7° FOV, 1/2.8" or 1/1.8" STARVIS CMOS) -- **Source**: Source #17, #18 (Sony specs matching USG-231 specs from Source #3) -- **Phase**: Phase 2 -- **Confidence**: ⚠️ Medium (not officially confirmed by Ukrspecsystems, but spec match is very close) - -## Fact #12 -- **Statement**: 2-axis gimbals stabilize pitch and roll only; yaw movement is NOT compensated. This causes visible horizontal jitter/wobble ("jello effect") during turns and wind gusts on fixed-wing drones. -- **Source**: Source #8, #9, #10, #14 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #13 -- **Statement**: 3-axis gimbals add yaw stabilization, which greatly reduces or eliminates horizontal jello effect. Industry standard for professional drone videography. -- **Source**: Source #8, #9, #10 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #14 -- **Statement**: Digital stabilization (EIS) works by cropping the frame and algorithmically shifting pixels. It reduces effective resolution, can introduce warping artifacts, and struggles with fast vibrations. -- **Source**: Source #11, #12 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #15 -- **Statement**: Optical Image Stabilization (OIS) physically moves lens elements to compensate for movement. No resolution loss, no cropping, no warping artifacts. Superior for small/fast vibrations. -- **Source**: Source #11, #12 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #16 -- **Statement**: The Shark M UAV uses USG-231 as its standard EO camera. The camera was used in combat over Donetsk and Defense Express noted "the quality of the camera itself, which allows to receive detailed images online and determine the coordinates of targets." -- **Source**: Source #7, #4 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #17 -- **Statement**: The Shark UAV demonstrated auto-tracking from 800m distance with quality footage. The system tracks both contrasting and complex objects. -- **Source**: Source #7, #22 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #18 -- **Statement**: At 30x optical zoom, atmospheric distortion (heat haze, mirage) becomes visible in drone footage, creating slight jitteriness. This is a physics limitation affecting all cameras equally. -- **Source**: Source #19 (Z30 review showing same effect) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #19 -- **Statement**: The DJI Zenmuse Z30 (comparable 30x zoom, 1/2.8" sensor, 2.13MP) demonstrates that at max optical zoom, even with excellent stabilization, image quality is sufficient for inspection but shows "slight loss of quality" with digital zoom. -- **Source**: Source #19 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #20 -- **Statement**: ViewPro Z40K price ranges from $2,999-$4,879 depending on variant and retailer. USG-231 price is not public but marketed as "cost-effective and affordable." -- **Source**: Source #1, #2, #3 -- **Phase**: Phase 2 -- **Confidence**: ✅ High (Z40K) / ⚠️ Medium (USG-231 — no public pricing) - -## Fact #21 -- **Statement**: The 1/2.3" sensor (Z40K) is physically larger than the 1/2.8" sensor (likely in USG-231). Larger sensors capture more light, have better low-light performance, wider dynamic range, and less noise. -- **Source**: Source #20 -- **Phase**: Phase 2 -- **Confidence**: ✅ High (sensor size comparison); ⚠️ Medium (USG-231 sensor size assumption) - -## Fact #22 -- **Statement**: USG-231 features anti-fog, weather sealing, IR filter, automatic focus control, onboard recording, IP streaming, and Pixhawk/Ardupilot compatibility (plug-and-play). -- **Source**: Source #3, #6 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #23 -- **Statement**: ViewPro Z40K gimbal mechanical range: Pitch ±120°, Roll ±70°, Yaw ±300°. Supports PWM, TTL, SBUS, UDP control. Has geotagging and object tracking. -- **Source**: Source #1, #2 -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #24 -- **Statement**: Sony FCB-EV9500L (if used in USG-231) has Super Image Stabilizer built into the camera module itself, separate from the gimbal stabilization. It features STARVIS sensor with excellent low-light (0.00008 lux min illumination). -- **Source**: Source #17, #18 -- **Phase**: Phase 2 -- **Confidence**: ⚠️ Medium (conditional on USG-231 actually using this module) - -## Fact #25 -- **Statement**: For the Shark M, video and telemetry are transmitted encrypted in Full HD quality over ranges up to 180 km using Silvus Technologies StreamCaster radio. -- **Source**: Source #4 -- **Phase**: Phase 2 -- **Confidence**: ✅ High diff --git a/_standalone/UAV_camera_comparison/00_research/03_comparison_framework.md b/_standalone/UAV_camera_comparison/00_research/03_comparison_framework.md deleted file mode 100644 index e9a6cfc..0000000 --- a/_standalone/UAV_camera_comparison/00_research/03_comparison_framework.md +++ /dev/null @@ -1,35 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Concept Comparison + Decision Support - -## Selected Dimensions - -1. Video Resolution & Sensor Quality -2. Optical Zoom Range & Quality -3. Zoom Quality During Digital/Extended Zoom -4. Gimbal Stabilization Architecture -5. Wobble / Jello Effect -6. Image Crispness at High Zoom -7. Low-Light Performance -8. Weight & Integration -9. Field-Proven Track Record -10. Cost - -## Comparison Table - -| Dimension | ViewPro Z40K | USG-231 | Factual Basis | -|-----------|-------------|---------|---------------| -| **Video Resolution** | 4K (3840×2160) @ 25/30fps; 8.29MP effective | Full HD (1920×1080); ~2MP effective | Fact #1, #2, #8 | -| **Sensor** | Panasonic 1/2.3" CMOS, 25.9MP total | Sony CMOS (likely 1/2.8" or 1/1.8" STARVIS), ~2MP | Fact #1, #11, #21 | -| **Optical Zoom** | 20x (FOV 62.95°→3.45°) | 30x (FOV 63.7°→~2.1°) | Fact #3, #8 | -| **Extended Zoom** | 25x iA (4K), 40x iA (FHD) — digital crop | 3x digital (90x total) — digital crop | Fact #3, #8 | -| **Gimbal Type** | 3-axis, ±0.02° accuracy | 2-axis, accuracy not published | Fact #4, #8, #12, #13 | -| **OIS** | 5-axis Optical Image Stabilization | None (digital EIS only) | Fact #4, #9, #14, #15 | -| **Wobble/Jello** | Minimal — yaw compensated + OIS | Susceptible — no yaw compensation, EIS can warp | Fact #12, #13, #14 | -| **Image Crispness at Max Optical Zoom** | 4K at 20x = 8.29MP of detail at 3.45° FOV | FHD at 30x = ~2MP of detail at ~2.1° FOV | Fact #2, #8, #19 | -| **Low-Light** | 0.05 lux @ F1.6, 65dB DR | If Sony STARVIS: 0.00008 lux, excellent | Fact #7, #24 | -| **Weight** | 595g (all-in-one) | 590g camera + 250g VPB = 840g total | Fact #6, #10 | -| **Autopilot Integration** | PWM/TTL/SBUS/UDP (needs custom integration) | Pixhawk/Ardupilot plug-and-play | Fact #22, #23 | -| **Combat/Field Proven** | No public combat deployment data | Proven on Shark UAV in Ukraine combat | Fact #16, #17 | -| **Price** | $2,999–$4,879 | Not public ("cost-effective") | Fact #20 | diff --git a/_standalone/UAV_camera_comparison/00_research/04_reasoning_chain.md b/_standalone/UAV_camera_comparison/00_research/04_reasoning_chain.md deleted file mode 100644 index 8ac632a..0000000 --- a/_standalone/UAV_camera_comparison/00_research/04_reasoning_chain.md +++ /dev/null @@ -1,133 +0,0 @@ -# Reasoning Chain - -## Dimension 1: Video Resolution & Sensor Quality - -### Fact Confirmation -The Z40K uses a Panasonic 1/2.3" CMOS with 25.9MP total, recording 4K (8.29MP effective) video. (Fact #1, #2) -The USG-231 records Full HD (1920×1080, ~2MP effective) from a CMOS sensor. (Fact #8) - -### Reference Comparison -4K contains 4× the pixel count of Full HD (8.3M vs 2.1M pixels). A 1/2.3" sensor is physically ~30% larger in area than a 1/2.8" sensor (if that is what USG-231 uses). Larger sensor = more light per pixel, better dynamic range, less noise. (Fact #21) - -### Conclusion -The Z40K delivers dramatically higher resolution. At any given zoom level where both cameras share coverage, the Z40K captures ~4× more detail. This translates directly to better target identification, better image crispness, and more usable footage for post-mission analysis. - -### Confidence: ✅ High - ---- - -## Dimension 2: Optical Zoom Range - -### Fact Confirmation -Z40K: 20x optical zoom, narrowing FOV to 3.45°. (Fact #3, #5) -USG-231: 30x optical zoom, narrowing FOV to approximately 2.1°. (Fact #8) - -### Reference Comparison -The USG-231 reaches 50% more optical magnification. In pure optical zoom terms, the USG-231 can bring distant targets closer without digital quality loss. At 30x, you see objects at roughly 1.5× closer than Z40K's 20x maximum. - -### Conclusion -USG-231 wins on raw optical zoom reach (30x vs 20x). For long-range surveillance where maximum optical magnification matters and you cannot fly closer, the USG-231 has an advantage. - -### Confidence: ✅ High - ---- - -## Dimension 3: Effective Detail at Maximum Zoom (Resolution × Zoom Trade-off) - -### Fact Confirmation -Z40K at 20x optical zoom captures 3840×2160 pixels (8.29MP) across a 3.45° horizontal FOV. (Fact #2, #3) -USG-231 at 30x optical zoom captures 1920×1080 pixels (~2MP) across a ~2.1° horizontal FOV. (Fact #8) - -### Reference Comparison -Effective detail = pixels per degree of FOV. -- Z40K: 3840 pixels / 3.45° ≈ 1,113 pixels per degree (at 20x, 4K) -- USG-231: 1920 pixels / 2.1° ≈ 914 pixels per degree (at 30x, FHD) - -Even though the USG-231 zooms 50% further optically, the Z40K still delivers ~22% more pixels per degree of angular coverage due to its 4K sensor. The Z40K's pixel density advantage persists even when the USG-231 is at full optical zoom. - -### Conclusion -The Z40K produces sharper images at max optical zoom despite zooming less far, because its 4K resolution compensates for the zoom difference and then some. For target identification, the Z40K's 4K at 20x is effectively crisper than USG-231's FHD at 30x. - -### Confidence: ✅ High - ---- - -## Dimension 4: Stabilization — Wobble Effect - -### Fact Confirmation -Z40K: 3-axis gimbal (pitch + roll + yaw) with ±0.02° accuracy, plus 5-axis OIS in the lens. (Fact #4) -USG-231: 2-axis gimbal (pitch + roll only) with digital EIS. (Fact #8, #9) - -### Reference Comparison -2-axis gimbals leave yaw rotation uncompensated. On fixed-wing drones, wind gusts and turns cause yaw movements that create visible horizontal wobble/jello in footage. (Fact #12) Digital EIS attempts to correct this by cropping and shifting the frame, but this: (a) reduces effective resolution, (b) can introduce warping artifacts, (c) fails with fast vibrations. (Fact #14) - -3-axis gimbals mechanically compensate yaw, eliminating the primary source of wobble. Combined with OIS, even small high-frequency vibrations from the airframe are absorbed without resolution loss. (Fact #13, #15) - -### Conclusion -The Z40K has categorically superior stabilization. The 3-axis gimbal + 5-axis OIS architecture eliminates wobble at its physical source. The USG-231's 2-axis + EIS approach is fundamentally limited — uncompensated yaw will produce visible wobble on fixed-wing drones, especially during turns and in wind. The wobble becomes more pronounced at higher zoom levels because angular errors are magnified. - -### Confidence: ✅ High - ---- - -## Dimension 5: Image Crispness During Zoom - -### Fact Confirmation -Z40K: Uses OIS (no resolution loss during stabilization), 4K base resolution. At 25x iA zoom (4K mode), quality begins to degrade due to digital crop but remains at ~4K equivalent through sensor oversampling. (Fact #3, #15) -USG-231: Uses EIS (crops frame, reducing effective resolution from already-FHD). The effective resolution while EIS is active is less than 1920×1080. At 30x optical + EIS crop, the actual visible pixels are reduced. (Fact #14, #8) - -### Reference Comparison -The DJI Zenmuse Z30 (similar sensor to USG-231) shows "sufficient sharpness for inspection work" at 30x but "slight loss of quality" when digital zoom engages. (Fact #19) At maximum zoom, atmospheric distortion becomes the limiting factor for both cameras. (Fact #18) - -### Conclusion -The Z40K maintains significantly crisper images during zoom due to: (1) 4K base resolution, (2) OIS not consuming resolution, (3) higher pixel density even before zoom. The USG-231's crispness degrades more noticeably because EIS crops from an already-lower resolution. However, the USG-231's optical glass reaches further, which partially compensates in scenarios where distance is the primary constraint. - -### Confidence: ✅ High - ---- - -## Dimension 6: Low-Light Performance - -### Fact Confirmation -Z40K: 0.05 lux minimum illumination at F1.6, 65dB dynamic range. (Fact #7) -USG-231: If using Sony STARVIS sensor, minimum illumination could be as low as 0.00008 lux. (Fact #24) - -### Reference Comparison -Sony STARVIS sensors are specifically designed for surveillance with exceptional low-light performance. The USG-231's minimum illumination (if STARVIS) would be ~625× better than the Z40K's. - -### Conclusion -The USG-231 likely has significantly better low-light performance if it uses a Sony STARVIS module. This matters for dawn/dusk and night reconnaissance. The Z40K is adequate in daylight and moderate low-light but is not in the same class for near-dark conditions. - -### Confidence: ⚠️ Medium (USG-231 sensor identification not confirmed) - ---- - -## Dimension 7: Weight & Integration - -### Fact Confirmation -Z40K: 595g all-in-one, needs custom integration (PWM/TTL/SBUS/UDP). (Fact #6, #23) -USG-231: 840g total (590g camera + 250g VPB), plug-and-play with Pixhawk/Ardupilot. (Fact #10, #22) - -### Reference Comparison -The Z40K is 245g lighter as a total system. For a fixed-wing UAV at 10-15kg MTOW, 245g is ~2% of total weight — meaningful for flight endurance. However, the USG-231's plug-and-play Pixhawk integration is a significant engineering advantage if the airframe uses that autopilot. - -### Conclusion -Z40K wins on weight (595g vs 840g) but loses on integration simplicity if the platform uses Pixhawk/Ardupilot. For custom builds, the Z40K requires more integration work but saves weight. The USG-231 is purpose-built for the Shark ecosystem. - -### Confidence: ✅ High - ---- - -## Dimension 8: Field-Proven Track Record - -### Fact Confirmation -USG-231 has extensive combat deployment on Shark UAVs in Ukraine. Defense Express noted good image quality and effective auto-tracking. (Fact #16, #17) -Z40K has no publicly documented combat deployment. - -### Reference Comparison -Combat-proven systems have demonstrated reliability under vibration, temperature extremes, EW interference, and time pressure. The USG-231 has survived this test. The Z40K has not been publicly evaluated under equivalent conditions. - -### Conclusion -USG-231 has a significant advantage in proven reliability and real-world validation. The Z40K is untested in comparable conditions. However, this speaks to platform reliability, not inherently to video quality. - -### Confidence: ✅ High diff --git a/_standalone/UAV_camera_comparison/00_research/05_validation_log.md b/_standalone/UAV_camera_comparison/00_research/05_validation_log.md deleted file mode 100644 index 8d0f19e..0000000 --- a/_standalone/UAV_camera_comparison/00_research/05_validation_log.md +++ /dev/null @@ -1,42 +0,0 @@ -# Validation Log - -## Validation Scenario -A fixed-wing reconnaissance UAV flies at 75 km/h cruising speed at 1,000m altitude. The operator needs to identify a vehicle type at 3 km slant range, then zoom in to read markings at 1.5 km slant range. Wind is 8 m/s with gusts. The UAV performs orbital surveillance (constant turns). - -## Expected Based on Conclusions - -### If using ViewPro Z40K: -- At 3 km: 20x optical zoom narrows FOV to 3.45°. 4K resolution provides 8.29MP of detail. Vehicle type identification is straightforward. -- At 1.5 km with 25x iA zoom (4K): Sufficient resolution to distinguish markings. Image remains crisp. -- During turns: 3-axis gimbal compensates yaw. Operator sees smooth, stable image. OIS absorbs airframe vibration. -- In gusts: 5-axis OIS + gimbal maintains stable frame. No visible wobble at zoom. -- Transmission: 4K may need to be downscaled to FHD for transmission bandwidth. Recording is 4K on SD card. - -### If using USG-231 (on Shark M): -- At 3 km: 30x optical zoom narrows FOV to ~2.1°. But FHD resolution means only ~2MP of detail. Vehicle type identification is possible but with less margin. -- At 1.5 km at 30x: Target fills more of the frame due to higher zoom, but fewer pixels per target compared to Z40K at 20x. -- During turns: 2-axis gimbal does NOT compensate yaw. During orbital surveillance (constant heading change), the image will exhibit horizontal wobble/drift. EIS will attempt correction but consumes resolution and may introduce warping. -- In gusts: EIS handles moderate vibration but produces artifacts under aggressive movement. The operator may see frame edges jumping or brief warping. -- Transmission: FHD native — no downscaling needed. Encrypted Full HD over 180 km via Silvus. - -## Actual Validation (Against Known Evidence) -Defense Express combat footage from Shark UAV (USG-231) over Donetsk confirms: "quality of the camera allows to receive detailed images online and determine the coordinates of targets." Auto-tracking from 800m demonstrated effectively. This suggests that for the Shark's primary mission profile (target coordinate determination at moderate ranges), the USG-231 is sufficient. However, no public footage shows high-zoom image quality during aggressive maneuvering, leaving the wobble question unresolved by direct evidence. - -No comparable field footage exists for the Z40K on a reconnaissance fixed-wing platform. - -## Counterexamples -1. The USG-231's 30x optical reach means it can observe targets at greater standoff distance without digital zoom degradation. If the mission requires maximum standoff (e.g., flying high above enemy AD), the extra optical reach matters more than resolution. -2. The Z40K's 4K recording may be overkill if the transmission link only supports FHD — the operator sees FHD anyway in real time, and 4K is only useful in post-mission review. -3. In electronic warfare environments, the USG-231 (integrated with Shark ecosystem) has proven EW resilience. The Z40K as a standalone payload has no such validation. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions are actionable -- [ ] Note: USG-231 sensor identification (Sony FCB) is inferred, not confirmed — affects low-light conclusion confidence - -## Conclusions Requiring Caveat -- Low-light performance comparison depends on confirming the USG-231's actual sensor module -- Field reliability comparison is one-sided (USG-231 is combat-proven, Z40K is not) -- Real-world wobble comparison lacks direct video evidence from both cameras on the same platform diff --git a/_standalone/UAV_camera_comparison/01_solution/solution_draft01.md b/_standalone/UAV_camera_comparison/01_solution/solution_draft01.md deleted file mode 100644 index c818446..0000000 --- a/_standalone/UAV_camera_comparison/01_solution/solution_draft01.md +++ /dev/null @@ -1,196 +0,0 @@ -# Solution Draft: ViewPro Z40K vs USG-231 Camera Comparison - -## Product Solution Description - -Comparative analysis of two UAV gimbal cameras — ViewPro Z40K (Chinese, 4K, 3-axis) and USG-231 (Ukrainian/Ukrspecsystems, FHD, 2-axis) — for fixed-wing reconnaissance applications. The USG-231 is the standard payload on the Shark M UAV. The comparison focuses on video feed quality, wobble/jello effect, zoom performance, image crispness during zoom, and overall quality. - -## Head-to-Head Specification Table - - -| Parameter | ViewPro Z40K | USG-231 | -| --------------------- | ---------------------------------- | ------------------------------------------- | -| **Sensor** | Panasonic 1/2.3" CMOS, 25.9MP | Sony CMOS (likely 1/2.8" STARVIS), ~2MP FHD | -| **Video Resolution** | 4K (3840×2160) @ 25/30fps | Full HD (1920×1080) | -| **Photo Resolution** | 25.9MP (6784×3816) | N/A | -| **Optical Zoom** | 20x | 30x | -| **Extended Zoom** | 25x iA (4K) / 40x iA (FHD) | 3x digital (total 90x) | -| **FOV (wide → tele)** | 62.95° → 3.45° | 63.7° → ~2.1° | -| **Gimbal** | 3-axis | 2-axis | -| **Gimbal Accuracy** | ±0.02° pitch/roll, ±0.03° yaw | Not published | -| **OIS** | 5-axis Optical Image Stabilization | None (digital EIS only) | -| **Lens Aperture** | F1.8 (wide) – F3.6 (tele) | Not published | -| **Dynamic Range** | 65 dB | Not published | -| **Min Illumination** | 0.05 lux @ F1.6 | If STARVIS: ~0.00008 lux | -| **Weight** | 595g (all-in-one) | 840g (590g camera + 250g VPB) | -| **Dimensions** | Compact single unit | 105×107×120mm + 50×90×65mm VPB | -| **Temp Range** | -20°C to +60°C | -15°C to +45°C (Shark M spec) | -| **Autopilot Compat** | PWM/TTL/SBUS/UDP | Pixhawk/Ardupilot plug-and-play | -| **Object Tracking** | Yes (up to 192 px/frame) | Yes | -| **Onboard Recording** | SD card up to 256GB | Yes | -| **IP Streaming** | UDP output | RTP IP streaming | -| **Weather Sealing** | CNC aluminum housing | Weather sealed | -| **Price** | $2,999–$4,879 | Not public ("cost-effective") | -| **Combat Proven** | No public data | Yes (Shark UAV, Ukraine 2022–2026) | - - -## Detailed Comparison by Dimension - -### 1. Video Feed Quality - -**Winner: ViewPro Z40K (decisive)** - -The Z40K records native 4K video — 4× the pixel count of the USG-231's Full HD output. In practical terms, this means: - -- A vehicle at 2 km rendered in 4K occupies roughly 4× more identifiable pixels than the same vehicle in FHD -- Post-mission analysis benefits enormously from 4K — you can digitally crop and zoom in post without losing usable detail -- For real-time feed: if the transmission link supports only FHD, the operator sees FHD anyway — but the Z40K's 4K downsampled to FHD is actually sharper than native FHD because it effectively oversamples and eliminates aliasing - -The USG-231's Full HD feed is adequate for coordinate determination and target identification at moderate ranges (confirmed by Defense Express combat reporting). But it cannot match the Z40K's information density. - -### 2. Wobble Effect - -**Winner: ViewPro Z40K (decisive)** - -This is the most architecturally significant difference between the two cameras. - -**USG-231 (2-axis gimbal + digital EIS):** - -- Stabilizes pitch and roll only -- Yaw rotation is NOT mechanically compensated -- On a fixed-wing drone in turns, wind gusts, or orbital surveillance, uncompensated yaw creates visible horizontal wobble/drift in the video feed -- Digital EIS attempts software correction: it crops the frame (losing resolution from an already-FHD signal), shifts pixels between frames, and can introduce warping artifacts during aggressive movement -- At high zoom (30x), even small uncompensated yaw angular errors translate to large image shifts — the wobble is amplified by magnification -- The wobble is most noticeable during: turns, wind gusts, turbulence, and any maneuver involving heading change - -**ViewPro Z40K (3-axis gimbal + 5-axis OIS):** - -- Compensates all three axes mechanically (pitch, roll, yaw) with ±0.02° accuracy -- The 5-axis OIS additionally corrects small/fast vibrations at the lens element level — no resolution loss, no cropping, no warping -- During turns and orbital surveillance, the yaw motor absorbs heading changes, keeping the image locked on target -- At 20x zoom, the ±0.02° accuracy translates to sub-pixel stability — effectively wobble-free for the viewer -- The double stabilization system (mechanical gimbal + optical OIS) is the same architecture used in DJI enterprise cameras - -**Summary**: The USG-231 will exhibit noticeable wobble on a fixed-wing platform, particularly during maneuvering at high zoom. The Z40K eliminates wobble through dual mechanical+optical stabilization. This is not a marginal difference — it is an architectural category gap. - -### 3. Zoom Capability - -**Mixed result — depends on priority** - - -| Zoom Metric | ViewPro Z40K | USG-231 | Winner | -| -------------------------------- | ----------------------------- | ----------------------------------------------- | ------- | -| Max optical zoom | 20x | 30x | USG-231 | -| Max extended zoom (any mode) | 40x iA (FHD) | 90x (30x optical × 3x digital) | USG-231 | -| Resolution at max optical zoom | 8.29MP (4K) at 3.45° FOV | ~2MP (FHD) at ~2.1° FOV | Z40K | -| Pixels per degree at max optical | ~1,113 px/° | ~914 px/° | Z40K | -| Quality during extended zoom | Gradual degradation (iA crop) | Significant degradation (digital crop from FHD) | Z40K | - - -**Key insight**: The USG-231 zooms 50% further optically (30x vs 20x), but the Z40K still delivers 22% more pixels per degree of angular coverage at each camera's maximum optical zoom. The Z40K's resolution advantage outweighs the USG-231's zoom advantage for target identification. - -However, if the mission absolutely requires maximum standoff distance and the image only needs to answer "is something there?" rather than "what exactly is it?", the USG-231's 30x optical reach has merit. - -### 4. Image Crispness During Zoom - -**Winner: ViewPro Z40K** - -Multiple factors compound in the Z40K's favor: - -1. **Base resolution**: 4K starting point vs FHD means 4× more pixels at any zoom level -2. **OIS vs EIS**: OIS preserves full resolution; EIS crops the frame, reducing effective resolution below FHD -3. **Pixel density at max zoom**: Z40K maintains 1,113 pixels per degree vs USG-231's 914 pixels per degree -4. **Vibration at zoom**: At high magnification, vibrations are amplified proportionally. The Z40K's 3-axis + OIS architecture maintains sub-pixel stability; the USG-231's 2-axis + EIS produces visible micro-jitter that degrades perceived sharpness - -**At medium zoom (10-15x)**: Both cameras perform well. The resolution difference is visible but both produce usable imagery. - -**At maximum optical zoom**: The Z40K's image is noticeably crisper. The 4K resolution provides fine detail that FHD cannot resolve. Both cameras will show atmospheric distortion (heat haze) at maximum zoom above hot terrain — this is physics, not a camera limitation. - -**Beyond optical zoom (digital/iA range)**: The Z40K degrades more gracefully. Its iA zoom at 25x (4K) is cropping from a 25.9MP sensor — plenty of overhead. The USG-231 at 90x total is cropping from ~2MP — the image quality drops dramatically. - -### 5. Shark M Video Feed Analysis - -The Shark M uses the USG-231 as its standard EO payload. Based on Defense Express field reports and manufacturer data: - -**Strengths of the USG-231 on Shark M:** - -- Auto-tracking locks onto targets from 800m and handles both contrasting and complex objects -- 30x optical zoom allows observation from >1 km standoff -- Digital stabilization produces "clear and stable video" per manufacturer -- Plug-and-play integration with the Shark's Pixhawk-based autopilot -- Encrypted FHD transmission over 180 km (Silvus StreamCaster) -- Anti-fog feature works in the field -- Combat-proven reliability in intense EW environments - -**Limitations observed/expected:** - -- FHD resolution limits identification range compared to 4K alternatives -- 2-axis gimbal will produce wobble during orbital surveillance patterns (constant heading change) -- Digital EIS further reduces effective resolution under active correction -- At high zoom during maneuvering, the combined effect of uncompensated yaw + EIS cropping will noticeably degrade image quality -- No optical image stabilization means high-frequency airframe vibrations translate to micro-jitter in the feed - -### 6. Low-Light Performance - -**Likely winner: USG-231** (with caveat) - -If the USG-231 uses a Sony STARVIS sensor (specs strongly suggest this), its low-light performance vastly exceeds the Z40K: - -- USG-231 (STARVIS): ~0.00008 lux minimum illumination -- Z40K: 0.05 lux minimum illumination - -This is a 625× difference. For dawn/dusk or night reconnaissance with ambient light, the USG-231 would produce usable imagery where the Z40K would show mostly noise. - -**Caveat**: Ukrspecsystems does not publish the exact sensor module. The STARVIS identification is inferred from matching specifications with Sony FCB-EV9500L/9520L block cameras. - -## Overall Quality Assessment - - -| Dimension | Z40K | USG-231 | Margin | -| ------------------------- | ----- | ------- | ------------------ | -| Video resolution | ★★★★★ | ★★★ | Large | -| Wobble control | ★★★★★ | ★★☆ | Very large | -| Optical zoom reach | ★★★ | ★★★★★ | Moderate | -| Image crispness at zoom | ★★★★★ | ★★★ | Large | -| Low-light | ★★★ | ★★★★★ | Large (if STARVIS) | -| Weight | ★★★★★ | ★★★ | Moderate | -| Integration simplicity | ★★★ | ★★★★★ | Moderate | -| Combat-proven reliability | ★★ | ★★★★★ | Large | -| Auto-tracking | ★★★★ | ★★★★ | Comparable | -| Overall video quality | ★★★★★ | ★★★ | Large | - - -## Recommendation - -**For pure video quality, crispness, and wobble-free footage**: ViewPro Z40K is the clear winner. Its 4K resolution, 3-axis gimbal, and 5-axis OIS produce categorically better and more stable footage than the USG-231. - -**The USG-231's strengths are real but different**: 30x optical zoom reach, likely superior low-light performance, combat-proven reliability, and seamless Shark M integration. It is a proven ISR tool — not the sharpest or smoothest, but reliable and field-tested. - -**The architectural gap in stabilization is the most important finding.** The 2-axis vs 3-axis gimbal difference is not marginal — it is a fundamental design limitation of the USG-231 that manifests as visible wobble on fixed-wing platforms, especially at high zoom during turns. No amount of digital processing can fully compensate for the missing yaw stabilization axis. - -**For a custom reconnaissance UAV build**: The Z40K offers superior imaging quality per gram. For integration with the Shark M ecosystem specifically, the USG-231 is the practical choice due to its plug-and-play integration and proven system-level reliability. - -## References - -1. ViewPro Z40K — RCDrone: [https://rcdrone.top/products/viewpro-z40k-4k-gimbal-camera](https://rcdrone.top/products/viewpro-z40k-4k-gimbal-camera) -2. ViewPro Z40K — Manufacturer: [https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera](https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera) -3. USG-231 — Ukrspecsystems: [https://ukrspecsystems.com/drone-gimbals/usg-231](https://ukrspecsystems.com/drone-gimbals/usg-231) -4. Shark M UAS — Ukrspecsystems: [https://ukrspecsystems.com/drones/shark-m-uas](https://ukrspecsystems.com/drones/shark-m-uas) -5. DRONExpert Z40K specs: [https://dronexpert.nl/en/viewpro-z40k-20x-optical-zoom-4k-camera-up-to-40x-zoom/](https://dronexpert.nl/en/viewpro-z40k-20x-optical-zoom-4k-camera-up-to-40x-zoom/) -6. AeroExpo USG-231: [https://www.aeroexpo.online/prod/ukrspecsystems/product-185884-82835.html](https://www.aeroexpo.online/prod/ukrspecsystems/product-185884-82835.html) -7. Defense Express — Shark combat footage: [https://en.defence-ua.com/weapon_and_tech/how_the_newest_ukrainian_shark_uav_works_over_donetsk_and_why_its_really_cool_video-5438.html](https://en.defence-ua.com/weapon_and_tech/how_the_newest_ukrainian_shark_uav_works_over_donetsk_and_why_its_really_cool_video-5438.html) -8. Camera Guide Pro — 2-axis vs 3-axis: [https://www.cameraguidepro.com/what-is-the-difference-between-a-2-axis-and-3-axis-gimbal/](https://www.cameraguidepro.com/what-is-the-difference-between-a-2-axis-and-3-axis-gimbal/) -9. MakeUseOf — Gimbal comparison: [https://www.makeuseof.com/two-axis-vs-three-axis-gimbals/](https://www.makeuseof.com/two-axis-vs-three-axis-gimbals/) -10. DroneFlying Pro — 2-axis vs 3-axis: [https://droneflyingpro.com/2-axis-vs-3-axis-gimbal/](https://droneflyingpro.com/2-axis-vs-3-axis-gimbal/) -11. Steadxp — EIS vs OIS: [https://www.steadxp.com/digital-vs-optical-stabilization-a-comparison-guide/](https://www.steadxp.com/digital-vs-optical-stabilization-a-comparison-guide/) -12. Guiding Tech — EIS vs OIS: [https://www.guidingtech.com/eis-vs-ois-stabilization/](https://www.guidingtech.com/eis-vs-ois-stabilization/) -13. DroneTrest — Fixed-wing gimbal forum: [https://www.dronetrest.com/t/whats-the-best-choice-for-the-fixed-wing-3-axis-gimbal-or-2-axis-gimbal/8091](https://www.dronetrest.com/t/whats-the-best-choice-for-the-fixed-wing-3-axis-gimbal-or-2-axis-gimbal/8091) -14. PhantomPilots — Yaw issue with 2-axis: [https://phantompilots.com/threads/yaw-issue-with-2-axis-gimbals.6854](https://phantompilots.com/threads/yaw-issue-with-2-axis-gimbals.6854) -15. ViewPro Z40K Manual — ManualsLib: [https://www.manualslib.com/manual/2385515/Viewpro-Z40k.html](https://www.manualslib.com/manual/2385515/Viewpro-Z40k.html) -16. ViewPro Tech — Z40K PSDK: [https://www.viewprotech.com/index.php?ac=article&at=read&did=202](https://www.viewprotech.com/index.php?ac=article&at=read&did=202) -17. Sony FCB-EV9500L: [https://pro.sony/ue_US/products/zoom-camera-blocks/fcb-ev9500l](https://pro.sony/ue_US/products/zoom-camera-blocks/fcb-ev9500l) -18. Sony FCB-EV9520L: [https://block-cameras.com/products/sony-fcb-ev9520l-30x-zoom-full-hd-block-camera-sensor-starvis-gen2](https://block-cameras.com/products/sony-fcb-ev9520l-30x-zoom-full-hd-block-camera-sensor-starvis-gen2) -19. DJI Zenmuse Z30 review: [https://medium.com/@daily_drones/hands-on-with-the-dji-zenmuse-z30-53ab50fe628c](https://medium.com/@daily_drones/hands-on-with-the-dji-zenmuse-z30-53ab50fe628c) -20. Oreate AI — Sensor size comparison: [https://www.oreateai.com/blog/beyond-the-numbers-what-123-vs-113-inch-sensor-size-really-means-for-your-photos/](https://www.oreateai.com/blog/beyond-the-numbers-what-123-vs-113-inch-sensor-size-really-means-for-your-photos/) -21. Wikipedia — Ukrspecsystems Shark: [https://en.wikipedia.org/wiki/Ukrspecsystems_Shark](https://en.wikipedia.org/wiki/Ukrspecsystems_Shark) -22. Defense Express — Shark tracking demo: [https://en.defence-ua.com/weapon_and_tech/ukrainian_drone_maker_demonstrates_its_new_shark_uav_target_tracking_capabilities_video-4803.html](https://en.defence-ua.com/weapon_and_tech/ukrainian_drone_maker_demonstrates_its_new_shark_uav_target_tracking_capabilities_video-4803.html) - diff --git a/_standalone/UAV_camera_comparison/UAV_frame_material.md b/_standalone/UAV_camera_comparison/UAV_frame_material.md deleted file mode 100644 index 0f468ef..0000000 --- a/_standalone/UAV_camera_comparison/UAV_frame_material.md +++ /dev/null @@ -1 +0,0 @@ -I want to build a UAV plane for reconnaissance missions maximizing flight duration. Investigate what is the best frame material for that purpose \ No newline at end of file diff --git a/_standalone/UAV_frame_material/00_research/UAV_frame_material/00_ac_assessment.md b/_standalone/UAV_frame_material/00_research/UAV_frame_material/00_ac_assessment.md deleted file mode 100644 index 548f3fc..0000000 --- a/_standalone/UAV_frame_material/00_research/UAV_frame_material/00_ac_assessment.md +++ /dev/null @@ -1,41 +0,0 @@ -# Acceptance Criteria Assessment - -## Acceptance Criteria - -| Criterion | Our Values | Researched Values | Cost/Timeline Impact | Status | -|-----------|-----------|-------------------|---------------------|--------| -| Flight duration | "Maximizing" (undefined) | 2-4 hours for electric fixed-wing in 5-15 kg MTOW class (Albatross: 4h, Vulture: 3.5-5h) | Higher endurance = larger battery + lighter frame = higher cost | Added — suggest target: ≥3 hours | -| Payload capacity | 1.47 kg fixed | 1.47 kg is modest; benchmark platforms carry 4-8 kg in this class | Low payload relative to class = more weight budget for battery/fuel | Modified — no change needed, favorable constraint | -| Frame weight | Not specified | Benchmark: 3.0-4.0 kg bare airframe for 3m wingspan class (Albatross: 3.35 kg) | Lighter frame = more battery weight = longer flight | Added — suggest target: ≤3.5 kg bare airframe | -| MTOW | Not specified | 8-15 kg typical for this class | Drives wing sizing, motor selection, battery capacity | Added — suggest target: 8-12 kg | -| Cruise speed | Not specified | 15-25 m/s typical for recon fixed-wing (Albatross: 19 m/s) | Slower cruise = longer endurance but less area coverage | Added — suggest target: 15-20 m/s | -| Wingspan | Not specified | 2.5-3.5m for this MTOW class | Larger span = better L/D = longer endurance, but transport/handling harder | Added — suggest: 2.5-3.5m | -| Battery energy density | Semi-solid state interest | 300-350 Wh/kg (semi-solid, 2025-2026 commercial products) vs 180-250 Wh/kg (LiPo) | Semi-solid ~2-3x cost of LiPo but 30% more flight time | Added — suggest: ≥300 Wh/kg (semi-solid) | -| Budget | $100k total | Sufficient for custom composite airframe + avionics + batteries + ground station | $100k is generous for single prototype in this class | Modified — no change needed | -| Operating temperature | Not specified | -20°C to 45°C is standard for commercial UAVs | Affects battery performance and material selection | Added — suggest: -10°C to 45°C | -| Wind resistance | Not specified | 10-15 m/s sustained for fixed-wing recon | Affects structural requirements and endurance | Added — suggest: up to 12 m/s sustained | - -## Restrictions Assessment - -| Restriction | Our Values | Researched Values | Cost/Timeline Impact | Status | -|-------------|-----------|-------------------|---------------------|--------| -| Budget | $100k | Ample for 1 prototype. Carbon fiber airframe: $5-15k (custom tooling + manufacturing). Batteries: $2-5k. Avionics: $3-5k. Motor/prop/ESC: $1-2k. Ground station + comms: $5-10k. Integration + testing: $10-20k. | Well within range | Modified — no change needed | -| Manufacturing access | None specified | Carbon fiber requires either outsourcing (CNC + layup vendors available globally) or basic workshop with vacuum bagging setup (~$2-5k investment) | Outsourcing is viable within budget; no blocker | Added — outsource recommended | -| Regulatory | None specified | Sub-25 kg in most jurisdictions requires registration + remote pilot license. No specific material restrictions. | Minimal impact at this MTOW class | Added — follow local UAS regulations | -| Payload (fixed) | 1.47 kg | Non-negotiable — mission equipment | No change | Added | -| Frame material | Open investigation | Research strongly favors carbon fiber composite (CFRP) with foam-core sandwich construction | Drives the core research question | Added | - -## Key Findings - -1. **Flight duration target of ≥3 hours is realistic** for an electric fixed-wing in this class with semi-solid batteries. The Albatross achieves 4 hours with LiPo; semi-solid batteries would extend this further. - -2. **1.47 kg payload is light for this class** — leaves substantial weight budget for batteries, which directly translates to longer flight time. This is a favorable constraint. - -3. **Semi-solid state batteries (300-350 Wh/kg) are commercially available now** from multiple vendors (Grepow, Tattu, Herewin). They offer 30% more flight time than LiPo at 2-3x cost per Wh but with 4-6x cycle life, making TCO favorable. - -4. **$100k budget is generous** for a single prototype in this class. Typical custom composite UAV builds in this class cost $30-60k for first prototype including all subsystems. - -5. **Carbon fiber composite is the clear frontrunner** for frame material based on weight-to-stiffness ratio, which is the primary driver for endurance. - -## Sources -- Source #1-#12 (see source registry) diff --git a/_standalone/UAV_frame_material/00_research/UAV_frame_material/00_question_decomposition.md b/_standalone/UAV_frame_material/00_research/UAV_frame_material/00_question_decomposition.md deleted file mode 100644 index 6cf5c2f..0000000 --- a/_standalone/UAV_frame_material/00_research/UAV_frame_material/00_question_decomposition.md +++ /dev/null @@ -1,72 +0,0 @@ -# Question Decomposition — Material Comparison: S2 FG + Carbon Stiffeners vs Shark M - -## Original Question -Compare the researched and selected material (S2 fiberglass with carbon stiffeners) with the Shark M fuselage material. Pros and cons of each approach considering parachute landing survivability and radio transparency. - -## Active Mode -Mode B: Solution Assessment — assessing existing solution_draft05 material selection. - -## Problem Context Summary -- The project is building a reconnaissance UAV maximizing flight duration -- Previous drafts selected S2 fiberglass (S2 FG) fuselage with carbon fiber stiffeners -- Catapult launch + parachute landing is a key variant (Variant B) -- Shark M by Ukrspecsystems is a combat-proven reference platform with similar mission profile -- User confirms Shark M has no radio transparency issues from experience - -## Question Type -**Concept Comparison** + **Decision Support** -Comparing two material approaches across multiple engineering dimensions with a decision outcome. - -## Research Subject Boundary -| Dimension | Boundary | -|-----------|----------| -| Population | Fixed-wing reconnaissance UAVs, 10-20 kg MTOW class | -| Geography | Global, with emphasis on combat-proven systems | -| Timeframe | Current (2024-2026), materials science is Low novelty sensitivity | -| Level | Airframe structural material for fuselage | - -## Timeliness Sensitivity Assessment -- **Research Topic**: Composite material comparison for UAV airframes -- **Sensitivity Level**: Low -- **Rationale**: Composite material properties (fiberglass, carbon fiber) are well-established engineering fundamentals. S2 glass and carbon fiber properties have been stable for decades. -- **Source Time Window**: No limit -- **Priority official sources**: Material datasheets, aerospace research papers, UAV manufacturer specifications - -## Decomposed Sub-questions - -### SQ1: What material does the Shark M actually use? -- "Shark M UAV fuselage material specifications" -- "Ukrspecsystems Shark composite airframe construction" -- "Ukrspecsystems PD-2 PD-1 fuselage material composite" -- "SHARK-M БПЛА матеріал корпус" (Ukrainian language) -- "Ukrspecsystems composite low radar cross section material" - -### SQ2: How does each material behave under parachute landing impact? -- "fiberglass composite impact resistance parachute landing UAV" -- "carbon fiber composite impact damage brittleness crash landing" -- "S2 glass fiber impact energy absorption composite" -- "carbon fiber vs fiberglass UAV crash landing repair" -- "belly landing UAV composite damage modes" - -### SQ3: What are the RF transparency properties of each material? -- "carbon fiber electromagnetic shielding effectiveness dB UAV antenna" -- "fiberglass radome RF transparent dielectric constant" -- "S2 fiberglass radio frequency transparency composite" -- "carbon fiber stiffener fiberglass skin RF shadow antenna" -- "GFRP radar transparent stealth composite UAV" - -### SQ4: What are the weight/stiffness trade-offs? -- "fiberglass vs carbon fiber UAV airframe weight comparison" -- "S-glass vs E-glass impact strength toughness" -- "carbon fiber stiffener fiberglass hybrid composite advantages" - -### SQ5: What are the cost and field repairability differences? -- "fiberglass UAV field repair epoxy patch battlefield" -- "carbon fiber repair cost UAV composite" -- "fiberglass vs carbon fiber material cost comparison" - -## Chosen Perspectives -1. **Practitioner / Field operator**: What works in real battlefield conditions? Shark M has 50,000+ operational hours. -2. **Implementer / Engineer**: What are the structural engineering trade-offs between pure FG and hybrid FG+CF? -3. **Contrarian / Devil's advocate**: What could go wrong with each approach? Hidden failure modes? -4. **Domain expert / Aerospace**: What do composite material scientists say about impact, RF, and hybrid designs? diff --git a/_standalone/UAV_frame_material/00_research/UAV_frame_material/01_source_registry.md b/_standalone/UAV_frame_material/00_research/UAV_frame_material/01_source_registry.md deleted file mode 100644 index f61bd16..0000000 --- a/_standalone/UAV_frame_material/00_research/UAV_frame_material/01_source_registry.md +++ /dev/null @@ -1,199 +0,0 @@ -# Source Registry — Material Comparison - -## Source #1 -- **Title**: Ukrspecsystems SHARK-M UAS Official Page -- **Link**: https://ukrspecsystems.com/drones/shark-m-uas -- **Tier**: L1 -- **Publication Date**: 2025 (continuously updated) -- **Timeliness Status**: Currently valid -- **Target Audience**: Military/government UAV buyers -- **Research Boundary Match**: Full match -- **Summary**: SHARK-M specs: 14.5 kg MTOW, 3.4m wingspan, 7h endurance, catapult launch + parachute landing, Silvus radio modem 180 km range. No fuselage material specified. -- **Related Sub-question**: SQ1 - -## Source #2 -- **Title**: Ukrspecsystems PD-2 UAS Datasheet (PDF) -- **Link**: https://www.unmannedsystemstechnology.com/wp-content/uploads/2016/06/PD_2.pdf -- **Tier**: L1 -- **Publication Date**: 2021 -- **Timeliness Status**: Currently valid -- **Target Audience**: Military/government UAV buyers -- **Research Boundary Match**: Full match (PD-2 is predecessor, same manufacturer) -- **Summary**: PD-2 features "fully composite airframe" with "absence of large metal parts" for "low radar visibility." Composite construction confirmed. No specific composite type named. -- **Related Sub-question**: SQ1 - -## Source #3 -- **Title**: Wikipedia - Ukrspecsystems Shark -- **Link**: https://en.wikipedia.org/wiki/Ukrspecsystems_Shark -- **Tier**: L3 -- **Publication Date**: 2023 -- **Timeliness Status**: Currently valid -- **Target Audience**: General public -- **Research Boundary Match**: Full match -- **Summary**: Shark UAV specs, catapult launch + parachute landing, 12.5 kg weight, 3.4m wingspan. No material info. -- **Related Sub-question**: SQ1 - -## Source #4 -- **Title**: Carbon Fiber UAV RF Shielding — KSZYTec Antenna Design Guide -- **Link**: https://kszytec.com/uav-aerospace-antenna-design-survival-guide/ -- **Tier**: L2 -- **Publication Date**: 2026 -- **Timeliness Status**: Currently valid -- **Target Audience**: UAV engineers, antenna designers -- **Research Boundary Match**: Full match -- **Summary**: Carbon fiber is "pretty much opaque to 2.4GHz radio waves" with shielding effectiveness exceeding 30-50 dB. Acts as Faraday cage. Lethal to embedded signals. -- **Related Sub-question**: SQ3 - -## Source #5 -- **Title**: Carbon Fiber RF Shielding — Drones StackExchange -- **Link**: https://drones.stackexchange.com/questions/283/how-much-does-mounting-an-antenna-near-a-carbon-fiber-frame-degrade-signal-recep -- **Tier**: L4 -- **Publication Date**: 2020 -- **Timeliness Status**: Currently valid -- **Target Audience**: UAV builders/hobbyists -- **Research Boundary Match**: Full match -- **Summary**: Carbon fiber blocks RF rather than generating noise. Antennas must be positioned to avoid obstruction by carbon structure. -- **Related Sub-question**: SQ3 - -## Source #6 -- **Title**: Radio-Transparent Properties Comparison of Aramid, S-Glass, and Quartz Fiber Radome Composites at 900 MHz -- **Link**: https://link.springer.com/article/10.1007/s40033-023-00602-7 -- **Tier**: L1 -- **Publication Date**: 2023 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace engineers, materials scientists -- **Research Boundary Match**: Full match -- **Summary**: S-Glass composites show good radio transparency at 900 MHz; better than aramid. Quartz fiber best. S-Glass used in radomes, antenna windows, fairings. -- **Related Sub-question**: SQ3 - -## Source #7 -- **Title**: Fiberglass Pultrusion for Aerospace & Defense — Tencom -- **Link**: https://www.tencom.com/blog/fiberglass-pultrusion-for-aerospace-defense-lightweight-structural-components -- **Tier**: L3 -- **Publication Date**: 2024 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace/defense engineers -- **Research Boundary Match**: Full match -- **Summary**: GFRP is inherently dielectric and transparent to RF/radar waves. Minimizes electromagnetic interference. Used for antenna housings, sensor fairings. 20-30% weight savings in some drone configurations. -- **Related Sub-question**: SQ3 - -## Source #8 -- **Title**: EM Shielding of Twill Carbon Fiber — IEEE -- **Link**: https://ieeexplore.ieee.org/document/10329805/ -- **Tier**: L1 -- **Publication Date**: 2023 -- **Timeliness Status**: Currently valid -- **Target Audience**: RF engineers, materials scientists -- **Research Boundary Match**: Full match -- **Summary**: CFRP shielding tested across UHF, L-band, S-band. Continuous carbon fiber composites achieve up to 52 dB shielding effectiveness. -- **Related Sub-question**: SQ3 - -## Source #9 -- **Title**: Fiberglass vs Carbon Fiber UAV Comparison — Ganglong Fiberglass -- **Link**: https://www.ganglongfiberglass.com/fiberglass-drone-vs-carbon-fiber/ -- **Tier**: L3 -- **Publication Date**: 2024-12 -- **Timeliness Status**: Currently valid -- **Target Audience**: UAV builders -- **Research Boundary Match**: Full match -- **Summary**: Carbon fiber ~40% lighter than aluminum, ~50% lighter than fiberglass. Carbon fiber is brittle under impact (cracks); fiberglass is flexible (bends/absorbs). Carbon 5-10× more expensive. -- **Related Sub-question**: SQ4, SQ5 - -## Source #10 -- **Title**: E-Glass vs S-Glass: Key Differences — SMI Composites -- **Link**: https://www.smicomposites.com/comparing-e-glass-vs-s-glass-key-differences-and-benefits/ -- **Tier**: L2 -- **Publication Date**: 2024 -- **Timeliness Status**: Currently valid -- **Target Audience**: Composites engineers -- **Research Boundary Match**: Full match -- **Summary**: S-glass 30-40% stronger than E-glass, 10× fatigue resistance, >5% elongation at break vs 4.7% E-glass. Better impact resistance. Higher cost than E-glass. -- **Related Sub-question**: SQ2, SQ4 - -## Source #11 -- **Title**: Impact Damage Resistance of S2/FM94 Glass Fibre Composites — MDPI Polymers -- **Link**: https://mdpi-res.com/d_attachment/polymers/polymers-14-00095/article_deploy/polymers-14-00095-v2.pdf -- **Tier**: L1 -- **Publication Date**: 2022 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace researchers -- **Research Boundary Match**: Full match -- **Summary**: S2/FM94 glass fiber composites: cross-ply and angle-ply orientations absorb impact energy effectively with no penetration. Unidirectional fails in shear. -- **Related Sub-question**: SQ2 - -## Source #12 -- **Title**: E-Glass vs Carbon Fiber UAV Wing Impact Simulations — Preprints.org -- **Link**: https://www.preprints.org/manuscript/202601.1067 -- **Tier**: L1 -- **Publication Date**: 2026-01 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace researchers -- **Research Boundary Match**: Full match -- **Summary**: E-glass composites are tougher and cheaper than CF for impact-resistant UAV structures. CF fails brittlely with delamination. E-glass is a viable cost-effective alternative. -- **Related Sub-question**: SQ2 - -## Source #13 -- **Title**: Field Repair of Severely Damaged FG/Epoxy Fuselage — MATEC Conference -- **Link**: https://www.matec-conferences.org/articles/matecconf/pdf/2019/53/matecconf_easn2019_01002.pdf -- **Tier**: L1 -- **Publication Date**: 2019 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace maintenance engineers -- **Research Boundary Match**: Full match -- **Summary**: Field repair of fiberglass/epoxy structures can be done by personnel with average manual skills. No specialized training or vacuum equipment needed. Restores structural stiffness. -- **Related Sub-question**: SQ5 - -## Source #14 -- **Title**: ACASIAS — Antenna Integration in Carbon Fibre Fuselage Panel -- **Link**: https://www.nlr.org/newsroom/video/acasias-antenna-integration/ -- **Tier**: L1 -- **Publication Date**: 2020 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace engineers -- **Research Boundary Match**: Full match -- **Summary**: ACASIAS project: hybrid panel with GFRP "RF-transparent window" and CFRP structural skin + orthogrid stiffeners. CFRP ribs create electromagnetic interaction with antenna tiles. Design must account for RF shadow from CFRP elements. -- **Related Sub-question**: SQ3 - -## Source #15 -- **Title**: Fiberglass Radome Dielectric Properties — O'Reilly / Radome EM Theory -- **Link**: https://www.oreilly.com/library/view/radome-electromagnetic-theory/9781119410799/b02.xhtml -- **Tier**: L1 -- **Publication Date**: 2019 -- **Timeliness Status**: Currently valid -- **Target Audience**: RF/radome engineers -- **Research Boundary Match**: Full match -- **Summary**: E-glass/epoxy dielectric constant 4.4, loss tangent 0.016 at 8.5 GHz. These values allow reasonable RF transmission with some signal attenuation. -- **Related Sub-question**: SQ3 - -## Source #16 -- **Title**: Belly-Landing Mini UAV Strength Study — Scientific.Net -- **Link**: https://www.scientific.net/AMM.842.178 -- **Tier**: L1 -- **Publication Date**: 2016 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace engineers -- **Research Boundary Match**: Full match -- **Summary**: Fiberglass/epoxy composites used in belly-landing UAV design due to favorable specific strength. Belly landings carry risk of disintegration if too fast. -- **Related Sub-question**: SQ2 - -## Source #17 -- **Title**: Hybrid Composite Wing Spar Analysis — IJVSS -- **Link**: https://yanthrika.com/eja/index.php/ijvss/article/view/1476 -- **Tier**: L1 -- **Publication Date**: 2024 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace researchers -- **Research Boundary Match**: Full match -- **Summary**: Hybrid composites show similar deformation to pure CFRP with cost savings. Higher damping factors than aluminum or pure CFRP. -- **Related Sub-question**: SQ4 - -## Source #18 -- **Title**: UAV Airframe Strength and Structural Optimization — Frontiers -- **Link**: https://www.frontiersin.org/articles/10.3389/fmech.2025.1708043 -- **Tier**: L1 -- **Publication Date**: 2025 -- **Timeliness Status**: Currently valid -- **Target Audience**: Aerospace researchers -- **Research Boundary Match**: Full match -- **Summary**: Stiffener optimization achieves 60.9% stress reduction and 5.2% mass reduction. Reinforced rib designs with stiffeners provide significant structural benefits. -- **Related Sub-question**: SQ4 diff --git a/_standalone/UAV_frame_material/00_research/UAV_frame_material/02_fact_cards.md b/_standalone/UAV_frame_material/00_research/UAV_frame_material/02_fact_cards.md deleted file mode 100644 index d0da421..0000000 --- a/_standalone/UAV_frame_material/00_research/UAV_frame_material/02_fact_cards.md +++ /dev/null @@ -1,145 +0,0 @@ -# Fact Cards — Material Comparison - -## Fact #1 -- **Statement**: Ukrspecsystems PD-2 (predecessor to Shark M) has a "fully composite airframe" with "absence of large metal parts" providing "low radar visibility." This means the composite is non-conductive (i.e., fiberglass/GFRP), because carbon fiber is conductive and would reflect radar. -- **Source**: Source #2 (PD-2 Datasheet) -- **Phase**: Assessment -- **Target Audience**: 10-20 kg reconnaissance UAV class -- **Confidence**: ⚠️ Medium — material type is inferred from "low radar visibility" + "fully composite" + "no large metal parts." Not explicitly stated as fiberglass. -- **Related Dimension**: Material identification - -## Fact #2 -- **Statement**: SHARK-M uses catapult launch + parachute landing, identical recovery method to user's Variant B. 14.5 kg MTOW, 3.4m wingspan, 7h endurance. Max wind for landing: 7 m/s. -- **Source**: Source #1 (Ukrspecsystems official page) -- **Phase**: Assessment -- **Target Audience**: 10-20 kg reconnaissance UAV class -- **Confidence**: ✅ High -- **Related Dimension**: Platform comparison baseline - -## Fact #3 -- **Statement**: Carbon fiber composite provides electromagnetic shielding effectiveness of 30-52 dB across UHF to X-band frequencies (up to 12.4 GHz). It is "pretty much opaque to 2.4 GHz radio waves" and acts as a Faraday cage. -- **Source**: Source #4 (KSZYTec), Source #8 (IEEE), Source #5 (StackExchange) -- **Phase**: Assessment -- **Target Audience**: All UAVs with internal antennas -- **Confidence**: ✅ High — confirmed by multiple independent sources -- **Related Dimension**: Radio transparency - -## Fact #4 -- **Statement**: S-Glass (S2) fiberglass composites are radio-transparent and are the standard material for aerospace radomes, antenna windows, and communication antenna protective coverings. E-glass/epoxy has dielectric constant 4.4 and loss tangent 0.016 at 8.5 GHz — low enough for reasonable RF transmission. -- **Source**: Source #6 (Springer), Source #7 (Tencom), Source #15 (Radome EM Theory) -- **Phase**: Assessment -- **Target Audience**: All UAVs with internal/embedded antennas -- **Confidence**: ✅ High -- **Related Dimension**: Radio transparency - -## Fact #5 -- **Statement**: GFRP is inherently dielectric and transparent to both radio-frequency communications AND radar waves. Carbon fiber reflects/absorbs radar. A fully GFRP airframe achieves low radar cross section by being transparent to radar rather than reflecting it. -- **Source**: Source #7 (Tencom), Source #2 (PD-2 datasheet context) -- **Phase**: Assessment -- **Target Audience**: Military reconnaissance UAVs -- **Confidence**: ✅ High -- **Related Dimension**: Radio transparency, stealth - -## Fact #6 -- **Statement**: User confirms from operational experience that SHARK-M has no issues with radio transparency — "it is still alive." This is consistent with GFRP fuselage and inconsistent with carbon fiber fuselage. -- **Source**: User direct experience -- **Phase**: Assessment -- **Target Audience**: This specific UAV project -- **Confidence**: ✅ High (direct field evidence) -- **Related Dimension**: Radio transparency - -## Fact #7 -- **Statement**: Carbon fiber composites fail in a brittle manner with sudden delamination and fiber fracture under impact. Low-velocity impacts can cause barely visible internal damage (BVID) that substantially reduces structural integrity without external signs. -- **Source**: Source #12 (Preprints.org), Source #9 (Ganglong) -- **Phase**: Assessment -- **Target Audience**: UAV impact/crash scenarios -- **Confidence**: ✅ High -- **Related Dimension**: Parachute landing survivability - -## Fact #8 -- **Statement**: Fiberglass composites are more flexible and absorb shock better than carbon fiber. Under impact, fiberglass bends/deforms rather than cracking or shattering. E-glass composites are a viable, cost-effective, and tougher alternative to CF for impact-resistant UAV structures. -- **Source**: Source #12 (Preprints.org), Source #9 (Ganglong) -- **Phase**: Assessment -- **Target Audience**: UAV impact/crash scenarios -- **Confidence**: ✅ High -- **Related Dimension**: Parachute landing survivability - -## Fact #9 -- **Statement**: S2-glass has >5% elongation at break (vs 4.7% for E-glass), 30-40% higher tensile strength than E-glass (4600 MPa vs 3400 MPa), and 10× fatigue resistance. S2/FM94 cross-ply laminates absorb impact energy with no penetration in tested configurations. -- **Source**: Source #10 (SMI Composites), Source #11 (MDPI Polymers) -- **Phase**: Assessment -- **Target Audience**: UAV structural design -- **Confidence**: ✅ High -- **Related Dimension**: Parachute landing survivability - -## Fact #10 -- **Statement**: Carbon fiber is approximately 40% lighter than aluminum and density ~1.5-1.6 g/cm³ vs fiberglass at 2.46-2.58 g/cm³. Carbon fiber is roughly 5× stiffer than fiberglass by specific modulus. -- **Source**: Source #9 (Ganglong), Source #10 (SMI Composites) -- **Phase**: Assessment -- **Target Audience**: UAV airframe design -- **Confidence**: ✅ High -- **Related Dimension**: Weight/stiffness - -## Fact #11 -- **Statement**: Carbon fiber material costs 5-10× more than fiberglass. Basic fiberglass cloth ~$20-50/m² vs standard carbon fiber (T300) ~$200-500/m². -- **Source**: Source #9 (Ganglong) -- **Phase**: Assessment -- **Target Audience**: UAV production cost -- **Confidence**: ✅ High -- **Related Dimension**: Cost - -## Fact #12 -- **Statement**: Field repair of fiberglass/epoxy structures can be done by personnel with average manual skills without specialized training or vacuum equipment. Pre-cured composite patches bonded with adhesive enable rapid field repairs. -- **Source**: Source #13 (MATEC Conference) -- **Phase**: Assessment -- **Target Audience**: UAV field operations -- **Confidence**: ✅ High -- **Related Dimension**: Field repairability - -## Fact #13 -- **Statement**: Carbon fiber repair requires specialized equipment (autoclave, vacuum bagging) and trained technicians. Scarf-repaired CFRP laminates remain sensitive to subsequent impacts. Internal damage (BVID) requires non-destructive testing to detect. -- **Source**: Source #12 (Preprints.org), research on CFRP repair -- **Phase**: Assessment -- **Target Audience**: UAV maintenance -- **Confidence**: ✅ High -- **Related Dimension**: Field repairability - -## Fact #14 -- **Statement**: In a hybrid FG+CF design (like S2 FG skin + carbon stiffeners), carbon stiffeners create localized RF shadow zones. The ACASIAS project demonstrated that CFRP ribs in a GFRP panel create electromagnetic interactions that must be designed around. Antennas must be placed in GFRP-only zones away from CF structural elements. -- **Source**: Source #14 (ACASIAS/NLR) -- **Phase**: Assessment -- **Target Audience**: Hybrid composite UAV designers -- **Confidence**: ✅ High -- **Related Dimension**: Radio transparency of hybrid design - -## Fact #15 -- **Statement**: Hybrid composites (FG skin + CF stiffeners) achieve similar deformation characteristics to pure CFRP while offering cost savings and higher damping factors than either pure aluminum or pure CFRP. -- **Source**: Source #17 (IJVSS), Source #18 (Frontiers) -- **Phase**: Assessment -- **Target Audience**: UAV structural design -- **Confidence**: ✅ High -- **Related Dimension**: Weight/stiffness - -## Fact #16 -- **Statement**: Stiffener optimization with reinforced rib designs can achieve 60.9% stress reduction and 5.2% mass reduction compared to unstiffened designs. -- **Source**: Source #18 (Frontiers) -- **Phase**: Assessment -- **Target Audience**: UAV structural design -- **Confidence**: ✅ High -- **Related Dimension**: Weight/stiffness - -## Fact #17 -- **Statement**: SHARK-M has 50,000+ operational hours on the battlefield (per Ukrspecsystems marketing). The system is designed for 1,200 flight hours without additional service maintenance. -- **Source**: Source #1 (Ukrspecsystems official) -- **Phase**: Assessment -- **Target Audience**: Military reconnaissance UAVs -- **Confidence**: ⚠️ Medium — marketing claim, but backed by extensive combat use -- **Related Dimension**: Proven reliability - -## Fact #18 -- **Statement**: A pure fiberglass (no carbon stiffeners) airframe for a 14.5 kg MTOW UAV (Shark M class) can achieve 7h endurance. This suggests that pure GFRP without carbon stiffeners is structurally adequate for this weight class, though it may require thicker skins or more material. -- **Source**: Source #1 (Ukrspecsystems official), Source #2 (PD-2 datasheet) -- **Phase**: Assessment -- **Target Audience**: 10-20 kg reconnaissance UAV class -- **Confidence**: ⚠️ Medium — material type inferred, not explicitly confirmed -- **Related Dimension**: Weight/structural adequacy diff --git a/_standalone/UAV_frame_material/00_research/UAV_frame_material/03_comparison_framework.md b/_standalone/UAV_frame_material/00_research/UAV_frame_material/03_comparison_framework.md deleted file mode 100644 index a03df97..0000000 --- a/_standalone/UAV_frame_material/00_research/UAV_frame_material/03_comparison_framework.md +++ /dev/null @@ -1,39 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Concept Comparison + Decision Support - -## Compared Approaches -- **Approach A**: S2 Fiberglass + Carbon Fiber Stiffeners (hybrid, as selected in solution_draft05) -- **Approach B**: Pure Fiberglass Composite (inferred Shark M approach — GFRP, no carbon elements) - -## Selected Dimensions - -### Primary (directly requested by user) -1. Radio transparency (communications: 900 MHz, 2.4 GHz, 5.8 GHz) -2. Radar transparency (stealth / low RCS) -3. Parachute landing impact survivability -4. Parachute landing cumulative damage tolerance - -### Secondary (engineering trade-offs) -5. Weight efficiency (strength-to-weight, stiffness-to-weight) -6. Structural stiffness -7. Material cost -8. Field repairability -9. Proven operational track record -10. Hidden failure modes - -## Initial Population - -| Dimension | S2 FG + Carbon Stiffeners (Approach A) | Pure GFRP (Approach B, Shark M) | Factual Basis | -|-----------|---------------------------------------|--------------------------------|---------------| -| Radio transparency | Mostly transparent (FG skin is RF-transparent); carbon stiffeners create localized RF shadow zones; antenna placement must avoid CF elements | Fully transparent — entire fuselage passes RF; antenna placement unconstrained | Fact #3, #4, #5, #6, #14 | -| Radar transparency | Mostly transparent; CF stiffeners reflect radar → slight increase in RCS compared to pure FG | Fully radar-transparent (low RCS by transparency) | Fact #5, #14 | -| Impact survivability (single event) | FG skin absorbs impact well; CF stiffeners may crack/delaminate under localized impact; BVID risk in CF elements | FG absorbs impact, bends rather than cracks; no brittle CF elements; simpler damage profile | Fact #7, #8, #9 | -| Cumulative damage tolerance | FG skin handles repeated impacts; CF stiffeners accumulate micro-damage that is hard to detect | All-FG structure: damage is visible, cumulative tolerance is good, easier inspection | Fact #7, #8, #13 | -| Weight efficiency | Better — CF stiffeners provide stiffness at lower weight than equivalent FG stiffening | Heavier — must use thicker FG skins or more material to achieve same stiffness | Fact #10, #15, #16 | -| Structural stiffness | Higher — CF stiffeners ~5× stiffer per unit weight than FG | Lower — FG is more flexible; adequate for Shark M class but may need design compensation | Fact #10, #15 | -| Material cost | Higher — CF cloth is 5-10× FG cost; only stiffeners are CF, so total cost increase is moderate | Lower — all FG, significantly cheaper material cost | Fact #11 | -| Field repairability | FG skin: easy field repair; CF stiffeners: harder, needs specialized knowledge to repair | All components field-repairable with basic skills and epoxy patches | Fact #12, #13 | -| Proven track record | Not yet built/tested | 50,000+ operational hours, 1,200h maintenance-free, combat-proven | Fact #17 | -| Hidden failure modes | CF stiffener BVID after impact — invisible internal damage reduces strength | None specific to material; pure FG damage is generally visible | Fact #7, #13 | diff --git a/_standalone/UAV_frame_material/00_research/UAV_frame_material/04_reasoning_chain.md b/_standalone/UAV_frame_material/00_research/UAV_frame_material/04_reasoning_chain.md deleted file mode 100644 index 0839cd2..0000000 --- a/_standalone/UAV_frame_material/00_research/UAV_frame_material/04_reasoning_chain.md +++ /dev/null @@ -1,143 +0,0 @@ -# Reasoning Chain - -## Dimension 1: Radio Transparency - -### Fact Confirmation -Carbon fiber composite provides 30-52 dB electromagnetic shielding across UHF to X-band (Fact #3). This means a carbon fiber structural element blocks 99.9% to 99.999% of RF energy passing through it. S2 fiberglass is radio-transparent — standard radome material with dielectric constant ~4.4 at 8.5 GHz (Fact #4). The ACASIAS project proved that CFRP ribs in a GFRP panel create measurable electromagnetic interaction zones (Fact #14). - -### Reference Comparison -**Approach A (S2 FG + CF stiffeners)**: The FG skin areas are fully RF-transparent. However, carbon stiffeners running through the fuselage create discrete RF shadow zones. Any antenna placed near or behind a CF stiffener will experience 30-50 dB signal degradation in that direction. This constrains antenna placement — antennas must be positioned in FG-only zones between stiffeners. For a UAV with multiple antennas (C2 link, video downlink, GPS, telemetry, ADS-B), this creates a spatial planning challenge. The stiffener geometry defines "forbidden zones" for antenna placement. - -**Approach B (pure GFRP, Shark M)**: The entire fuselage is RF-transparent. Antennas can be placed anywhere inside or on the fuselage without material-induced signal blockage. GPS, C2, video, telemetry antennas have no placement constraints from the airframe material. This is confirmed by Shark M's operational success with 180 km communication range and EW resistance (Fact #6). - -### Conclusion -Pure GFRP has a clear advantage for radio transparency. The hybrid approach is workable but requires careful antenna placement engineering. For a reconnaissance UAV operating at long range (100+ km) in EW-contested environments, unconstrained antenna placement is a significant operational advantage. - -### Confidence -✅ High — supported by quantitative RF data, aerospace project (ACASIAS), and field experience - ---- - -## Dimension 2: Radar Transparency (Stealth) - -### Fact Confirmation -GFRP is inherently dielectric and transparent to radar waves (Fact #5). Carbon fiber, while not as reflective as metal, is conductive and reflects/scatters radar energy. The PD-2/Shark design philosophy explicitly leverages "fully composite airframe + absence of large metal parts" for "low radar visibility" (Fact #1). - -### Reference Comparison -**Approach A**: Carbon stiffeners create discrete radar-reflective structures inside the airframe. While individually small, their regular geometric pattern could create a detectable radar signature at certain angles — essentially a grid of conductive elements acting as a partial radar reflector. - -**Approach B**: Pure GFRP is essentially invisible to radar (the signal passes through). Radar cross section comes only from metallic components (engine, servos, connectors) and the payload, not the airframe itself. - -### Conclusion -For military reconnaissance in contested airspace, pure GFRP provides a stealth advantage. Carbon stiffeners slightly increase radar detectability. The magnitude depends on stiffener geometry and radar frequency, but the principle is clear: less conductive material = lower RCS. - -### Confidence -⚠️ Medium — the magnitude of RCS increase from stiffeners vs pure GFRP is not quantified; the principle is sound but the practical significance depends on adversary radar capabilities - ---- - -## Dimension 3: Parachute Landing Impact Survivability - -### Fact Confirmation -From solution_draft05: parachute landing impact energy ranges from 190 J (calm) to 762 J (8 m/s wind) to 1,499 J (12 m/s wind) for an 18 kg UAV. S2 glass fiber with cross-ply layup absorbs impact energy effectively with no penetration (Fact #9). Carbon fiber fails in a brittle manner — sudden delamination and fiber fracture (Fact #7). BVID in carbon fiber reduces structural integrity without visible signs (Fact #7). Fiberglass bends/deforms rather than cracking (Fact #8). - -### Reference Comparison -**Approach A (S2 FG + CF stiffeners)**: The FG skin absorbs belly landing impact well — it will dent, flex, and possibly crack locally but without catastrophic failure. However, if impact loads are transmitted to CF stiffeners (which they will be, since stiffeners carry structural loads), the CF elements may suffer BVID. This internal damage is invisible but weakens the structure progressively. After multiple parachute landings, CF stiffeners could accumulate micro-delaminations that are detectable only via ultrasound or tap testing. - -**Approach B (pure GFRP)**: The entire structure responds to impact by flexing and absorbing energy. Damage is typically visible (cracks, dents, whitening of the resin). No hidden BVID in brittle elements. The structure degrades gracefully — visible damage allows timely repair/replacement. - -### Conclusion -Pure GFRP is significantly better suited for repeated parachute landings. The key advantage is not just better single-impact performance, but the absence of hidden damage accumulation in brittle carbon elements. For a UAV expected to land on a parachute hundreds of times, this is critical. - -### Confidence -✅ High — supported by materials science (CF brittleness is well-documented) and field evidence (Shark M's 50,000+ hours with parachute landings) - ---- - -## Dimension 4: Cumulative Damage and Inspection - -### Fact Confirmation -Carbon fiber BVID requires non-destructive testing (ultrasound, Lamb wave techniques) to detect (Fact #7, #13). Fiberglass damage is generally visible — cracking, whitening, deformation (Fact #8, #12). Field inspection of FG requires visual inspection; field inspection of CF stiffeners requires ultrasonic equipment. - -### Reference Comparison -**Approach A**: After each parachute landing, operators should theoretically inspect CF stiffeners for BVID. In field conditions (battlefield, remote area), ultrasonic inspection is impractical. This creates a reliability risk — the aircraft may fly with undetected internal damage. - -**Approach B**: Visual inspection sufficient. Operators can see damage and decide whether to fly or repair. Simple tap-test can detect larger delaminations. No special equipment needed. - -### Conclusion -Pure GFRP provides much simpler damage inspection, which is critical for field operations. The inspection advantage compounds over the UAV's lifetime — hundreds of landings, each requiring either a quick visual check (GFRP) or an NDT scan (hybrid with CF). - -### Confidence -✅ High - ---- - -## Dimension 5: Weight Efficiency - -### Fact Confirmation -Carbon fiber density ~1.5-1.6 g/cm³ vs fiberglass 2.46-2.58 g/cm³ (Fact #10). CF is ~5× stiffer per unit weight. CF stiffeners achieve similar structural performance to pure CFRP with hybrid approach (Fact #15). Stiffener optimization achieves 60.9% stress reduction (Fact #16). - -### Reference Comparison -**Approach A**: CF stiffeners provide excellent stiffness at low weight. A few hundred grams of CF stiffeners can replace kilograms of FG stiffening. This is the primary reason for using the hybrid approach — to achieve the required wing/fuselage stiffness without the weight penalty of all-FG construction. - -**Approach B**: Must compensate for lack of CF stiffness with more FG material. This means thicker skins, more internal FG ribs, or geometric stiffening (corrugations, foam sandwich). Results in a heavier airframe for equivalent stiffness. Shark M achieves 14.5 kg MTOW with pure GFRP — so it works, but the user's UAV at 18 kg MTOW with heavier payload (Viewpro Z40K + electronics) may benefit from the weight savings of CF stiffeners. - -### Conclusion -Hybrid approach has a clear weight advantage. For a reconnaissance UAV maximizing endurance, every 100g saved translates to ~2-3 minutes additional flight time. If CF stiffeners save 300-800g vs equivalent pure GFRP stiffening, that's 6-24 minutes additional endurance. This is meaningful but not dramatic. - -### Confidence -✅ High — materials properties are well-established; the magnitude estimate depends on specific structural design - ---- - -## Dimension 6: Material Cost - -### Fact Confirmation -CF cloth is 5-10× more expensive than FG cloth per m² (Fact #11). In the hybrid approach, only stiffeners use CF — perhaps 10-20% of total composite material by area. - -### Reference Comparison -**Approach A**: Moderate cost increase. If total FG material cost for an airframe is ~$300-500, adding CF stiffeners might add $100-300 for the CF material itself, plus slightly more complex layup procedures. - -**Approach B**: All-FG, minimal material cost. Simplest manufacturing. - -### Conclusion -The cost difference is moderate, not dramatic. The hybrid approach costs more but not prohibitively so for a military UAV. - -### Confidence -✅ High - ---- - -## Dimension 7: Field Repairability - -### Fact Confirmation -FG/epoxy field repair requires no specialized training or vacuum equipment (Fact #12). CF repair is more complex, requiring specialized knowledge and ideally autoclave/vacuum bagging (Fact #13). - -### Reference Comparison -**Approach A**: If the FG skin is damaged, field repair is straightforward. If a CF stiffener is damaged, field repair is significantly harder — the operator may need to fabricate a CF patch, which requires proper layup, vacuum bagging, and controlled cure. In practice, a damaged CF stiffener in the field likely means the UAV is grounded until returned to base. - -**Approach B**: All damage is FG, all repairs are FG. Personnel with average manual skills can perform field repairs with epoxy and FG cloth patches. - -### Conclusion -Pure GFRP is much better for field repairability, especially in expeditionary/forward-deployed scenarios. This matters for the user's military use case. - -### Confidence -✅ High - ---- - -## Dimension 8: Proven Operational Track Record - -### Fact Confirmation -Shark M has 50,000+ operational hours with parachute landings in combat (Fact #17). The user has direct operational experience confirming radio transparency (Fact #6). The hybrid S2 FG + CF approach is unproven for this specific application. - -### Reference Comparison -**Approach A**: Novel design, not combat-proven. Introduces CF stiffeners which are new variables in the parachute-landing reliability equation. - -**Approach B**: Combat-proven in the most demanding environment possible (active warfare with EW, harsh conditions). - -### Conclusion -Pure GFRP has massive advantage in proven reliability. This cannot be understated — a combat-proven material system that demonstrably works for this exact mission profile (long-endurance reconnaissance, catapult + parachute, EW-contested) is extremely valuable evidence. - -### Confidence -✅ High diff --git a/_standalone/UAV_frame_material/00_research/UAV_frame_material/05_validation_log.md b/_standalone/UAV_frame_material/00_research/UAV_frame_material/05_validation_log.md deleted file mode 100644 index d8385ed..0000000 --- a/_standalone/UAV_frame_material/00_research/UAV_frame_material/05_validation_log.md +++ /dev/null @@ -1,61 +0,0 @@ -# Validation Log - -## Validation Scenario 1: Parachute Landing in 8 m/s Wind (200th landing) - -### Expected Based on Conclusions - -**Approach A (S2 FG + CF stiffeners)**: UAV lands at 9.2 m/s resultant velocity, 762 J impact energy. FG belly skin absorbs initial impact — possible crack, repairable with field patch. CF wing spar stiffeners experience shock loading. After 200 landings, accumulated micro-delamination in CF stiffeners is possible but invisible without NDT. The stiffeners might be at 70-90% original strength without any visible indication. Operator has no way to know without ultrasonic inspection. - -**Approach B (pure GFRP, Shark M style)**: Same impact conditions. FG belly absorbs impact — same crack/dent pattern. FG internal stiffening ribs absorb shock by flexing. Any damage is visible (cracking, whitening). After 200 landings, operator can visually assess the entire airframe and decide to replace worn components. No hidden degradation. - -### Actual Validation -The Shark M has performed thousands of such landings in operational service. The design is validated by 50,000+ hours of combat operations. The hidden damage accumulation scenario (Approach A) is a real engineering concern documented in aerospace literature (BVID in CFRP is a well-known problem). - -### Counterexamples -- Approach A could be validated if CF stiffeners are designed with high safety margins (oversized stiffeners that tolerate some delamination). This adds weight, partially negating the weight advantage. -- Some carbon fiber structures are designed for crash energy absorption (automotive) — but those are single-use absorbers, not reusable structural elements. - ---- - -## Validation Scenario 2: Long-Range Communication at 150 km, EW Environment - -### Expected Based on Conclusions - -**Approach A (S2 FG + CF stiffeners)**: C2 antenna inside fuselage. If antenna is placed between CF stiffeners (in FG-only zone), signal passes through FG skin with minimal attenuation. If antenna is near a CF stiffener, signal degrades by 30-50 dB in that direction → potential link loss. Requires careful antenna integration engineering during design. - -**Approach B (pure GFRP)**: C2 antenna placed anywhere inside fuselage. 360° RF coverage through fuselage. Signal attenuated only by FG dielectric properties (minimal). The Silvus radio modem in Shark M achieves 180 km range through the GFRP fuselage. - -### Actual Validation -Shark M demonstrates 180 km range with confirmed EW resistance. The user's direct experience confirms radio transparency. Shark M's Silvus-based communication system operates at full capability through the GFRP airframe. - -### Counterexamples -- The hybrid approach can achieve good RF performance if stiffeners are designed to avoid antenna zones. Many military UAVs use carbon fiber with external antennas successfully. -- If antennas are mounted externally (on wings, tail boom), the fuselage material is less critical for RF performance. However, external antennas are vulnerable to parachute landing damage and increase drag. - ---- - -## Validation Scenario 3: Weight-Critical Endurance Mission - -### Expected Based on Conclusions - -**Approach A (S2 FG + CF stiffeners)**: Lighter airframe by 300-800g. At 18 kg MTOW, this translates to larger battery or more fuel → 6-24 minutes additional endurance. For a 7-8h mission, this is 1-5% improvement. - -**Approach B (pure GFRP)**: Heavier airframe. Must compensate with slightly reduced payload or accept lower endurance. Shark M achieves 7h at 14.5 kg MTOW with pure GFRP — the user's UAV at 18 kg MTOW has different payload requirements. - -### Actual Validation -The weight difference is real but modest relative to total system weight. Shark M proves that 7h endurance is achievable with pure GFRP. The question is whether the user's heavier payload (Viewpro Z40K vs Shark's USG-231) makes the weight savings from CF stiffeners more critical. - -### Counterexamples -- If the user can meet endurance requirements with pure GFRP, the CF stiffeners are unnecessary complexity -- Weight savings might be achievable through other means (optimized FG layup, foam sandwich cores, lighter internal components) without introducing CF - ---- - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable/verifiable -- [x] Field evidence (Shark M) validates pure GFRP approach -- [x] Weight trade-off quantified with reasonable estimates -- [ ] Note: Exact weight penalty of pure GFRP vs hybrid cannot be determined without detailed structural analysis of specific airframe geometry diff --git a/_standalone/UAV_frame_material/01_solution/solution_draft01.md b/_standalone/UAV_frame_material/01_solution/solution_draft01.md deleted file mode 100644 index 3bbca7c..0000000 --- a/_standalone/UAV_frame_material/01_solution/solution_draft01.md +++ /dev/null @@ -1,177 +0,0 @@ -# Solution Draft - -## Product Solution Description - -A custom-built electric fixed-wing reconnaissance UAV optimized for maximum flight endurance. The airframe uses **T700 carbon fiber composite sandwich construction** (CFRP skins over PVC foam cores for wings, CFRP monocoque for fuselage) with selective Kevlar reinforcement at impact zones. Powered by **semi-solid state batteries** (330 Wh/kg class), the platform carries a 1.47 kg reconnaissance payload (ADTI 20L V1 + Viewpro A40 Pro gimbal + Jetson Orin Nano Super + Pixhawk 6x). - -**Target performance**: 5-6 hours practical flight endurance, 8-10 kg MTOW, 2.5-3.5m wingspan. - -``` -┌─────────────────────────────────────────────────────────┐ -│ SYSTEM OVERVIEW │ -│ │ -│ CFRP Sandwich Wing (PVC foam core + T700 CF skin) │ -│ ┌──────────────────────────────────┐ │ -│ │ High-aspect-ratio wing │ │ -│ │ Wingspan: 3.0-3.2m │ │ -│ └──────────┬───────────────────────┘ │ -│ │ │ -│ ┌───────────────┴───────────────────┐ │ -│ │ CFRP Monocoque Fuselage │ │ -│ │ ┌─────────┐ ┌──────────────┐ │ │ -│ │ │ Battery │ │ Payload Bay │ │ │ -│ │ │ Bay │ │ (1.47 kg) │ │ │ -│ │ └─────────┘ └──────────────┘ │ │ -│ └───────────────┬───────────────────┘ │ -│ │ │ -│ ┌───────┴───────┐ │ -│ │ Motor + Prop │ │ -│ │ (pusher) │ │ -│ └───────────────┘ │ -│ │ -│ Power: Semi-solid state battery (Tattu 330Wh/kg) │ -│ Avionics: Pixhawk 6x + GPS │ -│ Compute: Jetson Orin Nano Super │ -└─────────────────────────────────────────────────────────┘ -``` - -## Existing/Competitor Solutions Analysis - -| Platform | MTOW | Endurance | Payload | Airframe Material | Battery | Price | -|----------|------|-----------|---------|-------------------|---------|-------| -| Applied Aeronautics Albatross | 10 kg | 4 hours | 4.5 kg | Fiberglass + Carbon fiber | LiPo | ~$8,000 (RTF) | -| DeltaQuad Evo | 10 kg | 4h32m (std) / 8h55m (record) | 1-3 kg | Fiberglass + Carbon + Kevlar | Semi-solid / Solid-state Li | ~$25,000+ | -| Penguin BE | <25 kg class | 110 min | 2.8 kg | Composite | Li-Ion | ~$30,000+ | -| SUX61 | ~11 kg | 91 min | 8 kg | Carbon fiber monocoque | LiPo | ~$5,000 (frame) | - -**Key takeaway**: DeltaQuad Evo demonstrates that semi-solid/solid-state batteries combined with composite airframe can achieve 8+ hours in this MTOW class. Our design targets a similar approach with a lighter payload (1.47 vs 3 kg), leaving more weight budget for batteries. - -## Architecture - -### Component: Frame Material - -| Solution | Tools | Advantages | Limitations | Requirements | Security | Cost | Fit | -|----------|-------|-----------|-------------|-------------|----------|------|-----| -| **T700 CFRP (recommended)** | T700 unidirectional + woven prepreg or dry fabric | 40-50% lighter than Al, specific stiffness 113, excellent fatigue life, corrosion-proof | Brittle under impact, requires specialized manufacturing, difficult field repair | Vacuum infusion or prepreg + oven cure, outsourced manufacturing | N/A | ~$18/m² material; $15-25k total airframe manufacturing | ✅ Best for endurance | -| Fiberglass (E-glass) | E-glass woven fabric + epoxy | Cheap (~$5/m²), easy to work, good impact tolerance, simple field repair | 40% heavier than CFRP for same stiffness, limits endurance | Basic workshop or outsource | N/A | ~$5/m²; $5-10k total | ⚠️ Weight penalty reduces endurance by ~1-2 hours | -| Carbon-Kevlar Hybrid | Hybrid woven fabric | Best crash survivability, 25-40% lighter than Al | Kevlar hard to machine, UV sensitive, expensive (~$30/m²) | Specialized cutting tools, UV-protective coating | N/A | ~$30/m²; $20-30k total | ⚠️ Overkill for cost; Kevlar benefits limited to impact zones | -| Aluminum 6061-T6 | CNC machining | Cheapest, easiest to manufacture, excellent repairability | Heaviest option (2.7 g/cm³), poor fatigue, reduces endurance 2-3 hours | CNC shop | N/A | ~$3-5k total | ❌ Weight kills endurance | - -**Recommendation**: T700 CFRP as primary structure with Kevlar patches at landing gear attach points and belly panel for crash protection (~100-200g weight addition). - -### Component: Construction Method - -| Solution | Tools | Advantages | Limitations | Requirements | Security | Cost | Fit | -|----------|-------|-----------|-------------|-------------|----------|------|-----| -| **Sandwich (foam core + CFRP skin) — recommended for wings** | PVC foam (Divinycell H60-H80), T700 fabric, vacuum infusion setup | Highest stiffness/weight ratio, 30% lighter than solid composite, excellent for wings | Requires quality core material, careful bonding | Vacuum pump, bagging film, infusion consumables | N/A | Core: ~$500-1000; total wing set: $5-8k | ✅ Best for wing endurance | -| Monocoque (solid CFRP shell) — recommended for fuselage | CFRP prepreg or wet layup over male mold | Good torsional rigidity, smooth aerodynamic surface, compact | Heavier than sandwich for same stiffness, needs precise molds | Female or male molds, oven cure | N/A | Molds: $3-5k; layup: $2-3k | ✅ Best for fuselage | -| Spar + Rib + Skin (traditional) | CNC-cut ribs, CF tube spars, film/fabric skin | Easy to prototype and modify, lightweight if well-designed | More labor-intensive, aerodynamic surface quality depends on skin | CNC router for ribs, CF tubes | N/A | $2-4k materials | ⚠️ Good for prototyping, inferior surface finish | - -**Recommendation**: Sandwich wings + monocoque fuselage. Outsource manufacturing to a composite prototyping service (e.g., Scabro Innovations, Refitech, or similar). - -### Component: Foam Core (for wing sandwich) - -| Solution | Tools | Advantages | Limitations | Requirements | Security | Cost | Fit | -|----------|-------|-----------|-------------|-------------|----------|------|-----| -| **PVC — Divinycell H60/H80 (recommended)** | Standard composite tools | Industry standard, good stiffness/weight, closed-cell moisture immune, handles 80°C cure | Not suitable for autoclave temps >100°C | Compatible with vacuum infusion and oven cure | N/A | ~$50-80/m² | ✅ Best value for prototype | -| Rohacell PMI | Standard composite tools | Highest stiffness/weight, handles autoclave temps (180°C+) | Very expensive, overkill for prototype | Same as PVC | N/A | ~$150-300/m² | ⚠️ Only for production optimization | -| XPS (extruded polystyrene) | Hot wire cutting | Cheapest, easy to shape, closed-cell | Lower compressive strength, limited to 75°C cure | Hot wire cutter | N/A | ~$10-20/m² | ⚠️ Budget option, acceptable for first prototype | -| EPS (expanded polystyrene) | Hot wire cutting | Cheapest available | Lowest strength, absorbs moisture, open-cell-like bead structure | Hot wire cutter | N/A | ~$5-10/m² | ❌ Not recommended for flight-critical parts | - -### Component: Battery Technology - -| Solution | Tools | Advantages | Limitations | Requirements | Security | Cost | Fit | -|----------|-------|-----------|-------------|-------------|----------|------|-----| -| **Semi-solid state — Tattu 330Wh/kg (recommended)** | Compatible charger (6S/12S balance) | 310 Wh/kg pack level, 800-1200 cycles, -20 to 60°C, 10C peak | Higher cost per Wh (~$0.50-0.80), limited supplier options | Standard balance charger, battery management | Fire safety: low thermal runaway risk | ~$800-1500/pack (est.) | ✅ Best for max endurance | -| Semi-solid state — Grepow 300Wh/kg | Compatible charger | 300 Wh/kg, 1200+ cycles, 2C charge, multiple configs | Slightly lower energy density than Tattu 330 | Standard balance charger | Fire safety: low risk | ~$700-1200/pack (est.) | ✅ Good alternative | -| Li-Ion 21700 Pack (custom) | Spot welder, BMS, pack assembly | 200-250 Wh/kg, 500-800 cycles, widely available, cheap cells | Lower energy density, requires custom pack building, 3-5C max discharge | BMS, spot welder, cell matching | Medium: requires proper BMS | ~$0.20-0.35/Wh | ⚠️ 20-30% less endurance than semi-solid | -| LiPo (traditional) | Standard RC charger | Cheapest, highest discharge rates (25-50C), widely available | 150-200 Wh/kg, 200-500 cycles, thermal sensitivity | Standard RC charger | Higher thermal runaway risk | ~$0.15-0.25/Wh | ❌ 40-50% less endurance than semi-solid | - -**Recommended configuration**: Tattu 330Wh/kg 6S 33000mAh × 1-2 packs (series or parallel depending on motor voltage requirements). -- 1 pack: 2324g, 732.6 Wh → estimated 4-5 hours practical endurance -- 2 packs (parallel): 4648g, 1465 Wh → estimated 6-7 hours practical (but may exceed MTOW) - -Optimal: single large 12S pack or purpose-selected configuration to stay within MTOW. - -### Component: Carbon Fiber Grade - -| Solution | Tools | Advantages | Limitations | Requirements | Security | Cost | Fit | -|----------|-------|-----------|-------------|-------------|----------|------|-----| -| **T700 (recommended)** | Standard composite tools | 4900 MPa tensile, 230 GPa modulus, good impact tolerance, industry standard for UAVs | Lower modulus than T800 | Standard resin systems | N/A | ~$18/m² | ✅ Best value | -| T800 | Standard composite tools | 5880 MPa tensile, 294 GPa modulus, 28% stiffer | 44% more expensive, more brittle, marginal weight gain at this scale | Same resin systems | N/A | ~$26/m² | ⚠️ Only for specific high-load elements | -| T300 | Standard composite tools | Cheapest, widely available | Significantly lower strength than T700 | Same resin systems | N/A | ~$12/m² | ❌ Insufficient for primary structure | - -## Weight Budget Estimate - -| Component | Weight (kg) | -|-----------|-------------| -| Bare airframe (CFRP sandwich wing + monocoque fuselage) | 2.8-3.2 | -| Motor + ESC + propeller | 0.4-0.6 | -| Wiring, connectors, misc hardware | 0.3-0.5 | -| Payload (camera + gimbal + Jetson + Pixhawk + GPS) | 1.47 | -| Battery (semi-solid, target) | 3.0-3.5 | -| **Total estimated** | **8.0-9.3** | -| MTOW limit | 10.0 | -| **Margin** | **0.7-2.0** | - -## Endurance Estimate - -**Assumptions**: -- MTOW: 9.0 kg (mid-range estimate) -- Cruise speed: 17 m/s -- L/D ratio: ~15 (high-aspect-ratio wing) -- Propulsive efficiency: 0.85 -- Battery: 3.2 kg semi-solid at 310 Wh/kg = 992 Wh -- Payload power: ~30W (Jetson 15-25W + camera/gimbal 10-15W) -- Cruise power: ~130W (aerodynamic) + 30W (payload) = ~160W total -- Battery reserve: 20% -- Usable energy: 992 × 0.80 = 794 Wh - -**Theoretical endurance**: 992 / 160 = 6.2 hours -**Practical endurance (with reserve + real-world losses)**: 794 / 160 ≈ **5.0 hours** - -**Range at cruise**: 5.0h × 17 m/s × 3.6 = **306 km** - -This is conservative. Optimization of airfoil, wing loading, and propulsion system could push practical endurance to 5.5-6.0 hours. - -## Testing Strategy - -### Integration / Functional Tests -- Static load test: wing spar to 3× max flight load (verify no failure at 3g) -- Ground vibration test: verify no flutter modes within flight envelope -- Range/endurance test: fly at cruise speed until 20% battery reserve, measure actual endurance vs predicted -- Payload integration test: verify all electronics (Jetson, Pixhawk, camera, gimbal) function correctly with airframe vibration -- CG range test: verify stable flight across full CG envelope - -### Non-Functional Tests -- Temperature endurance: ground soak at -10°C and +45°C, verify battery and avionics function -- Wind resistance: fly in 10-12 m/s sustained wind, verify controllability and endurance impact -- Hard landing test: drop from 1m at 2 m/s descent rate onto belly, verify structural integrity (Kevlar reinforcement zones) -- Battery cycle test: charge/discharge 50 cycles, verify capacity retention ≥95% -- EMI test: verify Jetson/camera does not interfere with GPS/telemetry - -## References - -1. UAVMODEL — Carbon Fiber Fixed Wing Drones: https://www.uavmodel.com/blogs/news/skyeye-sr260-fixed-wing-drone-2600mm-long-endurance-mapping-amp-inspection -2. SUX61 UAV Frame: https://aerojetparts.com/product/sux61-uav-frame-carbon-fiber-8kg-payload-91min-endurance/ -3. FAI — Vanilla UAV Flight Duration Record: https://www.fai.org/vanilla-uav-flight-duration-record -4. Springer — EPS-Fiber-Reinforced Composite Wing Analysis (2024): https://link.springer.com/10.1007/s11029-024-10185-3 -5. Grepow Semi-Solid Battery: https://www.grepow.com/semi-solid-state-battery/300wh-kg-series-high-energy-density-battery-pack.html -6. Tattu Semi-Solid Battery: https://tattuworld.com/semi-solid-state-battery/ -7. Herewin Semi-Solid Guide (2026): https://www.herewinpower.com/blog/solid-state-drone-batteries-ultimate-guide/ -8. Applied Aeronautics Albatross: https://www.appliedaeronautics.com/albatross-uav -9. KingRaysCarbon — CF vs Al: https://kingrayscarbon.com/carbon-fiber-vs-aluminum-for-drone-frames-which-performs-better/ -10. Dronecarbon — Kevlar vs CF: https://www.dronecarbon.com/kevlar-vs-carbon-fiber_a9075.html -11. Herewin — LFP vs LiPo vs Semi-Solid (2026): https://www.herewinpower.com/blog/lfp-vs-lipo-vs-semi-solid-industrial-drone-batteries-2026-roi-safety-and-performance/ -12. DeltaQuad Evo Specs: https://docs.deltaquad.com/gov/vehicle-specifications -13. DeltaQuad Evo 8h55m Record: https://uasweekly.com/2025/06/27/deltaquad-evo-sets-record-with-8-hour-flight-endurance-for-electric-vtol-uas-milestone/ -14. T700 vs T800 Guide: https://www.carbonfibermaterial.com/t700-vs-t800-carbon-fiber-a-practical-guide-for-material-selection/ -15. CFRP Manufacturing Comparison (Indonesian J. Aerospace): https://ejournal.brin.go.id/ijoa/article/view/286 -16. Rohacell vs Foam Cores — Chem-Craft: https://chem-craft.com/blog/comparative-analysis-rohacell-vs-traditional-materials-in-composite-engineering/ -17. Carbon-Kevlar Hybrid: https://ictmaterial.com/what-is-carbon-kevlar-hybrid-fabric-properties-and-use-cases/ -18. Scabro Innovations — UAV Prototyping: https://scabroinnovations.com/diensten/composite-airframe-prototyping/ -19. Tattu 330Wh/kg 6S Specs: https://www.tattuworld.com/semi-solid-state-battery/semi-solid-330wh-kg-33000mah-22-2v-10c-6s-battery.html -20. ASTM F3563-22: https://www.astm.org/f3563-22.html - -## Related Artifacts -- AC Assessment: `_standalone/UAV_frame_material/00_research/UAV_frame_material/00_ac_assessment.md` diff --git a/_standalone/UAV_frame_material/01_solution/solution_draft02.md b/_standalone/UAV_frame_material/01_solution/solution_draft02.md deleted file mode 100644 index c7a7141..0000000 --- a/_standalone/UAV_frame_material/01_solution/solution_draft02.md +++ /dev/null @@ -1,428 +0,0 @@ -# Solution Draft (Rev 02) - -## Revised Constraints (vs Draft 01) - -| Constraint | Draft 01 | Draft 02 | -|-----------|----------|----------| -| Cost per unit | $100k prototype | < $7k, target < $5k | -| Material | CFRP (T700) | S2 fiberglass (radio transparent) | -| Radio transparency | Not considered | Required — full RF transparency for GPS, telemetry, data links | -| Flight time | 5-6 hours target | Same if possible, can be less | -| Transport | Not specified | Disassembled fits in car trunk; 2 planes per pickup truck | - -## Product Solution Description - -A modular, radio-transparent electric fixed-wing reconnaissance UAV built with **S2 fiberglass/foam-core sandwich construction** with internal **carbon fiber spar reinforcement**. Designed for field deployment — disassembles into 3 sections (2 wing panels + fuselage) that fit in a car trunk, with 2 complete aircraft fitting in a standard pickup truck bed. Powered by semi-solid state batteries for maximum endurance. - -**Target performance**: 3.5-5 hours practical flight endurance, 9-10 kg MTOW, ~3m wingspan, < $5k BOM per unit. - -``` -┌──────────────────────────────────────────────────────────────┐ -│ MODULAR AIRFRAME LAYOUT │ -│ │ -│ LEFT WING PANEL FUSELAGE RIGHT WING PANEL │ -│ (~1.5m span) (~1.0-1.1m) (~1.5m span) │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ -│ │ S2 FG skin │ │ S2 FG skin │ │ S2 FG skin │ │ -│ │ PVC foam core│◄─►│ Battery bay │◄─►│ PVC foam core│ │ -│ │ CF spar cap │ │ Payload bay │ │ CF spar cap │ │ -│ │ (internal) │ │ Motor+ESC │ │ (internal) │ │ -│ └──────────────┘ └──────────────┘ └──────────────┘ │ -│ │ -│ Wing-fuselage joint: aluminum spar joiner + 2 pin locks │ -│ Assembly time target: < 10 minutes │ -│ Material: S2 fiberglass = RF transparent (GPS/telemetry OK) │ -│ Internal CF spar: minimal RF impact (narrow linear element) │ -└──────────────────────────────────────────────────────────────┘ - -TRANSPORT CONFIGURATION (standard pickup truck, 6.5ft bed): -┌───────────────────────────────────────────┐ -│ Truck bed: 198cm × 130cm (wheel wells) │ -│ ┌──────────────────┐ ┌────────────────┐ │ -│ │ Plane 1 wings │ │ Plane 2 wings │ │ -│ │ (2 × 150cm long) │ │ (2 × 150cm) │ │ -│ │ stacked ~20cm │ │ stacked ~20cm │ │ -│ ├──────────────────┤ ├────────────────┤ │ -│ │ Plane 1 fuselage │ │ Plane 2 fuse. │ │ -│ │ (~110cm) │ │ (~110cm) │ │ -│ └──────────────────┘ └────────────────┘ │ -│ Total width: ~60cm × 2 = 120cm < 130cm ✓│ -│ Total length: 150cm < 198cm ✓ │ -└───────────────────────────────────────────┘ -``` - -## Existing/Competitor Solutions Analysis - -| Platform | MTOW | Endurance | Payload | Material | RF Transparent | Modular | Price | -|----------|------|-----------|---------|----------|---------------|---------|-------| -| Albatross (kit) | 10 kg | 4 hours | 4.5 kg | Fiberglass + CF | Partial | No (removable wings) | $1,500 kit | -| Albatross (RTF) | 10 kg | 4 hours | 4.5 kg | Fiberglass + CF | Partial | No | $4,800 | -| DeltaQuad Evo | 10 kg | 4.5h / 8.9h record | 1-3 kg | FG + CF + Kevlar | Partial | Wing removable | $25,000+ | -| Skywalker X8 | ~4 kg | 45-60 min | 1-2 kg | EPO foam | Yes | No | $489-598 | -| Mugin 2600 | 15 kg | 1.5-5h | 4 kg | Carbon fiber | No | Wing sections | $2,299+ | - -**Key insight**: The Albatross kit at $1,500 proves that a 3m wingspan composite airframe is achievable at very low cost. Our target of < $5k per complete unit (with batteries) is realistic. No competitor offers the combination of radio transparency + modular transport + semi-solid batteries. - -## Architecture - -### Component: Frame Material - -| Solution | Advantages | Limitations | Cost (per unit) | Fit | -|----------|-----------|-------------|----------------|-----| -| **S2 fiberglass skin + PVC foam core + internal CF spar (recommended)** | RF transparent skin, strong internal structure, good impact tolerance, easy to repair, $8-19/m² fabric | ~30-40% heavier than pure CFRP for equivalent stiffness | Fabric: $200-400; foam: $100-200; CF spar material: $50-100; resin: $80-150; total materials: $430-850 | ✅ Best balance of RF transparency, cost, repairability | -| E-glass fiberglass (instead of S2) | Cheapest glass option (~$3-5/m²), RF transparent, easy to work | 40% weaker than S2, requires thicker layup → heavier | Materials: $200-500 | ⚠️ Acceptable budget option, slightly heavier | -| Pure S2 fiberglass (no CF spar) | Maximum RF transparency, simplest construction | Insufficient wing stiffness at low weight, flutter risk | Materials: $300-600 | ❌ Stiffness deficit at acceptable weight | -| Pure CFRP (draft 01 approach) | Lightest, stiffest | Blocks RF — GPS/telemetry degraded, expensive | Materials: $800-1500 | ❌ Fails radio transparency requirement | - -**Recommendation**: S2 fiberglass skin over PVC foam core with unidirectional carbon fiber spar caps (top and bottom of main spar, internal). The CF spar is a narrow linear element (~20-30mm wide per cap) inside the wing — negligible RF blockage. All external surfaces are S2 FG = fully radio transparent. - -### Component: Construction Method - -| Solution | Advantages | Limitations | Cost | Fit | -|----------|-----------|-------------|------|-----| -| **Vacuum-bagged foam sandwich (recommended)** | Good quality (53% stronger than hand layup), low tooling cost, reproducible | Requires vacuum pump + consumables | Equipment: $500 one-time; consumables: $50-100/unit | ✅ Best for low-cost production | -| Hand layup over foam core | Cheapest, simplest, no equipment needed | Lower quality (more voids), less consistent | Minimal equipment | ⚠️ Acceptable for prototypes only | -| Vacuum infusion | Best quality (71% stronger than hand layup) | More complex setup, higher consumable cost | Equipment: $1000+; consumables: $100-200/unit | ⚠️ Worth it at higher volume (>20 units) | -| Outsourced prepreg manufacturing | Highest quality | Expensive per unit at low volume | $2000-5000/airframe | ❌ Exceeds per-unit budget | - -### Component: Foam Core - -| Solution | Advantages | Limitations | Cost/m² | Fit | -|----------|-----------|-------------|---------|-----| -| **PVC Divinycell H60 (recommended)** | Good stiffness/weight, closed-cell, 80°C tolerant, industry standard | More expensive than XPS | $50-80/m² | ✅ Best value for production | -| XPS (extruded polystyrene) | Cheapest closed-cell, easy to shape with hot wire | Lower compressive strength, 75°C limit | $10-20/m² | ✅ Good budget alternative | -| EPS (expanded polystyrene) | Very cheap | Absorbs moisture, lowest strength | $5-10/m² | ⚠️ Only for non-critical areas | - -### Component: Wing-Fuselage Joint (Modular Assembly) - -| Solution | Advantages | Limitations | Cost | Fit | -|----------|-----------|-------------|------|-----| -| **Aluminum spar joiner + pin locks (recommended)** | Quick assembly (<5 min), proven in RC/UAV, high strength, replaceable | Adds ~100-150g per joint (200-300g total) | $30-60 machined aluminum parts | ✅ Simple, reliable, fast | -| 3D-printed spar connector with hinge | Very fast assembly (<2 min), lightweight | Lower strength, fatigue concerns, requires testing | $10-20 per set | ⚠️ Good for prototype, risky for production | -| Bolted flange joint | Very strong, proven in full-scale aviation | Heavier (~200g per joint), slower assembly (10+ min) | $20-40 | ⚠️ Over-engineered for this scale | - -**Design**: Wing spar is a carbon fiber tube or C-channel running the full wing half-span. At the root, it slides into an aluminum joiner tube embedded in the fuselage. Secured with 2 quick-release pins (top and bottom). Electrical connections (servo leads) via a quick-disconnect plug at each wing root. - -### Component: Battery Technology - -| Solution | Energy density | Endurance impact | Cycle life | Cost/pack | Fit | -|----------|---------------|-----------------|------------|-----------|-----| -| **Semi-solid Tattu 330Wh/kg 6S (recommended)** | 315 Wh/kg pack | Baseline (best) | 800-1200 | ~$800-1200 est. | ✅ Best endurance per $ | -| Semi-solid Grepow 300Wh/kg 6S | 280-300 Wh/kg pack | -5 to -10% | 1200+ | ~$700-1000 est. | ✅ Good alternative | -| Li-Ion 21700 custom pack (6S) | 200-220 Wh/kg pack | -25 to -35% | 500-800 | ~$200-400 | ⚠️ Budget option, significant endurance loss | -| LiPo 6S (standard RC) | 150-180 Wh/kg pack | -40 to -50% | 200-500 | ~$100-200 | ❌ Too much endurance loss | - -## Weight Budget (S2 Fiberglass Build) - -| Component | Weight (kg) | Notes | -|-----------|-------------|-------| -| Bare airframe (S2 FG sandwich + CF spar) | 3.8-4.5 | ~30% heavier than pure CFRP; Albatross FG+CF is 3.35 kg | -| Wing joints (aluminum) | 0.2-0.3 | Spar joiners + pins + quick-disconnect plugs | -| Motor + ESC + propeller | 0.4-0.6 | | -| Wiring, connectors, misc | 0.3-0.4 | | -| **Platform subtotal** | **4.7-5.8** | | -| Payload (camera + gimbal + Jetson + Pixhawk + GPS) | 1.47 | Fixed | -| Battery (semi-solid) | 2.7-3.8 | Remainder to MTOW | -| **Total (target MTOW 10 kg)** | **~10.0** | | - -Conservative estimate: platform 5.3 kg + payload 1.47 kg + battery 3.2 kg = 9.97 kg. - -## Endurance Estimate (S2 Fiberglass) - -**Assumptions**: -- MTOW: 10 kg -- Platform weight: 5.3 kg (S2 FG airframe + motor + wiring + joints) -- Payload: 1.47 kg -- Battery: 3.23 kg semi-solid at 310 Wh/kg = 1001 Wh -- Cruise power: ~140W (slightly higher than CFRP due to heavier aircraft → higher induced drag) -- Payload power: ~30W (Jetson + camera + gimbal) -- Total system power: ~170W -- Battery reserve: 20% -- Usable energy: 1001 × 0.80 = 801 Wh -- Real-world efficiency factor: 0.75 - -**Theoretical endurance**: 1001 / 170 = 5.9 hours -**Practical endurance (with reserve)**: 801 / 170 ≈ **4.7 hours** -**Practical endurance (with reserve + real-world losses)**: 801 × 0.75 / 170 ≈ **3.5 hours** - -**Comparison to Draft 01 (CFRP)**: -- Draft 01: 5.0 hours practical → Draft 02: 3.5-4.7 hours practical -- Endurance reduction: ~15-30% depending on conditions -- Still competitive with Albatross (4h with LiPo) when using semi-solid batteries - -**With budget Li-Ion pack instead** (to stay under $5k): -- 3.23 kg Li-Ion at 210 Wh/kg = 678 Wh → usable 542 Wh -- Practical: 542 / 170 ≈ **3.2 hours** (reserve only) / **2.4 hours** (worst case) - -## BOM Cost Estimate (Per Unit) - -| Component | Low Est. | High Est. | Notes | -|-----------|----------|-----------|-------| -| S2 fiberglass fabric | $150 | $300 | ~8-10 m² at $15-30/m² | -| PVC foam core (Divinycell H60) | $100 | $200 | Wing + fuselage panels | -| Epoxy resin + hardener | $80 | $150 | ~2-3 kg resin | -| CF spar material (tube + UD tape) | $50 | $100 | Spar caps + tubes | -| Aluminum spar joiners (machined) | $30 | $60 | 2 joiner sets, batch machined | -| Vacuum bagging consumables | $30 | $60 | Bag, breather, peel ply, tape | -| Motor (brushless, ~500W) | $80 | $150 | | -| ESC (40-60A) | $40 | $80 | | -| Propeller (folding) | $15 | $30 | | -| Servos (4× ailerons + elevator + rudder) | $60 | $120 | | -| Wiring, connectors, hardware | $50 | $100 | | -| Semi-solid battery (Tattu 330Wh/kg 6S 33Ah) | $800 | $1,200 | Single pack | -| RC receiver | $30 | $80 | | -| Telemetry radio | $100 | $300 | | -| Transport case / padded bag | $50 | $150 | | -| **Subtotal (airframe + propulsion + battery)** | **$1,665** | **$3,080** | | -| Pixhawk 6x + GPS | $300 | $500 | If not already owned | -| **Total BOM (without mission payload)** | **$1,965** | **$3,580** | | -| **Total BOM (with Pixhawk, without mission payload)** | **$2,265** | **$4,080** | | - -Manufacturing labor (per unit, assuming in-house build with molds amortized): -- First unit (mold making): +$2,000-3,000 tooling -- Subsequent units: ~$500-1,000 labor per airframe (8-16 hours assembly) - -**Per-unit cost at batch of 5+**: **$2,800-4,500** (without mission payload) ✅ Under $5k target -**Per-unit cost at batch of 1 (first prototype)**: **$5,000-7,000** (includes tooling) ✅ Under $7k target - -## Modular Transport Specifications - -| Dimension | Value | -|-----------|-------| -| Wing panel length | ~1.50 m (half-span) | -| Wing panel chord | ~0.25-0.30 m | -| Wing panel thickness | ~0.04-0.05 m | -| Fuselage length | ~1.00-1.10 m | -| Fuselage width/height | ~0.15-0.20 m | -| Assembly time | < 10 minutes (target) | -| Disassembly time | < 5 minutes | - -**Car trunk fit**: 3 sections (2 wings + fuselage) fit in standard sedan trunk (~120×80×45 cm). Wings stack flat, fuselage alongside. ✅ - -**Pickup truck (2 planes)**: Standard 6.5ft bed (198×130 cm between wheel wells). Each plane's longest component is 150 cm (< 198 cm bed length). Two planes side by side need ~120 cm width (< 130 cm between wheel wells). ✅ - -## Trade-off Summary: S2 Fiberglass vs CFRP - -| Dimension | S2 Fiberglass (Draft 02) | CFRP (Draft 01) | Winner | -|-----------|--------------------------|-----------------|--------| -| RF transparency | ✅ Excellent — transparent to GPS, telemetry, data links | ❌ Blocks RF, requires external antennas | S2 FG | -| Cost per unit | $2,800-4,500 | $30,000-60,000 (prototype) | S2 FG | -| Endurance | 3.5-4.7 hours practical | 5.0 hours practical | CFRP (+15-30%) | -| Airframe weight | 3.8-4.5 kg bare | 2.8-3.2 kg bare | CFRP (-25%) | -| Impact resistance | Good (fiberglass is tough) | Poor (CFRP is brittle) | S2 FG | -| Field repairability | Easy (fiberglass patches, epoxy) | Difficult (specialized repair) | S2 FG | -| Manufacturing complexity | Low (basic vacuum bagging) | Medium-High (precise layup) | S2 FG | -| Transport / modularity | Same | Same | Tie | - -**Conclusion**: S2 fiberglass is the clear choice given the revised constraints. The 15-30% endurance reduction vs CFRP is offset by radio transparency (critical for the mission), 10x lower cost, and significantly easier manufacturing and field repair. - -## Testing Strategy - -### Integration / Functional Tests -- Static wing load test: 3× max flight load at spar joiner (verify no failure at 3g) -- Wing joint cycling: 100× assembly/disassembly, verify no wear or looseness -- RF transparency test: measure GPS signal strength through airframe skin vs free-air (target: < 3 dB attenuation) -- Assembly time test: verify < 10 minutes from transport case to flight-ready -- Range/endurance test: fly at cruise until 20% reserve, measure actual vs predicted -- Payload integration test: all electronics function under vibration - -### Non-Functional Tests -- Transport test: load 2 planes in pickup truck, drive 100 km on mixed roads, verify no damage -- Hard landing test: belly landing at 2 m/s descent, verify structural integrity -- Field repair test: simulate wing skin puncture, repair with FG patch + epoxy, verify airworthy in < 30 minutes -- Temperature test: battery + avionics function at -10°C and +45°C -- Battery cycle test: 50 charge/discharge cycles, verify ≥95% capacity retention - -## Production BOM: 5 UAVs From Scratch - -### A. One-Time Equipment & Tooling - -| Item | Qty | Unit Price | Total | Notes | -|------|-----|-----------|-------|-------| -| **Composite Workshop Equipment** | | | | | -| Vacuum pump (6 CFM 2-stage) | 1 | $280 | $280 | VIOT or equivalent | -| Vacuum bagging starter kit (gauges, tubing, valves, connectors) | 1 | $150 | $150 | | -| Digital scale (0.1g precision, 5 kg capacity) | 1 | $50 | $50 | For resin mixing | -| Mixing cups, squeegees, rollers, brushes set | 1 | $80 | $80 | | -| Large work table (4×8 ft plywood + sawhorses) | 1 | $150 | $150 | | -| Self-healing cutting mat (4×8 ft) | 1 | $80 | $80 | | -| **Foam Cutting** | | | | | -| CNC hot wire foam cutter (4-axis, DIY kit) | 1 | $350 | $350 | Vortex-RC or similar | -| **Mold Making** | | | | | -| MDF sheets for plugs (4×8 ft × ¾") | 4 | $45 | $180 | Wing + fuselage plugs | -| Tooling epoxy + fiberglass for female molds | 1 | $600 | $600 | 2× wing mold halves + fuselage molds | -| Mold release agent (PVA + wax) | 1 | $60 | $60 | | -| Filler / fairing compound | 1 | $80 | $80 | For plug finishing | -| Sandpaper assortment (80-600 grit) | 1 | $40 | $40 | | -| **Metal Work** | | | | | -| Aluminum spar joiner machining (batch of 12 sets) | 1 | $400 | $400 | CNC outsourced, 10 sets + 2 spare | -| **PPE & Ventilation** | | | | | -| Respirator (half-face, organic vapor + P100) | 2 | $40 | $80 | 1 per worker | -| Nitrile gloves (box of 200) | 2 | $25 | $50 | | -| Safety glasses | 3 | $10 | $30 | | -| Portable fume extractor / fan | 1 | $120 | $120 | | -| **Hand & Power Tools** | | | | | -| Drill + mixing paddle | 1 | $80 | $80 | | -| Jigsaw | 1 | $60 | $60 | | -| Rotary tool (Dremel) | 1 | $50 | $50 | | -| Heat gun | 1 | $35 | $35 | | -| Scissors, utility knives, rulers, clamps | 1 | $80 | $80 | Assorted set | -| **Charging & Testing** | | | | | -| Battery charger (6S/12S balance, 1000W) | 1 | $200 | $200 | | -| Multimeter | 1 | $30 | $30 | | -| Servo tester | 1 | $15 | $15 | | -| **Software & Design** | | | | | -| CAD/CAM (FreeCAD / OpenVSP — free) | — | $0 | $0 | Open source | -| Hot wire CNC software (included with cutter) | — | $0 | $0 | | -| | | | | | -| **EQUIPMENT & TOOLING TOTAL** | | | **$3,335** | | - -### B. Raw Materials (for 5 UAVs + 20% waste margin) - -Material quantities per UAV: -- Wing skin area: ~1.6 m² planform × 2 (top+bottom) × 2 layers = ~6.4 m² S2 fabric -- Fuselage skin: ~0.6 m² × 2 layers = ~1.2 m² -- Tail surfaces: ~0.3 m² × 2 layers = ~0.6 m² -- Total S2 fabric per UAV: ~8.2 m² → with waste: ~10 m² -- Foam core per UAV: ~2.5 m² (wings + tail) -- Resin per UAV: ~2.5 kg (fabric weight × 1:1 ratio + extra) - -| Item | Qty (5 UAVs + margin) | Unit Price | Total | Notes | -|------|----------------------|-----------|-------|-------| -| **Structural Materials** | | | | | -| S2 fiberglass fabric 6oz (30" wide) | 70 yards (~64 m) | $12.50/yard | $875 | ~10 m² per UAV × 5 + waste | -| PVC foam Divinycell H60 10mm (1.22×0.81m sheets) | 16 sheets | $40/sheet | $640 | ~2.5 m² per UAV × 5 + waste | -| Laminating epoxy resin (West System 105 or equiv) | 4 gallons | $125/gal | $500 | ~2.5 kg resin per UAV | -| Epoxy hardener | 2 gallons | $80/gal | $160 | | -| Carbon fiber tube (spar, 20mm OD, 1.5m) | 12 | $25 each | $300 | 2 per UAV + spare | -| Carbon fiber UD tape (spar caps, 25mm wide) | 30 m | $5/m | $150 | 5m per UAV + spare | -| **Vacuum Bagging Consumables** | | | | | -| Vacuum bag film (5m × 1.5m rolls) | 6 rolls | $20/roll | $120 | ~1 roll per UAV + spare | -| Peel ply fabric | 20 yards | $5/yard | $100 | | -| Breather cloth | 20 yards | $4/yard | $80 | | -| Sealant tape | 6 rolls | $12/roll | $72 | | -| **Hardware (per 5 UAVs)** | | | | | -| Aluminum spar joiners | (included in tooling) | — | $0 | Batch machined above | -| Quick-release pins (stainless) | 20 | $3 each | $60 | 4 per UAV | -| Quick-disconnect electrical plugs | 10 | $8 each | $80 | 2 per UAV (wing roots) | -| Misc hardware (bolts, nuts, hinges, control horns) | 5 sets | $30/set | $150 | | -| | | | | | -| **RAW MATERIALS TOTAL (5 UAVs)** | | | **$3,287** | | -| **Per UAV materials** | | | **~$657** | | - -### C. Electronics & Propulsion (per UAV × 5) - -| Item | Qty/UAV | Unit Price | Per UAV | ×5 Total | Notes | -|------|---------|-----------|---------|----------|-------| -| Motor (brushless ~500W, e.g. Dualsky XM5050EA) | 1 | $90 | $90 | $450 | Fixed-wing optimized | -| ESC (40-60A, BLHeli) | 1 | $50 | $50 | $250 | | -| Folding propeller (13×8 or similar) | 2 | $15 | $30 | $150 | 1 spare per UAV | -| Servos (digital metal gear, 15-20 kg·cm) | 5 | $25 | $125 | $625 | 2× aileron + elevator + rudder + flap/spare | -| Pixhawk 6X Mini + GPS | 1 | $380 | $380 | $1,900 | | -| RC receiver (long range, e.g. TBS Crossfire) | 1 | $60 | $60 | $300 | | -| RFD900x telemetry pair (shared GCS unit) | 1 air + 0.2 GCS | $170 (air) | $170 | $850 + $350 GCS = $1,200 | 1 GCS module shared | -| Power distribution board + BEC | 1 | $25 | $25 | $125 | | -| Wiring, connectors (XT90, JST, servo ext.) | 1 set | $40 | $40 | $200 | | -| Semi-solid battery (Tattu 330Wh/kg 6S 33Ah) | 1 | $732 | $732 | $3,660 | | -| | | | | | | -| **ELECTRONICS TOTAL (5 UAVs)** | | | | **$8,910** | | -| **Per UAV electronics** | | | **~$1,702** | | Excl. shared GCS telemetry | - -### D. Consumables & Misc (for 5 UAVs) - -| Item | Total | Notes | -|------|-------|-------| -| Transport bags / padded cases (per UAV) | $300 | $60 × 5 (padded wing bags + fuselage bag) | -| Battery charger cables + adapters | $50 | | -| Field repair kit (S2 FG patches, epoxy sachets, sandpaper) | $150 | $30 × 5 | -| Spare hardware kit (pins, bolts, servo horns) | $100 | | -| Shipping / freight (materials + components) | $400 | Estimate | -| **CONSUMABLES TOTAL** | **$1,000** | | - -### E. Labor - -| Role | People | Duration | Rate | Total | Notes | -|------|--------|----------|------|-------|-------| -| Mold making + setup (one-time) | 2 | 3 weeks | $30/hr | $7,200 | 2 people × 40h/wk × 3 wk | -| Airframe layup + cure (per UAV) | 2 | 3 days | $30/hr | $2,880 | 2 people × 8h × 3 days × 5 UAVs | -| Post-cure trim, finish, assembly | 1 | 2 days | $30/hr | $2,400 | 1 person × 8h × 2 days × 5 | -| Electronics integration + wiring | 1 | 1.5 days | $35/hr | $2,100 | 1 person × 8h × 1.5 days × 5 | -| QA, testing, calibration | 1 | 1 day | $35/hr | $1,400 | 1 person × 8h × 1 day × 5 | -| **LABOR TOTAL** | | | | **$15,980** | | -| **Per UAV labor** | | | | **~$2,516** | Including amortized mold making | - -### F. Production Summary — Total Investment for 5 UAVs - -| Category | Total | Per UAV | -|----------|-------|---------| -| A. Equipment & Tooling (one-time) | $3,335 | $667 | -| B. Raw Materials | $3,287 | $657 | -| C. Electronics & Propulsion | $8,910 | $1,782 | -| D. Consumables & Misc | $1,000 | $200 | -| E. Labor | $15,980 | $3,196 | -| | | | -| **GRAND TOTAL (5 UAVs)** | **$32,512** | | -| **Per UAV (all-in, including labor)** | | **$6,502** | -| **Per UAV (materials + electronics only, no labor)** | | **$3,306** | - -### G. Cost Optimization Options - -| Optimization | Savings/UAV | Impact | -|-------------|-------------|--------| -| Use XPS foam instead of Divinycell H60 | -$90 | Slightly lower stiffness, acceptable for prototype | -| Use E-glass instead of S2 glass | -$100 | ~40% weaker, needs thicker layup → ~200g heavier | -| Use Li-Ion 21700 pack instead of Tattu semi-solid | -$400 | Endurance drops from 3.5-4.7h to 2.4-3.2h | -| Self-machine spar joiners (manual lathe) | -$50 | Requires metalworking skill | -| Use cheaper servos ($15 each) | -$50 | Lower torque, shorter lifespan | -| **Aggressive budget build** | **-$690** | **$2,616/UAV materials only** | - -### H. Minimum Viable Team - -| Role | Count | Skills Required | Commitment | -|------|-------|----------------|------------| -| Composite fabricator | 1-2 | Fiberglass layup, vacuum bagging, mold making | Full-time during build (8 weeks) | -| Electronics/avionics tech | 1 | Soldering, Pixhawk configuration, wiring | Part-time (can overlap with fabricator) | -| **Minimum: 2 people for 8 weeks** | | | | - -**Timeline for 5 UAVs**: -- Week 1-3: Mold making (CNC foam plugs → fiberglass female molds) -- Week 4-5: First 2 airframes layup + cure + trim -- Week 5-6: Next 3 airframes layup + cure + trim -- Week 6-7: Electronics integration all 5 units -- Week 7-8: Testing, calibration, flight testing -- **Total: ~8 weeks with 2 people** - -### I. Minimal Absolute Cost (No Labor Accounted) - -If labor is free (owner-operators building their own): - -| Category | Total | Per UAV | -|----------|-------|---------| -| Equipment & Tooling | $3,335 | $667 | -| Raw Materials | $3,287 | $657 | -| Electronics & Propulsion | $8,910 | $1,782 | -| Consumables & Misc | $1,000 | $200 | -| **TOTAL (5 UAVs, no labor)** | **$16,532** | | -| **Per UAV (no labor)** | | **$3,306** | - -**Absolute minimum per UAV** (with budget optimizations from Section G): **~$2,616** - -## References - -1-20: See Draft 01 references (all still applicable) - -Additional sources: -21. S-Glass vs E-Glass comparison: https://wiki-science.blog/s-glass-vs-e-glass-key-differences -22. Reinforcement Fiber Reference: https://explorecomposites.com/materials-library/fiber-ref/ -23. S-Glass vs Carbon Fiber: https://carbonfiberfriend.com/s-glass-vs-carbon-fiber/ -24. RF Attenuation by composite materials: https://www.rocketryforum.com/threads/rf-attenuation-by-body-tube-nosecone.186634/ -25. Russian foamplast UAV (max radio transparency): https://bulgarianmilitary.com/2023/10/15/russia-unveils-foamplast-fpv-uav-with-max-radio-transparency/ -26. Albatross UAV Kit: https://store.appliedaeronautics.com/albatross-uav-kit/ -27. UAV spar connector development: https://www.konelson.net/home/spar-connector-development -28. Scabro Innovations UAV prototyping: https://scabroinnovations.com/diensten/composite-airframe-prototyping/ -29. Tattu 330Wh/kg 6S pricing — GenStattu: https://genstattu.com/tattu-semi-solid-state-330wh-kg-33000mah-10c-22-2v-6s1p-g-tech-lipo-battery-pack-with-xt90-s-plug/ -30. Pixhawk 6X pricing — Holybro: https://holybro.com/products/pixhawk-6x-rev3 -31. RFD900x pricing — DrUAV: https://druav.com/products/rfdesign-rfd900x -32. Composite workshop setup — Fibre Glast: https://www.fibreglast.com/blogs/learning-center/setting-up-a-composite-shop -33. CNC hot wire foam cutter — Vortex-RC: https://www.vortex-rc.com/product/4-axis-diy-hot-wire-cnc-for-rc-hobbyists-aeromodellers-and-designers/ -34. Composite mold making — Canuck Engineering: https://www.canuckengineering.com/capabilities/composite-molds/ diff --git a/_standalone/UAV_frame_material/01_solution/solution_draft03.md b/_standalone/UAV_frame_material/01_solution/solution_draft03.md deleted file mode 100644 index 5ffc3ba..0000000 --- a/_standalone/UAV_frame_material/01_solution/solution_draft03.md +++ /dev/null @@ -1,489 +0,0 @@ -# Solution Draft (Rev 03) — 8+ Hour Endurance - -## Assessment Findings - -| Old Component Solution | Weak Point | New Solution | -|------------------------|------------|-------------| -| Single 6S 33Ah battery (1001 Wh) | Only 3.5-4.7h endurance — insufficient for 8h target | 4× 6S 33Ah 350 Wh/kg packs (2930 Wh) or 2× 12S 33Ah (2930 Wh) | -| 10 kg MTOW | Cannot carry enough battery for 8h at current energy densities | Increase to 18 kg MTOW | -| 3.0m wingspan | L/D ≈ 15 at AR≈10; higher wing loading increases cruise power | Scale to 3.8-4.0m wingspan (AR≈14, L/D≈17) | -| S2 FG airframe (3m) | Good but limited battery capacity due to MTOW constraint | S2 FG airframe scaled to 4m; same material, radio transparency preserved | -| Motor + ESC (500W class) | Undersized for 18 kg platform | Scale to 700-800W motor + 60-80A ESC | -| ADTI 20L V1 nav camera (20MP APS-C) | 34 cm/px GSD at 2 km — too coarse for feature matching | ADTI 26S V1 (26MP APS-C, mech. shutter) + 35mm lens → 21.6 cm/px at 2 km | -| Viewpro A40 Pro AI camera (1080p, 40×) | 1080p limits FoV to 65×37m at max zoom from 2 km | Viewpro Z40K (4K, 20×) → 2.7 cm/px GSD, 103×58m FoV, 479g lighter | - -## Product Solution Description - -A scaled-up modular, radio-transparent electric fixed-wing reconnaissance UAV built with **S2 fiberglass/foam-core sandwich construction** and internal carbon fiber spar reinforcement. Wingspan increased to **3.8-4.0m** for better aerodynamic efficiency (L/D ≈ 17). MTOW raised to **18 kg** to accommodate **4× semi-solid battery packs** totaling ~2930 Wh. Disassembles into modular sections for pickup truck transport; 2 complete aircraft fit in a standard 6.5ft bed. - -**Target performance**: 8-9 hours practical flight endurance, 18 kg MTOW, 3.8-4.0m wingspan. Camera payload: ADTI 26S V1 (26MP, mech. shutter, 21.6 cm/px at 2 km) for GPS-denied navigation + Viewpro Z40K (4K, 20× zoom, 2.7 cm/px at 2 km) for AI reconnaissance. Total payload 892g — 578g lighter than Draft 02. - -``` -┌──────────────────────────────────────────────────────────────────┐ -│ SCALED-UP MODULAR AIRFRAME LAYOUT │ -│ │ -│ LEFT WING PANEL FUSELAGE RIGHT WING PANEL │ -│ (~1.9m span) (~1.1m) (~1.9m span) │ -│ ┌──────────────┐ ┌──────────────────┐ ┌──────────────┐ │ -│ │ S2 FG skin │ │ S2 FG skin │ │ S2 FG skin │ │ -│ │ PVC foam core│◄─►│ Battery bay ×4 │◄─►│ PVC foam core│ │ -│ │ CF spar cap │ │ Payload bay │ │ CF spar cap │ │ -│ │ (internal) │ │ Motor (700W) │ │ (internal) │ │ -│ └──────────────┘ └──────────────────┘ └──────────────┘ │ -│ │ -│ Wing-fuselage joint: aluminum spar joiner + 2 pin locks │ -│ Assembly time target: < 10 minutes │ -│ Material: S2 fiberglass = RF transparent (GPS/telemetry OK) │ -│ Internal CF spar: minimal RF impact (narrow linear element) │ -│ │ -│ BATTERY BAY (4 packs, 2S2P wiring for 12S 66Ah): │ -│ ┌──────┐ ┌──────┐ │ -│ │ 6S │ │ 6S │ Series pair A → 12S 33Ah │ -│ │ 33Ah │ │ 33Ah │ │ -│ └──────┘ └──────┘ │ -│ ┌──────┐ ┌──────┐ │ -│ │ 6S │ │ 6S │ Series pair B → 12S 33Ah │ -│ │ 33Ah │ │ 33Ah │ Pairs A+B in parallel → 12S 66Ah │ -│ └──────┘ └──────┘ │ -│ Total: 44.4V × 66Ah = 2930 Wh │ -└──────────────────────────────────────────────────────────────────┘ - -TRANSPORT CONFIGURATION (standard pickup truck, 6.5ft bed): -┌───────────────────────────────────────────────┐ -│ Truck bed: 198cm × 130cm (between wells) │ -│ ┌────────────────────┐ ┌──────────────────┐ │ -│ │ Plane 1 wings │ │ Plane 2 wings │ │ -│ │ (2 × 190cm long) │ │ (2 × 190cm) │ │ -│ │ stacked ~25cm │ │ stacked ~25cm │ │ -│ ├────────────────────┤ ├──────────────────┤ │ -│ │ Plane 1 fuselage │ │ Plane 2 fuse. │ │ -│ │ (~110cm) │ │ (~110cm) │ │ -│ └────────────────────┘ └──────────────────┘ │ -│ Width per plane: ~35cm × 2 = 70cm │ -│ Total width: 70cm × 2 = 140cm > 130cm ⚠️ │ -│ → Stack all 4 wings in one pile + 2 fuselages │ -│ alongside: 190cm × 70cm + 110cm × 40cm │ -│ Total width: ~110cm < 130cm ✓ │ -│ Total length: 190cm < 198cm ✓ │ -└───────────────────────────────────────────────┘ -``` - -## Existing/Competitor Solutions Analysis - -| Platform | MTOW | Endurance | Battery | Wingspan | Material | RF Transparent | Transport | Price | -|----------|------|-----------|---------|----------|----------|---------------|-----------|-------| -| DeltaQuad Evo (standard) | 10 kg | 4h32m | 2× 22Ah semi-solid | 2.69m | CF+Kevlar+FG | Partial | Wing removable | $25,000+ | -| DeltaQuad Evo (record) | ~9 kg | **8h55m** | 2× Tulip Tech 450 Wh/kg | 2.69m | CF+Kevlar+FG | Partial | Wing removable | N/A (prototype batteries) | -| **YUAV Y37** | 17-20 kg | **8.5h** (1 kg payload) | 12S 60Ah semi-solid (~2700 Wh) | 3.7m | Full carbon | ❌ No | 138×55×45 cm | ~$15,000+ est. | -| NOCTUA (H2) | 20-25 kg | **10h** | Hydrogen fuel cell | 5.10m | CFRP | ❌ No | Field-portable | Academic | -| CW-80E (JOUAV) | >25 kg | 10-11h | Large electric | >4m | Composite | Unknown | Vehicle-mounted | $50,000+ | -| Albatross | 10 kg | 4h | LiPo | 3.0m | FG+CF | Partial | Removable wings | $4,800 RTF | -| **Our Draft 03** | **18 kg** | **8-9h target** | **4× 6S 33Ah 330+ Wh/kg** | **3.8-4.0m** | **S2 FG** | **✅ Yes** | **2 in pickup** | **$5,500-7,500** | - -**Key insight**: YUAV Y37 proves that 8.5h at 17-20 kg MTOW with 3.7m wingspan and semi-solid batteries is achievable in production. Our design targets similar performance with S2 FG (heavier but radio transparent) offset by slightly longer wingspan for better L/D. - -## Architecture - -### Component: Frame Material - -| Solution | Advantages | Limitations | Cost (per unit) | Fit | -|----------|-----------|-------------|----------------|-----| -| **S2 fiberglass skin + PVC foam core + internal CF spar (recommended)** | RF transparent, good impact tolerance, field repairable, proven at 3m scale | ~25-30% heavier than carbon at 4m scale; requires careful weight management | $600-1,200 materials | ✅ Only option that preserves RF transparency | -| Full carbon fiber (YUAV Y37 approach) | Lightest possible (~4-5 kg bare at 4m), best L/D | Blocks RF — GPS/telemetry degraded | $1,500-3,000 | ❌ Fails radio transparency | -| Carbon-Kevlar hybrid | Good crash survivability, lighter than FG | Partially blocks RF, expensive, hard to machine | $1,200-2,500 | ❌ RF compromise | -| S2 FG with Dyneema (UHMWPE) reinforcement | RF transparent, excellent impact resistance | Dyneema has poor compression strength, complex bonding | $800-1,500 | ⚠️ Complex but possible | - -### Component: Wingspan & Aerodynamics - -| Solution | L/D | Platform Weight | Endurance Impact | Transport | Fit | -|----------|-----|----------------|-----------------|-----------|-----| -| **3.8m wingspan (recommended for 2-in-pickup)** | ~17 | 6.5-7.5 kg | Baseline | 190cm half-wings fit 198cm bed ✓ | ✅ Best balance | -| 4.0m wingspan | ~17.5 | 7.0-8.0 kg | +3-5% | 200cm > 198cm; needs 3-section wing | ⚠️ Good but transport harder | -| 4.5m wingspan (single UAV transport) | ~18.5 | 8.0-9.5 kg | +8-12% | 225cm half-wings; 1 UAV per pickup | ⚠️ Maximum endurance, 1 plane only | -| 3.0m wingspan (Draft 02) | ~15 | 5.3 kg | Reference (3.5-4.7h) | 150cm easily fits | ❌ Insufficient for 8h | - -**Recommendation**: 3.8m wingspan as primary design. Half-wings at 190cm fit within 198cm pickup bed length. AR ≈ 13.6, L/D ≈ 17. Optional detachable wingtips (+20cm per side = 4.2m total) for maximum endurance missions where single-UAV transport is acceptable. - -### Component: Battery Configuration - -| Solution | Total Energy | Weight | Wiring | Cost | Endurance (18 kg) | Fit | -|----------|-------------|--------|--------|------|-------------------|-----| -| **4× Tattu 6S 33Ah 350 Wh/kg (recommended)** | 2930 Wh | 8.86 kg | 2S2P → 12S 66Ah | ~$2,930 | **8-8.5h** | ✅ Best modularity, off-the-shelf | -| 2× Tattu 12S 33Ah 350 Wh/kg | 2930 Wh | 8.89 kg | 2P → 12S 66Ah | ~$3,800 | **8-8.5h** | ✅ Simpler wiring, same endurance | -| 1× Tattu 12S 76Ah 330 Wh/kg | 3374 Wh | 10.88 kg | Direct 12S | ~$4,300 | **8.5-9h** (needs 20 kg MTOW) | ⚠️ Best energy but requires 20 kg MTOW | -| 4× Xingto 6S 30Ah 370 Wh/kg | ~3280 Wh (est.) | ~8.9 kg (est.) | 2S2P → 12S 60Ah | ~$3,000-4,000 | **9-9.5h** | ⚠️ Higher density but less verified | -| Future: 4× 450 Wh/kg packs | ~4000 Wh | ~8.9 kg | 2S2P → 12S | $5,000-8,000 est. | **10-11h** | ⚠️ Not yet available at volume | - -**4-Battery Configuration Detail (2S2P)**: -- 2 series pairs: each pair = 2× 6S in series = 12S 33Ah (44.4V, 1465 Wh) -- 2 parallel pairs: both 12S pairs in parallel = 12S 66Ah (44.4V, 2930 Wh) -- Requires: 2× series adapters, 1× parallel bus bar, battery management for each pair -- Advantage: individual pack replacement if one degrades; modular packing for transport -- Disadvantage: more wiring complexity, more connectors (failure points) - -**2-Battery Configuration (2P)**: -- 2× 12S 33Ah in parallel = 12S 66Ah (44.4V, 2930 Wh) -- Simpler wiring, fewer connectors -- Each pack heavier individually (4.4 kg) but fewer handling steps - -### Component: Motor & Propulsion (scaled for 18 kg) - -| Solution | Power | Weight | Efficiency | Cost | Fit | -|----------|-------|--------|-----------|------|-----| -| **T-Motor U8 Lite (recommended)** | 700W max, 200-300W cruise | ~250g | η ≈ 0.92 at cruise | ~$150 | ✅ Proven for this MTOW class | -| Dualsky XM6350EA | 800W max | ~280g | η ≈ 0.90 | ~$120 | ✅ Good budget option | -| SunnySky V4014 | 600W max | ~210g | η ≈ 0.91 | ~$90 | ⚠️ Borderline power margin | - -Propeller: 16×10 or 17×10 folding (vs 13×8 in Draft 02). Larger prop = higher propulsive efficiency at lower RPM, critical for endurance. - -ESC: 60-80A continuous rating (vs 40-60A in Draft 02). - -### Component: Foam Core - -Same as Draft 02 — PVC Divinycell H60 recommended. No change. - -### Component: Wing-Fuselage Joint - -Same aluminum spar joiner + pin lock concept as Draft 02, but scaled for larger wing loads: -- Spar tube: 25mm OD (vs 20mm) to handle higher bending moments -- Joiner: machined 7075-T6 aluminum (stronger than 6061-T6) -- Weight: ~0.35 kg per joint set (vs 0.2-0.3 in Draft 02) - -### Component: Camera Payload (Upgraded for 2 km Altitude) - -**GSD = (Sensor Width × Altitude) / (Focal Length × Image Width)** - -#### Navigation Camera (GPS-Denied System) - -| Solution | Sensor | Resolution | Weight (body+lens) | GSD at 2 km | FoV at 2 km | Cost | Fit | -|----------|--------|-----------|-------------------|-------------|-------------|------|-----| -| ADTI 20L V1 (Draft 02) | APS-C 23.2mm | 20MP (5456×3632) | ~271g (121g+150g) | 34 cm/px (25mm) | 1855×1235m | $480+lens | ❌ Too coarse at 2 km | -| **ADTI 26S V1 + 35mm (recommended)** | APS-C 23.4mm | 26MP (6192×4128) | **~172g** (122g+50g) | **21.6 cm/px** (35mm) | 1337×892m | **$1,890** | ✅ Best value: mech. shutter, light, good GSD | -| ADTI 61PRO + 50mm | FF 35.7mm | 61MP (9504×6336) | ~426g (276g+150g) | **15 cm/px** (50mm) | 1426×950m | $2,830 | ✅ Best GSD but +$940 over 26S | -| Sony ILX-LR1 + 50mm | FF 35.7mm | 61MP (9504×6336) | ~393g (243g+150g) | **15 cm/px** (50mm) | 1426×950m | $3,100 | ⚠️ Lightest 61MP, drone-native, most expensive | -| ADTI 36S + 50mm | FF 35.9mm | 36MP (7360×4912) | ~390g (240g+150g) | 19.5 cm/px (50mm) | 1434×957m | $1,600 | ❌ No mechanical shutter — rolling shutter distortion | - -**Recommendation**: ADTI 26S V1 with 35mm fixed lens. Mechanical shutter eliminates rolling shutter distortion (critical for GPS-denied feature matching at speed). 21.6 cm/pixel GSD at 2 km is sufficient for terrain feature matching, road/building identification, and satellite image correlation. IMX571 back-illuminated sensor delivers excellent dynamic range. Lightest option at 172g. Upgrade to ADTI 61PRO (+$940, 15 cm/px) if finer GSD is needed. - -#### AI Camera (Reconnaissance — "Nice Shots" from 2 km) - -| Solution | Sensor | Resolution | Zoom | Weight | GSD at 2 km (max zoom) | FoV at max zoom | Thermal | Cost | Fit | -|----------|--------|-----------|------|--------|----------------------|----------------|---------|------|-----| -| Viewpro A40 Pro (Draft 02) | 1/2.8" | 1080p (1920×1080) | 40× optical | 1074g | 3.4 cm/px | 65×37m | 640×512 | $2,999 | ⚠️ Good zoom but 1080p limits FoV | -| **Viewpro Z40K (recommended)** | 1/2.3" | **4K** (3840×2160) | 20× optical + 25× IA (4K) | **595g** | **2.7 cm/px** | **103×58m** | No | $2,999-4,879 | ✅ Better GSD, 2.5× wider FoV, 479g lighter | -| Viewpro Z40TIR | 1/2.3" | **4K** (3840×2160) | 20× optical + 40× IA (1080p) | ~700g est. | **2.7 cm/px** (4K) | 103×58m | 640×480 | ~$5,000 est. | ✅ Best of both: 4K + thermal | -| Viewpro A40T Pro | 1/2.8" | 1080p | 40× optical | ~1200g | 3.4 cm/px | 65×37m | 640×512 | $5,999 | ⚠️ Thermal + zoom but 1080p, heavy | - -**Recommendation**: Viewpro Z40K. At 4K resolution with 20× optical zoom, it delivers **better GSD (2.7 vs 3.4 cm/px)** and **2.5× wider field of view** at max zoom than the A40 Pro at 1080p/40×. And it's **479g lighter** — weight that can go to battery or margin. If thermal is needed, step up to Z40TIR. - -At 2.7 cm/pixel: vehicles clearly identifiable, human figures detectable, building details visible. At 20× wide end (53 cm/px): wide-area situational awareness covering ~2 km × 1.2 km. - -#### Payload Weight Summary (Upgraded) - -| Component | Draft 02/03 | Upgraded | Delta | -|-----------|-------------|---------|-------| -| Navigation camera (body+lens) | ADTI 20L + 25mm = 271g | ADTI 26S + 35mm = 172g | **-99g** | -| AI camera + gimbal | Viewpro A40 Pro = 1074g | Viewpro Z40K = 595g | **-479g** | -| Jetson Orin Nano Super | 60g | 60g | — | -| Pixhawk 6x + GPS | 65g | 65g | — | -| **Payload total** | **1470g** | **892g** | **-578g** | - -**Net effect: 578g saved.** This frees ~191 Wh of battery capacity at 331 Wh/kg (~42 min extra endurance) or provides comfortable MTOW margin. - -### Component: Alternative Power Sources Assessment - -| Solution | Endurance | System Weight | Cost | Logistics | RF Compat. | Fit | -|----------|-----------|---------------|------|-----------|-----------|-----| -| **Semi-solid battery (primary)** | 8-9h | 8.9 kg | $2,930-3,800 | ✅ Charge from any outlet | ✅ S2 FG | ✅ Recommended | -| Solid-state 450 Wh/kg (upgrade path) | 10-11h | 8.9 kg (or lighter) | $5,000-8,000 est. | ✅ Same as above | ✅ S2 FG | ⚠️ Future upgrade | -| Hydrogen fuel cell | 15-17h | 9.8 kg (FC + tank) | $25,000-40,000 | ❌ H2 supply in field | ❌ Needs CFRP | ❌ Impractical | -| Solar + battery hybrid | +1h over battery alone | +0.5-1.0 kg panels | +$500-1,500 | ⚠️ Weather dependent | ⚠️ Panels on wing | ❌ Marginal gain | - -## Weight Budget (18 kg MTOW, 3.8m Wingspan) - -| Component | Weight (kg) | Notes | -|-----------|-------------|-------| -| Airframe (S2 FG sandwich + CF spar, 3.8m) | 5.5-6.5 | Scaled from 3m (3.8-4.5 kg) proportional to area | -| Wing joints (aluminum 7075) | 0.35 | Larger joiner for higher loads | -| Motor (700W) + ESC (80A) + folding prop 16" | 0.6 | Scaled up from Draft 02 | -| Wiring, connectors, battery bus | 0.45 | More wiring for 4-battery config | -| **Platform subtotal** | **6.9-7.9** | | -| Payload (ADTI 26S + Z40K + Jetson + Pixhawk + GPS) | 0.89 | Upgraded cameras — 578g lighter than Draft 02 payload | -| Battery (4× Tattu 6S 33Ah) | 8.86 | 4 × 2.216 kg | -| **Total** | **16.7-17.7** | | - -Conservative: 7.9 + 0.89 + 8.86 = **17.65 kg** (well under 18 kg MTOW ✓). -Optimistic: 6.9 + 0.89 + 8.86 = **16.65 kg** (1.35 kg margin for accessories or extra battery). - -## Endurance Estimates - -### Flight Physics Parameters -- Cruise speed: 17 m/s (optimized for endurance at this wing loading) -- L/D at cruise: 17 (conservative; L/D_max ≈ 19-20 for AR=13.6) -- Overall propulsive efficiency: η = 0.72 (motor 0.92 × prop 0.82 × ESC 0.95) - -### Cruise Power Calculation -P_cruise = (W × g × V) / (L/D × η) -= (18 × 9.81 × 17) / (17 × 0.72) -= 3001.9 / 12.24 = **245W** -P_total = 245 + 30 (payload) = **275W** - -### Endurance by Battery Configuration - -| Config | Energy (Wh) | Usable 80% (Wh) | Theoretical (h) | Practical (h) | Conservative (h) | -|--------|------------|------------------|-----------------|---------------|------------------| -| 4× 6S 33Ah 330 Wh/kg | 2930 | 2344 | 10.7 | **8.5** | **7.5-8.0** | -| 2× 12S 33Ah 350 Wh/kg | 2930 | 2344 | 10.7 | **8.5** | **7.5-8.0** | -| 4× Xingto 370 Wh/kg (est.) | ~3280 | ~2624 | 11.9 | **9.5** | **8.5-9.0** | -| 1× 12S 76Ah 330 Wh/kg (20 kg MTOW) | 3374 | 2699 | 10.5* | **8.4** | **7.5-8.0** | -| Future 450 Wh/kg (est.) | ~4000 | ~3200 | 14.5 | **11.6** | **10-10.5** | - -*Higher MTOW (20 kg) → higher cruise power (~300W) partially offsets larger battery. - -**Practical** = with 80% DoD. **Conservative** = with additional 10% real-world margin (wind, maneuvers, non-optimal cruise). - -### Cross-Validation Against Reference Platforms - -| Reference | MTOW | Energy | Endurance | Wh/min | Our scaled | -|-----------|------|--------|-----------|--------|------------| -| DeltaQuad Evo (standard) | 10 kg | 976 Wh | 4.5h | 3.62 | — | -| DeltaQuad Evo (record) | ~9 kg | ~1800 Wh | 8.9h | 3.37 | — | -| YUAV Y37 | ~17 kg | 2700 Wh | 8.5h | 5.29 | Our 18 kg @ 2930 Wh: extrapolated **8.0-8.7h** | - -The YUAV Y37 cross-check (full carbon, 3.7m) extrapolates to 8.0-8.7h for our S2 FG design at 18 kg with 2930 Wh, accounting for the ~10% aerodynamic penalty of fiberglass vs carbon. This confirms our calculated range. - -### Comparison to Draft 02 - -| Parameter | Draft 02 | Draft 03 | Change | -|-----------|----------|----------|--------| -| MTOW | 10 kg | 18 kg | +80% | -| Wingspan | 3.0m | 3.8m | +27% | -| Battery weight | 3.2 kg | 8.86 kg | +177% | -| Battery energy | 1001 Wh | 2930 Wh | +193% | -| Cruise power | ~170W | ~275W | +62% | -| Practical endurance | 3.5-4.7h | **8-8.5h** | +80-140% | -| BOM cost | $2,800-4,500 | $5,500-7,500 | +67% | - -## BOM Cost Estimate (Per Unit, 8h Config) - -| Component | Low Est. | High Est. | Notes | -|-----------|----------|-----------|-------| -| S2 fiberglass fabric | $250 | $500 | ~14 m² at $15-30/m² (40% more than 3m) | -| PVC foam core (Divinycell H60) | $160 | $300 | Wing + fuselage + tail | -| Epoxy resin + hardener | $120 | $230 | ~3.5-4 kg resin | -| CF spar material (tube + UD tape) | $80 | $150 | Longer spars for 3.8m | -| Aluminum spar joiners 7075-T6 | $50 | $100 | Larger, machined | -| Vacuum bagging consumables | $40 | $80 | | -| Motor (T-Motor U8 Lite or equiv.) | $120 | $200 | 700W class | -| ESC (60-80A) | $60 | $120 | | -| Folding propeller (16×10) | $20 | $40 | | -| Servos (6× for larger surfaces) | $80 | $160 | | -| Wiring, connectors, battery bus | $80 | $150 | More complex 4-battery wiring | -| **Batteries (4× Tattu 6S 33Ah 350)** | **$2,930** | **$2,930** | Retail price | -| RC receiver | $30 | $80 | | -| Telemetry radio | $100 | $300 | | -| Transport case / padded bag | $80 | $200 | Larger for 190cm wings | -| **Subtotal (airframe + propulsion + battery)** | **$4,200** | **$5,540** | | -| Nav camera: ADTI 26S V1 + 35mm lens | $1,890 | $1,890 | 26MP APS-C, mech. shutter, 21.6 cm/px at 2 km | -| AI camera: Viewpro Z40K 4K gimbal | $2,999 | $4,879 | 4K 20× zoom, 2.7 cm/px at 2 km | -| Pixhawk 6x + GPS | $300 | $500 | | -| **Total BOM (complete unit)** | **$9,389** | **$12,809** | | - -With 2× 12S 33Ah instead of 4× 6S: battery cost rises to ~$3,800 (+$870). -With Xingto 370 Wh/kg: battery cost est. ~$3,000-4,000 but better endurance. - -**Per-unit cost at batch of 5+**: **$10,500-14,500** (including cameras, tooling amortization) -**Per-unit cost first prototype**: **$13,500-17,000** (includes tooling) - -Optional upgrade: swap ADTI 26S → ADTI 61PRO (+$940/unit) for 15 cm/px GSD if finer nav resolution needed. - -## Battery Upgrade Roadmap - -| Timeline | Battery Technology | Energy Density (pack) | Endurance (18 kg platform) | Availability | -|----------|-------------------|----------------------|---------------------------|-------------| -| **Now (2025-2026)** | Tattu/Grepow semi-solid 350 Wh/kg | ~331 Wh/kg | **8-8.5h** | ✅ Off-the-shelf | -| **Now (2025-2026)** | Xingto semi-solid 370 Wh/kg | ~350 Wh/kg | **9-9.5h** | ✅ Available (limited) | -| **Near-term (2026-2027)** | Tulip Tech Ampera solid-state | ~430 Wh/kg | **10-11h** | ⚠️ Shipping to select partners | -| **Near-term (2026-2027)** | Amprius SA102 silicon-nanowire | ~430 Wh/kg | **10-11h** | ⚠️ Pilot production | -| **Future (2027-2028)** | Tulip Tech Enerza / Amprius 500 | ~475 Wh/kg | **11-12h** | ❓ Announced, not volume | - -### Solid-State 450 Wh/kg Cost Impact - -Solid-state batteries (Tulip Tech, Amprius) are not yet priced publicly — both sell on custom quotes to defense/aerospace customers. Industry estimates for 2025-2026 production cost: $800-1,000/kWh. With small-volume aerospace/defense retail markup (1.5-3×), estimated retail: $1,500-2,500/kWh. - -| Battery | Pack Wh/kg | Total Energy | Endurance | Battery Cost | Total UAV BOM | Delta vs Baseline | -|---------|-----------|-------------|-----------|-------------|--------------|------------------| -| Tattu semi-solid (baseline) | ~331 | 2930 Wh | 8-8.5h | **$2,930** | ~$6,500 | — | -| Solid-state 450 (low est.) | ~430 | 3810 Wh | 10-11h | **$5,700** | ~$9,300 | **+$2,800 (+43%)** | -| Solid-state 450 (mid est.) | ~430 | 3810 Wh | 10-11h | **$7,600** | ~$11,200 | **+$4,700 (+72%)** | -| Solid-state 450 (defense premium) | ~430 | 3810 Wh | 10-11h | **$9,500** | ~$13,100 | **+$6,600 (+100%)** | - -Prices should converge toward production cost ($800-1,000/kWh → low estimate above) as Amprius scales 1.8 GWh contract manufacturing capacity and Tulip Tech ramps with Dutch MoD backing through 2026-2027. - -**Design for upgradability**: The battery bay should accommodate the same physical volume regardless of chemistry. Start with Tattu semi-solid at 8-8.5h for $2,930. When solid-state packs become available in compatible form factor, drop them in for 10-11h — no airframe changes needed, just a battery swap. - -## Modular Transport Specifications - -| Dimension | Value (3.8m) | Value (4.0m, 3-section) | -|-----------|-------------|------------------------| -| Wing panel length | 190 cm (half-span) | 170 cm outer + 60 cm center | -| Wing panel chord | 28-30 cm | 28-30 cm | -| Wing panel thickness | 4-5 cm | 4-5 cm | -| Fuselage length | 110 cm | 110 cm | -| Fuselage width/height | 18-22 cm | 18-22 cm | -| Assembly time | < 12 minutes | < 15 minutes | -| Disassembly time | < 7 minutes | < 10 minutes | - -**Pickup truck (2 planes, 3.8m design)**: All wing panels stack in one pile (190×30×20 cm = 4 panels × 5cm). Fuselages alongside (110×22 cm × 2). Total footprint: 190×110 cm < 198×130 cm. ✅ - -**Car trunk (1 plane, 3.8m)**: Tight but possible in larger sedans/SUVs. Two wing panels (190cm) require fold-down rear seats or diagonal placement. Fuselage fits easily. ⚠️ Borderline for sedans; SUV or wagon preferred. - -## Hydrogen Fuel Cell — Assessment (Not Recommended) - -Investigated as requested. While hydrogen offers dramatically higher endurance (15-17h), it is **not recommended** for this application: - -| Factor | Assessment | -|--------|-----------| -| Endurance | ✅ 15-17h theoretical with IE-SOAR 2.4 + 10.8L tank | -| System weight | ⚠️ ~9.8 kg (FC 4.8 + tank 4.2 + regulator 0.3 + buffer 0.5) — similar to 4-battery pack but higher complexity | -| Cost | ❌ $25,000-40,000 per unit (FC module alone est. $15-25k) | -| H2 logistics | ❌ Compressed hydrogen (350 bar) supply chain in eastern Ukraine = extremely difficult. Requires specialized transport, hazmat protocols, compressor equipment | -| Radio transparency | ❌ H2 platforms (NOCTUA, Doosan) use CFRP to save weight, conflicting with RF requirement | -| Reliability | ⚠️ Fuel cells have 1000h life but are sensitive to contaminants and temperature extremes | -| Practical recommendation | Revisit only if (1) hydrogen infrastructure develops in theater, (2) RF transparency requirement is relaxed, or (3) endurance requirement exceeds 12h | - -## Solar Augmentation — Assessment (Not Recommended) - -| Factor | Assessment | -|--------|-----------| -| Available wing area | ~0.7 m² usable upper surface | -| Solar power at altitude | ~35-40W average (Ukrainian latitude, 22% efficient flexible panels) | -| Endurance gain | +1.0-1.5h theoretical, but -0.5h from panel weight → net +0.5-1.0h | -| Cost | +$500-1,500 per unit for flexible panels | -| Complexity | Adds MPPT controller, fragile surface, weather dependency | -| Recommendation | Not worth the cost/complexity for ~1h marginal gain | - -## Testing Strategy - -### Integration / Functional Tests -- Static wing load test: 3× max flight load at spar joiner (verify no failure at 3g with 18 kg MTOW) -- Wing joint cycling: 100× assembly/disassembly, verify no wear (critical at higher loads) -- RF transparency test: measure GPS signal through airframe skin (target: < 3 dB attenuation) -- Assembly time test: verify < 12 minutes from transport case to flight-ready -- Battery wiring test: verify 2S2P balancing, measure voltage sag under load, test fail-safe (single pack disconnect) -- Range/endurance test: fly at cruise until 20% reserve, measure actual vs predicted -- Payload integration: electronics function under vibration at 18 kg flight loads - -### Non-Functional Tests -- Transport test: load 2 planes in pickup, drive 100 km on mixed roads, verify no damage -- Hard landing test: belly landing at 2.5 m/s descent (higher than Draft 02 due to heavier aircraft) -- Field repair test: wing skin puncture → FG patch + epoxy → airworthy in < 30 minutes -- Temperature test: battery + avionics at -10°C and +45°C -- Battery endurance test: 50 charge/discharge cycles on 4-battery 2S2P config, verify balanced degradation -- CG test: verify stable CG across all battery configurations (4-battery, 3-battery partial, 2-battery emergency) -- Emergency flight test: verify aircraft can fly safely on 2 batteries (reduced endurance) if 1 series pair fails - -## Production BOM: 5 UAVs From Scratch (8h Config) - -### A. One-Time Equipment & Tooling - -Same as Draft 02 base equipment: $3,335. Add: -| Item | Qty | Unit Price | Total | Notes | -|------|-----|-----------|-------|-------| -| Larger mold materials (4m wing + fuselage) | 1 set | $900 | $900 | MDF plugs + tooling epoxy for 3.8m molds | -| Aluminum spar joiner machining (7075, 12 sets) | 1 | $600 | $600 | Larger joiners, CNC outsourced | -| Battery parallel bus bar / wiring jig | 1 | $100 | $100 | For consistent 2S2P assembly | -| **Equipment & Tooling TOTAL** | | | **$4,935** | | - -### B. Raw Materials (5 UAVs + 20% waste) - -| Item | Qty (5 UAVs + margin) | Unit Price | Total | -|------|----------------------|-----------|-------| -| S2 fiberglass fabric 6oz | 100 yards | $12.50/yard | $1,250 | -| PVC foam Divinycell H60 10mm | 24 sheets | $40/sheet | $960 | -| Laminating epoxy resin | 6 gallons | $125/gal | $750 | -| Epoxy hardener | 3 gallons | $80/gal | $240 | -| Carbon fiber tube (spar, 25mm OD, 2.0m) | 12 | $35 each | $420 | -| Carbon fiber UD tape 25mm | 50 m | $5/m | $250 | -| Vacuum bagging consumables | — | — | $400 | -| Misc hardware | — | — | $250 | -| **Materials TOTAL (5 UAVs)** | | | **$4,520** | -| **Per UAV materials** | | | **~$904** | - -### C. Electronics & Propulsion (per UAV × 5) - -| Item | Per UAV | ×5 Total | -|------|---------|----------| -| Motor (T-Motor U8 Lite or equiv.) | $150 | $750 | -| ESC (80A) | $80 | $400 | -| Folding propeller 16×10 (2 per UAV) | $40 | $200 | -| Servos (6× digital metal gear) | $150 | $750 | -| Nav camera: ADTI 26S V1 + 35mm lens | $1,890 | $9,450 | -| AI camera: Viewpro Z40K 4K gimbal | $3,500 | $17,500 | -| Pixhawk 6X Mini + GPS | $380 | $1,900 | -| RC receiver (TBS Crossfire) | $60 | $300 | -| RFD900x telemetry | $170 air × 5 + $350 GCS | $1,200 | -| Power distribution + BEC | $30 | $150 | -| Wiring, connectors, battery bus | $80 | $400 | -| **Batteries: 4× Tattu 6S 33Ah 350 (per UAV)** | **$2,930** | **$14,650** | -| **Electronics TOTAL (5 UAVs)** | | **$47,650** | -| **Per UAV electronics** | | **~$9,530** | - -### D. Summary - -| Category | Total | Per UAV | -|----------|-------|---------| -| A. Equipment & Tooling | $4,935 | $987 | -| B. Raw Materials | $4,520 | $904 | -| C. Electronics & Propulsion | $47,650 | $9,530 | -| D. Consumables & Misc | $1,200 | $240 | -| E. Labor (est. same structure as Draft 02, +20%) | $19,176 | $3,835 | -| **GRAND TOTAL (5 UAVs)** | **$77,481** | | -| **Per UAV (all-in, with labor)** | | **$15,496** | -| **Per UAV (materials + electronics, no labor)** | | **$11,661** | - -The cost increase vs Draft 02 ($6,502/unit) is driven by cameras (+$2,391/unit: ADTI 26S replaces ADTI 20L, Z40K replaces A40 Pro), batteries (+$2,200/unit), and larger airframe (+$250/unit). Optional: swap to ADTI 61PRO (+$940/unit) for 15 cm/px nav GSD. - -## Risk Assessment - -| Risk | Impact | Probability | Mitigation | -|------|--------|------------|-----------| -| S2 FG airframe heavier than estimated → MTOW exceeded | Reduced endurance | Medium | Build weight tracking into construction; accept 18.5 kg MTOW if needed | -| 4-battery wiring complexity → connector failure | Loss of power pair | Low | Redundant connectors; test fail-safe on 2 batteries; parallel bus bar design | -| Semi-solid battery supply disruption | Cannot build | Low | Multiple suppliers (Tattu, Grepow, Xingto) | -| L/D lower than 17 in practice | Endurance drops to 7-7.5h | Medium | Use Xingto 370 Wh/kg for margin; optimize airfoil selection (SD7037 or AG series) | -| Wing flutter at 3.8m span | Structural failure | Low | Ground vibration test; CF spar sized for 1.5× flutter speed margin | -| CG shift with 4 battery packs | Controllability | Low | Fixed battery bay positions; CG calculated for all configurations | - -## References - -1-34: See Draft 01 and Draft 02 references (all still applicable) - -Additional sources: -35. DeltaQuad Evo 8h55m record: https://uasweekly.com/2025/06/27/deltaquad-evo-sets-record-with-8-hour-flight-endurance-for-electric-vtol-uas-milestone/ -36. Tulip Tech batteries: https://tulip.tech/batteries/ -37. DeltaQuad Evo specs: https://docs.deltaquad.com/tac/vehicle-specifications -38. DeltaQuad Evo performance calculator: https://evo.deltaquad.com/calc/ -39. YUAV Y37 specs: https://www.airmobi.com/yuav-y37-a-new-standard-in-long-endurance-vtol-fixed-wing-uavs/ -40. YUAV Y37 product page: https://www.airmobi.com/product/yuav-y37-3700mm-vtol-fixed-wing-uav-pnp/ -41. Tattu 350 Wh/kg 6S 33Ah: https://tattuworld.com/semi-solid-state-battery/semi-solid-350wh-kg-33000mah-22-2v-10c-6s-battery.html -42. Tattu 350 Wh/kg 12S 33Ah: https://tattuworld.com/semi-solid-state-battery/semi-solid-350wh-kg-33000mah-44-4v-10c-12s-battery.html -43. Tattu 330 Wh/kg 12S 76Ah: https://tattuworld.com/semi-solid-state-battery/semi-solid-330wh-kg-76000mah-44-4v-10c-12s-battery.html -44. Xingto 370 Wh/kg battery: https://www.xtbattery.com/370wh/kg-42v-high-energy-density-6s-12s-14s-18s-30ah-semi-solid-state-drone-battery/ -45. Amprius SA102 450 Wh/kg: https://amprius.com/the-all-new-amprius-500-wh-kg-battery-platform-is-here/ -46. Amprius UAV selection: https://amprius.com/amprius-high-power-silicon-batteries-selected-by-esaero-to-power-next-generation-uavs/ -47. NOCTUA hydrogen UAV: https://noctua.ethz.ch/technology -48. IE-SOAR 2.4 fuel cell: https://www.intelligent-energy.com/our-products/ie-soar-fuel-cells-for-uavs/ie-soar-2-4/ -49. IE-SOAR specs (retail): https://shop.thebioniceye.co.uk/products/ie-soar-2-4kw-hydrogen-fuel-cell -50. Doosan DS30W specs: https://www.doosanmobility.com/en/products/drone-ds30 -51. Cellen hydrogen refueling: https://cellenh2.com/reinventing-hydrogen-refueling-for-drones/ -52. Tattu battery catalog (pricing): https://rcdrone.top/collections/tattu-semi-solid-state-battery -53. Tattu 76Ah pricing (FlexRC): https://flexrc.com/product/tattu-semi-solid-state-330wh-kg-76000mah-10c-44-4v-12s1p-lipo-battery-pack-with-qs12-s-plug/ -54. JOUAV CW-80E: https://www.jouav.com/products/cw-80e.html -55. Discus 2b 4m glider: https://icare-rc.com/discus2b_4m.htm -56. Pickup bed dimensions: https://kevinsautos.com/faq/what-are-the-dimensions-of-a-65-foot-truck-bed.html -57. Tulip Tech Dutch MoD partnership: https://www.tulip.tech/news/ - -## Related Artifacts -- Previous drafts: `solution_draft01.md` (CFRP), `solution_draft02.md` (S2 FG, 3m, 10 kg) -- Research artifacts: `_standalone/UAV_frame_material/00_research/UAV_frame_material/` diff --git a/_standalone/UAV_frame_material/01_solution/solution_draft04.md b/_standalone/UAV_frame_material/01_solution/solution_draft04.md deleted file mode 100644 index aba7a88..0000000 --- a/_standalone/UAV_frame_material/01_solution/solution_draft04.md +++ /dev/null @@ -1,296 +0,0 @@ -# Solution Draft (Rev 04) — Launch & Recovery Assessment - -## Assessment Findings - -| Old Component Solution | Weak Point | New Solution | -|------------------------|------------|-------------| -| No launch/recovery method specified | Aircraft cannot operate without a defined takeoff/landing approach | Two viable options analyzed: Quad VTOL (recommended for field ops) or Catapult + Parachute (recommended for maximum endurance) | -| Y-3 tricopter VTOL (user proposed) | Zero motor redundancy, tilt servo failure risk, no production platforms use Y-3 | Quad (4+1) VTOL — industry standard used by DeltaQuad, YUAV Y37, WingtraOne | -| YUAV Y37 listed as 17-20 kg MTOW | Product page confirms TOW 22-26 kg; 10 kg empty weight with VTOL system | Corrected Y37 specs: TOW 22-26 kg, empty 10 kg (with VTOL), 4+1 config, $16,900 PNP | -| 18 kg MTOW design (Draft 03) | Cannot accommodate VTOL within 18 kg — VTOL system adds 2.5-3.2 kg | Option A: raise MTOW to 21-22 kg for VTOL variant; Option B: keep 18 kg for catapult variant | - -## Product Solution Description - -Two platform variants from the same S2 FG airframe, optimized for different operational needs: - -**Variant A — Quad VTOL** (recommended for forward/mobile operations): -Scaled-up modular S2 FG fixed-wing with 4+1 quadplane VTOL. Wingspan 3.8m, MTOW 21-22 kg. 4 dedicated VTOL motors on carbon fiber tube booms + 1 pusher for cruise. Separate VTOL battery (12S 5500 mAh). Endurance 6.5-7.5 hours. Launches and recovers from any 5m × 5m flat area. No ground equipment needed. - -**Variant B — Catapult + Parachute** (recommended for maximum endurance from established bases): -Same S2 FG fixed-wing, no VTOL hardware. Wingspan 3.8m, MTOW 18 kg. Pneumatic catapult launch (ELI PL-60 class). Parachute recovery (Fruity Chutes 20 kg bundle). Endurance 8-8.5 hours. Requires 108 kg catapult system and 8m launch space. - -``` -VARIANT A — QUAD VTOL (4+1) -┌───────────────────────────────────────────────────────────┐ -│ │ -│ VTOL Motor 1 VTOL Motor 2 │ -│ (front-left) (front-right) │ -│ ⟐ 15" prop ⟐ 15" prop │ -│ \ / │ -│ \ CF tube boom / │ -│ \ / │ -│ ┌────────────────────────────┐ │ -│ │ LEFT FUSELAGE RIGHT│ │ -│ │ WING [VTOL bat] WING │ │ -│ │ 1.9m [Cruise 1.9m │ │ -│ │ batteries] │ Pusher motor │ -│ │ [Payload] ─────┤────── ⊕ (cruise) │ -│ └────────────────────────────┘ │ -│ / \ │ -│ / CF tube boom \ │ -│ / \ │ -│ ⟐ 15" prop ⟐ 15" prop │ -│ VTOL Motor 3 VTOL Motor 4 │ -│ (rear-left) (rear-right) │ -│ │ -│ Motor booms: CF tubes (narrow, minimal RF impact) │ -│ Boom-wing joints: aluminum brackets with S2 FG layup │ -└───────────────────────────────────────────────────────────┘ - -VARIANT B — CATAPULT + PARACHUTE -┌───────────────────────────────────────────────────────────┐ -│ │ -│ ┌────────────────────────────┐ │ -│ │ LEFT FUSELAGE RIGHT│ │ -│ │ WING [Parachute WING │ │ -│ │ 1.9m bay + hatch] │ Pusher motor │ -│ │ [Cruise 1.9m │ │ -│ │ batteries] │ ⊕ (cruise) │ -│ │ [Payload] ─────┤─────── │ -│ └────────────────────────────┘ │ -│ │ -│ No motor booms = cleaner aerodynamics │ -│ Parachute bay with spring-loaded hatch (top/bottom) │ -│ Catapult carriage mounting rails on belly │ -└───────────────────────────────────────────────────────────┘ -``` - -## Why Not Y-3 (Tricopter)? - -The user asked specifically about Y-3 (3-motor) VTOL. After research, Y-3 is **not recommended** for this application: - -| Factor | Y-3 (Tricopter) | Quad (4+1) | -|--------|-----------------|------------| -| Weight saving vs quad | ~400g less | Baseline | -| Motor redundancy | **Zero** — any motor failure = crash | Partial — single motor loss survivable | -| Yaw control | Tilt servo on rear motor (mechanical failure point) | Differential thrust (no moving parts) | -| Production platforms using this | None found in 15-25 kg class | DeltaQuad, YUAV Y37, WingtraOne | -| ArduPilot support | Supported but less tested | Well-tested, widely deployed | -| Hover stability | Lower (3-point, asymmetric) | Higher (4-point, symmetric) | - -The 400g weight saving (~2% of MTOW) does not justify the reliability and redundancy loss. For a $15,000-17,000 aircraft in a conflict zone, motor redundancy is critical. - -## Architecture - -### Component: Launch & Recovery System - -| Solution | Weight on Aircraft | Ground Equipment | Endurance | Landing Precision | Cost (airborne) | Cost (ground) | Deployment Speed | Fit | -|----------|-------------------|-----------------|-----------|------------------|----------------|---------------|-----------------|-----| -| **Quad VTOL (recommended for field ops)** | +3.0-3.2 kg | None | 6.5-7.5h | 1-2m | $1,000-1,500 | $0 | < 2 min | ✅ Best for mobile ops | -| **Catapult + Parachute (recommended for max endurance)** | +0.95 kg | 108 kg catapult | 7.5-8.2h | 50-200m drift | $925 | $15,000-25,000 | 5-10 min | ✅ Best for endurance | -| Catapult + Belly landing | 0 kg | 108 kg catapult + 200m strip | 8-8.5h | On strip | $0 | $15,000-25,000 | 5-10 min + strip | ⚠️ Needs flat terrain | -| Y-3 VTOL | +2.5-2.7 kg | None | 7-7.5h | 1-2m | $800-1,200 | $0 | < 2 min | ❌ Reliability risk | - -### Component: VTOL System (Variant A — Quad) - -| Component | Specification | Weight | Cost | -|-----------|--------------|--------|------| -| VTOL motors (×4) | T-Motor MN505-S or equiv., ~5-6 kg thrust each on 15" prop | 880g total | $400-600 | -| VTOL ESCs (×4) | 40A BLHeli_32 or equiv. | 320g total | $120-200 | -| VTOL propellers (×4) | 15" folding (fold for cruise to reduce drag) | 200g total | $60-100 | -| Motor booms (×4) | Carbon fiber tubes 20mm OD, 400mm length + aluminum brackets | 700g total | $150-250 | -| VTOL battery | 12S 5500 mAh LiPo (dedicated) | 700g | $120-180 | -| Wiring + connectors | 12AWG silicone, XT60 connectors | 180g | $30-50 | -| **VTOL system total** | | **2,980g** | **$880-1,380** | - -### Component: Catapult System (Variant B) - -| Component | Specification | Weight/Size | Cost | -|-----------|--------------|-------------|------| -| Pneumatic catapult | ELI PL-60 or equivalent | 108 kg (2 cases) | $15,000-25,000 est. | -| Catapult carriage | Custom for UAV fuselage, quick-release | ~2 kg (stays on ground) | Included or $500 custom | -| Belly mounting rails | Aluminum rails on fuselage for carriage attachment | ~150g on aircraft | $50 | - -### Component: Parachute System (Variant B) - -| Component | Specification | Weight | Cost | -|-----------|--------------|--------|------| -| Fruity Chutes FW bundle 20 kg | IFC-120-S Iris Ultra + pilot chute + deployment bag + Y-harness | 950g | $925 | -| Servo-actuated hatch | Spring-loaded door on fuselage top/bottom, triggered by autopilot | 80g | $30 | -| **Recovery system total** | | **1,030g** | **$955** | - -## Updated Weight Budgets - -### Variant A — Quad VTOL (21 kg MTOW) - -| Component | Weight (kg) | Notes | -|-----------|-------------|-------| -| Airframe (S2 FG, 3.8m, reinforced for VTOL loads) | 6.0-7.0 | +0.5 kg structural reinforcement at boom attach points | -| Wing joints (aluminum 7075) | 0.35 | Same as Draft 03 | -| Motor (800W cruise) + ESC + prop | 0.65 | Slightly larger to handle higher MTOW | -| Wiring, connectors (cruise) | 0.45 | Same as Draft 03 | -| **VTOL system** | **2.98** | **4 motors, 4 ESCs, 4 props, booms, VTOL battery, wiring** | -| **Platform subtotal** | **10.4-11.4** | | -| Payload (cameras + compute) | 0.89 | Same as Draft 03 | -| Cruise battery (4× Tattu 6S 33Ah) | 8.86 | Same as Draft 03 | -| **Total** | **20.2-21.2** | | - -Conservative: 11.4 + 0.89 + 8.86 = **21.15 kg** (at 21 kg MTOW — tight) -Optimistic: 10.4 + 0.89 + 8.86 = **20.15 kg** (0.85 kg margin) - -**To fit 21 kg MTOW**: reduce to 3× cruise battery packs (6.65 kg, 2198 Wh) → total 18.9-19.9 kg → endurance ~5.5-6.5h. Or accept 22 kg MTOW → endurance ~6.5-7h with 4 packs. - -### Variant B — Catapult + Parachute (18 kg MTOW) - -| Component | Weight (kg) | Notes | -|-----------|-------------|-------| -| Airframe (S2 FG, 3.8m) | 5.5-6.5 | Same as Draft 03 | -| Wing joints (aluminum 7075) | 0.35 | Same | -| Motor (700W cruise) + ESC + prop | 0.6 | Same as Draft 03 | -| Wiring, connectors | 0.45 | Same | -| Catapult belly rails | 0.15 | Aluminum mounting interface | -| Parachute system | 1.03 | Chute + hatch mechanism | -| **Platform subtotal** | **8.1-9.1** | | -| Payload (cameras + compute) | 0.89 | Same | -| Cruise battery (4× Tattu 6S 33Ah) | 8.86 | Same | -| **Total** | **17.9-18.9** | | - -Conservative: 9.1 + 0.89 + 8.86 = **18.85 kg** (slightly over 18 kg; accept 19 kg MTOW or trim airframe) -Optimistic: 8.1 + 0.89 + 8.86 = **17.85 kg** (fits within 18 kg ✓) - -## Endurance Comparison - -### Variant A — Quad VTOL - -| MTOW | Battery Config | Usable Energy | Cruise Power | Endurance (practical) | -|------|---------------|--------------|-------------|----------------------| -| 21 kg | 4× 6S 33Ah (2930 Wh) | 2344 Wh | ~310W | **7.0-7.5h** | -| 22 kg | 4× 6S 33Ah (2930 Wh) | 2344 Wh | ~330W | **6.5-7.0h** | -| 20 kg | 3× 6S 33Ah (2198 Wh) | 1758 Wh | ~295W | **5.5-6.0h** | - -Cruise power increase vs Draft 03: higher MTOW (21-22 vs 18 kg) + ~3-5% additional drag from VTOL booms. - -P_cruise (21 kg) = (21 × 9.81 × 17) / (17 × 0.72) × 1.04 = ~310W (including boom drag penalty) - -### Variant B — Catapult + Parachute - -| MTOW | Battery Config | Usable Energy | Cruise Power | Endurance (practical) | -|------|---------------|--------------|-------------|----------------------| -| 18 kg | 4× 6S 33Ah (2930 Wh) | 2344 Wh | ~275W | **8.0-8.5h** | -| 19 kg | 4× 6S 33Ah (2930 Wh) | 2344 Wh | ~285W | **7.5-8.0h** | - -Parachute adds ~1 kg but no aerodynamic penalty (stowed internally). - -### Summary - -| Variant | MTOW | Endurance | vs Draft 03 (8-8.5h) | -|---------|------|-----------|---------------------| -| A: Quad VTOL (4 packs) | 21-22 kg | **6.5-7.5h** | -12-20% | -| A: Quad VTOL (3 packs) | 20 kg | **5.5-6.0h** | -30-35% | -| B: Catapult + Parachute | 18-19 kg | **7.5-8.5h** | -0-6% | -| B: Catapult + Belly | 18 kg | **8-8.5h** | 0% | - -## Cross-Validation Against YUAV Y37 - -The Y37 is the closest production reference for our VTOL variant: - -| Parameter | YUAV Y37 | Our Variant A (Quad VTOL) | Delta | -|-----------|----------|--------------------------|-------| -| Wingspan | 3.7m | 3.8m | +3% | -| Empty weight (with VTOL) | 10 kg | 10.4-11.4 kg | +4-14% (S2 FG heavier than carbon) | -| MTOW | 22-26 kg | 21-22 kg | Similar | -| Battery energy | 2700 Wh | 2930 Wh | +9% | -| Endurance (1 kg payload) | 8.5h | ~7h (est. at 0.89 kg payload) | -18% (S2 FG weight penalty) | -| Material | Full carbon | S2 FG + CF spar | S2 FG is ~2-3 kg heavier | -| RF transparent | No | Yes | Our advantage | -| Price (PNP) | $16,900 | ~$11,000-14,000 (DIY) | 18-35% cheaper | - -The 18% endurance gap between Y37 and our Variant A is primarily due to the S2 FG weight penalty (~2-3 kg heavier airframe). If RF transparency is not required, a carbon airframe would close this gap. - -## BOM Cost Impact (5 UAVs) - -### Variant A — Quad VTOL - -| Category | Total (5 UAVs) | Per UAV | vs Draft 03 | -|----------|----------------|---------|-------------| -| Draft 03 baseline | $77,481 | $15,496 | — | -| VTOL system hardware | $5,000-7,000 | $1,000-1,400 | +$1,000-1,400/unit | -| Structural reinforcement | $750 | $150 | +$150/unit | -| Larger cruise motor/ESC | $250 | $50 | +$50/unit | -| **Variant A total** | **$83,481-85,481** | **$16,696-17,096** | **+$1,200-1,600/unit** | - -### Variant B — Catapult + Parachute - -| Category | Total (5 UAVs) | Per UAV | vs Draft 03 | -|----------|----------------|---------|-------------| -| Draft 03 baseline | $77,481 | $15,496 | — | -| Parachute systems (×5) | $4,775 | $955 | +$955/unit | -| Catapult (ELI PL-60, ×1) | $15,000-25,000 | $3,000-5,000 (amortized) | +$3,000-5,000/unit | -| Belly rails + hatch mech. | $500 | $100 | +$100/unit | -| **Variant B total** | **$97,756-107,756** | **$19,551-21,551** | **+$4,055-6,055/unit** | - -**Key insight**: VTOL is cheaper per fleet. The catapult is expensive one-time equipment that only amortizes well over large fleets (20+ UAVs). - -## Recommendation Matrix - -| Operational Scenario | Recommended Variant | Rationale | -|---------------------|--------------------|-----------| -| **Mobile forward operations** (changing locations, no established base) | **A: Quad VTOL** | No ground equipment, instant deploy from any flat area, precision recovery | -| **Fixed base operations** (airfield or prepared area available) | **B: Catapult + Parachute** | Maximum endurance, no VTOL dead weight, lower per-unit complexity | -| **Mixed operations** (both scenarios) | **A: Quad VTOL** | VTOL works everywhere; endurance trade-off (6.5-7.5h vs 8h) is acceptable for operational flexibility | -| **Maximum endurance priority** (>8h critical) | **B: Catapult + Belly** | Zero weight penalty; but needs 200m landing strip | -| **Budget-constrained fleet** (5 units) | **A: Quad VTOL** | $83-85k total vs $98-108k for catapult variant | - -## Risk Assessment (New Items for Draft 04) - -| Risk | Impact | Probability | Mitigation | -|------|--------|------------|-----------| -| VTOL motor failure during hover landing | Aircraft loss ($17k) | Low | Quad config allows single-motor-out survival; redundant ESC power feeds | -| VTOL boom attachment failure on S2 FG | Boom separation → crash | Low | Aluminum through-bolt brackets; static load test to 5× hover thrust | -| Catapult malfunction | No launch capability | Low | Carry spare seals and Makita batteries; ELI PL-60 is simple design | -| Parachute deployment failure | Aircraft loss + ground damage | Very Low | Dual deployment triggers (autopilot + RC manual); pre-flight chute check | -| Wind drift on parachute recovery | UAV lands in inaccessible area | Medium | Select recovery area with margin; GPS tracking; contingency recovery team | -| VTOL adds drag → endurance less than calculated | Endurance only 6h instead of 7h | Medium | Folding VTOL props reduce cruise drag; boom fairing; accept margin | -| S2 FG structure insufficient for 21-22 kg VTOL loads | Structural failure | Low | Full FEA analysis; static wing load test at 3.5g; boom attachment cycling test | - -## Testing Strategy (Additions for Draft 04) - -### VTOL-Specific Tests (Variant A) -- Hover stability test: 60-second hover at 21 kg, measure motor temps and vibration -- Transition test: full transition from hover to cruise and back, measure altitude loss and energy -- Single-motor-out test: kill one VTOL motor at 30m altitude, verify safe emergency landing -- Boom attachment cycling: 200× VTOL power-on/off cycles, inspect boom joints for fatigue -- VTOL battery endurance: verify 2+ full VTOL cycles (takeoff + landing) on single charge -- Drag measurement: compare cruise power with VTOL booms vs clean airframe - -### Catapult-Specific Tests (Variant B) -- Catapult launch: 10 consecutive launches, verify consistent exit speed and UAV integrity -- Launch acceleration: measure g-forces on airframe and payload during catapult stroke -- Parachute deployment: 5 test deployments at various speeds and altitudes (min 50m AGL) -- Parachute reliability: 20 pack-deploy cycles, verify consistent opening -- Landing impact: verify payload cameras survive 4.6 m/s descent impact - -## References - -1-57: See Draft 03 references (all still applicable) - -Additional sources: -58. YUAV Y37 product page (updated specs): https://www.airmobi.com/product/yuav-y37-3700mm-vtol-fixed-wing-uav-pnp/ -59. YUAV Y37 engineering blog: https://www.airmobi.com/yuav-y37-a-new-standard-in-long-endurance-vtol-fixed-wing-uavs/ -60. DeltaQuad Evo TAC specs: https://docs.deltaquad.com/tac/vehicle-specifications -61. DeltaQuad Evo VTOL takeoff: https://docs.deltaquad.com/tac/flight/quick-takeoff/vtol-takeoff -62. ELI PL-60 pneumatic catapult: https://eli.ee/products/catapults/pl60/ -63. Fruity Chutes FW bundle 20 kg: https://shop.fruitychutes.com/products/fixed-wing-recovery-bundle-44lbs-20kg-15fps -64. Robonic pneumatic launcher advantages: https://www.robonic.fi/advantages-of-pneumatic-launch/ -65. Starlino power-to-thrust analysis: http://www.starlino.com/power2thrust.html -66. T-Motor U13II specs: https://store.tmotor.com/product/U13-v2-KV130-Power-Type-UAV-Motor.html -67. Belly landing research: https://www.scientific.net/AMM.842.178 -68. Aeromao Talon belly landing: https://aeromao.com/2018/10/18/talon-fully-autonomous-belly-landing/ -69. SCL bungee launcher specs: https://uascomponents.com/launch-and-landing-systems/bungee-catapult-scl2 -70. UkrSpecSystems SCL-1A: https://ukrspecsystems.com/uascomponents/bungee-uav-launching-system-scl-1a -71. VTOL weight penalty research: https://hal.science/hal-03832115v1/document -72. VTOL configuration endurance comparison: https://mediatum.ub.tum.de/1462822 - -## Related Artifacts -- Previous drafts: `solution_draft01.md` through `solution_draft03.md` -- Research artifacts: `_standalone/UAV_frame_material/00_research/UAV_frame_material/` diff --git a/_standalone/UAV_frame_material/01_solution/solution_draft05.md b/_standalone/UAV_frame_material/01_solution/solution_draft05.md deleted file mode 100644 index 54d6d06..0000000 --- a/_standalone/UAV_frame_material/01_solution/solution_draft05.md +++ /dev/null @@ -1,354 +0,0 @@ -# Solution Draft (Rev 05) — Reliability & Durability Assessment - -## Assessment Findings - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -|------------------------|----------------------------------------------|-------------| -| Quad VTOL (Draft 04 Variant A) — reliability listed as "Low probability" motor failure | Motor/ESC failure during low-altitude hover (< 10m) is survivable at altitude but likely fatal below 10m; ArduPilot has no motor-out compensation for quadplane VTOL; ESC desync is dominant propulsion failure mode; 1-3 incidents expected per fleet lifetime | Risk reclassified: LOW per sortie but SIGNIFICANT over fleet lifetime; add ESC desync mitigation (low-ESR caps, DShot protocol); add VTOL battery health monitoring; consider redundant ESC feeds | -| Catapult+Parachute (Draft 04 Variant B) — camera damage risk not addressed | Belly-mounted Viewpro Z40K gimbal protruding 8-10cm below fuselage is directly vulnerable to parachute landing impact; wind increases impact energy 4× (190 J calm → 762 J at 8 m/s wind); post-landing drag abrades exposed components | **Semi-recessed gimbal mount** (recommended): mount Z40K in a 120mm-deep belly cavity with only ~40mm lens protrusion; fuselage structure acts as natural bumper. No retractable mechanism needed. Saves 150g and $100-200 vs retractable approach. Add replaceable belly panel + foam bumper around cavity opening | -| Draft 04 parachute landing analysis — calm-air only | Did not account for horizontal wind velocity during parachute descent; at 8 m/s wind, resultant velocity is 9.2 m/s (not 4.6 m/s), impact energy increases 4× | Revised landing energy analysis including wind scenarios; belly panel design must handle 762 J at moderate wind | -| Draft 04 risk matrix — qualitative only | No quantitative risk estimation over fleet lifetime | Added fleet-lifetime risk analysis: expected incidents, costs, and comparison for 5 UAVs × 300 sorties each | - -## Product Solution Description - -Two platform variants from the same S2 FG airframe with updated reliability assessment and camera protection requirements: - -**Variant A — Quad VTOL**: Higher-risk takeoff/landing phase (8 active electronic components during hover, ESC desync possible) but near-zero landing damage to aircraft and payload. Dominant risk: motor/ESC failure below 10m altitude. Estimated 1-3 propulsion incidents per 1,500 fleet sorties. - -**Variant B — Catapult + Parachute**: No powered hover risk. Passive parachute recovery is inherently reliable (>99% deployment success). Landing impact (190-762 J depending on wind) is manageable for S2 FG airframe. Camera protection achieved via **semi-recessed gimbal mount** — the same Viewpro Z40K mounted inside a belly cavity with only the lens ball protruding ~40mm, shielded by the fuselage structure. - -**Key reliability finding**: Both variants have comparable overall reliability when proper mitigations are applied. VTOL risks are **electronic/catastrophic** (rare but expensive). Catapult+parachute risks are **mechanical/incremental** (more frequent but cheaper and repairable). - -## Architecture - -### Component: VTOL Reliability System (Variant A) - -| Failure Mode | Probability (per sortie) | Consequence | Mitigation | Residual Risk | -|--------------|-------------------------|-------------|-----------|---------------| -| ESC desync during VTOL transition | 1 in 500-2,000 | Aircraft loss at low altitude | Low-ESR capacitors on each ESC; DShot protocol; rampup power tuning; fresh VTOL battery per sortie | Medium — hardware mitigation reduces but doesn't eliminate | -| Motor bearing failure during hover | 1 in 5,000+ | Aircraft loss at low altitude | Replace VTOL motors every 6 months (not 12); pre-flight motor spin test | Low | -| VTOL battery voltage sag | 1 in 200-500 (partial) | ESC desync trigger → motor stall | Dedicated VTOL battery; replace after 200 cycles; monitor internal resistance | Low-Medium | -| VTOL boom attachment fatigue | 1 in 2,000+ | Boom separation → crash | Aluminum through-bolt brackets; inspect every 50 sorties; cycling test per Draft 04 | Low | -| Single motor out at altitude (> 30m) | N/A | Degraded landing, likely survivable | 195% thrust on 3 motors; controlled descent possible with yaw sacrifice | Low — survivable | -| Single motor out at low altitude (< 10m) | N/A | Likely crash — < 2s reaction time | No firmware solution exists; this is an accepted residual risk of VTOL | **HIGH** — inherent to VTOL | - -**VTOL Reliability Enhancements (recommended additions to Draft 04):** - -| Enhancement | Weight | Cost | Benefit | -|-------------|--------|------|---------| -| Low-ESR capacitors (4×, on each ESC) | 40g | $20 | Reduces voltage noise → fewer ESC desyncs | -| DShot protocol (firmware config) | 0g | $0 | Digital ESC communication → no signal noise | -| Redundant ESC power feeds (dual BEC) | 30g | $40 | Prevents ESC brownout from single feed failure | -| VTOL battery health monitor (voltage + IR) | 10g | $15 | Alerts to degraded battery before failure | -| 6-month VTOL motor replacement (vs 12) | 0g | +$200-300/year per UAV | Halves motor wear risk | -| Pre-flight VTOL motor spin test (procedure) | 0g | $0 | Detects bearing wear, ESC issues before flight | -| **Total** | **80g** | **$75 initial + $200-300/year** | **~50% reduction in ESC desync risk** | - -### Component: Camera Mounting & Parachute Landing Protection (Variant B) - -#### Camera Mounting Options Comparison - -| Mounting Approach | Protrusion Below Belly | Camera Protection | Weight Impact | Cost | FoV | Complexity | Fit | -|-------------------|----------------------|-------------------|-------------|------|-----|-----------|-----| -| **Protruding gimbal (Draft 04)** | 8-10 cm | None — first ground contact point | 0g (baseline) | $0 | 360° pan, full tilt | Lowest | ❌ Incompatible with parachute recovery | -| **Retractable gimbal** | 0-8 cm (retracted/deployed) | Full when retracted | +150g (servo + rail) | +$100-200 | Same as protruding when deployed | Medium — moving parts, timing sequence | ⚠️ Works but adds complexity and failure mode | -| **Semi-recessed mount (recommended)** | ~4 cm (lens ball only) | High — fuselage structure is natural bumper | +50-80g (cavity reinforcing frame) | +$30-60 | ±60-70° pan, ±60° tilt | Lowest — no moving parts | ✅ Best balance of protection, simplicity, weight | -| **Fully recessed / internal turret** | 0 cm | Maximum | +100-200g (window + deeper cavity) | +$100-300 | Most restricted (±45° pan) | Low — but needs optical window | ⚠️ Best protection, but FoV too restricted | - -#### Semi-Recessed Gimbal Mount (Recommended) - -The same Viewpro Z40K (153 × 95.3 × 166mm, 595g) mounted inside a belly cavity rather than hanging below. The damping board attaches at the top of the cavity — same mounting hardware, same damping balls, no modifications to the camera itself. - -``` -SEMI-RECESSED Z40K — CROSS SECTION - -┌──────────────────────────────────────────────┐ -│ FUSELAGE (18-22cm deep) │ -│ │ -│ ═══════ Damping board + balls ════════ │ ← Same Z40K mounting hardware -│ │ │ │ -│ │ Z40K gimbal body │ │ -│ │ (153mm tall) │ │ ← Entire gimbal mechanism -│ │ 3-axis motors │ │ inside fuselage -│ │ CNC aluminum housing │ │ -│ │ │ │ -│ ══════╧══════════════════════════╧═══════ │ ← Belly skin with opening -│ reinforcing ┌──────────┐ reinforcing │ (~170×125mm cutout) -│ frame (FG) │ Lens ball │ frame (FG) │ -│ │ (~40mm │ │ -└────────────────│protrusion)│───────────────────┘ - └──────────┘ - ▲ - Only this part exposed to ground - Fuselage belly absorbs impact first -``` - -**Cavity specifications:** -- Depth: ~120mm (of 166mm total gimbal height) -- Opening: ~170 × 125mm (15mm clearance on each side of 153 × 95mm gimbal body) -- Reinforcing frame: S2 FG layup around cavity edges, ~50-80g -- Lens protrusion below belly: ~40-45mm -- Foam bumper strip around opening: EVA 15mm, ~30-50g - -**Why clearance matters:** 10-15mm gap between gimbal body and cavity walls prevents physical contact during vibration. If the gimbal touches the walls, aircraft vibration transmits directly to the camera sensor, defeating the damping system and causing jello/blur. - -#### Vibration & Stabilization Analysis - -Semi-recessed mounting does NOT degrade image stabilization — it improves it compared to a protruding mount: - -| Factor | Protruding Mount | Semi-Recessed Mount | -|--------|-----------------|-------------------| -| Pendulum arm length | 8-10 cm (full gimbal below belly) | ~4 cm (lens ball only) | -| Pendulum sway amplitude | Higher — longer arm amplifies aircraft oscillations | Lower — shorter arm, less amplification | -| Aerodynamic buffeting on gimbal | Full exposure to 17 m/s airflow | Shielded — gimbal body inside fuselage cavity | -| Turbulence source | Direct airflow on gimbal housing + arm | Minor cavity vortex only (blowing across opening) | -| Damping system function | Works as designed | Identical — same damping board, same balls | -| Active stabilization (3-axis) | ±0.02° — handles remaining vibration | ±0.02° — same; less input vibration to cancel | - -The Z40K's stabilization is a two-stage system: -1. **Passive** (damping balls/board): decouples gimbal from high-frequency aircraft vibration (motor buzz, prop harmonics). The "float" is intentional — do NOT rigidly fasten the camera to reduce wobble, as this defeats the passive stage and overloads the active stage. -2. **Active** (3-axis gimbal motors): cancels low-frequency movement (aircraft roll/pitch/yaw). Achieves ±0.02° precision. Works identically regardless of mounting position. - -If image wobble is observed, the correct fix is **at the vibration source** (balance propeller, soft-mount cruise motor, stiffen fuselage skin), not at the camera mount. Optionally, slightly stiffer damping balls (harder durometer) can reduce sway amplitude without compromising high-frequency isolation. - -#### Parachute Landing Failure Modes (with Semi-Recessed Mount) - -| Failure Mode | Probability (per sortie) | Consequence | Mitigation | Residual Risk | -|--------------|-------------------------|-------------|-----------|---------------| -| Parachute non-deployment | 1 in 200+ | Aircraft loss ($17k) | Dual triggers (autopilot + RC manual); spring-loaded hatch; pre-flight chute inspection | Very Low | -| Lens ball ground contact | 1 in 20-50 (moderate wind) | Lens scratch or crack ($200-500 lens replacement) | Foam bumper around cavity opening provides ~15mm standoff; belly skin contacts ground first | Low | -| Belly skin damage from landing impact | 1 in 5-20 | Cosmetic to minor structural ($200-500) | Replaceable belly panel; foam bumper strip | Low — acceptable wear | -| Post-landing drag in wind | 1 in 5-15 | Abrasion to skin, antennas | Parachute release mechanism; wind-aware recovery area selection. Semi-recessed camera NOT exposed to drag abrasion | Low-Medium | -| Landing in inaccessible terrain (wind drift) | 1 in 10-30 | Recovery difficulty, time loss | GPS tracking; plan recovery area with 300m margin; recovery team | Low-Medium | -| Parachute lines tangled on aircraft structure | 1 in 100+ | Incomplete chute inflation → hard landing | Clean exterior (semi-recessed camera reduces snag risk); proper packing | Very Low | -| Gimbal contacts cavity wall (vibration) | Continuous if undersized | Image quality degradation (jello, blur) | Maintain 10-15mm clearance on all sides; opening ~170×125mm for 153×95mm gimbal | Negligible with proper sizing | - -**Parachute Landing Protection (recommended additions to Draft 04):** - -| Enhancement | Weight | Cost | Benefit | -|-------------|--------|------|---------| -| **Semi-recessed gimbal cavity** (structural cutout + FG reinforcing frame) | +50-80g | $30-60 | Camera shielded by fuselage structure; no moving parts; no retraction mechanism needed | -| Replaceable belly panel (S2 FG sandwich, 2mm) | 0g (replaces existing skin section) | $50-100 per panel | Swap every 50-100 landings; absorbs cumulative impact | -| Belly foam bumper strip around cavity (EVA foam, 15mm) | 30-50g | $10 | Additional impact absorption + ~15mm standoff for lens ball | -| Parachute release mechanism (servo cutter) | 30g | $40 | Cuts risers after touchdown to prevent wind drag | -| **Total** | **110-160g** | **$130-210 initial** | **Camera protected; no moving parts; lighter and simpler than retractable** | - -Compared to retractable gimbal approach: **saves 100-150g, saves $70-140, eliminates retraction servo failure mode, no timing sequence needed.** - -#### FoV Trade-Off (Semi-Recessed) - -| Pan Angle | View Direction | Available? | Notes | -|-----------|---------------|-----------|-------| -| 0° (forward) | Along flight path | ✅ | Primary reconnaissance direction | -| ±30° | Forward oblique | ✅ | Full quality | -| ±60° | Side-looking | ✅ | Slight vignetting at cavity edge | -| ±70° | Wide oblique | ⚠️ | Cavity wall partially blocks — usable at reduced quality | -| ±90° (perpendicular) | Direct side | ❌ | Blocked by cavity wall | -| ±180° (rear) | Behind aircraft | ❌ | Blocked | - -For reconnaissance at 2 km altitude: ±60-70° pan covers a ground swath of ~4.6 km wide (±tan(70°) × 2 km). This is sufficient for most reconnaissance profiles. The 360° pan of a protruding gimbal is rarely used — the aircraft itself rotates to look at different areas. - -### Component: Catapult System Reliability - -| Failure Mode | Probability (per sortie) | Consequence | Mitigation | Residual Risk | -|--------------|-------------------------|-------------|-----------|---------------| -| Pressure seal leak | 1 in 500+ | Cannot launch → mission abort | Carry spare seals; pre-launch pressure test | Very Low | -| Carriage jam | 1 in 1,000+ | Cannot launch → mission abort | Pre-launch dry run; lubricant | Very Low | -| Battery depletion (Makita 18V) | Negligible | Cannot pressurize | Carry 2-3 spare Makita batteries ($30 each) | Negligible | -| Rail damage from transport | 1 in 200+ | Misaligned launch → UAV damage | Transport padding; pre-launch rail alignment check | Low | -| **Complete catapult failure** | **1 in 2,000+** | **Fleet grounded** | **Carry field repair kit; backup launch method (hand launch for reduced MTOW)** | **Low — SPOF** | - -## Reliability Comparison Matrix - -### Per-Sortie Risk - -| Risk Category | Quad VTOL (Variant A) | Catapult+Parachute (Variant B, with protection) | -|---------------|----------------------|------------------------------------------------| -| **Catastrophic aircraft loss** | 1 in 500-2,000 (motor/ESC fail during hover) | 1 in 200+ (parachute non-deploy) — but parachute is simpler and more reliable than 8 electronic components | -| **Camera/gimbal damage** | Near-zero | Very Low — lens scratch possible; semi-recessed mount shields gimbal body | -| **Airframe damage** | Near-zero | 1 in 5-20 (belly panel — cheap, replaceable) | -| **Mission abort (no aircraft loss)** | Near-zero | 1 in 500+ (catapult failure) | -| **Recovery difficulty** | Near-zero (precision 1-2m) | 1 in 10-30 (wind drift to awkward terrain) | - -### Fleet Lifetime Risk (5 UAVs × 300 sorties = 1,500 sorties) - -| Risk | VTOL Expected Cost | Catapult+Parachute Expected Cost | -|------|-------------------|--------------------------------| -| Aircraft loss (motor/ESC or chute failure) | 1-3 incidents × $17k = **$17,000-51,000** | 0-1 incident × $17k = **$0-17,000** | -| Camera damage (lens scratch/crack) | ~$0 | 0-3 × $300 = **$0-900** (lens replacement; gimbal body protected) | -| Belly panel replacements | ~$0 | 15-30 × $100 = **$1,500-3,000** | -| Catapult maintenance | $0 | 5 years × $750-1,250 = **$3,750-6,250** | -| VTOL motor replacements | 5 UAVs × 5 years × $300 = **$7,500** | $0 | -| **Total expected damage/maintenance cost** | **$24,500-58,500** | **$5,250-27,150** | - -### Reliability Verdict - -| Factor | VTOL | Catapult+Parachute | Winner | -|--------|------|-------------------|--------| -| Catastrophic failure risk (aircraft loss) | Higher — ESC desync during hover | Lower — parachute is passive/reliable | **Catapult+Parachute** | -| Camera/payload safety per landing | Better — precision soft landing | Good with semi-recessed mount; lens ball slightly exposed (~40mm) | **VTOL** (slight edge) | -| Airframe wear per landing | Better — no ground impact | Worse — 190-762 J per landing, cumulative | **VTOL** | -| System complexity (failure points) | Worse — 8 additional electronic components | Better — passive parachute + simple mechanical catapult | **Catapult+Parachute** | -| Single point of failure | None (distributed) | Catapult (fleet grounded if broken) | **VTOL** | -| Maintenance cost over 5 years | Higher ($7,500 motor replacements) | Lower ($5,250-6,250 panels + catapult) | **Catapult+Parachute** | -| Failure consequence type | Catastrophic (aircraft loss) | Incremental (repairable damage) | **Catapult+Parachute** | -| Fleet lifetime expected cost | $24,500-58,500 | $5,250-27,150 | **Catapult+Parachute** | - -## Parachute Landing — Wind Impact Analysis (New) - -Draft 04 analyzed only calm-air parachute landing (4.6 m/s vertical, 190 J). Real-world wind significantly changes the picture: - -| Wind Speed | Horizontal Drift (100m deploy) | Resultant Velocity | Impact Energy | Damage Profile | -|------------|-------------------------------|-------------------|---------------|----------------| -| Calm (0 m/s) | 10-20m | 4.6 m/s | 190 J | Vertical drop — belly panel absorbs | -| Light (5 m/s) | 110m | 6.8 m/s | 416 J | Angled impact — sliding risk | -| Moderate (8 m/s) | 176m | 9.2 m/s | 762 J | Hard angled impact — tumbling likely | -| Strong (12 m/s) | 264m | 12.9 m/s | 1,499 J | Severe — airframe structural risk | -| DeltaQuad max VTOL wind | — | — | — | 12.5 m/s (VTOL limited too) | - -**Key insight**: At moderate wind (8 m/s), parachute landing energy is 4× calm-air estimate. Belly panel and protection systems must be designed for moderate wind case (762 J), not calm-air (190 J). - -At strong wind (12 m/s), parachute landing becomes dangerous — but VTOL hover is also marginal at 12+ m/s wind. Both systems have degraded reliability in strong wind. - -**Mitigation for wind**: Deploy parachute at higher altitude (200m) to give more time for wind assessment; choose recovery area downwind with soft terrain; auto-release parachute risers after touchdown to prevent drag. - -## Updated Weight Budgets - -### Variant A — Quad VTOL (21 kg MTOW) — with reliability enhancements - -| Component | Weight (kg) | Change from Draft 04 | -|-----------|-------------|---------------------| -| Draft 04 Variant A total | 20.2-21.2 | — | -| ESC capacitors (4×) | +0.04 | New | -| Redundant BEC | +0.03 | New | -| Battery health monitor | +0.01 | New | -| **Revised total** | **20.3-21.3** | **+80g** (negligible) | - -### Variant B — Catapult + Parachute (18 kg MTOW) — with semi-recessed camera mount - -| Component | Weight (kg) | Change from Draft 04 | -|-----------|-------------|---------------------| -| Draft 04 Variant B total | 17.9-18.9 | — | -| Semi-recessed cavity reinforcing frame | +0.05-0.08 | New (replaces retractable mechanism) | -| Belly foam bumper around cavity | +0.03-0.05 | New | -| Parachute riser cutter | +0.03 | New | -| **Revised total** | **18.0-19.1** | **+110-160g** | - -At 19.1 kg conservative: slightly over 18 kg MTOW. Options: accept 19 kg MTOW (minimal endurance impact: ~7.5-8.0h) or trim 160g from airframe. Saves 100-150g vs retractable gimbal approach. - -## Updated Cost Impact - -### Variant A — VTOL reliability enhancements - -| Item | Per UAV | ×5 Fleet | -|------|---------|----------| -| Draft 04 Variant A total | $16,696-17,096 | $83,481-85,481 | -| ESC capacitors + BEC + monitor | $75 | $375 | -| Annual VTOL motor replacement (5 years) | $300/year | $7,500 total | -| **Revised total (5-year)** | | **$91,356-93,356** | - -### Variant B — Catapult+Parachute with semi-recessed camera mount - -| Item | Per UAV | ×5 Fleet | -|------|---------|----------| -| Draft 04 Variant B total | $19,551-21,551 | $97,756-107,756 | -| Semi-recessed cavity (reinforcing frame, built into airframe) | $40 | $200 | -| Belly bumper + riser cutter | $50 | $250 | -| Replacement belly panels (5 years) | $500 | $2,500 | -| **Revised total (5-year)** | | **$100,706-110,706** | - -## Recommendation — Updated - -| Operational Scenario | Recommended | Rationale (Reliability Focus) | -|---------------------|-------------|------------------------------| -| **Maximum reliability, accept ground equipment** | **B: Catapult+Parachute** (with semi-recessed gimbal) | Lower probability of catastrophic loss; failure modes are incremental/repairable; passive parachute has fewer electronic failure points | -| **Maximum operational flexibility, accept higher risk** | **A: Quad VTOL** (with reliability enhancements) | No ground equipment SPOF; precision landing protects payload; accepts 1-3 motor/ESC incidents per fleet lifetime | -| **Highest-value payloads (expensive cameras)** | **A: Quad VTOL** | Near-zero camera damage per landing; semi-recessed mount for parachute variant is good but lens ball still slightly exposed | -| **Budget-constrained operations** | **A: Quad VTOL** | Lower 5-year fleet cost ($91k vs $101k) despite higher aircraft loss risk | -| **Risk-averse operations (conflict zone, irreplaceable assets)** | **B: Catapult+Parachute** | Each UAV is $17k in a supply-constrained environment; losing fewer aircraft matters more than operational convenience | - -## Answer to User's Questions - -**1. "VTOL can suddenly break during faulty of 1 of the motor during takeoff or landing"** - -**Confirmed risk.** ESC desync is the most common propulsion failure mode and is triggered by exactly the conditions present during VTOL hover: sudden throttle changes, high current draw, voltage sag. Quad configuration provides partial redundancy at altitude (> 30m) but is likely fatal below 10m due to < 2 seconds reaction time. ArduPilot quadplane firmware has no built-in single motor failure compensation. Over 1,500 fleet sorties, 1-3 such incidents are plausible. Each incident at low altitude = ~$17k aircraft loss. - -**Mitigations**: Low-ESR capacitors, DShot protocol, fresh VTOL battery per sortie, 6-month motor replacement interval, pre-flight motor spin test. These reduce but do not eliminate the risk. - -**2. "Landing on the parachute can damage the UAV, especially having sticking out AI camera on the gimbal"** - -**Confirmed risk, but solvable.** A belly-mounted protruding gimbal like the Viewpro Z40K hanging 8-10cm below the fuselage IS highly vulnerable during parachute landing — it will be the first ground contact point. In wind, impact energy increases 4× (190 J → 762 J at 8 m/s wind). Post-landing drag from the parachute can cause additional abrasion damage. - -**Recommended solution: Semi-recessed gimbal mount.** Mount the same Z40K inside a 120mm-deep belly cavity using its standard damping board. Only the lens ball protrudes ~40mm below belly. The fuselage structure around the cavity acts as a natural bumper — the belly skin contacts the ground first, not the camera. This approach: -- Needs NO retractable mechanism (no moving parts, no timing sequence, no servo failure mode) -- Saves 100-150g and $70-140 compared to retractable approach -- Provides better vibration isolation than protruding mount (shorter pendulum arm, wind shielding inside cavity) -- Restricts FoV to ±60-70° pan (vs 360° protruding) — sufficient for reconnaissance at 2 km altitude -- Small residual risk: lens ball scratch in rough terrain or tumbling landing — replaceable lens ($200-300) - -The S2 FG airframe itself handles parachute landing forces well — a replaceable belly panel ($50-100) absorbs cumulative wear. - -**3. "It depends on the actual camera design and position of the parachute"** - -**Correct.** The damage risk is entirely design-dependent. Camera mounting options ranked by parachute landing compatibility: - -| Mounting | Landing Damage Risk | Notes | -|----------|-------------------|-------| -| Protruding gimbal (8-10cm below belly) | **HIGH** | First ground contact; incompatible with parachute recovery | -| **Semi-recessed mount (recommended)** | **LOW** | Fuselage shields gimbal body; only lens ball slightly exposed (~40mm) | -| Retractable gimbal | **VERY LOW** | Works but adds 150g, $100-200, and retraction servo failure mode | -| Internal turret with window | **NEAR-ZERO** | Maximum protection but limits FoV and adds optical window | - -Parachute Y-harness at CG → default nose-down attitude → further protects belly-mounted components since the nose contacts ground first. Semi-recessed mount combined with nose-down harness attitude gives excellent camera protection with no moving parts. - -**Important: do NOT rigidly fasten the camera** to reduce perceived wobble. The damping balls/board are intentional passive isolation. Rigid mounting defeats vibration isolation and causes jello/blur. If wobble is observed, fix at the source: balance propeller, soft-mount cruise motor. The Z40K's 3-axis stabilization (±0.02°) handles the rest. - -## Testing Strategy (Additions for Draft 05) - -### VTOL Reliability Tests -- ESC desync provocation test: induce voltage sag on VTOL battery during hover at 30m, verify no desync with mitigation hardware -- Single motor shutdown test: kill one motor at 30m altitude, measure altitude loss and control degradation -- Motor thermal endurance: 10× back-to-back VTOL cycles, monitor motor temperatures and ESC performance -- VTOL battery degradation test: track VTOL battery internal resistance over 200 cycles, correlate with ESC performance - -### Parachute Landing & Semi-Recessed Camera Tests -- Cavity clearance verification: confirm 10-15mm gap on all sides between Z40K body and cavity walls at all gimbal angles; verify no physical contact during flight vibration -- Image quality comparison: fly same route with protruding mount vs semi-recessed mount, compare stabilization performance and image sharpness -- Wind landing impact: drop UAV from 1.5m with 5 m/s horizontal velocity onto grass/dirt, verify lens ball clearance and belly panel integrity -- Lens ball contact test: drop UAV belly-first from 0.5m onto gravel, inspect lens ball for damage — establish whether foam bumper standoff is sufficient -- Belly panel replacement: verify panel swap in < 10 minutes with field tools -- Parachute riser cutter: 20× cut tests, verify clean separation within 3 seconds of touchdown -- Drag abrasion test: drag UAV 5m across gravel with parachute attached, verify semi-recessed camera is not damaged (vs protruding gimbal baseline) -- Cavity turbulence test: smoke visualization or tuft test at cruise speed to verify no harmful vortex inside cavity - -## References - -1-72: See Draft 04 references (all still applicable) - -Additional sources: -73. ArduPilot quadplane reliability tips: https://ardupilot.org/plane/docs/quadplane-reliability.html -74. DeltaQuad Evo preventative maintenance: https://docs.deltaquad.com/tac/maintenance/preventative-maintenance -75. Brushless motor lifespan: https://www.mepsking.shop/blog/how-long-do-brushless-drone-motors-last.html -76. ESC desync diagnosis: https://oscarliang.com/fix-esc-desync/ -77. ESC common faults: https://www.mepsking.com/blog/esc-faults-and-fixes-for-fpv-drones.html -78. Fruity Chutes parachute integration guide: https://fruitychutes.com/uav_rpv_drone_recovery_parachutes/integrating-a-drone-parachute -79. UAS recovery tutorial: https://fruitychutes.com/uav_rpv_drone_recovery_parachutes/uas-parachute-recovery-tutorial -80. DRS-25 parachute system: https://harrisaerial.com/drs-25-drone-parachute-recovery-system-15-25-kg-uav/ -81. ScanEagle 150,000 hours: https://boeing.mediaroom.com/2009-04-13-Boeing-Insitu-ScanEagle-Logs-150-000-Service-Hours-in-Iraq-and-Afghanistan -82. ScanEagle 1,500 recoveries: http://www.globalsecurity.org/intell/library/news/2009/intell-090107-boeing01.htm -83. Drone impact energy transfer study: https://pmc.ncbi.nlm.nih.gov/articles/PMC12900295/ -84. Aludra SR-10 parachute performance: https://files.core.ac.uk/download/478919988.pdf -85. Runway-free recovery methods review: https://www.mdpi.com/2504-446X/8/9/463 -86. ViewPro Z40K manual: https://www.manualslib.com/manual/2385515/Viewpro-Z40k.html -87. Parachute repositioning event design: https://airborne-sys.com/wp-content/uploads/2016/10/aiaa-2009-2911_basic_design_of_a_reposit.pdf -88. UAV payload retraction patent: https://patents.justia.com/patent/11975867 -89. ArduPilot landing gear retraction: https://ardupilot.org/plane/docs/common-landing-gear.html -90. NASA eVTOL propulsion reliability: https://ntrs.nasa.gov/citations/20240005899 -91. Multi-rotor UAV fault tree reliability analysis: https://link.springer.com/chapter/10.1007/978-981-10-6553-8_100 -92. ArduPilot thrust loss/yaw imbalance detection: https://ardupilot.org/copter/docs/thrust_loss_yaw_imbalance.html -93. ViewPro Z40K dimensions/specs (RCDrone): https://rcdrone.top/products/viewpro-z40k-4k-gimbal-camera -94. ViewPro Z40K manufacturer specs (ViewproUAV): https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera-3-axis-gimbal-uav-aerial-photography-cartography-and-patrol-inspection.html - -## Related Artifacts -- Previous drafts: `solution_draft01.md` through `solution_draft04.md` -- Research artifacts: `_standalone/UAV_frame_material/00_research/UAV_frame_material/` diff --git a/_standalone/UAV_frame_material/01_solution/solution_draft06.md b/_standalone/UAV_frame_material/01_solution/solution_draft06.md deleted file mode 100644 index 91e3858..0000000 --- a/_standalone/UAV_frame_material/01_solution/solution_draft06.md +++ /dev/null @@ -1,206 +0,0 @@ -# Solution Draft (Rev 06) — Material Comparison: S2 FG + Carbon Stiffeners vs Shark M (Pure GFRP) - -## Assessment Findings - - -| Old Component Solution | Weak Point (functional/security/performance) | New Solution | -| -------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| S2 FG fuselage with carbon fiber stiffeners (Drafts 01-05) — radio transparency not analyzed | Carbon fiber stiffeners provide 30-52 dB RF shielding, creating localized RF shadow zones inside the fuselage; antenna placement is constrained to FG-only zones between stiffeners; for a multi-antenna UAV (C2, video, GPS, telemetry) this creates spatial planning complexity | Two options evaluated: (1) retain hybrid but engineer antenna placement around CF zones, or (2) switch to pure GFRP (Shark M approach) eliminating all RF constraints | -| S2 FG + CF stiffeners — parachute landing BVID risk not analyzed | Carbon fiber stiffeners fail brittlely under impact (sudden delamination); after repeated parachute landings (190-762 J per landing), CF stiffeners accumulate invisible internal damage (BVID) detectable only by ultrasonic NDT — impractical in field conditions | Pure GFRP approach eliminates BVID risk entirely; all damage is visible and field-inspectable; Shark M validates this approach with 50,000+ operational hours including thousands of parachute landings | -| S2 FG + CF stiffeners — radar signature not analyzed | CF stiffeners are conductive and reflect radar energy; a regular geometric pattern of CF ribs inside a GFRP skin creates a partial radar reflector, slightly increasing RCS vs pure GFRP | Pure GFRP airframe is radar-transparent; RCS limited to metallic internals (engine, servos, connectors) only; this is exactly how Shark M achieves "low radar visibility" per Ukrspecsystems | - - -## Shark M Material Identification - -The Shark M's fuselage material is not publicly disclosed by Ukrspecsystems. However, convergent evidence strongly indicates **pure GFRP (glass fiber reinforced polymer)** — likely E-glass or S-glass fiberglass with epoxy resin: - - -| Evidence | Implication | -| ---------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | -| PD-2 datasheet states "fully composite airframe" + "absence of large metal parts" → "low radar visibility" | Low radar visibility via material transparency = non-conductive composite = GFRP, not CFRP | -| Shark M achieves 180 km communication range through fuselage (Silvus modem) | Fuselage must be RF-transparent; CF would block signals (30-52 dB shielding) | -| User confirms from experience: "no issues with radiotransparency, cause it is still alive" | Direct field validation of RF transparency through airframe | -| UAVs in this class (10-15 kg MTOW) commonly use fiberglass composite | Industry norm for this weight/mission class | -| Ukrspecsystems claims "low radar visibility" specifically from "fully composite airframe" | Stealth through radar transparency (GFRP property), not radar absorption | - - -**Confidence**: ⚠️ Medium-High. Not officially confirmed, but all available evidence points to GFRP. No evidence contradicts this conclusion. - -## Product Solution Description - -Material comparison between three airframe construction approaches for a reconnaissance UAV (18 kg MTOW, catapult + parachute recovery): - -**Approach A — S2 Fiberglass + Carbon Fiber Stiffeners (full hybrid)** -S2 FG fuselage skins with carbon fiber unidirectional strips as wing spars, fuselage longerons, and key structural stiffeners. Combines FG impact tolerance with CF stiffness-to-weight efficiency. Requires engineered antenna placement to avoid CF-induced RF shadows. - -**Approach B — Pure GFRP (Shark M style)** -All-fiberglass construction (E-glass or S2-glass with epoxy). Thicker skins and/or foam-core sandwich panels compensate for lower stiffness. Entire airframe is RF-transparent and radar-transparent. Heavier than hybrid, but eliminates all CF-related complications. - -**Approach C — S2 GFRP + CF Wing Spar Only (recommended)** -S2 FG for all skins, fuselage structure, ribs, and secondary stiffeners. Carbon fiber used only for the main wing spar (one per wing half). The CF spar runs spanwise through the wing and connects at the fuselage center section, acting as the structural backbone: it provides wing flutter resistance, resists fuselage torsion and bending at the wing root junction, and stiffens the overall airframe. All antennas are in the fuselage — the wing spar creates no RF shadow in communication paths. BVID risk is limited to two non-impact-zone elements. Recovers ~200-400g of the pure GFRP weight penalty. - -## Architecture - -### Component: Airframe Material System - - -| Dimension | S2 FG + CF Stiffeners (A) | Pure GFRP (B, Shark M) | Winner | -| -------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | -------------- | -| **Radio transparency** | Partial — FG zones are RF-transparent; CF stiffeners block 30-52 dB; antenna placement constrained | Full — entire fuselage passes RF; antenna placement unconstrained; validated at 180 km range | **B** | -| **Radar transparency (stealth)** | Partial — CF elements reflect radar; slight RCS increase from conductive stiffener grid | Full — GFRP is radar-transparent; RCS from internals only; validated in combat ("low radar visibility") | **B** | -| **Single-impact survivability** | Good — S2 FG skin absorbs well, but CF stiffeners may crack/delaminate under localized loads | Good — all-FG flexes and absorbs; no brittle failure modes; graceful degradation | **B** (slight) | -| **Cumulative landing damage** | Risk — CF stiffener micro-delamination after repeated landings; BVID invisible without ultrasonic NDT | Safe — all damage visible; simple visual inspection per landing; no hidden degradation | **B** | -| **Weight efficiency** | Better — CF stiffeners save est. 300-800g over equivalent FG stiffening for same structural performance | Heavier — must use thicker skins, foam sandwich, or more ribs; est. 300-800g penalty | **A** | -| **Structural stiffness** | Higher — CF is ~5× stiffer per unit weight; wing flutter resistance superior | Lower — FG is more flexible; adequate for Shark M class (3.4m wingspan) but needs design compensation | **A** | -| **Material cost** | Higher — CF cloth 5-10× more expensive than FG; moderate total increase (CF only in stiffeners, ~$100-300 extra) | Lower — all FG; cheapest composite option | **B** | -| **Manufacturing simplicity** | Moderate — two material systems require different layup procedures; CF needs precise fiber alignment | Simple — single material system; one set of procedures; easier quality control | **B** | -| **Field repairability** | Partial — FG skin: easy field repair; CF stiffeners: needs specialized skills, vacuum bagging, controlled cure | Full — all components repairable with basic epoxy + FG cloth patches; average manual skills sufficient | **B** | -| **Field inspection** | Hard — CF stiffener BVID requires ultrasonic NDT equipment (impractical in field) | Easy — visual inspection + tap test; no specialized equipment | **B** | -| **Combat-proven track record** | None — novel approach, untested in operational service | Extensive — Shark M: 50,000+ operational hours, 1,200h maintenance-free, combat-validated parachute landings | **B** | -| **Endurance impact** | Baseline — lighter airframe → est. 6-24 min additional flight time (~1-5% of 7-8h mission) | Heavier by 300-800g → 6-24 min less flight time; Shark M achieves 7h with pure GFRP at 14.5 kg | **A** (modest) | -| **Vibration damping** | Lower — CF is stiffer but transmits more high-frequency vibration | Better — hybrid composites show higher damping factors; FG naturally dampens vibration | **B** (slight) | - - -**Score: Approach A wins 2.5 dimensions, Approach B wins 10.5 dimensions.** - -### Component: Approach C — S2 GFRP + CF Wing Spar Only (Recommended Compromise) - -Approach C takes the best of both worlds. The CF wing spar is the single highest-value use of carbon fiber in the airframe: - -| Dimension | Approach C vs Pure GFRP (B) | Approach C vs Full Hybrid (A) | -|-----------|---------------------------|------------------------------| -| **Radio transparency** | Identical in practice — spar is in the wing, not in fuselage antenna paths | Much better — no CF in fuselage; no antenna placement constraints | -| **Radar transparency** | Negligible RCS from two spar elements buried inside wing structure | Better — no CF grid pattern in fuselage | -| **Parachute landing BVID** | Negligible — wing spars don't take direct ground impact; shock attenuated through wing root | Much better — no CF in belly/fuselage impact zone | -| **Weight** | ~200-400g lighter (CF spar vs equivalent FG spar) | ~100-400g heavier (no CF fuselage stiffeners) | -| **Structural stiffness** | Significantly better — CF spar stiffens the entire airframe: wing bending, fuselage torsion at wing root, overall rigidity | Slightly lower — no fuselage longerons, but spar carry-through compensates at the critical center section | -| **Flutter resistance** | Same as full hybrid — CF spar is the primary flutter prevention element | Same | -| **Field repairability** | FG fuselage fully field-repairable; CF spar damage is rare (no impact exposure) and would require return to base | Better than full hybrid — only 2 CF elements vs many | -| **Manufacturing** | Simpler than full hybrid — CF layup only for two spar elements; everything else is single-material FG | Simpler | -| **Cost** | ~$50-150 more than pure GFRP (two CF spar elements) | ~$50-150 cheaper than full hybrid | - -**Why the CF spar stiffens the whole airframe**: The wing spar is not just a wing element — it runs through or connects at the fuselage center section (wing root junction). This junction is the highest-stress point on the airframe. A stiff CF spar at this junction: -- Resists wing bending under gust loads and maneuvers -- Prevents fuselage torsion (twisting) caused by asymmetric wing loading -- Acts as a rigid backbone that the FG fuselage shell wraps around -- Increases the natural frequency of the airframe, pushing flutter speed higher - -The result: the airframe behaves nearly as stiff as the full hybrid (Approach A) for the loads that matter most, while the fuselage remains pure FG with all its RF and impact advantages. - -**Weight budget for Approach C** (18 kg MTOW, 3.4m wingspan): - -| Component | Approach A (full hybrid) | Approach B (pure GFRP) | Approach C (FG + CF spar) | -|-----------|------------------------|----------------------|--------------------------| -| Wing spar (both halves) | CF: 150-250g | S2 FG: 400-600g | CF: 150-250g | -| Fuselage stiffeners | CF: 200-400g | S2 FG: 400-600g | S2 FG: 400-600g | -| Skins + ribs | S2 FG: 3.5-4.0 kg | S2 FG: 3.8-4.2 kg | S2 FG: 3.8-4.2 kg | -| **Total airframe** | **~4.5-5.0 kg** | **~5.0-5.8 kg** | **~4.7-5.4 kg** | -| **vs full hybrid** | Baseline | +500-800g | **+200-400g** | -| **Endurance impact** | Baseline (~7.5-8h) | -15-24 min | **-6-12 min** | - -### Radio Transparency — Detailed Analysis - - -| Frequency Band | Use | S2 FG + CF Stiffeners | Pure GFRP | -| --------------------- | ----------------- | --------------------------------------------------------------- | -------------------------------------------------------- | -| 900 MHz (Silvus) | C2 datalink | Passes through FG skin; CF stiffeners block directional sectors | Passes through entire fuselage; omnidirectional coverage | -| 1.575 GHz (GPS L1) | Navigation | GPS antenna must be on top, away from CF elements; workable | No constraints; GPS antenna anywhere on upper fuselage | -| 2.4 GHz (backup link) | Telemetry/control | ~30 dB blockage through CF; FG zones OK | Full transparency | -| 5.8 GHz (video) | HD video downlink | Higher frequency → more susceptible to CF blockage | Full transparency | - - -**Key insight**: The hybrid approach works if antennas are carefully placed in FG-only zones. But this constrains the internal layout and means that if a stiffener is later moved (design iteration), antenna placement must be re-validated. Pure GFRP gives antenna engineers complete freedom. - -### Parachute Landing — Material Behavior Under Repeated Impact - - -| Landing # | S2 FG + CF Stiffeners | Pure GFRP | -| --------- | --------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------- | -| 1-50 | Both perform well; no visible damage in calm/light wind | Same | -| 50-100 | FG belly panels show wear; CF stiffeners accumulate micro-stress | FG belly panels show same wear; FG stiffeners flex and reset | -| 100-200 | CF stiffener BVID possible; invisible without NDT; structural margin unknown | FG damage remains visible; operator can track degradation | -| 200-500 | Risk of sudden CF stiffener failure from accumulated BVID → catastrophic structural failure during flight | FG degrades gracefully; worn components replaced based on visual inspection | - - -**Key insight**: The failure mode difference is critical. CF stiffener failure is **sudden and catastrophic** (delamination → loss of structural integrity → possible in-flight breakup). FG failure is **gradual and visible** (cracking → flexibility → obvious degradation → scheduled replacement). - -### Weight Trade-Off Quantification - -For an 18 kg MTOW UAV with 3.4m wingspan: - - -| Stiffening Approach | Estimated Airframe Weight | Weight vs Full Hybrid | Endurance Impact | -|---------------------|--------------------------|----------------------|------------------| -| Approach A: S2 FG skin + CF stiffeners (full hybrid) | ~4.5-5.0 kg | Baseline | Baseline (est. 7.5-8h) | -| **Approach C: S2 FG skin + CF wing spar only (recommended)** | **~4.7-5.4 kg** | **+200-400g** | **-6-12 min (~1-3%)** | -| Approach B: S2 FG skin + S2 FG stiffeners (pure S2 FG) | ~5.0-5.8 kg | +500-800g | -15-24 min (~3-5%) | -| E-glass skin + E-glass stiffeners (pure E-glass, likely Shark M) | ~5.2-6.0 kg | +700-1000g | -20-30 min (~4-6%) | - -**Note**: Shark M achieves 7h at 14.5 kg MTOW with pure GFRP. The user's UAV at 18 kg MTOW has ~3.5 kg more budget. Approach C costs only 200-400g and 6-12 minutes vs the full hybrid — a minor trade for the operational benefits gained. - -## Recommendation - - -| Scenario | Recommended | Rationale | -|----------|-------------|-----------| -| **Military reconnaissance, parachute landing, EW-contested** | **Approach C: S2 GFRP + CF wing spar only** | Near-full radio + radar transparency (CF only in wings, away from antennas); no BVID risk in impact zone; field-repairable fuselage; CF spar stiffens entire airframe including fuselage torsion; only 200-400g heavier than full hybrid; 6-12 min endurance cost is acceptable | -| **Absolute maximum RF transparency required** | Approach B: Pure GFRP | Eliminates all CF; 100% RF/radar transparent; validated by Shark M; 500-800g heavier than full hybrid | -| **Maximum endurance priority, VTOL landing (no parachute)** | Approach A: S2 FG + CF stiffeners (full hybrid) | Weight savings matter most for hover efficiency; VTOL eliminates repeated landing impact; antenna placement needs engineering but is manageable | - -**For Variant B (catapult + parachute)**: **Approach C (S2 GFRP + CF wing spar only)** is recommended. It delivers nearly all the operational advantages of pure GFRP — radio transparency in the fuselage, no BVID in the impact zone, full field repairability of the fuselage — while recovering ~200-400g through CF spars exactly where stiffness matters most. The CF spar also stiffens the overall airframe through the wing root junction, improving flutter resistance and fuselage rigidity with no RF penalty. The endurance cost vs full hybrid is only 6-12 minutes on a 7-8h mission. - -**For Variant A (VTOL)**: Retain **Approach A (S2 FG + CF stiffeners)**. VTOL eliminates repeated impact concern, and weight savings directly benefit hover efficiency. - -### Approach C — Fuselage Stiffness Compensation (no CF in fuselage) - -With CF removed from fuselage stiffeners, the fuselage shell needs alternative stiffening. The CF wing spar carry-through handles the critical wing root junction loads, but fuselage panels still need local stiffening. Recommended techniques (can be combined): - -| Technique | Weight Impact | Benefit | -|-----------|--------------|---------| -| Foam sandwich panels (PVC or PMI foam core, S2 FG skins) | +50-150g vs monolithic | Dramatically increases panel stiffness without CF; widely used in gliders and UAVs | -| S2 FG hat-section ribs (replacing CF longerons) | +100-200g vs CF equivalent | Heavier but fully RF-transparent and field-repairable; standard FG construction | -| Geometric stiffening (corrugated skin sections) | +0-50g | Stiffens panels through geometry, not material; minimal weight penalty | -| Thicker S2 FG skins at critical zones (2.5mm vs 2.0mm) | +50-100g | Targeted reinforcement at high-stress areas (wing root, nose, tail boom junction) | - - -## Testing Strategy - -### Approach C Validation Tests -- Wing spar flutter test: ground vibration test at max speed (130 km/h equivalent) to confirm CF spar provides adequate flutter margin -- Fuselage torsion test: apply asymmetric wing loading at wing root junction, measure fuselage twist; compare CF spar carry-through vs FG-only baseline -- RF transmission verification: measure signal attenuation at 900 MHz, 2.4 GHz, 5.8 GHz through fuselage panels in all directions; confirm no RF shadow from wing spars at typical antenna-to-GCS angles -- Belly impact test: drop test at 762 J (8 m/s wind equivalent) on fuselage belly panel (FG only); confirm no damage propagation to CF wing spar -- Repeated landing test: 100× drop tests at 190 J (calm landing) on fuselage belly; verify CF spar shows zero damage (spar is not in impact path) -- Foam sandwich qualification (if used for fuselage panels): flatwise tension, edgewise compression, and impact per ASTM standards -- Field repair validation: induce belly skin damage, repair with field kit (epoxy + S2 FG cloth), test repaired panel to 80% original strength -- Endurance verification: compare actual flight time vs full hybrid prototype (if available); confirm 6-12 min difference estimate - -## References - -1-94: See Drafts 01-05 references (all still applicable) - -Additional sources: -95. Ukrspecsystems SHARK-M UAS: [https://ukrspecsystems.com/drones/shark-m-uas](https://ukrspecsystems.com/drones/shark-m-uas) -96. Ukrspecsystems PD-2 Datasheet: [https://www.unmannedsystemstechnology.com/wp-content/uploads/2016/06/PD_2.pdf](https://www.unmannedsystemstechnology.com/wp-content/uploads/2016/06/PD_2.pdf) -97. KSZYTec UAV Antenna Design Survival Guide (CF RF shielding 30-50 dB): [https://kszytec.com/uav-aerospace-antenna-design-survival-guide/](https://kszytec.com/uav-aerospace-antenna-design-survival-guide/) -98. Radio-Transparent Properties of S-Glass, Aramid, Quartz Radome Composites at 900 MHz: [https://link.springer.com/article/10.1007/s40033-023-00602-7](https://link.springer.com/article/10.1007/s40033-023-00602-7) -99. GFRP radar transparency for aerospace/defense: [https://www.tencom.com/blog/fiberglass-pultrusion-for-aerospace-defense-lightweight-structural-components](https://www.tencom.com/blog/fiberglass-pultrusion-for-aerospace-defense-lightweight-structural-components) -100. EM Shielding of Twill CFRP in UHF/L/S-band (IEEE): [https://ieeexplore.ieee.org/document/10329805/](https://ieeexplore.ieee.org/document/10329805/) -101. EM Shielding of Continuous CF Composites — 52 dB: [https://www.mdpi.com/2073-4360/15/24/4649](https://www.mdpi.com/2073-4360/15/24/4649) -102. E-Glass vs CF Impact Resistance for UAV Wings: [https://www.preprints.org/manuscript/202601.1067](https://www.preprints.org/manuscript/202601.1067) -103. S2/FM94 Glass Fiber Impact Damage Resistance: [https://mdpi-res.com/d_attachment/polymers/polymers-14-00095/article_deploy/polymers-14-00095-v2.pdf](https://mdpi-res.com/d_attachment/polymers/polymers-14-00095/article_deploy/polymers-14-00095-v2.pdf) -104. Field Repair of FG/Epoxy Fuselage: [https://www.matec-conferences.org/articles/matecconf/pdf/2019/53/matecconf_easn2019_01002.pdf](https://www.matec-conferences.org/articles/matecconf/pdf/2019/53/matecconf_easn2019_01002.pdf) -105. ACASIAS Antenna Integration in CF Fuselage Panel: [https://www.nlr.org/newsroom/video/acasias-antenna-integration/](https://www.nlr.org/newsroom/video/acasias-antenna-integration/) -106. Fiberglass Radome Dielectric Properties: [https://www.oreilly.com/library/view/radome-electromagnetic-theory/9781119410799/b02.xhtml](https://www.oreilly.com/library/view/radome-electromagnetic-theory/9781119410799/b02.xhtml) -107. E-Glass vs S-Glass Comparison: [https://www.smicomposites.com/comparing-e-glass-vs-s-glass-key-differences-and-benefits/](https://www.smicomposites.com/comparing-e-glass-vs-s-glass-key-differences-and-benefits/) -108. CF vs FG UAV Drone Material Comparison: [https://www.ganglongfiberglass.com/fiberglass-drone-vs-carbon-fiber/](https://www.ganglongfiberglass.com/fiberglass-drone-vs-carbon-fiber/) -109. CF RF Blocking — StackExchange: [https://drones.stackexchange.com/questions/283/how-much-does-mounting-an-antenna-near-a-carbon-fiber-frame-degrade-signal-recep](https://drones.stackexchange.com/questions/283/how-much-does-mounting-an-antenna-near-a-carbon-fiber-frame-degrade-signal-recep) -110. Belly-Landing Mini UAV Strength Study: [https://www.scientific.net/AMM.842.178](https://www.scientific.net/AMM.842.178) -111. Hybrid Composite Wing Spar Analysis: [https://yanthrika.com/eja/index.php/ijvss/article/view/1476](https://yanthrika.com/eja/index.php/ijvss/article/view/1476) -112. UAV Airframe Structural Optimization: [https://www.frontiersin.org/articles/10.3389/fmech.2025.1708043](https://www.frontiersin.org/articles/10.3389/fmech.2025.1708043) - -## Related Artifacts - -- Previous drafts: `solution_draft01.md` through `solution_draft05.md` -- Research artifacts: `_standalone/UAV_frame_material/00_research/UAV_frame_material/` - diff --git a/_standalone/UAV_frame_material/01_solution/solution_draft07.md b/_standalone/UAV_frame_material/01_solution/solution_draft07.md deleted file mode 100644 index d96aab4..0000000 --- a/_standalone/UAV_frame_material/01_solution/solution_draft07.md +++ /dev/null @@ -1,418 +0,0 @@ -# Solution Draft (Rev 07) — Complete UAV BOM & Cost Analysis - -Reconnaissance fixed-wing UAV. 18 kg MTOW, 3.8m wingspan, catapult launch, parachute recovery. S2 GFRP airframe with CF wing spar. Optimized for radio transparency, parachute landing durability, and field repairability. - -## Material Architecture - -**S2 fiberglass (GFRP) everywhere** — skins, fuselage structure, ribs, hat-section stiffeners, tail surfaces, control surfaces. **Carbon fiber only in the main wing spar** (one per wing half, carry-through at fuselage center section). - -The CF wing spar runs spanwise through the wing and connects at the fuselage center section, providing flutter resistance and torsional rigidity. The fuselage remains 100% GFRP — fully RF-transparent, radar-transparent, field-repairable, with no hidden damage from parachute landings. - -Fuselage panels use foam-core sandwich construction (S2 FG skins over PVC foam core). Hat-section S2 FG ribs at load-bearing stations. - -## Bill of Materials — Complete UAV (Per Unit) - -### 1. Composite Reinforcement Fabrics - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 1.1 | S2-glass cloth, 6oz plain weave | Style 4533, 30" width, aerospace silane finish | 15 yd | ~2.0 kg (in laminate) | $10.45/yd | $156.75 | [LeapTech](https://www.carbonfiberglass.com/product/6oz-s-glass-27-width-html/) | S2 provides 30-40% higher tensile strength and 10× fatigue life vs E-glass. 6oz for 2mm skin layups (3-4 layers). Plain weave for compound curves. | -| 1.2 | S2-glass cloth, 9oz satin weave | Style 7781, 38" width | 5 yd | ~0.8 kg (in laminate) | $14.50/yd | $72.50 | [LeapTech](https://www.carbonfiberglass.com/product/8-9oz-s-glass-satin-weave-38-width-html/) | Satin weave drapes on tight-radius parts (nose cone, wing root fairing). 9oz for wing root junction reinforcement. | -| 1.3 | CF unidirectional tape, 250gsm, 50mm | 12K, glass cross-stitch | 8 m | ~0.15 kg | $4.70/m | $37.60 | [Easy Composites](https://www.easycomposites.co.uk/250g-unidirectional-carbon-fibre-tape) | Maximum stiffness along spar axis. 50mm matches spar cap. 4-6 layers per cap. | -| 1.4 | E-glass cloth, 4oz plain weave | Standard, 50" width | 3 yd | — | $4.50/yd | $13.50 | [The Gelcoater](https://www.thegelcoater.com/pages/6oz-200-gsm-plain-weave-e-glass) | Non-structural areas: cable guides, servo mount pads. E-glass adequate where S2 premium isn't needed. | - -**Subtotal fabrics: ~$280 / ~3.0 kg in laminate** - -### 2. Matrix Resin System - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 2.1 | Aeropoxy PR2032 + PH3660 hardener | 3:1 mix, 1-hour pot life | 1 qt kit | ~0.9 kg | $81.50 | $81.50 | [Aircraft Spruce](https://www.aircraftspruce.com/catalog/pnpages/01-42135.php) | Aerospace-grade, Rutan-tested. Room-temp cure. Good wet-out. Compatible with S2 FG and CF. | -| 2.2 | Aeropoxy PR2032 + PH3630 fast hardener | 3:1 mix, 30-min pot life | 1 pint | ~0.45 kg | $45.00 | $45.00 | [Aircraft Spruce](https://www.aircraftspruce.com/catalog/cmpages/aeropoxy.php) | Fast hardener for bonding joints, fillets, quick repairs. | - -**Subtotal resin: ~$127 / ~0.8 kg in structure** - -### 3. Core Material - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 3.1 | PVC foam core, 3mm, 80 kg/m³ | EasyCell 75 / Divinycell H80 | 8 sheets | ~0.33 kg | $9.10/sheet | $72.80 | [Easy Composites](https://www.easycomposites.us/easycell75-closed-cell-pvc-foam) | Fuselage panels, tail surfaces. 3mm foam + 2×1mm FG skins = ~5mm sandwich. | -| 3.2 | PVC foam core, 5mm, 80 kg/m³ | Same material, thicker | 4 sheets | ~0.27 kg | $9.10/sheet | $36.40 | [Easy Composites](https://www.easycomposites.us/easycell75-closed-cell-pvc-foam) | Wing trailing edge panels and control surfaces. | - -**Subtotal core: ~$109 / ~0.60 kg** - -### 4. Consumables (Layup & Cure) - -| # | Component | Specification | Qty | Unit Price | Total | Link | -|---|-----------|---------------|-----|-----------|-------|------| -| 4.1 | Vacuum bagging kit | Film, sealant tape, peel ply, breather, tubing | 1 kit | $42.48 | $42.48 | [Fiberglass Supply](https://fiberglasssupply.com/basic-vacuum-bagging-kit/) | -| 4.2 | Mold release wax | Partall paste wax, 12oz | 1 can | $18.00 | $18.00 | [Aircraft Spruce](https://www.aircraftspruce.com) | -| 4.3 | PVA mold release | Liquid, 1 pint | 1 pint | $12.00 | $12.00 | [Aircraft Spruce](https://www.aircraftspruce.com) | -| 4.4 | Mixing cups, brushes, squeegees | Assorted laminating tools | 1 set | $25.00 | $25.00 | Various | -| 4.5 | Sandpaper assortment | 80, 120, 220, 400 grit | 1 pack | $15.00 | $15.00 | Various | -| 4.6 | Acetone / IPA | Surface cleaning, 1 gallon | 1 gal | $12.00 | $12.00 | Various | - -**Subtotal consumables: ~$125** - -### 5. Structural Hardware - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 5.1 | Wing root aluminum fittings | 6061-T6, CNC machined | 2 pcs | ~120g | $35.00/pc | $70.00 | [SendCutSend](https://sendcutsend.com) | Transfers wing bending loads to fuselage. Small, inspectable, not in RF path. | -| 5.2 | Wing spar carry-through tube | Pultruded CF tube, 25mm OD × 1.5mm | 0.6 m | ~60g | $15.00 | $15.00 | [DragonPlate](https://dragonplate.com) | Connects L/R wing spars through fuselage. Airframe backbone. | -| 5.3 | Control surface hinges | Composite-compatible pin hinges, 50mm | 10 pcs | ~50g | $2.50/pc | $25.00 | [Aircraft Spruce](https://www.aircraftspruce.com) | Aileron (4), elevator (4), rudder (2). Stainless steel pins. | -| 5.4 | Servo mounting plates | G10 fiberglass, 3mm, 100×60mm | 5 pcs | ~45g | $3.00/pc | $15.00 | [Aircraft Spruce](https://www.aircraftspruce.com) | RF-transparent, strong, bonds into FG structure. | -| 5.5 | Threaded inserts | M3 and M4 brass | 30 pcs | ~30g | $0.50/pc | $15.00 | Various | Access panels, servo covers, wing mounting. | -| 5.6 | Stainless fasteners | M3, M4 bolts/nuts/washers kit | 1 kit | ~80g | $20.00 | $20.00 | Various | Corrosion resistant. | -| 5.7 | Push rods + clevis | 2mm steel rod + nylon clevis | 5 sets | ~60g | $4.00/set | $20.00 | [HobbyKing](https://hobbyking.com) | Servo-to-surface linkage. | - -**Subtotal hardware: ~$180 / ~0.45 kg** - -### 6. Belly Protection (Parachute Landing) - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|----------| -| 6.1 | Replaceable belly panel | S2 FG / foam sandwich, 2mm skins + 3mm foam | 2 pcs (1+spare) | ~150g each | $15.00/pc | $30.00 | Sacrificial panel, field-swappable in <10 min. | -| 6.2 | EVA foam bumper strip | 15mm closed-cell, adhesive-backed | 1 m | ~40g | $5.00 | $5.00 | Wraps gimbal cavity. Absorbs minor impacts. | - -**Subtotal belly protection: ~$35 / ~0.19 kg installed** - -### 7. Parachute Recovery System - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 7.1 | Fruity Chutes FW Recovery Bundle | IFC-120-S Iris Ultra Compact + pilot chute + deployment bag + Y-harness + shock cord | 1 system | 950g | $830.00 | $830.00 | [Unmanned Systems Source](https://www.unmannedsystemssource.com/shop/parachutes/fixed-wing-bundles/fixed-wing-recovery-bundle-44lbs-20kg-15fps/) | Proven fixed-wing recovery system. IFC-120-S canopy rated 44lb (20kg) @ 15fps (4.6 m/s). Pilot chute ensures reliable air-stream deployment. Spectra shroud lines. Compact packing (190 cu"). Repackable. No pyrotechnics, no CO2 — just pilot chute + deployment bag for planned parachute landings. | -| 7.2 | Servo-actuated hatch | Spring-loaded door, triggered by autopilot | 1 | 80g | $30.00 | $30.00 | Custom | Autopilot triggers servo → spring ejects parachute bag into airstream. Same concept as Shark M: simple, reusable, no gases or explosives. | -| 7.3 | Parachute riser cutter | Servo-actuated line cutter | 1 | 30g | $40.00 | $40.00 | Custom | Cuts risers after touchdown to prevent wind drag. | - -**Subtotal parachute: ~$900 / ~1.06 kg** - -### 8. Propulsion - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 8.1 | T-Motor AT4120 KV250 | Long shaft pusher motor, 12S rated, 2100W max | 1 | 304g | $110.00 | $110.00 | [T-Motor Store](https://store.tmotor.com/product/at4120-long-shaft-vtol-pusher-motor.html) | 12S rated, triple-bearing long shaft for pusher config. At 40-50% throttle: 275W cruise, 7.8-8.7 g/W efficiency. 304g is lightweight for this power class. | -| 8.2 | T-Motor ALPHA 60A 12S ESC | FOC, 18-50.4V, 60A continuous | 1 | 73g | $110.00 | $110.00 | [T-Motor Store](https://store.tmotor.com/product/alpha-60a-12s-esc.html) | Matched to AT4120 motor. FOC for smooth low-RPM cruise. 60A continuous gives ample margin over ~7A cruise draw. Built-in protections. | -| 8.3 | APC 16×8E propeller | Thin electric, fiberglass nylon | 3 pcs (1+2 spare) | ~52g each | $10.00/pc | $30.00 | [APC Propellers](https://www.apcprop.com/product/16x8e/) | Excellent efficiency data matched with AT4120. 16" diameter for high propulsive efficiency at low RPM. Spares included — props are consumables. | - -**Subtotal propulsion: ~$250 / ~0.43 kg installed** - -### 9. Servos - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 9.1 | Digital metal gear servos | HV, 5-10 kg·cm torque, coreless | 5 pcs | ~175g total | $25.00/pc | $125.00 | [Savox](https://www.savox.com) / [KST](https://kstservos.com) | 2 aileron, 2 elevator, 1 rudder. Metal gears for reliability. HV (6-8.4V) powered direct from BEC. Coreless for precision and longevity. | - -**Subtotal servos: ~$125 / ~0.18 kg** - -### 10. Flight Controller & Navigation - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 10.1 | Holybro Pixhawk 6X Mini Set | STM32H753, triple IMU, PM02D power module | 1 set | ~38g | $313.00 | $313.00 | [Holybro](https://holybro.com/products/pixhawk-6x) | Industry standard for ArduPilot. Triple redundant IMU. Ethernet for Jetson link. Mini form factor for fixed-wing. | -| 10.2 | Holybro M10 GPS | u-blox M10, GPS/Galileo/GLONASS/BeiDou, compass | 1 | ~20g | $44.00 | $44.00 | [Holybro](https://holybro.com/collections/gps/products/m10-gps) | Matches Pixhawk 6X connector. Multi-constellation GNSS. Includes IST8310 compass, buzzer, safety switch. | - -**Subtotal flight controller: ~$357 / ~0.06 kg** - -### 11. Onboard Computer - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 11.1 | NVIDIA Jetson Orin Nano Super 8GB | 67 TOPS AI, ARM Cortex-A78AE | 1 | ~60g (board only) | $249.00 | $249.00 | [NVIDIA](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit/) | Runs GPS-denied navigation (visual odometry, terrain matching) + AI reconnaissance pipeline. 67 TOPS for real-time inference. Ethernet to Pixhawk. | - -**Subtotal compute: ~$249 / ~0.06 kg** - -### 12. Cameras - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 12.1 | ADTI 26S V1 + 35mm lens | 26MP APS-C, Sony IMX571, mechanical shutter | 1 | ~122g | $1,890.00 | $1,890.00 | [UnmannedRC](https://unmannedrc.com/products/26mp-26s-v1-aps-c-mapping-camera) | GPS-denied navigation camera. Mechanical shutter eliminates rolling shutter distortion at speed. 21.6 cm/px GSD at 2 km. Lightest 26MP APS-C option (122g with lens). | -| 12.2 | Viewpro Z40K 4K gimbal | 4K 20× optical zoom, 3-axis stabilized, 25.9MP | 1 | ~595g | $3,000.00 | $3,000.00 | [Viewpro](https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera-3-axis-gimbal-uav-aerial-photography-cartography-and-patrol-inspection.html) | AI reconnaissance camera. 2.7 cm/px GSD at 2 km max zoom. 103×58m FoV in 4K. 479g lighter than Viewpro A40 Pro. PWM/TTL/SBUS control compatible with ArduPilot. | - -**Subtotal cameras: ~$4,890 / ~0.72 kg** - -### 13. Communications - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 13.1 | TBS Crossfire Nano RX | 915 MHz, long range RC receiver | 1 | ~2g | $30.00 | $30.00 | [GetFPV](https://www.getfpv.com/tbs-crossfire-nano-rx.html) | Long-range RC link (>40 km). Ultra-light. ArduPilot CRSF protocol support. | -| 13.2 | RFD900x telemetry modem (air) | 900 MHz, 1W, >40 km range, AES-128 | 1 | ~30g | $97.00 | $97.00 | [Droneyard](https://event38.com/product/rfd-900x-telemetry-set/) | MAVLink telemetry + mission commands. Encrypted. Long range. Pixhawk-native integration. | - -**Subtotal comms: ~$127 / ~0.03 kg** - -### 14. Power System - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Link | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|------|----------| -| 14.1 | Tattu 6S 33Ah 350 Wh/kg semi-solid | 22.2V, 10C, 2216g each, XT90-S | 4 pcs | 8.86 kg total | $750.00/pc | $3,000.00 | [GenStattu](https://genstattu.com/tattu-semi-solid-state-350wh-kg-33000mah-10c-22-2v-6s1p-g-tech-lipo-battery-pack-with-xt90-s-plug/) | 4× in 2S2P → 12S 66Ah (2930 Wh). 350 Wh/kg is highest available density in production. 500+ cycle life at 90% retention. Modular — individual pack replacement. | -| 14.2 | Power distribution board + BEC | 12S input, 5V/3A + 12V/3A BEC outputs | 1 | ~25g | $30.00 | $30.00 | Various | Powers servos (5V HV), Pixhawk, GPS, RC receiver. | -| 14.3 | Wiring + connectors + battery bus | 10-12AWG silicone, XT90, series adapters, parallel bus bar | 1 set | ~450g | $80.00 | $80.00 | Various | 2S2P wiring: 2× series adapters + parallel bus bar. Redundant connectors. | - -**Subtotal power: ~$3,110 / ~9.34 kg** - -### 15. Catapult Interface - -| # | Component | Specification | Qty | Weight | Unit Price | Total | Why This | -|---|-----------|---------------|-----|--------|-----------|-------|----------| -| 15.1 | Belly mounting rails | Aluminum rails for catapult carriage attachment | 1 set | ~150g | $50.00 | $50.00 | Interface between airframe and pneumatic catapult carriage. Quick-release on launch. | - -**Subtotal catapult interface: ~$50 / ~0.15 kg** - -### 16. Field Repair Kit - -| # | Component | Specification | Qty | Weight | Unit Price | Total | -|---|-----------|---------------|-----|--------|-----------|-------| -| 16.1 | S2-glass patches | 6oz, 150×150mm pre-cut | 10 pcs | ~50g | $2.00/pc | $20.00 | -| 16.2 | Field epoxy kit | Aeropoxy PR2032/PH3630 fast, 4oz | 1 | ~120g | $25.00 | $25.00 | -| 16.3 | Repair tools pouch | Cups, gloves, sandpaper, scissors, tape | 1 | ~200g | $15.00 | $15.00 | -| 16.4 | Spare belly panels | Pre-manufactured (item 6.1) | 3 pcs | ~450g (stored) | $15.00/pc | $45.00 | - -**Subtotal repair kit: ~$105 / ~0.37 kg carried** - -## Weight Summary - -| Category | Weight | -|----------|--------| -| S2 FG skins + ribs + stiffeners (cured laminate) | ~3.80 kg | -| Foam core (in sandwich panels) | ~0.45 kg | -| CF wing spar (both halves, cured) | ~0.20 kg | -| Structural hardware (fittings, fasteners, hinges) | ~0.45 kg | -| Belly panel + bumper (installed) | ~0.19 kg | -| Catapult belly rails | ~0.15 kg | -| Parachute system | ~1.06 kg | -| **Airframe subtotal** | **~6.30 kg** | -| Motor + ESC + propeller | ~0.43 kg | -| Servos (×5) | ~0.18 kg | -| Pixhawk 6X + GPS | ~0.06 kg | -| Jetson Orin Nano Super | ~0.06 kg | -| ADTI 26S V1 + 35mm lens | ~0.12 kg | -| Viewpro Z40K gimbal | ~0.60 kg | -| TBS Crossfire Nano RX + RFD900x air | ~0.03 kg | -| Power distribution + wiring | ~0.48 kg | -| **Electronics subtotal** | **~1.96 kg** | -| 4× Tattu 6S 33Ah 350 Wh/kg | **8.86 kg** | -| **TOTAL** | **~17.12 kg** | - -Margin to 18 kg MTOW: **~0.88 kg** (for paint, antenna, miscellaneous hardware) - -## Per-UAV Cost Summary - -| Category | Cost | % | -|----------|------|---| -| Composite fabrics | $280 | 3% | -| Resin system | $127 | 1% | -| Foam core | $109 | 1% | -| Consumables | $125 | 1% | -| Structural hardware | $180 | 2% | -| Belly protection | $35 | <1% | -| Parachute system | $900 | 8% | -| Field repair kit | $105 | 1% | -| **Airframe subtotal** | **$1,861** | **17%** | -| Propulsion (motor + ESC + props) | $250 | 2% | -| Servos | $125 | 1% | -| **Propulsion + actuators subtotal** | **$375** | **3%** | -| Pixhawk 6X Mini Set | $313 | 3% | -| GPS M10 | $44 | <1% | -| Jetson Orin Nano Super | $249 | 2% | -| **Avionics subtotal** | **$606** | **6%** | -| ADTI 26S V1 + 35mm (navigation) | $1,890 | 17% | -| Viewpro Z40K 4K gimbal (reconnaissance) | $3,000 | 27% | -| **Camera subtotal** | **$4,890** | **45%** | -| TBS Crossfire Nano RX | $30 | <1% | -| RFD900x air module | $97 | 1% | -| **Comms subtotal** | **$127** | **1%** | -| 4× Tattu 6S 33Ah 350 Wh/kg batteries | $3,000 | 27% | -| Power distribution + wiring | $110 | 1% | -| **Power subtotal** | **$3,110** | **28%** | -| Catapult belly rails | $50 | <1% | -| **TOTAL PER UAV** | **$11,019** | **100%** | - -### Cost Drivers - -The cameras (45%) and batteries (28%) together account for 73% of per-UAV cost. The airframe material is only 5% ($641 for fabrics + resin + foam). The parachute system at $900 is 8% — significantly reduced from the $2,310 ballistic system in earlier drafts by switching from the Peregrine CO2 ballistic system to the simpler FW Recovery Bundle (canopy + pilot chute + deployment bag). The UAV performs planned parachute landings, not emergency deployments — no ballistic launcher needed. - -## Tooling (One-Time) - -| # | Component | Cost | Amortization | -|---|-----------|------|-------------| -| 9.1 | Fuselage mold set (FG/epoxy female, L+R halves) | $800 | 50+ pulls | -| 9.2 | Wing mold set (FG/epoxy female, upper+lower) | $600 | 50+ pulls | -| 9.3 | Tail surface molds (H-stab + V-stab) | $400 | 50+ pulls | -| 9.4 | Wing spar jig (aluminum + MDF fixture) | $200 | 100+ uses | -| 9.5 | Vacuum pump (2.5 CFM electric) | $150 | Permanent | -| 9.6 | CNC foam plug machining (outsourced) | $1,500 | One-time | -| | **Total tooling** | **$3,650** | | - -## Ground Equipment (One-Time, Shared) - -| # | Component | Cost | Notes | -|---|-----------|------|-------| -| G.1 | TBS Crossfire TX module | $100 | Shared across fleet, plugs into RC transmitter | -| G.2 | RFD900x ground station modem | $200 | Shared GCS telemetry module | -| G.3 | RC transmitter (e.g. RadioMaster TX16S) | $200 | If not already owned | -| G.4 | Pneumatic catapult (ELI PL-60 class) | $15,000-25,000 | Shared launch system; 108 kg, 2 transport cases | -| | **Total GCS equipment (excl. catapult)** | **$500** | | - -## Labor - -| # | Task | Hours (first 5 units) | Hours (at 100 units) | Rate | -|---|------|----------------------|---------------------|------| -| L.1 | Mold prep + release | 2h | 1h | Technician | -| L.2 | Fuselage skin layup + vacuum bag + cure | 8h | 5h | Technician | -| L.3 | Wing skin layup + vacuum bag + cure | 6h | 4h | Technician | -| L.4 | CF wing spar layup + cure | 3h | 2h | Technician | -| L.5 | Tail surface layup + cure | 3h | 2h | Technician | -| L.6 | Demolding + trimming | 4h | 2.5h | Technician | -| L.7 | Assembly (bond ribs, fittings, hardware) | 8h | 5h | Technician | -| L.8 | Electronics integration + wiring | 6h | 4h | Technician | -| L.9 | Parachute system install + test | 2h | 1.5h | Technician | -| L.10 | Finishing (fill, sand, paint) | 6h | 4h | Technician | -| L.11 | Quality inspection + flight test | 4h | 2h | Senior tech | -| | **Total labor per airframe** | **~52h** | **~33h** | | - -## Fleet Cost — 5 Aircraft - -| Item | Calculation | Cost | -|------|------------|------| -| **Tooling (one-time)** | Molds + jigs + CNC plugs + vacuum pump | $3,650 | -| **GCS equipment (one-time)** | TX module + RFD900x ground + RC transmitter | $500 | -| **UAV components × 5** | $11,019 × 5 | $55,095 | -| **Labor × 5** | 52h × 5 × $30/h | $7,800 | -| **Spare parts stock** | Extra belly panels, props, connectors | $600 | -| | | | -| **Total for 5 aircraft** | | **$67,645** | -| **Per aircraft (all-in, incl. tooling)** | | **$13,529** | -| **Per aircraft (excl. tooling, marginal)** | | **$12,699** | - -**Note**: Catapult ($15,000-25,000) is listed separately as ground equipment — not included in per-aircraft cost. It's a shared infrastructure item amortized across operations, not per-unit. - -### Cost Breakdown — 5 Aircraft - -| Category | Amount | % | -|----------|--------|---| -| Cameras (×5) | $24,450 | 36% | -| Batteries (×5) | $15,000 | 22% | -| Airframe materials (×5) | $9,305 | 14% | -| Labor | $7,800 | 12% | -| Avionics + compute (×5) | $3,030 | 4% | -| Tooling | $3,650 | 5% | -| Propulsion + servos (×5) | $1,875 | 3% | -| Comms (×5) + GCS equip. | $1,135 | 2% | -| Spares + repair kits | $1,125 | 2% | -| Catapult interface (×5) | $250 | <1% | - -## Fleet Cost — 100 Aircraft - -At 100 units, bulk pricing and learning-curve labor savings: - -| Item | Unit Price Change | Reasoning | -|------|------------------|-----------| -| S2 FG cloth | $7.50/yd (−28%) | Bolt pricing from AGY distributor | -| CF UD tape | $2.50/m (−47%) | 800m order | -| Epoxy resin | $65/qt kit (−20%) | 5-gallon drums | -| Foam core | $6.50/sheet (−29%) | Case quantity from Diab | -| Consumables | $80/set (−36%) | Roll quantities | -| Hardware | $140/set (−22%) | Batch CNC, bulk fasteners | -| Parachute | $700/unit (−16%) | Volume discount from Fruity Chutes | -| Motor AT4120 | $95 (−14%) | 100+ order from T-Motor | -| ESC ALPHA 60A | $95 (−14%) | 100+ order from T-Motor | -| Batteries | $650/pc (−13%) | Tattu bulk/OEM pricing | -| ADTI 26S V1 | $1,700 (−10%) | Volume pricing | -| Viewpro Z40K | $2,500 (−17%) | Direct OEM/volume | -| Pixhawk 6X Mini | $250 (−20%) | Holybro 100+ discount tier | -| GPS M10 | $33 (−25%) | Holybro 100+ discount | -| Jetson Orin Nano | $199 (−20%) | NVIDIA volume/module pricing | -| RFD900x | $85 (−12%) | Bulk order | -| Servos | $20/pc (−20%) | Bulk order | -| Labor | 33h × $30/h = $990 (−37%) | Learning curve, jigs, repetition | - -| Item | Calculation | Cost | -|------|------------|------| -| **Tooling** | 2 mold sets (50 pulls each) + jigs + vacuum | $7,300 | -| **Airframe materials × 100** | Bulk-priced fabrics + resin + foam + consumables + hardware | $116,000 | -| **Parachute systems × 100** | $700 × 100 | $70,000 | -| **Propulsion × 100** | (95 + 95 + 25) × 100 | $21,500 | -| **Servos × 100** | $100 × 100 | $10,000 | -| **Cameras × 100** | ($1,700 + $2,500) × 100 | $420,000 | -| **Avionics × 100** | ($250 + $33 + $199) × 100 | $48,200 | -| **Comms × 100** | ($30 + $85) × 100 | $11,500 | -| **Power system × 100** | ($2,600 + $100) × 100 | $270,000 | -| **Catapult interface × 100** | $40 × 100 | $4,000 | -| **Repair kits × 100** | $80 × 100 | $8,000 | -| **Labor × 100** | 33h × $30 × 100 | $99,000 | -| **Spare parts stock** | Belly panels, props, misc | $8,000 | -| **Quality tools** | Ultrasonic tester, etc. | $2,000 | -| **GCS equipment** | 5 GCS sets at $500 each | $2,500 | -| | | | -| **Total for 100 aircraft** | | **$1,098,000** | -| **Per aircraft (all-in)** | | **$10,980** | -| **Per aircraft (excl. tooling, marginal)** | | **$10,883** | - -### Cost Breakdown — 100 Aircraft - -| Category | Amount | % | -|----------|--------|---| -| Cameras | $420,000 | 38% | -| Power (batteries + wiring) | $270,000 | 25% | -| Airframe materials | $116,000 | 11% | -| Labor | $99,000 | 9% | -| Parachute systems | $70,000 | 6% | -| Avionics + compute | $48,200 | 4% | -| Propulsion + servos | $31,500 | 3% | -| Comms + GCS | $14,000 | 1% | -| Tooling + quality tools | $9,300 | 1% | -| Repair/spares | $16,000 | 2% | -| Catapult interface | $4,000 | <1% | - -### Scaling Comparison - -| Metric | 5 Aircraft | 100 Aircraft | Savings at Scale | -|--------|-----------|-------------|-----------------| -| Per-aircraft total cost | $13,529 | $10,980 | −19% | -| Per-aircraft airframe | $1,861 | $1,160 | −38% | -| Per-aircraft cameras | $4,890 | $4,200 | −14% | -| Per-aircraft batteries | $3,000 | $2,600 | −13% | -| Per-aircraft labor | $1,560 | $990 | −37% | -| Tooling per aircraft | $730 | $73 | −90% | -| Parachute per aircraft | $900 | $700 | −22% | - -Scaling savings are modest (19%) because cameras and batteries dominate cost and have limited bulk discount potential. The largest percentage savings come from tooling amortization (−90%) and labor learning curve (−37%). - -### Parachute System Alternatives - -The FW Recovery Bundle at $830 is the recommended baseline. For reference, other options: - -| System | Price | Weight | Rated | Deployment | Pro | Con | -|--------|-------|--------|-------|-----------|-----|-----| -| **Fruity Chutes FW Bundle (recommended)** | $830 | 950g | 20 kg @ 15fps | Pilot chute + deployment bag (air-stream) | Proven, sized right, includes harness, repackable | 2-4 week lead time | -| Fruity Chutes Peregrine UAV 5 Light | $2,310 | 1,480g | 20 kg @ 15fps | CO2 ballistic ejection | Fastest deployment, works at zero airspeed | 2.8× more expensive, heavier, CO2 cartridge consumable | -| Foxtech Parachute + Ejector 20kg | $899 | 1,600g | 20 kg | Servo + spring | Cheaper than Peregrine | Designed for multirotor vertical eject, heavier, unproven for FW | -| Skycat X68 + IFC-84-SUL | ~$1,100 | 420g | 17 kg max | Skycat Fuse® | Lightest system, fast deployment | Max 17 kg — borderline for 18 kg MTOW | -| DIY: Rocketman 120" + custom deployment | ~$350 | ~600g est. | ~18 kg | Servo hatch + spring | Cheapest | Unproven for this weight class, 4 shroud lines only | - -## References - -1. S2-glass cloth: https://www.carbonfiberglass.com/product/6oz-s-glass-27-width-html/ -2. CF UD tape: https://www.easycomposites.co.uk/250g-unidirectional-carbon-fibre-tape -3. Aeropoxy PR2032: https://www.aircraftspruce.com/catalog/pnpages/01-42135.php -4. PVC foam core: https://www.easycomposites.us/easycell75-closed-cell-pvc-foam -5. Fruity Chutes FW Bundle: https://www.unmannedsystemssource.com/shop/parachutes/fixed-wing-bundles/fixed-wing-recovery-bundle-44lbs-20kg-15fps/ -6. T-Motor AT4120: https://store.tmotor.com/product/at4120-long-shaft-vtol-pusher-motor.html -7. T-Motor ALPHA 60A: https://store.tmotor.com/product/alpha-60a-12s-esc.html -8. APC 16×8E: https://www.apcprop.com/product/16x8e/ -9. Holybro Pixhawk 6X: https://holybro.com/products/pixhawk-6x -10. Holybro M10 GPS: https://holybro.com/collections/gps/products/m10-gps -11. Jetson Orin Nano Super: https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit/ -12. ADTI 26S V1: https://unmannedrc.com/products/26mp-26s-v1-aps-c-mapping-camera -13. Viewpro Z40K: https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera-3-axis-gimbal-uav-aerial-photography-cartography-and-patrol-inspection.html -14. TBS Crossfire Nano RX: https://www.getfpv.com/tbs-crossfire-nano-rx.html -15. RFD900x: https://event38.com/product/rfd-900x-telemetry-set/ -16. Tattu 350Wh/kg 6S 33Ah: https://genstattu.com/tattu-semi-solid-state-350wh-kg-33000mah-10c-22-2v-6s1p-g-tech-lipo-battery-pack-with-xt90-s-plug/ -17. Foxtech Parachute 20kg: https://store.foxtech.com/parachute-for-20kg-uav-airplanes/ -18. Skycat X68: https://www.skycat.pro/shop/skycat-x68-3zdz9 -19. Rocketman parachutes: https://www.the-rocketman.com/products/ultra-light-high-performance-drone-parachutes - -## Related Artifacts - -- Previous drafts: `solution_draft01.md` through `solution_draft06.md` -- Research artifacts: `_standalone/UAV_frame_material/00_research/UAV_frame_material/` diff --git a/_standalone/UAV_frame_material/UAV_frame_material.md b/_standalone/UAV_frame_material/UAV_frame_material.md deleted file mode 100644 index 0f468ef..0000000 --- a/_standalone/UAV_frame_material/UAV_frame_material.md +++ /dev/null @@ -1 +0,0 @@ -I want to build a UAV plane for reconnaissance missions maximizing flight duration. Investigate what is the best frame material for that purpose \ No newline at end of file diff --git a/_standalone/camera_high_altitude/00_research/camera_high_altitude/00_ac_assessment.md b/_standalone/camera_high_altitude/00_research/camera_high_altitude/00_ac_assessment.md deleted file mode 100644 index 8ce5469..0000000 --- a/_standalone/camera_high_altitude/00_research/camera_high_altitude/00_ac_assessment.md +++ /dev/null @@ -1,98 +0,0 @@ -# Acceptance Criteria Assessment - -## Context - -Balloon at 10km altitude. AI detection model trained on imagery from 600-1000m altitude (trucks, vehicles, tracked machinery). Goal: reuse model by using a zoom gimbal camera to achieve equivalent Ground Sample Distance (GSD) from 10km. Budget: under $30k. - -## Key Technical Analysis - -### GSD Requirements - -The AI model was trained on 600-1000m imagery. Typical drone cameras at those altitudes produce: - -- At 600m: GSD ~5-10 cm/pixel -- At 1000m: GSD ~10-15 cm/pixel - -For vehicle detection per NIIRS scale: - -- Vehicle detection: ~1m GSD -- Vehicle type identification: ~25-50 cm GSD -- Detailed vehicle description: ~10 cm GSD - -Target GSD from 10km: **5-15 cm/pixel** to match training data. - -### Required Focal Length from 10km - -Formula: `focal_length = (pixel_pitch × altitude) / target_GSD` - - -| Sensor Type | Pixel Pitch | Target GSD 15cm | Target GSD 10cm | Target GSD 5cm | -| ----------------- | ----------- | --------------- | --------------- | -------------- | -| 1/2.8" (5MP) | ~2.0µm | 133mm | 200mm | 400mm | -| 1/2.3" (25MP) | ~1.05µm | 70mm | 105mm | 210mm | -| 1" (20MP) | ~2.4µm | 160mm | 240mm | 480mm | -| Full-frame (61MP) | 3.76µm | 251mm | 376mm | 752mm | - - -### Atmospheric Resolution Limit - -From 10km looking down, atmospheric turbulence limits ground resolution to approximately **4.6 cm** (theoretical, under ideal conditions). Practical limit: **5-8 cm** depending on weather. This means: - -- GSD finer than ~5 cm provides no benefit -- 10-15 cm GSD is safely achievable (optics-limited, not atmosphere-limited) - -### Environmental Conditions at 10km Altitude - -- Temperature: approximately **-50°C** -- Pressure: ~264 hPa (26% of sea level) -- Consumer cameras rated 0°C to 40°C — **will not function without heated enclosure** -- Drone gimbal cameras rated -20°C to +60°C — **still insufficient for -50°C** -- All solutions require thermal management (insulated/heated housing) - -## Acceptance Criteria - - -| Criterion | Derived Value | Researched Feasibility | Cost/Timeline Impact | Status | -| --------------------- | -------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------- | ---------------------------------------------- | -| Budget | <$30k | Realistic. Good systems $5k-15k, premium $15k-25k including enclosure | Achievable | Confirmed | -| GSD from 10km | 5-15 cm/pixel (match training data) | Achievable with 170-800mm FL depending on sensor. Atmospheric limit ~5cm | Core requirement, drives lens/sensor choice | Confirmed | -| FOV per frame | >200m ground width | At 10-15cm GSD with 2000-8000px: 200-1200m width. Acceptable | No issue with most cameras | Confirmed | -| Gimbal stabilization | 3-axis, ±0.02° or better | Standard in $2k-8k gimbal cameras. Balloon needs additional passive stabilization | Balloon motion is the challenge, not gimbal specs | Modified — need passive + active stabilization | -| Image quality for AI | Must be compatible with model trained at 600-1000m | Atmospheric haze at 10km WILL degrade contrast vs clean 600-1000m imagery. May need dehazing and/or model fine-tuning | Moderate risk — software mitigation possible | Added | -| Operating temperature | Balloon has thermal protection — not a concern | User confirmed thermal management is handled | No additional cost | Removed | -| Weight | Not critical | Most camera+gimbal systems 1-5kg. Enclosure adds 1-3kg. Total 3-8kg | Fine for balloon | Confirmed | - - -## Restrictions Assessment - - -| Restriction | Derived Value | Researched Feasibility | Cost/Timeline Impact | Status | -| --------------------------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------- | --------- | -| Balloon platform at 10km | Fixed constraint | Unusual altitude — between aerostat (500-3500m) and HAPS (18-20km). Very few purpose-built systems. Must adapt drone/DSLR equipment | Custom integration required | Confirmed | -| AI model reuse (no retrain) | Must match training GSD | Feasible for GSD matching. Image quality differences (haze, contrast) may require model fine-tuning or preprocessing | Low-moderate risk | Confirmed | -| Temperature at 10km | -50°C ambient | Heated enclosure mandatory. All cameras, gimbals, and batteries need thermal protection. Power budget increases 20-50W for heating | $2k-5k added, +2-3kg weight | Added | -| Pressure at 10km | ~264 hPa | Some electronics may need conformal coating or pressurized housing. Lens fogging risk. Most industrial electronics tolerate low pressure | Minor added cost ($500-1k) | Added | -| Power at 10km | Balloon power source | Camera (10-20W) + gimbal (20-50W) + heating (20-50W) ≈ 50-120W total. Balloon must supply sufficient power | Must verify balloon power budget | Added | -| Atmospheric haze | 10km air column | Clear weather (user-specified) helps. Contrast reduction is inevitable. Computational dehazing recommended | Software mitigation, low cost | Added | - - -## Key Findings - -1. **GSD matching is achievable** within budget. Multiple camera/lens combinations can deliver 5-15 cm/pixel GSD from 10km -2. **Thermal management is the hidden critical requirement**. No commercial camera or gimbal operates at -50°C without protection -3. **Atmospheric degradation is the biggest risk for AI model reuse**. Even with perfect GSD match, image contrast/sharpness will be lower than training data from 600-1000m. Recommend computational dehazing and potential model fine-tuning -4. **Balloon motion stabilization** requires both passive (pendulum damping, suspension design) and active (3-axis gimbal) approaches -5. **Budget is realistic** for mid-range solutions ($8k-20k for camera+gimbal+enclosure) with room for integration costs - -## Sources - -- NIIRS Civil Reference Guide (irp.fas.org) — GSD requirements for vehicle detection -- Pix4D — GSD calculation methodology -- "Limiting Resolution Looking Down Through the Atmosphere" (Optica) — atmospheric resolution limits -- UAVOS POD specifications — stratospheric camera benchmark (69cm GSD at 15km) -- Airmobi/Viewpro A40 Pro, Z40K — drone gimbal camera specs and pricing -- LoongUAV VT500Rs — high-res zoom gimbal specs -- Sony FE 400-800mm announcement — super telephoto lens specs/pricing -- Gremsy T7 specifications — gimbal payload capacity -- IAState Digital Press — high-altitude balloon payload stabilization research - diff --git a/_standalone/camera_high_altitude/00_research/camera_high_altitude/00_question_decomposition.md b/_standalone/camera_high_altitude/00_research/camera_high_altitude/00_question_decomposition.md deleted file mode 100644 index 019184e..0000000 --- a/_standalone/camera_high_altitude/00_research/camera_high_altitude/00_question_decomposition.md +++ /dev/null @@ -1,56 +0,0 @@ -# Question Decomposition - -## Original Question - -Research gimbal camera options under $30k for a balloon at 10km altitude to produce imagery compatible with an AI detection model trained on 600-1000m altitude data (10-20 cm/pixel GSD). Targets: trucks, vehicles, tracked machinery. - -## Active Mode - -Mode A Phase 2 — Initial Research (no existing solution draft) - -## Question Type Classification - -**Decision Support** — need to evaluate trade-offs between different gimbal camera systems for a specific use case. - -## Research Subject Boundary Definition - -- **Population**: Commercial and semi-professional gimbal camera systems suitable for balloon platforms -- **Geography**: Global market, available for purchase/export -- **Timeframe**: Currently available products (2024-2026) -- **Level**: Systems under $30k, excluding military-restricted/ITAR-controlled equipment -- **Platform**: High-altitude balloon at 10km with thermal protection - -## Problem Context Summary - -- Balloon at 10km altitude, thermally protected -- AI model trained on 600-1000m imagery, GSD ~10-20 cm/pixel -- Target objects: trucks, vehicles, tracked machinery -- Need zoom gimbal camera to achieve equivalent GSD from 10km -- Budget: under $30k -- Weight: not a major constraint -- Weather: mostly sunny conditions (eastern/southern Ukraine) - -## Decomposed Sub-Questions - -1. What are the available commercial gimbal camera categories suitable for this application? -2. For each category, what specific products can deliver 10-20 cm/pixel GSD from 10km? -3. What are the trade-offs between integrated zoom gimbals vs separate camera+lens+gimbal setups? -4. How does atmospheric haze affect AI model performance, and what mitigation exists? -5. What gimbal stabilization approach works best for balloon platforms? -6. What is the total system cost (camera + gimbal + integration) for each option? -7. What are the power requirements and interfaces for each option? - -## Timeliness Sensitivity Assessment - -- **Research Topic**: Gimbal cameras and aerial surveillance equipment -- **Sensitivity Level**: 🟡 Medium -- **Rationale**: Hardware products evolve on 1-2 year cycles. New products appear but existing ones remain available. -- **Source Time Window**: 2 years -- **Priority official sources**: - 1. Manufacturer product pages (Viewpro, Airmobi, LoongUAV, Sony, Gremsy) - 2. Distributor sites with pricing (druav.com, dronexpert.nl) -- **Key version information to verify**: - - ViewPro A40 Pro: current variant and pricing - - LOONG VT500Rs: availability and pricing - - Sony FE 400-800mm: shipping status and pricing - diff --git a/_standalone/camera_high_altitude/00_research/camera_high_altitude/01_source_registry.md b/_standalone/camera_high_altitude/00_research/camera_high_altitude/01_source_registry.md deleted file mode 100644 index ee4a49b..0000000 --- a/_standalone/camera_high_altitude/00_research/camera_high_altitude/01_source_registry.md +++ /dev/null @@ -1,146 +0,0 @@ -# Source Registry - -## Source #1 -- **Title**: Civil NIIRS Reference Guide -- **Link**: https://irp.fas.org/imint/niirs_c/guide.htm -- **Tier**: L1 -- **Publication Date**: Established standard (ongoing) -- **Timeliness Status**: ✅ Currently valid -- **Target Audience**: Military/intelligence imagery analysts -- **Research Boundary Match**: ✅ Full match — GSD requirements for vehicle detection -- **Summary**: NIIRS 6 required for vehicle type identification (~25-50cm GSD). Detailed vehicle description at ~10cm GSD. - -## Source #2 -- **Title**: Limiting Resolution Looking Down Through the Atmosphere -- **Link**: https://opg.optica.org/josa/abstract.cfm?uri=josa-56-10-1380 -- **Tier**: L1 -- **Publication Date**: Academic publication -- **Timeliness Status**: ✅ Currently valid (fundamental physics) -- **Summary**: Atmospheric turbulence limits ground resolution from high altitude to ~4.6cm. Independent of optics quality. - -## Source #3 -- **Title**: Pix4D — How to select Camera Focal Length and Flight Altitude -- **Link**: https://support.pix4d.com/hc/en-us/articles/202558849 -- **Tier**: L2 -- **Publication Date**: Updated regularly -- **Timeliness Status**: ✅ Currently valid -- **Summary**: GSD = (pixel_pitch × altitude) / focal_length. Standard formula for aerial imaging. - -## Source #4 -- **Title**: Airmobi A40 Pro Product Page -- **Link**: https://www.airmobi.com/product/a40-pro-40x-optical-zoom-3-axis-ai-tracking-gimbal-camera/ -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: $2,299. 40x optical zoom, 170mm max FL, 1/2.8" Sony CMOS, 5MP, 3-axis gimbal. AI tracking. Detects vehicles up to 16km. - -## Source #5 -- **Title**: ViewPro Z40K Product Page -- **Link**: https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera.html -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: $3,000-5,000. 20x optical (40x hybrid) zoom, 1/2.3" Panasonic 25.9MP, 4K video, 3-axis gimbal. - -## Source #6 -- **Title**: SIYI ZT30 Product Page -- **Link**: https://shop.siyi.biz/products/siyi-zt30 -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: $6,099-7,309. 30x optical zoom, 1/2.7" 8MP, 4K, thermal+LRF, 4-sensor pod. 9W average power. - -## Source #7 -- **Title**: DJI Zenmuse H30T Specs -- **Link**: https://enterprise.dji.com/zenmuse-h30-series/specs -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: $10,240-11,610. 34x optical, 1/1.8" 40MP, 4K, thermal 1280×1024, LRF. LOCKED TO DJI MATRICE PLATFORM. - -## Source #8 -- **Title**: LOONG VT500Rs Product Page -- **Link**: https://www.loonguav.com/vt500rs -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Price unknown. 42x optical, 48MP (8000×6000), dual visible cameras, thermal, LRF. 1.2kg. Military ISR focus. - -## Source #9 -- **Title**: Sony FE 400-800mm F6.3-8 G OSS Announcement -- **Link**: https://alphauniverse.com/stories/sony-unveils-specialty-400800mm-f6-38-g-oss-super-telephoto-zoom-g-lens/ -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2025-02-26 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: £2,399 (~$3,000). 400-800mm f/6.3-8, 2,475g. Supports 1.4x/2x TC for 1120/1600mm. Shipping March 2025. - -## Source #10 -- **Title**: Gremsy T7 Specifications -- **Link**: https://gremsy.com/gremsy-t7-spec -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: $2,349. 3.175kg max payload. 3-axis, ±0.02° vibration. USB/CAN/UART. Phase One compatible. - -## Source #11 -- **Title**: Viewpro Viewlink Serial Command Protocol V3.3.3 -- **Link**: https://www.viewprotech.com/index.php?ac=article&at=read&did=510 -- **Tier**: L2 (manufacturer documentation) -- **Publication Date**: 2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Documented serial UART and TCP protocol for controlling Viewpro gimbals from custom platforms. - -## Source #12 -- **Title**: ArduPilot ViewPro Gimbal Driver (AP_Mount) -- **Link**: https://github.com/ArduPilot/ardupilot/pull/22568 -- **Tier**: L2 (open source) -- **Publication Date**: 2023-2024 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Lua-based gimbal driver for Viewpro cameras in ArduPilot. Demonstrates serial integration feasibility. - -## Source #13 -- **Title**: MAVLink Gimbal Protocol v2 -- **Link**: https://mavlink.io/en/services/gimbal_v2.html -- **Tier**: L1 (protocol standard) -- **Publication Date**: Ongoing standard -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Standard protocol for gimbal control via MAVLink. Supports companion computer as gimbal manager. - -## Source #14 -- **Title**: UAVOS POD Stratospheric Earth Observation Payload -- **Link**: https://uasweekly.com/2026/02/02/uavos-unveils-stratospheric-earth-observation-payload/ -- **Tier**: L2 (press release) -- **Publication Date**: 2026-02-02 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: 3.6kg, 69cm GSD from 15km, gyro-stabilized. Benchmark for stratospheric imaging. Price unknown (likely >>$30k). - -## Source #15 -- **Title**: A40TR Pro Product Page (Airmobi) -- **Link**: https://www.airmobi.com/product/a40tr-pro-40x-eo-ir-lrf-ai-object-tracking-gimbal-camera/ -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: $7,499-7,999. 40x zoom EO + thermal + LRF. 3-axis gimbal. ±0.005° pointing. MIL-STD-810H certified. - -## Source #16 -- **Title**: Foxtech Seeker-30 TR Product Page -- **Link**: https://store.foxtech.com/seeker-30-tr-30x-optical-zoom-camera-with-3-axis-gimbal/ -- **Tier**: L1 (manufacturer) -- **Publication Date**: 2024-2025 -- **Timeliness Status**: ✅ Currently valid -- **Summary**: $3,899-4,299. 30x optical zoom, 2.13MP 1080p. Auto tracking. 4.0kg. Serial/PWM control. - -## Source #17 -- **Title**: High-altitude balloon payload stabilization research -- **Link**: https://iastatedigitalpress.com/ahac/article/5570/galley/5436/view/ -- **Tier**: L1 (academic) -- **Timeliness Status**: ✅ Currently valid -- **Summary**: Balloon payloads experience continuous rotation and pendulum swinging. Active gimbals may be insufficient alone. Passive stabilization through suspension design is critical. - -## Source #18 -- **Title**: Sony RX10 IV Specifications -- **Link**: https://www.bhphotovideo.com/c/product/1361560-REG/ -- **Tier**: L1 -- **Publication Date**: 2017 (product), specs current -- **Timeliness Status**: ✅ Currently valid (still in production) -- **Summary**: $1,700. 1" sensor 20MP, 24-600mm equiv (8.8-220mm actual), 2.41µm pixel pitch. 4K video. diff --git a/_standalone/camera_high_altitude/00_research/camera_high_altitude/02_fact_cards.md b/_standalone/camera_high_altitude/00_research/camera_high_altitude/02_fact_cards.md deleted file mode 100644 index a2f8b86..0000000 --- a/_standalone/camera_high_altitude/00_research/camera_high_altitude/02_fact_cards.md +++ /dev/null @@ -1,103 +0,0 @@ -# Fact Cards - -## Fact #1 -- **Statement**: GSD formula: GSD = (pixel_pitch × altitude) / focal_length. This is the fundamental relationship between sensor, optics, altitude, and ground resolution. -- **Source**: Source #3 (Pix4d) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #2 -- **Statement**: Atmospheric turbulence limits ground resolution from 10km altitude to approximately 4.6 cm regardless of optical system quality. Practical achievable limit is 5-8 cm in good conditions. -- **Source**: Source #2 (Optica journal) -- **Phase**: Phase 1 -- **Confidence**: ✅ High - -## Fact #3 -- **Statement**: Vehicle detection requires ~1m GSD. Vehicle type identification requires ~25-50 cm GSD. Detailed vehicle description requires ~10 cm GSD. Per NIIRS scale. -- **Source**: Source #1 (NIIRS Guide) -- **Phase**: Phase 1 -- **Confidence**: ✅ High - -## Fact #4 -- **Statement**: Training data GSD is approximately 10-20 cm/pixel (user confirmed). Target GSD from 10km must match this range. -- **Source**: User confirmation -- **Phase**: Phase 1 -- **Confidence**: ✅ High - -## Fact #5 -- **Statement**: ViewPro A40 Pro achieves GSD of 11.6 cm/pixel from 10km at maximum zoom (170mm FL, 5MP stills). Video (1080p) gives 15.6 cm/pixel. Both within target 10-20 cm range. -- **Source**: Source #4 (Airmobi), calculated -- **Phase**: Phase 2 -- **Confidence**: ✅ High (calculation verified) - -## Fact #6 -- **Statement**: ViewPro Z40K achieves GSD of ~10.3 cm/pixel from 10km at maximum optical zoom (~102mm FL, 25.9MP). Significantly more pixels per frame than A40 Pro (5888 vs 2560). -- **Source**: Source #5 (Viewpro), calculated -- **Phase**: Phase 2 -- **Confidence**: ⚠️ Medium (FL estimated from HFOV spec) - -## Fact #7 -- **Statement**: DJI Zenmuse H30T achieves GSD of ~5.7 cm/pixel from 10km (172mm FL, 40MP). Best integrated zoom camera GSD. But LOCKED to DJI Matrice platform — cannot be used on custom balloon platform without reverse engineering. -- **Source**: Source #7 (DJI) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #8 -- **Statement**: LOONG VT500Rs achieves GSD of ~4.8 cm/pixel from 10km (192mm FL, 48MP). Near atmospheric limit. Price unknown, estimated $10k-20k based on market positioning. -- **Source**: Source #8 (LoongUAV), calculated -- **Phase**: Phase 2 -- **Confidence**: ⚠️ Medium (price estimated) - -## Fact #9 -- **Statement**: Sony α7RV + FE 400-800mm achieves 4.7-9.4 cm/pixel GSD from 10km (400-800mm FL, 61MP, 3.76µm pixel pitch). Best image quality due to full-frame sensor and large pixels. Total weight with lens: 3,140g. -- **Source**: Source #9, #10 (Sony, Gremsy), calculated -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #10 -- **Statement**: Viewpro gimbal cameras have documented serial (UART) and TCP/IP control protocols (Viewlink V3.3.3). ArduPilot has native Lua driver for Viewpro gimbals. Suitable for custom platform integration. -- **Source**: Source #11, #12 (Viewpro, ArduPilot) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #11 -- **Statement**: Gremsy T7 gimbal has 3.175kg max payload. Sony α7RV (665g) + FE 400-800mm (2,475g) = 3,140g — at the payload limit with virtually no margin. Risk of unreliable operation. -- **Source**: Source #10 (Gremsy) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #12 -- **Statement**: Balloon payloads experience continuous rotation and pendulum swinging. Active gimbal alone may be insufficient. Passive stabilization through suspension design is critical for image quality. -- **Source**: Source #17 (Iowa State) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #13 -- **Statement**: SIYI ZT30 is a 4-sensor pod ($6,099-7,309) with 30x optical zoom, 8MP, 4K, thermal, and LRF. 9W average power. Achieves ~12 cm/pixel GSD from 10km. -- **Source**: Source #6 (SIYI) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #14 -- **Statement**: A40TR Pro ($7,499-7,999) adds thermal imaging and laser rangefinder to the 40x zoom capability. MIL-STD-810H certified for -40°C to +55°C — best temperature rating among all options. -- **Source**: Source #15 (Airmobi) -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #15 -- **Statement**: Sony RX10 IV ($1,700) with 1" sensor and 220mm actual FL achieves ~11 cm/pixel GSD from 10km. Has 20MP resolution (5472×3648). Needs external gimbal for balloon mounting. No documented remote zoom control for UAV integration. -- **Source**: Source #18 (B&H) -- **Phase**: Phase 2 -- **Confidence**: ⚠️ Medium (remote control integration unclear) - -## Fact #16 -- **Statement**: With zoom capability, cameras can be adjusted to match any target GSD within their zoom range. A camera capable of 5cm GSD at max zoom can be zoomed out to deliver 10-20cm GSD matching training data. This means over-capability in GSD is an advantage, not a problem. -- **Source**: Derived from GSD formula -- **Phase**: Phase 2 -- **Confidence**: ✅ High - -## Fact #17 -- **Statement**: Atmospheric haze at 10km reduces image contrast compared to 600-1000m imagery. Even with matched GSD, the AI model may need image preprocessing (dehazing) or fine-tuning for degraded imagery. -- **Source**: Source #2 (atmospheric research) -- **Phase**: Phase 2 -- **Confidence**: ⚠️ Medium (impact on specific AI model unknown) diff --git a/_standalone/camera_high_altitude/00_research/camera_high_altitude/03_comparison_framework.md b/_standalone/camera_high_altitude/00_research/camera_high_altitude/03_comparison_framework.md deleted file mode 100644 index d82c50f..0000000 --- a/_standalone/camera_high_altitude/00_research/camera_high_altitude/03_comparison_framework.md +++ /dev/null @@ -1,51 +0,0 @@ -# Comparison Framework - -## Selected Framework Type -Decision Support — evaluating trade-offs between gimbal camera systems for a specific use case - -## Selected Dimensions -1. GSD from 10km (achievable ground resolution) -2. Image resolution / FOV coverage -3. Sensor quality (size, SNR, dynamic range) -4. Integration feasibility (control protocol, balloon platform compatibility) -5. Stabilization quality (gimbal precision, balloon motion handling) -6. Additional sensors (thermal, LRF) -7. Power requirements -8. Weight -9. Price (total system cost) -10. Atmospheric haze resilience (sensor size impact on contrast) - -## GSD Calculations Summary - -All calculations assume: -- Altitude: 10,000m -- Target GSD: 10-20 cm/pixel (to match AI training data) - -| System | Sensor | Pixel Pitch | Max FL | GSD@10km | Resolution | FOV@10km | In Target? | -|---|---|---|---|---|---|---|---| -| A40 Pro | 1/2.8" 5MP | 1.98µm | 170mm | 11.6 cm | 2560×1920 | 297×223m | ✅ Yes | -| Z40K | 1/2.3" 25.9MP | 1.05µm | ~102mm | 10.3 cm | 5888×4400 | 606×453m | ✅ Yes | -| SIYI ZT30 | 1/2.7" 8MP | ~1.65µm | ~138mm | 12.0 cm | 3264×2448 | 392×294m | ✅ Yes | -| A40TR Pro | 1/2.8" 5MP | 1.98µm | 170mm | 11.6 cm | 2560×1920 | 297×223m | ✅ Yes | -| DJI H30T | 1/1.8" 40MP | 0.99µm | 172mm | 5.7 cm | 7296×5472 | 416×312m | ✅ (adjustable) | -| VT500Rs | ~1/2" 48MP | 0.92µm | 192mm | 4.8 cm | 8000×6000 | 385×289m | ✅ (adjustable) | -| Sony α7RV+800mm | FF 61MP | 3.76µm | 800mm | 4.7 cm | 9504×6336 | 447×298m | ✅ (adjustable) | -| Sony RX10 IV | 1" 20MP | 2.41µm | 220mm | 11.0 cm | 5472×3648 | 602×401m | ✅ Yes | - -## Initial Population - -| Dimension | A40 Pro | Z40K | SIYI ZT30 | A40TR Pro | Sony α7RV+800mm | Sony RX10 IV | -|---|---|---|---|---|---|---| -| GSD@10km | 11.6 cm ✅ | 10.3 cm ✅ | 12.0 cm ✅ | 11.6 cm ✅ | 4.7-9.4 cm ✅ | 11.0 cm ✅ | -| Resolution | 5MP (low) | 25.9MP (good) | 8MP (adequate) | 5MP (low) | 61MP (excellent) | 20MP (good) | -| FOV@10km | 297×223m | 606×453m | 392×294m | 297×223m | 447×298m | 602×401m | -| Sensor quality | Small, low SNR | Small, decent | Small, adequate | Small, low SNR | Full-frame, excellent | 1", good | -| Integration | Serial/TCP ✅ | Serial/TCP ✅ | Serial ✅ | Serial/TCP ✅ | Custom ⚠️ | Custom ⚠️ | -| Gimbal quality | ±0.02° | ±0.02° | ±0.01° | ±0.005° | Gremsy T7 ±0.02° | Needs external | -| Additional sensors | AI tracking | AI tracking | Thermal+LRF | Thermal+LRF | None | None | -| Power | ~15-25W | ~15-25W | 9W avg | ~20-30W | ~40-60W | ~10-15W | -| Weight | ~1kg | ~1kg | ~1.2kg | ~1.5kg | ~5kg total | ~1.1kg+gimbal | -| Price | $2,299 | $3,000-5,000 | $6,099-7,309 | $7,499-7,999 | ~$10,850 | ~$4,200-5,200 | -| Haze resilience | Low | Low | Low | Low | High | Moderate | - -Note: DJI H30T ($10,240-11,610) and LOONG VT500Rs (price unknown) excluded from primary comparison due to DJI platform lock and unknown pricing respectively. diff --git a/_standalone/camera_high_altitude/00_research/camera_high_altitude/04_reasoning_chain.md b/_standalone/camera_high_altitude/00_research/camera_high_altitude/04_reasoning_chain.md deleted file mode 100644 index fd14b7a..0000000 --- a/_standalone/camera_high_altitude/00_research/camera_high_altitude/04_reasoning_chain.md +++ /dev/null @@ -1,130 +0,0 @@ -# Reasoning Chain - -## Dimension 1: GSD Achievability - -### Fact Confirmation -All candidate systems achieve 10-20 cm/pixel GSD from 10km (Facts #5-#9, #15). The atmospheric limit is ~5cm (Fact #2), so all options are optics-limited (good — no wasted capability). - -### Reference Comparison -Systems with higher zoom/longer FL can achieve finer GSD but can always zoom out to match training data (Fact #16). Higher-capability systems provide flexibility to zoom in for closer inspection of specific targets. - -### Conclusion -All candidates pass the GSD requirement. Systems with higher resolution sensors (Z40K 25.9MP, Sony α7RV 61MP) provide significantly better spatial coverage per frame at the target GSD. - -### Confidence -✅ High — based on verified calculations - ---- - -## Dimension 2: Image Quality for AI Model Reuse - -### Fact Confirmation -The AI model was trained on 10-20 cm/pixel imagery from 600-1000m altitude (Fact #4). At that altitude, atmospheric path is short — images are sharp with high contrast. At 10km, haze reduces contrast (Fact #17). - -### Reference Comparison -Larger sensors have better SNR and dynamic range, which partially compensates for atmospheric degradation: -- Full-frame (Sony α7RV): 3.76µm pixels, excellent SNR -- 1" sensor (RX10 IV): 2.41µm pixels, good SNR -- 1/2.3" to 1/2.8" (drone gimbals): 1.0-2.0µm pixels, limited SNR - -Smaller pixels on drone gimbals mean each pixel gathers less light, amplifying atmospheric haze impact. - -### Conclusion -For AI model reuse from 10km, larger sensors provide measurably better imagery. However, the practical impact depends on atmospheric conditions. In mostly sunny weather (user constraint), even small-sensor cameras may produce adequate results. Computational dehazing can bridge the gap. The Sony full-frame option offers the best margin of safety for image quality. - -### Confidence -⚠️ Medium — depends on specific atmospheric conditions and AI model sensitivity - ---- - -## Dimension 3: Platform Integration - -### Fact Confirmation -Viewpro cameras have documented serial (Viewlink) and TCP protocols (Fact #10). ArduPilot has native Lua driver. SIYI has serial protocol. DJI H30T is locked to DJI Matrice (Fact #7). Sony cameras have no native gimbal/serial integration. - -### Reference Comparison -- Viewpro/Airmobi: Plug-and-play for custom platforms via UART/TCP. Best integration story. -- SIYI: Serial protocol available, community support in ArduPilot ecosystem. -- Sony + Gremsy: Gremsy T7 has MAVLink support, but camera zoom/photo control requires separate integration (USB/HDMI). More complex. -- DJI: Effectively impossible on non-DJI platforms. - -### Conclusion -Chinese drone gimbal cameras (Viewpro, SIYI) offer the simplest path to balloon integration. Sony approach requires significantly more custom engineering. DJI is disqualified for this use case. - -### Confidence -✅ High — based on manufacturer documentation and open-source drivers - ---- - -## Dimension 4: Balloon-Specific Stabilization - -### Fact Confirmation -Balloon payloads experience continuous rotation and pendulum swinging (Fact #12). Active gimbal alone may be insufficient. All candidate gimbals have ±0.01° to ±0.02° vibration accuracy. - -### Comparison -All integrated drone gimbals are designed for UAV vibration (high frequency, small amplitude). Balloon motion is different (low frequency, large amplitude rotation/swinging). The A40TR Pro has the tightest pointing accuracy (±0.005°) and MIL-STD certification. - -For the Sony approach, the Gremsy T7 is at payload limit (3,140g vs 3,175g max) — this leaves almost no margin and the gimbal motors may struggle with balloon-specific motion compensation. - -### Conclusion -All candidates need passive stabilization via suspension design (anti-rotation mechanism, pendulum dampers). The gimbal handles residual motion. The A40TR Pro has the best specifications for pointing accuracy. The Sony approach has gimbal weight margin concerns. - -### Confidence -⚠️ Medium — balloon-specific testing required to validate - ---- - -## Dimension 5: Cost-Benefit Analysis - -### Fact Confirmation -Budget: $30k. Options range from $2,299 to ~$10,850 for the camera system alone. Need to account for integration hardware, cables, power, and passive stabilization. - -### Comparison -| Option | Camera System | Integration/Mounting | Passive Stabilization | Total Est. | -|---|---|---|---|---| -| A40 Pro | $2,299 | $500 | $1,000-2,000 | $3,800-4,800 | -| Z40K | $3,000-5,000 | $500 | $1,000-2,000 | $4,500-7,500 | -| SIYI ZT30 | $6,099-7,309 | $500 | $1,000-2,000 | $7,600-9,800 | -| A40TR Pro | $7,499-7,999 | $500 | $1,000-2,000 | $9,000-10,500 | -| Sony α7RV+800mm+T7 | $10,850 | $2,000-3,000 | $1,000-2,000 | $13,850-15,850 | - -All options are well within $30k budget. - -### Conclusion -There's significant budget headroom. The decision should be driven by image quality needs and integration simplicity rather than cost alone. Even the premium Sony option leaves >$14k for contingency and additional equipment. - -### Confidence -✅ High — pricing from manufacturer sources - ---- - -## Dimension 6: Recommendation Reasoning - -### For maximum simplicity and value: ViewPro Z40K ($3,000-5,000) -- 25.9MP resolution gives excellent ground coverage (606×453m FOV) -- 10.3 cm GSD at max zoom matches training data -- 4K video for review -- Documented serial protocol for custom integration -- Lightweight (~1kg) -- Leaves >$25k budget for spare units, integration, and contingency - -### For maximum image quality: Sony α7RV + FE 400-800mm + Gremsy T7 (~$10,850) -- Full-frame sensor provides best atmospheric haze resilience -- Adjustable zoom from 400-800mm (9.4-4.7 cm GSD) -- 61MP resolution for highest detail -- Risk: weight at gimbal limit, complex integration, no turnkey zoom control - -### For best balance of quality, features, and integration: A40TR Pro ($7,499-7,999) -- 40x zoom (170mm max) gives 11.6 cm GSD — within target -- MIL-STD-810H certification (-40°C to +55°C) — best environmental rating -- Thermal camera for night/low-visibility operation -- Laser rangefinder for target distance measurement -- ±0.005° pointing accuracy — tightest of all options -- Documented serial protocol -- Only 1.5kg - -### For maximum versatility with multi-sensor: SIYI ZT30 ($6,099-7,309) -- 4 sensors (zoom, thermal, wide-angle, LRF) -- 12 cm GSD at max zoom — within target -- Lowest power consumption (9W average) -- Wide ArduPilot/PX4 community support diff --git a/_standalone/camera_high_altitude/00_research/camera_high_altitude/05_validation_log.md b/_standalone/camera_high_altitude/00_research/camera_high_altitude/05_validation_log.md deleted file mode 100644 index 3aff4b6..0000000 --- a/_standalone/camera_high_altitude/00_research/camera_high_altitude/05_validation_log.md +++ /dev/null @@ -1,51 +0,0 @@ -# Validation Log - -## Validation Scenario -A balloon at 10km altitude in eastern Ukraine on a sunny day needs to photograph a convoy of military trucks on a road. The AI model (trained on 600-1000m imagery at 10-20 cm/pixel) must detect and classify the vehicles. - -## Expected Based on Conclusions - -### Using ViewPro Z40K: -- GSD at max zoom: 10.3 cm/pixel. Each truck (~8m long) ≈ 78 pixels long in the image. -- FOV: 606×453m — covers a road segment of ~600m. Can see multiple vehicles. -- Image quality: 25.9MP provides good detail. Small sensor (1/2.3") may show some contrast loss from haze in sunny conditions. -- Integration: Serial control from companion computer. Can trigger photos and adjust zoom remotely. -- Expected: Model should detect trucks with reasonable confidence. May need dehazing in hazy conditions. - -### Using A40TR Pro: -- GSD at max zoom: 11.6 cm/pixel. Each truck ≈ 69 pixels long. -- FOV: 297×223m — narrower coverage, fewer vehicles per frame. -- Thermal camera adds night capability and can detect engine heat signatures. -- MIL-STD certified, most robust for field conditions. -- Expected: Model should detect trucks. Thermal overlay provides backup detection. - -### Using Sony α7RV + 400-800mm: -- At 600mm (15.8 cm GSD target): GSD = 6.3 cm/pixel. Each truck ≈ 127 pixels long. -- FOV: 599×399m — good coverage. -- Full-frame sensor captures more light per pixel, better contrast through haze. -- Expected: Best image quality. Model may need GSD adjustment (zoom to match 10-20cm range or downscale). -- Risk: Gimbal at weight limit may cause jitter on balloon. Integration is complex. - -## Actual Validation Results (analytical) - -1. All three systems can achieve the target 10-20 cm/pixel GSD — validated by calculation -2. Trucks at 10-20 cm/pixel occupy ~40-80 pixels in length — sufficient for CNN-based detection models -3. Atmospheric haze in sunny conditions over Ukraine plains is typically moderate — dehazing should be effective -4. Balloon passive stabilization is the unvalidated risk — needs physical prototyping - -## Counterexamples - -1. **Cloudy/hazy day**: Small-sensor cameras will produce significantly degraded imagery. The Sony full-frame option is more resilient but still affected. Cloud cover completely blocks all optical imaging. -2. **Nighttime**: Only A40TR Pro and SIYI ZT30 have thermal cameras for night operation. Others are daylight-only. -3. **Fast-moving targets**: At 10km range, even fast vehicles appear slow relative to the camera. Not a concern. -4. **Very small targets**: People or small objects at 10-20 cm/pixel would be only a few pixels — below detection threshold. This use case is for trucks/vehicles only. - -## Review Checklist -- [x] Draft conclusions consistent with fact cards -- [x] No important dimensions missed -- [x] No over-extrapolation -- [x] Conclusions actionable — user can purchase any recommended system -- [ ] Note: Balloon-specific stabilization requires physical validation - -## Conclusions Requiring Revision -None — but balloon motion compensation is flagged as the key unknown requiring prototyping. diff --git a/_standalone/camera_high_altitude/01_solution/solution_draft01.md b/_standalone/camera_high_altitude/01_solution/solution_draft01.md deleted file mode 100644 index f4bd431..0000000 --- a/_standalone/camera_high_altitude/01_solution/solution_draft01.md +++ /dev/null @@ -1,152 +0,0 @@ -# Solution Draft — Gimbal Camera for High-Altitude Balloon (10km) - -## Product Solution Description - -A zoom gimbal camera mounted on a balloon at 10km altitude, producing imagery with 10-20 cm/pixel GSD to match an AI detection model trained on 600-1000m altitude data. The system must detect trucks, vehicles, and tracked machinery. Budget: under $30k. - -The solution consists of: (1) a zoom gimbal camera with documented serial/TCP control protocol, (2) passive anti-rotation and pendulum damping suspension between the balloon and the camera, (3) a companion computer controlling the gimbal via serial/TCP/MAVLink, and (4) optional computational dehazing to improve image quality through 10km of atmosphere. - -``` -Balloon (10km) - │ - ├── Anti-rotation swivel - │ - ├── Pendulum damper / shock absorber - │ - └── Camera payload bay (thermally protected) - ├── Zoom gimbal camera (3-axis stabilized) - ├── Companion computer (control + image capture) - └── Power distribution -``` - -## Existing/Competitor Solutions Analysis - -| Solution | Altitude | GSD | Price | Notes | -|---|---|---|---|---| -| UAVOS POD (HAPS) | 15km | 69 cm/pixel | Unknown (>>$30k) | Purpose-built for stratosphere. GSD too coarse for vehicle identification | -| Aerostat systems (SKYSTAR, Hemeria) | 500-3500m | Sub-meter | $28k-500k+ | Lower altitude, includes balloon+winch+camera. Purpose-built but wrong altitude range | -| Military gimbals (WESCAM MX-10) | Any | Sub-cm to m | $100k-500k+ | Best quality but far exceeds budget and has export restrictions | -| DJI Zenmuse H30T | Drone altitude | 5.7 cm @10km | $10,240 | Excellent camera but locked to DJI Matrice drones. Cannot be used on custom balloon platform | - -No off-the-shelf solution exists for a $30k camera on a 10km balloon with 10-20cm GSD. All viable approaches use drone gimbal cameras adapted for balloon mounting. - -## Architecture - -### Component: Zoom Gimbal Camera - -| Solution | Sensor / Resolution | Max FL / GSD@10km | FOV@10km | Integration | Additional Sensors | Weight | Price | Fit | -|---|---|---|---|---|---|---|---|---| -| **ViewPro Z40K** | 1/2.3" 25.9MP | ~102mm / 10.3 cm | 606×453m | Serial/TCP, ArduPilot ✅ | AI tracking | ~1kg | $3,000-5,000 | ⭐ Best value — highest resolution for price, largest FOV | -| **A40TR Pro** | 1/2.8" 5MP | 170mm / 11.6 cm | 297×223m | Serial/TCP, ArduPilot ✅ | Thermal + LRF + AI | ~1.5kg | $7,499-7,999 | ⭐ Best multi-sensor — thermal night ops, MIL-STD-810H, tightest pointing | -| **SIYI ZT30** | 1/2.7" 8MP | ~138mm / 12.0 cm | 392×294m | Serial, ArduPilot/PX4 ✅ | Thermal + LRF + wide-angle | ~1.2kg | $6,099-7,309 | Good versatility — 4 sensors, lowest power (9W), wide community support | -| **ViewPro A40 Pro** | 1/2.8" 5MP | 170mm / 11.6 cm | 297×223m | Serial/TCP, ArduPilot ✅ | AI tracking | ~1kg | $2,299 | Budget option — proven 40x zoom, lowest cost, but only 5MP/1080p | -| **Sony α7RV + 400-800mm + T7** | FF 61MP | 800mm / 4.7 cm | 447×298m | Custom integration ⚠️ | None | ~5kg | ~$10,850 | Best image quality — full-frame, best haze resilience, complex integration | -| **Sony RX10 IV + gimbal** | 1" 20MP | 220mm / 11.0 cm | 602×401m | Custom integration ⚠️ | None | ~1.1kg+gimbal | ~$4,200-5,200 | Good sensor quality, wide FOV, but no remote zoom protocol | - -### GSD Calculation Reference - -``` -GSD = (pixel_pitch × altitude) / focal_length - -Example for Z40K: - pixel_pitch = 6.17mm / 5888px = 1.048µm - GSD = (0.001048mm × 10,000,000mm) / 102mm = 102.7mm ≈ 10.3 cm/pixel -``` - -### Component: Passive Stabilization System - -| Solution | Mechanism | Advantages | Limitations | Cost | Fit | -|---|---|---|---|---|---| -| Anti-rotation swivel + pendulum damper | Mechanical swivel bearing at suspension point, viscous/spring dampers on suspension lines | Proven in aerostat/balloon systems, no power needed, reduces rotation to near-zero | Adds weight (1-3kg), requires custom fabrication | $1,000-3,000 | Recommended baseline | -| Passive pendulum (long suspension line) | Increase distance between balloon and payload (5-10m line) | Simple, reduces oscillation frequency | Doesn't eliminate rotation, adds deployment complexity | $200-500 | Supplement to swivel | -| Reaction wheel (active) | Motorized flywheel counters rotation torque | Eliminates rotation completely | Adds complexity, weight, and power draw | $2,000-5,000 | For demanding pointing requirements | - -### Component: Companion Computer Integration - -For this project, the existing GPS-Denied system runs on Jetson Orin Nano with MAVLink/MAVSDK. The gimbal camera integration would use the same companion computer architecture: - -| Approach | Protocol | Camera Support | Complexity | Fit | -|---|---|---|---|---| -| ArduPilot Lua driver (Viewpro) | Viewlink serial | A40 Pro, Z40K, A40TR Pro | Low — use existing ArduPilot driver | Best for Viewpro cameras | -| MAVLink Gimbal Protocol v2 | MAVLink serial | SIYI, Viewpro (via proxy) | Low-Medium — standard protocol | Best for SIYI cameras | -| Custom serial integration | Manufacturer protocol | Any with serial API | Medium — write custom driver | Fallback for any camera | -| USB/HDMI + Gremsy SDK | USB + CAN | Sony + Gremsy T7 | High — separate camera and gimbal control | Only option for Sony approach | - -### Component: Image Preprocessing (Atmospheric Haze Mitigation) - -| Solution | Approach | Advantages | Limitations | Cost | Fit | -|---|---|---|---|---|---| -| Dark Channel Prior dehazing | Classic computer vision algorithm | Fast, no training needed, well-proven | May introduce artifacts, struggles with sky regions | Free (OpenCV) | Good baseline | -| CNN-based dehazing (AOD-Net, DehazeFormer) | Deep learning single-image dehazing | Better quality than classical, handles complex haze | Needs GPU, adds latency (~50-100ms) | Free (open source) | Better quality, adds processing time | -| Multi-scale Retinex (MSR) | Contrast enhancement | Simple, fast, improves visibility | Not true dehazing, may amplify noise | Free (OpenCV) | Quick alternative | -| No dehazing (sunny weather) | Direct use | No processing overhead | May reduce AI model accuracy in hazy conditions | Free | Acceptable for clear conditions | - -## Recommendations - -### Primary Recommendation: ViewPro Z40K ($3,000-5,000) - -**Rationale**: Best value proposition for this specific use case. -- 25.9MP resolution — by far the highest among integrated gimbal cameras in this price range -- 10.3 cm/pixel GSD at max zoom — directly matches training data range -- 606×453m FOV — covers the largest ground area per frame, meaning more vehicles visible per image -- 4K video output for live monitoring -- Documented Viewlink serial protocol with existing ArduPilot driver -- ~1kg weight, ~15-25W power -- $3,000-5,000 leaves ample budget for integration, spares, and contingency - -**Total estimated system cost**: $5,000-8,000 (camera + passive stabilization + integration hardware) - -### Secondary Recommendation: A40TR Pro ($7,499-7,999) - -**When to choose instead**: If you need night/thermal operation, laser ranging for target distance, or require MIL-STD-810H environmental certification. The thermal camera enables 24/7 operation and backup detection when optical imagery is degraded. - -**Trade-off**: 5MP EO resolution is significantly lower than Z40K's 25.9MP, resulting in 4x smaller ground coverage per frame. For pure AI detection in daylight, Z40K is better. - -**Total estimated system cost**: $9,000-12,000 - -### Alternative: Sony α7RV + FE 400-800mm + Gremsy T7 (~$10,850) - -**When to choose**: If atmospheric haze proves too degrading for small-sensor cameras, the full-frame sensor provides significantly better contrast and SNR. This is the "maximum image quality" option. - -**Risks**: -- Camera+lens weight (3,140g) is at the Gremsy T7 payload limit (3,175g) — virtually no margin -- No turnkey integration — requires custom camera control, zoom control, and photo trigger -- Sony camera rated 0-40°C only (balloon thermal protection must maintain this range) -- Most complex and heaviest system - -**Total estimated system cost**: $13,850-15,850 - -## Testing Strategy - -### Integration / Functional Tests -- Mount camera on test platform, verify serial/TCP control of zoom, pan, tilt, and photo capture -- Verify image capture at different zoom levels and calculate actual GSD against predictions -- Test passive stabilization mock-up with simulated balloon motion (pendulum, rotation) -- Verify power budget under sustained operation (camera + gimbal + companion computer) - -### Non-Functional Tests -- Ground-level test: photograph known-size vehicles from maximum available height and verify AI model detection at target GSD (resize images to simulate 10km GSD) -- Atmospheric test: if possible, test from lower altitude (1-2km) and compare image quality with/without dehazing -- Duration test: run camera continuously for 4+ hours to verify thermal stability and reliability -- Balloon integration test: short tethered balloon flight at lower altitude to validate stabilization and control - -## References - -- NIIRS Civil Reference Guide — https://irp.fas.org/imint/niirs_c/guide.htm -- Atmospheric resolution limit — https://opg.optica.org/josa/abstract.cfm?uri=josa-56-10-1380 -- GSD formula — https://support.pix4d.com/hc/en-us/articles/202558849 -- Airmobi A40 Pro — https://www.airmobi.com/product/a40-pro-40x-optical-zoom-3-axis-ai-tracking-gimbal-camera/ -- ViewPro Z40K — https://www.viewprouav.com/product/z40k-single-4k-hd-25-times-zoom-gimbal-camera.html -- A40TR Pro — https://www.airmobi.com/product/a40tr-pro-40x-eo-ir-lrf-ai-object-tracking-gimbal-camera/ -- SIYI ZT30 — https://shop.siyi.biz/products/siyi-zt30 -- DJI H30T — https://enterprise.dji.com/zenmuse-h30-series/specs -- LOONG VT500Rs — https://www.loonguav.com/vt500rs -- Sony FE 400-800mm — https://alphauniverse.com/stories/sony-unveils-specialty-400800mm-f6-38-g-oss-super-telephoto-zoom-g-lens/ -- Gremsy T7 — https://gremsy.com/gremsy-t7-spec -- Viewpro Viewlink Protocol — https://www.viewprotech.com/index.php?ac=article&at=read&did=510 -- ArduPilot Viewpro Driver — https://github.com/ArduPilot/ardupilot/pull/22568 -- MAVLink Gimbal Protocol v2 — https://mavlink.io/en/services/gimbal_v2.html -- UAVOS POD — https://uasweekly.com/2026/02/02/uavos-unveils-stratospheric-earth-observation-payload/ -- Balloon stabilization — https://iastatedigitalpress.com/ahac/article/5570/galley/5436/view/ -- Sony RX10 IV — https://www.bhphotovideo.com/c/product/1361560-REG/ -- Foxtech Seeker-30 TR — https://store.foxtech.com/seeker-30-tr-30x-optical-zoom-camera-with-3-axis-gimbal/ diff --git a/_standalone/camera_high_altitude/camera_high_altitude.md b/_standalone/camera_high_altitude/camera_high_altitude.md deleted file mode 100644 index 2eec50f..0000000 --- a/_standalone/camera_high_altitude/camera_high_altitude.md +++ /dev/null @@ -1,6 +0,0 @@ -I have an balloon flying on the 10 km altitude. -I have also AI detection model trained on the annotations of objects taken from 600-1000 m altitude. -I want to reuse this model to detect objects taken from the camera on the balloon. -For that I want to use some camera with zoom capabilities on a pretty decent gimbal. - -Research for the options for such gimbal camera under $30k \ No newline at end of file