mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-04-23 04:26:36 +00:00
test(e2e): add SHA256-verified dataset downloader + EuRoC registry entry
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
"""Dataset downloader — registry + SHA256-verified fetch."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DatasetSpec:
    """Immutable description of one downloadable dataset.

    Instances are frozen, so a spec cannot be modified after construction.
    """

    # Location the dataset file is fetched from.
    url: str
    # Expected SHA256 of the file at `url`, written as a hex string.
    sha256: str
    # Subdirectory associated with this dataset, e.g. "euroc/MH_01".
    # NOTE(review): presumably relative to a datasets root — confirm with callers.
    target_subdir: str
    # NOTE(review): presumably marks the download as an archive to unpack;
    # nothing in the visible code reads this flag — confirm with callers.
    unpack: bool = True
|
||||
|
||||
|
||||
# NOTE ON SHA256 VALUES BELOW:
|
||||
# The EuRoC MH_01 hash is a placeholder of 64 zeros. Before the first real
|
||||
# download the engineer MUST:
|
||||
# 1. Manually fetch the zip:
|
||||
# curl -L <url from spec> -o /tmp/MH_01_easy.zip
|
||||
# 2. Compute its hash:
|
||||
# sha256sum /tmp/MH_01_easy.zip
|
||||
# 3. Replace the placeholder here with the real hex string.
|
||||
# 4. Commit the updated hash alongside the first real test run.
|
||||
# Leaving a length-valid (64-char) placeholder keeps the registry well-formed
|
||||
# and lets the download function refuse to keep any file that doesn't match,
|
||||
# so the placeholder state fails loudly rather than silently accepting.
|
||||
# Known datasets, keyed by a short identifier.
DATASET_REGISTRY: dict[str, DatasetSpec] = {
    "euroc_mh01": DatasetSpec(
        target_subdir="euroc/MH_01",
        sha256="0" * 64,  # placeholder — see note above
        url=(
            "http://robotics.ethz.ch/~asl-datasets/ijrr_euroc_mav_dataset"
            "/machine_hall/MH_01_easy/MH_01_easy.zip"
        ),
    ),
}
|
||||
|
||||
|
||||
def verify_sha256(path: Path, expected_hex: str) -> bool:
    """Return True iff the SHA256 of the file at `path` matches `expected_hex`.

    The file is hashed in 1 MiB chunks, so large datasets are never loaded
    into memory at once. Hex digits are case-insensitive, so `expected_hex`
    is normalised (surrounding whitespace stripped, lower-cased) before the
    comparison; a digest pasted from a tool that prints uppercase hex or a
    trailing newline still matches.

    Args:
        path: File to hash.
        expected_hex: Expected SHA256 digest as a hex string.

    Returns:
        True when the digests are equal, False otherwise.

    Raises:
        OSError: If `path` cannot be opened or read.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        while True:
            block = fh.read(chunk_size)
            if not block:
                break
            digest.update(block)
    # hexdigest() is always lowercase; bring the expectation to the same form.
    return digest.hexdigest() == expected_hex.strip().lower()
|
||||
|
||||
|
||||
def download_dataset(spec: DatasetSpec, dest: Path) -> Path:
    """Download `spec.url` to `dest` unless a verified copy is already there.

    If `dest` already exists and its SHA256 matches `spec.sha256`, return
    immediately without touching the network. Otherwise the payload is
    fetched into a sibling ``<dest name>.part`` file, verified there, and
    moved onto `dest` only once the hash matches, so `dest` never holds
    unverified data.

    Args:
        spec: Dataset description providing the source URL and the expected
            SHA256 hex digest.
        dest: Final path of the downloaded file. Missing parent directories
            are created.

    Returns:
        `dest`, once it holds a file whose hash matches `spec.sha256`.

    Raises:
        RuntimeError: The downloaded file does not match `spec.sha256`. The
            partial download is deleted, and so is any stale file already
            at `dest` (it failed the same check), so nothing unverified is
            left behind.
        OSError: Propagated from the download or from filesystem operations
            (``urllib.error.URLError`` is an ``OSError`` subclass). The
            partial download is removed before the error propagates.
    """
    if dest.exists() and verify_sha256(dest, spec.sha256):
        return dest

    dest.parent.mkdir(parents=True, exist_ok=True)
    tmp = dest.with_name(dest.name + ".part")
    try:
        urllib.request.urlretrieve(spec.url, tmp)
        if not verify_sha256(tmp, spec.sha256):
            # Anything still sitting at `dest` already failed verification
            # above; remove it so a failed call never leaves a bad file at
            # the final path.
            try:
                dest.unlink()
            except FileNotFoundError:
                pass
            raise RuntimeError(f"SHA256 mismatch after download of {spec.url}")
        # replace() overwrites an existing `dest` on every platform, whereas
        # rename() raises on Windows when the target exists.
        tmp.replace(dest)
    finally:
        # After a successful replace() the partial file is gone and this is
        # a no-op; on any failure it discards the incomplete/bad download.
        try:
            tmp.unlink()
        except FileNotFoundError:
            pass
    return dest
|
||||
Reference in New Issue
Block a user