mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-04-23 02:06:36 +00:00
test(e2e): rename registry entry to euroc_machine_hall with real SHA256
The prior registry entry was speculative: ``euroc_mh01`` pointed at an
old ``robotics.ethz.ch`` URL that is no longer reachable (TCP timeout).
The dataset moved to ETH Research Collection (DOI 10.3929/ethz-b-000690084)
as a single 12.6 GB ``machine_hall.zip`` bundle containing MH_01…MH_05.
There's no stable direct download URL — DSpace gates downloads behind a UI —
so:
- Renamed entry: ``euroc_mh01`` → ``euroc_machine_hall`` (matches the
actual artifact).
- SHA256 set to the real bundle hash 5ed7d07…
- URL left empty (same pattern as ``vpair_sample``); the CLI now
exits 3 and prints fetch instructions for empty-URL entries instead
of crashing on ``urllib.request.urlretrieve("")``.
- Adapter ``DatasetNotAvailableError`` message and conftest skip-reason
updated to tell engineers how to fetch/unpack manually.
- ``test_registry_has_euroc_machine_hall`` pin test replaces the old
pin; asserts real hash (not the ``"0"*64`` placeholder).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,10 @@
|
||||
"""CLI: python scripts/download_dataset.py euroc_mh01"""
|
||||
"""CLI: python scripts/download_dataset.py <dataset_name>
|
||||
|
||||
Registered datasets: see DATASET_REGISTRY in gps_denied.testing.download.
|
||||
Entries with an empty URL are manual-download-only (e.g. EuRoC via ETH
|
||||
Research Collection, VPAIR via a Zenodo form) — this script prints the
|
||||
instructions and exits instead of guessing a download URL.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
@@ -14,8 +20,23 @@ def main(argv: list[str]) -> int:
|
||||
name = argv[1]
|
||||
if name not in DATASET_REGISTRY:
|
||||
print(f"unknown dataset: {name}", file=sys.stderr)
|
||||
print(f"available: {', '.join(DATASET_REGISTRY)}", file=sys.stderr)
|
||||
return 2
|
||||
spec = DATASET_REGISTRY[name]
|
||||
if not spec.url:
|
||||
print(
|
||||
f"Dataset '{name}' has no automated download URL.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
"Fetch manually, then place the archive so its SHA256 matches:\n"
|
||||
f" expected sha256: {spec.sha256}\n"
|
||||
f" expected path subdir under datasets/: {spec.target_subdir}\n"
|
||||
"See DATASET_REGISTRY comments in src/gps_denied/testing/download.py "
|
||||
"for per-dataset instructions.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 3
|
||||
root = Path("datasets") / spec.target_subdir
|
||||
dest = root / Path(spec.url).name
|
||||
print(f"→ {dest}")
|
||||
|
||||
@@ -33,7 +33,9 @@ class EuRoCAdapter(DatasetAdapter):
|
||||
raise DatasetNotAvailableError(
|
||||
f"EuRoC sequence not found at {self._root} "
|
||||
f"(expected {self._root}/mav0/). "
|
||||
"Run `python scripts/download_dataset.py euroc_mh01` first."
|
||||
"Fetch the Machine Hall bundle from ETH Research Collection "
|
||||
"(DOI 10.3929/ethz-b-000690084), then unpack the inner "
|
||||
"MH_0N_easy.zip of interest into this directory."
|
||||
)
|
||||
|
||||
@property
|
||||
|
||||
@@ -16,26 +16,23 @@ class DatasetSpec:
|
||||
unpack: bool = True
|
||||
|
||||
|
||||
# NOTE ON SHA256 VALUES BELOW:
|
||||
# The EuRoC MH_01 hash is a placeholder of 64 zeros. Before the first real
|
||||
# download the engineer MUST:
|
||||
# 1. Manually fetch the zip:
|
||||
# curl -L <url from spec> -o /tmp/MH_01_easy.zip
|
||||
# 2. Compute its hash:
|
||||
# sha256sum /tmp/MH_01_easy.zip
|
||||
# 3. Replace the placeholder here with the real hex string.
|
||||
# 4. Commit the updated hash alongside the first real test run.
|
||||
# Leaving a length-valid (64-char) placeholder keeps the registry well-formed
|
||||
# and lets the download function refuse to keep any file that doesn't match,
|
||||
# so the placeholder state fails loudly rather than silently accepting.
|
||||
# REGISTRY NOTES:
|
||||
# Entries with url="" are manual-download-only. The ETH Research Collection
|
||||
# (DOI 10.3929/ethz-b-000690084) gates downloads behind a DSpace UI that
|
||||
# doesn't expose a stable direct URL, so the bundle has to be fetched by
|
||||
# hand and placed beside the project. SHA256 remains authoritative: the
|
||||
# download helper refuses anything that doesn't match.
|
||||
DATASET_REGISTRY: dict[str, DatasetSpec] = {
|
||||
"euroc_mh01": DatasetSpec(
|
||||
url=(
|
||||
"http://robotics.ethz.ch/~asl-datasets/ijrr_euroc_mav_dataset"
|
||||
"/machine_hall/MH_01_easy/MH_01_easy.zip"
|
||||
),
|
||||
sha256="0" * 64, # placeholder — see note above
|
||||
target_subdir="euroc/MH_01",
|
||||
"euroc_machine_hall": DatasetSpec(
|
||||
# 12.6 GB bundle containing MH_01 … MH_05 (each as inner .zip + .bag).
|
||||
# Download from ETH Research Collection:
|
||||
# https://doi.org/10.3929/ethz-b-000690084
|
||||
# After fetching, unpack the inner MH_0N_easy.zip of interest into
|
||||
# datasets/euroc/MH_0N/ so the adapter finds mav0/.
|
||||
url="", # manual download — see comment above
|
||||
sha256="5ed7d07903f8d19b6c8808e2ae8a0872b281f6e34ef5497023b8ac58c3de0f6f",
|
||||
target_subdir="euroc",
|
||||
unpack=False, # the bundle itself is not unpacked end-to-end; see README
|
||||
),
|
||||
"vpair_sample": DatasetSpec(
|
||||
url="", # manual download only — see Zenodo link on
|
||||
|
||||
@@ -14,7 +14,9 @@ def euroc_mh01_root() -> Path:
|
||||
if not (root / "mav0").is_dir():
|
||||
pytest.skip(
|
||||
f"EuRoC MH_01 not present at {root}. "
|
||||
"Run `python scripts/download_dataset.py euroc_mh01` to fetch it."
|
||||
"Fetch the Machine Hall bundle from ETH Research Collection "
|
||||
"(DOI 10.3929/ethz-b-000690084), unpack the inner MH_01_easy.zip "
|
||||
f"into {root}/ so that {root}/mav0/ exists."
|
||||
)
|
||||
return root
|
||||
|
||||
|
||||
@@ -11,12 +11,15 @@ from gps_denied.testing.download import (
|
||||
)
|
||||
|
||||
|
||||
def test_registry_has_euroc():
|
||||
assert "euroc_mh01" in DATASET_REGISTRY
|
||||
spec = DATASET_REGISTRY["euroc_mh01"]
|
||||
def test_registry_has_euroc_machine_hall():
|
||||
assert "euroc_machine_hall" in DATASET_REGISTRY
|
||||
spec = DATASET_REGISTRY["euroc_machine_hall"]
|
||||
assert isinstance(spec, DatasetSpec)
|
||||
assert spec.url.startswith("http")
|
||||
# URL intentionally empty — ETH Research Collection gates downloads behind
|
||||
# a DSpace UI without a stable direct URL. Registry records SHA256 only.
|
||||
assert spec.url == ""
|
||||
assert len(spec.sha256) == 64
|
||||
assert spec.sha256 != "0" * 64 # real hash, not the placeholder
|
||||
|
||||
|
||||
def test_registry_has_vpair_sample():
|
||||
|
||||
Reference in New Issue
Block a user