mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-04-23 08:56:38 +00:00
test(e2e): rename registry entry to euroc_machine_hall with real SHA256
The prior registry entry was speculative: ``euroc_mh01`` pointed at an
old ``robotics.ethz.ch`` URL that is no longer reachable (TCP timeout).
The dataset moved to ETH Research Collection (DOI 10.3929/ethz-b-000690084)
as a single 12.6 GB ``machine_hall.zip`` bundle containing MH_01…MH_05.
There's no stable direct download URL — DSpace gates downloads behind a UI —
so:
- Renamed entry: ``euroc_mh01`` → ``euroc_machine_hall`` (matches the
actual artifact).
- SHA256 set to the real bundle hash 5ed7d07…
- URL left empty (same pattern as ``vpair_sample``); the CLI now
exits 3 and prints fetch instructions for empty-URL entries instead
of crashing on ``urllib.request.urlretrieve("")``.
- Adapter ``DatasetNotAvailableError`` message and conftest skip-reason
updated to tell engineers how to fetch/unpack manually.
- ``test_registry_has_euroc_machine_hall`` pin test replaces the old
pin; asserts real hash (not the ``"0"*64`` placeholder).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,10 @@
|
|||||||
"""CLI: python scripts/download_dataset.py euroc_mh01"""
|
"""CLI: python scripts/download_dataset.py <dataset_name>
|
||||||
|
|
||||||
|
Registered datasets: see DATASET_REGISTRY in gps_denied.testing.download.
|
||||||
|
Entries with an empty URL are manual-download-only (e.g. EuRoC via ETH
|
||||||
|
Research Collection, VPAIR via a Zenodo form) — this script prints the
|
||||||
|
instructions and exits instead of guessing a download URL.
|
||||||
|
"""
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -14,8 +20,23 @@ def main(argv: list[str]) -> int:
|
|||||||
name = argv[1]
|
name = argv[1]
|
||||||
if name not in DATASET_REGISTRY:
|
if name not in DATASET_REGISTRY:
|
||||||
print(f"unknown dataset: {name}", file=sys.stderr)
|
print(f"unknown dataset: {name}", file=sys.stderr)
|
||||||
|
print(f"available: {', '.join(DATASET_REGISTRY)}", file=sys.stderr)
|
||||||
return 2
|
return 2
|
||||||
spec = DATASET_REGISTRY[name]
|
spec = DATASET_REGISTRY[name]
|
||||||
|
if not spec.url:
|
||||||
|
print(
|
||||||
|
f"Dataset '{name}' has no automated download URL.",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
"Fetch manually, then place the archive so its SHA256 matches:\n"
|
||||||
|
f" expected sha256: {spec.sha256}\n"
|
||||||
|
f" expected path subdir under datasets/: {spec.target_subdir}\n"
|
||||||
|
"See DATASET_REGISTRY comments in src/gps_denied/testing/download.py "
|
||||||
|
"for per-dataset instructions.",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
return 3
|
||||||
root = Path("datasets") / spec.target_subdir
|
root = Path("datasets") / spec.target_subdir
|
||||||
dest = root / Path(spec.url).name
|
dest = root / Path(spec.url).name
|
||||||
print(f"→ {dest}")
|
print(f"→ {dest}")
|
||||||
|
|||||||
@@ -33,7 +33,9 @@ class EuRoCAdapter(DatasetAdapter):
|
|||||||
raise DatasetNotAvailableError(
|
raise DatasetNotAvailableError(
|
||||||
f"EuRoC sequence not found at {self._root} "
|
f"EuRoC sequence not found at {self._root} "
|
||||||
f"(expected {self._root}/mav0/). "
|
f"(expected {self._root}/mav0/). "
|
||||||
"Run `python scripts/download_dataset.py euroc_mh01` first."
|
"Fetch the Machine Hall bundle from ETH Research Collection "
|
||||||
|
"(DOI 10.3929/ethz-b-000690084), then unpack the inner "
|
||||||
|
"MH_0N_easy.zip of interest into this directory."
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -16,26 +16,23 @@ class DatasetSpec:
|
|||||||
unpack: bool = True
|
unpack: bool = True
|
||||||
|
|
||||||
|
|
||||||
# NOTE ON SHA256 VALUES BELOW:
|
# REGISTRY NOTES:
|
||||||
# The EuRoC MH_01 hash is a placeholder of 64 zeros. Before the first real
|
# Entries with url="" are manual-download-only. The ETH Research Collection
|
||||||
# download the engineer MUST:
|
# (DOI 10.3929/ethz-b-000690084) gates downloads behind a DSpace UI that
|
||||||
# 1. Manually fetch the zip:
|
# doesn't expose a stable direct URL, so the bundle has to be fetched by
|
||||||
# curl -L <url from spec> -o /tmp/MH_01_easy.zip
|
# hand and placed under datasets/<target_subdir>/. SHA256 remains authoritative: the
|
||||||
# 2. Compute its hash:
|
# download helper refuses anything that doesn't match.
|
||||||
# sha256sum /tmp/MH_01_easy.zip
|
|
||||||
# 3. Replace the placeholder here with the real hex string.
|
|
||||||
# 4. Commit the updated hash alongside the first real test run.
|
|
||||||
# Leaving a length-valid (64-char) placeholder keeps the registry well-formed
|
|
||||||
# and lets the download function refuse to keep any file that doesn't match,
|
|
||||||
# so the placeholder state fails loudly rather than silently accepting.
|
|
||||||
DATASET_REGISTRY: dict[str, DatasetSpec] = {
|
DATASET_REGISTRY: dict[str, DatasetSpec] = {
|
||||||
"euroc_mh01": DatasetSpec(
|
"euroc_machine_hall": DatasetSpec(
|
||||||
url=(
|
# 12.6 GB bundle containing MH_01 … MH_05 (each as inner .zip + .bag).
|
||||||
"http://robotics.ethz.ch/~asl-datasets/ijrr_euroc_mav_dataset"
|
# Download from ETH Research Collection:
|
||||||
"/machine_hall/MH_01_easy/MH_01_easy.zip"
|
# https://doi.org/10.3929/ethz-b-000690084
|
||||||
),
|
# After fetching, unpack the inner MH_0N_easy.zip of interest into
|
||||||
sha256="0" * 64, # placeholder — see note above
|
# datasets/euroc/MH_0N/ so the adapter finds mav0/.
|
||||||
target_subdir="euroc/MH_01",
|
url="", # manual download — see comment above
|
||||||
|
sha256="5ed7d07903f8d19b6c8808e2ae8a0872b281f6e34ef5497023b8ac58c3de0f6f",
|
||||||
|
target_subdir="euroc",
|
||||||
|
unpack=False, # the bundle itself is not unpacked end-to-end; see README
|
||||||
),
|
),
|
||||||
"vpair_sample": DatasetSpec(
|
"vpair_sample": DatasetSpec(
|
||||||
url="", # manual download only — see Zenodo link on
|
url="", # manual download only — see Zenodo link on
|
||||||
|
|||||||
@@ -14,7 +14,9 @@ def euroc_mh01_root() -> Path:
|
|||||||
if not (root / "mav0").is_dir():
|
if not (root / "mav0").is_dir():
|
||||||
pytest.skip(
|
pytest.skip(
|
||||||
f"EuRoC MH_01 not present at {root}. "
|
f"EuRoC MH_01 not present at {root}. "
|
||||||
"Run `python scripts/download_dataset.py euroc_mh01` to fetch it."
|
"Fetch the Machine Hall bundle from ETH Research Collection "
|
||||||
|
"(DOI 10.3929/ethz-b-000690084), unpack the inner MH_01_easy.zip "
|
||||||
|
f"into {root}/ so that {root}/mav0/ exists."
|
||||||
)
|
)
|
||||||
return root
|
return root
|
||||||
|
|
||||||
|
|||||||
@@ -11,12 +11,15 @@ from gps_denied.testing.download import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_registry_has_euroc():
|
def test_registry_has_euroc_machine_hall():
|
||||||
assert "euroc_mh01" in DATASET_REGISTRY
|
assert "euroc_machine_hall" in DATASET_REGISTRY
|
||||||
spec = DATASET_REGISTRY["euroc_mh01"]
|
spec = DATASET_REGISTRY["euroc_machine_hall"]
|
||||||
assert isinstance(spec, DatasetSpec)
|
assert isinstance(spec, DatasetSpec)
|
||||||
assert spec.url.startswith("http")
|
# URL intentionally empty — ETH Research Collection gates downloads behind
|
||||||
|
# a DSpace UI without a stable direct URL. Registry records SHA256 only.
|
||||||
|
assert spec.url == ""
|
||||||
assert len(spec.sha256) == 64
|
assert len(spec.sha256) == 64
|
||||||
|
assert spec.sha256 != "0" * 64 # real hash, not the placeholder
|
||||||
|
|
||||||
|
|
||||||
def test_registry_has_vpair_sample():
|
def test_registry_has_vpair_sample():
|
||||||
|
|||||||
Reference in New Issue
Block a user