mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-04-22 22:36:37 +00:00
b57187e1b8
The prior registry entry was speculative: ``euroc_mh01`` pointing at an
old ``robotics.ethz.ch`` URL that no longer resolves (TCP timeout).
The dataset moved to ETH Research Collection (DOI 10.3929/ethz-b-000690084)
as a single 12.6 GB ``machine_hall.zip`` bundle containing MH_01…MH_05.
There's no stable direct download URL — DSpace gates behind a UI —
so:
- Renamed entry: ``euroc_mh01`` → ``euroc_machine_hall`` (matches the
actual artifact).
- SHA256 set to the real bundle hash 5ed7d07…
- URL left empty (same pattern as ``vpair_sample``); the CLI now
exits 3 and prints fetch instructions for empty-URL entries instead
of crashing on ``urllib.request.urlretrieve("")``.
- Adapter ``DatasetNotAvailableError`` message and conftest skip-reason
updated to tell engineers how to fetch/unpack manually.
- ``test_registry_has_euroc_machine_hall`` pin test replaces the old
pin; asserts real hash (not the ``"0"*64`` placeholder).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
67 lines · 2.0 KiB · Python
"""Dataset downloader — URL registry + SHA256 verification."""
|
|
|
|
import hashlib
|
|
from pathlib import Path
|
|
|
|
from gps_denied.testing.download import (
|
|
DATASET_REGISTRY,
|
|
DatasetSpec,
|
|
download_dataset,
|
|
verify_sha256,
|
|
)
|
|
|
|
|
|
def test_registry_has_euroc_machine_hall():
    """Pin test: the EuRoC Machine Hall bundle is registered with its real hash."""
    registry_key = "euroc_machine_hall"
    assert registry_key in DATASET_REGISTRY
    entry = DATASET_REGISTRY[registry_key]
    assert isinstance(entry, DatasetSpec)
    # URL intentionally empty — ETH Research Collection gates downloads behind
    # a DSpace UI without a stable direct URL. Registry records SHA256 only.
    assert entry.url == ""
    # A real 64-hex-char digest, not the all-zeros placeholder.
    assert len(entry.sha256) == 64
    assert entry.sha256 != "0" * 64
|
|
|
|
|
|
def test_registry_has_vpair_sample():
    """Pin test: the VPAIR sample dataset is registered with its real hash.

    Mirrors ``test_registry_has_euroc_machine_hall``; the ``DatasetSpec``
    type assertion was missing here and is added for consistency.
    """
    assert "vpair_sample" in DATASET_REGISTRY
    spec = DATASET_REGISTRY["vpair_sample"]
    assert isinstance(spec, DatasetSpec)  # consistency with the euroc pin test
    # URL intentionally empty — Zenodo form-gated download; registry records SHA256 only.
    assert spec.url == ""
    assert len(spec.sha256) == 64
    assert spec.sha256 != "0" * 64  # real hash, not the placeholder
|
|
|
|
|
|
def test_verify_sha256_matches(tmp_path: Path):
    """verify_sha256 returns True when the file's digest equals the expected one."""
    payload = b"hello world"
    target = tmp_path / "x.bin"
    target.write_bytes(payload)
    # Expected digest computed independently with hashlib.
    assert verify_sha256(target, hashlib.sha256(payload).hexdigest()) is True
|
|
|
|
|
|
def test_verify_sha256_mismatch(tmp_path: Path):
    """verify_sha256 returns False for a well-formed but wrong digest."""
    target = tmp_path / "x.bin"
    target.write_bytes(b"hello world")
    wrong_digest = "0" * 64  # correct length, guaranteed not to match
    assert verify_sha256(target, wrong_digest) is False
|
|
|
|
|
|
def test_download_skip_if_present(tmp_path: Path, monkeypatch):
    """download_dataset returns an already-present file without touching the network."""
    cached = tmp_path / "cached.zip"
    cached.write_bytes(b"cached")
    spec = DatasetSpec(
        url="http://example.invalid/cached.zip",
        sha256=hashlib.sha256(b"cached").hexdigest(),
        target_subdir="cached",
    )

    # Any retrieval attempt both bumps the counter and fails the test outright.
    calls = {"n": 0}

    def exploding_retrieve(*args, **kwargs):
        calls["n"] += 1
        raise AssertionError("download should have been skipped")

    # NOTE(review): assumes download_dataset looks up urllib.request.urlretrieve
    # at call time; a `from urllib.request import urlretrieve` binding inside the
    # implementation would dodge this patch — confirm in gps_denied.testing.download.
    monkeypatch.setattr("urllib.request.urlretrieve", exploding_retrieve)

    result = download_dataset(spec, cached)
    assert result == cached
    assert calls["n"] == 0