"""Dataset downloader — URL registry + SHA256 verification."""

import hashlib
from pathlib import Path

from gps_denied.testing.download import (
    DATASET_REGISTRY,
    DatasetSpec,
    download_dataset,
    verify_sha256,
)

# All-zero digest: the placeholder value a real registry entry must not carry,
# and a convenient "certainly wrong" digest for the mismatch test.
_ALL_ZERO_SHA256 = "0" * 64


def _assert_sha_only_entry(spec):
    """Assert that *spec* has an empty URL and a real 64-character SHA256."""
    assert spec.url == ""
    assert len(spec.sha256) == 64
    assert spec.sha256 != _ALL_ZERO_SHA256  # real hash, not the placeholder


def test_registry_has_euroc_machine_hall():
    """The registry holds a ``DatasetSpec`` for the EuRoC Machine Hall set."""
    assert "euroc_machine_hall" in DATASET_REGISTRY
    entry = DATASET_REGISTRY["euroc_machine_hall"]
    assert isinstance(entry, DatasetSpec)
    # URL intentionally empty — ETH Research Collection gates downloads behind
    # a DSpace UI without a stable direct URL. Registry records SHA256 only.
    _assert_sha_only_entry(entry)


def test_registry_has_vpair_sample():
    """The registry holds an entry for the VPAIR sample."""
    assert "vpair_sample" in DATASET_REGISTRY
    entry = DATASET_REGISTRY["vpair_sample"]
    # URL intentionally empty — Zenodo form-gated download; registry records
    # SHA256 only.
    _assert_sha_only_entry(entry)


def test_verify_sha256_matches(tmp_path: Path):
    """``verify_sha256`` returns True for the file's actual digest."""
    payload = b"hello world"
    blob_path = tmp_path / "x.bin"
    blob_path.write_bytes(payload)

    assert verify_sha256(blob_path, hashlib.sha256(payload).hexdigest()) is True


def test_verify_sha256_mismatch(tmp_path: Path):
    """``verify_sha256`` returns False when the expected digest differs."""
    blob_path = tmp_path / "x.bin"
    blob_path.write_bytes(b"hello world")

    assert verify_sha256(blob_path, _ALL_ZERO_SHA256) is False


def test_download_skip_if_present(tmp_path: Path, monkeypatch):
    """An already-present file with a matching hash is returned as-is."""
    cached_path = tmp_path / "cached.zip"
    cached_path.write_bytes(b"cached")
    cached_spec = DatasetSpec(
        url="http://example.invalid/cached.zip",
        sha256=hashlib.sha256(b"cached").hexdigest(),
        target_subdir="cached",
    )

    # Should return the path without hitting the network. Every attempted
    # call is recorded before raising, so an attempt is still detected even
    # if the code under test swallows the AssertionError.
    attempted_calls = []

    def fail_on_download(*args, **kwargs):
        attempted_calls.append((args, kwargs))
        raise AssertionError("download should have been skipped")

    monkeypatch.setattr("urllib.request.urlretrieve", fail_on_download)

    returned_path = download_dataset(cached_spec, cached_path)

    assert returned_path == cached_path
    assert attempted_calls == []