parallel processing for routes and regions

This commit is contained in:
Anton Martynenko
2025-11-19 13:01:30 +01:00
parent 7f33567632
commit b66d3a0277
8 changed files with 331 additions and 133 deletions
@@ -28,18 +28,24 @@ public class GoogleMapsDownloaderV2
// Logger used for progress, diagnostics and error reporting.
private readonly ILogger<GoogleMapsDownloaderV2> _logger;
// API key read from MapConfig in the constructor.
private readonly string _apiKey;
// Storage settings; used to resolve tile sub-directories and tile file paths.
private readonly StorageConfig _storageConfig;
// Processing settings; supplies MaxConcurrentDownloads, SessionTokenReuseCount and DelayBetweenRequestsMs.
private readonly ProcessingConfig _processingConfig;
// Factory for the HttpClient instances used to fetch tiles.
private readonly IHttpClientFactory _httpClientFactory;
// Per-instance gate limiting how many tile downloads run at once (sized from MaxConcurrentDownloads).
private readonly SemaphoreSlim _downloadSemaphore;
// In-flight downloads keyed by "{zoom}_{x}_{y}". Static, so it is shared by every instance of this class,
// whereas the semaphore above is created per instance.
private static readonly System.Collections.Concurrent.ConcurrentDictionary<string, Task<DownloadedTileInfoV2>> _activeDownloads = new();
/// <summary>
/// Builds a downloader from the injected logger, option wrappers and HTTP client factory.
/// </summary>
/// <param name="logger">Logger for this downloader.</param>
/// <param name="mapConfig">Map options; the API key is read from here.</param>
/// <param name="storageConfig">Storage options used to resolve tile paths.</param>
/// <param name="processingConfig">Processing options; <c>MaxConcurrentDownloads</c> sizes the download gate.</param>
/// <param name="httpClientFactory">Factory used to create HTTP clients for tile requests.</param>
public GoogleMapsDownloaderV2(
    ILogger<GoogleMapsDownloaderV2> logger,
    IOptions<MapConfig> mapConfig,
    IOptions<StorageConfig> storageConfig,
    IOptions<ProcessingConfig> processingConfig,
    IHttpClientFactory httpClientFactory)
{
    _httpClientFactory = httpClientFactory;
    _logger = logger;

    // Unwrap the option objects in the same order as before: map, storage, processing.
    _apiKey = mapConfig.Value.ApiKey;
    _storageConfig = storageConfig.Value;
    _processingConfig = processingConfig.Value;

    // The gate starts fully open: initial count equals the maximum count.
    var maxParallelDownloads = _processingConfig.MaxConcurrentDownloads;
    _downloadSemaphore = new SemaphoreSlim(
        initialCount: maxParallelDownloads,
        maxCount: maxParallelDownloads);
}
// Response shape carrying a single string value named Session.
// NOTE(review): presumably the payload returned when a session token is requested — confirm against GetSessionToken.
private record SessionResponse(string Session);
@@ -247,15 +253,13 @@ public class GoogleMapsDownloaderV2
centerGeoPoint.Lat, centerGeoPoint.Lon, radiusM, zoomLevel);
_logger.LogInformation("Tile range: X=[{XMin}, {XMax}], Y=[{YMin}, {YMax}]", xMin, xMax, yMin, yMax);
var downloadedTiles = new List<DownloadedTileInfoV2>();
var tilesToDownload = new List<(int x, int y, GeoPoint center, double tileSizeMeters)>();
int skippedCount = 0;
for (var y = yMin; y <= yMax; y++)
{
for (var x = xMin; x <= xMax; x++)
{
token.ThrowIfCancellationRequested();
var tileCenter = GeoUtils.TileToWorldPos(x, y, zoomLevel);
var existingTile = existingTiles.FirstOrDefault(t =>
@@ -266,83 +270,190 @@ public class GoogleMapsDownloaderV2
if (existingTile != null)
{
skippedCount++;
_logger.LogInformation("Skipping tile ({X}, {Y}) - already exists at {FilePath}", x, y, existingTile.FilePath);
continue;
}
var tileSizeMeters = CalculateTileSizeInMeters(zoomLevel, tileCenter.Lat);
try
{
var sessionToken = await GetSessionToken();
var server = (x + y) % 4;
var url = string.Format(TILE_URL_TEMPLATE, server, x, y, zoomLevel, sessionToken);
var timestamp = DateTime.UtcNow.ToString("yyyyMMddHHmmss");
var subdirectory = _storageConfig.GetTileSubdirectoryPath(zoomLevel, x, y);
Directory.CreateDirectory(subdirectory);
var filePath = _storageConfig.GetTileFilePath(zoomLevel, x, y, timestamp);
var imageBytes = await ExecuteWithRetryAsync(async () =>
{
using var httpClient = _httpClientFactory.CreateClient();
httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(USER_AGENT);
var response = await httpClient.GetAsync(url, token);
if (!response.IsSuccessStatusCode)
{
var errorBody = await response.Content.ReadAsStringAsync(token);
_logger.LogError("Tile download failed. Tile: ({X}, {Y}), Status: {StatusCode}, URL: {Url}, Response: {Response}",
x, y, response.StatusCode, url, errorBody);
}
response.EnsureSuccessStatusCode();
return await response.Content.ReadAsByteArrayAsync(token);
}, cancellationToken: token);
await File.WriteAllBytesAsync(filePath, imageBytes, token);
_logger.LogInformation("Downloaded tile ({X}, {Y}) to {FilePath}, center=({Lat:F6}, {Lon:F6}), size={Size:F2}m",
x, y, filePath, tileCenter.Lat, tileCenter.Lon, tileSizeMeters);
downloadedTiles.Add(new DownloadedTileInfoV2(
x, y, zoomLevel, tileCenter.Lat, tileCenter.Lon, filePath, tileSizeMeters));
}
catch (TaskCanceledException ex)
{
_logger.LogError(ex, "Tile download cancelled for ({X}, {Y}). This may be due to HttpClient timeout or explicit cancellation.", x, y);
throw;
}
catch (OperationCanceledException ex)
{
_logger.LogError(ex, "Tile download operation cancelled for ({X}, {Y})", x, y);
throw;
}
catch (RateLimitException ex)
{
_logger.LogError(ex, "Rate limit exceeded for tile ({X}, {Y}). Google Maps API is throttling requests. Consider reducing concurrent requests or adding delays.", x, y);
throw;
}
catch (HttpRequestException ex)
{
_logger.LogError(ex, "HTTP request failed for tile ({X}, {Y}). StatusCode: {StatusCode}, Message: {Message}",
x, y, ex.StatusCode, ex.Message);
throw;
}
catch (Exception ex)
{
_logger.LogError(ex, "Unexpected error downloading tile ({X}, {Y}). Type: {ExceptionType}, Message: {Message}",
x, y, ex.GetType().Name, ex.Message);
throw;
}
tilesToDownload.Add((x, y, tileCenter, tileSizeMeters));
}
}
_logger.LogInformation("Downloaded {Count} new tiles, skipped {Skipped} existing tiles", downloadedTiles.Count, skippedCount);
_logger.LogInformation("Need to download {Count} tiles (skipped {Skipped} existing), using {MaxConcurrent} parallel downloads",
tilesToDownload.Count, skippedCount, _processingConfig.MaxConcurrentDownloads);
if (tilesToDownload.Count == 0)
{
_logger.LogInformation("All tiles already exist, returning empty list");
return new List<DownloadedTileInfoV2>();
}
_logger.LogInformation("Getting initial session token before starting {Count} downloads", tilesToDownload.Count);
var sessionToken = await GetSessionToken();
_logger.LogInformation("Session token obtained, starting parallel downloads");
var downloadTasks = new List<Task<DownloadedTileInfoV2?>>();
int sessionTokenUsageCount = 0;
for (int i = 0; i < tilesToDownload.Count; i++)
{
var tileInfo = tilesToDownload[i];
if (sessionTokenUsageCount >= _processingConfig.SessionTokenReuseCount)
{
_logger.LogInformation("Session token usage limit reached ({Count}), requesting new token", sessionTokenUsageCount);
sessionToken = await GetSessionToken();
sessionTokenUsageCount = 0;
_logger.LogInformation("New session token obtained, continuing downloads");
}
var currentToken = sessionToken;
var tileIndex = i;
sessionTokenUsageCount++;
var downloadTask = DownloadTileAsync(
tileInfo.x,
tileInfo.y,
tileInfo.center,
tileInfo.tileSizeMeters,
zoomLevel,
currentToken,
tileIndex,
tilesToDownload.Count,
token);
downloadTasks.Add(downloadTask);
}
_logger.LogInformation("All {Count} download tasks created, waiting for completion", downloadTasks.Count);
var results = await Task.WhenAll(downloadTasks);
_logger.LogInformation("Task.WhenAll completed, processing results");
var downloadedTiles = results.Where(r => r != null).Cast<DownloadedTileInfoV2>().ToList();
_logger.LogInformation("Parallel download completed: {Downloaded} tiles downloaded, {Skipped} skipped, {Failed} failed",
downloadedTiles.Count, skippedCount, tilesToDownload.Count - downloadedTiles.Count);
return downloadedTiles;
}
/// <summary>
/// Downloads one tile, sharing a single in-flight download between callers that ask for the
/// same zoom/x/y at the same time.
/// </summary>
/// <param name="x">Tile X index.</param>
/// <param name="y">Tile Y index.</param>
/// <param name="tileCenter">Geographic centre of the tile, copied into the result.</param>
/// <param name="tileSizeMeters">Tile size in metres, copied into the result.</param>
/// <param name="zoomLevel">Zoom level of the tile.</param>
/// <param name="sessionToken">Session token placed into the tile URL.</param>
/// <param name="tileIndex">Zero-based position of this tile in the batch (used for logging).</param>
/// <param name="totalTiles">Number of tiles in the batch (used for logging).</param>
/// <param name="token">Cancellation token passed to the download that this call starts.</param>
/// <returns>Information about the downloaded tile.</returns>
/// <remarks>
/// When an in-flight download for the same key already exists, its task is awaited instead and
/// this call's <paramref name="sessionToken"/> and <paramref name="token"/> are not used.
/// </remarks>
private async Task<DownloadedTileInfoV2?> DownloadTileAsync(
    int x,
    int y,
    GeoPoint tileCenter,
    double tileSizeMeters,
    int zoomLevel,
    string? sessionToken,
    int tileIndex,
    int totalTiles,
    CancellationToken token)
{
    var tileKey = $"{zoomLevel}_{x}_{y}";

    // NOTE(review): ConcurrentDictionary.GetOrAdd may invoke the factory more than once when two
    // callers race on the same key; the losing task is discarded but still runs. Storing
    // Lazy<Task<...>> in the dictionary would close that gap but requires changing the field type.
    var downloadTask = _activeDownloads.GetOrAdd(tileKey, _ => PerformDownloadAsync(
        x, y, tileCenter, tileSizeMeters, zoomLevel, sessionToken, tileIndex, totalTiles, token));

    try
    {
        return await downloadTask;
    }
    finally
    {
        // Remove the entry only if it still maps to the task we awaited. Removing by key alone
        // lets a late awaiter of a finished task evict a newer in-flight download registered
        // under the same key, which would allow duplicate downloads of that tile.
        _activeDownloads.TryRemove(
            new KeyValuePair<string, Task<DownloadedTileInfoV2>>(tileKey, downloadTask));
    }
}
/// <summary>
/// Fetches a single tile image over HTTP and writes it to the tile storage location, while
/// holding one slot of the download semaphore.
/// </summary>
/// <param name="x">Tile X index.</param>
/// <param name="y">Tile Y index.</param>
/// <param name="tileCenter">Geographic centre of the tile, copied into the result.</param>
/// <param name="tileSizeMeters">Tile size in metres, copied into the result.</param>
/// <param name="zoomLevel">Zoom level of the tile.</param>
/// <param name="sessionToken">Session token placed into the tile URL.</param>
/// <param name="tileIndex">Zero-based position of this tile in the batch (used for logging).</param>
/// <param name="totalTiles">Number of tiles in the batch (used for logging).</param>
/// <param name="token">Cancels waiting for a slot, the optional delay, the request and the file write.</param>
/// <returns>Information about the tile that was written to disk.</returns>
/// <remarks>Every exception is logged and rethrown unchanged.</remarks>
private async Task<DownloadedTileInfoV2> PerformDownloadAsync(
    int x,
    int y,
    GeoPoint tileCenter,
    double tileSizeMeters,
    int zoomLevel,
    string? sessionToken,
    int tileIndex,
    int totalTiles,
    CancellationToken token)
{
    _logger.LogDebug("Tile ({X},{Y}) [{Index}/{Total}]: Waiting for semaphore slot", x, y, tileIndex + 1, totalTiles);

    // Acquire outside the try block: if the wait is cancelled no slot was taken,
    // so the finally below must not release one.
    await _downloadSemaphore.WaitAsync(token);
    _logger.LogDebug("Tile ({X},{Y}) [{Index}/{Total}]: Acquired semaphore slot, starting download", x, y, tileIndex + 1, totalTiles);
    try
    {
        if (_processingConfig.DelayBetweenRequestsMs > 0)
        {
            await Task.Delay(_processingConfig.DelayBetweenRequestsMs, token);
        }

        // URLs and file names are machine-readable: format them with the invariant culture so the
        // thread's current culture (calendar, sign symbols) cannot change them.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var server = (x + y) % 4;
        var url = string.Format(invariant, TILE_URL_TEMPLATE, server, x, y, zoomLevel, sessionToken);
        var timestamp = DateTime.UtcNow.ToString("yyyyMMddHHmmss", invariant);
        var subdirectory = _storageConfig.GetTileSubdirectoryPath(zoomLevel, x, y);
        Directory.CreateDirectory(subdirectory);
        var filePath = _storageConfig.GetTileFilePath(zoomLevel, x, y, timestamp);

        var imageBytes = await ExecuteWithRetryAsync(async () =>
        {
            using var httpClient = _httpClientFactory.CreateClient();
            httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(USER_AGENT);

            // Dispose the response so its content and connection resources are released
            // deterministically on every attempt, including failed ones.
            using var response = await httpClient.GetAsync(url, token);
            if (!response.IsSuccessStatusCode)
            {
                // The URL is deliberately not logged: it embeds the session token.
                var errorBody = await response.Content.ReadAsStringAsync(token);
                _logger.LogError("Tile download failed. Tile: ({X}, {Y}), Status: {StatusCode}, Response: {Response}",
                    x, y, response.StatusCode, errorBody);
            }

            response.EnsureSuccessStatusCode();
            return await response.Content.ReadAsByteArrayAsync(token);
        }, cancellationToken: token);

        await File.WriteAllBytesAsync(filePath, imageBytes, token);

        // Log the first tile, every tenth tile and the last tile. The number is the tile's index
        // in the batch, not a count of completed downloads, since downloads finish out of order.
        if ((tileIndex + 1) % 10 == 0 || tileIndex == 0 || tileIndex == totalTiles - 1)
        {
            _logger.LogInformation("Progress: {Current}/{Total} tiles downloaded - tile ({X}, {Y})",
                tileIndex + 1, totalTiles, x, y);
        }

        return new DownloadedTileInfoV2(
            x, y, zoomLevel, tileCenter.Lat, tileCenter.Lon, filePath, tileSizeMeters);
    }
    catch (TaskCanceledException ex)
    {
        _logger.LogError(ex, "Tile download cancelled for ({X}, {Y})", x, y);
        throw;
    }
    catch (OperationCanceledException ex)
    {
        _logger.LogError(ex, "Tile download operation cancelled for ({X}, {Y})", x, y);
        throw;
    }
    catch (RateLimitException ex)
    {
        _logger.LogError(ex, "Rate limit exceeded for tile ({X}, {Y})", x, y);
        throw;
    }
    catch (HttpRequestException ex)
    {
        _logger.LogError(ex, "HTTP request failed for tile ({X}, {Y}). StatusCode: {StatusCode}",
            x, y, ex.StatusCode);
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Unexpected error downloading tile ({X}, {Y})", x, y);
        throw;
    }
    finally
    {
        _logger.LogDebug("Tile ({X},{Y}) [{Index}/{Total}]: Releasing semaphore slot", x, y, tileIndex + 1, totalTiles);
        _downloadSemaphore.Release();
    }
}
}