From ad782bcbaa285b7960968075e0a9584f91ffe654 Mon Sep 17 00:00:00 2001 From: Oleksandr Bezdieniezhnykh Date: Tue, 12 Aug 2025 14:48:56 +0300 Subject: [PATCH] splitting python complete --- Azaion.Annotator/Annotator.xaml.cs | 182 ++++++------ Azaion.Annotator/AnnotatorEventHandler.cs | 63 +++-- Azaion.Common/Azaion.Common.csproj | 1 + Azaion.Common/Constants.cs | 88 +++--- Azaion.Common/Controls/CanvasEditor.cs | 41 ++- Azaion.Common/Controls/DetectionControl.cs | 4 +- Azaion.Common/DTO/AnnotationResult.cs | 48 ++-- Azaion.Common/DTO/FormState.cs | 6 +- Azaion.Common/DTO/Label.cs | 58 ++-- Azaion.Common/Database/Annotation.cs | 67 ++++- Azaion.Common/Database/DbFactory.cs | 3 +- Azaion.Common/Services/AnnotationService.cs | 20 +- Azaion.Common/Services/GalleryService.cs | 14 +- Azaion.Common/Services/InferenceClient.cs | 2 +- Azaion.Common/Services/TileProcessor.cs | 41 ++- Azaion.Dataset/DatasetExplorer.xaml | 23 +- Azaion.Dataset/DatasetExplorer.xaml.cs | 12 +- Azaion.Dataset/DatasetExplorerEventHandler.cs | 2 +- Azaion.Inference/annotation.pxd | 1 - Azaion.Inference/annotation.pyx | 30 +- Azaion.Inference/constants_inf.pxd | 6 +- Azaion.Inference/constants_inf.pyx | 16 +- Azaion.Inference/inference.pxd | 15 +- Azaion.Inference/inference.pyx | 129 ++++++--- Azaion.Inference/setup.py | 16 +- Azaion.Inference/test/test_inference.py | 30 +- Azaion.Loader/hardware_service.pyx | 8 +- Azaion.Suite/App.xaml.cs | 4 +- Azaion.Suite/config.json | 8 +- Azaion.Suite/config.system.json | 2 +- Azaion.Test/TileProcessorTest.cs | 263 ++++++++++++++++++ 31 files changed, 834 insertions(+), 369 deletions(-) create mode 100644 Azaion.Test/TileProcessorTest.cs diff --git a/Azaion.Annotator/Annotator.xaml.cs b/Azaion.Annotator/Annotator.xaml.cs index 13bfb44..9feeba4 100644 --- a/Azaion.Annotator/Annotator.xaml.cs +++ b/Azaion.Annotator/Annotator.xaml.cs @@ -29,7 +29,7 @@ namespace Azaion.Annotator; public partial class Annotator { private readonly AppConfig _appConfig; - private readonly LibVLC _libVLC; + private readonly LibVLC _libVlc; private readonly MediaPlayer _mediaPlayer; private readonly IMediator _mediator; private readonly FormState _formState; @@ -42,17 +42,17 @@ public partial class Annotator private readonly IInferenceClient _inferenceClient; private bool _suspendLayout; - private bool _gpsPanelVisible = false; + private bool _gpsPanelVisible; - public readonly CancellationTokenSource MainCancellationSource = new(); + private readonly CancellationTokenSource _mainCancellationSource = new(); public CancellationTokenSource DetectionCancellationSource = new(); - public bool IsInferenceNow = false; + private bool _isInferenceNow; private readonly TimeSpan _thresholdBefore = TimeSpan.FromMilliseconds(50); private readonly TimeSpan _thresholdAfter = TimeSpan.FromMilliseconds(150); public ObservableCollection AllMediaFiles { get; set; } = new(); - public ObservableCollection FilteredMediaFiles { get; set; } = new(); + private ObservableCollection FilteredMediaFiles { get; set; } = new(); public Dictionary MediaFilesDict = new(); public IntervalTree TimedAnnotations { get; set; } = new(); @@ -61,7 +61,7 @@ public partial class Annotator public Annotator( IConfigUpdater configUpdater, IOptions appConfig, - LibVLC libVLC, + LibVLC libVlc, MediaPlayer mediaPlayer, IMediator mediator, FormState formState, @@ -78,7 +78,7 @@ public partial class Annotator Title = MainTitle; _appConfig = appConfig.Value; _configUpdater = configUpdater; - _libVLC = libVLC; + _libVlc = libVlc; _mediaPlayer = mediaPlayer; _mediator = mediator; _formState = formState; @@ -91,7 +91,7 @@ public partial class Annotator Loaded += OnLoaded; Closed += OnFormClosed; Activated += (_, _) => _formState.ActiveWindow = WindowEnum.Annotator; - TbFolder.TextChanged += async (sender, args) => + TbFolder.TextChanged += async (_, _) => { if (!Path.Exists(TbFolder.Text)) return; @@ -179,22 +179,8 @@ public partial class Annotator VideoView.MediaPlayer = _mediaPlayer; //On start playing media - _mediaPlayer.Playing += async (sender, args) => + _mediaPlayer.Playing += (_, _) => { - if (_formState.CurrentMrl == _mediaPlayer.Media?.Mrl) - return; //already loaded all the info - - await Dispatcher.Invoke(async () => await ReloadAnnotations()); - - //show image - if (_formState.CurrentMedia?.MediaType == MediaTypes.Image) - { - await Task.Delay(100); //wait to load the frame and set on pause - ShowTimeAnnotations(TimeSpan.FromMilliseconds(_mediaPlayer.Time), showImage: true); - return; - } - - _formState.CurrentMrl = _mediaPlayer.Media?.Mrl ?? ""; uint vw = 0, vh = 0; _mediaPlayer.Size(0, ref vw, ref vh); _formState.CurrentMediaSize = new Size(vw, vh); @@ -211,12 +197,12 @@ public partial class Annotator var selectedClass = args.DetectionClass; Editor.CurrentAnnClass = selectedClass; _mediator.Publish(new AnnClassSelectedEvent(selectedClass)); - }; + }; - _mediaPlayer.PositionChanged += (o, args) => + _mediaPlayer.PositionChanged += (_, _) => ShowTimeAnnotations(TimeSpan.FromMilliseconds(_mediaPlayer.Time)); - VideoSlider.ValueChanged += (value, newValue) => + VideoSlider.ValueChanged += (_, newValue) => _mediaPlayer.Position = (float)(newValue / VideoSlider.Maximum); VideoSlider.KeyDown += (sender, args) => @@ -227,51 +213,49 @@ public partial class Annotator DgAnnotations.MouseDoubleClick += (sender, args) => { - var dgRow = ItemsControl.ContainerFromElement((DataGrid)sender, (args.OriginalSource as DependencyObject)!) as DataGridRow; - if (dgRow != null) - OpenAnnotationResult((AnnotationResult)dgRow!.Item); + if (ItemsControl.ContainerFromElement((DataGrid)sender, (args.OriginalSource as DependencyObject)!) is DataGridRow dgRow) + OpenAnnotationResult((Annotation)dgRow.Item); }; - DgAnnotations.KeyUp += async (sender, args) => + DgAnnotations.KeyUp += async (_, args) => { switch (args.Key) { - case Key.Up: case Key.Down: //cursor is already moved by system behaviour - OpenAnnotationResult((AnnotationResult)DgAnnotations.SelectedItem); + OpenAnnotationResult((Annotation)DgAnnotations.SelectedItem); break; case Key.Delete: var result = MessageBox.Show("Чи дійсно видалити аннотації?","Підтвердження видалення", MessageBoxButton.OKCancel, MessageBoxImage.Question); if (result != MessageBoxResult.OK) return; - var res = DgAnnotations.SelectedItems.Cast().ToList(); - var annotationNames = res.Select(x => x.Annotation.Name).ToList(); + var res = DgAnnotations.SelectedItems.Cast().ToList(); + var annotationNames = res.Select(x => x.Name).ToList(); await _mediator.Publish(new AnnotationsDeletedEvent(annotationNames)); break; } }; - - Editor.Mediator = _mediator; DgAnnotations.ItemsSource = _formState.AnnotationResults; } - public void OpenAnnotationResult(AnnotationResult res) + private void OpenAnnotationResult(Annotation ann) { _mediaPlayer.SetPause(true); - Editor.RemoveAllAnns(); - _mediaPlayer.Time = (long)res.Annotation.Time.TotalMilliseconds; + if (!ann.IsSplit) + Editor.RemoveAllAnns(); + + _mediaPlayer.Time = (long)ann.Time.TotalMilliseconds; Dispatcher.Invoke(() => { VideoSlider.Value = _mediaPlayer.Position * VideoSlider.Maximum; StatusClock.Text = $"{TimeSpan.FromMilliseconds(_mediaPlayer.Time):mm\\:ss} / {_formState.CurrentVideoLength:mm\\:ss}"; - Editor.ClearExpiredAnnotations(res.Annotation.Time); + Editor.ClearExpiredAnnotations(ann.Time); }); - ShowAnnotation(res.Annotation, showImage: true); + ShowAnnotation(ann, showImage: true, openResult: true); } private void SaveUserSettings() { @@ -284,7 +268,7 @@ public partial class Annotator _configUpdater.Save(_appConfig); } - private void ShowTimeAnnotations(TimeSpan time, bool showImage = false) + public void ShowTimeAnnotations(TimeSpan time, bool showImage = false) { Dispatcher.Invoke(() => { @@ -292,60 +276,68 @@ public partial class Annotator StatusClock.Text = $"{TimeSpan.FromMilliseconds(_mediaPlayer.Time):mm\\:ss} / {_formState.CurrentVideoLength:mm\\:ss}"; Editor.ClearExpiredAnnotations(time); }); - var annotation = TimedAnnotations.Query(time).FirstOrDefault(); - if (annotation != null) ShowAnnotation(annotation, showImage); + var annotations = TimedAnnotations.Query(time).ToList(); + if (!annotations.Any()) + return; + foreach (var ann in annotations) + ShowAnnotation(ann, showImage); } - private void ShowAnnotation(Annotation annotation, bool showImage = false) + private void ShowAnnotation(Annotation annotation, bool showImage = false, bool openResult = false) { Dispatcher.Invoke(async () => { - if (showImage) + if (showImage && !annotation.IsSplit && File.Exists(annotation.ImagePath)) { - if (File.Exists(annotation.ImagePath)) - { - Editor.SetBackground(await annotation.ImagePath.OpenImage()); - _formState.BackgroundTime = annotation.Time; - } + Editor.SetBackground(await annotation.ImagePath.OpenImage()); + _formState.BackgroundTime = annotation.Time; } - Editor.CreateDetections(annotation.Time, annotation.Detections, _appConfig.AnnotationConfig.DetectionClasses, _formState.CurrentMediaSize); + + if (annotation.SplitTile != null && openResult) + { + var canvasTileLocation = new CanvasLabel(new YoloLabel(annotation.SplitTile, _formState.CurrentMediaSize), + RenderSize); + Editor.ZoomTo(new Point(canvasTileLocation.CenterX, canvasTileLocation.CenterY)); + } + else + Editor.CreateDetections(annotation, _appConfig.AnnotationConfig.DetectionClasses, _formState.CurrentMediaSize); }); } - private async Task ReloadAnnotations() + public async Task ReloadAnnotations() { - _formState.AnnotationResults.Clear(); - TimedAnnotations.Clear(); - Editor.RemoveAllAnns(); - - var annotations = await _dbFactory.Run(async db => - await db.Annotations.LoadWith(x => x.Detections) - .Where(x => x.OriginalMediaName == _formState.MediaName) - .OrderBy(x => x.Time) - .ToListAsync(token: MainCancellationSource.Token)); - - TimedAnnotations.Clear(); - _formState.AnnotationResults.Clear(); - foreach (var ann in annotations) + await Dispatcher.InvokeAsync(async () => { - TimedAnnotations.Add(ann.Time.Subtract(_thresholdBefore), ann.Time.Add(_thresholdAfter), ann); - _formState.AnnotationResults.Add(new AnnotationResult(_appConfig.AnnotationConfig.DetectionClassesDict, ann)); - } + _formState.AnnotationResults.Clear(); + TimedAnnotations.Clear(); + Editor.RemoveAllAnns(); + + var annotations = await _dbFactory.Run(async db => + await db.Annotations.LoadWith(x => x.Detections) + .Where(x => x.OriginalMediaName == _formState.MediaName) + .OrderBy(x => x.Time) + .ToListAsync(token: _mainCancellationSource.Token)); + + TimedAnnotations.Clear(); + _formState.AnnotationResults.Clear(); + foreach (var ann in annotations) + { + // Duplicate for speed + TimedAnnotations.Add(ann.Time.Subtract(_thresholdBefore), ann.Time.Add(_thresholdAfter), ann); + _formState.AnnotationResults.Add(ann); + } + }); } //Add manually public void AddAnnotation(Annotation annotation) { - var mediaInfo = (MediaFileInfo)LvFiles.SelectedItem; - if ((mediaInfo?.FName ?? "") != annotation.OriginalMediaName) - return; - var time = annotation.Time; var previousAnnotations = TimedAnnotations.Query(time); TimedAnnotations.Remove(previousAnnotations); TimedAnnotations.Add(time.Subtract(_thresholdBefore), time.Add(_thresholdAfter), annotation); - var existingResult = _formState.AnnotationResults.FirstOrDefault(x => x.Annotation.Time == time); + var existingResult = _formState.AnnotationResults.FirstOrDefault(x => x.Time == time); if (existingResult != null) { try @@ -360,16 +352,14 @@ public partial class Annotator } var dict = _formState.AnnotationResults - .Select((x, i) => new { x.Annotation.Time, Index = i }) + .Select((x, i) => new { x.Time, Index = i }) .ToDictionary(x => x.Time, x => x.Index); var index = dict.Where(x => x.Key < time) .OrderBy(x => time - x.Key) .Select(x => x.Value + 1) .FirstOrDefault(); - - var annRes = new AnnotationResult(_appConfig.AnnotationConfig.DetectionClassesDict, annotation); - _formState.AnnotationResults.Insert(index, annRes); + _formState.AnnotationResults.Insert(index, annotation); } private async Task ReloadFiles() @@ -380,7 +370,7 @@ public partial class Annotator var videoFiles = dir.GetFiles(_appConfig.AnnotationConfig.VideoFormats.ToArray()).Select(x => { - using var media = new Media(_libVLC, x.FullName); + var media = new Media(_libVlc, x.FullName); media.Parse(); var fInfo = new MediaFileInfo { @@ -403,14 +393,16 @@ public partial class Annotator var allFileNames = allFiles.Select(x => x.FName).ToList(); - var labelsDict = await _dbFactory.Run(async db => await db.Annotations - .GroupBy(x => x.Name.Substring(0, x.Name.Length - 7)) + var labelsDict = await _dbFactory.Run(async db => + await db.Annotations + .GroupBy(x => x.OriginalMediaName) .Where(x => allFileNames.Contains(x.Key)) - .ToDictionaryAsync(x => x.Key, x => x.Key)); - + .Select(x => x.Key) + .ToDictionaryAsync(x => x, x => x)); + foreach (var mediaFile in allFiles) mediaFile.HasAnnotations = labelsDict.ContainsKey(mediaFile.FName); - + AllMediaFiles = new ObservableCollection(allFiles); MediaFilesDict = AllMediaFiles.GroupBy(x => x.Name) .ToDictionary(gr => gr.Key, gr => gr.First()); @@ -420,13 +412,13 @@ public partial class Annotator private void OnFormClosed(object? sender, EventArgs e) { - MainCancellationSource.Cancel(); + _mainCancellationSource.Cancel(); _inferenceService.StopInference(); DetectionCancellationSource.Cancel(); _mediaPlayer.Stop(); _mediaPlayer.Dispose(); - _libVLC.Dispose(); + _libVlc.Dispose(); } private void OpenContainingFolder(object sender, RoutedEventArgs e) @@ -447,13 +439,10 @@ public partial class Annotator StatusClock.Text = $"{TimeSpan.FromMilliseconds(_mediaPlayer.Time):mm\\:ss} / {_formState.CurrentVideoLength:mm\\:ss}"; } - private void SeekTo(TimeSpan time) => - SeekTo((long)time.TotalMilliseconds); + private void OpenFolderItemClick(object sender, RoutedEventArgs e) => OpenFolder(); + private void OpenFolderButtonClick(object sender, RoutedEventArgs e) => OpenFolder(); - private async void OpenFolderItemClick(object sender, RoutedEventArgs e) => await OpenFolder(); - private async void OpenFolderButtonClick(object sender, RoutedEventArgs e) => await OpenFolder(); - - private async Task OpenFolder() + private void OpenFolder() { var dlg = new CommonOpenFileDialog { @@ -468,7 +457,6 @@ public partial class Annotator _appConfig.DirectoriesConfig.VideosDirectory = dlg.FileName; TbFolder.Text = dlg.FileName; - await Task.CompletedTask; } private void TbFilter_OnTextChanged(object sender, TextChangedEventArgs e) @@ -525,7 +513,7 @@ public partial class Annotator public async Task AutoDetect() { - if (IsInferenceNow) + if (_isInferenceNow) return; if (LvFiles.Items.IsEmpty) @@ -535,7 +523,7 @@ public partial class Annotator Dispatcher.Invoke(() => Editor.SetBackground(null)); - IsInferenceNow = true; + _isInferenceNow = true; AIDetectBtn.IsEnabled = false; DetectionCancellationSource = new CancellationTokenSource(); @@ -550,7 +538,7 @@ public partial class Annotator await _inferenceService.RunInference(files, DetectionCancellationSource.Token); LvFiles.Items.Refresh(); - IsInferenceNow = false; + _isInferenceNow = false; StatusHelp.Text = "Розпізнавання зваершено"; AIDetectBtn.IsEnabled = true; } @@ -596,7 +584,7 @@ public class GradientStyleSelector : StyleSelector { public override Style? SelectStyle(object item, DependencyObject container) { - if (container is not DataGridRow row || row.DataContext is not AnnotationResult result) + if (container is not DataGridRow row || row.DataContext is not Annotation result) return null; var style = new Style(typeof(DataGridRow)); diff --git a/Azaion.Annotator/AnnotatorEventHandler.cs b/Azaion.Annotator/AnnotatorEventHandler.cs index 0607530..ee52a7e 100644 --- a/Azaion.Annotator/AnnotatorEventHandler.cs +++ b/Azaion.Annotator/AnnotatorEventHandler.cs @@ -23,7 +23,7 @@ using MediaPlayer = LibVLCSharp.Shared.MediaPlayer; namespace Azaion.Annotator; public class AnnotatorEventHandler( - LibVLC libVLC, + LibVLC libVlc, MediaPlayer mediaPlayer, Annotator mainWindow, FormState formState, @@ -47,8 +47,7 @@ public class AnnotatorEventHandler( { private const int STEP = 20; private const int LARGE_STEP = 5000; - private const int RESULT_WIDTH = 1280; - private readonly string tempImgPath = Path.Combine(dirConfig.Value.ImagesDirectory, "___temp___.jpg"); + private readonly string _tempImgPath = Path.Combine(dirConfig.Value.ImagesDirectory, "___temp___.jpg"); private readonly Dictionary _keysControlEnumDict = new() { @@ -144,8 +143,8 @@ public class AnnotatorEventHandler( if (mediaPlayer.IsPlaying) { mediaPlayer.Pause(); - mediaPlayer.TakeSnapshot(0, tempImgPath, 0, 0); - mainWindow.Editor.SetBackground(await tempImgPath.OpenImage()); + mediaPlayer.TakeSnapshot(0, _tempImgPath, 0, 0); + mainWindow.Editor.SetBackground(await _tempImgPath.OpenImage()); formState.BackgroundTime = TimeSpan.FromMilliseconds(mediaPlayer.Time); } else @@ -238,16 +237,21 @@ public class AnnotatorEventHandler( return; var mediaInfo = (MediaFileInfo)mainWindow.LvFiles.SelectedItem; + if (formState.CurrentMedia == mediaInfo) + return; //already loaded + formState.CurrentMedia = mediaInfo; mainWindow.Title = $"{mainWindow.MainTitle} - {mediaInfo.Name}"; - + + await mainWindow.ReloadAnnotations(); + if (mediaInfo.MediaType == MediaTypes.Video) { mainWindow.Editor.SetBackground(null); //need to wait a bit for correct VLC playback event handling await Task.Delay(100, ct); mediaPlayer.Stop(); - mediaPlayer.Play(new Media(libVLC, mediaInfo.Path)); + mediaPlayer.Play(new Media(libVlc, mediaInfo.Path)); } else { @@ -256,6 +260,7 @@ public class AnnotatorEventHandler( formState.CurrentMediaSize = new Size(image.PixelWidth, image.PixelHeight); mainWindow.Editor.SetBackground(image); mediaPlayer.Stop(); + mainWindow.ShowTimeAnnotations(TimeSpan.Zero, showImage: true); } } @@ -282,13 +287,14 @@ public class AnnotatorEventHandler( // var annGrid = mainWindow.DgAnnotations; // annGrid.SelectedIndex = Math.Min(annGrid.Items.Count, annGrid.SelectedIndex + 1); // mainWindow.OpenAnnotationResult((AnnotationResult)annGrid.SelectedItem); + + mainWindow.Editor.SetBackground(null); + formState.BackgroundTime = null; } else { await NextMedia(ct: cancellationToken); } - mainWindow.Editor.SetBackground(null); - formState.BackgroundTime = null; mainWindow.LvFiles.Items.Refresh(); mainWindow.Editor.RemoveAllAnns(); @@ -301,7 +307,7 @@ public class AnnotatorEventHandler( if (!File.Exists(imgPath)) { var source = (mainWindow.Editor.BackgroundImage.Source as BitmapSource)!; - if (source.PixelWidth <= RESULT_WIDTH * 2 && source.PixelHeight <= RESULT_WIDTH * 2) // Allow to be up to 2560*2560 to save to 1280*1280 + if (source.PixelWidth <= Constants.AI_TILE_SIZE * 2 && source.PixelHeight <= Constants.AI_TILE_SIZE * 2) // Allow to be up to 2560*2560 to save to 1280*1280 { //Save image await using var stream = new FileStream(imgPath, FileMode.Create); @@ -314,28 +320,28 @@ public class AnnotatorEventHandler( { //Tiling - //1. Restore original picture coordinates - var pictureCoordinatesDetections = canvasDetections.Select(x => new CanvasLabel( + //1. Convert from RenderSize to CurrentMediaSize + var detectionCoords = canvasDetections.Select(x => new CanvasLabel( new YoloLabel(x, mainWindow.Editor.RenderSize, formState.CurrentMediaSize), formState.CurrentMediaSize, null, x.Confidence)) .ToList(); - //2. Split to 1280*1280 frames - var results = TileProcessor.Split(formState.CurrentMediaSize, pictureCoordinatesDetections, cancellationToken); + //2. Split to frames + var results = TileProcessor.Split(formState.CurrentMediaSize, detectionCoords, cancellationToken); //3. Save each frame as a separate annotation - BitmapEncoder tileEncoder = new JpegBitmapEncoder(); foreach (var res in results) { - var mediaName = $"{formState.MediaName}!split!{res.Tile.X}_{res.Tile.Y}!"; var time = TimeSpan.Zero; - var annotationName = mediaName.ToTimeName(time); + var annotationName = $"{formState.MediaName}{Constants.SPLIT_SUFFIX}{res.Tile.Left:0000}_{res.Tile.Top:0000}!".ToTimeName(time); var tileImgPath = Path.Combine(dirConfig.Value.ImagesDirectory, $"{annotationName}{Constants.JPG_EXT}"); await using var tileStream = new FileStream(tileImgPath, FileMode.Create); - var bitmap = new CroppedBitmap(source, new Int32Rect((int)res.Tile.X, (int)res.Tile.Y, (int)res.Tile.Width, (int)res.Tile.Height)); - tileEncoder.Frames.Add(BitmapFrame.Create(bitmap)); + var bitmap = new CroppedBitmap(source, new Int32Rect((int)res.Tile.Left, (int)res.Tile.Top, (int)res.Tile.Width, (int)res.Tile.Height)); + + var tileEncoder = new JpegBitmapEncoder { Frames = [BitmapFrame.Create(bitmap)] }; tileEncoder.Save(tileStream); await tileStream.FlushAsync(cancellationToken); + tileStream.Close(); var frameSize = new Size(res.Tile.Width, res.Tile.Height); var detections = res.Detections @@ -343,18 +349,18 @@ public class AnnotatorEventHandler( .Select(x => new Detection(annotationName, new YoloLabel(x, frameSize))) .ToList(); - annotationsResult.Add(await annotationService.SaveAnnotation(mediaName, time, detections, token: cancellationToken)); + annotationsResult.Add(await annotationService.SaveAnnotation(formState.MediaName, annotationName, time, detections, token: cancellationToken)); } return annotationsResult; } } var timeImg = formState.BackgroundTime ?? TimeSpan.FromMilliseconds(mediaPlayer.Time); - var timeName = formState.MediaName.ToTimeName(timeImg); + var annName = formState.MediaName.ToTimeName(timeImg); var currentDetections = canvasDetections.Select(x => - new Detection(timeName, new YoloLabel(x, mainWindow.Editor.RenderSize))) + new Detection(annName, new YoloLabel(x, mainWindow.Editor.RenderSize))) .ToList(); - var annotation = await annotationService.SaveAnnotation(formState.MediaName, timeImg, currentDetections, token: cancellationToken); + var annotation = await annotationService.SaveAnnotation(formState.MediaName, annName, timeImg, currentDetections, token: cancellationToken); return [annotation]; } @@ -367,15 +373,15 @@ public class AnnotatorEventHandler( var namesSet = notification.AnnotationNames.ToHashSet(); var remainAnnotations = formState.AnnotationResults - .Where(x => !namesSet.Contains(x.Annotation?.Name ?? "")).ToList(); + .Where(x => !namesSet.Contains(x.Name)).ToList(); formState.AnnotationResults.Clear(); foreach (var ann in remainAnnotations) formState.AnnotationResults.Add(ann); - var timedAnnsToRemove = mainWindow.TimedAnnotations + var timedAnnotationsToRemove = mainWindow.TimedAnnotations .Where(x => namesSet.Contains(x.Value.Name)) .Select(x => x.Value).ToList(); - mainWindow.TimedAnnotations.Remove(timedAnnsToRemove); + mainWindow.TimedAnnotations.Remove(timedAnnotationsToRemove); if (formState.AnnotationResults.Count == 0) { @@ -420,7 +426,10 @@ public class AnnotatorEventHandler( { mainWindow.Dispatcher.Invoke(() => { - mainWindow.AddAnnotation(e.Annotation); + + var mediaInfo = (MediaFileInfo)mainWindow.LvFiles.SelectedItem; + if ((mediaInfo?.FName ?? "") == e.Annotation.OriginalMediaName) + mainWindow.AddAnnotation(e.Annotation); var log = string.Join(Environment.NewLine, e.Annotation.Detections.Select(det => $"Розпізнавання {e.Annotation.OriginalMediaName}: {annotationConfig.Value.DetectionClassesDict[det.ClassNumber].ShortName}: " + diff --git a/Azaion.Common/Azaion.Common.csproj b/Azaion.Common/Azaion.Common.csproj index ca47d48..3f1339f 100644 --- a/Azaion.Common/Azaion.Common.csproj +++ b/Azaion.Common/Azaion.Common.csproj @@ -4,6 +4,7 @@ enable enable true + 12 diff --git a/Azaion.Common/Constants.cs b/Azaion.Common/Constants.cs index b92b8aa..fe26b29 100644 --- a/Azaion.Common/Constants.cs +++ b/Azaion.Common/Constants.cs @@ -9,13 +9,15 @@ using System.Windows; namespace Azaion.Common; -public class Constants +public static class Constants { public const string CONFIG_PATH = "config.json"; public const string LOADER_CONFIG_PATH = "loaderconfig.json"; public const string DEFAULT_API_URL = "https://api.azaion.com"; public const string AZAION_SUITE_EXE = "Azaion.Suite.exe"; + public const int AI_TILE_SIZE = 1280; + #region ExternalClientsConfig private const string DEFAULT_ZMQ_LOADER_HOST = "127.0.0.1"; @@ -27,11 +29,11 @@ public class Constants public static readonly string ExternalGpsDeniedPath = Path.Combine(EXTERNAL_GPS_DENIED_FOLDER, "image-matcher.exe"); public const string DEFAULT_ZMQ_INFERENCE_HOST = "127.0.0.1"; - public const int DEFAULT_ZMQ_INFERENCE_PORT = 5227; + private const int DEFAULT_ZMQ_INFERENCE_PORT = 5227; - public const string DEFAULT_ZMQ_GPS_DENIED_HOST = "127.0.0.1"; - public const int DEFAULT_ZMQ_GPS_DENIED_PORT = 5255; - public const int DEFAULT_ZMQ_GPS_DENIED_PUBLISH_PORT = 5256; + private const string DEFAULT_ZMQ_GPS_DENIED_HOST = "127.0.0.1"; + private const int DEFAULT_ZMQ_GPS_DENIED_PORT = 5255; + private const int DEFAULT_ZMQ_GPS_DENIED_PUBLISH_PORT = 5256; #endregion ExternalClientsConfig @@ -42,41 +44,33 @@ public class Constants # endregion - public const string JPG_EXT = ".jpg"; + public const string JPG_EXT = ".jpg"; public const string TXT_EXT = ".txt"; #region DirectoriesConfig - public const string DEFAULT_VIDEO_DIR = "video"; - public const string DEFAULT_LABELS_DIR = "labels"; - public const string DEFAULT_IMAGES_DIR = "images"; - public const string DEFAULT_RESULTS_DIR = "results"; - public const string DEFAULT_THUMBNAILS_DIR = "thumbnails"; - public const string DEFAULT_GPS_SAT_DIRECTORY = "satellitesDir"; - public const string DEFAULT_GPS_ROUTE_DIRECTORY = "routeDir"; + private const string DEFAULT_VIDEO_DIR = "video"; + private const string DEFAULT_LABELS_DIR = "labels"; + private const string DEFAULT_IMAGES_DIR = "images"; + private const string DEFAULT_RESULTS_DIR = "results"; + private const string DEFAULT_THUMBNAILS_DIR = "thumbnails"; + private const string DEFAULT_GPS_SAT_DIRECTORY = "satellitesDir"; + private const string DEFAULT_GPS_ROUTE_DIRECTORY = "routeDir"; #endregion #region AnnotatorConfig - public static readonly AnnotationConfig DefaultAnnotationConfig = new() - { - DetectionClasses = DefaultAnnotationClasses!, - VideoFormats = DefaultVideoFormats!, - ImageFormats = DefaultImageFormats!, - AnnotationsDbFile = DEFAULT_ANNOTATIONS_DB_FILE - }; - private static readonly List DefaultAnnotationClasses = [ new() { Id = 0, Name = "ArmorVehicle", ShortName = "Броня", Color = "#FF0000".ToColor() }, new() { Id = 1, Name = "Truck", ShortName = "Вантаж.", Color = "#00FF00".ToColor() }, new() { Id = 2, Name = "Vehicle", ShortName = "Машина", Color = "#0000FF".ToColor() }, - new() { Id = 3, Name = "Atillery", ShortName = "Арта", Color = "#FFFF00".ToColor() }, + new() { Id = 3, Name = "Artillery", ShortName = "Арта", Color = "#FFFF00".ToColor() }, new() { Id = 4, Name = "Shadow", ShortName = "Тінь", Color = "#FF00FF".ToColor() }, new() { Id = 5, Name = "Trenches", ShortName = "Окопи", Color = "#00FFFF".ToColor() }, new() { Id = 6, Name = "MilitaryMan", ShortName = "Військов", Color = "#188021".ToColor() }, new() { Id = 7, Name = "TyreTracks", ShortName = "Накати", Color = "#800000".ToColor() }, - new() { Id = 8, Name = "AdditArmoredTank", ShortName = "Танк.захист", Color = "#008000".ToColor() }, + new() { Id = 8, Name = "AdditionArmoredTank",ShortName = "Танк.захист", Color = "#008000".ToColor() }, new() { Id = 9, Name = "Smoke", ShortName = "Дим", Color = "#000080".ToColor() }, new() { Id = 10, Name = "Plane", ShortName = "Літак", Color = "#000080".ToColor() }, new() { Id = 11, Name = "Moto", ShortName = "Мото", Color = "#808000".ToColor() }, @@ -86,20 +80,28 @@ public class Constants new() { Id = 15, Name = "Building", ShortName = "Будівля", Color = "#ffb6c1".ToColor() }, new() { Id = 16, Name = "Caponier", ShortName = "Капонір", Color = "#ffb6c1".ToColor() }, ]; + + private static readonly List DefaultVideoFormats = ["mp4", "mov", "avi"]; + private static readonly List DefaultImageFormats = ["jpg", "jpeg", "png", "bmp"]; - public static readonly List DefaultVideoFormats = ["mp4", "mov", "avi"]; - public static readonly List DefaultImageFormats = ["jpg", "jpeg", "png", "bmp"]; + private static readonly AnnotationConfig DefaultAnnotationConfig = new() + { + DetectionClasses = DefaultAnnotationClasses, + VideoFormats = DefaultVideoFormats, + ImageFormats = DefaultImageFormats, + AnnotationsDbFile = DEFAULT_ANNOTATIONS_DB_FILE + }; + + private const int DEFAULT_LEFT_PANEL_WIDTH = 250; + private const int DEFAULT_RIGHT_PANEL_WIDTH = 250; - public static int DEFAULT_LEFT_PANEL_WIDTH = 250; - public static int DEFAULT_RIGHT_PANEL_WIDTH = 250; - - public const string DEFAULT_ANNOTATIONS_DB_FILE = "annotations.db"; + private const string DEFAULT_ANNOTATIONS_DB_FILE = "annotations.db"; # endregion AnnotatorConfig # region AIRecognitionConfig - public static readonly AIRecognitionConfig DefaultAIRecognitionConfig = new() + private static readonly AIRecognitionConfig DefaultAIRecognitionConfig = new() { FrameRecognitionSeconds = DEFAULT_FRAME_RECOGNITION_SECONDS, TrackingDistanceConfidence = TRACKING_DISTANCE_CONFIDENCE, @@ -109,18 +111,18 @@ public class Constants FramePeriodRecognition = DEFAULT_FRAME_PERIOD_RECOGNITION }; - public const double DEFAULT_FRAME_RECOGNITION_SECONDS = 2; - public const double TRACKING_DISTANCE_CONFIDENCE = 0.15; - public const double TRACKING_PROBABILITY_INCREASE = 15; - public const double TRACKING_INTERSECTION_THRESHOLD = 0.8; - public const int DEFAULT_BIG_IMAGE_TILE_OVERLAP_PERCENT = 20; - public const int DEFAULT_FRAME_PERIOD_RECOGNITION = 4; + private const double DEFAULT_FRAME_RECOGNITION_SECONDS = 2; + private const double TRACKING_DISTANCE_CONFIDENCE = 0.15; + private const double TRACKING_PROBABILITY_INCREASE = 15; + private const double TRACKING_INTERSECTION_THRESHOLD = 0.8; + private const int DEFAULT_BIG_IMAGE_TILE_OVERLAP_PERCENT = 20; + private const int DEFAULT_FRAME_PERIOD_RECOGNITION = 4; # endregion AIRecognitionConfig # region GpsDeniedConfig - public static readonly GpsDeniedConfig DefaultGpsDeniedConfig = new() + private static readonly GpsDeniedConfig DefaultGpsDeniedConfig = new() { MinKeyPoints = 11 }; @@ -129,15 +131,15 @@ public class Constants #region Thumbnails - public static readonly ThumbnailConfig DefaultThumbnailConfig = new() + private static readonly Size DefaultThumbnailSize = new(240, 135); + + private static readonly ThumbnailConfig DefaultThumbnailConfig = new() { Size = DefaultThumbnailSize, Border = DEFAULT_THUMBNAIL_BORDER }; - public static readonly Size DefaultThumbnailSize = new(240, 135); - - public const int DEFAULT_THUMBNAIL_BORDER = 10; + private const int DEFAULT_THUMBNAIL_BORDER = 10; public const string THUMBNAIL_PREFIX = "_thumb"; public const string RESULT_PREFIX = "_result"; @@ -163,10 +165,10 @@ public class Constants #endregion - public const string CSV_PATH = "matches.csv"; + public const string SPLIT_SUFFIX = "!split!"; - public static readonly InitConfig DefaultInitConfig = new() + private static readonly InitConfig DefaultInitConfig = new() { LoaderClientConfig = new LoaderClientConfig { diff --git a/Azaion.Common/Controls/CanvasEditor.cs b/Azaion.Common/Controls/CanvasEditor.cs index 48ea36c..ff4969e 100644 --- a/Azaion.Common/Controls/CanvasEditor.cs +++ b/Azaion.Common/Controls/CanvasEditor.cs @@ -5,6 +5,7 @@ using System.Windows.Input; using System.Windows.Media; using System.Windows.Media.Imaging; using System.Windows.Shapes; +using Azaion.Common.Database; using Azaion.Common.DTO; using Azaion.Common.Events; using MediatR; @@ -39,7 +40,6 @@ public class CanvasEditor : Canvas private readonly TimeSpan _viewThreshold = TimeSpan.FromMilliseconds(400); public Image BackgroundImage { get; set; } = new() { Stretch = Stretch.Uniform }; - public IMediator Mediator { get; set; } = null!; public static readonly DependencyProperty GetTimeFuncProp = DependencyProperty.Register( @@ -191,7 +191,6 @@ public class CanvasEditor : Canvas private void CanvasMouseMove(object sender, MouseEventArgs e) { var pos = e.GetPosition(this); - Mediator.Publish(new SetStatusTextEvent($"Mouse Coordinates: {pos.X}, {pos.Y}")); _horizontalLine.Y1 = _horizontalLine.Y2 = pos.Y; _verticalLine.X1 = _verticalLine.X2 = pos.X; SetLeft(_classNameHint, pos.X + 10); @@ -223,7 +222,6 @@ public class CanvasEditor : Canvas matrix.Translate(delta.X, delta.Y); _matrixTransform.Matrix = matrix; - Mediator.Publish(new SetStatusTextEvent(_matrixTransform.Matrix.ToString())); } private void CanvasMouseUp(object sender, MouseButtonEventArgs e) @@ -243,8 +241,8 @@ public class CanvasEditor : Canvas { Width = width, Height = height, - X = Math.Min(endPos.X, _newAnnotationStartPos.X), - Y = Math.Min(endPos.Y, _newAnnotationStartPos.Y), + Left = Math.Min(endPos.X, _newAnnotationStartPos.X), + Top = Math.Min(endPos.Y, _newAnnotationStartPos.Y), Confidence = 1 }); control.UpdateLayout(); @@ -415,13 +413,26 @@ public class CanvasEditor : Canvas SetTop(_newAnnotationRect, currentPos.Y); } - public void CreateDetections(TimeSpan time, IEnumerable detections, List detectionClasses, Size videoSize) + public void CreateDetections(Annotation annotation, List detectionClasses, Size mediaSize) { - foreach (var detection in detections) + var splitTile = annotation.SplitTile; + foreach (var detection in annotation.Detections) { var detectionClass = DetectionClass.FromYoloId(detection.ClassNumber, detectionClasses); - var canvasLabel = new CanvasLabel(detection, RenderSize, videoSize, detection.Confidence); - CreateDetectionControl(detectionClass, time, canvasLabel); + CanvasLabel canvasLabel; + if (splitTile == null) + canvasLabel = new CanvasLabel(detection, RenderSize, mediaSize, detection.Confidence); + else + { + canvasLabel = new CanvasLabel(detection, new Size(Constants.AI_TILE_SIZE, Constants.AI_TILE_SIZE), null, detection.Confidence) + .ReframeFromSmall(splitTile); + + //From CurrentMediaSize to Render Size + var yoloLabel = new YoloLabel(canvasLabel, mediaSize); + canvasLabel = new CanvasLabel(yoloLabel, RenderSize, mediaSize, canvasLabel.Confidence); + } + + CreateDetectionControl(detectionClass, annotation.Time, canvasLabel); } } @@ -429,8 +440,8 @@ public class CanvasEditor : Canvas { var detectionControl = new DetectionControl(detectionClass, time, AnnotationResizeStart, canvasLabel); detectionControl.MouseDown += AnnotationPositionStart; - SetLeft(detectionControl, canvasLabel.X ); - SetTop(detectionControl, canvasLabel.Y); + SetLeft(detectionControl, canvasLabel.Left ); + SetTop(detectionControl, canvasLabel.Top); Children.Add(detectionControl); CurrentDetections.Add(detectionControl); _newAnnotationRect.Fill = new SolidColorBrush(detectionClass.Color); @@ -472,4 +483,12 @@ public class CanvasEditor : Canvas } public void ResetBackground() => Background = new SolidColorBrush(Color.FromArgb(1, 0, 0, 0)); + + public void ZoomTo(Point point) + { + SetZoom(); + var matrix = _matrixTransform.Matrix; + matrix.ScaleAt(2, 2, point.X, point.Y); + SetZoom(matrix); + } } \ No newline at end of file diff --git a/Azaion.Common/Controls/DetectionControl.cs b/Azaion.Common/Controls/DetectionControl.cs index 9974464..39c8340 100644 --- a/Azaion.Common/Controls/DetectionControl.cs +++ b/Azaion.Common/Controls/DetectionControl.cs @@ -30,7 +30,7 @@ public class DetectionControl : Border { var brush = new SolidColorBrush(value.Color.ToConfidenceColor()); BorderBrush = brush; - BorderThickness = new Thickness(3); + BorderThickness = new Thickness(1); foreach (var rect in _resizedRectangles) rect.Stroke = brush; @@ -141,7 +141,7 @@ public class DetectionControl : Border var rect = new Rectangle() // small rectangles at the corners and sides { ClipToBounds = false, - Margin = new Thickness(-RESIZE_RECT_SIZE), + Margin = new Thickness(-1.1 * RESIZE_RECT_SIZE), HorizontalAlignment = ha, VerticalAlignment = va, Width = RESIZE_RECT_SIZE, diff --git a/Azaion.Common/DTO/AnnotationResult.cs b/Azaion.Common/DTO/AnnotationResult.cs index d3a42a9..48f635a 100644 --- a/Azaion.Common/DTO/AnnotationResult.cs +++ b/Azaion.Common/DTO/AnnotationResult.cs @@ -3,31 +3,33 @@ using Azaion.Common.Database; namespace Azaion.Common.DTO; -public class AnnotationResult -{ - public Annotation Annotation { get; set; } - public List<(Color Color, double Confidence)> Colors { get; private set; } +// public class AnnotationResult +//{ + //public Annotation Annotation { get; set; } + - public string ImagePath { get; set; } - public string TimeStr { get; set; } - public string ClassName { get; set; } + //public string ImagePath { get; set; } + //public string TimeStr { get; set; } + + //public List<(Color Color, double Confidence)> Colors { get; private set; } +// public string ClassName { get; set; } - public AnnotationResult(Dictionary allDetectionClasses, Annotation annotation) - { + // public AnnotationResult(Dictionary allDetectionClasses, Annotation annotation) + // { - Annotation = annotation; + //Annotation = annotation; - TimeStr = $"{annotation.Time:h\\:mm\\:ss}"; - ImagePath = annotation.ImagePath; + //TimeStr = $"{annotation.Time:h\\:mm\\:ss}"; + //ImagePath = annotation.ImagePath; - var detectionClasses = annotation.Detections.Select(x => x.ClassNumber).Distinct().ToList(); - - Colors = annotation.Detections - .Select(d => (allDetectionClasses[d.ClassNumber].Color, d.Confidence)) - .ToList(); - - ClassName = detectionClasses.Count > 1 - ? string.Join(", ", detectionClasses.Select(x => allDetectionClasses[x].UIName)) - : allDetectionClasses[detectionClasses.FirstOrDefault()].UIName; - } -} \ No newline at end of file + // var detectionClasses = annotation.Detections.Select(x => x.ClassNumber).Distinct().ToList(); + // ClassName = detectionClasses.Count > 1 + // ? string.Join(", ", detectionClasses.Select(x => allDetectionClasses[x].UIName)) + // : allDetectionClasses[detectionClasses.FirstOrDefault()].UIName; + // + // Colors = annotation.Detections + // .Select(d => (allDetectionClasses[d.ClassNumber].Color, d.Confidence)) + // .ToList(); + + // } +// } \ No newline at end of file diff --git a/Azaion.Common/DTO/FormState.cs b/Azaion.Common/DTO/FormState.cs index fcac093..7d694d5 100644 --- a/Azaion.Common/DTO/FormState.cs +++ b/Azaion.Common/DTO/FormState.cs @@ -1,5 +1,6 @@ using System.Collections.ObjectModel; using System.Windows; +using Azaion.Common.Database; namespace Azaion.Common.DTO; @@ -7,13 +8,12 @@ public class FormState { public MediaFileInfo? CurrentMedia { get; set; } public string MediaName => CurrentMedia?.FName ?? ""; - - public string CurrentMrl { get; set; } = null!; + public Size CurrentMediaSize { get; set; } public TimeSpan CurrentVideoLength { get; set; } public TimeSpan? BackgroundTime { get; set; } public int CurrentVolume { get; set; } = 100; - public ObservableCollection AnnotationResults { get; set; } = []; + public ObservableCollection AnnotationResults { get; set; } = []; public WindowEnum ActiveWindow { get; set; } } \ No newline at end of file diff --git a/Azaion.Common/DTO/Label.cs b/Azaion.Common/DTO/Label.cs index 65baec7..0a6130b 100644 --- a/Azaion.Common/DTO/Label.cs +++ b/Azaion.Common/DTO/Label.cs @@ -22,52 +22,56 @@ public abstract class Label public class CanvasLabel : Label { - public double X { get; set; } //left - public double Y { get; set; } //top + public double Left { get; set; } + public double Top { get; set; } public double Width { get; set; } public double Height { get; set; } public double Confidence { get; set; } public double Bottom { - get => Y + Height; - set => Height = value - Y; + get => Top + Height; + set => Height = value - Top; } public double Right { - get => X + Width; - set => Width = value - X; + get => Left + Width; + set => Width = value - Left; } + + public double CenterX => Left + Width / 2.0; + public double CenterY => Top + Height / 2.0; + public Size Size => new(Width, Height); public CanvasLabel() { } public CanvasLabel(double left, double right, double top, double bottom) { - X = left; - Y = top; + Left = left; + Top = top; Width = right - left; Height = bottom - top; Confidence = 1; ClassNumber = -1; } - public CanvasLabel(int classNumber, double x, double y, double width, double height, double confidence = 1) : base(classNumber) + public CanvasLabel(int classNumber, double left, double top, double width, double height, double confidence = 1) : base(classNumber) { - X = x; - Y = y; + Left = left; + Top = top; Width = width; Height = height; Confidence = confidence; } - public CanvasLabel(YoloLabel label, Size canvasSize, Size? videoSize = null, double confidence = 1) + public CanvasLabel(YoloLabel label, Size canvasSize, Size? mediaSize = null, double confidence = 1) { var cw = canvasSize.Width; var ch = canvasSize.Height; var canvasAr = cw / ch; - var videoAr = videoSize.HasValue - ? videoSize.Value.Width / videoSize.Value.Height + var videoAr = mediaSize.HasValue + ? mediaSize.Value.Width / mediaSize.Value.Height : canvasAr; ClassNumber = label.ClassNumber; @@ -80,8 +84,8 @@ public class CanvasLabel : Label var realHeight = cw / videoAr; //real video height in pixels on canvas var blackStripHeight = (ch - realHeight) / 2.0; //height of black strips at the top and bottom - X = left * cw; - Y = top * realHeight + blackStripHeight; + Left = left * cw; + Top = top * realHeight + blackStripHeight; Width = label.Width * cw; Height = label.Height * realHeight; } @@ -90,8 +94,8 @@ public class CanvasLabel : Label var realWidth = ch * videoAr; //real video width in pixels on canvas var blackStripWidth = (cw - realWidth) / 2.0; //height of black strips at the top and bottom - X = left * realWidth + blackStripWidth; - Y = top * ch; + Left = left * realWidth + blackStripWidth; + Top = top * ch; Width = label.Width * realWidth; Height = label.Height * ch; } @@ -99,10 +103,10 @@ public class CanvasLabel : Label } public CanvasLabel ReframeToSmall(CanvasLabel smallTile) => - new(ClassNumber, X - smallTile.X, Y - smallTile.Y, Width, Height, Confidence); + new(ClassNumber, Left - smallTile.Left, Top - smallTile.Top, Width, Height, Confidence); public CanvasLabel ReframeFromSmall(CanvasLabel smallTile) => - new(ClassNumber, X + smallTile.X, Y + smallTile.Y, Width, Height, Confidence); + new(ClassNumber, Left + smallTile.Left, Top + smallTile.Top, Width, Height, Confidence); } @@ -132,13 +136,13 @@ public class YoloLabel : Label public RectangleF ToRectangle() => new((float)(CenterX - Width / 2.0), (float)(CenterY - Height / 2.0), (float)Width, (float)Height); - public YoloLabel(CanvasLabel canvasLabel, Size canvasSize, Size? videoSize = null) + public YoloLabel(CanvasLabel canvasLabel, Size canvasSize, Size? mediaSize = null) { var cw = canvasSize.Width; var ch = canvasSize.Height; var canvasAr = cw / ch; - var videoAr = videoSize.HasValue - ? videoSize.Value.Width / videoSize.Value.Height + var videoAr = mediaSize.HasValue + ? mediaSize.Value.Width / mediaSize.Value.Height : canvasAr; ClassNumber = canvasLabel.ClassNumber; @@ -146,20 +150,20 @@ public class YoloLabel : Label double left, top; if (videoAr > canvasAr) //100% width { - left = canvasLabel.X / cw; + left = canvasLabel.Left / cw; Width = canvasLabel.Width / cw; var realHeight = cw / videoAr; //real video height in pixels on canvas var blackStripHeight = (ch - realHeight) / 2.0; //height of black strips at the top and bottom - top = (canvasLabel.Y - blackStripHeight) / realHeight; + top = (canvasLabel.Top - blackStripHeight) / realHeight; Height = canvasLabel.Height / realHeight; } else //100% height { - top = canvasLabel.Y / ch; + top = canvasLabel.Top / ch; Height = canvasLabel.Height / ch; var realWidth = ch * videoAr; //real video width in pixels on canvas var blackStripWidth = (cw - realWidth) / 2.0; //height of black strips at the top and bottom - left = (canvasLabel.X - blackStripWidth) / realWidth; + left = (canvasLabel.Left - blackStripWidth) / realWidth; Width = canvasLabel.Width / realWidth; } diff --git a/Azaion.Common/Database/Annotation.cs b/Azaion.Common/Database/Annotation.cs index c89f2ab..be7970e 100644 --- a/Azaion.Common/Database/Annotation.cs +++ b/Azaion.Common/Database/Annotation.cs @@ -1,4 +1,5 @@ using System.IO; +using System.Windows.Media; using Azaion.Common.DTO; using Azaion.Common.DTO.Config; using Azaion.Common.DTO.Queue; @@ -12,12 +13,14 @@ public class Annotation private static string _labelsDir = null!; private static string _imagesDir = null!; private static string _thumbDir = null!; - - public static void InitializeDirs(DirectoriesConfig config) + private static Dictionary _detectionClassesDict; + + public static void Init(DirectoriesConfig config, Dictionary detectionClassesDict) { _labelsDir = config.LabelsDirectory; _imagesDir = config.ImagesDirectory; _thumbDir = config.ThumbnailsDirectory; + _detectionClassesDict = detectionClassesDict; } [Key("n")] public string Name { get; set; } = null!; @@ -40,12 +43,64 @@ public class Annotation [Key("lon")]public double Lon { get; set; } #region Calculated - [IgnoreMember]public List Classes => Detections.Select(x => x.ClassNumber).ToList(); - [IgnoreMember]public string ImagePath => Path.Combine(_imagesDir, $"{Name}{ImageExtension}"); - [IgnoreMember]public string LabelPath => Path.Combine(_labelsDir, $"{Name}.txt"); - [IgnoreMember]public string ThumbPath => Path.Combine(_thumbDir, $"{Name}{Constants.THUMBNAIL_PREFIX}.jpg"); + [IgnoreMember] public List Classes => Detections.Select(x => x.ClassNumber).ToList(); + [IgnoreMember] public string ImagePath => Path.Combine(_imagesDir, $"{Name}{ImageExtension}"); + [IgnoreMember] public string LabelPath => Path.Combine(_labelsDir, $"{Name}.txt"); + [IgnoreMember] public string ThumbPath => Path.Combine(_thumbDir, $"{Name}{Constants.THUMBNAIL_PREFIX}.jpg"); + [IgnoreMember] public bool IsSplit => Name.Contains(Constants.SPLIT_SUFFIX); + private CanvasLabel? _splitTile; + [IgnoreMember] public CanvasLabel? SplitTile + { + get + { + if (!IsSplit) + return null; + if (_splitTile != null) + return _splitTile; + + var startCoordIndex = Name.IndexOf(Constants.SPLIT_SUFFIX, StringComparison.Ordinal) + Constants.SPLIT_SUFFIX.Length; + var coordsStr = Name.Substring(startCoordIndex, 9).Split('_'); + _splitTile = new CanvasLabel + { + Left = double.Parse(coordsStr[0]), + Top = double.Parse(coordsStr[1]), + Width = Constants.AI_TILE_SIZE, + Height = Constants.AI_TILE_SIZE + }; + return _splitTile; + } + } + + [IgnoreMember] public string TimeStr => $"{Time:h\\:mm\\:ss}"; + + private List<(Color Color, double Confidence)>? _colors; + [IgnoreMember] public List<(Color Color, double Confidence)> Colors => _colors ??= Detections + .Select(d => (_detectionClassesDict[d.ClassNumber].Color, d.Confidence)) + .ToList(); + + private string _className; + [IgnoreMember] public string ClassName + { + get + { + if (string.IsNullOrEmpty(_className)) + { + var detectionClasses = Detections.Select(x => x.ClassNumber).Distinct().ToList(); + _className = detectionClasses.Count > 1 + ? string.Join(", ", detectionClasses.Select(x => _detectionClassesDict[x].UIName)) + : _detectionClassesDict[detectionClasses.FirstOrDefault()].UIName; + } + return _className; + } + } + + #endregion Calculated + + + + } [MessagePackObject] diff --git a/Azaion.Common/Database/DbFactory.cs b/Azaion.Common/Database/DbFactory.cs index 1b85c50..6103939 100644 --- a/Azaion.Common/Database/DbFactory.cs +++ b/Azaion.Common/Database/DbFactory.cs @@ -1,4 +1,5 @@ using System.Data.SQLite; +using System.Diagnostics; using System.IO; using Azaion.Common.DTO; using Azaion.Common.DTO.Config; @@ -48,7 +49,7 @@ public class DbFactory : IDbFactory .UseDataProvider(SQLiteTools.GetDataProvider()) .UseConnection(_memoryConnection) .UseMappingSchema(AnnotationsDbSchemaHolder.MappingSchema) - ;//.UseTracing(TraceLevel.Info, t => logger.LogInformation(t.SqlText)); + .UseTracing(TraceLevel.Info, t => logger.LogInformation(t.SqlText)); _fileConnection = new SQLiteConnection(FileConnStr); diff --git a/Azaion.Common/Services/AnnotationService.cs b/Azaion.Common/Services/AnnotationService.cs index e34edb6..190c396 100644 --- a/Azaion.Common/Services/AnnotationService.cs +++ b/Azaion.Common/Services/AnnotationService.cs @@ -94,6 +94,7 @@ public class AnnotationService : IAnnotationService await SaveAnnotationInner( msg.CreatedDate, msg.OriginalMediaName, + msg.Name, msg.Time, JsonConvert.DeserializeObject>(msg.Detections) ?? [], msg.Source, @@ -136,16 +137,16 @@ public class AnnotationService : IAnnotationService public async Task SaveAnnotation(AnnotationImage a, CancellationToken ct = default) { a.Time = TimeSpan.FromMilliseconds(a.Milliseconds); - return await SaveAnnotationInner(DateTime.UtcNow, a.OriginalMediaName, a.Time, a.Detections.ToList(), + return await SaveAnnotationInner(DateTime.UtcNow, a.OriginalMediaName, a.Name, a.Time, a.Detections.ToList(), SourceEnum.AI, new MemoryStream(a.Image), _api.CurrentUser.Role, _api.CurrentUser.Email, token: ct); } //Manual - public async Task SaveAnnotation(string originalMediaName, TimeSpan time, List detections, Stream? stream = null, CancellationToken token = default) => - await SaveAnnotationInner(DateTime.UtcNow, originalMediaName, time, detections, SourceEnum.Manual, stream, + public async Task SaveAnnotation(string originalMediaName, string annotationName, TimeSpan time, List detections, Stream? stream = null, CancellationToken token = default) => + await SaveAnnotationInner(DateTime.UtcNow, originalMediaName, annotationName, time, detections, SourceEnum.Manual, stream, _api.CurrentUser.Role, _api.CurrentUser.Email, token: token); - private async Task SaveAnnotationInner(DateTime createdDate, string originalMediaName, TimeSpan time, + private async Task SaveAnnotationInner(DateTime createdDate, string originalMediaName, string annotationName, TimeSpan time, List detections, SourceEnum source, Stream? stream, RoleEnum userRole, string createdEmail, @@ -153,21 +154,20 @@ public class AnnotationService : IAnnotationService CancellationToken token = default) { var status = AnnotationStatus.Created; - var fName = originalMediaName.ToTimeName(time); var annotation = await _dbFactory.RunWrite(async db => { var ann = await db.Annotations .LoadWith(x => x.Detections) - .FirstOrDefaultAsync(x => x.Name == fName, token: token); + .FirstOrDefaultAsync(x => x.Name == annotationName, token: token); - await db.Detections.DeleteAsync(x => x.AnnotationName == fName, token: token); + await db.Detections.DeleteAsync(x => x.AnnotationName == annotationName, token: token); if (ann != null) //Annotation is already exists { status = AnnotationStatus.Edited; var annotationUpdatable = db.Annotations - .Where(x => x.Name == fName) + .Where(x => x.Name == annotationName) .Set(x => x.Source, source); if (userRole.IsValidator() && source == SourceEnum.Manual) @@ -188,7 +188,7 @@ public class AnnotationService : IAnnotationService ann = new Annotation { CreatedDate = createdDate, - Name = fName, + Name = annotationName, OriginalMediaName = originalMediaName, Time = time, ImageExtension = Constants.JPG_EXT, @@ -264,6 +264,6 @@ public class AnnotationService : IAnnotationService public interface IAnnotationService { Task SaveAnnotation(AnnotationImage a, CancellationToken ct = default); - Task SaveAnnotation(string originalMediaName, TimeSpan time, List detections, Stream? stream = null, CancellationToken token = default); + Task SaveAnnotation(string originalMediaName, string annotationName, TimeSpan time, List detections, Stream? stream = null, CancellationToken token = default); Task ValidateAnnotations(List annotationNames, bool fromQueue = false, CancellationToken token = default); } \ No newline at end of file diff --git a/Azaion.Common/Services/GalleryService.cs b/Azaion.Common/Services/GalleryService.cs index 883437d..bb1e611 100644 --- a/Azaion.Common/Services/GalleryService.cs +++ b/Azaion.Common/Services/GalleryService.cs @@ -237,11 +237,11 @@ public class GalleryService( .ToList(); if (annotation.Detections.Any()) { - var labelsMinX = labels.Min(x => x.X); - var labelsMaxX = labels.Max(x => x.X + x.Width); + var labelsMinX = labels.Min(x => x.Left); + var labelsMaxX = labels.Max(x => x.Left + x.Width); - var labelsMinY = labels.Min(x => x.Y); - var labelsMaxY = labels.Max(x => x.Y + x.Height); + var labelsMinY = labels.Min(x => x.Top); + var labelsMaxY = labels.Max(x => x.Top + x.Height); var labelsHeight = labelsMaxY - labelsMinY + 2 * border; var labelsWidth = labelsMaxX - labelsMinX + 2 * border; @@ -270,7 +270,7 @@ public class GalleryService( var color = _annotationConfig.DetectionClassesDict[label.ClassNumber].Color; var brush = new SolidBrush(Color.FromArgb(color.A, color.R, color.G, color.B)); - g.DrawRectangle(new Pen(brush, width: 3), (float)((label.X - frameX) / scale), (float)((label.Y - frameY) / scale), (float)(label.Width / scale), (float)(label.Height / scale)); + g.DrawRectangle(new Pen(brush, width: 3), (float)((label.Left - frameX) / scale), (float)((label.Top - frameY) / scale), (float)(label.Width / scale), (float)(label.Height / scale)); } bitmap.Save(annotation.ThumbPath, ImageFormat.Jpeg); @@ -291,10 +291,10 @@ public class GalleryService( var color = detClass.Color; var brush = new SolidBrush(Color.FromArgb(color.A, color.R, color.G, color.B)); var det = new CanvasLabel(detection, new Size(originalImage.Width, originalImage.Height)); - g.DrawRectangle(new Pen(brush, width: 3), (float)det.X, (float)det.Y, (float)det.Width, (float)det.Height); + g.DrawRectangle(new Pen(brush, width: 3), (float)det.Left, (float)det.Top, (float)det.Width, (float)det.Height); var label = detection.Confidence >= 0.995 ? detClass.UIName : $"{detClass.UIName}: {detection.Confidence * 100:F0}%"; - g.DrawTextBox(label, new PointF((float)(det.X + det.Width / 2.0), (float)(det.Y - 24)), brush, Brushes.Black); + g.DrawTextBox(label, new PointF((float)(det.Left + det.Width / 2.0), (float)(det.Top - 24)), brush, Brushes.Black); } var imagePath = Path.Combine(_dirConfig.ResultsDirectory, $"{annotation.Name}{Constants.RESULT_PREFIX}.jpg"); diff --git a/Azaion.Common/Services/InferenceClient.cs b/Azaion.Common/Services/InferenceClient.cs index 7e59620..714311f 100644 --- a/Azaion.Common/Services/InferenceClient.cs +++ b/Azaion.Common/Services/InferenceClient.cs @@ -49,7 +49,7 @@ public class InferenceClient : IInferenceClient Arguments = $"-p {_inferenceClientConfig.ZeroMqPort} -lp {_loaderClientConfig.ZeroMqPort} -a {_inferenceClientConfig.ApiUrl}", CreateNoWindow = true }; - process.Start(); + //process.Start(); } catch (Exception e) { diff --git a/Azaion.Common/Services/TileProcessor.cs b/Azaion.Common/Services/TileProcessor.cs index c534595..71084ec 100644 --- a/Azaion.Common/Services/TileProcessor.cs +++ b/Azaion.Common/Services/TileProcessor.cs @@ -18,10 +18,8 @@ public class TileResult public static class TileProcessor { - private const int MaxTileWidth = 1280; - private const int MaxTileHeight = 1280; - private const int Border = 10; - + public const int BORDER = 10; + public static List Split(Size originalSize, List detections, CancellationToken cancellationToken) { var results = new List(); @@ -30,7 +28,7 @@ public static class TileProcessor while (processingDetectionList.Count > 0 && !cancellationToken.IsCancellationRequested) { var topMostDetection = processingDetectionList - .OrderBy(d => d.Y) + .OrderBy(d => d.Top) .First(); var result = GetDetectionsInTile(originalSize, topMostDetection, processingDetectionList); @@ -42,11 +40,8 @@ public static class TileProcessor private static TileResult GetDetectionsInTile(Size originalSize, CanvasLabel startDet, List allDetections) { - var tile = new CanvasLabel( - left: Math.Max(startDet.X - Border, 0), - right: Math.Min(startDet.Right + Border, originalSize.Width), - top: Math.Max(startDet.Y - Border, 0), - bottom: Math.Min(startDet.Bottom + Border, originalSize.Height)); + var tile = new CanvasLabel(startDet.Left, startDet.Right, startDet.Top, startDet.Bottom); + var maxSize = new List { startDet.Width + BORDER, startDet.Height + BORDER, Constants.AI_TILE_SIZE }.Max(); var selectedDetections = new List{startDet}; foreach (var det in allDetections) @@ -55,26 +50,26 @@ public static class TileProcessor continue; var commonTile = new CanvasLabel( - left: Math.Max(Math.Min(tile.X, det.X) - Border, 0), - right: Math.Min(Math.Max(tile.Right, det.Right) + Border, originalSize.Width), - top: Math.Max(Math.Min(tile.Y, det.Y) - Border, 0), - bottom: Math.Min(Math.Max(tile.Bottom, det.Bottom) + Border, originalSize.Height) + left: Math.Min(tile.Left, det.Left), + right: Math.Max(tile.Right, det.Right), + top: Math.Min(tile.Top, det.Top), + bottom: Math.Max(tile.Bottom, det.Bottom) ); - - if (commonTile.Width > MaxTileWidth || commonTile.Height > MaxTileHeight) + + if (commonTile.Width + BORDER > maxSize || commonTile.Height + BORDER > maxSize) continue; tile = commonTile; selectedDetections.Add(det); } - - //normalization, width and height should be at least half of 1280px - tile.Width = Math.Max(tile.Width, MaxTileWidth / 2.0); - tile.Height = Math.Max(tile.Height, MaxTileHeight / 2.0); - //boundaries check after normalization - tile.Right = Math.Min(tile.Right, originalSize.Width); - tile.Bottom = Math.Min(tile.Bottom, originalSize.Height); + // boundary-aware centering + var centerX = selectedDetections.Average(x => x.CenterX); + var centerY = selectedDetections.Average(d => d.CenterY); + tile.Width = maxSize; + tile.Height = maxSize; + tile.Left = Math.Max(0, Math.Min(originalSize.Width - maxSize, centerX - tile.Width / 2.0)); + tile.Top = Math.Max(0, Math.Min(originalSize.Height - maxSize, centerY - tile.Height / 2.0)); return new TileResult(tile, selectedDetections); } diff --git a/Azaion.Dataset/DatasetExplorer.xaml b/Azaion.Dataset/DatasetExplorer.xaml index 17f05b7..2791540 100644 --- a/Azaion.Dataset/DatasetExplorer.xaml +++ b/Azaion.Dataset/DatasetExplorer.xaml @@ -80,7 +80,7 @@ - + Показувати лише анотації з об'єктами + + + + + + + appConfig, ILogger logger, @@ -199,9 +201,8 @@ public partial class DatasetExplorer }; SwitchTab(toEditor: true); - var time = ann.Time; ExplorerEditor.RemoveAllAnns(); - ExplorerEditor.CreateDetections(time, ann.Detections, _appConfig.AnnotationConfig.DetectionClasses, ExplorerEditor.RenderSize); + ExplorerEditor.CreateDetections(ann, _appConfig.AnnotationConfig.DetectionClasses, ExplorerEditor.RenderSize); } catch (Exception e) { @@ -261,6 +262,7 @@ public partial class DatasetExplorer SelectedAnnotationDict.Clear(); var annThumbnails = _annotationsDict[ExplorerEditor.CurrentAnnClass.YoloId] .WhereIf(withDetectionsOnly, x => x.Value.Detections.Any()) + .WhereIf(TbSearch.Text.Length > 2, x => x.Key.ToLower().Contains(TbSearch.Text)) .Select(x => new AnnotationThumbnail(x.Value, _azaionApi.CurrentUser.Role.IsValidator())) .OrderBy(x => !x.IsSeed) .ThenByDescending(x =>x.Annotation.CreatedDate); @@ -295,4 +297,10 @@ public partial class DatasetExplorer _configUpdater.Save(_appConfig); await ReloadThumbnails(); } + + private void TbSearch_OnTextChanged(object sender, TextChangedEventArgs e) + { + TbSearch.Foreground = TbSearch.Text.Length > 2 ? Brushes.Black : Brushes.Gray; + ThrottleExt.Throttle(ReloadThumbnails, SearchActionId, TimeSpan.FromMilliseconds(400));; + } } diff --git a/Azaion.Dataset/DatasetExplorerEventHandler.cs b/Azaion.Dataset/DatasetExplorerEventHandler.cs index 4d66a4e..60ca2d3 100644 --- a/Azaion.Dataset/DatasetExplorerEventHandler.cs +++ b/Azaion.Dataset/DatasetExplorerEventHandler.cs @@ -70,7 +70,7 @@ public class DatasetExplorerEventHandler( .Select(x => new Detection(a.Name, x.ToYoloLabel(datasetExplorer.ExplorerEditor.RenderSize))) .ToList(); var index = datasetExplorer.ThumbnailsView.SelectedIndex; - var annotation = await annotationService.SaveAnnotation(a.OriginalMediaName, a.Time, detections, token: token); + var annotation = await annotationService.SaveAnnotation(a.OriginalMediaName, a.Name, a.Time, detections, token: token); await ValidateAnnotations([annotation], token); await datasetExplorer.EditAnnotation(index + 1); break; diff --git a/Azaion.Inference/annotation.pxd b/Azaion.Inference/annotation.pxd index 932e969..8bfc0bc 100644 --- a/Azaion.Inference/annotation.pxd +++ b/Azaion.Inference/annotation.pxd @@ -12,5 +12,4 @@ cdef class Annotation: cdef public list[Detection] detections cdef public bytes image - cdef format_time(self, ms) cdef bytes serialize(self) diff --git a/Azaion.Inference/annotation.pyx b/Azaion.Inference/annotation.pyx index 454eda5..485c5cb 100644 --- a/Azaion.Inference/annotation.pyx +++ b/Azaion.Inference/annotation.pyx @@ -1,5 +1,5 @@ import msgpack -from pathlib import Path +cimport constants_inf cdef class Detection: def __init__(self, double x, double y, double w, double h, int cls, double confidence): @@ -14,6 +14,17 @@ cdef class Detection: def __str__(self): return f'{self.cls}: {self.x:.2f} {self.y:.2f} {self.w:.2f} {self.h:.2f}, prob: {(self.confidence*100):.1f}%' + def __eq__(self, other): + if not isinstance(other, Detection): + return False + + if max(abs(self.x - other.x), + abs(self.y - other.y), + abs(self.w - other.w), + abs(self.h - other.h)) > constants_inf.TILE_DUPLICATE_CONFIDENCE_THRESHOLD: + return False + return True + cdef overlaps(self, Detection det2, float confidence_threshold): cdef double overlap_x = 0.5 * (self.w + det2.w) - abs(self.x - det2.x) cdef double overlap_y = 0.5 * (self.h + det2.h) - abs(self.y - det2.y) @@ -23,9 +34,9 @@ cdef class Detection: return overlap_area / min_area > confidence_threshold cdef class Annotation: - def __init__(self, str name, long ms, list[Detection] detections): - self.original_media_name = Path(name).stem.replace(" ", "") - self.name = f'{self.original_media_name}_{self.format_time(ms)}' + def __init__(self, str name, str original_media_name, long ms, list[Detection] detections): + self.name = name + self.original_media_name = original_media_name self.time = ms self.detections = detections if detections is not None else [] for d in self.detections: @@ -42,17 +53,6 @@ cdef class Annotation: ) return f"{self.name}: {detections_str}" - cdef format_time(self, ms): - # Calculate hours, minutes, seconds, and hundreds of milliseconds. - h = ms // 3600000 # Total full hours. - ms_remaining = ms % 3600000 - m = ms_remaining // 60000 # Full minutes. - ms_remaining %= 60000 - s = ms_remaining // 1000 # Full seconds. - f = (ms_remaining % 1000) // 100 # Hundreds of milliseconds. - h = h % 10 - return f"{h}{m:02}{s:02}{f}" - cdef bytes serialize(self): return msgpack.packb({ "n": self.name, diff --git a/Azaion.Inference/constants_inf.pxd b/Azaion.Inference/constants_inf.pxd index 19ac2a3..0dad79f 100644 --- a/Azaion.Inference/constants_inf.pxd +++ b/Azaion.Inference/constants_inf.pxd @@ -13,5 +13,9 @@ cdef str MODELS_FOLDER cdef int SMALL_SIZE_KB +cdef str SPLIT_SUFFIX +cdef int TILE_DUPLICATE_CONFIDENCE_THRESHOLD + cdef log(str log_message) -cdef logerror(str error) \ No newline at end of file +cdef logerror(str error) +cdef format_time(int ms) \ No newline at end of file diff --git a/Azaion.Inference/constants_inf.pyx b/Azaion.Inference/constants_inf.pyx index d552486..1630bc5 100644 --- a/Azaion.Inference/constants_inf.pyx +++ b/Azaion.Inference/constants_inf.pyx @@ -12,6 +12,9 @@ cdef str MODELS_FOLDER = "models" cdef int SMALL_SIZE_KB = 3 +cdef str SPLIT_SUFFIX = "!split!" +cdef int TILE_DUPLICATE_CONFIDENCE_THRESHOLD = 5 + logger.remove() log_format = "[{time:HH:mm:ss} {level}] {message}" logger.add( @@ -40,4 +43,15 @@ cdef log(str log_message): logger.info(log_message) cdef logerror(str error): - logger.error(error) \ No newline at end of file + logger.error(error) + +cdef format_time(int ms): + # Calculate hours, minutes, seconds, and hundreds of milliseconds. + h = ms // 3600000 # Total full hours. + ms_remaining = ms % 3600000 + m = ms_remaining // 60000 # Full minutes. + ms_remaining %= 60000 + s = ms_remaining // 1000 # Full seconds. + f = (ms_remaining % 1000) // 100 # Hundreds of milliseconds. + h = h % 10 + return f"{h}{m:02}{s:02}{f}" diff --git a/Azaion.Inference/inference.pxd b/Azaion.Inference/inference.pxd index 45952f4..781c08a 100644 --- a/Azaion.Inference/inference.pxd +++ b/Azaion.Inference/inference.pxd @@ -9,23 +9,26 @@ cdef class Inference: cdef InferenceEngine engine cdef object on_annotation cdef Annotation _previous_annotation + cdef dict[str, list(Detection)] _tile_detections cdef AIRecognitionConfig ai_config cdef bint stop_signal cdef str model_input cdef int model_width cdef int model_height + cdef int tile_width + cdef int tile_height cdef build_tensor_engine(self, object updater_callback) - cdef init_ai(self) + cpdef init_ai(self) cdef bint is_building_engine cdef bint is_video(self, str filepath) cdef run_inference(self, RemoteCommand cmd) cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name) - cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths) - cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data) - cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent) + cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths) + cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data) + cpdef split_to_tiles(self, frame, path, overlap_percent) cdef stop(self) cdef preprocess(self, frames) @@ -33,4 +36,6 @@ cdef class Inference: cdef postprocess(self, output, ai_config) cdef split_list_extend(self, lst, chunk_size) - cdef bint is_valid_annotation(self, Annotation annotation, AIRecognitionConfig ai_config) + cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config) + cdef bint is_valid_image_annotation(self, Annotation annotation) + cdef remove_tiled_duplicates(self, Annotation annotation) diff --git a/Azaion.Inference/inference.pyx b/Azaion.Inference/inference.pyx index 5e6b16e..05ddc48 100644 --- a/Azaion.Inference/inference.pyx +++ b/Azaion.Inference/inference.pyx @@ -1,5 +1,7 @@ import mimetypes import time +from pathlib import Path + import cv2 import numpy as np cimport constants_inf @@ -54,6 +56,8 @@ cdef class Inference: self.model_input = None self.model_width = 0 self.model_height = 0 + self.tile_width = 0 + self.tile_height = 0 self.engine = None self.is_building_engine = False @@ -93,7 +97,7 @@ cdef class Inference: except Exception as e: updater_callback(f'Error. {str(e)}') - cdef init_ai(self): + cpdef init_ai(self): if self.engine is not None: return @@ -114,6 +118,8 @@ cdef class Inference: self.engine = OnnxEngine(res.data) self.model_height, self.model_width = self.engine.get_input_shape() + self.tile_width = self.model_width + self.tile_height = self.model_height cdef preprocess(self, frames): blobs = [cv2.dnn.blobFromImage(frame, @@ -211,11 +217,11 @@ cdef class Inference: images.append(m) # images first, it's faster if len(images) > 0: - constants_inf.log(f'run inference on {" ".join(images)}...') + constants_inf.log(f'run inference on {" ".join(images)}...') self._process_images(cmd, ai_config, images) if len(videos) > 0: for v in videos: - constants_inf.log(f'run inference on {v}...') + constants_inf.log(f'run inference on {v}...') self._process_video(cmd, ai_config, v) @@ -223,8 +229,10 @@ cdef class Inference: cdef int frame_count = 0 cdef list batch_frames = [] cdef list[int] batch_timestamps = [] + cdef Annotation annotation self._previous_annotation = None + v_input = cv2.VideoCapture(video_name) while v_input.isOpened() and not self.stop_signal: ret, frame = v_input.read() @@ -244,8 +252,12 @@ cdef class Inference: list_detections = self.postprocess(outputs, ai_config) for i in range(len(list_detections)): detections = list_detections[i] - annotation = Annotation(video_name, batch_timestamps[i], detections) - if self.is_valid_annotation(annotation, ai_config): + + original_media_name = Path(video_name).stem.replace(" ", "") + name = f'{original_media_name}_{constants_inf.format_time(batch_timestamps[i])}' + annotation = Annotation(name, original_media_name, batch_timestamps[i], detections) + + if self.is_valid_video_annotation(annotation, ai_config): _, image = cv2.imencode('.jpg', batch_frames[i]) annotation.image = image.tobytes() self._previous_annotation = annotation @@ -256,71 +268,104 @@ cdef class Inference: v_input.release() - cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths): - cdef list frame_data = [] + cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths): + cdef list frame_data + self._tile_detections = {} for path in image_paths: + frame_data = [] frame = cv2.imread(path) + img_h, img_w, _ = frame.shape if frame is None: constants_inf.logerror(f'Failed to read image {path}') continue - img_h, img_w, _ = frame.shape + original_media_name = Path( path).stem.replace(" ", "") if img_h <= 1.5 * self.model_height and img_w <= 1.5 * self.model_width: - frame_data.append((frame, path)) + frame_data.append((frame, original_media_name, f'{original_media_name}_000000')) else: - (split_frames, split_pats) = self.split_to_tiles(frame, path, img_w, img_h, ai_config.big_image_tile_overlap_percent) - frame_data.extend(zip(split_frames, split_pats)) + res = self.split_to_tiles(frame, path, ai_config.big_image_tile_overlap_percent) + frame_data.extend(res) + if len(frame_data) > self.engine.get_batch_size(): + for chunk in self.split_list_extend(frame_data, self.engine.get_batch_size()): + self._process_images_inner(cmd, ai_config, chunk) for chunk in self.split_list_extend(frame_data, self.engine.get_batch_size()): self._process_images_inner(cmd, ai_config, chunk) - cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent): - stride_w = self.model_width * (1 - overlap_percent / 100) - stride_h = self.model_height * (1 - overlap_percent / 100) - n_tiles_x = int(np.ceil((img_w - self.model_width) / stride_w)) + 1 - n_tiles_y = int(np.ceil((img_h - self.model_height) / stride_h)) + 1 + cpdef split_to_tiles(self, frame, path, overlap_percent): + constants_inf.log(f'splitting image {path} to tiles...') + img_h, img_w, _ = frame.shape + stride_w = int(self.tile_width * (1 - overlap_percent / 100)) + stride_h = int(self.tile_height * (1 - overlap_percent / 100)) results = [] - for y_idx in range(n_tiles_y): - for x_idx in range(n_tiles_x): - y_start = y_idx * stride_w - x_start = x_idx * stride_h + original_media_name = Path( path).stem.replace(" ", "") + for y in range(0, img_h, stride_h): + for x in range(0, img_w, stride_w): + x_end = min(x + self.tile_width, img_w) + y_end = min(y + self.tile_height, img_h) - # Ensure the tile doesn't go out of bounds - y_end = min(y_start + self.model_width, img_h) - x_end = min(x_start + self.model_height, img_w) + # correct x,y for the close-to-border tiles + if x_end - x < self.tile_width: + if img_w - (x - stride_w) <= self.tile_width: + continue # the previous tile already covered the last gap + x = img_w - self.tile_width + if y_end - y < self.tile_height: + if img_h - (y - stride_h) <= self.tile_height: + continue # the previous tile already covered the last gap + y = img_h - self.tile_height - # We need to re-calculate start if we are at the edge to get a full 1280x1280 tile - if y_end == img_h: - y_start = img_h - self.model_height - if x_end == img_w: - x_start = img_w - self.model_width - - tile = frame[y_start:y_end, x_start:x_end] - name = path.stem + f'.tile_{x_start}_{y_start}' + path.suffix - results.append((tile, name)) + tile = frame[y:y_end, x:x_end] + name = f'{original_media_name}{constants_inf.SPLIT_SUFFIX}{x:04d}_{y:04d}!_000000' + results.append((tile, original_media_name, name)) return results - cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data): - frames = [frame for frame, _ in frame_data] + cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data): + cdef list frames, original_media_names, names + cdef Annotation annotation + frames, original_media_names, names = map(list, zip(*frame_data)) input_blob = self.preprocess(frames) - outputs = self.engine.run(input_blob) list_detections = self.postprocess(outputs, ai_config) for i in range(len(list_detections)): - detections = list_detections[i] - annotation = Annotation(frame_data[i][1], 0, detections) - _, image = cv2.imencode('.jpg', frames[i]) - annotation.image = image.tobytes() - self.on_annotation(cmd, annotation) + annotation = Annotation(names[i], original_media_names[i], 0, list_detections[i]) + if self.is_valid_image_annotation(annotation): + _, image = cv2.imencode('.jpg', frames[i]) + annotation.image = image.tobytes() + self.on_annotation(cmd, annotation) cdef stop(self): self.stop_signal = True - cdef bint is_valid_annotation(self, Annotation annotation, AIRecognitionConfig ai_config): - # No detections, invalid + cdef remove_tiled_duplicates(self, Annotation annotation): + right = annotation.name.rindex('!') + left = annotation.name.index(constants_inf.SPLIT_SUFFIX) + len(constants_inf.SPLIT_SUFFIX) + x_str, y_str = annotation.name[left:right].split('_') + x = int(x_str) + y = int(y_str) + + for det in annotation.detections: + x1 = det.x * self.tile_width + y1 = det.y * self.tile_height + det_abs = Detection(x + x1, y + y1, det.w * self.tile_width, det.h * self.tile_height, det.cls, det.confidence) + detections = self._tile_detections.setdefault(annotation.original_media_name, []) + if det_abs in detections: + annotation.detections.remove(det) + else: + detections.append(det_abs) + + cdef bint is_valid_image_annotation(self, Annotation annotation): + if constants_inf.SPLIT_SUFFIX in annotation.name: + self.remove_tiled_duplicates(annotation) + if not annotation.detections: + return False + return True + + cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config): + if constants_inf.SPLIT_SUFFIX in annotation.name: + self.remove_tiled_duplicates(annotation) if not annotation.detections: return False diff --git a/Azaion.Inference/setup.py b/Azaion.Inference/setup.py index 9e3edcc..54901f1 100644 --- a/Azaion.Inference/setup.py +++ b/Azaion.Inference/setup.py @@ -2,15 +2,15 @@ from setuptools import setup, Extension from Cython.Build import cythonize import numpy as np -# debug_args = {} -# trace_line = False +debug_args = {} +trace_line = False -debug_args = { - 'extra_compile_args': ['-O0', '-g'], - 'extra_link_args': ['-g'], - 'define_macros': [('CYTHON_TRACE_NOGIL', '1')] -} -trace_line = True +# debug_args = { +# 'extra_compile_args': ['-O0', '-g'], +# 'extra_link_args': ['-g'], +# 'define_macros': [('CYTHON_TRACE_NOGIL', '1')] +# } +# trace_line = True extensions = [ Extension('constants_inf', ['constants_inf.pyx'], **debug_args), diff --git a/Azaion.Inference/test/test_inference.py b/Azaion.Inference/test/test_inference.py index 6407ad2..e3047d3 100644 --- a/Azaion.Inference/test/test_inference.py +++ b/Azaion.Inference/test/test_inference.py @@ -1,8 +1,30 @@ import inference from ai_config import AIRecognitionConfig -from remote_command_inf import RemoteCommand +from unittest.mock import Mock +import numpy as np + +from loader_client import LoaderClient -def test_process_images(): - inf = inference.Inference(None, None) - inf._process_images(RemoteCommand(30), AIRecognitionConfig(4, 2, 15, 0.15, 15, 0.8, 20, b'test', [], 4), ['test_img01.JPG', 'test_img02.jpg']) \ No newline at end of file +def test_split_to_tiles(): + loader_client = LoaderClient("test", 0) + ai_config = AIRecognitionConfig( + frame_period_recognition=4, + frame_recognition_seconds=2, + probability_threshold=0.2, + + tracking_distance_confidence=0.15, + tracking_probability_increase=0.15, + tracking_intersection_threshold=0.6, + big_image_tile_overlap_percent=20, + + file_data=None, + paths=[], + model_batch_size=4 + ) + inf = inference.Inference(loader_client, ai_config) + test_frame = np.zeros((6336, 8448, 3), dtype=np.uint8) + + inf.init_ai() + inf.split_to_tiles(test_frame, 'test_image.jpg', ai_config.big_image_tile_overlap_percent) + diff --git a/Azaion.Loader/hardware_service.pyx b/Azaion.Loader/hardware_service.pyx index 997ef1b..ccc6641 100644 --- a/Azaion.Loader/hardware_service.pyx +++ b/Azaion.Loader/hardware_service.pyx @@ -2,9 +2,14 @@ import os import subprocess cimport constants cdef class HardwareService: + cdef str _CACHED_HW_INFO = None @staticmethod cdef str get_hardware_info(): + global _CACHED_HW_INFO + if _CACHED_HW_INFO is not None: + return _CACHED_HW_INFO + if os.name == 'nt': # windows os_command = ( "powershell -Command \"" @@ -34,5 +39,6 @@ cdef class HardwareService: cdef str drive_serial = lines[len_lines-1] cdef str res = f'CPU: {cpu}. GPU: {gpu}. Memory: {memory}. DriveSerial: {drive_serial}' - constants.log(f'Gathered hardware: {res}') + constants.log(f'Gathered hardware: {res}') + _CACHED_HW_INFO = res return res diff --git a/Azaion.Suite/App.xaml.cs b/Azaion.Suite/App.xaml.cs index 1b62fbc..0f7b671 100644 --- a/Azaion.Suite/App.xaml.cs +++ b/Azaion.Suite/App.xaml.cs @@ -175,7 +175,9 @@ public partial class App }) .Build(); - Annotation.InitializeDirs(_host.Services.GetRequiredService>().Value); + Annotation.Init(_host.Services.GetRequiredService>().Value, + _host.Services.GetRequiredService>().Value.DetectionClassesDict); + _host.Services.GetRequiredService(); _mediator = _host.Services.GetRequiredService(); diff --git a/Azaion.Suite/config.json b/Azaion.Suite/config.json index 748dbb9..c5595a5 100644 --- a/Azaion.Suite/config.json +++ b/Azaion.Suite/config.json @@ -17,10 +17,10 @@ "DirectoriesConfig": { "ApiResourcesDirectory": "stage", "VideosDirectory": "E:\\Azaion6", - "LabelsDirectory": "E:\\labels", - "ImagesDirectory": "E:\\images", - "ResultsDirectory": "E:\\results", - "ThumbnailsDirectory": "E:\\thumbnails", + "LabelsDirectory": "E:\\labels_test", + "ImagesDirectory": "E:\\images_test", + "ResultsDirectory": "E:\\results_test", + "ThumbnailsDirectory": "E:\\thumbnails_test", "GpsSatDirectory": "satellitesDir", "GpsRouteDirectory": "routeDir" }, diff --git a/Azaion.Suite/config.system.json b/Azaion.Suite/config.system.json index c740a46..7b2dce5 100644 --- a/Azaion.Suite/config.system.json +++ b/Azaion.Suite/config.system.json @@ -29,7 +29,7 @@ "ProbabilityThreshold": 0.25, "TrackingDistanceConfidence": 0.15, - "TrackingProbabilityIncrease": 15.0, + "TrackingProbabilityIncrease": 0.15, "TrackingIntersectionThreshold": 0.6, "BigImageTileOverlapPercent": 20, diff --git a/Azaion.Test/TileProcessorTest.cs b/Azaion.Test/TileProcessorTest.cs new file mode 100644 index 0000000..3c7b98e --- /dev/null +++ b/Azaion.Test/TileProcessorTest.cs @@ -0,0 +1,263 @@ +using System.Windows; +using Azaion.Common; +using Azaion.Common.DTO; +using Azaion.Common.Services; +using Xunit; + +namespace Azaion.Annotator.Test; + + public class TileProcessorTest +{ + private const int IMAGE_SIZE = 5000; + + [Fact] + public void Split_DetectionsNearImageCorners_ShouldCreateFourTiles() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List + { + new(10, 60, 10, 60), // Top-left corner + new(IMAGE_SIZE - 60, IMAGE_SIZE - 10, 10, 60), // Top-right corner + new(10, 60, IMAGE_SIZE - 60, IMAGE_SIZE - 10), // Bottom-left corner + new(IMAGE_SIZE - 60, IMAGE_SIZE - 10, IMAGE_SIZE - 60, IMAGE_SIZE - 10) // Bottom-right corner + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Equal(4, results.Count); + } + + [Fact] + public void Split_DetectionsFarApartButFitInOneTile_ShouldCreateOneTile() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List + { + new(100, 150, 100, 150), + new(1200, 1250, 1200, 1250) + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Single(results); + Assert.Equal(2, results[0].Detections.Count); + } + + [Fact] + public void Split_DetectionsTooFarApart_ShouldCreateMultipleTiles() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List + { + new(100, 150, 100, 150), + new(2000, 2050, 2000, 2050) // More than Constants.AI_TILE_SIZE away + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Equal(2, results.Count); + Assert.Contains(results, r => r.Detections.Count == 1 && r.Detections.Contains(detections[0])); + Assert.Contains(results, r => r.Detections.Count == 1 && r.Detections.Contains(detections[1])); + } + + [Fact] + public void Split_ComplexScenario_ShouldCreateCorrectNumberOfTiles() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List + { + // Group 1 (should be tiled together) + new(100, 150, 100, 150), + new(200, 250, 200, 250), + new(500, 550, 500, 550), + // Group 2 (far from group 1, should be in a separate tile) + new(3000, 3050, 3000, 3050), + new(3100, 3150, 3100, 3150), + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Equal(2, results.Count); + var group1Tile = results.FirstOrDefault(r => r.Detections.Count == 3); + var group2Tile = results.FirstOrDefault(r => r.Detections.Count == 2); + + Assert.NotNull(group1Tile); + Assert.NotNull(group2Tile); + + Assert.Contains(detections[0], group1Tile.Detections); + Assert.Contains(detections[1], group1Tile.Detections); + Assert.Contains(detections[2], group1Tile.Detections); + + Assert.Contains(detections[3], group2Tile.Detections); + Assert.Contains(detections[4], group2Tile.Detections); + } + + [Fact] + public void Split_NoDetections_ShouldReturnEmptyList() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List(); + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Empty(results); + } + + [Fact] + public void Split_OneDetection_ShouldCreateOneTile() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List { new(100, 150, 100, 150) }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Single(results); + Assert.Single(results[0].Detections); + Assert.Equal(detections[0], results[0].Detections[0]); + } + + [Fact] + public void Split_DetectionsOnTileBoundary_ShouldFitInOneTile() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + // Combined width is 1270. 1270 + BORDER (10) is not > Constants.AI_TILE_SIZE (1280), so they fit. + var detections = new List + { + new(0, 50, 0, 50), + new(Constants.AI_TILE_SIZE - TileProcessor.BORDER - 50, Constants.AI_TILE_SIZE - TileProcessor.BORDER, 0, 50) + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Single(results); + Assert.Equal(2, results[0].Detections.Count); + } + + [Fact] + public void Split_DetectionsJustOverTileBoundary_ShouldCreateTwoTiles() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + // Combined width is 1271. 1271 + BORDER (10) is > Constants.AI_TILE_SIZE (1280), so they don't fit. + var detections = new List + { + new(0, 50, 1000, 1050), // Top-most + new(Constants.AI_TILE_SIZE - TileProcessor.BORDER - 49, Constants.AI_TILE_SIZE - TileProcessor.BORDER + 1, 0, 50) + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Equal(2, results.Count); + } + + [Fact] + public void Split_ResultingTiles_ShouldBeWithinImageBoundaries() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List + { + new(10, 60, 10, 60), // Top-left corner + new(IMAGE_SIZE - 60, IMAGE_SIZE - 10, IMAGE_SIZE - 60, IMAGE_SIZE - 10) // Bottom-right corner + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Equal(2, results.Count); + foreach (var result in results) + { + var tile = result.Tile; + Assert.True(tile.Left >= 0, $"Tile Left boundary {tile.Left} is out of bounds."); + Assert.True(tile.Top >= 0, $"Tile Top boundary {tile.Top} is out of bounds."); + Assert.True(tile.Right <= originalSize.Width, $"Tile Right boundary {tile.Right} is out of bounds."); + Assert.True(tile.Bottom <= originalSize.Height, $"Tile Bottom boundary {tile.Bottom} is out of bounds."); + } + } + + [Fact] + public void Split_ChainedDetections_ShouldCreateOneTile() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var detections = new List + { + new(100, 200, 100, 200), // Detection A + new(600, 700, 600, 700), // Detection B (close to A) + new(1100, 1200, 1100, 1200) // Detection C (close to B, but far from A) + }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Single(results); + Assert.Equal(3, results[0].Detections.Count); + } + + [Fact] + public void Split_SingleDetectionLargerThanTileSize_ShouldCreateOneTile() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var largeDetection = new CanvasLabel(100, 100 + Constants.AI_TILE_SIZE + 100, 100, 200); + var detections = new List { largeDetection }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Single(results); + var resultTile = results[0]; + Assert.Single(resultTile.Detections); + Assert.Equal(largeDetection, resultTile.Detections[0]); + // The tile should be at least as large as the detection it contains. + Assert.True(resultTile.Tile.Width >= largeDetection.Width); + Assert.True(resultTile.Tile.Height >= largeDetection.Height); + } + + [Fact] + public void Split_LargeDetectionWithNearbySmallDetection_ShouldCreateOneTile() + { + // Arrange + var originalSize = new Size(IMAGE_SIZE, IMAGE_SIZE); + var largeTallDetection = new CanvasLabel(100, 150, 100, 100 + Constants.AI_TILE_SIZE + 200); + var smallDetectionNearby = new CanvasLabel(largeTallDetection.Right + 15, largeTallDetection.Right + 35, 700, 720); + + var detections = new List { largeTallDetection, smallDetectionNearby }; + + // Act + var results = TileProcessor.Split(originalSize, detections, CancellationToken.None); + + // Assert + Assert.Single(results); + Assert.Equal(2, results[0].Detections.Count); + Assert.Contains(largeTallDetection, results[0].Detections); + Assert.Contains(smallDetectionNearby, results[0].Detections); + } + +} \ No newline at end of file