From 418a2116b78338d51c2647026c006df247daeb8f Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Sat, 2 Nov 2024 13:09:00 +0200 Subject: [PATCH] add autodetection --- Azaion.Annotator/AIDetector.cs | 54 ---- Azaion.Annotator/App.xaml.cs | 1 + Azaion.Annotator/AutodetectDialog.xaml | 37 +++ Azaion.Annotator/AutodetectDialog.xaml.cs | 19 ++ Azaion.Annotator/Azaion.Annotator.csproj | 2 +- .../Controls/AnnotationControl.cs | 12 +- Azaion.Annotator/Controls/CanvasEditor.cs | 2 +- Azaion.Annotator/DTO/Config.cs | 22 +- Azaion.Annotator/DTO/Label.cs | 15 +- Azaion.Annotator/DTO/MediatrEvents.cs | 1 - Azaion.Annotator/DatasetExplorer.xaml.cs | 12 +- .../Extensions/PointExtensions.cs | 9 + .../Extensions/VLCFrameExtractor.cs | 103 +++---- Azaion.Annotator/GalleryManager.cs | 182 +++++++------ Azaion.Annotator/MainWindow.xaml | 7 +- Azaion.Annotator/MainWindow.xaml.cs | 252 +++++++++++++++--- Azaion.Annotator/MainWindowEventHandler.cs | 38 ++- Azaion.Annotator/YOLODetector.cs | 36 +++ Azaion.Annotator/config.json | 9 +- 19 files changed, 545 insertions(+), 268 deletions(-) delete mode 100644 Azaion.Annotator/AIDetector.cs create mode 100644 Azaion.Annotator/AutodetectDialog.xaml create mode 100644 Azaion.Annotator/AutodetectDialog.xaml.cs create mode 100644 Azaion.Annotator/Extensions/PointExtensions.cs create mode 100644 Azaion.Annotator/YOLODetector.cs diff --git a/Azaion.Annotator/AIDetector.cs b/Azaion.Annotator/AIDetector.cs deleted file mode 100644 index fabde90..0000000 --- a/Azaion.Annotator/AIDetector.cs +++ /dev/null @@ -1,54 +0,0 @@ -using System.Diagnostics; -using System.IO; -using Azaion.Annotator.DTO; -using Azaion.Annotator.Extensions; -using Compunet.YoloV8; -using LibVLCSharp.Shared; -using MediatR; - -namespace Azaion.Annotator; - -public class AIDetector(Config config, MediaPlayer mediaPlayer, VLCFrameExtractor frameExtractor) - : IRequestHandler> -{ - public async Task> Handle(AIDetectEvent request, CancellationToken cancellationToken) - { - using var predictor = new YoloPredictor(config.AIModelPath); - await frameExtractor.Start(async stream => - { - stream.Seek(0, SeekOrigin.Begin); - var sw = Stopwatch.StartNew(); - var result = await predictor.DetectAsync(stream); - sw.Stop(); - var log = string.Join("|", result.Select(det => - $"{det.Name.Id}.{det.Name.Name}: xy=({det.Bounds.X},{det.Bounds.Y}), size=({det.Bounds.Width}, {det.Bounds.Height}), Prob: {det.Confidence*100:F1}%")); - log += $". Inf time: {sw.ElapsedMilliseconds} ms"; - Console.WriteLine(log); - }); - - while (mediaPlayer.IsPlaying) - { - - try - { - // using var thumbnail = await mediaPlayer.Media.GenerateThumbnail(time: 200, - // speed: ThumbnailerSeekSpeed.Fast, - // width: 1280, - // height: resultHeight, - // crop: false, - // pictureType: PictureType.Argb) - // - // mediaPlayer.TakeSnapshot(0, TEMP_IMG, 1280, resultHeight); - } - catch (Exception e) - { - Console.WriteLine(e); - throw; - } - //var result = predictor.Detect(); - } - - return new List(); - } - -} diff --git a/Azaion.Annotator/App.xaml.cs b/Azaion.Annotator/App.xaml.cs index dd1674d..61612ad 100644 --- a/Azaion.Annotator/App.xaml.cs +++ b/Azaion.Annotator/App.xaml.cs @@ -38,6 +38,7 @@ public partial class App : Application services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); + services.AddSingleton(); services.AddMediatR(c => c.RegisterServicesFromAssembly(Assembly.GetExecutingAssembly())); services.AddSingleton(_ => new LibVLC()); services.AddSingleton(); diff --git a/Azaion.Annotator/AutodetectDialog.xaml b/Azaion.Annotator/AutodetectDialog.xaml new file mode 100644 index 0000000..0c16307 --- /dev/null +++ b/Azaion.Annotator/AutodetectDialog.xaml @@ -0,0 +1,37 @@ + + + + + + + + Йде розпізнавання... + + + + + + diff --git a/Azaion.Annotator/AutodetectDialog.xaml.cs b/Azaion.Annotator/AutodetectDialog.xaml.cs new file mode 100644 index 0000000..ecb09e3 --- /dev/null +++ b/Azaion.Annotator/AutodetectDialog.xaml.cs @@ -0,0 +1,19 @@ +using System.Windows; + +namespace Azaion.Annotator; + +public partial class AutodetectDialog : Window +{ + public AutodetectDialog() + { + InitializeComponent(); + } + + public void Log(string message) => + TextBlockLog.Text = TextBlockLog.Text + Environment.NewLine + message; + + private void ButtonBase_OnClick(object sender, RoutedEventArgs e) + { + Close(); + } +} \ No newline at end of file diff --git a/Azaion.Annotator/Azaion.Annotator.csproj b/Azaion.Annotator/Azaion.Annotator.csproj index a9f52aa..2f86b60 100644 --- a/Azaion.Annotator/Azaion.Annotator.csproj +++ b/Azaion.Annotator/Azaion.Annotator.csproj @@ -39,7 +39,7 @@ PreserveNewest - PreserveNewest + Always diff --git a/Azaion.Annotator/Controls/AnnotationControl.cs b/Azaion.Annotator/Controls/AnnotationControl.cs index 8d13940..886ea0c 100644 --- a/Azaion.Annotator/Controls/AnnotationControl.cs +++ b/Azaion.Annotator/Controls/AnnotationControl.cs @@ -4,6 +4,7 @@ using System.Windows.Input; using System.Windows.Media; using System.Windows.Shapes; using Azaion.Annotator.DTO; +using Label = System.Windows.Controls.Label; namespace Azaion.Annotator.Controls; @@ -14,7 +15,9 @@ public class AnnotationControl : Border private readonly Grid _grid; private readonly TextBlock _classNameLabel; + private readonly Label _probabilityLabel; public TimeSpan? Time { get; set; } + public double? Probability { get; set; } private AnnotationClass _annotationClass = null!; public AnnotationClass AnnotationClass @@ -41,7 +44,7 @@ public class AnnotationControl : Border } } - public AnnotationControl(AnnotationClass annotationClass, TimeSpan? time, Action resizeStart) + public AnnotationControl(AnnotationClass annotationClass, TimeSpan? time, Action resizeStart, double? probability = null) { Time = time; _resizeStart = resizeStart; @@ -54,6 +57,13 @@ public class AnnotationControl : Border FontSize = 14, Cursor = Cursors.SizeAll }; + _probabilityLabel = new Label + { + Content = probability?.ToString("F1") ?? string.Empty, + HorizontalAlignment = HorizontalAlignment.Right, + VerticalAlignment = VerticalAlignment.Top, + Margin = new Thickness(0, -15, 0, 0), + }; _selectionFrame = new Rectangle { HorizontalAlignment = HorizontalAlignment.Stretch, diff --git a/Azaion.Annotator/Controls/CanvasEditor.cs b/Azaion.Annotator/Controls/CanvasEditor.cs index 143ec9d..9046276 100644 --- a/Azaion.Annotator/Controls/CanvasEditor.cs +++ b/Azaion.Annotator/Controls/CanvasEditor.cs @@ -312,7 +312,7 @@ public class CanvasEditor : Canvas public AnnotationControl CreateAnnotation(AnnotationClass annClass, TimeSpan? time, CanvasLabel canvasLabel) { - var annotationControl = new AnnotationControl(annClass, time, AnnotationResizeStart) + var annotationControl = new AnnotationControl(annClass, time, AnnotationResizeStart, canvasLabel.Probability) { Width = canvasLabel.Width, Height = canvasLabel.Height diff --git a/Azaion.Annotator/DTO/Config.cs b/Azaion.Annotator/DTO/Config.cs index 48fa0d1..f082e2c 100644 --- a/Azaion.Annotator/DTO/Config.cs +++ b/Azaion.Annotator/DTO/Config.cs @@ -37,7 +37,15 @@ public class Config public ThumbnailConfig ThumbnailConfig { get; set; } = null!; public int? LastSelectedExplorerClass { get; set; } + public AIRecognitionConfig AIRecognitionConfig { get; set; } = null!; +} + +public class AIRecognitionConfig +{ public string AIModelPath { get; set; } = null!; + public double FrameRecognitionSeconds { get; set; } + public double TrackingDistanceConfidence { get; set; } + public double TrackingProbabilityIncrease { get; set; } } public class WindowConfig @@ -71,6 +79,11 @@ public class FileConfigRepository(ILogger logger) : IConfi private const string DEFAULT_THUMBNAILS_DIR = "thumbnails"; private const string DEFAULT_UNKNOWN_IMG_DIR = "unknown"; + private const int DEFAULT_THUMBNAIL_BORDER = 10; + private const double DEFAULT_FRAME_RECOGNITION_SECONDS = 2; + private const double TRACKING_DISTANCE_CONFIDENCE = 0.15; + private const double TRACKING_PROBABILITY_INCREASE = 15; + private static readonly Size DefaultWindowSize = new(1280, 720); private static readonly Point DefaultWindowLocation = new(100, 100); private static readonly Size DefaultThumbnailSize = new(240, 135); @@ -111,7 +124,14 @@ public class FileConfigRepository(ILogger logger) : IConfi ThumbnailConfig = new ThumbnailConfig { Size = DefaultThumbnailSize, - Border = 10 + Border = DEFAULT_THUMBNAIL_BORDER + }, + AIRecognitionConfig = new AIRecognitionConfig + { + AIModelPath = "azaion.onnx", + FrameRecognitionSeconds = DEFAULT_FRAME_RECOGNITION_SECONDS, + TrackingDistanceConfidence = TRACKING_DISTANCE_CONFIDENCE, + TrackingProbabilityIncrease = TRACKING_PROBABILITY_INCREASE } }; } diff --git a/Azaion.Annotator/DTO/Label.cs b/Azaion.Annotator/DTO/Label.cs index b3bdf8e..b14d8cf 100644 --- a/Azaion.Annotator/DTO/Label.cs +++ b/Azaion.Annotator/DTO/Label.cs @@ -9,11 +9,11 @@ public abstract class Label { [JsonProperty(PropertyName = "cl")] public int ClassNumber { get; set; } - public Label() + protected Label() { } - public Label(int classNumber) + protected Label(int classNumber) { ClassNumber = classNumber; } @@ -25,20 +25,22 @@ public class CanvasLabel : Label public double Y { get; set; } public double Width { get; set; } public double Height { get; set; } + public double? Probability { get; } public CanvasLabel() { } - public CanvasLabel(int classNumber, double x, double y, double width, double height) : base(classNumber) + public CanvasLabel(int classNumber, double x, double y, double width, double height, double? probability = null) : base(classNumber) { X = x; Y = y; Width = width; Height = height; + Probability = probability; } - public CanvasLabel(YoloLabel label, Size canvasSize, Size videoSize) + public CanvasLabel(YoloLabel label, Size canvasSize, Size videoSize, double? probability = null) { var cw = canvasSize.Width; var ch = canvasSize.Height; @@ -70,6 +72,7 @@ public class CanvasLabel : Label Width = label.Width * realWidth; Height = label.Height * ch; } + Probability = probability; } } @@ -158,10 +161,10 @@ public class YoloLabel : Label .ToList()!; } - public static async Task WriteToFile(IEnumerable labels, string filename) + public static async Task WriteToFile(IEnumerable labels, string filename, CancellationToken cancellationToken = default) { var labelsStr = string.Join(Environment.NewLine, labels.Select(x => x.ToString())); - await File.WriteAllTextAsync(filename, labelsStr); + await File.WriteAllTextAsync(filename, labelsStr, cancellationToken); } public override string ToString() => $"{ClassNumber} {CenterX:F5} {CenterY:F5} {Width:F5} {Height:F5}".Replace(',', '.'); diff --git a/Azaion.Annotator/DTO/MediatrEvents.cs b/Azaion.Annotator/DTO/MediatrEvents.cs index 775171a..b727ce2 100644 --- a/Azaion.Annotator/DTO/MediatrEvents.cs +++ b/Azaion.Annotator/DTO/MediatrEvents.cs @@ -19,4 +19,3 @@ public class VolumeChangedEvent(int volume) : INotification public int Volume { get; set; } = volume; } -public class AIDetectEvent : IRequest>; diff --git a/Azaion.Annotator/DatasetExplorer.xaml.cs b/Azaion.Annotator/DatasetExplorer.xaml.cs index 6ca0851..b22cac0 100644 --- a/Azaion.Annotator/DatasetExplorer.xaml.cs +++ b/Azaion.Annotator/DatasetExplorer.xaml.cs @@ -192,8 +192,8 @@ public partial class DatasetExplorer foreach (var ann in await YoloLabel.ReadFromFile(dto.LabelPath)) { var annClass = _config.AnnotationClassesDict[ann.ClassNumber]; - var annInfo = new CanvasLabel(ann, ExplorerEditor.RenderSize, ExplorerEditor.RenderSize); - ExplorerEditor.CreateAnnotation(annClass, time, annInfo); + var canvasLabel = new CanvasLabel(ann, ExplorerEditor.RenderSize, ExplorerEditor.RenderSize); + ExplorerEditor.CreateAnnotation(annClass, time, canvasLabel); } ThumbnailLoading = false; @@ -343,4 +343,12 @@ public partial class DatasetExplorer return null; } } + + public void AddThumbnail(ThumbnailDto thumbnailDto, IEnumerable classes) + { + var selectedClass = ((AnnotationClass?)LvClasses.SelectedItem)?.Id; + + if (selectedClass != null && (selectedClass == -1 || classes.Any(x => x == selectedClass))) + ThumbnailsDtos.Insert(0, thumbnailDto); + } } \ No newline at end of file diff --git a/Azaion.Annotator/Extensions/PointExtensions.cs b/Azaion.Annotator/Extensions/PointExtensions.cs new file mode 100644 index 0000000..3881727 --- /dev/null +++ b/Azaion.Annotator/Extensions/PointExtensions.cs @@ -0,0 +1,9 @@ +using System.Windows; + +namespace Azaion.Annotator.Extensions; + +public static class PointExtensions +{ + public static double SqrDistance(this Point p1, Point p2) => + (p2.X - p1.X) * (p2.X - p1.X) + (p2.Y - p1.Y) * (p2.Y - p1.Y); +} \ No newline at end of file diff --git a/Azaion.Annotator/Extensions/VLCFrameExtractor.cs b/Azaion.Annotator/Extensions/VLCFrameExtractor.cs index 1c61fd2..d693081 100644 --- a/Azaion.Annotator/Extensions/VLCFrameExtractor.cs +++ b/Azaion.Annotator/Extensions/VLCFrameExtractor.cs @@ -1,31 +1,27 @@ using System.Collections.Concurrent; -using System.Diagnostics; -using System.Drawing; -using System.Drawing.Imaging; using System.IO; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using Azaion.Annotator.DTO; using LibVLCSharp.Shared; -using SixLabors.ImageSharp.Drawing; using SkiaSharp; namespace Azaion.Annotator.Extensions; -public class VLCFrameExtractor(LibVLC libVLC, MainWindow mainWindow) +public class VLCFrameExtractor(LibVLC libVLC) { private const uint RGBA_BYTES = 4; - private const int PLAYBACK_RATE = 3; - private const uint DEFAULT_WIDTH = 1280; + private const int PLAYBACK_RATE = 4; private uint _pitch; // Number of bytes per "line", aligned to x32. private uint _lines; // Number of lines in the buffer, aligned to x32. private uint _width; // Thumbnail width private uint _height; // Thumbnail height - private uint _videoFPS; - private Func _frameProcessFn = null!; private MediaPlayer _mediaPlayer = null!; + private TimeSpan _lastFrameTimestamp; + private long _lastFrame; + private static uint Align32(uint size) { if (size % 32 == 0) @@ -34,76 +30,66 @@ public class VLCFrameExtractor(LibVLC libVLC, MainWindow mainWindow) } private static SKBitmap? _currentBitmap; - private static readonly ConcurrentQueue FilesToProcess = new(); + private static readonly ConcurrentQueue FramesQueue = new(); private static long _frameCounter; - public async Task Start(Func frameProcessFn) + public async IAsyncEnumerable<(TimeSpan Time, Stream Stream)> ExtractFrames(string mediaPath, + [EnumeratorCancellation] CancellationToken manualCancellationToken = default) { - _frameProcessFn = frameProcessFn; - var processingCancellationTokenSource = new CancellationTokenSource(); + var videoFinishedCancellationToken = new CancellationTokenSource(); _mediaPlayer = new MediaPlayer(libVLC); - _mediaPlayer.Stopped += (s, e) => processingCancellationTokenSource.CancelAfter(1); + _mediaPlayer.Stopped += (s, e) => videoFinishedCancellationToken.CancelAfter(1); - using var media = new Media(libVLC, ((MediaFileInfo)mainWindow.LvFiles.SelectedItem).Path); - await media.Parse(cancellationToken: processingCancellationTokenSource.Token); + using var media = new Media(libVLC, mediaPath); + await media.Parse(cancellationToken: videoFinishedCancellationToken.Token); var videoTrack = media.Tracks.FirstOrDefault(x => x.Data.Video.Width != 0); _width = videoTrack.Data.Video.Width; _height = videoTrack.Data.Video.Height; - _videoFPS = videoTrack.Data.Video.FrameRateNum; //rescaling to DEFAULT_WIDTH - _height = (uint)(DEFAULT_WIDTH * _height / (double)_width); - _width = DEFAULT_WIDTH; + //TODO: probably rescaling is not necessary, should be checked + //_width = DEFAULT_WIDTH; + //_height = (uint)(DEFAULT_WIDTH * _height / (double)_width); _pitch = Align32(_width * RGBA_BYTES); _lines = Align32(_height); + _mediaPlayer.SetRate(PLAYBACK_RATE); + + media.AddOption(":no-audio"); + _mediaPlayer.SetVideoFormat("RV32", _width, _height, _pitch); + _mediaPlayer.SetVideoCallbacks(Lock, null, Display); + _mediaPlayer.Play(media); - _mediaPlayer.SetRate(3); - - try - { - media.AddOption(":no-audio"); - _mediaPlayer.SetVideoFormat("RV32", _width, _height, _pitch); - _mediaPlayer.SetVideoCallbacks(Lock, null, Display); - await ProcessThumbnailsAsync(processingCancellationTokenSource.Token); - } - catch (Exception e) - { - Console.WriteLine(e.Message); - _mediaPlayer.Stop(); - _mediaPlayer.Dispose(); - } - } - - private async Task ProcessThumbnailsAsync(CancellationToken token) - { _frameCounter = 0; var surface = SKSurface.Create(new SKImageInfo((int) _width, (int) _height)); - while (!token.IsCancellationRequested) + var token = videoFinishedCancellationToken.Token; + + while (!(FramesQueue.IsEmpty && token.IsCancellationRequested) && !manualCancellationToken.IsCancellationRequested) { - if (FilesToProcess.TryDequeue(out var bitmap)) + if (FramesQueue.TryDequeue(out var frameInfo)) { - if (bitmap == null) + if (frameInfo.Bitmap == null) continue; - surface.Canvas.DrawBitmap(bitmap, 0, 0); // Effectively crops the original bitmap to get only the visible area + surface.Canvas.DrawBitmap(frameInfo.Bitmap, 0, 0); // Effectively crops the original bitmap to get only the visible area using var outputImage = surface.Snapshot(); using var data = outputImage.Encode(SKEncodedImageFormat.Jpeg, 85); using var ms = new MemoryStream(); data.SaveTo(ms); - if (_frameProcessFn != null) - await _frameProcessFn(ms); - Console.WriteLine($"Time: {TimeSpan.FromMilliseconds(_mediaPlayer.Time):mm\\:ss} Queue size: {FilesToProcess.Count}"); - bitmap.Dispose(); + yield return (frameInfo.Time, ms); + + Console.WriteLine($"Queue size: {FramesQueue.Count}"); + frameInfo.Bitmap?.Dispose(); } else { await Task.Delay(TimeSpan.FromSeconds(1), token); } } + _mediaPlayer.Stop(); _mediaPlayer.Dispose(); } @@ -116,12 +102,31 @@ public class VLCFrameExtractor(LibVLC libVLC, MainWindow mainWindow) private void Display(IntPtr opaque, IntPtr picture) { - if (_frameCounter % (int)(_videoFPS / 3.0) == 0) - FilesToProcess.Enqueue(_currentBitmap); + var playerTime = TimeSpan.FromMilliseconds(_mediaPlayer.Time); + if (_lastFrameTimestamp != playerTime) + { + _lastFrame = _frameCounter; + _lastFrameTimestamp = playerTime; + } + + if (_frameCounter > 20 && _frameCounter % 10 == 0) + { + var msToAdd = (_frameCounter - _lastFrame) * (_lastFrameTimestamp.TotalMilliseconds / _lastFrame); + var time = _lastFrameTimestamp.Add(TimeSpan.FromMilliseconds(msToAdd)); + FramesQueue.Enqueue(new FrameInfo(time, _currentBitmap)); + } else + { _currentBitmap?.Dispose(); + } _currentBitmap = null; _frameCounter++; } +} + +public class FrameInfo(TimeSpan time, SKBitmap? bitmap) +{ + public TimeSpan Time { get; set; } = time; + public SKBitmap? Bitmap { get; set; } = bitmap; } \ No newline at end of file diff --git a/Azaion.Annotator/GalleryManager.cs b/Azaion.Annotator/GalleryManager.cs index 30261af..769f6b4 100644 --- a/Azaion.Annotator/GalleryManager.cs +++ b/Azaion.Annotator/GalleryManager.cs @@ -124,96 +124,104 @@ public class GalleryManager : IGalleryManager public async Task CreateThumbnail(string imgPath, CancellationToken cancellationToken = default) { - var width = (int)_config.ThumbnailConfig.Size.Width; - var height = (int)_config.ThumbnailConfig.Size.Height; - - var imgName = Path.GetFileName(imgPath); - var labelName = Path.Combine(_config.LabelsDirectory, $"{Path.GetFileNameWithoutExtension(imgPath)}.txt"); - - var originalImage = Image.FromStream(new MemoryStream(await File.ReadAllBytesAsync(imgPath, cancellationToken))); - - var bitmap = new Bitmap(width, height); - - using var g = Graphics.FromImage(bitmap); - g.CompositingQuality = CompositingQuality.HighSpeed; - g.SmoothingMode = SmoothingMode.HighSpeed; - g.InterpolationMode = InterpolationMode.Default; - - var size = new Size(originalImage.Width, originalImage.Height); - if (!File.Exists(labelName)) + try { - File.Move(imgPath, Path.Combine(_config.UnknownImages, imgName)); - _logger.LogInformation($"No labels found for image {imgName}! Moved image to the {_config.UnknownImages} folder."); + var width = (int)_config.ThumbnailConfig.Size.Width; + var height = (int)_config.ThumbnailConfig.Size.Height; + + var imgName = Path.GetFileName(imgPath); + var labelName = Path.Combine(_config.LabelsDirectory, $"{Path.GetFileNameWithoutExtension(imgPath)}.txt"); + + var originalImage = Image.FromStream(new MemoryStream(await File.ReadAllBytesAsync(imgPath, cancellationToken))); + + var bitmap = new Bitmap(width, height); + + using var g = Graphics.FromImage(bitmap); + g.CompositingQuality = CompositingQuality.HighSpeed; + g.SmoothingMode = SmoothingMode.HighSpeed; + g.InterpolationMode = InterpolationMode.Default; + + var size = new Size(originalImage.Width, originalImage.Height); + if (!File.Exists(labelName)) + { + File.Move(imgPath, Path.Combine(_config.UnknownImages, imgName)); + _logger.LogInformation($"No labels found for image {imgName}! Moved image to the {_config.UnknownImages} folder."); + return null; + } + var labels = (await YoloLabel.ReadFromFile(labelName, cancellationToken)) + .Select(x => new CanvasLabel(x, size, size)) + .ToList(); + + var thumbWhRatio = width / (float)height; + var border = _config.ThumbnailConfig.Border; + + var classes = labels.Select(x => x.ClassNumber).Distinct().ToList(); + LabelsCache.TryAdd(imgName, new LabelInfo + { + Classes = classes, + ImageDateTime = File.GetCreationTimeUtc(imgPath) + }); + + var frameX = 0.0; + var frameY = 0.0; + var frameHeight = size.Height; + var frameWidth = size.Width; + + if (labels.Any()) + { + var labelsMinX = labels.Min(x => x.X); + var labelsMaxX = labels.Max(x => x.X + x.Width); + + var labelsMinY = labels.Min(x => x.Y); + var labelsMaxY = labels.Max(x => x.Y + x.Height); + + var labelsHeight = labelsMaxY - labelsMinY + 2 * border; + var labelsWidth = labelsMaxX - labelsMinX + 2 * border; + + if (labelsWidth / labelsHeight > thumbWhRatio) + { + frameWidth = labelsWidth; + frameHeight = Math.Min(labelsWidth / thumbWhRatio, size.Height); + frameX = Math.Max(0, labelsMinX - border); + frameY = Math.Max(0, 0.5 * (labelsMinY + labelsMaxY - frameHeight) - border); + } + else + { + frameHeight = labelsHeight; + frameWidth = Math.Min(labelsHeight * thumbWhRatio, size.Width); + frameY = Math.Max(0, labelsMinY - border); + frameX = Math.Max(0, 0.5 * (labelsMinX + labelsMaxX - frameWidth) - border); + } + } + + var scale = frameHeight / height; + g.DrawImage(originalImage, new Rectangle(0, 0, width, height), new RectangleF((float)frameX, (float)frameY, (float)frameWidth, (float)frameHeight), GraphicsUnit.Pixel); + + foreach (var label in labels) + { + var color = _config.AnnotationClassesDict[label.ClassNumber].Color; + var brush = new SolidBrush(Color.FromArgb(color.A, color.R, color.G, color.B)); + + var rectangle = new RectangleF((float)((label.X - frameX) / scale), (float)((label.Y - frameY) / scale), (float)(label.Width / scale), (float)(label.Height / scale)); + g.FillRectangle(brush, rectangle); + } + + var thumbnailName = Path.Combine(ThumbnailsDirectory.FullName, $"{Path.GetFileNameWithoutExtension(imgPath)}{Config.THUMBNAIL_PREFIX}.jpg"); + bitmap.Save(thumbnailName, ImageFormat.Jpeg); + + return new ThumbnailDto + { + ThumbnailPath = thumbnailName, + ImagePath = imgPath, + LabelPath = labelName, + ImageDate = File.GetCreationTimeUtc(imgPath) + }; + } + catch (Exception e) + { + _logger.LogError(e, e.Message); return null; } - var labels = (await YoloLabel.ReadFromFile(labelName, cancellationToken)) - .Select(x => new CanvasLabel(x, size, size)) - .ToList(); - - var thumbWhRatio = width / (float)height; - var border = _config.ThumbnailConfig.Border; - - var classes = labels.Select(x => x.ClassNumber).Distinct().ToList(); - LabelsCache.TryAdd(imgName, new LabelInfo - { - Classes = classes, - ImageDateTime = File.GetCreationTimeUtc(imgPath) - }); - - var frameX = 0.0; - var frameY = 0.0; - var frameHeight = size.Height; - var frameWidth = size.Width; - - if (labels.Any()) - { - var labelsMinX = labels.Min(x => x.X); - var labelsMaxX = labels.Max(x => x.X + x.Width); - - var labelsMinY = labels.Min(x => x.Y); - var labelsMaxY = labels.Max(x => x.Y + x.Height); - - var labelsHeight = labelsMaxY - labelsMinY + 2 * border; - var labelsWidth = labelsMaxX - labelsMinX + 2 * border; - - if (labelsWidth / labelsHeight > thumbWhRatio) - { - frameWidth = labelsWidth; - frameHeight = Math.Min(labelsWidth / thumbWhRatio, size.Height); - frameX = Math.Max(0, labelsMinX - border); - frameY = Math.Max(0, 0.5 * (labelsMinY + labelsMaxY - frameHeight) - border); - } - else - { - frameHeight = labelsHeight; - frameWidth = Math.Min(labelsHeight * thumbWhRatio, size.Width); - frameY = Math.Max(0, labelsMinY - border); - frameX = Math.Max(0, 0.5 * (labelsMinX + labelsMaxX - frameWidth) - border); - } - } - - var scale = frameHeight / height; - g.DrawImage(originalImage, new Rectangle(0, 0, width, height), new RectangleF((float)frameX, (float)frameY, (float)frameWidth, (float)frameHeight), GraphicsUnit.Pixel); - - foreach (var label in labels) - { - var color = _config.AnnotationClassesDict[label.ClassNumber].Color; - var brush = new SolidBrush(Color.FromArgb(color.A, color.R, color.G, color.B)); - - var rectangle = new RectangleF((float)((label.X - frameX) / scale), (float)((label.Y - frameY) / scale), (float)(label.Width / scale), (float)(label.Height / scale)); - g.FillRectangle(brush, rectangle); - } - - var thumbnailName = Path.Combine(ThumbnailsDirectory.FullName, $"{Path.GetFileNameWithoutExtension(imgPath)}{Config.THUMBNAIL_PREFIX}.jpg"); - bitmap.Save(thumbnailName, ImageFormat.Jpeg); - - return new ThumbnailDto - { - ThumbnailPath = thumbnailName, - ImagePath = imgPath, - LabelPath = labelName, - ImageDate = File.GetCreationTimeUtc(imgPath) - }; } } diff --git a/Azaion.Annotator/MainWindow.xaml b/Azaion.Annotator/MainWindow.xaml index 7580ae8..7bb400f 100644 --- a/Azaion.Annotator/MainWindow.xaml +++ b/Azaion.Annotator/MainWindow.xaml @@ -229,7 +229,7 @@ RowHeaderWidth="0" Padding="2 0 0 0" AutoGenerateColumns="False" - SelectionMode="Single" + SelectionMode="Extended" CellStyle="{DynamicResource DataGridCellStyle1}" IsReadOnly="True" CanUserResizeRows="False" @@ -267,11 +267,6 @@ - - - diff --git a/Azaion.Annotator/MainWindow.xaml.cs b/Azaion.Annotator/MainWindow.xaml.cs index c40da69..be20a06 100644 --- a/Azaion.Annotator/MainWindow.xaml.cs +++ b/Azaion.Annotator/MainWindow.xaml.cs @@ -1,20 +1,25 @@ using System.Collections.ObjectModel; using System.Diagnostics; +using System.Drawing.Imaging; using System.IO; using System.Windows; using System.Windows.Controls; using System.Windows.Controls.Primitives; using System.Windows.Input; +using System.Windows.Media; +using System.Windows.Threading; using Azaion.Annotator.DTO; using Azaion.Annotator.Extensions; using LibVLCSharp.Shared; using MediatR; using Microsoft.WindowsAPICodePack.Dialogs; using Newtonsoft.Json; -using Point = System.Windows.Point; using Size = System.Windows.Size; using IntervalTree; using Microsoft.Extensions.Logging; +using OpenTK.Graphics.OpenGL; +using Serilog; +using MediaPlayer = LibVLCSharp.Shared.MediaPlayer; namespace Azaion.Annotator; @@ -29,7 +34,9 @@ public partial class MainWindow private readonly HelpWindow _helpWindow; private readonly ILogger _logger; private readonly IGalleryManager _galleryManager; - private CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); + private readonly VLCFrameExtractor _vlcFrameExtractor; + private readonly IAIDetector _aiDetector; + private CancellationTokenSource _cancellationTokenSource = new(); private ObservableCollection AnnotationClasses { get; set; } = new(); private bool _suspendLayout; @@ -43,6 +50,7 @@ public partial class MainWindow private ObservableCollection FilteredMediaFiles { get; set; } = new(); public IntervalTree> Annotations { get; set; } = new(); + private AutodetectDialog _autoDetectDialog; public MainWindow(LibVLC libVLC, MediaPlayer mediaPlayer, IMediator mediator, @@ -51,7 +59,9 @@ public partial class MainWindow HelpWindow helpWindow, DatasetExplorer datasetExplorer, ILogger logger, - IGalleryManager galleryManager) + IGalleryManager galleryManager, + VLCFrameExtractor vlcFrameExtractor, + IAIDetector aiDetector) { InitializeComponent(); _libVLC = libVLC; @@ -64,6 +74,8 @@ public partial class MainWindow _datasetExplorer = datasetExplorer; _logger = logger; _galleryManager = galleryManager; + _vlcFrameExtractor = vlcFrameExtractor; + _aiDetector = aiDetector; VideoView.Loaded += VideoView_Loaded; Closed += OnFormClosed; @@ -189,6 +201,39 @@ public partial class MainWindow LocationChanged += async (_, _) => await SaveUserSettings(); StateChanged += async (_, _) => await SaveUserSettings(); + DgAnnotations.MouseDoubleClick += (sender, args) => + { + Editor.RemoveAllAnns(); + var dgRow = ItemsControl.ContainerFromElement((DataGrid)sender, (args.OriginalSource as DependencyObject)!) as DataGridRow; + var res = (AnnotationResult)dgRow!.Item; + _mediaPlayer.SetPause(true); + Editor.RemoveAllAnns(); + _mediaPlayer.Time = (long)res.Time.TotalMilliseconds; + ShowTimeAnnotations(res.Time); + }; + + DgAnnotations.KeyUp += (sender, args) => + { + if (args.Key != Key.Delete) + return; + + var result = MessageBox.Show("Чи дійсно видалити аннотації?","Підтвердження видалення", MessageBoxButton.OKCancel, MessageBoxImage.Question); + if (result != MessageBoxResult.OK) + return; + + var res = DgAnnotations.SelectedItems.Cast().ToList(); + foreach (var annotationResult in res) + { + var imgName = Path.GetFileNameWithoutExtension(annotationResult.Image); + var thumbnailPath = Path.Combine(_config.ThumbnailsDirectory, $"{imgName}{Config.THUMBNAIL_PREFIX}.jpg"); + File.Delete(annotationResult.Image); + File.Delete(Path.Combine(_config.LabelsDirectory, $"{imgName}.txt")); + File.Delete(thumbnailPath); + _formState.AnnotationResults.Remove(annotationResult); + Annotations.Remove(Annotations.Query(annotationResult.Time)); + } + }; + Editor.FormState = _formState; Editor.Mediator = _mediator; DgAnnotations.ItemsSource = _formState.AnnotationResults; @@ -221,14 +266,16 @@ public partial class MainWindow var annotations = Annotations.Query(time).SelectMany(x => x).ToList(); foreach (var ann in annotations) - { - var annClass = _config.AnnotationClasses[ann.ClassNumber]; - var annInfo = new CanvasLabel(ann, Editor.RenderSize, _formState.CurrentVideoSize); - Dispatcher.Invoke(() => Editor.CreateAnnotation(annClass, time, annInfo)); - } + AddAnnotationToCanvas(time, new CanvasLabel(ann, Editor.RenderSize, _formState.CurrentVideoSize)); } - public async Task ReloadAnnotations(CancellationToken cancellationToken) + private void AddAnnotationToCanvas(TimeSpan? time, CanvasLabel canvasLabel) + { + var annClass = _config.AnnotationClasses[canvasLabel.ClassNumber]; + Dispatcher.Invoke(() => Editor.CreateAnnotation(annClass, time, canvasLabel)); + } + + private async Task ReloadAnnotations(CancellationToken cancellationToken) { _formState.AnnotationResults.Clear(); Annotations.Clear(); @@ -243,14 +290,12 @@ public partial class MainWindow { var name = Path.GetFileNameWithoutExtension(file.Name); var time = _formState.GetTime(name); - await AddAnnotation(time, await YoloLabel.ReadFromFile(file.FullName, cancellationToken)); + await AddAnnotations(time, await YoloLabel.ReadFromFile(file.FullName, cancellationToken)); } } - public async Task AddAnnotation(TimeSpan? time, List annotations) + public async Task AddAnnotations(TimeSpan? time, List annotations) { - var fName = _formState.GetTimeName(time); - var timeValue = time ?? TimeSpan.FromMinutes(0); var previousAnnotations = Annotations.Query(timeValue); Annotations.Remove(previousAnnotations); @@ -269,8 +314,8 @@ public partial class MainWindow .Select(x => x.Value + 1) .FirstOrDefault(); - _formState.AnnotationResults.Insert(index, new AnnotationResult(timeValue, fName, annotations, _config)); - await File.WriteAllTextAsync($"{_config.ResultsDirectory}/{fName}.json", JsonConvert.SerializeObject(_formState.AnnotationResults)); + _formState.AnnotationResults.Insert(index, new AnnotationResult(timeValue, _formState.GetTimeName(time), annotations, _config)); + await File.WriteAllTextAsync($"{_config.ResultsDirectory}/{_formState.VideoName}.json", JsonConvert.SerializeObject(_formState.AnnotationResults)); } private void ReloadFiles() @@ -339,9 +384,19 @@ public partial class MainWindow if (mediaFileInfo == null) return; - Process.Start("explorer.exe", "/select, \"" + mediaFileInfo.Path +"\""); + Process.Start("explorer.exe", "/select,\"" + mediaFileInfo.Path +"\""); } + public void SeekTo(long timeMilliseconds) + { + _mediaPlayer.SetPause(true); + _mediaPlayer.Time = timeMilliseconds; + VideoSlider.Value = _mediaPlayer.Position * 100; + } + + private void SeekTo(TimeSpan time) => + SeekTo((long)time.TotalMilliseconds); + // private void AddClassBtnClick(object sender, RoutedEventArgs e) // { // LvClasses.IsReadOnly = false; @@ -402,32 +457,12 @@ public partial class MainWindow private void TurnOffVolume(object sender, RoutedEventArgs e) => _mediator.Publish(new PlaybackControlEvent(PlaybackControlEnum.TurnOffVolume)); private void TurnOnVolume(object sender, RoutedEventArgs e) => _mediator.Publish(new PlaybackControlEvent(PlaybackControlEnum.TurnOnVolume)); - private async void AutoDetect(object sender, RoutedEventArgs e) - { - if (LvFiles.SelectedItem == null) - return; - await _mediator.Send(new AIDetectEvent()); - } - private void OpenHelpWindowClick(object sender, RoutedEventArgs e) { _helpWindow.Show(); _helpWindow.Activate(); } - private void DgAnnotationsRowClick(object sender, MouseButtonEventArgs e) - { - DgAnnotations.MouseDoubleClick += (sender, args) => - { - Editor.RemoveAllAnns(); - var dgRow = ItemsControl.ContainerFromElement((DataGrid)sender, (args.OriginalSource as DependencyObject)!) as DataGridRow; - var res = (AnnotationResult)dgRow!.Item; - _mediaPlayer.SetPause(true); - _mediaPlayer.Time = (long)res.Time.TotalMilliseconds; // + 250; - ShowTimeAnnotations(res.Time); - }; - } - private void Thumb_OnDragCompleted(object sender, DragCompletedEventArgs e) => _ = SaveUserSettings(); private void ReloadThumbnailsItemClick(object sender, RoutedEventArgs e) @@ -445,4 +480,149 @@ public partial class MainWindow var listItem = sender as ListViewItem; LvFilesContextMenu.DataContext = listItem.DataContext; } + + private (TimeSpan Time, List<(YoloLabel Label, float Probability)> Detections)? _previousDetection; + + public void AutoDetect(object sender, RoutedEventArgs e) + { + if (LvFiles.SelectedItem == null) + return; + + _mediator.Publish(new PlaybackControlEvent(PlaybackControlEnum.Play)); + var mediaInfo = (MediaFileInfo)LvFiles.SelectedItem; + _formState.CurrentMedia = mediaInfo; + _mediaPlayer.Stop(); + var path = mediaInfo.Path; + + var manualCancellationSource = new CancellationTokenSource(); + var token = manualCancellationSource.Token; + + _autoDetectDialog = new AutodetectDialog + { + Topmost = true, + Owner = this + }; + _autoDetectDialog.Closing += (_, _) => + { + manualCancellationSource.Cancel(); + _mediaPlayer.Stop(); + }; + _autoDetectDialog.Top = Height - _autoDetectDialog.Height - 80; + + _ = Task.Run(async () => + { + using var detector = new YOLODetector(_config); + Dispatcher.Invoke(() => _autoDetectDialog.Log("Ініціалізація AI...")); + + await foreach (var timeframe in _vlcFrameExtractor.ExtractFrames(path, token)) + { + try + { + var detections = _aiDetector.Detect(timeframe.Stream); + + if (!IsValidDetection(timeframe.Time, detections)) + continue; + + await ProcessDetection(timeframe, detections, token); + } + catch (Exception ex) + { + _logger.LogError(ex, ex.Message); + await manualCancellationSource.CancelAsync(); + } + } + _autoDetectDialog.Close(); + }, token); + + + _autoDetectDialog.ShowDialog(); + Dispatcher.Invoke(() => Editor.Background = new SolidColorBrush(Color.FromArgb(1, 0, 0, 0))); + } + + private bool IsValidDetection(TimeSpan time, List<(YoloLabel Label, float Probability)> detections) + { + // No AI detection, forbid + if (detections.Count == 0) + return false; + + // Very first detection, allow + if (!_previousDetection.HasValue) + return true; + + var prev = _previousDetection.Value; + + // Time between detections is >= than Frame Recognition Seconds, allow + if (time >= prev.Time.Add(TimeSpan.FromSeconds(_config.AIRecognitionConfig.FrameRecognitionSeconds))) + return true; + + // Detection is earlier than previous + FrameRecognitionSeconds. + // Look to the detections more in detail + + // More detected objects, allow + if (detections.Count > prev.Detections.Count) + return true; + + foreach (var det in detections) + { + var point = new Point(det.Label.CenterX, det.Label.CenterY); + var closestObject = prev.Detections + .Select(p => new + { + Point = p, + Distance = point.SqrDistance(new Point(p.Label.CenterX, p.Label.CenterY)) + }) + .OrderBy(x => x.Distance) + .First(); + + // Closest object is farther than Tracking distance confidence, hence it's a different object, allow + if (closestObject.Distance > _config.AIRecognitionConfig.TrackingDistanceConfidence) + return true; + + // Since closest object within distance confidence, then it is tracking of the same object. Then if recognition probability for the object > increase from previous + if (det.Probability >= closestObject.Point.Probability + _config.AIRecognitionConfig.TrackingProbabilityIncrease) + return true; + } + + return false; + } + + private async Task ProcessDetection((TimeSpan Time, Stream Stream) timeframe, List<(YoloLabel Label, float Probability)> detections, CancellationToken token = default) + { + _previousDetection = (timeframe.Time, detections); + await Dispatcher.Invoke(async () => + { + try + { + var time = timeframe.Time; + var labels = detections.Select(x => x.Label).ToList(); + + var fName = _formState.GetTimeName(timeframe.Time); + var imgPath = Path.Combine(_config.ImagesDirectory, $"{fName}.jpg"); + var img = System.Drawing.Image.FromStream(timeframe.Stream); + img.Save(imgPath, ImageFormat.Jpeg); + await YoloLabel.WriteToFile(labels, Path.Combine(_config.LabelsDirectory, $"{fName}.txt"), token); + + Editor.Background = new ImageBrush { ImageSource = await imgPath.OpenImage() }; + Editor.RemoveAllAnns(); + foreach (var (label, probability) in detections) + AddAnnotationToCanvas(time, new CanvasLabel(label, Editor.RenderSize, Editor.RenderSize, probability)); + await AddAnnotations(timeframe.Time, labels); + + var log = string.Join(Environment.NewLine, detections.Select(det => + $"{_config.AnnotationClassesDict[det.Label.ClassNumber].Name}: " + + $"xy=({det.Label.CenterX:F2},{det.Label.CenterY:F2}), " + + $"size=({det.Label.Width:F2}, {det.Label.Height:F2}), " + + $"prob: {det.Probability:F1}%")); + Dispatcher.Invoke(() => _autoDetectDialog.Log(log)); + + var thumbnailDto = await _galleryManager.CreateThumbnail(imgPath, token); + if (thumbnailDto != null) + _datasetExplorer.AddThumbnail(thumbnailDto, labels.Select(x => x.ClassNumber)); + } + catch (Exception e) + { + _logger.LogError(e, e.Message); + } + }); + } } diff --git a/Azaion.Annotator/MainWindowEventHandler.cs b/Azaion.Annotator/MainWindowEventHandler.cs index e4e8691..29f5b49 100644 --- a/Azaion.Annotator/MainWindowEventHandler.cs +++ b/Azaion.Annotator/MainWindowEventHandler.cs @@ -1,5 +1,6 @@ using System.IO; using System.Windows; +using System.Windows.Controls; using System.Windows.Input; using Azaion.Annotator.DTO; using LibVLCSharp.Shared; @@ -95,7 +96,7 @@ public class MainWindowEventHandler : await ControlPlayback(value); if (key == Key.A) - await _mediator.Send( new AIDetectEvent(), cancellationToken); + _mainWindow.AutoDetect(null!, null!); await VolumeControl(key); } @@ -141,7 +142,7 @@ public class MainWindowEventHandler : switch (controlEnum) { case PlaybackControlEnum.Play: - await Play(); + Play(); break; case PlaybackControlEnum.Pause: _mediaPlayer.Pause(); @@ -152,19 +153,16 @@ public class MainWindowEventHandler : _mediaPlayer.Stop(); break; case PlaybackControlEnum.PreviousFrame: - _mediaPlayer.SetPause(true); - _mediaPlayer.Time -= step; - _mainWindow.VideoSlider.Value = _mediaPlayer.Position * 100; + _mainWindow.SeekTo(_mediaPlayer.Time - step); break; case PlaybackControlEnum.NextFrame: - _mediaPlayer.SetPause(true); - _mediaPlayer.Time += step; - _mainWindow.VideoSlider.Value = _mediaPlayer.Position * 100; + _mainWindow.SeekTo(_mediaPlayer.Time + step); break; case PlaybackControlEnum.SaveAnnotations: await SaveAnnotations(); break; case PlaybackControlEnum.RemoveSelectedAnns: + _mainWindow.Editor.RemoveSelectedAnns(); break; case PlaybackControlEnum.RemoveAllAnns: @@ -182,10 +180,10 @@ public class MainWindowEventHandler : _mediaPlayer.Volume = 0; break; case PlaybackControlEnum.Previous: - await NextMedia(isPrevious: true); + NextMedia(isPrevious: true); break; case PlaybackControlEnum.Next: - await NextMedia(); + NextMedia(); break; case PlaybackControlEnum.None: break; @@ -195,12 +193,12 @@ public class MainWindowEventHandler : } catch (Exception e) { - Console.WriteLine(e); + _logger.LogError(e, e.Message); throw; } } - private async Task NextMedia(bool isPrevious = false) + private void NextMedia(bool isPrevious = false) { var increment = isPrevious ? -1 : 1; var check = isPrevious ? -1 : _mainWindow.LvFiles.Items.Count; @@ -208,7 +206,7 @@ public class MainWindowEventHandler : return; _mainWindow.LvFiles.SelectedIndex += increment; - await Play(); + Play(); } public async Task Handle(VolumeChangedEvent notification, CancellationToken cancellationToken) @@ -223,7 +221,7 @@ public class MainWindowEventHandler : _mediaPlayer.Volume = volume; } - private async Task Play() + private void Play() { if (_mainWindow.LvFiles.SelectedItem == null) return; @@ -242,7 +240,7 @@ public class MainWindowEventHandler : return; var time = TimeSpan.FromMilliseconds(_mediaPlayer.Time); - var fName = _formState.GetTimeName(time); + var fName = _formState.GetTimeName(time); var currentAnns = _mainWindow.Editor.CurrentAnns .Select(x => new YoloLabel(x.Info, _mainWindow.Editor.RenderSize, _formState.CurrentVideoSize)) @@ -252,7 +250,7 @@ public class MainWindowEventHandler : var resultHeight = (uint)Math.Round(RESULT_WIDTH / _formState.CurrentVideoSize.Width * _formState.CurrentVideoSize.Height); - await _mainWindow.AddAnnotation(time, currentAnns); + await _mainWindow.AddAnnotations(time, currentAnns); _formState.CurrentMedia.HasAnnotations = _mainWindow.Annotations.Count != 0; _mainWindow.LvFiles.Items.Refresh(); @@ -269,13 +267,11 @@ public class MainWindowEventHandler : else { File.Copy(_formState.CurrentMedia.Path, destinationPath, overwrite: true); - await NextMedia(); + NextMedia(); } var thumbnailDto = await _galleryManager.CreateThumbnail(destinationPath); - var selectedClass = ((AnnotationClass?)_datasetExplorer.LvClasses.SelectedItem)?.Id; - - if (selectedClass != null && (selectedClass == -1 || currentAnns.Any(x => x.ClassNumber == selectedClass))) - _datasetExplorer.ThumbnailsDtos.Insert(0, thumbnailDto); + if (thumbnailDto != null) + _datasetExplorer.AddThumbnail(thumbnailDto, currentAnns.Select(x => x.ClassNumber)); } } \ No newline at end of file diff --git a/Azaion.Annotator/YOLODetector.cs b/Azaion.Annotator/YOLODetector.cs new file mode 100644 index 0000000..33eb217 --- /dev/null +++ b/Azaion.Annotator/YOLODetector.cs @@ -0,0 +1,36 @@ +using System.Drawing.Imaging; +using System.IO; +using Azaion.Annotator.DTO; +using Compunet.YoloV8; +using SixLabors.ImageSharp; +using SixLabors.ImageSharp.Formats.Jpeg; +using SixLabors.ImageSharp.PixelFormats; + +namespace Azaion.Annotator; + +public interface IAIDetector +{ + List<(YoloLabel Label, float Probability)> Detect(Stream stream); +} + +public class YOLODetector(Config config) : IAIDetector, IDisposable +{ + private readonly YoloPredictor _predictor = new(config.AIRecognitionConfig.AIModelPath); + + public List<(YoloLabel Label, float Probability)> Detect(Stream stream) + { + stream.Seek(0, SeekOrigin.Begin); + var image = Image.Load(stream); + var result = _predictor.Detect(image); + + var imageSize = new System.Windows.Size(image.Width, image.Height); + + return result.Select(d => + { + var label = new YoloLabel(new CanvasLabel(d.Name.Id, d.Bounds.X, d.Bounds.Y, d.Bounds.Width, d.Bounds.Height), imageSize, imageSize); + return (label, d.Confidence * 100); + }).ToList(); + } + + public void Dispose() => _predictor.Dispose(); +} diff --git a/Azaion.Annotator/config.json b/Azaion.Annotator/config.json index 78f538d..bd50cc6 100644 --- a/Azaion.Annotator/config.json +++ b/Azaion.Annotator/config.json @@ -1,5 +1,5 @@ { - "VideosDirectory": "E:\\Azaion3\\Videos", + "VideosDirectory": "E:\\Azaion3\\VideosTest", "LabelsDirectory": "E:\\labels", "ImagesDirectory": "E:\\images", "ThumbnailsDirectory": "E:\\thumbnails", @@ -36,5 +36,10 @@ "ShowHelpOnStart": false, "VideoFormats": ["mov", "mp4"], "ImageFormats": ["jpg", "jpeg", "png", "bmp", "gif"], - "AIModelPath": "D:\\dev\\azaion\\azaion_2024-09-19.onnx" + "AIRecognitionConfig": { + "AIModelPath": "azaion.onnx", + "FrameRecognitionSeconds": 2, + "TrackingDistanceConfidence": 0.15, + "TrackingProbabilityIncrease": 15 + } } \ No newline at end of file