mirror of
https://github.com/azaion/annotations.git
synced 2026-04-22 11:16:30 +00:00
add ramdisk, load AI model to ramdisk and start recognition from it
Rewrite ZMQ messaging to use DEALER and ROUTER sockets; add a GET_USER command to get CurrentUser from Python. All auth is on the Python side; inference runs and annotations are validated in Python.
This commit is contained in:
@@ -480,7 +480,7 @@
|
||||
Grid.Column="10"
|
||||
Padding="2" Width="25"
|
||||
Height="25"
|
||||
ToolTip="Розпізнати за допомогою AI. Клавіша: [A]" Background="Black" BorderBrush="Black"
|
||||
ToolTip="Розпізнати за допомогою AI. Клавіша: [R]" Background="Black" BorderBrush="Black"
|
||||
Click="AutoDetect">
|
||||
<Path Stretch="Fill" Fill="LightGray" Data="M144.317 85.269h223.368c15.381 0 29.391 6.325 39.567 16.494l.025-.024c10.163 10.164 16.477 24.193 16.477
|
||||
39.599v189.728c0 15.401-6.326 29.425-16.485 39.584-10.159 10.159-24.183 16.484-39.584 16.484H144.317c-15.4
|
||||
|
||||
+148
-154
@@ -6,9 +6,7 @@ using System.Windows.Controls;
|
||||
using System.Windows.Controls.Primitives;
|
||||
using System.Windows.Input;
|
||||
using System.Windows.Media;
|
||||
using System.Windows.Media.Imaging;
|
||||
using Azaion.Annotator.DTO;
|
||||
using Azaion.Annotator.Extensions;
|
||||
using Azaion.Common.Database;
|
||||
using Azaion.Common.DTO;
|
||||
using Azaion.Common.DTO.Config;
|
||||
@@ -39,10 +37,9 @@ public partial class Annotator
|
||||
private readonly IConfigUpdater _configUpdater;
|
||||
private readonly HelpWindow _helpWindow;
|
||||
private readonly ILogger<Annotator> _logger;
|
||||
private readonly VLCFrameExtractor _vlcFrameExtractor;
|
||||
private readonly IAIDetector _aiDetector;
|
||||
private readonly AnnotationService _annotationService;
|
||||
private readonly IDbFactory _dbFactory;
|
||||
private readonly IInferenceService _inferenceService;
|
||||
private readonly CancellationTokenSource _ctSource = new();
|
||||
|
||||
private ObservableCollection<DetectionClass> AnnotationClasses { get; set; } = new();
|
||||
@@ -67,10 +64,9 @@ public partial class Annotator
|
||||
FormState formState,
|
||||
HelpWindow helpWindow,
|
||||
ILogger<Annotator> logger,
|
||||
VLCFrameExtractor vlcFrameExtractor,
|
||||
IAIDetector aiDetector,
|
||||
AnnotationService annotationService,
|
||||
IDbFactory dbFactory)
|
||||
IDbFactory dbFactory,
|
||||
IInferenceService inferenceService)
|
||||
{
|
||||
InitializeComponent();
|
||||
_appConfig = appConfig.Value;
|
||||
@@ -81,10 +77,9 @@ public partial class Annotator
|
||||
_formState = formState;
|
||||
_helpWindow = helpWindow;
|
||||
_logger = logger;
|
||||
_vlcFrameExtractor = vlcFrameExtractor;
|
||||
_aiDetector = aiDetector;
|
||||
_annotationService = annotationService;
|
||||
_dbFactory = dbFactory;
|
||||
_inferenceService = inferenceService;
|
||||
|
||||
Loaded += OnLoaded;
|
||||
Closed += OnFormClosed;
|
||||
@@ -304,11 +299,16 @@ public partial class Annotator
|
||||
|
||||
var annotations = await _dbFactory.Run(async db =>
|
||||
await db.Annotations.LoadWith(x => x.Detections)
|
||||
.Where(x => x.Name.Contains(_formState.VideoName))
|
||||
.Where(x => x.OriginalMediaName == _formState.VideoName)
|
||||
.ToListAsync(token: _ctSource.Token));
|
||||
|
||||
TimedAnnotations.Clear();
|
||||
_formState.AnnotationResults.Clear();
|
||||
foreach (var ann in annotations)
|
||||
AddAnnotation(ann);
|
||||
{
|
||||
TimedAnnotations.Add(ann.Time.Subtract(_thresholdBefore), ann.Time.Add(_thresholdAfter), ann);
|
||||
_formState.AnnotationResults.Add(new AnnotationResult(_appConfig.AnnotationConfig.DetectionClassesDict, ann));
|
||||
}
|
||||
}
|
||||
|
||||
//Add manually
|
||||
@@ -435,8 +435,6 @@ public partial class Annotator
|
||||
|
||||
_appConfig.DirectoriesConfig.VideosDirectory = dlg.FileName;
|
||||
TbFolder.Text = dlg.FileName;
|
||||
await ReloadFiles();
|
||||
await SaveUserSettings();
|
||||
}
|
||||
|
||||
private void TbFilter_OnTextChanged(object sender, TextChangedEventArgs e)
|
||||
@@ -487,11 +485,8 @@ public partial class Annotator
|
||||
if (LvFiles.SelectedIndex == -1)
|
||||
LvFiles.SelectedIndex = 0;
|
||||
|
||||
await _mediator.Publish(new AnnotatorControlEvent(PlaybackControlEnum.Play));
|
||||
_mediaPlayer.Stop();
|
||||
|
||||
var manualCancellationSource = new CancellationTokenSource();
|
||||
var token = manualCancellationSource.Token;
|
||||
var mct = new CancellationTokenSource();
|
||||
var token = mct.Token;
|
||||
|
||||
_autoDetectDialog = new AutodetectDialog
|
||||
{
|
||||
@@ -500,7 +495,7 @@ public partial class Annotator
|
||||
};
|
||||
_autoDetectDialog.Closing += (_, _) =>
|
||||
{
|
||||
manualCancellationSource.Cancel();
|
||||
mct.Cancel();
|
||||
_mediaPlayer.SeekTo(TimeSpan.Zero);
|
||||
Editor.RemoveAllAnns();
|
||||
};
|
||||
@@ -515,16 +510,17 @@ public partial class Annotator
|
||||
var mediaInfo = Dispatcher.Invoke(() => (MediaFileInfo)LvFiles.SelectedItem);
|
||||
while (mediaInfo != null)
|
||||
{
|
||||
_formState.CurrentMedia = mediaInfo;
|
||||
await Dispatcher.Invoke(async () => await ReloadAnnotations());
|
||||
|
||||
if (mediaInfo.MediaType == MediaTypes.Image)
|
||||
await Dispatcher.Invoke(async () =>
|
||||
{
|
||||
await DetectImage(mediaInfo, manualCancellationSource, token);
|
||||
await Task.Delay(70, token);
|
||||
}
|
||||
else
|
||||
await DetectVideo(mediaInfo, manualCancellationSource, token);
|
||||
await _mediator.Publish(new AnnotatorControlEvent(PlaybackControlEnum.Play), token);
|
||||
await ReloadAnnotations();
|
||||
});
|
||||
|
||||
await _inferenceService.RunInference(mediaInfo.Path, async (annotationImage, ct) =>
|
||||
{
|
||||
annotationImage.OriginalMediaName = mediaInfo.FName;
|
||||
await ProcessDetection(annotationImage, ct);
|
||||
}, token);
|
||||
|
||||
mediaInfo = Dispatcher.Invoke(() =>
|
||||
{
|
||||
@@ -533,6 +529,7 @@ public partial class Annotator
|
||||
LvFiles.SelectedIndex += 1;
|
||||
return (MediaFileInfo)LvFiles.SelectedItem;
|
||||
});
|
||||
LvFiles.Items.Refresh();
|
||||
}
|
||||
Dispatcher.Invoke(() =>
|
||||
{
|
||||
@@ -546,143 +543,140 @@ public partial class Annotator
|
||||
Dispatcher.Invoke(() => Editor.ResetBackground());
|
||||
}
|
||||
|
||||
private async Task DetectImage(MediaFileInfo mediaInfo, CancellationTokenSource manualCancellationSource, CancellationToken token)
|
||||
// private async Task DetectImage(MediaFileInfo mediaInfo, CancellationTokenSource manualCancellationSource, CancellationToken token)
|
||||
// {
|
||||
// try
|
||||
// {
|
||||
// var fName = Path.GetFileNameWithoutExtension(mediaInfo.Path);
|
||||
// var stream = new FileStream(mediaInfo.Path, FileMode.Open);
|
||||
// var detections = await _aiDetector.Detect(fName, stream, token);
|
||||
// await ProcessDetection((TimeSpan.FromMilliseconds(0), stream), Path.GetExtension(mediaInfo.Path), detections, token);
|
||||
// if (detections.Count != 0)
|
||||
// mediaInfo.HasAnnotations = true;
|
||||
// }
|
||||
// catch (Exception e)
|
||||
// {
|
||||
// _logger.LogError(e, e.Message);
|
||||
// await manualCancellationSource.CancelAsync();
|
||||
// }
|
||||
// }
|
||||
|
||||
// private async Task DetectVideo(MediaFileInfo mediaInfo, CancellationTokenSource manualCancellationSource, CancellationToken token)
|
||||
// {
|
||||
// var prevSeekTime = 0.0;
|
||||
// await foreach (var timeframe in _vlcFrameExtractor.ExtractFrames(mediaInfo.Path, token))
|
||||
// {
|
||||
// Console.WriteLine($"Detect time: {timeframe.Time}");
|
||||
// try
|
||||
// {
|
||||
// var fName = _formState.GetTimeName(timeframe.Time);
|
||||
// var detections = await _aiDetector.Detect(fName, timeframe.Stream, token);
|
||||
//
|
||||
// var isValid = IsValidDetection(timeframe.Time, detections);
|
||||
// Console.WriteLine($"Detection time: {timeframe.Time}");
|
||||
//
|
||||
// var log = string.Join(Environment.NewLine, detections.Select(det =>
|
||||
// $"{_appConfig.AnnotationConfig.DetectionClassesDict[det.ClassNumber].Name}: " +
|
||||
// $"xy=({det.CenterX:F2},{det.CenterY:F2}), " +
|
||||
// $"size=({det.Width:F2}, {det.Height:F2}), " +
|
||||
// $"prob: {det.Probability:F1}%"));
|
||||
//
|
||||
// log = $"Detection time: {timeframe.Time}, Valid: {isValid}. {Environment.NewLine} {log}";
|
||||
// Dispatcher.Invoke(() => _autoDetectDialog.Log(log));
|
||||
//
|
||||
// if (timeframe.Time.TotalMilliseconds > prevSeekTime + 250)
|
||||
// {
|
||||
// Dispatcher.Invoke(() => SeekTo(timeframe.Time));
|
||||
// prevSeekTime = timeframe.Time.TotalMilliseconds;
|
||||
// if (!isValid) //Show frame anyway
|
||||
// {
|
||||
// Dispatcher.Invoke(() =>
|
||||
// {
|
||||
// Editor.RemoveAllAnns();
|
||||
// Editor.Background = new ImageBrush
|
||||
// {
|
||||
// ImageSource = timeframe.Stream.OpenImage()
|
||||
// };
|
||||
// });
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (!isValid)
|
||||
// continue;
|
||||
//
|
||||
// mediaInfo.HasAnnotations = true;
|
||||
// await ProcessDetection(timeframe, ".jpg", detections, token);
|
||||
// await timeframe.Stream.DisposeAsync();
|
||||
// }
|
||||
// catch (Exception ex)
|
||||
// {
|
||||
// _logger.LogError(ex, ex.Message);
|
||||
// await manualCancellationSource.CancelAsync();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// private bool IsValidDetection(TimeSpan time, List<Detection> detections)
|
||||
// {
|
||||
// // No AI detection, forbid
|
||||
// if (detections.Count == 0)
|
||||
// return false;
|
||||
//
|
||||
// // Very first detection, allow
|
||||
// if (!_previousDetection.HasValue)
|
||||
// return true;
|
||||
//
|
||||
// var prev = _previousDetection.Value;
|
||||
//
|
||||
// // Time between detections is >= than Frame Recognition Seconds, allow
|
||||
// if (time >= prev.Time.Add(TimeSpan.FromSeconds(_appConfig.AIRecognitionConfig.FrameRecognitionSeconds)))
|
||||
// return true;
|
||||
//
|
||||
// // Detection is earlier than previous + FrameRecognitionSeconds.
|
||||
// // Look to the detections more in detail
|
||||
//
|
||||
// // More detected objects, allow
|
||||
// if (detections.Count > prev.Detections.Count)
|
||||
// return true;
|
||||
//
|
||||
// foreach (var det in detections)
|
||||
// {
|
||||
// var point = new Point(det.CenterX, det.CenterY);
|
||||
// var closestObject = prev.Detections
|
||||
// .Select(p => new
|
||||
// {
|
||||
// Point = p,
|
||||
// Distance = point.SqrDistance(new Point(p.CenterX, p.CenterY))
|
||||
// })
|
||||
// .OrderBy(x => x.Distance)
|
||||
// .First();
|
||||
//
|
||||
// // Closest object is farther than Tracking distance confidence, hence it's a different object, allow
|
||||
// if (closestObject.Distance > _appConfig.AIRecognitionConfig.TrackingDistanceConfidence)
|
||||
// return true;
|
||||
//
|
||||
// // Since closest object within distance confidence, then it is tracking of the same object. Then if recognition probability for the object > increase from previous
|
||||
// if (det.Probability >= closestObject.Point.Probability + _appConfig.AIRecognitionConfig.TrackingProbabilityIncrease)
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// return false;
|
||||
// }
|
||||
|
||||
private async Task ProcessDetection(AnnotationImage annotationImage, CancellationToken token = default)
|
||||
{
|
||||
try
|
||||
{
|
||||
var fName = Path.GetFileNameWithoutExtension(mediaInfo.Path);
|
||||
var stream = new FileStream(mediaInfo.Path, FileMode.Open);
|
||||
var detections = await _aiDetector.Detect(fName, stream, token);
|
||||
await ProcessDetection((TimeSpan.FromMilliseconds(0), stream), Path.GetExtension(mediaInfo.Path), detections, token);
|
||||
if (detections.Count != 0)
|
||||
mediaInfo.HasAnnotations = true;
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
_logger.LogError(e, e.Message);
|
||||
await manualCancellationSource.CancelAsync();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the AI detector over every frame produced by the frame extractor for a video,
/// writes each result to the auto-detect dialog and stores the frames that pass
/// <c>IsValidDetection</c>.
/// </summary>
/// <param name="mediaInfo">Video to process; <c>HasAnnotations</c> is set once a valid frame is found.</param>
/// <param name="manualCancellationSource">Cancelled when processing a frame throws.</param>
/// <param name="token">Token passed to frame extraction, detection and saving.</param>
private async Task DetectVideo(MediaFileInfo mediaInfo, CancellationTokenSource manualCancellationSource, CancellationToken token)
{
    // Minimum gap, in milliseconds of video time, between two UI seeks.
    const double seekIntervalMs = 250;
    var lastSeekMs = 0.0;

    await foreach (var timeframe in _vlcFrameExtractor.ExtractFrames(mediaInfo.Path, token))
    {
        Console.WriteLine($"Detect time: {timeframe.Time}");
        try
        {
            var frameName = _formState.GetTimeName(timeframe.Time);
            var detections = await _aiDetector.Detect(frameName, timeframe.Stream, token);

            var isValid = IsValidDetection(timeframe.Time, detections);
            Console.WriteLine($"Detection time: {timeframe.Time}");

            var detectionLines = detections.Select(det =>
                $"{_appConfig.AnnotationConfig.DetectionClassesDict[det.ClassNumber].Name}: " +
                $"xy=({det.CenterX:F2},{det.CenterY:F2}), " +
                $"size=({det.Width:F2}, {det.Height:F2}), " +
                $"prob: {det.Probability:F1}%");
            var details = string.Join(Environment.NewLine, detectionLines);
            var message = $"Detection time: {timeframe.Time}, Valid: {isValid}. {Environment.NewLine} {details}";
            Dispatcher.Invoke(() => _autoDetectDialog.Log(message));

            var frameMs = timeframe.Time.TotalMilliseconds;
            if (frameMs > lastSeekMs + seekIntervalMs)
            {
                Dispatcher.Invoke(() => SeekTo(timeframe.Time));
                lastSeekMs = frameMs;

                // A rejected frame is still shown so the user sees progress.
                if (!isValid)
                {
                    Dispatcher.Invoke(() =>
                    {
                        Editor.RemoveAllAnns();
                        Editor.Background = new ImageBrush
                        {
                            ImageSource = timeframe.Stream.OpenImage()
                        };
                    });
                }
            }

            if (isValid)
            {
                mediaInfo.HasAnnotations = true;
                await ProcessDetection(timeframe, ".jpg", detections, token);
                await timeframe.Stream.DisposeAsync();
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, ex.Message);
            await manualCancellationSource.CancelAsync();
        }
    }
}
|
||||
|
||||
/// <summary>
/// Decides whether the detections of a frame should be accepted, by comparing them
/// with the previously accepted detection stored in <c>_previousDetection</c>.
/// </summary>
/// <param name="time">Timestamp of the frame being checked.</param>
/// <param name="detections">Detections found on that frame.</param>
/// <returns><c>true</c> when the frame should be accepted; otherwise <c>false</c>.</returns>
private bool IsValidDetection(TimeSpan time, List<Detection> detections)
{
    // Nothing was detected: reject.
    if (detections.Count == 0)
        return false;

    // Nothing to compare against yet: accept.
    if (!_previousDetection.HasValue)
        return true;

    var (previousTime, previousDetections) = _previousDetection.Value;

    // Enough time has passed since the previous accepted frame: accept.
    var nextAllowedTime = previousTime.Add(TimeSpan.FromSeconds(_appConfig.AIRecognitionConfig.FrameRecognitionSeconds));
    if (time >= nextAllowedTime)
        return true;

    // Inside the recognition window: accept only if the frame adds information.

    // More objects than before: accept.
    if (detections.Count > previousDetections.Count)
        return true;

    foreach (var det in detections)
    {
        var center = new Point(det.CenterX, det.CenterY);

        // Previous detection whose center is nearest to this one (first wins on ties).
        var nearest = previousDetections
            .Select(p => (Point: p, Distance: center.SqrDistance(new Point(p.CenterX, p.CenterY))))
            .MinBy(x => x.Distance);

        // Farther than the tracking distance, so it is treated as a different object: accept.
        if (nearest.Distance > _appConfig.AIRecognitionConfig.TrackingDistanceConfidence)
            return true;

        // Same object being tracked: accept only when its probability grew by the configured increase.
        if (det.Probability >= nearest.Point.Probability + _appConfig.AIRecognitionConfig.TrackingProbabilityIncrease)
            return true;
    }

    return false;
}
|
||||
|
||||
private async Task ProcessDetection((TimeSpan Time, Stream Stream) timeframe, string imageExtension, List<Detection> detections, CancellationToken token = default)
|
||||
{
|
||||
_previousDetection = (timeframe.Time, detections);
|
||||
await Dispatcher.Invoke(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
var fName = _formState.GetTimeName(timeframe.Time);
|
||||
|
||||
var annotation = await _annotationService.SaveAnnotation(fName, imageExtension, detections, SourceEnum.AI, timeframe.Stream, token);
|
||||
var annotation = await _annotationService.SaveAnnotation(annotationImage, token);
|
||||
|
||||
Editor.Background = new ImageBrush { ImageSource = await annotation.ImagePath.OpenImage() };
|
||||
Editor.RemoveAllAnns();
|
||||
ShowAnnotations(annotation, true);
|
||||
AddAnnotation(annotation);
|
||||
|
||||
var log = string.Join(Environment.NewLine, detections.Select(det =>
|
||||
var log = string.Join(Environment.NewLine, annotation.Detections.Select(det =>
|
||||
$"{_appConfig.AnnotationConfig.DetectionClassesDict[det.ClassNumber].Name}: " +
|
||||
$"xy=({det.CenterX:F2},{det.CenterY:F2}), " +
|
||||
$"size=({det.Width:F2}, {det.Height:F2}), " +
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
using System.Windows;
|
||||
using System.Windows.Input;
|
||||
using Azaion.Annotator.DTO;
|
||||
using Azaion.Common;
|
||||
using Azaion.Common.DTO;
|
||||
using Azaion.Common.DTO.Config;
|
||||
using Azaion.Common.DTO.Queue;
|
||||
using Azaion.Common.Events;
|
||||
using Azaion.Common.Extensions;
|
||||
using Azaion.Common.Services;
|
||||
using LibVLCSharp.Shared;
|
||||
using MediatR;
|
||||
@@ -79,7 +79,7 @@ public class AnnotatorEventHandler(
|
||||
if (_keysControlEnumDict.TryGetValue(key, out var value))
|
||||
await ControlPlayback(value, cancellationToken);
|
||||
|
||||
if (key == Key.A)
|
||||
if (key == Key.R)
|
||||
mainWindow.AutoDetect(null!, null!);
|
||||
|
||||
#region Volume
|
||||
@@ -228,7 +228,7 @@ public class AnnotatorEventHandler(
|
||||
return;
|
||||
|
||||
var time = formState.BackgroundTime ?? TimeSpan.FromMilliseconds(mediaPlayer.Time);
|
||||
var fName = formState.GetTimeName(time);
|
||||
var fName = formState.VideoName.ToTimeName(time);
|
||||
|
||||
var currentDetections = mainWindow.Editor.CurrentDetections
|
||||
.Select(x => new Detection(fName, x.GetLabel(mainWindow.Editor.RenderSize, formState.BackgroundTime.HasValue ? mainWindow.Editor.RenderSize : formState.CurrentVideoSize)))
|
||||
@@ -267,7 +267,7 @@ public class AnnotatorEventHandler(
|
||||
File.Copy(formState.CurrentMedia.Path, imgPath, overwrite: true);
|
||||
NextMedia();
|
||||
}
|
||||
var annotation = await annotationService.SaveAnnotation(fName, imageExtension, currentDetections, SourceEnum.Manual, token: cancellationToken);
|
||||
var annotation = await annotationService.SaveAnnotation(formState.VideoName, time, imageExtension, currentDetections, SourceEnum.Manual, token: cancellationToken);
|
||||
mainWindow.AddAnnotation(annotation);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
using System.Windows;
|
||||
|
||||
namespace Azaion.Annotator.Extensions;
|
||||
|
||||
/// <summary>Extension methods for <see cref="Point"/>.</summary>
public static class PointExtensions
{
    /// <summary>
    /// Returns the squared Euclidean distance between <paramref name="p1"/> and <paramref name="p2"/>.
    /// </summary>
    public static double SqrDistance(this Point p1, Point p2)
    {
        var dx = p2.X - p1.X;
        var dy = p2.Y - p1.Y;
        return dx * dx + dy * dy;
    }
}
|
||||
@@ -1,130 +0,0 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using Azaion.Common.DTO.Config;
|
||||
using LibVLCSharp.Shared;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SkiaSharp;
|
||||
|
||||
namespace Azaion.Annotator.Extensions;
|
||||
|
||||
/// <summary>
/// Plays a media file through LibVLC with custom video callbacks and yields sampled
/// frames as JPEG-encoded streams together with their estimated timestamps.
/// </summary>
/// <remarks>
/// NOTE(review): the frame queue, the in-flight bitmap and the frame counter are static,
/// so two extractions running at the same time would share them — confirm that only one
/// extraction runs at a time.
/// </remarks>
public class VLCFrameExtractor(LibVLC libVLC, IOptions<AIRecognitionConfig> config)
{
    private const uint RGBA_BYTES = 4;
    private const int PLAYBACK_RATE = 4;

    private uint _pitch;  // Number of bytes per "line", aligned to x32.
    private uint _lines;  // Number of lines in the buffer, aligned to x32.
    private uint _width;  // Thumbnail width
    private uint _height; // Thumbnail height

    private MediaPlayer _mediaPlayer = null!;

    private TimeSpan _lastFrameTimestamp;
    private long _lastFrame;

    /// <summary>Rounds <paramref name="size"/> up to the next multiple of 32.</summary>
    private static uint Align32(uint size)
    {
        if (size % 32 == 0)
            return size;
        return (size / 32 + 1) * 32; // Align on the next multiple of 32
    }

    private static SKBitmap? _currentBitmap;
    private static readonly ConcurrentQueue<FrameInfo> FramesQueue = new();
    private static long _frameCounter;

    /// <summary>
    /// Plays <paramref name="mediaPath"/> at <see cref="PLAYBACK_RATE"/> speed without audio and
    /// yields every sampled frame as a JPEG stream.
    /// </summary>
    /// <param name="mediaPath">Path of the media file to play.</param>
    /// <param name="manualCancellationToken">Stops the extraction when cancelled.</param>
    /// <returns>
    /// Pairs of estimated frame time and a <see cref="MemoryStream"/> holding the JPEG data.
    /// The stream is positioned at its end and is owned by the consumer.
    /// </returns>
    /// <remarks>
    /// The sequence completes normally when the player stops or the token is cancelled.
    /// The player is stopped and disposed even when the consumer abandons the enumeration early.
    /// </remarks>
    public async IAsyncEnumerable<(TimeSpan Time, Stream Stream)> ExtractFrames(string mediaPath,
        [EnumeratorCancellation] CancellationToken manualCancellationToken = default)
    {
        // Deliberately not disposed: the Stopped handler below may still call CancelAfter
        // on it after this iterator has finished (Stop() in the cleanup raises Stopped).
        var videoFinishedCancellationSource = new CancellationTokenSource();
        var videoFinishedCT = videoFinishedCancellationSource.Token;

        // Declared outside the try so it is disposed after the player, as before.
        using var media = new Media(libVLC, mediaPath);

        _mediaPlayer = new MediaPlayer(libVLC);
        _mediaPlayer.Stopped += (s, e) => videoFinishedCancellationSource.CancelAfter(1);

        try
        {
            await media.Parse(cancellationToken: videoFinishedCT);
            var videoTrack = media.Tracks.FirstOrDefault(x => x.Data.Video.Width != 0);
            _width = videoTrack.Data.Video.Width;
            _height = videoTrack.Data.Video.Height;

            // No usable video track: there is nothing to render into, so produce no frames.
            if (_width == 0 || _height == 0)
                yield break;

            _pitch = Align32(_width * RGBA_BYTES);
            _lines = Align32(_height);
            _mediaPlayer.SetRate(PLAYBACK_RATE);

            media.AddOption(":no-audio");
            _mediaPlayer.SetVideoFormat("RV32", _width, _height, _pitch);
            _mediaPlayer.SetVideoCallbacks(Lock, null, Display);

            _mediaPlayer.Play(media);
            _frameCounter = 0;
            using var surface = SKSurface.Create(new SKImageInfo((int) _width, (int) _height));

            // Wakes the idle wait as soon as either the video ends or the caller cancels.
            using var waitSource = CancellationTokenSource.CreateLinkedTokenSource(videoFinishedCT, manualCancellationToken);

            while (!(FramesQueue.IsEmpty && videoFinishedCT.IsCancellationRequested || manualCancellationToken.IsCancellationRequested))
            {
                if (!FramesQueue.TryDequeue(out var frameInfo))
                {
                    try
                    {
                        await Task.Delay(TimeSpan.FromSeconds(1), waitSource.Token);
                    }
                    catch (OperationCanceledException)
                    {
                        // Expected at end of video or on manual stop; the loop condition decides what happens next.
                    }
                    continue;
                }

                if (frameInfo.Bitmap is not { } bitmap)
                    continue;

                var ms = new MemoryStream();
                try
                {
                    surface.Canvas.DrawBitmap(bitmap, 0, 0); // Effectively crops the original bitmap to get only the visible area

                    using var outputImage = surface.Snapshot();
                    using var data = outputImage.Encode(SKEncodedImageFormat.Jpeg, 85);
                    data.SaveTo(ms);
                }
                finally
                {
                    // The JPEG data is already copied into ms, so the bitmap can be released
                    // before control is handed to the consumer.
                    bitmap.Dispose();
                }

                yield return (frameInfo.Time, ms);
            }
        }
        finally
        {
            _mediaPlayer.Stop();

            // Release frames that were queued but never consumed (e.g. after a manual stop).
            while (FramesQueue.TryDequeue(out var leftover))
                leftover.Bitmap?.Dispose();

            _mediaPlayer.Dispose();
        }
    }

    /// <summary>
    /// Video lock callback: allocates a bitmap for the incoming frame and writes the address
    /// of its pixel buffer into <paramref name="planes"/>.
    /// </summary>
    private IntPtr Lock(IntPtr opaque, IntPtr planes)
    {
        _currentBitmap = new SKBitmap(new SKImageInfo((int)(_pitch / RGBA_BYTES), (int)_lines, SKColorType.Bgra8888));
        Marshal.WriteIntPtr(planes, _currentBitmap.GetPixels());
        return IntPtr.Zero;
    }

    /// <summary>
    /// Video display callback: either queues the current bitmap for <see cref="ExtractFrames"/>
    /// or disposes it, depending on the frame counter.
    /// </summary>
    private void Display(IntPtr opaque, IntPtr picture)
    {
        // Remember at which frame index the player's reported time last changed.
        var playerTime = TimeSpan.FromMilliseconds(_mediaPlayer.Time);
        if (_lastFrameTimestamp != playerTime)
        {
            _lastFrame = _frameCounter;
            _lastFrameTimestamp = playerTime;
        }

        // Frames up to index 20 are always dropped; after that every FramePeriodRecognition-th frame is kept.
        // NOTE(review): if FramePeriodRecognition is an integer type, a value of 0 makes this modulo throw — confirm config validation.
        if (_frameCounter > 20 && _frameCounter % config.Value.FramePeriodRecognition == 0)
        {
            // Extrapolate the frame time: frames since the last time change multiplied by the
            // average milliseconds per frame observed so far.
            var msToAdd = (_frameCounter - _lastFrame) * (_lastFrame == 0 ? 0 : _lastFrameTimestamp.TotalMilliseconds / _lastFrame);
            var time = _lastFrameTimestamp.Add(TimeSpan.FromMilliseconds(msToAdd));

            FramesQueue.Enqueue(new FrameInfo(time, _currentBitmap));
        }
        else
        {
            _currentBitmap?.Dispose();
        }

        _currentBitmap = null;
        _frameCounter++;
    }
}
|
||||
|
||||
/// <summary>A captured frame bitmap together with the time assigned to it.</summary>
public class FrameInfo
{
    /// <summary>Creates a frame entry from its time and bitmap.</summary>
    public FrameInfo(TimeSpan time, SKBitmap? bitmap)
    {
        Time = time;
        Bitmap = bitmap;
    }

    /// <summary>Time assigned to the frame.</summary>
    public TimeSpan Time { get; set; }

    /// <summary>Pixel data of the frame; may be <c>null</c>.</summary>
    public SKBitmap? Bitmap { get; set; }
}
|
||||
@@ -1,87 +0,0 @@
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using Azaion.Annotator.Extensions;
|
||||
using Azaion.Common.DTO;
|
||||
using Azaion.Common.DTO.Config;
|
||||
using Azaion.CommonSecurity.Services;
|
||||
using Compunet.YoloV8;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SixLabors.ImageSharp;
|
||||
using SixLabors.ImageSharp.PixelFormats;
|
||||
using Detection = Azaion.Common.DTO.Detection;
|
||||
|
||||
namespace Azaion.Annotator;
|
||||
|
||||
/// <summary>Detects objects on an image supplied as a stream.</summary>
public interface IAIDetector
{
    /// <summary>Runs detection on a single image.</summary>
    /// <param name="fName">Name attached to every returned detection.</param>
    /// <param name="imageStream">Stream containing the encoded image.</param>
    /// <param name="cancellationToken">Token for cancelling the operation.</param>
    /// <returns>The detections found on the image.</returns>
    Task<List<Detection>> Detect(string fName, Stream imageStream, CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// <see cref="IAIDetector"/> implementation backed by a YOLO ONNX model that is loaded
/// through the resource loader on first use.
/// </summary>
public class YOLODetector(IOptions<AIRecognitionConfig> recognitionConfig, IResourceLoader resourceLoader) : IAIDetector, IDisposable
{
    private readonly AIRecognitionConfig _recognitionConfig = recognitionConfig.Value;
    private YoloPredictor? _predictor;
    private const string YOLO_MODEL = "azaion.onnx";

    /// <summary>
    /// Runs the model on the image in <paramref name="imageStream"/> and returns the detections
    /// that remain after overlapping ones are filtered out.
    /// </summary>
    /// <param name="fName">Name attached to every returned detection.</param>
    /// <param name="imageStream">Seekable stream with the encoded image; it is rewound before reading.</param>
    /// <param name="cancellationToken">Used while loading the model on first use.</param>
    /// <returns>Detections with probability expressed in percent.</returns>
    public async Task<List<Detection>> Detect(string fName, Stream imageStream, CancellationToken cancellationToken)
    {
        // The model is loaded lazily on the first call and reused afterwards.
        if (_predictor == null)
        {
            await using var stream = await resourceLoader.Load(YOLO_MODEL, cancellationToken);
            _predictor = new YoloPredictor(stream.ToArray());
        }

        imageStream.Seek(0, SeekOrigin.Begin);

        using var image = Image.Load<Rgb24>(imageStream);
        var result = await _predictor.DetectAsync(image);
        var imageSize = new System.Windows.Size(image.Width, image.Height);
        var detections = result.Select(d =>
        {
            var label = new YoloLabel(new CanvasLabel(d.Name.Id, d.Bounds.X, d.Bounds.Y, d.Bounds.Width, d.Bounds.Height), imageSize, imageSize);
            return new Detection(fName, label, (double?)d.Confidence * 100);
        }).ToList();

        return FilterOverlapping(detections);
    }

    /// <summary>
    /// Removes detections that overlap a more probable detection by more than
    /// <c>TrackingIntersectionThreshold</c> of the larger of the two areas.
    /// </summary>
    /// <remarks>
    /// Candidates are visited from the most to the least probable, and each one is kept only
    /// if it does not overlap an already kept detection. The input list is not modified,
    /// every detection appears at most once in the result, and the input order is preserved.
    /// </remarks>
    private List<Detection> FilterOverlapping(List<Detection> detections)
    {
        // Stable sort: among equally probable detections the earlier one is visited first.
        var byProbability = Enumerable.Range(0, detections.Count)
            .OrderByDescending(i => detections[i].Probability)
            .ToList();

        var keptIndices = new List<int>();
        foreach (var candidate in byProbability)
        {
            var suppressed = keptIndices.Any(kept => OverlapsTooMuch(detections[kept], detections[candidate]));
            if (!suppressed)
                keptIndices.Add(candidate);
        }

        keptIndices.Sort();
        return keptIndices.Select(i => detections[i]).ToList();
    }

    /// <summary>
    /// Returns <c>true</c> when the intersection of the two detections is larger than the
    /// configured fraction of the bigger detection's area.
    /// </summary>
    private bool OverlapsTooMuch(Detection first, Detection second)
    {
        var k = _recognitionConfig.TrackingIntersectionThreshold;

        var intersect = first.ToRectangle();
        intersect.Intersect(second.ToRectangle());

        var maxArea = Math.Max(first.ToRectangle().Area(), second.ToRectangle().Area());
        return intersect.Area() > k * maxArea;
    }

    /// <summary>Disposes the predictor if it was created.</summary>
    public void Dispose() => _predictor?.Dispose();
}
|
||||
Reference in New Issue
Block a user