Skip to content

Commit 5c8f596

Browse files
committed
Pose extractor pipeline
1 parent 8fd8977 commit 5c8f596

File tree

15 files changed

+679
-112
lines changed

15 files changed

+679
-112
lines changed

Examples/TensorStack.Example.Extractors/Services/BackgroundService.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ public async Task<ImageInput> ExecuteAsync(BackgroundImageRequest options)
133133
var imageTensor = await Task.Run(() => _currentPipeline.RunAsync(new BackgroundImageOptions
134134
{
135135
Mode = options.Mode,
136-
Input = options.Image
136+
Image = options.Image
137137
}, cancellationToken: _cancellationTokenSource.Token));
138138

139139
if (options.IsTransparentSupported)
@@ -169,7 +169,7 @@ async Task<VideoFrame> FrameProcessor(VideoFrame frame)
169169
{
170170
var processedFrame = await _currentPipeline.RunAsync(new BackgroundImageOptions
171171
{
172-
Input = frame.Frame,
172+
Image = frame.Frame,
173173
Mode = options.Mode
174174
}, cancellationToken: cancellationToken);
175175

TensorStack.Common/Common/Enums.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,12 @@ public enum ResizeMode
5858
/// <summary>
5959
/// Center Crop Image
6060
/// </summary>
61-
Crop = 1
61+
Crop = 1,
62+
63+
/// <summary>
64+
/// LetterBox Center Image
65+
/// </summary>
66+
LetterBox = 2
6267
}
6368

6469

TensorStack.Common/Extensions/TensorExtensions.cs

Lines changed: 98 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,45 @@ public static ImageTensor ResizeImage(this ImageTensor sourceImage, int targetWi
10191019
}
10201020

10211021

1022+
/// <summary>
1023+
/// Gets the crop coordinates.
1024+
/// </summary>
1025+
/// <param name="sourceHeight">Height of the source.</param>
1026+
/// <param name="sourceWidth">Width of the source.</param>
1027+
/// <param name="targetHeight">Height of the target.</param>
1028+
/// <param name="targetWidth">Width of the target.</param>
1029+
/// <param name="resizeMode">The resize mode.</param>
1030+
/// <returns>CoordinateBox&lt;System.Int32&gt;.</returns>
1031+
private static CoordinateBox<int> GetCropCoordinates(int sourceHeight, int sourceWidth, int targetHeight, int targetWidth, ResizeMode resizeMode)
1032+
{
1033+
var cropX = 0;
1034+
var cropY = 0;
1035+
var croppedWidth = targetWidth;
1036+
var croppedHeight = targetHeight;
1037+
if (resizeMode == ResizeMode.Crop)
1038+
{
1039+
var scaleX = (float)targetWidth / sourceWidth;
1040+
var scaleY = (float)targetHeight / sourceHeight;
1041+
var scaleFactor = Math.Max(scaleX, scaleY);
1042+
croppedWidth = (int)(sourceWidth * scaleFactor);
1043+
croppedHeight = (int)(sourceHeight * scaleFactor);
1044+
cropX = Math.Abs(Math.Max((croppedWidth - targetWidth) / 2, 0));
1045+
cropY = Math.Abs(Math.Max((croppedHeight - targetHeight) / 2, 0));
1046+
}
1047+
else if (resizeMode == ResizeMode.LetterBox)
1048+
{
1049+
var scaleX = (float)targetWidth / sourceWidth;
1050+
var scaleY = (float)targetHeight / sourceHeight;
1051+
var scaleFactor = Math.Min(scaleX, scaleY);
1052+
croppedWidth = (int)(sourceWidth * scaleFactor);
1053+
croppedHeight = (int)(sourceHeight * scaleFactor);
1054+
cropX = -(targetWidth - croppedWidth) / 2;
1055+
cropY = -(targetHeight - croppedHeight) / 2;
1056+
}
1057+
return new CoordinateBox<int>(cropX, cropY, croppedWidth, croppedHeight);
1058+
}
1059+
1060+
10221061
/// <summary>
10231062
/// Resizes the specified ImageTensor (Bilinear)
10241063
/// </summary>
@@ -1032,15 +1071,20 @@ private static ImageTensor ResizeImageBilinear(ImageTensor sourceImage, int targ
10321071
var sourceHeight = sourceImage.Dimensions[2];
10331072
var sourceWidth = sourceImage.Dimensions[3];
10341073
var cropSize = GetCropCoordinates(sourceHeight, sourceWidth, targetHeight, targetWidth, resizeMode);
1035-
var destination = new ImageTensor(new[] { 1, channels, targetHeight, targetWidth });
1036-
Parallel.For(0, channels, c =>
1074+
var destination = new ImageTensor([1, channels, targetHeight, targetWidth]);
1075+
if (resizeMode == ResizeMode.LetterBox)
1076+
destination.Fill(0f);
1077+
1078+
var scaleY = (float)(sourceHeight - 1) / (cropSize.MaxY - 1);
1079+
var sclaeX = (float)(sourceWidth - 1) / (cropSize.MaxX - 1);
1080+
Parallel.For(0, cropSize.MaxY, h =>
10371081
{
1038-
for (int h = 0; h < cropSize.MaxY; h++)
1082+
for (var c = 0; c < channels; c++)
10391083
{
10401084
for (int w = 0; w < cropSize.MaxX; w++)
10411085
{
1042-
var y = h * (float)(sourceHeight - 1) / (cropSize.MaxY - 1);
1043-
var x = w * (float)(sourceWidth - 1) / (cropSize.MaxX - 1);
1086+
var y = h * scaleY;
1087+
var x = w * sclaeX;
10441088

10451089
var y0 = (int)Math.Floor(y);
10461090
var x0 = (int)Math.Floor(x);
@@ -1084,42 +1128,46 @@ private static ImageTensor ResizeImageBicubic(ImageTensor sourceImage, int targe
10841128
var sourceHeight = sourceImage.Dimensions[2];
10851129
var sourceWidth = sourceImage.Dimensions[3];
10861130
var cropSize = GetCropCoordinates(sourceHeight, sourceWidth, targetHeight, targetWidth, resizeMode);
1087-
var destination = new ImageTensor(new[] { 1, channels, targetHeight, targetWidth });
1088-
Parallel.For(0, channels, c =>
1131+
var destination = new ImageTensor([1, channels, targetHeight, targetWidth]);
1132+
if (resizeMode == ResizeMode.LetterBox)
1133+
destination.Fill(0f);
1134+
1135+
var scaleX = (float)sourceWidth / cropSize.MaxX;
1136+
var scaleY = (float)sourceHeight / cropSize.MaxY;
1137+
Parallel.For(0, cropSize.MaxY, h =>
10891138
{
1090-
for (int h = 0; h < cropSize.MaxY; h++)
1139+
for (var c = 0; c < channels; c++)
10911140
{
10921141
for (int w = 0; w < cropSize.MaxX; w++)
10931142
{
1094-
float y = h * (float)(sourceHeight - 1) / (cropSize.MaxY - 1);
1095-
float x = w * (float)(sourceWidth - 1) / (cropSize.MaxX - 1);
1143+
float srcY = (h + 0.5f) * scaleY - 0.5f;
1144+
float srcX = (w + 0.5f) * scaleX - 0.5f;
10961145

1097-
int yInt = (int)Math.Floor(y);
1098-
int xInt = (int)Math.Floor(x);
1099-
float yFrac = y - yInt;
1100-
float xFrac = x - xInt;
1146+
int yInt = (int)Math.Floor(srcY);
1147+
int xInt = (int)Math.Floor(srcX);
1148+
float yFrac = srcY - yInt;
1149+
float xFrac = srcX - xInt;
1150+
float pixelValue = 0f;
11011151

1102-
float[] colVals = new float[4];
1103-
1104-
for (int i = -1; i <= 2; i++)
1152+
// 2D bicubic: sum over 16 neighbors
1153+
for (int m = -1; m <= 2; m++)
11051154
{
1106-
int yi = Math.Clamp(yInt + i, 0, sourceHeight - 1);
1107-
float[] rowVals = new float[4];
1155+
int yi = MirrorIndex(yInt + m, sourceHeight);
1156+
float wY = MitchellNetravali(m - yFrac);
11081157

1109-
for (int j = -1; j <= 2; j++)
1158+
for (int n = -1; n <= 2; n++)
11101159
{
1111-
int xi = Math.Clamp(xInt + j, 0, sourceWidth - 1);
1112-
rowVals[j + 1] = sourceImage[0, c, yi, xi];
1160+
int xi = MirrorIndex(xInt + n, sourceWidth);
1161+
float wX = MitchellNetravali(n - xFrac);
1162+
pixelValue += sourceImage[0, c, yi, xi] * wX * wY;
11131163
}
1114-
1115-
colVals[i + 1] = CubicInterpolate(rowVals[0], rowVals[1], rowVals[2], rowVals[3], xFrac);
11161164
}
11171165

1118-
var targetY = h - cropSize.MinY;
1119-
var targetX = w - cropSize.MinX;
1166+
int targetY = h - cropSize.MinY;
1167+
int targetX = w - cropSize.MinX;
11201168
if (targetX >= 0 && targetY >= 0 && targetY < targetHeight && targetX < targetWidth)
11211169
{
1122-
destination[0, c, h, w] = CubicInterpolate(colVals[0], colVals[1], colVals[2], colVals[3], yFrac);
1170+
destination[0, c, targetY, targetX] = pixelValue;
11231171
}
11241172
}
11251173
}
@@ -1130,50 +1178,38 @@ private static ImageTensor ResizeImageBicubic(ImageTensor sourceImage, int targe
11301178

11311179

11321180
/// <summary>
1133-
/// Cubic interpolate.
1181+
/// Mitchell-Netravali kernel (sharper, natural bicubic)
11341182
/// </summary>
1135-
/// <param name="v0">The v0.</param>
1136-
/// <param name="v1">The v1.</param>
1137-
/// <param name="v2">The v2.</param>
1138-
/// <param name="v3">The v3.</param>
1139-
/// <param name="fraction">The fraction.</param>
1183+
/// <param name="value">The value.</param>
11401184
/// <returns>System.Single.</returns>
1141-
private static float CubicInterpolate(float v0, float v1, float v2, float v3, float fraction)
1185+
private static float MitchellNetravali(float value)
11421186
{
1143-
float A = (-0.5f * v0) + (1.5f * v1) - (1.5f * v2) + (0.5f * v3);
1144-
float B = (v0 * -1.0f) + (v1 * 2.5f) - (v2 * 2.0f) + (v3 * 0.5f);
1145-
float C = (-0.5f * v0) + (0.5f * v2);
1146-
float D = v1;
1147-
return A * (fraction * fraction * fraction) + B * (fraction * fraction) + C * fraction + D;
1187+
value = Math.Abs(value);
1188+
const float B = 1f / 3f;
1189+
const float C = 1f / 3f;
1190+
1191+
if (value < 1f)
1192+
return ((12 - 9 * B - 6 * C) * (value * value * value) + (-18 + 12 * B + 6 * C) * (value * value) + (6 - 2 * B)) / 6f;
1193+
else if (value < 2f)
1194+
return ((-B - 6 * C) * (value * value * value) + (6 * B + 30 * C) * (value * value) + (-12 * B - 48 * C) * value + (8 * B + 24 * C)) / 6f;
1195+
else
1196+
return 0f;
11481197
}
11491198

11501199

11511200
/// <summary>
1152-
/// Gets the crop coordinates.
1201+
/// Mirror padding helper
11531202
/// </summary>
1154-
/// <param name="sourceHeight">Height of the source.</param>
1155-
/// <param name="sourceWidth">Width of the source.</param>
1156-
/// <param name="targetHeight">Height of the target.</param>
1157-
/// <param name="targetWidth">Width of the target.</param>
1158-
/// <param name="resizeMode">The resize mode.</param>
1159-
/// <returns>CoordinateBox&lt;System.Int32&gt;.</returns>
1160-
private static CoordinateBox<int> GetCropCoordinates(int sourceHeight, int sourceWidth, int targetHeight, int targetWidth, ResizeMode resizeMode)
1203+
/// <param name="i">The i.</param>
1204+
/// <param name="max">The maximum.</param>
1205+
/// <returns>System.Int32.</returns>
1206+
private static int MirrorIndex(int i, int max)
11611207
{
1162-
var cropX = 0;
1163-
var cropY = 0;
1164-
var croppedWidth = targetWidth;
1165-
var croppedHeight = targetHeight;
1166-
if (resizeMode == ResizeMode.Crop)
1167-
{
1168-
var scaleX = (float)targetWidth / sourceWidth;
1169-
var scaleY = (float)targetHeight / sourceHeight;
1170-
var scaleFactor = Math.Max(scaleX, scaleY);
1171-
croppedWidth = (int)(sourceWidth * scaleFactor);
1172-
croppedHeight = (int)(sourceHeight * scaleFactor);
1173-
cropX = Math.Abs(Math.Max((croppedWidth - targetWidth) / 2, 0));
1174-
cropY = Math.Abs(Math.Max((croppedHeight - targetHeight) / 2, 0));
1175-
}
1176-
return new CoordinateBox<int>(cropX, cropY, croppedWidth, croppedHeight);
1208+
if (i < 0)
1209+
return -i;
1210+
if (i >= max)
1211+
return 2 * max - i - 2;
1212+
return i;
11771213
}
11781214
}
11791215
}

TensorStack.Common/Tensor/ImageTensor.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,9 @@ public void UpdateAlphaChannel(ImageTensor tensor)
133133
/// <param name="width">The target width in pixels.</param>
134134
/// <param name="height">The target height in pixels.</param>
135135
/// <param name="resizeMode">The resize mode.</param>
136-
public void Resize(int width, int height, ResizeMode resizeMode)
136+
public void Resize(int width, int height, ResizeMode resizeMode, ResizeMethod resizeMethod = ResizeMethod.Bilinear)
137137
{
138-
UpdateTensor(this.ResizeImage(width, height, resizeMode));
138+
UpdateTensor(this.ResizeImage(width, height, resizeMode, resizeMethod));
139139
}
140140

141141

TensorStack.Common/Vision/CLIPImage.cs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@ public class CLIPImage
1616
/// <param name="width">The width.</param>
1717
/// <param name="height">The height.</param>
1818
/// <returns>ImageTensor.</returns>
19-
public static ImageTensor Process(ImageTensor input, int width = 224, int height = 224)
19+
public static ImageTensor Process(ImageTensor input, int width = 224, int height = 224, ResizeMode ResizeMode = ResizeMode.Stretch)
2020
{
21-
var options = new CLIPImageOptions(width, height);
22-
return Process(input, options);
21+
return Process(input, new CLIPImageOptions(width, height, ResizeMode));
2322
}
2423

2524

@@ -32,8 +31,7 @@ public static ImageTensor Process(ImageTensor input, int width = 224, int height
3231
public static ImageTensor Process(ImageTensor input, CLIPImageOptions options)
3332
{
3433
options ??= new CLIPImageOptions();
35-
var resultTensor = input.ResizeImage(options.Width, options.Height, ResizeMode.Stretch);
36-
resultTensor.NormalizeZeroToOne();
34+
var resultTensor = input.ResizeImage(options.Width, options.Height, options.ResizeMode, options.ResizeMethod);
3735
for (int x = 0; x < resultTensor.Width; x++)
3836
{
3937
for (int y = 0; y < resultTensor.Height; y++)

TensorStack.Common/Vision/CLIPImageOptions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Licensed under the Apache 2.0 License.
33
namespace TensorStack.Common.Vision
44
{
5-
public record CLIPImageOptions(int Width = 224, int Height = 224)
5+
public record CLIPImageOptions(int Width = 224, int Height = 224, ResizeMode ResizeMode = ResizeMode.Stretch, ResizeMethod ResizeMethod = ResizeMethod.Bilinear)
66
{
77
/// <summary>
88
/// The Mean to use if normalizing the image.

TensorStack.Extractors/Common/BackgroundImageOptions.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ public record BackgroundImageOptions : IRunOptions
1818
/// <summary>
1919
/// Gets the input image.
2020
/// </summary>
21-
public ImageTensor Input { get; init; }
21+
public ImageTensor Image { get; init; }
2222
}
2323

24+
2425
public enum BackgroundMode
2526
{
2627
MaskBackground = 0,

TensorStack.Extractors/Common/ExtractorImageOptions.cs

Lines changed: 0 additions & 11 deletions
This file was deleted.

TensorStack.Extractors/Common/ExtractorOptions.cs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
// Copyright (c) TensorStack. All rights reserved.
22
// Licensed under the Apache 2.0 License.
3+
using System.Collections.Generic;
34
using TensorStack.Common;
45
using TensorStack.Common.Pipeline;
6+
using TensorStack.Common.Tensor;
7+
using TensorStack.Common.Video;
58

69
namespace TensorStack.Extractors.Common
710
{
@@ -35,4 +38,40 @@ public record ExtractorOptions : IRunOptions
3538
/// </summary>
3639
public bool IsInverted { get; init; }
3740
}
41+
42+
43+
/// <summary>
44+
/// Image ExtractorOptions.
45+
/// </summary>
46+
public record ExtractorImageOptions : ExtractorOptions
47+
{
48+
/// <summary>
49+
/// Gets the image.
50+
/// </summary>
51+
public ImageTensor Image { get; init; }
52+
}
53+
54+
55+
/// <summary>
56+
/// Video ExtractorOptions.
57+
/// </summary>
58+
public record ExtractorVideoOptions : ExtractorOptions
59+
{
60+
/// <summary>
61+
/// Gets the video.
62+
/// </summary>
63+
public VideoTensor Video { get; init; }
64+
}
65+
66+
67+
/// <summary>
68+
/// Stream ExtractorOptions.
69+
/// </summary>
70+
public record ExtractorStreamOptions : ExtractorOptions
71+
{
72+
/// <summary>
73+
/// Gets the stream.
74+
/// </summary>
75+
public IAsyncEnumerable<VideoFrame> Stream { get; init; }
76+
}
3877
}

TensorStack.Extractors/Common/ExtractorStreamOptions.cs

Lines changed: 0 additions & 12 deletions
This file was deleted.

0 commit comments

Comments
 (0)