Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drawing performance tweaks #290

Merged
merged 15 commits into from
Sep 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions src/ImageSharp.Drawing/Shapes/PolygonClipper/ClipperOffset.cs
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Numerics;

namespace SixLabors.ImageSharp.Drawing.Shapes.PolygonClipper;

/// <summary>
/// Wrapper for clipper offset
/// </summary>
internal class ClipperOffset
{
// To make the floating point polygons compatible with clipper we have to scale them.
private const float ScalingFactor = 1000F;
private readonly PolygonOffsetter polygonClipperOffset;

/// <summary>
Expand All @@ -30,16 +26,16 @@ public ClipperOffset(float meterLimit = 2F, float arcTolerance = .25F)
public ComplexPolygon Execute(float width)
{
PathsF solution = new();
this.polygonClipperOffset.Execute(width * ScalingFactor, solution);
this.polygonClipperOffset.Execute(width, solution);

var polygons = new Polygon[solution.Count];
Polygon[] polygons = new Polygon[solution.Count];
for (int i = 0; i < solution.Count; i++)
{
PathF pt = solution[i];
var points = new PointF[pt.Count];
PointF[] points = new PointF[pt.Count];
for (int j = 0; j < pt.Count; j++)
{
points[j] = pt[j] / ScalingFactor;
points[j] = pt[j];
}

polygons[i] = new Polygon(points);
Expand All @@ -59,7 +55,7 @@ public void AddPath(ReadOnlySpan<PointF> pathPoints, JointStyle jointStyle, EndC
PathF points = new(pathPoints.Length);
for (int i = 0; i < pathPoints.Length; i++)
{
points.Add((Vector2)pathPoints[i] * ScalingFactor);
points.Add(pathPoints[i]);
}

this.polygonClipperOffset.AddPath(points, jointStyle, endCapStyle);
Expand Down
148 changes: 135 additions & 13 deletions src/ImageSharp.Drawing/Shapes/Rasterization/ScanEdgeCollection.Build.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.Memory;

namespace SixLabors.ImageSharp.Drawing.Shapes.Rasterization;
Expand Down Expand Up @@ -42,17 +46,17 @@ private enum VertexCategory
RightRight,
}

internal static ScanEdgeCollection Create(TessellatedMultipolygon multipolygon, MemoryAllocator allocator, int subsampling)
internal static ScanEdgeCollection Create(TessellatedMultipolygon multiPolygon, MemoryAllocator allocator, int subsampling)
{
// We allocate more than we need, since we don't know how many horizontal edges we have:
IMemoryOwner<ScanEdge> buffer = allocator.Allocate<ScanEdge>(multipolygon.TotalVertexCount);
IMemoryOwner<ScanEdge> buffer = allocator.Allocate<ScanEdge>(multiPolygon.TotalVertexCount);

RingWalker walker = new RingWalker(buffer.Memory.Span);
RingWalker walker = new(buffer.Memory.Span);

using IMemoryOwner<float> roundedYBuffer = allocator.Allocate<float>(multipolygon.Max(r => r.Vertices.Length));
using IMemoryOwner<float> roundedYBuffer = allocator.Allocate<float>(multiPolygon.Max(r => r.Vertices.Length));
Span<float> roundedY = roundedYBuffer.Memory.Span;

foreach (TessellatedMultipolygon.Ring ring in multipolygon)
foreach (TessellatedMultipolygon.Ring ring in multiPolygon)
{
if (ring.VertexCount < 3)
{
Expand Down Expand Up @@ -82,22 +86,140 @@ internal static ScanEdgeCollection Create(TessellatedMultipolygon multipolygon,

static void RoundY(ReadOnlySpan<PointF> vertices, Span<float> destination, float subsamplingRatio)
{
for (int i = 0; i < vertices.Length; i++)
int ri = 0;
if (Avx.IsSupported)
Copy link
Member

@antonfirsov antonfirsov Sep 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to make sure all 3 cases (AVX, SSE, scalar) are covered by tests for various input sizes. Given that running an extensive set of integration tests out of process is rather expensive, it would be much better to have the method unit-tested with FeatureTestRunner against random input of various sizes. Here's an example for this approach:

https://github.com/SixLabors/ImageSharp/blob/54b7e04f7a3c2921af3c769bd6c27fd3d5156f04/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs#L155-L166

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've ported FeatureTestRunner across and added a wrapper around existing ScanEdgeCollection tests to cover changes. We now use the same rounding throughout as I figured out an easy way to replicate midpoint rounding.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage is better than reported (actually better than main) because we can't provide a coverage report for the ARM code.

{
// for future SIMD impl:
// https://www.ocf.berkeley.edu/~horie/rounding.html
// Avx.RoundToPositiveInfinity()
destination[i] = MathF.Round(vertices[i].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
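// Viewing the input buffer as a float array, the largest prefix whose length is a multiple of 16 is processed with AVX instructions; any remaining vertices fall through to the scalar loop at the end of this method: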
int verticesLengthInFloats = vertices.Length * 2;
int vector256FloatCount_x2 = Vector256<float>.Count * 2;
int remainder = verticesLengthInFloats % vector256FloatCount_x2;
int verticesLength = verticesLengthInFloats - remainder;

if (verticesLength > 0)
{
ri = vertices.Length - (remainder / 2);
float maxIterations = verticesLength / (Vector256<float>.Count * 2);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be nint. (Also for Sse41 and AdvSimd.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, fixed here and in benchmarks

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure you've pushed the change? I don't see the update in the PR diff.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🫢Nope.... committed but didn't push. I'll open a PR.

ref Vector256<float> sourceBase = ref Unsafe.As<PointF, Vector256<float>>(ref MemoryMarshal.GetReference(vertices));
ref Vector256<float> destinationBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

Vector256<float> ssRatio = Vector256.Create(subsamplingRatio);
Vector256<float> inverseSsRatio = Vector256.Create(1F / subsamplingRatio);
Vector256<float> half = Vector256.Create(.5F);

// For every 1 vector we add to the destination we read 2 from the vertices.
for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
{
// Load 8 PointF
Vector256<float> points1 = Unsafe.Add(ref sourceBase, j);
Vector256<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

// Shuffle the points to group the Y properties
Vector128<float> points1Y = Sse.Shuffle(points1.GetLower(), points1.GetUpper(), 0b11_01_11_01);
Vector128<float> points2Y = Sse.Shuffle(points2.GetLower(), points2.GetUpper(), 0b11_01_11_01);
Vector256<float> pointsY = Vector256.Create(points1Y, points2Y);

// Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
// https://www.ocf.berkeley.edu/~horie/rounding.html
Vector256<float> rounded = Avx.RoundToPositiveInfinity(Avx.Subtract(Avx.Multiply(pointsY, ssRatio), half));
Unsafe.Add(ref destinationBase, i) = Avx.Multiply(rounded, inverseSsRatio);
}
}
}
else if (Sse41.IsSupported)
{
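// Viewing the input buffer as a float array, the largest prefix whose length is a multiple of 8 is processed with Sse instructions; any remaining vertices fall through to the scalar loop at the end of this method: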
int verticesLengthInFloats = vertices.Length * 2;
int vector128FloatCount_x2 = Vector128<float>.Count * 2;
int remainder = verticesLengthInFloats % vector128FloatCount_x2;
int verticesLength = verticesLengthInFloats - remainder;

if (verticesLength > 0)
{
ri = vertices.Length - (remainder / 2);
float maxIterations = verticesLength / (Vector128<float>.Count * 2);
ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);
Vector128<float> half = Vector128.Create(.5F);

// For every 1 vector we add to the destination we read 2 from the vertices.
for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
{
// Load 4 PointF
Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

// Shuffle the points to group the Y properties
Vector128<float> pointsY = Sse.Shuffle(points1, points2, 0b11_01_11_01);

// Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
// https://www.ocf.berkeley.edu/~horie/rounding.html
Vector128<float> rounded = Sse41.RoundToPositiveInfinity(Sse.Subtract(Sse.Multiply(pointsY, ssRatio), half));
Unsafe.Add(ref destinationBase, i) = Sse.Multiply(rounded, inverseSsRatio);
}
}
}
else if (AdvSimd.IsSupported)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know if this is true on the BuildJet image?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we've specified the ARM images. These are the ones we've used previously to diagnose issues in ImageSharp. https://buildjet.com/for-github-actions/docs/runners/hardware#arm

{
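// Viewing the input buffer as a float array, the largest prefix whose length is a multiple of 8 is processed with AdvSimd instructions; any remaining vertices fall through to the scalar loop at the end of this method: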
int verticesLengthInFloats = vertices.Length * 2;
int vector128FloatCount_x2 = Vector128<float>.Count * 2;
int remainder = verticesLengthInFloats % vector128FloatCount_x2;
int verticesLength = verticesLengthInFloats - remainder;

if (verticesLength > 0)
{
ri = vertices.Length - (remainder / 2);
float maxIterations = verticesLength / (Vector128<float>.Count * 2);
ref Vector128<float> sourceBase = ref Unsafe.As<PointF, Vector128<float>>(ref MemoryMarshal.GetReference(vertices));
ref Vector128<float> destinationBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(destination));

Vector128<float> ssRatio = Vector128.Create(subsamplingRatio);
Vector128<float> inverseSsRatio = Vector128.Create(1F / subsamplingRatio);

// For every 1 vector we add to the destination we read 2 from the vertices.
for (nint i = 0, j = 0; i < maxIterations; i++, j += 2)
{
// Load 4 PointF
Vector128<float> points1 = Unsafe.Add(ref sourceBase, j);
Vector128<float> points2 = Unsafe.Add(ref sourceBase, j + 1);

// Shuffle the points to group the Y properties
Vector128<float> pointsY = AdvSimdShuffle(points1, points2, 0b11_01_11_01);

// Multiply by the subsampling ratio, round, then multiply by the inverted subsampling ratio and assign.
Vector128<float> rounded = AdvSimd.RoundAwayFromZero(AdvSimd.Multiply(pointsY, ssRatio));
Unsafe.Add(ref destinationBase, i) = AdvSimd.Multiply(rounded, inverseSsRatio);
}
}
}

for (; ri < vertices.Length; ri++)
{
destination[ri] = MathF.Round(vertices[ri].Y * subsamplingRatio, MidpointRounding.AwayFromZero) / subsamplingRatio;
}
}

return new ScanEdgeCollection(buffer, walker.EdgeCounter);
}

/// <summary>
/// Builds a vector from two lanes of <paramref name="a"/> followed by two lanes of <paramref name="b"/>,
/// with each lane chosen by a 2-bit field of <paramref name="control"/>.
/// </summary>
/// <param name="a">The vector supplying result lanes 0 and 1.</param>
/// <param name="b">The vector supplying result lanes 2 and 3.</param>
/// <param name="control">
/// Four packed 2-bit lane indices: bits 0-1 and 2-3 index into <paramref name="a"/>,
/// bits 4-5 and 6-7 index into <paramref name="b"/>.
/// </param>
/// <returns>The shuffled vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<float> AdvSimdShuffle(Vector128<float> a, Vector128<float> b, byte control)
{
    // Decode the four 2-bit lane selectors, lowest bits first.
    byte lane0 = (byte)(control & 0x3);
    byte lane1 = (byte)((control >> 2) & 0x3);
    byte lane2 = (byte)((control >> 4) & 0x3);
    byte lane3 = (byte)((control >> 6) & 0x3);

    // The lower half of the result comes from 'a', the upper half from 'b'.
    return Vector128.Create(
        AdvSimd.Extract(a, lane0),
        AdvSimd.Extract(a, lane1),
        AdvSimd.Extract(b, lane2),
        AdvSimd.Extract(b, lane3));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory, EdgeCategory currentCategory)
{
var value = (VertexCategory)(((int)previousCategory << 2) | (int)currentCategory);
VertexCategory value = (VertexCategory)(((int)previousCategory << 2) | (int)currentCategory);
VerifyVertexCategory(value);
return value;
}
Expand All @@ -106,7 +228,7 @@ private static VertexCategory CreateVertexCategory(EdgeCategory previousCategory
private static void VerifyVertexCategory(VertexCategory vertexCategory)
{
int value = (int)vertexCategory;
if (value < 0 || value >= 16)
if (value is < 0 or >= 16)
{
throw new ArgumentOutOfRangeException(nameof(vertexCategory), "EdgeCategoryPair value shall be: 0 <= value < 16");
}
Expand Down Expand Up @@ -151,7 +273,7 @@ public EdgeData(float startX, float endX, float startYRounded, float endYRounded

public void EmitScanEdge(Span<ScanEdge> edges, ref int edgeCounter)
{
if (this.EdgeCategory == EdgeCategory.Left || this.EdgeCategory == EdgeCategory.Right)
if (this.EdgeCategory is EdgeCategory.Left or EdgeCategory.Right)
{
return;
}
Expand Down
4 changes: 3 additions & 1 deletion tests/Directory.Build.targets
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
<ItemGroup>
<!-- Test Dependencies -->
<PackageReference Update="BenchmarkDotNet" Version="0.13.1" />
<PackageReference Update="Magick.NET-Q16-AnyCPU" Version="8.3.3" />
<PackageReference Update="Magick.NET-Q16-AnyCPU" Version="12.2.2" />
<PackageReference Update="Microsoft.DotNet.RemoteExecutor" Version="6.0.0-beta.21311.3" />
<PackageReference Update="Microsoft.DotNet.XUnitExtensions" Version="6.0.0-beta.21311.3" />
<PackageReference Update="Moq" Version="4.16.1" />
<PackageReference Include="runtime.osx.10.10-x64.CoreCompat.System.Drawing" Version="5.8.64" Condition="'$(IsOSX)'=='true'" />
<PackageReference Update="System.Drawing.Common" Version="5.0.2" />
Expand Down
Loading