[ios] Replace vector_float types with SIMD types, which can boost CPU performance on mid-range device models.

This commit is contained in:
Allen 2024-12-27 11:00:03 +08:00 committed by Mario Zechner
parent b9abc0c5b9
commit 7629d7dd35
3 changed files with 34 additions and 39 deletions

View File

@ -1,10 +1,11 @@
import SpineShadersStructs
import Foundation
import simd
extension RenderCommand {
func getVertices() -> [SpineVertex] {
var vertices = [SpineVertex]()
let indices = indices
let numVertices = numVertices
let positions = positions(numVertices: numVertices)
@ -13,36 +14,30 @@ extension RenderCommand {
vertices.reserveCapacity(indices.count)
for i in 0..<indices.count {
let index = Int(indices[i])
let xIndex = 2 * index
let yIndex = xIndex + 1
let positionX = positions[xIndex]
let positionY = positions[yIndex]
let uvX = uvs[xIndex]
let uvY = uvs[yIndex]
let position = SIMD2<Float>(positions[xIndex], positions[yIndex])
let uv = SIMD2<Float>(uvs[xIndex], uvs[yIndex])
let color = extractRGBA(from: colors[index])
let vertex = SpineVertex(
position: vector_float2(positionX, positionY),
position: position,
color: color,
uv: vector_float2(uvX, uvY)
uv: uv
)
vertices.append(vertex)
}
return vertices
}
private func extractRGBA(from color: Int32) -> vector_float4 {
private func extractRGBA(from color: Int32) -> SIMD4<Float> {
guard color != -1 else {
return vector_float4(1.0, 1.0, 1.0, 1.0)
return SIMD4<Float>(1.0, 1.0, 1.0, 1.0)
}
let alpha = (color >> 24) & 0xFF
let red = (color >> 16) & 0xFF
let green = (color >> 8) & 0xFF
let blue = color & 0xFF
return vector_float4(Float(red)/255, Float(green)/255, Float(blue)/255, (Float(alpha)/255))
let alpha = Float((color >> 24) & 0xFF) / 255.0
let red = Float((color >> 16) & 0xFF) / 255.0
let green = Float((color >> 8) & 0xFF) / 255.0
let blue = Float(color & 0xFF) / 255.0
return SIMD4<Float>(red, green, blue, alpha)
}
}

View File

@ -1,5 +1,5 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef enum SpineVertexInputIndex {
@ -13,21 +13,21 @@ typedef enum SpineTextureIndex {
} SpineTextureIndex;
typedef struct {
vector_float2 position;
vector_float4 color;
vector_float2 uv;
simd_float2 position;
simd_float4 color;
simd_float2 uv;
} SpineVertex;
typedef struct {
vector_float2 translation;
vector_float2 scale;
vector_float2 offset;
simd_float2 translation;
simd_float2 scale;
simd_float2 offset;
} SpineTransform;
struct RasterizerData {
float4 position [[position]];
float4 color;
float2 textureCoordinate;
simd_float4 position [[position]];
simd_float4 color;
simd_float2 textureCoordinate;
};
vertex RasterizerData
@ -38,12 +38,12 @@ vertexShader(uint vertexID [[vertex_id]],
{
RasterizerData out;
float2 pixelSpacePosition = vertices[vertexID].position.xy;
simd_float2 pixelSpacePosition = vertices[vertexID].position.xy;
simd_float2 viewportSize = simd_float2(*viewportSizePointer);
out.position = simd_float4(0.0, 0.0, 0.0, 1.0);
vector_float2 viewportSize = vector_float2(*viewportSizePointer);
out.position = vector_float4(0.0, 0.0, 0.0, 1.0);
out.position.xy = pixelSpacePosition;
out.position.xy *= transform->scale;
out.position.xy += transform->translation * transform->scale + transform->offset;
@ -57,7 +57,7 @@ vertexShader(uint vertexID [[vertex_id]],
return out;
}
fragment float4
fragment simd_float4
fragmentShader(RasterizerData in [[stage_in]],
texture2d<half> colorTexture [[ texture(SpineTextureIndexBaseColor) ]])
{
@ -66,5 +66,5 @@ fragmentShader(RasterizerData in [[stage_in]],
const half4 colorSample = colorTexture.sample(textureSampler, in.textureCoordinate);
return float4(colorSample) * in.color;
return simd_float4(colorSample) * in.color;
}

View File

@ -14,9 +14,9 @@ typedef enum SpineTextureIndex {
} SpineTextureIndex;
typedef struct {
vector_float2 position;
vector_float4 color;
vector_float2 uv;
simd_float2 position;
simd_float4 color;
simd_float2 uv;
} SpineVertex;
typedef struct {