Re: Noob learns to code in 3 months
Posted: December 25th, 2017, 9:31 am
hahaha xD
Learn to program or die trying
https://forum.planetchili.net:443/
Code: Select all
// Particle system configuration used for the measurements in this post.
ParticleSystemData psData;

// Emitter placement and timing settings
psData.position = Vec2D(400.0f, 300.0f);
psData.spawnRate = 10;
psData.frameRate = 60;

// Per-particle motion: initial velocity range, gravity and lifetime
psData.minVel = Vec2D(-20.0f, -60.0f);
psData.maxVel = Vec2D(20.0f, 5.0f);
psData.gravity = Vec2D(0.0f, -1.0f);
psData.lifeTime = 2.0f;

// Appearance: shape, size range and the birth/death colours
psData.shape = Shapes::SHAPE_CIRCLE;
psData.minSize = 10.0f;
psData.maxSize = 20.0f;
psData.birthColor = Colors::Yellow;
psData.deathColor = Colors::Red;
Code: Select all
Min frame time: 0.000002 // 2 µs
Max frame time: 0.000130 // 130 µs
Avg frame time: 0.000043 // 43 µs
Code: Select all
Min frame time: 0.000041 // 41 µs
Max frame time: 0.002345 // 2.3 ms
Avg frame time: 0.001693 // 1.6 ms
Code: Select all
Min frame time: 0.000003 // 3 µs
Max frame time: 0.000099 // 99 µs
Avg frame time: 0.000026 // 26 µs
Code: Select all
Min frame time: 0.000027 // 27 µs
Max frame time: 0.001274 // 1.2 ms
Avg frame time: 0.000856 // 856 µs
Code: Select all
// Particle::Advance
/// Advances this particle by one simulation step.
///
/// Moves the particle along its velocity, subtracts `dt` from its remaining
/// lifetime, blends its colour from birthColor towards deathColor and
/// re-evaluates whether the particle is dead.
///
/// @param dt Elapsed time for this step; it is subtracted directly from
///           m_Data.lifeTime, so it must use the same unit as the lifetime.
void Particle::Advance(float dt)
{
    m_Data.position += m_Data.velocity * dt;
    m_Data.lifeTime -= dt;

    m_BlendFactor += dt / m_TotalLifeTime;

    // The accumulated factor can step past 1.0 on the final update (or be
    // NaN when both dt and m_TotalLifeTime are zero). Converting a float
    // outside [0, 255] to unsigned char is undefined behaviour, so clamp to
    // [0, 1] first. The comparisons are ordered so that NaN maps to 0.
    const float unitBlend =
        !( m_BlendFactor > 0.0f ) ? 0.0f
        : ( m_BlendFactor < 1.0f ? m_BlendFactor : 1.0f );
    const auto blendFactor = static_cast< unsigned char >( unitBlend * 255.f );
    m_Color = m_Data.birthColor.Blend( m_Data.deathColor, blendFactor );

    // A particle dies when it leaves the screen rectangle or its lifetime
    // has run out.
    m_Dead =
        m_Data.position.x < 0 || m_Data.position.x > Graphics::ScreenWidth ||
        m_Data.position.y < 0 || m_Data.position.y > Graphics::ScreenHeight ||
        m_Data.lifeTime <= 0.0f;
}
Code: Select all
// ParticleSystem2D::Advance
void ParticleSystem2D::Advance(float dt)
{
m_SpawnTimer += dt;
if (m_SpawnTimer >= (1.0f / (float)m_Data.frameRate))
{
m_SpawnTimer = 0.0f;
Spawn();
}
for( auto& p : m_Particles )
{
p.Advance( dt );
p.AddVelocity( m_Data.gravity );
}
m_Particles.erase(
std::remove_if( m_Particles.begin(), m_Particles.end(),
[]( const Particle& p )
{
return p.IsDead();
} ), m_Particles.end() );
}
Code: Select all
void Graphics::DrawSquare(const Vec2D & pos, float size, Color color, float blendFactor)
{
// ( if right is less_equal to 0 or left is greater_equal to screen right or
// if top is less_equal to 0 or bottom is greater_equal to screen bottom )
// Early out
if( pos.x + size <= 0.f || pos.x >= ScreenWidth ||
pos.y + size <= 0.f || pos.y >= ScreenHeight )
{
return;
}
// If any part on screen, calculate offsets
const auto xStart = static_cast< int >( std::max( -pos.x, 0.f ) );
const auto xEnd = static_cast< int >( std::min( ScreenWidth - size, size ) );
const auto yStart = static_cast< int >( std::max( -pos.y, 0.f ) );
const auto yEnd = static_cast< int >( std::min( ScreenHeight - size, size ) );
// Casting once per call instead of per loop iteration
const auto _x = static_cast< int >( pos.x );
const auto _y = static_cast< int >( pos.y );
const auto _size = static_cast< int >( size );
const auto _blendFactor = static_cast< unsigned char >( blendFactor * 255.f );
// Loop from position + offset to position + size + offset
for( int y = yStart + _y; y < yEnd + ( _y + _size ); ++y )
{
for( int x = xStart + _x; x < xEnd + ( _x + _size ); ++x )
{
const Color dstPixel = GetPixel( x, y );
const Color blendedPixel = color.Blend( dstPixel, _blendFactor );
PutPixel( x, y, blendedPixel );
}
}
}
void Graphics::DrawCircle(const Vec2D & pos, float radius, Color color, float blendFactor)
{
// ( if right is less_equal to 0 or left is greater_equal to screen right or
// if top is less_equal to 0 or bottom is greater_equal to screen bottom )
// Early out
if( pos.x + radius <= 0.f || pos.x - radius >= ScreenWidth ||
pos.y + radius <= 0.f || pos.y - radius >= ScreenHeight )
{
return;
}
// If any part on screen, calculate offsets
const auto xStart = static_cast< int >( std::max( -pos.x, -radius ) );
const auto xEnd = static_cast< int >( std::min( ScreenWidth - radius, radius ) );
const auto yStart = static_cast< int >( std::max( -pos.y, -radius ) );
const auto yEnd = static_cast< int >( std::min( ScreenHeight - radius, radius ) );
// Casting once per call instead of per loop iteration
const auto _x = static_cast< int >( pos.x );
const auto _y = static_cast< int >( pos.y );
const auto _size = static_cast< int >( radius );
const auto radiSq = static_cast< int >( radius * radius );
const auto _blendFactor = static_cast< unsigned char >( blendFactor * 255.f );
// Loop from offset to size + offset
for( int iy = yStart; iy < yEnd; ++iy )
{
for( int ix = xStart; ix < xEnd; ++ix )
{
const auto sqDist = ( ix * ix ) + ( iy * iy );
if( sqDist <= radiSq )
{
const auto x = ix + _x;
const auto y = iy + _y;
const Color dstPixel = GetPixel( x, y );
const Color blendedPixel = color.Blend( dstPixel, _blendFactor );
PutPixel( x, y, blendedPixel );
}
}
}
}
Code: Select all
// UpdateModel yours
Min frame time: 0.000026 // 26 µs
Max frame time: 0.000246 // 246 µs
Avg frame time: 0.000065 // 65 µs
// ComposeFrame yours
Min frame time: 0.031235 // 31.2 ms
Max frame time: 0.037946 // 37.9 ms
Avg frame time: 0.033004 // 33.0 ms
Code: Select all
// UpdateModel after tweaks
Min frame time: 0.000024 // 24 µs
Max frame time: 0.000153 // 153 µs
Avg frame time: 0.000038 // 38 µs
// ComposeFrame after tweaks
Min frame time: 0.022510 // 22.5 ms
Max frame time: 0.033995 // 33.9 ms
Avg frame time: 0.023709 // 23.7 ms
Code: Select all
// ComposeFrame yours
Min frame time: 0.031235 // 31.2 ms
Max frame time: 0.037946 // 37.9 ms
Avg frame time: 0.033004 // 33.0 ms
// ComposeFrame after tweaks
Min frame time: 0.022510 // 22.5 ms
Max frame time: 0.033995 // 33.9 ms
Avg frame time: 0.023709 // 23.7 ms
// ComposeFrame with SSE
Min frame time: 0.005853 // 5.8 ms
Max frame time: 0.008483 // 8.4 ms
Avg frame time: 0.006656 // 6.6 ms
Code: Select all
#include <intrin.h>
// Draws a filled, alpha-blended circle, processing four pixels at a time with
// SSE intrinsics and finishing each row with a scalar loop.
//
// pos         - centre of the circle
// radius      - radius in pixels
// color       - circle colour
// blendFactor - blend weight, scaled by 255 and truncated to unsigned char
//
// Each row is split into an SSE span whose start and end x coordinates are
// rounded down to multiples of 4, plus a scalar tail that covers the pixels
// from the end of the SSE span to the clipped right edge.
void Graphics::DrawCircle(const Vec2D & pos, float radius, Color color, float blendFactor)
{
// ( if right is less_equal to 0 or left is greater_equal to screen right or
// if top is less_equal to 0 or bottom is greater_equal to screen bottom )
// Early out
if( pos.x + radius <= 0.f || pos.x - radius >= ScreenWidth ||
pos.y + radius <= 0.f || pos.y - radius >= ScreenHeight )
{
return;
}
// Clears the two lowest bits of X, i.e. rounds X down to a multiple of 4
// (four pixels per SSE register).
auto CalculateAlignedBoundary = []( int X )
{
return X & ( ~3 );
};
// Clips Src against Clip and returns the result as offsets measured from
// Src.left / Src.top rather than as absolute coordinates.
// NOTE(review): assumes Bounds is an aggregate ordered left, top, right,
// bottom - confirm against its declaration.
auto CalculateGraphicsBoundary =
[]( const Bounds& Src, const Bounds& Clip)
{
return Bounds{
std::max( -Src.left, Clip.left ),
std::max( -Src.top, Clip.top ),
std::min( Clip.right - Src.left, Src.right - Src.left ),
std::min( Clip.bottom - Src.top, Src.bottom - Src.top )
};
};
// Sixteen 8-bit values widened into two registers of eight 16-bit lanes
// each (lo = lower 8 bytes, hi = upper 8 bytes), so that per-channel
// products fit without overflowing a byte.
struct Unpacked_8_m128i_16
{
// Zero-extends each byte of Value to 16 bits by interleaving with zero.
Unpacked_8_m128i_16( __m128i Value )
:
lo( _mm_unpacklo_epi8( Value, _mm_setzero_si128() ) ),
hi( _mm_unpackhi_epi8( Value, _mm_setzero_si128() ) )
{}
// Wraps two already-unpacked halves.
Unpacked_8_m128i_16( __m128i lo, __m128i hi )
:
lo( lo ), hi( hi )
{}
// Per-lane 16-bit multiply, keeping the low 16 bits of each product.
Unpacked_8_m128i_16 operator*( __m128i other )const
{
return {
_mm_mullo_epi16( lo, other ),
_mm_mullo_epi16( hi, other )
};
}
// Per-lane 16-bit addition.
Unpacked_8_m128i_16 operator+( Unpacked_8_m128i_16 other )const
{
return {
_mm_add_epi16( lo,other.lo ),
_mm_add_epi16( hi,other.hi )
};
}
// Per-lane logical right shift by Imm8 bits.
Unpacked_8_m128i_16 operator>>( const int Imm8 )const
{
return {
_mm_srli_epi16( lo, Imm8 ),
_mm_srli_epi16( hi, Imm8 )
};
}
// Narrows both halves back to sixteen bytes with unsigned saturation.
__m128i Pack()const
{
return _mm_packus_epi16( lo, hi );
}
__m128i lo, hi;
};
// Per channel: ( Src * InvBlendFactor + Dst * BlendFactor ) >> 8.
// The factors are expected as 16-bit lanes, matching the unpacked layout.
auto SSE_ColorBlend = []( __m128i SrcColor, __m128i DstColor,
__m128i BlendFactor, __m128i InvBlendFactor )
{
auto result = (
( Unpacked_8_m128i_16( SrcColor ) * InvBlendFactor ) +
( Unpacked_8_m128i_16( DstColor ) * BlendFactor ) ) >> 8;
return result.Pack();
};
// Returns a per-pixel mask for the four offsets IX..IX+3 on row IY: a
// 32-bit lane is all ones when x*x + y*y < RadiusSq, otherwise zero.
// Note: _mm_mullo_epi32 is an SSE4.1 intrinsic.
auto SSE_IsInCircle = []( const int IX, const int IY, const __m128i RadiusSq )
{
// Load index to index + 3 into SSE register
const auto mX = _mm_setr_epi32( IX, IX + 1, IX + 2, IX + 3 );
const auto mY = _mm_set1_epi32( IY );
const auto mxSq = _mm_mullo_epi32( mX, mX );
const auto mySq = _mm_mullo_epi32( mY, mY );
const auto mDelta = _mm_add_epi32( mxSq, mySq );
// Get mask of pixels within circumference
const auto inRange = _mm_cmplt_epi32( mDelta, RadiusSq );
return inRange;
};
// Bitwise select: takes bits from ifTrue where Mask is set and from ifFalse
// where it is clear.
auto SSE_IfElseBlend = []( const __m128i ifTrue, const __m128i ifFalse, const __m128i Mask )
{
const auto use_true = _mm_and_si128( Mask, ifTrue );
const auto use_false = _mm_andnot_si128( Mask, ifFalse );
const auto blended = _mm_or_si128( use_true, use_false );
return blended;
};
// Blends one row in groups of four pixels. xStart/xEnd and IY are offsets
// from the circle centre (PosX, PosY).
// NOTE(review): _mm_load_si128/_mm_store_si128 require 16-byte aligned
// addresses. This assumes pSysBuffer is 16-byte aligned with 4-byte pixels
// and that ScreenWidth is a multiple of 4 - confirm.
auto SSE_BlendPixels =
[this, SSE_ColorBlend, SSE_IsInCircle, SSE_IfElseBlend ](
const int PosX, const int PosY, const int IY,
const int xStart, const int xEnd,
const __m128i RadiusSq, const __m128i Src,
const __m128i BlendFactor, const __m128i InvBlendFactor)
{
for( int ix = xStart; ix < xEnd; ix += 4 )
{
const auto inRange = SSE_IsInCircle( ix, IY, RadiusSq );
// If not inside circle, continue
if( _mm_movemask_epi8( inRange ) == 0 ) continue;
const auto index = ( PosX + ix ) + ( ( PosY + IY ) * ScreenWidth );
auto* bg = reinterpret_cast< __m128i* >( &pSysBuffer[ index ] );
const auto dst = _mm_load_si128( bg );
// Else, do color blending
auto result = SSE_ColorBlend( Src, dst, BlendFactor, InvBlendFactor );
// Use inRange mask to determine which pixels will be
// background color or blended color
result = SSE_IfElseBlend( result, dst, inRange );
_mm_store_si128( bg, result );
}
};
// Scalar fallback for the pixels the SSE span does not cover; it uses the
// same strict ( < RadiusSq ) test as the SSE mask. InvBlendFactor is
// accepted but not used here.
auto x86_BlendPisels =
[ this, color ]( const int PosX, const int PosY, const int IY,
const int xStart, const int xEnd, const int RadiusSq,
const int BlendFactor, const int InvBlendFactor )
{
for( int ix = xStart; ix < xEnd; ++ix )
{
if((ix * ix)+(IY * IY) < RadiusSq )
{
const auto dst = GetPixel( PosX + ix, PosY + IY );
PutPixel( PosX + ix, PosY + IY,
color.Blend( dst, static_cast< unsigned char >( BlendFactor ) ) );
}
}
};
// Casting once per call instead of per loop iteration
const auto _x = static_cast< int >( pos.x );
const auto _y = static_cast< int >( pos.y );
const auto _size = static_cast< int >( radius );
const auto radSq = static_cast< int >( radius * radius );
const auto _blendFactor = static_cast< unsigned char >( blendFactor * 255.f );
// If any part on screen, calculate offsets
// (the returned bounds are relative to the bounding box's top-left corner)
const auto bounds = CalculateGraphicsBoundary(
{ ( _x - _size ), ( _y - _size ), ( _x + _size ), ( _y + _size ) },
{ 0,0,ScreenWidth,ScreenHeight } );
// Preload SSE register with color, radius and _blendFactor
const __m128i mColor = _mm_set1_epi32( color.dword );
const __m128i mRadSq = _mm_set1_epi32( radSq );
const __m128i mBlendFactor = _mm_set1_epi16( _blendFactor );
const __m128i mInvBlendFactor = _mm_sub_epi16( _mm_set1_epi16( 255 ), mBlendFactor );
// An SSE register is 16 bytes wide and is used here for four pixels at a
// time, so the SSE span starts and ends on x coordinates that are multiples
// of 4. Pixels pulled in by rounding the start down are left unchanged by
// the in-circle mask.
// The x86 version will pick up the rest at the end of each row
const auto sse_xStart = CalculateAlignedBoundary( _x + ( bounds.left - _size) );
const auto sse_xEnd = CalculateAlignedBoundary( _x + ( bounds.right - _size ) );
const auto x86_xStart = sse_xEnd;
const auto x86_xEnd = _x + bounds.right - _size;
// Rows are visited as offsets from the centre; the x ranges are converted
// from absolute coordinates back to centre-relative offsets for the helpers.
for( int iy = bounds.top - _size; iy < bounds.bottom - _size; ++iy )
{
SSE_BlendPixels( _x, _y, iy, sse_xStart - _x, sse_xEnd - _x, mRadSq, mColor, mBlendFactor, mInvBlendFactor );
x86_BlendPisels( _x, _y, iy, x86_xStart - _x, x86_xEnd - _x, radSq, _blendFactor, ( 255 - _blendFactor ) );
}
}