Code: Select all
// Branching variant: advances the position by speed_ * delta_time, but only while the entity is alive.
void Update ( const float delta_time )
{
    // Guard clause: a dead entity is left untouched.
    if ( !alive_ )
    {
        return;
    }
    position_ += speed_ * delta_time;
}
Code: Select all
// Branch-free variant: unconditionally advances the position by speed_ * delta_time.
void Update ( const float delta_time )
{
    const auto displacement = speed_ * delta_time;
    position_ += displacement;
}
Now the reason why this may be the case is that, if you read up on modern CPU architecture, you will realise that CPUs keep getting better and better at performing complex calculations in a matter of nanoseconds while, at the same time, getting relatively worse at branching. Your if statements, switch statements and virtual function calls are becoming more expensive relative to simply doing x + y. Their actual cost hasn't really increased at all, but the CPU has a lot of optimisations built into it that rely on it knowing ahead of time which code to run. So even though branching itself hasn't become more expensive, it prevents you from benefiting from many of the optimisations the CPU uses to increase efficiency.
The advantage of the first example I gave (the one with the branching code) is that you can skip the unnecessary calculations when the entity is dead. In my tests, if you have a whole lot of entities and 90% of them are dead, the branching code is actually much faster. At the same time, however, that's not representative of real-world scenarios, and in my games personally, the number of dead entities in my buffer is never higher than 50%. For me, doing a check on each and every entity to see if it is dead is usually not as fast as just doing the calculations for all of them.
Don't get me wrong: You can't make a real program that has no branching in it. You still need your if statements, switch statements and virtual function calls if you wish to do the most basic of user input. Just try and watch out for unnecessary ones where just doing the calculations is faster than doing a check with it.
I made a benchmark to help prove my point. Try running it on your system and see if I'm right or just spouting nonsense.
Code: Select all
#include <algorithm>
#include <array>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <future>
#include <iostream>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>
// Minimal three-component float vector with value semantics.
// A default-constructed Vector3 is the zero vector (all components value-initialised).
class Vector3
{
    using Float = float;

    Float x_ { };
    Float y_ { };
    Float z_ { };

public:
    // Creates the zero vector.
    Vector3 ( ) = default;

    // Creates a vector from its three components.
    constexpr explicit Vector3 ( const Float x, const Float y, const Float z ) noexcept : x_ { x }, y_ { y }, z_ { z }
    {
    }

    // Component-wise addition in place; returns *this to allow chaining.
    Vector3& operator+=( const Vector3 vector ) noexcept
    {
        x_ += vector.x_;
        y_ += vector.y_;
        z_ += vector.z_;
        return *this;
    }

    // Component-wise sum of this vector and `vector`.
    constexpr Vector3 operator+( const Vector3 vector ) const noexcept
    {
        return Vector3 { x_ + vector.x_, y_ + vector.y_, z_ + vector.z_ };
    }

    // Component-wise subtraction in place; returns *this to allow chaining.
    Vector3& operator-=( const Vector3 vector ) noexcept
    {
        x_ -= vector.x_;
        y_ -= vector.y_;
        z_ -= vector.z_;
        return *this;
    }

    // Component-wise difference of this vector and `vector`.
    constexpr Vector3 operator-( const Vector3 vector ) const noexcept
    {
        return Vector3 { x_ - vector.x_, y_ - vector.y_, z_ - vector.z_ };
    }

    // Negation: every component has its sign flipped.
    constexpr Vector3 operator-( ) const noexcept
    {
        return Vector3 { -x_, -y_, -z_ };
    }

    // Scales every component by `scalar` in place; returns *this to allow chaining.
    Vector3& operator*=( const Float scalar ) noexcept
    {
        x_ *= scalar;
        y_ *= scalar;
        z_ *= scalar;
        return *this;
    }

    // Copy of this vector with every component multiplied by `scalar`.
    constexpr Vector3 operator*( const Float scalar ) const noexcept
    {
        return Vector3 { x_ * scalar, y_ * scalar, z_ * scalar };
    }

    // Divides every component by `scalar` in place; no check is made for a zero divisor.
    Vector3& operator/=( const Float scalar ) noexcept
    {
        x_ /= scalar;
        y_ /= scalar;
        z_ /= scalar;
        return *this;
    }

    // Copy of this vector with every component divided by `scalar`; no check is made for a zero divisor.
    constexpr Vector3 operator/( const Float scalar ) const noexcept
    {
        return Vector3 { x_ / scalar, y_ / scalar, z_ / scalar };
    }

    // Euclidean length (square root of LengthSquared()).
    Float Length ( ) const noexcept
    {
        return std::sqrt ( LengthSquared ( ) );
    }

    // Squared Euclidean length; avoids the square root when only comparisons are needed.
    Float LengthSquared ( ) const noexcept
    {
        return x_ * x_ + y_ * y_ + z_ * z_;
    }

    // Returns this vector scaled to unit length.
    // A vector whose length is exactly zero has no direction, so the zero vector is
    // returned instead of dividing by zero and producing NaN components.
    Vector3 Normalized ( ) const noexcept
    {
        const Float length = Length ( );
        if ( length == Float { } )
        {
            return Vector3 { };
        }
        return Vector3 { x_ / length, y_ / length, z_ / length };
    }

    // Dot (scalar) product with `vector`.
    constexpr Float Dot ( const Vector3 vector ) const noexcept
    {
        return x_ * vector.x_ + y_ * vector.y_ + z_ * vector.z_;
    }

    // Cross (vector) product: this x `vector`.
    constexpr Vector3 Cross ( const Vector3 vector ) const noexcept
    {
        return Vector3 { y_ * vector.z_ - z_ * vector.y_, z_ * vector.x_ - x_ * vector.z_, x_ * vector.y_ - y_ * vector.x_ };
    }

    // Component accessors.
    constexpr Float X ( ) const noexcept
    {
        return x_;
    }

    constexpr Float Y ( ) const noexcept
    {
        return y_;
    }

    constexpr Float Z ( ) const noexcept
    {
        return z_;
    }
};
class EntityWithoutBranch
{
public:
EntityWithoutBranch ( const bool alive, const Vector3 position, const Vector3 speed ) : position_ { position }
{
if ( alive )
{
speed_ = speed;
}
}
void Update ( const float delta_time )
{
position_ += speed_ * delta_time;
}
private:
Vector3 position_;
Vector3 speed_;
};
// Benchmark entity that stores an explicit alive flag and tests it on every Update().
class EntityWithBranch
{
    // Declaration order is kept as flag, position, speed so the object layout is unchanged.
    bool alive_;
    Vector3 position_;
    Vector3 speed_;

public:
    // alive    - whether Update() should move this entity.
    // position - starting position.
    // speed    - displacement per unit of time.
    EntityWithBranch ( const bool alive, const Vector3 position, const Vector3 speed )
        : alive_ { alive }
        , position_ { position }
        , speed_ { speed }
    {
    }

    // Moves the entity by speed_ * delta_time; dead entities are skipped.
    void Update ( const float delta_time )
    {
        if ( !alive_ )
        {
            return;
        }
        position_ += speed_ * delta_time;
    }
};
// Selects the entity implementation that main() benchmarks; point this alias at
// EntityWithoutBranch to time the branch-free variant instead.
using Entity = EntityWithBranch;
int main ( )
{
std::mt19937 engine;
std::uniform_int_distribution<int> rand_alive ( 0, 1 );
std::uniform_real_distribution<float> rand_pos( -5.0f, 5.0f );
constexpr std::size_t num_tests = 4000;
constexpr std::size_t num_entities = 100000;
constexpr std::size_t expected_active_entities = 50000;
std::size_t active_entities = 0;
std::vector<Entity> entities;
for ( std::size_t i = 0; i < num_entities; ++i )
{
bool alive = false;
if ( active_entities < expected_active_entities && rand_alive ( engine ) > 0 )
{
alive = true;
++active_entities;
}
entities.emplace_back ( alive, Vector3 { rand_pos ( engine ), rand_pos ( engine ), rand_pos ( engine ) }, Vector3 { rand_pos ( engine ), rand_pos ( engine ), rand_pos ( engine ) } );
}
// It's supposed to be called mean but I don't give a fuck!
unsigned long long lowest_time_taken = std::numeric_limits<unsigned long long>::max ( );
unsigned long long highest_time_taken = std::numeric_limits<unsigned long long>::min ( );
long double average = 0.0;
std::vector<unsigned long long> data_points;
data_points.resize ( num_tests );
for ( std::size_t i = 0; i < num_tests; ++i )
{
const auto start = std::chrono::steady_clock::now ( );
for ( auto& entity : entities )
{
entity.Update ( 0.5f );
}
const auto end = std::chrono::steady_clock::now ( );
const auto duration = end - start;
const auto time_taken = std::chrono::duration_cast< std::chrono::nanoseconds >( duration ).count ( );
data_points [ i ] = time_taken;
}
std::sort ( data_points.begin ( ), data_points.end ( ) );
constexpr std::size_t median_index = num_tests % 2 == 0 ? num_tests / 2 : num_tests / 2 + 1;
const auto median = data_points [ median_index ];
long double standard_deviation = 0.0l;
for ( std::size_t i = 0; i < num_tests; ++i )
{
const auto time_taken = data_points [ i ];
average += time_taken;
if ( time_taken < lowest_time_taken )
{
lowest_time_taken = time_taken;
}
if ( time_taken > highest_time_taken )
{
highest_time_taken = time_taken;
}
const long double distance_to_average = static_cast<long double>( time_taken ) - average;
standard_deviation += distance_to_average * distance_to_average;
}
average = average / num_tests;
standard_deviation = std::sqrt ( standard_deviation / num_tests );
std::cout << "Num entities: " << num_entities << std::endl;
std::cout << "Active entities: " << active_entities << std::endl;
std::cout << "Num tests: " << num_tests << std::endl;
std::cout << "Lowest time taken: " << lowest_time_taken << std::endl;
std::cout << "Highest time taken: " << highest_time_taken << std::endl;
std::cout << "Average: " << average << std::endl;
std::cout << "Median: " << median << std::endl;
std::cout << "Standard deviation: " << standard_deviation << std::endl;
system ( "pause" );
return 0;
}