I would try the Fast Inverse Square Root trick.

It’s a way to get a very good approximation of `1/sqrt(n)`

without any branch, based on some bit-twiddling so not portable (notably between 32-bits and 64-bits platforms).

Once you get it, you just need to inverse the result, and takes the integer part.

There might be faster tricks, of course, since this one is a bit of a round about.

**EDIT**: let’s do it!

First a little helper:

```
// benchmark.h
#include <sys/time.h>
template <typename Func>
double benchmark(Func f, size_t iterations)
{
f();
timeval a, b;
gettimeofday(&a, 0);
for (; iterations --> 0;)
{
f();
}
gettimeofday(&b, 0);
return (b.tv_sec * (unsigned int)1e6 + b.tv_usec) -
(a.tv_sec * (unsigned int)1e6 + a.tv_usec);
}
```

Then the main body:

```
#include <iostream>
#include <cmath>
#include "benchmark.h"
class Sqrt
{
public:
Sqrt(int n): _number(n) {}
int operator()() const
{
double d = _number;
return static_cast<int>(std::sqrt(d) + 0.5);
}
private:
int _number;
};
// http://www.codecodex.com/wiki/Calculate_an_integer_square_root
class IntSqrt
{
public:
IntSqrt(int n): _number(n) {}
int operator()() const
{
int remainder = _number;
if (remainder < 0) { return 0; }
int place = 1 <<(sizeof(int)*8 -2);
while (place > remainder) { place /= 4; }
int root = 0;
while (place)
{
if (remainder >= root + place)
{
remainder -= root + place;
root += place*2;
}
root /= 2;
place /= 4;
}
return root;
}
private:
int _number;
};
// http://en.wikipedia.org/wiki/Fast_inverse_square_root
class FastSqrt
{
public:
FastSqrt(int n): _number(n) {}
int operator()() const
{
float number = _number;
float x2 = number * 0.5F;
float y = number;
long i = *(long*)&y;
//i = (long)0x5fe6ec85e7de30da - (i >> 1);
i = 0x5f3759df - (i >> 1);
y = *(float*)&i;
y = y * (1.5F - (x2*y*y));
y = y * (1.5F - (x2*y*y)); // let's be precise
return static_cast<int>(1/y + 0.5f);
}
private:
int _number;
};
int main(int argc, char* argv[])
{
if (argc != 3) {
std::cerr << "Usage: %prog integer iterations\n";
return 1;
}
int n = atoi(argv[1]);
int it = atoi(argv[2]);
assert(Sqrt(n)() == IntSqrt(n)() &&
Sqrt(n)() == FastSqrt(n)() && "Different Roots!");
std::cout << "sqrt(" << n << ") = " << Sqrt(n)() << "\n";
double time = benchmark(Sqrt(n), it);
double intTime = benchmark(IntSqrt(n), it);
double fastTime = benchmark(FastSqrt(n), it);
std::cout << "Number iterations: " << it << "\n"
"Sqrt computation : " << time << "\n"
"Int computation : " << intTime << "\n"
"Fast computation : " << fastTime << "\n";
return 0;
}
```

And the results:

```
sqrt(82) = 9
Number iterations: 4096
Sqrt computation : 56
Int computation : 217
Fast computation : 119
// Note had to tweak the program here as Int here returns -1 :/
sqrt(2147483647) = 46341 // real answer sqrt(2 147 483 647) = 46 340.95
Number iterations: 4096
Sqrt computation : 57
Int computation : 313
Fast computation : 119
```

Where as expected the *Fast* computation performs much better than the *Int* computation.

Oh, and by the way, `sqrt`

is faster ðŸ™‚