This code is based on an algorithm I found on Microchip's web site. It's for 32-bit integers, but it can be adapted for 24- and 16-bit integers by changing the mask and result values.
Code:
/* unsigned long is guaranteed by the C standard to hold at least 32 bits. */
typedef unsigned long uint32;

/**
 * Integer square root: returns the largest r such that r * r <= x,
 * for x in the 32-bit range [0, 0xFFFFFFFF].
 *
 * The root of a 32-bit value has at most 16 significant bits, so the
 * result is built one bit at a time from bit 15 down to bit 0: each bit
 * is tentatively set and kept only if the square of the trial value does
 * not exceed x.
 *
 * The trial value never exceeds 0xFFFF, so its square (at most
 * 0xFFFE0001) always fits in 32 bits and the multiplication cannot wrap.
 * On platforms where unsigned long is wider than 32 bits, arguments above
 * 0xFFFFFFFF yield 0xFFFF.
 *
 * To adapt for narrower inputs, lower the starting mask to the top bit of
 * the expected root (0x800 for 24-bit inputs, 0x80 for 16-bit inputs).
 *
 * @param x value to take the square root of
 * @return floor(sqrt(x))
 */
uint32 sqrt32(uint32 x)
{
    uint32 mask = 0x8000UL; /* highest bit a 16-bit root can have */
    uint32 result = 0;

    while (mask != 0) {
        uint32 trial = result | mask;

        /* Keep this bit only if the root is still not too large. */
        if (trial * trial <= x) {
            result = trial;
        }
        mask >>= 1;
    }
    return result;
}