Multiple Precision Floating-Point Arithmetic

a package of computer routines written in C


The following source code is made freely available for non-commercial use.
It comes with absolutely no guarantees or assurances of any kind.  Use it at your own risk.

I apologize in advance for shortcomings in my programming skills and habits; and I request that users let me know about their successful applications of or improvements in these programs.

Other (professional) packages available:
in C: http://www.onlinecomputersciencedegree.com/resources/gnu-multiple-precision-arithmetic-library-gmp/ (updated 11/2012)
in Fortran:  http://www.nersc.gov/~dhbailey/mpdist/mpdist.html

Charles Schwartz
schwartz@physics.berkeley.edu


/* mppkg3.c   multiple precision floating point package; in C; version 3 */
/* March 3, 2002- October 13, 2003 - by Charles Schwartz, UC Berkeley */
/* 16 bits per word - unsigned short */
/* zeroth word is sign bit and exponent biased by 2^14 */
/* subsequent words are mantissa with leading 1-bit, else zero */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define MAX  10L  /* number of words in mantissa - at least 3 */
#define BITS 16   /* number of bits per word */
#define BIAS     0x4000U
#define SIGNBIT  0x8000U
#define MAXEXP   0x7FFFU
#define CHECKBIT 0x8000U  /* leading bit in first word */
#define BOTTOM   0x0000FFFFUL
#define HALF     0x00010000UL

#define SIZE 1000L

unsigned short fact[SIZE][MAX+1],Log2[MAX+1];

/* conversion, etc., routines */
int longtomp(long x, unsigned short *p);
int dbltomp(double A, unsigned short *p);
int mpput(unsigned short *pa,unsigned short *pb);
int mpcompare(unsigned short *pa,unsigned short *pb);
int mpprintdec(char *s,unsigned short *p);
/* basic arithmetic routines */
int mpaddsub(unsigned short *pa,int pm1,unsigned short *pb,unsigned short *pc);
int mpmult(unsigned short *pa,unsigned short *pb,unsigned short *pc);
int mpdiv(unsigned short *pa,unsigned short *pb,unsigned short *pc);
/* auxiliary routines for the above */
int mpadd(unsigned short *pa,unsigned short *pb,unsigned short *pc,long diff);
int mpsub(unsigned short *pa,unsigned short *pb,unsigned short *pc,long diff);
int mpadjust(unsigned short *p);
int mpshift(unsigned short *p, long diff);
int mpshift1r(unsigned short *p);
int intpart(unsigned short *p, long *pint);
int mpintpart(unsigned short *pa,unsigned short *pb);
/* common functions */
int mplog(unsigned short *pa,unsigned short *pb);
int mpexp(unsigned short *pa,unsigned short *pb);
int mpsqrt(unsigned short *pa, unsigned short *pb);
int mppi(unsigned short *p);
 

int main() /* demo */
{long i;
double x;
unsigned short a[MAX+1],b[MAX+1];
extern unsigned short fact[][];
printf("  MAX=%d,    SIZE=%d\n\n",MAX,SIZE);
longtomp(1L,fact[0]);
for(i=1;i<SIZE;i++)
  {longtomp(i,a);
   mpmult(a,fact[i-1],fact[i]);}
mpprintdec(" (SIZE-1)! = ",fact[SIZE-1]);
do{
printf("\n   Enter a floating point number (0=quit): ");
scanf("%lf",&x);
if(x == 0) break;
dbltomp(x,a);
mpprintdec("x=",a);
mpdiv(fact[0],a,b);
mpprintdec("1/x=",b);
mpdiv(fact[0],b,b);
mpaddsub(a,-1,b,b);
mpprintdec("x-1/(1/x)=",b);
mpsqrt(a,b);
mpprintdec("sqrt(x)=",b);
mpmult(b,b,b);
mpprintdec("sqrt(x)**2=",b);
mplog(a,b);
mpprintdec("log(x)=",b);
mpexp(a,b);
mpprintdec("exp(x)=",b);
mplog(b,b);
mpprintdec("log(exp(x))=",b);
}while(1);
return 0;
}
 

int longtomp(long x, unsigned short *p) /* convert long int to mp */
{long i;
unsigned long y;
*p = (unsigned short)(BIAS + 2*BITS);
for(i=1;i<=MAX;i++) {*(p+i) = 0;}
y = (unsigned long)x;
if(x < 0) {*p = (SIGNBIT | *p);
            y = (unsigned long) (-x);}
*(p+2) =(unsigned short) (BOTTOM & y);
y = y/HALF;
*(p+1) = (unsigned short) (BOTTOM & y);
for(i=3;i<=MAX;i++) {*(p+i)=0;}
mpadjust(p);
return 0;
}
 

int dbltomp(double A, unsigned short *p) /* convert double to mp */
{unsigned long x,y,topbit=0x80000000,expbits=0x7FF00000,mbits=0x000FFFFF;
long i, sign=0;
union {
  double dd;
  struct {
    unsigned long one;
    unsigned long two;
    } pair;
   }u;
if(A == 0) {longtomp(0L,p);
            return 0;}
u.dd=A;
x=u.pair.one;
if(x & topbit) {sign=1;}
y=(x & expbits) >> 20;
*p = (unsigned short) (y + (BIAS - 1023)+1);
if(sign) {*p = *p + SIGNBIT;}
x = ((x & mbits) << 11) + topbit;
y = u.pair.two;
x = x + (y >> 21);
*(p+2) = (unsigned short) (x & BOTTOM);
*(p+1) = (unsigned short) (x/HALF);
y = (y << 11);
if(MAX >= 3) {*(p+3) = (unsigned short) (y/HALF);}
if(MAX >= 4) {*(p+4) = (unsigned short) (y & BOTTOM);}
for(i=5;i<=MAX;i++) {*(p+i) = 0;}
return 0;
}

int mpput(unsigned short *pa,unsigned short *pb) /* put A into B */
{long i;
for(i=0;i<=MAX;i++) {*(pb+i) = *(pa+i);}
return 0;}
 

int mpcompare(unsigned short *pa, unsigned short *pb)/* compare magnitudes */
{long i,expa,expb;
if(*(pa+1) == 0)
  {if(*(pb+1) == 0)
     {return 0;}/* A = B */
     return -1;} /* A < B */
if(*(pb+1) == 0) return 1; /* A > B */
expa = *pa & MAXEXP;
expb = *pb & MAXEXP;
if(expa > expb) return 1;
if(expa < expb) return -1;
for(i=1;i<=MAX;i++)
  {if(*(pa+i) > *(pb+i)) return 1;
   if(*(pa+i) < *(pb+i)) return -1;
   }
return 0;
}

int mpprintdec(char *s,unsigned short *p) /* print as a decimal number */
{long i,j,k,kmax,exp,mouthful=100000L,x;
unsigned short b[MAX+1],c[MAX+1],dec[MAX+1];
kmax= (MAX*BITS*0.301)/5.;
longtomp(mouthful,dec);
 printf("%6s",s);
 if(*p & SIGNBIT) {printf(" -");}
      else {printf(" +");}
  b[0]= *p & MAXEXP;
 for(i=1;i<=MAX;i++) {b[i]=*(p+i);}
 if(b[1] == 0)
   {printf(" ZERO\n");
    return 0;}
 j=0;
 do
 {exp= (b[0] & MAXEXP) - BIAS;
 if(exp > 17)
    {mpdiv(b,dec,b);
     j++;}
  else if(exp < 0)
     {mpmult(b,dec,b);
     j--;}
  else {break;}
  }while(1);
 if(intpart(b,&x)) {return 1;}
 printf("%d.",x);
 longtomp(x,c);
 mpaddsub(b,-1,c,b);
 for(k=1;k<=kmax;k++)
   {if(b[1] == 0) break;
    mpmult(b,dec,b);
   intpart(b,&x);
   printf("%05d ",x);
   longtomp(x,c);
   mpaddsub(b,-1,c,b);
 }
 printf("E %d\n",5*j);
 return 0;
 }
 

/* Add or Subtract : sort out cases */
/* the int pm1 is either +1 or -1, signifying whether to ADD or SUBTRACT */
int mpaddsub(unsigned short *pa,int pm1,unsigned short *pb,unsigned short *pc)
{long expa,expb,relsign=1,choice,i;
if(*(pb+1) == 0)
  {mpput(pa,pc);
   return 0;}
if(*(pa+1) == 0)
  {mpput(pb,pc);
   if(pm1 < 0) {*pc = SIGNBIT ^ *pc;}
   return 0;}
if((*pa & SIGNBIT) != (*pb & SIGNBIT)){ relsign=-1;}
choice=pm1*relsign;
expa = (long)(*pa & MAXEXP);
expb = (long)(*pb & MAXEXP);
if(expa > expb) goto doab;
if(expb > expa) goto doba;
for(i=1;i<=MAX;i++)
  {if(*(pa+i) > *(pb+i)) goto doab;
   if(*(pb+i) > *(pa+i)) goto doba;
  }
if(choice == -1) /* they are equal */
    {*(pc+1)=0;
    return 0;}
doab: if(choice == 1) {mpadd(pa,pb,pc,expa-expb);}
      else {mpsub(pa,pb,pc,expa-expb);}
      return 0;
doba: if(choice == 1) {mpadd(pb,pa,pc,expb-expa);}
      else {mpsub(pb,pa,pc,expb-expa);}
      if(pm1 < 0) {*pc = (*pc ^ SIGNBIT);
      }
return 0;
}

 /* C=A*B */
int mpmult(unsigned short *pa,unsigned short *pb,unsigned short *pc)
{unsigned long c[MAX+2],x,y,z,carry;
long i,j,expa,expb,expc;
if((*(pa+1) == 0) | (*(pb+1) == 0) )
  { *(pc+1) = 0;
   return 0;}
for(i=0;i<=MAX+1;i++) {c[i]=0;}
for(i=1;i<=MAX;i++)
  {x=*(pa+i);
    for(j=1;j<=MAX+1-i; j++)
     {y=*(pb+j);
     c[i+j-1]+= (z=x*y)/HALF;
     c[i+j]+= BOTTOM &z;
     }}
carry=(c[MAX+1]+HALF/2)/HALF;
for(i=MAX; i>0;i--)
  { z=c[i]+carry;
   *(pc+i) = z & BOTTOM;
    carry = z/HALF;
   }
expa = (long)(*pa & MAXEXP);
expb = (long)(*pb & MAXEXP);
expc = expa + (expb - BIAS);
if(expc > (long)MAXEXP)
  {printf("Overflow in mpMULT\n");
    return 1;}
if(expc <= 0)
   {printf("Underflow in mpMULT\n");
    return -1;}
if ((*pa & SIGNBIT) == (*pb & SIGNBIT))
  {*(pc) =  (unsigned short) expc;}
else {*pc = (SIGNBIT | (unsigned short) expc);}
if(*(pc+1) & CHECKBIT) return 0;
mpadjust(pc);
return 0;
}
 

/* C = A/B   using Bailey's iteration for 1/B  */
int mpdiv(unsigned short *pa,unsigned short *pb,unsigned short *pc)
{long i,expa,expb,expc;
unsigned long y,z;
unsigned short a[MAX+1],b[MAX+1],x[MAX+1],one[MAX+1],c[MAX+1],d[MAX+1];
if(*(pa+1) == 0)
  { *(pc+1) = 0;
    return 0;}  /* 0/0 = 0 */
if(*(pb+1) == 0)
    {printf("divide by zero\n");
     return -2;}
for(i=0;i<=MAX;i++) {b[i]=*(pb+i);
                     a[i]=*(pa+i);
                     x[i]=0;
                     one[i]=0;}
a[0]=b[0]=BIAS;
one[0]=BIAS+1;
one[1]=( unsigned short)(HALF/2);
z=b[1];
y=(HALF*(HALF/2))/z;
if(y & HALF)
     {x[0]=BIAS + 2;
      x[1]=(unsigned short)(y/2);}
else {x[0]=BIAS+1;
      x[1]=(unsigned short)y;}
do
 {mpmult(b,x,c);
  mpaddsub(one,-1,c,c);
  mpmult(c,x,c);
  mpaddsub(x,+1,c,x);
 } while (((long)c[0] > (BIAS-BITS*(MAX+1)/2)) && (c[1] != 0));
mpmult(a,x,c);
mpmult(c,b,d);
mpaddsub(a,-1,d,d);
mpmult(d,x,d);
mpaddsub(c,+1,d,c);
for(i=1;i<=MAX;i++) {*(pc+i)=c[i];}
expa = (long)(*pa & MAXEXP);
expb = (long)(*pb & MAXEXP);
expc = (expa - expb) + (c[0] & MAXEXP);
if(expc > (long)MAXEXP)
  {printf("Overflow in mpDIV\n");
    return 1;}
if(expc <= 0)
   {printf("Underflow in mpDIV\n");
    return -1;}
if ((*pa & SIGNBIT) == (*pb & SIGNBIT))
  {*(pc) =  (unsigned short) expc;}
else {*pc = SIGNBIT | (unsigned short) expc;}
 return 0;
}
 
 

/* C = A + B, with sign of A, which is larger than B */
int mpadd(unsigned short *pa,unsigned short *pb,unsigned short *pc,long diff)
{long i;
unsigned long x,y,carry;
unsigned short temp[MAX+1], *pt;
if(diff >= BITS*MAX)
 {mpput(pa,pc);
     return 0;}
pt=&temp[0];
mpput(pb,pt);
mpshift(pt,diff);
carry=0;
for(i=MAX;i>0;i--)
  {x=*(pa+i);
   y=x+carry+*(pt+i);
   *(pc+i) = (unsigned short)( BOTTOM & y);
   carry = y/HALF;
   }
*pc=*pa;
 if(carry != 0) {mpshift1r(pc);}
return 0;
}

/* C = A - B, with sign of A, which is larger than B */
int mpsub(unsigned short *pa,unsigned short *pb,unsigned short *pc,long diff)
{long i;
unsigned long x,y,z,borrow;
unsigned short temp[MAX+1],*pt;
if(diff >= BITS*MAX)
 {mpput(pa,pc);
     return 0;}
pt=&temp[0];
for(i=1;i<=MAX;i++) {*(pt+i) = *(pb+i);}
mpshift(pt,diff);
borrow=0;
for(i=MAX;i>0;i--)
  {x=*(pa+i);
   y= *(pt+i)+borrow;
   if(x >= y) {z=x-y;
               borrow=0;}
   else {z = (x+HALF)-y;
         borrow = 1;}
   *(pc+i) = BOTTOM & z;
   }
*pc=*pa;
mpadjust(pc);
return 0;
}
 
 

int mpadjust (unsigned short *p) /* adjust for leading 1-bit */
{unsigned long x,y,push;
long n,k,i,exp;
unsigned short check=CHECKBIT,c[MAX+1];
if(*(p+1) & check ) return 0; /* ok */
for(n=1;n<=MAX;n++)
   {if(*(p+n) != 0 ) goto getit;
   }
   return 0;  /* it is zero */
getit: push=1;
 for(k=0;k<BITS;k++)
 {
  if(check & *(p+n)) {break;}
 check=check/2;
 push*=2;}  /* k counts number of leading zero bits */
for(i=n;i<MAX;i++)
    {x=*(p+i);
     x=(HALF*x + *(p+i+1))*push;
     y=  x/ HALF;
     c[i-n+1] = (unsigned short)(BOTTOM & y);
     }
  c[MAX-n+1]=(unsigned short)((*(p+MAX))*push & BOTTOM);
  for(i=MAX-n+2;i<=MAX;i++) {c[i]=0;}
  exp= (long) (*p & MAXEXP);
  exp-= k+BITS*(n-1);
  if(exp < 0)
     {printf("Underflow in mpadjust\n");
    return -1;}
  *p = (*p & SIGNBIT) | ((unsigned short)exp);
  for(i=1;i<=MAX;i++) {*(p+i) = c[i];}
  return 0;
  }

int mpshift(unsigned short *p, long diff) /* to right; disregard sign and exp */
{long n,k,i;
unsigned long x,y;
unsigned short c[MAX+1];
if(diff == 0) {return 0;}
n = diff/BITS;
k = diff % BITS;
for(i=MAX;i>0;i--)
  { if(i-n > 0)
    {x =  *(p+i-n);
    if(i-n > 1) {x+= (*(p+i-n-1))*HALF;}
     y = x >> k;
     c[i] = (unsigned short) (y & BOTTOM);
    }
    else {c[i] = 0;}
  }
for(i=1;i<=MAX;i++) {*(p+i) = c[i];}
return 0;
}

int mpshift1r(unsigned short *p) /* shift 1 bit right */
{unsigned long x;
long i,exp;
unsigned short c[MAX+1];
exp=MAXEXP & *p;
if(exp == (long)MAXEXP) {printf("overflow in mpshift1\n");
                    return 1;}
 exp++;
 *p = (unsigned short) ((*p & SIGNBIT) | exp);
 x=*(p+1);
 x=(x+HALF)/2;
 c[1] =(unsigned short)(BOTTOM & x);
 for(i=2;i<=MAX;i++)
    {x=*(p+i-1);
     x=(HALF*x + *(p+i))/2;
     c[i] =(unsigned short) (BOTTOM & x);
     }
 for(i=1;i<=MAX;i++) {*(p+i) = c[i];}
  return 0;
  }

/* get integral part, disregard sign */
int intpart(unsigned short *p, long *pint)
{long exp,dd;
unsigned long y;
exp = (*p & MAXEXP) - BIAS;
dd= 2*BITS-exp;
if(dd < 0 ) {printf("too big\n");
   return 1;}
   if(dd >= 32) {*pint = 0L;
                 return 0;}
 y= (*(p+1))*HALF + *(p+2);
 *pint=(long) ( y >> dd);
return 0;
}

int mpintpart(unsigned short *pa,unsigned short *pb) /*truncate toward zero */
{long exp,n,k,i;
unsigned long y;
exp = (MAXEXP & *pa) - BIAS;
if(exp >= MAX*BITS)
   {mpput(pa,pb);
      return 0;}
if((exp <= 0) | (*(pa+1) == 0))
   {for(i=0;i<=MAX;i++) {*(pb+i)=0;}
      return 0;}
n=exp/BITS;
k=exp%BITS;
for(i=0;i<=n;i++) {*(pb+i)=*(pa+i);}
y=*(pa+n+1);
y= y >> (BITS-k);
*(pb+n+1) = (unsigned short) (y << (BITS-k));
for(i=n+2;i<=MAX;i++) {*(pb+i)=0;}
return 0;
}

 int mplog(unsigned short *pa,unsigned short *pb) /*b = natural log(a) */
 {unsigned short c[MAX+1],one[MAX+1],sum[MAX+1],t[MAX+1],L2[MAX+1],u2[MAX+1];
 long n,i,exp;
 if((*pa & SIGNBIT) || (*(pa+1) == 0)) {printf("error in mpLOG\n");return 1;}
 longtomp(1L,one);
 longtomp(3L,c);
 mpdiv(one,c,t);
 mpput(t,sum);
 mpmult(t,t,u2);
 n=1;
 do
 {n+=2;
 longtomp(n,c);
 mpmult(t,u2,t);
 mpdiv(t,c,c);
 mpaddsub(sum,+1,c,sum);
 }while(c[0] > BIAS - (MAX+1)*BITS);
 sum[0]++;
 mpput(sum,L2);
 for(i=1;i<=MAX;i++) {t[i]=*(pa+i);}
 t[0]=BIAS;
 mpaddsub(one,-1,t,c);
 mpaddsub(one,+1,t,t);
 mpdiv(c,t,t);
 mpput(t,sum);
 mpmult(t,t,u2);
 n=1;
 do
 {n+=2;
 longtomp(n,c);
 mpmult(t,u2,t);
 mpdiv(t,c,c);
 mpaddsub(sum,+1,c,sum);
 if(c[1] == 0) break;
 }while(c[0] > BIAS - (MAX+1)*BITS);
 sum[0]++;
 exp= (MAXEXP & *pa) - BIAS;
 longtomp(exp,c);
 mpmult(c,L2,c);
 mpaddsub(c,-1,sum,c);
 mpput(c,pb);
 return 0;
 }
 

int mpexp(unsigned short *pa,unsigned short *pb) /* B = exp(A) */
{long n,i;
unsigned short c[MAX+1],d[MAX+1],sum[MAX+1],t[MAX+1];
static int iffirst=0;
extern unsigned short Log2[];
if(*(pa+1) == 0)
   {longtomp(1L,pb);
     return 0;}
if(iffirst == 0)
{
longtomp(2L,c);
mplog(c,Log2);
iffirst=1;}
mpdiv(pa,Log2,c);
c[0]= c[0] & MAXEXP;
mpintpart(c,t);
intpart(t,&n);
if(fabs(n) > 16380)
 {if (SIGNBIT & *pa)
    {longtomp(0L,pb);
      return 0;}
    printf("overflow in mpEXP\n");
    return 1;}
mpaddsub(c,-1,t,t);
mpmult(t,Log2,t);
if (SIGNBIT & *pa)
  {n = -n;
   t[0]= SIGNBIT + t[0];}
longtomp(1L,sum);
longtomp(1L,d);
i=0;
do
{i++;
longtomp(i,c);
mpmult(d,t,d);
mpdiv(d,c,d);
mpaddsub(sum,+1,d,sum);
if(d[1] == 0) break;
}while((long)(MAXEXP & d[0]) > BIAS - (MAX+1)*BITS);
sum[0]+=n;
for(i=0;i<=MAX;i++) {*(pb+i) = sum[i];}
return 0;
}
 

int mpsqrt(unsigned short *pa, unsigned short *pb) /* B = square root of A */
{unsigned short c[MAX+1],d[MAX+1];
long x,exp,exph,i;
unsigned long y;
double z;
if(*pa & SIGNBIT) {printf("negative in sqrt\n");
                   return -1;}
exp =(long)( (MAXEXP & *pa) - BIAS);
y=(unsigned long) *(pa+1);
x = (long) ((y*HALF + *(pa+2))/4);
exph=exp/2;
if(exp != 2*exph)
   {x=2*x;}
 z=x;
 x=(long) sqrt(z*65536.);
 longtomp(x,c);
 c[0]+= exph +1 - BITS-8;
 i=0;
 do
 {i++;
  mpmult(c,c,d);
  mpaddsub(pa,-1,d,d);
  mpdiv(d,c,d);
  d[0]--;
  mpaddsub(c,+1,d,c);
  if(i > 10) { printf("maxed out iterations in sqrt\n"); break;}
 }while(((long)(c[0]-(MAXEXP & d[0])) <= BITS*(MAX+1)/2) && (d[1] != 0));
 mpput(c,pb);
 return 0;
 }

 int mppi(unsigned short *p)  /* pi */
 {long k;
 int sign;
 unsigned short c[MAX+1],d[MAX+1],d2[MAX+1],temp[MAX+1];
 longtomp(2L,temp);
 mpsqrt(temp,d);
 mpaddsub(temp,+1,d,d);
 longtomp(3L,temp);
 mpsqrt(temp,temp);
 mpaddsub(d,+1,temp,d);
 longtomp(6L,temp);
 mpsqrt(temp,temp);
 mpaddsub(temp,+1,d,d);
 longtomp(1L,temp);
 mpdiv(temp,d,d);/* 1/( 2 + sq2 + sq3 + sq6) */
 mpmult(d,d,d2);
 longtomp(24L,temp);
 mpmult(temp,d,d);
 mpput(d,c);/* first term in series */
 k=3;
 sign=-1;
 do
 {longtomp(k,temp);
 mpmult(d,d2,d);
 mpdiv(d,temp,temp);
 mpaddsub(c,sign,temp,c);
 sign=-sign;
 k+=2;
 }while((long)(c[0]-d[0]) < BITS*(MAX+1));
 mpput(c,p);
 return 0;
 }