Monte Carlo integration by Vegas¶
Vegas Algorithm¶
We proved before that the best weight function is the one which minimizes \begin{eqnarray} \lambda \propto \left[\int dx_1 dx_2\cdots dx_n \frac{|f(x_1,x_2,\cdots,x_n)|^2}{(w(x_1,x_2,\cdots,x_n))^2}\right] \end{eqnarray}
The Vegas method is primarily based on the importance sampling algorithm with the above mentioned self-adapting strategy. The basic idea is to use a separable weight function: instead of a complicated $w(x_1,x_2,x_3,\cdots)$ one uses the ansatz $w=w_1(x_1)w_2(x_2)w_3(x_3)\cdots$.
The optimal separable weight functions are \begin{eqnarray} (w_1(x_1))^2 \propto \left[\int dx_2 dx_3\cdots dx_n \frac{|f(x_1,x_2,x_3,\cdots,x_n)|^2}{(w_2(x_2)w_3(x_3)\cdots w_n(x_n))^2}\right] \end{eqnarray} as follows from our ansatz and the previous proof of the optimal weight function.
The power of Vegas is that, by iteration, it can resolve any divergence which is separable, i.e., aligned with one of the axes. However, when the divergence lies along a diagonal, Vegas performs similarly to plain MC sampling. Note that the separable ansatz avoids the explosion of stratified regions: a full $d$-dimensional grid scales as $K^d$, while the separable ansatz scales as $K\times d$ ($K$ is the number of points in one dimension, typically $K\approx 10^2$, $d\approx 10$, i.e., ZettaBytes versus KiloBytes).
The algorithm starts with the separable weight function, which we will call a grid, $g_i(x)$. We want to evaluate \begin{eqnarray} &&\int_{a_1}^{b_1} dg_1 \int_{a_2}^{b_2} dg_2\cdots\int_{a_n}^{b_n} dg_n f(g_1,g_2,\cdots,g_n)= \nonumber\\ &&\int_0^1 dx_1\int_0^1 dx_2\cdots\int_0^1 dx_n\, f(g_1(x_1),g_2(x_2),\cdots,g_n(x_n)) \frac{dg_1}{dx_1}\frac{dg_2}{dx_2}\cdots\frac{dg_n}{dx_n} \end{eqnarray} Note that $g_1$ depends only on $x_1$, and $g_2$ only on $x_2$, so that the Jacobian of such a separable set of functions is just the product of all the derivatives. Also note that here we interpret $(dg_i(x_i)/dx_i)^{-1} = w_i(x_i)$.
We first start with the grid functions $g_i(x)=a_i + x (b_i-a_i)$, so that the integration at the first iteration is equivalent to the usual Monte Carlo sampling.
We generate a few thousand sets of random points $(x_1,x_2,\cdots,x_n)$ and evaluate $f$ at these points. During the sampling we estimate the integral as \begin{eqnarray} ⟨f⟩=\frac{1}{N}\sum_{(x_1,x_2,\cdots,x_n)} f(g_1(x_1),g_2(x_2),\cdots,g_n(x_n)) \frac{dg_1}{dx_1}\frac{dg_2}{dx_2}\cdots\frac{dg_n}{dx_n}, \label{vegas_int} \end{eqnarray} where $N$ is the number of sampled points. Note that the random points are uniformly distributed on the mesh $x_i$ ($x_i \in [0,1) $), therefore the unit volume $\int dx_1 dx_2 \cdots =1$.
We will also construct the projection of the function $f$ onto each dimension. We start by sampling the following functions: \begin{eqnarray} f_1(x_1) = \sum_{x_2,x_3} |f(g_1(x_1),g_2(x_2),g_3(x_3)) \frac{dg_1}{dx_1}\frac{dg_2}{dx_2}\frac{dg_3}{dx_3}|^2\\ f_2(x_2) = \sum_{x_1,x_3} |f(g_1(x_1),g_2(x_2),g_3(x_3)) \frac{dg_1}{dx_1}\frac{dg_2}{dx_2}\frac{dg_3}{dx_3}|^2\\ f_3(x_3) = \sum_{x_1,x_2} |f(g_1(x_1),g_2(x_2),g_3(x_3))\frac{dg_1}{dx_1}\frac{dg_2}{dx_2}\frac{dg_3}{dx_3}|^2 \label{vegas_weight} \end{eqnarray} and then we normalize these projected functions and take the square root, because we squared the function before (notice that the optimal weight function squared is proportional to $f^2$). The projection of the function onto each dimension is then \begin{eqnarray} \widetilde{f}_i(x)= \sqrt{\frac{f_i(x)}{\int_0^1 f_i(x) dx}}, \end{eqnarray} so that we map the interval $[0,1]\rightarrow [0,1]$. This is chosen as our weight function $w_i\propto \widetilde{f}_i$.
In Vegas the weight functions are $$w_i(x) = \frac{1}{\frac{dg_i(x)}{dx}} \propto \widetilde{f}_i(g_i(x))$$ This requires that \begin{eqnarray} \widetilde{f}_i(g_i(x))\frac{dg_i(x)}{dx}=const \end{eqnarray}
This can also be understood as a simple 1D problem involving a change of variables. We aim to find a refined grid $g(x)$ using the projected function $\widetilde{f}$. When using this improved grid function $g(x)$, we want each small interval to contribute equally to the integral, i.e., $$ \int_{g_{l}}^{g_{l+1}} \widetilde{f}(g) dg = const$$
We can also write \begin{eqnarray} \int \widetilde{f}(g)dg = \int \widetilde{f}(g(x))\frac{dg}{dx}dx, \end{eqnarray} and we would like to have \begin{eqnarray} \widetilde{f}(g(x))\frac{dg}{dx} = const, \end{eqnarray} which is the same as the requirement above.
To determine the new grid $g_l$, we will determine the grid points numerically so that \begin{eqnarray} \int_0^1 \widetilde{f}(g_{old}) dg_{old} = I\\ \int_{g^{new}_{l-1}}^{g^{new}_l} \widetilde{f}(g_{old})dg_{old} = \frac{I}{N_g} \label{f_int} \end{eqnarray} where $N_g$ is the number of grid points. Hence, we require that there is exactly the same weight between each pair of consecutive points (between $g_{0}$ and $g_1$, between $g_1$ and $g_2$, ...).
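As a concrete 1D illustration of this refinement step, the new grid points can be obtained by inverting the cumulative integral of $\widetilde{f}$. The sketch below assumes $\widetilde{f}$ has already been tabulated on the current grid points; the helper name refine_1d and the use of numpy.interp are our own choices for illustration, not part of the Vegas code developed below.

from numpy import cumsum, concatenate, linspace, interp

def refine_1d(g_old, f_tilde):
    """Sketch: given the old grid points g_old (from 0 to 1, inclusive) and the tabulated
    values f_tilde at those points, return new grid points such that each bin carries
    the same weight, i.e., Int[ f_tilde(g) dg, {g, g_{l-1}, g_l} ] = I/N_g for every l."""
    dg = g_old[1:] - g_old[:-1]                 # widths of the old bins
    w  = 0.5*(f_tilde[1:] + f_tilde[:-1])*dg    # trapezoid weight carried by each old bin
    W  = concatenate(([0.0], cumsum(w)))        # cumulative weight W(g_l) at the old grid points
    targets = linspace(0.0, W[-1], len(g_old))  # equally spaced targets l*I/N_g
    return interp(targets, W, g_old)            # invert W(g)=target by linear interpolation

Repeating this with a re-sampled $\widetilde{f}$ moves the grid points toward the regions where $\widetilde{f}$ is large, which is what the RefineGrid routine below does dimension by dimension.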
Once we have a new grid, we can restart the sampling of the integral of $f$, just like in the Eqs. for $⟨f⟩$, $f_1(x_1)$, $f_2(x_2)$, $f_3(x_3)$, but using $g_{new}$. We again generate a few thousand random sets of points $(x_1,x_2,x_3)$ and obtain the $\widetilde{f}_i(x)$ functions. We then repeat this procedure approximately 10 times, slightly increasing the number of random points each time, as the grid functions become more and more precise. At the end, we can run a single long run with 10-times longer sampling to reduce the sampling error bar.
In practice, we will implement one more trick to make the algorithm more stable. When we compute the separable function $\widetilde{f}_i(x)$, we will smoothen it, because we want to avoid excessive discontinuities due to the finite number of random points. We will just average over the nearest neighbors \begin{eqnarray} \widetilde{f}_i \leftarrow \frac{\widetilde{f}_{i-1}+\widetilde{f}_i +\widetilde{f}_{i+1}}{3} \end{eqnarray} Note: be careful at the endpoints, where you need to average over two points only.
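This nearest-neighbor smoothing can be written with simple numpy slicing. The sketch below is only an illustration (the routine Smoothen further below does the same averaging and in addition normalizes the result and takes the square root):

from numpy import copy

def smooth_nn(f):
    """Sketch: average each bin with its nearest neighbors; endpoints average over two points only."""
    s = copy(f)
    s[:-1] += f[1:]    # add the right neighbor
    s[1:]  += f[:-1]   # add the left neighbor
    s[1:-1] /= 3.0     # interior points: three-point average
    s[0]    /= 2.0     # left endpoint: two-point average
    s[-1]   /= 2.0     # right endpoint: two-point average
    return s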
Finally, for the grid $g(x)$ we will have the grid points distributed on $[0,1]$ so that $x_0 = \frac{1}{N}$, $x_1=\frac{2}{N}$, $\cdots$, $x_{N-2}=\frac{N-1}{N}$, $x_{N-1}=1$. We know that $g(x=0)=0$, hence we do not need to store the point $x=0$, but we need to be careful when interpolating at the point $x_0$. For such an equidistant mesh it is clear that, given a point $x\in [0,1]$, we can compute $i=int(x N)$, and then we know that $x$ falls between $x_{i-1}$ and $x_i$, and linear interpolation gives \begin{eqnarray} &&g(x) = g_{i-1} + (g_{i}-g_{i-1})\frac{(x-i/N)}{1/N}\\ &&g(x) = g_{0}\; x N \quad \mathrm{if}\quad i=0 \end{eqnarray}
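In code, this interpolation for a single dimension could be sketched as follows (the helper name interp_g is ours; the array g is assumed to store $g(x_i)$ at $x_i=(i+1)/N$, with $g(0)=0$ implied and not stored):

def interp_g(x, g):
    """Sketch: linearly interpolate the grid function g at a point x in [0,1)."""
    N = len(g)
    i = int(x*N)                              # x falls between x_{i-1}=i/N and x_i=(i+1)/N
    if i == 0:
        return g[0]*x*N                       # g(x) = g_0 * x*N, since g(0)=0
    return g[i-1] + (g[i]-g[i-1])*(x*N - i)   # g(x) = g_{i-1} + (g_i-g_{i-1})*(x-i/N)/(1/N)

The Vegas code below avoids the explicit if branch by allocating one extra element g[nbins]=0, so that g[ipos-1] with ipos=0 automatically picks up g[-1]=0.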
Finally, we want to discuss the calculation of the error and the confidence in the result. We will perform $M$ outer iterations (which update the grid). Each such iteration consists of $n_i$ function evaluations (of the order of a few thousand to ten thousand). From all these $\sum_i n_i$ evaluations we want to obtain the best estimate of the integral and an estimate of its error.
At each iteration, we will sample the following quantities: \begin{eqnarray} && ⟨f_w⟩_i = \frac{1}{n_i}\sum_{j=1}^{n_i} f(g_1({x_1}_j),g_2({x_2}_j),g_3({x_3}_j))\frac{dg_1}{dx_1}({x_1}_j)\frac{dg_2}{dx_2}({x_2}_j)\frac{dg_3}{dx_3}({x_3}_j)\\ && ⟨f_w^2⟩_i = \frac{1}{n_i}\sum_{j=1}^{n_i} \left(f(g_1({x_1}_j),g_2({x_2}_j),g_3({x_3}_j))\frac{dg_1}{dx_1}({x_1}_j)\frac{dg_2}{dx_2}({x_2}_j)\frac{dg_3}{dx_3}({x_3}_j)\right)^2 \end{eqnarray} Then the estimate for the squared error of the MC sampling is \begin{eqnarray} \sigma_i^2 = \frac{ ⟨f_w^2⟩_i-⟨f_w⟩_i^2}{n_i-1} \end{eqnarray} Note that the standard deviation of the sampled function, $\sqrt{⟨f_w^2⟩_i-⟨f_w⟩_i^2}$, approaches a constant when the number of sampled points $n_i$ goes to infinity. The error of the MC estimate, however, is $\sigma_i \propto \frac{1}{\sqrt{n_i}}$, as expected.
From all accumulated evaluations of the function (in $M$ iterations), we can construct the best estimate of the integral. Naively, we would just calculate $\frac{1}{M}\sum_i ⟨f_w⟩_i$. However, in the first few iterations the error was much larger than in the last iteration, and therefore we want to give less weight to those early, less accurate estimates. We achieve that by \begin{eqnarray} I_{best}=\frac{\sum_{i=1}^M \frac{⟨f_w⟩_i}{\sigma_i^2}}{\sum_{i=1}^M\frac{1}{\sigma_i^2}} \end{eqnarray} Similarly, the errors do not add up; rather, the combined error is smaller than the smallest error (that of the last iteration). We have \begin{eqnarray} \frac{1}{\sigma^2_{best}} = \sum_{i=1}^M \frac{1}{\sigma_i^2} \end{eqnarray} and finally $\chi^2$ can be estimated by \begin{eqnarray} \chi^2 = \frac{1}{M-1}\sum_{i=1}^M \frac{(⟨f_w⟩_i -I_{best})^2}{\sigma_i^2} \end{eqnarray}
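Given the per-iteration averages $⟨f_w⟩_i$ and errors $\sigma_i$, these three formulas can be evaluated directly. The small sketch below assumes the per-iteration results are stored in numpy arrays (the running Vegas code below instead accumulates the corresponding sums on the fly):

from numpy import *

def combine_iterations(avg, sig):
    """Sketch: avg[i] = <f_w>_i and sig[i] = sigma_i from M independent iterations."""
    w = 1.0/sig**2                            # weight of each iteration
    I_best = sum(w*avg)/sum(w)                # weighted average of the iterations
    err = sqrt(1.0/sum(w))                    # combined error, smaller than the smallest sigma_i
    M = len(avg)
    chisq = sum(w*(avg-I_best)**2)/(M-1)      # should be of order 1 if the error estimates are consistent
    return I_best, err, chisq

# example usage with three made-up iterations:
# I, dI, chisq = combine_iterations(array([1.37, 1.385, 1.391]), array([0.010, 0.006, 0.004]))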
Brute force Monte Carlo¶
First we will implement the simple Monte Carlo method.
# simple class to take care of statistically determined quantities
class Cumulants:
def __init__(self):
self.sum=0.0 # f_0 + f_1 +.... + f_N
self.sqsum=0.0 # f_0^2 + f_1^2 +....+ f_N^2
self.avg = 0.0 # I_best when many iterations, otherwise <f> = 1/N\sum_i f_i
self.err = 0.0 # sigma of I_best when many iterations, otherwise sqrt( <f^2>-<f>^2 )/sqrt(N)
self.chisq = 0.0
self.weightsum=0.0 # \sum_i 1/sigma_i^2
self.avgsum=0.0 # \sum_i <f>_i/sigma_i^2
self.avg2sum=0.0 # \sum_i <f>_i^2/sigma_i^2
def SimpleMC(integrant, ndim, unit, maxeval, cum):
""" integrant -- function to integrate. It should be vectorized function!
    ndim      -- how many dimensions the function has
unit -- the size of the box we are using for integration [[0,L],[0,L]...]
maxeval -- total number of function evaluations we will perform
    cum       -- class Cumulants, which will hold all information we want to obtain.
"""
    nbatch=1000 # function will be evaluated in batches of 1000 evaluations at one time (for efficiency and storage issues)
neval=0
for nsamples in range(maxeval,0,-nbatch): # loop over all_nsample evaluations in batches of nbatch
# nsamples counts remaining number of function evaluations
n = min(nbatch,nsamples) # How many evaluations in this pass?
xr = unit*random.random((n,ndim)) # generates 2-d array of random numbers in the interval [0,1)
wfun = integrant(xr) # n function evaluations required in single call
        neval += n       # We just added so many function evaluations
cum.sum += sum(wfun) # sum_i f_i = <fw> * neval
cum.sqsum += sum(wfun*wfun) # sum_i f_i^2 = <fw^2> * neval
cum.avg = cum.sum/neval
w0 = sqrt(cum.sqsum/neval) # sqrt(<f^2>)
    cum.err = sqrt((w0-cum.avg)*(w0+cum.avg)/neval) # sqrt((<f^2>-<f>^2)/N)
cum.avg *= unit**ndim # adding units if not [0,1] interval
cum.err *= unit**ndim # adding units if not [0,1] interval
We used here a simple trick to avoid overflow, i.e.,
\begin{eqnarray} \sqrt{\frac{\langle f^2\rangle-\langle f\rangle^2}{N}} = \sqrt{\frac{(\sqrt{\langle f^2\rangle}-\langle f\rangle)(\sqrt{\langle f^2\rangle}+\langle f\rangle)}{N}} \end{eqnarray}
For testing, we will integrate $$ \frac{1}{\pi^3}\int_0^\pi dx\int_0^\pi dy \int_0^\pi dz \frac{1}{1-\cos(x) \cos(y) \cos(z)}$$
def my_integrant2(x):
""" For testing, we are integration the function
1/(1-cos(x)*cos(y)*cos(z))/pi^3
in the interval [0,pi]**3
"""
#nbatch,ndim = shape(x)
return 1.0/(1.0-cos(x[:,0])*cos(x[:,1])*cos(x[:,2]))/pi**3
from scipy import *
from numpy import *
unit=pi
ndim=3
maxeval=200000
exact = 1.3932 # exact value of the integral
cum = Cumulants()
SimpleMC(my_integrant2, ndim, pi, maxeval, cum)
print(cum.avg, '+-', cum.err, 'exact=', exact)
print('how many sigma away', abs(cum.avg-exact)/cum.err)
1.382434819789234 +- 0.011166599305649706 exact= 1.3932
how many sigma away 0.9640518045023243
Vegas¶
First we define the grid points $g(x)$. At the beginning, we just set $g(x)=x$.
class Grid:
"""Contains the grid points g_n(x) with x=[0...1], and g=[0...1]
for Vegas integration. There are n-dim g_n functions.
Constraints : g(0)=0 and g(1)=1.
"""
def __init__(self, ndim, nbins):
self.g = zeros((ndim,nbins+1))
# a bit dirty trick: We will later use also g[-1] in interpolation, which should be set to zero, hence
# we allocate dimension nbins+1, rather than nbins
self.ndim=ndim
self.nbins=nbins
# At the beginning we set g(x)=x
# The grid-points are x_0 = 1/N, x_1 = 2/N, ... x_{N-1}=1.0. Note x_N=x_{-1}=0
# Note that g(0)=0, and we skip this point on the mesh.
for idim in range(ndim): # over all 3 dimensions
self.g[idim,:nbins] = arange(1,nbins+1)/float(nbins)
def Vegas_step1(integrant, unit, maxeval, nstart, nincrease, grid, cum):
"""
integrant -- function to integrate. It should be vectorized function!
unit -- the size of the box we are using for integration [[0,L],[0,L]...]
maxeval -- total number of function evaluations we will perform
nstart -- number of MC steps in the first iteration (before refining the grid)
nincrease -- how many points we add once grid is refined
grid -- the class which contains current grid, and is self-consistently refined.
    cum       -- class Cumulants, which will hold all information we want to obtain.
"""
ndim, nbins = grid.ndim,grid.nbins # dimension of the integral, size of the grid for binning in each direction
unit_dim = unit**ndim # converts from unit cube integration to generalized cube with unit length
    nbatch=1000 # function will be evaluated in batches of 1000 evaluations at one time (for efficiency and storage issues)
neval=0
print ("""Vegas parameters:
ndim = """+str(ndim)+"""
unit = """+str(unit)+"""
maxeval = """+str(maxeval)+"""
nstart = """+str(nstart)+"""
nincrease = """+str(nincrease)+"""
nbins = """+str(nbins)+"""
nbaths = """+str(nbatch)+"\n")
bins = zeros((nbatch,ndim),dtype=int) # in which sampled bin does this point fall?
all_nsamples = nstart
for nsamples in range(all_nsamples,0,-nbatch): # loop over all_nsample evaluations in batches of nbatch
# nsamples is the number of points left to evaluate
n = min(nbatch,nsamples) # How many evaluations in this pass?
# We are integrating f(g_1(x),g_2(y),g_3(z))*dg_1/dx*dg_2/dy*dg_3/dz dx*dy*dz
# This is represented as 1/all_nsamples \sum_{x_i,y_i,z_i} f(g_1(x_i),g_2(y_i),g_3(z_i))*dg_1/dx*dg_2/dy*dg_3/dz
# where dg_1/dx = diff*NBINS
wgh = zeros(nbatch) # weights for each random point in the batch
xr = random.random((n,ndim)) # generates 2-d array of random numbers in the interval [0,1)
# This part takes quite a lot of time and it would be nice to rewrite with numba!
for i in range(n): # over the batch, which is n-items long
weight = 1.0/all_nsamples # weight in this point of the batch, i.e., one configuration
            for dim in range(ndim): # over all dimensions of the function evaluation
# We want to evaluate the function f at point g(x), i.e, f(g_1(x_1),g_2(x_2),...)
# Here we transform the points x_1,x_2,x_3 -> g_1(x_1), g_2(x_2), g_3(x_3)
# We hence want to evaluate g(x) ~ g(x[i]), where x is the random number and g is the grid function
# The discretized g(t) is defined on the grid :
# t[-1]=0, t[0]=1/N, t[1]=2/N, t[2]=3/N ... t[N-1]=1.
# We know that g(0)=0 and g(1)=1, so that g[-1]=0.0 and g[N-1]=1.0
# To interpolate g at x, we first compute i=int(x*N) and then we use linear interpolation
# g(x) = g[i-1] + (g[i]-g[i-1])*(x*N-i) ; if i>0
# g(x) = 0 + (g[0]-0)*(x*N-0) ; if i=0
#
pos = xr[i,dim]*nbins # which grid would it fit ? (x*N)
ipos = int(pos) # the grid position is ipos : int(x*N)==i
diff = grid.g[dim,ipos] - grid.g[dim,ipos-1] # g[i]-g[i-1]
# linear interpolation for g(x) :
xr[i,dim] = (grid.g[dim,ipos-1] + (pos-ipos)*diff)*unit # g(xr) ~ ( g[i-1]+(g[i]-g[i-1])*(x*N-i) )*[units]
bins[i,dim]=ipos # remember in which bin this random number falls.
weight *= diff*nbins # weight for this dimension is dg/dx = (g[i]-g[i-1])*N
# because dx = i/N - (i-1)/N = 1/N
            wgh[i] = weight     # total weight for this point is (dg/dx)*(dg/dy)*(dg/dz).../N_{samples}
# Here we evaluate function f on all randomly generated x points above
fx = integrant(xr) # n function evaluations required in single call
        neval += n       # We just added so many function evaluations
# Now we compute the integral as weighted average, namely, f(g(x))*dg/dx
wfun = wgh * fx # weight * function ~ f_i*w_i, here w_i has 1/N in its weight, hence actually we are
# evaluating 1/N * sum_i f_i*w_i
cum.sum += sum(wfun) # 1/N sum_i f_i*w_i = <fw>
        wfun *= wfun     # careful : we need 1/N (f_i w_i)^2, while this gives (f_i w_i/N)^2
cum.sqsum += sum(wfun) # sum_i (f_i*w_i/N)^2 = <fw^2>/all_nsamples
#
w0 = sqrt(cum.sqsum*all_nsamples) # w0 = sqrt(<fw^2>)
w1 = (w0 + cum.sum)*(w0 - cum.sum) # w1 = (w0^2 - <fw>^2) = (<fw^2>-<fw>^2)
w = (all_nsamples-1)/w1 # w ~ 1/sigma_i^2 = (N-1)/(<fw^2>-<fw>^2)
    # Note that the variance of the MC sampling is Var(monte-f) = (<f^2>-<f>^2)/N = sigma_i^2, hence w ~ 1/sigma_i^2
cum.weightsum += w # weightsum ~ \sum_i 1/sigma_i^2
cum.avgsum += w*cum.sum # avgsum ~ \sum_i <fw>_i / sigma_i^2
cum.avg = cum.avgsum/cum.weightsum # I_best = (\sum_i <fw>_i/sigma_i^2 )/(\sum_i 1/sigma_i^2)
cum.err = sqrt(1/cum.weightsum) # err ~ sqrt(best sigma^2) = sqrt(1/(\sum_i 1/sigma_i^2))
cum.avg *= unit**ndim
cum.err *= unit**ndim
from scipy import *
from numpy import *
unit=pi
ndim=3
maxeval=200000
exact = 1.3932 # exact value of the integral
cum = Cumulants()
nbins=128
nstart =10000
nincrease=5000
grid = Grid(ndim,nbins)
random.seed(0)
#SimpleMC(my_integrant2, ndim, pi, maxeval, cum)
Vegas_step1(my_integrant2, pi, maxeval, nstart, nincrease, grid, cum)
print (cum.avg, '+-', cum.err, 'exact=', exact)
Vegas parameters:
  ndim = 3
  unit = 3.141592653589793
  maxeval = 200000
  nstart = 10000
  nincrease = 5000
  nbins = 128
  nbaths = 1000

1.3258378290394897 +- 0.02037303730823348 exact= 1.3932
Now we are going to insert the sampling of the projections of the function $f(x)$ onto each axis, $f_1(x), f_2(y)\ldots$, from which we will calculate the new grids $g_1(x), g_2(y)\ldots$.
def Vegas_step2(integrant, unit, maxeval, nstart, nincrease, grid, cum):
ndim, nbins = grid.ndim,grid.nbins # dimension of the integral, size of the grid for binning in each direction
unit_dim = unit**ndim # converts from unit cube integration to generalized cube with unit length
    nbatch=1000 # function will be evaluated in batches of 1000 evaluations at one time (for efficiency and storage issues)
neval=0
print ("""Vegas parameters:
ndim = """+str(ndim)+"""
unit = """+str(unit)+"""
maxeval = """+str(maxeval)+"""
nstart = """+str(nstart)+"""
nincrease = """+str(nincrease)+"""
nbins = """+str(nbins)+"""
nbaths = """+str(nbatch)+"\n")
bins = zeros((nbatch,ndim),dtype=int) # in which sampled bin does this point fall?
all_nsamples = nstart
fxbin = zeros((ndim,nbins)) #new2: after each iteration we reset the average function being binned
for nsamples in range(all_nsamples,0,-nbatch): # loop over all_nsample evaluations in batches of nbatch
n = min(nbatch,nsamples) # How many evaluations in this pass?
# We are integrating f(g_1(x),g_2(y),g_3(z))*dg_1/dx*dg_2/dy*dg_3/dz dx*dy*dz
# This is represented as 1/all_nsamples \sum_{x_i,y_i,z_i} f(g_1(x_i),g_2(y_i),g_3(z_i))*dg_1/dx*dg_2/dy*dg_3/dz
# where dg_1/dx = diff*NBINS
wgh = zeros(nbatch) # weights for each random point in the batch
xr = random.random((n,ndim)) # generates 2-d array of random numbers in the interval [0,1)
for i in range(n):
weight = 1.0/all_nsamples
for dim in range(ndim):
# We want to evaluate the function f at point g(x), i.e, f(g_1(x),g_2(y),...)
# Here we transform the points x,y,z -> g_1(x), g_2(y), g_3(z)
# We hence want to evaluate g(x) ~ g(x[i]), where x is the random number and g is the grid function
# The discretized g(t) is defined on the grid :
# t[-1]=0, t[0]=1/N, t[1]=2/N, t[2]=3/N ... t[N-1]=1.
# We know that g(0)=0 and g(1)=1, so that g[-1]=0.0 and g[N-1]=1.0
# To interpolate g at x, we first compute i=int(x*N) and then we use linear interpolation
# g(x) = g[i-1] + (g[i]-g[i-1])*(x*N-i) ; if i>0
# g(x) = 0 + (g[0]-0)*(x*N-0) ; if i=0
#
pos = xr[i,dim]*nbins # which grid would it fit ? (x*N)
ipos = int(pos) # the grid position is ipos : int(x*N)==i
                diff = grid.g[dim,ipos] - grid.g[dim,ipos-1] # g[i]-g[i-1]
# linear interpolation for g(x) :
xr[i,dim] = (grid.g[dim,ipos-1] + (pos-ipos)*diff)*unit # g(xr) ~ ( g[i-1]+(g[i]-g[i-1])*(x*N-i) )*[units]
bins[i,dim]=ipos # remember in which bin this random number falls.
weight *= diff*nbins # weight for this dimension is dg/dx = (g[i]-g[i-1])*N
# because dx = i/N - (i-1)/N = 1/N
            wgh[i] = weight  # total weight is (dg/dx)*(dg/dy)*(dg/dz).../N_{samples}
# Here we evaluate function f on all randomly generated x points above
fx = integrant(xr) # n function evaluations required in single call
        neval += n       # We just added so many function evaluations
# Now we compute the integral as weighted average, namely, f(g(x))*dg/dx
wfun = wgh * fx # weight * function ~ f_i*w_i
cum.sum += sum(wfun) # sum_i f_i*w_i = <fw>
        wfun *= wfun           # careful : this is like (f_i * w_i/N)^2 hence 1/N (1/N (f_i*w_i)^2)
        cum.sqsum += sum(wfun) # sum_i (f_i*w_i/N)^2 = <fw^2>/all_nsamples
#
for dim in range(ndim): #new2
# projection of the sampled function^2 to each dimension, which will be used to improve the grid.
for i in range(n): #new2
# sum_{point,dimension_i} fxbin[dim,x_i] += |f(g_1(x_1),g_2(x_2),...)*dg_1/dx*dg_2/dx...|^2
fxbin[dim, bins[i,dim] ] += wfun[i] #new2: just bin the function f. We saved the bin position before.
w0 = sqrt(cum.sqsum*all_nsamples) # w0 = sqrt(<fw^2>)
w1 = (w0 + cum.sum)*(w0 - cum.sum) # w1 = (w0^2 - <fw>^2) = (<fw^2>-<fw>^2)
w = (all_nsamples-1)/w1 # w ~ 1/sigma_i^2 = (N-1)/(<fw^2>-<fw>^2)
    # Note that the variance of the MC sampling is Var(monte-f) = (<f^2>-<f>^2)/N = sigma_i^2, hence w ~ 1/sigma_i^2
cum.weightsum += w # weightsum ~ \sum_i 1/sigma_i^2
cum.avgsum += w*cum.sum # avgsum ~ \sum_i <fw>_i / sigma_i^2
cum.avg = cum.avgsum/cum.weightsum # I_best = (\sum_i <fw>_i/sigma_i^2 )/(\sum_i 1/sigma_i^2)
cum.err = sqrt(1/cum.weightsum) # err ~ sqrt(best sigma^2) = sqrt(1/(\sum_i 1/sigma_i^2))
cum.avg *= unit**ndim
cum.err *= unit**ndim
return fxbin
from scipy import *
from numpy import *
unit=pi
ndim=3
maxeval=200000
exact = 1.3932 # exact value of the integral
cum = Cumulants()
nbins=128
nstart =10000
nincrease=5000
grid = Grid(ndim,nbins)
#SimpleMC(my_integrant2, ndim, pi, maxeval, cum)
fxbin = Vegas_step2(my_integrant2, pi, maxeval, nstart, nincrease, grid, cum)
print(cum.avg, '+-', cum.err, 'exact=', exact)
Vegas parameters:
  ndim = 3
  unit = 3.141592653589793
  maxeval = 200000
  nstart = 10000
  nincrease = 5000
  nbins = 128
  nbaths = 1000

1.380557370232699 +- 0.03849036529933611 exact= 1.3932
from pylab import *
%matplotlib inline
plot(fxbin[0])
plot(fxbin[1])
plot(fxbin[2])
#xlim([0,128])
ylim([0,1e-7])
show()
def Smoothen(fxbin):
(ndim,nbins) = shape(fxbin)
final = zeros(shape(fxbin))
for idim in range(ndim):
fxb = copy(fxbin[idim,:]) # widetilde{f}(x) copied
#**** smooth the widetilde{f} value stored for each bin ****
# f[i] <- (f[i+1]+f[i]+f[i-1])/3.
fxb[:nbins-1] += fxbin[idim,1:nbins]
fxb[1:nbins] += fxbin[idim,:nbins-1]
fxb[1:nbins-1] *= 1/3.
fxb[0] *= 1/2.
fxb[nbins-1] *= 1/2.
norm = sum(fxb)
if( norm == 0 ):
print ('ERROR can not refine the grid with zero grid function')
return # can not refine the grid if the function is zero.
fxb *= 1.0/norm # we normalize the function.
# Note that normalization is such that the sum is 1.
        # And then we take the square root of the absolute value, just in case the value is negative at some point.
final[idim,:] = sqrt(abs(fxb))
return final
from pylab import *
%matplotlib inline
imp = Smoothen(fxbin)
plot(imp[0])
plot(imp[1])
plot(imp[2])
xlim([0,128])
show()
Note that we require $$f(g(x))\frac{dg}{dx} = const$$ which is equivalent to saying that the weight function will be $$w(x) \propto f(g(x))$$ because $\frac{dg}{dx}=\frac{1}{w(x)}$
Next we want to rewrite this condition in a more useful way: $$f(g(x))\frac{dg}{dx}\,dx = const\cdot dx \approx \int_{g(x)}^{g(x)+\frac{dg}{dx}dx} f(t) dt$$ which when discretized becomes
$$\int_{g_{l-1}}^{g_l} f(g_{old}) dg_{old} = \frac{I}{N_g}$$where $I=\int_0^1 f(g_{old}) dg_{old}$ and $N_g$ is the number of discrete points $l$.
class Grid:
"""Contains the grid points g_n(x) with x=[0...1], and g=[0...1]
for Vegas integration. There are n-dim g_n functions.
Constraints : g(0)=0 and g(1)=1.
"""
def __init__(self, ndim, nbins):
self.g = zeros((ndim,nbins+1))
# a bit dirty trick: We will later use also g[-1] in interpolation, which should be set to zero, hence
        # we allocate dimension nbins+1, rather than nbins
self.ndim=ndim
self.nbins=nbins
# At the beginning we set g(x)=x
# The grid-points are x_0 = 1/N, x_1 = 2/N, ... x_{N-1}=1.0.
# Note that g(0)=0, and we skip this point on the mesh.
for idim in range(ndim):
self.g[idim,:nbins] = arange(1,nbins+1)/float(nbins)
def RefineGrid(self, imp):
# imp[idim,ibin] is the input function \widetilde{f}(g) from which we construct the grid
(ndim,nbins) = shape(imp)
gnew = zeros((ndim,nbins+1))
for idim in range(ndim):
avgperbin = sum(imp[idim,:])/nbins
#**** redefine the size of each bin ****
newgrid = zeros(nbins)
cur=0.0
newcur=0.0
thisbin = 0.0
ibin = -1
# we are trying to determine
# Int[ f(g) dg, {g, g_{i-1},g_i}] == I/N_g
# where I == avgperbin
for newbin in range(nbins-1): # all but the last bin, which is 1.0
while (thisbin < avgperbin) :
ibin+=1
thisbin += imp[idim,ibin] # adding widetilde{f}(g)
prev = cur # g^old_{l-1}
cur = self.g[idim,ibin] # g^old_{l}
# Explanation is in order :
# g^new_l should be somewhere between g^old_l and g^old_{l-1}, because if we add the last point
                # we exceeded the I/N_g value, while without the last point we do not have enough. Hence we need to interpolate.
# prev -- g^{old}_{l-1}
# cur -- g^{old}_l
# thisbin -- Sm = f_{l-k}+.... +f_{l-2}+f_{l-1}+f_l
# we know that Sm is just a bit more than we need, i.e., I/N_g, hence we need to compute how much more
# using linear interpolation :
# g^{new} = g_l - (g_l-g_{l-1}) * (f_{l-k}+....+f_{l-2}+f_{l-1}+f_l - I/N_g)/f_l
# clearly
# if I/N_g == f_{l-k}+....+f_{l-2}+f_{l-1}+f_l
# we will get g^{new} = g_l
# and if I/N_g == f_{l-k}+....+f_{l-2}+f_{l-1}
# we will get g^{new} = g_{l-1}
# and if I/N_g is between the two possibilities, we will get linear interpolation between
# g_{l-1} and g_l
#
thisbin -= avgperbin # thisbin <- (f_{l-k}+....+f_{l-2}+f_{l-1}+f_l - I/N_g)
dx=thisbin/imp[idim,ibin] # dx is between 0 and 1
# cur=g_l and prev=g_{l-1} and dx is the fraction of each we need to take for linear interpolation
newgrid[newbin] = cur + (prev-cur)*dx # linear interpolation between g_l and g_{l-1}
newgrid[nbins-1]=1.0
gnew[idim,:nbins]= newgrid
self.g = gnew
return gnew
We have now extended the class Grid so that it can also refine the grid.
from scipy import *
from numpy import *
unit=pi
ndim=3
maxeval=200000
exact = 1.3932 # exact value of the integral
cum = Cumulants()
nbins=128
nstart =10000
nincrease=5000
grid = Grid(ndim,nbins)
fxbin = Vegas_step2(my_integrant2, pi, maxeval, nstart, nincrease, grid, cum)
imp = Smoothen(fxbin)
grid.RefineGrid(imp)
print(cum.avg, '+-', cum.err, 'exact=', exact)
plot(grid.g[0,:nbins])
plot(grid.g[1,:nbins])
plot(grid.g[2,:nbins])
xlim([0,128])
show()
Vegas parameters:
  ndim = 3
  unit = 3.141592653589793
  maxeval = 200000
  nstart = 10000
  nincrease = 5000
  nbins = 128
  nbaths = 1000

1.3309249724420185 +- 0.024495231851185392 exact= 1.3932
def Vegas_step3(integrant, unit, maxeval, nstart, nincrease, grid, cum):
ndim, nbins = grid.ndim,grid.nbins # dimension of the integral, size of the grid for binning in each direction
unit_dim = unit**ndim # converts from unit cube integration to generalized cube with unit length
    nbatch=1000 # function will be evaluated in batches of 1000 evaluations at one time (for efficiency and storage issues)
neval=0
print ("""Vegas parameters:
ndim = """+str(ndim)+"""
unit = """+str(unit)+"""
maxeval = """+str(maxeval)+"""
nstart = """+str(nstart)+"""
nincrease = """+str(nincrease)+"""
nbins = """+str(nbins)+"""
nbaths = """+str(nbatch)+"\n")
bins = zeros((nbatch,ndim),dtype=int) # in which sampled bin does this point fall?
all_nsamples = nstart
for iter in range(1000): # NEW in step 3
wgh = zeros(nbatch) # weights for each random point in the batch
fxbin = zeros((ndim,nbins)) # after each iteration we reset the average function being binned
for nsamples in range(all_nsamples,0,-nbatch): # loop over all_nsample evaluations in batches of nbatch
n = min(nbatch,nsamples) # How many evaluations in this pass?
# We are integrating f(g_1(x),g_2(y),g_3(z))*dg_1/dx*dg_2/dy*dg_3/dz dx*dy*dz
# This is represented as 1/all_nsamples \sum_{x_i,y_i,z_i} f(g_1(x_i),g_2(y_i),g_3(z_i))*dg_1/dx*dg_2/dy*dg_3/dz
# where dg_1/dx = diff*NBINS
xr = random.random((n,ndim)) # generates 2-d array of random numbers in the interval [0,1)
for i in range(n):
weight = 1.0/all_nsamples
for dim in range(ndim):
# We want to evaluate the function f at point g(x), i.e, f(g_1(x),g_2(y),...)
# Here we transform the points x,y,z -> g_1(x), g_2(y), g_3(z)
# We hence want to evaluate g(x) ~ g(x[i]), where x is the random number and g is the grid function
# The discretized g(t) is defined on the grid :
# t[-1]=0, t[0]=1/N, t[1]=2/N, t[2]=3/N ... t[N-1]=1.
# We know that g(0)=0 and g(1)=1, so that g[-1]=0.0 and g[N-1]=1.0
# To interpolate g at x, we first compute i=int(x*N) and then we use linear interpolation
# g(x) = g[i-1] + (g[i]-g[i-1])*(x*N-i) ; if i>0
# g(x) = 0 + (g[0]-0)*(x*N-0) ; if i=0
#
pos = xr[i,dim]*nbins # which grid would it fit ? (x*N)
ipos = int(pos) # the grid position is ipos : int(x*N)==i
diff = grid.g[dim,ipos] - grid.g[dim,ipos-1] # g[i]-g[i-1]
# linear interpolation for g(x) :
xr[i,dim] = (grid.g[dim,ipos-1] + (pos-ipos)*diff)*unit # g(xr) ~ ( g[i-1]+(g[i]-g[i-1])*(x*N-i) )*[units]
bins[i,dim]=ipos # remember in which bin this random number falls.
weight *= diff*nbins # weight for this dimension is dg/dx = (g[i]-g[i-1])*N
# because dx = i/N - (i-1)/N = 1/N
                    wgh[i] = weight  # total weight is (dg/dx)*(dg/dy)*(dg/dz).../N_{samples}
# Here we evaluate function f on all randomly generated x points above
fx = integrant(xr) # n function evaluations required in single call
            neval += n       # We just added so many function evaluations
# Now we compute the integral as weighted average, namely, f(g(x))*dg/dx
wfun = wgh * fx # weight * function ~ f_i*w_i
cum.sum += sum(wfun) # sum_i f_i*w_i = <fw>
            wfun *= wfun           # careful : this is like (f_i * w_i/N)^2 hence 1/N (1/N (f_i*w_i)^2)
            cum.sqsum += sum(wfun) # sum_i (f_i*w_i/N)^2 = <fw^2>/all_nsamples
#
for dim in range(ndim): #new2
# Here we make a better approximation for the function, which we are integrating.
for i in range(n): #new2
fxbin[dim, bins[i,dim] ] += wfun[i] #new2: just bin the function f. We saved the bin position before.
w0 = sqrt(cum.sqsum*all_nsamples) # w0 = sqrt(<fw^2>)
w1 = (w0 + cum.sum)*(w0 - cum.sum) # w1 = (w0^2 - <fw>^2) = (<fw^2>-<fw>^2)
w = (all_nsamples-1)/w1 # w ~ 1/sigma_i^2 = (N-1)/(<fw^2>-<fw>^2)
        # Note that the variance of the MC sampling is Var(monte-f) = (<f^2>-<f>^2)/N = sigma_i^2, hence w ~ 1/sigma_i^2
cum.weightsum += w # weightsum ~ \sum_i 1/sigma_i^2
cum.avgsum += w*cum.sum # avgsum ~ \sum_i <fw>_i / sigma_i^2
cum.avg2sum += w*cum.sum**2 # avg2cum ~ \sum_i <fw>_i^2/sigma_i^2
cum.avg = cum.avgsum/cum.weightsum # I_best = (\sum_i <fw>_i/sigma_i^2 )/(\sum_i 1/sigma_i^2)
cum.err = sqrt(1/cum.weightsum) # err ~ sqrt(best sigma^2) = sqrt(1/(\sum_i 1/sigma_i^2))
# NEW in this step3
if iter>0:
cum.chisq = (cum.avg2sum - 2*cum.avgsum*cum.avg + cum.weightsum*cum.avg**2)/iter
print ("Iteration {:3d}: I= {:10.8f} +- {:10.8f} chisq= {:10.8f} number of evaluations = {:7d} ".format(iter+1, cum.avg*unit_dim, cum.err*unit_dim, cum.chisq, neval))
imp = Smoothen(fxbin)
grid.RefineGrid(imp)
cum.sum=0 # clear the partial sum for the next step
cum.sqsum=0
all_nsamples += nincrease # for the next time, increase the number of steps a bit
if (neval>=maxeval): break
cum.avg *= unit**ndim
cum.err *= unit**ndim
from scipy import *
from numpy import *
unit=pi
ndim=3
maxeval=2000000
exact = 1.3932 # exact value of the integral
cum = Cumulants()
nbins=128
nstart =100000
nincrease=5000
grid = Grid(ndim,nbins)
random.seed(0)
Vegas_step3(my_integrant2, pi, maxeval, nstart, nincrease, grid, cum)
print (cum.avg, '+-', cum.err, 'exact=', exact, 'real error=', abs(cum.avg-exact)/exact)
plot(grid.g[0,:nbins])
plot(grid.g[1,:nbins])
plot(grid.g[2,:nbins])
show()
Vegas parameters:
  ndim = 3
  unit = 3.141592653589793
  maxeval = 2000000
  nstart = 100000
  nincrease = 5000
  nbins = 128
  nbaths = 1000

Iteration   1: I= 1.36762346 +- 0.01023309 chisq= 0.00000000 number of evaluations =  100000
Iteration   2: I= 1.38493614 +- 0.00552045 chisq= 4.03725299 number of evaluations =  205000
Iteration   3: I= 1.39107078 +- 0.00365694 chisq= 3.11889891 number of evaluations =  315000
Iteration   4: I= 1.39220291 +- 0.00285603 chisq= 2.16116961 number of evaluations =  430000
Iteration   5: I= 1.39391923 +- 0.00246919 chisq= 1.97837591 number of evaluations =  550000
Iteration   6: I= 1.39327395 +- 0.00216911 chisq= 1.64253221 number of evaluations =  675000
Iteration   7: I= 1.39237495 +- 0.00190896 chisq= 1.49574064 number of evaluations =  805000
Iteration   8: I= 1.39084621 +- 0.00171092 chisq= 1.74779014 number of evaluations =  940000
Iteration   9: I= 1.39124007 +- 0.00159307 chisq= 1.57911830 number of evaluations = 1080000
Iteration  10: I= 1.39178313 +- 0.00148354 chisq= 1.50089423 number of evaluations = 1225000
Iteration  11: I= 1.39129403 +- 0.00138447 chisq= 1.43499906 number of evaluations = 1375000
Iteration  12: I= 1.39136789 +- 0.00129897 chisq= 1.30670608 number of evaluations = 1530000
Iteration  13: I= 1.39192235 +- 0.00122906 chisq= 1.34277224 number of evaluations = 1690000
Iteration  14: I= 1.39210209 +- 0.00116712 chisq= 1.25622458 number of evaluations = 1855000
Iteration  15: I= 1.39289525 +- 0.00112200 chisq= 1.60155180 number of evaluations = 2025000

1.3928952457852937 +- 0.0011219958675160147 exact= 1.3932 real error= 0.00021874405304789438
Here we are going to speed up the code by using vectorized numpy capabilities and numba. numba will be used to fill the fxbin array from the wfun array, which contains the projection of the integrated function onto each axis. To interpolate the grid at the random points xr, we will use numpy vectorized functionality and fancy indexing to remove the loop over the batch. All loops over the batch size n are gone in the final version of the code.
from numba import jit
@jit(nopython=True)
def SetFxbin(fxbin,bins,wfun):
(n,ndim) = bins.shape
for dim in range(ndim):
# Here we make a better approximation for the function, which we are integrating.
for i in range(n):
fxbin[dim, bins[i,dim] ] += abs(wfun[i]) # just bin the function f. We saved the bin position before.
def Vegas_step3b(integrant, unit, maxeval, nstart, nincrease, grid, cum):
ndim, nbins = grid.ndim,grid.nbins # dimension of the integral, size of the grid for binning in each direction
unit_dim = unit**ndim # converts from unit cube integration to generalized cube with unit length
    nbatch=1000 # function will be evaluated in batches of 1000 evaluations at one time (for efficiency and storage issues)
neval=0
print ("""Vegas parameters:
ndim = """+str(ndim)+"""
unit = """+str(unit)+"""
maxeval = """+str(maxeval)+"""
nstart = """+str(nstart)+"""
nincrease = """+str(nincrease)+"""
nbins = """+str(nbins)+"""
nbaths = """+str(nbatch)+"\n")
bins = zeros((nbatch,ndim),dtype=int) # in which sampled bin does this point fall?
all_nsamples = nstart
for iter in range(1000): # NEW in step 3
wgh = zeros(nbatch) # weights for each random point in the batch
fxbin = zeros((ndim,nbins)) # after each iteration we reset the average function being binned
for nsamples in range(all_nsamples,0,-nbatch): # loop over all_nsample evaluations in batches of nbatch
n = min(nbatch,nsamples) # How many evaluations in this pass?
# We are integrating f(g_1(x),g_2(y),g_3(z))*dg_1/dx*dg_2/dy*dg_3/dz dx*dy*dz
# This is represented as 1/all_nsamples \sum_{x_i,y_i,z_i} f(g_1(x_i),g_2(y_i),g_3(z_i))*dg_1/dx*dg_2/dy*dg_3/dz
# where dg_1/dx = diff*NBINS
xr = random.random((n,ndim)) # generates 2-d array of random numbers in the interval [0,1)
pos = xr*nbins # (x*N)
bins = array(pos,dtype=int) # which grid would it fit ? (x*N)
            wgh = ones(n)/all_nsamples       # one 1/N weight per point in this batch (n, not nbatch, so a partial last batch also works)
for dim in range(ndim):
# We want to evaluate the function f at point g(x), i.e, f(g_1(x),g_2(y),...)
# Here we transform the points x,y,z -> g_1(x), g_2(y), g_3(z)
# We hence want to evaluate g(x) ~ g(x[i]), where x is the random number and g is the grid function
# The discretized g(t) is defined on the grid :
# t[-1]=0, t[0]=1/N, t[1]=2/N, t[2]=3/N ... t[N-1]=1.
# We know that g(0)=0 and g(1)=1, so that g[-1]=0.0 and g[N-1]=1.0
# To interpolate g at x, we first compute i=int(x*N) and then we use linear interpolation
# g(x) = g[i-1] + (g[i]-g[i-1])*(x*N-int(x*N))
gi = grid.g[dim,bins[:,dim]] # g[i]
gm = grid.g[dim,bins[:,dim]-1] # g[i-1]
diff = gi - gm # g[i]-g[i-1]
gx = gm + (pos[:,dim]-bins[:,dim])*diff # linear interpolation g(xr)
xr[:,dim] = gx*unit # xr <- g(xr)
wgh *= diff*nbins # wgh = prod_{dim} dg/dx
# Here we evaluate function f on all randomly generated x points above
fx = integrant(xr) # n function evaluations required in single call
            neval += n       # We just added so many function evaluations
# Now we compute the integral as weighted average, namely, f(g(x))*dg/dx
wfun = wgh * fx # weight * function ~ f_i*w_i
cum.sum += sum(wfun) # sum_i f_i*w_i = <fw>
            wfun *= wfun           # careful : this is like (f_i * w_i/N)^2 hence 1/N (1/N (f_i*w_i)^2)
            cum.sqsum += sum(wfun) # sum_i (f_i*w_i/N)^2 = <fw^2>/all_nsamples
#
SetFxbin(fxbin,bins,wfun)
#for dim in range(ndim): #new2
# # Here we make a better approximation for the function, which we are integrating.
# for i in range(n): #new2
# fxbin[dim, bins[i,dim] ] += wfun[i] #new2: just bin the function f. We saved the bin position before.
w0 = sqrt(cum.sqsum*all_nsamples) # w0 = sqrt(<fw^2>)
w1 = (w0 + cum.sum)*(w0 - cum.sum) # w1 = (w0^2 - <fw>^2) = (<fw^2>-<fw>^2)
w = (all_nsamples-1)/w1 # w ~ 1/sigma_i^2 = (N-1)/(<fw^2>-<fw>^2)
        # Note that the variance of the MC sampling is Var(monte-f) = (<f^2>-<f>^2)/N = sigma_i^2, hence w ~ 1/sigma_i^2
cum.weightsum += w # weightsum ~ \sum_i 1/sigma_i^2
cum.avgsum += w*cum.sum # avgsum ~ \sum_i <fw>_i / sigma_i^2
cum.avg2sum += w*cum.sum**2 # avg2cum ~ \sum_i <fw>_i^2/sigma_i^2
cum.avg = cum.avgsum/cum.weightsum # I_best = (\sum_i <fw>_i/sigma_i^2 )/(\sum_i 1/sigma_i^2)
cum.err = sqrt(1/cum.weightsum) # err ~ sqrt(best sigma^2) = sqrt(1/(\sum_i 1/sigma_i^2))
# NEW in this step3
if iter>0:
cum.chisq = (cum.avg2sum - 2*cum.avgsum*cum.avg + cum.weightsum*cum.avg**2)/iter
print ("Iteration {:3d}: I= {:10.8f} +- {:10.8f} chisq= {:10.8f} number of evaluations = {:7d} ".format(iter+1, cum.avg*unit_dim, cum.err*unit_dim, cum.chisq, neval))
imp = Smoothen(fxbin)
grid.RefineGrid(imp)
cum.sum=0 # clear the partial sum for the next step
cum.sqsum=0
all_nsamples += nincrease # for the next time, increase the number of steps a bit
if (neval>=maxeval): break
cum.avg *= unit**ndim
cum.err *= unit**ndim
from scipy import *
from numpy import *
unit=pi
ndim=3
maxeval=2000000
exact = 1.3932 # exact value of the integral
cum = Cumulants()
nbins=128
nstart =100000
nincrease=5000
grid = Grid(ndim,nbins)
random.seed(0)
Vegas_step3b(my_integrant2, pi, maxeval, nstart, nincrease, grid, cum)
print (cum.avg, '+-', cum.err, 'exact=', exact, 'real error=', abs(cum.avg-exact)/exact)
plot(grid.g[0,:nbins])
plot(grid.g[1,:nbins])
plot(grid.g[2,:nbins])
show()
Vegas parameters:
  ndim = 3
  unit = 3.141592653589793
  maxeval = 2000000
  nstart = 100000
  nincrease = 5000
  nbins = 128
  nbaths = 1000

Iteration   1: I= 1.36762346 +- 0.01023309 chisq= 0.00000000 number of evaluations =  100000
Iteration   2: I= 1.38493614 +- 0.00552045 chisq= 4.03725299 number of evaluations =  205000
Iteration   3: I= 1.39107078 +- 0.00365694 chisq= 3.11889891 number of evaluations =  315000
Iteration   4: I= 1.39220291 +- 0.00285603 chisq= 2.16116961 number of evaluations =  430000
Iteration   5: I= 1.39391923 +- 0.00246919 chisq= 1.97837591 number of evaluations =  550000
Iteration   6: I= 1.39327395 +- 0.00216911 chisq= 1.64253221 number of evaluations =  675000
Iteration   7: I= 1.39237495 +- 0.00190896 chisq= 1.49574064 number of evaluations =  805000
Iteration   8: I= 1.39084621 +- 0.00171092 chisq= 1.74779014 number of evaluations =  940000
Iteration   9: I= 1.39124007 +- 0.00159307 chisq= 1.57911830 number of evaluations = 1080000
Iteration  10: I= 1.39178313 +- 0.00148354 chisq= 1.50089423 number of evaluations = 1225000
Iteration  11: I= 1.39129403 +- 0.00138447 chisq= 1.43499906 number of evaluations = 1375000
Iteration  12: I= 1.39136789 +- 0.00129897 chisq= 1.30670608 number of evaluations = 1530000
Iteration  13: I= 1.39192235 +- 0.00122906 chisq= 1.34277224 number of evaluations = 1690000
Iteration  14: I= 1.39210209 +- 0.00116712 chisq= 1.25622458 number of evaluations = 1855000
Iteration  15: I= 1.39289525 +- 0.00112200 chisq= 1.60155180 number of evaluations = 2025000

1.3928952457852937 +- 0.0011219958675160147 exact= 1.3932 real error= 0.00021874405304789438
Homework¶
- Generalize the Vegas algorithm so that the integration limits are given by arbitrary numbers $[a,b]$. You can still assume a supercube in which all dimensions have the same limits $[a,b]$.
- Test Vegas on the same example $f=1/(1-\cos(k_x)\cos(k_y)\cos(k_z))/\pi^3$ but use limits $[-\pi,\pi]$ instead of $[0,\pi]$.
- Speed up the part of the code that redefines the grid (RefineGrid).
- Generalize Vegas so that it works for complex functions.
- Using Vegas, evaluate the following Lindhard function:
Here $\varepsilon_\vec{k}=k^2-k_F^2$.
We will use $k_F=\frac{(9\pi/4)^{1/3}}{r_s}$ with $r_s=2$, $f(x) = 1/(\exp(x/T)+1)$, $T=0.02\, k_F^2$, $\delta = 0.002\, k_F^2$, $\Omega=0$, $q=0.1\, k_F$. The integration limits can be set to $[-3 k_F,3 k_F]$.
The result for $P(\Omega=0,q\ll k_F)$ should be close to $-n_F$, where $n_F = \frac{k_F}{2\pi^2}$.
- Optional: Change the Vegas algorithm so that it computes $P(\Omega,\vec{q})$ at once for an array of $\Omega$ points, such as linspace(0,0.5*kF*kF,200). Use $P(\Omega=0)$ to redefine the grid, so that we have the most efficient integration at $\Omega=0$ (where the function is hardest to integrate).