This material is modeled after Morten Hjorth-Jensen's course, https://compphysics.github.io/MachineLearning/doc/web/course.html. Please see his lecture notes for a deeper treatment of Machine Learning.
Linear regression is a low-level (or first) step in machine learning (ML), but all its equations can be derived analytically, and it allows one to introduce concepts that are important throughout ML, such as the choice of model, the assumptions about the error, the cost function, regularization, and correlations.
Some examples of typical ML problems, specified by their inputs $x_i$ and outputs, are: i) credit-card information for customers: $x_i$ is the amount of the loan, the income, the marital status, etc., and the output is the prediction of whether the customer will default, i.e., true/false; ii) handwriting recognition: the inputs $x_i$ are the pixels of the image, and the output is a number or a letter; iii) cancer data: the inputs $x_i$ are characteristics of the tumor such as its size, radius, smoothness, symmetry, fractal dimension, etc., and the output is $M$=malignant or $B$=benign.
For linear regression we assume that the data can be fit by a continuous, deterministic function $\widetilde{y}(x)$, and that the data carries some extra random noise, which is normally distributed, i.e.,
$$y_i = \widetilde{y}(x_i) + \varepsilon_i$$where $\widetilde{y}(x)$ can be expanded in terms of some continuous functions, and $\varepsilon_i \sim \eta\, N(0,\sigma^2)$, where $\eta$ is some small number and $N(0,\sigma^2)$ is the normal (Gaussian) distribution with vanishing mean and width $\sigma$.
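As a minimal sketch of this assumption (with a hypothetical quadratic choice for $\widetilde{y}(x)$ and arbitrary values for $\eta$ and $\sigma$), such data can be generated as follows:
import numpy as np
np.random.seed(0)                         # reproducible noise
n = 100
x = np.linspace(0, 1, n)
y_model = 2.0 + 3.0*x - 1.5*x**2          # a hypothetical deterministic ytilde(x)
eta, sigma = 0.1, 1.0                     # small amplitude eta, Gaussian width sigma
eps = eta * np.random.normal(0.0, sigma, size=n)
y = y_model + eps                         # y_i = ytilde(x_i) + eps_i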
To judge the quality of a fit we need a cost function. The simplest choice is the mean squared error (MSE), which is equivalent to the $\chi^2$ function:
$$C = \frac{1}{n} \sum_i (\widetilde{y}(x_i)-y_i)^2$$but different cost functions, such as those used in Ridge and Lasso regression, are also often employed.
The cross-entropy is used for binary models (discrete with two outcomes):
$$C=-\sum_i \left[y_i \log p(y_i=1|x_i)+(1-y_i) \log\left(1-p(y_i=1|x_i)\right)\right]$$The covariance $cov(\vec{x}_i,\vec{x}_j)$ is defined for two vectors whose components run over the $n$ samples, $\vec{x}_i=\{x_{k,i}\}$ and $\vec{x}_j=\{x_{k,j}\}$, and takes the form \begin{eqnarray} && cov(\vec{x}_i,\vec{x}_j)= \frac{1}{n} \sum_{k=1}^{n} (x_{k,i}-\langle x_i\rangle)(x_{k,j}-\langle x_j\rangle)\\ && \langle x_i\rangle=\frac{1}{n}\sum_{k=1}^{n} x_{k,i}\\ \end{eqnarray}
The diagonal part of the covariance matrix is the variance, i.e., \begin{eqnarray} cov(\vec{x}_i,\vec{x}_i) \equiv \sigma^2_i = \frac{1}{n} \sum_{k=1}^{n} (x_{k,i}-\langle x_i\rangle)^2, \end{eqnarray} which can be factored out of the covariance matrix to define the correlation matrix. The matrix elements of the correlation matrix are simply given by \begin{eqnarray} && corr(\vec{x}_i,\vec{x}_j)= \frac{cov(\vec{x}_i,\vec{x}_j)}{\sqrt{cov(\vec{x}_i,\vec{x}_i)\; cov(\vec{x}_j,\vec{x}_j)}}\\ && corr(\vec{x}_i,\vec{x}_j)=\frac{1}{\sigma_i \sigma_j} \frac{1}{n}\sum_{k=1}^{n} (x_{k,i}-\langle x_i\rangle)(x_{k,j}-\langle x_j\rangle)\\ \end{eqnarray}
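A minimal numpy sketch of these two matrices, for some hypothetical data with three features, could look as follows (note that np.cov and np.corrcoef use the unbiased $1/(n-1)$ normalization by default, whereas the formulas above use $1/n$):
import numpy as np
np.random.seed(1)
data = np.random.randn(100, 3)            # 100 samples of 3 features
cov  = np.cov(data, rowvar=False)         # covariance matrix, shape (3, 3)
corr = np.corrcoef(data, rowvar=False)    # correlation matrix, with ones on the diagonal
print(cov)
print(corr)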
In linear regression we try to represent the continuous function $\widetilde{y}(x)$ as a linear superposition of known functions $g_j(x)$ with $j\in[0,\dots,p-1]$. We want to find the coefficients $\beta_j$ of the linear superposition, i.e.,
\begin{eqnarray} \widetilde{y}(x_i) = \sum_{j=0}^{p-1} g_j(x_i) \beta_j \end{eqnarray}The functions $g_j(x)$ can be polynomials $g_j(x)=x^j$, Fourier components $g_j(x)=e^{2\pi \mathrm{i} j x}$, Legendre or Chebyshev polynomials, or any other complete set of functions.
Next we define the design matrix $\mathbf{X}$, which is a rectangular matrix in $\mathbb{R}^{n\times p}$, where $n$ is the number of data points and $p$ is the number of functions we will use in the expansion:
\begin{equation} \mathbf{X} = \begin{bmatrix} g_0(x_0) & g_1(x_0) & \cdots & g_{p-1}(x_0) \\ g_0(x_1) & g_1(x_1) & \cdots & g_{p-1}(x_1) \\ \vdots & \vdots & \ddots & \vdots \\ g_0(x_{n-1}) & g_1(x_{n-1}) &\cdots & g_{p-1}(x_{n-1}) \end{bmatrix} \end{equation}Normally we expect $n>p$, so that the data is not overfitted.
We also define the vector of unknown parameters $\boldsymbol{\beta}^T=[\beta_0,\beta_1,\dots,\beta_{p-1}]$, and in terms of the two we can write the matrix equation
\begin{equation} \widetilde{y} = \mathbf{X} \boldsymbol{\beta} \end{equation}The design matrix contains the complexity of the model and is defined solely by the input data and the choice of the model. The model is linear in the unknown parameters $\boldsymbol{\beta}$, hence the name linear regression.
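As a minimal sketch, a design matrix for the hypothetical polynomial basis $g_j(x)=x^j$ can be assembled as follows:
import numpy as np
x = np.linspace(0, 1, 20)                          # n = 20 data points
p = 4                                              # number of basis functions
X = np.column_stack([x**j for j in range(p)])      # design matrix of shape (n, p) = (20, 4)
print(X.shape)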
The cost function $C$ for the model gives us the quality of the fit. It is now a function of the unknown parameters:
\begin{equation} C(\boldsymbol{\beta})=\frac{1}{n}\sum_{i=0}^{n-1} (y_i-\widetilde{y}(x_i))^2 = \frac{1}{n}\sum_{i=0}^{n-1} (y_i-(\mathbf{X}\boldsymbol{\beta})_i)^2 \end{equation}and should be minimized, i.e., $\frac{\partial C(\beta)}{\partial \beta}=0$. When the minimum is reached, we obtain the optimized parameters, which we will call $\boldsymbol{\beta}^*= \mathrm{arg}\min_{\boldsymbol{\beta}\in \mathbb{R}^p} C(\boldsymbol{\beta})$.
The derivative $$\frac{\partial C(\beta)}{\partial \beta}$$ can readily be obtained
\begin{eqnarray} \frac{\partial C(\beta)}{\partial \beta_l}=\frac{\partial}{\partial \beta_l}\frac{1}{n}\sum_{i=0}^{n-1}(y_i - X_{ij}\beta_j)^2 \end{eqnarray}Here we use Einstein notation in which the sum over the repeated indices is assumed. \begin{eqnarray} \frac{\partial C(\beta)}{\partial \beta_l}=\frac{1}{n}\sum_i 2(y_i-X_{ij}\beta_j)(-1)X_{il}= -2 \frac{1}{n} (\mathbf{X}^T(\boldsymbol{y}-\mathbf{X} \boldsymbol{\beta}))_l \end{eqnarray}
The minimum is reached at $\frac{\partial C(\beta)}{\partial \beta}=0$: \begin{eqnarray} \mathbf{X}^T \boldsymbol{y}=\mathbf{X}^T \mathbf{X} \boldsymbol{\beta^*} \end{eqnarray} or \begin{eqnarray} \boldsymbol{\beta^*}= (\mathbf{X}^T \mathbf{X})^{-1} \mathbf{X}^T \boldsymbol{y} \end{eqnarray} and therefore optimal $\widetilde{y}$ is \begin{eqnarray} \widetilde{y} = \mathbf{X}\boldsymbol{\beta^*}= \mathbf{X}(\mathbf{X}^T \mathbf{X})^{-1} \mathbf{X}^T \boldsymbol{y} \end{eqnarray}
The last equation says that the optimal model is given by the projection of the data $y$ onto the space spanned by the functions $g_j(x_i)$, as encoded in the design matrix $\mathbf{X}$.
One can readily check that $P\equiv\mathbf{X}(\mathbf{X}^T \mathbf{X})^{-1} \mathbf{X}^T$ has the defining property of a projector, $P^2=P$.
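A quick numerical check of this projector property, using a hypothetical random design matrix, might look like this:
import numpy as np
np.random.seed(2)
X = np.random.randn(20, 4)                         # a hypothetical 20 x 4 design matrix
P = X @ np.linalg.inv(X.T @ X) @ X.T               # P = X (X^T X)^{-1} X^T
print(np.allclose(P @ P, P))                       # True: P is a projector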
How do we know this is a minimum? We compute the second derivative and determine its sign. The second derivative is \begin{eqnarray} \frac{\partial^2 C(\beta)}{\partial \beta_j \partial \beta_l}= \frac{\partial}{\partial \beta_j } \frac{1}{n}\sum_i 2(y_i-X_{ik}\beta_k)(-1)X_{il}= \frac{2}{n}\sum_i X_{ij}X_{il}= \frac{2}{n} (\mathbf{X}^T \mathbf{X})_{jl} \end{eqnarray}
We will show below that $\mathbf{X}^T \mathbf{X}$ is a positive semi-definite matrix, hence $C(\boldsymbol{\beta}^*)$ is a global minimum.
If the functions $g_j(x)$ are linearly dependent (or, in ML language, the features are linearly dependent), the design matrix $\mathbf{X}$ can be singular, so that $Det(\mathbf{X}^T \mathbf{X})=0$. In that case the inversion of this matrix is not possible, and we need to either use the pseudo-inverse or regularize the cost function, as discussed below.
If $Det(\mathbf{X}^T \mathbf{X})=0$, we can still use the SVD decomposition to invert the matrix. This is called the pseudo-inverse, as the true inverse does not exist.
If a matrix is singular or non-square, we can still define a pseudo-inverse $A^+$, such that $A A^+ A= A$ and $A^+ A A^+=A^+$, where $A A^+$ and $A^+ A$ are Hermitian but are not the identity matrix. This pseudo-inverse is obtained via the SVD.
For any matrix (square or non-square), the following decomposition exists:
$$\mathbf{X} = U \sigma V^\dagger$$where $U$ and $V$ are unitary matrices ($U^\dagger U=1$ and $V^\dagger V=1$), and $\sigma$ is a rectangular diagonal matrix with real, non-negative singular values on the diagonal. If $\mathbf{X}\in \mathbb{R}^{n\times p}$, then $U\in \mathbb{R}^{n\times n}$, $V\in \mathbb{R}^{p\times p}$, and $\sigma$ contains $\min(n,p)$ singular values.
Next we use the SVD of the design matrix $\mathbf{X}$ to obtain $$\mathbf{X}=U\sigma V^T, $$ and therefore $$\mathbf{X}^T=V \sigma U^T.$$ Note that the number of singular values is $p$, since $p<n$. We then have $$\mathbf{X}^T \mathbf{X}=V \sigma U^T U\sigma V^T = V \sigma^2 V^T$$ and the pseudo-inverse of this matrix is $$(\mathbf{X}^T \mathbf{X})^{-1}= V \frac{1}{\sigma^2} V^T$$ where the singular values that vanish are omitted.
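A minimal numpy sketch of the SVD-based pseudo-inverse (here applied directly to $\mathbf{X}$, as used for $\beta^*$ below, with a hypothetical cutoff for "vanishing" singular values) could look as follows:
import numpy as np
np.random.seed(3)
X = np.random.randn(20, 4)
X[:, 3] = X[:, 0] + X[:, 1]                       # make the columns linearly dependent
U, s, Vt = np.linalg.svd(X, full_matrices=False)  # X = U diag(s) Vt
tol = 1e-10                                       # hypothetical cutoff for sigma ~ 0
s_inv = np.where(s > tol, 1.0/s, 0.0)             # omit vanishing singular values
X_pinv = Vt.T @ np.diag(s_inv) @ U.T              # pseudo-inverse of X
print(np.allclose(X @ X_pinv @ X, X))             # defining property A A^+ A = A holds
print(np.allclose(X_pinv, np.linalg.pinv(X)))     # typically matches numpy's pinv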
We also point out that $$\mathbf{X}^T \mathbf{X} V = V \sigma^2 $$ which shows that the columns of $V$ are eigenvectors of the matrix $\mathbf{X}^T \mathbf{X}$ and $\sigma^2$ are the eigenvalues of the same matrix. Since $\sigma\in \mathbb{R}$, it follows that $\sigma^2 \ge 0$. This proves the earlier claim that the second derivative of the cost function is non-negative and hence $\beta^*$ is a global minimum. Namely, $$\frac{\partial^2 C(\beta)}{\partial \beta_j \partial \beta_l}=\frac{2}{n} (\mathbf{X}^T \mathbf{X})_{jl}=\frac{2}{n} (V \sigma^2 V^T)_{jl}$$ is a positive semi-definite matrix because all its eigenvalues are non-negative.
Finally, we also express $\beta^*$ and $\widetilde{y}$ in terms of the singular values $\sigma$: $$\beta^*= (\mathbf{X}^T \mathbf{X})^{-1} \mathbf{X}^T y = V \frac{1}{\sigma} U^T y$$ and $$\widetilde{\mathbf{y}}_i= (\mathbf{X} \boldsymbol{\beta}^*)_i= \sum_{l,\sigma_l>0} U_{il} U^T_{lj} y_j.$$
Note that $U U^T=1$, but the sum above runs only over components for which $\sigma_l>0$, hence $\sum'_l U_{il} U^T_{lj}$ above is a projection and not the identity. Only if $\mathbf{X}$ is a square non-singular matrix is $\sum'_l U_{il} U^T_{lj}$ equal to the identity, in which case $\widetilde{y}=y$.
As mentioned above, different types of cost functions exist, which regularize the singularity of $\mathbf{X}^T \mathbf{X}$ in different ways. The most famous are Ridge regression and Lasso regression.
In Ridge regression we add a Lagrange multiplier $\lambda$ such that $\sum_j \beta_j^2$ (or equivalently $||\boldsymbol{\beta}||_2^2$) does not explode. At the same time, it also regularizes the inverse and hence allows one to avoid the pseudo-inverse.
The Ridge cost function is $$C(\beta)=\frac{1}{n} \sum_{i=0}^{n-1} (y_i-\widetilde{y}_i)^2 +\lambda \frac{1}{n}\sum_{j=0}^{p-1}\beta_j^2$$ which is a constrained optimization that requires $||\boldsymbol{\beta}||_2^2$ to be as small as possible ($\lambda>0$); hence the purpose of such regularization is to prevent the parameters $\beta$ from becoming too large. In ML we typically do not optimize $C$ with respect to $\lambda$, but rather keep it as a hyperparameter, i.e., another parameter of the model that allows one to tune the model to best describe the data.
We can take the derivative of the cost function as before, to obtain \begin{eqnarray} \frac{\partial C(\beta)}{\partial \beta_l}=\frac{1}{n}\sum_i 2(y_i-X_{ij}\beta_j)(-1)X_{il} +\frac{2}{n}\lambda \beta_l= 2 \frac{1}{n} (\mathbf{X}^T(\mathbf{X} \boldsymbol{\beta}-\boldsymbol{y})+\lambda \boldsymbol{\beta})_l \end{eqnarray} which has a minimum at $$(\mathbf{X}^T \mathbf{X}+\lambda \mathbf{1}) \boldsymbol{\beta}^*=\mathbf{X}^T y$$ or $$\boldsymbol{\beta}^*=(\mathbf{X}^T \mathbf{X}+\lambda \mathbf{1})^{-1}\mathbf{X}^T y$$ where $\mathbf{1}$ is the identity matrix. This clearly avoids the singularity of the inverse of $\mathbf{X}^T \mathbf{X}$, because $\lambda>0$ and $\mathbf{X}^T \mathbf{X}$ is a positive semi-definite matrix.
The parameter $\lambda$ is also called the shrinkage parameter, because it removes singular values which are small, i.e., it removes the less important degrees of freedom.
To see that, we use the SVD decomposition $\mathbf{X}=U \sigma V^T$ as before, and we find $\mathbf{X}^T \mathbf{X}+\lambda\mathbf{1}=V (\sigma^2+\lambda) V^T$, hence $$\beta^*_{Ridge}=(\mathbf{X}^T \mathbf{X}+\lambda\mathbf{1})^{-1} \mathbf{X}^T y=V \frac{1}{\sigma^2+\lambda} V^T V\sigma U^T y = V \frac{\sigma}{\sigma^2+\lambda} U^T y$$ hence, compared to ordinary linear regression, Ridge regression replaces $$\frac{1}{\sigma}\rightarrow \frac{\sigma}{\sigma^2+\lambda}$$ which essentially suppresses singular values with $\sigma^2\ll\lambda$, i.e., we keep only singular values comparable to or larger than $\sqrt{\lambda}$ in the final result, shrinking the set of parameters $\beta$.
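A minimal sketch comparing this closed-form Ridge solution with the scikit-learn implementation, for a hypothetical design matrix and an arbitrary $\lambda$, might look as follows (fit_intercept=False is used so that both solve the same equation $(\mathbf{X}^T\mathbf{X}+\lambda\mathbf{1})\boldsymbol{\beta}=\mathbf{X}^T y$):
import numpy as np
from sklearn.linear_model import Ridge
np.random.seed(4)
X = np.random.randn(50, 5)                         # hypothetical design matrix
y = np.random.randn(50)                            # hypothetical data
lam = 0.1                                          # arbitrary hyperparameter lambda
beta_ridge = np.linalg.solve(X.T @ X + lam*np.eye(5), X.T @ y)   # closed-form Ridge solution
rr = Ridge(alpha=lam, fit_intercept=False).fit(X, y)
print(np.allclose(beta_ridge, rr.coef_))           # the two solutions should coincide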
Another type of regression is called Lasso regression, which adds the absolute values of the parameters $\beta$ to the cost function, i.e., $$C(\beta)=\frac{1}{n} \sum_{i=0}^{n-1} (y_i-\widetilde{y}_i)^2 +\lambda \frac{1}{n}\sum_{j=0}^{p-1}|\beta_j|$$ which gives the derivative \begin{eqnarray} \frac{\partial C(\beta)}{\partial \beta_l}=\frac{1}{n}\sum_i 2(y_i-X_{ij}\beta_j)(-1)X_{il} +\frac{1}{n}\lambda\; \mathrm{sign}(\beta_l)= 2 \frac{1}{n} (\mathbf{X}^T(\mathbf{X} \boldsymbol{\beta}-\boldsymbol{y})+\frac{\lambda}{2} \mathrm{sign}(\boldsymbol{\beta}))_l \end{eqnarray} and requires one to solve \begin{eqnarray} \mathbf{X}^T \mathbf{y}=\mathbf{X}^T\mathbf{X} \boldsymbol{\beta}^* + \frac{\lambda}{2} \mathrm{sign}(\boldsymbol{\beta}^*) \end{eqnarray} No closed-form analytical solution for $\beta^*$ exists for this equation; it can only be solved numerically.
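In practice one therefore uses an iterative solver. A minimal sketch with scikit-learn's Lasso class, for a hypothetical design matrix and an arbitrary penalty (note that scikit-learn's alpha is defined with a slightly different normalization than the $\lambda$ above), could look like this:
import numpy as np
from sklearn.linear_model import Lasso
np.random.seed(5)
X = np.random.randn(50, 5)                         # hypothetical design matrix
y = X @ np.array([1.0, 0.0, -2.0, 0.0, 0.5]) + 0.1*np.random.randn(50)
lasso = Lasso(alpha=0.05, fit_intercept=False)     # alpha plays the role of lambda here
lasso.fit(X, y)                                    # solved iteratively (coordinate descent)
print(lasso.coef_)                                 # the l1 penalty tends to drive small coefficients to zero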
We start by comparing our linear regression results with those of the Scikit-Learn library.
To demonstrate these capabilities, we will fit nuclear binding energies.
A popular and physically intuitive model which can be used to parametrize the experimental binding energies of all nuclei in the periodic table is the so-called liquid drop model. The ansatz is based on the following expression $$BE(N,Z) = a_1A-a_2A^{2/3}-a_3\frac{Z^2}{A^{1/3}}-a_4\frac{(N-Z)^2}{A},$$ where $A$ stands for the number of nucleons and the $a_i$'s are parameters which are determined by a fit to the experimental data.
To arrive at the above expression we have made the following assumptions:
There is a volume term $a_1A$ proportional to the number of nucleons (the energy is an extensive quantity). When an assembly of nucleons of the same size is packed together into the smallest possible volume, each interior nucleon is in contact with a certain number of other nucleons. This contribution is proportional to the volume.
There is a surface energy term $a_2A^{2/3}$. The assumption here is that a nucleon at the surface of a nucleus interacts with fewer other nucleons than one in the interior of the nucleus and hence its binding energy is less. This surface energy term takes that into account and is therefore negative and is proportional to the surface area.
There is a Coulomb energy term $a_3\frac{Z^2}{A^{1/3}}$. The electric repulsion between each pair of protons in a nucleus yields less binding.
There is an asymmetry term $a_4\frac{(N-Z)^2}{A}$. This term is associated with the Pauli exclusion principle and reflects the fact that the proton-neutron interaction is more attractive on the average than the neutron-neutron and proton-proton interactions.
We will fit the binding energy $BE$ as a function of $A$ with functions $g_0(x)=1$, $g_1(x)=x$, $g_2(x)=x^{2/3}$, $g_3(x)=x^{-1/3}$, $g_4(x)=1/x$.
The binding energies are in file "NucleousEnergy.dat". The Python code follows here.
# The binding energy as a function of the mass number A (== x) is stored in this file
from numpy import *
from pylab import *
na_data = loadtxt('NucleousEnergy.dat').T
x,y = na_data
plot(x,y)
show()
# Setting up the design matrix
X=zeros((len(x),5))
X[:,0]=1          # g_0(x) = 1
X[:,1]=x          # g_1(x) = x
X[:,2]=x**(2/3)   # g_2(x) = x^(2/3)
X[:,3]=x**(-1/3)  # g_3(x) = x^(-1/3)
X[:,4]=1./x       # g_4(x) = 1/x
from sklearn.linear_model import LinearRegression
# Using scikit to get coefficients
lg = LinearRegression()
clf=lg.fit(X,y)
yt=clf.predict(X) # widetilde(y)
This is equivalent code, but using our own equations derived above.
# matrix inversion to find beta
Beta = dot(linalg.inv(X.T @ X) @ X.T, y)
# and then make the prediction
ytilde = X @ Beta
# comparing the data with the library regression and with our own regression
plot(x,yt,label='Regression library')
plot(x,y, label='Data')
plot(x,ytilde,label='Regression ours')
xlabel('A')
ylabel('Binding Energy')
legend(loc='best')
show()
from sklearn.metrics import mean_squared_error, r2_score, mean_squared_log_error, mean_absolute_error
# more information from library
print('The intercept alpha: \n', lg.intercept_)
print('Coefficient beta : \n', lg.coef_)
print('Our coefficients: \n', Beta)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y, yt))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(y, yt))
# Mean squared log error
print('Mean squared log error: %.2f' % mean_squared_log_error(y, yt) )
# Mean absolute error
print('Mean absolute error: %.2f' % mean_absolute_error(y, yt))
The intercept alpha:
 5.294399745619717
Coefficient beta :
 [ 0.00000000e+00 -2.96611194e-02  2.01719003e-01  1.08078025e+01 -4.03097597e+01]
Our coefficients:
 [ 5.29439975e+00 -2.96611194e-02  2.01719003e-01  1.08078025e+01 -4.03097597e+01]
Mean squared error: 0.02
Variance score: 0.95
Mean squared log error: 0.00
Mean absolute error: 0.05
The function mean_squared_error gives us the mean squared error, a risk metric corresponding to the expected value of the squared (quadratic) error or loss, defined as $$ MSE(\boldsymbol{y},\boldsymbol{\tilde{y}}) = \frac{1}{n} \sum_{i=0}^{n-1}(y_i-\tilde{y}_i)^2. $$ This function is equivalent to the $\chi^2$ function defined above.
Another quantity is the mean absolute error (MAE), a risk metric corresponding to the expected value of the absolute error loss, based on the $l_1$-norm. The MAE is defined as follows: $$ \text{MAE}(\boldsymbol{y}, \boldsymbol{\tilde{y}}) = \frac{1}{n} \sum_{i=0}^{n-1} \left| y_i - \tilde{y}_i \right|. $$ We also present the mean squared logarithmic error $$ \text{MSLE}(\boldsymbol{y}, \boldsymbol{\tilde{y}}) = \frac{1}{n} \sum_{i=0}^{n - 1} (\log_e (1 + y_i) - \log_e (1 + \tilde{y}_i) )^2, $$ where $\log_e (x)$ stands for the natural logarithm of $x$. This error estimate is best used when the targets have exponential growth, such as population counts, the average sales of a commodity over a span of years, etc.
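As a minimal check, these metrics can also be computed by hand directly from the formulas above, reusing the arrays y and yt from the fit:
import numpy as np
# compute the metrics directly from their definitions (y, yt from the fit above)
mse  = np.mean((y - yt)**2)
mae  = np.mean(np.abs(y - yt))
msle = np.mean((np.log(1 + y) - np.log(1 + yt))**2)
print('MSE = %.2f  MAE = %.2f  MSLE = %.2f' % (mse, mae, msle))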
Finally, another cost function is the Huber cost function used in robust regression.
The rationale behind this possible cost function is its reduced sensitivity to outliers in the data set. In our discussions on dimensionality reduction and normalization of data we will meet other ways of dealing with outliers.
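The standard Huber cost is quadratic for small residuals and linear for large ones, which is what makes it less sensitive to outliers. A minimal sketch, with a hypothetical threshold delta, is given below (scikit-learn also offers a HuberRegressor class built around this type of loss):
import numpy as np
def huber_cost(y, ytilde, delta=1.0):
    # quadratic for |residual| <= delta, linear (outlier-tolerant) beyond that
    r = y - ytilde
    quad = 0.5 * r**2
    lin  = delta * (np.abs(r) - 0.5 * delta)
    return np.mean(np.where(np.abs(r) <= delta, quad, lin))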
A useful Python package for data analysis is pandas, which is an open source library providing high-performance data structures and data analysis tools for Python. The name pandas stands for panel data, a term borrowed from econometrics.
pandas has two major classes, the DataFrame class with two-dimensional data objects and the class Series with a focus on one-dimensional data objects. Both classes allow you to index data easily as we will see in the examples below.
pandas allows you also to perform mathematical operations on the data, spanning from simple reshapings of vectors and matrices to statistical operations.
The following simple example shows how we can make tables of our data. Here we define a data set which includes names, place of birth and date of birth, and displays the data in an easy to read way.
import pandas as pd
from IPython.display import display
data = {'First Name': ["Frodo", "Bilbo", "Aragorn II", "Samwise"],
'Last Name': ["Baggins", "Baggins","Elessar","Gamgee"],
'Place of birth': ["Shire", "Shire", "Eriador", "Shire"],
'Date of Birth T.A.': [2968, 2890, 2931, 2980]
}
data_pandas = pd.DataFrame(data)
display(data_pandas)
print('Standard Python print:', data)
| | First Name | Last Name | Place of birth | Date of Birth T.A. |
|---|---|---|---|---|
0 | Frodo | Baggins | Shire | 2968 |
1 | Bilbo | Baggins | Shire | 2890 |
2 | Aragorn II | Elessar | Eriador | 2931 |
3 | Samwise | Gamgee | Shire | 2980 |
Standard Python print: {'First Name': ['Frodo', 'Bilbo', 'Aragorn II', 'Samwise'], 'Last Name': ['Baggins', 'Baggins', 'Elessar', 'Gamgee'], 'Place of birth': ['Shire', 'Shire', 'Eriador', 'Shire'], 'Date of Birth T.A.': [2968, 2890, 2931, 2980]}
In the above we have imported pandas with the shorthand pd, which has become the standard way to import pandas. We then collect various variables into lists, reorganize these lists into a DataFrame, and print out a neat table with specific column labels such as first name, last name, place of birth, and date of birth. Displaying these results, we see that the indices are given by the default numbers from zero to three. pandas is extremely flexible, and we can easily change the above indices by defining a new type of indexing, as
data_pandas = pd.DataFrame(data,index=['Frodo','Bilbo','Aragorn','Sam'])
display(data_pandas)
| | First Name | Last Name | Place of birth | Date of Birth T.A. |
|---|---|---|---|---|
Frodo | Frodo | Baggins | Shire | 2968 |
Bilbo | Bilbo | Baggins | Shire | 2890 |
Aragorn | Aragorn II | Elessar | Eriador | 2931 |
Sam | Samwise | Gamgee | Shire | 2980 |
Thereafter we display the content of the row which begins with the index Aragorn
display(data_pandas.loc['Aragorn'])
First Name            Aragorn II
Last Name                Elessar
Place of birth           Eriador
Date of Birth T.A.          2931
Name: Aragorn, dtype: object
We can easily append data to this, for example
new_hobbit = {'First Name': ["Peregrin"],
'Last Name': ["Took"],
'Place of birth': ["Shire"],
'Date of Birth T.A.': [2990]
}
#data_pandas=data_pandas.append(pd.DataFrame(new_hobbit, index=['Pippin']))
data_pandas=pd.concat([data_pandas,pd.DataFrame(new_hobbit, index=['Pippin'])])
display(data_pandas)
| | First Name | Last Name | Place of birth | Date of Birth T.A. |
|---|---|---|---|---|
Frodo | Frodo | Baggins | Shire | 2968 |
Bilbo | Bilbo | Baggins | Shire | 2890 |
Aragorn | Aragorn II | Elessar | Eriador | 2931 |
Sam | Samwise | Gamgee | Shire | 2980 |
Pippin | Peregrin | Took | Shire | 2990 |
Here are other examples where we use the DataFrame functionality to handle arrays, now with more interesting features for us, namely numbers. We set up a matrix of dimensionality $10\times 5$ and compute the mean value and standard deviation of each column. Similarly, we can perform mathematical operations like squaring the matrix elements and many other operations.
import numpy as np
import pandas as pd
from IPython.display import display
np.random.seed(100) # some arbitrary random seed to always start the same
# setting up a 10 x 5 matrix
rows = 10
cols = 5
a = np.random.randn(rows,cols) # normally distributed random numbers (10x5)
df = pd.DataFrame(a) # df is now panda DataFrame
display(df)
print(df.mean()) # easy to get mean of each column
our_mean = [sum(a[:,i])/rows for i in range(cols)]
print('our mean=', our_mean)
print(df.std()) # easy to get standard deviation
# Note that pandas' std() uses 1/(rows-1), the unbiased estimator, rather than 1/rows, so we do the same
our_s = [ np.sqrt(sum((a[:,i]-our_mean[i])**2)/(rows-1)) for i in range(cols)]
print('our sigma=', our_s)
| | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
0 | -1.749765 | 0.342680 | 1.153036 | -0.252436 | 0.981321 |
1 | 0.514219 | 0.221180 | -1.070043 | -0.189496 | 0.255001 |
2 | -0.458027 | 0.435163 | -0.583595 | 0.816847 | 0.672721 |
3 | -0.104411 | -0.531280 | 1.029733 | -0.438136 | -1.118318 |
4 | 1.618982 | 1.541605 | -0.251879 | -0.842436 | 0.184519 |
5 | 0.937082 | 0.731000 | 1.361556 | -0.326238 | 0.055676 |
6 | 0.222400 | -1.443217 | -0.756352 | 0.816454 | 0.750445 |
7 | -0.455947 | 1.189622 | -1.690617 | -1.356399 | -1.232435 |
8 | -0.544439 | -0.668172 | 0.007315 | -0.612939 | 1.299748 |
9 | -1.733096 | -0.983310 | 0.357508 | -1.613579 | 1.470714 |
0   -0.175300
1    0.083527
2   -0.044334
3   -0.399836
4    0.331939
dtype: float64
our mean= [-0.1753003003007798, 0.08352721390288316, -0.044333972284362075, -0.39983564919591685, 0.33193916850842475]
0    1.069584
1    0.965548
2    1.018232
3    0.793167
4    0.918992
dtype: float64
our sigma= [1.069584393754908, 0.9655479229776867, 1.0182315502230195, 0.7931668855918225, 0.918992356527374]
Thereafter we can select specific columns only and plot final results
df.columns = ['First', 'Second', 'Third', 'Fourth', 'Fifth']
df.index = np.arange(1,11)
display(df)
print('<Second>=', df['Second'].mean() )
print('Info of DataFrame=', df.info())
print(df.describe())
| | First | Second | Third | Fourth | Fifth |
|---|---|---|---|---|---|
1 | -1.749765 | 0.342680 | 1.153036 | -0.252436 | 0.981321 |
2 | 0.514219 | 0.221180 | -1.070043 | -0.189496 | 0.255001 |
3 | -0.458027 | 0.435163 | -0.583595 | 0.816847 | 0.672721 |
4 | -0.104411 | -0.531280 | 1.029733 | -0.438136 | -1.118318 |
5 | 1.618982 | 1.541605 | -0.251879 | -0.842436 | 0.184519 |
6 | 0.937082 | 0.731000 | 1.361556 | -0.326238 | 0.055676 |
7 | 0.222400 | -1.443217 | -0.756352 | 0.816454 | 0.750445 |
8 | -0.455947 | 1.189622 | -1.690617 | -1.356399 | -1.232435 |
9 | -0.544439 | -0.668172 | 0.007315 | -0.612939 | 1.299748 |
10 | -1.733096 | -0.983310 | 0.357508 | -1.613579 | 1.470714 |
<Second>= 0.08352721390288316
<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 1 to 10
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   First   10 non-null     float64
 1   Second  10 non-null     float64
 2   Third   10 non-null     float64
 3   Fourth  10 non-null     float64
 4   Fifth   10 non-null     float64
dtypes: float64(5)
memory usage: 480.0 bytes
Info of DataFrame= None
           First     Second      Third     Fourth      Fifth
count  10.000000  10.000000  10.000000  10.000000  10.000000
mean   -0.175300   0.083527  -0.044334  -0.399836   0.331939
std     1.069584   0.965548   1.018232   0.793167   0.918992
min    -1.749765  -1.443217  -1.690617  -1.613579  -1.232435
25%    -0.522836  -0.633949  -0.713163  -0.785061   0.087887
50%    -0.280179   0.281930  -0.122282  -0.382187   0.463861
75%     0.441264   0.657041   0.861676  -0.205231   0.923602
max     1.618982   1.541605   1.361556   0.816847   1.470714
from pylab import plt, mpl
mpl.rcParams['font.family'] = 'serif'
#usual type plotting
plt.plot(df.cumsum(), lw=2.0)
#using pandas makes it easier
df.cumsum().plot(lw=2.0, figsize=(10,6))
plt.show()
#using bars
df.plot.bar(figsize=(10,6), rot=15)
plt.show()
An example of a nicely formatted print of a $4\times 4$ matrix:
b = np.arange(16).reshape((4,4))
print(b)
df1 = pd.DataFrame(b)
print(df1)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
    0   1   2   3
0   0   1   2   3
1   4   5   6   7
2   8   9  10  11
3  12  13  14  15
and many other operations.
The Series class is another important class included in pandas. You can view it as a specialization of DataFrame where we have just a single column of data; it shares many of the same features as DataFrame. As with DataFrame, most operations are vectorized, thereby achieving high performance when dealing with computations of arrays, in particular labeled arrays. As we will see below, it also leads to very concise code, close to the mathematical operations we may be interested in. For multidimensional arrays, we strongly recommend xarray. xarray has much of the same flexibility as pandas, but allows for the extension to higher dimensions than two.
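As a minimal illustration of the Series class (with hypothetical labels and values), one could write:
import numpy as np
import pandas as pd
s = pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd'])  # one labeled column of data
print(s)                       # prints values together with their index labels
print('mean =', s.mean())      # vectorized statistics, as for DataFrame
print("s['b'] =", s['b'])      # access by label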