Compute Triple Junction dataset
Contents
Machine Learning to predict location of ice recrystallization. May - July 2022. UGA and IGE internship, M1 Statistics and Data Sciences (SSD). Renan MANCEAUX. Supervisor: Thomas CHAUVE. Data Computing |
8.2. Compute Triple Junction dataset#
import numpy as np
import pandas as pd
import sys
sys.path.append("../../scripts/")
import utils
from tqdm.notebook import tqdm
import pickle
import xarrayaita.aita as xa
from collections import Counter
from scipy.spatial import distance_matrix
name = 'CI09'  # sample identifier; inserted into the input file paths built below
8.2.1. Loading data#
# Table of features for the selected sample, loaded through the project helper.
# NOTE(review): presumably one row per image pixel (flattened image arrays are
# later stored as columns of this table) -- confirm against utils.load_data.
data = utils.load_data("../../data/for_learning_plus/" + name + ".npy")

# Pickled dataset for the same sample. The context manager closes the handle
# even when unpickling raises, which the manual open()/close() pair did not.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load files produced by this project.
with open('../../data/craft/' + name + '.xr', 'rb') as file:
    ds_data = pickle.load(file)
8.2.2. Compute variables#
8.2.2.1. Compute map of TJ and distance#
# One distance map per triple junction (TJ); the last axis indexes the TJ.
maps = ds_data.aita.dist2eachTJ()

# For every pixel, the index of the TJ with the smallest distance.
nearest_tj = np.argmin(np.array(maps), axis=2)
data['idTJ'] = nearest_tj.flatten()

# Grain id of every pixel, flattened the same way, plus the 2-D image shape.
grain_id_img = np.array(ds_data.grainId)
data['grainId'] = grain_id_img.flatten()
imshape = grain_id_img.shape

# Number of distinct TJ ids that own at least one pixel.
ntj = data['idTJ'].nunique()

# Per-TJ table; entries 0-1 of a row are read as coordinates and entries 2-4
# as grain ids by the sections below.
M = ds_data.aita.TJ_map()
8.2.2.2. Filter TJ too close to border#
# Collect the ids of triple junctions lying within `border_margin` of the
# image edge; the sections below fill their features with NaN.
border_margin = 4.5
last_row = imshape[0] - 1
last_col = imshape[1] - 1

tj_to_skip = []
for idtj in tqdm(np.unique(data.idTJ)):
    coord = M[idtj][0:2]
    # coord[0] is checked against the second image axis, coord[1] against the first.
    too_close = (
        coord[0] - border_margin < 0
        or coord[1] - border_margin < 0
        or coord[0] + border_margin > last_col
        or coord[1] + border_margin > last_row
    )
    if too_close:
        tj_to_skip.append(idtj)
8.2.2.3. Compute RX of each TJ (10%)#
# RX label per triple junction: 1 when the sum of `Y` over the pixels closer
# than `rx_radius` to the junction exceeds `rx_percent` % of the number of
# those pixels, 0 otherwise, NaN for skipped junctions.
# NOTE(review): `Y` is presumably the per-pixel recrystallization flag -- confirm.
rx_radius = 10
rx_percent = 10

RX = np.zeros(ntj)
for i in tqdm(data.idTJ.unique()):
    if i in tj_to_skip:
        RX[i] = np.nan
        continue
    dist_to_tj = np.array(maps[:, :, i]).flatten()
    near_rows = np.where(dist_to_tj < rx_radius)[0]
    n_near = len(near_rows)
    # Flat pixel positions are used as row labels of `data`.
    y_total = sum(data.loc[near_rows].Y)
    if y_total > (n_near * rx_percent / 100):
        RX[i] = 1
8.2.2.4. Compute Schmid factor of each of the 3 grains#
# Schmid factor of the three grains attached to each triple junction.
# Entries 2..4 of M[j] are read as grain ids; every pixel row of `data` that
# carries that grain id is expected to hold the same `schmid` value.
schmid_tj = np.zeros((ntj, 3))
for i in tqdm(range(2, 5)):
    for j in range(ntj):
        if j in tj_to_skip:
            schmid_tj[j, i - 2] = np.nan
            continue
        # `grain_id` instead of `id`, so the builtin is not shadowed.
        grain_id = int(M[j][i])
        grain_schmid = data.loc[data.grainId == grain_id, 'schmid'].unique()
        # Extract the scalar explicitly: assigning a size-1 array to a single
        # array element is deprecated since NumPy 1.25. Zero or several values
        # still raise ValueError, now with an explicit message.
        if len(grain_schmid) != 1:
            raise ValueError(
                f"expected exactly one schmid value for grain {grain_id}, "
                f"found {len(grain_schmid)}"
            )
        schmid_tj[j, i - 2] = grain_schmid[0]
8.2.2.5. Compute Schmid factor difference for each of the 3 GB#
# Three `diff_schmid` values per triple junction, read from a small pixel
# window centred on the junction. The window starts with 4 offsets per axis
# and is widened (offsets +/-2.5, then +/-3.5) while it contains fewer than
# three distinct values; the three most frequent values are kept.
diff_schmid_img = np.array(data.diff_schmid).reshape(imshape)

diff_schmid_tj = np.zeros((ntj, 3))
for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        diff_schmid_tj[i] = np.nan
        continue

    coord = M[i][0:2]
    offsets = [-1.5, -0.5, 0.5, 1.5]
    for widen in (None, 2.5, 3.5):
        if widen is not None:
            # New offsets go at the end, which keeps the sampling order (and
            # therefore the tie-breaking below) stable.
            offsets += [-widen, widen]
        pts_y = np.array([coord[0] + off for off in offsets]).astype(int)
        pts_x = np.array([coord[1] + off for off in offsets]).astype(int)
        val = [diff_schmid_img[x, y] for x in pts_x for y in pts_y]
        if len(np.unique(val)) >= 3:
            break

    # Most frequent values first; equal counts keep first-seen order.
    good_val = [value for value, _ in Counter(val).most_common(3)]
    diff_schmid_tj[i] = good_val
8.2.2.6. Compute misorientation angle of each of the 3 GB#
# Three `misangle` values per triple junction, read from a small pixel window
# centred on the junction. The window starts with 4 offsets per axis and is
# widened (offsets +/-2.5, then +/-3.5) while it contains fewer than three
# distinct values; the three most frequent values are kept.
#
# Bug fix: the widest window used to sample `data.diff_schmid` instead of
# `data.misangle` (copy-paste slip), so junctions needing that fallback got
# Schmid differences stored as misorientation angles. Every window size now
# reads the same `misangle` image.
misangle_img = np.array(data.misangle).reshape(imshape)

misangle_tj = np.zeros((ntj, 3))
for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        misangle_tj[i] = np.nan
        continue

    coord = M[i][0:2]
    offsets = [-1.5, -0.5, 0.5, 1.5]
    for widen in (None, 2.5, 3.5):
        if widen is not None:
            # New offsets go at the end, which keeps the sampling order (and
            # therefore the tie-breaking below) stable.
            offsets += [-widen, widen]
        pts_y = np.array([coord[0] + off for off in offsets]).astype(int)
        pts_x = np.array([coord[1] + off for off in offsets]).astype(int)
        val = [misangle_img[x, y] for x in pts_x for y in pts_y]
        if len(np.unique(val)) >= 3:
            break

    # Most frequent values first; equal counts keep first-seen order.
    good_val = [value for value, _ in Counter(val).most_common(3)]
    misangle_tj[i] = good_val
8.2.2.7. Compute volume ratio anisotropy of each TJ#
# Value of `volratio_an` at the pixel addressed by each triple junction
# (coordinates shifted by -0.5 and truncated to int); NaN for skipped junctions.
volratio_img = np.array(data.volratio_an).reshape(imshape)

volratio_an_tj = np.zeros(ntj)
for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        volratio_an_tj[i] = np.nan
        continue
    coord = M[i][0:2]
    # coord[1] indexes the first image axis, coord[0] the second.
    row = int(coord[1] - 0.5)
    col = int(coord[0] - 0.5)
    volratio_an_tj[i] = volratio_img[row, col]
8.2.2.8. Compute mean of craft variables for each TJ#
# Mean of each craft variable over the 4x4 pixel window centred on every
# triple junction; NaN for skipped junctions.
craft_fields = ("work", "eqStrain", "eqStress", "act_py", "act_pr")
craft_imgs = {field: np.array(data[field]).reshape(imshape) for field in craft_fields}
craft_means = {field: np.zeros(ntj) for field in craft_fields}

for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        for field in craft_fields:
            craft_means[field][i] = np.nan
        continue

    coord = M[i][0:2]
    pts_y = np.array([coord[0] - 1.5, coord[0] - 0.5, coord[0] + 0.5, coord[0] + 1.5]).astype(int)
    pts_x = np.array([coord[1] - 1.5, coord[1] - 0.5, coord[1] + 0.5, coord[1] + 1.5]).astype(int)
    window = [(x, y) for x in pts_x for y in pts_y]

    for field in craft_fields:
        img = craft_imgs[field]
        craft_means[field][i] = np.mean([img[x, y] for x, y in window])

# Expose the results under the names used when the dataset is assembled.
work_mean_tj = craft_means["work"]
eqStrain_mean_tj = craft_means["eqStrain"]
eqStress_mean_tj = craft_means["eqStress"]
act_py_mean_tj = craft_means["act_py"]
act_pr_mean_tj = craft_means["act_pr"]
8.2.2.9. Compute Distance to other TJ#
# Distance from each triple junction to its nearest other triple junction.
# `Ma` holds the first two entries of every TJ_map row (the coordinates);
# `dist` is the full pairwise distance matrix between those points.
Ma = np.asarray(ds_data.aita.TJ_map())[:, 0:2]
dist = pd.DataFrame(distance_matrix(Ma, Ma))

# Start from NaN so skipped junctions need no special assignment.
dist2oTJ = np.full(ntj, np.nan)
# With a single junction there is no neighbour; leave NaN instead of failing.
has_neighbour = len(dist) > 1
for i in tqdm(range(ntj)):
    if i in tj_to_skip or not has_neighbour:
        continue
    # Position 0 of the sorted column is the junction itself (distance 0), so
    # position 1 is the nearest other junction. `.iloc[1]` is explicitly
    # positional and returns a scalar, which avoids assigning a length-1 array
    # to a single array element (deprecated since NumPy 1.25).
    dist2oTJ[i] = dist[i].sort_values().iloc[1]
8.2.2.10. Compute number of pixels of the 3 grains of each TJ#
# Number of pixels of each grain, indexed by integer grain id.
# A single np.unique(..., return_counts=True) pass replaces one nested Python
# `sum` over the whole image per grain, and no longer rebinds the builtin `id`.
# NOTE(review): assumes grainId contains no NaN (a NaN id previously counted
# as 0 pixels) -- confirm.
grain_ids, grain_counts = np.unique(np.array(ds_data.grainId), return_counts=True)
nb_pix_g = pd.Series(grain_counts, index=grain_ids.astype(int))

# Pixel counts of the three grains attached to each triple junction;
# NaN for skipped junctions.
nb_pix_g_tj = np.zeros((ntj, 3))
for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        nb_pix_g_tj[i] = np.nan
        continue
    # Entries 2.. of M[i] are grain ids; cast them to int so the label lookup
    # matches the integer index of `nb_pix_g`.
    tj_grain_ids = np.array(M[i][2:]).astype(int)
    nb_pix_g_tj[i] = nb_pix_g.loc[tj_grain_ids].to_numpy()
8.2.3. Building dataset#
# Assemble the per-triple-junction dataset: one row per TJ, one column per
# feature, in the same column order as before.
TJ_data = pd.DataFrame(
    {
        "RX": RX,
        "schmid1": schmid_tj[:, 0],
        "schmid2": schmid_tj[:, 1],
        "schmid3": schmid_tj[:, 2],
        "diff_schmid1": diff_schmid_tj[:, 0],
        "diff_schmid2": diff_schmid_tj[:, 1],
        "diff_schmid3": diff_schmid_tj[:, 2],
        "misangle1": misangle_tj[:, 0],
        "misangle2": misangle_tj[:, 1],
        "misangle3": misangle_tj[:, 2],
        "volratio_an": volratio_an_tj,
        "eqStrain": eqStrain_mean_tj,
        "eqStress": eqStress_mean_tj,
        "act_pr": act_pr_mean_tj,
        "act_py": act_py_mean_tj,
        "work": work_mean_tj,
        "dist1neigh": dist2oTJ,
        "nb_pix_g1": nb_pix_g_tj[:, 0],
        "nb_pix_g2": nb_pix_g_tj[:, 1],
        "nb_pix_g3": nb_pix_g_tj[:, 2],
    }
)
# Bare expression so the notebook displays the table.
TJ_data
RX | schmid1 | schmid2 | schmid3 | diff_schmid1 | diff_schmid2 | diff_schmid3 | misangle1 | misangle2 | misangle3 | volratio_an | eqStrain | eqStress | act_pr | act_py | work | dist1neigh | nb_pix_g1 | nb_pix_g2 | nb_pix_g3 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 0.433383 | 0.314354 | 0.464099 | 0.030716 | 0.119029 | 0.149745 | 0.726541 | 1.124480 | 0.706312 | 0.976792 | 0.018254 | 1.331305 | 0.000026 | 4.466442e-07 | 0.002944 | 21.377558 | 5824.0 | 3173.0 | 552.0 |
1 | 0.0 | 0.499819 | 0.433383 | 0.464099 | 0.066436 | 0.035720 | 0.030716 | 1.399579 | 1.130727 | 0.340875 | 0.964957 | 0.017129 | 1.155659 | 0.000018 | 1.703326e-08 | 0.001886 | 21.377558 | 5931.0 | 5824.0 | 552.0 |
2 | 1.0 | 0.056638 | 0.220494 | 0.244690 | 0.024196 | 0.188052 | 0.163855 | 1.397844 | 1.482392 | 0.242694 | 0.950161 | 0.011322 | 1.132815 | 0.000006 | 4.944418e-08 | 0.001001 | 141.014184 | 36321.0 | 1386.0 | 3667.0 |
3 | 1.0 | 0.491447 | 0.056638 | 0.314354 | 0.434809 | 0.177093 | 0.257716 | 0.636235 | 0.283226 | 0.357118 | 0.999528 | 0.020613 | 1.025976 | 0.000004 | 1.232579e-08 | 0.001333 | 41.231056 | 27290.0 | 36321.0 | 3173.0 |
4 | 1.0 | 0.491447 | 0.056638 | 0.326571 | 0.164876 | 0.434809 | 0.269933 | 0.636235 | 0.983226 | 0.397189 | 0.982284 | 0.004298 | 1.074021 | 0.000004 | 8.676976e-10 | 0.000441 | 61.400326 | 27290.0 | 36321.0 | 5481.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
57 | 0.0 | 0.066017 | 0.492388 | 0.437152 | 0.055236 | 0.426370 | 0.371135 | 1.131787 | 0.382646 | 0.764838 | 0.992830 | 0.028784 | 1.937496 | 0.000114 | 6.602019e-06 | 0.008137 | 27.202941 | 3422.0 | 967.0 | 3726.0 |
58 | 0.0 | 0.043031 | 0.195062 | 0.338734 | 0.295703 | 0.143672 | 0.152031 | 1.014145 | 1.250826 | 0.249974 | 0.994848 | 0.043997 | 2.038062 | 0.000110 | 1.538746e-05 | 0.011282 | 5.000000 | 3440.0 | 1015.0 | 3007.0 |
59 | 1.0 | 0.195062 | 0.066017 | 0.148617 | 0.046446 | 0.129045 | 0.082599 | 0.359894 | 0.233268 | 0.134345 | 0.999837 | 0.015061 | 1.956725 | 0.000011 | 7.114007e-06 | 0.005079 | 55.659680 | 1015.0 | 3422.0 | 20561.0 |
60 | 0.0 | 0.074013 | 0.043031 | 0.059130 | 0.030982 | 0.014883 | 0.016099 | 0.284868 | 0.250435 | 0.050341 | 0.999907 | 0.016632 | 1.084423 | 0.000032 | 3.118853e-07 | 0.002953 | 72.560320 | 2194.0 | 3440.0 | 16226.0 |
61 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
62 rows × 20 columns
8.2.3.1. Save New Dataset#
#np.save(file="../../data/TJ/TJ_"+name,arr=TJ_data)