logo_ige
logo_uga

Machine Learning to predict location of ice recrystallization



May - July 2022
UGA and IGE internship
M1 Statistics and Data Sciences (SSD)

Renan MANCEAUX
Supervisor : Thomas CHAUVE

Data Computing


8.2. Compute Triple Junction dataset#

import numpy as np
import pandas as pd
import sys
sys.path.append("../../scripts/")
import utils

from tqdm.notebook import tqdm
import pickle

import xarrayaita.aita as xa
from collections import Counter
from scipy.spatial import distance_matrix
# Dataset identifier; it is spliced into the input file paths built below.
name = 'CI09'

8.2.1. Loading data#

# Per-pixel table for this dataset. It is used below like a pandas DataFrame
# (column assignment, `.loc`); the exact schema is defined by
# `utils.load_data` -- TODO confirm.
data = utils.load_data("../../data/for_learning_plus/"+name+".npy")

# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load '.xr' files that come from a trusted source.
# The context manager closes the file even if unpickling raises.
with open('../../data/craft/'+name+'.xr', 'rb') as file:
    ds_data = pickle.load(file)

8.2.2. Compute variables#

8.2.2.1. Compute map of TJ and distance#

# One map per triple junction (TJ), stacked along axis 2; presumably the
# distance of every pixel to that TJ -- confirm against `dist2eachTJ`.
maps = ds_data.aita.dist2eachTJ()

# For every pixel, the index of the TJ whose map value is the smallest.
data['idTJ'] = np.array(maps).argmin(axis=2).flatten()
# Grain id of every pixel, flattened the same way as the maps.
data['grainId'] = np.array(ds_data.grainId).flatten()

# Shape of the 2-D image; used later to reshape flat columns back to a grid.
imshape = np.shape(ds_data.grainId)
# Number of distinct TJ indices that own at least one pixel.
ntj = len(data.idTJ.unique())

# One row per TJ: entries 0-1 are read below as coordinates, entries 2+ as
# the ids of the grains meeting at the TJ.
M = ds_data.aita.TJ_map()

8.2.2.2. Filter TJ too close to border#

# Collect the TJs lying within 4.5 pixels of an image edge. The later cells
# sample pixels up to 3.5 away from a TJ, so these TJs are excluded there.
tj_to_skip = []
border_margin = 4.5
last_col = imshape[1] - 1
last_row = imshape[0] - 1

for idtj in tqdm(np.unique(data.idTJ)):
    coord = M[idtj][0:2]
    # coord[0] is checked against the image width, coord[1] against its height.
    off_left_or_top = (coord[0] - border_margin < 0) or (coord[1] - border_margin < 0)
    off_right_or_bottom = (coord[0] + border_margin > last_col) or (coord[1] + border_margin > last_row)
    if off_left_or_top or off_right_or_bottom:
        tj_to_skip.append(idtj)

8.2.2.3. Compute RX of each TJ (10%)#

# RX label per TJ: 1 when more than 10% of the pixels whose map value for
# that TJ is below 10 have Y set, 0 otherwise, NaN for skipped TJs.
RX = np.zeros(ntj)
for tj in tqdm(data.idTJ.unique()):
    if tj in tj_to_skip:
        RX[tj] = np.nan
        continue

    tj_map = np.array(maps[:, :, tj]).flatten()
    # Flat pixel positions whose map value is below 10; they are used as row
    # labels of `data` (assumes `data` has a default 0..N-1 index -- confirm).
    near_pixels = np.where(tj_map < 10)[0]
    threshold = len(near_pixels) * 10 / 100  # 10%
    if sum(data.loc[near_pixels].Y) > threshold:
        RX[tj] = 1

8.2.2.4. Compute Schmid factor of each of the 3 grains#

# Schmid factor of the three grains meeting at each TJ (one row per TJ,
# NaN for skipped TJs).
schmid_tj = np.zeros((ntj, 3))
for j in tqdm(range(ntj)):
    if j in tj_to_skip:
        schmid_tj[j] = np.nan
        continue

    # Entries 2..4 of a TJ row are the ids of its three grains.
    for k, grain_id in enumerate(M[j][2:5]):
        grain_schmid = data.loc[data.grainId == int(grain_id), 'schmid'].unique()
        # Exactly one value per grain is expected. Storing a size-1 array in
        # a scalar slot is deprecated in NumPy, so take the scalar explicitly
        # and fail loudly otherwise.
        if len(grain_schmid) != 1:
            raise ValueError(
                f"expected one schmid value for grain {int(grain_id)}, "
                f"found {len(grain_schmid)}"
            )
        schmid_tj[j, k] = grain_schmid[0]

8.2.2.5. Compute Schmid factor difference for each of the 3 GBs#

# For each TJ, keep the three most frequent `diff_schmid` values found in a
# small pixel window around it (NaN for skipped TJs).
diff_schmid_tj = np.zeros((ntj, 3))

# Reshape the flat column to the image grid once, not once per sampled pixel.
diff_schmid_img = np.array(data.diff_schmid).reshape(imshape)

for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        diff_schmid_tj[i] = np.nan
        continue

    coord = M[i][0:2]

    # Start from a 4x4 window and widen it (up to 8x8) until at least three
    # distinct values are seen. New offsets are appended, not inserted, so the
    # sampling order -- and therefore the tie-breaking below -- is unchanged.
    offsets = [-1.5, -0.5, 0.5, 1.5]
    for extra in (None, 2.5, 3.5):
        if extra is not None:
            offsets += [-extra, extra]
        cols = (coord[0] + np.array(offsets)).astype(int)
        rows = (coord[1] + np.array(offsets)).astype(int)
        val = [diff_schmid_img[r, c] for r in rows for c in cols]
        if len(np.unique(val)) >= 3:
            break

    # Most frequent values first; equal counts keep first-encountered order.
    good_val = [value for value, _ in Counter(val).most_common(3)]

    # NOTE(review): if the widest window still yields exactly two distinct
    # values, this assignment raises ValueError (2 values into 3 slots).
    diff_schmid_tj[i] = good_val

8.2.2.6. Compute misorientation angle of each of the 3 GBs#

# For each TJ, keep the three most frequent `misangle` values found in a
# small pixel window around it (NaN for skipped TJs).
misangle_tj = np.zeros((ntj, 3))

# Reshape the flat column to the image grid once, not once per sampled pixel.
misangle_img = np.array(data.misangle).reshape(imshape)

for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        misangle_tj[i] = np.nan
        continue

    coord = M[i][0:2]

    # Start from a 4x4 window and widen it (up to 8x8) until at least three
    # distinct values are seen. Every window size samples the misangle image;
    # the widest one previously read `diff_schmid` by mistake.
    # New offsets are appended, not inserted, so the sampling order -- and
    # therefore the tie-breaking below -- is unchanged.
    offsets = [-1.5, -0.5, 0.5, 1.5]
    for extra in (None, 2.5, 3.5):
        if extra is not None:
            offsets += [-extra, extra]
        cols = (coord[0] + np.array(offsets)).astype(int)
        rows = (coord[1] + np.array(offsets)).astype(int)
        val = [misangle_img[r, c] for r in rows for c in cols]
        if len(np.unique(val)) >= 3:
            break

    # Most frequent values first; equal counts keep first-encountered order.
    good_val = [value for value, _ in Counter(val).most_common(3)]

    # NOTE(review): if the widest window still yields exactly two distinct
    # values, this assignment raises ValueError (2 values into 3 slots).
    misangle_tj[i] = good_val

8.2.2.7. Compute volume ratio anisotropy of each TJ#

# `volratio_an` value at the pixel holding each TJ (NaN for skipped TJs).
volratio_an_tj = np.zeros(ntj)

# Reshape the flat column to the image grid once instead of once per TJ.
volratio_an_img = np.array(data.volratio_an).reshape(imshape)

for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        volratio_an_tj[i] = np.nan
        continue

    coord = np.array(M[i][0:2])
    # coord[1] selects the image row and coord[0] the column; the 0.5 shift
    # followed by truncation picks the pixel index.
    row = (coord[1] - 0.5).astype(int)
    col = (coord[0] - 0.5).astype(int)
    volratio_an_tj[i] = volratio_an_img[row, col]

8.2.2.8. Compute mean of craft variables for each TJ#

# Mean of five per-pixel columns over the 4x4 pixel window around each TJ
# (NaN for skipped TJs).
work_mean_tj = np.zeros(ntj)
eqStrain_mean_tj = np.zeros(ntj)
eqStress_mean_tj = np.zeros(ntj)
act_py_mean_tj = np.zeros(ntj)
act_pr_mean_tj = np.zeros(ntj)

# Pair every output array with its source column, reshaped to the image grid
# once here rather than once per sampled pixel inside the loops.
craft_fields = (
    (np.array(data.work).reshape(imshape), work_mean_tj),
    (np.array(data.eqStrain).reshape(imshape), eqStrain_mean_tj),
    (np.array(data.eqStress).reshape(imshape), eqStress_mean_tj),
    (np.array(data.act_py).reshape(imshape), act_py_mean_tj),
    (np.array(data.act_pr).reshape(imshape), act_pr_mean_tj),
)
window_offsets = np.array([-1.5, -0.5, 0.5, 1.5])

for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        for _, means in craft_fields:
            means[i] = np.nan
        continue

    coord = M[i][0:2]
    # coord[0] addresses image columns, coord[1] image rows.
    cols = (coord[0] + window_offsets).astype(int)
    rows = (coord[1] + window_offsets).astype(int)

    for image, means in craft_fields:
        means[i] = np.mean([image[r, c] for r in rows for c in cols])

8.2.2.9. Compute Distance to other TJ#

# Distance from each TJ to its nearest other TJ (NaN for skipped TJs).
# Only the first two entries of every TJ row (its coordinates) are kept.
Ma = ds_data.aita.TJ_map().T[0:2].T
# Pairwise distances between all TJ coordinates.
dist = pd.DataFrame(distance_matrix(Ma, Ma))

dist2oTJ = np.zeros(ntj)

for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        dist2oTJ[i] = np.nan
    else:
        # Position 0 of the sorted column is the TJ's zero distance to itself,
        # so position 1 is the nearest other TJ. `.iloc[1]` yields a scalar;
        # the previous size-1 array relied on a deprecated NumPy conversion.
        dist2oTJ[i] = dist[i].sort_values().iloc[1]

8.2.2.10. Compute number of pixels of the 3 grains of each TJ#

# Pixel count of every grain, from a single pass over the grain-id image.
# NOTE(review): assumes grainId contains no NaN -- confirm.
grain_ids, grain_pixel_counts = np.unique(
    np.array(ds_data.grainId), return_counts=True
)
nb_pix_g = pd.Series(grain_pixel_counts, index=grain_ids.astype(int))

# Pixel count of the three grains meeting at each TJ (NaN for skipped TJs).
nb_pix_g_tj = np.zeros((ntj, 3))

for i in tqdm(range(ntj)):
    if i in tj_to_skip:
        nb_pix_g_tj[i] = np.nan
    else:
        # Entries 2+ of a TJ row are its grain ids; cast to int so the lookup
        # matches the integer index of `nb_pix_g` by label.
        tj_grain_ids = np.array(M[i][2:]).astype(int)
        nb_pix_g_tj[i] = nb_pix_g.loc[tj_grain_ids].to_numpy()

8.2.3. Building dataset#

# Assemble the per-TJ table: one row per TJ, one named column per feature.
# The three-column arrays are split into their individual columns.
TJ_data = pd.DataFrame(
    {
        "RX": RX,
        "schmid1": schmid_tj[:, 0],
        "schmid2": schmid_tj[:, 1],
        "schmid3": schmid_tj[:, 2],
        "diff_schmid1": diff_schmid_tj[:, 0],
        "diff_schmid2": diff_schmid_tj[:, 1],
        "diff_schmid3": diff_schmid_tj[:, 2],
        "misangle1": misangle_tj[:, 0],
        "misangle2": misangle_tj[:, 1],
        "misangle3": misangle_tj[:, 2],
        "volratio_an": volratio_an_tj,
        "eqStrain": eqStrain_mean_tj,
        "eqStress": eqStress_mean_tj,
        "act_pr": act_pr_mean_tj,
        "act_py": act_py_mean_tj,
        "work": work_mean_tj,
        "dist1neigh": dist2oTJ,
        "nb_pix_g1": nb_pix_g_tj[:, 0],
        "nb_pix_g2": nb_pix_g_tj[:, 1],
        "nb_pix_g3": nb_pix_g_tj[:, 2],
    }
)

# Display the resulting table in the notebook.
TJ_data
RX schmid1 schmid2 schmid3 diff_schmid1 diff_schmid2 diff_schmid3 misangle1 misangle2 misangle3 volratio_an eqStrain eqStress act_pr act_py work dist1neigh nb_pix_g1 nb_pix_g2 nb_pix_g3
0 1.0 0.433383 0.314354 0.464099 0.030716 0.119029 0.149745 0.726541 1.124480 0.706312 0.976792 0.018254 1.331305 0.000026 4.466442e-07 0.002944 21.377558 5824.0 3173.0 552.0
1 0.0 0.499819 0.433383 0.464099 0.066436 0.035720 0.030716 1.399579 1.130727 0.340875 0.964957 0.017129 1.155659 0.000018 1.703326e-08 0.001886 21.377558 5931.0 5824.0 552.0
2 1.0 0.056638 0.220494 0.244690 0.024196 0.188052 0.163855 1.397844 1.482392 0.242694 0.950161 0.011322 1.132815 0.000006 4.944418e-08 0.001001 141.014184 36321.0 1386.0 3667.0
3 1.0 0.491447 0.056638 0.314354 0.434809 0.177093 0.257716 0.636235 0.283226 0.357118 0.999528 0.020613 1.025976 0.000004 1.232579e-08 0.001333 41.231056 27290.0 36321.0 3173.0
4 1.0 0.491447 0.056638 0.326571 0.164876 0.434809 0.269933 0.636235 0.983226 0.397189 0.982284 0.004298 1.074021 0.000004 8.676976e-10 0.000441 61.400326 27290.0 36321.0 5481.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
57 0.0 0.066017 0.492388 0.437152 0.055236 0.426370 0.371135 1.131787 0.382646 0.764838 0.992830 0.028784 1.937496 0.000114 6.602019e-06 0.008137 27.202941 3422.0 967.0 3726.0
58 0.0 0.043031 0.195062 0.338734 0.295703 0.143672 0.152031 1.014145 1.250826 0.249974 0.994848 0.043997 2.038062 0.000110 1.538746e-05 0.011282 5.000000 3440.0 1015.0 3007.0
59 1.0 0.195062 0.066017 0.148617 0.046446 0.129045 0.082599 0.359894 0.233268 0.134345 0.999837 0.015061 1.956725 0.000011 7.114007e-06 0.005079 55.659680 1015.0 3422.0 20561.0
60 0.0 0.074013 0.043031 0.059130 0.030982 0.014883 0.016099 0.284868 0.250435 0.050341 0.999907 0.016632 1.084423 0.000032 3.118853e-07 0.002953 72.560320 2194.0 3440.0 16226.0
61 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

62 rows × 20 columns

8.2.3.1. Save New Dataset#

#np.save(file="../../data/TJ/TJ_"+name,arr=TJ_data)