#Copyright Aksyonov D.A
from __future__ import division, unicode_literals, absolute_import
import shelve, sys, datetime, shutil, tempfile, os, json, re, glob
import json
try:
import pandas as pd
except:
print('no pandas')
from siman import header
from siman.header import runBash, printlog
from siman.small_functions import makedir
from siman.set_functions import init_default_sets
from siman.functions import invert
# from siman import classes
"""
Module contains utilities for project database management
and working with external databases
TODO:
1) in write_database() make update of history file more clever
"""
calc_key = 'calc'
conv_key = 'conv'
varset_key = 'varset'
history_key = 'history'
struct_des_key = 'struct_des'
[docs]def read_database(scratch = False, init_sets = 0):
"""
Read database of calculations
INPUT:
scratch - not used
init_sets - required to reinit defaults sets in init_default_sets function
RETURN:
calc - dict, contains all calculations of the project
conv - dict, convergence sequences
varset - dict, parameter sets of the project
size_on_start, int - not used now
"""
# databasefile = 'calc.s' #was used with python2
# databasefile = 'calc.gdbm'
databasefile3 = 'calc.gdbm3'
# if header.RAMDISK:
# databasefile3 = header.RAMDISK+databasefile3
# sys.exit()
# if scratch == True: databasefile = '/scratch/aksenov/calc.s'
printlog("\nLaunch at "+str( datetime.datetime.today() )+'\n')
# mod = __import__("gdbm")
# d = shelve.Shelf(mod.open(databasefile, protocol=1))
with shelve.open(databasefile3, protocol = 3) as d:
try:
header.conv = d[conv_key] #dictionary of convergence lists
except KeyError:
printlog( "There is no dictionary of convergence lists. I create new");
header.conv = {}
try:
header.varset = d[varset_key]
except KeyError:
printlog( "There is no dictionary of inputsets. I create new");
header.varset = {}
try:
header.history = d[history_key]
except KeyError:
header.history = ['Project started on '+ str( datetime.date.today() ) ]
printlog( "There is still no history in database. The list is in header module ");
try:
header.struct_des = d[struct_des_key]
except KeyError:
printlog( "There is no struct_des in database. The dict is in header module ");
#print history
init_default_sets(init_sets)
return header.conv, header.varset, sys.getsizeof(d)
[docs]def write_database(calc = None, conv = None, varset = None, size_on_start = None):
"""
The function writes main dictionaries to database file calc.s
Also creates copy of calc.s
INPUT:
calc - dict, contains all calculations of the project
conv - dict, convergence sequences
varset - dict, parameter sets of the project
size_on_start - not used now
RETURN:
None
"""
#size_on_finish = sys.getsizeof(dbase)
#if size_on_finish != size_on_start:
# runBash("cp calc.s calc_copy.s") #create copy before writing
databasefile3 = 'calc.gdbm3'
# if header.RAMDISK:
# databasefile3 = header.RAMDISK+databasefile3
size = os.path.getsize
if os.path.isfile(databasefile3) and os.path.isfile('calc_copy.gdbm3'):
if size(databasefile3) < size('calc_copy.gdbm3') - 200000:
print(size(databasefile3), size('calc_copy.gdbm3'))
printlog('Error! actual database file is smaller than reserve copy, something is wrong! Check')
else:
''
shutil.copyfile(databasefile3, 'calc_copy.gdbm3')
if 0:
d = shelve.open('calc.s', protocol=1) #Write database of calculations
d[calc_key] = calc
d[conv_key] = conv
d[varset_key] = varset
d[history_key] = header.history
d[struct_des_key] = header.struct_des
d.close()
python2 = False
if python2:
import gdbm# use in python2
d = shelve.Shelf(gdbm.open('calc.gdbm', 'c'), protocol=1) #Write dbm database for python3
d[unicode(calc_key)] = calc
d[unicode(conv_key)] = conv
d[unicode(varset_key)] = varset
d[unicode(history_key)] = header.history
d[unicode(struct_des_key)] = header.struct_des
d.close()
else: #python3
import dbm
d = shelve.Shelf(dbm.open(databasefile3, 'c'), protocol = 3) #Write dbm database for python3 / it was n key, now c, working on new python
# print(header.varset['test'])
d[conv_key] = header.conv
d[varset_key] = header.varset
d[history_key] = header.history
d[struct_des_key] = header.struct_des
# print(d[struct_des_key]['test'])
d.close()
if 0: #please run me from time to time to reduce the size of the database file, calc
with dbm.open(databasefile3, 'w') as d:
d.reorganize()
printlog('Opening ', header.calc_database, 'for writing')
with shelve.Shelf(dbm.open(header.calc_database, 'c'), protocol = 3) as d:
for key in header.db:
printlog('saving key:', key, imp = '')
# print(key)
# print(key, header.calc[key].inh_id)
d[str(key)] = header.db[key]
if header.reorganize: #please run me from time to time to reduce the size of the database file
with dbm.open(header.calc_database, 'w') as d:
d.reorganize()
printlog("\nEnd of work at "+str(datetime.datetime.now())+'\n')
try:
header.log.close()
except:
pass
#Update history file
with open('history','w') as his:
#print history
for i in header.history:
#print i
his.write(i+"\n")
print("\nDatabase has been successfully updated\n")
return
[docs]def get_from_database(x1, x2, mat, inquiry_keys = None, silent = None, ssh_object = None):
"""
inquiry_keys (list) - list of keys that should exist in filenames both for x1 and x2
ssh_object (SSHTools) - ssh object based on paramiko with access details
"""
from siman.classes import CalculationVasp
def check(key, inquiry_keys):
return all([k in key for k in inquiry_keys])
path2database = '/home/Data/CEStorage/'
hash_dict_file = 'hash_dict.json'
cluster_path2hash = os.path.join(path2database, hash_dict_file)
if inquiry_keys is None:
inquiry_keys = []
if ssh_object:
# ssh_object.get()
tempdir = tempfile.gettempdir()
local_path2hash = os.path.join(tempdir, hash_dict_file)
ssh_object.get(cluster_path2hash, local_path2hash )
# sys.exit()
with open(local_path2hash, 'r') as fp:
hash_dict = json.load(fp)
# print(hash_dict)
x1s = []
x2s = []
# print(hash_dict)
for key, val in hash_dict.items():
if check(key, inquiry_keys+[x1, mat]):
x1s.append(key)
if check(key, inquiry_keys+[x2, mat]):
x2s.append(key)
x1s = sorted(x1s, key = lambda el: len(el) )
x2s = sorted(x2s, key = lambda el: len(el) )
for xi, xis in (x1, x1s), (x2, x2s):
if not silent:
print('\nFiles for',xi,':')
for i, f in enumerate(xis):
if not silent:
print(i+1,f)
if len(x1s) == 0 or len(x2s) == 0:
print('No information in database for this inquire:', x1, x2, mat, str(inquiry_keys) )
return None, None
key1 = x1s[0]
key2 = x2s[0]
if not silent:
print('\nI choose first entries for both concentrations:',key1, 'and', key2,'\n')
# print('Use *inquiry_keys* arg to clarify the output results.\n')
#get files
loc1 = os.path.join(tempdir, hash_dict[key1])
loc2 = os.path.join(tempdir, hash_dict[key2])
makedir(loc1)
makedir(loc2)
# print()/
ssh_object.get(os.path.join(path2database, hash_dict[key1]), loc1 )
ssh_object.get(os.path.join(path2database, hash_dict[key2]), loc2 )
cl1 = CalculationVasp().deserialize(loc1)
cl2 = CalculationVasp().deserialize(loc2)
return cl1, cl2
[docs]def read_cvs_database(columns):
"""
Allows to read cvs file with experimental results
"""
dfs = pd.read_csv(r'database/literature.csv')[columns]
dfs.drop(0, inplace=True)
dfs.dropna(inplace=True)
dfs.set_index('is', inplace = True)
dfs = dfs.apply(lambda x: pd.to_numeric(x, errors='ignore'))
# dfs['owner'] = 'world'
# print(dfs.sort_index())
# print(dfs)
# sys.exit()
return dfs
[docs]def add_to_archive_database(cl, subgroup):
"""
cl is Calculation which should be added to database
subgroup (str) - subgroup folder
"""
from pymatgen.core.composition import Composition
from pymatgen.io.cif import CifWriter
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
join = os.path.join
basename = os.path.basename
dirname = os.path.dirname
save_format = 'azh'
dbpath = header.PATH2DATABASE
it = cl.id[0]
# print(cl.path)
sub_folder = cl.path['output'].split('/')[0] # usually basic chemical formula
# sub_folder = header.struct_des[it].sfolder.split('/')[0] # usually basic chemical formula
print( 'Processing ', cl.id)
cl.read_results()
if '4' not in cl.state:
return
st = cl.end
# print(cl.end.typat)
# sys.exit()
if 1:
#determine x
#universal method, supports several alkali elements
#requires cl.base_formula in 'Na2FePO4F' format
#requires pymatgen, would be nice to remove dependency
cmb = Composition(cl.base_formula)
cm = st.get_pm_composition()
rc = cl.end.get_reduced_composition().as_dict() #reduced composition dict
rcb = cmb.reduced_composition.as_dict()
# print(rc, rcb)
alk = list(set(st.get_specific_elements(header.ALKALI_ION_ELEMENTS))) # list of unique alkali elements
tra = list(set(st.get_specific_elements(header.TRANSITION_ELEMENTS)))
# print(alk, tra)
el_for_norm = tra[0] #first element used for normalization
nnb = rcb[el_for_norm] #number of norm elements in base
nn = rc[el_for_norm] #number of norm elements in interesting structure
# print(nb, n)
mul = nn/nnb # multiplier that garanties normalization
# print(rcb)
nab = sum([rcb[invert(z)] for z in header.ALKALI_ION_ELEMENTS if invert(z) in rcb])
na = sum([rc[el] for el in alk])
x = na / mul / nab
# determine formula
# cm = st.get_pm_composition() #get pymatgen composition class
# print( (cm/4).formula)
# print('Material detected:', formula, 'sub_folder:', sub_folder)
#obtain base without alk
formula = (cm.reduced_composition/mul).formula
# formula = formula.replace('1 ', '').replace(' ', '')
# print(formula)
cl.formula = formula
# print(Composition('Na0.75'))
print('Material detected:', formula, 'sub_folder:', sub_folder)
# sys.exit()
if 0:
#Old method, not robust at all!
#determine x for alkali ion from structure name
parsed = re.findall(r'([A-Z][a-z]*)(\d*)', formula)
parsed = [(el, x if x else '1') for (el, x) in parsed ]
print(parsed)
print('detected element is ', parsed[0][0])
if parsed[0][0] in [invert(z) for z in header.ALKALI_ION_ELEMENTS]:
x = parsed[0][0]
if hasattr(cl, 'max_alk_ion_content'):
x = float(x)/cl.max_alk_ion_content
else:
x = '1'
else:
x = '0'
sfolder = os.path.join(dbpath, sub_folder)
name = []
if 'azh' in save_format:
#1. Single point calculation of total energy
# print(sfolder)
makedir( join( sfolder, 'dummy') )
if x < 1:
x = int(round(100*x, 0))
else:
x = int(round(x, 0))
print('Concentration x:', x)
name.append('x'+str(x))
# sys.exit()
# if formula in ['LiCoO2', 'LiTiO2', 'LiFePO4', 'NaFePO4', 'LiMnPO4',
# 'LiNiO2', 'LiTiS2', 'LiMn2O4', 'LiVP2O7', 'LiVPO4F',
# 'NaMnAsO4', 'Na2FePO4F', 'Na2FeVF7', 'KFeSO4F', 'NaLiCoPO4F', 'KVPO4F' ]:
sfolder = join(sfolder, subgroup)
makedir( join(sfolder,'dummy') )
cl.set.update()
# print(cl.potcar_lines)
potcar1_m = cl.potcar_lines[0][0]
if '_' in potcar1_m:
(pot, _) = potcar1_m.split('_')
else:
pot = potcar1_m
xc = cl.xc_inc
if '-' in xc:
xc = cl.xc_pot
if xc == 'PE':
func = 'PBE'
elif xc == 'CA':
func = 'LDA'
elif xc == 'PS':
func = 'PBEsol'
else:
print('uknown xc type:', xc)
sys.exit()
if cl.set.spin_polarized:
func = 'U'+func #unrestricted
u_ramping_flag = False
if hasattr(cl.set, 'u_ramping_nstep') and cl.set.u_ramping_nstep:
func += '-UR'
u_ramping_flag = True
elif cl.set.dftu:
func += '-U'
else:
func += '-'
func+=pot.lower()
ecut = str(round(cl.set.ecut ))
func+=ecut
# print(func)
name.append(func)
name.extend([it.replace('.', '_')]+[cl.id[1]]+[str(cl.id[2])])
name_str = '_'.join(name)
# print('_'.join(name) )
# sys.exit()
outcar_name = name_str+'.out'
shutil.copyfile(cl.path["output"], join(sfolder, outcar_name) )
if u_ramping_flag:
print(cl.associated_outcars)
for i, u_outcar in enumerate(cl.associated_outcars[:-1]): # except the last one, which was copied above
u = u_outcar.split('.')[1]
# print(u)
path_to_outcar = join( dirname(cl.path["output"]), u_outcar )
cl.read_results(load = 'o', choose_outcar = i+1, only_load = 1)
shutil.copyfile(path_to_outcar, join(sfolder, name_str+'_'+u+'.out') )
# sys.exit()
cl.end.write_xyz(path = sfolder, filename = name_str)
pickle_file = cl.serialize(os.path.join(sfolder, 'bin', name_str) )
# cl
#write input, problem with fitted version 100, which does not have input geometry, since they are created on cluster
# makedir(sfolder+'input/dummy')
# shutil.copyfile(cl.path["input_geo"], sfolder+'input/'+name_str+'.geo')
st_mp = cl.end.convert2pymatgen()
sg_before = st_mp.get_space_group_info()
# from pymatgen.symmetry.finder import SymmetryFinder
# sf = SymmetryFinder(st_mp_prim)
symprec = 0.1
sf = SpacegroupAnalyzer(st_mp, symprec = symprec)
st_mp_prim = sf.find_primitive()
# st_mp_prim = sf.get_primitive_standard_structure()
# st_mp_prim = sf.get_conventional_standard_structure()
# st_mp_conv = sf.get_conventional_standard_structure()
# print(st_mp_conv)
# print(st_mp_conv.lattice.matrix)
# print(st_mp_prim)
# print(st_mp_prim.lattice)
sg_after = st_mp_prim.get_space_group_info()
if sg_before[0] != sg_after[0]:
printlog('Attention! the space group was changed after primitive cell searching', sg_before, sg_after)
printlog('I will save supercell in cif and reduce symprec to 0.01')
st_mp_prim = st_mp
symprec = 0.01
if st_mp_prim:
cif = CifWriter(st_mp_prim, symprec = symprec)
cif_name = name_str+'.cif'
cif.write_file( join(sfolder, cif_name) )
printlog('Writing cif', cif_name)
if 0:
#get multiplication matrix which allows to obtain the supercell from primitive cell.
#however this matrix is not integer which is not convinient.
print(st_mp.lattice.matrix.round(2))
print(st_mp_prim.lattice.matrix.round(2))
mul_matrix = np.dot(st_mp.lattice.matrix, np.linalg.inv(st_mp_prim.lattice.matrix) )
print(mul_matrix.round(1))
rprimd = np.dot(mul_matrix, st_mp_prim.lattice.matrix )
print(rprimd.round(2))
#write chg
if 1:
path_to_chg = cl.get_chg_file('CHGCAR')
if path_to_chg:
makedir( join(sfolder,'bin','dummy') )
printlog('path to chgcar',path_to_chg)
gz = '.gz'
if gz not in path_to_chg:
gz = ''
shutil.copyfile(path_to_chg, join( sfolder, 'bin', name_str+'.chg'+gz) )
#write dos
if subgroup in ['dos', 'DOS']:
DOSCAR = cl.get_file('DOSCAR', nametype = 'asoutcar');
if DOSCAR:
printlog('path to DOSCAR', DOSCAR)
gz = '.gz'
if gz not in path_to_chg:
gz = ''
shutil.copyfile(DOSCAR, join( sfolder, 'bin', name_str+'.dos'+gz) )
if subgroup in ['BAD']: #bader
cl.get_bader_ACF()
acf = cl.get_file(basename(cl.path['acf']))
# print(acf)
# sys.exit()
if acf:
shutil.copyfile(acf, join( sfolder, 'bin', name_str+'.acf') )
if subgroup in ['ph', 'PH']: #bader
# cl.get_bader_ACF()
xml = cl.get_file('vasprun.xml', nametype = 'asoutcar')
# print(acf)
# sys.exit()
if xml:
shutil.copyfile(xml, join( sfolder, 'bin', name_str+'.xml') )
#make dat
#incars
makedir( join(sfolder, 'dat','dummy') )
incars = glob.glob( join(cl.dir, '*INCAR*') )
# print(incars)
for inc in incars:
dest = join(sfolder, 'dat')
# inc_name =
if not os.path.exists(join(dest, basename(inc) )):
shutil.copy( inc, dest )
#kpoints
if it in header.struct_des:
with open( join(sfolder, 'dat', 'kpoints_for_kspacings.json'), 'w', newline = '') as fp:
json.dump(header.struct_des[it].ngkpt_dict_for_kspacings, fp,)
else:
printlog('Warning!, it not in struct_des:',it )
# print(cl.set.toJSON())
#prepare for neb
# makedir(sfolder+'neb_'+name_str+'/dummy')
return