Source code for aiida_gulp.potentials.base

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2019 Chris Sewell
#
# This file is part of aiida-gulp.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms and conditions
# of version 3 of the GNU Lesser General Public License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
from collections import namedtuple
import copy
import re

from aiida_gulp.validation import validate_against_schema
from aiida_gulp.potentials.common import filter_by_species

PotentialContent = namedtuple(
    "PotentialContent", ["content", "number_of_flags", "number_flagged"]
)
"""used for returning the content creation for a potential

Parameters
----------
content: str
    the potential file content
number_of_flags: int
    number of potential flags for fitting
number_flagged: int
    number of variables flagged to fit

"""

RE_SYMBOL = "([A-Z][a-z]?)"
RE_SYMBOL_TYPE = "([A-Z][a-z]?)\\s+(\\bc\\b|\\bcore\\b|\\bs\\b|\\bshell\\b)"
# take from version 4.5.3
OPTION_TERMS = (
    "3coulomb",
    "absdipolemoment",
    "absolute_coordinates",
    "accelerations",
    "accuracy",
    "ala_cutoff",
    "ala_disp",
    "ala_processors",
    "ala_shrink",
    "and",
    "anisotropic_pressure",
    "ashift",
    "atomab",
    "aver",
    "axilrod-teller",
    "bacoscross",
    "bacross",
    "bagcross",
    "balcross",
    "baskes",
    "bbar",
    "bcoscross",
    "bcross",
    "becke_johnson_c6",
    "best",
    "blocksize",
    "boattractive",
    "bocharge",
    "bocnswitch",
    "bocntolerance",
    "bocoordination",
    "bondtype",
    "borepulsive",
    "bornq",
    "boselfenergy",
    "both",
    "botwobody",
    "box",
    "brenner",
    "bsm",
    "bspline",
    "buck4",
    "buckingham",
    "buffered_lj",
    "bulk_modulus",
    "cartesian",
    "catomic_stress",
    "caver",
    "cell",
    "cellstrain",
    "centre",
    "cfaver",
    "cfm_fermi",
    "cfm_gaussian",
    "cfm_harmonic",
    "cfm_power",
    "charge",
    "chemshell_mode",
    "cmm",
    "configurations",
    "connect",
    "constrain",
    "contents",
    "coordno",
    "cosh-spring",
    "cosmoframe",
    "cosmoshape",
    "coulomb_subtract",
    "covalent",
    "covexp",
    "crossover",
    "current_time",
    "cutd",
    "cutmany",
    "cutp",
    "cuts",
    "cv",
    "cvec",
    "cwolf",
    "damped_dispersion",
    "default_weight",
    "deflist",
    "delay_field",
    "delay_force",
    "delf",
    "delta",
    "dhkl",
    "discrete",
    "dispersion",
    "ditto",
    "dmaximum",
    "dminimum",
    "dump",
    "eam_alloy",
    "eam_density",
    "eam_functional",
    "eam_potential_shift",
    "edip_accuracy",
    "edip_coordination",
    "edip_threebody",
    "edip_twobody",
    "edip_zmax",
    "einstein",
    "elastic",
    "electronegativity",
    "element",
    "end_field",
    "end_force",
    "energy",
    "ensemble",
    "entropy",
    "epsilon/sigma",
    "equatorial",
    "equilibration",
    "erferfc",
    "erfpot",
    "erongi",
    "ewaldrealradius",
    "exp2",
    "exponential_three_body",
    "exppowers",
    "external_force",
    "external_potential",
    "extracutoff",
    "factor",
    "fangle",
    "fbond",
    "fc_supercell",
    "fcartesian",
    "fcell",
    "fenergy",
    "fermi-dirac",
    "ffractional",
    "field",
    "finite",
    "fix_atom",
    "forceconstant",
    "fractional",
    "frequency",
    "frqtol",
    "ftol",
    "fvectors",
    "g3coulomb",
    "gamma_angular_steps",
    "gamma_direction_of_approach",
    "gastdamping",
    "gastiter",
    "gastparam",
    "gasttol",
    "gcmcexistingmolecules",
    "gcmcmolecule",
    "gcmcspecies",
    "gcoulomb",
    "gdcrit",
    "general",
    "genetic",
    "gexp",
    "ghost_supercell",
    "gmax",
    "gradients",
    "grid",
    "grimme_c6",
    "gtol",
    "harmonic",
    "hfdlc",
    "hfrefractive_index",
    "high-fq",
    "hydrogen-bond",
    "igauss",
    "ignore",
    "impurity",
    "include",
    "index_k",
    "initial_coordinates",
    "intconserved",
    "integrator",
    "inter",
    "interstitial",
    "intra",
    "inversion",
    "ionic",
    "iterations",
    "keyword",
    "kim_model",
    "kpoints",
    "lbfgs_order",
    "lennard",
    "library",
    "lin3",
    "line",
    "ljbuffered",
    "lorentzian_tolerance",
    "lowest_mode",
    "manybody",
    "marvin",
    "mass",
    "maths",
    "matrix_format",
    "maxcyc",
    "maximise",
    "maximum",
    "mcchemicalpotential",
    "mccreate",
    "mcdestroy",
    "mclowest",
    "mcmaxdisplacement",
    "mcmaxrotation",
    "mcmaxstrain",
    "mcmeans",
    "mcmove",
    "mcoutfreq",
    "mcrotate",
    "mcsample",
    "mcstep",
    "mcstrain",
    "mcswap",
    "mctrial",
    "mcvolume",
    "mdarchive",
    "mdmaxtemp",
    "mdmaxvolume",
    "meam_density",
    "meam_functional",
    "meam_rhotype",
    "meam_screening",
    "mei-davenport",
    "mincell",
    "minimum",
    "mm3angle",
    "mm3buck",
    "mm3stretch",
    "mode",
    "mode2a",
    "momentum_correct",
    "monopoleq",
    "morse",
    "move_2a_to_1",
    "murrell-mottram",
    "mutation",
    "name",
    "nebiterations",
    "nebrandom",
    "nebreplica",
    "nebspring",
    "nebtangent",
    "nebtolerance",
    "nmr",
    "nobond",
    "observables",
    "odirection",
    "omega",
    "omega_af",
    "omega_damping",
    "origin",
    "outofplane",
    "output",
    "p_flexible",
    "p_isotropic",
    "parallel",
    "pcell",
    "pdf",
    "pfinite",
    "pfractional",
    "piezoelectric",
    "plane_lj",
    "plumed_input",
    "plumed_log",
    "pointsperatom",
    "poisson_ratio",
    "polarisability",
    "polynomial",
    "potential",
    "potential_interpolation",
    "potgrid",
    "potsites",
    "pressure",
    "print",
    "production",
    "project_dos",
    "pvector",
    "qelectronegativity",
    "qeqiter",
    "qeqradius",
    "qeqtol",
    "qerfc",
    "qgrid",
    "qincrement",
    "qiterations",
    "qmmm",
    "qonsas",
    "qoverr2",
    "qreaxff",
    "qsolver",
    "qtaper",
    "qwolf",
    "radial_force",
    "random",
    "rangeforsmooth",
    "rbins",
    "rcartesian",
    "rcell",
    "rcspatial",
    "rdirection",
    "reaction",
    "reaxff0_bond",
    "reaxff0_lonepair",
    "reaxff0_over",
    "reaxff0_penalty",
    "reaxff0_torsion",
    "reaxff0_valence",
    "reaxff0_vdw",
    "reaxff1_angle",
    "reaxff1_include_under",
    "reaxff1_lonepair",
    "reaxff1_morse",
    "reaxff1_over",
    "reaxff1_radii",
    "reaxff1_under",
    "reaxff1_valence",
    "reaxff2_bo",
    "reaxff2_bond",
    "reaxff2_morse",
    "reaxff2_over",
    "reaxff2_pen",
    "reaxff3_angle",
    "reaxff3_conjugation",
    "reaxff3_hbond",
    "reaxff3_pen",
    "reaxff4_torsion",
    "reaxff_chi",
    "reaxff_gamma",
    "reaxff_mu",
    "reaxff_q0",
    "reaxff_qshell",
    "reaxff_r12",
    "reaxfftol",
    "region_1",
    "reldef",
    "reperfc",
    "resetvectors",
    "rfractional",
    "rmax",
    "rspeed",
    "rtol",
    "ryckaert",
    "rydberg",
    "sample",
    "sasexclude",
    "sasparticles",
    "sbulkenergy",
    "scale",
    "scan_cell",
    "scell",
    "scmaxsearch",
    "sdlc",
    "seed",
    "segmentsperatom",
    "sfinite",
    "sfractional",
    "shear_modulus",
    "shellmass",
    "shift",
    "shrink",
    "siginc",
    "size",
    "slater",
    "slower",
    "smelectronegativity",
    "solventepsilon",
    "solventradius",
    "solventrmax",
    "spacegroup",
    "species",
    "spline",
    "split",
    "spring",
    "sqomega",
    "squaredharmonic",
    "srefractive_index",
    "sregion2",
    "srglue",
    "sshift",
    "start",
    "static",
    "stepmx",
    "stop",
    "strain_derivative",
    "stress",
    "supercell",
    "svectors",
    "sw2",
    "sw2jb",
    "sw3",
    "sw3jb",
    "switch_minimiser",
    "switch_stepmx",
    "symbol",
    "symmetry_cell",
    "symmetry_number",
    "symmetry_operator",
    "synciterations",
    "syncsteps",
    "synctolerance",
    "tau_barostat",
    "tau_thermostat",
    "td_external_force",
    "td_field",
    "temperature",
    "terse",
    "tether",
    "three-body",
    "threshold",
    "time",
    "timestep",
    "title",
    "torangle",
    "torcosangle",
    "torexp",
    "torharm",
    "torsion",
    "tortaper",
    "totalenergy",
    "tournament",
    "tpxo",
    "translate",
    "tscale",
    "tsuneyuki",
    "ttol",
    "twist",
    "uff1",
    "uff3",
    "uff4",
    "uff_bondorder",
    "uffoop",
    "unfreeze",
    "unique",
    "units",
    "update",
    "urey-bradley",
    "vacancy",
    "variables",
    "vbo_twobody",
    "vdw",
    "vectors",
    "velocities",
    "volume",
    "weight",
    "wmax",
    "wmin",
    "write",
    "xangleangle",
    "xcosangleangle",
    "xoutofplane",
    "xtol",
    "youngs_modulus",
    "zbl",
)

# Note: 'static' should actually be 'static dielectric', and 'high-fq' 'high-fq dielectric'


[docs]class PotentialWriterAbstract(object): """abstract class for creating gulp inter-atomic potential inputs, from a data dictionary. sub-classes should override the ``get_description``, ``get_schema``, ``_make_string`` and ``read_exising`` methods """ _schema = None _fitting_schema = None
[docs] @classmethod def get_description(cls): """return description of the potential type""" return ""
[docs] @classmethod def get_schema(cls): """return the schema to validate input data Returns ------- dict """ # only load it once if cls._schema is None: cls._schema = cls._get_schema() return copy.deepcopy(cls._schema)
@classmethod def _get_schema(cls): """return the schema to validate input data should be overridden by subclass Returns ------- dict """ raise NotImplementedError
[docs] @classmethod def get_fitting_schema(cls): """return the schema to validate input data Returns ------- dict """ # only load it once if cls._fitting_schema is None: cls._fitting_schema = cls._get_fitting_schema() return copy.deepcopy(cls._fitting_schema)
@classmethod def _get_fitting_schema(cls): """return the schema to validate input data should be overridden by subclass Returns ------- dict """ raise NotImplementedError def _make_string(self, data, fitting_data=None): """create string for inter-atomic potential section for main.gin file Parameters ---------- data : dict dictionary of data species_filter : list[str] or None list of atomic symbols to filter by Returns ------- PotentialContent """ raise NotImplementedError
[docs] def create_content(self, data, species_filter=None, fitting_data=None): """create string for inter-atomic potential section for main.gin file Parameters ---------- data : dict dictionary of data required to create potential species_filter : list[str] or None list of atomic symbols to filter by fitting_data: dict or None a dictionary specifying which variables to flag for optimisation, of the form; {<type>: {<index>: [variable1, ...]}} if None, no flags will be added Returns ------- PotentialContent """ # validate data schema = self.get_schema() validate_against_schema(data, schema) # test that e.g. '1-2' and '2-1' aren't present if "2body" in data: bonds = [] for indices in data["2body"]: index_set = set(indices.split("-")) if index_set in bonds: raise AssertionError( "both {0}-{1} and {1}-{0} 2body keys exist in the data".format( *index_set ) ) bonds.append(index_set) # test that e.g. '1-2-3' and '3-2-1' aren't present (2 is the pivot atom) if "3body" in data: angles = [] for indices in data["3body"]: i1, i2, i3 = indices.split("-") if (i1, i2, i3) in angles: raise AssertionError( "both {0}-{1}-{2} and {2}-{1}-{0} 3body keys exist in the data".format( i1, i2, i3 ) ) angles.append((i1, i2, i3)) angles.append((i3, i2, i1)) if species_filter is not None: data = filter_by_species(data, species_filter) # validate fitting data if fitting_data is not None: fit_schema = self.get_fitting_schema() validate_against_schema(fitting_data, fit_schema) if species_filter is not None: fitting_data = filter_by_species(fitting_data, species_filter) if fitting_data["species"] != data["species"]: raise AssertionError( "the fitting data species ({}) must be equal to the data species ({})".format( fitting_data["species"], data["species"] ) ) # TODO same checks as main data and possibly switch 2body/3body indices to line up with those for main data return self._make_string(data, fitting_data=fitting_data)
[docs] def read_exising(self, lines): """read an existing potential file NOTE: this should be overriden by the subclass Parameters ---------- lines : list[str] Returns ------- dict the potential data Raises ------ IOError on parsing failure """ raise NotImplementedError
[docs] @staticmethod def read_atom_section(lines, lineno, number_atoms, global_args=None): """read a section of a potential file, e.g. :: H core He shell 1.00000000E+00 2.00000000E+00 12.00000 0 1 H B 3.00000000E+00 4.00000000E+00 0.00 12.00000 1 0 Parameters ---------- lines : list[str] the lines in the file lineno : int the current line number, should be the line below the option line number_atoms : int the number of interacting atoms expected global_args : dict additional arguments to add to the result of each line Returns ------- int: lineno the final line of the section set: species_set a set of species identified in the section dict: results {tuple[species]: {"values": str, "global": global_args}} Raises ------ IOError If a parsing error occurs """ results = {} symbol_set = set() while lineno < len(lines): line = lines[lineno] first_term = line.strip().split()[0] # ignore comment lines if first_term == "#": lineno += 1 continue # break if we find the next section if first_term in OPTION_TERMS: break # TODO ignore comments at end of line # check for breaking lines if line.strip().endswith(" &"): lineno += 1 line = line.strip()[:-2] + " " + lines[lineno].strip() # check for lines containing both atom symbols and types (core/shell) match_sym_type = re.findall( "^{}\\s+(.+)\\s*$".format( "\\s+".join([RE_SYMBOL_TYPE for _ in range(number_atoms)]) ), line.strip(), ) # check for lines containing only atom symbols (assume types to be core) match_sym = re.findall( "^{}\\s+(.+)\\s*$".format( "\\s+".join([RE_SYMBOL for _ in range(number_atoms)]) ), line.strip(), ) # TODO also match atomic numbers (and mixed type / no type) if match_sym_type: result = list(match_sym_type[0]) index = [] for _ in range(number_atoms): symbol = result[0] stype = {"c": "core", "s": "shell"}[result[1][0]] index.append("{} {}".format(symbol, stype)) result = result[2:] results[tuple(index)] = {"values": result[0], "global": global_args} symbol_set.update(index) elif match_sym: result = list(match_sym[0]) index = [] for _ in range(number_atoms): symbol = result[0] index.append("{} {}".format(symbol, "core")) result = result[1:] results[tuple(index)] = {"values": result[0], "global": global_args} symbol_set.update(index) else: raise IOError( "expected line to be of form " "'symbol1 <type> symbol2 <type> ... variables': {}".format(line) ) lineno += 1 return lineno - 1, symbol_set, results