Source code for pyiron_atomistics.atomistics.structure.structurestorage

# coding: utf-8
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.

"""
Alternative structure container that stores them in flattened arrays.
"""

from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pyiron_base import FlattenedStorage
from pyiron_snippets.import_alarm import ImportAlarm
from structuretoolkit.common.error import SymmetryError

import pyiron_atomistics.atomistics.structure.has_structure as pa_has_structure
from pyiron_atomistics.atomistics.structure.atom import Atom
from pyiron_atomistics.atomistics.structure.atoms import Atoms
from pyiron_atomistics.atomistics.structure.neighbors import NeighborsTrajectory

with ImportAlarm(
    "Some plotting functionality requires the seaborn library."
) as seaborn_alarm:
    import seaborn as sns


[docs] class StructureStorage(FlattenedStorage, pa_has_structure.HasStructure): """ Class that can write and read lots of structures from and to hdf quickly. This is done by storing positions, cells, etc. into large arrays instead of writing every structure into a new group. Structures are stored together with an identifier that should be unique. The class can be initialized with the number of structures and the total number of atoms in all structures, but re-allocates memory as necessary when more (or larger) structures are added than initially anticipated. You can add structures and a human-readable name with :meth:`.add_structure()`. >>> container = StructureStorage() >>> container.add_structure(Atoms(...), "fcc") >>> container.add_structure(Atoms(...), "hcp") >>> container.add_structure(Atoms(...), "bcc") Accessing stored structures works with :meth:`.get_strucure()`. You can either pass the identifier you passed when adding the structure or the numeric index >>> container.get_structure(frame=0) == container.get_structure(frame="fcc") True Custom arrays may also be defined on the container >>> container.add_array("energy", shape=(), dtype=np.float64, fill=-1, per="chunk") (chunk means structure in this case, see below and :class:`.FlattenedStorage`) You can then pass arrays of the corresponding shape to :meth:`add_structure()` >>> container.add_structure(Atoms(...), "grain_boundary", energy=3.14) Saved arrays are accessed with :meth:`.get_array()` >>> container.get_array("energy", 3) 3.14 >>> container.get_array("energy", 0) -1 It is also possible to use the same names in :meth:`.get_array()` as in :meth:`.get_structure()`. >>> container.get_array("energy", 0) == container.get_array("energy", "fcc") True The length of the container is the number of structures inside it. >>> len(container) 4 Each structure corresponds to a chunk in :class:`.FlattenedStorage` and each atom to an element. By default the following arrays are defined for each structure: - identifier shape=(), dtype=str, per chunk; human readable name of the structure - cell shape=(3,3), dtype=np.float64, per chunk; cell shape - pbc shape=(3,), dtype=bool per chunk; periodic boundary conditions - symbols: shape=(), dtype=str, per element; chemical symbol - positions: shape=(3,), dtype=np.float64, per element: atomic positions If a structure has spins/magnetic moments defined on its atoms these will be saved in a per atom array as well. In that case, however all structures in the container must either have all collinear spins or all non-collinear spins. """
[docs] def __init__(self, num_atoms=1, num_structures=1): """ Create new structure container. Args: num_atoms (int): total number of atoms across all structures to pre-allocate num_structures (int): number of structures to pre-allocate """ super().__init__(num_elements=num_atoms, num_chunks=num_structures) self._element_cache = None self._plots = None
def _init_arrays(self): super()._init_arrays() # 2 character unicode array for chemical symbols self._per_element_arrays["symbols"] = np.full( self._num_elements_alloc, "XX", dtype=np.dtype("U2") ) self._per_element_arrays["positions"] = np.empty((self._num_elements_alloc, 3)) self._per_chunk_arrays["cell"] = np.empty((self._num_chunks_alloc, 3, 3)) self._per_chunk_arrays["pbc"] = np.empty( (self._num_elements_alloc, 3), dtype=bool ) @property def symbols(self): """:meta private:""" return self._per_element_arrays["symbols"] @property def positions(self): """:meta private:""" return self._per_element_arrays["positions"] @property def start_index(self): """:meta private:""" return self._per_chunk_arrays["start_index"] @property def length(self): """:meta private:""" return self._per_chunk_arrays["length"] @property def identifier(self): """:meta private:""" return self._per_chunk_arrays["identifier"] @property def cell(self): """:meta private:""" return self._per_chunk_arrays["cell"] @property def pbc(self): """:meta private:""" return self._per_chunk_arrays["pbc"]
[docs] def set_array(self, name, frame, value): """ Add array for given structure. Works for per chunk and per element arrays. Args: name (str): name of array to set frame (int, str): selects structure to set, as in :meth:`.get_strucure()` value: value (for per chunk) or array of values (for per element); type and shape as per :meth:`.hasarray()`. Raises: `KeyError`: if array with name does not exists """ super().set_array(name, frame, value) # invalidate cache when writing to symbols; could be smarter by checking if value is not in in cache but w/e if name == "symbols": self._element_cache = None
[docs] def get_elements(self) -> List[str]: """ Return a list of chemical elements present in the storage. Returns: :class:`list`: list of unique elements as strings of chemical symbols """ if self._element_cache is None: self._element_cache = list(np.unique(self._per_element_arrays["symbols"])) return self._element_cache
[docs] def add_structure(self, structure, identifier=None, **arrays): """ Add a new structure to the container. Additional keyword arguments given specify additional arrays to store for the structure. If an array with the given keyword name does not exist yet, it will be added to the container. >>> container = StructureStorage() >>> container.add_structure(Atoms(...), identifier="A", energy=3.14) >>> container.get_array("energy", 0) 3.14 If the first axis of the extra array matches the length of the given structure, it will be added as an per atom array, otherwise as an per structure array. >>> structure = Atoms(...) >>> container.add_structure(structure, identifier="B", forces=len(structure) * [[0,0,0]]) >>> len(container.get_array("forces", 1)) == len(structure) True Reshaping the array to have the first axis be length 1 forces the array to be set as per structure array. That axis will then be stripped. >>> container.add_structure(Atoms(...), identifier="C", pressure=np.eye(3)[np.newaxis, :, :]) >>> container.get_array("pressure", 2).shape (3, 3) Args: structure (:class:`.Atoms`): structure to add identifier (str, optional): human-readable name for the structure, if None use current structre index as string **kwargs: additional arrays to store for structure """ if structure.has("initial_magmoms"): arrays["spins"] = structure.spins if "selective_dynamics" in structure.get_tags(): arrays["selective_dynamics"] = structure.selective_dynamics self.add_chunk( len(structure), identifier=identifier, symbols=np.array(structure.symbols), positions=structure.positions, cell=[structure.cell.array], pbc=[structure.pbc], **arrays, )
def _translate_frame(self, frame): try: return self.find_chunk(frame) except KeyError: raise KeyError(f"No structure named {frame}.") from None def _get_structure(self, frame=-1, wrap_atoms=True): symbols = self.get_array("symbols", frame) elements = [e for e in self.get_elements() if e in symbols] index_map = {e: i for i, e in enumerate(elements)} try: magmoms = self.get_array("spins", frame) except KeyError: # not all structures have spins saved on them magmoms = None structure = Atoms( species=[Atom(e).element for e in elements], indices=[index_map[e] for e in symbols], positions=self.get_array("positions", frame), cell=self.get_array("cell", frame), pbc=self.get_array("pbc", frame), magmoms=magmoms, ) if self.has_array("selective_dynamics"): structure.add_tag(selective_dynamics=[True, True, True]) selective_dynamics = self.get_array("selective_dynamics", frame) for i, d in enumerate(selective_dynamics): structure.selective_dynamics[i] = d.tolist() return structure def _number_of_structures(self): return len(self) def _get_hdf_group_name(self): return "structures" @property def plot(self): """ Accessor for :class:`.StructurePlots` instance using these structures. """ if self._plots is None: self._plots = StructurePlots(self) return self._plots
[docs] class StructurePlots: """ Simple interface to plot various properties of structures. """
[docs] @seaborn_alarm def __init__(self, store: StructureStorage): self._store = store self._neigh = None
[docs] def atoms(self): """ Plot a histogram of the number of atoms in each structure. """ length = self._store["length"] lo = length.min() hi = length.max() # make the bins fall in between whole numbers and include hi plt.hist(length, bins=np.arange(lo, hi + 2) - 0.5) plt.xlabel("#Atoms") plt.ylabel("Count")
[docs] def cell(self, angle_in_degrees=True): """ Plot histograms of cell parameters. Plotted are atomic volume, density, cell vector lengths and cell vector angles in separate subplots all on a log-scale. Args: angle_in_degrees (bool): whether unit for angles is degree or radians Returns: `DataFrame`: contains the plotted information in the columns: - a: length of first vector - b: length of second vector - c: length of third vector - alpha: angle between first and second vector - beta: angle between second and third vector - gamma: angle between third and first vector - V: volume of the cell - N: number of atoms in the cell """ N = self._store.get_array("length") C = self._store.get_array("cell") def get_angle(cell, idx=0): return np.arccos( np.dot(cell[idx], cell[(idx + 1) % 3]) / np.linalg.norm(cell[idx]) / np.linalg.norm(cell[(idx + 1) % 3]) ) def extract(n, c): return { "a": np.linalg.norm(c[0]), "b": np.linalg.norm(c[1]), "c": np.linalg.norm(c[2]), "alpha": get_angle(c, 0), "beta": get_angle(c, 1), "gamma": get_angle(c, 2), } df = pd.DataFrame([extract(n, c) for n, c in zip(N, C)]) df["V"] = np.linalg.det(C) df["N"] = N if angle_in_degrees: df["alpha"] = np.rad2deg(df["alpha"]) df["beta"] = np.rad2deg(df["beta"]) df["gamma"] = np.rad2deg(df["gamma"]) plt.subplot(1, 4, 1) plt.title("Atomic Volume") plt.hist(df.V / df.N, bins=20, log=True) plt.xlabel(r"$V$ [$\AA^3$]") plt.subplot(1, 4, 2) plt.title("Density") plt.hist(df.N / df.V, bins=20, log=True) plt.xlabel(r"$\rho$ [$\AA^{-3}$]") plt.subplot(1, 4, 3) plt.title("Lattice Vector Lengths") plt.hist([df.a, df.b, df.c], log=True) plt.xlabel(r"$a,b,c$ [$\AA$]") plt.subplot(1, 4, 4) plt.title("Lattice Vector Angles") plt.hist([df.alpha, df.beta, df.gamma], log=True) if angle_in_degrees: label = r"$\alpha,\beta,\gamma$ [°]" else: label = r"$\alpha,\beta,\gamma$ [rad]" plt.xlabel(label) return df
def _calc_spacegroups(self, symprec=1e-3): """ Calculate space groups of all structures. Args: symprec (float): symmetry precision given to spglib Returns: DataFrame: contains columns 'crystal_system' (str) and 'space_group' (int) for each structure """ def get_crystal_system(num): if num in range(1, 3): return "triclinic" elif num in range(3, 16): return "monoclinic" elif num in range(16, 75): return "orthorhombic" elif num in range(75, 143): return "tetragonal" elif num in range(143, 168): return "trigonal" elif num in range(168, 195): return "hexagonal" elif num in range(195, 231): return "cubic" def extract(s): try: spg = s.get_symmetry(symprec=symprec).spacegroup["Number"] except SymmetryError: spg = 1 return {"space_group": spg, "crystal_system": get_crystal_system(spg)} return pd.DataFrame(map(extract, self._store.iter_structures()))
[docs] def spacegroups(self, symprec=1e-3): """ Plot histograms of space groups and crystal systems. Spacegroups and crystal systems are plotted in separate subplots. Args: symprec (float): precision of the symmetry search (passed to spglib) Returns: DataFrame: contains two columns "space_group", "crystal_system" for each structure """ df = self._calc_spacegroups(symprec=symprec) plt.subplot(1, 2, 1) plt.hist(df.space_group, bins=230) plt.xlabel("Space Group") plt.subplot(1, 2, 2) l, h = np.unique(df.crystal_system, return_counts=True) sort_key = { "triclinic": 1, "monoclinic": 3, "orthorhombic": 16, "tetragonal": 75, "trigonal": 143, "hexagonal": 168, "cubic": 195, } I = np.argsort([sort_key[ll] for ll in l]) plt.bar(l[I], h[I]) plt.xlabel("Crystal System") plt.xticks(rotation=35) return df
def _calc_neighbors(self, num_neighbors): """ Calculate the neighbor information with additional caching. If 'distances' and 'shells' are provided in the underlying store, they are returned directly without checking `num_neighbors`. If they are not provided there, they are calculated here and cached. Recalculation happens when a different `num_neighbors` is provided than in a previous call or the underlying store changes. If `num_neighbors` is `None` on the first call, the default is 36. Returns: dict: with keys 'distances' and 'shells' containing the respective flattened arrays from :meth:`.Atoms.get_neighbors`. """ if self._store.has_array("distances") and self._store.has_array("shells"): return { "distances": self._store["distances"], "shells": self._store["shells"], } # check that _store and _neigh are still consistent if ( self._neigh is None or len(self._store) != len(self._neigh) or ( num_neighbors is None or self._neigh.has_array("distances")["shape"][0] != num_neighbors ) ): if num_neighbors is None: num_neighbors = 36 self._neigh = FlattenedStorage() neigh_traj = NeighborsTrajectory( has_structure=self._store, num_neighbors=num_neighbors, store=self._neigh, ) return { "distances": self._neigh["distances"], "shells": self._neigh["shells"], }
[docs] def coordination(self, num_shells=4, log=True, num_neighbors=None): """ Plot histogram of coordination in neighbor shells. Computes one histogram of the number of neighbors in each neighbor shell up to `num_shells` and then plots them together. If the underlying :class:`.StructureStorage` has a 'shells' array defined it is used, if not it is calculated on the fly. Args: num_shells (int): maximum shell to plot num_neighbors (int): maximum number of neighbors to calculate, when 'shells' is not defined in storage, default is the value from the previous call or 36 log (float): plot histogram values on a log scale """ neigh = self._calc_neighbors(num_neighbors=num_neighbors) shells = neigh["shells"] shell_index = ( shells[np.newaxis, :, :] == np.arange(1, num_shells + 1)[:, np.newaxis, np.newaxis] ) neigh_count = shell_index.sum(axis=-1) ticks = np.arange(neigh_count.min(), neigh_count.max() + 1) plt.hist( neigh_count.T, bins=ticks - 0.5, log=True, label=[f"{i}." for i in range(1, num_shells + 1)], ) plt.xticks(ticks) plt.xlabel("Number of Neighbors") plt.legend(title="Shell") plt.title("Neighbor Coordination in Shells")
[docs] def distances( self, bins: int = 50, num_neighbors: int = None, normalize: bool = False, ): """ Plot a histogram of the neighbor distances. Setting `normalize` plots the radial distribution function. Args: bins (int): number of bins num_neighbors (int): maximum number of neighbors to calculate, when 'shells' or 'distances' are not defined in storage default is the value from the previous call or 36 normalize (bool): normalize the distribution by the surface area of the radial bin, 4pi r^2 """ neigh = self._calc_neighbors(num_neighbors=num_neighbors) distances = neigh["distances"].flatten() if normalize: plt.hist( distances, bins=bins, weights=1 / (4 * np.pi * distances**2), ) plt.ylabel("Neighbor density [$\mathrm{\AA}^{-2}$]") else: plt.hist(distances, bins=bins) plt.ylabel("Neighbor count") plt.xlabel(r"Distance [$\mathrm{\AA}$]")
[docs] def shell_distances(self, num_shells=4, num_neighbors=None): """ Plot a violin plot of the neighbor distances in shells up to `num_shells`. Args: num_shells (int): maximum shell to plot num_neighbors (int): maximum number of neighbors to calculate, when 'shells' or 'distances' are not defined in storage default is the value from the previous call or 36 """ neigh = self._calc_neighbors(num_neighbors=num_neighbors) shells = neigh["shells"] distances = neigh["distances"] R = distances.flatten() S = shells.ravel() d = pd.DataFrame( {"distance": R[S < num_shells + 1], "shells": S[S < num_shells + 1]} ) sns.violinplot(y=d.shells, x=d.distance, scale="width", orient="h") plt.xlabel(r"Distance [$\AA$]") plt.ylabel("Shell")
[docs] def concentration(self, elements: List[str] = None, **kwargs) -> pd.DataFrame: """ Plot histograms of the concentrations in each structure. Args: elements (list of str): elements to plot the histograms for; default is for all elements in the container **kwargs: passed through to `seaborn.histplot` Returns: `pandas.DataFrame`: table of concentrations in each structure; column headers are the element names """ if elements is not None: for elem in elements: if elem not in self._store.get_elements(): raise ValueError(f"Element {elem} not present in storage!") else: elements = self._store.get_elements() df = pd.DataFrame( [ {elem: sum(elem == sym) / len(sym) for elem in elements} for sym in self._store.get_array_ragged("symbols") ] ) sns.histplot( data=df.melt(var_name="element", value_name="concentration"), x="concentration", hue="element", multiple="dodge", **kwargs, ) return df